diff options
author | Mike Pall <mike> | 2009-12-08 19:46:35 +0100 |
---|---|---|
committer | Mike Pall <mike> | 2009-12-08 19:46:35 +0100 |
commit | 55b16959717084884fd4a0cbae6d19e3786c20c7 (patch) | |
tree | c8a07a43c13679751ed25a9d06796e9e7b2134a6 | |
download | luajit-2.0.0-beta1.tar.gz luajit-2.0.0-beta1.tar.bz2 luajit-2.0.0-beta1.zip |
RELEASE LuaJIT-2.0.0-beta1 (tag: v2.0.0-beta1)
122 files changed, 42143 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..1a07bf75 --- /dev/null +++ b/.gitignore | |||
@@ -0,0 +1,11 @@ | |||
1 | *.[oa] | ||
2 | *.so | ||
3 | *.obj | ||
4 | *.lib | ||
5 | *.exp | ||
6 | *.dll | ||
7 | *.exe | ||
8 | *.manifest | ||
9 | *.dmp | ||
10 | *.swp | ||
11 | .tags | ||
diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..67347041 --- /dev/null +++ b/Makefile | |||
@@ -0,0 +1,84 @@ | |||
1 | ############################################################################## | ||
2 | # LuaJIT top level Makefile for installation. Requires GNU Make. | ||
3 | # | ||
4 | # Suitable for POSIX platforms (Linux, *BSD, OSX etc.). | ||
5 | # Note: src/Makefile has many more configurable options. | ||
6 | # | ||
7 | # ##### This Makefile is NOT useful for installation on Windows! ##### | ||
8 | # For MSVC, please follow the instructions given in src/msvcbuild.bat. | ||
9 | # For MinGW and Cygwin, cd to src and run make with the Makefile there. | ||
10 | # NYI: add wininstall.bat | ||
11 | # | ||
12 | # Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
13 | ############################################################################## | ||
14 | |||
15 | BASEVER= 2.0.0 | ||
16 | VERSION= 2.0.0-beta1 | ||
17 | |||
18 | ############################################################################## | ||
19 | # | ||
20 | # Change the installation path as needed and modify src/luaconf.h accordingly. | ||
21 | # Note: PREFIX must be an absolute path! | ||
22 | # | ||
23 | PREFIX= /usr/local | ||
24 | ############################################################################## | ||
25 | |||
26 | INSTALL_BIN= $(PREFIX)/bin | ||
27 | INSTALL_NAME= luajit-$(VERSION) | ||
28 | INSTALL_T= $(INSTALL_BIN)/$(INSTALL_NAME) | ||
29 | INSTALL_TSYM= $(INSTALL_BIN)/luajit | ||
30 | INSTALL_INC= $(PREFIX)/include/luajit-$(BASEVER) | ||
31 | INSTALL_JITLIB= $(PREFIX)/share/luajit-$(VERSION)/jit | ||
32 | |||
33 | MKDIR= mkdir -p | ||
34 | SYMLINK= ln -f -s | ||
35 | INSTALL_X= install -m 0755 | ||
36 | INSTALL_F= install -m 0644 | ||
37 | |||
38 | FILES_T= luajit | ||
39 | FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h | ||
40 | FILES_JITLIB= bc.lua v.lua dump.lua dis_x86.lua dis_x64.lua vmdef.lua | ||
41 | |||
42 | ############################################################################## | ||
43 | |||
44 | INSTALL_DEP= src/luajit | ||
45 | |||
46 | all $(INSTALL_DEP): | ||
47 | @echo "==== Building LuaJIT $(VERSION) ====" | ||
48 | $(MAKE) -C src | ||
49 | @echo "==== Successfully built LuaJIT $(VERSION) ====" | ||
50 | |||
51 | install: $(INSTALL_DEP) | ||
52 | @echo "==== Installing LuaJIT $(VERSION) to $(PREFIX) ====" | ||
53 | $(MKDIR) $(INSTALL_BIN) $(INSTALL_INC) $(INSTALL_JITLIB) | ||
54 | cd src && $(INSTALL_X) $(FILES_T) $(INSTALL_T) | ||
55 | cd src && $(INSTALL_F) $(FILES_INC) $(INSTALL_INC) | ||
56 | cd lib && $(INSTALL_F) $(FILES_JITLIB) $(INSTALL_JITLIB) | ||
57 | @echo "==== Successfully installed LuaJIT $(VERSION) to $(PREFIX) ====" | ||
58 | @echo "" | ||
59 | @echo "Note: the beta releases deliberately do NOT install a symlink for luajit" | ||
60 | @echo "You can do this now by running this command (with sudo):" | ||
61 | @echo "" | ||
62 | @echo " $(SYMLINK) $(INSTALL_NAME) $(INSTALL_TSYM)" | ||
63 | @echo "" | ||
64 | |||
65 | ############################################################################## | ||
66 | |||
67 | amalg: | ||
68 | @echo "Building LuaJIT $(VERSION)" | ||
69 | $(MAKE) -C src amalg | ||
70 | |||
71 | clean: | ||
72 | $(MAKE) -C src clean | ||
73 | |||
74 | cleaner: | ||
75 | $(MAKE) -C src cleaner | ||
76 | |||
77 | distclean: | ||
78 | $(MAKE) -C src distclean | ||
79 | |||
80 | SUB_TARGETS= amalg clean cleaner distclean | ||
81 | |||
82 | .PHONY: all install $(SUB_TARGETS) | ||
83 | |||
84 | ############################################################################## | ||
diff --git a/README b/README new file mode 100644 --- /dev/null +++ b/README | |||
@@ -0,0 +1,16 @@ | |||
1 | README for LuaJIT 2.0.0-beta1 | ||
2 | ----------------------------- | ||
3 | |||
4 | LuaJIT is a Just-In-Time (JIT) compiler for the Lua programming language. | ||
5 | |||
6 | Project Homepage: http://luajit.org/ | ||
7 | |||
8 | LuaJIT is Copyright (C) 2005-2009 Mike Pall. | ||
9 | LuaJIT is free software, released under the MIT/X license. | ||
10 | See full Copyright Notice in src/luajit.h | ||
11 | |||
12 | Documentation for LuaJIT is available in HTML format. | ||
13 | Please point your favorite browser to: | ||
14 | |||
15 | doc/luajit.html | ||
16 | |||
diff --git a/doc/api.html b/doc/api.html new file mode 100644 index 00000000..79788d95 --- /dev/null +++ b/doc/api.html | |||
@@ -0,0 +1,203 @@ | |||
1 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> | ||
2 | <html> | ||
3 | <head> | ||
4 | <title>API Extensions</title> | ||
5 | <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> | ||
6 | <meta name="Author" content="Mike Pall"> | ||
7 | <meta name="Copyright" content="Copyright (C) 2005-2009, Mike Pall"> | ||
8 | <meta name="Language" content="en"> | ||
9 | <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> | ||
10 | <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> | ||
11 | </head> | ||
12 | <body> | ||
13 | <div id="site"> | ||
14 | <a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> | ||
15 | </div> | ||
16 | <div id="head"> | ||
17 | <h1>API Extensions</h1> | ||
18 | </div> | ||
19 | <div id="nav"> | ||
20 | <ul><li> | ||
21 | <a href="luajit.html">LuaJIT</a> | ||
22 | <ul><li> | ||
23 | <a href="install.html">Installation</a> | ||
24 | </li><li> | ||
25 | <a href="running.html">Running</a> | ||
26 | </li><li> | ||
27 | <a class="current" href="api.html">API Extensions</a> | ||
28 | </li></ul> | ||
29 | </li><li> | ||
30 | <a href="status.html">Status</a> | ||
31 | <ul><li> | ||
32 | <a href="changes.html">Changes</a> | ||
33 | </li></ul> | ||
34 | </li><li> | ||
35 | <a href="faq.html">FAQ</a> | ||
36 | </li><li> | ||
37 | <a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> | ||
38 | </li></ul> | ||
39 | </div> | ||
40 | <div id="main"> | ||
41 | <p> | ||
42 | LuaJIT is fully upwards-compatible with Lua 5.1. It supports all | ||
43 | <a href="http://www.lua.org/manual/5.1/manual.html#5"><span class="ext">»</span> standard Lua | ||
44 | library functions</a> and the full set of | ||
45 | <a href="http://www.lua.org/manual/5.1/manual.html#3"><span class="ext">»</span> Lua/C API | ||
46 | functions</a>. | ||
47 | </p> | ||
48 | <p> | ||
49 | LuaJIT is also fully ABI-compatible to Lua 5.1 at the linker/dynamic | ||
50 | loader level. This means you can compile a C module against the | ||
51 | standard Lua headers and load the same shared library from either Lua | ||
52 | or LuaJIT. | ||
53 | </p> | ||
54 | |||
55 | <h2 id="bit"><tt>bit.*</tt> &mdash; Bitwise Operations</h2> | |
56 | <p> | ||
57 | LuaJIT supports all bitwise operations as defined by | ||
58 | <a href="http://bitop.luajit.org"><span class="ext">»</span> Lua BitOp</a>: | ||
59 | </p> | ||
60 | <pre class="code"> | ||
61 | bit.tobit bit.tohex bit.bnot bit.band bit.bor bit.bxor | ||
62 | bit.lshift bit.rshift bit.arshift bit.rol bit.ror bit.bswap | ||
63 | </pre> | ||
64 | <p> | ||
65 | This module is a LuaJIT built-in &mdash; you don't need to download or | |
66 | install Lua BitOp. The Lua BitOp site has full documentation for all | ||
67 | <a href="http://bitop.luajit.org/api.html"><span class="ext">»</span> Lua BitOp API functions</a>. | ||
68 | </p> | ||
69 | <p> | ||
70 | Please make sure to <tt>require</tt> the module before using any of | ||
71 | its functions: | ||
72 | </p> | ||
73 | <pre class="code"> | ||
74 | local bit = require("bit") | ||
75 | </pre> | ||
76 | <p> | ||
77 | An already installed Lua BitOp module is ignored by LuaJIT. | ||
78 | This way you can use bit operations from both Lua and LuaJIT on a | ||
79 | shared installation. | ||
80 | </p> | ||
81 | |||
82 | <h2 id="jit"><tt>jit.*</tt> &mdash; JIT compiler control</h2> | |
83 | <p> | ||
84 | The functions in this built-in module control the behavior | ||
85 | of the JIT compiler engine. | ||
86 | </p> | ||
87 | |||
88 | <h3 id="jit_onoff"><tt>jit.on()<br> | ||
89 | jit.off()</tt></h3> | ||
90 | <p> | ||
91 | Turns the whole JIT compiler on (default) or off. | ||
92 | </p> | ||
93 | <p> | ||
94 | These functions are typically used with the command line options | ||
95 | <tt>-j on</tt> or <tt>-j off</tt>. | ||
96 | </p> | ||
97 | |||
98 | <h3 id="jit_flush"><tt>jit.flush()</tt></h3> | ||
99 | <p> | ||
100 | Flushes the whole cache of compiled code. | ||
101 | </p> | ||
102 | |||
103 | <h3 id="jit_flush_tr"><tt>jit.flush(tr)</tt></h3> | ||
104 | <p> | ||
105 | Flushes the code for the specified root trace and all of its | ||
106 | side traces from the cache. | ||
107 | </p> | ||
108 | |||
109 | <h3 id="jit_onoff_func"><tt>jit.on(func|true [,true|false])<br> | ||
110 | jit.off(func|true [,true|false])<br> | ||
111 | jit.flush(func|true [,true|false])</tt></h3> | ||
112 | <p> | ||
113 | <tt>jit.on</tt> enables JIT compilation for a Lua function (this is | ||
114 | the default). | ||
115 | </p> | ||
116 | <p> | ||
117 | <tt>jit.off</tt> disables JIT compilation for a Lua function and | ||
118 | flushes any already compiled code from the code cache. | ||
119 | </p> | ||
120 | <p> | ||
121 | <tt>jit.flush</tt> flushes the code, but doesn't affect the | ||
122 | enable/disable status. | ||
123 | </p> | ||
124 | <p> | ||
125 | The current function, i.e. the Lua function calling this library | ||
126 | function, can also be specified by passing <tt>true</tt> as the first | ||
127 | argument. | ||
128 | </p> | ||
129 | <p> | ||
130 | If the second argument is <tt>true</tt>, JIT compilation is also | ||
131 | enabled, disabled or flushed recursively for all subfunctions of a | ||
132 | function. With <tt>false</tt> only the subfunctions are affected. | ||
133 | </p> | ||
134 | <p> | ||
135 | The <tt>jit.on</tt> and <tt>jit.off</tt> functions only set a flag | ||
136 | which is checked when the function is about to be compiled. They do | ||
137 | not trigger immediate compilation. | ||
138 | </p> | ||
139 | <p> | ||
140 | Typical usage is <tt>jit.off(true, true)</tt> in the main chunk | ||
141 | of a module to turn off JIT compilation for the whole module for | ||
142 | debugging purposes. | ||
143 | </p> | ||
144 | |||
145 | <h3 id="jit_version"><tt>jit.version</tt></h3> | ||
146 | <p> | ||
147 | Contains the LuaJIT version string. | ||
148 | </p> | ||
149 | |||
150 | <h3 id="jit_version_num"><tt>jit.version_num</tt></h3> | ||
151 | <p> | ||
152 | Contains the version number of the LuaJIT core. Version xx.yy.zz | ||
153 | is represented by the decimal number xxyyzz. | ||
154 | </p> | ||
155 | |||
156 | <h3 id="jit_arch"><tt>jit.arch</tt></h3> | ||
157 | <p> | ||
158 | Contains the target architecture name (CPU and optional ABI). | ||
159 | </p> | ||
160 | |||
161 | <h2 id="jit_opt"><tt>jit.opt.*</tt> &mdash; JIT compiler optimization control</h2> | |
162 | <p> | ||
163 | This module provides the backend for the <tt>-O</tt> command line | ||
164 | option. | ||
165 | </p> | ||
166 | <p> | ||
167 | You can also use it programmatically, e.g.: | ||
168 | </p> | ||
169 | <pre class="code"> | ||
170 | jit.opt.start(2) -- same as -O2 | ||
171 | jit.opt.start("-dce") | ||
172 | jit.opt.start("hotloop=10", "hotexit=2") | ||
173 | </pre> | ||
174 | <p> | ||
175 | Unlike in LuaJIT 1.x, the module is built-in and | ||
176 | <b>optimization is turned on by default!</b> | ||
177 | It's no longer necessary to run <tt>require("jit.opt").start()</tt>, | ||
178 | which was one of the ways to enable optimization. | ||
179 | </p> | ||
180 | |||
181 | <h2 id="jit_util"><tt>jit.util.*</tt> &mdash; JIT compiler introspection</h2> | |
182 | <p> | ||
183 | This module holds functions to introspect the bytecode, generated | ||
184 | traces, the IR and the generated machine code. The functionality | ||
185 | provided by this module is still in flux and therefore undocumented. | ||
186 | </p> | ||
187 | <p> | ||
188 | The debug modules <tt>-jbc</tt>, <tt>-jv</tt> and <tt>-jdump</tt> make | ||
189 | extensive use of these functions. Please check out their source code, | ||
190 | if you want to know more. | ||
191 | </p> | ||
192 | <br class="flush"> | ||
193 | </div> | ||
194 | <div id="foot"> | ||
195 | <hr class="hide"> | ||
196 | Copyright © 2005-2009 Mike Pall | ||
197 | <span class="noprint"> | ||
198 | · | ||
199 | <a href="contact.html">Contact</a> | ||
200 | </span> | ||
201 | </div> | ||
202 | </body> | ||
203 | </html> | ||
diff --git a/doc/bluequad-print.css b/doc/bluequad-print.css new file mode 100644 index 00000000..00a6b154 --- /dev/null +++ b/doc/bluequad-print.css | |||
@@ -0,0 +1,166 @@ | |||
1 | /* Copyright (C) 2004-2009 Mike Pall. | ||
2 | * | ||
3 | * You are welcome to use the general ideas of this design for your own sites. | ||
4 | * But please do not steal the stylesheet, the layout or the color scheme. | ||
5 | */ | ||
6 | body { | ||
7 | font-family: serif; | ||
8 | font-size: 11pt; | ||
9 | margin: 0 3em; | ||
10 | padding: 0; | ||
11 | border: none; | ||
12 | } | ||
13 | a:link, a:visited, a:hover, a:active { | ||
14 | text-decoration: none; | ||
15 | background: transparent; | ||
16 | color: #0000ff; | ||
17 | } | ||
18 | h1, h2, h3 { | ||
19 | font-family: sans-serif; | ||
20 | font-weight: bold; | ||
21 | text-align: left; | ||
22 | margin: 0.5em 0; | ||
23 | padding: 0; | ||
24 | } | ||
25 | h1 { | ||
26 | font-size: 200%; | ||
27 | } | ||
28 | h2 { | ||
29 | font-size: 150%; | ||
30 | } | ||
31 | h3 { | ||
32 | font-size: 125%; | ||
33 | } | ||
34 | p { | ||
35 | margin: 0 0 0.5em 0; | ||
36 | padding: 0; | ||
37 | } | ||
38 | ul, ol { | ||
39 | margin: 0.5em 0; | ||
40 | padding: 0 0 0 2em; | ||
41 | } | ||
42 | ul { | ||
43 | list-style: outside square; | ||
44 | } | ||
45 | ol { | ||
46 | list-style: outside decimal; | ||
47 | } | ||
48 | li { | ||
49 | margin: 0; | ||
50 | padding: 0; | ||
51 | } | ||
52 | dl { | ||
53 | margin: 1em 0; | ||
54 | padding: 1em; | ||
55 | border: 1px solid black; | ||
56 | } | ||
57 | dt { | ||
58 | font-weight: bold; | ||
59 | margin: 0; | ||
60 | padding: 0; | ||
61 | } | ||
62 | dt sup { | ||
63 | float: right; | ||
64 | margin-left: 1em; | ||
65 | } | ||
66 | dd { | ||
67 | margin: 0.5em 0 0 2em; | ||
68 | padding: 0; | ||
69 | } | ||
70 | table { | ||
71 | table-layout: fixed; | ||
72 | width: 100%; | ||
73 | margin: 1em 0; | ||
74 | padding: 0; | ||
75 | border: 1px solid black; | ||
76 | border-spacing: 0; | ||
77 | border-collapse: collapse; | ||
78 | } | ||
79 | tr { | ||
80 | margin: 0; | ||
81 | padding: 0; | ||
82 | border: none; | ||
83 | } | ||
84 | td { | ||
85 | text-align: left; | ||
86 | margin: 0; | ||
87 | padding: 0.2em 0.5em; | ||
88 | border-top: 1px solid black; | ||
89 | border-bottom: 1px solid black; | ||
90 | } | ||
91 | tr.separate td { | ||
92 | border-top: double; | ||
93 | } | ||
94 | tt, pre, code, kbd, samp { | ||
95 | font-family: monospace; | ||
96 | font-size: 75%; | ||
97 | } | ||
98 | kbd { | ||
99 | font-weight: bolder; | ||
100 | } | ||
101 | blockquote, pre { | ||
102 | margin: 1em 2em; | ||
103 | padding: 0; | ||
104 | } | ||
105 | img { | ||
106 | border: none; | ||
107 | vertical-align: baseline; | ||
108 | margin: 0; | ||
109 | padding: 0; | ||
110 | } | ||
111 | img.left { | ||
112 | float: left; | ||
113 | margin: 0.5em 1em 0.5em 0; | ||
114 | } | ||
115 | img.right { | ||
116 | float: right; | ||
117 | margin: 0.5em 0 0.5em 1em; | ||
118 | } | ||
119 | .flush { | ||
120 | clear: both; | ||
121 | visibility: hidden; | ||
122 | } | ||
123 | .hide, .noprint, #nav { | ||
124 | display: none !important; | ||
125 | } | ||
126 | .pagebreak { | ||
127 | page-break-before: always; | ||
128 | } | ||
129 | #site { | ||
130 | text-align: right; | ||
131 | font-family: sans-serif; | ||
132 | font-weight: bold; | ||
133 | margin: 0 1em; | ||
134 | border-bottom: 1pt solid black; | ||
135 | } | ||
136 | #site a { | ||
137 | font-size: 1.2em; | ||
138 | } | ||
139 | #site a:link, #site a:visited { | ||
140 | text-decoration: none; | ||
141 | font-weight: bold; | ||
142 | background: transparent; | ||
143 | color: #ffffff; | ||
144 | } | ||
145 | #logo { | ||
146 | color: #ff8000; | ||
147 | } | ||
148 | #head { | ||
149 | clear: both; | ||
150 | margin: 0 1em; | ||
151 | } | ||
152 | #main { | ||
153 | line-height: 1.3; | ||
154 | text-align: justify; | ||
155 | margin: 1em; | ||
156 | } | ||
157 | #foot { | ||
158 | clear: both; | ||
159 | font-size: 80%; | ||
160 | text-align: center; | ||
161 | margin: 0 1.25em; | ||
162 | padding: 0.5em 0 0 0; | ||
163 | border-top: 1pt solid black; | ||
164 | page-break-before: avoid; | ||
165 | page-break-after: avoid; | ||
166 | } | ||
diff --git a/doc/bluequad.css b/doc/bluequad.css new file mode 100644 index 00000000..7e52102f --- /dev/null +++ b/doc/bluequad.css | |||
@@ -0,0 +1,303 @@ | |||
1 | /* Copyright (C) 2004-2009 Mike Pall. | ||
2 | * | ||
3 | * You are welcome to use the general ideas of this design for your own sites. | ||
4 | * But please do not steal the stylesheet, the layout or the color scheme. | ||
5 | */ | ||
6 | /* colorscheme: | ||
7 | * | ||
8 | * site | head #4162bf/white | #6078bf/#e6ecff | ||
9 | * ------+------ ----------------+------------------- | ||
10 | * nav | main #bfcfff | #e6ecff/black | ||
11 | * | ||
12 | * nav: hiback loback #c5d5ff #b9c9f9 | ||
13 | * hiborder loborder #e6ecff #97a7d7 | ||
14 | * link hover #2142bf #ff0000 | ||
15 | * | ||
16 | * link: link visited hover #2142bf #8122bf #ff0000 | ||
17 | * | ||
18 | * main: boxback boxborder #f0f4ff #bfcfff | ||
19 | */ | ||
20 | body { | ||
21 | font-family: Verdana, Arial, Helvetica, sans-serif; | ||
22 | font-size: 10pt; | ||
23 | margin: 0; | ||
24 | padding: 0; | ||
25 | border: none; | ||
26 | background: #e0e0e0; | ||
27 | color: #000000; | ||
28 | } | ||
29 | a:link { | ||
30 | text-decoration: none; | ||
31 | background: transparent; | ||
32 | color: #2142bf; | ||
33 | } | ||
34 | a:visited { | ||
35 | text-decoration: none; | ||
36 | background: transparent; | ||
37 | color: #8122bf; | ||
38 | } | ||
39 | a:hover, a:active { | ||
40 | text-decoration: underline; | ||
41 | background: transparent; | ||
42 | color: #ff0000; | ||
43 | } | ||
44 | h1, h2, h3 { | ||
45 | font-weight: bold; | ||
46 | text-align: left; | ||
47 | margin: 0.5em 0; | ||
48 | padding: 0; | ||
49 | background: transparent; | ||
50 | } | ||
51 | h1 { | ||
52 | font-size: 200%; | ||
53 | line-height: 3em; /* really 6em relative to body, match #site span */ | ||
54 | margin: 0; | ||
55 | } | ||
56 | h2 { | ||
57 | font-size: 150%; | ||
58 | color: #606060; | ||
59 | } | ||
60 | h3 { | ||
61 | font-size: 125%; | ||
62 | color: #404040; | ||
63 | } | ||
64 | p { | ||
65 | max-width: 600px; | ||
66 | margin: 0 0 0.5em 0; | ||
67 | padding: 0; | ||
68 | } | ||
69 | b { | ||
70 | color: #404040; | ||
71 | } | ||
72 | ul, ol { | ||
73 | max-width: 600px; | ||
74 | margin: 0.5em 0; | ||
75 | padding: 0 0 0 2em; | ||
76 | } | ||
77 | ul { | ||
78 | list-style: outside square; | ||
79 | } | ||
80 | ol { | ||
81 | list-style: outside decimal; | ||
82 | } | ||
83 | li { | ||
84 | margin: 0; | ||
85 | padding: 0; | ||
86 | } | ||
87 | dl { | ||
88 | max-width: 600px; | ||
89 | margin: 1em 0; | ||
90 | padding: 1em; | ||
91 | border: 1px solid #bfcfff; | ||
92 | background: #f0f4ff; | ||
93 | } | ||
94 | dt { | ||
95 | font-weight: bold; | ||
96 | margin: 0; | ||
97 | padding: 0; | ||
98 | } | ||
99 | dt sup { | ||
100 | float: right; | ||
101 | margin-left: 1em; | ||
102 | color: #808080; | ||
103 | } | ||
104 | dt a:visited { | ||
105 | text-decoration: none; | ||
106 | color: #2142bf; | ||
107 | } | ||
108 | dt a:hover, dt a:active { | ||
109 | text-decoration: none; | ||
110 | color: #ff0000; | ||
111 | } | ||
112 | dd { | ||
113 | margin: 0.5em 0 0 2em; | ||
114 | padding: 0; | ||
115 | } | ||
116 | div.tablewrap { /* for IE *sigh* */ | ||
117 | max-width: 600px; | ||
118 | } | ||
119 | table { | ||
120 | table-layout: fixed; | ||
121 | border-spacing: 0; | ||
122 | border-collapse: collapse; | ||
123 | max-width: 600px; | ||
124 | width: 100%; | ||
125 | margin: 1em 0; | ||
126 | padding: 0; | ||
127 | border: 1px solid #bfcfff; | ||
128 | } | ||
129 | tr { | ||
130 | margin: 0; | ||
131 | padding: 0; | ||
132 | border: none; | ||
133 | } | ||
134 | tr.odd { | ||
135 | background: #f0f4ff; | ||
136 | } | ||
137 | tr.separate td { | ||
138 | border-top: 1px solid #bfcfff; | ||
139 | } | ||
140 | td { | ||
141 | text-align: left; | ||
142 | margin: 0; | ||
143 | padding: 0.2em 0.5em; | ||
144 | border: none; | ||
145 | } | ||
146 | tt, code, kbd, samp { | ||
147 | font-family: Courier New, Courier, monospace; | ||
148 | line-height: 1.2; | ||
149 | font-size: 110%; | ||
150 | } | ||
151 | kbd { | ||
152 | font-weight: bolder; | ||
153 | } | ||
154 | blockquote, pre { | ||
155 | max-width: 600px; | ||
156 | margin: 1em 2em; | ||
157 | padding: 0; | ||
158 | } | ||
159 | pre { | ||
160 | line-height: 1.1; | ||
161 | } | ||
162 | pre.code { | ||
163 | line-height: 1.4; | ||
164 | margin: 0.5em 0 1em 0.5em; | ||
165 | padding: 0.5em 1em; | ||
166 | border: 1px solid #bfcfff; | ||
167 | background: #f0f4ff; | ||
168 | } | ||
169 | img { | ||
170 | border: none; | ||
171 | vertical-align: baseline; | ||
172 | margin: 0; | ||
173 | padding: 0; | ||
174 | } | ||
175 | img.left { | ||
176 | float: left; | ||
177 | margin: 0.5em 1em 0.5em 0; | ||
178 | } | ||
179 | img.right { | ||
180 | float: right; | ||
181 | margin: 0.5em 0 0.5em 1em; | ||
182 | } | ||
183 | .indent { | ||
184 | padding-left: 1em; | ||
185 | } | ||
186 | .flush { | ||
187 | clear: both; | ||
188 | visibility: hidden; | ||
189 | } | ||
190 | .hide, .noscreen { | ||
191 | display: none !important; | ||
192 | } | ||
193 | .ext { | ||
194 | color: #ff8000; | ||
195 | } | ||
196 | #site { | ||
197 | clear: both; | ||
198 | float: left; | ||
199 | width: 13em; | ||
200 | text-align: center; | ||
201 | font-weight: bold; | ||
202 | margin: 0; | ||
203 | padding: 0; | ||
204 | background: transparent; | ||
205 | color: #ffffff; | ||
206 | } | ||
207 | #site a { | ||
208 | font-size: 200%; | ||
209 | } | ||
210 | #site a:link, #site a:visited { | ||
211 | text-decoration: none; | ||
212 | font-weight: bold; | ||
213 | background: transparent; | ||
214 | color: #ffffff; | ||
215 | } | ||
216 | #site span { | ||
217 | line-height: 3em; /* really 6em relative to body, match h1 */ | ||
218 | } | ||
219 | #logo { | ||
220 | color: #ffb380; | ||
221 | } | ||
222 | #head { | ||
223 | margin: 0; | ||
224 | padding: 0 0 0 2em; | ||
225 | border-left: solid 13em #4162bf; | ||
226 | border-right: solid 3em #6078bf; | ||
227 | background: #6078bf; | ||
228 | color: #e6ecff; | ||
229 | } | ||
230 | #nav { | ||
231 | clear: both; | ||
232 | float: left; | ||
233 | overflow: hidden; | ||
234 | text-align: left; | ||
235 | line-height: 1.5; | ||
236 | width: 13em; | ||
237 | padding-top: 1em; | ||
238 | background: transparent; | ||
239 | } | ||
240 | #nav ul { | ||
241 | list-style: none outside; | ||
242 | margin: 0; | ||
243 | padding: 0; | ||
244 | } | ||
245 | #nav li { | ||
246 | margin: 0; | ||
247 | padding: 0; | ||
248 | } | ||
249 | #nav a { | ||
250 | display: block; | ||
251 | text-decoration: none; | ||
252 | font-weight: bold; | ||
253 | margin: 0; | ||
254 | padding: 2px 1em; | ||
255 | border-top: 1px solid transparent; | ||
256 | border-bottom: 1px solid transparent; | ||
257 | background: transparent; | ||
258 | color: #2142bf; | ||
259 | } | ||
260 | #nav a:hover, #nav a:active { | ||
261 | text-decoration: none; | ||
262 | border-top: 1px solid #97a7d7; | ||
263 | border-bottom: 1px solid #e6ecff; | ||
264 | background: #b9c9f9; | ||
265 | color: #ff0000; | ||
266 | } | ||
267 | #nav a.current, #nav a.current:hover, #nav a.current:active { | ||
268 | border-top: 1px solid #e6ecff; | ||
269 | border-bottom: 1px solid #97a7d7; | ||
270 | background: #c5d5ff; | ||
271 | color: #2142bf; | ||
272 | } | ||
273 | #nav ul ul a { | ||
274 | padding: 0 1em 0 2em; | ||
275 | } | ||
276 | #main { | ||
277 | line-height: 1.5; | ||
278 | text-align: left; | ||
279 | margin: 0; | ||
280 | padding: 1em 2em; | ||
281 | border-left: solid 13em #bfcfff; | ||
282 | border-right: solid 3em #e6ecff; | ||
283 | background: #e6ecff; | ||
284 | } | ||
285 | #foot { | ||
286 | clear: both; | ||
287 | font-size: 80%; | ||
288 | text-align: center; | ||
289 | margin: 0; | ||
290 | padding: 0.5em; | ||
291 | background: #6078bf; | ||
292 | color: #ffffff; | ||
293 | } | ||
294 | #foot a:link, #foot a:visited { | ||
295 | text-decoration: underline; | ||
296 | background: transparent; | ||
297 | color: #ffffff; | ||
298 | } | ||
299 | #foot a:hover, #foot a:active { | ||
300 | text-decoration: underline; | ||
301 | background: transparent; | ||
302 | color: #bfcfff; | ||
303 | } | ||
diff --git a/doc/changes.html b/doc/changes.html new file mode 100644 index 00000000..6c34b8be --- /dev/null +++ b/doc/changes.html | |||
@@ -0,0 +1,281 @@ | |||
1 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> | ||
2 | <html> | ||
3 | <head> | ||
4 | <title>LuaJIT Change History</title> | ||
5 | <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> | ||
6 | <meta name="Author" content="Mike Pall"> | ||
7 | <meta name="Copyright" content="Copyright (C) 2005-2009, Mike Pall"> | ||
8 | <meta name="Language" content="en"> | ||
9 | <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> | ||
10 | <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> | ||
11 | <style type="text/css"> | ||
12 | div.major { max-width: 600px; padding: 1em; margin: 1em 0 1em 0; } | ||
13 | </style> | ||
14 | </head> | ||
15 | <body> | ||
16 | <div id="site"> | ||
17 | <a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> | ||
18 | </div> | ||
19 | <div id="head"> | ||
20 | <h1>LuaJIT Change History</h1> | ||
21 | </div> | ||
22 | <div id="nav"> | ||
23 | <ul><li> | ||
24 | <a href="luajit.html">LuaJIT</a> | ||
25 | <ul><li> | ||
26 | <a href="install.html">Installation</a> | ||
27 | </li><li> | ||
28 | <a href="running.html">Running</a> | ||
29 | </li><li> | ||
30 | <a href="api.html">API Extensions</a> | ||
31 | </li></ul> | ||
32 | </li><li> | ||
33 | <a href="status.html">Status</a> | ||
34 | <ul><li> | ||
35 | <a class="current" href="changes.html">Changes</a> | ||
36 | </li></ul> | ||
37 | </li><li> | ||
38 | <a href="faq.html">FAQ</a> | ||
39 | </li><li> | ||
40 | <a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> | ||
41 | </li></ul> | ||
42 | </div> | ||
43 | <div id="main"> | ||
44 | <p> | ||
45 | This is a list of changes between the released versions of LuaJIT.<br> | ||
46 | The current <span style="color: #c00000;">development version</span> is <strong>LuaJIT 2.0.0-beta1</strong>.<br> | ||
47 | The current <span style="color: #0000c0;">stable version</span> is <strong>LuaJIT 1.1.5</strong>. | ||
48 | </p> | ||
49 | <p> | ||
50 | Please check the | ||
51 | <a href="http://luajit.org/luajit_changes.html"><span class="ext">»</span> Online Change History</a> | ||
52 | to see whether newer versions are available. | ||
53 | </p> | ||
54 | |||
55 | <div class="major" style="background: #ffd0d0;"> | ||
56 | <h2 id="LuaJIT-2.0.0-beta1">LuaJIT 2.0.0-beta1 — 2009-10-31</h2> | ||
57 | <ul> | ||
58 | <li>This is the first public release of LuaJIT 2.0.</li> | ||
59 | <li>The whole VM has been rewritten from the ground up, so there's | ||
60 | no point in listing differences over earlier versions.</li> | ||
61 | </ul> | ||
62 | </div> | ||
63 | |||
64 | <div class="major" style="background: #d0d0ff;"> | ||
65 | <h2 id="LuaJIT-1.1.5">LuaJIT 1.1.5 — 2008-10-25</h2> | ||
66 | <ul> | ||
67 | <li>Merged with Lua 5.1.4. Fixes all | ||
68 | <a href="http://www.lua.org/bugs.html#5.1.3"><span class="ext">»</span> known bugs in Lua 5.1.3</a>.</li> | ||
69 | </ul> | ||
70 | |||
71 | <h2 id="LuaJIT-1.1.4">LuaJIT 1.1.4 — 2008-02-05</h2> | ||
72 | <ul> | ||
73 | <li>Merged with Lua 5.1.3. Fixes all | ||
74 | <a href="http://www.lua.org/bugs.html#5.1.2"><span class="ext">»</span> known bugs in Lua 5.1.2</a>.</li> | ||
75 | <li>Fixed possible (but unlikely) stack corruption while compiling | ||
76 | <tt>k^x</tt> expressions.</li> | ||
77 | <li>Fixed DynASM template for cmpss instruction.</li> | ||
78 | </ul> | ||
79 | |||
80 | <h2 id="LuaJIT-1.1.3">LuaJIT 1.1.3 — 2007-05-24</h2> | ||
81 | <ul> | ||
82 | <li>Merged with Lua 5.1.2. Fixes all | ||
83 | <a href="http://www.lua.org/bugs.html#5.1.1"><span class="ext">»</span> known bugs in Lua 5.1.1</a>.</li> | ||
84 | <li>Merged pending Lua 5.1.x fixes: "return -nil" bug, spurious count hook call.</li> | ||
85 | <li>Remove a (sometimes) wrong assertion in <tt>luaJIT_findpc()</tt>.</li> | ||
86 | <li>DynASM now allows labels for displacements and <tt>.aword</tt>.</li> | ||
87 | <li>Fix some compiler warnings for DynASM glue (internal API change).</li> | ||
88 | <li>Correct naming for SSSE3 (temporarily known as SSE4) in DynASM and x86 disassembler.</li> | ||
89 | <li>The loadable debug modules now handle redirection to stdout | ||
90 | (e.g. <tt>-j trace=-</tt>).</li> | ||
91 | </ul> | ||
92 | |||
93 | <h2 id="LuaJIT-1.1.2">LuaJIT 1.1.2 — 2006-06-24</h2> | ||
94 | <ul> | ||
95 | <li>Fix MSVC inline assembly: use only local variables with | ||
96 | <tt>lua_number2int()</tt>.</li> | ||
97 | <li>Fix "attempt to call a thread value" bug on Mac OS X: | ||
98 | make values of consts used as lightuserdata keys unique | ||
99 | to avoid joining by the compiler/linker.</li> | ||
100 | </ul> | ||
101 | |||
102 | <h2 id="LuaJIT-1.1.1">LuaJIT 1.1.1 — 2006-06-20</h2> | ||
103 | <ul> | ||
104 | <li>Merged with Lua 5.1.1. Fixes all | ||
105 | <a href="http://www.lua.org/bugs.html#5.1"><span class="ext">»</span> known bugs in Lua 5.1</a>.</li> | ||
106 | <li>Enforce (dynamic) linker error for EXE/DLL version mismatches.</li> | ||
107 | <li>Minor changes to DynASM: faster preprocessing, smaller encoding | ||
108 | for some immediates.</li> | ||
109 | </ul> | ||
110 | <p> | ||
111 | This release is in sync with Coco 1.1.1 (see the | ||
112 | <a href="http://coco.luajit.org/changes.html"><span class="ext">»</span> Coco Change History</a>). | ||
113 | </p> | ||
114 | |||
115 | <h2 id="LuaJIT-1.1.0">LuaJIT 1.1.0 — 2006-03-13</h2> | ||
116 | <ul> | ||
117 | <li>Merged with Lua 5.1 (final).</li> | ||
118 | |||
119 | <li>New JIT call frame setup: | ||
120 | <ul> | ||
121 | <li>The C stack is kept 16 byte aligned (faster). | ||
122 | Mandatory for Mac OS X on Intel, too.</li> | ||
123 | <li>Faster calling conventions for internal C helper functions.</li> | ||
124 | <li>Better instruction scheduling for function prologue, OP_CALL and | ||
125 | OP_RETURN.</li> | ||
126 | </ul></li> | ||
127 | |||
128 | <li>Miscellaneous optimizations: | ||
129 | <ul> | ||
130 | <li>Faster loads of FP constants. Remove narrow-to-wide store-to-load | ||
131 | forwarding stalls.</li> | ||
132 | <li>Use (scalar) SSE2 ops (if the CPU supports it) to speed up slot moves | ||
133 | and FP to integer conversions.</li> | ||
134 | <li>Optimized the two-argument form of <tt>OP_CONCAT</tt> (<tt>a..b</tt>).</li> | ||
135 | <li>Inlined <tt>OP_MOD</tt> (<tt>a%b</tt>). | ||
136 | With better accuracy than the C variant, too.</li> | ||
137 | <li>Inlined <tt>OP_POW</tt> (<tt>a^b</tt>). Unroll <tt>x^k</tt> or | ||
138 | use <tt>k^x = 2^(log2(k)*x)</tt> or call <tt>pow()</tt>.</li> | ||
139 | </ul></li> | ||
140 | |||
141 | <li>Changes in the optimizer: | ||
142 | <ul> | ||
143 | <li>Improved hinting for table keys derived from table values | ||
144 | (<tt>t1[t2[x]]</tt>).</li> | ||
145 | <li>Lookup hinting now works with arbitrary object types and | ||
146 | supports index chains, too.</li> | ||
147 | <li>Generate type hints for arithmetic and comparison operators, | ||
148 | OP_LEN, OP_CONCAT and OP_FORPREP.</li> | ||
149 | <li>Remove several hint definitions in favour of a generic COMBINE hint.</li> | ||
150 | <li>Complete rewrite of <tt>jit.opt_inline</tt> module | ||
151 | (ex <tt>jit.opt_lib</tt>).</li> | ||
152 | </ul></li> | ||
153 | |||
154 | <li>Use adaptive deoptimization: | ||
155 | <ul> | ||
156 | <li>If runtime verification of a contract fails, the affected | ||
157 | instruction is recompiled and patched on-the-fly. | ||
158 | Regular programs will trigger deoptimization only occasionally.</li> | ||
159 | <li>This avoids generating code for uncommon fallback cases | ||
160 | most of the time. Generated code is up to 30% smaller compared to | ||
161 | LuaJIT 1.0.3.</li> | ||
162 | <li>Deoptimization is used for many opcodes and contracts: | ||
163 | <ul> | ||
164 | <li>OP_CALL, OP_TAILCALL: type mismatch for callable.</li> | ||
165 | <li>Inlined calls: closure mismatch, parameter number and type mismatches.</li> | ||
166 | <li>OP_GETTABLE, OP_SETTABLE: table or key type and range mismatches.</li> | ||
167 | <li>All arithmetic and comparison operators, OP_LEN, OP_CONCAT, | ||
168 | OP_FORPREP: operand type and range mismatches.</li> | ||
169 | </ul></li> | ||
170 | <li>Complete redesign of the debug and traceback info | ||
171 | (bytecode ↔ mcode) to support deoptimization. | ||
172 | Much more flexible and needs only 50% of the space.</li> | ||
173 | <li>The modules <tt>jit.trace</tt>, <tt>jit.dumphints</tt> and | ||
174 | <tt>jit.dump</tt> handle deoptimization.</li> | ||
175 | </ul></li> | ||
176 | |||
177 | <li>Inlined many popular library functions | ||
178 | (for commonly used arguments only): | ||
179 | <ul> | ||
180 | <li>Most <tt>math.*</tt> functions (the 18 most used ones) | ||
181 | [2x-10x faster].</li> | ||
182 | <li><tt>string.len</tt>, <tt>string.sub</tt> and <tt>string.char</tt> | ||
183 | [2x-10x faster].</li> | ||
184 | <li><tt>table.insert</tt>, <tt>table.remove</tt> and <tt>table.getn</tt> | ||
185 | [3x-5x faster].</li> | ||
186 | <li><tt>coroutine.yield</tt> and <tt>coroutine.resume</tt> | ||
187 | [3x-5x faster].</li> | ||
188 | <li><tt>pairs</tt>, <tt>ipairs</tt> and the corresponding iterators | ||
189 | [8x-15x faster].</li> | ||
190 | </ul></li> | ||
191 | |||
192 | <li>Changes in the core and loadable modules and the stand-alone executable: | ||
193 | <ul> | ||
194 | <li>Added <tt>jit.version</tt>, <tt>jit.version_num</tt> | ||
195 | and <tt>jit.arch</tt>.</li> | ||
196 | <li>Reorganized some internal API functions (<tt>jit.util.*mcode*</tt>).</li> | ||
197 | <li>The <tt>-j dump</tt> output now shows JSUB names, too.</li> | ||
198 | <li>New x86 disassembler module written in pure Lua. No dependency | ||
199 | on ndisasm anymore. Flexible API, very compact (500 lines) | ||
200 | and complete (x87, MMX, SSE, SSE2, SSE3, SSSE3, privileged instructions).</li> | ||
201 | <li><tt>luajit -v</tt> prints the LuaJIT version and copyright | ||
202 | on a separate line.</li> | ||
203 | </ul></li> | ||
204 | |||
205 | <li>Added SSE, SSE2, SSE3 and SSSE3 support to DynASM.</li> | ||
206 | <li>Miscellaneous doc changes. Added a section about | ||
207 | <a href="luajit_install.html#embedding">embedding LuaJIT</a>.</li> | ||
208 | </ul> | ||
209 | <p> | ||
210 | This release is in sync with Coco 1.1.0 (see the | ||
211 | <a href="http://coco.luajit.org/changes.html"><span class="ext">»</span> Coco Change History</a>). | ||
212 | </p> | ||
213 | </div> | ||
214 | |||
215 | <div class="major" style="background: #ffffd0;"> | ||
216 | <h2 id="LuaJIT-1.0.3">LuaJIT 1.0.3 — 2005-09-08</h2> | ||
217 | <ul> | ||
218 | <li>Even more docs.</li> | ||
219 | <li>Unified closure checks in <tt>jit.*</tt>.</li> | ||
220 | <li>Fixed some range checks in <tt>jit.util.*</tt>.</li> | ||
221 | <li>Fixed __newindex call originating from <tt>jit_settable_str()</tt>.</li> | ||
222 | <li>Merged with Lua 5.1 alpha (including early bugfixes).</li> | ||
223 | </ul> | ||
224 | <p> | ||
225 | This is the first public release of LuaJIT. | ||
226 | </p> | ||
227 | |||
228 | <h2 id="LuaJIT-1.0.2">LuaJIT 1.0.2 — 2005-09-02</h2> | ||
229 | <ul> | ||
230 | <li>Add support for flushing the Valgrind translation cache <br> | ||
231 | (<tt>MYCFLAGS= -DUSE_VALGRIND</tt>).</li> | ||
232 | <li>Add support for freeing executable mcode memory to the <tt>mmap()</tt>-based | ||
233 | variant for POSIX systems.</li> | ||
234 | <li>Reorganized the C function signature handling in | ||
235 | <tt>jit.opt_lib</tt>.</li> | ||
236 | <li>Changed to index-based hints for inlining C functions. | ||
237 | Still no support in the backend for inlining.</li> | ||
238 | <li>Hardcode <tt>HEAP_CREATE_ENABLE_EXECUTE</tt> value if undefined.</li> | ||
239 | <li>Misc. changes to the <tt>jit.*</tt> modules.</li> | ||
240 | <li>Misc. changes to the Makefiles.</li> | ||
241 | <li>Lots of new docs.</li> | ||
242 | <li>Complete doc reorg.</li> | ||
243 | </ul> | ||
244 | <p> | ||
245 | Not released because Lua 5.1 alpha came out today. | ||
246 | </p> | ||
247 | |||
248 | <h2 id="LuaJIT-1.0.1">LuaJIT 1.0.1 — 2005-08-31</h2> | ||
249 | <ul> | ||
250 | <li>Missing GC step in <tt>OP_CONCAT</tt>.</li> | ||
251 | <li>Fix result handling for C –> JIT calls.</li> | ||
252 | <li>Detect CPU feature bits.</li> | ||
253 | <li>Encode conditional moves (<tt>fucomip</tt>) only when supported.</li> | ||
254 | <li>Add fallback instructions for FP compares.</li> | ||
255 | <li>Add support for <tt>LUA_COMPAT_VARARG</tt>. Still disabled by default.</li> | ||
256 | <li>MSVC needs a specific place for the <tt>CALLBACK</tt> attribute | ||
257 | (David Burgess).</li> | ||
258 | <li>Misc. doc updates.</li> | ||
259 | </ul> | ||
260 | <p> | ||
261 | Interim non-public release. | ||
262 | Special thanks to Adam D. Moss for reporting most of the bugs. | ||
263 | </p> | ||
264 | |||
265 | <h2 id="LuaJIT-1.0.0">LuaJIT 1.0.0 — 2005-08-29</h2> | ||
266 | <p> | ||
267 | This is the initial non-public release of LuaJIT. | ||
268 | </p> | ||
269 | </div> | ||
270 | <br class="flush"> | ||
271 | </div> | ||
272 | <div id="foot"> | ||
273 | <hr class="hide"> | ||
274 | Copyright © 2005-2009 Mike Pall | ||
275 | <span class="noprint"> | ||
276 | · | ||
277 | <a href="contact.html">Contact</a> | ||
278 | </span> | ||
279 | </div> | ||
280 | </body> | ||
281 | </html> | ||
diff --git a/doc/contact.html b/doc/contact.html new file mode 100644 index 00000000..36d5a825 --- /dev/null +++ b/doc/contact.html | |||
@@ -0,0 +1,84 @@ | |||
1 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> | ||
2 | <html> | ||
3 | <head> | ||
4 | <title>Contact</title> | ||
5 | <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> | ||
6 | <meta name="Author" content="Mike Pall"> | ||
7 | <meta name="Copyright" content="Copyright (C) 2005-2009, Mike Pall"> | ||
8 | <meta name="Language" content="en"> | ||
9 | <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> | ||
10 | <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> | ||
11 | </head> | ||
12 | <body> | ||
13 | <div id="site"> | ||
14 | <a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> | ||
15 | </div> | ||
16 | <div id="head"> | ||
17 | <h1>Contact</h1> | ||
18 | </div> | ||
19 | <div id="nav"> | ||
20 | <ul><li> | ||
21 | <a href="luajit.html">LuaJIT</a> | ||
22 | <ul><li> | ||
23 | <a href="install.html">Installation</a> | ||
24 | </li><li> | ||
25 | <a href="running.html">Running</a> | ||
26 | </li><li> | ||
27 | <a href="api.html">API Extensions</a> | ||
28 | </li></ul> | ||
29 | </li><li> | ||
30 | <a href="status.html">Status</a> | ||
31 | <ul><li> | ||
32 | <a href="changes.html">Changes</a> | ||
33 | </li></ul> | ||
34 | </li><li> | ||
35 | <a href="faq.html">FAQ</a> | ||
36 | </li><li> | ||
37 | <a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> | ||
38 | </li></ul> | ||
39 | </div> | ||
40 | <div id="main"> | ||
41 | <p> | ||
42 | Please send general questions to the | ||
43 | <a href="http://www.lua.org/lua-l.html"><span class="ext">»</span> Lua mailing list</a>. | ||
44 | You can also send any questions you have directly to me: | ||
45 | </p> | ||
46 | |||
47 | <script type="text/javascript"> | ||
48 | <!-- | ||
49 | var xS="@-: .0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZa<b>cdefghijklmnopqrstuvwxyz" | ||
50 | function xD(s) | ||
51 | {var len=s.length;var r="";for(var i=0;i<len;i++) | ||
52 | {var c=s.charAt(i);var n=xS.indexOf(c);if(n!=-1) | ||
53 | c=xS.charAt(66-n);r+=c;} | ||
54 | document.write("<"+"p>"+r+"<"+"/p>\n");} | ||
55 | //--> | ||
56 | </script> | ||
57 | <script type="text/javascript"> | ||
58 | <!-- | ||
59 | xD("ewYKA7vu-EIwslx7 K9A.t41C") | ||
60 | //--></script> | ||
61 | <noscript> | ||
62 | <p><img src="img/contact.png" alt="Contact info in image" width="170" height="13"> | ||
63 | </p> | ||
64 | </noscript> | ||
65 | |||
66 | <h2>Copyright</h2> | ||
67 | <p> | ||
68 | All documentation is | ||
69 | Copyright © 2005-2009 Mike Pall. | ||
70 | </p> | ||
71 | |||
72 | |||
73 | <br class="flush"> | ||
74 | </div> | ||
75 | <div id="foot"> | ||
76 | <hr class="hide"> | ||
77 | Copyright © 2005-2009 Mike Pall | ||
78 | <span class="noprint"> | ||
79 | · | ||
80 | <a href="contact.html">Contact</a> | ||
81 | </span> | ||
82 | </div> | ||
83 | </body> | ||
84 | </html> | ||
diff --git a/doc/faq.html b/doc/faq.html new file mode 100644 index 00000000..6f62e1eb --- /dev/null +++ b/doc/faq.html | |||
@@ -0,0 +1,141 @@ | |||
1 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> | ||
2 | <html> | ||
3 | <head> | ||
4 | <title>Frequently Asked Questions (FAQ)</title> | ||
5 | <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> | ||
6 | <meta name="Author" content="Mike Pall"> | ||
7 | <meta name="Copyright" content="Copyright (C) 2005-2009, Mike Pall"> | ||
8 | <meta name="Language" content="en"> | ||
9 | <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> | ||
10 | <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> | ||
11 | <style type="text/css"> | ||
12 | dd { margin-left: 1.5em; } | ||
13 | </style> | ||
14 | </head> | ||
15 | <body> | ||
16 | <div id="site"> | ||
17 | <a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> | ||
18 | </div> | ||
19 | <div id="head"> | ||
20 | <h1>Frequently Asked Questions (FAQ)</h1> | ||
21 | </div> | ||
22 | <div id="nav"> | ||
23 | <ul><li> | ||
24 | <a href="luajit.html">LuaJIT</a> | ||
25 | <ul><li> | ||
26 | <a href="install.html">Installation</a> | ||
27 | </li><li> | ||
28 | <a href="running.html">Running</a> | ||
29 | </li><li> | ||
30 | <a href="api.html">API Extensions</a> | ||
31 | </li></ul> | ||
32 | </li><li> | ||
33 | <a href="status.html">Status</a> | ||
34 | <ul><li> | ||
35 | <a href="changes.html">Changes</a> | ||
36 | </li></ul> | ||
37 | </li><li> | ||
38 | <a class="current" href="faq.html">FAQ</a> | ||
39 | </li><li> | ||
40 | <a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> | ||
41 | </li></ul> | ||
42 | </div> | ||
43 | <div id="main"> | ||
44 | <dl> | ||
45 | <dt>Q: Where can I learn more about Lua and LuaJIT?</dt> | ||
46 | <dd> | ||
47 | <ul style="padding: 0;"> | ||
48 | <li>The <a href="http://lua.org"><span class="ext">»</span> main Lua.org site</a> has complete | ||
49 | <a href="http://www.lua.org/docs.html"><span class="ext">»</span> documentation</a> of the language | ||
50 | and links to books and papers about Lua.</li> | ||
51 | <li>The community-managed <a href="http://lua-users.org/wiki/"><span class="ext">»</span> Lua Wiki</a> | ||
52 | has information about diverse topics.</li> | ||
53 | <li>The primary source of information for the latest developments surrounding | ||
54 | Lua is the <a href="http://www.lua.org/lua-l.html"><span class="ext">»</span> Lua mailing list</a>. | ||
55 | You can check out the <a href="http://lua-users.org/lists/lua-l/"><span class="ext">»</span> mailing | ||
56 | list archive</a> or | ||
57 | <a href="http://bazar2.conectiva.com.br/mailman/listinfo/lua"><span class="ext">»</span> subscribe</a> | ||
58 | to the list (you need to be subscribed before posting).<br> | ||
59 | This is also the place where announcements and discussions about LuaJIT | ||
60 | take place.</li> | ||
61 | </ul> | ||
62 | </dl> | ||
63 | |||
64 | <dl> | ||
65 | <dt>Q: Where can I learn more about the compiler technology used by LuaJIT?</dt> | ||
66 | <dd> | ||
67 | I'm planning to write more documentation about the internals of LuaJIT. | ||
68 | In the meantime, please use the following Google Scholar searches | ||
69 | to find relevant papers:<br> | ||
70 | Search for: <a href="http://scholar.google.com/scholar?q=Trace+Compiler"><span class="ext">»</span> Trace Compiler</a><br> | ||
71 | Search for: <a href="http://scholar.google.com/scholar?q=JIT+Compiler"><span class="ext">»</span> JIT Compiler</a><br> | ||
72 | Search for: <a href="http://scholar.google.com/scholar?q=Dynamic+Language+Optimizations"><span class="ext">»</span> Dynamic Language Optimizations</a><br> | ||
73 | Search for: <a href="http://scholar.google.com/scholar?q=SSA+Form"><span class="ext">»</span> SSA Form</a><br> | ||
74 | Search for: <a href="http://scholar.google.com/scholar?q=Linear+Scan+Register+Allocation"><span class="ext">»</span> Linear Scan Register Allocation</a><br> | ||
75 | And, you know, reading the source is of course the only way to enlightenment. :-) | ||
76 | </dd> | ||
77 | </dl> | ||
78 | |||
79 | <dl> | ||
80 | <dt>Q: Why do I get this error: "attempt to index global 'arg' (a nil value)"?<br> | ||
81 | Q: My vararg functions fail after switching to LuaJIT!</dt> | ||
82 | <dd>LuaJIT is compatible with the Lua 5.1 language standard. It doesn't | ||
83 | support the implicit <tt>arg</tt> parameter for old-style vararg | ||
84 | functions from Lua 5.0.<br>Please convert your code to the | ||
85 | <a href="http://www.lua.org/manual/5.1/manual.html#2.5.9"><span class="ext">»</span> Lua 5.1 | ||
86 | vararg syntax</a>.</dd> | ||
87 | </dl> | ||
88 | |||
89 | <dl> | ||
90 | <dt>Q: Sometimes Ctrl-C fails to stop my Lua program. Why?</dt> | ||
91 | <dd>The interrupt signal handler sets a Lua debug hook. But this is | ||
92 | currently ignored by compiled code (this will eventually be fixed). If | ||
93 | your program is running in a tight loop and never falls back to the | ||
94 | interpreter, the debug hook never runs and can't throw the | ||
95 | "interrupted!" error.<br> In the meantime you have to press Ctrl-C | ||
96 | twice to stop your program. That's similar to when it's stuck | ||
97 | running inside a C function under the Lua interpreter.</dd> | ||
98 | </dl> | ||
99 | |||
100 | <dl> | ||
101 | <dt>Q: Why doesn't my favorite power-patch for Lua apply against LuaJIT?</dt> | ||
102 | <dd>Because it's a completely redesigned VM and has very little code | ||
103 | in common with Lua anymore. Also, if the patch introduces changes to | ||
104 | the Lua semantics, this would need to be reflected everywhere in the | ||
105 | VM, from the interpreter up to all stages of the compiler.<br> Please | ||
106 | use only standard Lua language constructs. For many common needs you | ||
107 | can use source transformations or use wrapper or proxy functions. | ||
108 | The compiler will happily optimize away such indirections.</dd> | ||
109 | </dl> | ||
110 | |||
111 | <dl> | ||
112 | <dt>Q: Lua runs everywhere. Why doesn't LuaJIT support my CPU?</dt> | ||
113 | <dd>Because it's a compiler — it needs to generate native | ||
114 | machine code. This means the code generator must be ported to each | ||
115 | architecture. And the fast interpreter is written in assembler and | ||
116 | must be ported, too. This is quite an undertaking.<br> Currently only | ||
117 | x86 CPUs are supported. x64 support is in the works. Other | ||
118 | architectures will follow with sufficient demand and/or | ||
119 | sponsoring.</dd> | ||
120 | </dl> | ||
121 | |||
122 | <dl> | ||
123 | <dt>Q: When will feature X be added? When will the next version be released?</dt> | ||
124 | <dd>When it's ready.<br> | ||
125 | C'mon, it's open source — I'm doing it on my own time and you're | ||
126 | getting it for free. You can either contribute a patch or sponsor | ||
127 | the development of certain features, if they are important to you. | ||
128 | </dd> | ||
129 | </dl> | ||
130 | <br class="flush"> | ||
131 | </div> | ||
132 | <div id="foot"> | ||
133 | <hr class="hide"> | ||
134 | Copyright © 2005-2009 Mike Pall | ||
135 | <span class="noprint"> | ||
136 | · | ||
137 | <a href="contact.html">Contact</a> | ||
138 | </span> | ||
139 | </div> | ||
140 | </body> | ||
141 | </html> | ||
diff --git a/doc/img/contact.png b/doc/img/contact.png new file mode 100644 index 00000000..9c73dc59 --- /dev/null +++ b/doc/img/contact.png | |||
Binary files differ | |||
diff --git a/doc/install.html b/doc/install.html new file mode 100644 index 00000000..b7211d21 --- /dev/null +++ b/doc/install.html | |||
@@ -0,0 +1,216 @@ | |||
1 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> | ||
2 | <html> | ||
3 | <head> | ||
4 | <title>Installation</title> | ||
5 | <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> | ||
6 | <meta name="Author" content="Mike Pall"> | ||
7 | <meta name="Copyright" content="Copyright (C) 2005-2009, Mike Pall"> | ||
8 | <meta name="Language" content="en"> | ||
9 | <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> | ||
10 | <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> | ||
11 | </head> | ||
12 | <body> | ||
13 | <div id="site"> | ||
14 | <a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> | ||
15 | </div> | ||
16 | <div id="head"> | ||
17 | <h1>Installation</h1> | ||
18 | </div> | ||
19 | <div id="nav"> | ||
20 | <ul><li> | ||
21 | <a href="luajit.html">LuaJIT</a> | ||
22 | <ul><li> | ||
23 | <a class="current" href="install.html">Installation</a> | ||
24 | </li><li> | ||
25 | <a href="running.html">Running</a> | ||
26 | </li><li> | ||
27 | <a href="api.html">API Extensions</a> | ||
28 | </li></ul> | ||
29 | </li><li> | ||
30 | <a href="status.html">Status</a> | ||
31 | <ul><li> | ||
32 | <a href="changes.html">Changes</a> | ||
33 | </li></ul> | ||
34 | </li><li> | ||
35 | <a href="faq.html">FAQ</a> | ||
36 | </li><li> | ||
37 | <a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> | ||
38 | </li></ul> | ||
39 | </div> | ||
40 | <div id="main"> | ||
41 | <p> | ||
42 | LuaJIT is only distributed as a source package. This page explains | ||
43 | how to build and install LuaJIT with different operating systems | ||
44 | and C compilers. | ||
45 | </p> | ||
46 | <p> | ||
47 | For the impatient (on POSIX systems): | ||
48 | </p> | ||
49 | <pre class="code"> | ||
50 | make && sudo make install | ||
51 | </pre> | ||
52 | <p> | ||
53 | LuaJIT currently builds out-of-the-box on all popular x86 systems | ||
54 | (Linux, Windows, OSX etc.). It builds and runs fine as a 32 bit | ||
55 | application under x64-based systems, too. | ||
56 | </p> | ||
57 | |||
58 | <h2>Configuring LuaJIT</h2> | ||
59 | <p> | ||
60 | The standard configuration should work fine for most installations. | ||
61 | Usually there is no need to tweak the settings, except when you want to | ||
62 | install to a non-standard path. The following four files hold all | ||
63 | user-configurable settings: | ||
64 | </p> | ||
65 | <ul> | ||
66 | <li><tt>src/luaconf.h</tt> sets some configuration variables, in | ||
67 | particular the default paths for loading modules.</li> | ||
68 | <li><tt>Makefile</tt> has settings for installing LuaJIT (POSIX | ||
69 | only).</li> | ||
70 | <li><tt>src/Makefile</tt> has settings for compiling LuaJIT under POSIX, | ||
71 | MinGW and Cygwin.</li> | ||
72 | <li><tt>src/msvcbuild.bat</tt> has settings for compiling LuaJIT with | ||
73 | MSVC.</li> | ||
74 | </ul> | ||
75 | <p> | ||
76 | Please read the instructions given in these files, before changing | ||
77 | any settings. | ||
78 | </p> | ||
79 | |||
80 | <h2 id="posix">POSIX Systems (Linux, OSX, *BSD etc.)</h2> | ||
81 | <h3>Prerequisites</h3> | ||
82 | <p> | ||
83 | Depending on your distribution, you may need to install a package for | ||
84 | GCC (GCC 3.4 or later required), the development headers and/or a | ||
85 | complete SDK. | ||
86 | </p> | ||
87 | <p> | ||
88 | E.g. on a current Debian/Ubuntu, install <tt>libc6-dev</tt> | ||
89 | with the package manager. Currently LuaJIT only builds as a 32 bit | ||
90 | application, so you actually need to install <tt>libc6-dev-i386</tt> | ||
91 | when building on an x64 OS. | ||
92 | </p> | ||
93 | <p> | ||
94 | Download the current source package (pick the .tar.gz), if you haven't | ||
95 | already done so. Move it to a directory of your choice, open a | ||
96 | terminal window and change to this directory. Now unpack the archive | ||
97 | and change to the newly created directory: | ||
98 | </p> | ||
99 | <pre class="code"> | ||
100 | tar zxf LuaJIT-2.0.0-beta1.tar.gz | ||
101 | cd LuaJIT-2.0.0-beta1 | ||
102 | </pre> | ||
103 | <h3>Building LuaJIT</h3> | ||
104 | <p> | ||
105 | The supplied Makefiles try to auto-detect the settings needed for your | ||
106 | operating system and your compiler. They need to be run with GNU Make, | ||
107 | which is probably the default on your system, anyway. Simply run: | ||
108 | </p> | ||
109 | <pre class="code"> | ||
110 | make | ||
111 | </pre> | ||
112 | <h3>Installing LuaJIT</h3> | ||
113 | <p> | ||
114 | The top-level Makefile installs LuaJIT by default under | ||
115 | <tt>/usr/local</tt>, i.e. the executable ends up in | ||
116 | <tt>/usr/local/bin</tt> and so on. You need to have root privileges | ||
117 | to write to this path. So, assuming sudo is installed on your system, | ||
118 | run the following command and enter your sudo password: | ||
119 | </p> | ||
120 | <pre class="code"> | ||
121 | sudo make install | ||
122 | </pre> | ||
123 | <p> | ||
124 | Otherwise specify the directory prefix as an absolute path, e.g.: | ||
125 | </p> | ||
126 | <pre class="code"> | ||
127 | sudo make install PREFIX=/opt/lj2 | ||
128 | </pre> | ||
129 | <p> | ||
130 | But note that the installation prefix and the prefix for the module paths | ||
131 | (configured in <tt>src/luaconf.h</tt>) must match. | ||
132 | </p> | ||
133 | <p style="color: #c00000;"> | ||
134 | Note: to avoid overwriting a previous version, the beta test releases | ||
135 | only install the LuaJIT executable under the versioned name (i.e. | ||
136 | <tt>luajit-2.0.0-beta1</tt>). You probably want to create a symlink | ||
137 | for convenience, with a command like this: | ||
138 | </p> | ||
139 | <pre class="code" style="color: #c00000;"> | ||
140 | sudo ln -sf luajit-2.0.0-beta1 /usr/local/bin/luajit | ||
141 | </pre> | ||
142 | |||
143 | <h2 id="windows">Windows Systems</h2> | ||
144 | <h3>Prerequisites</h3> | ||
145 | <p> | ||
146 | Either install one of the open source SDKs | ||
147 | (<a href="http://mingw.org/"><span class="ext">»</span> MinGW</a> or | ||
148 | <a href="http://www.cygwin.com/"><span class="ext">»</span> Cygwin</a>) which come with modified | ||
149 | versions of GCC plus the required development headers. | ||
150 | </p> | ||
151 | <p> | ||
152 | Or install Microsoft's Visual C++ (MSVC) — the freely downloadable | ||
153 | <a href="http://www.microsoft.com/Express/VC/"><span class="ext">»</span> Express Edition</a> | ||
154 | works just fine. | ||
155 | </p> | ||
156 | <p> | ||
157 | Next, download the source package and unpack it using an archive manager | ||
158 | (e.g. the Windows Explorer) to a directory of your choice. | ||
159 | </p> | ||
160 | <h3>Building with MSVC</h3> | ||
161 | <p> | ||
162 | Open a "Visual Studio .NET Command Prompt" and <tt>cd</tt> to the | ||
163 | directory where you've unpacked the sources. Then run this command: | ||
164 | </p> | ||
165 | <pre class="code"> | ||
166 | cd src | ||
167 | msvcbuild | ||
168 | </pre> | ||
169 | <p> | ||
170 | Then follow the installation instructions below. | ||
171 | </p> | ||
172 | <h3>Building with MinGW or Cygwin</h3> | ||
173 | <p> | ||
174 | Open a command prompt window and make sure the MinGW or Cygwin programs | ||
175 | are in your path. Then <tt>cd</tt> to the directory where | ||
176 | you've unpacked the sources and run this command for MinGW: | ||
177 | </p> | ||
178 | <pre class="code"> | ||
179 | cd src | ||
180 | mingw32-make | ||
181 | </pre> | ||
182 | <p> | ||
183 | Or this command for Cygwin: | ||
184 | </p> | ||
185 | <pre class="code"> | ||
186 | cd src | ||
187 | make | ||
188 | </pre> | ||
189 | <p> | ||
190 | Then follow the installation instructions below. | ||
191 | </p> | ||
192 | <h3>Installing LuaJIT</h3> | ||
193 | <p> | ||
194 | Copy <tt>luajit.exe</tt> and <tt>lua51.dll</tt> | ||
195 | to a newly created directory (any location is ok). Add <tt>lua</tt> | ||
196 | and <tt>lua\jit</tt> directories below it and copy all Lua files | ||
197 | from the <tt>lib</tt> directory of the distribution to the latter directory. | ||
198 | </p> | ||
199 | <p> | ||
200 | There are no hardcoded | ||
201 | absolute path names — all modules are loaded relative to the | ||
202 | directory where <tt>luajit.exe</tt> is installed | ||
203 | (see <tt>src/luaconf.h</tt>). | ||
204 | </p> | ||
205 | <br class="flush"> | ||
206 | </div> | ||
207 | <div id="foot"> | ||
208 | <hr class="hide"> | ||
209 | Copyright © 2005-2009 Mike Pall | ||
210 | <span class="noprint"> | ||
211 | · | ||
212 | <a href="contact.html">Contact</a> | ||
213 | </span> | ||
214 | </div> | ||
215 | </body> | ||
216 | </html> | ||
diff --git a/doc/luajit.html b/doc/luajit.html new file mode 100644 index 00000000..9b16ea37 --- /dev/null +++ b/doc/luajit.html | |||
@@ -0,0 +1,120 @@ | |||
1 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> | ||
2 | <html> | ||
3 | <head> | ||
4 | <title>LuaJIT</title> | ||
5 | <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> | ||
6 | <meta name="Author" content="Mike Pall"> | ||
7 | <meta name="Copyright" content="Copyright (C) 2005-2009, Mike Pall"> | ||
8 | <meta name="Language" content="en"> | ||
9 | <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> | ||
10 | <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> | ||
11 | </head> | ||
12 | <body> | ||
13 | <div id="site"> | ||
14 | <a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> | ||
15 | </div> | ||
16 | <div id="head"> | ||
17 | <h1>LuaJIT</h1> | ||
18 | </div> | ||
19 | <div id="nav"> | ||
20 | <ul><li> | ||
21 | <a class="current" href="luajit.html">LuaJIT</a> | ||
22 | <ul><li> | ||
23 | <a href="install.html">Installation</a> | ||
24 | </li><li> | ||
25 | <a href="running.html">Running</a> | ||
26 | </li><li> | ||
27 | <a href="api.html">API Extensions</a> | ||
28 | </li></ul> | ||
29 | </li><li> | ||
30 | <a href="status.html">Status</a> | ||
31 | <ul><li> | ||
32 | <a href="changes.html">Changes</a> | ||
33 | </li></ul> | ||
34 | </li><li> | ||
35 | <a href="faq.html">FAQ</a> | ||
36 | </li><li> | ||
37 | <a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> | ||
38 | </li></ul> | ||
39 | </div> | ||
40 | <div id="main"> | ||
41 | <p> | ||
42 | LuaJIT is a <b>Just-In-Time Compiler</b> for the Lua<sup>*</sup> | ||
43 | programming language. | ||
44 | </p> | ||
45 | <p> | ||
46 | LuaJIT is Copyright © 2005-2009 Mike Pall. | ||
47 | LuaJIT is open source software, released under the | ||
48 | <a href="http://www.opensource.org/licenses/mit-license.php"><span class="ext">»</span> MIT/X license</a>. | ||
49 | </p> | ||
50 | <p class="indent" style="color: #606060;"> | ||
51 | * Lua is a powerful, dynamic and light-weight programming language | ||
52 | designed for extending applications. Lua is also frequently used as a | ||
53 | general-purpose, stand-alone language. More information about | ||
54 | Lua can be found at: <a href="http://www.lua.org/"><span class="ext">»</span> http://www.lua.org/</a> | ||
55 | </p> | ||
56 | <h2>Compatibility</h2> | ||
57 | <p> | ||
58 | LuaJIT implements the full set of language features defined by Lua 5.1. | ||
59 | The virtual machine (VM) is <b>API- and ABI-compatible</b> with the | ||
60 | standard Lua interpreter and can be deployed as a drop-in replacement. | ||
61 | </p> | ||
62 | <p> | ||
63 | LuaJIT offers more performance, at the expense of portability. It | ||
64 | currently runs on all popular operating systems based on <b>x86 CPUs</b> | ||
65 | (Linux, Windows, OSX etc.). It will be ported to x64 CPUs and other | ||
66 | platforms in the future, based on user demand and sponsoring. | ||
67 | </p> | ||
68 | |||
69 | <h2>Overview</h2> | ||
70 | <p> | ||
71 | LuaJIT has been successfully used as a <b>scripting middleware</b> in | ||
72 | games, 3D modellers, numerical simulations, trading platforms and many | ||
73 | other specialty applications. It combines high flexibility with high | ||
74 | performance and an unmatched <b>low memory footprint</b>: less than | ||
75 | <b>120K</b> for the VM plus less than <b>80K</b> for the JIT compiler. | ||
76 | </p> | ||
77 | <p> | ||
78 | LuaJIT has been in continuous development since 2005. It's widely | ||
79 | considered to be <b>one of the fastest dynamic language | ||
80 | implementations</b>. It has outperformed other dynamic languages on many | ||
81 | cross-language benchmarks since its first release — often by a | ||
82 | substantial margin. Only now, in 2009, other dynamic language VMs are | ||
83 | starting to catch up with the performance of LuaJIT 1.x … | ||
84 | </p> | ||
85 | <p> | ||
86 | 2009 also marks the first release of the long-awaited <b>LuaJIT 2.0</b>. | ||
87 | The whole VM has been rewritten from the ground up and relentlessly | ||
88 | optimized for performance. It combines a high-speed interpreter, | ||
89 | written in assembler, with a state-of-the-art JIT compiler. | ||
90 | </p> | ||
91 | <p> | ||
92 | An innovative <b>trace compiler</b> is integrated with advanced, | ||
93 | SSA-based optimizations and a highly tuned code generation backend. This | ||
94 | allows a substantial reduction of the overhead associated with dynamic | ||
95 | language features. It's destined to break into the performance range | ||
96 | traditionally reserved for offline, static language compilers. | ||
97 | </p> | ||
98 | |||
99 | <h2>More ...</h2> | ||
100 | <p> | ||
101 | Click on the LuaJIT sub-topics in the navigation bar to learn more | ||
102 | about LuaJIT. | ||
103 | </p> | ||
104 | <p> | ||
105 | Click on the Logo in the upper left corner to visit | ||
106 | the LuaJIT project page on the web. All other links to online | ||
107 | resources are marked with a '<span class="ext">»</span>'. | ||
108 | </p> | ||
109 | <br class="flush"> | ||
110 | </div> | ||
111 | <div id="foot"> | ||
112 | <hr class="hide"> | ||
113 | Copyright © 2005-2009 Mike Pall | ||
114 | <span class="noprint"> | ||
115 | · | ||
116 | <a href="contact.html">Contact</a> | ||
117 | </span> | ||
118 | </div> | ||
119 | </body> | ||
120 | </html> | ||
diff --git a/doc/running.html b/doc/running.html new file mode 100644 index 00000000..db69578c --- /dev/null +++ b/doc/running.html | |||
@@ -0,0 +1,233 @@ | |||
1 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> | ||
2 | <html> | ||
3 | <head> | ||
4 | <title>Running LuaJIT</title> | ||
5 | <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> | ||
6 | <meta name="Author" content="Mike Pall"> | ||
7 | <meta name="Copyright" content="Copyright (C) 2005-2009, Mike Pall"> | ||
8 | <meta name="Language" content="en"> | ||
9 | <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> | ||
10 | <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> | ||
11 | <style type="text/css"> | ||
12 | table.opt { | ||
13 | line-height: 1.2; | ||
14 | } | ||
15 | tr.opthead td { | ||
16 | font-weight: bold; | ||
17 | } | ||
18 | td.flag_name { | ||
19 | width: 4em; | ||
20 | } | ||
21 | td.flag_level { | ||
22 | width: 2em; | ||
23 | text-align: center; | ||
24 | } | ||
25 | td.param_name { | ||
26 | width: 6em; | ||
27 | } | ||
28 | td.param_default { | ||
29 | width: 4em; | ||
30 | text-align: right; | ||
31 | } | ||
32 | </style> | ||
33 | </head> | ||
34 | <body> | ||
35 | <div id="site"> | ||
36 | <a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> | ||
37 | </div> | ||
38 | <div id="head"> | ||
39 | <h1>Running LuaJIT</h1> | ||
40 | </div> | ||
41 | <div id="nav"> | ||
42 | <ul><li> | ||
43 | <a href="luajit.html">LuaJIT</a> | ||
44 | <ul><li> | ||
45 | <a href="install.html">Installation</a> | ||
46 | </li><li> | ||
47 | <a class="current" href="running.html">Running</a> | ||
48 | </li><li> | ||
49 | <a href="api.html">API Extensions</a> | ||
50 | </li></ul> | ||
51 | </li><li> | ||
52 | <a href="status.html">Status</a> | ||
53 | <ul><li> | ||
54 | <a href="changes.html">Changes</a> | ||
55 | </li></ul> | ||
56 | </li><li> | ||
57 | <a href="faq.html">FAQ</a> | ||
58 | </li><li> | ||
59 | <a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> | ||
60 | </li></ul> | ||
61 | </div> | ||
62 | <div id="main"> | ||
63 | <p> | ||
64 | LuaJIT has only a single stand-alone executable, called <tt>luajit</tt> on | ||
65 | POSIX systems or <tt>luajit.exe</tt> on Windows. It can be used to run simple | ||
66 | Lua statements or whole Lua applications from the command line. It has an | ||
67 | interactive mode, too. | ||
68 | </p> | ||
69 | <p class="indent" style="color: #c00000;"> | ||
70 | Note: the beta test releases only install under the versioned name on | ||
71 | POSIX systems (to avoid overwriting a previous version). You either need | ||
72 | to type <tt>luajit-2.0.0-beta1</tt> to start it or create a symlink | ||
73 | with a command like this: | ||
74 | </p> | ||
75 | <pre class="code" style="color: #c00000;"> | ||
76 | sudo ln -sf luajit-2.0.0-beta1 /usr/local/bin/luajit | ||
77 | </pre> | ||
78 | <p> | ||
79 | Unlike previous versions <b>optimization is turned on by default</b> in | ||
80 | LuaJIT 2.0!<br>It's no longer necessary to use <tt>luajit -O</tt>. | ||
81 | </p> | ||
82 | |||
83 | <h2 id="options">Command Line Options</h2> | ||
84 | <p> | ||
85 | The <tt>luajit</tt> stand-alone executable is just a slightly modified | ||
86 | version of the regular <tt>lua</tt> stand-alone executable. | ||
87 | It supports the same basic options, too. <tt>luajit -h</tt> | ||
88 | prints a short list of the available options. Please have a look at the | ||
89 | <a href="http://www.lua.org/manual/5.1/manual.html#6"><span class="ext">»</span> Lua manual</a> | ||
90 | for details. | ||
91 | </p> | ||
92 | <p> | ||
93 | Two additional options control the behavior of LuaJIT: | ||
94 | </p> | ||
95 | |||
96 | <h3 id="opt_j"><tt>-j cmd[=arg[,arg...]]</tt></h3> | ||
97 | <p> | ||
98 | This option performs a LuaJIT control command or activates one of the | ||
99 | loadable extension modules. The command is first looked up in the | ||
100 | <tt>jit.*</tt> library. If no matching function is found, a module | ||
101 | named <tt>jit.&lt;cmd&gt;</tt> is loaded and the <tt>start()</tt> | ||
102 | function of the module is called with the specified arguments (if | ||
103 | any). The space between <tt>-j</tt> and <tt>cmd</tt> is optional. | ||
104 | </p> | ||
105 | <p> | ||
106 | Here are the available LuaJIT control commands: | ||
107 | </p> | ||
108 | <ul> | ||
109 | <li id="j_on"><tt>-jon</tt> — Turns the JIT compiler on (default).</li> | ||
110 | <li id="j_off"><tt>-joff</tt> — Turns the JIT compiler off (only use the interpreter).</li> | ||
111 | <li id="j_flush"><tt>-jflush</tt> — Flushes the whole cache of compiled code.</li> | ||
112 | <li id="j_v"><tt>-jv</tt> — Shows verbose information about the progress of the JIT compiler.</li> | ||
113 | <li id="j_dump"><tt>-jdump</tt> — Dumps the code and structures used in various compiler stages.</li> | ||
114 | </ul> | ||
115 | <p> | ||
116 | The <tt>-jv</tt> and <tt>-jdump</tt> commands are extension modules | ||
117 | written in Lua. They are mainly used for debugging the JIT compiler | ||
118 | itself. For a description of their options and output format, please | ||
119 | read the comment block at the start of their source. | ||
120 | They can be found in the <tt>lib</tt> directory of the source | ||
121 | distribution or installed under the <tt>jit</tt> directory. By default | ||
122 | this is <tt>/usr/local/share/luajit-2.0.0-beta1/jit</tt> on POSIX | ||
123 | systems. | ||
124 | </p> | ||
125 | |||
126 | <h3 id="opt_O"><tt>-O[level]</tt><br> | ||
127 | <tt>-O[+]flag</tt> <tt>-O-flag</tt><br> | ||
128 | <tt>-Oparam=value</tt></h3> | ||
129 | <p> | ||
130 | This option allows fine-tuned control of the optimizations used by | ||
131 | the JIT compiler. This is mainly intended for debugging LuaJIT itself. | ||
132 | Please note that the JIT compiler is extremely fast (we are talking | ||
133 | about the microsecond to millisecond range). Disabling optimizations | ||
134 | doesn't have any visible impact on its overhead, but usually generates | ||
135 | code that runs slower. | ||
136 | </p> | ||
137 | <p> | ||
138 | The first form sets an optimization level — this enables a | ||
139 | specific mix of optimization flags. <tt>-O0</tt> turns off all | ||
140 | optimizations and higher numbers enable more optimizations. Omitting | ||
141 | the level (i.e. just <tt>-O</tt>) sets the default optimization level, | ||
142 | which is <tt>-O3</tt> in the current version. | ||
143 | </p> | ||
144 | <p> | ||
145 | The second form adds or removes individual optimization flags. | ||
146 | The third form sets a parameter for the VM or the JIT compiler | ||
147 | to a specific value. | ||
148 | </p> | ||
149 | <p> | ||
150 | You can either use this option multiple times (like <tt>-Ocse | ||
151 | -O-dce -Ohotloop=10</tt>) or separate several settings with a comma | ||
152 | (like <tt>-O+cse,-dce,hotloop=10</tt>). The settings are applied from | ||
153 | left to right and later settings override earlier ones. You can freely | ||
154 | mix the three forms, but note that setting an optimization level | ||
155 | overrides all earlier flags. | ||
156 | </p> | ||
157 | <p> | ||
158 | Here are the available flags and at what optimization levels they | ||
159 | are enabled: | ||
160 | </p> | ||
161 | <table class="opt"> | ||
162 | <tr class="opthead"> | ||
163 | <td class="flag_name">Flag</td> | ||
164 | <td class="flag_level">-O1</td> | ||
165 | <td class="flag_level">-O2</td> | ||
166 | <td class="flag_level">-O3</td> | ||
167 | <td class="flag_desc"> </td> | ||
168 | </tr> | ||
169 | <tr class="odd separate"> | ||
170 | <td class="flag_name">fold</td><td class="flag_level">•</td><td class="flag_level">•</td><td class="flag_level">•</td><td class="flag_desc">Constant Folding, Simplifications and Reassociation</td></tr> | ||
171 | <tr class="even"> | ||
172 | <td class="flag_name">cse</td><td class="flag_level">•</td><td class="flag_level">•</td><td class="flag_level">•</td><td class="flag_desc">Common-Subexpression Elimination</td></tr> | ||
173 | <tr class="odd"> | ||
174 | <td class="flag_name">dce</td><td class="flag_level">•</td><td class="flag_level">•</td><td class="flag_level">•</td><td class="flag_desc">Dead-Code Elimination</td></tr> | ||
175 | <tr class="even"> | ||
176 | <td class="flag_name">narrow</td><td class="flag_level"> </td><td class="flag_level">•</td><td class="flag_level">•</td><td class="flag_desc">Narrowing of numbers to integers</td></tr> | ||
177 | <tr class="odd"> | ||
178 | <td class="flag_name">loop</td><td class="flag_level"> </td><td class="flag_level">•</td><td class="flag_level">•</td><td class="flag_desc">Loop Optimizations (code hoisting)</td></tr> | ||
179 | <tr class="even"> | ||
180 | <td class="flag_name">fwd</td><td class="flag_level"> </td><td class="flag_level"> </td><td class="flag_level">•</td><td class="flag_desc">Load Forwarding (L2L) and Store Forwarding (S2L)</td></tr> | ||
181 | <tr class="odd"> | ||
182 | <td class="flag_name">dse</td><td class="flag_level"> </td><td class="flag_level"> </td><td class="flag_level">•</td><td class="flag_desc">Dead-Store Elimination</td></tr> | ||
183 | <tr class="even"> | ||
184 | <td class="flag_name">fuse</td><td class="flag_level"> </td><td class="flag_level"> </td><td class="flag_level">•</td><td class="flag_desc">Fusion of operands into instructions</td></tr> | ||
185 | </table> | ||
186 | <p> | ||
187 | Here are the parameters and their default settings: | ||
188 | </p> | ||
189 | <table class="opt"> | ||
190 | <tr class="opthead"> | ||
191 | <td class="param_name">Parameter</td> | ||
192 | <td class="param_default">Default</td> | ||
193 | <td class="param_desc"> </td> | ||
194 | </tr> | ||
195 | <tr class="odd separate"> | ||
196 | <td class="param_name">maxtrace</td><td class="param_default">1000</td><td class="param_desc">Max. number of traces in the cache</td></tr> | ||
197 | <tr class="even"> | ||
198 | <td class="param_name">maxrecord</td><td class="param_default">2000</td><td class="param_desc">Max. number of recorded IR instructions</td></tr> | ||
199 | <tr class="odd"> | ||
200 | <td class="param_name">maxirconst</td><td class="param_default">500</td><td class="param_desc">Max. number of IR constants of a trace</td></tr> | ||
201 | <tr class="even"> | ||
202 | <td class="param_name">maxside</td><td class="param_default">100</td><td class="param_desc">Max. number of side traces of a root trace</td></tr> | ||
203 | <tr class="odd"> | ||
204 | <td class="param_name">maxsnap</td><td class="param_default">100</td><td class="param_desc">Max. number of snapshots for a trace</td></tr> | ||
205 | <tr class="even separate"> | ||
206 | <td class="param_name">hotloop</td><td class="param_default">57</td><td class="param_desc">Number of iterations to detect a hot loop</td></tr> | ||
207 | <tr class="odd"> | ||
208 | <td class="param_name">hotexit</td><td class="param_default">10</td><td class="param_desc">Number of taken exits to start a side trace</td></tr> | ||
209 | <tr class="even"> | ||
210 | <td class="param_name">tryside</td><td class="param_default">4</td><td class="param_desc">Number of attempts to compile a side trace</td></tr> | ||
211 | <tr class="odd separate"> | ||
212 | <td class="param_name">instunroll</td><td class="param_default">4</td><td class="param_desc">Max. unroll factor for instable loops</td></tr> | ||
213 | <tr class="even"> | ||
214 | <td class="param_name">loopunroll</td><td class="param_default">7</td><td class="param_desc">Max. unroll factor for loop ops in side traces</td></tr> | ||
215 | <tr class="odd"> | ||
216 | <td class="param_name">callunroll</td><td class="param_default">3</td><td class="param_desc">Max. unroll factor for pseudo-recursive calls</td></tr> | ||
217 | <tr class="even separate"> | ||
218 | <td class="param_name">sizemcode</td><td class="param_default">32</td><td class="param_desc">Size of each machine code area in KBytes (Windows: 64K)</td></tr> | ||
219 | <tr class="odd"> | ||
220 | <td class="param_name">maxmcode</td><td class="param_default">512</td><td class="param_desc">Max. total size of all machine code areas in KBytes</td></tr> | ||
221 | </table> | ||
222 | <br class="flush"> | ||
223 | </div> | ||
224 | <div id="foot"> | ||
225 | <hr class="hide"> | ||
226 | Copyright © 2005-2009 Mike Pall | ||
227 | <span class="noprint"> | ||
228 | · | ||
229 | <a href="contact.html">Contact</a> | ||
230 | </span> | ||
231 | </div> | ||
232 | </body> | ||
233 | </html> | ||
diff --git a/doc/status.html b/doc/status.html new file mode 100644 index 00000000..23c14c76 --- /dev/null +++ b/doc/status.html | |||
@@ -0,0 +1,235 @@ | |||
1 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> | ||
2 | <html> | ||
3 | <head> | ||
4 | <title>Status & Roadmap</title> | ||
5 | <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> | ||
6 | <meta name="Author" content="Mike Pall"> | ||
7 | <meta name="Copyright" content="Copyright (C) 2005-2009, Mike Pall"> | ||
8 | <meta name="Language" content="en"> | ||
9 | <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> | ||
10 | <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> | ||
11 | <style type="text/css"> | ||
12 | ul li { padding-bottom: 0.3em; } | ||
13 | </style> | ||
14 | </head> | ||
15 | <body> | ||
16 | <div id="site"> | ||
17 | <a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> | ||
18 | </div> | ||
19 | <div id="head"> | ||
20 | <h1>Status & Roadmap</h1> | ||
21 | </div> | ||
22 | <div id="nav"> | ||
23 | <ul><li> | ||
24 | <a href="luajit.html">LuaJIT</a> | ||
25 | <ul><li> | ||
26 | <a href="install.html">Installation</a> | ||
27 | </li><li> | ||
28 | <a href="running.html">Running</a> | ||
29 | </li><li> | ||
30 | <a href="api.html">API Extensions</a> | ||
31 | </li></ul> | ||
32 | </li><li> | ||
33 | <a class="current" href="status.html">Status</a> | ||
34 | <ul><li> | ||
35 | <a href="changes.html">Changes</a> | ||
36 | </li></ul> | ||
37 | </li><li> | ||
38 | <a href="faq.html">FAQ</a> | ||
39 | </li><li> | ||
40 | <a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> | ||
41 | </li></ul> | ||
42 | </div> | ||
43 | <div id="main"> | ||
44 | <p> | ||
45 | The <span style="color: #0000c0;">LuaJIT 1.x</span> series represents | ||
46 | the current <span style="color: #0000c0;">stable branch</span>. As of | ||
47 | this writing there have been no open bugs for about a year. So, if | ||
48 | you need a rock-solid VM, you are encouraged to fetch the latest | ||
49 | release of LuaJIT 1.x from the <a href="http://luajit.org/download.html"><span class="ext">»</span> Download</a> | ||
50 | page. | ||
51 | </p> | ||
52 | <p> | ||
53 | <span style="color: #c00000;">LuaJIT 2.0</span> is the currently active | ||
54 | <span style="color: #c00000;">development branch</span>. | ||
55 | It has <b>Beta Test</b> status and is still undergoing | ||
56 | substantial changes. It's expected to quickly mature within the next | ||
57 | months. You should definitely start to evaluate it for new projects | ||
58 | right now. But deploying it in production environments is not yet | ||
59 | recommended. | ||
60 | </p> | ||
61 | |||
62 | <h2>Current Status</h2> | ||
63 | <p> | ||
64 | This is a list of the things you should know about the LuaJIT 2.0 beta test: | ||
65 | </p> | ||
66 | <ul> | ||
67 | <li> | ||
68 | The JIT compiler can only generate code for CPUs with <b>SSE2</b> at the | ||
69 | moment. I.e. you need at least a P4, Core 2/i5/i7 or K8/K10 to use it. I | ||
70 | plan to fix this during the beta phase and add support for emitting x87 | ||
71 | instructions to the backend. | ||
72 | </li> | ||
73 | <li> | ||
74 | Obviously there will be many <b>bugs</b> in a VM which has been | ||
75 | rewritten from the ground up. Please report your findings together with | ||
76 | the circumstances needed to reproduce the bug. If possible reduce the | ||
77 | problem down to a simple test case.<br> | ||
78 | There is no formal bug tracker at the moment. The best place for | ||
79 | discussion is the | ||
80 | <a href="http://www.lua.org/lua-l.html"><span class="ext">»</span> Lua mailing list</a>. Of course | ||
81 | you may also send your bug report directly to me, especially when it | ||
82 | contains lengthy debug output. Please check the | ||
83 | <a href="contact.html">Contact</a> page for details. | ||
84 | </li> | ||
85 | <li> | ||
86 | The VM is complete in the sense that it <b>should</b> run all Lua code | ||
87 | just fine. It's considered a serious bug if the VM crashes or produces | ||
88 | unexpected results — please report it. There are only very few | ||
89 | known incompatibilities with standard Lua: | ||
90 | <ul> | ||
91 | <li> | ||
92 | The Lua <b>debug API</b> is missing a couple of features (call/return | ||
93 | hooks) and shows slightly different behavior (no per-coroutine hooks). | ||
94 | </li> | ||
95 | <li> | ||
96 | Most other issues you're likely to find (e.g. with the existing test | ||
97 | suites) are differences in the <b>implementation-defined</b> behavior. | ||
98 | These either have a good reason (like early tail call resolving which | ||
99 | may cause differences in error reporting), are arbitrary design choices | ||
100 | or are due to quirks in the VM. The latter cases may get fixed if a | ||
101 | demonstrable need is shown. | ||
102 | </li> | ||
103 | </ul> | ||
104 | </li> | ||
105 | <li> | ||
106 | The <b>JIT compiler</b> is not complete (yet) and falls back to the | ||
107 | interpreter in some cases. All of this works transparently, so unless | ||
108 | you use -jv, you'll probably never notice (the interpreter is quite | ||
109 | fast, too). Here are the known issues: | ||
110 | <ul> | ||
111 | <li> | ||
112 | Many known issues cause a <b>NYI</b> (not yet implemented) trace abort | ||
113 | message. E.g. for calls to vararg functions or many string library | ||
114 | functions. Reporting these is only mildly useful, except if you have good | ||
115 | example code that shows the problem. Obviously, reports accompanied with | ||
116 | a patch to fix the issue are more than welcome. But please check back | ||
117 | with me, before writing major improvements, to avoid duplication of | ||
118 | effort. | ||
119 | </li> | ||
120 | <li> | ||
121 | <b>Recursion</b> is not traced yet. Often no trace will be generated at | ||
122 | all or some unroll limit will catch it and abort the trace. | ||
123 | </li> | ||
124 | <li> | ||
125 | The trace compiler currently does not back off specialization for | ||
126 | function call dispatch. It should really fall back to specializing on | ||
127 | the prototype, not the closure identity. This can lead to the so-called | ||
128 | "trace explosion" problem with <b>closure-heavy programming</b>. The | ||
129 | trace linking heuristics prevent this, but in the worst case this | ||
130 | means the code always falls back to the interpreter. | ||
131 | </li> | ||
132 | <li> | ||
133 | <b>Trace management</b> needs more tuning: better blacklisting of aborted | ||
134 | traces, less drastic countermeasures against trace explosion and better | ||
135 | heuristics in general. | ||
136 | </li> | ||
137 | <li> | ||
138 | Some checks are missing in the JIT-compiled code for obscure situations | ||
139 | with <b>open upvalues aliasing</b> one of the SSA slots later on (or | ||
140 | vice versa). Bonus points, if you can find a real world test case for | ||
141 | this. | ||
142 | </li> | ||
143 | </ul> | ||
144 | </li> | ||
145 | </ul> | ||
146 | |||
147 | <h2>Roadmap</h2> | ||
148 | <p> | ||
149 | Rather than stating exact release dates (I'm well known for making | ||
150 | spectacularly wrong guesses), this roadmap lists the general project | ||
151 | plan, sorted by priority, as well as ideas for the future: | ||
152 | </p> | ||
153 | <ul> | ||
154 | <li> | ||
155 | The main goal right now is to stabilize LuaJIT 2.0 and get it out of | ||
156 | beta test. <b>Correctness</b> has priority over completeness. This | ||
157 | implies the first stable release will certainly NOT compile every | ||
158 | library function call and will fall back to the interpreter from time | ||
159 | to time. This is perfectly ok, since it still executes all Lua code, | ||
160 | just not at the highest possible speed. | ||
161 | </li> | ||
162 | <li> | ||
163 | The next step is to get it to compile more library functions and handle | ||
164 | more cases where the compiler currently bails out. This doesn't mean it | ||
165 | will compile every corner case. It's much more important that it | ||
166 | performs well in a majority of use cases. Every compiler has to make | ||
167 | these trade-offs — <b>completeness</b> just cannot be the | ||
168 | overriding goal for a low-footprint, low-overhead JIT compiler. | ||
169 | </li> | ||
170 | <li> | ||
171 | More <b>optimizations</b> will be added in parallel to the last step on | ||
172 | an as-needed basis. Array-bounds-check (ABC) removal, sinking of stores | ||
173 | to aggregates and sinking of allocations are high on the list. Faster | ||
174 | handling of NEWREF and better alias analysis are desirable, too. More | ||
175 | complex optimizations with less pay-off, such as value-range-propagation | ||
176 | (VRP) will have to wait. | ||
177 | </li> | ||
178 | <li> | ||
179 | LuaJIT 2.0 has been designed with <b>portability</b> in mind. | ||
180 | Nonetheless, it compiles to native code and needs to be adapted to each | ||
181 | architecture. Porting the compiler backend is probably the easier task, | ||
182 | but a key element of its design is the fast interpreter, written in | ||
183 | machine-specific assembler.<br> | ||
184 | The code base and the internal structures are already prepared for | ||
185 | easier porting to 64 bit architectures. The most likely next target is a | ||
186 | port to <b>x64</b>, but this will have to wait until the x86 port | ||
187 | stabilizes. Other ports will follow — companies which are | ||
188 | interested in sponsoring a port to a particular architecture, please | ||
189 | <a href="contact.html">contact me</a>. | ||
190 | </li> | ||
191 | <li> | ||
192 | There are some planned <b>structural improvements</b> to the compiler, | ||
193 | like compressed snapshot maps or generic handling of calls to helper | ||
194 | methods. These are of lesser importance, unless other developments | ||
195 | elevate their priority. | ||
196 | </li> | ||
197 | <li> | ||
198 | <b>Documentation</b> about the <b>internals</b> of LuaJIT is still sorely | ||
199 | missing. Although the source code is included and is IMHO well | ||
200 | commented, many basic design decisions are in need of an explanation. | ||
201 | The rather un-traditional compiler architecture and the many highly | ||
202 | optimized data structures are a barrier for outside participation in | ||
203 | the development. Alas, as I've repeatedly stated, I'm better at | ||
204 | writing code than papers and I'm not in need of any academic merits. | ||
205 | Someday I will find the time for it. :-) | ||
206 | </li> | ||
207 | <li> | ||
208 | Producing good code for unbiased branches is a key problem for trace | ||
209 | compilers. This is the main cause for "trace explosion". | ||
210 | <b>Hyperblock scheduling</b> promises to solve this nicely at the | ||
211 | price of a major redesign of the compiler. This would also pave the | ||
212 | way for emitting predicated instructions, which is a prerequisite | ||
213 | for efficient <b>vectorization</b>. | ||
214 | </li> | ||
215 | <li> | ||
216 | Currently Lua is missing a standard library for access to <b>structured | ||
217 | binary data</b> and <b>arrays/buffers</b> holding low-level data types. | ||
218 | Allowing calls to arbitrary C functions (<b>FFI</b>) would obviate the | ||
219 | need to write manual bindings. A variety of extension modules is floating | ||
220 | around, with different scope and capabilities. Alas, none of them has been | ||
221 | designed with a JIT compiler in mind. | ||
222 | </li> | ||
223 | </ul> | ||
224 | <br class="flush"> | ||
225 | </div> | ||
226 | <div id="foot"> | ||
227 | <hr class="hide"> | ||
228 | Copyright © 2005-2009 Mike Pall | ||
229 | <span class="noprint"> | ||
230 | · | ||
231 | <a href="contact.html">Contact</a> | ||
232 | </span> | ||
233 | </div> | ||
234 | </body> | ||
235 | </html> | ||
diff --git a/dynasm/dasm_proto.h b/dynasm/dasm_proto.h new file mode 100644 index 00000000..94d9a9e2 --- /dev/null +++ b/dynasm/dasm_proto.h | |||
@@ -0,0 +1,69 @@ | |||
1 | /* | ||
2 | ** DynASM encoding engine prototypes. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. All rights reserved. | ||
4 | ** Released under the MIT/X license. See dynasm.lua for full copyright notice. | ||
5 | */ | ||
6 | |||
7 | #ifndef _DASM_PROTO_H | ||
8 | #define _DASM_PROTO_H | ||
9 | |||
10 | #include <stddef.h> | ||
11 | #include <stdarg.h> | ||
12 | |||
13 | #define DASM_IDENT "DynASM 1.2.1" | ||
14 | #define DASM_VERSION 10201 /* 1.2.1 */ | ||
15 | |||
16 | #ifndef Dst_DECL | ||
17 | #define Dst_DECL dasm_State *Dst | ||
18 | #endif | ||
19 | |||
20 | #ifndef Dst_GET | ||
21 | #define Dst_GET (Dst) | ||
22 | #endif | ||
23 | |||
24 | #ifndef DASM_FDEF | ||
25 | #define DASM_FDEF extern | ||
26 | #endif | ||
27 | |||
28 | |||
29 | /* Internal DynASM encoder state. */ | ||
30 | typedef struct dasm_State dasm_State; | ||
31 | |||
32 | /* Action list type. */ | ||
33 | typedef const unsigned char *dasm_ActList; | ||
34 | |||
35 | |||
36 | /* Initialize and free DynASM state. */ | ||
37 | DASM_FDEF void dasm_init(Dst_DECL, int maxsection); | ||
38 | DASM_FDEF void dasm_free(Dst_DECL); | ||
39 | |||
40 | /* Setup global array. Must be called before dasm_setup(). */ | ||
41 | DASM_FDEF void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl); | ||
42 | |||
43 | /* Grow PC label array. Can be called after dasm_setup(), too. */ | ||
44 | DASM_FDEF void dasm_growpc(Dst_DECL, unsigned int maxpc); | ||
45 | |||
46 | /* Setup encoder. */ | ||
47 | DASM_FDEF void dasm_setup(Dst_DECL, dasm_ActList actionlist); | ||
48 | |||
49 | /* Feed encoder with actions. Calls are generated by pre-processor. */ | ||
50 | DASM_FDEF void dasm_put(Dst_DECL, int start, ...); | ||
51 | |||
52 | /* Link sections and return the resulting size. */ | ||
53 | DASM_FDEF int dasm_link(Dst_DECL, size_t *szp); | ||
54 | |||
55 | /* Encode sections into buffer. */ | ||
56 | DASM_FDEF int dasm_encode(Dst_DECL, void *buffer); | ||
57 | |||
58 | /* Get PC label offset. */ | ||
59 | DASM_FDEF int dasm_getpclabel(Dst_DECL, unsigned int pc); | ||
60 | |||
61 | #ifdef DASM_CHECKS | ||
62 | /* Optional sanity checker to call between isolated encoding steps. */ | ||
63 | DASM_FDEF int dasm_checkstep(Dst_DECL, int secmatch); | ||
64 | #else | ||
65 | #define dasm_checkstep(a, b) 0 | ||
66 | #endif | ||
67 | |||
68 | |||
69 | #endif /* _DASM_PROTO_H */ | ||
diff --git a/dynasm/dasm_x86.h b/dynasm/dasm_x86.h new file mode 100644 index 00000000..dab33e5a --- /dev/null +++ b/dynasm/dasm_x86.h | |||
@@ -0,0 +1,467 @@ | |||
1 | /* | ||
2 | ** DynASM x86 encoding engine. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. All rights reserved. | ||
4 | ** Released under the MIT/X license. See dynasm.lua for full copyright notice. | ||
5 | */ | ||
6 | |||
7 | #include <stddef.h> | ||
8 | #include <stdarg.h> | ||
9 | #include <string.h> | ||
10 | #include <stdlib.h> | ||
11 | |||
12 | #define DASM_ARCH "x86" | ||
13 | |||
14 | #ifndef DASM_EXTERN | ||
15 | #define DASM_EXTERN(a,b,c,d) 0 | ||
16 | #endif | ||
17 | |||
18 | /* Action definitions. DASM_STOP must be 255. */ | ||
/*
** The 23 enumerators below are numbered consecutively from 233, which is what
** places DASM_STOP at 255. Action list bytes below DASM_DISP are treated as
** plain code bytes by the encoder passes.
** The enumerator DASM_EXTERN does not clash with the function-like macro
** DASM_EXTERN(a,b,c,d): that macro only expands when followed by '('.
*/
19 | enum { | ||
20 | DASM_DISP = 233, | ||
21 | DASM_IMM_S, DASM_IMM_B, DASM_IMM_W, DASM_IMM_D, DASM_IMM_WB, DASM_IMM_DB, | ||
22 | DASM_VREG, DASM_SPACE, DASM_SETLABEL, DASM_REL_A, DASM_REL_LG, DASM_REL_PC, | ||
23 | DASM_IMM_LG, DASM_IMM_PC, DASM_LABEL_LG, DASM_LABEL_PC, DASM_ALIGN, | ||
24 | DASM_EXTERN, DASM_ESC, DASM_MARK, DASM_SECTION, DASM_STOP | ||
25 | }; | ||
26 | |||
27 | /* Maximum number of section buffer positions for a single dasm_put() call. */ | ||
28 | #define DASM_MAXSECPOS 25 | ||
29 | |||
30 | /* DynASM encoder status codes. Action list offset or number are or'ed in. */ | ||
31 | #define DASM_S_OK 0x00000000 | ||
32 | #define DASM_S_NOMEM 0x01000000 | ||
33 | #define DASM_S_PHASE 0x02000000 | ||
34 | #define DASM_S_MATCH_SEC 0x03000000 | ||
35 | #define DASM_S_RANGE_I 0x11000000 | ||
36 | #define DASM_S_RANGE_SEC 0x12000000 | ||
37 | #define DASM_S_RANGE_LG 0x13000000 | ||
38 | #define DASM_S_RANGE_PC 0x14000000 | ||
39 | #define DASM_S_RANGE_VREG 0x15000000 | ||
40 | #define DASM_S_UNDEF_L 0x21000000 | ||
41 | #define DASM_S_UNDEF_PC 0x22000000 | ||
42 | |||
43 | /* Macros to convert positions (8 bit section + 24 bit index). */ | ||
44 | #define DASM_POS2IDX(pos) ((pos)&0x00ffffff) | ||
45 | #define DASM_POS2BIAS(pos) ((pos)&0xff000000) | ||
46 | #define DASM_SEC2POS(sec) ((sec)<<24) | ||
47 | #define DASM_POS2SEC(pos) ((pos)>>24) | ||
48 | #define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos)) | ||
49 | |||
50 | /* Per-section structure. */ | ||
/*
** Positions are "biased": the section number sits in the top 8 bits (see
** DASM_SEC2POS). rbuf is buf minus that bias, so rbuf[pos] addresses the
** buffer slot for a biased position directly.
*/
51 | typedef struct dasm_Section { | ||
52 | int *rbuf; /* Biased buffer pointer (negative section bias). */ | ||
53 | int *buf; /* True buffer pointer. */ | ||
54 | size_t bsize; /* Buffer size in bytes. */ | ||
55 | int pos; /* Biased buffer position. */ | ||
56 | int epos; /* End of biased buffer position - max single put. */ | ||
57 | int ofs; /* Byte offset into section. */ | ||
58 | } dasm_Section; | ||
59 | |||
60 | /* Core structure holding the DynASM encoding state. */ | ||
/*
** The structure is allocated with DASM_PSZ(maxsection) bytes, so sections[]
** really holds maxsection entries. lgsize and pcsize are byte sizes of the
** lglabels and pclabels arrays. globals is stored as (gl - 10) by
** dasm_setupglobal(), so it is indexed with label numbers starting at 10.
*/
61 | struct dasm_State { | ||
62 | size_t psize; /* Allocated size of this structure. */ | ||
63 | dasm_ActList actionlist; /* Current actionlist pointer. */ | ||
64 | int *lglabels; /* Local/global chain/pos ptrs. */ | ||
65 | size_t lgsize; | ||
66 | int *pclabels; /* PC label chains/pos ptrs. */ | ||
67 | size_t pcsize; | ||
68 | void **globals; /* Array of globals (bias -10). */ | ||
69 | dasm_Section *section; /* Pointer to active section. */ | ||
70 | size_t codesize; /* Total size of all code sections. */ | ||
71 | int maxsection; /* 0 <= sectionidx < maxsection. */ | ||
72 | int status; /* Status code. */ | ||
73 | dasm_Section sections[1]; /* All sections. Alloc-extended. */ | ||
74 | }; | ||
75 | |||
/*
** The size of the core structure depends on the max. number of sections.
** dasm_State already embeds one dasm_Section, hence only (ms)-1 extra slots.
** The argument is parenthesized so that expressions such as DASM_PSZ(a+b)
** or DASM_PSZ(cond ? x : y) expand correctly.
*/
#define DASM_PSZ(ms)	(sizeof(dasm_State)+((ms)-1)*sizeof(dasm_Section))
78 | |||
79 | |||
80 | /* Initialize DynASM state. */ | ||
81 | void dasm_init(Dst_DECL, int maxsection) | ||
82 | { | ||
83 | dasm_State *D; | ||
84 | size_t psz = 0; | ||
85 | int i; | ||
86 | Dst_REF = NULL; | ||
87 | DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); | ||
88 | D = Dst_REF; | ||
89 | D->psize = psz; | ||
90 | D->lglabels = NULL; | ||
91 | D->lgsize = 0; | ||
92 | D->pclabels = NULL; | ||
93 | D->pcsize = 0; | ||
94 | D->globals = NULL; | ||
95 | D->maxsection = maxsection; | ||
96 | for (i = 0; i < maxsection; i++) { | ||
97 | D->sections[i].buf = NULL; /* Need this for pass3. */ | ||
98 | D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i); | ||
99 | D->sections[i].bsize = 0; | ||
100 | D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */ | ||
101 | } | ||
102 | } | ||
103 | |||
104 | /* Free DynASM state. */ | ||
105 | void dasm_free(Dst_DECL) | ||
106 | { | ||
107 | dasm_State *D = Dst_REF; | ||
108 | int i; | ||
109 | for (i = 0; i < D->maxsection; i++) | ||
110 | if (D->sections[i].buf) | ||
111 | DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize); | ||
112 | if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize); | ||
113 | if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize); | ||
114 | DASM_M_FREE(Dst, D, D->psize); | ||
115 | } | ||
116 | |||
117 | /* Setup global label array. Must be called before dasm_setup(). */ | ||
118 | void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) | ||
119 | { | ||
120 | dasm_State *D = Dst_REF; | ||
121 | D->globals = gl - 10; /* Negative bias to compensate for locals. */ | ||
122 | DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); | ||
123 | } | ||
124 | |||
125 | /* Grow PC label array. Can be called after dasm_setup(), too. */ | ||
126 | void dasm_growpc(Dst_DECL, unsigned int maxpc) | ||
127 | { | ||
128 | dasm_State *D = Dst_REF; | ||
129 | size_t osz = D->pcsize; | ||
130 | DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int)); | ||
131 | memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz); | ||
132 | } | ||
133 | |||
134 | /* Setup encoder. */ | ||
135 | void dasm_setup(Dst_DECL, dasm_ActList actionlist) | ||
136 | { | ||
137 | dasm_State *D = Dst_REF; | ||
138 | int i; | ||
139 | D->actionlist = actionlist; | ||
140 | D->status = DASM_S_OK; | ||
141 | D->section = &D->sections[0]; | ||
142 | memset((void *)D->lglabels, 0, D->lgsize); | ||
143 | if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); | ||
144 | for (i = 0; i < D->maxsection; i++) { | ||
145 | D->sections[i].pos = DASM_SEC2POS(i); | ||
146 | D->sections[i].ofs = 0; | ||
147 | } | ||
148 | } | ||
149 | |||
150 | |||
/*
** Range-check helpers for dasm_put(). With DASM_CHECKS defined, a failed
** check stores the status code or'ed with the current action list offset in
** D->status and returns from dasm_put() immediately. Without DASM_CHECKS
** both macros expand to no-ops.
** NOTE(review): the early return happens after va_start() without a matching
** va_end() and without writing pos/ofs back to the section -- confirm this is
** acceptable for all supported targets.
*/
151 | #ifdef DASM_CHECKS | ||
152 | #define CK(x, st) \ | ||
153 | do { if (!(x)) { \ | ||
154 | D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0) | ||
155 | #define CKPL(kind, st) \ | ||
156 | do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \ | ||
157 | D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0) | ||
158 | #else | ||
159 | #define CK(x, st) ((void)0) | ||
160 | #define CKPL(kind, st) ((void)0) | ||
161 | #endif | ||
162 | |||
163 | /* Pass 1: Store actions and args, link branches/labels, estimate offsets. */ | ||
/*
** start is the offset into the action list; the variable arguments supply one
** int per argument-taking action, in action order.
** Each call appends to the active section's int buffer: first `start`, then
** the argument, link and offset slots required by the actions it walks.
** `ofs` accumulates the estimated byte offset within the section; pass 2
** corrects the estimates for shrunk branches and alignment.
*/
164 | void dasm_put(Dst_DECL, int start, ...) | ||
165 | { | ||
166 | va_list ap; | ||
167 | dasm_State *D = Dst_REF; | ||
168 | dasm_ActList p = D->actionlist + start; | ||
169 | dasm_Section *sec = D->section; | ||
170 | int pos = sec->pos, ofs = sec->ofs, mrm = 4; | ||
171 | int *b; | ||
172 | |||
/* Grow the section buffer by 2*DASM_MAXSECPOS ints and recompute the biased pointers. */
173 | if (pos >= sec->epos) { | ||
174 | DASM_M_GROW(Dst, int, sec->buf, sec->bsize, | ||
175 | sec->bsize + 2*DASM_MAXSECPOS*sizeof(int)); | ||
176 | sec->rbuf = sec->buf - DASM_POS2BIAS(pos); | ||
177 | sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos); | ||
178 | } | ||
179 | |||
180 | b = sec->rbuf; | ||
181 | b[pos++] = start; | ||
182 | |||
183 | va_start(ap, start); | ||
184 | while (1) { | ||
185 | int action = *p++; | ||
/* Values below DASM_DISP are plain code bytes: one byte of output each. */
186 | if (action < DASM_DISP) { | ||
187 | ofs++; | ||
/* Actions DASM_DISP..DASM_REL_A take one int vararg, which is stored in the buffer. */
188 | } else if (action <= DASM_REL_A) { | ||
189 | int n = va_arg(ap, int); | ||
190 | b[pos++] = n; | ||
/* Several cases below fall through on purpose (e.g. DISP -> IMM_DB -> REL_A -> IMM_D). */
191 | switch (action) { | ||
192 | case DASM_DISP: | ||
193 | if (n == 0) { if ((mrm&7) == 4) mrm = p[-2]; if ((mrm&7) != 5) break; } | ||
194 | case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob; | ||
195 | case DASM_REL_A: /* Assumes ptrdiff_t is int. !x64 */ | ||
196 | case DASM_IMM_D: ofs += 4; break; | ||
197 | case DASM_IMM_S: CK(((n+128)&-256) == 0, RANGE_I); goto ob; | ||
198 | case DASM_IMM_B: CK((n&-256) == 0, RANGE_I); ob: ofs++; break; | ||
199 | case DASM_IMM_WB: if (((n+128)&-256) == 0) goto ob; | ||
200 | case DASM_IMM_W: CK((n&-65536) == 0, RANGE_I); ofs += 2; break; | ||
201 | case DASM_SPACE: p++; ofs += n; break; | ||
202 | case DASM_SETLABEL: b[pos-2] = -0x40000000; break; /* Neg. label ofs. */ | ||
203 | case DASM_VREG: CK((n&-8) == 0 && (n != 4 || (*p&1) == 0), RANGE_VREG); | ||
/* The `continue` skips the mrm reset below, so a VREG-selected mrm survives to the next action. */
204 | if (*p++ == 1 && *p == DASM_DISP) mrm = n; continue; | ||
205 | } | ||
206 | mrm = 4; | ||
207 | } else { | ||
208 | int *pl, n; | ||
209 | switch (action) { | ||
210 | case DASM_REL_LG: | ||
211 | case DASM_IMM_LG: | ||
212 | n = *p++; pl = D->lglabels + n; | ||
213 | if (n <= 246) { CKPL(lg, LG); goto putrel; } /* Bkwd rel or global. */ | ||
214 | pl -= 246; n = *pl; | ||
215 | if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */ | ||
216 | goto linkrel; | ||
217 | case DASM_REL_PC: | ||
218 | case DASM_IMM_PC: pl = D->pclabels + va_arg(ap, int); CKPL(pc, PC); | ||
219 | putrel: | ||
220 | n = *pl; | ||
221 | if (n < 0) { /* Label exists. Get label pos and store it. */ | ||
222 | b[pos] = -n; | ||
223 | } else { | ||
224 | linkrel: | ||
225 | b[pos] = n; /* Else link to rel chain, anchored at label. */ | ||
226 | *pl = pos; | ||
227 | } | ||
228 | pos++; | ||
229 | ofs += 4; /* Maximum offset needed. */ | ||
230 | if (action == DASM_REL_LG || action == DASM_REL_PC) | ||
231 | b[pos++] = ofs; /* Store pass1 offset estimate. */ | ||
232 | break; | ||
233 | case DASM_LABEL_LG: pl = D->lglabels + *p++; CKPL(lg, LG); goto putlabel; | ||
234 | case DASM_LABEL_PC: pl = D->pclabels + va_arg(ap, int); CKPL(pc, PC); | ||
235 | putlabel: | ||
236 | n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */ | ||
237 | while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos; } | ||
238 | *pl = -pos; /* Label exists now. */ | ||
239 | b[pos++] = ofs; /* Store pass1 offset estimate. */ | ||
240 | break; | ||
241 | case DASM_ALIGN: | ||
242 | ofs += *p++; /* Maximum alignment needed (arg is 2**n-1). */ | ||
243 | b[pos++] = ofs; /* Store pass1 offset estimate. */ | ||
244 | break; | ||
245 | case DASM_EXTERN: p += 2; ofs += 4; break; | ||
246 | case DASM_ESC: p++; ofs++; break; | ||
247 | case DASM_MARK: mrm = p[-2]; break; | ||
/* DASM_SECTION switches the active section for later calls, then falls through to stop. */
248 | case DASM_SECTION: | ||
249 | n = *p; CK(n < D->maxsection, RANGE_SEC); D->section = &D->sections[n]; | ||
250 | case DASM_STOP: goto stop; | ||
251 | } | ||
252 | } | ||
253 | } | ||
254 | stop: | ||
255 | va_end(ap); | ||
/* pos/ofs are written back to the section that was active on entry. */
256 | sec->pos = pos; | ||
257 | sec->ofs = ofs; | ||
258 | } | ||
/* Only CK is undefined here; CKPL stays defined for the rest of the file. */
259 | #undef CK | ||
260 | |||
261 | /* Pass 2: Link sections, shrink branches/aligns, fix label offsets. */ | ||
/*
** Returns a status code. On success the total size of all sections is stored
** in *szp and in D->codesize and DASM_S_OK is returned.
** With DASM_CHECKS defined, *szp is zeroed first and the function returns
** early with a pending error status, or with DASM_S_UNDEF_PC|pc if a PC label
** is referenced but never defined.
*/
262 | int dasm_link(Dst_DECL, size_t *szp) | ||
263 | { | ||
264 | dasm_State *D = Dst_REF; | ||
265 | int secnum; | ||
266 | int ofs = 0; | ||
267 | |||
268 | #ifdef DASM_CHECKS | ||
269 | *szp = 0; | ||
270 | if (D->status != DASM_S_OK) return D->status; | ||
271 | { | ||
272 | int pc; | ||
273 | for (pc = 0; pc*sizeof(int) < D->pcsize; pc++) | ||
274 | if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc; | ||
275 | } | ||
276 | #endif | ||
277 | |||
278 | { /* Handle globals not defined in this translation unit. */ | ||
279 | int idx; | ||
280 | for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) { | ||
281 | int n = D->lglabels[idx]; | ||
282 | /* Undefined label: Collapse rel chain and replace with marker (< 0). */ | ||
283 | while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } | ||
284 | } | ||
285 | } | ||
286 | |||
287 | /* Combine all code sections. No support for data sections (yet). */ | ||
/* `ofs` is the running adjustment applied to pass-1 offsets: it starts at the
** summed size of all previous sections and decreases with every byte saved. */
288 | for (secnum = 0; secnum < D->maxsection; secnum++) { | ||
289 | dasm_Section *sec = D->sections + secnum; | ||
290 | int *b = sec->rbuf; | ||
291 | int pos = DASM_SEC2POS(secnum); | ||
292 | int lastpos = sec->pos; | ||
293 | |||
/* Each iteration replays one dasm_put() record: b[pos] is its action list offset. */
294 | while (pos != lastpos) { | ||
295 | dasm_ActList p = D->actionlist + b[pos++]; | ||
296 | while (1) { | ||
297 | int op, action = *p++; | ||
298 | switch (action) { | ||
299 | case DASM_REL_LG: p++; op = p[-3]; goto rel_pc; | ||
300 | case DASM_REL_PC: op = p[-2]; rel_pc: { | ||
/* shrink is the number of bytes saved when the branch fits a short form:
** 3 for opcode 0xe9, 4 for opcodes 0x80..0x8f, 0 if not shrinkable. */
301 | int shrink = op == 0xe9 ? 3 : ((op&0xf0) == 0x80 ? 4 : 0); | ||
302 | if (shrink) { /* Shrinkable branch opcode? */ | ||
303 | int lofs, lpos = b[pos]; | ||
304 | if (lpos < 0) goto noshrink; /* Ext global? */ | ||
305 | lofs = *DASM_POS2PTR(D, lpos); | ||
306 | if (lpos > pos) { /* Fwd label: add cumulative section offsets. */ | ||
307 | int i; | ||
308 | for (i = secnum; i < DASM_POS2SEC(lpos); i++) | ||
309 | lofs += D->sections[i].ofs; | ||
310 | } else { | ||
311 | lofs -= ofs; /* Bkwd label: unfix offset. */ | ||
312 | } | ||
313 | lofs -= b[pos+1]; /* Short branch ok? */ | ||
314 | if (lofs >= -128-shrink && lofs <= 127) ofs -= shrink; /* Yes. */ | ||
315 | else { noshrink: shrink = 0; } /* No, cannot shrink op. */ | ||
316 | } | ||
/* The pass-1 offset slot is reused to hand the shrink decision to pass 3. */
317 | b[pos+1] = shrink; | ||
318 | pos += 2; | ||
319 | break; | ||
320 | } | ||
/* The next cases fall through on purpose: skip an optional action byte, then one buffer slot. */
321 | case DASM_SPACE: case DASM_IMM_LG: case DASM_VREG: p++; | ||
322 | case DASM_DISP: case DASM_IMM_S: case DASM_IMM_B: case DASM_IMM_W: | ||
323 | case DASM_IMM_D: case DASM_IMM_WB: case DASM_IMM_DB: | ||
324 | case DASM_SETLABEL: case DASM_REL_A: case DASM_IMM_PC: pos++; break; | ||
325 | case DASM_LABEL_LG: p++; | ||
326 | case DASM_LABEL_PC: b[pos++] += ofs; break; /* Fix label offset. */ | ||
327 | case DASM_ALIGN: ofs -= (b[pos++]+ofs)&*p++; break; /* Adjust ofs. */ | ||
328 | case DASM_EXTERN: p += 2; break; | ||
329 | case DASM_ESC: p++; break; | ||
330 | case DASM_MARK: break; | ||
331 | case DASM_SECTION: case DASM_STOP: goto stop; | ||
332 | } | ||
333 | } | ||
334 | stop: (void)0; | ||
335 | } | ||
336 | ofs += sec->ofs; /* Next section starts right after current section. */ | ||
337 | } | ||
338 | |||
339 | D->codesize = ofs; /* Total size of all code sections */ | ||
340 | *szp = ofs; | ||
341 | return DASM_S_OK; | ||
342 | } | ||
343 | |||
/*
** Output helpers for dasm_encode(); all of them write through and advance the
** local byte pointer `cp`. By default words and dwords are stored with a
** single, possibly unaligned, store. With DASM_ALIGNED_WRITES they are written
** byte by byte, low byte first.
*/
344 | #define dasmb(x) *cp++ = (unsigned char)(x) | ||
345 | #ifndef DASM_ALIGNED_WRITES | ||
346 | #define dasmw(x) \ | ||
347 | do { *((unsigned short *)cp) = (unsigned short)(x); cp+=2; } while (0) | ||
348 | #define dasmd(x) \ | ||
349 | do { *((unsigned int *)cp) = (unsigned int)(x); cp+=4; } while (0) | ||
350 | #else | ||
351 | #define dasmw(x) do { dasmb(x); dasmb((x)>>8); } while (0) | ||
352 | #define dasmd(x) do { dasmw(x); dasmw((x)>>16); } while (0) | ||
353 | #endif | ||
354 | |||
355 | /* Pass 3: Encode sections. */ | ||
/*
** Writes the machine code for all sections, in section order, to `buffer`.
** Returns DASM_S_PHASE if the number of bytes written differs from the size
** computed by dasm_link() (D->codesize), DASM_S_OK otherwise.
** The function itself performs no bounds checking on `buffer`.
*/
356 | int dasm_encode(Dst_DECL, void *buffer) | ||
357 | { | ||
358 | dasm_State *D = Dst_REF; | ||
359 | unsigned char *base = (unsigned char *)buffer; | ||
360 | unsigned char *cp = base; | ||
361 | int secnum; | ||
362 | |||
363 | /* Encode all code sections. No support for data sections (yet). */ | ||
364 | for (secnum = 0; secnum < D->maxsection; secnum++) { | ||
365 | dasm_Section *sec = D->sections + secnum; | ||
366 | int *b = sec->buf; | ||
367 | int *endb = sec->rbuf + sec->pos; | ||
368 | |||
/* Each iteration replays one dasm_put() record, starting with its action list offset. */
369 | while (b != endb) { | ||
370 | dasm_ActList p = D->actionlist + *b++; | ||
371 | unsigned char *mark = NULL; | ||
372 | while (1) { | ||
373 | int action = *p++; | ||
/* Actions DASM_DISP..DASM_ALIGN own one buffer slot, fetched into n here. */
374 | int n = (action >= DASM_DISP && action <= DASM_ALIGN) ? *b++ : 0; | ||
/* Many cases below fall through or jump to the shared wb/wd/db writers on purpose. */
375 | switch (action) { | ||
376 | case DASM_DISP: if (!mark) mark = cp; { | ||
377 | unsigned char *mm = mark; | ||
378 | if (*p != DASM_IMM_DB && *p != DASM_IMM_WB) mark = NULL; | ||
379 | if (n == 0) { int mrm = mm[-1]&7; if (mrm == 4) mrm = mm[0]&7; | ||
380 | if (mrm != 5) { mm[-1] -= 0x80; break; } } | ||
381 | if (((n+128) & -256) != 0) goto wd; else mm[-1] -= 0x40; | ||
382 | } | ||
383 | case DASM_IMM_S: case DASM_IMM_B: wb: dasmb(n); break; | ||
384 | case DASM_IMM_DB: if (((n+128)&-256) == 0) { | ||
385 | db: if (!mark) mark = cp; mark[-2] += 2; mark = NULL; goto wb; | ||
386 | } else mark = NULL; | ||
387 | case DASM_IMM_D: wd: dasmd(n); break; | ||
388 | case DASM_IMM_WB: if (((n+128)&-256) == 0) goto db; else mark = NULL; | ||
389 | case DASM_IMM_W: dasmw(n); break; | ||
390 | case DASM_VREG: { int t = *p++; if (t >= 2) n<<=3; cp[-1] |= n; break; } | ||
391 | case DASM_REL_LG: p++; if (n >= 0) goto rel_pc; | ||
392 | b++; n = (int)(ptrdiff_t)D->globals[-n]; | ||
393 | case DASM_REL_A: rel_a: n -= (int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */ | ||
394 | case DASM_REL_PC: rel_pc: { | ||
/* shrink is the byte count decided in pass 2: 0 keeps the long form,
** 4 rewrites the two-byte opcode to one byte, otherwise 0xeb is emitted. */
395 | int shrink = *b++; | ||
396 | int *pb = DASM_POS2PTR(D, n); if (*pb < 0) { n = pb[1]; goto rel_a; } | ||
397 | n = *pb - ((int)(cp-base) + 4-shrink); | ||
398 | if (shrink == 0) goto wd; | ||
399 | if (shrink == 4) { cp--; cp[-1] = *cp-0x10; } else cp[-1] = 0xeb; | ||
400 | goto wb; | ||
401 | } | ||
402 | case DASM_IMM_LG: | ||
403 | p++; if (n < 0) { n = (int)(ptrdiff_t)D->globals[-n]; goto wd; } | ||
404 | case DASM_IMM_PC: { | ||
405 | int *pb = DASM_POS2PTR(D, n); | ||
406 | n = *pb < 0 ? pb[1] : (*pb + (int)(ptrdiff_t)base); | ||
407 | goto wd; | ||
408 | } | ||
409 | case DASM_LABEL_LG: { | ||
410 | int idx = *p++; | ||
/* Only global labels (idx >= 10) are published through D->globals. */
411 | if (idx >= 10) | ||
412 | D->globals[idx] = (void *)(base + (*p == DASM_SETLABEL ? *b : n)); | ||
413 | break; | ||
414 | } | ||
415 | case DASM_LABEL_PC: case DASM_SETLABEL: break; | ||
416 | case DASM_SPACE: { int fill = *p++; while (n--) *cp++ = fill; break; } | ||
417 | case DASM_ALIGN: | ||
418 | n = *p++; | ||
419 | while (((cp-base) & n)) *cp++ = 0x90; /* nop */ | ||
420 | break; | ||
421 | case DASM_EXTERN: n = DASM_EXTERN(Dst, cp, p[1], *p); p += 2; goto wd; | ||
422 | case DASM_MARK: mark = cp; break; | ||
/* DASM_ESC falls through to default: the escaped byte is emitted verbatim. */
423 | case DASM_ESC: action = *p++; | ||
424 | default: *cp++ = action; break; | ||
425 | case DASM_SECTION: case DASM_STOP: goto stop; | ||
426 | } | ||
427 | } | ||
428 | stop: (void)0; | ||
429 | } | ||
430 | } | ||
431 | |||
432 | if (base + D->codesize != cp) /* Check for phase errors. */ | ||
433 | return DASM_S_PHASE; | ||
434 | return DASM_S_OK; | ||
435 | } | ||
436 | |||
437 | /* Get PC label offset. */ | ||
438 | int dasm_getpclabel(Dst_DECL, unsigned int pc) | ||
439 | { | ||
440 | dasm_State *D = Dst_REF; | ||
441 | if (pc*sizeof(int) < D->pcsize) { | ||
442 | int pos = D->pclabels[pc]; | ||
443 | if (pos < 0) return *DASM_POS2PTR(D, -pos); | ||
444 | if (pos > 0) return -1; /* Undefined. */ | ||
445 | } | ||
446 | return -2; /* Unused or out of range. */ | ||
447 | } | ||
448 | |||
#ifdef DASM_CHECKS
/*
** Optional sanity checker to call between isolated encoding steps.
** If no error is pending, verifies that local labels 1..9 have no open
** references (clearing each checked slot) and, for secmatch >= 0, that the
** active section is sections[secmatch]. Returns the resulting status.
*/
int dasm_checkstep(Dst_DECL, int secmatch)
{
  dasm_State *D = Dst_REF;
  int lbl;

  if (D->status != DASM_S_OK)
    return D->status;

  for (lbl = 1; lbl <= 9; lbl++) {
    if (D->lglabels[lbl] > 0) {
      D->status = DASM_S_UNDEF_L|lbl;
      return D->status;
    }
    D->lglabels[lbl] = 0;
  }

  if (secmatch >= 0 && D->section != &D->sections[secmatch])
    D->status = DASM_S_MATCH_SEC|(D->section-D->sections);
  return D->status;
}
#endif
467 | |||
diff --git a/dynasm/dasm_x86.lua b/dynasm/dasm_x86.lua new file mode 100644 index 00000000..82210806 --- /dev/null +++ b/dynasm/dasm_x86.lua | |||
@@ -0,0 +1,1799 @@ | |||
1 | ------------------------------------------------------------------------------ | ||
2 | -- DynASM x86 module. | ||
3 | -- | ||
4 | -- Copyright (C) 2005-2009 Mike Pall. All rights reserved. | ||
5 | -- See dynasm.lua for full copyright notice. | ||
6 | ------------------------------------------------------------------------------ | ||
7 | |||
8 | -- Module information: | ||
9 | local _info = { | ||
10 | arch = "x86", | ||
11 | description = "DynASM x86 (i386) module", | ||
12 | version = "1.2.1", | ||
13 | vernum = 10201, | ||
14 | release = "2009-04-16", | ||
15 | author = "Mike Pall", | ||
16 | license = "MIT", | ||
17 | } | ||
18 | |||
19 | -- Exported glue functions for the arch-specific module. | ||
20 | local _M = { _info = _info } | ||
21 | |||
-- Cache library functions.
-- Each name is cached exactly once. `char` and `unpack` used to be declared a
-- second time at the end of this list, which only shadowed identical locals.
local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
local assert, unpack = assert, unpack
local _s = string
local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
local find, match, gmatch, gsub = _s.find, _s.match, _s.gmatch, _s.gsub
local concat, sort = table.concat, table.sort
30 | |||
31 | -- Inherited tables and callbacks. | ||
32 | local g_opt, g_arch | ||
33 | local wline, werror, wfatal, wwarn | ||
34 | |||
35 | -- Action name list. | ||
36 | -- CHECK: Keep this in sync with the C code! | ||
37 | local action_names = { | ||
38 | -- int arg, 1 buffer pos: | ||
39 | "DISP", "IMM_S", "IMM_B", "IMM_W", "IMM_D", "IMM_WB", "IMM_DB", | ||
40 | -- action arg (1 byte), int arg, 1 buffer pos (reg/num): | ||
41 | "VREG", "SPACE", | ||
42 | -- ptrdiff_t arg, 1 buffer pos (address): !x64 | ||
43 | "SETLABEL", "REL_A", | ||
44 | -- action arg (1 byte) or int arg, 2 buffer pos (link, offset): | ||
45 | "REL_LG", "REL_PC", | ||
46 | -- action arg (1 byte) or int arg, 1 buffer pos (link): | ||
47 | "IMM_LG", "IMM_PC", | ||
48 | -- action arg (1 byte) or int arg, 1 buffer pos (offset): | ||
49 | "LABEL_LG", "LABEL_PC", | ||
50 | -- action arg (1 byte), 1 buffer pos (offset): | ||
51 | "ALIGN", | ||
52 | -- action args (2 bytes), no buffer pos. | ||
53 | "EXTERN", | ||
54 | -- action arg (1 byte), no buffer pos. | ||
55 | "ESC", | ||
56 | -- no action arg, no buffer pos. | ||
57 | "MARK", | ||
58 | -- action arg (1 byte), no buffer pos, terminal action: | ||
59 | "SECTION", | ||
60 | -- no args, no buffer pos, terminal action: | ||
61 | "STOP" | ||
62 | } | ||
63 | |||
64 | -- Maximum number of section buffer positions for dasm_put(). | ||
65 | -- CHECK: Keep this in sync with the C code! | ||
66 | local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines. | ||
67 | |||
68 | -- Action name -> action number (dynamically generated below). | ||
69 | local map_action = {} | ||
70 | -- First action number. Everything below does not need to be escaped. | ||
71 | local actfirst = 256-#action_names | ||
72 | |||
73 | -- Action list buffer and string (only used to remove dupes). | ||
74 | local actlist = {} | ||
75 | local actstr = "" | ||
76 | |||
77 | -- Argument list for next dasm_put(). Start with offset 0 into action list. | ||
78 | local actargs = { 0 } | ||
79 | |||
80 | -- Current number of section buffer positions for dasm_put(). | ||
81 | local secpos = 1 | ||
82 | |||
83 | ------------------------------------------------------------------------------ | ||
84 | |||
85 | -- Compute action numbers for action names. | ||
86 | for n,name in ipairs(action_names) do | ||
87 | local num = actfirst + n - 1 | ||
88 | map_action[name] = num | ||
89 | end | ||
90 | |||
-- Dump action names and numbers.
-- Writes one line per action (name, code in hex, code in decimal) to `out`.
local function dumpactions(out)
  out:write("DynASM encoding engine action codes:\n")
  for idx = 1, #action_names do
    local aname = action_names[idx]
    local code = map_action[aname]
    out:write(format(" %-10s %02X %d\n", aname, code, code))
  end
  out:write("\n")
end
100 | |||
-- Write action list buffer as a huge static C array.
-- The final byte is taken off the list first so that it can be written
-- without a trailing comma; an empty list is emitted as the single byte 255.
local function writeactions(out, name)
  local count = #actlist
  local tail = actlist[count] or 255
  actlist[count] = nil -- Remove last byte.
  if count == 0 then count = 1 end
  out:write("static const unsigned char ", name, "[", count, "] = {\n")
  local line = " "
  for idx = 1, #actlist do
    line = line..actlist[idx]..","
    if #line >= 75 then
      -- Line is full: flush it and start a new one.
      assert(out:write(line, "\n"))
      line = " "
    end
  end
  out:write(line, tail, "\n};\n\n") -- Add last byte back.
end
118 | |||
119 | ------------------------------------------------------------------------------ | ||
120 | |||
-- Add byte to action list.
-- Raises an error unless n is an integer in the range 0..255.
local function wputxb(n)
  local isbyte = n >= 0 and n <= 255 and n % 1 == 0
  assert(isbyte, "byte out of range")
  actlist[#actlist+1] = n
end
126 | |||
-- Add action to list with optional arg. Advance buffer pos, too.
-- `a` is appended to the pending dasm_put() arguments when given. The buffer
-- position advances by `num` (default 1) whenever `a` or `num` is given.
local function waction(action, a, num)
  local code = map_action[action]
  wputxb(assert(code, "bad action name `"..action.."'"))
  if a then
    actargs[#actargs+1] = a
    secpos = secpos + (num or 1)
  elseif num then
    secpos = secpos + num
  end
end
133 | |||
-- Add call to embedded DynASM C code.
-- Emits the line `dasm_<func>(Dst, <args joined by ", ">);` through wline().
local function wcall(func, args)
  local arglist = concat(args, ", ")
  local stmt = format("dasm_%s(Dst, %s);", func, arglist)
  wline(stmt, true)
end
138 | |||
-- Delete duplicate action list chunks. A tad slow, but so what.
-- The chunk is everything in the action list after `offset`. If the same byte
-- sequence was already emitted, the pending dasm_put() is redirected to it
-- and the new copy is dropped; otherwise the chunk is remembered.
local function dedupechunk(offset)
  local first, last = offset + 1, #actlist
  local chunk = char(unpack(actlist, first, last))
  local found = find(actstr, chunk, 1, true)
  if not found then
    actstr = actstr..chunk
    return
  end
  actargs[1] = found - 1 -- Replace with original offset.
  for idx = last, first, -1 do actlist[idx] = nil end -- Kill dupe.
end
151 | |||
-- Flush action list (intervening C code or buffer pos overflow).
-- Unless `term` is set, the pending chunk is terminated with a STOP action
-- before the dasm_put() call for it is written.
local function wflush(term)
  local offset = actargs[1]
  if #actlist ~= offset then -- Anything to flush?
    if not term then
      waction("STOP") -- Terminate action list.
    end
    dedupechunk(offset)
    wcall("put", actargs) -- Add call to dasm_put().
    -- Actionlist offset is 1st arg to next dasm_put().
    actargs = { #actlist }
    -- The actionlist offset occupies a buffer position, too.
    secpos = 1
  end
end
162 | |||
-- Put escaped byte.
-- Bytes at or above the first action number would be read as actions, so
-- they are preceded by an ESC action.
local function wputb(n)
  local needs_escape = n >= actfirst
  if needs_escape then
    waction("ESC")
  end
  wputxb(n)
end
168 | |||
169 | ------------------------------------------------------------------------------ | ||
170 | |||
-- Global label name -> global label number. With auto assignment on 1st use.
-- Numbers are handed out sequentially starting at 10; 246 is the highest
-- number that may be assigned.
local next_global = 10
local map_global = setmetatable({}, {
  __index = function(tab, label)
    if not match(label, "^[%a_][%w_]*$") then
      werror("bad global label")
    end
    local num = next_global
    if num > 246 then
      werror("too many global labels")
    end
    next_global = num + 1
    tab[label] = num -- Cache it, so the next lookup bypasses __index.
    return num
  end
})
181 | |||
-- Dump global labels.
-- Lists the names of all assigned global labels in numeric order.
local function dumpglobals(out, lvl)
  -- Invert the name -> number map.
  local names = {}
  for label, num in pairs(map_global) do
    names[num] = label
  end
  out:write("Global labels:\n")
  for num = 10, next_global-1 do
    local line = format(" %s\n", names[num])
    out:write(line)
  end
  out:write("\n")
end
192 | |||
-- Write global label enum.
-- Emits a C enum with one `prefix`-ed enumerator per global label in numeric
-- order, terminated by `<prefix>_MAX`.
local function writeglobals(out, prefix)
  -- Invert the name -> number map.
  local names = {}
  for label, num in pairs(map_global) do
    names[num] = label
  end
  out:write("enum {\n")
  for num = 10, next_global-1 do
    out:write(" ", prefix, names[num], ",\n")
  end
  out:write(" ", prefix, "_MAX\n};\n")
end
203 | |||
-- Write global label names.
-- Emits a C array of strings called `name`, holding the global label names in
-- numeric order and terminated by a null pointer.
local function writeglobalnames(out, name)
  -- Invert the name -> number map.
  local labels = {}
  for label, num in pairs(map_global) do
    labels[num] = label
  end
  out:write("static const char *const ", name, "[] = {\n")
  for num = 10, next_global-1 do
    out:write(" \"", labels[num], "\",\n")
  end
  out:write(" (const char *)0\n};\n")
end
214 | |||
215 | ------------------------------------------------------------------------------ | ||
216 | |||
-- Extern label name -> extern label number. With auto assignment on 1st use.
-- Numbers are negative and handed out sequentially starting at -1; -256 is
-- the lowest number that may be assigned.
local next_extern = -1
local map_extern = setmetatable({}, {
  __index = function(tab, label)
    -- No restrictions on the name for now.
    local num = next_extern
    if num < -256 then
      werror("too many extern labels")
    end
    next_extern = num - 1
    tab[label] = num -- Cache it, so the next lookup bypasses __index.
    return num
  end
})
227 | |||
-- Dump extern labels.
-- Lists the names of all assigned extern labels in order of assignment.
local function dumpexterns(out, lvl)
  -- Invert the name -> number map, using positive indexes.
  local names = {}
  for label, num in pairs(map_extern) do
    names[-num] = label
  end
  out:write("Extern labels:\n")
  for idx = 1, -next_extern-1 do
    local line = format(" %s\n", names[idx])
    out:write(line)
  end
  out:write("\n")
end
238 | |||
-- Write extern label names.
-- Emits a C array of strings called `name`, holding the extern label names in
-- order of assignment and terminated by a null pointer.
local function writeexternnames(out, name)
  -- Invert the name -> number map, using positive indexes.
  local labels = {}
  for label, num in pairs(map_extern) do
    labels[-num] = label
  end
  out:write("static const char *const ", name, "[] = {\n")
  for idx = 1, -next_extern-1 do
    out:write(" \"", labels[idx], "\",\n")
  end
  out:write(" (const char *)0\n};\n")
end
249 | |||
250 | ------------------------------------------------------------------------------ | ||
251 | |||
252 | -- Arch-specific maps. | ||
253 | local map_archdef = {} -- Ext. register name -> int. name. | ||
254 | local map_reg_rev = {} -- Int. register name -> ext. name. | ||
255 | local map_reg_num = {} -- Int. register name -> register number. | ||
256 | local map_reg_opsize = {} -- Int. register name -> operand size. | ||
257 | local map_reg_valid_base = {} -- Int. register name -> valid base register? | ||
258 | local map_reg_valid_index = {} -- Int. register name -> valid index register? | ||
259 | local reg_list = {} -- Canonical list of int. register names. | ||
260 | |||
261 | local map_type = {} -- Type name -> { ctype, reg } | ||
262 | local ctypenum = 0 -- Type number (for _PTx macros). | ||
263 | |||
264 | local addrsize = "d" -- Size for address operands. !x64 | ||
265 | |||
-- Helper function to fill register maps.
-- sz is the operand size code, cl the external name of the register class and
-- names the external register names in register number order. The class gets
-- the internal name "@<sz>" with number -1, each register "@<sz><hex number>".
-- An empty string is appended to reg_list as a group separator.
local function mkrmap(sz, cl, names)
  -- Register one internal name in all maps.
  local function register(iname, extname, regnum)
    reg_list[#reg_list+1] = iname
    map_archdef[extname] = iname
    map_reg_rev[iname] = extname
    map_reg_num[iname] = regnum
    map_reg_opsize[iname] = sz
    if sz == addrsize then
      -- Address-sized registers may serve as base and index.
      map_reg_valid_base[iname] = true
      map_reg_valid_index[iname] = true
    end
  end

  register(format("@%s", sz), cl, -1)
  for n, regname in ipairs(names) do
    register(format("@%s%x", sz, n-1), regname, n-1)
  end
  reg_list[#reg_list+1] = ""
end
292 | |||
-- Integer registers: dword, word and byte sized classes.
mkrmap("d", "Rd", {
  "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi",
})
-- esp is never accepted as an index register.
map_reg_valid_index[map_archdef.esp] = false
mkrmap("w", "Rw", {
  "ax", "cx", "dx", "bx", "sp", "bp", "si", "di",
})
mkrmap("b", "Rb", {
  "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh",
})
-- "Ra" is the variable-register class of address size.
map_archdef.Ra = "@"..addrsize

-- FP registers: internally tword sized, but "f" is used as their operand size.
mkrmap("f", "Rf", {
  "st0", "st1", "st2", "st3", "st4", "st5", "st6", "st7",
})

-- SSE registers: oword sized, but qword and dword accessible.
mkrmap("o", "xmm", {
  "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
})

-- Operand size prefix (as written in the source) -> operand size code.
local map_opsize = {
  byte = "b",
  word = "w",
  dword = "d",
  qword = "q",
  oword = "o",
  tword = "t",
  aword = addrsize,
}

-- Operand size code -> size in bytes.
local map_opsizenum = {
  b = 1,
  w = 2,
  d = 4,
  q = 8,
  o = 16,
  t = 10,
}

-- Operand size code -> human-readable name.
local map_opsizename = {
  b = "byte",
  w = "word",
  d = "dword",
  q = "qword",
  o = "oword",
  t = "tword",
  f = "fpword",
}

-- Valid index register scale factor (as string) -> encoded scale value.
local map_xsc = {
  ["1"] = 0,
  ["2"] = 1,
  ["4"] = 2,
  ["8"] = 3,
}

-- Condition code name -> condition code number (with aliases).
local map_cc = {
  o = 0, no = 1,
  b = 2, nb = 3,
  e = 4, ne = 5,
  be = 6, nbe = 7,
  s = 8, ns = 9,
  p = 10, np = 11,
  l = 12, nl = 13,
  le = 14, nle = 15,
  c = 2, nae = 2,
  nc = 3, ae = 3,
  z = 4, nz = 5,
  na = 6, a = 7,
  pe = 10, po = 11,
  nge = 12, ge = 13,
  ng = 14, g = 15,
}
335 | |||
336 | |||
-- Reverse defines for registers: every "@" + alphanumerics token in s is
-- looked up in map_reg_rev and replaced by the register name found there.
-- Both results of gsub (the new string and its second result) are returned.
_M.revdef = function(s)
  local text, nmatch = gsub(s, "@%w+", map_reg_rev)
  return text, nmatch
end
341 | |||
-- Write a listing of all registers (name, operand size name and internal
-- number) to out. Empty entries in reg_list produce a blank separator line;
-- registers with a negative number are shown as "(variable)".
local function dumpregs(out)
  out:write("Register names, sizes and internal numbers:\n")
  for _, internal in ipairs(reg_list) do
    if internal ~= "" then
      local regnum = map_reg_num[internal]
      local shown = regnum < 0 and "(variable)" or regnum
      local line = format(" %-5s %-8s %s\n",
                          map_reg_rev[internal],
                          map_opsizename[map_reg_opsize[internal]],
                          shown)
      out:write(line)
    else
      out:write("\n")
    end
  end
end
357 | |||
358 | ------------------------------------------------------------------------------ | ||
359 | |||
-- Put action for label arg (IMM_LG, IMM_PC, REL_LG, REL_PC).
--   aprefix: action name prefix ("IMM_" or another prefix such as "REL_").
--   imm:     a number selects the LG form (or EXTERN if negative),
--            anything else is passed on as the argument of the PC form.
--   num:     forwarded unchanged to waction.
local function wputlabel(aprefix, imm, num)
  if type(imm) ~= "number" then
    waction(aprefix.."PC", imm, num)
    return
  end
  if imm < 0 then
    -- Negative numbers encode externs as -(index+1).
    waction("EXTERN")
    wputxb(aprefix == "IMM_" and 0 or 1)
    imm = -imm-1
  else
    waction(aprefix.."LG", nil, num)
  end
  wputxb(imm)
end
375 | |||
-- Put signed byte or arg.
-- Numbers must be in -128..127 and are emitted as one byte (negative values
-- are wrapped by adding 256); anything else becomes an IMM_S action.
local function wputsbarg(n)
  if type(n) ~= "number" then
    waction("IMM_S", n)
    return
  end
  if n < -128 or n > 127 then
    werror("signed immediate byte out of range")
  end
  wputb(n < 0 and n + 256 or n)
end
386 | |||
-- Put unsigned byte or arg.
-- Numbers must be in 0..255 and are emitted as one byte; anything else
-- becomes an IMM_B action.
local function wputbarg(n)
  if type(n) ~= "number" then
    waction("IMM_B", n)
    return
  end
  if n < 0 or n > 255 then
    werror("unsigned immediate byte out of range")
  end
  wputb(n)
end
396 | |||
-- Put unsigned word or arg.
-- Numbers must be in 0..65535 and are emitted as two bytes, low byte first;
-- anything else becomes an IMM_W action.
local function wputwarg(n)
  if type(n) ~= "number" then
    waction("IMM_W", n)
    return
  end
  if n < 0 or n > 65535 then
    werror("unsigned immediate word out of range")
  end
  local lo = n % 256
  local hi = (n - lo) / 256
  wputb(lo)
  wputb(hi)
end
406 | |||
-- Put signed or unsigned dword or arg.
--   number: emitted as four bytes, low byte first (negative values are
--           wrapped by adding 2^32).
--   table:  its first element is emitted as an "IMM_" label reference.
--   other:  becomes an IMM_D action.
local function wputdarg(n)
  local kind = type(n)
  if kind == "table" then
    wputlabel("IMM_", n[1], 1)
  elseif kind ~= "number" then
    waction("IMM_D", n)
  else
    if n < 0 then n = n + 4294967296 end
    -- Peel off the three low bytes, then emit what is left as the top byte.
    for _ = 1, 3 do
      local lo = n % 256
      wputb(lo)
      n = (n - lo) / 256
    end
    wputb(n)
  end
end
421 | |||
-- Put operand-size dependent number or arg (defaults to dword).
--   sz: "d" or nil/false -> dword, "w" -> word, "b" -> unsigned byte,
--       "s" -> signed byte. Any other size is an error.
local function wputszarg(sz, n)
  if sz == "w" then
    wputwarg(n)
  elseif sz == "b" then
    wputbarg(n)
  elseif sz == "s" then
    wputsbarg(n)
  elseif sz == "d" or not sz then
    wputdarg(n)
  else
    werror("bad operand size")
  end
end
430 | |||
-- Put multi-byte opcode with operand-size dependent modifications.
--   sz: "w" emits a leading byte 102 (0x66) before the opcode,
--       "b" decrements the final opcode byte by one.
--   op: opcode as a number; its bytes are emitted most significant first.
local function wputop(sz, op)
  if sz == "w" then wputb(102) end
  -- Emit leading opcode bytes from the top down (2^32, 2^24, 2^16, 2^8).
  -- Needs >32 bit numbers, but only for crc32 eax, word [ebx]
  local div = 4294967296
  while div >= 256 do
    if op >= div then
      local rest = op % div
      wputb((op - rest) / div)
      op = rest
    end
    div = div / 256
  end
  if sz == "b" then op = op - 1 end
  wputb(op)
end
443 | |||
-- Put ModRM or SIB formatted byte: two bits from m, three from s, three
-- from rm. The vs and vrm parameters are accepted but not used here.
local function wputmodrm(m, s, rm, vs, vrm)
  local in_range = m < 4 and s < 8 and rm < 8
  assert(in_range, "bad modrm operands")
  local encoded = 64*m + 8*s + rm
  wputb(encoded)
end
449 | |||
-- Put ModRM/SIB plus optional displacement.
--   t:     parsed operand (fields mode, reg, xreg, xsc, disp, vreg, vxreg).
--   imark: if true, a MARK action is emitted after the ModRM byte whenever
--          a displacement follows.
--   s:     value for the spare 3 bits of the ModRM byte (negative -> 0).
--   vsreg: optional variable register expression for the spare field.
-- Variable registers are emitted as VREG actions followed by a position
-- code: 0 = rm field in register mode, 1 = base, 2 = spare field, 3 = index.
local function wputmrmsib(t, imark, s, vsreg)
  -- Emit a VREG action plus its position code, if a variable reg is given.
  local function putvreg(v, pos)
    if v then waction("VREG", v); wputxb(pos) end
  end

  -- Negative register numbers denote variable registers: encode 0 for now
  -- and remember the expression for the VREG action.
  local reg, xreg = t.reg, t.xreg
  local vreg, vxreg
  if reg and reg < 0 then vreg = t.vreg; reg = 0 end
  if xreg and xreg < 0 then vxreg = t.vxreg; xreg = 0 end
  if s < 0 then s = 0 end

  -- Register mode.
  if sub(t.mode, 1, 1) == "r" then
    wputmodrm(3, s, reg)
    putvreg(vsreg, 2)
    putvreg(vreg, 0)
    return
  end

  local disp = t.disp

  -- No base register: always followed by a full dword displacement.
  if not reg then
    -- Indexed mode with index register only:
    --   [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp)
    -- Pure displacement:
    --   [disp] -> (0, s, ebp)
    wputmodrm(0, s, xreg and 4 or 5)
    if imark then waction("MARK") end
    putvreg(vsreg, 2)
    if xreg then
      wputmodrm(t.xsc, xreg, 5)
      putvreg(vxreg, 3)
    end
    wputdarg(disp)
    return
  end

  -- Select the displacement mode: 0 = none, 1 = signed byte, 2 = dword,
  -- nil = not known at assembly time (resolved by a DISP action).
  local m
  local tdisp = type(disp)
  if tdisp == "table" then
    m = 2
  elseif tdisp == "number" then -- Check displacement size at assembly time.
    if disp == 0 and reg ~= 5 then -- [ebp] -> [ebp+0] (in SIB, too)
      if not vreg then m = 0 end -- Force DISP to allow [Rd(5)] -> [ebp+0]
    elseif disp >= -128 and disp <= 127 then
      m = 1
    else
      m = 2
    end
  end

  local mod = m or 2
  if xreg or reg == 4 then
    -- Index register present or esp as base register: need SIB encoding.
    wputmodrm(mod, s, 4) -- ModRM.
    if m == nil or imark then waction("MARK") end
    putvreg(vsreg, 2)
    wputmodrm(t.xsc or 0, xreg or 4, reg) -- SIB.
    putvreg(vxreg, 3)
    putvreg(vreg, 1)
  else
    wputmodrm(mod, s, reg) -- ModRM.
    local need_mark
    if m == nil then
      need_mark = vsreg or vreg
    else
      need_mark = imark and (m == 1 or m == 2)
    end
    if need_mark then waction("MARK") end
    putvreg(vsreg, 2)
    putvreg(vreg, 1)
  end

  -- Put displacement.
  if m == 1 then
    wputsbarg(disp)
  elseif m == 2 then
    wputdarg(disp)
  elseif m == nil then
    waction("DISP", disp)
  end
end
519 | |||
520 | ------------------------------------------------------------------------------ | ||
521 | |||
-- Return human-readable operand mode string: the opcode name followed by,
-- for each operand, the first character of its mode plus its operand size
-- ("?" if the size is unknown), separated by commas.
local function opmodestr(op, args)
  local parts = {}
  for i = 1, #args do
    local operand = args[i]
    local size = operand.opsize or "?"
    parts[i] = sub(operand.mode, 1, 1)..size
  end
  return op.." "..concat(parts, ",")
end
531 | |||
-- Convert number to valid integer or nil.
-- Returns nothing if expr is not convertible by tonumber. Otherwise the
-- number must be integral and within -2^31 .. 2^32-1, else an error is
-- reported via werror.
local function toint(expr)
  local n = tonumber(expr)
  if not n then return end
  local integral = (n % 1 == 0)
  if not (integral and n >= -2147483648 and n <= 4294967295) then
    werror("bad integer number `"..expr.."'")
  end
  return n
end
542 | |||
-- Parse immediate expression. Returns a mode string and a value:
--   &expr             -> "iPJ", expr wrapped in a (ptrdiff_t) cast
--   =>expr            -> "iJ",  expr (pc label reference)
--   ->name            -> "iJ",  map_global[name] (global label reference)
--   <N or >N (N=1-9)  -> "iJ",  local label number
--   extern name       -> "iJ",  map_extern[name]
--   anything else     -> "iI",  the expression itself
local function immexpr(expr)
  local first = sub(expr, 1, 1)
  if first == "&" then
    local cast = format("(ptrdiff_t)(%s)", sub(expr,2))
    return "iPJ", cast
  end

  local prefix = sub(expr, 1, 2)
  if prefix == "=>" then
    return "iJ", sub(expr, 3)
  elseif prefix == "->" then
    return "iJ", map_global[sub(expr, 3)]
  end

  -- Local label reference. Fwd: 247-255, Bkwd: 1-9.
  local dir, lnum = match(expr, "^([<>])([1-9])$")
  if dir then
    local base = 0
    if dir == ">" then base = 246 end
    return "iJ", lnum + base
  end

  local extname = match(expr, "^extern%s+(%S+)$")
  if extname then
    return "iJ", map_extern[extname]
  end

  -- Everything else is interpreted as a plain immediate.
  return "iI", expr
end
574 | |||
-- Parse displacement expression: +-num, +-expr, +-opsize*num
-- Returns one of:
--   a number            for "", a plain integer or opsize*num,
--   a table { label }   for a label reference,
--   a string            for any other expression (a leading "+" is dropped,
--                       a leading "-" is kept as part of the expression).
local function dispexpr(expr)
  if expr == "" then return 0 end
  local num = toint(expr)
  if num then return num end

  local sign, rest = match(expr, "^([+-])%s*(.+)$")
  if not sign then
    werror("bad displacement expression `"..expr.."'")
  elseif sign == "+" then
    expr = rest
  end

  -- +-opsize*num: scale the number by the byte size of the operand size.
  local szname, count = match(rest, "^(%w+)%s*%*%s*(.+)$")
  local szcode, imm = map_opsize[szname], toint(count)
  if szcode and imm then
    local scaled = (sign == "-") and -imm or imm
    return scaled*map_opsizenum[szcode]
  end

  local mode, target = immexpr(rest)
  if mode ~= "iJ" then
    return expr -- Need to return original signed expression.
  end
  if sign == "-" then werror("cannot invert label reference") end
  return { target }
end
598 | |||
-- Parse register or type expression.
-- For a type name (optionally written as "type:@reg" to override the
-- register) returns the register, its number and the type entry.
-- Otherwise returns the expression and its register number (nil if the
-- expression is not a known register). Returns nothing for a nil argument.
local function rtexpr(expr)
  if not expr then return end
  local tname, ovreg = match(expr, "^([%w_]+):(@[%w_]+)$")
  local tp = map_type[tname or expr]
  if not tp then
    return expr, map_reg_num[expr]
  end

  -- Type with either an explicit override or its default register.
  local reg = ovreg or tp.reg
  local rnum = map_reg_num[reg]
  if not rnum then
    werror("type `"..(tname or expr).."' needs a register override")
  end
  if not map_reg_valid_base[reg] then
    local shown = map_reg_rev[reg] or reg
    werror("bad base register override `"..shown.."'")
  end
  return reg, rnum, tp
end
617 | |||
-- Parse operand and return { mode, opsize, reg, xreg, xsc, disp, imm }.
--
-- param is the operand text, optionally preceded by an operand size
-- override (byte, word, dword, qword, oword, tword, aword).
--
-- Fields of the returned table (only those that apply are set):
--   mode:   operand mode string, see the template string description:
--           "rm" (+"R"/"C") for integer registers, "f" (+"F") for FP
--           registers, "xm" (+"O") for index operands and "i..." for
--           immediates.
--   opsize: operand size code.
--   reg:    base register number (or plain register); -1 for a variable
--           register, in which case vreg holds its "(...)" expression.
--   xreg:   index register number; -1 plus vxreg for a variable register.
--   xsc:    encoded index scale factor.
--   disp:   displacement: number, expression string or { label } table.
--   imm:    immediate value or expression.
local function parseoperand(param)
  local t = {}

  -- Split off a leading operand size override. A leading word that is not
  -- a known size name is left in place as part of the expression.
  local expr = param
  local opsize, tailops = match(param, "^(%w+)%s*(.+)$")
  if opsize then
    t.opsize = map_opsize[opsize]
    if t.opsize then expr = tailops end
  end

  local br = match(expr, "^%[%s*(.-)%s*%]$")
  repeat -- Runs exactly once; "break" is used as an early exit.
    if br then
      t.mode = "xm"

      -- [disp]
      t.disp = toint(br)
      if t.disp then
        t.mode = "xmO"
        break
      end

      -- [reg...]
      local reg, tailr = match(br, "^([@%w_:]+)%s*(.*)$")
      reg, t.reg = rtexpr(reg)
      if not t.reg then
        -- [expr]
        t.mode = "xmO"
        t.disp = dispexpr("+"..br)
        break
      end

      -- Variable register: must be followed by a parenthesized expression.
      if t.reg == -1 then
        t.vreg, tailr = match(tailr, "^(%b())(.*)$")
        if not t.vreg then werror("bad variable register expression") end
      end

      -- [xreg*xsc] or [xreg*xsc+-disp] or [xreg*xsc+-expr]
      local xsc, tailsc = match(tailr, "^%*%s*([1248])%s*(.*)$")
      if xsc then
        if not map_reg_valid_index[reg] then
          werror("bad index register `"..map_reg_rev[reg].."'")
        end
        -- The register parsed so far turns out to be the index register.
        t.xsc = map_xsc[xsc]
        t.xreg = t.reg
        t.vxreg = t.vreg
        t.reg = nil
        t.vreg = nil
        t.disp = dispexpr(tailsc)
        break
      end
      if not map_reg_valid_base[reg] then
        werror("bad base register `"..map_reg_rev[reg].."'")
      end

      -- [reg] or [reg+-disp]
      t.disp = toint(tailr) or (tailr == "" and 0)
      if t.disp then break end

      -- [reg+xreg...]
      -- The "+" is escaped so it is matched literally by definition,
      -- not by accident of how a leading quantifier is treated.
      local xreg, tailx = match(tailr, "^%+%s*([@%w_:]+)%s*(.*)$")
      xreg, t.xreg = rtexpr(xreg)
      if not t.xreg then
        -- [reg+-expr]
        t.disp = dispexpr(tailr)
        break
      end
      if not map_reg_valid_index[xreg] then
        werror("bad index register `"..map_reg_rev[xreg].."'")
      end

      if t.xreg == -1 then
        t.vxreg, tailx = match(tailx, "^(%b())(.*)$")
        if not t.vxreg then werror("bad variable register expression") end
      end

      -- [reg+xreg*xsc...]
      local xsc, tailsc = match(tailx, "^%*%s*([1248])%s*(.*)$")
      if xsc then
        t.xsc = map_xsc[xsc]
        tailx = tailsc
      end

      -- [...] or [...+-disp] or [...+-expr]
      t.disp = dispexpr(tailx)
    else
      -- imm or opsize*imm
      local imm = toint(expr)
      if not imm and sub(expr, 1, 1) == "*" and t.opsize then
        imm = toint(sub(expr, 2))
        if imm then
          -- The size prefix was a multiplier, not an override.
          imm = imm * map_opsizenum[t.opsize]
          t.opsize = nil
        end
      end
      if imm then
        if t.opsize then werror("bad operand size override") end
        local m = "i"
        if imm == 1 then m = m.."1" end
        -- Fold the top 128 unsigned 32 bit values into -128..-1.
        if imm >= 4294967168 and imm <= 4294967295 then imm = imm-4294967296 end
        if imm >= -128 and imm <= 127 then m = m.."S" end
        t.imm = imm
        t.mode = m
        break
      end

      local tp
      local reg, tailr = match(expr, "^([@%w_:]+)%s*(.*)$")
      reg, t.reg, tp = rtexpr(reg)
      if t.reg then
        if t.reg == -1 then
          t.vreg, tailr = match(tailr, "^(%b())(.*)$")
          if not t.vreg then werror("bad variable register expression") end
        end
        -- reg
        if tailr == "" then
          if t.opsize then werror("bad operand size override") end
          t.opsize = map_reg_opsize[reg]
          if t.opsize == "f" then
            t.mode = t.reg == 0 and "fF" or "f"
          else
            if reg == "@w4" then wwarn("bad idea, try again with `esp'") end
            t.mode = t.reg == 0 and "rmR" or (reg == "@b1" and "rmC" or "rm")
          end
          break
        end

        -- type[idx], type[idx].field, type->field -> [reg+offset_expr]
        if not tp then werror("bad operand `"..param.."'") end
        t.mode = "xm"
        t.disp = format(tp.ctypefmt, tailr)
      else
        t.mode, t.imm = immexpr(expr)
        -- Jump targets are always address sized.
        if sub(t.mode, -1) == "J" then
          if t.opsize and t.opsize ~= addrsize then
            werror("bad operand size override")
          end
          t.opsize = addrsize
        end
      end
    end
  until true
  return t
end
764 | |||
765 | ------------------------------------------------------------------------------ | ||
766 | -- x86 Template String Description | ||
767 | -- =============================== | ||
768 | -- | ||
769 | -- Each template string is a list of [match:]pattern pairs, | ||
770 | -- separated by "|". The first match wins. No match means a | ||
771 | -- bad or unsupported combination of operand modes or sizes. | ||
772 | -- | ||
773 | -- The match part and the ":" is omitted if the operation has | ||
774 | -- no operands. Otherwise the first N characters are matched | ||
775 | -- against the mode strings of each of the N operands. | ||
776 | -- | ||
777 | -- The mode string for each operand type is (see parseoperand()): | ||
778 | -- Integer register: "rm", +"R" for eax, ax, al, +"C" for cl | ||
779 | -- FP register: "f", +"F" for st0 | ||
780 | -- Index operand: "xm", +"O" for [disp] (pure offset) | ||
781 | -- Immediate: "i", +"S" for signed 8 bit, +"1" for 1, | ||
782 | -- +"I" for arg, +"P" for pointer | ||
783 | -- Any: +"J" for valid jump targets | ||
784 | -- | ||
785 | -- So a match character "m" (mixed) matches both an integer register | ||
786 | -- and an index operand (to be encoded with the ModRM/SIB scheme). | ||
787 | -- But "r" matches only a register and "x" only an index operand | ||
788 | -- (e.g. for FP memory access operations). | ||
789 | -- | ||
790 | -- The operand size match string starts right after the mode match | ||
791 | -- characters and ends before the ":". "dwb" is assumed, if empty. | ||
792 | -- The effective data size of the operation is matched against this list. | ||
793 | -- | ||
794 | -- If only the regular "b", "w", "d", "q", "t" operand sizes are | ||
795 | -- present, then all operands must be the same size. Unspecified sizes | ||
796 | -- are ignored, but at least one operand must have a size or the pattern | ||
797 | -- won't match (use the "byte", "word", "dword", "qword", "tword" | ||
798 | -- operand size overrides. E.g.: mov dword [eax], 1). | ||
799 | -- | ||
800 | -- If the list has a "1" or "2" prefix, the operand size is taken | ||
801 | -- from the respective operand and any other operand sizes are ignored. | ||
802 | -- If the list contains only ".", all operand sizes are ignored. | ||
803 | -- If the list has a "/" prefix, the concatenated (mixed) operand sizes | ||
804 | -- are compared to the match. | ||
805 | -- | ||
806 | -- E.g. "rrdw" matches for either two dword registers or two word | ||
807 | -- registers. "Fx2dq" matches an st0 operand plus an index operand | ||
808 | -- pointing to a dword (float) or qword (double). | ||
809 | -- | ||
810 | -- Every character after the ":" is part of the pattern string: | ||
811 | -- Hex chars are accumulated to form the opcode (left to right). | ||
812 | -- "n" disables the standard opcode mods | ||
813 | -- (otherwise: -1 for "b", o16 prefix for "w") | ||
814 | -- "r"/"R" adds the reg. number from the 1st/2nd operand to the opcode. | ||
815 | -- "m"/"M" generates ModRM/SIB from the 1st/2nd operand. | ||
816 | -- The spare 3 bits are either filled with the last hex digit or | ||
817 | -- the result from a previous "r"/"R". The opcode is restored. | ||
818 | -- | ||
819 | -- All of the following characters force a flush of the opcode: | ||
820 | -- "o"/"O" stores a pure 32 bit disp (offset) from the 1st/2nd operand. | ||
821 | -- "S" stores a signed 8 bit immediate from the last operand. | ||
822 | -- "U" stores an unsigned 8 bit immediate from the last operand. | ||
823 | -- "W" stores an unsigned 16 bit immediate from the last operand. | ||
824 | -- "i" stores an operand sized immediate from the last operand. | ||
825 | -- "I" ditto, but generates an action code to optionally modify | ||
826 | -- the opcode (+2) for a signed 8 bit immediate. | ||
827 | -- "J" generates one of the REL action codes from the last operand. | ||
828 | -- | ||
829 | ------------------------------------------------------------------------------ | ||
830 | |||
831 | -- Template strings for x86 instructions. Ordered by first opcode byte. | ||
832 | -- Unimplemented opcodes (deliberate omissions) are marked with *. | ||
833 | local map_op = { | ||
834 | -- 00-05: add... | ||
835 | -- 06: *push es | ||
836 | -- 07: *pop es | ||
837 | -- 08-0D: or... | ||
838 | -- 0E: *push cs | ||
839 | -- 0F: two byte opcode prefix | ||
840 | -- 10-15: adc... | ||
841 | -- 16: *push ss | ||
842 | -- 17: *pop ss | ||
843 | -- 18-1D: sbb... | ||
844 | -- 1E: *push ds | ||
845 | -- 1F: *pop ds | ||
846 | -- 20-25: and... | ||
847 | es_0 = "26", | ||
848 | -- 27: *daa | ||
849 | -- 28-2D: sub... | ||
850 | cs_0 = "2E", | ||
851 | -- 2F: *das | ||
852 | -- 30-35: xor... | ||
853 | ss_0 = "36", | ||
854 | -- 37: *aaa | ||
855 | -- 38-3D: cmp... | ||
856 | ds_0 = "3E", | ||
857 | -- 3F: *aas | ||
858 | inc_1 = "rdw:40r|m:FF0m", | ||
859 | dec_1 = "rdw:48r|m:FF1m", | ||
860 | push_1 = "rdw:50r|mdw:FF6m|S.:6AS|ib:n6Ai|i.:68i", | ||
861 | pop_1 = "rdw:58r|mdw:8F0m", | ||
862 | -- 60: *pusha, *pushad, *pushaw | ||
863 | -- 61: *popa, *popad, *popaw | ||
864 | -- 62: *bound rdw,x | ||
865 | -- 63: *arpl mw,rw | ||
866 | fs_0 = "64", | ||
867 | gs_0 = "65", | ||
868 | o16_0 = "66", | ||
869 | a16_0 = "67", | ||
870 | -- 68: push idw | ||
871 | -- 69: imul rdw,mdw,idw | ||
872 | -- 6A: push ib | ||
873 | -- 6B: imul rdw,mdw,S | ||
874 | -- 6C: *insb | ||
875 | -- 6D: *insd, *insw | ||
876 | -- 6E: *outsb | ||
877 | -- 6F: *outsd, *outsw | ||
878 | -- 70-7F: jcc lb | ||
879 | -- 80: add... mb,i | ||
880 | -- 81: add... mdw,i | ||
881 | -- 82: *undefined | ||
882 | -- 83: add... mdw,S | ||
883 | test_2 = "mr:85Rm|rm:85rM|Ri:A9ri|mi:F70mi", | ||
884 | -- 86: xchg rb,mb | ||
885 | -- 87: xchg rdw,mdw | ||
886 | -- 88: mov mb,r | ||
887 | -- 89: mov mdw,r | ||
888 | -- 8A: mov r,mb | ||
889 | -- 8B: mov r,mdw | ||
890 | -- 8C: *mov mdw,seg | ||
891 | lea_2 = "rxd:8DrM", | ||
892 | -- 8E: *mov seg,mdw | ||
893 | -- 8F: pop mdw | ||
894 | nop_0 = "90", | ||
895 | xchg_2 = "Rrdw:90R|rRdw:90r|rm:87rM|mr:87Rm", | ||
896 | cbw_0 = "6698", | ||
897 | cwde_0 = "98", | ||
898 | cwd_0 = "6699", | ||
899 | cdq_0 = "99", | ||
900 | -- 9A: *call iw:idw | ||
901 | wait_0 = "9B", | ||
902 | fwait_0 = "9B", | ||
903 | pushf_0 = "9C", | ||
904 | pushfw_0 = "669C", | ||
905 | pushfd_0 = "9C", | ||
906 | popf_0 = "9D", | ||
907 | popfw_0 = "669D", | ||
908 | popfd_0 = "9D", | ||
909 | sahf_0 = "9E", | ||
910 | lahf_0 = "9F", | ||
911 | mov_2 = "OR:A3o|RO:A1O|mr:89Rm|rm:8BrM|rib:nB0ri|ridw:B8ri|mi:C70mi", | ||
912 | movsb_0 = "A4", | ||
913 | movsw_0 = "66A5", | ||
914 | movsd_0 = "A5", | ||
915 | cmpsb_0 = "A6", | ||
916 | cmpsw_0 = "66A7", | ||
917 | cmpsd_0 = "A7", | ||
918 | -- A8: test Rb,i | ||
919 | -- A9: test Rdw,i | ||
920 | stosb_0 = "AA", | ||
921 | stosw_0 = "66AB", | ||
922 | stosd_0 = "AB", | ||
923 | lodsb_0 = "AC", | ||
924 | lodsw_0 = "66AD", | ||
925 | lodsd_0 = "AD", | ||
926 | scasb_0 = "AE", | ||
927 | scasw_0 = "66AF", | ||
928 | scasd_0 = "AF", | ||
929 | -- B0-B7: mov rb,i | ||
930 | -- B8-BF: mov rdw,i | ||
931 | -- C0: rol... mb,i | ||
932 | -- C1: rol... mdw,i | ||
933 | ret_1 = "i.:nC2W", | ||
934 | ret_0 = "C3", | ||
935 | -- C4: *les rdw,mq | ||
936 | -- C5: *lds rdw,mq | ||
937 | -- C6: mov mb,i | ||
938 | -- C7: mov mdw,i | ||
939 | -- C8: *enter iw,ib | ||
940 | leave_0 = "C9", | ||
941 | -- CA: *retf iw | ||
942 | -- CB: *retf | ||
943 | int3_0 = "CC", | ||
944 | int_1 = "i.:nCDU", | ||
945 | into_0 = "CE", | ||
946 | -- CF: *iret | ||
947 | -- D0: rol... mb,1 | ||
948 | -- D1: rol... mdw,1 | ||
949 | -- D2: rol... mb,cl | ||
950 | -- D3: rol... mdw,cl | ||
951 | -- D4: *aam ib | ||
952 | -- D5: *aad ib | ||
953 | -- D6: *salc | ||
954 | -- D7: *xlat | ||
955 | -- D8-DF: floating point ops | ||
956 | -- E0: *loopne | ||
957 | -- E1: *loope | ||
958 | -- E2: *loop | ||
959 | -- E3: *jcxz, *jecxz | ||
960 | -- E4: *in Rb,ib | ||
961 | -- E5: *in Rdw,ib | ||
962 | -- E6: *out ib,Rb | ||
963 | -- E7: *out ib,Rdw | ||
964 | call_1 = "md:FF2m|J.:E8J", | ||
965 | jmp_1 = "md:FF4m|J.:E9J", -- short: EB | ||
966 | -- EA: *jmp iw:idw | ||
967 | -- EB: jmp ib | ||
968 | -- EC: *in Rb,dx | ||
969 | -- ED: *in Rdw,dx | ||
970 | -- EE: *out dx,Rb | ||
971 | -- EF: *out dx,Rdw | ||
972 | -- F0: *lock | ||
973 | int1_0 = "F1", | ||
974 | repne_0 = "F2", | ||
975 | repnz_0 = "F2", | ||
976 | rep_0 = "F3", | ||
977 | repe_0 = "F3", | ||
978 | repz_0 = "F3", | ||
979 | -- F4: *hlt | ||
980 | cmc_0 = "F5", | ||
981 | -- F6: test... mb,i; div... mb | ||
982 | -- F7: test... mdw,i; div... mdw | ||
983 | clc_0 = "F8", | ||
984 | stc_0 = "F9", | ||
985 | -- FA: *cli | ||
986 | cld_0 = "FC", | ||
987 | std_0 = "FD", | ||
988 | -- FE: inc... mb | ||
989 | -- FF: inc... mdw | ||
990 | |||
991 | -- misc ops | ||
992 | not_1 = "m:F72m", | ||
993 | neg_1 = "m:F73m", | ||
994 | mul_1 = "m:F74m", | ||
995 | imul_1 = "m:F75m", | ||
996 | div_1 = "m:F76m", | ||
997 | idiv_1 = "m:F77m", | ||
998 | |||
999 | imul_2 = "rmdw:0FAFrM|rIdw:69rmI|rSdw:6BrmS|ridw:69rmi", | ||
1000 | imul_3 = "rmIdw:69rMI|rmSdw:6BrMS|rmidw:69rMi", | ||
1001 | |||
1002 | movzx_2 = "rm/db:0FB6rM|rm/wb:0FB6rM|rm/dw:0FB7rM", | ||
1003 | movsx_2 = "rm/db:0FBErM|rm/wb:0FBErM|rm/dw:0FBFrM", | ||
1004 | |||
1005 | bswap_1 = "rd:0FC8r", | ||
1006 | bsf_2 = "rmdw:0FBCrM", | ||
1007 | bsr_2 = "rmdw:0FBDrM", | ||
1008 | bt_2 = "mrdw:0FA3Rm|midw:0FBA4mU", | ||
1009 | btc_2 = "mrdw:0FBBRm|midw:0FBA7mU", | ||
1010 | btr_2 = "mrdw:0FB3Rm|midw:0FBA6mU", | ||
1011 | bts_2 = "mrdw:0FABRm|midw:0FBA5mU", | ||
1012 | |||
1013 | rdtsc_0 = "0F31", -- P1+ | ||
1014 | cpuid_0 = "0FA2", -- P1+ | ||
1015 | |||
1016 | -- floating point ops | ||
1017 | fst_1 = "ff:DDD0r|xd:D92m|xq:DD2m", | ||
1018 | fstp_1 = "ff:DDD8r|xd:D93m|xq:DD3m|xt:DB7m", | ||
1019 | fld_1 = "ff:D9C0r|xd:D90m|xq:DD0m|xt:DB5m", | ||
1020 | |||
1021 | fpop_0 = "DDD8", -- Alias for fstp st0. | ||
1022 | |||
1023 | fist_1 = "xw:nDF2m|xd:DB2m", | ||
1024 | fistp_1 = "xw:nDF3m|xd:DB3m|xq:DF7m", | ||
1025 | fild_1 = "xw:nDF0m|xd:DB0m|xq:DF5m", | ||
1026 | |||
1027 | fxch_0 = "D9C9", | ||
1028 | fxch_1 = "ff:D9C8r", | ||
1029 | fxch_2 = "fFf:D9C8r|Fff:D9C8R", | ||
1030 | |||
1031 | fucom_1 = "ff:DDE0r", | ||
1032 | fucom_2 = "Fff:DDE0R", | ||
1033 | fucomp_1 = "ff:DDE8r", | ||
1034 | fucomp_2 = "Fff:DDE8R", | ||
1035 | fucomi_1 = "ff:DBE8r", -- P6+ | ||
1036 | fucomi_2 = "Fff:DBE8R", -- P6+ | ||
1037 | fucomip_1 = "ff:DFE8r", -- P6+ | ||
1038 | fucomip_2 = "Fff:DFE8R", -- P6+ | ||
1039 | fcomi_1 = "ff:DBF0r", -- P6+ | ||
1040 | fcomi_2 = "Fff:DBF0R", -- P6+ | ||
1041 | fcomip_1 = "ff:DFF0r", -- P6+ | ||
1042 | fcomip_2 = "Fff:DFF0R", -- P6+ | ||
1043 | fucompp_0 = "DAE9", | ||
1044 | fcompp_0 = "DED9", | ||
1045 | |||
1046 | fldcw_1 = "xw:nD95m", | ||
1047 | fstcw_1 = "xw:n9BD97m", | ||
1048 | fnstcw_1 = "xw:nD97m", | ||
1049 | fstsw_1 = "Rw:n9BDFE0|xw:n9BDD7m", | ||
1050 | fnstsw_1 = "Rw:nDFE0|xw:nDD7m", | ||
1051 | fclex_0 = "9BDBE2", | ||
1052 | fnclex_0 = "DBE2", | ||
1053 | |||
1054 | fnop_0 = "D9D0", | ||
1055 | -- D9D1-D9DF: unassigned | ||
1056 | |||
1057 | fchs_0 = "D9E0", | ||
1058 | fabs_0 = "D9E1", | ||
1059 | -- D9E2: unassigned | ||
1060 | -- D9E3: unassigned | ||
1061 | ftst_0 = "D9E4", | ||
1062 | fxam_0 = "D9E5", | ||
1063 | -- D9E6: unassigned | ||
1064 | -- D9E7: unassigned | ||
1065 | fld1_0 = "D9E8", | ||
1066 | fldl2t_0 = "D9E9", | ||
1067 | fldl2e_0 = "D9EA", | ||
1068 | fldpi_0 = "D9EB", | ||
1069 | fldlg2_0 = "D9EC", | ||
1070 | fldln2_0 = "D9ED", | ||
1071 | fldz_0 = "D9EE", | ||
1072 | -- D9EF: unassigned | ||
1073 | |||
1074 | f2xm1_0 = "D9F0", | ||
1075 | fyl2x_0 = "D9F1", | ||
1076 | fptan_0 = "D9F2", | ||
1077 | fpatan_0 = "D9F3", | ||
1078 | fxtract_0 = "D9F4", | ||
1079 | fprem1_0 = "D9F5", | ||
1080 | fdecstp_0 = "D9F6", | ||
1081 | fincstp_0 = "D9F7", | ||
1082 | fprem_0 = "D9F8", | ||
1083 | fyl2xp1_0 = "D9F9", | ||
1084 | fsqrt_0 = "D9FA", | ||
1085 | fsincos_0 = "D9FB", | ||
1086 | frndint_0 = "D9FC", | ||
1087 | fscale_0 = "D9FD", | ||
1088 | fsin_0 = "D9FE", | ||
1089 | fcos_0 = "D9FF", | ||
1090 | |||
1091 | -- SSE, SSE2 | ||
1092 | andnpd_2 = "rmo:660F55rM", | ||
1093 | andnps_2 = "rmo:0F55rM", | ||
1094 | andpd_2 = "rmo:660F54rM", | ||
1095 | andps_2 = "rmo:0F54rM", | ||
1096 | clflush_1 = "x.:0FAE7m", | ||
1097 | cmppd_3 = "rmio:660FC2rMU", | ||
1098 | cmpps_3 = "rmio:0FC2rMU", | ||
1099 | cmpsd_3 = "rmio:F20FC2rMU", | ||
1100 | cmpss_3 = "rmio:F30FC2rMU", | ||
1101 | comisd_2 = "rmo:660F2FrM", | ||
1102 | comiss_2 = "rmo:0F2FrM", | ||
1103 | cvtdq2pd_2 = "rro:F30FE6rM|rx/oq:", | ||
1104 | cvtdq2ps_2 = "rmo:0F5BrM", | ||
1105 | cvtpd2dq_2 = "rmo:F20FE6rM", | ||
1106 | cvtpd2ps_2 = "rmo:660F5ArM", | ||
1107 | cvtpi2pd_2 = "rx/oq:660F2ArM", | ||
1108 | cvtpi2ps_2 = "rx/oq:0F2ArM", | ||
1109 | cvtps2dq_2 = "rmo:660F5BrM", | ||
1110 | cvtps2pd_2 = "rro:0F5ArM|rx/oq:", | ||
1111 | cvtsd2si_2 = "rr/do:F20F2DrM|rx/dq:", | ||
1112 | cvtsd2ss_2 = "rro:F20F5ArM|rx/oq:", | ||
1113 | cvtsi2sd_2 = "rm/od:F20F2ArM", | ||
1114 | cvtsi2ss_2 = "rm/od:F30F2ArM", | ||
1115 | cvtss2sd_2 = "rro:F30F5ArM|rx/od:", | ||
-- cvtss2si is F3 0F 2D /r. The previous F2 0F 2C is cvttsd2si.
cvtss2si_2 = "rr/do:F30F2DrM|rx/dd:",
1117 | cvttpd2dq_2 = "rmo:660FE6rM", | ||
1118 | cvttps2dq_2 = "rmo:F30F5BrM", | ||
1119 | cvttsd2si_2 = "rr/do:F20F2CrM|rx/dq:", | ||
1120 | cvttss2si_2 = "rr/do:F30F2CrM|rx/dd:", | ||
1121 | ldmxcsr_1 = "xd:0FAE2m", | ||
1122 | lfence_0 = "0FAEE8", | ||
1123 | maskmovdqu_2 = "rro:660FF7rM", | ||
1124 | mfence_0 = "0FAEF0", | ||
1125 | movapd_2 = "rmo:660F28rM|mro:660F29Rm", | ||
1126 | movaps_2 = "rmo:0F28rM|mro:0F29Rm", | ||
1127 | movd_2 = "rm/od:660F6ErM|mr/do:660F7ERm", | ||
1128 | movdqa_2 = "rmo:660F6FrM|mro:660F7FRm", | ||
1129 | movdqu_2 = "rmo:F30F6FrM|mro:F30F7FRm", | ||
1130 | movhlps_2 = "rro:0F12rM", | ||
1131 | movhpd_2 = "rx/oq:660F16rM|xr/qo:660F17Rm", | ||
1132 | movhps_2 = "rx/oq:0F16rM|xr/qo:0F17Rm", | ||
1133 | movlhps_2 = "rro:0F16rM", | ||
1134 | movlpd_2 = "rx/oq:660F12rM|xr/qo:660F13Rm", | ||
1135 | movlps_2 = "rx/oq:0F12rM|xr/qo:0F13Rm", | ||
1136 | movmskpd_2 = "rr/do:660F50rM", | ||
1137 | movmskps_2 = "rr/do:0F50rM", | ||
1138 | movntdq_2 = "xro:660FE7Rm", | ||
1139 | movnti_2 = "xrd:0FC3Rm", | ||
1140 | movntpd_2 = "xro:660F2BRm", | ||
1141 | movntps_2 = "xro:0F2BRm", | ||
1142 | movq_2 = "rro:F30F7ErM|rx/oq:|xr/qo:660FD6Rm", | ||
1143 | movsd_2 = "rro:F20F10rM|rx/oq:|xr/qo:F20F11Rm", | ||
1144 | movss_2 = "rro:F30F10rM|rx/od:|xr/do:F30F11Rm", | ||
1145 | movupd_2 = "rmo:660F10rM|mro:660F11Rm", | ||
1146 | movups_2 = "rmo:0F10rM|mro:0F11Rm", | ||
1147 | orpd_2 = "rmo:660F56rM", | ||
1148 | orps_2 = "rmo:0F56rM", | ||
1149 | packssdw_2 = "rmo:660F6BrM", | ||
1150 | packsswb_2 = "rmo:660F63rM", | ||
1151 | packuswb_2 = "rmo:660F67rM", | ||
1152 | paddb_2 = "rmo:660FFCrM", | ||
1153 | paddd_2 = "rmo:660FFErM", | ||
1154 | paddq_2 = "rmo:660FD4rM", | ||
1155 | paddsb_2 = "rmo:660FECrM", | ||
1156 | paddsw_2 = "rmo:660FEDrM", | ||
1157 | paddusb_2 = "rmo:660FDCrM", | ||
1158 | paddusw_2 = "rmo:660FDDrM", | ||
1159 | paddw_2 = "rmo:660FFDrM", | ||
1160 | pand_2 = "rmo:660FDBrM", | ||
1161 | pandn_2 = "rmo:660FDFrM", | ||
1162 | pause_0 = "F390", | ||
1163 | pavgb_2 = "rmo:660FE0rM", | ||
1164 | pavgw_2 = "rmo:660FE3rM", | ||
1165 | pcmpeqb_2 = "rmo:660F74rM", | ||
1166 | pcmpeqd_2 = "rmo:660F76rM", | ||
1167 | pcmpeqw_2 = "rmo:660F75rM", | ||
1168 | pcmpgtb_2 = "rmo:660F64rM", | ||
1169 | pcmpgtd_2 = "rmo:660F66rM", | ||
1170 | pcmpgtw_2 = "rmo:660F65rM", | ||
1171 | pextrw_3 = "rri/do:660FC5rMU|xri/wo:660F3A15nrMU", -- Mem op: SSE4.1 only. | ||
1172 | pinsrw_3 = "rri/od:660FC4rMU|rxi/ow:", | ||
1173 | pmaddwd_2 = "rmo:660FF5rM", | ||
1174 | pmaxsw_2 = "rmo:660FEErM", | ||
1175 | pmaxub_2 = "rmo:660FDErM", | ||
1176 | pminsw_2 = "rmo:660FEArM", | ||
1177 | pminub_2 = "rmo:660FDArM", | ||
1178 | pmovmskb_2 = "rr/do:660FD7rM", | ||
1179 | pmulhuw_2 = "rmo:660FE4rM", | ||
1180 | pmulhw_2 = "rmo:660FE5rM", | ||
1181 | pmullw_2 = "rmo:660FD5rM", | ||
1182 | pmuludq_2 = "rmo:660FF4rM", | ||
1183 | por_2 = "rmo:660FEBrM", | ||
1184 | prefetchnta_1 = "xb:n0F180m", | ||
1185 | prefetcht0_1 = "xb:n0F181m", | ||
1186 | prefetcht1_1 = "xb:n0F182m", | ||
1187 | prefetcht2_1 = "xb:n0F183m", | ||
1188 | psadbw_2 = "rmo:660FF6rM", | ||
1189 | pshufd_3 = "rmio:660F70rMU", | ||
1190 | pshufhw_3 = "rmio:F30F70rMU", | ||
1191 | pshuflw_3 = "rmio:F20F70rMU", | ||
1192 | pslld_2 = "rmo:660FF2rM|rio:660F726mU", | ||
1193 | pslldq_2 = "rio:660F737mU", | ||
1194 | psllq_2 = "rmo:660FF3rM|rio:660F736mU", | ||
1195 | psllw_2 = "rmo:660FF1rM|rio:660F716mU", | ||
1196 | psrad_2 = "rmo:660FE2rM|rio:660F724mU", | ||
1197 | psraw_2 = "rmo:660FE1rM|rio:660F714mU", | ||
1198 | psrld_2 = "rmo:660FD2rM|rio:660F722mU", | ||
1199 | psrldq_2 = "rio:660F733mU", | ||
1200 | psrlq_2 = "rmo:660FD3rM|rio:660F732mU", | ||
1201 | psrlw_2 = "rmo:660FD1rM|rio:660F712mU", | ||
1202 | psubb_2 = "rmo:660FF8rM", | ||
1203 | psubd_2 = "rmo:660FFArM", | ||
1204 | psubq_2 = "rmo:660FFBrM", | ||
1205 | psubsb_2 = "rmo:660FE8rM", | ||
1206 | psubsw_2 = "rmo:660FE9rM", | ||
1207 | psubusb_2 = "rmo:660FD8rM", | ||
1208 | psubusw_2 = "rmo:660FD9rM", | ||
1209 | psubw_2 = "rmo:660FF9rM", | ||
1210 | punpckhbw_2 = "rmo:660F68rM", | ||
1211 | punpckhdq_2 = "rmo:660F6ArM", | ||
1212 | punpckhqdq_2 = "rmo:660F6DrM", | ||
1213 | punpckhwd_2 = "rmo:660F69rM", | ||
1214 | punpcklbw_2 = "rmo:660F60rM", | ||
1215 | punpckldq_2 = "rmo:660F62rM", | ||
1216 | punpcklqdq_2 = "rmo:660F6CrM", | ||
1217 | punpcklwd_2 = "rmo:660F61rM", | ||
1218 | pxor_2 = "rmo:660FEFrM", | ||
1219 | rcpps_2 = "rmo:0F53rM", | ||
1220 | rcpss_2 = "rmo:F30F53rM", | ||
1221 | rsqrtps_2 = "rmo:0F52rM", | ||
1222 | rsqrtss_2 = "rmo:F30F52rM", | ||
1223 | sfence_0 = "0FAEF8", | ||
1224 | shufpd_3 = "rmio:660FC6rMU", | ||
1225 | shufps_3 = "rmio:0FC6rMU", | ||
1226 | stmxcsr_1 = "xd:0FAE3m", | ||
1227 | ucomisd_2 = "rmo:660F2ErM", | ||
1228 | ucomiss_2 = "rmo:0F2ErM", | ||
1229 | unpckhpd_2 = "rmo:660F15rM", | ||
1230 | unpckhps_2 = "rmo:0F15rM", | ||
1231 | unpcklpd_2 = "rmo:660F14rM", | ||
1232 | unpcklps_2 = "rmo:0F14rM", | ||
1233 | xorpd_2 = "rmo:660F57rM", | ||
1234 | xorps_2 = "rmo:0F57rM", | ||
1235 | |||
1236 | -- SSE3 ops | ||
1237 | fisttp_1 = "xw:nDF1m|xd:DB1m|xq:DD1m", | ||
1238 | addsubpd_2 = "rmo:660FD0rM", | ||
1239 | addsubps_2 = "rmo:F20FD0rM", | ||
1240 | haddpd_2 = "rmo:660F7CrM", | ||
1241 | haddps_2 = "rmo:F20F7CrM", | ||
1242 | hsubpd_2 = "rmo:660F7DrM", | ||
1243 | hsubps_2 = "rmo:F20F7DrM", | ||
1244 | lddqu_2 = "rxo:F20FF0rM", | ||
1245 | movddup_2 = "rmo:F20F12rM", | ||
1246 | movshdup_2 = "rmo:F30F16rM", | ||
1247 | movsldup_2 = "rmo:F30F12rM", | ||
1248 | |||
1249 | -- SSSE3 ops | ||
1250 | pabsb_2 = "rmo:660F381CrM", | ||
1251 | pabsd_2 = "rmo:660F381ErM", | ||
1252 | pabsw_2 = "rmo:660F381DrM", | ||
1253 | palignr_3 = "rmio:660F3A0FrMU", | ||
1254 | phaddd_2 = "rmo:660F3802rM", | ||
1255 | phaddsw_2 = "rmo:660F3803rM", | ||
1256 | phaddw_2 = "rmo:660F3801rM", | ||
1257 | phsubd_2 = "rmo:660F3806rM", | ||
1258 | phsubsw_2 = "rmo:660F3807rM", | ||
1259 | phsubw_2 = "rmo:660F3805rM", | ||
1260 | pmaddubsw_2 = "rmo:660F3804rM", | ||
1261 | pmulhrsw_2 = "rmo:660F380BrM", | ||
1262 | pshufb_2 = "rmo:660F3800rM", | ||
1263 | psignb_2 = "rmo:660F3808rM", | ||
1264 | psignd_2 = "rmo:660F380ArM", | ||
1265 | psignw_2 = "rmo:660F3809rM", | ||
1266 | |||
1267 | -- SSE4.1 ops | ||
1268 | blendpd_3 = "rmio:660F3A0DrMU", | ||
1269 | blendps_3 = "rmio:660F3A0CrMU", | ||
1270 | blendvpd_3 = "rmRo:660F3815rM", | ||
1271 | blendvps_3 = "rmRo:660F3814rM", | ||
1272 | dppd_3 = "rmio:660F3A41rMU", | ||
1273 | dpps_3 = "rmio:660F3A40rMU", | ||
1274 | extractps_3 = "mri/do:660F3A17RmU", | ||
-- INSERTPS is 66 0F 3A 21 /r ib; 66 0F 3A 41 is DPPD (see dppd_3).
insertps_3 = "rrio:660F3A21rMU|rxi/od:",
1276 | movntdqa_2 = "rmo:660F382ArM", | ||
1277 | mpsadbw_3 = "rmio:660F3A42rMU", | ||
1278 | packusdw_2 = "rmo:660F382BrM", | ||
1279 | pblendvb_3 = "rmRo:660F3810rM", | ||
1280 | pblendw_3 = "rmio:660F3A0ErMU", | ||
1281 | pcmpeqq_2 = "rmo:660F3829rM", | ||
1282 | pextrb_3 = "rri/do:660F3A14nRmU|xri/bo:", | ||
1283 | pextrd_3 = "mri/do:660F3A16RmU", | ||
1284 | -- x64: pextrq | ||
1285 | -- pextrw is SSE2, mem operand is SSE4.1 only | ||
1286 | phminposuw_2 = "rmo:660F3841rM", | ||
1287 | pinsrb_3 = "rri/od:660F3A20nrMU|rxi/ob:", | ||
1288 | pinsrd_3 = "rmi/od:660F3A22rMU", | ||
1289 | -- x64: pinsrq | ||
1290 | pmaxsb_2 = "rmo:660F383CrM", | ||
1291 | pmaxsd_2 = "rmo:660F383DrM", | ||
1292 | pmaxud_2 = "rmo:660F383FrM", | ||
1293 | pmaxuw_2 = "rmo:660F383ErM", | ||
1294 | pminsb_2 = "rmo:660F3838rM", | ||
1295 | pminsd_2 = "rmo:660F3839rM", | ||
1296 | pminud_2 = "rmo:660F383BrM", | ||
1297 | pminuw_2 = "rmo:660F383ArM", | ||
1298 | pmovsxbd_2 = "rro:660F3821rM|rx/od:", | ||
1299 | pmovsxbq_2 = "rro:660F3822rM|rx/ow:", | ||
1300 | pmovsxbw_2 = "rro:660F3820rM|rx/oq:", | ||
1301 | pmovsxdq_2 = "rro:660F3825rM|rx/oq:", | ||
1302 | pmovsxwd_2 = "rro:660F3823rM|rx/oq:", | ||
1303 | pmovsxwq_2 = "rro:660F3824rM|rx/od:", | ||
1304 | pmovzxbd_2 = "rro:660F3831rM|rx/od:", | ||
1305 | pmovzxbq_2 = "rro:660F3832rM|rx/ow:", | ||
1306 | pmovzxbw_2 = "rro:660F3830rM|rx/oq:", | ||
1307 | pmovzxdq_2 = "rro:660F3835rM|rx/oq:", | ||
1308 | pmovzxwd_2 = "rro:660F3833rM|rx/oq:", | ||
1309 | pmovzxwq_2 = "rro:660F3834rM|rx/od:", | ||
1310 | pmuldq_2 = "rmo:660F3828rM", | ||
1311 | pmulld_2 = "rmo:660F3840rM", | ||
1312 | ptest_2 = "rmo:660F3817rM", | ||
1313 | roundpd_3 = "rmio:660F3A09rMU", | ||
1314 | roundps_3 = "rmio:660F3A08rMU", | ||
1315 | roundsd_3 = "rrio:660F3A0BrMU|rxi/oq:", | ||
1316 | roundss_3 = "rrio:660F3A0ArMU|rxi/od:", | ||
1317 | |||
1318 | -- SSE4.2 ops | ||
1319 | crc32_2 = "rmd:F20F38F1rM|rm/dw:66F20F38F1rM|rm/db:F20F38F0nrM", | ||
1320 | pcmpestri_3 = "rmio:660F3A61rMU", | ||
1321 | pcmpestrm_3 = "rmio:660F3A60rMU", | ||
1322 | pcmpgtq_2 = "rmo:660F3837rM", | ||
1323 | pcmpistri_3 = "rmio:660F3A63rMU", | ||
1324 | pcmpistrm_3 = "rmio:660F3A62rMU", | ||
1325 | popcnt_2 = "rmdw:F30FB8rM", | ||
1326 | |||
1327 | -- SSE4a | ||
1328 | extrq_2 = "rro:660F79rM", | ||
1329 | extrq_3 = "riio:660F780mUU", | ||
1330 | insertq_2 = "rro:F20F79rM", | ||
1331 | insertq_4 = "rriio:F20F78rMUU", | ||
1332 | lzcnt_2 = "rmdw:F30FBDrM", | ||
1333 | movntsd_2 = "xr/qo:F20F2BRm", | ||
1334 | movntss_2 = "xr/do:F30F2BRm", | ||
1335 | -- popcnt is also in SSE4.2 | ||
1336 | } | ||
1337 | |||
1338 | ------------------------------------------------------------------------------ | ||
1339 | |||
-- Arithmetic ops.
-- The 0-based list position is the op number n. It is formatted directly into
-- the 81/83 immediate forms and scaled by 8 to build the opcode byte of the
-- remaining forms.
for pos, mnemonic in ipairs{ "add", "or", "adc", "sbb",
                             "and", "sub", "xor", "cmp" } do
  local n = pos - 1
  local base = n * 8
  map_op[mnemonic.."_2"] = format(
    "mr:%02XRm|rm:%02XrM|mI1dw:81%XmI|mS1dw:83%XmS|Ri1dwb:%02Xri|mi1dwb:81%Xmi",
    base + 1, base + 3, n, n, base + 5, n)
end
1348 | |||
-- Shift ops.
-- Each mnemonic maps to the digit formatted into the D1/D3/C1 opcode forms
-- ("sal" shares its digit with "shl").
do
  local shiftnum = {
    rol = 0, ror = 1, rcl = 2, rcr = 3,
    shl = 4, sal = 4, shr = 5, sar = 7,
  }
  for mnemonic, n in pairs(shiftnum) do
    map_op[mnemonic.."_2"] =
      format("m1:D1%Xm|mC1dwb:D3%Xm|mi:C1%XmU", n, n, n)
  end
end
1354 | |||
-- Conditional ops.
-- For every condition code in map_cc, register the j<cc>, set<cc> and
-- cmov<cc> opcodes with the condition number formatted into the template.
do
  local forms = {
    { "j",    "_1", "J.:0F8%XJ" },    -- short: 7%X
    { "set",  "_1", "mb:n0F9%X2m" },
    { "cmov", "_2", "rmdw:0F4%XrM" }, -- P6+
  }
  for cc, n in pairs(map_cc) do
    for _, form in ipairs(forms) do
      map_op[form[1]..cc..form[2]] = format(form[3], n)
    end
  end
end
1361 | |||
-- FP arithmetic ops.
-- The 0-based list position is the op number n. fcom/fcomp (n = 2, 3) get no
-- reversed-operand or popping variants.
for pos, stem in ipairs{ "add", "mul", "com", "comp",
                         "sub", "subr", "div", "divr" } do
  local n = pos - 1
  local nc = 192 + n * 8
  -- nr equals nc for the first four ops; for sub/subr and div/divr it is the
  -- opcode byte of the neighbouring op.
  local nr = nc
  if n >= 4 then
    if n % 2 == 0 then nr = nc + 8 else nr = nc - 8 end
  end
  local fop = "f"..stem
  map_op[fop.."_1"] = format("ff:D8%02Xr|xd:D8%Xm|xq:DC%Xm", nc, n, n)
  if n ~= 2 and n ~= 3 then
    map_op[fop.."_2"] = format("Fff:D8%02XR|fFf:DC%02Xr|Fx2d:D8%XM|Fx2q:DC%XM", nc, nr, n, n)
    map_op[fop.."p_1"] = format("ff:DE%02Xr", nr)
    map_op[fop.."p_2"] = format("fFf:DE%02Xr", nr)
  else
    map_op[fop.."_2"] = format("Fff:D8%02XR|Fx2d:D8%XM|Fx2q:DC%XM", nc, n, n)
  end
  map_op["fi"..stem.."_1"] = format("xd:DA%Xm|xw:nDE%Xm", n, n)
end
1378 | |||
-- FP conditional moves.
-- The 0-based list position is the condition number n. The low two bits of n
-- step the opcode by 8, the remaining part of n steps it by 64 per unit.
for pos, cc in ipairs{ "b", "e", "be", "u", "nb", "ne", "nbe", "nu" } do
  local n = pos - 1
  local low = n % 4
  local nc = 56000 + low * 8 + (n - low) * 64
  local stem = "fcmov"..cc
  map_op[stem.."_1"] = format("ff:%04Xr", nc) -- P6+
  map_op[stem.."_2"] = format("Fff:%04XR", nc) -- P6+
end
1386 | |||
-- SSE FP arithmetic ops.
-- Every op gets four variants (ps, ss, pd, sd); the op number is formatted
-- into the template of each variant.
do
  local variants = {
    ps = "rmo:0F5%XrM",
    ss = "rro:F30F5%XrM|rx/od:",
    pd = "rmo:660F5%XrM",
    sd = "rro:F20F5%XrM|rx/oq:",
  }
  local opnum = {
    sqrt = 1, add = 8, mul = 9,
    sub = 12, min = 13, div = 14, max = 15,
  }
  for stem, n in pairs(opnum) do
    for suffix, tpl in pairs(variants) do
      map_op[stem..suffix.."_2"] = format(tpl, n)
    end
  end
end
1395 | |||
1396 | ------------------------------------------------------------------------------ | ||
1397 | |||
-- Process pattern string.
-- Walks the encoding pattern character by character and emits the opcode,
-- ModRM/SIB, displacement and immediate parts through the wput*() and
-- waction() writers.
--   pat   pattern string to process.
--   args  parsed operands, indexed by operand position.
--   sz    operand size passed to wputop() and wputszarg().
--   op    opcode name; only used in the "bad char" error message.
local function dopattern(pat, args, sz, op)
  local merged      -- Operand whose regno is currently added to the opcode.
  local opc = 0     -- Opcode value collected so far; nil once written.
  local opsz = sz   -- Size handed to wputop(); cleared by "n".
  local nextarg = 1 -- Operand index the next immediate is taken from.

  -- Limit number of section buffer positions used by a single dasm_put().
  -- A single opcode needs a maximum of 2 positions. !x64
  if secpos+2 > maxsecpos then wflush() end

  -- A pattern ending in "I" forces a mark when ModRM/SIB is written (ugly).
  local forcemark = (sub(pat, -1) == "I")

  -- The appended "|" terminates the walk after flushing a pending opcode.
  for ch in gmatch(pat.."|", ".") do
    if match(ch, "%x") then -- Hex digit: shift it into the opcode.
      opc = opc*16 + tonumber(ch, 16)
      merged = nil
    elseif ch == "n" then -- Disable operand size mods for opcode.
      opsz = nil
    elseif ch == "r" or ch == "R" then
      -- Merge 1st ("r") or 2nd ("R") operand regno. into opcode.
      local idx = (ch == "r") and 1 or 2
      merged = args[idx]
      opc = opc + merged.reg
      if idx == 2 then
        nextarg = 3
      elseif nextarg < 2 then
        nextarg = 2
      end
    elseif ch == "m" or ch == "M" then -- Encode ModRM/SIB.
      local spare
      if merged then
        spare = merged.reg
        opc = opc - spare -- Undo regno opcode merge.
      else
        spare = opc % 16 -- Undo last digit.
        opc = (opc - spare) / 16
      end
      wputop(opsz, opc)
      opc = nil
      -- Put ModRM/SIB with regno/last digit as spare.
      -- "m" encodes the 1st operand, "M" the 2nd.
      local idx = (ch == "m") and 1 or 2
      wputmrmsib(args[idx], forcemark, spare, merged and merged.vreg)
      if nextarg <= idx then nextarg = idx + 1 end
      merged = nil
    else
      if opc then -- Flush opcode.
        if merged and merged.reg == -1 then
          wputop(opsz, opc + 1)
          waction("VREG", merged.vreg)
          wputxb(0)
        else
          wputop(opsz, opc)
        end
        opc = nil
      end
      if ch == "|" then break end
      if ch == "o" then -- Offset (pure 32 bit displacement).
        wputdarg(args[1].disp)
        if nextarg < 2 then nextarg = 2 end
      elseif ch == "O" then
        wputdarg(args[2].disp)
        nextarg = 3
      else
        -- Anything else is an immediate operand.
        local operand = args[nextarg]
        nextarg = nextarg + 1
        local mode, imm = operand.mode, operand.imm
        if mode == "iJ" and not match("iIJ", ch) then
          werror("bad operand size for label")
        end
        if ch == "i" or ch == "I" then
          if mode == "iJ" then
            wputlabel("IMM_", imm, 1)
          elseif ch == "I" and mode == "iI" then
            waction(sz == "w" and "IMM_WB" or "IMM_DB", imm)
          else
            wputszarg(sz, imm)
          end
        elseif ch == "J" then
          if mode == "iPJ" then
            waction("REL_A", imm) -- !x64 (secpos)
          else
            wputlabel("REL_", imm, 2)
          end
        elseif ch == "S" then
          wputsbarg(imm)
        elseif ch == "U" then
          wputbarg(imm)
        elseif ch == "W" then
          wputwarg(imm)
        else
          werror("bad char `"..ch.."' in pattern `"..pat.."' for `"..op.."'")
        end
      end
    end
  end
end
1491 | |||
1492 | ------------------------------------------------------------------------------ | ||
1493 | |||
-- Mapping of operand modes to short names. Suppress output with '#'.
local map_modename = {
  r = "reg",
  R = "eax",
  C = "cl",
  x = "mem",
  m = "mrm",
  i = "imm",
  f = "stx",
  F = "st0",
  J = "lbl",
  ["1"] = "1",
  -- Templates containing one of these modes are left out of the help.
  I = "#",
  S = "#",
  O = "#",
}
1500 | |||
-- Return a table/string showing all possible operand modes.
-- Yields "" when nparams is 0. Otherwise returns a list holding one
-- comma-separated mode description per template alternative; alternatives
-- that contain a suppressed mode ('#') are skipped.
local function templatehelp(template, nparams)
  if nparams == 0 then return "" end
  local lines = {}
  for alt in gmatch(template, "[^%|]+") do
    -- Names of modes 2..nparams, each preceded by a separator.
    local tail = ""
    for mode in gmatch(sub(alt, 2, nparams), ".") do
      tail = tail..", "..map_modename[mode]
    end
    local desc = map_modename[sub(alt, 1, 1)]..tail
    if not match(desc, "#") then lines[#lines+1] = desc end
  end
  return lines
end
1514 | |||
-- Match operand modes against mode match part of template.
-- The i-th character of tm is used as a pattern for the mode of the i-th
-- operand. Returns true if all operands match, nothing otherwise.
local function matchtm(tm, args)
  local i, count = 1, #args
  while i <= count do
    local wanted = sub(tm, i, i)
    if not match(args[i].mode, wanted) then return end
    i = i + 1
  end
  return true
end
1522 | |||
-- Handle opcodes defined with template strings.
-- Without params only the help for the template is returned. Otherwise the
-- operands are parsed and the first alternative whose modes and sizes fit is
-- encoded with dopattern(). An error is reported if no alternative fits.
map_op[".template__"] = function(params, template, nparams)
  if not params then return templatehelp(template, nparams) end
  local args = {}

  -- Zero-operand opcodes have no match part.
  if #params == 0 then
    dopattern(template, args, "d", params.op)
    return
  end

  -- Determine common operand size (coerce undefined size) or flag as mixed.
  local sz, szmix
  for i, param in ipairs(params) do
    local operand = parseoperand(param)
    args[i] = operand
    local opsize = operand.opsize
    if opsize then
      if not sz then
        sz = opsize
      elseif sz ~= opsize then
        szmix = true
      end
    end
  end

  -- Try all match:pattern pairs (separated by '|').
  local modematched, prevpat
  for alt in gmatch(template, "[^%|]+") do
    -- Split off size match (starts after mode match) and pattern string.
    local szm, pat = match(alt, "^(.-):(.*)$", #args+1)
    -- An empty pattern reuses the pattern of the previous alternative.
    if pat == "" then pat = prevpat else prevpat = pat end
    if matchtm(alt, args) then
      local prefix = sub(szm, 1, 1)
      if prefix == "/" then -- Match both operand sizes.
        local want1, want2 = sub(szm, 2, 2), sub(szm, 3, 3)
        if args[1].opsize == want1 and args[2].opsize == want2 then
          dopattern(pat, args, sz, params.op) -- Process pattern string.
          return
        end
      else -- Match common operand size.
        local szp = sz
        if szm == "" then szm = "dwb" end -- Default size match.
        -- A "1"/"2" prefix takes the size from that operand alone and
        -- clears the mixed-size flag.
        local which = (prefix == "1" and 1) or (prefix == "2" and 2) or nil
        if which then
          szp = args[which].opsize
          szmix = nil
        end
        if not szmix and (prefix == "." or match(szm, szp or "#")) then
          dopattern(pat, args, szp, params.op) -- Process pattern string.
          return
        end
      end
      modematched = true
    end
  end

  -- Nothing fit: pick the most specific error message.
  local msg
  if not modematched then
    msg = "bad operand mode"
  elseif szmix then
    msg = "mixed operand size"
  elseif sz then
    msg = "bad operand size"
  else
    msg = "missing operand size"
  end

  werror(msg.." in `"..opmodestr(params.op, args).."'")
end
1583 | |||
1584 | ------------------------------------------------------------------------------ | ||
1585 | |||
-- Pseudo-opcodes for data storage.
-- The second character of the opcode name selects the operand size; "a" is
-- replaced by addrsize. Every parameter must be an immediate whose explicit
-- size (if any) equals that size.
local function op_data(params)
  if not params then return "imm..." end
  local sz = sub(params.op, 2, 2)
  if sz == "a" then sz = addrsize end
  for _, param in ipairs(params) do
    local operand = parseoperand(param)
    local mode, opsize = operand.mode, operand.opsize
    local isimm = (sub(mode, 1, 1) == "i")
    if not isimm or (opsize and opsize ~= sz) then
      werror("bad mode or size in `"..param.."'")
    end
    if mode == "iJ" then
      wputlabel("IMM_", operand.imm, 1)
    else
      wputszarg(sz, operand.imm)
    end
  end
end
1603 | |||
-- All data storage pseudo-opcodes share the op_data handler.
for _, key in ipairs{ ".byte_*", ".sbyte_*", ".word_*", ".dword_*", ".aword_*" } do
  map_op[key] = op_data
end
1609 | |||
1610 | ------------------------------------------------------------------------------ | ||
1611 | |||
-- Pseudo-opcode to mark the position where the action list is to be emitted.
-- Returns the help string when called without params.
map_op[".actionlist_1"] = function(params)
  if params then
    -- No syntax check. You get to keep the pieces.
    local cvar = params[1]
    wline(function(out) writeactions(out, cvar) end)
    return
  end
  return "cvar"
end
1618 | |||
-- Pseudo-opcode to mark the position where the global enum is to be emitted.
-- Returns the help string when called without params.
map_op[".globals_1"] = function(params)
  if params then
    -- No syntax check. You get to keep the pieces.
    local enumprefix = params[1]
    wline(function(out) writeglobals(out, enumprefix) end)
  else
    return "prefix"
  end
end
1625 | |||
-- Pseudo-opcode to mark the position where the global names are to be emitted.
-- Returns the help string when called without params.
map_op[".globalnames_1"] = function(params)
  if not params then
    return "cvar"
  end
  -- No syntax check. You get to keep the pieces.
  local cvar = params[1]
  local function emit(out)
    writeglobalnames(out, cvar)
  end
  wline(emit)
end
1632 | |||
-- Pseudo-opcode to mark the position where the extern names are to be emitted.
-- Returns the help string when called without params.
map_op[".externnames_1"] = function(params)
  if not params then
    return "cvar"
  end
  -- No syntax check. You get to keep the pieces.
  local cvar = params[1]
  wline(function(out)
    writeexternnames(out, cvar)
  end)
end
1639 | |||
1640 | ------------------------------------------------------------------------------ | ||
1641 | |||
-- Label pseudo-opcode (converted from trailing colon form).
-- The same handler serves the one- and two-parameter forms; the optional
-- second parameter assigns an address to the label.
do
  local function op_label(params)
    if not params then return "[1-9] | ->global | =>pcexpr [, addr]" end
    local label = parseoperand(params[1])
    local mode, imm = label.mode, label.imm
    local numeric = (type(imm) == "number")
    if numeric and (mode == "iJ" or (imm >= 1 and imm <= 9)) then
      -- Local label (1: ... 9:) or global label (->global:).
      waction("LABEL_LG", nil, 1)
      wputxb(imm)
    elseif mode == "iJ" then
      -- PC label (=>pcexpr:).
      waction("LABEL_PC", imm)
    else
      werror("bad label definition")
    end
    -- SETLABEL must immediately follow LABEL_LG/LABEL_PC.
    if params[2] then
      local target = parseoperand(params[2])
      if target.mode ~= "iPJ" then
        werror("bad label assignment")
      else
        waction("SETLABEL", target.imm) -- !x64 (secpos)
      end
    end
  end

  map_op[".label_2"] = op_label
  map_op[".label_1"] = op_label
end
1669 | |||
1670 | ------------------------------------------------------------------------------ | ||
1671 | |||
-- Alignment pseudo-opcode.
-- Accepts a number or an operand size name. Returns the help string when
-- called without params.
map_op[".align_1"] = function(params)
  if not params then return "numpow2" end
  local align = tonumber(params[1]) or map_opsizenum[map_opsize[params[1]]]
  if align then
    -- Must be a power of 2 in the range (2 ... 256): halve up to 8 times
    -- and check whether the value arrives at exactly 1.
    local rest, steps = align, 0
    repeat
      rest = rest / 2
      steps = steps + 1
    until rest == 1 or steps == 8
    if rest == 1 then
      waction("ALIGN", nil, 1)
      wputxb(align-1) -- Action byte is 2**n-1.
      return
    end
  end
  werror("bad alignment")
end
1690 | |||
-- Spacing pseudo-opcode.
-- The same handler serves the one- and two-parameter forms; a missing filler
-- is written as 0.
do
  local function op_space(params)
    if not params then return "num [, filler]" end
    waction("SPACE", params[1])
    local filler = params[2]
    if filler then
      filler = tonumber(filler)
      -- The filler must be a number within 0 ... 255.
      if not filler or filler < 0 or filler > 255 then
        werror("bad filler")
      end
    end
    wputxb(filler or 0)
  end

  map_op[".space_2"] = op_space
  map_op[".space_1"] = op_space
end
1703 | |||
1704 | ------------------------------------------------------------------------------ | ||
1705 | |||
-- Pseudo-opcode for (primitive) type definitions (map to C types).
-- The same handler serves the two- and three-parameter forms; the optional
-- third parameter is a base register for the type.
do
  local function op_type(params, nparams)
    if not params then
      if nparams == 2 then return "name, ctype" end
      return "name, ctype, reg"
    end
    local name, ctype, reg = params[1], params[2], params[3]
    if not match(name, "^[%a_][%w_]*$") then
      werror("bad type name `"..name.."'")
    end
    if map_type[name] then
      werror("duplicate type `"..name.."'")
    end
    if reg and not map_reg_valid_base[reg] then
      werror("bad base register `"..(map_reg_rev[reg] or reg).."'")
    end
    -- Add #type to defines. A bit unclean to put it in map_archdef.
    map_archdef["#"..name] = "sizeof("..ctype..")"
    -- Add new type and emit shortcut define.
    local num = ctypenum + 1
    local entry = {}
    entry.ctype = ctype
    entry.ctypefmt = format("Dt%X(%%s)", num)
    entry.reg = reg
    map_type[name] = entry
    wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype))
    ctypenum = num
  end

  map_op[".type_3"] = op_type
  map_op[".type_2"] = op_type
end
1735 | |||
-- Dump type definitions.
-- Writes one line per entry of map_type, sorted by type name, followed by an
-- empty line. The lvl parameter is not used here.
local function dumptypes(out, lvl)
  local names = {}
  for name in pairs(map_type) do names[#names+1] = name end
  sort(names)
  out:write("Type definitions:\n")
  for i = 1, #names do
    local name = names[i]
    local tp = map_type[name]
    -- Show the register name if the type has a known register, else nothing.
    local regname = ""
    if tp.reg then regname = map_reg_rev[tp.reg] or "" end
    out:write(format(" %-20s %-20s %s\n", name, tp.ctype, regname))
  end
  out:write("\n")
end
1749 | |||
1750 | ------------------------------------------------------------------------------ | ||
1751 | |||
-- Set the current section.
-- Emits a SECTION action followed by the section number, then flushes.
_M.section = function(num)
  waction("SECTION")
  wputxb(num)
  -- SECTION is a terminal action.
  wflush(true)
end
1758 | |||
1759 | ------------------------------------------------------------------------------ | ||
1760 | |||
-- Dump architecture description.
-- Writes a version header to out, then the register and action dumps.
_M.dumparch = function(out)
  local header = format("DynASM %s version %s, released %s\n\n",
                        _info.arch, _info.version, _info.release)
  out:write(header)
  dumpregs(out)
  dumpactions(out)
end
1768 | |||
-- Dump all user defined elements.
-- Runs the type, global and extern dumps in that order.
_M.dumpdef = function(out, lvl)
  dumptypes(out, lvl)
  dumpglobals(out, lvl)
  dumpexterns(out, lvl)
end
1775 | |||
1776 | ------------------------------------------------------------------------------ | ||
1777 | |||
-- Pass callbacks from/to the DynASM core.
-- Stores the four callbacks handed in and returns wflush in exchange.
_M.passcb = function(wl, we, wf, ww)
  wline = wl
  werror = we
  wfatal = wf
  wwarn = ww
  return wflush
end
1783 | |||
-- Setup the arch-specific module.
-- Remembers the architecture and the options in g_arch and g_opt.
_M.setup = function(arch, opt)
  g_arch = arch
  g_opt = opt
end
1788 | |||
-- Merge the core maps and the arch-specific maps.
-- Lookups that miss in map_op fall back to map_coreop; lookups that miss in
-- map_def fall back to map_archdef. Returns map_op and map_def.
_M.mergemaps = function(map_coreop, map_def)
  local opmeta = { __index = map_coreop }
  setmetatable(map_op, opmeta)
  local defmeta = { __index = map_archdef }
  setmetatable(map_def, defmeta)
  return map_op, map_def
end

return _M
1797 | |||
1798 | ------------------------------------------------------------------------------ | ||
1799 | |||
diff --git a/dynasm/dynasm.lua b/dynasm/dynasm.lua new file mode 100644 index 00000000..20ff9cf5 --- /dev/null +++ b/dynasm/dynasm.lua | |||
@@ -0,0 +1,1070 @@ | |||
1 | ------------------------------------------------------------------------------ | ||
2 | -- DynASM. A dynamic assembler for code generation engines. | ||
3 | -- Originally designed and implemented for LuaJIT. | ||
4 | -- | ||
5 | -- Copyright (C) 2005-2009 Mike Pall. All rights reserved. | ||
6 | -- See below for full copyright notice. | ||
7 | ------------------------------------------------------------------------------ | ||
8 | |||
9 | -- Application information. | ||
10 | local _info = { | ||
11 | name = "DynASM", | ||
12 | description = "A dynamic assembler for code generation engines", | ||
13 | version = "1.2.1", | ||
14 | vernum = 10201, | ||
15 | release = "2009-04-16", | ||
16 | author = "Mike Pall", | ||
17 | url = "http://luajit.org/dynasm.html", | ||
18 | license = "MIT", | ||
19 | copyright = [[ | ||
20 | Copyright (C) 2005-2009 Mike Pall. All rights reserved. | ||
21 | |||
22 | Permission is hereby granted, free of charge, to any person obtaining | ||
23 | a copy of this software and associated documentation files (the | ||
24 | "Software"), to deal in the Software without restriction, including | ||
25 | without limitation the rights to use, copy, modify, merge, publish, | ||
26 | distribute, sublicense, and/or sell copies of the Software, and to | ||
27 | permit persons to whom the Software is furnished to do so, subject to | ||
28 | the following conditions: | ||
29 | |||
30 | The above copyright notice and this permission notice shall be | ||
31 | included in all copies or substantial portions of the Software. | ||
32 | |||
33 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
34 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
35 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | ||
36 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | ||
37 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
38 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
39 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
40 | |||
41 | [ MIT license: http://www.opensource.org/licenses/mit-license.php ] | ||
42 | ]], | ||
43 | } | ||
44 | |||
45 | -- Cache library functions. | ||
46 | local type, pairs, ipairs = type, pairs, ipairs | ||
47 | local pcall, error, assert = pcall, error, assert | ||
48 | local _s = string | ||
49 | local sub, match, gmatch, gsub = _s.sub, _s.match, _s.gmatch, _s.gsub | ||
50 | local format, rep, upper = _s.format, _s.rep, _s.upper | ||
51 | local _t = table | ||
52 | local insert, remove, concat, sort = _t.insert, _t.remove, _t.concat, _t.sort | ||
53 | local exit = os.exit | ||
54 | local io = io | ||
55 | local stdin, stdout, stderr = io.stdin, io.stdout, io.stderr | ||
56 | |||
57 | ------------------------------------------------------------------------------ | ||
58 | |||
59 | -- Program options. | ||
60 | local g_opt = {} | ||
61 | |||
62 | -- Global state for current file. | ||
63 | local g_fname, g_curline, g_indent, g_lineno, g_synclineno, g_arch | ||
64 | local g_errcount = 0 | ||
65 | |||
66 | -- Write buffer for output file. | ||
67 | local g_wbuffer, g_capbuffer | ||
68 | |||
69 | ------------------------------------------------------------------------------ | ||
70 | |||
71 | -- Write an output line (or callback function) to the buffer. | ||
local function wline(line, needindent)
  -- An active capture buffer takes precedence over the main write buffer.
  local target = g_capbuffer or g_wbuffer
  if needindent then
    line = g_indent..line
  end
  target[#target+1] = line
  -- Every appended entry advances the synchronized line counter.
  g_synclineno = g_synclineno + 1
end
77 | |||
78 | -- Write assembler line as a comment, if requested. | ||
local function wcomment(aline)
  -- Comments are only emitted when a comment opener is configured.
  local opener = g_opt.comment
  if not opener then return end
  wline(opener..aline..g_opt.endcomment, true)
end
84 | |||
85 | -- Resync CPP line numbers. | ||
local function wsync()
  -- Nothing to do when already in sync or when CPP line markers are disabled.
  if g_synclineno == g_lineno or not g_opt.cpp then return end
  local marker = "# "..g_lineno..' "'..g_fname..'"'
  wline(marker)
  g_synclineno = g_lineno
end
92 | |||
93 | -- Dummy action flush function. Replaced with arch-specific function later. | ||
local function wflush(term)
  -- Intentionally empty: placeholder until an arch module supplies the
  -- real flush function.
end
96 | |||
97 | -- Dump all buffered output lines. | ||
local function wdumplines(out, buf)
  for _, entry in ipairs(buf) do
    if type(entry) ~= "string" then
      -- Non-string entries are callbacks that write their own output once
      -- processing has finished.
      entry(out)
    else
      assert(out:write(entry, "\n"))
    end
  end
end
108 | |||
109 | ------------------------------------------------------------------------------ | ||
110 | |||
111 | -- Emit an error. Processing continues with next statement. | ||
local function werror(msg)
  -- Build "<file>:<line>: error: <msg>:" followed by the offending line.
  local text = format("%s:%s: error: %s:\n%s",
                      g_fname, g_lineno, msg, g_curline)
  -- Level 0: do not prepend Lua position information to the message.
  error(text, 0)
end
115 | |||
116 | -- Emit a fatal error. Processing stops. | ||
local function wfatal(msg)
  -- Replace the numeric error counter with the "fatal" marker before raising,
  -- so whoever prints the caught error knows to stop processing.
  g_errcount = "fatal"
  werror(msg)
end
121 | |||
122 | -- Print a warning. Processing continues. | ||
local function wwarn(msg)
  -- Same layout as an error message, but written straight to stderr
  -- without raising.
  local text = format("%s:%s: warning: %s:\n%s\n",
                      g_fname, g_lineno, msg, g_curline)
  stderr:write(text)
end
127 | |||
128 | -- Print caught error message. But suppress excessive errors. | ||
local function wprinterr(...)
  -- A non-numeric counter means a fatal error was flagged: print the message
  -- and tell the caller to stop processing.
  if type(g_errcount) ~= "number" then
    stderr:write(...)
    return true
  end
  -- Regular error: count it and print only the first 20 messages.
  local count = g_errcount + 1
  g_errcount = count
  if count < 21 then
    stderr:write(...)
  elseif count == 21 then
    stderr:write(g_fname,
      ":*: warning: too many errors (suppressed further messages).\n")
  end
end
145 | |||
146 | ------------------------------------------------------------------------------ | ||
147 | |||
148 | -- Map holding all option handlers. | ||
149 | local opt_map = {} | ||
150 | local opt_current | ||
151 | |||
152 | -- Print error and exit with error status. | ||
local function opterror(...)
  -- The newline is written separately: a vararg in the middle of an argument
  -- list would be truncated to its first value.
  local err = stderr
  err:write("dynasm.lua: ERROR: ", ...)
  err:write("\n")
  exit(1)
end
158 | |||
159 | -- Get option parameter. | ||
local function optparam(args)
  -- args.argn is the index of the next unconsumed argument.
  local idx = args.argn
  local value = args[idx]
  if not value then
    opterror("missing parameter for option `", opt_current, "'.")
  end
  -- Consume the argument.
  args.argn = idx + 1
  return value
end
169 | |||
170 | ------------------------------------------------------------------------------ | ||
171 | |||
172 | -- Core pseudo-opcodes. | ||
173 | local map_coreop = {} | ||
174 | -- Dummy opcode map. Replaced by arch-specific map. | ||
175 | local map_op = {} | ||
176 | |||
177 | -- Forward declarations. | ||
178 | local dostmt | ||
179 | local readfile | ||
180 | |||
181 | ------------------------------------------------------------------------------ | ||
182 | |||
183 | -- Map for defines (initially empty, chains to arch-specific map). | ||
184 | local map_def = {} | ||
185 | |||
186 | -- Pseudo-opcode to define a substitution. | ||
map_coreop[".define_2"] = function(params, nparams)
  -- Without params the handler only describes its parameters.
  if not params then
    if nparams == 1 then return "name" end
    return "name, subst"
  end
  local name = params[1]
  -- A define without an explicit substitution expands to "1".
  local def = params[2] or "1"
  if not match(name, "^[%a_][%w_]*$") then werror("bad or duplicate define") end
  map_def[name] = def
end
-- The one-parameter form shares the same handler.
map_coreop[".define_1"] = map_coreop[".define_2"]
194 | |||
195 | -- Define a substitution on the command line. | ||
function opt_map.D(args)
  local spec = optparam(args)
  -- Form "name=subst".
  local name, subst = match(spec, "^([%a_][%w_]*)=(.*)$")
  if name then
    map_def[name] = subst
    return
  end
  -- Form "name" alone: define it as "1".
  if match(spec, "^[%a_][%w_]*$") then
    map_def[spec] = "1"
    return
  end
  opterror("bad define")
end
207 | |||
208 | -- Undefine a substitution on the command line. | ||
function opt_map.U(args)
  local name = optparam(args)
  -- Only a well-formed identifier may be undefined.
  if not match(name, "^[%a_][%w_]*$") then
    opterror("bad define")
  else
    map_def[name] = nil
  end
end
217 | |||
218 | -- Helper for definesubst. | ||
-- Set to the most recently substituted word during a substitution pass.
local gotsubst

-- gsub callback: replace `word` by its define, if any, and record that a
-- substitution happened.
local function definesubst_one(word)
  local replacement = map_def[word]
  if not replacement then return word end
  gotsubst = word
  return replacement
end
225 | |||
226 | -- Iteratively substitute defines. | ||
local function definesubst(stmt)
  -- Re-run the substitution until a pass changes nothing, with a cap of
  -- 100 passes.
  local pass = 0
  repeat
    pass = pass + 1
    gotsubst = false
    stmt = gsub(stmt, "#?[%w_]+", definesubst_one)
  until not gotsubst or pass == 100
  -- Still substituting after the final pass: treat as a recursive define.
  if gotsubst then wfatal("recursive define involving `"..gotsubst.."'") end
  return stmt
end
237 | |||
238 | -- Dump all defines. | ||
local function dumpdefines(out, lvl)
  -- Collect and sort the define names for a stable listing.
  local names = {}
  for key in pairs(map_def) do
    names[#names+1] = key
  end
  sort(names)
  out:write("Defines:\n")
  for _, key in ipairs(names) do
    local shown = map_def[key]
    -- With an arch module loaded, pass the value through its revdef()
    -- before printing.
    if g_arch then shown = g_arch.revdef(shown) end
    out:write(format(" %-20s %s\n", key, shown))
  end
  out:write("\n")
end
253 | |||
254 | ------------------------------------------------------------------------------ | ||
255 | |||
256 | -- Support variables for conditional assembly. | ||
257 | local condlevel = 0 | ||
258 | local condstack = {} | ||
259 | |||
260 | -- Evaluate condition with a Lua expression. Substitutions already performed. | ||
local function cond_eval(cond)
  local chunk, err = loadstring("return "..cond)
  if chunk then
    -- Empty environment: every unknown identifier evaluates to nil.
    setfenv(chunk, {})
    local ok, res = pcall(chunk)
    if ok then
      -- The number 0 counts as false; otherwise use Lua truthiness.
      return res ~= 0 and not not res
    end
    err = res
  end
  wfatal("bad condition: "..err)
end
274 | |||
275 | -- Skip statements until next conditional pseudo-opcode at the same level. | ||
local function stmtskip()
  local saved = dostmt
  local depth = 0  -- Nesting depth of .if blocks seen while skipping.
  -- Temporarily replace the statement handler with a skipper.
  dostmt = function(stmt)
    local op = match(stmt, "^%s*(%S+)")
    if op == ".if" then
      depth = depth + 1
      return
    end
    if depth ~= 0 then
      -- Inside a nested .if: only its matching .endif is of interest.
      if op == ".endif" then depth = depth - 1 end
      return
    end
    if op == ".elif" or op == ".else" or op == ".endif" then
      -- Same-level conditional: restore the real handler and let it
      -- process this statement.
      dostmt = saved
      dostmt(stmt)
    end
  end
end
291 | |||
292 | -- Pseudo-opcodes for conditional assembly. | ||
map_coreop[".if_1"] = function(params)
  if not params then return "condition" end
  -- Evaluate first; the level is only raised if evaluation did not fail.
  local taken = cond_eval(params[1])
  condlevel = condlevel + 1
  condstack[condlevel] = taken
  if not taken then stmtskip() end
end
301 | |||
map_coreop[".elif_1"] = function(params)
  if not params then return "condition" end
  if condlevel == 0 then wfatal(".elif without .if") end
  local lvl = condlevel
  local prev = condstack[lvl]
  if prev == "else" then wfatal(".elif after .else") end
  if not prev then
    -- No branch taken so far at this level: try this condition.
    local taken = cond_eval(params[1])
    if taken then
      condstack[lvl] = taken
      return
    end
  end
  -- Either an earlier branch was taken or this condition is false.
  stmtskip()
end
318 | |||
map_coreop[".else_0"] = function(params)
  if condlevel == 0 then wfatal(".else without .if") end
  local lvl = condlevel
  local prev = condstack[lvl]
  -- Mark the level so that a later .elif or .else can be rejected.
  condstack[lvl] = "else"
  if not prev then return end
  if prev == "else" then wfatal(".else after .else") end
  -- An earlier branch was already taken: skip the .else body.
  stmtskip()
end
329 | |||
map_coreop[".endif_0"] = function(params)
  if condlevel == 0 then wfatal(".endif without .if") end
  -- Close the innermost conditional.
  condlevel = condlevel - 1
end
335 | |||
336 | -- Check for unfinished conditionals. | ||
local function checkconds()
  -- Report only when no fatal error was flagged and a conditional is
  -- still open.
  if g_errcount == "fatal" or condlevel == 0 then return end
  wprinterr(g_fname, ":*: error: unbalanced conditional\n")
end
342 | |||
343 | ------------------------------------------------------------------------------ | ||
344 | |||
345 | -- Search for a file in the given path and open it for reading. | ||
local function pathopen(path, name)
  -- Use a backslash separator if package.path contains one, else a slash.
  local dirsep = "/"
  if match(package.path, "\\") then dirsep = "\\" end
  for _, dir in ipairs(path) do
    local fullname
    if dir == "" then
      fullname = name
    else
      fullname = dir..dirsep..name
    end
    local fin = io.open(fullname, "r")
    if fin then
      -- The first file that opens wins; it becomes the current file name.
      g_fname = fullname
      return fin
    end
  end
end
357 | |||
358 | -- Include a file. | ||
map_coreop[".include_1"] = function(params)
  if not params then return "filename" end
  local name = params[1]
  -- Save the per-file state that reading the include will overwrite.
  local saved_fname, saved_lineno = g_fname, g_lineno
  local saved_curline, saved_indent = g_curline, g_indent
  -- Locate and read the included file.
  local fin = pathopen(g_opt.include, name)
  if not fin then wfatal("include file `"..name.."' not found") end
  local fatal = readfile(fin)
  -- Restore the state and set the sync counter to -1.
  g_synclineno = -1
  g_fname, g_lineno = saved_fname, saved_lineno
  g_curline, g_indent = saved_curline, saved_indent
  if fatal then wfatal("in include file") end
end
372 | |||
373 | -- Make .include initially available, too. | ||
374 | map_op[".include_1"] = map_coreop[".include_1"] | ||
375 | |||
376 | ------------------------------------------------------------------------------ | ||
377 | |||
378 | -- Support variables for macros. | ||
379 | local mac_capture, mac_lineno, mac_name | ||
380 | local mac_active = {} | ||
381 | local mac_list = {} | ||
382 | |||
383 | -- Pseudo-opcode to define a macro. | ||
-- Handler for `.macro name [, params...]`.
-- Called without arguments it returns its parameter description. Otherwise it
-- validates the macro name and parameter names, then installs a statement
-- capture function (mac_capture) that collects statements until `.endmacro`
-- and finally registers the macro as an opcode handler in map_op.
map_coreop[".macro_*"] = function(mparams)
  if not mparams then return "name [, params...]" end
  -- Split off and validate macro name.
  local name = remove(mparams, 1)
  if not name then werror("missing macro name") end
  if not (match(name, "^[%a_][%w_%.]*$") or match(name, "^%.[%w_%.]+$")) then
    wfatal("bad macro name `"..name.."'")
  end
  -- Validate macro parameter names.
  local mdup = {}
  for _,mp in ipairs(mparams) do
    if not match(mp, "^[%a_][%w_]*$") then
      wfatal("bad macro parameter name `"..mp.."'")
    end
    if mdup[mp] then wfatal("duplicate macro parameter name `"..mp.."'") end
    mdup[mp] = true
  end
  -- Check for duplicate or recursive macro definitions.
  local opname = name.."_"..#mparams
  if map_op[opname] or map_op[name.."_*"] then
    wfatal("duplicate macro `"..name.."' ("..#mparams.." parameters)")
  end
  if mac_capture then wfatal("recursive macro definition") end

  -- Enable statement capture.
  local lines = {}
  mac_lineno = g_lineno
  mac_name = name
  mac_capture = function(stmt) -- Statement capture function.
    -- Stop macro definition with .endmacro pseudo-opcode.
    -- The dot is escaped so only a literal ".endmacro" terminates the macro;
    -- an unescaped "." would match any character.
    if not match(stmt, "^%s*%.endmacro%s*$") then
      lines[#lines+1] = stmt
      return
    end
    mac_capture = nil
    mac_lineno = nil
    mac_name = nil
    mac_list[#mac_list+1] = opname
    -- Add macro-op definition.
    map_op[opname] = function(params)
      -- Without params: return the parameter names and captured lines.
      if not params then return mparams, lines end
      -- Protect against recursive macro invocation.
      if mac_active[opname] then wfatal("recursive macro invocation") end
      mac_active[opname] = true
      -- Setup substitution map.
      local subst = {}
      for i,mp in ipairs(mparams) do subst[mp] = params[i] end
      local mcom
      if g_opt.maccomment and g_opt.comment then
        mcom = " MACRO "..name.." ("..#mparams..")"
        wcomment("{"..mcom)
      end
      -- Loop through all captured statements
      for _,stmt in ipairs(lines) do
        -- Substitute macro parameters.
        local st = gsub(stmt, "[%w_]+", subst)
        st = definesubst(st)
        st = gsub(st, "%s*%.%.%s*", "") -- Token paste a..b.
        if mcom and sub(st, 1, 1) ~= "|" then wcomment(st) end
        -- Emit statement. Use a protected call for better diagnostics.
        local ok, err = pcall(dostmt, st)
        if not ok then
          -- Add the captured statement to the error.
          wprinterr(err, "\n", g_indent, "| ", stmt,
                    "\t[MACRO ", name, " (", #mparams, ")]\n")
        end
      end
      if mcom then wcomment("}"..mcom) end
      mac_active[opname] = nil
    end
  end
end
456 | |||
457 | -- An .endmacro pseudo-opcode outside of a macro definition is an error. | ||
map_coreop[".endmacro_0"] = function(params)
  -- Inside a macro definition the capture function consumes .endmacro, so
  -- reaching this handler means there was no open .macro.
  local msg = ".endmacro without .macro"
  wfatal(msg)
end
461 | |||
462 | -- Dump all macros and their contents (with -PP only). | ||
-- Write the list of defined macros to `out`. With lvl > 1 the captured
-- statements of each macro are listed as well.
local function dumpmacros(out, lvl)
  sort(mac_list)
  out:write("Macros:\n")
  for _,opname in ipairs(mac_list) do
    -- opname is "<name>_<nparams>". Strip the whole numeric suffix so macros
    -- with ten or more parameters are named correctly; fall back to the
    -- two-character strip if the suffix is not numeric.
    local name = match(opname, "^(.+)_%d+$") or sub(opname, 1, -3)
    -- Calling the macro handler without params returns its definition.
    local params, lines = map_op[opname]()
    out:write(format(" %-20s %s\n", name, concat(params, ", ")))
    if lvl > 1 then
      for _,line in ipairs(lines) do
        out:write(" |", line, "\n")
      end
      out:write("\n")
    end
  end
  out:write("\n")
end
479 | |||
480 | -- Check for unfinished macro definitions. | ||
local function checkmacros()
  -- A capture function that is still installed means .endmacro never came.
  if not mac_capture then return end
  wprinterr(g_fname, ":", mac_lineno,
            ": error: unfinished .macro `", mac_name ,"'\n")
end
487 | |||
488 | ------------------------------------------------------------------------------ | ||
489 | |||
490 | -- Support variables for captures. | ||
491 | local cap_lineno, cap_name | ||
492 | local cap_buffers = {} | ||
493 | local cap_used = {} | ||
494 | |||
495 | -- Start a capture. | ||
map_coreop[".capture_1"] = function(params)
  if not params then return "name" end
  wflush()
  local capname = params[1]
  if not match(capname, "^[%a_][%w_]*$") then
    wfatal("bad capture name `"..capname.."'")
  end
  -- Captures cannot be nested.
  if cap_name then
    wfatal("already capturing to `"..cap_name.."' since line "..cap_lineno)
  end
  cap_name = capname
  cap_lineno = g_lineno
  -- Reuse an existing buffer of that name or create a fresh one, then
  -- redirect output lines into it.
  local target = cap_buffers[capname]
  if not target then
    target = {}
    cap_buffers[capname] = target
  end
  g_capbuffer = target
  g_synclineno = 0
end
514 | |||
515 | -- Stop a capture. | ||
map_coreop[".endcapture_0"] = function(params)
  wflush()
  if not cap_name then wfatal(".endcapture without a valid .capture") end
  -- Drop the capture state; output goes to the main buffer again.
  cap_name, cap_lineno = nil, nil
  g_capbuffer = nil
  g_synclineno = 0
end
524 | |||
525 | -- Dump a capture buffer. | ||
map_coreop[".dumpcapture_1"] = function(params)
  if not params then return "name" end
  wflush()
  local capname = params[1]
  if not match(capname, "^[%a_][%w_]*$") then
    wfatal("bad capture name `"..capname.."'")
  end
  cap_used[capname] = true
  -- Queue a callback instead of text: the buffer is looked up only when the
  -- output is written, so later additions to the capture are included.
  local function emit(out)
    local lines = cap_buffers[capname]
    if lines then wdumplines(out, lines) end
  end
  wline(emit)
  g_synclineno = 0
end
540 | |||
541 | -- Dump all captures and their buffers (with -PP only). | ||
-- Write the list of capture buffers and their entry counts to `out`.
-- With lvl > 1 the contents of each buffer are listed as well.
local function dumpcaptures(out, lvl)
  out:write("Captures:\n")
  for name,buf in pairs(cap_buffers) do
    out:write(format(" %-20s %4s)\n", name, "("..#buf))
    if lvl > 1 then
      local bar = rep("=", 76)
      out:write(" ", bar, "\n")
      for _,line in ipairs(buf) do
        -- A buffer may hold callback functions (queued by a .dumpcapture
        -- issued while capturing). These cannot be passed to write(), so a
        -- placeholder is printed for them.
        if type(line) == "string" then
          out:write(" ", line, "\n")
        else
          out:write(" ", "<deferred .dumpcapture>", "\n")
        end
      end
      out:write(" ", bar, "\n\n")
    end
  end
  out:write("\n")
end
557 | |||
558 | -- Check for unfinished or unused captures. | ||
local function checkcaptures()
  -- An open capture is reported on its own; unused buffers are not checked
  -- in that case.
  if cap_name then
    wprinterr(g_fname, ":", cap_lineno,
              ": error: unfinished .capture `", cap_name,"'\n")
    return
  end
  -- Every capture buffer must have been referenced by a .dumpcapture.
  for bufname in pairs(cap_buffers) do
    local used = cap_used[bufname]
    if not used then
      wprinterr(g_fname, ":*: error: missing .dumpcapture ", bufname ,"\n")
    end
  end
end
571 | |||
572 | ------------------------------------------------------------------------------ | ||
573 | |||
574 | -- Section names. | ||
575 | local map_sections = {} | ||
576 | |||
577 | -- Pseudo-opcode to define code sections. | ||
578 | -- TODO: Data sections, BSS sections. Needs extra C code and API. | ||
map_coreop[".section_*"] = function(params)
  if not params then return "name..." end
  -- Sections may only be defined once.
  if #map_sections > 0 then werror("duplicate section definition") end
  wflush()
  for sn,name in ipairs(params) do
    local secidx = sn - 1  -- Section indexes are zero-based.
    local opname = "."..name.."_0"
    local wellformed = match(name, "^[%a][%w_]*$")
    -- The name must be an identifier and must not collide with an
    -- existing ".name" opcode.
    if not wellformed or map_op[opname] or map_op["."..name.."_*"] then
      werror("bad section name `"..name.."'")
    end
    map_sections[#map_sections+1] = name
    wline(format("#define DASM_SECTION_%s\t%d", upper(name), secidx))
    -- Register ".name" as an opcode that switches to this section.
    map_op[opname] = function(params) g_arch.section(secidx) end
  end
  wline(format("#define DASM_MAXSECTION\t\t%d", #map_sections))
end
595 | |||
596 | -- Dump all sections. | ||
local function dumpsections(out, lvl)
  out:write("Sections:\n")
  -- One line per section, in definition order.
  for _, secname in ipairs(map_sections) do
    local row = format(" %s\n", secname)
    out:write(row)
  end
  out:write("\n")
end
604 | |||
605 | ------------------------------------------------------------------------------ | ||
606 | |||
607 | -- Load architecture-specific module. | ||
-- Returns an error string on failure and nothing on success.
local function loadarch(arch)
  if not match(arch, "^[%w_]+$") then return "bad arch name" end
  local modname = "dasm_"..arch
  local ok, mod = pcall(require, modname)
  if not ok then return "cannot load module: "..mod end
  g_arch = mod
  -- Hand the core callbacks to the module; it returns its flush function.
  wflush = mod.passcb(wline, werror, wfatal, wwarn)
  mod.setup(arch, g_opt)
  -- From here on the merged maps replace the core-only ones.
  map_op, map_def = mod.mergemaps(map_coreop, map_def)
end
617 | |||
618 | -- Dump architecture description. | ||
-- Command line option handler: load the named architecture (unless one is
-- already loaded), print its description plus all pseudo-opcodes and opcodes
-- to stdout, then exit with status 0.
function opt_map.dumparch(args)
  local name = optparam(args)
  if not g_arch then
    local err = loadarch(name)
    if err then opterror(err) end
  end

  -- Collect the keys of the core map and of the current opcode map.
  local t = {}
  for opname in pairs(map_coreop) do t[#t+1] = opname end
  for opname in pairs(map_op) do t[#t+1] = opname end
  sort(t)

  local out = stdout
  out:write(format("%s version %s, released %s, %s\n",
    _info.name, _info.version, _info.release, _info.url))
  g_arch.dumparch(out)

  local pseudo = true
  out:write("Pseudo-Opcodes:\n")
  for _,sname in ipairs(t) do
    -- Keys have the form "<name>_<n>" or "<name>_*".
    local opname, nparam = match(sname, "^(.+)_([0-9%*])$")
    if opname then
      -- The first name that does not start with "." begins the opcode list.
      if pseudo and sub(opname, 1, 1) ~= "." then
        out:write("\nOpcodes:\n")
        pseudo = false
      end
      local f = map_op[sname]
      local s
      -- Convert a digit to a number; "*" is kept as is.
      if nparam ~= "*" then nparam = nparam + 0 end
      if nparam == 0 then
        s = ""
      elseif type(f) == "string" then
        -- Template strings are described by the template handler.
        s = map_op[".template__"](nil, f, nparam)
      else
        -- Handlers called without params return their description.
        s = f(nil, nparam)
      end
      if type(s) == "table" then
        for _,s2 in ipairs(s) do
          out:write(format(" %-12s %s\n", opname, s2))
        end
      else
        out:write(format(" %-12s %s\n", opname, s))
      end
    end
  end
  out:write("\n")
  exit(0)
end
668 | |||
669 | -- Pseudo-opcode to set the architecture. | ||
670 | -- Only initially available (map_op is replaced when called). | ||
map_op[".arch_1"] = function(params)
  if not params then return "name" end
  -- loadarch returns an error string on failure.
  local failure = loadarch(params[1])
  if failure then
    wfatal(failure)
  end
end
676 | |||
677 | -- Dummy .arch pseudo-opcode to improve the error report. | ||
map_coreop[".arch_1"] = function(params)
  if params then
    -- This entry lives in the core map, so an actual call to it is reported
    -- as a duplicate .arch.
    wfatal("duplicate .arch statement")
  else
    return "name"
  end
end
682 | |||
683 | ------------------------------------------------------------------------------ | ||
684 | |||
685 | -- Dummy pseudo-opcode. Don't confuse '.nop' with 'nop'. | ||
map_coreop[".nop_*"] = function(params)
  -- Accepts any number of parameters and does nothing with them.
  if params then return end
  return "[ignored...]"
end
689 | |||
690 | -- Pseudo-opcodes to raise errors. | ||
map_coreop[".error_1"] = function(params)
  if not params then return "message" end
  -- Raise a regular error carrying the user-supplied text.
  local message = params[1]
  werror(message)
end
695 | |||
map_coreop[".fatal_1"] = function(params)
  if not params then return "message" end
  -- Raise a fatal error carrying the user-supplied text.
  local message = params[1]
  wfatal(message)
end
700 | |||
701 | -- Dump all user defined elements. | ||
local function dumpdef(out)
  -- A dump level of 0 disables the dump entirely.
  local level = g_opt.dumpdef
  if level == 0 then return end
  dumpsections(out, level)
  dumpdefines(out, level)
  -- Arch-specific definitions are only available once an arch is loaded.
  if g_arch then
    g_arch.dumpdef(out, level)
  end
  dumpmacros(out, level)
  dumpcaptures(out, level)
end
711 | |||
712 | ------------------------------------------------------------------------------ | ||
713 | |||
714 | -- Helper for splitstmt. | ||
-- Stack of expected closing brackets, innermost first, kept as a string.
local splitlvl

-- gsub callback for splitstmt: tracks bracket nesting and replaces a
-- character seen outside all brackets by a NUL separator.
local function splitstmt_one(c)
  if c == ")" or c == "]" then
    -- A closer must match the innermost expected closer.
    if sub(splitlvl, 1, 1) ~= c then werror("unbalanced () or []") end
    splitlvl = sub(splitlvl, 2)
    return c
  end
  if c == "(" then
    splitlvl = ")"..splitlvl
    return c
  end
  if c == "[" then
    splitlvl = "]"..splitlvl
    return c
  end
  -- Anything else (the comma, given the caller's pattern) becomes a
  -- separator only at nesting level zero.
  if splitlvl == "" then return " \0 " end
  return c
end
730 | |||
731 | -- Split statement into (pseudo-)opcode and params. | ||
-- Returns the (pseudo-)opcode and a table of its parameters.
local function splitstmt(stmt)
  -- A statement ending in a colon is a label: turn it into ".label".
  local label = match(stmt, "^%s*(.+):%s*$")
  if label then return ".label", {label} end

  -- Mark commas outside of parentheses and brackets with NUL separators.
  splitlvl = ""
  stmt = gsub(stmt, "[,%(%)%[%]]", splitstmt_one)
  if splitlvl ~= "" then werror("unbalanced () or []") end

  -- The opcode is the first run of non-space, non-NUL characters.
  local op, rest = match(stmt, "^%s*([^%s%z]+)%s*(.*)$")
  if not op then werror("bad statement syntax") end

  -- Cut the remainder at the separators and trim trailing whitespace.
  local params = {}
  for piece in gmatch(rest, "%s*(%Z+)%z?") do
    local trimmed = gsub(piece, "%s+$", "")
    params[#params+1] = trimmed
  end
  if #params > 16 then werror("too many parameters") end

  params.op = op
  return op, params
end
756 | |||
757 | -- Process a single statement. | ||
dostmt = function(stmt)
  -- Blank statements are ignored.
  if match(stmt, "^%s*$") then return end

  -- While a macro is being defined, statements are captured verbatim
  -- (before define substitution).
  if mac_capture then return mac_capture(stmt) end
  stmt = definesubst(stmt)

  -- A leading "|" marks C code: emit it without parsing.
  if sub(stmt, 1, 1) == "|" then
    local ccode = sub(stmt, 2)
    wflush()
    if sub(ccode, 1, 2) == "//" then
      wcomment(ccode)
    else
      wline(ccode, true)
    end
    return
  end

  local op, params = splitstmt(stmt)

  -- Prefer the handler for the exact parameter count, then the generic one.
  local handler = map_op[op.."_"..#params] or map_op[op.."_*"]
  if not handler then
    if not g_arch then wfatal("first statement must be .arch") end
    -- If the opcode exists with some other parameter count, say so.
    for n=0,16 do
      if map_op[op.."_"..n] then
        werror("wrong number of parameters for `"..op.."'")
      end
    end
    werror("unknown statement `"..op.."'")
  end

  -- String entries are templates and go through the template handler.
  if type(handler) == "string" then
    map_op[".template__"](params, handler)
  else
    handler(params)
  end
end
797 | |||
798 | -- Process a single line. | ||
local function doline(line)
  if g_opt.flushline then wflush() end

  -- Assembler lines have "|" as their first non-blank character.
  local indent, aline = match(line, "^(%s*)%|(.*)$")
  if not aline then
    -- Plain C line: flush pending actions, resync, copy the line through.
    wflush()
    wsync()
    wline(line, false)
    return
  end

  g_indent = indent -- Remember the indentation of the current line.

  -- "||" lines carry C code; they bypass echoing and statement splitting.
  if sub(aline, 1, 1) == "|" then
    local capturing = mac_capture
    if not capturing or g_opt.comment then wsync() end
    if capturing and g_opt.comment then wcomment(aline) end
    dostmt(aline)
    return
  end

  -- Echo the assembler line as a comment when comments are enabled.
  if g_opt.comment then
    wsync()
    wcomment(aline)
  end

  -- Drop a trailing "//" assembler comment.
  aline = gsub(aline, "//.*$", "")

  -- Semicolons separate multiple statements on one line.
  if not match(aline, ";") then
    dostmt(aline)
  else
    for stmt in gmatch(aline, "[^;]+") do dostmt(stmt) end
  end
end
842 | |||
843 | ------------------------------------------------------------------------------ | ||
844 | |||
845 | -- Write DynASM header. | ||
local function dasmhead(out)
  -- Version information of the loaded arch module.
  local arch_info = g_arch._info
  local header = format([[
/*
** This file has been pre-processed with DynASM.
** %s
** DynASM version %s, DynASM %s version %s
** DO NOT EDIT! The original file is in "%s".
*/

#if DASM_VERSION != %d
#error "Version mismatch between DynASM and included encoding engine"
#endif

]], _info.url,
    _info.version, arch_info.arch, arch_info.version,
    g_fname, _info.vernum)
  out:write(header)
end
863 | |||
-- Read the input file line by line and feed every line to doline().
-- Returns true early when wprinterr() returns a true value for a failed line.
readfile = function(fin)
  g_indent, g_lineno, g_synclineno = "", 0, -1

  for text in fin:lines() do
    g_lineno = g_lineno + 1
    g_curline = text
    -- Errors raised while processing a line are caught and reported.
    local success, msg = pcall(doline, text)
    if not success then
      if wprinterr(msg, "\n") then return true end
    end
  end
  wflush()

  -- Close the input file, unless it is stdin.
  local closed = fin == stdin or fin:close()
  assert(closed)
end
882 | |||
-- Write the buffered output lines to outfile (nil or "-" selects stdout).
local function writefile(outfile)
  local to_stdout = outfile == nil or outfile == "-"
  local fout = to_stdout and stdout or assert(io.open(outfile, "w"))

  -- Write all buffered lines.
  wdumplines(fout, g_wbuffer)

  -- Close the output file, unless it is stdout.
  local closed = to_stdout or fout:close()
  assert(closed)

  -- Optionally dump definitions: to stderr when the output went to stdout,
  -- to stdout otherwise.
  dumpdef(to_stdout and stderr or stdout)
end
903 | |||
-- Translate one input file ("-" selects stdin) to one output file.
local function translate(infile, outfile)
  -- Start with an empty line buffer and fresh line state.
  g_wbuffer = {}
  g_indent, g_lineno, g_synclineno = "", 0, -1

  -- Put header.
  wline(dasmhead)

  -- Pick the input handle; g_fname is the name used in messages.
  local use_stdin = infile == "-"
  g_fname = use_stdin and "(stdin)" or infile
  local fin = use_stdin and stdin or assert(io.open(infile, "r"))
  readfile(fin)

  -- Check for errors.
  if not g_arch then
    wprinterr(g_fname, ":*: error: missing .arch directive\n")
  end
  checkconds()
  checkmacros()
  checkcaptures()

  if g_errcount ~= 0 then
    -- Use the plural form only for a numeric count above one.
    local plural = ""
    if type(g_errcount) == "number" and g_errcount > 1 then plural = "s" end
    stderr:write(g_fname, ":*: info: ", g_errcount, " error", plural,
      " in input file -- no output file generated.\n")
    dumpdef(stderr)
    exit(1)
  end

  -- Write output file.
  writefile(outfile)
end
944 | |||
945 | ------------------------------------------------------------------------------ | ||
946 | |||
-- Print the program banner and the usage text to stdout, then exit(0).
function opt_map.help()
  stdout:write("DynASM -- ", _info.description, ".\n",
    "DynASM ", _info.version, " ", _info.release, " ", _info.url, "\n")
  stdout:write[[

Usage: dynasm [OPTION]... INFILE.dasc|-

-h, --help Display this help text.
-V, --version Display version and copyright information.

-o, --outfile FILE Output file name (default is stdout).
-I, --include DIR Add directory to the include search path.

-c, --ccomment Use /* */ comments for assembler lines.
-C, --cppcomment Use // comments for assembler lines (default).
-N, --nocomment Suppress assembler lines in output.
-M, --maccomment Show macro expansions as comments (default off).

-L, --nolineno Suppress CPP line number information in output.
-F, --flushline Flush action list for every line.

-D NAME[=SUBST] Define a substitution.
-U NAME Undefine a substitution.

-P, --dumpdef Dump defines, macros, etc. Repeat for more output.
-A, --dumparch ARCH Load architecture ARCH and dump description.
]]
  exit(0)
end
977 | |||
-- Print name, version, release, URL and copyright to stdout, then exit(0).
function opt_map.version()
  local banner = format("%s version %s, released %s\n%s\n\n%s",
    _info.name, _info.version, _info.release, _info.url, _info.copyright)
  stdout:write(banner)
  exit(0)
end
984 | |||
-- Handlers for the remaining options. Each one only updates g_opt.

-- Output file name, taken from the option parameter.
function opt_map.outfile(args)
  g_opt.outfile = optparam(args)
end

-- Include directory; new entries go to the front of the list.
function opt_map.include(args)
  insert(g_opt.include, 1, optparam(args))
end

-- Comment style used for assembler lines (false disables them).
function opt_map.ccomment()
  g_opt.comment = "/*|"
  g_opt.endcomment = " */"
end

function opt_map.cppcomment()
  g_opt.comment = "//|"
  g_opt.endcomment = ""
end

function opt_map.nocomment()
  g_opt.comment = false
end

-- Simple flags.
function opt_map.maccomment()
  g_opt.maccomment = true
end

function opt_map.nolineno()
  g_opt.cpp = false
end

function opt_map.flushline()
  g_opt.flushline = true
end

-- Every occurrence of the option raises the dump level by one.
function opt_map.dumpdef()
  g_opt.dumpdef = g_opt.dumpdef + 1
end
995 | |||
996 | ------------------------------------------------------------------------------ | ||
997 | |||
-- Maps each short option letter to the name of its long option.
local opt_alias = {
  h = "help",
  ["?"] = "help",
  V = "version",
  o = "outfile",
  I = "include",
  c = "ccomment",
  C = "cppcomment",
  N = "nocomment",
  M = "maccomment",
  L = "nolineno",
  F = "flushline",
  P = "dumpdef",
  A = "dumparch",
}
1006 | |||
-- Parse a single option: look up its handler by long name or by short
-- alias and call it with the argument list.
local function parseopt(opt, args)
  -- Spelling of the current option: one dash for a single character,
  -- two dashes otherwise.
  if #opt == 1 then
    opt_current = "-"..opt
  else
    opt_current = "--"..opt
  end

  local handler = opt_map[opt]
  if not handler then handler = opt_map[opt_alias[opt]] end
  if not handler then
    opterror("unrecognized option `", opt_current, "'. Try `--help'.\n")
  end
  handler(args)
end
1016 | |||
-- Parse the command line: set defaults, process the leading options and
-- translate the single remaining input file.
local function parseargs(args)
  -- Default options.
  g_opt.comment = "//|"
  g_opt.endcomment = ""
  g_opt.cpp = true
  g_opt.dumpdef = 0
  g_opt.include = { "" }

  -- Consume arguments while they look like options. args.argn is the index
  -- of the next unprocessed argument.
  args.argn = 1
  while true do
    local word = args[args.argn]
    if not word then break end
    -- dash is "-" for a long option and "" for a group of short options.
    local dash, name = match(word, "^%-(%-?)(.+)")
    if not name then break end
    args.argn = args.argn + 1
    if dash ~= "" then
      -- Long option.
      parseopt(name, args)
    else
      -- Every character is a short option of its own.
      for ch in gmatch(name, ".") do parseopt(ch, args) end
    end
  end

  -- Exactly one non-option argument is expected. With none, a requested
  -- definition dump is still printed; anything else shows the help text.
  local nargs = #args - args.argn + 1
  if nargs == 0 and g_opt.dumpdef > 0 then return dumpdef(stdout) end
  if nargs ~= 1 then opt_map.help() end

  -- Translate a single input file to a single output file
  -- TODO: Handle multiple files?
  translate(args[args.argn], g_opt.outfile)
end
1056 | |||
1057 | ------------------------------------------------------------------------------ | ||
1058 | |||
-- Prepend the directory part of arg[0] (everything up to the last '/' or
-- '\') to the Lua module search path, if there is one.
local arg = arg
if arg then
  local script = arg[0]
  if script then
    local dir = match(script, "^(.*[/\\])")
    if dir then package.path = dir.."?.lua;"..package.path end
  end
end

-- Start DynASM with the arguments passed to this chunk.
parseargs{...}
1068 | |||
1069 | ------------------------------------------------------------------------------ | ||
1070 | |||
diff --git a/etc/strict.lua b/etc/strict.lua new file mode 100644 index 00000000..604619dd --- /dev/null +++ b/etc/strict.lua | |||
@@ -0,0 +1,41 @@ | |||
1 | -- | ||
2 | -- strict.lua | ||
3 | -- checks uses of undeclared global variables | ||
4 | -- All global variables must be 'declared' through a regular assignment | ||
5 | -- (even assigning nil will do) in a main chunk before being used | ||
6 | -- anywhere or assigned to inside a function. | ||
7 | -- | ||
8 | |||
9 | local getinfo, error, rawset, rawget = debug.getinfo, error, rawset, rawget | ||
10 | |||
-- Use the existing metatable of the global table, or install a new one.
local mt = getmetatable(_G)
if mt == nil then
  local fresh = {}
  setmetatable(_G, fresh)
  mt = fresh
end

-- Set of global names that have been declared so far.
mt.__declared = {}
18 | |||
-- Return the `what` field reported by getinfo() for stack level 3,
-- or "C" when no information is available for that level.
local function what ()
  local info = getinfo(3, "S")
  local kind = info and info.what
  if not kind then kind = "C" end
  return kind
end
23 | |||
-- Called on assignment to a global that does not exist yet. A first
-- assignment is accepted (and the name recorded as declared) only when
-- what() reports "main" or "C"; anywhere else it raises an error at the
-- caller's level.
mt.__newindex = function (t, n, v)
  if not mt.__declared[n] then
    local w = what()
    if w ~= "main" and w ~= "C" then
      -- tostring() keeps the message intact for keys that cannot be
      -- concatenated directly (tables, booleans, ...).
      error("assign to undeclared variable '"..tostring(n).."'", 2)
    end
    mt.__declared[n] = true
  end
  rawset(t, n, v)
end
34 | |||
-- Called on a read of a global that does not exist. Raises an error at the
-- caller's level unless the name has been declared or what() reports "C".
mt.__index = function (t, n)
  if not mt.__declared[n] and what() ~= "C" then
    -- tostring() keeps the message intact for keys that cannot be
    -- concatenated directly (tables, booleans, ...).
    error("variable '"..tostring(n).."' is not declared", 2)
  end
  return rawget(t, n)
end
41 | |||
diff --git a/lib/.gitignore b/lib/.gitignore new file mode 100644 index 00000000..500e2855 --- /dev/null +++ b/lib/.gitignore | |||
@@ -0,0 +1 @@ | |||
vmdef.lua | |||
diff --git a/lib/bc.lua b/lib/bc.lua new file mode 100644 index 00000000..532f2493 --- /dev/null +++ b/lib/bc.lua | |||
@@ -0,0 +1,182 @@ | |||
1 | ---------------------------------------------------------------------------- | ||
2 | -- LuaJIT bytecode listing module. | ||
3 | -- | ||
4 | -- Copyright (C) 2005-2009 Mike Pall. All rights reserved. | ||
5 | -- Released under the MIT/X license. See Copyright Notice in luajit.h | ||
6 | ---------------------------------------------------------------------------- | ||
7 | -- | ||
8 | -- This module lists the bytecode of a Lua function. If it's loaded by -jbc | ||
9 | -- it hooks into the parser and lists all functions of a chunk as they | ||
10 | -- are parsed. | ||
11 | -- | ||
12 | -- Example usage: | ||
13 | -- | ||
14 | -- luajit -jbc -e 'local x=0; for i=1,1e6 do x=x+i end; print(x)' | ||
15 | -- luajit -jbc=- foo.lua | ||
16 | -- luajit -jbc=foo.list foo.lua | ||
17 | -- | ||
18 | -- Default output is to stderr. To redirect the output to a file, pass a | ||
19 | -- filename as an argument (use '-' for stdout) or set the environment | ||
20 | -- variable LUAJIT_LISTFILE. The file is overwritten every time the module | ||
21 | -- is started. | ||
22 | -- | ||
23 | -- This module can also be used programmatically: | ||
24 | -- | ||
25 | -- local bc = require("jit.bc") | ||
26 | -- | ||
27 | -- local function foo() print("hello") end | ||
28 | -- | ||
29 | -- bc.dump(foo) --> -- BYTECODE -- [...] | ||
30 | -- print(bc.line(foo, 2)) --> 0002 KSTR 1 1 ; "hello" | ||
31 | -- | ||
32 | -- local out = { | ||
33 | -- -- Do something with each line: | ||
34 | -- write = function(t, ...) io.write(...) end, | ||
35 | -- close = function(t) end, | ||
36 | -- flush = function(t) end, | ||
37 | -- } | ||
38 | -- bc.dump(foo, out) | ||
39 | -- | ||
40 | ------------------------------------------------------------------------------ | ||
41 | |||
42 | -- Cache some library functions and objects. | ||
43 | local jit = require("jit") | ||
44 | assert(jit.version_num == 20000, "LuaJIT core/library version mismatch") | ||
45 | local jutil = require("jit.util") | ||
46 | local vmdef = require("jit.vmdef") | ||
47 | local bit = require("bit") | ||
48 | local sub, gsub, format = string.sub, string.gsub, string.format | ||
49 | local byte, band, shr = string.byte, bit.band, bit.rshift | ||
50 | local funcinfo, funcbc, funck = jutil.funcinfo, jutil.funcbc, jutil.funck | ||
51 | local funcuvname = jutil.funcuvname | ||
52 | local bcnames = vmdef.bcnames | ||
53 | local stdout, stderr = io.stdout, io.stderr | ||
54 | |||
55 | ------------------------------------------------------------------------------ | ||
56 | |||
-- Replacement for a single control character: newline, carriage return
-- and tab get their usual backslash escapes, everything else becomes a
-- three-digit decimal escape.
local function ctlsub(c)
  if c == "\n" then return "\\n"
  elseif c == "\r" then return "\\r"
  elseif c == "\t" then return "\\t"
  else return format("\\%03d", byte(c))
  end
end
65 | |||
-- Return the listing line for the bytecode instruction at pc of func, or
-- nothing if funcbc() yields no instruction for that pc. The optional
-- prefix is printed between the pc and the opcode name.
local function bcline(func, pc, prefix)
  local ins, mode = funcbc(func, pc)
  if not ins then return end

  -- Split the mode value into the three fields tested below.
  local ma = band(mode, 7)
  local mb = band(mode, 15*8)
  local mc = band(mode, 15*128)

  local a = band(shr(ins, 8), 0xff)
  -- The opcode (low byte) selects a six-character slot in bcnames.
  local oidx = 6*band(ins, 0xff)
  local opname = sub(bcnames, oidx+1, oidx+6)
  local head = format("%04d %s %-6s %3s ",
    pc, prefix or " ", opname, ma == 0 and "" or a)

  local d = shr(ins, 16)
  if mc == 13*128 then -- BCMjump
    -- Without an A operand the last two characters of the head are cut.
    local lead = head
    if ma == 0 then lead = sub(head, 1, -3) end
    return format("%s=> %04d\n", lead, pc+d-0x7fff)
  end
  if mb ~= 0 then d = band(d, 0xff) end

  -- kc is the optional annotation printed after the ';'.
  local kc
  if mc == 10*128 then -- BCMstr
    local str = funck(func, -d-1)
    local fmt = '"%s"'
    if #str > 40 then fmt = '"%.40s"~' end
    kc = format(fmt, gsub(str, "%c", ctlsub))
  elseif mc == 9*128 then -- BCMnum
    kc = funck(func, d)
  elseif mc == 12*128 then -- BCMfunc
    local fi = funcinfo(funck(func, -d-1))
    if fi.ffid then
      kc = vmdef.ffnames[fi.ffid]
    else
      kc = fi.loc
    end
  elseif mc == 5*128 then -- BCMuv
    kc = funcuvname(func, d)
  end
  if ma == 5 then -- BCMuv
    local ka = funcuvname(func, a)
    if kc then
      kc = ka.." ; "..kc
    else
      kc = ka
    end
  end

  if mb ~= 0 then
    local b = shr(ins, 24)
    if kc then
      return format("%s%3d %3d ; %s\n", head, b, d, kc)
    else
      return format("%s%3d %3d\n", head, b, d)
    end
  end
  if kc then
    return format("%s%3d ; %s\n", head, d, kc)
  end
  -- Values above 32767 are shown as negative numbers.
  if mc == 7*128 and d > 32767 then d = d - 65536 end -- BCMlits
  return format("%s%3d\n", head, d)
end
112 | |||
-- Collect the branch targets of a function. Returns a table whose keys are
-- the target pcs of all jump-mode instructions.
local function bctargets(func)
  local marks = {}
  local pc = 1
  while pc <= 1000000000 do
    local ins, mode = funcbc(func, pc)
    if not ins then break end
    if band(mode, 15*128) == 13*128 then -- BCMjump
      local dest = pc + shr(ins, 16) - 0x7fff
      marks[dest] = true
    end
    pc = pc + 1
  end
  return marks
end
123 | |||
-- Dump the bytecode instructions of a function to `out` (stdout if omitted).
-- Lines that are branch targets get a "=>" prefix.
local function bcdump(func, out)
  out = out or stdout
  local info = funcinfo(func)
  out:write(format("-- BYTECODE -- %s-%d\n", info.loc, info.lastlinedefined))

  local marks = bctargets(func)
  local pc = 1
  while pc <= 1000000000 do
    local text = bcline(func, pc, marks[pc] and "=>")
    if not text then break end
    out:write(text)
    pc = pc + 1
  end

  out:write("\n")
  out:flush()
end
138 | |||
139 | ------------------------------------------------------------------------------ | ||
140 | |||
141 | -- Active flag and output file handle. | ||
142 | local active, out | ||
143 | |||
-- Handler registered with jit.attach(): dumps the given function to the
-- current output handle.
local function h_list(fn)
  return bcdump(fn, out)
end
148 | |||
-- Detach the list handler and release the output handle. Does nothing
-- when listing is not active.
local function bclistoff()
  if not active then return end
  active = false
  jit.attach(h_list)
  -- Only close handles other than stdout and stderr.
  local is_std = out == stdout or out == stderr
  if out and not is_std then out:close() end
  out = nil
end
158 | |||
-- Open the output file and attach the list handler. The file name comes
-- from the argument or, failing that, from LUAJIT_LISTFILE; "-" selects
-- stdout and no name at all selects stderr.
local function bcliston(outfile)
  if active then bclistoff() end
  outfile = outfile or os.getenv("LUAJIT_LISTFILE")
  if not outfile then
    out = stderr
  elseif outfile == "-" then
    out = stdout
  else
    out = assert(io.open(outfile, "w"))
  end
  jit.attach(h_list, "bc")
  active = true
end
171 | |||
-- Public module functions.
module(...)

-- Listing helpers.
line, dump, targets = bcline, bcdump, bctargets

-- Handler control; `start` is the entry point for the -j command line option.
on, off = bcliston, bclistoff
start = bcliston
182 | |||
diff --git a/lib/dis_x64.lua b/lib/dis_x64.lua new file mode 100644 index 00000000..da3d63f8 --- /dev/null +++ b/lib/dis_x64.lua | |||
@@ -0,0 +1,19 @@ | |||
1 | ---------------------------------------------------------------------------- | ||
2 | -- LuaJIT x64 disassembler wrapper module. | ||
3 | -- | ||
4 | -- Copyright (C) 2005-2009 Mike Pall. All rights reserved. | ||
5 | -- Released under the MIT/X license. See Copyright Notice in luajit.h | ||
6 | ---------------------------------------------------------------------------- | ||
7 | -- This module just exports the 64 bit functions from the combined | ||
8 | -- x86/x64 disassembler module. All the interesting stuff is there. | ||
9 | ------------------------------------------------------------------------------ | ||
10 | |||
-- Keep require reachable as a local for the code after module(...).
local require = require

module(...)

-- Re-export the 64 bit entry points of the combined x86/x64 module.
local x86 = require(_PACKAGE.."dis_x86")

create, disass = x86.create64, x86.disass64
19 | |||
diff --git a/lib/dis_x86.lua b/lib/dis_x86.lua new file mode 100644 index 00000000..8f127bee --- /dev/null +++ b/lib/dis_x86.lua | |||
@@ -0,0 +1,824 @@ | |||
1 | ---------------------------------------------------------------------------- | ||
2 | -- LuaJIT x86/x64 disassembler module. | ||
3 | -- | ||
4 | -- Copyright (C) 2005-2009 Mike Pall. All rights reserved. | ||
5 | -- Released under the MIT/X license. See Copyright Notice in luajit.h | ||
6 | ---------------------------------------------------------------------------- | ||
7 | -- This is a helper module used by the LuaJIT machine code dumper module. | ||
8 | -- | ||
9 | -- Sending small code snippets to an external disassembler and mixing the | ||
10 | -- output with our own stuff was too fragile. So I had to bite the bullet | ||
11 | -- and write yet another x86 disassembler. Oh well ... | ||
12 | -- | ||
13 | -- The output format is very similar to what ndisasm generates. But it has | ||
14 | -- been developed independently by looking at the opcode tables from the | ||
15 | -- Intel and AMD manuals. The supported instruction set is quite extensive | ||
16 | -- and reflects what a current generation Intel or AMD CPU implements in | ||
17 | -- 32 bit and 64 bit mode. Yes, this includes MMX, SSE, SSE2, SSE3, SSSE3, | ||
18 | -- SSE4.1, SSE4.2, SSE4a and even privileged and hypervisor (VMX/SVM) | ||
19 | -- instructions. | ||
20 | -- | ||
21 | -- Notes: | ||
22 | -- * The (useless) a16 prefix, 3DNow and pre-586 opcodes are unsupported. | ||
23 | -- * No attempt at optimization has been made -- it's fast enough for my needs. | ||
24 | -- * The public API may change when more architectures are added. | ||
25 | ------------------------------------------------------------------------------ | ||
26 | |||
27 | local type = type | ||
28 | local sub, byte, format = string.sub, string.byte, string.format | ||
29 | local match, gmatch, gsub = string.match, string.gmatch, string.gsub | ||
30 | local lower, rep = string.lower, string.rep | ||
31 | |||
32 | -- Map for 1st opcode byte in 32 bit mode. Ugly? Well ... read on. | ||
33 | local map_opc1_32 = { | ||
34 | --0x | ||
35 | [0]="addBmr","addVmr","addBrm","addVrm","addBai","addVai","push es","pop es", | ||
36 | "orBmr","orVmr","orBrm","orVrm","orBai","orVai","push cs","opc2*", | ||
37 | --1x | ||
38 | "adcBmr","adcVmr","adcBrm","adcVrm","adcBai","adcVai","push ss","pop ss", | ||
39 | "sbbBmr","sbbVmr","sbbBrm","sbbVrm","sbbBai","sbbVai","push ds","pop ds", | ||
40 | --2x | ||
41 | "andBmr","andVmr","andBrm","andVrm","andBai","andVai","es:seg","daa", | ||
42 | "subBmr","subVmr","subBrm","subVrm","subBai","subVai","cs:seg","das", | ||
43 | --3x | ||
44 | "xorBmr","xorVmr","xorBrm","xorVrm","xorBai","xorVai","ss:seg","aaa", | ||
45 | "cmpBmr","cmpVmr","cmpBrm","cmpVrm","cmpBai","cmpVai","ds:seg","aas", | ||
46 | --4x | ||
47 | "incVR","incVR","incVR","incVR","incVR","incVR","incVR","incVR", | ||
48 | "decVR","decVR","decVR","decVR","decVR","decVR","decVR","decVR", | ||
49 | --5x | ||
50 | "pushUR","pushUR","pushUR","pushUR","pushUR","pushUR","pushUR","pushUR", | ||
51 | "popUR","popUR","popUR","popUR","popUR","popUR","popUR","popUR", | ||
52 | --6x | ||
53 | "sz*pushaw,pusha","sz*popaw,popa","boundVrm","arplWmr", | ||
54 | "fs:seg","gs:seg","o16:","a16", | ||
55 | "pushUi","imulVrmi","pushBs","imulVrms", | ||
56 | "insb","insVS","outsb","outsVS", | ||
57 | --7x | ||
58 | "joBj","jnoBj","jbBj","jnbBj","jzBj","jnzBj","jbeBj","jaBj", | ||
59 | "jsBj","jnsBj","jpeBj","jpoBj","jlBj","jgeBj","jleBj","jgBj", | ||
60 | --8x | ||
61 | "arith!Bmi","arith!Vmi","arith!Bmi","arith!Vms", | ||
62 | "testBmr","testVmr","xchgBrm","xchgVrm", | ||
63 | "movBmr","movVmr","movBrm","movVrm", | ||
64 | "movVmg","leaVrm","movWgm","popUm", | ||
65 | --9x | ||
66 | "nop*xchgVaR|pause|xchgWaR|repne nop","xchgVaR","xchgVaR","xchgVaR", | ||
67 | "xchgVaR","xchgVaR","xchgVaR","xchgVaR", | ||
68 | "sz*cbw,cwde,cdqe","sz*cwd,cdq,cqo","call farViw","wait", | ||
69 | "sz*pushfw,pushf","sz*popfw,popf","sahf","lahf", | ||
70 | --Ax | ||
71 | "movBao","movVao","movBoa","movVoa", | ||
72 | "movsb","movsVS","cmpsb","cmpsVS", | ||
73 | "testBai","testVai","stosb","stosVS", | ||
74 | "lodsb","lodsVS","scasb","scasVS", | ||
75 | --Bx | ||
76 | "movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi", | ||
77 | "movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI", | ||
78 | --Cx | ||
79 | "shift!Bmu","shift!Vmu","retBw","ret","$lesVrm","$ldsVrm","movBmi","movVmi", | ||
80 | "enterBwu","leave","retfBw","retf","int3","intBu","into","iretVS", | ||
81 | --Dx | ||
82 | "shift!Bm1","shift!Vm1","shift!Bmc","shift!Vmc","aamBu","aadBu","salc","xlatb", | ||
83 | "fp*0","fp*1","fp*2","fp*3","fp*4","fp*5","fp*6","fp*7", | ||
84 | --Ex | ||
85 | "loopneBj","loopeBj","loopBj","sz*jcxzBj,jecxzBj,jrcxzBj", | ||
86 | "inBau","inVau","outBua","outVua", | ||
87 | "callVj","jmpVj","jmp farViw","jmpBj","inBad","inVad","outBda","outVda", | ||
88 | --Fx | ||
89 | "lock:","int1","repne:rep","rep:","hlt","cmc","testb!Bm","testv!Vm", | ||
90 | "clc","stc","cli","sti","cld","std","incb!Bm","incd!Vm", | ||
91 | } | ||
92 | assert(#map_opc1_32 == 255) | ||
93 | |||
94 | -- Map for 1st opcode byte in 64 bit mode (overrides only). | ||
95 | local map_opc1_64 = setmetatable({ | ||
96 | [0x06]=false, [0x07]=false, [0x0e]=false, | ||
97 | [0x16]=false, [0x17]=false, [0x1e]=false, [0x1f]=false, | ||
98 | [0x27]=false, [0x2f]=false, [0x37]=false, [0x3f]=false, | ||
99 | [0x60]=false, [0x61]=false, [0x62]=false, [0x63]="movsxdVrDmt", [0x67]="a32:", | ||
100 | [0x40]="rex*", [0x41]="rex*b", [0x42]="rex*x", [0x43]="rex*xb", | ||
101 | [0x44]="rex*r", [0x45]="rex*rb", [0x46]="rex*rx", [0x47]="rex*rxb", | ||
102 | [0x48]="rex*w", [0x49]="rex*wb", [0x4a]="rex*wx", [0x4b]="rex*wxb", | ||
103 | [0x4c]="rex*wr", [0x4d]="rex*wrb", [0x4e]="rex*wrx", [0x4f]="rex*wrxb", | ||
104 | [0x82]=false, [0x9a]=false, [0xc4]=false, [0xc5]=false, [0xce]=false, | ||
105 | [0xd4]=false, [0xd5]=false, [0xd6]=false, [0xea]=false, | ||
106 | }, { __index = map_opc1_32 }) | ||
107 | |||
108 | -- Map for 2nd opcode byte (0F xx). True CISC hell. Hey, I told you. | ||
109 | -- Prefix dependent MMX/SSE opcodes: (none)|rep|o16|repne, -|F3|66|F2 | ||
110 | local map_opc2 = { | ||
111 | --0x | ||
112 | [0]="sldt!Dmp","sgdt!Ump","larVrm","lslVrm",nil,"syscall","clts","sysret", | ||
113 | "invd","wbinvd",nil,"ud1",nil,"$prefetch!Bm","femms","3dnowMrmu", | ||
114 | --1x | ||
115 | "movupsXrm|movssXrm|movupdXrm|movsdXrm", | ||
116 | "movupsXmr|movssXmr|movupdXmr|movsdXmr", | ||
117 | "movhlpsXrm$movlpsXrm|movsldupXrm|movlpdXrm|movddupXrm", | ||
118 | "movlpsXmr||movlpdXmr", | ||
119 | "unpcklpsXrm||unpcklpdXrm", | ||
120 | "unpckhpsXrm||unpckhpdXrm", | ||
121 | "movlhpsXrm$movhpsXrm|movshdupXrm|movhpdXrm", | ||
122 | "movhpsXmr||movhpdXmr", | ||
123 | "$prefetcht!Bm","hintnopVm","hintnopVm","hintnopVm", | ||
124 | "hintnopVm","hintnopVm","hintnopVm","hintnopVm", | ||
125 | --2x | ||
126 | "movUmx$","movUmy$","movUxm$","movUym$","movUmz$",nil,"movUzm$",nil, | ||
127 | "movapsXrm||movapdXrm", | ||
128 | "movapsXmr||movapdXmr", | ||
129 | "cvtpi2psXrMm|cvtsi2ssXrVm|cvtpi2pdXrMm|cvtsi2sdXrVm", | ||
130 | "movntpsXmr|movntssXmr|movntpdXmr|movntsdXmr", | ||
131 | "cvttps2piMrXm|cvttss2siVrXm|cvttpd2piMrXm|cvttsd2siVrXm", | ||
132 | "cvtps2piMrXm|cvtss2siVrXm|cvtpd2piMrXm|cvtsd2siVrXm", | ||
133 | "ucomissXrm||ucomisdXrm", | ||
134 | "comissXrm||comisdXrm", | ||
135 | --3x | ||
136 | "wrmsr","rdtsc","rdmsr","rdpmc","sysenter","sysexit",nil,"getsec", | ||
137 | "opc3*38",nil,"opc3*3a",nil,nil,nil,nil,nil, | ||
138 | --4x | ||
139 | "cmovoVrm","cmovnoVrm","cmovbVrm","cmovnbVrm", | ||
140 | "cmovzVrm","cmovnzVrm","cmovbeVrm","cmovaVrm", | ||
141 | "cmovsVrm","cmovnsVrm","cmovpeVrm","cmovpoVrm", | ||
142 | "cmovlVrm","cmovgeVrm","cmovleVrm","cmovgVrm", | ||
143 | --5x | ||
144 | "movmskpsVrXm$||movmskpdVrXm$","sqrtpsXrm|sqrtssXrm|sqrtpdXrm|sqrtsdXrm", | ||
145 | "rsqrtpsXrm|rsqrtssXrm","rcppsXrm|rcpssXrm", | ||
146 | "andpsXrm||andpdXrm","andnpsXrm||andnpdXrm", | ||
147 | "orpsXrm||orpdXrm","xorpsXrm||xorpdXrm", | ||
148 | "addpsXrm|addssXrm|addpdXrm|addsdXrm","mulpsXrm|mulssXrm|mulpdXrm|mulsdXrm", | ||
149 | "cvtps2pdXrm|cvtss2sdXrm|cvtpd2psXrm|cvtsd2ssXrm", | ||
150 | "cvtdq2psXrm|cvttps2dqXrm|cvtps2dqXrm", | ||
151 | "subpsXrm|subssXrm|subpdXrm|subsdXrm","minpsXrm|minssXrm|minpdXrm|minsdXrm", | ||
152 | "divpsXrm|divssXrm|divpdXrm|divsdXrm","maxpsXrm|maxssXrm|maxpdXrm|maxsdXrm", | ||
153 | --6x | ||
154 | "punpcklbwPrm","punpcklwdPrm","punpckldqPrm","packsswbPrm", | ||
155 | "pcmpgtbPrm","pcmpgtwPrm","pcmpgtdPrm","packuswbPrm", | ||
156 | "punpckhbwPrm","punpckhwdPrm","punpckhdqPrm","packssdwPrm", | ||
157 | "||punpcklqdqXrm","||punpckhqdqXrm", | ||
158 | "movPrVSm","movqMrm|movdquXrm|movdqaXrm", | ||
159 | --7x | ||
160 | "pshufwMrmu|pshufhwXrmu|pshufdXrmu|pshuflwXrmu","pshiftw!Pmu", | ||
161 | "pshiftd!Pmu","pshiftq!Mmu||pshiftdq!Xmu", | ||
162 | "pcmpeqbPrm","pcmpeqwPrm","pcmpeqdPrm","emms|", | ||
163 | "vmreadUmr||extrqXmuu$|insertqXrmuu$","vmwriteUrm||extrqXrm$|insertqXrm$", | ||
164 | nil,nil, | ||
165 | "||haddpdXrm|haddpsXrm","||hsubpdXrm|hsubpsXrm", | ||
166 | "movVSmMr|movqXrm|movVSmXr","movqMmr|movdquXmr|movdqaXmr", | ||
167 | --8x | ||
168 | "joVj","jnoVj","jbVj","jnbVj","jzVj","jnzVj","jbeVj","jaVj", | ||
169 | "jsVj","jnsVj","jpeVj","jpoVj","jlVj","jgeVj","jleVj","jgVj", | ||
170 | --9x | ||
171 | "setoBm","setnoBm","setbBm","setnbBm","setzBm","setnzBm","setbeBm","setaBm", | ||
172 | "setsBm","setnsBm","setpeBm","setpoBm","setlBm","setgeBm","setleBm","setgBm", | ||
173 | --Ax | ||
174 | "push fs","pop fs","cpuid","btVmr","shldVmru","shldVmrc",nil,nil, | ||
175 | "push gs","pop gs","rsm","btsVmr","shrdVmru","shrdVmrc","fxsave!Dmp","imulVrm", | ||
176 | --Bx | ||
177 | "cmpxchgBmr","cmpxchgVmr","$lssVrm","btrVmr", | ||
178 | "$lfsVrm","$lgsVrm","movzxVrBmt","movzxVrWmt", | ||
179 | "|popcntVrm","ud2Dp","bt!Vmu","btcVmr", | ||
180 | "bsfVrm","bsrVrm|lzcntVrm|bsrWrm","movsxVrBmt","movsxVrWmt", | ||
181 | --Cx | ||
182 | "xaddBmr","xaddVmr", | ||
183 | "cmppsXrmu|cmpssXrmu|cmppdXrmu|cmpsdXrmu","$movntiVmr|", | ||
184 | "pinsrwPrWmu","pextrwDrPmu", | ||
185 | "shufpsXrmu||shufpdXrmu","$cmpxchg!Qmp", | ||
186 | "bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR", | ||
187 | --Dx | ||
188 | "||addsubpdXrm|addsubpsXrm","psrlwPrm","psrldPrm","psrlqPrm", | ||
189 | "paddqPrm","pmullwPrm", | ||
190 | "|movq2dqXrMm|movqXmr|movdq2qMrXm$","pmovmskbVrMm||pmovmskbVrXm", | ||
191 | "psubusbPrm","psubuswPrm","pminubPrm","pandPrm", | ||
192 | "paddusbPrm","padduswPrm","pmaxubPrm","pandnPrm", | ||
193 | --Ex | ||
194 | "pavgbPrm","psrawPrm","psradPrm","pavgwPrm", | ||
195 | "pmulhuwPrm","pmulhwPrm", | ||
196 | "|cvtdq2pdXrm|cvttpd2dqXrm|cvtpd2dqXrm","$movntqMmr||$movntdqXmr", | ||
197 | "psubsbPrm","psubswPrm","pminswPrm","porPrm", | ||
198 | "paddsbPrm","paddswPrm","pmaxswPrm","pxorPrm", | ||
199 | --Fx | ||
200 | "|||lddquXrm","psllwPrm","pslldPrm","psllqPrm", | ||
201 | "pmuludqPrm","pmaddwdPrm","psadbwPrm","maskmovqMrm||maskmovdquXrm$", | ||
202 | "psubbPrm","psubwPrm","psubdPrm","psubqPrm", | ||
203 | "paddbPrm","paddwPrm","padddPrm","ud", | ||
204 | } | ||
205 | assert(map_opc2[255] == "ud") | ||
206 | |||
207 | -- Map for three-byte opcodes. Can't wait for their next invention. | ||
208 | local map_opc3 = { | ||
209 | ["38"] = { -- [66] 0f 38 xx | ||
210 | --0x | ||
211 | [0]="pshufbPrm","phaddwPrm","phadddPrm","phaddswPrm", | ||
212 | "pmaddubswPrm","phsubwPrm","phsubdPrm","phsubswPrm", | ||
213 | "psignbPrm","psignwPrm","psigndPrm","pmulhrswPrm", | ||
214 | nil,nil,nil,nil, | ||
215 | --1x | ||
216 | "||pblendvbXrma",nil,nil,nil, | ||
217 | "||blendvpsXrma","||blendvpdXrma",nil,"||ptestXrm", | ||
218 | nil,nil,nil,nil, | ||
219 | "pabsbPrm","pabswPrm","pabsdPrm",nil, | ||
220 | --2x | ||
221 | "||pmovsxbwXrm","||pmovsxbdXrm","||pmovsxbqXrm","||pmovsxwdXrm", | ||
222 | "||pmovsxwqXrm","||pmovsxdqXrm",nil,nil, | ||
223 | "||pmuldqXrm","||pcmpeqqXrm","||$movntdqaXrm","||packusdwXrm", | ||
224 | nil,nil,nil,nil, | ||
225 | --3x | ||
226 | "||pmovzxbwXrm","||pmovzxbdXrm","||pmovzxbqXrm","||pmovzxwdXrm", | ||
227 | "||pmovzxwqXrm","||pmovzxdqXrm",nil,"||pcmpgtqXrm", | ||
228 | "||pminsbXrm","||pminsdXrm","||pminuwXrm","||pminudXrm", | ||
229 | "||pmaxsbXrm","||pmaxsdXrm","||pmaxuwXrm","||pmaxudXrm", | ||
230 | --4x | ||
231 | "||pmulddXrm","||phminposuwXrm", | ||
232 | --Fx | ||
233 | [0xf0] = "|||crc32TrBmt",[0xf1] = "|||crc32TrVmt", | ||
234 | }, | ||
235 | |||
236 | ["3a"] = { -- [66] 0f 3a xx | ||
237 | --0x | ||
238 | [0x00]=nil,nil,nil,nil,nil,nil,nil,nil, | ||
239 | "||roundpsXrmu","||roundpdXrmu","||roundssXrmu","||roundsdXrmu", | ||
240 | "||blendpsXrmu","||blendpdXrmu","||pblendwXrmu","palignrPrmu", | ||
241 | --1x | ||
242 | nil,nil,nil,nil, | ||
243 | "||pextrbVmXru","||pextrwVmXru","||pextrVmSXru","||extractpsVmXru", | ||
244 | nil,nil,nil,nil,nil,nil,nil,nil, | ||
245 | --2x | ||
246 | "||pinsrbXrVmu","||insertpsXrmu","||pinsrXrVmuS",nil, | ||
247 | --4x | ||
248 | [0x40] = "||dppsXrmu", | ||
249 | [0x41] = "||dppdXrmu", | ||
250 | [0x42] = "||mpsadbwXrmu", | ||
251 | --6x | ||
252 | [0x60] = "||pcmpestrmXrmu",[0x61] = "||pcmpestriXrmu", | ||
253 | [0x62] = "||pcmpistrmXrmu",[0x63] = "||pcmpistriXrmu", | ||
254 | }, | ||
255 | } | ||
256 | |||
257 | -- Map for VMX/SVM opcodes 0F 01 C0-FF (sgdt group with register operands). | ||
258 | local map_opcvm = { | ||
259 | [0xc1]="vmcall",[0xc2]="vmlaunch",[0xc3]="vmresume",[0xc4]="vmxoff", | ||
260 | [0xc8]="monitor",[0xc9]="mwait", | ||
261 | [0xd8]="vmrun",[0xd9]="vmmcall",[0xda]="vmload",[0xdb]="vmsave", | ||
262 | [0xdc]="stgi",[0xdd]="clgi",[0xde]="skinit",[0xdf]="invlpga", | ||
263 | [0xf8]="swapgs",[0xf9]="rdtscp", | ||
264 | } | ||
265 | |||
-- Map for FP opcodes. And you thought stack machines are simple?
--
-- This is a purely positional table, so the order and the nil placeholders
-- must not be disturbed. The fp handler in map_act computes the index as
--   pat*8 + <bits 3-5 of the ModRM byte>   (+64 if the ModRM byte is >= 0xC0)
-- where pat is the numeric pattern suffix passed to the handler
-- (presumably 0-7 for opcodes D8-DF; the primary opcode map is not shown here).
-- An entry that is itself a table is indexed once more with (ModRM%8)+1.
local map_opcfp = {
  -- D8-DF 00-BF: opcodes with a memory operand.
  -- D8
  [0]="faddFm","fmulFm","fcomFm","fcompFm","fsubFm","fsubrFm","fdivFm","fdivrFm",
  -- D9 (second row of eight, indexes 8-15)
  "fldFm",nil,"fstFm","fstpFm","fldenvVm","fldcwWm","fnstenvVm","fnstcwWm",
  -- DA
  "fiaddDm","fimulDm","ficomDm","ficompDm",
  "fisubDm","fisubrDm","fidivDm","fidivrDm",
  -- DB
  "fildDm","fisttpDm","fistDm","fistpDm",nil,"fld twordFmp",nil,"fstp twordFmp",
  -- DC
  "faddGm","fmulGm","fcomGm","fcompGm","fsubGm","fsubrGm","fdivGm","fdivrGm",
  -- DD
  "fldGm","fisttpQm","fstGm","fstpGm","frstorDmp",nil,"fnsaveDmp","fnstswWm",
  -- DE
  "fiaddWm","fimulWm","ficomWm","ficompWm",
  "fisubWm","fisubrWm","fidivWm","fidivrWm",
  -- DF
  "fildWm","fisttpWm","fistWm","fistpWm",
  "fbld twordFmp","fildQm","fbstp twordFmp","fistpQm",
  -- xx C0-FF: opcodes with a pseudo-register operand.
  -- Nested tables hold operand-less opcodes selected by the low 3 ModRM bits.
  -- D8
  "faddFf","fmulFf","fcomFf","fcompFf","fsubFf","fsubrFf","fdivFf","fdivrFf",
  -- D9
  "fldFf","fxchFf",{"fnop"},nil,
  {"fchs","fabs",nil,nil,"ftst","fxam"},
  {"fld1","fldl2t","fldl2e","fldpi","fldlg2","fldln2","fldz"},
  {"f2xm1","fyl2x","fptan","fpatan","fxtract","fprem1","fdecstp","fincstp"},
  {"fprem","fyl2xp1","fsqrt","fsincos","frndint","fscale","fsin","fcos"},
  -- DA
  "fcmovbFf","fcmoveFf","fcmovbeFf","fcmovuFf",nil,{nil,"fucompp"},nil,nil,
  -- DB
  "fcmovnbFf","fcmovneFf","fcmovnbeFf","fcmovnuFf",
  {nil,nil,"fnclex","fninit"},"fucomiFf","fcomiFf",nil,
  -- DC
  "fadd toFf","fmul toFf",nil,nil,
  "fsub toFf","fsubr toFf","fdivr toFf","fdiv toFf",
  -- DD
  "ffreeFf",nil,"fstFf","fstpFf","fucomFf","fucompFf",nil,nil,
  -- DE
  "faddpFf","fmulpFf",nil,{nil,"fcompp"},
  "fsubrpFf","fsubpFf","fdivrpFf","fdivpFf",
  -- DF
  nil,nil,nil,nil,{"fnstsw ax"},"fucomipFf","fcomipFf",nil,
}
-- Sanity check that no entry was dropped or added: 64 + 7*8 + 6 == 126.
assert(map_opcfp[126] == "fcomipFf")
313 | |||
-- Map for opcode groups. The subkey is sp from the ModRM byte.
-- Each group is a positional list: the "!" handler in map_act picks entry
-- (bits 3-5 of the ModRM byte)+1, so nil placeholders keep later entries in
-- place and a missing entry is reported as an unknown opcode by dispatch.
-- Entries are ordinary opcode patterns and go through dispatch again, so they
-- may contain "|" (prefix variants), "$" (reg$mem variants) or "name*" chains.
-- An entry without operand pattern inherits the rest of the group's pattern.
local map_opcgroup = {
  arith = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" },
  shift = { "rol", "ror", "rcl", "rcr", "shl", "shr", "sal", "sar" },
  testb = { "testBmi", "testBmi", "not", "neg", "mul", "imul", "div", "idiv" },
  testv = { "testVmi", "testVmi", "not", "neg", "mul", "imul", "div", "idiv" },
  incb = { "inc", "dec" },
  incd = { "inc", "dec", "callDmp", "$call farDmp",
	   "jmpDmp", "$jmp farDmp", "pushUm" },
  sldt = { "sldt", "str", "lldt", "ltr", "verr", "verw" },
  -- "vm*" chains to the vm handler for the register forms (see map_opcvm).
  sgdt = { "vm*$sgdt", "vm*$sidt", "$lgdt", "vm*$lidt",
	   "smsw", nil, "lmsw", "vm*$invlpg" },
  bt = { nil, nil, nil, nil, "bt", "bts", "btr", "btc" },
  -- "sz*" chains to the sz handler, which selects by operand size prefix.
  cmpxchg = { nil, "sz*,cmpxchg8bQmp,cmpxchg16bXmp", nil, nil,
	      nil, nil, "vmptrld|vmxon|vmclear", "vmptrst" },
  pshiftw = { nil, nil, "psrlw", nil, "psraw", nil, "psllw" },
  pshiftd = { nil, nil, "psrld", nil, "psrad", nil, "pslld" },
  pshiftq = { nil, nil, "psrlq", nil, nil, nil, "psllq" },
  pshiftdq = { nil, nil, "psrlq", "psrldq", nil, nil, "psllq", "pslldq" },
  fxsave = { "$fxsave", "$fxrstor", "$ldmxcsr", "$stmxcsr",
	     nil, "lfenceDp$", "mfenceDp$", "sfenceDp$clflush" },
  prefetch = { "prefetch", "prefetchw" },
  prefetcht = { "prefetchnta", "prefetcht0", "prefetcht1", "prefetcht2" },
}
338 | |||
339 | ------------------------------------------------------------------------------ | ||
340 | |||
-- Maps for register names.
-- The key is the operand size char used in the opcode patterns. Each list is
-- indexed with (register number)+1; entries 9-16 are reached when putpat adds
-- 8 to the register number for a REX.R/X/B bit.
local map_regs = {
  -- Byte registers when no REX prefix is present.
  B = { "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh",
	"r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b" },
  -- Byte registers when a REX prefix is present (selected by putpat).
  B64 = { "al", "cl", "dl", "bl", "spl", "bpl", "sil", "dil",
	  "r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b" },
  W = { "ax", "cx", "dx", "bx", "sp", "bp", "si", "di",
	"r8w", "r9w", "r10w", "r11w", "r12w", "r13w", "r14w", "r15w" },
  D = { "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi",
	"r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d" },
  Q = { "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi",
	"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" },
  -- The upper half repeats the lower half, so a +8 offset maps back to mm0-mm7.
  M = { "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7",
	"mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" }, -- No x64 ext!
  X = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
	"xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15" },
}
-- Segment register names, indexed with (sp field of the ModRM byte)+1.
local map_segregs = { "es", "cs", "ss", "ds", "fs", "gs", "segr6", "segr7" }
359 | |||
-- Maps for size names.
-- Size char -> operand size in bytes (used for immediates in putpat).
local map_sz2n = {
  B = 1,
  W = 2,
  D = 4,
  Q = 8,
  M = 8,
  X = 16,
}
-- Size char -> size keyword prepended to memory operands.
local map_sz2prefix = {
  B = "byte",
  W = "word",
  D = "dword",
  Q = "qword",
  M = "qword",
  X = "xword",
  -- F and G only ever need a prefix: they have no entry in map_sz2n and no
  -- register names.
  F = "dword",
  G = "qword",
}
370 | |||
371 | ------------------------------------------------------------------------------ | ||
372 | |||
-- Emit one formatted output line for the instruction spanning the bytes
-- ctx.start .. ctx.pos-1: address, optional hex dump, pending prefixes,
-- opcode text and operands. All pending prefix flags that get printed are
-- reset, and ctx.start is advanced to ctx.pos for the next instruction.
local function putop(ctx, text, operands)
  local endpos = ctx.pos
  local hexcol = ""
  local width = ctx.hexdump
  if width > 0 then
    -- Hex dump column, cut off or padded to a fixed width.
    local bytes = ctx.code
    for i = ctx.start, endpos-1 do
      hexcol = hexcol..format("%02X", byte(bytes, i, i))
    end
    if #hexcol > width then
      hexcol = sub(hexcol, 1, width)..". "
    else
      hexcol = hexcol..rep(" ", width-#hexcol+2)
    end
  end

  local line = text
  if operands then line = line.." "..operands end

  -- Prefixes still pending at this point were not consumed by the operand
  -- decoder, so show them explicitly. Each one is prepended in turn.
  if ctx.o16 then
    line = "o16 "..line
    ctx.o16 = false
  end
  if ctx.a32 then
    line = "a32 "..line
    ctx.a32 = false
  end
  if ctx.rep then
    line = ctx.rep.." "..line
    ctx.rep = false
  end
  if ctx.rex then
    local flags = ""
    if ctx.rexw then flags = flags.."w" end
    if ctx.rexr then flags = flags.."r" end
    if ctx.rexx then flags = flags.."x" end
    if ctx.rexb then flags = flags.."b" end
    if flags ~= "" then line = "rex."..flags.." "..line end
    ctx.rexw, ctx.rexr, ctx.rexx, ctx.rexb = false, false, false, false
    ctx.rex = false
  end
  local seg = ctx.seg
  if seg then
    -- Put the segment override inside every memory operand, or in front of
    -- the whole line if there is none.
    local withseg, count = gsub(line, "%[", "["..seg..":")
    if count == 0 then line = seg.." "..line else line = withseg end
    ctx.seg = false
  end
  if ctx.lock then
    line = "lock "..line
    ctx.lock = false
  end

  -- Annotate with a symbol name if the last recorded immediate matches one.
  local imm = ctx.imm
  local sym = imm and ctx.symtab[imm]
  if sym then line = line.."\t->"..sym end

  ctx.out(format("%08x %s%s\n", ctx.addr+ctx.start, hexcol, line))
  ctx.mrm = false
  ctx.start = endpos
  ctx.imm = nil
end
411 | |||
-- Reset every pending prefix flag in the context to false.
local function clearprefixes(ctx)
  -- Operand/address size overrides.
  ctx.o16, ctx.a32 = false, false
  -- Segment override, lock and rep/repne.
  ctx.seg, ctx.lock, ctx.rep = false, false, false
  -- REX prefix and its individual bits.
  ctx.rex = false
  ctx.rexw, ctx.rexr, ctx.rexx, ctx.rexb = false, false, false, false
end
418 | |||
-- Fallback for an instruction that is cut off by the end of the block:
-- drop pending prefixes, consume all remaining bytes and print a marker.
local function incomplete(ctx)
  clearprefixes(ctx)
  ctx.pos = ctx.stop+1  -- Everything up to the end belongs to this line.
  return putop(ctx, "(incomplete)")
end
425 | |||
-- Fallback for opcodes without a known decoding: drop pending prefixes and
-- print a marker for the bytes consumed so far.
local function unknown(ctx)
  local marker = "(unknown)"
  clearprefixes(ctx)
  return putop(ctx, marker)
end
431 | |||
-- Read a little-endian immediate of n bytes at position pos.
-- n == 1 and n == 2 are read as such; any other n is read as 4 bytes.
-- Only the 4-byte case records the value in ctx.imm (for symbol lookup).
-- Returns nothing, after printing an "(incomplete)" line, if the immediate
-- would extend past ctx.stop.
local function getimm(ctx, pos, n)
  local last = pos+n-1
  if last > ctx.stop then return incomplete(ctx) end
  local code = ctx.code
  if n == 1 then
    return (byte(code, pos, pos))
  end
  if n == 2 then
    local lo, hi = byte(code, pos, pos+1)
    return lo + hi*256
  end
  local b1, b2, b3, b4 = byte(code, pos, pos+3)
  local imm = b1 + b2*256 + b3*65536 + b4*16777216
  ctx.imm = imm
  return imm
end
449 | |||
-- Process pattern string and generate the operands.
--
-- Every char of pat is interpreted in order. Upper-case size chars select
-- the current operand size (sz) and register name list (regs) for the chars
-- that follow; the other chars either consume immediate bytes from the code
-- or pick a field of the (lazily decoded) ModRM/SIB bytes. Each operand text
-- is appended to the comma-separated operand list. The finished line is
-- printed with putop. On a truncated instruction an "(incomplete)" line is
-- printed instead and the function returns early.
local function putpat(ctx, name, pat)
  local operands, regs, sz, mode, sp, rm, sc, rx, sdisp
  local code, pos, stop = ctx.code, ctx.pos, ctx.stop

  -- Chars used: 1DFGIMPQRSTUVWXacdfgijmoprstuwxyz
  -- ("B" is handled below as well.)
  for p in gmatch(pat, ".") do
    local x = nil  -- Text of the operand produced by this char, if any.
    if p == "V" or p == "U" then
      -- Variable operand size: REX.W wins over o16; "U" defaults to Q on x64.
      if ctx.rexw then sz = "Q"; ctx.rexw = false
      elseif ctx.o16 then sz = "W"; ctx.o16 = false
      elseif p == "U" and ctx.x64 then sz = "Q"
      else sz = "D" end
      regs = map_regs[sz]
    elseif p == "T" then
      -- Like "V", but ignores the o16 prefix.
      if ctx.rexw then sz = "Q"; ctx.rexw = false else sz = "D" end
      regs = map_regs[sz]
    elseif p == "B" then
      sz = "B"
      -- Any REX prefix switches to the spl/bpl/sil/dil byte register names.
      regs = ctx.rex and map_regs.B64 or map_regs.B
    elseif match(p, "[WDQMXFG]") then
      sz = p
      regs = map_regs[sz]
    elseif p == "P" then
      -- MMX operand by default, XMM operand with an o16 prefix.
      sz = ctx.o16 and "X" or "M"; ctx.o16 = false
      regs = map_regs[sz]
    elseif p == "S" then
      -- Append the current size char to the opcode name.
      name = name..lower(sz)
    elseif p == "s" then
      -- Signed byte immediate, shown with an explicit sign.
      local imm = getimm(ctx, pos, 1); if not imm then return end
      x = imm <= 127 and format("+0x%02x", imm)
		     or format("-0x%02x", 256-imm)
      pos = pos+1
    elseif p == "u" then
      -- Unsigned byte immediate.
      local imm = getimm(ctx, pos, 1); if not imm then return end
      x = format("0x%02x", imm)
      pos = pos+1
    elseif p == "w" then
      -- Unsigned word immediate.
      local imm = getimm(ctx, pos, 2); if not imm then return end
      x = format("0x%x", imm)
      pos = pos+2
    elseif p == "o" then -- [offset]
      if ctx.x64 then
	-- 8 byte offset, read as two 4 byte halves (low half first).
	local imm1 = getimm(ctx, pos, 4); if not imm1 then return end
	local imm2 = getimm(ctx, pos+4, 4); if not imm2 then return end
	x = format("[0x%08x%08x]", imm2, imm1)
	pos = pos+8
      else
	local imm = getimm(ctx, pos, 4); if not imm then return end
	x = format("[0x%08x]", imm)
	pos = pos+4
      end
    elseif p == "i" or p == "I" then
      -- Immediate of the current operand size. Only "I" reads a full 8 byte
      -- immediate on x64; otherwise 8 byte operands take a 4 byte immediate.
      local n = map_sz2n[sz]
      if n == 8 and ctx.x64 and p == "I" then
	local imm1 = getimm(ctx, pos, 4); if not imm1 then return end
	local imm2 = getimm(ctx, pos+4, 4); if not imm2 then return end
	x = format("0x%08x%08x", imm2, imm1)
      else
	if n == 8 then n = 4 end
	local imm = getimm(ctx, pos, n); if not imm then return end
	if sz == "Q" and (imm < 0 or imm > 0x7fffffff) then
	  -- Shown as a negative number for qword operands.
	  imm = (0xffffffff+1)-imm
	  x = format(imm > 65535 and "-0x%08x" or "-0x%x", imm)
	else
	  x = format(imm > 65535 and "0x%08x" or "0x%x", imm)
	end
      end
      pos = pos+n
    elseif p == "j" then
      -- Relative jump target: sign-extend the displacement and add the
      -- address of the byte following it.
      local n = map_sz2n[sz]
      if n == 8 then n = 4 end
      local imm = getimm(ctx, pos, n); if not imm then return end
      if sz == "B" and imm > 127 then imm = imm-256
      elseif imm > 2147483647 then imm = imm-4294967296 end
      pos = pos+n
      imm = imm + pos + ctx.addr
      if imm > 4294967295 and not ctx.x64 then imm = imm-4294967296 end
      ctx.imm = imm  -- Lets putop look up a symbol for the target.
      if sz == "W" then
	x = format("word 0x%04x", imm%65536)
      elseif ctx.x64 then
	-- Formatted in two parts: upper part and low 24 bits.
	local lo = imm % 0x1000000
	x = format("0x%02x%06x", (imm-lo) / 0x1000000, lo)
      else
	x = format("0x%08x", imm)
      end
    elseif p == "R" then
      -- Register encoded in the low 3 bits of the preceding byte.
      local r = byte(code, pos-1, pos-1)%8
      if ctx.rexb then r = r + 8; ctx.rexb = false end
      x = regs[r+1]
    elseif p == "a" then x = regs[1]
    elseif p == "c" then x = "cl"
    elseif p == "d" then x = "dx"
    elseif p == "1" then x = "1"
    else
      -- All remaining chars need the ModRM byte, which is decoded only once.
      if not mode then
	mode = ctx.mrm
	if not mode then
	  if pos > stop then return incomplete(ctx) end
	  mode = byte(code, pos, pos)
	  pos = pos+1
	end
	-- Split into rm (bits 0-2), sp (bits 3-5) and mode (bits 6-7).
	rm = mode%8; mode = (mode-rm)/8
	sp = mode%8; mode = (mode-sp)/8
	sdisp = ""
	if mode < 3 then
	  if rm == 4 then
	    -- SIB byte: base (rm), index (rx) and scale (sc).
	    if pos > stop then return incomplete(ctx) end
	    sc = byte(code, pos, pos)
	    pos = pos+1
	    rm = sc%8; sc = (sc-rm)/8
	    rx = sc%8; sc = (sc-rx)/8
	    if ctx.rexx then rx = rx + 8; ctx.rexx = false end
	    if rx == 4 then rx = nil end  -- Index 4 means no index register.
	  end
	  if mode > 0 or rm == 5 then
	    -- Displacement: 1 byte for mode 1, 4 bytes otherwise.
	    local dsz = mode
	    if dsz ~= 1 then dsz = 4 end
	    local disp = getimm(ctx, pos, dsz); if not disp then return end
	    if mode == 0 then rm = nil end  -- No base register.
	    if rm or rx or (not sc and ctx.x64 and not ctx.a32) then
	      -- Signed displacement relative to a register (or to rip).
	      if dsz == 1 and disp > 127 then
		sdisp = format("-0x%x", 256-disp)
	      elseif disp >= 0 and disp <= 0x7fffffff then
		sdisp = format("+0x%x", disp)
	      else
		sdisp = format("-0x%x", (0xffffffff+1)-disp)
	      end
	    else
	      -- Absolute address.
	      sdisp = format(ctx.x64 and not ctx.a32 and
		not (disp >= 0 and disp <= 0x7fffffff)
		and "0xffffffff%08x" or "0x%08x", disp)
	    end
	    pos = pos+dsz
	  end
	end
	if rm and ctx.rexb then rm = rm + 8; ctx.rexb = false end
	if ctx.rexr then sp = sp + 8; ctx.rexr = false end
      end
      if p == "m" then
	-- Register or memory operand selected by mode/rm.
	if mode == 3 then x = regs[rm+1]
	else
	  local aregs = ctx.a32 and map_regs.D or ctx.aregs
	  local srm, srx = "", ""
	  if rm then srm = aregs[rm+1]
	  elseif not sc and ctx.x64 and not ctx.a32 then srm = "rip" end
	  ctx.a32 = false
	  if rx then
	    if rm then srm = srm.."+" end
	    srx = aregs[rx+1]
	    if sc > 0 then srx = srx.."*"..(2^sc) end
	  end
	  x = format("[%s%s%s]", srm, srx, sdisp)
	end
	-- Add a size prefix to memory operands unless another operand in the
	-- pattern already implies the size ("t" forces the prefix).
	if mode < 3 and
	   (not match(pat, "[aRrgp]") or match(pat, "t")) then -- Yuck.
	  x = map_sz2prefix[sz].." "..x
	end
      elseif p == "r" then x = regs[sp+1]
      elseif p == "g" then x = map_segregs[sp+1]
      elseif p == "p" then -- Suppress prefix.
      elseif p == "f" then x = "st"..rm
      elseif p == "x" then
	-- A lock prefix with CR0 outside of x64 mode is shown as CR8.
	if sp == 0 and ctx.lock and not ctx.x64 then
	  x = "CR8"; ctx.lock = false
	else
	  x = "CR"..sp
	end
      elseif p == "y" then x = "DR"..sp
      elseif p == "z" then x = "TR"..sp
      elseif p == "t" then
	-- No operand of its own; only checked by the size prefix test above.
      else
	error("bad pattern `"..pat.."'")
      end
    end
    if x then operands = operands and operands..", "..x or x end
  end
  ctx.pos = pos
  return putop(ctx, name, operands)
end
631 | |||
632 | -- Forward declaration. | ||
633 | local map_act | ||
634 | |||
-- Return the ModRM byte of the current instruction.
-- The byte is read at ctx.pos on first use, which advances ctx.pos, and is
-- then cached in ctx.mrm. Returns nil if ctx.pos is already past ctx.stop.
local function getmrm(ctx)
  local cached = ctx.mrm
  if cached then return cached end
  local at = ctx.pos
  if at > ctx.stop then return nil end
  local fetched = byte(ctx.code, at, at)
  ctx.pos = at+1
  ctx.mrm = fetched
  return fetched
end
647 | |||
-- Reduce an opcode pattern to a single variant and run its handler.
--   opat    pattern from an opcode map (nil means unknown opcode).
--   patgrp  operand pattern inherited from an opcode group; it is used if
--           the selected variant has no operand pattern of its own.
-- The handler is looked up in map_act by the first char after the name.
local function dispatch(ctx, opat, patgrp)
  if not opat then return unknown(ctx) end

  -- "a|b|c|d": MMX/SSE variants depending on prefix.
  -- No prefix picks a, "rep" picks b, o16 picks c, any other rep value d.
  if match(opat, "%|") then
    local selector
    local prefix = ctx.rep
    if prefix then
      if prefix == "rep" then
	selector = "%|([^%|]*)"
      else
	selector = "%|[^%|]*%|[^%|]*%|([^%|]*)"
      end
      ctx.rep = false
    elseif ctx.o16 then
      selector = "%|[^%|]*%|([^%|]*)"
      ctx.o16 = false
    else
      selector = "^[^%|]*"
    end
    opat = match(opat, selector)
    if not opat then return unknown(ctx) end
    -- XXX: only one prefix is consumed here. Known to fail for
    -- 66 f2 0f 38 f1 06  crc32 eax,WORD PTR [esi]
  end

  -- "reg$mem": variants depending on the ModRM byte (>= 0xC0 is register).
  if match(opat, "%$") then
    local mrm = getmrm(ctx)
    if not mrm then return incomplete(ctx) end
    local half = mrm >= 192 and "^[^%$]*" or "%$(.*)"
    opat = match(opat, half)
  end

  -- An empty variant means the opcode is not valid in this form.
  if opat == "" then return unknown(ctx) end

  local name, pat = match(opat, "^([a-z0-9 ]*)(.*)")
  if pat == "" and patgrp then pat = patgrp end
  return map_act[sub(pat, 1, 1)](ctx, name, pat)
end
674 | |||
-- Get a pattern from an opcode map and dispatch to handler.
-- The opcode byte at ctx.pos selects the pattern from opcmap and is consumed.
-- If no byte is left in the block an "(incomplete)" line is printed instead.
local function dispatchmap(ctx, opcmap)
  local pos = ctx.pos
  -- The block may end right after an opcode escape byte (this function is
  -- also reached via the opc2/opc3 handlers). Without this check ctx.pos
  -- would run past ctx.stop: the byte after the block would be decoded, or
  -- the hex dump in putop would fail on a byte that does not exist.
  if pos > ctx.stop then return incomplete(ctx) end
  local opat = opcmap[byte(ctx.code, pos, pos)]
  ctx.pos = pos + 1
  return dispatch(ctx, opat)
end
683 | |||
-- Map for action codes. The key is the first char after the name.
-- dispatch splits a pattern into name (lower-case letters, digits, spaces)
-- and the remaining pattern, and calls the handler stored under the first
-- char of that remainder as handler(ctx, name, pat). The "*" handler chains
-- to the handlers stored under a name (sz, opc2, opc3, vm, fp, rex, nop).
map_act = {
  -- Simple opcodes without operands.
  [""] = function(ctx, name, pat)
    return putop(ctx, name)
  end,

  -- Operand size chars fall right through.
  B = putpat, W = putpat, D = putpat, Q = putpat,
  V = putpat, U = putpat, T = putpat,
  M = putpat, X = putpat, P = putpat,
  F = putpat, G = putpat,

  -- Collect prefixes.
  -- "name:" sets ctx[name] = name, "name:key" sets ctx[key] = name.
  -- Nothing is printed; the flag stays pending for the next opcode.
  [":"] = function(ctx, name, pat)
    ctx[pat == ":" and name or sub(pat, 2)] = name
    if ctx.pos - ctx.start > 5 then return unknown(ctx) end -- Limit #prefixes.
  end,

  -- Chain to special handler specified by name.
  -- The handler gets the pattern with the leading "*" removed.
  ["*"] = function(ctx, name, pat)
    return map_act[name](ctx, name, sub(pat, 2))
  end,

  -- Use named subtable for opcode group.
  -- The entry is selected by bits 3-5 of the ModRM byte; the rest of the
  -- pattern is passed on as the default operand pattern.
  ["!"] = function(ctx, name, pat)
    local mrm = getmrm(ctx); if not mrm then return incomplete(ctx) end
    return dispatch(ctx, map_opcgroup[name][((mrm-(mrm%8))/8)%8+1], sub(pat, 2))
  end,

  -- o16,o32[,o64] variants.
  -- pat is a comma-separated list: the first entry is used with an o16
  -- prefix, the second otherwise, and the third (if present) with REX.W.
  sz = function(ctx, name, pat)
    if ctx.o16 then ctx.o16 = false
    else
      pat = match(pat, ",(.*)")
      if ctx.rexw then
	local p = match(pat, ",(.*)")
	if p then pat = p; ctx.rexw = false end
      end
    end
    pat = match(pat, "^[^,]*")
    return dispatch(ctx, pat)
  end,

  -- Two-byte opcode dispatch.
  opc2 = function(ctx, name, pat)
    return dispatchmap(ctx, map_opc2)
  end,

  -- Three-byte opcode dispatch.
  -- pat names the submap of map_opc3 (e.g. "3a").
  opc3 = function(ctx, name, pat)
    return dispatchmap(ctx, map_opc3[pat])
  end,

  -- VMX/SVM dispatch.
  -- Uses the cached ModRM byte as the key; dispatch has already fetched it
  -- for patterns of the form "vm*$...".
  vm = function(ctx, name, pat)
    return dispatch(ctx, map_opcvm[ctx.mrm])
  end,

  -- Floating point opcode dispatch.
  -- pat is a numeric string selecting the row of eight in map_opcfp; see
  -- the table for the index layout.
  fp = function(ctx, name, pat)
    local mrm = getmrm(ctx); if not mrm then return incomplete(ctx) end
    local rm = mrm%8
    local idx = pat*8 + ((mrm-rm)/8)%8
    if mrm >= 192 then idx = idx + 64 end
    local opat = map_opcfp[idx]
    if type(opat) == "table" then opat = opat[rm+1] end
    return dispatch(ctx, opat)
  end,

  -- REX prefix.
  -- Each char of pat (w, r, x, b) sets the matching ctx.rex<char> flag.
  rex = function(ctx, name, pat)
    if ctx.rex then return unknown(ctx) end -- Only 1 REX prefix allowed.
    for p in gmatch(pat, ".") do ctx["rex"..p] = true end
    ctx.rex = true
  end,

  -- Special case for nop with REX prefix.
  -- With a REX prefix the alternative pattern pat is decoded instead.
  nop = function(ctx, name, pat)
    return dispatch(ctx, ctx.rex and pat or "nop")
  end,
}
766 | |||
767 | ------------------------------------------------------------------------------ | ||
768 | |||
-- Disassemble a block of code.
--   ofs  0-based offset of the first byte in ctx.code (default 0).
--   len  number of bytes to disassemble (default: up to the end of the code).
-- Output goes through ctx.out, one line per instruction.
local function disass_block(ctx, ofs, len)
  ofs = ofs or 0
  local last
  if len then last = ofs+len else last = #ctx.code end
  local first = ofs + 1  -- String positions are 1-based.

  ctx.start = first
  ctx.pos = first
  ctx.stop = last
  ctx.imm = nil
  ctx.mrm = false
  clearprefixes(ctx)

  while ctx.pos <= last do
    dispatchmap(ctx, ctx.map1)
  end
  -- Bytes consumed without printing a line (e.g. trailing prefixes).
  if ctx.pos ~= ctx.start then incomplete(ctx) end
end
783 | |||
-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
--   code  string holding the machine code.
--   addr  address of the first byte of code (default 0).
--   out   function called with each output line (default io.write).
-- The context decodes 32 bit code and prints a hex dump of up to 16 chars.
local function create_(code, addr, out)
  return {
    code = code,
    -- Stored minus one, because 1-based string positions are added to it.
    addr = (addr or 0) - 1,
    out = out or io.write,
    symtab = {},
    disass = disass_block,
    hexdump = 16,
    x64 = false,
    map1 = map_opc1_32,
    aregs = map_regs.D,
  }
end
798 | |||
-- Like create_, but the context decodes 64 bit code: it uses the 64 bit
-- primary opcode map and the 64 bit registers for addressing.
local function create64_(code, addr, out)
  local ctx = create_(code, addr, out)
  ctx.x64, ctx.map1, ctx.aregs = true, map_opc1_64, map_regs.Q
  return ctx
end
806 | |||
-- Simple API: disassemble code (a string) at address and output via out.
-- Decodes the whole string as 32 bit code.
local function disass_(code, addr, out)
  local ctx = create_(code, addr, out)
  ctx:disass()
end
811 | |||
-- Simple API, 64 bit variant: disassemble the whole code string at address
-- and output via out.
local function disass64_(code, addr, out)
  local ctx = create64_(code, addr, out)
  ctx:disass()
end
815 | |||
816 | |||
-- Public module functions.
-- module(...) must come after all local definitions above: from here on,
-- assignments to undeclared names go into the module table, not into the
-- global table, and the standard globals are no longer reachable by name.
module(...)

create = create_      -- Extended API, 32 bit code.
create64 = create64_  -- Extended API, 64 bit code.
disass = disass_      -- Simple API, 32 bit code.
disass64 = disass64_  -- Simple API, 64 bit code.
824 | |||
diff --git a/lib/dump.lua b/lib/dump.lua new file mode 100644 index 00000000..9fde87c1 --- /dev/null +++ b/lib/dump.lua | |||
@@ -0,0 +1,567 @@ | |||
1 | ---------------------------------------------------------------------------- | ||
2 | -- LuaJIT compiler dump module. | ||
3 | -- | ||
4 | -- Copyright (C) 2005-2009 Mike Pall. All rights reserved. | ||
5 | -- Released under the MIT/X license. See Copyright Notice in luajit.h | ||
6 | ---------------------------------------------------------------------------- | ||
7 | -- | ||
8 | -- This module can be used to debug the JIT compiler itself. It dumps the | ||
9 | -- code representations and structures used in various compiler stages. | ||
10 | -- | ||
11 | -- Example usage: | ||
12 | -- | ||
13 | -- luajit -jdump -e "local x=0; for i=1,1e6 do x=x+i end; print(x)" | ||
14 | -- luajit -jdump=im -e "for i=1,1000 do for j=1,1000 do end end" | less -R | ||
15 | -- luajit -jdump=is myapp.lua | less -R | ||
16 | -- luajit -jdump=-b myapp.lua | ||
17 | -- luajit -jdump=+aH,myapp.html myapp.lua | ||
18 | -- luajit -jdump=ixT,myapp.dump myapp.lua | ||
19 | -- | ||
20 | -- The first argument specifies the dump mode. The second argument gives | ||
21 | -- the output file name. Default output is to stdout, unless the environment | ||
22 | -- variable LUAJIT_DUMPFILE is set. The file is overwritten every time the | ||
23 | -- module is started. | ||
24 | -- | ||
25 | -- Different features can be turned on or off with the dump mode. If the | ||
26 | -- mode starts with a '+', the following features are added to the default | ||
27 | -- set of features; a '-' removes them. Otherwise the features are replaced. | ||
28 | -- | ||
29 | -- The following dump features are available (* marks the default): | ||
30 | -- | ||
31 | -- * t Print a line for each started, ended or aborted trace (see also -jv). | ||
32 | -- * b Dump the traced bytecode. | ||
33 | -- * i Dump the IR (intermediate representation). | ||
34 | -- r Augment the IR with register/stack slots. | ||
35 | -- s Dump the snapshot map. | ||
36 | -- * m Dump the generated machine code. | ||
37 | -- x Print each taken trace exit. | ||
38 | -- X Print each taken trace exit and the contents of all registers. | ||
39 | -- | ||
40 | -- The output format can be set with the following characters: | ||
41 | -- | ||
42 | -- T Plain text output. | ||
43 | -- A ANSI-colored text output | ||
44 | -- H Colorized HTML + CSS output. | ||
45 | -- | ||
46 | -- The default output format is plain text. It's set to ANSI-colored text | ||
47 | -- if the COLORTERM variable is set. Note: this is independent of any output | ||
48 | -- redirection, which is actually considered a feature. | ||
49 | -- | ||
50 | -- You probably want to use less -R to enjoy viewing ANSI-colored text from | ||
51 | -- a pipe or a file. Add this to your ~/.bashrc: export LESS="-R" | ||
52 | -- | ||
53 | ------------------------------------------------------------------------------ | ||
54 | |||
55 | -- Cache some library functions and objects. | ||
56 | local jit = require("jit") | ||
57 | assert(jit.version_num == 20000, "LuaJIT core/library version mismatch") | ||
58 | local jutil = require("jit.util") | ||
59 | local vmdef = require("jit.vmdef") | ||
60 | local funcinfo, funcbc = jutil.funcinfo, jutil.funcbc | ||
61 | local traceinfo, traceir, tracek = jutil.traceinfo, jutil.traceir, jutil.tracek | ||
62 | local tracemc, traceexitstub = jutil.tracemc, jutil.traceexitstub | ||
63 | local tracesnap = jutil.tracesnap | ||
64 | local bit = require("bit") | ||
65 | local band, shl, shr = bit.band, bit.lshift, bit.rshift | ||
66 | local sub, gsub, format = string.sub, string.gsub, string.format | ||
67 | local byte, char, rep = string.byte, string.char, string.rep | ||
68 | local type, tostring = type, tostring | ||
69 | local stdout, stderr = io.stdout, io.stderr | ||
70 | |||
71 | -- Load other modules on-demand. | ||
72 | local bcline, discreate | ||
73 | |||
74 | -- Active flag, output file handle and dump mode. | ||
75 | local active, out, dumpmode | ||
76 | |||
77 | ------------------------------------------------------------------------------ | ||
78 | |||
-- Symbol table shared by all machine code dumps: address -> symbol name.
local symtab = {}
-- Number of trace exits already entered into the symbol table.
local nexitsym = 0

-- Make sure the symbol table has an entry for the exit stub address of
-- every trace exit number below nexit. The exit number (as a string) is the
-- symbol name. Entries are only ever added. Returns the symbol table.
local function fillsymtab(nexit)
  local known = nexitsym
  if known < nexit then
    for exitno = known, nexit-1 do
      symtab[traceexitstub(exitno)] = tostring(exitno)
    end
    nexitsym = nexit
  end
  return symtab
end
91 | |||
-- Output callback for the disassembler: write a string to the current dump
-- output file. The file handle is looked up at the time of the call.
local function dumpwrite(s)
  local sink = out
  sink:write(s)
end
95 | |||
-- Disassemble the machine code of trace tr and write it to the dump output.
-- Does nothing if no trace info or no machine code is available. For a
-- looping trace (non-zero loop offset) the code is printed in two parts
-- with a "->LOOP:" label in between; jumps to the loop start are shown
-- with the symbol LOOP.
local function dump_mcode(tr)
  local info = traceinfo(tr)
  if not info then return end
  local mcode, addr, loop = tracemc(tr)
  if not mcode then return end

  -- Load the disassembler for the current architecture on first use.
  discreate = discreate or require("jit.dis_"..jit.arch).create

  local size = #mcode
  out:write("---- TRACE ", tr, " mcode ", size, "\n")
  local ctx = discreate(mcode, addr, dumpwrite)
  ctx.hexdump = 0  -- No hex dump column.
  ctx.symtab = fillsymtab(info.nexit)

  if loop == 0 then
    ctx:disass(0, size)
    return
  end

  -- The LOOP symbol is only valid while this trace is dumped.
  local loopaddr = addr+loop
  symtab[loopaddr] = "LOOP"
  ctx:disass(0, loop)
  out:write("->LOOP:\n")
  ctx:disass(loop, size-loop)
  symtab[loopaddr] = nil
end
119 | |||
120 | ------------------------------------------------------------------------------ | ||
121 | |||
-- Short names for the IR types, indexed by the IR type number (from 0).
-- The names are also used as CSS class suffixes ("irt_<name>") by the HTML
-- output. The order must match the type numbering of the compiler
-- (e.g. formatk treats type 11 as userdata and printsnap type 8 as function).
local irtype_text = {
  [0] = "nil",
  "fal",
  "tru",
  "lud",
  "str",
  "ptr",
  "thr",
  "pro",
  "fun",
  "t09",
  "tab",
  "udt",
  "num",
  "int",
  "i8 ",
  "u8 ",
  "i16",
  "u16",
}
142 | |||
-- Format strings for ANSI-colored output, indexed by the IR type number
-- (parallel to irtype_text). "%s" leaves the text uncolored; the others wrap
-- it in an ANSI escape sequence followed by a reset ("\027[m").
local colortype_ansi = {
  [0] = "%s",
  "%s",
  "%s",
  "%s",
  "\027[32m%s\027[m",
  "%s",
  "\027[1m%s\027[m",
  "%s",
  "\027[1m%s\027[m",
  "%s",
  "\027[31m%s\027[m",
  "\027[36m%s\027[m",
  "\027[34m%s\027[m",
  "\027[35m%s\027[m",
  "\027[35m%s\027[m",
  "\027[35m%s\027[m",
  "\027[35m%s\027[m",
  "\027[35m%s\027[m",
}
163 | |||
-- Colorizer for plain text output: returns s unchanged.
-- The IR type t is accepted only to match the signature of the other
-- colorizers (colorize_ansi, colorize_html).
local function colorize_text(s, t)
  return s
end
167 | |||
-- Colorizer for ANSI output: wrap s in the escape sequence registered for
-- IR type t in colortype_ansi.
local function colorize_ansi(s, t)
  local wrapper = colortype_ansi[t]
  return format(wrapper, s)
end
171 | |||
-- ANSI-colored IR type names, indexed by IR type number.
-- Entries are created on first access and then cached in the table itself.
local irtype_ansi = setmetatable({}, {
  __index = function(cache, t)
    local colored = colorize_ansi(irtype_text[t], t)
    cache[t] = colored
    return colored
  end,
})
175 | |||
-- Replacement table for the characters that must be escaped in HTML text.
-- Each character maps to its character entity reference; mapping a character
-- to itself would leave the text unescaped.
local html_escape = { ["<"] = "&lt;", [">"] = "&gt;", ["&"] = "&amp;", }
177 | |||
-- Colorizer for HTML output: escape s with html_escape and wrap it in a
-- span whose CSS class is "irt_" plus the short name of IR type t.
local function colorize_html(s, t)
  local escaped = gsub(s, "[<>&]", html_escape)
  local class = irtype_text[t]
  return format('<span class="irt_%s">%s</span>', class, escaped)
end
182 | |||
-- IR type names wrapped in HTML spans, indexed by IR type number.
-- Entries are created on first access and then cached in the table itself.
local irtype_html = setmetatable({}, {
  __index = function(cache, t)
    local markup = colorize_html(irtype_text[t], t)
    cache[t] = markup
    return markup
  end,
})
186 | |||
-- Style sheet for the HTML output format. The span.irt_* classes match the
-- class names generated by colorize_html; types without a rule are shown in
-- the default text color.
-- NOTE(review): the first rule uses the selector "background", which looks
-- like it was meant to be "body" -- confirm before changing the output.
local header_html = [[
<style type="text/css">
background { background: #ffffff; color: #000000; }
pre.ljdump {
font-size: 10pt;
background: #f0f4ff;
color: #000000;
border: 1px solid #bfcfff;
padding: 0.5em;
margin-left: 2em;
margin-right: 2em;
}
span.irt_str { color: #00a000; }
span.irt_thr, span.irt_fun { color: #404040; font-weight: bold; }
span.irt_tab { color: #c00000; }
span.irt_udt { color: #00c0c0; }
span.irt_num { color: #0000c0; }
span.irt_int { color: #c000c0; }
</style>
]]
207 | |||
208 | local colorize, irtype | ||
209 | |||
-- Lookup table to convert some literals into names.
-- The key is the IR opcode name exactly as it is cut out of vmdef.irnames,
-- i.e. always padded with spaces to 6 characters. A shorter key can never
-- match. Each value maps a literal operand to its name.
local litname = {
  ["SLOAD "] = { [0] = "", "I", "R", "RI", "P", "PI", "PR", "PRI", },
  ["XLOAD "] = { [0] = "", "unaligned", },
  ["TOINT "] = { [0] = "check", "index", "", },
  ["FLOAD "] = vmdef.irfield,
  ["FREF  "] = vmdef.irfield,
  ["FPMATH"] = vmdef.irfpm,
}
219 | |||
-- Return the escape sequence for a control character c (a one-char string),
-- for use as a gsub replacement function: "\n", "\r" and "\t" get their
-- usual two-character escapes, anything else a backslash followed by the
-- three-digit decimal character code.
local function ctlsub(c)
  if c == "\n" then return "\\n"
  elseif c == "\r" then return "\\r"
  elseif c == "\t" then return "\\t"
  else return format("\\%03d", byte(c))
  end
end
228 | |||
-- Format the IR constant with reference idx of trace tr for display.
-- The text depends on the Lua type of the constant; it is padded to at
-- least 4 chars and colorized according to its IR type. If tracek returns
-- a slot number, " @<slot>" is appended.
local function formatk(tr, idx)
  local k, t, slot = tracek(tr, idx)
  local kind = type(k)
  local text
  if kind == "number" then
    -- 2^52+2^51 is shown by name instead of by value.
    text = (k == 2^52+2^51) and "bias" or format("%+.14g", k)
  elseif kind == "string" then
    -- Quoted, control chars escaped, cut off after 20 chars (marked by ~).
    local fmt = '"%s"'
    if #k > 20 then fmt = '"%.20s"~' end
    text = format(fmt, gsub(k, "%c", ctlsub))
  elseif kind == "function" then
    -- Name of a fast function or source location of a Lua function.
    local fi = funcinfo(k)
    if fi.ffid then
      text = vmdef.ffnames[fi.ffid]
    else
      text = fi.loc
    end
  elseif kind == "table" then
    text = format("{%p}", k)
  elseif kind == "userdata" then
    if t == 11 then
      text = format("userdata:%p", k)
    else
      text = format("[%p]", k)
      if text == "[0x00000000]" then text = "NULL" end
    end
  else
    text = tostring(k) -- For primitives.
  end
  local result = colorize(format("%-4s", text), t)
  if slot then
    result = format("%s @%d", result, slot)
  end
  return result
end
266 | |||
-- Write the entries of one snapshot to the output, then the closing "]".
-- Empty entries print as dashes, negative refs as constants, and other refs
-- as colorized 4-digit IR references.
local function printsnap(tr, snap)
  for i=1,#snap do
    local ref = snap[i]
    if not ref then
      out:write("---- ")
    elseif ref < 0 then
      out:write(formatk(tr, ref), " ")
    else
      local _, ot = traceir(tr, ref)
      local t = band(ot, 15)
      local isframe = false
      if t == 8 then
        -- Look up the 6-character opcode name to detect FRAME instructions.
        local base = 6*shr(ot, 8)
        isframe = sub(vmdef.irnames, base+1, base+6) == "FRAME "
      end
      out:write(colorize(format("%04d", ref), t), isframe and "|" or " ")
    end
  end
  out:write("]\n")
end
290 | |||
-- Dump snapshots (not interleaved with IR).
-- Walks the snapshot numbers from 0 upwards until tracesnap() returns
-- nothing for the next number.
local function dump_snap(tr)
  out:write("---- TRACE ", tr, " snapshots\n")
  local no = 0
  while no <= 1000000000 do
    local snap = tracesnap(tr, no)
    if not snap then return end
    out:write(format("#%-3d %04d [ ", no, snap[0]))
    printsnap(tr, snap)
    no = no + 1
  end
end
301 | |||
-- NYI: should really get the register map from the disassembler.
local reg_map = {
  [0] = "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi",
  "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
}

-- Return a register name or stack slot for a rid/sp location.
-- Values above 255 carry a stack slot in the bits above the low byte and are
-- shown as a hex offset in brackets. Otherwise the low byte is a register id:
-- ids below 128 are looked up in reg_map, anything else yields "".
local function ridsp_name(ridsp)
  if ridsp > 255 then
    return format("[%x]", shr(ridsp, 8)*4)
  end
  local rid = band(ridsp, 0xff)
  if rid >= 128 then return "" end
  return reg_map[rid]
end
315 | |||
-- Dump IR and interleaved snapshots.
--   tr       trace number
--   dumpsnap if true, print each snapshot before the instruction it refers to
--   dumpreg  if true, also print the register/stack slot of each instruction
-- Does nothing if traceinfo() has no information about the trace.
local function dump_ir(tr, dumpsnap, dumpreg)
  local info = traceinfo(tr)
  if not info then return end
  local nins = info.nins
  out:write("---- TRACE ", tr, " IR\n")
  local irnames = vmdef.irnames
  -- 65536 acts as "no further snapshot": no instruction index reaches it.
  local snapref = 65536
  local snap, snapno
  if dumpsnap then
    snap = tracesnap(tr, 0)
    snapref = snap[0]
    snapno = 0
  end
  for ins=1,nins do
    if ins >= snapref then
      -- NOTE(review): both branches are identical as seen here; presumably
      -- the register variant used wider padding -- confirm against upstream.
      if dumpreg then
        out:write(format(".... SNAP #%-3d [ ", snapno))
      else
        out:write(format(".... SNAP #%-3d [ ", snapno))
      end
      printsnap(tr, snap)
      snapno = snapno + 1
      snap = tracesnap(tr, snapno)
      snapref = snap and snap[0] or 65536
    end
    local m, ot, op1, op2, ridsp = traceir(tr, ins)
    local oidx, t = 6*shr(ot, 8), band(ot, 31)
    -- Opcode names are stored as fixed 6-character fields, so every literal
    -- compared against 'op' must be padded to exactly 6 characters.
    local op = sub(irnames, oidx+1, oidx+6)
    if op == "LOOP  " then
      if dumpreg then
        out:write(format("%04d ------------ LOOP ------------\n", ins))
      else
        out:write(format("%04d ------ LOOP ------------\n", ins))
      end
    elseif op ~= "NOP   " and (dumpreg or op ~= "RENAME") then
      if dumpreg then
        out:write(format("%04d %-5s ", ins, ridsp_name(ridsp)))
      else
        out:write(format("%04d ", ins))
      end
      out:write(format("%s%s %s %s ",
                       band(ot, 64) == 0 and " " or ">",
                       band(ot, 128) == 0 and " " or "+",
                       irtype[t], op))
      local m1 = band(m, 3)
      if m1 ~= 3 then -- op1 != IRMnone
        if op1 < 0 then
          out:write(formatk(tr, op1))
        else
          out:write(format(m1 == 0 and "%04d" or "#%-3d", op1))
        end
        local m2 = band(m, 3*4)
        if m2 ~= 3*4 then -- op2 != IRMnone
          if m2 == 1*4 then -- op2 == IRMlit
            local litn = litname[op]
            if litn and litn[op2] then
              out:write(" ", litn[op2])
            else
              out:write(format(" #%-3d", op2))
            end
          elseif op2 < 0 then
            out:write(" ", formatk(tr, op2))
          else
            out:write(format(" %04d", op2))
          end
        end
      end
      out:write("\n")
    end
  end
  -- Print the snapshot that is still pending after the last instruction.
  if snap then
    if dumpreg then
      out:write(format(".... SNAP #%-3d [ ", snapno))
    else
      out:write(format(".... SNAP #%-3d [ ", snapno))
    end
    printsnap(tr, snap)
  end
end
396 | |||
397 | ------------------------------------------------------------------------------ | ||
398 | |||
399 | local recprefix = "" | ||
400 | local recdepth = 0 | ||
401 | |||
-- Format trace error message.
-- Numeric errors are expanded via the vmdef.traceerr format table; a function
-- passed as info is replaced by its fast-function name or source location.
-- Non-numeric errors are returned unchanged.
local function fmterr(err, info)
  if type(err) ~= "number" then return err end
  if type(info) == "function" then
    local fi = funcinfo(info)
    if fi.ffid then
      info = vmdef.ffnames[fi.ffid]
    else
      info = fi.loc
    end
  end
  return format(vmdef.traceerr[err], info)
end
417 | |||
-- Dump trace states.
-- Handler attached to the "trace" event.
--   what      event name: "start", "stop", "abort" or anything else
--   tr        trace number
--   func, pc  function and bytecode position, resolved with funcinfo()
--   otr, oex  on "start": printed as "otr/oex" when otr is set;
--             on "abort": the error and its info, as passed to fmterr()
local function dump_trace(what, tr, func, pc, otr, oex)
  -- Emit the selected dumps before the closing status line.
  if what == "stop" or (what == "abort" and dumpmode.a) then
    if dumpmode.i then dump_ir(tr, dumpmode.s, dumpmode.r and what == "stop")
    elseif dumpmode.s then dump_snap(tr) end
    if dumpmode.m then dump_mcode(tr) end
  end
  if what == "start" then
    if dumpmode.H then out:write('<pre class="ljdump">\n') end
    out:write("---- TRACE ", tr, " ", what)
    if otr then out:write(" ", otr, "/", oex) end
    local fi = funcinfo(func, pc)
    out:write(" ", fi.loc, "\n")
    -- Reset the recording indentation state used by dump_record(). Both
    -- values must be reset together so the prefix matches the depth.
    recprefix = ""
    recdepth = 0
  elseif what == "stop" or what == "abort" then
    out:write("---- TRACE ", tr, " ", what)
    recprefix = nil
    if what == "abort" then
      local fi = funcinfo(func, pc)
      out:write(" ", fi.loc, " -- ", fmterr(otr, oex), "\n")
    else
      local link = traceinfo(tr).link
      if link == tr then
        link = "loop"
      elseif link == 0 then
        link = "interpreter"
      end
      out:write(" -> ", link, "\n")
    end
    if dumpmode.H then out:write("</pre>\n\n") else out:write("\n") end
  else
    out:write("---- TRACE ", what, "\n\n")
  end
  out:flush()
end
454 | |||
-- Dump recorded bytecode.
-- Handler attached to the "record" event. Writes the bytecode line at pc,
-- prefixed according to the call depth, and annotates it with the callee
-- when one is passed as a function.
local function dump_record(tr, func, pc, depth, callee)
  -- Rebuild the indentation prefix only when the depth changes.
  if depth ~= recdepth then
    recdepth = depth
    recprefix = rep(" .", depth)
  end
  local line = bcline(func, pc, recprefix)
  if dumpmode.H then line = gsub(line, "[<>&]", html_escape) end
  if type(callee) ~= "function" then
    out:write(line)
  else
    local fi = funcinfo(callee)
    local target
    if fi.ffid then
      target = vmdef.ffnames[fi.ffid]
    else
      target = fi.loc
    end
    -- Drop the last character of the line before appending the annotation.
    out:write(sub(line, 1, -2), " ; ", target, "\n")
  end
  if band(funcbc(func, pc), 0xff) < 16 then -- Write JMP for cond. ORDER BC
    out:write(bcline(func, pc+1, recprefix))
  end
end
477 | |||
478 | ------------------------------------------------------------------------------ | ||
479 | |||
-- Dump taken trace exits.
-- Handler attached to the "texit" event. With dump mode X the varargs are
-- printed too: the first ngpr values as hex, 8 per line, and the next nfpr
-- values as floating-point numbers, 4 per line.
local function dump_texit(tr, ex, ngpr, nfpr, ...)
  out:write("---- TRACE ", tr, " exit ", ex, "\n")
  if not dumpmode.X then return end
  local regs = {...}
  for i=1,ngpr do
    out:write(format(" %08x", regs[i]))
    if i % 8 == 0 then out:write("\n") end
  end
  for j=1,nfpr do
    out:write(format(" %+17.14g", regs[ngpr+j]))
    if j % 4 == 0 then out:write("\n") end
  end
end
495 | |||
496 | ------------------------------------------------------------------------------ | ||
497 | |||
-- Detach dump handlers.
-- Does nothing unless dumping is active. Closes the output file unless it
-- is stdout or stderr.
local function dumpoff()
  if not active then return end
  active = false
  jit.attach(dump_texit)
  jit.attach(dump_record)
  jit.attach(dump_trace)
  local isstd = (out == stdout or out == stderr)
  if out and not isstd then out:close() end
  out = nil
end
509 | |||
-- Open the output file and attach dump handlers.
--   opt      option string of single-letter flags; T, A or H select the
--            color mode and are removed before the other flags are parsed
--   outfile  output file name, "-" for stdout; falls back to the environment
--            variable LUAJIT_DUMPFILE and then to stdout
local function dumpon(opt, outfile)
  if active then dumpoff() end

  -- Default color mode depends on COLORTERM; an explicit flag overrides it.
  local colormode = "T"
  if os.getenv("COLORTERM") then colormode = "A" end
  if opt then
    opt = gsub(opt, "[TAH]", function(mode) colormode = mode; return ""; end)
  end

  local flags = { t=true, b=true, i=true, m=true, }
  if opt and opt ~= "" then
    local first = sub(opt, 1, 1)
    -- Without a leading '+' or '-' the options replace the defaults.
    if first ~= "+" and first ~= "-" then flags = {} end
    local enable = (first ~= "-")
    for i=1,#opt do flags[sub(opt, i, i)] = enable end
  end
  dumpmode = flags

  if flags.t or flags.b or flags.i or flags.s or flags.m then
    jit.attach(dump_trace, "trace")
  end
  if flags.b then
    jit.attach(dump_record, "record")
    if not bcline then bcline = require("jit.bc").line end
  end
  if flags.x or flags.X then
    jit.attach(dump_texit, "texit")
  end

  outfile = outfile or os.getenv("LUAJIT_DUMPFILE")
  if not outfile then
    out = stdout
  else
    out = outfile == "-" and stdout or assert(io.open(outfile, "w"))
  end

  flags[colormode] = true
  if colormode == "H" then
    colorize = colorize_html
    irtype = irtype_html
    out:write(header_html)
  elseif colormode == "A" then
    colorize = colorize_ansi
    irtype = irtype_ansi
  else
    colorize = colorize_text
    irtype = irtype_text
  end

  active = true
end
560 | |||
561 | -- Public module functions. | ||
562 | module(...) | ||
563 | |||
564 | on = dumpon | ||
565 | off = dumpoff | ||
566 | start = dumpon -- For -j command line option. | ||
567 | |||
diff --git a/lib/v.lua b/lib/v.lua new file mode 100644 index 00000000..39fb8ed5 --- /dev/null +++ b/lib/v.lua | |||
@@ -0,0 +1,156 @@ | |||
1 | ---------------------------------------------------------------------------- | ||
2 | -- Verbose mode of the LuaJIT compiler. | ||
3 | -- | ||
4 | -- Copyright (C) 2005-2009 Mike Pall. All rights reserved. | ||
5 | -- Released under the MIT/X license. See Copyright Notice in luajit.h | ||
6 | ---------------------------------------------------------------------------- | ||
7 | -- | ||
8 | -- This module shows verbose information about the progress of the | ||
9 | -- JIT compiler. It prints one line for each generated trace. This module | ||
10 | -- is useful to see which code has been compiled or where the compiler | ||
11 | -- punts and falls back to the interpreter. | ||
12 | -- | ||
13 | -- Example usage: | ||
14 | -- | ||
15 | -- luajit -jv -e "for i=1,1000 do for j=1,1000 do end end" | ||
16 | -- luajit -jv=myapp.out myapp.lua | ||
17 | -- | ||
18 | -- Default output is to stderr. To redirect the output to a file, pass a | ||
19 | -- filename as an argument (use '-' for stdout) or set the environment | ||
20 | -- variable LUAJIT_VERBOSEFILE. The file is overwritten every time the | ||
21 | -- module is started. | ||
22 | -- | ||
23 | -- The output from the first example should look like this: | ||
24 | -- | ||
25 | -- [TRACE 1 (command line):1] | ||
26 | -- [TRACE 2 (1/3) (command line):1 -> 1] | ||
27 | -- | ||
28 | -- The first number in each line is the internal trace number. Next are | ||
29 | -- the file name ('(command line)') and the line number (':1') where the | ||
30 | -- trace has started. Side traces also show the parent trace number and | ||
31 | -- the exit number where they are attached to in parentheses ('(1/3)'). | ||
32 | -- An arrow at the end shows where the trace links to ('-> 1'), unless | ||
33 | -- it loops to itself. | ||
34 | -- | ||
35 | -- In this case the inner loop gets hot and is traced first, generating | ||
36 | -- a root trace. Then the last exit from the 1st trace gets hot, too, | ||
37 | -- and triggers generation of the 2nd trace. The side trace follows the | ||
38 | -- path along the outer loop and *around* the inner loop, back to its | ||
39 | -- start, and then links to the 1st trace. Yes, this may seem unusual, | ||
40 | -- if you know how traditional compilers work. Trace compilers are full | ||
41 | -- of surprises like this -- have fun! :-) | ||
42 | -- | ||
43 | -- Aborted traces are shown like this: | ||
44 | -- | ||
45 | -- [TRACE --- foo.lua:44 -- leaving loop in root trace at foo.lua:50] | ||
46 | -- | ||
47 | -- Don't worry -- trace aborts are quite common, even in programs which | ||
48 | -- can be fully compiled. The compiler may retry several times until it | ||
49 | -- finds a suitable trace. | ||
50 | -- | ||
51 | -- Of course this doesn't work with features that are not-yet-implemented | ||
52 | -- (NYI error messages). The VM simply falls back to the interpreter. This | ||
53 | -- may not matter at all if the particular trace is not very high up in | ||
54 | -- the CPU usage profile. Oh, and the interpreter is quite fast, too. | ||
55 | -- | ||
56 | -- Also check out the -jdump module, which prints all the gory details. | ||
57 | -- | ||
58 | ------------------------------------------------------------------------------ | ||
59 | |||
60 | -- Cache some library functions and objects. | ||
61 | local jit = require("jit") | ||
62 | assert(jit.version_num == 20000, "LuaJIT core/library version mismatch") | ||
63 | local jutil = require("jit.util") | ||
64 | local vmdef = require("jit.vmdef") | ||
65 | local funcinfo, traceinfo = jutil.funcinfo, jutil.traceinfo | ||
66 | local type, format = type, string.format | ||
67 | local stdout, stderr = io.stdout, io.stderr | ||
68 | |||
69 | -- Active flag and output file handle. | ||
70 | local active, out | ||
71 | |||
72 | ------------------------------------------------------------------------------ | ||
73 | |||
74 | local startloc, startex | ||
75 | |||
-- Format trace error message.
-- A numeric error selects a format string from vmdef.traceerr. If info is a
-- function, it is first turned into its fast-function name or source
-- location. Any other kind of error is passed through as is.
local function fmterr(err, info)
  local isnum = type(err) == "number"
  if isnum and type(info) == "function" then
    local fi = funcinfo(info)
    if fi.ffid then
      info = vmdef.ffnames[fi.ffid]
    else
      info = fi.loc
    end
  end
  if isnum then
    err = format(vmdef.traceerr[err], info)
  end
  return err
end
91 | |||
-- Dump trace states.
-- Handler attached to the "trace" event. A "start" event only records the
-- start location and parent exit; every other event writes one line.
local function dump_trace(what, tr, func, pc, otr, oex)
  if what == "start" then
    startloc = funcinfo(func, pc).loc
    startex = otr and "("..otr.."/"..oex..") " or ""
    return
  end
  local msg
  if what == "abort" then
    local loc = funcinfo(func, pc).loc
    if loc == startloc then
      msg = format("[TRACE --- %s%s -- %s]\n",
                   startex, startloc, fmterr(otr, oex))
    else
      -- Mention the abort location only if it differs from the start.
      msg = format("[TRACE --- %s%s -- %s at %s]\n",
                   startex, startloc, fmterr(otr, oex), loc)
    end
  elseif what == "stop" then
    local link = traceinfo(tr).link
    if link == tr then
      msg = format("[TRACE %3s %s%s]\n", tr, startex, startloc)
    elseif link == 0 then
      msg = format("[TRACE %3s %s%s -- fallback to interpreter]\n",
                   tr, startex, startloc)
    else
      msg = format("[TRACE %3s %s%s -> %d]\n",
                   tr, startex, startloc, link)
    end
  else
    msg = format("[TRACE %s]\n", what)
  end
  out:write(msg)
  out:flush()
end
124 | |||
125 | ------------------------------------------------------------------------------ | ||
126 | |||
-- Detach dump handlers.
-- Does nothing unless the module is active. Closes the output file unless
-- it is stdout or stderr.
local function dumpoff()
  if not active then return end
  active = false
  jit.attach(dump_trace)
  local isstd = (out == stdout or out == stderr)
  if out and not isstd then out:close() end
  out = nil
end
136 | |||
-- Open the output file and attach dump handlers.
-- outfile is a file name or "-" for stdout. Without it the environment
-- variable LUAJIT_VERBOSEFILE is used, and stderr if that is unset as well.
local function dumpon(outfile)
  if active then dumpoff() end
  outfile = outfile or os.getenv("LUAJIT_VERBOSEFILE")
  if not outfile then
    out = stderr
  else
    out = outfile == "-" and stdout or assert(io.open(outfile, "w"))
  end
  jit.attach(dump_trace, "trace")
  active = true
end
149 | |||
150 | -- Public module functions. | ||
151 | module(...) | ||
152 | |||
153 | on = dumpon | ||
154 | off = dumpoff | ||
155 | start = dumpon -- For -j command line option. | ||
156 | |||
diff --git a/src/.gitignore b/src/.gitignore new file mode 100644 index 00000000..e9f998ce --- /dev/null +++ b/src/.gitignore | |||
@@ -0,0 +1,8 @@ | |||
1 | luajit | ||
2 | buildvm | ||
3 | buildvm_*.h | ||
4 | lj_ffdef.h | ||
5 | lj_libdef.h | ||
6 | lj_recdef.h | ||
7 | lj_folddef.h | ||
8 | lj_vm.s | ||
diff --git a/src/Makefile b/src/Makefile new file mode 100644 index 00000000..bb1839d1 --- /dev/null +++ b/src/Makefile | |||
@@ -0,0 +1,326 @@ | |||
1 | ############################################################################## | ||
2 | # LuaJIT Makefile. Requires GNU Make. | ||
3 | # | ||
4 | # Suitable for POSIX platforms (Linux, *BSD, OSX etc.). | ||
5 | # Also works with MinGW and Cygwin on Windows. | ||
6 | # Please check msvcbuild.bat for building with MSVC on Windows. | ||
7 | # | ||
8 | # Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
9 | ############################################################################## | ||
10 | |||
11 | ############################################################################## | ||
12 | # Compiler options: change them as needed. This mainly affects the speed of | ||
13 | # the JIT compiler itself, not the speed of the JIT compiled code. | ||
14 | # Turn any of the optional settings on by removing the '#' in front of them. | ||
15 | # | ||
16 | # Note: LuaJIT can only be compiled for x86, and not for x64 (yet)! | ||
17 | # In the meantime, the x86 binary runs fine under a x64 OS. | ||
18 | # | ||
19 | # It's recommended to compile at least for i686. By default the assembler part | ||
20 | # of the interpreter makes use of CMOV/FCOMI*/FUCOMI* instructions, anyway. | ||
21 | CC= gcc -m32 -march=i686 | ||
22 | # Use this for GCC 4.2 or higher if you don't intend to distribute the | ||
23 | # binaries to a different machine: | ||
24 | #CC= gcc -m32 -march=native | ||
25 | # | ||
26 | # Since the assembler part does NOT maintain a frame pointer, it's pointless | ||
27 | # to slow down the C part by not omitting it. Debugging and tracebacks are | ||
28 | # not affected -- the assembler part has frame unwind information and GCC | ||
29 | # emits it with -g (see CCDEBUG below). | ||
30 | CCOPT= -O2 -fomit-frame-pointer | ||
31 | # Use this if you want to generate a smaller binary (but it's slower): | ||
32 | #CCOPT= -Os -fomit-frame-pointer | ||
33 | # Note: it's no longer recommended to use -O3 with GCC 4.x. | ||
34 | # The I-Cache bloat usually outweighs the benefits from aggressive inlining. | ||
35 | # | ||
36 | CCDEBUG= | ||
37 | # Uncomment the next line to generate debug information: | ||
38 | #CCDEBUG= -g | ||
39 | # | ||
40 | CCWARN= -Wall | ||
41 | # Uncomment the next line to enable more warnings: | ||
42 | #CCWARN+= -Wextra -Wdeclaration-after-statement -Wredundant-decls -Wshadow -Wpointer-arith | ||
43 | # | ||
44 | ############################################################################## | ||
45 | |||
46 | ############################################################################## | ||
47 | # Compile time definitions: change them as needed, but make sure you force | ||
48 | # a full recompile with "make clean", followed by "make". | ||
49 | # Note that most of these are NOT suitable for benchmarking or release mode! | ||
50 | XCFLAGS= | ||
51 | # | ||
52 | # Disable the use of CMOV and FCOMI*/FUCOMI* instructions in the interpreter. | ||
53 | # This is only necessary if you intend to run the code on REALLY ANCIENT CPUs | ||
54 | # (before Pentium Pro, or on the VIA C3). This generally slows down the | ||
55 | # interpreter. Don't bother if your OS wouldn't run on them, anyway. | ||
56 | #XCFLAGS+= -DLUAJIT_CPU_NOCMOV | ||
57 | # | ||
58 | # Disable the JIT compiler, i.e. turn LuaJIT into a pure interpreter: | ||
59 | #XCFLAGS+= -DLUAJIT_DISABLE_JIT | ||
60 | # | ||
61 | # Use the system provided memory allocator (realloc) instead of the | ||
62 | # bundled memory allocator. This is slower, but sometimes helpful for | ||
63 | # debugging. It's mandatory for Valgrind's memcheck tool, too. | ||
64 | #XCFLAGS+= -DLUAJIT_USE_SYSMALLOC | ||
65 | # | ||
66 | # This define is required to run LuaJIT under Valgrind. The Valgrind | ||
67 | # header files must be installed. You should enable debug information, too. | ||
68 | #XCFLAGS+= -DLUAJIT_USE_VALGRIND | ||
69 | # | ||
70 | # This is the client for the GDB JIT API. GDB 7.0 or higher is required | ||
71 | # to make use of it. See lj_gdbjit.c for details. Enabling this causes | ||
72 | # a non-negligible overhead, even when not running under GDB. | ||
73 | #XCFLAGS+= -DLUAJIT_USE_GDBJIT | ||
74 | # | ||
75 | # Turn on assertions for the Lua/C API to debug problems with lua_* calls. | ||
76 | # This is rather slow -- use only while developing C libraries/embeddings. | ||
77 | #XCFLAGS+= -DLUA_USE_APICHECK | ||
78 | # | ||
79 | # Turn on assertions for the whole LuaJIT VM. This significantly slows down | ||
80 | # everything. Use only if you suspect a problem with LuaJIT itself. | ||
81 | #XCFLAGS+= -DLUA_USE_ASSERT | ||
82 | # | ||
83 | ############################################################################## | ||
84 | # You probably don't need to change anything below this line. | ||
85 | ############################################################################## | ||
86 | |||
87 | CCOPTIONS= $(CCDEBUG) $(CCOPT) $(CCWARN) $(CFLAGS) $(XCFLAGS) | ||
88 | LDOPTIONS= $(CCDEBUG) $(LDFLAGS) | ||
89 | |||
90 | HOST_CC= $(CC) | ||
91 | HOST_RM= rm -f | ||
92 | HOST_XCFLAGS= | ||
93 | HOST_XLDFLAGS= | ||
94 | HOST_XLIBS= | ||
95 | |||
96 | TARGET_CC= $(CC) | ||
97 | TARGET_STRIP= strip | ||
98 | TARGET_XCFLAGS= -D_FILE_OFFSET_BITS=64 | ||
99 | TARGET_XLDFLAGS= | ||
100 | TARGET_XSHLDFLAGS= -shared | ||
101 | TARGET_XLIBS= | ||
102 | TARGET_ARCH= $(patsubst %,-DLUAJIT_TARGET=LUAJIT_ARCH_%,$(TARGET)) | ||
# Flags that switch off compiler features for the target objects.
# -U_FORTIFY_SOURCE undefines _FORTIFY_SOURCE.
TARGET_DISABLE= -U_FORTIFY_SOURCE
# Add -fno-stack-protector only if the compiler's spec file mentions the
# stack protector. Probe $(TARGET_CC), since that is the compiler these
# flags are passed to; it defaults to $(CC) but may be overridden.
ifneq (,$(findstring stack-protector,$(shell $(TARGET_CC) -dumpspecs)))
  TARGET_DISABLE+= -fno-stack-protector
endif
107 | |||
108 | ifneq (,$(findstring Windows,$(OS))) | ||
109 | TARGET_SYS= Windows | ||
110 | else | ||
111 | TARGET_SYS:= $(shell uname -s) | ||
112 | ifneq (,$(findstring CYGWIN,$(TARGET_SYS))) | ||
113 | TARGET_SYS= Windows | ||
114 | endif | ||
115 | endif | ||
116 | |||
117 | ifeq (Linux,$(TARGET_SYS)) | ||
118 | TARGET_XLIBS= -ldl | ||
119 | TARGET_XLDFLAGS= -Wl,-E | ||
120 | else | ||
121 | ifeq (Windows,$(TARGET_SYS)) | ||
122 | HOST_RM= del | ||
123 | TARGET_STRIP= strip --strip-unneeded | ||
124 | else | ||
125 | ifeq (Darwin,$(TARGET_SYS)) | ||
126 | TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup | ||
127 | TARGET_STRIP= strip -x | ||
128 | export MACOSX_DEPLOYMENT_TARGET=10.3 | ||
129 | else | ||
130 | TARGET_XLDFLAGS= -Wl,-E | ||
131 | endif | ||
132 | endif | ||
133 | endif | ||
134 | |||
135 | # NOTE: The LuaJIT distribution comes with a pre-generated buildvm_*.h. | ||
136 | # You DO NOT NEED an installed copy of (plain) Lua 5.1 to run DynASM unless | ||
137 | # you want to MODIFY the corresponding *.dasc file. You can also use LuaJIT | ||
138 | # itself (bootstrapped from the pre-generated file) to run DynASM of course. | ||
139 | DASM_LUA= lua | ||
140 | |||
141 | Q= @ | ||
142 | E= @echo | ||
143 | #Q= | ||
144 | #E= @: | ||
145 | |||
146 | ############################################################################## | ||
147 | |||
148 | TARGET_CFLAGS= $(CCOPTIONS) $(TARGET_DISABLE) $(TARGET_XCFLAGS) | ||
149 | TARGET_LDFLAGS= $(LDOPTIONS) $(TARGET_XLDFLAGS) | ||
150 | TARGET_SHLDFLAGS= $(LDOPTIONS) $(TARGET_XSHLDFLAGS) | ||
151 | TARGET_LIBS= -lm $(TARGET_XLIBS) | ||
152 | ifneq (,$(CCDEBUG)) | ||
153 | TARGET_STRIP= @: | ||
154 | endif | ||
155 | |||
156 | HOST_CFLAGS= $(CCOPTIONS) $(HOST_XCFLAGS) $(TARGET_ARCH) | ||
157 | HOST_LDFLAGS= $(LDOPTIONS) $(HOST_XLDFLAGS) | ||
158 | HOST_LIBS= $(HOST_XLIBS) | ||
159 | |||
160 | DASM_DIR= ../dynasm | ||
161 | DASM= $(DASM_LUA) $(DASM_DIR)/dynasm.lua | ||
162 | DASM_FLAGS= | ||
163 | DASM_DISTFLAGS= -LN | ||
164 | |||
165 | BUILDVM_O= buildvm.o buildvm_asm.o buildvm_peobj.o buildvm_lib.o buildvm_fold.o | ||
166 | BUILDVM_T= buildvm | ||
167 | |||
168 | HOST_O= $(BUILDVM_O) | ||
169 | HOST_T= $(BUILDVM_T) | ||
170 | |||
171 | LJVM_S= lj_vm.s | ||
172 | LJVM_O= lj_vm.o | ||
173 | LJVM_BOUT= $(LJVM_S) | ||
174 | LJVM_MODE= asm | ||
175 | |||
176 | LJLIB_O= lib_base.o lib_math.o lib_bit.o lib_string.o lib_table.o \ | ||
177 | lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o | ||
178 | LJLIB_C= $(LJLIB_O:.o=.c) | ||
179 | |||
180 | LJCORE_O= lj_gc.o lj_err.o lj_ctype.o lj_bc.o lj_obj.o \ | ||
181 | lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o \ | ||
182 | lj_state.o lj_dispatch.o lj_vmevent.o lj_api.o \ | ||
183 | lj_lex.o lj_parse.o \ | ||
184 | lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \ | ||
185 | lj_opt_dce.o lj_opt_loop.o \ | ||
186 | lj_mcode.o lj_snap.o lj_record.o lj_asm.o lj_trace.o lj_gdbjit.o \ | ||
187 | lj_lib.o lj_alloc.o lib_aux.o \ | ||
188 | $(LJLIB_O) lib_init.o | ||
189 | |||
190 | LJVMCORE_O= $(LJVM_O) $(LJCORE_O) | ||
191 | |||
192 | # NYI: Need complete support for building as a shared library on POSIX. | ||
193 | # This is currently *only* suitable for MinGW and Cygwin, see below. | ||
194 | LUAJIT_O= luajit.o | ||
195 | LUAJIT_SO= luajit.so | ||
196 | LUAJIT_T= luajit | ||
197 | |||
198 | LIB_VMDEF= ../lib/vmdef.lua | ||
199 | |||
200 | TARGET_DEP= $(LIB_VMDEF) | ||
201 | TARGET_O= $(LJVMCORE_O) $(LUAJIT_O) | ||
202 | TARGET_T= $(LUAJIT_T) | ||
203 | |||
204 | ALL_GEN= $(LJVM_S) lj_ffdef.h lj_libdef.h lj_recdef.h $(LIB_VMDEF) lj_folddef.h | ||
205 | ALL_DYNGEN= buildvm_x86.h | ||
206 | WIN_RM= *.obj *.lib *.exp *.dll *.exe *.manifest | ||
207 | ALL_RM= $(LUAJIT_T) $(LUAJIT_SO) $(HOST_T) $(ALL_GEN) *.o $(WIN_RM) | ||
208 | |||
209 | ifeq (Windows,$(TARGET_SYS)) | ||
210 | LJVM_BOUT= $(LJVM_O) | ||
211 | LJVM_MODE= peobj | ||
212 | LIB_VMDEF= ..\lib\vmdef.lua | ||
213 | # Imported symbols are bound to a specific DLL name under Windows. | ||
214 | LUAJIT_SO= lua51.dll | ||
215 | LUAJIT_T= luajit.exe | ||
216 | BUILDVM_T= buildvm.exe | ||
217 | # | ||
218 | # You can comment out the following two lines to build a static executable. | ||
219 | # But then you won't be able to dynamically load any C modules, because | ||
220 | # they bind to lua51.dll. | ||
221 | # | ||
222 | TARGET_XCFLAGS+= -DLUA_BUILD_AS_DLL | ||
223 | TARGET_O= $(LUAJIT_SO) $(LUAJIT_O) | ||
224 | endif | ||
225 | |||
226 | ############################################################################## | ||
227 | |||
228 | default: $(TARGET_T) | ||
229 | |||
230 | all: $(TARGET_T) | ||
231 | |||
232 | amalg: | ||
233 | @grep "^[+|]" ljamalg.c | ||
234 | $(MAKE) all "LJCORE_O=ljamalg.o" | ||
235 | |||
236 | MAKE_TARGETS= amalg | ||
237 | |||
238 | ############################################################################## | ||
239 | |||
240 | buildvm_x86.h: buildvm_x86.dasc | ||
241 | $(E) "DYNASM $@" | ||
242 | $(Q)$(DASM) $(DASM_FLAGS) -o $@ buildvm_x86.dasc | ||
243 | |||
244 | $(BUILDVM_T): $(BUILDVM_O) | ||
245 | $(E) "HOSTLINK $@" | ||
246 | $(Q)$(HOST_CC) $(HOST_LDFLAGS) -o $@ $(BUILDVM_O) $(HOST_LIBS) | ||
247 | |||
248 | $(LJVM_BOUT): $(BUILDVM_T) | ||
249 | $(E) "BUILDVM $@" | ||
250 | $(Q)./$(BUILDVM_T) -m $(LJVM_MODE) -o $@ | ||
251 | |||
252 | lj_ffdef.h: $(BUILDVM_T) $(LJLIB_C) | ||
253 | $(E) "BUILDVM $@" | ||
254 | $(Q)./$(BUILDVM_T) -m ffdef -o $@ $(LJLIB_C) | ||
255 | |||
256 | lj_libdef.h: $(BUILDVM_T) $(LJLIB_C) | ||
257 | $(E) "BUILDVM $@" | ||
258 | $(Q)./$(BUILDVM_T) -m libdef -o $@ $(LJLIB_C) | ||
259 | |||
260 | lj_recdef.h: $(BUILDVM_T) $(LJLIB_C) | ||
261 | $(E) "BUILDVM $@" | ||
262 | $(Q)./$(BUILDVM_T) -m recdef -o $@ $(LJLIB_C) | ||
263 | |||
264 | $(LIB_VMDEF): $(BUILDVM_T) $(LJLIB_C) | ||
265 | $(E) "BUILDVM $@" | ||
266 | $(Q)./$(BUILDVM_T) -m vmdef -o $@ $(LJLIB_C) | ||
267 | |||
268 | lj_folddef.h: $(BUILDVM_T) lj_opt_fold.c | ||
269 | $(E) "BUILDVM $@" | ||
270 | $(Q)./$(BUILDVM_T) -m folddef -o $@ lj_opt_fold.c | ||
271 | |||
272 | $(LUAJIT_SO): $(LJVMCORE_O) | ||
273 | $(E) "LINK $@" | ||
274 | $(Q)$(TARGET_CC) $(TARGET_SHLDFLAGS) -o $@ $(LJVMCORE_O) $(TARGET_LIBS) | ||
275 | $(Q)$(TARGET_STRIP) $@ | ||
276 | |||
277 | $(LUAJIT_T): $(TARGET_O) $(TARGET_DEP) | ||
278 | $(E) "LINK $@" | ||
279 | $(Q)$(TARGET_CC) $(TARGET_LDFLAGS) -o $@ $(TARGET_O) $(TARGET_LIBS) | ||
280 | $(Q)$(TARGET_STRIP) $@ | ||
281 | $(E) "OK Successfully built LuaJIT" | ||
282 | |||
283 | ############################################################################## | ||
284 | |||
285 | %.o: %.c | ||
286 | $(E) "CC $@" | ||
287 | $(Q)$(TARGET_CC) $(TARGET_CFLAGS) -c -o $@ $< | ||
288 | |||
289 | %.o: %.s | ||
290 | $(E) "ASM $@" | ||
291 | $(Q)$(TARGET_CC) $(TARGET_CFLAGS) -c -o $@ $< | ||
292 | |||
293 | $(HOST_O): %.o: %.c | ||
294 | $(E) "HOSTCC $@" | ||
295 | $(Q)$(HOST_CC) $(HOST_CFLAGS) -c -o $@ $< | ||
296 | |||
297 | include Makefile.dep | ||
298 | |||
299 | ############################################################################## | ||
300 | |||
301 | clean: | ||
302 | $(HOST_RM) $(ALL_RM) | ||
303 | |||
304 | cleaner: clean | ||
305 | $(HOST_RM) $(ALL_DYNGEN) | ||
306 | |||
307 | distclean: clean | ||
308 | $(E) "DYNASM $@" | ||
309 | $(Q)$(DASM) $(DASM_DISTFLAGS) -o buildvm_x86.h buildvm_x86.dasc | ||
310 | |||
311 | depend: | ||
312 | @test -f lj_ffdef.h || touch lj_ffdef.h | ||
313 | @test -f lj_libdef.h || touch lj_libdef.h | ||
314 | @test -f lj_recdef.h || touch lj_recdef.h | ||
315 | @test -f lj_folddef.h || touch lj_folddef.h | ||
316 | @test -f buildvm_x86.h || touch buildvm_x86.h | ||
317 | @$(HOST_CC) $(HOST_CFLAGS) -MM *.c | sed "s|$(DASM_DIR)|\$$(DASM_DIR)|g" >Makefile.dep | ||
318 | @test -s lj_ffdef.h || $(HOST_RM) lj_ffdef.h | ||
319 | @test -s lj_libdef.h || $(HOST_RM) lj_libdef.h | ||
320 | @test -s lj_recdef.h || $(HOST_RM) lj_recdef.h | ||
321 | @test -s lj_folddef.h || $(HOST_RM) lj_folddef.h | ||
322 | @test -s buildvm_x86.h || $(HOST_RM) buildvm_x86.h | ||
323 | |||
324 | .PHONY: default all $(MAKE_TARGETS) clean cleaner distclean depend | ||
325 | |||
326 | ############################################################################## | ||
diff --git a/src/Makefile.dep b/src/Makefile.dep new file mode 100644 index 00000000..b1cdd93b --- /dev/null +++ b/src/Makefile.dep | |||
@@ -0,0 +1,139 @@ | |||
1 | buildvm.o: buildvm.c lua.h luaconf.h luajit.h lj_obj.h lj_def.h lj_arch.h \ | ||
2 | lj_gc.h lj_bc.h lj_ir.h lj_frame.h lj_dispatch.h lj_jit.h lj_target.h \ | ||
3 | lj_target_x86.h buildvm.h $(DASM_DIR)/dasm_proto.h $(DASM_DIR)/dasm_x86.h \ | ||
4 | buildvm_x86.h lj_traceerr.h | ||
5 | buildvm_asm.o: buildvm_asm.c buildvm.h lj_def.h lua.h luaconf.h lj_arch.h \ | ||
6 | lj_bc.h | ||
7 | buildvm_fold.o: buildvm_fold.c lj_obj.h lua.h luaconf.h lj_def.h \ | ||
8 | lj_arch.h lj_ir.h buildvm.h | ||
9 | buildvm_lib.o: buildvm_lib.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | ||
10 | lj_lib.h buildvm.h | ||
11 | buildvm_peobj.o: buildvm_peobj.c buildvm.h lj_def.h lua.h luaconf.h \ | ||
12 | lj_arch.h lj_bc.h | ||
13 | lib_aux.o: lib_aux.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \ | ||
14 | lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lj_alloc.h | ||
15 | lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ | ||
16 | lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h \ | ||
17 | lj_meta.h lj_state.h lj_ff.h lj_ffdef.h lj_ctype.h lj_lib.h lj_libdef.h | ||
18 | lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ | ||
19 | lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_lib.h lj_libdef.h | ||
20 | lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ | ||
21 | lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lj_libdef.h | ||
22 | lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h | ||
23 | lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ | ||
24 | lj_arch.h lj_err.h lj_errmsg.h lj_gc.h lj_ff.h lj_ffdef.h lj_lib.h \ | ||
25 | lj_libdef.h | ||
26 | lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h \ | ||
27 | lj_obj.h lj_def.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ir.h \ | ||
28 | lj_jit.h lj_iropt.h lj_dispatch.h lj_bc.h lj_vm.h lj_vmevent.h lj_lib.h \ | ||
29 | luajit.h lj_libdef.h | ||
30 | lib_math.o: lib_math.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ | ||
31 | lj_def.h lj_arch.h lj_lib.h lj_libdef.h | ||
32 | lib_os.o: lib_os.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ | ||
33 | lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lj_libdef.h | ||
34 | lib_package.o: lib_package.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ | ||
35 | lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h | ||
36 | lib_string.o: lib_string.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ | ||
37 | lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_state.h \ | ||
38 | lj_ff.h lj_ffdef.h lj_ctype.h lj_lib.h lj_libdef.h | ||
39 | lib_table.o: lib_table.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ | ||
40 | lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_lib.h \ | ||
41 | lj_libdef.h | ||
42 | lj_alloc.o: lj_alloc.c lj_def.h lua.h luaconf.h lj_arch.h lj_alloc.h | ||
43 | lj_api.o: lj_api.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ | ||
44 | lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_udata.h lj_meta.h \ | ||
45 | lj_state.h lj_frame.h lj_bc.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \ | ||
46 | lj_traceerr.h lj_vm.h lj_lex.h lj_parse.h | ||
47 | lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ | ||
48 | lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_mcode.h lj_trace.h \ | ||
49 | lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h lj_asm.h lj_vm.h \ | ||
50 | lj_target.h lj_target_x86.h | ||
51 | lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h | ||
52 | lj_ctype.o: lj_ctype.c lj_ctype.h lj_def.h lua.h luaconf.h | ||
53 | lj_dispatch.o: lj_dispatch.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | ||
54 | lj_err.h lj_errmsg.h lj_state.h lj_frame.h lj_bc.h lj_jit.h lj_ir.h \ | ||
55 | lj_trace.h lj_dispatch.h lj_traceerr.h lj_vm.h luajit.h | ||
56 | lj_err.o: lj_err.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_err.h \ | ||
57 | lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_state.h lj_frame.h lj_bc.h \ | ||
58 | lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h | ||
59 | lj_func.o: lj_func.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ | ||
60 | lj_func.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \ | ||
61 | lj_traceerr.h lj_vm.h | ||
62 | lj_gc.o: lj_gc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ | ||
63 | lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_udata.h lj_meta.h \ | ||
64 | lj_state.h lj_frame.h lj_bc.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \ | ||
65 | lj_traceerr.h lj_vm.h | ||
66 | lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | ||
67 | lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_frame.h lj_bc.h lj_jit.h \ | ||
68 | lj_ir.h lj_dispatch.h | ||
69 | lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ | ||
70 | lj_str.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h \ | ||
71 | lj_traceerr.h | ||
72 | lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ | ||
73 | lj_err.h lj_errmsg.h lj_str.h lj_lex.h lj_parse.h lj_ctype.h | ||
74 | lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \ | ||
75 | lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_vm.h \ | ||
76 | lj_lib.h | ||
77 | lj_mcode.o: lj_mcode.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | ||
78 | lj_gc.h lj_jit.h lj_ir.h lj_mcode.h lj_trace.h lj_dispatch.h lj_bc.h \ | ||
79 | lj_traceerr.h | ||
80 | lj_meta.o: lj_meta.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ | ||
81 | lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_bc.h lj_vm.h | ||
82 | lj_obj.o: lj_obj.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h | ||
83 | lj_opt_dce.o: lj_opt_dce.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | ||
84 | lj_ir.h lj_jit.h lj_iropt.h | ||
85 | lj_opt_fold.o: lj_opt_fold.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | ||
86 | lj_str.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h \ | ||
87 | lj_traceerr.h lj_vm.h lj_folddef.h | ||
88 | lj_opt_loop.o: lj_opt_loop.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | ||
89 | lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_iropt.h \ | ||
90 | lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h lj_vm.h | ||
91 | lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | ||
92 | lj_tab.h lj_ir.h lj_jit.h lj_iropt.h | ||
93 | lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \ | ||
94 | lj_arch.h lj_str.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \ | ||
95 | lj_dispatch.h lj_traceerr.h | ||
96 | lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | ||
97 | lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_state.h \ | ||
98 | lj_bc.h lj_lex.h lj_parse.h lj_vm.h lj_vmevent.h | ||
99 | lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | ||
100 | lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_state.h lj_frame.h \ | ||
101 | lj_bc.h lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \ | ||
102 | lj_dispatch.h lj_traceerr.h lj_record.h lj_snap.h lj_asm.h lj_vm.h \ | ||
103 | lj_recdef.h | ||
104 | lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ | ||
105 | lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \ | ||
106 | lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h lj_target_x86.h | ||
107 | lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | ||
108 | lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_meta.h \ | ||
109 | lj_state.h lj_frame.h lj_bc.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \ | ||
110 | lj_traceerr.h lj_vm.h lj_lex.h lj_alloc.h | ||
111 | lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ | ||
112 | lj_err.h lj_errmsg.h lj_str.h lj_state.h lj_ctype.h | ||
113 | lj_tab.o: lj_tab.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ | ||
114 | lj_err.h lj_errmsg.h lj_tab.h | ||
115 | lj_trace.o: lj_trace.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | ||
116 | lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_frame.h lj_bc.h lj_state.h \ | ||
117 | lj_ir.h lj_jit.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h \ | ||
118 | lj_traceerr.h lj_snap.h lj_gdbjit.h lj_record.h lj_asm.h lj_vm.h \ | ||
119 | lj_vmevent.h lj_target.h lj_target_x86.h | ||
120 | lj_udata.o: lj_udata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | ||
121 | lj_gc.h lj_udata.h | ||
122 | lj_vmevent.o: lj_vmevent.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | ||
123 | lj_str.h lj_tab.h lj_state.h lj_dispatch.h lj_bc.h lj_jit.h lj_ir.h \ | ||
124 | lj_vm.h lj_vmevent.h | ||
125 | ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \ | ||
126 | lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h \ | ||
127 | lj_udata.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_trace.h lj_jit.h \ | ||
128 | lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_err.c lj_ctype.c \ | ||
129 | lj_ctype.h lj_bc.c lj_obj.c lj_str.c lj_tab.c lj_func.c lj_udata.c \ | ||
130 | lj_meta.c lj_state.c lj_lex.h lj_alloc.h lj_dispatch.c luajit.h \ | ||
131 | lj_vmevent.c lj_vmevent.h lj_api.c lj_parse.h lj_lex.c lj_parse.c \ | ||
132 | lj_lib.c lj_lib.h lj_ir.c lj_iropt.h lj_opt_mem.c lj_opt_fold.c \ | ||
133 | lj_folddef.h lj_opt_narrow.c lj_opt_dce.c lj_opt_loop.c lj_snap.h \ | ||
134 | lj_mcode.c lj_mcode.h lj_snap.c lj_target.h lj_target_x86.h lj_record.c \ | ||
135 | lj_ff.h lj_ffdef.h lj_record.h lj_asm.h lj_recdef.h lj_asm.c lj_trace.c \ | ||
136 | lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c lib_base.c lualib.h \ | ||
137 | lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c lib_os.c \ | ||
138 | lib_package.c lib_debug.c lib_bit.c lib_jit.c lib_init.c | ||
139 | luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h | ||
diff --git a/src/buildvm.c b/src/buildvm.c new file mode 100644 index 00000000..b3738db4 --- /dev/null +++ b/src/buildvm.c | |||
@@ -0,0 +1,438 @@ | |||
1 | /* | ||
2 | ** LuaJIT VM builder. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | ** | ||
5 | ** This is a tool to build the hand-tuned assembler code required for | ||
6 | ** LuaJIT's bytecode interpreter. It supports a variety of output formats | ||
7 | ** to feed different toolchains (see usage() below). | ||
8 | ** | ||
9 | ** This tool is not particularly optimized because it's only used while | ||
10 | ** _building_ LuaJIT. There's no point in distributing or installing it. | ||
11 | ** Only the object code generated by this tool is linked into LuaJIT. | ||
12 | ** | ||
13 | ** Caveat: some memory is not free'd, error handling is lazy. | ||
14 | ** It's a one-shot tool -- any effort fixing this would be wasted. | ||
15 | */ | ||
16 | |||
17 | #include "lua.h" | ||
18 | #include "luajit.h" | ||
19 | |||
20 | #ifdef LUA_USE_WIN | ||
21 | #include <fcntl.h> | ||
22 | #include <io.h> | ||
23 | #endif | ||
24 | |||
25 | #include "lj_obj.h" | ||
26 | #include "lj_gc.h" | ||
27 | #include "lj_bc.h" | ||
28 | #include "lj_ir.h" | ||
29 | #include "lj_frame.h" | ||
30 | #include "lj_dispatch.h" | ||
31 | #include "lj_target.h" | ||
32 | |||
33 | #include "buildvm.h" | ||
34 | |||
35 | /* ------------------------------------------------------------------------ */ | ||
36 | |||
37 | /* DynASM glue definitions. */ | ||
38 | #define Dst ctx | ||
39 | #define Dst_DECL BuildCtx *ctx | ||
40 | #define Dst_REF (ctx->D) | ||
41 | |||
42 | #include "../dynasm/dasm_proto.h" | ||
43 | |||
/* Memory management glue for DynASM.
**
** DASM_M_GROW ensures that buffer p (element type t, current size sz in
** bytes) holds at least `need` bytes. The size starts at a minimum of 16
** and is doubled until it suffices; p and sz are updated in place. The
** process exits with status 1 if realloc() fails. Nothing happens when
** sz is already >= need. The ctx argument is unused.
*/
#define DASM_M_GROW(ctx, t, p, sz, need) \
  do { \
    size_t _sz = (sz), _need = (need); \
    if (_need > _sz) { \
      _sz = (_sz < 16) ? 16 : _sz; \
      while (_sz < _need) \
	_sz *= 2; \
      (p) = (t *)realloc((p), _sz); \
      if ((p) == NULL) \
	exit(1); \
      (sz) = _sz; \
    } \
  } while(0)

/* Release a buffer obtained through DASM_M_GROW. ctx and sz are unused. */
#define DASM_M_FREE(ctx, p, sz)	free(p)
58 | |||
59 | static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type); | ||
60 | |||
61 | #define DASM_EXTERN(ctx, addr, idx, type) \ | ||
62 | collect_reloc(ctx, addr, idx, type) | ||
63 | |||
64 | /* ------------------------------------------------------------------------ */ | ||
65 | |||
66 | /* Avoid trouble if cross-compiling for an x86 target. Speed doesn't matter. */ | ||
67 | #define DASM_ALIGNED_WRITES 1 | ||
68 | |||
69 | /* Embed architecture-specific DynASM encoder and backend. */ | ||
70 | #if LJ_TARGET_X86 | ||
71 | #include "../dynasm/dasm_x86.h" | ||
72 | #include "buildvm_x86.h" | ||
73 | #else | ||
74 | #error "No support for this architecture (yet)" | ||
75 | #endif | ||
76 | |||
77 | /* ------------------------------------------------------------------------ */ | ||
78 | |||
79 | void owrite(BuildCtx *ctx, const void *ptr, size_t sz) | ||
80 | { | ||
81 | if (fwrite(ptr, 1, sz, ctx->fp) != sz) { | ||
82 | fprintf(stderr, "Error: cannot write to output file: %s\n", | ||
83 | strerror(errno)); | ||
84 | exit(1); | ||
85 | } | ||
86 | } | ||
87 | |||
88 | /* ------------------------------------------------------------------------ */ | ||
89 | |||
90 | /* Emit code as raw bytes. Only used for DynASM debugging. */ | ||
91 | static void emit_raw(BuildCtx *ctx) | ||
92 | { | ||
93 | owrite(ctx, ctx->code, ctx->codesz); | ||
94 | } | ||
95 | |||
96 | /* -- Build machine code -------------------------------------------------- */ | ||
97 | |||
98 | /* Collect external relocations. */ | ||
99 | static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type) | ||
100 | { | ||
101 | if (ctx->nreloc >= BUILD_MAX_RELOC) { | ||
102 | fprintf(stderr, "Error: too many relocations, increase BUILD_MAX_RELOC.\n"); | ||
103 | exit(1); | ||
104 | } | ||
105 | ctx->reloc[ctx->nreloc].ofs = (int32_t)(addr - ctx->code); | ||
106 | ctx->reloc[ctx->nreloc].sym = idx; | ||
107 | ctx->reloc[ctx->nreloc].type = type; | ||
108 | ctx->nreloc++; | ||
109 | return 0; /* Encode symbol offset of 0. */ | ||
110 | } | ||
111 | |||
/* One step of a naive insertion sort (performance doesn't matter here).
**
** perm[0..i-1] must already hold indices ordered by ascending ofs[] value.
** Index i is appended and bubbled down until its predecessor's offset is
** less than or equal to its own, so entries with equal offsets keep their
** insertion order.
*/
static void perm_insert(int *perm, int32_t *ofs, int i)
{
  int k;
  perm[i] = i;
  for (k = i; k > 0 && ofs[perm[k-1]] > ofs[perm[k]]; k--) {
    int tmp = perm[k-1];
    perm[k-1] = perm[k];
    perm[k] = tmp;
  }
}
125 | |||
126 | /* Build the machine code. */ | ||
127 | static int build_code(BuildCtx *ctx) | ||
128 | { | ||
129 | int status; | ||
130 | int i, j; | ||
131 | |||
132 | /* Initialize DynASM structures. */ | ||
133 | ctx->nglob = GLOB__MAX; | ||
134 | ctx->glob = (void **)malloc(ctx->nglob*sizeof(void *)); | ||
135 | memset(ctx->glob, 0, ctx->nglob*sizeof(void *)); | ||
136 | ctx->nreloc = 0; | ||
137 | |||
138 | ctx->extnames = extnames; | ||
139 | ctx->globnames = globnames; | ||
140 | |||
141 | ctx->dasm_ident = DASM_IDENT; | ||
142 | ctx->dasm_arch = DASM_ARCH; | ||
143 | |||
144 | dasm_init(Dst, DASM_MAXSECTION); | ||
145 | dasm_setupglobal(Dst, ctx->glob, ctx->nglob); | ||
146 | dasm_setup(Dst, build_actionlist); | ||
147 | |||
148 | /* Call arch-specific backend to emit the code. */ | ||
149 | ctx->npc = build_backend(ctx); | ||
150 | |||
151 | /* Finalize the code. */ | ||
152 | (void)dasm_checkstep(Dst, DASM_SECTION_CODE); | ||
153 | if ((status = dasm_link(Dst, &ctx->codesz))) return status; | ||
154 | ctx->code = (uint8_t *)malloc(ctx->codesz); | ||
155 | if ((status = dasm_encode(Dst, (void *)ctx->code))) return status; | ||
156 | |||
157 | /* Allocate the symbol offset and permutation tables. */ | ||
158 | ctx->nsym = ctx->npc + ctx->nglob; | ||
159 | ctx->perm = (int *)malloc((ctx->nsym+1)*sizeof(int *)); | ||
160 | ctx->sym_ofs = (int32_t *)malloc((ctx->nsym+1)*sizeof(int32_t)); | ||
161 | |||
162 | /* Collect the opcodes (PC labels). */ | ||
163 | for (i = 0; i < ctx->npc; i++) { | ||
164 | int32_t n = dasm_getpclabel(Dst, i); | ||
165 | if (n < 0) return 0x22000000|i; | ||
166 | ctx->sym_ofs[i] = n; | ||
167 | perm_insert(ctx->perm, ctx->sym_ofs, i); | ||
168 | } | ||
169 | |||
170 | /* Collect the globals (named labels). */ | ||
171 | for (j = 0; j < ctx->nglob; j++, i++) { | ||
172 | const char *gl = globnames[j]; | ||
173 | int len = (int)strlen(gl); | ||
174 | if (!ctx->glob[j]) { | ||
175 | fprintf(stderr, "Error: undefined global %s\n", gl); | ||
176 | exit(2); | ||
177 | } | ||
178 | if (len >= 2 && gl[len-2] == '_' && gl[len-1] == 'Z') | ||
179 | ctx->sym_ofs[i] = -1; /* Skip the _Z symbols. */ | ||
180 | else | ||
181 | ctx->sym_ofs[i] = (int32_t)((uint8_t *)(ctx->glob[j]) - ctx->code); | ||
182 | perm_insert(ctx->perm, ctx->sym_ofs, i); | ||
183 | } | ||
184 | |||
185 | /* Close the address range. */ | ||
186 | ctx->sym_ofs[i] = (int32_t)ctx->codesz; | ||
187 | perm_insert(ctx->perm, ctx->sym_ofs, i); | ||
188 | |||
189 | dasm_free(Dst); | ||
190 | |||
191 | return 0; | ||
192 | } | ||
193 | |||
194 | /* -- Generate VM enums --------------------------------------------------- */ | ||
195 | |||
196 | const char *const bc_names[] = { | ||
197 | #define BCNAME(name, ma, mb, mc, mt) #name, | ||
198 | BCDEF(BCNAME) | ||
199 | #undef BCNAME | ||
200 | NULL | ||
201 | }; | ||
202 | |||
203 | const char *const ir_names[] = { | ||
204 | #define IRNAME(name, m, m1, m2) #name, | ||
205 | IRDEF(IRNAME) | ||
206 | #undef IRNAME | ||
207 | NULL | ||
208 | }; | ||
209 | |||
210 | const char *const irfpm_names[] = { | ||
211 | #define FPMNAME(name) #name, | ||
212 | IRFPMDEF(FPMNAME) | ||
213 | #undef FPMNAME | ||
214 | NULL | ||
215 | }; | ||
216 | |||
217 | const char *const irfield_names[] = { | ||
218 | #define FLNAME(name, type, field) #name, | ||
219 | IRFLDEF(FLNAME) | ||
220 | #undef FLNAME | ||
221 | NULL | ||
222 | }; | ||
223 | |||
224 | static const char *const trace_errors[] = { | ||
225 | #define TREDEF(name, msg) msg, | ||
226 | #include "lj_traceerr.h" | ||
227 | NULL | ||
228 | }; | ||
229 | |||
/* Copy s to buf with ASCII 'A'..'Z' converted to lower case; all other
** characters are copied unchanged. buf must have room for strlen(s)+1
** bytes (no bounds checking is done). Returns buf.
*/
static const char *lower(char *buf, const char *s)
{
  size_t n;
  for (n = 0; s[n] != '\0'; n++) {
    char c = s[n];
    if (c >= 'A' && c <= 'Z')
      c = (char)(c + 0x20);
    buf[n] = c;
  }
  buf[n] = '\0';
  return buf;
}
240 | |||
241 | /* Emit VM definitions as Lua code for debug modules. */ | ||
242 | static void emit_vmdef(BuildCtx *ctx) | ||
243 | { | ||
244 | char buf[80]; | ||
245 | int i; | ||
246 | fprintf(ctx->fp, "-- This is a generated file. DO NOT EDIT!\n\n"); | ||
247 | fprintf(ctx->fp, "module(...)\n\n"); | ||
248 | |||
249 | fprintf(ctx->fp, "bcnames = \""); | ||
250 | for (i = 0; bc_names[i]; i++) fprintf(ctx->fp, "%-6s", bc_names[i]); | ||
251 | fprintf(ctx->fp, "\"\n\n"); | ||
252 | |||
253 | fprintf(ctx->fp, "irnames = \""); | ||
254 | for (i = 0; ir_names[i]; i++) fprintf(ctx->fp, "%-6s", ir_names[i]); | ||
255 | fprintf(ctx->fp, "\"\n\n"); | ||
256 | |||
257 | fprintf(ctx->fp, "irfpm = { [0]="); | ||
258 | for (i = 0; irfpm_names[i]; i++) | ||
259 | fprintf(ctx->fp, "\"%s\", ", lower(buf, irfpm_names[i])); | ||
260 | fprintf(ctx->fp, "}\n\n"); | ||
261 | |||
262 | fprintf(ctx->fp, "irfield = { [0]="); | ||
263 | for (i = 0; irfield_names[i]; i++) { | ||
264 | char *p; | ||
265 | lower(buf, irfield_names[i]); | ||
266 | p = strchr(buf, '_'); | ||
267 | if (p) *p = '.'; | ||
268 | fprintf(ctx->fp, "\"%s\", ", buf); | ||
269 | } | ||
270 | fprintf(ctx->fp, "}\n\n"); | ||
271 | |||
272 | fprintf(ctx->fp, "traceerr = {\n[0]="); | ||
273 | for (i = 0; trace_errors[i]; i++) | ||
274 | fprintf(ctx->fp, "\"%s\",\n", trace_errors[i]); | ||
275 | fprintf(ctx->fp, "}\n\n"); | ||
276 | } | ||
277 | |||
278 | /* -- Argument parsing ---------------------------------------------------- */ | ||
279 | |||
280 | /* Build mode names. */ | ||
281 | static const char *const modenames[] = { | ||
282 | #define BUILDNAME(name) #name, | ||
283 | BUILDDEF(BUILDNAME) | ||
284 | #undef BUILDNAME | ||
285 | NULL | ||
286 | }; | ||
287 | |||
288 | /* Print usage information and exit. */ | ||
289 | static void usage(void) | ||
290 | { | ||
291 | int i; | ||
292 | fprintf(stderr, LUAJIT_VERSION " VM builder.\n"); | ||
293 | fprintf(stderr, LUAJIT_COPYRIGHT ", " LUAJIT_URL "\n"); | ||
294 | fprintf(stderr, "Target architecture: " LJ_ARCH_NAME "\n\n"); | ||
295 | fprintf(stderr, "Usage: buildvm -m mode [-o outfile] [infiles...]\n\n"); | ||
296 | fprintf(stderr, "Available modes:\n"); | ||
297 | for (i = 0; i < BUILD__MAX; i++) | ||
298 | fprintf(stderr, " %s\n", modenames[i]); | ||
299 | exit(1); | ||
300 | } | ||
301 | |||
302 | /* Parse the output mode name. */ | ||
303 | static BuildMode parsemode(const char *mode) | ||
304 | { | ||
305 | int i; | ||
306 | for (i = 0; modenames[i]; i++) | ||
307 | if (!strcmp(mode, modenames[i])) | ||
308 | return (BuildMode)i; | ||
309 | usage(); | ||
310 | return (BuildMode)-1; | ||
311 | } | ||
312 | |||
313 | /* Parse arguments. */ | ||
314 | static void parseargs(BuildCtx *ctx, char **argv) | ||
315 | { | ||
316 | const char *a; | ||
317 | int i; | ||
318 | ctx->mode = (BuildMode)-1; | ||
319 | ctx->outname = "-"; | ||
320 | for (i = 1; (a = argv[i]) != NULL; i++) { | ||
321 | if (a[0] != '-') | ||
322 | break; | ||
323 | switch (a[1]) { | ||
324 | case '-': | ||
325 | if (a[2]) goto err; | ||
326 | i++; | ||
327 | goto ok; | ||
328 | case '\0': | ||
329 | goto ok; | ||
330 | case 'm': | ||
331 | i++; | ||
332 | if (a[2] || argv[i] == NULL) goto err; | ||
333 | ctx->mode = parsemode(argv[i]); | ||
334 | break; | ||
335 | case 'o': | ||
336 | i++; | ||
337 | if (a[2] || argv[i] == NULL) goto err; | ||
338 | ctx->outname = argv[i]; | ||
339 | break; | ||
340 | default: err: | ||
341 | usage(); | ||
342 | break; | ||
343 | } | ||
344 | } | ||
345 | ok: | ||
346 | ctx->args = argv+i; | ||
347 | if (ctx->mode == (BuildMode)-1) goto err; | ||
348 | } | ||
349 | |||
350 | int main(int argc, char **argv) | ||
351 | { | ||
352 | BuildCtx ctx_; | ||
353 | BuildCtx *ctx = &ctx_; | ||
354 | int status, binmode; | ||
355 | |||
356 | UNUSED(argc); | ||
357 | parseargs(ctx, argv); | ||
358 | |||
359 | if ((status = build_code(ctx))) { | ||
360 | fprintf(stderr,"Error: DASM error %08x\n", status); | ||
361 | return 1; | ||
362 | } | ||
363 | |||
364 | switch (ctx->mode) { | ||
365 | #if LJ_TARGET_X86ORX64 | ||
366 | case BUILD_peobj: | ||
367 | #endif | ||
368 | case BUILD_raw: | ||
369 | binmode = 1; | ||
370 | break; | ||
371 | default: | ||
372 | binmode = 0; | ||
373 | break; | ||
374 | } | ||
375 | |||
376 | if (ctx->outname[0] == '-' && ctx->outname[1] == '\0') { | ||
377 | ctx->fp = stdout; | ||
378 | #ifdef LUA_USE_WIN | ||
379 | if (binmode) | ||
380 | _setmode(_fileno(stdout), _O_BINARY); /* Yuck. */ | ||
381 | #endif | ||
382 | } else if (!(ctx->fp = fopen(ctx->outname, binmode ? "wb" : "w"))) { | ||
383 | fprintf(stderr, "Error: cannot open output file '%s': %s\n", | ||
384 | ctx->outname, strerror(errno)); | ||
385 | exit(1); | ||
386 | } | ||
387 | |||
388 | switch (ctx->mode) { | ||
389 | case BUILD_asm: | ||
390 | #if defined(__ELF__) | ||
391 | ctx->mode = BUILD_elfasm; | ||
392 | #elif defined(__MACH__) | ||
393 | ctx->mode = BUILD_machasm; | ||
394 | #else | ||
395 | fprintf(stderr,"Error: auto-guessing the system assembler failed\n"); | ||
396 | return 1; | ||
397 | #endif | ||
398 | /* fallthrough */ | ||
399 | case BUILD_elfasm: | ||
400 | case BUILD_coffasm: | ||
401 | case BUILD_machasm: | ||
402 | emit_asm(ctx); | ||
403 | emit_asm_debug(ctx); | ||
404 | break; | ||
405 | #if LJ_TARGET_X86ORX64 | ||
406 | case BUILD_peobj: | ||
407 | emit_peobj(ctx); | ||
408 | break; | ||
409 | #endif | ||
410 | case BUILD_raw: | ||
411 | emit_raw(ctx); | ||
412 | break; | ||
413 | case BUILD_vmdef: | ||
414 | emit_vmdef(ctx); | ||
415 | /* fallthrough */ | ||
416 | case BUILD_ffdef: | ||
417 | case BUILD_libdef: | ||
418 | case BUILD_recdef: | ||
419 | emit_lib(ctx); | ||
420 | break; | ||
421 | case BUILD_folddef: | ||
422 | emit_fold(ctx); | ||
423 | break; | ||
424 | default: | ||
425 | break; | ||
426 | } | ||
427 | |||
428 | fflush(ctx->fp); | ||
429 | if (ferror(ctx->fp)) { | ||
430 | fprintf(stderr, "Error: cannot write to output file: %s\n", | ||
431 | strerror(errno)); | ||
432 | exit(1); | ||
433 | } | ||
434 | fclose(ctx->fp); | ||
435 | |||
436 | return 0; | ||
437 | } | ||
438 | |||
diff --git a/src/buildvm.h b/src/buildvm.h new file mode 100644 index 00000000..e55527fd --- /dev/null +++ b/src/buildvm.h | |||
@@ -0,0 +1,106 @@ | |||
1 | /* | ||
2 | ** LuaJIT VM builder. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #ifndef _BUILDVM_H | ||
7 | #define _BUILDVM_H | ||
8 | |||
9 | #include <sys/types.h> | ||
10 | #include <stdio.h> | ||
11 | #include <stdlib.h> | ||
12 | #include <string.h> | ||
13 | #include <errno.h> | ||
14 | |||
15 | #include "lj_def.h" | ||
16 | #include "lj_arch.h" | ||
17 | |||
18 | /* Hardcoded limits. Increase as needed. */ | ||
19 | #define BUILD_MAX_RELOC 100 /* Max. number of relocations. */ | ||
20 | #define BUILD_MAX_FOLD 4096 /* Max. number of fold rules. */ | ||
21 | |||
22 | /* Prefix for scanned library definitions. */ | ||
23 | #define LIBDEF_PREFIX "LJLIB_" | ||
24 | |||
25 | /* Prefix for scanned fold definitions. */ | ||
26 | #define FOLDDEF_PREFIX "LJFOLD" | ||
27 | |||
28 | /* Prefixes for generated labels. */ | ||
29 | #define LABEL_PREFIX "lj_" | ||
30 | #define LABEL_PREFIX_BC LABEL_PREFIX "BC_" | ||
31 | #define LABEL_PREFIX_FF LABEL_PREFIX "ff_" | ||
32 | #define LABEL_PREFIX_CF LABEL_PREFIX "cf_" | ||
33 | #define LABEL_PREFIX_FFH LABEL_PREFIX "ffh_" | ||
34 | #define LABEL_PREFIX_LIBCF LABEL_PREFIX "lib_cf_" | ||
35 | #define LABEL_PREFIX_LIBINIT LABEL_PREFIX "lib_init_" | ||
36 | |||
37 | /* Extra labels. */ | ||
38 | #define LABEL_ASM_BEGIN LABEL_PREFIX "vm_asm_begin" | ||
39 | #define LABEL_OP_OFS LABEL_PREFIX "vm_op_ofs" | ||
40 | |||
41 | /* Forward declaration. */ | ||
42 | struct dasm_State; | ||
43 | |||
44 | /* Build modes. */ | ||
45 | #if LJ_TARGET_X86ORX64 | ||
46 | #define BUILDDEFX(_) _(peobj) | ||
47 | #else | ||
48 | #define BUILDDEFX(_) | ||
49 | #endif | ||
50 | |||
51 | #define BUILDDEF(_) \ | ||
52 | _(asm) _(elfasm) _(coffasm) _(machasm) BUILDDEFX(_) _(raw) \ | ||
53 | _(ffdef) _(libdef) _(recdef) _(vmdef) \ | ||
54 | _(folddef) | ||
55 | |||
56 | typedef enum { | ||
57 | #define BUILDENUM(name) BUILD_##name, | ||
58 | BUILDDEF(BUILDENUM) | ||
59 | #undef BUILDENUM | ||
60 | BUILD__MAX | ||
61 | } BuildMode; | ||
62 | |||
63 | /* Code relocation. */ | ||
64 | typedef struct BuildReloc { | ||
65 | int32_t ofs; | ||
66 | int sym; | ||
67 | int type; | ||
68 | } BuildReloc; | ||
69 | |||
70 | /* Build context structure. */ | ||
71 | typedef struct BuildCtx { | ||
72 | /* DynASM state pointer. Should be first member. */ | ||
73 | struct dasm_State *D; | ||
74 | /* Parsed command line. */ | ||
75 | BuildMode mode; | ||
76 | FILE *fp; | ||
77 | const char *outname; | ||
78 | char **args; | ||
79 | /* Code and symbols generated by DynASM. */ | ||
80 | uint8_t *code; | ||
81 | size_t codesz; | ||
82 | int npc, nglob, nsym, nreloc; | ||
83 | void **glob; | ||
84 | int *perm; | ||
85 | int32_t *sym_ofs; | ||
86 | /* Strings generated by DynASM. */ | ||
87 | const char *const *extnames; | ||
88 | const char *const *globnames; | ||
89 | const char *dasm_ident; | ||
90 | const char *dasm_arch; | ||
91 | /* Relocations. */ | ||
92 | BuildReloc reloc[BUILD_MAX_RELOC]; | ||
93 | } BuildCtx; | ||
94 | |||
95 | extern void owrite(BuildCtx *ctx, const void *ptr, size_t sz); | ||
96 | extern void emit_asm(BuildCtx *ctx); | ||
97 | extern void emit_peobj(BuildCtx *ctx); | ||
98 | extern void emit_lib(BuildCtx *ctx); | ||
99 | extern void emit_fold(BuildCtx *ctx); | ||
100 | |||
101 | extern const char *const bc_names[]; | ||
102 | extern const char *const ir_names[]; | ||
103 | extern const char *const irfpm_names[]; | ||
104 | extern const char *const irfield_names[]; | ||
105 | |||
106 | #endif | ||
diff --git a/src/buildvm_asm.c b/src/buildvm_asm.c new file mode 100644 index 00000000..e6972bd5 --- /dev/null +++ b/src/buildvm_asm.c | |||
@@ -0,0 +1,220 @@ | |||
1 | /* | ||
2 | ** LuaJIT VM builder: Assembler source code emitter. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #include "buildvm.h" | ||
7 | #include "lj_bc.h" | ||
8 | |||
9 | /* ------------------------------------------------------------------------ */ | ||
10 | |||
11 | /* Emit n bytes starting at p to ctx->fp as ".byte" directives, at most 16 values per line. */ | ||
12 | static void emit_asm_bytes(BuildCtx *ctx, uint8_t *p, int n) | ||
13 | { | ||
14 | int i; | ||
15 | for (i = 0; i < n; i++) { | ||
16 | if ((i & 15) == 0) /* First value of an output line starts a new directive. */ | ||
17 | fprintf(ctx->fp, "\t.byte %d", p[i]); | ||
18 | else | ||
19 | fprintf(ctx->fp, ",%d", p[i]); | ||
20 | if ((i & 15) == 15) putc('\n', ctx->fp); /* Line is full after 16 values. */ | ||
21 | } | ||
22 | if ((n & 15) != 0) putc('\n', ctx->fp); /* Terminate a partially filled last line. */ | ||
23 | } | ||
24 | |||
25 | /* Emit relocation r as a 4-byte .long referencing its external symbol, spelled for the current mode. */ | ||
26 | static void emit_asm_reloc(BuildCtx *ctx, BuildReloc *r) | ||
27 | { | ||
28 | const char *sym = ctx->extnames[r->sym]; | ||
29 | switch (ctx->mode) { | ||
30 | case BUILD_elfasm: | ||
31 | if (r->type) /* Non-zero type: relative to the end of the 4-byte slot. */ | ||
32 | fprintf(ctx->fp, "\t.long %s-.-4\n", sym); | ||
33 | else | ||
34 | fprintf(ctx->fp, "\t.long %s\n", sym); | ||
35 | break; | ||
36 | case BUILD_coffasm: /* Same as ELF, but with '_' prefixed names and a .def record first. */ | ||
37 | fprintf(ctx->fp, "\t.def _%s; .scl 3; .type 32; .endef\n", sym); | ||
38 | if (r->type) | ||
39 | fprintf(ctx->fp, "\t.long _%s-.-4\n", sym); | ||
40 | else | ||
41 | fprintf(ctx->fp, "\t.long _%s\n", sym); | ||
42 | break; | ||
43 | default: /* BUILD_machasm for relative relocations handled below. */ | ||
44 | fprintf(ctx->fp, "\t.long _%s\n", sym); | ||
45 | break; | ||
46 | } | ||
47 | } | ||
48 | |||
49 | static const char *const jccnames[] = { /* Indexed by (opcode byte - 0x80) in emit_asm_reloc_mach. */ | ||
50 | "jo", "jno", "jb", "jnb", "jz", "jnz", "jbe", "ja", | ||
51 | "js", "jns", "jpe", "jpo", "jl", "jge", "jle", "jg" | ||
52 | }; | ||
53 | |||
54 | /* Emit a relative relocation for the OSX assembler as a symbolic call/jmp/jcc; cp[0..n-1] are the code bytes preceding the 4-byte slot. */ | ||
55 | static void emit_asm_reloc_mach(BuildCtx *ctx, uint8_t *cp, int n, | ||
56 | const char *sym) | ||
57 | { | ||
58 | const char *opname = NULL; | ||
59 | if (--n < 0) goto err; /* n now indexes the byte directly before the slot. */ | ||
60 | if (cp[n] == 0xe8) { | ||
61 | opname = "call"; | ||
62 | } else if (cp[n] == 0xe9) { | ||
63 | opname = "jmp"; | ||
64 | } else if (cp[n] >= 0x80 && cp[n] <= 0x8f && n > 0 && cp[n-1] == 0x0f) { /* Two-byte form: 0x0f, 0x80..0x8f. */ | ||
65 | opname = jccnames[cp[n]-0x80]; | ||
66 | n--; /* Also drop the 0x0f byte from the raw bytes emitted below. */ | ||
67 | } else { | ||
68 | err: | ||
69 | fprintf(stderr, "Error: unsupported opcode for %s symbol relocation.\n", | ||
70 | sym); | ||
71 | exit(1); | ||
72 | } | ||
73 | emit_asm_bytes(ctx, cp, n); /* Raw bytes before the opcode; the opcode itself is emitted as text. */ | ||
74 | if (!strncmp(sym, LABEL_PREFIX, sizeof(LABEL_PREFIX)-1)) /* Symbol already carries LABEL_PREFIX: reference it directly. */ | ||
75 | fprintf(ctx->fp, "\t%s _%s\n", opname, sym); | ||
76 | else /* Any other symbol is referenced through its LABEL_PREFIX "wrapper_" name. */ | ||
77 | fprintf(ctx->fp, "\t%s _" LABEL_PREFIX "wrapper_%s\n", opname, sym); | ||
78 | } | ||
79 | |||
80 | /* Emit a global label for name in the current mode; size is only used for ELF (.size), isfunc for ELF (.type) and COFF (.def). */ | ||
81 | static void emit_asm_label(BuildCtx *ctx, const char *name, int size, int isfunc) | ||
82 | { | ||
83 | switch (ctx->mode) { | ||
84 | case BUILD_elfasm: | ||
85 | fprintf(ctx->fp, | ||
86 | "\n\t.globl %s\n" | ||
87 | "\t.hidden %s\n" | ||
88 | "\t.type %s, @%s\n" | ||
89 | "\t.size %s, %d\n" | ||
90 | "%s:\n", | ||
91 | name, name, name, isfunc ? "function" : "object", name, size, name); | ||
92 | break; | ||
93 | case BUILD_coffasm: | ||
94 | fprintf(ctx->fp, "\n\t.globl _%s\n", name); | ||
95 | if (isfunc) /* Only functions get a .def record. */ | ||
96 | fprintf(ctx->fp, "\t.def _%s; .scl 3; .type 32; .endef\n", name); | ||
97 | fprintf(ctx->fp, "_%s:\n", name); | ||
98 | break; | ||
99 | case BUILD_machasm: | ||
100 | fprintf(ctx->fp, | ||
101 | "\n\t.private_extern _%s\n" | ||
102 | "_%s:\n", name, name); | ||
103 | break; | ||
104 | default: /* Other build modes emit no label. */ | ||
105 | break; | ||
106 | } | ||
107 | } | ||
108 | |||
109 | /* Emit an alignment directive; bits is passed through as the operand of .p2align (elfasm/coffasm) or .align (machasm). */ | ||
110 | static void emit_asm_align(BuildCtx *ctx, int bits) | ||
111 | { | ||
112 | switch (ctx->mode) { | ||
113 | case BUILD_elfasm: | ||
114 | case BUILD_coffasm: | ||
115 | fprintf(ctx->fp, "\t.p2align %d\n", bits); | ||
116 | break; | ||
117 | case BUILD_machasm: | ||
118 | fprintf(ctx->fp, "\t.align %d\n", bits); | ||
119 | break; | ||
120 | default: /* Other build modes emit nothing. */ | ||
121 | break; | ||
122 | } | ||
123 | } | ||
124 | |||
125 | /* ------------------------------------------------------------------------ */ | ||
126 | |||
127 | /* Emit assembler source code: all code bytes with one label per symbol and relocations inline, then the LABEL_OP_OFS offset table and the DynASM ident string. */ | ||
128 | void emit_asm(BuildCtx *ctx) | ||
129 | { | ||
130 | char name[80]; /* Label buffer; the sprintf calls below do not bound-check it. */ | ||
131 | int32_t prev; | ||
132 | int i, pi, rel; | ||
133 | |||
134 | fprintf(ctx->fp, "\t.file \"buildvm_%s.dasc\"\n", ctx->dasm_arch); | ||
135 | fprintf(ctx->fp, "\t.text\n"); | ||
136 | emit_asm_align(ctx, 4); | ||
137 | |||
138 | emit_asm_label(ctx, LABEL_ASM_BEGIN, 0, 1); | ||
139 | if (ctx->mode == BUILD_elfasm) | ||
140 | fprintf(ctx->fp, ".Lbegin:\n"); | ||
141 | |||
142 | i = 0; | ||
143 | do { /* Find the first symbol in perm[] order with a non-negative offset. */ | ||
144 | pi = ctx->perm[i++]; | ||
145 | prev = ctx->sym_ofs[pi]; | ||
146 | } while (prev < 0); /* Skip the _Z symbols. */ | ||
147 | |||
148 | for (rel = 0; i <= ctx->nsym; i++) { /* Note: perm[] is read up to index nsym inclusive. */ | ||
149 | int ni = ctx->perm[i]; | ||
150 | int32_t next = ctx->sym_ofs[ni]; | ||
151 | int size = (int)(next - prev); /* Symbol pi spans the code from prev up to next. */ | ||
152 | int32_t stop = next; | ||
153 | if (pi >= ctx->npc) { /* Symbols from npc upwards are globals. */ | ||
154 | sprintf(name, LABEL_PREFIX "%s", ctx->globnames[pi-ctx->npc]); | ||
155 | emit_asm_label(ctx, name, size, 1); | ||
156 | #if LJ_HASJIT | ||
157 | } else { | ||
158 | #else | ||
159 | } else if (!(pi == BC_JFORI || pi == BC_JFORL || pi == BC_JITERL || | ||
160 | pi == BC_JLOOP || pi == BC_IFORL || pi == BC_IITERL || | ||
161 | pi == BC_ILOOP)) { /* Without LJ_HASJIT these bytecodes get no label of their own. */ | ||
162 | #endif | ||
163 | sprintf(name, LABEL_PREFIX_BC "%s", bc_names[pi]); | ||
164 | emit_asm_label(ctx, name, size, 1); | ||
165 | } | ||
166 | while (rel < ctx->nreloc && ctx->reloc[rel].ofs < stop) { /* Relocations located inside this symbol. */ | ||
167 | int n = ctx->reloc[rel].ofs - prev; /* Plain bytes before the relocation slot. */ | ||
168 | if (ctx->mode == BUILD_machasm && ctx->reloc[rel].type != 0) { | ||
169 | emit_asm_reloc_mach(ctx, ctx->code+prev, n, | ||
170 | ctx->extnames[ctx->reloc[rel].sym]); | ||
171 | } else { | ||
172 | emit_asm_bytes(ctx, ctx->code+prev, n); | ||
173 | emit_asm_reloc(ctx, &ctx->reloc[rel]); | ||
174 | } | ||
175 | prev += n+4; /* Skip the bytes just handled plus the 4-byte slot. */ | ||
176 | rel++; | ||
177 | } | ||
178 | emit_asm_bytes(ctx, ctx->code+prev, stop-prev); /* Remaining bytes up to the next symbol. */ | ||
179 | prev = next; | ||
180 | pi = ni; | ||
181 | } | ||
182 | |||
183 | switch (ctx->mode) { /* Switch to a read-only data section for the offset table. */ | ||
184 | case BUILD_elfasm: | ||
185 | fprintf(ctx->fp, "\n\t.section .rodata\n"); | ||
186 | break; | ||
187 | case BUILD_coffasm: | ||
188 | fprintf(ctx->fp, "\n\t.section .rdata,\"dr\"\n"); | ||
189 | break; | ||
190 | case BUILD_machasm: | ||
191 | fprintf(ctx->fp, "\n\t.const\n"); | ||
192 | break; | ||
193 | default: | ||
194 | break; | ||
195 | } | ||
196 | emit_asm_align(ctx, 5); | ||
197 | |||
198 | emit_asm_label(ctx, LABEL_OP_OFS, 2*ctx->npc, 0); /* One 2-byte entry per bytecode. */ | ||
199 | for (i = 0; i < ctx->npc; i++) | ||
200 | fprintf(ctx->fp, "\t.short %d\n", ctx->sym_ofs[i]); | ||
201 | |||
202 | fprintf(ctx->fp, "\n"); | ||
203 | switch (ctx->mode) { /* Trailer: embed the DynASM ident string. */ | ||
204 | case BUILD_elfasm: | ||
205 | fprintf(ctx->fp, "\t.section .note.GNU-stack,\"\",@progbits\n"); | ||
206 | /* fallthrough */ | ||
207 | case BUILD_coffasm: | ||
208 | fprintf(ctx->fp, "\t.ident \"%s\"\n", ctx->dasm_ident); | ||
209 | break; | ||
210 | case BUILD_machasm: | ||
211 | fprintf(ctx->fp, | ||
212 | "\t.cstring\n" | ||
213 | "\t.ascii \"%s\\0\"\n", ctx->dasm_ident); | ||
214 | break; | ||
215 | default: | ||
216 | break; | ||
217 | } | ||
218 | fprintf(ctx->fp, "\n"); | ||
219 | } | ||
220 | |||
diff --git a/src/buildvm_fold.c b/src/buildvm_fold.c new file mode 100644 index 00000000..5f065643 --- /dev/null +++ b/src/buildvm_fold.c | |||
@@ -0,0 +1,206 @@ | |||
1 | /* | ||
2 | ** LuaJIT VM builder: IR folding hash table generator. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #include "lj_obj.h" | ||
7 | #include "lj_ir.h" | ||
8 | |||
9 | #include "buildvm.h" | ||
10 | |||
11 | /* Context for the folding hash table generator. */ | ||
12 | static int lineno; /* Current input line (1-based), used in error messages. */ | ||
13 | static int funcidx; /* Number of fold functions emitted so far; stored in bits 24+ of each key. */ | ||
14 | static uint32_t foldkeys[BUILD_MAX_FOLD]; /* Kept sorted by the low 24 bits (see foldrule). */ | ||
15 | static uint32_t nkeys; /* Number of valid entries in foldkeys[]. */ | ||
16 | |||
17 | /* Try to fill htab (sz+1 slots) with all keys using the hash parameters: r packs two 5-bit counts (r>>5, r&31), used as shifts or, if dorol, as rotates. Returns 1 on success, 0 on failure. */ | ||
18 | static int tryhash(uint32_t *htab, uint32_t sz, uint32_t r, int dorol) | ||
19 | { | ||
20 | uint32_t i; | ||
21 | if (dorol && ((r & 31) == 0 || (r>>5) == 0)) | ||
22 | return 0; /* Avoid zero rotates. */ | ||
23 | memset(htab, 0xff, (sz+1)*sizeof(uint32_t)); /* 0xffffffff marks an empty slot. */ | ||
24 | for (i = 0; i < nkeys; i++) { | ||
25 | uint32_t key = foldkeys[i]; | ||
26 | uint32_t k = key & 0xffffff; /* Only the low 24 bits are hashed; the function index is payload. */ | ||
27 | uint32_t h = (dorol ? lj_rol(lj_rol(k, r>>5) - k, r&31) : | ||
28 | (((k << (r>>5)) - k) << (r&31))) % sz; | ||
29 | if (htab[h] != 0xffffffff) { /* Collision on primary slot. */ | ||
30 | if (htab[h+1] != 0xffffffff) { /* Collision on secondary slot. */ | ||
31 | /* Try to move the colliding key, if possible. */ | ||
32 | if (h < sz-1 && htab[h+2] == 0xffffffff) { | ||
33 | uint32_t k2 = htab[h+1] & 0xffffff; | ||
34 | uint32_t h2 = (dorol ? lj_rol(lj_rol(k2, r>>5) - k2, r&31) : | ||
35 | (((k2 << (r>>5)) - k2) << (r&31))) % sz; | ||
36 | if (h2 != h+1) return 0; /* Cannot resolve collision. */ | ||
37 | htab[h+2] = htab[h+1]; /* Move colliding key to secondary slot. */ | ||
38 | } else { | ||
39 | return 0; /* Collision. */ | ||
40 | } | ||
41 | } | ||
42 | htab[h+1] = key; /* Store in the secondary slot of h. */ | ||
43 | } else { | ||
44 | htab[h] = key; | ||
45 | } | ||
46 | } | ||
47 | return 1; /* Success, all keys could be stored. */ | ||
48 | } | ||
49 | |||
50 | /* Print the generated hash table to ctx->fp as a C array with sz+1 entries. */ | ||
51 | static void printhash(BuildCtx *ctx, uint32_t *htab, uint32_t sz) | ||
52 | { | ||
53 | uint32_t i; | ||
54 | fprintf(ctx->fp, "static const uint32_t fold_hash[%u] = {\n0x%08x", | ||
55 | sz+1, htab[0]); /* sz is unsigned, hence %u (as makehash does). */ | ||
56 | for (i = 1; i < sz+1; i++) | ||
57 | fprintf(ctx->fp, ",\n0x%08x", htab[i]); | ||
58 | fprintf(ctx->fp, "\n};\n\n"); | ||
59 | } | ||
60 | |||
61 | /* Exhaustive search for the shortest semi-perfect hash table; prints the table and a matching fold_hashkey() macro, or exits if none is found. */ | ||
62 | static void makehash(BuildCtx *ctx) | ||
63 | { | ||
64 | uint32_t htab[BUILD_MAX_FOLD*2+1]; | ||
65 | uint32_t sz, r; | ||
66 | /* Search for the smallest hash table with an odd size. */ | ||
67 | for (sz = (nkeys|1); sz < BUILD_MAX_FOLD*2; sz += 2) { | ||
68 | /* First try all shift hash combinations. */ | ||
69 | for (r = 0; r < 32*32; r++) { /* r encodes both 5-bit shift counts. */ | ||
70 | if (tryhash(htab, sz, r, 0)) { | ||
71 | printhash(ctx, htab, sz); | ||
72 | fprintf(ctx->fp, | ||
73 | "#define fold_hashkey(k)\t(((((k)<<%u)-(k))<<%u)%%%u)\n\n", | ||
74 | r>>5, r&31, sz); | ||
75 | return; | ||
76 | } | ||
77 | } | ||
78 | /* Then try all rotate hash combinations. */ | ||
79 | for (r = 0; r < 32*32; r++) { | ||
80 | if (tryhash(htab, sz, r, 1)) { | ||
81 | printhash(ctx, htab, sz); | ||
82 | fprintf(ctx->fp, | ||
83 | "#define fold_hashkey(k)\t(lj_rol(lj_rol((k),%u)-(k),%u)%%%u)\n\n", | ||
84 | r>>5, r&31, sz); | ||
85 | return; | ||
86 | } | ||
87 | } | ||
88 | } | ||
89 | fprintf(stderr, "Error: search for perfect hash failed\n"); | ||
90 | exit(1); | ||
91 | } | ||
92 | |||
93 | /* Parse one space-delimited token of a fold rule from *pp and advance *pp past it. Returns the token's index in its name table, 0xff for "any", or 0 if no token is left; exits on an unknown token. */ | ||
94 | static uint32_t nexttoken(char **pp, int allowlit, int allowany) | ||
95 | { | ||
96 | char *p = *pp; | ||
97 | if (p) { | ||
98 | uint32_t i; | ||
99 | char *q = strchr(p, ' '); | ||
100 | if (q) *q++ = '\0'; /* Terminate this token in place. */ | ||
101 | *pp = q; /* NULL once the last token has been consumed. */ | ||
102 | if (allowlit && !strncmp(p, "IRFPM_", 6)) { | ||
103 | for (i = 0; irfpm_names[i]; i++) | ||
104 | if (!strcmp(irfpm_names[i], p+6)) | ||
105 | return i; | ||
106 | } else if (allowlit && !strncmp(p, "IRFL_", 5)) { | ||
107 | for (i = 0; irfield_names[i]; i++) | ||
108 | if (!strcmp(irfield_names[i], p+5)) | ||
109 | return i; | ||
110 | } else if (allowany && !strcmp("any", p)) { | ||
111 | return 0xff; | ||
112 | } else { | ||
113 | for (i = 0; ir_names[i]; i++) | ||
114 | if (!strcmp(ir_names[i], p)) | ||
115 | return i; | ||
116 | } | ||
117 | fprintf(stderr, "Error: bad fold definition token \"%s\" at line %d\n", p, lineno); | ||
118 | exit(1); | ||
119 | } | ||
120 | return 0; /* Missing token. */ | ||
121 | } | ||
122 | |||
123 | /* Parse a fold rule "op left right" from p and insert its key into the sorted foldkeys[]; exits on a duplicate or when the table is full. */ | ||
124 | static void foldrule(char *p) | ||
125 | { | ||
126 | uint32_t op = nexttoken(&p, 0, 0); | ||
127 | uint32_t left = nexttoken(&p, 0, 1); /* May be "any". */ | ||
128 | uint32_t right = nexttoken(&p, 1, 1); /* May be "any" or an IRFPM_/IRFL_ literal. */ | ||
129 | uint32_t key = (funcidx << 24) | (op << 16) | (left << 8) | right; | ||
130 | uint32_t i; | ||
131 | if (nkeys >= BUILD_MAX_FOLD) { | ||
132 | fprintf(stderr, "Error: too many fold rules, increase BUILD_MAX_FOLD.\n"); | ||
133 | exit(1); | ||
134 | } | ||
135 | /* Simple insertion sort to detect duplicates. */ | ||
136 | for (i = nkeys; i > 0; i--) { /* Comparisons ignore the function index in bits 24+. */ | ||
137 | if ((foldkeys[i-1]&0xffffff) < (key & 0xffffff)) | ||
138 | break; | ||
139 | if ((foldkeys[i-1]&0xffffff) == (key & 0xffffff)) { | ||
140 | fprintf(stderr, "Error: duplicate fold definition at line %d\n", lineno); | ||
141 | exit(1); | ||
142 | } | ||
143 | foldkeys[i] = foldkeys[i-1]; /* Shift larger keys up by one. */ | ||
144 | } | ||
145 | foldkeys[i] = key; | ||
146 | nkeys++; | ||
147 | } | ||
148 | |||
149 | /* Emit C source code for IR folding hash table: reads the file named by ctx->args[0] ("-" for stdin), prints the fold_func[] array and then the hash table via makehash. */ | ||
150 | void emit_fold(BuildCtx *ctx) | ||
151 | { | ||
152 | char buf[256]; /* We don't care about analyzing lines longer than that. */ | ||
153 | const char *fname = ctx->args[0]; | ||
154 | FILE *fp; | ||
155 | |||
156 | if (fname == NULL) { | ||
157 | fprintf(stderr, "Error: missing input filename\n"); | ||
158 | exit(1); | ||
159 | } | ||
160 | |||
161 | if (fname[0] == '-' && fname[1] == '\0') { | ||
162 | fp = stdin; | ||
163 | } else { | ||
164 | fp = fopen(fname, "r"); | ||
165 | if (!fp) { | ||
166 | fprintf(stderr, "Error: cannot open input file '%s': %s\n", | ||
167 | fname, strerror(errno)); | ||
168 | exit(1); | ||
169 | } | ||
170 | } | ||
171 | |||
172 | fprintf(ctx->fp, "/* This is a generated file. DO NOT EDIT! */\n\n"); | ||
173 | fprintf(ctx->fp, "static const FoldFunc fold_func[] = {\n"); | ||
174 | |||
175 | lineno = 0; | ||
176 | funcidx = 0; | ||
177 | nkeys = 0; | ||
178 | while (fgets(buf, sizeof(buf), fp) != NULL) { | ||
179 | lineno++; | ||
180 | /* The prefix must be at the start of a line, otherwise it's ignored. */ | ||
181 | if (!strncmp(buf, FOLDDEF_PREFIX, sizeof(FOLDDEF_PREFIX)-1)) { | ||
182 | char *p = buf+sizeof(FOLDDEF_PREFIX)-1; | ||
183 | char *q = strchr(p, ')'); | ||
184 | if (p[0] == '(' && q) { /* PREFIX(op left right): a fold rule. */ | ||
185 | p++; | ||
186 | *q = '\0'; | ||
187 | foldrule(p); | ||
188 | } else if ((p[0] == 'F' || p[0] == 'X') && p[1] == '(' && q) { /* PREFIXF(name) or PREFIXX(name): a fold function. */ | ||
189 | p += 2; | ||
190 | *q = '\0'; | ||
191 | fprintf(ctx->fp, funcidx ? ",\n %s" : " %s", p); | ||
192 | funcidx++; | ||
193 | } else { | ||
194 | buf[strlen(buf)-1] = '\0'; /* Drops the last character of the line (normally the newline). */ | ||
195 | fprintf(stderr, "Error: unknown fold definition tag %s%s at line %d\n", | ||
196 | FOLDDEF_PREFIX, p, lineno); | ||
197 | exit(1); | ||
198 | } | ||
199 | } | ||
200 | } | ||
201 | fclose(fp); | ||
202 | fprintf(ctx->fp, "\n};\n\n"); | ||
203 | |||
204 | makehash(ctx); | ||
205 | } | ||
206 | |||
diff --git a/src/buildvm_lib.c b/src/buildvm_lib.c new file mode 100644 index 00000000..cc572200 --- /dev/null +++ b/src/buildvm_lib.c | |||
@@ -0,0 +1,365 @@ | |||
1 | /* | ||
2 | ** LuaJIT VM builder: library definition compiler. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #include "lj_obj.h" | ||
7 | #include "lj_lib.h" | ||
8 | |||
9 | #include "buildvm.h" | ||
10 | |||
11 | /* Context for library definitions. */ | ||
12 | static uint8_t obuf[8192]; /* libdef: init data of the current module; recdef: NUL-separated name table (find_rec). */ | ||
13 | static uint8_t *optr; /* Write position inside obuf. */ | ||
14 | static char modname[80]; | ||
15 | static size_t modnamelen; /* strlen(modname). */ | ||
16 | static char funcname[80]; /* Last function name seen in recdef mode. */ | ||
17 | static int modstate, regfunc; /* modstate: 0 = no module, 1 = module open and no function emitted yet, 2 = function emitted. */ | ||
18 | static int ffid, recffid; /* Next fast function id / last id covered by the recdef table. */ | ||
19 | |||
20 | enum { /* Values for regfunc; reset to REGFUNC_OK after every function. */ | ||
21 | REGFUNC_OK, | ||
22 | REGFUNC_NOREG, /* Only a LIBINIT_FFID byte is emitted for the function. */ | ||
23 | REGFUNC_NOREGUV /* Function is emitted with an empty name and not counted for the hash size. */ | ||
24 | }; | ||
25 | |||
26 | static void libdef_name(char *p, int kind) /* Append a tag byte (length | kind) followed by the name bytes of p to obuf. */ | ||
27 | { | ||
28 | size_t n = strlen(p); | ||
29 | if (kind != LIBINIT_STRING) { /* Function names: strip a leading "<modname>_". */ | ||
30 | if (n > modnamelen && p[modnamelen] == '_' && | ||
31 | !strncmp(p, modname, modnamelen)) { | ||
32 | p += modnamelen+1; | ||
33 | n -= modnamelen+1; | ||
34 | } | ||
35 | } | ||
36 | if (n > LIBINIT_MAXSTR) { /* The length has to fit into the tag byte next to kind. */ | ||
37 | fprintf(stderr, "Error: string too long: '%s'\n", p); | ||
38 | exit(1); | ||
39 | } | ||
40 | if (optr+1+n+2 > obuf+sizeof(obuf)) { /* +2 for caller. */ | ||
41 | fprintf(stderr, "Error: output buffer overflow\n"); | ||
42 | exit(1); | ||
43 | } | ||
44 | *optr++ = (uint8_t)(n | kind); | ||
45 | memcpy(optr, p, n); /* No terminating NUL is stored. */ | ||
46 | optr += n; | ||
47 | } | ||
48 | |||
49 | static void libdef_endmodule(BuildCtx *ctx) /* Close the open module, if any: finish its function array and dump obuf as a uint8_t array. */ | ||
50 | { | ||
51 | if (modstate != 0) { | ||
52 | char line[80]; | ||
53 | const uint8_t *p; | ||
54 | int n; | ||
55 | if (modstate == 1) /* No function was emitted: add a dummy entry so the array is not empty. */ | ||
56 | fprintf(ctx->fp, " (lua_CFunction)0"); | ||
57 | fprintf(ctx->fp, "\n};\n"); | ||
58 | fprintf(ctx->fp, "static const uint8_t %s%s[] = {\n", | ||
59 | LABEL_PREFIX_LIBINIT, modname); | ||
60 | line[0] = '\0'; | ||
61 | for (n = 0, p = obuf; p < optr; p++) { | ||
62 | n += sprintf(line+n, "%d,", *p); /* At most 4 characters per byte. */ | ||
63 | if (n >= 75) { /* Flush before line[] could overflow. */ | ||
64 | fprintf(ctx->fp, "%s\n", line); | ||
65 | n = 0; | ||
66 | line[0] = '\0'; | ||
67 | } | ||
68 | } | ||
69 | fprintf(ctx->fp, "%s%d\n};\n#endif\n\n", line, LIBINIT_END); /* The #endif closes the #ifdef written by libdef_module. */ | ||
70 | } | ||
71 | } | ||
72 | |||
73 | static void libdef_module(BuildCtx *ctx, char *p, int arg) /* Handle a MODULE_ tag: p is the module name. */ | ||
74 | { | ||
75 | UNUSED(arg); | ||
76 | if (ctx->mode == BUILD_libdef) { | ||
77 | libdef_endmodule(ctx); /* Finish the previous module first. */ | ||
78 | optr = obuf; | ||
79 | *optr++ = (uint8_t)ffid; /* obuf[0]: id of the module's first function. */ | ||
80 | *optr++ = 0; /* obuf[1]: hash table size, bumped by libdef_func and libdef_set. */ | ||
81 | modstate = 1; | ||
82 | fprintf(ctx->fp, "#ifdef %sMODULE_%s\n", LIBDEF_PREFIX, p); | ||
83 | fprintf(ctx->fp, "#undef %sMODULE_%s\n", LIBDEF_PREFIX, p); | ||
84 | fprintf(ctx->fp, "static const lua_CFunction %s%s[] = {\n", | ||
85 | LABEL_PREFIX_LIBCF, p); | ||
86 | } | ||
87 | modnamelen = strlen(p); /* The module name is recorded in every build mode. */ | ||
88 | if (modnamelen > sizeof(modname)-1) { | ||
89 | fprintf(stderr, "Error: module name too long: '%s'\n", p); | ||
90 | exit(1); | ||
91 | } | ||
92 | strcpy(modname, p); | ||
93 | } | ||
94 | |||
95 | static int find_ffofs(BuildCtx *ctx, const char *name) /* Return the code offset of the global "ff_<name>"; exits if there is none. */ | ||
96 | { | ||
97 | int i; | ||
98 | for (i = 0; i < ctx->nglob; i++) { | ||
99 | const char *gl = ctx->globnames[i]; | ||
100 | if (gl[0] == 'f' && gl[1] == 'f' && gl[2] == '_' && !strcmp(gl+3, name)) { | ||
101 | return (int)((uint8_t *)ctx->glob[i] - ctx->code); /* Address of the global relative to the code start. */ | ||
102 | } | ||
103 | } | ||
104 | fprintf(stderr, "Error: undefined fast function %s%s\n", | ||
105 | LABEL_PREFIX_FF, name); | ||
106 | exit(1); | ||
107 | } | ||
108 | |||
109 | static void libdef_func(BuildCtx *ctx, char *p, int arg) /* Handle CF(/ASM(/ASM_( tags: p is the function name, arg the LIBINIT_* kind. Output depends on ctx->mode. */ | ||
110 | { | ||
111 | if (ctx->mode == BUILD_libdef) { | ||
112 | int ofs = arg != LIBINIT_CF ? find_ffofs(ctx, p) : 0; /* Non-CF functions need a matching ff_ global. */ | ||
113 | if (modstate == 0) { | ||
114 | fprintf(stderr, "Error: no module for function definition %s\n", p); | ||
115 | exit(1); | ||
116 | } | ||
117 | if (regfunc == REGFUNC_NOREG) { /* Not registered: only a LIBINIT_FFID byte is emitted. */ | ||
118 | if (optr+1 > obuf+sizeof(obuf)) { | ||
119 | fprintf(stderr, "Error: output buffer overflow\n"); | ||
120 | exit(1); | ||
121 | } | ||
122 | *optr++ = LIBINIT_FFID; | ||
123 | } else { | ||
124 | if (arg != LIBINIT_ASM_) { /* LIBINIT_ASM_ adds no entry to the C function array. */ | ||
125 | if (modstate != 1) fprintf(ctx->fp, ",\n"); | ||
126 | modstate = 2; | ||
127 | fprintf(ctx->fp, " %s%s", arg ? LABEL_PREFIX_FFH : LABEL_PREFIX_CF, p); | ||
128 | } | ||
129 | if (regfunc != REGFUNC_NOREGUV) obuf[1]++; /* Bump hash table size. */ | ||
130 | libdef_name(regfunc == REGFUNC_NOREGUV ? "" : p, arg); | ||
131 | if (arg) { /* Non-zero kinds carry the code offset as two bytes, low byte first. */ | ||
132 | *optr++ = (uint8_t)ofs; | ||
133 | *optr++ = (uint8_t)(ofs >> 8); | ||
134 | } | ||
135 | } | ||
136 | } else if (ctx->mode == BUILD_ffdef) { | ||
137 | fprintf(ctx->fp, "FFDEF(%s)\n", p); | ||
138 | } else if (ctx->mode == BUILD_recdef) { /* Remember the name for a following REC(.) tag. */ | ||
139 | if (strlen(p) > sizeof(funcname)-1) { | ||
140 | fprintf(stderr, "Error: function name too long: '%s'\n", p); | ||
141 | exit(1); | ||
142 | } | ||
143 | strcpy(funcname, p); | ||
144 | } else if (ctx->mode == BUILD_vmdef) { | ||
145 | int i; | ||
146 | for (i = 1; p[i] && modname[i-1]; i++) /* Covers p[1..strlen(modname)]. */ | ||
147 | if (p[i] == '_') p[i] = '.'; | ||
148 | fprintf(ctx->fp, "\"%s\",\n", p); | ||
149 | } | ||
150 | ffid++; /* Every function tag consumes one id, in all modes. */ | ||
151 | regfunc = REGFUNC_OK; /* NOREG/NOREGUV apply to the next function only. */ | ||
152 | } | ||
153 | |||
154 | static uint32_t find_rec(char *name) /* Look up name in the NUL-separated name table kept in obuf, appending it if missing; returns its index, counted from 2. */ | ||
155 | { | ||
156 | char *p = (char *)obuf; | ||
157 | uint32_t n; | ||
158 | for (n = 2; *p; n++) { /* An empty string ends the table. */ | ||
159 | if (strcmp(p, name) == 0) | ||
160 | return n; | ||
161 | p += strlen(p)+1; | ||
162 | } | ||
163 | if (p+strlen(name)+1 >= (char *)obuf+sizeof(obuf)) { /* Keep room for the new name plus the end marker. */ | ||
164 | fprintf(stderr, "Error: output buffer overflow\n"); | ||
165 | exit(1); | ||
166 | } | ||
167 | strcpy(p, name); | ||
168 | return n; | ||
169 | } | ||
170 | |||
171 | static void libdef_rec(BuildCtx *ctx, char *p, int arg) /* Handle a REC( tag (recdef mode only): p is "name", "name expr" or starts with '.'. */ | ||
172 | { | ||
173 | UNUSED(arg); | ||
174 | if (ctx->mode == BUILD_recdef) { | ||
175 | char *q; | ||
176 | uint32_t n; | ||
177 | for (; recffid+1 < ffid; recffid++) /* Zero entries for functions that had no REC tag. */ | ||
178 | fprintf(ctx->fp, ",\n0"); | ||
179 | recffid = ffid; | ||
180 | if (*p == '.') p = funcname; /* '.': use the name of the last function seen. */ | ||
181 | q = strchr(p, ' '); | ||
182 | if (q) *q++ = '\0'; /* Optional text after the first space is emitted verbatim below. */ | ||
183 | n = find_rec(p); | ||
184 | if (q) | ||
185 | fprintf(ctx->fp, ",\n0x%02x00+(%s)", n, q); | ||
186 | else | ||
187 | fprintf(ctx->fp, ",\n0x%02x00", n); | ||
188 | } | ||
189 | } | ||
190 | |||
191 | static void memcpy_endian(void *dst, void *src, size_t n) /* Copy n bytes from src to dst, reversing the byte order if the host check below does not match LJ_ENDIAN_SELECT(1, 0). */ | ||
192 | { | ||
193 | union { uint8_t b; uint32_t u; } host_endian; | ||
194 | host_endian.u = 1; /* b reads back as 1 only on a little-endian host. */ | ||
195 | if (host_endian.b == LJ_ENDIAN_SELECT(1, 0)) { /* NOTE(review): presumably selects by target byte order -- confirm. */ | ||
196 | memcpy(dst, src, n); | ||
197 | } else { | ||
198 | size_t i; | ||
199 | for (i = 0; i < n; i++) | ||
200 | ((uint8_t *)dst)[i] = ((uint8_t *)src)[n-1-i]; /* Mirror index: the last source byte is src[n-1], not src[n]. */ | ||
201 | } | ||
202 | } | ||
203 | |||
204 | static void libdef_push(BuildCtx *ctx, char *p, int arg) /* Handle a PUSH( tag (libdef mode only): p is a quoted string, a number, "lastcl" or "top-N". Exits on any other value. */ | ||
205 | { | ||
206 | UNUSED(arg); | ||
207 | if (ctx->mode == BUILD_libdef) { | ||
208 | int len = (int)strlen(p); | ||
209 | if (*p == '"') { | ||
210 | if (len > 1 && p[len-1] == '"') { | ||
211 | p[len-1] = '\0'; /* Strip the closing quote; the opening one is skipped below. */ | ||
212 | libdef_name(p+1, LIBINIT_STRING); | ||
213 | return; | ||
214 | } | ||
215 | } else if (*p >= '0' && *p <= '9') { | ||
216 | char *ep; | ||
217 | double d = strtod(p, &ep); | ||
218 | if (*ep == '\0') { /* The whole argument must parse as a number. */ | ||
219 | if (optr+1+sizeof(double) > obuf+sizeof(obuf)) { | ||
220 | fprintf(stderr, "Error: output buffer overflow\n"); | ||
221 | exit(1); | ||
222 | } | ||
223 | *optr++ = LIBINIT_NUMBER; | ||
224 | memcpy_endian(optr, &d, sizeof(double)); | ||
225 | optr += sizeof(double); | ||
226 | return; | ||
227 | } | ||
228 | } else if (!strcmp(p, "lastcl")) { | ||
229 | if (optr+1 > obuf+sizeof(obuf)) { | ||
230 | fprintf(stderr, "Error: output buffer overflow\n"); | ||
231 | exit(1); | ||
232 | } | ||
233 | *optr++ = LIBINIT_LASTCL; | ||
234 | return; | ||
235 | } else if (len > 4 && !strncmp(p, "top-", 4)) { | ||
236 | if (optr+2 > obuf+sizeof(obuf)) { | ||
237 | fprintf(stderr, "Error: output buffer overflow\n"); | ||
238 | exit(1); | ||
239 | } | ||
240 | *optr++ = LIBINIT_COPY; | ||
241 | *optr++ = (uint8_t)atoi(p+4); /* N of "top-N", truncated to one byte. */ | ||
242 | return; | ||
243 | } | ||
244 | fprintf(stderr, "Error: bad value for %sPUSH(%s)\n", LIBDEF_PREFIX, p); /* Reached by every malformed value. */ | ||
245 | exit(1); | ||
246 | } | ||
247 | } | ||
248 | |||
249 | static void libdef_set(BuildCtx *ctx, char *p, int arg) /* Handle a SET( tag (libdef mode only): emit the name p as a string followed by LIBINIT_SET. */ | ||
250 | { | ||
251 | UNUSED(arg); | ||
252 | if (ctx->mode == BUILD_libdef) { | ||
253 | if (p[0] == '!' && p[1] == '\0') p[0] = '\0'; /* Set env. */ | ||
254 | libdef_name(p, LIBINIT_STRING); | ||
255 | *optr++ = LIBINIT_SET; /* Room for this byte is reserved by libdef_name's overflow check. */ | ||
256 | obuf[1]++; /* Bump hash table size. */ | ||
257 | } | ||
258 | } | ||
259 | |||
260 | static void libdef_regfunc(BuildCtx *ctx, char *p, int arg) /* Handle NOREG/NOREGUV tags: arg is the REGFUNC_* value for the next function. */ | ||
261 | { | ||
262 | UNUSED(ctx); UNUSED(p); | ||
263 | regfunc = arg; /* Reset to REGFUNC_OK by libdef_func. */ | ||
264 | } | ||
265 | |||
266 | typedef void (*LibDefFunc)(BuildCtx *ctx, char *p, int arg); /* Tag handler: p is the NUL-terminated tag argument, arg the table entry's fixed argument. */ | ||
267 | |||
268 | typedef struct LibDefHandler { | ||
269 | const char *suffix; /* Tag text following LIBDEF_PREFIX. */ | ||
270 | const char *stop; /* Characters that end the argument; NULL if the tag takes none. */ | ||
271 | const LibDefFunc func; | ||
272 | const int arg; | ||
273 | } LibDefHandler; | ||
274 | |||
275 | static const LibDefHandler libdef_handlers[] = { /* Searched in order by emit_lib; ends with a NULL suffix. */ | ||
276 | { "MODULE_", " \t\r\n", libdef_module, 0 }, | ||
277 | { "CF(", ")", libdef_func, LIBINIT_CF }, | ||
278 | { "ASM(", ")", libdef_func, LIBINIT_ASM }, | ||
279 | { "ASM_(", ")", libdef_func, LIBINIT_ASM_ }, | ||
280 | { "REC(", ")", libdef_rec, 0 }, | ||
281 | { "PUSH(", ")", libdef_push, 0 }, | ||
282 | { "SET(", ")", libdef_set, 0 }, | ||
283 | { "NOREGUV", NULL, libdef_regfunc, REGFUNC_NOREGUV }, /* Must precede "NOREG": the first prefix match wins. */ | ||
284 | { "NOREG", NULL, libdef_regfunc, REGFUNC_NOREG }, | ||
285 | { NULL, NULL, (LibDefFunc)0, 0 } | ||
286 | }; | ||
287 | |||
288 | /* Emit C source code for library function definitions: scan every input file in ctx->args for LIBDEF_PREFIX tags and dispatch each to its handler; the output depends on ctx->mode. */ | ||
289 | void emit_lib(BuildCtx *ctx) | ||
290 | { | ||
291 | const char *fname; | ||
292 | |||
293 | if (ctx->mode == BUILD_ffdef || ctx->mode == BUILD_libdef || | ||
294 | ctx->mode == BUILD_recdef) | ||
295 | fprintf(ctx->fp, "/* This is a generated file. DO NOT EDIT! */\n\n"); | ||
296 | else if (ctx->mode == BUILD_vmdef) | ||
297 | fprintf(ctx->fp, "ffnames = {\n[0]=\"Lua\",\n\"C\",\n"); | ||
298 | if (ctx->mode == BUILD_recdef) | ||
299 | fprintf(ctx->fp, "static const uint16_t recff_idmap[] = {\n0,\n0x0100"); | ||
300 | recffid = ffid = FF_C+1; /* First id handed out to a library function. */ | ||
301 | |||
302 | while ((fname = *ctx->args++)) { /* Consumes ctx->args up to its NULL terminator. */ | ||
303 | char buf[256]; /* We don't care about analyzing lines longer than that. */ | ||
304 | FILE *fp; | ||
305 | if (fname[0] == '-' && fname[1] == '\0') { | ||
306 | fp = stdin; | ||
307 | } else { | ||
308 | fp = fopen(fname, "r"); | ||
309 | if (!fp) { | ||
310 | fprintf(stderr, "Error: cannot open input file '%s': %s\n", | ||
311 | fname, strerror(errno)); | ||
312 | exit(1); | ||
313 | } | ||
314 | } | ||
315 | modstate = 0; /* Module state does not carry over between files. */ | ||
316 | regfunc = REGFUNC_OK; | ||
317 | while (fgets(buf, sizeof(buf), fp) != NULL) { | ||
318 | char *p; | ||
319 | for (p = buf; (p = strstr(p, LIBDEF_PREFIX)) != NULL; ) { /* A line may hold several tags. */ | ||
320 | const LibDefHandler *ldh; | ||
321 | p += sizeof(LIBDEF_PREFIX)-1; | ||
322 | for (ldh = libdef_handlers; ldh->suffix != NULL; ldh++) { | ||
323 | size_t n, len = strlen(ldh->suffix); | ||
324 | if (!strncmp(p, ldh->suffix, len)) { | ||
325 | p += len; | ||
326 | n = ldh->stop ? strcspn(p, ldh->stop) : 0; /* Length of the argument up to the stop character. */ | ||
327 | if (!p[n]) break; /* No terminator on this line: the tag is ignored. */ | ||
328 | p[n] = '\0'; /* Cut the argument out in place. */ | ||
329 | ldh->func(ctx, p, ldh->arg); | ||
330 | p += n+1; | ||
331 | break; | ||
332 | } | ||
333 | } | ||
334 | if (ldh->suffix == NULL) { /* Ran off the end of the handler table: unknown tag. */ | ||
335 | buf[strlen(buf)-1] = '\0'; /* Drops the last character of the line (normally the newline). */ | ||
336 | fprintf(stderr, "Error: unknown library definition tag %s%s\n", | ||
337 | LIBDEF_PREFIX, p); | ||
338 | exit(1); | ||
339 | } | ||
340 | } | ||
341 | } | ||
342 | fclose(fp); | ||
343 | if (ctx->mode == BUILD_libdef) { | ||
344 | libdef_endmodule(ctx); /* Flush the last module of this file. */ | ||
345 | } | ||
346 | } | ||
347 | |||
348 | if (ctx->mode == BUILD_ffdef) { | ||
349 | fprintf(ctx->fp, "\n#undef FFDEF\n\n"); | ||
350 | } else if (ctx->mode == BUILD_vmdef) { | ||
351 | fprintf(ctx->fp, "}\n\n"); | ||
352 | } else if (ctx->mode == BUILD_recdef) { | ||
353 | char *p = (char *)obuf; /* Name table filled by find_rec. */ | ||
354 | fprintf(ctx->fp, "\n};\n\n"); | ||
355 | fprintf(ctx->fp, "static const RecordFunc recff_func[] = {\n" | ||
356 | "recff_nyi,\n" | ||
357 | "recff_c"); | ||
358 | while (*p) { /* The names follow the two fixed entries, matching find_rec's indexes from 2. */ | ||
359 | fprintf(ctx->fp, ",\nrecff_%s", p); | ||
360 | p += strlen(p)+1; | ||
361 | } | ||
362 | fprintf(ctx->fp, "\n};\n\n"); | ||
363 | } | ||
364 | } | ||
365 | |||
diff --git a/src/buildvm_peobj.c b/src/buildvm_peobj.c new file mode 100644 index 00000000..9acf6b76 --- /dev/null +++ b/src/buildvm_peobj.c | |||
@@ -0,0 +1,303 @@ | |||
1 | /* | ||
2 | ** LuaJIT VM builder: PE object emitter. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | ** | ||
5 | ** Only used for building on Windows, since we cannot assume the presence | ||
6 | ** of a suitable assembler. The host and target byte order must match. | ||
7 | */ | ||
8 | |||
9 | #include "buildvm.h" | ||
10 | #include "lj_bc.h" | ||
11 | |||
12 | #if LJ_TARGET_X86ORX64 | ||
13 | |||
14 | /* Context for PE object emitter. The symbol table is emitted in two passes: while strtab is NULL only the string table size is accumulated. */ | ||
15 | static char *strtab; /* String table buffer; NULL during pass 1. */ | ||
16 | static size_t strtabofs; /* Next free offset in the string table (starts at 4). */ | ||
17 | |||
18 | /* -- PE object definitions ----------------------------------------------- */ | ||
19 | |||
20 | /* PE header. Written verbatim as the first structure of the object file. */ | ||
21 | typedef struct PEheader { | ||
22 | uint16_t arch; /* Target machine type (PEOBJ_ARCH_TARGET). */ | ||
23 | uint16_t nsects; /* Number of section headers following the header. */ | ||
24 | uint32_t time; /* Timestamp; optional, emit_peobj writes 0. */ | ||
25 | uint32_t symtabofs; /* File offset of the symbol table. */ | ||
26 | uint32_t nsyms; /* Number of symbol table entries, incl. auxiliary entries. */ | ||
27 | uint16_t opthdrsz; /* Size of the optional header; emit_peobj writes 0. */ | ||
28 | uint16_t flags; /* emit_peobj writes 0. */ | ||
29 | } PEheader; | ||
30 | |||
31 | /* PE section header. One per section, written right after the PE header. */ | ||
32 | typedef struct PEsection { | ||
33 | char name[8]; /* Section name, zero-padded; not terminated if 8 chars long. */ | ||
34 | uint32_t vsize; /* Left zero by emit_peobj. */ | ||
35 | uint32_t vaddr; /* Left zero by emit_peobj. */ | ||
36 | uint32_t size; /* Size of the section data in bytes. */ | ||
37 | uint32_t ofs; /* File offset of the section data. */ | ||
38 | uint32_t relocofs; /* File offset of the relocation entries. */ | ||
39 | uint32_t lineofs; /* Left zero by emit_peobj. */ | ||
40 | uint16_t nreloc; /* Number of relocation entries. */ | ||
41 | uint16_t nline; /* Left zero by emit_peobj. */ | ||
42 | uint32_t flags; /* Access, alignment and content type flags. */ | ||
43 | } PEsection; | ||
44 | |||
45 | /* PE relocation. Only the first PEOBJ_RELOC_SIZE bytes are written to the file. */ | ||
46 | typedef struct PEreloc { | ||
47 | uint32_t vaddr; /* Offset of the relocated field within the section. */ | ||
48 | uint32_t symidx; /* Symbol table index of the referenced symbol. */ | ||
49 | uint16_t type; /* PEOBJ_RELOC_REL32 or PEOBJ_RELOC_DIR32. */ | ||
50 | } PEreloc; | ||
51 | |||
52 | /* On-disk size of a PEreloc. Cannot use sizeof, because it pads up to the max. alignment. */ | ||
53 | #define PEOBJ_RELOC_SIZE (4+4+2) | ||
54 | |||
55 | /* PE symbol table entry. Only the first PEOBJ_SYM_SIZE bytes are written to the file. */ | ||
56 | typedef struct PEsym { | ||
57 | union { | ||
58 | char name[8]; /* Inline name (zero-padded) if it is at most 8 chars long. */ | ||
59 | uint32_t nameref[2]; /* Otherwise: [0] = 0, [1] = offset into the string table. */ | ||
60 | } n; | ||
61 | uint32_t value; /* Symbol value, e.g. the offset within its section. */ | ||
62 | int16_t sect; /* 1-based section number; 0 = undefined, -1 = absolute. */ | ||
63 | uint16_t type; /* PEOBJ_TYPE_*. */ | ||
64 | uint8_t scl; /* Storage class: PEOBJ_SCL_*. */ | ||
65 | uint8_t naux; /* Number of auxiliary entries following this entry. */ | ||
66 | } PEsym; | ||
67 | |||
68 | /* PE symbol table auxiliary entry for a section. Follows a section symbol with naux = 1. */ | ||
69 | typedef struct PEsymaux { | ||
70 | uint32_t size; /* Size of the section data. */ | ||
71 | uint16_t nreloc; /* Number of relocations of the section. */ | ||
72 | uint16_t nline; /* Left zero by emit_peobj_sym_sect. */ | ||
73 | uint32_t cksum; /* Left zero by emit_peobj_sym_sect. */ | ||
74 | uint16_t assoc; /* Left zero by emit_peobj_sym_sect. */ | ||
75 | uint8_t comdatsel; /* Left zero by emit_peobj_sym_sect. */ | ||
76 | uint8_t unused[3]; | ||
77 | } PEsymaux; | ||
78 | |||
79 | /* On-disk size of PEsym and PEsymaux. Cannot use sizeof, because it pads up to the max. alignment. */ | ||
80 | #define PEOBJ_SYM_SIZE (8+4+2+2+1+1) | ||
81 | |||
82 | /* PE object CPU specific defines: machine type, relocation types and the prefix prepended to every emitted symbol name. */ | ||
83 | #if LJ_TARGET_X86 | ||
84 | #define PEOBJ_ARCH_TARGET 0x014c /* Machine type for i386 (per the PE/COFF spec). */ | ||
85 | #define PEOBJ_RELOC_REL32 0x14 /* MS: REL32, GNU: DISP32. */ | ||
86 | #define PEOBJ_RELOC_DIR32 0x06 /* Used for relocs with type == 0. */ | ||
87 | #define PEOBJ_SYM_PREFIX "_" | ||
88 | #elif LJ_TARGET_X64 | ||
89 | #define PEOBJ_ARCH_TARGET 0x8664 /* Machine type for x64 (per the PE/COFF spec). */ | ||
90 | #define PEOBJ_RELOC_REL32 0x04 /* MS: REL32, GNU: DISP32. */ | ||
91 | #define PEOBJ_RELOC_DIR32 0x02 /* Used for relocs with type == 0. */ | ||
92 | #define PEOBJ_SYM_PREFIX "" | ||
93 | #endif | ||
94 | |||
95 | /* Section numbers (0-based). The emitters add 1 to get the 1-based number stored in the file, so ABS becomes -1 and UNDEF becomes 0. */ | ||
96 | enum { | ||
97 | PEOBJ_SECT_ABS = -2, | ||
98 | PEOBJ_SECT_UNDEF = -1, | ||
99 | PEOBJ_SECT_TEXT, | ||
100 | /* TODO: add .pdata/.xdata for x64. */ | ||
101 | PEOBJ_SECT_RDATA, | ||
102 | PEOBJ_SECT_RDATA_Z, | ||
103 | PEOBJ_NSECTIONS /* Number of real sections; also the index bound for pesect[]. */ | ||
104 | }; | ||
105 | |||
106 | /* Symbol types. FUNC is used for code labels and external references, NULL for everything else. */ | ||
107 | #define PEOBJ_TYPE_NULL 0 | ||
108 | #define PEOBJ_TYPE_FUNC 0x20 | ||
109 | |||
110 | /* Symbol storage class. STATIC is used for section symbols and @feat.00, EXTERN for all other symbols. */ | ||
111 | #define PEOBJ_SCL_EXTERN 2 | ||
112 | #define PEOBJ_SCL_STATIC 3 | ||
113 | |||
114 | /* -- PE object emitter --------------------------------------------------- */ | ||
115 | |||
116 | /* Emit PE object symbol. */ | ||
117 | static void emit_peobj_sym(BuildCtx *ctx, const char *name, uint32_t value, | ||
118 | int sect, int type, int scl) | ||
119 | { | ||
120 | PEsym sym; | ||
121 | size_t len = strlen(name); | ||
122 | if (!strtab) { /* Pass 1: only calculate string table length. */ | ||
123 | if (len > 8) strtabofs += len+1; | ||
124 | return; | ||
125 | } | ||
126 | if (len <= 8) { | ||
127 | memcpy(sym.n.name, name, len); | ||
128 | memset(sym.n.name+len, 0, 8-len); | ||
129 | } else { | ||
130 | sym.n.nameref[0] = 0; | ||
131 | sym.n.nameref[1] = strtabofs; | ||
132 | memcpy(strtab + strtabofs, name, len); | ||
133 | strtab[strtabofs+len] = 0; | ||
134 | strtabofs += len+1; | ||
135 | } | ||
136 | sym.value = value; | ||
137 | sym.sect = (int16_t)(sect+1); /* 1-based section number. */ | ||
138 | sym.type = (uint16_t)type; | ||
139 | sym.scl = (uint8_t)scl; | ||
140 | sym.naux = 0; | ||
141 | owrite(ctx, &sym, PEOBJ_SYM_SIZE); | ||
142 | } | ||
143 | |||
144 | /* Emit PE object section symbol. */ | ||
145 | static void emit_peobj_sym_sect(BuildCtx *ctx, PEsection *pesect, int sect) | ||
146 | { | ||
147 | PEsym sym; | ||
148 | PEsymaux aux; | ||
149 | if (!strtab) return; /* Pass 1: no output. */ | ||
150 | memcpy(sym.n.name, pesect[sect].name, 8); | ||
151 | sym.value = 0; | ||
152 | sym.sect = (int16_t)(sect+1); /* 1-based section number. */ | ||
153 | sym.type = PEOBJ_TYPE_NULL; | ||
154 | sym.scl = PEOBJ_SCL_STATIC; | ||
155 | sym.naux = 1; | ||
156 | owrite(ctx, &sym, PEOBJ_SYM_SIZE); | ||
157 | memset(&aux, 0, sizeof(PEsymaux)); | ||
158 | aux.size = pesect[sect].size; | ||
159 | aux.nreloc = pesect[sect].nreloc; | ||
160 | owrite(ctx, &aux, PEOBJ_SYM_SIZE); | ||
161 | } | ||
162 | |||
/* Emit an external function symbol at offset ofs of the .text section. */
#define emit_peobj_sym_func(ctx, name, ofs) \
  emit_peobj_sym((ctx), (name), (uint32_t)(ofs), \
                 PEOBJ_SECT_TEXT, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN)

/* Emit an external data symbol at offset ofs of the .rdata section. */
#define emit_peobj_sym_rdata(ctx, name, ofs) \
  emit_peobj_sym((ctx), (name), (uint32_t)(ofs), \
                 PEOBJ_SECT_RDATA, PEOBJ_TYPE_NULL, PEOBJ_SCL_EXTERN)
169 | |||
170 | /* Emit Windows PE object file. */ | ||
171 | void emit_peobj(BuildCtx *ctx) | ||
172 | { | ||
173 | PEheader pehdr; | ||
174 | PEsection pesect[PEOBJ_NSECTIONS]; | ||
175 | int nzsym, relocsyms; | ||
176 | uint32_t sofs; | ||
177 | int i; | ||
178 | union { uint8_t b; uint32_t u; } host_endian; | ||
179 | |||
180 | host_endian.u = 1; | ||
181 | if (host_endian.b != LJ_ENDIAN_SELECT(1, 0)) { | ||
182 | fprintf(stderr, "Error: different byte order for host and target\n"); | ||
183 | exit(1); | ||
184 | } | ||
185 | |||
186 | sofs = sizeof(PEheader) + PEOBJ_NSECTIONS*sizeof(PEsection); | ||
187 | |||
188 | /* Fill in PE sections. */ | ||
189 | memset(&pesect, 0, PEOBJ_NSECTIONS*sizeof(PEsection)); | ||
190 | memcpy(pesect[PEOBJ_SECT_TEXT].name, ".text", sizeof(".text")-1); | ||
191 | pesect[PEOBJ_SECT_TEXT].ofs = sofs; | ||
192 | sofs += (pesect[PEOBJ_SECT_TEXT].size = (uint32_t)ctx->codesz); | ||
193 | pesect[PEOBJ_SECT_TEXT].relocofs = sofs; | ||
194 | sofs += (pesect[PEOBJ_SECT_TEXT].nreloc = (uint16_t)ctx->nreloc) * PEOBJ_RELOC_SIZE; | ||
195 | /* Flags: 60 = read+execute, 50 = align16, 20 = code. */ | ||
196 | pesect[PEOBJ_SECT_TEXT].flags = 0x60500020; | ||
197 | |||
198 | memcpy(pesect[PEOBJ_SECT_RDATA].name, ".rdata", sizeof(".rdata")-1); | ||
199 | pesect[PEOBJ_SECT_RDATA].ofs = sofs; | ||
200 | sofs += (pesect[PEOBJ_SECT_RDATA].size = ctx->npc*sizeof(uint16_t)); | ||
201 | /* Flags: 40 = read, 30 = align4, 40 = initialized data. */ | ||
202 | pesect[PEOBJ_SECT_RDATA].flags = 0x40300040; | ||
203 | |||
204 | memcpy(pesect[PEOBJ_SECT_RDATA_Z].name, ".rdata$Z", sizeof(".rdata$Z")-1); | ||
205 | pesect[PEOBJ_SECT_RDATA_Z].ofs = sofs; | ||
206 | sofs += (pesect[PEOBJ_SECT_RDATA_Z].size = (uint32_t)strlen(ctx->dasm_ident)+1); | ||
207 | /* Flags: 40 = read, 30 = align4, 40 = initialized data. */ | ||
208 | pesect[PEOBJ_SECT_RDATA_Z].flags = 0x40300040; | ||
209 | |||
210 | /* Fill in PE header. */ | ||
211 | pehdr.arch = PEOBJ_ARCH_TARGET; | ||
212 | pehdr.nsects = PEOBJ_NSECTIONS; | ||
213 | pehdr.time = 0; /* Timestamp is optional. */ | ||
214 | pehdr.symtabofs = sofs; | ||
215 | pehdr.opthdrsz = 0; | ||
216 | pehdr.flags = 0; | ||
217 | |||
218 | /* Compute the size of the symbol table: | ||
219 | ** @feat.00 + nsections*2 | ||
220 | ** + asm_start + (nsyms-nzsym) + op_ofs | ||
221 | ** + relocsyms | ||
222 | */ | ||
223 | /* Skip _Z syms. */ | ||
224 | for (nzsym = 0; ctx->sym_ofs[ctx->perm[nzsym]] < 0; nzsym++) ; | ||
225 | for (relocsyms = 0; ctx->extnames[relocsyms]; relocsyms++) ; | ||
226 | pehdr.nsyms = 1+PEOBJ_NSECTIONS*2 + 1+(ctx->nsym-nzsym)+1 + relocsyms; | ||
227 | |||
228 | /* Write PE object header and all sections. */ | ||
229 | owrite(ctx, &pehdr, sizeof(PEheader)); | ||
230 | owrite(ctx, &pesect, sizeof(PEsection)*PEOBJ_NSECTIONS); | ||
231 | |||
232 | /* Write .text section. */ | ||
233 | owrite(ctx, ctx->code, ctx->codesz); | ||
234 | for (i = 0; i < ctx->nreloc; i++) { | ||
235 | PEreloc reloc; | ||
236 | reloc.vaddr = (uint32_t)ctx->reloc[i].ofs; | ||
237 | reloc.symidx = 1+2+ctx->reloc[i].sym; /* Reloc syms are after .text sym. */ | ||
238 | reloc.type = ctx->reloc[i].type ? PEOBJ_RELOC_REL32 : PEOBJ_RELOC_DIR32; | ||
239 | owrite(ctx, &reloc, PEOBJ_RELOC_SIZE); | ||
240 | } | ||
241 | |||
242 | /* Write .rdata section. */ | ||
243 | for (i = 0; i < ctx->npc; i++) { | ||
244 | uint16_t pcofs = (uint16_t)ctx->sym_ofs[i]; | ||
245 | owrite(ctx, &pcofs, 2); | ||
246 | } | ||
247 | |||
248 | /* Write .rdata$Z section. */ | ||
249 | owrite(ctx, ctx->dasm_ident, strlen(ctx->dasm_ident)+1); | ||
250 | |||
251 | /* Write symbol table. */ | ||
252 | strtab = NULL; /* 1st pass: collect string sizes. */ | ||
253 | for (;;) { | ||
254 | char name[80]; | ||
255 | |||
256 | strtabofs = 4; | ||
257 | /* Mark as SafeSEH compliant. */ | ||
258 | emit_peobj_sym(ctx, "@feat.00", 1, | ||
259 | PEOBJ_SECT_ABS, PEOBJ_TYPE_NULL, PEOBJ_SCL_STATIC); | ||
260 | |||
261 | emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_TEXT); | ||
262 | for (i = 0; ctx->extnames[i]; i++) { | ||
263 | sprintf(name, PEOBJ_SYM_PREFIX "%s", ctx->extnames[i]); | ||
264 | emit_peobj_sym(ctx, name, 0, | ||
265 | PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN); | ||
266 | } | ||
267 | emit_peobj_sym_func(ctx, PEOBJ_SYM_PREFIX LABEL_ASM_BEGIN, 0); | ||
268 | for (i = nzsym; i < ctx->nsym; i++) { | ||
269 | int pi = ctx->perm[i]; | ||
270 | if (pi >= ctx->npc) { | ||
271 | sprintf(name, PEOBJ_SYM_PREFIX LABEL_PREFIX "%s", | ||
272 | ctx->globnames[pi-ctx->npc]); | ||
273 | emit_peobj_sym_func(ctx, name, ctx->sym_ofs[pi]); | ||
274 | #if LJ_HASJIT | ||
275 | } else { | ||
276 | #else | ||
277 | } else if (!(pi == BC_JFORI || pi == BC_JFORL || pi == BC_JITERL || | ||
278 | pi == BC_JLOOP || pi == BC_IFORL || pi == BC_IITERL || | ||
279 | pi == BC_ILOOP)) { | ||
280 | #endif | ||
281 | sprintf(name, PEOBJ_SYM_PREFIX LABEL_PREFIX_BC "%s", | ||
282 | bc_names[pi]); | ||
283 | emit_peobj_sym_func(ctx, name, ctx->sym_ofs[pi]); | ||
284 | } | ||
285 | } | ||
286 | |||
287 | emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_RDATA); | ||
288 | emit_peobj_sym_rdata(ctx, PEOBJ_SYM_PREFIX LABEL_OP_OFS, 0); | ||
289 | |||
290 | emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_RDATA_Z); | ||
291 | |||
292 | if (strtab) | ||
293 | break; | ||
294 | /* 2nd pass: alloc strtab, write syms and copy strings. */ | ||
295 | strtab = (char *)malloc(strtabofs); | ||
296 | *(uint32_t *)strtab = strtabofs; | ||
297 | } | ||
298 | |||
299 | /* Write string table. */ | ||
300 | owrite(ctx, strtab, strtabofs); | ||
301 | } | ||
302 | |||
303 | #endif | ||
diff --git a/src/buildvm_x86.dasc b/src/buildvm_x86.dasc new file mode 100644 index 00000000..add00c9d --- /dev/null +++ b/src/buildvm_x86.dasc | |||
@@ -0,0 +1,3592 @@ | |||
1 | |// Low-level VM code for x86 CPUs. | ||
2 | |// Bytecode interpreter, fast functions and helper functions. | ||
3 | |// Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | | | ||
5 | |.arch x86 | ||
6 | |.section code_op, code_sub | ||
7 | | | ||
8 | |.actionlist build_actionlist | ||
9 | |.globals GLOB_ | ||
10 | |.globalnames globnames | ||
11 | |.externnames extnames | ||
12 | | | ||
13 | |//----------------------------------------------------------------------- | ||
14 | | | ||
15 | |// Fixed register assignments for the interpreter. RAL is the low byte of RA; RCW, RCH and RCL are the 16 bit part and the two low bytes of RC; OP, RD and RDL are aliases for RB, RC and RCL. | ||
16 | |// This is very fragile and has many dependencies. Caveat emptor. | ||
17 | |.define BASE, edx // Not C callee-save, refetched anyway. | ||
18 | |.define KBASE, edi // Must be C callee-save. | ||
19 | |.define PC, esi // Must be C callee-save. | ||
20 | |.define DISPATCH, ebx // Must be C callee-save. | ||
21 | | | ||
22 | |.define RA, ecx | ||
23 | |.define RAL, cl | ||
24 | |.define RB, ebp // Must be ebp (C callee-save). | ||
25 | |.define RC, eax // Must be eax (fcomparepp and others). | ||
26 | |.define RCW, ax | ||
27 | |.define RCH, ah | ||
28 | |.define RCL, al | ||
29 | |.define OP, RB | ||
30 | |.define RD, RC | ||
31 | |.define RDL, RCL | ||
32 | | | ||
33 | |// Type definitions. Each one binds a name to a C type, so that fields can be addressed as NAME:reg->field (e.g. L:RB->maxstack). Some of these are only used for documentation. | ||
34 | |.type L, lua_State | ||
35 | |.type GL, global_State | ||
36 | |.type TVALUE, TValue | ||
37 | |.type GCOBJ, GCobj | ||
38 | |.type STR, GCstr | ||
39 | |.type TAB, GCtab | ||
40 | |.type LFUNC, GCfuncL | ||
41 | |.type CFUNC, GCfuncC | ||
42 | |.type PROTO, GCproto | ||
43 | |.type UPVAL, GCupval | ||
44 | |.type NODE, Node | ||
45 | |.type NARGS, int | ||
46 | |.type TRACE, Trace | ||
47 | |.type EXITINFO, ExitInfo | ||
48 | | | ||
49 | |// Stack layout while in interpreter. Must match with lj_frame.h. saveregs pushes ebp, edi, esi and ebx (slots SAVE_R4 down to SAVE_R1), restoreregs pops them in reverse order. All slot offsets below are relative to esp while in the interpreter. | ||
50 | |.macro saveregs | ||
51 | | push ebp; push edi; push esi; push ebx | ||
52 | |.endmacro | ||
53 | |.macro restoreregs | ||
54 | | pop ebx; pop esi; pop edi; pop ebp | ||
55 | |.endmacro | ||
56 | |.define CFRAME_SPACE, aword*7 // Delta for esp between "after register saves" and "while in interpreter" (see <--). | ||
57 | | | ||
58 | |.define INARG_4, aword [esp+aword*15] | ||
59 | |.define INARG_3, aword [esp+aword*14] | ||
60 | |.define INARG_2, aword [esp+aword*13] | ||
61 | |.define INARG_1, aword [esp+aword*12] | ||
62 | |//----- 16 byte aligned, ^^^ arguments from C caller | ||
63 | |.define SAVE_RET, aword [esp+aword*11] //<-- esp entering interpreter. | ||
64 | |.define SAVE_R4, aword [esp+aword*10] | ||
65 | |.define SAVE_R3, aword [esp+aword*9] | ||
66 | |.define SAVE_R2, aword [esp+aword*8] | ||
67 | |//----- 16 byte aligned | ||
68 | |.define SAVE_R1, aword [esp+aword*7] //<-- esp after register saves. | ||
69 | |.define SAVE_PC, aword [esp+aword*6] | ||
70 | |.define ARG6, aword [esp+aword*5] | ||
71 | |.define ARG5, aword [esp+aword*4] | ||
72 | |//----- 16 byte aligned | ||
73 | |.define ARG4, aword [esp+aword*3] | ||
74 | |.define ARG3, aword [esp+aword*2] | ||
75 | |.define ARG2, aword [esp+aword*1] | ||
76 | |.define ARG1, aword [esp] //<-- esp while in interpreter. | ||
77 | |//----- 16 byte aligned, ^^^ arguments for C callee | ||
78 | | | ||
79 | |// FPARGx (a qword slot) overlaps ARGx and ARG(x+1) (one aword each) on x86. | ||
80 | |.define FPARG5, qword [esp+qword*2] | ||
81 | |.define FPARG3, qword [esp+qword*1] | ||
82 | |.define FPARG1, qword [esp] | ||
83 | |// NRESULTS overlaps ARG6 (and the upper half of FPARG5). | ||
84 | |.define NRESULTS, ARG6 | ||
85 | | | ||
86 | |// Arguments for vm_call and vm_pcall. These are aliases for the incoming C arguments INARG_1..INARG_4. | ||
87 | |.define INARG_P_ERRF, INARG_4 // vm_pcall only. | ||
88 | |.define INARG_NRES, INARG_3 | ||
89 | |.define INARG_BASE, INARG_2 | ||
90 | |.define SAVE_L, INARG_1 | ||
91 | | | ||
92 | |.define SAVE_CFRAME, INARG_BASE // Same slot: storing SAVE_CFRAME overwrites INARG_BASE! | ||
93 | | | ||
94 | |// Arguments for vm_cpcall. They share the slots of the vm_pcall arguments above. | ||
95 | |.define INARG_CP_UD, INARG_4 | ||
96 | |.define INARG_CP_FUNC, INARG_3 | ||
97 | |.define INARG_CP_CALL, INARG_2 | ||
98 | | | ||
99 | |//----------------------------------------------------------------------- | ||
100 | | | ||
101 | |// Instruction headers. They decode the operands left in RC (= RD) by ins_NEXT: ins_A, ins_AD and ins_AJ need no extra code, ins_ABC splits RC into RB (from RCH) and RC (from RCL), ins_AB_ and ins_A_C extract only one of them, ins_AND complements RD. | ||
102 | |.macro ins_A; .endmacro | ||
103 | |.macro ins_AD; .endmacro | ||
104 | |.macro ins_AJ; .endmacro | ||
105 | |.macro ins_ABC; movzx RB, RCH; movzx RC, RCL; .endmacro | ||
106 | |.macro ins_AB_; movzx RB, RCH; .endmacro | ||
107 | |.macro ins_A_C; movzx RC, RCL; .endmacro | ||
108 | |.macro ins_AND; not RD; .endmacro | ||
109 | | | ||
110 | |// Instruction decode+dispatch: load the 32 bit instruction at PC, put its second byte into RA and its first byte into OP, advance PC by 4, leave the upper 16 bits in RC (= RD) and jump through the 4-byte entries of the dispatch table. Carefully tuned (nope, lodsd is not faster). | ||
111 | |.macro ins_NEXT | ||
112 | | mov RC, [PC] | ||
113 | | movzx RA, RCH | ||
114 | | movzx OP, RCL | ||
115 | | add PC, 4 | ||
116 | | shr RC, 16 | ||
117 | | jmp aword [DISPATCH+OP*4] | ||
118 | |.endmacro | ||
119 | | | ||
120 | |// Instruction footer. ins_next ends an instruction; the variant is selected at build time by the .if below. | ||
121 | |.if 1 | ||
122 | | // Replicated dispatch. Fewer unpredictable branches, but higher I-Cache use. | ||
123 | | .define ins_next, ins_NEXT | ||
124 | | .define ins_next_, ins_NEXT | ||
125 | |.else | ||
126 | | // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. | ||
127 | | // Affects only certain kinds of benchmarks (and only with -j off). | ||
128 | | // Around 10%-30% slower on Core2, a lot slower on P4. | ||
129 | | .macro ins_next | ||
130 | | jmp ->ins_next | ||
131 | | .endmacro | ||
132 | | .macro ins_next_ | ||
133 | | ->ins_next: | ||
134 | | ins_NEXT | ||
135 | | .endmacro | ||
136 | |.endif | ||
137 | | | ||
138 | |//----------------------------------------------------------------------- | ||
139 | | | ||
140 | |// Macros to test operand types. checktp compares the dword at offset 4 of the 8 byte slot BASE[reg] with tp. checknum jumps to target if that dword is above LJ_TISNUM (unsigned compare), checkstr and checktab jump to target if it differs from LJ_TSTR or LJ_TTAB. | ||
141 | |.macro checktp, reg, tp; cmp dword [BASE+reg*8+4], tp; .endmacro | ||
142 | |.macro checknum, reg, target; checktp reg, LJ_TISNUM; ja target; .endmacro | ||
143 | |.macro checkstr, reg, target; checktp reg, LJ_TSTR; jne target; .endmacro | ||
144 | |.macro checktab, reg, target; checktp reg, LJ_TTAB; jne target; .endmacro | ||
145 | | | ||
146 | |// Operand fields of the instruction just before PC (ins_NEXT has already advanced PC by 4). These operands must be used with movzx. branchPC below adds reg*4 minus the bias BCBIAS_J*4 to PC. | ||
147 | |.define PC_OP, byte [PC-4] | ||
148 | |.define PC_RA, byte [PC-3] | ||
149 | |.define PC_RB, byte [PC-1] | ||
150 | |.define PC_RC, byte [PC-2] | ||
151 | |.define PC_RD, word [PC-2] | ||
152 | | | ||
153 | |.macro branchPC, reg | ||
154 | | lea PC, [PC+reg*4-BCBIAS_J*4] | ||
155 | |.endmacro | ||
156 | | | ||
157 | |// Offsets of global_State and jit_State fields relative to DISPATCH. Assumes DISPATCH is relative to GL. | ||
158 | #define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) | ||
159 | #define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) | ||
160 | | | ||
161 | |// Decrement hashed hotcount and trigger trace recorder if zero. The counter is a 16 bit word at DISPATCH+GG_DISP2HOT, indexed by (PC >> 1) & HOTCOUNT_PCMASK. hotloop jumps to ->vm_hotloop, hotcall to ->vm_hotcall. Overwrites reg. | ||
162 | |.macro hotloop, reg | ||
163 | | mov reg, PC | ||
164 | | shr reg, 1 | ||
165 | | and reg, HOTCOUNT_PCMASK | ||
166 | | sub word [DISPATCH+reg+GG_DISP2HOT], 1 | ||
167 | | jz ->vm_hotloop | ||
168 | |.endmacro | ||
169 | | | ||
170 | |.macro hotcall, reg | ||
171 | | mov reg, PC | ||
172 | | shr reg, 1 | ||
173 | | and reg, HOTCOUNT_PCMASK | ||
174 | | sub word [DISPATCH+reg+GG_DISP2HOT], 1 | ||
175 | | jz ->vm_hotcall | ||
176 | |.endmacro | ||
177 | | | ||
178 | |// Set current VM state: stores the complement of LJ_VMST_<st> in the vmstate field of global_State, addressed relative to DISPATCH. | ||
179 | |.macro set_vmstate, st | ||
180 | | mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st | ||
181 | |.endmacro | ||
182 | | | ||
183 | |// Annoying x87 stuff: support for two compare variants. The C variable cmov selects at build time between fucomip and the fucompp + fnstsw + sahf sequence. | ||
184 | |.macro fcomparepp // Compare and pop st0 >< st1. | ||
185 | ||if (cmov) { | ||
186 | | fucomip st1 | ||
187 | | fpop | ||
188 | ||} else { | ||
189 | | fucompp | ||
190 | | fnstsw ax // eax modified! This variant clobbers RC/RD. | ||
191 | | sahf | ||
192 | ||} | ||
193 | |.endmacro | ||
194 | | | ||
195 | |.macro fdup; fld st0; .endmacro | ||
196 | |.macro fpop1; fstp st1; .endmacro | ||
197 | | | ||
198 | |// Move table write barrier back: clears the black bit of tab and pushes tab onto the gc.grayagain list (old list head goes to tab->gclist). Overwrites reg. | ||
199 | |.macro barrierback, tab, reg | ||
200 | | and byte tab->marked, cast_byte(~LJ_GC_BLACK) // black2gray(tab) | ||
201 | | mov reg, [DISPATCH+DISPATCH_GL(gc.grayagain)] | ||
202 | | mov [DISPATCH+DISPATCH_GL(gc.grayagain)], tab | ||
203 | | mov tab->gclist, reg | ||
204 | |.endmacro | ||
205 | | | ||
206 | |//----------------------------------------------------------------------- | ||
207 | |||
208 | /* Generate subroutines used by opcodes and other parts of the VM. */ | ||
209 | /* The .code_sub section should be last to help static branch prediction. */ | ||
210 | static void build_subroutines(BuildCtx *ctx, int cmov) | ||
211 | { | ||
212 | |.code_sub | ||
213 | | | ||
214 | |//----------------------------------------------------------------------- | ||
215 | |//-- Call and return handling ------------------------------------------- | ||
216 | |//----------------------------------------------------------------------- | ||
217 | | | ||
218 | |// Reminder: A call gate may be called with func/args above L->maxstack, | ||
219 | |// i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot, | ||
220 | |// too. This means all call gates (L*, C and fast functions) must check | ||
221 | |// for stack overflow _before_ adding more slots! | ||
222 | | | ||
223 | |//-- Call gates --------------------------------------------------------- | ||
224 | | | ||
225 | |->gate_lf: // Call gate for fixarg Lua functions. | ||
226 | | // RA = new base, RB = LFUNC, RC = nargs+1, (BASE = old base), PC = return | ||
227 | | // DISPATCH initialized | ||
228 | | mov BASE, RA | ||
229 | | mov PROTO:RB, LFUNC:RB->pt | ||
230 | | mov [BASE-4], PC // Store caller PC. | ||
231 | | movzx RA, byte PROTO:RB->framesize | ||
232 | | mov PC, PROTO:RB->bc | ||
233 | | mov KBASE, PROTO:RB->k | ||
234 | | mov L:RB, SAVE_L | ||
235 | | lea RA, [BASE+RA*8] // Top of frame. | ||
236 | | lea RC, [BASE+NARGS:RC*8-4] // Points to tag of 1st free slot. | ||
237 | | cmp RA, L:RB->maxstack | ||
238 | | ja ->gate_lf_growstack | ||
239 | |9: // Entry point from vararg setup below. | ||
240 | | mov RB, LJ_TNIL | ||
241 | |1: // Clear free slots until top of frame. | ||
242 | | mov [RC], RB | ||
243 | | mov [RC+8], RB | ||
244 | | add RC, 16 | ||
245 | | cmp RC, RA | ||
246 | | jb <1 | ||
247 | #if LJ_HASJIT | ||
248 | | // NYI: Disabled, until the tracer supports recursion/upcalls/leaves. | ||
249 | | // hotcall RB | ||
250 | #endif | ||
251 | | ins_next | ||
252 | | | ||
253 | |->gate_lv: // Call gate for vararg Lua functions. | ||
254 | | // RA = new base, RB = LFUNC, RC = nargs+1, (BASE = old base), PC = return | ||
255 | | // DISPATCH initialized | ||
256 | | mov [RA-4], PC // Store caller PC. | ||
257 | | lea PC, [NARGS:RC*8+FRAME_VARG] | ||
258 | | lea BASE, [RA+PC-FRAME_VARG] | ||
259 | | mov [BASE-8], LFUNC:RB // Store copy of LFUNC. | ||
260 | | mov PROTO:RB, LFUNC:RB->pt | ||
261 | | mov [BASE-4], PC // Store delta + FRAME_VARG. | ||
262 | | movzx PC, byte PROTO:RB->framesize | ||
263 | | lea KBASE, [BASE+PC*8] | ||
264 | | mov L:PC, SAVE_L | ||
265 | | lea RC, [BASE+4] | ||
266 | | cmp KBASE, L:PC->maxstack | ||
267 | | ja ->gate_lv_growstack // Need to grow stack. | ||
268 | | movzx PC, byte PROTO:RB->numparams | ||
269 | | test PC, PC | ||
270 | | jz >2 | ||
271 | |1: // Copy fixarg slots up. | ||
272 | | add RA, 8 | ||
273 | | cmp RA, BASE | ||
274 | | jnb >2 | ||
275 | | mov KBASE, [RA-8] | ||
276 | | mov [RC-4], KBASE | ||
277 | | mov KBASE, [RA-4] | ||
278 | | mov [RC], KBASE | ||
279 | | add RC, 8 | ||
280 | | mov dword [RA-4], LJ_TNIL // Clear old fixarg slot (help the GC). | ||
281 | | sub PC, 1 | ||
282 | | jnz <1 | ||
283 | |2: | ||
284 | | movzx RA, byte PROTO:RB->framesize | ||
285 | | mov PC, PROTO:RB->bc | ||
286 | | mov KBASE, PROTO:RB->k | ||
287 | | lea RA, [BASE+RA*8] | ||
288 | | jmp <9 | ||
289 | | | ||
290 | |->gate_c: // Call gate for C functions. | ||
291 | | // RA = new base, RB = CFUNC, RC = nargs+1, (BASE = old base), PC = return | ||
292 | | mov [RA-4], PC | ||
293 | | mov KBASE, CFUNC:RB->f | ||
294 | | mov L:RB, SAVE_L | ||
295 | | lea RC, [RA+NARGS:RC*8-8] | ||
296 | | mov L:RB->base, RA | ||
297 | | lea RA, [RC+8*LUA_MINSTACK] | ||
298 | | mov ARG1, L:RB | ||
299 | | mov L:RB->top, RC | ||
300 | | cmp RA, L:RB->maxstack | ||
301 | | ja ->gate_c_growstack // Need to grow stack. | ||
302 | | set_vmstate C | ||
303 | | call KBASE // (lua_State *L) | ||
304 | | set_vmstate INTERP | ||
305 | | // nresults returned in eax (RD). | ||
306 | | mov BASE, L:RB->base | ||
307 | | lea RA, [BASE+RD*8] | ||
308 | | neg RA | ||
309 | | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8 | ||
310 | |->vm_returnc: | ||
311 | | add RD, 1 // RD = nresults+1 | ||
312 | | mov NRESULTS, RD | ||
313 | | test PC, FRAME_TYPE | ||
314 | | jz ->BC_RET_Z // Handle regular return to Lua. | ||
315 | | | ||
316 | |//-- Return handling (non-inline) --------------------------------------- | ||
317 | | | ||
318 | |->vm_return: | ||
319 | | // BASE = base, RA = resultofs, RD = nresults+1 (= NRESULTS), PC = return | ||
320 | | test PC, FRAME_C | ||
321 | | jz ->vm_returnp | ||
322 | | | ||
323 | | // Return to C. | ||
324 | | set_vmstate C | ||
325 | | and PC, -8 | ||
326 | | sub PC, BASE | ||
327 | | neg PC // Previous base = BASE - delta. | ||
328 | | | ||
329 | | sub RD, 1 | ||
330 | | jz >2 | ||
331 | |1: | ||
332 | | mov RB, [BASE+RA] // Move results down. | ||
333 | | mov [BASE-8], RB | ||
334 | | mov RB, [BASE+RA+4] | ||
335 | | mov [BASE-4], RB | ||
336 | | add BASE, 8 | ||
337 | | sub RD, 1 | ||
338 | | jnz <1 | ||
339 | |2: | ||
340 | | mov L:RB, SAVE_L | ||
341 | | mov L:RB->base, PC | ||
342 | |3: | ||
343 | | mov RD, NRESULTS | ||
344 | | mov RA, INARG_NRES // RA = wanted nresults+1 | ||
345 | |4: | ||
346 | | cmp RA, RD | ||
347 | | jne >6 // More/less results wanted? | ||
348 | |5: | ||
349 | | sub BASE, 8 | ||
350 | | mov L:RB->top, BASE | ||
351 | | | ||
352 | |->vm_leave_cp: | ||
353 | | mov RA, SAVE_CFRAME // Restore previous C frame. | ||
354 | | mov L:RB->cframe, RA | ||
355 | | xor eax, eax // Ok return status for vm_pcall. | ||
356 | | | ||
357 | |->vm_leave_unw: | ||
358 | | add esp, CFRAME_SPACE | ||
359 | | restoreregs | ||
360 | | ret | ||
361 | | | ||
362 | |6: | ||
363 | | jb >7 // Less results wanted? | ||
364 | | // More results wanted. Check stack size and fill up results with nil. | ||
365 | | cmp BASE, L:RB->maxstack | ||
366 | | ja >8 | ||
367 | | mov dword [BASE-4], LJ_TNIL | ||
368 | | add BASE, 8 | ||
369 | | add RD, 1 | ||
370 | | jmp <4 | ||
371 | | | ||
372 | |7: // Less results wanted. | ||
373 | | test RA, RA | ||
374 | | jz <5 // But check for LUA_MULTRET+1. | ||
375 | | sub RA, RD // Negative result! | ||
376 | | lea BASE, [BASE+RA*8] // Correct top. | ||
377 | | jmp <5 | ||
378 | | | ||
379 | |8: // Corner case: need to grow stack for filling up results. | ||
380 | | // This can happen if: | ||
381 | | // - A C function grows the stack (a lot). | ||
382 | | // - The GC shrinks the stack in between. | ||
383 | | // - A return back from a lua_call() with (high) nresults adjustment. | ||
384 | | mov L:RB->top, BASE // Save current top held in BASE (yes). | ||
385 | | mov NRESULTS, RD // Need to fill only remainder with nil. | ||
386 | | mov ARG2, RA // Grow by wanted nresults+1. | ||
387 | | mov ARG1, L:RB | ||
388 | | call extern lj_state_growstack // (lua_State *L, int n) | ||
389 | | mov BASE, L:RB->top // Need the (realloced) L->top in BASE. | ||
390 | | jmp <3 | ||
391 | | | ||
392 | |->vm_unwind_c: // Unwind C stack, return from vm_pcall. | ||
393 | | // (void *cframe, int errcode) | ||
394 | | mov ecx, [esp+4] | ||
395 | | mov eax, [esp+8] // Error return status for vm_pcall. | ||
396 | | and ecx, CFRAME_RAWMASK | ||
397 | | mov esp, ecx | ||
398 | | mov L:RB, SAVE_L | ||
399 | | mov GL:RB, L:RB->glref | ||
400 | | mov dword GL:RB->vmstate, ~LJ_VMST_C | ||
401 | | jmp ->vm_leave_unw | ||
402 | | | ||
403 | |->vm_unwind_ff: // Unwind C stack, return from ff pcall. | ||
404 | | mov ecx, [esp+4] | ||
405 | | and ecx, CFRAME_RAWMASK | ||
406 | | mov esp, ecx | ||
407 | | mov L:RB, SAVE_L | ||
408 | | mov RA, -8 // Results start at BASE+RA = BASE-8. | ||
409 | | mov RD, 1+1 // Really 1+2 results, incr. later. | ||
410 | | mov BASE, L:RB->base | ||
411 | | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. | ||
412 | | add DISPATCH, GG_G2DISP | ||
413 | | mov PC, [BASE-4] // Fetch PC of previous frame. | ||
414 | | mov dword [BASE-4], LJ_TFALSE // Prepend false to error message. | ||
415 | | set_vmstate INTERP | ||
416 | | jmp ->vm_returnc // Increments RD/NRESULTS and returns. | ||
417 | | | ||
418 | |->vm_returnp: | ||
419 | | test PC, FRAME_P | ||
420 | | jz ->cont_dispatch | ||
421 | | | ||
422 | | // Return from pcall or xpcall fast func. | ||
423 | | and PC, -8 | ||
424 | | sub BASE, PC // Restore caller base. | ||
425 | | lea RA, [RA+PC-8] // Rebase RA and prepend one result. | ||
426 | | mov PC, [BASE-4] // Fetch PC of previous frame. | ||
427 | | // Prepending may overwrite the pcall frame, so do it at the end. | ||
428 | | mov dword [BASE+RA+4], LJ_TTRUE // Prepend true to results. | ||
429 | | jmp ->vm_returnc // Increments RD/NRESULTS and returns. | ||
430 | | | ||
431 | |//-- Grow stack on-demand ----------------------------------------------- | ||
432 | | | ||
433 | |->gate_c_growstack: // Grow stack for C function. | ||
434 | | mov ARG2, LUA_MINSTACK | ||
435 | | jmp >1 | ||
436 | | | ||
437 | |->gate_lv_growstack: // Grow stack for vararg Lua function. | ||
438 | | sub RC, 8 | ||
439 | | mov BASE, RA | ||
440 | | mov RA, KBASE | ||
441 | | mov PC, PROTO:RB->bc | ||
442 | | mov L:RB, SAVE_L | ||
443 | | | ||
444 | |->gate_lf_growstack: // Grow stack for fixarg Lua function. | ||
445 | | // BASE = new base, RA = requested top, RC = top (offset +4 bytes) | ||
446 | | // RB = L, PC = first PC of called function (or anything if C function) | ||
447 | | sub RC, 4 // Adjust top. | ||
448 | | sub RA, BASE | ||
449 | | shr RA, 3 // n = pt->framesize - L->top | ||
450 | | add PC, 4 // Must point after first instruction. | ||
451 | | mov L:RB->base, BASE | ||
452 | | mov L:RB->top, RC | ||
453 | | mov SAVE_PC, PC | ||
454 | | mov ARG2, RA | ||
455 | | mov ARG1, L:RB | ||
456 | |1: | ||
457 | | // L:RB = L, L->base = new base, L->top = top | ||
458 | | // SAVE_PC = initial PC+1 (undefined for C functions) | ||
459 | | call extern lj_state_growstack // (lua_State *L, int n) | ||
460 | | mov RA, L:RB->base | ||
461 | | mov RC, L:RB->top | ||
462 | | mov LFUNC:RB, [RA-8] | ||
463 | | mov PC, [RA-4] | ||
464 | | sub RC, RA | ||
465 | | shr RC, 3 | ||
466 | | add NARGS:RC, 1 | ||
467 | | // RA = new base, RB = LFUNC, RC = nargs+1, (BASE = invalid), PC restored. | ||
468 | | jmp aword LFUNC:RB->gate // Just retry call. | ||
469 | | | ||
470 | |//----------------------------------------------------------------------- | ||
471 | |//-- Entry points into the assembler VM --------------------------------- | ||
472 | |//----------------------------------------------------------------------- | ||
473 | | | ||
474 | |->vm_resume: // Setup C frame and resume thread. | ||
475 | | // (lua_State *L, StkId base, int nres1 = 0, ptrdiff_t ef = 0) | ||
476 | | saveregs | ||
477 | | mov PC, FRAME_C | ||
478 | | sub esp, CFRAME_SPACE | ||
479 | | xor RD, RD | ||
480 | | mov L:RB, SAVE_L | ||
481 | | lea KBASE, [esp+CFRAME_RESUME] | ||
482 | | mov RA, INARG_BASE | ||
483 | | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. | ||
484 | | add DISPATCH, GG_G2DISP | ||
485 | | mov L:RB->cframe, KBASE | ||
486 | | mov SAVE_CFRAME, RD // Caveat: overlaps INARG_BASE! | ||
487 | | mov SAVE_PC, RD // Any value outside of bytecode is ok. | ||
488 | | cmp byte L:RB->status, RDL | ||
489 | | je >3 // Initial resume (like a call). | ||
490 | | | ||
491 | | // Resume after yield (like a return). | ||
492 | | set_vmstate INTERP | ||
493 | | mov byte L:RB->status, RDL | ||
494 | | mov BASE, L:RB->base | ||
495 | | mov RD, L:RB->top | ||
496 | | sub RD, RA | ||
497 | | shr RD, 3 | ||
498 | | add RD, 1 // RD = nresults+1 | ||
499 | | sub RA, BASE // RA = resultofs | ||
500 | | mov PC, [BASE-4] | ||
501 | | mov NRESULTS, RD | ||
502 | | test PC, FRAME_TYPE | ||
503 | | jz ->BC_RET_Z | ||
504 | | jmp ->vm_return | ||
505 | | | ||
506 | |->vm_pcall: // Setup protected C frame and enter VM. | ||
507 | | // (lua_State *L, StkId base, int nres1, ptrdiff_t ef) | ||
508 | | saveregs | ||
509 | | mov PC, FRAME_CP | ||
510 | | jmp >1 | ||
511 | | | ||
512 | |->vm_call: // Set up a C frame (PC = FRAME_C) and enter the VM. | ||
513 | | // (lua_State *L, StkId base, int nres1) | ||
514 | | saveregs | ||
515 | | mov PC, FRAME_C | ||
516 | | | ||
517 | |1: // Entry point for vm_pcall above (PC = ftype). | ||
518 | | sub esp, CFRAME_SPACE | ||
519 | | mov L:RB, SAVE_L | ||
520 | | mov RA, INARG_BASE | ||
521 | | | ||
522 | |2: // Entry point for vm_cpcall below (RA = base, RB = L, PC = ftype). | ||
523 | | mov KBASE, L:RB->cframe // Add our C frame to cframe chain: save the previous chain head first. | ||
524 | | mov SAVE_CFRAME, KBASE // Caveat: overlaps INARG_BASE (RA already holds the base)! | ||
525 | | mov SAVE_PC, esp // Any value outside of bytecode is ok. | ||
526 | | mov L:RB->cframe, esp | ||
527 | | | ||
528 | | mov DISPATCH, L:RB->glref // Set up pointer to dispatch table. | ||
529 | | add DISPATCH, GG_G2DISP | ||
530 | | | ||
531 | |3: // Entry point for vm_resume above (RA = base, RB = L, PC = ftype). | ||
532 | | set_vmstate INTERP | ||
533 | | mov BASE, L:RB->base // BASE = old base (used in vmeta_call). | ||
534 | | add PC, RA | ||
535 | | sub PC, BASE // PC = frame delta (RA - BASE) + frame type | ||
536 | | | ||
537 | | mov RC, L:RB->top | ||
538 | | sub RC, RA | ||
539 | | shr NARGS:RC, 3 | ||
540 | | add NARGS:RC, 1 // RC = nargs+1 (8-byte slots between RA and L->top, plus one) | ||
541 | | | ||
542 | | mov LFUNC:RB, [RA-8] | ||
543 | | cmp dword [RA-4], LJ_TFUNC | ||
544 | | jne ->vmeta_call // Not a function. vmeta_call needs KBASE defined and != BASE. | ||
545 | | jmp aword LFUNC:RB->gate | ||
546 | | // RA = new base, RB = LFUNC/CFUNC, RC = nargs+1. | ||
547 | | | ||
548 | |->vm_cpcall: // Set up a protected C frame, then call a C function. | ||
549 | | // (lua_State *L, lua_CPFunction cp, lua_CFunction func, void *ud) | ||
550 | | saveregs | ||
551 | | sub esp, CFRAME_SPACE | ||
552 | | | ||
553 | | mov L:RB, SAVE_L | ||
554 | | mov RC, INARG_CP_UD | ||
555 | | mov RA, INARG_CP_FUNC | ||
556 | | mov BASE, INARG_CP_CALL | ||
557 | | mov SAVE_PC, esp // Any value outside of bytecode is ok. | ||
558 | | | ||
559 | | // Caveat: INARG_P_* and INARG_CP_* overlap! All INARG_CP_* values are in registers by now. | ||
560 | | mov KBASE, L:RB->stack // Compute -savestack(L, L->top). | ||
561 | | sub KBASE, L:RB->top | ||
562 | | mov INARG_P_ERRF, 0 // No error function. | ||
563 | | mov INARG_NRES, KBASE // Negative delta means cframe without frame. | ||
564 | | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe). | ||
565 | | | ||
566 | | mov ARG3, RC | ||
567 | | mov ARG2, RA | ||
568 | | mov ARG1, L:RB | ||
569 | | | ||
570 | | mov KBASE, L:RB->cframe // Add our C frame to cframe chain. | ||
571 | | mov SAVE_CFRAME, KBASE // Caveat: overlaps INARG_CP_CALL (already loaded into BASE)! | ||
572 | | mov L:RB->cframe, esp | ||
573 | | | ||
574 | | call BASE // (lua_State *L, lua_CFunction func, void *ud) | ||
575 | | // StkId (new base) or NULL returned in eax (RC). | ||
576 | | test RC, RC | ||
577 | | jz ->vm_leave_cp // No base? Just remove C frame. | ||
578 | | mov RA, RC | ||
579 | | mov PC, FRAME_CP | ||
580 | | jmp <2 // Else continue with the call at label 2 of vm_call (RA = base, PC = ftype). | ||
581 | | | ||
582 | |//----------------------------------------------------------------------- | ||
583 | |//-- Metamethod handling ------------------------------------------------ | ||
584 | |//----------------------------------------------------------------------- | ||
585 | | | ||
586 | |//-- Continuation dispatch ---------------------------------------------- | ||
587 | | | ||
588 | |->cont_dispatch: | ||
589 | | // BASE = meta base, RA = resultofs, RD = nresults+1 (also in NRESULTS) | ||
590 | | add RA, BASE | ||
591 | | and PC, -8 | ||
592 | | mov RB, BASE | ||
593 | | sub BASE, PC // Restore caller BASE: subtract PC with its low 3 bits cleared. | ||
594 | | mov dword [RA+RD*8-4], LJ_TNIL // Ensure one valid arg: nil tag in the slot after the last result. | ||
595 | | mov RC, RA // ... in [RC] | ||
596 | | mov PC, [RB-12] // Restore PC from [cont|PC]. | ||
597 | | mov LFUNC:KBASE, [BASE-8] | ||
598 | | mov PROTO:KBASE, LFUNC:KBASE->pt | ||
599 | | mov KBASE, PROTO:KBASE->k | ||
600 | | // BASE = base, RC = result, RB = meta base | ||
601 | | jmp dword [RB-16] // Jump to continuation (address stored at [RB-16]). | ||
602 | | | ||
603 | |->cont_cat: // Continuation that re-enters BC_CAT_Z. BASE = base, RC = result, RB = meta base | ||
604 | | movzx RA, PC_RB | ||
605 | | sub RB, 16 | ||
606 | | lea RA, [BASE+RA*8] | ||
607 | | sub RA, RB | ||
608 | | je ->cont_ra | ||
609 | | neg RA | ||
610 | | shr RA, 3 | ||
611 | | mov ARG3, RA | ||
612 | | mov RA, [RC+4] | ||
613 | | mov RC, [RC] | ||
614 | | mov [RB+4], RA | ||
615 | | mov [RB], RC | ||
616 | | mov ARG2, RB | ||
617 | | jmp ->BC_CAT_Z | ||
618 | | | ||
619 | |//-- Table indexing metamethods ----------------------------------------- | ||
620 | | | ||
621 | |->vmeta_tgets: | ||
622 | | mov ARG5, RC // RC = GCstr * | ||
623 | | mov ARG6, LJ_TSTR | ||
624 | | lea RC, ARG5 // RC = address of the temp. string TValue stored in ARG5/ARG6. | ||
625 | | cmp PC_OP, BC_GGET | ||
626 | | jne >1 | ||
627 | | lea RA, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv. | ||
628 | | mov [RA], TAB:RB // RB = GCtab * | ||
629 | | mov dword [RA+4], LJ_TTAB | ||
630 | | mov RB, RA | ||
631 | | jmp >2 | ||
632 | | | ||
633 | |->vmeta_tgetb: | ||
634 | | movzx RC, PC_RC // Ugly: fild cannot load from a byte, so go through the dword ARG4. | ||
635 | | mov ARG4, RC | ||
636 | | fild ARG4 | ||
637 | | fstp FPARG5 | ||
638 | | lea RC, ARG5 // RC = address of the temp. number TValue stored in ARG5/ARG6. | ||
639 | | jmp >1 | ||
640 | | | ||
641 | |->vmeta_tgetv: | ||
642 | | movzx RC, PC_RC // Reload TValue *k from RC. | ||
643 | | lea RC, [BASE+RC*8] | ||
644 | |1: | ||
645 | | movzx RB, PC_RB // Reload TValue *t from RB. | ||
646 | | lea RB, [BASE+RB*8] | ||
647 | |2: | ||
648 | | mov ARG2, RB | ||
649 | | mov L:RB, SAVE_L | ||
650 | | mov ARG3, RC | ||
651 | | mov ARG1, L:RB | ||
652 | | mov SAVE_PC, PC | ||
653 | | mov L:RB->base, BASE | ||
654 | | call extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) | ||
655 | | // TValue * (finished) or NULL (metamethod) returned in eax (RC). | ||
656 | | mov BASE, L:RB->base | ||
657 | | test RC, RC | ||
658 | | jz >3 | ||
659 | |->cont_ra: // BASE = base, RC = result. Copies [RC] to slot BASE[PC_RA]. | ||
660 | | movzx RA, PC_RA | ||
661 | | mov RB, [RC+4] | ||
662 | | mov RC, [RC] | ||
663 | | mov [BASE+RA*8+4], RB | ||
664 | | mov [BASE+RA*8], RC | ||
665 | | ins_next | ||
666 | | | ||
667 | |3: // Call __index metamethod. | ||
668 | | // BASE = base, L->top = new base, stack = cont/func/t/k | ||
669 | | mov RA, L:RB->top | ||
670 | | mov [RA-12], PC // Save PC in the [cont|PC] slot. | ||
671 | | lea PC, [RA+FRAME_CONT] | ||
672 | | sub PC, BASE | ||
673 | | mov LFUNC:RB, [RA-8] // Guaranteed to be a function here. | ||
674 | | mov NARGS:RC, 3 // 2+1 args for func(t, k). | ||
675 | | jmp aword LFUNC:RB->gate | ||
676 | | | ||
677 | |//----------------------------------------------------------------------- | ||
678 | | | ||
679 | |->vmeta_tsets: | ||
680 | | mov ARG5, RC // RC = GCstr * | ||
681 | | mov ARG6, LJ_TSTR | ||
682 | | lea RC, ARG5 // RC = address of the temp. string TValue stored in ARG5/ARG6. | ||
683 | | cmp PC_OP, BC_GSET | ||
684 | | jne >1 | ||
685 | | lea RA, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv. | ||
686 | | mov [RA], TAB:RB // RB = GCtab * | ||
687 | | mov dword [RA+4], LJ_TTAB | ||
688 | | mov RB, RA | ||
689 | | jmp >2 | ||
690 | | | ||
691 | |->vmeta_tsetb: | ||
692 | | movzx RC, PC_RC // Ugly: fild cannot load from a byte, so go through the dword ARG4. | ||
693 | | mov ARG4, RC | ||
694 | | fild ARG4 | ||
695 | | fstp FPARG5 | ||
696 | | lea RC, ARG5 // RC = address of the temp. number TValue stored in ARG5/ARG6. | ||
697 | | jmp >1 | ||
698 | | | ||
699 | |->vmeta_tsetv: | ||
700 | | movzx RC, PC_RC // Reload TValue *k from RC. | ||
701 | | lea RC, [BASE+RC*8] | ||
702 | |1: | ||
703 | | movzx RB, PC_RB // Reload TValue *t from RB. | ||
704 | | lea RB, [BASE+RB*8] | ||
705 | |2: | ||
706 | | mov ARG2, RB | ||
707 | | mov L:RB, SAVE_L | ||
708 | | mov ARG3, RC | ||
709 | | mov ARG1, L:RB | ||
710 | | mov SAVE_PC, PC | ||
711 | | mov L:RB->base, BASE | ||
712 | | call extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) | ||
713 | | // TValue * (finished) or NULL (metamethod) returned in eax (RC). | ||
714 | | mov BASE, L:RB->base | ||
715 | | test RC, RC | ||
716 | | jz >3 | ||
717 | | // NOBARRIER: lj_meta_tset ensures the table is not black. | ||
718 | | movzx RA, PC_RA | ||
719 | | mov RB, [BASE+RA*8+4] | ||
720 | | mov RA, [BASE+RA*8] | ||
721 | | mov [RC+4], RB | ||
722 | | mov [RC], RA | ||
723 | |->cont_nop: // BASE = base, (RC = result). Just continues with the next instruction. | ||
724 | | ins_next | ||
725 | | | ||
726 | |3: // Call __newindex metamethod. | ||
727 | | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) | ||
728 | | mov RA, L:RB->top | ||
729 | | mov [RA-12], PC // Save PC in the [cont|PC] slot. | ||
730 | | movzx RC, PC_RA | ||
731 | | mov RB, [BASE+RC*8+4] // Copy value from BASE[PC_RA] to third argument. | ||
732 | | mov RC, [BASE+RC*8] | ||
733 | | mov [RA+20], RB | ||
734 | | mov [RA+16], RC | ||
735 | | lea PC, [RA+FRAME_CONT] | ||
736 | | sub PC, BASE | ||
737 | | mov LFUNC:RB, [RA-8] // Guaranteed to be a function here. | ||
738 | | mov NARGS:RC, 4 // 3+1 args for func(t, k, v). | ||
739 | | jmp aword LFUNC:RB->gate | ||
740 | | | ||
741 | |//-- Comparison metamethods --------------------------------------------- | ||
742 | | | ||
743 | |->vmeta_comp: | ||
744 | | movzx RB, PC_OP | ||
745 | | lea RD, [BASE+RD*8] | ||
746 | | lea RA, [BASE+RA*8] | ||
747 | | mov ARG4, RB | ||
748 | | mov L:RB, SAVE_L | ||
749 | | mov ARG3, RD | ||
750 | | mov ARG2, RA | ||
751 | | mov ARG1, L:RB | ||
752 | | mov SAVE_PC, PC | ||
753 | | mov L:RB->base, BASE | ||
754 | | call extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) | ||
755 | | // 0/1 (result) or TValue * (new base for the metamethod call) returned in eax (RC). | ||
756 | |3: | ||
757 | | mov BASE, L:RB->base | ||
758 | | cmp RC, 1 | ||
759 | | ja ->vmeta_binop | ||
760 | |4: | ||
761 | | lea PC, [PC+4] | ||
762 | | jb >6 | ||
763 | |5: | ||
764 | | movzx RD, PC_RD | ||
765 | | branchPC RD | ||
766 | |6: | ||
767 | | ins_next | ||
768 | | | ||
769 | |->cont_condt: // BASE = base, RC = result | ||
770 | | add PC, 4 | ||
771 | | cmp dword [RC+4], LJ_TISTRUECOND // Branch if result is true. | ||
772 | | jb <5 | ||
773 | | jmp <6 | ||
774 | | | ||
775 | |->cont_condf: // BASE = base, RC = result | ||
776 | | cmp dword [RC+4], LJ_TISTRUECOND // Branch if result is false (flags are consumed by jb at label 4). | ||
777 | | jmp <4 | ||
778 | | | ||
779 | |->vmeta_equal: | ||
780 | | mov ARG4, RB | ||
781 | | mov L:RB, SAVE_L | ||
782 | | sub PC, 4 | ||
783 | | mov ARG3, RD | ||
784 | | mov ARG2, RA | ||
785 | | mov ARG1, L:RB | ||
786 | | mov SAVE_PC, PC | ||
787 | | mov L:RB->base, BASE | ||
788 | | call extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) | ||
789 | | // 0/1 (result) or TValue * (new base for the metamethod call) returned in eax (RC). | ||
790 | | jmp <3 | ||
791 | | | ||
792 | |//-- Arithmetic metamethods --------------------------------------------- | ||
793 | | | ||
794 | |->vmeta_arith_vn: | ||
795 | | lea RC, [KBASE+RC*8] | ||
796 | | jmp >1 | ||
797 | | | ||
798 | |->vmeta_arith_nv: | ||
799 | | lea RC, [KBASE+RC*8] | ||
800 | | lea RB, [BASE+RB*8] | ||
801 | | xchg RB, RC | ||
802 | | jmp >2 | ||
803 | | | ||
804 | |->vmeta_unm: | ||
805 | | lea RC, [BASE+RD*8] | ||
806 | | mov RB, RC | ||
807 | | jmp >2 | ||
808 | | | ||
809 | |->vmeta_arith_vv: | ||
810 | | lea RC, [BASE+RC*8] | ||
811 | |1: | ||
812 | | lea RB, [BASE+RB*8] | ||
813 | |2: | ||
814 | | lea RA, [BASE+RA*8] | ||
815 | | mov ARG3, RB | ||
816 | | mov L:RB, SAVE_L | ||
817 | | mov ARG4, RC | ||
818 | | movzx RC, PC_OP | ||
819 | | mov ARG2, RA | ||
820 | | mov ARG5, RC | ||
821 | | mov ARG1, L:RB | ||
822 | | mov SAVE_PC, PC | ||
823 | | mov L:RB->base, BASE | ||
824 | | call extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) | ||
825 | | // NULL (finished) or TValue * (new base for the metamethod call) returned in eax (RC). | ||
826 | | mov BASE, L:RB->base | ||
827 | | test RC, RC | ||
828 | | jz ->cont_nop | ||
829 | | | ||
830 | | // Call metamethod for binary op (falls through from above when RC != NULL). | ||
831 | |->vmeta_binop: | ||
832 | | // BASE = base, RC = new base, stack = cont/func/o1/o2 | ||
833 | | mov RA, RC | ||
834 | | sub RC, BASE | ||
835 | | mov [RA-12], PC // Save PC in the [cont|PC] slot. | ||
836 | | lea PC, [RC+FRAME_CONT] | ||
837 | | mov LFUNC:RB, [RA-8] | ||
838 | | mov NARGS:RC, 3 // 2+1 args for func(o1, o2). | ||
839 | | cmp dword [RA-4], LJ_TFUNC | ||
840 | | jne ->vmeta_call | ||
841 | | jmp aword LFUNC:RB->gate | ||
842 | | | ||
843 | |->vmeta_len: | ||
844 | | lea RD, [BASE+RD*8] | ||
845 | | mov L:RB, SAVE_L | ||
846 | | mov ARG2, RD | ||
847 | | mov ARG1, L:RB | ||
848 | | mov SAVE_PC, PC | ||
849 | | mov L:RB->base, BASE | ||
850 | | call extern lj_meta_len // (lua_State *L, TValue *o) | ||
851 | | // TValue * (new base for the metamethod call) returned in eax (RC). | ||
852 | | mov BASE, L:RB->base | ||
853 | | jmp ->vmeta_binop // Binop call for compatibility (metamethod is called with 2+1 args). | ||
854 | | | ||
855 | |//-- Call metamethod ---------------------------------------------------- | ||
856 | | | ||
857 | |->vmeta_call: // Resolve and call __call metamethod. | ||
858 | | // RA = new base, RC = nargs+1, BASE = old base, PC = return | ||
859 | | mov ARG4, RA // Save RA, RC across the C call. | ||
860 | | mov ARG5, NARGS:RC | ||
861 | | sub RA, 8 | ||
862 | | lea RC, [RA+NARGS:RC*8] | ||
863 | | mov L:RB, SAVE_L | ||
864 | | mov ARG2, RA | ||
865 | | mov ARG3, RC | ||
866 | | mov ARG1, L:RB | ||
867 | | mov SAVE_PC, PC | ||
868 | | mov L:RB->base, BASE // This is the caller's base! | ||
869 | | call extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) | ||
870 | | mov BASE, L:RB->base | ||
871 | | mov RA, ARG4 | ||
872 | | mov NARGS:RC, ARG5 | ||
873 | | mov LFUNC:RB, [RA-8] | ||
874 | | add NARGS:RC, 1 | ||
875 | | // This is fragile. L->base must not move, KBASE must always be defined. | ||
876 | | cmp KBASE, BASE // Continue with CALLT if flag set (KBASE == BASE acts as the flag). | ||
877 | | je ->BC_CALLT_Z | ||
878 | | jmp aword LFUNC:RB->gate // Otherwise call resolved metamethod. | ||
879 | | | ||
880 | |//-- Argument coercion for 'for' statement ------------------------------ | ||
881 | | | ||
882 | |->vmeta_for: | ||
883 | | mov L:RB, SAVE_L | ||
884 | | mov ARG2, RA | ||
885 | | mov ARG1, L:RB | ||
886 | | mov SAVE_PC, PC | ||
887 | | mov L:RB->base, BASE | ||
888 | | call extern lj_meta_for // (lua_State *L, StkId base) | ||
889 | | mov BASE, L:RB->base | ||
890 | | mov RC, [PC-4] | ||
891 | | movzx RA, RCH | ||
892 | | movzx OP, RCL | ||
893 | | shr RC, 16 | ||
894 | | jmp aword [DISPATCH+OP*4+GG_DISP_STATIC*4] // Re-decode the instruction at [PC-4] and retry FORI or JFORI. | ||
895 | | | ||
896 | |//----------------------------------------------------------------------- | ||
897 | |//-- Fast functions ----------------------------------------------------- | ||
898 | |//----------------------------------------------------------------------- | ||
899 | | | ||
900 | |.macro .ffunc, name | ||
901 | |->ff_ .. name: | ||
902 | |.endmacro | ||
903 | | | ||
904 | |.macro .ffunc_1, name | ||
905 | |->ff_ .. name: | ||
906 | | cmp NARGS:RC, 1+1; jb ->fff_fallback | ||
907 | |.endmacro | ||
908 | | | ||
909 | |.macro .ffunc_2, name | ||
910 | |->ff_ .. name: | ||
911 | | cmp NARGS:RC, 2+1; jb ->fff_fallback | ||
912 | |.endmacro | ||
913 | | | ||
914 | |.macro .ffunc_n, name | ||
915 | | .ffunc_1 name | ||
916 | | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback | ||
917 | | fld qword [RA] | ||
918 | |.endmacro | ||
919 | | | ||
920 | |.macro .ffunc_n, name, op | ||
921 | | .ffunc_1 name | ||
922 | | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback | ||
923 | | op | ||
924 | | fld qword [RA] | ||
925 | |.endmacro | ||
926 | | | ||
927 | |.macro .ffunc_nn, name | ||
928 | | .ffunc_2 name | ||
929 | | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback | ||
930 | | cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback | ||
931 | | fld qword [RA] | ||
932 | | fld qword [RA+8] | ||
933 | |.endmacro | ||
934 | | | ||
935 | |.macro .ffunc_nnr, name | ||
936 | | .ffunc_2 name | ||
937 | | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback | ||
938 | | cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback | ||
939 | | fld qword [RA+8] | ||
940 | | fld qword [RA] | ||
941 | |.endmacro | ||
942 | | | ||
943 | |// Inlined GC threshold check: calls ->fff_gcstep unless gc.total is below gc.threshold. Caveat: clobbers RB and uses label 1. | ||
944 | |.macro ffgccheck | ||
945 | | mov RB, [DISPATCH+DISPATCH_GL(gc.total)] | ||
946 | | cmp RB, [DISPATCH+DISPATCH_GL(gc.threshold)] | ||
947 | | jb >1 | ||
948 | | call ->fff_gcstep | ||
949 | |1: | ||
950 | |.endmacro | ||
951 | | | ||
952 | |//-- Base library: checks ----------------------------------------------- | ||
953 | | | ||
954 | |.ffunc_1 assert | ||
955 | | mov RB, [RA+4] | ||
956 | | cmp RB, LJ_TISTRUECOND; jae ->fff_fallback | ||
957 | | mov NRESULTS, RD | ||
958 | | mov [RA-4], RB | ||
959 | | mov RB, [RA] | ||
960 | | mov [RA-8], RB | ||
961 | | sub RD, 2 | ||
962 | | jz >2 | ||
963 | | mov ARG1, RA | ||
964 | |1: | ||
965 | | add RA, 8 | ||
966 | | mov RB, [RA+4] | ||
967 | | mov [RA-4], RB | ||
968 | | mov RB, [RA] | ||
969 | | mov [RA-8], RB | ||
970 | | sub RD, 1 | ||
971 | | jnz <1 | ||
972 | | mov RA, ARG1 | ||
973 | |2: | ||
974 | | mov RD, NRESULTS | ||
975 | | jmp ->fff_res_ | ||
976 | | | ||
977 | |.ffunc_1 type | ||
978 | | mov RB, [RA+4] | ||
979 | | mov RC, ~LJ_TNUMX | ||
980 | | not RB | ||
981 | | cmp RC, RB | ||
982 | ||if (cmov) { | ||
983 | | cmova RC, RB | ||
984 | ||} else { | ||
985 | | jbe >1; mov RC, RB; 1: | ||
986 | ||} | ||
987 | | mov CFUNC:RB, [RA-8] | ||
988 | | mov STR:RC, [CFUNC:RB+RC*8+((char *)(&((GCfuncC *)0)->upvalue))] | ||
989 | | mov dword [RA-4], LJ_TSTR | ||
990 | | mov [RA-8], STR:RC | ||
991 | | jmp ->fff_res1 | ||
992 | | | ||
993 | |//-- Base library: getters and setters --------------------------------- | ||
994 | | | ||
995 | |.ffunc_1 getmetatable | ||
996 | | mov RB, [RA+4] | ||
997 | | cmp RB, LJ_TTAB; jne >6 | ||
998 | |1: // Field metatable must be at same offset for GCtab and GCudata! | ||
999 | | mov TAB:RB, [RA] | ||
1000 | | mov TAB:RB, TAB:RB->metatable | ||
1001 | |2: | ||
1002 | | test TAB:RB, TAB:RB | ||
1003 | | mov dword [RA-4], LJ_TNIL // Preset nil result (mov leaves the flags from test intact). | ||
1004 | | jz ->fff_res1 // No metatable: return nil. RA is still the base here. | ||
1005 | | mov STR:RC, [DISPATCH+DISPATCH_GL(mmname)+4*MM_metatable] | ||
1006 | | mov dword [RA-4], LJ_TTAB // Store metatable as default result. | ||
1007 | | mov [RA-8], TAB:RB | ||
1008 | | mov ARG1, RA // Save result pointer: RA is reused as node pointer below. | ||
1009 | | mov RA, TAB:RB->hmask | ||
1010 | | and RA, STR:RC->hash | ||
1011 | | imul RA, #NODE | ||
1012 | | add NODE:RA, TAB:RB->node | ||
1013 | |3: // Rearranged logic, because we expect _not_ to find the key. | ||
1014 | | cmp dword NODE:RA->key.it, LJ_TSTR | ||
1015 | | jne >4 | ||
1016 | | cmp dword NODE:RA->key.gcr, STR:RC | ||
1017 | | je >5 | ||
1018 | |4: | ||
1019 | | mov NODE:RA, NODE:RA->next | ||
1020 | | test NODE:RA, NODE:RA | ||
1021 | | jnz <3 | ||
1022 | | mov RA, ARG1 // Restore result pointer: fff_res1 may use RA (nil fill, non-Lua return). | ||
1023 | | jmp ->fff_res1 // Not found, keep default result. | ||
1024 | |5: | ||
1025 | | mov RB, [RA+4] // Load the found value (tag in RB, payload in RC) ... | ||
1026 | | mov RC, [RA] | ||
1027 | | mov RA, ARG1 // ... then restore result pointer before any exit to fff_res1. | ||
1028 | | cmp RB, LJ_TNIL; je ->fff_res1 // Ditto for nil value. | ||
1029 | | mov [RA-4], RB // Return value of mt.__metatable. | ||
1030 | | mov [RA-8], RC | ||
1031 | | jmp ->fff_res1 | ||
1032 | | | ||
1033 | |6: | ||
1034 | | cmp RB, LJ_TUDATA; je <1 | ||
1035 | | cmp RB, LJ_TISNUM; ja >7 | ||
1036 | | mov RB, LJ_TNUMX | ||
1037 | |7: | ||
1038 | | not RB | ||
1039 | | mov TAB:RB, [DISPATCH+RB*4+DISPATCH_GL(basemt)] | ||
1040 | | jmp <2 | ||
1041 | | | ||
1042 | |.ffunc_2 setmetatable | ||
1043 | | cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback | ||
1044 | | // Fast path: table has no metatable yet and the new metatable is a table (not clearing the mt). | ||
1045 | | mov TAB:RB, [RA] | ||
1046 | | cmp dword TAB:RB->metatable, 0; jne ->fff_fallback | ||
1047 | | cmp dword [RA+12], LJ_TTAB; jne ->fff_fallback | ||
1048 | | mov TAB:RC, [RA+8] | ||
1049 | | mov TAB:RB->metatable, TAB:RC | ||
1050 | | mov dword [RA-4], LJ_TTAB // Return original table. | ||
1051 | | mov [RA-8], TAB:RB | ||
1052 | | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) | ||
1053 | | jz >1 | ||
1054 | | // Possible write barrier. Table is black, but the iswhite(mt) check is skipped. | ||
1055 | | barrierback TAB:RB, RC | ||
1056 | |1: | ||
1057 | | jmp ->fff_res1 | ||
1058 | | | ||
1059 | |.ffunc_2 rawget | ||
1060 | | cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback | ||
1061 | | mov TAB:RC, [RA] | ||
1062 | | mov L:RB, SAVE_L | ||
1063 | | mov ARG2, TAB:RC | ||
1064 | | mov ARG1, L:RB | ||
1065 | | mov RB, RA | ||
1066 | | mov ARG4, BASE // Save BASE (in ARG4) and RA (in RB) across the C call. | ||
1067 | | add RA, 8 | ||
1068 | | mov ARG3, RA | ||
1069 | | call extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) | ||
1070 | | // cTValue * returned in eax (RC). | ||
1071 | | mov RA, RB | ||
1072 | | mov BASE, ARG4 | ||
1073 | | mov RB, [RC] // Copy table slot to the result slot at [RA-8]. | ||
1074 | | mov RC, [RC+4] | ||
1075 | | mov [RA-8], RB | ||
1076 | | mov [RA-4], RC | ||
1077 | | jmp ->fff_res1 | ||
1078 | | | ||
1079 | |//-- Base library: conversions ------------------------------------------ | ||
1080 | | | ||
1081 | |.ffunc tonumber | ||
1082 | | // Only handles the number case inline (without a base argument). Everything else takes the fallback. | ||
1083 | | cmp NARGS:RC, 1+1; jne ->fff_fallback // Exactly one argument. | ||
1084 | | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback | ||
1085 | | fld qword [RA] | ||
1086 | | jmp ->fff_resn | ||
1087 | | | ||
1088 | |.ffunc_1 tostring | ||
1089 | | // Only handles the string or number case inline. | ||
1090 | | cmp dword [RA+4], LJ_TSTR; jne >3 | ||
1091 | | // A __tostring method in the string base metatable is ignored. | ||
1092 | | mov STR:RC, [RA] | ||
1093 | |2: | ||
1094 | | mov dword [RA-4], LJ_TSTR | ||
1095 | | mov [RA-8], STR:RC | ||
1096 | | jmp ->fff_res1 | ||
1097 | |3: // Handle numbers inline, unless a number base metatable is present. | ||
1098 | | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback | ||
1099 | | cmp dword [DISPATCH+DISPATCH_GL(basemt)+4*(~LJ_TNUMX)], 0 | ||
1100 | | jne ->fff_fallback | ||
1101 | | ffgccheck // Caveat: uses label 1 and clobbers RB. | ||
1102 | | mov L:RB, SAVE_L | ||
1103 | | mov ARG1, L:RB | ||
1104 | | mov ARG2, RA | ||
1105 | | mov L:RB->base, RA // Add frame since C call can throw. | ||
1106 | | mov [RA-4], PC | ||
1107 | | mov SAVE_PC, PC // Redundant (but a defined value). | ||
1108 | | mov ARG3, BASE // Save BASE across the C call. | ||
1109 | | call extern lj_str_fromnum // (lua_State *L, lua_Number *np) | ||
1110 | | // GCstr returned in eax (RC). | ||
1111 | | mov RA, L:RB->base | ||
1112 | | mov BASE, ARG3 | ||
1113 | | jmp <2 | ||
1114 | | | ||
1115 | |//-- Base library: iterators ------------------------------------------- | ||
1116 | | | ||
1117 | |.ffunc_1 next | ||
1118 | | je >2 // Missing 2nd arg? (Flags still come from the nargs compare in .ffunc_1.) | ||
1119 | |1: | ||
1120 | | cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback | ||
1121 | | mov TAB:RB, [RA] | ||
1122 | | mov ARG2, TAB:RB | ||
1123 | | mov L:RB, SAVE_L | ||
1124 | | mov ARG1, L:RB | ||
1125 | | mov L:RB->base, RA // Add frame since C call can throw. | ||
1126 | | mov [RA-4], PC | ||
1127 | | mov SAVE_PC, PC // Redundant (but a defined value). | ||
1128 | | mov ARG4, BASE // Save BASE across the C call. | ||
1129 | | add RA, 8 | ||
1130 | | mov ARG3, RA | ||
1131 | | call extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) | ||
1132 | | // Flag returned in eax (RC). | ||
1133 | | mov RA, L:RB->base | ||
1134 | | mov BASE, ARG4 | ||
1135 | | test RC, RC; jz >3 // End of traversal? | ||
1136 | | mov RB, [RA+8] // Copy key and value (slots [RA+8] and [RA+16]) to results. | ||
1137 | | mov RC, [RA+12] | ||
1138 | | mov [RA-8], RB | ||
1139 | | mov [RA-4], RC | ||
1140 | | mov RB, [RA+16] | ||
1141 | | mov RC, [RA+20] | ||
1142 | | mov [RA], RB | ||
1143 | | mov [RA+4], RC | ||
1144 | |->fff_res2: | ||
1145 | | mov RD, 1+2 | ||
1146 | | jmp ->fff_res | ||
1147 | |2: // Set missing 2nd arg to nil. | ||
1148 | | mov dword [RA+12], LJ_TNIL | ||
1149 | | jmp <1 | ||
1150 | |3: // End of traversal: return nil. | ||
1151 | | mov dword [RA-4], LJ_TNIL | ||
1152 | | jmp ->fff_res1 | ||
1153 | | | ||
1154 | |.ffunc_1 pairs | ||
1155 | | cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback | ||
1156 | | mov CFUNC:RC, CFUNC:RB->upvalue[0] | ||
1157 | | mov dword [RA-4], LJ_TFUNC | ||
1158 | | mov [RA-8], CFUNC:RC | ||
1159 | | mov dword [RA+12], LJ_TNIL | ||
1160 | | mov RD, 1+3 | ||
1161 | | jmp ->fff_res | ||
1162 | | | ||
1163 | |.ffunc_1 ipairs_aux | ||
1164 | | cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback | ||
1165 | | cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback | ||
1166 | | fld qword [RA+8] | ||
1167 | | fld1 | ||
1168 | | faddp st1 | ||
1169 | | fist ARG2 | ||
1170 | | fstp qword [RA-8] | ||
1171 | | mov TAB:RB, [RA] | ||
1172 | | mov RC, ARG2 | ||
1173 | | cmp RC, TAB:RB->asize; jae >2 // Not in array part? | ||
1174 | | shl RC, 3 | ||
1175 | | add RC, TAB:RB->array | ||
1176 | |1: | ||
1177 | | cmp dword [RC+4], LJ_TNIL; je ->fff_res0 | ||
1178 | | mov RB, [RC] // Copy array slot to the second result at [RA]. | ||
1179 | | mov RC, [RC+4] | ||
1180 | | mov [RA], RB | ||
1181 | | mov [RA+4], RC | ||
1182 | | jmp ->fff_res2 | ||
1183 | |2: // Check for empty hash part first. Otherwise call C function. | ||
1184 | | cmp dword TAB:RB->hmask, 0; je ->fff_res0 | ||
1185 | | mov ARG1, TAB:RB | ||
1186 | | mov ARG3, BASE // Save BASE (in ARG3) and RA (in RB) across the C call. | ||
1187 | | mov RB, RA | ||
1188 | | call extern lj_tab_getinth // (GCtab *t, int32_t key) | ||
1189 | | // cTValue * or NULL returned in eax (RC). | ||
1190 | | mov RA, RB | ||
1191 | | mov BASE, ARG3 | ||
1192 | | test RC, RC | ||
1193 | | jnz <1 | ||
1194 | |->fff_res0: | ||
1195 | | mov RD, 1+0 | ||
1196 | | jmp ->fff_res | ||
1197 | | | ||
1198 | |.ffunc_1 ipairs | ||
1199 | | cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback | ||
1200 | | mov CFUNC:RC, CFUNC:RB->upvalue[0] | ||
1201 | | mov dword [RA-4], LJ_TFUNC | ||
1202 | | mov [RA-8], CFUNC:RC | ||
1203 | | fldz | ||
1204 | | fstp qword [RA+8] | ||
1205 | | mov RD, 1+3 | ||
1206 | | jmp ->fff_res | ||
1207 | | | ||
1208 | |//-- Base library: catch errors ---------------------------------------- | ||
1209 | | | ||
1210 | |.ffunc_1 pcall | ||
1211 | | mov [RA-4], PC | ||
1212 | | mov PC, 8+FRAME_PCALL | ||
1213 | | mov BASE, RA | ||
1214 | | add RA, 8 | ||
1215 | | sub NARGS:RC, 1 | ||
1216 | | mov LFUNC:RB, [RA-8] | ||
1217 | |1: | ||
1218 | | test byte [DISPATCH+DISPATCH_GL(hookmask)], HOOK_ACTIVE | ||
1219 | | jnz >3 // Hook active before pcall? | ||
1220 | |2: | ||
1221 | | cmp dword [RA-4], LJ_TFUNC | ||
1222 | | jne ->vmeta_call // Not a function. vmeta_call needs KBASE defined and != BASE. | ||
1223 | | jmp aword LFUNC:RB->gate | ||
1224 | |3: | ||
1225 | | add PC, 1 // Bump the frame type by one: use FRAME_PCALLH if hook was active. | ||
1226 | | jmp <2 | ||
1227 | | | ||
1228 | |.ffunc_2 xpcall | ||
1229 | | cmp dword [RA+12], LJ_TFUNC; jne ->fff_fallback | ||
1230 | | mov [RA-4], PC | ||
1231 | | mov RB, [RA+4] // Swap function and traceback: type tags first, then the values. | ||
1232 | | mov [RA+12], RB | ||
1233 | | mov dword [RA+4], LJ_TFUNC | ||
1234 | | mov LFUNC:RB, [RA] | ||
1235 | | mov PC, [RA+8] | ||
1236 | | mov [RA+8], LFUNC:RB | ||
1237 | | mov [RA], PC | ||
1238 | | mov PC, 2*8+FRAME_PCALL | ||
1239 | | mov BASE, RA | ||
1240 | | add RA, 2*8 | ||
1241 | | sub NARGS:RC, 2 | ||
1242 | | jmp <1 | ||
1243 | | | ||
1244 | |//-- Coroutine library -------------------------------------------------- | ||
1245 | | | ||
1246 | |.macro coroutine_resume_wrap, resume | ||
1247 | |9: // Need to restore PC for fallback handler (PC is reused as a scratch register below). | ||
1248 | | mov PC, SAVE_PC | ||
1249 | | jmp ->fff_fallback | ||
1250 | | | ||
1251 | |.if resume | ||
1252 | |.ffunc_1 coroutine_resume | ||
1253 | | mov L:RB, [RA] | ||
1254 | |.else | ||
1255 | |.ffunc coroutine_wrap_aux | ||
1256 | | mov L:RB, CFUNC:RB->upvalue[0].gcr | ||
1257 | |.endif | ||
1258 | | mov [RA-4], PC | ||
1259 | | mov SAVE_PC, PC | ||
1260 | | mov ARG1, L:RB | ||
1261 | |.if resume | ||
1262 | | cmp dword [RA+4], LJ_TTHREAD; jne <9 | ||
1263 | |.endif | ||
1264 | | cmp aword L:RB->cframe, 0; jne <9 | ||
1265 | | cmp byte L:RB->status, LUA_YIELD; ja <9 | ||
1266 | | mov PC, L:RB->top | ||
1267 | | mov ARG2, PC | ||
1268 | | je >1 // Status == LUA_YIELD? Otherwise the status is 0 (flags from the cmp above). | ||
1269 | | cmp PC, L:RB->base; je <9 // Check for presence of initial func. | ||
1270 | |1: | ||
1271 | |.if resume | ||
1272 | | lea PC, [PC+NARGS:RC*8-16] // Check stack space (nargs+1, minus 1, minus the thread arg). | ||
1273 | |.else | ||
1274 | | lea PC, [PC+NARGS:RC*8-8] // Check stack space (nargs+1, minus 1). | ||
1275 | |.endif | ||
1276 | | cmp PC, L:RB->maxstack; ja <9 | ||
1277 | | mov L:RB->top, PC | ||
1278 | | | ||
1279 | | mov L:RB, SAVE_L | ||
1280 | | mov L:RB->base, RA | ||
1281 | |.if resume | ||
1282 | | add RA, 8 // Keep resumed thread in stack for GC. | ||
1283 | |.endif | ||
1284 | | mov L:RB->top, RA | ||
1285 | | mov RB, ARG2 | ||
1286 | |.if resume | ||
1287 | | lea RA, [RA+NARGS:RC*8-24] // RA = end of source for stack move. | ||
1288 | |.else | ||
1289 | | lea RA, [RA+NARGS:RC*8-16] // RA = end of source for stack move. | ||
1290 | |.endif | ||
1291 | | sub RA, PC // Relative to PC. | ||
1292 | | | ||
1293 | | cmp PC, RB | ||
1294 | | je >3 | ||
1295 | |2: // Move args to coroutine (copies downwards from the new top until PC reaches the old top in RB). | ||
1296 | | mov RC, [PC+RA+4] | ||
1297 | | mov [PC-4], RC | ||
1298 | | mov RC, [PC+RA] | ||
1299 | | mov [PC-8], RC | ||
1300 | | sub PC, 8 | ||
1301 | | cmp PC, RB | ||
1302 | | jne <2 | ||
1303 | |3: | ||
1304 | | xor RA, RA | ||
1305 | | mov ARG4, RA | ||
1306 | | mov ARG3, RA | ||
1307 | | call ->vm_resume // (lua_State *L, StkId base, 0, 0) | ||
1308 | | set_vmstate INTERP | ||
1309 | | | ||
1310 | | mov L:RB, SAVE_L | ||
1311 | | mov L:PC, ARG1 // The callee doesn't modify SAVE_L. | ||
1312 | | mov BASE, L:RB->base | ||
1313 | | cmp eax, LUA_YIELD | ||
1314 | | ja >8 | ||
1315 | |4: | ||
1316 | | mov RA, L:PC->base | ||
1317 | | mov KBASE, L:PC->top | ||
1318 | | mov L:PC->top, RA // Clear coroutine stack. | ||
1319 | | mov PC, KBASE | ||
1320 | | sub PC, RA | ||
1321 | | je >6 // No results? | ||
1322 | | lea RD, [BASE+PC] | ||
1323 | | shr PC, 3 | ||
1324 | | cmp RD, L:RB->maxstack | ||
1325 | | ja >9 // Need to grow stack? | ||
1326 | | | ||
1327 | | mov RB, BASE | ||
1328 | | sub RB, RA | ||
1329 | |5: // Move results from coroutine (RB = distance from coroutine slot to destination slot). | ||
1330 | | mov RD, [RA] | ||
1331 | | mov [RA+RB], RD | ||
1332 | | mov RD, [RA+4] | ||
1333 | | mov [RA+RB+4], RD | ||
1334 | | add RA, 8 | ||
1335 | | cmp RA, KBASE | ||
1336 | | jne <5 | ||
1337 | |6: | ||
1338 | |.if resume | ||
1339 | | lea RD, [PC+2] // nresults+1 = 1 + true + results. | ||
1340 | | mov dword [BASE-4], LJ_TTRUE // Prepend true to results. | ||
1341 | |.else | ||
1342 | | lea RD, [PC+1] // nresults+1 = 1 + results. | ||
1343 | |.endif | ||
1344 | |7: | ||
1345 | | mov PC, SAVE_PC | ||
1346 | | mov NRESULTS, RD | ||
1347 | |.if resume | ||
1348 | | mov RA, -8 | ||
1349 | |.else | ||
1350 | | xor RA, RA | ||
1351 | |.endif | ||
1352 | | test PC, FRAME_TYPE | ||
1353 | | jz ->BC_RET_Z | ||
1354 | | jmp ->vm_return | ||
1355 | | | ||
1356 | |8: // Coroutine returned with error (at co->top-1). | ||
1357 | |.if resume | ||
1358 | | mov dword [BASE-4], LJ_TFALSE // Prepend false to results. | ||
1359 | | mov RA, L:PC->top | ||
1360 | | sub RA, 8 | ||
1361 | | mov L:PC->top, RA // Clear error from coroutine stack. | ||
1362 | | mov RD, [RA] // Copy error message to [BASE]. | ||
1363 | | mov [BASE], RD | ||
1364 | | mov RD, [RA+4] | ||
1365 | | mov [BASE+4], RD | ||
1366 | | mov RD, 1+2 // nresults+1 = 1 + false + error. | ||
1367 | | jmp <7 | ||
1368 | |.else | ||
1369 | | mov ARG2, L:PC | ||
1370 | | mov ARG1, L:RB | ||
1371 | | call extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co) | ||
1372 | | // Error function does not return. | ||
1373 | |.endif | ||
1374 | | | ||
1375 | |9: // Handle stack expansion on return from yield. | ||
1376 | | mov L:RA, ARG1 // The callee doesn't modify SAVE_L. | ||
1377 | | mov L:RA->top, KBASE // Undo coroutine stack clearing. | ||
1378 | | mov ARG2, PC | ||
1379 | | mov ARG1, L:RB | ||
1380 | | call extern lj_state_growstack // (lua_State *L, int n) | ||
1381 | | mov BASE, L:RB->base | ||
1382 | | jmp <4 // Retry the stack move. | ||
1383 | |.endmacro | ||
1384 | | | ||
1385 | | coroutine_resume_wrap 1 // coroutine.resume | ||
1386 | | coroutine_resume_wrap 0 // coroutine.wrap | ||
1387 | | | ||
1388 | |.ffunc coroutine_yield | ||
1389 | | mov L:RB, SAVE_L | ||
1390 | | mov [RA-4], PC | ||
1391 | | test aword L:RB->cframe, CFRAME_CANYIELD | ||
1392 | | jz ->fff_fallback | ||
1393 | | mov L:RB->base, RA | ||
1394 | | lea RC, [RA+NARGS:RC*8-8] | ||
1395 | | mov L:RB->top, RC | ||
1396 | | xor eax, eax | ||
1397 | | mov aword L:RB->cframe, eax | ||
1398 | | mov al, LUA_YIELD | ||
1399 | | mov byte L:RB->status, al | ||
1400 | | jmp ->vm_leave_unw | ||
1401 | | | ||
1402 | |//-- Math library ------------------------------------------------------- | ||
1403 | | | ||
1404 | |.ffunc_n math_abs | ||
1405 | | fabs | ||
1406 | | // Fall through: store the x87 result as the single return value. | ||
1407 | |->fff_resn: | ||
1408 | | fstp qword [RA-8] | ||
1409 | |->fff_res1: | ||
1410 | | mov RD, 1+1 | ||
1411 | |->fff_res: | ||
1412 | | mov NRESULTS, RD | ||
1413 | |->fff_res_: | ||
1414 | | test PC, FRAME_TYPE | ||
1415 | | jnz >7 | ||
1416 | |5: | ||
1417 | | cmp PC_RB, RDL // More results expected than were returned? | ||
1418 | | ja >6 | ||
1419 | | // BASE and KBASE are assumed to be set for the calling frame. | ||
1420 | | ins_next | ||
1421 | | | ||
1422 | |6: // Fill up the missing results with nil, one slot per pass. | ||
1423 | | mov dword [RA+RD*8-12], LJ_TNIL | ||
1424 | | add RD, 1 | ||
1425 | | jmp <5 | ||
1426 | | | ||
1427 | |7: // Non-standard return case (a FRAME_TYPE bit is set in PC). | ||
1428 | | mov BASE, RA | ||
1429 | | mov RA, -8 // Results start at BASE+RA = BASE-8. | ||
1430 | | jmp ->vm_return | ||
1431 | | | ||
1432 | |.ffunc_n math_floor; call ->vm_floor; jmp ->fff_resn | ||
1433 | |.ffunc_n math_ceil; call ->vm_ceil; jmp ->fff_resn | ||
1434 | | | ||
1435 | |.ffunc_n math_sqrt; fsqrt; jmp ->fff_resn | ||
1436 | | | ||
1437 | |.ffunc_n math_log, fldln2; fyl2x; jmp ->fff_resn | ||
1438 | |.ffunc_n math_log10, fldlg2; fyl2x; jmp ->fff_resn | ||
1439 | |.ffunc_n math_exp; call ->vm_exp; jmp ->fff_resn | ||
1440 | | | ||
1441 | |.ffunc_n math_sin; fsin; jmp ->fff_resn | ||
1442 | |.ffunc_n math_cos; fcos; jmp ->fff_resn | ||
1443 | |.ffunc_n math_tan; fptan; fpop; jmp ->fff_resn | ||
1444 | | | ||
1445 | |.ffunc_n math_asin | ||
1446 | | fdup; fmul st0; fld1; fsubrp st1; fsqrt; fpatan | ||
1447 | | jmp ->fff_resn | ||
1448 | |.ffunc_n math_acos | ||
1449 | | fdup; fmul st0; fld1; fsubrp st1; fsqrt; fxch; fpatan | ||
1450 | | jmp ->fff_resn | ||
1451 | |.ffunc_n math_atan; fld1; fpatan; jmp ->fff_resn | ||
1452 | | | ||
1453 | |.macro math_extern, func | ||
1454 | |.ffunc_n math_ .. func | ||
1455 | | mov ARG5, RA | ||
1456 | | fstp FPARG1 | ||
1457 | | mov RB, BASE | ||
1458 | | call extern func | ||
1459 | | mov RA, ARG5 | ||
1460 | | mov BASE, RB | ||
1461 | | jmp ->fff_resn | ||
1462 | |.endmacro | ||
1463 | | | ||
1464 | | math_extern sinh | ||
1465 | | math_extern cosh | ||
1466 | | math_extern tanh | ||
1467 | | | ||
1468 | |->ff_math_deg: | ||
1469 | |.ffunc_n math_rad; fmul qword CFUNC:RB->upvalue[0]; jmp ->fff_resn | ||
1470 | | | ||
1471 | |.ffunc_nn math_atan2; fpatan; jmp ->fff_resn | ||
1472 | |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn | ||
1473 | | | ||
1474 | |.ffunc_1 math_frexp | ||
1475 | | mov RB, [RA+4] | ||
1476 | | cmp RB, LJ_TISNUM; ja ->fff_fallback | ||
1477 | | mov RC, [RA] | ||
1478 | | mov [RA-4], RB; mov [RA-8], RC | ||
1479 | | shl RB, 1; cmp RB, 0xffe00000; jae >3 | ||
1480 | | or RC, RB; jz >3 | ||
1481 | | mov RC, 1022 | ||
1482 | | cmp RB, 0x00200000; jb >4 | ||
1483 | |1: | ||
1484 | | shr RB, 21; sub RB, RC // Extract the exponent field and subtract the bias held in RC. | ||
1485 | | mov ARG1, RB; fild ARG1 | ||
1486 | | mov RB, [RA-4] | ||
1487 | | and RB, 0x800fffff // Mask off exponent: keep sign and mantissa bits of the high word. | ||
1488 | | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0. | ||
1489 | | mov [RA-4], RB | ||
1490 | |2: | ||
1491 | | fstp qword [RA] | ||
1492 | | mov RD, 1+2 | ||
1493 | | jmp ->fff_res | ||
1494 | |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0. | ||
1495 | | fldz; jmp <2 | ||
1496 | |4: // Handle denormals by multiplying with 2^54 and adjusting the bias (1022+54 = 1076). | ||
1497 | | fld qword [RA] | ||
1498 | | mov ARG1, 0x5a800000; fmul ARG1 // x = x*2^54 (0x5a800000 is 2^54 as a float) | ||
1499 | | fstp qword [RA-8] | ||
1500 | | mov RB, [RA-4]; mov RC, 1076; shl RB, 1; jmp <1 | ||
1501 | | | ||
1502 | |.ffunc_n math_modf | ||
1503 | | mov RB, [RA+4] | ||
1504 | | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf? (only the high word is checked) | ||
1505 | | fdup | ||
1506 | | call ->vm_trunc | ||
1507 | | fsub st1, st0 | ||
1508 | |1: | ||
1509 | | fstp qword [RA-8]; fstp qword [RA] | ||
1510 | | mov RC, [RA-4]; mov RB, [RA+4] | ||
1511 | | xor RC, RB; js >3 // Sign bits of the two results differ? Then adjust. | ||
1512 | |2: | ||
1513 | | mov RD, 1+2 | ||
1514 | | jmp ->fff_res | ||
1515 | |3: | ||
1516 | | xor RB, 0x80000000; mov [RA+4], RB; jmp <2 // Flip sign of fraction. | ||
1517 | |4: | ||
1518 | | fldz; fxch; jmp <1 // Return +-Inf as first result and +-0 as second. | ||
1519 | | | ||
1520 | |.ffunc_nnr math_fmod | ||
1521 | |1: ; fprem; fnstsw ax; sahf; jp <1 | ||
1522 | | fpop1 | ||
1523 | | jmp ->fff_resn | ||
1524 | | | ||
1525 | |.ffunc_nn math_pow; call ->vm_pow; jmp ->fff_resn | ||
1526 | | | ||
1527 | |.macro math_minmax, name, cmovop, nocmovop | ||
1528 | |.ffunc_n name | ||
1529 | | mov RB, 2 | ||
1530 | |1: | ||
1531 | | cmp RB, RD; jae ->fff_resn | ||
1532 | | cmp dword [RA+RB*8-4], LJ_TISNUM; ja >5 | ||
1533 | | fld qword [RA+RB*8-8] | ||
1534 | ||if (cmov) { | ||
1535 | | fucomi st1; cmovop st1; fpop1 | ||
1536 | ||} else { | ||
1537 | | push eax | ||
1538 | | fucom st1; fnstsw ax; test ah, 1; nocmovop >2; fxch; 2: ; fpop | ||
1539 | | pop eax | ||
1540 | ||} | ||
1541 | | add RB, 1 | ||
1542 | | jmp <1 | ||
1543 | |.endmacro | ||
1544 | | | ||
1545 | | math_minmax math_min, fcmovnbe, jz | ||
1546 | | math_minmax math_max, fcmovbe, jnz | ||
1547 | |5: | ||
1548 | | fpop; jmp ->fff_fallback | ||
1549 | | | ||
1550 | |//-- String library ----------------------------------------------------- | ||
1551 | | | ||
1552 | |.ffunc_1 string_len | ||
1553 | | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback | ||
1554 | | mov STR:RB, [RA] | ||
1555 | | fild dword STR:RB->len | ||
1556 | | jmp ->fff_resn | ||
1557 | | | ||
1558 | |.ffunc string_byte // Only the 1-arg case is handled here; anything else uses the fallback. | ||
1559 | | cmp NARGS:RC, 1+1; jne ->fff_fallback | ||
1560 | | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback | ||
1561 | | mov STR:RB, [RA] | ||
1562 | | cmp dword STR:RB->len, 1 | ||
1563 | | jb ->fff_res0 // Return no results for empty string. | ||
1564 | | movzx RB, byte STR:RB[1] | ||
1565 | | mov ARG1, RB | ||
1566 | | fild ARG1 | ||
1567 | | jmp ->fff_resn | ||
1568 | | | ||
1569 | |.ffunc string_char // Only the 1-arg case is handled here; anything else uses the fallback. | ||
1570 | | ffgccheck | ||
1571 | | cmp NARGS:RC, 1+1; jne ->fff_fallback // *Exactly* 1 arg. | ||
1572 | | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback | ||
1573 | | fld qword [RA] | ||
1574 | | fistp ARG4 | ||
1575 | | cmp ARG4, 255; ja ->fff_fallback | ||
1576 | | lea RC, ARG4 // Address of the char: low byte of ARG4 (little-endian). | ||
1577 | | mov ARG5, RA // Save RA (reloaded after the lj_str_new call). | ||
1578 | | mov ARG3, 1 | ||
1579 | | mov ARG2, RC | ||
1580 | |->fff_newstr: | ||
1581 | | mov L:RB, SAVE_L | ||
1582 | | mov ARG1, L:RB | ||
1583 | | mov SAVE_PC, PC | ||
1584 | | mov L:RB->base, BASE | ||
1585 | | call extern lj_str_new // (lua_State *L, char *str, size_t l) | ||
1586 | | // GCstr * returned in eax (RC). | ||
1587 | | mov RA, ARG5 | ||
1588 | | mov BASE, L:RB->base | ||
1589 | | mov dword [RA-4], LJ_TSTR | ||
1590 | | mov [RA-8], STR:RC | ||
1591 | | jmp ->fff_res1 | ||
1592 | | | ||
1593 | |.ffunc string_sub | ||
1594 | | ffgccheck | ||
1595 | | mov ARG5, RA // Save RA (reloaded in fff_newstr). | ||
1596 | | mov ARG4, -1 | ||
1597 | | cmp NARGS:RC, 1+2; jb ->fff_fallback | ||
1598 | | jna >1 | ||
1599 | | cmp dword [RA+20], LJ_TISNUM; ja ->fff_fallback | ||
1600 | | fld qword [RA+16] | ||
1601 | | fistp ARG4 | ||
1602 | |1: | ||
1603 | | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback | ||
1604 | | cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback | ||
1605 | | mov STR:RB, [RA] | ||
1606 | | mov ARG2, STR:RB | ||
1607 | | mov RB, STR:RB->len | ||
1608 | | fld qword [RA+8] | ||
1609 | | fistp ARG3 | ||
1610 | | mov RC, ARG4 | ||
1611 | | cmp RB, RC // len < end? (unsigned compare) | ||
1612 | | jb >5 | ||
1613 | |2: | ||
1614 | | mov RA, ARG3 | ||
1615 | | test RA, RA // start <= 0? | ||
1616 | | jle >7 | ||
1617 | |3: | ||
1618 | | mov STR:RB, ARG2 | ||
1619 | | sub RC, RA // start > end? | ||
1620 | | jl ->fff_emptystr | ||
1621 | | lea RB, [STR:RB+RA+#STR-1] | ||
1622 | | add RC, 1 | ||
1623 | |4: | ||
1624 | | mov ARG2, RB | ||
1625 | | mov ARG3, RC | ||
1626 | | jmp ->fff_newstr | ||
1627 | | | ||
1628 | |5: // Negative end, or end beyond the string length. | ||
1629 | | jl >6 | ||
1630 | | lea RC, [RC+RB+1] // end = end+(len+1) | ||
1631 | | jmp <2 | ||
1632 | |6: // Overflow: end is beyond the string length. | ||
1633 | | mov RC, RB // Clamp: end = len | ||
1634 | | jmp <2 | ||
1635 | | | ||
1636 | |7: // Start is zero or negative. | ||
1637 | | je >8 | ||
1638 | | add RA, RB // start = start+(len+1) | ||
1639 | | add RA, 1 | ||
1640 | | jg <3 // start > 0? | ||
1641 | |8: // Underflow: start is zero or still not positive after adjustment. | ||
1642 | | mov RA, 1 // Clamp: start = 1 | ||
1643 | | jmp <3 | ||
1644 | | | ||
1645 | |->fff_emptystr: // Range underflow. | ||
1646 | | xor RC, RC // Zero length. Any ptr in RB is ok. | ||
1647 | | jmp <4 | ||
1648 | | | ||
1649 | |.ffunc_2 string_rep // Only handle the 1-char case inline. | ||
1650 | | ffgccheck | ||
1651 | | mov ARG5, RA // Save RA (reloaded in fff_newstr). | ||
1652 | | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback | ||
1653 | | cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback | ||
1654 | | mov STR:RB, [RA] | ||
1655 | | fld qword [RA+8] | ||
1656 | | fistp ARG4 | ||
1657 | | mov RC, ARG4 | ||
1658 | | test RC, RC | ||
1659 | | jle ->fff_emptystr // Count <= 0? (or non-int) | ||
1660 | | cmp dword STR:RB->len, 1 | ||
1661 | | jb ->fff_emptystr // Zero length string? | ||
1662 | | jne ->fff_fallback_2 // Fallback for > 1-char strings. | ||
1663 | | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_2 | ||
1664 | | movzx RA, byte STR:RB[1] | ||
1665 | | mov RB, [DISPATCH+DISPATCH_GL(tmpbuf.buf)] | ||
1666 | | mov ARG3, RC | ||
1667 | | mov ARG2, RB | ||
1668 | |1: // Fill the temp buffer with the char, one byte per iteration (deliberately simple code). | ||
1669 | | mov [RB], RAL | ||
1670 | | add RB, 1 | ||
1671 | | sub RC, 1 | ||
1672 | | jnz <1 | ||
1673 | | jmp ->fff_newstr | ||
1674 | | | ||
1675 | |.ffunc_1 string_reverse | ||
1676 | | ffgccheck | ||
1677 | | mov ARG5, RA // Save RA (reloaded in fff_newstr). | ||
1678 | | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback | ||
1679 | | mov STR:RB, [RA] | ||
1680 | | mov RC, STR:RB->len | ||
1681 | | test RC, RC | ||
1682 | | jz ->fff_emptystr // Zero length string? | ||
1683 | | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1 | ||
1684 | | add RB, #STR | ||
1685 | | mov ARG4, PC // Need another temp register: stash PC (restored after the loop). | ||
1686 | | mov ARG3, RC | ||
1687 | | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)] | ||
1688 | | mov ARG2, PC | ||
1689 | |1: | ||
1690 | | movzx RA, byte [RB] | ||
1691 | | add RB, 1 | ||
1692 | | sub RC, 1 | ||
1693 | | mov [PC+RC], RAL | ||
1694 | | jnz <1 | ||
1695 | | mov PC, ARG4 | ||
1696 | | jmp ->fff_newstr | ||
1697 | | | ||
1698 | |.macro ffstring_case, name, lo, hi | ||
1699 | | .ffunc_1 name | ||
1700 | | ffgccheck | ||
1701 | | mov ARG5, RA // Save RA (reloaded in fff_newstr). | ||
1702 | | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback | ||
1703 | | mov STR:RB, [RA] | ||
1704 | | mov RC, STR:RB->len | ||
1705 | | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1 | ||
1706 | | add RB, #STR | ||
1707 | | mov ARG4, PC // Need another temp register: stash PC (restored after the loop). | ||
1708 | | mov ARG3, RC | ||
1709 | | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)] | ||
1710 | | mov ARG2, PC | ||
1711 | | jmp >3 | ||
1712 | |1: // ASCII case conversion: flip bit 0x20 for chars in [lo,hi], copy others unchanged. | ||
1713 | | movzx RA, byte [RB+RC] | ||
1714 | | cmp RA, lo | ||
1715 | | jb >2 | ||
1716 | | cmp RA, hi | ||
1717 | | ja >2 | ||
1718 | | xor RA, 0x20 | ||
1719 | |2: | ||
1720 | | mov [PC+RC], RAL | ||
1721 | |3: | ||
1722 | | sub RC, 1 | ||
1723 | | jns <1 | ||
1724 | | mov PC, ARG4 | ||
1725 | | jmp ->fff_newstr | ||
1726 | |.endmacro | ||
1727 | | | ||
1728 | |ffstring_case string_lower, 0x41, 0x5a | ||
1729 | |ffstring_case string_upper, 0x61, 0x7a | ||
1730 | | | ||
1731 | |//-- Table library ------------------------------------------------------ | ||
1732 | | | ||
1733 | |.ffunc_1 table_getn | ||
1734 | | cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback | ||
1735 | | mov TAB:RB, [RA] | ||
1736 | | mov ARG1, TAB:RB | ||
1737 | | mov RB, RA // Save RA (in RB) and BASE (in ARG2) across the call. | ||
1738 | | mov ARG2, BASE | ||
1739 | | call extern lj_tab_len // (GCtab *t) | ||
1740 | | // Length of table returned in eax (RC). | ||
1741 | | mov ARG1, RC | ||
1742 | | mov RA, RB // Restore RA and BASE. | ||
1743 | | mov BASE, ARG2 | ||
1744 | | fild ARG1 | ||
1745 | | jmp ->fff_resn | ||
1746 | | | ||
1747 | |//-- Bit library -------------------------------------------------------- | ||
1748 | | | ||
1749 | |.define TOBIT_BIAS, 0x59c00000 // 2^52 + 2^51 (float, not double!). | ||
1750 | | | ||
1751 | |.ffunc_n bit_tobit | ||
1752 | | mov ARG5, TOBIT_BIAS | ||
1753 | | fadd ARG5 | ||
1754 | | fstp FPARG1 // 64 bit FP store of the biased value. | ||
1755 | | fild ARG1 // 32 bit integer load (s2lfwd ok). | ||
1756 | | jmp ->fff_resn | ||
1757 | | | ||
1758 | |.macro .ffunc_bit, name | ||
1759 | | .ffunc_n name | ||
1760 | | mov ARG5, TOBIT_BIAS | ||
1761 | | fadd ARG5 | ||
1762 | | fstp FPARG1 | ||
1763 | | mov RB, ARG1 | ||
1764 | |.endmacro | ||
1765 | | | ||
1766 | |.macro .ffunc_bit_op, name, ins | ||
1767 | | .ffunc_bit name | ||
1768 | | mov NRESULTS, NARGS:RC // Save nargs+1 for fallback; RC is reused as the arg pointer below. | ||
1769 | | lea RC, [RA+NARGS:RC*8-16] | ||
1770 | |1: | ||
1771 | | cmp RC, RA | ||
1772 | | jbe ->fff_resbit | ||
1773 | | cmp dword [RC+4], LJ_TISNUM; ja ->fff_fallback_bit_op | ||
1774 | | fld qword [RC] | ||
1775 | | fadd ARG5 | ||
1776 | | fstp FPARG1 | ||
1777 | | ins RB, ARG1 | ||
1778 | | sub RC, 8 | ||
1779 | | jmp <1 | ||
1780 | |.endmacro | ||
1781 | | | ||
1782 | |.ffunc_bit_op bit_band, and | ||
1783 | |.ffunc_bit_op bit_bor, or | ||
1784 | |.ffunc_bit_op bit_bxor, xor | ||
1785 | | | ||
1786 | |.ffunc_bit bit_bswap | ||
1787 | | bswap RB | ||
1788 | | jmp ->fff_resbit | ||
1789 | | | ||
1790 | |.ffunc_bit bit_bnot | ||
1791 | | not RB | ||
1792 | |->fff_resbit: | ||
1793 | | mov ARG1, RB | ||
1794 | | fild ARG1 | ||
1795 | | jmp ->fff_resn | ||
1796 | | | ||
1797 | |->fff_fallback_bit_op: | ||
1798 | | mov NARGS:RC, NRESULTS // Restore nargs+1 (saved in NRESULTS) for the fallback handler. | ||
1799 | | jmp ->fff_fallback | ||
1800 | | | ||
1801 | |.macro .ffunc_bit_sh, name, ins | ||
1802 | | .ffunc_nn name | ||
1803 | | mov ARG5, TOBIT_BIAS | ||
1804 | | fadd ARG5 | ||
1805 | | fstp FPARG3 | ||
1806 | | fadd ARG5 | ||
1807 | | fstp FPARG1 | ||
1808 | | mov RC, RA // Save RA in RC. Assumes RA is ecx (the shift count is taken from cl). | ||
1809 | | mov RA, ARG3 | ||
1810 | | mov RB, ARG1 | ||
1811 | | ins RB, cl | ||
1812 | | mov RA, RC | ||
1813 | | jmp ->fff_resbit | ||
1814 | |.endmacro | ||
1815 | | | ||
1816 | |.ffunc_bit_sh bit_lshift, shl | ||
1817 | |.ffunc_bit_sh bit_rshift, shr | ||
1818 | |.ffunc_bit_sh bit_arshift, sar | ||
1819 | |.ffunc_bit_sh bit_rol, rol | ||
1820 | |.ffunc_bit_sh bit_ror, ror | ||
1821 | | | ||
1822 | |//----------------------------------------------------------------------- | ||
1823 | | | ||
1824 | |->fff_fallback_2: | ||
1825 | | mov NARGS:RC, 1+2 // Other args are ignored, anyway. | ||
1826 | | jmp ->fff_fallback | ||
1827 | |->fff_fallback_1: | ||
1828 | | mov NARGS:RC, 1+1 // Other args are ignored, anyway. | ||
1829 | |->fff_fallback: // Call fast function fallback handler. | ||
1830 | | // RA = new base, RC = nargs+1 | ||
1831 | | mov L:RB, SAVE_L | ||
1832 | | sub BASE, RA | ||
1833 | | mov [RA-4], PC | ||
1834 | | mov SAVE_PC, PC // Redundant (but a defined value). | ||
1835 | | mov ARG3, BASE // Save old BASE (relative). | ||
1836 | | mov L:RB->base, RA | ||
1837 | | lea RC, [RA+NARGS:RC*8-8] | ||
1838 | | mov ARG1, L:RB | ||
1839 | | lea BASE, [RC+8*LUA_MINSTACK] // Ensure enough space for handler. | ||
1840 | | mov L:RB->top, RC | ||
1841 | | mov CFUNC:RA, [RA-8] | ||
1842 | | cmp BASE, L:RB->maxstack | ||
1843 | | ja >5 // Need to grow stack. | ||
1844 | | call aword CFUNC:RA->f // (lua_State *L) | ||
1845 | | // Either throws an error or recovers and returns 0 or NRESULTS (+1). | ||
1846 | | test RC, RC; jnz >3 | ||
1847 | |1: // Returned 0: recompute nargs+1 from L->base/L->top and retry the fast path. | ||
1848 | | mov RA, L:RB->base | ||
1849 | | mov RC, L:RB->top | ||
1850 | | sub RC, RA | ||
1851 | | shr RC, 3 | ||
1852 | | add NARGS:RC, 1 | ||
1853 | | mov LFUNC:RB, [RA-8] | ||
1854 | | mov BASE, ARG3 // Restore old BASE. | ||
1855 | | add BASE, RA | ||
1856 | | cmp [RA-4], PC; jne >2 // Callable modified by handler? | ||
1857 | | jmp aword LFUNC:RB->gate // Retry the call. | ||
1858 | | | ||
1859 | |2: // Run modified callable. | ||
1860 | | cmp dword [RA-4], LJ_TFUNC | ||
1861 | | jne ->vmeta_call | ||
1862 | | jmp aword LFUNC:RB->gate // Retry the call. | ||
1863 | | | ||
1864 | |3: // Returned NRESULTS (already in RC/RD). | ||
1865 | | mov RA, L:RB->base | ||
1866 | | mov BASE, ARG3 // Restore old BASE. | ||
1867 | | add BASE, RA | ||
1868 | | jmp ->fff_res | ||
1869 | | | ||
1870 | |5: // Grow stack for fallback handler. | ||
1871 | | mov ARG2, LUA_MINSTACK | ||
1872 | | call extern lj_state_growstack // (lua_State *L, int n) | ||
1873 | | jmp <1 // Dumb retry (goes through ff first). | ||
1874 | | | ||
1875 | |->fff_gcstep: // Call GC step function. | ||
1876 | | // RA = new base, RC = nargs+1 | ||
1877 | | pop RB // Pop the return address: must keep stack at same level. | ||
1878 | | mov ARG3, RB // Save return address (pushed back before the ret). | ||
1879 | | mov L:RB, SAVE_L | ||
1880 | | sub BASE, RA | ||
1881 | | mov ARG2, BASE // Save old BASE (relative). | ||
1882 | | mov [RA-4], PC | ||
1883 | | mov SAVE_PC, PC // Redundant (but a defined value). | ||
1884 | | mov L:RB->base, RA | ||
1885 | | lea RC, [RA+NARGS:RC*8-8] | ||
1886 | | mov ARG1, L:RB | ||
1887 | | mov L:RB->top, RC | ||
1888 | | call extern lj_gc_step // (lua_State *L) | ||
1889 | | mov RA, L:RB->base | ||
1890 | | mov RC, L:RB->top | ||
1891 | | sub RC, RA | ||
1892 | | shr RC, 3 | ||
1893 | | add NARGS:RC, 1 | ||
1894 | | mov PC, [RA-4] | ||
1895 | | mov BASE, ARG2 // Restore old BASE. | ||
1896 | | add BASE, RA | ||
1897 | | mov RB, ARG3 | ||
1898 | | push RB // Restore return address. | ||
1899 | | mov LFUNC:RB, [RA-8] | ||
1900 | | ret | ||
1901 | | | ||
1902 | |//----------------------------------------------------------------------- | ||
1903 | |//-- Special dispatch targets ------------------------------------------- | ||
1904 | |//----------------------------------------------------------------------- | ||
1905 | | | ||
1906 | |->vm_record: // Dispatch target for recording phase. | ||
1907 | #if LJ_HASJIT | ||
1908 | | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)] | ||
1909 | | test RDL, HOOK_VMEVENT // No recording while in vmevent. | ||
1910 | | jnz >5 | ||
1911 | | // Decrement the hookcount for consistency, but always do the call. | ||
1912 | | test RDL, HOOK_ACTIVE | ||
1913 | | jnz >1 | ||
1914 | | test RDL, LUA_MASKLINE|LUA_MASKCOUNT | ||
1915 | | jz >1 | ||
1916 | | dec dword [DISPATCH+DISPATCH_GL(hookcount)] | ||
1917 | | jmp >1 | ||
1918 | #endif | ||
1919 | | | ||
1920 | |->vm_hook: // Dispatch target with enabled hooks (shares labels 1 and 5 with vm_record). | ||
1921 | | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)] | ||
1922 | | test RDL, HOOK_ACTIVE // Hook already active? | ||
1923 | | jnz >5 | ||
1924 | | | ||
1925 | | test RDL, LUA_MASKLINE|LUA_MASKCOUNT | ||
1926 | | jz >5 | ||
1927 | | dec dword [DISPATCH+DISPATCH_GL(hookcount)] | ||
1928 | | jz >1 | ||
1929 | | test RDL, LUA_MASKLINE | ||
1930 | | jz >5 | ||
1931 | |1: | ||
1932 | | mov L:RB, SAVE_L | ||
1933 | | mov RD, NRESULTS // Dynamic top for *M instructions. | ||
1934 | | mov ARG3, RD | ||
1935 | | mov L:RB->base, BASE | ||
1936 | | mov ARG2, PC | ||
1937 | | mov ARG1, L:RB | ||
1938 | | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. | ||
1939 | | call extern lj_dispatch_ins // (lua_State *L, BCIns *pc, int nres) | ||
1940 | |4: | ||
1941 | | mov BASE, L:RB->base | ||
1942 | | movzx RA, PC_RA | ||
1943 | |5: | ||
1944 | | movzx OP, PC_OP | ||
1945 | | movzx RD, PC_RD | ||
1946 | | jmp aword [DISPATCH+OP*4+GG_DISP_STATIC*4] // Re-dispatch to static ins. | ||
1947 | | | ||
1948 | |->vm_hotloop: // Hot loop counter underflow: call lj_trace_hot, then re-dispatch via label 4 above. | ||
1949 | #if LJ_HASJIT | ||
1950 | | mov L:RB, SAVE_L | ||
1951 | | lea RA, [DISPATCH+GG_DISP2J] | ||
1952 | | mov ARG2, PC | ||
1953 | | mov ARG1, RA | ||
1954 | | mov [DISPATCH+DISPATCH_J(L)], L:RB | ||
1955 | | mov SAVE_PC, PC | ||
1956 | | mov L:RB->base, BASE | ||
1957 | | call extern lj_trace_hot // (jit_State *J, const BCIns *pc) | ||
1958 | | jmp <4 | ||
1959 | #endif | ||
1960 | | | ||
1961 | |->vm_hotcall: // Hot call counter underflow: call lj_trace_hot, then continue with ins_next. | ||
1962 | #if LJ_HASJIT | ||
1963 | | mov L:RB, SAVE_L | ||
1964 | | lea RA, [DISPATCH+GG_DISP2J] | ||
1965 | | mov ARG2, PC | ||
1966 | | mov ARG1, RA | ||
1967 | | mov [DISPATCH+DISPATCH_J(L)], L:RB | ||
1968 | | mov SAVE_PC, PC | ||
1969 | | mov L:RB->base, BASE | ||
1970 | | call extern lj_trace_hot // (jit_State *J, const BCIns *pc) | ||
1971 | | mov BASE, L:RB->base | ||
1972 | | // Dispatch the first instruction and optionally record it. | ||
1973 | | ins_next | ||
1974 | #endif | ||
1975 | | | ||
1976 | |//----------------------------------------------------------------------- | ||
1977 | |//-- Trace exit handler ------------------------------------------------- | ||
1978 | |//----------------------------------------------------------------------- | ||
1979 | | | ||
1980 | |// Called from an exit stub with the exit number on the stack. | ||
1981 | |// The 16 bit exit number is stored with two (sign-extended) push imm8. | ||
1982 | |->vm_exit_handler: | ||
1983 | #if LJ_HASJIT | ||
1984 | | push ebp; lea ebp, [esp+12]; push ebp | ||
1985 | | push ebx; push edx; push ecx; push eax | ||
1986 | | movzx RC, byte [ebp-4] // Reconstruct exit number from the two pushed bytes. | ||
1987 | | mov RCH, byte [ebp-8] | ||
1988 | | mov [ebp-4], edi; mov [ebp-8], esi | ||
1989 | | // Caveat: DISPATCH is ebx. | ||
1990 | | mov DISPATCH, [ebp] | ||
1991 | | mov RA, [DISPATCH+DISPATCH_GL(vmstate)] // Get trace number. | ||
1992 | | set_vmstate EXIT | ||
1993 | | mov [DISPATCH+DISPATCH_J(exitno)], RC | ||
1994 | | mov [DISPATCH+DISPATCH_J(parent)], RA | ||
1995 | | sub esp, 8*8+16 // Room for SSE regs + args. | ||
1996 | | | ||
1997 | | // Must not access SSE regs if SSE2 is not present. | ||
1998 | | test dword [DISPATCH+DISPATCH_J(flags)], JIT_F_SSE2 | ||
1999 | | jz >1 | ||
2000 | | movsd qword [ebp-40], xmm7; movsd qword [ebp-48], xmm6 | ||
2001 | | movsd qword [ebp-56], xmm5; movsd qword [ebp-64], xmm4 | ||
2002 | | movsd qword [ebp-72], xmm3; movsd qword [ebp-80], xmm2 | ||
2003 | | movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0 | ||
2004 | |1: | ||
2005 | | // Caveat: RB is ebp. | ||
2006 | | mov L:RB, [DISPATCH+DISPATCH_GL(jit_L)] | ||
2007 | | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)] | ||
2008 | | mov [DISPATCH+DISPATCH_J(L)], L:RB | ||
2009 | | lea RC, [esp+16] | ||
2010 | | mov L:RB->base, BASE | ||
2011 | | lea RA, [DISPATCH+GG_DISP2J] | ||
2012 | | mov ARG2, RC | ||
2013 | | mov ARG1, RA | ||
2014 | | call extern lj_trace_exit // (jit_State *J, ExitState *ex) | ||
2015 | | // Interpreter C frame returned in eax. | ||
2016 | | mov esp, eax // Reposition stack to C frame. | ||
2017 | | mov BASE, L:RB->base | ||
2018 | | mov PC, SAVE_PC | ||
2019 | | mov SAVE_L, L:RB // Needed for on-trace resume/yield. | ||
2020 | #endif | ||
2021 | |->vm_exit_interp: | ||
2022 | #if LJ_HASJIT | ||
2023 | | mov LFUNC:KBASE, [BASE-8] | ||
2024 | | mov PROTO:KBASE, LFUNC:KBASE->pt | ||
2025 | | mov KBASE, PROTO:KBASE->k | ||
2026 | | mov dword [DISPATCH+DISPATCH_GL(jit_L)], 0 | ||
2027 | | set_vmstate INTERP | ||
2028 | | ins_next | ||
2029 | #endif | ||
2030 | | | ||
2031 | |//----------------------------------------------------------------------- | ||
2032 | |//-- Math helper functions ---------------------------------------------- | ||
2033 | |//----------------------------------------------------------------------- | ||
2034 | | | ||
2035 | |// FP value rounding. Called by math.floor/math.ceil fast functions | ||
2036 | |// and from JIT code. Arg/ret on x87 stack. No int/xmm registers modified. | ||
2037 | |.macro vm_round, mode1, mode2 | ||
2038 | | fnstcw word [esp+4] // Save the FP control word. Caveat: overwrites ARG1 and ARG2. | ||
2039 | | mov [esp+8], eax | ||
2040 | | mov ax, mode1 | ||
2041 | | or ax, [esp+4] | ||
2042 | |.if mode2 ~= 0xffff | ||
2043 | | and ax, mode2 | ||
2044 | |.endif | ||
2045 | | mov [esp+6], ax | ||
2046 | | fldcw word [esp+6] | ||
2047 | | frndint | ||
2048 | | fldcw word [esp+4] | ||
2049 | | mov eax, [esp+8] | ||
2050 | | ret | ||
2051 | |.endmacro | ||
2052 | | | ||
2053 | |->vm_floor: | ||
2054 | | vm_round 0x0400, 0xf7ff | ||
2055 | | | ||
2056 | |->vm_ceil: | ||
2057 | | vm_round 0x0800, 0xfbff | ||
2058 | | | ||
2059 | |->vm_trunc: | ||
2060 | | vm_round 0x0c00, 0xffff | ||
2061 | | | ||
2062 | |// FP modulo x%y, computed as x - round(x/y)*y using the same control word bits as vm_floor. Called by BC_MOD* and vm_arith. | ||
2063 | |// Args/ret on x87 stack (y on top). No xmm registers modified. | ||
2064 | |// Caveat: needs 3 slots on x87 stack! RC (eax) modified! | ||
2065 | |->vm_mod: | ||
2066 | | fld st1 | ||
2067 | | fdiv st1 | ||
2068 | | fnstcw word [esp+4] | ||
2069 | | mov ax, 0x0400 | ||
2070 | | or ax, [esp+4] | ||
2071 | | and ax, 0xf7ff | ||
2072 | | mov [esp+6], ax | ||
2073 | | fldcw word [esp+6] | ||
2074 | | frndint | ||
2075 | | fldcw word [esp+4] | ||
2076 | | fmulp st1 | ||
2077 | | fsubp st1 | ||
2078 | | ret | ||
2079 | | | ||
2080 | |// FP exponentiation e^x and 2^x. Called by math.exp fast function and | ||
2081 | |// from JIT code. Arg/ret on x87 stack. No int/xmm regs modified. | ||
2082 | |// Caveat: needs 3 slots on x87 stack! | ||
2083 | |->vm_exp: | ||
2084 | | fldl2e; fmulp st1 // e^x ==> 2^(x*log2(e)) | ||
2085 | |->vm_exp2: | ||
2086 | | fst dword [esp+4] // Store as float for the +-Inf checks. Caveat: overwrites ARG1. | ||
2087 | | cmp dword [esp+4], 0x7f800000; je >1 // Special case: e^+Inf = +Inf | ||
2088 | | cmp dword [esp+4], 0xff800000; je >2 // Special case: e^-Inf = 0 | ||
2089 | |->vm_exp2raw: // Entry point for vm_pow. Without +-Inf check. | ||
2090 | | fdup; frndint; fsub st1, st0; fxch // Split into frac/int part. | ||
2091 | | f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int | ||
2092 | |1: | ||
2093 | | ret | ||
2094 | |2: | ||
2095 | | fpop; fldz; ret | ||
2096 | | | ||
2097 | |// Generic power function x^y. Called by BC_POW, math.pow fast function | ||
2098 | |// and vm_arith. Args/ret on x87 stack (y on top). No int/xmm regs modified. | ||
2099 | |// Caveat: needs 3 slots on x87 stack! | ||
2100 | |->vm_pow: | ||
2101 | | fist dword [esp+4] // Store/reload int before comparison. | ||
2102 | | fild dword [esp+4] // Integral exponent used in vm_powi. | ||
2103 | ||if (cmov) { | ||
2104 | | fucomip st1 | ||
2105 | ||} else { | ||
2106 | | push eax; fucomp st1; fnstsw ax; sahf; pop eax | ||
2107 | ||} | ||
2108 | | jnz >8 // Branch for FP exponents (y differs from its integer conversion). | ||
2109 | | jp >9 // Branch for NaN exponent. | ||
2110 | | fpop // Pop y and fallthrough to vm_powi. | ||
2111 | | | ||
2112 | |// FP/int power function x^i. Called from JIT code. Arg1/ret on x87 stack. | ||
2113 | |// Arg2 (int) on C stack. No int/xmm regs modified. | ||
2114 | |// Caveat: needs 2 slots on x87 stack! | ||
2115 | |->vm_powi: | ||
2116 | | push eax | ||
2117 | | mov eax, [esp+8] | ||
2118 | | cmp eax, 1; jle >6 // i<=1? | ||
2119 | | // Now 1 < (unsigned)i <= 0x80000000. | ||
2120 | |1: // Handle leading zeros: square x once for every low-order zero bit of i. | ||
2121 | | test eax, 1; jnz >2 | ||
2122 | | fmul st0 | ||
2123 | | shr eax, 1 | ||
2124 | | jmp <1 | ||
2125 | |2: | ||
2126 | | shr eax, 1; jz >5 | ||
2127 | | fdup | ||
2128 | |3: // Handle trailing bits: square st0, and multiply it into st1 when the shifted-out bit is set. | ||
2129 | | fmul st0 | ||
2130 | | shr eax, 1; jz >4 | ||
2131 | | jnc <3 | ||
2132 | | fmul st1, st0 | ||
2133 | | jmp <3 | ||
2134 | |4: | ||
2135 | | fmulp st1 | ||
2136 | |5: | ||
2137 | | pop eax | ||
2138 | | ret | ||
2139 | |6: | ||
2140 | | je <5 // x^1 ==> x | ||
2141 | | jb >7 | ||
2142 | | fld1; fdivrp st1 | ||
2143 | | neg eax | ||
2144 | | cmp eax, 1; je <5 // x^-1 ==> 1/x | ||
2145 | | jmp <1 // x^-i ==> (1/x)^i | ||
2146 | |7: | ||
2147 | | fpop; fld1 // x^0 ==> 1 | ||
2148 | | pop eax | ||
2149 | | ret | ||
2150 | | | ||
2151 | |8: // FP/FP power function x^y. Reached from vm_pow when y is not integral. | ||
2152 | | push eax | ||
2153 | | fst dword [esp+8] | ||
2154 | | fxch | ||
2155 | | fst dword [esp+12] | ||
2156 | | mov eax, [esp+8]; shl eax, 1 | ||
2157 | | cmp eax, 0xff000000; je >2 // x^+-Inf? | ||
2158 | | mov eax, [esp+12]; shl eax, 1; je >4 // +-0^y? | ||
2159 | | cmp eax, 0xff000000; je >4 // +-Inf^y? | ||
2160 | | pop eax | ||
2161 | | fyl2x | ||
2162 | | jmp ->vm_exp2raw | ||
2163 | | | ||
2164 | |9: // Handle x^NaN. | ||
2165 | | fld1 | ||
2166 | ||if (cmov) { | ||
2167 | | fucomip st2 | ||
2168 | ||} else { | ||
2169 | | push eax; fucomp st2; fnstsw ax; sahf; pop eax | ||
2170 | ||} | ||
2171 | | je >1 // 1^NaN ==> 1 | ||
2172 | | fxch // x^NaN ==> NaN | ||
2173 | |1: | ||
2174 | | fpop | ||
2175 | | ret | ||
2176 | | | ||
2177 | |2: // Handle x^+-Inf. | ||
2178 | | fabs | ||
2179 | | fld1 | ||
2180 | ||if (cmov) { | ||
2181 | | fucomip st1 | ||
2182 | ||} else { | ||
2183 | | fucomp st1; fnstsw ax; sahf | ||
2184 | ||} | ||
2185 | | je >3 // +-1^+-Inf ==> 1 | ||
2186 | | fpop; fabs; fldz; mov eax, 0; setc al | ||
2187 | | ror eax, 1; xor eax, [esp+8]; jns >3 // |x|<>1, x^+-Inf ==> +Inf/0 | ||
2188 | | fxch | ||
2189 | |3: | ||
2190 | | fpop1; fabs; pop eax | ||
2191 | | ret | ||
2192 | | | ||
2193 | |4: // Handle +-0^y or +-Inf^y. | ||
2194 | | cmp dword [esp+8], 0; jge <3 // y >= 0, x^y ==> |x| | ||
2195 | | fpop; fpop | ||
2196 | | test eax, eax; pop eax; jz >5 // y < 0, +-0^y ==> +Inf | ||
2197 | | fldz // y < 0, +-Inf^y ==> 0 | ||
2198 | | ret | ||
2199 | |5: | ||
2200 | | mov dword [esp+8], 0x7f800000 // Return +Inf (0x7f800000 loaded as a float below). | ||
2201 | | fld dword [esp+8] | ||
2202 | | ret | ||
2203 | | | ||
2204 | |// Callable from C: double lj_vm_foldfpm(double x, int fpm) | ||
2205 | |// Computes fpm(x) for extended math functions. ORDER FPM. | ||
2206 | |->vm_foldfpm: | ||
2207 | | mov eax, [esp+12] | ||
2208 | | fld qword [esp+4] | ||
2209 | | cmp eax, 1; jb ->vm_floor; je ->vm_ceil | ||
2210 | | cmp eax, 3; jb ->vm_trunc; ja >1 | ||
2211 | | fsqrt; ret | ||
2212 | |1: ; cmp eax, 5; jb ->vm_exp; je ->vm_exp2 | ||
2213 | | cmp eax, 7; je >1; ja >2 | ||
2214 | | fldln2; fxch; fyl2x; ret | ||
2215 | |1: ; fld1; fxch; fyl2x; ret | ||
2216 | |2: ; cmp eax, 9; je >1; ja >2 | ||
2217 | | fldlg2; fxch; fyl2x; ret | ||
2218 | |1: ; fsin; ret | ||
2219 | |2: ; cmp eax, 11; je >1; ja >9 | ||
2220 | | fcos; ret | ||
2221 | |1: ; fptan; fpop; ret | ||
2222 | |9: ; int3 // Bad fpm (above 11): trap. | ||
2223 | | | ||
2224 | |// Callable from C: double lj_vm_foldarith(double x, double y, int op) | ||
2225 | |// Compute x op y for basic arithmetic operators (+ - * / % ^ and unary -) | ||
2226 | |// and basic math functions. ORDER ARITH | ||
2227 | |->vm_foldarith: | ||
2228 | | mov eax, [esp+20] | ||
2229 | | fld qword [esp+4] | ||
2230 | | fld qword [esp+12] | ||
2231 | | cmp eax, 1; je >1; ja >2 | ||
2232 | | faddp st1; ret | ||
2233 | |1: ; fsubp st1; ret | ||
2234 | |2: ; cmp eax, 3; je >1; ja >2 | ||
2235 | | fmulp st1; ret | ||
2236 | |1: ; fdivp st1; ret | ||
2237 | |2: ; cmp eax, 5; jb ->vm_mod; je ->vm_pow | ||
2238 | | cmp eax, 7; je >1; ja >2 | ||
2239 | | fpop; fchs; ret | ||
2240 | |1: ; fpop; fabs; ret | ||
2241 | |2: ; cmp eax, 9; je >1; ja >2 | ||
2242 | | fpatan; ret | ||
2243 | |1: ; fxch; fscale; fpop1; ret | ||
2244 | |2: ; cmp eax, 11; je >1; ja >9 | ||
2245 | ||if (cmov) { | ||
2246 | | fucomi st1; fcmovnbe st1; fpop1; ret | ||
2247 | |1: ; fucomi st1; fcmovbe st1; fpop1; ret | ||
2248 | ||} else { | ||
2249 | | fucom st1; fnstsw ax; test ah, 1; jz >2; fxch; 2: ; fpop; ret | ||
2250 | |1: ; fucom st1; fnstsw ax; test ah, 1; jnz >2; fxch; 2: ; fpop; ret | ||
2251 | ||} | ||
2252 | |9: ; int3 // Bad op (above 11): trap. | ||
2253 | | | ||
2254 | |//----------------------------------------------------------------------- | ||
2255 | |//-- Miscellaneous functions -------------------------------------------- | ||
2256 | |//----------------------------------------------------------------------- | ||
2257 | | | ||
2258 | |// int lj_vm_cpuid(uint32_t f, uint32_t res[4]) | ||
2259 | |->vm_cpuid: | ||
2260 | | pushfd | ||
2261 | | pop edx | ||
2262 | | mov ecx, edx | ||
2263 | | xor edx, 0x00200000 // Toggle ID bit in flags. | ||
2264 | | push edx | ||
2265 | | popfd | ||
2266 | | pushfd | ||
2267 | | pop edx | ||
2268 | | xor eax, eax // Zero means no features supported. | ||
2269 | | cmp ecx, edx | ||
2270 | | jz >1 // No ID toggle means no CPUID support. | ||
2271 | | mov eax, [esp+4] // Argument 1 is function number. | ||
2272 | | push edi | ||
2273 | | push ebx | ||
2274 | | cpuid | ||
2275 | | mov edi, [esp+16] // Argument 2 is result area. | ||
2276 | | mov [edi], eax | ||
2277 | | mov [edi+4], ebx | ||
2278 | | mov [edi+8], ecx | ||
2279 | | mov [edi+12], edx | ||
2280 | | pop ebx | ||
2281 | | pop edi | ||
2282 | |1: | ||
2283 | | ret | ||
2284 | | | ||
2285 | |//----------------------------------------------------------------------- | ||
2286 | } | ||
2287 | |||
2288 | /* Generate the code for a single instruction. */ | ||
2289 | static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov) | ||
2290 | { | ||
2291 | int vk = 0; | ||
2292 | |// Note: aligning all instructions does not pay off. | ||
2293 | |=>defop: | ||
2294 | |||
2295 | switch (op) { | ||
2296 | |||
2297 | /* -- Comparison ops ---------------------------------------------------- */ | ||
2298 | |||
2299 | /* Remember: all ops branch for a true comparison, fall through otherwise. */ | ||
2300 | |||
2301 | case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: | ||
2302 | | // RA = src1, RD = src2, JMP with RD = target | ||
2303 | | ins_AD | ||
2304 | | checknum RA, ->vmeta_comp | ||
2305 | | checknum RD, ->vmeta_comp | ||
2306 | | fld qword [BASE+RA*8] // Reverse order, i.e like cmp D, A. | ||
2307 | | fld qword [BASE+RD*8] | ||
2308 | | add PC, 4 | ||
2309 | | fcomparepp // eax (RD) modified! | ||
2310 | | // Unordered: all of ZF CF PF set, ordered: PF clear. | ||
2311 | | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. | ||
2312 | switch (op) { | ||
2313 | case BC_ISLT: | ||
2314 | | jbe >2 | ||
2315 | break; | ||
2316 | case BC_ISGE: | ||
2317 | | ja >2 | ||
2318 | break; | ||
2319 | case BC_ISLE: | ||
2320 | | jb >2 | ||
2321 | break; | ||
2322 | case BC_ISGT: | ||
2323 | | jae >2 | ||
2324 | break; | ||
2325 | default: break; /* Shut up GCC. */ | ||
2326 | } | ||
2327 | |1: | ||
2328 | | movzx RD, PC_RD | ||
2329 | | branchPC RD | ||
2330 | |2: | ||
2331 | | ins_next | ||
2332 | break; | ||
2333 | |||
2334 | case BC_ISEQV: case BC_ISNEV: | ||
2335 | vk = op == BC_ISEQV; | ||
2336 | | ins_AD // RA = src1, RD = src2, JMP with RD = target | ||
2337 | | mov RB, [BASE+RD*8+4] | ||
2338 | | add PC, 4 | ||
2339 | | cmp RB, LJ_TISNUM; ja >5 | ||
2340 | | checknum RA, >5 | ||
2341 | | fld qword [BASE+RA*8] | ||
2342 | | fld qword [BASE+RD*8] | ||
2343 | | fcomparepp // eax (RD) modified! | ||
2344 | iseqne_fp: | ||
2345 | if (vk) { | ||
2346 | | jp >2 // Unordered means not equal. | ||
2347 | | jne >2 | ||
2348 | } else { | ||
2349 | | jp >2 // Unordered means not equal. | ||
2350 | | je >1 | ||
2351 | } | ||
2352 | iseqne_end: | ||
2353 | if (vk) { | ||
2354 | |1: // EQ: Branch to the target. | ||
2355 | | movzx RD, PC_RD | ||
2356 | | branchPC RD | ||
2357 | |2: // NE: Fallthrough to next instruction. | ||
2358 | } else { | ||
2359 | |2: // NE: Branch to the target. | ||
2360 | | movzx RD, PC_RD | ||
2361 | | branchPC RD | ||
2362 | |1: // EQ: Fallthrough to next instruction. | ||
2363 | } | ||
2364 | | ins_next | ||
2365 | | | ||
2366 | if (op == BC_ISEQV || op == BC_ISNEV) { | ||
2367 | |5: // Either or both types are not numbers. | ||
2368 | | checktp RA, RB // Compare types. | ||
2369 | | jne <2 // Not the same type? | ||
2370 | | cmp RB, LJ_TISPRI | ||
2371 | | jae <1 // Same type and primitive type? | ||
2372 | | | ||
2373 | | // Same types and not a primitive type. Compare GCobj or pvalue. | ||
2374 | | mov RA, [BASE+RA*8] | ||
2375 | | mov RD, [BASE+RD*8] | ||
2376 | | cmp RA, RD | ||
2377 | | je <1 // Same GCobjs or pvalues? | ||
2378 | | cmp RB, LJ_TISTABUD | ||
2379 | | ja <2 // Different objects and not table/ud? | ||
2380 | | | ||
2381 | | // Different tables or userdatas. Need to check __eq metamethod. | ||
2382 | | // Field metatable must be at same offset for GCtab and GCudata! | ||
2383 | | mov TAB:RB, TAB:RA->metatable | ||
2384 | | test TAB:RB, TAB:RB | ||
2385 | | jz <2 // No metatable? | ||
2386 | | test byte TAB:RB->nomm, 1<<MM_eq | ||
2387 | | jnz <2 // Or 'no __eq' flag set? | ||
2388 | if (vk) { | ||
2389 | | xor RB, RB // ne = 0 | ||
2390 | } else { | ||
2391 | | mov RB, 1 // ne = 1 | ||
2392 | } | ||
2393 | | jmp ->vmeta_equal // Handle __eq metamethod. | ||
2394 | } | ||
2395 | break; | ||
2396 | case BC_ISEQS: case BC_ISNES: | ||
2397 | vk = op == BC_ISEQS; | ||
2398 | | ins_AND // RA = src, RD = str const, JMP with RD = target | ||
2399 | | add PC, 4 | ||
2400 | | checkstr RA, >2 | ||
2401 | | mov RA, [BASE+RA*8] | ||
2402 | | cmp RA, [KBASE+RD*4] | ||
2403 | iseqne_test: | ||
2404 | if (vk) { | ||
2405 | | jne >2 | ||
2406 | } else { | ||
2407 | | je >1 | ||
2408 | } | ||
2409 | goto iseqne_end; | ||
2410 | case BC_ISEQN: case BC_ISNEN: | ||
2411 | vk = op == BC_ISEQN; | ||
2412 | | ins_AD // RA = src, RD = num const, JMP with RD = target | ||
2413 | | add PC, 4 | ||
2414 | | checknum RA, >2 | ||
2415 | | fld qword [BASE+RA*8] | ||
2416 | | fld qword [KBASE+RD*8] | ||
2417 | | fcomparepp // eax (RD) modified! | ||
2418 | goto iseqne_fp; | ||
2419 | case BC_ISEQP: case BC_ISNEP: | ||
2420 | vk = op == BC_ISEQP; | ||
2421 | | ins_AND // RA = src, RD = primitive type (~), JMP with RD = target | ||
2422 | | add PC, 4 | ||
2423 | | checktp RA, RD | ||
2424 | goto iseqne_test; | ||
2425 | |||
2426 | /* -- Unary test and copy ops ------------------------------------------- */ | ||
2427 | |||
2428 | case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: | ||
2429 | | ins_AD // RA = dst or unused, RD = src, JMP with RD = target | ||
2430 | | mov RB, [BASE+RD*8+4] | ||
2431 | | add PC, 4 | ||
2432 | | cmp RB, LJ_TISTRUECOND | ||
2433 | if (op == BC_IST || op == BC_ISTC) { | ||
2434 | | jae >1 | ||
2435 | } else { | ||
2436 | | jb >1 | ||
2437 | } | ||
2438 | if (op == BC_ISTC || op == BC_ISFC) { | ||
2439 | | mov [BASE+RA*8+4], RB | ||
2440 | | mov RB, [BASE+RD*8] | ||
2441 | | mov [BASE+RA*8], RB | ||
2442 | } | ||
2443 | | movzx RD, PC_RD | ||
2444 | | branchPC RD | ||
2445 | |1: // Fallthrough to the next instruction. | ||
2446 | | ins_next | ||
2447 | break; | ||
2448 | |||
2449 | /* -- Unary ops --------------------------------------------------------- */ | ||
2450 | |||
2451 | case BC_MOV: | ||
2452 | | ins_AD // RA = dst, RD = src | ||
2453 | | mov RB, [BASE+RD*8+4] | ||
2454 | | mov RD, [BASE+RD*8] // Overwrites RD. | ||
2455 | | mov [BASE+RA*8+4], RB | ||
2456 | | mov [BASE+RA*8], RD | ||
2457 | | ins_next_ | ||
2458 | break; | ||
2459 | case BC_NOT: | ||
2460 | | ins_AD // RA = dst, RD = src | ||
2461 | | xor RB, RB | ||
2462 | | checktp RD, LJ_TISTRUECOND | ||
2463 | | adc RB, LJ_TTRUE | ||
2464 | | mov [BASE+RA*8+4], RB | ||
2465 | | ins_next | ||
2466 | break; | ||
2467 | case BC_UNM: | ||
2468 | | ins_AD // RA = dst, RD = src | ||
2469 | | checknum RD, ->vmeta_unm | ||
2470 | | fld qword [BASE+RD*8] | ||
2471 | | fchs | ||
2472 | | fstp qword [BASE+RA*8] | ||
2473 | | ins_next | ||
2474 | break; | ||
2475 | case BC_LEN: | ||
2476 | | ins_AD // RA = dst, RD = src | ||
2477 | | checkstr RD, >2 | ||
2478 | | mov STR:RD, [BASE+RD*8] | ||
2479 | | fild dword STR:RD->len | ||
2480 | |1: | ||
2481 | | fstp qword [BASE+RA*8] | ||
2482 | | ins_next | ||
2483 | |2: | ||
2484 | | checktab RD, ->vmeta_len | ||
2485 | | mov TAB:RD, [BASE+RD*8] | ||
2486 | | mov ARG1, TAB:RD | ||
2487 | | mov RB, BASE // Save BASE. | ||
2488 | | call extern lj_tab_len // (GCtab *t) | ||
2489 | | // Length of table returned in eax (RC). | ||
2490 | | mov ARG1, RC | ||
2491 | | mov BASE, RB // Restore BASE. | ||
2492 | | fild ARG1 | ||
2493 | | movzx RA, PC_RA | ||
2494 | | jmp <1 | ||
2495 | break; | ||
2496 | |||
2497 | /* -- Binary ops -------------------------------------------------------- */ | ||
2498 | |||
2499 | |.macro ins_arithpre, ins | ||
2500 | | ins_ABC | ||
2501 | ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); | ||
2502 | ||switch (vk) { | ||
2503 | ||case 0: | ||
2504 | | checknum RB, ->vmeta_arith_vn | ||
2505 | | fld qword [BASE+RB*8] | ||
2506 | | ins qword [KBASE+RC*8] | ||
2507 | || break; | ||
2508 | ||case 1: | ||
2509 | | checknum RB, ->vmeta_arith_nv | ||
2510 | | fld qword [KBASE+RC*8] | ||
2511 | | ins qword [BASE+RB*8] | ||
2512 | || break; | ||
2513 | ||default: | ||
2514 | | checknum RB, ->vmeta_arith_vv | ||
2515 | | checknum RC, ->vmeta_arith_vv | ||
2516 | | fld qword [BASE+RB*8] | ||
2517 | | ins qword [BASE+RC*8] | ||
2518 | || break; | ||
2519 | ||} | ||
2520 | |.endmacro | ||
2521 | | | ||
2522 | |.macro ins_arith, ins | ||
2523 | | ins_arithpre ins | ||
2524 | | fstp qword [BASE+RA*8] | ||
2525 | | ins_next | ||
2526 | |.endmacro | ||
2527 | |||
2528 | | // RA = dst, RB = src1 or num const, RC = src2 or num const | ||
2529 | case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: | ||
2530 | | ins_arith fadd | ||
2531 | break; | ||
2532 | case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: | ||
2533 | | ins_arith fsub | ||
2534 | break; | ||
2535 | case BC_MULVN: case BC_MULNV: case BC_MULVV: | ||
2536 | | ins_arith fmul | ||
2537 | break; | ||
2538 | case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: | ||
2539 | | ins_arith fdiv | ||
2540 | break; | ||
2541 | case BC_MODVN: | ||
2542 | | ins_arithpre fld | ||
2543 | |->BC_MODVN_Z: | ||
2544 | | call ->vm_mod | ||
2545 | | fstp qword [BASE+RA*8] | ||
2546 | | ins_next | ||
2547 | break; | ||
2548 | case BC_MODNV: case BC_MODVV: | ||
2549 | | ins_arithpre fld | ||
2550 | | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. | ||
2551 | break; | ||
2552 | case BC_POW: | ||
2553 | | ins_arithpre fld | ||
2554 | | call ->vm_pow | ||
2555 | | fstp qword [BASE+RA*8] | ||
2556 | | ins_next | ||
2557 | break; | ||
2558 | |||
2559 | case BC_CAT: | ||
2560 | | ins_ABC // RA = dst, RB = src_start, RC = src_end | ||
2561 | | lea RA, [BASE+RC*8] | ||
2562 | | sub RC, RB | ||
2563 | | mov ARG2, RA | ||
2564 | | mov ARG3, RC | ||
2565 | |->BC_CAT_Z: | ||
2566 | | mov L:RB, SAVE_L | ||
2567 | | mov ARG1, L:RB | ||
2568 | | mov SAVE_PC, PC | ||
2569 | | mov L:RB->base, BASE | ||
2570 | | call extern lj_meta_cat // (lua_State *L, TValue *top, int left) | ||
2571 | | // NULL (finished) or TValue * (metamethod) returned in eax (RC). | ||
2572 | | mov BASE, L:RB->base | ||
2573 | | test RC, RC | ||
2574 | | jnz ->vmeta_binop | ||
2575 | | movzx RB, PC_RB // Copy result to Stk[RA] from Stk[RB]. | ||
2576 | | movzx RA, PC_RA | ||
2577 | | mov RC, [BASE+RB*8+4] | ||
2578 | | mov RB, [BASE+RB*8] | ||
2579 | | mov [BASE+RA*8+4], RC | ||
2580 | | mov [BASE+RA*8], RB | ||
2581 | | ins_next | ||
2582 | break; | ||
2583 | |||
2584 | /* -- Constant ops ------------------------------------------------------ */ | ||
2585 | |||
2586 | case BC_KSTR: | ||
2587 | | ins_AND // RA = dst, RD = str const (~) | ||
2588 | | mov RD, [KBASE+RD*4] | ||
2589 | | mov dword [BASE+RA*8+4], LJ_TSTR | ||
2590 | | mov [BASE+RA*8], RD | ||
2591 | | ins_next | ||
2592 | break; | ||
2593 | case BC_KSHORT: | ||
2594 | | ins_AD // RA = dst, RD = signed int16 literal | ||
2595 | | fild PC_RD // Refetch signed RD from instruction. | ||
2596 | | fstp qword [BASE+RA*8] | ||
2597 | | ins_next | ||
2598 | break; | ||
2599 | case BC_KNUM: | ||
2600 | | ins_AD // RA = dst, RD = num const | ||
2601 | | fld qword [KBASE+RD*8] | ||
2602 | | fstp qword [BASE+RA*8] | ||
2603 | | ins_next | ||
2604 | break; | ||
2605 | case BC_KPRI: | ||
2606 | | ins_AND // RA = dst, RD = primitive type (~) | ||
2607 | | mov [BASE+RA*8+4], RD | ||
2608 | | ins_next | ||
2609 | break; | ||
2610 | case BC_KNIL: | ||
2611 | | ins_AD // RA = dst_start, RD = dst_end | ||
2612 | | lea RA, [BASE+RA*8+12] | ||
2613 | | lea RD, [BASE+RD*8+4] | ||
2614 | | mov RB, LJ_TNIL | ||
2615 | | mov [RA-8], RB // Sets minimum 2 slots. | ||
2616 | |1: | ||
2617 | | mov [RA], RB | ||
2618 | | add RA, 8 | ||
2619 | | cmp RA, RD | ||
2620 | | jbe <1 | ||
2621 | | ins_next | ||
2622 | break; | ||
2623 | |||
2624 | /* -- Upvalue and function ops ------------------------------------------ */ | ||
2625 | |||
2626 | case BC_UGET: | ||
2627 | | ins_AD // RA = dst, RD = upvalue # | ||
2628 | | mov LFUNC:RB, [BASE-8] | ||
2629 | | mov UPVAL:RB, [LFUNC:RB+RD*4+offsetof(GCfuncL, uvptr)] | ||
2630 | | mov RB, UPVAL:RB->v | ||
2631 | | mov RD, [RB+4] | ||
2632 | | mov RB, [RB] | ||
2633 | | mov [BASE+RA*8+4], RD | ||
2634 | | mov [BASE+RA*8], RB | ||
2635 | | ins_next | ||
2636 | break; | ||
2637 | case BC_USETV: | ||
2638 | | ins_AD // RA = upvalue #, RD = src | ||
2639 | | // Really ugly code due to the lack of a 4th free register. | ||
2640 | | mov LFUNC:RB, [BASE-8] | ||
2641 | | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] | ||
2642 | | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv) | ||
2643 | | jnz >4 | ||
2644 | |1: | ||
2645 | | mov RA, [BASE+RD*8] | ||
2646 | |2: | ||
2647 | | mov RB, UPVAL:RB->v | ||
2648 | | mov RD, [BASE+RD*8+4] | ||
2649 | | mov [RB], RA | ||
2650 | | mov [RB+4], RD | ||
2651 | |3: | ||
2652 | | ins_next | ||
2653 | | | ||
2654 | |4: // Upvalue is black. Check if new value is collectable and white. | ||
2655 | | mov RA, [BASE+RD*8+4] | ||
2656 | | sub RA, LJ_TISGCV | ||
2657 | | cmp RA, LJ_TISNUM - LJ_TISGCV // tvisgcv(v) | ||
2658 | | jbe <1 | ||
2659 | | mov GCOBJ:RA, [BASE+RD*8] | ||
2660 | | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v) | ||
2661 | | jz <2 | ||
2662 | | // Crossed a write barrier. So move the barrier forward. | ||
2663 | | mov ARG2, UPVAL:RB | ||
2664 | | mov ARG3, GCOBJ:RA | ||
2665 | | mov RB, UPVAL:RB->v | ||
2666 | | mov RD, [BASE+RD*8+4] | ||
2667 | | mov [RB], GCOBJ:RA | ||
2668 | | mov [RB+4], RD | ||
2669 | |->BC_USETV_Z: | ||
2670 | | mov L:RB, SAVE_L | ||
2671 | | lea GL:RA, [DISPATCH+GG_DISP2G] | ||
2672 | | mov L:RB->base, BASE | ||
2673 | | mov ARG1, GL:RA | ||
2674 | | call extern lj_gc_barrieruv // (global_State *g, GCobj *o, GCobj *v) | ||
2675 | | mov BASE, L:RB->base | ||
2676 | | jmp <3 | ||
2677 | break; | ||
2678 | case BC_USETS: | ||
2679 | | ins_AND // RA = upvalue #, RD = str const (~) | ||
2680 | | mov LFUNC:RB, [BASE-8] | ||
2681 | | mov GCOBJ:RD, [KBASE+RD*4] | ||
2682 | | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] | ||
2683 | | mov RA, UPVAL:RB->v | ||
2684 | | mov dword [RA+4], LJ_TSTR | ||
2685 | | mov [RA], GCOBJ:RD | ||
2686 | | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv) | ||
2687 | | jnz >2 | ||
2688 | |1: | ||
2689 | | ins_next | ||
2690 | | | ||
2691 | |2: // Upvalue is black. Check if string is white. | ||
2692 | | test byte GCOBJ:RD->gch.marked, LJ_GC_WHITES // iswhite(str) | ||
2693 | | jz <1 | ||
2694 | | // Crossed a write barrier. So move the barrier forward. | ||
2695 | | mov ARG3, GCOBJ:RD | ||
2696 | | mov ARG2, UPVAL:RB | ||
2697 | | jmp ->BC_USETV_Z | ||
2698 | break; | ||
2699 | case BC_USETN: | ||
2700 | | ins_AD // RA = upvalue #, RD = num const | ||
2701 | | mov LFUNC:RB, [BASE-8] | ||
2702 | | fld qword [KBASE+RD*8] | ||
2703 | | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] | ||
2704 | | mov RA, UPVAL:RB->v | ||
2705 | | fstp qword [RA] | ||
2706 | | ins_next | ||
2707 | break; | ||
2708 | case BC_USETP: | ||
2709 | | ins_AND // RA = upvalue #, RD = primitive type (~) | ||
2710 | | mov LFUNC:RB, [BASE-8] | ||
2711 | | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] | ||
2712 | | mov RA, UPVAL:RB->v | ||
2713 | | mov [RA+4], RD | ||
2714 | | ins_next | ||
2715 | break; | ||
2716 | case BC_UCLO: | ||
2717 | | ins_AD // RA = level, RD = target | ||
2718 | | branchPC RD // Do this first to free RD. | ||
2719 | | mov L:RB, SAVE_L | ||
2720 | | cmp dword L:RB->openupval, 0 | ||
2721 | | je >1 | ||
2722 | | lea RA, [BASE+RA*8] | ||
2723 | | mov ARG2, RA | ||
2724 | | mov ARG1, L:RB | ||
2725 | | mov L:RB->base, BASE | ||
2726 | | call extern lj_func_closeuv // (lua_State *L, StkId level) | ||
2727 | | mov BASE, L:RB->base | ||
2728 | |1: | ||
2729 | | ins_next | ||
2730 | break; | ||
2731 | |||
2732 | case BC_FNEW: | ||
2733 | | ins_AND // RA = dst, RD = proto const (~) (holding function prototype) | ||
2734 | | mov LFUNC:RA, [BASE-8] | ||
2735 | | mov PROTO:RD, [KBASE+RD*4] // Fetch GCproto *. | ||
2736 | | mov L:RB, SAVE_L | ||
2737 | | mov ARG3, LFUNC:RA | ||
2738 | | mov ARG2, PROTO:RD | ||
2739 | | mov SAVE_PC, PC | ||
2740 | | mov ARG1, L:RB | ||
2741 | | mov L:RB->base, BASE | ||
2742 | | // (lua_State *L, GCproto *pt, GCfuncL *parent) | ||
2743 | | call extern lj_func_newL_gc | ||
2744 | | // GCfuncL * returned in eax (RC). | ||
2745 | | mov BASE, L:RB->base | ||
2746 | | movzx RA, PC_RA | ||
2747 | | mov [BASE+RA*8], LFUNC:RC | ||
2748 | | mov dword [BASE+RA*8+4], LJ_TFUNC | ||
2749 | | ins_next | ||
2750 | break; | ||
2751 | |||
2752 | /* -- Table ops --------------------------------------------------------- */ | ||
2753 | |||
2754 | case BC_TNEW: | ||
2755 | | ins_AD // RA = dst, RD = hbits|asize | ||
2756 | | mov RB, RD | ||
2757 | | and RD, 0x7ff | ||
2758 | | shr RB, 11 | ||
2759 | | cmp RD, 0x7ff // Turn 0x7ff into 0x801. | ||
2760 | | sete RAL | ||
2761 | | mov ARG3, RB | ||
2762 | | add RD, RA | ||
2763 | | mov L:RB, SAVE_L | ||
2764 | | add RD, RA | ||
2765 | | mov ARG2, RD | ||
2766 | | mov SAVE_PC, PC | ||
2767 | | mov RA, [DISPATCH+DISPATCH_GL(gc.total)] | ||
2768 | | mov ARG1, L:RB | ||
2769 | | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)] | ||
2770 | | mov L:RB->base, BASE | ||
2771 | | jae >2 | ||
2772 | |1: | ||
2773 | | call extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits) | ||
2774 | | // GCtab * returned in eax (RC). | ||
2775 | | mov BASE, L:RB->base | ||
2776 | | movzx RA, PC_RA | ||
2777 | | mov [BASE+RA*8], TAB:RC | ||
2778 | | mov dword [BASE+RA*8+4], LJ_TTAB | ||
2779 | | ins_next | ||
2780 | |2: | ||
2781 | | call extern lj_gc_step_fixtop // (lua_State *L) | ||
2782 | | mov ARG1, L:RB // Args owned by callee. Set it again. | ||
2783 | | jmp <1 | ||
2784 | break; | ||
2785 | case BC_TDUP: | ||
2786 | | ins_AND // RA = dst, RD = table const (~) (holding template table) | ||
2787 | | mov TAB:RD, [KBASE+RD*4] | ||
2788 | | mov L:RB, SAVE_L | ||
2789 | | mov ARG2, TAB:RD | ||
2790 | | mov ARG1, L:RB | ||
2791 | | mov RA, [DISPATCH+DISPATCH_GL(gc.total)] | ||
2792 | | mov SAVE_PC, PC | ||
2793 | | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)] | ||
2794 | | mov L:RB->base, BASE | ||
2795 | | jae >3 | ||
2796 | |2: | ||
2797 | | call extern lj_tab_dup // (lua_State *L, Table *kt) | ||
2798 | | // GCtab * returned in eax (RC). | ||
2799 | | mov BASE, L:RB->base | ||
2800 | | movzx RA, PC_RA | ||
2801 | | mov [BASE+RA*8], TAB:RC | ||
2802 | | mov dword [BASE+RA*8+4], LJ_TTAB | ||
2803 | | ins_next | ||
2804 | |3: | ||
2805 | | call extern lj_gc_step_fixtop // (lua_State *L) | ||
2806 | | mov ARG1, L:RB // Args owned by callee. Set it again. | ||
2807 | | jmp <2 | ||
2808 | break; | ||
2809 | |||
2810 | case BC_GGET: | ||
2811 | | ins_AND // RA = dst, RD = str const (~) | ||
2812 | | mov LFUNC:RB, [BASE-8] | ||
2813 | | mov TAB:RB, LFUNC:RB->env | ||
2814 | | mov STR:RC, [KBASE+RD*4] | ||
2815 | | jmp ->BC_TGETS_Z | ||
2816 | break; | ||
2817 | case BC_GSET: | ||
2818 | | ins_AND // RA = src, RD = str const (~) | ||
2819 | | mov LFUNC:RB, [BASE-8] | ||
2820 | | mov TAB:RB, LFUNC:RB->env | ||
2821 | | mov STR:RC, [KBASE+RD*4] | ||
2822 | | jmp ->BC_TSETS_Z | ||
2823 | break; | ||
2824 | |||
2825 | case BC_TGETV: | ||
2826 | | ins_ABC // RA = dst, RB = table, RC = key | ||
2827 | | checktab RB, ->vmeta_tgetv | ||
2828 | | mov TAB:RB, [BASE+RB*8] | ||
2829 | | | ||
2830 | | // Integer key? Convert number to int and back and compare. | ||
2831 | | checknum RC, >5 | ||
2832 | | fld qword [BASE+RC*8] | ||
2833 | | fist ARG1 | ||
2834 | | fild ARG1 | ||
2835 | | fcomparepp // eax (RC) modified! | ||
2836 | | mov RC, ARG1 | ||
2837 | | jne ->vmeta_tgetv // Generic numeric key? Use fallback. | ||
2838 | | cmp RC, TAB:RB->asize // Takes care of unordered, too. | ||
2839 | | jae ->vmeta_tgetv // Not in array part? Use fallback. | ||
2840 | | shl RC, 3 | ||
2841 | | add RC, TAB:RB->array | ||
2842 | | cmp dword [RC+4], LJ_TNIL // Avoid overwriting RB in fastpath. | ||
2843 | | je >2 | ||
2844 | |1: | ||
2845 | | mov RB, [RC] // Get array slot. | ||
2846 | | mov RC, [RC+4] | ||
2847 | | mov [BASE+RA*8], RB | ||
2848 | | mov [BASE+RA*8+4], RC | ||
2849 | | ins_next | ||
2850 | | | ||
2851 | |2: // Check for __index if table value is nil. | ||
2852 | | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath. | ||
2853 | | jz <1 | ||
2854 | | mov TAB:RA, TAB:RB->metatable | ||
2855 | | test byte TAB:RA->nomm, 1<<MM_index | ||
2856 | | jz ->vmeta_tgetv // 'no __index' flag NOT set: check. | ||
2857 | | movzx RA, PC_RA // Restore RA. | ||
2858 | | jmp <1 | ||
2859 | | | ||
2860 | |5: // String key? | ||
2861 | | checkstr RC, ->vmeta_tgetv | ||
2862 | | mov STR:RC, [BASE+RC*8] | ||
2863 | | jmp ->BC_TGETS_Z | ||
2864 | break; | ||
2865 | case BC_TGETS: | ||
2866 | | ins_ABC // RA = dst, RB = table, RC = str const (~) | ||
2867 | | not RC | ||
2868 | | mov STR:RC, [KBASE+RC*4] | ||
2869 | | checktab RB, ->vmeta_tgets | ||
2870 | | mov TAB:RB, [BASE+RB*8] | ||
2871 | |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA. | ||
2872 | | mov RA, TAB:RB->hmask | ||
2873 | | and RA, STR:RC->hash | ||
2874 | | imul RA, #NODE | ||
2875 | | add NODE:RA, TAB:RB->node | ||
2876 | |1: | ||
2877 | | cmp dword NODE:RA->key.it, LJ_TSTR | ||
2878 | | jne >4 | ||
2879 | | cmp dword NODE:RA->key.gcr, STR:RC | ||
2880 | | jne >4 | ||
2881 | | // Ok, key found. Assumes: offsetof(Node, val) == 0 | ||
2882 | | cmp dword [RA+4], LJ_TNIL // Avoid overwriting RB in fastpath. | ||
2883 | | je >5 // Key found, but nil value? | ||
2884 | | movzx RC, PC_RA | ||
2885 | | mov RB, [RA] // Get node value. | ||
2886 | | mov RA, [RA+4] | ||
2887 | | mov [BASE+RC*8], RB | ||
2888 | |2: | ||
2889 | | mov [BASE+RC*8+4], RA | ||
2890 | | ins_next | ||
2891 | | | ||
2892 | |3: | ||
2893 | | movzx RC, PC_RA | ||
2894 | | mov RA, LJ_TNIL | ||
2895 | | jmp <2 | ||
2896 | | | ||
2897 | |4: // Follow hash chain. | ||
2898 | | mov NODE:RA, NODE:RA->next | ||
2899 | | test NODE:RA, NODE:RA | ||
2900 | | jnz <1 | ||
2901 | | // End of hash chain: key not found, nil result. | ||
2902 | | | ||
2903 | |5: // Check for __index if table value is nil. | ||
2904 | | mov TAB:RA, TAB:RB->metatable | ||
2905 | | test TAB:RA, TAB:RA | ||
2906 | | jz <3 // No metatable: done. | ||
2907 | | test byte TAB:RA->nomm, 1<<MM_index | ||
2908 | | jnz <3 // 'no __index' flag set: done. | ||
2909 | | jmp ->vmeta_tgets // Caveat: preserve STR:RC. | ||
2910 | break; | ||
2911 | case BC_TGETB: | ||
2912 | | ins_ABC // RA = dst, RB = table, RC = byte literal | ||
2913 | | checktab RB, ->vmeta_tgetb | ||
2914 | | mov TAB:RB, [BASE+RB*8] | ||
2915 | | cmp RC, TAB:RB->asize | ||
2916 | | jae ->vmeta_tgetb | ||
2917 | | shl RC, 3 | ||
2918 | | add RC, TAB:RB->array | ||
2919 | | cmp dword [RC+4], LJ_TNIL // Avoid overwriting RB in fastpath. | ||
2920 | | je >2 | ||
2921 | |1: | ||
2922 | | mov RB, [RC] // Get array slot. | ||
2923 | | mov RC, [RC+4] | ||
2924 | | mov [BASE+RA*8], RB | ||
2925 | | mov [BASE+RA*8+4], RC | ||
2926 | | ins_next | ||
2927 | | | ||
2928 | |2: // Check for __index if table value is nil. | ||
2929 | | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath. | ||
2930 | | jz <1 | ||
2931 | | mov TAB:RA, TAB:RB->metatable | ||
2932 | | test byte TAB:RA->nomm, 1<<MM_index | ||
2933 | | jz ->vmeta_tgetb // 'no __index' flag NOT set: check. | ||
2934 | | movzx RA, PC_RA // Restore RA. | ||
2935 | | jmp <1 | ||
2936 | break; | ||
2937 | |||
2938 | case BC_TSETV: | ||
2939 | | ins_ABC // RA = src, RB = table, RC = key | ||
2940 | | checktab RB, ->vmeta_tsetv | ||
2941 | | mov TAB:RB, [BASE+RB*8] | ||
2942 | | | ||
2943 | | // Integer key? Convert number to int and back and compare. | ||
2944 | | checknum RC, >5 | ||
2945 | | fld qword [BASE+RC*8] | ||
2946 | | fist ARG1 | ||
2947 | | fild ARG1 | ||
2948 | | fcomparepp // eax (RC) modified! | ||
2949 | | mov RC, ARG1 | ||
2950 | | jne ->vmeta_tsetv // Generic numeric key? Use fallback. | ||
2951 | | cmp RC, TAB:RB->asize // Takes care of unordered, too. | ||
2952 | | jae ->vmeta_tsetv | ||
2953 | | shl RC, 3 | ||
2954 | | add RC, TAB:RB->array | ||
2955 | | cmp dword [RC+4], LJ_TNIL | ||
2956 | | je >3 // Previous value is nil? | ||
2957 | |1: | ||
2958 | | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) | ||
2959 | | jnz >7 | ||
2960 | |2: | ||
2961 | | mov RB, [BASE+RA*8+4] // Set array slot. | ||
2962 | | mov RA, [BASE+RA*8] | ||
2963 | | mov [RC+4], RB | ||
2964 | | mov [RC], RA | ||
2965 | | ins_next | ||
2966 | | | ||
2967 | |3: // Check for __newindex if previous value is nil. | ||
2968 | | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath. | ||
2969 | | jz <1 | ||
2970 | | mov TAB:RA, TAB:RB->metatable | ||
2971 | | test byte TAB:RA->nomm, 1<<MM_newindex | ||
2972 | | jz ->vmeta_tsetv // 'no __newindex' flag NOT set: check. | ||
2973 | | movzx RA, PC_RA // Restore RA. | ||
2974 | | jmp <1 | ||
2975 | | | ||
2976 | |5: // String key? | ||
2977 | | checkstr RC, ->vmeta_tsetv | ||
2978 | | mov STR:RC, [BASE+RC*8] | ||
2979 | | jmp ->BC_TSETS_Z | ||
2980 | | | ||
2981 | |7: // Possible table write barrier for the value. Skip valiswhite check. | ||
2982 | | barrierback TAB:RB, RA | ||
2983 | | movzx RA, PC_RA // Restore RA. | ||
2984 | | jmp <2 | ||
2985 | break; | ||
2986 | case BC_TSETS: | ||
2987 | | ins_ABC // RA = src, RB = table, RC = str const (~) | ||
2988 | | not RC | ||
2989 | | mov STR:RC, [KBASE+RC*4] | ||
2990 | | checktab RB, ->vmeta_tsets | ||
2991 | | mov TAB:RB, [BASE+RB*8] | ||
2992 | |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA. | ||
2993 | | mov RA, TAB:RB->hmask | ||
2994 | | and RA, STR:RC->hash | ||
2995 | | imul RA, #NODE | ||
2996 | | mov byte TAB:RB->nomm, 0 // Clear metamethod cache. | ||
2997 | | add NODE:RA, TAB:RB->node | ||
2998 | |1: | ||
2999 | | cmp dword NODE:RA->key.it, LJ_TSTR | ||
3000 | | jne >5 | ||
3001 | | cmp dword NODE:RA->key.gcr, STR:RC | ||
3002 | | jne >5 | ||
3003 | | // Ok, key found. Assumes: offsetof(Node, val) == 0 | ||
3004 | | cmp dword [RA+4], LJ_TNIL | ||
3005 | | je >4 // Previous value is nil? | ||
3006 | |2: | ||
3007 | | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) | ||
3008 | | jnz >7 | ||
3009 | |3: | ||
3010 | | movzx RC, PC_RA | ||
3011 | | mov RB, [BASE+RC*8+4] // Set node value. | ||
3012 | | mov RC, [BASE+RC*8] | ||
3013 | | mov [RA+4], RB | ||
3014 | | mov [RA], RC | ||
3015 | | ins_next | ||
3016 | | | ||
3017 | |4: // Check for __newindex if previous value is nil. | ||
3018 | | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath. | ||
3019 | | jz <2 | ||
3020 | | mov ARG1, RA // Save RA. | ||
3021 | | mov TAB:RA, TAB:RB->metatable | ||
3022 | | test byte TAB:RA->nomm, 1<<MM_newindex | ||
3023 | | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check. | ||
3024 | | mov RA, ARG1 // Restore RA. | ||
3025 | | jmp <2 | ||
3026 | | | ||
3027 | |5: // Follow hash chain. | ||
3028 | | mov NODE:RA, NODE:RA->next | ||
3029 | | test NODE:RA, NODE:RA | ||
3030 | | jnz <1 | ||
3031 | | // End of hash chain: key not found, add a new one. | ||
3032 | | | ||
3033 | | // But check for __newindex first. | ||
3034 | | mov TAB:RA, TAB:RB->metatable | ||
3035 | | test TAB:RA, TAB:RA | ||
3036 | | jz >6 // No metatable: continue. | ||
3037 | | test byte TAB:RA->nomm, 1<<MM_newindex | ||
3038 | | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check. | ||
3039 | |6: | ||
3040 | | mov ARG5, STR:RC | ||
3041 | | mov ARG6, LJ_TSTR | ||
3042 | | lea RC, ARG5 // Store temp. TValue in ARG5/ARG6. | ||
3043 | | mov ARG4, TAB:RB // Save TAB:RB for us. | ||
3044 | | mov ARG2, TAB:RB | ||
3045 | | mov L:RB, SAVE_L | ||
3046 | | mov ARG3, RC | ||
3047 | | mov ARG1, L:RB | ||
3048 | | mov SAVE_PC, PC | ||
3049 | | mov L:RB->base, BASE | ||
3050 | | call extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) | ||
3051 | | // Handles write barrier for the new key. TValue * returned in eax (RC). | ||
3052 | | mov BASE, L:RB->base | ||
3053 | | mov TAB:RB, ARG4 // Need TAB:RB for barrier. | ||
3054 | | mov RA, eax | ||
3055 | | jmp <2 // Must check write barrier for value. | ||
3056 | | | ||
3057 | |7: // Possible table write barrier for the value. Skip valiswhite check. | ||
3058 | | barrierback TAB:RB, RC // Destroys STR:RC. | ||
3059 | | jmp <3 | ||
3060 | break; | ||
3061 | case BC_TSETB: | ||
3062 | | ins_ABC // RA = src, RB = table, RC = byte literal | ||
3063 | | checktab RB, ->vmeta_tsetb | ||
3064 | | mov TAB:RB, [BASE+RB*8] | ||
3065 | | cmp RC, TAB:RB->asize | ||
3066 | | jae ->vmeta_tsetb | ||
3067 | | shl RC, 3 | ||
3068 | | add RC, TAB:RB->array | ||
3069 | | cmp dword [RC+4], LJ_TNIL | ||
3070 | | je >3 // Previous value is nil? | ||
3071 | |1: | ||
3072 | | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) | ||
3073 | | jnz >7 | ||
3074 | |2: | ||
3075 | | mov RB, [BASE+RA*8+4] // Set array slot. | ||
3076 | | mov RA, [BASE+RA*8] | ||
3077 | | mov [RC+4], RB | ||
3078 | | mov [RC], RA | ||
3079 | | ins_next | ||
3080 | | | ||
3081 | |3: // Check for __newindex if previous value is nil. | ||
3082 | | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath. | ||
3083 | | jz <1 | ||
3084 | | mov TAB:RA, TAB:RB->metatable | ||
3085 | | test byte TAB:RA->nomm, 1<<MM_newindex | ||
3086 | | jz ->vmeta_tsetb // 'no __newindex' flag NOT set: check. | ||
3087 | | movzx RA, PC_RA // Restore RA. | ||
3088 | | jmp <1 | ||
3089 | | | ||
3090 | |7: // Possible table write barrier for the value. Skip valiswhite check. | ||
3091 | | barrierback TAB:RB, RA | ||
3092 | | movzx RA, PC_RA // Restore RA. | ||
3093 | | jmp <2 | ||
3094 | break; | ||
3095 | |||
3096 | case BC_TSETM: | ||
3097 | | ins_AD // RA = base (table at base-1), RD = num const (start index) | ||
3098 | | mov ARG5, KBASE // Need one more free register. | ||
3099 | | fld qword [KBASE+RD*8] | ||
3100 | | fistp ARG4 // Const is guaranteed to be an int. | ||
3101 | |1: | ||
3102 | | lea RA, [BASE+RA*8] | ||
3103 | | mov TAB:RB, [RA-8] // Guaranteed to be a table. | ||
3104 | | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) | ||
3105 | | jnz >7 | ||
3106 | |2: | ||
3107 | | mov RD, NRESULTS | ||
3108 | | mov KBASE, ARG4 | ||
3109 | | sub RD, 1 | ||
3110 | | jz >4 // Nothing to copy? | ||
3111 | | add RD, KBASE // Compute needed size. | ||
3112 | | cmp RD, TAB:RB->asize | ||
3113 | | jae >5 // Does not fit into array part? | ||
3114 | | sub RD, KBASE | ||
3115 | | shl KBASE, 3 | ||
3116 | | add KBASE, TAB:RB->array | ||
3117 | |3: // Copy result slots to table. | ||
3118 | | mov RB, [RA] | ||
3119 | | mov [KBASE], RB | ||
3120 | | mov RB, [RA+4] | ||
3121 | | add RA, 8 | ||
3122 | | mov [KBASE+4], RB | ||
3123 | | add KBASE, 8 | ||
3124 | | sub RD, 1 | ||
3125 | | jnz <3 | ||
3126 | |4: | ||
3127 | | mov KBASE, ARG5 | ||
3128 | | ins_next | ||
3129 | | | ||
3130 | |5: // Need to resize array part. | ||
3131 | | mov ARG2, TAB:RB | ||
3132 | | mov L:RB, SAVE_L | ||
3133 | | mov ARG3, RD | ||
3134 | | mov ARG1, L:RB | ||
3135 | | mov SAVE_PC, PC | ||
3136 | | mov L:RB->base, BASE | ||
3137 | | call extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) | ||
3138 | | mov BASE, L:RB->base | ||
3139 | | movzx RA, PC_RA // Restore RA. | ||
3140 | | jmp <1 // Retry. | ||
3141 | | | ||
3142 | |7: // Possible table write barrier for any value. Skip valiswhite check. | ||
3143 | | barrierback TAB:RB, RD | ||
3144 | | jmp <2 | ||
3145 | break; | ||
3146 | |||
3147 | /* -- Calls and vararg handling ----------------------------------------- */ | ||
3148 | |||
3149 | case BC_CALL: case BC_CALLM: | ||
3150 | | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs | ||
3151 | if (op == BC_CALLM) { | ||
3152 | | add NARGS:RC, NRESULTS | ||
3153 | } | ||
3154 | | lea RA, [BASE+RA*8+8] | ||
3155 | | mov LFUNC:RB, [RA-8] | ||
3156 | | cmp dword [RA-4], LJ_TFUNC | ||
3157 | | jne ->vmeta_call | ||
3158 | | jmp aword LFUNC:RB->gate | ||
3159 | break; | ||
3160 | |||
3161 | case BC_CALLMT: | ||
3162 | | ins_AD // RA = base, RD = extra_nargs | ||
3163 | | add NARGS:RD, NRESULTS | ||
3164 | | // Fall through. Assumes BC_CALLT follows and ins_AD is a no-op. | ||
3165 | break; | ||
3166 | case BC_CALLT: | ||
3167 | | ins_AD // RA = base, RD = nargs+1 | ||
3168 | | lea RA, [BASE+RA*8+8] | ||
3169 | | mov KBASE, BASE // Use KBASE for move + vmeta_call hint. | ||
3170 | | mov LFUNC:RB, [RA-8] | ||
3171 | | cmp dword [RA-4], LJ_TFUNC | ||
3172 | | jne ->vmeta_call | ||
3173 | |->BC_CALLT_Z: | ||
3174 | | mov PC, [BASE-4] | ||
3175 | | test PC, FRAME_TYPE | ||
3176 | | jnz >7 | ||
3177 | |1: | ||
3178 | | mov [BASE-8], LFUNC:RB // Copy function down, reloaded below. | ||
3179 | | mov NRESULTS, NARGS:RD | ||
3180 | | sub NARGS:RD, 1 | ||
3181 | | jz >3 | ||
3182 | |2: | ||
3183 | | mov RB, [RA] // Move args down. | ||
3184 | | mov [KBASE], RB | ||
3185 | | mov RB, [RA+4] | ||
3186 | | mov [KBASE+4], RB | ||
3187 | | add KBASE, 8 | ||
3188 | | add RA, 8 | ||
3189 | | sub NARGS:RD, 1 | ||
3190 | | jnz <2 | ||
3191 | | | ||
3192 | | mov LFUNC:RB, [BASE-8] | ||
3193 | |3: | ||
3194 | | mov RA, BASE // BASE is ignored, except when ... | ||
3195 | | cmp byte LFUNC:RB->ffid, 1 // (> FF_C) Calling a fast function? | ||
3196 | | ja >5 | ||
3197 | |4: | ||
3198 | | mov NARGS:RD, NRESULTS | ||
3199 | | jmp aword LFUNC:RB->gate | ||
3200 | | | ||
3201 | |5: // Tailcall to a fast function. | ||
3202 | | test PC, FRAME_TYPE // Lua frame below? | ||
3203 | | jnz <4 | ||
3204 | | movzx RD, PC_RA // Need to prepare BASE/KBASE. | ||
3205 | | not RD | ||
3206 | | lea BASE, [BASE+RD*8] | ||
3207 | | mov LFUNC:KBASE, [BASE-8] | ||
3208 | | mov PROTO:KBASE, LFUNC:KBASE->pt | ||
3209 | | mov KBASE, PROTO:KBASE->k | ||
3210 | | jmp <4 | ||
3211 | | | ||
3212 | |7: // Tailcall from a vararg function. | ||
3213 | | jnp <1 // Vararg frame below? | ||
3214 | | and PC, -8 | ||
3215 | | sub BASE, PC // Need to relocate BASE/KBASE down. | ||
3216 | | mov KBASE, BASE | ||
3217 | | mov PC, [BASE-4] | ||
3218 | | jmp <1 | ||
3219 | break; | ||
3220 | |||
3221 | case BC_ITERC: | ||
3222 | | ins_A // RA = base, (RB = nresults+1,) RC = nargs+1 (2+1) | ||
3223 | | lea RA, [BASE+RA*8+8] // fb = base+1 | ||
3224 | | mov RB, [RA-24] // Copy state. fb[0] = fb[-3]. | ||
3225 | | mov RC, [RA-20] | ||
3226 | | mov [RA], RB | ||
3227 | | mov [RA+4], RC | ||
3228 | | mov RB, [RA-16] // Copy control var. fb[1] = fb[-2]. | ||
3229 | | mov RC, [RA-12] | ||
3230 | | mov [RA+8], RB | ||
3231 | | mov [RA+12], RC | ||
3232 | | mov LFUNC:RB, [RA-32] // Copy callable. fb[-1] = fb[-4] | ||
3233 | | mov RC, [RA-28] | ||
3234 | | mov [RA-8], LFUNC:RB | ||
3235 | | mov [RA-4], RC | ||
3236 | | cmp RC, LJ_TFUNC // Handle like a regular 2-arg call. | ||
3237 | | mov NARGS:RC, 3 | ||
3238 | | jne ->vmeta_call | ||
3239 | | jmp aword LFUNC:RB->gate | ||
3240 | break; | ||
3241 | |||
3242 | case BC_VARG: | ||
3243 | | ins_AB_ // RA = base, RB = nresults+1, (RC = 1) | ||
3244 | | mov LFUNC:RC, [BASE-8] | ||
3245 | | lea RA, [BASE+RA*8] | ||
3246 | | mov PROTO:RC, LFUNC:RC->pt | ||
3247 | | movzx RC, byte PROTO:RC->numparams | ||
3248 | | mov ARG3, KBASE // Need one more free register. | ||
3249 | | lea KBASE, [BASE+RC*8+(8+FRAME_VARG)] | ||
3250 | | sub KBASE, [BASE-4] | ||
3251 | | // Note: KBASE may now be even _above_ BASE if nargs was < numparams. | ||
3252 | | test RB, RB | ||
3253 | | jz >5 // Copy all varargs? | ||
3254 | | lea RB, [RA+RB*8-8] | ||
3255 | | cmp KBASE, BASE // No vararg slots? | ||
3256 | | jnb >2 | ||
3257 | |1: // Copy vararg slots to destination slots. | ||
3258 | | mov RC, [KBASE-8] | ||
3259 | | mov [RA], RC | ||
3260 | | mov RC, [KBASE-4] | ||
3261 | | add KBASE, 8 | ||
3262 | | mov [RA+4], RC | ||
3263 | | add RA, 8 | ||
3264 | | cmp RA, RB // All destination slots filled? | ||
3265 | | jnb >3 | ||
3266 | | cmp KBASE, BASE // No more vararg slots? | ||
3267 | | jb <1 | ||
3268 | |2: // Fill up remainder with nil. | ||
3269 | | mov dword [RA+4], LJ_TNIL | ||
3270 | | add RA, 8 | ||
3271 | | cmp RA, RB | ||
3272 | | jb <2 | ||
3273 | |3: | ||
3274 | | mov KBASE, ARG3 | ||
3275 | | ins_next | ||
3276 | | | ||
3277 | |5: // Copy all varargs. | ||
3278 | | mov NRESULTS, 1 // NRESULTS = 0+1 | ||
3279 | | mov RC, BASE | ||
3280 | | sub RC, KBASE | ||
3281 | | jbe <3 // No vararg slots? | ||
3282 | | mov RB, RC | ||
3283 | | shr RB, 3 | ||
3284 | | mov ARG2, RB // Store this for stack growth below. | ||
3285 | | add RB, 1 | ||
3286 | | mov NRESULTS, RB // NRESULTS = #varargs+1 | ||
3287 | | mov L:RB, SAVE_L | ||
3288 | | add RC, RA | ||
3289 | | cmp RC, L:RB->maxstack | ||
3290 | | ja >7 // Need to grow stack? | ||
3291 | |6: // Copy all vararg slots. | ||
3292 | | mov RC, [KBASE-8] | ||
3293 | | mov [RA], RC | ||
3294 | | mov RC, [KBASE-4] | ||
3295 | | add KBASE, 8 | ||
3296 | | mov [RA+4], RC | ||
3297 | | add RA, 8 | ||
3298 | | cmp KBASE, BASE // No more vararg slots? | ||
3299 | | jb <6 | ||
3300 | | jmp <3 | ||
3301 | | | ||
3302 | |7: // Grow stack for varargs. | ||
3303 | | mov L:RB->base, BASE | ||
3304 | | mov L:RB->top, RA | ||
3305 | | mov SAVE_PC, PC | ||
3306 | | sub KBASE, BASE // Need delta, because BASE may change. | ||
3307 | | mov ARG1, L:RB | ||
3308 | | call extern lj_state_growstack // (lua_State *L, int n) | ||
3309 | | mov BASE, L:RB->base | ||
3310 | | mov RA, L:RB->top | ||
3311 | | add KBASE, BASE | ||
3312 | | jmp <6 | ||
3313 | break; | ||
3314 | |||
3315 | /* -- Returns ----------------------------------------------------------- */ | ||
3316 | |||
3317 | case BC_RETM: | ||
3318 | | ins_AD // RA = results, RD = extra_nresults | ||
3319 | | add RD, NRESULTS // NRESULTS >=1, so RD >=1. | ||
3320 | | // Fall through. Assumes BC_RET follows and ins_AD is a no-op. | ||
3321 | break; | ||
3322 | |||
3323 | case BC_RET: case BC_RET0: case BC_RET1: | ||
3324 | | ins_AD // RA = results, RD = nresults+1 | ||
3325 | if (op != BC_RET0) { | ||
3326 | | shl RA, 3 | ||
3327 | } | ||
3328 | |1: | ||
3329 | | mov PC, [BASE-4] | ||
3330 | | mov NRESULTS, RD // Save nresults+1. | ||
3331 | | test PC, FRAME_TYPE // Check frame type marker. | ||
3332 | | jnz >7 // Not returning to a fixarg Lua func? | ||
3333 | switch (op) { | ||
3334 | case BC_RET: | ||
3335 | |->BC_RET_Z: | ||
3336 | | mov KBASE, BASE // Use KBASE for result move. | ||
3337 | | sub RD, 1 | ||
3338 | | jz >3 | ||
3339 | |2: | ||
3340 | | mov RB, [KBASE+RA] // Move results down. | ||
3341 | | mov [KBASE-8], RB | ||
3342 | | mov RB, [KBASE+RA+4] | ||
3343 | | mov [KBASE-4], RB | ||
3344 | | add KBASE, 8 | ||
3345 | | sub RD, 1 | ||
3346 | | jnz <2 | ||
3347 | |3: | ||
3348 | | mov RD, NRESULTS // Note: NRESULTS may be >255. | ||
3349 | | movzx RB, PC_RB // So cannot compare with RDL! | ||
3350 | |5: | ||
3351 | | cmp RB, RD // More results expected? | ||
3352 | | ja >6 | ||
3353 | break; | ||
3354 | case BC_RET1: | ||
3355 | | mov RB, [BASE+RA+4] | ||
3356 | | mov [BASE-4], RB | ||
3357 | | mov RB, [BASE+RA] | ||
3358 | | mov [BASE-8], RB | ||
3359 | /* fallthrough */ | ||
3360 | case BC_RET0: | ||
3361 | |5: | ||
3362 | | cmp PC_RB, RDL // More results expected? | ||
3363 | | ja >6 | ||
3364 | default: | ||
3365 | break; | ||
3366 | } | ||
3367 | | movzx RA, PC_RA | ||
3368 | | not RA // Note: ~RA = -(RA+1) | ||
3369 | | lea BASE, [BASE+RA*8] // base = base - (RA+1)*8 | ||
3370 | | mov LFUNC:KBASE, [BASE-8] | ||
3371 | | mov PROTO:KBASE, LFUNC:KBASE->pt | ||
3372 | | mov KBASE, PROTO:KBASE->k | ||
3373 | | ins_next | ||
3374 | | | ||
3375 | |6: // Fill up results with nil. | ||
3376 | if (op == BC_RET) { | ||
3377 | | mov dword [KBASE-4], LJ_TNIL // Note: relies on shifted base. | ||
3378 | | add KBASE, 8 | ||
3379 | } else { | ||
3380 | | mov dword [BASE+RD*8-12], LJ_TNIL | ||
3381 | } | ||
3382 | | add RD, 1 | ||
3383 | | jmp <5 | ||
3384 | | | ||
3385 | |7: // Non-standard return case. | ||
3386 | | jnp ->vm_return | ||
3387 | | // Return from vararg function: relocate BASE down and RA up. | ||
3388 | | and PC, -8 | ||
3389 | | sub BASE, PC | ||
3390 | if (op != BC_RET0) { | ||
3391 | | add RA, PC | ||
3392 | } | ||
3393 | | jmp <1 | ||
3394 | break; | ||
3395 | |||
3396 | /* -- Loops and branches ------------------------------------------------ */ | ||
3397 | |||
3398 | |.define FOR_IDX, qword [RA]; .define FOR_TIDX, dword [RA+4] | ||
3399 | |.define FOR_STOP, qword [RA+8]; .define FOR_TSTOP, dword [RA+12] | ||
3400 | |.define FOR_STEP, qword [RA+16]; .define FOR_TSTEP, dword [RA+20] | ||
3401 | |.define FOR_EXT, qword [RA+24]; .define FOR_TEXT, dword [RA+28] | ||
3402 | |||
3403 | case BC_FORL: | ||
3404 | #if LJ_HASJIT | ||
3405 | | hotloop RB | ||
3406 | #endif | ||
3407 | | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op. | ||
3408 | break; | ||
3409 | |||
3410 | case BC_JFORI: | ||
3411 | case BC_JFORL: | ||
3412 | #if !LJ_HASJIT | ||
3413 | break; | ||
3414 | #endif | ||
3415 | case BC_FORI: | ||
3416 | case BC_IFORL: | ||
3417 | vk = (op == BC_IFORL || op == BC_JFORL); | ||
3418 | | ins_AJ // RA = base, RD = target (after end of loop or start of loop) | ||
3419 | | lea RA, [BASE+RA*8] | ||
3420 | if (!vk) { | ||
3421 | | cmp FOR_TIDX, LJ_TISNUM; ja ->vmeta_for // Type checks | ||
3422 | | cmp FOR_TSTOP, LJ_TISNUM; ja ->vmeta_for | ||
3423 | } | ||
3424 | | mov RB, FOR_TSTEP // Load type/hiword of for step. | ||
3425 | if (!vk) { | ||
3426 | | cmp RB, LJ_TISNUM; ja ->vmeta_for | ||
3427 | } | ||
3428 | | fld FOR_STOP | ||
3429 | | fld FOR_IDX | ||
3430 | if (vk) { | ||
3431 | | fadd FOR_STEP // nidx = idx + step | ||
3432 | | fst FOR_IDX | ||
3433 | } | ||
3434 | | fst FOR_EXT | ||
3435 | | test RB, RB // Swap lim/(n)idx if step non-negative. | ||
3436 | | js >1 | ||
3437 | | fxch | ||
3438 | |1: | ||
3439 | | fcomparepp // eax (RD) modified if !cmov. | ||
3440 | if (!cmov) { | ||
3441 | | movzx RD, PC_RD // Need to reload RD. | ||
3442 | } | ||
3443 | if (op == BC_FORI) { | ||
3444 | | jnb >2 | ||
3445 | | branchPC RD | ||
3446 | } else if (op == BC_JFORI) { | ||
3447 | | branchPC RD | ||
3448 | | movzx RD, PC_RD | ||
3449 | | jnb =>BC_JLOOP | ||
3450 | } else if (op == BC_IFORL) { | ||
3451 | | jb >2 | ||
3452 | | branchPC RD | ||
3453 | } else { | ||
3454 | | jnb =>BC_JLOOP | ||
3455 | } | ||
3456 | |2: | ||
3457 | | ins_next | ||
3458 | break; | ||
3459 | |||
3460 | case BC_ITERL: | ||
3461 | #if LJ_HASJIT | ||
3462 | | hotloop RB | ||
3463 | #endif | ||
3464 | | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op. | ||
3465 | break; | ||
3466 | |||
3467 | case BC_JITERL: | ||
3468 | #if !LJ_HASJIT | ||
3469 | break; | ||
3470 | #endif | ||
3471 | case BC_IITERL: | ||
3472 | | ins_AJ // RA = base, RD = target | ||
3473 | | lea RA, [BASE+RA*8] | ||
3474 | | mov RB, [RA+4] | ||
3475 | | cmp RB, LJ_TNIL; je >1 // Stop if iterator returned nil. | ||
3476 | if (op == BC_JITERL) { | ||
3477 | | mov [RA-4], RB | ||
3478 | | mov RB, [RA] | ||
3479 | | mov [RA-8], RB | ||
3480 | | jmp =>BC_JLOOP | ||
3481 | } else { | ||
3482 | | branchPC RD // Otherwise save control var + branch. | ||
3483 | | mov RD, [RA] | ||
3484 | | mov [RA-4], RB | ||
3485 | | mov [RA-8], RD | ||
3486 | } | ||
3487 | |1: | ||
3488 | | ins_next | ||
3489 | break; | ||
3490 | |||
3491 | case BC_LOOP: | ||
3492 | | ins_A // RA = base, RD = target (loop extent) | ||
3493 | | // Note: RA/RD is only used by trace recorder to determine scope/extent | ||
3494 | | // This opcode does NOT jump, its only purpose is to detect a hot loop. | ||
3495 | #if LJ_HASJIT | ||
3496 | | hotloop RB | ||
3497 | #endif | ||
3498 | | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op. | ||
3499 | break; | ||
3500 | |||
3501 | case BC_ILOOP: | ||
3502 | | ins_A // RA = base, RD = target (loop extent) | ||
3503 | | ins_next | ||
3504 | break; | ||
3505 | |||
3506 | case BC_JLOOP: | ||
3507 | #if LJ_HASJIT | ||
3508 | | ins_AD // RA = base (ignored), RD = traceno | ||
3509 | | mov RA, [DISPATCH+DISPATCH_J(trace)] | ||
3510 | | mov TRACE:RD, [RA+RD*4] | ||
3511 | | mov RD, TRACE:RD->mcode | ||
3512 | | mov L:RB, SAVE_L | ||
3513 | | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE | ||
3514 | | mov [DISPATCH+DISPATCH_GL(jit_L)], L:RB | ||
3515 | | jmp RD | ||
3516 | #endif | ||
3517 | break; | ||
3518 | |||
3519 | case BC_JMP: | ||
3520 | | ins_AJ // RA = unused, RD = target | ||
3521 | | branchPC RD | ||
3522 | | ins_next | ||
3523 | break; | ||
3524 | |||
3525 | /* ---------------------------------------------------------------------- */ | ||
3526 | |||
3527 | default: | ||
3528 | fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); | ||
3529 | exit(2); | ||
3530 | break; | ||
3531 | } | ||
3532 | } | ||
3533 | |||
3534 | static int build_backend(BuildCtx *ctx) | ||
3535 | { | ||
3536 | int op; | ||
3537 | int cmov = 1; | ||
3538 | #ifdef LUAJIT_CPU_NOCMOV | ||
3539 | cmov = 0; | ||
3540 | #endif | ||
3541 | |||
3542 | dasm_growpc(Dst, BC__MAX); | ||
3543 | |||
3544 | build_subroutines(ctx, cmov); | ||
3545 | |||
3546 | |.code_op | ||
3547 | for (op = 0; op < BC__MAX; op++) | ||
3548 | build_ins(ctx, (BCOp)op, op, cmov); | ||
3549 | |||
3550 | return BC__MAX; | ||
3551 | } | ||
3552 | |||
3553 | /* Emit pseudo frame-info for all assembler functions. */ | ||
3554 | static void emit_asm_debug(BuildCtx *ctx) | ||
3555 | { | ||
3556 | switch (ctx->mode) { | ||
3557 | case BUILD_elfasm: | ||
3558 | fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n"); | ||
3559 | fprintf(ctx->fp, | ||
3560 | ".Lframe0:\n" | ||
3561 | "\t.long .LECIE0-.LSCIE0\n" | ||
3562 | ".LSCIE0:\n" | ||
3563 | "\t.long 0xffffffff\n" | ||
3564 | "\t.byte 0x1\n" | ||
3565 | "\t.string \"\"\n" | ||
3566 | "\t.uleb128 0x1\n" | ||
3567 | "\t.sleb128 -4\n" | ||
3568 | "\t.byte 0x8\n" | ||
3569 | "\t.byte 0xc\n\t.uleb128 0x4\n\t.uleb128 0x4\n" | ||
3570 | "\t.byte 0x88\n\t.uleb128 0x1\n" | ||
3571 | "\t.align 4\n" | ||
3572 | ".LECIE0:\n\n"); | ||
3573 | fprintf(ctx->fp, | ||
3574 | ".LSFDE0:\n" | ||
3575 | "\t.long .LEFDE0-.LASFDE0\n" | ||
3576 | ".LASFDE0:\n" | ||
3577 | "\t.long .Lframe0\n" | ||
3578 | "\t.long .Lbegin\n" | ||
3579 | "\t.long %d\n" | ||
3580 | "\t.byte 0xe\n\t.uleb128 0x30\n" /* def_cfa_offset */ | ||
3581 | "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */ | ||
3582 | "\t.byte 0x87\n\t.uleb128 0x3\n" /* offset edi */ | ||
3583 | "\t.byte 0x86\n\t.uleb128 0x4\n" /* offset esi */ | ||
3584 | "\t.byte 0x83\n\t.uleb128 0x5\n" /* offset ebx */ | ||
3585 | "\t.align 4\n" | ||
3586 | ".LEFDE0:\n\n", (int)ctx->codesz); | ||
3587 | break; | ||
3588 | default: /* Difficult for other modes. */ | ||
3589 | break; | ||
3590 | } | ||
3591 | } | ||
3592 | |||
diff --git a/src/lauxlib.h b/src/lauxlib.h new file mode 100644 index 00000000..505a9f52 --- /dev/null +++ b/src/lauxlib.h | |||
@@ -0,0 +1,159 @@ | |||
1 | /* | ||
2 | ** $Id: lauxlib.h,v 1.88.1.1 2007/12/27 13:02:25 roberto Exp $ | ||
3 | ** Auxiliary functions for building Lua libraries | ||
4 | ** See Copyright Notice in lua.h | ||
5 | */ | ||
6 | |||
7 | |||
8 | #ifndef lauxlib_h | ||
9 | #define lauxlib_h | ||
10 | |||
11 | |||
12 | #include <stddef.h> | ||
13 | #include <stdio.h> | ||
14 | |||
15 | #include "lua.h" | ||
16 | |||
17 | |||
18 | #define luaL_getn(L,i) ((int)lua_objlen(L, i)) | ||
19 | #define luaL_setn(L,i,j) ((void)0) /* no op! */ | ||
20 | |||
21 | /* extra error code for `luaL_load' */ | ||
22 | #define LUA_ERRFILE (LUA_ERRERR+1) | ||
23 | |||
24 | typedef struct luaL_Reg { /* One entry of a function registration list. */ | ||
25 | const char *name; /* Field name the function is stored under; a NULL name ends the list. */ | ||
26 | lua_CFunction func; /* C function registered under that name. */ | ||
27 | } luaL_Reg; | ||
28 | |||
29 | LUALIB_API void (luaL_openlib) (lua_State *L, const char *libname, | ||
30 | const luaL_Reg *l, int nup); | ||
31 | LUALIB_API void (luaL_register) (lua_State *L, const char *libname, | ||
32 | const luaL_Reg *l); | ||
33 | LUALIB_API int (luaL_getmetafield) (lua_State *L, int obj, const char *e); | ||
34 | LUALIB_API int (luaL_callmeta) (lua_State *L, int obj, const char *e); | ||
35 | LUALIB_API int (luaL_typerror) (lua_State *L, int narg, const char *tname); | ||
36 | LUALIB_API int (luaL_argerror) (lua_State *L, int numarg, const char *extramsg); | ||
37 | LUALIB_API const char *(luaL_checklstring) (lua_State *L, int numArg, | ||
38 | size_t *l); | ||
39 | LUALIB_API const char *(luaL_optlstring) (lua_State *L, int numArg, | ||
40 | const char *def, size_t *l); | ||
41 | LUALIB_API lua_Number (luaL_checknumber) (lua_State *L, int numArg); | ||
42 | LUALIB_API lua_Number (luaL_optnumber) (lua_State *L, int nArg, lua_Number def); | ||
43 | |||
44 | LUALIB_API lua_Integer (luaL_checkinteger) (lua_State *L, int numArg); | ||
45 | LUALIB_API lua_Integer (luaL_optinteger) (lua_State *L, int nArg, | ||
46 | lua_Integer def); | ||
47 | |||
48 | LUALIB_API void (luaL_checkstack) (lua_State *L, int sz, const char *msg); | ||
49 | LUALIB_API void (luaL_checktype) (lua_State *L, int narg, int t); | ||
50 | LUALIB_API void (luaL_checkany) (lua_State *L, int narg); | ||
51 | |||
52 | LUALIB_API int (luaL_newmetatable) (lua_State *L, const char *tname); | ||
53 | LUALIB_API void *(luaL_checkudata) (lua_State *L, int ud, const char *tname); | ||
54 | |||
55 | LUALIB_API void (luaL_where) (lua_State *L, int lvl); | ||
56 | LUALIB_API int (luaL_error) (lua_State *L, const char *fmt, ...); | ||
57 | |||
58 | LUALIB_API int (luaL_checkoption) (lua_State *L, int narg, const char *def, | ||
59 | const char *const lst[]); | ||
60 | |||
61 | LUALIB_API int (luaL_ref) (lua_State *L, int t); | ||
62 | LUALIB_API void (luaL_unref) (lua_State *L, int t, int ref); | ||
63 | |||
64 | LUALIB_API int (luaL_loadfile) (lua_State *L, const char *filename); | ||
65 | LUALIB_API int (luaL_loadbuffer) (lua_State *L, const char *buff, size_t sz, | ||
66 | const char *name); | ||
67 | LUALIB_API int (luaL_loadstring) (lua_State *L, const char *s); | ||
68 | |||
69 | LUALIB_API lua_State *(luaL_newstate) (void); | ||
70 | |||
71 | |||
72 | LUALIB_API const char *(luaL_gsub) (lua_State *L, const char *s, const char *p, | ||
73 | const char *r); | ||
74 | |||
75 | LUALIB_API const char *(luaL_findtable) (lua_State *L, int idx, | ||
76 | const char *fname, int szhint); | ||
77 | |||
78 | |||
79 | |||
80 | |||
81 | /* | ||
82 | ** =============================================================== | ||
83 | ** some useful macros | ||
84 | ** =============================================================== | ||
85 | */ | ||
86 | |||
87 | #define luaL_argcheck(L, cond,numarg,extramsg) \ | ||
88 | ((void)((cond) || luaL_argerror(L, (numarg), (extramsg)))) | ||
89 | #define luaL_checkstring(L,n) (luaL_checklstring(L, (n), NULL)) | ||
90 | #define luaL_optstring(L,n,d) (luaL_optlstring(L, (n), (d), NULL)) | ||
91 | #define luaL_checkint(L,n) ((int)luaL_checkinteger(L, (n))) | ||
92 | #define luaL_optint(L,n,d) ((int)luaL_optinteger(L, (n), (d))) | ||
93 | #define luaL_checklong(L,n) ((long)luaL_checkinteger(L, (n))) | ||
94 | #define luaL_optlong(L,n,d) ((long)luaL_optinteger(L, (n), (d))) | ||
95 | |||
96 | #define luaL_typename(L,i) lua_typename(L, lua_type(L,(i))) | ||
97 | |||
98 | #define luaL_dofile(L, fn) \ | ||
99 | (luaL_loadfile(L, fn) || lua_pcall(L, 0, LUA_MULTRET, 0)) | ||
100 | |||
101 | #define luaL_dostring(L, s) \ | ||
102 | (luaL_loadstring(L, s) || lua_pcall(L, 0, LUA_MULTRET, 0)) | ||
103 | |||
104 | #define luaL_getmetatable(L,n) (lua_getfield(L, LUA_REGISTRYINDEX, (n))) | ||
105 | |||
106 | #define luaL_opt(L,f,n,d) (lua_isnoneornil(L,(n)) ? (d) : f(L,(n))) | ||
107 | |||
108 | /* | ||
109 | ** {====================================================== | ||
110 | ** Generic Buffer manipulation | ||
111 | ** ======================================================= | ||
112 | */ | ||
113 | |||
114 | |||
115 | |||
116 | typedef struct luaL_Buffer { /* String builder staging data in `buffer' and spilling it to the Lua stack. */ | ||
117 | char *p; /* current position in buffer */ | ||
118 | int lvl; /* number of strings in the stack (level) */ | ||
119 | lua_State *L; /* state whose stack receives the flushed pieces */ | ||
120 | char buffer[LUAL_BUFFERSIZE]; /* fixed-size staging area; luaL_addchar calls luaL_prepbuffer once it is full */ | ||
121 | } luaL_Buffer; | ||
122 | |||
123 | #define luaL_addchar(B,c) \ | ||
124 | ((void)((B)->p < ((B)->buffer+LUAL_BUFFERSIZE) || luaL_prepbuffer(B)), \ | ||
125 | (*(B)->p++ = (char)(c))) | ||
126 | |||
127 | /* compatibility only */ | ||
128 | #define luaL_putchar(B,c) luaL_addchar(B,c) | ||
129 | |||
130 | #define luaL_addsize(B,n) ((B)->p += (n)) | ||
131 | |||
132 | LUALIB_API void (luaL_buffinit) (lua_State *L, luaL_Buffer *B); | ||
133 | LUALIB_API char *(luaL_prepbuffer) (luaL_Buffer *B); | ||
134 | LUALIB_API void (luaL_addlstring) (luaL_Buffer *B, const char *s, size_t l); | ||
135 | LUALIB_API void (luaL_addstring) (luaL_Buffer *B, const char *s); | ||
136 | LUALIB_API void (luaL_addvalue) (luaL_Buffer *B); | ||
137 | LUALIB_API void (luaL_pushresult) (luaL_Buffer *B); | ||
138 | |||
139 | |||
140 | /* }====================================================== */ | ||
141 | |||
142 | |||
143 | /* compatibility with ref system */ | ||
144 | |||
145 | /* pre-defined references */ | ||
146 | #define LUA_NOREF (-2) | ||
147 | #define LUA_REFNIL (-1) | ||
148 | |||
149 | #define lua_ref(L,lock) ((lock) ? luaL_ref(L, LUA_REGISTRYINDEX) : \ | ||
150 | (lua_pushstring(L, "unlocked references are obsolete"), lua_error(L), 0)) | ||
151 | |||
152 | #define lua_unref(L,ref) luaL_unref(L, LUA_REGISTRYINDEX, (ref)) | ||
153 | |||
154 | #define lua_getref(L,ref) lua_rawgeti(L, LUA_REGISTRYINDEX, (ref)) | ||
155 | |||
156 | |||
157 | #define luaL_reg luaL_Reg | ||
158 | |||
159 | #endif | ||
diff --git a/src/lib_aux.c b/src/lib_aux.c new file mode 100644 index 00000000..1ae32dbc --- /dev/null +++ b/src/lib_aux.c | |||
@@ -0,0 +1,438 @@ | |||
1 | /* | ||
2 | ** Auxiliary library for the Lua/C API. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | ** | ||
5 | ** Major parts taken verbatim or adapted from the Lua interpreter. | ||
6 | ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h | ||
7 | */ | ||
8 | |||
9 | #include <errno.h> | ||
10 | #include <stdarg.h> | ||
11 | #include <stdio.h> | ||
12 | |||
13 | #define lib_aux_c | ||
14 | #define LUA_LIB | ||
15 | |||
16 | #include "lua.h" | ||
17 | #include "lauxlib.h" | ||
18 | |||
19 | #include "lj_obj.h" | ||
20 | #include "lj_err.h" | ||
21 | #include "lj_lib.h" | ||
22 | |||
23 | /* Convert a relative (negative) stack index to an absolute one. Positive indexes and pseudo-indexes (<= LUA_REGISTRYINDEX) are returned unchanged. Note: the argument i is evaluated more than once. */ | ||
24 | #define abs_index(L, i) \ | ||
25 | ((i) > 0 || (i) <= LUA_REGISTRYINDEX ? (i) : lua_gettop(L) + (i) + 1) | ||
26 | |||
27 | /* -- Type checks --------------------------------------------------------- */ | ||
28 | |||
29 | LUALIB_API void luaL_checkstack(lua_State *L, int size, const char *msg) | ||
30 | { | ||
31 | if (!lua_checkstack(L, size)) | ||
32 | lj_err_callerv(L, LJ_ERR_STKOVM, msg); | ||
33 | } | ||
34 | |||
35 | LUALIB_API void luaL_checktype(lua_State *L, int narg, int tt) | ||
36 | { | ||
37 | if (lua_type(L, narg) != tt) | ||
38 | lj_err_argt(L, narg, tt); | ||
39 | } | ||
40 | |||
41 | LUALIB_API void luaL_checkany(lua_State *L, int narg) | ||
42 | { | ||
43 | lj_lib_checkany(L, narg); | ||
44 | } | ||
45 | |||
46 | LUALIB_API const char *luaL_checklstring(lua_State *L, int narg, size_t *len) | ||
47 | { | ||
48 | GCstr *s = lj_lib_checkstr(L, narg); | ||
49 | if (len != NULL) *len = s->len; | ||
50 | return strdata(s); | ||
51 | } | ||
52 | |||
53 | LUALIB_API const char *luaL_optlstring(lua_State *L, int narg, | ||
54 | const char *def, size_t *len) | ||
55 | { | ||
56 | GCstr *s = lj_lib_optstr(L, narg); | ||
57 | if (s) { | ||
58 | if (len != NULL) *len = s->len; | ||
59 | return strdata(s); | ||
60 | } | ||
61 | if (len != NULL) *len = def ? strlen(def) : 0; | ||
62 | return def; | ||
63 | } | ||
64 | |||
65 | LUALIB_API lua_Number luaL_checknumber(lua_State *L, int narg) | ||
66 | { | ||
67 | return lj_lib_checknum(L, narg); | ||
68 | } | ||
69 | |||
/* Optional number argument. lj_lib_opt is a macro (defined elsewhere) that takes two statement arguments; presumably the first runs when argument narg is present and the second (returning def) otherwise -- TODO confirm against lj_lib.h. */
70 | LUALIB_API lua_Number luaL_optnumber(lua_State *L, int narg, lua_Number def) | ||
71 | { | ||
72 | lj_lib_opt(L, narg, | ||
73 | return lj_lib_checknum(L, narg); | ||
74 | , | ||
75 | return def; | ||
76 | ) | ||
77 | } | ||
78 | |||
79 | LUALIB_API lua_Integer luaL_checkinteger(lua_State *L, int narg) | ||
80 | { | ||
81 | #if LJ_64 | ||
82 | return (lua_Integer)lj_lib_checknum(L, narg); | ||
83 | #else | ||
84 | return lj_lib_checkint(L, narg); | ||
85 | #endif | ||
86 | } | ||
87 | |||
/* Optional integer argument. With LJ_64 set this goes through the lj_lib_opt macro (two statement arguments; presumably "argument present" then "argument absent" -- TODO confirm against lj_lib.h) and casts the checked number to lua_Integer; otherwise it forwards to lj_lib_optint(). */
88 | LUALIB_API lua_Integer luaL_optinteger(lua_State *L, int narg, lua_Integer def) | ||
89 | { | ||
90 | #if LJ_64 | ||
91 | lj_lib_opt(L, narg, | ||
92 | return (lua_Integer)lj_lib_checknum(L, narg); | ||
93 | , | ||
94 | return def; | ||
95 | ) | ||
96 | #else | ||
97 | return lj_lib_optint(L, narg, def); | ||
98 | #endif | ||
99 | } | ||
100 | |||
101 | LUALIB_API int luaL_checkoption(lua_State *L, int narg, const char *def, | ||
102 | const char *const lst[]) | ||
103 | { | ||
104 | GCstr *s = lj_lib_optstr(L, narg); | ||
105 | const char *opt = s ? strdata(s) : def; | ||
106 | uint32_t i; | ||
107 | if (!opt) lj_err_argt(L, narg, LUA_TSTRING); | ||
108 | for (i = 0; lst[i]; i++) | ||
109 | if (strcmp(lst[i], opt) == 0) | ||
110 | return (int)i; | ||
111 | lj_err_argv(L, narg, LJ_ERR_INVOPTM, opt); | ||
112 | } | ||
113 | |||
114 | /* -- Module registration ------------------------------------------------- */ | ||
115 | |||
116 | LUALIB_API const char *luaL_findtable(lua_State *L, int idx, | ||
117 | const char *fname, int szhint) | ||
118 | { | ||
119 | const char *e; | ||
120 | lua_pushvalue(L, idx); | ||
121 | do { | ||
122 | e = strchr(fname, '.'); | ||
123 | if (e == NULL) e = fname + strlen(fname); | ||
124 | lua_pushlstring(L, fname, (size_t)(e - fname)); | ||
125 | lua_rawget(L, -2); | ||
126 | if (lua_isnil(L, -1)) { /* no such field? */ | ||
127 | lua_pop(L, 1); /* remove this nil */ | ||
128 | lua_createtable(L, 0, (*e == '.' ? 1 : szhint)); /* new table for field */ | ||
129 | lua_pushlstring(L, fname, (size_t)(e - fname)); | ||
130 | lua_pushvalue(L, -2); | ||
131 | lua_settable(L, -4); /* set new table into field */ | ||
132 | } else if (!lua_istable(L, -1)) { /* field has a non-table value? */ | ||
133 | lua_pop(L, 2); /* remove table and value */ | ||
134 | return fname; /* return problematic part of the name */ | ||
135 | } | ||
136 | lua_remove(L, -2); /* remove previous table */ | ||
137 | fname = e + 1; | ||
138 | } while (*e == '.'); | ||
139 | return NULL; | ||
140 | } | ||
141 | |||
142 | static int libsize(const luaL_Reg *l) | ||
143 | { | ||
144 | int size = 0; | ||
145 | for (; l->name; l++) size++; | ||
146 | return size; | ||
147 | } | ||
148 | |||
149 | LUALIB_API void luaL_openlib(lua_State *L, const char *libname, | ||
150 | const luaL_Reg *l, int nup) | ||
151 | { | ||
152 | if (libname) { | ||
153 | int size = libsize(l); | ||
154 | /* check whether lib already exists */ | ||
155 | luaL_findtable(L, LUA_REGISTRYINDEX, "_LOADED", 16); | ||
156 | lua_getfield(L, -1, libname); /* get _LOADED[libname] */ | ||
157 | if (!lua_istable(L, -1)) { /* not found? */ | ||
158 | lua_pop(L, 1); /* remove previous result */ | ||
159 | /* try global variable (and create one if it does not exist) */ | ||
160 | if (luaL_findtable(L, LUA_GLOBALSINDEX, libname, size) != NULL) | ||
161 | lj_err_callerv(L, LJ_ERR_BADMODN, libname); | ||
162 | lua_pushvalue(L, -1); | ||
163 | lua_setfield(L, -3, libname); /* _LOADED[libname] = new table */ | ||
164 | } | ||
165 | lua_remove(L, -2); /* remove _LOADED table */ | ||
166 | lua_insert(L, -(nup+1)); /* move library table to below upvalues */ | ||
167 | } | ||
168 | for (; l->name; l++) { | ||
169 | int i; | ||
170 | for (i = 0; i < nup; i++) /* copy upvalues to the top */ | ||
171 | lua_pushvalue(L, -nup); | ||
172 | lua_pushcclosure(L, l->func, nup); | ||
173 | lua_setfield(L, -(nup+2), l->name); | ||
174 | } | ||
175 | lua_pop(L, nup); /* remove upvalues */ | ||
176 | } | ||
177 | |||
178 | LUALIB_API void luaL_register(lua_State *L, const char *libname, | ||
179 | const luaL_Reg *l) | ||
180 | { | ||
181 | luaL_openlib(L, libname, l, 0); | ||
182 | } | ||
183 | |||
184 | LUALIB_API const char *luaL_gsub(lua_State *L, const char *s, | ||
185 | const char *p, const char *r) | ||
186 | { | ||
187 | const char *wild; | ||
188 | size_t l = strlen(p); | ||
189 | luaL_Buffer b; | ||
190 | luaL_buffinit(L, &b); | ||
191 | while ((wild = strstr(s, p)) != NULL) { | ||
192 | luaL_addlstring(&b, s, (size_t)(wild - s)); /* push prefix */ | ||
193 | luaL_addstring(&b, r); /* push replacement in place of pattern */ | ||
194 | s = wild + l; /* continue after `p' */ | ||
195 | } | ||
196 | luaL_addstring(&b, s); /* push last suffix */ | ||
197 | luaL_pushresult(&b); | ||
198 | return lua_tostring(L, -1); | ||
199 | } | ||
200 | |||
201 | /* -- Buffer handling ----------------------------------------------------- */ | ||
202 | |||
203 | #define bufflen(B) ((size_t)((B)->p - (B)->buffer)) /* bytes currently stored in the local buffer */ | ||
204 | #define bufffree(B) ((size_t)(LUAL_BUFFERSIZE - bufflen(B))) /* bytes still unused in the local buffer */ | ||
205 | |||
206 | static int emptybuffer(luaL_Buffer *B) | ||
207 | { | ||
208 | size_t l = bufflen(B); | ||
209 | if (l == 0) | ||
210 | return 0; /* put nothing on stack */ | ||
211 | lua_pushlstring(B->L, B->buffer, l); | ||
212 | B->p = B->buffer; | ||
213 | B->lvl++; | ||
214 | return 1; | ||
215 | } | ||
216 | |||
217 | static void adjuststack(luaL_Buffer *B) | ||
218 | { | ||
219 | if (B->lvl > 1) { | ||
220 | lua_State *L = B->L; | ||
221 | int toget = 1; /* number of levels to concat */ | ||
222 | size_t toplen = lua_strlen(L, -1); | ||
223 | do { | ||
224 | size_t l = lua_strlen(L, -(toget+1)); | ||
225 | if (!(B->lvl - toget + 1 >= LUA_MINSTACK/2 || toplen > l)) | ||
226 | break; | ||
227 | toplen += l; | ||
228 | toget++; | ||
229 | } while (toget < B->lvl); | ||
230 | lua_concat(L, toget); | ||
231 | B->lvl = B->lvl - toget + 1; | ||
232 | } | ||
233 | } | ||
234 | |||
235 | LUALIB_API char *luaL_prepbuffer(luaL_Buffer *B) | ||
236 | { | ||
237 | if (emptybuffer(B)) | ||
238 | adjuststack(B); | ||
239 | return B->buffer; | ||
240 | } | ||
241 | |||
242 | LUALIB_API void luaL_addlstring(luaL_Buffer *B, const char *s, size_t l) | ||
243 | { | ||
244 | while (l--) | ||
245 | luaL_addchar(B, *s++); | ||
246 | } | ||
247 | |||
248 | LUALIB_API void luaL_addstring(luaL_Buffer *B, const char *s) | ||
249 | { | ||
250 | luaL_addlstring(B, s, strlen(s)); | ||
251 | } | ||
252 | |||
253 | LUALIB_API void luaL_pushresult(luaL_Buffer *B) | ||
254 | { | ||
255 | emptybuffer(B); | ||
256 | lua_concat(B->L, B->lvl); | ||
257 | B->lvl = 1; | ||
258 | } | ||
259 | |||
260 | LUALIB_API void luaL_addvalue(luaL_Buffer *B) | ||
261 | { | ||
262 | lua_State *L = B->L; | ||
263 | size_t vl; | ||
264 | const char *s = lua_tolstring(L, -1, &vl); | ||
265 | if (vl <= bufffree(B)) { /* fit into buffer? */ | ||
266 | memcpy(B->p, s, vl); /* put it there */ | ||
267 | B->p += vl; | ||
268 | lua_pop(L, 1); /* remove from stack */ | ||
269 | } else { | ||
270 | if (emptybuffer(B)) | ||
271 | lua_insert(L, -2); /* put buffer before new value */ | ||
272 | B->lvl++; /* add new value into B stack */ | ||
273 | adjuststack(B); | ||
274 | } | ||
275 | } | ||
276 | |||
277 | LUALIB_API void luaL_buffinit(lua_State *L, luaL_Buffer *B) | ||
278 | { /* Reset B to an empty buffer bound to state L. */ | ||
279 | B->lvl = 0; /* no pieces on the stack yet */ | ||
280 | B->p = B->buffer; /* write position at start of local storage */ | ||
281 | B->L = L; | ||
282 | } | ||
283 | |||
284 | /* -- Reference management ------------------------------------------------ */ | ||
285 | |||
286 | #define FREELIST_REF 0 | ||
287 | |||
288 | LUALIB_API int luaL_ref(lua_State *L, int t) | ||
289 | { /* Pop the value on top of the stack, store it in the table at index t and return its integer key. */ | ||
290 | int ref; | ||
291 | t = abs_index(L, t); /* The pushes below would shift a relative index. */ | ||
292 | if (lua_isnil(L, -1)) { | ||
293 | lua_pop(L, 1); /* remove from stack */ | ||
294 | return LUA_REFNIL; /* `nil' has a unique fixed reference */ | ||
295 | } | ||
296 | lua_rawgeti(L, t, FREELIST_REF); /* get first free element */ | ||
297 | ref = (int)lua_tointeger(L, -1); /* ref = t[FREELIST_REF] */ | ||
298 | lua_pop(L, 1); /* remove it from stack */ | ||
299 | if (ref != 0) { /* any free element? */ | ||
300 | lua_rawgeti(L, t, ref); /* remove it from list */ | ||
301 | lua_rawseti(L, t, FREELIST_REF); /* (t[FREELIST_REF] = t[ref]) */ | ||
302 | } else { /* no free elements */ | ||
303 | ref = (int)lua_objlen(L, t); | ||
304 | ref++; /* create new reference */ | ||
305 | } | ||
306 | lua_rawseti(L, t, ref); /* t[ref] = value (pops it). */ | ||
307 | return ref; | ||
308 | } | ||
309 | |||
310 | LUALIB_API void luaL_unref(lua_State *L, int t, int ref) | ||
311 | { /* Release reference ref in the table at index t by linking its slot into the free list. */ | ||
312 | if (ref < 0) return; /* negative references own no slot: nothing to release */ | ||
313 | t = abs_index(L, t); | ||
314 | lua_rawgeti(L, t, FREELIST_REF); /* push the current head of the free list */ | ||
315 | lua_rawseti(L, t, ref); /* t[ref] = old head */ | ||
316 | lua_pushinteger(L, ref); | ||
317 | lua_rawseti(L, t, FREELIST_REF); /* the released slot is the new head */ | ||
318 | } | ||
320 | |||
321 | /* -- Load Lua code ------------------------------------------------------- */ | ||
322 | |||
323 | typedef struct FileReaderCtx { /* State passed to reader_file through lua_load's userdata pointer. */ | ||
324 | FILE *fp; /* Stream the chunk is read from. */ | ||
325 | char buf[LUAL_BUFFERSIZE]; /* Holds the most recently read piece. */ | ||
326 | } FileReaderCtx; | ||
327 | |||
328 | static const char *reader_file(lua_State *L, void *ud, size_t *size) | ||
329 | { /* lua_load reader: return the next piece of the file, or NULL when nothing more can be read. */ | ||
330 | FileReaderCtx *rd = (FileReaderCtx *)ud; | ||
331 | UNUSED(L); | ||
332 | if (feof(rd->fp)) return NULL; | ||
333 | *size = fread(rd->buf, 1, sizeof(rd->buf), rd->fp); | ||
334 | return *size == 0 ? NULL : rd->buf; | ||
335 | } | ||
336 | |||
337 | LUALIB_API int luaL_loadfile(lua_State *L, const char *filename) | ||
338 | { /* Load (but do not run) a chunk from filename, or from stdin if filename is NULL. Returns the lua_load status or LUA_ERRFILE. */ | ||
339 | FileReaderCtx ctx; | ||
340 | int status; | ||
341 | const char *chunkname; | ||
342 | if (filename) { | ||
343 | ctx.fp = fopen(filename, "r"); | ||
344 | if (ctx.fp == NULL) { | ||
345 | lua_pushfstring(L, "cannot open %s: %s", filename, strerror(errno)); | ||
346 | return LUA_ERRFILE; | ||
347 | } | ||
348 | chunkname = lua_pushfstring(L, "@%s", filename); /* The chunk name string stays on the stack during the load. */ | ||
349 | } else { | ||
350 | ctx.fp = stdin; | ||
351 | chunkname = "=stdin"; | ||
352 | } | ||
353 | status = lua_load(L, reader_file, &ctx, chunkname); | ||
354 | if (ferror(ctx.fp)) { /* A read error takes precedence over the lua_load status. */ | ||
355 | L->top -= filename ? 2 : 1; /* Drop the lua_load result and, for files, the chunk name. */ | ||
356 | lua_pushfstring(L, "cannot read %s: %s", chunkname+1, strerror(errno)); /* chunkname+1 skips the '@' or '=' prefix. */ | ||
357 | if (filename) | ||
358 | fclose(ctx.fp); | ||
359 | return LUA_ERRFILE; | ||
360 | } | ||
361 | if (filename) { | ||
362 | L->top--; | ||
363 | copyTV(L, L->top-1, L->top); /* Move the lua_load result down over the chunk name slot. */ | ||
364 | fclose(ctx.fp); /* stdin is never closed here. */ | ||
365 | } | ||
366 | return status; | ||
367 | } | ||
368 | |||
369 | typedef struct StringReaderCtx { /* State passed to reader_string through lua_load's userdata pointer. */ | ||
370 | const char *str; /* Start of the chunk text. */ | ||
371 | size_t size; /* Bytes not yet handed out; set to 0 by reader_string after the first call. */ | ||
372 | } StringReaderCtx; | ||
373 | |||
374 | static const char *reader_string(lua_State *L, void *ud, size_t *size) | ||
375 | { /* lua_load reader: hand out the whole string on the first call, NULL afterwards. */ | ||
376 | StringReaderCtx *src = (StringReaderCtx *)ud; | ||
377 | size_t avail = src->size; | ||
378 | UNUSED(L); | ||
379 | if (avail == 0) return NULL; | ||
380 | *size = avail; src->size = 0; /* report the length, then mark the string as consumed */ | ||
381 | return src->str; | ||
382 | } | ||
383 | |||
384 | LUALIB_API int luaL_loadbuffer(lua_State *L, const char *buf, size_t size, | ||
385 | const char *name) | ||
386 | { /* Load a chunk from the size bytes at buf, using name as the chunk name. */ | ||
387 | StringReaderCtx src; | ||
388 | src.size = size; | ||
389 | src.str = buf; | ||
390 | return lua_load(L, reader_string, &src, name); | ||
391 | } | ||
392 | |||
393 | LUALIB_API int luaL_loadstring(lua_State *L, const char *s) | ||
394 | { /* Load a chunk from the NUL-terminated string s; the string itself serves as the chunk name. */ | ||
395 | size_t len = strlen(s); return luaL_loadbuffer(L, s, len, s); | ||
396 | } | ||
397 | |||
398 | /* -- Default allocator and panic function -------------------------------- */ | ||
399 | |||
400 | #ifdef LUAJIT_USE_SYSMALLOC | ||
401 | |||
402 | static void *mem_alloc(void *ud, void *ptr, size_t osize, size_t nsize) | ||
403 | { /* Allocator callback on top of the C library: nsize == 0 frees, anything else reallocates. */ | ||
404 | (void)ud; | ||
405 | (void)osize; | ||
406 | if (nsize != 0) | ||
407 | return realloc(ptr, nsize); | ||
408 | free(ptr); | ||
409 | return NULL; | ||
410 | } | ||
413 | |||
414 | #define mem_create() NULL | ||
415 | |||
416 | #else | ||
417 | |||
418 | #include "lj_alloc.h" | ||
419 | |||
420 | #define mem_alloc lj_alloc_f | ||
421 | #define mem_create lj_alloc_create | ||
422 | |||
423 | #endif | ||
424 | |||
425 | static int panic(lua_State *L) | ||
426 | { /* Default panic handler: report the error value on top of the stack on stderr. */ | ||
427 | const char *msg = lua_tostring(L, -1); /* NULL when the error value is neither a string nor a number. */ | ||
428 | fprintf(stderr, "PANIC: unprotected error in call to Lua API (%s)\n", msg ? msg : "?"); /* Never hand NULL to %s. */ | ||
429 | return 0; | ||
430 | } | ||
431 | |||
432 | LUALIB_API lua_State *luaL_newstate(void) | ||
433 | { /* Create a state with the default allocator and install the default panic handler. */ | ||
434 | lua_State *L = lua_newstate(mem_alloc, mem_create()); | ||
435 | if (L != NULL) { G(L)->panic = panic; } | ||
436 | return L; /* NULL if the state could not be created */ | ||
437 | } | ||
438 | |||
diff --git a/src/lib_base.c b/src/lib_base.c new file mode 100644 index 00000000..6b9e8eef --- /dev/null +++ b/src/lib_base.c | |||
@@ -0,0 +1,560 @@ | |||
1 | /* | ||
2 | ** Base and coroutine library. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | ** | ||
5 | ** Major portions taken verbatim or adapted from the Lua interpreter. | ||
6 | ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h | ||
7 | */ | ||
8 | |||
9 | #include <stdio.h> | ||
10 | |||
11 | #define lib_base_c | ||
12 | #define LUA_LIB | ||
13 | |||
14 | #include "lua.h" | ||
15 | #include "lauxlib.h" | ||
16 | #include "lualib.h" | ||
17 | |||
18 | #include "lj_obj.h" | ||
19 | #include "lj_gc.h" | ||
20 | #include "lj_err.h" | ||
21 | #include "lj_str.h" | ||
22 | #include "lj_tab.h" | ||
23 | #include "lj_meta.h" | ||
24 | #include "lj_state.h" | ||
25 | #include "lj_ff.h" | ||
26 | #include "lj_ctype.h" | ||
27 | #include "lj_lib.h" | ||
28 | |||
29 | /* -- Base library: checks ------------------------------------------------ */ | ||
30 | |||
31 | #define LJLIB_MODULE_base | ||
32 | |||
33 | LJLIB_ASM(assert) LJLIB_REC(.) | ||
34 | { /* Raise the assertion error: the optional string in argument 2 is the message, else the default one. */ | ||
35 | GCstr *msg; | ||
36 | lj_lib_checkany(L, 1); | ||
37 | msg = lj_lib_optstr(L, 2); | ||
38 | if (!msg) | ||
39 | lj_err_caller(L, LJ_ERR_ASSERT); | ||
40 | else | ||
41 | lj_err_callermsg(L, strdata(msg)); | ||
42 | return FFH_UNREACHABLE; | ||
43 | } | ||
44 | |||
45 | /* ORDER LJ_T */ | ||
46 | LJLIB_PUSH("nil") | ||
47 | LJLIB_PUSH("boolean") | ||
48 | LJLIB_PUSH(top-1) /* boolean */ | ||
49 | LJLIB_PUSH("userdata") | ||
50 | LJLIB_PUSH("string") | ||
51 | LJLIB_PUSH("upval") | ||
52 | LJLIB_PUSH("thread") | ||
53 | LJLIB_PUSH("proto") | ||
54 | LJLIB_PUSH("function") | ||
55 | LJLIB_PUSH("deadkey") | ||
56 | LJLIB_PUSH("table") | ||
57 | LJLIB_PUSH(top-8) /* userdata */ | ||
58 | LJLIB_PUSH("number") | ||
59 | LJLIB_ASM_(type) LJLIB_REC(.) | ||
60 | /* Recycle the lj_lib_checkany(L, 1) from assert. */ | ||
61 | |||
62 | /* -- Base library: getters and setters ----------------------------------- */ | ||
63 | |||
64 | LJLIB_ASM_(getmetatable) LJLIB_REC(.) | ||
65 | /* Recycle the lj_lib_checkany(L, 1) from assert. */ | ||
66 | |||
67 | LJLIB_ASM(setmetatable) LJLIB_REC(.) | ||
68 | { /* setmetatable(t, mt): t must be a table, mt a table or nil. */ | ||
69 | GCtab *t = lj_lib_checktab(L, 1); | ||
70 | GCtab *mt = lj_lib_checktabornil(L, 2); | ||
71 | if (!tvisnil(lj_meta_lookup(L, L->base, MM_metatable))) /* A non-nil MM_metatable entry for argument 1 protects its metatable. */ | ||
72 | lj_err_caller(L, LJ_ERR_PROTMT); | ||
73 | setgcref(t->metatable, obj2gco(mt)); | ||
74 | if (mt) { lj_gc_objbarriert(L, t, mt); } /* GC barrier for the new t -> mt reference. */ | ||
75 | settabV(L, L->base-1, t); /* The table itself is the result, stored in the slot below the arguments. */ | ||
76 | return FFH_RES(1); | ||
77 | } | ||
78 | |||
79 | LJLIB_CF(getfenv) | ||
80 | { /* getfenv([f]): push the environment table of a function or of the function at a stack level. */ | ||
81 | GCfunc *fn; | ||
82 | cTValue *o = L->base; | ||
83 | if (!(o < L->top && tvisfunc(o))) { /* Argument 1 is not a function: treat it as an optional level (default 1). */ | ||
84 | int level = lj_lib_optint(L, 1, 1); | ||
85 | o = lj_err_getframe(L, level, &level); | ||
86 | if (o == NULL) | ||
87 | lj_err_arg(L, 1, LJ_ERR_INVLVL); | ||
88 | } | ||
89 | fn = &gcval(o)->fn; | ||
90 | settabV(L, L->top++, isluafunc(fn) ? tabref(fn->l.env) : tabref(L->env)); /* Non-Lua functions report the thread's environment. */ | ||
91 | return 1; | ||
92 | } | ||
93 | |||
94 | LJLIB_CF(setfenv) | ||
95 | { /* setfenv(f, table): set the environment of a Lua function, of the function at a stack level, or (level 0) of the thread. */ | ||
96 | GCfunc *fn; | ||
97 | GCtab *t = lj_lib_checktab(L, 2); | ||
98 | cTValue *o = L->base; | ||
99 | if (!(o < L->top && tvisfunc(o))) { /* Argument 1 is not a function: it must be an integer level. */ | ||
100 | int level = lj_lib_checkint(L, 1); | ||
101 | if (level == 0) { /* Level 0 changes the thread's environment and returns nothing. */ | ||
102 | /* NOBARRIER: A thread (i.e. L) is never black. */ | ||
103 | setgcref(L->env, obj2gco(t)); | ||
104 | return 0; | ||
105 | } | ||
106 | o = lj_err_getframe(L, level, &level); | ||
107 | if (o == NULL) | ||
108 | lj_err_arg(L, 1, LJ_ERR_INVLVL); | ||
109 | } | ||
110 | fn = &gcval(o)->fn; | ||
111 | if (!isluafunc(fn)) /* Only Lua functions can get a new environment here. */ | ||
112 | lj_err_caller(L, LJ_ERR_SETFENV); | ||
113 | setgcref(fn->l.env, obj2gco(t)); | ||
114 | lj_gc_objbarrier(L, obj2gco(fn), t); /* GC barrier for the new fn -> t reference. */ | ||
115 | setfuncV(L, L->top++, fn); /* Return the function. */ | ||
116 | return 1; | ||
117 | } | ||
118 | |||
119 | LJLIB_ASM(rawget) LJLIB_REC(.) | ||
120 | { /* Argument checks only: a table, then any value. NOTE(review): the lookup itself is presumably done elsewhere (FFH_UNREACHABLE) -- confirm. */ | ||
121 | lj_lib_checktab(L, 1); | ||
122 | lj_lib_checkany(L, 2); | ||
123 | return FFH_UNREACHABLE; | ||
124 | } | ||
125 | |||
126 | LJLIB_CF(rawset) LJLIB_REC(.) | ||
127 | { /* rawset(t, k, v): store v under k in t via lua_rawset and return t. */ | ||
128 | lj_lib_checktab(L, 1); | ||
129 | lj_lib_checkany(L, 2); | ||
130 | L->top = 1+lj_lib_checkany(L, 3); /* Drop extra arguments so key and value are the top two slots. */ | ||
131 | lua_rawset(L, 1); /* Pops key and value, leaving the table on top. */ | ||
132 | return 1; | ||
133 | } | ||
134 | |||
135 | LJLIB_CF(rawequal) LJLIB_REC(.) | ||
136 | { /* rawequal(a, b): compare two values with lj_obj_equal and return the boolean result. */ | ||
137 | cTValue *o1 = lj_lib_checkany(L, 1); | ||
138 | cTValue *o2 = lj_lib_checkany(L, 2); | ||
139 | setboolV(L->top-1, lj_obj_equal(o1, o2)); /* The result overwrites the top stack slot. */ | ||
140 | return 1; | ||
141 | } | ||
142 | |||
143 | LJLIB_CF(unpack) | ||
144 | { /* unpack(t [, i [, e]]): push t[i] .. t[e]; i defaults to 1, e to the table length. */ | ||
145 | GCtab *t = lj_lib_checktab(L, 1); | ||
146 | int32_t n, i = lj_lib_optint(L, 2, 1); | ||
147 | int32_t e = (L->base+3-1 < L->top && !tvisnil(L->base+3-1)) ? | ||
148 | lj_lib_checkint(L, 3) : (int32_t)lj_tab_len(t); | ||
149 | if (i > e) return 0; | ||
150 | n = (int32_t)((uint32_t)e - (uint32_t)i + 1u); /* Unsigned arithmetic: e - i + 1 may not fit in int32_t. */ | ||
151 | if (n <= 0 || !lua_checkstack(L, n)) /* n <= 0 means the count wrapped around. */ | ||
152 | lj_err_caller(L, LJ_ERR_UNPACK); | ||
153 | for (;; i++) { | ||
154 | cTValue *tv = lj_tab_getint(t, i); | ||
155 | if (tv) { | ||
156 | copyTV(L, L->top++, tv); | ||
157 | } else { | ||
158 | setnilV(L->top++); /* Missing entries are returned as nil. */ | ||
159 | } | ||
160 | if (i == e) break; /* Stop before incrementing so i never overflows when e is INT32_MAX. */ | ||
161 | } | ||
162 | return n; | ||
163 | } | ||
163 | |||
164 | LJLIB_CF(select) | ||
165 | { /* select(n, ...) / select('#', ...). */ | ||
166 | int32_t n = (int32_t)(L->top - L->base); /* Argument count including the selector. */ | ||
167 | if (n >= 1 && tvisstr(L->base) && *strVdata(L->base) == '#') { /* Selector is a string starting with '#'. */ | ||
168 | setintV(L->top-1, n-1); /* Count of the arguments after the selector. */ | ||
169 | return 1; | ||
170 | } else { | ||
171 | int32_t i = lj_lib_checkint(L, 1); | ||
172 | if (i < 0) i = n + i; else if (i > n) i = n; /* Negative indexes count from the end; too large ones are clamped. */ | ||
173 | if (i < 1) | ||
174 | lj_err_arg(L, 1, LJ_ERR_IDXRNG); | ||
175 | return n - i; /* Return the topmost n-i stack values. */ | ||
176 | } | ||
177 | } | ||
178 | |||
179 | /* -- Base library: conversions ------------------------------------------- */ | ||
180 | |||
181 | LJLIB_ASM(tonumber) LJLIB_REC(.) | ||
182 | { /* tonumber(v [, base]): the result (number or nil) is written to the slot below the arguments. */ | ||
183 | int32_t base = lj_lib_optint(L, 2, 10); | ||
184 | if (base == 10) { | ||
185 | TValue *o = lj_lib_checkany(L, 1); | ||
186 | if (tvisnum(o) || (tvisstr(o) && lj_str_numconv(strVdata(o), o))) { /* A successful string conversion leaves the number in o. */ | ||
187 | setnumV(L->base-1, numV(o)); | ||
188 | return FFH_RES(1); | ||
189 | } | ||
190 | } else { /* Other bases: argument 1 must be a string, parsed with strtoul. */ | ||
191 | const char *p = strdata(lj_lib_checkstr(L, 1)); | ||
192 | char *ep; | ||
193 | unsigned long ul; | ||
194 | if (base < 2 || base > 36) | ||
195 | lj_err_arg(L, 2, LJ_ERR_BASERNG); | ||
196 | ul = strtoul(p, &ep, base); | ||
197 | if (p != ep) { /* At least one character was consumed. */ | ||
198 | while (lj_ctype_isspace((unsigned char)(*ep))) ep++; /* Trailing whitespace is allowed. */ | ||
199 | if (*ep == '\0') { | ||
200 | setnumV(L->base-1, cast_num(ul)); | ||
201 | return FFH_RES(1); | ||
202 | } | ||
203 | } | ||
204 | } | ||
205 | setnilV(L->base-1); /* Not convertible: return nil. */ | ||
206 | return FFH_RES(1); | ||
207 | } | ||
208 | |||
209 | LJLIB_ASM(tostring) LJLIB_REC(.) | ||
210 | { /* tostring(v): use the MM_tostring metamethod if there is one, else build a default string. */ | ||
211 | TValue *o = lj_lib_checkany(L, 1); | ||
212 | cTValue *mo; | ||
213 | L->top = o+1; /* Only keep one argument. */ | ||
214 | if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) { | ||
215 | copyTV(L, L->base-1, mo); /* Replace callable. */ | ||
216 | return FFH_RETRY; /* NOTE(review): presumably re-dispatches to the metamethod now in the callee slot -- confirm. */ | ||
217 | } else { | ||
218 | GCstr *s; | ||
219 | if (tvisnum(o)) { | ||
220 | s = lj_str_fromnum(L, &o->n); | ||
221 | } else if (tvisnil(o)) { | ||
222 | s = lj_str_newlit(L, "nil"); | ||
223 | } else if (tvisfalse(o)) { | ||
224 | s = lj_str_newlit(L, "false"); | ||
225 | } else if (tvistrue(o)) { | ||
226 | s = lj_str_newlit(L, "true"); | ||
227 | } else { /* Everything else: "<typename>: <pointer>", or "function: fast#<id>" for fast functions. */ | ||
228 | if (tvisfunc(o) && isffunc(funcV(o))) | ||
229 | lua_pushfstring(L, "function: fast#%d", funcV(o)->c.ffid); | ||
230 | else | ||
231 | lua_pushfstring(L, "%s: %p", typename(o), lua_topointer(L, 1)); | ||
232 | /* Note: lua_pushfstring calls the GC which may invalidate o. */ | ||
233 | s = strV(L->top-1); | ||
234 | } | ||
235 | setstrV(L, L->base-1, s); /* Result goes into the slot below the arguments. */ | ||
236 | return FFH_RES(1); | ||
237 | } | ||
238 | } | ||
239 | |||
240 | /* -- Base library: iterators --------------------------------------------- */ | ||
241 | |||
242 | LJLIB_ASM(next) | ||
243 | { /* Argument checks only: argument 1 must be a table. */ | ||
244 | lj_lib_checktab(L, 1); | ||
245 | lj_lib_checknum(L, 2); /* For ipairs_aux. */ | ||
246 | return FFH_UNREACHABLE; | ||
247 | } | ||
248 | |||
249 | LJLIB_PUSH(lastcl) | ||
250 | LJLIB_ASM_(pairs) | ||
251 | |||
252 | LJLIB_NOREGUV LJLIB_ASM_(ipairs_aux) LJLIB_REC(.) | ||
253 | |||
254 | LJLIB_PUSH(lastcl) | ||
255 | LJLIB_ASM_(ipairs) LJLIB_REC(.) | ||
256 | |||
257 | /* -- Base library: throw and catch errors -------------------------------- */ | ||
258 | |||
259 | LJLIB_CF(error) | ||
260 | { /* error(msg [, level]): raise msg as an error; level defaults to 1. */ | ||
261 | int32_t level = lj_lib_optint(L, 2, 1); | ||
262 | lua_settop(L, 1); /* Keep only the message. */ | ||
263 | if (lua_isstring(L, 1) && level > 0) { /* Prefix string messages with the position info from luaL_where. */ | ||
264 | luaL_where(L, level); | ||
265 | lua_pushvalue(L, 1); | ||
266 | lua_concat(L, 2); | ||
267 | } | ||
268 | return lua_error(L); | ||
269 | } | ||
270 | |||
271 | LJLIB_ASM(pcall) LJLIB_REC(.) | ||
272 | { /* Argument checks only; the xpcall declaration that follows has no body of its own. */ | ||
273 | lj_lib_checkany(L, 1); | ||
274 | lj_lib_checkfunc(L, 2); /* For xpcall only. */ | ||
275 | return FFH_UNREACHABLE; | ||
276 | } | ||
277 | LJLIB_ASM_(xpcall) LJLIB_REC(.) | ||
278 | |||
279 | /* -- Base library: load Lua code ----------------------------------------- */ | ||
280 | |||
281 | static int load_aux(lua_State *L, int status) | ||
282 | { /* Turn a load status into results: the chunk on success, else nil plus the error message. */ | ||
283 | if (status != 0) { | ||
284 | copyTV(L, L->top, L->top-1); /* move the message one slot up */ | ||
285 | setnilV(L->top-1); /* nil takes its old place */ | ||
286 | L->top++; | ||
287 | return 2; | ||
288 | } | ||
289 | return 1; | ||
290 | } | ||
290 | |||
291 | LJLIB_CF(loadstring) | ||
292 | { /* loadstring(s [, chunkname]): compile the string s; the chunk name defaults to s itself. */ | ||
293 | GCstr *s = lj_lib_checkstr(L, 1); | ||
294 | GCstr *name = lj_lib_optstr(L, 2); | ||
295 | const char *chunkname = strdata(name ? name : s); | ||
296 | return load_aux(L, luaL_loadbuffer(L, strdata(s), s->len, chunkname)); | ||
297 | } | ||
298 | |||
299 | LJLIB_CF(loadfile) | ||
300 | { /* loadfile([filename]): compile a file; without a name luaL_loadfile gets NULL. */ | ||
301 | GCstr *fname = lj_lib_optstr(L, 1); | ||
302 | const char *path = fname ? strdata(fname) : NULL; return load_aux(L, luaL_loadfile(L, path)); | ||
303 | } | ||
304 | |||
305 | static const char *reader_func(lua_State *L, void *ud, size_t *size) | ||
306 | { /* lua_load reader: call the function in the first stack slot and return the string it produces; nil ends the chunk. */ | ||
307 | UNUSED(ud); | ||
308 | luaL_checkstack(L, 2, "too many nested functions"); | ||
309 | copyTV(L, L->top++, L->base); | ||
310 | lua_call(L, 0, 1); /* Call user-supplied function. */ | ||
311 | L->top--; /* Pop the result; it is still readable at L->top below. */ | ||
312 | if (tvisnil(L->top)) { | ||
313 | *size = 0; | ||
314 | return NULL; | ||
315 | } else if (tvisstr(L->top) || tvisnum(L->top)) { | ||
316 | copyTV(L, L->base+2, L->top); /* Anchor string in reserved stack slot. */ | ||
317 | return lua_tolstring(L, 3, size); | ||
318 | } else { /* Any other result type is an error. */ | ||
319 | lj_err_caller(L, LJ_ERR_RDRSTR); | ||
320 | return NULL; | ||
321 | } | ||
322 | } | ||
323 | |||
324 | LJLIB_CF(load) | ||
325 | { /* load(func [, chunkname]): compile the pieces returned by repeated calls to func. */ | ||
326 | GCstr *name = lj_lib_optstr(L, 2); | ||
327 | lj_lib_checkfunc(L, 1); | ||
328 | lua_settop(L, 3); /* Reserve a slot for the string from the reader. */ | ||
329 | return load_aux(L, | ||
330 | lua_load(L, reader_func, NULL, name ? strdata(name) : "=(load)")); | ||
331 | } | ||
332 | |||
333 | LJLIB_CF(dofile) | ||
334 | { /* dofile([filename]): load and run a file, returning all of the chunk's results. */ | ||
335 | GCstr *fname = lj_lib_optstr(L, 1); | ||
336 | setnilV(L->top); /* With no arguments this puts nil into the first slot. */ | ||
337 | L->top = L->base+1; /* Keep exactly one slot below the chunk. */ | ||
338 | if (luaL_loadfile(L, fname ? strdata(fname) : NULL) != 0) | ||
339 | lua_error(L); /* Re-raise the load error message. */ | ||
340 | lua_call(L, 0, LUA_MULTRET); | ||
341 | return (L->top - L->base) - 1; /* Everything above the first slot is a result. */ | ||
342 | } | ||
343 | |||
344 | /* -- Base library: GC control -------------------------------------------- */ | ||
345 | |||
346 | LJLIB_CF(gcinfo) | ||
347 | { /* gcinfo(): return G(L)->gc.total divided by 1024 (shift right by 10) as an integer. */ | ||
348 | setintV(L->top++, (G(L)->gc.total >> 10)); | ||
349 | return 1; | ||
350 | } | ||
351 | |||
352 | LJLIB_CF(collectgarbage) | ||
353 | { /* collectgarbage([opt [, arg]]): opt defaults to LUA_GCCOLLECT, arg to 0. */ | ||
354 | int opt = lj_lib_checkopt(L, 1, LUA_GCCOLLECT, /* ORDER LUA_GC* */ | ||
355 | "\4stop\7restart\7collect\5count\1\377\4step\10setpause\12setstepmul"); | ||
356 | int32_t data = lj_lib_optint(L, 2, 0); | ||
357 | if (opt == LUA_GCCOUNT) { /* "count" is answered here as gc.total/1024.0. */ | ||
358 | setnumV(L->top-1, cast_num((int32_t)G(L)->gc.total)/1024.0); /* NOTE(review): gc.total is narrowed to int32_t first -- confirm it cannot exceed INT32_MAX. */ | ||
359 | } else { | ||
360 | int res = lua_gc(L, opt, data); | ||
361 | if (opt == LUA_GCSTEP) | ||
362 | setboolV(L->top-1, res); /* "step" returns a boolean. */ | ||
363 | else | ||
364 | setintV(L->top-1, res); /* All other options return lua_gc's integer result. */ | ||
365 | } | ||
366 | return 1; /* The result overwrote the top stack slot. */ | ||
367 | } | ||
368 | |||
369 | /* -- Base library: miscellaneous functions ------------------------------- */ | ||
370 | |||
371 | LJLIB_PUSH(top-2) /* Upvalue holds weak table. */ | ||
372 | LJLIB_CF(newproxy) | ||
373 | { /* newproxy([arg]): create a zero-size userdata, optionally with a metatable. */ | ||
374 | lua_settop(L, 1); | ||
375 | lua_newuserdata(L, 0); /* The new proxy is at stack index 2. */ | ||
376 | if (lua_toboolean(L, 1) == 0) { /* newproxy(): without metatable. */ | ||
377 | return 1; | ||
378 | } else if (lua_isboolean(L, 1)) { /* newproxy(true): with metatable. */ | ||
379 | lua_newtable(L); | ||
380 | lua_pushvalue(L, -1); | ||
381 | lua_pushboolean(L, 1); | ||
382 | lua_rawset(L, lua_upvalueindex(1)); /* Remember mt in weak table. */ | ||
383 | } else { /* newproxy(proxy): inherit metatable. */ | ||
384 | int validproxy = 0; | ||
385 | if (lua_getmetatable(L, 1)) { | ||
386 | lua_rawget(L, lua_upvalueindex(1)); /* Only metatables recorded in the weak table count as proxy metatables. */ | ||
387 | validproxy = lua_toboolean(L, -1); | ||
388 | lua_pop(L, 1); | ||
389 | } | ||
390 | if (!validproxy) | ||
391 | lj_err_arg(L, 1, LJ_ERR_NOPROXY); | ||
392 | lua_getmetatable(L, 1); /* Push the metatable to share. */ | ||
393 | } | ||
394 | lua_setmetatable(L, 2); /* Attach the metatable on top of the stack to the new proxy. */ | ||
395 | return 1; | ||
396 | } | ||
397 | |||
398 | LJLIB_PUSH("tostring") | ||
399 | LJLIB_CF(print) | ||
400 | { /* print(...): write all arguments to stdout, separated by tabs and followed by a newline. */ | ||
401 | ptrdiff_t i, nargs = L->top - L->base; | ||
402 | cTValue *tv = lj_tab_getstr(tabref(L->env), strV(lj_lib_upvalue(L, 1))); | ||
403 | int shortcut = (tv && tvisfunc(tv) && funcV(tv)->c.ffid == FF_tostring); /* The environment's tostring is the built-in one. */ | ||
404 | copyTV(L, L->top++, tv ? tv : niltv(L)); /* Keep the tostring function above the arguments. */ | ||
405 | for (i = 0; i < nargs; i++) { | ||
406 | const char *str; | ||
407 | size_t size; | ||
408 | cTValue *o = &L->base[i]; | ||
409 | char buf[LUAI_MAXNUMBER2STR]; /* Declared at loop scope: str may point into it until the fwrite below. */ | ||
410 | if (shortcut && tvisstr(o)) { /* Strings are written directly. */ | ||
411 | str = strVdata(o); | ||
412 | size = strV(o)->len; | ||
413 | } else if (shortcut && tvisnum(o)) { /* Numbers are formatted without calling tostring. */ | ||
414 | lua_Number n = numV(o); | ||
415 | size = (size_t)lua_number2str(buf, n); | ||
416 | str = buf; | ||
417 | } else { /* General case: call tostring(o). */ | ||
418 | copyTV(L, L->top+1, o); | ||
419 | copyTV(L, L->top, L->top-1); | ||
420 | L->top += 2; | ||
421 | lua_call(L, 1, 1); | ||
422 | str = lua_tolstring(L, -1, &size); | ||
423 | if (!str) | ||
424 | lj_err_caller(L, LJ_ERR_PRTOSTR); | ||
425 | L->top--; | ||
426 | } | ||
427 | if (i) | ||
428 | putchar('\t'); | ||
429 | fwrite(str, 1, size, stdout); | ||
430 | } | ||
431 | putchar('\n'); | ||
432 | return 0; | ||
433 | } | ||
434 | |||
435 | LJLIB_PUSH(top-3) | ||
436 | LJLIB_SET(_VERSION) | ||
437 | |||
438 | #include "lj_libdef.h" | ||
439 | |||
440 | /* -- Coroutine library --------------------------------------------------- */ | ||
441 | |||
442 | #define LJLIB_MODULE_coroutine | ||
443 | |||
444 | LJLIB_CF(coroutine_status) | ||
445 | { /* coroutine.status(co): return "running", "suspended", "normal" or "dead". */ | ||
446 | const char *s; | ||
447 | lua_State *co; | ||
448 | if (!(L->top > L->base && tvisthread(L->base))) | ||
449 | lj_err_arg(L, 1, LJ_ERR_NOCORO); | ||
450 | co = threadV(L->base); | ||
451 | if (co == L) s = "running"; | ||
452 | else if (co->status == LUA_YIELD) s = "suspended"; | ||
453 | else if (co->status != 0) s = "dead"; /* Any other non-zero status. */ | ||
454 | else if (co->base > co->stack+1) s = "normal"; | ||
455 | else if (co->top == co->base) s = "dead"; /* Status 0 with nothing on its stack. */ | ||
456 | else s = "suspended"; | ||
457 | lua_pushstring(L, s); | ||
458 | return 1; | ||
459 | } | ||
460 | |||
461 | LJLIB_CF(coroutine_running) | ||
462 | { /* coroutine.running(): return the current thread, or nil when lua_pushthread returns non-zero. */ | ||
463 | if (lua_pushthread(L)) | ||
464 | setnilV(L->top++); /* The nil pushed on top becomes the single result. */ | ||
465 | return 1; | ||
466 | } | ||
467 | |||
468 | LJLIB_CF(coroutine_create) | ||
469 | { /* coroutine.create(f): create a new thread whose stack holds the Lua function f. */ | ||
470 | lua_State *L1 = lua_newthread(L); /* Also pushes the thread on L; that is the return value. */ | ||
471 | if (!(L->top > L->base && tvisfunc(L->base) && isluafunc(funcV(L->base)))) /* Only Lua functions are accepted. */ | ||
472 | lj_err_arg(L, 1, LJ_ERR_NOLFUNC); | ||
473 | setfuncV(L, L1->top++, funcV(L->base)); | ||
474 | return 1; | ||
475 | } | ||
476 | |||
477 | LJLIB_ASM(coroutine_yield) | ||
478 | { /* Always raises LJ_ERR_CYIELD. NOTE(review): presumably reached only when the yield cannot be performed -- confirm. */ | ||
479 | lj_err_caller(L, LJ_ERR_CYIELD); | ||
480 | return FFH_UNREACHABLE; | ||
481 | } | ||
482 | |||
483 | static int ffh_resume(lua_State *L, lua_State *co, int wrap) | ||
484 | { /* Shared by resume and wrap: reject a coroutine that cannot be resumed, else grow its stack. */ | ||
485 | if (co->cframe != NULL || co->status > LUA_YIELD || | ||
486 | (co->status == 0 && co->top == co->base)) { | ||
487 | ErrMsg em = co->cframe ? LJ_ERR_CORUN : LJ_ERR_CODEAD; | ||
488 | if (wrap) lj_err_caller(L, em); /* wrap mode raises the error. */ | ||
489 | setboolV(L->base-1, 0); /* resume mode returns false plus the message. */ | ||
490 | setstrV(L, L->base, lj_err_str(L, em)); | ||
491 | return FFH_RES(2); | ||
492 | } | ||
493 | lj_state_growstack(co, (MSize)(L->top - L->base - 1)); /* Grow by one less than the number of values between base and top. */ | ||
494 | return FFH_RETRY; | ||
495 | } | ||
496 | |||
497 | LJLIB_ASM(coroutine_resume) | ||
498 | { /* coroutine.resume(co, ...): argument 1 must be a thread; the rest is handled by ffh_resume in non-wrap mode. */ | ||
499 | if (!(L->top > L->base && tvisthread(L->base))) | ||
500 | lj_err_arg(L, 1, LJ_ERR_NOCORO); | ||
501 | return ffh_resume(L, threadV(L->base), 0); | ||
502 | } | ||
503 | |||
504 | LJLIB_NOREG LJLIB_ASM(coroutine_wrap_aux) | ||
505 | { /* Resume the coroutine stored in upvalue 1, in wrap mode (errors are raised). */ | ||
506 | return ffh_resume(L, threadV(lj_lib_upvalue(L, 1)), 1); | ||
507 | } | ||
508 | |||
509 | /* Inline declarations. */ | ||
510 | LJ_ASMF void lj_ff_coroutine_wrap_aux(void); | ||
511 | LJ_FUNCA_NORET void lj_ffh_coroutine_wrap_err(lua_State *L, lua_State *co); | ||
512 | |||
513 | /* Error handler, called from assembler VM. */ | ||
514 | void lj_ffh_coroutine_wrap_err(lua_State *L, lua_State *co) | ||
515 | { /* Move the error value from the top of co's stack to L and raise it in L. */ | ||
516 | co->top--; copyTV(L, L->top, co->top); L->top++; | ||
517 | if (tvisstr(L->top-1)) /* String errors go through lj_err_callermsg. */ | ||
518 | lj_err_callermsg(L, strVdata(L->top-1)); | ||
519 | else /* Other error values are raised via lj_err_run. */ | ||
520 | lj_err_run(L); | ||
521 | } | ||
522 | |||
523 | LJLIB_CF(coroutine_wrap) | ||
524 | { /* coroutine.wrap(f): create a coroutine and return a function that resumes it. */ | ||
525 | GCfunc *fn; | ||
526 | lj_cf_coroutine_create(L); /* Leaves the new thread on top of the stack. */ | ||
527 | lua_pushcclosure(L, lj_ffh_coroutine_wrap_aux, 1); /* The thread becomes upvalue 1 of the closure. */ | ||
528 | fn = funcV(L->top-1); | ||
529 | fn->c.gate = lj_ff_coroutine_wrap_aux; /* Patch gate and id to the coroutine_wrap_aux fast function. */ | ||
530 | fn->c.ffid = FF_coroutine_wrap_aux; | ||
531 | return 1; | ||
532 | } | ||
533 | |||
534 | #include "lj_libdef.h" | ||
535 | |||
536 | /* ------------------------------------------------------------------------ */ | ||
537 | |||
538 | static void newproxy_weaktable(lua_State *L) | ||
539 | { /* Push a new table that is its own metatable and has __mode = "kv". */ | ||
540 | /* NOBARRIER: The table is new (marked white). */ | ||
541 | GCtab *t = lj_tab_new(L, 0, 1); | ||
542 | settabV(L, L->top++, t); | ||
543 | setgcref(t->metatable, obj2gco(t)); /* The table serves as its own metatable. */ | ||
544 | setstrV(L, lj_tab_setstr(L, t, lj_str_newlit(L, "__mode")), | ||
545 | lj_str_newlit(L, "kv")); | ||
546 | t->nomm = cast_byte(~(1u<<MM_mode)); /* NOTE(review): presumably flags every metamethod except __mode as absent -- confirm. */ | ||
547 | } | ||
548 | |||
549 | LUALIB_API int luaopen_base(lua_State *L) | ||
550 | { /* Open the base and coroutine libraries. */ | ||
551 | /* NOBARRIER: Table and value are the same. */ | ||
552 | GCtab *env = tabref(L->env); | ||
553 | settabV(L, lj_tab_setstr(L, env, lj_str_newlit(L, "_G")), env); /* env._G = env. */ | ||
554 | lua_pushliteral(L, LUA_VERSION); /* top-3. */ | ||
555 | newproxy_weaktable(L); /* top-2. */ | ||
556 | LJ_LIB_REG_(L, "_G", base); | ||
557 | LJ_LIB_REG(L, coroutine); | ||
558 | return 2; | ||
559 | } | ||
560 | |||
diff --git a/src/lib_bit.c b/src/lib_bit.c new file mode 100644 index 00000000..2f727e68 --- /dev/null +++ b/src/lib_bit.c | |||
@@ -0,0 +1,74 @@ | |||
1 | /* | ||
2 | ** Bit manipulation library. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #define lib_bit_c | ||
7 | #define LUA_LIB | ||
8 | |||
9 | #include "lua.h" | ||
10 | #include "lauxlib.h" | ||
11 | #include "lualib.h" | ||
12 | |||
13 | #include "lj_obj.h" | ||
14 | #include "lj_err.h" | ||
15 | #include "lj_str.h" | ||
16 | #include "lj_lib.h" | ||
17 | |||
18 | /* ------------------------------------------------------------------------ */ | ||
19 | |||
20 | #define LJLIB_MODULE_bit | ||
21 | |||
22 | LJLIB_ASM(bit_tobit) LJLIB_REC(bit_unary IR_TOBIT) | ||
23 | { /* Checks that argument 1 is a number. The bit_bnot and bit_bswap declarations that follow have no body of their own. */ | ||
24 | lj_lib_checknum(L, 1); | ||
25 | return FFH_RETRY; /* NOTE(review): presumably retries the operation after the check -- confirm. */ | ||
26 | } | ||
27 | LJLIB_ASM_(bit_bnot) LJLIB_REC(bit_unary IR_BNOT) | ||
28 | LJLIB_ASM_(bit_bswap) LJLIB_REC(bit_unary IR_BSWAP) | ||
29 | |||
30 | LJLIB_ASM(bit_lshift) LJLIB_REC(bit_shift IR_BSHL) | ||
31 | { /* Checks that arguments 1 and 2 are numbers. */ | ||
32 | lj_lib_checknum(L, 1); | ||
33 | lj_lib_checknum(L, 2); | ||
34 | return FFH_RETRY; | ||
35 | } | ||
36 | LJLIB_ASM_(bit_rshift) LJLIB_REC(bit_shift IR_BSHR) | ||
37 | LJLIB_ASM_(bit_arshift) LJLIB_REC(bit_shift IR_BSAR) | ||
38 | LJLIB_ASM_(bit_rol) LJLIB_REC(bit_shift IR_BROL) | ||
39 | LJLIB_ASM_(bit_ror) LJLIB_REC(bit_shift IR_BROR) | ||
40 | |||
41 | LJLIB_ASM(bit_band) LJLIB_REC(bit_nary IR_BAND) | ||
42 | { /* Checks that every argument is a number; at least one argument is required. */ | ||
43 | int i = 0; | ||
44 | do { lj_lib_checknum(L, ++i); } while (L->base+i < L->top); /* do/while: argument 1 is checked even if absent. */ | ||
45 | return FFH_RETRY; | ||
46 | } | ||
47 | LJLIB_ASM_(bit_bor) LJLIB_REC(bit_nary IR_BOR) | ||
48 | LJLIB_ASM_(bit_bxor) LJLIB_REC(bit_nary IR_BXOR) | ||
49 | |||
50 | /* ------------------------------------------------------------------------ */ | ||
51 | |||
52 | LJLIB_CF(bit_tohex) | ||
53 | { /* bit.tohex(x [, n]): return x as |n| hex digits (default 8, at most 8); negative n selects upper case. */ | ||
54 | uint32_t b = (uint32_t)lj_num2bit(lj_lib_checknum(L, 1)); | ||
55 | int32_t i, n = L->base+1 >= L->top ? 8 : lj_num2bit(lj_lib_checknum(L, 2)); | ||
56 | const char *hexdigits = "0123456789abcdef"; | ||
57 | char buf[8]; | ||
58 | if (n < 0) { n = (int32_t)(0u - (uint32_t)n); hexdigits = "0123456789ABCDEF"; } /* Negate in unsigned arithmetic: -INT32_MIN overflows. */ | ||
59 | if ((uint32_t)n > 8) n = 8; /* Unsigned compare also clamps the INT32_MIN case, which stays negative. */ | ||
60 | for (i = n; --i >= 0; ) { buf[i] = hexdigits[b & 15]; b >>= 4; } /* Fill from the least significant digit backwards. */ | ||
61 | lua_pushlstring(L, buf, (size_t)n); | ||
62 | return 1; | ||
63 | } | ||
64 | |||
65 | /* ------------------------------------------------------------------------ */ | ||
66 | |||
67 | #include "lj_libdef.h" | ||
68 | |||
69 | LUALIB_API int luaopen_bit(lua_State *L) | ||
70 | { /* Open the bit library. */ | ||
71 | LJ_LIB_REG(L, bit); | ||
72 | return 1; | ||
73 | } | ||
74 | |||
diff --git a/src/lib_debug.c b/src/lib_debug.c new file mode 100644 index 00000000..0e6c35e5 --- /dev/null +++ b/src/lib_debug.c | |||
@@ -0,0 +1,366 @@ | |||
1 | /* | ||
2 | ** Debug library. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | ** | ||
5 | ** Major portions taken verbatim or adapted from the Lua interpreter. | ||
6 | ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h | ||
7 | */ | ||
8 | |||
9 | #define lib_debug_c | ||
10 | #define LUA_LIB | ||
11 | |||
12 | #include "lua.h" | ||
13 | #include "lauxlib.h" | ||
14 | #include "lualib.h" | ||
15 | |||
16 | #include "lj_obj.h" | ||
17 | #include "lj_err.h" | ||
18 | #include "lj_lib.h" | ||
19 | |||
20 | /* ------------------------------------------------------------------------ */ | ||
21 | |||
22 | #define LJLIB_MODULE_debug | ||
23 | |||
24 | LJLIB_CF(debug_getregistry) | ||
25 | { /* debug.getregistry(): push the registry value. */ | ||
26 | copyTV(L, L->top++, registry(L)); | ||
27 | return 1; | ||
28 | } | ||
29 | |||
30 | LJLIB_CF(debug_getmetatable) | ||
31 | { /* debug.getmetatable(v): return v's metatable, or nil if it has none. */ | ||
32 | lj_lib_checkany(L, 1); | ||
33 | if (!lua_getmetatable(L, 1)) { | ||
34 | setnilV(L->top-1); /* Nothing was pushed: overwrite the top slot with nil as the result. */ | ||
35 | } | ||
36 | return 1; | ||
37 | } | ||
38 | |||
39 | LJLIB_CF(debug_setmetatable) | ||
40 | { /* debug.setmetatable(v, mt): mt must be a table or nil; returns true. */ | ||
41 | lj_lib_checktabornil(L, 2); | ||
42 | L->top = L->base+2; /* Make mt the top of the stack for lua_setmetatable. */ | ||
43 | lua_setmetatable(L, 1); | ||
44 | setboolV(L->top-1, 1); /* Overwrite the top slot with true as the result. */ | ||
45 | return 1; | ||
46 | } | ||
47 | |||
48 | LJLIB_CF(debug_getfenv) | ||
49 | { /* debug.getfenv(o): return what lua_getfenv pushes for argument 1. */ | ||
50 | lj_lib_checkany(L, 1); | ||
51 | lua_getfenv(L, 1); | ||
52 | return 1; | ||
53 | } | ||
54 | |||
55 | LJLIB_CF(debug_setfenv) | ||
56 | { /* debug.setfenv(o, table): set o's environment via lua_setfenv and return o. */ | ||
57 | lj_lib_checktab(L, 2); | ||
58 | L->top = L->base+2; /* Make the table the top of the stack for lua_setfenv. */ | ||
59 | if (!lua_setfenv(L, 1)) | ||
60 | lj_err_caller(L, LJ_ERR_SETFENV); | ||
61 | return 1; | ||
62 | } | ||
63 | |||
64 | /* ------------------------------------------------------------------------ */ | ||
65 | |||
66 | static void settabss(lua_State *L, const char *i, const char *v) | ||
67 | { /* Set field i of the table on top of the stack to the string v. */ | ||
68 | lua_pushstring(L, v); | ||
69 | lua_setfield(L, -2, i); /* -2: the table is now below the pushed value. */ | ||
70 | } | ||
71 | |||
72 | static void settabsi(lua_State *L, const char *i, int v) | ||
73 | { /* Set field i of the table on top of the stack to the integer v. */ | ||
74 | lua_pushinteger(L, v); | ||
75 | lua_setfield(L, -2, i); /* -2: the table is now below the pushed value. */ | ||
76 | } | ||
77 | |||
78 | static lua_State *getthread(lua_State *L, int *arg) | ||
79 | { /* Return the thread given as first argument (*arg = 1), or L itself (*arg = 0) if there is none. */ | ||
80 | int hasthread = (L->base < L->top && tvisthread(L->base)) ? 1 : 0; | ||
81 | *arg = hasthread; /* offset callers add to their argument indexes */ | ||
82 | if (!hasthread) | ||
83 | return L; | ||
84 | return threadV(L->base); | ||
85 | } | ||
88 | |||
89 | static void treatstackoption(lua_State *L, lua_State *L1, const char *fname) | ||
90 | { /* Store the extra value produced on L1 into field fname of the table on top of L. */ | ||
91 | if (L != L1) { | ||
92 | lua_xmove(L1, L, 1); /* other thread: move the value over to L */ | ||
93 | } else { /* same thread: the value sits just below the table, bring it to the top */ | ||
94 | lua_pushvalue(L, -2); | ||
95 | lua_remove(L, -3); | ||
96 | } | ||
97 | lua_setfield(L, -2, fname); | ||
98 | } | ||
99 | |||
100 | LJLIB_CF(debug_getinfo) | ||
101 | { /* debug.getinfo([thread,] function-or-level [, what]): return a table describing a function. */ | ||
102 | lua_Debug ar; | ||
103 | int arg; | ||
104 | lua_State *L1 = getthread(L, &arg); /* arg is 1 when a thread was passed first, else 0. */ | ||
105 | const char *options = luaL_optstring(L, arg+2, "flnSu"); | ||
106 | if (lua_isnumber(L, arg+1)) { /* A number selects a stack level of L1. */ | ||
107 | if (!lua_getstack(L1, (int)lua_tointeger(L, arg+1), &ar)) { | ||
108 | setnilV(L->top-1); /* No such level: return nil. */ | ||
109 | return 1; | ||
110 | } | ||
111 | } else if (L->base+arg < L->top && tvisfunc(L->base+arg)) { /* A function is passed to lua_getinfo on L1's stack, with '>' prefixed to the options. */ | ||
112 | options = lua_pushfstring(L, ">%s", options); | ||
113 | setfuncV(L1, L1->top++, funcV(L->base+arg)); | ||
114 | } else { | ||
115 | lj_err_arg(L, arg+1, LJ_ERR_NOFUNCL); | ||
116 | } | ||
117 | if (!lua_getinfo(L1, options, &ar)) | ||
118 | lj_err_arg(L, arg+2, LJ_ERR_INVOPT); | ||
119 | lua_createtable(L, 0, 16); | ||
120 | if (strchr(options, 'S')) { | ||
121 | settabss(L, "source", ar.source); | ||
122 | settabss(L, "short_src", ar.short_src); | ||
123 | settabsi(L, "linedefined", ar.linedefined); | ||
124 | settabsi(L, "lastlinedefined", ar.lastlinedefined); | ||
125 | settabss(L, "what", ar.what); | ||
126 | } | ||
127 | if (strchr(options, 'l')) | ||
128 | settabsi(L, "currentline", ar.currentline); | ||
129 | if (strchr(options, 'u')) | ||
130 | settabsi(L, "nups", ar.nups); | ||
131 | if (strchr(options, 'n')) { | ||
132 | settabss(L, "name", ar.name); | ||
133 | settabss(L, "namewhat", ar.namewhat); | ||
134 | } | ||
135 | if (strchr(options, 'L')) /* 'L' and 'f' results are values on L1's stack, stored via treatstackoption. */ | ||
136 | treatstackoption(L, L1, "activelines"); | ||
137 | if (strchr(options, 'f')) | ||
138 | treatstackoption(L, L1, "func"); | ||
139 | return 1; /* return table */ | ||
140 | } | ||
141 | |||
142 | LJLIB_CF(debug_getlocal) | ||
143 | { /* debug.getlocal([thread,] level, local): return the local's name and value, or nil if there is no such local. */ | ||
144 | int arg; | ||
145 | lua_State *L1 = getthread(L, &arg); | ||
146 | lua_Debug ar; | ||
147 | const char *name; | ||
148 | if (!lua_getstack(L1, lj_lib_checkint(L, arg+1), &ar)) | ||
149 | lj_err_arg(L, arg+1, LJ_ERR_LVLRNG); | ||
150 | name = lua_getlocal(L1, &ar, lj_lib_checkint(L, arg+2)); | ||
151 | if (name) { | ||
152 | lua_xmove(L1, L, 1); /* Bring the value over from L1. */ | ||
153 | lua_pushstring(L, name); | ||
154 | lua_pushvalue(L, -2); /* Copy the value above the name so the top two slots are name, value. */ | ||
155 | return 2; | ||
156 | } else { | ||
157 | setnilV(L->top-1); /* Overwrite the top slot with nil as the result. */ | ||
158 | return 1; | ||
159 | } | ||
160 | } | ||
161 | |||
162 | LJLIB_CF(debug_setlocal) | ||
163 | { /* debug.setlocal([thread,] level, local, value): assign value to a local and return what lua_setlocal reports as its name. */ | ||
164 | int arg; | ||
165 | lua_State *L1 = getthread(L, &arg); | ||
166 | lua_Debug ar; | ||
167 | TValue *tv; | ||
168 | if (!lua_getstack(L1, lj_lib_checkint(L, arg+1), &ar)) | ||
169 | lj_err_arg(L, arg+1, LJ_ERR_LVLRNG); | ||
170 | tv = lj_lib_checkany(L, arg+3); | ||
171 | copyTV(L1, L1->top++, tv); /* Push the new value on L1 for lua_setlocal. */ | ||
172 | lua_pushstring(L, lua_setlocal(L1, &ar, lj_lib_checkint(L, arg+2))); | ||
173 | return 1; | ||
174 | } | ||
175 | |||
176 | static int debug_getupvalue(lua_State *L, int get) | ||
177 | { | ||
178 | int32_t n = lj_lib_checkint(L, 2); | ||
179 | if (isluafunc(lj_lib_checkfunc(L, 1))) { | ||
180 | const char *name = get ? lua_getupvalue(L, 1, n) : lua_setupvalue(L, 1, n); | ||
181 | if (name) { | ||
182 | lua_pushstring(L, name); | ||
183 | if (!get) return 1; | ||
184 | copyTV(L, L->top, L->top-2); | ||
185 | L->top++; | ||
186 | return 2; | ||
187 | } | ||
188 | } | ||
189 | return 0; | ||
190 | } | ||
191 | |||
192 | LJLIB_CF(debug_getupvalue) | ||
193 | { | ||
194 | return debug_getupvalue(L, 1); | ||
195 | } | ||
196 | |||
197 | LJLIB_CF(debug_setupvalue) | ||
198 | { | ||
199 | lj_lib_checkany(L, 3); | ||
200 | return debug_getupvalue(L, 0); | ||
201 | } | ||
202 | |||
203 | /* ------------------------------------------------------------------------ */ | ||
204 | |||
205 | static const char KEY_HOOK = 'h'; | ||
206 | |||
207 | static void hookf(lua_State *L, lua_Debug *ar) | ||
208 | { | ||
209 | static const char *const hooknames[] = | ||
210 | {"call", "return", "line", "count", "tail return"}; | ||
211 | lua_pushlightuserdata(L, (void *)&KEY_HOOK); | ||
212 | lua_rawget(L, LUA_REGISTRYINDEX); | ||
213 | if (lua_isfunction(L, -1)) { | ||
214 | lua_pushstring(L, hooknames[(int)ar->event]); | ||
215 | if (ar->currentline >= 0) | ||
216 | lua_pushinteger(L, ar->currentline); | ||
217 | else lua_pushnil(L); | ||
218 | lua_call(L, 2, 0); | ||
219 | } | ||
220 | } | ||
221 | |||
222 | static int makemask(const char *smask, int count) | ||
223 | { | ||
224 | int mask = 0; | ||
225 | if (strchr(smask, 'c')) mask |= LUA_MASKCALL; | ||
226 | if (strchr(smask, 'r')) mask |= LUA_MASKRET; | ||
227 | if (strchr(smask, 'l')) mask |= LUA_MASKLINE; | ||
228 | if (count > 0) mask |= LUA_MASKCOUNT; | ||
229 | return mask; | ||
230 | } | ||
231 | |||
232 | static char *unmakemask(int mask, char *smask) | ||
233 | { | ||
234 | int i = 0; | ||
235 | if (mask & LUA_MASKCALL) smask[i++] = 'c'; | ||
236 | if (mask & LUA_MASKRET) smask[i++] = 'r'; | ||
237 | if (mask & LUA_MASKLINE) smask[i++] = 'l'; | ||
238 | smask[i] = '\0'; | ||
239 | return smask; | ||
240 | } | ||
241 | |||
242 | LJLIB_CF(debug_sethook) | ||
243 | { | ||
244 | int arg, mask, count; | ||
245 | lua_Hook func; | ||
246 | (void)getthread(L, &arg); | ||
247 | if (lua_isnoneornil(L, arg+1)) { | ||
248 | lua_settop(L, arg+1); | ||
249 | func = NULL; mask = 0; count = 0; /* turn off hooks */ | ||
250 | } else { | ||
251 | const char *smask = luaL_checkstring(L, arg+2); | ||
252 | luaL_checktype(L, arg+1, LUA_TFUNCTION); | ||
253 | count = luaL_optint(L, arg+3, 0); | ||
254 | func = hookf; mask = makemask(smask, count); | ||
255 | } | ||
256 | lua_pushlightuserdata(L, (void *)&KEY_HOOK); | ||
257 | lua_pushvalue(L, arg+1); | ||
258 | lua_rawset(L, LUA_REGISTRYINDEX); | ||
259 | lua_sethook(L, func, mask, count); | ||
260 | return 0; | ||
261 | } | ||
262 | |||
263 | LJLIB_CF(debug_gethook) | ||
264 | { | ||
265 | char buff[5]; | ||
266 | int mask = lua_gethookmask(L); | ||
267 | lua_Hook hook = lua_gethook(L); | ||
268 | if (hook != NULL && hook != hookf) { /* external hook? */ | ||
269 | lua_pushliteral(L, "external hook"); | ||
270 | } else { | ||
271 | lua_pushlightuserdata(L, (void *)&KEY_HOOK); | ||
272 | lua_rawget(L, LUA_REGISTRYINDEX); /* get hook */ | ||
273 | } | ||
274 | lua_pushstring(L, unmakemask(mask, buff)); | ||
275 | lua_pushinteger(L, lua_gethookcount(L)); | ||
276 | return 3; | ||
277 | } | ||
278 | |||
279 | /* ------------------------------------------------------------------------ */ | ||
280 | |||
281 | LJLIB_CF(debug_debug) | ||
282 | { | ||
283 | for (;;) { | ||
284 | char buffer[250]; | ||
285 | fputs("lua_debug> ", stderr); | ||
286 | if (fgets(buffer, sizeof(buffer), stdin) == 0 || | ||
287 | strcmp(buffer, "cont\n") == 0) | ||
288 | return 0; | ||
289 | if (luaL_loadbuffer(L, buffer, strlen(buffer), "=(debug command)") || | ||
290 | lua_pcall(L, 0, 0, 0)) { | ||
291 | fputs(lua_tostring(L, -1), stderr); | ||
292 | fputs("\n", stderr); | ||
293 | } | ||
294 | lua_settop(L, 0); /* remove eventual returns */ | ||
295 | } | ||
296 | } | ||
297 | |||
298 | /* ------------------------------------------------------------------------ */ | ||
299 | |||
300 | #define LEVELS1 12 /* size of the first part of the stack */ | ||
301 | #define LEVELS2 10 /* size of the second part of the stack */ | ||
302 | |||
/*
** debug.traceback([thread,] [message [, level]])
** Builds a "stack traceback:" string for thread L1 and returns it, prefixed
** by the message (if given). A non-string message is returned unchanged.
** At most LEVELS1 leading and LEVELS2 trailing levels are listed; the
** levels in between are replaced by a "..." line.
*/
303 | LJLIB_CF(debug_traceback) | ||
304 | { | ||
305 | int level; | ||
306 | int firstpart = 1; /* still before eventual `...' */ | ||
307 | int arg; | ||
308 | lua_State *L1 = getthread(L, &arg); | ||
309 | lua_Debug ar; | ||
/* Optional starting level: taken from argument arg+2 when it is a number. */
310 | if (lua_isnumber(L, arg+2)) { | ||
311 | level = (int)lua_tointeger(L, arg+2); | ||
312 | lua_pop(L, 1); | ||
313 | } | ||
314 | else | ||
315 | level = (L == L1) ? 1 : 0; /* level 0 may be this own function */ | ||
/* No message: start from an empty string. String message: append "\n". */
316 | if (lua_gettop(L) == arg) | ||
317 | lua_pushliteral(L, ""); | ||
318 | else if (!lua_isstring(L, arg+1)) return 1; /* message is not a string */ | ||
319 | else lua_pushliteral(L, "\n"); | ||
320 | lua_pushliteral(L, "stack traceback:"); | ||
/* One iteration per stack level; each appends one "\n\t<location>" line. */
321 | while (lua_getstack(L1, level++, &ar)) { | ||
322 | if (level > LEVELS1 && firstpart) { | ||
323 | /* no more than `LEVELS2' more levels? */ | ||
324 | if (!lua_getstack(L1, level+LEVELS2, &ar)) { | ||
325 | level--; /* keep going */ | ||
326 | } else { | ||
327 | lua_pushliteral(L, "\n\t..."); /* too many levels */ | ||
328 | /* This only works with LuaJIT 2.x. Avoids O(n^2) behaviour. */ | ||
/* NOTE(review): a negative level presumably makes lua_getstack report the */
/* stack depth in ar.i_ci -- confirm against lua_getstack in this tree. */
329 | lua_getstack(L1, -10, &ar); | ||
330 | level = ar.i_ci - LEVELS2; | ||
331 | } | ||
332 | firstpart = 0; | ||
333 | continue; | ||
334 | } | ||
335 | lua_pushliteral(L, "\n\t"); | ||
336 | lua_getinfo(L1, "Snl", &ar); | ||
337 | lua_pushfstring(L, "%s:", ar.short_src); | ||
338 | if (ar.currentline > 0) | ||
339 | lua_pushfstring(L, "%d:", ar.currentline); | ||
340 | if (*ar.namewhat != '\0') { /* is there a name? */ | ||
341 | lua_pushfstring(L, " in function " LUA_QS, ar.name); | ||
342 | } else { | ||
343 | if (*ar.what == 'm') /* main? */ | ||
344 | lua_pushfstring(L, " in main chunk"); | ||
345 | else if (*ar.what == 'C' || *ar.what == 't') | ||
346 | lua_pushliteral(L, " ?"); /* C function or tail call */ | ||
347 | else | ||
348 | lua_pushfstring(L, " in function <%s:%d>", | ||
349 | ar.short_src, ar.linedefined); | ||
350 | } | ||
/* Fold everything above slot arg into one string to bound stack usage. */
351 | lua_concat(L, lua_gettop(L) - arg); | ||
352 | } | ||
353 | lua_concat(L, lua_gettop(L) - arg); | ||
354 | return 1; | ||
355 | } | ||
356 | |||
357 | /* ------------------------------------------------------------------------ */ | ||
358 | |||
359 | #include "lj_libdef.h" | ||
360 | |||
361 | LUALIB_API int luaopen_debug(lua_State *L) | ||
362 | { | ||
363 | LJ_LIB_REG(L, debug); | ||
364 | return 1; | ||
365 | } | ||
366 | |||
diff --git a/src/lib_init.c b/src/lib_init.c new file mode 100644 index 00000000..04ca60d9 --- /dev/null +++ b/src/lib_init.c | |||
@@ -0,0 +1,37 @@ | |||
1 | /* | ||
2 | ** Library initialization. | ||
3 | ** Major parts taken verbatim from the Lua interpreter. | ||
4 | ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h | ||
5 | */ | ||
6 | |||
7 | #define lib_init_c | ||
8 | #define LUA_LIB | ||
9 | |||
10 | #include "lua.h" | ||
11 | #include "lauxlib.h" | ||
12 | #include "lualib.h" | ||
13 | |||
14 | static const luaL_Reg lualibs[] = { | ||
15 | { "", luaopen_base }, | ||
16 | { LUA_LOADLIBNAME, luaopen_package }, | ||
17 | { LUA_TABLIBNAME, luaopen_table }, | ||
18 | { LUA_IOLIBNAME, luaopen_io }, | ||
19 | { LUA_OSLIBNAME, luaopen_os }, | ||
20 | { LUA_STRLIBNAME, luaopen_string }, | ||
21 | { LUA_MATHLIBNAME, luaopen_math }, | ||
22 | { LUA_DBLIBNAME, luaopen_debug }, | ||
23 | { LUA_BITLIBNAME, luaopen_bit }, | ||
24 | { LUA_JITLIBNAME, luaopen_jit }, | ||
25 | { NULL, NULL } | ||
26 | }; | ||
27 | |||
28 | LUALIB_API void luaL_openlibs(lua_State *L) | ||
29 | { | ||
30 | const luaL_Reg *lib = lualibs; | ||
31 | for (; lib->func; lib++) { | ||
32 | lua_pushcfunction(L, lib->func); | ||
33 | lua_pushstring(L, lib->name); | ||
34 | lua_call(L, 1, 0); | ||
35 | } | ||
36 | } | ||
37 | |||
diff --git a/src/lib_io.c b/src/lib_io.c new file mode 100644 index 00000000..01623258 --- /dev/null +++ b/src/lib_io.c | |||
@@ -0,0 +1,538 @@ | |||
1 | /* | ||
2 | ** I/O library. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | ** | ||
5 | ** Major portions taken verbatim or adapted from the Lua interpreter. | ||
6 | ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h | ||
7 | */ | ||
8 | |||
9 | #include <errno.h> | ||
10 | #include <stdio.h> | ||
11 | |||
12 | #define lib_io_c | ||
13 | #define LUA_LIB | ||
14 | |||
15 | #include "lua.h" | ||
16 | #include "lauxlib.h" | ||
17 | #include "lualib.h" | ||
18 | |||
19 | #include "lj_obj.h" | ||
20 | #include "lj_err.h" | ||
21 | #include "lj_gc.h" | ||
22 | #include "lj_ff.h" | ||
23 | #include "lj_lib.h" | ||
24 | |||
25 | /* Index of standard handles in function environment. */ | ||
26 | #define IO_INPUT 1 | ||
27 | #define IO_OUTPUT 2 | ||
28 | |||
29 | /* -- Error handling ------------------------------------------------------ */ | ||
30 | |||
31 | static int io_pushresult(lua_State *L, int ok, const char *fname) | ||
32 | { | ||
33 | if (ok) { | ||
34 | setboolV(L->top++, 1); | ||
35 | return 1; | ||
36 | } else { | ||
37 | int en = errno; /* Lua API calls may change this value. */ | ||
38 | lua_pushnil(L); | ||
39 | if (fname) | ||
40 | lua_pushfstring(L, "%s: %s", fname, strerror(en)); | ||
41 | else | ||
42 | lua_pushfstring(L, "%s", strerror(en)); | ||
43 | lua_pushinteger(L, en); | ||
44 | return 3; | ||
45 | } | ||
46 | } | ||
47 | |||
48 | static void io_file_error(lua_State *L, int arg, const char *fname) | ||
49 | { | ||
50 | lua_pushfstring(L, "%s: %s", fname, strerror(errno)); | ||
51 | luaL_argerror(L, arg, lua_tostring(L, -1)); | ||
52 | } | ||
53 | |||
54 | /* -- Open helpers -------------------------------------------------------- */ | ||
55 | |||
56 | #define io_tofilep(L) ((FILE **)luaL_checkudata(L, 1, LUA_FILEHANDLE)) | ||
57 | |||
58 | static FILE *io_tofile(lua_State *L) | ||
59 | { | ||
60 | FILE **f = io_tofilep(L); | ||
61 | if (*f == NULL) | ||
62 | lj_err_caller(L, LJ_ERR_IOCLFL); | ||
63 | return *f; | ||
64 | } | ||
65 | |||
66 | static FILE **io_file_new(lua_State *L) | ||
67 | { | ||
68 | FILE **pf = (FILE **)lua_newuserdata(L, sizeof(FILE *)); | ||
69 | *pf = NULL; | ||
70 | luaL_getmetatable(L, LUA_FILEHANDLE); | ||
71 | lua_setmetatable(L, -2); | ||
72 | return pf; | ||
73 | } | ||
74 | |||
75 | /* -- Close helpers ------------------------------------------------------- */ | ||
76 | |||
77 | static int lj_cf_io_std_close(lua_State *L) | ||
78 | { | ||
79 | lua_pushnil(L); | ||
80 | lua_pushliteral(L, "cannot close standard file"); | ||
81 | return 2; | ||
82 | } | ||
83 | |||
84 | static int lj_cf_io_pipe_close(lua_State *L) | ||
85 | { | ||
86 | FILE **p = io_tofilep(L); | ||
87 | #if defined(LUA_USE_POSIX) | ||
88 | int ok = (pclose(*p) != -1); | ||
89 | #elif defined(LUA_USE_WIN) | ||
90 | int ok = (_pclose(*p) != -1); | ||
91 | #else | ||
92 | int ok = 0; | ||
93 | #endif | ||
94 | *p = NULL; | ||
95 | return io_pushresult(L, ok, NULL); | ||
96 | } | ||
97 | |||
98 | static int lj_cf_io_file_close(lua_State *L) | ||
99 | { | ||
100 | FILE **p = io_tofilep(L); | ||
101 | int ok = (fclose(*p) == 0); | ||
102 | *p = NULL; | ||
103 | return io_pushresult(L, ok, NULL); | ||
104 | } | ||
105 | |||
106 | static int io_file_close(lua_State *L) | ||
107 | { | ||
108 | lua_getfenv(L, 1); | ||
109 | lua_getfield(L, -1, "__close"); | ||
110 | return (lua_tocfunction(L, -1))(L); | ||
111 | } | ||
112 | |||
113 | /* -- Read/write helpers -------------------------------------------------- */ | ||
114 | |||
115 | static int io_file_readnum(lua_State *L, FILE *fp) | ||
116 | { | ||
117 | lua_Number d; | ||
118 | if (fscanf(fp, LUA_NUMBER_SCAN, &d) == 1) { | ||
119 | lua_pushnumber(L, d); | ||
120 | return 1; | ||
121 | } else { | ||
122 | return 0; /* read fails */ | ||
123 | } | ||
124 | } | ||
125 | |||
126 | static int test_eof(lua_State *L, FILE *fp) | ||
127 | { | ||
128 | int c = getc(fp); | ||
129 | ungetc(c, fp); | ||
130 | lua_pushlstring(L, NULL, 0); | ||
131 | return (c != EOF); | ||
132 | } | ||
133 | |||
134 | static int io_file_readline(lua_State *L, FILE *fp) | ||
135 | { | ||
136 | luaL_Buffer b; | ||
137 | luaL_buffinit(L, &b); | ||
138 | for (;;) { | ||
139 | size_t len; | ||
140 | char *p = luaL_prepbuffer(&b); | ||
141 | if (fgets(p, LUAL_BUFFERSIZE, fp) == NULL) { /* EOF? */ | ||
142 | luaL_pushresult(&b); | ||
143 | return (strV(L->top-1)->len > 0); /* Anything read? */ | ||
144 | } | ||
145 | len = strlen(p); | ||
146 | if (len == 0 || p[len-1] != '\n') { /* Partial line? */ | ||
147 | luaL_addsize(&b, len); | ||
148 | } else { | ||
149 | luaL_addsize(&b, len - 1); /* Don't include EOL. */ | ||
150 | luaL_pushresult(&b); | ||
151 | return 1; /* Got at least an EOL. */ | ||
152 | } | ||
153 | } | ||
154 | } | ||
155 | |||
156 | static int io_file_readchars(lua_State *L, FILE *fp, size_t n) | ||
157 | { | ||
158 | size_t rlen; /* how much to read */ | ||
159 | size_t nr; /* number of chars actually read */ | ||
160 | luaL_Buffer b; | ||
161 | luaL_buffinit(L, &b); | ||
162 | rlen = LUAL_BUFFERSIZE; /* try to read that much each time */ | ||
163 | do { | ||
164 | char *p = luaL_prepbuffer(&b); | ||
165 | if (rlen > n) rlen = n; /* cannot read more than asked */ | ||
166 | nr = fread(p, 1, rlen, fp); | ||
167 | luaL_addsize(&b, nr); | ||
168 | n -= nr; /* still have to read `n' chars */ | ||
169 | } while (n > 0 && nr == rlen); /* until end of count or eof */ | ||
170 | luaL_pushresult(&b); /* close buffer */ | ||
171 | return (n == 0 || lua_objlen(L, -1) > 0); | ||
172 | } | ||
173 | |||
/*
** Worker for io.read and file:read. Reads from fp according to the format
** arguments found at stack slots start.. (0-based from L->base).
** With no formats, reads one line. Otherwise each format is either a
** string starting with '*' ("*n" number, "*l" line, "*a" rest of file) or
** a number (character count; 0 only tests for EOF).
** Returns the number of results; a failed read replaces the last result
** with nil and stops processing further formats. A stream error returns
** the nil/message/errno triple from io_pushresult() instead.
*/
174 | static int io_file_read(lua_State *L, FILE *fp, int start) | ||
175 | { | ||
176 | int ok, n, nargs = (L->top - L->base) - start; | ||
/* Reset the error indicator so the ferror() check below is about this call. */
177 | clearerr(fp); | ||
178 | if (nargs == 0) { | ||
179 | ok = io_file_readline(L, fp); | ||
180 | n = start+1; /* Return 1 result. */ | ||
181 | } else { | ||
182 | /* The results plus the buffers go on top of the args. */ | ||
183 | luaL_checkstack(L, nargs+LUA_MINSTACK, "too many arguments"); | ||
184 | ok = 1; | ||
/* n walks the format slots; the loop ends early once a read fails. */
185 | for (n = start; nargs-- && ok; n++) { | ||
186 | if (tvisstr(L->base+n)) { | ||
187 | const char *p = strVdata(L->base+n); | ||
188 | if (p[0] != '*') | ||
189 | lj_err_arg(L, n+1, LJ_ERR_INVOPT); | ||
190 | if (p[1] == 'n') | ||
191 | ok = io_file_readnum(L, fp); | ||
192 | else if (p[1] == 'l') | ||
193 | ok = io_file_readline(L, fp); | ||
194 | else if (p[1] == 'a') | ||
/* Result deliberately ignored: ok keeps its value (1) for "*a". */
195 | io_file_readchars(L, fp, ~((size_t)0)); | ||
196 | else | ||
197 | lj_err_arg(L, n+1, LJ_ERR_INVFMT); | ||
198 | } else if (tvisnum(L->base+n)) { | ||
199 | size_t len = (size_t)lj_lib_checkint(L, n+1); | ||
200 | ok = len ? io_file_readchars(L, fp, len) : test_eof(L, fp); | ||
201 | } else { | ||
202 | lj_err_arg(L, n+1, LJ_ERR_INVOPT); | ||
203 | } | ||
204 | } | ||
205 | } | ||
206 | if (ferror(fp)) | ||
207 | return io_pushresult(L, 0, NULL); | ||
208 | if (!ok) | ||
209 | setnilV(L->top-1); /* Replace last result with nil. */ | ||
/* n - start is the number of formats processed, i.e. the result count. */
210 | return n - start; | ||
211 | } | ||
212 | |||
213 | static int io_file_write(lua_State *L, FILE *fp, int start) | ||
214 | { | ||
215 | cTValue *tv; | ||
216 | int status = 1; | ||
217 | for (tv = L->base+start; tv < L->top; tv++) { | ||
218 | if (tvisstr(tv)) { | ||
219 | MSize len = strV(tv)->len; | ||
220 | status = status && (fwrite(strVdata(tv), 1, len, fp) == len); | ||
221 | } else if (tvisnum(tv)) { | ||
222 | status = status && (fprintf(fp, LUA_NUMBER_FMT, numV(tv)) > 0); | ||
223 | } else { | ||
224 | lj_lib_checkstr(L, tv-L->base+1); | ||
225 | } | ||
226 | } | ||
227 | return io_pushresult(L, status, NULL); | ||
228 | } | ||
229 | |||
230 | /* -- I/O file methods ---------------------------------------------------- */ | ||
231 | |||
232 | #define LJLIB_MODULE_io_method | ||
233 | |||
234 | LJLIB_CF(io_method_close) | ||
235 | { | ||
236 | if (lua_isnone(L, 1)) | ||
237 | lua_rawgeti(L, LUA_ENVIRONINDEX, IO_OUTPUT); | ||
238 | io_tofile(L); | ||
239 | return io_file_close(L); | ||
240 | } | ||
241 | |||
242 | LJLIB_CF(io_method_read) | ||
243 | { | ||
244 | return io_file_read(L, io_tofile(L), 1); | ||
245 | } | ||
246 | |||
247 | LJLIB_CF(io_method_write) | ||
248 | { | ||
249 | return io_file_write(L, io_tofile(L), 1); | ||
250 | } | ||
251 | |||
252 | LJLIB_CF(io_method_flush) | ||
253 | { | ||
254 | return io_pushresult(L, fflush(io_tofile(L)) == 0, NULL); | ||
255 | } | ||
256 | |||
/*
** file:seek([whence [, offset]])
** whence is one of "set", "cur", "end"; offset is read as a number and
** defaults to 0. On success the resulting file position is returned; on
** failure the nil/message/errno triple from io_pushresult().
** The seek/tell primitives are picked per platform so that 64 bit offsets
** are used where the selected function supports them.
*/
257 | LJLIB_CF(io_method_seek) | ||
258 | { | ||
259 | FILE *fp = io_tofile(L); | ||
/* NOTE(review): the third argument (1) presumably selects "cur" as the */
/* default option -- confirm against lj_lib_checkopt. */
260 | int opt = lj_lib_checkopt(L, 2, 1, "\3set\3cur\3end"); | ||
261 | lua_Number ofs; | ||
262 | int res; | ||
/* Map the option index to the matching SEEK_* constant. */
263 | if (opt == 0) opt = SEEK_SET; | ||
264 | else if (opt == 1) opt = SEEK_CUR; | ||
265 | else if (opt == 2) opt = SEEK_END; | ||
/* lj_lib_opt takes two statement blocks: the first runs when argument 3 */
/* is given, the second otherwise (as the two assignments here suggest). */
266 | lj_lib_opt(L, 3, | ||
267 | ofs = lj_lib_checknum(L, 3); | ||
268 | , | ||
269 | ofs = 0; | ||
270 | ) | ||
271 | #if defined(LUA_USE_POSIX) | ||
272 | res = fseeko(fp, (int64_t)ofs, opt); | ||
273 | #elif _MSC_VER >= 1400 | ||
274 | res = _fseeki64(fp, (int64_t)ofs, opt); | ||
275 | #elif defined(__MINGW32__) | ||
276 | res = fseeko64(fp, (int64_t)ofs, opt); | ||
277 | #else | ||
278 | res = fseek(fp, (long)ofs, opt); | ||
279 | #endif | ||
280 | if (res) | ||
281 | return io_pushresult(L, 0, NULL); | ||
282 | #if defined(LUA_USE_POSIX) | ||
283 | ofs = cast_num(ftello(fp)); | ||
284 | #elif _MSC_VER >= 1400 | ||
285 | ofs = cast_num(_ftelli64(fp)); | ||
286 | #elif defined(__MINGW32__) | ||
287 | ofs = cast_num(ftello64(fp)); | ||
288 | #else | ||
289 | ofs = cast_num(ftell(fp)); | ||
290 | #endif | ||
/* The result overwrites the topmost argument slot instead of being pushed. */
291 | setnumV(L->top-1, ofs); | ||
292 | return 1; | ||
293 | } | ||
294 | |||
295 | LJLIB_CF(io_method_setvbuf) | ||
296 | { | ||
297 | FILE *fp = io_tofile(L); | ||
298 | int opt = lj_lib_checkopt(L, 2, -1, "\4full\4line\2no"); | ||
299 | size_t sz = (size_t)lj_lib_optint(L, 3, LUAL_BUFFERSIZE); | ||
300 | if (opt == 0) opt = _IOFBF; | ||
301 | else if (opt == 1) opt = _IOLBF; | ||
302 | else if (opt == 2) opt = _IONBF; | ||
303 | return io_pushresult(L, (setvbuf(fp, NULL, opt, sz) == 0), NULL); | ||
304 | } | ||
305 | |||
306 | /* Forward declaration. */ | ||
307 | static void io_file_lines(lua_State *L, int idx, int toclose); | ||
308 | |||
309 | LJLIB_CF(io_method_lines) | ||
310 | { | ||
311 | io_tofile(L); | ||
312 | io_file_lines(L, 1, 0); | ||
313 | return 1; | ||
314 | } | ||
315 | |||
316 | LJLIB_CF(io_method___gc) | ||
317 | { | ||
318 | FILE *fp = *io_tofilep(L); | ||
319 | if (fp != NULL) io_file_close(L); | ||
320 | return 0; | ||
321 | } | ||
322 | |||
323 | LJLIB_CF(io_method___tostring) | ||
324 | { | ||
325 | FILE *fp = *io_tofilep(L); | ||
326 | if (fp == NULL) | ||
327 | lua_pushliteral(L, "file (closed)"); | ||
328 | else | ||
329 | lua_pushfstring(L, "file (%p)", fp); | ||
330 | return 1; | ||
331 | } | ||
332 | |||
333 | LJLIB_PUSH(top-1) LJLIB_SET(__index) | ||
334 | |||
335 | #include "lj_libdef.h" | ||
336 | |||
337 | /* -- I/O library functions ----------------------------------------------- */ | ||
338 | |||
339 | #define LJLIB_MODULE_io | ||
340 | |||
341 | LJLIB_PUSH(top-2) LJLIB_SET(!) /* Set environment. */ | ||
342 | |||
343 | static FILE *io_file_get(lua_State *L, int findex) | ||
344 | { | ||
345 | GCtab *fenv = tabref(curr_func(L)->c.env); | ||
346 | GCudata *ud = udataV(&tvref(fenv->array)[findex]); | ||
347 | FILE *fp = *(FILE **)uddata(ud); | ||
348 | if (fp == NULL) | ||
349 | lj_err_caller(L, LJ_ERR_IOSTDCL); | ||
350 | return fp; | ||
351 | } | ||
352 | |||
353 | LJLIB_CF(io_open) | ||
354 | { | ||
355 | const char *fname = luaL_checkstring(L, 1); | ||
356 | const char *mode = luaL_optstring(L, 2, "r"); | ||
357 | FILE **pf = io_file_new(L); | ||
358 | *pf = fopen(fname, mode); | ||
359 | return (*pf == NULL) ? io_pushresult(L, 0, fname) : 1; | ||
360 | } | ||
361 | |||
362 | LJLIB_CF(io_tmpfile) | ||
363 | { | ||
364 | FILE **pf = io_file_new(L); | ||
365 | *pf = tmpfile(); | ||
366 | return (*pf == NULL) ? io_pushresult(L, 0, NULL) : 1; | ||
367 | } | ||
368 | |||
369 | LJLIB_CF(io_close) | ||
370 | { | ||
371 | return lj_cf_io_method_close(L); | ||
372 | } | ||
373 | |||
374 | LJLIB_CF(io_read) | ||
375 | { | ||
376 | return io_file_read(L, io_file_get(L, IO_INPUT), 0); | ||
377 | } | ||
378 | |||
379 | LJLIB_CF(io_write) | ||
380 | { | ||
381 | return io_file_write(L, io_file_get(L, IO_OUTPUT), 0); | ||
382 | } | ||
383 | |||
384 | LJLIB_CF(io_flush) | ||
385 | { | ||
386 | return io_pushresult(L, fflush(io_file_get(L, IO_OUTPUT)) == 0, NULL); | ||
387 | } | ||
388 | |||
389 | LJLIB_NOREG LJLIB_CF(io_lines_iter) | ||
390 | { | ||
391 | FILE *fp = *(FILE **)uddata(udataV(lj_lib_upvalue(L, 1))); | ||
392 | int ok; | ||
393 | if (fp == NULL) | ||
394 | lj_err_caller(L, LJ_ERR_IOCLFL); | ||
395 | ok = io_file_readline(L, fp); | ||
396 | if (ferror(fp)) | ||
397 | return luaL_error(L, "%s", strerror(errno)); | ||
398 | if (ok) | ||
399 | return 1; | ||
400 | if (tvistrue(lj_lib_upvalue(L, 2))) { /* Need to close file? */ | ||
401 | L->top = L->base+1; | ||
402 | setudataV(L, L->base, udataV(lj_lib_upvalue(L, 1))); | ||
403 | io_file_close(L); | ||
404 | } | ||
405 | return 0; | ||
406 | } | ||
407 | |||
408 | static void io_file_lines(lua_State *L, int idx, int toclose) | ||
409 | { | ||
410 | lua_pushvalue(L, idx); | ||
411 | lua_pushboolean(L, toclose); | ||
412 | lua_pushcclosure(L, lj_cf_io_lines_iter, 2); | ||
413 | funcV(L->top-1)->c.ffid = FF_io_lines_iter; | ||
414 | } | ||
415 | |||
416 | LJLIB_CF(io_lines) | ||
417 | { | ||
418 | if (lua_isnoneornil(L, 1)) { /* no arguments? */ | ||
419 | /* will iterate over default input */ | ||
420 | lua_rawgeti(L, LUA_ENVIRONINDEX, IO_INPUT); | ||
421 | return lj_cf_io_method_lines(L); | ||
422 | } else { | ||
423 | const char *fname = luaL_checkstring(L, 1); | ||
424 | FILE **pf = io_file_new(L); | ||
425 | *pf = fopen(fname, "r"); | ||
426 | if (*pf == NULL) | ||
427 | io_file_error(L, 1, fname); | ||
428 | io_file_lines(L, lua_gettop(L), 1); | ||
429 | return 1; | ||
430 | } | ||
431 | } | ||
432 | |||
433 | static int io_std_get(lua_State *L, int fp, const char *mode) | ||
434 | { | ||
435 | if (!lua_isnoneornil(L, 1)) { | ||
436 | const char *fname = lua_tostring(L, 1); | ||
437 | if (fname) { | ||
438 | FILE **pf = io_file_new(L); | ||
439 | *pf = fopen(fname, mode); | ||
440 | if (*pf == NULL) | ||
441 | io_file_error(L, 1, fname); | ||
442 | } else { | ||
443 | io_tofile(L); /* check that it's a valid file handle */ | ||
444 | lua_pushvalue(L, 1); | ||
445 | } | ||
446 | lua_rawseti(L, LUA_ENVIRONINDEX, fp); | ||
447 | } | ||
448 | /* return current value */ | ||
449 | lua_rawgeti(L, LUA_ENVIRONINDEX, fp); | ||
450 | return 1; | ||
451 | } | ||
452 | |||
453 | LJLIB_CF(io_input) | ||
454 | { | ||
455 | return io_std_get(L, IO_INPUT, "r"); | ||
456 | } | ||
457 | |||
458 | LJLIB_CF(io_output) | ||
459 | { | ||
460 | return io_std_get(L, IO_OUTPUT, "w"); | ||
461 | } | ||
462 | |||
463 | LJLIB_CF(io_type) | ||
464 | { | ||
465 | void *ud; | ||
466 | luaL_checkany(L, 1); | ||
467 | ud = lua_touserdata(L, 1); | ||
468 | lua_getfield(L, LUA_REGISTRYINDEX, LUA_FILEHANDLE); | ||
469 | if (ud == NULL || !lua_getmetatable(L, 1) || !lua_rawequal(L, -2, -1)) | ||
470 | lua_pushnil(L); /* not a file */ | ||
471 | else if (*((FILE **)ud) == NULL) | ||
472 | lua_pushliteral(L, "closed file"); | ||
473 | else | ||
474 | lua_pushliteral(L, "file"); | ||
475 | return 1; | ||
476 | } | ||
477 | |||
478 | LJLIB_PUSH(top-3) LJLIB_SET(!) /* Set environment. */ | ||
479 | |||
480 | LJLIB_CF(io_popen) | ||
481 | { | ||
482 | #if defined(LUA_USE_POSIX) || defined(LUA_USE_WIN) | ||
483 | const char *fname = luaL_checkstring(L, 1); | ||
484 | const char *mode = luaL_optstring(L, 2, "r"); | ||
485 | FILE **pf = io_file_new(L); | ||
486 | #ifdef LUA_USE_POSIX | ||
487 | fflush(NULL); | ||
488 | *pf = popen(fname, mode); | ||
489 | #else | ||
490 | *pf = _popen(fname, mode); | ||
491 | #endif | ||
492 | return (*pf == NULL) ? io_pushresult(L, 0, fname) : 1; | ||
493 | #else | ||
494 | luaL_error(L, LUA_QL("popen") " not supported"); | ||
495 | #endif | ||
496 | } | ||
497 | |||
498 | #include "lj_libdef.h" | ||
499 | |||
500 | /* ------------------------------------------------------------------------ */ | ||
501 | |||
/*
** Create the file handle for a standard stream fp and register it.
** Expects an environment table on top of the stack on entry; it becomes
** the environment of the new userdata. If k > 0 the handle is also stored
** at array index k of the table at stack index -5 (relative to the stack
** after the extra push). Finally the handle is stored under fname in the
** table at index -3, which pops it.
** NOTE(review): which tables live at -5 and -3 depends on the stack set up
** by luaopen_io (presumably the function environment and the io library
** table) -- confirm against LJ_LIB_REG.
*/
502 | static void io_std_new(lua_State *L, FILE *fp, int k, const char *fname) | ||
503 | { | ||
504 | FILE **pf = io_file_new(L); | ||
505 | GCudata *ud = udataV(L->top-1); | ||
506 | GCtab *envt = tabV(L->top-2); | ||
507 | *pf = fp; | ||
/* Set the userdata environment directly and apply the GC object barrier. */
508 | setgcref(ud->env, obj2gco(envt)); | ||
509 | lj_gc_objbarrier(L, obj2gco(ud), envt); | ||
510 | if (k > 0) { | ||
511 | lua_pushvalue(L, -1); | ||
512 | lua_rawseti(L, -5, k); | ||
513 | } | ||
514 | lua_setfield(L, -3, fname); | ||
515 | } | ||
516 | |||
517 | static void io_fenv_new(lua_State *L, int narr, lua_CFunction cls) | ||
518 | { | ||
519 | lua_createtable(L, narr, 1); | ||
520 | lua_pushcfunction(L, cls); | ||
521 | lua_setfield(L, -2, "__close"); | ||
522 | } | ||
523 | |||
/*
** Open the io library.
** Registers the file methods and stores the result in the registry under
** LUA_FILEHANDLE, creates the environment tables for pipes and regular
** files, registers the io functions and creates the stdin/stdout/stderr
** handles with an environment that refuses to close them.
*/
524 | LUALIB_API int luaopen_io(lua_State *L) | ||
525 | { | ||
526 | LJ_LIB_REG_(L, NULL, io_method); | ||
527 | lua_setfield(L, LUA_REGISTRYINDEX, LUA_FILEHANDLE); | ||
/* The two tables below are consumed by the LJLIB_PUSH(top-3)/(top-2) */
/* directives of the io module, hence the position comments. */
528 | io_fenv_new(L, 0, lj_cf_io_pipe_close); /* top-3 */ | ||
529 | io_fenv_new(L, 2, lj_cf_io_file_close); /* top-2 */ | ||
530 | LJ_LIB_REG(L, io); | ||
/* Environment for the standard handles: its __close refuses to close. */
531 | io_fenv_new(L, 0, lj_cf_io_std_close); | ||
532 | io_std_new(L, stdin, IO_INPUT, "stdin"); | ||
533 | io_std_new(L, stdout, IO_OUTPUT, "stdout"); | ||
/* k == 0: stderr is not stored as a default handle. */
534 | io_std_new(L, stderr, 0, "stderr"); | ||
/* Drop the environment table of the standard handles. */
535 | lua_pop(L, 1); | ||
536 | return 1; | ||
537 | } | ||
538 | |||
diff --git a/src/lib_jit.c b/src/lib_jit.c new file mode 100644 index 00000000..4a57f3b4 --- /dev/null +++ b/src/lib_jit.c | |||
@@ -0,0 +1,589 @@ | |||
1 | /* | ||
2 | ** JIT library. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #define lib_jit_c | ||
7 | #define LUA_LIB | ||
8 | |||
9 | #include "lua.h" | ||
10 | #include "lauxlib.h" | ||
11 | #include "lualib.h" | ||
12 | |||
13 | #include "lj_arch.h" | ||
14 | #include "lj_obj.h" | ||
15 | #include "lj_err.h" | ||
16 | #include "lj_str.h" | ||
17 | #include "lj_tab.h" | ||
18 | #if LJ_HASJIT | ||
19 | #include "lj_ir.h" | ||
20 | #include "lj_jit.h" | ||
21 | #include "lj_iropt.h" | ||
22 | #endif | ||
23 | #include "lj_dispatch.h" | ||
24 | #include "lj_vm.h" | ||
25 | #include "lj_vmevent.h" | ||
26 | #include "lj_lib.h" | ||
27 | |||
28 | #include "luajit.h" | ||
29 | |||
30 | /* -- jit.* functions ----------------------------------------------------- */ | ||
31 | |||
32 | #define LJLIB_MODULE_jit | ||
33 | |||
/*
** Shared worker for jit.on, jit.off and jit.flush. mode is the base
** LUAJIT_MODE_* action; the scope bits are derived from the arguments:
**   no argument or nil      -> LUAJIT_MODE_ENGINE
**   func|proto or true      -> LUAJIT_MODE_FUNC, or with a boolean second
**                              argument: true -> LUAJIT_MODE_ALLFUNC,
**                              false -> LUAJIT_MODE_ALLSUBFUNC
** Any other first argument, or a luaJIT_setmode() result other than 1,
** raises an error. Returns 0 results.
*/
34 | static int setjitmode(lua_State *L, int mode) | ||
35 | { | ||
/* idx is passed to luaJIT_setmode(): 1 for an explicit func/proto argument, */
/* 0 otherwise. */
36 | int idx = 0; | ||
37 | if (L->base == L->top || tvisnil(L->base)) { /* jit.on/off/flush([nil]) */ | ||
38 | mode |= LUAJIT_MODE_ENGINE; | ||
39 | } else { | ||
40 | /* jit.on/off/flush(func|proto, nil|true|false) */ | ||
41 | if (tvisfunc(L->base) || tvisproto(L->base)) | ||
42 | idx = 1; | ||
43 | else if (!tvistrue(L->base)) /* jit.on/off/flush(true, nil|true|false) */ | ||
44 | goto err; | ||
45 | if (L->base+1 < L->top && tvisbool(L->base+1)) | ||
46 | mode |= boolV(L->base+1) ? LUAJIT_MODE_ALLFUNC : LUAJIT_MODE_ALLSUBFUNC; | ||
47 | else | ||
48 | mode |= LUAJIT_MODE_FUNC; | ||
49 | } | ||
50 | if (luaJIT_setmode(L, idx, mode) != 1) { | ||
/* The err label sits inside this block and is also reached by the goto above. */
51 | err: | ||
52 | #if LJ_HASJIT | ||
53 | lj_err_arg(L, 1, LJ_ERR_NOLFUNC); | ||
54 | #else | ||
55 | lj_err_caller(L, LJ_ERR_NOJIT); | ||
56 | #endif | ||
57 | } | ||
58 | return 0; | ||
59 | } | ||
60 | |||
61 | LJLIB_CF(jit_on) | ||
62 | { | ||
63 | return setjitmode(L, LUAJIT_MODE_ON); | ||
64 | } | ||
65 | |||
66 | LJLIB_CF(jit_off) | ||
67 | { | ||
68 | return setjitmode(L, LUAJIT_MODE_OFF); | ||
69 | } | ||
70 | |||
71 | LJLIB_CF(jit_flush) | ||
72 | { | ||
73 | #if LJ_HASJIT | ||
74 | if (L->base < L->top && (tvisnum(L->base) || tvisstr(L->base))) { | ||
75 | int traceno = lj_lib_checkint(L, 1); | ||
76 | luaJIT_setmode(L, traceno, LUAJIT_MODE_FLUSH|LUAJIT_MODE_TRACE); | ||
77 | return 0; | ||
78 | } | ||
79 | #endif | ||
80 | return setjitmode(L, LUAJIT_MODE_FLUSH); | ||
81 | } | ||
82 | |||
#if LJ_HASJIT
/*
** Push one string for every flag bit that is set.
** str is a sequence of length-prefixed names terminated by a zero byte;
** the first name belongs to bit `base', each following name to the next
** higher bit.
*/
static void flagbits_to_strings(lua_State *L, uint32_t flags, uint32_t base,
				const char *str)
{
  while (*str) {
    if (flags & base) {
      GCstr *name = lj_str_new(L, str+1, *(uint8_t *)str);
      setstrV(L, L->top++, name);
    }
    base <<= 1;
    str += 1+*str;  /* Skip the length byte plus the name. */
  }
}
#endif
93 | |||
94 | LJLIB_CF(jit_status) | ||
95 | { | ||
96 | #if LJ_HASJIT | ||
97 | jit_State *J = L2J(L); | ||
98 | L->top = L->base; | ||
99 | setboolV(L->top++, (J->flags & JIT_F_ON) ? 1 : 0); | ||
100 | flagbits_to_strings(L, J->flags, JIT_F_CPU_FIRST, JIT_F_CPUSTRING); | ||
101 | flagbits_to_strings(L, J->flags, JIT_F_OPT_FIRST, JIT_F_OPTSTRING); | ||
102 | return L->top - L->base; | ||
103 | #else | ||
104 | setboolV(L->top++, 0); | ||
105 | return 1; | ||
106 | #endif | ||
107 | } | ||
108 | |||
/*
** jit.attach(func [, event])
** With an event name: stores func in the VM event table (kept in the
** registry under LJ_VMEVENTS_REGKEY) at the hash index derived from the
** event name and invalidates the cached event mask.
** Without an event name: removes every entry of that table whose value
** is func. Raises an error if the vmevent API is compiled out.
*/
109 | LJLIB_CF(jit_attach) | ||
110 | { | ||
111 | #ifdef LUAJIT_DISABLE_VMEVENT | ||
112 | luaL_error(L, "vmevent API disabled"); | ||
113 | #else | ||
114 | GCfunc *fn = lj_lib_checkfunc(L, 1); | ||
115 | GCstr *s = lj_lib_optstr(L, 2); | ||
/* Pushes the event table, creating it in the registry if needed. */
116 | luaL_findtable(L, LUA_REGISTRYINDEX, LJ_VMEVENTS_REGKEY, LJ_VMEVENTS_HSIZE); | ||
117 | if (s) { /* Attach to given event. */ | ||
118 | lua_pushvalue(L, 1); | ||
119 | lua_rawseti(L, -2, VMEVENT_HASHIDX(s->hash)); | ||
120 | G(L)->vmevmask = VMEVENT_NOCACHE; /* Invalidate cache. */ | ||
121 | } else { /* Detach if no event given. */ | ||
/* Push nil as the initial key for the lua_next() traversal. */
122 | setnilV(L->top++); | ||
123 | while (lua_next(L, -2)) { | ||
/* Drop the value; it is still readable at L->top, the key is at top-1. */
124 | L->top--; | ||
125 | if (tvisfunc(L->top) && funcV(L->top) == fn) { | ||
/* Clear this entry: table is at top-2, key at top-1. */
126 | setnilV(lj_tab_set(L, tabV(L->top-2), L->top-1)); | ||
127 | } | ||
128 | } | ||
129 | } | ||
130 | #endif | ||
131 | return 0; | ||
132 | } | ||
133 | |||
134 | LJLIB_PUSH(top-4) LJLIB_SET(arch) | ||
135 | LJLIB_PUSH(top-3) LJLIB_SET(version_num) | ||
136 | LJLIB_PUSH(top-2) LJLIB_SET(version) | ||
137 | |||
138 | #include "lj_libdef.h" | ||
139 | |||
140 | /* -- jit.util.* functions ------------------------------------------------ */ | ||
141 | |||
142 | #define LJLIB_MODULE_jit_util | ||
143 | |||
144 | /* -- Reflection API for Lua functions ------------------------------------ */ | ||
145 | |||
146 | /* Return prototype of first argument (Lua function or prototype object) */ | ||
147 | static GCproto *check_Lproto(lua_State *L, int nolua) | ||
148 | { | ||
149 | TValue *o = L->base; | ||
150 | if (L->top > o) { | ||
151 | if (tvisproto(o)) { | ||
152 | return protoV(o); | ||
153 | } else if (tvisfunc(o)) { | ||
154 | if (isluafunc(funcV(o))) | ||
155 | return funcproto(funcV(o)); | ||
156 | else if (nolua) | ||
157 | return NULL; | ||
158 | } | ||
159 | } | ||
160 | lj_err_argt(L, 1, LUA_TFUNCTION); | ||
161 | return NULL; /* unreachable */ | ||
162 | } | ||
163 | |||
164 | static void setintfield(lua_State *L, GCtab *t, const char *name, int32_t val) | ||
165 | { | ||
166 | setintV(lj_tab_setstr(L, t, lj_str_newz(L, name)), val); | ||
167 | } | ||
168 | |||
169 | /* local info = jit.util.funcinfo(func [,pc]) -- table describing func (and pc, if given). */ | ||
170 | LJLIB_CF(jit_util_funcinfo) | ||
171 | { | ||
172 | GCproto *pt = check_Lproto(L, 1); /* NULL for a non-Lua function. */ | ||
173 | if (pt) { /* Lua function or prototype: describe the prototype. */ | ||
174 | BCPos pc = (BCPos)lj_lib_optint(L, 2, 0); /* 0 when no pc is given. */ | ||
175 | GCtab *t; | ||
176 | lua_createtable(L, 0, 16); /* Increment hash size if fields are added. */ | ||
177 | t = tabV(L->top-1); | ||
178 | setintfield(L, t, "linedefined", pt->linedefined); | ||
179 | setintfield(L, t, "lastlinedefined", pt->lastlinedefined); | ||
180 | setintfield(L, t, "stackslots", pt->framesize); | ||
181 | setintfield(L, t, "params", pt->numparams); | ||
182 | setintfield(L, t, "bytecodes", (int32_t)pt->sizebc); | ||
183 | setintfield(L, t, "gcconsts", (int32_t)pt->sizekgc); | ||
184 | setintfield(L, t, "nconsts", (int32_t)pt->sizekn); | ||
185 | setintfield(L, t, "upvalues", (int32_t)pt->sizeuv); | ||
186 | if (pc > 0 && pc <= pt->sizebc) /* Never index lineinfo with an out-of-range pc (assumes one entry per bytecode -- TODO confirm). */ | ||
187 | setintfield(L, t, "currentline", pt->lineinfo ? pt->lineinfo[pc-1] : 0); | ||
188 | lua_pushboolean(L, (pt->flags & PROTO_IS_VARARG)); | ||
189 | lua_setfield(L, -2, "isvararg"); | ||
190 | setstrV(L, L->top++, pt->chunkname); | ||
191 | lua_setfield(L, -2, "source"); | ||
192 | lj_err_pushloc(L, pt, pc); /* NOTE(review): pc is passed on unchecked -- confirm the callee copes with out-of-range values. */ | ||
193 | lua_setfield(L, -2, "loc"); | ||
194 | } else { /* Non-Lua function: only ffid and the upvalue count are reported. */ | ||
195 | GCfunc *fn = funcV(L->base); | ||
196 | GCtab *t; | ||
197 | lua_createtable(L, 0, 2); /* Increment hash size if fields are added. */ | ||
198 | t = tabV(L->top-1); | ||
199 | setintfield(L, t, "ffid", fn->c.ffid); | ||
200 | setintfield(L, t, "upvalues", fn->c.nupvalues); | ||
201 | } | ||
202 | return 1; /* The info table is on top of the stack. */ | ||
203 | } | ||
204 | |||
205 | /* local ins, m = jit.util.funcbc(func, pc) -- nothing if pc is out of range. */ | ||
206 | LJLIB_CF(jit_util_funcbc) | ||
207 | { | ||
208 | GCproto *proto = check_Lproto(L, 0); | ||
209 | BCPos idx = (BCPos)lj_lib_checkint(L, 2) - 1; /* 1-based pc -> 0-based index. */ | ||
210 | BCIns bcins; | ||
211 | BCOp opcode; | ||
212 | if (idx >= proto->sizebc) | ||
213 | return 0; | ||
214 | bcins = proto->bc[idx]; | ||
215 | opcode = bc_op(bcins); | ||
216 | lua_assert(opcode < BC__MAX); | ||
217 | setintV(L->top++, bcins); /* Result 1: the raw instruction word. */ | ||
218 | setintV(L->top++, lj_bc_mode[opcode]); /* Result 2: the mode entry for its opcode. */ | ||
219 | return 2; | ||
220 | } | ||
221 | |||
222 | /* local k = jit.util.funck(func, idx) -- idx >= 0: numeric constant idx, idx < 0: GC constant ~idx. */ | ||
223 | LJLIB_CF(jit_util_funck) | ||
224 | { | ||
225 | GCproto *pt = check_Lproto(L, 0); | ||
226 | int32_t idx = (int32_t)lj_lib_checkint(L, 2); /* Kept signed: negative values subscript k.gc downwards. */ | ||
227 | if (idx >= 0) { | ||
228 | if ((MSize)idx < pt->sizekn) { | ||
229 | setnumV(L->top-1, pt->k.n[idx]); | ||
230 | return 1; | ||
231 | } | ||
232 | } else { | ||
233 | if ((MSize)~idx < pt->sizekgc) { | ||
234 | GCobj *gc = gcref(pt->k.gc[idx]); /* Signed subscript: stays negative even if pointers are wider than 32 bits. */ | ||
235 | setgcV(L, L->top-1, &gc->gch, ~gc->gch.gct); | ||
236 | return 1; | ||
237 | } | ||
238 | } | ||
239 | return 0; /* Index out of range: no result. */ | ||
240 | } | ||
241 | |||
242 | /* local name = jit.util.funcuvname(func, idx) -- nothing if idx is out of range. */ | ||
243 | LJLIB_CF(jit_util_funcuvname) | ||
244 | { | ||
245 | GCproto *proto = check_Lproto(L, 0); | ||
246 | uint32_t uvidx = (uint32_t)lj_lib_checkint(L, 2); | ||
247 | if (uvidx >= proto->sizeuvname) | ||
248 | return 0; /* A negative idx wraps to a huge unsigned value and lands here too. */ | ||
249 | /* The name replaces the topmost stack slot and is returned from there. */ | ||
250 | setstrV(L, L->top-1, proto->uvname[uvidx]); | ||
251 | return 1; | ||
252 | } | ||
253 | |||
254 | /* -- Reflection API for traces ------------------------------------------- */ | ||
255 | |||
256 | #if LJ_HASJIT | ||
257 | |||
258 | /* Check trace argument. Must not throw for non-existent trace numbers. */ | ||
259 | static Trace *jit_checktrace(lua_State *L) | ||
260 | { | ||
261 | TraceNo tr = (TraceNo)lj_lib_checkint(L, 1); | ||
262 | jit_State *J = L2J(L); | ||
263 | if (tr > 0 && tr < J->sizetrace) | ||
264 | return J->trace[tr]; | ||
265 | return NULL; | ||
266 | } | ||
267 | |||
268 | /* local info = jit.util.traceinfo(tr) -- nothing for a non-existent trace. */ | ||
269 | LJLIB_CF(jit_util_traceinfo) | ||
270 | { | ||
271 | Trace *trace = jit_checktrace(L); | ||
272 | GCtab *info; | ||
273 | if (trace == NULL) | ||
274 | return 0; | ||
275 | lua_createtable(L, 0, 4); /* Increment hash size if fields are added. */ | ||
276 | info = tabV(L->top-1); | ||
277 | /* nins and nk are reported relative to REF_BIAS. */ | ||
278 | setintfield(L, info, "nins", (int32_t)trace->nins - REF_BIAS - 1); | ||
279 | setintfield(L, info, "nk", REF_BIAS - (int32_t)trace->nk); | ||
280 | setintfield(L, info, "link", trace->link); | ||
281 | setintfield(L, info, "nexit", trace->nsnap); | ||
282 | /* There are many more fields. Add them only when needed. */ | ||
283 | return 1; | ||
284 | } | ||
285 | |||
286 | /* local m, ot, op1, op2, prev = jit.util.traceir(tr, idx) -- nothing if tr or idx is invalid. */ | ||
287 | LJLIB_CF(jit_util_traceir) | ||
288 | { | ||
289 | Trace *T = jit_checktrace(L); | ||
290 | IRRef ref = (IRRef)lj_lib_checkint(L, 2) + REF_BIAS; /* The user-visible idx is relative to REF_BIAS. */ | ||
291 | if (T && ref >= REF_BIAS && ref < T->nins) { /* Only refs in [REF_BIAS, nins) are accepted. */ | ||
292 | IRIns *ir = &T->ir[ref]; | ||
293 | int32_t m = lj_ir_mode[ir->o]; | ||
294 | setintV(L->top-2, m); /* The first two results overwrite the two topmost stack slots. */ | ||
295 | setintV(L->top-1, ir->ot); | ||
296 | setintV(L->top++, (int32_t)ir->op1 - (irm_op1(m)==IRMref ? REF_BIAS : 0)); /* Operands that are refs are un-biased. */ | ||
297 | setintV(L->top++, (int32_t)ir->op2 - (irm_op2(m)==IRMref ? REF_BIAS : 0)); | ||
298 | setintV(L->top++, ir->prev); | ||
299 | return 5; | ||
300 | } | ||
301 | return 0; | ||
302 | } | ||
303 | |||
304 | /* local k, t [, slot] = jit.util.tracek(tr, idx) -- nothing if tr or idx is invalid. */ | ||
305 | LJLIB_CF(jit_util_tracek) | ||
306 | { | ||
307 | Trace *T = jit_checktrace(L); | ||
308 | IRRef ref = (IRRef)lj_lib_checkint(L, 2) + REF_BIAS; /* The user-visible idx is relative to REF_BIAS. */ | ||
309 | if (T && ref >= T->nk && ref < REF_BIAS) { /* Only refs in [nk, REF_BIAS) are accepted. */ | ||
310 | IRIns *ir = &T->ir[ref]; | ||
311 | int32_t slot = -1; /* -1: not a KSLOT, so no third result. */ | ||
312 | if (ir->o == IR_KSLOT) { /* Report op2 as the slot and continue with the instruction op1 refers to. */ | ||
313 | slot = ir->op2; | ||
314 | ir = &T->ir[ir->op1]; | ||
315 | } | ||
316 | lj_ir_kvalue(L, L->top-2, ir); /* NOTE(review): presumably stores the constant's value in the given slot -- confirm. */ | ||
317 | setintV(L->top-1, (int32_t)irt_type(ir->t)); | ||
318 | if (slot == -1) | ||
319 | return 2; | ||
320 | setintV(L->top++, slot); | ||
321 | return 3; | ||
322 | } | ||
323 | return 0; | ||
324 | } | ||
325 | |||
326 | /* local snap = jit.util.tracesnap(tr, sn) -- array: [0] = snapshot ref, [1..nslots] = slot refs. */ | ||
327 | LJLIB_CF(jit_util_tracesnap) | ||
328 | { | ||
329 | Trace *trace = jit_checktrace(L); | ||
330 | SnapNo snapno = (SnapNo)lj_lib_checkint(L, 2); | ||
331 | if (trace != NULL && snapno < trace->nsnap) { | ||
332 | SnapShot *snap = &trace->snap[snapno]; | ||
333 | IRRef2 *slotmap = &trace->snapmap[snap->mapofs]; | ||
334 | BCReg i, nslots = snap->nslots; | ||
335 | GCtab *res; | ||
336 | lua_createtable(L, nslots ? (int)nslots : 1, 0); | ||
337 | res = tabV(L->top-1); | ||
338 | setintV(lj_tab_setint(L, res, 0), (int32_t)snap->ref - REF_BIAS); | ||
339 | for (i = 0; i < nslots; i++) { /* Slot i is stored at table index i+1. */ | ||
340 | TValue *tv = lj_tab_setint(L, res, (int32_t)(i+1)); | ||
341 | IRRef ref = snap_ref(slotmap[i]); | ||
342 | if (!ref) | ||
343 | setboolV(tv, 0); /* No ref for this slot: stored as false. */ | ||
344 | else | ||
345 | setintV(tv, (int32_t)ref - REF_BIAS); | ||
346 | } | ||
347 | return 1; | ||
348 | } | ||
349 | return 0; | ||
350 | } | ||
351 | |||
352 | /* local mcode, addr, loop = jit.util.tracemc(tr) -- nothing if the trace has no machine code. */ | ||
353 | LJLIB_CF(jit_util_tracemc) | ||
354 | { | ||
355 | Trace *T = jit_checktrace(L); | ||
356 | if (T && T->mcode != NULL) { | ||
357 | setstrV(L, L->top-1, lj_str_new(L, (const char *)T->mcode, T->szmcode)); /* String made from szmcode bytes at mcode; replaces the top slot. */ | ||
358 | setnumV(L->top++, cast_num((intptr_t)T->mcode)); /* Address of the machine code, as a number. */ | ||
359 | setintV(L->top++, T->mcloop); | ||
360 | return 3; | ||
361 | } | ||
362 | return 0; | ||
363 | } | ||
364 | |||
365 | /* local addr = jit.util.traceexitstub(idx) -- nothing if idx is out of range. */ | ||
366 | LJLIB_CF(jit_util_traceexitstub) | ||
367 | { | ||
368 | ExitNo stubno = (ExitNo)lj_lib_checkint(L, 1); | ||
369 | jit_State *J = L2J(L); | ||
370 | if (stubno >= EXITSTUBS_PER_GROUP*LJ_MAX_EXITSTUBGR) | ||
371 | return 0; | ||
372 | /* The address is returned as a number in the topmost stack slot. */ | ||
373 | setnumV(L->top-1, cast_num((intptr_t)exitstub_addr(J, stubno))); | ||
374 | return 1; | ||
375 | } | ||
376 | |||
377 | #else | ||
378 | |||
379 | static int trace_nojit(lua_State *L) | ||
380 | { /* Stand-in for the trace reflection functions in builds without the JIT compiler: returns nothing. */ | ||
381 | UNUSED(L); | ||
382 | return 0; | ||
383 | } | ||
384 | #define lj_cf_jit_util_traceinfo trace_nojit /* Every trace function is aliased to the stub above. */ | ||
385 | #define lj_cf_jit_util_traceir trace_nojit | ||
386 | #define lj_cf_jit_util_tracek trace_nojit | ||
387 | #define lj_cf_jit_util_tracesnap trace_nojit | ||
388 | #define lj_cf_jit_util_tracemc trace_nojit | ||
389 | #define lj_cf_jit_util_traceexitstub trace_nojit | ||
390 | |||
391 | #endif | ||
392 | |||
393 | #include "lj_libdef.h" | ||
394 | |||
395 | /* -- jit.opt module ------------------------------------------------------ */ | ||
396 | |||
397 | #define LJLIB_MODULE_jit_opt | ||
398 | |||
399 | #if LJ_HASJIT | ||
400 | /* Parse optimization level: a single digit; digits above 2 all select JIT_F_OPT_3. */ | ||
401 | static int jitopt_level(jit_State *J, const char *str) | ||
402 | { | ||
403 | char digit = str[0]; | ||
404 | if (digit < '0' || digit > '9' || str[1] != '\0') | ||
405 | return 0; /* No match. */ | ||
406 | /* Swap the whole optimization flag group for the preset of this level. */ | ||
407 | J->flags = (J->flags & ~JIT_F_OPT_MASK) | | ||
408 | (digit == '0' ? JIT_F_OPT_0 : | ||
409 | digit == '1' ? JIT_F_OPT_1 : | ||
410 | digit == '2' ? JIT_F_OPT_2 : | ||
411 | JIT_F_OPT_3); | ||
412 | return 1; /* Ok. */ | ||
413 | } | ||
414 | |||
415 | /* Parse optimization flag: "name" or "+name" sets it; "-name", "noname" or "no-name" clears it. */ | ||
416 | static int jitopt_flag(jit_State *J, const char *str) | ||
417 | { | ||
418 | const char *lst = JIT_F_OPTSTRING; /* Packed list: each entry is a length byte followed by the name. */ | ||
419 | uint32_t opt; | ||
420 | int set = 1; /* 1: set the flag, 0: clear it. */ | ||
421 | if (str[0] == '+') { | ||
422 | str++; | ||
423 | } else if (str[0] == '-') { | ||
424 | str++; | ||
425 | set = 0; | ||
426 | } else if (str[0] == 'n' && str[1] == 'o') { | ||
427 | str += str[2] == '-' ? 3 : 2; /* Skip "no-" or "no". */ | ||
428 | set = 0; | ||
429 | } | ||
430 | for (opt = JIT_F_OPT_FIRST; ; opt <<= 1) { /* The n-th name in the list belongs to the n-th flag bit. */ | ||
431 | size_t len = *(const uint8_t *)lst; | ||
432 | if (len == 0) /* A zero length byte ends the list. */ | ||
433 | break; | ||
434 | if (strncmp(str, lst+1, len) == 0 && str[len] == '\0') { /* Whole-name match only. */ | ||
435 | if (set) J->flags |= opt; else J->flags &= ~opt; | ||
436 | return 1; /* Ok. */ | ||
437 | } | ||
438 | lst += 1+len; | ||
439 | } | ||
440 | return 0; /* No match. */ | ||
441 | } | ||
442 | |||
443 | /* Forward declaration. */ | ||
444 | static void jit_init_hotcount(jit_State *J); | ||
445 | |||
446 | /* Parse optimization parameter of the form "name=number". */ | ||
447 | static int jitopt_param(jit_State *J, const char *str) | ||
448 | { | ||
449 | const char *lst = JIT_P_STRING; /* Packed list: each entry is a length byte followed by the name. */ | ||
450 | int i; | ||
451 | for (i = 0; i < JIT_P__MAX; i++) { /* Entry i of the list names J->param[i]. */ | ||
452 | size_t len = *(const uint8_t *)lst; | ||
453 | TValue tv; | ||
454 | lua_assert(len != 0); | ||
455 | if (strncmp(str, lst+1, len) == 0 && str[len] == '=' && | ||
456 | lj_str_numconv(&str[len+1], &tv)) { /* The text after '=' must convert to a number. */ | ||
457 | J->param[i] = lj_num2int(tv.n); | ||
458 | if (i == JIT_P_hotloop) /* The hotcount table is seeded from this parameter. */ | ||
459 | jit_init_hotcount(J); | ||
460 | return 1; /* Ok. */ | ||
461 | } | ||
462 | lst += 1+len; | ||
463 | } | ||
464 | return 0; /* No match. */ | ||
465 | } | ||
466 | #endif | ||
467 | |||
468 | /* jit.opt.start(flags...) -- without arguments the default optimizations are restored. */ | ||
469 | LJLIB_CF(jit_opt_start) | ||
470 | { | ||
471 | #if LJ_HASJIT | ||
472 | jit_State *J = L2J(L); | ||
473 | int argc = (int)(L->top - L->base); | ||
474 | int argn; | ||
475 | if (argc == 0) | ||
476 | J->flags = (J->flags & ~JIT_F_OPT_MASK) | JIT_F_OPT_DEFAULT; | ||
477 | /* Try each argument as a level, then as a flag, then as a parameter. */ | ||
478 | for (argn = 1; argn <= argc; argn++) { | ||
479 | const char *opt = strdata(lj_lib_checkstr(L, argn)); | ||
480 | int ok = jitopt_level(J, opt) || | ||
481 | jitopt_flag(J, opt) || | ||
482 | jitopt_param(J, opt); | ||
483 | if (!ok) | ||
484 | lj_err_callerv(L, LJ_ERR_JITOPT, opt); | ||
485 | } | ||
486 | #else | ||
487 | lj_err_caller(L, LJ_ERR_NOJIT); | ||
488 | #endif | ||
489 | return 0; | ||
490 | } | ||
491 | |||
492 | #include "lj_libdef.h" | ||
493 | |||
494 | /* -- JIT compiler initialization ----------------------------------------- */ | ||
495 | |||
496 | #if LJ_HASJIT | ||
497 | /* Default values for JIT parameters, in JIT_PARAMDEF order, followed by a terminating 0. */ | ||
498 | static const int32_t jit_param_default[JIT_P__MAX+1] = { | ||
499 | #define JIT_PARAMINIT(len, name, value) (value), /* Keep only the value of each entry. */ | ||
500 | JIT_PARAMDEF(JIT_PARAMINIT) | ||
501 | #undef JIT_PARAMINIT | ||
502 | 0 | ||
503 | }; | ||
504 | |||
505 | /* Reset every entry of the hotcount table to the current "hotloop" parameter. */ | ||
506 | static void jit_init_hotcount(jit_State *J) | ||
507 | { | ||
508 | HotCount *slot = J2GG(J)->hotcount; | ||
509 | HotCount *end = slot + HOTCOUNT_SIZE; | ||
510 | HotCount initval = (HotCount)J->param[JIT_P_hotloop]; | ||
511 | while (slot < end) | ||
512 | *slot++ = initval; | ||
513 | } | ||
514 | #endif | ||
515 | |||
516 | /* Arch-dependent CPU detection. Returns the detected JIT_F_* flags; raises an error for unsupported CPUs. */ | ||
517 | static uint32_t jit_cpudetect(lua_State *L) | ||
518 | { | ||
519 | uint32_t flags = 0; | ||
520 | #if LJ_TARGET_X86ORX64 | ||
521 | uint32_t vendor[4]; /* Filled by lj_vm_cpuid(0, ...). */ | ||
522 | uint32_t features[4]; /* Filled by lj_vm_cpuid(1, ...). */ | ||
523 | if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) { | ||
524 | #if !LJ_HASJIT | ||
525 | #define JIT_F_CMOV 1 /* Local stand-in so the CMOV check below works without the JIT. */ | ||
526 | #endif | ||
527 | flags |= ((features[3] >> 15)&1) * JIT_F_CMOV; | ||
528 | #if LJ_HASJIT | ||
529 | flags |= ((features[3] >> 26)&1) * JIT_F_SSE2; | ||
530 | flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1; | ||
531 | if (vendor[2] == 0x6c65746e) { /* Intel ("ntel" in little-endian byte order). */ | ||
532 | if ((features[0] & 0x0ff00f00) == 0x00000f00) /* P4. */ | ||
533 | flags |= JIT_F_P4; /* Currently unused. */ | ||
534 | else if ((features[0] & 0x0fff0ff0) == 0x000106c0) /* Atom. */ | ||
535 | flags |= JIT_F_LEA_AGU; | ||
536 | } else if (vendor[2] == 0x444d4163) { /* AMD ("cAMD" in little-endian byte order). */ | ||
537 | uint32_t fam = (features[0] & 0x0ff00f00); | ||
538 | if (fam == 0x00000f00) /* K8. */ | ||
539 | flags |= JIT_F_SPLIT_XMM; | ||
540 | if (fam >= 0x00000f00) /* K8, K10. */ | ||
541 | flags |= JIT_F_PREFER_IMUL; | ||
542 | } | ||
543 | #endif | ||
544 | } | ||
545 | #ifndef LUAJIT_CPU_NOCMOV | ||
546 | if (!(flags & JIT_F_CMOV)) /* Also taken when the cpuid calls above failed (flags is still 0). */ | ||
547 | luaL_error(L, "Ancient CPU lacks CMOV support (recompile with -DLUAJIT_CPU_NOCMOV)"); | ||
548 | #endif | ||
549 | #if LJ_HASJIT | ||
550 | if (!(flags & JIT_F_SSE2)) | ||
551 | luaL_error(L, "Sorry, SSE2 CPU support required for this beta release"); | ||
552 | #endif | ||
553 | UNUSED(L); | ||
554 | #else | ||
555 | #error "Missing CPU detection for this architecture" | ||
556 | #endif | ||
557 | return flags; | ||
558 | } | ||
559 | |||
560 | /* Initialize JIT compiler: CPU detection, flags, parameters and hotcounts. */ | ||
561 | static void jit_init(lua_State *L) | ||
562 | { | ||
563 | uint32_t flags = jit_cpudetect(L); /* Called in every build, so the CPU checks also run without the JIT. */ | ||
564 | #if LJ_HASJIT | ||
565 | jit_State *J = L2J(L); | ||
566 | J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT; | ||
567 | memcpy(J->param, jit_param_default, sizeof(J->param)); | ||
568 | jit_init_hotcount(J); /* Must follow the memcpy: it reads J->param[JIT_P_hotloop]. */ | ||
569 | lj_dispatch_update(G(L)); | ||
570 | #else | ||
571 | UNUSED(flags); | ||
572 | #endif | ||
573 | } | ||
574 | |||
575 | LUALIB_API int luaopen_jit(lua_State *L) | ||
576 | { /* Open the jit library, plus jit.util (unless disabled) and jit.opt, then initialize the JIT compiler. */ | ||
577 | lua_pushliteral(L, LJ_ARCH_NAME); /* NOTE(review): these three values presumably feed the LJLIB_PUSH(top-N) entries of the jit module -- confirm. */ | ||
578 | lua_pushinteger(L, LUAJIT_VERSION_NUM); | ||
579 | lua_pushliteral(L, LUAJIT_VERSION); | ||
580 | LJ_LIB_REG(L, jit); | ||
581 | #ifndef LUAJIT_DISABLE_JITUTIL | ||
582 | LJ_LIB_REG_(L, "jit.util", jit_util); | ||
583 | #endif | ||
584 | LJ_LIB_REG_(L, "jit.opt", jit_opt); | ||
585 | L->top -= 2; /* Discard the two topmost stack entries before returning. */ | ||
586 | jit_init(L); | ||
587 | return 1; | ||
588 | } | ||
589 | |||
diff --git a/src/lib_math.c b/src/lib_math.c new file mode 100644 index 00000000..ec8b0c2b --- /dev/null +++ b/src/lib_math.c | |||
@@ -0,0 +1,188 @@ | |||
1 | /* | ||
2 | ** Math library. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #include <math.h> | ||
7 | |||
8 | #define lib_math_c | ||
9 | #define LUA_LIB | ||
10 | |||
11 | #include "lua.h" | ||
12 | #include "lauxlib.h" | ||
13 | #include "lualib.h" | ||
14 | |||
15 | #include "lj_obj.h" | ||
16 | #include "lj_lib.h" | ||
17 | |||
18 | /* ------------------------------------------------------------------------ */ | ||
19 | |||
20 | #define LJLIB_MODULE_math | ||
21 | |||
22 | LJLIB_ASM(math_abs) LJLIB_REC(.) | ||
23 | { /* C-side handler for math.abs: runs lj_lib_checknum on argument #1 (result unused), then returns FFH_RETRY. */ | ||
24 | lj_lib_checknum(L, 1); | ||
25 | return FFH_RETRY; /* NOTE(review): presumably tells the caller to retry the fast path -- confirm. */ | ||
26 | } | ||
27 | LJLIB_ASM_(math_floor) LJLIB_REC(math_round IRFPM_FLOOR) | ||
28 | LJLIB_ASM_(math_ceil) LJLIB_REC(math_round IRFPM_CEIL) | ||
29 | LJLIB_ASM_(math_sqrt) LJLIB_REC(math_unary IRFPM_SQRT) | ||
30 | LJLIB_ASM_(math_log) LJLIB_REC(math_unary IRFPM_LOG) | ||
31 | LJLIB_ASM_(math_log10) LJLIB_REC(math_unary IRFPM_LOG10) | ||
32 | LJLIB_ASM_(math_exp) LJLIB_REC(math_unary IRFPM_EXP) | ||
33 | LJLIB_ASM_(math_sin) LJLIB_REC(math_unary IRFPM_SIN) | ||
34 | LJLIB_ASM_(math_cos) LJLIB_REC(math_unary IRFPM_COS) | ||
35 | LJLIB_ASM_(math_tan) LJLIB_REC(math_unary IRFPM_TAN) | ||
36 | LJLIB_ASM_(math_asin) LJLIB_REC(math_atrig FF_math_asin) | ||
37 | LJLIB_ASM_(math_acos) LJLIB_REC(math_atrig FF_math_acos) | ||
38 | LJLIB_ASM_(math_atan) LJLIB_REC(math_atrig FF_math_atan) | ||
39 | LJLIB_ASM_(math_sinh) | ||
40 | LJLIB_ASM_(math_cosh) | ||
41 | LJLIB_ASM_(math_tanh) | ||
42 | LJLIB_ASM_(math_frexp) | ||
43 | LJLIB_ASM_(math_modf) LJLIB_REC(.) | ||
44 | |||
45 | LJLIB_PUSH(57.29577951308232) | ||
46 | LJLIB_ASM_(math_deg) LJLIB_REC(math_degrad) | ||
47 | |||
48 | LJLIB_PUSH(0.017453292519943295) | ||
49 | LJLIB_ASM_(math_rad) LJLIB_REC(math_degrad) | ||
50 | |||
51 | LJLIB_ASM(math_atan2) LJLIB_REC(math_binary IR_ATAN2) | ||
52 | { /* C-side handler for math.atan2: runs lj_lib_checknum on arguments #1 and #2, then returns FFH_RETRY. */ | ||
53 | lj_lib_checknum(L, 1); | ||
54 | lj_lib_checknum(L, 2); | ||
55 | return FFH_RETRY; | ||
56 | } | ||
57 | LJLIB_ASM_(math_ldexp) LJLIB_REC(math_binary IR_LDEXP) | ||
58 | LJLIB_ASM_(math_pow) LJLIB_REC(.) | ||
59 | LJLIB_ASM_(math_fmod) | ||
60 | |||
61 | LJLIB_ASM(math_min) LJLIB_REC(math_minmax IR_MIN) | ||
62 | { /* C-side handler for math.min: runs lj_lib_checknum on every argument, then returns FFH_RETRY. */ | ||
63 | int i = 0; | ||
64 | do { lj_lib_checknum(L, ++i); } while (L->base+i < L->top); /* do-while: argument #1 is checked even if no argument was passed. */ | ||
65 | return FFH_RETRY; | ||
66 | } | ||
67 | LJLIB_ASM_(math_max) LJLIB_REC(math_minmax IR_MAX) | ||
68 | |||
69 | LJLIB_PUSH(3.14159265358979323846) LJLIB_SET(pi) | ||
70 | LJLIB_PUSH(1e310) LJLIB_SET(huge) | ||
71 | |||
72 | #ifdef __MACH__ /* NOTE(review): presumably needed so non-C code can reach these libm functions on Mach-O -- confirm. */ | ||
73 | LJ_FUNCA double lj_wrapper_sinh(double x) { return sinh(x); } /* Plain forwarder to sinh(). */ | ||
74 | LJ_FUNCA double lj_wrapper_cosh(double x) { return cosh(x); } /* Plain forwarder to cosh(). */ | ||
75 | LJ_FUNCA double lj_wrapper_tanh(double x) { return tanh(x); } /* Plain forwarder to tanh(). */ | ||
76 | #endif | ||
77 | |||
78 | /* ------------------------------------------------------------------------ */ | ||
79 | |||
80 | /* This implements a Tausworthe PRNG with period 2^223. Based on: | ||
81 | ** Tables of maximally-equidistributed combined LFSR generators, | ||
82 | ** Pierre L'Ecuyer, 1991, table 3, 1st entry. | ||
83 | ** Full-period ME-CF generator with L=64, J=4, k=223, N1=49. | ||
84 | */ | ||
85 | |||
86 | /* PRNG state (stored in a userdata created by luaopen_math). */ | ||
87 | typedef struct TW223State { | ||
88 | uint64_t gen[4]; /* State of the 4 LFSR generators. */ | ||
89 | int valid; /* Non-zero once tw223_init() has seeded the state. */ | ||
90 | } TW223State; | ||
91 | |||
92 | /* Union needed for bit-pattern conversion between uint64_t and double (write one member, read the other). */ | ||
93 | typedef union { uint64_t u64; double d; } U64double; | ||
94 | |||
95 | /* Update generator i and compute a running xor of all states. Expects tw, z and r in the enclosing scope. */ | ||
96 | #define TW223_GEN(i, k, q, s) \ | ||
97 | z = tw->gen[i]; \ | ||
98 | z = (((z<<q)^z) >> (k-s)) ^ ((z&((uint64_t)(int64_t)-1 << (64-k)))<<s); \ | ||
99 | r ^= z; tw->gen[i] = z; | ||
100 | |||
101 | /* PRNG step function. Advances all four generators and returns a double in the range 0.0 <= d < 1.0. */ | ||
102 | static double tw223_step(TW223State *tw) | ||
103 | { | ||
104 | uint64_t z, r = 0; /* z: scratch for TW223_GEN, r: xor of the four updated generator states. */ | ||
105 | U64double u; | ||
106 | TW223_GEN(0, 63, 31, 18) | ||
107 | TW223_GEN(1, 58, 19, 28) | ||
108 | TW223_GEN(2, 55, 24, 7) | ||
109 | TW223_GEN(3, 47, 21, 8) | ||
110 | u.u64 = (r & (((uint64_t)1 << 52)-1)) | ((uint64_t)0x3ff << 52); /* 52 random mantissa bits with the exponent of 1.0: 1.0 <= u.d < 2.0. */ | ||
111 | #if defined(__GNUC__) && LJ_TARGET_X86 && __pic__ | ||
112 | /* Compensate for unbelievable GCC pessimization. */ | ||
113 | { | ||
114 | volatile U64double u1; | ||
115 | u1.u64 = (uint64_t)0x3ff << 52; /* Bit pattern of 1.0, same as the #else branch (0x3f8 << 52 would be 2^-7). */ | ||
116 | return u.d - u1.d; | ||
117 | } | ||
118 | #else | ||
119 | return u.d - 1.0; | ||
120 | #endif | ||
121 | } | ||
122 | |||
123 | /* PRNG initialization function: derives the four generator states from seed d and marks the state valid. */ | ||
124 | static void tw223_init(TW223State *tw, double d) | ||
125 | { | ||
126 | uint32_t r = 0x11090601; /* 64-k[i] as four 8 bit constants. */ | ||
127 | int i; | ||
128 | for (i = 0; i < 4; i++) { | ||
129 | U64double u; | ||
130 | uint32_t m = 1u << (r&255); /* Lowest byte of r: 64-k[i] for this generator. */ | ||
131 | r >>= 8; /* Move on to the constant of the next generator. */ | ||
132 | u.d = d = d * 3.14159265358979323846 + 2.7182818284590452354; /* Each generator gets a different value derived from the seed. */ | ||
133 | if (u.u64 < m) u.u64 += m; /* Ensure k[i] MSB of gen[i] are non-zero. */ | ||
134 | tw->gen[i] = u.u64; | ||
135 | } | ||
136 | tw->valid = 1; | ||
137 | for (i = 0; i < 10; i++) /* Run and discard ten steps after seeding. */ | ||
138 | tw223_step(tw); | ||
139 | } | ||
140 | |||
141 | /* PRNG extract function: math.random([m [, n]]). */ | ||
142 | LJLIB_PUSH(top-2) /* Upvalue holds userdata with TW223State. */ | ||
143 | LJLIB_CF(math_random) | ||
144 | { | ||
145 | int n = cast_int(L->top - L->base); /* Number of arguments passed. */ | ||
146 | TW223State *tw = (TW223State *)(uddata(udataV(lj_lib_upvalue(L, 1)))); | ||
147 | double d; | ||
148 | if (LJ_UNLIKELY(!tw->valid)) tw223_init(tw, 0.0); /* Lazy seeding with 0.0 on first use. */ | ||
149 | d = tw223_step(tw); | ||
150 | if (n > 0) { | ||
151 | double r1 = lj_lib_checknum(L, 1); | ||
152 | if (n == 1) { | ||
153 | d = floor(d*r1) + 1.0; /* d is an int in range [1, r1] */ | ||
154 | } else { | ||
155 | double r2 = lj_lib_checknum(L, 2); | ||
156 | d = floor(d*(r2-r1+1.0)) + r1; /* d is an int in range [r1, r2] */ | ||
157 | } | ||
158 | } /* else: d is a double in range [0, 1) */ | ||
159 | setnumV(L->top++, d); | ||
160 | return 1; | ||
161 | } | ||
162 | |||
163 | /* PRNG seed function: math.randomseed(x) re-initializes the shared PRNG state from number x. */ | ||
164 | LJLIB_PUSH(top-2) /* Upvalue holds userdata with TW223State. */ | ||
165 | LJLIB_CF(math_randomseed) | ||
166 | { | ||
167 | TW223State *tw = (TW223State *)(uddata(udataV(lj_lib_upvalue(L, 1)))); | ||
168 | tw223_init(tw, lj_lib_checknum(L, 1)); | ||
169 | return 0; /* No results. */ | ||
170 | } | ||
171 | |||
172 | /* ------------------------------------------------------------------------ */ | ||
173 | |||
174 | #include "lj_libdef.h" | ||
175 | |||
176 | LUALIB_API int luaopen_math(lua_State *L) | ||
177 | { /* Open the math library. The PRNG state lives in a userdata created before registration. */ | ||
178 | TW223State *tw; | ||
179 | tw = (TW223State *)lua_newuserdata(L, sizeof(TW223State)); | ||
180 | tw->valid = 0; /* Use lazy initialization to save some time on startup. */ | ||
181 | LJ_LIB_REG(L, math); | ||
182 | #if defined(LUA_COMPAT_MOD) | ||
183 | lua_getfield(L, -1, "fmod"); /* Compatibility alias: math.mod = math.fmod. */ | ||
184 | lua_setfield(L, -2, "mod"); | ||
185 | #endif | ||
186 | return 1; | ||
187 | } | ||
188 | |||
diff --git a/src/lib_os.c b/src/lib_os.c new file mode 100644 index 00000000..bee7216a --- /dev/null +++ b/src/lib_os.c | |||
@@ -0,0 +1,249 @@ | |||
1 | /* | ||
2 | ** OS library. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | ** | ||
5 | ** Major portions taken verbatim or adapted from the Lua interpreter. | ||
6 | ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h | ||
7 | */ | ||
8 | |||
9 | #include <errno.h> | ||
10 | #include <locale.h> | ||
11 | #include <time.h> | ||
12 | |||
13 | #define lib_os_c | ||
14 | #define LUA_LIB | ||
15 | |||
16 | #include "lua.h" | ||
17 | #include "lauxlib.h" | ||
18 | #include "lualib.h" | ||
19 | |||
20 | #ifdef LUA_USE_POSIX | ||
21 | #include <unistd.h> | ||
22 | #else | ||
23 | #include <stdio.h> | ||
24 | #endif | ||
25 | |||
26 | #include "lj_obj.h" | ||
27 | #include "lj_err.h" | ||
28 | #include "lj_lib.h" | ||
29 | |||
30 | /* ------------------------------------------------------------------------ */ | ||
31 | |||
32 | #define LJLIB_MODULE_os | ||
33 | |||
34 | static int os_pushresult(lua_State *L, int i, const char *filename) | ||
35 | { /* Success (i != 0): true. Failure: nil, "filename: message", errno value. */ | ||
36 | int saved_errno = errno; /* Capture first: calls to the Lua API may change errno. */ | ||
37 | if (!i) { | ||
38 | /* Failure: nil replaces the top slot; message and code are pushed after it. */ | ||
39 | setnilV(L->top-1); | ||
40 | lua_pushfstring(L, "%s: %s", filename, strerror(saved_errno)); | ||
41 | lua_pushinteger(L, saved_errno); | ||
42 | return 3; | ||
43 | } | ||
44 | setboolV(L->top-1, 1); | ||
45 | return 1; | ||
46 | } | ||
47 | |||
48 | LJLIB_CF(os_execute) | ||
49 | { /* os.execute([command]): returns the result of system(); without a command, system(NULL) is called. */ | ||
50 | lua_pushinteger(L, system(luaL_optstring(L, 1, NULL))); | ||
51 | return 1; | ||
52 | } | ||
53 | |||
54 | LJLIB_CF(os_remove) | ||
55 | { /* os.remove(filename): true on success, otherwise the failure triple built by os_pushresult(). */ | ||
56 | const char *filename = luaL_checkstring(L, 1); | ||
57 | return os_pushresult(L, remove(filename) == 0, filename); | ||
58 | } | ||
59 | |||
60 | LJLIB_CF(os_rename) | ||
61 | { /* os.rename(from, to): true on success, otherwise the failure triple built by os_pushresult(). */ | ||
62 | const char *fromname = luaL_checkstring(L, 1); | ||
63 | const char *toname = luaL_checkstring(L, 2); | ||
64 | return os_pushresult(L, rename(fromname, toname) == 0, fromname); /* The error message names the source file. */ | ||
65 | } | ||
66 | |||
67 | LJLIB_CF(os_tmpname) | ||
68 | { /* os.tmpname(): returns a temporary file name or raises LJ_ERR_OSUNIQF. */ | ||
69 | #ifdef LUA_USE_POSIX | ||
70 | /* mkstemp() replaces the trailing XXXXXX of the template in place. */ | ||
71 | char buf[15+1] = "/tmp/lua_XXXXXX"; | ||
72 | int fd = mkstemp(buf); | ||
73 | if (fd == -1) | ||
74 | lj_err_caller(L, LJ_ERR_OSUNIQF); | ||
75 | else | ||
76 | close(fd); /* Only the name is wanted, not the open descriptor. */ | ||
77 | #else | ||
78 | char buf[L_tmpnam]; | ||
79 | char *name = tmpnam(buf); | ||
80 | if (name == NULL) | ||
81 | lj_err_caller(L, LJ_ERR_OSUNIQF); | ||
82 | #endif | ||
83 | lua_pushstring(L, buf); | ||
84 | return 1; | ||
85 | } | ||
86 | |||
87 | LJLIB_CF(os_getenv) | ||
88 | { /* os.getenv(varname): value of the environment variable, or nil if it is not set. */ | ||
89 | lua_pushstring(L, getenv(luaL_checkstring(L, 1))); /* if NULL push nil */ | ||
90 | return 1; | ||
91 | } | ||
92 | |||
93 | LJLIB_CF(os_exit) | ||
94 | { /* os.exit([code]): terminates the process via exit(); the code defaults to EXIT_SUCCESS. */ | ||
95 | exit(lj_lib_optint(L, 1, EXIT_SUCCESS)); | ||
96 | return 0; /* to avoid warnings */ | ||
97 | } | ||
98 | |||
99 | LJLIB_CF(os_clock) | ||
100 | { /* os.clock(): value of clock() converted to seconds. */ | ||
101 | setnumV(L->top++, ((lua_Number)clock())*(1.0/(lua_Number)CLOCKS_PER_SEC)); | ||
102 | return 1; | ||
103 | } | ||
104 | |||
105 | /* ------------------------------------------------------------------------ */ | ||
106 | |||
107 | static void setfield(lua_State *L, const char *key, int value) | ||
108 | { /* Set integer field key of the table on top of the stack. */ | ||
109 | lua_pushinteger(L, value); | ||
110 | lua_setfield(L, -2, key); /* -2: the table sits just below the pushed value. */ | ||
111 | } | ||
112 | |||
113 | static void setboolfield(lua_State *L, const char *key, int value) | ||
114 | { /* Set boolean field key of the table on top of the stack, unless value is negative. */ | ||
115 | if (value >= 0) { /* Negative means undefined: the field is left unset. */ | ||
116 | lua_pushboolean(L, value); | ||
117 | lua_setfield(L, -2, key); | ||
118 | } | ||
119 | } | ||
120 | |||
121 | static int getboolfield(lua_State *L, const char *key) | ||
122 | { /* Read field key of the table on top of the stack as a boolean: 0 or 1, or -1 if it is nil. */ | ||
123 | int flag = -1; | ||
124 | lua_getfield(L, -1, key); | ||
125 | if (!lua_isnil(L, -1)) flag = lua_toboolean(L, -1); | ||
126 | lua_pop(L, 1); /* Remove the fetched value again. */ | ||
127 | return flag; | ||
128 | } | ||
129 | |||
130 | static int getfield(lua_State *L, const char *key, int d) | ||
131 | { /* Read integer field key of the table on top of the stack. */ | ||
132 | /* d is the value used when the field is not a number; a negative d */ | ||
133 | /* marks the field as mandatory and raises LJ_ERR_OSDATEF instead. */ | ||
134 | int val = d; | ||
135 | lua_getfield(L, -1, key); | ||
136 | if (lua_isnumber(L, -1)) { | ||
137 | val = (int)lua_tointeger(L, -1); | ||
138 | } else if (d < 0) { | ||
139 | lj_err_callerv(L, LJ_ERR_OSDATEF, key); | ||
140 | } | ||
141 | lua_pop(L, 1); | ||
142 | return val; | ||
143 | } | ||
144 | |||
145 | LJLIB_CF(os_date) | ||
146 | { /* os.date([format [, time]]): a formatted string, a field table for "*t", or nil for an invalid time. */ | ||
147 | const char *s = luaL_optstring(L, 1, "%c"); | ||
148 | time_t t = luaL_opt(L, (time_t)luaL_checknumber, 2, time(NULL)); /* Defaults to the current time. */ | ||
149 | struct tm *stm; | ||
150 | if (*s == '!') { /* UTC? */ | ||
151 | stm = gmtime(&t); | ||
152 | s++; /* skip `!' */ | ||
153 | } else { | ||
154 | stm = localtime(&t); | ||
155 | } | ||
156 | if (stm == NULL) { /* invalid date? */ | ||
157 | setnilV(L->top-1); /* The result overwrites the topmost stack slot. */ | ||
158 | } else if (strcmp(s, "*t") == 0) { | ||
159 | lua_createtable(L, 0, 9); /* 9 = number of fields */ | ||
160 | setfield(L, "sec", stm->tm_sec); | ||
161 | setfield(L, "min", stm->tm_min); | ||
162 | setfield(L, "hour", stm->tm_hour); | ||
163 | setfield(L, "day", stm->tm_mday); | ||
164 | setfield(L, "month", stm->tm_mon+1); /* struct tm months start at 0; the table uses 1..12. */ | ||
165 | setfield(L, "year", stm->tm_year+1900); /* struct tm years count from 1900. */ | ||
166 | setfield(L, "wday", stm->tm_wday+1); | ||
167 | setfield(L, "yday", stm->tm_yday+1); | ||
168 | setboolfield(L, "isdst", stm->tm_isdst); /* Left unset when tm_isdst is negative. */ | ||
169 | } else { | ||
170 | char cc[3]; /* Holds one two-character conversion at a time. */ | ||
171 | luaL_Buffer b; | ||
172 | cc[0] = '%'; cc[2] = '\0'; | ||
173 | luaL_buffinit(L, &b); | ||
174 | for (; *s; s++) { | ||
175 | if (*s != '%' || *(s + 1) == '\0') { /* no conversion specifier? */ | ||
176 | luaL_addchar(&b, *s); | ||
177 | } else { | ||
178 | size_t reslen; | ||
179 | char buff[200]; /* should be big enough for any conversion result */ | ||
180 | cc[1] = *(++s); | ||
181 | reslen = strftime(buff, sizeof(buff), cc, stm); /* Each specifier is converted on its own. */ | ||
182 | luaL_addlstring(&b, buff, reslen); | ||
183 | } | ||
184 | } | ||
185 | luaL_pushresult(&b); | ||
186 | } | ||
187 | return 1; | ||
188 | } | ||
189 | |||
190 | LJLIB_CF(os_time) | ||
191 | { /* os.time([table]): the current time, or the time described by the table's fields; nil if it cannot be represented. */ | ||
192 | time_t t; | ||
193 | if (lua_isnoneornil(L, 1)) { /* called without args? */ | ||
194 | t = time(NULL); /* get current time */ | ||
195 | } else { | ||
196 | struct tm ts; | ||
197 | luaL_checktype(L, 1, LUA_TTABLE); | ||
198 | lua_settop(L, 1); /* make sure table is at the top */ | ||
199 | ts.tm_sec = getfield(L, "sec", 0); | ||
200 | ts.tm_min = getfield(L, "min", 0); | ||
201 | ts.tm_hour = getfield(L, "hour", 12); /* The hour defaults to noon. */ | ||
202 | ts.tm_mday = getfield(L, "day", -1); /* -1: day, month and year are mandatory. */ | ||
203 | ts.tm_mon = getfield(L, "month", -1) - 1; | ||
204 | ts.tm_year = getfield(L, "year", -1) - 1900; | ||
205 | ts.tm_isdst = getboolfield(L, "isdst"); /* -1 when the field is absent. */ | ||
206 | t = mktime(&ts); | ||
207 | } | ||
208 | if (t == (time_t)(-1)) | ||
209 | lua_pushnil(L); | ||
210 | else | ||
211 | lua_pushnumber(L, (lua_Number)t); | ||
212 | return 1; | ||
213 | } | ||
214 | |||
215 | LJLIB_CF(os_difftime) | ||
216 | { /* os.difftime(t2 [, t1]): result of difftime(t2, t1); t1 defaults to 0. */ | ||
217 | lua_pushnumber(L, difftime((time_t)(luaL_checknumber(L, 1)), | ||
218 | (time_t)(luaL_optnumber(L, 2, (lua_Number)0)))); | ||
219 | return 1; | ||
220 | } | ||
221 | |||
222 | /* ------------------------------------------------------------------------ */ | ||
223 | |||
224 | LJLIB_CF(os_setlocale) | ||
225 | { /* os.setlocale([locale [, category]]): returns the result of setlocale(), or nil if it returns NULL. */ | ||
226 | GCstr *s = lj_lib_optstr(L, 1); | ||
227 | const char *str = s ? strdata(s) : NULL; /* No locale argument: setlocale() is called with NULL. */ | ||
228 | int opt = lj_lib_checkopt(L, 2, 6, /* NOTE(review): 6 is presumably the default option index ("all") -- confirm. */ | ||
229 | "\5ctype\7numeric\4time\7collate\10monetary\1\377\3all"); /* Length-prefixed names; "\1\377" occupies index 5. */ | ||
230 | if (opt == 0) opt = LC_CTYPE; | ||
231 | else if (opt == 1) opt = LC_NUMERIC; | ||
232 | else if (opt == 2) opt = LC_TIME; | ||
233 | else if (opt == 3) opt = LC_COLLATE; | ||
234 | else if (opt == 4) opt = LC_MONETARY; | ||
235 | else if (opt == 6) opt = LC_ALL; | ||
236 | lua_pushstring(L, setlocale(opt, str)); | ||
237 | return 1; | ||
238 | } | ||
239 | |||
240 | /* ------------------------------------------------------------------------ */ | ||
241 | |||
242 | #include "lj_libdef.h" | ||
243 | |||
244 | LUALIB_API int luaopen_os(lua_State *L) | ||
245 | { /* Open the os library and return it. */ | ||
246 | LJ_LIB_REG(L, os); | ||
247 | return 1; | ||
248 | } | ||
249 | |||
diff --git a/src/lib_package.c b/src/lib_package.c new file mode 100644 index 00000000..69fa1db9 --- /dev/null +++ b/src/lib_package.c | |||
@@ -0,0 +1,508 @@ | |||
1 | /* | ||
2 | ** Package library. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | ** | ||
5 | ** Major portions taken verbatim or adapted from the Lua interpreter. | ||
6 | ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h | ||
7 | */ | ||
8 | |||
9 | #define lib_package_c | ||
10 | #define LUA_LIB | ||
11 | |||
12 | #include "lua.h" | ||
13 | #include "lauxlib.h" | ||
14 | #include "lualib.h" | ||
15 | |||
16 | #include "lj_obj.h" | ||
17 | #include "lj_err.h" | ||
18 | #include "lj_lib.h" | ||
19 | |||
20 | /* ------------------------------------------------------------------------ */ | ||
21 | |||
/* Error codes for ll_loadfunc (which returns 0 on success). */
#define PACKAGE_ERR_LIB 1  /* The library itself could not be loaded. */
#define PACKAGE_ERR_FUNC 2  /* Library is loaded, but the symbol is missing. */

/*
** Redefined in platform specific part.
** PACKAGE_LIB_FAIL is the third result of package.loadlib when loading the
** library fails. setprogdir is a no-op unless a backend replaces it.
*/
#define PACKAGE_LIB_FAIL "open"
#define setprogdir(L) ((void)0)
29 | |||
30 | #if defined(LUA_DL_DLOPEN) | ||
31 | |||
32 | #include <dlfcn.h> | ||
33 | |||
/* dlopen backend: close a library handle. The dlclose result is ignored. */
static void ll_unloadlib(void *lib)
{
  dlclose(lib);
}
38 | |||
39 | static void *ll_load(lua_State *L, const char *path) | ||
40 | { | ||
41 | void *lib = dlopen(path, RTLD_NOW); | ||
42 | if (lib == NULL) lua_pushstring(L, dlerror()); | ||
43 | return lib; | ||
44 | } | ||
45 | |||
46 | static lua_CFunction ll_sym(lua_State *L, void *lib, const char *sym) | ||
47 | { | ||
48 | lua_CFunction f = (lua_CFunction)dlsym(lib, sym); | ||
49 | if (f == NULL) lua_pushstring(L, dlerror()); | ||
50 | return f; | ||
51 | } | ||
52 | |||
53 | #elif defined(LUA_DL_DLL) | ||
54 | |||
55 | #define WIN32_LEAN_AND_MEAN | ||
56 | #include <windows.h> | ||
57 | |||
58 | #undef setprogdir | ||
59 | |||
60 | static void setprogdir(lua_State *L) | ||
61 | { | ||
62 | char buff[MAX_PATH + 1]; | ||
63 | char *lb; | ||
64 | DWORD nsize = sizeof(buff); | ||
65 | DWORD n = GetModuleFileNameA(NULL, buff, nsize); | ||
66 | if (n == 0 || n == nsize || (lb = strrchr(buff, '\\')) == NULL) { | ||
67 | luaL_error(L, "unable to get ModuleFileName"); | ||
68 | } else { | ||
69 | *lb = '\0'; | ||
70 | luaL_gsub(L, lua_tostring(L, -1), LUA_EXECDIR, buff); | ||
71 | lua_remove(L, -2); /* remove original string */ | ||
72 | } | ||
73 | } | ||
74 | |||
75 | static void pusherror(lua_State *L) | ||
76 | { | ||
77 | DWORD error = GetLastError(); | ||
78 | char buffer[128]; | ||
79 | if (FormatMessageA(FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_FROM_SYSTEM, | ||
80 | NULL, error, 0, buffer, sizeof(buffer), NULL)) | ||
81 | lua_pushstring(L, buffer); | ||
82 | else | ||
83 | lua_pushfstring(L, "system error %d\n", error); | ||
84 | } | ||
85 | |||
/* Windows backend: release a library handle. The result is ignored. */
static void ll_unloadlib(void *lib)
{
  FreeLibrary((HINSTANCE)lib);
}
90 | |||
91 | static void *ll_load(lua_State *L, const char *path) | ||
92 | { | ||
93 | HINSTANCE lib = LoadLibraryA(path); | ||
94 | if (lib == NULL) pusherror(L); | ||
95 | return lib; | ||
96 | } | ||
97 | |||
98 | static lua_CFunction ll_sym(lua_State *L, void *lib, const char *sym) | ||
99 | { | ||
100 | lua_CFunction f = (lua_CFunction)GetProcAddress((HINSTANCE)lib, sym); | ||
101 | if (f == NULL) pusherror(L); | ||
102 | return f; | ||
103 | } | ||
104 | |||
105 | #else | ||
106 | |||
107 | #undef PACKAGE_LIB_FAIL | ||
108 | #define PACKAGE_LIB_FAIL "absent" | ||
109 | |||
110 | #define DLMSG "dynamic libraries not enabled; check your Lua installation" | ||
111 | |||
/* Fallback without dynamic library support: nothing to unload. */
static void ll_unloadlib(void *lib)
{
  (void)lib;
}
116 | |||
/* Fallback: always fails. Pushes DLMSG as the error message, returns NULL. */
static void *ll_load(lua_State *L, const char *path)
{
  (void)path;
  lua_pushliteral(L, DLMSG);
  return NULL;
}
123 | |||
/* Fallback: always fails. Pushes DLMSG as the error message, returns NULL. */
static lua_CFunction ll_sym(lua_State *L, void *lib, const char *sym)
{
  (void)lib; (void)sym;
  lua_pushliteral(L, DLMSG);
  return NULL;
}
130 | #endif | ||
131 | |||
132 | /* ------------------------------------------------------------------------ */ | ||
133 | |||
134 | static void **ll_register(lua_State *L, const char *path) | ||
135 | { | ||
136 | void **plib; | ||
137 | lua_pushfstring(L, "LOADLIB: %s", path); | ||
138 | lua_gettable(L, LUA_REGISTRYINDEX); /* check library in registry? */ | ||
139 | if (!lua_isnil(L, -1)) { /* is there an entry? */ | ||
140 | plib = (void **)lua_touserdata(L, -1); | ||
141 | } else { /* no entry yet; create one */ | ||
142 | lua_pop(L, 1); | ||
143 | plib = (void **)lua_newuserdata(L, sizeof(void *)); | ||
144 | *plib = NULL; | ||
145 | luaL_getmetatable(L, "_LOADLIB"); | ||
146 | lua_setmetatable(L, -2); | ||
147 | lua_pushfstring(L, "LOADLIB: %s", path); | ||
148 | lua_pushvalue(L, -2); | ||
149 | lua_settable(L, LUA_REGISTRYINDEX); | ||
150 | } | ||
151 | return plib; | ||
152 | } | ||
153 | |||
154 | static int ll_loadfunc(lua_State *L, const char *path, const char *sym) | ||
155 | { | ||
156 | void **reg = ll_register(L, path); | ||
157 | if (*reg == NULL) *reg = ll_load(L, path); | ||
158 | if (*reg == NULL) { | ||
159 | return PACKAGE_ERR_LIB; /* unable to load library */ | ||
160 | } else { | ||
161 | lua_CFunction f = ll_sym(L, *reg, sym); | ||
162 | if (f == NULL) | ||
163 | return PACKAGE_ERR_FUNC; /* unable to find function */ | ||
164 | lua_pushcfunction(L, f); | ||
165 | return 0; /* return function */ | ||
166 | } | ||
167 | } | ||
168 | |||
169 | static int lj_cf_package_loadlib(lua_State *L) | ||
170 | { | ||
171 | const char *path = luaL_checkstring(L, 1); | ||
172 | const char *init = luaL_checkstring(L, 2); | ||
173 | int stat = ll_loadfunc(L, path, init); | ||
174 | if (stat == 0) { /* no errors? */ | ||
175 | return 1; /* return the loaded function */ | ||
176 | } else { /* error; error message is on stack top */ | ||
177 | lua_pushnil(L); | ||
178 | lua_insert(L, -2); | ||
179 | lua_pushstring(L, (stat == PACKAGE_ERR_LIB) ? PACKAGE_LIB_FAIL : "init"); | ||
180 | return 3; /* return nil, error message, and where */ | ||
181 | } | ||
182 | } | ||
183 | |||
184 | static int lj_cf_package_unloadlib(lua_State *L) | ||
185 | { | ||
186 | void **lib = (void **)luaL_checkudata(L, 1, "_LOADLIB"); | ||
187 | if (*lib) ll_unloadlib(*lib); | ||
188 | *lib = NULL; /* mark library as closed */ | ||
189 | return 0; | ||
190 | } | ||
191 | |||
192 | /* ------------------------------------------------------------------------ */ | ||
193 | |||
/* Return 1 if 'filename' can be opened for reading, 0 otherwise. */
static int readable(const char *filename)
{
  FILE *fp = fopen(filename, "r");  /* try to open file */
  if (fp != NULL) {
    fclose(fp);
    return 1;
  }
  return 0;  /* open failed */
}
201 | |||
202 | static const char *pushnexttemplate(lua_State *L, const char *path) | ||
203 | { | ||
204 | const char *l; | ||
205 | while (*path == *LUA_PATHSEP) path++; /* skip separators */ | ||
206 | if (*path == '\0') return NULL; /* no more templates */ | ||
207 | l = strchr(path, *LUA_PATHSEP); /* find next separator */ | ||
208 | if (l == NULL) l = path + strlen(path); | ||
209 | lua_pushlstring(L, path, (size_t)(l - path)); /* template */ | ||
210 | return l; | ||
211 | } | ||
212 | |||
213 | static const char *findfile(lua_State *L, const char *name, | ||
214 | const char *pname) | ||
215 | { | ||
216 | const char *path; | ||
217 | name = luaL_gsub(L, name, ".", LUA_DIRSEP); | ||
218 | lua_getfield(L, LUA_ENVIRONINDEX, pname); | ||
219 | path = lua_tostring(L, -1); | ||
220 | if (path == NULL) | ||
221 | luaL_error(L, LUA_QL("package.%s") " must be a string", pname); | ||
222 | lua_pushliteral(L, ""); /* error accumulator */ | ||
223 | while ((path = pushnexttemplate(L, path)) != NULL) { | ||
224 | const char *filename; | ||
225 | filename = luaL_gsub(L, lua_tostring(L, -1), LUA_PATH_MARK, name); | ||
226 | lua_remove(L, -2); /* remove path template */ | ||
227 | if (readable(filename)) /* does file exist and is readable? */ | ||
228 | return filename; /* return that file name */ | ||
229 | lua_pushfstring(L, "\n\tno file " LUA_QS, filename); | ||
230 | lua_remove(L, -2); /* remove file name */ | ||
231 | lua_concat(L, 2); /* add entry to possible error message */ | ||
232 | } | ||
233 | return NULL; /* not found */ | ||
234 | } | ||
235 | |||
236 | static void loaderror(lua_State *L, const char *filename) | ||
237 | { | ||
238 | luaL_error(L, "error loading module " LUA_QS " from file " LUA_QS ":\n\t%s", | ||
239 | lua_tostring(L, 1), filename, lua_tostring(L, -1)); | ||
240 | } | ||
241 | |||
242 | static int lj_cf_package_loader_lua(lua_State *L) | ||
243 | { | ||
244 | const char *filename; | ||
245 | const char *name = luaL_checkstring(L, 1); | ||
246 | filename = findfile(L, name, "path"); | ||
247 | if (filename == NULL) return 1; /* library not found in this path */ | ||
248 | if (luaL_loadfile(L, filename) != 0) | ||
249 | loaderror(L, filename); | ||
250 | return 1; /* library loaded successfully */ | ||
251 | } | ||
252 | |||
253 | static const char *mkfuncname(lua_State *L, const char *modname) | ||
254 | { | ||
255 | const char *funcname; | ||
256 | const char *mark = strchr(modname, *LUA_IGMARK); | ||
257 | if (mark) modname = mark + 1; | ||
258 | funcname = luaL_gsub(L, modname, ".", "_"); | ||
259 | funcname = lua_pushfstring(L, "luaopen_%s", funcname); | ||
260 | lua_remove(L, -2); /* remove 'gsub' result */ | ||
261 | return funcname; | ||
262 | } | ||
263 | |||
264 | static int lj_cf_package_loader_c(lua_State *L) | ||
265 | { | ||
266 | const char *funcname; | ||
267 | const char *name = luaL_checkstring(L, 1); | ||
268 | const char *filename = findfile(L, name, "cpath"); | ||
269 | if (filename == NULL) return 1; /* library not found in this path */ | ||
270 | funcname = mkfuncname(L, name); | ||
271 | if (ll_loadfunc(L, filename, funcname) != 0) | ||
272 | loaderror(L, filename); | ||
273 | return 1; /* library loaded successfully */ | ||
274 | } | ||
275 | |||
276 | static int lj_cf_package_loader_croot(lua_State *L) | ||
277 | { | ||
278 | const char *funcname; | ||
279 | const char *filename; | ||
280 | const char *name = luaL_checkstring(L, 1); | ||
281 | const char *p = strchr(name, '.'); | ||
282 | int stat; | ||
283 | if (p == NULL) return 0; /* is root */ | ||
284 | lua_pushlstring(L, name, (size_t)(p - name)); | ||
285 | filename = findfile(L, lua_tostring(L, -1), "cpath"); | ||
286 | if (filename == NULL) return 1; /* root not found */ | ||
287 | funcname = mkfuncname(L, name); | ||
288 | if ((stat = ll_loadfunc(L, filename, funcname)) != 0) { | ||
289 | if (stat != PACKAGE_ERR_FUNC) loaderror(L, filename); /* real error */ | ||
290 | lua_pushfstring(L, "\n\tno module " LUA_QS " in file " LUA_QS, | ||
291 | name, filename); | ||
292 | return 1; /* function not found */ | ||
293 | } | ||
294 | return 1; | ||
295 | } | ||
296 | |||
297 | static int lj_cf_package_loader_preload(lua_State *L) | ||
298 | { | ||
299 | const char *name = luaL_checkstring(L, 1); | ||
300 | lua_getfield(L, LUA_ENVIRONINDEX, "preload"); | ||
301 | if (!lua_istable(L, -1)) | ||
302 | luaL_error(L, LUA_QL("package.preload") " must be a table"); | ||
303 | lua_getfield(L, -1, name); | ||
304 | if (lua_isnil(L, -1)) /* not found? */ | ||
305 | lua_pushfstring(L, "\n\tno field package.preload['%s']", name); | ||
306 | return 1; | ||
307 | } | ||
308 | |||
309 | /* ------------------------------------------------------------------------ */ | ||
310 | |||
/*
** Unique address used as a light userdata marker: require stores it in
** _LOADED[name] while the module is being loaded.
*/
static const int sentinel_ = 0;
#define sentinel ((void *)&sentinel_)
313 | |||
/*
** require(name)
** Returns _LOADED[name] if it holds a true value. Otherwise calls the
** functions in the "loaders" table in order until one returns a function,
** runs that function with the module name, and stores and returns the
** result (or true if the module neither returns nor stores a value).
** Raises an error if no loader succeeds, or if the sentinel is found in
** _LOADED[name] (loop or previous error).
*/
static int lj_cf_package_require(lua_State *L)
{
  const char *name = luaL_checkstring(L, 1);
  int i;
  lua_settop(L, 1); /* _LOADED table will be at index 2 */
  lua_getfield(L, LUA_REGISTRYINDEX, "_LOADED");
  lua_getfield(L, 2, name);
  if (lua_toboolean(L, -1)) { /* is it there? */
    if (lua_touserdata(L, -1) == sentinel) /* check loops */
      luaL_error(L, "loop or previous error loading module " LUA_QS, name);
    return 1; /* package is already loaded */
  }
  /* else must load it; iterate over available loaders */
  lua_getfield(L, LUA_ENVIRONINDEX, "loaders");
  if (!lua_istable(L, -1))
    luaL_error(L, LUA_QL("package.loaders") " must be a table");
  lua_pushliteral(L, ""); /* error message accumulator */
  for (i = 1; ; i++) {
    /* Stack here: ..., loaders table (-2), accumulated messages (-1). */
    lua_rawgeti(L, -2, i); /* get a loader */
    if (lua_isnil(L, -1)) /* ran out of loaders */
      luaL_error(L, "module " LUA_QS " not found:%s",
                 name, lua_tostring(L, -2));
    lua_pushstring(L, name);
    lua_call(L, 1, 1); /* call it */
    if (lua_isfunction(L, -1)) /* did it find module? */
      break; /* module loaded successfully */
    else if (lua_isstring(L, -1)) /* loader returned error message? */
      lua_concat(L, 2); /* accumulate it */
    else
      lua_pop(L, 1); /* ignore any other kind of result */
  }
  /* The function returned by the loader is on the stack top. */
  lua_pushlightuserdata(L, sentinel);
  lua_setfield(L, 2, name); /* _LOADED[name] = sentinel */
  lua_pushstring(L, name); /* pass name as argument to module */
  lua_call(L, 1, 1); /* run loaded module */
  if (!lua_isnil(L, -1)) /* non-nil return? */
    lua_setfield(L, 2, name); /* _LOADED[name] = returned value */
  lua_getfield(L, 2, name);
  if (lua_touserdata(L, -1) == sentinel) { /* module did not set a value? */
    lua_pushboolean(L, 1); /* use true as result */
    lua_pushvalue(L, -1); /* extra copy to be returned */
    lua_setfield(L, 2, name); /* _LOADED[name] = true */
  }
  return 1;
}
359 | |||
360 | /* ------------------------------------------------------------------------ */ | ||
361 | |||
362 | static void setfenv(lua_State *L) | ||
363 | { | ||
364 | lua_Debug ar; | ||
365 | if (lua_getstack(L, 1, &ar) == 0 || | ||
366 | lua_getinfo(L, "f", &ar) == 0 || /* get calling function */ | ||
367 | lua_iscfunction(L, -1)) | ||
368 | luaL_error(L, LUA_QL("module") " not called from a Lua function"); | ||
369 | lua_pushvalue(L, -2); | ||
370 | lua_setfenv(L, -2); | ||
371 | lua_pop(L, 1); | ||
372 | } | ||
373 | |||
374 | static void dooptions(lua_State *L, int n) | ||
375 | { | ||
376 | int i; | ||
377 | for (i = 2; i <= n; i++) { | ||
378 | lua_pushvalue(L, i); /* get option (a function) */ | ||
379 | lua_pushvalue(L, -2); /* module */ | ||
380 | lua_call(L, 1, 0); | ||
381 | } | ||
382 | } | ||
383 | |||
384 | static void modinit(lua_State *L, const char *modname) | ||
385 | { | ||
386 | const char *dot; | ||
387 | lua_pushvalue(L, -1); | ||
388 | lua_setfield(L, -2, "_M"); /* module._M = module */ | ||
389 | lua_pushstring(L, modname); | ||
390 | lua_setfield(L, -2, "_NAME"); | ||
391 | dot = strrchr(modname, '.'); /* look for last dot in module name */ | ||
392 | if (dot == NULL) dot = modname; else dot++; | ||
393 | /* set _PACKAGE as package name (full module name minus last part) */ | ||
394 | lua_pushlstring(L, modname, (size_t)(dot - modname)); | ||
395 | lua_setfield(L, -2, "_PACKAGE"); | ||
396 | } | ||
397 | |||
398 | static int lj_cf_package_module(lua_State *L) | ||
399 | { | ||
400 | const char *modname = luaL_checkstring(L, 1); | ||
401 | int loaded = lua_gettop(L) + 1; /* index of _LOADED table */ | ||
402 | lua_getfield(L, LUA_REGISTRYINDEX, "_LOADED"); | ||
403 | lua_getfield(L, loaded, modname); /* get _LOADED[modname] */ | ||
404 | if (!lua_istable(L, -1)) { /* not found? */ | ||
405 | lua_pop(L, 1); /* remove previous result */ | ||
406 | /* try global variable (and create one if it does not exist) */ | ||
407 | if (luaL_findtable(L, LUA_GLOBALSINDEX, modname, 1) != NULL) | ||
408 | lj_err_callerv(L, LJ_ERR_BADMODN, modname); | ||
409 | lua_pushvalue(L, -1); | ||
410 | lua_setfield(L, loaded, modname); /* _LOADED[modname] = new table */ | ||
411 | } | ||
412 | /* check whether table already has a _NAME field */ | ||
413 | lua_getfield(L, -1, "_NAME"); | ||
414 | if (!lua_isnil(L, -1)) { /* is table an initialized module? */ | ||
415 | lua_pop(L, 1); | ||
416 | } else { /* no; initialize it */ | ||
417 | lua_pop(L, 1); | ||
418 | modinit(L, modname); | ||
419 | } | ||
420 | lua_pushvalue(L, -1); | ||
421 | setfenv(L); | ||
422 | dooptions(L, loaded - 1); | ||
423 | return 0; | ||
424 | } | ||
425 | |||
426 | static int lj_cf_package_seeall(lua_State *L) | ||
427 | { | ||
428 | luaL_checktype(L, 1, LUA_TTABLE); | ||
429 | if (!lua_getmetatable(L, 1)) { | ||
430 | lua_createtable(L, 0, 1); /* create new metatable */ | ||
431 | lua_pushvalue(L, -1); | ||
432 | lua_setmetatable(L, 1); | ||
433 | } | ||
434 | lua_pushvalue(L, LUA_GLOBALSINDEX); | ||
435 | lua_setfield(L, -2, "__index"); /* mt.__index = _G */ | ||
436 | return 0; | ||
437 | } | ||
438 | |||
439 | /* ------------------------------------------------------------------------ */ | ||
440 | |||
441 | #define AUXMARK "\1" | ||
442 | |||
443 | static void setpath(lua_State *L, const char *fieldname, const char *envname, | ||
444 | const char *def) | ||
445 | { | ||
446 | const char *path = getenv(envname); | ||
447 | if (path == NULL) { | ||
448 | lua_pushstring(L, def); | ||
449 | } else { | ||
450 | path = luaL_gsub(L, path, LUA_PATHSEP LUA_PATHSEP, | ||
451 | LUA_PATHSEP AUXMARK LUA_PATHSEP); | ||
452 | luaL_gsub(L, path, AUXMARK, def); | ||
453 | lua_remove(L, -2); | ||
454 | } | ||
455 | setprogdir(L); | ||
456 | lua_setfield(L, -2, fieldname); | ||
457 | } | ||
458 | |||
/* Functions registered in the package table itself. */
static const luaL_Reg package_lib[] = {
  { "loadlib", lj_cf_package_loadlib },
  { "seeall", lj_cf_package_seeall },
  { NULL, NULL }
};

/* Functions registered in the globals table. */
static const luaL_Reg package_global[] = {
  { "module", lj_cf_package_module },
  { "require", lj_cf_package_require },
  { NULL, NULL }
};

/* Initial contents of package.loaders, in this order. NULL-terminated. */
static const lua_CFunction package_loaders[] =
{
  lj_cf_package_loader_preload,
  lj_cf_package_loader_lua,
  lj_cf_package_loader_c,
  lj_cf_package_loader_croot,
  NULL
};
479 | |||
480 | LUALIB_API int luaopen_package(lua_State *L) | ||
481 | { | ||
482 | int i; | ||
483 | luaL_newmetatable(L, "_LOADLIB"); | ||
484 | lua_pushcfunction(L, lj_cf_package_unloadlib); | ||
485 | lua_setfield(L, -2, "__gc"); | ||
486 | luaL_register(L, LUA_LOADLIBNAME, package_lib); | ||
487 | lua_pushvalue(L, -1); | ||
488 | lua_replace(L, LUA_ENVIRONINDEX); | ||
489 | lua_createtable(L, sizeof(package_loaders)/sizeof(package_loaders[0])-1, 0); | ||
490 | for (i = 0; package_loaders[i] != NULL; i++) { | ||
491 | lua_pushcfunction(L, package_loaders[i]); | ||
492 | lua_rawseti(L, -2, i+1); | ||
493 | } | ||
494 | lua_setfield(L, -2, "loaders"); | ||
495 | setpath(L, "path", LUA_PATH, LUA_PATH_DEFAULT); | ||
496 | setpath(L, "cpath", LUA_CPATH, LUA_CPATH_DEFAULT); | ||
497 | lua_pushliteral(L, LUA_PATH_CONFIG); | ||
498 | lua_setfield(L, -2, "config"); | ||
499 | luaL_findtable(L, LUA_REGISTRYINDEX, "_LOADED", 16); | ||
500 | lua_setfield(L, -2, "loaded"); | ||
501 | lua_newtable(L); | ||
502 | lua_setfield(L, -2, "preload"); | ||
503 | lua_pushvalue(L, LUA_GLOBALSINDEX); | ||
504 | luaL_register(L, NULL, package_global); | ||
505 | lua_pop(L, 1); | ||
506 | return 1; | ||
507 | } | ||
508 | |||
diff --git a/src/lib_string.c b/src/lib_string.c new file mode 100644 index 00000000..fdd7fbcb --- /dev/null +++ b/src/lib_string.c | |||
@@ -0,0 +1,790 @@ | |||
1 | /* | ||
2 | ** String library. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | ** | ||
5 | ** Major portions taken verbatim or adapted from the Lua interpreter. | ||
6 | ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h | ||
7 | */ | ||
8 | |||
9 | #include <stdio.h> | ||
10 | |||
11 | #define lib_string_c | ||
12 | #define LUA_LIB | ||
13 | |||
14 | #include "lua.h" | ||
15 | #include "lauxlib.h" | ||
16 | #include "lualib.h" | ||
17 | |||
18 | #include "lj_obj.h" | ||
19 | #include "lj_err.h" | ||
20 | #include "lj_str.h" | ||
21 | #include "lj_tab.h" | ||
22 | #include "lj_state.h" | ||
23 | #include "lj_ff.h" | ||
24 | #include "lj_ctype.h" | ||
25 | #include "lj_lib.h" | ||
26 | |||
27 | /* ------------------------------------------------------------------------ */ | ||
28 | |||
29 | #define LJLIB_MODULE_string | ||
30 | |||
LJLIB_ASM(string_len) LJLIB_REC(.)
{
  /*
  ** Only checks that argument 1 is a string, then returns FFH_RETRY.
  ** NOTE(review): presumably FFH_RETRY re-runs the assembler fast path,
  ** which computes the actual length -- confirm in the VM sources.
  */
  lj_lib_checkstr(L, 1);
  return FFH_RETRY;
}
36 | |||
37 | LJLIB_ASM(string_byte) LJLIB_REC(string_range 0) | ||
38 | { | ||
39 | GCstr *s = lj_lib_checkstr(L, 1); | ||
40 | int32_t len = (int32_t)s->len; | ||
41 | int32_t start = lj_lib_optint(L, 2, 1); | ||
42 | int32_t stop = lj_lib_optint(L, 3, start); | ||
43 | int32_t n, i; | ||
44 | const unsigned char *p; | ||
45 | if (stop < 0) stop += len+1; | ||
46 | if (start < 0) start += len+1; | ||
47 | if (start <= 0) start = 1; | ||
48 | if (stop > len) stop = len; | ||
49 | if (start > stop) return FFH_RES(0); /* Empty interval: return no results. */ | ||
50 | start--; | ||
51 | n = stop - start; | ||
52 | if ((uint32_t)n > LUAI_MAXCSTACK) | ||
53 | lj_err_caller(L, LJ_ERR_STRSLC); | ||
54 | lj_state_checkstack(L, (MSize)n); | ||
55 | p = (const unsigned char *)strdata(s) + start; | ||
56 | for (i = 0; i < n; i++) | ||
57 | setintV(L->base + i-1, p[i]); | ||
58 | return FFH_RES(n); | ||
59 | } | ||
60 | |||
61 | LJLIB_ASM(string_char) | ||
62 | { | ||
63 | int i, nargs = cast_int(L->top - L->base); | ||
64 | char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, (size_t)nargs); | ||
65 | for (i = 1; i <= nargs; i++) { | ||
66 | int32_t k = lj_lib_checkint(L, i); | ||
67 | if (!checku8(k)) | ||
68 | lj_err_arg(L, i, LJ_ERR_BADVAL); | ||
69 | buf[i-1] = (char)k; | ||
70 | } | ||
71 | setstrV(L, L->base-1, lj_str_new(L, buf, (size_t)nargs)); | ||
72 | return FFH_RES(1); | ||
73 | } | ||
74 | |||
LJLIB_ASM(string_sub) LJLIB_REC(string_range 1)
{
  /*
  ** Checks argument 1 (string) and 2 (integer), stores the integer value
  ** of argument 3 (default -1) in its stack slot and returns FFH_RETRY.
  ** NOTE(review): presumably the assembler fast path then performs the
  ** actual substring operation -- confirm in the VM sources.
  */
  lj_lib_checkstr(L, 1);
  lj_lib_checkint(L, 2);
  setintV(L->base+2, lj_lib_optint(L, 3, -1));
  return FFH_RETRY;
}
82 | |||
LJLIB_ASM(string_rep)
{
  /*
  ** string.rep(s, n): s repeated n times.
  ** Repeat counts <= 0 and strings of length <= 1 are handed back with
  ** FFH_RETRY; for the latter only the temporary buffer is resized here.
  ** Longer strings are copied k times into the temporary buffer and the
  ** resulting string is stored at L->base-1.
  */
  GCstr *s = lj_lib_checkstr(L, 1);
  int32_t len = (int32_t)s->len;
  int32_t k = lj_lib_checkint(L, 2);
  int64_t tlen = (int64_t)k * len;  /* 64 bit product cannot overflow. */
  const char *src;
  char *buf;
  if (k <= 0) return FFH_RETRY;
  if (tlen > LJ_MAX_STR)
    lj_err_caller(L, LJ_ERR_STROV);
  buf = lj_str_needbuf(L, &G(L)->tmpbuf, (MSize)tlen);
  if (len <= 1) return FFH_RETRY; /* ASM code only needed buffer resize. */
  src = strdata(s);
  do {  /* One pass of the outer loop appends one copy of the string. */
    int32_t i = 0;
    do { *buf++ = src[i++]; } while (i < len);
  } while (--k > 0);
  setstrV(L, L->base-1, lj_str_new(L, G(L)->tmpbuf.buf, (size_t)tlen));
  return FFH_RES(1);
}
104 | |||
LJLIB_ASM(string_reverse)
{
  /*
  ** Checks argument 1 (string), makes sure the temporary buffer can hold
  ** s->len bytes and returns FFH_RETRY.
  ** NOTE(review): the two declarations that follow have no body of their
  ** own; presumably they share this fallback -- confirm in buildvm.
  */
  GCstr *s = lj_lib_checkstr(L, 1);
  lj_str_needbuf(L, &G(L)->tmpbuf, s->len);
  return FFH_RETRY;
}
LJLIB_ASM_(string_lower)
LJLIB_ASM_(string_upper)
113 | |||
114 | /* ------------------------------------------------------------------------ */ | ||
115 | |||
LJLIB_CF(string_dump)
{
  /* string.dump() does nothing but raise the LJ_ERR_STRDUMP error here. */
  lj_err_caller(L, LJ_ERR_STRDUMP);
  return 0; /* unreachable */
}
121 | |||
122 | /* ------------------------------------------------------------------------ */ | ||
123 | |||
/* macro to `unsign' a character */
#define uchar(c) ((unsigned char)(c))

/* Special values for the 'len' field of a capture in MatchState. */
#define CAP_UNFINISHED (-1)  /* Capture was opened, but not closed yet. */
#define CAP_POSITION (-2)  /* Position capture, i.e. `()' in the pattern. */
129 | |||
/* State shared by the pattern matching helpers during one matching run. */
typedef struct MatchState {
  const char *src_init; /* init of source string */
  const char *src_end; /* end (`\0') of source string */
  lua_State *L; /* used by the helpers to raise pattern errors */
  int level; /* total number of captures (finished or unfinished) */
  struct {
    const char *init; /* start of the capture in the source string */
    ptrdiff_t len; /* length, or CAP_UNFINISHED / CAP_POSITION */
  } capture[LUA_MAXCAPTURES];
} MatchState;
140 | |||
/* Escape character that introduces classes, %b, %f and back references. */
#define L_ESC '%'
/* NOTE(review): presumably the set of pattern magic characters -- its
** use is not visible in this part of the file. */
#define SPECIALS "^$*+?.([%-"
143 | |||
144 | static int check_capture(MatchState *ms, int l) | ||
145 | { | ||
146 | l -= '1'; | ||
147 | if (l < 0 || l >= ms->level || ms->capture[l].len == CAP_UNFINISHED) | ||
148 | lj_err_caller(ms->L, LJ_ERR_STRCAPI); | ||
149 | return l; | ||
150 | } | ||
151 | |||
152 | static int capture_to_close(MatchState *ms) | ||
153 | { | ||
154 | int level = ms->level; | ||
155 | for (level--; level>=0; level--) | ||
156 | if (ms->capture[level].len == CAP_UNFINISHED) return level; | ||
157 | lj_err_caller(ms->L, LJ_ERR_STRPATC); | ||
158 | return 0; /* unreachable */ | ||
159 | } | ||
160 | |||
161 | static const char *classend(MatchState *ms, const char *p) | ||
162 | { | ||
163 | switch (*p++) { | ||
164 | case L_ESC: | ||
165 | if (*p == '\0') | ||
166 | lj_err_caller(ms->L, LJ_ERR_STRPATE); | ||
167 | return p+1; | ||
168 | case '[': | ||
169 | if (*p == '^') p++; | ||
170 | do { /* look for a `]' */ | ||
171 | if (*p == '\0') | ||
172 | lj_err_caller(ms->L, LJ_ERR_STRPATM); | ||
173 | if (*(p++) == L_ESC && *p != '\0') | ||
174 | p++; /* skip escapes (e.g. `%]') */ | ||
175 | } while (*p != ']'); | ||
176 | return p+1; | ||
177 | default: | ||
178 | return p; | ||
179 | } | ||
180 | } | ||
181 | |||
/*
** Character class lookup, indexed by (class letter & 0x1f), so that the
** upper and lower case form of a letter share one slot:
** 1 = a, 3 = c, 4 = d, 12 = l, 16 = p, 19 = s, 21 = u, 23 = w, 24 = x.
** A zero entry means the letter does not name a class in this table.
*/
static const unsigned char match_class_map[32] = {
  0, LJ_CTYPE_ALPHA, 0, LJ_CTYPE_CNTRL, LJ_CTYPE_DIGIT, 0,0,0,0,0,0,0,
  LJ_CTYPE_LOWER, 0,0,0, LJ_CTYPE_PUNCT, 0,0, LJ_CTYPE_SPACE, 0,
  LJ_CTYPE_UPPER, 0, LJ_CTYPE_ALNUM, LJ_CTYPE_XDIGIT, 0,0,0,0,0,0,0
};
187 | |||
188 | static int match_class(int c, int cl) | ||
189 | { | ||
190 | if ((cl & 0xc0) == 0x40) { | ||
191 | int t = match_class_map[(cl&0x1f)]; | ||
192 | if (t) { | ||
193 | t = lj_ctype_isa(c, t); | ||
194 | return (cl & 0x20) ? t : !t; | ||
195 | } | ||
196 | if (cl == 'z') return c == 0; | ||
197 | if (cl == 'Z') return c != 0; | ||
198 | } | ||
199 | return (cl == c); | ||
200 | } | ||
201 | |||
202 | static int matchbracketclass(int c, const char *p, const char *ec) | ||
203 | { | ||
204 | int sig = 1; | ||
205 | if (*(p+1) == '^') { | ||
206 | sig = 0; | ||
207 | p++; /* skip the `^' */ | ||
208 | } | ||
209 | while (++p < ec) { | ||
210 | if (*p == L_ESC) { | ||
211 | p++; | ||
212 | if (match_class(c, uchar(*p))) | ||
213 | return sig; | ||
214 | } | ||
215 | else if ((*(p+1) == '-') && (p+2 < ec)) { | ||
216 | p+=2; | ||
217 | if (uchar(*(p-2)) <= c && c <= uchar(*p)) | ||
218 | return sig; | ||
219 | } | ||
220 | else if (uchar(*p) == c) return sig; | ||
221 | } | ||
222 | return !sig; | ||
223 | } | ||
224 | |||
225 | static int singlematch(int c, const char *p, const char *ep) | ||
226 | { | ||
227 | switch (*p) { | ||
228 | case '.': return 1; /* matches any char */ | ||
229 | case L_ESC: return match_class(c, uchar(*(p+1))); | ||
230 | case '[': return matchbracketclass(c, p, ep-1); | ||
231 | default: return (uchar(*p) == c); | ||
232 | } | ||
233 | } | ||
234 | |||
235 | static const char *match(MatchState *ms, const char *s, const char *p); | ||
236 | |||
237 | static const char *matchbalance(MatchState *ms, const char *s, const char *p) | ||
238 | { | ||
239 | if (*p == 0 || *(p+1) == 0) | ||
240 | lj_err_caller(ms->L, LJ_ERR_STRPATU); | ||
241 | if (*s != *p) { | ||
242 | return NULL; | ||
243 | } else { | ||
244 | int b = *p; | ||
245 | int e = *(p+1); | ||
246 | int cont = 1; | ||
247 | while (++s < ms->src_end) { | ||
248 | if (*s == e) { | ||
249 | if (--cont == 0) return s+1; | ||
250 | } else if (*s == b) { | ||
251 | cont++; | ||
252 | } | ||
253 | } | ||
254 | } | ||
255 | return NULL; /* string ends out of balance */ | ||
256 | } | ||
257 | |||
258 | static const char *max_expand(MatchState *ms, const char *s, | ||
259 | const char *p, const char *ep) | ||
260 | { | ||
261 | ptrdiff_t i = 0; /* counts maximum expand for item */ | ||
262 | while ((s+i)<ms->src_end && singlematch(uchar(*(s+i)), p, ep)) | ||
263 | i++; | ||
264 | /* keeps trying to match with the maximum repetitions */ | ||
265 | while (i>=0) { | ||
266 | const char *res = match(ms, (s+i), ep+1); | ||
267 | if (res) return res; | ||
268 | i--; /* else didn't match; reduce 1 repetition to try again */ | ||
269 | } | ||
270 | return NULL; | ||
271 | } | ||
272 | |||
273 | static const char *min_expand(MatchState *ms, const char *s, | ||
274 | const char *p, const char *ep) | ||
275 | { | ||
276 | for (;;) { | ||
277 | const char *res = match(ms, s, ep+1); | ||
278 | if (res != NULL) | ||
279 | return res; | ||
280 | else if (s<ms->src_end && singlematch(uchar(*s), p, ep)) | ||
281 | s++; /* try with one more repetition */ | ||
282 | else | ||
283 | return NULL; | ||
284 | } | ||
285 | } | ||
286 | |||
287 | static const char *start_capture(MatchState *ms, const char *s, | ||
288 | const char *p, int what) | ||
289 | { | ||
290 | const char *res; | ||
291 | int level = ms->level; | ||
292 | if (level >= LUA_MAXCAPTURES) lj_err_caller(ms->L, LJ_ERR_STRCAPN); | ||
293 | ms->capture[level].init = s; | ||
294 | ms->capture[level].len = what; | ||
295 | ms->level = level+1; | ||
296 | if ((res=match(ms, s, p)) == NULL) /* match failed? */ | ||
297 | ms->level--; /* undo capture */ | ||
298 | return res; | ||
299 | } | ||
300 | |||
301 | static const char *end_capture(MatchState *ms, const char *s, | ||
302 | const char *p) | ||
303 | { | ||
304 | int l = capture_to_close(ms); | ||
305 | const char *res; | ||
306 | ms->capture[l].len = s - ms->capture[l].init; /* close capture */ | ||
307 | if ((res = match(ms, s, p)) == NULL) /* match failed? */ | ||
308 | ms->capture[l].len = CAP_UNFINISHED; /* undo capture */ | ||
309 | return res; | ||
310 | } | ||
311 | |||
312 | static const char *match_capture(MatchState *ms, const char *s, int l) | ||
313 | { | ||
314 | size_t len; | ||
315 | l = check_capture(ms, l); | ||
316 | len = (size_t)ms->capture[l].len; | ||
317 | if ((size_t)(ms->src_end-s) >= len && | ||
318 | memcmp(ms->capture[l].init, s, len) == 0) | ||
319 | return s+len; | ||
320 | else | ||
321 | return NULL; | ||
322 | } | ||
323 | |||
/*
** Core of the pattern matcher: try to match pattern 'p' against the
** source text starting at 's'.
** Returns a pointer to the end of the match, or NULL if there is none.
** Captures are recorded in 'ms' as a side effect. Malformed patterns
** raise an error through the helpers.
*/
static const char *match(MatchState *ms, const char *s, const char *p)
{
init: /* using goto's to optimize tail recursion */
  switch (*p) {
  case '(': /* start capture */
    if (*(p+1) == ')') /* position capture? */
      return start_capture(ms, s, p+2, CAP_POSITION);
    else
      return start_capture(ms, s, p+1, CAP_UNFINISHED);
  case ')': /* end capture */
    return end_capture(ms, s, p+1);
  case L_ESC:
    switch (*(p+1)) {
    case 'b': /* balanced string? */
      s = matchbalance(ms, s, p+2);
      if (s == NULL) return NULL;
      p+=4; /* skip the escape, the 'b' and both delimiters */
      goto init; /* else return match(ms, s, p+4); */
    case 'f': { /* frontier? */
      const char *ep; char previous;
      p += 2;
      if (*p != '[')
        lj_err_caller(ms->L, LJ_ERR_STRPATB);
      ep = classend(ms, p); /* points to what is next */
      /* The character before the start of the source counts as '\0'. */
      previous = (s == ms->src_init) ? '\0' : *(s-1);
      /* Previous char must be outside the set, current char inside. */
      if (matchbracketclass(uchar(previous), p, ep-1) ||
          !matchbracketclass(uchar(*s), p, ep-1)) return NULL;
      p=ep;
      goto init; /* else return match(ms, s, ep); */
    }
    default:
      if (lj_ctype_isdigit(uchar(*(p+1)))) { /* capture results (%0-%9)? */
        s = match_capture(ms, s, uchar(*(p+1)));
        if (s == NULL) return NULL;
        p+=2;
        goto init; /* else return match(ms, s, p+2) */
      }
      goto dflt; /* case default */
    }
  case '\0': /* end of pattern */
    return s; /* match succeeded */
  case '$':
    if (*(p+1) == '\0') /* is the `$' the last char in pattern? */
      return (s == ms->src_end) ? s : NULL; /* check end of string */
    else
      goto dflt;
  default: dflt: { /* it is a pattern item */
    const char *ep = classend(ms, p); /* points to what is next */
    /* m is true if the current source character matches the item. */
    int m = s<ms->src_end && singlematch(uchar(*s), p, ep);
    switch (*ep) {
    case '?': { /* optional */
      const char *res;
      if (m && ((res=match(ms, s+1, ep+1)) != NULL))
        return res;
      p=ep+1;
      goto init; /* else return match(ms, s, ep+1); */
    }
    case '*': /* 0 or more repetitions */
      return max_expand(ms, s, p, ep);
    case '+': /* 1 or more repetitions */
      return (m ? max_expand(ms, s+1, p, ep) : NULL);
    case '-': /* 0 or more repetitions (minimum) */
      return min_expand(ms, s, p, ep);
    default:
      if (!m) return NULL;
      s++; p=ep;
      goto init; /* else return match(ms, s+1, ep); */
    }
  }
  }
}
395 | |||
/*
** Find the first occurrence of the l2-byte block s2 inside the l1-byte
** block s1. Both blocks may contain embedded zero bytes.
** Returns a pointer to the start of the occurrence, s1 itself for an
** empty needle, or NULL if there is no occurrence.
*/
static const char *lmemfind(const char *s1, size_t l1,
                            const char *s2, size_t l2)
{
  size_t tail, window;
  if (l2 == 0)
    return s1;  /* An empty needle matches everywhere. */
  if (l2 > l1)
    return NULL;  /* Needle longer than haystack; also keeps window >= 1. */
  tail = l2 - 1;  /* Bytes still to compare once the first byte is found. */
  window = l1 - tail;  /* A match cannot start beyond this many bytes. */
  while (window > 0) {
    const char *hit = (const char *)memchr(s1, *s2, window);
    if (hit == NULL)
      break;
    if (memcmp(hit+1, s2+1, tail) == 0)
      return hit;
    /* Resume the search right after the candidate that failed. */
    window -= (size_t)(hit+1 - s1);
    s1 = hit+1;
  }
  return NULL;  /* Not found. */
}
419 | |||
420 | static void push_onecapture(MatchState *ms, int i, const char *s, const char *e) | ||
421 | { | ||
422 | if (i >= ms->level) { | ||
423 | if (i == 0) /* ms->level == 0, too */ | ||
424 | lua_pushlstring(ms->L, s, (size_t)(e - s)); /* add whole match */ | ||
425 | else | ||
426 | lj_err_caller(ms->L, LJ_ERR_STRCAPI); | ||
427 | } else { | ||
428 | ptrdiff_t l = ms->capture[i].len; | ||
429 | if (l == CAP_UNFINISHED) lj_err_caller(ms->L, LJ_ERR_STRCAPU); | ||
430 | if (l == CAP_POSITION) | ||
431 | lua_pushinteger(ms->L, ms->capture[i].init - ms->src_init + 1); | ||
432 | else | ||
433 | lua_pushlstring(ms->L, ms->capture[i].init, (size_t)l); | ||
434 | } | ||
435 | } | ||
436 | |||
437 | static int push_captures(MatchState *ms, const char *s, const char *e) | ||
438 | { | ||
439 | int i; | ||
440 | int nlevels = (ms->level == 0 && s) ? 1 : ms->level; | ||
441 | luaL_checkstack(ms->L, nlevels, "too many captures"); | ||
442 | for (i = 0; i < nlevels; i++) | ||
443 | push_onecapture(ms, i, s, e); | ||
444 | return nlevels; /* number of strings pushed */ | ||
445 | } | ||
446 | |||
/*
** Convert a possibly negative string position into a non-negative one.
** A negative pos counts back from the end of a string of length len
** (-1 is the last character). Results that would still be negative are
** clamped to 0.
*/
static ptrdiff_t posrelat(ptrdiff_t pos, size_t len)
{
  if (pos >= 0)
    return pos;  /* Already an absolute position. */
  pos += (ptrdiff_t)len + 1;
  return (pos < 0) ? 0 : pos;
}
453 | |||
454 | static int str_find_aux(lua_State *L, int find) | ||
455 | { | ||
456 | size_t l1, l2; | ||
457 | const char *s = luaL_checklstring(L, 1, &l1); | ||
458 | const char *p = luaL_checklstring(L, 2, &l2); | ||
459 | ptrdiff_t init = posrelat(luaL_optinteger(L, 3, 1), l1) - 1; | ||
460 | if (init < 0) | ||
461 | init = 0; | ||
462 | else if ((size_t)(init) > l1) | ||
463 | init = (ptrdiff_t)l1; | ||
464 | if (find && (lua_toboolean(L, 4) || /* explicit request? */ | ||
465 | strpbrk(p, SPECIALS) == NULL)) { /* or no special characters? */ | ||
466 | /* do a plain search */ | ||
467 | const char *s2 = lmemfind(s+init, l1-(size_t)init, p, l2); | ||
468 | if (s2) { | ||
469 | lua_pushinteger(L, s2-s+1); | ||
470 | lua_pushinteger(L, s2-s+(ptrdiff_t)l2); | ||
471 | return 2; | ||
472 | } | ||
473 | } else { | ||
474 | MatchState ms; | ||
475 | int anchor = (*p == '^') ? (p++, 1) : 0; | ||
476 | const char *s1=s+init; | ||
477 | ms.L = L; | ||
478 | ms.src_init = s; | ||
479 | ms.src_end = s+l1; | ||
480 | do { | ||
481 | const char *res; | ||
482 | ms.level = 0; | ||
483 | if ((res=match(&ms, s1, p)) != NULL) { | ||
484 | if (find) { | ||
485 | lua_pushinteger(L, s1-s+1); /* start */ | ||
486 | lua_pushinteger(L, res-s); /* end */ | ||
487 | return push_captures(&ms, NULL, 0) + 2; | ||
488 | } else { | ||
489 | return push_captures(&ms, s1, res); | ||
490 | } | ||
491 | } | ||
492 | } while (s1++ < ms.src_end && !anchor); | ||
493 | } | ||
494 | lua_pushnil(L); /* not found */ | ||
495 | return 1; | ||
496 | } | ||
497 | |||
498 | LJLIB_CF(string_find) | ||
499 | { /* string.find: delegates to the shared find/match helper. */ | ||
500 | return str_find_aux(L, 1); /* find=1: results are start/end indices followed by captures */ | ||
501 | } | ||
502 | |||
503 | LJLIB_CF(string_match) | ||
504 | { /* string.match: delegates to the shared find/match helper. */ | ||
505 | return str_find_aux(L, 0); /* find=0: results are the captures (or the whole match) */ | ||
506 | } | ||
507 | |||
508 | LJLIB_NOREG LJLIB_CF(string_gmatch_aux) | ||
509 | { | ||
510 | const char *p = strVdata(lj_lib_upvalue(L, 2)); | ||
511 | GCstr *str = strV(lj_lib_upvalue(L, 1)); | ||
512 | const char *s = strdata(str); | ||
513 | TValue *tvpos = lj_lib_upvalue(L, 3); | ||
514 | const char *src = s + tvpos->u32.lo; | ||
515 | MatchState ms; | ||
516 | ms.L = L; | ||
517 | ms.src_init = s; | ||
518 | ms.src_end = s + str->len; | ||
519 | for (; src <= ms.src_end; src++) { | ||
520 | const char *e; | ||
521 | ms.level = 0; | ||
522 | if ((e = match(&ms, src, p)) != NULL) { | ||
523 | int32_t pos = (int32_t)(e - s); | ||
524 | if (e == src) pos++; /* Ensure progress for empty match. */ | ||
525 | tvpos->u32.lo = (uint32_t)pos; | ||
526 | return push_captures(&ms, src, e); | ||
527 | } | ||
528 | } | ||
529 | return 0; /* not found */ | ||
530 | } | ||
531 | |||
532 | LJLIB_CF(string_gmatch) | ||
533 | { /* string.gmatch: returns a closure over subject, pattern and a scan-position slot. */ | ||
534 | lj_lib_checkstr(L, 1); | ||
535 | lj_lib_checkstr(L, 2); | ||
536 | L->top = L->base+3; /* keep exactly three slots: subject, pattern, position */ | ||
537 | (L->top-1)->u64 = 0; /* zero the third slot; string_gmatch_aux reads its u32.lo as the resume offset */ | ||
538 | lua_pushcclosure(L, lj_cf_string_gmatch_aux, 3); /* the three slots become upvalues 1..3 */ | ||
539 | funcV(L->top-1)->c.ffid = FF_string_gmatch_aux; | ||
540 | return 1; | ||
541 | } | ||
542 | |||
543 | static void add_s(MatchState *ms, luaL_Buffer *b, const char *s, const char *e) | ||
544 | { | ||
545 | size_t l, i; | ||
546 | const char *news = lua_tolstring(ms->L, 3, &l); | ||
547 | for (i = 0; i < l; i++) { | ||
548 | if (news[i] != L_ESC) { | ||
549 | luaL_addchar(b, news[i]); | ||
550 | } else { | ||
551 | i++; /* skip ESC */ | ||
552 | if (!lj_ctype_isdigit(uchar(news[i]))) { | ||
553 | luaL_addchar(b, news[i]); | ||
554 | } else if (news[i] == '0') { | ||
555 | luaL_addlstring(b, s, (size_t)(e - s)); | ||
556 | } else { | ||
557 | push_onecapture(ms, news[i] - '1', s, e); | ||
558 | luaL_addvalue(b); /* add capture to accumulated result */ | ||
559 | } | ||
560 | } | ||
561 | } | ||
562 | } | ||
563 | |||
564 | static void add_value(MatchState *ms, luaL_Buffer *b, | ||
565 | const char *s, const char *e) | ||
566 | { | ||
567 | lua_State *L = ms->L; | ||
568 | switch (lua_type(L, 3)) { | ||
569 | case LUA_TNUMBER: | ||
570 | case LUA_TSTRING: { | ||
571 | add_s(ms, b, s, e); | ||
572 | return; | ||
573 | } | ||
574 | case LUA_TFUNCTION: { | ||
575 | int n; | ||
576 | lua_pushvalue(L, 3); | ||
577 | n = push_captures(ms, s, e); | ||
578 | lua_call(L, n, 1); | ||
579 | break; | ||
580 | } | ||
581 | case LUA_TTABLE: { | ||
582 | push_onecapture(ms, 0, s, e); | ||
583 | lua_gettable(L, 3); | ||
584 | break; | ||
585 | } | ||
586 | } | ||
587 | if (!lua_toboolean(L, -1)) { /* nil or false? */ | ||
588 | lua_pop(L, 1); | ||
589 | lua_pushlstring(L, s, (size_t)(e - s)); /* keep original text */ | ||
590 | } else if (!lua_isstring(L, -1)) { | ||
591 | lj_err_callerv(L, LJ_ERR_STRGSRV, luaL_typename(L, -1)); | ||
592 | } | ||
593 | luaL_addvalue(b); /* add result to accumulator */ | ||
594 | } | ||
595 | |||
596 | LJLIB_CF(string_gsub) | ||
597 | { | ||
598 | size_t srcl; | ||
599 | const char *src = luaL_checklstring(L, 1, &srcl); | ||
600 | const char *p = luaL_checkstring(L, 2); | ||
601 | int tr = lua_type(L, 3); | ||
602 | int max_s = luaL_optint(L, 4, (int)(srcl+1)); | ||
603 | int anchor = (*p == '^') ? (p++, 1) : 0; | ||
604 | int n = 0; | ||
605 | MatchState ms; | ||
606 | luaL_Buffer b; | ||
607 | if (!(tr == LUA_TNUMBER || tr == LUA_TSTRING || | ||
608 | tr == LUA_TFUNCTION || tr == LUA_TTABLE)) | ||
609 | lj_err_arg(L, 3, LJ_ERR_NOSFT); | ||
610 | luaL_buffinit(L, &b); | ||
611 | ms.L = L; | ||
612 | ms.src_init = src; | ||
613 | ms.src_end = src+srcl; | ||
614 | while (n < max_s) { | ||
615 | const char *e; | ||
616 | ms.level = 0; | ||
617 | e = match(&ms, src, p); | ||
618 | if (e) { | ||
619 | n++; | ||
620 | add_value(&ms, &b, src, e); | ||
621 | } | ||
622 | if (e && e>src) /* non empty match? */ | ||
623 | src = e; /* skip it */ | ||
624 | else if (src < ms.src_end) | ||
625 | luaL_addchar(&b, *src++); | ||
626 | else | ||
627 | break; | ||
628 | if (anchor) | ||
629 | break; | ||
630 | } | ||
631 | luaL_addlstring(&b, src, (size_t)(ms.src_end-src)); | ||
632 | luaL_pushresult(&b); | ||
633 | lua_pushinteger(L, n); /* number of substitutions */ | ||
634 | return 2; | ||
635 | } | ||
636 | |||
637 | /* ------------------------------------------------------------------------ */ | ||
638 | |||
639 | /* maximum size of each formatted item (> len(format('%99.99f', -1e308))) */ | ||
640 | #define MAX_FMTITEM 512 | ||
641 | /* valid flags in a format specification */ | ||
642 | #define FMT_FLAGS "-+ #0" | ||
643 | /* | ||
644 | ** maximum size of each format specification (such as '%-099.99d') | ||
645 | ** (+10 accounts for %99.99x plus margin of error) | ||
646 | */ | ||
647 | #define MAX_FMTSPEC (sizeof(FMT_FLAGS) + sizeof(LUA_INTFRMLEN) + 10) | ||
648 | |||
649 | static void addquoted(lua_State *L, luaL_Buffer *b, int arg) | ||
650 | { | ||
651 | GCstr *str = lj_lib_checkstr(L, arg); | ||
652 | int32_t len = (int32_t)str->len; | ||
653 | const char *s = strdata(str); | ||
654 | luaL_addchar(b, '"'); | ||
655 | while (len--) { | ||
656 | switch (*s) { | ||
657 | case '"': case '\\': case '\n': | ||
658 | luaL_addchar(b, '\\'); | ||
659 | luaL_addchar(b, *s); | ||
660 | break; | ||
661 | case '\r': | ||
662 | luaL_addlstring(b, "\\r", 2); | ||
663 | break; | ||
664 | case '\0': | ||
665 | luaL_addlstring(b, "\\000", 4); | ||
666 | break; | ||
667 | default: | ||
668 | luaL_addchar(b, *s); | ||
669 | break; | ||
670 | } | ||
671 | s++; | ||
672 | } | ||
673 | luaL_addchar(b, '"'); | ||
674 | } | ||
675 | |||
676 | static const char *scanformat(lua_State *L, const char *strfrmt, char *form) | ||
677 | { | ||
678 | const char *p = strfrmt; | ||
679 | while (*p != '\0' && strchr(FMT_FLAGS, *p) != NULL) p++; /* skip flags */ | ||
680 | if ((size_t)(p - strfrmt) >= sizeof(FMT_FLAGS)) | ||
681 | lj_err_caller(L, LJ_ERR_STRFMTR); | ||
682 | if (lj_ctype_isdigit(uchar(*p))) p++; /* skip width */ | ||
683 | if (lj_ctype_isdigit(uchar(*p))) p++; /* (2 digits at most) */ | ||
684 | if (*p == '.') { | ||
685 | p++; | ||
686 | if (lj_ctype_isdigit(uchar(*p))) p++; /* skip precision */ | ||
687 | if (lj_ctype_isdigit(uchar(*p))) p++; /* (2 digits at most) */ | ||
688 | } | ||
689 | if (lj_ctype_isdigit(uchar(*p))) | ||
690 | lj_err_caller(L, LJ_ERR_STRFMTW); | ||
691 | *(form++) = '%'; | ||
692 | strncpy(form, strfrmt, (size_t)(p - strfrmt + 1)); | ||
693 | form += p - strfrmt + 1; | ||
694 | *form = '\0'; | ||
695 | return p; | ||
696 | } | ||
697 | |||
698 | static void addintlen(char *form) | ||
699 | { | ||
700 | size_t l = strlen(form); | ||
701 | char spec = form[l - 1]; | ||
702 | strcpy(form + l - 1, LUA_INTFRMLEN); | ||
703 | form[l + sizeof(LUA_INTFRMLEN) - 2] = spec; | ||
704 | form[l + sizeof(LUA_INTFRMLEN) - 1] = '\0'; | ||
705 | } | ||
706 | |||
707 | LJLIB_CF(string_format) | ||
708 | { | ||
709 | int arg = 1; | ||
710 | GCstr *fmt = lj_lib_checkstr(L, arg); | ||
711 | const char *strfrmt = strdata(fmt); | ||
712 | const char *strfrmt_end = strfrmt + fmt->len; | ||
713 | luaL_Buffer b; | ||
714 | luaL_buffinit(L, &b); | ||
715 | while (strfrmt < strfrmt_end) { | ||
716 | if (*strfrmt != L_ESC) { | ||
717 | luaL_addchar(&b, *strfrmt++); | ||
718 | } else if (*++strfrmt == L_ESC) { | ||
719 | luaL_addchar(&b, *strfrmt++); /* %% */ | ||
720 | } else { /* format item */ | ||
721 | char form[MAX_FMTSPEC]; /* to store the format (`%...') */ | ||
722 | char buff[MAX_FMTITEM]; /* to store the formatted item */ | ||
723 | arg++; | ||
724 | strfrmt = scanformat(L, strfrmt, form); | ||
725 | switch (*strfrmt++) { | ||
726 | case 'c': | ||
727 | sprintf(buff, form, lj_lib_checkint(L, arg)); | ||
728 | break; | ||
729 | case 'd': case 'i': | ||
730 | addintlen(form); | ||
731 | sprintf(buff, form, (LUA_INTFRM_T)lj_lib_checknum(L, arg)); | ||
732 | break; | ||
733 | case 'o': case 'u': case 'x': case 'X': | ||
734 | addintlen(form); | ||
735 | sprintf(buff, form, (unsigned LUA_INTFRM_T)lj_lib_checknum(L, arg)); | ||
736 | break; | ||
737 | case 'e': case 'E': case 'f': case 'g': case 'G': | ||
738 | sprintf(buff, form, (double)lj_lib_checknum(L, arg)); | ||
739 | break; | ||
740 | case 'q': | ||
741 | addquoted(L, &b, arg); | ||
742 | continue; | ||
743 | case 'p': | ||
744 | lj_str_pushf(L, "%p", lua_topointer(L, arg)); | ||
745 | luaL_addvalue(&b); | ||
746 | continue; | ||
747 | case 's': { | ||
748 | GCstr *str = lj_lib_checkstr(L, arg); | ||
749 | if (!strchr(form, '.') && str->len >= 100) { | ||
750 | /* no precision and string is too long to be formatted; | ||
751 | keep original string */ | ||
752 | setstrV(L, L->top++, str); | ||
753 | luaL_addvalue(&b); | ||
754 | continue; | ||
755 | } | ||
756 | sprintf(buff, form, strdata(str)); | ||
757 | break; | ||
758 | } | ||
759 | default: | ||
760 | lj_err_callerv(L, LJ_ERR_STRFMTO, *(strfrmt -1)); | ||
761 | break; | ||
762 | } | ||
763 | luaL_addlstring(&b, buff, strlen(buff)); | ||
764 | } | ||
765 | } | ||
766 | luaL_pushresult(&b); | ||
767 | return 1; | ||
768 | } | ||
769 | |||
770 | /* ------------------------------------------------------------------------ */ | ||
771 | |||
772 | #include "lj_libdef.h" | ||
773 | |||
774 | LUALIB_API int luaopen_string(lua_State *L) | ||
775 | { /* Opens the string library and installs a base metatable for strings. */ | ||
776 | GCtab *mt; | ||
777 | LJ_LIB_REG(L, string); | ||
778 | #if defined(LUA_COMPAT_GFIND) | ||
779 | lua_getfield(L, -1, "gmatch"); /* compatibility alias: gfind refers to the same function as gmatch */ | ||
780 | lua_setfield(L, -2, "gfind"); | ||
781 | #endif | ||
782 | mt = lj_tab_new(L, 0, 1); | ||
783 | /* NOBARRIER: G(L)->mmname[] is a GC root. */ | ||
784 | setgcref(G(L)->basemt[~LJ_TSTR], obj2gco(mt)); /* mt becomes the base metatable for the string type */ | ||
785 | settabV(L, lj_tab_setstr(L, mt, strref(G(L)->mmname[MM_index])), | ||
786 | tabV(L->top-1)); /* mt[<MM_index name>] = table on top of the stack (presumably the library table left by LJ_LIB_REG -- confirm) */ | ||
787 | mt->nomm = cast_byte(~(1u<<MM_index)); | ||
788 | return 1; | ||
789 | } | ||
790 | |||
diff --git a/src/lib_table.c b/src/lib_table.c new file mode 100644 index 00000000..68dc825b --- /dev/null +++ b/src/lib_table.c | |||
@@ -0,0 +1,276 @@ | |||
1 | /* | ||
2 | ** Table library. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | ** | ||
5 | ** Major portions taken verbatim or adapted from the Lua interpreter. | ||
6 | ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h | ||
7 | */ | ||
8 | |||
9 | #define lib_table_c | ||
10 | #define LUA_LIB | ||
11 | |||
12 | #include "lua.h" | ||
13 | #include "lauxlib.h" | ||
14 | #include "lualib.h" | ||
15 | |||
16 | #include "lj_obj.h" | ||
17 | #include "lj_gc.h" | ||
18 | #include "lj_err.h" | ||
19 | #include "lj_tab.h" | ||
20 | #include "lj_lib.h" | ||
21 | |||
22 | /* ------------------------------------------------------------------------ */ | ||
23 | |||
24 | #define LJLIB_MODULE_table | ||
25 | |||
26 | LJLIB_CF(table_foreachi) | ||
27 | { | ||
28 | GCtab *t = lj_lib_checktab(L, 1); | ||
29 | GCfunc *func = lj_lib_checkfunc(L, 2); | ||
30 | MSize i, n = lj_tab_len(t); | ||
31 | for (i = 1; i <= n; i++) { | ||
32 | cTValue *val; | ||
33 | setfuncV(L, L->top, func); | ||
34 | setintV(L->top+1, i); | ||
35 | val = lj_tab_getint(t, (int32_t)i); | ||
36 | if (val) { copyTV(L, L->top+2, val); } else { setnilV(L->top+2); } | ||
37 | L->top += 3; | ||
38 | lua_call(L, 2, 1); | ||
39 | if (!tvisnil(L->top-1)) | ||
40 | return 1; | ||
41 | L->top--; | ||
42 | } | ||
43 | return 0; | ||
44 | } | ||
45 | |||
46 | LJLIB_CF(table_foreach) | ||
47 | { | ||
48 | GCtab *t = lj_lib_checktab(L, 1); | ||
49 | GCfunc *func = lj_lib_checkfunc(L, 2); | ||
50 | L->top = L->base+3; | ||
51 | setnilV(L->top-1); | ||
52 | while (lj_tab_next(L, t, L->top-1)) { | ||
53 | copyTV(L, L->top+2, L->top); | ||
54 | copyTV(L, L->top+1, L->top-1); | ||
55 | setfuncV(L, L->top, func); | ||
56 | L->top += 3; | ||
57 | lua_call(L, 2, 1); | ||
58 | if (!tvisnil(L->top-1)) | ||
59 | return 1; | ||
60 | L->top--; | ||
61 | } | ||
62 | return 0; | ||
63 | } | ||
64 | |||
65 | LJLIB_ASM(table_getn) LJLIB_REC(.) | ||
66 | { /* Only validates that argument 1 is a table. NOTE(review): the length itself is presumably computed by an assembler fast path -- confirm. */ | ||
67 | lj_lib_checktab(L, 1); | ||
68 | return FFH_UNREACHABLE; | ||
69 | } | ||
70 | |||
71 | LJLIB_CF(table_maxn) | ||
72 | { | ||
73 | GCtab *t = lj_lib_checktab(L, 1); | ||
74 | TValue *array = tvref(t->array); | ||
75 | Node *node; | ||
76 | lua_Number m = 0; | ||
77 | uint32_t i; | ||
78 | for (i = 0; i < t->asize; i++) | ||
79 | if (!tvisnil(&array[i])) { | ||
80 | m = (lua_Number)i; | ||
81 | break; | ||
82 | } | ||
83 | node = noderef(t->node); | ||
84 | for (i = 0; i <= t->hmask; i++) | ||
85 | if (tvisnum(&node[i].key) && numV(&node[i].key) > m) | ||
86 | m = numV(&node[i].key); | ||
87 | setnumV(L->top-1, m); | ||
88 | return 1; | ||
89 | } | ||
90 | |||
91 | LJLIB_CF(table_insert) | ||
92 | { | ||
93 | GCtab *t = lj_lib_checktab(L, 1); | ||
94 | int32_t n, i = (int32_t)lj_tab_len(t) + 1; | ||
95 | int nargs = (int)((char *)L->top - (char *)L->base); | ||
96 | if (nargs != 2*sizeof(TValue)) { | ||
97 | if (nargs != 3*sizeof(TValue)) | ||
98 | lj_err_caller(L, LJ_ERR_TABINS); | ||
99 | /* NOBARRIER: This just moves existing elements around. */ | ||
100 | for (n = lj_lib_checkint(L, 2); i > n; i--) { | ||
101 | /* The set may invalidate the get pointer, so need to do it first! */ | ||
102 | TValue *dst = lj_tab_setint(L, t, i); | ||
103 | cTValue *src = lj_tab_getint(t, i-1); | ||
104 | if (src) { | ||
105 | copyTV(L, dst, src); | ||
106 | } else { | ||
107 | setnilV(dst); | ||
108 | } | ||
109 | } | ||
110 | i = n; | ||
111 | } | ||
112 | { | ||
113 | TValue *dst = lj_tab_setint(L, t, i); | ||
114 | copyTV(L, dst, L->top-1); | ||
115 | lj_gc_barriert(L, t, dst); | ||
116 | } | ||
117 | return 0; | ||
118 | } | ||
119 | |||
120 | LJLIB_CF(table_remove) | ||
121 | { | ||
122 | GCtab *t = lj_lib_checktab(L, 1); | ||
123 | int32_t e = (int32_t)lj_tab_len(t); | ||
124 | int32_t pos = lj_lib_optint(L, 2, e); | ||
125 | if (!(1 <= pos && pos <= e)) /* position is outside bounds? */ | ||
126 | return 0; /* nothing to remove */ | ||
127 | lua_rawgeti(L, 1, pos); | ||
128 | /* NOBARRIER: This just moves existing elements around. */ | ||
129 | for (; pos < e; pos++) { | ||
130 | cTValue *src = lj_tab_getint(t, pos+1); | ||
131 | TValue *dst = lj_tab_setint(L, t, pos); | ||
132 | if (src) { | ||
133 | copyTV(L, dst, src); | ||
134 | } else { | ||
135 | setnilV(dst); | ||
136 | } | ||
137 | } | ||
138 | setnilV(lj_tab_setint(L, t, e)); | ||
139 | return 1; | ||
140 | } | ||
141 | |||
142 | LJLIB_CF(table_concat) | ||
143 | { | ||
144 | luaL_Buffer b; | ||
145 | GCtab *t = lj_lib_checktab(L, 1); | ||
146 | GCstr *sep = lj_lib_optstr(L, 2); | ||
147 | MSize seplen = sep ? sep->len : 0; | ||
148 | int32_t i = lj_lib_optint(L, 3, 1); | ||
149 | int32_t e = L->base+3 < L->top ? lj_lib_checkint(L, 4) : | ||
150 | (int32_t)lj_tab_len(t); | ||
151 | luaL_buffinit(L, &b); | ||
152 | if (i <= e) { | ||
153 | for (;;) { | ||
154 | cTValue *o; | ||
155 | lua_rawgeti(L, 1, i); | ||
156 | o = L->top-1; | ||
157 | if (!(tvisstr(o) || tvisnum(o))) | ||
158 | lj_err_callerv(L, LJ_ERR_TABCAT, typename(o), i); | ||
159 | luaL_addvalue(&b); | ||
160 | if (i++ == e) break; | ||
161 | if (seplen) | ||
162 | luaL_addlstring(&b, strdata(sep), seplen); | ||
163 | } | ||
164 | } | ||
165 | luaL_pushresult(&b); | ||
166 | return 1; | ||
167 | } | ||
168 | |||
169 | /* ------------------------------------------------------------------------ */ | ||
170 | |||
171 | static void set2(lua_State *L, int i, int j) | ||
172 | { /* Stores the two topmost stack values into the table at stack index 1: top -> t[i], then the next one -> t[j]. */ | ||
173 | lua_rawseti(L, 1, i); | ||
174 | lua_rawseti(L, 1, j); | ||
175 | } | ||
176 | |||
177 | static int sort_comp(lua_State *L, int a, int b) | ||
178 | { | ||
179 | if (!lua_isnil(L, 2)) { /* function? */ | ||
180 | int res; | ||
181 | lua_pushvalue(L, 2); | ||
182 | lua_pushvalue(L, a-1); /* -1 to compensate function */ | ||
183 | lua_pushvalue(L, b-2); /* -2 to compensate function and `a' */ | ||
184 | lua_call(L, 2, 1); | ||
185 | res = lua_toboolean(L, -1); | ||
186 | lua_pop(L, 1); | ||
187 | return res; | ||
188 | } else { /* a < b? */ | ||
189 | return lua_lessthan(L, a, b); | ||
190 | } | ||
191 | } | ||
192 | |||
193 | static void auxsort(lua_State *L, int l, int u) | ||
194 | { /* Quicksort of the table at stack index 1 over indices l..u: recurses into the smaller partition, loops on the larger one. */ | ||
195 | while (l < u) { /* for tail recursion */ | ||
196 | int i, j; | ||
197 | /* sort elements a[l], a[(l+u)/2] and a[u] */ | ||
198 | lua_rawgeti(L, 1, l); | ||
199 | lua_rawgeti(L, 1, u); | ||
200 | if (sort_comp(L, -1, -2)) /* a[u] < a[l]? */ | ||
201 | set2(L, l, u); /* swap a[l] - a[u] */ | ||
202 | else | ||
203 | lua_pop(L, 2); | ||
204 | if (u-l == 1) break; /* only 2 elements */ | ||
205 | i = (l+u)/2; | ||
206 | lua_rawgeti(L, 1, i); | ||
207 | lua_rawgeti(L, 1, l); | ||
208 | if (sort_comp(L, -2, -1)) { /* a[i]<a[l]? */ | ||
209 | set2(L, i, l); | ||
210 | } else { | ||
211 | lua_pop(L, 1); /* remove a[l] */ | ||
212 | lua_rawgeti(L, 1, u); | ||
213 | if (sort_comp(L, -1, -2)) /* a[u]<a[i]? */ | ||
214 | set2(L, i, u); | ||
215 | else | ||
216 | lua_pop(L, 2); | ||
217 | } | ||
218 | if (u-l == 2) break; /* only 3 elements */ | ||
219 | lua_rawgeti(L, 1, i); /* Pivot */ | ||
220 | lua_pushvalue(L, -1); /* second copy: one stays on the stack as P, the other is stored by set2 */ | ||
221 | lua_rawgeti(L, 1, u-1); | ||
222 | set2(L, i, u-1); /* swap a[i] and a[u-1]; the pivot copy remains on the stack */ | ||
223 | /* a[l] <= P == a[u-1] <= a[u], only need to sort from l+1 to u-2 */ | ||
224 | i = l; j = u-1; | ||
225 | for (;;) { /* invariant: a[l..i] <= P <= a[j..u] */ | ||
226 | /* repeat ++i until a[i] >= P */ | ||
227 | while (lua_rawgeti(L, 1, ++i), sort_comp(L, -1, -2)) { | ||
228 | if (i>u) lj_err_caller(L, LJ_ERR_TABSORT); /* ran past the upper bound: raise the sort error */ | ||
229 | lua_pop(L, 1); /* remove a[i] */ | ||
230 | } | ||
231 | /* repeat --j until a[j] <= P */ | ||
232 | while (lua_rawgeti(L, 1, --j), sort_comp(L, -3, -1)) { | ||
233 | if (j<l) lj_err_caller(L, LJ_ERR_TABSORT); /* ran past the lower bound: raise the sort error */ | ||
234 | lua_pop(L, 1); /* remove a[j] */ | ||
235 | } | ||
236 | if (j<i) { | ||
237 | lua_pop(L, 3); /* pop pivot, a[i], a[j] */ | ||
238 | break; | ||
239 | } | ||
240 | set2(L, i, j); | ||
241 | } | ||
242 | lua_rawgeti(L, 1, u-1); | ||
243 | lua_rawgeti(L, 1, i); | ||
244 | set2(L, u-1, i); /* swap pivot (a[u-1]) with a[i] */ | ||
245 | /* a[l..i-1] <= a[i] == P <= a[i+1..u] */ | ||
246 | /* adjust so that smaller half is in [j..i] and larger one in [l..u] */ | ||
247 | if (i-l < u-i) { | ||
248 | j=l; i=i-1; l=i+2; | ||
249 | } else { | ||
250 | j=i+1; i=u; u=j-2; | ||
251 | } | ||
252 | auxsort(L, j, i); /* call recursively the smaller one */ | ||
253 | } /* repeat the routine for the larger one */ | ||
254 | } | ||
255 | |||
256 | LJLIB_CF(table_sort) | ||
257 | { /* table.sort(t [, comp]): sorts t in place over indices 1..length; returns nothing. */ | ||
258 | GCtab *t = lj_lib_checktab(L, 1); | ||
259 | int32_t n = (int32_t)lj_tab_len(t); | ||
260 | lua_settop(L, 2); /* exactly two slots: table and comparison function (nil if absent), as sort_comp tests slot 2 */ | ||
261 | if (!tvisnil(L->base+1)) | ||
262 | lj_lib_checkfunc(L, 2); /* a non-nil second argument must be a function */ | ||
263 | auxsort(L, 1, n); | ||
264 | return 0; | ||
265 | } | ||
266 | |||
267 | /* ------------------------------------------------------------------------ */ | ||
268 | |||
269 | #include "lj_libdef.h" | ||
270 | |||
271 | LUALIB_API int luaopen_table(lua_State *L) | ||
272 | { /* Opens the table library via LJ_LIB_REG and reports one result to the caller. */ | ||
273 | LJ_LIB_REG(L, table); | ||
274 | return 1; | ||
275 | } | ||
276 | |||
diff --git a/src/lj.supp b/src/lj.supp new file mode 100644 index 00000000..9a1379d7 --- /dev/null +++ b/src/lj.supp | |||
@@ -0,0 +1,6 @@ | |||
1 | # Valgrind suppression file for LuaJIT 2.x. | ||
2 | { | ||
3 | Optimized string compare | ||
4 | Memcheck:Addr4 | ||
5 | fun:lj_str_cmp | ||
6 | } | ||
diff --git a/src/lj_alloc.c b/src/lj_alloc.c new file mode 100644 index 00000000..8ad4f8fb --- /dev/null +++ b/src/lj_alloc.c | |||
@@ -0,0 +1,1232 @@ | |||
1 | /* | ||
2 | ** Bundled memory allocator. | ||
3 | ** | ||
4 | ** Beware: this is a HEAVILY CUSTOMIZED version of dlmalloc. | ||
5 | ** The original bears the following remark: | ||
6 | ** | ||
7 | ** This is a version (aka dlmalloc) of malloc/free/realloc written by | ||
8 | ** Doug Lea and released to the public domain, as explained at | ||
9 | ** http://creativecommons.org/licenses/publicdomain. | ||
10 | ** | ||
11 | ** * Version pre-2.8.4 Wed Mar 29 19:46:29 2006 (dl at gee) | ||
12 | ** | ||
13 | ** No additional copyright is claimed over the customizations. | ||
14 | ** Please do NOT bother the original author about this version here! | ||
15 | ** | ||
16 | ** If you want to use dlmalloc in another project, you should get | ||
17 | ** the original from: ftp://gee.cs.oswego.edu/pub/misc/ | ||
18 | ** For thread-safe derivatives, take a look at: | ||
19 | ** - ptmalloc: http://www.malloc.de/ | ||
20 | ** - nedmalloc: http://www.nedprod.com/programs/portable/nedmalloc/ | ||
21 | */ | ||
22 | |||
23 | #define lj_alloc_c | ||
24 | #define LUA_CORE | ||
25 | |||
26 | /* To get the mremap prototype. Must be defined before any system includes. */ | ||
27 | #if defined(__linux__) && !defined(_GNU_SOURCE) | ||
28 | #define _GNU_SOURCE | ||
29 | #endif | ||
30 | |||
31 | #include "lj_def.h" | ||
32 | #include "lj_arch.h" | ||
33 | #include "lj_alloc.h" | ||
34 | |||
35 | #ifndef LUAJIT_USE_SYSMALLOC | ||
36 | |||
37 | #define MAX_SIZE_T (~(size_t)0) | ||
38 | #define MALLOC_ALIGNMENT ((size_t)8U) | ||
39 | |||
40 | #define DEFAULT_GRANULARITY ((size_t)128U * (size_t)1024U) | ||
41 | #define DEFAULT_TRIM_THRESHOLD ((size_t)2U * (size_t)1024U * (size_t)1024U) | ||
42 | #define DEFAULT_MMAP_THRESHOLD ((size_t)128U * (size_t)1024U) | ||
43 | #define MAX_RELEASE_CHECK_RATE 255 | ||
44 | |||
45 | /* ------------------- size_t and alignment properties -------------------- */ | ||
46 | |||
47 | /* The byte and bit size of a size_t */ | ||
48 | #define SIZE_T_SIZE (sizeof(size_t)) | ||
49 | #define SIZE_T_BITSIZE (sizeof(size_t) << 3) | ||
50 | |||
51 | /* Some constants coerced to size_t */ | ||
52 | /* Annoying but necessary to avoid errors on some platforms */ | ||
53 | #define SIZE_T_ZERO ((size_t)0) | ||
54 | #define SIZE_T_ONE ((size_t)1) | ||
55 | #define SIZE_T_TWO ((size_t)2) | ||
56 | #define TWO_SIZE_T_SIZES (SIZE_T_SIZE<<1) | ||
57 | #define FOUR_SIZE_T_SIZES (SIZE_T_SIZE<<2) | ||
58 | #define SIX_SIZE_T_SIZES (FOUR_SIZE_T_SIZES+TWO_SIZE_T_SIZES) | ||
59 | |||
60 | /* The bit mask value corresponding to MALLOC_ALIGNMENT */ | ||
61 | #define CHUNK_ALIGN_MASK (MALLOC_ALIGNMENT - SIZE_T_ONE) | ||
62 | |||
63 | /* the number of bytes to offset an address to align it */ | ||
64 | #define align_offset(A)\ | ||
65 | ((((size_t)(A) & CHUNK_ALIGN_MASK) == 0)? 0 :\ | ||
66 | ((MALLOC_ALIGNMENT - ((size_t)(A) & CHUNK_ALIGN_MASK)) & CHUNK_ALIGN_MASK)) | ||
67 | |||
68 | /* -------------------------- MMAP support ------------------------------- */ | ||
69 | |||
70 | #define MFAIL ((void *)(MAX_SIZE_T)) | ||
71 | #define CMFAIL ((char *)(MFAIL)) /* defined for convenience */ | ||
72 | |||
73 | #define IS_DIRECT_BIT (SIZE_T_ONE) | ||
74 | |||
75 | #ifdef LUA_USE_WIN | ||
76 | |||
77 | #if LJ_64 | ||
78 | #error "missing support for WIN64 to allocate in lower 2G" | ||
79 | #endif | ||
80 | |||
81 | #define WIN32_LEAN_AND_MEAN | ||
82 | #include <windows.h> | ||
83 | |||
84 | /* Win32 MMAP via VirtualAlloc */ | ||
85 | static LJ_AINLINE void *CALL_MMAP(size_t size) | ||
86 | { | ||
87 | void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); | ||
88 | return (ptr != 0)? ptr: MFAIL; | ||
89 | } | ||
90 | |||
91 | /* For direct MMAP, use MEM_TOP_DOWN to minimize interference */ | ||
92 | static LJ_AINLINE void *DIRECT_MMAP(size_t size) | ||
93 | { | ||
94 | void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, | ||
95 | PAGE_READWRITE); | ||
96 | return (ptr != 0)? ptr: MFAIL; | ||
97 | } | ||
98 | |||
99 | /* This function supports releasing coalesced segments */ | ||
100 | static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size) | ||
101 | { | ||
102 | MEMORY_BASIC_INFORMATION minfo; | ||
103 | char *cptr = (char *)ptr; | ||
104 | while (size) { | ||
105 | if (VirtualQuery(cptr, &minfo, sizeof(minfo)) == 0) | ||
106 | return -1; | ||
107 | if (minfo.BaseAddress != cptr || minfo.AllocationBase != cptr || | ||
108 | minfo.State != MEM_COMMIT || minfo.RegionSize > size) | ||
109 | return -1; | ||
110 | if (VirtualFree(cptr, 0, MEM_RELEASE) == 0) | ||
111 | return -1; | ||
112 | cptr += minfo.RegionSize; | ||
113 | size -= minfo.RegionSize; | ||
114 | } | ||
115 | return 0; | ||
116 | } | ||
117 | |||
118 | #else | ||
119 | |||
120 | #include <sys/mman.h> | ||
121 | |||
122 | #define MMAP_PROT (PROT_READ|PROT_WRITE) | ||
123 | #if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) | ||
124 | #define MAP_ANONYMOUS MAP_ANON | ||
125 | #endif /* MAP_ANON */ | ||
126 | |||
127 | #if LJ_64 | ||
128 | #define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS|MAP_32BIT) | ||
129 | #else | ||
130 | #define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS) | ||
131 | #endif | ||
132 | |||
133 | #define CALL_MMAP(s) mmap(0, (s), MMAP_PROT, MMAP_FLAGS, -1, 0) | ||
134 | #define DIRECT_MMAP(s) CALL_MMAP(s) | ||
135 | #define CALL_MUNMAP(a, s) munmap((a), (s)) | ||
136 | |||
137 | #ifdef __linux__ | ||
138 | /* Need to define _GNU_SOURCE to get the mremap prototype. */ | ||
139 | #define CALL_MREMAP(addr, osz, nsz, mv) mremap((addr), (osz), (nsz), (mv)) | ||
140 | #endif | ||
141 | |||
142 | #endif | ||
143 | |||
144 | #ifndef CALL_MREMAP | ||
145 | #define CALL_MREMAP(addr, osz, nsz, mv) ((void)osz, MFAIL) | ||
146 | #endif | ||
147 | |||
148 | /* ----------------------- Chunk representations ------------------------ */ | ||
149 | |||
150 | struct malloc_chunk { | ||
151 | size_t prev_foot; /* Size of previous chunk (if free). */ | ||
152 | size_t head; /* Size and inuse bits. */ | ||
153 | struct malloc_chunk *fd; /* double links -- used only if free. */ | ||
154 | struct malloc_chunk *bk; | ||
155 | }; | ||
156 | |||
157 | typedef struct malloc_chunk mchunk; | ||
158 | typedef struct malloc_chunk *mchunkptr; | ||
159 | typedef struct malloc_chunk *sbinptr; /* The type of bins of chunks */ | ||
160 | typedef unsigned int bindex_t; /* Described below */ | ||
161 | typedef unsigned int binmap_t; /* Described below */ | ||
162 | typedef unsigned int flag_t; /* The type of various bit flag sets */ | ||
163 | |||
164 | /* ------------------- Chunks sizes and alignments ----------------------- */ | ||
165 | |||
#define MCHUNK_SIZE		(sizeof(mchunk))

/* Bookkeeping overhead charged to an ordinary chunk. */
#define CHUNK_OVERHEAD		(SIZE_T_SIZE)

/* Direct chunks need a second word of overhead ... */
#define DIRECT_CHUNK_OVERHEAD	(TWO_SIZE_T_SIZES)
/* ... and additional padding for fake next-chunk at foot */
#define DIRECT_FOOT_PAD		(FOUR_SIZE_T_SIZES)

/* The smallest size we can malloc is an aligned minimal chunk */
#define MIN_CHUNK_SIZE\
  ((MCHUNK_SIZE + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK)

/* Conversion from malloc headers to user pointers, and back. */
#define chunk2mem(p)		((void *)((char *)(p) + TWO_SIZE_T_SIZES))
#define mem2chunk(mem)		((mchunkptr)((char *)(mem) - TWO_SIZE_T_SIZES))
/*
** Chunk associated with aligned address A. The whole expansion is
** parenthesized so a postfix operator applied to the macro result
** (e.g. align_as_chunk(x)->head) binds to the cast pointer.
*/
#define align_as_chunk(A)	((mchunkptr)((A) + align_offset(chunk2mem(A))))

/* Bounds on request (not chunk) sizes. */
#define MAX_REQUEST		((~MIN_CHUNK_SIZE+1) << 2)
#define MIN_REQUEST		(MIN_CHUNK_SIZE - CHUNK_OVERHEAD - SIZE_T_ONE)

/* Pad request bytes into a usable size: add overhead, round up to alignment. */
#define pad_request(req) \
   (((req) + CHUNK_OVERHEAD + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK)

/* Pad request, checking for minimum (but not maximum). */
#define request2size(req) \
  (((req) < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(req))
196 | |||
197 | /* ------------------ Operations on head and foot fields ----------------- */ | ||
198 | |||
/* Flag bits kept in the low bits of a chunk's head word. */
#define PINUSE_BIT		(SIZE_T_ONE)	/* Previous chunk is in use. */
#define CINUSE_BIT		(SIZE_T_TWO)	/* This chunk is in use. */
#define INUSE_BITS		(PINUSE_BIT|CINUSE_BIT)

/* Head value for fenceposts */
#define FENCEPOST_HEAD		(INUSE_BITS|SIZE_T_SIZE)

/* Extraction of fields from head words. */
#define cinuse(p)		((p)->head & CINUSE_BIT)
#define pinuse(p)		((p)->head & PINUSE_BIT)
#define chunksize(p)		((p)->head & ~(INUSE_BITS))

#define clear_pinuse(p)		((p)->head &= ~PINUSE_BIT)
#define clear_cinuse(p)		((p)->head &= ~CINUSE_BIT)

/* Treat space at ptr +/- offset as a chunk */
#define chunk_plus_offset(p, s)		((mchunkptr)(((char *)(p)) + (s)))
#define chunk_minus_offset(p, s)	((mchunkptr)(((char *)(p)) - (s)))

/* Ptr to next or previous physical malloc_chunk. */
#define next_chunk(p)	((mchunkptr)(((char *)(p)) + ((p)->head & ~INUSE_BITS)))
#define prev_chunk(p)	((mchunkptr)(((char *)(p)) - ((p)->prev_foot) ))

/* Extract next chunk's pinuse bit. */
#define next_pinuse(p)	((next_chunk(p)->head) & PINUSE_BIT)

/* Get/set size at footer (the prev_foot word of the chunk s bytes after p). */
#define get_foot(p, s)	(((mchunkptr)((char *)(p) + (s)))->prev_foot)
#define set_foot(p, s)	(((mchunkptr)((char *)(p) + (s)))->prev_foot = (s))

/*
** Set size, pinuse bit, and foot. The size argument is parenthesized so
** that any expression can be passed without changing how the `|` binds.
*/
#define set_size_and_pinuse_of_free_chunk(p, s)\
  ((p)->head = ((s)|PINUSE_BIT), set_foot(p, s))

/* Set size, pinuse bit, foot, and clear next pinuse */
#define set_free_with_pinuse(p, s, n)\
  (clear_pinuse(n), set_size_and_pinuse_of_free_chunk(p, s))

/* True if p has pinuse clear and IS_DIRECT_BIT set in its prev_foot word. */
#define is_direct(p)\
  (!((p)->head & PINUSE_BIT) && ((p)->prev_foot & IS_DIRECT_BIT))

/* Get the internal overhead associated with chunk p */
#define overhead_for(p)\
 (is_direct(p)? DIRECT_CHUNK_OVERHEAD : CHUNK_OVERHEAD)
243 | |||
244 | /* ---------------------- Overlaid data structures ----------------------- */ | ||
245 | |||
246 | struct malloc_tree_chunk { | ||
247 | /* The first four fields must be compatible with malloc_chunk */ | ||
248 | size_t prev_foot; | ||
249 | size_t head; | ||
250 | struct malloc_tree_chunk *fd; | ||
251 | struct malloc_tree_chunk *bk; | ||
252 | |||
253 | struct malloc_tree_chunk *child[2]; | ||
254 | struct malloc_tree_chunk *parent; | ||
255 | bindex_t index; | ||
256 | }; | ||
257 | |||
258 | typedef struct malloc_tree_chunk tchunk; | ||
259 | typedef struct malloc_tree_chunk *tchunkptr; | ||
260 | typedef struct malloc_tree_chunk *tbinptr; /* The type of bins of trees */ | ||
261 | |||
262 | /* A little helper macro for trees */ | ||
263 | #define leftmost_child(t) ((t)->child[0] != 0? (t)->child[0] : (t)->child[1]) | ||
264 | |||
265 | /* ----------------------------- Segments -------------------------------- */ | ||
266 | |||
/* Record describing one mapped region; records form a singly linked list. */
struct malloc_segment {
  char *base;			/* base address */
  size_t size;			/* allocated size */
  struct malloc_segment *next;	/* ptr to next segment */
};

typedef struct malloc_segment msegment, *msegmentptr;
275 | |||
276 | /* ---------------------------- malloc_state ----------------------------- */ | ||
277 | |||
278 | /* Bin types, widths and sizes */ | ||
279 | #define NSMALLBINS (32U) | ||
280 | #define NTREEBINS (32U) | ||
281 | #define SMALLBIN_SHIFT (3U) | ||
282 | #define SMALLBIN_WIDTH (SIZE_T_ONE << SMALLBIN_SHIFT) | ||
283 | #define TREEBIN_SHIFT (8U) | ||
284 | #define MIN_LARGE_SIZE (SIZE_T_ONE << TREEBIN_SHIFT) | ||
285 | #define MAX_SMALL_SIZE (MIN_LARGE_SIZE - SIZE_T_ONE) | ||
286 | #define MAX_SMALL_REQUEST (MAX_SMALL_SIZE - CHUNK_ALIGN_MASK - CHUNK_OVERHEAD) | ||
287 | |||
288 | struct malloc_state { | ||
289 | binmap_t smallmap; | ||
290 | binmap_t treemap; | ||
291 | size_t dvsize; | ||
292 | size_t topsize; | ||
293 | mchunkptr dv; | ||
294 | mchunkptr top; | ||
295 | size_t trim_check; | ||
296 | size_t release_checks; | ||
297 | mchunkptr smallbins[(NSMALLBINS+1)*2]; | ||
298 | tbinptr treebins[NTREEBINS]; | ||
299 | msegment seg; | ||
300 | }; | ||
301 | |||
302 | typedef struct malloc_state *mstate; | ||
303 | |||
304 | #define is_initialized(M) ((M)->top != 0) | ||
305 | |||
306 | /* -------------------------- system alloc setup ------------------------- */ | ||
307 | |||
/* Round a size up to a multiple of LJ_PAGESIZE. */
#define page_align(S)\
  (((S) + (LJ_PAGESIZE - SIZE_T_ONE)) & ~(LJ_PAGESIZE - SIZE_T_ONE))

/* Round a size up to a multiple of DEFAULT_GRANULARITY. */
#define granularity_align(S)\
  (((S) + (DEFAULT_GRANULARITY - SIZE_T_ONE))\
   & ~(DEFAULT_GRANULARITY - SIZE_T_ONE))

/* Rounding applied to direct mappings: granularity on Windows, else pages. */
#ifdef LUA_USE_WIN
#define mmap_align(S)	granularity_align(S)
#else
#define mmap_align(S)	page_align(S)
#endif

/*
** True if segment S holds address A, i.e. base <= A < base + size.
** S is parenthesized so that any pointer expression may be passed.
*/
#define segment_holds(S, A)\
  ((char *)(A) >= (S)->base && (char *)(A) < (S)->base + (S)->size)
326 | |||
327 | /* Return segment holding given address */ | ||
328 | static msegmentptr segment_holding(mstate m, char *addr) | ||
329 | { | ||
330 | msegmentptr sp = &m->seg; | ||
331 | for (;;) { | ||
332 | if (addr >= sp->base && addr < sp->base + sp->size) | ||
333 | return sp; | ||
334 | if ((sp = sp->next) == 0) | ||
335 | return 0; | ||
336 | } | ||
337 | } | ||
338 | |||
339 | /* Return true if segment contains a segment link */ | ||
340 | static int has_segment_link(mstate m, msegmentptr ss) | ||
341 | { | ||
342 | msegmentptr sp = &m->seg; | ||
343 | for (;;) { | ||
344 | if ((char *)sp >= ss->base && (char *)sp < ss->base + ss->size) | ||
345 | return 1; | ||
346 | if ((sp = sp->next) == 0) | ||
347 | return 0; | ||
348 | } | ||
349 | } | ||
350 | |||
/*
** TOP_FOOT_SIZE is the padding reserved at the end of a segment. It covers
** the space that may be needed to place a segment record and fenceposts
** when new noncontiguous segments are added: an alignment offset, a padded
** struct malloc_segment and one minimal chunk.
*/
#define TOP_FOOT_SIZE\
  (MIN_CHUNK_SIZE + pad_request(sizeof(struct malloc_segment)) +\
   align_offset(chunk2mem(0)))
358 | |||
359 | /* ---------------------------- Indexing Bins ---------------------------- */ | ||
360 | |||
/* Smallbin classification and index <-> size conversion. */
#define is_small(s)		(((s) >> SMALLBIN_SHIFT) < NSMALLBINS)
#define small_index(s)		((s) >> SMALLBIN_SHIFT)
#define small_index2size(i)	((i) << SMALLBIN_SHIFT)
#define MIN_SMALL_INDEX		(small_index(MIN_CHUNK_SIZE))

/* Addressing by index: smallbin i is overlaid on smallbins[2*i]. */
#define smallbin_at(M, i)	((sbinptr)((char *)&((M)->smallbins[(i)<<1])))
#define treebin_at(M,i)		(&((M)->treebins[i]))

/*
** Assign tree index for size S to variable I.
**
** X holds S >> TREEBIN_SHIFT in a size_t, so sizes wider than
** unsigned int are not truncated before the range checks; anything with
** X > 0xFFFF goes to the last treebin. In the remaining branch X fits in
** 16 bits, so narrowing it for lj_fls() loses nothing.
*/
#define compute_tree_index(S, I)\
{\
  size_t X = (S) >> TREEBIN_SHIFT;\
  if (X == 0) {\
    I = 0;\
  } else if (X > 0xFFFF) {\
    I = NTREEBINS-1;\
  } else {\
    unsigned int K = lj_fls((unsigned int)X);\
    I = (bindex_t)((K << 1) + (((S) >> (K + (TREEBIN_SHIFT-1)) & 1)));\
  }\
}
383 | |||
/*
** Bit representing maximum resolved size in a treebin at i.
** The expansion is fully parenthesized so it can be used as an operand
** inside a larger expression.
*/
#define bit_for_tree_index(i) \
   (((i) == NTREEBINS-1)? (SIZE_T_BITSIZE-1) : \
    (((i) >> 1) + TREEBIN_SHIFT - 2))

/* Shift placing maximum resolved bit in a treebin at i as sign bit */
#define leftshift_for_tree_index(i) \
   (((i) == NTREEBINS-1)? 0 : \
    ((SIZE_T_BITSIZE-SIZE_T_ONE) - (((i) >> 1) + TREEBIN_SHIFT - 2)))

/* The size of the smallest chunk held in bin with index i */
#define minsize_for_tree_index(i) \
   ((SIZE_T_ONE << (((i) >> 1) + TREEBIN_SHIFT)) |  \
   (((size_t)((i) & SIZE_T_ONE)) << (((i) >> 1) + TREEBIN_SHIFT - 1)))
397 | |||
398 | /* ------------------------ Operations on bin maps ----------------------- */ | ||
399 | |||
/* Bit corresponding to given index. */
#define idx2bit(i)		((binmap_t)(1) << (i))

/* Mark/Clear bits with given index */
#define mark_smallmap(M,i)	((M)->smallmap |=  idx2bit(i))
#define clear_smallmap(M,i)	((M)->smallmap &= ~idx2bit(i))
#define smallmap_is_marked(M,i)	((M)->smallmap &   idx2bit(i))

#define mark_treemap(M,i)	((M)->treemap  |=  idx2bit(i))
#define clear_treemap(M,i)	((M)->treemap  &= ~idx2bit(i))
#define treemap_is_marked(M,i)	((M)->treemap  &   idx2bit(i))

/*
** Mask with all bits to left of least bit of x on.
** x is parenthesized so compound expressions are shifted as a whole.
*/
#define left_bits(x)		(((x)<<1) | (~((x)<<1)+1))

/*
** The three macros below take the state M for interface symmetry but do
** not use it. The size argument s is parenthesized wherever it is OR-ed.
*/

/* Set cinuse bit and pinuse bit of next chunk */
#define set_inuse(M,p,s)\
  ((p)->head = (((p)->head & PINUSE_BIT)|(s)|CINUSE_BIT),\
  ((mchunkptr)(((char *)(p)) + (s)))->head |= PINUSE_BIT)

/* Set cinuse and pinuse of this chunk and pinuse of next chunk */
#define set_inuse_and_pinuse(M,p,s)\
  ((p)->head = ((s)|PINUSE_BIT|CINUSE_BIT),\
  ((mchunkptr)(((char *)(p)) + (s)))->head |= PINUSE_BIT)

/* Set size, cinuse and pinuse bit of this chunk */
#define set_size_and_pinuse_of_inuse_chunk(M, p, s)\
  ((p)->head = ((s)|PINUSE_BIT|CINUSE_BIT))
428 | |||
429 | /* ----------------------- Operations on smallbins ----------------------- */ | ||
430 | |||
/*
** Link free chunk P of size S in directly after the head of its smallbin.
** If the bin was not marked in the smallmap, it is marked and P becomes
** the bin's only member.
*/
#define insert_small_chunk(M, P, S) {\
  bindex_t I = small_index(S);\
  mchunkptr B = smallbin_at(M, I);\
  mchunkptr F;\
  if (smallmap_is_marked(M, I)) {\
    F = B->fd;\
  } else {\
    mark_smallmap(M, I);\
    F = B;\
  }\
  B->fd = P;\
  F->bk = P;\
  P->fd = F;\
  P->bk = B;\
}

/*
** Unlink chunk P of size S from its smallbin. When both neighbours of P
** are the same node, the bin becomes empty and its map bit is cleared.
*/
#define unlink_small_chunk(M, P, S) {\
  mchunkptr F = P->fd;\
  mchunkptr B = P->bk;\
  bindex_t I = small_index(S);\
  if (F != B) {\
    F->bk = B;\
    B->fd = F;\
  } else {\
    clear_smallmap(M, I);\
  }\
}

/* Unlink P, the first chunk of smallbin B with index I. */
#define unlink_first_small_chunk(M, B, P, I) {\
  mchunkptr F = P->fd;\
  if (F != B) {\
    B->fd = F;\
    F->bk = B;\
  } else {\
    clear_smallmap(M, I);\
  }\
}

/*
** Replace dv node, binning the old one (if its size is non-zero).
** Used only when dvsize known to be small.
*/
#define replace_dv(M, P, S) {\
  size_t DVS = M->dvsize;\
  if (DVS) {\
    mchunkptr DV = M->dv;\
    insert_small_chunk(M, DV, DVS);\
  }\
  M->dvsize = S;\
  M->dv = P;\
}
481 | |||
482 | /* ------------------------- Operations on trees ------------------------- */ | ||
483 | |||
/*
** Insert tree chunk X of size S into its treebin.
**
** An unmarked bin gets X as its root, with the bin slot itself recorded
** as the parent. Otherwise the tree is descended one size bit at a time
** (taken from the top of K): a node of exactly the same size gets X
** chained into its fd/bk ring with a null parent, and an empty child slot
** gets X as a new leaf.
*/
#define insert_large_chunk(M, X, S) {\
  tbinptr *H;\
  bindex_t I;\
  compute_tree_index(S, I);\
  H = treebin_at(M, I);\
  X->index = I;\
  X->child[0] = X->child[1] = 0;\
  if (treemap_is_marked(M, I)) {\
    tchunkptr T = *H;\
    size_t K = S << leftshift_for_tree_index(I);\
    for (;;) {\
      if (chunksize(T) == S) {\
        tchunkptr F = T->fd;\
        T->fd = F->bk = X;\
        X->fd = F;\
        X->bk = T;\
        X->parent = 0;\
        break;\
      } else {\
        tchunkptr *C = &(T->child[(K >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]);\
        K <<= 1;\
        if (*C == 0) {\
          *C = X;\
          X->parent = T;\
          X->fd = X->bk = X;\
          break;\
        }\
        T = *C;\
      }\
    }\
  } else {\
    mark_treemap(M, I);\
    *H = X;\
    X->parent = (tchunkptr)H;\
    X->fd = X->bk = X;\
  }\
}
523 | |||
/*
** Unlink tree chunk X from its treebin.
**
** If X shares its size with other chunks (X->bk != X), its ring neighbour
** R takes its place. Otherwise a replacement R is found by following
** child[1] (or else child[0]) links down from X to a leaf, which is
** detached from its parent slot; R is 0 if X has no children.
** When X was a tree node (non-null parent), R is hooked up in place of X:
** into the bin slot or the parent's child slot, and X's children are
** handed over to R. The bin's map bit is cleared if the bin became empty.
*/
#define unlink_large_chunk(M, X) {\
  tchunkptr XP = X->parent;\
  tchunkptr R;\
  if (X->bk == X) {\
    tchunkptr *RP;\
    if (((R = *(RP = &(X->child[1]))) != 0) ||\
        ((R = *(RP = &(X->child[0]))) != 0)) {\
      tchunkptr *CP;\
      while ((*(CP = &(R->child[1])) != 0) ||\
             (*(CP = &(R->child[0])) != 0)) {\
        R = *(RP = CP);\
      }\
      *RP = 0;\
    }\
  } else {\
    tchunkptr F = X->fd;\
    R = X->bk;\
    F->bk = R;\
    R->fd = F;\
  }\
  if (XP != 0) {\
    tbinptr *H = treebin_at(M, X->index);\
    if (X != *H) {\
      if (XP->child[0] == X) \
        XP->child[0] = R;\
      else \
        XP->child[1] = R;\
    } else if ((*H = R) == 0) {\
      clear_treemap(M, X->index);\
    }\
    if (R != 0) {\
      tchunkptr C0, C1;\
      R->parent = XP;\
      if ((C0 = X->child[0]) != 0) {\
        R->child[0] = C0;\
        C0->parent = R;\
      }\
      if ((C1 = X->child[1]) != 0) {\
        R->child[1] = C1;\
        C1->parent = R;\
      }\
    }\
  }\
}
569 | |||
/*
** Relays to large vs small bin operations, chosen by is_small(S).
** Both expand to a bare if/else statement, so use them as a full statement.
*/

#define insert_chunk(M, P, S)\
  if (!is_small(S)) {\
    tchunkptr TP = (tchunkptr)(P);\
    insert_large_chunk(M, TP, S);\
  } else {\
    insert_small_chunk(M, P, S)\
  }

#define unlink_chunk(M, P, S)\
  if (!is_small(S)) {\
    tchunkptr TP = (tchunkptr)(P);\
    unlink_large_chunk(M, TP);\
  } else {\
    unlink_small_chunk(M, P, S)\
  }
579 | |||
580 | /* ----------------------- Direct-mmapping chunks ----------------------- */ | ||
581 | |||
582 | static void *direct_alloc(size_t nb) | ||
583 | { | ||
584 | size_t mmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK); | ||
585 | if (LJ_LIKELY(mmsize > nb)) { /* Check for wrap around 0 */ | ||
586 | char *mm = (char *)(DIRECT_MMAP(mmsize)); | ||
587 | if (mm != CMFAIL) { | ||
588 | size_t offset = align_offset(chunk2mem(mm)); | ||
589 | size_t psize = mmsize - offset - DIRECT_FOOT_PAD; | ||
590 | mchunkptr p = (mchunkptr)(mm + offset); | ||
591 | p->prev_foot = offset | IS_DIRECT_BIT; | ||
592 | p->head = psize|CINUSE_BIT; | ||
593 | chunk_plus_offset(p, psize)->head = FENCEPOST_HEAD; | ||
594 | chunk_plus_offset(p, psize+SIZE_T_SIZE)->head = 0; | ||
595 | return chunk2mem(p); | ||
596 | } | ||
597 | } | ||
598 | return NULL; | ||
599 | } | ||
600 | |||
601 | static mchunkptr direct_resize(mchunkptr oldp, size_t nb) | ||
602 | { | ||
603 | size_t oldsize = chunksize(oldp); | ||
604 | if (is_small(nb)) /* Can't shrink direct regions below small size */ | ||
605 | return NULL; | ||
606 | /* Keep old chunk if big enough but not too big */ | ||
607 | if (oldsize >= nb + SIZE_T_SIZE && | ||
608 | (oldsize - nb) <= (DEFAULT_GRANULARITY << 1)) { | ||
609 | return oldp; | ||
610 | } else { | ||
611 | size_t offset = oldp->prev_foot & ~IS_DIRECT_BIT; | ||
612 | size_t oldmmsize = oldsize + offset + DIRECT_FOOT_PAD; | ||
613 | size_t newmmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK); | ||
614 | char *cp = (char *)CALL_MREMAP((char *)oldp - offset, | ||
615 | oldmmsize, newmmsize, 1); | ||
616 | if (cp != CMFAIL) { | ||
617 | mchunkptr newp = (mchunkptr)(cp + offset); | ||
618 | size_t psize = newmmsize - offset - DIRECT_FOOT_PAD; | ||
619 | newp->head = psize|CINUSE_BIT; | ||
620 | chunk_plus_offset(newp, psize)->head = FENCEPOST_HEAD; | ||
621 | chunk_plus_offset(newp, psize+SIZE_T_SIZE)->head = 0; | ||
622 | return newp; | ||
623 | } | ||
624 | } | ||
625 | return NULL; | ||
626 | } | ||
627 | |||
628 | /* -------------------------- mspace management -------------------------- */ | ||
629 | |||
630 | /* Initialize top chunk and its size */ | ||
631 | static void init_top(mstate m, mchunkptr p, size_t psize) | ||
632 | { | ||
633 | /* Ensure alignment */ | ||
634 | size_t offset = align_offset(chunk2mem(p)); | ||
635 | p = (mchunkptr)((char *)p + offset); | ||
636 | psize -= offset; | ||
637 | |||
638 | m->top = p; | ||
639 | m->topsize = psize; | ||
640 | p->head = psize | PINUSE_BIT; | ||
641 | /* set size of fake trailing chunk holding overhead space only once */ | ||
642 | chunk_plus_offset(p, psize)->head = TOP_FOOT_SIZE; | ||
643 | m->trim_check = DEFAULT_TRIM_THRESHOLD; /* reset on each update */ | ||
644 | } | ||
645 | |||
646 | /* Initialize bins for a new mstate that is otherwise zeroed out */ | ||
647 | static void init_bins(mstate m) | ||
648 | { | ||
649 | /* Establish circular links for smallbins */ | ||
650 | bindex_t i; | ||
651 | for (i = 0; i < NSMALLBINS; i++) { | ||
652 | sbinptr bin = smallbin_at(m,i); | ||
653 | bin->fd = bin->bk = bin; | ||
654 | } | ||
655 | } | ||
656 | |||
657 | /* Allocate chunk and prepend remainder with chunk in successor base. */ | ||
658 | static void *prepend_alloc(mstate m, char *newbase, char *oldbase, size_t nb) | ||
659 | { | ||
660 | mchunkptr p = align_as_chunk(newbase); | ||
661 | mchunkptr oldfirst = align_as_chunk(oldbase); | ||
662 | size_t psize = (size_t)((char *)oldfirst - (char *)p); | ||
663 | mchunkptr q = chunk_plus_offset(p, nb); | ||
664 | size_t qsize = psize - nb; | ||
665 | set_size_and_pinuse_of_inuse_chunk(m, p, nb); | ||
666 | |||
667 | /* consolidate remainder with first chunk of old base */ | ||
668 | if (oldfirst == m->top) { | ||
669 | size_t tsize = m->topsize += qsize; | ||
670 | m->top = q; | ||
671 | q->head = tsize | PINUSE_BIT; | ||
672 | } else if (oldfirst == m->dv) { | ||
673 | size_t dsize = m->dvsize += qsize; | ||
674 | m->dv = q; | ||
675 | set_size_and_pinuse_of_free_chunk(q, dsize); | ||
676 | } else { | ||
677 | if (!cinuse(oldfirst)) { | ||
678 | size_t nsize = chunksize(oldfirst); | ||
679 | unlink_chunk(m, oldfirst, nsize); | ||
680 | oldfirst = chunk_plus_offset(oldfirst, nsize); | ||
681 | qsize += nsize; | ||
682 | } | ||
683 | set_free_with_pinuse(q, qsize, oldfirst); | ||
684 | insert_chunk(m, q, qsize); | ||
685 | } | ||
686 | |||
687 | return chunk2mem(p); | ||
688 | } | ||
689 | |||
690 | /* Add a segment to hold a new noncontiguous region */ | ||
691 | static void add_segment(mstate m, char *tbase, size_t tsize) | ||
692 | { | ||
693 | /* Determine locations and sizes of segment, fenceposts, old top */ | ||
694 | char *old_top = (char *)m->top; | ||
695 | msegmentptr oldsp = segment_holding(m, old_top); | ||
696 | char *old_end = oldsp->base + oldsp->size; | ||
697 | size_t ssize = pad_request(sizeof(struct malloc_segment)); | ||
698 | char *rawsp = old_end - (ssize + FOUR_SIZE_T_SIZES + CHUNK_ALIGN_MASK); | ||
699 | size_t offset = align_offset(chunk2mem(rawsp)); | ||
700 | char *asp = rawsp + offset; | ||
701 | char *csp = (asp < (old_top + MIN_CHUNK_SIZE))? old_top : asp; | ||
702 | mchunkptr sp = (mchunkptr)csp; | ||
703 | msegmentptr ss = (msegmentptr)(chunk2mem(sp)); | ||
704 | mchunkptr tnext = chunk_plus_offset(sp, ssize); | ||
705 | mchunkptr p = tnext; | ||
706 | |||
707 | /* reset top to new space */ | ||
708 | init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE); | ||
709 | |||
710 | /* Set up segment record */ | ||
711 | set_size_and_pinuse_of_inuse_chunk(m, sp, ssize); | ||
712 | *ss = m->seg; /* Push current record */ | ||
713 | m->seg.base = tbase; | ||
714 | m->seg.size = tsize; | ||
715 | m->seg.next = ss; | ||
716 | |||
717 | /* Insert trailing fenceposts */ | ||
718 | for (;;) { | ||
719 | mchunkptr nextp = chunk_plus_offset(p, SIZE_T_SIZE); | ||
720 | p->head = FENCEPOST_HEAD; | ||
721 | if ((char *)(&(nextp->head)) < old_end) | ||
722 | p = nextp; | ||
723 | else | ||
724 | break; | ||
725 | } | ||
726 | |||
727 | /* Insert the rest of old top into a bin as an ordinary free chunk */ | ||
728 | if (csp != old_top) { | ||
729 | mchunkptr q = (mchunkptr)old_top; | ||
730 | size_t psize = (size_t)(csp - old_top); | ||
731 | mchunkptr tn = chunk_plus_offset(q, psize); | ||
732 | set_free_with_pinuse(q, psize, tn); | ||
733 | insert_chunk(m, q, psize); | ||
734 | } | ||
735 | } | ||
736 | |||
737 | /* -------------------------- System allocation -------------------------- */ | ||
738 | |||
739 | static void *alloc_sys(mstate m, size_t nb) | ||
740 | { | ||
741 | char *tbase = CMFAIL; | ||
742 | size_t tsize = 0; | ||
743 | |||
744 | /* Directly map large chunks */ | ||
745 | if (LJ_UNLIKELY(nb >= DEFAULT_MMAP_THRESHOLD)) { | ||
746 | void *mem = direct_alloc(nb); | ||
747 | if (mem != 0) | ||
748 | return mem; | ||
749 | } | ||
750 | |||
751 | { | ||
752 | size_t req = nb + TOP_FOOT_SIZE + SIZE_T_ONE; | ||
753 | size_t rsize = granularity_align(req); | ||
754 | if (LJ_LIKELY(rsize > nb)) { /* Fail if wraps around zero */ | ||
755 | char *mp = (char *)(CALL_MMAP(rsize)); | ||
756 | if (mp != CMFAIL) { | ||
757 | tbase = mp; | ||
758 | tsize = rsize; | ||
759 | } | ||
760 | } | ||
761 | } | ||
762 | |||
763 | if (tbase != CMFAIL) { | ||
764 | msegmentptr sp = &m->seg; | ||
765 | /* Try to merge with an existing segment */ | ||
766 | while (sp != 0 && tbase != sp->base + sp->size) | ||
767 | sp = sp->next; | ||
768 | if (sp != 0 && segment_holds(sp, m->top)) { /* append */ | ||
769 | sp->size += tsize; | ||
770 | init_top(m, m->top, m->topsize + tsize); | ||
771 | } else { | ||
772 | sp = &m->seg; | ||
773 | while (sp != 0 && sp->base != tbase + tsize) | ||
774 | sp = sp->next; | ||
775 | if (sp != 0) { | ||
776 | char *oldbase = sp->base; | ||
777 | sp->base = tbase; | ||
778 | sp->size += tsize; | ||
779 | return prepend_alloc(m, tbase, oldbase, nb); | ||
780 | } else { | ||
781 | add_segment(m, tbase, tsize); | ||
782 | } | ||
783 | } | ||
784 | |||
785 | if (nb < m->topsize) { /* Allocate from new or extended top space */ | ||
786 | size_t rsize = m->topsize -= nb; | ||
787 | mchunkptr p = m->top; | ||
788 | mchunkptr r = m->top = chunk_plus_offset(p, nb); | ||
789 | r->head = rsize | PINUSE_BIT; | ||
790 | set_size_and_pinuse_of_inuse_chunk(m, p, nb); | ||
791 | return chunk2mem(p); | ||
792 | } | ||
793 | } | ||
794 | |||
795 | return NULL; | ||
796 | } | ||
797 | |||
798 | /* ----------------------- system deallocation -------------------------- */ | ||
799 | |||
800 | /* Unmap and unlink any mmapped segments that don't contain used chunks */ | ||
801 | static size_t release_unused_segments(mstate m) | ||
802 | { | ||
803 | size_t released = 0; | ||
804 | size_t nsegs = 0; | ||
805 | msegmentptr pred = &m->seg; | ||
806 | msegmentptr sp = pred->next; | ||
807 | while (sp != 0) { | ||
808 | char *base = sp->base; | ||
809 | size_t size = sp->size; | ||
810 | msegmentptr next = sp->next; | ||
811 | nsegs++; | ||
812 | { | ||
813 | mchunkptr p = align_as_chunk(base); | ||
814 | size_t psize = chunksize(p); | ||
815 | /* Can unmap if first chunk holds entire segment and not pinned */ | ||
816 | if (!cinuse(p) && (char *)p + psize >= base + size - TOP_FOOT_SIZE) { | ||
817 | tchunkptr tp = (tchunkptr)p; | ||
818 | if (p == m->dv) { | ||
819 | m->dv = 0; | ||
820 | m->dvsize = 0; | ||
821 | } else { | ||
822 | unlink_large_chunk(m, tp); | ||
823 | } | ||
824 | if (CALL_MUNMAP(base, size) == 0) { | ||
825 | released += size; | ||
826 | /* unlink obsoleted record */ | ||
827 | sp = pred; | ||
828 | sp->next = next; | ||
829 | } else { /* back out if cannot unmap */ | ||
830 | insert_large_chunk(m, tp, psize); | ||
831 | } | ||
832 | } | ||
833 | } | ||
834 | pred = sp; | ||
835 | sp = next; | ||
836 | } | ||
837 | /* Reset check counter */ | ||
838 | m->release_checks = nsegs > MAX_RELEASE_CHECK_RATE ? | ||
839 | nsegs : MAX_RELEASE_CHECK_RATE; | ||
840 | return released; | ||
841 | } | ||
842 | |||
843 | static int alloc_trim(mstate m, size_t pad) | ||
844 | { | ||
845 | size_t released = 0; | ||
846 | if (pad < MAX_REQUEST && is_initialized(m)) { | ||
847 | pad += TOP_FOOT_SIZE; /* ensure enough room for segment overhead */ | ||
848 | |||
849 | if (m->topsize > pad) { | ||
850 | /* Shrink top space in granularity-size units, keeping at least one */ | ||
851 | size_t unit = DEFAULT_GRANULARITY; | ||
852 | size_t extra = ((m->topsize - pad + (unit - SIZE_T_ONE)) / unit - | ||
853 | SIZE_T_ONE) * unit; | ||
854 | msegmentptr sp = segment_holding(m, (char *)m->top); | ||
855 | |||
856 | if (sp->size >= extra && | ||
857 | !has_segment_link(m, sp)) { /* can't shrink if pinned */ | ||
858 | size_t newsize = sp->size - extra; | ||
859 | /* Prefer mremap, fall back to munmap */ | ||
860 | if ((CALL_MREMAP(sp->base, sp->size, newsize, 0) != MFAIL) || | ||
861 | (CALL_MUNMAP(sp->base + newsize, extra) == 0)) { | ||
862 | released = extra; | ||
863 | } | ||
864 | } | ||
865 | |||
866 | if (released != 0) { | ||
867 | sp->size -= released; | ||
868 | init_top(m, m->top, m->topsize - released); | ||
869 | } | ||
870 | } | ||
871 | |||
872 | /* Unmap any unused mmapped segments */ | ||
873 | released += release_unused_segments(m); | ||
874 | |||
875 | /* On failure, disable autotrim to avoid repeated failed future calls */ | ||
876 | if (released == 0 && m->topsize > m->trim_check) | ||
877 | m->trim_check = MAX_SIZE_T; | ||
878 | } | ||
879 | |||
880 | return (released != 0)? 1 : 0; | ||
881 | } | ||
882 | |||
883 | /* ---------------------------- malloc support --------------------------- */ | ||
884 | |||
885 | /* allocate a large request from the best fitting chunk in a treebin */ | ||
886 | static void *tmalloc_large(mstate m, size_t nb) | ||
887 | { | ||
888 | tchunkptr v = 0; | ||
889 | size_t rsize = ~nb+1; /* Unsigned negation */ | ||
890 | tchunkptr t; | ||
891 | bindex_t idx; | ||
892 | compute_tree_index(nb, idx); | ||
893 | |||
894 | if ((t = *treebin_at(m, idx)) != 0) { | ||
895 | /* Traverse tree for this bin looking for node with size == nb */ | ||
896 | size_t sizebits = nb << leftshift_for_tree_index(idx); | ||
897 | tchunkptr rst = 0; /* The deepest untaken right subtree */ | ||
898 | for (;;) { | ||
899 | tchunkptr rt; | ||
900 | size_t trem = chunksize(t) - nb; | ||
901 | if (trem < rsize) { | ||
902 | v = t; | ||
903 | if ((rsize = trem) == 0) | ||
904 | break; | ||
905 | } | ||
906 | rt = t->child[1]; | ||
907 | t = t->child[(sizebits >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]; | ||
908 | if (rt != 0 && rt != t) | ||
909 | rst = rt; | ||
910 | if (t == 0) { | ||
911 | t = rst; /* set t to least subtree holding sizes > nb */ | ||
912 | break; | ||
913 | } | ||
914 | sizebits <<= 1; | ||
915 | } | ||
916 | } | ||
917 | |||
918 | if (t == 0 && v == 0) { /* set t to root of next non-empty treebin */ | ||
919 | binmap_t leftbits = left_bits(idx2bit(idx)) & m->treemap; | ||
920 | if (leftbits != 0) | ||
921 | t = *treebin_at(m, lj_ffs(leftbits)); | ||
922 | } | ||
923 | |||
924 | while (t != 0) { /* find smallest of tree or subtree */ | ||
925 | size_t trem = chunksize(t) - nb; | ||
926 | if (trem < rsize) { | ||
927 | rsize = trem; | ||
928 | v = t; | ||
929 | } | ||
930 | t = leftmost_child(t); | ||
931 | } | ||
932 | |||
933 | /* If dv is a better fit, return NULL so malloc will use it */ | ||
934 | if (v != 0 && rsize < (size_t)(m->dvsize - nb)) { | ||
935 | mchunkptr r = chunk_plus_offset(v, nb); | ||
936 | unlink_large_chunk(m, v); | ||
937 | if (rsize < MIN_CHUNK_SIZE) { | ||
938 | set_inuse_and_pinuse(m, v, (rsize + nb)); | ||
939 | } else { | ||
940 | set_size_and_pinuse_of_inuse_chunk(m, v, nb); | ||
941 | set_size_and_pinuse_of_free_chunk(r, rsize); | ||
942 | insert_chunk(m, r, rsize); | ||
943 | } | ||
944 | return chunk2mem(v); | ||
945 | } | ||
946 | return NULL; | ||
947 | } | ||
948 | |||
949 | /* allocate a small request from the best fitting chunk in a treebin */ | ||
950 | static void *tmalloc_small(mstate m, size_t nb) | ||
951 | { | ||
952 | tchunkptr t, v; | ||
953 | mchunkptr r; | ||
954 | size_t rsize; | ||
955 | bindex_t i = lj_ffs(m->treemap); | ||
956 | |||
957 | v = t = *treebin_at(m, i); | ||
958 | rsize = chunksize(t) - nb; | ||
959 | |||
960 | while ((t = leftmost_child(t)) != 0) { | ||
961 | size_t trem = chunksize(t) - nb; | ||
962 | if (trem < rsize) { | ||
963 | rsize = trem; | ||
964 | v = t; | ||
965 | } | ||
966 | } | ||
967 | |||
968 | r = chunk_plus_offset(v, nb); | ||
969 | unlink_large_chunk(m, v); | ||
970 | if (rsize < MIN_CHUNK_SIZE) { | ||
971 | set_inuse_and_pinuse(m, v, (rsize + nb)); | ||
972 | } else { | ||
973 | set_size_and_pinuse_of_inuse_chunk(m, v, nb); | ||
974 | set_size_and_pinuse_of_free_chunk(r, rsize); | ||
975 | replace_dv(m, r, rsize); | ||
976 | } | ||
977 | return chunk2mem(v); | ||
978 | } | ||
979 | |||
980 | /* ----------------------------------------------------------------------- */ | ||
981 | |||
982 | void *lj_alloc_create(void) | ||
983 | { | ||
984 | size_t tsize = DEFAULT_GRANULARITY; | ||
985 | char *tbase = (char *)(CALL_MMAP(tsize)); | ||
986 | if (tbase != CMFAIL) { | ||
987 | size_t msize = pad_request(sizeof(struct malloc_state)); | ||
988 | mchunkptr mn; | ||
989 | mchunkptr msp = align_as_chunk(tbase); | ||
990 | mstate m = (mstate)(chunk2mem(msp)); | ||
991 | memset(m, 0, msize); | ||
992 | msp->head = (msize|PINUSE_BIT|CINUSE_BIT); | ||
993 | m->seg.base = tbase; | ||
994 | m->seg.size = tsize; | ||
995 | m->release_checks = MAX_RELEASE_CHECK_RATE; | ||
996 | init_bins(m); | ||
997 | mn = next_chunk(mem2chunk(m)); | ||
998 | init_top(m, mn, (size_t)((tbase + tsize) - (char *)mn) - TOP_FOOT_SIZE); | ||
999 | return m; | ||
1000 | } | ||
1001 | return NULL; | ||
1002 | } | ||
1003 | |||
1004 | void lj_alloc_destroy(void *msp) | ||
1005 | { | ||
1006 | mstate ms = (mstate)msp; | ||
1007 | msegmentptr sp = &ms->seg; | ||
1008 | while (sp != 0) { | ||
1009 | char *base = sp->base; | ||
1010 | size_t size = sp->size; | ||
1011 | sp = sp->next; | ||
1012 | CALL_MUNMAP(base, size); | ||
1013 | } | ||
1014 | } | ||
1015 | |||
/* Allocate a block of at least nsize bytes from allocator state msp.
**
** Strategy, in order:
**  1. Small requests: exact (or next-size) smallbin, then the next
**     non-empty smallbin (split, remainder becomes dv), then tmalloc_small.
**  2. Large requests: tmalloc_large from the tree bins.
**  3. Carve from the dv chunk, then from the top chunk.
**  4. Fall back to alloc_sys.
** Requests >= MAX_REQUEST set nb to MAX_SIZE_T so that steps 3 fail and
** the request ends up in alloc_sys.
** Returns the user pointer; the final result is whatever alloc_sys returns.
*/
static LJ_NOINLINE void *lj_alloc_malloc(void *msp, size_t nsize)
{
  mstate ms = (mstate)msp;
  void *mem;
  size_t nb;  /* Padded chunk size for the request. */
  if (nsize <= MAX_SMALL_REQUEST) {
    bindex_t idx;
    binmap_t smallbits;
    nb = (nsize < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(nsize);
    idx = small_index(nb);
    /* Bit 0 of smallbits now corresponds to bin idx, bit 1 to idx+1. */
    smallbits = ms->smallmap >> idx;

    if ((smallbits & 0x3U) != 0) {  /* Remainderless fit to a smallbin. */
      mchunkptr b, p;
      idx += ~smallbits & 1;  /* Uses next bin if idx empty */
      b = smallbin_at(ms, idx);
      p = b->fd;
      unlink_first_small_chunk(ms, b, p, idx);
      set_inuse_and_pinuse(ms, p, small_index2size(idx));
      mem = chunk2mem(p);
      return mem;
    } else if (nb > ms->dvsize) {
      /* dv is too small: look at bigger bins before touching top. */
      if (smallbits != 0) {  /* Use chunk in next nonempty smallbin */
	mchunkptr b, p, r;
	size_t rsize;
	binmap_t leftbits = (smallbits << idx) & left_bits(idx2bit(idx));
	bindex_t i = lj_ffs(leftbits);
	b = smallbin_at(ms, i);
	p = b->fd;
	unlink_first_small_chunk(ms, b, p, i);
	rsize = small_index2size(i) - nb;
	/* Fit here cannot be remainderless if 4byte sizes */
	if (SIZE_T_SIZE != 4 && rsize < MIN_CHUNK_SIZE) {
	  /* Remainder too small to be a chunk: hand out the whole chunk. */
	  set_inuse_and_pinuse(ms, p, small_index2size(i));
	} else {
	  /* Split: front part is returned, remainder r replaces dv. */
	  set_size_and_pinuse_of_inuse_chunk(ms, p, nb);
	  r = chunk_plus_offset(p, nb);
	  set_size_and_pinuse_of_free_chunk(r, rsize);
	  replace_dv(ms, r, rsize);
	}
	mem = chunk2mem(p);
	return mem;
      } else if (ms->treemap != 0 && (mem = tmalloc_small(ms, nb)) != 0) {
	return mem;
      }
    }
  } else if (nsize >= MAX_REQUEST) {
    nb = MAX_SIZE_T;  /* Too big to allocate. Force failure (in sys alloc) */
  } else {
    nb = pad_request(nsize);
    if (ms->treemap != 0 && (mem = tmalloc_large(ms, nb)) != 0) {
      return mem;
    }
  }

  /* No bin could serve the request: try dv, then top, then the system. */
  if (nb <= ms->dvsize) {
    size_t rsize = ms->dvsize - nb;
    mchunkptr p = ms->dv;
    if (rsize >= MIN_CHUNK_SIZE) {  /* split dv */
      mchunkptr r = ms->dv = chunk_plus_offset(p, nb);
      ms->dvsize = rsize;
      set_size_and_pinuse_of_free_chunk(r, rsize);
      set_size_and_pinuse_of_inuse_chunk(ms, p, nb);
    } else {  /* exhaust dv */
      size_t dvs = ms->dvsize;
      ms->dvsize = 0;
      ms->dv = 0;
      set_inuse_and_pinuse(ms, p, dvs);
    }
    mem = chunk2mem(p);
    return mem;
  } else if (nb < ms->topsize) {  /* Split top */
    /* Strict '<': top never gets fully consumed here. */
    size_t rsize = ms->topsize -= nb;
    mchunkptr p = ms->top;
    mchunkptr r = ms->top = chunk_plus_offset(p, nb);
    r->head = rsize | PINUSE_BIT;
    set_size_and_pinuse_of_inuse_chunk(ms, p, nb);
    mem = chunk2mem(p);
    return mem;
  }
  return alloc_sys(ms, nb);
}
1098 | |||
/* Release the block at ptr back to allocator state msp.
**
** A NULL ptr is a no-op. The freed chunk is merged with a free predecessor
** and/or successor (including the dv and top chunks) before it is put into
** a small bin or tree bin. Directly mapped chunks are unmapped instead.
** Always returns NULL, so lj_alloc_f can return the result unchanged.
*/
static LJ_NOINLINE void *lj_alloc_free(void *msp, void *ptr)
{
  if (ptr != 0) {
    mchunkptr p = mem2chunk(ptr);
    mstate fm = (mstate)msp;
    size_t psize = chunksize(p);
    mchunkptr next = chunk_plus_offset(p, psize);
    if (!pinuse(p)) {
      /* Previous-in-use bit clear: prev_foot holds the predecessor size
      ** or, with IS_DIRECT_BIT set, the offset into a direct mapping.
      */
      size_t prevsize = p->prev_foot;
      if ((prevsize & IS_DIRECT_BIT) != 0) {
	/* Directly mapped chunk: give the whole mapping back. */
	prevsize &= ~IS_DIRECT_BIT;
	psize += prevsize + DIRECT_FOOT_PAD;
	CALL_MUNMAP((char *)p - prevsize, psize);
	return NULL;
      } else {
	mchunkptr prev = chunk_minus_offset(p, prevsize);
	psize += prevsize;
	p = prev;
	/* consolidate backward */
	if (p != fm->dv) {
	  unlink_chunk(fm, p, prevsize);
	} else if ((next->head & INUSE_BITS) == INUSE_BITS) {
	  /* Predecessor is dv and the successor is in use: just grow dv. */
	  fm->dvsize = psize;
	  set_free_with_pinuse(p, psize, next);
	  return NULL;
	}
      }
    }
    if (!cinuse(next)) {  /* consolidate forward */
      if (next == fm->top) {
	/* Merge into top; p becomes the new top chunk. */
	size_t tsize = fm->topsize += psize;
	fm->top = p;
	p->head = tsize | PINUSE_BIT;
	if (p == fm->dv) {
	  /* dv was swallowed by top. */
	  fm->dv = 0;
	  fm->dvsize = 0;
	}
	if (tsize > fm->trim_check)
	  alloc_trim(fm, 0);
	return NULL;
      } else if (next == fm->dv) {
	/* Merge with dv; p becomes the new dv chunk. */
	size_t dsize = fm->dvsize += psize;
	fm->dv = p;
	set_size_and_pinuse_of_free_chunk(p, dsize);
	return NULL;
      } else {
	/* Ordinary free successor: pull it out of its bin and merge. */
	size_t nsize = chunksize(next);
	psize += nsize;
	unlink_chunk(fm, next, nsize);
	set_size_and_pinuse_of_free_chunk(p, psize);
	if (p == fm->dv) {
	  fm->dvsize = psize;
	  return NULL;
	}
      }
    } else {
      set_free_with_pinuse(p, psize, next);
    }

    /* Put the (possibly merged) chunk into the matching bin. */
    if (is_small(psize)) {
      insert_small_chunk(fm, p, psize);
    } else {
      tchunkptr tp = (tchunkptr)p;
      insert_large_chunk(fm, tp, psize);
      /* Countdown of large frees before release_unused_segments runs. */
      if (--fm->release_checks == 0)
	release_unused_segments(fm);
    }
  }
  return NULL;
}
1169 | |||
/* Resize the block at ptr to at least nsize bytes.
**
** ptr is dereferenced unconditionally (via mem2chunk/chunksize), so it must
** not be NULL; lj_alloc_f only calls this with a non-NULL ptr and nsize != 0.
** Tries, in order: resizing a direct mapping, shrinking in place, growing
** into the adjacent top chunk, and finally malloc + copy + free.
** Returns the (possibly moved) block, or NULL on failure, in which case the
** old block is left untouched.
*/
static LJ_NOINLINE void *lj_alloc_realloc(void *msp, void *ptr, size_t nsize)
{
  if (nsize >= MAX_REQUEST) {
    return NULL;
  } else {
    mstate m = (mstate)msp;
    mchunkptr oldp = mem2chunk(ptr);
    size_t oldsize = chunksize(oldp);
    mchunkptr next = chunk_plus_offset(oldp, oldsize);
    mchunkptr newp = 0;  /* Stays 0 if no in-place strategy applies. */
    size_t nb = request2size(nsize);

    /* Try to either shrink or extend into top. Else malloc-copy-free */
    if (is_direct(oldp)) {
      newp = direct_resize(oldp, nb);  /* this may return NULL. */
    } else if (oldsize >= nb) {  /* already big enough */
      size_t rsize = oldsize - nb;
      newp = oldp;
      if (rsize >= MIN_CHUNK_SIZE) {
	/* Split off the tail as an in-use chunk and free it normally. */
	mchunkptr remainder = chunk_plus_offset(newp, nb);
	set_inuse(m, newp, nb);
	set_inuse(m, remainder, rsize);
	lj_alloc_free(m, chunk2mem(remainder));
      }
    } else if (next == m->top && oldsize + m->topsize > nb) {
      /* Expand into top */
      size_t newsize = oldsize + m->topsize;
      size_t newtopsize = newsize - nb;
      mchunkptr newtop = chunk_plus_offset(oldp, nb);
      set_inuse(m, oldp, nb);
      newtop->head = newtopsize |PINUSE_BIT;
      m->top = newtop;
      m->topsize = newtopsize;
      newp = oldp;
    }

    if (newp != 0) {
      return chunk2mem(newp);
    } else {
      void *newmem = lj_alloc_malloc(m, nsize);
      if (newmem != 0) {
	/* Copy the smaller of the old payload size and the new size. */
	size_t oc = oldsize - overhead_for(oldp);
	memcpy(newmem, ptr, oc < nsize ? oc : nsize);
	lj_alloc_free(m, ptr);
      }
      return newmem;
    }
  }
}
1219 | |||
/* Allocator entry point with the lua_Alloc calling convention.
** nsize == 0 frees ptr, ptr == NULL allocates, anything else reallocates.
** The old size is not needed by this allocator and is ignored.
*/
void *lj_alloc_f(void *msp, void *ptr, size_t osize, size_t nsize)
{
  (void)osize;
  if (nsize == 0)
    return lj_alloc_free(msp, ptr);
  if (ptr == NULL)
    return lj_alloc_malloc(msp, nsize);
  return lj_alloc_realloc(msp, ptr, nsize);
}
1231 | |||
1232 | #endif | ||
diff --git a/src/lj_alloc.h b/src/lj_alloc.h new file mode 100644 index 00000000..f87a7cf3 --- /dev/null +++ b/src/lj_alloc.h | |||
@@ -0,0 +1,17 @@ | |||
1 | /* | ||
2 | ** Bundled memory allocator. | ||
3 | ** Donated to the public domain. | ||
4 | */ | ||
5 | |||
6 | #ifndef _LJ_ALLOC_H | ||
7 | #define _LJ_ALLOC_H | ||
8 | |||
9 | #include "lj_def.h" | ||
10 | |||
11 | #ifndef LUAJIT_USE_SYSMALLOC | ||
12 | LJ_FUNC void *lj_alloc_create(void); | ||
13 | LJ_FUNC void lj_alloc_destroy(void *msp); | ||
14 | LJ_FUNC void *lj_alloc_f(void *msp, void *ptr, size_t osize, size_t nsize); | ||
15 | #endif | ||
16 | |||
17 | #endif | ||
diff --git a/src/lj_api.c b/src/lj_api.c new file mode 100644 index 00000000..ea4eaf66 --- /dev/null +++ b/src/lj_api.c | |||
@@ -0,0 +1,1046 @@ | |||
1 | /* | ||
2 | ** Public Lua/C API. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | ** | ||
5 | ** Major portions taken verbatim or adapted from the Lua interpreter. | ||
6 | ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h | ||
7 | */ | ||
8 | |||
9 | #define lj_api_c | ||
10 | #define LUA_CORE | ||
11 | |||
12 | #include "lj_obj.h" | ||
13 | #include "lj_gc.h" | ||
14 | #include "lj_err.h" | ||
15 | #include "lj_str.h" | ||
16 | #include "lj_tab.h" | ||
17 | #include "lj_func.h" | ||
18 | #include "lj_udata.h" | ||
19 | #include "lj_meta.h" | ||
20 | #include "lj_state.h" | ||
21 | #include "lj_frame.h" | ||
22 | #include "lj_trace.h" | ||
23 | #include "lj_vm.h" | ||
24 | #include "lj_lex.h" | ||
25 | #include "lj_parse.h" | ||
26 | |||
27 | /* -- Common helper functions --------------------------------------------- */ | ||
28 | |||
29 | #define api_checknelems(L, n) api_check(L, (n) <= (L->top - L->base)) | ||
30 | #define api_checkvalidindex(L, i) api_check(L, (i) != niltv(L)) | ||
31 | |||
32 | static TValue *index2adr(lua_State *L, int idx) | ||
33 | { | ||
34 | if (idx > 0) { | ||
35 | TValue *o = L->base + (idx - 1); | ||
36 | return o < L->top ? o : niltv(L); | ||
37 | } else if (idx > LUA_REGISTRYINDEX) { | ||
38 | api_check(L, idx != 0 && -idx <= L->top - L->base); | ||
39 | return L->top + idx; | ||
40 | } else if (idx == LUA_GLOBALSINDEX) { | ||
41 | TValue *o = &G(L)->tmptv; | ||
42 | settabV(L, o, tabref(L->env)); | ||
43 | return o; | ||
44 | } else if (idx == LUA_REGISTRYINDEX) { | ||
45 | return registry(L); | ||
46 | } else { | ||
47 | GCfunc *fn = curr_func(L); | ||
48 | api_check(L, fn->c.gct == ~LJ_TFUNC && !isluafunc(fn)); | ||
49 | if (idx == LUA_ENVIRONINDEX) { | ||
50 | TValue *o = &G(L)->tmptv; | ||
51 | settabV(L, o, tabref(fn->c.env)); | ||
52 | return o; | ||
53 | } else { | ||
54 | idx = LUA_GLOBALSINDEX - idx; | ||
55 | return idx <= fn->c.nupvalues ? &fn->c.upvalue[idx-1] : niltv(L); | ||
56 | } | ||
57 | } | ||
58 | } | ||
59 | |||
60 | static TValue *stkindex2adr(lua_State *L, int idx) | ||
61 | { | ||
62 | if (idx > 0) { | ||
63 | TValue *o = L->base + (idx - 1); | ||
64 | return o < L->top ? o : niltv(L); | ||
65 | } else { | ||
66 | api_check(L, idx != 0 && -idx <= L->top - L->base); | ||
67 | return L->top + idx; | ||
68 | } | ||
69 | } | ||
70 | |||
71 | static GCtab *getcurrenv(lua_State *L) | ||
72 | { | ||
73 | GCfunc *fn = curr_func(L); | ||
74 | return fn->c.gct == ~LJ_TFUNC ? tabref(fn->c.env) : tabref(L->env); | ||
75 | } | ||
76 | |||
77 | /* -- Miscellaneous API functions ----------------------------------------- */ | ||
78 | |||
79 | LUA_API int lua_status(lua_State *L) | ||
80 | { | ||
81 | return L->status; | ||
82 | } | ||
83 | |||
84 | LUA_API int lua_checkstack(lua_State *L, int size) | ||
85 | { | ||
86 | if (size > LUAI_MAXCSTACK || (L->top - L->base + size) > LUAI_MAXCSTACK) { | ||
87 | return 0; /* Stack overflow. */ | ||
88 | } else if (size > 0) { | ||
89 | lj_state_checkstack(L, (MSize)size); | ||
90 | } | ||
91 | return 1; | ||
92 | } | ||
93 | |||
94 | LUA_API void lua_xmove(lua_State *from, lua_State *to, int n) | ||
95 | { | ||
96 | TValue *f, *t; | ||
97 | if (from == to) return; | ||
98 | api_checknelems(from, n); | ||
99 | api_check(from, G(from) == G(to)); | ||
100 | lj_state_checkstack(to, (MSize)n); | ||
101 | f = from->top; | ||
102 | t = to->top = to->top + n; | ||
103 | while (--n >= 0) copyTV(to, --t, --f); | ||
104 | from->top = f; | ||
105 | } | ||
106 | |||
107 | /* -- Stack manipulation -------------------------------------------------- */ | ||
108 | |||
109 | LUA_API int lua_gettop(lua_State *L) | ||
110 | { | ||
111 | return cast_int(L->top - L->base); | ||
112 | } | ||
113 | |||
114 | LUA_API void lua_settop(lua_State *L, int idx) | ||
115 | { | ||
116 | if (idx >= 0) { | ||
117 | api_check(L, idx <= L->maxstack - L->base); | ||
118 | if (L->base + idx > L->top) { | ||
119 | if (L->base + idx >= L->maxstack) | ||
120 | lj_state_growstack(L, (MSize)idx - (MSize)(L->top - L->base)); | ||
121 | do { setnilV(L->top++); } while (L->top < L->base + idx); | ||
122 | } else { | ||
123 | L->top = L->base + idx; | ||
124 | } | ||
125 | } else { | ||
126 | api_check(L, -(idx+1) <= (L->top - L->base)); | ||
127 | L->top += idx+1; /* Shrinks top (idx < 0). */ | ||
128 | } | ||
129 | } | ||
130 | |||
131 | LUA_API void lua_remove(lua_State *L, int idx) | ||
132 | { | ||
133 | TValue *p = stkindex2adr(L, idx); | ||
134 | api_checkvalidindex(L, p); | ||
135 | while (++p < L->top) copyTV(L, p-1, p); | ||
136 | L->top--; | ||
137 | } | ||
138 | |||
139 | LUA_API void lua_insert(lua_State *L, int idx) | ||
140 | { | ||
141 | TValue *q, *p = stkindex2adr(L, idx); | ||
142 | api_checkvalidindex(L, p); | ||
143 | for (q = L->top; q > p; q--) copyTV(L, q, q-1); | ||
144 | copyTV(L, p, L->top); | ||
145 | } | ||
146 | |||
147 | LUA_API void lua_replace(lua_State *L, int idx) | ||
148 | { | ||
149 | api_checknelems(L, 1); | ||
150 | if (idx == LUA_GLOBALSINDEX) { | ||
151 | api_check(L, tvistab(L->top-1)); | ||
152 | /* NOBARRIER: A thread (i.e. L) is never black. */ | ||
153 | setgcref(L->env, obj2gco(tabV(L->top-1))); | ||
154 | } else if (idx == LUA_ENVIRONINDEX) { | ||
155 | GCfunc *fn = curr_func(L); | ||
156 | if (fn->c.gct != ~LJ_TFUNC) | ||
157 | lj_err_msg(L, LJ_ERR_NOENV); | ||
158 | api_check(L, tvistab(L->top-1)); | ||
159 | setgcref(fn->c.env, obj2gco(tabV(L->top-1))); | ||
160 | lj_gc_barrier(L, fn, L->top-1); | ||
161 | } else { | ||
162 | TValue *o = index2adr(L, idx); | ||
163 | api_checkvalidindex(L, o); | ||
164 | copyTV(L, o, L->top-1); | ||
165 | if (idx < LUA_GLOBALSINDEX) /* Need a barrier for upvalues. */ | ||
166 | lj_gc_barrier(L, curr_func(L), L->top-1); | ||
167 | } | ||
168 | L->top--; | ||
169 | } | ||
170 | |||
171 | LUA_API void lua_pushvalue(lua_State *L, int idx) | ||
172 | { | ||
173 | copyTV(L, L->top, index2adr(L, idx)); | ||
174 | incr_top(L); | ||
175 | } | ||
176 | |||
177 | /* -- Stack getters ------------------------------------------------------- */ | ||
178 | |||
179 | LUA_API int lua_type(lua_State *L, int idx) | ||
180 | { | ||
181 | cTValue *o = index2adr(L, idx); | ||
182 | if (tvisnum(o)) { | ||
183 | return LUA_TNUMBER; | ||
184 | #if LJ_64 | ||
185 | } else if (tvislightud(o)) { | ||
186 | return LUA_TLIGHTUSERDATA; | ||
187 | #endif | ||
188 | } else if (o == niltv(L)) { | ||
189 | return LUA_TNONE; | ||
190 | } else { /* Magic internal/external tag conversion. ORDER LJ_T */ | ||
191 | int t = ~itype(o); | ||
192 | return (int)(((t < 8 ? 0x98a42110 : 0x75b6) >> 4*(t&7)) & 15u); | ||
193 | } | ||
194 | } | ||
195 | |||
196 | LUA_API const char *lua_typename(lua_State *L, int t) | ||
197 | { | ||
198 | UNUSED(L); | ||
199 | return lj_obj_typename[t+1]; | ||
200 | } | ||
201 | |||
202 | LUA_API int lua_iscfunction(lua_State *L, int idx) | ||
203 | { | ||
204 | cTValue *o = index2adr(L, idx); | ||
205 | return !isluafunc(funcV(o)); | ||
206 | } | ||
207 | |||
208 | LUA_API int lua_isnumber(lua_State *L, int idx) | ||
209 | { | ||
210 | cTValue *o = index2adr(L, idx); | ||
211 | TValue tmp; | ||
212 | return (tvisnum(o) || (tvisstr(o) && lj_str_numconv(strVdata(o), &tmp))); | ||
213 | } | ||
214 | |||
215 | LUA_API int lua_isstring(lua_State *L, int idx) | ||
216 | { | ||
217 | cTValue *o = index2adr(L, idx); | ||
218 | return (tvisstr(o) || tvisnum(o)); | ||
219 | } | ||
220 | |||
221 | LUA_API int lua_isuserdata(lua_State *L, int idx) | ||
222 | { | ||
223 | cTValue *o = index2adr(L, idx); | ||
224 | return (tvisudata(o) || tvislightud(o)); | ||
225 | } | ||
226 | |||
227 | LUA_API int lua_rawequal(lua_State *L, int idx1, int idx2) | ||
228 | { | ||
229 | cTValue *o1 = index2adr(L, idx1); | ||
230 | cTValue *o2 = index2adr(L, idx2); | ||
231 | return (o1 == niltv(L) || o2 == niltv(L)) ? 0 : lj_obj_equal(o1, o2); | ||
232 | } | ||
233 | |||
234 | LUA_API int lua_equal(lua_State *L, int idx1, int idx2) | ||
235 | { | ||
236 | cTValue *o1 = index2adr(L, idx1); | ||
237 | cTValue *o2 = index2adr(L, idx2); | ||
238 | if (tvisnum(o1) && tvisnum(o2)) { | ||
239 | return numV(o1) == numV(o2); | ||
240 | } else if (itype(o1) != itype(o2)) { | ||
241 | return 0; | ||
242 | } else if (tvispri(o1)) { | ||
243 | return o1 != niltv(L) && o2 != niltv(L); | ||
244 | #if LJ_64 | ||
245 | } else if (tvislightud(o1)) { | ||
246 | return o1->u64 == o2->u64; | ||
247 | #endif | ||
248 | } else if (gcrefeq(o1->gcr, o2->gcr)) { | ||
249 | return 1; | ||
250 | } else if (!tvistabud(o1)) { | ||
251 | return 0; | ||
252 | } else { | ||
253 | TValue *base = lj_meta_equal(L, gcV(o1), gcV(o2), 0); | ||
254 | if ((uintptr_t)base <= 1) { | ||
255 | return (int)(uintptr_t)base; | ||
256 | } else { | ||
257 | L->top = base+2; | ||
258 | lj_vm_call(L, base, 1+1); | ||
259 | L->top -= 2; | ||
260 | return tvistruecond(L->top+1); | ||
261 | } | ||
262 | } | ||
263 | } | ||
264 | |||
265 | LUA_API int lua_lessthan(lua_State *L, int idx1, int idx2) | ||
266 | { | ||
267 | cTValue *o1 = index2adr(L, idx1); | ||
268 | cTValue *o2 = index2adr(L, idx2); | ||
269 | if (o1 == niltv(L) || o2 == niltv(L)) { | ||
270 | return 0; | ||
271 | } else if (tvisnum(o1) && tvisnum(o2)) { | ||
272 | return numV(o1) < numV(o2); | ||
273 | } else { | ||
274 | TValue *base = lj_meta_comp(L, o1, o2, 0); | ||
275 | if ((uintptr_t)base <= 1) { | ||
276 | return (int)(uintptr_t)base; | ||
277 | } else { | ||
278 | L->top = base+2; | ||
279 | lj_vm_call(L, base, 1+1); | ||
280 | L->top -= 2; | ||
281 | return tvistruecond(L->top+1); | ||
282 | } | ||
283 | } | ||
284 | } | ||
285 | |||
286 | LUA_API lua_Number lua_tonumber(lua_State *L, int idx) | ||
287 | { | ||
288 | cTValue *o = index2adr(L, idx); | ||
289 | TValue tmp; | ||
290 | if (LJ_LIKELY(tvisnum(o))) | ||
291 | return numV(o); | ||
292 | else if (tvisstr(o) && lj_str_numconv(strVdata(o), &tmp)) | ||
293 | return numV(&tmp); | ||
294 | else | ||
295 | return 0; | ||
296 | } | ||
297 | |||
298 | LUA_API lua_Integer lua_tointeger(lua_State *L, int idx) | ||
299 | { | ||
300 | cTValue *o = index2adr(L, idx); | ||
301 | TValue tmp; | ||
302 | lua_Number n; | ||
303 | if (LJ_LIKELY(tvisnum(o))) | ||
304 | n = numV(o); | ||
305 | else if (tvisstr(o) && lj_str_numconv(strVdata(o), &tmp)) | ||
306 | n = numV(&tmp); | ||
307 | else | ||
308 | return 0; | ||
309 | #if LJ_64 | ||
310 | return (lua_Integer)n; | ||
311 | #else | ||
312 | return lj_num2int(n); | ||
313 | #endif | ||
314 | } | ||
315 | |||
316 | LUA_API int lua_toboolean(lua_State *L, int idx) | ||
317 | { | ||
318 | cTValue *o = index2adr(L, idx); | ||
319 | return tvistruecond(o); | ||
320 | } | ||
321 | |||
322 | LUA_API const char *lua_tolstring(lua_State *L, int idx, size_t *len) | ||
323 | { | ||
324 | TValue *o = index2adr(L, idx); | ||
325 | GCstr *s; | ||
326 | if (LJ_LIKELY(tvisstr(o))) { | ||
327 | s = strV(o); | ||
328 | } else if (tvisnum(o)) { | ||
329 | lj_gc_check(L); | ||
330 | o = index2adr(L, idx); /* GC may move the stack. */ | ||
331 | s = lj_str_fromnum(L, &o->n); | ||
332 | } else { | ||
333 | if (len != NULL) *len = 0; | ||
334 | return NULL; | ||
335 | } | ||
336 | if (len != NULL) *len = s->len; | ||
337 | return strdata(s); | ||
338 | } | ||
339 | |||
340 | LUA_API size_t lua_objlen(lua_State *L, int idx) | ||
341 | { | ||
342 | TValue *o = index2adr(L, idx); | ||
343 | if (tvisstr(o)) | ||
344 | return strV(o)->len; | ||
345 | else if (tvistab(o)) | ||
346 | return cast(size_t, lj_tab_len(tabV(o))); | ||
347 | else if (tvisudata(o)) | ||
348 | return udataV(o)->len; | ||
349 | else if (tvisnum(o)) | ||
350 | return lj_str_fromnum(L, &o->n)->len; | ||
351 | else | ||
352 | return 0; | ||
353 | } | ||
354 | |||
355 | LUA_API lua_CFunction lua_tocfunction(lua_State *L, int idx) | ||
356 | { | ||
357 | cTValue *o = index2adr(L, idx); | ||
358 | return funcV(o)->c.gate == lj_gate_c ? funcV(o)->c.f : NULL; | ||
359 | } | ||
360 | |||
361 | LUA_API void *lua_touserdata(lua_State *L, int idx) | ||
362 | { | ||
363 | cTValue *o = index2adr(L, idx); | ||
364 | if (tvisudata(o)) | ||
365 | return uddata(udataV(o)); | ||
366 | else if (tvislightud(o)) | ||
367 | return lightudV(o); | ||
368 | else | ||
369 | return NULL; | ||
370 | } | ||
371 | |||
372 | LUA_API lua_State *lua_tothread(lua_State *L, int idx) | ||
373 | { | ||
374 | cTValue *o = index2adr(L, idx); | ||
375 | return (!tvisthread(o)) ? NULL : threadV(o); | ||
376 | } | ||
377 | |||
378 | LUA_API const void *lua_topointer(lua_State *L, int idx) | ||
379 | { | ||
380 | cTValue *o = index2adr(L, idx); | ||
381 | if (tvisudata(o)) | ||
382 | return uddata(udataV(o)); | ||
383 | else if (tvislightud(o)) | ||
384 | return lightudV(o); | ||
385 | else if (tvisgcv(o)) | ||
386 | return gcV(o); | ||
387 | else | ||
388 | return NULL; | ||
389 | } | ||
390 | |||
391 | /* -- Stack setters (object creation) ------------------------------------- */ | ||
392 | |||
393 | LUA_API void lua_pushnil(lua_State *L) | ||
394 | { | ||
395 | setnilV(L->top); | ||
396 | incr_top(L); | ||
397 | } | ||
398 | |||
399 | LUA_API void lua_pushnumber(lua_State *L, lua_Number n) | ||
400 | { | ||
401 | setnumV(L->top, n); | ||
402 | if (LJ_UNLIKELY(tvisnan(L->top))) | ||
403 | setnanV(L->top); /* Canonicalize injected NaNs. */ | ||
404 | incr_top(L); | ||
405 | } | ||
406 | |||
407 | LUA_API void lua_pushinteger(lua_State *L, lua_Integer n) | ||
408 | { | ||
409 | setnumV(L->top, cast_num(n)); | ||
410 | incr_top(L); | ||
411 | } | ||
412 | |||
413 | LUA_API void lua_pushlstring(lua_State *L, const char *str, size_t len) | ||
414 | { | ||
415 | GCstr *s; | ||
416 | lj_gc_check(L); | ||
417 | s = lj_str_new(L, str, len); | ||
418 | setstrV(L, L->top, s); | ||
419 | incr_top(L); | ||
420 | } | ||
421 | |||
422 | LUA_API void lua_pushstring(lua_State *L, const char *str) | ||
423 | { | ||
424 | if (str == NULL) { | ||
425 | setnilV(L->top); | ||
426 | } else { | ||
427 | GCstr *s; | ||
428 | lj_gc_check(L); | ||
429 | s = lj_str_newz(L, str); | ||
430 | setstrV(L, L->top, s); | ||
431 | } | ||
432 | incr_top(L); | ||
433 | } | ||
434 | |||
435 | LUA_API const char *lua_pushvfstring(lua_State *L, const char *fmt, | ||
436 | va_list argp) | ||
437 | { | ||
438 | lj_gc_check(L); | ||
439 | return lj_str_pushvf(L, fmt, argp); | ||
440 | } | ||
441 | |||
442 | LUA_API const char *lua_pushfstring(lua_State *L, const char *fmt, ...) | ||
443 | { | ||
444 | const char *ret; | ||
445 | va_list argp; | ||
446 | lj_gc_check(L); | ||
447 | va_start(argp, fmt); | ||
448 | ret = lj_str_pushvf(L, fmt, argp); | ||
449 | va_end(argp); | ||
450 | return ret; | ||
451 | } | ||
452 | |||
453 | LUA_API void lua_pushcclosure(lua_State *L, lua_CFunction f, int n) | ||
454 | { | ||
455 | GCfunc *fn; | ||
456 | lj_gc_check(L); | ||
457 | api_checknelems(L, n); | ||
458 | fn = lj_func_newC(L, (MSize)n, getcurrenv(L)); | ||
459 | fn->c.f = f; | ||
460 | L->top -= n; | ||
461 | while (n--) | ||
462 | copyTV(L, &fn->c.upvalue[n], L->top+n); | ||
463 | setfuncV(L, L->top, fn); | ||
464 | lua_assert(iswhite(obj2gco(fn))); | ||
465 | incr_top(L); | ||
466 | } | ||
467 | |||
468 | LUA_API void lua_pushboolean(lua_State *L, int b) | ||
469 | { | ||
470 | setboolV(L->top, (b != 0)); | ||
471 | incr_top(L); | ||
472 | } | ||
473 | |||
474 | LUA_API void lua_pushlightuserdata(lua_State *L, void *p) | ||
475 | { | ||
476 | setlightudV(L->top, checklightudptr(L, p)); | ||
477 | incr_top(L); | ||
478 | } | ||
479 | |||
480 | LUA_API void lua_createtable(lua_State *L, int narray, int nrec) | ||
481 | { | ||
482 | GCtab *t; | ||
483 | lj_gc_check(L); | ||
484 | t = lj_tab_new(L, (uint32_t)(narray > 0 ? narray+1 : 0), hsize2hbits(nrec)); | ||
485 | settabV(L, L->top, t); | ||
486 | incr_top(L); | ||
487 | } | ||
488 | |||
489 | LUALIB_API int luaL_newmetatable(lua_State *L, const char *tname) | ||
490 | { | ||
491 | GCtab *regt = tabV(registry(L)); | ||
492 | TValue *tv = lj_tab_setstr(L, regt, lj_str_newz(L, tname)); | ||
493 | if (tvisnil(tv)) { | ||
494 | GCtab *mt = lj_tab_new(L, 0, 1); | ||
495 | settabV(L, tv, mt); | ||
496 | settabV(L, L->top++, mt); | ||
497 | lj_gc_objbarriert(L, regt, mt); | ||
498 | return 1; | ||
499 | } else { | ||
500 | copyTV(L, L->top++, tv); | ||
501 | return 0; | ||
502 | } | ||
503 | } | ||
504 | |||
505 | LUA_API int lua_pushthread(lua_State *L) | ||
506 | { | ||
507 | setthreadV(L, L->top, L); | ||
508 | incr_top(L); | ||
509 | return (mainthread(G(L)) == L); | ||
510 | } | ||
511 | |||
512 | LUA_API lua_State *lua_newthread(lua_State *L) | ||
513 | { | ||
514 | lua_State *L1; | ||
515 | lj_gc_check(L); | ||
516 | L1 = lj_state_new(L); | ||
517 | setthreadV(L, L->top, L1); | ||
518 | incr_top(L); | ||
519 | return L1; | ||
520 | } | ||
521 | |||
522 | LUA_API void *lua_newuserdata(lua_State *L, size_t size) | ||
523 | { | ||
524 | GCudata *ud; | ||
525 | lj_gc_check(L); | ||
526 | if (size > LJ_MAX_UDATA) | ||
527 | lj_err_msg(L, LJ_ERR_UDATAOV); | ||
528 | ud = lj_udata_new(L, (MSize)size, getcurrenv(L)); | ||
529 | setudataV(L, L->top, ud); | ||
530 | incr_top(L); | ||
531 | return uddata(ud); | ||
532 | } | ||
533 | |||
534 | LUA_API void lua_concat(lua_State *L, int n) | ||
535 | { | ||
536 | api_checknelems(L, n); | ||
537 | if (n >= 2) { | ||
538 | n--; | ||
539 | do { | ||
540 | TValue *top = lj_meta_cat(L, L->top-1, n); | ||
541 | if (top == NULL) { | ||
542 | L->top -= n; | ||
543 | break; | ||
544 | } | ||
545 | n -= cast_int(L->top - top); | ||
546 | L->top = top+2; | ||
547 | lj_vm_call(L, top, 1+1); | ||
548 | L->top--; | ||
549 | copyTV(L, L->top-1, L->top); | ||
550 | } while (--n > 0); | ||
551 | } else if (n == 0) { /* Push empty string. */ | ||
552 | setstrV(L, L->top, lj_str_new(L, "", 0)); | ||
553 | incr_top(L); | ||
554 | } | ||
555 | /* else n == 1: nothing to do. */ | ||
556 | } | ||
557 | |||
558 | /* -- Object getters ------------------------------------------------------ */ | ||
559 | |||
560 | LUA_API void lua_gettable(lua_State *L, int idx) | ||
561 | { | ||
562 | cTValue *v, *t = index2adr(L, idx); | ||
563 | api_checkvalidindex(L, t); | ||
564 | v = lj_meta_tget(L, t, L->top-1); | ||
565 | if (v == NULL) { | ||
566 | L->top += 2; | ||
567 | lj_vm_call(L, L->top-2, 1+1); | ||
568 | L->top -= 2; | ||
569 | v = L->top+1; | ||
570 | } | ||
571 | copyTV(L, L->top-1, v); | ||
572 | } | ||
573 | |||
574 | LUA_API void lua_getfield(lua_State *L, int idx, const char *k) | ||
575 | { | ||
576 | cTValue *v, *t = index2adr(L, idx); | ||
577 | TValue key; | ||
578 | api_checkvalidindex(L, t); | ||
579 | setstrV(L, &key, lj_str_newz(L, k)); | ||
580 | v = lj_meta_tget(L, t, &key); | ||
581 | if (v == NULL) { | ||
582 | L->top += 2; | ||
583 | lj_vm_call(L, L->top-2, 1+1); | ||
584 | L->top -= 2; | ||
585 | v = L->top+1; | ||
586 | } | ||
587 | copyTV(L, L->top, v); | ||
588 | incr_top(L); | ||
589 | } | ||
590 | |||
591 | LUA_API void lua_rawget(lua_State *L, int idx) | ||
592 | { | ||
593 | cTValue *t = index2adr(L, idx); | ||
594 | api_check(L, tvistab(t)); | ||
595 | copyTV(L, L->top-1, lj_tab_get(L, tabV(t), L->top-1)); | ||
596 | } | ||
597 | |||
598 | LUA_API void lua_rawgeti(lua_State *L, int idx, int n) | ||
599 | { | ||
600 | cTValue *v, *t = index2adr(L, idx); | ||
601 | api_check(L, tvistab(t)); | ||
602 | v = lj_tab_getint(tabV(t), n); | ||
603 | if (v) { | ||
604 | copyTV(L, L->top, v); | ||
605 | } else { | ||
606 | setnilV(L->top); | ||
607 | } | ||
608 | incr_top(L); | ||
609 | } | ||
610 | |||
611 | LUA_API int lua_getmetatable(lua_State *L, int idx) | ||
612 | { | ||
613 | cTValue *o = index2adr(L, idx); | ||
614 | GCtab *mt = NULL; | ||
615 | if (tvistab(o)) | ||
616 | mt = tabref(tabV(o)->metatable); | ||
617 | else if (tvisudata(o)) | ||
618 | mt = tabref(udataV(o)->metatable); | ||
619 | else | ||
620 | mt = tabref(G(L)->basemt[itypemap(o)]); | ||
621 | if (mt == NULL) | ||
622 | return 0; | ||
623 | settabV(L, L->top, mt); | ||
624 | incr_top(L); | ||
625 | return 1; | ||
626 | } | ||
627 | |||
628 | LUALIB_API int luaL_getmetafield(lua_State *L, int idx, const char *field) | ||
629 | { | ||
630 | if (lua_getmetatable(L, idx)) { | ||
631 | cTValue *tv = lj_tab_getstr(tabV(L->top-1), lj_str_newz(L, field)); | ||
632 | if (tv && !tvisnil(tv)) { | ||
633 | copyTV(L, L->top-1, tv); | ||
634 | return 1; | ||
635 | } | ||
636 | L->top--; | ||
637 | } | ||
638 | return 0; | ||
639 | } | ||
640 | |||
641 | LUA_API void lua_getfenv(lua_State *L, int idx) | ||
642 | { | ||
643 | cTValue *o = index2adr(L, idx); | ||
644 | api_checkvalidindex(L, o); | ||
645 | if (tvisfunc(o)) { | ||
646 | settabV(L, L->top, tabref(funcV(o)->c.env)); | ||
647 | } else if (tvisudata(o)) { | ||
648 | settabV(L, L->top, tabref(udataV(o)->env)); | ||
649 | } else if (tvisthread(o)) { | ||
650 | settabV(L, L->top, tabref(threadV(o)->env)); | ||
651 | } else { | ||
652 | setnilV(L->top); | ||
653 | } | ||
654 | incr_top(L); | ||
655 | } | ||
656 | |||
657 | LUA_API int lua_next(lua_State *L, int idx) | ||
658 | { | ||
659 | cTValue *t = index2adr(L, idx); | ||
660 | int more; | ||
661 | api_check(L, tvistab(t)); | ||
662 | more = lj_tab_next(L, tabV(t), L->top-1); | ||
663 | if (more) { | ||
664 | incr_top(L); /* Return new key and value slot. */ | ||
665 | } else { /* End of traversal. */ | ||
666 | L->top--; /* Remove key slot. */ | ||
667 | } | ||
668 | return more; | ||
669 | } | ||
670 | |||
671 | static const char *aux_upvalue(cTValue *f, uint32_t idx, TValue **val) | ||
672 | { | ||
673 | GCfunc *fn; | ||
674 | if (!tvisfunc(f)) return NULL; | ||
675 | fn = funcV(f); | ||
676 | if (isluafunc(fn)) { | ||
677 | GCproto *pt = funcproto(fn); | ||
678 | if (idx < pt->sizeuvname) { | ||
679 | *val = gcref(fn->l.uvptr[idx])->uv.v; | ||
680 | return strdata(pt->uvname[idx]); | ||
681 | } | ||
682 | } else { | ||
683 | if (idx < fn->c.nupvalues) { | ||
684 | *val = &fn->c.upvalue[idx]; | ||
685 | return ""; | ||
686 | } | ||
687 | } | ||
688 | return NULL; | ||
689 | } | ||
690 | |||
691 | LUA_API const char *lua_getupvalue(lua_State *L, int idx, int n) | ||
692 | { | ||
693 | TValue *val; | ||
694 | const char *name = aux_upvalue(index2adr(L, idx), (uint32_t)(n-1), &val); | ||
695 | if (name) { | ||
696 | copyTV(L, L->top, val); | ||
697 | incr_top(L); | ||
698 | } | ||
699 | return name; | ||
700 | } | ||
701 | |||
702 | LUALIB_API void *luaL_checkudata(lua_State *L, int idx, const char *tname) | ||
703 | { | ||
704 | cTValue *o = index2adr(L, idx); | ||
705 | if (tvisudata(o)) { | ||
706 | GCudata *ud = udataV(o); | ||
707 | cTValue *tv = lj_tab_getstr(tabV(registry(L)), lj_str_newz(L, tname)); | ||
708 | if (tv && tvistab(tv) && tabV(tv) == tabref(ud->metatable)) | ||
709 | return uddata(ud); | ||
710 | } | ||
711 | lj_err_argtype(L, idx, tname); | ||
712 | return NULL; /* unreachable */ | ||
713 | } | ||
714 | |||
715 | /* -- Object setters ------------------------------------------------------ */ | ||
716 | |||
717 | LUA_API void lua_settable(lua_State *L, int idx) | ||
718 | { | ||
719 | TValue *o; | ||
720 | cTValue *t = index2adr(L, idx); | ||
721 | api_checknelems(L, 2); | ||
722 | api_checkvalidindex(L, t); | ||
723 | o = lj_meta_tset(L, t, L->top-2); | ||
724 | if (o) { | ||
725 | /* NOBARRIER: lj_meta_tset ensures the table is not black. */ | ||
726 | copyTV(L, o, L->top-1); | ||
727 | L->top -= 2; | ||
728 | } else { | ||
729 | L->top += 3; | ||
730 | copyTV(L, L->top-1, L->top-6); | ||
731 | lj_vm_call(L, L->top-3, 0+1); | ||
732 | L->top -= 3; | ||
733 | } | ||
734 | } | ||
735 | |||
736 | LUA_API void lua_setfield(lua_State *L, int idx, const char *k) | ||
737 | { | ||
738 | TValue *o; | ||
739 | TValue key; | ||
740 | cTValue *t = index2adr(L, idx); | ||
741 | api_checknelems(L, 1); | ||
742 | api_checkvalidindex(L, t); | ||
743 | setstrV(L, &key, lj_str_newz(L, k)); | ||
744 | o = lj_meta_tset(L, t, &key); | ||
745 | if (o) { | ||
746 | L->top--; | ||
747 | /* NOBARRIER: lj_meta_tset ensures the table is not black. */ | ||
748 | copyTV(L, o, L->top); | ||
749 | } else { | ||
750 | L->top += 3; | ||
751 | copyTV(L, L->top-1, L->top-6); | ||
752 | lj_vm_call(L, L->top-3, 0+1); | ||
753 | L->top -= 2; | ||
754 | } | ||
755 | } | ||
756 | |||
757 | LUA_API void lua_rawset(lua_State *L, int idx) | ||
758 | { | ||
759 | GCtab *t = tabV(index2adr(L, idx)); | ||
760 | TValue *dst, *key; | ||
761 | api_checknelems(L, 2); | ||
762 | key = L->top-2; | ||
763 | dst = lj_tab_set(L, t, key); | ||
764 | copyTV(L, dst, key+1); | ||
765 | lj_gc_barriert(L, t, dst); | ||
766 | L->top = key; | ||
767 | } | ||
768 | |||
769 | LUA_API void lua_rawseti(lua_State *L, int idx, int n) | ||
770 | { | ||
771 | GCtab *t = tabV(index2adr(L, idx)); | ||
772 | TValue *dst, *src; | ||
773 | api_checknelems(L, 1); | ||
774 | dst = lj_tab_setint(L, t, n); | ||
775 | src = L->top-1; | ||
776 | copyTV(L, dst, src); | ||
777 | lj_gc_barriert(L, t, dst); | ||
778 | L->top = src; | ||
779 | } | ||
780 | |||
781 | LUA_API int lua_setmetatable(lua_State *L, int idx) | ||
782 | { | ||
783 | global_State *g; | ||
784 | GCtab *mt; | ||
785 | cTValue *o = index2adr(L, idx); | ||
786 | api_checknelems(L, 1); | ||
787 | api_checkvalidindex(L, o); | ||
788 | if (tvisnil(L->top-1)) { | ||
789 | mt = NULL; | ||
790 | } else { | ||
791 | api_check(L, tvistab(L->top-1)); | ||
792 | mt = tabV(L->top-1); | ||
793 | } | ||
794 | g = G(L); | ||
795 | if (tvistab(o)) { | ||
796 | setgcref(tabV(o)->metatable, obj2gco(mt)); | ||
797 | if (mt) | ||
798 | lj_gc_objbarriert(L, tabV(o), mt); | ||
799 | } else if (tvisudata(o)) { | ||
800 | setgcref(udataV(o)->metatable, obj2gco(mt)); | ||
801 | if (mt) | ||
802 | lj_gc_objbarrier(L, udataV(o), mt); | ||
803 | } else { | ||
804 | /* Flush cache, since traces specialize to basemt. But not during __gc. */ | ||
805 | if (lj_trace_flushall(L)) | ||
806 | lj_err_caller(L, LJ_ERR_NOGCMM); | ||
807 | if (tvisbool(o)) { | ||
808 | /* NOBARRIER: g->basemt[] is a GC root. */ | ||
809 | setgcref(g->basemt[~LJ_TTRUE], obj2gco(mt)); | ||
810 | setgcref(g->basemt[~LJ_TFALSE], obj2gco(mt)); | ||
811 | } else { | ||
812 | /* NOBARRIER: g->basemt[] is a GC root. */ | ||
813 | setgcref(g->basemt[itypemap(o)], obj2gco(mt)); | ||
814 | } | ||
815 | } | ||
816 | L->top--; | ||
817 | return 1; | ||
818 | } | ||
819 | |||
820 | LUA_API int lua_setfenv(lua_State *L, int idx) | ||
821 | { | ||
822 | cTValue *o = index2adr(L, idx); | ||
823 | GCtab *t; | ||
824 | api_checknelems(L, 1); | ||
825 | api_checkvalidindex(L, o); | ||
826 | api_check(L, tvistab(L->top-1)); | ||
827 | t = tabV(L->top-1); | ||
828 | if (tvisfunc(o)) { | ||
829 | setgcref(funcV(o)->c.env, obj2gco(t)); | ||
830 | } else if (tvisudata(o)) { | ||
831 | setgcref(udataV(o)->env, obj2gco(t)); | ||
832 | } else if (tvisthread(o)) { | ||
833 | setgcref(threadV(o)->env, obj2gco(t)); | ||
834 | } else { | ||
835 | L->top--; | ||
836 | return 0; | ||
837 | } | ||
838 | lj_gc_objbarrier(L, gcV(o), t); | ||
839 | L->top--; | ||
840 | return 1; | ||
841 | } | ||
842 | |||
843 | LUA_API const char *lua_setupvalue(lua_State *L, int idx, int n) | ||
844 | { | ||
845 | cTValue *f = index2adr(L, idx); | ||
846 | TValue *val; | ||
847 | const char *name; | ||
848 | api_checknelems(L, 1); | ||
849 | name = aux_upvalue(f, (uint32_t)(n-1), &val); | ||
850 | if (name) { | ||
851 | L->top--; | ||
852 | copyTV(L, val, L->top); | ||
853 | lj_gc_barrier(L, funcV(f), L->top); | ||
854 | } | ||
855 | return name; | ||
856 | } | ||
857 | |||
858 | /* -- Calls --------------------------------------------------------------- */ | ||
859 | |||
860 | LUA_API void lua_call(lua_State *L, int nargs, int nresults) | ||
861 | { | ||
862 | api_checknelems(L, nargs+1); | ||
863 | lj_vm_call(L, L->top - nargs, nresults+1); | ||
864 | } | ||
865 | |||
866 | LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc) | ||
867 | { | ||
868 | global_State *g = G(L); | ||
869 | uint8_t oldh = hook_save(g); | ||
870 | ptrdiff_t ef; | ||
871 | int status; | ||
872 | api_checknelems(L, nargs+1); | ||
873 | if (errfunc == 0) { | ||
874 | ef = 0; | ||
875 | } else { | ||
876 | cTValue *o = stkindex2adr(L, errfunc); | ||
877 | api_checkvalidindex(L, o); | ||
878 | ef = savestack(L, o); | ||
879 | } | ||
880 | status = lj_vm_pcall(L, L->top - nargs, nresults+1, ef); | ||
881 | if (status) hook_restore(g, oldh); | ||
882 | return status; | ||
883 | } | ||
884 | |||
885 | static TValue *cpcall(lua_State *L, lua_CFunction func, void *ud) | ||
886 | { | ||
887 | GCfunc *fn; | ||
888 | fn = lj_func_newC(L, 0, getcurrenv(L)); | ||
889 | fn->c.f = func; | ||
890 | setfuncV(L, L->top, fn); | ||
891 | setlightudV(L->top+1, checklightudptr(L, ud)); | ||
892 | cframe_nres(L->cframe) = 1+0; /* Zero results. */ | ||
893 | L->top += 2; | ||
894 | return L->top-1; /* Now call the newly allocated C function. */ | ||
895 | } | ||
896 | |||
897 | LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud) | ||
898 | { | ||
899 | global_State *g = G(L); | ||
900 | uint8_t oldh = hook_save(g); | ||
901 | int status = lj_vm_cpcall(L, cpcall, func, ud); | ||
902 | if (status) hook_restore(g, oldh); | ||
903 | return status; | ||
904 | } | ||
905 | |||
906 | LUALIB_API int luaL_callmeta(lua_State *L, int idx, const char *field) | ||
907 | { | ||
908 | if (luaL_getmetafield(L, idx, field)) { | ||
909 | TValue *base = L->top--; | ||
910 | copyTV(L, base, index2adr(L, idx)); | ||
911 | L->top = base+1; | ||
912 | lj_vm_call(L, base, 1+1); | ||
913 | return 1; | ||
914 | } | ||
915 | return 0; | ||
916 | } | ||
917 | |||
918 | /* -- Coroutine yield and resume ------------------------------------------ */ | ||
919 | |||
920 | LUA_API int lua_yield(lua_State *L, int nresults) | ||
921 | { | ||
922 | void *cf = L->cframe; | ||
923 | cTValue *f; | ||
924 | if (!cframe_canyield(cf)) | ||
925 | lj_err_msg(L, LJ_ERR_CYIELD); | ||
926 | f = L->top - nresults; | ||
927 | if (f > L->base) { | ||
928 | TValue *t = L->base; | ||
929 | while (--nresults >= 0) copyTV(L, t++, f++); | ||
930 | L->top = t; | ||
931 | } | ||
932 | L->cframe = NULL; | ||
933 | L->status = LUA_YIELD; | ||
934 | lj_vm_unwind_c(cf, LUA_YIELD); | ||
935 | return -1; /* unreachable */ | ||
936 | } | ||
937 | |||
938 | LUA_API int lua_resume(lua_State *L, int nargs) | ||
939 | { | ||
940 | if (L->cframe == NULL && L->status <= LUA_YIELD) | ||
941 | return lj_vm_resume(L, L->top - nargs, 0, 0); | ||
942 | L->top = L->base; | ||
943 | setstrV(L, L->top, lj_err_str(L, LJ_ERR_COSUSP)); | ||
944 | incr_top(L); | ||
945 | return LUA_ERRRUN; | ||
946 | } | ||
947 | |||
948 | /* -- Load and dump Lua code ---------------------------------------------- */ | ||
949 | |||
/* Protected-call callback used by lua_load: parse the chunk described by
** the LexState passed in ud and push the resulting Lua function.
** The dummy function argument is unused. Always returns NULL.
*/
static TValue *cpparser(lua_State *L, lua_CFunction dummy, void *ud)
{
  LexState *ls = cast(LexState *, ud);
  GCfunc *fn;
  UNUSED(dummy);
  cframe_errfunc(L->cframe) = -1;  /* Inherit error function. */
  lj_lex_start(L, ls);
  /* The new function gets the thread's environment table (L->env). */
  fn = lj_func_newL(L, lj_parse(ls), tabref(L->env));
  /* Parser may realloc stack. Don't combine above/below into one statement. */
  setfuncV(L, L->top++, fn);
  return NULL;
}
962 | |||
963 | LUA_API int lua_load(lua_State *L, lua_Reader reader, void *data, | ||
964 | const char *chunkname) | ||
965 | { | ||
966 | LexState ls; | ||
967 | int status; | ||
968 | global_State *g; | ||
969 | ls.rfunc = reader; | ||
970 | ls.rdata = data; | ||
971 | ls.chunkarg = chunkname ? chunkname : "?"; | ||
972 | lj_str_initbuf(L, &ls.sb); | ||
973 | status = lj_vm_cpcall(L, cpparser, NULL, &ls); | ||
974 | g = G(L); | ||
975 | lj_str_freebuf(g, &ls.sb); | ||
976 | lj_gc_check(L); | ||
977 | return status; | ||
978 | } | ||
979 | |||
980 | LUA_API int lua_dump(lua_State *L, lua_Writer writer, void *data) | ||
981 | { | ||
982 | api_checknelems(L, 1); | ||
983 | UNUSED(L); UNUSED(writer); UNUSED(data); | ||
984 | return 1; /* Error, not supported. */ | ||
985 | } | ||
986 | |||
987 | /* -- GC and memory management -------------------------------------------- */ | ||
988 | |||
989 | LUA_API int lua_gc(lua_State *L, int what, int data) | ||
990 | { | ||
991 | global_State *g = G(L); | ||
992 | int res = 0; | ||
993 | switch (what) { | ||
994 | case LUA_GCSTOP: | ||
995 | g->gc.threshold = LJ_MAX_MEM; | ||
996 | break; | ||
997 | case LUA_GCRESTART: | ||
998 | g->gc.threshold = g->gc.total; | ||
999 | break; | ||
1000 | case LUA_GCCOLLECT: | ||
1001 | lj_gc_fullgc(L); | ||
1002 | break; | ||
1003 | case LUA_GCCOUNT: | ||
1004 | res = cast_int(g->gc.total >> 10); | ||
1005 | break; | ||
1006 | case LUA_GCCOUNTB: | ||
1007 | res = cast_int(g->gc.total & 0x3ff); | ||
1008 | break; | ||
1009 | case LUA_GCSTEP: { | ||
1010 | MSize a = (MSize)data << 10; | ||
1011 | g->gc.threshold = (a <= g->gc.total) ? (g->gc.total - a) : 0; | ||
1012 | while (g->gc.total >= g->gc.threshold) | ||
1013 | if (lj_gc_step(L)) { | ||
1014 | res = 1; | ||
1015 | break; | ||
1016 | } | ||
1017 | break; | ||
1018 | } | ||
1019 | case LUA_GCSETPAUSE: | ||
1020 | res = cast_int(g->gc.pause); | ||
1021 | g->gc.pause = (MSize)data; | ||
1022 | break; | ||
1023 | case LUA_GCSETSTEPMUL: | ||
1024 | res = cast_int(g->gc.stepmul); | ||
1025 | g->gc.stepmul = (MSize)data; | ||
1026 | break; | ||
1027 | default: | ||
1028 | res = -1; /* Invalid option. */ | ||
1029 | } | ||
1030 | return res; | ||
1031 | } | ||
1032 | |||
1033 | LUA_API lua_Alloc lua_getallocf(lua_State *L, void **ud) | ||
1034 | { | ||
1035 | global_State *g = G(L); | ||
1036 | if (ud) *ud = g->allocd; | ||
1037 | return g->allocf; | ||
1038 | } | ||
1039 | |||
1040 | LUA_API void lua_setallocf(lua_State *L, lua_Alloc f, void *ud) | ||
1041 | { | ||
1042 | global_State *g = G(L); | ||
1043 | g->allocd = ud; | ||
1044 | g->allocf = f; | ||
1045 | } | ||
1046 | |||
diff --git a/src/lj_arch.h b/src/lj_arch.h new file mode 100644 index 00000000..abdb5af9 --- /dev/null +++ b/src/lj_arch.h | |||
@@ -0,0 +1,88 @@ | |||
/*
** Target architecture selection.
** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
*/

#ifndef _LJ_ARCH_H
#define _LJ_ARCH_H

#include "lua.h"


/* Target endianness. Values for LJ_ARCH_ENDIAN. */
#define LUAJIT_LE 0
#define LUAJIT_BE 1

/* Target architectures. Values for LUAJIT_TARGET. Each architecture has an
** upper-case and a lower-case spelling with the same value.
*/
#define LUAJIT_ARCH_X86 1
#define LUAJIT_ARCH_x86 1
#define LUAJIT_ARCH_X64 2
#define LUAJIT_ARCH_x64 2


/* Select native target if no target defined. */
#ifndef LUAJIT_TARGET

/* Detection uses the predefined compiler macros tested below. */
#if defined(__i386) || defined(__i386__) || defined(_M_IX86)
#define LUAJIT_TARGET LUAJIT_ARCH_X86
#elif defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64)
#define LUAJIT_TARGET LUAJIT_ARCH_X64
#else
#error "No support for this architecture (yet)"
#endif

#endif

/* Set target properties. */
#if LUAJIT_TARGET == LUAJIT_ARCH_X86
#define LJ_ARCH_NAME "x86"
#define LJ_ARCH_BITS 32
#define LJ_ARCH_ENDIAN LUAJIT_LE
#define LJ_TARGET_X86 1
#define LJ_TARGET_X86ORX64 1
#define LJ_PAGESIZE 4096
#elif LUAJIT_TARGET == LUAJIT_ARCH_X64
/* The x64 properties are defined, but the build is then rejected. */
#define LJ_ARCH_NAME "x64"
#define LJ_ARCH_BITS 64
#define LJ_ARCH_ENDIAN LUAJIT_LE
#define LJ_TARGET_X64 1
#define LJ_TARGET_X86ORX64 1
#define LJ_PAGESIZE 4096
#error "No support for x64 architecture (yet)"
#else
#error "No target architecture defined"
#endif

/* Disable or enable the JIT compiler. */
#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT)
#define LJ_HASJIT 0
#else
#define LJ_HASJIT 1
#endif

/* Endian-dependent helpers: LJ_ENDIAN_SELECT picks one of its two arguments,
** LJ_ENDIAN_LOHI emits its two arguments in target order.
*/
#if LJ_ARCH_ENDIAN == LUAJIT_BE
#define LJ_ENDIAN_SELECT(le, be) be
#define LJ_ENDIAN_LOHI(lo, hi) hi lo
#else
#define LJ_ENDIAN_SELECT(le, be) le
#define LJ_ENDIAN_LOHI(lo, hi) lo hi
#endif

/* Exactly one of LJ_32 and LJ_64 is 1, derived from LJ_ARCH_BITS. */
#if LJ_ARCH_BITS == 32
#define LJ_32 1
#define LJ_64 0
#elif LJ_ARCH_BITS == 64
#define LJ_32 0
#define LJ_64 1
#else
#error "Bad LJ_ARCH_BITS setting"
#endif

/* Whether target CPU masks the shift count by the operand length or not. */
#if LJ_TARGET_X86ORX64
#define LJ_TARGET_MASKEDSHIFT 1
#else
#define LJ_TARGET_MASKEDSHIFT 0
#endif

#endif
diff --git a/src/lj_asm.c b/src/lj_asm.c new file mode 100644 index 00000000..b89b8543 --- /dev/null +++ b/src/lj_asm.c | |||
@@ -0,0 +1,3324 @@ | |||
1 | /* | ||
2 | ** IR assembler (SSA IR -> machine code). | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #define lj_asm_c | ||
7 | #define LUA_CORE | ||
8 | |||
9 | #include "lj_obj.h" | ||
10 | |||
11 | #if LJ_HASJIT | ||
12 | |||
13 | #include "lj_gc.h" | ||
14 | #include "lj_str.h" | ||
15 | #include "lj_tab.h" | ||
16 | #include "lj_ir.h" | ||
17 | #include "lj_jit.h" | ||
18 | #include "lj_iropt.h" | ||
19 | #include "lj_mcode.h" | ||
20 | #include "lj_iropt.h" | ||
21 | #include "lj_trace.h" | ||
22 | #include "lj_snap.h" | ||
23 | #include "lj_asm.h" | ||
24 | #include "lj_dispatch.h" | ||
25 | #include "lj_vm.h" | ||
26 | #include "lj_target.h" | ||
27 | |||
28 | /* -- Assembler state and common macros ----------------------------------- */ | ||
29 | |||
/* Assembler state.
** Holds the state used while assembling one trace. Machine code is emitted
** backwards: mcp is the current MCode pointer and grows down.
*/
typedef struct ASMState {
  RegCost cost[RID_MAX];  /* Reference and blended allocation cost for regs. */

  MCode *mcp;  /* Current MCode pointer (grows down). */
  MCode *mclim;  /* Lower limit for MCode memory + red zone. */

  IRIns *ir;  /* Copy of pointer to IR instructions/constants. */
  jit_State *J;  /* JIT compiler state. */

  x86ModRM mrm;  /* Fused x86 address operand. */

  RegSet freeset;  /* Set of free registers. */
  RegSet modset;  /* Set of registers modified inside the loop. */
  RegSet phiset;  /* Set of PHI registers. */

  uint32_t flags;  /* Copy of JIT compiler flags. */
  int loopinv;  /* Loop branch inversion (0:no, 1:yes, 2:yes+CC_P). */

  int32_t evenspill;  /* Next even spill slot. */
  int32_t oddspill;  /* Next odd spill slot (or 0). */

  IRRef curins;  /* Reference of current instruction. */
  IRRef stopins;  /* Stop assembly before hitting this instruction. */
  IRRef orignins;  /* Original T->nins. */

  IRRef snapref;  /* Current snapshot is active after this reference. */
  IRRef snaprename;  /* Rename highwater mark for snapshot check. */
  SnapNo snapno;  /* Current snapshot number. */
  SnapNo loopsnapno;  /* Loop snapshot number. */

  Trace *T;  /* Trace to assemble. */
  Trace *parent;  /* Parent trace (or NULL). */

  IRRef fuseref;  /* Fusion limit (loopref, 0 or FUSE_DISABLED). */
  IRRef sectref;  /* Section base reference (loopref or 0). */
  IRRef loopref;  /* Reference of LOOP instruction (or 0). */

  BCReg topslot;  /* Number of slots for stack check (unless 0). */
  MSize gcsteps;  /* Accumulated number of GC steps (per section). */

  MCode *mcbot;  /* Bottom of reserved MCode. */
  MCode *mctop;  /* Top of generated MCode. */
  MCode *mcloop;  /* Pointer to loop MCode (or NULL). */
  MCode *invmcp;  /* Points to invertible loop branch (or NULL). */
  MCode *testmcp;  /* Pending opportunity to remove test r,r. */
  MCode *realign;  /* Realign loop if not NULL. */

  IRRef1 phireg[RID_MAX];  /* PHI register references. */
  uint16_t parentmap[LJ_MAX_JSLOTS];  /* Parent slot to RegSP map. */
} ASMState;
81 | |||
/* IR instruction for a reference. Requires a local named 'as' in scope. */
#define IR(ref) (&as->ir[(ref)])

/* Check for variant to invariant references. */
#define iscrossref(as, ref) ((ref) < as->sectref)

/* Inhibit memory op fusion from variant to invariant references. */
/* FUSE_DISABLED is the largest IRRef value, so mayfuse() is false for every
** reference while as->fuseref holds it.
*/
#define FUSE_DISABLED (~(IRRef)0)
#define mayfuse(as, ref) ((ref) > as->fuseref)
#define neverfuse(as) (as->fuseref == FUSE_DISABLED)
/* True for the six load opcodes listed below. */
#define opisfusableload(o) \
  ((o) == IR_ALOAD || (o) == IR_HLOAD || (o) == IR_ULOAD || \
  (o) == IR_FLOAD || (o) == IR_SLOAD || (o) == IR_XLOAD)

/* Instruction selection for XMM moves. */
/* Both choices depend on the JIT_F_SPLIT_XMM flag in as->flags. */
#define XMM_MOVRR(as) ((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVSD : XO_MOVAPS)
#define XMM_MOVRM(as) ((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVLPD : XO_MOVSD)
98 | |||
/* Sparse limit checks using a red zone before the actual limit. */
#define MCLIM_REDZONE 64
/* Bail out via asm_mclimit() once the MCode pointer has dropped below the
** limit. Wrapped in do/while(0) so the expansion is a single statement and
** cannot capture an 'else' that follows the call site.
*/
#define checkmclim(as) \
  do { \
    if (LJ_UNLIKELY((as)->mcp < (as)->mclim)) asm_mclimit((as)); \
  } while (0)
103 | |||
104 | static LJ_NORET LJ_NOINLINE void asm_mclimit(ASMState *as) | ||
105 | { | ||
106 | lj_mcode_limiterr(as->J, (size_t)(as->mctop - as->mcp + 4*MCLIM_REDZONE)); | ||
107 | } | ||
108 | |||
109 | /* -- Emit x86 instructions ----------------------------------------------- */ | ||
110 | |||
/* Build a ModRM-style byte: mode plus the low 3 bits of r1 (shifted left
** by 3) and of r2.
*/
#define MODRM(mode, r1, r2) ((MCode)((mode)+(((r1)&7)<<3)+((r2)&7)))

#if LJ_64
/* Emit a REX prefix in front of p if rr or rb needs one. Wrapped in
** do/while(0) so that 'REXRB(...);' is a single statement and stays valid
** as the body of an if/else.
*/
#define REXRB(p, rr, rb) \
  do { MCode rex = 0x40 + (((rr)>>1)&4) + (((rb)>>3)&1); \
       if (rex != 0x40) *--(p) = rex; } while (0)
#define FORCE_REX 0x200
#else
#define REXRB(p, rr, rb) ((void)0)
#define FORCE_REX 0
#endif

/* Emit an 8 bit or 32 bit immediate below the current MCode pointer. */
#define emit_i8(as, i) (*--as->mcp = (MCode)(i))
#define emit_i32(as, i) (*(int32_t *)(as->mcp-4) = (i), as->mcp -= 4)

/* Emit a 16 bit x87 opcode below the current MCode pointer. */
#define emit_x87op(as, xo) \
  (*(uint16_t *)(as->mcp-2) = (uint16_t)(xo), as->mcp -= 2)
128 | |||
/* op */
/* Store the opcode bytes of xo below p+delta and return the new, lower code
** pointer. rr, rb and rx are only used for the REX prefix on 64 bit targets.
*/
static LJ_AINLINE MCode *emit_op(x86Op xo, Reg rr, Reg rb, Reg rx,
				 MCode *p, int delta)
{
  /* Low byte of xo, read as a signed value. It is compared with -2, -3 and
  ** -4 below and finally added to p.
  ** NOTE(review): presumably the negated opcode length -- confirm against
  ** the x86Op definitions.
  */
  int n = (int8_t)xo;
#if defined(__GNUC__)
  /* For compile-time constant opcodes store only one or two bytes. */
  if (__builtin_constant_p(xo) && n == -2)
    p[delta-2] = (MCode)(xo >> 24);
  else if (__builtin_constant_p(xo) && n == -3)
    *(uint16_t *)(p+delta-3) = (uint16_t)(xo >> 16);
  else
#endif
    *(uint32_t *)(p+delta-5) = (uint32_t)xo;  /* Generic 4 byte store. */
  p += n + delta;
#if LJ_64
  {
    /* Combine bits of rr, rx and rb into a REX prefix. */
    uint32_t rex = 0x40 + ((rr>>1)&(4+(FORCE_REX>>1)))+((rx>>2)&2)+((rb>>3)&1);
    if (rex != 0x40) {
      /* For n == -4 the byte at p becomes the REX prefix and the byte
      ** (xo >> 8) is stored in front of it instead.
      */
      if (n == -4) { *p = (MCode)rex; rex = (MCode)(xo >> 8); }
      *--p = (MCode)rex;
    }
  }
#else
  UNUSED(rr); UNUSED(rb); UNUSED(rx);
#endif
  return p;
}

/* op + modrm */
/* Writes the ModRM byte at p[delta-1], then emits the opcode in front. */
#define emit_opm(xo, mode, rr, rb, p, delta) \
  (p[(delta)-1] = MODRM((mode), (rr), (rb)), \
  emit_op((xo), (rr), (rb), 0, (p), (delta)))

/* op + modrm + sib */
/* Writes the SIB byte at p[-1] and the ModRM byte (with RID_ESP in the r/m
** field) at p[-2], then emits the opcode in front.
*/
#define emit_opmx(xo, mode, scale, rr, rb, rx, p) \
  (p[-1] = MODRM((scale), (rx), (rb)), \
  p[-2] = MODRM((mode), (rr), RID_ESP), \
  emit_op((xo), (rr), (rb), (rx), (p), -1))
167 | |||
168 | /* op r1, r2 */ | ||
169 | static void emit_rr(ASMState *as, x86Op xo, Reg r1, Reg r2) | ||
170 | { | ||
171 | MCode *p = as->mcp; | ||
172 | as->mcp = emit_opm(xo, XM_REG, r1, r2, p, 0); | ||
173 | } | ||
174 | |||
/* Convert a pointer to a 32 bit address operand. */
#if LJ_64 && defined(LUA_USE_ASSERT)
/* [addr] is sign-extended in x64 and must be in lower 2G (not 4G). */
/* Takes a pointer to const, since callers such as emit_rma() pass a
** 'const void *' and the pointee is never written.
*/
static int32_t ptr2addr(const void *p)
{
  lua_assert((uintptr_t)p < (uintptr_t)0x80000000);
  return i32ptr(p);
}
#else
#define ptr2addr(p) (i32ptr((p)))
#endif
185 | |||
186 | /* op r, [addr] */ | ||
187 | static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr) | ||
188 | { | ||
189 | MCode *p = as->mcp; | ||
190 | *(int32_t *)(p-4) = ptr2addr(addr); | ||
191 | #if LJ_64 | ||
192 | p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP); | ||
193 | as->mcp = emit_opm(xo, XM_OFS0, rr, RID_ESP, p, -5); | ||
194 | #else | ||
195 | as->mcp = emit_opm(xo, XM_OFS0, rr, RID_EBP, p, -4); | ||
196 | #endif | ||
197 | } | ||
198 | |||
199 | /* op r, [base+ofs] */ | ||
200 | static void emit_rmro(ASMState *as, x86Op xo, Reg rr, Reg rb, int32_t ofs) | ||
201 | { | ||
202 | MCode *p = as->mcp; | ||
203 | x86Mode mode; | ||
204 | if (ra_hasreg(rb)) { | ||
205 | if (ofs == 0 && (rb&7) != RID_EBP) { | ||
206 | mode = XM_OFS0; | ||
207 | } else if (checki8(ofs)) { | ||
208 | *--p = (MCode)ofs; | ||
209 | mode = XM_OFS8; | ||
210 | } else { | ||
211 | p -= 4; | ||
212 | *(int32_t *)p = ofs; | ||
213 | mode = XM_OFS32; | ||
214 | } | ||
215 | if ((rb&7) == RID_ESP) | ||
216 | *--p = MODRM(XM_SCALE1, RID_ESP, RID_ESP); | ||
217 | } else { | ||
218 | *(int32_t *)(p-4) = ofs; | ||
219 | #if LJ_64 | ||
220 | p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP); | ||
221 | p -= 5; | ||
222 | rb = RID_ESP; | ||
223 | #else | ||
224 | p -= 4; | ||
225 | rb = RID_EBP; | ||
226 | #endif | ||
227 | mode = XM_OFS0; | ||
228 | } | ||
229 | as->mcp = emit_opm(xo, mode, rr, rb, p, 0); | ||
230 | } | ||
231 | |||
232 | /* op r, [base+idx*scale+ofs] */ | ||
233 | static void emit_rmrxo(ASMState *as, x86Op xo, Reg rr, Reg rb, Reg rx, | ||
234 | x86Mode scale, int32_t ofs) | ||
235 | { | ||
236 | MCode *p = as->mcp; | ||
237 | x86Mode mode; | ||
238 | if (ofs == 0 && (rb&7) != RID_EBP) { | ||
239 | mode = XM_OFS0; | ||
240 | } else if (checki8(ofs)) { | ||
241 | mode = XM_OFS8; | ||
242 | *--p = (MCode)ofs; | ||
243 | } else { | ||
244 | mode = XM_OFS32; | ||
245 | p -= 4; | ||
246 | *(int32_t *)p = ofs; | ||
247 | } | ||
248 | as->mcp = emit_opmx(xo, mode, scale, rr, rb, rx, p); | ||
249 | } | ||
250 | |||
251 | /* op r, i */ | ||
252 | static void emit_gri(ASMState *as, x86Group xg, Reg rb, int32_t i) | ||
253 | { | ||
254 | MCode *p = as->mcp; | ||
255 | if (checki8(i)) { | ||
256 | p -= 3; | ||
257 | p[2] = (MCode)i; | ||
258 | p[0] = (MCode)(xg >> 16); | ||
259 | } else { | ||
260 | p -= 6; | ||
261 | *(int32_t *)(p+2) = i; | ||
262 | p[0] = (MCode)(xg >> 8); | ||
263 | } | ||
264 | p[1] = MODRM(XM_REG, xg, rb); | ||
265 | REXRB(p, 0, rb); | ||
266 | as->mcp = p; | ||
267 | } | ||
268 | |||
269 | /* op [base+ofs], i */ | ||
270 | static void emit_gmroi(ASMState *as, x86Group xg, Reg rb, int32_t ofs, | ||
271 | int32_t i) | ||
272 | { | ||
273 | x86Op xo; | ||
274 | if (checki8(i)) { | ||
275 | emit_i8(as, i); | ||
276 | xo = (x86Op)(((xg >> 16) << 24)+0xfe); | ||
277 | } else { | ||
278 | emit_i32(as, i); | ||
279 | xo = (x86Op)(((xg >> 8) << 24)+0xfe); | ||
280 | } | ||
281 | emit_rmro(as, xo, (Reg)xg, rb, ofs); | ||
282 | } | ||
283 | |||
/* shift r, i -- emits the 8 bit shift count, then XO_SHIFTi with the group
** xg in the register field.
*/
#define emit_shifti(as, xg, r, i) \
  (emit_i8(as, (i)), emit_rr(as, XO_SHIFTi, (Reg)(xg), (r)))
286 | |||
/* op r, rm/mrm */
/* If rb is RID_MRM, the operand is the fused memory operand held in as->mrm
** (base, idx, scale, ofs). Otherwise rb is a plain register operand.
*/
static void emit_mrm(ASMState *as, x86Op xo, Reg rr, Reg rb)
{
  MCode *p = as->mcp;
  x86Mode mode = XM_REG;
  if (rb == RID_MRM) {
    rb = as->mrm.base;
    if (rb == RID_NONE) {
      /* No base register: always emit a 32 bit offset. */
      rb = RID_EBP;
      mode = XM_OFS0;
      p -= 4;
      *(int32_t *)p = as->mrm.ofs;
      if (as->mrm.idx != RID_NONE)
	goto mrmidx;  /* Index register present: use the indexed form. */
#if LJ_64
      /* 64 bit: one extra byte selects the absolute form. */
      *--p = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
      rb = RID_ESP;
#endif
    } else {
      /* The offset is only omitted if it is zero and the base is not EBP. */
      if (as->mrm.ofs == 0 && (rb&7) != RID_EBP) {
	mode = XM_OFS0;
      } else if (checki8(as->mrm.ofs)) {
	*--p = (MCode)as->mrm.ofs;
	mode = XM_OFS8;
      } else {
	p -= 4;
	*(int32_t *)p = as->mrm.ofs;
	mode = XM_OFS32;
      }
      if (as->mrm.idx != RID_NONE) {
      mrmidx:  /* Also reached from the no-base case above. */
	as->mcp = emit_opmx(xo, mode, as->mrm.scale, rr, rb, as->mrm.idx, p);
	return;
      }
      /* An ESP base gets one extra byte in front of the offset. */
      if ((rb&7) == RID_ESP)
	*--p = MODRM(XM_SCALE1, RID_ESP, RID_ESP);
    }
  }
  as->mcp = emit_opm(xo, mode, rr, rb, p, 0);
}
327 | |||
328 | static void emit_addptr(ASMState *as, Reg r, int32_t ofs) | ||
329 | { | ||
330 | if (ofs) { | ||
331 | if ((as->flags & JIT_F_LEA_AGU)) | ||
332 | emit_rmro(as, XO_LEA, r, r, ofs); | ||
333 | else | ||
334 | emit_gri(as, XG_ARITHi(XOg_ADD), r, ofs); | ||
335 | } | ||
336 | } | ||
337 | |||
338 | /* -- Emit moves ---------------------------------------------------------- */ | ||
339 | |||
340 | /* Generic move between two regs. */ | ||
341 | static void emit_movrr(ASMState *as, Reg r1, Reg r2) | ||
342 | { | ||
343 | emit_rr(as, r1 < RID_MAX_GPR ? XO_MOV : XMM_MOVRR(as), r1, r2); | ||
344 | } | ||
345 | |||
346 | /* Generic move from [base+ofs]. */ | ||
347 | static void emit_movrmro(ASMState *as, Reg rr, Reg rb, int32_t ofs) | ||
348 | { | ||
349 | emit_rmro(as, rr < RID_MAX_GPR ? XO_MOV : XMM_MOVRM(as), rr, rb, ofs); | ||
350 | } | ||
351 | |||
352 | /* mov [base+ofs], i */ | ||
353 | static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i) | ||
354 | { | ||
355 | emit_i32(as, i); | ||
356 | emit_rmro(as, XO_MOVmi, 0, base, ofs); | ||
357 | } | ||
358 | |||
/* mov [base+ofs], r */
#define emit_movtomro(as, r, base, ofs) \
  emit_rmro(as, XO_MOVto, (r), (base), (ofs))

/* Get/set global_State fields. */
/* The field is addressed by its absolute address inside the global_State
** that belongs to as->J.
*/
#define emit_opgl(as, xo, r, field) \
  emit_rma(as, (xo), (r), (void *)&J2G(as->J)->field)
#define emit_getgl(as, r, field) emit_opgl(as, XO_MOV, (r), field)
#define emit_setgl(as, r, field) emit_opgl(as, XO_MOVto, (r), field)
/* Store a 32 bit immediate to a field. The immediate is emitted first. */
#define emit_setgli(as, field, i) \
  (emit_i32(as, i), emit_opgl(as, XO_MOVmi, 0, field))
370 | |||
371 | /* mov r, i / xor r, r */ | ||
372 | static void emit_loadi(ASMState *as, Reg r, int32_t i) | ||
373 | { | ||
374 | if (i == 0) { | ||
375 | emit_rr(as, XO_ARITH(XOg_XOR), r, r); | ||
376 | } else { | ||
377 | MCode *p = as->mcp; | ||
378 | *(int32_t *)(p-4) = i; | ||
379 | p[-5] = (MCode)(XI_MOVri+(r&7)); | ||
380 | p -= 5; | ||
381 | REXRB(p, 0, r); | ||
382 | as->mcp = p; | ||
383 | } | ||
384 | } | ||
385 | |||
/* mov r, addr */
/* Loads the address as a 32 bit constant, converted by ptr2addr(). */
#define emit_loada(as, r, addr) \
  emit_loadi(as, (r), ptr2addr((addr)))
389 | |||
390 | /* movsd r, [&tv->n] / xorps r, r */ | ||
391 | static void emit_loadn(ASMState *as, Reg r, cTValue *tv) | ||
392 | { | ||
393 | if (tvispzero(tv)) /* Use xor only for +0. */ | ||
394 | emit_rr(as, XO_XORPS, r, r); | ||
395 | else | ||
396 | emit_rma(as, XMM_MOVRM(as), r, &tv->n); | ||
397 | } | ||
398 | |||
399 | /* -- Emit branches ------------------------------------------------------- */ | ||
400 | |||
401 | /* Label for short jumps. */ | ||
402 | typedef MCode *MCLabel; | ||
403 | |||
404 | /* jcc short target */ | ||
405 | static void emit_sjcc(ASMState *as, int cc, MCLabel target) | ||
406 | { | ||
407 | MCode *p = as->mcp; | ||
408 | p[-1] = (MCode)(int8_t)(target-p); | ||
409 | p[-2] = (MCode)(XI_JCCs+(cc&15)); | ||
410 | as->mcp = p - 2; | ||
411 | } | ||
412 | |||
413 | /* jcc short (pending target) */ | ||
414 | static MCLabel emit_sjcc_label(ASMState *as, int cc) | ||
415 | { | ||
416 | MCode *p = as->mcp; | ||
417 | p[-1] = 0; | ||
418 | p[-2] = (MCode)(XI_JCCs+(cc&15)); | ||
419 | as->mcp = p - 2; | ||
420 | return p; | ||
421 | } | ||
422 | |||
423 | /* Fixup jcc short target. */ | ||
424 | static void emit_sfixup(ASMState *as, MCLabel source) | ||
425 | { | ||
426 | source[-1] = (MCode)(as->mcp-source); | ||
427 | } | ||
428 | |||
/* Return label pointing to current PC. */
/* This is simply the current MCode pointer. */
#define emit_label(as) ((as)->mcp)
431 | |||
432 | /* jcc target */ | ||
433 | static void emit_jcc(ASMState *as, int cc, MCode *target) | ||
434 | { | ||
435 | MCode *p = as->mcp; | ||
436 | int32_t addr = (int32_t)(target - p); | ||
437 | *(int32_t *)(p-4) = addr; | ||
438 | p[-5] = (MCode)(XI_JCCn+(cc&15)); | ||
439 | p[-6] = 0x0f; | ||
440 | as->mcp = p - 6; | ||
441 | } | ||
442 | |||
443 | /* call target */ | ||
444 | static void emit_call_(ASMState *as, MCode *target) | ||
445 | { | ||
446 | MCode *p = as->mcp; | ||
447 | *(int32_t *)(p-4) = (int32_t)(target - p); | ||
448 | p[-5] = XI_CALL; | ||
449 | as->mcp = p - 5; | ||
450 | } | ||
451 | |||
452 | #define emit_call(as, f) emit_call_(as, (MCode *)(void *)(f)) | ||
453 | |||
/* Argument setup for C calls. Up to 3 args need no stack adjustment. */
/* Argument narg (1-based) is stored at [esp+(narg-1)*4]. None of the
** expansions ends in a semicolon: the caller supplies it, so each macro
** acts as a single statement.
*/
#define emit_setargr(as, narg, r) \
  emit_movtomro(as, (r), RID_ESP, ((narg)-1)*4)
#define emit_setargi(as, narg, imm) \
  emit_movmroi(as, RID_ESP, ((narg)-1)*4, (imm))
#define emit_setargp(as, narg, ptr) \
  emit_setargi(as, (narg), ptr2addr((ptr)))
461 | |||
462 | /* -- Register allocator debugging ---------------------------------------- */ | ||
463 | |||
464 | /* #define LUAJIT_DEBUG_RA */ | ||
465 | |||
466 | #ifdef LUAJIT_DEBUG_RA | ||
467 | |||
468 | #include <stdio.h> | ||
469 | #include <stdarg.h> | ||
470 | |||
/* Register names for debug output, indexed by register ID: one stringized
** entry per register listed by GPRDEF and FPRDEF, then "mrm" and a NULL
** terminator.
*/
#define RIDNAME(name) #name,
static const char *const ra_regname[] = {
  GPRDEF(RIDNAME)
  FPRDEF(RIDNAME)
  "mrm",
  NULL
};
#undef RIDNAME
479 | |||
480 | static char ra_dbg_buf[65536]; | ||
481 | static char *ra_dbg_p; | ||
482 | static char *ra_dbg_merge; | ||
483 | static MCode *ra_dbg_mcp; | ||
484 | |||
485 | static void ra_dstart(void) | ||
486 | { | ||
487 | ra_dbg_p = ra_dbg_buf; | ||
488 | ra_dbg_merge = NULL; | ||
489 | ra_dbg_mcp = NULL; | ||
490 | } | ||
491 | |||
492 | static void ra_dflush(void) | ||
493 | { | ||
494 | fwrite(ra_dbg_buf, 1, (size_t)(ra_dbg_p-ra_dbg_buf), stdout); | ||
495 | ra_dstart(); | ||
496 | } | ||
497 | |||
498 | static void ra_dprintf(ASMState *as, const char *fmt, ...) /* Append one debug line to ra_dbg_buf. Directives: $r = Reg, $f = IRRef, $i = IRIns *, $s = spill slot. */ | ||
499 | { | ||
500 | char *p; | ||
501 | va_list argp; | ||
502 | va_start(argp, fmt); | ||
503 | p = ra_dbg_mcp == as->mcp ? ra_dbg_merge : ra_dbg_p; /* Overwrite from the merge point if as->mcp is unchanged since RA_DBG_REF(). */ | ||
504 | ra_dbg_mcp = NULL; | ||
505 | p += sprintf(p, "%08lx \033[36m%04d ", (unsigned long)(uintptr_t)as->mcp, as->curins-REF_BIAS); /* Cast matches %lx on 32 and 64 bit targets; \033 is the standard spelling of ESC. */ | ||
506 | for (;;) { /* Copy fmt, expanding each $-directive. */ | ||
507 | const char *e = strchr(fmt, '$'); | ||
508 | if (e == NULL) break; | ||
509 | memcpy(p, fmt, (size_t)(e-fmt)); | ||
510 | p += e-fmt; | ||
511 | if (e[1] == 'r') { | ||
512 | Reg r = va_arg(argp, Reg) & RID_MASK; | ||
513 | if (r <= RID_MAX) { | ||
514 | const char *q; | ||
515 | for (q = ra_regname[r]; *q; q++) | ||
516 | *p++ = *q >= 'A' && *q <= 'Z' ? *q + 0x20 : *q; /* Print the register name in lower case. */ | ||
517 | } else { | ||
518 | *p++ = '?'; | ||
519 | lua_assert(0); | ||
520 | } | ||
521 | } else if (e[1] == 'f' || e[1] == 'i') { | ||
522 | IRRef ref; | ||
523 | if (e[1] == 'f') | ||
524 | ref = va_arg(argp, IRRef); | ||
525 | else | ||
526 | ref = va_arg(argp, IRIns *) - as->ir; /* $i passes an instruction pointer; convert it to a ref. */ | ||
527 | if (ref >= REF_BIAS) | ||
528 | p += sprintf(p, "%04d", ref - REF_BIAS); | ||
529 | else | ||
530 | p += sprintf(p, "K%03d", REF_BIAS - ref); /* Refs below REF_BIAS get a K prefix. */ | ||
531 | } else if (e[1] == 's') { | ||
532 | uint32_t slot = va_arg(argp, uint32_t); | ||
533 | p += sprintf(p, "[esp+0x%x]", sps_scale(slot)); | ||
534 | } else { | ||
535 | lua_assert(0); | ||
536 | } | ||
537 | fmt = e+2; | ||
538 | } | ||
539 | va_end(argp); | ||
540 | while (*fmt) | ||
541 | *p++ = *fmt++; | ||
542 | *p++ = '\033'; *p++ = '['; *p++ = 'm'; *p++ = '\n'; /* Reset the terminal color and end the line. */ | ||
543 | if (p > ra_dbg_buf+sizeof(ra_dbg_buf)-256) { /* Write the buffer out early once fewer than 256 bytes remain. */ | ||
544 | fwrite(ra_dbg_buf, 1, (size_t)(p-ra_dbg_buf), stdout); | ||
545 | p = ra_dbg_buf; | ||
546 | } | ||
547 | ra_dbg_p = p; | ||
548 | } | ||
549 | |||
550 | #define RA_DBG_START() ra_dstart() /* Debug build: reset the debug buffer. */ | ||
551 | #define RA_DBG_FLUSH() ra_dflush() /* Debug build: write the debug buffer to stdout. */ | ||
552 | #define RA_DBG_REF() /* Emit an empty debug line; remember its start and as->mcp as the merge point. */ \ | ||
553 | do { char *_p = ra_dbg_p; ra_dprintf(as, ""); \ | ||
554 | ra_dbg_merge = _p; ra_dbg_mcp = as->mcp; } while (0) | ||
555 | #define RA_DBGX(x) ra_dprintf x /* x is a parenthesized argument list for ra_dprintf. */ | ||
556 | |||
557 | #else | ||
558 | #define RA_DBG_START() ((void)0) /* Debugging disabled: every hook below expands to a no-op. */ | ||
559 | #define RA_DBG_FLUSH() ((void)0) | ||
560 | #define RA_DBG_REF() ((void)0) | ||
561 | #define RA_DBGX(x) ((void)0) | ||
562 | #endif | ||
563 | |||
564 | /* -- Register allocator -------------------------------------------------- */ | ||
565 | |||
566 | #define ra_free(as, r) rset_set(as->freeset, (r)) /* Set register r in the free set. */ | ||
567 | #define ra_modified(as, r) rset_set(as->modset, (r)) /* Set register r in the modified set. */ | ||
568 | |||
569 | #define ra_used(ir) (ra_hasreg((ir)->r) || ra_hasspill((ir)->s)) /* True if ir has a register or a spill slot. */ | ||
570 | |||
571 | /* Setup register allocator: reset the register sets, PHI registers, costs and spill slot counters. */ | ||
572 | static void ra_setup(ASMState *as) | ||
573 | { | ||
574 | /* Initially all regs (except the stack pointer) are free for use. */ | ||
575 | as->freeset = RSET_ALL; | ||
576 | as->modset = RSET_EMPTY; | ||
577 | as->phiset = RSET_EMPTY; | ||
578 | memset(as->phireg, 0, sizeof(as->phireg)); | ||
579 | memset(as->cost, 0, sizeof(as->cost)); | ||
580 | as->cost[RID_ESP] = REGCOST(~0u, 0u); /* NOTE(review): presumably the highest cost, so ESP is never the cheapest candidate -- confirm. */ | ||
581 | |||
582 | /* Start slots for spill slot allocation. */ | ||
583 | as->evenspill = (SPS_FIRST+1)&~1; /* SPS_FIRST rounded up to an even slot. */ | ||
584 | as->oddspill = (SPS_FIRST&1) ? SPS_FIRST : 0; /* 0 means no single odd slot is pending (see ra_spill). */ | ||
585 | } | ||
586 | |||
587 | /* Rematerialize constants: free the register held by ir and emit the load that recreates its value there. Returns that register. */ | ||
588 | static Reg ra_rematk(ASMState *as, IRIns *ir) | ||
589 | { | ||
590 | Reg r = ir->r; | ||
591 | lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s)); | ||
592 | ra_free(as, r); | ||
593 | ra_modified(as, r); | ||
594 | ir->r = RID_INIT; /* Do not keep any hint. */ | ||
595 | RA_DBGX((as, "remat $i $r", ir, r)); | ||
596 | if (ir->o == IR_KNUM) { /* Number constant. */ | ||
597 | emit_loadn(as, r, ir_knum(ir)); | ||
598 | } else if (ir->o == IR_BASE) { /* BASE is reloaded from the global state field jit_base. */ | ||
599 | ra_sethint(ir->r, RID_BASE); /* Restore BASE register hint. */ | ||
600 | emit_getgl(as, r, jit_base); | ||
601 | } else { /* Remaining constants are loaded as an immediate from ir->i. */ | ||
602 | lua_assert(ir->o == IR_KINT || ir->o == IR_KGC || | ||
603 | ir->o == IR_KPTR || ir->o == IR_KNULL); | ||
604 | emit_loadi(as, r, ir->i); | ||
605 | } | ||
606 | return r; | ||
607 | } | ||
608 | |||
609 | /* Force a spill. Allocate a new spill slot if needed. Returns sps_scale() of the slot. */ | ||
610 | static int32_t ra_spill(ASMState *as, IRIns *ir) | ||
611 | { | ||
612 | int32_t slot = ir->s; | ||
613 | if (!ra_hasspill(slot)) { | ||
614 | if (irt_isnum(ir->t)) { /* Numbers take a whole even/odd slot pair. */ | ||
615 | slot = as->evenspill; | ||
616 | as->evenspill += 2; | ||
617 | } else if (as->oddspill) { /* Reuse the pending odd slot left over from an earlier split pair. */ | ||
618 | slot = as->oddspill; | ||
619 | as->oddspill = 0; | ||
620 | } else { /* Split a new pair: use the even slot, keep the odd one for later. */ | ||
621 | slot = as->evenspill; | ||
622 | as->oddspill = slot+1; | ||
623 | as->evenspill += 2; | ||
624 | } | ||
625 | if (as->evenspill > 256) /* Abort the trace when the slot counter passes 256; ir->s stores the slot as uint8_t. */ | ||
626 | lj_trace_err(as->J, LJ_TRERR_SPILLOV); | ||
627 | ir->s = (uint8_t)slot; | ||
628 | } | ||
629 | return sps_scale(slot); | ||
630 | } | ||
631 | |||
632 | /* Restore a register (marked as free). Rematerialize or force a spill. Returns the freed register. */ | ||
633 | static Reg ra_restore(ASMState *as, IRRef ref) | ||
634 | { | ||
635 | IRIns *ir = IR(ref); | ||
636 | if (irref_isk(ref) || ref == REF_BASE) { /* Constants and BASE are rematerialized instead of spilled. */ | ||
637 | return ra_rematk(as, ir); | ||
638 | } else { | ||
639 | Reg r = ir->r; | ||
640 | lua_assert(ra_hasreg(r)); | ||
641 | ra_free(as, r); | ||
642 | ra_modified(as, r); | ||
643 | ra_sethint(ir->r, r); /* Keep hint. */ | ||
644 | RA_DBGX((as, "restore $i $r", ir, r)); | ||
645 | emit_movrmro(as, r, RID_ESP, ra_spill(as, ir)); /* Force a spill. */ | ||
646 | return r; | ||
647 | } | ||
648 | } | ||
649 | |||
650 | /* Save a register to a spill slot: emit a store of r to the ESP-relative slot ir->s. */ | ||
651 | static LJ_AINLINE void ra_save(ASMState *as, IRIns *ir, Reg r) | ||
652 | { | ||
653 | RA_DBGX((as, "save $i $r", ir, r)); | ||
654 | emit_rmro(as, r < RID_MAX_GPR ? XO_MOVto : XO_MOVSDto, /* Register IDs below RID_MAX_GPR use XO_MOVto, all others XO_MOVSDto. */ | ||
655 | r, RID_ESP, sps_scale(ir->s)); | ||
656 | } | ||
657 | |||
658 | #define MINCOST(r) /* Lower 'cost' to as->cost[r] if r is in 'allow' and cheaper. */ \ | ||
659 | if (LJ_LIKELY(allow&RID2RSET(r)) && as->cost[r] < cost) \ | ||
660 | cost = as->cost[r] | ||
661 | |||
662 | /* Evict the register with the lowest cost, forcing a restore. Returns the evicted register. */ | ||
663 | static Reg ra_evict(ASMState *as, RegSet allow) | ||
664 | { | ||
665 | RegCost cost = ~(RegCost)0; | ||
666 | if (allow < RID2RSET(RID_MAX_GPR)) { /* No bit at or above RID_MAX_GPR is set: scan only the GPRs. */ | ||
667 | MINCOST(RID_EAX);MINCOST(RID_ECX);MINCOST(RID_EDX);MINCOST(RID_EBX); | ||
668 | MINCOST(RID_EBP);MINCOST(RID_ESI);MINCOST(RID_EDI); | ||
669 | #if LJ_64 | ||
670 | MINCOST(RID_R8D);MINCOST(RID_R9D);MINCOST(RID_R10D);MINCOST(RID_R11D); | ||
671 | MINCOST(RID_R12D);MINCOST(RID_R13D);MINCOST(RID_R14D);MINCOST(RID_R15D); | ||
672 | #endif | ||
673 | } else { /* Otherwise scan only the XMM registers. */ | ||
674 | MINCOST(RID_XMM0);MINCOST(RID_XMM1);MINCOST(RID_XMM2);MINCOST(RID_XMM3); | ||
675 | MINCOST(RID_XMM4);MINCOST(RID_XMM5);MINCOST(RID_XMM6);MINCOST(RID_XMM7); | ||
676 | #if LJ_64 | ||
677 | MINCOST(RID_XMM8);MINCOST(RID_XMM9);MINCOST(RID_XMM10);MINCOST(RID_XMM11); | ||
678 | MINCOST(RID_XMM12);MINCOST(RID_XMM13);MINCOST(RID_XMM14);MINCOST(RID_XMM15); | ||
679 | #endif | ||
680 | } | ||
681 | lua_assert(allow != RSET_EMPTY); | ||
682 | lua_assert(regcost_ref(cost) >= as->T->nk && regcost_ref(cost) < as->T->nins); | ||
683 | return ra_restore(as, regcost_ref(cost)); /* The cost value carries the ref that owns the cheapest register. */ | ||
684 | } | ||
685 | |||
686 | /* Return a register from allow that is marked as free; evict one from allow if none is free. */ | ||
687 | static LJ_AINLINE Reg ra_pick(ASMState *as, RegSet allow) | ||
688 | { | ||
689 | RegSet avail = allow & as->freeset; | ||
690 | /* At least one allowed register is free: let rset_picktop choose. */ | ||
691 | if (avail) | ||
692 | return rset_picktop(avail); | ||
693 | return ra_evict(as, allow); | ||
694 | } | ||
695 | |||
696 | /* Get a scratch register (marked as free). The register is picked from allow and recorded as modified. */ | ||
697 | static LJ_AINLINE Reg ra_scratch(ASMState *as, RegSet allow) | ||
698 | { | ||
699 | Reg r = ra_pick(as, allow); | ||
700 | ra_modified(as, r); | ||
701 | RA_DBGX((as, "scratch $r", r)); | ||
702 | return r; | ||
703 | } | ||
704 | |||
705 | /* Evict all registers from a set (if not free). The whole set is recorded as modified. */ | ||
706 | static void ra_evictset(ASMState *as, RegSet drop) | ||
707 | { | ||
708 | as->modset |= drop; | ||
709 | drop &= ~as->freeset; /* Only registers that are currently allocated need a restore. */ | ||
710 | while (drop) { | ||
711 | Reg r = rset_picktop(drop); | ||
712 | ra_restore(as, regcost_ref(as->cost[r])); /* as->cost[r] carries the ref that owns r. */ | ||
713 | rset_clear(drop, r); | ||
714 | checkmclim(as); | ||
715 | } | ||
716 | } | ||
717 | |||
718 | /* Allocate a register for ref from the allowed set of registers. | ||
719 | ** Note: this function assumes the ref does NOT have a register yet! | ||
720 | ** Picks a register (hint first), sets its cost, marks it as non-free and returns it. | ||
721 | */ | ||
722 | static Reg ra_allocref(ASMState *as, IRRef ref, RegSet allow) | ||
723 | { | ||
724 | IRIns *ir = IR(ref); | ||
725 | RegSet pick = as->freeset & allow; | ||
726 | Reg r; | ||
727 | lua_assert(ra_noreg(ir->r)); | ||
728 | if (pick) { | ||
729 | /* First check register hint from propagation or PHI. */ | ||
730 | if (ra_hashint(ir->r)) { | ||
731 | r = ra_gethint(ir->r); | ||
732 | if (rset_test(pick, r)) /* Use hint register if possible. */ | ||
733 | goto found; | ||
734 | /* Rematerialization is cheaper than missing a hint. */ | ||
735 | if (rset_test(allow, r) && irref_isk(regcost_ref(as->cost[r]))) { /* The hinted register is allowed and currently holds a constant. */ | ||
736 | ra_rematk(as, IR(regcost_ref(as->cost[r]))); | ||
737 | goto found; | ||
738 | } | ||
739 | RA_DBGX((as, "hintmiss $f $r", ref, r)); | ||
740 | } | ||
741 | /* Invariants should preferably get unused registers. */ | ||
742 | if (ref < as->loopref && !irt_isphi(ir->t)) /* Non-PHI refs below as->loopref pick from the bottom of the set. */ | ||
743 | r = rset_pickbot(pick); | ||
744 | else | ||
745 | r = rset_picktop(pick); | ||
746 | } else { /* Nothing free in the allowed set: evict. */ | ||
747 | r = ra_evict(as, allow); | ||
748 | } | ||
749 | found: | ||
750 | RA_DBGX((as, "alloc $f $r", ref, r)); | ||
751 | ir->r = (uint8_t)r; | ||
752 | rset_clear(as->freeset, r); | ||
753 | as->cost[r] = REGCOST_REF_T(ref, irt_t(ir->t)); /* Record the owning ref and its type as the register's cost. */ | ||
754 | return r; | ||
755 | } | ||
756 | |||
757 | /* Return the register of ref, allocating one from allow only if it has none yet. */ | ||
758 | static LJ_INLINE Reg ra_alloc1(ASMState *as, IRRef ref, RegSet allow) | ||
759 | { | ||
760 | Reg cur = IR(ref)->r; | ||
761 | /* An existing allocation is returned as-is; allow is not checked against it. */ | ||
762 | return ra_noreg(cur) ? ra_allocref(as, ref, allow) | ||
763 | : cur; | ||
764 | } | ||
765 | |||
766 | /* Rename register allocation and emit move: the ref held in 'down' is moved to the free register 'up'. */ | ||
767 | static void ra_rename(ASMState *as, Reg down, Reg up) | ||
768 | { | ||
769 | IRRef ren, ref = regcost_ref(as->cost[up] = as->cost[down]); /* Transfer the cost entry and fetch the owning ref. */ | ||
770 | IR(ref)->r = (uint8_t)up; | ||
771 | as->cost[down] = 0; | ||
772 | lua_assert((down < RID_MAX_GPR) == (up < RID_MAX_GPR)); | ||
773 | lua_assert(!rset_test(as->freeset, down) && rset_test(as->freeset, up)); | ||
774 | rset_set(as->freeset, down); /* 'down' is free ... */ | ||
775 | rset_clear(as->freeset, up); /* ... and 'up' is now allocated. */ | ||
776 | RA_DBGX((as, "rename $f $r $r", regcost_ref(as->cost[up]), down, up)); | ||
777 | emit_movrr(as, down, up); /* Backwards code generation needs inverse move. */ | ||
778 | if (!ra_hasspill(IR(ref)->s)) { /* Add the rename to the IR. */ | ||
779 | lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, as->snapno); | ||
780 | ren = tref_ref(lj_ir_emit(as->J)); | ||
781 | as->ir = as->T->ir; /* The IR may have been reallocated. */ | ||
782 | IR(ren)->r = (uint8_t)down; /* The RENAME instruction records the old register. */ | ||
783 | IR(ren)->s = SPS_NONE; | ||
784 | } | ||
785 | } | ||
786 | |||
787 | /* Pick a destination register (marked as free) and return it. | ||
788 | ** Caveat: allow is ignored if there's already a destination register. | ||
789 | ** Use ra_destreg() to get a specific register. | ||
790 | */ | ||
791 | static Reg ra_dest(ASMState *as, IRIns *ir, RegSet allow) | ||
792 | { | ||
793 | Reg dest = ir->r; | ||
794 | if (ra_hasreg(dest)) { /* Already allocated: free it and record it as modified. */ | ||
795 | ra_free(as, dest); | ||
796 | ra_modified(as, dest); | ||
797 | } else { | ||
798 | dest = ra_scratch(as, allow); | ||
799 | } | ||
800 | if (LJ_UNLIKELY(ra_hasspill(ir->s))) ra_save(as, ir, dest); /* The result also has a spill slot: emit the store to it. */ | ||
801 | return dest; | ||
802 | } | ||
803 | |||
804 | /* Force a specific destination register (marked as free). */ | ||
805 | static void ra_destreg(ASMState *as, IRIns *ir, Reg r) | ||
806 | { | ||
807 | Reg dest = ra_dest(as, ir, RID2RSET(r)); | ||
808 | if (dest != r) { /* ir already had a different register (ra_dest ignores allow then). */ | ||
809 | ra_scratch(as, RID2RSET(r)); /* Claim r itself, evicting its current owner if needed. */ | ||
810 | emit_movrr(as, dest, r); | ||
811 | } | ||
812 | } | ||
813 | |||
814 | /* Propagate dest register to left reference. Emit moves as needed. | ||
815 | ** This is a required fixup step for all 2-operand machine instructions. | ||
816 | */ | ||
817 | static void ra_left(ASMState *as, Reg dest, IRRef lref) | ||
818 | { | ||
819 | IRIns *ir = IR(lref); | ||
820 | Reg left = ir->r; | ||
821 | if (ra_noreg(left)) { | ||
822 | if (irref_isk(lref)) { /* Constant operand: try to load it straight into dest. */ | ||
823 | if (ir->o == IR_KNUM) { | ||
824 | cTValue *tv = ir_knum(ir); | ||
825 | /* FP remat needs a load except for +0. Still better than eviction. */ | ||
826 | if (tvispzero(tv) || !(as->freeset & RSET_FPR)) { | ||
827 | emit_loadn(as, dest, tv); | ||
828 | return; | ||
829 | } | ||
830 | } else { | ||
831 | lua_assert(ir->o == IR_KINT || ir->o == IR_KGC || | ||
832 | ir->o == IR_KPTR || ir->o == IR_KNULL); | ||
833 | emit_loadi(as, dest, ir->i); | ||
834 | return; | ||
835 | } | ||
836 | } | ||
837 | if (!ra_hashint(left) && !iscrossref(as, lref)) | ||
838 | ra_sethint(ir->r, dest); /* Propagate register hint. */ | ||
839 | left = ra_allocref(as, lref, dest < RID_MAX_GPR ? RSET_GPR : RSET_FPR); /* Allocate from the same register class as dest. */ | ||
840 | } | ||
841 | /* Move needed for true 3-operand instruction: y=a+b ==> y=a; y+=b. */ | ||
842 | if (dest != left) { | ||
843 | /* Use register renaming if dest is the PHI reg. */ | ||
844 | if (irt_isphi(ir->t) && as->phireg[dest] == lref) { | ||
845 | ra_modified(as, left); | ||
846 | ra_rename(as, left, dest); | ||
847 | } else { | ||
848 | emit_movrr(as, dest, left); | ||
849 | } | ||
850 | } | ||
851 | } | ||
852 | |||
853 | /* -- Exit stubs ---------------------------------------------------------- */ | ||
854 | |||
855 | /* Generate an exit stub group at the bottom of the reserved MCode memory. Returns the start address of the group. */ | ||
856 | static MCode *asm_exitstub_gen(ASMState *as, ExitNo group) | ||
857 | { | ||
858 | ExitNo i, groupofs = (group*EXITSTUBS_PER_GROUP) & 0xff; /* Low byte of the first exit number in this group. */ | ||
859 | MCode *mxp = as->mcbot; | ||
860 | MCode *mxpstart = mxp; | ||
861 | if (mxp + (2+2)*EXITSTUBS_PER_GROUP+8+5 >= as->mctop) /* Not enough room left for the whole group. */ | ||
862 | asm_mclimit(as); | ||
863 | /* Push low byte of exitno for each exit stub. */ | ||
864 | *mxp++ = XI_PUSHi8; *mxp++ = (MCode)groupofs; | ||
865 | for (i = 1; i < EXITSTUBS_PER_GROUP; i++) { | ||
866 | *mxp++ = XI_JMPs; *mxp++ = (MCode)((2+2)*(EXITSTUBS_PER_GROUP - i) - 2); /* Short jump over the remaining stubs to the shared tail. */ | ||
867 | *mxp++ = XI_PUSHi8; *mxp++ = (MCode)(groupofs + i); | ||
868 | } | ||
869 | /* Push the high byte of the exitno for each exit stub group. */ | ||
870 | *mxp++ = XI_PUSHi8; *mxp++ = (MCode)((group*EXITSTUBS_PER_GROUP)>>8); | ||
871 | /* Store DISPATCH in ExitInfo->dispatch. Account for the two push ops. */ | ||
872 | *mxp++ = XI_MOVmi; | ||
873 | *mxp++ = MODRM(XM_OFS8, 0, RID_ESP); | ||
874 | *mxp++ = MODRM(XM_SCALE1, RID_ESP, RID_ESP); | ||
875 | *mxp++ = 2*sizeof(void *); | ||
876 | *(int32_t *)mxp = ptr2addr(J2GG(as->J)->dispatch); mxp += 4; | ||
877 | /* Jump to exit handler which fills in the ExitState. */ | ||
878 | *mxp++ = XI_JMP; mxp += 4; | ||
879 | *((int32_t *)(mxp-4)) = (int32_t)((MCode *)lj_vm_exit_handler - mxp); /* Displacement is relative to the end of the jump. */ | ||
880 | /* Commit the code for this group (even if assembly fails later on). */ | ||
881 | lj_mcode_commitbot(as->J, mxp); | ||
882 | as->mcbot = mxp; | ||
883 | as->mclim = as->mcbot + MCLIM_REDZONE; | ||
884 | return mxpstart; | ||
885 | } | ||
886 | |||
887 | /* Make sure every exit stub group needed for nexits exits exists, generating the missing ones. */ | ||
888 | static void asm_exitstub_setup(ASMState *as, ExitNo nexits) | ||
889 | { | ||
890 | ExitNo grp; | ||
891 | if (nexits >= EXITSTUBS_PER_GROUP*LJ_MAX_EXITSTUBGR) | ||
892 | lj_trace_err(as->J, LJ_TRERR_SNAPOV); | ||
893 | for (grp = 0; grp < (nexits+EXITSTUBS_PER_GROUP-1)/EXITSTUBS_PER_GROUP; ++grp) | ||
894 | if (!as->J->exitstubgroup[grp]) | ||
895 | as->J->exitstubgroup[grp] = asm_exitstub_gen(as, grp); | ||
896 | } | ||
897 | |||
898 | /* -- Snapshot and guard handling ----------------------------------------- */ | ||
899 | |||
900 | /* Returns 1 if some FPR currently holds a constant (which could be rematerialized), else 0. */ | ||
901 | static int asm_snap_canremat(ASMState *as) | ||
902 | { | ||
903 | Reg fpr = RID_MIN_FPR; | ||
904 | for (; fpr < RID_MAX_FPR; fpr++) { | ||
905 | if (irref_isk(regcost_ref(as->cost[fpr]))) return 1; | ||
906 | } | ||
907 | return 0; | ||
908 | } | ||
909 | |||
910 | /* Allocate registers or spill slots for refs escaping to a snapshot (the one at as->snapno). */ | ||
911 | static void asm_snap_alloc(ASMState *as) | ||
912 | { | ||
913 | SnapShot *snap = &as->T->snap[as->snapno]; | ||
914 | IRRef2 *map = &as->T->snapmap[snap->mapofs]; | ||
915 | BCReg s, nslots = snap->nslots; | ||
916 | for (s = 0; s < nslots; s++) { | ||
917 | IRRef ref = snap_ref(map[s]); | ||
918 | if (!irref_isk(ref)) { /* Constants need no register or slot. */ | ||
919 | IRIns *ir = IR(ref); | ||
920 | if (!ra_used(ir) && ir->o != IR_FRAME) { /* Skip refs that already have a location, and FRAME instructions. */ | ||
921 | RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR; | ||
922 | /* Not a var-to-invar ref and got a free register (or a remat)? */ | ||
923 | if ((!iscrossref(as, ref) || irt_isphi(ir->t)) && | ||
924 | ((as->freeset & allow) || | ||
925 | (allow == RSET_FPR && asm_snap_canremat(as)))) { | ||
926 | ra_allocref(as, ref, allow); /* Allocate a register. */ | ||
927 | checkmclim(as); | ||
928 | RA_DBGX((as, "snapreg $f $r", ref, ir->r)); | ||
929 | } else { | ||
930 | ra_spill(as, ir); /* Otherwise force a spill slot. */ | ||
931 | RA_DBGX((as, "snapspill $f $s", ref, ir->s)); | ||
932 | } | ||
933 | } | ||
934 | } | ||
935 | } | ||
936 | } | ||
937 | |||
938 | /* All guards for a snapshot use the same exitno. This is currently the | ||
939 | ** same as the snapshot number. Since the exact origin of the exit cannot | ||
940 | ** be determined, all guards for the same snapshot must exit with the same | ||
941 | ** RegSP mapping. | ||
942 | ** A renamed ref which has been used in a prior guard for the same snapshot | ||
943 | ** would cause an inconsistency. The easy way out is to force a spill slot. | ||
944 | ** Returns 1 if ren is in the current snapshot (and was spilled), else 0. */ | ||
945 | static int asm_snap_checkrename(ASMState *as, IRRef ren) | ||
946 | { | ||
947 | SnapShot *snap = &as->T->snap[as->snapno]; | ||
948 | IRRef2 *map = &as->T->snapmap[snap->mapofs]; | ||
949 | BCReg s, nslots = snap->nslots; | ||
950 | for (s = 0; s < nslots; s++) { | ||
951 | IRRef ref = snap_ref(map[s]); | ||
952 | if (ref == ren) { | ||
953 | IRIns *ir = IR(ref); | ||
954 | ra_spill(as, ir); /* Register renamed, so force a spill slot. */ | ||
955 | RA_DBGX((as, "snaprensp $f $s", ref, ir->s)); | ||
956 | return 1; /* Found. */ | ||
957 | } | ||
958 | } | ||
959 | return 0; /* Not found. */ | ||
960 | } | ||
961 | |||
962 | /* Prepare snapshot for next guard instruction: step back to the snapshot covering as->curins, or recheck new renames. */ | ||
963 | static void asm_snap_prep(ASMState *as) | ||
964 | { | ||
965 | if (as->curins < as->snapref) { /* curins lies before the current snapshot: move to an earlier one. */ | ||
966 | do { | ||
967 | lua_assert(as->snapno != 0); | ||
968 | as->snapno--; | ||
969 | as->snapref = as->T->snap[as->snapno].ref; | ||
970 | } while (as->curins < as->snapref); | ||
971 | asm_snap_alloc(as); | ||
972 | as->snaprename = as->T->nins; /* Instructions appended from here on have not been checked yet. */ | ||
973 | } else { | ||
974 | /* Process any renames above the highwater mark. */ | ||
975 | for (; as->snaprename < as->T->nins; as->snaprename++) { | ||
976 | IRIns *ir = IR(as->snaprename); | ||
977 | if (asm_snap_checkrename(as, ir->op1)) | ||
978 | ir->op2 = REF_BIAS-1; /* Kill rename. */ | ||
979 | } | ||
980 | } | ||
981 | } | ||
982 | |||
983 | /* Emit conditional branch to exit for guard. The target is the exit stub for as->snapno. | ||
984 | ** It's important to emit this *after* all registers have been allocated, | ||
985 | ** because rematerializations may invalidate the flags. | ||
986 | */ | ||
987 | static void asm_guardcc(ASMState *as, int cc) | ||
988 | { | ||
989 | MCode *target = exitstub_addr(as->J, as->snapno); | ||
990 | MCode *p = as->mcp; | ||
991 | if (LJ_UNLIKELY(p == as->invmcp)) { /* This guard is emitted exactly at the recorded position as->invmcp. */ | ||
992 | as->loopinv = 1; | ||
993 | *(int32_t *)(p+1) = target - (p+5); /* Patch the 32 bit displacement of the instruction at p to reach the exit stub. */ | ||
994 | target = p; /* The guard itself now branches to p ... */ | ||
995 | cc ^= 1; /* ... NOTE(review): presumably with the inverse condition code -- confirm. */ | ||
996 | if (as->realign) { | ||
997 | emit_sjcc(as, cc, target); | ||
998 | return; | ||
999 | } | ||
1000 | } | ||
1001 | emit_jcc(as, cc, target); | ||
1002 | } | ||
1003 | |||
1004 | /* -- Memory operand fusion ----------------------------------------------- */ | ||
1005 | |||
1006 | /* Arch-specific field offsets, one byte per IRFLDEF entry (indexed by ir->op2 in asm_fusefref), plus a trailing 0. */ | ||
1007 | static const uint8_t field_ofs[IRFL__MAX+1] = { | ||
1008 | #define FLOFS(name, type, field) (uint8_t)offsetof(type, field), | ||
1009 | IRFLDEF(FLOFS) | ||
1010 | #undef FLOFS | ||
1011 | 0 | ||
1012 | }; | ||
1013 | |||
1014 | /* Maximum distance covered by the linear search below. Avoids O(n^2) behavior. */ | ||
1015 | #define CONFLICT_SEARCH_LIM 15 | ||
1016 | |||
1017 | /* Returns 1 if no instruction strictly between ref and curins has opcode 'conflict', else 0. */ | ||
1018 | static int noconflict(ASMState *as, IRRef ref, IROp conflict) | ||
1019 | { | ||
1020 | IRIns *irbase = as->ir; | ||
1021 | IRRef idx; | ||
1022 | if (as->curins > ref + CONFLICT_SEARCH_LIM) | ||
1023 | return 0; /* ref is too far away: report a conflict without searching. */ | ||
1024 | for (idx = as->curins - 1; idx > ref; idx--) | ||
1025 | if (irbase[idx].o == conflict) | ||
1026 | return 0; /* Found a conflicting instruction. */ | ||
1027 | return 1; /* Nothing in between conflicts. */ | ||
1028 | } | ||
1029 | |||
1030 | /* Fuse array reference into memory operand: fills as->mrm (base, idx, scale, ofs) for the AREF ir. */ | ||
1031 | static void asm_fusearef(ASMState *as, IRIns *ir, RegSet allow) | ||
1032 | { | ||
1033 | IRIns *irb = IR(ir->op1); | ||
1034 | IRIns *ira, *irx; | ||
1035 | lua_assert(ir->o == IR_AREF); | ||
1036 | lua_assert(irb->o == IR_FLOAD && irb->op2 == IRFL_TAB_ARRAY); | ||
1037 | ira = IR(irb->op1); | ||
1038 | if (ira->o == IR_TNEW && ira->op1 <= LJ_MAX_COLOSIZE && | ||
1039 | noconflict(as, irb->op1, IR_NEWREF)) { /* Table comes from a nearby TNEW with no NEWREF in between. */ | ||
1040 | /* We can avoid the FLOAD of t->array for colocated arrays. */ | ||
1041 | as->mrm.base = (uint8_t)ra_alloc1(as, irb->op1, allow); /* Table obj. */ | ||
1042 | as->mrm.ofs = -(int32_t)(ira->op1*sizeof(TValue)); /* Ofs to colo array. */ | ||
1043 | } else { | ||
1044 | as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow); /* Array base. */ | ||
1045 | as->mrm.ofs = 0; | ||
1046 | } | ||
1047 | irx = IR(ir->op2); | ||
1048 | if (irref_isk(ir->op2)) { /* Constant index: fold 8*index into the offset. */ | ||
1049 | as->mrm.ofs += 8*irx->i; | ||
1050 | as->mrm.idx = RID_NONE; | ||
1051 | } else { | ||
1052 | rset_clear(allow, as->mrm.base); /* The index register must differ from the base register. */ | ||
1053 | as->mrm.scale = XM_SCALE8; | ||
1054 | /* Fuse a constant ADD (e.g. t[i+1]) into the offset. | ||
1055 | ** Doesn't help much without ABCelim, but reduces register pressure. | ||
1056 | */ | ||
1057 | if (mayfuse(as, ir->op2) && ra_noreg(irx->r) && | ||
1058 | irx->o == IR_ADD && irref_isk(irx->op2)) { | ||
1059 | as->mrm.ofs += 8*IR(irx->op2)->i; | ||
1060 | as->mrm.idx = (uint8_t)ra_alloc1(as, irx->op1, allow); | ||
1061 | } else { | ||
1062 | as->mrm.idx = (uint8_t)ra_alloc1(as, ir->op2, allow); | ||
1063 | } | ||
1064 | } | ||
1065 | } | ||
1066 | |||
1067 | /* Fuse array/hash/upvalue reference into memory operand (result in as->mrm). | ||
1068 | ** Caveat: this may allocate GPRs for the base/idx registers. Be sure to | ||
1069 | ** pass the final allow mask, excluding any GPRs used for other inputs. | ||
1070 | ** In particular: 2-operand GPR instructions need to call ra_dest() first! | ||
1071 | */ | ||
1072 | static void asm_fuseahuref(ASMState *as, IRRef ref, RegSet allow) | ||
1073 | { | ||
1074 | IRIns *ir = IR(ref); | ||
1075 | if (ra_noreg(ir->r)) { /* Only try to fuse if the reference has no register of its own. */ | ||
1076 | switch ((IROp)ir->o) { | ||
1077 | case IR_AREF: | ||
1078 | if (mayfuse(as, ref)) { | ||
1079 | asm_fusearef(as, ir, allow); | ||
1080 | return; | ||
1081 | } | ||
1082 | break; | ||
1083 | case IR_HREFK: | ||
1084 | if (mayfuse(as, ref)) { | ||
1085 | as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow); | ||
1086 | as->mrm.ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node)); /* Offset = op2 of the instruction at ir->op2, scaled by sizeof(Node). */ | ||
1087 | as->mrm.idx = RID_NONE; | ||
1088 | return; | ||
1089 | } | ||
1090 | break; | ||
1091 | case IR_UREFC: | ||
1092 | if (irref_isk(ir->op1)) { /* Constant function: use the absolute address of the upvalue's tv field. */ | ||
1093 | GCfunc *fn = ir_kfunc(IR(ir->op1)); | ||
1094 | GCupval *uv = &gcref(fn->l.uvptr[ir->op2])->uv; | ||
1095 | as->mrm.ofs = ptr2addr(&uv->tv); | ||
1096 | as->mrm.base = as->mrm.idx = RID_NONE; | ||
1097 | return; | ||
1098 | } | ||
1099 | break; | ||
1100 | default: | ||
1101 | lua_assert(ir->o == IR_HREF || ir->o == IR_NEWREF || ir->o == IR_UREFO); | ||
1102 | break; | ||
1103 | } | ||
1104 | } | ||
1105 | as->mrm.base = (uint8_t)ra_alloc1(as, ref, allow); /* Not fused: use the reference's own register as a plain base. */ | ||
1106 | as->mrm.ofs = 0; | ||
1107 | as->mrm.idx = RID_NONE; | ||
1108 | } | ||
1109 | |||
1110 | /* Fuse FLOAD/FREF reference into memory operand: as->mrm addresses the field ir->op2 of the object ir->op1. */ | ||
1111 | static void asm_fusefref(ASMState *as, IRIns *ir, RegSet allow) | ||
1112 | { | ||
1113 | lua_assert(ir->o == IR_FLOAD || ir->o == IR_FREF); | ||
1114 | as->mrm.ofs = field_ofs[ir->op2]; | ||
1115 | as->mrm.idx = RID_NONE; | ||
1116 | if (irref_isk(ir->op1)) { /* Constant object: add its value to the offset, no base register. */ | ||
1117 | as->mrm.ofs += IR(ir->op1)->i; | ||
1118 | as->mrm.base = RID_NONE; | ||
1119 | } else { | ||
1120 | as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow); | ||
1121 | } | ||
1122 | } | ||
1123 | |||
1124 | /* Fuse string reference into memory operand: as->mrm addresses sizeof(GCstr) + op1 + op2. */ | ||
1125 | static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow) | ||
1126 | { | ||
1127 | IRIns *irr; | ||
1128 | lua_assert(ir->o == IR_STRREF); | ||
1129 | as->mrm.idx = as->mrm.base = RID_NONE; | ||
1130 | as->mrm.scale = XM_SCALE1; | ||
1131 | as->mrm.ofs = sizeof(GCstr); | ||
1132 | if (irref_isk(ir->op1)) { /* Constant op1: fold its value into the offset. */ | ||
1133 | as->mrm.ofs += IR(ir->op1)->i; | ||
1134 | } else { | ||
1135 | Reg r = ra_alloc1(as, ir->op1, allow); | ||
1136 | rset_clear(allow, r); /* Keep the second operand out of this register. */ | ||
1137 | as->mrm.base = (uint8_t)r; | ||
1138 | } | ||
1139 | irr = IR(ir->op2); | ||
1140 | if (irref_isk(ir->op2)) { | ||
1141 | as->mrm.ofs += irr->i; | ||
1142 | } else { | ||
1143 | Reg r; | ||
1144 | /* Fuse a constant add into the offset, e.g. string.sub(s, i+10). */ | ||
1145 | if (mayfuse(as, ir->op2) && irr->o == IR_ADD && irref_isk(irr->op2)) { | ||
1146 | as->mrm.ofs += IR(irr->op2)->i; | ||
1147 | r = ra_alloc1(as, irr->op1, allow); | ||
1148 | } else { | ||
1149 | r = ra_alloc1(as, ir->op2, allow); | ||
1150 | } | ||
1151 | if (as->mrm.base == RID_NONE) /* Use the base slot if op1 was a constant, else the index slot. */ | ||
1152 | as->mrm.base = (uint8_t)r; | ||
1153 | else | ||
1154 | as->mrm.idx = (uint8_t)r; | ||
1155 | } | ||
1156 | } | ||
1157 | |||
1158 | /* Fuse load into memory operand. Returns RID_MRM (operand described by as->mrm) or the register holding ref. */ | ||
1159 | static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) | ||
1160 | { | ||
1161 | IRIns *ir = IR(ref); | ||
1162 | if (ra_hasreg(ir->r)) { | ||
1163 | if (allow != RSET_EMPTY) return ir->r; /* Fast path. */ | ||
1164 | fusespill: | ||
1165 | /* Force a spill if only memory operands are allowed (asm_x87load). */ | ||
1166 | as->mrm.base = RID_ESP; | ||
1167 | as->mrm.ofs = ra_spill(as, ir); | ||
1168 | as->mrm.idx = RID_NONE; | ||
1169 | return RID_MRM; | ||
1170 | } | ||
1171 | if (ir->o == IR_KNUM) { | ||
1172 | lua_assert(allow != RSET_EMPTY); | ||
1173 | if (!(as->freeset & ~as->modset & RSET_FPR)) { /* No FPR is both free and unmodified: address the constant in memory. */ | ||
1174 | as->mrm.ofs = ptr2addr(ir_knum(ir)); | ||
1175 | as->mrm.base = as->mrm.idx = RID_NONE; | ||
1176 | return RID_MRM; | ||
1177 | } | ||
1178 | } else if (mayfuse(as, ref)) { | ||
1179 | RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR; /* Address registers must be GPRs. */ | ||
1180 | if (ir->o == IR_SLOAD) { | ||
1181 | if (!irt_isint(ir->t) && !(ir->op2 & IRSLOAD_PARENT)) { | ||
1182 | as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow); | ||
1183 | as->mrm.ofs = 8*((int32_t)ir->op1-1); /* Slot op1 is addressed at BASE + 8*(op1-1). */ | ||
1184 | as->mrm.idx = RID_NONE; | ||
1185 | return RID_MRM; | ||
1186 | } | ||
1187 | } else if (ir->o == IR_FLOAD) { | ||
1188 | /* Generic fusion is only ok for IRT_INT operand (but see asm_comp). */ | ||
1189 | if (irt_isint(ir->t) && noconflict(as, ref, IR_FSTORE)) { | ||
1190 | asm_fusefref(as, ir, xallow); | ||
1191 | return RID_MRM; | ||
1192 | } | ||
1193 | } else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) { | ||
1194 | if (noconflict(as, ref, ir->o + IRDELTA_L2S)) { /* ir->o + IRDELTA_L2S is the opcode checked as the conflict (presumably the matching store). */ | ||
1195 | asm_fuseahuref(as, ir->op1, xallow); | ||
1196 | return RID_MRM; | ||
1197 | } | ||
1198 | } else if (ir->o == IR_XLOAD) { | ||
1199 | /* Generic fusion is only ok for IRT_INT operand (but see asm_comp). | ||
1200 | ** Fusing unaligned memory operands is ok on x86 (except for SIMD types). | ||
1201 | */ | ||
1202 | if (irt_isint(ir->t)) { | ||
1203 | asm_fusestrref(as, IR(ir->op1), xallow); | ||
1204 | return RID_MRM; | ||
1205 | } | ||
1206 | } | ||
1207 | } | ||
1208 | if (!(as->freeset & allow) && | ||
1209 | (allow == RSET_EMPTY || ra_hasspill(ir->s) || ref < as->loopref)) /* No free register: use a spill slot rather than evicting in these cases. */ | ||
1210 | goto fusespill; | ||
1211 | return ra_allocref(as, ref, allow); | ||
1212 | } | ||
1213 | |||
1214 | /* -- Type conversions ---------------------------------------------------- */ | ||
1215 | |||
1216 | static void asm_tonum(ASMState *as, IRIns *ir) /* Emit CVTSI2SD from op1 (GPR or fused memory operand) into an FPR destination. */ | ||
1217 | { | ||
1218 | Reg dest = ra_dest(as, ir, RSET_FPR); | ||
1219 | Reg left = asm_fuseload(as, ir->op1, RSET_GPR); | ||
1220 | emit_mrm(as, XO_CVTSI2SD, dest, left); | ||
1221 | if (!(as->flags & JIT_F_SPLIT_XMM)) | ||
1222 | emit_rr(as, XO_XORPS, dest, dest); /* Avoid partial register stall. */ | ||
1223 | } | ||
1224 | |||
1225 | static void asm_tointg(ASMState *as, IRIns *ir, Reg left) /* Guarded conversion: CVTTSD2SI of left, converted back and compared against left. */ | ||
1226 | { | ||
1227 | Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); | ||
1228 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
1229 | asm_guardcc(as, CC_P); /* Code is generated backwards, so both guards run after the UCOMISD below. */ | ||
1230 | asm_guardcc(as, CC_NE); | ||
1231 | emit_rr(as, XO_UCOMISD, left, tmp); | ||
1232 | emit_rr(as, XO_CVTSI2SD, tmp, dest); | ||
1233 | if (!(as->flags & JIT_F_SPLIT_XMM)) | ||
1234 | emit_rr(as, XO_XORPS, tmp, tmp); /* Avoid partial register stall. */ | ||
1235 | emit_rr(as, XO_CVTTSD2SI, dest, left); | ||
1236 | /* Can't fuse since left is needed twice. */ | ||
1237 | } | ||
1238 | |||
1239 | static void asm_toint(ASMState *as, IRIns *ir) /* Emit CVTSD2SI from op1 (FPR or fused memory operand) into a GPR destination, without a guard. */ | ||
1240 | { | ||
1241 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
1242 | Reg left = asm_fuseload(as, ir->op1, RSET_FPR); | ||
1243 | emit_mrm(as, XO_CVTSD2SI, dest, left); | ||
1244 | } | ||
1245 | |||
1246 | static void asm_tobit(ASMState *as, IRIns *ir) /* Emit tmp = op1 + op2 (ADDSD), then MOVD from tmp into the GPR destination. */ | ||
1247 | { | ||
1248 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
1249 | Reg tmp = ra_noreg(IR(ir->op1)->r) ? /* Allocate op1 directly into tmp if it has no register yet, else take a scratch FPR. */ | ||
1250 | ra_alloc1(as, ir->op1, RSET_FPR) : | ||
1251 | ra_scratch(as, RSET_FPR); | ||
1252 | Reg right = asm_fuseload(as, ir->op2, rset_exclude(RSET_FPR, tmp)); | ||
1253 | emit_rr(as, XO_MOVDto, tmp, dest); | ||
1254 | emit_mrm(as, XO_ADDSD, tmp, right); | ||
1255 | ra_left(as, tmp, ir->op1); /* Make sure op1 ends up in tmp before the add. */ | ||
1256 | } | ||
1257 | |||
1258 | static void asm_strto(ASMState *as, IRIns *ir) /* Emit a guarded call to lj_str_numconv() on the string op1; the result is written to a stack slot. */ | ||
1259 | { | ||
1260 | Reg str; | ||
1261 | int32_t ofs; | ||
1262 | RegSet drop = RSET_SCRATCH; | ||
1263 | /* Force a spill slot for the destination register (if any). */ | ||
1264 | if ((drop & RSET_FPR) != RSET_FPR && ra_hasreg(ir->r)) | ||
1265 | rset_set(drop, ir->r); /* WIN64 doesn't spill all FPRs. */ | ||
1266 | ra_evictset(as, drop); | ||
1267 | asm_guardcc(as, CC_E); /* Exit if the call returned zero (TEST below sets the flags). */ | ||
1268 | emit_rr(as, XO_TEST, RID_RET, RID_RET); | ||
1269 | /* int lj_str_numconv(const char *s, TValue *n) */ | ||
1270 | emit_call(as, lj_str_numconv); | ||
1271 | ofs = sps_scale(ir->s); /* Use spill slot or slots SPS_TEMP1/2. */ | ||
1272 | if (ofs == 0) { /* Offset 0: pass ESP itself as the second argument. */ | ||
1273 | emit_setargr(as, 2, RID_ESP); | ||
1274 | } else { | ||
1275 | emit_setargr(as, 2, RID_RET); | ||
1276 | emit_rmro(as, XO_LEA, RID_RET, RID_ESP, ofs); | ||
1277 | } | ||
1278 | emit_setargr(as, 1, RID_RET); | ||
1279 | str = ra_alloc1(as, ir->op1, RSET_GPR); | ||
1280 | emit_rmro(as, XO_LEA, RID_RET, str, sizeof(GCstr)); /* First argument: address sizeof(GCstr) bytes past the string object. */ | ||
1281 | } | ||
1282 | |||
1283 | static void asm_tostr(ASMState *as, IRIns *ir) /* Emit a call to lj_str_fromnum() or lj_str_fromint(), depending on the type of op1; result in RID_RET. */ | ||
1284 | { | ||
1285 | IRIns *irl = IR(ir->op1); | ||
1286 | ra_destreg(as, ir, RID_RET); | ||
1287 | ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET)); | ||
1288 | as->gcsteps++; | ||
1289 | if (irt_isnum(irl->t)) { | ||
1290 | /* GCstr *lj_str_fromnum(lua_State *L, const lua_Number *np) */ | ||
1291 | emit_call(as, lj_str_fromnum); | ||
1292 | emit_setargr(as, 1, RID_RET); | ||
1293 | emit_getgl(as, RID_RET, jit_L); | ||
1294 | emit_setargr(as, 2, RID_RET); | ||
1295 | emit_rmro(as, XO_LEA, RID_RET, RID_ESP, ra_spill(as, irl)); /* The number is passed by address, so force it into a spill slot. */ | ||
1296 | } else { | ||
1297 | /* GCstr *lj_str_fromint(lua_State *L, int32_t k) */ | ||
1298 | emit_call(as, lj_str_fromint); | ||
1299 | emit_setargr(as, 1, RID_RET); | ||
1300 | emit_getgl(as, RID_RET, jit_L); | ||
1301 | emit_setargr(as, 2, ra_alloc1(as, ir->op1, RSET_GPR)); | ||
1302 | } | ||
1303 | } | ||
1304 | |||
1305 | /* -- Memory references --------------------------------------------------- */ | ||
1306 | |||
1307 | static void asm_aref(ASMState *as, IRIns *ir) /* Compute the address of an array element into a GPR destination. */ | ||
1308 | { | ||
1309 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
1310 | asm_fusearef(as, ir, RSET_GPR); | ||
1311 | if (!(as->mrm.idx == RID_NONE && as->mrm.ofs == 0)) /* An index register or nonzero offset needs an LEA. */ | ||
1312 | emit_mrm(as, XO_LEA, dest, RID_MRM); | ||
1313 | else if (as->mrm.base != dest) /* Plain base: a register move is enough, or nothing if it is already dest. */ | ||
1314 | emit_rr(as, XO_MOV, dest, as->mrm.base); | ||
1315 | } | ||
1316 | |||
1317 | /* Compute the hash of a constant key instruction. Must match with hashkey() and hashrot() in lj_tab.c. */ | ||
1318 | static uint32_t ir_khash(IRIns *ir) | ||
1319 | { | ||
1320 | uint32_t lo, hi; | ||
1321 | if (irt_isstr(ir->t)) { /* Strings: use the hash stored in the string object. */ | ||
1322 | return ir_kstr(ir)->hash; | ||
1323 | } else if (irt_isnum(ir->t)) { /* Numbers: mix both 32 bit halves, with the top bit of hi masked off. */ | ||
1324 | lo = ir_knum(ir)->u32.lo; | ||
1325 | hi = ir_knum(ir)->u32.hi & 0x7fffffff; | ||
1326 | } else if (irt_ispri(ir->t)) { /* Primitive types: the type's distance from IRT_FALSE is the hash. */ | ||
1327 | lua_assert(!irt_isnil(ir->t)); | ||
1328 | return irt_type(ir->t)-IRT_FALSE; | ||
1329 | } else { /* Everything else: derive both halves from the GC object address. */ | ||
1330 | lua_assert(irt_isaddr(ir->t)); | ||
1331 | lo = u32ptr(ir_kgc(ir)); | ||
1332 | hi = lo - 0x04c11db7; | ||
1333 | } | ||
1334 | lo ^= hi; hi = lj_rol(hi, 14); /* Mixing steps shared by the number and address cases. */ | ||
1335 | lo -= hi; hi = lj_rol(hi, 5); | ||
1336 | hi ^= lo; hi -= lj_rol(lo, 27); | ||
1337 | return hi; | ||
1338 | } | ||
1339 | |||
1340 | /* Merge NE(HREF, niltv) check. */ | ||
/* If the IR instruction following the current HREF is an NE guard on it,
** discard the compare + conditional jump already emitted for that guard by
** advancing as->mcp past them, and return the guard's exit address (decoded
** from the 32 bit jump displacement that ends right before the new as->mcp).
** Returns NULL if there is no such guard to merge.
*/
1341 | static MCode *merge_href_niltv(ASMState *as, IRIns *ir) | ||
1342 | { | ||
1343 | /* Assumes nothing else generates NE of HREF. */ | ||
1344 | if (ir[1].o == IR_NE && ir[1].op1 == as->curins) { | ||
1345 | if (LJ_64 && *as->mcp != XI_ARITHi) /* NOTE(review): presumably a prefix byte precedes the opcode -- confirm. */ | ||
1346 | as->mcp += 7+6; | ||
1347 | else | ||
1348 | as->mcp += 6+6; /* Kill cmp reg, imm32 + jz exit. */ | ||
1349 | return as->mcp + *(int32_t *)(as->mcp-4); /* Return exit address. */ | ||
1350 | } | ||
1351 | return NULL; | ||
1352 | } | ||
1353 | |||
1354 | /* Inlined hash lookup. Specialized for key type and for const keys. | ||
1355 | ** The equivalent C code is: | ||
1356 | ** Node *n = hashkey(t, key); | ||
1357 | ** do { | ||
1358 | ** if (lj_obj_equal(&n->key, key)) return &n->val; | ||
1359 | ** } while ((n = nextnode(n))); | ||
1360 | ** return niltv(L); | ||
1361 | */ | ||
/* Emit the inlined hash chain lookup for HREF.
** NOTE: the sections below are listed in reverse of execution order. At
** runtime the emitted code first computes the main position of the key
** (last section), then compares type and value of the node key, follows the
** next pointer while it is non-zero and finally handles the "not found" case
** (first section): either a jump to the merged NE guard exit or a load of the
** niltv address into dest.
** Constant keys are hashed at assembly time with ir_khash(); string keys use
** the hash stored in the string; all other keys are hashed by emitted code.
*/
1362 | static void asm_href(ASMState *as, IRIns *ir) | ||
1363 | { | ||
1364 | MCode *nilexit = merge_href_niltv(as, ir); /* Do this before any restores. */ | ||
1365 | RegSet allow = RSET_GPR; | ||
1366 | Reg dest = ra_dest(as, ir, allow); | ||
1367 | Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); | ||
1368 | Reg key = RID_NONE, tmp = RID_NONE; | ||
1369 | IRIns *irkey = IR(ir->op2); | ||
1370 | int isk = irref_isk(ir->op2); | ||
1371 | IRType1 kt = irkey->t; | ||
1372 | uint32_t khash; | ||
1373 | MCLabel l_end, l_loop, l_next; | ||
1374 | |||
1375 | if (!isk) { | ||
1376 | rset_clear(allow, tab); | ||
1377 | key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow); | ||
1378 | if (!irt_isstr(kt)) /* Only the emitted hash computation needs a temporary. */ | ||
1379 | tmp = ra_scratch(as, rset_exclude(allow, key)); | ||
1380 | } | ||
1381 | |||
1382 | /* Key not found in chain: jump to exit (if merged with NE) or load niltv. */ | ||
1383 | l_end = emit_label(as); | ||
1384 | if (nilexit) | ||
1385 | emit_jcc(as, CC_E, nilexit); /* XI_JMP is not found by lj_asm_patchexit. */ | ||
1386 | else | ||
1387 | emit_loada(as, dest, niltvg(J2G(as->J))); | ||
1388 | |||
1389 | /* Follow hash chain until the end. */ | ||
1390 | l_loop = emit_sjcc_label(as, CC_NZ); | ||
1391 | emit_rr(as, XO_TEST, dest, dest); | ||
1392 | emit_rmro(as, XO_MOV, dest, dest, offsetof(Node, next)); | ||
1393 | l_next = emit_label(as); | ||
1394 | |||
1395 | /* Type and value comparison. */ | ||
1396 | emit_sjcc(as, CC_E, l_end); | ||
1397 | if (irt_isnum(kt)) { | ||
1398 | if (isk) { | ||
1399 | /* Assumes -0.0 is already canonicalized to +0.0. */ | ||
1400 | emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.u32.lo), | ||
1401 | (int32_t)ir_knum(irkey)->u32.lo); | ||
1402 | emit_sjcc(as, CC_NE, l_next); | ||
1403 | emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.u32.hi), | ||
1404 | (int32_t)ir_knum(irkey)->u32.hi); | ||
1405 | } else { | ||
1406 | emit_sjcc(as, CC_P, l_next); | ||
1407 | emit_rmro(as, XO_UCOMISD, key, dest, offsetof(Node, key.n)); | ||
1408 | emit_sjcc(as, CC_A, l_next); | ||
1409 | /* The type check avoids NaN penalties and complaints from Valgrind. */ | ||
1410 | emit_i8(as, ~IRT_NUM); | ||
1411 | emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it)); | ||
1412 | } | ||
1413 | } else { | ||
1414 | if (!irt_ispri(kt)) { | ||
1415 | lua_assert(irt_isaddr(kt)); | ||
1416 | if (isk) | ||
1417 | emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.gcr), | ||
1418 | ptr2addr(ir_kgc(irkey))); | ||
1419 | else | ||
1420 | emit_rmro(as, XO_CMP, key, dest, offsetof(Node, key.gcr)); | ||
1421 | emit_sjcc(as, CC_NE, l_next); | ||
1422 | } | ||
1423 | lua_assert(!irt_isnil(kt)); | ||
1424 | emit_i8(as, ~irt_type(kt)); | ||
1425 | emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it)); | ||
1426 | } | ||
1427 | emit_sfixup(as, l_loop); | ||
1428 | checkmclim(as); | ||
1429 | |||
1430 | /* Load main position relative to tab->node into dest. */ | ||
1431 | khash = isk ? ir_khash(irkey) : 1; /* Non-constant key: any non-zero value selects the generic path. */ | ||
1432 | if (khash == 0) { /* Constant key with hash 0: main position is tab->node itself. */ | ||
1433 | emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, node)); | ||
1434 | } else { | ||
1435 | emit_rmro(as, XO_ARITH(XOg_ADD), dest, tab, offsetof(GCtab, node)); | ||
1436 | if ((as->flags & JIT_F_PREFER_IMUL)) { | ||
1437 | emit_i8(as, sizeof(Node)); | ||
1438 | emit_rr(as, XO_IMULi8, dest, dest); | ||
1439 | } else { /* Scale the index with LEA + SHL instead of IMUL. */ | ||
1440 | emit_shifti(as, XOg_SHL, dest, 3); | ||
1441 | emit_rmrxo(as, XO_LEA, dest, dest, dest, XM_SCALE2, 0); | ||
1442 | } | ||
1443 | if (isk) { | ||
1444 | emit_gri(as, XG_ARITHi(XOg_AND), dest, (int32_t)khash); | ||
1445 | emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, hmask)); | ||
1446 | } else if (irt_isstr(kt)) { | ||
1447 | emit_rmro(as, XO_ARITH(XOg_AND), dest, key, offsetof(GCstr, hash)); | ||
1448 | emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, hmask)); | ||
1449 | } else { /* Must match with hashrot() in lj_tab.c. */ | ||
1450 | emit_rmro(as, XO_ARITH(XOg_AND), dest, tab, offsetof(GCtab, hmask)); | ||
1451 | emit_rr(as, XO_ARITH(XOg_SUB), dest, tmp); | ||
1452 | emit_shifti(as, XOg_ROL, tmp, 27); | ||
1453 | emit_rr(as, XO_ARITH(XOg_XOR), dest, tmp); | ||
1454 | emit_shifti(as, XOg_ROL, dest, 5); | ||
1455 | emit_rr(as, XO_ARITH(XOg_SUB), tmp, dest); | ||
1456 | emit_shifti(as, XOg_ROL, dest, 14); | ||
1457 | emit_rr(as, XO_ARITH(XOg_XOR), tmp, dest); | ||
1458 | if (irt_isnum(kt)) { | ||
1459 | emit_rmro(as, XO_ARITH(XOg_AND), dest, RID_ESP, ra_spill(as, irkey)+4); /* High word from the key's spill slot. */ | ||
1460 | emit_loadi(as, dest, 0x7fffffff); | ||
1461 | emit_rr(as, XO_MOVDto, key, tmp); | ||
1462 | } else { | ||
1463 | emit_rr(as, XO_MOV, tmp, key); | ||
1464 | emit_rmro(as, XO_LEA, dest, key, -0x04c11db7); | ||
1465 | } | ||
1466 | } | ||
1467 | } | ||
1468 | } | ||
1469 | |||
/* Assemble HREFK: a hash reference to a constant key at a constant slot.
** The slot offset is kslot->op2 * sizeof(Node) relative to the node register.
** A guard (exit on CC_NE) checks that the node at that offset still holds the
** expected key: both 32 bit halves for numbers, otherwise the GC reference
** (skipped for primitives) plus the type tag. If the result is used, dest is
** set to node + ofs.
** NOTE: the emit_* calls are listed in reverse of execution order.
*/
1470 | static void asm_hrefk(ASMState *as, IRIns *ir) | ||
1471 | { | ||
1472 | IRIns *kslot = IR(ir->op2); | ||
1473 | IRIns *irkey = IR(kslot->op1); | ||
1474 | int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node)); | ||
1475 | Reg dest = ra_used(ir) ? ra_dest(as, ir, RSET_GPR) : RID_NONE; | ||
1476 | Reg node = ra_alloc1(as, ir->op1, RSET_GPR); | ||
1477 | MCLabel l_exit; | ||
1478 | lua_assert(ofs % sizeof(Node) == 0); | ||
1479 | if (ra_hasreg(dest)) { | ||
1480 | if (ofs != 0) { | ||
1481 | if (dest == node && !(as->flags & JIT_F_LEA_AGU)) | ||
1482 | emit_gri(as, XG_ARITHi(XOg_ADD), dest, ofs); | ||
1483 | else | ||
1484 | emit_rmro(as, XO_LEA, dest, node, ofs); | ||
1485 | } else if (dest != node) { | ||
1486 | emit_rr(as, XO_MOV, dest, node); | ||
1487 | } | ||
1488 | } | ||
1489 | asm_guardcc(as, CC_NE); | ||
1490 | l_exit = emit_label(as); /* Target for the short jumps taken when the first compare already fails. */ | ||
1491 | if (irt_isnum(irkey->t)) { | ||
1492 | /* Assumes -0.0 is already canonicalized to +0.0. */ | ||
1493 | emit_gmroi(as, XG_ARITHi(XOg_CMP), node, | ||
1494 | ofs + (int32_t)offsetof(Node, key.u32.lo), | ||
1495 | (int32_t)ir_knum(irkey)->u32.lo); | ||
1496 | emit_sjcc(as, CC_NE, l_exit); | ||
1497 | emit_gmroi(as, XG_ARITHi(XOg_CMP), node, | ||
1498 | ofs + (int32_t)offsetof(Node, key.u32.hi), | ||
1499 | (int32_t)ir_knum(irkey)->u32.hi); | ||
1500 | } else { | ||
1501 | if (!irt_ispri(irkey->t)) { | ||
1502 | lua_assert(irt_isgcv(irkey->t)); | ||
1503 | emit_gmroi(as, XG_ARITHi(XOg_CMP), node, | ||
1504 | ofs + (int32_t)offsetof(Node, key.gcr), | ||
1505 | ptr2addr(ir_kgc(irkey))); | ||
1506 | emit_sjcc(as, CC_NE, l_exit); | ||
1507 | } | ||
1508 | lua_assert(!irt_isnil(irkey->t)); | ||
1509 | emit_i8(as, ~irt_type(irkey->t)); | ||
1510 | emit_rmro(as, XO_ARITHi8, XOg_CMP, node, | ||
1511 | ofs + (int32_t)offsetof(Node, key.it)); | ||
1512 | } | ||
1513 | } | ||
1514 | |||
/* Assemble NEWREF as a call to lj_tab_newkey(L, t, key).
** The key is passed as a pointer to a TValue:
** - constant numbers: pointer to the constant itself,
** - other numbers: address of the key's spill slot,
** - everything else: address of g->tmptv, which the emitted code fills with
**   the value (omitted for primitives) and the type tag at offset 4.
** The result is pinned to RID_RET; the other scratch registers are evicted.
** NOTE: the emit_* calls are listed in reverse of execution order.
*/
1515 | static void asm_newref(ASMState *as, IRIns *ir) | ||
1516 | { | ||
1517 | IRRef keyref = ir->op2; | ||
1518 | IRIns *irkey = IR(keyref); | ||
1519 | RegSet allow = RSET_GPR; | ||
1520 | Reg tab, tmp; | ||
1521 | ra_destreg(as, ir, RID_RET); | ||
1522 | ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET)); | ||
1523 | tab = ra_alloc1(as, ir->op1, allow); | ||
1524 | tmp = ra_scratch(as, rset_clear(allow, tab)); | ||
1525 | /* TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key) */ | ||
1526 | emit_call(as, lj_tab_newkey); | ||
1527 | emit_setargr(as, 1, tmp); | ||
1528 | emit_setargr(as, 2, tab); | ||
1529 | emit_getgl(as, tmp, jit_L); | ||
1530 | if (irt_isnum(irkey->t)) { | ||
1531 | /* For numbers use the constant itself or a spill slot as a TValue. */ | ||
1532 | if (irref_isk(keyref)) { | ||
1533 | emit_setargp(as, 3, ir_knum(irkey)); | ||
1534 | } else { | ||
1535 | emit_setargr(as, 3, tmp); | ||
1536 | emit_rmro(as, XO_LEA, tmp, RID_ESP, ra_spill(as, irkey)); | ||
1537 | } | ||
1538 | } else { | ||
1539 | /* Otherwise use g->tmptv to hold the TValue. */ | ||
1540 | lua_assert(irt_ispri(irkey->t) || irt_isaddr(irkey->t)); | ||
1541 | emit_setargr(as, 3, tmp); | ||
1542 | if (!irref_isk(keyref)) { | ||
1543 | Reg src = ra_alloc1(as, keyref, rset_exclude(allow, tmp)); | ||
1544 | emit_movtomro(as, src, tmp, 0); | ||
1545 | } else if (!irt_ispri(irkey->t)) { | ||
1546 | emit_movmroi(as, tmp, 0, irkey->i); | ||
1547 | } | ||
1548 | emit_movmroi(as, tmp, 4, irt_toitype(irkey->t)); /* Type tag is stored at offset 4. */ | ||
1549 | emit_loada(as, tmp, &J2G(as->J)->tmptv); | ||
1550 | } | ||
1551 | } | ||
1552 | |||
/* Assemble an upvalue reference (UREFO/UREFC). Nothing is emitted if the
** result is unused.
** - Constant function: load the value pointer directly from the address of
**   the upvalue's v field.
** - Otherwise: load the upvalue from func->uvptr[op2] (4 bytes per entry).
**   For UREFC a guard exits unless uv->closed == 1 and dest is set to the
**   address of uv->tv; for any other opcode dest is loaded from uv->v.
** NOTE: the emit_* calls are listed in reverse of execution order.
*/
1553 | static void asm_uref(ASMState *as, IRIns *ir) | ||
1554 | { | ||
1555 | /* NYI: Check that UREFO is still open and not aliasing a slot. */ | ||
1556 | if (ra_used(ir)) { | ||
1557 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
1558 | if (irref_isk(ir->op1)) { | ||
1559 | GCfunc *fn = ir_kfunc(IR(ir->op1)); | ||
1560 | TValue **v = &gcref(fn->l.uvptr[ir->op2])->uv.v; | ||
1561 | emit_rma(as, XO_MOV, dest, v); | ||
1562 | } else { | ||
1563 | Reg uv = ra_scratch(as, RSET_GPR); | ||
1564 | Reg func = ra_alloc1(as, ir->op1, RSET_GPR); | ||
1565 | if (ir->o == IR_UREFC) { | ||
1566 | emit_rmro(as, XO_LEA, dest, uv, offsetof(GCupval, tv)); | ||
1567 | asm_guardcc(as, CC_NE); | ||
1568 | emit_i8(as, 1); | ||
1569 | emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed)); | ||
1570 | } else { | ||
1571 | emit_rmro(as, XO_MOV, dest, uv, offsetof(GCupval, v)); | ||
1572 | } | ||
1573 | emit_rmro(as, XO_MOV, uv, func, | ||
1574 | (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)ir->op2); | ||
1575 | } | ||
1576 | } | ||
1577 | } | ||
1578 | |||
/* Compute the address of an object field into the destination register:
** asm_fusefref() sets up as->mrm and a LEA of that operand is emitted.
*/
1579 | static void asm_fref(ASMState *as, IRIns *ir) | ||
1580 | { | ||
1581 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
1582 | asm_fusefref(as, ir, RSET_GPR); | ||
1583 | emit_mrm(as, XO_LEA, dest, RID_MRM); | ||
1584 | } | ||
1585 | |||
/* Compute a string data reference into the destination register.
** After asm_fusestrref() has filled as->mrm, pick the cheapest form:
** - no base register: load the offset as an immediate,
** - base is dest and there is no index: add the offset in place,
** - otherwise: LEA of the full operand.
*/
1586 | static void asm_strref(ASMState *as, IRIns *ir) | ||
1587 | { | ||
1588 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
1589 | asm_fusestrref(as, ir, RSET_GPR); | ||
1590 | if (as->mrm.base == RID_NONE) | ||
1591 | emit_loadi(as, dest, as->mrm.ofs); | ||
1592 | else if (as->mrm.base == dest && as->mrm.idx == RID_NONE) | ||
1593 | emit_gri(as, XG_ARITHi(XOg_ADD), dest, as->mrm.ofs); | ||
1594 | else | ||
1595 | emit_mrm(as, XO_LEA, dest, RID_MRM); | ||
1596 | } | ||
1597 | |||
1598 | /* -- Loads and stores ---------------------------------------------------- */ | ||
1599 | |||
/* Assemble a field load into a GPR. The load opcode is selected by result
** type: sign/zero-extending byte or word loads for I8/U8/I16/U16, a plain
** MOV for integer and address types. The memory operand comes from
** asm_fusefref().
*/
1600 | static void asm_fload(ASMState *as, IRIns *ir) | ||
1601 | { | ||
1602 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
1603 | x86Op xo; | ||
1604 | asm_fusefref(as, ir, RSET_GPR); | ||
1605 | switch (irt_type(ir->t)) { | ||
1606 | case IRT_I8: xo = XO_MOVSXb; break; | ||
1607 | case IRT_U8: xo = XO_MOVZXb; break; | ||
1608 | case IRT_I16: xo = XO_MOVSXw; break; | ||
1609 | case IRT_U16: xo = XO_MOVZXw; break; | ||
1610 | default: | ||
1611 | lua_assert(irt_isint(ir->t) || irt_isaddr(ir->t)); | ||
1612 | xo = XO_MOV; | ||
1613 | break; | ||
1614 | } | ||
1615 | emit_mrm(as, xo, dest, RID_MRM); | ||
1616 | } | ||
1617 | |||
/* Assemble a field store. The value is stored from a register unless it is a
** constant of a type other than I16/U16, in which case an immediate store is
** emitted (8 bit immediate for I8/U8, 32 bit immediate otherwise). Byte
** stores restrict the source to RSET_GPR8. The memory operand comes from
** asm_fusefref() on the field reference (op1).
*/
1618 | static void asm_fstore(ASMState *as, IRIns *ir) | ||
1619 | { | ||
1620 | RegSet allow = RSET_GPR; | ||
1621 | Reg src = RID_NONE; | ||
1622 | /* The IRT_I16/IRT_U16 stores should never be simplified for constant | ||
1623 | ** values since mov word [mem], imm16 has a length-changing prefix. | ||
1624 | */ | ||
1625 | if (!irref_isk(ir->op2) || irt_isi16(ir->t) || irt_isu16(ir->t)) { | ||
1626 | RegSet allow8 = (irt_isi8(ir->t) || irt_isu8(ir->t)) ? RSET_GPR8 : RSET_GPR; | ||
1627 | src = ra_alloc1(as, ir->op2, allow8); | ||
1628 | rset_clear(allow, src); /* Keep the fused address registers distinct from src. */ | ||
1629 | } | ||
1630 | asm_fusefref(as, IR(ir->op1), allow); | ||
1631 | if (ra_hasreg(src)) { | ||
1632 | x86Op xo; | ||
1633 | switch (irt_type(ir->t)) { | ||
1634 | case IRT_I8: case IRT_U8: xo = XO_MOVtob; src |= FORCE_REX; break; /* NOTE(review): FORCE_REX presumably selects the low-byte register encoding on x64 -- confirm. */ | ||
1635 | case IRT_I16: case IRT_U16: xo = XO_MOVtow; break; | ||
1636 | default: | ||
1637 | lua_assert(irt_isint(ir->t) || irt_isaddr(ir->t)); | ||
1638 | xo = XO_MOVto; | ||
1639 | break; | ||
1640 | } | ||
1641 | emit_mrm(as, xo, src, RID_MRM); | ||
1642 | } else { | ||
1643 | if (irt_isi8(ir->t) || irt_isu8(ir->t)) { | ||
1644 | emit_i8(as, IR(ir->op2)->i); | ||
1645 | emit_mrm(as, XO_MOVmib, 0, RID_MRM); | ||
1646 | } else { | ||
1647 | lua_assert(irt_isint(ir->t) || irt_isaddr(ir->t)); | ||
1648 | emit_i32(as, IR(ir->op2)->i); | ||
1649 | emit_mrm(as, XO_MOVmi, 0, RID_MRM); | ||
1650 | } | ||
1651 | } | ||
1652 | } | ||
1653 | |||
/* Assemble a typed load from an array/hash/upvalue reference.
** If the result is used, the value is loaded into a GPR or FPR (numbers).
** Independent of that, a guarded type check compares the 8 bit immediate
** ~type against the word at offset +4 of the same memory operand; the guard
** exits on CC_A for numbers and on CC_NE for all other types.
** NOTE: the emit_* calls are listed in reverse of execution order, so the
** type check runs before the load.
*/
1654 | static void asm_ahuload(ASMState *as, IRIns *ir) | ||
1655 | { | ||
1656 | RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR; | ||
1657 | lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t)); | ||
1658 | if (ra_used(ir)) { | ||
1659 | Reg dest = ra_dest(as, ir, allow); | ||
1660 | asm_fuseahuref(as, ir->op1, RSET_GPR); | ||
1661 | emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XMM_MOVRM(as), dest, RID_MRM); | ||
1662 | } else { | ||
1663 | asm_fuseahuref(as, ir->op1, RSET_GPR); | ||
1664 | } | ||
1665 | /* Always do the type check, even if the load result is unused. */ | ||
1666 | asm_guardcc(as, irt_isnum(ir->t) ? CC_A : CC_NE); | ||
1667 | emit_i8(as, ~irt_type(ir->t)); | ||
1668 | as->mrm.ofs += 4; /* Address the type tag word of the same slot. */ | ||
1669 | emit_mrm(as, XO_ARITHi8, XOg_CMP, RID_MRM); | ||
1670 | } | ||
1671 | |||
/* Assemble a typed store to an array/hash/upvalue reference.
** Numbers are stored with a single 8 byte MOVSD from an FPR. For all other
** types the value word is stored from a register or as a 32 bit immediate
** (no value word is written for constant primitives), and the type tag
** ~type is stored as a 32 bit immediate at offset +4.
*/
1672 | static void asm_ahustore(ASMState *as, IRIns *ir) | ||
1673 | { | ||
1674 | if (irt_isnum(ir->t)) { | ||
1675 | Reg src = ra_alloc1(as, ir->op2, RSET_FPR); | ||
1676 | asm_fuseahuref(as, ir->op1, RSET_GPR); | ||
1677 | emit_mrm(as, XO_MOVSDto, src, RID_MRM); | ||
1678 | } else { | ||
1679 | IRIns *irr = IR(ir->op2); | ||
1680 | RegSet allow = RSET_GPR; | ||
1681 | Reg src = RID_NONE; | ||
1682 | if (!irref_isk(ir->op2)) { | ||
1683 | src = ra_alloc1(as, ir->op2, allow); | ||
1684 | rset_clear(allow, src); | ||
1685 | } | ||
1686 | asm_fuseahuref(as, ir->op1, allow); | ||
1687 | if (ra_hasreg(src)) { | ||
1688 | emit_mrm(as, XO_MOVto, src, RID_MRM); | ||
1689 | } else if (!irt_ispri(irr->t)) { | ||
1690 | lua_assert(irt_isaddr(ir->t)); | ||
1691 | emit_i32(as, irr->i); | ||
1692 | emit_mrm(as, XO_MOVmi, 0, RID_MRM); | ||
1693 | } | ||
1694 | as->mrm.ofs += 4; /* Switch the operand to the type tag word. */ | ||
1695 | emit_i32(as, (int32_t)~irt_type(ir->t)); | ||
1696 | emit_mrm(as, XO_MOVmi, 0, RID_MRM); | ||
1697 | } | ||
1698 | } | ||
1699 | |||
/* Assemble a stack slot load. The slot lives at 8*(op1-1) bytes from the
** base register (REF_BASE).
** - Integer result: the slot is loaded as a number into a scratch FPR and
**   converted by asm_tointg(); the type check then tests for a number.
** - Used number/address result: plain load into dest.
** - Unused result: only the type check remains; without a guard nothing is
**   emitted at all.
** A guarded type check compares ~type against the word at ofs+4 and exits on
** CC_A for numbers, CC_NE otherwise.
** NOTE: the emit_* calls are listed in reverse of execution order.
*/
1700 | static void asm_sload(ASMState *as, IRIns *ir) | ||
1701 | { | ||
1702 | int32_t ofs = 8*((int32_t)ir->op1-1); | ||
1703 | IRType1 t = ir->t; | ||
1704 | Reg base; | ||
1705 | lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ | ||
1706 | if (irt_isint(t)) { | ||
1707 | Reg left = ra_scratch(as, RSET_FPR); | ||
1708 | asm_tointg(as, ir, left); /* Frees dest reg. Do this before base alloc. */ | ||
1709 | base = ra_alloc1(as, REF_BASE, RSET_GPR); | ||
1710 | emit_rmro(as, XMM_MOVRM(as), left, base, ofs); | ||
1711 | t.irt = IRT_NUM; /* Continue with a regular number type check. */ | ||
1712 | } else if (ra_used(ir)) { | ||
1713 | RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR; | ||
1714 | Reg dest = ra_dest(as, ir, allow); | ||
1715 | lua_assert(irt_isnum(ir->t) || irt_isaddr(ir->t)); | ||
1716 | base = ra_alloc1(as, REF_BASE, RSET_GPR); | ||
1717 | emit_movrmro(as, dest, base, ofs); | ||
1718 | } else { | ||
1719 | if (!irt_isguard(ir->t)) | ||
1720 | return; /* No type check: avoid base alloc. */ | ||
1721 | base = ra_alloc1(as, REF_BASE, RSET_GPR); | ||
1722 | } | ||
1723 | if (irt_isguard(ir->t)) { | ||
1724 | /* Need type check, even if the load result is unused. */ | ||
1725 | asm_guardcc(as, irt_isnum(t) ? CC_A : CC_NE); | ||
1726 | emit_i8(as, ~irt_type(t)); | ||
1727 | emit_rmro(as, XO_ARITHi8, XOg_CMP, base, ofs+4); | ||
1728 | } | ||
1729 | } | ||
1730 | |||
/* Assemble an extended load into a GPR. The address operand is fused from
** the STRREF in op1; the opcode is chosen by result type (sign/zero-extending
** byte or word load, or a plain MOV for integers).
*/
1731 | static void asm_xload(ASMState *as, IRIns *ir) | ||
1732 | { | ||
1733 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
1734 | x86Op xo; | ||
1735 | asm_fusestrref(as, IR(ir->op1), RSET_GPR); /* For now only support STRREF. */ | ||
1736 | /* ir->op2 is ignored -- unaligned loads are ok on x86. */ | ||
1737 | switch (irt_type(ir->t)) { | ||
1738 | case IRT_I8: xo = XO_MOVSXb; break; | ||
1739 | case IRT_U8: xo = XO_MOVZXb; break; | ||
1740 | case IRT_I16: xo = XO_MOVSXw; break; | ||
1741 | case IRT_U16: xo = XO_MOVZXw; break; | ||
1742 | default: lua_assert(irt_isint(ir->t)); xo = XO_MOV; break; | ||
1743 | } | ||
1744 | emit_mrm(as, xo, dest, RID_MRM); | ||
1745 | } | ||
1746 | |||
1747 | /* -- String ops ---------------------------------------------------------- */ | ||
1748 | |||
/* Assemble SNEW as a call to lj_str_new(L, str, len).
** op1 (the string data reference) and op2 (the length) are passed from
** registers when they have or need one. A STRREF with two constant operands
** is passed as an immediate address (sum of both constants plus
** sizeof(GCstr)); a constant length is passed as an immediate, too.
** The result is pinned to RID_RET; the other scratch registers are evicted.
** NOTE: the emit_* calls are listed in reverse of execution order.
*/
1749 | static void asm_snew(ASMState *as, IRIns *ir) | ||
1750 | { | ||
1751 | RegSet allow = RSET_GPR; | ||
1752 | Reg left, right; | ||
1753 | IRIns *irl; | ||
1754 | ra_destreg(as, ir, RID_RET); | ||
1755 | ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET)); | ||
1756 | irl = IR(ir->op1); | ||
1757 | left = irl->r; | ||
1758 | right = IR(ir->op2)->r; | ||
1759 | if (ra_noreg(left)) { | ||
1760 | lua_assert(irl->o == IR_STRREF); | ||
1761 | /* Get register only for non-const STRREF. */ | ||
1762 | if (!(irref_isk(irl->op1) && irref_isk(irl->op2))) { | ||
1763 | if (ra_hasreg(right)) rset_clear(allow, right); | ||
1764 | left = ra_allocref(as, ir->op1, allow); | ||
1765 | } | ||
1766 | } | ||
1767 | if (ra_noreg(right) && !irref_isk(ir->op2)) { | ||
1768 | if (ra_hasreg(left)) rset_clear(allow, left); | ||
1769 | right = ra_allocref(as, ir->op2, allow); | ||
1770 | } | ||
1771 | /* GCstr *lj_str_new(lua_State *L, const char *str, size_t len) */ | ||
1772 | emit_call(as, lj_str_new); | ||
1773 | emit_setargr(as, 1, RID_RET); | ||
1774 | emit_getgl(as, RID_RET, jit_L); | ||
1775 | if (ra_noreg(left)) /* Use immediate for const STRREF. */ | ||
1776 | emit_setargi(as, 2, IR(irl->op1)->i + IR(irl->op2)->i + | ||
1777 | (int32_t)sizeof(GCstr)); | ||
1778 | else | ||
1779 | emit_setargr(as, 2, left); | ||
1780 | if (ra_noreg(right)) | ||
1781 | emit_setargi(as, 3, IR(ir->op2)->i); | ||
1782 | else | ||
1783 | emit_setargr(as, 3, right); | ||
1784 | as->gcsteps++; /* NOTE(review): presumably accounts for the string allocation -- confirm. */ | ||
1785 | } | ||
1786 | |||
1787 | /* -- Table ops ----------------------------------------------------------- */ | ||
1788 | |||
/* Assemble TNEW as a call to lj_tab_new(L, asize, hbits), with op1 and op2
** passed as immediates for asize and hbits. The result is pinned to RID_RET;
** the other scratch registers are evicted.
** NOTE: the emit_* calls are listed in reverse of execution order.
*/
1789 | static void asm_tnew(ASMState *as, IRIns *ir) | ||
1790 | { | ||
1791 | ra_destreg(as, ir, RID_RET); | ||
1792 | ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET)); | ||
1793 | /* GCtab *lj_tab_new(lua_State *L, int32_t asize, uint32_t hbits) */ | ||
1794 | emit_call(as, lj_tab_new); | ||
1795 | emit_setargr(as, 1, RID_RET); | ||
1796 | emit_setargi(as, 2, ir->op1); | ||
1797 | emit_setargi(as, 3, ir->op2); | ||
1798 | emit_getgl(as, RID_RET, jit_L); | ||
1799 | as->gcsteps++; /* NOTE(review): presumably accounts for the table allocation -- confirm. */ | ||
1800 | } | ||
1801 | |||
/* Assemble TDUP as a call to lj_tab_dup(L, kt), where kt is the constant
** template table referenced by op1 and passed as an immediate pointer. The
** result is pinned to RID_RET; the other scratch registers are evicted.
** NOTE: the emit_* calls are listed in reverse of execution order.
*/
1802 | static void asm_tdup(ASMState *as, IRIns *ir) | ||
1803 | { | ||
1804 | ra_destreg(as, ir, RID_RET); | ||
1805 | ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET)); | ||
1806 | /* GCtab *lj_tab_dup(lua_State *L, const GCtab *kt) */ | ||
1807 | emit_call(as, lj_tab_dup); | ||
1808 | emit_setargr(as, 1, RID_RET); | ||
1809 | emit_setargp(as, 2, ir_kgc(IR(ir->op1))); | ||
1810 | emit_getgl(as, RID_RET, jit_L); | ||
1811 | as->gcsteps++; /* NOTE(review): presumably accounts for the table allocation -- confirm. */ | ||
1812 | } | ||
1813 | |||
/* Assemble TLEN as a call to lj_tab_len(t) with the table (op1) passed from
** a GPR. The result is pinned to RID_RET; the other scratch registers are
** evicted.
*/
1814 | static void asm_tlen(ASMState *as, IRIns *ir) | ||
1815 | { | ||
1816 | ra_destreg(as, ir, RID_RET); | ||
1817 | ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET)); | ||
1818 | emit_call(as, lj_tab_len); /* MSize lj_tab_len(GCtab *t) */ | ||
1819 | emit_setargr(as, 1, ra_alloc1(as, ir->op1, RSET_GPR)); | ||
1820 | } | ||
1821 | |||
/* Assemble an inline table write barrier.
** NOTE: the emit_* calls are listed in reverse of execution order. In
** execution order the emitted code does:
**   test tab->marked, LJ_GC_BLACK; jz l_end    (skip if not black)
**   and  tab->marked, ~LJ_GC_BLACK             (clear the black bit)
**   tmp = g->gc.grayagain; g->gc.grayagain = tab; tab->gclist = tmp
** i.e. the table is pushed onto the front of the grayagain list.
*/
1822 | static void asm_tbar(ASMState *as, IRIns *ir) | ||
1823 | { | ||
1824 | Reg tab = ra_alloc1(as, ir->op1, RSET_GPR); | ||
1825 | Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, tab)); | ||
1826 | MCLabel l_end = emit_label(as); | ||
1827 | emit_movtomro(as, tmp, tab, offsetof(GCtab, gclist)); | ||
1828 | emit_setgl(as, tab, gc.grayagain); | ||
1829 | emit_getgl(as, tmp, gc.grayagain); | ||
1830 | emit_i8(as, ~LJ_GC_BLACK); | ||
1831 | emit_rmro(as, XO_ARITHib, XOg_AND, tab, offsetof(GCtab, marked)); | ||
1832 | emit_sjcc(as, CC_Z, l_end); | ||
1833 | emit_i8(as, LJ_GC_BLACK); | ||
1834 | emit_rmro(as, XO_GROUP3b, XOg_TEST, tab, offsetof(GCtab, marked)); | ||
1835 | } | ||
1836 | |||
/* Assemble an object write barrier; only UREFC references are supported.
** NOTE: the emit_* calls are listed in reverse of execution order. In
** execution order the emitted code does:
**   test the object's marked byte for LJ_GC_BLACK; jz l_end
**   test the value's marked byte for LJ_GC_WHITES; jz l_end
**   call lj_gc_barrieruv(g, o, v)
** obj holds the reference produced by UREFC; ofs = -offsetof(GCupval, tv)
** rebases it for the marked test and for argument 2 (presumably back to the
** start of the GCupval -- confirm). All scratch registers are evicted.
*/
1837 | static void asm_obar(ASMState *as, IRIns *ir) | ||
1838 | { | ||
1839 | RegSet allow = RSET_GPR; | ||
1840 | Reg obj, val; | ||
1841 | GCobj *valp; | ||
1842 | MCLabel l_end; | ||
1843 | int32_t ofs; | ||
1844 | ra_evictset(as, RSET_SCRATCH); | ||
1845 | if (irref_isk(ir->op2)) { | ||
1846 | valp = ir_kgc(IR(ir->op2)); | ||
1847 | val = RID_NONE; | ||
1848 | } else { | ||
1849 | valp = NULL; | ||
1850 | val = ra_alloc1(as, ir->op2, allow); | ||
1851 | rset_clear(allow, val); | ||
1852 | } | ||
1853 | obj = ra_alloc1(as, ir->op1, allow); | ||
1854 | l_end = emit_label(as); | ||
1855 | /* No need for other object barriers (yet). */ | ||
1856 | lua_assert(IR(ir->op1)->o == IR_UREFC); | ||
1857 | ofs = -(int32_t)offsetof(GCupval, tv); | ||
1858 | /* void lj_gc_barrieruv(global_State *g, GCobj *o, GCobj *v) */ | ||
1859 | emit_call(as, lj_gc_barrieruv); | ||
1860 | if (ofs == 0) { | ||
1861 | emit_setargr(as, 2, obj); | ||
1862 | } else if (rset_test(RSET_SCRATCH, obj) && !(as->flags & JIT_F_LEA_AGU)) { | ||
1863 | emit_setargr(as, 2, obj); | ||
1864 | emit_gri(as, XG_ARITHi(XOg_ADD), obj, ofs); /* Adjust obj in place: it is a scratch register. */ | ||
1865 | } else { | ||
1866 | emit_setargr(as, 2, RID_RET); | ||
1867 | emit_rmro(as, XO_LEA, RID_RET, obj, ofs); | ||
1868 | } | ||
1869 | emit_setargp(as, 1, J2G(as->J)); | ||
1870 | if (valp) | ||
1871 | emit_setargp(as, 3, valp); | ||
1872 | else | ||
1873 | emit_setargr(as, 3, val); | ||
1874 | emit_sjcc(as, CC_Z, l_end); | ||
1875 | emit_i8(as, LJ_GC_WHITES); | ||
1876 | if (valp) | ||
1877 | emit_rma(as, XO_GROUP3b, XOg_TEST, &valp->gch.marked); | ||
1878 | else | ||
1879 | emit_rmro(as, XO_GROUP3b, XOg_TEST, val, (int32_t)offsetof(GChead, marked)); | ||
1880 | emit_sjcc(as, CC_Z, l_end); | ||
1881 | emit_i8(as, LJ_GC_BLACK); | ||
1882 | emit_rmro(as, XO_GROUP3b, XOg_TEST, obj, | ||
1883 | ofs + (int32_t)offsetof(GChead, marked)); | ||
1884 | } | ||
1885 | |||
1886 | /* -- FP/int arithmetic and logic operations ------------------------------ */ | ||
1887 | |||
1888 | /* Load reference onto x87 stack. Force a spill to memory if needed. */ | ||
/* Emit a load of the value of ref onto the x87 stack.
** - Number constants: FLDZ for +0, FLD1 for 1, otherwise an FLD from the
**   address of the constant.
** - An unused, fusable TONUM of a non-constant: FILD of the integer operand
**   directly from its spill slot.
** - Everything else: FLD from whatever operand asm_fuseload() yields when no
**   registers are allowed (RSET_EMPTY).
*/
1889 | static void asm_x87load(ASMState *as, IRRef ref) | ||
1890 | { | ||
1891 | IRIns *ir = IR(ref); | ||
1892 | if (ir->o == IR_KNUM) { | ||
1893 | cTValue *tv = ir_knum(ir); | ||
1894 | if (tvispzero(tv)) /* Use fldz only for +0. */ | ||
1895 | emit_x87op(as, XI_FLDZ); | ||
1896 | else if (tvispone(tv)) | ||
1897 | emit_x87op(as, XI_FLD1); | ||
1898 | else | ||
1899 | emit_rma(as, XO_FLDq, XOg_FLDq, tv); | ||
1900 | } else if (ir->o == IR_TONUM && !ra_used(ir) && | ||
1901 | !irref_isk(ir->op1) && mayfuse(as, ir->op1)) { | ||
1902 | IRIns *iri = IR(ir->op1); | ||
1903 | emit_rmro(as, XO_FILDd, XOg_FILDd, RID_ESP, ra_spill(as, iri)); | ||
1904 | } else { | ||
1905 | emit_mrm(as, XO_FLDq, XOg_FLDq, asm_fuseload(as, ref, RSET_EMPTY)); | ||
1906 | } | ||
1907 | } | ||
1908 | |||
1909 | /* Try to rejoin pow from EXP2, MUL and LOG2 (if still unsplit). */ | ||
/* Called for an EXP2 instruction: check whether it is directly preceded by
** MUL (ir-1) whose left operand is a LOG2 (ir-2), with neither intermediate
** result used elsewhere. If so, emit a call to lj_vm_pow with the LOG2
** operand and the MUL right operand loaded onto the x87 stack and return 1.
** Returns 0 (nothing emitted) if the pattern does not match.
** NOTE: the emit/load calls are listed in reverse of execution order.
*/
1910 | static int fpmjoin_pow(ASMState *as, IRIns *ir) | ||
1911 | { | ||
1912 | IRIns *irp = IR(ir->op1); | ||
1913 | if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) { | ||
1914 | IRIns *irpp = IR(irp->op1); | ||
1915 | if (irpp == ir-2 && irpp->o == IR_FPMATH && | ||
1916 | irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) { | ||
1917 | emit_call(as, lj_vm_pow); /* st0 = lj_vm_pow(st1, st0) */ | ||
1918 | asm_x87load(as, irp->op2); | ||
1919 | asm_x87load(as, irpp->op1); | ||
1920 | return 1; | ||
1921 | } | ||
1922 | } | ||
1923 | return 0; | ||
1924 | } | ||
1925 | |||
/* Assemble FP math operations (FPMATH plus the opcodes handled as
** IRFPM_OTHER: ATAN2, LDEXP, POWI). Three strategies:
** - SQRT: a single SQRTSD.
** - floor/ceil/trunc with SSE4.1: a single ROUNDSD with immediate 0x09+fpm.
** - Everything else goes through the x87 FPU: the operand(s) are loaded onto
**   the x87 stack, the operation or helper call is performed, the result is
**   stored to a stack slot with FSTP and, if the instruction has a register,
**   reloaded into it from that slot.
** NOTE: the emit_* calls are listed in reverse of execution order. In the
** x87 path the second switch (after the op1 load) therefore emits what
** executes before op1 is loaded.
*/
1926 | static void asm_fpmath(ASMState *as, IRIns *ir) | ||
1927 | { | ||
1928 | IRFPMathOp fpm = ir->o == IR_FPMATH ? (IRFPMathOp)ir->op2 : IRFPM_OTHER; | ||
1929 | if (fpm == IRFPM_SQRT) { | ||
1930 | Reg dest = ra_dest(as, ir, RSET_FPR); | ||
1931 | Reg left = asm_fuseload(as, ir->op1, RSET_FPR); | ||
1932 | emit_mrm(as, XO_SQRTSD, dest, left); | ||
1933 | } else if ((as->flags & JIT_F_SSE4_1) && fpm <= IRFPM_TRUNC) { | ||
1934 | Reg dest = ra_dest(as, ir, RSET_FPR); | ||
1935 | Reg left = asm_fuseload(as, ir->op1, RSET_FPR); | ||
1936 | /* Round down/up/trunc == 1001/1010/1011. */ | ||
1937 | emit_i8(as, 0x09 + fpm); | ||
1938 | /* ROUNDSD has a 4-byte opcode which doesn't fit in x86Op. */ | ||
1939 | emit_mrm(as, XO_ROUNDSD, dest, left); | ||
1940 | /* Let's pretend it's a 3-byte opcode, and compensate afterwards. */ | ||
1941 | /* This is atrocious, but the alternatives are much worse. */ | ||
1942 | if (LJ_64 && as->mcp[1] != (MCode)(XO_ROUNDSD >> 16)) { | ||
1943 | as->mcp[0] = as->mcp[1]; as->mcp[1] = 0x0f; /* Swap 0F and REX. */ | ||
1944 | } | ||
1945 | *--as->mcp = 0x66; /* 1st byte of ROUNDSD opcode. */ | ||
1946 | } else { | ||
1947 | int32_t ofs = sps_scale(ir->s); /* Use spill slot or slots SPS_TEMP1/2. */ | ||
1948 | Reg dest = ir->r; | ||
1949 | if (ra_hasreg(dest)) { | ||
1950 | ra_free(as, dest); | ||
1951 | ra_modified(as, dest); | ||
1952 | emit_rmro(as, XMM_MOVRM(as), dest, RID_ESP, ofs); /* Reload the result from the stack slot. */ | ||
1953 | } | ||
1954 | emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs); /* Store st0 to the stack slot and pop. */ | ||
1955 | switch (fpm) { /* st0 = lj_vm_*(st0) */ | ||
1956 | case IRFPM_FLOOR: emit_call(as, lj_vm_floor); break; | ||
1957 | case IRFPM_CEIL: emit_call(as, lj_vm_ceil); break; | ||
1958 | case IRFPM_TRUNC: emit_call(as, lj_vm_trunc); break; | ||
1959 | case IRFPM_EXP: emit_call(as, lj_vm_exp); break; | ||
1960 | case IRFPM_EXP2: | ||
1961 | if (fpmjoin_pow(as, ir)) return; /* Operands already loaded by fpmjoin_pow(). */ | ||
1962 | emit_call(as, lj_vm_exp2); /* st0 = lj_vm_exp2(st0) */ | ||
1963 | break; | ||
1964 | case IRFPM_SIN: emit_x87op(as, XI_FSIN); break; | ||
1965 | case IRFPM_COS: emit_x87op(as, XI_FCOS); break; | ||
1966 | case IRFPM_TAN: emit_x87op(as, XI_FPOP); emit_x87op(as, XI_FPTAN); break; | ||
1967 | case IRFPM_LOG: case IRFPM_LOG2: case IRFPM_LOG10: | ||
1968 | /* Note: the use of fyl2xp1 would be pointless here. When computing | ||
1969 | ** log(1.0+eps) the precision is already lost after 1.0 is added. | ||
1970 | ** Subtracting 1.0 won't recover it. OTOH math.log1p would make sense. | ||
1971 | */ | ||
1972 | emit_x87op(as, XI_FYL2X); break; | ||
1973 | case IRFPM_OTHER: | ||
1974 | switch (ir->o) { | ||
1975 | case IR_ATAN2: | ||
1976 | emit_x87op(as, XI_FPATAN); asm_x87load(as, ir->op2); break; | ||
1977 | case IR_LDEXP: | ||
1978 | emit_x87op(as, XI_FPOP1); emit_x87op(as, XI_FSCALE); break; | ||
1979 | case IR_POWI: | ||
1980 | emit_call(as, lj_vm_powi); /* st0 = lj_vm_powi(st0, [esp]) */ | ||
1981 | emit_rmro(as, XO_MOVto, ra_alloc1(as, ir->op2, RSET_GPR), RID_ESP, 0); | ||
1982 | break; | ||
1983 | default: lua_assert(0); break; | ||
1984 | } | ||
1985 | break; | ||
1986 | default: lua_assert(0); break; | ||
1987 | } | ||
1988 | asm_x87load(as, ir->op1); | ||
1989 | switch (fpm) { /* Emitted after, so executed before the op1 load above. */ | ||
1990 | case IRFPM_LOG: emit_x87op(as, XI_FLDLN2); break; | ||
1991 | case IRFPM_LOG2: emit_x87op(as, XI_FLD1); break; | ||
1992 | case IRFPM_LOG10: emit_x87op(as, XI_FLDLG2); break; | ||
1993 | case IRFPM_OTHER: | ||
1994 | if (ir->o == IR_LDEXP) asm_x87load(as, ir->op2); | ||
1995 | break; | ||
1996 | default: break; | ||
1997 | } | ||
1998 | } | ||
1999 | } | ||
2000 | |||
2001 | /* Find out whether swapping operands might be beneficial. */ | ||
/* Heuristic: return 1 if the operands of ir should be swapped before
** allocating the right operand, 0 otherwise. Must only be called while the
** right operand has no register (asserted). The checks are evaluated in
** order and the first match decides.
*/
2002 | static int swapops(ASMState *as, IRIns *ir) | ||
2003 | { | ||
2004 | IRIns *irl = IR(ir->op1); | ||
2005 | IRIns *irr = IR(ir->op2); | ||
2006 | lua_assert(ra_noreg(irr->r)); | ||
2007 | if (!irm_iscomm(lj_ir_mode[ir->o])) | ||
2008 | return 0; /* Can't swap non-commutative operations. */ | ||
2009 | if (irref_isk(ir->op2)) | ||
2010 | return 0; /* Don't swap constants to the left. */ | ||
2011 | if (ra_hasreg(irl->r)) | ||
2012 | return 1; /* Swap if left already has a register. */ | ||
2013 | if (ra_samehint(ir->r, irr->r)) | ||
2014 | return 1; /* Swap if dest and right have matching hints. */ | ||
2015 | if (ir->op1 < as->loopref && !irt_isphi(irl->t) && | ||
2016 | !(ir->op2 < as->loopref && !irt_isphi(irr->t))) | ||
2017 | return 1; /* Swap invariants to the right. */ | ||
2018 | if (opisfusableload(irl->o)) | ||
2019 | return 1; /* Swap fusable loads to the right. */ | ||
2020 | return 0; /* Otherwise don't swap. */ | ||
2021 | } | ||
2022 | |||
/* Assemble a two-operand FP arithmetic instruction with opcode xo:
** dest = dest <xo> right, where dest is first made to hold the left operand
** via ra_left(). The right operand is either its already allocated register,
** dest itself when both operands are the same reference, or a (possibly
** fused) operand from asm_fuseload() after an optional operand swap.
** NOTE: the emit/ra_left calls are listed in reverse of execution order.
*/
2023 | static void asm_fparith(ASMState *as, IRIns *ir, x86Op xo) | ||
2024 | { | ||
2025 | IRRef lref = ir->op1; | ||
2026 | IRRef rref = ir->op2; | ||
2027 | RegSet allow = RSET_FPR; | ||
2028 | Reg dest; | ||
2029 | Reg right = IR(rref)->r; | ||
2030 | if (ra_hasreg(right)) | ||
2031 | rset_clear(allow, right); /* dest must not clobber the right operand. */ | ||
2032 | dest = ra_dest(as, ir, allow); | ||
2033 | if (lref == rref) { | ||
2034 | right = dest; | ||
2035 | } else if (ra_noreg(right)) { | ||
2036 | if (swapops(as, ir)) { | ||
2037 | IRRef tmp = lref; lref = rref; rref = tmp; | ||
2038 | } | ||
2039 | right = asm_fuseload(as, rref, rset_clear(allow, dest)); | ||
2040 | } | ||
2041 | emit_mrm(as, xo, dest, right); | ||
2042 | ra_left(as, dest, lref); | ||
2043 | } | ||
2044 | |||
/* Assemble a two-operand integer arithmetic instruction of group xa:
** dest = dest <xa> right, where dest is first made to hold the left operand
** via ra_left(). The right operand is a register, a (possibly fused) memory
** operand, or a 32 bit immediate for constants. Guarded variants (ADDOV etc.)
** additionally get an exit on overflow (CC_O).
** If a "test r,r" was recorded at the current machine code position
** (as->testmcp == as->mcp) it is dropped by skipping over it; presumably the
** arithmetic instruction sets the needed flags itself -- confirm.
** NOTE: the emit/ra_left calls are listed in reverse of execution order.
*/
2045 | static void asm_intarith(ASMState *as, IRIns *ir, x86Arith xa) | ||
2046 | { | ||
2047 | IRRef lref = ir->op1; | ||
2048 | IRRef rref = ir->op2; | ||
2049 | RegSet allow = RSET_GPR; | ||
2050 | Reg dest, right; | ||
2051 | if (as->testmcp == as->mcp) { /* Drop test r,r instruction. */ | ||
2052 | as->testmcp = NULL; | ||
2053 | as->mcp += (LJ_64 && *as->mcp != XI_TEST) ? 3 : 2; | ||
2054 | } | ||
2055 | right = IR(rref)->r; | ||
2056 | if (ra_hasreg(right)) | ||
2057 | rset_clear(allow, right); /* dest must not clobber the right operand. */ | ||
2058 | dest = ra_dest(as, ir, allow); | ||
2059 | if (lref == rref) { | ||
2060 | right = dest; | ||
2061 | } else if (ra_noreg(right) && !irref_isk(rref)) { | ||
2062 | if (swapops(as, ir)) { | ||
2063 | IRRef tmp = lref; lref = rref; rref = tmp; | ||
2064 | } | ||
2065 | right = asm_fuseload(as, rref, rset_clear(allow, dest)); | ||
2066 | /* Note: fuses only with IR_FLOAD for now. */ | ||
2067 | } | ||
2068 | if (irt_isguard(ir->t)) /* For IR_ADDOV etc. */ | ||
2069 | asm_guardcc(as, CC_O); | ||
2070 | if (ra_hasreg(right)) | ||
2071 | emit_mrm(as, XO_ARITH(xa), dest, right); | ||
2072 | else | ||
2073 | emit_gri(as, XG_ARITHi(xa), dest, IR(ir->op2)->i); | ||
2074 | ra_left(as, dest, lref); | ||
2075 | } | ||
2076 | |||
2077 | /* LEA is really a 4-operand ADD with an independent destination register, | ||
2078 | ** up to two source registers and an immediate. One register can be scaled | ||
2079 | ** by 1, 2, 4 or 8. This can be used to avoid moves or to fuse several | ||
2080 | ** instructions. | ||
2081 | ** | ||
2082 | ** Currently only a few common cases are supported: | ||
2083 | ** - 3-operand ADD: y = a+b; y = a+k with a and b already allocated | ||
2084 | ** - Left ADD fusion: y = (a+b)+k; y = (a+k)+b | ||
2085 | ** - Right ADD fusion: y = a+(b+k) | ||
2086 | ** The omitted variants have already been reduced by FOLD. | ||
2087 | ** | ||
2088 | ** There are more fusion opportunities, like gathering shifts or joining | ||
2089 | ** common references. But these are probably not worth the trouble, since | ||
2090 | ** array indexing is not decomposed and already makes use of all fields | ||
2091 | ** of the ModRM operand. | ||
2092 | */ | ||
/* Try to emit an integer ADD as a LEA.
** Returns 1 if a LEA was emitted, 0 if the operands match none of the
** supported cases. Every 'return 0' happens before any register is
** allocated or code is emitted, but as->mrm has been overwritten by then.
*/
2093 | static int asm_lea(ASMState *as, IRIns *ir) | ||
2094 | { | ||
2095 | IRIns *irl = IR(ir->op1); | ||
2096 | IRIns *irr = IR(ir->op2); | ||
2097 | RegSet allow = RSET_GPR; | ||
2098 | Reg dest; | ||
/* Start with an empty ModRM operand: no base, no index, scale 1, offset 0. */
2099 | as->mrm.base = as->mrm.idx = RID_NONE; | ||
2100 | as->mrm.scale = XM_SCALE1; | ||
2101 | as->mrm.ofs = 0; | ||
2102 | if (ra_hasreg(irl->r)) { | ||
/* Left operand already has a register: use it as the base. */
2103 | rset_clear(allow, irl->r); | ||
2104 | as->mrm.base = irl->r; | ||
2105 | if (irref_isk(ir->op2) || ra_hasreg(irr->r)) { | ||
/* 3-operand ADD: y = a+k or y = a+b with b already allocated. */
2106 | /* The PHI renaming logic does a better job in some cases. */ | ||
2107 | if (ra_hasreg(ir->r) && | ||
2108 | ((irt_isphi(irl->t) && as->phireg[ir->r] == ir->op1) || | ||
2109 | (irt_isphi(irr->t) && as->phireg[ir->r] == ir->op2))) | ||
2110 | return 0; | ||
2111 | if (irref_isk(ir->op2)) { | ||
2112 | as->mrm.ofs = irr->i; | ||
2113 | } else { | ||
2114 | rset_clear(allow, irr->r); | ||
2115 | as->mrm.idx = irr->r; | ||
2116 | } | ||
2117 | } else if (irr->o == IR_ADD && mayfuse(as, ir->op2) && | ||
2118 | irref_isk(irr->op2)) { | ||
/* Right ADD fusion: y = a+(b+k). b becomes the index, k the offset. */
2119 | Reg idx = ra_alloc1(as, irr->op1, allow); | ||
2120 | rset_clear(allow, idx); | ||
2121 | as->mrm.idx = (uint8_t)idx; | ||
2122 | as->mrm.ofs = IR(irr->op2)->i; | ||
2123 | } else { | ||
2124 | return 0; | ||
2125 | } | ||
2126 | } else if (ir->op1 != ir->op2 && irl->o == IR_ADD && mayfuse(as, ir->op1) && | ||
2127 | (irref_isk(ir->op2) || irref_isk(irl->op2))) { | ||
/* Left ADD fusion: y = (a+b)+k or y = (a+k)+b. a becomes the base. */
2128 | Reg idx, base = ra_alloc1(as, irl->op1, allow); | ||
2129 | rset_clear(allow, base); | ||
2130 | as->mrm.base = (uint8_t)base; | ||
2131 | if (irref_isk(ir->op2)) { | ||
/* y = (a+b)+k: outer constant is the offset, inner right operand the index. */
2132 | as->mrm.ofs = irr->i; | ||
2133 | idx = ra_alloc1(as, irl->op2, allow); | ||
2134 | } else { | ||
/* y = (a+k)+b: inner constant is the offset, outer right operand the index. */
2135 | as->mrm.ofs = IR(irl->op2)->i; | ||
2136 | idx = ra_alloc1(as, ir->op2, allow); | ||
2137 | } | ||
2138 | rset_clear(allow, idx); | ||
2139 | as->mrm.idx = (uint8_t)idx; | ||
2140 | } else { | ||
2141 | return 0; | ||
2142 | } | ||
/* The destination must differ from every source register used in as->mrm. */
2143 | dest = ra_dest(as, ir, allow); | ||
2144 | emit_mrm(as, XO_LEA, dest, RID_MRM); | ||
2145 | return 1; /* Success. */ | ||
2146 | } | ||
2147 | |||
/* Emit code for an ADD.
** FP adds use ADDSD. Integer adds try asm_lea() first and fall back to a
** plain ADD via asm_intarith() when asm_lea() returns 0. The LEA attempt is
** skipped entirely if the JIT_F_LEA_AGU flag is set or if a droppable
** test r,r is pending (as->testmcp == as->mcp); in the latter case
** asm_intarith() drops the test -- presumably because LEA does not set
** the flags the following branch needs.
*/
2148 | static void asm_add(ASMState *as, IRIns *ir) | ||
2149 | { | ||
2150 | if (irt_isnum(ir->t)) | ||
2151 | asm_fparith(as, ir, XO_ADDSD); | ||
2152 | else if ((as->flags & JIT_F_LEA_AGU) || as->testmcp == as->mcp || | ||
2153 | !asm_lea(as, ir)) | ||
2154 | asm_intarith(as, ir, XOg_ADD); | ||
2155 | } | ||
2156 | |||
/* Emit a bitwise NOT: allocate a GPR destination, emit the group 3 NOT
** on it and set up op1 as its (in-place) source operand.
*/
2157 | static void asm_bitnot(ASMState *as, IRIns *ir) | ||
2158 | { | ||
2159 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
2160 | emit_rr(as, XO_GROUP3, XOg_NOT, dest); | ||
2161 | ra_left(as, dest, ir->op1); | ||
2162 | } | ||
2163 | |||
/* Emit a byte swap of op1 into a GPR destination.
** The instruction is encoded by hand directly below the current machine
** code pointer: 0x0f followed by XI_BSWAP plus the low 3 bits of dest.
*/
2164 | static void asm_bitswap(ASMState *as, IRIns *ir) | ||
2165 | { | ||
2166 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
2167 | MCode *p = as->mcp; | ||
/* Code grows downwards: write the two opcode bytes just below as->mcp. */
2168 | p[-1] = (MCode)(XI_BSWAP+(dest&7)); | ||
2169 | p[-2] = 0x0f; | ||
2170 | p -= 2; | ||
/* Presumably prepends a REX prefix if dest needs one -- confirm in REXRB. */
2171 | REXRB(p, 0, dest); | ||
2172 | as->mcp = p; | ||
2173 | ra_left(as, dest, ir->op1); | ||
2174 | } | ||
2175 | |||
/* Emit a shift or rotate of kind xs: dest = op1 <xs> op2.
** Constant counts are masked to 0..31 and use the shift-by-1 or
** shift-by-immediate encodings; a masked count of 0 emits no shift at all.
** Variable counts must be in cl, so the count is either allocated to ecx
** or copied there with an extra mov.
** Note: machine code is generated backwards, so the emit calls below
** appear in reverse order of execution.
*/
2176 | static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs) | ||
2177 | { | ||
2178 | IRRef rref = ir->op2; | ||
2179 | IRIns *irr = IR(rref); | ||
2180 | Reg dest; | ||
2181 | if (irref_isk(rref)) { /* Constant shifts. */ | ||
2182 | int shift; | ||
2183 | dest = ra_dest(as, ir, RSET_GPR); | ||
2184 | shift = irr->i & 31; /* Handle shifts of 0..31 bits. */ | ||
2185 | switch (shift) { | ||
/* A count of 0 needs no instruction, but dest has already been allocated,
** so it must still receive the left operand via ra_left() below. Returning
** here would leave dest without a value.
*/
2186 | case 0: break; | ||
2187 | case 1: emit_rr(as, XO_SHIFT1, (Reg)xs, dest); break; | ||
2188 | default: emit_shifti(as, xs, dest, shift); break; | ||
2189 | } | ||
2190 | } else { /* Variable shifts implicitly use register cl (i.e. ecx). */ | ||
2191 | RegSet allow = rset_exclude(RSET_GPR, RID_ECX); | ||
2192 | Reg right = irr->r; | ||
2193 | if (ra_noreg(right)) { | ||
/* Count has no register yet: allocate it straight to ecx. */
2194 | right = ra_allocref(as, rref, RID2RSET(RID_ECX)); | ||
2195 | } else if (right != RID_ECX) { | ||
/* Count lives elsewhere: keep it out of dest and grab ecx as a scratch. */
2196 | rset_clear(allow, right); | ||
2197 | ra_scratch(as, RID2RSET(RID_ECX)); | ||
2198 | } | ||
/* dest is never ecx, since allow excludes it. */
2199 | dest = ra_dest(as, ir, allow); | ||
2200 | emit_rr(as, XO_SHIFTcl, (Reg)xs, dest); | ||
/* Emitted after the shift, i.e. executed before it: copy the count to ecx. */
2201 | if (right != RID_ECX) | ||
2202 | emit_rr(as, XO_MOV, RID_ECX, right); | ||
2203 | } | ||
2204 | ra_left(as, dest, ir->op1); | ||
2205 | /* | ||
2206 | ** Note: avoid using the flags resulting from a shift or rotate! | ||
2207 | ** All of them cause a partial flag stall, except for r,1 shifts | ||
2208 | ** (but not rotates). And a shift count of 0 leaves the flags unmodified. | ||
2209 | */ | ||
2210 | } | ||
2211 | |||
2212 | /* -- Comparisons --------------------------------------------------------- */ | ||
2213 | |||
2214 | /* Virtual flags for unordered FP comparisons. */ | ||
/* These occupy bits above the two 4-bit condition codes that the asm_comp
** macro packs into the low 8 bits of the cc argument of asm_comp_.
*/
2215 | #define VCC_U 0x100 /* Unordered. */ | ||
2216 | #define VCC_P 0x200 /* Needs extra CC_P branch. */ | ||
2217 | #define VCC_S 0x400 /* Swap avoids CC_P branch. */ | ||
2218 | #define VCC_PS (VCC_P|VCC_S) | ||
2219 | |||
/* Emit a guarded comparison for ir.
** cc is the value built by the asm_comp macro: integer condition code in
** the low 4 bits, FP condition code shifted left by 4, plus VCC_* flags.
** Three cases are handled:
** - number operands: UCOMISD plus guard(s), honoring unordered results;
** - integer/address operands and string (in)equality: CMP or TEST plus guard;
** - ordered string comparisons: call to lj_str_cmp, then test of its result.
** Note: machine code is generated backwards, so the emit calls below
** appear in reverse order of execution (guard last, compare first).
*/
2220 | static void asm_comp_(ASMState *as, IRIns *ir, int cc) | ||
2221 | { | ||
2222 | if (irt_isnum(ir->t)) { | ||
2223 | IRRef lref = ir->op1; | ||
2224 | IRRef rref = ir->op2; | ||
2225 | Reg left, right; | ||
2226 | MCLabel l_around; | ||
2227 | /* | ||
2228 | ** An extra CC_P branch is required to preserve ordered/unordered | ||
2229 | ** semantics for FP comparisons. This can be avoided by swapping | ||
2230 | ** the operands and inverting the condition (except for EQ and UNE). | ||
2231 | ** So always try to swap if possible. | ||
2232 | ** | ||
2233 | ** Another option would be to swap operands to achieve better memory | ||
2234 | ** operand fusion. But it's unlikely that this outweighs the cost | ||
2235 | ** of the extra branches. | ||
2236 | */ | ||
2237 | if (cc & VCC_S) { /* Swap? */ | ||
2238 | IRRef tmp = lref; lref = rref; rref = tmp; | ||
2239 | cc ^= (VCC_PS|(5<<4)); /* A <-> B, AE <-> BE, PS <-> none */ | ||
2240 | } | ||
2241 | left = ra_alloc1(as, lref, RSET_FPR); | ||
2242 | right = asm_fuseload(as, rref, rset_exclude(RSET_FPR, left)); | ||
/* Label at the code following the comparison: target for 'branch around'. */
2243 | l_around = emit_label(as); | ||
2244 | asm_guardcc(as, cc >> 4); | ||
2245 | if (cc & VCC_P) { /* Extra CC_P branch required? */ | ||
2246 | if (!(cc & VCC_U)) { | ||
2247 | asm_guardcc(as, CC_P); /* Branch to exit for ordered comparisons. */ | ||
2248 | } else if (l_around != as->invmcp) { | ||
2249 | emit_sjcc(as, CC_P, l_around); /* Branch around for unordered. */ | ||
2250 | } else { | ||
/* The guard above is the loop branch candidate (as->invmcp): the CC_P jump
** gets a placeholder target and loopinv == 2 tells asm_loop_fixup to patch it.
*/
2251 | /* Patched to mcloop by asm_loop_fixup. */ | ||
2252 | as->loopinv = 2; | ||
2253 | if (as->realign) | ||
2254 | emit_sjcc(as, CC_P, as->mcp); | ||
2255 | else | ||
2256 | emit_jcc(as, CC_P, as->mcp); | ||
2257 | } | ||
2258 | } | ||
2259 | emit_mrm(as, XO_UCOMISD, left, right); | ||
2260 | } else if (!(irt_isstr(ir->t) && (cc & 0xe) != CC_E)) { | ||
/* Everything except ordered string comparisons: compare as integers. */
2261 | IRRef lref = ir->op1, rref = ir->op2; | ||
2262 | IROp leftop = (IROp)(IR(lref)->o); | ||
2263 | lua_assert(irt_isint(ir->t) || irt_isaddr(ir->t)); | ||
2264 | /* Swap constants (only for ABC) and fusable loads to the right. */ | ||
2265 | if (irref_isk(lref) || (!irref_isk(rref) && opisfusableload(leftop))) { | ||
2266 | if ((cc & 0xc) == 0xc) cc ^= 3; /* L <-> G, LE <-> GE */ | ||
2267 | else if ((cc & 0xa) == 0x2) cc ^= 5; /* A <-> B, AE <-> BE */ | ||
2268 | lref = ir->op2; rref = ir->op1; | ||
2269 | } | ||
2270 | if (irref_isk(rref)) { | ||
2271 | IRIns *irl = IR(lref); | ||
2272 | int32_t imm = IR(rref)->i; | ||
2273 | /* Check whether we can use test ins. Not for unsigned, since CF=0. */ | ||
2274 | int usetest = (imm == 0 && (cc & 0xa) != 0x2); | ||
/* Only fuse the BAND if it immediately precedes the comparison and its
** result is not used elsewhere.
*/
2275 | if (usetest && irl->o == IR_BAND && irl+1 == ir && !ra_used(irl)) { | ||
2276 | /* Combine comp(BAND(ref, r/imm), 0) into test mrm, r/imm. */ | ||
2277 | Reg right, left = RID_NONE; | ||
2278 | RegSet allow = RSET_GPR; | ||
2279 | if (!irref_isk(irl->op2)) { | ||
2280 | left = ra_alloc1(as, irl->op2, allow); | ||
2281 | rset_clear(allow, left); | ||
2282 | } | ||
2283 | right = asm_fuseload(as, irl->op1, allow); | ||
2284 | asm_guardcc(as, cc); | ||
2285 | if (irref_isk(irl->op2)) { | ||
2286 | emit_i32(as, IR(irl->op2)->i); | ||
2287 | emit_mrm(as, XO_GROUP3, XOg_TEST, right); | ||
2288 | } else { | ||
2289 | emit_mrm(as, XO_TEST, left, right); | ||
2290 | } | ||
2291 | } else { | ||
2292 | Reg left; | ||
2293 | if (opisfusableload((IROp)irl->o) && | ||
2294 | ((irt_isi8(irl->t) && checki8(imm)) || | ||
2295 | (irt_isu8(irl->t) && checku8(imm)))) { | ||
2296 | /* Only the IRT_INT case is fused by asm_fuseload. The IRT_I8/IRT_U8 | ||
2297 | ** loads are handled here. The IRT_I16/IRT_U16 loads should never be | ||
2298 | ** fused, since cmp word [mem], imm16 has a length-changing prefix. | ||
2299 | */ | ||
2300 | IRType1 origt = irl->t; /* Temporarily flip types. */ | ||
2301 | irl->t.irt = (irl->t.irt & ~IRT_TYPE) | IRT_INT; | ||
2302 | left = asm_fuseload(as, lref, RSET_GPR); | ||
2303 | irl->t = origt; | ||
2304 | if (left == RID_MRM) { /* Fusion succeeded? */ | ||
/* Byte-sized compare of the fused memory operand against the 8 bit immediate. */
2305 | asm_guardcc(as, cc); | ||
2306 | emit_i8(as, imm); | ||
2307 | emit_mrm(as, XO_ARITHib, XOg_CMP, RID_MRM); | ||
2308 | return; | ||
2309 | } /* Otherwise handle register case as usual. */ | ||
2310 | } else { | ||
2311 | left = asm_fuseload(as, lref, RSET_GPR); | ||
2312 | } | ||
2313 | asm_guardcc(as, cc); | ||
2314 | if (usetest && left != RID_MRM) { | ||
2315 | /* Use test r,r instead of cmp r,0. */ | ||
2316 | if (irl+1 == ir) /* Referencing previous ins? */ | ||
2317 | as->testmcp = as->mcp; /* Set flag to drop test r,r if possible. */ | ||
2318 | emit_rr(as, XO_TEST, left, left); | ||
2319 | } else { | ||
/* cmp with immediate: pick the short form if the immediate fits in 8 bits. */
2320 | x86Op xo; | ||
2321 | if (checki8(imm)) { | ||
2322 | emit_i8(as, imm); | ||
2323 | xo = XO_ARITHi8; | ||
2324 | } else { | ||
2325 | emit_i32(as, imm); | ||
2326 | xo = XO_ARITHi; | ||
2327 | } | ||
2328 | emit_mrm(as, xo, XOg_CMP, left); | ||
2329 | } | ||
2330 | } | ||
2331 | } else { | ||
/* Two non-constant operands: cmp left, right (right may be a fused load). */
2332 | Reg left = ra_alloc1(as, lref, RSET_GPR); | ||
2333 | Reg right = asm_fuseload(as, rref, rset_exclude(RSET_GPR, left)); | ||
2334 | asm_guardcc(as, cc); | ||
2335 | emit_mrm(as, XO_CMP, left, right); | ||
2336 | } | ||
2337 | } else { /* Handle ordered string compares. */ | ||
2338 | RegSet allow = RSET_GPR; | ||
2339 | /* This assumes lj_str_cmp never uses any SSE registers. */ | ||
2340 | ra_evictset(as, (RSET_SCRATCH & RSET_GPR)); | ||
/* Executed order: set up both arguments, call, test the result, guard. */
2341 | asm_guardcc(as, cc); | ||
2342 | emit_rr(as, XO_TEST, RID_RET, RID_RET); | ||
2343 | emit_call(as, lj_str_cmp); /* int32_t lj_str_cmp(GCstr *a, GCstr *b) */ | ||
2344 | if (irref_isk(ir->op1)) { | ||
2345 | emit_setargi(as, 1, IR(ir->op1)->i); | ||
2346 | } else { | ||
2347 | Reg left = ra_alloc1(as, ir->op1, allow); | ||
2348 | rset_clear(allow, left); | ||
2349 | emit_setargr(as, 1, left); | ||
2350 | } | ||
2351 | if (irref_isk(ir->op2)) { | ||
2352 | emit_setargi(as, 2, IR(ir->op2)->i); | ||
2353 | } else { | ||
2354 | Reg right = ra_alloc1(as, ir->op2, allow); | ||
2355 | emit_setargr(as, 2, right); | ||
2356 | } | ||
2357 | } | ||
2358 | } | ||
2359 | |||
/* Pack the condition codes for asm_comp_: ci (integer condition) in the
** low 4 bits, cf (FP condition) shifted left by 4, cu (VCC_* flags) added
** on top.
*/
2360 | #define asm_comp(as, ir, ci, cf, cu) \ | ||
2361 | asm_comp_(as, ir, (ci)+((cf)<<4)+(cu)) | ||
2362 | |||
2363 | /* -- GC handling --------------------------------------------------------- */ | ||
2364 | |||
2365 | /* Sync all live GC values to Lua stack slots. */ | ||
2366 | static void asm_gc_sync(ASMState *as, SnapShot *snap, Reg base, RegSet allow) | ||
2367 | { | ||
/* 'base' is the register used to address the slots; 'allow' is the set of
** registers the synced values may be allocated from.
*/
2368 | IRRef2 *map = &as->T->snapmap[snap->mapofs]; | ||
2369 | BCReg s, nslots = snap->nslots; | ||
2370 | for (s = 0; s < nslots; s++) { | ||
2371 | IRRef ref = snap_ref(map[s]); | ||
/* Constants are not synced. */
2372 | if (!irref_isk(ref)) { | ||
2373 | IRIns *ir = IR(ref); | ||
2374 | if (ir->o == IR_FRAME) { | ||
2375 | /* NYI: sync the frame, bump base, set topslot, clear new slots. */ | ||
2376 | lj_trace_err(as->J, LJ_TRERR_NYIGCF); | ||
2377 | } else if (irt_isgcv(ir->t) && | ||
2378 | !(ir->o == IR_SLOAD && ir->op1 < nslots && map[ir->op1] == 0)) { | ||
/* GC value, except an SLOAD of a slot whose snapshot map entry is 0
** (presumably an unmodified slot that still holds the value -- confirm).
** Store the value at base+ofs and its type at base+ofs+4; slot s lives
** at byte offset 8*(s-1).
*/
2379 | Reg src = ra_alloc1(as, ref, allow); | ||
2380 | int32_t ofs = 8*(int32_t)(s-1); | ||
2381 | emit_movtomro(as, src, base, ofs); | ||
2382 | emit_movmroi(as, base, ofs+4, irt_toitype(ir->t)); | ||
2383 | checkmclim(as); | ||
2384 | } | ||
2385 | } | ||
2386 | } | ||
2387 | } | ||
2388 | |||
2389 | /* Check GC threshold and do one or more GC steps. */ | ||
2390 | static void asm_gc_check(ASMState *as, SnapShot *snap) | ||
2391 | { | ||
/* Code is generated backwards. In execution order the emitted sequence is:
** load gc.total, compare with gc.threshold, jump to l_end if below,
** sync GC values to the stack, set up the arguments (L, pc, steps),
** store BASE to L->base, call lj_gc_step_jit, l_end.
*/
2392 | MCLabel l_end; | ||
2393 | const BCIns *pc; | ||
2394 | Reg tmp, base; | ||
2395 | RegSet drop = RSET_SCRATCH; | ||
2396 | /* Must evict BASE because the stack may be reallocated by the GC. */ | ||
2397 | if (ra_hasreg(IR(REF_BASE)->r)) | ||
2398 | drop |= RID2RSET(IR(REF_BASE)->r); | ||
2399 | ra_evictset(as, drop); | ||
/* BASE must not share RID_RET, which holds L for the call setup below. */
2400 | base = ra_alloc1(as, REF_BASE, rset_exclude(RSET_GPR, RID_RET)); | ||
2401 | l_end = emit_label(as); | ||
2402 | /* void lj_gc_step_jit(lua_State *L, const BCIns *pc, MSize steps) */ | ||
2403 | emit_call(as, lj_gc_step_jit); | ||
2404 | emit_movtomro(as, base, RID_RET, offsetof(lua_State, base)); | ||
2405 | emit_setargr(as, 1, RID_RET); | ||
2406 | emit_setargi(as, 3, (int32_t)as->gcsteps); | ||
2407 | emit_getgl(as, RID_RET, jit_L); | ||
/* The pc is read from the snapshot map entry right after the slot entries. */
2408 | pc = (const BCIns *)(uintptr_t)as->T->snapmap[snap->mapofs+snap->nslots]; | ||
2409 | emit_setargp(as, 2, pc); | ||
2410 | asm_gc_sync(as, snap, base, rset_exclude(RSET_SCRATCH & RSET_GPR, base)); | ||
2411 | if (as->curins == as->loopref) /* BASE gets restored by LOOP anyway. */ | ||
2412 | ra_restore(as, REF_BASE); /* Better do it inside the slow path. */ | ||
2413 | /* Jump around GC step if GC total < GC threshold. */ | ||
2414 | tmp = ra_scratch(as, RSET_SCRATCH & RSET_GPR); | ||
2415 | emit_sjcc(as, CC_B, l_end); | ||
2416 | emit_opgl(as, XO_ARITH(XOg_CMP), tmp, gc.threshold); | ||
2417 | emit_getgl(as, tmp, gc.total); | ||
/* The pending steps are now covered by this check. */
2418 | as->gcsteps = 0; | ||
2419 | checkmclim(as); | ||
2420 | } | ||
2421 | |||
2422 | /* -- PHI and loop handling ----------------------------------------------- */ | ||
2423 | |||
2424 | /* Break a PHI cycle by renaming to a free register (evict if needed). */ | ||
2425 | static void asm_phi_break(ASMState *as, RegSet blocked, RegSet blockedby, | ||
2426 | RegSet allow) | ||
2427 | { | ||
/* blocked: PHI registers occupied by another PHI (collected by
** asm_phi_shuffle). blockedby: registers currently held by the left PHIs
** waiting for them. allow: the register file to operate on; nothing is
** done if it contains no blocked register.
*/
2428 | RegSet candidates = blocked & allow; | ||
2429 | if (candidates) { /* If this register file has candidates. */ | ||
2430 | /* Note: the set for ra_pick cannot be empty, since each register file | ||
2431 | ** has some registers never allocated to PHIs. | ||
2432 | */ | ||
2433 | Reg down, up = ra_pick(as, ~blocked & allow); /* Get a free register. */ | ||
/* Prefer a blocked register that does not itself block another PHI. */
2434 | if (candidates & ~blockedby) /* Optimize shifts, else it's a cycle. */ | ||
2435 | candidates = candidates & ~blockedby; | ||
2436 | down = rset_picktop(candidates); /* Pick candidate PHI register. */ | ||
2437 | ra_rename(as, down, up); /* And rename it to the free register. */ | ||
2438 | } | ||
2439 | } | ||
2440 | |||
2441 | /* PHI register shuffling. | ||
2442 | ** | ||
2443 | ** The allocator tries hard to preserve PHI register assignments across | ||
2444 | ** the loop body. Most of the time this loop does nothing, since there | ||
2445 | ** are no register mismatches. | ||
2446 | ** | ||
2447 | ** If a register mismatch is detected and ... | ||
2448 | ** - the register is currently free: rename it. | ||
2449 | ** - the register is blocked by an invariant: restore/remat and rename it. | ||
2450 | ** - Otherwise the register is used by another PHI, so mark it as blocked. | ||
2451 | ** | ||
2452 | ** The renames are order-sensitive, so just retry the loop if a register | ||
2453 | ** is marked as blocked, but has been freed in the meantime. A cycle is | ||
2454 | ** detected if all of the blocked registers are allocated. To break the | ||
2455 | ** cycle rename one of them to a free register and retry. | ||
2456 | ** | ||
2457 | ** Note that PHI spill slots are kept in sync and don't need to be shuffled. | ||
2458 | */ | ||
2459 | static void asm_phi_shuffle(ASMState *as) | ||
2460 | { | ||
2461 | RegSet work; | ||
2462 | |||
2463 | /* Find and resolve PHI register mismatches. */ | ||
2464 | for (;;) { | ||
/* Each pass recomputes both sets from scratch. */
2465 | RegSet blocked = RSET_EMPTY; | ||
2466 | RegSet blockedby = RSET_EMPTY; | ||
2467 | RegSet phiset = as->phiset; | ||
2468 | while (phiset) { /* Check all left PHI operand registers. */ | ||
/* r is the PHI register, left the register the left operand has now. */
2469 | Reg r = rset_picktop(phiset); | ||
2470 | IRIns *irl = IR(as->phireg[r]); | ||
2471 | Reg left = irl->r; | ||
2472 | if (r != left) { /* Mismatch? */ | ||
2473 | if (!rset_test(as->freeset, r)) { /* PHI register blocked? */ | ||
2474 | IRRef ref = regcost_ref(as->cost[r]); | ||
2475 | if (irt_ismarked(IR(ref)->t)) { /* Blocked by other PHI (w/reg)? */ | ||
2476 | rset_set(blocked, r); | ||
2477 | if (ra_hasreg(left)) | ||
2478 | rset_set(blockedby, left); | ||
/* Suppress the rename below; retried in a later pass. */
2479 | left = RID_NONE; | ||
2480 | } else { /* Otherwise grab register from invariant. */ | ||
2481 | ra_restore(as, ref); | ||
2482 | checkmclim(as); | ||
2483 | } | ||
2484 | } | ||
2485 | if (ra_hasreg(left)) { | ||
2486 | ra_rename(as, left, r); | ||
2487 | checkmclim(as); | ||
2488 | } | ||
2489 | } | ||
2490 | rset_clear(phiset, r); | ||
2491 | } | ||
2492 | if (!blocked) break; /* Finished. */ | ||
2493 | if (!(as->freeset & blocked)) { /* Break cycles if none are free. */ | ||
/* One rename per register file (GPR and FPR are shuffled independently). */
2494 | asm_phi_break(as, blocked, blockedby, RSET_GPR); | ||
2495 | asm_phi_break(as, blocked, blockedby, RSET_FPR); | ||
2496 | checkmclim(as); | ||
2497 | } /* Else retry some more renames. */ | ||
2498 | } | ||
2499 | |||
2500 | /* Restore/remat invariants whose registers are modified inside the loop. */ | ||
/* I.e. modified registers that are neither free nor PHI registers. */
2501 | work = as->modset & ~(as->freeset | as->phiset); | ||
2502 | while (work) { | ||
2503 | Reg r = rset_picktop(work); | ||
2504 | ra_restore(as, regcost_ref(as->cost[r])); | ||
2505 | rset_clear(work, r); | ||
2506 | checkmclim(as); | ||
2507 | } | ||
2508 | |||
2509 | /* Allocate and save all unsaved PHI regs and clear marks. */ | ||
2510 | work = as->phiset; | ||
2511 | while (work) { | ||
2512 | Reg r = rset_picktop(work); | ||
2513 | IRRef lref = as->phireg[r]; | ||
2514 | IRIns *ir = IR(lref); | ||
2515 | if (ra_hasspill(ir->s)) { /* Left PHI gained a spill slot? */ | ||
2516 | irt_clearmark(ir->t); /* Handled here, so clear marker now. */ | ||
/* Force the left PHI into its PHI register r, then store it to the slot. */
2517 | ra_alloc1(as, lref, RID2RSET(r)); | ||
2518 | ra_save(as, ir, r); /* Save to spill slot inside the loop. */ | ||
2519 | checkmclim(as); | ||
2520 | } | ||
2521 | rset_clear(work, r); | ||
2522 | } | ||
2523 | } | ||
2524 | |||
2525 | /* Emit renames for left PHIs which are only spilled outside the loop. */ | ||
2526 | static void asm_phi_fixup(ASMState *as) | ||
2527 | { | ||
/* Walks all PHI registers. For a left PHI that is still marked and has a
** spill slot, an IR_RENAME instruction is appended to the IR, recording
** the PHI register r for that ref at the loop snapshot.
*/
2528 | RegSet work = as->phiset; | ||
2529 | while (work) { | ||
2530 | Reg r = rset_picktop(work); | ||
2531 | IRRef lref = as->phireg[r]; | ||
2532 | IRIns *ir = IR(lref); | ||
2533 | /* Left PHI gained a spill slot before the loop? */ | ||
2534 | if (irt_ismarked(ir->t) && ra_hasspill(ir->s)) { | ||
2535 | IRRef ren; | ||
2536 | lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), lref, as->loopsnapno); | ||
2537 | ren = tref_ref(lj_ir_emit(as->J)); | ||
2538 | as->ir = as->T->ir; /* The IR may have been reallocated. */ | ||
/* NOTE(review): 'ir' was fetched before lj_ir_emit; if the IR buffer moved,
** the irt_clearmark(ir->t) below would use a stale pointer -- confirm.
*/
2539 | IR(ren)->r = (uint8_t)r; | ||
2540 | IR(ren)->s = SPS_NONE; | ||
2541 | } | ||
2542 | irt_clearmark(ir->t); /* Always clear marker. */ | ||
2543 | rset_clear(work, r); | ||
2544 | } | ||
2545 | } | ||
2546 | |||
2547 | /* Setup right PHI reference. */ | ||
2548 | static void asm_phi(ASMState *as, IRIns *ir) | ||
2549 | { | ||
/* ir is the PHI instruction: op1 is the left, op2 the right PHI operand.
** Number PHIs use the FPR file, everything else the GPR file.
*/
2550 | RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR; | ||
2551 | RegSet afree = (as->freeset & allow); | ||
2552 | IRIns *irl = IR(ir->op1); | ||
2553 | IRIns *irr = IR(ir->op2); | ||
2554 | /* Spill slot shuffling is not implemented yet (but rarely needed). */ | ||
2555 | if (ra_hasspill(irl->s) || ra_hasspill(irr->s)) | ||
2556 | lj_trace_err(as->J, LJ_TRERR_NYIPHI); | ||
2557 | /* Leave at least one register free for non-PHIs (and PHI cycle breaking). */ | ||
/* afree & (afree-1) clears the lowest set bit: non-zero iff >= 2 bits set. */
2558 | if ((afree & (afree-1))) { /* Two or more free registers? */ | ||
2559 | Reg r; | ||
2560 | if (ra_noreg(irr->r)) { /* Get a register for the right PHI. */ | ||
2561 | r = ra_allocref(as, ir->op2, allow); | ||
2562 | } else { /* Duplicate right PHI, need a copy (rare). */ | ||
2563 | r = ra_scratch(as, allow); | ||
2564 | emit_movrr(as, r, irr->r); | ||
2565 | } | ||
/* Record r as a PHI register and remember which left ref belongs to it. */
2566 | ir->r = (uint8_t)r; | ||
2567 | rset_set(as->phiset, r); | ||
2568 | as->phireg[r] = (IRRef1)ir->op1; | ||
2569 | irt_setmark(irl->t); /* Marks left PHIs _with_ register. */ | ||
2570 | if (ra_noreg(irl->r)) | ||
2571 | ra_sethint(irl->r, r); /* Set register hint for left PHI. */ | ||
2572 | } else { /* Otherwise allocate a spill slot. */ | ||
2573 | /* This is overly restrictive, but it triggers only on synthetic code. */ | ||
2574 | if (ra_hasreg(irl->r) || ra_hasreg(irr->r)) | ||
2575 | lj_trace_err(as->J, LJ_TRERR_NYIPHI); | ||
2576 | ra_spill(as, ir); | ||
2577 | irl->s = irr->s = ir->s; /* Sync left/right PHI spill slots. */ | ||
2578 | } | ||
2579 | } | ||
2580 | |||
2581 | /* Fixup the loop branch. */ | ||
2582 | static void asm_loop_fixup(ASMState *as) | ||
2583 | { | ||
/* p points to the top of the machine code area, where the branch back to
** the loop start was reserved; target is the current (lowest) code
** address, i.e. the start of the loop body. All patches below write
** displacements relative to p.
*/
2584 | MCode *p = as->mctop; | ||
2585 | MCode *target = as->mcp; | ||
2586 | if (as->realign) { /* Realigned loops use short jumps. */ | ||
2587 | as->realign = NULL; /* Stop another retry. */ | ||
2588 | lua_assert(((intptr_t)target & 15) == 0); | ||
2589 | if (as->loopinv) { /* Inverted loop branch? */ | ||
/* Keep a 5 byte jmp at the end; the short jcc in front of it gets the
** 8 bit displacement to the loop start.
*/
2590 | p -= 5; | ||
2591 | p[0] = XI_JMP; | ||
2592 | lua_assert(target - p >= -128); | ||
2593 | p[-1] = (MCode)(target - p); /* Patch sjcc. */ | ||
/* loopinv == 2: asm_comp_ emitted an extra short CC_P jump before the jcc. */
2594 | if (as->loopinv == 2) | ||
2595 | p[-3] = (MCode)(target - p + 2); /* Patch opt. short jp. */ | ||
2596 | } else { | ||
2597 | lua_assert(target - p >= -128); | ||
2598 | p[-1] = (MCode)(int8_t)(target - p); /* Patch short jmp. */ | ||
2599 | p[-2] = XI_JMPs; | ||
2600 | } | ||
2601 | } else { | ||
/* Not realigned (yet): use near jumps with 32 bit displacements. */
2602 | MCode *newloop; | ||
2603 | p[-5] = XI_JMP; | ||
2604 | if (as->loopinv) { /* Inverted loop branch? */ | ||
2605 | /* asm_guardcc already inverted the jcc and patched the jmp. */ | ||
2606 | p -= 5; | ||
2607 | newloop = target+4; | ||
2608 | *(int32_t *)(p-4) = (int32_t)(target - p); /* Patch jcc. */ | ||
2609 | if (as->loopinv == 2) { | ||
2610 | *(int32_t *)(p-10) = (int32_t)(target - p + 6); /* Patch opt. jp. */ | ||
2611 | newloop = target+8; | ||
2612 | } | ||
2613 | } else { /* Otherwise just patch jmp. */ | ||
2614 | *(int32_t *)(p-4) = (int32_t)(target - p); | ||
2615 | newloop = target+3; | ||
2616 | } | ||
/* newloop is presumably where the loop would start if the near jumps were
** replaced by short ones (each saves a few bytes) -- confirm the offsets.
*/
2617 | /* Realign small loops and shorten the loop branch. */ | ||
2618 | if (newloop >= p - 128) { | ||
2619 | as->realign = newloop; /* Force a retry and remember alignment. */ | ||
2620 | as->curins = as->stopins; /* Abort asm_trace now. */ | ||
2621 | as->T->nins = as->orignins; /* Remove any added renames. */ | ||
2622 | } | ||
2623 | } | ||
2624 | } | ||
2625 | |||
2626 | /* Middle part of a loop. */ | ||
2627 | static void asm_loop(ASMState *as) | ||
2628 | { | ||
2629 | /* LOOP is a guard, so the snapno is up to date. */ | ||
2630 | as->loopsnapno = as->snapno; | ||
/* Emit a pending GC check, using the loop snapshot for the stack sync. */
2631 | if (as->gcsteps) | ||
2632 | asm_gc_check(as, &as->T->snap[as->loopsnapno]); | ||
2633 | /* LOOP marks the transition from the variant to the invariant part. */ | ||
/* Reset state that only applies within one part of the trace. */
2634 | as->testmcp = as->invmcp = NULL; | ||
2635 | as->sectref = 0; | ||
2636 | if (!neverfuse(as)) as->fuseref = 0; | ||
/* Resolve PHI registers first, then patch the branch back to the loop start. */
2637 | asm_phi_shuffle(as); | ||
2638 | asm_loop_fixup(as); | ||
/* Remember the machine code address of the loop start. */
2639 | as->mcloop = as->mcp; | ||
2640 | RA_DBGX((as, "===== LOOP =====")); | ||
2641 | if (!as->realign) RA_DBG_FLUSH(); | ||
2642 | } | ||
2643 | |||
2644 | /* -- Head of trace ------------------------------------------------------- */ | ||
2645 | |||
2646 | /* Rematerialize all remaining constants in registers. */ | ||
2647 | static void asm_const_remat(ASMState *as) | ||
2648 | { | ||
/* Visit every register that is not free. */
2649 | RegSet work = ~as->freeset & RSET_ALL; | ||
2650 | while (work) { | ||
2651 | Reg r = rset_pickbot(work); | ||
2652 | IRRef ref = regcost_ref(as->cost[r]); | ||
/* Only constants and BASE are rematerialized here; other refs are left alone. */
2653 | if (irref_isk(ref) || ref == REF_BASE) { | ||
2654 | ra_rematk(as, IR(ref)); | ||
2655 | checkmclim(as); | ||
2656 | } | ||
2657 | rset_clear(work, r); | ||
2658 | } | ||
2659 | } | ||
2660 | |||
2661 | /* Head of a root trace. */ | ||
2662 | static void asm_head_root(ASMState *as) | ||
2663 | { | ||
/* Code is generated backwards: at runtime the stack pointer adjustment
** runs first, then vmstate is set to the current trace number.
*/
2664 | int32_t spadj; | ||
2665 | emit_setgli(as, vmstate, (int32_t)as->J->curtrace); | ||
2666 | spadj = sps_adjust(as); | ||
/* Record the adjustment in the trace (side traces read it as parent->spadjust). */
2667 | as->T->spadjust = (uint16_t)spadj; | ||
2668 | emit_addptr(as, RID_ESP, -spadj); | ||
2669 | } | ||
2670 | |||
2671 | /* Handle BASE coalescing for a root trace. */ | ||
2672 | static void asm_head_base(ASMState *as) | ||
2673 | { | ||
2674 | IRIns *ir = IR(REF_BASE); | ||
2675 | Reg r = ir->r; | ||
/* BASE must live in a register and must not have been spilled. */
2676 | lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s)); | ||
2677 | ra_free(as, r); | ||
/* If BASE was allocated to a register other than RID_BASE, claim RID_BASE
** and emit a move from RID_BASE into r.
*/
2678 | if (r != RID_BASE) { | ||
2679 | ra_scratch(as, RID2RSET(RID_BASE)); | ||
2680 | emit_rr(as, XO_MOV, r, RID_BASE); | ||
2681 | } | ||
2682 | } | ||
2683 | |||
2684 | /* Check Lua stack size for overflow at the start of a side trace. | ||
2685 | ** Stack overflow is rare, so let the regular exit handling fix this up. | ||
2686 | ** This is done in the context of the *parent* trace and parent exitno! | ||
2687 | */ | ||
2688 | static void asm_checkstack(ASMState *as, RegSet allow) | ||
2689 | { | ||
/* Code is generated backwards. In execution order the emitted sequence is:
** [spill r] ; r = jit_L ; r = L->maxstack ; r -= jit_base ;
** cmp r, 8*topslot ; [restore r] ; jb exit stub.
** The spill/restore pair is only emitted if 'allow' is empty.
*/
2690 | /* Try to get an unused temp. register, otherwise spill/restore eax. */ | ||
2691 | Reg r = allow ? rset_pickbot(allow) : RID_EAX; | ||
2692 | emit_jcc(as, CC_B, exitstub_addr(as->J, as->J->exitno)); | ||
2693 | if (allow == RSET_EMPTY) /* Restore temp. register. */ | ||
2694 | emit_rmro(as, XO_MOV, r, RID_ESP, sps_scale(SPS_TEMP1)); | ||
2695 | emit_gri(as, XG_ARITHi(XOg_CMP), r, (int32_t)(8*as->topslot)); | ||
2696 | emit_rmro(as, XO_ARITH(XOg_SUB), r, RID_NONE, ptr2addr(&J2G(as->J)->jit_base)); | ||
2697 | emit_rmro(as, XO_MOV, r, r, offsetof(lua_State, maxstack)); | ||
2698 | emit_getgl(as, r, jit_L); | ||
2699 | if (allow == RSET_EMPTY) /* Spill temp. register. */ | ||
2700 | emit_rmro(as, XO_MOVto, r, RID_ESP, sps_scale(SPS_TEMP1)); | ||
2701 | } | ||
2702 | |||
2703 | /* Head of a side trace. | ||
2704 | ** | ||
2705 | ** The current simplistic algorithm requires that all slots inherited | ||
2706 | ** from the parent are live in a register between pass 2 and pass 3. This | ||
2707 | ** avoids the complexity of stack slot shuffling. But of course this may | ||
2708 | ** overflow the register set in some cases and cause the dreaded error: | ||
2709 | ** "NYI: register coalescing too complex". A refined algorithm is needed. | ||
2710 | */ | ||
2711 | static void asm_head_side(ASMState *as) | ||
2712 | { | ||
/* sloadins maps a live parent register to the ref of the SLOAD that
** inherits it. pass2/pass3 flag whether the optional phases below
** (reload spilled targets / restore from parent spill slots) are needed.
*/
2713 | IRRef1 sloadins[RID_MAX]; | ||
2714 | RegSet allow = RSET_ALL; /* Inverse of all coalesced registers. */ | ||
2715 | RegSet live = RSET_EMPTY; /* Live parent registers. */ | ||
2716 | int32_t spadj, spdelta; | ||
2717 | int pass2 = 0; | ||
2718 | int pass3 = 0; | ||
2719 | IRRef i; | ||
2720 | |||
2721 | /* Scan all parent SLOADs and collect register dependencies. */ | ||
2722 | for (i = as->curins; i > REF_BASE; i--) { | ||
2723 | IRIns *ir = IR(i); | ||
2724 | lua_assert((ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_PARENT)) || | ||
2725 | ir->o == IR_FRAME); | ||
2726 | if (ir->o == IR_SLOAD) { | ||
/* rs: register/spill slot the parent trace holds this stack slot in. */
2727 | RegSP rs = as->parentmap[ir->op1]; | ||
2728 | if (ra_hasreg(ir->r)) { | ||
2729 | rset_clear(allow, ir->r); | ||
2730 | if (ra_hasspill(ir->s)) | ||
2731 | ra_save(as, ir, ir->r); | ||
2732 | } else if (ra_hasspill(ir->s)) { | ||
/* Target only has a spill slot: mark it for the reload pass. */
2733 | irt_setmark(ir->t); | ||
2734 | pass2 = 1; | ||
2735 | } | ||
2736 | if (ir->r == rs) { /* Coalesce matching registers right now. */ | ||
2737 | ra_free(as, ir->r); | ||
2738 | } else if (ra_hasspill(regsp_spill(rs))) { | ||
/* Parent value is in a spill slot: load it into the target register later. */
2739 | if (ra_hasreg(ir->r)) | ||
2740 | pass3 = 1; | ||
2741 | } else if (ra_used(ir)) { | ||
2742 | sloadins[rs] = (IRRef1)i; | ||
2743 | rset_set(live, rs); /* Block live parent register. */ | ||
2744 | } | ||
2745 | } | ||
2746 | } | ||
2747 | |||
2748 | /* Calculate stack frame adjustment. */ | ||
2749 | spadj = sps_adjust(as); | ||
2750 | spdelta = spadj - (int32_t)as->parent->spadjust; | ||
2751 | if (spdelta < 0) { /* Don't shrink the stack frame. */ | ||
2752 | spadj = (int32_t)as->parent->spadjust; | ||
2753 | spdelta = 0; | ||
2754 | } | ||
2755 | as->T->spadjust = (uint16_t)spadj; | ||
2756 | |||
2757 | /* Reload spilled target registers. */ | ||
2758 | if (pass2) { | ||
2759 | for (i = as->curins; i > REF_BASE; i--) { | ||
2760 | IRIns *ir = IR(i); | ||
2761 | if (irt_ismarked(ir->t)) { | ||
2762 | RegSet mask; | ||
2763 | Reg r; | ||
2764 | RegSP rs; | ||
2765 | irt_clearmark(ir->t); | ||
2766 | rs = as->parentmap[ir->op1]; | ||
2767 | if (!ra_hasspill(regsp_spill(rs))) | ||
2768 | ra_sethint(ir->r, rs); /* Hint may be gone, set it again. */ | ||
2769 | else if (sps_scale(regsp_spill(rs))+spdelta == sps_scale(ir->s)) | ||
2770 | continue; /* Same spill slot, do nothing. */ | ||
/* Need a temporary register of the right class that is not coalesced yet. */
2771 | mask = (irt_isnum(ir->t) ? RSET_FPR : RSET_GPR) & allow; | ||
2772 | if (mask == RSET_EMPTY) | ||
2773 | lj_trace_err(as->J, LJ_TRERR_NYICOAL); | ||
2774 | r = ra_allocref(as, i, mask); | ||
2775 | ra_save(as, ir, r); | ||
2776 | rset_clear(allow, r); | ||
2777 | if (r == rs) { /* Coalesce matching registers right now. */ | ||
2778 | ra_free(as, r); | ||
2779 | rset_clear(live, r); | ||
2780 | } else if (ra_hasspill(regsp_spill(rs))) { | ||
2781 | pass3 = 1; | ||
2782 | } | ||
2783 | checkmclim(as); | ||
2784 | } | ||
2785 | } | ||
2786 | } | ||
2787 | |||
2788 | /* Store trace number and adjust stack frame relative to the parent. */ | ||
2789 | emit_setgli(as, vmstate, (int32_t)as->J->curtrace); | ||
2790 | emit_addptr(as, RID_ESP, -spdelta); | ||
2791 | |||
2792 | /* Restore target registers from parent spill slots. */ | ||
2793 | if (pass3) { | ||
2794 | RegSet work = ~as->freeset & RSET_ALL; | ||
2795 | while (work) { | ||
2796 | Reg r = rset_pickbot(work); | ||
/* NOTE(review): assumes every register still allocated here belongs to a
** parent SLOAD, so that ir->op1 is a valid parentmap index -- confirm.
*/
2797 | IRIns *ir = IR(regcost_ref(as->cost[r])); | ||
2798 | RegSP rs = as->parentmap[ir->op1]; | ||
2799 | rset_clear(work, r); | ||
2800 | if (ra_hasspill(regsp_spill(rs))) { | ||
2801 | int32_t ofs = sps_scale(regsp_spill(rs)); | ||
2802 | ra_free(as, r); | ||
2803 | emit_movrmro(as, r, RID_ESP, ofs); | ||
2804 | checkmclim(as); | ||
2805 | } | ||
2806 | } | ||
2807 | } | ||
2808 | |||
2809 | /* Shuffle registers to match up target regs with parent regs. */ | ||
2810 | for (;;) { | ||
2811 | RegSet work; | ||
2812 | |||
2813 | /* Repeatedly coalesce free live registers by moving to their target. */ | ||
2814 | while ((work = as->freeset & live) != RSET_EMPTY) { | ||
/* rp: a live parent register that is currently free in this trace. */
2815 | Reg rp = rset_pickbot(work); | ||
2816 | IRIns *ir = IR(sloadins[rp]); | ||
2817 | rset_clear(live, rp); | ||
2818 | rset_clear(allow, rp); | ||
2819 | ra_free(as, ir->r); | ||
2820 | emit_movrr(as, ir->r, rp); | ||
2821 | checkmclim(as); | ||
2822 | } | ||
2823 | |||
2824 | /* We're done if no live registers remain. */ | ||
2825 | if (live == RSET_EMPTY) | ||
2826 | break; | ||
2827 | |||
2828 | /* Break cycles by renaming one target to a temp. register. */ | ||
/* The temp. must be free, not a live parent register and not yet coalesced. */
2829 | if (live & RSET_GPR) { | ||
2830 | RegSet tmpset = as->freeset & ~live & allow & RSET_GPR; | ||
2831 | if (tmpset == RSET_EMPTY) | ||
2832 | lj_trace_err(as->J, LJ_TRERR_NYICOAL); | ||
2833 | ra_rename(as, rset_pickbot(live & RSET_GPR), rset_pickbot(tmpset)); | ||
2834 | } | ||
2835 | if (live & RSET_FPR) { | ||
2836 | RegSet tmpset = as->freeset & ~live & allow & RSET_FPR; | ||
2837 | if (tmpset == RSET_EMPTY) | ||
2838 | lj_trace_err(as->J, LJ_TRERR_NYICOAL); | ||
2839 | ra_rename(as, rset_pickbot(live & RSET_FPR), rset_pickbot(tmpset)); | ||
2840 | } | ||
2841 | checkmclim(as); | ||
2842 | /* Continue with coalescing to fix up the broken cycle(s). */ | ||
2843 | } | ||
2844 | |||
2845 | /* Check Lua stack size if frames have been added. */ | ||
/* Emitted last, so it executes first; only uncoalesced GPRs may be clobbered. */
2846 | if (as->topslot) | ||
2847 | asm_checkstack(as, allow & RSET_GPR); | ||
2848 | } | ||
2849 | |||
2850 | /* -- Tail of trace ------------------------------------------------------- */ | ||
2851 | |||
2852 | /* Sync Lua stack slots to match the last snapshot and record the highest slot needed by any Lua frame in as->topslot (read by asm_head_side()). | ||
2853 | ** Note: code generation is backwards, so this is best read bottom-up. | ||
2854 | */ | ||
2855 | static void asm_tail_sync(ASMState *as) | ||
2856 | { | ||
2857 | SnapShot *snap = &as->T->snap[as->T->nsnap-1]; /* Last snapshot. */ | ||
2858 | BCReg s, nslots = snap->nslots; | ||
2859 | IRRef2 *map = &as->T->snapmap[snap->mapofs]; | ||
2860 | IRRef2 *flinks = map + nslots + snap->nframelinks; /* End of the frame links; walked backwards via *--flinks. */ | ||
2861 | BCReg newbase = 0; /* Slot of the last Lua function frame found (0 if none). */ | ||
2862 | BCReg secondbase = ~(BCReg)0; /* Slot of the first Lua function frame found, if any. */ | ||
2863 | BCReg topslot = 0; /* Maximum of s+framesize over all Lua function frames. */ | ||
2864 | |||
2865 | checkmclim(as); | ||
2866 | ra_allocref(as, REF_BASE, RID2RSET(RID_BASE)); | ||
2867 | |||
2868 | /* Must check all frames to find topslot (outer can be larger than inner). */ | ||
2869 | for (s = 0; s < nslots; s++) { | ||
2870 | IRRef ref = snap_ref(map[s]); | ||
2871 | if (!irref_isk(ref)) { | ||
2872 | IRIns *ir = IR(ref); | ||
2873 | if (ir->o == IR_FRAME && irt_isfunc(ir->t)) { | ||
2874 | GCfunc *fn = ir_kfunc(IR(ir->op2)); | ||
2875 | if (isluafunc(fn)) { | ||
2876 | BCReg fs = s + funcproto(fn)->framesize; | ||
2877 | newbase = s; /* Overwritten per frame, so the last one wins. */ | ||
2878 | if (secondbase == ~(BCReg)0) secondbase = s; /* Set only once. */ | ||
2879 | if (fs > topslot) topslot = fs; | ||
2880 | } | ||
2881 | } | ||
2882 | } | ||
2883 | } | ||
2884 | as->topslot = topslot; /* Used in asm_head_side(). */ | ||
2885 | |||
2886 | if (as->T->link == TRACE_INTERP) { | ||
2887 | /* Setup fixed registers for exit to interpreter. */ | ||
2888 | emit_loada(as, RID_DISPATCH, J2GG(as->J)->dispatch); | ||
2889 | emit_loadi(as, RID_PC, (int32_t)map[nslots]); /* Map entry right after the slot refs. */ | ||
2890 | } else if (newbase) { | ||
2891 | /* Save modified BASE for linking to trace with higher start frame. */ | ||
2892 | emit_setgl(as, RID_BASE, jit_base); | ||
2893 | } | ||
2894 | |||
2895 | emit_addptr(as, RID_BASE, 8*(int32_t)newbase); /* Advance BASE by 8*newbase bytes. */ | ||
2896 | |||
2897 | /* Clear stack slots of newly added frames. */ | ||
2898 | if (nslots <= topslot) { | ||
2899 | if (nslots < topslot) { | ||
2900 | for (s = nslots; s <= topslot; s++) { | ||
2901 | emit_movtomro(as, RID_EAX, RID_BASE, 8*(int32_t)s-4); | ||
2902 | checkmclim(as); | ||
2903 | } | ||
2904 | emit_loadi(as, RID_EAX, LJ_TNIL); /* Emitted last, so it runs before the stores above. */ | ||
2905 | } else { | ||
2906 | emit_movmroi(as, RID_BASE, 8*(int32_t)nslots-4, LJ_TNIL); /* Single slot: store the immediate directly. */ | ||
2907 | } | ||
2908 | } | ||
2909 | |||
2910 | /* Store the value of all modified slots to the Lua stack. */ | ||
2911 | for (s = 0; s < nslots; s++) { | ||
2912 | int32_t ofs = 8*((int32_t)s-1); | ||
2913 | IRRef ref = snap_ref(map[s]); | ||
2914 | if (ref) { | ||
2915 | IRIns *ir = IR(ref); | ||
2916 | /* No need to restore readonly slots and unmodified non-parent slots. */ | ||
2917 | if (ir->o == IR_SLOAD && ir->op1 == s && | ||
2918 | (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT) | ||
2919 | continue; | ||
2920 | if (irt_isnum(ir->t)) { | ||
2921 | Reg src = ra_alloc1(as, ref, RSET_FPR); | ||
2922 | emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs); | ||
2923 | } else if (ir->o == IR_FRAME) { | ||
2924 | emit_movmroi(as, RID_BASE, ofs, ptr2addr(ir_kgc(IR(ir->op2)))); | ||
2925 | if (s != 0) /* Do not overwrite link to previous frame. */ | ||
2926 | emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*--flinks)); | ||
2927 | } else { | ||
2928 | lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t)); | ||
2929 | if (!irref_isk(ref)) { | ||
2930 | Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); | ||
2931 | emit_movtomro(as, src, RID_BASE, ofs); | ||
2932 | } else if (!irt_ispri(ir->t)) { | ||
2933 | emit_movmroi(as, RID_BASE, ofs, ir->i); | ||
2934 | } | ||
2935 | emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t)); /* Type word at ofs+4; constant primitives get only this store. */ | ||
2936 | } | ||
2937 | } else if (s > secondbase) { /* No ref and above the first Lua frame found. */ | ||
2938 | emit_movmroi(as, RID_BASE, ofs+4, LJ_TNIL); | ||
2939 | } | ||
2940 | checkmclim(as); | ||
2941 | } | ||
2942 | lua_assert(map + nslots == flinks-1); /* Every frame link consumed; map[nslots] is the entry loaded into RID_PC above. */ | ||
2943 | } | ||
2944 | |||
2945 | /* Fixup the tail code: patch the stack adjustment (T->spadjust) and the exit branch to lnk into the space reserved at the end of the mcode, then NOP out and drop whatever part of the tail is unused. */ | ||
2946 | static void asm_tail_fixup(ASMState *as, TraceNo lnk) | ||
2947 | { | ||
2948 | /* Note: don't use as->mcp swap + emit_*: emit_op overwrites more bytes. */ | ||
2949 | MCode *p = as->mctop; | ||
2950 | MCode *target, *q; | ||
2951 | int32_t spadj = as->T->spadjust; | ||
2952 | if (spadj == 0) { | ||
2953 | p -= (as->flags & JIT_F_LEA_AGU) ? 7 : 6; /* No adjustment: give back the 6 or 7 bytes reserved for it in lj_asm_trace(). */ | ||
2954 | } else { | ||
2955 | MCode *p1; | ||
2956 | /* Patch stack adjustment. */ | ||
2957 | if (checki8(spadj)) { | ||
2958 | p -= 3; /* One byte immediate instead of four: the code ends 3 bytes earlier. */ | ||
2959 | p1 = p-6; | ||
2960 | *p1 = (MCode)spadj; | ||
2961 | } else { | ||
2962 | p1 = p-9; /* Same address as in the short form, since p was not moved. */ | ||
2963 | *(int32_t *)p1 = spadj; | ||
2964 | } | ||
2965 | if ((as->flags & JIT_F_LEA_AGU)) { | ||
2966 | p1[-3] = (MCode)XI_LEA; | ||
2967 | p1[-2] = MODRM(checki8(spadj) ? XM_OFS8 : XM_OFS32, RID_ESP, RID_ESP); | ||
2968 | p1[-1] = MODRM(XM_SCALE1, RID_ESP, RID_ESP); | ||
2969 | } else { | ||
2970 | p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi); | ||
2971 | p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP); | ||
2972 | } | ||
2973 | } | ||
2974 | /* Patch exit branch. */ | ||
2975 | target = lnk == TRACE_INTERP ? (MCode *)lj_vm_exit_interp : | ||
2976 | as->J->trace[lnk]->mcode; | ||
2977 | *(int32_t *)(p-4) = (int32_t)(target - p); /* 32 bit displacement, relative to p (the end of the jmp). */ | ||
2978 | p[-5] = XI_JMP; | ||
2979 | /* Drop unused mcode tail. Fill with NOPs to make the prefetcher happy. */ | ||
2980 | for (q = as->mctop-1; q >= p; q--) | ||
2981 | *q = XI_NOP; | ||
2982 | as->mctop = p; /* Callers computing the mcode size must read mctop after this. */ | ||
2983 | } | ||
2984 | |||
2985 | /* -- Instruction dispatch ------------------------------------------------ */ | ||
2986 | |||
2987 | /* Assemble a single IR instruction by dispatching on its opcode. Opcodes without a case raise LJ_TRERR_NYIIR, with the opcode number stored in J->errinfo. */ | ||
2988 | static void asm_ir(ASMState *as, IRIns *ir) | ||
2989 | { | ||
2990 | switch ((IROp)ir->o) { | ||
2991 | /* Miscellaneous ops. */ | ||
2992 | case IR_LOOP: asm_loop(as); break; | ||
2993 | case IR_NOP: break; | ||
2994 | case IR_PHI: asm_phi(as, ir); break; | ||
2995 | |||
2996 | /* Guarded assertions. Note: the condition codes passed are the inverse of the IR comparison (e.g. LT passes CC_GE), presumably the condition under which the guard exits. */ | ||
2997 | case IR_LT: asm_comp(as, ir, CC_GE, CC_AE, VCC_PS); break; | ||
2998 | case IR_GE: asm_comp(as, ir, CC_L, CC_B, 0); break; | ||
2999 | case IR_LE: asm_comp(as, ir, CC_G, CC_A, VCC_PS); break; | ||
3000 | case IR_GT: asm_comp(as, ir, CC_LE, CC_BE, 0); break; | ||
3001 | case IR_ULT: asm_comp(as, ir, CC_AE, CC_AE, VCC_U); break; | ||
3002 | case IR_UGE: asm_comp(as, ir, CC_B, CC_B, VCC_U|VCC_PS); break; | ||
3003 | case IR_ULE: asm_comp(as, ir, CC_A, CC_A, VCC_U); break; | ||
3004 | case IR_ABC: /* Assembled exactly like IR_UGT. */ | ||
3005 | case IR_UGT: asm_comp(as, ir, CC_BE, CC_BE, VCC_U|VCC_PS); break; | ||
3006 | |||
3007 | case IR_FRAME: | ||
3008 | if (ir->op1 == ir->op2) break; /* No check needed for placeholder. */ | ||
3009 | /* fallthrough */ | ||
3010 | case IR_EQ: asm_comp(as, ir, CC_NE, CC_NE, VCC_P); break; | ||
3011 | case IR_NE: asm_comp(as, ir, CC_E, CC_E, VCC_U|VCC_P); break; | ||
3012 | |||
3013 | /* Bit ops. */ | ||
3014 | case IR_BNOT: asm_bitnot(as, ir); break; | ||
3015 | case IR_BSWAP: asm_bitswap(as, ir); break; | ||
3016 | |||
3017 | case IR_BAND: asm_intarith(as, ir, XOg_AND); break; | ||
3018 | case IR_BOR: asm_intarith(as, ir, XOg_OR); break; | ||
3019 | case IR_BXOR: asm_intarith(as, ir, XOg_XOR); break; | ||
3020 | |||
3021 | case IR_BSHL: asm_bitshift(as, ir, XOg_SHL); break; | ||
3022 | case IR_BSHR: asm_bitshift(as, ir, XOg_SHR); break; | ||
3023 | case IR_BSAR: asm_bitshift(as, ir, XOg_SAR); break; | ||
3024 | case IR_BROL: asm_bitshift(as, ir, XOg_ROL); break; | ||
3025 | case IR_BROR: asm_bitshift(as, ir, XOg_ROR); break; | ||
3026 | |||
3027 | /* Arithmetic ops. */ | ||
3028 | case IR_ADD: asm_add(as, ir); break; | ||
3029 | case IR_SUB: | ||
3030 | if (irt_isnum(ir->t)) | ||
3031 | asm_fparith(as, ir, XO_SUBSD); | ||
3032 | else /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */ | ||
3033 | asm_intarith(as, ir, XOg_SUB); | ||
3034 | break; | ||
3035 | case IR_MUL: asm_fparith(as, ir, XO_MULSD); break; | ||
3036 | case IR_DIV: asm_fparith(as, ir, XO_DIVSD); break; | ||
3037 | |||
3038 | case IR_NEG: asm_fparith(as, ir, XO_XORPS); break; | ||
3039 | case IR_ABS: asm_fparith(as, ir, XO_ANDPS); break; | ||
3040 | |||
3041 | case IR_MIN: asm_fparith(as, ir, XO_MINSD); break; | ||
3042 | case IR_MAX: asm_fparith(as, ir, XO_MAXSD); break; | ||
3043 | |||
3044 | case IR_FPMATH: case IR_ATAN2: case IR_LDEXP: case IR_POWI: | ||
3045 | asm_fpmath(as, ir); | ||
3046 | break; | ||
3047 | |||
3048 | /* Overflow-checking arithmetic ops. Note: don't use LEA here! */ | ||
3049 | case IR_ADDOV: asm_intarith(as, ir, XOg_ADD); break; | ||
3050 | case IR_SUBOV: asm_intarith(as, ir, XOg_SUB); break; | ||
3051 | |||
3052 | /* Memory references. */ | ||
3053 | case IR_AREF: asm_aref(as, ir); break; | ||
3054 | case IR_HREF: asm_href(as, ir); break; | ||
3055 | case IR_HREFK: asm_hrefk(as, ir); break; | ||
3056 | case IR_NEWREF: asm_newref(as, ir); break; | ||
3057 | case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break; | ||
3058 | case IR_FREF: asm_fref(as, ir); break; | ||
3059 | case IR_STRREF: asm_strref(as, ir); break; | ||
3060 | |||
3061 | /* Loads and stores. */ | ||
3062 | case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: asm_ahuload(as, ir); break; | ||
3063 | case IR_FLOAD: asm_fload(as, ir); break; | ||
3064 | case IR_SLOAD: asm_sload(as, ir); break; | ||
3065 | case IR_XLOAD: asm_xload(as, ir); break; | ||
3066 | |||
3067 | case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break; | ||
3068 | case IR_FSTORE: asm_fstore(as, ir); break; | ||
3069 | |||
3070 | /* String ops. */ | ||
3071 | case IR_SNEW: asm_snew(as, ir); break; | ||
3072 | |||
3073 | /* Table ops. */ | ||
3074 | case IR_TNEW: asm_tnew(as, ir); break; | ||
3075 | case IR_TDUP: asm_tdup(as, ir); break; | ||
3076 | case IR_TLEN: asm_tlen(as, ir); break; | ||
3077 | case IR_TBAR: asm_tbar(as, ir); break; | ||
3078 | case IR_OBAR: asm_obar(as, ir); break; | ||
3079 | |||
3080 | /* Type conversions. */ | ||
3081 | case IR_TONUM: asm_tonum(as, ir); break; | ||
3082 | case IR_TOINT: /* Guarded and unguarded conversions use different emitters. */ | ||
3083 | if (irt_isguard(ir->t)) | ||
3084 | asm_tointg(as, ir, ra_alloc1(as, ir->op1, RSET_FPR)); | ||
3085 | else | ||
3086 | asm_toint(as, ir); | ||
3087 | break; | ||
3088 | case IR_TOBIT: asm_tobit(as, ir); break; | ||
3089 | case IR_TOSTR: asm_tostr(as, ir); break; | ||
3090 | case IR_STRTO: asm_strto(as, ir); break; | ||
3091 | |||
3092 | default: | ||
3093 | setintV(&as->J->errinfo, ir->o); | ||
3094 | lj_trace_err_info(as->J, LJ_TRERR_NYIIR); | ||
3095 | break; | ||
3096 | } | ||
3097 | } | ||
3098 | |||
3099 | /* Assemble a trace in linear backwards order: walks from as->curins-1 down to, but not including, as->stopins and calls asm_ir() for each instruction that is kept. */ | ||
3100 | static void asm_trace(ASMState *as) | ||
3101 | { | ||
3102 | for (as->curins--; as->curins > as->stopins; as->curins--) { | ||
3103 | IRIns *ir = IR(as->curins); | ||
3104 | if (irt_isguard(ir->t)) /* Guards are never skipped as dead code. */ | ||
3105 | asm_snap_prep(as); | ||
3106 | else if (!ra_used(ir) && !irm_sideeff(lj_ir_mode[ir->o]) && | ||
3107 | (as->flags & JIT_F_OPT_DCE)) | ||
3108 | continue; /* Dead-code elimination can be soooo easy. */ | ||
3109 | RA_DBG_REF(); | ||
3110 | checkmclim(as); | ||
3111 | asm_ir(as, ir); | ||
3112 | } | ||
3113 | } | ||
3114 | |||
3115 | /* -- Trace setup --------------------------------------------------------- */ | ||
3116 | |||
3117 | /* Clear reg/sp for all instructions and add register hints. Also strips trailing IR_RENAMEs from T, initializes the snapshot/instruction cursors in as, and sets as->stopins. */ | ||
3118 | static void asm_setup_regsp(ASMState *as, Trace *T) | ||
3119 | { | ||
3120 | IRRef i, nins; | ||
3121 | int inloop; /* Set to 1 once IR_LOOP has been seen in the forward scan below. */ | ||
3122 | |||
3123 | /* Clear reg/sp for constants. */ | ||
3124 | for (i = T->nk; i < REF_BIAS; i++) | ||
3125 | IR(i)->prev = REGSP_INIT; | ||
3126 | |||
3127 | /* REF_BASE is used for implicit references to the BASE register. */ | ||
3128 | IR(REF_BASE)->prev = REGSP_HINT(RID_BASE); | ||
3129 | |||
3130 | nins = T->nins; | ||
3131 | if (IR(nins-1)->o == IR_RENAME) { | ||
3132 | do { nins--; } while (IR(nins-1)->o == IR_RENAME); | ||
3133 | T->nins = nins; /* Remove any renames left over from ASM restart. */ | ||
3134 | } | ||
3135 | as->snaprename = nins; | ||
3136 | as->snapref = nins; | ||
3137 | as->snapno = T->nsnap; | ||
3138 | |||
3139 | as->stopins = REF_BASE; /* Raised below past parent SLOADs and directly following placeholder FRAMEs. */ | ||
3140 | as->orignins = nins; | ||
3141 | as->curins = nins; | ||
3142 | |||
3143 | inloop = 0; | ||
3144 | for (i = REF_FIRST; i < nins; i++) { | ||
3145 | IRIns *ir = IR(i); | ||
3146 | switch (ir->o) { | ||
3147 | case IR_LOOP: | ||
3148 | inloop = 1; | ||
3149 | break; | ||
3150 | /* Set hints for slot loads from a parent trace. */ | ||
3151 | case IR_SLOAD: | ||
3152 | if ((ir->op2 & IRSLOAD_PARENT)) { | ||
3153 | RegSP rs = as->parentmap[ir->op1]; | ||
3154 | lua_assert(regsp_used(rs)); | ||
3155 | as->stopins = i; | ||
3156 | if (!ra_hasspill(regsp_spill(rs)) && ra_hasreg(regsp_reg(rs))) { | ||
3157 | ir->prev = (uint16_t)REGSP_HINT(regsp_reg(rs)); | ||
3158 | continue; /* Keep the hint: skips the REGSP_INIT reset at the end of the loop body. */ | ||
3159 | } | ||
3160 | } | ||
3161 | break; | ||
3162 | case IR_FRAME: | ||
3163 | if (i == as->stopins+1 && ir->op1 == ir->op2) /* Placeholder frame right after the current stop point. */ | ||
3164 | as->stopins++; | ||
3165 | break; | ||
3166 | /* C calls evict all scratch regs and return results in RID_RET. */ | ||
3167 | case IR_SNEW: case IR_TNEW: case IR_TDUP: case IR_TLEN: case IR_TOSTR: | ||
3168 | case IR_NEWREF: | ||
3169 | ir->prev = REGSP_HINT(RID_RET); | ||
3170 | if (inloop) | ||
3171 | as->modset = RSET_SCRATCH; | ||
3172 | continue; /* Keep the RID_RET hint set above. */ | ||
3173 | case IR_STRTO: case IR_OBAR: | ||
3174 | if (inloop) | ||
3175 | as->modset = RSET_SCRATCH; | ||
3176 | break; | ||
3177 | /* Ordered string compares evict all integer scratch registers. */ | ||
3178 | case IR_LT: case IR_GE: case IR_LE: case IR_GT: | ||
3179 | if (irt_isstr(ir->t) && inloop) | ||
3180 | as->modset |= (RSET_SCRATCH & RSET_GPR); | ||
3181 | break; | ||
3182 | /* Non-constant shift counts need to be in RID_ECX. */ | ||
3183 | case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR: | ||
3184 | if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) /* Only if the operand has no hint yet. */ | ||
3185 | IR(ir->op2)->r = REGSP_HINT(RID_ECX); | ||
3186 | break; | ||
3187 | /* Do not propagate hints across type conversions. */ | ||
3188 | case IR_TONUM: case IR_TOINT: case IR_TOBIT: | ||
3189 | break; | ||
3190 | default: | ||
3191 | /* Propagate hints across likely 'op reg, imm' or 'op reg'. */ | ||
3192 | if (irref_isk(ir->op2) && !irref_isk(ir->op1)) { | ||
3193 | ir->prev = IR(ir->op1)->prev; | ||
3194 | continue; /* Keep the inherited value. */ | ||
3195 | } | ||
3196 | break; | ||
3197 | } | ||
3198 | ir->prev = REGSP_INIT; /* Reached by every case that ends in break. */ | ||
3199 | } | ||
3200 | } | ||
3201 | |||
3202 | /* -- Assembler core ------------------------------------------------------ */ | ||
3203 | |||
3204 | /* Define this if you want to run LuaJIT with Valgrind. VG_INVALIDATE(p, sz) then expands to VALGRIND_DISCARD_TRANSLATIONS for the given range; otherwise it expands to a no-op expression. */ | ||
3205 | #ifdef LUAJIT_USE_VALGRIND | ||
3206 | #include <valgrind/valgrind.h> | ||
3207 | #define VG_INVALIDATE(p, sz) VALGRIND_DISCARD_TRANSLATIONS(p, sz) | ||
3208 | #else | ||
3209 | #define VG_INVALIDATE(p, sz) ((void)0) | ||
3210 | #endif | ||
3211 | |||
3212 | /* Assemble a trace. Machine code for T is emitted backwards from the top of the reserved mcode area; on return T->mcode, T->mcloop and T->szmcode describe the result. Raises LJ_TRERR_BADRA if registers are still allocated at the end. */ | ||
3213 | void lj_asm_trace(jit_State *J, Trace *T) | ||
3214 | { | ||
3215 | ASMState as_; | ||
3216 | ASMState *as = &as_; | ||
3217 | |||
3218 | /* Setup initial state. Copy some fields to reduce indirections. */ | ||
3219 | as->J = J; | ||
3220 | as->T = T; | ||
3221 | as->ir = T->ir; | ||
3222 | as->flags = J->flags; | ||
3223 | as->loopref = J->loopref; | ||
3224 | as->realign = NULL; | ||
3225 | as->loopinv = 0; | ||
3226 | if (J->parent) { /* Side trace: fetch the parent and its reg/spill map for this exit. */ | ||
3227 | as->parent = J->trace[J->parent]; | ||
3228 | lj_snap_regspmap(as->parentmap, as->parent, J->exitno); | ||
3229 | } else { | ||
3230 | as->parent = NULL; | ||
3231 | } | ||
3232 | as->mctop = lj_mcode_reserve(J, &as->mcbot); /* Reserve MCode memory. */ | ||
3233 | as->mcp = as->mctop; | ||
3234 | as->mclim = as->mcbot + MCLIM_REDZONE; | ||
3235 | asm_exitstub_setup(as, T->nsnap); | ||
3236 | |||
3237 | do { /* The body repeats for as long as as->realign is non-NULL after a pass. */ | ||
3238 | as->mcp = as->mctop; | ||
3239 | as->curins = T->nins; | ||
3240 | RA_DBG_START(); | ||
3241 | RA_DBGX((as, "===== STOP =====")); | ||
3242 | /* Realign and leave room for backwards loop branch or exit branch. */ | ||
3243 | if (as->realign) { | ||
3244 | int i = ((int)(intptr_t)as->realign) & 15; /* Low 4 bits of the address: number of NOP bytes to insert. */ | ||
3245 | MCode *p = as->mctop; | ||
3246 | /* Fill unused mcode tail with NOPs to make the prefetcher happy. */ | ||
3247 | while (i-- > 0) | ||
3248 | *--p = XI_NOP; | ||
3249 | as->mctop = p; | ||
3250 | as->mcp = p - (as->loopinv ? 5 : 2); /* Space for short/near jmp. */ | ||
3251 | } else { | ||
3252 | as->mcp = as->mctop - 5; /* Space for exit branch (near jmp). */ | ||
3253 | } | ||
3254 | as->invmcp = as->mcp; | ||
3255 | as->mcloop = NULL; | ||
3256 | as->testmcp = NULL; | ||
3257 | as->topslot = 0; | ||
3258 | as->gcsteps = 0; | ||
3259 | as->sectref = as->loopref; | ||
3260 | as->fuseref = (as->flags & JIT_F_OPT_FUSE) ? as->loopref : FUSE_DISABLED; | ||
3261 | |||
3262 | /* Setup register allocation. */ | ||
3263 | ra_setup(as); | ||
3264 | asm_setup_regsp(as, T); | ||
3265 | |||
3266 | if (!as->loopref) { /* Trace without a loop: emit the tail that syncs the stack and leaves the trace. */ | ||
3267 | /* Leave room for ESP adjustment: add esp, imm or lea esp, [esp+imm] */ | ||
3268 | as->mcp -= (as->flags & JIT_F_LEA_AGU) ? 7 : 6; | ||
3269 | as->invmcp = NULL; | ||
3270 | asm_tail_sync(as); | ||
3271 | } | ||
3272 | asm_trace(as); | ||
3273 | } while (as->realign); /* Retry in case the MCode needs to be realigned. */ | ||
3274 | |||
3275 | RA_DBG_REF(); | ||
3276 | checkmclim(as); | ||
3277 | if (as->gcsteps) | ||
3278 | asm_gc_check(as, &as->T->snap[0]); | ||
3279 | if (!J->parent) | ||
3280 | asm_head_base(as); | ||
3281 | asm_const_remat(as); | ||
3282 | if (J->parent) | ||
3283 | asm_head_side(as); | ||
3284 | else | ||
3285 | asm_head_root(as); | ||
3286 | asm_phi_fixup(as); | ||
3287 | |||
3288 | RA_DBGX((as, "===== START ====")); | ||
3289 | RA_DBG_FLUSH(); | ||
3290 | if (as->freeset != RSET_ALL) | ||
3291 | lj_trace_err(as->J, LJ_TRERR_BADRA); /* Ouch! Should never happen. */ | ||
3292 | |||
3293 | /* Set trace entry point before fixing up tail to allow link to self. */ | ||
3294 | T->mcode = as->mcp; | ||
3295 | T->mcloop = as->mcloop ? (MSize)(as->mcloop - as->mcp) : 0; /* Offset of the loop start from the entry point, or 0 without a loop. */ | ||
3296 | if (!as->loopref) | ||
3297 | asm_tail_fixup(as, T->link); /* Note: this may change as->mctop! */ | ||
3298 | T->szmcode = (MSize)(as->mctop - as->mcp); /* Must be computed after the tail fixup. */ | ||
3299 | VG_INVALIDATE(T->mcode, T->szmcode); | ||
3300 | } | ||
3301 | |||
3302 | /* Patch exit jumps of existing machine code to a new target: every branch in T's mcode that currently goes to the exit stub of exitno is redirected to target. */ | ||
3303 | void lj_asm_patchexit(jit_State *J, Trace *T, ExitNo exitno, MCode *target) | ||
3304 | { | ||
3305 | MCode *p = T->mcode; | ||
3306 | MCode *mcarea = lj_mcode_patch(J, p, 0); /* NOTE(review): presumably opens the mcode area for writing; confirm in lj_mcode.c. */ | ||
3307 | MSize len = T->szmcode; | ||
3308 | MCode *px = exitstub_addr(J, exitno) - 6; /* Stub address biased by 6 (length of the patched branch form), so 'p + displacement == px' identifies a branch to the stub. */ | ||
3309 | MCode *pe = p+len-6; | ||
3310 | if (len > 5 && p[len-5] == XI_JMP && p+len-6 + *(int32_t *)(p+len-4) == px) /* Trailing jmp with 32 bit displacement to the stub? */ | ||
3311 | *(int32_t *)(p+len-4) = (int32_t)(target - (p+len)); | ||
3312 | for (; p < pe; p++) { | ||
3313 | if ((*(uint16_t *)p & 0xf0ff) == 0x800f && p + *(int32_t *)(p+2) == px) { /* Bytes 0F 8x followed by a 32 bit displacement to the stub. */ | ||
3314 | *(int32_t *)(p+2) = (int32_t)(target - (p+6)); | ||
3315 | p += 5; /* Together with the loop increment this skips the whole 6 byte branch. */ | ||
3316 | } | ||
3317 | } | ||
3318 | lj_mcode_patch(J, mcarea, 1); /* NOTE(review): presumably closes the area again; confirm in lj_mcode.c. */ | ||
3319 | VG_INVALIDATE(T->mcode, T->szmcode); | ||
3320 | } | ||
3321 | |||
3322 | #undef IR | ||
3323 | |||
3324 | #endif | ||
diff --git a/src/lj_asm.h b/src/lj_asm.h new file mode 100644 index 00000000..84122b43 --- /dev/null +++ b/src/lj_asm.h | |||
@@ -0,0 +1,17 @@ | |||
1 | /* | ||
2 | ** IR assembler (SSA IR -> machine code). | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #ifndef _LJ_ASM_H | ||
7 | #define _LJ_ASM_H | ||
8 | |||
9 | #include "lj_jit.h" | ||
10 | |||
11 | #if LJ_HASJIT /* The assembler entry points are declared only when the JIT compiler is built in. */ | ||
12 | LJ_FUNC void lj_asm_trace(jit_State *J, Trace *T); /* Assemble trace T to machine code. */ | ||
13 | LJ_FUNC void lj_asm_patchexit(jit_State *J, Trace *T, ExitNo exitno, | ||
14 | MCode *target); /* Patch the exit jumps for exitno in T's machine code to target. */ | ||
15 | #endif | ||
16 | |||
17 | #endif | ||
diff --git a/src/lj_bc.c b/src/lj_bc.c new file mode 100644 index 00000000..79846325 --- /dev/null +++ b/src/lj_bc.c | |||
@@ -0,0 +1,17 @@ | |||
1 | /* | ||
2 | ** Bytecode instruction modes. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #define lj_bc_c | ||
7 | #define LUA_CORE | ||
8 | |||
9 | #include "lj_obj.h" | ||
10 | #include "lj_bc.h" | ||
11 | |||
12 | /* Bytecode instruction modes. One packed uint16_t per opcode, generated from the BCDEF list via BCMODE and indexed by opcode number. */ | ||
13 | LJ_DATADEF const uint16_t lj_bc_mode[BC__MAX+1] = { | ||
14 | BCDEF(BCMODE) | ||
15 | 0 /* Extra entry for index BC__MAX; each BCMODE expansion ends in a comma. */ | ||
16 | }; | ||
17 | |||
diff --git a/src/lj_bc.h b/src/lj_bc.h new file mode 100644 index 00000000..d906011c --- /dev/null +++ b/src/lj_bc.h | |||
@@ -0,0 +1,235 @@ | |||
1 | /* | ||
2 | ** Bytecode instruction format. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #ifndef _LJ_BC_H | ||
7 | #define _LJ_BC_H | ||
8 | |||
9 | #include "lj_def.h" | ||
10 | #include "lj_arch.h" | ||
11 | |||
12 | /* Bytecode instruction format, 32 bit wide, fields of 8 or 16 bit: | ||
13 | ** | ||
14 | ** +----+----+----+----+ | ||
15 | ** | B | C | A | OP | Format ABC | ||
16 | ** +----+----+----+----+ | ||
17 | ** | D | A | OP | Format AD | ||
18 | ** +-------------------- | ||
19 | ** MSB LSB | ||
20 | ** | ||
21 | ** In-memory instructions are always stored in host byte order. | ||
22 | */ | ||
23 | |||
24 | /* Operand ranges and related constants. A, B and C are 8 bit fields, D is a 16 bit field (see the format diagram above). */ | ||
25 | #define BCMAX_A 0xff | ||
26 | #define BCMAX_B 0xff | ||
27 | #define BCMAX_C 0xff | ||
28 | #define BCMAX_D 0xffff | ||
29 | #define BCBIAS_J 0x8000 /* Bias between a jump offset and its encoding in D (see bc_j/setbc_j). */ | ||
30 | #define NO_REG BCMAX_A /* Same value as the largest A operand. */ | ||
31 | #define NO_JMP (~(BCPos)0) /* All bits set. */ | ||
32 | |||
33 | /* Macros to get instruction fields. The argument is a whole 32 bit instruction value, not a pointer. */ | ||
34 | #define bc_op(i) (cast(BCOp, (i)&0xff)) /* Bits 0-7. */ | ||
35 | #define bc_a(i) (cast(BCReg, ((i)>>8)&0xff)) /* Bits 8-15. */ | ||
36 | #define bc_b(i) (cast(BCReg, (i)>>24)) /* Bits 24-31. */ | ||
37 | #define bc_c(i) (cast(BCReg, ((i)>>16)&0xff)) /* Bits 16-23. */ | ||
38 | #define bc_d(i) (cast(BCReg, (i)>>16)) /* Bits 16-31, overlapping B and C. */ | ||
39 | #define bc_j(i) ((ptrdiff_t)bc_d(i)-BCBIAS_J) /* D with the jump bias removed. */ | ||
40 | |||
41 | /* Macros to set instruction fields. p points to the instruction in memory; fields are written as individual bytes or halfwords, so the index is selected by host endianness. NOTE(review): LJ_ENDIAN_SELECT presumably takes (little-endian, big-endian) arguments; confirm in lj_arch.h. */ | ||
42 | #define setbc_byte(p, x, ofs) \ | ||
43 | ((uint8_t *)(p))[LJ_ENDIAN_SELECT(ofs, 3-ofs)] = cast_byte(x) | ||
44 | #define setbc_op(p, x) setbc_byte(p, (x), 0) | ||
45 | #define setbc_a(p, x) setbc_byte(p, (x), 1) | ||
46 | #define setbc_b(p, x) setbc_byte(p, (x), 3) | ||
47 | #define setbc_c(p, x) setbc_byte(p, (x), 2) | ||
48 | #define setbc_d(p, x) \ | ||
49 | ((uint16_t *)(p))[LJ_ENDIAN_SELECT(1, 0)] = cast(uint16_t, (x)) | ||
50 | #define setbc_j(p, x) setbc_d(p, (BCPos)((int32_t)(x)+BCBIAS_J)) /* Stores the jump offset with the bias added. */ | ||
51 | |||
52 | /* Macros to compose instructions. Operands are shifted into place without masking, so callers must pass values that fit their field. */ | ||
53 | #define BCINS_ABC(o, a, b, c) \ | ||
54 | (cast(BCIns, o)|(cast(BCIns, a)<<8)|\ | ||
55 | (cast(BCIns, b)<<24)|(cast(BCIns, c)<<16)) | ||
56 | #define BCINS_AD(o, a, d) \ | ||
57 | (cast(BCIns, o)|(cast(BCIns, a)<<8)|(cast(BCIns, d)<<16)) | ||
58 | #define BCINS_AJ(o, a, j) BCINS_AD(o, a, (BCPos)((int32_t)(j)+BCBIAS_J)) /* AD format with a biased jump offset as D. */ | ||
59 | |||
60 | /* Bytecode instruction definition. Order matters, see below. | ||
61 | ** | ||
62 | ** (name, Amode, Bmode, Cmode or Dmode, metamethod) | ||
63 | ** | ||
64 | ** The opcode name suffixes specify the type for RB/RC or RD: | ||
65 | ** V = variable slot | ||
66 | ** S = string const | ||
67 | ** N = number const | ||
68 | ** P = primitive type (~itype) | ||
69 | ** B = unsigned byte literal | ||
70 | ** M = multiple args/results | ||
71 | */ | ||
72 | #define BCDEF(_) \ | ||
73 | /* Comparison ops. ORDER OPR. */ \ | ||
74 | _(ISLT, var, ___, var, lt) \ | ||
75 | _(ISGE, var, ___, var, lt) \ | ||
76 | _(ISLE, var, ___, var, le) \ | ||
77 | _(ISGT, var, ___, var, le) \ | ||
78 | \ | ||
79 | _(ISEQV, var, ___, var, eq) \ | ||
80 | _(ISNEV, var, ___, var, eq) \ | ||
81 | _(ISEQS, var, ___, str, eq) \ | ||
82 | _(ISNES, var, ___, str, eq) \ | ||
83 | _(ISEQN, var, ___, num, eq) \ | ||
84 | _(ISNEN, var, ___, num, eq) \ | ||
85 | _(ISEQP, var, ___, pri, eq) \ | ||
86 | _(ISNEP, var, ___, pri, eq) \ | ||
87 | \ | ||
88 | /* Unary test and copy ops. */ \ | ||
89 | _(ISTC, dst, ___, var, ___) \ | ||
90 | _(ISFC, dst, ___, var, ___) \ | ||
91 | _(IST, ___, ___, var, ___) \ | ||
92 | _(ISF, ___, ___, var, ___) \ | ||
93 | \ | ||
94 | /* Unary ops. */ \ | ||
95 | _(MOV, dst, ___, var, ___) \ | ||
96 | _(NOT, dst, ___, var, ___) \ | ||
97 | _(UNM, dst, ___, var, unm) \ | ||
98 | _(LEN, dst, ___, var, len) \ | ||
99 | \ | ||
100 | /* Binary ops. ORDER OPR. VV last, POW must be next. */ \ | ||
101 | _(ADDVN, dst, var, num, add) \ | ||
102 | _(SUBVN, dst, var, num, sub) \ | ||
103 | _(MULVN, dst, var, num, mul) \ | ||
104 | _(DIVVN, dst, var, num, div) \ | ||
105 | _(MODVN, dst, var, num, mod) \ | ||
106 | \ | ||
107 | _(ADDNV, dst, var, num, add) \ | ||
108 | _(SUBNV, dst, var, num, sub) \ | ||
109 | _(MULNV, dst, var, num, mul) \ | ||
110 | _(DIVNV, dst, var, num, div) \ | ||
111 | _(MODNV, dst, var, num, mod) \ | ||
112 | \ | ||
113 | _(ADDVV, dst, var, var, add) \ | ||
114 | _(SUBVV, dst, var, var, sub) \ | ||
115 | _(MULVV, dst, var, var, mul) \ | ||
116 | _(DIVVV, dst, var, var, div) \ | ||
117 | _(MODVV, dst, var, var, mod) \ | ||
118 | \ | ||
119 | _(POW, dst, var, var, pow) \ | ||
120 | _(CAT, dst, rbase, rbase, concat) \ | ||
121 | \ | ||
122 | /* Constant ops. */ \ | ||
123 | _(KSTR, dst, ___, str, ___) \ | ||
124 | _(KSHORT, dst, ___, lits, ___) \ | ||
125 | _(KNUM, dst, ___, num, ___) \ | ||
126 | _(KPRI, dst, ___, pri, ___) \ | ||
127 | _(KNIL, base, ___, base, ___) \ | ||
128 | \ | ||
129 | /* Upvalue and function ops. */ \ | ||
130 | _(UGET, dst, ___, uv, ___) \ | ||
131 | _(USETV, uv, ___, var, ___) \ | ||
132 | _(USETS, uv, ___, str, ___) \ | ||
133 | _(USETN, uv, ___, num, ___) \ | ||
134 | _(USETP, uv, ___, pri, ___) \ | ||
135 | _(UCLO, rbase, ___, jump, ___) \ | ||
136 | _(FNEW, dst, ___, func, gc) \ | ||
137 | \ | ||
138 | /* Table ops. */ \ | ||
139 | _(TNEW, dst, ___, lit, gc) \ | ||
140 | _(TDUP, dst, ___, tab, gc) \ | ||
141 | _(GGET, dst, ___, str, index) \ | ||
142 | _(GSET, var, ___, str, newindex) \ | ||
143 | _(TGETV, dst, var, var, index) \ | ||
144 | _(TGETS, dst, var, str, index) \ | ||
145 | _(TGETB, dst, var, lit, index) \ | ||
146 | _(TSETV, var, var, var, newindex) \ | ||
147 | _(TSETS, var, var, str, newindex) \ | ||
148 | _(TSETB, var, var, lit, newindex) \ | ||
149 | _(TSETM, base, ___, num, newindex) \ | ||
150 | \ | ||
151 | /* Calls and vararg handling. T = tail call. */ \ | ||
152 | _(CALLM, base, lit, lit, call) \ | ||
153 | _(CALL, base, lit, lit, call) \ | ||
154 | _(CALLMT, base, ___, lit, call) \ | ||
155 | _(CALLT, base, ___, lit, call) \ | ||
156 | _(ITERC, base, lit, lit, call) \ | ||
157 | _(VARG, base, lit, lit, ___) \ | ||
158 | \ | ||
159 | /* Returns. */ \ | ||
160 | _(RETM, base, ___, lit, ___) \ | ||
161 | _(RET, rbase, ___, lit, ___) \ | ||
162 | _(RET0, rbase, ___, lit, ___) \ | ||
163 | _(RET1, rbase, ___, lit, ___) \ | ||
164 | \ | ||
165 | /* Loops and branches. I/J = interp/JIT, I/C/L = init/call/loop. */ \ | ||
166 | _(FORI, base, ___, jump, ___) \ | ||
167 | _(JFORI, base, ___, jump, ___) \ | ||
168 | \ | ||
169 | _(FORL, base, ___, jump, ___) \ | ||
170 | _(IFORL, base, ___, jump, ___) \ | ||
171 | _(JFORL, base, ___, lit, ___) \ | ||
172 | \ | ||
173 | _(ITERL, base, ___, jump, ___) \ | ||
174 | _(IITERL, base, ___, jump, ___) \ | ||
175 | _(JITERL, base, ___, lit, ___) \ | ||
176 | \ | ||
177 | _(LOOP, rbase, ___, jump, ___) \ | ||
178 | _(ILOOP, rbase, ___, jump, ___) \ | ||
179 | _(JLOOP, rbase, ___, lit, ___) \ | ||
180 | \ | ||
181 | _(JMP, rbase, ___, jump, ___) | ||
182 | |||
183 | /* Bytecode opcode numbers. Generated from BCDEF, so the numbering follows the order of that list. */ | ||
184 | typedef enum { | ||
185 | #define BCENUM(name, ma, mb, mc, mt) BC_##name, | ||
186 | BCDEF(BCENUM) | ||
187 | #undef BCENUM | ||
188 | BC__MAX /* Number of opcodes; not an opcode itself. */ | ||
189 | } BCOp; | ||
190 | |||
191 | LJ_STATIC_ASSERT((int)BC_ISEQV+1 == (int)BC_ISNEV); /* Compile-time checks of the opcode order that BCDEF must preserve. */ | ||
192 | LJ_STATIC_ASSERT(((int)BC_ISEQV^1) == (int)BC_ISNEV); /* Each EQ/NE pair differs only in bit 0. */ | ||
193 | LJ_STATIC_ASSERT(((int)BC_ISEQS^1) == (int)BC_ISNES); | ||
194 | LJ_STATIC_ASSERT(((int)BC_ISEQN^1) == (int)BC_ISNEN); | ||
195 | LJ_STATIC_ASSERT(((int)BC_ISEQP^1) == (int)BC_ISNEP); | ||
196 | LJ_STATIC_ASSERT(((int)BC_ISLT^1) == (int)BC_ISGE); /* Flipping bit 0 gives the negated ordered comparison. */ | ||
197 | LJ_STATIC_ASSERT(((int)BC_ISLE^1) == (int)BC_ISGT); | ||
198 | LJ_STATIC_ASSERT(((int)BC_ISLT^3) == (int)BC_ISGT); /* Flipping bits 0 and 1 maps LT to GT. */ | ||
199 | LJ_STATIC_ASSERT((int)BC_IST-(int)BC_ISTC == (int)BC_ISF-(int)BC_ISFC); | ||
200 | LJ_STATIC_ASSERT((int)BC_CALLT-(int)BC_CALL == (int)BC_CALLMT-(int)BC_CALLM); | ||
201 | LJ_STATIC_ASSERT((int)BC_CALLMT + 1 == (int)BC_CALLT); | ||
202 | LJ_STATIC_ASSERT((int)BC_RETM + 1 == (int)BC_RET); | ||
203 | LJ_STATIC_ASSERT((int)BC_FORL + 1 == (int)BC_IFORL); /* Each loop op is followed by its I variant (+1) and J variant (+2). */ | ||
204 | LJ_STATIC_ASSERT((int)BC_FORL + 2 == (int)BC_JFORL); | ||
205 | LJ_STATIC_ASSERT((int)BC_ITERL + 1 == (int)BC_IITERL); | ||
206 | LJ_STATIC_ASSERT((int)BC_ITERL + 2 == (int)BC_JITERL); | ||
207 | LJ_STATIC_ASSERT((int)BC_LOOP + 1 == (int)BC_ILOOP); | ||
208 | LJ_STATIC_ASSERT((int)BC_LOOP + 2 == (int)BC_JLOOP); | ||
209 | |||
210 | /* Stack slots used by FORI/FORL, relative to operand A. The enumerators take the values 0, 1, 2 and 3 in the order listed. */ | ||
211 | enum { | ||
212 | FORL_IDX, FORL_STOP, FORL_STEP, FORL_EXT | ||
213 | }; | ||
214 | |||
215 | /* Bytecode operand modes. ORDER BCMode. The mode names are the ones used in the A/B/C columns of BCDEF, with a BCM prefix. */ | ||
216 | typedef enum { | ||
217 | BCMnone, BCMdst, BCMbase, BCMvar, BCMrbase, BCMuv, /* Mode A must be <= 7 */ | ||
218 | BCMlit, BCMlits, BCMpri, BCMnum, BCMstr, BCMtab, BCMfunc, BCMjump, | ||
219 | BCM_max | ||
220 | } BCMode; | ||
221 | #define BCM___ BCMnone /* Lets the ___ placeholder in BCDEF expand to BCMnone. */ | ||
222 | |||
223 | #define bcmode_a(op) (cast(BCMode, lj_bc_mode[op] & 7)) /* Packed mode word layout: bits 0-2 = mode A. */ | ||
224 | #define bcmode_b(op) (cast(BCMode, (lj_bc_mode[op]>>3) & 15)) /* Bits 3-6 = mode B. */ | ||
225 | #define bcmode_c(op) (cast(BCMode, (lj_bc_mode[op]>>7) & 15)) /* Bits 7-10 = mode C. */ | ||
226 | #define bcmode_d(op) bcmode_c(op) /* Mode D shares the field of mode C. */ | ||
227 | #define bcmode_hasd(op) ((lj_bc_mode[op] & (15<<3)) == (BCMnone<<3)) /* True if mode B is BCMnone. */ | ||
228 | #define bcmode_mm(op) (cast(MMS, lj_bc_mode[op]>>11)) /* Bits 11-15 = metamethod. */ | ||
229 | |||
230 | #define BCMODE(name, ma, mb, mc, mm) \ | ||
231 | (BCM##ma|(BCM##mb<<3)|(BCM##mc<<7)|(MM_##mm<<11)), | ||
232 | |||
233 | LJ_DATA const uint16_t lj_bc_mode[BC__MAX+1]; /* Defined in lj_bc.c. */ | ||
234 | |||
235 | #endif | ||
diff --git a/src/lj_ctype.c b/src/lj_ctype.c new file mode 100644 index 00000000..9f19b879 --- /dev/null +++ b/src/lj_ctype.c | |||
@@ -0,0 +1,44 @@ | |||
1 | /* | ||
2 | ** Internal CTYPE replacement. | ||
3 | ** Donated to the public domain. | ||
4 | ** | ||
5 | ** This is intended to replace the problematic libc single-byte NLS functions. | ||
6 | ** These just don't make sense anymore with UTF-8 locales becoming the norm | ||
7 | ** on POSIX systems. It never worked too well on Windows systems since hardly | ||
8 | ** anyone bothered to call setlocale(). | ||
9 | ** | ||
10 | ** Instead this table is hardcoded for ASCII, except for identifiers. These | ||
11 | ** include the characters 128-255, too. This allows for the use of all | ||
12 | ** non-ASCII chars as identifiers in the lexer. This is a broad definition, | ||
13 | ** but works well in practice for both UTF-8 locales and most single-byte | ||
14 | ** locales (such as ISO-8859-*). | ||
15 | ** | ||
16 | ** If you really need proper ctypes for UTF-8 strings, please use an add-on | ||
17 | ** library such as slnunicode: http://luaforge.net/projects/sln/ | ||
18 | */ | ||
19 | |||
20 | #define lj_ctype_c | ||
21 | #define LUA_CORE | ||
22 | |||
23 | #include "lj_ctype.h" | ||
24 | |||
25 | LJ_DATADEF const uint8_t lj_ctype_bits[257] = { /* Indexed by c+1, values are LJ_CTYPE_* bit sets. */ | ||
26 | 0, /* Entry for c == -1: no class bits set. */ | ||
27 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 1, 1, /* 0x00-0x0f: control; 0x09-0x0d are also space. */ | ||
28 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x10-0x1f: control. */ | ||
29 | 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, /* 0x20-0x2f: ' ' is space, the rest punctuation. */ | ||
30 | 152,152,152,152,152,152,152,152,152,152, 4, 4, 4, 4, 4, 4, /* 0x30-0x3f: '0'-'9' (ident+xdigit+digit), then punctuation. */ | ||
31 | 4,176,176,176,176,176,176,160,160,160,160,160,160,160,160,160, /* 0x40-0x4f: '@', 'A'-'F' (ident+upper+xdigit), 'G'-'O' (ident+upper). */ | ||
32 | 160,160,160,160,160,160,160,160,160,160,160, 4, 4, 4, 4,132, /* 0x50-0x5f: 'P'-'Z', punctuation, '_' (ident+punct). */ | ||
33 | 4,208,208,208,208,208,208,192,192,192,192,192,192,192,192,192, /* 0x60-0x6f: backquote, 'a'-'f' (ident+lower+xdigit), 'g'-'o' (ident+lower). */ | ||
34 | 192,192,192,192,192,192,192,192,192,192,192, 4, 4, 4, 4, 1, /* 0x70-0x7f: 'p'-'z', punctuation, 0x7f is control. */ | ||
35 | 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128, /* 0x80-0x8f: identifier bit only. */ | ||
36 | 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128, /* 0x90-0x9f: identifier bit only. */ | ||
37 | 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128, /* 0xa0-0xaf: identifier bit only. */ | ||
38 | 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128, /* 0xb0-0xbf: identifier bit only. */ | ||
39 | 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128, /* 0xc0-0xcf: identifier bit only. */ | ||
40 | 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128, /* 0xd0-0xdf: identifier bit only. */ | ||
41 | 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128, /* 0xe0-0xef: identifier bit only. */ | ||
42 | 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128 /* 0xf0-0xff: identifier bit only. */ | ||
43 | }; | ||
44 | |||
diff --git a/src/lj_ctype.h b/src/lj_ctype.h new file mode 100644 index 00000000..c4cdff84 --- /dev/null +++ b/src/lj_ctype.h | |||
@@ -0,0 +1,40 @@ | |||
1 | /* | ||
2 | ** Internal CTYPE replacement. | ||
3 | ** Donated to the public domain. | ||
4 | */ | ||
5 | |||
6 | #ifndef _LJ_CTYPE_H | ||
7 | #define _LJ_CTYPE_H | ||
8 | |||
9 | #include "lj_def.h" | ||
10 | |||
/* Character class bits stored in lj_ctype_bits[]. */
#define LJ_CTYPE_CNTRL   0x01
#define LJ_CTYPE_SPACE   0x02
#define LJ_CTYPE_PUNCT   0x04
#define LJ_CTYPE_DIGIT   0x08
#define LJ_CTYPE_XDIGIT  0x10
#define LJ_CTYPE_UPPER   0x20
#define LJ_CTYPE_LOWER   0x40
#define LJ_CTYPE_IDENT   0x80
#define LJ_CTYPE_ALPHA   (LJ_CTYPE_LOWER|LJ_CTYPE_UPPER)
#define LJ_CTYPE_ALNUM   (LJ_CTYPE_ALPHA|LJ_CTYPE_DIGIT)

/* Only pass -1 or 0..255 to these macros. Never pass a signed char! */
/* lj_ctype_isa() yields the class bits of c selected by the mask t, i.e. a
** non-zero value if c belongs to any of the classes in t. The mask is
** parenthesized, so compound masks like A|B can be passed directly.
*/
#define lj_ctype_isa(c, t)    (lj_ctype_bits[(c)+1] & (t))
#define lj_ctype_iscntrl(c)   lj_ctype_isa((c), LJ_CTYPE_CNTRL)
#define lj_ctype_isspace(c)   lj_ctype_isa((c), LJ_CTYPE_SPACE)
#define lj_ctype_ispunct(c)   lj_ctype_isa((c), LJ_CTYPE_PUNCT)
#define lj_ctype_isdigit(c)   lj_ctype_isa((c), LJ_CTYPE_DIGIT)
#define lj_ctype_isxdigit(c)  lj_ctype_isa((c), LJ_CTYPE_XDIGIT)
#define lj_ctype_isupper(c)   lj_ctype_isa((c), LJ_CTYPE_UPPER)
#define lj_ctype_islower(c)   lj_ctype_isa((c), LJ_CTYPE_LOWER)
#define lj_ctype_isident(c)   lj_ctype_isa((c), LJ_CTYPE_IDENT)
#define lj_ctype_isalpha(c)   lj_ctype_isa((c), LJ_CTYPE_ALPHA)
#define lj_ctype_isalnum(c)   lj_ctype_isa((c), LJ_CTYPE_ALNUM)

/* Case conversion: LJ_CTYPE_LOWER>>1 and LJ_CTYPE_UPPER both equal 0x20,
** which is added to or subtracted from c. Other characters pass through
** unchanged. Note: c is evaluated twice.
*/
#define lj_ctype_toupper(c)   ((c) - (lj_ctype_islower(c) >> 1))
#define lj_ctype_tolower(c)   ((c) + lj_ctype_isupper(c))
37 | |||
38 | LJ_DATA const uint8_t lj_ctype_bits[257]; | ||
39 | |||
40 | #endif | ||
diff --git a/src/lj_def.h b/src/lj_def.h new file mode 100644 index 00000000..dbfd5bf5 --- /dev/null +++ b/src/lj_def.h | |||
@@ -0,0 +1,226 @@ | |||
1 | /* | ||
2 | ** LuaJIT common internal definitions. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #ifndef _LJ_DEF_H | ||
7 | #define _LJ_DEF_H | ||
8 | |||
9 | #include "lua.h" | ||
10 | |||
11 | #ifdef _MSC_VER | ||
12 | /* MSVC is stuck in the last century and doesn't have C99's stdint.h. */ | ||
13 | typedef __int8 int8_t; | ||
14 | typedef __int16 int16_t; | ||
15 | typedef __int32 int32_t; | ||
16 | typedef __int64 int64_t; | ||
17 | typedef unsigned __int8 uint8_t; | ||
18 | typedef unsigned __int16 uint16_t; | ||
19 | typedef unsigned __int32 uint32_t; | ||
20 | typedef unsigned __int64 uint64_t; | ||
21 | #ifdef _WIN64 | ||
22 | typedef __int64 intptr_t; | ||
23 | typedef unsigned __int64 uintptr_t; | ||
24 | #else | ||
25 | typedef __int32 intptr_t; | ||
26 | typedef unsigned __int32 uintptr_t; | ||
27 | #endif | ||
28 | #else | ||
29 | #include <stdint.h> | ||
30 | #endif | ||
31 | |||
32 | /* Needed everywhere. */ | ||
33 | #include <string.h> | ||
34 | #include <stdlib.h> | ||
35 | |||
36 | /* Various VM limits. */ | ||
37 | #define LJ_MAX_MEM 0x7fffff00 /* Max. total memory allocation. */ | ||
38 | #define LJ_MAX_ALLOC LJ_MAX_MEM /* Max. individual allocation length. */ | ||
39 | #define LJ_MAX_STR LJ_MAX_MEM /* Max. string length. */ | ||
40 | #define LJ_MAX_UDATA LJ_MAX_MEM /* Max. userdata length. */ | ||
41 | |||
42 | #define LJ_MAX_STRTAB (1<<26) /* Max. string table size. */ | ||
43 | #define LJ_MAX_HBITS 26 /* Max. hash bits. */ | ||
44 | #define LJ_MAX_ABITS 28 /* Max. bits of array key. */ | ||
45 | #define LJ_MAX_ASIZE ((1<<(LJ_MAX_ABITS-1))+1) /* Max. array part size. */ | ||
46 | #define LJ_MAX_COLOSIZE 16 /* Max. elems for colocated array. */ | ||
47 | |||
48 | #define LJ_MAX_LINE LJ_MAX_MEM /* Max. source code line number. */ | ||
49 | #define LJ_MAX_XLEVEL 200 /* Max. syntactic nesting level. */ | ||
50 | #define LJ_MAX_BCINS (1<<26) /* Max. # of bytecode instructions. */ | ||
51 | #define LJ_MAX_SLOTS 250 /* Max. # of slots in a Lua func. */ | ||
52 | #define LJ_MAX_LOCVAR 200 /* Max. # of local variables. */ | ||
53 | #define LJ_MAX_UPVAL 60 /* Max. # of upvalues. */ | ||
54 | |||
55 | #define LJ_MAX_IDXCHAIN 100 /* __index/__newindex chain limit. */ | ||
56 | #define LJ_STACK_EXTRA 5 /* Extra stack space (metamethods). */ | ||
57 | |||
58 | /* Minimum table/buffer sizes. */ | ||
59 | #define LJ_MIN_GLOBAL 6 /* Min. global table size (hbits). */ | ||
60 | #define LJ_MIN_REGISTRY 2 /* Min. registry size (hbits). */ | ||
61 | #define LJ_MIN_STRTAB 256 /* Min. string table size (pow2). */ | ||
62 | #define LJ_MIN_SBUF 32 /* Min. string buffer length. */ | ||
63 | #define LJ_MIN_VECSZ 8 /* Min. size for growable vectors. */ | ||
64 | #define LJ_MIN_IRSZ 32 /* Min. size for growable IR. */ | ||
65 | #define LJ_MIN_KNUMSZ 16 /* Min. size for chained KNUM array. */ | ||
66 | |||
67 | /* JIT compiler limits. */ | ||
68 | #define LJ_MAX_JSLOTS 250 /* Max. # of stack slots for a trace. */ | ||
69 | #define LJ_MAX_PHI 32 /* Max. # of PHIs for a loop. */ | ||
70 | #define LJ_MAX_EXITSTUBGR 8 /* Max. # of exit stub groups. */ | ||
71 | |||
72 | /* Various macros. */ | ||
73 | #ifndef UNUSED | ||
74 | #define UNUSED(x) ((void)(x)) /* to avoid warnings */ | ||
75 | #endif | ||
76 | |||
77 | #ifndef cast | ||
78 | #define cast(t, exp) ((t)(exp)) | ||
79 | #endif | ||
80 | |||
81 | #define U64x(hi, lo) (((uint64_t)0x##hi << 32) + (uint64_t)0x##lo) | ||
82 | #define cast_byte(i) cast(uint8_t, (i)) | ||
83 | #define cast_num(i) cast(lua_Number, (i)) | ||
84 | #define cast_int(i) cast(int, (i)) | ||
85 | #define i32ptr(p) ((int32_t)(intptr_t)(void *)(p)) | ||
86 | #define u32ptr(p) ((uint32_t)(intptr_t)(void *)(p)) | ||
87 | |||
88 | #define checki8(x) ((x) == (int32_t)(int8_t)(x)) | ||
89 | #define checku8(x) ((x) == (int32_t)(uint8_t)(x)) | ||
90 | #define checki16(x) ((x) == (int32_t)(int16_t)(x)) | ||
91 | |||
/* Every half-decent C compiler transforms this into a rotate instruction.
** x must be an unsigned 32 bit value and n must be in the range 0..31.
** The complementary shift count is reduced modulo 32: this equals 32-n
** for n = 1..31 and avoids a shift by the full width (undefined in C)
** for n = 0.
*/
#define lj_rol(x, n)  (((x)<<(n)) | ((x)>>(-(int)(n)&31)))
#define lj_ror(x, n)  (((x)<<(-(int)(n)&31)) | ((x)>>(n)))
95 | |||
96 | /* A really naive Bloom filter. But sufficient for our needs. */ | ||
97 | typedef uintptr_t BloomFilter; | ||
98 | #define BLOOM_MASK (8*sizeof(BloomFilter) - 1) | ||
99 | #define bloombit(x) ((uintptr_t)1 << ((x) & BLOOM_MASK)) | ||
100 | #define bloomset(b, x) ((b) |= bloombit((x))) | ||
101 | #define bloomtest(b, x) ((b) & bloombit((x))) | ||
102 | |||
103 | #if defined(__GNUC__) | ||
104 | |||
105 | #if (__GNUC__ < 3) || ((__GNUC__ == 3) && __GNUC_MINOR__ < 4) | ||
106 | #error "sorry, need GCC 3.4 or newer" | ||
107 | #endif | ||
108 | |||
109 | #define LJ_NORET __attribute__((noreturn)) | ||
110 | #define LJ_ALIGN(n) __attribute__((aligned(n))) | ||
111 | #define LJ_INLINE inline | ||
112 | #define LJ_AINLINE inline __attribute__((always_inline)) | ||
113 | #define LJ_NOINLINE __attribute__((noinline)) | ||
114 | |||
115 | #if defined(__ELF__) || defined(__MACH__) | ||
116 | #define LJ_NOAPI extern __attribute__((visibility("hidden"))) | ||
117 | #endif | ||
118 | |||
119 | /* Note: it's only beneficial to use fastcall on x86 and then only for up to | ||
120 | ** two non-FP args. The amalgamated compile covers all LJ_FUNC cases. Only | ||
121 | ** indirect calls and related tail-called C functions are marked as fastcall. | ||
122 | */ | ||
123 | #if defined(__i386__) | ||
124 | #define LJ_FASTCALL __attribute__((fastcall)) | ||
125 | #endif | ||
126 | |||
127 | #define LJ_LIKELY(x) __builtin_expect(!!(x), 1) | ||
128 | #define LJ_UNLIKELY(x) __builtin_expect(!!(x), 0) | ||
129 | |||
130 | #define lj_ffs(x) ((uint32_t)__builtin_ctz(x)) | ||
131 | /* Don't ask ... */ | ||
132 | #if defined(__INTEL_COMPILER) && (defined(__i386__) || defined(__x86_64__)) | ||
133 | static LJ_AINLINE uint32_t lj_fls(uint32_t x) | ||
134 | { | ||
135 | uint32_t r; __asm__("bsrl %1, %0" : "=r" (r) : "rm" (x) : "cc"); return r; | ||
136 | } | ||
137 | #else | ||
138 | #define lj_fls(x) ((uint32_t)(__builtin_clz(x)^31)) | ||
139 | #endif | ||
140 | |||
141 | #if defined(__i386__) || defined(__x86_64__) | ||
142 | static LJ_AINLINE uint32_t lj_bswap(uint32_t x) | ||
143 | { | ||
144 | uint32_t r; __asm__("bswap %0" : "=r" (r) : "0" (x)); return r; | ||
145 | } | ||
146 | #else | ||
147 | #error "missing define for lj_bswap()" | ||
148 | #endif | ||
149 | |||
150 | #elif defined(_MSC_VER) | ||
151 | |||
152 | #define LJ_NORET __declspec(noreturn) | ||
153 | #define LJ_ALIGN(n) __declspec(align(n)) | ||
154 | #define LJ_INLINE __inline | ||
155 | #define LJ_AINLINE __forceinline | ||
156 | #define LJ_NOINLINE __declspec(noinline) | ||
157 | #if defined(_M_IX86) | ||
158 | #define LJ_FASTCALL __fastcall | ||
159 | #endif | ||
160 | |||
161 | static LJ_AINLINE uint32_t lj_ffs(uint32_t x) | ||
162 | { | ||
163 | uint32_t r; _BitScanForward(&r, x); return r; | ||
164 | } | ||
165 | |||
166 | static LJ_AINLINE uint32_t lj_fls(uint32_t x) | ||
167 | { | ||
168 | uint32_t r; _BitScanReverse(&r, x); return r; | ||
169 | } | ||
170 | |||
171 | #define lj_bswap(x) (_byteswap_ulong((x))) | ||
172 | |||
173 | #else | ||
174 | #error "missing defines for your compiler" | ||
175 | #endif | ||
176 | |||
177 | /* Optional defines. */ | ||
178 | #ifndef LJ_FASTCALL | ||
179 | #define LJ_FASTCALL | ||
180 | #endif | ||
181 | #ifndef LJ_NORET | ||
182 | #define LJ_NORET | ||
183 | #endif | ||
184 | #ifndef LJ_NOAPI | ||
185 | #define LJ_NOAPI extern | ||
186 | #endif | ||
187 | #ifndef LJ_LIKELY | ||
188 | #define LJ_LIKELY(x) (x) | ||
189 | #define LJ_UNLIKELY(x) (x) | ||
190 | #endif | ||
191 | |||
192 | /* Attributes for internal functions. */ | ||
193 | #if defined(ljamalg_c) | ||
194 | #define LJ_DATA static | ||
195 | #define LJ_DATADEF static | ||
196 | #define LJ_FUNC static | ||
197 | #define LJ_ASMF LJ_NOAPI | ||
198 | #define LJ_FUNCA LJ_NOAPI | ||
199 | #else | ||
200 | #define LJ_DATA LJ_NOAPI | ||
201 | #define LJ_DATADEF | ||
202 | #define LJ_FUNC LJ_NOAPI | ||
203 | #define LJ_ASMF LJ_NOAPI | ||
204 | #define LJ_FUNCA LJ_NOAPI | ||
205 | #endif | ||
206 | #define LJ_FUNC_NORET LJ_FUNC LJ_NORET | ||
207 | #define LJ_FUNCA_NORET LJ_FUNCA LJ_NORET | ||
208 | #define LJ_ASMF_NORET LJ_ASMF LJ_NORET | ||
209 | |||
210 | /* Runtime assertions. */ | ||
211 | #ifdef lua_assert | ||
212 | #define check_exp(c, e) (lua_assert(c), (e)) | ||
213 | #define api_check(l, e) lua_assert(e) | ||
214 | #else | ||
215 | #define lua_assert(c) ((void)0) | ||
216 | #define check_exp(c, e) (e) | ||
217 | #define api_check luai_apicheck | ||
218 | #endif | ||
219 | |||
220 | /* Static assertions: declares an extern function whose array parameter gets size -1 when cond is false, which is rejected at compile time. */ | ||
221 | #define LJ_ASSERT_NAME2(name, line) name ## line | ||
222 | #define LJ_ASSERT_NAME(line) LJ_ASSERT_NAME2(lj_assert_, line) /* Extra macro level so __LINE__ is expanded before pasting. */ | ||
223 | #define LJ_STATIC_ASSERT(cond) \ | ||
224 | extern void LJ_ASSERT_NAME(__LINE__)(int STATIC_ASSERTION_FAILED[(cond)?1:-1]) | ||
225 | |||
226 | #endif | ||
diff --git a/src/lj_dispatch.c b/src/lj_dispatch.c new file mode 100644 index 00000000..d2fce2e0 --- /dev/null +++ b/src/lj_dispatch.c | |||
@@ -0,0 +1,284 @@ | |||
1 | /* | ||
2 | ** Instruction dispatch handling. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #define lj_dispatch_c | ||
7 | #define LUA_CORE | ||
8 | |||
9 | #include "lj_obj.h" | ||
10 | #include "lj_err.h" | ||
11 | #include "lj_state.h" | ||
12 | #include "lj_frame.h" | ||
13 | #include "lj_bc.h" | ||
14 | #if LJ_HASJIT | ||
15 | #include "lj_jit.h" | ||
16 | #endif | ||
17 | #include "lj_trace.h" | ||
18 | #include "lj_dispatch.h" | ||
19 | #include "lj_vm.h" | ||
20 | #include "luajit.h" | ||
21 | |||
22 | /* -- Dispatch table management ------------------------------------------- */ | ||
23 | |||
24 | /* Initialize instruction dispatch table and hot counters. */ | ||
25 | void lj_dispatch_init(GG_State *GG) | ||
26 | { | ||
27 | uint32_t i; | ||
28 | ASMFunction *disp = GG->dispatch; | ||
29 | for (i = 0; i < BC__MAX; i++) | ||
30 | disp[GG_DISP_STATIC+i] = disp[i] = makeasmfunc(lj_vm_op_ofs[i]); | ||
31 | /* The JIT engine is off by default. luaopen_jit() turns it on. */ | ||
32 | disp[BC_FORL] = disp[BC_IFORL]; | ||
33 | disp[BC_ITERL] = disp[BC_IITERL]; | ||
34 | disp[BC_LOOP] = disp[BC_ILOOP]; | ||
35 | } | ||
36 | |||
37 | /* Update dispatch table depending on various flags. */ | ||
38 | void lj_dispatch_update(global_State *g) | ||
39 | { | ||
40 | uint8_t oldmode = g->dispatchmode; | ||
41 | uint8_t mode = 0; | ||
42 | #if LJ_HASJIT | ||
43 | mode |= (G2J(g)->flags & JIT_F_ON) ? 1 : 0; | ||
44 | mode |= G2J(g)->state != LJ_TRACE_IDLE ? 6 : 0; | ||
45 | #endif | ||
46 | mode |= (g->hookmask & HOOK_EVENTMASK) ? 2 : 0; | ||
47 | if (oldmode != mode) { /* Mode changed? */ | ||
48 | ASMFunction *disp = G2GG(g)->dispatch; | ||
49 | ASMFunction f_forl, f_iterl, f_loop; | ||
50 | g->dispatchmode = mode; | ||
51 | if ((mode & 5) == 1) { /* Hotcount if JIT is on, but not when recording. */ | ||
52 | f_forl = makeasmfunc(lj_vm_op_ofs[BC_FORL]); | ||
53 | f_iterl = makeasmfunc(lj_vm_op_ofs[BC_ITERL]); | ||
54 | f_loop = makeasmfunc(lj_vm_op_ofs[BC_LOOP]); | ||
55 | } else { /* Otherwise use the non-hotcounting instructions. */ | ||
56 | f_forl = disp[GG_DISP_STATIC+BC_IFORL]; | ||
57 | f_iterl = disp[GG_DISP_STATIC+BC_IITERL]; | ||
58 | f_loop = disp[GG_DISP_STATIC+BC_ILOOP]; | ||
59 | } | ||
60 | /* Set static loop ins first (may be copied below). */ | ||
61 | disp[GG_DISP_STATIC+BC_FORL] = f_forl; | ||
62 | disp[GG_DISP_STATIC+BC_ITERL] = f_iterl; | ||
63 | disp[GG_DISP_STATIC+BC_LOOP] = f_loop; | ||
64 | if ((oldmode & 6) != (mode & 6)) { /* Need to change whole table? */ | ||
65 | if ((mode & 6) == 0) { /* No hooks and no recording? */ | ||
66 | /* Copy static dispatch table to dynamic dispatch table. */ | ||
67 | memcpy(&disp[0], &disp[GG_DISP_STATIC], sizeof(ASMFunction)*BC__MAX); | ||
68 | } else { | ||
69 | /* The recording dispatch also checks for hooks. */ | ||
70 | ASMFunction f = (mode & 6) == 6 ? lj_vm_record : lj_vm_hook; | ||
71 | uint32_t i; | ||
72 | for (i = 0; i < BC__MAX; i++) | ||
73 | disp[i] = f; | ||
74 | } | ||
75 | } else if ((mode & 6) == 0) { /* Fix dynamic loop ins unless overriden. */ | ||
76 | disp[BC_FORL] = f_forl; | ||
77 | disp[BC_ITERL] = f_iterl; | ||
78 | disp[BC_LOOP] = f_loop; | ||
79 | } | ||
80 | } | ||
81 | } | ||
82 | |||
83 | /* -- JIT mode setting ---------------------------------------------------- */ | ||
84 | |||
85 | #if LJ_HASJIT | ||
86 | /* Set JIT mode for a single prototype. */ | ||
87 | static void setptmode(global_State *g, GCproto *pt, int mode) | ||
88 | { | ||
89 | if ((mode & LUAJIT_MODE_ON)) { /* (Re-)enable JIT compilation. */ | ||
90 | pt->flags &= ~PROTO_NO_JIT; | ||
91 | lj_trace_reenableproto(pt); /* Unpatch all ILOOP etc. bytecodes. */ | ||
92 | } else { /* Flush and/or disable JIT compilation. */ | ||
93 | if (!(mode & LUAJIT_MODE_FLUSH)) | ||
94 | pt->flags |= PROTO_NO_JIT; | ||
95 | lj_trace_flushproto(g, pt); /* Flush all traces of prototype. */ | ||
96 | } | ||
97 | } | ||
98 | |||
99 | /* Recursively set the JIT mode for all children of a prototype. */ | ||
100 | static void setptmode_all(global_State *g, GCproto *pt, int mode) | ||
101 | { | ||
102 | ptrdiff_t i; | ||
103 | for (i = -(ptrdiff_t)pt->sizekgc; i < 0; i++) { | ||
104 | GCobj *o = gcref(pt->k.gc[i]); | ||
105 | if (o->gch.gct == ~LJ_TPROTO) { | ||
106 | setptmode(g, gco2pt(o), mode); | ||
107 | setptmode_all(g, gco2pt(o), mode); | ||
108 | } | ||
109 | } | ||
110 | } | ||
111 | #endif | ||
112 | |||
113 | /* Public API function: control the JIT engine. Returns 1 on success and 0 on failure. */ | ||
114 | int luaJIT_setmode(lua_State *L, int idx, int mode) | ||
115 | { | ||
116 | global_State *g = G(L); | ||
117 | int mm = mode & LUAJIT_MODE_MASK; /* Selects the case below; the ON/FLUSH flags are tested on mode itself. */ | ||
118 | lj_trace_abort(g); /* Abort recording on any state change. */ | ||
119 | /* Avoid pulling the rug from under our own feet. */ | ||
120 | if ((g->hookmask & HOOK_GC)) | ||
121 | lj_err_caller(L, LJ_ERR_NOGCMM); | ||
122 | switch (mm) { | ||
123 | #if LJ_HASJIT | ||
124 | case LUAJIT_MODE_ENGINE: /* Flush all traces or switch the whole engine on/off. */ | ||
125 | if ((mode & LUAJIT_MODE_FLUSH)) { | ||
126 | lj_trace_flushall(L); | ||
127 | } else { | ||
128 | if ((mode & LUAJIT_MODE_ON)) | ||
129 | G2J(g)->flags |= (uint32_t)JIT_F_ON; | ||
130 | else | ||
131 | G2J(g)->flags &= ~(uint32_t)JIT_F_ON; | ||
132 | lj_dispatch_update(g); | ||
133 | } | ||
134 | break; | ||
135 | case LUAJIT_MODE_FUNC: | ||
136 | case LUAJIT_MODE_ALLFUNC: | ||
137 | case LUAJIT_MODE_ALLSUBFUNC: { /* idx == 0 takes frame_prev(L->base-1), idx > 0 counts from L->base, idx < 0 from L->top. */ | ||
138 | cTValue *tv = idx == 0 ? frame_prev(L->base-1) : | ||
139 | idx > 0 ? L->base + (idx-1) : L->top + idx; | ||
140 | GCproto *pt; | ||
141 | if ((idx == 0 || tvisfunc(tv)) && isluafunc(&gcval(tv)->fn)) | ||
142 | pt = funcproto(&gcval(tv)->fn); /* Cannot use funcV() for frame slot. */ | ||
143 | else if (tvisproto(tv)) | ||
144 | pt = protoV(tv); | ||
145 | else | ||
146 | return 0; /* Failed. */ | ||
147 | if (mm != LUAJIT_MODE_ALLSUBFUNC) /* ALLSUBFUNC leaves the prototype itself untouched. */ | ||
148 | setptmode(g, pt, mode); | ||
149 | if (mm != LUAJIT_MODE_FUNC) /* FUNC does not descend into child prototypes. */ | ||
150 | setptmode_all(g, pt, mode); | ||
151 | break; | ||
152 | } | ||
153 | case LUAJIT_MODE_TRACE: /* Only flushing is supported here. */ | ||
154 | if (!(mode & LUAJIT_MODE_FLUSH)) | ||
155 | return 0; /* Failed. */ | ||
156 | lj_trace_flush(G2J(g), idx); /* idx is handed to lj_trace_flush() -- presumably a trace number, TODO confirm. */ | ||
157 | break; | ||
158 | #else /* Built without the JIT compiler: requests succeed unless they try to turn it on. */ | ||
159 | case LUAJIT_MODE_ENGINE: | ||
160 | case LUAJIT_MODE_FUNC: | ||
161 | case LUAJIT_MODE_ALLFUNC: | ||
162 | case LUAJIT_MODE_ALLSUBFUNC: | ||
163 | UNUSED(idx); | ||
164 | if ((mode & LUAJIT_MODE_ON)) | ||
165 | return 0; /* Failed. */ | ||
166 | break; | ||
167 | #endif | ||
168 | default: | ||
169 | return 0; /* Failed. */ | ||
170 | } | ||
171 | return 1; /* OK. */ | ||
172 | } | ||
173 | |||
174 | /* Enforce (dynamic) linker error for version mismatches. See luajit.c. */ | ||
175 | LUA_API void LUAJIT_VERSION_SYM(void) | ||
176 | { | ||
177 | } | ||
178 | |||
179 | /* -- Hooks --------------------------------------------------------------- */ | ||
180 | |||
181 | /* This function can be called asynchronously (e.g. during a signal). */ | ||
182 | LUA_API int lua_sethook(lua_State *L, lua_Hook func, int mask, int count) | ||
183 | { | ||
184 | global_State *g = G(L); | ||
185 | mask &= HOOK_EVENTMASK; | ||
186 | if (func == NULL || mask == 0) { mask = 0; func = NULL; } /* Consistency. */ | ||
187 | g->hookf = func; | ||
188 | g->hookcount = g->hookcstart = (int32_t)count; | ||
189 | g->hookmask = (uint8_t)((g->hookmask & ~HOOK_EVENTMASK) | mask); | ||
190 | lj_trace_abort(g); /* Abort recording on any hook change. */ | ||
191 | lj_dispatch_update(g); | ||
192 | return 1; | ||
193 | } | ||
194 | |||
195 | LUA_API lua_Hook lua_gethook(lua_State *L) | ||
196 | { | ||
197 | return G(L)->hookf; | ||
198 | } | ||
199 | |||
200 | LUA_API int lua_gethookmask(lua_State *L) | ||
201 | { | ||
202 | return G(L)->hookmask & HOOK_EVENTMASK; | ||
203 | } | ||
204 | |||
205 | LUA_API int lua_gethookcount(lua_State *L) | ||
206 | { | ||
207 | return (int)G(L)->hookcstart; | ||
208 | } | ||
209 | |||
210 | /* Call a hook. */ | ||
211 | static void callhook(lua_State *L, int event, BCLine line) | ||
212 | { | ||
213 | global_State *g = G(L); | ||
214 | lua_Hook hookf = g->hookf; | ||
215 | if (hookf && !hook_active(g)) { | ||
216 | lua_Debug ar; | ||
217 | lj_trace_abort(g); /* Abort recording on any hook call. */ | ||
218 | ar.event = event; | ||
219 | ar.currentline = line; | ||
220 | ar.i_ci = cast_int((L->base-1) - L->stack); /* Top frame, nextframe=NULL. */ | ||
221 | lj_state_checkstack(L, 1+LUA_MINSTACK); | ||
222 | hook_enter(g); | ||
223 | hookf(L, &ar); | ||
224 | lua_assert(hook_active(g)); | ||
225 | hook_leave(g); | ||
226 | } | ||
227 | } | ||
228 | |||
229 | /* -- Instruction dispatch callbacks -------------------------------------- */ | ||
230 | |||
231 | /* Calculate number of used stack slots in the current frame. */ | ||
232 | static BCReg cur_topslot(GCproto *pt, const BCIns *pc, uint32_t nres) | ||
233 | { | ||
234 | BCIns ins = pc[-1]; | ||
235 | for (;;) { | ||
236 | switch (bc_op(ins)) { | ||
237 | case BC_UCLO: ins = pc[bc_j(ins)]; break; | ||
238 | case BC_CALLM: | ||
239 | case BC_CALLMT: return bc_a(ins) + bc_c(ins) + nres-1+1; | ||
240 | case BC_RETM: return bc_a(ins) + bc_d(ins) + nres-1; | ||
241 | case BC_TSETM: return bc_a(ins) + nres-1; | ||
242 | default: return pt->framesize; | ||
243 | } | ||
244 | } | ||
245 | } | ||
246 | |||
247 | /* Instruction dispatch callback for instr/line hooks or when recording. */ | ||
248 | void lj_dispatch_ins(lua_State *L, const BCIns *pc, uint32_t nres) | ||
249 | { | ||
250 | GCfunc *fn = curr_func(L); | ||
251 | GCproto *pt = funcproto(fn); | ||
252 | BCReg slots = cur_topslot(pt, pc, nres); | ||
253 | global_State *g = G(L); | ||
254 | const BCIns *oldpc = cframe_Lpc(L); | ||
255 | cframe_Lpc(L) = pc; | ||
256 | L->top = L->base + slots; /* Fix top. */ | ||
257 | #if LJ_HASJIT | ||
258 | { | ||
259 | jit_State *J = G2J(g); | ||
260 | if (J->state != LJ_TRACE_IDLE) { | ||
261 | J->L = L; | ||
262 | J->pc = pc-1; | ||
263 | J->fn = fn; | ||
264 | J->pt = pt; | ||
265 | lj_trace_ins(J); | ||
266 | } | ||
267 | } | ||
268 | #endif | ||
269 | if ((g->hookmask & LUA_MASKCOUNT) && g->hookcount == 0) { | ||
270 | g->hookcount = g->hookcstart; | ||
271 | callhook(L, LUA_HOOKCOUNT, -1); | ||
272 | } | ||
273 | if ((g->hookmask & LUA_MASKLINE) && pt->lineinfo) { | ||
274 | BCPos npc = (BCPos)(pc - pt->bc)-1; | ||
275 | BCPos opc = (BCPos)(oldpc - pt->bc)-1; | ||
276 | BCLine line = pt->lineinfo[npc]; | ||
277 | if (npc == 0 || pc <= oldpc || | ||
278 | opc >= pt->sizebc || line != pt->lineinfo[opc]) { | ||
279 | L->top = L->base + slots; /* Fix top again after instruction hook. */ | ||
280 | callhook(L, LUA_HOOKLINE, line); | ||
281 | } | ||
282 | } | ||
283 | } | ||
284 | |||
diff --git a/src/lj_dispatch.h b/src/lj_dispatch.h new file mode 100644 index 00000000..298aa166 --- /dev/null +++ b/src/lj_dispatch.h | |||
@@ -0,0 +1,64 @@ | |||
1 | /* | ||
2 | ** Instruction dispatch handling. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #ifndef _LJ_DISPATCH_H | ||
7 | #define _LJ_DISPATCH_H | ||
8 | |||
9 | #include "lj_obj.h" | ||
10 | #include "lj_bc.h" | ||
11 | #if LJ_HASJIT | ||
12 | #include "lj_jit.h" | ||
13 | #endif | ||
14 | |||
15 | /* Type of hot counter. Must match the code in the assembler VM. */ | ||
16 | /* 16 bits are sufficient. Only 0.0015% overhead with maximum slot penalty. */ | ||
17 | typedef uint16_t HotCount; | ||
18 | |||
19 | /* Number of hot counter hash table entries (must be a power of two). */ | ||
20 | #define HOTCOUNT_SIZE 64 | ||
21 | #define HOTCOUNT_PCMASK ((HOTCOUNT_SIZE-1)*sizeof(HotCount)) | ||
22 | #define HOTCOUNT_MIN_PENALTY 103 | ||
23 | #define HOTCOUNT_MAX_PENALTY 60000 | ||
24 | |||
25 | /* Global state, main thread and extra fields are allocated together. The G2GG/J2GG macros below rely on this to get from a member back to the container. */ | ||
26 | typedef struct GG_State { | ||
27 | lua_State L; /* Main thread. */ | ||
28 | global_State g; /* Global state. */ | ||
29 | #if LJ_HASJIT | ||
30 | jit_State J; /* JIT state. Only present if the JIT compiler is built in. */ | ||
31 | HotCount hotcount[HOTCOUNT_SIZE]; /* Hot counters, accessed via hotcount_get()/hotcount_set(). */ | ||
32 | #endif | ||
33 | ASMFunction dispatch[2*BC__MAX]; /* Instruction dispatch tables: two tables of BC__MAX entries, the second one starts at GG_DISP_STATIC. */ | ||
34 | } GG_State; | ||
35 | |||
/* Index of the first entry of the second (static) table in dispatch[]. */
#define GG_DISP_STATIC  BC__MAX

/* Byte offset of a GG_State member. */
#define GG_OFS(field)   ((int)offsetof(GG_State, field))

/* Get from a pointer to a GG_State member back to the enclosing GG_State.
** This uses offsetof() via GG_OFS() instead of taking the address of a
** member through a null pointer.
*/
#define G2GG(gl)        ((GG_State *)((char *)(gl) - GG_OFS(g)))
#define J2GG(j)         ((GG_State *)((char *)(j) - GG_OFS(J)))
#define L2GG(L)         G2GG(G(L))

/* Conversions between pointers to the different members. */
#define J2G(J)          (&J2GG(J)->g)
#define G2J(gl)         (&G2GG(gl)->J)
#define L2J(L)          (&L2GG(L)->J)

/* Byte distances between members. */
#define GG_G2DISP       (GG_OFS(dispatch) - GG_OFS(g))
#define GG_DISP2G       (GG_OFS(g) - GG_OFS(dispatch))
#define GG_DISP2J       (GG_OFS(J) - GG_OFS(dispatch))
#define GG_DISP2HOT     (GG_OFS(hotcount) - GG_OFS(dispatch))

/* Hot counter slot for a bytecode position: the pc address (shifted right
** by 2) is reduced modulo HOTCOUNT_SIZE. hotcount_get() is an lvalue.
*/
#define hotcount_get(gg, pc) \
  (gg)->hotcount[(u32ptr(pc)>>2) & (HOTCOUNT_SIZE-1)]
#define hotcount_set(gg, pc, val) \
  (hotcount_get((gg), (pc)) = (HotCount)(val))
56 | |||
57 | /* Dispatch table management. */ | ||
58 | LJ_FUNC void lj_dispatch_init(GG_State *GG); | ||
59 | LJ_FUNC void lj_dispatch_update(global_State *g); | ||
60 | |||
61 | /* Instruction dispatch callback for instr/line hooks or when recording. */ | ||
62 | LJ_FUNCA void lj_dispatch_ins(lua_State *L, const BCIns *pc, uint32_t nres); | ||
63 | |||
64 | #endif | ||
diff --git a/src/lj_err.c b/src/lj_err.c new file mode 100644 index 00000000..a723af48 --- /dev/null +++ b/src/lj_err.c | |||
@@ -0,0 +1,763 @@ | |||
1 | /* | ||
2 | ** Error handling and debugging API. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | ** | ||
5 | ** Portions taken verbatim or adapted from the Lua interpreter. | ||
6 | ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h | ||
7 | */ | ||
8 | |||
9 | #define lj_err_c | ||
10 | #define LUA_CORE | ||
11 | |||
12 | #include "lj_obj.h" | ||
13 | #include "lj_err.h" | ||
14 | #include "lj_str.h" | ||
15 | #include "lj_tab.h" | ||
16 | #include "lj_func.h" | ||
17 | #include "lj_state.h" | ||
18 | #include "lj_frame.h" | ||
19 | #include "lj_bc.h" | ||
20 | #include "lj_trace.h" | ||
21 | #include "lj_vm.h" | ||
22 | |||
23 | /* -- Error messages ------------------------------------------------------ */ | ||
24 | |||
/*
** Error message strings.
** Every ERRDEF entry of lj_errmsg.h expands to its message followed by an
** embedded "\0", so all messages end up concatenated in one string literal.
** The ErrMsg enum in lj_err.h is built from the same list and assigns each
** LJ_ERR_* identifier the byte offset of its message inside this string.
*/
static const char *lj_err_allmsg =
#define ERRDEF(name, msg)       msg "\0"
#include "lj_errmsg.h"
;

/* Convert an ErrMsg (byte offset) to a pointer to its message text. */
#define err2msg(em)     (lj_err_allmsg+(int)(em))
32 | |||
33 | /* -- Frame and function introspection ------------------------------------ */ | ||
34 | |||
/*
** Get the bytecode position for function fn.
** nextframe is the frame above the frame of fn, or NULL if fn is the
** topmost function. Returns ~(BCPos)0 if fn is not a Lua function.
*/
static BCPos currentpc(lua_State *L, GCfunc *fn, cTValue *nextframe)
{
  const BCIns *ins;
  lua_assert(fn->c.gct == ~LJ_TFUNC || fn->c.gct == ~LJ_TTHREAD);
  if (!isluafunc(fn)) {  /* Cannot derive a PC for non-Lua functions. */
    return ~(BCPos)0;
  } else if (nextframe == NULL) {  /* Lua function on top. */
    ins = cframe_Lpc(L);  /* Only happens during error/hook handling. */
  } else {
    if (frame_islua(nextframe)) {
      /* The next frame is a Lua frame: take the PC stored in it. */
      ins = frame_pc(nextframe);
    } else if (frame_iscont(nextframe)) {
      /* Continuation frame: the PC is stored in a separate field. */
      ins = frame_contpc(nextframe);
    } else {
      /* Lua function below errfunc/gc/hook: find cframe to get the PC. */
      void *cf = cframe_raw(L->cframe);
      TValue *f = L->base-1;
      /* Walk down from the top frame, unwinding one cframe per C frame. */
      while (f > nextframe) {
        if (frame_islua(f)) {
          f = frame_prevl(f);
        } else {
          if (frame_isc(f))
            cf = cframe_raw(cframe_prev(cf));
          f = frame_prevd(f);
        }
      }
      /* Step one more cframe down, if there is one, and read its PC. */
      if (cframe_prev(cf))
        cf = cframe_raw(cframe_prev(cf));
      ins = cframe_pc(cf);
    }
  }
  /* Convert to an index into the prototype's bytecode array, minus one. */
  return (BCPos)((ins - funcproto(fn)->bc) - 1);
}
68 | |||
69 | static BCLine currentline(lua_State *L, GCfunc *fn, cTValue *nextframe) | ||
70 | { | ||
71 | BCPos pc = currentpc(L, fn, nextframe); | ||
72 | if (pc != ~(BCPos)0) { | ||
73 | GCproto *pt = funcproto(fn); | ||
74 | lua_assert(pc < pt->sizebc); | ||
75 | return pt->lineinfo ? pt->lineinfo[pc] : 0; | ||
76 | } else { | ||
77 | return -1; | ||
78 | } | ||
79 | } | ||
80 | |||
81 | static const char *getvarname(const GCproto *pt, BCPos pc, BCReg slot) | ||
82 | { | ||
83 | MSize i; | ||
84 | for (i = 0; i < pt->sizevarinfo && pt->varinfo[i].startpc <= pc; i++) | ||
85 | if (pc < pt->varinfo[i].endpc && slot-- == 0) | ||
86 | return strdata(pt->varinfo[i].name); | ||
87 | return NULL; | ||
88 | } | ||
89 | |||
/*
** Try to find a name for the value in a slot. First checks for a named
** local variable, then scans the bytecode backwards from ip for the
** instruction that wrote to the slot.
** On success the name is stored in *name and a string describing its kind
** is returned ("local", "global", "field", "method" or "upvalue").
** Returns NULL if no name can be derived.
*/
static const char *getobjname(GCproto *pt, const BCIns *ip, BCReg slot,
                              const char **name)
{
  const char *lname;
restart:
  lname = getvarname(pt, (BCPos)(ip - pt->bc), slot);
  if (lname != NULL) { *name = lname; return "local"; }
  while (--ip >= pt->bc) {
    BCIns ins = *ip;
    BCOp op = bc_op(ins);
    BCReg ra = bc_a(ins);
    if (bcmode_a(op) == BCMbase) {
      /* Give up if the slot is at or above operand A of such an instruction
      ** (for KNIL only if it is also at or below operand D).
      */
      if (slot >= ra && (op != BC_KNIL || slot <= bc_d(ins)))
        return NULL;
    } else if (bcmode_a(op) == BCMdst && ra == slot) {
      /* Found the instruction that wrote to the slot. */
      switch (bc_op(ins)) {
      case BC_MOV:
        /* Copied from another slot: continue the search for that slot. */
        if (ra == slot) { slot = bc_d(ins); goto restart; }
        break;
      case BC_GGET:
        *name = strdata(gco2str(gcref(pt->k.gc[~bc_d(ins)])));
        return "global";
      case BC_TGETS:
        *name = strdata(gco2str(gcref(pt->k.gc[~bc_c(ins)])));
        if (ip > pt->bc) {
          /* A preceding MOV of the indexed object into slot ra+1 is
          ** reported as a method lookup.
          */
          BCIns insp = ip[-1];
          if (bc_op(insp) == BC_MOV && bc_a(insp) == ra+1 &&
              bc_d(insp) == bc_b(ins))
            return "method";
        }
        return "field";
      case BC_UGET:
        /* Upvalue names may be missing from the prototype. */
        *name = pt->uvname ? strdata(pt->uvname[bc_d(ins)]) : "?";
        return "upvalue";
      default:
        return NULL;
      }
    }
  }
  return NULL;
}
131 | |||
132 | static const char *getfuncname(lua_State *L, TValue *frame, const char **name) | ||
133 | { | ||
134 | MMS mm; | ||
135 | const BCIns *ip; | ||
136 | TValue *pframe; | ||
137 | GCfunc *fn; | ||
138 | BCPos pc; | ||
139 | if (frame_isvarg(frame)) | ||
140 | frame = frame_prevd(frame); | ||
141 | pframe = frame_prev(frame); | ||
142 | fn = frame_func(pframe); | ||
143 | pc = currentpc(L, fn, frame); | ||
144 | if (pc == ~(BCPos)0) | ||
145 | return NULL; | ||
146 | lua_assert(pc < funcproto(fn)->sizebc); | ||
147 | ip = &funcproto(fn)->bc[pc]; | ||
148 | mm = bcmode_mm(bc_op(*ip)); | ||
149 | if (mm == MM_call) { | ||
150 | BCReg slot = bc_a(*ip); | ||
151 | if (bc_op(*ip) == BC_ITERC) slot -= 3; | ||
152 | return getobjname(funcproto(fn), ip, slot, name); | ||
153 | } else if (mm != MM_MAX) { | ||
154 | *name = strdata(strref(G(L)->mmname[mm])); | ||
155 | return "metamethod"; | ||
156 | } else { | ||
157 | return NULL; | ||
158 | } | ||
159 | } | ||
160 | |||
161 | void lj_err_pushloc(lua_State *L, GCproto *pt, BCPos pc) | ||
162 | { | ||
163 | GCstr *name = pt->chunkname; | ||
164 | if (name) { | ||
165 | const char *s = strdata(name); | ||
166 | MSize i, len = name->len; | ||
167 | BCLine line; | ||
168 | if (pc) | ||
169 | line = pt->lineinfo ? pt->lineinfo[pc-1] : 0; | ||
170 | else | ||
171 | line = pt->linedefined; | ||
172 | if (*s == '@') { | ||
173 | s++; len--; | ||
174 | for (i = len; i > 0; i--) | ||
175 | if (s[i] == '/' || s[i] == '\\') { | ||
176 | s += i+1; | ||
177 | break; | ||
178 | } | ||
179 | lj_str_pushf(L, "%s:%d", s, line); | ||
180 | } else if (len > 40) { | ||
181 | lj_str_pushf(L, "%p:%d", pt, line); | ||
182 | } else if (*s == '=') { | ||
183 | lj_str_pushf(L, "%s:%d", s+1, line); | ||
184 | } else { | ||
185 | lj_str_pushf(L, "\"%s\":%d", s, line); | ||
186 | } | ||
187 | } else { | ||
188 | lj_str_pushf(L, "%p:%u", pt, pc); | ||
189 | } | ||
190 | } | ||
191 | |||
/*
** Build a printable chunk id from a chunk name.
**   "=name"  -> "name" (cut to fit)
**   "@file"  -> "file", or "..." plus the tail of the file name
**   other    -> [string "source"], cut at the first control character
** out must point to a buffer of at least LUA_IDSIZE bytes. The result is
** always NUL-terminated and never exceeds the buffer.
*/
static void err_chunkid(char *out, const char *src)
{
  if (*src == '=') {
    strncpy(out, src+1, LUA_IDSIZE);  /* Remove first char. */
    out[LUA_IDSIZE-1] = '\0';  /* Ensures null termination. */
  } else if (*src == '@') {  /* out = "source", or "...source" */
    size_t l = strlen(++src);  /* Skip the `@'. */
    if (l >= LUA_IDSIZE) {
      src += l-(LUA_IDSIZE-4);  /* Get last part of file name. */
      strcpy(out, "...");
      out += 3;
    }
    strcpy(out, src);
  } else {  /* out = [string "string"] */
    size_t len;  /* Length, up to first control char. */
    /* An untruncated source may use at most LUA_IDSIZE-12 chars:
    ** 9 for the prefix, 2 for the suffix and 1 for the terminator.
    ** A limit of LUA_IDSIZE-11 would overflow the buffer by one byte.
    */
    for (len = 0; len < LUA_IDSIZE-12; len++)
      if (((const unsigned char *)src)[len] < ' ') break;
    strcpy(out, "[string \""); out += 9;
    if (src[len] != '\0') {  /* Must truncate? */
      /* Leave room for the 3 dots, too. */
      if (len > LUA_IDSIZE-15) len = LUA_IDSIZE-15;
      strncpy(out, src, len); out += len;
      strcpy(out, "..."); out += 3;
    } else {
      strcpy(out, src); out += len;
    }
    strcpy(out, "\"]");
  }
}
220 | |||
221 | /* -- Public debug API ---------------------------------------------------- */ | ||
222 | |||
223 | static TValue *findlocal(lua_State *L, const lua_Debug *ar, | ||
224 | const char **name, BCReg slot) | ||
225 | { | ||
226 | uint32_t offset = (uint32_t)ar->i_ci & 0xffff; | ||
227 | uint32_t size = (uint32_t)ar->i_ci >> 16; | ||
228 | TValue *frame = L->stack + offset; | ||
229 | TValue *nextframe = size ? frame + size : NULL; | ||
230 | GCfunc *fn = frame_func(frame); | ||
231 | BCPos pc = currentpc(L, fn, nextframe); | ||
232 | if (pc != ~(BCPos)0 && | ||
233 | (*name = getvarname(funcproto(fn), pc, slot-1)) != NULL) | ||
234 | ; | ||
235 | else if (slot > 0 && frame + slot < (nextframe ? nextframe : L->top)) | ||
236 | *name = "(*temporary)"; | ||
237 | else | ||
238 | *name = NULL; | ||
239 | return frame+slot; | ||
240 | } | ||
241 | |||
242 | LUA_API const char *lua_getlocal(lua_State *L, const lua_Debug *ar, int n) | ||
243 | { | ||
244 | const char *name; | ||
245 | TValue *o = findlocal(L, ar, &name, (BCReg)n); | ||
246 | if (name) { | ||
247 | copyTV(L, L->top, o); | ||
248 | incr_top(L); | ||
249 | } | ||
250 | return name; | ||
251 | } | ||
252 | |||
253 | |||
254 | LUA_API const char *lua_setlocal(lua_State *L, const lua_Debug *ar, int n) | ||
255 | { | ||
256 | const char *name; | ||
257 | TValue *o = findlocal(L, ar, &name, (BCReg)n); | ||
258 | if (name) | ||
259 | copyTV(L, o, L->top-1); | ||
260 | L->top--; | ||
261 | return name; | ||
262 | } | ||
263 | |||
/*
** Fill in the fields of ar selected by the option characters in what.
** If what starts with '>', the function is taken and popped from the top
** of the stack and no frame is available. Otherwise ar->i_ci is decoded
** the same way lua_getstack() encodes it (frame offset in the low 16 bits,
** distance to the next frame in the high 16 bits).
** Returns 1 on success or 0 if an unknown option was given. The remaining
** options are still processed in that case.
*/
LUA_API int lua_getinfo(lua_State *L, const char *what, lua_Debug *ar)
{
  int status = 1;
  TValue *frame = NULL;
  TValue *nextframe = NULL;
  GCfunc *fn;
  if (*what == '>') {
    TValue *func = L->top - 1;
    api_check(L, tvisfunc(func));
    fn = funcV(func);
    L->top--;
    what++;
  } else {
    uint32_t offset = (uint32_t)ar->i_ci & 0xffff;
    uint32_t size = (uint32_t)ar->i_ci >> 16;
    lua_assert(offset != 0);
    frame = L->stack + offset;
    if (size) nextframe = frame + size;
    lua_assert(frame<=L->maxstack && (!nextframe || nextframe<=L->maxstack));
    fn = frame_func(frame);
    lua_assert(fn->c.gct == ~LJ_TFUNC);
  }
  for (; *what; what++) {
    switch (*what) {
    case 'S':  /* Source information. */
      if (isluafunc(fn)) {
        ar->source = strdata(funcproto(fn)->chunkname);
        ar->linedefined = cast_int(funcproto(fn)->linedefined);
        ar->lastlinedefined = cast_int(funcproto(fn)->lastlinedefined);
        ar->what = (ar->linedefined == 0) ? "main" : "Lua";
      } else {
        ar->source = "=[C]";
        ar->linedefined = -1;
        ar->lastlinedefined = -1;
        ar->what = "C";
      }
      err_chunkid(ar->short_src, ar->source);
      break;
    case 'l':  /* Current line. Needs a frame, otherwise -1. */
      ar->currentline = frame ? currentline(L, fn, nextframe) : -1;
      break;
    case 'u':  /* Number of upvalues. */
      ar->nups = fn->c.nupvalues;
      break;
    case 'n':  /* Function name. Needs a frame, otherwise no name. */
      ar->namewhat = frame ? getfuncname(L, frame, &ar->name) : NULL;
      if (ar->namewhat == NULL) {
        ar->namewhat = "";
        ar->name = NULL;
      }
      break;
    case 'f':  /* Push the function itself. */
      setfuncV(L, L->top, fn);
      incr_top(L);
      break;
    case 'L':  /* Push a table with the lines as keys, or nil. */
      if (isluafunc(fn)) {
        GCtab *t = lj_tab_new(L, 0, 0);
        BCLine *lineinfo = funcproto(fn)->lineinfo;
        uint32_t i, szl = funcproto(fn)->sizelineinfo;
        for (i = 0; i < szl; i++)
          setboolV(lj_tab_setint(L, t, lineinfo[i]), 1);
        settabV(L, L->top, t);
      } else {
        setnilV(L->top);
      }
      incr_top(L);
      break;
    default:
      status = 0;  /* Bad option. */
      break;
    }
  }
  return status;
}
339 | |||
/*
** Get the frame for a stack level, counting from the topmost frame
** (level 0) downwards. Dummy frames and vararg pseudo-frames are not
** counted.
** On success the frame is returned and *size is set to the distance to
** the next frame above it (0 for the topmost frame).
** If the level is not found, NULL is returned and *size is set to the
** leftover level count.
*/
cTValue *lj_err_getframe(lua_State *L, int level, int *size)
{
  cTValue *frame, *nextframe;
  /* Traverse frames backwards. */
  for (nextframe = frame = L->base-1; frame > L->stack; ) {
    if (frame_gc(frame) == obj2gco(L))
      level++;  /* Skip dummy frames. See lj_meta_call(). */
    if (level-- == 0) {
      *size = cast_int(nextframe - frame);
      return frame;  /* Level found. */
    }
    nextframe = frame;
    if (frame_islua(frame)) {
      frame = frame_prevl(frame);
    } else {
      if (frame_isvarg(frame))
        level++;  /* Skip vararg pseudo-frame. */
      frame = frame_prevd(frame);
    }
  }
  *size = level;
  return NULL;  /* Level not found. */
}
363 | |||
364 | LUA_API int lua_getstack(lua_State *L, int level, lua_Debug *ar) | ||
365 | { | ||
366 | int size; | ||
367 | cTValue *frame = lj_err_getframe(L, level, &size); | ||
368 | if (frame) { | ||
369 | ar->i_ci = (size << 16) + cast_int(frame - L->stack); | ||
370 | return 1; | ||
371 | } else { | ||
372 | ar->i_ci = level - size; | ||
373 | return 0; | ||
374 | } | ||
375 | } | ||
376 | |||
377 | /* -- Error handling ------------------------------------------------------ */ | ||
378 | |||
/*
** Return string object for error message.
** The message text for em is looked up with err2msg() and turned into a
** string object with lj_str_newz().
*/
LJ_NOINLINE GCstr *lj_err_str(lua_State *L, ErrMsg em)
{
  return lj_str_newz(L, err2msg(em));
}
384 | |||
385 | /* Unwind Lua stack and add error message on top. */ | ||
386 | LJ_NOINLINE static void unwindstack(lua_State *L, TValue *top, int errcode) | ||
387 | { | ||
388 | lj_func_closeuv(L, top); | ||
389 | switch (errcode) { | ||
390 | case LUA_ERRMEM: | ||
391 | setstrV(L, top, lj_err_str(L, LJ_ERR_ERRMEM)); | ||
392 | break; | ||
393 | case LUA_ERRERR: | ||
394 | setstrV(L, top, lj_err_str(L, LJ_ERR_ERRERR)); | ||
395 | break; | ||
396 | case LUA_ERRSYNTAX: | ||
397 | case LUA_ERRRUN: | ||
398 | copyTV(L, top, L->top - 1); | ||
399 | break; | ||
400 | default: | ||
401 | lua_assert(0); | ||
402 | break; | ||
403 | } | ||
404 | L->top = top+1; | ||
405 | lj_state_relimitstack(L); | ||
406 | } | ||
407 | |||
/*
** Throw error. Find catch frame, unwind stack and continue.
** Walks the Lua frames and the chain of C frames in parallel until a frame
** that catches errors is found. Then L->cframe and L->base are reset for
** that frame, the stack is unwound and control is handed to the VM.
** Without a catch frame the error is rethrown on the main thread and
** finally passed to the panic function. If that is missing or returns,
** the process exits.
*/
LJ_NOINLINE void lj_err_throw(lua_State *L, int errcode)
{
  TValue *frame = L->base-1;
  void *cf = L->cframe;
  global_State *g = G(L);
  if (L->status == LUA_ERRERR+1) {  /* Don't touch the stack during lua_open. */
    lj_vm_unwind_c(cf, errcode);
    goto uncaught;  /* unreachable */
  }
  lj_trace_abort(g);
  setgcrefnull(g->jit_L);
  L->status = 0;
  while (cf) {
    if (cframe_nres(cframe_raw(cf)) < 0) {  /* cframe without frame? */
      /* The negated nres field holds a saved stack position here. */
      TValue *top = restorestack(L, -cframe_nres(cf));
      if (frame < top) {
        L->cframe = cframe_prev(cf);
        L->base = frame+1;
        unwindstack(L, top, errcode);
        lj_vm_unwind_c(cf, errcode);
        goto uncaught;  /* unreachable */
      }
    }
    if (frame <= L->stack)
      break;
    switch (frame_typep(frame)) {
    case FRAME_LUA:
    case FRAME_LUAP:
      frame = frame_prevl(frame);
      break;
    case FRAME_C:
      if (cframe_canyield(cf)) goto uncaught;
      cf = cframe_prev(cf);
      /* fallthrough */
    case FRAME_CONT:
    case FRAME_VARG:
      frame = frame_prevd(frame);
      break;
    case FRAME_CP:  /* Catch frame: unwind to it via its C frame. */
      L->cframe = cframe_prev(cf);
      L->base = frame_prevd(frame) + 1;
      unwindstack(L, frame, errcode);
      lj_vm_unwind_c(cf, errcode);
      goto uncaught;  /* unreachable */
    case FRAME_PCALL:
      hook_leave(g);
      /* fallthrough */
    case FRAME_PCALLH:  /* Catch frame: the current C frame is kept. */
      L->cframe = cf;
      L->base = frame_prevd(frame) + 1;
      unwindstack(L, L->base, errcode);
      lj_vm_unwind_ff(cf);
      goto uncaught;  /* unreachable */
    default:
      lua_assert(0);
      goto uncaught;
    }
  }
  /* No catch frame found. Must be a resume or an unprotected error. */
uncaught:
  L->status = cast_byte(errcode);
  L->cframe = NULL;
  if (cframe_canyield(cf)) {  /* Resume? */
    unwindstack(L, L->top, errcode);
    lj_vm_unwind_c(cf, errcode);
  }
  /* Better rethrow on main thread than panic. */
  {
    if (L != mainthread(g))
      lj_err_throw(mainthread(g), errcode);
    if (g->panic) {
      /* Reset to the bottom of the stack before calling the panic function. */
      L->base = L->stack+1;
      unwindstack(L, L->base, errcode);
      g->panic(L);
    }
  }
  exit(EXIT_FAILURE);
}
487 | |||
/*
** Find error function for runtime errors. Requires an extra stack traversal.
** Walks the Lua frames and the chain of C frames in parallel, like
** lj_err_throw() does. Returns the saved stack position of the error
** function (to be used with restorestack()) or 0 if there is none.
*/
static ptrdiff_t finderrfunc(lua_State *L)
{
  TValue *frame = L->base-1;
  void *cf = L->cframe;
  while (frame > L->stack) {
    lua_assert(cf != NULL);
    while (cframe_nres(cframe_raw(cf)) < 0) {  /* cframe without frame? */
      if (frame >= restorestack(L, -cframe_nres(cf)))
        break;
      if (cframe_errfunc(cf) >= 0)  /* Error handler not inherited (-1)? */
        return cframe_errfunc(cf);
      cf = cframe_prev(cf);  /* Else unwind cframe and continue searching. */
      if (cf == NULL)
        return 0;
    }
    switch (frame_typep(frame)) {
    case FRAME_LUA:
    case FRAME_LUAP:
      frame = frame_prevl(frame);
      break;
    case FRAME_C:
      if (cframe_canyield(cf)) return 0;
      cf = cframe_prev(cf);
      /* fallthrough */
    case FRAME_CONT:
    case FRAME_VARG:
      frame = frame_prevd(frame);
      break;
    case FRAME_CP:
      /* Use the error function of the C frame, unless it is inherited. */
      if (cframe_errfunc(cf) >= 0)
        return cframe_errfunc(cf);
      frame = frame_prevd(frame);
      break;
    case FRAME_PCALL:
    case FRAME_PCALLH:
      /* Only xpcall has an error function. The search always ends here. */
      if (frame_ftsz(frame) >= (ptrdiff_t)(2*sizeof(TValue)))  /* xpcall? */
        return savestack(L, frame-1);  /* Point to xpcall's errorfunc. */
      return 0;
    default:
      lua_assert(0);
      return 0;
    }
  }
  return 0;
}
534 | |||
/*
** Runtime error.
** Expects the error message on top of the stack. If an error function is
** found, it is called with the message and its result replaces the
** message. Then the error is thrown as LUA_ERRRUN.
** LUA_ERRERR is thrown instead if the error function is not a function
** or if another error is raised while an error function is running.
*/
LJ_NOINLINE void lj_err_run(lua_State *L)
{
  ptrdiff_t ef = finderrfunc(L);
  if (ef) {
    TValue *errfunc = restorestack(L, ef);
    TValue *top = L->top;
    lj_trace_abort(G(L));
    if (!tvisfunc(errfunc) || L->status == LUA_ERRERR)
      lj_err_throw(L, LUA_ERRERR);
    L->status = LUA_ERRERR;  /* Marks a running error function. */
    /* Move the message up by one slot and put the error function below. */
    copyTV(L, top, top-1);
    copyTV(L, top-1, errfunc);
    L->top = top+1;
    lj_vm_call(L, top, 1+1);  /* Stack: |errfunc|msg| -> |msg| */
  }
  lj_err_throw(L, LUA_ERRRUN);
}
553 | |||
554 | /* Add location to error message. */ | ||
555 | LJ_NOINLINE static void err_loc(lua_State *L, const char *msg, | ||
556 | cTValue *frame, cTValue *nextframe) | ||
557 | { | ||
558 | if (frame) { | ||
559 | GCfunc *fn = frame_func(frame); | ||
560 | if (isluafunc(fn)) { | ||
561 | char buff[LUA_IDSIZE]; | ||
562 | BCLine line = currentline(L, fn, nextframe); | ||
563 | err_chunkid(buff, strdata(funcproto(fn)->chunkname)); | ||
564 | lj_str_pushf(L, "%s:%d: %s", buff, line, msg); | ||
565 | return; | ||
566 | } | ||
567 | } | ||
568 | lj_str_pushf(L, "%s", msg); | ||
569 | } | ||
570 | |||
/*
** Formatted runtime error message.
** Formats the message for em with the given arguments, adds the location
** of the current function and raises a runtime error.
*/
LJ_NORET LJ_NOINLINE static void err_msgv(lua_State *L, ErrMsg em, ...)
{
  const char *msg;
  va_list argp;
  va_start(argp, em);
  /* For a Lua function, reset the stack top before pushing anything. */
  if (curr_funcisL(L)) L->top = curr_topL(L);
  msg = lj_str_pushvf(L, err2msg(em), argp);
  va_end(argp);
  err_loc(L, msg, L->base-1, NULL);
  lj_err_run(L);
}
583 | |||
/*
** Non-vararg variant for better calling conventions.
** Raises a runtime error with the message for em, which is passed on
** without any format arguments.
*/
LJ_NOINLINE void lj_err_msg(lua_State *L, ErrMsg em)
{
  err_msgv(L, em);
}
589 | |||
/*
** Lexer error.
** Builds a message of the form chunkid:line: message, optionally followed
** by the offending token, and throws it as a syntax error.
** src is the chunk name, tok the token text or NULL, argp holds the
** arguments for the format string of em.
*/
LJ_NOINLINE void lj_err_lex(lua_State *L, const char *src, const char *tok,
                            BCLine line, ErrMsg em, va_list argp)
{
  char buff[LUA_IDSIZE];
  const char *msg;
  err_chunkid(buff, src);
  msg = lj_str_pushvf(L, err2msg(em), argp);
  msg = lj_str_pushf(L, "%s:%d: %s", buff, line, msg);
  if (tok)  /* Add the token with the LJ_ERR_XNEAR format. */
    lj_str_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tok);
  lj_err_throw(L, LUA_ERRSYNTAX);
}
603 | |||
604 | /* Typecheck error for operands. */ | ||
605 | LJ_NOINLINE void lj_err_optype(lua_State *L, cTValue *o, ErrMsg opm) | ||
606 | { | ||
607 | const char *tname = typename(o); | ||
608 | const char *oname = NULL; | ||
609 | const char *opname = err2msg(opm); | ||
610 | if (curr_funcisL(L)) { | ||
611 | GCproto *pt = curr_proto(L); | ||
612 | const BCIns *pc = cframe_Lpc(L) - 1; | ||
613 | const char *kind = getobjname(pt, pc, (BCReg)(o - L->base), &oname); | ||
614 | if (kind) | ||
615 | err_msgv(L, LJ_ERR_BADOPRT, opname, kind, oname, tname); | ||
616 | } | ||
617 | err_msgv(L, LJ_ERR_BADOPRV, opname, tname); | ||
618 | } | ||
619 | |||
620 | /* Typecheck error for ordered comparisons. */ | ||
621 | LJ_NOINLINE void lj_err_comp(lua_State *L, cTValue *o1, cTValue *o2) | ||
622 | { | ||
623 | const char *t1 = typename(o1); | ||
624 | const char *t2 = typename(o2); | ||
625 | err_msgv(L, t1 == t2 ? LJ_ERR_BADCMPV : LJ_ERR_BADCMPT, t1, t2); | ||
626 | /* This assumes the two "boolean" entries are commoned by the C compiler. */ | ||
627 | } | ||
628 | |||
/*
** Typecheck error for __call.
** o is the slot holding the non-callable object.
*/
LJ_NOINLINE void lj_err_optype_call(lua_State *L, TValue *o)
{
  /* Gross hack if lua_[p]call or pcall/xpcall fail for a non-callable object:
  ** L->base still points to the caller. So add a dummy frame with L instead
  ** of a function. See lua_getstack().
  */
  const BCIns *pc = cframe_Lpc(L);
  if (((ptrdiff_t)pc & FRAME_TYPE) != FRAME_LUA) {
    /* Get the type name first, since the slot is overwritten below. */
    const char *tname = typename(o);
    setframe_pc(o, pc);
    setframe_gc(o, obj2gco(L));
    L->top = L->base = o+1;
    err_msgv(L, LJ_ERR_BADCALL, tname);
  }
  /* Otherwise report it like any other operand type error. */
  lj_err_optype(L, o, LJ_ERR_OPCALL);
}
646 | |||
647 | /* Error in context of caller. */ | ||
648 | LJ_NOINLINE void lj_err_callermsg(lua_State *L, const char *msg) | ||
649 | { | ||
650 | cTValue *frame = L->base-1; | ||
651 | cTValue *pframe = frame_islua(frame) ? frame_prevl(frame) : NULL; | ||
652 | err_loc(L, msg, pframe, frame); | ||
653 | lj_err_run(L); | ||
654 | } | ||
655 | |||
/*
** Formatted error in context of caller.
** Formats the message for em with the given arguments and raises it via
** lj_err_callermsg().
*/
LJ_NOINLINE void lj_err_callerv(lua_State *L, ErrMsg em, ...)
{
  const char *msg;
  va_list argp;
  va_start(argp, em);
  msg = lj_str_pushvf(L, err2msg(em), argp);
  va_end(argp);
  lj_err_callermsg(L, msg);
}
666 | |||
/*
** Error in context of caller.
** Same as lj_err_callermsg(), but takes an ErrMsg instead of a string.
*/
LJ_NOINLINE void lj_err_caller(lua_State *L, ErrMsg em)
{
  lj_err_callermsg(L, err2msg(em));
}
672 | |||
673 | /* Argument error message. */ | ||
674 | LJ_NORET LJ_NOINLINE static void err_argmsg(lua_State *L, int narg, | ||
675 | const char *msg) | ||
676 | { | ||
677 | const char *fname = "?"; | ||
678 | const char *ftype = getfuncname(L, L->base - 1, &fname); | ||
679 | if (ftype && ftype[3] == 'h' && --narg == 0) /* Check for "method". */ | ||
680 | msg = lj_str_pushf(L, err2msg(LJ_ERR_BADSELF), fname, msg); | ||
681 | else | ||
682 | msg = lj_str_pushf(L, err2msg(LJ_ERR_BADARG), narg, fname, msg); | ||
683 | lj_err_callermsg(L, msg); | ||
684 | } | ||
685 | |||
/*
** Formatted argument error.
** Formats the message for em with the given arguments and raises it as
** an error for argument narg.
*/
LJ_NOINLINE void lj_err_argv(lua_State *L, int narg, ErrMsg em, ...)
{
  const char *msg;
  va_list argp;
  va_start(argp, em);
  msg = lj_str_pushvf(L, err2msg(em), argp);
  va_end(argp);
  err_argmsg(L, narg, msg);
}
696 | |||
/*
** Argument error.
** Raises an error for argument narg with the message for em.
*/
LJ_NOINLINE void lj_err_arg(lua_State *L, int narg, ErrMsg em)
{
  err_argmsg(L, narg, err2msg(em));
}
702 | |||
703 | /* Typecheck error for arguments. */ | ||
704 | LJ_NOINLINE void lj_err_argtype(lua_State *L, int narg, const char *xname) | ||
705 | { | ||
706 | TValue *o = L->base + narg-1; | ||
707 | const char *tname = o < L->top ? typename(o) : lj_obj_typename[0]; | ||
708 | const char *msg = lj_str_pushf(L, err2msg(LJ_ERR_BADTYPE), xname, tname); | ||
709 | err_argmsg(L, narg, msg); | ||
710 | } | ||
711 | |||
/*
** Typecheck error for arguments.
** Same as lj_err_argtype(), but the expected type is given as a type
** number tt. Its name is taken from lj_obj_typename[tt+1].
*/
LJ_NOINLINE void lj_err_argt(lua_State *L, int narg, int tt)
{
  lj_err_argtype(L, narg, lj_obj_typename[tt+1]);
}
717 | |||
718 | /* -- Public error handling API ------------------------------------------- */ | ||
719 | |||
/* Set a new panic function in the global state and return the old one. */
LUA_API lua_CFunction lua_atpanic(lua_State *L, lua_CFunction panicf)
{
  lua_CFunction old = G(L)->panic;
  G(L)->panic = panicf;
  return old;
}
726 | |||
/* Forwarders for the public API (C calling convention and no LJ_NORET). */

/* Raise a runtime error via lj_err_run(). */
LUA_API int lua_error(lua_State *L)
{
  lj_err_run(L);
  return 0;  /* unreachable */
}
733 | |||
/* Raise an error for argument narg with msg as the detail text. */
LUALIB_API int luaL_argerror(lua_State *L, int narg, const char *msg)
{
  err_argmsg(L, narg, msg);
  return 0;  /* unreachable */
}
739 | |||
/* Raise a type error for argument narg. xname names the expected type. */
LUALIB_API int luaL_typerror(lua_State *L, int narg, const char *xname)
{
  lj_err_argtype(L, narg, xname);
  return 0;  /* unreachable */
}
745 | |||
746 | LUALIB_API void luaL_where(lua_State *L, int level) | ||
747 | { | ||
748 | int size; | ||
749 | cTValue *frame = lj_err_getframe(L, level, &size); | ||
750 | err_loc(L, "", frame, size ? frame+size : NULL); | ||
751 | } | ||
752 | |||
/*
** Raise an error with a formatted message. The message is formatted from
** fmt and the given arguments and raised via lj_err_callermsg().
*/
LUALIB_API int luaL_error(lua_State *L, const char *fmt, ...)
{
  const char *msg;
  va_list argp;
  va_start(argp, fmt);
  msg = lj_str_pushvf(L, fmt, argp);
  va_end(argp);
  lj_err_callermsg(L, msg);
  return 0;  /* unreachable */
}
763 | |||
diff --git a/src/lj_err.h b/src/lj_err.h new file mode 100644 index 00000000..e794d44c --- /dev/null +++ b/src/lj_err.h | |||
@@ -0,0 +1,40 @@ | |||
1 | /* | ||
2 | ** Error handling and debugging support. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #ifndef _LJ_ERR_H | ||
7 | #define _LJ_ERR_H | ||
8 | |||
9 | #include <stdarg.h> | ||
10 | |||
11 | #include "lj_obj.h" | ||
12 | |||
/*
** Error message identifiers.
** Each ERRDEF entry of lj_errmsg.h defines two enumerators: LJ_ERR_<name>
** and a helper LJ_ERR_<name>_, which is set to LJ_ERR_<name> plus the
** length of the message. The next LJ_ERR_<name> implicitly continues one
** past the helper. So every identifier is the running byte offset of its
** message in a string where each message is followed by one extra byte
** (lj_err.c builds such a string with "\0" separators).
** LJ_ERR__MAX ends up as the total size of all messages.
*/
typedef enum {
#define ERRDEF(name, msg) \
  LJ_ERR_##name, LJ_ERR_##name##_ = LJ_ERR_##name + sizeof(msg)-1,
#include "lj_errmsg.h"
  LJ_ERR__MAX
} ErrMsg;
19 | |||
20 | LJ_FUNC GCstr *lj_err_str(lua_State *L, ErrMsg em); | ||
21 | LJ_FUNC_NORET void lj_err_throw(lua_State *L, int errcode); | ||
22 | LJ_FUNC_NORET void lj_err_run(lua_State *L); | ||
23 | LJ_FUNC_NORET void lj_err_msg(lua_State *L, ErrMsg em); | ||
24 | LJ_FUNC_NORET void lj_err_lex(lua_State *L, const char *src, const char *tok, | ||
25 | BCLine line, ErrMsg em, va_list argp); | ||
26 | LJ_FUNC_NORET void lj_err_optype(lua_State *L, cTValue *o, ErrMsg opm); | ||
27 | LJ_FUNC_NORET void lj_err_comp(lua_State *L, cTValue *o1, cTValue *o2); | ||
28 | LJ_FUNC_NORET void lj_err_optype_call(lua_State *L, TValue *o); | ||
29 | LJ_FUNC_NORET void lj_err_callermsg(lua_State *L, const char *msg); | ||
30 | LJ_FUNC_NORET void lj_err_callerv(lua_State *L, ErrMsg em, ...); | ||
31 | LJ_FUNC_NORET void lj_err_caller(lua_State *L, ErrMsg em); | ||
32 | LJ_FUNC_NORET void lj_err_arg(lua_State *L, int narg, ErrMsg em); | ||
33 | LJ_FUNC_NORET void lj_err_argv(lua_State *L, int narg, ErrMsg em, ...); | ||
34 | LJ_FUNC_NORET void lj_err_argtype(lua_State *L, int narg, const char *xname); | ||
35 | LJ_FUNC_NORET void lj_err_argt(lua_State *L, int narg, int tt); | ||
36 | |||
37 | LJ_FUNC void lj_err_pushloc(lua_State *L, GCproto *pt, BCPos pc); | ||
38 | LJ_FUNC cTValue *lj_err_getframe(lua_State *L, int level, int *size); | ||
39 | |||
40 | #endif | ||
diff --git a/src/lj_errmsg.h b/src/lj_errmsg.h new file mode 100644 index 00000000..03abd59b --- /dev/null +++ b/src/lj_errmsg.h | |||
@@ -0,0 +1,134 @@ | |||
1 | /* | ||
2 | ** VM error messages. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | /* This file may be included multiple times with different ERRDEF macros. */ | ||
7 | |||
8 | /* Basic error handling. */ | ||
9 | ERRDEF(ERRMEM, "not enough memory") | ||
10 | ERRDEF(ERRERR, "error in error handling") | ||
11 | |||
12 | /* Allocations. */ | ||
13 | ERRDEF(STROV, "string length overflow") | ||
14 | ERRDEF(UDATAOV, "userdata length overflow") | ||
15 | ERRDEF(STKOV, "stack overflow") | ||
16 | ERRDEF(STKOVM, "stack overflow (%s)") | ||
17 | ERRDEF(TABOV, "table overflow") | ||
18 | |||
19 | /* Table indexing. */ | ||
20 | ERRDEF(NANIDX, "table index is NaN") | ||
21 | ERRDEF(NILIDX, "table index is nil") | ||
22 | ERRDEF(NEXTIDX, "invalid key to " LUA_QL("next")) | ||
23 | |||
24 | /* Metamethod resolving. */ | ||
25 | ERRDEF(BADCALL, "attempt to call a %s value") | ||
26 | ERRDEF(BADOPRT, "attempt to %s %s " LUA_QS " (a %s value)") | ||
27 | ERRDEF(BADOPRV, "attempt to %s a %s value") | ||
28 | ERRDEF(BADCMPT, "attempt to compare %s with %s") | ||
29 | ERRDEF(BADCMPV, "attempt to compare two %s values") | ||
30 | ERRDEF(GETLOOP, "loop in gettable") | ||
31 | ERRDEF(SETLOOP, "loop in settable") | ||
32 | ERRDEF(OPCALL, "call") | ||
33 | ERRDEF(OPINDEX, "index") | ||
34 | ERRDEF(OPARITH, "perform arithmetic on") | ||
35 | ERRDEF(OPCAT, "concatenate") | ||
36 | ERRDEF(OPLEN, "get length of") | ||
37 | |||
38 | /* Type checks. */ | ||
39 | ERRDEF(BADSELF, "calling " LUA_QS " on bad self (%s)") | ||
40 | ERRDEF(BADARG, "bad argument #%d to " LUA_QS " (%s)") | ||
41 | ERRDEF(BADTYPE, "%s expected, got %s") | ||
42 | ERRDEF(BADVAL, "invalid value") | ||
43 | ERRDEF(NOVAL, "value expected") | ||
44 | ERRDEF(NOCORO, "coroutine expected") | ||
45 | ERRDEF(NOTABN, "nil or table expected") | ||
46 | ERRDEF(NOLFUNC, "Lua function expected") | ||
47 | ERRDEF(NOFUNCL, "function or level expected") | ||
48 | ERRDEF(NOSFT, "string/function/table expected") | ||
49 | ERRDEF(NOPROXY, "boolean or proxy expected") | ||
50 | ERRDEF(FORINIT, LUA_QL("for") " initial value must be a number") | ||
51 | ERRDEF(FORLIM, LUA_QL("for") " limit must be a number") | ||
52 | ERRDEF(FORSTEP, LUA_QL("for") " step must be a number") | ||
53 | |||
54 | /* C API checks. */ | ||
55 | ERRDEF(NOENV, "no calling environment") | ||
56 | ERRDEF(CYIELD, "attempt to yield across C-call boundary") | ||
57 | ERRDEF(BADLU, "bad light userdata pointer") | ||
58 | ERRDEF(NOGCMM, "bad action while in __gc metamethod") | ||
59 | |||
60 | /* Standard library function errors. */ | ||
61 | ERRDEF(ASSERT, "assertion failed!") | ||
62 | ERRDEF(PROTMT, "cannot change a protected metatable") | ||
63 | ERRDEF(UNPACK, "too many results to unpack") | ||
64 | ERRDEF(RDRSTR, "reader function must return a string") | ||
65 | ERRDEF(PRTOSTR, LUA_QL("tostring") " must return a string to " LUA_QL("print")) | ||
66 | ERRDEF(IDXRNG, "index out of range") | ||
67 | ERRDEF(BASERNG, "base out of range") | ||
68 | ERRDEF(LVLRNG, "level out of range") | ||
69 | ERRDEF(INVLVL, "invalid level") | ||
70 | ERRDEF(INVOPT, "invalid option") | ||
71 | ERRDEF(INVOPTM, "invalid option " LUA_QS) | ||
72 | ERRDEF(INVFMT, "invalid format") | ||
73 | ERRDEF(SETFENV, LUA_QL("setfenv") " cannot change environment of given object") | ||
74 | ERRDEF(CORUN, "cannot resume running coroutine") | ||
75 | ERRDEF(CODEAD, "cannot resume dead coroutine") | ||
76 | ERRDEF(COSUSP, "cannot resume non-suspended coroutine") | ||
77 | ERRDEF(TABINS, "wrong number of arguments to " LUA_QL("insert")) | ||
78 | ERRDEF(TABCAT, "invalid value (%s) at index %d in table for " LUA_QL("concat")) | ||
79 | ERRDEF(TABSORT, "invalid order function for sorting") | ||
80 | ERRDEF(IOCLFL, "attempt to use a closed file") | ||
81 | ERRDEF(IOSTDCL, "standard file is closed") | ||
82 | ERRDEF(OSUNIQF, "unable to generate a unique filename") | ||
83 | ERRDEF(OSDATEF, "field " LUA_QS " missing in date table") | ||
84 | ERRDEF(STRDUMP, "cannot dump functions") | ||
85 | ERRDEF(STRSLC, "string slice too long") | ||
86 | ERRDEF(STRPATB, "missing " LUA_QL("[") " after " LUA_QL("%f") " in pattern") | ||
87 | ERRDEF(STRPATC, "invalid pattern capture") | ||
88 | ERRDEF(STRPATE, "malformed pattern (ends with " LUA_QL("%") ")") | ||
89 | ERRDEF(STRPATM, "malformed pattern (missing " LUA_QL("]") ")") | ||
90 | ERRDEF(STRPATU, "unbalanced pattern") | ||
91 | ERRDEF(STRCAPI, "invalid capture index") | ||
92 | ERRDEF(STRCAPN, "too many captures") | ||
93 | ERRDEF(STRCAPU, "unfinished capture") | ||
94 | ERRDEF(STRFMTO, "invalid option " LUA_QL("%%%c") " to " LUA_QL("format")) | ||
95 | ERRDEF(STRFMTR, "invalid format (repeated flags)") | ||
96 | ERRDEF(STRFMTW, "invalid format (width or precision too long)") | ||
97 | ERRDEF(STRGSRV, "invalid replacement value (a %s)") | ||
98 | ERRDEF(BADMODN, "name conflict for module " LUA_QS) | ||
99 | ERRDEF(NOJIT, "JIT compiler permanently disabled") | ||
100 | ERRDEF(JITOPT, "unknown or malformed optimization flag " LUA_QS) | ||
101 | |||
102 | /* Lexer/parser errors. */ | ||
103 | ERRDEF(XNEAR, "%s near " LUA_QS) | ||
104 | ERRDEF(XELEM, "lexical element too long") | ||
105 | ERRDEF(XLINES, "chunk has too many lines") | ||
106 | ERRDEF(XLEVELS, "chunk has too many syntax levels") | ||
107 | ERRDEF(XNUMBER, "malformed number") | ||
108 | ERRDEF(XLSTR, "unfinished long string") | ||
109 | ERRDEF(XLCOM, "unfinished long comment") | ||
110 | ERRDEF(XSTR, "unfinished string") | ||
111 | ERRDEF(XESC, "escape sequence too large") | ||
112 | ERRDEF(XLDELIM, "invalid long string delimiter") | ||
113 | ERRDEF(XBCLOAD, "cannot load Lua bytecode") | ||
114 | ERRDEF(XTOKEN, LUA_QS " expected") | ||
115 | ERRDEF(XJUMP, "control structure too long") | ||
116 | ERRDEF(XSLOTS, "function or expression too complex") | ||
117 | ERRDEF(XLIMM, "main function has more than %d %s") | ||
118 | ERRDEF(XLIMF, "function at line %d has more than %d %s") | ||
119 | ERRDEF(XMATCH, LUA_QS " expected (to close " LUA_QS " at line %d)") | ||
120 | ERRDEF(XFIXUP, "function too long for return fixup") | ||
121 | ERRDEF(XPARAM, "<name> or " LUA_QL("...") " expected") | ||
122 | ERRDEF(XAMBIG, "ambiguous syntax (function call x new statement)") | ||
123 | ERRDEF(XFUNARG, "function arguments expected") | ||
124 | ERRDEF(XSYMBOL, "unexpected symbol") | ||
125 | ERRDEF(XDOTS, "cannot use " LUA_QL("...") " outside a vararg function") | ||
126 | ERRDEF(XSYNTAX, "syntax error") | ||
127 | ERRDEF(XBREAK, "no loop to break") | ||
128 | ERRDEF(XFOR, LUA_QL("=") " or " LUA_QL("in") " expected") | ||
129 | |||
130 | #undef ERRDEF | ||
131 | |||
132 | /* Detecting unused error messages: | ||
133 | awk -F, '/^ERRDEF/ { gsub(/ERRDEF./, ""); printf "grep -q LJ_ERR_%s *.[ch] || echo %s\n", $1, $1}' lj_errmsg.h | sh | ||
134 | */ | ||
diff --git a/src/lj_ff.h b/src/lj_ff.h new file mode 100644 index 00000000..6dfd73a7 --- /dev/null +++ b/src/lj_ff.h | |||
@@ -0,0 +1,18 @@ | |||
1 | /* | ||
2 | ** Fast function IDs. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #ifndef _LJ_FF_H | ||
7 | #define _LJ_FF_H | ||
8 | |||
9 | /* Fast function ID: two fixed entries followed by one FF_<name> per FFDEF() line in lj_ffdef.h. */ | ||
10 | typedef enum { | ||
11 | FF_LUA_ = FF_LUA, /* Lua function (must be 0). */ | ||
12 | FF_C_ = FF_C, /* Regular C function (must be 1). */ | ||
13 | #define FFDEF(name) FF_##name, | ||
14 | #include "lj_ffdef.h" | ||
15 | FF__MAX /* Follows the last generated ID. */ | ||
16 | } FastFunc; | ||
17 | |||
18 | #endif | ||
diff --git a/src/lj_frame.h b/src/lj_frame.h new file mode 100644 index 00000000..1c03e3e1 --- /dev/null +++ b/src/lj_frame.h | |||
@@ -0,0 +1,84 @@ | |||
1 | /* | ||
2 | ** Stack frames. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #ifndef _LJ_FRAME_H | ||
7 | #define _LJ_FRAME_H | ||
8 | |||
9 | #include "lj_obj.h" | ||
10 | #include "lj_bc.h" | ||
11 | |||
12 | /* -- Lua stack frame ----------------------------------------------------- */ | ||
13 | |||
14 | /* Frame type markers in callee function slot (callee base-1). */ | ||
15 | enum { | ||
16 | FRAME_LUA, FRAME_C, FRAME_CONT, FRAME_VARG, /* Values 0..3. */ | ||
17 | FRAME_LUAP, FRAME_CP, FRAME_PCALL, FRAME_PCALLH /* Values 4..7: numerically the first four plus FRAME_P. */ | ||
18 | }; | ||
19 | #define FRAME_TYPE 3 /* Mask selecting the low two bits. */ | ||
20 | #define FRAME_P 4 /* Third bit; presumably marks protected frames (TODO confirm). */ | ||
21 | #define FRAME_TYPEP (FRAME_TYPE|FRAME_P) /* Mask selecting all three low bits. */ | ||
22 | |||
23 | /* Macros to access and modify Lua frames. The low three bits of ftsz hold the frame type; frame_ispcall() masks with 6 and so matches both FRAME_PCALL and FRAME_PCALLH. */ | ||
24 | #define frame_gc(f) (gcref((f)->fr.func)) | ||
25 | #define frame_func(f) (&frame_gc(f)->fn) | ||
26 | #define frame_ftsz(f) ((f)->fr.tp.ftsz) | ||
27 | |||
28 | #define frame_type(f) (frame_ftsz(f) & FRAME_TYPE) | ||
29 | #define frame_typep(f) (frame_ftsz(f) & FRAME_TYPEP) | ||
30 | #define frame_islua(f) (frame_type(f) == FRAME_LUA) | ||
31 | #define frame_isc(f) (frame_type(f) == FRAME_C) | ||
32 | #define frame_iscont(f) (frame_typep(f) == FRAME_CONT) | ||
33 | #define frame_isvarg(f) (frame_typep(f) == FRAME_VARG) | ||
34 | #define frame_ispcall(f) ((frame_ftsz(f) & 6) == FRAME_PCALL) | ||
35 | |||
36 | #define frame_pc(f) (mref((f)->fr.tp.pcr, const BCIns)) | ||
37 | #define frame_contpc(f) (frame_pc((f)-1)) | ||
38 | #if LJ_64 | ||
39 | #define frame_contf(f) \ | ||
40 | ((ASMFunction)(void *)((intptr_t)lj_vm_asm_begin+(((f)-1)->u64 & 0xffffffff))) | ||
41 | #else | ||
42 | #define frame_contf(f) ((ASMFunction)gcrefp(((f)-1)->gcr, void)) | ||
43 | #endif | ||
44 | #define frame_delta(f) (frame_ftsz(f) >> 3) | ||
45 | #define frame_sized(f) (frame_ftsz(f) & ~FRAME_TYPEP) | ||
46 | |||
47 | #define frame_prevl(f) ((f) - (1+bc_a(frame_pc(f)[-1]))) | ||
48 | #define frame_prevd(f) ((TValue *)((char *)(f) - frame_sized(f))) | ||
49 | #define frame_prev(f) (frame_islua(f)?frame_prevl(f):frame_prevd(f)) | ||
50 | /* Note: this macro does not skip over FRAME_VARG. Lua frames step back by 1 plus the A operand of the instruction before the saved PC; all other frames step back by frame_sized(f) bytes. */ | ||
51 | |||
52 | #define setframe_pc(f, pc) (setmref((f)->fr.tp.pcr, (pc))) | ||
53 | #define setframe_gc(f, p) (setgcref((f)->fr.func, (p))) | ||
54 | |||
55 | /* -- C stack frame ------------------------------------------------------- */ | ||
56 | |||
57 | /* Macros to access and modify the C stack frame chain. A cframe pointer may carry CFRAME_RESUME in its low bit: cframe_canyield() tests that bit and cframe_raw() masks it off. */ | ||
58 | |||
59 | /* These definitions must match with the arch-specific *.dasc files. Offsets are multiples of sizeof(void *); only LJ_TARGET_X86 is defined here. */ | ||
60 | #if LJ_TARGET_X86 | ||
61 | #define CFRAME_OFS_ERRF (15*sizeof(void *)) | ||
62 | #define CFRAME_OFS_NRES (14*sizeof(void *)) | ||
63 | #define CFRAME_OFS_PREV (13*sizeof(void *)) | ||
64 | #define CFRAME_OFS_L (12*sizeof(void *)) | ||
65 | #define CFRAME_OFS_PC (6*sizeof(void *)) | ||
66 | #define CFRAME_SIZE (12*sizeof(void *)) | ||
67 | #else | ||
68 | #error "Missing CFRAME_* definitions for this architecture" | ||
69 | #endif | ||
70 | |||
71 | #define CFRAME_RESUME 1 | ||
72 | #define CFRAME_CANYIELD ((intptr_t)(CFRAME_RESUME)) | ||
73 | #define CFRAME_RAWMASK (~CFRAME_CANYIELD) | ||
74 | |||
75 | #define cframe_errfunc(cf) (*(ptrdiff_t *)(((char *)cf)+CFRAME_OFS_ERRF)) | ||
76 | #define cframe_nres(cf) (*(ptrdiff_t *)(((char *)cf)+CFRAME_OFS_NRES)) | ||
77 | #define cframe_prev(cf) (*(void **)(((char *)cf)+CFRAME_OFS_PREV)) | ||
78 | #define cframe_L(cf) (*(lua_State **)(((char *)cf)+CFRAME_OFS_L)) | ||
79 | #define cframe_pc(cf) (*(const BCIns **)(((char *)cf)+CFRAME_OFS_PC)) | ||
80 | #define cframe_canyield(cf) ((intptr_t)(cf) & CFRAME_CANYIELD) | ||
81 | #define cframe_raw(cf) ((void *)((intptr_t)(cf) & CFRAME_RAWMASK)) | ||
82 | #define cframe_Lpc(L) cframe_pc(cframe_raw(L->cframe)) | ||
83 | |||
84 | #endif | ||
diff --git a/src/lj_func.c b/src/lj_func.c new file mode 100644 index 00000000..92cdeda2 --- /dev/null +++ b/src/lj_func.c | |||
@@ -0,0 +1,185 @@ | |||
1 | /* | ||
2 | ** Function handling (prototypes, functions and upvalues). | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | ** | ||
5 | ** Portions taken verbatim or adapted from the Lua interpreter. | ||
6 | ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h | ||
7 | */ | ||
8 | |||
9 | #define lj_func_c | ||
10 | #define LUA_CORE | ||
11 | |||
12 | #include "lj_obj.h" | ||
13 | #include "lj_gc.h" | ||
14 | #include "lj_func.h" | ||
15 | #include "lj_trace.h" | ||
16 | #include "lj_vm.h" | ||
17 | |||
18 | /* -- Prototypes ---------------------------------------------------------- */ | ||
19 | |||
20 | GCproto *lj_func_newproto(lua_State *L) /* Allocate a prototype object and reset the fields below to empty defaults. */ | ||
21 | { | ||
22 | GCproto *pt = lj_mem_newobj(L, GCproto); | ||
23 | pt->gct = ~LJ_TPROTO; /* Tag the object as a prototype. */ | ||
24 | pt->numparams = 0; | ||
25 | pt->framesize = 0; | ||
26 | pt->sizeuv = 0; | ||
27 | pt->flags = 0; | ||
28 | pt->trace = 0; /* No root trace attached. */ | ||
29 | pt->k.n = NULL; /* No constants yet (k is presumably a union with k.gc -- confirm in lj_obj.h). */ | ||
30 | pt->bc = NULL; | ||
31 | pt->uv = NULL; | ||
32 | pt->sizebc = 0; | ||
33 | pt->sizekgc = 0; | ||
34 | pt->sizekn = 0; | ||
35 | pt->sizelineinfo = 0; | ||
36 | pt->sizevarinfo = 0; | ||
37 | pt->sizeuvname = 0; | ||
38 | pt->linedefined = 0; | ||
39 | pt->lastlinedefined = 0; | ||
40 | pt->lineinfo = NULL; | ||
41 | pt->varinfo = NULL; | ||
42 | pt->uvname = NULL; | ||
43 | pt->chunkname = NULL; | ||
44 | return pt; /* Caller fills in the real contents. */ | ||
45 | } | ||
46 | |||
47 | void LJ_FASTCALL lj_func_freeproto(global_State *g, GCproto *pt) /* Free a prototype together with the arrays it owns. */ | ||
48 | { | ||
49 | MSize nkgc = round_nkgc(pt->sizekgc); /* GC constant count as rounded by round_nkgc(). */ | ||
50 | MSize sizek = nkgc*(MSize)sizeof(GCobj *) + | ||
51 | pt->sizekn*(MSize)sizeof(lua_Number); | ||
52 | lj_mem_free(g, pt->k.gc - nkgc, sizek); /* One allocation for all constants, starting nkgc entries below k.gc. */ | ||
53 | lj_mem_freevec(g, pt->bc, pt->sizebc, BCIns); | ||
54 | lj_mem_freevec(g, pt->uv, pt->sizeuv, int16_t); | ||
55 | lj_mem_freevec(g, pt->lineinfo, pt->sizelineinfo, int32_t); | ||
56 | lj_mem_freevec(g, pt->varinfo, pt->sizevarinfo, struct VarInfo); | ||
57 | lj_mem_freevec(g, pt->uvname, pt->sizeuvname, GCstr *); | ||
58 | lj_trace_freeproto(g, pt); | ||
59 | lj_mem_freet(g, pt); /* Finally release the GCproto itself. */ | ||
60 | } | ||
61 | |||
62 | /* -- Upvalues ------------------------------------------------------------ */ | ||
63 | |||
64 | static void unlinkuv(GCupval *uv) /* Remove uv from the doubly-linked prev/next list. */ | ||
65 | { | ||
66 | lua_assert(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv); | ||
67 | setgcrefr(uvnext(uv)->prev, uv->prev); /* Successor now points back to uv's predecessor. */ | ||
68 | setgcrefr(uvprev(uv)->next, uv->next); /* Predecessor now points forward to uv's successor. */ | ||
69 | } | ||
70 | |||
71 | /* Find existing open upvalue for a stack slot or create a new one. A new upvalue is linked into both L->openupval and the list anchored at g->uvhead. */ | ||
72 | static GCupval *func_finduv(lua_State *L, TValue *slot) | ||
73 | { | ||
74 | global_State *g = G(L); | ||
75 | GCRef *pp = &L->openupval; | ||
76 | GCupval *p; | ||
77 | GCupval *uv; | ||
78 | /* Search the sorted list of open upvalues. */ | ||
79 | while (gcref(*pp) != NULL && (p = gco2uv(gcref(*pp)))->v >= slot) { /* Stops at the first entry whose slot is below the target. */ | ||
80 | lua_assert(!p->closed && p->v != &p->tv); | ||
81 | if (p->v == slot) { /* Found open upvalue pointing to same slot? */ | ||
82 | if (isdead(g, obj2gco(p))) /* Resurrect it, if it's dead. */ | ||
83 | flipwhite(obj2gco(p)); | ||
84 | return p; | ||
85 | } | ||
86 | pp = &p->nextgc; /* Open upvalues are chained through nextgc. */ | ||
87 | } | ||
88 | /* No matching upvalue found. Create a new one. */ | ||
89 | uv = lj_mem_newt(L, sizeof(GCupval), GCupval); | ||
90 | newwhite(g, uv); | ||
91 | uv->gct = ~LJ_TUPVAL; | ||
92 | uv->closed = 0; /* Still open. */ | ||
93 | uv->v = slot; /* Pointing to the stack slot. */ | ||
94 | /* NOBARRIER: The GCupval is new (marked white) and open. */ | ||
95 | setgcrefr(uv->nextgc, *pp); /* Insert into sorted list of open upvalues. */ | ||
96 | setgcref(*pp, obj2gco(uv)); | ||
97 | setgcref(uv->prev, obj2gco(&g->uvhead)); /* Insert into GC list, too. */ | ||
98 | setgcrefr(uv->next, g->uvhead.next); | ||
99 | setgcref(uvnext(uv)->prev, obj2gco(uv)); | ||
100 | setgcref(g->uvhead.next, obj2gco(uv)); /* uv is now the first entry after uvhead. */ | ||
101 | lua_assert(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv); | ||
102 | return uv; | ||
103 | } | ||
104 | |||
105 | /* Close all open upvalues pointing to some stack level or above. Dead upvalues are freed, live ones are handed to lj_gc_closeuv(). */ | ||
106 | void lj_func_closeuv(lua_State *L, TValue *level) | ||
107 | { | ||
108 | GCupval *uv; | ||
109 | global_State *g = G(L); | ||
110 | while (gcref(L->openupval) != NULL && | ||
111 | (uv = gco2uv(gcref(L->openupval)))->v >= level) { /* Only the list head is examined on each pass. */ | ||
112 | GCobj *o = obj2gco(uv); | ||
113 | lua_assert(!isblack(o) && !uv->closed && uv->v != &uv->tv); | ||
114 | setgcrefr(L->openupval, uv->nextgc); /* No longer in open list. */ | ||
115 | if (isdead(g, o)) { | ||
116 | lj_func_freeuv(g, uv); /* Unlinks from the prev/next list itself, since uv is still open. */ | ||
117 | } else { | ||
118 | unlinkuv(uv); | ||
119 | lj_gc_closeuv(g, uv); | ||
120 | } | ||
121 | } | ||
122 | } | ||
123 | |||
124 | void LJ_FASTCALL lj_func_freeuv(global_State *g, GCupval *uv) /* Free an upvalue object. */ | ||
125 | { | ||
126 | if (!uv->closed) /* Only open upvalues are unlinked from the prev/next list here. */ | ||
127 | unlinkuv(uv); | ||
128 | lj_mem_freet(g, uv); | ||
129 | } | ||
130 | |||
131 | /* -- Functions (closures) ------------------------------------------------ */ | ||
132 | |||
133 | GCfunc *lj_func_newC(lua_State *L, MSize nelems, GCtab *env) /* Create a C function object sized for nelems upvalues. */ | ||
134 | { | ||
135 | GCfunc *fn = cast(GCfunc *, lj_mem_newgco(L, sizeCfunc(nelems))); | ||
136 | fn->c.gct = ~LJ_TFUNC; | ||
137 | fn->c.ffid = FF_C; | ||
138 | fn->c.nupvalues = cast_byte(nelems); /* Count is stored via cast_byte(). */ | ||
139 | /* NOBARRIER: The GCfunc is new (marked white). */ | ||
140 | setgcref(fn->c.env, obj2gco(env)); | ||
141 | fn->c.gate = lj_gate_c; | ||
142 | return fn; /* The upvalue slots are not written here. */ | ||
143 | } | ||
144 | |||
145 | GCfunc *lj_func_newL(lua_State *L, GCproto *pt, GCtab *env) /* Create a Lua function object for prototype pt. */ | ||
146 | { | ||
147 | GCfunc *fn = cast(GCfunc *, lj_mem_newgco(L, sizeLfunc((MSize)pt->sizeuv))); | ||
148 | fn->l.gct = ~LJ_TFUNC; | ||
149 | fn->l.ffid = FF_LUA; | ||
150 | fn->l.nupvalues = cast_byte(pt->sizeuv); | ||
151 | /* NOBARRIER: The GCfunc is new (marked white). */ | ||
152 | setgcref(fn->l.pt, obj2gco(pt)); | ||
153 | setgcref(fn->l.env, obj2gco(env)); | ||
154 | fn->l.gate = (pt->flags & PROTO_IS_VARARG) ? lj_gate_lv : lj_gate_lf; /* Vararg prototypes get a different gate. */ | ||
155 | return fn; /* The uvptr[] entries are not written here. */ | ||
156 | } | ||
157 | |||
158 | /* Do a GC check and create a new Lua function with inherited upvalues. The environment is taken from the parent function. */ | ||
159 | GCfunc *lj_func_newL_gc(lua_State *L, GCproto *pt, GCfuncL *parent) | ||
160 | { | ||
161 | GCfunc *fn; | ||
162 | GCRef *puv; | ||
163 | uint32_t i, nuv; | ||
164 | TValue *base; | ||
165 | lj_gc_check_fixtop(L); | ||
166 | fn = lj_func_newL(L, pt, tabref(parent->env)); | ||
167 | /* NOBARRIER: The GCfunc is new (marked white). */ | ||
168 | puv = parent->uvptr; | ||
169 | nuv = fn->l.nupvalues; | ||
170 | base = L->base; | ||
171 | for (i = 0; i < nuv; i++) { | ||
172 | int v = pt->uv[i]; /* Negative: ~v indexes the parent's upvalues. Otherwise: a slot offset from base. */ | ||
173 | GCupval *uv = v < 0 ? &gcref(puv[~v])->uv : func_finduv(L, base + v); | ||
174 | setgcref(fn->l.uvptr[i], obj2gco(uv)); | ||
175 | } | ||
176 | return fn; | ||
177 | } | ||
178 | |||
179 | void LJ_FASTCALL lj_func_free(global_State *g, GCfunc *fn) /* Free a function object. */ | ||
180 | { | ||
181 | MSize size = isluafunc(fn) ? sizeLfunc((MSize)fn->l.nupvalues) : /* Size depends on function kind and upvalue count. */ | ||
182 | sizeCfunc((MSize)fn->c.nupvalues); | ||
183 | lj_mem_free(g, fn, size); | ||
184 | } | ||
185 | |||
diff --git a/src/lj_func.h b/src/lj_func.h new file mode 100644 index 00000000..ee7942ea --- /dev/null +++ b/src/lj_func.h | |||
@@ -0,0 +1,25 @@ | |||
1 | /* | ||
2 | ** Function handling (prototypes, functions and upvalues). | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #ifndef _LJ_FUNC_H | ||
7 | #define _LJ_FUNC_H | ||
8 | |||
9 | #include "lj_obj.h" | ||
10 | |||
11 | /* Prototypes. */ | ||
12 | LJ_FUNC GCproto *lj_func_newproto(lua_State *L); | ||
13 | LJ_FUNC void LJ_FASTCALL lj_func_freeproto(global_State *g, GCproto *pt); | ||
14 | |||
15 | /* Upvalues. */ | ||
16 | LJ_FUNCA void lj_func_closeuv(lua_State *L, TValue *level); | ||
17 | LJ_FUNC void LJ_FASTCALL lj_func_freeuv(global_State *g, GCupval *uv); | ||
18 | |||
19 | /* Functions (closures). */ | ||
20 | LJ_FUNC GCfunc *lj_func_newC(lua_State *L, MSize nelems, GCtab *env); | ||
21 | LJ_FUNC GCfunc *lj_func_newL(lua_State *L, GCproto *pt, GCtab *env); | ||
22 | LJ_FUNCA GCfunc *lj_func_newL_gc(lua_State *L, GCproto *pt, GCfuncL *parent); | ||
23 | LJ_FUNC void LJ_FASTCALL lj_func_free(global_State *g, GCfunc *c); | ||
24 | |||
25 | #endif | ||
diff --git a/src/lj_gc.c b/src/lj_gc.c new file mode 100644 index 00000000..e479b567 --- /dev/null +++ b/src/lj_gc.c | |||
@@ -0,0 +1,800 @@ | |||
1 | /* | ||
2 | ** Garbage collector. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | ** | ||
5 | ** Major portions taken verbatim or adapted from the Lua interpreter. | ||
6 | ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h | ||
7 | */ | ||
8 | |||
9 | #define lj_gc_c | ||
10 | #define LUA_CORE | ||
11 | |||
12 | #include "lj_obj.h" | ||
13 | #include "lj_gc.h" | ||
14 | #include "lj_err.h" | ||
15 | #include "lj_str.h" | ||
16 | #include "lj_tab.h" | ||
17 | #include "lj_func.h" | ||
18 | #include "lj_udata.h" | ||
19 | #include "lj_meta.h" | ||
20 | #include "lj_state.h" | ||
21 | #include "lj_frame.h" | ||
22 | #include "lj_trace.h" | ||
23 | #include "lj_vm.h" | ||
24 | |||
25 | #define GCSTEPSIZE 1024u | ||
26 | #define GCSWEEPMAX 40 | ||
27 | #define GCSWEEPCOST 10 | ||
28 | #define GCFINALIZECOST 100 | ||
29 | |||
30 | /* Macros to set GCobj colors and flags. white2gray clears the white bits, black2gray clears the black bit, gray2black sets it, and makewhite replaces all color bits with the current white. */ | ||
31 | #define white2gray(x) ((x)->gch.marked &= cast_byte(~LJ_GC_WHITES)) | ||
32 | #define black2gray(x) ((x)->gch.marked &= cast_byte(~LJ_GC_BLACK)) | ||
33 | #define gray2black(x) ((x)->gch.marked |= LJ_GC_BLACK) | ||
34 | #define makewhite(g, x) \ | ||
35 | ((x)->gch.marked = ((x)->gch.marked & cast_byte(~LJ_GC_COLORS)) | curwhite(g)) | ||
36 | #define isfinalized(u) ((u)->marked & LJ_GC_FINALIZED) | ||
37 | #define markfinalized(u) ((u)->marked |= LJ_GC_FINALIZED) | ||
38 | |||
39 | /* -- Mark phase ---------------------------------------------------------- */ | ||
40 | |||
41 | /* Mark a TValue (if needed): only values for which tviswhite() holds are passed to gc_mark(). */ | ||
42 | #define gc_marktv(g, tv) \ | ||
43 | { lua_assert(!tvisgcv(tv) || (~itype(tv) == gcval(tv)->gch.gct)); \ | ||
44 | if (tviswhite(tv)) gc_mark(g, gcV(tv)); } | ||
45 | |||
46 | /* Mark a GCobj (if needed): only white objects are passed to gc_mark(). */ | ||
47 | #define gc_markobj(g, o) \ | ||
48 | { if (iswhite(obj2gco(o))) gc_mark(g, obj2gco(o)); } | ||
49 | |||
50 | /* Mark a string object: just clears its white bits, no call to gc_mark(). */ | ||
51 | #define gc_mark_str(s) ((s)->marked &= cast_byte(~LJ_GC_WHITES)) | ||
52 | |||
53 | /* Mark a white GCobj. Userdata and closed upvalues turn black at once; functions, tables, threads and prototypes are queued on the gray list; strings only lose their white bits. */ | ||
54 | static void gc_mark(global_State *g, GCobj *o) | ||
55 | { | ||
56 | lua_assert(iswhite(o) && !isdead(g, o)); | ||
57 | white2gray(o); | ||
58 | if (LJ_UNLIKELY(o->gch.gct == ~LJ_TUDATA)) { | ||
59 | GCtab *mt = tabref(gco2ud(o)->metatable); | ||
60 | gray2black(o); /* Userdata are never gray. */ | ||
61 | if (mt) gc_markobj(g, mt); | ||
62 | gc_markobj(g, tabref(gco2ud(o)->env)); /* Unlike the metatable, env is marked without a NULL check. */ | ||
63 | } else if (LJ_UNLIKELY(o->gch.gct == ~LJ_TUPVAL)) { | ||
64 | GCupval *uv = gco2uv(o); | ||
65 | gc_marktv(g, uv->v); | ||
66 | if (uv->closed) | ||
67 | gray2black(o); /* Closed upvalues are never gray. */ | ||
68 | } else if (o->gch.gct != ~LJ_TSTR) { | ||
69 | lua_assert(o->gch.gct == ~LJ_TFUNC || o->gch.gct == ~LJ_TTAB || | ||
70 | o->gch.gct == ~LJ_TTHREAD || o->gch.gct == ~LJ_TPROTO); | ||
71 | setgcrefr(o->gch.gclist, g->gc.gray); /* Push onto the front of the gray list. */ | ||
72 | setgcref(g->gc.gray, o); | ||
73 | } | ||
74 | } | ||
75 | |||
76 | /* Mark the base metatables, i.e. every non-NULL entry of g->basemt[0..BASEMT_MAX-1]. */ | ||
77 | static void gc_mark_basemt(global_State *g) | ||
78 | { | ||
79 | int i; | ||
80 | for (i = 0; i < BASEMT_MAX; i++) | ||
81 | if (tabref(g->basemt[i]) != NULL) | ||
82 | gc_markobj(g, tabref(g->basemt[i])); | ||
83 | } | ||
84 | |||
85 | /* Start a GC cycle and mark the root set: main thread, its environment, the registry and the base metatables. */ | ||
86 | static void gc_mark_start(global_State *g) | ||
87 | { | ||
88 | setgcrefnull(g->gc.gray); /* Reset the three work lists. */ | ||
89 | setgcrefnull(g->gc.grayagain); | ||
90 | setgcrefnull(g->gc.weak); | ||
91 | gc_markobj(g, mainthread(g)); | ||
92 | gc_markobj(g, tabref(mainthread(g)->env)); | ||
93 | gc_marktv(g, &g->registrytv); | ||
94 | gc_mark_basemt(g); | ||
95 | g->gc.state = GCSpropagate; /* Next phase: propagation. */ | ||
96 | } | ||
97 | |||
98 | /* Mark open upvalues: walk the circular list anchored at g->uvhead and mark the value of each gray upvalue. */ | ||
99 | static void gc_mark_uv(global_State *g) | ||
100 | { | ||
101 | GCupval *uv; | ||
102 | for (uv = uvnext(&g->uvhead); uv != &g->uvhead; uv = uvnext(uv)) { | ||
103 | lua_assert(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv); | ||
104 | if (isgray(obj2gco(uv))) /* Upvalues of any other color are skipped. */ | ||
105 | gc_marktv(g, uv->v); | ||
106 | } | ||
107 | } | ||
108 | |||
109 | /* Mark userdata in mmudata list. The list is circular; an empty list is a NULL g->gc.mmudata. */ | ||
110 | static void gc_mark_mmudata(global_State *g) | ||
111 | { | ||
112 | GCobj *root = gcref(g->gc.mmudata); | ||
113 | GCobj *u = root; | ||
114 | if (u) { | ||
115 | do { | ||
116 | u = gcnext(u); /* Start with the element after root; root itself is handled last. */ | ||
117 | makewhite(g, u); /* Could be from previous GC. */ | ||
118 | gc_mark(g, u); | ||
119 | } while (u != root); | ||
120 | } | ||
121 | } | ||
122 | |||
123 | /* Separate userdata which needs finalization to mmudata list. Returns the summed sizeudata() of the moved objects; a non-zero 'all' considers every userdata regardless of color. */ | ||
124 | size_t lj_gc_separateudata(global_State *g, int all) | ||
125 | { | ||
126 | size_t m = 0; | ||
127 | GCRef *p = &mainthread(g)->nextgc; /* The walk starts right after the main thread; every object found is treated as userdata. */ | ||
128 | GCobj *o; | ||
129 | while ((o = gcref(*p)) != NULL) { | ||
130 | if (!(iswhite(o) || all) || isfinalized(gco2ud(o))) { | ||
131 | p = &o->gch.nextgc; /* Nothing to do. */ | ||
132 | } else if (!lj_meta_fastg(g, tabref(gco2ud(o)->metatable), MM_gc)) { | ||
133 | markfinalized(gco2ud(o)); /* Done, as there's no __gc metamethod. */ | ||
134 | p = &o->gch.nextgc; | ||
135 | } else { /* Otherwise move userdata to be finalized to mmudata list. */ | ||
136 | m += sizeudata(gco2ud(o)); | ||
137 | markfinalized(gco2ud(o)); | ||
138 | *p = o->gch.nextgc; /* Unlink o; p stays put so the successor is examined next. */ | ||
139 | if (gcref(g->gc.mmudata)) { /* Link to end of mmudata list. */ | ||
140 | GCobj *root = gcref(g->gc.mmudata); | ||
141 | setgcrefr(o->gch.nextgc, root->gch.nextgc); | ||
142 | setgcref(root->gch.nextgc, o); | ||
143 | setgcref(g->gc.mmudata, o); /* g->gc.mmudata always refers to the most recently added element. */ | ||
144 | } else { /* Create circular list. */ | ||
145 | setgcref(o->gch.nextgc, o); | ||
146 | setgcref(g->gc.mmudata, o); | ||
147 | } | ||
148 | } | ||
149 | } | ||
150 | return m; | ||
151 | } | ||
152 | |||
153 | /* -- Propagation phase --------------------------------------------------- */ | ||
154 | |||
155 | /* Traverse a table. Returns non-zero if the table is weak (1 when both keys and values are weak, otherwise the weak flag bits), zero for a strong table. */ | ||
156 | static int gc_traverse_tab(global_State *g, GCtab *t) | ||
157 | { | ||
158 | int weak = 0; | ||
159 | cTValue *mode; | ||
160 | GCtab *mt = tabref(t->metatable); | ||
161 | if (mt) | ||
162 | gc_markobj(g, mt); | ||
163 | mode = lj_meta_fastg(g, mt, MM_mode); /* mt may be NULL here; presumably lj_meta_fastg() tolerates that (confirm). */ | ||
164 | if (mode && tvisstr(mode)) { /* Valid __mode field? */ | ||
165 | const char *modestr = strVdata(mode); | ||
166 | int c; | ||
167 | while ((c = *modestr++)) { /* Other characters in the mode string are ignored. */ | ||
168 | if (c == 'k') weak |= LJ_GC_WEAKKEY; | ||
169 | else if (c == 'v') weak |= LJ_GC_WEAKVAL; | ||
170 | } | ||
171 | if (weak) { /* Weak tables are cleared in the atomic phase. */ | ||
172 | t->marked = cast_byte((t->marked & ~LJ_GC_WEAK) | weak); | ||
173 | setgcrefr(t->gclist, g->gc.weak); /* Push onto the weak list. */ | ||
174 | setgcref(g->gc.weak, obj2gco(t)); | ||
175 | } | ||
176 | } | ||
177 | if (weak == LJ_GC_WEAK) /* Nothing to mark if both keys/values are weak. */ | ||
178 | return 1; | ||
179 | if (!(weak & LJ_GC_WEAKVAL)) { /* Mark array part. */ | ||
180 | MSize i, asize = t->asize; | ||
181 | for (i = 0; i < asize; i++) | ||
182 | gc_marktv(g, arrayslot(t, i)); | ||
183 | } | ||
184 | if (t->hmask > 0) { /* Mark hash part. */ | ||
185 | Node *node = noderef(t->node); | ||
186 | MSize i, hmask = t->hmask; | ||
187 | for (i = 0; i <= hmask; i++) { /* hmask+1 nodes in total. */ | ||
188 | Node *n = &node[i]; | ||
189 | lua_assert(itype(&n->key) != LJ_TDEADKEY || tvisnil(&n->val)); | ||
190 | if (!tvisnil(&n->val)) { /* Mark non-empty slot. */ | ||
191 | lua_assert(!tvisnil(&n->key)); | ||
192 | if (!(weak & LJ_GC_WEAKKEY)) gc_marktv(g, &n->key); | ||
193 | if (!(weak & LJ_GC_WEAKVAL)) gc_marktv(g, &n->val); | ||
194 | } else if (tvisgcv(&n->key)) { /* Leave GC key in, but mark as dead. */ | ||
195 | setitype(&n->key, LJ_TDEADKEY); | ||
196 | } | ||
197 | } | ||
198 | } | ||
199 | return weak; /* Zero for strong tables, the weak flag bits otherwise. */ | ||
200 | } | ||
201 | |||
202 | /* Traverse a function: mark its environment, then the prototype and upvalue objects (Lua function) or the upvalue values (C function). */ | ||
203 | static void gc_traverse_func(global_State *g, GCfunc *fn) | ||
204 | { | ||
205 | gc_markobj(g, tabref(fn->c.env)); /* env is read through the .c view for both function kinds. */ | ||
206 | if (isluafunc(fn)) { | ||
207 | uint32_t i; | ||
208 | lua_assert(fn->l.nupvalues == funcproto(fn)->sizeuv); | ||
209 | gc_markobj(g, funcproto(fn)); | ||
210 | for (i = 0; i < fn->l.nupvalues; i++) /* Mark Lua function upvalues. */ | ||
211 | gc_markobj(g, &gcref(fn->l.uvptr[i])->uv); | ||
212 | } else { | ||
213 | uint32_t i; | ||
214 | for (i = 0; i < fn->c.nupvalues; i++) /* Mark C function upvalues. */ | ||
215 | gc_marktv(g, &fn->c.upvalue[i]); | ||
216 | } | ||
217 | } | ||
218 | |||
219 | #if LJ_HASJIT | ||
220 | /* Traverse a trace: mark the object of every IR_KGC instruction among refs T->nk up to (not including) REF_TRUE. */ | ||
221 | static void gc_traverse_trace(global_State *g, Trace *T) | ||
222 | { | ||
223 | IRRef ref; | ||
224 | for (ref = T->nk; ref < REF_TRUE; ref++) { | ||
225 | IRIns *ir = &T->ir[ref]; | ||
226 | if (ir->o == IR_KGC) /* Other instruction kinds are skipped. */ | ||
227 | gc_markobj(g, ir_kgc(ir)); | ||
228 | } | ||
229 | } | ||
230 | |||
231 | /* The current trace is a GC root while not anchored in the prototype (yet). Without LJ_HASJIT the macro only consumes its argument. */ | ||
232 | #define gc_mark_curtrace(g) \ | ||
233 | { if (G2J(g)->state != LJ_TRACE_IDLE && G2J(g)->curtrace != 0) \ | ||
234 | gc_traverse_trace(g, &G2J(g)->cur); } | ||
235 | #else | ||
236 | #define gc_mark_curtrace(g) UNUSED(g) | ||
237 | #endif | ||
238 | |||
239 | /* Traverse a prototype: its traces (JIT builds only), chunk name, collectable constants, upvalue names and local variable names. */ | ||
240 | static void gc_traverse_proto(global_State *g, GCproto *pt) | ||
241 | { | ||
242 | ptrdiff_t i; | ||
243 | #if LJ_HASJIT | ||
244 | jit_State *J = G2J(g); | ||
245 | TraceNo root, side; | ||
246 | /* Mark all root traces and attached side traces. */ | ||
247 | for (root = pt->trace; root != 0; root = J->trace[root]->nextroot) { /* Trace number 0 ends each chain. */ | ||
248 | for (side = J->trace[root]->nextside; side != 0; | ||
249 | side = J->trace[side]->nextside) | ||
250 | gc_traverse_trace(g, J->trace[side]); | ||
251 | gc_traverse_trace(g, J->trace[root]); | ||
252 | } | ||
253 | #endif | ||
254 | /* GC during prototype creation could cause NULL fields. */ | ||
255 | if (pt->chunkname) | ||
256 | gc_mark_str(pt->chunkname); | ||
257 | for (i = -(ptrdiff_t)pt->sizekgc; i < 0; i++) /* Mark collectable consts. */ | ||
258 | gc_markobj(g, gcref(pt->k.gc[i])); /* Indexed with negative offsets from k.gc. */ | ||
259 | for (i = 0; i < (ptrdiff_t)pt->sizeuvname; i++) /* Mark upvalue names. */ | ||
260 | if (pt->uvname[i]) | ||
261 | gc_mark_str(pt->uvname[i]); | ||
262 | for (i = 0; i < (ptrdiff_t)pt->sizevarinfo; i++) /* Mark names of locals. */ | ||
263 | if (pt->varinfo[i].name) | ||
264 | gc_mark_str(pt->varinfo[i].name); | ||
265 | } | ||
266 | |||
267 | /* Traverse the frame structure of a stack. Marks the function object of each frame and returns the highest frame extent found, capped at th->maxstack. */ | ||
268 | static TValue *gc_traverse_frames(global_State *g, lua_State *th) | ||
269 | { | ||
270 | TValue *frame, *top = th->top-1; | ||
271 | /* Note: extra vararg frame not skipped, marks function twice (harmless). */ | ||
272 | for (frame = th->base-1; frame > th->stack; frame = frame_prev(frame)) { | ||
273 | GCfunc *fn = frame_func(frame); | ||
274 | TValue *ftop = frame; | ||
275 | if (isluafunc(fn)) ftop += funcproto(fn)->framesize; /* Only Lua frames add their framesize. */ | ||
276 | if (ftop > top) top = ftop; | ||
277 | gc_markobj(g, frame_gc(frame)); /* Need to mark hidden function (or L). */ | ||
278 | } | ||
279 | top++; /* Correct bias of -1 (frame == base-1). */ | ||
280 | if (top > th->maxstack) top = th->maxstack; | ||
281 | return top; | ||
282 | } | ||
283 | |||
284 | /* Traverse a thread object: mark its environment and live stack slots, nil out the slots up to the max. frame extent, then let the stack shrink. */ | ||
285 | static void gc_traverse_thread(global_State *g, lua_State *th) | ||
286 | { | ||
287 | TValue *o, *lim; | ||
288 | gc_markobj(g, tabref(th->env)); | ||
289 | for (o = th->stack+1; o < th->top; o++) /* Slot 0 is not visited by this loop. */ | ||
290 | gc_marktv(g, o); | ||
291 | lim = gc_traverse_frames(g, th); | ||
292 | /* Extra cleanup required to avoid this marking problem: | ||
293 | ** | ||
294 | ** [aa[bb.X| X created. | ||
295 | ** [aa[cc| GC called from (small) inner frame, X destroyed. | ||
296 | ** [aa....X.| GC called again in (larger) outer frame, X resurrected (ouch). | ||
297 | ** | ||
298 | ** During GC in step 2 the stack must be cleaned up to the max. frame extent: | ||
299 | ** | ||
300 | ** ***| Slots cleaned | ||
301 | ** [cc| from top of last frame | ||
302 | ** [aa......| to max. frame extent. | ||
303 | */ | ||
304 | for (; o <= lim; o++) /* o continues from th->top; the slot at lim is cleared too. */ | ||
305 | setnilV(o); | ||
306 | lj_state_shrinkstack(th, (MSize)(lim - th->stack)); | ||
307 | } | ||
308 | |||
309 | /* Propagate one gray object. Traverse it and turn it black. Pops the head of g->gc.gray, dispatches on its type (table, function, prototype, else thread) and returns a byte-size estimate of the object, which the callers use as the step cost. */ | ||
310 | static size_t propagatemark(global_State *g) | ||
311 | { | ||
312 | GCobj *o = gcref(g->gc.gray); | ||
313 | lua_assert(isgray(o)); | ||
314 | gray2black(o); | ||
315 | setgcrefr(g->gc.gray, o->gch.gclist); /* Remove from gray list. */ | ||
316 | if (LJ_LIKELY(o->gch.gct == ~LJ_TTAB)) { | ||
317 | GCtab *t = gco2tab(o); | ||
318 | if (gc_traverse_tab(g, t)) /* Non-zero result means: treat as weak table. */ | ||
319 | black2gray(o); /* Keep weak tables gray. */ | ||
320 | return sizeof(GCtab) + sizeof(TValue) * t->asize + | ||
321 | sizeof(Node) * (t->hmask + 1); | ||
322 | } else if (LJ_LIKELY(o->gch.gct == ~LJ_TFUNC)) { | ||
323 | GCfunc *fn = gco2func(o); | ||
324 | gc_traverse_func(g, fn); | ||
325 | return isluafunc(fn) ? sizeLfunc((MSize)fn->l.nupvalues) : | ||
326 | sizeCfunc((MSize)fn->c.nupvalues); | ||
327 | } else if (LJ_LIKELY(o->gch.gct == ~LJ_TPROTO)) { | ||
328 | GCproto *pt = gco2pt(o); | ||
329 | gc_traverse_proto(g, pt); | ||
330 | return sizeof(GCproto) + sizeof(BCIns) * pt->sizebc + | ||
331 | sizeof(GCobj *) * pt->sizekgc + | ||
332 | sizeof(lua_Number) * pt->sizekn + | ||
333 | sizeof(int16_t) * pt->sizeuv + | ||
334 | sizeof(int32_t) * pt->sizelineinfo + | ||
335 | sizeof(VarInfo) * pt->sizevarinfo + | ||
336 | sizeof(GCstr *) * pt->sizeuvname; | ||
337 | } else { /* Anything else on the gray list is handled as a thread. */ | ||
338 | lua_State *th = gco2th(o); | ||
339 | setgcrefr(th->gclist, g->gc.grayagain); /* Queue on grayagain; atomic() propagates that list again. */ | ||
340 | setgcref(g->gc.grayagain, o); | ||
341 | black2gray(o); /* Threads are never black. */ | ||
342 | gc_traverse_thread(g, th); | ||
343 | return sizeof(lua_State) + sizeof(TValue) * th->stacksize; | ||
344 | } | ||
345 | } | ||
346 | |||
347 | /* Drain the gray list: keep propagating until no gray object is left. Returns the summed size estimates. */ | ||
348 | static size_t gc_propagate_gray(global_State *g) | ||
349 | { | ||
350 | size_t cost; /* Accumulated return values of propagatemark(). */ | ||
351 | for (cost = 0; gcref(g->gc.gray) != NULL; ) | ||
352 | cost += propagatemark(g); /* Pops one object off g->gc.gray. */ | ||
353 | return cost; | ||
354 | } | ||
355 | |||
356 | /* -- Sweep phase --------------------------------------------------------- */ | ||
357 | |||
358 | /* Try to shrink some common data structures. Halves the string table when it is at most a quarter full and still above the minimum size, and halves the temporary buffer when it exceeds twice its minimum size. */ | ||
359 | static void gc_shrink(global_State *g, lua_State *L) | ||
360 | { | ||
361 | if (g->strnum <= (g->strmask >> 2) && g->strmask > LJ_MIN_STRTAB*2-1) | ||
362 | lj_str_resize(L, g->strmask >> 1); /* Shrink string table. */ | ||
363 | if (g->tmpbuf.sz > LJ_MIN_SBUF*2) | ||
364 | lj_str_resizebuf(L, &g->tmpbuf, g->tmpbuf.sz >> 1); /* Shrink temp buf. */ | ||
365 | } | ||
366 | |||
367 | /* Type of GC free functions. The table entries are cast to this common signature. */ | ||
368 | typedef void (LJ_FASTCALL *GCFreeFunc)(global_State *g, GCobj *o); | ||
369 | |||
370 | /* GC free functions for LJ_TSTR .. LJ_TUDATA. ORDER LJ_T. gc_sweep() indexes this table with (gct - ~LJ_TSTR). */ | ||
371 | static const GCFreeFunc gc_freefunc[] = { | ||
372 | (GCFreeFunc)lj_str_free, | ||
373 | (GCFreeFunc)lj_func_freeuv, | ||
374 | (GCFreeFunc)lj_state_free, | ||
375 | (GCFreeFunc)lj_func_freeproto, | ||
376 | (GCFreeFunc)lj_func_free, | ||
377 | (GCFreeFunc)0, /* NOTE(review): presumably the slot of a type tag that is never allocated as a GC object -- confirm against lj_obj.h. */ | ||
378 | (GCFreeFunc)lj_tab_free, | ||
379 | (GCFreeFunc)lj_udata_free | ||
380 | }; | ||
381 | |||
382 | /* Full sweep of a GC list: a partial sweep with the object limit set to LJ_MAX_MEM. */ | ||
383 | #define gc_fullsweep(g, p) gc_sweep(g, (p), LJ_MAX_MEM) | ||
384 | |||
385 | /* Partial sweep of a GC list. Visits at most lim objects starting at *p: live objects are turned to the current white, dead ones are unlinked and freed. Returns the link where the sweep stopped, so it can be resumed later. */ | ||
386 | static GCRef *gc_sweep(global_State *g, GCRef *p, uint32_t lim) | ||
387 | { | ||
388 | /* Mask with other white and LJ_GC_FIXED. Or LJ_GC_SFIXED on shutdown. */ | ||
389 | int ow = otherwhite(g); | ||
390 | GCobj *o; | ||
391 | while ((o = gcref(*p)) != NULL && lim-- > 0) { | ||
392 | if (o->gch.gct == ~LJ_TTHREAD) /* Need to sweep open upvalues, too. */ | ||
393 | gc_fullsweep(g, &gco2th(o)->openupval); | ||
394 | if (((o->gch.marked ^ LJ_GC_WHITES) & ow)) { /* Black or current white? */ | ||
395 | lua_assert(!isdead(g, o) || (o->gch.marked & LJ_GC_FIXED)); | ||
396 | makewhite(g, o); /* Value is alive, change to the current white. */ | ||
397 | p = &o->gch.nextgc; /* Advance to the next link. */ | ||
398 | } else { /* Otherwise value is dead, free it. */ | ||
399 | lua_assert(isdead(g, o) || ow == LJ_GC_SFIXED); | ||
400 | setgcrefr(*p, o->gch.nextgc); /* Unlink; p stays put and now refers to the successor. */ | ||
401 | if (o == gcref(g->gc.root)) | ||
402 | setgcrefr(g->gc.root, o->gch.nextgc); /* Adjust list anchor. */ | ||
403 | gc_freefunc[o->gch.gct - ~LJ_TSTR](g, o); /* Dispatch to the per-type free function. */ | ||
404 | } | ||
405 | } | ||
406 | return p; | ||
407 | } | ||
408 | |||
409 | /* Check whether we can clear a key or a value slot from a table. val is non-zero when o is a value slot. Returns 1 if the slot may be cleared, 0 otherwise. Side effect: a string in the slot gets marked. */ | ||
410 | static int gc_mayclear(cTValue *o, int val) | ||
411 | { | ||
412 | if (tvisgcv(o)) { /* Only collectable objects can be weak references. */ | ||
413 | if (tvisstr(o)) { /* But strings cannot be used as weak references. */ | ||
414 | gc_mark_str(strV(o)); /* And need to be marked. */ | ||
415 | return 0; | ||
416 | } | ||
417 | if (iswhite(gcV(o))) | ||
418 | return 1; /* Object is about to be collected. */ | ||
419 | if (tvisudata(o) && val && isfinalized(udataV(o))) | ||
420 | return 1; /* Finalized userdata is dropped only from values. */ | ||
421 | } | ||
422 | return 0; /* Cannot clear. */ | ||
423 | } | ||
424 | |||
425 | /* Clear collected entries from weak tables. o is the head of a list of tables chained through their gclist field; the array part is only scanned for tables with weak values, the hash part is scanned for every table in the list. */ | ||
426 | static void gc_clearweak(GCobj *o) | ||
427 | { | ||
428 | while (o) { | ||
429 | GCtab *t = gco2tab(o); | ||
430 | lua_assert((t->marked & LJ_GC_WEAK)); | ||
431 | if ((t->marked & LJ_GC_WEAKVAL)) { | ||
432 | MSize i, asize = t->asize; | ||
433 | for (i = 0; i < asize; i++) { | ||
434 | /* Clear array slot when value is about to be collected. */ | ||
435 | TValue *tv = arrayslot(t, i); | ||
436 | if (gc_mayclear(tv, 1)) | ||
437 | setnilV(tv); | ||
438 | } | ||
439 | } | ||
440 | if (t->hmask > 0) { /* A hash part with hmask == 0 is not scanned. */ | ||
441 | Node *node = noderef(t->node); | ||
442 | MSize i, hmask = t->hmask; | ||
443 | for (i = 0; i <= hmask; i++) { /* hmask+1 nodes. */ | ||
444 | Node *n = &node[i]; | ||
445 | /* Clear hash slot when key or value is about to be collected. */ | ||
446 | if (!tvisnil(&n->val) && (gc_mayclear(&n->key, 0) || | ||
447 | gc_mayclear(&n->val, 1))) { | ||
448 | setnilV(&n->val); | ||
449 | if (tvisgcv(&n->key)) /* Leave GC key in, but mark as dead. */ | ||
450 | setitype(&n->key, LJ_TDEADKEY); | ||
451 | } | ||
452 | } | ||
453 | } | ||
454 | o = gcref(t->gclist); /* Next table in the list. */ | ||
455 | } | ||
456 | } | ||
457 | |||
458 | /* Finalize one userdata object from mmudata list: unchain it, relink it after the main thread as a white object and, if its metatable has a __gc metamethod, call that in protected mode. Errors from the call are re-thrown after the saved state has been restored. */ | ||
459 | static void gc_finalize(lua_State *L) | ||
460 | { | ||
461 | global_State *g = G(L); | ||
462 | GCobj *o = gcnext(gcref(g->gc.mmudata)); /* Take the object following the list anchor. */ | ||
463 | GCudata *ud = gco2ud(o); | ||
464 | cTValue *mo; | ||
465 | /* Unchain from list of userdata to be finalized. */ | ||
466 | if (o == gcref(g->gc.mmudata)) /* The anchor's successor is the anchor itself: last element. */ | ||
467 | setgcrefnull(g->gc.mmudata); | ||
468 | else | ||
469 | setgcrefr(gcref(g->gc.mmudata)->gch.nextgc, ud->nextgc); | ||
470 | /* Add it back to the main userdata list and make it white. */ | ||
471 | setgcrefr(ud->nextgc, mainthread(g)->nextgc); | ||
472 | setgcref(mainthread(g)->nextgc, o); | ||
473 | makewhite(g, o); | ||
474 | /* Resolve the __gc metamethod. */ | ||
475 | mo = lj_meta_fastg(g, tabref(ud->metatable), MM_gc); | ||
476 | if (mo) { | ||
477 | /* Save and restore lots of state around the __gc callback. */ | ||
478 | uint8_t oldh = hook_save(g); | ||
479 | MSize oldt = g->gc.threshold; | ||
480 | GCobj *oldjl = gcref(g->jit_L); | ||
481 | MSize oldjs = 0; | ||
482 | ptrdiff_t oldjb = 0; | ||
483 | int errcode; | ||
484 | TValue *top; | ||
485 | if (oldjl) { | ||
486 | oldjs = gco2th(oldjl)->stacksize; | ||
487 | oldjb = savestack(gco2th(oldjl), mref(g->jit_base, TValue )); /* Saved via savestack(), restored below via restorestack(). */ | ||
488 | setgcrefnull(g->jit_L); | ||
489 | } | ||
490 | lj_trace_abort(g); | ||
491 | top = L->top; | ||
492 | L->top = top+2; /* Two slots: metamethod and userdata argument. */ | ||
493 | hook_entergc(g); /* Disable hooks and new traces during __gc. */ | ||
494 | g->gc.threshold = LJ_MAX_MEM; /* Prevent GC steps. */ | ||
495 | copyTV(L, top, mo); | ||
496 | setudataV(L, top+1, ud); | ||
497 | errcode = lj_vm_pcall(L, top+1, 1+0, -1); /* Stack: |mo|ud| -> | */ | ||
498 | hook_restore(g, oldh); | ||
499 | g->gc.threshold = oldt; /* Restore GC threshold. */ | ||
500 | if (oldjl) { | ||
501 | if (gco2th(oldjl)->stacksize < oldjs) /* Stack got smaller during the callback: grow it back to the old size. */ | ||
502 | lj_state_growstack(gco2th(oldjl), oldjs - gco2th(oldjl)->stacksize); | ||
503 | setgcref(g->jit_L, oldjl); | ||
504 | setmref(g->jit_base, restorestack(gco2th(oldjl), oldjb)); | ||
505 | } | ||
506 | if (errcode) | ||
507 | lj_err_throw(L, errcode); /* Propagate errors. */ | ||
508 | } | ||
509 | } | ||
510 | |||
511 | /* Finalize all userdata objects from mmudata list. Calls gc_finalize() until the list is empty; an error thrown by a __gc metamethod leaves the loop early. */ | ||
512 | void lj_gc_finalizeudata(lua_State *L) | ||
513 | { | ||
514 | while (gcref(G(L)->gc.mmudata) != NULL) | ||
515 | gc_finalize(L); /* Removes one object from the list per call. */ | ||
516 | } | ||
517 | |||
518 | /* Free all remaining GC objects: fully sweep the root list and every string hash chain with a white setting under which only super-fixed objects survive. */ | ||
519 | void lj_gc_freeall(global_State *g) | ||
520 | { | ||
521 | MSize i, strmask; | ||
522 | /* Free everything, except super-fixed objects (the main thread). */ | ||
523 | g->gc.currentwhite = LJ_GC_WHITES | LJ_GC_SFIXED; /* Makes otherwhite(g) equal LJ_GC_SFIXED in gc_sweep(). */ | ||
524 | gc_fullsweep(g, &g->gc.root); | ||
525 | strmask = g->strmask; | ||
526 | for (i = 0; i <= strmask; i++) /* Free all string hash chains. */ | ||
527 | gc_fullsweep(g, &g->strhash[i]); | ||
528 | } | ||
529 | |||
530 | /* -- Collector ----------------------------------------------------------- */ | ||
531 | |||
532 | /* Atomic part of the GC cycle, transitioning from mark to sweep phase. Runs all remaining marking in one go (upvalues, weak tables, running thread, grayagain list, userdata to be finalized), clears weak tables, flips the current white and sets up the sweep state. */ | ||
533 | static void atomic(global_State *g, lua_State *L) | ||
534 | { | ||
535 | size_t udsize; /* Size estimate for the userdata awaiting finalization. */ | ||
536 | |||
537 | gc_mark_uv(g); /* Need to remark open upvalues (the thread may be dead). */ | ||
538 | gc_propagate_gray(g); /* Propagate any left-overs. */ | ||
539 | |||
540 | setgcrefr(g->gc.gray, g->gc.weak); /* Empty the list of weak tables. */ | ||
541 | setgcrefnull(g->gc.weak); | ||
542 | lua_assert(!iswhite(obj2gco(mainthread(g)))); | ||
543 | gc_markobj(g, L); /* Mark running thread. */ | ||
544 | gc_mark_curtrace(g); /* Mark current trace. */ | ||
545 | gc_mark_basemt(g); /* Mark base metatables (again). */ | ||
546 | gc_propagate_gray(g); /* Propagate all of the above. */ | ||
547 | |||
548 | setgcrefr(g->gc.gray, g->gc.grayagain); /* Empty the 2nd chance list. */ | ||
549 | setgcrefnull(g->gc.grayagain); | ||
550 | gc_propagate_gray(g); /* Propagate it. */ | ||
551 | |||
552 | udsize = lj_gc_separateudata(g, 0); /* Separate userdata to be finalized. */ | ||
553 | gc_mark_mmudata(g); /* Mark them. */ | ||
554 | udsize += gc_propagate_gray(g); /* And propagate the marks. */ | ||
555 | |||
556 | /* All marking done, clear weak tables. */ | ||
557 | gc_clearweak(gcref(g->gc.weak)); | ||
558 | |||
559 | /* Prepare for sweep phase. */ | ||
560 | g->gc.currentwhite = cast_byte(otherwhite(g)); /* Flip current white. */ | ||
561 | g->gc.sweepstr = 0; /* Start with string hash chain 0. */ | ||
562 | g->gc.sweep = &g->gc.root; | ||
563 | g->gc.state = GCSsweepstring; | ||
564 | g->gc.estimate = g->gc.total - (MSize)udsize; /* Initial estimate. */ | ||
565 | } | ||
566 | |||
567 | /* GC state machine. Returns a cost estimate for each step performed. Executes one unit of work for the current g->gc.state and advances the state when that phase is done. */ | ||
568 | static size_t gc_onestep(lua_State *L) | ||
569 | { | ||
570 | global_State *g = G(L); | ||
571 | switch (g->gc.state) { | ||
572 | case GCSpause: | ||
573 | gc_mark_start(g); /* Start a new GC cycle by marking all GC roots. */ | ||
574 | return 0; | ||
575 | case GCSpropagate: | ||
576 | if (gcref(g->gc.gray) != NULL) | ||
577 | return propagatemark(g); /* Propagate one gray object. */ | ||
578 | atomic(g, L); /* End of mark phase. */ | ||
579 | return 0; | ||
580 | case GCSsweepstring: { | ||
581 | MSize old = g->gc.total; | ||
582 | gc_fullsweep(g, &g->strhash[g->gc.sweepstr++]); /* Sweep one chain. */ | ||
583 | if (g->gc.sweepstr > g->strmask) | ||
584 | g->gc.state = GCSsweep; /* All string hash chains swept. */ | ||
585 | lua_assert(old >= g->gc.total); | ||
586 | g->gc.estimate -= old - g->gc.total; /* Subtract what the sweep freed. */ | ||
587 | return GCSWEEPCOST; | ||
588 | } | ||
589 | case GCSsweep: { | ||
590 | MSize old = g->gc.total; | ||
591 | g->gc.sweep = gc_sweep(g, g->gc.sweep, GCSWEEPMAX); /* Partial sweep. */ | ||
592 | if (gcref(*g->gc.sweep) == NULL) { | ||
593 | gc_shrink(g, L); | ||
594 | g->gc.state = GCSfinalize; /* End of sweep phase. */ | ||
595 | } | ||
596 | lua_assert(old >= g->gc.total); | ||
597 | g->gc.estimate -= old - g->gc.total; /* Subtract what the sweep freed. */ | ||
598 | return GCSWEEPMAX*GCSWEEPCOST; | ||
599 | } | ||
600 | case GCSfinalize: | ||
601 | if (gcref(g->gc.mmudata) != NULL) { | ||
602 | gc_finalize(L); /* Finalize one userdata object. */ | ||
603 | if (g->gc.estimate > GCFINALIZECOST) /* Guard keeps the estimate from going below zero. */ | ||
604 | g->gc.estimate -= GCFINALIZECOST; | ||
605 | return GCFINALIZECOST; | ||
606 | } | ||
607 | g->gc.state = GCSpause; /* End of GC cycle. */ | ||
608 | g->gc.debt = 0; | ||
609 | return 0; | ||
610 | default: | ||
611 | lua_assert(0); /* Unknown GC state. */ | ||
612 | return 0; | ||
613 | } | ||
614 | } | ||
615 | |||
616 | /* Perform a limited amount of incremental GC steps. Runs gc_onestep() until the step budget is used up or a cycle ends. Returns 1 if a GC cycle was finished, 0 otherwise. g->vmstate is set to GC for the duration and restored before returning. */ | ||
617 | int lj_gc_step(lua_State *L) | ||
618 | { | ||
619 | global_State *g = G(L); | ||
620 | MSize lim; | ||
621 | int32_t ostate = g->vmstate; | ||
622 | setvmstate(g, GC); | ||
623 | lim = (GCSTEPSIZE/100) * g->gc.stepmul; /* Budget scales with stepmul. */ | ||
624 | if (lim == 0) | ||
625 | lim = LJ_MAX_MEM; /* A zero budget means (practically) no limit. */ | ||
626 | g->gc.debt += g->gc.total - g->gc.threshold; | ||
627 | do { | ||
628 | lim -= (MSize)gc_onestep(L); | ||
629 | if (g->gc.state == GCSpause) { | ||
630 | lua_assert(g->gc.total >= g->gc.estimate); | ||
631 | g->gc.threshold = (g->gc.estimate/100) * g->gc.pause; /* Next cycle starts at estimate scaled by pause/100. */ | ||
632 | g->vmstate = ostate; | ||
633 | return 1; /* Finished a GC cycle. */ | ||
634 | } | ||
635 | } while ((int32_t)lim > 0); /* Signed compare: lim may wrap below zero. */ | ||
636 | if (g->gc.debt < GCSTEPSIZE) { | ||
637 | g->gc.threshold = g->gc.total + GCSTEPSIZE; /* Next step after another GCSTEPSIZE bytes. */ | ||
638 | } else { | ||
639 | g->gc.debt -= GCSTEPSIZE; | ||
640 | g->gc.threshold = g->gc.total; /* Still in debt: threshold equals total, so the next GC check triggers again. */ | ||
641 | } | ||
642 | g->vmstate = ostate; | ||
643 | return 0; | ||
644 | } | ||
645 | |||
646 | /* Same as lj_gc_step(), but fix the stack top first: when the current function is a Lua function, L->top is set to curr_topL(L). The result of lj_gc_step() is discarded. */ | ||
647 | void lj_gc_step_fixtop(lua_State *L) | ||
648 | { | ||
649 | if (curr_funcisL(L)) L->top = curr_topL(L); | ||
650 | lj_gc_step(L); | ||
651 | } | ||
652 | |||
653 | /* Run up to `steps` incremental GC steps on behalf of JIT-compiled code. Stops early as soon as lj_gc_step() reports a finished GC cycle. */ | ||
654 | void lj_gc_step_jit(lua_State *L, const BCIns *pc, MSize steps) | ||
655 | { | ||
656 | cframe_pc(cframe_raw(L->cframe)) = pc; /* Store the given pc in the C frame. */ | ||
657 | L->top = curr_topL(L); /* Fix the stack top before stepping. */ | ||
658 | for (; steps > 0; steps--) | ||
659 | if (lj_gc_step(L) != 0) break; /* Non-zero: a GC cycle was completed. */ | ||
660 | } | ||
661 | |||
662 | /* Perform a full GC cycle: finish (or skip to) the sweep of any cycle in progress, then run one complete cycle from marking the roots to GCSpause and set a new threshold. */ | ||
663 | void lj_gc_fullgc(lua_State *L) | ||
664 | { | ||
665 | global_State *g = G(L); | ||
666 | int32_t ostate = g->vmstate; | ||
667 | setvmstate(g, GC); | ||
668 | if (g->gc.state <= GCSpropagate) { /* Caught somewhere in the middle. */ | ||
669 | g->gc.sweepstr = 0; | ||
670 | g->gc.sweep = &g->gc.root; /* Sweep everything (preserving it). */ | ||
671 | setgcrefnull(g->gc.gray); /* Reset lists from partial propagation. */ | ||
672 | setgcrefnull(g->gc.grayagain); | ||
673 | setgcrefnull(g->gc.weak); | ||
674 | g->gc.state = GCSsweepstring; /* Fast forward to the sweep phase. */ | ||
675 | } | ||
676 | lua_assert(g->gc.state != GCSpause && g->gc.state != GCSpropagate); | ||
677 | while (g->gc.state != GCSfinalize) { /* Finish sweep. */ | ||
678 | lua_assert(g->gc.state == GCSsweepstring || g->gc.state == GCSsweep); | ||
679 | gc_onestep(L); | ||
680 | } | ||
681 | /* Now perform a full GC. */ | ||
682 | gc_mark_start(g); | ||
683 | while (g->gc.state != GCSpause) | ||
684 | gc_onestep(L); | ||
685 | g->gc.threshold = (g->gc.estimate/100) * g->gc.pause; /* Same threshold formula as at the end of a cycle in lj_gc_step(). */ | ||
686 | g->vmstate = ostate; | ||
687 | } | ||
688 | |||
689 | /* -- Write barriers ------------------------------------------------------ */ | ||
690 | |||
691 | /* Move the GC propagation frontier back for tables (make it gray again). The black table t is turned gray and pushed onto the grayagain list, which atomic() propagates again. */ | ||
692 | void lj_gc_barrierback(global_State *g, GCtab *t) | ||
693 | { | ||
694 | GCobj *o = obj2gco(t); | ||
695 | lua_assert(isblack(o) && !isdead(g, o)); | ||
696 | lua_assert(g->gc.state != GCSfinalize && g->gc.state != GCSpause); | ||
697 | black2gray(o); | ||
698 | setgcrefr(t->gclist, g->gc.grayagain); /* Link in at the head of grayagain. */ | ||
699 | setgcref(g->gc.grayagain, o); | ||
700 | } | ||
701 | |||
702 | /* Move the GC propagation frontier forward. Called when the black non-table object o gets a reference to the white object v: during propagation v is marked, in any other state o is made white instead. */ | ||
703 | void lj_gc_barrierf(global_State *g, GCobj *o, GCobj *v) | ||
704 | { | ||
705 | lua_assert(isblack(o) && iswhite(v) && !isdead(g, v) && !isdead(g, o)); | ||
706 | lua_assert(g->gc.state != GCSfinalize && g->gc.state != GCSpause); | ||
707 | lua_assert(o->gch.gct != ~LJ_TTAB); /* Tables use lj_gc_barrierback() instead. */ | ||
708 | /* Preserve invariant during propagation. Otherwise it doesn't matter. */ | ||
709 | if (g->gc.state == GCSpropagate) | ||
710 | gc_mark(g, v); /* Move frontier forward. */ | ||
711 | else | ||
712 | makewhite(g, o); /* Make it white to avoid the following barrier. */ | ||
713 | } | ||
714 | |||
715 | /* Forward barrier for upvalues. Same logic as lj_gc_barrierf(), except that o is asserted to be an upvalue. The reason for duplicating this is that it needs to be visible from ASM. */ | ||
716 | void lj_gc_barrieruv(global_State *g, GCobj *o, GCobj *v) | ||
717 | { | ||
718 | lua_assert(isblack(o) && iswhite(v) && !isdead(g, v) && !isdead(g, o)); | ||
719 | lua_assert(g->gc.state != GCSfinalize && g->gc.state != GCSpause); | ||
720 | lua_assert(o->gch.gct == ~LJ_TUPVAL); | ||
721 | /* Preserve invariant during propagation. Otherwise it doesn't matter. */ | ||
722 | if (g->gc.state == GCSpropagate) | ||
723 | gc_mark(g, v); /* Move frontier forward. */ | ||
724 | else | ||
725 | makewhite(g, o); /* Make it white to avoid the following barrier. */ | ||
726 | } | ||
727 | |||
728 | /* Close upvalue. Also needs a write barrier. The value is copied from the stack slot into the upvalue itself, the upvalue is linked into the GC root list and a gray upvalue is recolored depending on the GC state. */ | ||
729 | void lj_gc_closeuv(global_State *g, GCupval *uv) | ||
730 | { | ||
731 | GCobj *o = obj2gco(uv); | ||
732 | /* Copy stack slot to upvalue itself and point to the copy. */ | ||
733 | copyTV(mainthread(g), &uv->tv, uv->v); | ||
734 | uv->v = &uv->tv; | ||
735 | uv->closed = 1; | ||
736 | setgcrefr(o->gch.nextgc, g->gc.root); /* Link in at the head of the root list. */ | ||
737 | setgcref(g->gc.root, o); | ||
738 | if (isgray(o)) { /* A closed upvalue is never gray, so fix this. */ | ||
739 | if (g->gc.state == GCSpropagate) { | ||
740 | gray2black(o); /* Make it black and preserve invariant. */ | ||
741 | if (tviswhite(uv->v)) /* Black upvalue now refers to a white value. */ | ||
742 | lj_gc_barrierf(g, o, gcV(uv->v)); | ||
743 | } else { | ||
744 | makewhite(g, o); /* Make it white, i.e. sweep the upvalue. */ | ||
745 | lua_assert(g->gc.state != GCSfinalize && g->gc.state != GCSpause); | ||
746 | } | ||
747 | } | ||
748 | } | ||
749 | |||
750 | #if LJ_HASJIT | ||
751 | /* Mark a trace if it's saved during the propagation phase. T is passed as void * and cast to Trace * here; in any other GC state this is a no-op. Only compiled when LJ_HASJIT is set. */ | ||
752 | void lj_gc_barriertrace(global_State *g, void *T) | ||
753 | { | ||
754 | if (g->gc.state == GCSpropagate) | ||
755 | gc_traverse_trace(g, (Trace *)T); | ||
756 | } | ||
757 | #endif | ||
758 | |||
759 | /* -- Allocator ----------------------------------------------------------- */ | ||
760 | |||
761 | /* Call pluggable memory allocator to allocate or resize a fragment. osz/nsz are the old/new sizes; osz == 0 goes with p == NULL (allocation), nsz == 0 yields NULL (free). Throws LUA_ERRMEM if the allocator fails for a non-zero nsz. Keeps g->gc.total in sync. */ | ||
762 | void *lj_mem_realloc(lua_State *L, void *p, MSize osz, MSize nsz) | ||
763 | { | ||
764 | global_State *g = G(L); | ||
765 | lua_assert((osz == 0) == (p == NULL)); | ||
766 | p = g->allocf(g->allocd, p, osz, nsz); | ||
767 | if (p == NULL && nsz > 0) | ||
768 | lj_err_throw(L, LUA_ERRMEM); /* g->gc.total is left untouched on failure. */ | ||
769 | lua_assert((nsz == 0) == (p == NULL)); | ||
770 | g->gc.total = (g->gc.total - osz) + nsz; | ||
771 | return p; | ||
772 | } | ||
773 | |||
774 | /* Allocate new GC object and link it to the root set. Allocates size bytes, throws LUA_ERRMEM on failure, adds size to g->gc.total, links the object at the head of g->gc.root and colors it with the current white. */ | ||
775 | void *lj_mem_newgco(lua_State *L, MSize size) | ||
776 | { | ||
777 | global_State *g = G(L); | ||
778 | GCobj *o = (GCobj *)g->allocf(g->allocd, NULL, 0, size); | ||
779 | if (o == NULL) | ||
780 | lj_err_throw(L, LUA_ERRMEM); | ||
781 | g->gc.total += size; | ||
782 | setgcrefr(o->gch.nextgc, g->gc.root); | ||
783 | setgcref(g->gc.root, o); | ||
784 | newwhite(g, o); /* Only the mark field is initialized here. */ | ||
785 | return o; | ||
786 | } | ||
787 | |||
788 | /* Grow a vector of esz-sized elements: double the element count in *szp, but use at least LJ_MIN_VECSZ and at most lim. Returns the reallocated vector and stores the new count in *szp. */ | ||
789 | void *lj_mem_grow(lua_State *L, void *p, MSize *szp, MSize lim, MSize esz) | ||
790 | { | ||
791 | MSize osz = *szp; /* Current element count. */ | ||
792 | MSize nsz = osz << 1; /* Candidate: twice as many elements. */ | ||
793 | if (nsz < LJ_MIN_VECSZ) nsz = LJ_MIN_VECSZ; | ||
794 | if (nsz > lim) nsz = lim; | ||
795 | /* lj_mem_realloc() throws on failure, so *szp is only updated on success. */ | ||
796 | p = lj_mem_realloc(L, p, osz*esz, nsz*esz); | ||
797 | *szp = nsz; | ||
798 | return p; | ||
799 | } | ||
800 | |||
diff --git a/src/lj_gc.h b/src/lj_gc.h new file mode 100644 index 00000000..192066d3 --- /dev/null +++ b/src/lj_gc.h | |||
@@ -0,0 +1,102 @@ | |||
1 | /* | ||
2 | ** Garbage collector. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #ifndef _LJ_GC_H | ||
7 | #define _LJ_GC_H | ||
8 | |||
9 | #include "lj_obj.h" | ||
10 | |||
11 | /* Garbage collector states. Order matters: lj_gc_fullgc() tests for state <= GCSpropagate. */ | ||
12 | enum { GCSpause, GCSpropagate, GCSsweepstring, GCSsweep, GCSfinalize }; | ||
13 | |||
14 | /* Bitmasks for marked field of GCobj. */ | ||
15 | #define LJ_GC_WHITE0 0x01 | ||
16 | #define LJ_GC_WHITE1 0x02 | ||
17 | #define LJ_GC_BLACK 0x04 | ||
18 | #define LJ_GC_FINALIZED 0x08 /* Same bit as LJ_GC_WEAKKEY; presumably only used on userdata -- confirm. */ | ||
19 | #define LJ_GC_WEAKKEY 0x08 /* Same bit as LJ_GC_FINALIZED; presumably only used on tables -- confirm. */ | ||
20 | #define LJ_GC_WEAKVAL 0x10 | ||
21 | #define LJ_GC_FIXED 0x20 | ||
22 | #define LJ_GC_SFIXED 0x40 /* Super-fixed: survives even lj_gc_freeall(). */ | ||
23 | |||
24 | #define LJ_GC_WHITES (LJ_GC_WHITE0 | LJ_GC_WHITE1) | ||
25 | #define LJ_GC_COLORS (LJ_GC_WHITES | LJ_GC_BLACK) | ||
26 | #define LJ_GC_WEAK (LJ_GC_WEAKKEY | LJ_GC_WEAKVAL) | ||
27 | |||
28 | /* Macros to test and set GCobj colors. An object is gray when neither a white bit nor the black bit is set. All macro arguments are parenthesized so that arbitrary expressions can be passed. */ | ||
29 | #define iswhite(x) ((x)->gch.marked & LJ_GC_WHITES) | ||
30 | #define isblack(x) ((x)->gch.marked & LJ_GC_BLACK) | ||
31 | #define isgray(x) (!((x)->gch.marked & (LJ_GC_BLACK|LJ_GC_WHITES))) | ||
32 | #define tviswhite(x) (tvisgcv(x) && iswhite(gcV(x))) | ||
33 | #define otherwhite(g) ((g)->gc.currentwhite ^ LJ_GC_WHITES) /* The white that is NOT current (plus any non-white bits of currentwhite). */ | ||
34 | #define isdead(g, v) ((v)->gch.marked & otherwhite(g) & LJ_GC_WHITES) | ||
35 | |||
36 | #define curwhite(g) ((g)->gc.currentwhite & LJ_GC_WHITES) | ||
37 | #define newwhite(g, x) (obj2gco(x)->gch.marked = (uint8_t)curwhite(g)) /* Overwrites all mark bits. */ | ||
38 | #define flipwhite(x) ((x)->gch.marked ^= LJ_GC_WHITES) | ||
39 | #define fixstring(s) ((s)->marked |= LJ_GC_FIXED) | ||
40 | |||
41 | /* Collector. */ | ||
42 | LJ_FUNC size_t lj_gc_separateudata(global_State *g, int all); | ||
43 | LJ_FUNC void lj_gc_finalizeudata(lua_State *L); | ||
44 | LJ_FUNC void lj_gc_freeall(global_State *g); | ||
45 | LJ_FUNCA int lj_gc_step(lua_State *L); | ||
46 | LJ_FUNCA void lj_gc_step_fixtop(lua_State *L); | ||
47 | LJ_FUNCA void lj_gc_step_jit(lua_State *L, const BCIns *pc, MSize steps); | ||
48 | LJ_FUNC void lj_gc_fullgc(lua_State *L); | ||
49 | |||
50 | /* GC check: drive collector forward if the GC threshold has been reached. Note: these expand to a braced block, not to do { } while (0), so an invocation followed by ';' directly before an 'else' would not parse. */ | ||
51 | #define lj_gc_check(L) \ | ||
52 | { if (LJ_UNLIKELY(G(L)->gc.total >= G(L)->gc.threshold)) \ | ||
53 | lj_gc_step(L); } | ||
54 | #define lj_gc_check_fixtop(L) \ | ||
55 | { if (LJ_UNLIKELY(G(L)->gc.total >= G(L)->gc.threshold)) \ | ||
56 | lj_gc_step_fixtop(L); } /* Variant that fixes L->top before stepping. */ | ||
57 | |||
58 | /* Write barriers. */ | ||
59 | LJ_FUNC void lj_gc_barrierback(global_State *g, GCtab *t); | ||
60 | LJ_FUNC void lj_gc_barrierf(global_State *g, GCobj *o, GCobj *v); | ||
61 | LJ_FUNCA void lj_gc_barrieruv(global_State *g, GCobj *o, GCobj *v); | ||
62 | LJ_FUNC void lj_gc_closeuv(global_State *g, GCupval *uv); | ||
63 | LJ_FUNC void lj_gc_barriertrace(global_State *g, void *T); | ||
64 | |||
65 | /* Barrier for stores to table objects. TValue and GCobj variant. Triggers the backward barrier when a white value is stored into a black table. */ | ||
66 | #define lj_gc_barriert(L, t, tv) \ | ||
67 | { if (tviswhite(tv) && isblack(obj2gco(t))) \ | ||
68 | lj_gc_barrierback(G(L), (t)); } | ||
69 | #define lj_gc_objbarriert(L, t, o) \ | ||
70 | { if (iswhite(obj2gco(o)) && isblack(obj2gco(t))) \ | ||
71 | lj_gc_barrierback(G(L), (t)); } /* o is a GC object, not a TValue. */ | ||
72 | |||
73 | /* Barrier for stores to any other object. TValue and GCobj variant. Triggers the forward barrier when a white value is stored into a black object. */ | ||
74 | #define lj_gc_barrier(L, p, tv) \ | ||
75 | { if (tviswhite(tv) && isblack(obj2gco(p))) \ | ||
76 | lj_gc_barrierf(G(L), obj2gco(p), gcV(tv)); } | ||
77 | #define lj_gc_objbarrier(L, p, o) \ | ||
78 | { if (iswhite(obj2gco(o)) && isblack(obj2gco(p))) \ | ||
79 | lj_gc_barrierf(G(L), obj2gco(p), obj2gco(o)); } /* o is a GC object, not a TValue. */ | ||
80 | |||
81 | /* Allocator. */ | ||
82 | LJ_FUNC void *lj_mem_realloc(lua_State *L, void *p, MSize osz, MSize nsz); | ||
83 | LJ_FUNC void *lj_mem_newgco(lua_State *L, MSize size); | ||
84 | LJ_FUNC void *lj_mem_grow(lua_State *L, void *p, | ||
85 | MSize *szp, MSize lim, MSize esz); | ||
86 | |||
87 | #define lj_mem_new(L, s) lj_mem_realloc(L, NULL, 0, (s)) /* Plain allocation: no old block. */ | ||
88 | #define lj_mem_free(g, p, osize) \ | ||
89 | ((g)->gc.total -= (MSize)(osize), (g)->allocf((g)->allocd, (p), (osize), 0)) | ||
90 | |||
91 | #define lj_mem_newvec(L, n, t) ((t *)lj_mem_new(L, (MSize)((n)*sizeof(t)))) | ||
92 | #define lj_mem_reallocvec(L, p, on, n, t) \ | ||
93 | ((p) = (t *)lj_mem_realloc(L, (p), (MSize)((on)*sizeof(t)), (MSize)((n)*sizeof(t)))) | ||
94 | #define lj_mem_growvec(L, p, n, m, t) \ | ||
95 | ((p) = (t *)lj_mem_grow(L, (p), &(n), (m), (MSize)sizeof(t))) | ||
96 | #define lj_mem_freevec(g, p, n, t) lj_mem_free(g, (p), (n)*sizeof(t)) /* Frees n elements of type t. */ | ||
97 | |||
98 | #define lj_mem_newobj(L, t) ((t *)lj_mem_newgco(L, sizeof(t))) /* New GC object, linked into the root list. */ | ||
99 | #define lj_mem_newt(L, s, t) ((t *)lj_mem_new(L, (s))) /* s bytes, result cast to t *. */ | ||
100 | #define lj_mem_freet(g, p) lj_mem_free(g, (p), sizeof(*(p))) /* Size is taken from the pointee type. */ | ||
101 | |||
102 | #endif | ||
diff --git a/src/lj_gdbjit.c b/src/lj_gdbjit.c new file mode 100644 index 00000000..dfec188a --- /dev/null +++ b/src/lj_gdbjit.c | |||
@@ -0,0 +1,739 @@ | |||
1 | /* | ||
2 | ** Client for the GDB JIT API. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #define lj_gdbjit_c | ||
7 | #define LUA_CORE | ||
8 | |||
9 | #include "lj_obj.h" | ||
10 | |||
11 | #if LJ_HASJIT | ||
12 | |||
13 | #include "lj_gc.h" | ||
14 | #include "lj_err.h" | ||
15 | #include "lj_str.h" | ||
16 | #include "lj_frame.h" | ||
17 | #include "lj_jit.h" | ||
18 | #include "lj_dispatch.h" | ||
19 | |||
20 | /* This is not compiled in by default. | ||
21 | ** Enable with -DLUAJIT_USE_GDBJIT in the Makefile and recompile everything. | ||
22 | */ | ||
23 | #ifdef LUAJIT_USE_GDBJIT | ||
24 | |||
25 | /* The GDB JIT API allows JIT compilers to pass debug information about | ||
26 | ** JIT-compiled code back to GDB. You need at least GDB 7.0 or higher | ||
27 | ** to see it in action. | ||
28 | ** | ||
29 | ** This is a passive API, so it works even when not running under GDB | ||
30 | ** or when attaching to an already running process. Alas, this implies | ||
31 | ** enabling it always has a non-negligible overhead -- do not use in | ||
32 | ** release mode! | ||
33 | ** | ||
34 | ** The LuaJIT GDB JIT client is rather minimal at the moment. It gives | ||
35 | ** each trace a symbol name and adds a source location and frame unwind | ||
36 | ** information. Obviously LuaJIT itself and any embedding C application | ||
37 | ** should be compiled with debug symbols, too (see the Makefile). | ||
38 | ** | ||
39 | ** Traces are named TRACE_1, TRACE_2, ... these correspond to the trace | ||
40 | ** numbers from -jv or -jdump. Use "break TRACE_1" or "tbreak TRACE_1" etc. | ||
41 | ** to set breakpoints on specific traces (even ahead of their creation). | ||
42 | ** | ||
43 | ** The source location for each trace allows listing the corresponding | ||
44 | ** source lines with the GDB command "list" (but only if the Lua source | ||
45 | ** has been loaded from a file). Currently this is always set to the | ||
46 | ** location where the trace has been started. | ||
47 | ** | ||
48 | ** Frame unwind information can be inspected with the GDB command | ||
49 | ** "info frame". This also allows proper backtraces across JIT-compiled | ||
50 | ** code with the GDB command "bt". | ||
51 | ** | ||
52 | ** You probably want to add the following settings to a .gdbinit file | ||
53 | ** (or add them to ~/.gdbinit): | ||
54 | ** set disassembly-flavor intel | ||
55 | ** set breakpoint pending on | ||
56 | ** | ||
57 | ** Here's a sample GDB session: | ||
58 | ** ------------------------------------------------------------------------ | ||
59 | |||
60 | $ cat >x.lua | ||
61 | for outer=1,100 do | ||
62 | for inner=1,100 do end | ||
63 | end | ||
64 | ^D | ||
65 | |||
66 | $ luajit -jv x.lua | ||
67 | [TRACE 1 x.lua:2] | ||
68 | [TRACE 2 (1/3) x.lua:1 -> 1] | ||
69 | |||
70 | $ gdb --quiet --args luajit x.lua | ||
71 | (gdb) tbreak TRACE_1 | ||
72 | Function "TRACE_1" not defined. | ||
73 | Temporary breakpoint 1 (TRACE_1) pending. | ||
74 | (gdb) run | ||
75 | Starting program: luajit x.lua | ||
76 | |||
77 | Temporary breakpoint 1, TRACE_1 () at x.lua:2 | ||
78 | 2 for inner=1,100 do end | ||
79 | (gdb) list | ||
80 | 1 for outer=1,100 do | ||
81 | 2 for inner=1,100 do end | ||
82 | 3 end | ||
83 | (gdb) bt | ||
84 | #0 TRACE_1 () at x.lua:2 | ||
85 | #1 0x08053690 in lua_pcall [...] | ||
86 | [...] | ||
87 | #7 0x0806ff90 in main [...] | ||
88 | (gdb) disass TRACE_1 | ||
89 | Dump of assembler code for function TRACE_1: | ||
90 | 0xf7fd9fba <TRACE_1+0>: mov DWORD PTR ds:0xf7e0e2a0,0x1 | ||
91 | 0xf7fd9fc4 <TRACE_1+10>: movsd xmm7,QWORD PTR [edx+0x20] | ||
92 | [...] | ||
93 | 0xf7fd9ff8 <TRACE_1+62>: jmp 0xf7fd2014 | ||
94 | End of assembler dump. | ||
95 | (gdb) tbreak TRACE_2 | ||
96 | Function "TRACE_2" not defined. | ||
97 | Temporary breakpoint 2 (TRACE_2) pending. | ||
98 | (gdb) cont | ||
99 | Continuing. | ||
100 | |||
101 | Temporary breakpoint 2, TRACE_2 () at x.lua:1 | ||
102 | 1 for outer=1,100 do | ||
103 | (gdb) info frame | ||
104 | Stack level 0, frame at 0xffffd7c0: | ||
105 | eip = 0xf7fd9f60 in TRACE_2 (x.lua:1); saved eip 0x8053690 | ||
106 | called by frame at 0xffffd7e0 | ||
107 | source language unknown. | ||
108 | Arglist at 0xffffd78c, args: | ||
109 | Locals at 0xffffd78c, Previous frame's sp is 0xffffd7c0 | ||
110 | Saved registers: | ||
111 | ebx at 0xffffd7ac, ebp at 0xffffd7b8, esi at 0xffffd7b0, edi at 0xffffd7b4, | ||
112 | eip at 0xffffd7bc | ||
113 | (gdb) | ||
114 | |||
115 | ** ------------------------------------------------------------------------ | ||
116 | */ | ||
117 | |||
118 | /* -- GDB JIT API --------------------------------------------------------- */ | ||
119 | |||
120 | /* GDB JIT actions. Values for GDBJITdesc.action_flag. */ | ||
121 | enum { | ||
122 | GDBJIT_NOACTION = 0, | ||
123 | GDBJIT_REGISTER, | ||
124 | GDBJIT_UNREGISTER | ||
125 | }; | ||
126 | |||
127 | /* GDB JIT entry. One node of a doubly-linked list; each node refers to one in-memory symbol file. */ | ||
128 | typedef struct GDBJITentry { | ||
129 | struct GDBJITentry *next_entry; | ||
130 | struct GDBJITentry *prev_entry; | ||
131 | const char *symfile_addr; | ||
132 | uint64_t symfile_size; /* 64 bit wide regardless of the pointer size. */ | ||
133 | } GDBJITentry; | ||
134 | |||
135 | /* GDB JIT descriptor. Layout and symbol name are dictated by the GDB JIT interface. */ | ||
136 | typedef struct GDBJITdesc { | ||
137 | uint32_t version; | ||
138 | uint32_t action_flag; /* One of the GDBJIT_* actions above. */ | ||
139 | GDBJITentry *relevant_entry; | ||
140 | GDBJITentry *first_entry; | ||
141 | } GDBJITdesc; | ||
142 | |||
143 | GDBJITdesc __jit_debug_descriptor = { | ||
144 | 1, GDBJIT_NOACTION, NULL, NULL /* version 1, no action pending, empty list. */ | ||
145 | }; | ||
146 | |||
147 | /* GDB sets a breakpoint at this function. It must stay a real, non-inlined function, hence LJ_NOINLINE. */ | ||
148 | void LJ_NOINLINE __jit_debug_register_code(void) | ||
149 | { | ||
150 | __asm__ __volatile__(""); /* Empty volatile asm: presumably keeps the compiler from eliding the otherwise empty body. */ | ||
151 | } | ||
152 | |||
153 | /* -- In-memory ELF object definitions ------------------------------------ */ | ||
154 | |||
155 | /* ELF definitions. ELFheader follows the ELF file header layout; uintptr_t fields make it match the 32 bit or 64 bit variant of the target. */ | ||
156 | typedef struct ELFheader { | ||
157 | uint8_t emagic[4]; /* The next 16 bytes (emagic .. epad) form the ELF identification. */ | ||
158 | uint8_t eclass; | ||
159 | uint8_t eendian; | ||
160 | uint8_t eversion; | ||
161 | uint8_t eosabi; | ||
162 | uint8_t eabiversion; | ||
163 | uint8_t epad[7]; | ||
164 | uint16_t type; | ||
165 | uint16_t machine; | ||
166 | uint32_t version; | ||
167 | uintptr_t entry; | ||
168 | uintptr_t phofs; /* Program header table offset. */ | ||
169 | uintptr_t shofs; /* Section header table offset. */ | ||
170 | uint32_t flags; | ||
171 | uint16_t ehsize; | ||
172 | uint16_t phentsize; | ||
173 | uint16_t phnum; | ||
174 | uint16_t shentsize; | ||
175 | uint16_t shnum; | ||
176 | uint16_t shstridx; /* Index of the section name string table. */ | ||
177 | } ELFheader; | ||
178 | |||
179 | typedef struct ELFsectheader { | ||
180 | uint32_t name; | ||
181 | uint32_t type; | ||
182 | uintptr_t flags; | ||
183 | uintptr_t addr; | ||
184 | uintptr_t ofs; | ||
185 | uintptr_t size; | ||
186 | uint32_t link; | ||
187 | uint32_t info; | ||
188 | uintptr_t align; | ||
189 | uintptr_t entsize; | ||
190 | } ELFsectheader; | ||
191 | |||
192 | #define ELFSECT_IDX_ABS 0xfff1 | ||
193 | |||
194 | enum { | ||
195 | ELFSECT_TYPE_PROGBITS = 1, | ||
196 | ELFSECT_TYPE_SYMTAB = 2, | ||
197 | ELFSECT_TYPE_STRTAB = 3, | ||
198 | ELFSECT_TYPE_NOBITS = 8 | ||
199 | }; | ||
200 | |||
201 | #define ELFSECT_FLAGS_WRITE 1 | ||
202 | #define ELFSECT_FLAGS_ALLOC 2 | ||
203 | #define ELFSECT_FLAGS_EXEC 4 | ||
204 | |||
205 | typedef struct ELFsymbol { | ||
206 | #if LJ_64 | ||
207 | uint32_t name; | ||
208 | uint8_t info; | ||
209 | uint8_t other; | ||
210 | uint16_t sectidx; | ||
211 | uintptr_t value; | ||
212 | uint64_t size; | ||
213 | #else | ||
214 | uint32_t name; | ||
215 | uintptr_t value; | ||
216 | uint32_t size; | ||
217 | uint8_t info; | ||
218 | uint8_t other; | ||
219 | uint16_t sectidx; | ||
220 | #endif | ||
221 | } ELFsymbol; | ||
222 | |||
223 | enum { | ||
224 | ELFSYM_TYPE_FUNC = 2, | ||
225 | ELFSYM_TYPE_FILE = 4, | ||
226 | ELFSYM_BIND_LOCAL = 0 << 4, | ||
227 | ELFSYM_BIND_GLOBAL = 1 << 4, | ||
228 | }; | ||
229 | |||
/* DWARF definitions. Only the subset needed by the emitters below. */
#define DW_CIE_VERSION	1

/* Call frame instructions. */
enum {
  DW_CFA_nop = 0x0,
  DW_CFA_def_cfa = 0xc,
  DW_CFA_def_cfa_offset = 0xe,
  DW_CFA_advance_loc = 0x40,	/* Low 6 bits hold the delta. */
  DW_CFA_offset = 0x80		/* Low 6 bits hold the register number. */
};

/* Pointer encodings for .eh_frame. */
enum {
  DW_EH_PE_udata4 = 3,
  DW_EH_PE_textrel = 0x20
};

/* Debug info entry tags. */
enum {
  DW_TAG_compile_unit = 0x11
};

enum {
  DW_children_no = 0,
  DW_children_yes = 1
};

/* Attribute names. */
enum {
  DW_AT_name = 0x03,
  DW_AT_stmt_list = 0x10,
  DW_AT_low_pc = 0x11,
  DW_AT_high_pc = 0x12
};

/* Attribute forms. */
enum {
  DW_FORM_addr = 0x01,
  DW_FORM_data4 = 0x06,
  DW_FORM_string = 0x08
};

/* Line number program: standard opcodes. */
enum {
  DW_LNS_extended_op = 0,
  DW_LNS_copy = 1,
  DW_LNS_advance_pc = 2,
  DW_LNS_advance_line = 3
};

/* Line number program: extended opcodes. */
enum {
  DW_LNE_end_sequence = 1,
  DW_LNE_set_address = 2
};

/* DWARF register numbers for the target. DW_REG_RA follows the last
** general-purpose register and denotes the return address column.
*/
enum {
#if LJ_TARGET_X86
  DW_REG_AX, DW_REG_CX, DW_REG_DX, DW_REG_BX,
  DW_REG_SP, DW_REG_BP, DW_REG_SI, DW_REG_DI,
  DW_REG_RA,
#elif LJ_TARGET_X64
  /* Yes, the order is strange, but correct. */
  DW_REG_AX, DW_REG_DX, DW_REG_CX, DW_REG_BX,
  DW_REG_SI, DW_REG_DI, DW_REG_BP, DW_REG_SP,
  DW_REG_8, DW_REG_9, DW_REG_10, DW_REG_11,
  DW_REG_12, DW_REG_13, DW_REG_14, DW_REG_15,
  DW_REG_RA,
#else
#error "Unsupported target architecture"
#endif
};
296 | |||
/* Minimal list of sections for the in-memory ELF object.
** The enum values are used as indexes into GDBJITobj.sect.
*/
enum {
  GDBJIT_SECT_NULL,
  GDBJIT_SECT_text,
  GDBJIT_SECT_eh_frame,
  GDBJIT_SECT_shstrtab,
  GDBJIT_SECT_strtab,
  GDBJIT_SECT_symtab,
  GDBJIT_SECT_debug_info,
  GDBJIT_SECT_debug_abbrev,
  GDBJIT_SECT_debug_line,
  GDBJIT_SECT__MAX
};

/* Symbols of the in-memory ELF object; indexes into GDBJITobj.sym. */
enum {
  GDBJIT_SYM_UNDEF,
  GDBJIT_SYM_FILE,
  GDBJIT_SYM_FUNC,
  GDBJIT_SYM__MAX
};

/* In-memory ELF object. */
typedef struct GDBJITobj {
  ELFheader hdr;			/* ELF header. */
  ELFsectheader sect[GDBJIT_SECT__MAX];	/* ELF sections. */
  ELFsymbol sym[GDBJIT_SYM__MAX];	/* ELF symbol table. */
  uint8_t space[4096];			/* Space for various section data. */
} GDBJITobj;

/* Combined structure for GDB JIT entry and ELF object. */
typedef struct GDBJITentryobj {
  GDBJITentry entry;
  size_t sz;		/* Allocated size of this structure, needed to free it. */
  GDBJITobj obj;
} GDBJITentryobj;
332 | |||
333 | /* Template for in-memory ELF header. */ | ||
334 | static const ELFheader elfhdr_template = { | ||
335 | .emagic = { 0x7f, 'E', 'L', 'F' }, | ||
336 | .eclass = LJ_64 ? 2 : 1, | ||
337 | .eendian = LJ_ENDIAN_SELECT(1, 2), | ||
338 | .eversion = 1, | ||
339 | #if defined(__linux__) | ||
340 | .eosabi = 0, /* Nope, it's not 3. */ | ||
341 | #elif defined(__FreeBSD__) | ||
342 | .eosabi = 9, | ||
343 | #elif defined(__NetBSD__) | ||
344 | .eosabi = 2, | ||
345 | #elif defined(__OpenBSD__) | ||
346 | .eosabi = 12, | ||
347 | #elif defined(__solaris__) | ||
348 | .eosabi = 6, | ||
349 | #else | ||
350 | .eosabi = 0, | ||
351 | #endif | ||
352 | .eabiversion = 0, | ||
353 | .epad = { 0, 0, 0, 0, 0, 0, 0 }, | ||
354 | .type = 1, | ||
355 | #if LJ_TARGET_X86 | ||
356 | .machine = 3, | ||
357 | #elif LJ_TARGET_X64 | ||
358 | .machine = 62, | ||
359 | #else | ||
360 | #error "Unsupported target architecture" | ||
361 | #endif | ||
362 | .version = 1, | ||
363 | .entry = 0, | ||
364 | .phofs = 0, | ||
365 | .shofs = offsetof(GDBJITobj, sect), | ||
366 | .flags = 0, | ||
367 | .ehsize = sizeof(ELFheader), | ||
368 | .phentsize = 0, | ||
369 | .phnum = 0, | ||
370 | .shentsize = sizeof(ELFsectheader), | ||
371 | .shnum = GDBJIT_SECT__MAX, | ||
372 | .shstridx = GDBJIT_SECT_shstrtab | ||
373 | }; | ||
374 | |||
375 | /* -- In-memory ELF object generation ------------------------------------- */ | ||
376 | |||
/* Context for generating the ELF object for the GDB JIT API.
** The object is built inside the context itself ('obj') and copied to
** allocated memory once its final size ('objsize') is known.
*/
typedef struct GDBJITctx {
  uint8_t *p;		/* Pointer to next address in obj.space. */
  uint8_t *startp;	/* Pointer to start address in obj.space. */
  Trace *T;		/* Generate symbols for this trace. */
  uintptr_t mcaddr;	/* Machine code address. */
  MSize szmcode;	/* Size of machine code. */
  MSize spadjp;		/* Stack adjustment for parent trace or interpreter. */
  MSize spadj;		/* Stack adjustment for trace itself. */
  BCLine lineno;	/* Starting line number. */
  const char *filename;	/* Starting file name. */
  const char *trname;	/* Name of trace. */
  size_t objsize;	/* Final size of ELF object. */
  GDBJITobj obj;	/* In-memory ELF object. */
} GDBJITctx;
392 | |||
393 | /* Add a zero-terminated string. */ | ||
394 | static uint32_t gdbjit_strz(GDBJITctx *ctx, const char *str) | ||
395 | { | ||
396 | uint8_t *p = ctx->p; | ||
397 | uint32_t ofs = (uint32_t)(p - ctx->startp); | ||
398 | do { | ||
399 | *p++ = (uint8_t)*str; | ||
400 | } while (*str++); | ||
401 | ctx->p = p; | ||
402 | return ofs; | ||
403 | } | ||
404 | |||
405 | /* Add a ULEB128 value. */ | ||
406 | static void gdbjit_uleb128(GDBJITctx *ctx, uint32_t v) | ||
407 | { | ||
408 | uint8_t *p = ctx->p; | ||
409 | for (; v >= 0x80; v >>= 7) | ||
410 | *p++ = (uint8_t)((v & 0x7f) | 0x80); | ||
411 | *p++ = (uint8_t)v; | ||
412 | ctx->p = p; | ||
413 | } | ||
414 | |||
415 | /* Add a SLEB128 value. */ | ||
416 | static void gdbjit_sleb128(GDBJITctx *ctx, int32_t v) | ||
417 | { | ||
418 | uint8_t *p = ctx->p; | ||
419 | for (; (uint32_t)(v+0x40) >= 0x80; v >>= 7) | ||
420 | *p++ = (uint8_t)((v & 0x7f) | 0x80); | ||
421 | *p++ = (uint8_t)(v & 0x7f); | ||
422 | ctx->p = p; | ||
423 | } | ||
424 | |||
/* Shortcuts to generate DWARF structures.
** All of them write at and advance a local byte pointer named 'p'.
** DUV, DSV and DSTR additionally need a local 'ctx': they sync 'p' into
** ctx->p, call the helper and reload 'p' from it.
** The multi-byte stores go through a cast of 'p' without any alignment
** adjustment.
*/
#define DB(x)		(*p++ = (x))
#define DI8(x)		(*(int8_t *)p = (x), p++)
#define DU16(x)		(*(uint16_t *)p = (x), p += 2)
#define DU32(x)		(*(uint32_t *)p = (x), p += 4)
#define DADDR(x)	(*(uintptr_t *)p = (x), p += sizeof(uintptr_t))
#define DUV(x)		(ctx->p = p, gdbjit_uleb128(ctx, (x)), p = ctx->p)
#define DSV(x)		(ctx->p = p, gdbjit_sleb128(ctx, (x)), p = ctx->p)
#define DSTR(str)	(ctx->p = p, gdbjit_strz(ctx, (str)), p = ctx->p)
/* Pad with DW_CFA_nop bytes until 'p' is aligned to s (a power of two). */
#define DALIGNNOP(s)	while ((uintptr_t)p & ((s)-1)) *p++ = DW_CFA_nop
/* Emit a length-prefixed block: reserve a 32 bit size field, run stmt and
** then store the number of bytes emitted after the size field.
*/
#define DSECT(name, stmt) \
  { uint32_t *szp_##name = (uint32_t *)p; p += 4; stmt \
    *szp_##name = (uint32_t)((p-(uint8_t *)szp_##name)-4); } \
438 | |||
439 | /* Initialize ELF section headers. */ | ||
440 | static void LJ_FASTCALL gdbjit_secthdr(GDBJITctx *ctx) | ||
441 | { | ||
442 | ELFsectheader *sect; | ||
443 | |||
444 | *ctx->p++ = '\0'; /* Empty string at start of string table. */ | ||
445 | |||
446 | #define SECTDEF(id, tp, al) \ | ||
447 | sect = &ctx->obj.sect[GDBJIT_SECT_##id]; \ | ||
448 | sect->name = gdbjit_strz(ctx, "." #id); \ | ||
449 | sect->type = ELFSECT_TYPE_##tp; \ | ||
450 | sect->align = (al) | ||
451 | |||
452 | SECTDEF(text, NOBITS, 16); | ||
453 | sect->flags = ELFSECT_FLAGS_ALLOC|ELFSECT_FLAGS_EXEC; | ||
454 | sect->addr = ctx->mcaddr; | ||
455 | sect->ofs = 0; | ||
456 | sect->size = ctx->szmcode; | ||
457 | |||
458 | SECTDEF(eh_frame, PROGBITS, sizeof(uintptr_t)); | ||
459 | sect->flags = ELFSECT_FLAGS_ALLOC; | ||
460 | |||
461 | SECTDEF(shstrtab, STRTAB, 1); | ||
462 | SECTDEF(strtab, STRTAB, 1); | ||
463 | |||
464 | SECTDEF(symtab, SYMTAB, sizeof(uintptr_t)); | ||
465 | sect->ofs = offsetof(GDBJITobj, sym); | ||
466 | sect->size = sizeof(ctx->obj.sym); | ||
467 | sect->link = GDBJIT_SECT_strtab; | ||
468 | sect->entsize = sizeof(ELFsymbol); | ||
469 | sect->info = GDBJIT_SYM_FUNC; | ||
470 | |||
471 | SECTDEF(debug_info, PROGBITS, 1); | ||
472 | SECTDEF(debug_abbrev, PROGBITS, 1); | ||
473 | SECTDEF(debug_line, PROGBITS, 1); | ||
474 | |||
475 | #undef SECTDEF | ||
476 | } | ||
477 | |||
478 | /* Initialize symbol table. */ | ||
479 | static void LJ_FASTCALL gdbjit_symtab(GDBJITctx *ctx) | ||
480 | { | ||
481 | ELFsymbol *sym; | ||
482 | |||
483 | *ctx->p++ = '\0'; /* Empty string at start of string table. */ | ||
484 | |||
485 | sym = &ctx->obj.sym[GDBJIT_SYM_FILE]; | ||
486 | sym->name = gdbjit_strz(ctx, "JIT mcode"); | ||
487 | sym->sectidx = ELFSECT_IDX_ABS; | ||
488 | sym->info = ELFSYM_TYPE_FILE|ELFSYM_BIND_LOCAL; | ||
489 | |||
490 | sym = &ctx->obj.sym[GDBJIT_SYM_FUNC]; | ||
491 | sym->name = gdbjit_strz(ctx, ctx->trname); | ||
492 | sym->sectidx = GDBJIT_SECT_text; | ||
493 | sym->value = 0; | ||
494 | sym->size = ctx->szmcode; | ||
495 | sym->info = ELFSYM_TYPE_FUNC|ELFSYM_BIND_GLOBAL; | ||
496 | } | ||
497 | |||
/* Initialize .eh_frame section.
** Emits one CIE followed by one FDE covering the whole machine code of the
** trace. Offsets of saved registers are given in units of the data alignment
** factor, i.e. in negative multiples of the pointer size.
** NOTE(review): the #if directives below appear inside the argument list of
** the DSECT macro. ISO C leaves this undefined; it relies on the compiler
** accepting it.
*/
static void LJ_FASTCALL gdbjit_ehframe(GDBJITctx *ctx)
{
  uint8_t *p = ctx->p;
  uint8_t *framep = p;  /* Start of the CIE, for the back-reference in the FDE. */

  /* Emit DWARF EH CIE. */
  DSECT(CIE,
    DU32(0);			/* Offset to CIE itself. */
    DB(DW_CIE_VERSION);
    DSTR("zR");			/* Augmentation. */
    DUV(1);			/* Code alignment factor. */
    DSV(-(int32_t)sizeof(uintptr_t));  /* Data alignment factor. */
    DB(DW_REG_RA);		/* Return address register. */
    DB(1); DB(DW_EH_PE_textrel|DW_EH_PE_udata4);  /* Augmentation data. */
    DB(DW_CFA_def_cfa); DUV(DW_REG_SP); DUV(sizeof(uintptr_t));
    DB(DW_CFA_offset|DW_REG_RA); DUV(1);
    DALIGNNOP(sizeof(uintptr_t));
  )

  /* Emit DWARF EH FDE. */
  DSECT(FDE,
    DU32((uint32_t)(p-framep));	/* Offset to CIE. */
    DU32(0);			/* Machine code offset relative to .text. */
    DU32(ctx->szmcode);		/* Machine code length. */
    DB(0);			/* Augmentation data. */
    /* Registers saved in CFRAME. */
#if LJ_TARGET_X86
    DB(DW_CFA_offset|DW_REG_BP); DUV(2);
    DB(DW_CFA_offset|DW_REG_DI); DUV(3);
    DB(DW_CFA_offset|DW_REG_SI); DUV(4);
    DB(DW_CFA_offset|DW_REG_BX); DUV(5);
#elif LJ_TARGET_X64
    /* Add saved registers for x64 CFRAME. */
#else
#error "Unsupported target architecture"
#endif
    if (ctx->spadjp != ctx->spadj) {  /* Parent/interpreter stack frame size. */
      DB(DW_CFA_def_cfa_offset); DUV(ctx->spadjp);
      DB(DW_CFA_advance_loc|1);  /* Only an approximation. */
    }
    DB(DW_CFA_def_cfa_offset); DUV(ctx->spadj);  /* Trace stack frame size. */
    DALIGNNOP(sizeof(uintptr_t));
  )

  ctx->p = p;
}
545 | |||
/* Initialize .debug_info section.
** Emits a single compilation unit with one DIE. The attribute values are
** written in the order name, low_pc, high_pc, stmt_list.
*/
static void LJ_FASTCALL gdbjit_debuginfo(GDBJITctx *ctx)
{
  uint8_t *p = ctx->p;

  DSECT(info,
    DU16(2);			/* DWARF version. */
    DU32(0);			/* Abbrev offset. */
    DB(sizeof(uintptr_t));	/* Pointer size. */

    DUV(1);			/* Abbrev #1: DW_TAG_compile_unit. */
    DSTR(ctx->filename);	/* DW_AT_name. */
    DADDR(ctx->mcaddr);		/* DW_AT_low_pc. */
    DADDR(ctx->mcaddr + ctx->szmcode);  /* DW_AT_high_pc. */
    DU32(0);			/* DW_AT_stmt_list. */
  )

  ctx->p = p;
}
565 | |||
/* Initialize .debug_abbrev section.
** Defines a single abbreviation (#1) for a childless compile unit with
** four attributes, given as name/form pairs.
*/
static void LJ_FASTCALL gdbjit_debugabbrev(GDBJITctx *ctx)
{
  uint8_t *p = ctx->p;

  /* Abbrev #1: DW_TAG_compile_unit. */
  DUV(1); DUV(DW_TAG_compile_unit);
  DB(DW_children_no);
  DUV(DW_AT_name);	DUV(DW_FORM_string);
  DUV(DW_AT_low_pc);	DUV(DW_FORM_addr);
  DUV(DW_AT_high_pc);	DUV(DW_FORM_addr);
  DUV(DW_AT_stmt_list);	DUV(DW_FORM_data4);
  DB(0); DB(0);  /* A 0/0 pair ends the attribute list. */

  ctx->p = p;
}
582 | |||
/* Emit an extended line number opcode 'op' with s bytes of operands.
** The length written is 1+s, since it includes the opcode byte itself.
*/
#define DLNE(op, s)	(DB(DW_LNS_extended_op), DUV(1+(s)), DB((op)))

/* Initialize .debug_line section.
** The line number program consists of a single row: it maps the start of
** the machine code to the starting line and then ends the sequence after
** advancing the address by the size of the machine code.
*/
static void LJ_FASTCALL gdbjit_debugline(GDBJITctx *ctx)
{
  uint8_t *p = ctx->p;

  DSECT(line,
    DU16(2);			/* DWARF version. */
    DSECT(header,
      DB(1);			/* Minimum instruction length. */
      DB(1);			/* is_stmt. */
      DI8(0);			/* Line base for special opcodes. */
      DB(2);			/* Line range for special opcodes. */
      DB(3+1);			/* Opcode base at DW_LNS_advance_line+1. */
      DB(0); DB(1); DB(1);	/* Standard opcode lengths. */
      /* Directory table. */
      DB(0);
      /* File name table. */
      DSTR(ctx->filename); DUV(0); DUV(0); DUV(0);
      DB(0);
    )

    DLNE(DW_LNE_set_address, sizeof(uintptr_t)); DADDR(ctx->mcaddr);
    if (ctx->lineno) {  /* No advance is emitted for a line number of zero. */
      DB(DW_LNS_advance_line); DSV(ctx->lineno-1);
    }
    DB(DW_LNS_copy);
    DB(DW_LNS_advance_pc); DUV(ctx->szmcode);
    DLNE(DW_LNE_end_sequence, 0);
  )

  ctx->p = p;
}

#undef DLNE
619 | |||
620 | /* Undef shortcuts. */ | ||
621 | #undef DB | ||
622 | #undef DI8 | ||
623 | #undef DU16 | ||
624 | #undef DU32 | ||
625 | #undef DADDR | ||
626 | #undef DUV | ||
627 | #undef DSV | ||
628 | #undef DSTR | ||
629 | #undef DALIGNNOP | ||
630 | #undef DSECT | ||
631 | |||
632 | /* Type of a section initializer callback. */ | ||
633 | typedef void (LJ_FASTCALL *GDBJITinitf)(GDBJITctx *ctx); | ||
634 | |||
635 | /* Call section initializer and set the section offset and size. */ | ||
636 | static void gdbjit_initsect(GDBJITctx *ctx, int sect, GDBJITinitf initf) | ||
637 | { | ||
638 | ctx->startp = ctx->p; | ||
639 | ctx->obj.sect[sect].ofs = (uintptr_t)((char *)ctx->p - (char *)&ctx->obj); | ||
640 | initf(ctx); | ||
641 | ctx->obj.sect[sect].size = (uintptr_t)(ctx->p - ctx->startp); | ||
642 | } | ||
643 | |||
644 | #define SECTALIGN(p, a) \ | ||
645 | ((p) = (uint8_t *)(((uintptr_t)(p) + ((a)-1)) & ~(uintptr_t)((a)-1))) | ||
646 | |||
647 | /* Build in-memory ELF object. */ | ||
648 | static void gdbjit_buildobj(GDBJITctx *ctx) | ||
649 | { | ||
650 | GDBJITobj *obj = &ctx->obj; | ||
651 | /* Fill in ELF header and clear structures. */ | ||
652 | memcpy(&obj->hdr, &elfhdr_template, sizeof(ELFheader)); | ||
653 | memset(&obj->sect, 0, sizeof(ELFsectheader)*GDBJIT_SECT__MAX); | ||
654 | memset(&obj->sym, 0, sizeof(ELFsymbol)*GDBJIT_SYM__MAX); | ||
655 | /* Initialize sections. */ | ||
656 | ctx->p = obj->space; | ||
657 | gdbjit_initsect(ctx, GDBJIT_SECT_shstrtab, gdbjit_secthdr); | ||
658 | gdbjit_initsect(ctx, GDBJIT_SECT_strtab, gdbjit_symtab); | ||
659 | gdbjit_initsect(ctx, GDBJIT_SECT_debug_info, gdbjit_debuginfo); | ||
660 | gdbjit_initsect(ctx, GDBJIT_SECT_debug_abbrev, gdbjit_debugabbrev); | ||
661 | gdbjit_initsect(ctx, GDBJIT_SECT_debug_line, gdbjit_debugline); | ||
662 | SECTALIGN(ctx->p, sizeof(uintptr_t)); | ||
663 | gdbjit_initsect(ctx, GDBJIT_SECT_eh_frame, gdbjit_ehframe); | ||
664 | ctx->objsize = (size_t)((char *)ctx->p - (char *)obj); | ||
665 | lua_assert(ctx->objsize < sizeof(GDBJITobj)); | ||
666 | } | ||
667 | |||
668 | #undef SECTALIGN | ||
669 | |||
670 | /* -- Interface to GDB JIT API -------------------------------------------- */ | ||
671 | |||
672 | /* Add new entry to GDB JIT symbol chain. */ | ||
673 | static void gdbjit_newentry(lua_State *L, GDBJITctx *ctx) | ||
674 | { | ||
675 | /* Allocate memory for GDB JIT entry and ELF object. */ | ||
676 | MSize sz = (MSize)(sizeof(GDBJITentryobj) - sizeof(GDBJITobj) + ctx->objsize); | ||
677 | GDBJITentryobj *eo = lj_mem_newt(L, sz, GDBJITentryobj); | ||
678 | memcpy(&eo->obj, &ctx->obj, ctx->objsize); /* Copy ELF object. */ | ||
679 | eo->sz = sz; | ||
680 | ctx->T->gdbjit_entry = (void *)eo; | ||
681 | /* Link new entry to chain and register it. */ | ||
682 | eo->entry.prev_entry = NULL; | ||
683 | eo->entry.next_entry = __jit_debug_descriptor.first_entry; | ||
684 | if (eo->entry.next_entry) | ||
685 | eo->entry.next_entry->prev_entry = &eo->entry; | ||
686 | eo->entry.symfile_addr = (const char *)&eo->obj; | ||
687 | eo->entry.symfile_size = ctx->objsize; | ||
688 | __jit_debug_descriptor.first_entry = &eo->entry; | ||
689 | __jit_debug_descriptor.relevant_entry = &eo->entry; | ||
690 | __jit_debug_descriptor.action_flag = GDBJIT_REGISTER; | ||
691 | __jit_debug_register_code(); | ||
692 | } | ||
693 | |||
694 | /* Add debug info for newly compiled trace and notify GDB. */ | ||
695 | void lj_gdbjit_addtrace(jit_State *J, Trace *T, TraceNo traceno) | ||
696 | { | ||
697 | GDBJITctx ctx; | ||
698 | lua_State *L = J->L; | ||
699 | GCproto *pt = &gcref(T->startpt)->pt; | ||
700 | TraceNo parent = T->ir[REF_BASE].op1; | ||
701 | uintptr_t pcofs = (uintptr_t)(T->snap[0].mapofs+T->snap[0].nslots); | ||
702 | const BCIns *startpc = (const BCIns *)(uintptr_t)T->snapmap[pcofs]; | ||
703 | ctx.T = T; | ||
704 | ctx.mcaddr = (uintptr_t)T->mcode; | ||
705 | ctx.szmcode = T->szmcode; | ||
706 | ctx.spadjp = CFRAME_SIZE + (MSize)(parent ? J->trace[parent]->spadjust : 0); | ||
707 | ctx.spadj = CFRAME_SIZE + T->spadjust; | ||
708 | ctx.lineno = pt->lineinfo ? pt->lineinfo[startpc - pt->bc] : 0; | ||
709 | ctx.filename = strdata(pt->chunkname); | ||
710 | if (*ctx.filename == '@' || *ctx.filename == '=') | ||
711 | ctx.filename++; | ||
712 | else | ||
713 | ctx.filename = "(string)"; | ||
714 | ctx.trname = lj_str_pushf(L, "TRACE_%d", traceno); | ||
715 | L->top--; | ||
716 | gdbjit_buildobj(&ctx); | ||
717 | gdbjit_newentry(L, &ctx); | ||
718 | } | ||
719 | |||
720 | /* Delete debug info for trace and notify GDB. */ | ||
721 | void lj_gdbjit_deltrace(jit_State *J, Trace *T) | ||
722 | { | ||
723 | GDBJITentryobj *eo = (GDBJITentryobj *)T->gdbjit_entry; | ||
724 | if (eo) { | ||
725 | if (eo->entry.prev_entry) | ||
726 | eo->entry.prev_entry->next_entry = eo->entry.next_entry; | ||
727 | else | ||
728 | __jit_debug_descriptor.first_entry = eo->entry.next_entry; | ||
729 | if (eo->entry.next_entry) | ||
730 | eo->entry.next_entry->prev_entry = eo->entry.prev_entry; | ||
731 | __jit_debug_descriptor.relevant_entry = &eo->entry; | ||
732 | __jit_debug_descriptor.action_flag = GDBJIT_UNREGISTER; | ||
733 | __jit_debug_register_code(); | ||
734 | lj_mem_free(J2G(J), eo, eo->sz); | ||
735 | } | ||
736 | } | ||
737 | |||
738 | #endif | ||
739 | #endif | ||
diff --git a/src/lj_gdbjit.h b/src/lj_gdbjit.h new file mode 100644 index 00000000..2221948f --- /dev/null +++ b/src/lj_gdbjit.h | |||
@@ -0,0 +1,22 @@ | |||
/*
** Client for the GDB JIT API.
** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
*/

#ifndef _LJ_GDBJIT_H
#define _LJ_GDBJIT_H

#include "lj_obj.h"
#include "lj_jit.h"

#if LJ_HASJIT && defined(LUAJIT_USE_GDBJIT)

/* Add or delete the GDB debug info for a trace. */
LJ_FUNC void lj_gdbjit_addtrace(jit_State *J, Trace *T, TraceNo traceno);
LJ_FUNC void lj_gdbjit_deltrace(jit_State *J, Trace *T);

#else
/* Without the JIT compiler or GDB JIT support both calls are no-ops. */
#define lj_gdbjit_addtrace(J, T, tn)	UNUSED(T)
#define lj_gdbjit_deltrace(J, T)	UNUSED(T)
#endif

#endif
diff --git a/src/lj_ir.c b/src/lj_ir.c new file mode 100644 index 00000000..2ff54821 --- /dev/null +++ b/src/lj_ir.c | |||
@@ -0,0 +1,461 @@ | |||
1 | /* | ||
2 | ** SSA IR (Intermediate Representation) emitter. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #define lj_ir_c | ||
7 | #define LUA_CORE | ||
8 | |||
9 | #include "lj_obj.h" | ||
10 | |||
11 | #if LJ_HASJIT | ||
12 | |||
13 | #include "lj_gc.h" | ||
14 | #include "lj_str.h" | ||
15 | #include "lj_ir.h" | ||
16 | #include "lj_jit.h" | ||
17 | #include "lj_iropt.h" | ||
18 | #include "lj_trace.h" | ||
19 | |||
/* Some local macros to save typing. Undef'd at the end. */
/* IR(ref) yields a pointer to the instruction 'ref' of the current trace;
** fins is the instruction in the FOLD buffer. Both need a local 'J'.
*/
#define IR(ref)			(&J->cur.ir[(ref)])
#define fins			(&J->fold.ins)

/* Pass IR on to next optimization in chain (FOLD). */
#define emitir(ot, a, b)	(lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))

/* -- IR tables ----------------------------------------------------------- */

/* IR instruction modes. One entry per IRDEF item plus a trailing zero. */
LJ_DATADEF const uint8_t lj_ir_mode[IR__MAX+1] = {
IRDEF(IRMODE)
  0
};
34 | |||
35 | /* -- IR emitter ---------------------------------------------------------- */ | ||
36 | |||
37 | /* Grow IR buffer at the top. */ | ||
38 | void LJ_FASTCALL lj_ir_growtop(jit_State *J) | ||
39 | { | ||
40 | IRIns *baseir = J->irbuf + J->irbotlim; | ||
41 | MSize szins = J->irtoplim - J->irbotlim; | ||
42 | if (szins) { | ||
43 | baseir = (IRIns *)lj_mem_realloc(J->L, baseir, szins*sizeof(IRIns), | ||
44 | 2*szins*sizeof(IRIns)); | ||
45 | J->irtoplim = J->irbotlim + 2*szins; | ||
46 | } else { | ||
47 | baseir = (IRIns *)lj_mem_realloc(J->L, NULL, 0, LJ_MIN_IRSZ*sizeof(IRIns)); | ||
48 | J->irbotlim = REF_BASE - LJ_MIN_IRSZ/4; | ||
49 | J->irtoplim = J->irbotlim + LJ_MIN_IRSZ; | ||
50 | } | ||
51 | J->cur.ir = J->irbuf = baseir - J->irbotlim; | ||
52 | } | ||
53 | |||
54 | /* Grow IR buffer at the bottom or shift it up. */ | ||
55 | static void lj_ir_growbot(jit_State *J) | ||
56 | { | ||
57 | IRIns *baseir = J->irbuf + J->irbotlim; | ||
58 | MSize szins = J->irtoplim - J->irbotlim; | ||
59 | lua_assert(szins != 0); | ||
60 | lua_assert(J->cur.nk == J->irbotlim); | ||
61 | if (J->cur.nins + (szins >> 1) < J->irtoplim) { | ||
62 | /* More than half of the buffer is free on top: shift up by a quarter. */ | ||
63 | MSize ofs = szins >> 2; | ||
64 | memmove(baseir + ofs, baseir, (J->cur.nins - J->irbotlim)*sizeof(IRIns)); | ||
65 | J->irbotlim -= ofs; | ||
66 | J->irtoplim -= ofs; | ||
67 | J->cur.ir = J->irbuf = baseir - J->irbotlim; | ||
68 | } else { | ||
69 | /* Double the buffer size, but split the growth amongst top/bottom. */ | ||
70 | IRIns *newbase = lj_mem_newt(J->L, 2*szins*sizeof(IRIns), IRIns); | ||
71 | MSize ofs = szins >= 256 ? 128 : (szins >> 1); /* Limit bottom growth. */ | ||
72 | memcpy(newbase + ofs, baseir, (J->cur.nins - J->irbotlim)*sizeof(IRIns)); | ||
73 | lj_mem_free(G(J->L), baseir, szins*sizeof(IRIns)); | ||
74 | J->irbotlim -= ofs; | ||
75 | J->irtoplim = J->irbotlim + 2*szins; | ||
76 | J->cur.ir = J->irbuf = newbase - J->irbotlim; | ||
77 | } | ||
78 | } | ||
79 | |||
80 | /* Emit IR without any optimizations. */ | ||
81 | TRef LJ_FASTCALL lj_ir_emit(jit_State *J) | ||
82 | { | ||
83 | IRRef ref = lj_ir_nextins(J); | ||
84 | IRIns *ir = IR(ref); | ||
85 | IROp op = fins->o; | ||
86 | ir->prev = J->chain[op]; | ||
87 | J->chain[op] = (IRRef1)ref; | ||
88 | ir->o = op; | ||
89 | ir->op1 = fins->op1; | ||
90 | ir->op2 = fins->op2; | ||
91 | J->guardemit.irt |= fins->t.irt; | ||
92 | return TREF(ref, irt_t((ir->t = fins->t))); | ||
93 | } | ||
94 | |||
95 | /* -- Interning of constants ---------------------------------------------- */ | ||
96 | |||
97 | /* | ||
98 | ** IR instructions for constants are kept at J->cur.nk <= ref < REF_BIAS. | ||
99 | ** They are chained like all other instructions, but grow downwards. | ||
100 | ** They are interned (like strings in the VM) to facilitate reference | ||
101 | ** comparisons. The same constant must get the same reference. | ||
102 | */ | ||
103 | |||
104 | /* Get ref of next IR constant and optionally grow IR. | ||
105 | ** Note: this may invalidate all IRIns *! | ||
106 | */ | ||
107 | static LJ_AINLINE IRRef ir_nextk(jit_State *J) | ||
108 | { | ||
109 | IRRef ref = J->cur.nk; | ||
110 | if (LJ_UNLIKELY(ref <= J->irbotlim)) lj_ir_growbot(J); | ||
111 | J->cur.nk = --ref; | ||
112 | return ref; | ||
113 | } | ||
114 | |||
115 | /* Intern int32_t constant. */ | ||
116 | TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k) | ||
117 | { | ||
118 | IRIns *ir, *cir = J->cur.ir; | ||
119 | IRRef ref; | ||
120 | for (ref = J->chain[IR_KINT]; ref; ref = cir[ref].prev) | ||
121 | if (cir[ref].i == k) | ||
122 | goto found; | ||
123 | ref = ir_nextk(J); | ||
124 | ir = IR(ref); | ||
125 | ir->i = k; | ||
126 | ir->t.irt = IRT_INT; | ||
127 | ir->o = IR_KINT; | ||
128 | ir->prev = J->chain[IR_KINT]; | ||
129 | J->chain[IR_KINT] = (IRRef1)ref; | ||
130 | found: | ||
131 | return TREF(ref, IRT_INT); | ||
132 | } | ||
133 | |||
/* The MRef inside the KNUM IR instruction holds the address of the constant
** (an aligned double or a special 64 bit pattern). The KNUM constants
** themselves are stored in a chained array and shared across traces.
**
** Rationale for choosing this data structure:
** - The address of the constants is embedded in the generated machine code
**   and must never move. A resizable array or hash table wouldn't work.
** - Most apps need very few non-integer constants (less than a dozen).
** - Linear search is hard to beat in terms of speed and low complexity.
*/
typedef struct KNumArray {
  MRef next;			/* Pointer to next list. */
  MSize numk;			/* Number of used elements in this array. */
  TValue k[LJ_MIN_KNUMSZ];	/* Array of constants. */
} KNumArray;
149 | |||
150 | /* Free all chained arrays. */ | ||
151 | void lj_ir_knum_freeall(jit_State *J) | ||
152 | { | ||
153 | KNumArray *kn; | ||
154 | for (kn = mref(J->knum, KNumArray); kn; ) { | ||
155 | KNumArray *next = mref(kn->next, KNumArray); | ||
156 | lj_mem_free(J2G(J), kn, sizeof(KNumArray)); | ||
157 | kn = next; | ||
158 | } | ||
159 | } | ||
160 | |||
161 | /* Find KNUM constant in chained array or add it. */ | ||
162 | static cTValue *ir_knum_find(jit_State *J, uint64_t nn) | ||
163 | { | ||
164 | KNumArray *kn, *knp = NULL; | ||
165 | TValue *ntv; | ||
166 | MSize idx; | ||
167 | /* Search for the constant in the whole chain of arrays. */ | ||
168 | for (kn = mref(J->knum, KNumArray); kn; kn = mref(kn->next, KNumArray)) { | ||
169 | knp = kn; /* Remember previous element in list. */ | ||
170 | for (idx = 0; idx < kn->numk; idx++) { /* Search one array. */ | ||
171 | TValue *tv = &kn->k[idx]; | ||
172 | if (tv->u64 == nn) /* Needed for +-0/NaN/absmask. */ | ||
173 | return tv; | ||
174 | } | ||
175 | } | ||
176 | /* Constant was not found, need to add it. */ | ||
177 | if (!(knp && knp->numk < LJ_MIN_KNUMSZ)) { /* Allocate a new array. */ | ||
178 | KNumArray *nkn = lj_mem_newt(J->L, sizeof(KNumArray), KNumArray); | ||
179 | setmref(nkn->next, NULL); | ||
180 | nkn->numk = 0; | ||
181 | if (knp) | ||
182 | setmref(knp->next, nkn); /* Chain to the end of the list. */ | ||
183 | else | ||
184 | setmref(J->knum, nkn); /* Link first array. */ | ||
185 | knp = nkn; | ||
186 | } | ||
187 | ntv = &knp->k[knp->numk++]; /* Add to current array. */ | ||
188 | ntv->u64 = nn; | ||
189 | return ntv; | ||
190 | } | ||
191 | |||
192 | /* Intern FP constant, given by its address. */ | ||
193 | TRef lj_ir_knum_addr(jit_State *J, cTValue *tv) | ||
194 | { | ||
195 | IRIns *ir, *cir = J->cur.ir; | ||
196 | IRRef ref; | ||
197 | for (ref = J->chain[IR_KNUM]; ref; ref = cir[ref].prev) | ||
198 | if (ir_knum(&cir[ref]) == tv) | ||
199 | goto found; | ||
200 | ref = ir_nextk(J); | ||
201 | ir = IR(ref); | ||
202 | setmref(ir->ptr, tv); | ||
203 | ir->t.irt = IRT_NUM; | ||
204 | ir->o = IR_KNUM; | ||
205 | ir->prev = J->chain[IR_KNUM]; | ||
206 | J->chain[IR_KNUM] = (IRRef1)ref; | ||
207 | found: | ||
208 | return TREF(ref, IRT_NUM); | ||
209 | } | ||
210 | |||
211 | /* Intern FP constant, given by its 64 bit pattern. */ | ||
212 | TRef lj_ir_knum_nn(jit_State *J, uint64_t nn) | ||
213 | { | ||
214 | return lj_ir_knum_addr(J, ir_knum_find(J, nn)); | ||
215 | } | ||
216 | |||
/* Special 16 byte aligned SIMD constants.
** Each 64 bit pattern is stored twice in a row: first the pattern with
** all bits except the top bit set, then the pattern with only the top bit set.
*/
LJ_DATADEF LJ_ALIGN(16) cTValue lj_ir_knum_tv[4] = {
  { U64x(7fffffff,ffffffff) }, { U64x(7fffffff,ffffffff) },
  { U64x(80000000,00000000) }, { U64x(80000000,00000000) }
};
222 | |||
223 | /* Check whether a number is int and return it. -0 is NOT considered an int. */ | ||
224 | static int numistrueint(lua_Number n, int32_t *kp) | ||
225 | { | ||
226 | int32_t k = lj_num2int(n); | ||
227 | if (n == cast_num(k)) { | ||
228 | if (kp) *kp = k; | ||
229 | if (k == 0) { /* Special check for -0. */ | ||
230 | TValue tv; | ||
231 | setnumV(&tv, n); | ||
232 | if (tv.u32.hi != 0) | ||
233 | return 0; | ||
234 | } | ||
235 | return 1; | ||
236 | } | ||
237 | return 0; | ||
238 | } | ||
239 | |||
240 | /* Intern number as int32_t constant if possible, otherwise as FP constant. */ | ||
241 | TRef lj_ir_knumint(jit_State *J, lua_Number n) | ||
242 | { | ||
243 | int32_t k; | ||
244 | if (numistrueint(n, &k)) | ||
245 | return lj_ir_kint(J, k); | ||
246 | else | ||
247 | return lj_ir_knum(J, n); | ||
248 | } | ||
249 | |||
250 | /* Intern GC object "constant". */ | ||
251 | TRef lj_ir_kgc(jit_State *J, GCobj *o, IRType t) | ||
252 | { | ||
253 | IRIns *ir, *cir = J->cur.ir; | ||
254 | IRRef ref; | ||
255 | for (ref = J->chain[IR_KGC]; ref; ref = cir[ref].prev) | ||
256 | if (ir_kgc(&cir[ref]) == o) | ||
257 | goto found; | ||
258 | ref = ir_nextk(J); | ||
259 | ir = IR(ref); | ||
260 | /* NOBARRIER: Current trace is a GC root. */ | ||
261 | setgcref(ir->gcr, o); | ||
262 | ir->t.irt = (uint8_t)t; | ||
263 | ir->o = IR_KGC; | ||
264 | ir->prev = J->chain[IR_KGC]; | ||
265 | J->chain[IR_KGC] = (IRRef1)ref; | ||
266 | found: | ||
267 | return TREF(ref, t); | ||
268 | } | ||
269 | |||
270 | /* Intern 32 bit pointer constant. */ | ||
271 | TRef lj_ir_kptr(jit_State *J, void *ptr) | ||
272 | { | ||
273 | IRIns *ir, *cir = J->cur.ir; | ||
274 | IRRef ref; | ||
275 | lua_assert((void *)(intptr_t)i32ptr(ptr) == ptr); | ||
276 | for (ref = J->chain[IR_KPTR]; ref; ref = cir[ref].prev) | ||
277 | if (mref(cir[ref].ptr, void) == ptr) | ||
278 | goto found; | ||
279 | ref = ir_nextk(J); | ||
280 | ir = IR(ref); | ||
281 | setmref(ir->ptr, ptr); | ||
282 | ir->t.irt = IRT_PTR; | ||
283 | ir->o = IR_KPTR; | ||
284 | ir->prev = J->chain[IR_KPTR]; | ||
285 | J->chain[IR_KPTR] = (IRRef1)ref; | ||
286 | found: | ||
287 | return TREF(ref, IRT_PTR); | ||
288 | } | ||
289 | |||
290 | /* Intern typed NULL constant. */ | ||
291 | TRef lj_ir_knull(jit_State *J, IRType t) | ||
292 | { | ||
293 | IRIns *ir, *cir = J->cur.ir; | ||
294 | IRRef ref; | ||
295 | for (ref = J->chain[IR_KNULL]; ref; ref = cir[ref].prev) | ||
296 | if (irt_t(cir[ref].t) == t) | ||
297 | goto found; | ||
298 | ref = ir_nextk(J); | ||
299 | ir = IR(ref); | ||
300 | ir->i = 0; | ||
301 | ir->t.irt = (uint8_t)t; | ||
302 | ir->o = IR_KNULL; | ||
303 | ir->prev = J->chain[IR_KNULL]; | ||
304 | J->chain[IR_KNULL] = (IRRef1)ref; | ||
305 | found: | ||
306 | return TREF(ref, t); | ||
307 | } | ||
308 | |||
309 | /* Intern key slot. */ | ||
310 | TRef lj_ir_kslot(jit_State *J, TRef key, IRRef slot) | ||
311 | { | ||
312 | IRIns *ir, *cir = J->cur.ir; | ||
313 | IRRef2 op12 = IRREF2((IRRef1)key, (IRRef1)slot); | ||
314 | IRRef ref; | ||
315 | /* Const part is not touched by CSE/DCE, so 0-65535 is ok for IRMlit here. */ | ||
316 | lua_assert(tref_isk(key) && slot == (IRRef)(IRRef1)slot); | ||
317 | for (ref = J->chain[IR_KSLOT]; ref; ref = cir[ref].prev) | ||
318 | if (cir[ref].op12 == op12) | ||
319 | goto found; | ||
320 | ref = ir_nextk(J); | ||
321 | ir = IR(ref); | ||
322 | ir->op12 = op12; | ||
323 | ir->t.irt = IRT_PTR; | ||
324 | ir->o = IR_KSLOT; | ||
325 | ir->prev = J->chain[IR_KSLOT]; | ||
326 | J->chain[IR_KSLOT] = (IRRef1)ref; | ||
327 | found: | ||
328 | return TREF(ref, IRT_PTR); | ||
329 | } | ||
330 | |||
331 | /* -- Access to IR constants ---------------------------------------------- */ | ||
332 | |||
333 | /* Copy value of IR constant. */ | ||
334 | void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir) | ||
335 | { | ||
336 | UNUSED(L); | ||
337 | lua_assert(ir->o != IR_KSLOT); /* Common mistake. */ | ||
338 | if (irt_isint(ir->t)) { | ||
339 | lua_assert(ir->o == IR_KINT); | ||
340 | setintV(tv, ir->i); | ||
341 | } else if (irt_isnum(ir->t)) { | ||
342 | lua_assert(ir->o == IR_KNUM); | ||
343 | setnumV(tv, ir_knum(ir)->n); | ||
344 | } else if (irt_ispri(ir->t)) { | ||
345 | lua_assert(ir->o == IR_KPRI); | ||
346 | setitype(tv, irt_toitype(ir->t)); | ||
347 | } else { | ||
348 | if (ir->o == IR_KGC) { | ||
349 | lua_assert(irt_isgcv(ir->t)); | ||
350 | setgcV(L, tv, &ir_kgc(ir)->gch, irt_toitype(ir->t)); | ||
351 | } else { | ||
352 | lua_assert(ir->o == IR_KPTR || ir->o == IR_KNULL); | ||
353 | setlightudV(tv, mref(ir->ptr, void)); | ||
354 | } | ||
355 | } | ||
356 | } | ||
357 | |||
358 | /* -- Convert IR operand types -------------------------------------------- */ | ||
359 | |||
360 | /* Convert from integer or string to number. */ | ||
361 | TRef LJ_FASTCALL lj_ir_tonum(jit_State *J, TRef tr) | ||
362 | { | ||
363 | if (!tref_isnum(tr)) { | ||
364 | if (tref_isinteger(tr)) | ||
365 | tr = emitir(IRTN(IR_TONUM), tr, 0); | ||
366 | else if (tref_isstr(tr)) | ||
367 | tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0); | ||
368 | else | ||
369 | lj_trace_err(J, LJ_TRERR_BADTYPE); | ||
370 | } | ||
371 | return tr; | ||
372 | } | ||
373 | |||
374 | /* Convert from integer or number to string. */ | ||
375 | TRef LJ_FASTCALL lj_ir_tostr(jit_State *J, TRef tr) | ||
376 | { | ||
377 | if (!tref_isstr(tr)) { | ||
378 | if (!tref_isnumber(tr)) | ||
379 | lj_trace_err(J, LJ_TRERR_BADTYPE); | ||
380 | tr = emitir(IRT(IR_TOSTR, IRT_STR), tr, 0); | ||
381 | } | ||
382 | return tr; | ||
383 | } | ||
384 | |||
385 | /* Convert from number or string to bitop operand (overflow wrapped). */ | ||
386 | TRef LJ_FASTCALL lj_ir_tobit(jit_State *J, TRef tr) | ||
387 | { | ||
388 | if (!tref_isinteger(tr)) { | ||
389 | if (tref_isstr(tr)) | ||
390 | tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0); | ||
391 | else if (!tref_isnum(tr)) | ||
392 | lj_trace_err(J, LJ_TRERR_BADTYPE); | ||
393 | tr = emitir(IRTI(IR_TOBIT), tr, lj_ir_knum_tobit(J)); | ||
394 | } | ||
395 | return tr; | ||
396 | } | ||
397 | |||
398 | /* Convert from number or string to integer (overflow undefined). */ | ||
399 | TRef LJ_FASTCALL lj_ir_toint(jit_State *J, TRef tr) | ||
400 | { | ||
401 | if (!tref_isinteger(tr)) { | ||
402 | if (tref_isstr(tr)) | ||
403 | tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0); | ||
404 | else if (!tref_isnum(tr)) | ||
405 | lj_trace_err(J, LJ_TRERR_BADTYPE); | ||
406 | tr = emitir(IRTI(IR_TOINT), tr, IRTOINT_ANY); | ||
407 | } | ||
408 | return tr; | ||
409 | } | ||
410 | |||
411 | /* -- Miscellaneous IR ops ------------------------------------------------ */ | ||
412 | |||
413 | /* Evaluate numeric comparison. */ | ||
414 | int lj_ir_numcmp(lua_Number a, lua_Number b, IROp op) | ||
415 | { | ||
416 | switch (op) { | ||
417 | case IR_EQ: return (a == b); | ||
418 | case IR_NE: return (a != b); | ||
419 | case IR_LT: return (a < b); | ||
420 | case IR_GE: return (a >= b); | ||
421 | case IR_LE: return (a <= b); | ||
422 | case IR_GT: return (a > b); | ||
423 | case IR_ULT: return !(a >= b); | ||
424 | case IR_UGE: return !(a < b); | ||
425 | case IR_ULE: return !(a > b); | ||
426 | case IR_UGT: return !(a <= b); | ||
427 | default: lua_assert(0); return 0; | ||
428 | } | ||
429 | } | ||
430 | |||
431 | /* Evaluate string comparison. */ | ||
432 | int lj_ir_strcmp(GCstr *a, GCstr *b, IROp op) | ||
433 | { | ||
434 | int res = lj_str_cmp(a, b); | ||
435 | switch (op) { | ||
436 | case IR_LT: return (res < 0); | ||
437 | case IR_GE: return (res >= 0); | ||
438 | case IR_LE: return (res <= 0); | ||
439 | case IR_GT: return (res > 0); | ||
440 | default: lua_assert(0); return 0; | ||
441 | } | ||
442 | } | ||
443 | |||
444 | /* Rollback IR to previous state. */ | ||
445 | void lj_ir_rollback(jit_State *J, IRRef ref) | ||
446 | { | ||
447 | IRRef nins = J->cur.nins; | ||
448 | while (nins > ref) { | ||
449 | IRIns *ir; | ||
450 | nins--; | ||
451 | ir = IR(nins); | ||
452 | J->chain[ir->o] = ir->prev; | ||
453 | } | ||
454 | J->cur.nins = nins; | ||
455 | } | ||
456 | |||
457 | #undef IR | ||
458 | #undef fins | ||
459 | #undef emitir | ||
460 | |||
461 | #endif | ||
diff --git a/src/lj_ir.h b/src/lj_ir.h new file mode 100644 index 00000000..a6973a81 --- /dev/null +++ b/src/lj_ir.h | |||
@@ -0,0 +1,429 @@ | |||
1 | /* | ||
2 | ** SSA IR (Intermediate Representation) format. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #ifndef _LJ_IR_H | ||
7 | #define _LJ_IR_H | ||
8 | |||
9 | #include "lj_obj.h" | ||
10 | |||
11 | /* IR instruction definition. Order matters, see below. */ | ||
12 | #define IRDEF(_) \ | ||
13 | /* Miscellaneous ops. */ \ | ||
14 | _(NOP, N , ___, ___) \ | ||
15 | _(BASE, N , lit, lit) \ | ||
16 | _(LOOP, G , ___, ___) \ | ||
17 | _(PHI, S , ref, ref) \ | ||
18 | _(RENAME, S , ref, lit) \ | ||
19 | \ | ||
20 | /* Constants. */ \ | ||
21 | _(KPRI, N , ___, ___) \ | ||
22 | _(KINT, N , cst, ___) \ | ||
23 | _(KGC, N , cst, ___) \ | ||
24 | _(KPTR, N , cst, ___) \ | ||
25 | _(KNULL, N , cst, ___) \ | ||
26 | _(KNUM, N , cst, ___) \ | ||
27 | _(KSLOT, N , ref, lit) \ | ||
28 | \ | ||
29 | /* Guarded assertions. */ \ | ||
30 | /* Must be properly aligned to flip opposites (^1) and (un)ordered (^4). */ \ | ||
31 | _(EQ, GC, ref, ref) \ | ||
32 | _(NE, GC, ref, ref) \ | ||
33 | \ | ||
34 | _(ABC, G , ref, ref) \ | ||
35 | _(FRAME, G , ref, ref) \ | ||
36 | \ | ||
37 | _(LT, G , ref, ref) \ | ||
38 | _(GE, G , ref, ref) \ | ||
39 | _(LE, G , ref, ref) \ | ||
40 | _(GT, G , ref, ref) \ | ||
41 | \ | ||
42 | _(ULT, G , ref, ref) \ | ||
43 | _(UGE, G , ref, ref) \ | ||
44 | _(ULE, G , ref, ref) \ | ||
45 | _(UGT, G , ref, ref) \ | ||
46 | \ | ||
47 | /* Bit ops. */ \ | ||
48 | _(BNOT, N , ref, ___) \ | ||
49 | _(BSWAP, N , ref, ___) \ | ||
50 | _(BAND, C , ref, ref) \ | ||
51 | _(BOR, C , ref, ref) \ | ||
52 | _(BXOR, C , ref, ref) \ | ||
53 | _(BSHL, N , ref, ref) \ | ||
54 | _(BSHR, N , ref, ref) \ | ||
55 | _(BSAR, N , ref, ref) \ | ||
56 | _(BROL, N , ref, ref) \ | ||
57 | _(BROR, N , ref, ref) \ | ||
58 | \ | ||
59 | /* Arithmetic ops. ORDER ARITH (FPMATH/POWI take the space for MOD/POW). */ \ | ||
60 | _(ADD, C , ref, ref) \ | ||
61 | _(SUB, N , ref, ref) \ | ||
62 | _(MUL, C , ref, ref) \ | ||
63 | _(DIV, N , ref, ref) \ | ||
64 | \ | ||
65 | _(FPMATH, N , ref, lit) \ | ||
66 | _(POWI, N , ref, ref) \ | ||
67 | \ | ||
68 | _(NEG, N , ref, ref) \ | ||
69 | _(ABS, N , ref, ref) \ | ||
70 | _(ATAN2, N , ref, ref) \ | ||
71 | _(LDEXP, N , ref, ref) \ | ||
72 | _(MIN, C , ref, ref) \ | ||
73 | _(MAX, C , ref, ref) \ | ||
74 | \ | ||
75 | /* Overflow-checking arithmetic ops. */ \ | ||
76 | _(ADDOV, GC, ref, ref) \ | ||
77 | _(SUBOV, G , ref, ref) \ | ||
78 | \ | ||
79 | /* Memory ops. A = array, H = hash, U = upvalue, F = field, S = stack. */ \ | ||
80 | \ | ||
81 | /* Memory references. */ \ | ||
82 | _(AREF, R , ref, ref) \ | ||
83 | _(HREFK, RG, ref, ref) \ | ||
84 | _(HREF, L , ref, ref) \ | ||
85 | _(NEWREF, S , ref, ref) \ | ||
86 | _(UREFO, LG, ref, lit) \ | ||
87 | _(UREFC, LG, ref, lit) \ | ||
88 | _(FREF, R , ref, lit) \ | ||
89 | _(STRREF, N , ref, ref) \ | ||
90 | \ | ||
91 | /* Loads and Stores. These must be in the same order. */ \ | ||
92 | _(ALOAD, LG, ref, ___) \ | ||
93 | _(HLOAD, LG, ref, ___) \ | ||
94 | _(ULOAD, LG, ref, ___) \ | ||
95 | _(FLOAD, L , ref, lit) \ | ||
96 | _(SLOAD, LG, lit, lit) \ | ||
97 | _(XLOAD, L , ref, lit) \ | ||
98 | \ | ||
99 | _(ASTORE, S , ref, ref) \ | ||
100 | _(HSTORE, S , ref, ref) \ | ||
101 | _(USTORE, S , ref, ref) \ | ||
102 | _(FSTORE, S , ref, ref) \ | ||
103 | \ | ||
104 | /* String ops. */ \ | ||
105 | _(SNEW, N , ref, ref) \ | ||
106 | \ | ||
107 | /* Table ops. */ \ | ||
108 | _(TNEW, A , lit, lit) \ | ||
109 | _(TDUP, A , ref, ___) \ | ||
110 | _(TLEN, L , ref, ___) \ | ||
111 | _(TBAR, S , ref, ___) \ | ||
112 | _(OBAR, S , ref, ref) \ | ||
113 | \ | ||
114 | /* Type conversions. */ \ | ||
115 | _(TONUM, N , ref, ___) \ | ||
116 | _(TOINT, N , ref, lit) \ | ||
117 | _(TOBIT, N , ref, ref) \ | ||
118 | _(TOSTR, N , ref, ___) \ | ||
119 | _(STRTO, G , ref, ___) \ | ||
120 | \ | ||
121 | /* End of list. */ | ||
122 | |||
123 | /* IR opcodes (max. 256). */ | ||
124 | typedef enum { | ||
125 | #define IRENUM(name, m, m1, m2) IR_##name, | ||
126 | IRDEF(IRENUM) | ||
127 | #undef IRENUM | ||
128 | IR__MAX | ||
129 | } IROp; | ||
130 | |||
131 | /* Stored opcode. */ | ||
132 | typedef uint8_t IROp1; | ||
133 | |||
134 | LJ_STATIC_ASSERT(((int)IR_EQ^1) == (int)IR_NE); | ||
135 | LJ_STATIC_ASSERT(((int)IR_LT^1) == (int)IR_GE); | ||
136 | LJ_STATIC_ASSERT(((int)IR_LE^1) == (int)IR_GT); | ||
137 | LJ_STATIC_ASSERT(((int)IR_LT^3) == (int)IR_GT); | ||
138 | LJ_STATIC_ASSERT(((int)IR_LT^4) == (int)IR_ULT); | ||
139 | |||
140 | /* Delta between xLOAD and xSTORE. */ | ||
141 | #define IRDELTA_L2S ((int)IR_ASTORE - (int)IR_ALOAD) | ||
142 | |||
143 | LJ_STATIC_ASSERT((int)IR_HLOAD + IRDELTA_L2S == (int)IR_HSTORE); | ||
144 | LJ_STATIC_ASSERT((int)IR_ULOAD + IRDELTA_L2S == (int)IR_USTORE); | ||
145 | LJ_STATIC_ASSERT((int)IR_FLOAD + IRDELTA_L2S == (int)IR_FSTORE); | ||
146 | |||
147 | /* FPMATH sub-functions. ORDER FPM. */ | ||
148 | #define IRFPMDEF(_) \ | ||
149 | _(FLOOR) _(CEIL) _(TRUNC) /* Must be first and in this order. */ \ | ||
150 | _(SQRT) _(EXP) _(EXP2) _(LOG) _(LOG2) _(LOG10) \ | ||
151 | _(SIN) _(COS) _(TAN) \ | ||
152 | _(OTHER) | ||
153 | |||
154 | typedef enum { | ||
155 | #define FPMENUM(name) IRFPM_##name, | ||
156 | IRFPMDEF(FPMENUM) | ||
157 | #undef FPMENUM | ||
158 | IRFPM__MAX | ||
159 | } IRFPMathOp; | ||
160 | |||
161 | /* FLOAD field IDs. */ | ||
162 | #define IRFLDEF(_) \ | ||
163 | _(STR_LEN, GCstr, len) \ | ||
164 | _(FUNC_ENV, GCfunc, l.env) \ | ||
165 | _(TAB_META, GCtab, metatable) \ | ||
166 | _(TAB_ARRAY, GCtab, array) \ | ||
167 | _(TAB_NODE, GCtab, node) \ | ||
168 | _(TAB_ASIZE, GCtab, asize) \ | ||
169 | _(TAB_HMASK, GCtab, hmask) \ | ||
170 | _(TAB_NOMM, GCtab, nomm) \ | ||
171 | _(UDATA_META, GCudata, metatable) | ||
172 | |||
173 | typedef enum { | ||
174 | #define FLENUM(name, type, field) IRFL_##name, | ||
175 | IRFLDEF(FLENUM) | ||
176 | #undef FLENUM | ||
177 | IRFL__MAX | ||
178 | } IRFieldID; | ||
179 | |||
180 | /* SLOAD mode bits, stored in op2. */ | ||
181 | #define IRSLOAD_INHERIT 1 /* Inherited by exits/side traces. */ | ||
182 | #define IRSLOAD_READONLY 2 /* Read-only, omit slot store. */ | ||
183 | #define IRSLOAD_PARENT 4 /* Coalesce with parent trace. */ | ||
184 | |||
185 | /* XLOAD mode, stored in op2. */ | ||
186 | #define IRXLOAD_UNALIGNED 1 | ||
187 | |||
188 | /* TOINT mode, stored in op2. Ordered by strength of the checks. */ | ||
189 | #define IRTOINT_CHECK 0 /* Number checked for integerness. */ | ||
190 | #define IRTOINT_INDEX 1 /* Checked + special backprop rules. */ | ||
191 | #define IRTOINT_ANY 2 /* Any FP number is ok. */ | ||
192 | #define IRTOINT_TOBIT 3 /* Cache only: TOBIT conversion. */ | ||
193 | |||
/* Operand mode of an IR instruction operand (fits in 2 bits). */
typedef enum {
  IRMref,   /* Operand is an IR reference. */
  IRMlit,   /* Operand is a 16 bit unsigned literal. */
  IRMcst,   /* Operand is a constant literal: i, gcr or ptr. */
  IRMnone   /* Operand is unused. */
} IRMode;
201 | #define IRM___ IRMnone | ||
202 | |||
203 | /* Mode bits: Commutative, {Normal/Ref, Alloc, Load, Store}, Guard. */ | ||
204 | #define IRM_C 0x10 | ||
205 | |||
206 | #define IRM_N 0x00 | ||
207 | #define IRM_R IRM_N | ||
208 | #define IRM_A 0x20 | ||
209 | #define IRM_L 0x40 | ||
210 | #define IRM_S 0x60 | ||
211 | |||
212 | #define IRM_G 0x80 | ||
213 | |||
214 | #define IRM_GC (IRM_G|IRM_C) | ||
215 | #define IRM_RG (IRM_R|IRM_G) | ||
216 | #define IRM_LG (IRM_L|IRM_G) | ||
217 | |||
218 | #define irm_op1(m) (cast(IRMode, (m)&3)) | ||
219 | #define irm_op2(m) (cast(IRMode, ((m)>>2)&3)) | ||
220 | #define irm_iscomm(m) ((m) & IRM_C) | ||
221 | #define irm_kind(m) ((m) & IRM_S) | ||
222 | #define irm_isguard(m) ((m) & IRM_G) | ||
223 | /* Stores or any other op with a guard has a side-effect. */ | ||
224 | #define irm_sideeff(m) ((m) >= IRM_S) | ||
225 | |||
226 | #define IRMODE(name, m, m1, m2) ((IRM##m1)|((IRM##m2)<<2)|(IRM_##m)), | ||
227 | |||
228 | LJ_DATA const uint8_t lj_ir_mode[IR__MAX+1]; | ||
229 | |||
/*
** IR result type and flags (8 bit).
** The low 5 bits (IRT_TYPE) hold the type, the upper 3 bits hold flags.
*/
typedef enum {
  /* Map of itypes to non-negative numbers. ORDER LJ_T */
  IRT_NIL,
  IRT_FALSE,
  IRT_TRUE,
  IRT_LIGHTUD,

  /* Start of the GCobj type range. */
  IRT_STR,
  IRT_PTR,		/* IRT_PTR never escapes the IR (map of LJ_TUPVAL). */
  IRT_THREAD,
  IRT_PROTO,
  IRT_FUNC,
  IRT_9,		/* LJ_TDEADKEY is never used in the IR. */
  IRT_TAB,
  IRT_UDATA,
  /* End of the GCobj type range. */

  IRT_NUM,

  /*
  ** Integer types exist only inside the IR. They can only escape to a
  ** TValue after an implicit or explicit conversion (TONUM). They must be
  ** contiguous and directly follow IRT_NUM (see the typerange macros below).
  */
  IRT_INT,
  IRT_I8,
  IRT_U8,
  IRT_I16,
  IRT_U16,
  /* There is room for 14 more types. */

  /* Flag bits stored above the type. */
  IRT_MARK  = 0x20,	/* Marker for misc. purposes. */
  IRT_GUARD = 0x40,	/* Instruction is a guard. */
  IRT_ISPHI = 0x80,	/* Instruction is left or right PHI operand. */

  /* Masks. */
  IRT_TYPE  = 0x1f,	/* Type part only. */
  IRT_T     = 0xff	/* Type plus flags. */
} IRType;
268 | |||
269 | #define irtype_ispri(irt) ((uint32_t)(irt) <= IRT_TRUE) | ||
270 | |||
271 | /* Stored IRType. */ | ||
272 | typedef struct IRType1 { uint8_t irt; } IRType1; | ||
273 | |||
274 | #define IRT(o, t) ((uint32_t)(((o)<<8) | (t))) | ||
275 | #define IRTI(o) (IRT((o), IRT_INT)) | ||
276 | #define IRTN(o) (IRT((o), IRT_NUM)) | ||
277 | #define IRTG(o, t) (IRT((o), IRT_GUARD|(t))) | ||
278 | #define IRTGI(o) (IRT((o), IRT_GUARD|IRT_INT)) | ||
279 | |||
280 | #define irt_t(t) (cast(IRType, (t).irt)) | ||
281 | #define irt_type(t) (cast(IRType, (t).irt & IRT_TYPE)) | ||
282 | #define irt_sametype(t1, t2) ((((t1).irt ^ (t2).irt) & IRT_TYPE) == 0) | ||
/*
** Check whether the type part of t lies in the inclusive range [first, last].
** A single unsigned compare covers both bounds: types below first wrap
** around to large values after the subtraction.
** All macro parameters are parenthesized so expression arguments expand safely.
*/
#define irt_typerange(t, first, last) \
  ((uint32_t)((t).irt & IRT_TYPE) - (uint32_t)(first) <= \
   (uint32_t)((last)-(first)))
285 | |||
286 | #define irt_isnil(t) (irt_type(t) == IRT_NIL) | ||
287 | #define irt_ispri(t) ((uint32_t)irt_type(t) <= IRT_TRUE) | ||
288 | #define irt_isstr(t) (irt_type(t) == IRT_STR) | ||
289 | #define irt_isfunc(t) (irt_type(t) == IRT_FUNC) | ||
290 | #define irt_istab(t) (irt_type(t) == IRT_TAB) | ||
291 | #define irt_isnum(t) (irt_type(t) == IRT_NUM) | ||
292 | #define irt_isint(t) (irt_type(t) == IRT_INT) | ||
293 | #define irt_isi8(t) (irt_type(t) == IRT_I8) | ||
294 | #define irt_isu8(t) (irt_type(t) == IRT_U8) | ||
295 | #define irt_isi16(t) (irt_type(t) == IRT_I16) | ||
296 | #define irt_isu16(t) (irt_type(t) == IRT_U16) | ||
297 | |||
298 | #define irt_isinteger(t) (irt_typerange((t), IRT_INT, IRT_U16)) | ||
299 | #define irt_isgcv(t) (irt_typerange((t), IRT_STR, IRT_UDATA)) | ||
300 | #define irt_isaddr(t) (irt_typerange((t), IRT_LIGHTUD, IRT_UDATA)) | ||
301 | |||
302 | #define itype2irt(tv) \ | ||
303 | (~uitype(tv) < IRT_NUM ? cast(IRType, ~uitype(tv)) : IRT_NUM) | ||
304 | #define irt_toitype(t) ((int32_t)~(uint32_t)irt_type(t)) | ||
305 | |||
306 | #define irt_isguard(t) ((t).irt & IRT_GUARD) | ||
307 | #define irt_ismarked(t) ((t).irt & IRT_MARK) | ||
308 | #define irt_setmark(t) ((t).irt |= IRT_MARK) | ||
309 | #define irt_clearmark(t) ((t).irt &= ~IRT_MARK) | ||
310 | #define irt_isphi(t) ((t).irt & IRT_ISPHI) | ||
311 | #define irt_setphi(t) ((t).irt |= IRT_ISPHI) | ||
312 | #define irt_clearphi(t) ((t).irt &= ~IRT_ISPHI) | ||
313 | |||
314 | /* Stored combined IR opcode and type. */ | ||
315 | typedef uint16_t IROpT; | ||
316 | |||
317 | /* IR references. */ | ||
318 | typedef uint16_t IRRef1; /* One stored reference. */ | ||
319 | typedef uint32_t IRRef2; /* Two stored references. */ | ||
320 | typedef uint32_t IRRef; /* Used to pass around references. */ | ||
321 | |||
/*
** Fixed references.
** REF_BIAS splits the reference space: constants live below it,
** instructions at or above it.
*/
enum {
  REF_BIAS  = 0x8000,
  REF_TRUE  = REF_BIAS-3,
  REF_FALSE = REF_BIAS-2,
  REF_NIL   = REF_BIAS-1,	/* \--- Constants grow downwards. */
  REF_BASE  = REF_BIAS,		/* /--- IR grows upwards. */
  REF_FIRST = REF_BIAS+1,
  REF_DROP  = 0xffff
};
332 | |||
333 | /* Note: IRMlit operands must be < REF_BIAS, too! | ||
334 | ** This allows for fast and uniform manipulation of all operands | ||
335 | ** without looking up the operand mode in lj_ir_mode: | ||
336 | ** - CSE calculates the maximum reference of two operands. | ||
337 | ** This must work with mixed reference/literal operands, too. | ||
338 | ** - DCE marking only checks for operand >= REF_BIAS. | ||
339 | ** - LOOP needs to substitute reference operands. | ||
340 | ** Constant references and literals must not be modified. | ||
341 | */ | ||
342 | |||
343 | #define IRREF2(lo, hi) ((IRRef2)(lo) | ((IRRef2)(hi) << 16)) | ||
344 | |||
345 | #define irref_isk(ref) ((ref) < REF_BIAS) | ||
346 | |||
347 | /* Tagged IR references. */ | ||
348 | typedef uint32_t TRef; | ||
349 | |||
350 | #define TREF(ref, t) (cast(TRef, (ref) + ((t)<<16))) | ||
351 | |||
352 | #define tref_ref(tr) (cast(IRRef1, (tr))) | ||
353 | #define tref_t(tr) (cast(IRType, (tr)>>16)) | ||
354 | #define tref_type(tr) (cast(IRType, ((tr)>>16) & IRT_TYPE)) | ||
/*
** Check whether the type of a tagged reference lies in the inclusive range
** [first, last]. A single unsigned compare covers both bounds: types below
** first wrap around to large values after the subtraction.
** All macro parameters are parenthesized so expression arguments expand safely.
*/
#define tref_typerange(tr, first, last) \
  ((((tr)>>16) & IRT_TYPE) - (TRef)(first) <= (TRef)((last)-(first)))
357 | |||
358 | #define tref_istype(tr, t) (((tr) & (IRT_TYPE<<16)) == ((t)<<16)) | ||
359 | #define tref_isnil(tr) (tref_istype((tr), IRT_NIL)) | ||
360 | #define tref_isfalse(tr) (tref_istype((tr), IRT_FALSE)) | ||
361 | #define tref_istrue(tr) (tref_istype((tr), IRT_TRUE)) | ||
362 | #define tref_isstr(tr) (tref_istype((tr), IRT_STR)) | ||
363 | #define tref_isfunc(tr) (tref_istype((tr), IRT_FUNC)) | ||
364 | #define tref_istab(tr) (tref_istype((tr), IRT_TAB)) | ||
365 | #define tref_isudata(tr) (tref_istype((tr), IRT_UDATA)) | ||
366 | #define tref_isnum(tr) (tref_istype((tr), IRT_NUM)) | ||
367 | #define tref_isint(tr) (tref_istype((tr), IRT_INT)) | ||
368 | |||
369 | #define tref_isbool(tr) (tref_typerange((tr), IRT_FALSE, IRT_TRUE)) | ||
370 | #define tref_ispri(tr) (tref_typerange((tr), IRT_NIL, IRT_TRUE)) | ||
371 | #define tref_istruecond(tr) (!tref_typerange((tr), IRT_NIL, IRT_FALSE)) | ||
372 | #define tref_isinteger(tr) (tref_typerange((tr), IRT_INT, IRT_U16)) | ||
373 | #define tref_isnumber(tr) (tref_typerange((tr), IRT_NUM, IRT_U16)) | ||
374 | #define tref_isnumber_str(tr) (tref_isnumber((tr)) || tref_isstr((tr))) | ||
375 | #define tref_isgcv(tr) (tref_typerange((tr), IRT_STR, IRT_UDATA)) | ||
376 | |||
377 | #define tref_isk(tr) (irref_isk(tref_ref((tr)))) | ||
378 | #define tref_isk2(tr1, tr2) (irref_isk(tref_ref((tr1) | (tr2)))) | ||
379 | |||
380 | #define TREF_PRI(t) (TREF(REF_NIL-(t), (t))) | ||
381 | #define TREF_NIL (TREF_PRI(IRT_NIL)) | ||
382 | #define TREF_FALSE (TREF_PRI(IRT_FALSE)) | ||
383 | #define TREF_TRUE (TREF_PRI(IRT_TRUE)) | ||
384 | |||
385 | /* IR instruction format (64 bit). | ||
386 | ** | ||
387 | ** 16 16 8 8 8 8 | ||
388 | ** +-------+-------+---+---+---+---+ | ||
389 | ** | op1 | op2 | t | o | r | s | | ||
390 | ** +-------+-------+---+---+---+---+ | ||
391 | ** | op12/i/gco | ot | prev | (alternative fields in union) | ||
392 | ** +---------------+-------+-------+ | ||
393 | ** 32 16 16 | ||
394 | ** | ||
395 | ** prev is only valid prior to register allocation and then reused for r + s. | ||
396 | */ | ||
397 | |||
398 | typedef union IRIns { | ||
399 | struct { | ||
400 | LJ_ENDIAN_LOHI( | ||
401 | IRRef1 op1; /* IR operand 1. */ | ||
402 | , IRRef1 op2; /* IR operand 2. */ | ||
403 | ) | ||
404 | IROpT ot; /* IR opcode and type (overlaps t and o). */ | ||
405 | IRRef1 prev; /* Previous ins in same chain (overlaps r and s). */ | ||
406 | }; | ||
407 | struct { | ||
408 | IRRef2 op12; /* IR operand 1 and 2 (overlaps op1 and op2). */ | ||
409 | LJ_ENDIAN_LOHI( | ||
410 | IRType1 t; /* IR type. */ | ||
411 | , IROp1 o; /* IR opcode. */ | ||
412 | ) | ||
413 | LJ_ENDIAN_LOHI( | ||
414 | uint8_t r; /* Register allocation (overlaps prev). */ | ||
415 | , uint8_t s; /* Spill slot allocation (overlaps prev). */ | ||
416 | ) | ||
417 | }; | ||
418 | int32_t i; /* 32 bit signed integer literal (overlaps op12). */ | ||
419 | GCRef gcr; /* GCobj constant (overlaps op12). */ | ||
420 | MRef ptr; /* Pointer constant (overlaps op12). */ | ||
421 | } IRIns; | ||
422 | |||
423 | #define ir_kgc(ir) (gcref((ir)->gcr)) | ||
424 | #define ir_kstr(ir) (gco2str(ir_kgc((ir)))) | ||
425 | #define ir_ktab(ir) (gco2tab(ir_kgc((ir)))) | ||
426 | #define ir_kfunc(ir) (gco2func(ir_kgc((ir)))) | ||
427 | #define ir_knum(ir) (mref((ir)->ptr, cTValue)) | ||
428 | |||
429 | #endif | ||
diff --git a/src/lj_iropt.h b/src/lj_iropt.h new file mode 100644 index 00000000..69b0a955 --- /dev/null +++ b/src/lj_iropt.h | |||
@@ -0,0 +1,128 @@ | |||
1 | /* | ||
2 | ** Common header for IR emitter and optimizations. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #ifndef _LJ_IROPT_H | ||
7 | #define _LJ_IROPT_H | ||
8 | |||
9 | #include "lj_obj.h" | ||
10 | #include "lj_jit.h" | ||
11 | |||
12 | #if LJ_HASJIT | ||
13 | /* IR emitter. */ | ||
14 | LJ_FUNC void LJ_FASTCALL lj_ir_growtop(jit_State *J); | ||
15 | LJ_FUNC TRef LJ_FASTCALL lj_ir_emit(jit_State *J); | ||
16 | |||
17 | /* Save current IR in J->fold.ins, but do not emit it (yet). */ | ||
18 | static LJ_AINLINE void lj_ir_set_(jit_State *J, uint16_t ot, IRRef1 a, IRRef1 b) | ||
19 | { | ||
20 | J->fold.ins.ot = ot; J->fold.ins.op1 = a; J->fold.ins.op2 = b; | ||
21 | } | ||
22 | |||
23 | #define lj_ir_set(J, ot, a, b) \ | ||
24 | lj_ir_set_(J, (uint16_t)(ot), (IRRef1)(a), (IRRef1)(b)) | ||
25 | |||
26 | /* Get ref of next IR instruction and optionally grow IR. | ||
27 | ** Note: this may invalidate all IRIns*! | ||
28 | */ | ||
29 | static LJ_AINLINE IRRef lj_ir_nextins(jit_State *J) | ||
30 | { | ||
31 | IRRef ref = J->cur.nins; | ||
32 | if (LJ_UNLIKELY(ref >= J->irtoplim)) lj_ir_growtop(J); | ||
33 | J->cur.nins = ref + 1; | ||
34 | return ref; | ||
35 | } | ||
36 | |||
37 | /* Interning of constants. */ | ||
38 | LJ_FUNC TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k); | ||
39 | LJ_FUNC void lj_ir_knum_freeall(jit_State *J); | ||
40 | LJ_FUNC TRef lj_ir_knum_addr(jit_State *J, cTValue *tv); | ||
41 | LJ_FUNC TRef lj_ir_knum_nn(jit_State *J, uint64_t nn); | ||
42 | LJ_FUNC TRef lj_ir_knumint(jit_State *J, lua_Number n); | ||
43 | LJ_FUNC TRef lj_ir_kgc(jit_State *J, GCobj *o, IRType t); | ||
44 | LJ_FUNC TRef lj_ir_kptr(jit_State *J, void *ptr); | ||
45 | LJ_FUNC TRef lj_ir_knull(jit_State *J, IRType t); | ||
46 | LJ_FUNC TRef lj_ir_kslot(jit_State *J, TRef key, IRRef slot); | ||
47 | |||
48 | static LJ_AINLINE TRef lj_ir_knum(jit_State *J, lua_Number n) | ||
49 | { | ||
50 | TValue tv; | ||
51 | tv.n = n; | ||
52 | return lj_ir_knum_nn(J, tv.u64); | ||
53 | } | ||
54 | |||
55 | #define lj_ir_kstr(J, str) lj_ir_kgc(J, obj2gco((str)), IRT_STR) | ||
56 | #define lj_ir_ktab(J, tab) lj_ir_kgc(J, obj2gco((tab)), IRT_TAB) | ||
57 | #define lj_ir_kfunc(J, func) lj_ir_kgc(J, obj2gco((func)), IRT_FUNC) | ||
58 | |||
59 | /* Special FP constants. */ | ||
60 | #define lj_ir_knum_zero(J) lj_ir_knum_nn(J, U64x(00000000,00000000)) | ||
61 | #define lj_ir_knum_one(J) lj_ir_knum_nn(J, U64x(3ff00000,00000000)) | ||
62 | #define lj_ir_knum_tobit(J) lj_ir_knum_nn(J, U64x(43380000,00000000)) | ||
63 | |||
64 | /* Special 16 byte aligned SIMD constants. */ | ||
65 | LJ_DATA LJ_ALIGN(16) cTValue lj_ir_knum_tv[4]; | ||
66 | #define lj_ir_knum_abs(J) lj_ir_knum_addr(J, &lj_ir_knum_tv[0]) | ||
67 | #define lj_ir_knum_neg(J) lj_ir_knum_addr(J, &lj_ir_knum_tv[2]) | ||
68 | |||
69 | /* Access to constants. */ | ||
70 | LJ_FUNC void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir); | ||
71 | |||
72 | /* Convert IR operand types. */ | ||
73 | LJ_FUNC TRef LJ_FASTCALL lj_ir_tonum(jit_State *J, TRef tr); | ||
74 | LJ_FUNC TRef LJ_FASTCALL lj_ir_tostr(jit_State *J, TRef tr); | ||
75 | LJ_FUNC TRef LJ_FASTCALL lj_ir_tobit(jit_State *J, TRef tr); | ||
76 | LJ_FUNC TRef LJ_FASTCALL lj_ir_toint(jit_State *J, TRef tr); | ||
77 | |||
78 | /* Miscellaneous IR ops. */ | ||
79 | LJ_FUNC int lj_ir_numcmp(lua_Number a, lua_Number b, IROp op); | ||
80 | LJ_FUNC int lj_ir_strcmp(GCstr *a, GCstr *b, IROp op); | ||
81 | LJ_FUNC void lj_ir_rollback(jit_State *J, IRRef ref); | ||
82 | |||
83 | /* Emit IR instructions with on-the-fly optimizations. */ | ||
84 | LJ_FUNC TRef LJ_FASTCALL lj_opt_fold(jit_State *J); | ||
85 | LJ_FUNC TRef LJ_FASTCALL lj_opt_cse(jit_State *J); | ||
86 | |||
/*
** Special return values for the fold functions.
** DROPFOLD must directly follow FAILFOLD: CONDFOLD() adds a condition
** to FAILFOLD to pick between the two.
*/
enum {
  NEXTFOLD,	/* Couldn't fold, pass on. */
  RETRYFOLD,	/* Retry fold with modified fins. */
  KINTFOLD,	/* Return ref for int constant in fins->i. */
  FAILFOLD,	/* Guard would always fail. */
  DROPFOLD,	/* Guard eliminated. */
  MAX_FOLD	/* Number of special return values. */
};
96 | |||
97 | #define INTFOLD(k) ((J->fold.ins.i = (k)), (TRef)KINTFOLD) | ||
98 | #define CONDFOLD(cond) ((TRef)FAILFOLD + (TRef)(cond)) | ||
99 | #define LEFTFOLD (J->fold.ins.op1) | ||
100 | #define RIGHTFOLD (J->fold.ins.op2) | ||
101 | #define CSEFOLD (lj_opt_cse(J)) | ||
102 | #define EMITFOLD (lj_ir_emit(J)) | ||
103 | |||
104 | /* Load/store forwarding. */ | ||
105 | LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_aload(jit_State *J); | ||
106 | LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hload(jit_State *J); | ||
107 | LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J); | ||
108 | LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_fload(jit_State *J); | ||
109 | LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_tlen(jit_State *J); | ||
110 | LJ_FUNC int lj_opt_fwd_wasnonnil(jit_State *J, IROpT loadop, IRRef xref); | ||
111 | |||
112 | /* Dead-store elimination. */ | ||
113 | LJ_FUNC TRef LJ_FASTCALL lj_opt_dse_ahstore(jit_State *J); | ||
114 | LJ_FUNC TRef LJ_FASTCALL lj_opt_dse_ustore(jit_State *J); | ||
115 | LJ_FUNC TRef LJ_FASTCALL lj_opt_dse_fstore(jit_State *J); | ||
116 | |||
117 | /* Narrowing. */ | ||
118 | LJ_FUNC TRef LJ_FASTCALL lj_opt_narrow_convert(jit_State *J); | ||
119 | LJ_FUNC TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc); | ||
120 | LJ_FUNC TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc); | ||
121 | LJ_FUNC IRType lj_opt_narrow_forl(cTValue *forbase); | ||
122 | |||
123 | /* Optimization passes. */ | ||
124 | LJ_FUNC void lj_opt_dce(jit_State *J); | ||
125 | LJ_FUNC int lj_opt_loop(jit_State *J); | ||
126 | #endif | ||
127 | |||
128 | #endif | ||
diff --git a/src/lj_jit.h b/src/lj_jit.h new file mode 100644 index 00000000..280eff41 --- /dev/null +++ b/src/lj_jit.h | |||
@@ -0,0 +1,279 @@ | |||
1 | /* | ||
2 | ** Common definitions for the JIT compiler. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #ifndef _LJ_JIT_H | ||
7 | #define _LJ_JIT_H | ||
8 | |||
9 | #include "lj_obj.h" | ||
10 | #include "lj_ir.h" | ||
11 | |||
12 | /* JIT engine flags. */ | ||
13 | #define JIT_F_ON 0x00000001 | ||
14 | |||
15 | /* CPU-specific JIT engine flags. */ | ||
16 | #if LJ_TARGET_X86ORX64 | ||
17 | #define JIT_F_CMOV 0x00000100 | ||
18 | #define JIT_F_SSE2 0x00000200 | ||
19 | #define JIT_F_SSE4_1 0x00000400 | ||
20 | #define JIT_F_P4 0x00000800 | ||
21 | #define JIT_F_PREFER_IMUL 0x00001000 | ||
22 | #define JIT_F_SPLIT_XMM 0x00002000 | ||
23 | #define JIT_F_LEA_AGU 0x00004000 | ||
24 | |||
25 | /* Names for the CPU-specific flags. Must match the order above. */ | ||
26 | #define JIT_F_CPU_FIRST JIT_F_CMOV | ||
27 | #define JIT_F_CPUSTRING "\4CMOV\4SSE2\6SSE4.1\2P4\3AMD\2K8\4ATOM" | ||
28 | #else | ||
29 | #error "Missing CPU-specific JIT engine flags" | ||
30 | #endif | ||
31 | |||
32 | /* Optimization flags. */ | ||
33 | #define JIT_F_OPT_MASK 0x00ff0000 | ||
34 | |||
35 | #define JIT_F_OPT_FOLD 0x00010000 | ||
36 | #define JIT_F_OPT_CSE 0x00020000 | ||
37 | #define JIT_F_OPT_DCE 0x00040000 | ||
38 | #define JIT_F_OPT_FWD 0x00080000 | ||
39 | #define JIT_F_OPT_DSE 0x00100000 | ||
40 | #define JIT_F_OPT_NARROW 0x00200000 | ||
41 | #define JIT_F_OPT_LOOP 0x00400000 | ||
42 | #define JIT_F_OPT_FUSE 0x00800000 | ||
43 | |||
44 | /* Optimization names for -O. Must match the order above. */ | ||
45 | #define JIT_F_OPT_FIRST JIT_F_OPT_FOLD | ||
46 | #define JIT_F_OPTSTRING \ | ||
47 | "\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\4fuse" | ||
48 | |||
49 | /* Optimization levels set a fixed combination of flags. */ | ||
50 | #define JIT_F_OPT_0 0 | ||
51 | #define JIT_F_OPT_1 (JIT_F_OPT_FOLD|JIT_F_OPT_CSE|JIT_F_OPT_DCE) | ||
52 | #define JIT_F_OPT_2 (JIT_F_OPT_1|JIT_F_OPT_NARROW|JIT_F_OPT_LOOP) | ||
53 | #define JIT_F_OPT_3 (JIT_F_OPT_2|JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_FUSE) | ||
54 | #define JIT_F_OPT_DEFAULT JIT_F_OPT_3 | ||
55 | |||
56 | #ifdef LUA_USE_WIN | ||
57 | /* See: http://blogs.msdn.com/oldnewthing/archive/2003/10/08/55239.aspx */ | ||
58 | #define JIT_P_sizemcode_DEFAULT 64 | ||
59 | #else | ||
60 | /* Could go as low as 4K, but the mmap() overhead would be rather high. */ | ||
61 | #define JIT_P_sizemcode_DEFAULT 32 | ||
62 | #endif | ||
63 | |||
64 | /* Optimization parameters and their defaults. Length is a char in octal! */ | ||
65 | #define JIT_PARAMDEF(_) \ | ||
66 | _(\010, maxtrace, 1000) /* Max. # of traces in cache. */ \ | ||
67 | _(\011, maxrecord, 2000) /* Max. # of recorded IR instructions. */ \ | ||
68 | _(\012, maxirconst, 500) /* Max. # of IR constants of a trace. */ \ | ||
69 | _(\007, maxside, 100) /* Max. # of side traces of a root trace. */ \ | ||
70 | _(\007, maxsnap, 100) /* Max. # of snapshots for a trace. */ \ | ||
71 | \ | ||
72 | _(\007, hotloop, 57) /* # of iterations to detect a hot loop. */ \ | ||
73 | _(\007, hotexit, 10) /* # of taken exits to start a side trace. */ \ | ||
74 | _(\007, tryside, 4) /* # of attempts to compile a side trace. */ \ | ||
75 | \ | ||
76 | _(\012, instunroll, 4) /* Max. unroll for instable loops. */ \ | ||
77 | _(\012, loopunroll, 7) /* Max. unroll for loop ops in side traces. */ \ | ||
78 | _(\012, callunroll, 3) /* Max. unroll for recursive calls. */ \ | ||
79 | _(\011, recunroll, 0) /* Max. unroll for true recursion. */ \ | ||
80 | \ | ||
81 | /* Size of each machine code area (in KBytes). */ \ | ||
82 | _(\011, sizemcode, JIT_P_sizemcode_DEFAULT) \ | ||
83 | /* Max. total size of all machine code areas (in KBytes). */ \ | ||
84 | _(\010, maxmcode, 512) \ | ||
85 | /* End of list. */ | ||
86 | |||
87 | enum { | ||
88 | #define JIT_PARAMENUM(len, name, value) JIT_P_##name, | ||
89 | JIT_PARAMDEF(JIT_PARAMENUM) | ||
90 | #undef JIT_PARAMENUM | ||
91 | JIT_P__MAX | ||
92 | }; | ||
93 | |||
94 | #define JIT_PARAMSTR(len, name, value) #len #name | ||
95 | #define JIT_P_STRING JIT_PARAMDEF(JIT_PARAMSTR) | ||
96 | |||
97 | /* Trace compiler state. LJ_TRACE_IDLE is 0, all other states are >= 0x10. */ | ||
98 | typedef enum { | ||
99 | LJ_TRACE_IDLE, /* Trace compiler idle. */ | ||
100 | LJ_TRACE_ACTIVE = 0x10, /* Marker: the states below continue from 0x11. */ | ||
101 | LJ_TRACE_RECORD, /* Bytecode recording active. */ | ||
102 | LJ_TRACE_START, /* New trace started. */ | ||
103 | LJ_TRACE_END, /* End of trace. */ | ||
104 | LJ_TRACE_ASM, /* Assemble trace. */ | ||
105 | LJ_TRACE_ERR, /* Trace aborted with error. */ | ||
106 | } TraceState; | ||
107 | |||
108 | /* Machine code type. */ | ||
109 | typedef uint8_t MCode; | ||
110 | |||
111 | /* Stack snapshot header. */ | ||
112 | typedef struct SnapShot { | ||
113 | uint16_t mapofs; /* Offset into snapshot map. */ | ||
114 | IRRef1 ref; /* First IR ref for this snapshot. */ | ||
115 | uint8_t nslots; /* Number of stack slots. */ | ||
116 | uint8_t nframelinks; /* Number of frame links. */ | ||
117 | uint8_t count; /* Count of taken exits for this snapshot. */ | ||
118 | uint8_t unused1; | ||
119 | } SnapShot; | ||
120 | |||
121 | #define SNAPCOUNT_DONE 255 /* Already compiled and linked a side trace. */ | ||
122 | #define snap_ref(sn) ((IRRef)(IRRef1)(sn)) | ||
123 | #define snap_ridsp(sn) ((sn) >> 16) | ||
124 | |||
125 | /* Snapshot and exit numbers. */ | ||
126 | typedef uint32_t SnapNo; | ||
127 | typedef uint32_t ExitNo; | ||
128 | |||
129 | /* Trace number. */ | ||
130 | typedef uint32_t TraceNo; /* Used to pass around trace numbers. */ | ||
131 | typedef uint16_t TraceNo1; /* Stored trace number. */ | ||
132 | |||
133 | #define TRACE_INTERP 0 /* Fallback to interpreter. */ | ||
134 | |||
135 | /* Trace anchor. */ | ||
136 | typedef struct Trace { | ||
137 | IRIns *ir; /* IR instructions/constants. Biased with REF_BIAS. */ | ||
138 | IRRef nins; /* Next IR instruction. Biased with REF_BIAS. */ | ||
139 | IRRef nk; /* Lowest IR constant. Biased with REF_BIAS. */ | ||
140 | SnapShot *snap; /* Snapshot array. */ | ||
141 | IRRef2 *snapmap; /* Snapshot map. */ | ||
142 | uint16_t nsnap; /* Number of snapshots. */ | ||
143 | uint16_t nsnapmap; /* Number of snapshot map elements. */ | ||
144 | GCRef startpt; /* Starting prototype. */ | ||
145 | BCIns startins; /* Original bytecode of starting instruction. */ | ||
146 | MCode *mcode; /* Start of machine code. */ | ||
147 | MSize szmcode; /* Size of machine code. */ | ||
148 | MSize mcloop; /* Offset of loop start in machine code. */ | ||
149 | TraceNo1 link; /* Linked trace (or self for loops). */ | ||
150 | TraceNo1 root; /* Root trace of side trace (or 0 for root traces). */ | ||
151 | TraceNo1 nextroot; /* Next root trace for same prototype. */ | ||
152 | TraceNo1 nextside; /* Next side trace of same root trace. */ | ||
153 | uint16_t nchild; /* Number of child traces (root trace only). */ | ||
154 | uint16_t spadjust; /* Stack pointer adjustment (offset in bytes). */ | ||
155 | #ifdef LUAJIT_USE_GDBJIT | ||
156 | void *gdbjit_entry; /* GDB JIT entry. */ | ||
157 | #endif | ||
158 | } Trace; | ||
159 | |||
160 | /* Round-robin penalty cache for bytecodes leading to aborted traces. */ | ||
161 | typedef struct HotPenalty { | ||
162 | const BCIns *pc; /* Starting bytecode PC. */ | ||
163 | uint16_t val; /* Penalty value, i.e. hotcount start. */ | ||
164 | uint16_t reason; /* Abort reason (really TraceErr). */ | ||
165 | } HotPenalty; | ||
166 | |||
167 | /* Number of slots for the penalty cache. Must be a power of 2. */ | ||
168 | #define PENALTY_SLOTS 16 | ||
169 | |||
170 | /* Round-robin backpropagation cache for narrowing conversions. */ | ||
171 | typedef struct BPropEntry { | ||
172 | IRRef1 key; /* Key: original reference. */ | ||
173 | IRRef1 val; /* Value: reference after conversion. */ | ||
174 | IRRef mode; /* Mode for this entry (currently IRTOINT_*). */ | ||
175 | } BPropEntry; | ||
176 | |||
177 | /* Number of slots for the backpropagation cache. Must be a power of 2. */ | ||
178 | #define BPROP_SLOTS 16 | ||
179 | |||
180 | /* Fold state is used to fold instructions on-the-fly. */ | ||
181 | typedef struct FoldState { | ||
182 | IRIns ins; /* Currently emitted instruction. */ | ||
183 | IRIns left; /* Instruction referenced by left operand. */ | ||
184 | IRIns right; /* Instruction referenced by right operand. */ | ||
185 | } FoldState; | ||
186 | |||
187 | /* JIT compiler state. */ | ||
188 | typedef struct jit_State { | ||
189 | Trace cur; /* Current trace. */ | ||
190 | |||
191 | lua_State *L; /* Current Lua state. */ | ||
192 | const BCIns *pc; /* Current PC. */ | ||
193 | BCReg maxslot; /* Relative to baseslot. */ | ||
194 | |||
195 | uint32_t flags; /* JIT engine flags. */ | ||
196 | TRef *base; /* Current frame base, points into J->slots. */ | ||
197 | BCReg baseslot; /* Current frame base, offset into J->slots. */ | ||
198 | GCfunc *fn; /* Current function. */ | ||
199 | GCproto *pt; /* Current prototype. */ | ||
200 | |||
201 | FoldState fold; /* Fold state. */ | ||
202 | |||
203 | uint8_t mergesnap; /* Allowed to merge with next snapshot. */ | ||
204 | uint8_t needsnap; /* Need snapshot before recording next bytecode. */ | ||
205 | IRType1 guardemit; /* Accumulated IRT_GUARD for emitted instructions. */ | ||
206 | uint8_t unused1; /* Currently unused. */ | ||
207 | |||
208 | const BCIns *bc_min; /* Start of allowed bytecode range for root trace. */ | ||
209 | MSize bc_extent; /* Extent of the range. */ | ||
210 | |||
211 | TraceState state; /* Trace compiler state. */ | ||
212 | |||
213 | int32_t instunroll; /* Unroll counter for instable loops. */ | ||
214 | int32_t loopunroll; /* Unroll counter for loop ops in side traces. */ | ||
215 | int32_t tailcalled; /* Number of successive tailcalls. */ | ||
216 | int32_t framedepth; /* Current frame depth. */ | ||
217 | |||
218 | MRef knum; /* Pointer to chained array of KNUM constants. */ | ||
219 | |||
220 | IRIns *irbuf; /* Temp. IR instruction buffer. Biased with REF_BIAS. */ | ||
221 | IRRef irtoplim; /* Upper limit of instruction buffer (biased). */ | ||
222 | IRRef irbotlim; /* Lower limit of instruction buffer (biased). */ | ||
223 | IRRef loopref; /* Last loop reference or ref of final LOOP (or 0). */ | ||
224 | |||
225 | SnapShot *snapbuf; /* Temp. snapshot buffer. */ | ||
226 | IRRef2 *snapmapbuf; /* Temp. snapshot map buffer. */ | ||
227 | MSize sizesnap; /* Size of temp. snapshot buffer. */ | ||
228 | MSize sizesnapmap; /* Size of temp. snapshot map buffer. */ | ||
229 | |||
230 | Trace **trace; /* Array of traces. */ | ||
231 | TraceNo curtrace; /* Current trace number (if not 0). Kept in J->cur. */ | ||
232 | TraceNo freetrace; /* Start of scan for next free trace. */ | ||
233 | MSize sizetrace; /* Size of trace array. */ | ||
234 | |||
235 | IRRef1 chain[IR__MAX]; /* IR instruction skip-list chain anchors. */ | ||
236 | TRef slot[LJ_MAX_JSLOTS+LJ_STACK_EXTRA]; /* Stack slot map. */ | ||
237 | |||
238 | int32_t param[JIT_P__MAX]; /* JIT engine parameters. */ | ||
239 | |||
240 | MCode *exitstubgroup[LJ_MAX_EXITSTUBGR]; /* Exit stub group addresses. */ | ||
241 | |||
242 | HotPenalty penalty[PENALTY_SLOTS]; /* Penalty slots. */ | ||
243 | uint32_t penaltyslot; /* Round-robin index into penalty slots. */ | ||
244 | |||
245 | BPropEntry bpropcache[BPROP_SLOTS]; /* Backpropagation cache slots. */ | ||
246 | uint32_t bpropslot; /* Round-robin index into bpropcache slots. */ | ||
247 | |||
248 | const BCIns *startpc; /* Bytecode PC of starting instruction. */ | ||
249 | TraceNo parent; /* Parent of current side trace (0 for root traces). */ | ||
250 | ExitNo exitno; /* Exit number in parent of current side trace. */ | ||
251 | |||
252 | TValue errinfo; /* Additional info element for trace errors. */ | ||
253 | |||
254 | MCode *mcarea; /* Base of current mcode area. */ | ||
255 | MCode *mctop; /* Top of current mcode area. */ | ||
256 | MCode *mcbot; /* Bottom of current mcode area. */ | ||
257 | size_t szmcarea; /* Size of current mcode area. */ | ||
258 | size_t szallmcarea; /* Total size of all allocated mcode areas. */ | ||
259 | int mcprot; /* Protection of current mcode area. */ | ||
260 | } jit_State; | ||
261 | |||
262 | /* Exit stubs. */ | ||
263 | #if LJ_TARGET_X86ORX64 | ||
264 | /* Limited by the range of a short fwd jump (127): (2+2)*(32-1)-2 = 122. */ | ||
265 | #define EXITSTUB_SPACING (2+2) | ||
266 | #define EXITSTUBS_PER_GROUP 32 | ||
267 | #else | ||
268 | #error "Missing CPU-specific exit stub definitions" | ||
269 | #endif | ||
270 | |||
271 | /* Map an exit number to its stub: group base + spacing * index in group. */ | ||
272 | static LJ_AINLINE MCode *exitstub_addr(jit_State *J, ExitNo exitno) | ||
273 | { | ||
274 | MCode *group = J->exitstubgroup[exitno / EXITSTUBS_PER_GROUP]; | ||
275 | lua_assert(group != NULL); /* The stub group must be allocated. */ | ||
276 | return group + EXITSTUB_SPACING*(exitno % EXITSTUBS_PER_GROUP); | ||
277 | } | ||
278 | |||
279 | #endif | ||
diff --git a/src/lj_lex.c b/src/lj_lex.c new file mode 100644 index 00000000..38b0a7d4 --- /dev/null +++ b/src/lj_lex.c | |||
@@ -0,0 +1,393 @@ | |||
1 | /* | ||
2 | ** Lexical analyzer. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | ** | ||
5 | ** Major portions taken verbatim or adapted from the Lua interpreter. | ||
6 | ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h | ||
7 | */ | ||
8 | |||
9 | #define lj_lex_c | ||
10 | #define LUA_CORE | ||
11 | |||
12 | #include "lj_obj.h" | ||
13 | #include "lj_gc.h" | ||
14 | #include "lj_err.h" | ||
15 | #include "lj_str.h" | ||
16 | #include "lj_lex.h" | ||
17 | #include "lj_parse.h" | ||
18 | #include "lj_ctype.h" | ||
19 | |||
20 | /* Lua lexer token names in TKDEF order: entry i names token TK_OFS+1+i. */ | ||
21 | static const char *const tokennames[] = { | ||
22 | #define TKSTR1(name) #name, | ||
23 | #define TKSTR2(name, sym) #sym, | ||
24 | TKDEF(TKSTR1, TKSTR2) | ||
25 | #undef TKSTR1 | ||
26 | #undef TKSTR2 | ||
27 | NULL /* Terminating entry. */ | ||
28 | }; | ||
29 | |||
30 | /* -- Buffer handling ----------------------------------------------------- */ | ||
31 | |||
32 | #define char2int(c) cast(int, cast(uint8_t, (c))) | ||
33 | #define next(ls) \ | ||
34 | (ls->current = (ls->n--) > 0 ? char2int(*ls->p++) : fillbuf(ls)) | ||
35 | #define save_and_next(ls) (save(ls, ls->current), next(ls)) | ||
36 | #define currIsNewline(ls) (ls->current == '\n' || ls->current == '\r') | ||
37 | #define END_OF_STREAM (-1) | ||
38 | |||
39 | static int fillbuf(LexState *ls) | ||
40 | { /* Fetch the next chunk from the reader and return its first char. */ | ||
41 | size_t len; /* Chunk size reported by the reader callback. */ | ||
42 | const char *chunk = ls->rfunc(ls->L, ls->rdata, &len); | ||
43 | if (!chunk || !len) return END_OF_STREAM; /* Reader has no more data. */ | ||
44 | ls->p = chunk + 1; /* The first byte is returned directly below. */ | ||
45 | ls->n = (MSize)len - 1; | ||
46 | return char2int(chunk[0]); | ||
47 | } | ||
48 | |||
49 | static void save(LexState *ls, int c) | ||
50 | { /* Append one char to the token buffer, growing the buffer if needed. */ | ||
51 | SBuf *sb = &ls->sb; | ||
52 | if (sb->n + 1 > sb->sz) { /* Buffer full: double its size. */ | ||
53 | if (sb->sz >= LJ_MAX_STR/2) | ||
54 | lj_lex_error(ls, 0, LJ_ERR_XELEM); /* Does not return. */ | ||
55 | lj_str_resizebuf(ls->L, sb, sb->sz * 2); | ||
56 | } | ||
57 | sb->buf[sb->n] = cast(char, c); | ||
58 | sb->n++; | ||
59 | } | ||
60 | |||
61 | static int check_next(LexState *ls, const char *set) | ||
62 | { /* If the current char is in set: save it, advance and return 1. Else 0. */ | ||
63 | if (ls->current == '\0' || !strchr(set, ls->current)) | ||
64 | return 0; /* NUL is rejected: strchr() would match the terminator of set. */ | ||
65 | save_and_next(ls); | ||
66 | return 1; | ||
67 | } | ||
68 | |||
69 | static void inclinenumber(LexState *ls) | ||
70 | { /* Skip one newline sequence and bump the line counter. */ | ||
71 | int first = ls->current; /* The newline char that starts the sequence. */ | ||
72 | lua_assert(first == '\n' || first == '\r'); | ||
73 | next(ls); /* Consume the first newline char. */ | ||
74 | if (ls->current != first && currIsNewline(ls)) | ||
75 | next(ls); /* Consume the second half of a `\n\r' or `\r\n' pair. */ | ||
76 | ls->linenumber++; | ||
77 | if (ls->linenumber >= LJ_MAX_LINE) lj_lex_error(ls, ls->token, LJ_ERR_XLINES); | ||
78 | } | ||
79 | |||
80 | /* -- Scanner for terminals ----------------------------------------------- */ | ||
81 | |||
82 | static void read_numeral(LexState *ls, TValue *tv) | ||
83 | { /* Collect the text of a numeric literal and convert it into tv. */ | ||
84 | lua_assert(lj_ctype_isdigit(ls->current)); | ||
85 | save_and_next(ls); /* Leading digit. */ | ||
86 | while (ls->current == '.' || lj_ctype_isdigit(ls->current)) | ||
87 | save_and_next(ls); /* Further digits and dots. */ | ||
88 | if (check_next(ls, "Ee") != 0) /* Exponent marker? */ | ||
89 | (void)check_next(ls, "+-"); /* Optional exponent sign. */ | ||
90 | while (lj_ctype_isident(ls->current)) /* Any trailing identifier chars. */ | ||
91 | save_and_next(ls); | ||
92 | save(ls, '\0'); /* NUL-terminate the buffer for the conversion. */ | ||
93 | if (lj_str_numconv(ls->sb.buf, tv)) return; | ||
94 | lj_lex_error(ls, TK_number, LJ_ERR_XNUMBER); /* Conversion failed. */ | ||
95 | } | ||
96 | |||
97 | static int skip_sep(LexState *ls) | ||
98 | { /* Scan `[=*' or `]=*'. Returns the level, or -level-1 without 2nd bracket. */ | ||
99 | int bracket = ls->current; /* Opening `[' or closing `]'. */ | ||
100 | int level; /* Number of `=' seen after the first bracket. */ | ||
101 | lua_assert(bracket == '[' || bracket == ']'); | ||
102 | save_and_next(ls); | ||
103 | for (level = 0; ls->current == '='; level++) | ||
104 | save_and_next(ls); | ||
105 | if (ls->current != bracket) /* No matching 2nd bracket. */ | ||
106 | return -level - 1; | ||
107 | return level; | ||
108 | } | ||
109 | |||
110 | static void read_long_string(LexState *ls, TValue *tv, int sep) | ||
111 | { /* Read a long string into tv, or skip a long comment if tv is NULL. */ | ||
112 | save_and_next(ls); /* skip 2nd `[' */ | ||
113 | if (currIsNewline(ls)) /* string starts with a newline? */ | ||
114 | inclinenumber(ls); /* skip it */ | ||
115 | for (;;) { | ||
116 | switch (ls->current) { | ||
117 | case END_OF_STREAM: | ||
118 | lj_lex_error(ls, TK_eof, tv ? LJ_ERR_XLSTR : LJ_ERR_XLCOM); | ||
119 | break; | ||
120 | case ']': | ||
121 | if (skip_sep(ls) == sep) { /* closing bracket with the same level? */ | ||
122 | save_and_next(ls); /* skip 2nd `]' */ | ||
123 | goto endloop; | ||
124 | } | ||
125 | break; | ||
126 | case '\n': | ||
127 | case '\r': | ||
128 | save(ls, '\n'); /* any newline sequence is stored as a single `\n' */ | ||
129 | inclinenumber(ls); | ||
130 | if (!tv) lj_str_resetbuf(&ls->sb); /* avoid wasting space */ | ||
131 | break; | ||
132 | default: | ||
133 | if (tv) save_and_next(ls); | ||
134 | else next(ls); /* comment text is not buffered */ | ||
135 | break; | ||
136 | } | ||
137 | } endloop: | ||
138 | if (tv) { /* strip the delimiters: 2 brackets plus sep `=' at each end */ | ||
139 | GCstr *str = lj_parse_keepstr(ls, ls->sb.buf + (2 + (MSize)sep), | ||
140 | ls->sb.n - 2*(2 + (MSize)sep)); | ||
141 | setstrV(ls->L, tv, str); | ||
142 | } | ||
143 | } | ||
144 | |||
145 | static void read_string(LexState *ls, int delim, TValue *tv) | ||
146 | { /* Read a string quoted with delim into tv, decoding escape sequences. */ | ||
147 | save_and_next(ls); /* the opening delimiter is buffered, too */ | ||
148 | while (ls->current != delim) { | ||
149 | switch (ls->current) { | ||
150 | case END_OF_STREAM: | ||
151 | lj_lex_error(ls, TK_eof, LJ_ERR_XSTR); | ||
152 | continue; | ||
153 | case '\n': | ||
154 | case '\r': | ||
155 | lj_lex_error(ls, TK_string, LJ_ERR_XSTR); /* unescaped newline */ | ||
156 | continue; | ||
157 | case '\\': { | ||
158 | int c; | ||
159 | next(ls); /* do not save the `\' */ | ||
160 | switch (ls->current) { | ||
161 | case 'a': c = '\a'; break; | ||
162 | case 'b': c = '\b'; break; | ||
163 | case 'f': c = '\f'; break; | ||
164 | case 'n': c = '\n'; break; | ||
165 | case 'r': c = '\r'; break; | ||
166 | case 't': c = '\t'; break; | ||
167 | case 'v': c = '\v'; break; | ||
168 | case '\n': case '\r': save(ls, '\n'); inclinenumber(ls); continue; | ||
169 | case END_OF_STREAM: continue; /* will raise an error next loop */ | ||
170 | default: | ||
171 | if (!lj_ctype_isdigit(ls->current)) { | ||
172 | save_and_next(ls); /* handles \\, \", \', and \? */ | ||
173 | } else { /* \xxx */ | ||
174 | int i = 0; | ||
175 | c = 0; | ||
176 | do { /* accumulate up to 3 decimal digits */ | ||
177 | c = 10*c + (ls->current-'0'); | ||
178 | next(ls); | ||
179 | } while (++i<3 && lj_ctype_isdigit(ls->current)); | ||
180 | if (c > UCHAR_MAX) /* value does not fit into a byte */ | ||
181 | lj_lex_error(ls, TK_string, LJ_ERR_XESC); | ||
182 | save(ls, c); | ||
183 | } | ||
184 | continue; | ||
185 | } | ||
186 | save(ls, c); /* single-letter escape: store the decoded char */ | ||
187 | next(ls); | ||
188 | continue; | ||
189 | } | ||
190 | default: | ||
191 | save_and_next(ls); | ||
192 | break; | ||
193 | } | ||
194 | } | ||
195 | save_and_next(ls); /* skip delimiter */ | ||
196 | setstrV(ls->L, tv, lj_parse_keepstr(ls, ls->sb.buf + 1, ls->sb.n - 2)); | ||
197 | } | ||
198 | |||
199 | /* -- Main lexical scanner ------------------------------------------------ */ | ||
200 | |||
201 | static int llex(LexState *ls, TValue *tv) | ||
202 | { /* Scan and return the next token. Its value (if any) is stored in tv. */ | ||
203 | lj_str_resetbuf(&ls->sb); /* Start with an empty token buffer. */ | ||
204 | for (;;) { | ||
205 | if (lj_ctype_isident(ls->current)) { | ||
206 | GCstr *s; | ||
207 | if (lj_ctype_isdigit(ls->current)) { /* Numeric literal. */ | ||
208 | read_numeral(ls, tv); | ||
209 | return TK_number; | ||
210 | } | ||
211 | /* Identifier or reserved word. */ | ||
212 | do { | ||
213 | save_and_next(ls); | ||
214 | } while (lj_ctype_isident(ls->current)); | ||
215 | s = lj_parse_keepstr(ls, ls->sb.buf, ls->sb.n); | ||
216 | if (s->reserved > 0) /* Reserved word? */ | ||
217 | return TK_OFS + s->reserved; | ||
218 | setstrV(ls->L, tv, s); | ||
219 | return TK_name; | ||
220 | } | ||
221 | switch (ls->current) { | ||
222 | case '\n': | ||
223 | case '\r': | ||
224 | inclinenumber(ls); /* Newlines only bump the line counter. */ | ||
225 | continue; | ||
226 | case ' ': | ||
227 | case '\t': | ||
228 | case '\v': | ||
229 | case '\f': | ||
230 | next(ls); /* Skip whitespace. */ | ||
231 | continue; | ||
232 | case '-': | ||
233 | next(ls); | ||
234 | if (ls->current != '-') return '-'; | ||
235 | /* else is a comment */ | ||
236 | next(ls); | ||
237 | if (ls->current == '[') { | ||
238 | int sep = skip_sep(ls); | ||
239 | lj_str_resetbuf(&ls->sb); /* `skip_sep' may dirty the buffer */ | ||
240 | if (sep >= 0) { | ||
241 | read_long_string(ls, NULL, sep); /* long comment */ | ||
242 | lj_str_resetbuf(&ls->sb); | ||
243 | continue; | ||
244 | } | ||
245 | } | ||
246 | /* else short comment */ | ||
247 | while (!currIsNewline(ls) && ls->current != END_OF_STREAM) | ||
248 | next(ls); | ||
249 | continue; | ||
250 | case '[': { | ||
251 | int sep = skip_sep(ls); | ||
252 | if (sep >= 0) { | ||
253 | read_long_string(ls, tv, sep); | ||
254 | return TK_string; | ||
255 | } else if (sep == -1) { /* Plain `[' without any `='. */ | ||
256 | return '['; | ||
257 | } else { /* `[' and one or more `=', but no 2nd `['. */ | ||
258 | lj_lex_error(ls, TK_string, LJ_ERR_XLDELIM); | ||
259 | continue; | ||
260 | } | ||
261 | } | ||
262 | case '=': | ||
263 | next(ls); | ||
264 | if (ls->current != '=') return '='; else { next(ls); return TK_eq; } | ||
265 | case '<': | ||
266 | next(ls); | ||
267 | if (ls->current != '=') return '<'; else { next(ls); return TK_le; } | ||
268 | case '>': | ||
269 | next(ls); | ||
270 | if (ls->current != '=') return '>'; else { next(ls); return TK_ge; } | ||
271 | case '~': | ||
272 | next(ls); | ||
273 | if (ls->current != '=') return '~'; else { next(ls); return TK_ne; } | ||
274 | case '"': | ||
275 | case '\'': | ||
276 | read_string(ls, ls->current, tv); | ||
277 | return TK_string; | ||
278 | case '.': | ||
279 | save_and_next(ls); | ||
280 | if (check_next(ls, ".")) { | ||
281 | if (check_next(ls, ".")) | ||
282 | return TK_dots; /* ... */ | ||
283 | else | ||
284 | return TK_concat; /* .. */ | ||
285 | } else if (!lj_ctype_isdigit(ls->current)) { | ||
286 | return '.'; | ||
287 | } else { /* Number starting with `.' (the dot is already buffered). */ | ||
288 | read_numeral(ls, tv); | ||
289 | return TK_number; | ||
290 | } | ||
291 | case END_OF_STREAM: | ||
292 | return TK_eof; | ||
293 | default: { | ||
294 | int c = ls->current; | ||
295 | next(ls); | ||
296 | return c; /* Single-char tokens (+ - / ...). */ | ||
297 | } | ||
298 | } | ||
299 | } | ||
300 | } | ||
301 | |||
302 | /* -- Lexer API ----------------------------------------------------------- */ | ||
303 | |||
304 | void lj_lex_start(lua_State *L, LexState *ls) | ||
305 | { /* Initialize the lexer state and read ahead the first char of the chunk. */ | ||
306 | ls->L = L; | ||
307 | ls->fs = NULL; | ||
308 | ls->n = 0; | ||
309 | ls->p = NULL; | ||
310 | ls->lookahead = TK_eof; /* No look-ahead token. */ | ||
311 | ls->linenumber = 1; | ||
312 | ls->lastline = 1; | ||
313 | lj_str_resizebuf(ls->L, &ls->sb, LJ_MIN_SBUF); | ||
314 | next(ls); /* Read-ahead first char. */ | ||
315 | if (ls->current == 0xef && ls->n >= 2 && char2int(ls->p[0]) == 0xbb && | ||
316 | char2int(ls->p[1]) == 0xbf) { /* Skip UTF-8 BOM (if buffered). */ | ||
317 | ls->n -= 2; | ||
318 | ls->p += 2; | ||
319 | next(ls); | ||
320 | } | ||
321 | if (ls->current == '#') { /* Skip POSIX #! header line. */ | ||
322 | do { | ||
323 | next(ls); | ||
324 | if (ls->current == END_OF_STREAM) break; /* Header without a newline. */ | ||
325 | } while (!currIsNewline(ls)); | ||
326 | if (ls->current != END_OF_STREAM) inclinenumber(ls); | ||
327 | } | ||
328 | if (ls->current == LUA_SIGNATURE[0]) { /* Looks like precompiled bytecode. */ | ||
329 | setstrV(L, L->top++, lj_err_str(L, LJ_ERR_XBCLOAD)); | ||
330 | lj_err_throw(L, LUA_ERRSYNTAX); | ||
331 | } | ||
332 | /* This is an unanchored GCstr before it's stored in the prototype. | ||
333 | ** Do this last since next() calls the reader which may call the GC. | ||
334 | ** Every non-throwing path must get here, so ls->chunkname is always set. */ | ||
335 | ls->chunkname = lj_str_newz(L, ls->chunkarg); | ||
336 | } | ||
337 | |||
338 | void lj_lex_next(LexState *ls) | ||
339 | { /* Advance to the next token: ls->token and ls->tokenval are updated. */ | ||
340 | ls->lastline = ls->linenumber; | ||
341 | if (LJ_LIKELY(ls->lookahead == TK_eof)) { /* Nothing buffered: scan. */ | ||
342 | ls->token = llex(ls, &ls->tokenval); | ||
343 | return; | ||
344 | } | ||
345 | ls->tokenval = ls->lookaheadval; /* Hand out the buffered lookahead token. */ | ||
346 | ls->token = ls->lookahead; | ||
347 | ls->lookahead = TK_eof; /* Mark the lookahead as consumed. */ | ||
348 | } | ||
349 | |||
350 | LexToken lj_lex_lookahead(LexState *ls) | ||
351 | { | ||
352 | lua_assert(ls->lookahead == TK_eof); /* The lookahead slot must be free. */ | ||
353 | /* Scan one token ahead into the lookahead slot and return it. */ | ||
354 | return (ls->lookahead = llex(ls, &ls->lookaheadval)); | ||
355 | } | ||
356 | |||
357 | const char *lj_lex_token2str(LexState *ls, LexToken token) | ||
358 | { /* Return a printable representation of a token. */ | ||
359 | if (token > TK_OFS) /* Named token: look it up in the name table. */ | ||
360 | return tokennames[token-TK_OFS-1]; | ||
361 | /* Single-char token: control chars are shown by their numeric code. */ | ||
362 | return lj_ctype_iscntrl(token) ? | ||
363 | lj_str_pushf(ls->L, "char(%d)", token) : | ||
364 | lj_str_pushf(ls->L, "%c", token); | ||
365 | } | ||
366 | |||
367 | void lj_lex_error(LexState *ls, LexToken token, ErrMsg em, ...) | ||
368 | { /* Raise a lexer error for message em, optionally quoting a token. */ | ||
369 | const char *tok = NULL; /* Token text passed to lj_err_lex, if any. */ | ||
370 | va_list argp; | ||
371 | switch (token) { | ||
372 | case 0: break; /* No token context. */ | ||
373 | case TK_name: case TK_string: case TK_number: | ||
374 | save(ls, '\0'); /* Terminate the raw token text in the buffer. */ | ||
375 | tok = ls->sb.buf; | ||
376 | break; | ||
377 | default: tok = lj_lex_token2str(ls, token); break; | ||
378 | } | ||
379 | va_start(argp, em); | ||
380 | lj_err_lex(ls->L, strdata(ls->chunkname), tok, ls->linenumber, em, argp); | ||
381 | va_end(argp); | ||
382 | } | ||
383 | |||
384 | void lj_lex_init(lua_State *L) | ||
385 | { /* Intern all reserved words and tag each with its 1-based number. */ | ||
386 | uint32_t id; | ||
387 | for (id = 1; id <= TK_RESERVED; id++) { | ||
388 | GCstr *kw = lj_str_newz(L, tokennames[id-1]); | ||
389 | fixstring(kw); /* Reserved words are never collected. */ | ||
390 | kw->reserved = cast_byte(id); | ||
391 | } | ||
392 | } | ||
393 | |||
diff --git a/src/lj_lex.h b/src/lj_lex.h new file mode 100644 index 00000000..cc5d5a9f --- /dev/null +++ b/src/lj_lex.h | |||
@@ -0,0 +1,63 @@ | |||
1 | /* | ||
2 | ** Lexical analyzer. | ||
3 | ** Major parts taken verbatim from the Lua interpreter. | ||
4 | ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h | ||
5 | */ | ||
6 | |||
7 | #ifndef _LJ_LEX_H | ||
8 | #define _LJ_LEX_H | ||
9 | |||
10 | #include <stdarg.h> | ||
11 | |||
12 | #include "lj_obj.h" | ||
13 | #include "lj_err.h" | ||
14 | |||
15 | /* Lua lexer tokens. */ | ||
16 | #define TKDEF(_, __) \ | ||
17 | _(and) _(break) _(do) _(else) _(elseif) _(end) _(false) \ | ||
18 | _(for) _(function) _(if) _(in) _(local) _(nil) _(not) _(or) \ | ||
19 | _(repeat) _(return) _(then) _(true) _(until) _(while) \ | ||
20 | __(concat, ..) __(dots, ...) __(eq, ==) __(ge, >=) __(le, <=) __(ne, ~=) \ | ||
21 | __(number, <number>) __(name, <name>) __(string, <string>) __(eof, <eof>) | ||
22 | |||
23 | enum { | ||
24 | TK_OFS = 256, | ||
25 | #define TKENUM1(name) TK_##name, | ||
26 | #define TKENUM2(name, sym) TK_##name, | ||
27 | TKDEF(TKENUM1, TKENUM2) | ||
28 | #undef TKENUM1 | ||
29 | #undef TKENUM2 | ||
30 | TK_RESERVED = TK_while - TK_OFS | ||
31 | }; | ||
32 | |||
33 | typedef int LexToken; | ||
34 | |||
35 | /* Lua lexer state. */ | ||
36 | typedef struct LexState { | ||
37 | struct FuncState *fs; /* Current FuncState. Defined in lj_parse.c. */ | ||
38 | struct lua_State *L; /* Lua state. */ | ||
39 | TValue tokenval; /* Current token value. */ | ||
40 | TValue lookaheadval; /* Lookahead token value. */ | ||
41 | int current; /* Current character (charint). */ | ||
42 | LexToken token; /* Current token. */ | ||
43 | LexToken lookahead; /* Lookahead token. */ | ||
44 | SBuf sb; /* String buffer for tokens. */ | ||
45 | const char *p; /* Current position in input buffer. */ | ||
46 | MSize n; /* Bytes left in input buffer. */ | ||
47 | lua_Reader rfunc; /* Reader callback. */ | ||
48 | void *rdata; /* Reader callback data. */ | ||
49 | BCLine linenumber; /* Input line counter. */ | ||
50 | BCLine lastline; /* Line of last token. */ | ||
51 | GCstr *chunkname; /* Current chunk name (interned string). */ | ||
52 | const char *chunkarg; /* Chunk name argument. */ | ||
53 | uint32_t level; /* Syntactical nesting level. */ | ||
54 | } LexState; | ||
55 | |||
56 | LJ_FUNC void lj_lex_start(lua_State *L, LexState *ls); | ||
57 | LJ_FUNC void lj_lex_next(LexState *ls); | ||
58 | LJ_FUNC LexToken lj_lex_lookahead(LexState *ls); | ||
59 | LJ_FUNC const char *lj_lex_token2str(LexState *ls, LexToken token); | ||
60 | LJ_FUNC_NORET void lj_lex_error(LexState *ls, LexToken token, ErrMsg em, ...); | ||
61 | LJ_FUNC void lj_lex_init(lua_State *L); | ||
62 | |||
63 | #endif | ||
diff --git a/src/lj_lib.c b/src/lj_lib.c new file mode 100644 index 00000000..683c66d6 --- /dev/null +++ b/src/lj_lib.c | |||
@@ -0,0 +1,216 @@ | |||
1 | /* | ||
2 | ** Library function support. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #define lj_lib_c | ||
7 | #define LUA_CORE | ||
8 | |||
9 | #include "lauxlib.h" | ||
10 | |||
11 | #include "lj_obj.h" | ||
12 | #include "lj_gc.h" | ||
13 | #include "lj_err.h" | ||
14 | #include "lj_str.h" | ||
15 | #include "lj_tab.h" | ||
16 | #include "lj_func.h" | ||
17 | #include "lj_vm.h" | ||
18 | #include "lj_lib.h" | ||
19 | |||
20 | /* -- Library initialization ---------------------------------------------- */ | ||
21 | |||
22 | static GCtab *lib_create_table(lua_State *L, const char *libname, int hsize) | ||
23 | { | ||
24 | if (libname) { | ||
25 | luaL_findtable(L, LUA_REGISTRYINDEX, "_LOADED", 16); | ||
26 | lua_getfield(L, -1, libname); | ||
27 | if (!tvistab(L->top-1)) { | ||
28 | L->top--; | ||
29 | if (luaL_findtable(L, LUA_GLOBALSINDEX, libname, hsize) != NULL) | ||
30 | lj_err_callerv(L, LJ_ERR_BADMODN, libname); | ||
31 | settabV(L, L->top, tabV(L->top-1)); | ||
32 | L->top++; | ||
33 | lua_setfield(L, -3, libname); /* _LOADED[libname] = new table */ | ||
34 | } | ||
35 | L->top--; | ||
36 | settabV(L, L->top-1, tabV(L->top)); | ||
37 | } else { | ||
38 | lua_createtable(L, 0, hsize); | ||
39 | } | ||
40 | return tabV(L->top-1); | ||
41 | } | ||
42 | |||
43 | void lj_lib_register(lua_State *L, const char *libname, | ||
44 | const uint8_t *p, const lua_CFunction *cf) | ||
45 | { | ||
46 | GCtab *env = tabref(L->env); | ||
47 | GCfunc *ofn = NULL; | ||
48 | int ffid = *p++; | ||
49 | GCtab *tab = lib_create_table(L, libname, *p++); | ||
50 | ptrdiff_t tpos = L->top - L->base; | ||
51 | |||
52 | /* Avoid barriers further down. */ | ||
53 | if (isblack(obj2gco(tab))) lj_gc_barrierback(G(L), tab); | ||
54 | tab->nomm = 0; | ||
55 | |||
56 | for (;;) { | ||
57 | uint32_t tag = *p++; | ||
58 | MSize len = tag & LIBINIT_LENMASK; | ||
59 | tag &= LIBINIT_TAGMASK; | ||
60 | if (tag != LIBINIT_STRING) { | ||
61 | const char *name; | ||
62 | MSize nuv = (MSize)(L->top - L->base - tpos); | ||
63 | GCfunc *fn = lj_func_newC(L, nuv, env); | ||
64 | if (nuv) { | ||
65 | L->top = L->base + tpos; | ||
66 | memcpy(fn->c.upvalue, L->top, sizeof(TValue)*nuv); | ||
67 | } | ||
68 | fn->c.ffid = (uint8_t)(ffid++); | ||
69 | name = (const char *)p; | ||
70 | p += len; | ||
71 | if (tag != LIBINIT_CF) { | ||
72 | fn->c.gate = makeasmfunc(p[0] + (p[1] << 8)); | ||
73 | p += 2; | ||
74 | } | ||
75 | if (tag == LIBINIT_ASM_) | ||
76 | fn->c.f = ofn->c.f; /* Copy handler from previous function. */ | ||
77 | else | ||
78 | fn->c.f = *cf++; /* Get cf or handler from C function table. */ | ||
79 | if (len) { | ||
80 | /* NOBARRIER: See above for common barrier. */ | ||
81 | setfuncV(L, lj_tab_setstr(L, tab, lj_str_new(L, name, len)), fn); | ||
82 | } | ||
83 | ofn = fn; | ||
84 | } else { | ||
85 | switch (tag | len) { | ||
86 | case LIBINIT_SET: | ||
87 | L->top -= 2; | ||
88 | if (tvisstr(L->top+1) && strV(L->top+1)->len == 0) | ||
89 | env = tabV(L->top); | ||
90 | else /* NOBARRIER: See above for common barrier. */ | ||
91 | copyTV(L, lj_tab_set(L, tab, L->top+1), L->top); | ||
92 | break; | ||
93 | case LIBINIT_NUMBER: | ||
94 | memcpy(&L->top->n, p, sizeof(double)); | ||
95 | L->top++; | ||
96 | p += sizeof(double); | ||
97 | break; | ||
98 | case LIBINIT_COPY: | ||
99 | copyTV(L, L->top, L->top - *p++); | ||
100 | L->top++; | ||
101 | break; | ||
102 | case LIBINIT_LASTCL: | ||
103 | setfuncV(L, L->top++, ofn); | ||
104 | break; | ||
105 | case LIBINIT_FFID: | ||
106 | ffid++; | ||
107 | break; | ||
108 | case LIBINIT_END: | ||
109 | return; | ||
110 | default: | ||
111 | setstrV(L, L->top++, lj_str_new(L, (const char *)p, len)); | ||
112 | p += len; | ||
113 | break; | ||
114 | } | ||
115 | } | ||
116 | } | ||
117 | } | ||
118 | |||
119 | /* -- Type checks --------------------------------------------------------- */ | ||
120 | |||
121 | TValue *lj_lib_checkany(lua_State *L, int narg) | ||
122 | { | ||
123 | TValue *o = L->base + narg-1; | ||
124 | if (o >= L->top) | ||
125 | lj_err_arg(L, narg, LJ_ERR_NOVAL); | ||
126 | return o; | ||
127 | } | ||
128 | |||
129 | GCstr *lj_lib_checkstr(lua_State *L, int narg) | ||
130 | { | ||
131 | TValue *o = L->base + narg-1; | ||
132 | if (o < L->top) { | ||
133 | if (LJ_LIKELY(tvisstr(o))) { | ||
134 | return strV(o); | ||
135 | } else if (tvisnum(o)) { | ||
136 | GCstr *s = lj_str_fromnum(L, &o->n); | ||
137 | setstrV(L, o, s); | ||
138 | return s; | ||
139 | } | ||
140 | } | ||
141 | lj_err_argt(L, narg, LUA_TSTRING); | ||
142 | return NULL; /* unreachable */ | ||
143 | } | ||
144 | |||
145 | GCstr *lj_lib_optstr(lua_State *L, int narg) | ||
146 | { | ||
147 | TValue *o = L->base + narg-1; | ||
148 | return (o < L->top && !tvisnil(o)) ? lj_lib_checkstr(L, narg) : NULL; | ||
149 | } | ||
150 | |||
151 | lua_Number lj_lib_checknum(lua_State *L, int narg) | ||
152 | { | ||
153 | TValue *o = L->base + narg-1; | ||
154 | if (!(o < L->top && | ||
155 | (tvisnum(o) || (tvisstr(o) && lj_str_numconv(strVdata(o), o))))) | ||
156 | lj_err_argt(L, narg, LUA_TNUMBER); | ||
157 | return numV(o); | ||
158 | } | ||
159 | |||
160 | int32_t lj_lib_checkint(lua_State *L, int narg) | ||
161 | { | ||
162 | return lj_num2int(lj_lib_checknum(L, narg)); | ||
163 | } | ||
164 | |||
165 | int32_t lj_lib_optint(lua_State *L, int narg, int32_t def) | ||
166 | { | ||
167 | TValue *o = L->base + narg-1; | ||
168 | return (o < L->top && !tvisnil(o)) ? lj_lib_checkint(L, narg) : def; | ||
169 | } | ||
170 | |||
171 | GCfunc *lj_lib_checkfunc(lua_State *L, int narg) | ||
172 | { | ||
173 | TValue *o = L->base + narg-1; | ||
174 | if (!(o < L->top && tvisfunc(o))) | ||
175 | lj_err_argt(L, narg, LUA_TFUNCTION); | ||
176 | return funcV(o); | ||
177 | } | ||
178 | |||
179 | GCtab *lj_lib_checktab(lua_State *L, int narg) | ||
180 | { | ||
181 | TValue *o = L->base + narg-1; | ||
182 | if (!(o < L->top && tvistab(o))) | ||
183 | lj_err_argt(L, narg, LUA_TTABLE); | ||
184 | return tabV(o); | ||
185 | } | ||
186 | |||
187 | GCtab *lj_lib_checktabornil(lua_State *L, int narg) | ||
188 | { | ||
189 | TValue *o = L->base + narg-1; | ||
190 | if (o < L->top) { | ||
191 | if (tvistab(o)) | ||
192 | return tabV(o); | ||
193 | else if (tvisnil(o)) | ||
194 | return NULL; | ||
195 | } | ||
196 | lj_err_arg(L, narg, LJ_ERR_NOTABN); | ||
197 | return NULL; /* unreachable */ | ||
198 | } | ||
199 | |||
200 | int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst) | ||
201 | { | ||
202 | GCstr *s = def >= 0 ? lj_lib_optstr(L, narg) : lj_lib_checkstr(L, narg); | ||
203 | if (s) { | ||
204 | const char *opt = strdata(s); | ||
205 | MSize len = s->len; | ||
206 | int i; | ||
207 | for (i = 0; *(const uint8_t *)lst; i++) { | ||
208 | if (*(const uint8_t *)lst == len && memcmp(opt, lst+1, len) == 0) | ||
209 | return i; | ||
210 | lst += 1+*(const uint8_t *)lst; | ||
211 | } | ||
212 | lj_err_argv(L, narg, LJ_ERR_INVOPTM, opt); | ||
213 | } | ||
214 | return def; | ||
215 | } | ||
216 | |||
diff --git a/src/lj_lib.h b/src/lj_lib.h new file mode 100644 index 00000000..1cba3778 --- /dev/null +++ b/src/lj_lib.h | |||
@@ -0,0 +1,84 @@ | |||
1 | /* | ||
2 | ** Library function support. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #ifndef _LJ_LIB_H | ||
7 | #define _LJ_LIB_H | ||
8 | |||
9 | #include "lj_obj.h" | ||
10 | |||
11 | /* | ||
12 | ** A fallback handler is called by the assembler VM if the fast path fails: | ||
13 | ** | ||
14 | ** - too few arguments: unrecoverable. | ||
15 | ** - wrong argument type: recoverable, if coercion succeeds. | ||
16 | ** - bad argument value: unrecoverable. | ||
17 | ** - stack overflow: recoverable, if stack reallocation succeeds. | ||
18 | ** - extra handling: recoverable. | ||
19 | ** | ||
20 | ** The unrecoverable cases throw an error with lj_err_arg(), lj_err_argtype(), | ||
21 | ** lj_err_caller() or lj_err_callermsg(). | ||
22 | ** The recoverable cases return 0 or the number of results + 1. | ||
23 | ** The assembler VM retries the fast path only if 0 is returned. | ||
24 | ** This time the fallback must not be called again or it gets stuck in a loop. | ||
25 | */ | ||
26 | |||
27 | /* Return values from fallback handler. */ | ||
28 | #define FFH_RETRY 0 | ||
29 | #define FFH_UNREACHABLE FFH_RETRY | ||
30 | #define FFH_RES(n) ((n)+1) | ||
31 | |||
32 | LJ_FUNC TValue *lj_lib_checkany(lua_State *L, int narg); | ||
33 | LJ_FUNC GCstr *lj_lib_checkstr(lua_State *L, int narg); | ||
34 | LJ_FUNC GCstr *lj_lib_optstr(lua_State *L, int narg); | ||
35 | LJ_FUNC lua_Number lj_lib_checknum(lua_State *L, int narg); | ||
36 | LJ_FUNC int32_t lj_lib_checkint(lua_State *L, int narg); | ||
37 | LJ_FUNC int32_t lj_lib_optint(lua_State *L, int narg, int32_t def); | ||
38 | LJ_FUNC GCfunc *lj_lib_checkfunc(lua_State *L, int narg); | ||
39 | LJ_FUNC GCtab *lj_lib_checktab(lua_State *L, int narg); | ||
40 | LJ_FUNC GCtab *lj_lib_checktabornil(lua_State *L, int narg); | ||
41 | LJ_FUNC int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst); | ||
42 | |||
43 | #define lj_lib_opt(L, narg, gotarg, noarg) \ | ||
44 | { TValue *_o = L->base + (narg)-1; \ | ||
45 | if (_o < L->top && !tvisnil(_o)) { gotarg } else { noarg } } | ||
46 | |||
47 | /* Avoid including lj_frame.h. */ | ||
48 | #define lj_lib_upvalue(L, n) \ | ||
49 | (&gcref((L->base-1)->fr.func)->fn.c.upvalue[(n)-1]) | ||
50 | |||
51 | /* Library function declarations. Scanned by buildvm. */ | ||
52 | #define LJLIB_CF(name) static int lj_cf_##name(lua_State *L) | ||
53 | #define LJLIB_ASM(name) static int lj_ffh_##name(lua_State *L) | ||
54 | #define LJLIB_ASM_(name) | ||
55 | #define LJLIB_SET(name) | ||
56 | #define LJLIB_PUSH(arg) | ||
57 | #define LJLIB_REC(handler) | ||
58 | #define LJLIB_NOREGUV | ||
59 | #define LJLIB_NOREG | ||
60 | |||
61 | #define LJ_LIB_REG(L, name) \ | ||
62 | lj_lib_register(L, #name, lj_lib_init_##name, lj_lib_cf_##name) | ||
63 | #define LJ_LIB_REG_(L, regname, name) \ | ||
64 | lj_lib_register(L, regname, lj_lib_init_##name, lj_lib_cf_##name) | ||
65 | |||
66 | LJ_FUNC void lj_lib_register(lua_State *L, const char *libname, | ||
67 | const uint8_t *init, const lua_CFunction *cf); | ||
68 | |||
69 | /* Library init data tags. */ | ||
70 | #define LIBINIT_LENMASK 0x3f | ||
71 | #define LIBINIT_TAGMASK 0xc0 | ||
72 | #define LIBINIT_CF 0x00 | ||
73 | #define LIBINIT_ASM 0x40 | ||
74 | #define LIBINIT_ASM_ 0x80 | ||
75 | #define LIBINIT_STRING 0xc0 | ||
76 | #define LIBINIT_MAXSTR 0x39 | ||
77 | #define LIBINIT_SET 0xfa | ||
78 | #define LIBINIT_NUMBER 0xfb | ||
79 | #define LIBINIT_COPY 0xfc | ||
80 | #define LIBINIT_LASTCL 0xfd | ||
81 | #define LIBINIT_FFID 0xfe | ||
82 | #define LIBINIT_END 0xff | ||
83 | |||
84 | #endif | ||
diff --git a/src/lj_mcode.c b/src/lj_mcode.c new file mode 100644 index 00000000..e5791e9f --- /dev/null +++ b/src/lj_mcode.c | |||
@@ -0,0 +1,260 @@ | |||
1 | /* | ||
2 | ** Machine code management. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #define lj_mcode_c | ||
7 | #define LUA_CORE | ||
8 | |||
9 | #include "lj_obj.h" | ||
10 | |||
11 | #if LJ_HASJIT | ||
12 | |||
13 | #include "lj_gc.h" | ||
14 | #include "lj_jit.h" | ||
15 | #include "lj_mcode.h" | ||
16 | #include "lj_trace.h" | ||
17 | #include "lj_dispatch.h" | ||
18 | |||
19 | /* -- OS-specific functions ----------------------------------------------- */ | ||
20 | |||
21 | #if defined(LUA_USE_WIN) | ||
22 | |||
23 | #define WIN32_LEAN_AND_MEAN | ||
24 | #include <windows.h> | ||
25 | |||
26 | #define MCPROT_RW PAGE_READWRITE | ||
27 | #define MCPROT_RX PAGE_EXECUTE_READ | ||
28 | #define MCPROT_RWX PAGE_EXECUTE_READWRITE | ||
29 | |||
30 | static LJ_AINLINE void *mcode_alloc(jit_State *J, size_t sz, DWORD prot) | ||
31 | { | ||
32 | void *p = VirtualAlloc(NULL, sz, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, prot); | ||
33 | if (!p) | ||
34 | lj_trace_err(J, LJ_TRERR_MCODEAL); | ||
35 | return p; | ||
36 | } | ||
37 | |||
38 | static LJ_AINLINE void mcode_free(jit_State *J, void *p, size_t sz) | ||
39 | { | ||
40 | UNUSED(J); UNUSED(sz); | ||
41 | VirtualFree(p, 0, MEM_RELEASE); | ||
42 | } | ||
43 | |||
44 | static LJ_AINLINE void mcode_setprot(void *p, size_t sz, DWORD prot) | ||
45 | { | ||
46 | DWORD oprot; | ||
47 | VirtualProtect(p, sz, prot, &oprot); | ||
48 | } | ||
49 | |||
50 | #elif defined(LUA_USE_POSIX) | ||
51 | |||
52 | #include <sys/mman.h> | ||
53 | |||
54 | #ifndef MAP_ANONYMOUS | ||
55 | #define MAP_ANONYMOUS MAP_ANON | ||
56 | #endif | ||
57 | |||
58 | #define MCPROT_RW (PROT_READ|PROT_WRITE) | ||
59 | #define MCPROT_RX (PROT_READ|PROT_EXEC) | ||
60 | #define MCPROT_RWX (PROT_READ|PROT_WRITE|PROT_EXEC) | ||
61 | |||
62 | static LJ_AINLINE void *mcode_alloc(jit_State *J, size_t sz, int prot) | ||
63 | { | ||
64 | void *p = mmap(NULL, sz, prot, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); | ||
65 | if (p == MAP_FAILED) | ||
66 | lj_trace_err(J, LJ_TRERR_MCODEAL); | ||
67 | return p; | ||
68 | } | ||
69 | |||
70 | static LJ_AINLINE void mcode_free(jit_State *J, void *p, size_t sz) | ||
71 | { | ||
72 | UNUSED(J); | ||
73 | munmap(p, sz); | ||
74 | } | ||
75 | |||
76 | static LJ_AINLINE void mcode_setprot(void *p, size_t sz, int prot) | ||
77 | { | ||
78 | mprotect(p, sz, prot); | ||
79 | } | ||
80 | |||
81 | #else | ||
82 | |||
83 | /* Fallback allocator. This will fail if memory is not executable by default. */ | ||
84 | #define LUAJIT_UNPROTECT_MCODE | ||
85 | #define MCPROT_RW 0 | ||
86 | #define MCPROT_RX 0 | ||
87 | #define MCPROT_RWX 0 | ||
88 | |||
89 | static LJ_AINLINE void *mcode_alloc(jit_State *J, size_t sz, int prot) | ||
90 | { | ||
91 | UNUSED(prot); | ||
92 | return lj_mem_new(J->L, sz); | ||
93 | } | ||
94 | |||
95 | static LJ_AINLINE void mcode_free(jit_State *J, void *p, size_t sz) | ||
96 | { | ||
97 | lj_mem_free(J2G(J), p, sz); | ||
98 | } | ||
99 | |||
100 | #define mcode_setprot(p, sz, prot) UNUSED(p) | ||
101 | |||
102 | #endif | ||
103 | |||
104 | /* -- MCode area management ----------------------------------------------- */ | ||
105 | |||
106 | /* Define this ONLY if the page protection twiddling becomes a bottleneck. */ | ||
107 | #ifdef LUAJIT_UNPROTECT_MCODE | ||
108 | |||
109 | /* It's generally considered to be a potential security risk to have | ||
110 | ** pages with simultaneous write *and* execute access in a process. | ||
111 | ** | ||
112 | ** Do not even think about using this mode for server processes or | ||
113 | ** apps handling untrusted external data (such as a browser). | ||
114 | ** | ||
115 | ** The security risk is not in LuaJIT itself -- but if an adversary finds | ||
116 | ** any *other* flaw in your C application logic, then any RWX memory page | ||
117 | ** simplifies writing an exploit considerably. | ||
118 | */ | ||
119 | #define MCPROT_GEN MCPROT_RWX | ||
120 | #define MCPROT_RUN MCPROT_RWX | ||
121 | |||
122 | #else | ||
123 | |||
124 | /* This is the default behaviour and much safer: | ||
125 | ** | ||
126 | ** Most of the time the memory pages holding machine code are executable, | ||
127 | ** but NONE of them is writable. | ||
128 | ** | ||
129 | ** The current memory area is marked read-write (but NOT executable) only | ||
130 | ** during the short time window while the assembler generates machine code. | ||
131 | */ | ||
132 | #define MCPROT_GEN MCPROT_RW | ||
133 | #define MCPROT_RUN MCPROT_RX | ||
134 | |||
135 | #endif | ||
136 | |||
137 | /* Change protection of MCode area. */ | ||
138 | static void mcode_protect(jit_State *J, int prot) | ||
139 | { | ||
140 | #ifdef LUAJIT_UNPROTECT_MCODE | ||
141 | UNUSED(J); UNUSED(prot); | ||
142 | #else | ||
143 | if (J->mcprot != prot) { | ||
144 | mcode_setprot(J->mcarea, J->szmcarea, prot); | ||
145 | J->mcprot = prot; | ||
146 | } | ||
147 | #endif | ||
148 | } | ||
149 | |||
150 | /* Linked list of MCode areas. */ | ||
151 | typedef struct MCLink { | ||
152 | MCode *next; /* Next area. */ | ||
153 | size_t size; /* Size of current area. */ | ||
154 | } MCLink; | ||
155 | |||
156 | /* Allocate a new MCode area. */ | ||
157 | static void mcode_allocarea(jit_State *J) | ||
158 | { | ||
159 | MCode *oldarea = J->mcarea; | ||
160 | size_t sz = (size_t)J->param[JIT_P_sizemcode] << 10; | ||
161 | sz = (sz + LJ_PAGESIZE-1) & ~(size_t)(LJ_PAGESIZE - 1); | ||
162 | J->mcarea = (MCode *)mcode_alloc(J, sz, MCPROT_GEN); | ||
163 | J->szmcarea = sz; | ||
164 | J->mcprot = MCPROT_GEN; | ||
165 | J->mctop = (MCode *)((char *)J->mcarea + J->szmcarea); | ||
166 | J->mcbot = (MCode *)((char *)J->mcarea + sizeof(MCLink)); | ||
167 | ((MCLink *)J->mcarea)->next = oldarea; | ||
168 | ((MCLink *)J->mcarea)->size = sz; | ||
169 | J->szallmcarea += sz; | ||
170 | } | ||
171 | |||
172 | /* Free all MCode areas. */ | ||
173 | void lj_mcode_free(jit_State *J) | ||
174 | { | ||
175 | MCode *mc = J->mcarea; | ||
176 | J->mcarea = NULL; | ||
177 | J->szallmcarea = 0; | ||
178 | while (mc) { | ||
179 | MCode *next = ((MCLink *)mc)->next; | ||
180 | mcode_free(J, mc, ((MCLink *)mc)->size); | ||
181 | mc = next; | ||
182 | } | ||
183 | } | ||
184 | |||
185 | /* -- MCode transactions -------------------------------------------------- */ | ||
186 | |||
187 | /* Reserve the remainder of the current MCode area. */ | ||
188 | MCode *lj_mcode_reserve(jit_State *J, MCode **lim) | ||
189 | { | ||
190 | if (!J->mcarea) | ||
191 | mcode_allocarea(J); | ||
192 | else | ||
193 | mcode_protect(J, MCPROT_GEN); | ||
194 | *lim = J->mcbot; | ||
195 | return J->mctop; | ||
196 | } | ||
197 | |||
198 | /* Commit the top part of the current MCode area. */ | ||
199 | void lj_mcode_commit(jit_State *J, MCode *top) | ||
200 | { | ||
201 | J->mctop = top; | ||
202 | mcode_protect(J, MCPROT_RUN); | ||
203 | } | ||
204 | |||
205 | /* Abort the reservation. */ | ||
206 | void lj_mcode_abort(jit_State *J) | ||
207 | { | ||
208 | mcode_protect(J, MCPROT_RUN); | ||
209 | } | ||
210 | |||
211 | /* Set/reset protection to allow patching of MCode areas. */ | ||
212 | MCode *lj_mcode_patch(jit_State *J, MCode *ptr, int finish) | ||
213 | { | ||
214 | #ifdef LUAJIT_UNPROTECT_MCODE | ||
215 | UNUSED(J); UNUSED(ptr); UNUSED(finish); | ||
216 | return NULL; | ||
217 | #else | ||
218 | if (finish) { | ||
219 | if (J->mcarea == ptr) | ||
220 | mcode_protect(J, MCPROT_RUN); | ||
221 | else | ||
222 | mcode_setprot(ptr, ((MCLink *)ptr)->size, MCPROT_RUN); | ||
223 | return NULL; | ||
224 | } else { | ||
225 | MCode *mc = J->mcarea; | ||
226 | /* Try current area first to use the protection cache. */ | ||
227 | if (ptr >= mc && ptr < mc + J->szmcarea) { | ||
228 | mcode_protect(J, MCPROT_GEN); | ||
229 | return mc; | ||
230 | } | ||
231 | /* Otherwise search through the list of MCode areas. */ | ||
232 | for (;;) { | ||
233 | mc = ((MCLink *)mc)->next; | ||
234 | lua_assert(mc != NULL); | ||
235 | if (ptr >= mc && ptr < mc + ((MCLink *)mc)->size) { | ||
236 | mcode_setprot(mc, ((MCLink *)mc)->size, MCPROT_GEN); | ||
237 | return mc; | ||
238 | } | ||
239 | } | ||
240 | } | ||
241 | #endif | ||
242 | } | ||
243 | |||
244 | /* Limit of MCode reservation reached. */ | ||
245 | void lj_mcode_limiterr(jit_State *J, size_t need) | ||
246 | { | ||
247 | size_t sizemcode, maxmcode; | ||
248 | lj_mcode_abort(J); | ||
249 | sizemcode = (size_t)J->param[JIT_P_sizemcode] << 10; | ||
250 | sizemcode = (sizemcode + LJ_PAGESIZE-1) & ~(size_t)(LJ_PAGESIZE - 1); | ||
251 | maxmcode = (size_t)J->param[JIT_P_maxmcode] << 10; | ||
252 | if ((size_t)need > sizemcode) | ||
253 | lj_trace_err(J, LJ_TRERR_MCODEOV); /* Too long for any area. */ | ||
254 | if (J->szallmcarea + sizemcode > maxmcode) | ||
255 | lj_trace_err(J, LJ_TRERR_MCODEAL); | ||
256 | mcode_allocarea(J); | ||
257 | lj_trace_err(J, LJ_TRERR_MCODELM); /* Retry with new area. */ | ||
258 | } | ||
259 | |||
260 | #endif | ||
diff --git a/src/lj_mcode.h b/src/lj_mcode.h new file mode 100644 index 00000000..d4573bf4 --- /dev/null +++ b/src/lj_mcode.h | |||
@@ -0,0 +1,23 @@ | |||
1 | /* | ||
2 | ** Machine code management. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #ifndef _LJ_MCODE_H | ||
7 | #define _LJ_MCODE_H | ||
8 | |||
9 | #include "lj_jit.h" | ||
10 | |||
11 | #if LJ_HASJIT | ||
12 | LJ_FUNC void lj_mcode_free(jit_State *J); | ||
13 | LJ_FUNC MCode *lj_mcode_reserve(jit_State *J, MCode **lim); | ||
14 | LJ_FUNC void lj_mcode_commit(jit_State *J, MCode *m); | ||
15 | LJ_FUNC void lj_mcode_abort(jit_State *J); | ||
16 | LJ_FUNC MCode *lj_mcode_patch(jit_State *J, MCode *ptr, int finish); | ||
17 | LJ_FUNC_NORET void lj_mcode_limiterr(jit_State *J, size_t need); | ||
18 | |||
19 | #define lj_mcode_commitbot(J, m) (J->mcbot = (m)) | ||
20 | |||
21 | #endif | ||
22 | |||
23 | #endif | ||
diff --git a/src/lj_meta.c b/src/lj_meta.c new file mode 100644 index 00000000..dff01f85 --- /dev/null +++ b/src/lj_meta.c | |||
@@ -0,0 +1,358 @@ | |||
1 | /* | ||
2 | ** Metamethod handling. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | ** | ||
5 | ** Portions taken verbatim or adapted from the Lua interpreter. | ||
6 | ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h | ||
7 | */ | ||
8 | |||
9 | #define lj_meta_c | ||
10 | #define LUA_CORE | ||
11 | |||
12 | #include "lj_obj.h" | ||
13 | #include "lj_gc.h" | ||
14 | #include "lj_err.h" | ||
15 | #include "lj_str.h" | ||
16 | #include "lj_tab.h" | ||
17 | #include "lj_meta.h" | ||
18 | #include "lj_bc.h" | ||
19 | #include "lj_vm.h" | ||
20 | |||
21 | /* -- Metamethod handling ------------------------------------------------- */ | ||
22 | |||
23 | /* String interning of metamethod names for fast indexing. */ | ||
24 | void lj_meta_init(lua_State *L) | ||
25 | { | ||
26 | #define MMNAME(name) "__" #name | ||
27 | const char *metanames = MMDEF(MMNAME); | ||
28 | #undef MMNAME | ||
29 | global_State *g = G(L); | ||
30 | const char *p, *q; | ||
31 | uint32_t i; | ||
32 | for (i = 0, p = metanames; *p; i++, p = q) { | ||
33 | GCstr *s; | ||
34 | for (q = p+2; *q && *q != '_'; q++) ; | ||
35 | s = lj_str_new(L, p, (size_t)(q-p)); | ||
36 | fixstring(s); /* Never collect these names. */ | ||
37 | /* NOBARRIER: g->mmname[] is a GC root. */ | ||
38 | setgcref(g->mmname[i], obj2gco(s)); | ||
39 | } | ||
40 | } | ||
41 | |||
42 | /* Negative caching of a few fast metamethods. See the lj_meta_fast() macro. */ | ||
43 | cTValue *lj_meta_cache(GCtab *mt, MMS mm, GCstr *name) | ||
44 | { | ||
45 | cTValue *mo = lj_tab_getstr(mt, name); | ||
46 | lua_assert(mm <= MM_FAST); | ||
47 | if (!mo || tvisnil(mo)) { /* No metamethod? */ | ||
48 | mt->nomm |= cast_byte(1u<<mm); /* Set negative cache flag. */ | ||
49 | return NULL; | ||
50 | } | ||
51 | return mo; | ||
52 | } | ||
53 | |||
54 | /* Lookup metamethod for object. */ | ||
55 | cTValue *lj_meta_lookup(lua_State *L, cTValue *o, MMS mm) | ||
56 | { | ||
57 | GCtab *mt; | ||
58 | if (tvistab(o)) | ||
59 | mt = tabref(tabV(o)->metatable); | ||
60 | else if (tvisudata(o)) | ||
61 | mt = tabref(udataV(o)->metatable); | ||
62 | else | ||
63 | mt = tabref(G(L)->basemt[itypemap(o)]); | ||
64 | if (mt) { | ||
65 | cTValue *mo = lj_tab_getstr(mt, strref(G(L)->mmname[mm])); | ||
66 | if (mo) | ||
67 | return mo; | ||
68 | } | ||
69 | return niltv(L); | ||
70 | } | ||
71 | |||
72 | /* Setup call to metamethod to be run by Assembler VM. */ | ||
73 | static TValue *mmcall(lua_State *L, ASMFunction cont, cTValue *mo, | ||
74 | cTValue *a, cTValue *b) | ||
75 | { | ||
76 | /* | ||
77 | **           |-- framesize -> top       top+1       top+2 top+3 | ||
78 | ** before:   [func slots ...] | ||
79 | ** mm setup: [func slots ...] [cont|?]  [mo|tmtype] [a]   [b] | ||
80 | ** in asm:   [func slots ...] [cont|PC] [mo|delta]  [a]   [b] | ||
81 | **           ^-- func base                          ^-- mm base | ||
82 | ** after mm: [func slots ...]           [result] | ||
83 | **            ^-- copy to base[PC_RA] --/     for lj_cont_ra | ||
84 | **                    istruecond + branch     for lj_cont_cond* | ||
85 | **                                 ignore     for lj_cont_nop | ||
86 | ** next PC:  [func slots ...] | ||
87 | */ | ||
88 | TValue *top = L->top; | ||
89 | if (curr_funcisL(L)) top = curr_topL(L); | ||
90 | setcont(top, cont); /* Assembler VM stores PC in upper word. */ | ||
91 | copyTV(L, top+1, mo); /* Store metamethod and two arguments. */ | ||
92 | copyTV(L, top+2, a); | ||
93 | copyTV(L, top+3, b); | ||
94 | return top+2; /* Return new base. */ | ||
95 | } | ||
96 | |||
97 | /* -- C helpers for some instructions, called from assembler VM ----------- */ | ||
98 | |||
99 | /* Helper for TGET*. __index chain and metamethod. */ | ||
100 | cTValue *lj_meta_tget(lua_State *L, cTValue *o, cTValue *k) | ||
101 | { | ||
102 | int loop; | ||
103 | for (loop = 0; loop < LJ_MAX_IDXCHAIN; loop++) { | ||
104 | cTValue *mo; | ||
105 | if (tvistab(o)) { | ||
106 | GCtab *t = tabV(o); | ||
107 | cTValue *tv = lj_tab_get(L, t, k); | ||
108 | if (!tvisnil(tv) || | ||
109 | !(mo = lj_meta_fast(L, tabref(t->metatable), MM_index))) | ||
110 | return tv; | ||
111 | } else if (tvisnil(mo = lj_meta_lookup(L, o, MM_index))) { | ||
112 | lj_err_optype(L, o, LJ_ERR_OPINDEX); | ||
113 | return NULL; /* unreachable */ | ||
114 | } | ||
115 | if (tvisfunc(mo)) { | ||
116 | L->top = mmcall(L, lj_cont_ra, mo, o, k); | ||
117 | return NULL; /* Trigger metamethod call. */ | ||
118 | } | ||
119 | o = mo; | ||
120 | } | ||
121 | lj_err_msg(L, LJ_ERR_GETLOOP); | ||
122 | return NULL; /* unreachable */ | ||
123 | } | ||
124 | |||
125 | /* Helper for TSET*. __newindex chain and metamethod. */ | ||
126 | TValue *lj_meta_tset(lua_State *L, cTValue *o, cTValue *k) | ||
127 | { | ||
128 | TValue tmp; | ||
129 | int loop; | ||
130 | for (loop = 0; loop < LJ_MAX_IDXCHAIN; loop++) { | ||
131 | cTValue *mo; | ||
132 | if (tvistab(o)) { | ||
133 | GCtab *t = tabV(o); | ||
134 | TValue *tv = lj_tab_set(L, t, k); | ||
135 | if (!tvisnil(tv) || | ||
136 | !(mo = lj_meta_fast(L, tabref(t->metatable), MM_newindex))) { | ||
137 | if (isblack(obj2gco(t))) lj_gc_barrierback(G(L), t); | ||
138 | return tv; | ||
139 | } | ||
140 | } else if (tvisnil(mo = lj_meta_lookup(L, o, MM_newindex))) { | ||
141 | lj_err_optype(L, o, LJ_ERR_OPINDEX); | ||
142 | return NULL; /* unreachable */ | ||
143 | } | ||
144 | if (tvisfunc(mo)) { | ||
145 | L->top = mmcall(L, lj_cont_nop, mo, o, k); | ||
146 | /* L->top+2 = v filled in by caller. */ | ||
147 | return NULL; /* Trigger metamethod call. */ | ||
148 | } | ||
149 | copyTV(L, &tmp, mo); | ||
150 | o = &tmp; | ||
151 | } | ||
152 | lj_err_msg(L, LJ_ERR_SETLOOP); | ||
153 | return NULL; /* unreachable */ | ||
154 | } | ||
155 | |||
156 | static cTValue *str2num(cTValue *o, TValue *n) | ||
157 | { | ||
158 | if (tvisnum(o)) | ||
159 | return o; | ||
160 | else if (tvisstr(o) && lj_str_numconv(strVdata(o), n)) | ||
161 | return n; | ||
162 | else | ||
163 | return NULL; | ||
164 | } | ||
165 | |||
166 | /* Helper for arithmetic instructions. Coercion, metamethod. */ | ||
167 | TValue *lj_meta_arith(lua_State *L, TValue *ra, cTValue *rb, cTValue *rc, | ||
168 | BCReg op) | ||
169 | { | ||
170 | MMS mm = bcmode_mm(op); | ||
171 | TValue tempb, tempc; | ||
172 | cTValue *b, *c; | ||
173 | if ((b = str2num(rb, &tempb)) != NULL && | ||
174 | (c = str2num(rc, &tempc)) != NULL) { /* Try coercion first. */ | ||
175 | setnumV(ra, lj_vm_foldarith(numV(b), numV(c), (int)mm-MM_add)); | ||
176 | return NULL; | ||
177 | } else { | ||
178 | cTValue *mo = lj_meta_lookup(L, rb, mm); | ||
179 | if (tvisnil(mo)) { | ||
180 | mo = lj_meta_lookup(L, rc, mm); | ||
181 | if (tvisnil(mo)) { | ||
182 | if (str2num(rb, &tempb) == NULL) rc = rb; | ||
183 | lj_err_optype(L, rc, LJ_ERR_OPARITH); | ||
184 | return NULL; /* unreachable */ | ||
185 | } | ||
186 | } | ||
187 | return mmcall(L, lj_cont_ra, mo, rb, rc); | ||
188 | } | ||
189 | } | ||
190 | |||
191 | /* In-place coercion of a number to a string. */ | ||
192 | static LJ_AINLINE int tostring(lua_State *L, TValue *o) | ||
193 | { | ||
194 | if (tvisstr(o)) { | ||
195 | return 1; | ||
196 | } else if (tvisnum(o)) { | ||
197 | setstrV(L, o, lj_str_fromnum(L, &o->n)); | ||
198 | return 1; | ||
199 | } else { | ||
200 | return 0; | ||
201 | } | ||
202 | } | ||
203 | |||
204 | /* Helper for CAT. Coercion, iterative concat, __concat metamethod. */ | ||
205 | TValue *lj_meta_cat(lua_State *L, TValue *top, int left) | ||
206 | { | ||
207 | do { | ||
208 | int n = 1; | ||
209 | if (!(tvisstr(top-1) || tvisnum(top-1)) || !tostring(L, top)) { | ||
210 | cTValue *mo = lj_meta_lookup(L, top-1, MM_concat); | ||
211 | if (tvisnil(mo)) { | ||
212 | mo = lj_meta_lookup(L, top, MM_concat); | ||
213 | if (tvisnil(mo)) { | ||
214 | if (tvisstr(top-1) || tvisnum(top-1)) top++; | ||
215 | lj_err_optype(L, top-1, LJ_ERR_OPCAT); | ||
216 | return NULL; /* unreachable */ | ||
217 | } | ||
218 | } | ||
219 | /* One of the top two elements is not a string, call __concat metamethod: | ||
220 | ** | ||
221 | ** before:    [...][CAT stack .........................] | ||
222 | **                                 top-1     top         top+1 top+2 | ||
223 | ** pick two:  [...][CAT stack ...] [o1]      [o2] | ||
224 | ** setup mm:  [...][CAT stack ...] [cont|?]  [mo|tmtype] [o1]  [o2] | ||
225 | ** in asm:    [...][CAT stack ...] [cont|PC] [mo|delta]  [o1]  [o2] | ||
226 | **            ^-- func base                              ^-- mm base | ||
227 | ** after mm:  [...][CAT stack ...] <--push-- [result] | ||
228 | ** next step: [...][CAT stack .............] | ||
229 | */ | ||
230 | copyTV(L, top+2, top) /* Careful with the order of stack copies! */ | ||
231 | copyTV(L, top+1, top-1) | ||
232 | copyTV(L, top, mo) | ||
233 | setcont(top-1, lj_cont_cat); | ||
234 | return top+1; /* Trigger metamethod call. */ | ||
235 | } else if (strV(top)->len == 0) { /* Shortcut. */ | ||
236 | (void)tostring(L, top-1); | ||
237 | } else { | ||
238 | /* Pick as many strings as possible from the top and concatenate them: | ||
239 | ** | ||
240 | ** before:    [...][CAT stack ...........................] | ||
241 | ** pick str:  [...][CAT stack ...] [...... strings ......] | ||
242 | ** concat:    [...][CAT stack ...] [result] | ||
243 | ** next step: [...][CAT stack ............] | ||
244 | */ | ||
245 | MSize tlen = strV(top)->len; | ||
246 | char *buffer; | ||
247 | int i; | ||
248 | for (n = 1; n <= left && tostring(L, top-n); n++) { | ||
249 | MSize len = strV(top-n)->len; | ||
250 | if (len >= LJ_MAX_STR - tlen) | ||
251 | lj_err_msg(L, LJ_ERR_STROV); | ||
252 | tlen += len; | ||
253 | } | ||
254 | buffer = lj_str_needbuf(L, &G(L)->tmpbuf, tlen); | ||
255 | n--; | ||
256 | tlen = 0; | ||
257 | for (i = n; i >= 0; i--) { | ||
258 | MSize len = strV(top-i)->len; | ||
259 | memcpy(buffer + tlen, strVdata(top-i), len); | ||
260 | tlen += len; | ||
261 | } | ||
262 | setstrV(L, top-n, lj_str_new(L, buffer, tlen)); | ||
263 | } | ||
264 | left -= n; | ||
265 | top -= n; | ||
266 | } while (left >= 1); | ||
267 | lj_gc_check_fixtop(L); | ||
268 | return NULL; | ||
269 | } | ||
270 | |||
271 | /* Helper for LEN. __len metamethod. */ | ||
272 | TValue *lj_meta_len(lua_State *L, cTValue *o) | ||
273 | { | ||
274 | cTValue *mo = lj_meta_lookup(L, o, MM_len); | ||
275 | if (tvisnil(mo)) { | ||
276 | lj_err_optype(L, o, LJ_ERR_OPLEN); | ||
277 | return NULL; /* unreachable */ | ||
278 | } | ||
279 | return mmcall(L, lj_cont_ra, mo, o, niltv(L)); | ||
280 | } | ||
281 | |||
282 | /* Helper for equality comparisons. __eq metamethod. */ | ||
283 | TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne) | ||
284 | { | ||
285 | /* Field metatable must be at same offset for GCtab and GCudata! */ | ||
286 | cTValue *mo = lj_meta_fast(L, tabref(o1->gch.metatable), MM_eq); | ||
287 | if (mo) { | ||
288 | TValue *top; | ||
289 | int it; | ||
290 | if (tabref(o1->gch.metatable) != tabref(o2->gch.metatable)) { | ||
291 | cTValue *mo2 = lj_meta_fast(L, tabref(o2->gch.metatable), MM_eq); | ||
292 | if (mo2 == NULL || !lj_obj_equal(mo, mo2)) | ||
293 | return cast(TValue *, (intptr_t)ne); | ||
294 | } | ||
295 | top = curr_top(L); | ||
296 | setcont(top, ne ? lj_cont_condf : lj_cont_condt); | ||
297 | copyTV(L, top+1, mo); | ||
298 | it = o1->gch.gct == ~LJ_TTAB ? LJ_TTAB : LJ_TUDATA; | ||
299 | setgcV(L, top+2, &o1->gch, it); | ||
300 | setgcV(L, top+3, &o2->gch, it); | ||
301 | return top+2; /* Trigger metamethod call. */ | ||
302 | } | ||
303 | return cast(TValue *, (intptr_t)ne); | ||
304 | } | ||
305 | |||
306 | /* Helper for ordered comparisons. String compare, __lt/__le metamethods. */ | ||
307 | TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op) | ||
308 | { | ||
309 | if (itype(o1) == itype(o2)) { /* Never called with two numbers. */ | ||
310 | if (tvisstr(o1) && tvisstr(o2)) { | ||
311 | int32_t res = lj_str_cmp(strV(o1), strV(o2)); | ||
312 | return cast(TValue *, (intptr_t)(((op&2) ? res <= 0 : res < 0) ^ (op&1))); | ||
313 | } else { | ||
314 | trymt: | ||
315 | while (1) { | ||
316 | ASMFunction cont = (op & 1) ? lj_cont_condf : lj_cont_condt; | ||
317 | MMS mm = (op & 2) ? MM_le : MM_lt; | ||
318 | cTValue *mo = lj_meta_lookup(L, o1, mm); | ||
319 | cTValue *mo2 = lj_meta_lookup(L, o2, mm); | ||
320 | if (tvisnil(mo) || !lj_obj_equal(mo, mo2)) { | ||
321 | if (op & 2) { /* MM_le not found: retry with MM_lt. */ | ||
322 | cTValue *ot = o1; o1 = o2; o2 = ot; /* Swap operands. */ | ||
323 | op ^= 3; /* Use LT and flip condition. */ | ||
324 | continue; | ||
325 | } | ||
326 | goto err; | ||
327 | } | ||
328 | return mmcall(L, cont, mo, o1, o2); | ||
329 | } | ||
330 | } | ||
331 | } else if (tvisbool(o1) && tvisbool(o2)) { | ||
332 | goto trymt; | ||
333 | } else { | ||
334 | err: | ||
335 | lj_err_comp(L, o1, o2); | ||
336 | return NULL; | ||
337 | } | ||
338 | } | ||
339 | |||
340 | /* Helper for calls. __call metamethod. */ | ||
341 | void lj_meta_call(lua_State *L, TValue *func, TValue *top) | ||
342 | { | ||
343 | cTValue *mo = lj_meta_lookup(L, func, MM_call); | ||
344 | TValue *p; | ||
345 | if (!tvisfunc(mo)) | ||
346 | lj_err_optype_call(L, func); | ||
347 | for (p = top; p > func; p--) copyTV(L, p, p-1); | ||
348 | copyTV(L, func, mo); | ||
349 | } | ||
350 | |||
351 | /* Helper for FORI. Coercion. */ | ||
352 | void lj_meta_for(lua_State *L, TValue *base) | ||
353 | { | ||
354 | if (!str2num(base, base)) lj_err_msg(L, LJ_ERR_FORINIT); | ||
355 | if (!str2num(base+1, base+1)) lj_err_msg(L, LJ_ERR_FORLIM); | ||
356 | if (!str2num(base+2, base+2)) lj_err_msg(L, LJ_ERR_FORSTEP); | ||
357 | } | ||
358 | |||
diff --git a/src/lj_meta.h b/src/lj_meta.h new file mode 100644 index 00000000..60d1e79e --- /dev/null +++ b/src/lj_meta.h | |||
@@ -0,0 +1,33 @@ | |||
1 | /* | ||
2 | ** Metamethod handling. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #ifndef _LJ_META_H | ||
7 | #define _LJ_META_H | ||
8 | |||
9 | #include "lj_obj.h" | ||
10 | |||
11 | /* Metamethod handling */ | ||
12 | LJ_FUNC void lj_meta_init(lua_State *L); | ||
13 | LJ_FUNC cTValue *lj_meta_cache(GCtab *mt, MMS mm, GCstr *name); | ||
14 | LJ_FUNC cTValue *lj_meta_lookup(lua_State *L, cTValue *o, MMS mm); | ||
15 | |||
16 | #define lj_meta_fastg(g, mt, mm) \ | ||
17 | ((mt) == NULL ? NULL : ((mt)->nomm & (1u<<(mm))) ? NULL : \ | ||
18 | lj_meta_cache(mt, mm, strref((g)->mmname[mm]))) | ||
19 | #define lj_meta_fast(L, mt, mm) lj_meta_fastg(G(L), mt, mm) | ||
20 | |||
21 | /* C helpers for some instructions, called from assembler VM. */ | ||
22 | LJ_FUNCA cTValue *lj_meta_tget(lua_State *L, cTValue *o, cTValue *k); | ||
23 | LJ_FUNCA TValue *lj_meta_tset(lua_State *L, cTValue *o, cTValue *k); | ||
24 | LJ_FUNCA TValue *lj_meta_arith(lua_State *L, TValue *ra, cTValue *rb, | ||
25 | cTValue *rc, BCReg op); | ||
26 | LJ_FUNCA TValue *lj_meta_cat(lua_State *L, TValue *top, int left); | ||
27 | LJ_FUNCA TValue *lj_meta_len(lua_State *L, cTValue *o); | ||
28 | LJ_FUNCA TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne); | ||
29 | LJ_FUNCA TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op); | ||
30 | LJ_FUNCA void lj_meta_call(lua_State *L, TValue *func, TValue *top); | ||
31 | LJ_FUNCA void lj_meta_for(lua_State *L, TValue *base); | ||
32 | |||
33 | #endif | ||
diff --git a/src/lj_obj.c b/src/lj_obj.c new file mode 100644 index 00000000..d26a6b38 --- /dev/null +++ b/src/lj_obj.c | |||
@@ -0,0 +1,41 @@ | |||
1 | /* | ||
2 | ** Miscellaneous object handling. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #define lj_obj_c | ||
7 | #define LUA_CORE | ||
8 | |||
9 | #include "lj_obj.h" | ||
10 | |||
11 | /* Object type names. The first entry of lj_obj_typename is "no value". */ | ||
12 | LJ_DATADEF const char *const lj_obj_typename[] = { /* ORDER LUA_T */ | ||
13 | "no value", "nil", "boolean", "userdata", "number", "string", | ||
14 | "table", "function", "userdata", "thread", "proto", "upval" | ||
15 | }; | ||
16 | |||
17 | LJ_DATADEF const char *const lj_obj_itypename[] = { /* ORDER LJ_T */ | ||
18 | "nil", "boolean", "boolean", "userdata", "string", "upval", "thread", /* Index is ~tag: LJ_TNIL (-1) -> 0, ... */ | ||
19 | "proto", "function", "deadkey", "table", "userdata", "number" /* ... LJ_TNUMX (-13) -> 12. */ | ||
20 | }; | ||
21 | |||
22 | /* Raw equality test for two tagged values. Never calls metamethods. */ | ||
23 | int lj_obj_equal(cTValue *o1, cTValue *o2) | ||
24 | { | ||
25 | if (itype(o1) != itype(o2)) { | ||
26 | /* Differing tags can only compare equal if both values are numbers. */ | ||
27 | if (!(tvisnum(o1) && tvisnum(o2))) | ||
28 | return 0; | ||
29 | } else if (tvispri(o1)) { | ||
30 | return 1; /* Same primitive tag: there is no payload to compare. */ | ||
31 | } else if (!tvisnum(o1)) { | ||
32 | #if LJ_64 | ||
33 | if (tvislightud(o1)) | ||
34 | return o1->u64 == o2->u64; /* Compare the whole 64 bit pattern. */ | ||
35 | #endif | ||
36 | return gcrefeq(o1->gcr, o2->gcr); /* Same tag: compare the references. */ | ||
37 | } | ||
38 | /* Only pairs of numbers get here. */ | ||
39 | return numV(o1) == numV(o2); | ||
40 | } | ||
41 | |||
diff --git a/src/lj_obj.h b/src/lj_obj.h new file mode 100644 index 00000000..e5ea713d --- /dev/null +++ b/src/lj_obj.h | |||
@@ -0,0 +1,676 @@ | |||
1 | /* | ||
2 | ** LuaJIT VM tags, values and objects. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | ** | ||
5 | ** Portions taken verbatim or adapted from the Lua interpreter. | ||
6 | ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h | ||
7 | */ | ||
8 | |||
9 | #ifndef _LJ_OBJ_H | ||
10 | #define _LJ_OBJ_H | ||
11 | |||
12 | #include "lua.h" | ||
13 | #include "lj_def.h" | ||
14 | #include "lj_arch.h" | ||
15 | |||
16 | /* -- Memory references (32 bit address space) ---------------------------- */ | ||
17 | |||
18 | /* Memory size. */ | ||
19 | typedef uint32_t MSize; | ||
20 | |||
21 | /* Memory reference */ | ||
22 | typedef struct MRef { | ||
23 | uint32_t ptr32; /* Pseudo 32 bit pointer. */ | ||
24 | } MRef; | ||
25 | |||
26 | #define mref(r, t) ((t *)(void *)(uintptr_t)(r).ptr32) | ||
27 | |||
28 | #define setmref(r, p) ((r).ptr32 = (uint32_t)(uintptr_t)(void *)(p)) | ||
29 | #define setmrefr(r, v) ((r).ptr32 = (v).ptr32) | ||
30 | |||
31 | /* -- GC object references (32 bit address space) ------------------------- */ | ||
32 | |||
33 | /* GCobj reference */ | ||
34 | typedef struct GCRef { | ||
35 | uint32_t gcptr32; /* Pseudo 32 bit pointer. */ | ||
36 | } GCRef; | ||
37 | |||
38 | /* Common GC header for all collectable objects. */ | ||
39 | #define GCHeader GCRef nextgc; uint8_t marked; uint8_t gct | ||
40 | /* This occupies 6 bytes, so use the next 2 bytes for non-32 bit fields. */ | ||
41 | |||
42 | #define gcref(r) ((GCobj *)(uintptr_t)(r).gcptr32) | ||
43 | #define gcrefp(r, t) ((t *)(void *)(uintptr_t)(r).gcptr32) | ||
44 | #define gcrefu(r) ((r).gcptr32) | ||
45 | #define gcrefi(r) ((int32_t)(r).gcptr32) | ||
46 | #define gcrefeq(r1, r2) ((r1).gcptr32 == (r2).gcptr32) | ||
47 | #define gcnext(gc) (gcref((gc)->gch.nextgc)) | ||
48 | |||
49 | #define setgcref(r, gc) ((r).gcptr32 = (uint32_t)(uintptr_t)&(gc)->gch) | ||
50 | #define setgcrefi(r, i) ((r).gcptr32 = (uint32_t)(i)) | ||
51 | #define setgcrefp(r, p) ((r).gcptr32 = (uint32_t)(uintptr_t)(p)) | ||
52 | #define setgcrefnull(r) ((r).gcptr32 = 0) | ||
53 | #define setgcrefr(r, v) ((r).gcptr32 = (v).gcptr32) | ||
54 | |||
55 | /* IMPORTANT NOTE: | ||
56 | ** | ||
57 | ** All uses of the setgcref* macros MUST be accompanied with a write barrier. | ||
58 | ** | ||
59 | ** This is to ensure the integrity of the incremental GC. The invariant | ||
60 | ** to preserve is that a black object never points to a white object. | ||
61 | ** I.e. never store a white object into a field of a black object. | ||
62 | ** | ||
63 | ** It's ok to LEAVE OUT the write barrier ONLY in the following cases: | ||
64 | ** - The source is not a GC object (NULL). | ||
65 | ** - The target is a GC root. I.e. everything in global_State. | ||
66 | ** - The target is a lua_State field (threads are never black). | ||
67 | ** - The target is a stack slot, see setgcV et al. | ||
68 | ** - The target is an open upvalue, i.e. pointing to a stack slot. | ||
69 | ** - The target is a newly created object (i.e. marked white). But make | ||
70 | ** sure nothing invokes the GC in between. | ||
71 | ** - The target and the source are the same object (self-reference). | ||
72 | ** - The target already contains the object (e.g. moving elements around). | ||
73 | ** | ||
74 | ** The most common case is a store to a stack slot. All other cases where | ||
75 | ** a barrier has been omitted are annotated with a NOBARRIER comment. | ||
76 | ** | ||
77 | ** The same logic applies for stores to table slots (array part or hash | ||
78 | ** part). ALL uses of lj_tab_set* require a barrier for the stored *value* | ||
79 | ** (if it's a GC object). The barrier for the *key* is already handled | ||
80 | ** internally by lj_tab_newkey. | ||
81 | */ | ||
82 | |||
83 | /* -- Common type definitions --------------------------------------------- */ | ||
84 | |||
85 | /* Types for handling bytecodes. Need this here, details in lj_bc.h. */ | ||
86 | typedef uint32_t BCIns; /* Bytecode instruction. */ | ||
87 | typedef uint32_t BCPos; /* Bytecode position. */ | ||
88 | typedef uint32_t BCReg; /* Bytecode register. */ | ||
89 | typedef int32_t BCLine; /* Bytecode line number. */ | ||
90 | |||
91 | /* Internal assembler functions. Never call these directly from C. */ | ||
92 | typedef void (*ASMFunction)(void); | ||
93 | |||
94 | /* Resizable string buffer. Need this here, details in lj_str.h. */ | ||
95 | typedef struct SBuf { | ||
96 | char *buf; /* String buffer base. */ | ||
97 | MSize n; /* String buffer length. */ | ||
98 | MSize sz; /* String buffer size. */ | ||
99 | } SBuf; | ||
100 | |||
101 | /* -- Tags and values ----------------------------------------------------- */ | ||
102 | |||
103 | /* Frame link. */ | ||
104 | typedef union { | ||
105 | int32_t ftsz; /* Frame type and size of previous frame. */ | ||
106 | MRef pcr; /* Overlaps PC for Lua frames. */ | ||
107 | } FrameLink; | ||
108 | |||
109 | /* Tagged value. */ | ||
110 | typedef LJ_ALIGN(8) union TValue { | ||
111 | uint64_t u64; /* 64 bit pattern overlaps number. */ | ||
112 | lua_Number n; /* Number object overlaps split tag/value object. */ | ||
113 | struct { | ||
114 | LJ_ENDIAN_LOHI( | ||
115 | GCRef gcr; /* GCobj reference (if any). */ | ||
116 | , int32_t it; /* Internal object tag. Must overlap MSW of number. */ | ||
117 | ) | ||
118 | }; | ||
119 | struct { | ||
120 | LJ_ENDIAN_LOHI( | ||
121 | GCRef func; /* Function for next frame (or dummy L). */ | ||
122 | , FrameLink tp; /* Link to previous frame. */ | ||
123 | ) | ||
124 | } fr; | ||
125 | struct { | ||
126 | LJ_ENDIAN_LOHI( | ||
127 | uint32_t lo; /* Lower 32 bits of number. */ | ||
128 | , uint32_t hi; /* Upper 32 bits of number. */ | ||
129 | ) | ||
130 | } u32; | ||
131 | } TValue; | ||
132 | |||
133 | typedef const TValue cTValue; | ||
134 | |||
135 | #define tvref(r) (mref(r, TValue)) | ||
136 | |||
137 | /* More external and GCobj tags for internal objects. */ | ||
138 | #define LAST_TT LUA_TTHREAD | ||
139 | |||
140 | #define LUA_TPROTO (LAST_TT+1) | ||
141 | #define LUA_TUPVAL (LAST_TT+2) | ||
142 | #define LUA_TDEADKEY (LAST_TT+3) | ||
143 | |||
144 | /* Internal object tags. | ||
145 | ** | ||
146 | ** Internal tags overlap the MSW of a number object (must be a double). | ||
147 | ** Interpreted as a double these are special NaNs. The FPU only generates | ||
148 | ** one type of NaN (0xfff8_0000_0000_0000). So MSWs > 0xfff80000 are available | ||
149 | ** for use as internal tags. Small negative numbers are used to shorten the | ||
150 | ** encoding of type comparisons (reg/mem against sign-ext. 8 bit immediate). | ||
151 | ** | ||
152 | ** ---MSW---.---LSW--- | ||
153 | ** primitive types | itype | | | ||
154 | ** lightuserdata | itype | void * | (32 bit platforms) | ||
155 | ** lightuserdata |fffc| void * | (64 bit platforms, 48 bit pointers) | ||
156 | ** GC objects | itype | GCRef | | ||
157 | ** number -------double------ | ||
158 | ** | ||
159 | ** ORDER LJ_T | ||
160 | ** Primitive types nil/false/true must be first, lightuserdata next. | ||
161 | ** GC objects are at the end, table/userdata must be lowest. | ||
162 | ** Also check lj_ir.h for similar ordering constraints. | ||
163 | */ | ||
164 | #define LJ_TNIL (-1) | ||
165 | #define LJ_TFALSE (-2) | ||
166 | #define LJ_TTRUE (-3) | ||
167 | #define LJ_TLIGHTUD (-4) | ||
168 | #define LJ_TSTR (-5) | ||
169 | #define LJ_TUPVAL (-6) | ||
170 | #define LJ_TTHREAD (-7) | ||
171 | #define LJ_TPROTO (-8) | ||
172 | #define LJ_TFUNC (-9) | ||
173 | #define LJ_TDEADKEY (-10) | ||
174 | #define LJ_TTAB (-11) | ||
175 | #define LJ_TUDATA (-12) | ||
176 | /* This is just the canonical number type used in some places. */ | ||
177 | #define LJ_TNUMX (-13) | ||
178 | |||
179 | #if LJ_64 | ||
180 | #define LJ_TISNUM ((uint32_t)0xfff80000) | ||
181 | #else | ||
182 | #define LJ_TISNUM ((uint32_t)LJ_TNUMX) | ||
183 | #endif | ||
184 | #define LJ_TISTRUECOND ((uint32_t)LJ_TFALSE) | ||
185 | #define LJ_TISPRI ((uint32_t)LJ_TTRUE) | ||
186 | #define LJ_TISGCV ((uint32_t)(LJ_TSTR+1)) | ||
187 | #define LJ_TISTABUD ((uint32_t)LJ_TTAB) | ||
188 | |||
189 | /* -- TValue getters/setters ---------------------------------------------- */ | ||
190 | |||
191 | /* Macros to test types. */ | ||
192 | #define itype(o) ((o)->it) | ||
193 | #define uitype(o) ((uint32_t)itype(o)) | ||
194 | #define tvisnil(o) (itype(o) == LJ_TNIL) | ||
195 | #define tvisfalse(o) (itype(o) == LJ_TFALSE) | ||
196 | #define tvistrue(o) (itype(o) == LJ_TTRUE) | ||
197 | #define tvisbool(o) (tvisfalse(o) || tvistrue(o)) | ||
198 | #if LJ_64 | ||
199 | #define tvislightud(o) ((itype(o) >> 16) == LJ_TLIGHTUD) | ||
200 | #else | ||
201 | #define tvislightud(o) (itype(o) == LJ_TLIGHTUD) | ||
202 | #endif | ||
203 | #define tvisstr(o) (itype(o) == LJ_TSTR) | ||
204 | #define tvisfunc(o) (itype(o) == LJ_TFUNC) | ||
205 | #define tvisthread(o) (itype(o) == LJ_TTHREAD) | ||
206 | #define tvisproto(o) (itype(o) == LJ_TPROTO) | ||
207 | #define tvistab(o) (itype(o) == LJ_TTAB) | ||
208 | #define tvisudata(o) (itype(o) == LJ_TUDATA) | ||
209 | #define tvisnum(o) (uitype(o) <= LJ_TISNUM) | ||
210 | |||
211 | #define tvistruecond(o) (uitype(o) < LJ_TISTRUECOND) | ||
212 | #define tvispri(o) (uitype(o) >= LJ_TISPRI) | ||
213 | #define tvistabud(o) (uitype(o) <= LJ_TISTABUD) /* && !tvisnum() */ | ||
214 | #define tvisgcv(o) \ | ||
215 | ((uitype(o) - LJ_TISGCV) > ((uint32_t)LJ_TNUMX - LJ_TISGCV)) | ||
216 | |||
217 | /* Special macros to test numbers for NaN, +0, -0, +1 and raw equality. */ | ||
218 | #define tvisnan(o) ((o)->n != (o)->n) | ||
219 | #define tvispzero(o) ((o)->u64 == 0) | ||
220 | #define tvismzero(o) ((o)->u64 == U64x(80000000,00000000)) | ||
221 | #define tvispone(o) ((o)->u64 == U64x(3ff00000,00000000)) | ||
222 | #define rawnumequal(o1, o2) ((o1)->u64 == (o2)->u64) | ||
223 | |||
224 | /* Macros to convert type ids. */ | ||
225 | #if LJ_64 | ||
226 | #define itypemap(o) \ | ||
227 | (tvisnum(o) ? ~LJ_TNUMX : tvislightud(o) ? ~LJ_TLIGHTUD : ~itype(o)) | ||
228 | #else | ||
229 | #define itypemap(o) (tvisnum(o) ? ~LJ_TNUMX : ~itype(o)) | ||
230 | #endif | ||
231 | |||
232 | /* Macros to get tagged values. */ | ||
233 | #define gcval(o) (gcref((o)->gcr)) | ||
234 | #define boolV(o) check_exp(tvisbool(o), (LJ_TFALSE - (o)->it)) | ||
235 | #if LJ_64 | ||
236 | #define lightudV(o) check_exp(tvislightud(o), \ | ||
237 | (void *)((o)->u64 & U64x(0000ffff,ffffffff))) | ||
238 | #else | ||
239 | #define lightudV(o) check_exp(tvislightud(o), gcrefp((o)->gcr, void)) | ||
240 | #endif | ||
241 | #define gcV(o) check_exp(tvisgcv(o), gcval(o)) | ||
242 | #define strV(o) check_exp(tvisstr(o), &gcval(o)->str) | ||
243 | #define funcV(o) check_exp(tvisfunc(o), &gcval(o)->fn) | ||
244 | #define threadV(o) check_exp(tvisthread(o), &gcval(o)->th) | ||
245 | #define protoV(o) check_exp(tvisproto(o), &gcval(o)->pt) | ||
246 | #define tabV(o) check_exp(tvistab(o), &gcval(o)->tab) | ||
247 | #define udataV(o) check_exp(tvisudata(o), &gcval(o)->ud) | ||
248 | #define numV(o) check_exp(tvisnum(o), (o)->n) | ||
249 | |||
250 | /* Macros to set tagged values. */ | ||
251 | #define setitype(o, i) ((o)->it = (i)) | ||
252 | #define setnilV(o) ((o)->it = LJ_TNIL) | ||
253 | #define setboolV(o, x) ((o)->it = LJ_TFALSE-(x)) | ||
254 | |||
255 | #if LJ_64 | ||
256 | #define checklightudptr(L, p) \ | ||
257 | (((uint64_t)(p) >> 48) ? (lj_err_msg(L, LJ_ERR_BADLU), NULL) : (p)) | ||
258 | #define setlightudV(o, x) \ | ||
259 | ((o)->u64 = (uint64_t)(x) | (((uint64_t)LJ_TLIGHTUD) << 48)) | ||
260 | #define setcont(o, x) \ | ||
261 | ((o)->u64 = (uint64_t)(x) - (uint64_t)lj_vm_asm_begin) | ||
262 | #else | ||
263 | #define checklightudptr(L, p) (p) | ||
264 | #define setlightudV(o, x) \ | ||
265 | { TValue *i_o = (o); \ | ||
266 | setgcrefp(i_o->gcr, (x)); i_o->it = LJ_TLIGHTUD; } | ||
267 | #define setcont(o, x) \ | ||
268 | { TValue *i_o = (o); \ | ||
269 | setgcrefp(i_o->gcr, (x)); i_o->it = LJ_TLIGHTUD; } | ||
270 | #endif | ||
271 | |||
272 | #define tvchecklive(g, o) \ | ||
273 | lua_assert(!tvisgcv(o) || \ | ||
274 | ((~itype(o) == gcval(o)->gch.gct) && !isdead(g, gcval(o)))) | ||
275 | |||
276 | #define setgcV(L, o, x, itype) \ | ||
277 | { TValue *i_o = (o); \ | ||
278 | setgcrefp(i_o->gcr, &(x)->nextgc); i_o->it = itype; \ | ||
279 | tvchecklive(G(L), i_o); } | ||
280 | #define setstrV(L, o, x) setgcV(L, o, x, LJ_TSTR) | ||
281 | #define setthreadV(L, o, x) setgcV(L, o, x, LJ_TTHREAD) | ||
282 | #define setprotoV(L, o, x) setgcV(L, o, x, LJ_TPROTO) | ||
283 | #define setfuncV(L, o, x) setgcV(L, o, &(x)->l, LJ_TFUNC) | ||
284 | #define settabV(L, o, x) setgcV(L, o, x, LJ_TTAB) | ||
285 | #define setudataV(L, o, x) setgcV(L, o, x, LJ_TUDATA) | ||
286 | |||
287 | #define setnumV(o, x) ((o)->n = (x)) | ||
288 | #define setnanV(o) ((o)->u64 = U64x(fff80000,00000000)) | ||
289 | #define setintV(o, i) ((o)->n = cast_num((int32_t)(i))) | ||
290 | |||
291 | /* Copy tagged values. */ | ||
292 | #define copyTV(L, o1, o2) \ | ||
293 | { cTValue *i_o2 = (o2); TValue *i_o1 = (o1); \ | ||
294 | *i_o1 = *i_o2; tvchecklive(G(L), i_o1); } | ||
295 | |||
296 | /* -- String object ------------------------------------------------------- */ | ||
297 | |||
298 | /* String object header. String payload follows. */ | ||
299 | typedef struct GCstr { | ||
300 | GCHeader; | ||
301 | uint8_t reserved; /* Used by lexer for fast lookup of reserved words. */ | ||
302 | uint8_t unused; | ||
303 | MSize hash; /* Hash of string. */ | ||
304 | MSize len; /* Size of string. */ | ||
305 | } GCstr; | ||
306 | |||
307 | #define strref(r) (&gcref((r))->str) | ||
308 | #define strdata(s) ((const char *)((s)+1)) | ||
309 | #define strdatawr(s) ((char *)((s)+1)) | ||
310 | #define strVdata(o) strdata(strV(o)) | ||
311 | #define sizestring(s) (sizeof(struct GCstr)+(s)->len+1) | ||
312 | |||
313 | /* -- Userdata object ----------------------------------------------------- */ | ||
314 | |||
315 | /* Userdata object. Payload follows. */ | ||
316 | typedef struct GCudata { | ||
317 | GCHeader; | ||
318 | uint8_t unused1; | ||
319 | uint8_t unused2; | ||
320 | GCRef env; /* Should be at same offset in GCfunc. */ | ||
321 | MSize len; /* Size of payload. */ | ||
322 | GCRef metatable; /* Must be at same offset in GCtab. */ | ||
323 | uint32_t align1; /* To force 8 byte alignment of the payload. */ | ||
324 | } GCudata; | ||
325 | |||
326 | #define uddata(u) ((void *)((u)+1)) | ||
327 | #define sizeudata(u) (sizeof(struct GCudata)+(u)->len) | ||
328 | |||
329 | /* -- Prototype object ---------------------------------------------------- */ | ||
330 | |||
331 | /* Split constant array. Collectables are below, numbers above pointer. */ | ||
332 | typedef union ProtoK { | ||
333 | lua_Number *n; /* Numbers. */ | ||
334 | GCRef *gc; /* Collectable objects (strings/table/proto). */ | ||
335 | } ProtoK; | ||
336 | |||
337 | #define SCALE_NUM_GCO ((int32_t)sizeof(lua_Number)/sizeof(GCRef)) | ||
338 | #define round_nkgc(n) (((n) + SCALE_NUM_GCO-1) & ~(SCALE_NUM_GCO-1)) | ||
339 | |||
340 | typedef struct VarInfo { | ||
341 | GCstr *name; /* Local variable name. */ | ||
342 | BCPos startpc; /* First point where the local variable is active. */ | ||
343 | BCPos endpc; /* First point where the local variable is dead. */ | ||
344 | } VarInfo; | ||
345 | |||
346 | typedef struct GCproto { | ||
347 | GCHeader; | ||
348 | uint8_t numparams; /* Number of parameters. */ | ||
349 | uint8_t framesize; /* Fixed frame size. */ | ||
350 | MSize sizebc; /* Number of bytecode instructions. */ | ||
351 | GCRef gclist; /* Must be at same offset as GChead.gclist (static assert below). */ | ||
352 | ProtoK k; /* Split constant array (points to the middle). */ | ||
353 | BCIns *bc; /* Array of bytecode instructions. */ | ||
354 | int16_t *uv; /* Upvalue list. local >= 0. parent uv < 0. */ | ||
355 | MSize sizekgc; /* Number of collectable constants. */ | ||
356 | MSize sizekn; /* Number of lua_Number constants. */ | ||
357 | uint8_t sizeuv; /* Number of upvalues. */ | ||
358 | uint8_t flags; /* Miscellaneous flags (PROTO_* bits, see below). */ | ||
359 | uint16_t trace; /* Anchor for chain of root traces. */ | ||
360 | /* ------ The following fields are for debugging/tracebacks only ------ */ | ||
361 | MSize sizelineinfo; /* Size of lineinfo array (may be 0). */ | ||
362 | MSize sizevarinfo; /* Size of local var info array (may be 0). */ | ||
363 | MSize sizeuvname; /* Size of upvalue names array (may be 0). */ | ||
364 | BCLine linedefined; /* First line of the function definition. */ | ||
365 | BCLine lastlinedefined; /* Last line of the function definition. */ | ||
366 | BCLine *lineinfo; /* Map from bytecode instructions to source lines. */ | ||
367 | struct VarInfo *varinfo; /* Names and extents of local variables. */ | ||
368 | GCstr **uvname; /* Upvalue names. */ | ||
369 | GCstr *chunkname; /* Name of the chunk this function was defined in. */ | ||
370 | } GCproto; | ||
371 | |||
372 | #define PROTO_IS_VARARG 0x01 | ||
373 | #define PROTO_HAS_FNEW 0x02 | ||
374 | #define PROTO_HAS_RETURN 0x04 | ||
375 | #define PROTO_FIXUP_RETURN 0x08 | ||
376 | #define PROTO_NO_JIT 0x10 | ||
377 | #define PROTO_HAS_ILOOP 0x20 | ||
378 | |||
379 | /* -- Upvalue object ------------------------------------------------------ */ | ||
380 | |||
381 | typedef struct GCupval { | ||
382 | GCHeader; | ||
383 | uint8_t closed; /* Set if closed (i.e. uv->v == &uv->tv). */ | ||
384 | uint8_t unused; /* Second of the two bytes left after the 6 byte GCHeader. */ | ||
385 | union { | ||
386 | TValue tv; /* If closed: the value itself. */ | ||
387 | struct { /* If open: double linked list, anchored at thread. */ | ||
388 | GCRef prev; /* Previous list element, see uvprev(). */ | ||
389 | GCRef next; /* Next list element, see uvnext(). */ | ||
390 | }; | ||
391 | }; | ||
392 | TValue *v; /* Points to stack slot (open) or to tv above (closed). */ | ||
393 | #if LJ_32 | ||
394 | int32_t unusedv; /* For consistent alignment (32 bit only). */ | ||
395 | #endif | ||
396 | } GCupval; | ||
397 | |||
398 | #define uvprev(uv_) (&gcref((uv_)->prev)->uv) | ||
399 | #define uvnext(uv_) (&gcref((uv_)->next)->uv) | ||
400 | |||
401 | /* -- Function object (closures) ------------------------------------------ */ | ||
402 | |||
403 | /* Common header for functions. env should be at same offset in GCudata. */ | ||
404 | #define GCfuncHeader \ | ||
405 | GCHeader; uint8_t ffid; uint8_t nupvalues; \ | ||
406 | GCRef env; GCRef gclist; ASMFunction gate | ||
407 | |||
408 | typedef struct GCfuncC { | ||
409 | GCfuncHeader; | ||
410 | lua_CFunction f; /* C function to be called. */ | ||
411 | TValue upvalue[1]; /* Array of upvalues (TValue). */ | ||
412 | } GCfuncC; | ||
413 | |||
414 | typedef struct GCfuncL { | ||
415 | GCfuncHeader; | ||
416 | GCRef pt; /* Link to prototype this function is based on. */ | ||
417 | GCRef uvptr[1]; /* Array of _pointers_ to upvalue objects (GCupval). */ | ||
418 | } GCfuncL; | ||
419 | |||
420 | typedef union GCfunc { | ||
421 | GCfuncC c; | ||
422 | GCfuncL l; | ||
423 | } GCfunc; | ||
424 | |||
425 | #define FF_LUA 0 | ||
426 | #define FF_C 1 | ||
427 | #define isluafunc(fn) ((fn)->c.ffid == FF_LUA) | ||
428 | #define iscfunc(fn) ((fn)->c.ffid == FF_C) | ||
429 | #define isffunc(fn) ((fn)->c.ffid > FF_C) | ||
430 | #define funcproto(fn) check_exp(isluafunc(fn), &gcref((fn)->l.pt)->pt) | ||
431 | #define sizeCfunc(n) (sizeof(GCfuncC)-sizeof(TValue) + sizeof(TValue)*(n)) /* Size of a C function with n TValue upvalues; no (n)-1, so n == 0 is safe. */ | ||
432 | #define sizeLfunc(n) (sizeof(GCfuncL)-sizeof(GCRef) + sizeof(GCRef)*(n)) /* Size of a Lua function with n uvptr[] entries; these are GCRef, not TValue *. */ | ||
433 | |||
434 | /* -- Table object -------------------------------------------------------- */ | ||
435 | |||
436 | /* Hash node. */ | ||
437 | typedef struct Node { | ||
438 | TValue val; /* Value object. Must be first field. */ | ||
439 | TValue key; /* Key object. */ | ||
440 | MRef next; /* Hash chain. */ | ||
441 | int32_t unused; /* For consistent alignment. */ | ||
442 | } Node; | ||
443 | |||
444 | LJ_STATIC_ASSERT(offsetof(Node, val) == 0); | ||
445 | |||
446 | typedef struct GCtab { | ||
447 | GCHeader; | ||
448 | uint8_t nomm; /* Negative cache for fast metamethods. Bit (1u<<mm) set: lj_meta_fastg() yields NULL. */ | ||
449 | int8_t colo; /* Array colocation. */ | ||
450 | MRef array; /* Array part. */ | ||
451 | GCRef gclist; /* Must be at same offset as GChead.gclist (static assert below). */ | ||
452 | GCRef metatable; /* Must be at same offset in GCudata. */ | ||
453 | MRef node; /* Hash part. */ | ||
454 | uint32_t asize; /* Size of array part (keys [0, asize-1]). */ | ||
455 | uint32_t hmask; /* Hash part mask (size of hash part - 1). */ | ||
456 | MRef lastfree; /* Any free position is before this position. */ | ||
457 | } GCtab; | ||
458 | |||
459 | #define sizetabcolo(n) ((n)*sizeof(TValue) + sizeof(GCtab)) | ||
460 | #define tabref(r) (&gcref((r))->tab) | ||
461 | #define noderef(r) (mref((r), Node)) | ||
462 | #define nextnode(n) (mref((n)->next, Node)) | ||
463 | |||
464 | /* -- State objects ------------------------------------------------------- */ | ||
465 | |||
466 | /* VM states. */ | ||
467 | enum { | ||
468 | LJ_VMST_INTERP, /* Interpreter. */ | ||
469 | LJ_VMST_C, /* C function. */ | ||
470 | LJ_VMST_GC, /* Garbage collector. */ | ||
471 | LJ_VMST_EXIT, /* Trace exit handler. */ | ||
472 | LJ_VMST_RECORD, /* Trace recorder. */ | ||
473 | LJ_VMST_OPT, /* Optimizer. */ | ||
474 | LJ_VMST_ASM, /* Assembler. */ | ||
475 | LJ_VMST__MAX | ||
476 | }; | ||
477 | |||
478 | #define setvmstate(g, st) ((g)->vmstate = ~LJ_VMST_##st) | ||
479 | |||
480 | /* Metamethods. */ | ||
481 | #define MMDEF(_) \ | ||
482 | _(index) _(newindex) _(gc) _(mode) _(eq) \ | ||
483 | /* Only the above (fast) metamethods are negative cached (max. 8). */ \ | ||
484 | _(len) _(lt) _(le) _(concat) _(call) \ | ||
485 | /* The following must be in ORDER ARITH. */ \ | ||
486 | _(add) _(sub) _(mul) _(div) _(mod) _(pow) _(unm) \ | ||
487 | /* The following are used in the standard libraries. */ \ | ||
488 | _(metatable) _(tostring) | ||
489 | |||
490 | typedef enum { | ||
491 | #define MMENUM(name) MM_##name, | ||
492 | MMDEF(MMENUM) | ||
493 | #undef MMENUM | ||
494 | MM_MAX, | ||
495 | MM____ = MM_MAX, | ||
496 | MM_FAST = MM_eq | ||
497 | } MMS; | ||
498 | |||
499 | #define BASEMT_MAX ((~LJ_TNUMX)+1) | ||
500 | |||
501 | typedef struct GCState { | ||
502 | MSize total; /* Memory currently allocated. */ | ||
503 | MSize threshold; /* Memory threshold. */ | ||
504 | uint8_t currentwhite; /* Current white color. */ | ||
505 | uint8_t state; /* GC state. */ | ||
506 | uint8_t unused1; | ||
507 | uint8_t unused2; | ||
508 | MSize sweepstr; /* Sweep position in string table. */ | ||
509 | GCRef root; /* List of all collectable objects. */ | ||
510 | GCRef *sweep; /* Sweep position in root list. */ | ||
511 | GCRef gray; /* List of gray objects. */ | ||
512 | GCRef grayagain; /* List of objects for atomic traversal. */ | ||
513 | GCRef weak; /* List of weak tables (to be cleared). */ | ||
514 | GCRef mmudata; /* List of userdata (to be finalized). */ | ||
515 | MSize stepmul; /* Incremental GC step granularity. */ | ||
516 | MSize debt; /* Debt (how much GC is behind schedule). */ | ||
517 | MSize estimate; /* Estimate of memory actually in use. */ | ||
518 | MSize pause; /* Pause between successive GC cycles. */ | ||
519 | } GCState; | ||
520 | |||
521 | /* Global state, shared by all threads of a Lua universe. */ | ||
522 | typedef struct global_State { | ||
523 | GCRef *strhash; /* String hash table (hash chain anchors). */ | ||
524 | MSize strmask; /* String hash mask (size of hash table - 1). */ | ||
525 | MSize strnum; /* Number of strings in hash table. */ | ||
526 | lua_Alloc allocf; /* Memory allocator. */ | ||
527 | void *allocd; /* Memory allocator data. */ | ||
528 | GCState gc; /* Garbage collector. */ | ||
529 | SBuf tmpbuf; /* Temporary buffer for string concatenation. */ | ||
530 | Node nilnode; /* Fallback 1-element hash part (nil key and value). */ | ||
531 | uint8_t hookmask; /* Hook mask. */ | ||
532 | uint8_t dispatchmode; /* Dispatch mode. */ | ||
533 | uint8_t vmevmask; /* VM event mask. */ | ||
534 | uint8_t unused1; | ||
535 | GCRef mainthref; /* Link to main thread. */ | ||
536 | TValue registrytv; /* Anchor for registry. */ | ||
537 | TValue tmptv; /* Temporary TValue. */ | ||
538 | GCupval uvhead; /* Head of double-linked list of all open upvalues. */ | ||
539 | int32_t hookcount; /* Instruction hook countdown. */ | ||
540 | int32_t hookcstart; /* Start count for instruction hook counter. */ | ||
541 | lua_Hook hookf; /* Hook function. */ | ||
542 | lua_CFunction panic; /* Called as a last resort for errors. */ | ||
543 | volatile int32_t vmstate; /* VM state or current JIT code trace number. */ | ||
544 | GCRef jit_L; /* Current JIT code lua_State or NULL. */ | ||
545 | MRef jit_base; /* Current JIT code L->base. */ | ||
546 | GCRef basemt[BASEMT_MAX]; /* Metatables for base types. */ | ||
547 | GCRef mmname[MM_MAX]; /* Array holding metamethod names. */ | ||
548 | } global_State; | ||
549 | |||
550 | #define mainthread(g) (&gcref((g)->mainthref)->th) /* Main thread linked from global state g; argument parenthesized like niltvg(). */ | ||
551 | #define niltv(L) \ | ||
552 | check_exp(tvisnil(&G(L)->nilnode.val), &G(L)->nilnode.val) | ||
553 | #define niltvg(g) \ | ||
554 | check_exp(tvisnil(&(g)->nilnode.val), &(g)->nilnode.val) | ||
555 | |||
556 | /* Hook management. Hook event masks are defined in lua.h. */ | ||
557 | #define HOOK_EVENTMASK 0x0f | ||
558 | #define HOOK_ACTIVE 0x10 | ||
559 | #define HOOK_VMEVENT 0x20 | ||
560 | #define HOOK_GC 0x40 | ||
561 | #define hook_active(g) ((g)->hookmask & HOOK_ACTIVE) | ||
562 | #define hook_enter(g) ((g)->hookmask |= HOOK_ACTIVE) | ||
563 | #define hook_entergc(g) ((g)->hookmask |= (HOOK_ACTIVE|HOOK_GC)) | ||
564 | #define hook_vmevent(g) ((g)->hookmask |= (HOOK_ACTIVE|HOOK_VMEVENT)) | ||
565 | #define hook_leave(g) ((g)->hookmask &= ~HOOK_ACTIVE) | ||
566 | #define hook_save(g) ((g)->hookmask & ~HOOK_EVENTMASK) | ||
567 | #define hook_restore(g, h) \ | ||
568 | ((g)->hookmask = ((g)->hookmask & HOOK_EVENTMASK) | (h)) | ||
569 | |||
570 | /* Per-thread state object. */ | ||
571 | struct lua_State { | ||
572 | GCHeader; | ||
573 | uint8_t dummy_ffid; /* Fake FF_C for curr_funcisL() on dummy frames. */ | ||
574 | uint8_t status; /* Thread status. */ | ||
575 | MRef glref; /* Link to global state. */ | ||
576 | GCRef gclist; /* GC chain. */ | ||
577 | TValue *base; /* Base of currently executing function. */ | ||
578 | TValue *top; /* First free slot in the stack. */ | ||
579 | TValue *maxstack; /* Last free slot in the stack. */ | ||
580 | TValue *stack; /* Stack base. */ | ||
581 | GCRef openupval; /* List of open upvalues in the stack. */ | ||
582 | GCRef env; /* Thread environment (table of globals). */ | ||
583 | void *cframe; /* End of C stack frame chain. */ | ||
584 | MSize stacksize; /* True stack size (incl. LJ_STACK_EXTRA). */ | ||
585 | }; | ||
586 | |||
587 | #define G(L) (mref((L)->glref, global_State)) /* Global state linked from thread L; argument parenthesized so any pointer expression works. */ | ||
588 | #define registry(L) (&G(L)->registrytv) | ||
589 | |||
590 | /* Macros to access the currently executing (Lua) function. */ | ||
591 | #define curr_func(L) (&gcref((L->base-1)->fr.func)->fn) | ||
592 | #define curr_funcisL(L) (isluafunc(curr_func(L))) | ||
593 | #define curr_proto(L) (funcproto(curr_func(L))) | ||
594 | #define curr_topL(L) (L->base + curr_proto(L)->framesize) | ||
595 | #define curr_top(L) (curr_funcisL(L) ? curr_topL(L) : L->top) | ||
596 | |||
597 | /* -- GC object definition and conversions -------------------------------- */ | ||
598 | |||
599 | /* GC header for generic access to common fields of GC objects. */ | ||
600 | typedef struct GChead { | ||
601 | GCHeader; | ||
602 | uint8_t unused1; /* The two bytes after the 6 byte GCHeader; other objects keep their own fields here. */ | ||
603 | uint8_t unused2; | ||
604 | GCRef env; /* Offset asserted equal to GCfuncL.env and GCudata.env below. */ | ||
605 | GCRef gclist; /* Offset asserted equal to gclist in lua_State, GCproto, GCfuncL and GCtab below. */ | ||
606 | GCRef metatable; /* Offset asserted equal to GCtab.metatable and GCudata.metatable below. */ | ||
607 | } GChead; | ||
608 | |||
609 | /* The env field SHOULD be at the same offset for all GC objects. */ | ||
610 | LJ_STATIC_ASSERT(offsetof(GChead, env) == offsetof(GCfuncL, env)); | ||
611 | LJ_STATIC_ASSERT(offsetof(GChead, env) == offsetof(GCudata, env)); | ||
612 | |||
613 | /* The metatable field MUST be at the same offset for all GC objects. */ | ||
614 | LJ_STATIC_ASSERT(offsetof(GChead, metatable) == offsetof(GCtab, metatable)); | ||
615 | LJ_STATIC_ASSERT(offsetof(GChead, metatable) == offsetof(GCudata, metatable)); | ||
616 | |||
617 | /* The gclist field MUST be at the same offset for all GC objects. */ | ||
618 | LJ_STATIC_ASSERT(offsetof(GChead, gclist) == offsetof(lua_State, gclist)); | ||
619 | LJ_STATIC_ASSERT(offsetof(GChead, gclist) == offsetof(GCproto, gclist)); | ||
620 | LJ_STATIC_ASSERT(offsetof(GChead, gclist) == offsetof(GCfuncL, gclist)); | ||
621 | LJ_STATIC_ASSERT(offsetof(GChead, gclist) == offsetof(GCtab, gclist)); | ||
622 | |||
623 | typedef union GCobj { | ||
624 | GChead gch; | ||
625 | GCstr str; | ||
626 | GCupval uv; | ||
627 | lua_State th; | ||
628 | GCproto pt; | ||
629 | GCfunc fn; | ||
630 | GCtab tab; | ||
631 | GCudata ud; | ||
632 | } GCobj; | ||
633 | |||
634 | /* Macros to convert a GCobj pointer into a specific value. */ | ||
635 | #define gco2str(o) check_exp((o)->gch.gct == ~LJ_TSTR, &(o)->str) | ||
636 | #define gco2uv(o) check_exp((o)->gch.gct == ~LJ_TUPVAL, &(o)->uv) | ||
637 | #define gco2th(o) check_exp((o)->gch.gct == ~LJ_TTHREAD, &(o)->th) | ||
638 | #define gco2pt(o) check_exp((o)->gch.gct == ~LJ_TPROTO, &(o)->pt) | ||
639 | #define gco2func(o) check_exp((o)->gch.gct == ~LJ_TFUNC, &(o)->fn) | ||
640 | #define gco2tab(o) check_exp((o)->gch.gct == ~LJ_TTAB, &(o)->tab) | ||
641 | #define gco2ud(o) check_exp((o)->gch.gct == ~LJ_TUDATA, &(o)->ud) | ||
642 | |||
643 | /* Macro to convert any collectable object into a GCobj pointer. */ | ||
644 | #define obj2gco(v) (cast(GCobj *, (v))) | ||
645 | |||
646 | /* -- Number to integer conversion ---------------------------------------- */ | ||
647 | |||
648 | static LJ_AINLINE int32_t lj_num2bit(lua_Number n) | ||
649 | { | ||
650 | TValue biased; /* Scratch value: written as a double, read back as a 32 bit word. */ | ||
651 | setnumV(&biased, n + 6755399441055744.0); /* Add 2^52 + 2^51. */ | ||
652 | return (int32_t)biased.u32.lo; /* Lower 32 bits of the biased number. */ | ||
653 | } | ||
654 | |||
655 | #if (defined(__i386__) || defined(_M_IX86)) && !defined(__SSE2__) | ||
656 | #define lj_num2int(n) lj_num2bit((n)) | ||
657 | #else | ||
658 | #define lj_num2int(n) ((int32_t)(n)) | ||
659 | #endif | ||
660 | |||
661 | /* -- Miscellaneous object handling --------------------------------------- */ | ||
662 | |||
663 | /* Names and maps for internal and external object tags. */ | ||
664 | LJ_DATA const char *const lj_obj_typename[1+LUA_TUPVAL+1]; | ||
665 | LJ_DATA const char *const lj_obj_itypename[~LJ_TNUMX+1]; | ||
666 | |||
667 | #define typename(o) (lj_obj_itypename[itypemap(o)]) | ||
668 | |||
669 | /* Compare two objects without calling metamethods. */ | ||
670 | LJ_FUNC int lj_obj_equal(cTValue *o1, cTValue *o2); | ||
671 | |||
672 | #ifdef LUA_USE_ASSERT | ||
673 | #include "lj_gc.h" | ||
674 | #endif | ||
675 | |||
676 | #endif | ||
diff --git a/src/lj_opt_dce.c b/src/lj_opt_dce.c new file mode 100644 index 00000000..0cd60830 --- /dev/null +++ b/src/lj_opt_dce.c | |||
@@ -0,0 +1,79 @@ | |||
1 | /* | ||
2 | ** DCE: Dead Code Elimination. Pre-LOOP only -- ASM already performs DCE. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #define lj_opt_dce_c | ||
7 | #define LUA_CORE | ||
8 | |||
9 | #include "lj_obj.h" | ||
10 | |||
11 | #if LJ_HASJIT | ||
12 | |||
13 | #include "lj_ir.h" | ||
14 | #include "lj_jit.h" | ||
15 | #include "lj_iropt.h" | ||
16 | |||
17 | /* Some local macros to save typing. Undef'd at the end. */ | ||
18 | #define IR(ref) (&J->cur.ir[(ref)]) | ||
19 | |||
20 | /* Scan through all snapshots and mark all referenced instructions. */ | ||
21 | static void dce_marksnap(jit_State *J) | ||
22 | { | ||
23 | SnapNo i, nsnap = J->cur.nsnap; | ||
24 | for (i = 0; i < nsnap; i++) { | ||
25 | SnapShot *snap = &J->cur.snap[i]; | ||
26 | IRRef2 *map = &J->cur.snapmap[snap->mapofs]; | ||
27 | BCReg s, nslots = snap->nslots; | ||
28 | for (s = 0; s < nslots; s++) { | ||
29 | IRRef ref = snap_ref(map[s]); | ||
30 | if (!irref_isk(ref)) | ||
31 | irt_setmark(IR(ref)->t); | ||
32 | } | ||
33 | } | ||
34 | } | ||
35 | |||
36 | /* Backwards propagate marks. Replace unused instructions with NOPs. */ | ||
37 | static void dce_propagate(jit_State *J) | ||
38 | { | ||
39 | IRRef1 *pchain[IR__MAX]; | ||
40 | IRRef ins; | ||
41 | uint32_t i; | ||
42 | for (i = 0; i < IR__MAX; i++) pchain[i] = &J->chain[i]; | ||
43 | for (ins = J->cur.nins-1; ins >= REF_FIRST; ins--) { | ||
44 | IRIns *ir = IR(ins); | ||
45 | if (irt_ismarked(ir->t)) { | ||
46 | irt_clearmark(ir->t); | ||
47 | pchain[ir->o] = &ir->prev; | ||
48 | } else if (!(irt_isguard(ir->t) || irm_sideeff(lj_ir_mode[ir->o]))) { | ||
49 | *pchain[ir->o] = ir->prev; /* Reroute original instruction chain. */ | ||
50 | *pchain[IR_NOP] = (IRRef1)ins; | ||
51 | ir->t.irt = IRT_NIL; | ||
52 | ir->o = IR_NOP; /* Replace instruction with NOP. */ | ||
53 | ir->op1 = ir->op2 = 0; | ||
54 | pchain[IR_NOP] = &ir->prev; | ||
55 | continue; | ||
56 | } | ||
57 | if (!irref_isk(ir->op1)) irt_setmark(IR(ir->op1)->t); | ||
58 | if (!irref_isk(ir->op2)) irt_setmark(IR(ir->op2)->t); | ||
59 | } | ||
60 | *pchain[IR_NOP] = 0; /* Terminate NOP chain. */ | ||
61 | } | ||
62 | |||
63 | /* Dead Code Elimination. | ||
64 | ** | ||
65 | ** First backpropagate marks for all used instructions. Then replace | ||
66 | ** the unused ones with a NOP. Note that compressing the IR to eliminate | ||
67 | ** the NOPs does not pay off. | ||
68 | */ | ||
69 | void lj_opt_dce(jit_State *J) | ||
70 | { | ||
71 | if ((J->flags & JIT_F_OPT_DCE)) { | ||
72 | dce_marksnap(J); | ||
73 | dce_propagate(J); | ||
74 | } | ||
75 | } | ||
76 | |||
77 | #undef IR | ||
78 | |||
79 | #endif | ||
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c new file mode 100644 index 00000000..e5d98162 --- /dev/null +++ b/src/lj_opt_fold.c | |||
@@ -0,0 +1,1415 @@ | |||
1 | /* | ||
2 | ** FOLD: Constant Folding, Algebraic Simplifications and Reassociation. | ||
3 | ** CSE: Common-Subexpression Elimination. | ||
4 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
5 | */ | ||
6 | |||
7 | #define lj_opt_fold_c | ||
8 | #define LUA_CORE | ||
9 | |||
10 | #include "lj_obj.h" | ||
11 | |||
12 | #if LJ_HASJIT | ||
13 | |||
14 | #include "lj_str.h" | ||
15 | #include "lj_ir.h" | ||
16 | #include "lj_jit.h" | ||
17 | #include "lj_iropt.h" | ||
18 | #include "lj_trace.h" | ||
19 | #include "lj_vm.h" | ||
20 | |||
21 | /* Here's a short description of how the FOLD engine processes instructions: | |
22 | ** | ||
23 | ** The FOLD engine receives a single instruction stored in fins (J->fold.ins). | ||
24 | ** The instruction and its operands are used to select matching fold rules. | ||
25 | ** These are applied iteratively until a fixed point is reached. | ||
26 | ** | ||
27 | ** The 8 bit opcode of the instruction itself plus the opcodes of the | ||
28 | ** two instructions referenced by its operands form a 24 bit key | ||
29 | ** 'ins left right' (unused operands -> 0, literals -> lowest 8 bits). | ||
30 | ** | ||
31 | ** This key is used for partial matching against the fold rules. The | ||
32 | ** left/right operand fields of the key are successively masked with | ||
33 | ** the 'any' wildcard, from most specific to least specific: | ||
34 | ** | ||
35 | ** ins left right | ||
36 | ** ins any right | ||
37 | ** ins left any | ||
38 | ** ins any any | ||
39 | ** | ||
40 | ** The masked key is used to lookup a matching fold rule in a semi-perfect | ||
41 | ** hash table. If a matching rule is found, the related fold function is run. | ||
42 | ** Multiple rules can share the same fold function. A fold rule may return | ||
43 | ** one of several special values: | ||
44 | ** | ||
45 | ** - NEXTFOLD means no folding was applied, because an additional test | ||
46 | ** inside the fold function failed. Matching continues against less | ||
47 | ** specific fold rules. Finally the instruction is passed on to CSE. | ||
48 | ** | ||
49 | ** - RETRYFOLD means the instruction was modified in-place. Folding is | ||
50 | ** retried as if this instruction had just been received. | ||
51 | ** | ||
52 | ** All other return values are terminal actions -- no further folding is | ||
53 | ** applied: | ||
54 | ** | ||
55 | ** - INTFOLD(i) returns a reference to the integer constant i. | ||
56 | ** | ||
57 | ** - LEFTFOLD and RIGHTFOLD return the left/right operand reference | ||
58 | ** without emitting an instruction. | ||
59 | ** | ||
60 | ** - CSEFOLD and EMITFOLD pass the instruction directly to CSE or emit | ||
61 | ** it without passing through any further optimizations. | ||
62 | ** | ||
63 | ** - FAILFOLD, DROPFOLD and CONDFOLD only apply to instructions which have | ||
64 | ** no result (e.g. guarded assertions): FAILFOLD means the guard would | ||
65 | ** always fail, i.e. the current trace is pointless. DROPFOLD means | ||
66 | ** the guard is always true and has been eliminated. CONDFOLD is a | ||
67 | ** shortcut for FAILFOLD + cond (i.e. drop if true, otherwise fail). | ||
68 | ** | ||
69 | ** - Any other return value is interpreted as an IRRef or TRef. This | ||
70 | ** can be a reference to an existing or a newly created instruction. | ||
71 | ** Only the least-significant 16 bits (IRRef1) are used to form a TRef | ||
72 | ** which is finally returned to the caller. | ||
73 | ** | ||
74 | ** The FOLD engine receives instructions both from the trace recorder and | ||
75 | ** substituted instructions from LOOP unrolling. This means all types | ||
76 | ** of instructions may end up here, even though the recorder bypasses | ||
77 | ** FOLD in some cases. Thus all loads, stores and allocations must have | ||
78 | ** an any/any rule to avoid being passed on to CSE. | ||
79 | ** | ||
80 | ** Carefully read the following requirements before adding or modifying | ||
81 | ** any fold rules: | ||
82 | ** | ||
83 | ** Requirement #1: All fold rules must preserve their destination type. | ||
84 | ** | ||
85 | ** Consistently use INTFOLD() (KINT result) or lj_ir_knum() (KNUM result). | ||
86 | ** Never use lj_ir_knumint() which can have either a KINT or KNUM result. | ||
87 | ** | ||
88 | ** Requirement #2: Fold rules should not create *new* instructions which | ||
89 | ** reference operands *across* PHIs. | ||
90 | ** | ||
91 | ** E.g. a RETRYFOLD with 'fins->op1 = fleft->op1' is invalid if the | ||
92 | ** left operand is a PHI. Then fleft->op1 would point across the PHI | ||
93 | ** frontier to an invariant instruction. Adding a PHI for this instruction | ||
94 | ** would be counterproductive. The solution is to add a barrier which | ||
95 | ** prevents folding across PHIs, i.e. 'PHIBARRIER(fleft)' in this case. | ||
96 | ** The only exception is for recurrences with high latencies like | ||
97 | ** repeated int->num->int conversions. | ||
98 | ** | ||
99 | ** One could relax this condition a bit if the referenced instruction is | ||
100 | ** a PHI, too. But this often leads to worse code due to excessive | ||
101 | ** register shuffling. | ||
102 | ** | ||
103 | ** Note: returning *existing* instructions (e.g. LEFTFOLD) is ok, though. | ||
104 | ** Even returning fleft->op1 would be ok, because a new PHI will be added, | |
105 | ** if needed. But again, this leads to excessive register shuffling and | ||
106 | ** should be avoided. | ||
107 | ** | ||
108 | ** Requirement #3: The set of all fold rules must be monotonic to guarantee | ||
109 | ** termination. | ||
110 | ** | ||
111 | ** The goal is optimization, so one primarily wants to add strength-reducing | ||
112 | ** rules. This means eliminating an instruction or replacing an instruction | ||
113 | ** with one or more simpler instructions. Don't add fold rules which point | ||
114 | ** into the other direction. | ||
115 | ** | ||
116 | ** Some rules (like commutativity) do not directly reduce the strength of | ||
117 | ** an instruction, but enable other fold rules (e.g. by moving constants | ||
118 | ** to the right operand). These rules must be made unidirectional to avoid | ||
119 | ** cycles. | ||
120 | ** | ||
121 | ** Rule of thumb: the trace recorder expands the IR and FOLD shrinks it. | ||
122 | */ | ||
123 | |||
124 | /* Some local macros to save typing. Undef'd at the end. */ | ||
125 | #define IR(ref) (&J->cur.ir[(ref)]) | ||
126 | #define fins (&J->fold.ins) | ||
127 | #define fleft (&J->fold.left) | ||
128 | #define fright (&J->fold.right) | ||
129 | #define knumleft (ir_knum(fleft)->n) | ||
130 | #define knumright (ir_knum(fright)->n) | ||
131 | |||
132 | /* Pass IR on to next optimization in chain (FOLD). */ | ||
133 | #define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J)) | ||
134 | |||
135 | /* Fold function type. Fastcall on x86 significantly reduces their size. */ | ||
136 | typedef IRRef (LJ_FASTCALL *FoldFunc)(jit_State *J); | ||
137 | |||
138 | /* Macros for the fold specs, so buildvm can recognize them. */ | ||
139 | #define LJFOLD(x) | ||
140 | #define LJFOLDX(x) | ||
141 | #define LJFOLDF(name) static TRef LJ_FASTCALL name(jit_State *J) | ||
142 | /* Note: They must be at the start of a line or buildvm ignores them! */ | ||
143 | |||
144 | /* Barrier to prevent using operands across PHIs. */ | ||
145 | #define PHIBARRIER(ir) if (irt_isphi((ir)->t)) return NEXTFOLD | ||
146 | |||
147 | /* Barrier to prevent folding across a GC step. | ||
148 | ** GC steps can only happen at the head of a trace and at LOOP. | ||
149 | ** And the GC is only driven forward if there is at least one allocation. | ||
150 | */ | ||
151 | #define gcstep_barrier(J, ref) \ | ||
152 | ((ref) < J->chain[IR_LOOP] && \ | ||
153 | (J->chain[IR_TNEW] || J->chain[IR_TDUP] || \ | ||
154 | J->chain[IR_SNEW] || J->chain[IR_TOSTR])) | ||
155 | |||
156 | /* -- Constant folding ---------------------------------------------------- */ | ||
157 | |||
158 | LJFOLD(ADD KNUM KNUM) | ||
159 | LJFOLD(SUB KNUM KNUM) | ||
160 | LJFOLD(MUL KNUM KNUM) | ||
161 | LJFOLD(DIV KNUM KNUM) | ||
162 | LJFOLD(NEG KNUM KNUM) | ||
163 | LJFOLD(ABS KNUM KNUM) | ||
164 | LJFOLD(ATAN2 KNUM KNUM) | ||
165 | LJFOLD(LDEXP KNUM KNUM) | ||
166 | LJFOLD(MIN KNUM KNUM) | ||
167 | LJFOLD(MAX KNUM KNUM) | ||
168 | LJFOLDF(kfold_numarith) | ||
169 | { | ||
170 | lua_Number a = knumleft; | ||
171 | lua_Number b = knumright; | ||
172 | lua_Number y = lj_vm_foldarith(a, b, fins->o - IR_ADD); | ||
173 | return lj_ir_knum(J, y); | ||
174 | } | ||
175 | |||
176 | LJFOLD(FPMATH KNUM any) | ||
177 | LJFOLDF(kfold_fpmath) | ||
178 | { | ||
179 | lua_Number a = knumleft; | ||
180 | lua_Number y = lj_vm_foldfpm(a, fins->op2); | ||
181 | return lj_ir_knum(J, y); | ||
182 | } | ||
183 | |||
184 | LJFOLD(POWI KNUM KINT) | ||
185 | LJFOLDF(kfold_powi) | ||
186 | { | ||
187 | lua_Number a = knumleft; | ||
188 | lua_Number b = cast_num(fright->i); | ||
189 | lua_Number y = lj_vm_foldarith(a, b, IR_POWI - IR_ADD); | ||
190 | return lj_ir_knum(J, y); | ||
191 | } | ||
192 | |||
193 | static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op) | ||
194 | { | ||
195 | switch (op) { | ||
196 | case IR_ADD: k1 += k2; break; | ||
197 | case IR_SUB: k1 -= k2; break; | ||
198 | case IR_BAND: k1 &= k2; break; | ||
199 | case IR_BOR: k1 |= k2; break; | ||
200 | case IR_BXOR: k1 ^= k2; break; | ||
201 | case IR_BSHL: k1 <<= (k2 & 31); break; | ||
202 | case IR_BSHR: k1 = (int32_t)((uint32_t)k1 >> (k2 & 31)); break; | ||
203 | case IR_BSAR: k1 >>= (k2 & 31); break; | ||
204 | case IR_BROL: k1 = (int32_t)lj_rol((uint32_t)k1, (k2 & 31)); break; | ||
205 | case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 31)); break; | ||
206 | default: lua_assert(0); break; | ||
207 | } | ||
208 | return k1; | ||
209 | } | ||
210 | |||
211 | LJFOLD(ADD KINT KINT) | ||
212 | LJFOLD(SUB KINT KINT) | ||
213 | LJFOLD(BAND KINT KINT) | ||
214 | LJFOLD(BOR KINT KINT) | ||
215 | LJFOLD(BXOR KINT KINT) | ||
216 | LJFOLD(BSHL KINT KINT) | ||
217 | LJFOLD(BSHR KINT KINT) | ||
218 | LJFOLD(BSAR KINT KINT) | ||
219 | LJFOLD(BROL KINT KINT) | ||
220 | LJFOLD(BROR KINT KINT) | ||
221 | LJFOLDF(kfold_intarith) | ||
222 | { | ||
223 | return INTFOLD(kfold_intop(fleft->i, fright->i, (IROp)fins->o)); | ||
224 | } | ||
225 | |||
226 | LJFOLD(BNOT KINT) | ||
227 | LJFOLDF(kfold_bnot) | ||
228 | { | ||
229 | return INTFOLD(~fleft->i); | ||
230 | } | ||
231 | |||
232 | LJFOLD(BSWAP KINT) | ||
233 | LJFOLDF(kfold_bswap) | ||
234 | { | ||
235 | return INTFOLD((int32_t)lj_bswap((uint32_t)fleft->i)); | ||
236 | } | ||
237 | |||
238 | LJFOLD(TONUM KINT) | ||
239 | LJFOLDF(kfold_tonum) | ||
240 | { | ||
241 | return lj_ir_knum(J, cast_num(fleft->i)); | ||
242 | } | ||
243 | |||
244 | LJFOLD(TOBIT KNUM KNUM) | ||
245 | LJFOLDF(kfold_tobit) | ||
246 | { | ||
247 | TValue tv; | ||
248 | tv.n = knumleft + knumright; | ||
249 | return INTFOLD((int32_t)tv.u32.lo); | ||
250 | } | ||
251 | |||
252 | LJFOLD(TOINT KNUM any) | ||
253 | LJFOLDF(kfold_toint) | ||
254 | { | ||
255 | lua_Number n = knumleft; | ||
256 | int32_t k = lj_num2int(n); | ||
257 | if (irt_isguard(fins->t) && n != cast_num(k)) { | ||
258 | /* We're about to create a guard which always fails, like TOINT +1.5. | ||
259 | ** Some pathological loops cause this during LICM, e.g.: | ||
260 | ** local x,k,t = 0,1.5,{1,[1.5]=2} | ||
261 | ** for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end | ||
262 | ** assert(x == 300) | ||
263 | */ | ||
264 | return FAILFOLD; | ||
265 | } | ||
266 | return INTFOLD(k); | ||
267 | } | ||
268 | |||
269 | LJFOLD(TOSTR KNUM) | ||
270 | LJFOLDF(kfold_tostr_knum) | ||
271 | { | ||
272 | return lj_ir_kstr(J, lj_str_fromnum(J->L, &knumleft)); | ||
273 | } | ||
274 | |||
275 | LJFOLD(TOSTR KINT) | ||
276 | LJFOLDF(kfold_tostr_kint) | ||
277 | { | ||
278 | return lj_ir_kstr(J, lj_str_fromint(J->L, fleft->i)); | ||
279 | } | ||
280 | |||
281 | LJFOLD(STRTO KGC) | ||
282 | LJFOLDF(kfold_strto) | ||
283 | { | ||
284 | TValue n; | ||
285 | if (lj_str_numconv(strdata(ir_kstr(fleft)), &n)) | ||
286 | return lj_ir_knum(J, numV(&n)); | ||
287 | return FAILFOLD; | ||
288 | } | ||
289 | |||
290 | LJFOLD(SNEW STRREF KINT) | ||
291 | LJFOLDF(kfold_snew) | ||
292 | { | ||
293 | if (fright->i == 0) | ||
294 | return lj_ir_kstr(J, lj_str_new(J->L, "", 0)); | ||
295 | PHIBARRIER(fleft); | ||
296 | if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) { | ||
297 | const char *s = strdata(ir_kstr(IR(fleft->op1))); | ||
298 | int32_t ofs = IR(fleft->op2)->i; | ||
299 | return lj_ir_kstr(J, lj_str_new(J->L, s+ofs, (size_t)fright->i)); | ||
300 | } | ||
301 | return NEXTFOLD; | ||
302 | } | ||
303 | |||
304 | /* Must not use kfold_kref for numbers (could be NaN). */ | ||
305 | LJFOLD(EQ KNUM KNUM) | ||
306 | LJFOLD(NE KNUM KNUM) | ||
307 | LJFOLD(LT KNUM KNUM) | ||
308 | LJFOLD(GE KNUM KNUM) | ||
309 | LJFOLD(LE KNUM KNUM) | ||
310 | LJFOLD(GT KNUM KNUM) | ||
311 | LJFOLD(ULT KNUM KNUM) | ||
312 | LJFOLD(UGE KNUM KNUM) | ||
313 | LJFOLD(ULE KNUM KNUM) | ||
314 | LJFOLD(UGT KNUM KNUM) | ||
315 | LJFOLDF(kfold_numcomp) | ||
316 | { | ||
317 | return CONDFOLD(lj_ir_numcmp(knumleft, knumright, (IROp)fins->o)); | ||
318 | } | ||
319 | |||
320 | LJFOLD(LT KINT KINT) | ||
321 | LJFOLD(GE KINT KINT) | ||
322 | LJFOLD(LE KINT KINT) | ||
323 | LJFOLD(GT KINT KINT) | ||
324 | LJFOLD(ULT KINT KINT) | ||
325 | LJFOLD(UGE KINT KINT) | ||
326 | LJFOLD(ULE KINT KINT) | ||
327 | LJFOLD(UGT KINT KINT) | ||
328 | LJFOLD(ABC KINT KINT) | ||
329 | LJFOLDF(kfold_intcomp) | ||
330 | { | ||
331 | int32_t a = fleft->i, b = fright->i; | ||
332 | switch ((IROp)fins->o) { | ||
333 | case IR_LT: return CONDFOLD(a < b); | ||
334 | case IR_GE: return CONDFOLD(a >= b); | ||
335 | case IR_LE: return CONDFOLD(a <= b); | ||
336 | case IR_GT: return CONDFOLD(a > b); | ||
337 | case IR_ULT: return CONDFOLD((uint32_t)a < (uint32_t)b); | ||
338 | case IR_UGE: return CONDFOLD((uint32_t)a >= (uint32_t)b); | ||
339 | case IR_ULE: return CONDFOLD((uint32_t)a <= (uint32_t)b); | ||
340 | case IR_ABC: | ||
341 | case IR_UGT: return CONDFOLD((uint32_t)a > (uint32_t)b); | ||
342 | default: lua_assert(0); return FAILFOLD; | ||
343 | } | ||
344 | } | ||
345 | |||
346 | LJFOLD(LT KGC KGC) | ||
347 | LJFOLD(GE KGC KGC) | ||
348 | LJFOLD(LE KGC KGC) | ||
349 | LJFOLD(GT KGC KGC) | ||
350 | LJFOLDF(kfold_strcomp) | ||
351 | { | ||
352 | if (irt_isstr(fins->t)) { | ||
353 | GCstr *a = ir_kstr(fleft); | ||
354 | GCstr *b = ir_kstr(fright); | ||
355 | return CONDFOLD(lj_ir_strcmp(a, b, (IROp)fins->o)); | ||
356 | } | ||
357 | return NEXTFOLD; | ||
358 | } | ||
359 | |||
360 | /* Don't constant-fold away FLOAD checks against KNULL. */ | ||
361 | LJFOLD(EQ FLOAD KNULL) | ||
362 | LJFOLD(NE FLOAD KNULL) | ||
363 | LJFOLDX(lj_opt_cse) | ||
364 | |||
365 | /* But fold all other KNULL compares, since only KNULL is equal to KNULL. */ | ||
366 | LJFOLD(EQ any KNULL) | ||
367 | LJFOLD(NE any KNULL) | ||
368 | LJFOLD(EQ KNULL any) | ||
369 | LJFOLD(NE KNULL any) | ||
370 | LJFOLD(EQ KINT KINT) /* Constants are unique, so same refs <==> same value. */ | ||
371 | LJFOLD(NE KINT KINT) | ||
372 | LJFOLD(EQ KGC KGC) | ||
373 | LJFOLD(NE KGC KGC) | ||
374 | LJFOLDF(kfold_kref) | ||
375 | { | ||
376 | return CONDFOLD((fins->op1 == fins->op2) ^ (fins->o == IR_NE)); | ||
377 | } | ||
378 | |||
379 | /* -- Algebraic shortcuts ------------------------------------------------- */ | ||
380 | |||
381 | LJFOLD(FPMATH FPMATH IRFPM_FLOOR) | ||
382 | LJFOLD(FPMATH FPMATH IRFPM_CEIL) | ||
383 | LJFOLD(FPMATH FPMATH IRFPM_TRUNC) | ||
384 | LJFOLDF(shortcut_round) | ||
385 | { | ||
386 | IRFPMathOp op = (IRFPMathOp)fleft->op2; | ||
387 | if (op == IRFPM_FLOOR || op == IRFPM_CEIL || op == IRFPM_TRUNC) | ||
388 | return LEFTFOLD; /* round(round_left(x)) = round_left(x) */ | ||
389 | return NEXTFOLD; | ||
390 | } | ||
391 | |||
392 | LJFOLD(FPMATH TONUM IRFPM_FLOOR) | ||
393 | LJFOLD(FPMATH TONUM IRFPM_CEIL) | ||
394 | LJFOLD(FPMATH TONUM IRFPM_TRUNC) | ||
395 | LJFOLD(ABS ABS KNUM) | ||
396 | LJFOLDF(shortcut_left) | ||
397 | { | ||
398 | return LEFTFOLD; /* f(g(x)) ==> g(x) */ | ||
399 | } | ||
400 | |||
401 | LJFOLD(ABS NEG KNUM) | ||
402 | LJFOLDF(shortcut_dropleft) | ||
403 | { | ||
404 | PHIBARRIER(fleft); | ||
405 | fins->op1 = fleft->op1; /* abs(neg(x)) ==> abs(x) */ | ||
406 | return RETRYFOLD; | ||
407 | } | ||
408 | |||
409 | /* Note: no safe shortcuts with STRTO and TOSTR ("1e2" ==> +100 ==> "100"). */ | ||
410 | LJFOLD(NEG NEG KNUM) | ||
411 | LJFOLD(BNOT BNOT) | ||
412 | LJFOLD(BSWAP BSWAP) | ||
413 | LJFOLDF(shortcut_leftleft) | ||
414 | { | ||
415 | PHIBARRIER(fleft); /* See above. Fold would be ok, but not beneficial. */ | ||
416 | return fleft->op1; /* f(g(x)) ==> x */ | ||
417 | } | ||
418 | |||
419 | LJFOLD(TONUM TOINT) | ||
420 | LJFOLDF(shortcut_leftleft_toint) | ||
421 | { | ||
422 | PHIBARRIER(fleft); | ||
423 | if (irt_isguard(fleft->t)) /* Only safe with a guarded TOINT. */ | ||
424 | return fleft->op1; /* f(g(x)) ==> x */ | ||
425 | return NEXTFOLD; | ||
426 | } | ||
427 | |||
428 | LJFOLD(TOINT TONUM any) | ||
429 | LJFOLD(TOBIT TONUM KNUM) /* The inverse must NOT be shortcut! */ | ||
430 | LJFOLDF(shortcut_leftleft_across_phi) | ||
431 | { | ||
432 | /* Fold even across PHI to avoid expensive int->num->int conversions. */ | ||
433 | return fleft->op1; /* f(g(x)) ==> x */ | ||
434 | } | ||
435 | |||
436 | /* -- FP algebraic simplifications ---------------------------------------- */ | ||
437 | |||
438 | /* FP arithmetic is tricky -- there's not much to simplify. | ||
439 | ** Please note the following common pitfalls before sending "improvements": | ||
440 | ** x+0 ==> x is INVALID for x=-0 | ||
441 | ** 0-x ==> -x is INVALID for x=+0 | ||
442 | ** x*0 ==> 0 is INVALID for x=-0, x=+-Inf or x=NaN | ||
443 | */ | ||
444 | |||
445 | LJFOLD(ADD NEG any) | ||
446 | LJFOLDF(simplify_numadd_negx) | ||
447 | { | ||
448 | PHIBARRIER(fleft); | ||
449 | fins->o = IR_SUB; /* (-a) + b ==> b - a */ | ||
450 | fins->op1 = fins->op2; | ||
451 | fins->op2 = fleft->op1; | ||
452 | return RETRYFOLD; | ||
453 | } | ||
454 | |||
455 | LJFOLD(ADD any NEG) | ||
456 | LJFOLDF(simplify_numadd_xneg) | ||
457 | { | ||
458 | PHIBARRIER(fright); | ||
459 | fins->o = IR_SUB; /* a + (-b) ==> a - b */ | ||
460 | fins->op2 = fright->op1; | ||
461 | return RETRYFOLD; | ||
462 | } | ||
463 | |||
464 | LJFOLD(SUB any KNUM) | ||
465 | LJFOLDF(simplify_numsub_k) | ||
466 | { | ||
467 | lua_Number n = knumright; | ||
468 | if (n == 0.0) /* x - (+-0) ==> x */ | ||
469 | return LEFTFOLD; | ||
470 | return NEXTFOLD; | ||
471 | } | ||
472 | |||
473 | LJFOLD(SUB NEG KNUM) | ||
474 | LJFOLDF(simplify_numsub_negk) | ||
475 | { | ||
476 | PHIBARRIER(fleft); | ||
477 | fins->op2 = fleft->op1; /* (-x) - k ==> (-k) - x */ | ||
478 | fins->op1 = (IRRef1)lj_ir_knum(J, -knumright); | ||
479 | return RETRYFOLD; | ||
480 | } | ||
481 | |||
482 | LJFOLD(SUB any NEG) | ||
483 | LJFOLDF(simplify_numsub_xneg) | ||
484 | { | ||
485 | PHIBARRIER(fright); | ||
486 | fins->o = IR_ADD; /* a - (-b) ==> a + b */ | ||
487 | fins->op2 = fright->op1; | ||
488 | return RETRYFOLD; | ||
489 | } | ||
490 | |||
491 | LJFOLD(MUL any KNUM) | ||
492 | LJFOLD(DIV any KNUM) | ||
493 | LJFOLDF(simplify_nummuldiv_k) | ||
494 | { | ||
495 | lua_Number n = knumright; | ||
496 | if (n == 1.0) { /* x o 1 ==> x */ | ||
497 | return LEFTFOLD; | ||
498 | } else if (n == -1.0) { /* x o -1 ==> -x */ | ||
499 | fins->o = IR_NEG; | ||
500 | fins->op2 = (IRRef1)lj_ir_knum_neg(J); | ||
501 | return RETRYFOLD; | ||
502 | } else if (fins->o == IR_MUL && n == 2.0) { /* x * 2 ==> x + x */ | ||
503 | fins->o = IR_ADD; | ||
504 | fins->op2 = fins->op1; | ||
505 | return RETRYFOLD; | ||
506 | } | ||
507 | return NEXTFOLD; | ||
508 | } | ||
509 | |||
510 | LJFOLD(MUL NEG KNUM) | ||
511 | LJFOLD(DIV NEG KNUM) | ||
512 | LJFOLDF(simplify_nummuldiv_negk) | ||
513 | { | ||
514 | PHIBARRIER(fleft); | ||
515 | fins->op1 = fleft->op1; /* (-a) o k ==> a o (-k) */ | ||
516 | fins->op2 = (IRRef1)lj_ir_knum(J, -knumright); | ||
517 | return RETRYFOLD; | ||
518 | } | ||
519 | |||
520 | LJFOLD(MUL NEG NEG) | ||
521 | LJFOLD(DIV NEG NEG) | ||
522 | LJFOLDF(simplify_nummuldiv_negneg) | ||
523 | { | ||
524 | PHIBARRIER(fleft); | ||
525 | PHIBARRIER(fright); | ||
526 | fins->op1 = fleft->op1; /* (-a) o (-b) ==> a o b */ | ||
527 | fins->op2 = fright->op1; | ||
528 | return RETRYFOLD; | ||
529 | } | ||
530 | |||
531 | LJFOLD(POWI any KINT) | ||
532 | LJFOLDF(simplify_powi_xk) | ||
533 | { | ||
534 | int32_t k = fright->i; | ||
535 | TRef ref = fins->op1; | ||
536 | if (k == 0) /* x ^ 0 ==> 1 */ | ||
537 | return lj_ir_knum_one(J); /* Result must be a number, not an int. */ | ||
538 | if (k == 1) /* x ^ 1 ==> x */ | ||
539 | return LEFTFOLD; | ||
540 | if ((uint32_t)(k+65536) > 2*65536u) /* Limit code explosion. */ | ||
541 | return NEXTFOLD; | ||
542 | if (k < 0) { /* x ^ (-k) ==> (1/x) ^ k. */ | ||
543 | ref = emitir(IRTN(IR_DIV), lj_ir_knum_one(J), ref); | ||
544 | k = -k; | ||
545 | } | ||
546 | /* Unroll x^k for 1 <= k <= 65536. */ | ||
547 | for (; (k & 1) == 0; k >>= 1) /* Handle leading zeros. */ | ||
548 | ref = emitir(IRTN(IR_MUL), ref, ref); | ||
549 | if ((k >>= 1) != 0) { /* Handle trailing bits. */ | ||
550 | TRef tmp = emitir(IRTN(IR_MUL), ref, ref); | ||
551 | for (; k != 1; k >>= 1) { | ||
552 | if (k & 1) | ||
553 | ref = emitir(IRTN(IR_MUL), ref, tmp); | ||
554 | tmp = emitir(IRTN(IR_MUL), tmp, tmp); | ||
555 | } | ||
556 | ref = emitir(IRTN(IR_MUL), ref, tmp); | ||
557 | } | ||
558 | return ref; | ||
559 | } | ||
560 | |||
561 | LJFOLD(POWI KNUM any) | ||
562 | LJFOLDF(simplify_powi_kx) | ||
563 | { | ||
564 | lua_Number n = knumleft; | ||
565 | if (n == 2.0) { /* 2.0 ^ i ==> ldexp(1.0, tonum(i)) */ | ||
566 | fins->o = IR_TONUM; | ||
567 | fins->op1 = fins->op2; | ||
568 | fins->op2 = 0; | ||
569 | fins->op2 = (IRRef1)lj_opt_fold(J); | ||
570 | fins->op1 = (IRRef1)lj_ir_knum_one(J); | ||
571 | fins->o = IR_LDEXP; | ||
572 | return RETRYFOLD; | ||
573 | } | ||
574 | return NEXTFOLD; | ||
575 | } | ||
576 | |||
577 | /* -- FP conversion narrowing --------------------------------------------- */ | ||
578 | |||
579 | LJFOLD(TOINT ADD any) | ||
580 | LJFOLD(TOINT SUB any) | ||
581 | LJFOLD(TOBIT ADD KNUM) | ||
582 | LJFOLD(TOBIT SUB KNUM) | ||
583 | LJFOLDF(narrow_convert) | ||
584 | { | ||
585 | PHIBARRIER(fleft); | ||
586 | /* Narrowing ignores PHIs and repeating it inside the loop is not useful. */ | ||
587 | if (J->chain[IR_LOOP]) | ||
588 | return NEXTFOLD; | ||
589 | return lj_opt_narrow_convert(J); | ||
590 | } | ||
591 | |||
592 | /* Relaxed CSE rule for TOINT allows commoning with stronger checks, too. */ | ||
593 | LJFOLD(TOINT any any) | ||
594 | LJFOLDF(cse_toint) | ||
595 | { | ||
596 | if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) { | ||
597 | IRRef ref, op1 = fins->op1; | ||
598 | uint8_t guard = irt_isguard(fins->t); | ||
599 | for (ref = J->chain[IR_TOINT]; ref > op1; ref = IR(ref)->prev) | ||
600 | if (IR(ref)->op1 == op1 && irt_isguard(IR(ref)->t) >= guard) | ||
601 | return ref; | ||
602 | } | ||
603 | return EMITFOLD; /* No fallthrough to regular CSE. */ | ||
604 | } | ||
605 | |||
606 | /* -- Integer algebraic simplifications ----------------------------------- */ | ||
607 | |||
608 | LJFOLD(ADD any KINT) | ||
609 | LJFOLD(ADDOV any KINT) | ||
610 | LJFOLD(SUBOV any KINT) | ||
611 | LJFOLDF(simplify_intadd_k) | ||
612 | { | ||
613 | if (fright->i == 0) /* i o 0 ==> i */ | ||
614 | return LEFTFOLD; | ||
615 | return NEXTFOLD; | ||
616 | } | ||
617 | |||
618 | LJFOLD(SUB any KINT) | ||
619 | LJFOLDF(simplify_intsub_k) | ||
620 | { | ||
621 | if (fright->i == 0) /* i - 0 ==> i */ | ||
622 | return LEFTFOLD; | ||
623 | fins->o = IR_ADD; /* i - k ==> i + (-k) */ | ||
624 | fins->op2 = (IRRef1)lj_ir_kint(J, -fright->i); /* Overflow for -2^31 ok. */ | ||
625 | return RETRYFOLD; | ||
626 | } | ||
627 | |||
628 | LJFOLD(SUB any any) | ||
629 | LJFOLD(SUBOV any any) | ||
630 | LJFOLDF(simplify_intsub) | ||
631 | { | ||
632 | if (fins->op1 == fins->op2 && !irt_isnum(fins->t)) /* i - i ==> 0 */ | ||
633 | return INTFOLD(0); | ||
634 | return NEXTFOLD; | ||
635 | } | ||
636 | |||
637 | LJFOLD(SUB ADD any) | ||
638 | LJFOLDF(simplify_intsubadd_leftcancel) | ||
639 | { | ||
640 | if (!irt_isnum(fins->t)) { | ||
641 | PHIBARRIER(fleft); | ||
642 | if (fins->op2 == fleft->op1) /* (i + j) - i ==> j */ | ||
643 | return fleft->op2; | ||
644 | if (fins->op2 == fleft->op2) /* (i + j) - j ==> i */ | ||
645 | return fleft->op1; | ||
646 | } | ||
647 | return NEXTFOLD; | ||
648 | } | ||
649 | |||
650 | LJFOLD(SUB SUB any) | ||
651 | LJFOLDF(simplify_intsubsub_leftcancel) | ||
652 | { | ||
653 | if (!irt_isnum(fins->t)) { | ||
654 | PHIBARRIER(fleft); | ||
655 | if (fins->op1 == fleft->op1) { /* (i - j) - i ==> 0 - j */ | ||
656 | fins->op1 = (IRRef1)lj_ir_kint(J, 0); | ||
657 | fins->op2 = fleft->op2; | ||
658 | return RETRYFOLD; | ||
659 | } | ||
660 | } | ||
661 | return NEXTFOLD; | ||
662 | } | ||
663 | |||
664 | LJFOLD(SUB any SUB) | ||
665 | LJFOLDF(simplify_intsubsub_rightcancel) | ||
666 | { | ||
667 | if (!irt_isnum(fins->t)) { | ||
668 | PHIBARRIER(fright); | ||
669 | if (fins->op1 == fright->op1) /* i - (i - j) ==> j */ | ||
670 | return fright->op2; | ||
671 | } | ||
672 | return NEXTFOLD; | ||
673 | } | ||
674 | |||
675 | LJFOLD(SUB any ADD) | ||
676 | LJFOLDF(simplify_intsubadd_rightcancel) | ||
677 | { | ||
678 | if (!irt_isnum(fins->t)) { | ||
679 | PHIBARRIER(fright); | ||
680 | if (fins->op1 == fright->op1) { /* i - (i + j) ==> 0 - j */ | ||
681 | fins->op2 = fright->op2; | ||
682 | fins->op1 = (IRRef1)lj_ir_kint(J, 0); | ||
683 | return RETRYFOLD; | ||
684 | } | ||
685 | if (fins->op1 == fright->op2) { /* i - (j + i) ==> 0 - j */ | ||
686 | fins->op2 = fright->op1; | ||
687 | fins->op1 = (IRRef1)lj_ir_kint(J, 0); | ||
688 | return RETRYFOLD; | ||
689 | } | ||
690 | } | ||
691 | return NEXTFOLD; | ||
692 | } | ||
693 | |||
694 | LJFOLD(SUB ADD ADD) | ||
695 | LJFOLDF(simplify_intsubaddadd_cancel) | ||
696 | { | ||
697 | if (!irt_isnum(fins->t)) { | ||
698 | PHIBARRIER(fleft); | ||
699 | PHIBARRIER(fright); | ||
700 | if (fleft->op1 == fright->op1) { /* (i + j1) - (i + j2) ==> j1 - j2 */ | ||
701 | fins->op1 = fleft->op2; | ||
702 | fins->op2 = fright->op2; | ||
703 | return RETRYFOLD; | ||
704 | } | ||
705 | if (fleft->op1 == fright->op2) { /* (i + j1) - (j2 + i) ==> j1 - j2 */ | ||
706 | fins->op1 = fleft->op2; | ||
707 | fins->op2 = fright->op1; | ||
708 | return RETRYFOLD; | ||
709 | } | ||
710 | if (fleft->op2 == fright->op1) { /* (j1 + i) - (i + j2) ==> j1 - j2 */ | ||
711 | fins->op1 = fleft->op1; | ||
712 | fins->op2 = fright->op2; | ||
713 | return RETRYFOLD; | ||
714 | } | ||
715 | if (fleft->op2 == fright->op2) { /* (j1 + i) - (j2 + i) ==> j1 - j2 */ | ||
716 | fins->op1 = fleft->op1; | ||
717 | fins->op2 = fright->op1; | ||
718 | return RETRYFOLD; | ||
719 | } | ||
720 | } | ||
721 | return NEXTFOLD; | ||
722 | } | ||
723 | |||
724 | LJFOLD(BAND any KINT) | ||
725 | LJFOLDF(simplify_band_k) | ||
726 | { | ||
727 | if (fright->i == 0) /* i & 0 ==> 0 */ | ||
728 | return RIGHTFOLD; | ||
729 | if (fright->i == -1) /* i & -1 ==> i */ | ||
730 | return LEFTFOLD; | ||
731 | return NEXTFOLD; | ||
732 | } | ||
733 | |||
734 | LJFOLD(BOR any KINT) | ||
735 | LJFOLDF(simplify_bor_k) | ||
736 | { | ||
737 | if (fright->i == 0) /* i | 0 ==> i */ | ||
738 | return LEFTFOLD; | ||
739 | if (fright->i == -1) /* i | -1 ==> -1 */ | ||
740 | return RIGHTFOLD; | ||
741 | return NEXTFOLD; | ||
742 | } | ||
743 | |||
744 | LJFOLD(BXOR any KINT) | ||
745 | LJFOLDF(simplify_bxor_k) | ||
746 | { | ||
747 | if (fright->i == 0) /* i xor 0 ==> i */ | ||
748 | return LEFTFOLD; | ||
749 | if (fright->i == -1) { /* i xor -1 ==> ~i */ | ||
750 | fins->o = IR_BNOT; | ||
751 | fins->op2 = 0; | ||
752 | return RETRYFOLD; | ||
753 | } | ||
754 | return NEXTFOLD; | ||
755 | } | ||
756 | |||
757 | LJFOLD(BSHL any KINT) | ||
758 | LJFOLD(BSHR any KINT) | ||
759 | LJFOLD(BSAR any KINT) | ||
760 | LJFOLD(BROL any KINT) | ||
761 | LJFOLD(BROR any KINT) | ||
762 | LJFOLDF(simplify_shift_ik) | ||
763 | { | ||
764 | int32_t k = (fright->i & 31); | ||
765 | if (k == 0) /* i o 0 ==> i */ | ||
766 | return LEFTFOLD; | ||
767 | if (k != fright->i) { /* i o k ==> i o (k & 31) */ | ||
768 | fins->op2 = (IRRef1)lj_ir_kint(J, k); | ||
769 | return RETRYFOLD; | ||
770 | } | ||
771 | if (fins->o == IR_BROR) { /* bror(i, k) ==> brol(i, (-k)&31) */ | ||
772 | fins->o = IR_BROL; | ||
773 | fins->op2 = (IRRef1)lj_ir_kint(J, (-k)&31); | ||
774 | return RETRYFOLD; | ||
775 | } | ||
776 | return NEXTFOLD; | ||
777 | } | ||
778 | |||
779 | LJFOLD(BSHL any BAND) | ||
780 | LJFOLD(BSHR any BAND) | ||
781 | LJFOLD(BSAR any BAND) | ||
782 | LJFOLD(BROL any BAND) | ||
783 | LJFOLD(BROR any BAND) | ||
784 | LJFOLDF(simplify_shift_andk) | ||
785 | { | ||
786 | #if LJ_TARGET_MASKEDSHIFT | ||
787 | IRIns *irk = IR(fright->op2); | ||
788 | PHIBARRIER(fright); | ||
789 | if (irk->o == IR_KINT) { /* i o (j & 31) ==> i o j */ | ||
790 | int32_t k = irk->i & 31; | ||
791 | if (k == 31) { | ||
792 | fins->op2 = fright->op1; | ||
793 | return RETRYFOLD; | ||
794 | } | ||
795 | } | ||
796 | #endif | ||
797 | return NEXTFOLD; | ||
798 | } | ||
799 | |||
800 | LJFOLD(BSHL KINT any) | ||
801 | LJFOLD(BSHR KINT any) | ||
802 | LJFOLDF(simplify_shift1_ki) | ||
803 | { | ||
804 | if (fleft->i == 0) /* 0 o i ==> 0 */ | ||
805 | return LEFTFOLD; | ||
806 | return NEXTFOLD; | ||
807 | } | ||
808 | |||
809 | LJFOLD(BSAR KINT any) | ||
810 | LJFOLD(BROL KINT any) | ||
811 | LJFOLD(BROR KINT any) | ||
812 | LJFOLDF(simplify_shift2_ki) | ||
813 | { | ||
814 | if (fleft->i == 0 || fleft->i == -1) /* 0 o i ==> 0; -1 o i ==> -1 */ | ||
815 | return LEFTFOLD; | ||
816 | return NEXTFOLD; | ||
817 | } | ||
818 | |||
819 | /* -- Reassociation ------------------------------------------------------- */ | ||
820 | |||
821 | LJFOLD(ADD ADD KINT) | ||
822 | LJFOLD(BAND BAND KINT) | ||
823 | LJFOLD(BOR BOR KINT) | ||
824 | LJFOLD(BXOR BXOR KINT) | ||
825 | LJFOLDF(reassoc_intarith_k) | ||
826 | { | ||
827 | IRIns *irk = IR(fleft->op2); | ||
828 | if (irk->o == IR_KINT) { | ||
829 | int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o); | ||
830 | if (k == irk->i) /* (i o k1) o k2 ==> i o k1, if (k1 o k2) == k1. */ | ||
831 | return LEFTFOLD; | ||
832 | PHIBARRIER(fleft); | ||
833 | fins->op1 = fleft->op1; | ||
834 | fins->op2 = (IRRef1)lj_ir_kint(J, k); | ||
835 | return RETRYFOLD; /* (i o k1) o k2 ==> i o (k1 o k2) */ | ||
836 | } | ||
837 | return NEXTFOLD; | ||
838 | } | ||
839 | |||
840 | LJFOLD(MIN MIN any) | ||
841 | LJFOLD(MAX MAX any) | ||
842 | LJFOLD(BAND BAND any) | ||
843 | LJFOLD(BOR BOR any) | ||
844 | LJFOLDF(reassoc_dup) | ||
845 | { | ||
846 | PHIBARRIER(fleft); | ||
847 | if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2) | ||
848 | return LEFTFOLD; /* (a o b) o a ==> a o b; (a o b) o b ==> a o b */ | ||
849 | return NEXTFOLD; | ||
850 | } | ||
851 | |||
852 | LJFOLD(BXOR BXOR any) | ||
853 | LJFOLDF(reassoc_bxor) | ||
854 | { | ||
855 | PHIBARRIER(fleft); | ||
856 | if (fins->op2 == fleft->op1) /* (a xor b) xor a ==> b */ | ||
857 | return fleft->op2; | ||
858 | if (fins->op2 == fleft->op2) /* (a xor b) xor b ==> a */ | ||
859 | return fleft->op1; | ||
860 | return NEXTFOLD; | ||
861 | } | ||
862 | |||
863 | LJFOLD(BSHL BSHL KINT) | ||
864 | LJFOLD(BSHR BSHR KINT) | ||
865 | LJFOLD(BSAR BSAR KINT) | ||
866 | LJFOLD(BROL BROL KINT) | ||
867 | LJFOLD(BROR BROR KINT) | ||
868 | LJFOLDF(reassoc_shift) | ||
869 | { | ||
870 | IRIns *irk = IR(fleft->op2); | ||
871 | PHIBARRIER(fleft); /* The (shift any KINT) rule covers k2 == 0 and more. */ | ||
872 | if (irk->o == IR_KINT) { /* (i o k1) o k2 ==> i o (k1 + k2) */ | ||
873 | int32_t k = (irk->i & 31) + (fright->i & 31); | ||
874 | if (k > 31) { /* Combined shift too wide? */ | ||
875 | if (fins->o == IR_BSHL || fins->o == IR_BSHR) | ||
876 | return INTFOLD(0); | ||
877 | else if (fins->o == IR_BSAR) | ||
878 | k = 31; | ||
879 | else | ||
880 | k &= 31; | ||
881 | } | ||
882 | fins->op1 = fleft->op1; | ||
883 | fins->op2 = (IRRef1)lj_ir_kint(J, k); | ||
884 | return RETRYFOLD; | ||
885 | } | ||
886 | return NEXTFOLD; | ||
887 | } | ||
888 | |||
889 | LJFOLD(MIN MIN KNUM) | ||
890 | LJFOLD(MAX MAX KNUM) | ||
891 | LJFOLDF(reassoc_minmax_k) | ||
892 | { | ||
893 | IRIns *irk = IR(fleft->op2); | ||
894 | if (irk->o == IR_KNUM) { | ||
895 | lua_Number a = ir_knum(irk)->n; | ||
896 | lua_Number b = knumright; | ||
897 | lua_Number y = lj_vm_foldarith(a, b, fins->o - IR_ADD); | ||
898 | if (a == y) /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */ | ||
899 | return LEFTFOLD; | ||
900 | PHIBARRIER(fleft); | ||
901 | fins->op1 = fleft->op1; | ||
902 | fins->op2 = (IRRef1)lj_ir_knum(J, y); | ||
903 | return RETRYFOLD; /* (x o k1) o k2 ==> x o (k1 o k2) */ | ||
904 | } | ||
905 | return NEXTFOLD; | ||
906 | } | ||
907 | |||
908 | LJFOLD(MIN MAX any) | ||
909 | LJFOLD(MAX MIN any) | ||
910 | LJFOLDF(reassoc_minmax_left) | ||
911 | { | ||
912 | if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2) | ||
913 | return RIGHTFOLD; /* (b o1 a) o2 b ==> b; (a o1 b) o2 b ==> b */ | ||
914 | return NEXTFOLD; | ||
915 | } | ||
916 | |||
917 | LJFOLD(MIN any MAX) | ||
918 | LJFOLD(MAX any MIN) | ||
919 | LJFOLDF(reassoc_minmax_right) | ||
920 | { | ||
921 | if (fins->op1 == fright->op1 || fins->op1 == fright->op2) | ||
922 | return LEFTFOLD; /* a o2 (a o1 b) ==> a; a o2 (b o1 a) ==> a */ | ||
923 | return NEXTFOLD; | ||
924 | } | ||
925 | |||
926 | /* Eliminate ABC across PHIs to handle t[i-1] forwarding case. | ||
927 | ** ABC(asize, (i+k)+(-k)) ==> ABC(asize, i), but only if it already exists. | ||
928 | ** Could be generalized to (i+k1)+k2 ==> i+(k1+k2), but needs better disambig. | ||
929 | */ | ||
930 | LJFOLD(ABC any ADD) | ||
931 | LJFOLDF(reassoc_abc) | ||
932 | { | ||
933 | if (irref_isk(fright->op2)) { | ||
934 | IRIns *add2 = IR(fright->op1); | ||
935 | if (add2->o == IR_ADD && irref_isk(add2->op2) && | ||
936 | IR(fright->op2)->i == -IR(add2->op2)->i) { | ||
937 | IRRef ref = J->chain[IR_ABC]; | ||
938 | IRRef lim = add2->op1; | ||
939 | if (fins->op1 > lim) lim = fins->op1; | ||
940 | while (ref > lim) { | ||
941 | IRIns *ir = IR(ref); | ||
942 | if (ir->op1 == fins->op1 && ir->op2 == add2->op1) | ||
943 | return DROPFOLD; | ||
944 | ref = ir->prev; | ||
945 | } | ||
946 | } | ||
947 | } | ||
948 | return NEXTFOLD; | ||
949 | } | ||
950 | |||
951 | /* -- Commutativity ------------------------------------------------------- */ | ||
952 | |||
953 | /* The refs of commutative ops are canonicalized. Lower refs go to the right. | ||
954 | ** Rationale behind this: | ||
955 | ** - It (also) moves constants to the right. | ||
956 | ** - It reduces the number of FOLD rules (e.g. (BOR any KINT) suffices). | ||
957 | ** - It helps CSE to find more matches. | ||
958 | ** - The assembler generates better code with constants at the right. | ||
959 | */ | ||
960 | |||
961 | LJFOLD(ADD any any) | ||
962 | LJFOLD(MUL any any) | ||
963 | LJFOLD(ADDOV any any) | ||
964 | LJFOLDF(comm_swap) | ||
965 | { | ||
966 | if (fins->op1 < fins->op2) { /* Move lower ref to the right. */ | ||
967 | IRRef1 tmp = fins->op1; | ||
968 | fins->op1 = fins->op2; | ||
969 | fins->op2 = tmp; | ||
970 | return RETRYFOLD; | ||
971 | } | ||
972 | return NEXTFOLD; | ||
973 | } | ||
974 | |||
975 | LJFOLD(EQ any any) | ||
976 | LJFOLD(NE any any) | ||
977 | LJFOLDF(comm_equal) | ||
978 | { | ||
979 | /* For non-numbers only: x == x ==> drop; x ~= x ==> fail */ | ||
980 | if (fins->op1 == fins->op2 && !irt_isnum(fins->t)) | ||
981 | return CONDFOLD(fins->o == IR_EQ); | ||
982 | return comm_swap(J); | ||
983 | } | ||
984 | |||
985 | LJFOLD(LT any any) | ||
986 | LJFOLD(GE any any) | ||
987 | LJFOLD(LE any any) | ||
988 | LJFOLD(GT any any) | ||
989 | LJFOLD(ULT any any) | ||
990 | LJFOLD(UGE any any) | ||
991 | LJFOLD(ULE any any) | ||
992 | LJFOLD(UGT any any) | ||
993 | LJFOLDF(comm_comp) | ||
994 | { | ||
995 | /* For non-numbers only: x <=> x ==> drop; x <> x ==> fail */ | ||
996 | if (fins->op1 == fins->op2 && !irt_isnum(fins->t)) | ||
997 | return CONDFOLD(fins->o & 1); | ||
998 | if (fins->op1 < fins->op2) { /* Move lower ref to the right. */ | ||
999 | IRRef1 tmp = fins->op1; | ||
1000 | fins->op1 = fins->op2; | ||
1001 | fins->op2 = tmp; | ||
1002 | fins->o ^= 3; /* GT <-> LT, GE <-> LE, does not affect U */ | ||
1003 | return RETRYFOLD; | ||
1004 | } | ||
1005 | return NEXTFOLD; | ||
1006 | } | ||
1007 | |||
1008 | LJFOLD(BAND any any) | ||
1009 | LJFOLD(BOR any any) | ||
1010 | LJFOLD(MIN any any) | ||
1011 | LJFOLD(MAX any any) | ||
1012 | LJFOLDF(comm_dup) | ||
1013 | { | ||
1014 | if (fins->op1 == fins->op2) /* x o x ==> x */ | ||
1015 | return LEFTFOLD; | ||
1016 | return comm_swap(J); | ||
1017 | } | ||
1018 | |||
1019 | LJFOLD(BXOR any any) | ||
1020 | LJFOLDF(comm_bxor) | ||
1021 | { | ||
1022 | if (fins->op1 == fins->op2) /* i xor i ==> 0 */ | ||
1023 | return INTFOLD(0); | ||
1024 | return comm_swap(J); | ||
1025 | } | ||
1026 | |||
1027 | /* -- Simplification of compound expressions ------------------------------ */ | ||
1028 | |||
1029 | static int32_t kfold_xload(IRIns *ir, const void *p) | ||
1030 | { | ||
1031 | #if !LJ_TARGET_X86ORX64 | ||
1032 | #error "Missing support for unaligned loads" | ||
1033 | #endif | ||
1034 | switch (irt_type(ir->t)) { | ||
1035 | case IRT_I8: return (int32_t)*(int8_t *)p; | ||
1036 | case IRT_U8: return (int32_t)*(uint8_t *)p; | ||
1037 | case IRT_I16: return (int32_t)*(int16_t *)p; | ||
1038 | case IRT_U16: return (int32_t)*(uint16_t *)p; | ||
1039 | default: lua_assert(irt_isint(ir->t)); return (int32_t)*(int32_t *)p; | ||
1040 | } | ||
1041 | } | ||
1042 | |||
1043 | /* Turn: string.sub(str, a, b) == kstr | ||
1044 | ** into: string.byte(str, a) == string.byte(kstr, 1) etc. | ||
1045 | ** Note: this creates unaligned XLOADs! | ||
1046 | */ | ||
1047 | LJFOLD(EQ SNEW KGC) | ||
1048 | LJFOLD(NE SNEW KGC) | ||
1049 | LJFOLDF(merge_eqne_snew_kgc) | ||
1050 | { | ||
1051 | GCstr *kstr = ir_kstr(fright); | ||
1052 | int32_t len = (int32_t)kstr->len; | ||
1053 | lua_assert(irt_isstr(fins->t)); | ||
1054 | if (len <= 4) { /* Handle string lengths 0, 1, 2, 3, 4. */ | ||
1055 | IROp op = (IROp)fins->o; | ||
1056 | IRRef strref = fleft->op1; | ||
1057 | lua_assert(IR(strref)->o == IR_STRREF); | ||
1058 | if (op == IR_EQ) { | ||
1059 | emitir(IRTGI(IR_EQ), fleft->op2, lj_ir_kint(J, len)); | ||
1060 | /* Caveat: fins/fleft/fright is no longer valid after emitir. */ | ||
1061 | } else { | ||
1062 | /* NE is not expanded since this would need an OR of two conds. */ | ||
1063 | if (!irref_isk(fleft->op2)) /* Only handle the constant length case. */ | ||
1064 | return NEXTFOLD; | ||
1065 | if (IR(fleft->op2)->i != len) | ||
1066 | return DROPFOLD; | ||
1067 | } | ||
1068 | if (len > 0) { | ||
1069 | /* A 4 byte load for length 3 is ok -- all strings have an extra NUL. */ | ||
1070 | uint16_t ot = (uint16_t)(len == 1 ? IRT(IR_XLOAD, IRT_I8) : | ||
1071 | len == 2 ? IRT(IR_XLOAD, IRT_U16) : | ||
1072 | IRTI(IR_XLOAD)); | ||
1073 | TRef tmp = emitir(ot, strref, len > 1 ? IRXLOAD_UNALIGNED : 0); | ||
1074 | TRef val = lj_ir_kint(J, kfold_xload(IR(tref_ref(tmp)), strdata(kstr))); | ||
1075 | if (len == 3) | ||
1076 | tmp = emitir(IRTI(IR_BAND), tmp, | ||
1077 | lj_ir_kint(J, LJ_ENDIAN_SELECT(0x00ffffff, 0xffffff00))); | ||
1078 | fins->op1 = (IRRef1)tmp; | ||
1079 | fins->op2 = (IRRef1)val; | ||
1080 | fins->ot = (IROpT)IRTGI(op); | ||
1081 | return RETRYFOLD; | ||
1082 | } else { | ||
1083 | return DROPFOLD; | ||
1084 | } | ||
1085 | } | ||
1086 | return NEXTFOLD; | ||
1087 | } | ||
1088 | |||
1089 | /* -- Loads --------------------------------------------------------------- */ | ||
1090 | |||
1091 | /* Loads cannot be folded or passed on to CSE in general. | ||
1092 | ** Alias analysis is needed to check for forwarding opportunities. | ||
1093 | ** | ||
1094 | ** Caveat: *all* loads must be listed here or they end up at CSE! | ||
1095 | */ | ||
1096 | |||
1097 | LJFOLD(ALOAD any) | ||
1098 | LJFOLDX(lj_opt_fwd_aload) | ||
1099 | |||
1100 | LJFOLD(HLOAD any) | ||
1101 | LJFOLDX(lj_opt_fwd_hload) | ||
1102 | |||
1103 | LJFOLD(ULOAD any) | ||
1104 | LJFOLDX(lj_opt_fwd_uload) | ||
1105 | |||
1106 | LJFOLD(TLEN any) | ||
1107 | LJFOLDX(lj_opt_fwd_tlen) | ||
1108 | |||
1109 | /* Upvalue refs are really loads, but there are no corresponding stores. | ||
1110 | ** So CSE is ok for them, except for UREFO across a GC step (see below). | ||
1111 | ** If the referenced function is const, its upvalue addresses are const, too. | ||
1112 | ** This can be used to improve CSE by looking for the same address, | ||
1113 | ** even if the upvalues originate from a different function. | ||
1114 | */ | ||
1115 | LJFOLD(UREFO KGC any) | ||
1116 | LJFOLD(UREFC KGC any) | ||
1117 | LJFOLDF(cse_uref) | ||
1118 | { | ||
1119 | if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) { | ||
1120 | IRRef ref = J->chain[fins->o]; | ||
1121 | GCfunc *fn = ir_kfunc(fleft); | ||
1122 | GCupval *uv = gco2uv(gcref(fn->l.uvptr[fins->op2])); | ||
1123 | while (ref > 0) { | ||
1124 | IRIns *ir = IR(ref); | ||
1125 | if (irref_isk(ir->op1)) { | ||
1126 | GCfunc *fn2 = ir_kfunc(IR(ir->op1)); | ||
1127 | if (gco2uv(gcref(fn2->l.uvptr[ir->op2])) == uv) { | ||
1128 | if (fins->o == IR_UREFO && gcstep_barrier(J, ref)) | ||
1129 | break; | ||
1130 | return ref; | ||
1131 | } | ||
1132 | } | ||
1133 | ref = ir->prev; | ||
1134 | } | ||
1135 | } | ||
1136 | return EMITFOLD; | ||
1137 | } | ||
1138 | |||
1139 | /* We can safely FOLD/CSE array/hash refs and field loads, since there | ||
1140 | ** are no corresponding stores. But NEWREF may invalidate all of them. | ||
1141 | ** Lacking better disambiguation for table references, these optimizations | ||
1142 | ** are simply disabled across any NEWREF. | ||
1143 | ** Only HREF needs the NEWREF check -- AREF and HREFK already depend on | ||
1144 | ** FLOADs. And NEWREF itself is treated like a store (see below). | ||
1145 | */ | ||
1146 | LJFOLD(HREF any any) | ||
1147 | LJFOLDF(cse_href) | ||
1148 | { | ||
1149 | TRef tr = lj_opt_cse(J); | ||
1150 | return tref_ref(tr) < J->chain[IR_NEWREF] ? EMITFOLD : tr; | ||
1151 | } | ||
1152 | |||
1153 | LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE) | ||
1154 | LJFOLDF(fload_tab_tnew_asize) | ||
1155 | { | ||
1156 | if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fins->op1 > J->chain[IR_NEWREF]) | ||
1157 | return INTFOLD(fleft->op1); | ||
1158 | return NEXTFOLD; | ||
1159 | } | ||
1160 | |||
1161 | LJFOLD(FLOAD TNEW IRFL_TAB_HMASK) | ||
1162 | LJFOLDF(fload_tab_tnew_hmask) | ||
1163 | { | ||
1164 | if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fins->op1 > J->chain[IR_NEWREF]) | ||
1165 | return INTFOLD((1 << fleft->op2)-1); | ||
1166 | return NEXTFOLD; | ||
1167 | } | ||
1168 | |||
1169 | LJFOLD(FLOAD TDUP IRFL_TAB_ASIZE) | ||
1170 | LJFOLDF(fload_tab_tdup_asize) | ||
1171 | { | ||
1172 | if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fins->op1 > J->chain[IR_NEWREF]) | ||
1173 | return INTFOLD((int32_t)ir_ktab(IR(fleft->op1))->asize); | ||
1174 | return NEXTFOLD; | ||
1175 | } | ||
1176 | |||
1177 | LJFOLD(FLOAD TDUP IRFL_TAB_HMASK) | ||
1178 | LJFOLDF(fload_tab_tdup_hmask) | ||
1179 | { | ||
1180 | if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fins->op1 > J->chain[IR_NEWREF]) | ||
1181 | return INTFOLD((int32_t)ir_ktab(IR(fleft->op1))->hmask); | ||
1182 | return NEXTFOLD; | ||
1183 | } | ||
1184 | |||
1185 | LJFOLD(FLOAD any IRFL_TAB_ARRAY) | ||
1186 | LJFOLD(FLOAD any IRFL_TAB_NODE) | ||
1187 | LJFOLD(FLOAD any IRFL_TAB_ASIZE) | ||
1188 | LJFOLD(FLOAD any IRFL_TAB_HMASK) | ||
1189 | LJFOLDF(fload_tab_ah) | ||
1190 | { | ||
1191 | TRef tr = lj_opt_cse(J); | ||
1192 | return tref_ref(tr) < J->chain[IR_NEWREF] ? EMITFOLD : tr; | ||
1193 | } | ||
1194 | |||
1195 | /* Strings are immutable, so we can safely FOLD/CSE the related FLOAD. */ | ||
1196 | LJFOLD(FLOAD KGC IRFL_STR_LEN) | ||
1197 | LJFOLDF(fload_str_len) | ||
1198 | { | ||
1199 | if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) | ||
1200 | return INTFOLD((int32_t)ir_kstr(fleft)->len); | ||
1201 | return NEXTFOLD; | ||
1202 | } | ||
1203 | |||
1204 | LJFOLD(FLOAD any IRFL_STR_LEN) | ||
1205 | LJFOLDX(lj_opt_cse) | ||
1206 | |||
1207 | /* All other field loads need alias analysis. */ | ||
1208 | LJFOLD(FLOAD any any) | ||
1209 | LJFOLDX(lj_opt_fwd_fload) | ||
1210 | |||
1211 | /* This is for LOOP only. Recording handles SLOADs internally. */ | ||
1212 | LJFOLD(SLOAD any any) | ||
1213 | LJFOLDF(fwd_sload) | ||
1214 | { | ||
1215 | lua_assert(J->slot[fins->op1] != 0); | ||
1216 | return J->slot[fins->op1]; | ||
1217 | } | ||
1218 | |||
1219 | /* Strings are immutable, so we can safely FOLD/CSE an XLOAD of a string. */ | ||
1220 | LJFOLD(XLOAD STRREF any) | ||
1221 | LJFOLDF(xload_str) | ||
1222 | { | ||
1223 | if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) { | ||
1224 | GCstr *str = ir_kstr(IR(fleft->op1)); | ||
1225 | int32_t ofs = IR(fleft->op2)->i; | ||
1226 | lua_assert((MSize)ofs < str->len); | ||
1227 | lua_assert((MSize)(ofs + (1<<((fins->op2>>8)&3))) <= str->len); | ||
1228 | return INTFOLD(kfold_xload(fins, strdata(str)+ofs)); | ||
1229 | } | ||
1230 | return CSEFOLD; | ||
1231 | } | ||
1232 | /* No XLOAD of non-strings (yet), so we don't need a (XLOAD any any) rule. */ | ||
1233 | |||
1234 | /* -- Write barriers ------------------------------------------------------ */ | ||
1235 | |||
1236 | /* Write barriers are amenable to CSE, but not across any incremental | ||
1237 | ** GC steps. | ||
1238 | ** | ||
1239 | ** The same logic applies to open upvalue references, because the stack | ||
1240 | ** may be resized during a GC step. | ||
1241 | */ | ||
1242 | LJFOLD(TBAR any) | ||
1243 | LJFOLD(OBAR any any) | ||
1244 | LJFOLD(UREFO any any) | ||
1245 | LJFOLDF(barrier_tab) | ||
1246 | { | ||
1247 | TRef tr = lj_opt_cse(J); | ||
1248 | if (gcstep_barrier(J, tref_ref(tr))) /* CSE across GC step? */ | ||
1249 | return EMITFOLD; /* Raw emit. Assumes fins is left intact by CSE. */ | ||
1250 | return tr; | ||
1251 | } | ||
1252 | |||
1253 | LJFOLD(TBAR TNEW) | ||
1254 | LJFOLD(TBAR TDUP) | ||
1255 | LJFOLDF(barrier_tnew_tdup) | ||
1256 | { | ||
1257 | /* New tables are always white and never need a barrier. */ | ||
1258 | if (fins->op1 < J->chain[IR_LOOP]) /* Except across a GC step. */ | ||
1259 | return NEXTFOLD; | ||
1260 | return DROPFOLD; | ||
1261 | } | ||
1262 | |||
1263 | /* -- Stores and allocations ---------------------------------------------- */ | ||
1264 | |||
1265 | /* Stores and allocations cannot be folded or passed on to CSE in general. | ||
1266 | ** But some stores can be eliminated with dead-store elimination (DSE). | ||
1267 | ** | ||
1268 | ** Caveat: *all* stores and allocs must be listed here or they end up at CSE! | ||
1269 | */ | ||
1270 | |||
1271 | LJFOLD(ASTORE any any) | ||
1272 | LJFOLD(HSTORE any any) | ||
1273 | LJFOLDX(lj_opt_dse_ahstore) | ||
1274 | |||
1275 | LJFOLD(USTORE any any) | ||
1276 | LJFOLDX(lj_opt_dse_ustore) | ||
1277 | |||
1278 | LJFOLD(FSTORE any any) | ||
1279 | LJFOLDX(lj_opt_dse_fstore) | ||
1280 | |||
1281 | LJFOLD(NEWREF any any) /* Treated like a store. */ | ||
1282 | LJFOLD(TNEW any any) | ||
1283 | LJFOLD(TDUP any) | ||
1284 | LJFOLDF(store_raw) | ||
1285 | { | ||
1286 | return EMITFOLD; | ||
1287 | } | ||
1288 | |||
1289 | /* ------------------------------------------------------------------------ */ | ||
1290 | |||
1291 | /* Every entry in the generated hash table is a 32 bit pattern: | ||
1292 | ** | ||
1293 | ** xxxxxxxx iiiiiiii llllllll rrrrrrrr | ||
1294 | ** | ||
1295 | ** xxxxxxxx = 8 bit index into fold function table | ||
1296 | ** iiiiiiii = 8 bit folded instruction opcode | ||
1297 | ** llllllll = 8 bit left instruction opcode | ||
1298 | ** rrrrrrrr = 8 bit right instruction opcode or 8 bits from literal field | ||
1299 | */ | ||
1300 | |||
1301 | #include "lj_folddef.h" | ||
1302 | |||
1303 | /* ------------------------------------------------------------------------ */ | ||
1304 | |||
1305 | /* Fold IR instruction. */ | ||
1306 | TRef LJ_FASTCALL lj_opt_fold(jit_State *J) | ||
1307 | { | ||
1308 | uint32_t key, any; | ||
1309 | IRRef ref; | ||
1310 | |||
1311 | if (LJ_UNLIKELY((J->flags & JIT_F_OPT_MASK) != JIT_F_OPT_DEFAULT)) { | ||
1312 | lua_assert(((JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE|JIT_F_OPT_DSE) | | ||
1313 | JIT_F_OPT_DEFAULT) == JIT_F_OPT_DEFAULT); | ||
1314 | /* Folding disabled? Chain to CSE, but not for loads/stores/allocs. */ | ||
1315 | if (!(J->flags & JIT_F_OPT_FOLD) && irm_kind(lj_ir_mode[fins->o]) == IRM_N) | ||
1316 | return lj_opt_cse(J); | ||
1317 | |||
1318 | /* Forwarding or CSE disabled? Emit raw IR for loads, except for SLOAD. */ | ||
1319 | if ((J->flags & (JIT_F_OPT_FWD|JIT_F_OPT_CSE)) != | ||
1320 | (JIT_F_OPT_FWD|JIT_F_OPT_CSE) && | ||
1321 | irm_kind(lj_ir_mode[fins->o]) == IRM_L && fins->o != IR_SLOAD) | ||
1322 | return lj_ir_emit(J); | ||
1323 | |||
1324 | /* DSE disabled? Emit raw IR for stores. */ | ||
1325 | if (!(J->flags & JIT_F_OPT_DSE) && irm_kind(lj_ir_mode[fins->o]) == IRM_S) | ||
1326 | return lj_ir_emit(J); | ||
1327 | } | ||
1328 | |||
1329 | /* Fold engine start/retry point. */ | ||
1330 | retry: | ||
1331 | /* Construct key from opcode and operand opcodes (unless literal/none). */ | ||
1332 | key = ((uint32_t)fins->o << 16); | ||
1333 | if (fins->op1 >= J->cur.nk) { | ||
1334 | key += (uint32_t)IR(fins->op1)->o << 8; | ||
1335 | *fleft = *IR(fins->op1); | ||
1336 | } | ||
1337 | if (fins->op2 >= J->cur.nk) { | ||
1338 | key += (uint32_t)IR(fins->op2)->o; | ||
1339 | *fright = *IR(fins->op2); | ||
1340 | } else { | ||
1341 | key += (fins->op2 & 0xffu); /* For IRFPM_* and IRFL_*. */ | ||
1342 | } | ||
1343 | |||
1344 | /* Check for a match in order from most specific to least specific. */ | ||
1345 | any = 0; | ||
1346 | for (;;) { | ||
1347 | uint32_t k = key | any; | ||
1348 | uint32_t h = fold_hashkey(k); | ||
1349 | uint32_t fh = fold_hash[h]; /* Lookup key in semi-perfect hash table. */ | ||
1350 | if ((fh & 0xffffff) == k || (fh = fold_hash[h+1], (fh & 0xffffff) == k)) { | ||
1351 | ref = (IRRef)tref_ref(fold_func[fh >> 24](J)); | ||
1352 | if (ref != NEXTFOLD) | ||
1353 | break; | ||
1354 | } | ||
1355 | if (any == 0xffff) /* Exhausted folding. Pass on to CSE. */ | ||
1356 | return lj_opt_cse(J); | ||
1357 | any = (any | (any >> 8)) ^ 0xff00; | ||
1358 | } | ||
1359 | |||
1360 | /* Return value processing, ordered by frequency. */ | ||
1361 | if (LJ_LIKELY(ref >= MAX_FOLD)) | ||
1362 | return TREF(ref, irt_t(IR(ref)->t)); | ||
1363 | if (ref == RETRYFOLD) | ||
1364 | goto retry; | ||
1365 | if (ref == KINTFOLD) | ||
1366 | return lj_ir_kint(J, fins->i); | ||
1367 | if (ref == FAILFOLD) | ||
1368 | lj_trace_err(J, LJ_TRERR_GFAIL); | ||
1369 | lua_assert(ref == DROPFOLD); | ||
1370 | return REF_DROP; | ||
1371 | } | ||
1372 | |||
1373 | /* -- Common-Subexpression Elimination ------------------------------------ */ | ||
1374 | |||
1375 | /* CSE an IR instruction. This is very fast due to the skip-list chains. */ | ||
1376 | TRef LJ_FASTCALL lj_opt_cse(jit_State *J) | ||
1377 | { | ||
1378 | /* Avoid narrow to wide store-to-load forwarding stall */ | ||
1379 | IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16); | ||
1380 | IROp op = fins->o; | ||
1381 | if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) { | ||
1382 | /* Limited search for same operands in per-opcode chain. */ | ||
1383 | IRRef ref = J->chain[op]; | ||
1384 | IRRef lim = fins->op1; | ||
1385 | if (fins->op2 > lim) lim = fins->op2; /* Relies on lit < REF_BIAS. */ | ||
1386 | while (ref > lim) { | ||
1387 | if (IR(ref)->op12 == op12) | ||
1388 | return TREF(ref, irt_t(IR(ref)->t)); /* Common subexpression found. */ | ||
1389 | ref = IR(ref)->prev; | ||
1390 | } | ||
1391 | } | ||
1392 | /* Otherwise emit IR (inlined for speed). */ | ||
1393 | { | ||
1394 | IRRef ref = lj_ir_nextins(J); | ||
1395 | IRIns *ir = IR(ref); | ||
1396 | ir->prev = J->chain[op]; | ||
1397 | ir->op12 = op12; | ||
1398 | J->chain[op] = (IRRef1)ref; | ||
1399 | ir->o = fins->o; | ||
1400 | J->guardemit.irt |= fins->t.irt; | ||
1401 | return TREF(ref, irt_t((ir->t = fins->t))); | ||
1402 | } | ||
1403 | } | ||
1404 | |||
1405 | /* ------------------------------------------------------------------------ */ | ||
1406 | |||
1407 | #undef IR | ||
1408 | #undef fins | ||
1409 | #undef fleft | ||
1410 | #undef fright | ||
1411 | #undef knumleft | ||
1412 | #undef knumright | ||
1413 | #undef emitir | ||
1414 | |||
1415 | #endif | ||
diff --git a/src/lj_opt_loop.c b/src/lj_opt_loop.c new file mode 100644 index 00000000..adc0c476 --- /dev/null +++ b/src/lj_opt_loop.c | |||
@@ -0,0 +1,358 @@ | |||
1 | /* | ||
2 | ** LOOP: Loop Optimizations. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #define lj_opt_loop_c | ||
7 | #define LUA_CORE | ||
8 | |||
9 | #include "lj_obj.h" | ||
10 | |||
11 | #if LJ_HASJIT | ||
12 | |||
13 | #include "lj_gc.h" | ||
14 | #include "lj_err.h" | ||
15 | #include "lj_str.h" | ||
16 | #include "lj_ir.h" | ||
17 | #include "lj_jit.h" | ||
18 | #include "lj_iropt.h" | ||
19 | #include "lj_trace.h" | ||
20 | #include "lj_snap.h" | ||
21 | #include "lj_vm.h" | ||
22 | |||
23 | /* Loop optimization: | ||
24 | ** | ||
25 | ** Traditional Loop-Invariant Code Motion (LICM) splits the instructions | ||
26 | ** of a loop into invariant and variant instructions. The invariant | ||
27 | ** instructions are hoisted out of the loop and only the variant | ||
28 | ** instructions remain inside the loop body. | ||
29 | ** | ||
30 | ** Unfortunately LICM is mostly useless for compiling dynamic languages. | ||
31 | ** The IR has many guards and most of the subsequent instructions are | ||
32 | ** control-dependent on them. The first non-hoistable guard would | ||
33 | ** effectively prevent hoisting of all subsequent instructions. | ||
34 | ** | ||
35 | ** That's why we use a special form of unrolling using copy-substitution, | ||
36 | ** combined with redundancy elimination: | ||
37 | ** | ||
38 | ** The recorded instruction stream is re-emitted to the compiler pipeline | ||
39 | ** with substituted operands. The substitution table is filled with the | ||
40 | ** refs returned by re-emitting each instruction. This can be done | ||
41 | ** on-the-fly, because the IR is in strict SSA form, where every ref is | ||
42 | ** defined before its use. | ||
43 | ** | ||
44 | ** This approach generates two code sections, separated by the LOOP | ||
45 | ** instruction: | ||
46 | ** | ||
47 | ** 1. The recorded instructions form a kind of pre-roll for the loop. It | ||
48 | ** contains a mix of invariant and variant instructions and performs | ||
49 | ** exactly one loop iteration (but not necessarily the 1st iteration). | ||
50 | ** | ||
51 | ** 2. The loop body contains only the variant instructions and performs | ||
52 | ** all remaining loop iterations. | ||
53 | ** | ||
54 | ** At first sight that looks like a waste of space, because the variant | ||
55 | ** instructions are present twice. But the key insight is that the | ||
56 | ** pre-roll honors the control-dependencies for *both* the pre-roll itself | ||
57 | ** *and* the loop body! | ||
58 | ** | ||
59 | ** It also means one doesn't have to explicitly model control-dependencies | ||
60 | ** (which, BTW, wouldn't help LICM much). And it's much easier to | ||
61 | ** integrate sparse snapshotting with this approach. | ||
62 | ** | ||
63 | ** One of the nicest aspects of this approach is that all of the | ||
64 | ** optimizations of the compiler pipeline (FOLD, CSE, FWD, etc.) can be | ||
65 | ** reused with only minor restrictions (e.g. one should not fold | ||
66 | ** instructions across loop-carried dependencies). | ||
67 | ** | ||
68 | ** But in general all optimizations can be applied which only need to look | ||
69 | ** backwards into the generated instruction stream. At any point in time | ||
70 | ** during the copy-substitution process this contains both a static loop | ||
71 | ** iteration (the pre-roll) and a dynamic one (from the to-be-copied | ||
72 | ** instruction up to the end of the partial loop body). | ||
73 | ** | ||
74 | ** Since control-dependencies are implicitly kept, CSE also applies to all | ||
75 | ** kinds of guards. The major advantage is that all invariant guards can | ||
76 | ** be hoisted, too. | ||
77 | ** | ||
78 | ** Load/store forwarding works across loop iterations, too. This is | ||
79 | ** important if loop-carried dependencies are kept in upvalues or tables. | ||
80 | ** E.g. 'self.idx = self.idx + 1' deep down in some OO-style method may | ||
81 | ** become a forwarded loop-recurrence after inlining. | ||
82 | ** | ||
83 | ** Since the IR is in SSA form, loop-carried dependencies have to be | ||
84 | ** modeled with PHI instructions. The potential candidates for PHIs are | ||
85 | ** collected on-the-fly during copy-substitution. After eliminating the | ||
86 | ** redundant ones, PHI instructions are emitted *below* the loop body. | ||
87 | ** | ||
88 | ** Note that this departure from traditional SSA form doesn't change the | ||
89 | ** semantics of the PHI instructions themselves. But it greatly simplifies | ||
90 | ** on-the-fly generation of the IR and the machine code. | ||
91 | */ | ||
92 | |||
93 | /* Some local macros to save typing. Undef'd at the end. */ | ||
94 | #define IR(ref) (&J->cur.ir[(ref)]) | ||
95 | |||
96 | /* Pass IR on to next optimization in chain (FOLD). */ | ||
97 | #define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J)) | ||
98 | |||
99 | /* Emit raw IR without passing through optimizations. */ | ||
100 | #define emitir_raw(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J)) | ||
101 | |||
102 | /* -- PHI elimination ----------------------------------------------------- */ | ||
103 | |||
104 | /* Emit or eliminate collected PHIs. */ | ||
105 | static void loop_emit_phi(jit_State *J, IRRef1 *subst, IRRef1 *phi, IRRef nphi) | ||
106 | { | ||
107 | int pass2 = 0; | ||
108 | IRRef i, nslots; | ||
109 | IRRef invar = J->chain[IR_LOOP]; | ||
110 | /* Pass #1: mark redundant and potentially redundant PHIs. */ | ||
111 | for (i = 0; i < nphi; i++) { | ||
112 | IRRef lref = phi[i]; | ||
113 | IRRef rref = subst[lref]; | ||
114 | if (lref == rref || rref == REF_DROP) { /* Invariants are redundant. */ | ||
115 | irt_setmark(IR(lref)->t); | ||
116 | } else if (!(IR(rref)->op1 == lref || IR(rref)->op2 == lref)) { | ||
117 | /* Quick check for simple recurrences failed, need pass2. */ | ||
118 | irt_setmark(IR(lref)->t); | ||
119 | pass2 = 1; | ||
120 | } | ||
121 | } | ||
122 | /* Pass #2: traverse variant part and clear marks of non-redundant PHIs. */ | ||
123 | if (pass2) { | ||
124 | for (i = J->cur.nins-1; i > invar; i--) { | ||
125 | IRIns *ir = IR(i); | ||
126 | if (!irref_isk(ir->op1)) irt_clearmark(IR(ir->op1)->t); | ||
127 | if (!irref_isk(ir->op2)) irt_clearmark(IR(ir->op2)->t); | ||
128 | } | ||
129 | } | ||
130 | /* Pass #3: add PHIs for variant slots without a corresponding SLOAD. */ | ||
131 | nslots = J->baseslot+J->maxslot; | ||
132 | for (i = 1; i < nslots; i++) { | ||
133 | IRRef ref = tref_ref(J->slot[i]); | ||
134 | if (!irref_isk(ref) && ref != subst[ref]) { | ||
135 | IRIns *ir = IR(ref); | ||
136 | irt_clearmark(ir->t); /* Unmark potential uses, too. */ | ||
137 | if (!irt_isphi(ir->t) && !irt_ispri(ir->t)) { | ||
138 | irt_setphi(ir->t); | ||
139 | if (nphi >= LJ_MAX_PHI) | ||
140 | lj_trace_err(J, LJ_TRERR_PHIOV); | ||
141 | phi[nphi++] = (IRRef1)ref; | ||
142 | } | ||
143 | } | ||
144 | } | ||
145 | /* Pass #4: emit PHI instructions or eliminate PHIs. */ | ||
146 | for (i = 0; i < nphi; i++) { | ||
147 | IRRef lref = phi[i]; | ||
148 | IRIns *ir = IR(lref); | ||
149 | if (!irt_ismarked(ir->t)) { /* Emit PHI if not marked. */ | ||
150 | IRRef rref = subst[lref]; | ||
151 | if (rref > invar) | ||
152 | irt_setphi(IR(rref)->t); | ||
153 | emitir_raw(IRT(IR_PHI, irt_type(ir->t)), lref, rref); | ||
154 | } else { /* Otherwise eliminate PHI. */ | ||
155 | irt_clearmark(ir->t); | ||
156 | irt_clearphi(ir->t); | ||
157 | } | ||
158 | } | ||
159 | } | ||
160 | |||
161 | /* -- Loop unrolling using copy-substitution ------------------------------ */ | ||
162 | |||
163 | /* Unroll loop. */ | ||
164 | static void loop_unroll(jit_State *J) | ||
165 | { | ||
166 | IRRef1 phi[LJ_MAX_PHI]; | ||
167 | uint32_t nphi = 0; | ||
168 | IRRef1 *subst; | ||
169 | SnapShot *osnap, *snap; | ||
170 | IRRef2 *loopmap; | ||
171 | BCReg loopslots; | ||
172 | MSize nsnap, nsnapmap; | ||
173 | IRRef ins, invar, osnapref; | ||
174 | |||
175 | /* Use temp buffer for substitution table. | ||
176 | ** Only non-constant refs in [REF_BIAS,invar) are valid indexes. | ||
177 | ** Note: don't call into the VM or run the GC or the buffer may be gone. | ||
178 | */ | ||
179 | invar = J->cur.nins; | ||
180 | subst = (IRRef1 *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, | ||
181 | (invar-REF_BIAS)*sizeof(IRRef1)) - REF_BIAS; | ||
182 | subst[REF_BASE] = REF_BASE; | ||
183 | |||
184 | /* LOOP separates the pre-roll from the loop body. */ | ||
185 | emitir_raw(IRTG(IR_LOOP, IRT_NIL), 0, 0); | ||
186 | |||
187 | /* Ensure size for copy-substituted snapshots (minus #0 and loop snapshot). */ | ||
188 | nsnap = J->cur.nsnap; | ||
189 | if (LJ_UNLIKELY(2*nsnap-2 > J->sizesnap)) { | ||
190 | MSize maxsnap = (MSize)J->param[JIT_P_maxsnap]; | ||
191 | if (2*nsnap-2 > maxsnap) | ||
192 | lj_trace_err(J, LJ_TRERR_SNAPOV); | ||
193 | lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot); | ||
194 | J->cur.snap = J->snapbuf; | ||
195 | } | ||
196 | nsnapmap = J->cur.nsnapmap; /* Use temp. copy to avoid undo. */ | ||
197 | if (LJ_UNLIKELY(nsnapmap*2 > J->sizesnapmap)) { | ||
198 | J->snapmapbuf = (IRRef2 *)lj_mem_realloc(J->L, J->snapmapbuf, | ||
199 | J->sizesnapmap*sizeof(IRRef2), | ||
200 | 2*J->sizesnapmap*sizeof(IRRef2)); | ||
201 | J->cur.snapmap = J->snapmapbuf; | ||
202 | J->sizesnapmap *= 2; | ||
203 | } | ||
204 | |||
205 | /* The loop snapshot is used for fallback substitutions. */ | ||
206 | snap = &J->cur.snap[nsnap-1]; | ||
207 | loopmap = &J->cur.snapmap[snap->mapofs]; | ||
208 | loopslots = snap->nslots; | ||
209 | /* The PC of snapshot #0 and the loop snapshot must match. */ | ||
210 | lua_assert(loopmap[loopslots] == J->cur.snapmap[J->cur.snap[0].nslots]); | ||
211 | |||
212 | /* Start substitution with snapshot #1 (#0 is empty for root traces). */ | ||
213 | osnap = &J->cur.snap[1]; | ||
214 | osnapref = osnap->ref; | ||
215 | |||
216 | /* Copy and substitute all recorded instructions and snapshots. */ | ||
217 | for (ins = REF_FIRST; ins < invar; ins++) { | ||
218 | IRIns *ir; | ||
219 | IRRef op1, op2; | ||
220 | |||
221 | /* Copy-substitute snapshot. */ | ||
222 | if (ins >= osnapref) { | ||
223 | IRRef2 *nmap, *omap = &J->cur.snapmap[osnap->mapofs]; | ||
224 | BCReg s, nslots; | ||
225 | uint32_t nmapofs, nframelinks; | ||
226 | if (irt_isguard(J->guardemit)) { /* Guard inbetween? */ | ||
227 | nmapofs = nsnapmap; | ||
228 | snap++; /* Add new snapshot. */ | ||
229 | } else { | ||
230 | nmapofs = snap->mapofs; /* Overwrite previous snapshot. */ | ||
231 | } | ||
232 | J->guardemit.irt = 0; | ||
233 | nslots = osnap->nslots; | ||
234 | nframelinks = osnap->nframelinks; | ||
235 | snap->mapofs = (uint16_t)nmapofs; | ||
236 | snap->ref = (IRRef1)J->cur.nins; | ||
237 | snap->nslots = (uint8_t)nslots; | ||
238 | snap->nframelinks = (uint8_t)nframelinks; | ||
239 | snap->count = 0; | ||
240 | osnap++; | ||
241 | osnapref = osnap->ref; | ||
242 | nsnapmap = nmapofs + nslots + nframelinks; | ||
243 | nmap = &J->cur.snapmap[nmapofs]; | ||
244 | /* Substitute snapshot slots. */ | ||
245 | for (s = 0; s < nslots; s++) { | ||
246 | IRRef ref = snap_ref(omap[s]); | ||
247 | if (ref) { | ||
248 | if (!irref_isk(ref)) | ||
249 | ref = subst[ref]; | ||
250 | } else if (s < loopslots) { | ||
251 | ref = loopmap[s]; | ||
252 | } | ||
253 | nmap[s] = ref; | ||
254 | } | ||
255 | /* Copy frame links. */ | ||
256 | nmap += nslots; | ||
257 | omap += nslots; | ||
258 | for (s = 0; s < nframelinks; s++) | ||
259 | nmap[s] = omap[s]; | ||
260 | } | ||
261 | |||
262 | /* Substitute instruction operands. */ | ||
263 | ir = IR(ins); | ||
264 | op1 = ir->op1; | ||
265 | if (!irref_isk(op1)) op1 = subst[op1]; | ||
266 | op2 = ir->op2; | ||
267 | if (!irref_isk(op2)) op2 = subst[op2]; | ||
268 | if (irm_kind(lj_ir_mode[ir->o]) == IRM_N && | ||
269 | op1 == ir->op1 && op2 == ir->op2) { /* Regular invariant ins? */ | ||
270 | subst[ins] = (IRRef1)ins; /* Shortcut. */ | ||
271 | } else { | ||
272 | /* Re-emit substituted instruction to the FOLD/CSE/etc. pipeline. */ | ||
273 | IRType1 t = ir->t; /* Get this first, since emitir may invalidate ir. */ | ||
274 | IRRef ref = tref_ref(emitir(ir->ot & ~IRT_ISPHI, op1, op2)); | ||
275 | subst[ins] = (IRRef1)ref; | ||
276 | if (ref != ins && ref < invar) { /* Loop-carried dependency? */ | ||
277 | IRIns *irr = IR(ref); | ||
278 | /* Potential PHI? */ | ||
279 | if (!irref_isk(ref) && !irt_isphi(irr->t) && !irt_ispri(irr->t)) { | ||
280 | irt_setphi(irr->t); | ||
281 | if (nphi >= LJ_MAX_PHI) | ||
282 | lj_trace_err(J, LJ_TRERR_PHIOV); | ||
283 | phi[nphi++] = (IRRef1)ref; | ||
284 | } | ||
285 | /* Check all loop-carried dependencies for type instability. */ | ||
286 | if (!irt_sametype(t, irr->t)) { | ||
287 | if (irt_isnum(t) && irt_isinteger(irr->t)) /* Fix int->num case. */ | ||
288 | subst[ins] = tref_ref(emitir(IRTN(IR_TONUM), ref, 0)); | ||
289 | else | ||
290 | lj_trace_err(J, LJ_TRERR_TYPEINS); | ||
291 | } | ||
292 | } | ||
293 | } | ||
294 | } | ||
295 | if (irt_isguard(J->guardemit)) { /* Guard inbetween? */ | ||
296 | J->cur.nsnapmap = (uint16_t)nsnapmap; | ||
297 | snap++; | ||
298 | } else { | ||
299 | J->cur.nsnapmap = (uint16_t)snap->mapofs; /* Last snapshot is redundant. */ | ||
300 | } | ||
301 | J->cur.nsnap = (uint16_t)(snap - J->cur.snap); | ||
302 | lua_assert(J->cur.nsnapmap <= J->sizesnapmap); | ||
303 | |||
304 | loop_emit_phi(J, subst, phi, nphi); | ||
305 | } | ||
306 | |||
307 | /* Undo any partial changes made by the loop optimization. */ | ||
308 | static void loop_undo(jit_State *J, IRRef ins) | ||
309 | { | ||
310 | lj_ir_rollback(J, ins); | ||
311 | for (ins--; ins >= REF_FIRST; ins--) { /* Remove flags. */ | ||
312 | IRIns *ir = IR(ins); | ||
313 | irt_clearphi(ir->t); | ||
314 | irt_clearmark(ir->t); | ||
315 | } | ||
316 | } | ||
317 | |||
318 | /* Protected callback for loop optimization. */ | ||
319 | static TValue *cploop_opt(lua_State *L, lua_CFunction dummy, void *ud) | ||
320 | { | ||
321 | UNUSED(L); UNUSED(dummy); | ||
322 | loop_unroll((jit_State *)ud); | ||
323 | return NULL; | ||
324 | } | ||
325 | |||
326 | /* Loop optimization. */ | ||
327 | int lj_opt_loop(jit_State *J) | ||
328 | { | ||
329 | IRRef nins = J->cur.nins; | ||
330 | int errcode = lj_vm_cpcall(J->L, cploop_opt, NULL, J); | ||
331 | if (LJ_UNLIKELY(errcode)) { | ||
332 | lua_State *L = J->L; | ||
333 | if (errcode == LUA_ERRRUN && tvisnum(L->top-1)) { /* Trace error? */ | ||
334 | int32_t e = lj_num2int(numV(L->top-1)); | ||
335 | switch ((TraceError)e) { | ||
336 | case LJ_TRERR_TYPEINS: /* Type instability. */ | ||
337 | case LJ_TRERR_GFAIL: /* Guard would always fail. */ | ||
338 | /* Unrolling via recording fixes many cases, e.g. a flipped boolean. */ | ||
339 | if (--J->instunroll < 0) /* But do not unroll forever. */ | ||
340 | break; | ||
341 | L->top--; /* Remove error object. */ | ||
342 | J->guardemit.irt = 0; | ||
343 | loop_undo(J, nins); | ||
344 | return 1; /* Loop optimization failed, continue recording. */ | ||
345 | default: | ||
346 | break; | ||
347 | } | ||
348 | } | ||
349 | lj_err_throw(L, errcode); /* Propagate all other errors. */ | ||
350 | } | ||
351 | return 0; /* Loop optimization is ok. */ | ||
352 | } | ||
353 | |||
354 | #undef IR | ||
355 | #undef emitir | ||
356 | #undef emitir_raw | ||
357 | |||
358 | #endif | ||
diff --git a/src/lj_opt_mem.c b/src/lj_opt_mem.c new file mode 100644 index 00000000..77a9c0e7 --- /dev/null +++ b/src/lj_opt_mem.c | |||
@@ -0,0 +1,550 @@ | |||
1 | /* | ||
2 | ** Memory access optimizations. | ||
3 | ** AA: Alias Analysis using high-level semantic disambiguation. | ||
4 | ** FWD: Load Forwarding (L2L) + Store Forwarding (S2L). | ||
5 | ** DSE: Dead-Store Elimination. | ||
6 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
7 | */ | ||
8 | |||
9 | #define lj_opt_mem_c | ||
10 | #define LUA_CORE | ||
11 | |||
12 | #include "lj_obj.h" | ||
13 | |||
14 | #if LJ_HASJIT | ||
15 | |||
16 | #include "lj_tab.h" | ||
17 | #include "lj_ir.h" | ||
18 | #include "lj_jit.h" | ||
19 | #include "lj_iropt.h" | ||
20 | |||
21 | /* Some local macros to save typing. Undef'd at the end. */ | ||
22 | #define IR(ref) (&J->cur.ir[(ref)]) | ||
23 | #define fins (&J->fold.ins) | ||
24 | |||
25 | /* | ||
26 | ** Caveat #1: return value is not always a TRef -- only use with tref_ref(). | ||
27 | ** Caveat #2: FWD relies on active CSE for xREF operands -- see lj_opt_fold(). | ||
28 | */ | ||
29 | |||
30 | /* Return values from alias analysis (see aa_ahref, aa_uref, aa_fref). ALIAS_MAY is the conservative answer when nothing definite is known. */ | ||
31 | typedef enum { | ||
32 | ALIAS_NO, /* The two refs CANNOT alias (exact). */ | ||
33 | ALIAS_MAY, /* The two refs MAY alias (inexact). */ | ||
34 | ALIAS_MUST /* The two refs MUST alias (exact). */ | ||
35 | } AliasRet; | ||
36 | |||
37 | /* -- ALOAD/HLOAD forwarding and ASTORE/HSTORE elimination ---------------- */ | ||
38 | |||
39 | /* Alias analysis for array and hash access using key-based disambiguation. */ | ||
40 | static AliasRet aa_ahref(jit_State *J, IRIns *refa, IRIns *refb) | ||
41 | { | ||
42 | IRRef ka = refa->op2; | ||
43 | IRRef kb = refb->op2; | ||
44 | IRIns *keya, *keyb; | ||
45 | if (refa == refb) | ||
46 | return ALIAS_MUST; /* Shortcut for same refs. */ | ||
47 | keya = IR(ka); | ||
48 | if (keya->o == IR_KSLOT) { ka = keya->op1; keya = IR(ka); } | ||
49 | keyb = IR(kb); | ||
50 | if (keyb->o == IR_KSLOT) { kb = keyb->op1; keyb = IR(kb); } | ||
51 | if (ka == kb) { | ||
52 | /* Same key. Check for same table with different ref (NEWREF vs. HREF). */ | ||
53 | IRIns *ta = refa; | ||
54 | IRIns *tb = refb; | ||
55 | if (ta->o == IR_HREFK || ta->o == IR_AREF) ta = IR(ta->op1); | ||
56 | if (tb->o == IR_HREFK || tb->o == IR_AREF) tb = IR(tb->op1); | ||
57 | if (ta->op1 == tb->op1) | ||
58 | return ALIAS_MUST; /* Same key, same table. */ | ||
59 | else | ||
60 | return ALIAS_MAY; /* Same key, possibly different table. */ | ||
61 | } | ||
62 | if (irref_isk(ka) && irref_isk(kb)) | ||
63 | return ALIAS_NO; /* Different constant keys. */ | ||
64 | if (refa->o == IR_AREF) { | ||
65 | /* Disambiguate array references based on index arithmetic. */ | ||
66 | lua_assert(refb->o == IR_AREF); | ||
67 | if (refa->op1 == refb->op1) { | ||
68 | /* Same table, different non-const array keys. */ | ||
69 | int32_t ofsa = 0, ofsb = 0; | ||
70 | IRRef basea = ka, baseb = kb; | ||
71 | /* Gather base and offset from t[base] or t[base+-ofs]. */ | ||
72 | if (keya->o == IR_ADD && irref_isk(keya->op2)) { | ||
73 | basea = keya->op1; | ||
74 | ofsa = IR(keya->op2)->i; | ||
75 | if (basea == kb && ofsa != 0) | ||
76 | return ALIAS_NO; /* t[base+-ofs] vs. t[base]. */ | ||
77 | } | ||
78 | if (keyb->o == IR_ADD && irref_isk(keyb->op2)) { | ||
79 | baseb = keyb->op1; | ||
80 | ofsb = IR(keyb->op2)->i; | ||
81 | if (ka == baseb && ofsb != 0) | ||
82 | return ALIAS_NO; /* t[base] vs. t[base+-ofs]. */ | ||
83 | } | ||
84 | if (basea == baseb && ofsa != ofsb) | ||
85 | return ALIAS_NO; /* t[base+-o1] vs. t[base+-o2] and o1 != o2. */ | ||
86 | } | ||
87 | } else { | ||
88 | /* Disambiguate hash references based on the type of their keys. */ | ||
89 | lua_assert((refa->o==IR_HREF || refa->o==IR_HREFK || refa->o==IR_NEWREF) && | ||
90 | (refb->o==IR_HREF || refb->o==IR_HREFK || refb->o==IR_NEWREF)); | ||
91 | if (!irt_sametype(keya->t, keyb->t)) | ||
92 | return ALIAS_NO; /* Different key types. */ | ||
93 | } | ||
94 | return ALIAS_MAY; /* Anything else: we just don't know. */ | ||
95 | } | ||
96 | |||
97 | /* Array and hash load forwarding. */ | ||
98 | static TRef fwd_ahload(jit_State *J, IRRef xref) | ||
99 | { | ||
100 | IRIns *xr = IR(xref); | ||
101 | IRRef lim = xref; /* Search limit. */ | ||
102 | IRRef ref; | ||
103 | |||
104 | /* Search for conflicting stores. */ | ||
105 | ref = J->chain[fins->o+IRDELTA_L2S]; | ||
106 | while (ref > xref) { | ||
107 | IRIns *store = IR(ref); | ||
108 | switch (aa_ahref(J, xr, IR(store->op1))) { | ||
109 | case ALIAS_NO: break; /* Continue searching. */ | ||
110 | case ALIAS_MAY: lim = ref; goto conflict; /* Limit search for load. */ | ||
111 | case ALIAS_MUST: return store->op2; /* Store forwarding. */ | ||
112 | } | ||
113 | ref = store->prev; | ||
114 | } | ||
115 | |||
116 | /* No conflicting store (yet): const-fold loads from allocations. */ | ||
117 | { | ||
118 | IRIns *ir = (xr->o == IR_HREFK || xr->o == IR_AREF) ? IR(xr->op1) : xr; | ||
119 | IRRef tab = ir->op1; | ||
120 | ir = IR(tab); | ||
121 | if (ir->o == IR_TNEW || (ir->o == IR_TDUP && irref_isk(xr->op2))) { | ||
122 | /* A NEWREF with a number key may end up pointing to the array part. | ||
123 | ** But it's referenced from HSTORE and not found in the ASTORE chain. | ||
124 | ** For now simply consider this a conflict without forwarding anything. | ||
125 | */ | ||
126 | if (xr->o == IR_AREF) { | ||
127 | IRRef ref2 = J->chain[IR_NEWREF]; | ||
128 | while (ref2 > tab) { | ||
129 | IRIns *newref = IR(ref2); | ||
130 | if (irt_isnum(IR(newref->op2)->t)) | ||
131 | goto conflict; | ||
132 | ref2 = newref->prev; | ||
133 | } | ||
134 | } | ||
135 | /* NEWREF inhibits CSE for HREF, and dependent FLOADs from HREFK/AREF. | ||
136 | ** But the above search for conflicting stores was limited by xref. | ||
137 | ** So continue searching, limited by the TNEW/TDUP. Store forwarding | ||
138 | ** is ok, too. A conflict does NOT limit the search for a matching load. | ||
139 | */ | ||
140 | while (ref > tab) { | ||
141 | IRIns *store = IR(ref); | ||
142 | switch (aa_ahref(J, xr, IR(store->op1))) { | ||
143 | case ALIAS_NO: break; /* Continue searching. */ | ||
144 | case ALIAS_MAY: goto conflict; /* Conflicting store. */ | ||
145 | case ALIAS_MUST: return store->op2; /* Store forwarding. */ | ||
146 | } | ||
147 | ref = store->prev; | ||
148 | } | ||
149 | lua_assert(ir->o != IR_TNEW || irt_isnil(fins->t)); | ||
150 | if (irt_ispri(fins->t)) { | ||
151 | return TREF_PRI(irt_type(fins->t)); | ||
152 | } else if (irt_isnum(fins->t) || irt_isstr(fins->t)) { | ||
153 | TValue keyv; | ||
154 | cTValue *tv; | ||
155 | IRIns *key = IR(xr->op2); | ||
156 | if (key->o == IR_KSLOT) key = IR(key->op1); | ||
157 | lj_ir_kvalue(J->L, &keyv, key); | ||
158 | tv = lj_tab_get(J->L, ir_ktab(IR(ir->op1)), &keyv); | ||
159 | lua_assert(itype2irt(tv) == irt_type(fins->t)); | ||
160 | if (irt_isnum(fins->t)) | ||
161 | return lj_ir_knum_nn(J, tv->u64); | ||
162 | else | ||
163 | return lj_ir_kstr(J, strV(tv)); | ||
164 | } | ||
165 | /* Othwerwise: don't intern as a constant. */ | ||
166 | } | ||
167 | } | ||
168 | |||
169 | conflict: | ||
170 | /* Try to find a matching load. Below the conflicting store, if any. */ | ||
171 | ref = J->chain[fins->o]; | ||
172 | while (ref > lim) { | ||
173 | IRIns *load = IR(ref); | ||
174 | if (load->op1 == xref) | ||
175 | return ref; /* Load forwarding. */ | ||
176 | ref = load->prev; | ||
177 | } | ||
178 | return 0; /* Conflict or no match. */ | ||
179 | } | ||
180 | |||
181 | /* Reassociate ALOAD across PHIs to handle t[i-1] forwarding case. */ | ||
182 | static TRef fwd_aload_reassoc(jit_State *J) | ||
183 | { | ||
184 | IRIns *irx = IR(fins->op1); | ||
185 | IRIns *key = IR(irx->op2); | ||
186 | if (key->o == IR_ADD && irref_isk(key->op2)) { | ||
187 | IRIns *add2 = IR(key->op1); | ||
188 | if (add2->o == IR_ADD && irref_isk(add2->op2) && | ||
189 | IR(key->op2)->i == -IR(add2->op2)->i) { | ||
190 | IRRef ref = J->chain[IR_AREF]; | ||
191 | IRRef lim = add2->op1; | ||
192 | if (irx->op1 > lim) lim = irx->op1; | ||
193 | while (ref > lim) { | ||
194 | IRIns *ir = IR(ref); | ||
195 | if (ir->op1 == irx->op1 && ir->op2 == add2->op1) | ||
196 | return fwd_ahload(J, ref); | ||
197 | ref = ir->prev; | ||
198 | } | ||
199 | } | ||
200 | } | ||
201 | return 0; | ||
202 | } | ||
203 | |||
204 | /* ALOAD forwarding. */ | ||
205 | TRef LJ_FASTCALL lj_opt_fwd_aload(jit_State *J) | ||
206 | { | ||
207 | IRRef ref; | ||
208 | if ((ref = fwd_ahload(J, fins->op1)) || | ||
209 | (ref = fwd_aload_reassoc(J))) | ||
210 | return ref; | ||
211 | return EMITFOLD; | ||
212 | } | ||
213 | |||
214 | /* HLOAD forwarding. */ | ||
215 | TRef LJ_FASTCALL lj_opt_fwd_hload(jit_State *J) | ||
216 | { | ||
217 | IRRef ref = fwd_ahload(J, fins->op1); | ||
218 | if (ref) | ||
219 | return ref; | ||
220 | return EMITFOLD; | ||
221 | } | ||
222 | |||
223 | /* ASTORE/HSTORE elimination. */ | ||
224 | TRef LJ_FASTCALL lj_opt_dse_ahstore(jit_State *J) | ||
225 | { | ||
226 | IRRef xref = fins->op1; /* xREF reference. */ | ||
227 | IRRef val = fins->op2; /* Stored value reference. */ | ||
228 | IRIns *xr = IR(xref); | ||
229 | IRRef1 *refp = &J->chain[fins->o]; | ||
230 | IRRef ref = *refp; | ||
231 | while (ref > xref) { /* Search for redundant or conflicting stores. */ | ||
232 | IRIns *store = IR(ref); | ||
233 | switch (aa_ahref(J, xr, IR(store->op1))) { | ||
234 | case ALIAS_NO: | ||
235 | break; /* Continue searching. */ | ||
236 | case ALIAS_MAY: /* Store to MAYBE the same location. */ | ||
237 | if (store->op2 != val) /* Conflict if the value is different. */ | ||
238 | goto doemit; | ||
239 | break; /* Otherwise continue searching. */ | ||
240 | case ALIAS_MUST: /* Store to the same location. */ | ||
241 | if (store->op2 == val) /* Same value: drop the new store. */ | ||
242 | return DROPFOLD; | ||
243 | /* Different value: try to eliminate the redundant store. */ | ||
244 | if (ref > J->chain[IR_LOOP]) { /* Quick check to avoid crossing LOOP. */ | ||
245 | IRIns *ir; | ||
246 | /* Check for any intervening guards (includes conflicting loads). */ | ||
247 | for (ir = IR(J->cur.nins-1); ir > store; ir--) | ||
248 | if (irt_isguard(ir->t)) | ||
249 | goto doemit; /* No elimination possible. */ | ||
250 | /* Remove redundant store from chain and replace with NOP. */ | ||
251 | *refp = store->prev; | ||
252 | store->o = IR_NOP; /* Unchained NOP -- does anybody care? */ | ||
253 | store->t.irt = IRT_NIL; | ||
254 | store->op1 = store->op2 = 0; | ||
255 | store->prev = 0; | ||
256 | /* Now emit the new store instead. */ | ||
257 | } | ||
258 | goto doemit; | ||
259 | } | ||
260 | ref = *(refp = &store->prev); | ||
261 | } | ||
262 | doemit: | ||
263 | return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ | ||
264 | } | ||
265 | |||
266 | /* -- ULOAD forwarding ---------------------------------------------------- */ | ||
267 | |||
268 | /* The current alias analysis for upvalues is very simplistic. It only | ||
269 | ** disambiguates between the unique upvalues of the same function. | ||
270 | ** This is good enough for now, since most upvalues are read-only. | ||
271 | ** | ||
272 | ** A more precise analysis would be feasible with the help of the parser: | ||
273 | ** generate a unique key for every upvalue, even across all prototypes. | ||
274 | ** Lacking a realistic use-case, it's unclear whether this is beneficial. | ||
275 | */ | ||
276 | static AliasRet aa_uref(IRIns *refa, IRIns *refb) | ||
277 | { | ||
278 | if (refa->o != refb->o) | ||
279 | return ALIAS_NO; /* Different UREFx type. */ | ||
280 | if (refa->op1 != refb->op1) | ||
281 | return ALIAS_MAY; /* Different function. */ | ||
282 | else if (refa->op2 == refb->op2) | ||
283 | return ALIAS_MUST; /* Same function, same upvalue idx. */ | ||
284 | else | ||
285 | return ALIAS_NO; /* Same function, different upvalue idx. */ | ||
286 | } | ||
287 | |||
288 | /* ULOAD forwarding. */ | ||
289 | TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J) | ||
290 | { | ||
291 | IRRef uref = fins->op1; | ||
292 | IRRef lim = uref; /* Search limit. */ | ||
293 | IRIns *xr = IR(uref); | ||
294 | IRRef ref; | ||
295 | |||
296 | /* Search for conflicting stores. */ | ||
297 | ref = J->chain[IR_USTORE]; | ||
298 | while (ref > uref) { | ||
299 | IRIns *store = IR(ref); | ||
300 | switch (aa_uref(xr, IR(store->op1))) { | ||
301 | case ALIAS_NO: break; /* Continue searching. */ | ||
302 | case ALIAS_MAY: lim = ref; goto conflict; /* Limit search for load. */ | ||
303 | case ALIAS_MUST: return store->op2; /* Store forwarding. */ | ||
304 | } | ||
305 | ref = store->prev; | ||
306 | } | ||
307 | |||
308 | conflict: | ||
309 | /* Try to find a matching load. Below the conflicting store, if any. */ | ||
310 | ref = J->chain[IR_ULOAD]; | ||
311 | while (ref > lim) { | ||
312 | IRIns *load = IR(ref); | ||
313 | if (load->op1 == uref) | ||
314 | return ref; /* Load forwarding. */ | ||
315 | ref = load->prev; | ||
316 | } | ||
317 | return EMITFOLD; /* Conflict or no match. */ | ||
318 | } | ||
319 | |||
320 | /* USTORE elimination. */ | ||
321 | TRef LJ_FASTCALL lj_opt_dse_ustore(jit_State *J) | ||
322 | { | ||
323 | IRRef xref = fins->op1; /* xREF reference. */ | ||
324 | IRRef val = fins->op2; /* Stored value reference. */ | ||
325 | IRIns *xr = IR(xref); | ||
326 | IRRef1 *refp = &J->chain[IR_USTORE]; | ||
327 | IRRef ref = *refp; | ||
328 | while (ref > xref) { /* Search for redundant or conflicting stores. */ | ||
329 | IRIns *store = IR(ref); | ||
330 | switch (aa_uref(xr, IR(store->op1))) { | ||
331 | case ALIAS_NO: | ||
332 | break; /* Continue searching. */ | ||
333 | case ALIAS_MAY: /* Store to MAYBE the same location. */ | ||
334 | if (store->op2 != val) /* Conflict if the value is different. */ | ||
335 | goto doemit; | ||
336 | break; /* Otherwise continue searching. */ | ||
337 | case ALIAS_MUST: /* Store to the same location. */ | ||
338 | if (store->op2 == val) /* Same value: drop the new store. */ | ||
339 | return DROPFOLD; | ||
340 | /* Different value: try to eliminate the redundant store. */ | ||
341 | if (ref > J->chain[IR_LOOP]) { /* Quick check to avoid crossing LOOP. */ | ||
342 | IRIns *ir; | ||
343 | /* Check for any intervening guards (includes conflicting loads). */ | ||
344 | for (ir = IR(J->cur.nins-1); ir > store; ir--) | ||
345 | if (irt_isguard(ir->t)) | ||
346 | goto doemit; /* No elimination possible. */ | ||
347 | /* Remove redundant store from chain and replace with NOP. */ | ||
348 | *refp = store->prev; | ||
349 | store->o = IR_NOP; /* Unchained NOP -- does anybody care? */ | ||
350 | store->t.irt = IRT_NIL; | ||
351 | store->op1 = store->op2 = 0; | ||
352 | store->prev = 0; | ||
353 | /* Now emit the new store instead. */ | ||
354 | } | ||
355 | goto doemit; | ||
356 | } | ||
357 | ref = *(refp = &store->prev); | ||
358 | } | ||
359 | doemit: | ||
360 | return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ | ||
361 | } | ||
362 | |||
363 | /* -- FLOAD forwarding and FSTORE elimination ----------------------------- */ | ||
364 | |||
365 | /* Alias analysis for field access. | ||
366 | ** Field loads are cheap and field stores are rare. | ||
367 | ** Simple disambiguation based on field types is good enough. | ||
368 | */ | ||
369 | static AliasRet aa_fref(IRIns *refa, IRIns *refb) | ||
370 | { | ||
371 | if (refa->op2 != refb->op2) | ||
372 | return ALIAS_NO; /* Different fields. */ | ||
373 | if (refa->op1 == refb->op1) | ||
374 | return ALIAS_MUST; /* Same field, same object. */ | ||
375 | else | ||
376 | return ALIAS_MAY; /* Same field, possibly different object. */ | ||
377 | } | ||
378 | |||
379 | /* Only the loads for mutable fields end up here (see FOLD). */ | ||
380 | TRef LJ_FASTCALL lj_opt_fwd_fload(jit_State *J) | ||
381 | { | ||
382 | IRRef oref = fins->op1; /* Object reference. */ | ||
383 | IRRef fid = fins->op2; /* Field ID. */ | ||
384 | IRRef lim = oref; /* Search limit. */ | ||
385 | IRRef ref; | ||
386 | |||
387 | /* Search for conflicting stores. */ | ||
388 | ref = J->chain[IR_FSTORE]; | ||
389 | while (ref > oref) { | ||
390 | IRIns *store = IR(ref); | ||
391 | switch (aa_fref(fins, IR(store->op1))) { | ||
392 | case ALIAS_NO: break; /* Continue searching. */ | ||
393 | case ALIAS_MAY: lim = ref; goto conflict; /* Limit search for load. */ | ||
394 | case ALIAS_MUST: return store->op2; /* Store forwarding. */ | ||
395 | } | ||
396 | ref = store->prev; | ||
397 | } | ||
398 | |||
399 | /* No conflicting store: const-fold field loads from allocations. */ | ||
400 | if (fid == IRFL_TAB_META) { | ||
401 | IRIns *ir = IR(oref); | ||
402 | if (ir->o == IR_TNEW || ir->o == IR_TDUP) | ||
403 | return lj_ir_knull(J, IRT_TAB); | ||
404 | } | ||
405 | |||
406 | conflict: | ||
407 | /* Try to find a matching load. Below the conflicting store, if any. */ | ||
408 | ref = J->chain[IR_FLOAD]; | ||
409 | while (ref > lim) { | ||
410 | IRIns *load = IR(ref); | ||
411 | if (load->op1 == oref && load->op2 == fid) | ||
412 | return ref; /* Load forwarding. */ | ||
413 | ref = load->prev; | ||
414 | } | ||
415 | return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ | ||
416 | } | ||
417 | |||
418 | /* FSTORE elimination. */ | ||
419 | TRef LJ_FASTCALL lj_opt_dse_fstore(jit_State *J) | ||
420 | { | ||
421 | IRRef fref = fins->op1; /* FREF reference. */ | ||
422 | IRRef val = fins->op2; /* Stored value reference. */ | ||
423 | IRIns *xr = IR(fref); | ||
424 | IRRef1 *refp = &J->chain[IR_FSTORE]; | ||
425 | IRRef ref = *refp; | ||
426 | while (ref > fref) { /* Search for redundant or conflicting stores. */ | ||
427 | IRIns *store = IR(ref); | ||
428 | switch (aa_fref(xr, IR(store->op1))) { | ||
429 | case ALIAS_NO: | ||
430 | break; /* Continue searching. */ | ||
431 | case ALIAS_MAY: | ||
432 | if (store->op2 != val) /* Conflict if the value is different. */ | ||
433 | goto doemit; | ||
434 | break; /* Otherwise continue searching. */ | ||
435 | case ALIAS_MUST: | ||
436 | if (store->op2 == val) /* Same value: drop the new store. */ | ||
437 | return DROPFOLD; | ||
438 | /* Different value: try to eliminate the redundant store. */ | ||
439 | if (ref > J->chain[IR_LOOP]) { /* Quick check to avoid crossing LOOP. */ | ||
440 | IRIns *ir; | ||
441 | /* Check for any intervening guards or conflicting loads. */ | ||
442 | for (ir = IR(J->cur.nins-1); ir > store; ir--) | ||
443 | if (irt_isguard(ir->t) || (ir->o == IR_FLOAD && ir->op2 == xr->op2)) | ||
444 | goto doemit; /* No elimination possible. */ | ||
445 | /* Remove redundant store from chain and replace with NOP. */ | ||
446 | *refp = store->prev; | ||
447 | store->o = IR_NOP; /* Unchained NOP -- does anybody care? */ | ||
448 | store->t.irt = IRT_NIL; | ||
449 | store->op1 = store->op2 = 0; | ||
450 | store->prev = 0; | ||
451 | /* Now emit the new store instead. */ | ||
452 | } | ||
453 | goto doemit; | ||
454 | } | ||
455 | ref = *(refp = &store->prev); | ||
456 | } | ||
457 | doemit: | ||
458 | return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ | ||
459 | } | ||
460 | |||
461 | /* -- TLEN forwarding ----------------------------------------------------- */ | ||
462 | |||
463 | /* This is rather simplistic right now, but better than nothing. */ | ||
464 | TRef LJ_FASTCALL lj_opt_fwd_tlen(jit_State *J) | ||
465 | { | ||
466 | IRRef tab = fins->op1; /* Table reference. */ | ||
467 | IRRef lim = tab; /* Search limit. */ | ||
468 | IRRef ref; | ||
469 | |||
470 | /* Any ASTORE is a conflict and limits the search. */ | ||
471 | if (J->chain[IR_ASTORE] > lim) lim = J->chain[IR_ASTORE]; | ||
472 | |||
473 | /* Search for conflicting HSTORE with numeric key. */ | ||
474 | ref = J->chain[IR_HSTORE]; | ||
475 | while (ref > lim) { | ||
476 | IRIns *store = IR(ref); | ||
477 | IRIns *href = IR(store->op1); | ||
478 | IRIns *key = IR(href->op2); | ||
479 | if (irt_isnum(key->o == IR_KSLOT ? IR(key->op1)->t : key->t)) { | ||
480 | lim = ref; /* Conflicting store found, limits search for TLEN. */ | ||
481 | break; | ||
482 | } | ||
483 | ref = store->prev; | ||
484 | } | ||
485 | |||
486 | /* Try to find a matching load. Below the conflicting store, if any. */ | ||
487 | ref = J->chain[IR_TLEN]; | ||
488 | while (ref > lim) { | ||
489 | IRIns *tlen = IR(ref); | ||
490 | if (tlen->op1 == tab) | ||
491 | return ref; /* Load forwarding. */ | ||
492 | ref = tlen->prev; | ||
493 | } | ||
494 | return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ | ||
495 | } | ||
496 | |||
497 | /* -- ASTORE/HSTORE previous type analysis -------------------------------- */ | ||
498 | |||
499 | /* Check whether the previous value for a table store is non-nil. | ||
500 | ** This can be derived either from a previous store or from a previous | ||
501 | ** load (because all loads from tables perform a type check). | ||
502 | ** | ||
503 | ** The result of the analysis can be used to avoid the metatable check | ||
504 | ** and the guard against HREF returning niltv. Both of these are cheap, | ||
505 | ** so let's not spend too much effort on the analysis. | ||
506 | ** | ||
507 | ** A result of 1 is exact: previous value CANNOT be nil. | ||
508 | ** A result of 0 is inexact: previous value MAY be nil. | ||
509 | */ | ||
510 | int lj_opt_fwd_wasnonnil(jit_State *J, IROpT loadop, IRRef xref) | ||
511 | { | ||
512 | /* First check stores. */ | ||
513 | IRRef ref = J->chain[loadop+IRDELTA_L2S]; | ||
514 | while (ref > xref) { | ||
515 | IRIns *store = IR(ref); | ||
516 | if (store->op1 == xref) { /* Same xREF. */ | ||
517 | /* A nil store MAY alias, but a non-nil store MUST alias. */ | ||
518 | return !irt_isnil(store->t); | ||
519 | } else if (irt_isnil(store->t)) { /* Must check any nil store. */ | ||
520 | IRRef skref = IR(store->op1)->op2; | ||
521 | IRRef xkref = IR(xref)->op2; | ||
522 | /* Same key type MAY alias. */ | ||
523 | if (irt_sametype(IR(skref)->t, IR(xkref)->t)) { | ||
524 | if (skref == xkref || !irref_isk(skref) || !irref_isk(xkref)) | ||
525 | return 0; /* A nil store with same const key or var key MAY alias. */ | ||
526 | /* Different const keys CANNOT alias. */ | ||
527 | } /* Different key types CANNOT alias. */ | ||
528 | } /* Other non-nil stores MAY alias. */ | ||
529 | ref = store->prev; | ||
530 | } | ||
531 | |||
532 | /* Check loads since nothing could be derived from stores. */ | ||
533 | ref = J->chain[loadop]; | ||
534 | while (ref > xref) { | ||
535 | IRIns *load = IR(ref); | ||
536 | if (load->op1 == xref) { /* Same xREF. */ | ||
537 | /* A nil load MAY alias, but a non-nil load MUST alias. */ | ||
538 | return !irt_isnil(load->t); | ||
539 | } /* Other non-nil loads MAY alias. */ | ||
540 | ref = load->prev; | ||
541 | } | ||
542 | return 0; /* Nothing derived at all, previous value MAY be nil. */ | ||
543 | } | ||
544 | |||
545 | /* ------------------------------------------------------------------------ */ | ||
546 | |||
547 | #undef IR | ||
548 | #undef fins | ||
549 | |||
550 | #endif | ||
diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c new file mode 100644 index 00000000..60a6afb8 --- /dev/null +++ b/src/lj_opt_narrow.c | |||
@@ -0,0 +1,430 @@ | |||
1 | /* | ||
2 | ** NARROW: Narrowing of numbers to integers (double to int32_t). | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #define lj_opt_narrow_c | ||
7 | #define LUA_CORE | ||
8 | |||
9 | #include "lj_obj.h" | ||
10 | |||
11 | #if LJ_HASJIT | ||
12 | |||
13 | #include "lj_str.h" | ||
14 | #include "lj_bc.h" | ||
15 | #include "lj_ir.h" | ||
16 | #include "lj_jit.h" | ||
17 | #include "lj_iropt.h" | ||
18 | #include "lj_trace.h" | ||
19 | |||
20 | /* Rationale for narrowing optimizations: | ||
21 | ** | ||
22 | ** Lua has only a single number type and this is a FP double by default. | ||
23 | ** Narrowing doubles to integers does not pay off for the interpreter on a | ||
24 | ** current-generation x86/x64 machine. Most FP operations need the same | ||
25 | ** amount of execution resources as their integer counterparts, except | ||
26 | ** with slightly longer latencies. Longer latencies are a non-issue for | ||
27 | ** the interpreter, since they are usually hidden by other overhead. | ||
28 | ** | ||
29 | ** The total CPU execution bandwidth is the sum of the bandwidth of the FP | ||
30 | ** and the integer units, because they execute in parallel. The FP units | ||
31 | ** have an equal or higher bandwidth than the integer units. Not using | ||
32 | ** them means losing execution bandwidth. Moving work away from them to | ||
33 | ** the already quite busy integer units is a losing proposition. | ||
34 | ** | ||
35 | ** The situation for JIT-compiled code is a bit different: the higher code | ||
36 | ** density makes the extra latencies much more visible. Tight loops expose | ||
37 | ** the latencies for updating the induction variables. Array indexing | ||
38 | ** requires narrowing conversions with high latencies and additional | ||
39 | ** guards (to check that the index is really an integer). And many common | ||
40 | ** optimizations only work on integers. | ||
41 | ** | ||
42 | ** One solution would be speculative, eager narrowing of all number loads. | ||
43 | ** This causes many problems, like losing -0 or the need to resolve type | ||
44 | ** mismatches between traces. It also effectively forces the integer type | ||
45 | ** to have overflow-checking semantics. This impedes many basic | ||
46 | ** optimizations and requires adding overflow checks to all integer | ||
47 | ** arithmetic operations (whereas FP arithmetic can do without). | ||
48 | ** | ||
49 | ** Always replacing an FP op with an integer op plus an overflow check is | ||
50 | ** counter-productive on a current-generation super-scalar CPU. Although | ||
51 | ** the overflow check branches are highly predictable, they will clog the | ||
52 | ** execution port for the branch unit and tie up reorder buffers. This is | ||
53 | ** turning a pure data-flow dependency into a different data-flow | ||
54 | ** dependency (with slightly lower latency) *plus* a control dependency. | ||
55 | ** In general, you don't want to do this since latencies due to data-flow | ||
56 | ** dependencies can be well hidden by out-of-order execution. | ||
57 | ** | ||
58 | ** A better solution is to keep all numbers as FP values and only narrow | ||
59 | ** when it's beneficial to do so. LuaJIT uses predictive narrowing for | ||
60 | ** induction variables and demand-driven narrowing for index expressions | ||
61 | ** and bit operations. Additionally it can eliminate or hoist most of the | ||
62 | ** resulting overflow checks. Regular arithmetic computations are never | ||
63 | ** narrowed to integers. | ||
64 | ** | ||
65 | ** The integer type in the IR has convenient wrap-around semantics and | ||
66 | ** ignores overflow. Extra operations have been added for | ||
67 | ** overflow-checking arithmetic (ADDOV/SUBOV) instead of an extra type. | ||
68 | ** Apart from reducing overall complexity of the compiler, this also | ||
69 | ** nicely solves the problem where you want to apply algebraic | ||
70 | ** simplifications to ADD, but not to ADDOV. And the assembler can use lea | ||
71 | ** instead of an add for integer ADD, but not for ADDOV (lea does not | ||
72 | ** affect the flags, but it helps to avoid register moves). | ||
73 | ** | ||
74 | ** Note that all of the above has to be reconsidered if LuaJIT is to be | ||
75 | ** ported to architectures with slow FP operations or with no hardware FPU | ||
76 | ** at all. In the latter case an integer-only port may be the best overall | ||
77 | ** solution (if this still meets user demands). | ||
78 | */ | ||
79 | |||
80 | /* Some local macros to save typing. Undef'd at the end. All expect a jit_State *J in scope. */ | ||
81 | #define IR(ref) (&J->cur.ir[(ref)]) /* Pointer to the IR instruction at ref in the current trace. */ | ||
82 | #define fins (&J->fold.ins) /* Pointer to the instruction held in the fold state. */ | ||
83 | |||
84 | /* Pass IR on to next optimization in chain (FOLD). Evaluates to the result of lj_opt_fold(). */ | ||
85 | #define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J)) | ||
86 | |||
87 | #define emitir_raw(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J)) /* Same, but emits directly via lj_ir_emit() instead of going through FOLD. */ | ||
88 | |||
89 | /* -- Elimination of narrowing type conversions --------------------------- */ | ||
90 | |||
91 | /* Narrowing of index expressions and bit operations is demand-driven. The | ||
92 | ** trace recorder emits a narrowing type conversion (TOINT or TOBIT) in | ||
93 | ** all of these cases (e.g. array indexing or string indexing). FOLD | ||
94 | ** already takes care of eliminating simple redundant conversions like | ||
95 | ** TOINT(TONUM(x)) ==> x. | ||
96 | ** | ||
97 | ** But the surrounding code is FP-heavy and all arithmetic operations are | ||
98 | ** performed on FP numbers. Consider a common example such as 'x=t[i+1]', | ||
99 | ** with 'i' already an integer (due to induction variable narrowing). The | ||
100 | ** index expression would be recorded as TOINT(ADD(TONUM(i), 1)), which is | ||
101 | ** clearly suboptimal. | ||
102 | ** | ||
103 | ** One can do better by recursively backpropagating the narrowing type | ||
104 | ** conversion across FP arithmetic operations. This turns FP ops into | ||
105 | ** their corresponding integer counterparts. Depending on the semantics of | ||
106 | ** the conversion they also need to check for overflow. Currently only ADD | ||
107 | ** and SUB are supported. | ||
108 | ** | ||
109 | ** The above example can be rewritten as ADDOV(TOINT(TONUM(i)), 1) and | ||
110 | ** then into ADDOV(i, 1) after folding of the conversions. The original FP | ||
111 | ** ops remain in the IR and are eliminated by DCE since all references to | ||
112 | ** them are gone. | ||
113 | ** | ||
114 | ** Special care has to be taken to avoid narrowing across an operation | ||
115 | ** which is potentially operating on non-integral operands. One obvious | ||
116 | ** case is when an expression contains a non-integral constant, but ends | ||
117 | ** up as an integer index at runtime (like t[x+1.5] with x=0.5). | ||
118 | ** | ||
119 | ** Operations with two non-constant operands illustrate a similar problem | ||
120 | ** (like t[a+b] with a=1.5 and b=2.5). Backpropagation has to stop there, | ||
121 | ** unless it can be proven that either operand is integral (e.g. by CSEing | ||
122 | ** a previous conversion). As a not-so-obvious corollary this logic also | ||
123 | ** applies for a whole expression tree (e.g. t[(a+1)+(b+1)]). | ||
124 | ** | ||
125 | ** Correctness of the transformation is guaranteed by never expanding the | ||
126 | ** tree with more conversions than the single one we would need to emit | ||
127 | ** if not backpropagating. TOBIT employs a more optimistic rule, because | ||
128 | ** the conversion has special semantics, designed to make the life of the | ||
129 | ** compiler writer easier. ;-) | ||
130 | ** | ||
131 | ** Using on-the-fly backpropagation of an expression tree doesn't work | ||
132 | ** because it's unknown whether the transform is correct until the end. | ||
133 | ** This either requires IR rollback and cache invalidation for every | ||
134 | ** subtree or a two-pass algorithm. The former didn't work out too well, | ||
135 | ** so the code now combines a recursive collector with a stack-based | ||
136 | ** emitter. | ||
137 | ** | ||
138 | ** [A recursive backpropagation algorithm with backtracking, employing | ||
139 | ** skip-list lookup and round-robin caching, emitting stack operations | ||
140 | ** on-the-fly for a stack-based interpreter -- and all of that in a meager | ||
141 | ** kilobyte? Yep, compilers are a great treasure chest. Throw away your | ||
142 | ** textbooks and read the codebase of a compiler today!] | ||
143 | ** | ||
144 | ** There's another optimization opportunity for array indexing: it's | ||
145 | ** always accompanied by an array bounds-check. The outermost overflow | ||
146 | ** check may be delegated to the ABC operation. This works because ABC is | ||
147 | ** an unsigned comparison and wrap-around due to overflow creates negative | ||
148 | ** numbers. | ||
149 | ** | ||
150 | ** But this optimization is only valid for constants that cannot overflow | ||
151 | ** an int32_t into the range of valid array indexes [0..2^27+1). A check | ||
152 | ** for +-2^30 is safe since -2^31 - 2^30 wraps to 2^30 and 2^31-1 + 2^30 | ||
153 | ** wraps to -2^30-1. | ||
154 | ** | ||
155 | ** It's also good enough in practice, since e.g. t[i+1] or t[i-10] are | ||
156 | ** quite common. So the above example finally ends up as ADD(i, 1)! | ||
157 | ** | ||
158 | ** Later on, the assembler is able to fuse the whole array reference and | ||
159 | ** the ADD into the memory operands of loads and other instructions. This | ||
160 | ** is why LuaJIT is able to generate very pretty (and fast) machine code | ||
161 | ** for array indexing. And that, my dear, concludes another story about | ||
162 | ** one of the hidden secrets of LuaJIT ... | ||
163 | */ | ||
164 | |||
165 | /* Maximum backpropagation depth and maximum stack size. */ | ||
166 | #define NARROW_MAX_BACKPROP 100 /* Bound on the recursion depth of narrow_conv_backprop(). */ | ||
167 | #define NARROW_MAX_STACK 256 /* Number of IRRef2 slots in NarrowConv.stack. */ | ||
168 | |||
169 | /* Context used for narrowing of type conversions. Filled in by lj_opt_narrow_convert(). */ | ||
170 | typedef struct NarrowConv { | ||
171 | jit_State *J; /* JIT compiler state. */ | ||
172 | IRRef2 *sp; /* Current stack pointer (next free slot of stack[]). */ | ||
173 | IRRef2 *maxsp; /* Maximum stack pointer minus redzone (set to 4 slots below the end). */ | ||
174 | int lim; /* Limit on the number of emitted conversions (1, or 2 for TOBIT). */ | ||
175 | IRRef mode; /* Conversion mode (IRTOINT_*). */ | ||
176 | IRRef2 stack[NARROW_MAX_STACK]; /* Stack holding the stack-machine code. */ | ||
177 | } NarrowConv; | ||
178 | |||
179 | /* The stack machine has a 32-bit instruction format: [IROpT | IRRef1] | ||
180 | ** The lower 16 bits hold a reference (or 0). The upper 16 bits hold | ||
181 | ** the IR opcode + type or one of the following special opcodes: | ||
182 | */ | ||
183 | enum { | ||
184 | NARROW_REF, /* Push ref as it is, no instruction is emitted. */ | ||
185 | NARROW_CONV, /* Push conversion of ref (emits the saved conversion applied to ref). */ | ||
186 | NARROW_INT /* Push KINT ref. The next code holds an int32_t. */ | ||
187 | }; | ||
188 | |||
189 | /* Search the backpropagation cache for key. Returns the cached ref, or 0 on a miss. */ | ||
190 | static IRRef narrow_bpc_get(jit_State *J, IRRef1 key, IRRef mode) | ||
191 | { | ||
192 | BPropEntry *ent = &J->bpropcache[0], *end = ent + BPROP_SLOTS; | ||
193 | for (; ent < end; ent++) { | ||
194 | /* An entry cached with a lower mode value is a stronger check and ok, too. */ | ||
195 | if (ent->key != key || ent->mode > mode) continue; | ||
196 | return ent->val; | ||
197 | } | ||
198 | return 0; | ||
199 | } | ||
200 | |||
201 | /* Store key -> val (tagged with mode) in the current cache slot, replacing its old entry. */ | ||
202 | static void narrow_bpc_set(jit_State *J, IRRef1 key, IRRef1 val, IRRef mode) | ||
203 | { | ||
204 | BPropEntry *ent = &J->bpropcache[J->bpropslot]; | ||
205 | /* Advance the slot index, wrapping around by masking with BPROP_SLOTS-1. */ | ||
206 | J->bpropslot = (J->bpropslot + 1) & (BPROP_SLOTS-1); | ||
207 | ent->mode = mode; | ||
208 | ent->val = val; | ||
209 | ent->key = key; | ||
210 | } | ||
211 | |||
212 | /* Backpropagate narrowing conversion: push stack-machine code for ref onto nc->stack. Return number of needed conversions. */ | ||
213 | static int narrow_conv_backprop(NarrowConv *nc, IRRef ref, int depth) | ||
214 | { | ||
215 | jit_State *J = nc->J; /* Needed by the IR() and fins macros. */ | ||
216 | IRIns *ir = IR(ref); | ||
217 | IRRef cref; | ||
218 | |||
219 | /* Check the easy cases first. */ | ||
220 | if (ir->o == IR_TONUM) { /* Undo inverse conversion. */ | ||
221 | *nc->sp++ = IRREF2(ir->op1, NARROW_REF); | ||
222 | return 0; | ||
223 | } else if (ir->o == IR_KNUM) { /* Narrow FP constant. */ | ||
224 | lua_Number n = ir_knum(ir)->n; | ||
225 | if (nc->mode == IRTOINT_TOBIT) { /* Allows a wider range of constants. */ | ||
226 | int64_t k64 = (int64_t)n; /* NOTE(review): undefined in C if n is NaN or outside the int64_t range -- confirm. */ | ||
227 | if (n == cast_num(k64)) { /* Only if constant doesn't lose precision. */ | ||
228 | *nc->sp++ = IRREF2(0, NARROW_INT); | ||
229 | *nc->sp++ = (IRRef2)k64; /* But always truncate to 32 bits. */ | ||
230 | return 0; | ||
231 | } | ||
232 | } else { | ||
233 | int32_t k = lj_num2int(n); | ||
234 | if (n == cast_num(k)) { /* Only if constant is really an integer. */ | ||
235 | *nc->sp++ = IRREF2(0, NARROW_INT); | ||
236 | *nc->sp++ = (IRRef2)k; | ||
237 | return 0; | ||
238 | } | ||
239 | } | ||
240 | return 10; /* Never narrow other FP constants (this is rare). 10 is above any nc->lim (1 or 2). */ | ||
241 | } | ||
242 | |||
243 | /* Try to CSE the conversion. Stronger checks are ok, too. */ | ||
244 | for (cref = J->chain[fins->o]; cref > ref; cref = IR(cref)->prev) | ||
245 | if (IR(cref)->op1 == ref && | ||
246 | irt_isguard(IR(cref)->t) >= irt_isguard(fins->t)) { | ||
247 | *nc->sp++ = IRREF2(cref, NARROW_REF); | ||
248 | return 0; /* Already there, no additional conversion needed. */ | ||
249 | } | ||
250 | |||
251 | /* Backpropagate across ADD/SUB. */ | ||
252 | if (ir->o == IR_ADD || ir->o == IR_SUB) { | ||
253 | /* Try cache lookup first. */ | ||
254 | IRRef bpref, mode = nc->mode; | ||
255 | if (mode == IRTOINT_INDEX && depth > 0) | ||
256 | mode = IRTOINT_CHECK; /* Inner conversions need a stronger check. */ | ||
257 | bpref = narrow_bpc_get(nc->J, (IRRef1)ref, mode); | ||
258 | if (bpref) { | ||
259 | *nc->sp++ = IRREF2(bpref, NARROW_REF); | ||
260 | return 0; | ||
261 | } | ||
262 | if (++depth < NARROW_MAX_BACKPROP && nc->sp < nc->maxsp) { /* Stay within the depth and stack limits. */ | ||
263 | IRRef2 *savesp = nc->sp; /* Remember the stack top for backtracking. */ | ||
264 | int count = narrow_conv_backprop(nc, ir->op1, depth); | ||
265 | count += narrow_conv_backprop(nc, ir->op2, depth); | ||
266 | if (count <= nc->lim) { /* Limit total number of conversions. */ | ||
267 | *nc->sp++ = IRREF2(ref, IRTI(ir->o)); /* Integer-typed op, pushed after the code for both operands. */ | ||
268 | return count; | ||
269 | } | ||
270 | nc->sp = savesp; /* Too many conversions, need to backtrack. */ | ||
271 | } | ||
272 | } | ||
273 | |||
274 | /* Otherwise add a conversion. */ | ||
275 | *nc->sp++ = IRREF2(ref, NARROW_CONV); | ||
276 | return 1; | ||
277 | } | ||
278 | |||
279 | /* Emit the conversions collected during backpropagation. Returns the ref left on the operand stack. */ | ||
280 | static IRRef narrow_conv_emit(jit_State *J, NarrowConv *nc) | ||
281 | { | ||
282 | /* The fins fields must be saved now -- emitir() overwrites them. */ | ||
283 | IROpT guardot = irt_isguard(fins->t) ? IRTG(IR_ADDOV-IR_ADD, 0) : 0; /* Added to the op below: guarded overflow-checking variant, or 0. */ | ||
284 | IROpT convot = fins->ot; | ||
285 | IRRef1 convop2 = fins->op2; | ||
286 | IRRef2 *next = nc->stack; /* List of instructions from backpropagation. */ | ||
287 | IRRef2 *last = nc->sp; | ||
288 | IRRef2 *sp = nc->stack; /* Recycle the stack to store operands. */ | ||
289 | while (next < last) { /* Simple stack machine to process the ins. list. */ | ||
290 | IRRef2 ref = *next++; | ||
291 | IROpT op = ref >> 16; /* Upper 16 bits: IR opcode + type or a NARROW_* opcode. */ | ||
292 | if (op == NARROW_REF) { | ||
293 | *sp++ = ref; | ||
294 | } else if (op == NARROW_CONV) { | ||
295 | *sp++ = emitir_raw(convot, ref, convop2); /* Raw emit avoids a loop. */ | ||
296 | } else if (op == NARROW_INT) { | ||
297 | lua_assert(next < last); | ||
298 | *sp++ = lj_ir_kint(J, *next++); | ||
299 | } else { /* Regular IROpT. Pops two operands and pushes one result. */ | ||
300 | IRRef mode = nc->mode; | ||
301 | lua_assert(sp >= nc->stack+2); | ||
302 | sp--; | ||
303 | /* Omit some overflow checks for array indexing. See comments above. */ | ||
304 | if (mode == IRTOINT_INDEX) { | ||
305 | if (next == last && irref_isk((IRRef1)sp[0]) && | ||
306 | (uint32_t)IR((IRRef1)sp[0])->i + 0x40000000 < 0x80000000) /* Last op and -2^30 <= k < 2^30. */ | ||
307 | guardot = 0; | ||
308 | else | ||
309 | mode = IRTOINT_CHECK; /* Otherwise cache a stronger check. */ | ||
310 | } | ||
311 | sp[-1] = emitir(op+guardot, sp[-1], sp[0]); | ||
312 | narrow_bpc_set(J, (IRRef1)ref, (IRRef1)sp[-1], mode); /* Add to cache. */ | ||
313 | } | ||
314 | } | ||
315 | lua_assert(sp == nc->stack+1); | ||
316 | return nc->stack[0]; /* The single remaining operand is the result. */ | ||
317 | } | ||
318 | |||
319 | /* Try to narrow the type conversion held in fins by backpropagating it across its operand. */ | ||
320 | TRef LJ_FASTCALL lj_opt_narrow_convert(jit_State *J) | ||
321 | { | ||
322 | NarrowConv nc; | ||
323 | int istobit; | ||
324 | if (!(J->flags & JIT_F_OPT_NARROW)) | ||
325 | return NEXTFOLD; /* Narrowing is disabled. */ | ||
326 | istobit = (fins->o == IR_TOBIT); | ||
327 | /* TOBIT can use a more optimistic limit; its mode is only used in the cache. */ | ||
328 | nc.lim = istobit ? 2 : 1; | ||
329 | nc.mode = istobit ? IRTOINT_TOBIT : fins->op2; | ||
330 | /* Start with an empty collector stack. */ | ||
331 | nc.J = J; | ||
332 | nc.sp = nc.stack; | ||
333 | nc.maxsp = nc.stack + (NARROW_MAX_STACK-4); /* Keeps a redzone of 4 slots. */ | ||
334 | /* Collect first. Emit only if the needed conversions stay within the limit. */ | ||
335 | if (narrow_conv_backprop(&nc, fins->op1, 0) > nc.lim) | ||
336 | return NEXTFOLD; /* Too many conversions needed. */ | ||
337 | return narrow_conv_emit(J, &nc); | ||
338 | } | ||
339 | |||
340 | /* -- Narrowing of arithmetic operators ----------------------------------- */ | ||
341 | /* Check whether n converts to an int32_t and back without change (-0 is ok, too). */ | ||
342 | static int numisint(lua_Number n) | ||
343 | { | ||
344 | int32_t k = lj_num2int(n); | ||
345 | return (cast_num(k) == n); | ||
346 | } | ||
347 | |||
348 | /* Record b % c: a BAND for an integer b and a constant power-of-two c, else the FP expansion. */ | ||
349 | TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc) | ||
350 | { | ||
351 | TRef quot, prod; | ||
352 | /* Fast path needs narrowing enabled, an integer rb and a constant integer rc. */ | ||
353 | if ((J->flags & JIT_F_OPT_NARROW) && tref_isint(rb) && | ||
354 | tref_isk(rc) && tref_isint(rc)) { | ||
355 | int32_t k = IR(tref_ref(rc))->i; | ||
356 | if (k > 0 && !(k & (k-1))) /* k is a power of two: i % k ==> band(i, k-1) */ | ||
357 | return emitir(IRTI(IR_BAND), rb, lj_ir_kint(J, k-1)); | ||
358 | } | ||
359 | /* General case: b % c ==> b - floor(b/c)*c */ | ||
360 | /* Both operands are passed through lj_ir_tonum() first. */ | ||
361 | rb = lj_ir_tonum(J, rb); | ||
362 | rc = lj_ir_tonum(J, rc); | ||
363 | quot = emitir(IRTN(IR_DIV), rb, rc); | ||
364 | quot = emitir(IRTN(IR_FPMATH), quot, IRFPM_FLOOR); | ||
365 | prod = emitir(IRTN(IR_MUL), quot, rc); | ||
366 | return emitir(IRTN(IR_SUB), rb, prod); | ||
367 | } | ||
368 | |||
369 | /* Narrowing of power operator or math.pow. rb/rc are the recorded base/exponent, vc is the runtime exponent value. */ | ||
370 | TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc) | ||
371 | { | ||
372 | lua_Number n; | ||
373 | if (tvisstr(vc) && !lj_str_numconv(strVdata(vc), vc)) /* String exponent: presumably converted in place into vc -- TODO confirm. */ | ||
374 | lj_trace_err(J, LJ_TRERR_BADTYPE); | ||
375 | n = numV(vc); | ||
376 | /* Limit narrowing for pow to small exponents (or for two constants). */ | ||
377 | if ((tref_isint(rc) && tref_isk(rc) && tref_isk(rb)) || | ||
378 | ((J->flags & JIT_F_OPT_NARROW) && | ||
379 | (numisint(n) && n >= -65536.0 && n <= 65536.0))) { | ||
380 | TRef tmp; | ||
381 | if (!tref_isinteger(rc)) { | ||
382 | if (tref_isstr(rc)) | ||
383 | rc = emitir(IRTG(IR_STRTO, IRT_NUM), rc, 0); | ||
384 | rc = emitir(IRTGI(IR_TOINT), rc, IRTOINT_CHECK); /* Guarded TOINT! */ | ||
385 | } | ||
386 | if (!tref_isk(rc)) { /* Range guard: -65536 <= i <= 65536 */ | ||
387 | tmp = emitir(IRTI(IR_ADD), rc, lj_ir_kint(J, 65536-2147483647-1)); /* Bias by 65536-2^31 so that one signed LE covers both bounds. */ | ||
388 | emitir(IRTGI(IR_LE), tmp, lj_ir_kint(J, 2*65536-2147483647-1)); | ||
389 | } | ||
390 | return emitir(IRTN(IR_POWI), rb, rc); | ||
391 | } | ||
392 | /* FOLD covers most cases, but some are easier to do here. */ | ||
393 | if (tref_isk(rb) && tvispone(ir_knum(IR(tref_ref(rb))))) /* NOTE(review): reads rb as a KNUM; assumes a constant rb is always a number constant here -- confirm. */ | ||
394 | return rb; /* 1 ^ x ==> 1 */ | ||
395 | rc = lj_ir_tonum(J, rc); | ||
396 | if (tref_isk(rc) && ir_knum(IR(tref_ref(rc)))->n == 0.5) | ||
397 | return emitir(IRTN(IR_FPMATH), rb, IRFPM_SQRT); /* x ^ 0.5 ==> sqrt(x) */ | ||
398 | /* Split up b^c into exp2(c*log2(b)). Assembler may rejoin later. */ | ||
399 | rb = emitir(IRTN(IR_FPMATH), rb, IRFPM_LOG2); | ||
400 | rc = emitir(IRTN(IR_MUL), rb, rc); | ||
401 | return emitir(IRTN(IR_FPMATH), rc, IRFPM_EXP2); | ||
402 | } | ||
403 | |||
404 | /* -- Predictive narrowing of induction variables ------------------------- */ | ||
405 | |||
406 | /* Pick the IR type (IRT_INT or IRT_NUM) for the FORL index from the runtime loop values. */ | ||
407 | IRType lj_opt_narrow_forl(cTValue *forbase) | ||
408 | { | ||
409 | cTValue *idx = &forbase[FORL_IDX]; | ||
410 | cTValue *stop = &forbase[FORL_STOP]; | ||
411 | cTValue *step = &forbase[FORL_STEP]; | ||
412 | lua_Number last; | ||
413 | lua_assert(tvisnum(idx) && tvisnum(stop) && tvisnum(step)); | ||
414 | /* Any non-integral start/stop/step value keeps the index a number. */ | ||
415 | if (!numisint(numV(idx)) || !numisint(numV(stop)) || | ||
416 | !numisint(numV(step))) | ||
417 | return IRT_NUM; | ||
418 | /* Overflow check: stop+step must still be within the int32_t range. */ | ||
419 | last = numV(stop) + numV(step); | ||
420 | if (numV(step) >= 0 ? last > 2147483647.0 : last < -2147483648.0) | ||
421 | return IRT_NUM; /* The loop index could overflow. */ | ||
422 | return IRT_INT; | ||
423 | } | ||
424 | |||
425 | #undef IR | ||
426 | #undef fins | ||
427 | #undef emitir | ||
428 | #undef emitir_raw | ||
429 | |||
430 | #endif | ||
diff --git a/src/lj_parse.c b/src/lj_parse.c new file mode 100644 index 00000000..663525ab --- /dev/null +++ b/src/lj_parse.c | |||
@@ -0,0 +1,2198 @@ | |||
1 | /* | ||
2 | ** Lua parser (source code -> bytecode). | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | ** | ||
5 | ** Major portions taken verbatim or adapted from the Lua interpreter. | ||
6 | ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h | ||
7 | */ | ||
8 | |||
9 | #define lj_parse_c | ||
10 | #define LUA_CORE | ||
11 | |||
12 | #include "lj_obj.h" | ||
13 | #include "lj_gc.h" | ||
14 | #include "lj_err.h" | ||
15 | #include "lj_str.h" | ||
16 | #include "lj_tab.h" | ||
17 | #include "lj_func.h" | ||
18 | #include "lj_state.h" | ||
19 | #include "lj_bc.h" | ||
20 | #include "lj_lex.h" | ||
21 | #include "lj_parse.h" | ||
22 | #include "lj_vm.h" | ||
23 | #include "lj_vmevent.h" | ||
24 | |||
25 | /* -- Parser structures and definitions ----------------------------------- */ | ||
26 | |||
27 | /* Expression kinds. The per-kind comments name the ExpDesc.u fields used by that kind. */ | ||
28 | typedef enum { | ||
29 | /* Constant expressions must be first and in this order (isK() tests k <= VKLAST, priKk() uses k - VKNIL): */ | ||
30 | VKNIL, | ||
31 | VKFALSE, | ||
32 | VKTRUE, | ||
33 | VKSTR, /* sval = string value */ | ||
34 | VKNUM, /* nval = numerical value */ | ||
35 | VKLAST = VKNUM, /* alias for the last constant kind, not a kind of its own */ | ||
36 | /* Non-constant expressions follow: */ | ||
37 | VLOCAL, /* info = local register */ | ||
38 | VUPVAL, /* info = upvalue index */ | ||
39 | VGLOBAL, /* sval = string value */ | ||
40 | VINDEXED, /* info = table register, aux = index reg/byte/string const */ | ||
41 | VJMP, /* info = instruction PC */ | ||
42 | VRELOCABLE, /* info = instruction PC */ | ||
43 | VNONRELOC, /* info = result register */ | ||
44 | VCALL, /* info = instruction PC, aux = base */ | ||
45 | VVOID /* presumably: expression without a value -- TODO confirm */ | ||
46 | } ExpKind; | ||
47 | |||
48 | /* Expression descriptor. Which member of u is in use depends on k (see ExpKind). */ | ||
49 | typedef struct ExpDesc { | ||
50 | union { | ||
51 | struct { uint32_t info, aux; } s; /* info/aux payload of the non-constant kinds */ | ||
52 | TValue nval; /* VKNUM: numerical value */ | ||
53 | GCstr *sval; /* VKSTR, VGLOBAL: string value */ | ||
54 | } u; | ||
55 | ExpKind k; /* expression kind */ | ||
56 | BCPos t; /* true condition exit list */ | ||
57 | BCPos f; /* false condition exit list */ | ||
58 | } ExpDesc; | ||
59 | |||
60 | /* Tests for expression types. All take a pointer to an ExpDesc. */ | ||
61 | #define isK(e) ((uint32_t)((e)->k) <= VKLAST) /* any constant kind */ | ||
62 | #define isnumK(e) ((e)->k == VKNUM) /* number constant */ | ||
63 | #define isstrK(e) ((e)->k == VKSTR) /* string constant */ | ||
64 | #define expnumV(e) check_exp(isnumK((e)), numV(&(e)->u.nval)) /* value of a number constant, checked */ | ||
65 | |||
66 | #define hasjumps(e) ((e)->t != (e)->f) /* true and false exit lists differ */ | ||
67 | #define isKexp(e) (isK(e) && !hasjumps(e)) /* constant and no jumps */ | ||
68 | #define isnumKexp(e) (isnumK(e) && !hasjumps(e)) /* number constant and no jumps */ | ||
69 | |||
70 | #define priKk(k) check_exp((k) <= VKTRUE, (k) - VKNIL) /* offset of a primitive kind (VKNIL..VKTRUE) from VKNIL */ | ||
71 | #define priK(e) priKk((e)->k) /* same, taken from an ExpDesc */ | ||
72 | |||
73 | /* Per-function linked list of blocks. FuncState.bl holds the head of the chain. */ | ||
74 | typedef struct FuncBlock { | ||
75 | struct FuncBlock *previous; /* chain: link to the previous block */ | ||
76 | BCPos breaklist; /* list of jumps out of this loop */ | ||
77 | uint8_t nactvar; /* # active locals outside the breakable structure */ | ||
78 | uint8_t upval; /* true if some variable in the block is an upvalue */ | ||
79 | uint8_t isbreakable; /* true if `block' is a loop */ | ||
80 | } FuncBlock; | ||
81 | |||
82 | typedef struct UpValDesc { /* One entry of FuncState.upvalues[]. */ | ||
83 | uint8_t k; /* presumably the kind of the captured variable -- TODO confirm */ | ||
84 | uint8_t info; /* presumably its register or upvalue index -- TODO confirm */ | ||
85 | } UpValDesc; | ||
86 | |||
87 | /* Per-function state. One per function being parsed, linked to the enclosing one via prev. */ | ||
88 | typedef struct FuncState { | ||
89 | GCproto *pt; /* current function header */ | ||
90 | GCtab *kt; /* table to find (and reuse) elements in `k'; maps a constant to its index */ | ||
91 | struct FuncState *prev; /* enclosing function */ | ||
92 | struct LexState *ls; /* lexical state */ | ||
93 | struct lua_State *L; /* copy of the Lua state */ | ||
94 | struct FuncBlock *bl; /* chain of current blocks */ | ||
95 | BCPos pc; /* next bytecode position */ | ||
96 | BCPos lasttarget; /* PC of last jump target */ | ||
97 | BCPos jpc; /* list of pending jumps to PC; resolved when the next instruction is emitted */ | ||
98 | BCReg freereg; /* first free register */ | ||
99 | BCReg nkn, nkgc; /* number of lua_Number/GCobj constants; also the next index handed out */ | ||
100 | uint16_t nlocvars; /* number of elements in `locvars' */ | ||
101 | uint8_t nactvar; /* number of active local variables */ | ||
102 | uint8_t nuv; /* number of upvalues */ | ||
103 | UpValDesc upvalues[LJ_MAX_UPVAL]; /* upvalues */ | ||
104 | uint16_t actvar[LJ_MAX_LOCVAR]; /* declared-variable stack */ | ||
105 | } FuncState; | ||
106 | |||
107 | /* Binary and unary operators. ORDER OPR */ | ||
108 | typedef enum BinOpr { | ||
109 | OPR_ADD, OPR_SUB, OPR_MUL, OPR_DIV, OPR_MOD, OPR_POW, /* ORDER ARITH */ | ||
110 | OPR_CONCAT, | ||
111 | OPR_NE, OPR_EQ, | ||
112 | OPR_LT, OPR_GE, OPR_LE, OPR_GT, | ||
113 | OPR_AND, OPR_OR, | ||
114 | OPR_NOBINOPR /* end marker; presumably "not a binary operator" -- TODO confirm */ | ||
115 | } BinOpr; | ||
116 | |||
117 | LJ_STATIC_ASSERT((int)BC_ISGE-(int)BC_ISLT == (int)OPR_GE-(int)OPR_LT); /* Ordered comparisons: same spacing as the BC_IS* bytecodes. */ | ||
118 | LJ_STATIC_ASSERT((int)BC_ISLE-(int)BC_ISLT == (int)OPR_LE-(int)OPR_LT); | ||
119 | LJ_STATIC_ASSERT((int)BC_ISGT-(int)BC_ISLT == (int)OPR_GT-(int)OPR_LT); | ||
120 | LJ_STATIC_ASSERT((int)BC_SUBVV-(int)BC_ADDVV == (int)OPR_SUB-(int)OPR_ADD); /* Arithmetic: same spacing as the BC_*VV bytecodes. */ | ||
121 | LJ_STATIC_ASSERT((int)BC_MULVV-(int)BC_ADDVV == (int)OPR_MUL-(int)OPR_ADD); | ||
122 | LJ_STATIC_ASSERT((int)BC_DIVVV-(int)BC_ADDVV == (int)OPR_DIV-(int)OPR_ADD); | ||
123 | LJ_STATIC_ASSERT((int)BC_MODVV-(int)BC_ADDVV == (int)OPR_MOD-(int)OPR_ADD); | ||
124 | |||
125 | typedef enum UnOpr { OPR_MINUS, OPR_NOT, OPR_LEN, OPR_NOUNOPR } UnOpr; /* Unary operators; OPR_NOUNOPR is the end marker. */ | ||
126 | |||
127 | /* -- Error handling ------------------------------------------------------ */ | ||
128 | |||
129 | LJ_NORET LJ_NOINLINE static void err_syntax(LexState *ls, ErrMsg em) | ||
130 | { /* Raise lexer error em, passing the current token ls->token. Declared LJ_NORET. */ | ||
131 | lj_lex_error(ls, ls->token, em); | ||
132 | } | ||
133 | |||
134 | LJ_NORET LJ_NOINLINE static void err_token(LexState *ls, LexToken token) | ||
135 | { /* Raise LJ_ERR_XTOKEN with the string form of token as argument. Declared LJ_NORET. */ | ||
136 | lj_lex_error(ls, ls->token, LJ_ERR_XTOKEN, lj_lex_token2str(ls, token)); | ||
137 | } | ||
138 | |||
139 | LJ_NORET static void err_limit(FuncState *fs, uint32_t limit, const char *what) | ||
140 | { | ||
141 | if (fs->pt->linedefined == 0) | ||
142 | lj_lex_error(fs->ls, 0, LJ_ERR_XLIMM, limit, what); | ||
143 | else | ||
144 | lj_lex_error(fs->ls, 0, LJ_ERR_XLIMF, fs->pt->linedefined, limit, what); | ||
145 | } | ||
146 | |||
147 | #define checklimit(fs, v, l, m) if ((v) >= (l)) err_limit(fs, l, m) /* Limit error if v >= l. NOTE(review): expands to a bare if, so an else after the call site would bind to it. */ | ||
148 | #define checklimitgt(fs, v, l, m) if ((v) > (l)) err_limit(fs, l, m) /* Limit error if v > l. Same bare-if caveat. */ | ||
149 | #define checkcond(ls, c, em) { if (!(c)) err_syntax(ls, em); } /* Syntax error em unless c holds. */ | ||
150 | |||
151 | /* -- Code emitter: branches ---------------------------------------------- */ | ||
152 | /* Decode the jump at pc. Returns its absolute target, or NO_JMP if the stored delta is NO_JMP. */ | ||
153 | static BCPos getjump(FuncState *fs, BCPos pc) | ||
154 | { | ||
155 | ptrdiff_t ofs = bc_j(fs->pt->bc[pc]); | ||
156 | BCPos target = NO_JMP; | ||
157 | if ((BCPos)ofs != NO_JMP) /* Delta is relative to the instruction after the jump. */ | ||
158 | target = (BCPos)(((ptrdiff_t)pc+1)+ofs); | ||
159 | return target; | ||
160 | } | ||
161 | /* Return 1 if any jump in the list is not preceded by an ISTC/ISFC, else 0. */ | ||
162 | static int need_value(FuncState *fs, BCPos list) | ||
163 | { | ||
164 | for (; list != NO_JMP; list = getjump(fs, list)) { | ||
165 | BCIns ins = fs->pt->bc[list >= 1 ? list-1 : list]; /* Instruction before the jump (the jump itself at pc 0). */ | ||
166 | if (bc_op(ins) != BC_ISTC && bc_op(ins) != BC_ISFC) return 1; | ||
167 | } | ||
168 | return 0; /* Every jump follows an ISTC/ISFC. */ | ||
169 | } | ||
170 | |||
171 | static int patchtestreg(FuncState *fs, BCPos pc, BCReg reg) | ||
172 | { /* Patch the ISTC/ISFC in front of the jump at pc. Returns 0 if there is none, else 1. */ | ||
173 | BCIns *i = &fs->pt->bc[pc >= 1 ? pc-1 : pc]; /* Instruction before the jump (the jump itself at pc 0). */ | ||
174 | BCOp op = bc_op(*i); | ||
175 | if (!(op == BC_ISTC || op == BC_ISFC)) | ||
176 | return 0; /* cannot patch other instructions */ | ||
177 | if (reg != NO_REG && reg != bc_d(*i)) { | ||
178 | setbc_a(i, reg); /* Keep the op and set reg as its A operand. */ | ||
179 | } else { /* no register to put value or register already has the value */ | ||
180 | setbc_op(i, op+(BC_IST-BC_ISTC)); /* Switch to the op at the IST-ISTC offset; presumably the test without a copy -- TODO confirm. */ | ||
181 | setbc_a(i, 0); | ||
182 | } | ||
183 | return 1; | ||
184 | } | ||
185 | |||
186 | static void removevalues(FuncState *fs, BCPos list) | ||
187 | { /* Run patchtestreg() with NO_REG on every jump in the list; its result is ignored. */ | ||
188 | for (; list != NO_JMP; list = getjump(fs, list)) | ||
189 | patchtestreg(fs, list, NO_REG); | ||
190 | } | ||
191 | /* Make the jump at pc go to dest by storing the biased offset in its D operand. */ | ||
192 | static void fixjump(FuncState *fs, BCPos pc, BCPos dest) | ||
193 | { | ||
194 | /* The offset is relative to the instruction after the jump, plus BCBIAS_J. */ | ||
195 | BCPos biased = dest - (pc+1) + BCBIAS_J; | ||
196 | lua_assert(dest != NO_JMP); | ||
197 | if (biased > BCMAX_D) /* Does not fit into the operand. */ | ||
198 | err_syntax(fs->ls, LJ_ERR_XJUMP); | ||
199 | setbc_d(&fs->pt->bc[pc], biased); | ||
200 | } | ||
201 | /* Append the jump list l2 to the list *l1. Either list may be empty (NO_JMP). */ | ||
202 | static void concatjumps(FuncState *fs, BCPos *l1, BCPos l2) | ||
203 | { | ||
204 | BCPos tail, succ; | ||
205 | if (l2 == NO_JMP) return; /* Nothing to append. */ | ||
206 | if ((tail = *l1) == NO_JMP) { /* Empty destination: l2 becomes the list. */ | ||
207 | *l1 = l2; | ||
208 | return; | ||
209 | } | ||
210 | /* Otherwise walk to the last jump of *l1 and link it to l2. */ | ||
211 | for (succ = getjump(fs, tail); succ != NO_JMP; succ = getjump(fs, tail)) | ||
212 | tail = succ; | ||
213 | fixjump(fs, tail, l2); | ||
214 | } | ||
215 | /* Patch every jump in the list: to vtarget if patchtestreg() succeeds for it, else to dtarget. */ | ||
216 | static void patchlistaux(FuncState *fs, BCPos list, BCPos vtarget, | ||
217 | BCReg reg, BCPos dtarget) | ||
218 | { | ||
219 | BCPos next; | ||
220 | for (; list != NO_JMP; list = next) { | ||
221 | BCPos target; | ||
222 | next = getjump(fs, list); /* Fetch the link first, fixjump() rewrites this jump. */ | ||
223 | /* patchtestreg() returns 1 iff it found an ISTC/ISFC in front of the jump. */ | ||
224 | target = patchtestreg(fs, list, reg) ? vtarget : dtarget; | ||
225 | fixjump(fs, list, target); | ||
226 | } | ||
227 | } | ||
228 | |||
229 | static void patchtohere(FuncState *fs, BCPos list) | ||
230 | { /* Queue the jumps in the list on fs->jpc, to be patched to the current pc. */ | ||
231 | fs->lasttarget = fs->pc; /* The current pc is now a jump target. */ | ||
232 | concatjumps(fs, &fs->jpc, list); | ||
233 | } | ||
234 | /* Patch all jumps in the list to target, which must not lie beyond the current pc. */ | ||
235 | static void patchlist(FuncState *fs, BCPos list, BCPos target) | ||
236 | { | ||
237 | if (target != fs->pc) { /* Earlier target: patch the jumps right away. */ | ||
238 | lua_assert(target < fs->pc); | ||
239 | patchlistaux(fs, list, target, NO_REG, target); | ||
240 | return; | ||
241 | } | ||
242 | patchtohere(fs, list); /* Target is the current pc: queue the jumps on fs->jpc. */ | ||
243 | } | ||
244 | |||
245 | /* -- Code emitter: instructions ------------------------------------------ */ | ||
246 | |||
247 | static BCPos emitINS(FuncState *fs, BCIns i) | ||
248 | { /* Append instruction i to the bytecode of fs and return its position. */ | ||
249 | GCproto *pt; | ||
250 | patchlistaux(fs, fs->jpc, fs->pc, NO_REG, fs->pc); /* Resolve the pending jumps to this pc. */ | ||
251 | fs->jpc = NO_JMP; | ||
252 | pt = fs->pt; | ||
253 | if (LJ_UNLIKELY(fs->pc >= pt->sizebc)) { /* Bytecode array is full: check the limit, then grow. */ | ||
254 | checklimit(fs, fs->pc, LJ_MAX_BCINS, "bytecode instructions"); | ||
255 | lj_mem_growvec(fs->L, pt->bc, pt->sizebc, LJ_MAX_BCINS, BCIns); | ||
256 | lj_mem_growvec(fs->L, pt->lineinfo, pt->sizelineinfo, LJ_MAX_BCINS, BCLine); /* NOTE(review): only sizebc is tested above; assumes lineinfo grows in step with bc -- confirm. */ | ||
257 | } | ||
258 | pt->bc[fs->pc] = i; | ||
259 | pt->lineinfo[fs->pc] = fs->ls->lastline; /* Line number recorded for this instruction. */ | ||
260 | return fs->pc++; | ||
261 | } | ||
262 | |||
263 | #define emitABC(fs, o, a, b, c) emitINS(fs, BCINS_ABC(o, a, b, c)) /* Emit an instruction built with BCINS_ABC. */ | ||
264 | #define emitAD(fs, o, a, d) emitINS(fs, BCINS_AD(o, a, d)) /* Emit an instruction built with BCINS_AD. */ | ||
265 | #define emitAJ(fs, o, a, j) emitINS(fs, BCINS_AJ(o, a, j)) /* Emit an instruction built with BCINS_AJ. */ | ||
266 | |||
267 | #define bcptr(fs, e) (&(fs)->pt->bc[(e)->u.s.info]) /* Pointer to the instruction at e->u.s.info (kinds whose info is an instruction PC). */ | ||
268 | |||
269 | static BCPos emit_jump(FuncState *fs) | ||
270 | { /* Emit a jump with target NO_JMP (or reuse a preceding UCLO) and return it linked with the pending jumps. */ | ||
271 | BCPos jpc = fs->jpc; /* save list of jumps to here */ | ||
272 | BCPos j = fs->pc - 1; /* Candidate: the last emitted instruction. */ | ||
273 | fs->jpc = NO_JMP; | ||
274 | if ((int32_t)j >= (int32_t)fs->lasttarget && bc_op(fs->pt->bc[j]) == BC_UCLO) /* Signed compare; presumably so that pc-1 at pc 0 counts as -1 -- TODO confirm. */ | ||
275 | setbc_j(&fs->pt->bc[j], NO_JMP); /* Reuse the UCLO as the jump and reset its target. */ | ||
276 | else | ||
277 | j = emitAJ(fs, BC_JMP, fs->freereg, NO_JMP); | ||
278 | concatjumps(fs, &j, jpc); /* keep them on hold */ | ||
279 | return j; | ||
280 | } | ||
281 | |||
282 | /* -- Code emitter: constants --------------------------------------------- */ | ||
283 | |||
284 | static BCReg numK(FuncState *fs, ExpDesc *e) | ||
285 | { | ||
286 | lua_State *L = fs->L; | ||
287 | TValue *val; | ||
288 | lua_assert(isnumK(e)); | ||
289 | val = lj_tab_set(L, fs->kt, &e->u.nval); | ||
290 | if (tvisnum(val)) | ||
291 | return val->u32.lo; | ||
292 | val->u64 = fs->nkn; | ||
293 | return fs->nkn++; | ||
294 | } | ||
295 | |||
296 | static BCReg gcK(FuncState *fs, GCobj *gc, int itype) | ||
297 | { | ||
298 | lua_State *L = fs->L; | ||
299 | TValue o, *val; | ||
300 | setgcV(L, &o, &gc->gch, itype); | ||
301 | val = lj_tab_set(L, fs->kt, &o); | ||
302 | if (tvisnum(val)) | ||
303 | return val->u32.lo; | ||
304 | val->u64 = fs->nkgc; | ||
305 | return fs->nkgc++; | ||
306 | } | ||
307 | |||
308 | static BCReg strK(FuncState *fs, ExpDesc *e) | ||
309 | { | ||
310 | lua_assert(isstrK(e) || e->k == VGLOBAL); | ||
311 | return gcK(fs, obj2gco(e->u.sval), LJ_TSTR); | ||
312 | } | ||
313 | |||
314 | GCstr *lj_parse_keepstr(LexState *ls, const char *str, size_t len) | ||
315 | { | ||
316 | lua_State *L = ls->L; | ||
317 | GCstr *s = lj_str_new(L, str, len); | ||
318 | TValue *tv = lj_tab_setstr(L, ls->fs->kt, s); | ||
319 | if (tvisnil(tv)) setboolV(tv, 1); /* Anchor string to avoid GC. */ | ||
320 | return s; | ||
321 | } | ||
322 | |||
323 | static void keep_token(LexState *ls) | ||
324 | { | ||
325 | if (ls->token == TK_name || ls->token == TK_string) { | ||
326 | TValue *tv = lj_tab_setstr(ls->L, ls->fs->kt, strV(&ls->tokenval)); | ||
327 | if (tvisnil(tv)) setboolV(tv, 1); /* Anchor string to avoid GC. */ | ||
328 | } | ||
329 | } | ||
330 | |||
331 | static void nilK(FuncState *fs, BCReg from, BCReg n) | ||
332 | { | ||
333 | BCIns *pr; | ||
334 | if (fs->pc > fs->lasttarget) { /* no jumps to current position? */ | ||
335 | BCReg pfrom, pto; | ||
336 | pr = &fs->pt->bc[fs->pc-1]; | ||
337 | pfrom = bc_a(*pr); | ||
338 | switch (bc_op(*pr)) { | ||
339 | case BC_KPRI: | ||
340 | if (bc_d(*pr) != ~LJ_TNIL) break; | ||
341 | if (from == pfrom) { | ||
342 | if (n == 1) return; | ||
343 | } else if (from == pfrom+1) { | ||
344 | from = pfrom; | ||
345 | n++; | ||
346 | } else { | ||
347 | break; | ||
348 | } | ||
349 | fs->pc--; | ||
350 | break; | ||
351 | case BC_KNIL: | ||
352 | pto = bc_d(*pr); | ||
353 | if (pfrom <= from && from <= pto+1) { /* can connect both? */ | ||
354 | if (from+n-1 > pto) | ||
355 | setbc_d(pr, from+n-1); | ||
356 | return; | ||
357 | } | ||
358 | break; | ||
359 | default: | ||
360 | break; | ||
361 | } | ||
362 | } | ||
363 | emitINS(fs, n == 1 ? BCINS_AD(BC_KPRI, from, priKk(VKNIL)) | ||
364 | : BCINS_AD(BC_KNIL, from, from+n-1)); | ||
365 | } | ||
366 | |||
367 | /* -- Code emitter: registers --------------------------------------------- */ | ||
368 | |||
369 | static void checkframe(FuncState *fs, BCReg n) | ||
370 | { | ||
371 | BCReg sz = fs->freereg + n; | ||
372 | if (sz > fs->pt->framesize) { | ||
373 | if (sz >= LJ_MAX_SLOTS) | ||
374 | err_syntax(fs->ls, LJ_ERR_XSLOTS); | ||
375 | fs->pt->framesize = cast_byte(sz); | ||
376 | } | ||
377 | } | ||
378 | |||
379 | static void reserveregs(FuncState *fs, BCReg n) | ||
380 | { | ||
381 | checkframe(fs, n); | ||
382 | fs->freereg += n; | ||
383 | } | ||
384 | |||
385 | static void freereg(FuncState *fs, BCReg reg) | ||
386 | { | ||
387 | if (reg >= fs->nactvar) { | ||
388 | fs->freereg--; | ||
389 | lua_assert(reg == fs->freereg); | ||
390 | } | ||
391 | } | ||
392 | |||
393 | static void freeexp(FuncState *fs, ExpDesc *e) | ||
394 | { | ||
395 | if (e->k == VNONRELOC) | ||
396 | freereg(fs, e->u.s.info); | ||
397 | } | ||
398 | |||
399 | /* -- Code emitter: expressions ------------------------------------------- */ | ||
400 | |||
401 | static void dischargevars(FuncState *fs, ExpDesc *e) | ||
402 | { | ||
403 | BCIns ins; | ||
404 | switch (e->k) { | ||
405 | case VUPVAL: | ||
406 | ins = BCINS_AD(BC_UGET, 0, e->u.s.info); | ||
407 | break; | ||
408 | case VGLOBAL: | ||
409 | ins = BCINS_AD(BC_GGET, 0, strK(fs, e)); | ||
410 | break; | ||
411 | case VINDEXED: { | ||
412 | /* TGET[VSB] key = reg, string const or byte const */ | ||
413 | BCReg rc = e->u.s.aux; | ||
414 | if ((int32_t)rc < 0) { | ||
415 | ins = BCINS_ABC(BC_TGETS, 0, e->u.s.info, ~rc); | ||
416 | } else if (rc > BCMAX_C) { | ||
417 | ins = BCINS_ABC(BC_TGETB, 0, e->u.s.info, rc-(BCMAX_C+1)); | ||
418 | } else { | ||
419 | freereg(fs, rc); | ||
420 | ins = BCINS_ABC(BC_TGETV, 0, e->u.s.info, rc); | ||
421 | } | ||
422 | freereg(fs, e->u.s.info); | ||
423 | break; | ||
424 | } | ||
425 | case VCALL: | ||
426 | e->u.s.info = e->u.s.aux; | ||
427 | /* fallthrough */ | ||
428 | case VLOCAL: | ||
429 | e->k = VNONRELOC; | ||
430 | /* fallthrough */ | ||
431 | default: | ||
432 | return; | ||
433 | } | ||
434 | e->u.s.info = emitINS(fs, ins); | ||
435 | e->k = VRELOCABLE; | ||
436 | } | ||
437 | |||
438 | static void discharge2reg(FuncState *fs, ExpDesc *e, BCReg reg) | ||
439 | { | ||
440 | BCIns ins; | ||
441 | dischargevars(fs, e); | ||
442 | switch (e->k) { | ||
443 | case VKNIL: case VKFALSE: case VKTRUE: | ||
444 | ins = BCINS_AD(BC_KPRI, reg, priK(e)); | ||
445 | break; | ||
446 | case VKSTR: | ||
447 | ins = BCINS_AD(BC_KSTR, reg, strK(fs, e)); | ||
448 | break; | ||
449 | case VKNUM: { | ||
450 | lua_Number n = expnumV(e); | ||
451 | int32_t k = lj_num2int(n); | ||
452 | if (checki16(k) && n == cast_num(k)) | ||
453 | ins = BCINS_AD(BC_KSHORT, reg, (BCReg)(uint16_t)k); | ||
454 | else | ||
455 | ins = BCINS_AD(BC_KNUM, reg, numK(fs, e)); | ||
456 | break; | ||
457 | } | ||
458 | case VRELOCABLE: | ||
459 | setbc_a(bcptr(fs, e), reg); | ||
460 | goto noins; | ||
461 | case VNONRELOC: | ||
462 | if (reg == e->u.s.info) | ||
463 | goto noins; | ||
464 | ins = BCINS_AD(BC_MOV, reg, e->u.s.info); | ||
465 | break; | ||
466 | default: | ||
467 | lua_assert(e->k == VVOID || e->k == VJMP); | ||
468 | return; /* nothing to do... */ | ||
469 | } | ||
470 | emitINS(fs, ins); | ||
471 | noins: | ||
472 | e->u.s.info = reg; | ||
473 | e->k = VNONRELOC; | ||
474 | } | ||
475 | |||
476 | static void exp2reg(FuncState *fs, ExpDesc *e, BCReg reg) | ||
477 | { | ||
478 | discharge2reg(fs, e, reg); | ||
479 | if (e->k == VJMP) | ||
480 | concatjumps(fs, &e->t, e->u.s.info); /* put this jump in `t' list */ | ||
481 | if (hasjumps(e)) { | ||
482 | BCPos final; /* position after whole expression */ | ||
483 | BCPos p_f = NO_JMP; /* position of an eventual LOAD false */ | ||
484 | BCPos p_t = NO_JMP; /* position of an eventual LOAD true */ | ||
485 | if (need_value(fs, e->t) || need_value(fs, e->f)) { | ||
486 | BCPos fj = (e->k == VJMP) ? NO_JMP : emit_jump(fs); | ||
487 | p_f = emitAD(fs, BC_KPRI, reg, priKk(VKFALSE)); | ||
488 | emitAJ(fs, BC_JMP, fs->freereg, 1); | ||
489 | p_t = emitAD(fs, BC_KPRI, reg, priKk(VKTRUE)); | ||
490 | patchtohere(fs, fj); | ||
491 | } | ||
492 | final = fs->pc; | ||
493 | fs->lasttarget = final; | ||
494 | patchlistaux(fs, e->f, final, reg, p_f); | ||
495 | patchlistaux(fs, e->t, final, reg, p_t); | ||
496 | } | ||
497 | e->f = e->t = NO_JMP; | ||
498 | e->u.s.info = reg; | ||
499 | e->k = VNONRELOC; | ||
500 | } | ||
501 | |||
502 | static void exp2nextreg(FuncState *fs, ExpDesc *e) | ||
503 | { | ||
504 | dischargevars(fs, e); | ||
505 | freeexp(fs, e); | ||
506 | reserveregs(fs, 1); | ||
507 | exp2reg(fs, e, fs->freereg - 1); | ||
508 | } | ||
509 | |||
510 | static BCReg exp2anyreg(FuncState *fs, ExpDesc *e) | ||
511 | { | ||
512 | dischargevars(fs, e); | ||
513 | if (e->k == VNONRELOC) { | ||
514 | if (!hasjumps(e)) return e->u.s.info; /* exp is already in a register */ | ||
515 | if (e->u.s.info >= fs->nactvar) { /* reg. is not a local? */ | ||
516 | exp2reg(fs, e, e->u.s.info); /* put value on it */ | ||
517 | return e->u.s.info; | ||
518 | } | ||
519 | } | ||
520 | exp2nextreg(fs, e); /* default */ | ||
521 | return e->u.s.info; | ||
522 | } | ||
523 | |||
524 | static void exp2val(FuncState *fs, ExpDesc *e) | ||
525 | { | ||
526 | if (hasjumps(e)) | ||
527 | exp2anyreg(fs, e); | ||
528 | else | ||
529 | dischargevars(fs, e); | ||
530 | } | ||
531 | |||
532 | static void storevar(FuncState *fs, ExpDesc *var, ExpDesc *e) | ||
533 | { | ||
534 | BCIns ins; | ||
535 | switch (var->k) { | ||
536 | case VLOCAL: | ||
537 | freeexp(fs, e); | ||
538 | exp2reg(fs, e, var->u.s.info); | ||
539 | return; | ||
540 | case VUPVAL: | ||
541 | exp2val(fs, e); | ||
542 | switch (e->k) { | ||
543 | case VKNIL: case VKFALSE: case VKTRUE: | ||
544 | ins = BCINS_AD(BC_USETP, var->u.s.info, priK(e)); | ||
545 | break; | ||
546 | case VKSTR: | ||
547 | ins = BCINS_AD(BC_USETS, var->u.s.info, strK(fs, e)); | ||
548 | break; | ||
549 | case VKNUM: | ||
550 | ins = BCINS_AD(BC_USETN, var->u.s.info, numK(fs, e)); | ||
551 | break; | ||
552 | default: | ||
553 | ins = BCINS_AD(BC_USETV, var->u.s.info, exp2anyreg(fs, e)); | ||
554 | break; | ||
555 | } | ||
556 | break; | ||
557 | case VGLOBAL: { | ||
558 | BCReg ra = exp2anyreg(fs, e); | ||
559 | ins = BCINS_AD(BC_GSET, ra, strK(fs, var)); | ||
560 | break; | ||
561 | } | ||
562 | case VINDEXED: { | ||
563 | /* TSET[VSB] key = reg, string const or byte const */ | ||
564 | BCReg ra = exp2anyreg(fs, e); | ||
565 | BCReg rc = var->u.s.aux; | ||
566 | if ((int32_t)rc < 0) { | ||
567 | ins = BCINS_ABC(BC_TSETS, ra, var->u.s.info, ~rc); | ||
568 | } else if (rc > BCMAX_C) { | ||
569 | ins = BCINS_ABC(BC_TSETB, ra, var->u.s.info, rc-(BCMAX_C+1)); | ||
570 | } else { | ||
571 | /* Free late alloced key reg to avoid assert on free of value reg. */ | ||
572 | /* This can only happen when called from constructor(). */ | ||
573 | lua_assert(e->k != VNONRELOC || ra < fs->nactvar || | ||
574 | rc < ra || (freereg(fs, rc),1)); | ||
575 | ins = BCINS_ABC(BC_TSETV, ra, var->u.s.info, rc); | ||
576 | } | ||
577 | break; | ||
578 | } | ||
579 | default: | ||
580 | lua_assert(0); /* invalid var kind to store */ | ||
581 | return; | ||
582 | } | ||
583 | emitINS(fs, ins); | ||
584 | freeexp(fs, e); | ||
585 | } | ||
586 | |||
587 | static void indexexp(FuncState *fs, ExpDesc *t, ExpDesc *e) | ||
588 | { | ||
589 | /* already called: exp2val(fs, e) */ | ||
590 | t->k = VINDEXED; | ||
591 | if (isnumK(e)) { | ||
592 | lua_Number n = expnumV(e); | ||
593 | int32_t k = lj_num2int(n); | ||
594 | if (checku8(k) && n == cast_num(k)) { | ||
595 | t->u.s.aux = BCMAX_C+1+(uint32_t)k; /* 256..511: const byte key */ | ||
596 | return; | ||
597 | } | ||
598 | } else if (isstrK(e)) { | ||
599 | BCReg idx = strK(fs, e); | ||
600 | if (idx <= BCMAX_C) { | ||
601 | t->u.s.aux = ~idx; /* -256..-1: const string key */ | ||
602 | return; | ||
603 | } | ||
604 | } | ||
605 | t->u.s.aux = exp2anyreg(fs, e); /* 0..255: register */ | ||
606 | } | ||
607 | |||
608 | static void methodexp(FuncState *fs, ExpDesc *e, ExpDesc *key) | ||
609 | { | ||
610 | BCReg idx, func, tab = exp2anyreg(fs, e); | ||
611 | freeexp(fs, e); | ||
612 | func = fs->freereg; | ||
613 | emitAD(fs, BC_MOV, func+1, tab); | ||
614 | lua_assert(isstrK(key)); | ||
615 | idx = strK(fs, key); | ||
616 | if (idx <= BCMAX_C) { | ||
617 | reserveregs(fs, 2); | ||
618 | emitABC(fs, BC_TGETS, func, tab, idx); | ||
619 | } else { | ||
620 | reserveregs(fs, 3); | ||
621 | emitAD(fs, BC_KSTR, func+2, idx); | ||
622 | emitABC(fs, BC_TGETV, func, tab, func+2); | ||
623 | fs->freereg--; | ||
624 | } | ||
625 | e->u.s.info = func; | ||
626 | e->k = VNONRELOC; | ||
627 | } | ||
628 | |||
629 | /* -- Code emitter: conditionals ------------------------------------------ */ | ||
630 | |||
631 | static void invertjump(FuncState *fs, ExpDesc *e) | ||
632 | { | ||
633 | BCIns *i = bcptr(fs, e) - 1; | ||
634 | setbc_op(i, bc_op(*i)^1); | ||
635 | } | ||
636 | |||
637 | static BCPos jumponcond(FuncState *fs, ExpDesc *e, int cond) | ||
638 | { | ||
639 | if (e->k == VRELOCABLE) { | ||
640 | BCIns *i = bcptr(fs, e); | ||
641 | if (bc_op(*i) == BC_NOT) { | ||
642 | *i = BCINS_AD(cond ? BC_ISF : BC_IST, 0, bc_d(*i)); | ||
643 | return emit_jump(fs); | ||
644 | } | ||
645 | /* else go through */ | ||
646 | } | ||
647 | if (e->k != VNONRELOC) { | ||
648 | reserveregs(fs, 1); | ||
649 | discharge2reg(fs, e, fs->freereg-1); | ||
650 | } | ||
651 | freeexp(fs, e); | ||
652 | emitAD(fs, cond ? BC_ISTC : BC_ISFC, NO_REG, e->u.s.info); | ||
653 | return emit_jump(fs); | ||
654 | } | ||
655 | |||
656 | static void goiftrue(FuncState *fs, ExpDesc *e) | ||
657 | { | ||
658 | BCPos pc; /* PC of last jump. */ | ||
659 | dischargevars(fs, e); | ||
660 | switch (e->k) { | ||
661 | case VKSTR: case VKNUM: case VKTRUE: | ||
662 | pc = NO_JMP; /* always true; do nothing */ | ||
663 | break; | ||
664 | case VJMP: | ||
665 | invertjump(fs, e); | ||
666 | pc = e->u.s.info; | ||
667 | break; | ||
668 | case VKFALSE: | ||
669 | if (!hasjumps(e)) { | ||
670 | pc = emit_jump(fs); /* always jump */ | ||
671 | break; | ||
672 | } | ||
673 | /* fallthrough */ | ||
674 | default: | ||
675 | pc = jumponcond(fs, e, 0); | ||
676 | break; | ||
677 | } | ||
678 | concatjumps(fs, &e->f, pc); /* insert last jump in `f' list */ | ||
679 | patchtohere(fs, e->t); | ||
680 | e->t = NO_JMP; | ||
681 | } | ||
682 | |||
683 | static void goiffalse(FuncState *fs, ExpDesc *e) | ||
684 | { | ||
685 | BCPos pc; /* PC of last jump. */ | ||
686 | dischargevars(fs, e); | ||
687 | switch (e->k) { | ||
688 | case VKNIL: case VKFALSE: | ||
689 | pc = NO_JMP; /* always false; do nothing */ | ||
690 | break; | ||
691 | case VJMP: | ||
692 | pc = e->u.s.info; | ||
693 | break; | ||
694 | case VKTRUE: | ||
695 | if (!hasjumps(e)) { | ||
696 | pc = emit_jump(fs); /* always jump */ | ||
697 | break; | ||
698 | } | ||
699 | /* fallthrough */ | ||
700 | default: | ||
701 | pc = jumponcond(fs, e, 1); | ||
702 | break; | ||
703 | } | ||
704 | concatjumps(fs, &e->t, pc); /* insert last jump in `t' list */ | ||
705 | patchtohere(fs, e->f); | ||
706 | e->f = NO_JMP; | ||
707 | } | ||
708 | |||
709 | /* -- Code emitter: operators --------------------------------------------- */ | ||
710 | |||
711 | static int foldarith(BinOpr opr, ExpDesc *e1, ExpDesc *e2) | ||
712 | { | ||
713 | TValue o; | ||
714 | if (!isnumKexp(e1) || !isnumKexp(e2)) return 0; | ||
715 | setnumV(&o, lj_vm_foldarith(expnumV(e1), expnumV(e2), (int)opr-OPR_ADD)); | ||
716 | if (tvisnan(&o) || tvismzero(&o)) return 0; /* Avoid NaN and -0 as consts. */ | ||
717 | setnumV(&e1->u.nval, numV(&o)); | ||
718 | return 1; | ||
719 | } | ||
720 | |||
721 | static void codearith(FuncState *fs, BinOpr opr, ExpDesc *e1, ExpDesc *e2) | ||
722 | { | ||
723 | BCReg rb, rc, t; | ||
724 | uint32_t op; | ||
725 | if (foldarith(opr, e1, e2)) | ||
726 | return; | ||
727 | if (opr == OPR_POW) { | ||
728 | op = BC_POW; | ||
729 | rc = exp2anyreg(fs, e2); | ||
730 | rb = exp2anyreg(fs, e1); | ||
731 | } else { | ||
732 | op = opr-OPR_ADD+BC_ADDVV; | ||
733 | /* must discharge 2nd operand first since VINDEXED might free regs */ | ||
734 | exp2val(fs, e2); | ||
735 | if (isnumK(e2) && (rc = numK(fs, e2)) <= BCMAX_C) | ||
736 | op -= BC_ADDVV-BC_ADDVN; | ||
737 | else | ||
738 | rc = exp2anyreg(fs, e2); | ||
739 | /* emit_prebinop discharges 1st operand, but may need to use KNUM/KSHORT */ | ||
740 | lua_assert(isnumK(e1) || e1->k == VNONRELOC); | ||
741 | exp2val(fs, e1); | ||
742 | /* avoid two consts to satisfy bytecode constraints */ | ||
743 | if (isnumK(e1) && !isnumK(e2) && (t = numK(fs, e1)) <= BCMAX_B) { | ||
744 | rb = rc; rc = t; op -= BC_ADDVV-BC_ADDNV; | ||
745 | } else { | ||
746 | rb = exp2anyreg(fs, e1); | ||
747 | } | ||
748 | } | ||
749 | /* using freeexp might cause asserts if the order is wrong */ | ||
750 | if (e1->k == VNONRELOC && e1->u.s.info >= fs->nactvar) fs->freereg--; | ||
751 | if (e2->k == VNONRELOC && e2->u.s.info >= fs->nactvar) fs->freereg--; | ||
752 | e1->u.s.info = emitABC(fs, op, 0, rb, rc); | ||
753 | e1->k = VRELOCABLE; | ||
754 | } | ||
755 | |||
756 | static void codecomp(FuncState *fs, BinOpr opr, ExpDesc *e1, ExpDesc *e2) | ||
757 | { | ||
758 | ExpDesc *eret = e1; | ||
759 | BCIns ins; | ||
760 | exp2val(fs, e1); | ||
761 | if (opr == OPR_EQ || opr == OPR_NE) { | ||
762 | BCOp op = opr == OPR_EQ ? BC_ISEQV : BC_ISNEV; | ||
763 | BCReg ra; | ||
764 | if (isK(e1)) { e1 = e2; e2 = eret; } /* need constant in 2nd arg */ | ||
765 | ra = exp2anyreg(fs, e1); /* first arg must be in a reg */ | ||
766 | exp2val(fs, e2); | ||
767 | switch (e2->k) { | ||
768 | case VKNIL: case VKFALSE: case VKTRUE: | ||
769 | ins = BCINS_AD(op+(BC_ISEQP-BC_ISEQV), ra, priK(e2)); | ||
770 | break; | ||
771 | case VKSTR: | ||
772 | ins = BCINS_AD(op+(BC_ISEQS-BC_ISEQV), ra, strK(fs, e2)); | ||
773 | break; | ||
774 | case VKNUM: | ||
775 | ins = BCINS_AD(op+(BC_ISEQN-BC_ISEQV), ra, numK(fs, e2)); | ||
776 | break; | ||
777 | default: | ||
778 | ins = BCINS_AD(op, ra, exp2anyreg(fs, e2)); | ||
779 | break; | ||
780 | } | ||
781 | } else { | ||
782 | uint32_t op = opr-OPR_LT+BC_ISLT; | ||
783 | BCReg ra; | ||
784 | if ((op-BC_ISLT) & 1) { /* GT -> LT, GE -> LE */ | ||
785 | e1 = e2; e2 = eret; /* swap operands */ | ||
786 | op = ((op-BC_ISLT)^3)+BC_ISLT; | ||
787 | } | ||
788 | ra = exp2anyreg(fs, e1); | ||
789 | ins = BCINS_AD(op, ra, exp2anyreg(fs, e2)); | ||
790 | } | ||
791 | /* using freeexp might cause asserts if the order is wrong */ | ||
792 | if (e1->k == VNONRELOC && e1->u.s.info >= fs->nactvar) fs->freereg--; | ||
793 | if (e2->k == VNONRELOC && e2->u.s.info >= fs->nactvar) fs->freereg--; | ||
794 | emitINS(fs, ins); | ||
795 | eret->u.s.info = emit_jump(fs); | ||
796 | eret->k = VJMP; | ||
797 | } | ||
798 | |||
799 | static void emit_unop(FuncState *fs, UnOpr uop, ExpDesc *e) | ||
800 | { | ||
801 | BCOp op = BC_LEN; | ||
802 | switch (uop) { | ||
803 | case OPR_MINUS: | ||
804 | if (isnumKexp(e) && expnumV(e) != 0) { /* Avoid const-folding to -0. */ | ||
805 | setnumV(&e->u.nval, -expnumV(e)); | ||
806 | return; | ||
807 | } | ||
808 | op = BC_UNM; | ||
809 | /* fallthrough */ | ||
810 | case OPR_LEN: | ||
811 | exp2anyreg(fs, e); | ||
812 | break; | ||
813 | case OPR_NOT: | ||
814 | /* interchange true and false lists */ | ||
815 | { BCPos temp = e->f; e->f = e->t; e->t = temp; } | ||
816 | removevalues(fs, e->f); | ||
817 | removevalues(fs, e->t); | ||
818 | dischargevars(fs, e); | ||
819 | switch (e->k) { | ||
820 | case VKNIL: case VKFALSE: | ||
821 | e->k = VKTRUE; | ||
822 | return; | ||
823 | case VKSTR: case VKNUM: case VKTRUE: | ||
824 | e->k = VKFALSE; | ||
825 | return; | ||
826 | case VJMP: | ||
827 | invertjump(fs, e); | ||
828 | return; | ||
829 | case VRELOCABLE: | ||
830 | reserveregs(fs, 1); | ||
831 | setbc_a(bcptr(fs, e), fs->freereg-1); | ||
832 | e->u.s.info = fs->freereg-1; | ||
833 | e->k = VNONRELOC; | ||
834 | break; | ||
835 | case VNONRELOC: | ||
836 | break; | ||
837 | default: lua_assert(0); return; | ||
838 | } | ||
839 | op = BC_NOT; | ||
840 | break; | ||
841 | default: lua_assert(0); return; | ||
842 | } | ||
843 | freeexp(fs, e); | ||
844 | e->u.s.info = emitAD(fs, op, 0, e->u.s.info); | ||
845 | e->k = VRELOCABLE; | ||
846 | } | ||
847 | |||
848 | static void prepare_binop(FuncState *fs, BinOpr op, ExpDesc *e) | ||
849 | { | ||
850 | switch (op) { | ||
851 | case OPR_AND: | ||
852 | goiftrue(fs, e); | ||
853 | break; | ||
854 | case OPR_OR: | ||
855 | goiffalse(fs, e); | ||
856 | break; | ||
857 | case OPR_CONCAT: | ||
858 | exp2nextreg(fs, e); /* operand must be on the `stack' */ | ||
859 | break; | ||
860 | case OPR_EQ: case OPR_NE: | ||
861 | if (!isKexp(e)) exp2anyreg(fs, e); | ||
862 | break; | ||
863 | default: | ||
864 | if (!isnumKexp(e)) exp2anyreg(fs, e); | ||
865 | break; | ||
866 | } | ||
867 | } | ||
868 | |||
869 | static void emit_binop(FuncState *fs, BinOpr op, ExpDesc *e1, ExpDesc *e2) | ||
870 | { | ||
871 | switch (op) { | ||
872 | case OPR_AND: | ||
873 | lua_assert(e1->t == NO_JMP); /* list must be closed */ | ||
874 | dischargevars(fs, e2); | ||
875 | concatjumps(fs, &e2->f, e1->f); | ||
876 | *e1 = *e2; | ||
877 | break; | ||
878 | case OPR_OR: | ||
879 | lua_assert(e1->f == NO_JMP); /* list must be closed */ | ||
880 | dischargevars(fs, e2); | ||
881 | concatjumps(fs, &e2->t, e1->t); | ||
882 | *e1 = *e2; | ||
883 | break; | ||
884 | case OPR_CONCAT: | ||
885 | exp2val(fs, e2); | ||
886 | if (e2->k == VRELOCABLE && bc_op(*bcptr(fs, e2)) == BC_CAT) { | ||
887 | lua_assert(e1->u.s.info == bc_b(*bcptr(fs, e2))-1); | ||
888 | freeexp(fs, e1); | ||
889 | setbc_b(bcptr(fs, e2), e1->u.s.info); | ||
890 | e1->u.s.info = e2->u.s.info; | ||
891 | } else { | ||
892 | exp2nextreg(fs, e2); | ||
893 | freeexp(fs, e2); | ||
894 | freeexp(fs, e1); | ||
895 | e1->u.s.info = emitABC(fs, BC_CAT, 0, e1->u.s.info, e2->u.s.info); | ||
896 | } | ||
897 | e1->k = VRELOCABLE; | ||
898 | break; | ||
899 | case OPR_ADD: case OPR_SUB: case OPR_MUL: | ||
900 | case OPR_DIV: case OPR_MOD: case OPR_POW: | ||
901 | codearith(fs, op, e1, e2); | ||
902 | break; | ||
903 | case OPR_EQ: case OPR_NE: | ||
904 | case OPR_LT: case OPR_LE: case OPR_GT: case OPR_GE: | ||
905 | codecomp(fs, op, e1, e2); | ||
906 | break; | ||
907 | default: lua_assert(0); break; | ||
908 | } | ||
909 | } | ||
910 | |||
911 | /* -- Lexer support ------------------------------------------------------- */ | ||
912 | |||
913 | static int testnext(LexState *ls, LexToken tok) | ||
914 | { | ||
915 | if (ls->token == tok) { | ||
916 | lj_lex_next(ls); | ||
917 | return 1; | ||
918 | } | ||
919 | return 0; | ||
920 | } | ||
921 | |||
922 | static void checknext(LexState *ls, LexToken tok) | ||
923 | { | ||
924 | if (ls->token != tok) | ||
925 | err_token(ls, tok); | ||
926 | lj_lex_next(ls); | ||
927 | } | ||
928 | |||
929 | static void checkmatch(LexState *ls, LexToken what, LexToken who, BCLine line) | ||
930 | { | ||
931 | if (!testnext(ls, what)) { | ||
932 | if (line == ls->linenumber) { | ||
933 | err_token(ls, what); | ||
934 | } else { | ||
935 | const char *swhat = lj_lex_token2str(ls, what); | ||
936 | const char *swho = lj_lex_token2str(ls, who); | ||
937 | lj_lex_error(ls, ls->token, LJ_ERR_XMATCH, swhat, swho, line); | ||
938 | } | ||
939 | } | ||
940 | } | ||
941 | |||
942 | static GCstr *str_checkname(LexState *ls) | ||
943 | { | ||
944 | GCstr *s; | ||
945 | if (ls->token != TK_name) | ||
946 | err_token(ls, TK_name); | ||
947 | s = strV(&ls->tokenval); | ||
948 | lj_lex_next(ls); | ||
949 | return s; | ||
950 | } | ||
951 | |||
952 | static void init_exp(ExpDesc *e, ExpKind k, uint32_t info) | ||
953 | { | ||
954 | e->k = k; | ||
955 | e->u.s.info = info; | ||
956 | e->f = e->t = NO_JMP; | ||
957 | } | ||
958 | |||
959 | static void checkname(LexState *ls, ExpDesc *e) | ||
960 | { | ||
961 | init_exp(e, VKSTR, 0); | ||
962 | e->u.sval = str_checkname(ls); | ||
963 | } | ||
964 | |||
965 | /* -- Variable handling --------------------------------------------------- */ | ||
966 | |||
/* Variable info of the active local variable with index `idx`. */
#define getlocvar(fs, idx)  ((fs)->pt->varinfo[(fs)->actvar[(idx)]])
968 | |||
969 | static BCReg registerlocalvar(LexState *ls, GCstr *name) | ||
970 | { | ||
971 | FuncState *fs = ls->fs; | ||
972 | GCproto *pt = fs->pt; | ||
973 | if (LJ_UNLIKELY(fs->nlocvars >= pt->sizevarinfo)) { | ||
974 | MSize oldsize = pt->sizevarinfo; | ||
975 | checklimit(fs, fs->nlocvars, 32767, "local variables"); | ||
976 | lj_mem_growvec(fs->L, pt->varinfo, pt->sizevarinfo, 32767, VarInfo); | ||
977 | while (oldsize < pt->sizevarinfo) pt->varinfo[oldsize++].name = NULL; | ||
978 | } | ||
979 | pt->varinfo[fs->nlocvars].name = name; | ||
980 | lj_gc_objbarrier(ls->L, pt, name); | ||
981 | return fs->nlocvars++; | ||
982 | } | ||
983 | |||
984 | static void new_localvar(LexState *ls, GCstr *name, BCReg n) | ||
985 | { | ||
986 | FuncState *fs = ls->fs; | ||
987 | checklimit(fs, fs->nactvar+n, LJ_MAX_LOCVAR, "local variables"); | ||
988 | fs->actvar[fs->nactvar+n] = cast(uint16_t, registerlocalvar(ls, name)); | ||
989 | } | ||
990 | |||
/* Declare a new local variable whose name is given as a string literal. */
#define new_localvarliteral(ls, lit, n) \
  new_localvar(ls, lj_parse_keepstr(ls, "" lit, sizeof(lit)-1), n)
993 | |||
994 | static void adjustlocalvars(LexState *ls, BCReg nvars) | ||
995 | { | ||
996 | FuncState *fs = ls->fs; | ||
997 | fs->nactvar = cast_byte(fs->nactvar + nvars); | ||
998 | for (; nvars; nvars--) | ||
999 | getlocvar(fs, fs->nactvar - nvars).startpc = fs->pc; | ||
1000 | } | ||
1001 | |||
1002 | static void removevars(LexState *ls, BCReg tolevel) | ||
1003 | { | ||
1004 | FuncState *fs = ls->fs; | ||
1005 | while (fs->nactvar > tolevel) | ||
1006 | getlocvar(fs, --fs->nactvar).endpc = fs->pc; | ||
1007 | } | ||
1008 | |||
1009 | static uint32_t indexupvalue(FuncState *fs, GCstr *name, ExpDesc *v) | ||
1010 | { | ||
1011 | uint32_t i; | ||
1012 | GCproto *pt = fs->pt; | ||
1013 | for (i = 0; i < fs->nuv; i++) { | ||
1014 | if (fs->upvalues[i].k == v->k && fs->upvalues[i].info == v->u.s.info) { | ||
1015 | lua_assert(pt->uvname[i] == name); | ||
1016 | return i; | ||
1017 | } | ||
1018 | } | ||
1019 | /* Not found, create a new upvalue for this name. */ | ||
1020 | if (LJ_UNLIKELY(fs->nuv >= pt->sizeuvname)) { | ||
1021 | MSize oldsize = pt->sizeuvname; | ||
1022 | checklimit(fs, fs->nuv, LJ_MAX_UPVAL, "upvalues"); | ||
1023 | lj_mem_growvec(fs->L, pt->uvname, pt->sizeuvname, LJ_MAX_UPVAL, GCstr *); | ||
1024 | while (oldsize < pt->sizeuvname) pt->uvname[oldsize++] = NULL; | ||
1025 | } | ||
1026 | pt->uvname[fs->nuv] = name; | ||
1027 | lj_gc_objbarrier(fs->L, pt, name); | ||
1028 | lua_assert(v->k == VLOCAL || v->k == VUPVAL); | ||
1029 | fs->upvalues[fs->nuv].k = cast_byte(v->k); | ||
1030 | fs->upvalues[fs->nuv].info = cast_byte(v->u.s.info); | ||
1031 | return fs->nuv++; | ||
1032 | } | ||
1033 | |||
1034 | static BCReg searchvar(FuncState *fs, GCstr *n) | ||
1035 | { | ||
1036 | int i; | ||
1037 | for (i = fs->nactvar-1; i >= 0; i--) { | ||
1038 | if (n == getlocvar(fs, i).name) | ||
1039 | return (BCReg)i; | ||
1040 | } | ||
1041 | return (BCReg)-1; /* Not found. */ | ||
1042 | } | ||
1043 | |||
1044 | static void markupval(FuncState *fs, BCReg level) | ||
1045 | { | ||
1046 | FuncBlock *bl = fs->bl; | ||
1047 | while (bl && bl->nactvar > level) bl = bl->previous; | ||
1048 | if (bl) bl->upval = 1; | ||
1049 | } | ||
1050 | |||
1051 | static int singlevaraux(FuncState *fs, GCstr *name, ExpDesc *e, int first) | ||
1052 | { | ||
1053 | if (fs == NULL) { /* no more levels? */ | ||
1054 | init_exp(e, VGLOBAL, 0); /* default is global variable */ | ||
1055 | e->u.sval = name; | ||
1056 | return 1; | ||
1057 | } else { | ||
1058 | BCReg reg = searchvar(fs, name); /* look up at current level */ | ||
1059 | if ((int32_t)reg >= 0) { | ||
1060 | init_exp(e, VLOCAL, reg); | ||
1061 | if (!first) | ||
1062 | markupval(fs, reg); /* local will be used as an upval */ | ||
1063 | return 0; | ||
1064 | } else { /* not found at current level; try upper one */ | ||
1065 | if (singlevaraux(fs->prev, name, e, 0)) /* global? */ | ||
1066 | return 1; | ||
1067 | e->u.s.info = indexupvalue(fs, name, e); /* else was local or upvalue */ | ||
1068 | e->k = VUPVAL; /* upvalue in this level */ | ||
1069 | return 0; | ||
1070 | } | ||
1071 | } | ||
1072 | } | ||
1073 | |||
/* Parse a name token and resolve it as a variable of the current function. */
#define singlevar(ls, e)  singlevaraux((ls)->fs, str_checkname(ls), (e), 1)
1075 | |||
1076 | static void adjust_assign(LexState *ls, BCReg nvars, BCReg nexps, ExpDesc *e) | ||
1077 | { | ||
1078 | FuncState *fs = ls->fs; | ||
1079 | int32_t extra = (int32_t)nvars - (int32_t)nexps; | ||
1080 | if (e->k == VCALL) { | ||
1081 | extra++; /* includes call itself */ | ||
1082 | if (extra < 0) extra = 0; | ||
1083 | setbc_b(bcptr(fs, e), extra+1); | ||
1084 | if (extra > 1) reserveregs(fs, (BCReg)extra-1); | ||
1085 | } else { | ||
1086 | if (e->k != VVOID) exp2nextreg(fs, e); /* close last expression */ | ||
1087 | if (extra > 0) { | ||
1088 | BCReg reg = fs->freereg; | ||
1089 | reserveregs(fs, (BCReg)extra); | ||
1090 | nilK(fs, reg, (BCReg)extra); | ||
1091 | } | ||
1092 | } | ||
1093 | } | ||
1094 | |||
1095 | /* -- Function handling --------------------------------------------------- */ | ||
1096 | |||
1097 | /* Forward declaration. */ | ||
1098 | static void chunk(LexState *ls); | ||
1099 | |||
1100 | static void open_func(LexState *ls, FuncState *fs) | ||
1101 | { | ||
1102 | lua_State *L = ls->L; | ||
1103 | GCproto *pt = lj_func_newproto(L); | ||
1104 | fs->pt = pt; | ||
1105 | fs->prev = ls->fs; /* linked list of funcstates */ | ||
1106 | fs->ls = ls; | ||
1107 | fs->L = L; | ||
1108 | ls->fs = fs; | ||
1109 | fs->pc = 0; | ||
1110 | fs->lasttarget = 0; | ||
1111 | fs->jpc = NO_JMP; | ||
1112 | fs->freereg = 0; | ||
1113 | fs->nkgc = 0; | ||
1114 | fs->nkn = 0; | ||
1115 | fs->nlocvars = 0; | ||
1116 | fs->nactvar = 0; | ||
1117 | fs->nuv = 0; | ||
1118 | fs->bl = NULL; | ||
1119 | pt->chunkname = ls->chunkname; | ||
1120 | pt->framesize = 2; /* registers 0/1 are always valid */ | ||
1121 | fs->kt = lj_tab_new(L, 0, 0); | ||
1122 | /* anchor table of constants and prototype (to avoid being collected) */ | ||
1123 | settabV(L, L->top, fs->kt); | ||
1124 | incr_top(L); | ||
1125 | setprotoV(L, L->top, pt); | ||
1126 | incr_top(L); | ||
1127 | } | ||
1128 | |||
1129 | static void collectk(FuncState *fs, GCproto *pt) | ||
1130 | { | ||
1131 | GCtab *kt; | ||
1132 | TValue *array; | ||
1133 | Node *node; | ||
1134 | BCReg nkgc; | ||
1135 | MSize i, hmask, sizek; | ||
1136 | GCRef *kstart; | ||
1137 | checklimitgt(fs, fs->nkn, BCMAX_D+1, "constants"); | ||
1138 | checklimitgt(fs, fs->nkgc, BCMAX_D+1, "constants"); | ||
1139 | nkgc = round_nkgc(fs->nkgc); | ||
1140 | sizek = (MSize)(nkgc*sizeof(MRef) + fs->nkn*sizeof(lua_Number)); | ||
1141 | kstart = lj_mem_newt(fs->L, sizek, GCRef); | ||
1142 | if (nkgc) setgcrefnull(kstart[0]); /* May be uninitialized otherwise. */ | ||
1143 | pt->k.gc = kstart + nkgc; | ||
1144 | pt->sizekn = fs->nkn; | ||
1145 | pt->sizekgc = fs->nkgc; | ||
1146 | kt = fs->kt; | ||
1147 | array = tvref(kt->array); | ||
1148 | for (i = 0; i < kt->asize; i++) | ||
1149 | if (tvisnum(&array[i])) | ||
1150 | pt->k.n[array[i].u32.lo] = cast_num(i); | ||
1151 | node = noderef(kt->node); | ||
1152 | hmask = kt->hmask; | ||
1153 | for (i = 0; i <= hmask; i++) { | ||
1154 | Node *n = &node[i]; | ||
1155 | if (tvisnum(&n->val)) { | ||
1156 | ptrdiff_t kidx = (ptrdiff_t)n->val.u32.lo; | ||
1157 | if (tvisnum(&n->key)) { | ||
1158 | pt->k.n[kidx] = numV(&n->key); | ||
1159 | } else { | ||
1160 | GCobj *o = gcV(&n->key); | ||
1161 | setgcref(pt->k.gc[~kidx], o); | ||
1162 | lj_gc_objbarrier(fs->L, pt, o); | ||
1163 | } | ||
1164 | } | ||
1165 | } | ||
1166 | } | ||
1167 | |||
1168 | static void collectuv(FuncState *fs, GCproto *pt) | ||
1169 | { | ||
1170 | uint32_t i; | ||
1171 | pt->uv = lj_mem_newvec(fs->L, fs->nuv, int16_t); | ||
1172 | pt->sizeuv = fs->nuv; | ||
1173 | for (i = 0; i < pt->sizeuv; i++) { | ||
1174 | uint32_t v = fs->upvalues[i].info; | ||
1175 | if (fs->upvalues[i].k == VUPVAL) v = ~v; | ||
1176 | pt->uv[i] = (int16_t)v; | ||
1177 | } | ||
1178 | } | ||
1179 | |||
1180 | static void finalret(FuncState *fs, GCproto *pt) | ||
1181 | { | ||
1182 | BCPos lastpc = fs->pc; | ||
1183 | if (lastpc > fs->lasttarget) { | ||
1184 | switch (bc_op(pt->bc[lastpc-1])) { | ||
1185 | case BC_CALLMT: case BC_CALLT: | ||
1186 | case BC_RETM: case BC_RET: case BC_RET0: case BC_RET1: | ||
1187 | goto suppress_return; /* already got a return */ | ||
1188 | default: break; | ||
1189 | } | ||
1190 | } | ||
1191 | if (fs->pt->flags & PROTO_HAS_FNEW) | ||
1192 | emitAJ(fs, BC_UCLO, 0, 0); | ||
1193 | emitAD(fs, BC_RET0, 0, 1); /* final return */ | ||
1194 | suppress_return: | ||
1195 | /* may need to fixup returns encoded before first function was created */ | ||
1196 | if (fs->pt->flags & PROTO_FIXUP_RETURN) { | ||
1197 | BCPos pc; | ||
1198 | for (pc = 0; pc < lastpc; pc++) { | ||
1199 | BCIns i = pt->bc[pc]; | ||
1200 | BCPos offset; | ||
1201 | switch (bc_op(i)) { | ||
1202 | case BC_CALLMT: case BC_CALLT: | ||
1203 | case BC_RETM: case BC_RET: case BC_RET0: case BC_RET1: | ||
1204 | offset = emitINS(fs, i)-(pc+1)+BCBIAS_J; /* copy return ins */ | ||
1205 | if (offset > BCMAX_D) | ||
1206 | err_syntax(fs->ls, LJ_ERR_XFIXUP); | ||
1207 | pt->bc[pc] = BCINS_AD(BC_UCLO, 0, offset); /* replace w/ UCLO+branch */ | ||
1208 | break; | ||
1209 | case BC_UCLO: return; /* we're done */ | ||
1210 | default: break; | ||
1211 | } | ||
1212 | } | ||
1213 | } | ||
1214 | } | ||
1215 | |||
1216 | static void close_func(LexState *ls) | ||
1217 | { | ||
1218 | lua_State *L = ls->L; | ||
1219 | FuncState *fs = ls->fs; | ||
1220 | GCproto *pt = fs->pt; | ||
1221 | removevars(ls, 0); | ||
1222 | finalret(fs, pt); | ||
1223 | lj_mem_reallocvec(L, pt->bc, pt->sizebc, fs->pc, BCIns); | ||
1224 | pt->sizebc = fs->pc; | ||
1225 | collectk(fs, pt); | ||
1226 | collectuv(fs, pt); | ||
1227 | lj_mem_reallocvec(L, pt->lineinfo, pt->sizelineinfo, fs->pc, BCLine); | ||
1228 | pt->sizelineinfo = fs->pc; | ||
1229 | lj_mem_reallocvec(L, pt->varinfo, pt->sizevarinfo, fs->nlocvars, VarInfo); | ||
1230 | pt->sizevarinfo = fs->nlocvars; | ||
1231 | lj_mem_reallocvec(L, pt->uvname, pt->sizeuvname, fs->nuv, GCstr *); | ||
1232 | pt->sizeuvname = fs->nuv; | ||
1233 | lua_assert(fs->bl == NULL); | ||
1234 | lj_vmevent_send(L, BC, | ||
1235 | setprotoV(L, L->top++, pt); | ||
1236 | ); | ||
1237 | ls->fs = fs->prev; | ||
1238 | L->top -= 2; /* Remove table and prototype from the stack. */ | ||
1239 | lua_assert(ls->fs != NULL || ls->token == TK_eof); | ||
1240 | keep_token(ls); /* Re-anchor last token. */ | ||
1241 | } | ||
1242 | |||
1243 | GCproto *lj_parse(LexState *ls) | ||
1244 | { | ||
1245 | struct FuncState fs; | ||
1246 | ls->level = 0; | ||
1247 | open_func(ls, &fs); | ||
1248 | fs.pt->flags |= PROTO_IS_VARARG; /* Main chunk is always a vararg func. */ | ||
1249 | lj_lex_next(ls); /* Read-ahead first token. */ | ||
1250 | chunk(ls); | ||
1251 | if (ls->token != TK_eof) | ||
1252 | err_token(ls, TK_eof); | ||
1253 | fs.pt->lastlinedefined = ls->linenumber; | ||
1254 | close_func(ls); | ||
1255 | lua_assert(fs.prev == NULL); | ||
1256 | lua_assert(fs.pt->sizeuv == 0); | ||
1257 | lua_assert(ls->fs == NULL); | ||
1258 | return fs.pt; | ||
1259 | } | ||
1260 | |||
1261 | /* -- Expressions --------------------------------------------------------- */ | ||
1262 | |||
1263 | /* forward declaration */ | ||
1264 | static void expr(LexState *ls, ExpDesc *v); | ||
1265 | |||
1266 | static void field(LexState *ls, ExpDesc *v) | ||
1267 | { | ||
1268 | /* field -> ['.' | ':'] NAME */ | ||
1269 | FuncState *fs = ls->fs; | ||
1270 | ExpDesc key; | ||
1271 | exp2anyreg(fs, v); | ||
1272 | lj_lex_next(ls); /* skip the dot or colon */ | ||
1273 | checkname(ls, &key); | ||
1274 | indexexp(fs, v, &key); | ||
1275 | } | ||
1276 | |||
1277 | static void yindex(LexState *ls, ExpDesc *v) | ||
1278 | { | ||
1279 | /* index -> '[' expr ']' */ | ||
1280 | lj_lex_next(ls); /* skip the '[' */ | ||
1281 | expr(ls, v); | ||
1282 | exp2val(ls->fs, v); | ||
1283 | checknext(ls, ']'); | ||
1284 | } | ||
1285 | |||
1286 | static void kexp2tv(TValue *v, ExpDesc *e) | ||
1287 | { | ||
1288 | switch (e->k) { | ||
1289 | case VKNIL: case VKFALSE: case VKTRUE: v->it = ~(int32_t)e->k; break; | ||
1290 | case VKSTR: | ||
1291 | setgcref(v->gcr, obj2gco(e->u.sval)); v->it = LJ_TSTR; break; | ||
1292 | case VKNUM: setnumV(v, expnumV(e)); break; | ||
1293 | default: lua_assert(0); break; | ||
1294 | } | ||
1295 | } | ||
1296 | |||
1297 | static void constructor(LexState *ls, ExpDesc *e) | ||
1298 | { | ||
1299 | FuncState *fs = ls->fs; | ||
1300 | BCLine line = ls->linenumber; | ||
1301 | GCtab *t = NULL; | ||
1302 | int vcall = 0, needarr = 0; | ||
1303 | int32_t narr = 1; /* first array index */ | ||
1304 | uint32_t nhash = 0; /* number of hash entries */ | ||
1305 | BCReg freg = fs->freereg; | ||
1306 | BCPos pc = emitAD(fs, BC_TNEW, freg, 0); | ||
1307 | init_exp(e, VNONRELOC, freg); | ||
1308 | reserveregs(fs, 1); | ||
1309 | freg++; | ||
1310 | checknext(ls, '{'); | ||
1311 | while (ls->token != '}') { | ||
1312 | ExpDesc key, val; | ||
1313 | vcall = 0; | ||
1314 | if (ls->token == '[') { | ||
1315 | yindex(ls, &key); /* already calls exp2val */ | ||
1316 | if (!isK(&key)) indexexp(fs, e, &key); | ||
1317 | if (isnumK(&key) && expnumV(&key) == 0) needarr = 1; else nhash++; | ||
1318 | checknext(ls, '='); | ||
1319 | } else if (ls->token == TK_name && lj_lex_lookahead(ls) == '=') { | ||
1320 | checkname(ls, &key); | ||
1321 | checknext(ls, '='); | ||
1322 | nhash++; | ||
1323 | } else { | ||
1324 | init_exp(&key, VKNUM, 0); | ||
1325 | setintV(&key.u.nval, narr); | ||
1326 | narr++; | ||
1327 | needarr = vcall = 1; | ||
1328 | } | ||
1329 | expr(ls, &val); | ||
1330 | if (isKexp(&val) && isK(&key) && key.k != VKNIL) { | ||
1331 | TValue k; | ||
1332 | if (!t) { /* create template table on demand */ | ||
1333 | BCReg kidx; | ||
1334 | t = lj_tab_new(fs->L, 0, 0); | ||
1335 | kidx = gcK(fs, obj2gco(t), LJ_TTAB); | ||
1336 | fs->pt->bc[pc] = BCINS_AD(BC_TDUP, freg-1, kidx); | ||
1337 | } | ||
1338 | vcall = 0; | ||
1339 | kexp2tv(&k, &key); | ||
1340 | kexp2tv(lj_tab_set(fs->L, t, &k), &val); | ||
1341 | if (val.k == VKSTR) | ||
1342 | lj_gc_objbarriert(fs->L, t, val.u.sval); | ||
1343 | } else { | ||
1344 | if (isK(&key)) indexexp(fs, e, &key); | ||
1345 | if (val.k != VCALL) vcall = 0; | ||
1346 | storevar(fs, e, &val); | ||
1347 | } | ||
1348 | fs->freereg = freg; | ||
1349 | if (!testnext(ls, ',') && !testnext(ls, ';')) break; | ||
1350 | } | ||
1351 | checkmatch(ls, '}', '{', line); | ||
1352 | if (vcall) { | ||
1353 | BCIns *i = &fs->pt->bc[fs->pc-1]; | ||
1354 | ExpDesc en; | ||
1355 | lua_assert(bc_a(*i)==freg && bc_op(*i) == (narr>256?BC_TSETV:BC_TSETB)); | ||
1356 | init_exp(&en, VKNUM, 0); | ||
1357 | setintV(&en.u.nval, narr-1); | ||
1358 | if (narr > 256) { fs->pc--; i--; } | ||
1359 | *i = BCINS_AD(BC_TSETM, freg, numK(fs, &en)); | ||
1360 | setbc_b(i-1, 0); | ||
1361 | } | ||
1362 | if (pc == fs->pc-1) { /* make expr relocable if possible */ | ||
1363 | e->u.s.info = pc; | ||
1364 | fs->freereg--; | ||
1365 | e->k = VRELOCABLE; | ||
1366 | } else { | ||
1367 | e->k = VNONRELOC; /* indexexp may have changed it */ | ||
1368 | } | ||
1369 | if (!t) { /* Construct TNEW RD: hhhhhaaaaaaaaaaa. */ | ||
1370 | if (!needarr) narr = 0; | ||
1371 | else if (narr < 3) narr = 3; | ||
1372 | else if (narr > 0x7ff) narr = 0x7ff; | ||
1373 | setbc_d(&fs->pt->bc[pc], (uint32_t)narr | (hsize2hbits(nhash) << 11)); | ||
1374 | } | ||
1375 | } | ||
1376 | |||
1377 | static void parlist(LexState *ls) | ||
1378 | { | ||
1379 | /* parlist -> [ param { `,' param } ] */ | ||
1380 | FuncState *fs = ls->fs; | ||
1381 | GCproto *pt = fs->pt; | ||
1382 | BCReg nparams = 0; | ||
1383 | if (ls->token != ')') { /* is `parlist' not empty? */ | ||
1384 | do { | ||
1385 | switch (ls->token) { | ||
1386 | case TK_name: /* param -> NAME */ | ||
1387 | new_localvar(ls, str_checkname(ls), nparams++); | ||
1388 | break; | ||
1389 | case TK_dots: /* param -> `...' */ | ||
1390 | lj_lex_next(ls); | ||
1391 | pt->flags |= PROTO_IS_VARARG; | ||
1392 | break; | ||
1393 | default: | ||
1394 | err_syntax(ls, LJ_ERR_XPARAM); | ||
1395 | break; | ||
1396 | } | ||
1397 | } while (!(pt->flags & PROTO_IS_VARARG) && testnext(ls, ',')); | ||
1398 | } | ||
1399 | adjustlocalvars(ls, nparams); | ||
1400 | pt->numparams = cast_byte(fs->nactvar); | ||
1401 | reserveregs(fs, fs->nactvar); /* reserve register for parameters */ | ||
1402 | } | ||
1403 | |||
1404 | static void body(LexState *ls, ExpDesc *e, int needself, BCLine line) | ||
1405 | { | ||
1406 | /* body -> `(' parlist `)' chunk END */ | ||
1407 | FuncState *fs, new_fs; | ||
1408 | BCReg kidx; | ||
1409 | open_func(ls, &new_fs); | ||
1410 | new_fs.pt->linedefined = line; | ||
1411 | checknext(ls, '('); | ||
1412 | if (needself) { | ||
1413 | new_localvarliteral(ls, "self", 0); | ||
1414 | adjustlocalvars(ls, 1); | ||
1415 | } | ||
1416 | parlist(ls); | ||
1417 | checknext(ls, ')'); | ||
1418 | chunk(ls); | ||
1419 | new_fs.pt->lastlinedefined = ls->linenumber; | ||
1420 | checkmatch(ls, TK_end, TK_function, line); | ||
1421 | close_func(ls); | ||
1422 | fs = ls->fs; | ||
1423 | kidx = gcK(fs, obj2gco(new_fs.pt), LJ_TPROTO); | ||
1424 | init_exp(e, VRELOCABLE, emitAD(fs, BC_FNEW, 0, kidx)); | ||
1425 | if (!(fs->pt->flags & PROTO_HAS_FNEW)) { | ||
1426 | if (fs->pt->flags & PROTO_HAS_RETURN) | ||
1427 | fs->pt->flags |= PROTO_FIXUP_RETURN; | ||
1428 | fs->pt->flags |= PROTO_HAS_FNEW; | ||
1429 | } | ||
1430 | } | ||
1431 | |||
1432 | static BCReg explist1(LexState *ls, ExpDesc *v) | ||
1433 | { | ||
1434 | /* explist1 -> expr { `,' expr } */ | ||
1435 | BCReg n = 1; /* at least one expression */ | ||
1436 | expr(ls, v); | ||
1437 | while (testnext(ls, ',')) { | ||
1438 | exp2nextreg(ls->fs, v); | ||
1439 | expr(ls, v); | ||
1440 | n++; | ||
1441 | } | ||
1442 | return n; | ||
1443 | } | ||
1444 | |||
1445 | static void funcargs(LexState *ls, ExpDesc *e) | ||
1446 | { | ||
1447 | FuncState *fs = ls->fs; | ||
1448 | ExpDesc args; | ||
1449 | BCIns ins; | ||
1450 | BCReg base; | ||
1451 | BCLine line = ls->linenumber; | ||
1452 | switch (ls->token) { | ||
1453 | case '(': { /* funcargs -> `(' [ explist1 ] `)' */ | ||
1454 | if (line != ls->lastline) | ||
1455 | err_syntax(ls, LJ_ERR_XAMBIG); | ||
1456 | lj_lex_next(ls); | ||
1457 | if (ls->token == ')') { /* arg list is empty? */ | ||
1458 | args.k = VVOID; | ||
1459 | } else { | ||
1460 | explist1(ls, &args); | ||
1461 | if (args.k == VCALL) | ||
1462 | setbc_b(bcptr(fs, &args), 0); | ||
1463 | } | ||
1464 | checkmatch(ls, ')', '(', line); | ||
1465 | break; | ||
1466 | } | ||
1467 | case '{': { /* funcargs -> constructor */ | ||
1468 | constructor(ls, &args); | ||
1469 | break; | ||
1470 | } | ||
1471 | case TK_string: { /* funcargs -> STRING */ | ||
1472 | init_exp(&args, VKSTR, 0); | ||
1473 | args.u.sval = strV(&ls->tokenval); | ||
1474 | lj_lex_next(ls); /* must use `seminfo' before `next' */ | ||
1475 | break; | ||
1476 | } | ||
1477 | default: { | ||
1478 | err_syntax(ls, LJ_ERR_XFUNARG); | ||
1479 | return; | ||
1480 | } | ||
1481 | } | ||
1482 | lua_assert(e->k == VNONRELOC); | ||
1483 | base = e->u.s.info; /* base register for call */ | ||
1484 | if (args.k == VCALL) { | ||
1485 | ins = BCINS_ABC(BC_CALLM, base, 2, args.u.s.aux - base - 1); | ||
1486 | } else { | ||
1487 | if (args.k != VVOID) | ||
1488 | exp2nextreg(fs, &args); /* close last argument */ | ||
1489 | ins = BCINS_ABC(BC_CALL, base, 2, fs->freereg - base); | ||
1490 | } | ||
1491 | init_exp(e, VCALL, emitINS(fs, ins)); | ||
1492 | e->u.s.aux = base; | ||
1493 | fs->pt->lineinfo[fs->pc - 1] = line; | ||
1494 | fs->freereg = base+1; /* call removes function and arguments and leaves | ||
1495 | (unless changed) one result */ | ||
1496 | } | ||
1497 | |||
1498 | static void prefixexp(LexState *ls, ExpDesc *v) | ||
1499 | { | ||
1500 | /* prefixexp -> NAME | '(' expr ')' */ | ||
1501 | switch (ls->token) { | ||
1502 | case '(': { | ||
1503 | BCLine line = ls->linenumber; | ||
1504 | lj_lex_next(ls); | ||
1505 | expr(ls, v); | ||
1506 | checkmatch(ls, ')', '(', line); | ||
1507 | dischargevars(ls->fs, v); | ||
1508 | return; | ||
1509 | } | ||
1510 | case TK_name: { | ||
1511 | singlevar(ls, v); | ||
1512 | return; | ||
1513 | } | ||
1514 | default: { | ||
1515 | err_syntax(ls, LJ_ERR_XSYMBOL); | ||
1516 | return; | ||
1517 | } | ||
1518 | } | ||
1519 | } | ||
1520 | |||
1521 | static void primaryexp(LexState *ls, ExpDesc *v) | ||
1522 | { | ||
1523 | /* primaryexp -> | ||
1524 | prefixexp { `.' NAME | `[' exp `]' | `:' NAME funcargs | funcargs } */ | ||
1525 | FuncState *fs = ls->fs; | ||
1526 | prefixexp(ls, v); | ||
1527 | for (;;) { | ||
1528 | switch (ls->token) { | ||
1529 | case '.': /* field */ | ||
1530 | field(ls, v); | ||
1531 | break; | ||
1532 | case '[': { /* `[' exp1 `]' */ | ||
1533 | ExpDesc key; | ||
1534 | exp2anyreg(fs, v); | ||
1535 | yindex(ls, &key); | ||
1536 | indexexp(fs, v, &key); | ||
1537 | break; | ||
1538 | } | ||
1539 | case ':': { /* `:' NAME funcargs */ | ||
1540 | ExpDesc key; | ||
1541 | lj_lex_next(ls); | ||
1542 | checkname(ls, &key); | ||
1543 | methodexp(fs, v, &key); | ||
1544 | funcargs(ls, v); | ||
1545 | break; | ||
1546 | } | ||
1547 | case '(': case TK_string: case '{': /* funcargs */ | ||
1548 | exp2nextreg(fs, v); | ||
1549 | funcargs(ls, v); | ||
1550 | break; | ||
1551 | default: return; | ||
1552 | } | ||
1553 | } | ||
1554 | } | ||
1555 | |||
1556 | static void simpleexp(LexState *ls, ExpDesc *v) | ||
1557 | { | ||
1558 | /* simpleexp -> NUMBER | STRING | NIL | true | false | ... | | ||
1559 | constructor | FUNCTION body | primaryexp */ | ||
1560 | switch (ls->token) { | ||
1561 | case TK_number: | ||
1562 | init_exp(v, VKNUM, 0); | ||
1563 | setnumV(&v->u.nval, numV(&ls->tokenval)); | ||
1564 | break; | ||
1565 | case TK_string: | ||
1566 | init_exp(v, VKSTR, 0); | ||
1567 | v->u.sval = strV(&ls->tokenval); | ||
1568 | break; | ||
1569 | case TK_nil: | ||
1570 | init_exp(v, VKNIL, 0); | ||
1571 | break; | ||
1572 | case TK_true: | ||
1573 | init_exp(v, VKTRUE, 0); | ||
1574 | break; | ||
1575 | case TK_false: | ||
1576 | init_exp(v, VKFALSE, 0); | ||
1577 | break; | ||
1578 | case TK_dots: { /* vararg */ | ||
1579 | FuncState *fs = ls->fs; | ||
1580 | BCReg base; | ||
1581 | checkcond(ls, fs->pt->flags & PROTO_IS_VARARG, LJ_ERR_XDOTS); | ||
1582 | reserveregs(fs, 1); | ||
1583 | base = fs->freereg-1; | ||
1584 | init_exp(v, VCALL, emitABC(fs, BC_VARG, base, 2, 1)); | ||
1585 | v->u.s.aux = base; | ||
1586 | break; | ||
1587 | } | ||
1588 | case '{': /* constructor */ | ||
1589 | constructor(ls, v); | ||
1590 | return; | ||
1591 | case TK_function: | ||
1592 | lj_lex_next(ls); | ||
1593 | body(ls, v, 0, ls->linenumber); | ||
1594 | return; | ||
1595 | default: | ||
1596 | primaryexp(ls, v); | ||
1597 | return; | ||
1598 | } | ||
1599 | lj_lex_next(ls); | ||
1600 | } | ||
1601 | |||
1602 | static void enterlevel(LexState *ls) | ||
1603 | { | ||
1604 | if (++ls->level >= LJ_MAX_XLEVEL) | ||
1605 | lj_lex_error(ls, 0, LJ_ERR_XLEVELS); | ||
1606 | } | ||
1607 | |||
1608 | #define leavelevel(ls) ((ls)->level--) | ||
1609 | |||
1610 | static UnOpr getunopr(LexToken tok) | ||
1611 | { | ||
1612 | switch (tok) { | ||
1613 | case TK_not: return OPR_NOT; | ||
1614 | case '-': return OPR_MINUS; | ||
1615 | case '#': return OPR_LEN; | ||
1616 | default: return OPR_NOUNOPR; | ||
1617 | } | ||
1618 | } | ||
1619 | |||
1620 | static BinOpr getbinopr(LexToken tok) | ||
1621 | { | ||
1622 | switch (tok) { | ||
1623 | case '+': return OPR_ADD; | ||
1624 | case '-': return OPR_SUB; | ||
1625 | case '*': return OPR_MUL; | ||
1626 | case '/': return OPR_DIV; | ||
1627 | case '%': return OPR_MOD; | ||
1628 | case '^': return OPR_POW; | ||
1629 | case TK_concat: return OPR_CONCAT; | ||
1630 | case TK_ne: return OPR_NE; | ||
1631 | case TK_eq: return OPR_EQ; | ||
1632 | case '<': return OPR_LT; | ||
1633 | case TK_le: return OPR_LE; | ||
1634 | case '>': return OPR_GT; | ||
1635 | case TK_ge: return OPR_GE; | ||
1636 | case TK_and: return OPR_AND; | ||
1637 | case TK_or: return OPR_OR; | ||
1638 | default: return OPR_NOBINOPR; | ||
1639 | } | ||
1640 | } | ||
1641 | |||
1642 | static const struct { | ||
1643 | uint8_t left; /* left priority for each binary operator */ | ||
1644 | uint8_t right; /* right priority */ | ||
1645 | } priority[] = { /* ORDER OPR */ | ||
1646 | {6,6}, {6,6}, {7,7}, {7,7}, {7,7}, /* ADD SUB MUL DIV MOD */ | ||
1647 | {10,9}, {5,4}, /* POW CONCAT (right associative) */ | ||
1648 | {3,3}, {3,3}, /* EQ NE */ | ||
1649 | {3,3}, {3,3}, {3,3}, {3,3}, /* LT GE GT LE */ | ||
1650 | {2,2}, {1,1} /* AND OR */ | ||
1651 | }; | ||
1652 | |||
1653 | #define UNARY_PRIORITY 8 /* priority for unary operators */ | ||
1654 | |||
1655 | /* | ||
1656 | ** subexpr -> (simpleexp | unop subexpr) { binop subexpr } | ||
1657 | ** where `binop' is any binary operator with a priority higher than `limit' | ||
1658 | */ | ||
1659 | static BinOpr subexpr(LexState *ls, ExpDesc *v, uint32_t limit) | ||
1660 | { | ||
1661 | BinOpr op; | ||
1662 | UnOpr uop; | ||
1663 | enterlevel(ls); | ||
1664 | uop = getunopr(ls->token); | ||
1665 | if (uop != OPR_NOUNOPR) { | ||
1666 | lj_lex_next(ls); | ||
1667 | subexpr(ls, v, UNARY_PRIORITY); | ||
1668 | emit_unop(ls->fs, uop, v); | ||
1669 | } else { | ||
1670 | simpleexp(ls, v); | ||
1671 | } | ||
1672 | /* expand while operators have priorities higher than `limit' */ | ||
1673 | op = getbinopr(ls->token); | ||
1674 | while (op != OPR_NOBINOPR && priority[op].left > limit) { | ||
1675 | ExpDesc v2; | ||
1676 | BinOpr nextop; | ||
1677 | lj_lex_next(ls); | ||
1678 | prepare_binop(ls->fs, op, v); | ||
1679 | /* read sub-expression with higher priority */ | ||
1680 | nextop = subexpr(ls, &v2, priority[op].right); | ||
1681 | emit_binop(ls->fs, op, v, &v2); | ||
1682 | op = nextop; | ||
1683 | } | ||
1684 | leavelevel(ls); | ||
1685 | return op; /* return first untreated operator */ | ||
1686 | } | ||
1687 | |||
1688 | static void expr(LexState *ls, ExpDesc *v) | ||
1689 | { | ||
1690 | subexpr(ls, v, 0); | ||
1691 | } | ||
1692 | |||
1693 | static BCPos condexpr(LexState *ls) | ||
1694 | { | ||
1695 | /* cond -> exp */ | ||
1696 | ExpDesc v; | ||
1697 | expr(ls, &v); /* read condition */ | ||
1698 | if (v.k == VKNIL) v.k = VKFALSE; /* `falses' are all equal here */ | ||
1699 | goiftrue(ls->fs, &v); | ||
1700 | return v.f; | ||
1701 | } | ||
1702 | |||
1703 | /* -- Scope handling ------------------------------------------------------ */ | ||
1704 | |||
1705 | static void enterblock(FuncState *fs, FuncBlock *bl, int isbreakable) | ||
1706 | { | ||
1707 | bl->breaklist = NO_JMP; | ||
1708 | bl->isbreakable = (uint8_t)isbreakable; | ||
1709 | bl->nactvar = fs->nactvar; | ||
1710 | bl->upval = 0; | ||
1711 | bl->previous = fs->bl; | ||
1712 | fs->bl = bl; | ||
1713 | lua_assert(fs->freereg == fs->nactvar); | ||
1714 | } | ||
1715 | |||
1716 | static void leaveblock(FuncState *fs) | ||
1717 | { | ||
1718 | FuncBlock *bl = fs->bl; | ||
1719 | fs->bl = bl->previous; | ||
1720 | removevars(fs->ls, bl->nactvar); | ||
1721 | fs->freereg = fs->nactvar; /* free registers */ | ||
1722 | lua_assert(bl->nactvar == fs->nactvar); | ||
1723 | /* a block either controls scope or breaks (never both) */ | ||
1724 | lua_assert(!bl->isbreakable || !bl->upval); | ||
1725 | if (bl->upval) | ||
1726 | emitAJ(fs, BC_UCLO, bl->nactvar, 0); | ||
1727 | else /* avoid in upval case, it clears lasttarget and kills UCLO+JMP join */ | ||
1728 | patchtohere(fs, bl->breaklist); | ||
1729 | } | ||
1730 | |||
1731 | static void block(LexState *ls) | ||
1732 | { | ||
1733 | /* block -> chunk */ | ||
1734 | FuncState *fs = ls->fs; | ||
1735 | FuncBlock bl; | ||
1736 | enterblock(fs, &bl, 0); | ||
1737 | chunk(ls); | ||
1738 | lua_assert(bl.breaklist == NO_JMP); | ||
1739 | leaveblock(fs); | ||
1740 | } | ||
1741 | |||
1742 | /* -- Statements ---------------------------------------------------------- */ | ||
1743 | |||
1744 | /* | ||
1745 | ** structure to chain all variables in the left-hand side of an | ||
1746 | ** assignment | ||
1747 | */ | ||
1748 | struct LHS_assign { | ||
1749 | ExpDesc v; /* variable (global, local, upvalue, or indexed) */ | ||
1750 | struct LHS_assign *prev; | ||
1751 | }; | ||
1752 | |||
1753 | /* | ||
1754 | ** check whether, in an assignment to a local variable, the local variable | ||
1755 | ** is needed in a previous assignment (to a table). If so, save original | ||
1756 | ** local value in a safe place and use this safe copy in the previous | ||
1757 | ** assignment. | ||
1758 | */ | ||
1759 | static void check_conflict(LexState *ls, struct LHS_assign *lh, | ||
1760 | const ExpDesc *v) | ||
1761 | { | ||
1762 | FuncState *fs = ls->fs; | ||
1763 | BCReg reg = fs->freereg; /* eventual position to save local variable */ | ||
1764 | int conflict = 0; | ||
1765 | for (; lh; lh = lh->prev) { | ||
1766 | if (lh->v.k == VINDEXED) { | ||
1767 | if (lh->v.u.s.info == v->u.s.info) { /* conflict? */ | ||
1768 | conflict = 1; | ||
1769 | lh->v.u.s.info = reg; /* previous assignment will use safe copy */ | ||
1770 | } | ||
1771 | if (lh->v.u.s.aux == v->u.s.info) { /* conflict? */ | ||
1772 | conflict = 1; | ||
1773 | lh->v.u.s.aux = reg; /* previous assignment will use safe copy */ | ||
1774 | } | ||
1775 | } | ||
1776 | } | ||
1777 | if (conflict) { | ||
1778 | emitAD(fs, BC_MOV, reg, v->u.s.info); /* make copy */ | ||
1779 | reserveregs(fs, 1); | ||
1780 | } | ||
1781 | } | ||
1782 | |||
1783 | static void assignment(LexState *ls, struct LHS_assign *lh, BCReg nvars) | ||
1784 | { | ||
1785 | ExpDesc e; | ||
1786 | checkcond(ls, VLOCAL <= lh->v.k && lh->v.k <= VINDEXED, LJ_ERR_XSYNTAX); | ||
1787 | if (testnext(ls, ',')) { /* assignment -> `,' primaryexp assignment */ | ||
1788 | struct LHS_assign nv; | ||
1789 | nv.prev = lh; | ||
1790 | primaryexp(ls, &nv.v); | ||
1791 | if (nv.v.k == VLOCAL) | ||
1792 | check_conflict(ls, lh, &nv.v); | ||
1793 | checklimit(ls->fs, ls->level + nvars, LJ_MAX_XLEVEL, "variable names"); | ||
1794 | assignment(ls, &nv, nvars+1); | ||
1795 | } else { /* assignment -> `=' explist1 */ | ||
1796 | BCReg nexps; | ||
1797 | checknext(ls, '='); | ||
1798 | nexps = explist1(ls, &e); | ||
1799 | if (nexps == nvars) { | ||
1800 | if (e.k == VCALL) { | ||
1801 | if (bc_op(*bcptr(ls->fs, &e)) == BC_VARG) { | ||
1802 | ls->fs->freereg--; | ||
1803 | e.k = VRELOCABLE; | ||
1804 | } else { | ||
1805 | e.u.s.info = e.u.s.aux; | ||
1806 | e.k = VNONRELOC; | ||
1807 | } | ||
1808 | } | ||
1809 | storevar(ls->fs, &lh->v, &e); | ||
1810 | return; | ||
1811 | } | ||
1812 | adjust_assign(ls, nvars, nexps, &e); | ||
1813 | if (nexps > nvars) | ||
1814 | ls->fs->freereg -= nexps - nvars; /* remove extra values */ | ||
1815 | } | ||
1816 | init_exp(&e, VNONRELOC, ls->fs->freereg-1); /* default assignment */ | ||
1817 | storevar(ls->fs, &lh->v, &e); | ||
1818 | } | ||
1819 | |||
1820 | static void breakstat(LexState *ls) | ||
1821 | { | ||
1822 | FuncState *fs = ls->fs; | ||
1823 | FuncBlock *bl = fs->bl; | ||
1824 | int upval = 0; | ||
1825 | while (bl && !bl->isbreakable) { | ||
1826 | upval |= bl->upval; | ||
1827 | bl = bl->previous; | ||
1828 | } | ||
1829 | if (!bl) | ||
1830 | err_syntax(ls, LJ_ERR_XBREAK); | ||
1831 | if (upval) | ||
1832 | emitAJ(fs, BC_UCLO, bl->nactvar, 0); | ||
1833 | concatjumps(fs, &bl->breaklist, emit_jump(fs)); | ||
1834 | } | ||
1835 | |||
1836 | static void whilestat(LexState *ls, BCLine line) | ||
1837 | { | ||
1838 | /* whilestat -> WHILE cond DO block END */ | ||
1839 | FuncState *fs = ls->fs; | ||
1840 | BCPos start, loop, condexit; | ||
1841 | FuncBlock bl; | ||
1842 | lj_lex_next(ls); /* skip WHILE */ | ||
1843 | start = fs->lasttarget = fs->pc; | ||
1844 | condexit = condexpr(ls); | ||
1845 | enterblock(fs, &bl, 1); | ||
1846 | checknext(ls, TK_do); | ||
1847 | loop = emitAD(fs, BC_LOOP, fs->nactvar, 0); | ||
1848 | block(ls); | ||
1849 | patchlist(fs, emit_jump(fs), start); | ||
1850 | checkmatch(ls, TK_end, TK_while, line); | ||
1851 | leaveblock(fs); | ||
1852 | patchtohere(fs, condexit); /* false conditions finish the loop */ | ||
1853 | fixjump(fs, loop, fs->pc); | ||
1854 | } | ||
1855 | |||
1856 | static void repeatstat(LexState *ls, BCLine line) | ||
1857 | { | ||
1858 | /* repeatstat -> REPEAT block UNTIL cond */ | ||
1859 | FuncState *fs = ls->fs; | ||
1860 | BCPos loop = fs->lasttarget = fs->pc; | ||
1861 | BCPos condexit; | ||
1862 | FuncBlock bl1, bl2; | ||
1863 | enterblock(fs, &bl1, 1); /* loop block */ | ||
1864 | enterblock(fs, &bl2, 0); /* scope block */ | ||
1865 | lj_lex_next(ls); /* skip REPEAT */ | ||
1866 | emitAD(fs, BC_LOOP, fs->nactvar, 0); | ||
1867 | chunk(ls); | ||
1868 | checkmatch(ls, TK_until, TK_repeat, line); | ||
1869 | condexit = condexpr(ls); /* read condition (inside scope block) */ | ||
1870 | if (!bl2.upval) { /* no upvalues? */ | ||
1871 | leaveblock(fs); /* finish scope */ | ||
1872 | } else { /* complete semantics when there are upvalues */ | ||
1873 | breakstat(ls); /* if condition then break */ | ||
1874 | patchtohere(fs, condexit); /* else... */ | ||
1875 | leaveblock(fs); /* finish scope... */ | ||
1876 | condexit = emit_jump(fs); /* and repeat */ | ||
1877 | } | ||
1878 | patchlist(fs, condexit, loop); /* close the loop */ | ||
1879 | fixjump(fs, loop, fs->pc); | ||
1880 | leaveblock(fs); /* finish loop */ | ||
1881 | } | ||
1882 | |||
1883 | static void exp1(LexState *ls) | ||
1884 | { | ||
1885 | ExpDesc e; | ||
1886 | expr(ls, &e); | ||
1887 | exp2nextreg(ls->fs, &e); | ||
1888 | } | ||
1889 | |||
1890 | static void forbody(LexState *ls, BCReg base, BCLine line, BCReg nvars, | ||
1891 | int isnum) | ||
1892 | { | ||
1893 | /* forbody -> DO block */ | ||
1894 | FuncBlock bl; | ||
1895 | FuncState *fs = ls->fs; | ||
1896 | BCPos loop, loopend; | ||
1897 | adjustlocalvars(ls, 3); /* control variables */ | ||
1898 | checknext(ls, TK_do); | ||
1899 | loop = isnum ? emitAJ(fs, BC_FORI, base, NO_JMP) : | ||
1900 | emitAJ(fs, BC_JMP, fs->freereg, NO_JMP); | ||
1901 | enterblock(fs, &bl, 0); /* scope for declared variables */ | ||
1902 | adjustlocalvars(ls, nvars); | ||
1903 | reserveregs(fs, nvars); | ||
1904 | block(ls); | ||
1905 | leaveblock(fs); /* end of scope for declared variables */ | ||
1906 | if (isnum) { | ||
1907 | loopend = emitAJ(fs, BC_FORL, base, NO_JMP); | ||
1908 | fixjump(fs, loop, fs->pc); | ||
1909 | } else { | ||
1910 | fixjump(fs, loop, fs->pc); | ||
1911 | emitABC(fs, BC_ITERC, base+3, nvars+1, 2+1); | ||
1912 | loopend = emitAJ(fs, BC_ITERL, base+3, NO_JMP); | ||
1913 | fs->pt->lineinfo[loopend-1] = line; | ||
1914 | } | ||
1915 | fs->pt->lineinfo[loopend] = line; /* pretend last op starts the loop */ | ||
1916 | fixjump(fs, loopend, loop+1); | ||
1917 | } | ||
1918 | |||
1919 | static void fornum(LexState *ls, GCstr *varname, BCLine line) | ||
1920 | { | ||
1921 | /* fornum -> NAME = exp1,exp1[,exp1] forbody */ | ||
1922 | FuncState *fs = ls->fs; | ||
1923 | BCReg base = fs->freereg; | ||
1924 | new_localvarliteral(ls, "(for index)", FORL_IDX); | ||
1925 | new_localvarliteral(ls, "(for limit)", FORL_STOP); | ||
1926 | new_localvarliteral(ls, "(for step)", FORL_STEP); | ||
1927 | new_localvar(ls, varname, FORL_EXT); | ||
1928 | checknext(ls, '='); | ||
1929 | exp1(ls); /* initial value */ | ||
1930 | checknext(ls, ','); | ||
1931 | exp1(ls); /* limit */ | ||
1932 | if (testnext(ls, ',')) { | ||
1933 | exp1(ls); /* optional step */ | ||
1934 | } else { /* default step = 1 */ | ||
1935 | emitAD(fs, BC_KSHORT, fs->freereg, 1); | ||
1936 | reserveregs(fs, 1); | ||
1937 | } | ||
1938 | forbody(ls, base, line, 1, 1); | ||
1939 | } | ||
1940 | |||
1941 | static void forlist(LexState *ls, GCstr *indexname) | ||
1942 | { | ||
1943 | /* forlist -> NAME {,NAME} IN explist1 forbody */ | ||
1944 | FuncState *fs = ls->fs; | ||
1945 | ExpDesc e; | ||
1946 | BCReg nvars = 0; | ||
1947 | BCLine line; | ||
1948 | BCReg base = fs->freereg; | ||
1949 | /* create control variables */ | ||
1950 | new_localvarliteral(ls, "(for generator)", nvars++); | ||
1951 | new_localvarliteral(ls, "(for state)", nvars++); | ||
1952 | new_localvarliteral(ls, "(for control)", nvars++); | ||
1953 | /* create declared variables */ | ||
1954 | new_localvar(ls, indexname, nvars++); | ||
1955 | while (testnext(ls, ',')) | ||
1956 | new_localvar(ls, str_checkname(ls), nvars++); | ||
1957 | checknext(ls, TK_in); | ||
1958 | line = ls->linenumber; | ||
1959 | adjust_assign(ls, 3, explist1(ls, &e), &e); | ||
1960 | checkframe(fs, 3); /* extra space to call generator */ | ||
1961 | forbody(ls, base, line, nvars - 3, 0); | ||
1962 | } | ||
1963 | |||
1964 | static void forstat(LexState *ls, BCLine line) | ||
1965 | { | ||
1966 | /* forstat -> FOR (fornum | forlist) END */ | ||
1967 | FuncState *fs = ls->fs; | ||
1968 | GCstr *varname; | ||
1969 | FuncBlock bl; | ||
1970 | enterblock(fs, &bl, 1); /* scope for loop and control variables */ | ||
1971 | lj_lex_next(ls); /* skip `for' */ | ||
1972 | varname = str_checkname(ls); /* first variable name */ | ||
1973 | switch (ls->token) { | ||
1974 | case '=': fornum(ls, varname, line); break; | ||
1975 | case ',': case TK_in: forlist(ls, varname); break; | ||
1976 | default: err_syntax(ls, LJ_ERR_XFOR); | ||
1977 | } | ||
1978 | checkmatch(ls, TK_end, TK_for, line); | ||
1979 | leaveblock(fs); /* loop scope (`break' jumps to this point) */ | ||
1980 | } | ||
1981 | |||
1982 | static BCPos test_then_block(LexState *ls) | ||
1983 | { | ||
1984 | /* test_then_block -> [IF | ELSEIF] cond THEN block */ | ||
1985 | BCPos condexit; | ||
1986 | lj_lex_next(ls); /* skip IF or ELSEIF */ | ||
1987 | condexit = condexpr(ls); | ||
1988 | checknext(ls, TK_then); | ||
1989 | block(ls); /* `then' part */ | ||
1990 | return condexit; | ||
1991 | } | ||
1992 | |||
1993 | static void ifstat(LexState *ls, BCLine line) | ||
1994 | { | ||
1995 | /* ifstat -> IF cond THEN block {ELSEIF cond THEN block} [ELSE block] END */ | ||
1996 | FuncState *fs = ls->fs; | ||
1997 | BCPos flist; | ||
1998 | BCPos escapelist = NO_JMP; | ||
1999 | flist = test_then_block(ls); /* IF cond THEN block */ | ||
2000 | while (ls->token == TK_elseif) { | ||
2001 | concatjumps(fs, &escapelist, emit_jump(fs)); | ||
2002 | patchtohere(fs, flist); | ||
2003 | flist = test_then_block(ls); /* ELSEIF cond THEN block */ | ||
2004 | } | ||
2005 | if (ls->token == TK_else) { | ||
2006 | concatjumps(fs, &escapelist, emit_jump(fs)); | ||
2007 | patchtohere(fs, flist); | ||
2008 | lj_lex_next(ls); /* skip ELSE (after patch, for correct line info) */ | ||
2009 | block(ls); /* `else' part */ | ||
2010 | } else { | ||
2011 | concatjumps(fs, &escapelist, flist); | ||
2012 | } | ||
2013 | patchtohere(fs, escapelist); | ||
2014 | checkmatch(ls, TK_end, TK_if, line); | ||
2015 | } | ||
2016 | |||
2017 | static void localfunc(LexState *ls) | ||
2018 | { | ||
2019 | ExpDesc v, b; | ||
2020 | FuncState *fs = ls->fs; | ||
2021 | new_localvar(ls, str_checkname(ls), 0); | ||
2022 | init_exp(&v, VLOCAL, fs->freereg); | ||
2023 | reserveregs(fs, 1); | ||
2024 | adjustlocalvars(ls, 1); | ||
2025 | body(ls, &b, 0, ls->linenumber); | ||
2026 | storevar(fs, &v, &b); | ||
2027 | /* debug information will only see the variable after this point! */ | ||
2028 | getlocvar(fs, fs->nactvar - 1).startpc = fs->pc; | ||
2029 | } | ||
2030 | |||
2031 | static void localstat(LexState *ls) | ||
2032 | { | ||
2033 | /* stat -> LOCAL NAME {`,' NAME} [`=' explist1] */ | ||
2034 | BCReg nvars = 0; | ||
2035 | BCReg nexps; | ||
2036 | ExpDesc e; | ||
2037 | do { | ||
2038 | new_localvar(ls, str_checkname(ls), nvars++); | ||
2039 | } while (testnext(ls, ',')); | ||
2040 | if (testnext(ls, '=')) { | ||
2041 | nexps = explist1(ls, &e); | ||
2042 | } else { | ||
2043 | e.k = VVOID; | ||
2044 | nexps = 0; | ||
2045 | } | ||
2046 | adjust_assign(ls, nvars, nexps, &e); | ||
2047 | adjustlocalvars(ls, nvars); | ||
2048 | } | ||
2049 | |||
2050 | static int func_name(LexState *ls, ExpDesc *v) | ||
2051 | { | ||
2052 | /* func_name -> NAME {field} [`:' NAME] */ | ||
2053 | int needself = 0; | ||
2054 | singlevar(ls, v); | ||
2055 | while (ls->token == '.') | ||
2056 | field(ls, v); | ||
2057 | if (ls->token == ':') { | ||
2058 | needself = 1; | ||
2059 | field(ls, v); | ||
2060 | } | ||
2061 | return needself; | ||
2062 | } | ||
2063 | |||
2064 | static void funcstat(LexState *ls, BCLine line) | ||
2065 | { | ||
2066 | /* funcstat -> FUNCTION func_name body; `line' is the line number statement() saw at the statement start */ | ||
2067 | FuncState *fs; | ||
2068 | int needself; | ||
2069 | ExpDesc v, b; /* v: target described by func_name, b: the function body */ | ||
2070 | lj_lex_next(ls); /* skip FUNCTION */ | ||
2071 | needself = func_name(ls, &v); | ||
2072 | body(ls, &b, needself, line); | ||
2073 | fs = ls->fs; /* read only after body() has returned */ | ||
2074 | storevar(fs, &v, &b); | ||
2075 | fs->pt->lineinfo[fs->pc - 1] = line; /* overwrite the line info at pc-1 with the statement's start line */ | ||
2076 | } | ||
2077 | |||
2078 | static void exprstat(LexState *ls) | ||
2079 | { | ||
2080 | /* stat -> func | assignment; decided by the kind of the parsed primary expression */ | ||
2081 | FuncState *fs = ls->fs; | ||
2082 | struct LHS_assign v; | ||
2083 | primaryexp(ls, &v.v); | ||
2084 | if (v.v.k == VCALL) { /* stat -> func */ | ||
2085 | setbc_b(bcptr(fs, &v.v), 1); /* call statement uses no results */ | ||
2086 | } else { /* stat -> assignment */ | ||
2087 | v.prev = NULL; /* no previous LHS entry */ | ||
2088 | assignment(ls, &v, 1); | ||
2089 | } | ||
2090 | } | ||
2091 | |||
2092 | static int block_follow(LexToken token) | ||
2093 | { /* Returns 1 for the tokens listed below (they stop the statement loop in chunk()), else 0. */ | ||
2094 | switch (token) { | ||
2095 | case TK_else: case TK_elseif: case TK_end: case TK_until: case TK_eof: | ||
2096 | return 1; | ||
2097 | default: | ||
2098 | return 0; | ||
2099 | } | ||
2100 | } | ||
2101 | |||
2102 | static void retstat(LexState *ls) | ||
2103 | { | ||
2104 | /* stat -> RETURN explist; builds one of RET0, RET1, RET, RETM or a tail call variant of the call */ | ||
2105 | BCIns ins; /* the final instruction; emitted at the very end */ | ||
2106 | FuncState *fs = ls->fs; | ||
2107 | lj_lex_next(ls); /* skip RETURN */ | ||
2108 | fs->pt->flags |= PROTO_HAS_RETURN; | ||
2109 | if (block_follow(ls->token) || ls->token == ';') { | ||
2110 | ins = BCINS_AD(BC_RET0, 0, 1); /* return no values */ | ||
2111 | } else { | ||
2112 | ExpDesc e; | ||
2113 | BCReg nret = explist1(ls, &e); /* optional return values */ | ||
2114 | if (nret == 1) { | ||
2115 | if (e.k == VCALL) { /* single call expression: candidate for a tail call */ | ||
2116 | BCIns *i = bcptr(fs, &e); | ||
2117 | /* It doesn't pay off to add BC_VARGT just for 'return ...'. */ | ||
2118 | if (bc_op(*i) == BC_VARG) goto notailcall; | ||
2119 | fs->pc--; /* step back one instruction; NOTE(review): presumably the call itself, which is re-emitted below */ | ||
2120 | ins = BCINS_AD(bc_op(*i)-BC_CALL+BC_CALLT, bc_a(*i), bc_c(*i)); | ||
2121 | } else { | ||
2122 | ins = BCINS_AD(BC_RET1, exp2anyreg(fs, &e), 2); | ||
2123 | } | ||
2124 | } else { | ||
2125 | if (e.k == VCALL) { /* last expression of the list is a call */ | ||
2126 | notailcall: | ||
2127 | setbc_b(bcptr(fs, &e), 0); /* NOTE(review): B = 0 presumably requests all results -- confirm */ | ||
2128 | ins = BCINS_AD(BC_RETM, fs->nactvar, e.u.s.aux - fs->nactvar); | ||
2129 | } else { | ||
2130 | exp2nextreg(fs, &e); /* values must go to the `stack' */ | ||
2131 | ins = BCINS_AD(BC_RET, fs->nactvar, nret+1); | ||
2132 | } | ||
2133 | } | ||
2134 | } | ||
2135 | if (fs->pt->flags & PROTO_HAS_FNEW) /* flag set: emit an UCLO in front of the return */ | ||
2136 | emitAJ(fs, BC_UCLO, 0, 0); | ||
2137 | emitINS(fs, ins); | ||
2138 | } | ||
2139 | |||
2140 | static int statement(LexState *ls) | ||
2141 | { /* Parses one statement. Returns 1 after RETURN or BREAK (must be last in a block), else 0. */ | ||
2142 | BCLine line = ls->linenumber; /* may be needed for error messages */ | ||
2143 | switch (ls->token) { /* dispatch on the first token of the statement */ | ||
2144 | case TK_if: | ||
2145 | ifstat(ls, line); | ||
2146 | return 0; | ||
2147 | case TK_while: | ||
2148 | whilestat(ls, line); | ||
2149 | return 0; | ||
2150 | case TK_do: | ||
2151 | lj_lex_next(ls); /* skip DO */ | ||
2152 | block(ls); | ||
2153 | checkmatch(ls, TK_end, TK_do, line); | ||
2154 | return 0; | ||
2155 | case TK_for: | ||
2156 | forstat(ls, line); | ||
2157 | return 0; | ||
2158 | case TK_repeat: | ||
2159 | repeatstat(ls, line); | ||
2160 | return 0; | ||
2161 | case TK_function: | ||
2162 | funcstat(ls, line); | ||
2163 | return 0; | ||
2164 | case TK_local: | ||
2165 | lj_lex_next(ls); /* skip LOCAL */ | ||
2166 | if (testnext(ls, TK_function)) /* local function? */ | ||
2167 | localfunc(ls); | ||
2168 | else | ||
2169 | localstat(ls); | ||
2170 | return 0; | ||
2171 | case TK_return: | ||
2172 | retstat(ls); | ||
2173 | return 1; /* must be last statement */ | ||
2174 | case TK_break: | ||
2175 | lj_lex_next(ls); /* skip BREAK */ | ||
2176 | breakstat(ls); | ||
2177 | return 1; /* must be last statement */ | ||
2178 | default: /* anything else: call statement or assignment */ | ||
2179 | exprstat(ls); | ||
2180 | return 0; | ||
2181 | } | ||
2182 | } | ||
2183 | |||
2184 | static void chunk(LexState *ls) | ||
2185 | { | ||
2186 | /* chunk -> { stat [`;'] }; stops at a block_follow() token or after a statement that must be last */ | ||
2187 | int islast = 0; | ||
2188 | enterlevel(ls); | ||
2189 | while (!islast && !block_follow(ls->token)) { | ||
2190 | islast = statement(ls); | ||
2191 | testnext(ls, ';'); /* optional separator; the result is ignored */ | ||
2192 | lua_assert(ls->fs->pt->framesize >= ls->fs->freereg && | ||
2193 | ls->fs->freereg >= ls->fs->nactvar); | ||
2194 | ls->fs->freereg = ls->fs->nactvar; /* free registers */ | ||
2195 | } | ||
2196 | leavelevel(ls); | ||
2197 | } | ||
2198 | |||
diff --git a/src/lj_parse.h b/src/lj_parse.h new file mode 100644 index 00000000..72aac2c6 --- /dev/null +++ b/src/lj_parse.h | |||
@@ -0,0 +1,15 @@ | |||
1 | /* | ||
2 | ** Lua parser (source code -> bytecode). | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #ifndef _LJ_PARSE_H | ||
7 | #define _LJ_PARSE_H | ||
8 | |||
9 | #include "lj_obj.h" | ||
10 | #include "lj_lex.h" | ||
11 | /* Parser interface. Both functions operate on a LexState. */ | ||
12 | LJ_FUNC GCproto *lj_parse(LexState *ls); | ||
13 | LJ_FUNC GCstr *lj_parse_keepstr(LexState *ls, const char *str, size_t l); | ||
14 | |||
15 | #endif | ||
diff --git a/src/lj_record.c b/src/lj_record.c new file mode 100644 index 00000000..e101ba23 --- /dev/null +++ b/src/lj_record.c | |||
@@ -0,0 +1,2136 @@ | |||
1 | /* | ||
2 | ** Trace recorder (bytecode -> SSA IR). | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #define lj_record_c | ||
7 | #define LUA_CORE | ||
8 | |||
9 | #include "lj_obj.h" | ||
10 | |||
11 | #if LJ_HASJIT | ||
12 | |||
13 | #include "lj_gc.h" | ||
14 | #include "lj_err.h" | ||
15 | #include "lj_str.h" | ||
16 | #include "lj_tab.h" | ||
17 | #include "lj_state.h" | ||
18 | #include "lj_frame.h" | ||
19 | #include "lj_bc.h" | ||
20 | #include "lj_ff.h" | ||
21 | #include "lj_ir.h" | ||
22 | #include "lj_jit.h" | ||
23 | #include "lj_iropt.h" | ||
24 | #include "lj_trace.h" | ||
25 | #include "lj_record.h" | ||
26 | #include "lj_snap.h" | ||
27 | #include "lj_asm.h" | ||
28 | #include "lj_dispatch.h" | ||
29 | #include "lj_vm.h" | ||
30 | |||
31 | /* Some local macros to save typing. Undef'd at the end. All of them use a variable named J from the enclosing scope. */ | ||
32 | #define IR(ref) (&J->cur.ir[(ref)]) | ||
33 | |||
34 | /* Pass IR on to next optimization in chain (FOLD): lj_ir_set() followed by lj_opt_fold(). */ | ||
35 | #define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J)) | ||
36 | |||
37 | /* Emit raw IR without passing through optimizations: lj_ir_set() followed by lj_ir_emit(). */ | ||
38 | #define emitir_raw(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J)) | ||
39 | |||
40 | /* Context for recording an indexed load/store. Holds runtime values (tabv, keyv, ...) next to the references (tab, key, ...) recorded for them. */ | ||
41 | typedef struct RecordIndex { | ||
42 | TValue tabv; /* Runtime value of table (or indexed object). */ | ||
43 | TValue keyv; /* Runtime value of key. */ | ||
44 | TValue valv; /* Runtime value of stored value. */ | ||
45 | TValue mobjv; /* Runtime value of metamethod object. */ | ||
46 | GCtab *mtv; /* Runtime value of metatable object. */ | ||
47 | cTValue *oldv; /* Runtime value of previously stored value. */ | ||
48 | TRef tab; /* Table (or indexed object) reference. */ | ||
49 | TRef key; /* Key reference. */ | ||
50 | TRef val; /* Value reference for a store or 0 for a load. */ | ||
51 | TRef mt; /* Metatable reference. */ | ||
52 | TRef mobj; /* Metamethod object reference. */ | ||
53 | int idxchain; /* Index indirections left or 0 for raw lookup. */ | ||
54 | } RecordIndex; | ||
55 | |||
56 | /* Requested results from rec_call(), passed as its `cres' argument. */ | ||
57 | enum { | ||
58 | /* Non-negative numbers are number of requested results. */ | ||
59 | CALLRES_MULTI = -1, /* Return multiple results. */ | ||
60 | CALLRES_TAILCALL = -2, /* Tail call. */ | ||
61 | CALLRES_PENDING = -3, /* Call is pending, no results yet. */ | ||
62 | CALLRES_CONT = -4 /* Continuation call. */ | ||
63 | }; | ||
64 | |||
65 | /* Forward declarations. */ | ||
66 | static TRef rec_idx(jit_State *J, RecordIndex *ix); | ||
67 | static int rec_call(jit_State *J, BCReg func, int cres, int nargs); | ||
68 | |||
69 | /* -- Sanity checks ------------------------------------------------------- */ | ||
70 | |||
71 | #ifdef LUA_USE_ASSERT | ||
72 | /* Sanity check the whole IR -- sloooow. Asserts operand and chain invariants for every ref from nins-1 down to nk. */ | ||
73 | static void rec_check_ir(jit_State *J) | ||
74 | { | ||
75 | IRRef i, nins = J->cur.nins, nk = J->cur.nk; | ||
76 | lua_assert(nk <= REF_BIAS && nins >= REF_BIAS && nins < 65536); | ||
77 | for (i = nins-1; i >= nk; i--) { | ||
78 | IRIns *ir = IR(i); | ||
79 | uint32_t mode = lj_ir_mode[ir->o]; /* Operand modes, looked up by opcode. */ | ||
80 | IRRef op1 = ir->op1; | ||
81 | IRRef op2 = ir->op2; | ||
82 | switch (irm_op1(mode)) { | ||
83 | case IRMnone: lua_assert(op1 == 0); break; | ||
84 | case IRMref: lua_assert(op1 >= nk); /* Refs >= REF_BIAS may only point to lower refs, refs below it only to higher refs. */ | ||
85 | lua_assert(i >= REF_BIAS ? op1 < i : op1 > i); break; | ||
86 | case IRMlit: break; | ||
87 | case IRMcst: lua_assert(i < REF_BIAS); continue; /* Skips the op2 and chain checks below. */ | ||
88 | } | ||
89 | switch (irm_op2(mode)) { /* Same checks for the second operand. */ | ||
90 | case IRMnone: lua_assert(op2 == 0); break; | ||
91 | case IRMref: lua_assert(op2 >= nk); | ||
92 | lua_assert(i >= REF_BIAS ? op2 < i : op2 > i); break; | ||
93 | case IRMlit: break; | ||
94 | case IRMcst: lua_assert(0); break; /* Never valid for op2. */ | ||
95 | } | ||
96 | if (ir->prev) { /* Chained: must link in the same direction to an instruction with the same opcode. */ | ||
97 | lua_assert(ir->prev >= nk); | ||
98 | lua_assert(i >= REF_BIAS ? ir->prev < i : ir->prev > i); | ||
99 | lua_assert(IR(ir->prev)->o == ir->o); | ||
100 | } | ||
101 | } | ||
102 | } | ||
103 | |||
104 | /* Sanity check the slots. Every non-zero slot must hold a ref inside [nk, nins) whose IR type matches the slot's type tag. */ | ||
105 | static void rec_check_slots(jit_State *J) | ||
106 | { | ||
107 | BCReg s, nslots = J->baseslot + J->maxslot; | ||
108 | lua_assert(J->baseslot >= 1 && J->baseslot < LJ_MAX_JSLOTS); | ||
109 | lua_assert(nslots < LJ_MAX_JSLOTS); | ||
110 | for (s = 0; s < nslots; s++) { | ||
111 | TRef tr = J->slot[s]; | ||
112 | if (tr) { /* Zero means the slot is not set; nothing to check. */ | ||
113 | IRRef ref = tref_ref(tr); | ||
114 | lua_assert(ref >= J->cur.nk && ref < J->cur.nins); | ||
115 | lua_assert(irt_t(IR(ref)->t) == tref_t(tr)); | ||
116 | } | ||
117 | } | ||
118 | } | ||
119 | #endif | ||
120 | |||
121 | /* -- Type handling and specialization ------------------------------------ */ | ||
122 | |||
123 | /* Note: these functions return tagged references (TRef). */ | ||
124 | |||
125 | /* Specialize a slot to a specific type. Note: slot can be negative! Emits a raw SLOAD of type t, stores it in J->base[slot] and returns it. */ | ||
126 | static TRef sloadt(jit_State *J, int32_t slot, IRType t, int mode) | ||
127 | { | ||
128 | /* No guard, since none of the callers need a type-checking SLOAD. */ | ||
129 | TRef ref = emitir_raw(IRT(IR_SLOAD, t), (int32_t)J->baseslot+slot, mode); | ||
130 | J->base[slot] = ref; | ||
131 | return ref; | ||
132 | } | ||
133 | |||
134 | /* Specialize a slot to the runtime type. Note: slot can be negative! The type is taken from the value currently in J->L->base[slot]. */ | ||
135 | static TRef sload(jit_State *J, int32_t slot) | ||
136 | { | ||
137 | IRType t = itype2irt(&J->L->base[slot]); | ||
138 | TRef ref = emitir_raw(IRTG(IR_SLOAD, t), (int32_t)J->baseslot+slot, 0); /* IRTG here, unlike the IRT in sloadt(). */ | ||
139 | if (irtype_ispri(t)) ref = TREF_PRI(t); /* Canonicalize primitive refs. */ | ||
140 | J->base[slot] = ref; | ||
141 | return ref; | ||
142 | } | ||
143 | |||
144 | /* Get TRef from slot. Load slot and specialize if not done already. Note: evaluates its argument s more than once. */ | ||
145 | #define getslot(J, s) (J->base[(s)] ? J->base[(s)] : sload(J, (int32_t)(s))) | ||
146 | |||
147 | /* Get TRef for current function. It is kept in slot -1 relative to J->base. */ | ||
148 | static TRef getcurrf(jit_State *J) | ||
149 | { | ||
150 | if (J->base[-1]) { | ||
151 | IRIns *ir = IR(tref_ref(J->base[-1])); | ||
152 | if (ir->o == IR_FRAME) /* Shortcut if already specialized. */ | ||
153 | return TREF(ir->op2, IRT_FUNC); /* Return TRef of KFUNC. */ | ||
154 | return J->base[-1]; | ||
155 | } else { /* Slot not loaded yet: load it as a read-only function slot. */ | ||
156 | lua_assert(J->baseslot == 1); | ||
157 | return sloadt(J, -1, IRT_FUNC, IRSLOAD_READONLY); | ||
158 | } | ||
159 | } | ||
160 | |||
161 | /* Compare for raw object equality. | ||
162 | ** Returns 0 if the objects are the same. | ||
163 | ** Returns 1 if they are different, but the same type. | ||
164 | ** Returns 2 for two different types. | ||
165 | ** Comparisons between primitives always return 1 -- no caller cares about it. | ||
166 | */ | ||
167 | static int rec_objcmp(jit_State *J, TRef a, TRef b, cTValue *av, cTValue *bv) | ||
168 | { | ||
169 | int diff = !lj_obj_equal(av, bv); /* Outcome for the runtime values seen while recording. */ | ||
170 | if (!tref_isk2(a, b)) { /* Shortcut, also handles primitives. */ | ||
171 | IRType ta = tref_type(a); | ||
172 | IRType tb = tref_type(b); | ||
173 | if (ta != tb) { | ||
174 | /* Widen mixed number/int comparisons to number/number comparison. */ | ||
175 | if (ta == IRT_INT && tb == IRT_NUM) { | ||
176 | a = emitir(IRTN(IR_TONUM), a, 0); | ||
177 | ta = IRT_NUM; | ||
178 | } else if (ta == IRT_NUM && tb == IRT_INT) { | ||
179 | b = emitir(IRTN(IR_TONUM), b, 0); /* ta is IRT_NUM already. */ | ||
180 | } else { | ||
181 | return 2; /* Two different types are never equal. */ | ||
182 | } | ||
183 | } | ||
184 | emitir(IRTG(diff ? IR_NE : IR_EQ, ta), a, b); /* Emit the comparison matching the recorded outcome. */ | ||
185 | } | ||
186 | return diff; | ||
187 | } | ||
188 | |||
189 | /* -- Record loop ops ----------------------------------------------------- */ | ||
190 | |||
191 | /* Loop event. Returned by rec_for(), rec_iterl() and rec_loop(). */ | ||
192 | typedef enum { | ||
193 | LOOPEV_LEAVE, /* Loop is left or not entered. */ | ||
194 | LOOPEV_ENTER /* Loop is entered. */ | ||
195 | } LoopEvent; | ||
196 | |||
197 | /* Canonicalize slots: convert integers to numbers. Walks J->slot[] from the topmost used slot down to slot 1. */ | ||
198 | static void canonicalize_slots(jit_State *J) | ||
199 | { | ||
200 | BCReg s; | ||
201 | for (s = J->baseslot+J->maxslot-1; s >= 1; s--) { | ||
202 | TRef tr = J->slot[s]; | ||
203 | if (tref_isinteger(tr)) { | ||
204 | IRIns *ir = IR(tref_ref(tr)); | ||
205 | if (!(ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_READONLY))) /* Read-only SLOADs are left as they are. */ | ||
206 | J->slot[s] = emitir(IRTN(IR_TONUM), tr, 0); | ||
207 | } | ||
208 | } | ||
209 | } | ||
210 | |||
211 | /* Stop recording. lnk is stored as the link of the current trace (possibly replaced by the root trace, see below). */ | ||
212 | static void rec_stop(jit_State *J, TraceNo lnk) | ||
213 | { | ||
214 | lj_trace_end(J); | ||
215 | J->cur.link = (uint16_t)lnk; | ||
216 | if (lnk == J->curtrace) { /* Looping back? */ | ||
217 | if ((J->flags & JIT_F_OPT_LOOP)) /* Shall we try to create a loop? */ | ||
218 | goto nocanon; /* Do not canonicalize or we lose the narrowing. */ | ||
219 | if (J->cur.root) /* Otherwise ensure we always link to the root trace. */ | ||
220 | J->cur.link = J->cur.root; | ||
221 | } | ||
222 | canonicalize_slots(J); /* Skipped only by the goto above. */ | ||
223 | nocanon: | ||
224 | /* Note: all loop ops must set J->pc to the following instruction! */ | ||
225 | lj_snap_add(J); /* Add loop snapshot. */ | ||
226 | J->needsnap = 0; | ||
227 | J->mergesnap = 1; /* In case recording continues. */ | ||
228 | } | ||
229 | |||
230 | /* Peek before FORI to find a const initializer, otherwise load from slot. pc points at the candidate initializer; t selects an int or number constant. */ | ||
231 | static TRef fori_arg(jit_State *J, const BCIns *pc, BCReg slot, IRType t) | ||
232 | { | ||
233 | /* A store to slot-1 means there's no conditional assignment for slot. */ | ||
234 | if (bc_a(pc[-1]) == slot-1 && bcmode_a(bc_op(pc[-1])) == BCMdst) { | ||
235 | BCIns ins = pc[0]; | ||
236 | if (bc_a(ins) == slot) { | ||
237 | if (bc_op(ins) == BC_KSHORT) { /* Constant is the sign-extended 16 bit D operand. */ | ||
238 | int32_t k = (int32_t)(int16_t)bc_d(ins); | ||
239 | if (t == IRT_INT) | ||
240 | return lj_ir_kint(J, k); | ||
241 | else | ||
242 | return lj_ir_knum(J, cast_num(k)); | ||
243 | } else if (bc_op(ins) == BC_KNUM) { /* Constant is read from J->pt->k.n[], indexed by D. */ | ||
244 | lua_Number n = J->pt->k.n[bc_d(ins)]; | ||
245 | if (t == IRT_INT) | ||
246 | return lj_ir_kint(J, lj_num2int(n)); | ||
247 | else | ||
248 | return lj_ir_knum(J, n); | ||
249 | } | ||
250 | } | ||
251 | } | ||
252 | if (J->base[slot]) /* No constant found: use the slot's existing ref or load it. */ | ||
253 | return J->base[slot]; | ||
254 | else | ||
255 | return sloadt(J, (int32_t)slot, t, IRSLOAD_READONLY|IRSLOAD_INHERIT); | ||
256 | } | ||
257 | |||
258 | /* Simulate the runtime behavior of the FOR loop iterator. | ||
259 | ** It's important to exactly reproduce the semantics of the interpreter. Stores the comparison matching the outcome in *op. | ||
260 | */ | ||
261 | static LoopEvent for_iter(jit_State *J, IROp *op, BCReg ra, int isforl) | ||
262 | { | ||
263 | cTValue *forbase = &J->L->base[ra]; | ||
264 | lua_Number stopv = numV(&forbase[FORL_STOP]); | ||
265 | lua_Number idxv = numV(&forbase[FORL_IDX]); | ||
266 | if (isforl) /* Only in the isforl case is the step added before comparing. */ | ||
267 | idxv += numV(&forbase[FORL_STEP]); | ||
268 | if ((int32_t)forbase[FORL_STEP].u32.hi >= 0) { /* Tests the top bit of the step's high word (its sign). */ | ||
269 | if (idxv <= stopv) { *op = IR_LE; return LOOPEV_ENTER; } | ||
270 | *op = IR_GT; return LOOPEV_LEAVE; | ||
271 | } else { | ||
272 | if (stopv <= idxv) { *op = IR_GE; return LOOPEV_ENTER; } | ||
273 | *op = IR_LT; return LOOPEV_LEAVE; | ||
274 | } | ||
275 | } | ||
276 | |||
277 | /* Record FORL/JFORL or FORI/JFORI. Returns the loop event computed by for_iter(). */ | ||
278 | static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl) | ||
279 | { | ||
280 | BCReg ra = bc_a(*fori); | ||
281 | IROp op; | ||
282 | LoopEvent ev = for_iter(J, &op, ra, isforl); | ||
283 | TRef *tr = &J->base[ra]; | ||
284 | TRef idx, stop; | ||
285 | IRType t; | ||
286 | if (isforl) { /* Handle FORL/JFORL opcodes. */ | ||
287 | TRef step; | ||
288 | idx = tr[FORL_IDX]; | ||
289 | if (!idx) idx = sloadt(J, (int32_t)(ra+FORL_IDX), IRT_NUM, 0); | ||
290 | t = tref_type(idx); /* Stop and step are fetched with the same type as the index. */ | ||
291 | stop = fori_arg(J, fori-2, ra+FORL_STOP, t); | ||
292 | step = fori_arg(J, fori-1, ra+FORL_STEP, t); | ||
293 | tr[FORL_IDX] = idx = emitir(IRT(IR_ADD, t), idx, step); | ||
294 | } else { /* Handle FORI/JFORI opcodes. */ | ||
295 | BCReg i; | ||
296 | t = IRT_NUM; | ||
297 | for (i = FORL_IDX; i <= FORL_STEP; i++) { | ||
298 | lua_assert(J->base[ra+i] != 0); /* Assumes the slots are already set. */ | ||
299 | tr[i] = lj_ir_tonum(J, J->base[ra+i]); | ||
300 | } | ||
301 | idx = tr[FORL_IDX]; | ||
302 | stop = tr[FORL_STOP]; | ||
303 | if (!tref_isk(tr[FORL_STEP])) /* Non-const step: need direction guard. */ | ||
304 | emitir(IRTG(((op-IR_LT)>>1)+IR_LT, IRT_NUM), /* NOTE(review): relies on the numeric order of the comparison opcodes -- confirm in lj_ir.h */ | ||
305 | tr[FORL_STEP], lj_ir_knum_zero(J)); | ||
306 | } | ||
307 | /* First set maxslot/pc for the outcome NOT seen while recording; the snapshot is added in that state. */ | ||
308 | tr[FORL_EXT] = idx; | ||
309 | if (ev == LOOPEV_LEAVE) { | ||
310 | J->maxslot = ra+FORL_EXT+1; | ||
311 | J->pc = fori+1; | ||
312 | } else { | ||
313 | J->maxslot = ra; | ||
314 | J->pc = fori+bc_j(*fori)+1; | ||
315 | } | ||
316 | lj_snap_add(J); | ||
317 | /* Guarded comparison of index against stop, using the op chosen by for_iter(). */ | ||
318 | emitir(IRTG(op, t), idx, stop); | ||
319 | /* Now set maxslot/pc for the outcome actually seen. */ | ||
320 | if (ev == LOOPEV_LEAVE) { | ||
321 | J->maxslot = ra; | ||
322 | J->pc = fori+bc_j(*fori)+1; | ||
323 | } else { | ||
324 | J->maxslot = ra+FORL_EXT+1; | ||
325 | J->pc = fori+1; | ||
326 | } | ||
327 | J->needsnap = 1; | ||
328 | return ev; | ||
329 | } | ||
330 | |||
331 | /* Record ITERL/JITERL. The decision is taken from the recorded type of slot ra: non-nil enters the loop, nil leaves it. */ | ||
332 | static LoopEvent rec_iterl(jit_State *J, const BCIns iterins) | ||
333 | { | ||
334 | BCReg ra = bc_a(iterins); | ||
335 | lua_assert(J->base[ra] != 0); | ||
336 | if (!tref_isnil(J->base[ra])) { /* Looping back? */ | ||
337 | J->base[ra-1] = J->base[ra]; /* Copy result of ITERC to control var. */ | ||
338 | J->maxslot = ra-1+bc_b(J->pc[-1]); /* Uses the B operand of the preceding instruction. */ | ||
339 | J->pc += bc_j(iterins)+1; | ||
340 | return LOOPEV_ENTER; | ||
341 | } else { | ||
342 | J->maxslot = ra-3; | ||
343 | J->pc++; | ||
344 | return LOOPEV_LEAVE; | ||
345 | } | ||
346 | } | ||
347 | |||
348 | /* Record LOOP/JLOOP. Now, that was easy. Sets maxslot to ra, advances pc by one and always reports LOOPEV_ENTER. */ | ||
349 | static LoopEvent rec_loop(jit_State *J, BCReg ra) | ||
350 | { | ||
351 | J->maxslot = ra; | ||
352 | J->pc++; | ||
353 | return LOOPEV_ENTER; | ||
354 | } | ||
355 | |||
356 | /* Check if a loop repeatedly failed to trace because it didn't loop back. Returns 1 if the penalty entry for pc says so, else 0. */ | ||
357 | static int innerloopleft(jit_State *J, const BCIns *pc) | ||
358 | { | ||
359 | ptrdiff_t i; | ||
360 | for (i = 0; i < PENALTY_SLOTS; i++) | ||
361 | if (J->penalty[i].pc == pc) { /* Only the first entry matching pc is examined. */ | ||
362 | if (J->penalty[i].reason == LJ_TRERR_LLEAVE && | ||
363 | J->penalty[i].val >= 2*HOTCOUNT_MIN_PENALTY) | ||
364 | return 1; | ||
365 | break; | ||
366 | } | ||
367 | return 0; | ||
368 | } | ||
369 | |||
370 | /* Handle the case when an interpreted loop op is hit. May stop recording via rec_stop() or abort the trace via lj_trace_err(). */ | ||
371 | static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev) | ||
372 | { | ||
373 | if (J->parent == 0) { /* No parent: the comments below treat this as a root trace. */ | ||
374 | if (pc == J->startpc && J->framedepth == 0) { /* Same loop? */ | ||
375 | if (ev == LOOPEV_LEAVE) /* Must loop back to form a root trace. */ | ||
376 | lj_trace_err(J, LJ_TRERR_LLEAVE); | ||
377 | rec_stop(J, J->curtrace); /* Root trace forms a loop. */ | ||
378 | } else if (ev != LOOPEV_LEAVE) { /* Entering inner loop? */ | ||
379 | /* It's usually better to abort here and wait until the inner loop | ||
380 | ** is traced. But if the inner loop repeatedly didn't loop back, | ||
381 | ** this indicates a low trip count. In this case try unrolling | ||
382 | ** an inner loop even in a root trace. But it's better to be a bit | ||
383 | ** more conservative here and only do it for very short loops. | ||
384 | */ | ||
385 | if (!innerloopleft(J, pc)) | ||
386 | lj_trace_err(J, LJ_TRERR_LINNER); /* Root trace hit an inner loop. */ | ||
387 | if ((J->loopref && J->cur.nins - J->loopref > 8) || --J->loopunroll < 0) | ||
388 | lj_trace_err(J, LJ_TRERR_LUNROLL); /* Limit loop unrolling. */ | ||
389 | J->loopref = J->cur.nins; /* Remember where this unrolled iteration starts in the IR. */ | ||
390 | } | ||
391 | } else if (ev != LOOPEV_LEAVE) { /* Side trace enters an inner loop. */ | ||
392 | J->loopref = J->cur.nins; | ||
393 | if (--J->loopunroll < 0) | ||
394 | lj_trace_err(J, LJ_TRERR_LUNROLL); /* Limit loop unrolling. */ | ||
395 | } /* Side trace continues across a loop that's left or not entered. */ | ||
396 | } | ||
397 | |||
398 | /* Handle the case when an already compiled loop op is hit. lnk is the trace to link to when a side trace enters the loop. */ | ||
399 | static void rec_loop_jit(jit_State *J, TraceNo lnk, LoopEvent ev) | ||
400 | { | ||
401 | if (J->parent == 0) { /* Root trace hit an inner loop. */ | ||
402 | /* Better let the inner loop spawn a side trace back here. */ | ||
403 | lj_trace_err(J, LJ_TRERR_LINNER); | ||
404 | } else if (ev != LOOPEV_LEAVE) { /* Side trace enters a compiled loop. */ | ||
405 | J->instunroll = 0; /* Cannot continue across a compiled loop op. */ | ||
406 | if (J->pc == J->startpc && J->framedepth == 0) | ||
407 | lnk = J->curtrace; /* Can form an extra loop. */ | ||
408 | rec_stop(J, lnk); /* Link to the loop. */ | ||
409 | } /* Side trace continues across a loop that's left or not entered. */ | ||
410 | } | ||
411 | |||
412 | /* -- Metamethod handling ------------------------------------------------- */ | ||
413 | |||
414 | /* Prepare to record call to metamethod. Puts a continuation frame at slot `framesize' and returns the slot after it, where callers place the function. */ | ||
415 | static BCReg rec_mm_prep(jit_State *J, ASMFunction cont) | ||
416 | { | ||
417 | BCReg s, top = curr_proto(J->L)->framesize; | ||
418 | TRef trcont; | ||
419 | setcont(&J->L->base[top], cont); /* Also written to the Lua stack itself, not just recorded. */ | ||
420 | #if LJ_64 | ||
421 | trcont = lj_ir_kptr(J, (void *)((int64_t)cont - (int64_t)lj_vm_asm_begin)); | ||
422 | #else | ||
423 | trcont = lj_ir_kptr(J, (void *)cont); | ||
424 | #endif | ||
425 | J->base[top] = emitir(IRTG(IR_FRAME, IRT_PTR), trcont, trcont); | ||
426 | for (s = J->maxslot; s < top; s++) /* Clear the slots between maxslot and the new frame. */ | ||
427 | J->base[s] = 0; | ||
428 | return top+1; | ||
429 | } | ||
430 | |||
431 | /* Record metamethod lookup for the object in ix->tab/ix->tabv. Returns 1 and sets ix->mobj if the metamethod was found, else 0. */ | ||
432 | static int rec_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm) | ||
433 | { | ||
434 | RecordIndex mix; /* Separate context for the raw lookup in the metatable. */ | ||
435 | GCtab *mt; | ||
436 | if (tref_istab(ix->tab)) { | ||
437 | mt = tabref(tabV(&ix->tabv)->metatable); | ||
438 | mix.tab = emitir(IRT(IR_FLOAD, IRT_TAB), ix->tab, IRFL_TAB_META); | ||
439 | } else if (tref_isudata(ix->tab)) { | ||
440 | mt = tabref(udataV(&ix->tabv)->metatable); | ||
441 | mix.tab = emitir(IRT(IR_FLOAD, IRT_TAB), ix->tab, IRFL_UDATA_META); | ||
442 | } else { | ||
443 | /* Specialize to base metatable. Must flush mcode in lua_setmetatable(). */ | ||
444 | mt = tabref(J2G(J)->basemt[itypemap(&ix->tabv)]); | ||
445 | if (mt == NULL) | ||
446 | return 0; /* No metamethod. */ | ||
447 | mix.tab = lj_ir_ktab(J, mt); | ||
448 | goto nocheck; /* Constant metatable: ix->mt is not set and no null check is emitted. */ | ||
449 | } | ||
450 | ix->mt = mix.tab; | ||
451 | emitir(IRTG(mt ? IR_NE : IR_EQ, IRT_TAB), mix.tab, lj_ir_knull(J, IRT_TAB)); /* Check against null, matching what was seen at recording time. */ | ||
452 | nocheck: | ||
453 | if (mt) { | ||
454 | GCstr *mmstr = strref(J2G(J)->mmname[mm]); | ||
455 | cTValue *mo = lj_tab_getstr(mt, mmstr); | ||
456 | if (mo && !tvisnil(mo)) /* ix->mobjv is only written for a non-nil entry. */ | ||
457 | copyTV(J->L, &ix->mobjv, mo); | ||
458 | ix->mtv = mt; | ||
459 | settabV(J->L, &mix.tabv, mt); | ||
460 | setstrV(J->L, &mix.keyv, mmstr); | ||
461 | mix.key = lj_ir_kstr(J, mmstr); | ||
462 | mix.val = 0; /* 0 = load. */ | ||
463 | mix.idxchain = 0; /* 0 = raw lookup. */ | ||
464 | ix->mobj = rec_idx(J, &mix); | ||
465 | return !tref_isnil(ix->mobj); /* 1 if metamethod found, 0 if not. */ | ||
466 | } | ||
467 | return 0; /* No metamethod. */ | ||
468 | } | ||
469 | |||
470 | /* Record call to arithmetic metamethod (and MM_len). Operands are ix->tab and ix->key. Returns J->base[func] if rec_call() returns non-zero, else 0. */ | ||
471 | static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm) | ||
472 | { | ||
473 | /* Set up metamethod call first to save ix->tab and ix->tabv. */ | ||
474 | BCReg func = rec_mm_prep(J, lj_cont_ra); | ||
475 | TRef *base = J->base + func; | ||
476 | TValue *basev = J->L->base + func; | ||
477 | base[1] = ix->tab; base[2] = ix->key; /* The two arguments, as refs ... */ | ||
478 | copyTV(J->L, basev+1, &ix->tabv); /* ... and as runtime values. */ | ||
479 | copyTV(J->L, basev+2, &ix->keyv); | ||
480 | if (!rec_mm_lookup(J, ix, mm)) { /* Lookup metamethod on 1st operand. */ | ||
481 | if (mm != MM_len) { /* MM_len gets no lookup on a 2nd operand. */ | ||
482 | ix->tab = ix->key; | ||
483 | copyTV(J->L, &ix->tabv, &ix->keyv); | ||
484 | if (rec_mm_lookup(J, ix, mm)) /* Lookup metamethod on 2nd operand. */ | ||
485 | goto ok; | ||
486 | } | ||
487 | lj_trace_err(J, LJ_TRERR_NOMM); | ||
488 | } | ||
489 | ok: | ||
490 | base[0] = ix->mobj; /* The metamethod goes into the function slot. */ | ||
491 | copyTV(J->L, basev+0, &ix->mobjv); | ||
492 | return rec_call(J, func, CALLRES_CONT, 2) ? J->base[func] : 0; | ||
493 | } | ||
494 | |||
495 | /* Call a comparison metamethod: ix->mobj with the arguments ix->val and ix->key. Bit 0 of op selects the continuation. */ | ||
496 | static void rec_mm_callcomp(jit_State *J, RecordIndex *ix, int op) | ||
497 | { | ||
498 | BCReg func = rec_mm_prep(J, (op&1) ? lj_cont_condf : lj_cont_condt); | ||
499 | TRef *base = J->base + func; | ||
500 | TValue *tv = J->L->base + func; | ||
501 | base[0] = ix->mobj; base[1] = ix->val; base[2] = ix->key; | ||
502 | copyTV(J->L, tv+0, &ix->mobjv); /* Same three slots as runtime values. */ | ||
503 | copyTV(J->L, tv+1, &ix->valv); | ||
504 | copyTV(J->L, tv+2, &ix->keyv); | ||
505 | rec_call(J, func, CALLRES_CONT, 2); | ||
506 | /* It doesn't matter whether this is immediately resolved or not. | ||
507 | ** Type specialization of the return type suffices to specialize | ||
508 | ** the control flow. | ||
509 | */ | ||
510 | } | ||
511 | |||
512 | /* Record call to equality comparison metamethod (for tab and udata only). Operands are ix->val (1st) and ix->key (2nd). Nothing is called unless both resolve to the same metamethod. */ | ||
513 | static void rec_mm_equal(jit_State *J, RecordIndex *ix, int op) | ||
514 | { | ||
515 | ix->tab = ix->val; | ||
516 | copyTV(J->L, &ix->tabv, &ix->valv); | ||
517 | if (rec_mm_lookup(J, ix, MM_eq)) { /* Lookup metamethod on 1st operand. */ | ||
518 | cTValue *bv; | ||
519 | TRef mo1 = ix->mobj; /* Save the 1st operand's metamethod (ref and value); a 2nd lookup overwrites ix. */ | ||
520 | TValue mo1v; | ||
521 | copyTV(J->L, &mo1v, &ix->mobjv); | ||
522 | /* Avoid the 2nd lookup and the objcmp if the metatables are equal. */ | ||
523 | bv = &ix->keyv; | ||
524 | if (tvistab(bv) && tabref(tabV(bv)->metatable) == ix->mtv) { | ||
525 | TRef mt2 = emitir(IRT(IR_FLOAD, IRT_TAB), ix->key, IRFL_TAB_META); | ||
526 | emitir(IRTG(IR_EQ, IRT_TAB), mt2, ix->mt); /* Check that both metatables are the same. */ | ||
527 | } else if (tvisudata(bv) && tabref(udataV(bv)->metatable) == ix->mtv) { | ||
528 | TRef mt2 = emitir(IRT(IR_FLOAD, IRT_TAB), ix->key, IRFL_UDATA_META); | ||
529 | emitir(IRTG(IR_EQ, IRT_TAB), mt2, ix->mt); | ||
530 | } else { /* Lookup metamethod on 2nd operand and compare both. */ | ||
531 | ix->tab = ix->key; | ||
532 | copyTV(J->L, &ix->tabv, bv); | ||
533 | if (!rec_mm_lookup(J, ix, MM_eq) || | ||
534 | rec_objcmp(J, mo1, ix->mobj, &mo1v, &ix->mobjv)) | ||
535 | return; /* Not found or not the same object: no metamethod call. */ | ||
536 | } | ||
537 | rec_mm_callcomp(J, ix, op); | ||
538 | } | ||
539 | } | ||
540 | |||
541 | /* Record call to ordered comparison metamethods (for arbitrary objects). Operands are ix->val (1st) and ix->key (2nd); bit 1 of op selects __le. */ | ||
542 | static void rec_mm_comp(jit_State *J, RecordIndex *ix, int op) | ||
543 | { | ||
544 | ix->tab = ix->val; | ||
545 | copyTV(J->L, &ix->tabv, &ix->valv); | ||
546 | while (1) { /* At most two rounds: the second one runs with __lt and swapped operands. */ | ||
547 | MMS mm = (op & 2) ? MM_le : MM_lt; /* Try __le + __lt or only __lt. */ | ||
548 | if (rec_mm_lookup(J, ix, mm)) { /* Lookup metamethod on 1st operand. */ | ||
549 | cTValue *bv; | ||
550 | TRef mo1 = ix->mobj; /* Save the 1st operand's metamethod (ref and value); a 2nd lookup overwrites ix. */ | ||
551 | TValue mo1v; | ||
552 | copyTV(J->L, &mo1v, &ix->mobjv); | ||
553 | /* Avoid the 2nd lookup and the objcmp if the metatables are equal. */ | ||
554 | bv = &ix->keyv; | ||
555 | if (tvistab(bv) && tabref(tabV(bv)->metatable) == ix->mtv) { | ||
556 | TRef mt2 = emitir(IRT(IR_FLOAD, IRT_TAB), ix->key, IRFL_TAB_META); | ||
557 | emitir(IRTG(IR_EQ, IRT_TAB), mt2, ix->mt); | ||
558 | } else if (tvisudata(bv) && tabref(udataV(bv)->metatable) == ix->mtv) { | ||
559 | TRef mt2 = emitir(IRT(IR_FLOAD, IRT_TAB), ix->key, IRFL_UDATA_META); | ||
560 | emitir(IRTG(IR_EQ, IRT_TAB), mt2, ix->mt); | ||
561 | } else { /* Lookup metamethod on 2nd operand and compare both. */ | ||
562 | ix->tab = ix->key; | ||
563 | copyTV(J->L, &ix->tabv, bv); | ||
564 | if (!rec_mm_lookup(J, ix, mm) || | ||
565 | rec_objcmp(J, mo1, ix->mobj, &mo1v, &ix->mobjv)) | ||
566 | goto nomatch; | ||
567 | } | ||
568 | rec_mm_callcomp(J, ix, op); | ||
569 | return; | ||
570 | } | ||
571 | nomatch: | ||
572 | /* First lookup failed. Retry with __lt and swapped operands. */ | ||
573 | if (!(op & 2)) break; /* Already at __lt. Interpreter will throw. */ | ||
574 | ix->tab = ix->key; ix->key = ix->val; ix->val = ix->tab; /* Swap key and val (tab is the temporary; it ends up equal to the new val). */ | ||
575 | copyTV(J->L, &ix->tabv, &ix->keyv); /* Same swap for the runtime values. */ | ||
576 | copyTV(J->L, &ix->keyv, &ix->valv); | ||
577 | copyTV(J->L, &ix->valv, &ix->tabv); | ||
578 | op ^= 3; /* Flips bits 0 and 1: bit 1 gets cleared, so the next round uses __lt. */ | ||
579 | } | ||
580 | } | ||
581 | |||
582 | /* -- Indexed access ------------------------------------------------------ */ | ||
583 | |||
584 | /* Record indexed key lookup. Returns an IRT_PTR ref from AREF, HREFK or HREF. Also sets ix->oldv. */ | ||
585 | static TRef rec_idx_key(jit_State *J, RecordIndex *ix) | ||
586 | { | ||
587 | TRef key; | ||
588 | GCtab *t = tabV(&ix->tabv); | ||
589 | ix->oldv = lj_tab_get(J->L, t, &ix->keyv); /* Lookup previous value. */ | ||
590 | |||
591 | /* Integer keys are looked up in the array part first. */ | ||
592 | key = ix->key; | ||
593 | if (tref_isnumber(key)) { | ||
594 | lua_Number n = numV(&ix->keyv); | ||
595 | int32_t k = lj_num2int(n); | ||
596 | lua_assert(tvisnum(&ix->keyv)); | ||
597 | /* Potential array key? */ | ||
598 | if ((MSize)k < LJ_MAX_ASIZE && n == cast_num(k)) { /* The unsigned cast also rejects negative k. */ | ||
599 | TRef asizeref, ikey = key; | ||
600 | if (!tref_isinteger(ikey)) | ||
601 | ikey = emitir(IRTGI(IR_TOINT), ikey, IRTOINT_INDEX); | ||
602 | asizeref = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_ASIZE); | ||
603 | if ((MSize)k < t->asize) { /* Currently an array key? */ | ||
604 | TRef arrayref; | ||
605 | emitir(IRTGI(IR_ABC), asizeref, ikey); /* Bounds check. */ | ||
606 | arrayref = emitir(IRT(IR_FLOAD, IRT_PTR), ix->tab, IRFL_TAB_ARRAY); | ||
607 | return emitir(IRT(IR_AREF, IRT_PTR), arrayref, ikey); | ||
608 | } else { /* Currently not in array (may be an array extension)? */ | ||
609 | emitir(IRTGI(IR_ULE), asizeref, ikey); /* Inv. bounds check. */ | ||
610 | if (k == 0 && tref_isk(key)) | ||
611 | key = lj_ir_knum_zero(J); /* Canonicalize 0 or +-0.0 to +0.0. */ | ||
612 | /* And continue with the hash lookup. */ | ||
613 | } | ||
614 | } else if (!tref_isk(key)) { | ||
615 | /* We can rule out const numbers which failed the integerness test | ||
616 | ** above. But all other numbers are potential array keys. | ||
617 | */ | ||
618 | if (t->asize == 0) { /* True sparse tables have an empty array part. */ | ||
619 | /* Guard that the array part stays empty. */ | ||
620 | TRef tmp = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_ASIZE); | ||
621 | emitir(IRTGI(IR_EQ), tmp, lj_ir_kint(J, 0)); | ||
622 | } else { /* Variable non-integer key on a table with an array part: abort the trace. */ | ||
623 | lj_trace_err(J, LJ_TRERR_NYITMIX); | ||
624 | } | ||
625 | } | ||
626 | } | ||
627 | |||
628 | /* Otherwise the key is located in the hash part. */ | ||
629 | if (tref_isinteger(key)) /* Hash keys are based on numbers, not ints. */ | ||
630 | ix->key = key = emitir(IRTN(IR_TONUM), key, 0); | ||
631 | if (tref_isk(key)) { | ||
632 | /* Optimize lookup of constant hash keys. */ | ||
633 | MSize hslot = (MSize)((char *)ix->oldv - (char *)&noderef(t->node)[0].val); /* Byte offset of the old value from the val field of node 0. */ | ||
634 | if (t->hmask > 0 && hslot <= t->hmask*(MSize)sizeof(Node) && | ||
635 | hslot <= 65535*(MSize)sizeof(Node)) { | ||
636 | TRef node, kslot; | ||
637 | TRef hm = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_HMASK); | ||
638 | emitir(IRTGI(IR_EQ), hm, lj_ir_kint(J, (int32_t)t->hmask)); /* Check that hmask still has the recorded value. */ | ||
639 | node = emitir(IRT(IR_FLOAD, IRT_PTR), ix->tab, IRFL_TAB_NODE); | ||
640 | kslot = lj_ir_kslot(J, key, hslot / sizeof(Node)); /* The byte offset turned into a node index. */ | ||
641 | return emitir(IRTG(IR_HREFK, IRT_PTR), node, kslot); | ||
642 | } | ||
643 | } | ||
644 | /* Fall back to a regular hash lookup. */ | ||
645 | return emitir(IRT(IR_HREF, IRT_PTR), ix->tab, key); | ||
646 | } | ||
647 | |||
648 | /* Determine whether a key is NOT one of the fast metamethod names. */ | ||
649 | static int nommstr(jit_State *J, TRef key) | ||
650 | { | ||
651 | if (tref_isstr(key)) { | ||
652 | if (tref_isk(key)) { | ||
653 | GCstr *str = ir_kstr(IR(tref_ref(key))); | ||
654 | uint32_t i; | ||
655 | for (i = 0; i <= MM_FAST; i++) | ||
656 | if (strref(J2G(J)->mmname[i]) == str) | ||
657 | return 0; /* MUST be one the fast metamethod names. */ | ||
658 | } else { | ||
659 | return 0; /* Variable string key MAY be a metamethod name. */ | ||
660 | } | ||
661 | } | ||
662 | return 1; /* CANNOT be a metamethod name. */ | ||
663 | } | ||
664 | |||
665 | /* Record indexed load/store. ix->val == 0 requests a load, anything else a store of ix->val. Returns the loaded value, TREF_NIL for a constant nil/NaN key without __index, or 0 for stores and for pending metamethod calls. */ | ||
666 | static TRef rec_idx(jit_State *J, RecordIndex *ix) | ||
667 | { | ||
668 | TRef xref; | ||
669 | IROp xrefop, loadop; | ||
670 | cTValue *oldv; | ||
671 | |||
672 | while (!tref_istab(ix->tab)) { /* Handle non-table lookup. */ | ||
673 | lua_assert(ix->idxchain != 0); /* Never call raw rec_idx() on non-table. */ | ||
674 | if (!rec_mm_lookup(J, ix, ix->val ? MM_newindex : MM_index)) | ||
675 | lj_trace_err(J, LJ_TRERR_NOMM); | ||
676 | handlemm: /* Also entered from the table paths below once a metamethod was found. */ | ||
677 | if (tref_isfunc(ix->mobj)) { /* Handle metamethod call. */ | ||
678 | BCReg func = rec_mm_prep(J, ix->val ? lj_cont_nop : lj_cont_ra); | ||
679 | TRef *base = J->base + func; | ||
680 | TValue *tv = J->L->base + func; | ||
681 | base[0] = ix->mobj; base[1] = ix->tab; base[2] = ix->key; | ||
682 | setfuncV(J->L, tv+0, funcV(&ix->mobjv)); | ||
683 | copyTV(J->L, tv+1, &ix->tabv); | ||
684 | copyTV(J->L, tv+2, &ix->keyv); | ||
685 | if (ix->val) { | ||
686 | base[3] = ix->val; | ||
687 | copyTV(J->L, tv+3, &ix->valv); | ||
688 | rec_call(J, func, CALLRES_CONT, 3); /* mobj(tab, key, val) */ | ||
689 | return 0; /* A store has no result. */ | ||
690 | } else { | ||
691 | /* res = mobj(tab, key) */ | ||
692 | return rec_call(J, func, CALLRES_CONT, 2) ? J->base[func] : 0; | ||
693 | } | ||
694 | } | ||
695 | /* Otherwise retry lookup with metaobject. */ | ||
696 | ix->tab = ix->mobj; | ||
697 | copyTV(J->L, &ix->tabv, &ix->mobjv); | ||
698 | if (--ix->idxchain == 0) /* Chain budget used up: abort the trace. */ | ||
699 | lj_trace_err(J, LJ_TRERR_IDXLOOP); | ||
700 | } | ||
701 | |||
702 | /* First catch nil and NaN keys for tables. */ | ||
703 | if (tvisnil(&ix->keyv) || (tvisnum(&ix->keyv) && tvisnan(&ix->keyv))) { | ||
704 | if (ix->val) /* Better fail early. */ | ||
705 | lj_trace_err(J, LJ_TRERR_STORENN); | ||
706 | if (tref_isk(ix->key)) { | ||
707 | if (ix->idxchain && rec_mm_lookup(J, ix, MM_index)) | ||
708 | goto handlemm; | ||
709 | return TREF_NIL; | ||
710 | } | ||
711 | } | ||
712 | |||
713 | /* Record the key lookup. */ | ||
714 | xref = rec_idx_key(J, ix); | ||
715 | xrefop = IR(tref_ref(xref))->o; | ||
716 | loadop = xrefop == IR_AREF ? IR_ALOAD : IR_HLOAD; /* Array refs use ALOAD, all other refs HLOAD. */ | ||
717 | oldv = ix->oldv; | ||
718 | |||
719 | if (ix->val == 0) { /* Indexed load */ | ||
720 | IRType t = itype2irt(oldv); | ||
721 | TRef res = emitir(IRTG(loadop, t), xref, 0); | ||
722 | if (t == IRT_NIL && ix->idxchain && rec_mm_lookup(J, ix, MM_index)) | ||
723 | goto handlemm; | ||
724 | if (irtype_ispri(t)) res = TREF_PRI(t); /* Canonicalize primitives. */ | ||
725 | return res; | ||
726 | } else { /* Indexed store. */ | ||
727 | GCtab *mt = tabref(tabV(&ix->tabv)->metatable); | ||
728 | if (tvisnil(oldv)) { /* Previous value was nil? */ | ||
729 | /* Need to duplicate the hasmm check for the early guards. */ | ||
730 | int hasmm = 0; | ||
731 | if (ix->idxchain && mt) { | ||
732 | cTValue *mo = lj_tab_getstr(mt, strref(J2G(J)->mmname[MM_newindex])); | ||
733 | hasmm = mo && !tvisnil(mo); | ||
734 | } | ||
735 | if (hasmm || oldv == niltvg(J2G(J))) | ||
736 | emitir(IRTG(loadop, IRT_NIL), xref, 0); /* Guard for nil value. */ | ||
737 | else if (xrefop == IR_HREF) | ||
738 | emitir(IRTG(IR_NE, IRT_PTR), xref, lj_ir_kptr(J, niltvg(J2G(J)))); | ||
739 | if (ix->idxchain && rec_mm_lookup(J, ix, MM_newindex)) { /* Metamethod? */ | ||
740 | lua_assert(hasmm); | ||
741 | goto handlemm; | ||
742 | } | ||
743 | lua_assert(!hasmm); | ||
744 | if (oldv == niltvg(J2G(J))) { /* Need to insert a new key. */ | ||
745 | TRef key = ix->key; | ||
746 | if (tref_isinteger(key)) /* NEWREF needs a TValue as a key. */ | ||
747 | key = emitir(IRTN(IR_TONUM), key, 0); | ||
748 | xref = emitir(IRT(IR_NEWREF, IRT_PTR), ix->tab, key); | ||
749 | } | ||
750 | } else if (!lj_opt_fwd_wasnonnil(J, loadop, tref_ref(xref))) { | ||
751 | /* Cannot derive that the previous value was non-nil, must do checks. */ | ||
752 | if (xrefop == IR_HREF) /* Guard against store to niltv. */ | ||
753 | emitir(IRTG(IR_NE, IRT_PTR), xref, lj_ir_kptr(J, niltvg(J2G(J)))); | ||
754 | if (ix->idxchain) { /* Metamethod lookup required? */ | ||
755 | /* A check for NULL metatable is cheaper (hoistable) than a load. */ | ||
756 | if (!mt) { | ||
757 | TRef mtref = emitir(IRT(IR_FLOAD, IRT_TAB), ix->tab, IRFL_TAB_META); | ||
758 | emitir(IRTG(IR_EQ, IRT_TAB), mtref, lj_ir_knull(J, IRT_TAB)); | ||
759 | } else { | ||
760 | IRType t = itype2irt(oldv); | ||
761 | emitir(IRTG(loadop, t), xref, 0); /* Guard for non-nil value. */ | ||
762 | } | ||
763 | } | ||
764 | } | ||
765 | if (tref_isinteger(ix->val)) /* Convert int to number before storing. */ | ||
766 | ix->val = emitir(IRTN(IR_TONUM), ix->val, 0); | ||
767 | emitir(IRT(loadop+IRDELTA_L2S, tref_type(ix->val)), xref, ix->val); /* loadop+IRDELTA_L2S is the store op matching loadop. */ | ||
768 | if (tref_isgcv(ix->val)) /* Table barrier is only emitted for GC values. */ | ||
769 | emitir(IRT(IR_TBAR, IRT_NIL), ix->tab, 0); | ||
770 | /* Invalidate neg. metamethod cache for stores with certain string keys. */ | ||
771 | if (!nommstr(J, ix->key)) { | ||
772 | TRef fref = emitir(IRT(IR_FREF, IRT_PTR), ix->tab, IRFL_TAB_NOMM); | ||
773 | emitir(IRT(IR_FSTORE, IRT_U8), fref, lj_ir_kint(J, 0)); | ||
774 | } | ||
775 | J->needsnap = 1; | ||
776 | return 0; /* A store has no result. */ | ||
777 | } | ||
778 | } | ||
779 | |||
780 | /* -- Upvalue access ------------------------------------------------------ */ | ||
781 | |||
782 | /* Record upvalue load/store. */ | ||
783 | static TRef rec_upvalue(jit_State *J, uint32_t uv, TRef val) | ||
784 | { | ||
785 | GCupval *uvp = &gcref(J->fn->l.uvptr[uv])->uv; | ||
786 | TRef fn = getcurrf(J); | ||
787 | IRRef uref; | ||
788 | int needbarrier = 0; | ||
789 | if (!uvp->closed) { | ||
790 | /* In current stack? */ | ||
791 | if (uvp->v >= J->L->stack && uvp->v < J->L->maxstack) { | ||
792 | int32_t slot = (int32_t)(uvp->v - (J->L->base - J->baseslot)); | ||
793 | if (slot >= 0) { /* Aliases an SSA slot? */ | ||
794 | slot -= (int32_t)J->baseslot; /* Note: slot number may be negative! */ | ||
795 | /* NYI: add IR to guard that it's still aliasing the same slot. */ | ||
796 | if (val == 0) { | ||
797 | return getslot(J, slot); | ||
798 | } else { | ||
799 | J->base[slot] = val; | ||
800 | if (slot >= (int32_t)J->maxslot) J->maxslot = (BCReg)(slot+1); | ||
801 | return 0; | ||
802 | } | ||
803 | } | ||
804 | } | ||
805 | uref = tref_ref(emitir(IRTG(IR_UREFO, IRT_PTR), fn, uv)); | ||
806 | } else { | ||
807 | needbarrier = 1; | ||
808 | uref = tref_ref(emitir(IRTG(IR_UREFC, IRT_PTR), fn, uv)); | ||
809 | } | ||
810 | if (val == 0) { /* Upvalue load */ | ||
811 | IRType t = itype2irt(uvp->v); | ||
812 | TRef res = emitir(IRTG(IR_ULOAD, t), uref, 0); | ||
813 | if (irtype_ispri(t)) res = TREF_PRI(t); /* Canonicalize primitive refs. */ | ||
814 | return res; | ||
815 | } else { /* Upvalue store. */ | ||
816 | if (tref_isinteger(val)) /* Convert int to number before storing. */ | ||
817 | val = emitir(IRTN(IR_TONUM), val, 0); | ||
818 | emitir(IRT(IR_USTORE, tref_type(val)), uref, val); | ||
819 | if (needbarrier && tref_isgcv(val)) | ||
820 | emitir(IRT(IR_OBAR, IRT_NIL), uref, val); | ||
821 | J->needsnap = 1; | ||
822 | return 0; | ||
823 | } | ||
824 | } | ||
825 | |||
826 | /* -- Record calls to fast functions -------------------------------------- */ | ||
827 | |||
828 | /* Note: The function and the arguments for the bytecode CALL instructions | ||
829 | ** always occupy _new_ stack slots (above the highest active variable). | ||
830 | ** This means they must have been stored there by previous instructions | ||
831 | ** (MOV, K*, ADD etc.) which must be part of the same trace. This in turn | ||
832 | ** means their reference slots are already valid and their types have | ||
833 | ** already been specialized (i.e. getslot() would be redundant). | ||
834 | ** The 1st slot beyond the arguments is set to 0 before calling recff_*. | ||
835 | */ | ||
836 | |||
837 | /* Data used by handlers to record a fast function. */ | ||
838 | typedef struct RecordFFData { | ||
839 | TValue *argv; /* Runtime argument values. */ | ||
840 | GCfunc *fn; /* The currently recorded function. */ | ||
841 | int nargs; /* Number of passed arguments. */ | ||
842 | int nres; /* Number of returned results (defaults to 1). */ | ||
843 | int cres; /* Wanted number of call results. */ | ||
844 | uint32_t data; /* Per-ffid auxiliary data (opcode, literal etc.). */ | ||
845 | } RecordFFData; | ||
846 | |||
847 | /* Type of handler to record a fast function. Handlers write their results starting at res[0] and get the call context in rd. */ | ||
848 | typedef void (*RecordFunc)(jit_State *J, TRef *res, RecordFFData *rd); | ||
849 | |||
850 | /* Avoid carrying two pointers around: the arguments start one slot above res, so arg[0] is res[1]. */ | ||
851 | #define arg (res+1) | ||
852 | |||
853 | /* Get runtime value of int argument. */ | ||
854 | static int32_t argv2int(jit_State *J, TValue *o) | ||
855 | { | ||
856 | if (tvisstr(o) && !lj_str_numconv(strVdata(o), o)) | ||
857 | lj_trace_err(J, LJ_TRERR_BADTYPE); | ||
858 | return lj_num2bit(numV(o)); | ||
859 | } | ||
860 | |||
861 | /* Get runtime value of string argument. */ | ||
862 | static GCstr *argv2str(jit_State *J, TValue *o) | ||
863 | { | ||
864 | if (LJ_LIKELY(tvisstr(o))) { | ||
865 | return strV(o); | ||
866 | } else { | ||
867 | GCstr *s; | ||
868 | lua_assert(tvisnum(o)); | ||
869 | s = lj_str_fromnum(J->L, &o->n); | ||
870 | setstrV(J->L, o, s); | ||
871 | return s; | ||
872 | } | ||
873 | } | ||
874 | |||
875 | /* Fallback handler for all fast functions that are not recorded (yet). */ | ||
876 | static void recff_nyi(jit_State *J, TRef *res, RecordFFData *rd) | ||
877 | { | ||
878 | UNUSED(res); | ||
879 | setfuncV(J->L, &J->errinfo, rd->fn); | ||
880 | lj_trace_err_info(J, LJ_TRERR_NYIFF); | ||
881 | } | ||
882 | |||
883 | LJ_NORET static void recff_err_ffu(jit_State *J, RecordFFData *rd) | ||
884 | { | ||
885 | setfuncV(J->L, &J->errinfo, rd->fn); | ||
886 | lj_trace_err_info(J, LJ_TRERR_NYIFFU); | ||
887 | } | ||
888 | |||
889 | /* C functions can have arbitrary side-effects and are not recorded (yet). */ | ||
890 | static void recff_c(jit_State *J, TRef *res, RecordFFData *rd) | ||
891 | { | ||
892 | UNUSED(res); | ||
893 | setlightudV(&J->errinfo, (void *)rd->fn->c.f); | ||
894 | lj_trace_err_info(J, LJ_TRERR_NYICF); | ||
895 | } | ||
896 | |||
897 | /* -- Base library fast functions ----------------------------------------- */ | ||
898 | |||
899 | static void recff_assert(jit_State *J, TRef *res, RecordFFData *rd) | ||
900 | { | ||
901 | /* Arguments already specialized. The interpreter throws for nil/false. */ | ||
902 | BCReg i; | ||
903 | for (i = 0; arg[i]; i++) /* Need to pass through all arguments. */ | ||
904 | res[i] = arg[i]; | ||
905 | rd->nres = (int)i; | ||
906 | UNUSED(J); | ||
907 | } | ||
908 | |||
909 | static void recff_type(jit_State *J, TRef *res, RecordFFData *rd) | ||
910 | { | ||
911 | /* Arguments already specialized. Result is a constant string. Neat, huh? */ | ||
912 | IRType t = tref_isinteger(arg[0]) ? IRT_NUM : tref_type(arg[0]); | ||
913 | res[0] = lj_ir_kstr(J, strV(&rd->fn->c.upvalue[t])); | ||
914 | } | ||
915 | |||
916 | static void recff_getmetatable(jit_State *J, TRef *res, RecordFFData *rd) | ||
917 | { | ||
918 | TRef tr = arg[0]; | ||
919 | if (tref_istab(tr)) { | ||
920 | RecordIndex ix; | ||
921 | ix.tab = tr; | ||
922 | copyTV(J->L, &ix.tabv, &rd->argv[0]); | ||
923 | if (rec_mm_lookup(J, &ix, MM_metatable)) | ||
924 | res[0] = ix.mobj; | ||
925 | else | ||
926 | res[0] = ix.mt; | ||
927 | } /* else: Interpreter will throw. */ | ||
928 | } | ||
929 | |||
930 | static void recff_setmetatable(jit_State *J, TRef *res, RecordFFData *rd) | ||
931 | { | ||
932 | TRef tr = arg[0]; | ||
933 | TRef mt = arg[1]; | ||
934 | if (tref_istab(tr) && (tref_istab(mt) || (mt && tref_isnil(mt)))) { | ||
935 | TRef fref, mtref; | ||
936 | RecordIndex ix; | ||
937 | ix.tab = tr; | ||
938 | copyTV(J->L, &ix.tabv, &rd->argv[0]); | ||
939 | rec_mm_lookup(J, &ix, MM_metatable); /* Guard for no __metatable field. */ | ||
940 | fref = emitir(IRT(IR_FREF, IRT_PTR), tr, IRFL_TAB_META); | ||
941 | mtref = tref_isnil(mt) ? lj_ir_knull(J, IRT_TAB) : mt; | ||
942 | emitir(IRT(IR_FSTORE, IRT_TAB), fref, mtref); | ||
943 | if (!tref_isnil(mt)) | ||
944 | emitir(IRT(IR_TBAR, IRT_TAB), tr, 0); | ||
945 | res[0] = tr; | ||
946 | J->needsnap = 1; | ||
947 | } /* else: Interpreter will throw. */ | ||
948 | } | ||
949 | |||
950 | static void recff_rawget(jit_State *J, TRef *res, RecordFFData *rd) | ||
951 | { | ||
952 | if (tref_istab(arg[0]) && arg[1]) { | ||
953 | RecordIndex ix; | ||
954 | ix.tab = arg[0]; ix.key = arg[1]; ix.val = 0; ix.idxchain = 0; | ||
955 | settabV(J->L, &ix.tabv, tabV(&rd->argv[0])); | ||
956 | copyTV(J->L, &ix.keyv, &rd->argv[1]); | ||
957 | res[0] = rec_idx(J, &ix); | ||
958 | } /* else: Interpreter will throw. */ | ||
959 | } | ||
960 | |||
961 | static void recff_rawset(jit_State *J, TRef *res, RecordFFData *rd) | ||
962 | { | ||
963 | if (tref_istab(arg[0]) && arg[1] && arg[2]) { | ||
964 | RecordIndex ix; | ||
965 | ix.tab = arg[0]; ix.key = arg[1]; ix.val = arg[2]; ix.idxchain = 0; | ||
966 | settabV(J->L, &ix.tabv, tabV(&rd->argv[0])); | ||
967 | copyTV(J->L, &ix.keyv, &rd->argv[1]); | ||
968 | copyTV(J->L, &ix.valv, &rd->argv[2]); | ||
969 | rec_idx(J, &ix); | ||
970 | res[0] = arg[0]; /* Returns table. */ | ||
971 | } /* else: Interpreter will throw. */ | ||
972 | } | ||
973 | |||
974 | static void recff_rawequal(jit_State *J, TRef *res, RecordFFData *rd) | ||
975 | { | ||
976 | if (arg[0] && arg[1]) { | ||
977 | int diff = rec_objcmp(J, arg[0], arg[1], &rd->argv[0], &rd->argv[1]); | ||
978 | res[0] = diff ? TREF_FALSE : TREF_TRUE; | ||
979 | } /* else: Interpreter will throw. */ | ||
980 | } | ||
981 | |||
982 | static void recff_tonumber(jit_State *J, TRef *res, RecordFFData *rd) | ||
983 | { | ||
984 | TRef tr = arg[0]; | ||
985 | if (tref_isnumber_str(tr)) { | ||
986 | if (arg[1]) { | ||
987 | TRef base = lj_ir_toint(J, arg[1]); | ||
988 | if (!tref_isk(base) || IR(tref_ref(base))->i != 10) | ||
989 | recff_err_ffu(J, rd); | ||
990 | } | ||
991 | if (tref_isstr(tr)) | ||
992 | tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0); | ||
993 | } else { | ||
994 | tr = TREF_NIL; | ||
995 | } | ||
996 | res[0] = tr; | ||
997 | UNUSED(rd); | ||
998 | } | ||
999 | |||
1000 | static void recff_tostring(jit_State *J, TRef *res, RecordFFData *rd) | ||
1001 | { | ||
1002 | TRef tr = arg[0]; | ||
1003 | if (tref_isstr(tr)) { | ||
1004 | /* Ignore __tostring in the string base metatable. */ | ||
1005 | res[0] = tr; | ||
1006 | } else { | ||
1007 | RecordIndex ix; | ||
1008 | ix.tab = tr; | ||
1009 | copyTV(J->L, &ix.tabv, &rd->argv[0]); | ||
1010 | if (rec_mm_lookup(J, &ix, MM_tostring)) { /* Has __tostring metamethod? */ | ||
1011 | res[0] = ix.mobj; | ||
1012 | copyTV(J->L, rd->argv - 1, &ix.mobjv); | ||
1013 | if (!rec_call(J, (BCReg)(res - J->base), 1, 1)) /* Pending call? */ | ||
1014 | rd->cres = CALLRES_PENDING; | ||
1015 | /* Otherwise res[0] already contains the result. */ | ||
1016 | } else if (tref_isnumber(tr)) { | ||
1017 | res[0] = emitir(IRT(IR_TOSTR, IRT_STR), tr, 0); | ||
1018 | } else { | ||
1019 | recff_err_ffu(J, rd); | ||
1020 | } | ||
1021 | } | ||
1022 | } | ||
1023 | |||
1024 | static void recff_ipairs_aux(jit_State *J, TRef *res, RecordFFData *rd) | ||
1025 | { | ||
1026 | RecordIndex ix; | ||
1027 | ix.tab = arg[0]; | ||
1028 | if (tref_istab(ix.tab)) { | ||
1029 | if (!tvisnum(&rd->argv[1])) /* No support for string coercion. */ | ||
1030 | lj_trace_err(J, LJ_TRERR_BADTYPE); | ||
1031 | setnumV(&ix.keyv, numV(&rd->argv[1])+(lua_Number)1); | ||
1032 | settabV(J->L, &ix.tabv, tabV(&rd->argv[0])); | ||
1033 | ix.val = 0; ix.idxchain = 0; | ||
1034 | ix.key = lj_ir_toint(J, arg[1]); | ||
1035 | res[0] = ix.key = emitir(IRTI(IR_ADD), ix.key, lj_ir_kint(J, 1)); | ||
1036 | res[1] = rec_idx(J, &ix); | ||
1037 | rd->nres = tref_isnil(res[1]) ? 0 : 2; | ||
1038 | } /* else: Interpreter will throw. */ | ||
1039 | } | ||
1040 | |||
1041 | static void recff_ipairs(jit_State *J, TRef *res, RecordFFData *rd) | ||
1042 | { | ||
1043 | TRef tab = arg[0]; | ||
1044 | if (tref_istab(tab)) { | ||
1045 | res[0] = lj_ir_kfunc(J, funcV(&rd->fn->c.upvalue[0])); | ||
1046 | res[1] = tab; | ||
1047 | res[2] = lj_ir_kint(J, 0); | ||
1048 | rd->nres = 3; | ||
1049 | } /* else: Interpreter will throw. */ | ||
1050 | } | ||
1051 | |||
1052 | static void recff_pcall(jit_State *J, TRef *res, RecordFFData *rd) | ||
1053 | { | ||
1054 | if (rd->nargs >= 1) { | ||
1055 | BCReg parg = (BCReg)(arg - J->base); | ||
1056 | if (rec_call(J, parg, CALLRES_MULTI, rd->nargs - 1)) { /* Resolved call. */ | ||
1057 | res[0] = TREF_TRUE; /* Prepend true result. No need to move results. */ | ||
1058 | rd->nres = (int)((J->maxslot - parg) + 1); | ||
1059 | } else { /* Propagate pending call. */ | ||
1060 | rd->cres = CALLRES_PENDING; | ||
1061 | } | ||
1062 | } /* else: Interpreter will throw. */ | ||
1063 | } | ||
1064 | |||
1065 | /* Struct to pass context across lj_vm_cpcall. */ | ||
1066 | typedef struct RecordXpcall { | ||
1067 | BCReg parg; | ||
1068 | int nargs; | ||
1069 | int resolved; | ||
1070 | } RecordXpcall; | ||
1071 | |||
1072 | static TValue *recff_xpcall_cp(lua_State *L, lua_CFunction dummy, void *ud) | ||
1073 | { | ||
1074 | jit_State *J = L2J(L); | ||
1075 | RecordXpcall *rx = (RecordXpcall *)ud; | ||
1076 | UNUSED(dummy); | ||
1077 | rx->resolved = rec_call(J, rx->parg, CALLRES_MULTI, rx->nargs); | ||
1078 | return NULL; | ||
1079 | } | ||
1080 | |||
1081 | static void recff_xpcall(jit_State *J, TRef *res, RecordFFData *rd) | ||
1082 | { | ||
1083 | if (rd->nargs >= 2) { | ||
1084 | RecordXpcall rx; | ||
1085 | BCReg parg = (BCReg)(arg - J->base) + 1; | ||
1086 | TRef tmp; | ||
1087 | TValue argv0, argv1; | ||
1088 | ptrdiff_t oargv; | ||
1089 | int errcode; | ||
1090 | /* Swap function and traceback. */ | ||
1091 | tmp = arg[0]; arg[0] = arg[1]; arg[1] = tmp; | ||
1092 | copyTV(J->L, &argv0, &rd->argv[0]); | ||
1093 | copyTV(J->L, &argv1, &rd->argv[1]); | ||
1094 | copyTV(J->L, &rd->argv[0], &argv1); | ||
1095 | copyTV(J->L, &rd->argv[1], &argv0); | ||
1096 | oargv = savestack(J->L, rd->argv); | ||
1097 | /* Need to protect rec_call because the recorder may throw. */ | ||
1098 | rx.parg = parg; | ||
1099 | rx.nargs = rd->nargs - 2; | ||
1100 | errcode = lj_vm_cpcall(J->L, recff_xpcall_cp, NULL, &rx); | ||
1101 | /* Always undo Lua stack swap to avoid confusing the interpreter. */ | ||
1102 | rd->argv = restorestack(J->L, oargv); /* Stack may have been resized. */ | ||
1103 | copyTV(J->L, &rd->argv[0], &argv0); | ||
1104 | copyTV(J->L, &rd->argv[1], &argv1); | ||
1105 | if (errcode) | ||
1106 | lj_err_throw(J->L, errcode); /* Propagate errors. */ | ||
1107 | if (rx.resolved) { /* Resolved call. */ | ||
1108 | int i, nres = (int)(J->maxslot - parg); | ||
1109 | rd->nres = nres + 1; | ||
1110 | res[0] = TREF_TRUE; /* Prepend true result. */ | ||
1111 | for (i = 1; i <= nres; i++) /* Move results down. */ | ||
1112 | res[i] = res[i+1]; | ||
1113 | } else { /* Propagate pending call. */ | ||
1114 | rd->cres = CALLRES_PENDING; | ||
1115 | } | ||
1116 | } /* else: Interpreter will throw. */ | ||
1117 | } | ||
1118 | |||
1119 | /* -- Math library fast functions ----------------------------------------- */ | ||
1120 | |||
1121 | static void recff_math_abs(jit_State *J, TRef *res, RecordFFData *rd) | ||
1122 | { | ||
1123 | TRef tr = lj_ir_tonum(J, arg[0]); | ||
1124 | res[0] = emitir(IRTN(IR_ABS), tr, lj_ir_knum_abs(J)); | ||
1125 | UNUSED(rd); | ||
1126 | } | ||
1127 | |||
1128 | /* Record rounding functions math.floor and math.ceil. */ | ||
1129 | static void recff_math_round(jit_State *J, TRef *res, RecordFFData *rd) | ||
1130 | { | ||
1131 | if (tref_isinteger(arg[0])) | ||
1132 | res[0] = arg[0]; | ||
1133 | else | ||
1134 | res[0] = emitir(IRTN(IR_FPMATH), lj_ir_tonum(J, arg[0]), rd->data); | ||
1135 | /* Note: result is integral (or NaN/Inf), but may not fit into an integer. */ | ||
1136 | } | ||
1137 | |||
1138 | /* Record unary math.* functions, mapped to IR_FPMATH opcode. */ | ||
1139 | static void recff_math_unary(jit_State *J, TRef *res, RecordFFData *rd) | ||
1140 | { | ||
1141 | res[0] = emitir(IRTN(IR_FPMATH), lj_ir_tonum(J, arg[0]), rd->data); | ||
1142 | } | ||
1143 | |||
1144 | /* Record binary math.* functions math.atan2 and math.ldexp. */ | ||
1145 | static void recff_math_binary(jit_State *J, TRef *res, RecordFFData *rd) | ||
1146 | { | ||
1147 | TRef tr = lj_ir_tonum(J, arg[0]); | ||
1148 | res[0] = emitir(IRTN(rd->data), tr, lj_ir_tonum(J, arg[1])); | ||
1149 | } | ||
1150 | |||
1151 | /* Record math.asin, math.acos, math.atan. */ | ||
1152 | static void recff_math_atrig(jit_State *J, TRef *res, RecordFFData *rd) | ||
1153 | { | ||
1154 | TRef y = lj_ir_tonum(J, arg[0]); | ||
1155 | TRef x = lj_ir_knum_one(J); | ||
1156 | uint32_t ffid = rd->data; | ||
1157 | if (ffid != FF_math_atan) { | ||
1158 | TRef tmp = emitir(IRTN(IR_MUL), y, y); | ||
1159 | tmp = emitir(IRTN(IR_SUB), x, tmp); | ||
1160 | tmp = emitir(IRTN(IR_FPMATH), tmp, IRFPM_SQRT); | ||
1161 | if (ffid == FF_math_asin) { x = tmp; } else { x = y; y = tmp; } | ||
1162 | } | ||
1163 | res[0] = emitir(IRTN(IR_ATAN2), y, x); | ||
1164 | } | ||
1165 | |||
1166 | static void recff_math_modf(jit_State *J, TRef *res, RecordFFData *rd) | ||
1167 | { | ||
1168 | TRef tr = arg[0]; | ||
1169 | if (tref_isinteger(arg[0])) { | ||
1170 | res[0] = tr; | ||
1171 | res[1] = lj_ir_kint(J, 0); | ||
1172 | } else { | ||
1173 | tr = lj_ir_tonum(J, tr); | ||
1174 | res[0] = emitir(IRTN(IR_FPMATH), tr, IRFPM_TRUNC); | ||
1175 | res[1] = emitir(IRTN(IR_SUB), tr, res[0]); | ||
1176 | } | ||
1177 | rd->nres = 2; | ||
1178 | } | ||
1179 | |||
1180 | static void recff_math_degrad(jit_State *J, TRef *res, RecordFFData *rd) | ||
1181 | { | ||
1182 | TRef tr = lj_ir_tonum(J, arg[0]); | ||
1183 | res[0] = emitir(IRTN(IR_MUL), tr, lj_ir_knum(J, numV(&rd->fn->c.upvalue[0]))); | ||
1184 | } | ||
1185 | |||
1186 | static void recff_math_pow(jit_State *J, TRef *res, RecordFFData *rd) | ||
1187 | { | ||
1188 | if (!tref_isnumber_str(arg[1])) | ||
1189 | lj_trace_err(J, LJ_TRERR_BADTYPE); | ||
1190 | res[0] = lj_opt_narrow_pow(J, lj_ir_tonum(J, arg[0]), arg[1], &rd->argv[1]); | ||
1191 | UNUSED(rd); | ||
1192 | } | ||
1193 | |||
1194 | static void recff_math_minmax(jit_State *J, TRef *res, RecordFFData *rd) | ||
1195 | { | ||
1196 | TRef tr = lj_ir_tonum(J, arg[0]); | ||
1197 | uint32_t op = rd->data; | ||
1198 | BCReg i; | ||
1199 | for (i = 1; arg[i]; i++) | ||
1200 | tr = emitir(IRTN(op), tr, lj_ir_tonum(J, arg[i])); | ||
1201 | res[0] = tr; | ||
1202 | } | ||
1203 | |||
1204 | /* -- Bit library fast functions ------------------------------------------ */ | ||
1205 | |||
1206 | /* Record unary bit.tobit, bit.bnot, bit.bswap. */ | ||
1207 | static void recff_bit_unary(jit_State *J, TRef *res, RecordFFData *rd) | ||
1208 | { | ||
1209 | TRef tr = lj_ir_tobit(J, arg[0]); | ||
1210 | res[0] = (rd->data == IR_TOBIT) ? tr : emitir(IRTI(rd->data), tr, 0); | ||
1211 | } | ||
1212 | |||
1213 | /* Record N-ary bit.band, bit.bor, bit.bxor. */ | ||
1214 | static void recff_bit_nary(jit_State *J, TRef *res, RecordFFData *rd) | ||
1215 | { | ||
1216 | TRef tr = lj_ir_tobit(J, arg[0]); | ||
1217 | uint32_t op = rd->data; | ||
1218 | BCReg i; | ||
1219 | for (i = 1; arg[i]; i++) | ||
1220 | tr = emitir(IRTI(op), tr, lj_ir_tobit(J, arg[i])); | ||
1221 | res[0] = tr; | ||
1222 | } | ||
1223 | |||
1224 | /* Record bit shifts. */ | ||
1225 | static void recff_bit_shift(jit_State *J, TRef *res, RecordFFData *rd) | ||
1226 | { | ||
1227 | TRef tr = lj_ir_tobit(J, arg[0]); | ||
1228 | TRef tsh = lj_ir_tobit(J, arg[1]); | ||
1229 | #if !LJ_TARGET_MASKEDSHIFT | ||
1230 | if (!tref_isk(tsh)) | ||
1231 | tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 31)); | ||
1232 | #endif | ||
1233 | res[0] = emitir(IRTI(rd->data), tr, tsh); | ||
1234 | } | ||
1235 | |||
1236 | /* -- String library fast functions --------------------------------------- */ | ||
1237 | |||
1238 | static void recff_string_len(jit_State *J, TRef *res, RecordFFData *rd) | ||
1239 | { | ||
1240 | res[0] = emitir(IRTI(IR_FLOAD), lj_ir_tostr(J, arg[0]), IRFL_STR_LEN); | ||
1241 | UNUSED(rd); | ||
1242 | } | ||
1243 | |||
1244 | /* Handle string.byte (rd->data = 0) and string.sub (rd->data = 1). The runtime values start/end select which guards are emitted for the recorded trstart/trend. */ | ||
1245 | static void recff_string_range(jit_State *J, TRef *res, RecordFFData *rd) | ||
1246 | { | ||
1247 | TRef trstr = lj_ir_tostr(J, arg[0]); | ||
1248 | TRef trlen = emitir(IRTI(IR_FLOAD), trstr, IRFL_STR_LEN); | ||
1249 | TRef tr0 = lj_ir_kint(J, 0); | ||
1250 | TRef trstart, trend; | ||
1251 | GCstr *str = argv2str(J, &rd->argv[0]); | ||
1252 | int32_t start, end; | ||
1253 | if (rd->data) { /* string.sub(str, start [,end]) */ | ||
1254 | trstart = lj_ir_toint(J, arg[1]); | ||
1255 | trend = tref_isnil(arg[2]) ? lj_ir_kint(J, -1) : lj_ir_toint(J, arg[2]); | ||
1256 | start = argv2int(J, &rd->argv[1]); | ||
1257 | end = tref_isnil(arg[2]) ? -1 : argv2int(J, &rd->argv[2]); | ||
1258 | } else { /* string.byte(str, [,start [,end]]) */ | ||
1259 | if (arg[1]) { | ||
1260 | trstart = lj_ir_toint(J, arg[1]); | ||
1261 | trend = tref_isnil(arg[2]) ? trstart : lj_ir_toint(J, arg[2]); | ||
1262 | start = argv2int(J, &rd->argv[1]); | ||
1263 | end = tref_isnil(arg[2]) ? start : argv2int(J, &rd->argv[2]); | ||
1264 | } else { /* No start given: use position 1 for both start and end. */ | ||
1265 | trend = trstart = lj_ir_kint(J, 1); | ||
1266 | end = start = 1; | ||
1267 | } | ||
1268 | } | ||
1269 | if (end < 0) { /* Negative end: guard the sign and rebase it as len+end+1. */ | ||
1270 | emitir(IRTGI(IR_LT), trend, tr0); | ||
1271 | trend = emitir(IRTI(IR_ADD), emitir(IRTI(IR_ADD), trlen, trend), | ||
1272 | lj_ir_kint(J, 1)); | ||
1273 | end = end+(int32_t)str->len+1; | ||
1274 | } else if ((MSize)end <= str->len) { /* End within the string: guard that it stays there. */ | ||
1275 | emitir(IRTGI(IR_ULE), trend, trlen); | ||
1276 | } else { /* End beyond the string: guard that and clamp to the length. */ | ||
1277 | emitir(IRTGI(IR_GT), trend, trlen); | ||
1278 | end = (int32_t)str->len; | ||
1279 | trend = trlen; | ||
1280 | } | ||
1281 | if (start < 0) { /* Negative start: guard the sign and rebase it as len+start. */ | ||
1282 | emitir(IRTGI(IR_LT), trstart, tr0); | ||
1283 | trstart = emitir(IRTI(IR_ADD), trlen, trstart); | ||
1284 | start = start+(int32_t)str->len; | ||
1285 | emitir(start < 0 ? IRTGI(IR_LT) : IRTGI(IR_GE), trstart, tr0); | ||
1286 | if (start < 0) { /* Still negative: clamp to offset 0. */ | ||
1287 | trstart = tr0; | ||
1288 | start = 0; | ||
1289 | } | ||
1290 | } else { | ||
1291 | if (start == 0) { | ||
1292 | emitir(IRTGI(IR_EQ), trstart, tr0); | ||
1293 | trstart = tr0; | ||
1294 | } else { /* Positive start: subtract one to get a 0-based offset. */ | ||
1295 | trstart = emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, -1)); | ||
1296 | emitir(IRTGI(IR_GE), trstart, tr0); | ||
1297 | start--; | ||
1298 | } | ||
1299 | } | ||
1300 | if (rd->data) { /* Return string.sub result. */ | ||
1301 | if (end - start >= 0) { | ||
1302 | /* Also handle empty range here, to avoid extra traces. */ | ||
1303 | TRef trptr, trslen = emitir(IRTI(IR_SUB), trend, trstart); | ||
1304 | emitir(IRTGI(IR_GE), trslen, tr0); | ||
1305 | trptr = emitir(IRT(IR_STRREF, IRT_PTR), trstr, trstart); | ||
1306 | res[0] = emitir(IRT(IR_SNEW, IRT_STR), trptr, trslen); | ||
1307 | } else { /* Range underflow: return empty string. */ | ||
1308 | emitir(IRTGI(IR_LT), trend, trstart); | ||
1309 | res[0] = lj_ir_kstr(J, lj_str_new(J->L, strdata(str), 0)); | ||
1310 | } | ||
1311 | } else { /* Return string.byte result(s). */ | ||
1312 | int32_t i, len = end - start; | ||
1313 | if (len > 0) { | ||
1314 | TRef trslen = emitir(IRTI(IR_SUB), trend, trstart); | ||
1315 | emitir(IRTGI(IR_EQ), trslen, lj_ir_kint(J, len)); /* The trace is specialized to this exact result count. */ | ||
1316 | if (res + len > J->slot + LJ_MAX_JSLOTS) /* Results would not fit into the slot array. */ | ||
1317 | lj_trace_err(J, LJ_TRERR_STACKOV); | ||
1318 | rd->nres = len; | ||
1319 | for (i = 0; i < len; i++) { /* One U8 load per returned byte. */ | ||
1320 | TRef tmp = emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, i)); | ||
1321 | tmp = emitir(IRT(IR_STRREF, IRT_PTR), trstr, tmp); | ||
1322 | res[i] = emitir(IRT(IR_XLOAD, IRT_U8), tmp, 0); | ||
1323 | } | ||
1324 | } else { /* Empty range or range underflow: return no results. */ | ||
1325 | emitir(IRTGI(IR_LE), trend, trstart); | ||
1326 | rd->nres = 0; | ||
1327 | } | ||
1328 | } | ||
1329 | } | ||
1330 | |||
1331 | /* -- Table library fast functions ---------------------------------------- */ | ||
1332 | |||
1333 | static void recff_table_getn(jit_State *J, TRef *res, RecordFFData *rd) | ||
1334 | { | ||
1335 | if (tref_istab(arg[0])) { | ||
1336 | res[0] = emitir(IRTI(IR_TLEN), arg[0], 0); | ||
1337 | } /* else: Interpreter will throw. */ | ||
1338 | UNUSED(rd); | ||
1339 | } | ||
1340 | |||
1341 | /* -- Record calls and returns -------------------------------------------- */ | ||
1342 | |||
1343 | #undef arg | ||
1344 | |||
1345 | #include "lj_recdef.h" | ||
1346 | |||
1347 | /* Record return. */ | ||
1348 | static void rec_ret(jit_State *J, BCReg rbase, int gotresults) | ||
1349 | { | ||
1350 | TValue *frame = J->L->base - 1; | ||
1351 | TRef *res = J->base + rbase; | ||
1352 | J->tailcalled = 0; | ||
1353 | while (frame_ispcall(frame)) { | ||
1354 | BCReg cbase = (BCReg)frame_delta(frame); | ||
1355 | lua_assert(J->baseslot > 1); | ||
1356 | J->baseslot -= (BCReg)cbase; | ||
1357 | J->base -= cbase; | ||
1358 | *--res = TREF_TRUE; /* Prepend true to results. */ | ||
1359 | gotresults++; | ||
1360 | J->framedepth--; | ||
1361 | frame = frame_prevd(frame); | ||
1362 | } | ||
1363 | if (J->framedepth-- <= 0) | ||
1364 | lj_trace_err(J, LJ_TRERR_NYIRETL); | ||
1365 | lua_assert(J->baseslot > 1); | ||
1366 | if (frame_islua(frame)) { | ||
1367 | BCIns callins = *(J->pc = frame_pc(frame)-1); | ||
1368 | ptrdiff_t nresults = bc_b(callins) ? (int)bc_b(callins)-1 : gotresults; | ||
1369 | BCReg cbase = bc_a(callins); | ||
1370 | int i; | ||
1371 | for (i = 0; i < nresults; i++) | ||
1372 | J->base[i-1] = i < gotresults ? res[i] : TREF_NIL; | ||
1373 | J->maxslot = cbase+(BCReg)nresults; | ||
1374 | J->baseslot -= cbase+1; | ||
1375 | J->base -= cbase+1; | ||
1376 | } else if (frame_iscont(frame)) { | ||
1377 | ASMFunction cont = frame_contf(frame); | ||
1378 | BCReg i, cbase = (BCReg)frame_delta(frame); | ||
1379 | J->pc = frame_contpc(frame)-1; | ||
1380 | J->baseslot -= (BCReg)cbase; | ||
1381 | J->base -= cbase; | ||
1382 | /* Shrink maxslot as much as possible after return from continuation. */ | ||
1383 | for (i = cbase-2; i > 0 && J->base[i] == 0; i--) ; | ||
1384 | J->maxslot = i; | ||
1385 | if (cont == lj_cont_ra) { | ||
1386 | /* Copy result to destination slot. */ | ||
1387 | BCReg dst = bc_a(*J->pc); | ||
1388 | J->base[dst] = res[0]; | ||
1389 | if (dst > J->maxslot) J->maxslot = dst+1; | ||
1390 | } else if (cont == lj_cont_nop) { | ||
1391 | /* Nothing to do here. */ | ||
1392 | } else if (cont == lj_cont_cat) { | ||
1393 | lua_assert(0); | ||
1394 | } else { | ||
1395 | /* Result type already specialized. */ | ||
1396 | lua_assert(cont == lj_cont_condf || cont == lj_cont_condt); | ||
1397 | } | ||
1398 | } else { | ||
1399 | lua_assert(0); | ||
1400 | } | ||
1401 | lua_assert(J->baseslot >= 1); | ||
1402 | } | ||
1403 | |||
1404 | /* Check unroll limits for calls. */ | ||
1405 | static void check_call_unroll(jit_State *J, GCfunc *fn) | ||
1406 | { | ||
1407 | TValue *first = J->L->base - J->baseslot; | ||
1408 | TValue *frame = J->L->base - 1; | ||
1409 | int count = 0; | ||
1410 | while (frame > first) { | ||
1411 | if (frame_func(frame) == fn) | ||
1412 | count++; | ||
1413 | if (frame_isvarg(frame)) | ||
1414 | frame = frame_prevd(frame); | ||
1415 | frame = frame_prev(frame); | ||
1416 | } | ||
1417 | if (frame_func(first) == fn && bc_op(J->cur.startins) == BC_CALL) { | ||
1418 | if (count >= J->param[JIT_P_recunroll]) | ||
1419 | lj_trace_err(J, LJ_TRERR_NYIRECU); | ||
1420 | } else { | ||
1421 | if (count >= J->param[JIT_P_callunroll]) | ||
1422 | lj_trace_err(J, LJ_TRERR_CUNROLL); | ||
1423 | } | ||
1424 | } | ||
1425 | |||
1426 | /* Record call. Returns 0 for pending calls and 1 for resolved calls. */ | ||
1427 | static int rec_call(jit_State *J, BCReg func, int cres, int nargs) | ||
1428 | { | ||
1429 | RecordFFData rd; | ||
1430 | TRef *res = &J->base[func]; | ||
1431 | TValue *tv = &J->L->base[func]; | ||
1432 | |||
1433 | if (tref_isfunc(res[0])) { /* Regular function call. */ | ||
1434 | rd.fn = funcV(tv); | ||
1435 | rd.argv = tv+1; | ||
1436 | } else { /* Otherwise resolve __call metamethod for called object. */ | ||
1437 | RecordIndex ix; | ||
1438 | int i; | ||
1439 | ix.tab = res[0]; | ||
1440 | copyTV(J->L, &ix.tabv, tv); | ||
1441 | if (!rec_mm_lookup(J, &ix, MM_call) || !tref_isfunc(ix.mobj)) | ||
1442 | lj_trace_err(J, LJ_TRERR_NOMM); | ||
1443 | /* Update the recorder state, but not the Lua stack. */ | ||
1444 | for (i = ++nargs; i > 0; i--) | ||
1445 | res[i] = res[i-1]; | ||
1446 | res[0] = ix.mobj; | ||
1447 | rd.fn = funcV(&ix.mobjv); | ||
1448 | rd.argv = tv; /* The called object is the 1st arg. */ | ||
1449 | } | ||
1450 | |||
1451 | /* Specialize to the runtime value of the called function. */ | ||
1452 | res[0] = emitir(IRTG(IR_FRAME, IRT_FUNC), res[0], lj_ir_kfunc(J, rd.fn)); | ||
1453 | |||
1454 | if (isluafunc(rd.fn)) { /* Record call to Lua function. */ | ||
1455 | GCproto *pt = funcproto(rd.fn); | ||
1456 | if ((pt->flags & PROTO_NO_JIT)) | ||
1457 | lj_trace_err(J, LJ_TRERR_CJITOFF); | ||
1458 | if ((pt->flags & PROTO_IS_VARARG)) { | ||
1459 | if (rd.fn->l.gate != lj_gate_lv) | ||
1460 | lj_trace_err(J, LJ_TRERR_NYILNKF); | ||
1461 | lj_trace_err(J, LJ_TRERR_NYIVF); | ||
1462 | } else { | ||
1463 | if (rd.fn->l.gate != lj_gate_lf) | ||
1464 | lj_trace_err(J, LJ_TRERR_NYILNKF); | ||
1465 | } | ||
1466 | check_call_unroll(J, rd.fn); | ||
1467 | if (cres == CALLRES_TAILCALL) { | ||
1468 | int i; | ||
1469 | /* Tailcalls can form a loop, so count towards the loop unroll limit. */ | ||
1470 | if (++J->tailcalled > J->loopunroll) | ||
1471 | lj_trace_err(J, LJ_TRERR_LUNROLL); | ||
1472 | for (i = 0; i <= nargs; i++) /* Move func + args down. */ | ||
1473 | J->base[i-1] = res[i]; | ||
1474 | /* Note: the new FRAME is now at J->base[-1] (even for slot #0). */ | ||
1475 | } else { /* Regular call. */ | ||
1476 | J->base += func+1; | ||
1477 | J->baseslot += func+1; | ||
1478 | J->framedepth++; | ||
1479 | } | ||
1480 | if (J->baseslot + pt->framesize >= LJ_MAX_JSLOTS) | ||
1481 | lj_trace_err(J, LJ_TRERR_STACKOV); | ||
1482 | /* Fill up missing args with nil. */ | ||
1483 | while (nargs < pt->numparams) | ||
1484 | J->base[nargs++] = TREF_NIL; | ||
1485 | /* The remaining slots should never be read before they are written. */ | ||
1486 | J->maxslot = pt->numparams; | ||
1487 | return 0; /* No result yet. */ | ||
1488 | } else { /* Record call to C function or fast function. */ | ||
1489 | uint32_t m = 0; | ||
1490 | res[1+nargs] = 0; | ||
1491 | rd.nargs = nargs; | ||
1492 | if (rd.fn->c.ffid < sizeof(recff_idmap)/sizeof(recff_idmap[0])) | ||
1493 | m = recff_idmap[rd.fn->c.ffid]; | ||
1494 | rd.data = m & 0xff; | ||
1495 | rd.cres = cres; | ||
1496 | rd.nres = 1; /* Default is one result. */ | ||
1497 | (recff_func[m >> 8])(J, res, &rd); /* Call recff_* handler. */ | ||
1498 | cres = rd.cres; | ||
1499 | if (cres >= 0) { | ||
1500 | /* Caller takes fixed number of results: local a,b = f() */ | ||
1501 | J->maxslot = func + (BCReg)cres; | ||
1502 | while (rd.nres < cres) /* Fill up missing results with nil. */ | ||
1503 | res[rd.nres++] = TREF_NIL; | ||
1504 | } else if (cres == CALLRES_MULTI) { | ||
1505 | /* Caller takes any number of results: return 1,f() */ | ||
1506 | J->maxslot = func + (BCReg)rd.nres; | ||
1507 | } else if (cres == CALLRES_TAILCALL) { | ||
1508 | /* Tail call: return f() */ | ||
1509 | rec_ret(J, func, rd.nres); | ||
1510 | } else if (cres == CALLRES_CONT) { | ||
1511 | /* Note: immediately resolved continuations must not change J->maxslot. */ | ||
1512 | res[rd.nres] = TREF_NIL; /* Turn 0 results into nil result. */ | ||
1513 | } else { | ||
1514 | J->framedepth++; | ||
1515 | lua_assert(cres == CALLRES_PENDING); | ||
1516 | return 0; /* Pending call, no result yet. */ | ||
1517 | } | ||
1518 | return 1; /* Result resolved immediately. */ | ||
1519 | } | ||
1520 | } | ||
1521 | |||
1522 | /* -- Record allocations -------------------------------------------------- */ | ||
1523 | |||
1524 | static TRef rec_tnew(jit_State *J, uint32_t ah) | ||
1525 | { | ||
1526 | uint32_t asize = ah & 0x7ff; | ||
1527 | uint32_t hbits = ah >> 11; | ||
1528 | if (asize == 0x7ff) asize = 0x801; | ||
1529 | return emitir(IRT(IR_TNEW, IRT_TAB), asize, hbits); | ||
1530 | } | ||
1531 | |||
1532 | /* -- Record bytecode ops ------------------------------------------------- */ | ||
1533 | |||
1534 | /* Optimize state after comparison. */ | ||
1535 | static void optstate_comp(jit_State *J, int cond) | ||
1536 | { | ||
1537 | BCIns jmpins = J->pc[1]; | ||
1538 | const BCIns *npc = J->pc + 2 + (cond ? bc_j(jmpins) : 0); | ||
1539 | SnapShot *snap = &J->cur.snap[J->cur.nsnap-1]; | ||
1540 | /* Avoid re-recording the comparison in side traces. */ | ||
1541 | J->cur.snapmap[snap->mapofs + snap->nslots] = u32ptr(npc); | ||
1542 | J->needsnap = 1; | ||
1543 | /* Shrink last snapshot if possible. */ | ||
1544 | if (bc_a(jmpins) < J->maxslot) { | ||
1545 | J->maxslot = bc_a(jmpins); | ||
1546 | lj_snap_shrink(J); | ||
1547 | } | ||
1548 | } | ||
1549 | |||
1550 | /* Record the next bytecode instruction (_before_ it's executed). */ | ||
1551 | void lj_record_ins(jit_State *J) | ||
1552 | { | ||
1553 | cTValue *lbase; | ||
1554 | RecordIndex ix; | ||
1555 | const BCIns *pc; | ||
1556 | BCIns ins; | ||
1557 | BCOp op; | ||
1558 | TRef ra, rb, rc; | ||
1559 | |||
1560 | /* Need snapshot before recording next bytecode (e.g. after a store). */ | ||
1561 | if (J->needsnap) { | ||
1562 | J->needsnap = 0; | ||
1563 | lj_snap_add(J); | ||
1564 | J->mergesnap = 1; | ||
1565 | } | ||
1566 | |||
1567 | /* Record only closed loops for root traces. */ | ||
1568 | pc = J->pc; | ||
1569 | if (J->framedepth == 0 && | ||
1570 | (MSize)((char *)pc - (char *)J->bc_min) >= J->bc_extent) | ||
1571 | lj_trace_err(J, LJ_TRERR_LLEAVE); | ||
1572 | |||
1573 | #ifdef LUA_USE_ASSERT | ||
1574 | rec_check_slots(J); | ||
1575 | rec_check_ir(J); | ||
1576 | #endif | ||
1577 | |||
1578 | /* Keep a copy of the runtime values of var/num/str operands. */ | ||
1579 | #define rav (&ix.valv) | ||
1580 | #define rbv (&ix.tabv) | ||
1581 | #define rcv (&ix.keyv) | ||
1582 | |||
1583 | lbase = J->L->base; | ||
1584 | ins = *pc; | ||
1585 | op = bc_op(ins); | ||
1586 | ra = bc_a(ins); | ||
1587 | ix.val = 0; | ||
1588 | switch (bcmode_a(op)) { | ||
1589 | case BCMvar: | ||
1590 | copyTV(J->L, rav, &lbase[ra]); ix.val = ra = getslot(J, ra); break; | ||
1591 | default: break; /* Handled later. */ | ||
1592 | } | ||
1593 | rb = bc_b(ins); | ||
1594 | rc = bc_c(ins); | ||
1595 | switch (bcmode_b(op)) { | ||
1596 | case BCMnone: rb = 0; rc = bc_d(ins); break; /* Upgrade rc to 'rd'. */ | ||
1597 | case BCMvar: | ||
1598 | copyTV(J->L, rbv, &lbase[rb]); ix.tab = rb = getslot(J, rb); break; | ||
1599 | case BCMnum: { lua_Number n = J->pt->k.n[rb]; | ||
1600 | setnumV(rbv, n); ix.tab = rb = lj_ir_knumint(J, n); } break; | ||
1601 | default: break; /* Handled later. */ | ||
1602 | } | ||
1603 | switch (bcmode_c(op)) { | ||
1604 | case BCMvar: | ||
1605 | copyTV(J->L, rcv, &lbase[rc]); ix.key = rc = getslot(J, rc); break; | ||
1606 | case BCMpri: setitype(rcv, (int32_t)~rc); rc = TREF_PRI(IRT_NIL+rc); break; | ||
1607 | case BCMnum: { lua_Number n = J->pt->k.n[rc]; | ||
1608 | setnumV(rcv, n); ix.key = rc = lj_ir_knumint(J, n); } break; | ||
1609 | case BCMstr: { GCstr *s = strref(J->pt->k.gc[~rc]); | ||
1610 | setstrV(J->L, rcv, s); ix.key = rc = lj_ir_kstr(J, s); } break; | ||
1611 | default: break; /* Handled later. */ | ||
1612 | } | ||
1613 | |||
1614 | switch (op) { | ||
1615 | |||
1616 | /* -- Comparison ops ---------------------------------------------------- */ | ||
1617 | |||
1618 | case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: | ||
1619 | /* Emit nothing for two numeric or string consts. */ | ||
1620 | if (!(tref_isk2(ra,rc) && tref_isnumber_str(ra) && tref_isnumber_str(rc))) { | ||
1621 | IRType ta = tref_type(ra); | ||
1622 | IRType tc = tref_type(rc); | ||
1623 | int irop; | ||
1624 | if (ta != tc) { | ||
1625 | /* Widen mixed number/int comparisons to number/number comparison. */ | ||
1626 | if (ta == IRT_INT && tc == IRT_NUM) { | ||
1627 | ra = emitir(IRTN(IR_TONUM), ra, 0); | ||
1628 | ta = IRT_NUM; | ||
1629 | } else if (ta == IRT_NUM && tc == IRT_INT) { | ||
1630 | rc = emitir(IRTN(IR_TONUM), rc, 0); | ||
1631 | } else if (!((ta == IRT_FALSE || ta == IRT_TRUE) && | ||
1632 | (tc == IRT_FALSE || tc == IRT_TRUE))) { | ||
1633 | break; /* Interpreter will throw for two different types. */ | ||
1634 | } | ||
1635 | } | ||
1636 | lj_snap_add(J); | ||
1637 | irop = (int)op - (int)BC_ISLT + (int)IR_LT; | ||
1638 | if (ta == IRT_NUM) { | ||
1639 | if ((irop & 1)) irop ^= 4; /* ISGE/ISGT are unordered. */ | ||
1640 | if (!lj_ir_numcmp(numV(rav), numV(rcv), (IROp)irop)) irop ^= 5; | ||
1641 | } else if (ta == IRT_INT) { | ||
1642 | if (!lj_ir_numcmp(numV(rav), numV(rcv), (IROp)irop)) irop ^= 1; | ||
1643 | } else if (ta == IRT_STR) { | ||
1644 | if (!lj_ir_strcmp(strV(rav), strV(rcv), (IROp)irop)) irop ^= 1; | ||
1645 | } else { | ||
1646 | rec_mm_comp(J, &ix, (int)op); | ||
1647 | break; | ||
1648 | } | ||
1649 | emitir(IRTG(irop, ta), ra, rc); | ||
1650 | optstate_comp(J, ((int)op ^ irop) & 1); | ||
1651 | } | ||
1652 | break; | ||
1653 | |||
1654 | case BC_ISEQV: case BC_ISNEV: | ||
1655 | case BC_ISEQS: case BC_ISNES: | ||
1656 | case BC_ISEQN: case BC_ISNEN: | ||
1657 | case BC_ISEQP: case BC_ISNEP: | ||
1658 | /* Emit nothing for two non-table, non-udata consts. */ | ||
1659 | if (!(tref_isk2(ra, rc) && !(tref_istab(ra) || tref_isudata(ra)))) { | ||
1660 | int diff; | ||
1661 | lj_snap_add(J); | ||
1662 | diff = rec_objcmp(J, ra, rc, rav, rcv); | ||
1663 | if (diff == 1 && (tref_istab(ra) || tref_isudata(ra))) { | ||
1664 | /* Only check __eq if different, but the same type (table or udata). */ | ||
1665 | rec_mm_equal(J, &ix, (int)op); | ||
1666 | break; | ||
1667 | } | ||
1668 | optstate_comp(J, ((int)op & 1) == !diff); | ||
1669 | } | ||
1670 | break; | ||
1671 | |||
1672 | /* -- Unary test and copy ops ------------------------------------------- */ | ||
1673 | |||
1674 | case BC_ISTC: case BC_ISFC: | ||
1675 | if ((op & 1) == tref_istruecond(rc)) | ||
1676 | rc = 0; /* Don't store if condition is not true. */ | ||
1677 | /* fallthrough */ | ||
1678 | case BC_IST: case BC_ISF: /* Type specialization suffices. */ | ||
1679 | if (bc_a(pc[1]) < J->maxslot) | ||
1680 | J->maxslot = bc_a(pc[1]); /* Shrink used slots. */ | ||
1681 | break; | ||
1682 | |||
1683 | /* -- Unary ops --------------------------------------------------------- */ | ||
1684 | |||
1685 | case BC_NOT: | ||
1686 | /* Type specialization already forces const result. */ | ||
1687 | rc = tref_istruecond(rc) ? TREF_FALSE : TREF_TRUE; | ||
1688 | break; | ||
1689 | |||
1690 | case BC_LEN: | ||
1691 | if (tref_isstr(rc)) { | ||
1692 | rc = emitir(IRTI(IR_FLOAD), rc, IRFL_STR_LEN); | ||
1693 | } else if (tref_istab(rc)) { | ||
1694 | rc = emitir(IRTI(IR_TLEN), rc, 0); | ||
1695 | } else { | ||
1696 | ix.tab = rc; | ||
1697 | copyTV(J->L, &ix.tabv, &ix.keyv); | ||
1698 | ix.key = IRT_NIL; | ||
1699 | setnilV(&ix.keyv); | ||
1700 | rc = rec_mm_arith(J, &ix, MM_len); | ||
1701 | } | ||
1702 | break; | ||
1703 | |||
1704 | /* -- Arithmetic ops ---------------------------------------------------- */ | ||
1705 | |||
1706 | case BC_UNM: | ||
1707 | if (tref_isnumber_str(rc)) { | ||
1708 | rc = lj_ir_tonum(J, rc); | ||
1709 | rc = emitir(IRTN(IR_NEG), rc, lj_ir_knum_neg(J)); | ||
1710 | } else { | ||
1711 | ix.tab = rc; | ||
1712 | copyTV(J->L, &ix.tabv, &ix.keyv); | ||
1713 | rc = rec_mm_arith(J, &ix, MM_unm); | ||
1714 | } | ||
1715 | break; | ||
1716 | |||
1717 | case BC_ADDNV: case BC_SUBNV: case BC_MULNV: case BC_DIVNV: case BC_MODNV: | ||
1718 | ix.tab = rc; ix.key = rc = rb; rb = ix.tab; | ||
1719 | copyTV(J->L, &ix.valv, &ix.tabv); | ||
1720 | copyTV(J->L, &ix.tabv, &ix.keyv); | ||
1721 | copyTV(J->L, &ix.keyv, &ix.valv); | ||
1722 | if (op == BC_MODNV) | ||
1723 | goto recmod; | ||
1724 | /* fallthrough */ | ||
1725 | case BC_ADDVN: case BC_SUBVN: case BC_MULVN: case BC_DIVVN: | ||
1726 | case BC_ADDVV: case BC_SUBVV: case BC_MULVV: case BC_DIVVV: { | ||
1727 | MMS mm = bcmode_mm(op); | ||
1728 | if (tref_isnumber_str(rb) && tref_isnumber_str(rc)) { | ||
1729 | rb = lj_ir_tonum(J, rb); | ||
1730 | rc = lj_ir_tonum(J, rc); | ||
1731 | rc = emitir(IRTN((int)mm - (int)MM_add + (int)IR_ADD), rb, rc); | ||
1732 | } else { | ||
1733 | rc = rec_mm_arith(J, &ix, mm); | ||
1734 | } | ||
1735 | break; | ||
1736 | } | ||
1737 | |||
1738 | case BC_MODVN: case BC_MODVV: | ||
1739 | recmod: | ||
1740 | if (tref_isnumber_str(rb) && tref_isnumber_str(rc)) | ||
1741 | rc = lj_opt_narrow_mod(J, rb, rc); | ||
1742 | else | ||
1743 | rc = rec_mm_arith(J, &ix, MM_mod); | ||
1744 | break; | ||
1745 | |||
1746 | case BC_POW: | ||
1747 | if (tref_isnumber_str(rb) && tref_isnumber_str(rc)) | ||
1748 | rc = lj_opt_narrow_pow(J, lj_ir_tonum(J, rb), rc, rcv); | ||
1749 | else | ||
1750 | rc = rec_mm_arith(J, &ix, MM_pow); | ||
1751 | break; | ||
1752 | |||
1753 | /* -- Constant and move ops --------------------------------------------- */ | ||
1754 | |||
1755 | case BC_KSTR: case BC_KNUM: case BC_KPRI: case BC_MOV: | ||
1756 | break; | ||
1757 | case BC_KSHORT: | ||
1758 | rc = lj_ir_kint(J, (int32_t)(int16_t)rc); | ||
1759 | break; | ||
1760 | case BC_KNIL: | ||
1761 | while (ra <= rc) | ||
1762 | J->base[ra++] = TREF_NIL; | ||
1763 | if (rc >= J->maxslot) J->maxslot = rc+1; | ||
1764 | break; | ||
1765 | |||
1766 | /* -- Upvalue and function ops ------------------------------------------ */ | ||
1767 | |||
1768 | case BC_UGET: | ||
1769 | rc = rec_upvalue(J, rc, 0); | ||
1770 | break; | ||
1771 | case BC_USETV: case BC_USETS: case BC_USETN: case BC_USETP: | ||
1772 | rec_upvalue(J, ra, rc); | ||
1773 | break; | ||
1774 | |||
1775 | /* -- Table ops --------------------------------------------------------- */ | ||
1776 | |||
1777 | case BC_GGET: case BC_GSET: | ||
1778 | settabV(J->L, &ix.tabv, tabref(J->fn->l.env)); | ||
1779 | ix.tab = emitir(IRT(IR_FLOAD, IRT_TAB), getcurrf(J), IRFL_FUNC_ENV); | ||
1780 | ix.idxchain = LJ_MAX_IDXCHAIN; | ||
1781 | rc = rec_idx(J, &ix); | ||
1782 | break; | ||
1783 | |||
1784 | case BC_TGETB: case BC_TSETB: | ||
1785 | setintV(&ix.keyv, (int32_t)rc); | ||
1786 | ix.key = lj_ir_kint(J, (int32_t)rc); | ||
1787 | /* fallthrough */ | ||
1788 | case BC_TGETV: case BC_TGETS: case BC_TSETV: case BC_TSETS: | ||
1789 | ix.idxchain = LJ_MAX_IDXCHAIN; | ||
1790 | rc = rec_idx(J, &ix); | ||
1791 | break; | ||
1792 | |||
1793 | case BC_TNEW: | ||
1794 | rc = rec_tnew(J, rc); | ||
1795 | break; | ||
1796 | case BC_TDUP: | ||
1797 | rc = emitir(IRT(IR_TDUP, IRT_TAB), | ||
1798 | lj_ir_ktab(J, tabref(J->pt->k.gc[~rc])), 0); | ||
1799 | break; | ||
1800 | |||
1801 | /* -- Calls and vararg handling ----------------------------------------- */ | ||
1802 | |||
1803 | case BC_ITERC: | ||
1804 | J->base[ra] = getslot(J, ra-3); | ||
1805 | J->base[ra+1] = getslot(J, ra-2); | ||
1806 | J->base[ra+2] = getslot(J, ra-1); | ||
1807 | { /* Have to do the actual copy now because rec_call needs the values. */ | ||
1808 | TValue *b = &J->L->base[ra]; | ||
1809 | copyTV(J->L, b, b-3); | ||
1810 | copyTV(J->L, b+1, b-2); | ||
1811 | copyTV(J->L, b+2, b-1); | ||
1812 | } | ||
1813 | goto callop; | ||
1814 | |||
1815 | case BC_CALLMT: | ||
1816 | rb = (TRef)(CALLRES_TAILCALL+1); | ||
1817 | /* fallthrough */ | ||
1818 | case BC_CALLM: | ||
1819 | /* L->top is set to L->base+ra+rc+NRESULTS-1+1, see lj_dispatch_ins(). */ | ||
1820 | rc = (BCReg)(J->L->top - J->L->base) - ra; | ||
1821 | goto callop; | ||
1822 | |||
1823 | case BC_CALLT: | ||
1824 | rb = (TRef)(CALLRES_TAILCALL+1); | ||
1825 | /* fallthrough */ | ||
1826 | case BC_CALL: | ||
1827 | callop: | ||
1828 | if (rb == (TRef)(CALLRES_TAILCALL+1)) { /* Tail call. */ | ||
1829 | } | ||
1830 | rec_call(J, ra, (int)(rb-1), (int)(rc-1)); | ||
1831 | break; | ||
1832 | |||
1833 | /* -- Returns ----------------------------------------------------------- */ | ||
1834 | |||
1835 | case BC_RETM: | ||
1836 | /* L->top is set to L->base+ra+rc+NRESULTS-1, see lj_dispatch_ins(). */ | ||
1837 | rc = (BCReg)(J->L->top - J->L->base) - ra + 1; | ||
1838 | /* fallthrough */ | ||
1839 | case BC_RET: case BC_RET0: case BC_RET1: | ||
1840 | rec_ret(J, ra, (int)(rc-1)); | ||
1841 | break; | ||
1842 | |||
1843 | /* -- Loops and branches ------------------------------------------------ */ | ||
1844 | |||
1845 | case BC_FORI: | ||
1846 | if (rec_for(J, pc, 0) != LOOPEV_LEAVE) | ||
1847 | J->loopref = J->cur.nins; | ||
1848 | break; | ||
1849 | case BC_JFORI: | ||
1850 | lua_assert(bc_op(pc[(ptrdiff_t)rc-BCBIAS_J]) == BC_JFORL); | ||
1851 | if (rec_for(J, pc, 0) != LOOPEV_LEAVE) /* Link to existing loop. */ | ||
1852 | rec_stop(J, bc_d(pc[(ptrdiff_t)rc-BCBIAS_J])); | ||
1853 | /* Continue tracing if the loop is not entered. */ | ||
1854 | break; | ||
1855 | |||
1856 | case BC_FORL: | ||
1857 | rec_loop_interp(J, pc, rec_for(J, pc+((ptrdiff_t)rc-BCBIAS_J), 1)); | ||
1858 | break; | ||
1859 | case BC_ITERL: | ||
1860 | rec_loop_interp(J, pc, rec_iterl(J, *pc)); | ||
1861 | break; | ||
1862 | case BC_LOOP: | ||
1863 | rec_loop_interp(J, pc, rec_loop(J, ra)); | ||
1864 | break; | ||
1865 | |||
1866 | case BC_JFORL: | ||
1867 | rec_loop_jit(J, rc, rec_for(J, pc+bc_j(J->trace[rc]->startins), 1)); | ||
1868 | break; | ||
1869 | case BC_JITERL: | ||
1870 | rec_loop_jit(J, rc, rec_iterl(J, J->trace[rc]->startins)); | ||
1871 | break; | ||
1872 | case BC_JLOOP: | ||
1873 | rec_loop_jit(J, rc, rec_loop(J, ra)); | ||
1874 | break; | ||
1875 | |||
1876 | case BC_IFORL: | ||
1877 | case BC_IITERL: | ||
1878 | case BC_ILOOP: | ||
1879 | lj_trace_err_info(J, LJ_TRERR_LBLACKL); | ||
1880 | break; | ||
1881 | |||
1882 | case BC_JMP: | ||
1883 | if (ra < J->maxslot) | ||
1884 | J->maxslot = ra; /* Shrink used slots. */ | ||
1885 | break; | ||
1886 | |||
1887 | case BC_CAT: | ||
1888 | case BC_UCLO: | ||
1889 | case BC_FNEW: | ||
1890 | case BC_TSETM: | ||
1891 | case BC_VARG: | ||
1892 | default: | ||
1893 | setintV(&J->errinfo, (int32_t)op); | ||
1894 | lj_trace_err_info(J, LJ_TRERR_NYIBC); | ||
1895 | break; | ||
1896 | } | ||
1897 | |||
1898 | /* rc == 0 if we have no result yet, e.g. pending __index metamethod call. */ | ||
1899 | if (bcmode_a(op) == BCMdst && rc) { | ||
1900 | J->base[ra] = rc; | ||
1901 | if (ra >= J->maxslot) J->maxslot = ra+1; | ||
1902 | } | ||
1903 | |||
1904 | #undef rav | ||
1905 | #undef rbv | ||
1906 | #undef rcv | ||
1907 | |||
1908 | /* Limit the number of recorded IR instructions. */ | ||
1909 | if (J->cur.nins > REF_FIRST+(IRRef)J->param[JIT_P_maxrecord]) | ||
1910 | lj_trace_err(J, LJ_TRERR_TRACEOV); | ||
1911 | } | ||
1912 | |||
1913 | /* -- Recording setup ----------------------------------------------------- */ | ||
1914 | |||
1915 | /* Setup recording for a FORL loop. */ | ||
1916 | static void rec_setup_forl(jit_State *J, const BCIns *fori) | ||
1917 | { | ||
1918 | BCReg ra = bc_a(*fori); | ||
1919 | cTValue *forbase = &J->L->base[ra]; | ||
1920 | IRType t = (J->flags & JIT_F_OPT_NARROW) ? lj_opt_narrow_forl(forbase) | ||
1921 | : IRT_NUM; | ||
1922 | TRef stop = fori_arg(J, fori-2, ra+FORL_STOP, t); | ||
1923 | TRef step = fori_arg(J, fori-1, ra+FORL_STEP, t); | ||
1924 | int dir = (0 <= numV(&forbase[FORL_STEP])); | ||
1925 | lua_assert(bc_op(*fori) == BC_FORI || bc_op(*fori) == BC_JFORI); | ||
1926 | if (!tref_isk(step)) { | ||
1927 | /* Non-constant step: need a guard for the direction. */ | ||
1928 | TRef zero = (t == IRT_INT) ? lj_ir_kint(J, 0) : lj_ir_knum_zero(J); | ||
1929 | emitir(IRTG(dir ? IR_GE : IR_LT, t), step, zero); | ||
1930 | /* Add hoistable overflow checks for a narrowed FORL index. */ | ||
1931 | if (t == IRT_INT) { | ||
1932 | if (tref_isk(stop)) { | ||
1933 | /* Constant stop: optimize check away or to a range check for step. */ | ||
1934 | int32_t k = IR(tref_ref(stop))->i; | ||
1935 | if (dir) { | ||
1936 | if (k > 0) | ||
1937 | emitir(IRTGI(IR_LE), step, lj_ir_kint(J, (int32_t)0x7fffffff-k)); | ||
1938 | } else { | ||
1939 | if (k < 0) | ||
1940 | emitir(IRTGI(IR_GE), step, lj_ir_kint(J, (int32_t)0x80000000-k)); | ||
1941 | } | ||
1942 | } else { | ||
1943 | /* Stop+step variable: need full overflow check (with dead result). */ | ||
1944 | emitir(IRTGI(IR_ADDOV), step, stop); | ||
1945 | } | ||
1946 | } | ||
1947 | } else if (t == IRT_INT && !tref_isk(stop)) { | ||
1948 | /* Constant step: optimize overflow check to a range check for stop. */ | ||
1949 | int32_t k = IR(tref_ref(step))->i; | ||
1950 | k = (int32_t)(dir ? 0x7fffffff : 0x80000000) - k; | ||
1951 | emitir(IRTGI(dir ? IR_LE : IR_GE), stop, lj_ir_kint(J, k)); | ||
1952 | } | ||
1953 | J->base[ra+FORL_EXT] = sloadt(J, (int32_t)(ra+FORL_IDX), t, IRSLOAD_INHERIT); | ||
1954 | J->maxslot = ra+FORL_EXT+1; | ||
1955 | } | ||
1956 | |||
1957 | /* Setup recording for a root trace started by a hot loop. */ | ||
1958 | static const BCIns *rec_setup_root(jit_State *J) | ||
1959 | { | ||
1960 | /* Determine the next PC and the bytecode range for the loop. */ | ||
1961 | const BCIns *pcj, *pc = J->pc; | ||
1962 | BCIns ins = *pc; | ||
1963 | BCReg ra = bc_a(ins); | ||
1964 | switch (bc_op(ins)) { | ||
1965 | case BC_FORL: | ||
1966 | J->bc_extent = (MSize)(-bc_j(ins))*sizeof(BCIns); | ||
1967 | pc += 1+bc_j(ins); | ||
1968 | J->bc_min = pc; | ||
1969 | break; | ||
1970 | case BC_ITERL: | ||
1971 | lua_assert(bc_op(pc[-1]) == BC_ITERC); | ||
1972 | J->maxslot = ra + bc_b(pc[-1]) - 1; | ||
1973 | J->bc_extent = (MSize)(-bc_j(ins))*sizeof(BCIns); | ||
1974 | pc += 1+bc_j(ins); | ||
1975 | lua_assert(bc_op(pc[-1]) == BC_JMP); | ||
1976 | J->bc_min = pc; | ||
1977 | break; | ||
1978 | case BC_LOOP: | ||
1979 | /* Only check BC range for real loops, but not for "repeat until true". */ | ||
1980 | pcj = pc + bc_j(ins); | ||
1981 | ins = *pcj; | ||
1982 | if (bc_op(ins) == BC_JMP && bc_j(ins) < 0) { | ||
1983 | J->bc_min = pcj+1 + bc_j(ins); | ||
1984 | J->bc_extent = (MSize)(-bc_j(ins))*sizeof(BCIns); | ||
1985 | } | ||
1986 | J->maxslot = ra; | ||
1987 | pc++; | ||
1988 | break; | ||
1989 | default: | ||
1990 | lua_assert(0); | ||
1991 | break; | ||
1992 | } | ||
1993 | return pc; | ||
1994 | } | ||
1995 | |||
1996 | /* Setup recording for a side trace. */ | ||
1997 | static void rec_setup_side(jit_State *J, Trace *T) | ||
1998 | { | ||
1999 | SnapShot *snap = &T->snap[J->exitno]; | ||
2000 | IRRef2 *map = &T->snapmap[snap->mapofs]; | ||
2001 | BCReg s, nslots = snap->nslots; | ||
2002 | BloomFilter seen = 0; | ||
2003 | for (s = 0; s < nslots; s++) { | ||
2004 | IRRef ref = snap_ref(map[s]); | ||
2005 | if (ref) { | ||
2006 | IRIns *ir = &T->ir[ref]; | ||
2007 | TRef tr = 0; | ||
2008 | /* The bloom filter avoids O(nslots^2) overhead for de-duping slots. */ | ||
2009 | if (bloomtest(seen, ref)) { | ||
2010 | BCReg j; | ||
2011 | for (j = 0; j < s; j++) | ||
2012 | if (snap_ref(map[j]) == ref) { | ||
2013 | if (ir->o == IR_FRAME && irt_isfunc(ir->t)) | ||
2014 | J->baseslot = s+1; | ||
2015 | tr = J->slot[j]; | ||
2016 | goto dupslot; | ||
2017 | } | ||
2018 | } | ||
2019 | bloomset(seen, ref); | ||
2020 | switch ((IROp)ir->o) { | ||
2021 | case IR_KPRI: tr = TREF_PRI(irt_type(ir->t)); break; | ||
2022 | case IR_KINT: tr = lj_ir_kint(J, ir->i); break; | ||
2023 | case IR_KGC: tr = lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t)); break; | ||
2024 | case IR_KNUM: tr = lj_ir_knum_addr(J, ir_knum(ir)); break; | ||
2025 | case IR_FRAME: /* Placeholder FRAMEs don't need a guard. */ | ||
2026 | if (irt_isfunc(ir->t)) { | ||
2027 | J->baseslot = s+1; | ||
2028 | J->framedepth++; | ||
2029 | tr = lj_ir_kfunc(J, ir_kfunc(&T->ir[ir->op2])); | ||
2030 | tr = emitir_raw(IRT(IR_FRAME, IRT_FUNC), tr, tr); | ||
2031 | } else { | ||
2032 | tr = lj_ir_kptr(J, mref(T->ir[ir->op2].ptr, void)); | ||
2033 | tr = emitir_raw(IRT(IR_FRAME, IRT_PTR), tr, tr); | ||
2034 | } | ||
2035 | break; | ||
2036 | case IR_SLOAD: /* Inherited SLOADs don't need a guard. */ | ||
2037 | tr = emitir_raw(ir->ot & ~IRT_GUARD, s, | ||
2038 | (ir->op2&IRSLOAD_READONLY) | IRSLOAD_INHERIT|IRSLOAD_PARENT); | ||
2039 | break; | ||
2040 | default: /* Parent refs are already typed and don't need a guard. */ | ||
2041 | tr = emitir_raw(IRT(IR_SLOAD, irt_type(ir->t)), s, | ||
2042 | IRSLOAD_INHERIT|IRSLOAD_PARENT); | ||
2043 | break; | ||
2044 | } | ||
2045 | dupslot: | ||
2046 | J->slot[s] = tr; | ||
2047 | } | ||
2048 | } | ||
2049 | J->base = J->slot + J->baseslot; | ||
2050 | J->maxslot = nslots - J->baseslot; | ||
2051 | lj_snap_add(J); | ||
2052 | } | ||
2053 | |||
2054 | /* Setup for recording a new trace. */ | ||
2055 | void lj_record_setup(jit_State *J) | ||
2056 | { | ||
2057 | uint32_t i; | ||
2058 | |||
2059 | /* Initialize state related to current trace. */ | ||
2060 | memset(J->slot, 0, sizeof(J->slot)); | ||
2061 | memset(J->chain, 0, sizeof(J->chain)); | ||
2062 | memset(J->bpropcache, 0, sizeof(J->bpropcache)); | ||
2063 | |||
2064 | J->baseslot = 1; /* Invoking function is at base[-1]. */ | ||
2065 | J->base = J->slot + J->baseslot; | ||
2066 | J->maxslot = 0; | ||
2067 | J->framedepth = 0; | ||
2068 | |||
2069 | J->instunroll = J->param[JIT_P_instunroll]; | ||
2070 | J->loopunroll = J->param[JIT_P_loopunroll]; | ||
2071 | J->tailcalled = 0; | ||
2072 | J->loopref = 0; | ||
2073 | |||
2074 | J->bc_min = NULL; /* Means no limit. */ | ||
2075 | J->bc_extent = ~(MSize)0; | ||
2076 | |||
2077 | /* Emit instructions for fixed references. Also triggers initial IR alloc. */ | ||
2078 | emitir_raw(IRT(IR_BASE, IRT_PTR), J->parent, J->exitno); | ||
2079 | for (i = 0; i <= 2; i++) { | ||
2080 | IRIns *ir = IR(REF_NIL-i); | ||
2081 | ir->i = 0; | ||
2082 | ir->t.irt = (uint8_t)(IRT_NIL+i); | ||
2083 | ir->o = IR_KPRI; | ||
2084 | ir->prev = 0; | ||
2085 | } | ||
2086 | J->cur.nk = REF_TRUE; | ||
2087 | |||
2088 | setgcref(J->cur.startpt, obj2gco(J->pt)); | ||
2089 | J->startpc = J->pc; | ||
2090 | if (J->parent) { /* Side trace. */ | ||
2091 | Trace *T = J->trace[J->parent]; | ||
2092 | TraceNo root = T->root ? T->root : J->parent; | ||
2093 | J->cur.root = (uint16_t)root; | ||
2094 | J->cur.startins = BCINS_AD(BC_JMP, 0, 0); | ||
2095 | /* Check whether we could at least potentially form an extra loop. */ | ||
2096 | if (J->exitno == 0 && T->snap[0].nslots == 1 && T->snapmap[0] == 0) { | ||
2097 | /* We can narrow a FORL for some side traces, too. */ | ||
2098 | if (J->pc > J->pt->bc && bc_op(J->pc[-1]) == BC_JFORI && | ||
2099 | bc_d(J->pc[bc_j(J->pc[-1])-1]) == root) { | ||
2100 | lj_snap_add(J); | ||
2101 | rec_setup_forl(J, J->pc-1); | ||
2102 | goto sidecheck; | ||
2103 | } | ||
2104 | } else { | ||
2105 | J->startpc = NULL; /* Prevent forming an extra loop. */ | ||
2106 | } | ||
2107 | rec_setup_side(J, T); | ||
2108 | sidecheck: | ||
2109 | if (J->trace[J->cur.root]->nchild >= J->param[JIT_P_maxside] || | ||
2110 | T->snap[J->exitno].count >= J->param[JIT_P_hotexit] + | ||
2111 | J->param[JIT_P_tryside]) | ||
2112 | rec_stop(J, TRACE_INTERP); | ||
2113 | } else { /* Root trace. */ | ||
2114 | J->cur.root = 0; | ||
2115 | if (J->pc >= J->pt->bc) { /* Not a hot CALL? */ | ||
2116 | J->cur.startins = *J->pc; | ||
2117 | J->pc = rec_setup_root(J); | ||
2118 | /* Note: the loop instruction itself is recorded at the end and not | ||
2119 | ** at the start! So snapshot #0 needs to point to the *next* instruction. | ||
2120 | */ | ||
2121 | } else { | ||
2122 | J->cur.startins = BCINS_ABC(BC_CALL, 0, 0, 0); | ||
2123 | } | ||
2124 | lj_snap_add(J); | ||
2125 | if (bc_op(J->cur.startins) == BC_FORL) | ||
2126 | rec_setup_forl(J, J->pc-1); | ||
2127 | if (1 + J->pt->framesize >= LJ_MAX_JSLOTS) | ||
2128 | lj_trace_err(J, LJ_TRERR_STACKOV); | ||
2129 | } | ||
2130 | } | ||
2131 | |||
2132 | #undef IR | ||
2133 | #undef emitir_raw | ||
2134 | #undef emitir | ||
2135 | |||
2136 | #endif | ||
diff --git a/src/lj_record.h b/src/lj_record.h new file mode 100644 index 00000000..7bb7952c --- /dev/null +++ b/src/lj_record.h | |||
@@ -0,0 +1,17 @@ | |||
1 | /* | ||
2 | ** Trace recorder (bytecode -> SSA IR). | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #ifndef _LJ_RECORD_H | ||
7 | #define _LJ_RECORD_H | ||
8 | |||
9 | #include "lj_obj.h" | ||
10 | #include "lj_jit.h" | ||
11 | |||
12 | #if LJ_HASJIT | ||
13 | LJ_FUNC void lj_record_ins(jit_State *J); | ||
14 | LJ_FUNC void lj_record_setup(jit_State *J); | ||
15 | #endif | ||
16 | |||
17 | #endif | ||
diff --git a/src/lj_snap.c b/src/lj_snap.c new file mode 100644 index 00000000..09cd095c --- /dev/null +++ b/src/lj_snap.c | |||
@@ -0,0 +1,286 @@ | |||
1 | /* | ||
2 | ** Snapshot handling. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #define lj_snap_c | ||
7 | #define LUA_CORE | ||
8 | |||
9 | #include "lj_obj.h" | ||
10 | |||
11 | #if LJ_HASJIT | ||
12 | |||
13 | #include "lj_gc.h" | ||
14 | #include "lj_state.h" | ||
15 | #include "lj_frame.h" | ||
16 | #include "lj_ir.h" | ||
17 | #include "lj_jit.h" | ||
18 | #include "lj_iropt.h" | ||
19 | #include "lj_trace.h" | ||
20 | #include "lj_snap.h" | ||
21 | #include "lj_target.h" | ||
22 | |||
23 | /* Some local macros to save typing. Undef'd at the end. */ | ||
24 | #define IR(ref) (&J->cur.ir[(ref)]) | ||
25 | |||
26 | /* -- Snapshot generation ------------------------------------------------- */ | ||
27 | |||
28 | /* NYI: Snapshots are in need of a redesign. The current storage model for | ||
29 | ** snapshot maps is too wasteful. They could be compressed (1D or 2D) and | ||
30 | ** made more flexible at the same time. Iterators should no longer need to | ||
31 | ** skip unmodified slots. IR_FRAME should be eliminated, too. | ||
32 | */ | ||
33 | |||
34 | /* Add all modified slots to the snapshot. */ | ||
35 | static void snapshot_slots(jit_State *J, IRRef2 *map, BCReg nslots) | ||
36 | { | ||
37 | BCReg s; | ||
38 | for (s = 0; s < nslots; s++) { | ||
39 | IRRef ref = tref_ref(J->slot[s]); | ||
40 | if (ref) { | ||
41 | IRIns *ir = IR(ref); | ||
42 | if (ir->o == IR_SLOAD && ir->op1 == s && !(ir->op2 & IRSLOAD_INHERIT)) | ||
43 | ref = 0; | ||
44 | } | ||
45 | map[s] = (IRRef2)ref; | ||
46 | } | ||
47 | } | ||
48 | |||
49 | /* Add frame links at the end of the snapshot. */ | ||
50 | static MSize snapshot_framelinks(jit_State *J, IRRef2 *map) | ||
51 | { | ||
52 | cTValue *frame = J->L->base - 1; | ||
53 | cTValue *lim = J->L->base - J->baseslot; | ||
54 | MSize f = 0; | ||
55 | map[f++] = u32ptr(J->pc); | ||
56 | while (frame > lim) { | ||
57 | if (frame_islua(frame)) { | ||
58 | map[f++] = u32ptr(frame_pc(frame)); | ||
59 | frame = frame_prevl(frame); | ||
60 | } else if (frame_ispcall(frame)) { | ||
61 | map[f++] = (uint32_t)frame_ftsz(frame); | ||
62 | frame = frame_prevd(frame); | ||
63 | } else if (frame_iscont(frame)) { | ||
64 | map[f++] = (uint32_t)frame_ftsz(frame); | ||
65 | map[f++] = u32ptr(frame_contpc(frame)); | ||
66 | frame = frame_prevd(frame); | ||
67 | } else { | ||
68 | lua_assert(0); | ||
69 | } | ||
70 | } | ||
71 | return f; | ||
72 | } | ||
73 | |||
74 | /* Take a snapshot of the current stack. */ | ||
75 | static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap) | ||
76 | { | ||
77 | BCReg nslots = J->baseslot + J->maxslot; | ||
78 | MSize nsm, nframelinks; | ||
79 | IRRef2 *p; | ||
80 | /* Conservative estimate. Continuation frames need 2 slots. */ | ||
81 | nsm = nsnapmap + nslots + (uint32_t)J->framedepth*2+1; | ||
82 | if (LJ_UNLIKELY(nsm > J->sizesnapmap)) { /* Need to grow snapshot map? */ | ||
83 | if (nsm < 2*J->sizesnapmap) | ||
84 | nsm = 2*J->sizesnapmap; | ||
85 | else if (nsm < 64) | ||
86 | nsm = 64; | ||
87 | J->snapmapbuf = (IRRef2 *)lj_mem_realloc(J->L, J->snapmapbuf, | ||
88 | J->sizesnapmap*sizeof(IRRef2), nsm*sizeof(IRRef2)); | ||
89 | J->cur.snapmap = J->snapmapbuf; | ||
90 | J->sizesnapmap = nsm; | ||
91 | } | ||
92 | p = &J->cur.snapmap[nsnapmap]; | ||
93 | snapshot_slots(J, p, nslots); | ||
94 | nframelinks = snapshot_framelinks(J, p + nslots); | ||
95 | J->cur.nsnapmap = (uint16_t)(nsnapmap + nslots + nframelinks); | ||
96 | snap->mapofs = (uint16_t)nsnapmap; | ||
97 | snap->ref = (IRRef1)J->cur.nins; | ||
98 | snap->nslots = (uint8_t)nslots; | ||
99 | snap->nframelinks = (uint8_t)nframelinks; | ||
100 | snap->count = 0; | ||
101 | } | ||
102 | |||
103 | /* Add or merge a snapshot. */ | ||
104 | void lj_snap_add(jit_State *J) | ||
105 | { | ||
106 | MSize nsnap = J->cur.nsnap; | ||
107 | MSize nsnapmap = J->cur.nsnapmap; | ||
108 | /* Merge if no ins. inbetween or if requested and no guard inbetween. */ | ||
109 | if (J->mergesnap ? !irt_isguard(J->guardemit) : | ||
110 | (nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins)) { | ||
111 | nsnapmap = J->cur.snap[--nsnap].mapofs; | ||
112 | } else { | ||
113 | /* Need to grow snapshot buffer? */ | ||
114 | if (LJ_UNLIKELY(nsnap >= J->sizesnap)) { | ||
115 | MSize maxsnap = (MSize)J->param[JIT_P_maxsnap]; | ||
116 | if (nsnap >= maxsnap) | ||
117 | lj_trace_err(J, LJ_TRERR_SNAPOV); | ||
118 | lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot); | ||
119 | J->cur.snap = J->snapbuf; | ||
120 | } | ||
121 | J->cur.nsnap = (uint16_t)(nsnap+1); | ||
122 | } | ||
123 | J->mergesnap = 0; | ||
124 | J->guardemit.irt = 0; | ||
125 | snapshot_stack(J, &J->cur.snap[nsnap], nsnapmap); | ||
126 | } | ||
127 | |||
128 | /* Shrink last snapshot. */ | ||
129 | void lj_snap_shrink(jit_State *J) | ||
130 | { | ||
131 | BCReg nslots = J->baseslot + J->maxslot; | ||
132 | SnapShot *snap = &J->cur.snap[J->cur.nsnap-1]; | ||
133 | IRRef2 *oflinks = &J->cur.snapmap[snap->mapofs + snap->nslots]; | ||
134 | IRRef2 *nflinks = &J->cur.snapmap[snap->mapofs + nslots]; | ||
135 | uint32_t s, nframelinks = snap->nframelinks; | ||
136 | lua_assert(nslots < snap->nslots); | ||
137 | snap->nslots = (uint8_t)nslots; | ||
138 | J->cur.nsnapmap = (uint16_t)(snap->mapofs + nslots + nframelinks); | ||
139 | for (s = 0; s < nframelinks; s++) /* Move frame links down. */ | ||
140 | nflinks[s] = oflinks[s]; | ||
141 | } | ||
142 | |||
143 | /* -- Snapshot access ----------------------------------------------------- */ | ||
144 | |||
145 | /* Initialize a Bloom Filter with all renamed refs. | ||
146 | ** There are very few renames (often none), so the filter has | ||
147 | ** very few bits set. This makes it suitable for negative filtering. | ||
148 | */ | ||
149 | static BloomFilter snap_renamefilter(Trace *T, SnapNo lim) | ||
150 | { | ||
151 | BloomFilter rfilt = 0; | ||
152 | IRIns *ir; | ||
153 | for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--) | ||
154 | if (ir->op2 <= lim) | ||
155 | bloomset(rfilt, ir->op1); | ||
156 | return rfilt; | ||
157 | } | ||
158 | |||
159 | /* Process matching renames to find the original RegSP. */ | ||
160 | static RegSP snap_renameref(Trace *T, SnapNo lim, IRRef ref, RegSP rs) | ||
161 | { | ||
162 | IRIns *ir; | ||
163 | for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--) | ||
164 | if (ir->op1 == ref && ir->op2 <= lim) | ||
165 | rs = ir->prev; | ||
166 | return rs; | ||
167 | } | ||
168 | |||
169 | /* Convert a snapshot into a linear slot -> RegSP map. */ | ||
170 | void lj_snap_regspmap(uint16_t *rsmap, Trace *T, SnapNo snapno) | ||
171 | { | ||
172 | SnapShot *snap = &T->snap[snapno]; | ||
173 | BCReg s, nslots = snap->nslots; | ||
174 | IRRef2 *map = &T->snapmap[snap->mapofs]; | ||
175 | BloomFilter rfilt = snap_renamefilter(T, snapno); | ||
176 | for (s = 0; s < nslots; s++) { | ||
177 | IRRef ref = snap_ref(map[s]); | ||
178 | if (!irref_isk(ref)) { | ||
179 | IRIns *ir = &T->ir[ref]; | ||
180 | uint32_t rs = ir->prev; | ||
181 | if (bloomtest(rfilt, ref)) | ||
182 | rs = snap_renameref(T, snapno, ref, rs); | ||
183 | rsmap[s] = (uint16_t)rs; | ||
184 | } | ||
185 | } | ||
186 | } | ||
187 | |||
/* Restore interpreter state from exit state with the help of a snapshot.
**
** J      JIT state. J->parent selects the trace and J->exitno the snapshot.
** exptr  Pointer to the ExitState holding the saved registers and spill
**        slots of the exiting trace.
**
** Rewrites the stack slots described by the snapshot, then updates
** L->base (only if a Lua function frame was restored), L->top and J->pc.
** May grow the Lua stack.
*/
void lj_snap_restore(jit_State *J, void *exptr)
{
  ExitState *ex = (ExitState *)exptr;
  SnapNo snapno = J->exitno;  /* For now, snapno == exitno. */
  Trace *T = J->trace[J->parent];
  SnapShot *snap = &T->snap[snapno];
  BCReg s, nslots = snap->nslots;
  IRRef2 *map = &T->snapmap[snap->mapofs];
  /* Points past the last frame link; links are consumed backwards. */
  IRRef2 *flinks = map + nslots + snap->nframelinks;
  TValue *o, *newbase, *ntop;
  BloomFilter rfilt = snap_renamefilter(T, snapno);
  lua_State *L = J->L;

  /* Make sure the stack is big enough for the slots from the snapshot. */
  if (L->base + nslots >= L->maxstack) {
    L->top = curr_topL(L);
    lj_state_growstack(L, nslots - curr_proto(L)->framesize);
  }

  /* Fill stack slots with data from the registers and spill slots. */
  newbase = NULL;  /* Set once a Lua function frame slot has been restored. */
  ntop = L->base;  /* Highest frame top seen so far. */
  /* Snapshot slot 0 corresponds to the stack slot just below L->base. */
  for (s = 0, o = L->base-1; s < nslots; s++, o++) {
    IRRef ref = snap_ref(map[s]);
    if (ref) {
      IRIns *ir = &T->ir[ref];
      if (irref_isk(ref)) {  /* Restore constant slot. */
        lj_ir_kvalue(L, o, ir);
      } else {
        IRType1 t = ir->t;
        RegSP rs = ir->prev;
        /* Only consult the rename list if the filter says it may match. */
        if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
          rs = snap_renameref(T, snapno, ref, rs);
        if (ra_hasspill(regsp_spill(rs))) {  /* Restore from spill slot. */
          int32_t *sps = &ex->spill[regsp_spill(rs)];
          if (irt_isinteger(t)) {
            setintV(o, *sps);
          } else if (irt_isnum(t)) {
            /* Copies 64 bits starting at this spill slot. */
            o->u64 = *(uint64_t *)sps;
          } else {
            lua_assert(!irt_ispri(t));  /* PRI refs never have a spill slot. */
            setgcrefi(o->gcr, *sps);
            setitype(o, irt_toitype(t));
          }
        } else if (ra_hasreg(regsp_reg(rs))) {  /* Restore from register. */
          Reg r = regsp_reg(rs);
          if (irt_isinteger(t)) {
            setintV(o, ex->gpr[r-RID_MIN_GPR]);
          } else if (irt_isnum(t)) {
            setnumV(o, ex->fpr[r-RID_MIN_FPR]);
          } else {
            /* Primitive types only need their type tag set. */
            if (!irt_ispri(t))
              setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]);
            setitype(o, irt_toitype(t));
          }
        } else {  /* Restore frame slot. */
          lua_assert(ir->o == IR_FRAME);
          /* This works for both PTR and FUNC IR_FRAME. */
          setgcrefp(o->fr.func, mref(T->ir[ir->op2].ptr, void));
          if (s != 0)  /* Do not overwrite link to previous frame. */
            o->fr.tp.ftsz = (int32_t)*--flinks;
          if (irt_isfunc(ir->t)) {
            GCfunc *fn = gco2func(gcref(T->ir[ir->op2].gcr));
            if (isluafunc(fn)) {
              TValue *fs;
              /* The new base is the slot right above the frame slot. */
              newbase = o+1;
              fs = newbase + funcproto(fn)->framesize;
              if (fs > ntop) ntop = fs;  /* Update top for newly added frames. */
            }
          }
        }
      }
    } else if (newbase) {
      setnilV(o);  /* Clear unreferenced slots of newly added frames. */
    }
  }
  if (newbase) {  /* Clear remainder of newly added frames. */
    L->base = newbase;
    if (ntop >= L->maxstack) {  /* Need to grow the stack again. */
      /* Growing may move the stack: re-derive o and ntop from L->top. */
      MSize need = (MSize)(ntop - o);
      L->top = o;
      lj_state_growstack(L, need);
      o = L->top;
      ntop = o + need;
    }
    L->top = curr_topL(L);
    for (; o < ntop; o++)
      setnilV(o);
  } else {  /* Must not clear slots of existing frame. */
    L->top = curr_topL(L);
  }
  /* Exactly one frame link must be left: the PC stored first in the map. */
  lua_assert(map + nslots == flinks-1);
  J->pc = (const BCIns *)(uintptr_t)(*--flinks);
}
283 | |||
284 | #undef IR | ||
285 | |||
286 | #endif | ||
diff --git a/src/lj_snap.h b/src/lj_snap.h new file mode 100644 index 00000000..806047b1 --- /dev/null +++ b/src/lj_snap.h | |||
@@ -0,0 +1,19 @@ | |||
1 | /* | ||
2 | ** Snapshot handling. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #ifndef _LJ_SNAP_H | ||
7 | #define _LJ_SNAP_H | ||
8 | |||
9 | #include "lj_obj.h" | ||
10 | #include "lj_jit.h" | ||
11 | |||
12 | #if LJ_HASJIT | ||
13 | LJ_FUNC void lj_snap_add(jit_State *J); | ||
14 | LJ_FUNC void lj_snap_shrink(jit_State *J); | ||
15 | LJ_FUNC void lj_snap_regspmap(uint16_t *rsmap, Trace *T, SnapNo snapno); | ||
16 | LJ_FUNC void lj_snap_restore(jit_State *J, void *exptr); | ||
17 | #endif | ||
18 | |||
19 | #endif | ||
diff --git a/src/lj_state.c b/src/lj_state.c new file mode 100644 index 00000000..b4bc7a0c --- /dev/null +++ b/src/lj_state.c | |||
@@ -0,0 +1,255 @@ | |||
1 | /* | ||
2 | ** State and stack handling. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | ** | ||
5 | ** Portions taken verbatim or adapted from the Lua interpreter. | ||
6 | ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h | ||
7 | */ | ||
8 | |||
9 | #define lj_state_c | ||
10 | #define LUA_CORE | ||
11 | |||
12 | #include "lj_obj.h" | ||
13 | #include "lj_gc.h" | ||
14 | #include "lj_err.h" | ||
15 | #include "lj_str.h" | ||
16 | #include "lj_tab.h" | ||
17 | #include "lj_func.h" | ||
18 | #include "lj_meta.h" | ||
19 | #include "lj_state.h" | ||
20 | #include "lj_frame.h" | ||
21 | #include "lj_trace.h" | ||
22 | #include "lj_dispatch.h" | ||
23 | #include "lj_vm.h" | ||
24 | #include "lj_lex.h" | ||
25 | #include "lj_alloc.h" | ||
26 | |||
27 | /* -- Stack handling ------------------------------------------------------ */ | ||
28 | |||
29 | /* Stack sizes. */ | ||
30 | #define LJ_STACK_MIN LUA_MINSTACK /* Min. stack size. */ | ||
31 | #define LJ_STACK_MAX LUAI_MAXSTACK /* Max. stack size. */ | ||
32 | #define LJ_STACK_START (2*LJ_STACK_MIN) /* Starting stack size. */ | ||
33 | #define LJ_STACK_MAXEX (LJ_STACK_MAX + 1 + LJ_STACK_EXTRA) | ||
34 | |||
35 | /* Explanation of LJ_STACK_EXTRA: | ||
36 | ** | ||
37 | ** Calls to metamethods store their arguments beyond the current top | ||
38 | ** without checking for the stack limit. This avoids stack resizes which | ||
39 | ** would invalidate passed TValue pointers. The stack check is performed | ||
40 | ** later by the call gate. This can safely resize the stack or raise an | ||
41 | ** error. Thus we need some extra slots beyond the current stack limit. | ||
42 | ** | ||
43 | ** Most metamethods need 4 slots above top (cont, mobj, arg1, arg2) plus | ||
44 | ** one extra slot if mobj is not a function. Only lj_meta_tset needs 5 | ||
45 | ** slots above top, but then mobj is always a function. So we can get by | ||
46 | ** with 5 extra slots. | ||
47 | */ | ||
48 | |||
49 | /* Resize stack slots and adjust pointers in state. */ | ||
50 | static void resizestack(lua_State *L, MSize n) | ||
51 | { | ||
52 | TValue *oldst = L->stack; | ||
53 | ptrdiff_t delta; | ||
54 | MSize realsize = n + 1 + LJ_STACK_EXTRA; | ||
55 | GCobj *up; | ||
56 | lua_assert((MSize)(L->maxstack-L->stack) == L->stacksize-LJ_STACK_EXTRA-1); | ||
57 | lj_mem_reallocvec(L, L->stack, L->stacksize, realsize, TValue); | ||
58 | delta = (char *)L->stack - (char *)oldst; | ||
59 | L->maxstack = L->stack + n; | ||
60 | L->stacksize = realsize; | ||
61 | L->base = (TValue *)((char *)L->base + delta); | ||
62 | L->top = (TValue *)((char *)L->top + delta); | ||
63 | for (up = gcref(L->openupval); up != NULL; up = gcnext(up)) | ||
64 | gco2uv(up)->v = (TValue *)((char *)gco2uv(up)->v + delta); | ||
65 | if (obj2gco(L) == gcref(G(L)->jit_L)) | ||
66 | setmref(G(L)->jit_base, mref(G(L)->jit_base, char) + delta); | ||
67 | } | ||
68 | |||
69 | /* Relimit stack after error, in case the limit was overdrawn. */ | ||
70 | void lj_state_relimitstack(lua_State *L) | ||
71 | { | ||
72 | if (L->stacksize > LJ_STACK_MAXEX && L->top - L->stack < LJ_STACK_MAX-1) | ||
73 | resizestack(L, LJ_STACK_MAX); | ||
74 | } | ||
75 | |||
76 | /* Try to shrink the stack (called from GC). */ | ||
77 | void lj_state_shrinkstack(lua_State *L, MSize used) | ||
78 | { | ||
79 | if (L->stacksize > LJ_STACK_MAXEX) | ||
80 | return; /* Avoid stack shrinking while handling stack overflow. */ | ||
81 | if (4*used < L->stacksize && | ||
82 | 2*(LJ_STACK_START+LJ_STACK_EXTRA) < L->stacksize && | ||
83 | obj2gco(L) != gcref(G(L)->jit_L)) /* Don't shrink stack of live trace. */ | ||
84 | resizestack(L, L->stacksize >> 1); | ||
85 | } | ||
86 | |||
87 | /* Try to grow stack. */ | ||
88 | void lj_state_growstack(lua_State *L, MSize need) | ||
89 | { | ||
90 | if (L->stacksize > LJ_STACK_MAXEX) /* overflow while handling overflow? */ | ||
91 | lj_err_throw(L, LUA_ERRERR); | ||
92 | resizestack(L, L->stacksize + (need > L->stacksize ? need : L->stacksize)); | ||
93 | if (L->stacksize > LJ_STACK_MAXEX) { | ||
94 | if (curr_funcisL(L)) { /* Clear slots of incomplete Lua frame. */ | ||
95 | TValue *top = curr_topL(L); | ||
96 | while (--top >= L->top) setnilV(top); | ||
97 | } | ||
98 | lj_err_msg(L, LJ_ERR_STKOV); /* ... to allow L->top = curr_topL(L). */ | ||
99 | } | ||
100 | } | ||
101 | |||
/* Grow the stack by (at least) one slot. Used by the incr_top() macro. */
void lj_state_growstack1(lua_State *L)
{
  lj_state_growstack(L, 1);
}
106 | |||
107 | /* Allocate basic stack for new state. */ | ||
108 | static void stack_init(lua_State *L1, lua_State *L) | ||
109 | { | ||
110 | L1->stack = lj_mem_newvec(L, LJ_STACK_START + LJ_STACK_EXTRA, TValue); | ||
111 | L1->stacksize = LJ_STACK_START + LJ_STACK_EXTRA; | ||
112 | L1->top = L1->stack; | ||
113 | L1->maxstack = L1->stack+(L1->stacksize - LJ_STACK_EXTRA)-1; | ||
114 | setthreadV(L1, L1->top, L1); /* needed for curr_funcisL() on empty stack */ | ||
115 | setnilV(L1->top); /* but clear its type */ | ||
116 | L1->base = ++L1->top; | ||
117 | } | ||
118 | |||
119 | /* -- State handling ------------------------------------------------------ */ | ||
120 | |||
/* Open parts that may cause memory-allocation errors.
** Run via lj_vm_cpcall() from lua_newstate(). Both dummy and ud are unused.
** Always returns NULL.
*/
static TValue *cpluaopen(lua_State *L, lua_CFunction dummy, void *ud)
{
  global_State *g = G(L);
  UNUSED(dummy);
  UNUSED(ud);
  stack_init(L, L);
  /* NOBARRIER: State initialization, all objects are white. */
  /* Create the table stored in L->env and the registry table. */
  setgcref(L->env, obj2gco(lj_tab_new(L, 0, LJ_MIN_GLOBAL)));
  settabV(L, registry(L), lj_tab_new(L, 0, LJ_MIN_REGISTRY));
  /* Allocate the initial string hash table (mask = size-1). */
  lj_str_resize(L, LJ_MIN_STRTAB-1);
  lj_meta_init(L);
  lj_lex_init(L);
  fixstring(lj_err_str(L, LJ_ERR_ERRMEM));  /* Preallocate memory error msg. */
  /* Set the GC threshold to four times the memory allocated so far. */
  g->gc.threshold = 4*g->gc.total;
  return NULL;
}
138 | |||
139 | static void close_state(lua_State *L) | ||
140 | { | ||
141 | global_State *g = G(L); | ||
142 | #ifndef LUAJIT_USE_SYSMALLOC | ||
143 | if (g->allocf == lj_alloc_f) { | ||
144 | lj_alloc_destroy(g->allocd); | ||
145 | } else | ||
146 | #endif | ||
147 | { | ||
148 | lj_func_closeuv(L, L->stack); | ||
149 | lj_gc_freeall(g); | ||
150 | lua_assert(gcref(g->gc.root) == obj2gco(L)); | ||
151 | lua_assert(g->strnum == 0); | ||
152 | lj_trace_freestate(g); | ||
153 | lj_mem_freevec(g, g->strhash, g->strmask+1, GCstr *); | ||
154 | lj_str_freebuf(g, &g->tmpbuf); | ||
155 | lj_mem_freevec(g, L->stack, L->stacksize, TValue); | ||
156 | lua_assert(g->gc.total == sizeof(GG_State)); | ||
157 | g->allocf(g->allocd, G2GG(g), sizeof(GG_State), 0); | ||
158 | } | ||
159 | } | ||
160 | |||
161 | LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) | ||
162 | { | ||
163 | GG_State *GG = cast(GG_State *, f(ud, NULL, 0, sizeof(GG_State))); | ||
164 | lua_State *L = &GG->L; | ||
165 | global_State *g = &GG->g; | ||
166 | if (GG == NULL) return NULL; | ||
167 | memset(GG, 0, sizeof(GG_State)); | ||
168 | L->gct = ~LJ_TTHREAD; | ||
169 | L->marked = LJ_GC_WHITE0 | LJ_GC_FIXED | LJ_GC_SFIXED; /* Prevent free. */ | ||
170 | L->dummy_ffid = FF_C; | ||
171 | setmref(L->glref, g); | ||
172 | g->gc.currentwhite = LJ_GC_WHITE0 | LJ_GC_FIXED; | ||
173 | g->allocf = f; | ||
174 | g->allocd = ud; | ||
175 | setgcref(g->mainthref, obj2gco(L)); | ||
176 | setgcref(g->uvhead.prev, obj2gco(&g->uvhead)); | ||
177 | setgcref(g->uvhead.next, obj2gco(&g->uvhead)); | ||
178 | g->strmask = ~(MSize)0; | ||
179 | setnilV(registry(L)); | ||
180 | setnilV(&g->nilnode.val); | ||
181 | setnilV(&g->nilnode.key); | ||
182 | lj_str_initbuf(L, &g->tmpbuf); | ||
183 | g->gc.state = GCSpause; | ||
184 | setgcref(g->gc.root, obj2gco(L)); | ||
185 | g->gc.sweep = &g->gc.root; | ||
186 | g->gc.total = sizeof(GG_State); | ||
187 | g->gc.pause = LUAI_GCPAUSE; | ||
188 | g->gc.stepmul = LUAI_GCMUL; | ||
189 | lj_dispatch_init((GG_State *)L); | ||
190 | L->status = LUA_ERRERR+1; /* Avoid touching the stack upon memory error. */ | ||
191 | if (lj_vm_cpcall(L, cpluaopen, NULL, NULL) != 0) { | ||
192 | /* Memory allocation error: free partial state. */ | ||
193 | close_state(L); | ||
194 | return NULL; | ||
195 | } | ||
196 | L->status = 0; | ||
197 | return L; | ||
198 | } | ||
199 | |||
/* Run the userdata finalizers. Invoked via lj_vm_cpcall() from lua_close().
** Both dummy and ud are unused. Always returns NULL.
*/
static TValue *cpfinalize(lua_State *L, lua_CFunction dummy, void *ud)
{
  UNUSED(dummy);
  UNUSED(ud);
  lj_gc_finalizeudata(L);
  /* Frame pop omitted. */
  return NULL;
}
208 | |||
/* Close a Lua state: run the pending userdata finalizers, then free all
** memory of the state. Any thread of the state may be passed in; the main
** thread is the one that gets closed.
*/
LUA_API void lua_close(lua_State *L)
{
  global_State *g = G(L);
  L = mainthread(g);  /* Only the main thread can be closed. */
  lj_func_closeuv(L, L->stack);
  lj_gc_separateudata(g, 1);  /* Separate udata which have GC metamethods. */
#if LJ_HASJIT
  /* Clear the JIT_F_ON flag, set the trace state to idle, update dispatch. */
  G2J(g)->flags &= ~JIT_F_ON;
  G2J(g)->state = LJ_TRACE_IDLE;
  lj_dispatch_update(g);
#endif
  /* Retry until the protected finalizer call returns 0. The thread state is
  ** reset before every attempt.
  */
  do {
    hook_enter(g);
    L->status = 0;
    L->cframe = NULL;
    L->base = L->top = L->stack + 1;
  } while (lj_vm_cpcall(L, cpfinalize, NULL, NULL) != 0);
  close_state(L);
}
228 | |||
229 | lua_State *lj_state_new(lua_State *L) | ||
230 | { | ||
231 | lua_State *L1 = lj_mem_newobj(L, lua_State); | ||
232 | L1->gct = ~LJ_TTHREAD; | ||
233 | L1->dummy_ffid = FF_C; | ||
234 | L1->status = 0; | ||
235 | L1->stacksize = 0; | ||
236 | L1->stack = NULL; | ||
237 | L1->cframe = NULL; | ||
238 | /* NOBARRIER: The lua_State is new (marked white). */ | ||
239 | setgcrefnull(L1->openupval); | ||
240 | setmrefr(L1->glref, L->glref); | ||
241 | setgcrefr(L1->env, L->env); | ||
242 | stack_init(L1, L); /* init stack */ | ||
243 | lua_assert(iswhite(obj2gco(L1))); | ||
244 | return L1; | ||
245 | } | ||
246 | |||
/* Free a thread other than the main thread. Closes its open upvalues,
** then releases its stack and the lua_State object itself.
*/
void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L)
{
  lua_assert(L != mainthread(g));
  lj_func_closeuv(L, L->stack);  /* Close all upvalues from the stack bottom up. */
  lua_assert(gcref(L->openupval) == NULL);
  lj_mem_freevec(g, L->stack, L->stacksize, TValue);
  lj_mem_freet(g, L);
}
255 | |||
diff --git a/src/lj_state.h b/src/lj_state.h new file mode 100644 index 00000000..54e85405 --- /dev/null +++ b/src/lj_state.h | |||
@@ -0,0 +1,31 @@ | |||
1 | /* | ||
2 | ** State and stack handling. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #ifndef _LJ_STATE_H | ||
7 | #define _LJ_STATE_H | ||
8 | |||
9 | #include "lj_obj.h" | ||
10 | |||
11 | #define incr_top(L) \ | ||
12 | (++L->top >= L->maxstack && (lj_state_growstack1(L), 0)) | ||
13 | |||
14 | #define savestack(L, p) ((char *)(p) - (char *)L->stack) | ||
15 | #define restorestack(L, n) ((TValue *)((char *)L->stack + (n))) | ||
16 | |||
17 | LJ_FUNC void lj_state_relimitstack(lua_State *L); | ||
18 | LJ_FUNC void lj_state_shrinkstack(lua_State *L, MSize used); | ||
19 | LJ_FUNCA void lj_state_growstack(lua_State *L, MSize need); | ||
20 | LJ_FUNCA void lj_state_growstack1(lua_State *L); | ||
21 | |||
22 | static LJ_AINLINE void lj_state_checkstack(lua_State *L, MSize need) | ||
23 | { | ||
24 | if ((MSize)((char *)L->maxstack-(char *)L->top) <= need*(MSize)sizeof(TValue)) | ||
25 | lj_state_growstack(L, need); | ||
26 | } | ||
27 | |||
28 | LJ_FUNC lua_State *lj_state_new(lua_State *L); | ||
29 | LJ_FUNC void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L); | ||
30 | |||
31 | #endif | ||
diff --git a/src/lj_str.c b/src/lj_str.c new file mode 100644 index 00000000..26f91cba --- /dev/null +++ b/src/lj_str.c | |||
@@ -0,0 +1,301 @@ | |||
1 | /* | ||
2 | ** String handling. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | ** | ||
5 | ** Portions taken verbatim or adapted from the Lua interpreter. | ||
6 | ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h | ||
7 | */ | ||
8 | |||
9 | #include <stdio.h> | ||
10 | |||
11 | #define lj_str_c | ||
12 | #define LUA_CORE | ||
13 | |||
14 | #include "lj_obj.h" | ||
15 | #include "lj_gc.h" | ||
16 | #include "lj_err.h" | ||
17 | #include "lj_str.h" | ||
18 | #include "lj_state.h" | ||
19 | #include "lj_ctype.h" | ||
20 | |||
21 | /* -- String interning ---------------------------------------------------- */ | ||
22 | |||
23 | /* Ordered compare of strings. Assumes string data is 4-byte aligned. */ | ||
24 | int32_t lj_str_cmp(GCstr *a, GCstr *b) | ||
25 | { | ||
26 | MSize i, n = a->len > b->len ? b->len : a->len; | ||
27 | for (i = 0; i < n; i += 4) { | ||
28 | /* Note: innocuous access up to end of string + 3. */ | ||
29 | uint32_t va = *(const uint32_t *)(strdata(a)+i); | ||
30 | uint32_t vb = *(const uint32_t *)(strdata(b)+i); | ||
31 | if (va != vb) { | ||
32 | #if LJ_ARCH_ENDIAN == LUAJIT_LE | ||
33 | va = lj_bswap(va); vb = lj_bswap(vb); | ||
34 | #endif | ||
35 | i -= n; | ||
36 | if ((int32_t)i >= -3) { | ||
37 | va >>= 32+(i<<3); vb >>= 32+(i<<3); | ||
38 | if (va == vb) break; | ||
39 | } | ||
40 | return (int32_t)(va - vb); | ||
41 | } | ||
42 | } | ||
43 | return (int32_t)(a->len - b->len); | ||
44 | } | ||
45 | |||
46 | /* Resize the string hash table (grow and shrink). */ | ||
47 | void lj_str_resize(lua_State *L, MSize newmask) | ||
48 | { | ||
49 | global_State *g = G(L); | ||
50 | GCRef *newhash; | ||
51 | MSize i; | ||
52 | if (g->gc.state == GCSsweepstring || newmask >= LJ_MAX_STRTAB-1) | ||
53 | return; /* No resizing during GC traversal or if already too big. */ | ||
54 | newhash = lj_mem_newvec(L, newmask+1, GCRef); | ||
55 | memset(newhash, 0, (newmask+1)*sizeof(GCRef)); | ||
56 | for (i = g->strmask; i != ~(MSize)0; i--) { /* Rehash old table. */ | ||
57 | GCobj *p = gcref(g->strhash[i]); | ||
58 | while (p) { /* Follow each hash chain and reinsert all strings. */ | ||
59 | MSize h = gco2str(p)->hash & newmask; | ||
60 | GCobj *next = gcnext(p); | ||
61 | /* NOBARRIER: The string table is a GC root. */ | ||
62 | setgcrefr(p->gch.nextgc, newhash[h]); | ||
63 | setgcref(newhash[h], p); | ||
64 | p = next; | ||
65 | } | ||
66 | } | ||
67 | lj_mem_freevec(g, g->strhash, g->strmask+1, GCstr *); | ||
68 | g->strmask = newmask; | ||
69 | g->strhash = newhash; | ||
70 | } | ||
71 | |||
72 | /* Intern a string and return string object. */ | ||
73 | GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx) | ||
74 | { | ||
75 | global_State *g; | ||
76 | GCstr *s; | ||
77 | GCobj *o; | ||
78 | MSize len = (MSize)lenx; | ||
79 | MSize h = len; | ||
80 | MSize step = (len>>5)+1; /* Partial hash. */ | ||
81 | MSize l1; | ||
82 | if (lenx >= LJ_MAX_STR) | ||
83 | lj_err_msg(L, LJ_ERR_STROV); | ||
84 | for (l1 = len; l1 >= step; l1 -= step) /* Compute string hash. */ | ||
85 | h = h ^ ((h<<5)+(h>>2)+cast(unsigned char, str[l1-1])); | ||
86 | /* Check if the string has already been interned. */ | ||
87 | g = G(L); | ||
88 | for (o = gcref(g->strhash[h & g->strmask]); o != NULL; o = gcnext(o)) { | ||
89 | GCstr *tso = gco2str(o); | ||
90 | if (tso->len == len && (memcmp(str, strdata(tso), len) == 0)) { | ||
91 | if (isdead(g, o)) flipwhite(o); /* Resurrect if dead. */ | ||
92 | return tso; /* Return existing string. */ | ||
93 | } | ||
94 | } | ||
95 | /* Nope, create a new string. */ | ||
96 | s = lj_mem_newt(L, sizeof(GCstr)+len+1, GCstr); | ||
97 | newwhite(g, s); | ||
98 | s->gct = ~LJ_TSTR; | ||
99 | s->len = len; | ||
100 | s->hash = h; | ||
101 | s->reserved = 0; | ||
102 | memcpy(strdatawr(s), str, len); | ||
103 | strdatawr(s)[len] = '\0'; /* Zero-terminate string. */ | ||
104 | /* Add it to string hash table. */ | ||
105 | h &= g->strmask; | ||
106 | s->nextgc = g->strhash[h]; | ||
107 | /* NOBARRIER: The string table is a GC root. */ | ||
108 | setgcref(g->strhash[h], obj2gco(s)); | ||
109 | if (g->strnum++ > g->strmask) /* Allow a 100% load factor. */ | ||
110 | lj_str_resize(L, (g->strmask<<1)+1); /* Grow string table. */ | ||
111 | return s; /* Return newly interned string. */ | ||
112 | } | ||
113 | |||
/* Free a string object and decrement the count of interned strings.
** NOTE(review): the string is not unlinked from its hash chain here --
** presumably the caller has already done that; verify against callers.
*/
void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s)
{
  g->strnum--;
  lj_mem_free(g, s, sizestring(s));
}
119 | |||
120 | /* -- Type conversions ---------------------------------------------------- */ | ||
121 | |||
/* Convert string to number.
**
** s  Zero-terminated input string.
** n  Receives the number on success.
**
** Returns 1 on success, 0 if the string is not a valid number.
**
** Fast path: optional whitespace and sign, followed by a decimal or hex
** (0x) integer that fits into 32 bits, plus optional trailing whitespace.
** Everything else is re-parsed from the start with lua_str2number().
*/
int lj_str_numconv(const char *s, TValue *n)
{
  lua_Number sign = 1;
  const uint8_t *p = (const uint8_t *)s;
  while (lj_ctype_isspace(*p)) p++;
  if (*p == '-') { p++; sign = -1; } else if (*p == '+') { p++; }
  if ((uint32_t)(*p - '0') < 10) {  /* Starts with a decimal digit? */
    uint32_t k = (uint32_t)(*p++ - '0');
    if (k == 0 && ((*p & ~0x20) == 'X')) {  /* "0x" or "0X" prefix. */
      p++;
      while (lj_ctype_isxdigit(*p)) {
        /* Shifting in one more hex digit would overflow 32 bits. */
        if (k >= 0x10000000) goto parsedbl;
        k = (k << 4) + (*p & 15u);
        /* For a-f/A-F the low nibble is 1..6: add 9 to get 10..15. */
        if (!lj_ctype_isdigit(*p)) k += 9;
        p++;
      }
    } else {
      while ((uint32_t)(*p - '0') < 10) {
        /* k*10 + digit might not fit into 32 bits anymore. */
        if (k >= 0x19999999) goto parsedbl;
        k = k * 10u + (uint32_t)(*p++ - '0');
      }
    }
    while (LJ_UNLIKELY(lj_ctype_isspace(*p))) p++;
    if (LJ_LIKELY(*p == '\0')) {  /* Consumed the whole string? */
      setnumV(n, sign * cast_num(k));
      return 1;
    }
    /* Otherwise fall through and let the general parser decide. */
  }
parsedbl:
  {
    TValue tv;
    char *endptr;
    setnumV(&tv, lua_str2number(s, &endptr));
    if (endptr == s) return 0;  /* conversion failed */
    if (LJ_UNLIKELY(*endptr != '\0')) {
      while (lj_ctype_isspace((uint8_t)*endptr)) endptr++;
      if (*endptr != '\0') return 0;  /* invalid trailing characters? */
    }
    if (LJ_LIKELY(!tvisnan(&tv)))
      setnumV(n, numV(&tv));
    else
      setnanV(n);  /* Canonicalize injected NaNs. */
    return 1;
  }
}
168 | |||
169 | /* Convert number to string. */ | ||
170 | GCstr *lj_str_fromnum(lua_State *L, const lua_Number *np) | ||
171 | { | ||
172 | char s[LUAI_MAXNUMBER2STR]; | ||
173 | lua_Number n = *np; | ||
174 | size_t len = (size_t)lua_number2str(s, n); | ||
175 | return lj_str_new(L, s, len); | ||
176 | } | ||
177 | |||
178 | /* Convert integer to string. */ | ||
179 | GCstr *lj_str_fromint(lua_State *L, int32_t k) | ||
180 | { | ||
181 | char s[1+10]; | ||
182 | char *p = s+sizeof(s); | ||
183 | uint32_t i = (uint32_t)(k < 0 ? -k : k); | ||
184 | do { *--p = (char)('0' + i % 10); } while (i /= 10); | ||
185 | if (k < 0) *--p = '-'; | ||
186 | return lj_str_new(L, p, (size_t)(s+sizeof(s)-p)); | ||
187 | } | ||
188 | |||
189 | /* -- String formatting --------------------------------------------------- */ | ||
190 | |||
191 | static void addstr(lua_State *L, SBuf *sb, const char *str, MSize len) | ||
192 | { | ||
193 | char *p; | ||
194 | MSize i; | ||
195 | if (sb->n + len > sb->sz) { | ||
196 | MSize sz = sb->sz * 2; | ||
197 | while (sb->n + len > sz) sz = sz * 2; | ||
198 | lj_str_resizebuf(L, sb, sz); | ||
199 | } | ||
200 | p = sb->buf + sb->n; | ||
201 | sb->n += len; | ||
202 | for (i = 0; i < len; i++) p[i] = str[i]; | ||
203 | } | ||
204 | |||
205 | static void addchar(lua_State *L, SBuf *sb, int c) | ||
206 | { | ||
207 | if (sb->n + 1 > sb->sz) { | ||
208 | MSize sz = sb->sz * 2; | ||
209 | lj_str_resizebuf(L, sb, sz); | ||
210 | } | ||
211 | sb->buf[sb->n++] = cast(char, c); | ||
212 | } | ||
213 | |||
214 | /* Push formatted message as a string object to Lua stack. va_list variant. */ | ||
215 | const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp) | ||
216 | { | ||
217 | SBuf *sb = &G(L)->tmpbuf; | ||
218 | lj_str_needbuf(L, sb, (MSize)strlen(fmt)); | ||
219 | lj_str_resetbuf(sb); | ||
220 | for (;;) { | ||
221 | const char *e = strchr(fmt, '%'); | ||
222 | if (e == NULL) break; | ||
223 | addstr(L, sb, fmt, (MSize)(e-fmt)); | ||
224 | /* This function only handles %s, %c, %d, %f and %p formats. */ | ||
225 | switch (e[1]) { | ||
226 | case 's': { | ||
227 | const char *s = va_arg(argp, char *); | ||
228 | if (s == NULL) s = "(null)"; | ||
229 | addstr(L, sb, s, (MSize)strlen(s)); | ||
230 | break; | ||
231 | } | ||
232 | case 'c': | ||
233 | addchar(L, sb, va_arg(argp, int)); | ||
234 | break; | ||
235 | case 'd': { | ||
236 | char buff[1+10]; | ||
237 | char *p = buff+sizeof(buff); | ||
238 | int32_t k = va_arg(argp, int32_t); | ||
239 | uint32_t i = (uint32_t)(k < 0 ? -k : k); | ||
240 | do { *--p = (char)('0' + i % 10); } while (i /= 10); | ||
241 | if (k < 0) *--p = '-'; | ||
242 | addstr(L, sb, p, (MSize)(buff+sizeof(buff)-p)); | ||
243 | break; | ||
244 | } | ||
245 | case 'f': { | ||
246 | char buff[LUAI_MAXNUMBER2STR]; | ||
247 | lua_Number n = cast_num(va_arg(argp, LUAI_UACNUMBER)); | ||
248 | MSize len = (MSize)lua_number2str(buff, n); | ||
249 | addstr(L, sb, buff, len); | ||
250 | break; | ||
251 | } | ||
252 | case 'p': { | ||
253 | #define FMTP_CHARS (2*sizeof(ptrdiff_t)) | ||
254 | char buff[2+FMTP_CHARS]; | ||
255 | ptrdiff_t p = (ptrdiff_t)(va_arg(argp, void *)); | ||
256 | int i; | ||
257 | buff[0] = '0'; | ||
258 | buff[1] = 'x'; | ||
259 | for (i = 2+FMTP_CHARS-1; i >= 2; i--, p >>= 4) | ||
260 | buff[i] = "0123456789abcdef"[(p & 15)]; | ||
261 | addstr(L, sb, buff, 2+FMTP_CHARS); | ||
262 | break; | ||
263 | } | ||
264 | case '%': | ||
265 | addchar(L, sb, '%'); | ||
266 | break; | ||
267 | default: | ||
268 | addchar(L, sb, '%'); | ||
269 | addchar(L, sb, e[1]); | ||
270 | break; | ||
271 | } | ||
272 | fmt = e+2; | ||
273 | } | ||
274 | addstr(L, sb, fmt, (MSize)strlen(fmt)); | ||
275 | setstrV(L, L->top, lj_str_new(L, sb->buf, sb->n)); | ||
276 | incr_top(L); | ||
277 | return strVdata(L->top - 1); | ||
278 | } | ||
279 | |||
/* Push formatted message as a string object to Lua stack. Vararg variant.
** Forwards to lj_str_pushvf(), which defines the supported formats, and
** returns its result.
*/
const char *lj_str_pushf(lua_State *L, const char *fmt, ...)
{
  const char *msg;
  va_list argp;
  va_start(argp, fmt);
  msg = lj_str_pushvf(L, fmt, argp);
  va_end(argp);
  return msg;
}
290 | |||
291 | /* -- Buffer handling ----------------------------------------------------- */ | ||
292 | |||
293 | char *lj_str_needbuf(lua_State *L, SBuf *sb, MSize sz) | ||
294 | { | ||
295 | if (sz > sb->sz) { | ||
296 | if (sz < LJ_MIN_SBUF) sz = LJ_MIN_SBUF; | ||
297 | lj_str_resizebuf(L, sb, sz); | ||
298 | } | ||
299 | return sb->buf; | ||
300 | } | ||
301 | |||
diff --git a/src/lj_str.h b/src/lj_str.h new file mode 100644 index 00000000..f7e56d16 --- /dev/null +++ b/src/lj_str.h | |||
@@ -0,0 +1,45 @@ | |||
1 | /* | ||
2 | ** String handling. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #ifndef _LJ_STR_H | ||
7 | #define _LJ_STR_H | ||
8 | |||
9 | #include <stdarg.h> | ||
10 | |||
11 | #include "lj_obj.h" | ||
12 | |||
13 | /* String interning. */ | ||
14 | LJ_FUNCA int32_t lj_str_cmp(GCstr *a, GCstr *b); | ||
15 | LJ_FUNC void lj_str_resize(lua_State *L, MSize newmask); | ||
16 | LJ_FUNCA GCstr *lj_str_new(lua_State *L, const char *str, size_t len); | ||
17 | LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s); | ||
18 | |||
19 | #define lj_str_newz(L, s) (lj_str_new(L, s, strlen(s))) /* For NUL-terminated C strings; s is evaluated twice. */ | ||
20 | #define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1)) /* String literals only: "" s relies on literal concatenation, sizeof(s)-1 is the length without the NUL. */ | ||
21 | |||
22 | /* Type conversions. */ | ||
23 | LJ_FUNCA int lj_str_numconv(const char *s, TValue *n); | ||
24 | LJ_FUNCA GCstr *lj_str_fromnum(lua_State *L, const lua_Number *np); | ||
25 | LJ_FUNCA GCstr *lj_str_fromint(lua_State *L, int32_t k); | ||
26 | |||
27 | /* String formatting. */ | ||
28 | LJ_FUNC const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp); | ||
29 | LJ_FUNC const char *lj_str_pushf(lua_State *L, const char *fmt, ...) | ||
30 | #if defined(__GNUC__) | ||
31 | __attribute__ ((format (printf, 2, 3))) | ||
32 | #endif | ||
33 | ; | ||
34 | |||
35 | /* Resizable string buffers. Struct definition in lj_obj.h. Note: lj_str_initbuf() only sets buf and sz, the fill count n is set to 0 by lj_str_resetbuf(). lj_str_resizebuf() evaluates sb and size more than once. */ | ||
36 | LJ_FUNC char *lj_str_needbuf(lua_State *L, SBuf *sb, MSize sz); | ||
37 | |||
38 | #define lj_str_initbuf(L, sb) ((sb)->buf = NULL, (sb)->sz = 0) | ||
39 | #define lj_str_resetbuf(sb) ((sb)->n = 0) | ||
40 | #define lj_str_resizebuf(L, sb, size) \ | ||
41 | ((sb)->buf = (char *)lj_mem_realloc(L, (sb)->buf, (sb)->sz, (size)), \ | ||
42 | (sb)->sz = (size)) | ||
43 | #define lj_str_freebuf(g, sb) lj_mem_free(g, (void *)(sb)->buf, (sb)->sz) /* Frees the buffer data only; the SBuf fields are left unchanged. */ | ||
44 | |||
44 | |||
45 | #endif | ||
diff --git a/src/lj_tab.c b/src/lj_tab.c new file mode 100644 index 00000000..633ea20c --- /dev/null +++ b/src/lj_tab.c | |||
@@ -0,0 +1,618 @@ | |||
1 | /* | ||
2 | ** Table handling. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | ** | ||
5 | ** Major portions taken verbatim or adapted from the Lua interpreter. | ||
6 | ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h | ||
7 | */ | ||
8 | |||
9 | #define lj_tab_c | ||
10 | #define LUA_CORE | ||
11 | |||
12 | #include "lj_obj.h" | ||
13 | #include "lj_gc.h" | ||
14 | #include "lj_err.h" | ||
15 | #include "lj_tab.h" | ||
16 | |||
17 | /* -- Object hashing ------------------------------------------------------ */ | ||
18 | |||
/* Hash values are masked with the table hash mask and used as an index.
** The result is a pointer to the anchor node of the hash chain. The table
** argument is parenthesized so that any pointer expression can be passed.
*/
#define hashmask(t, x)	(&noderef((t)->node)[(x) & (t)->hmask])

/* String hashes are precomputed when they are interned. */
#define hashstr(t, s)	hashmask((t), (s)->hash)

/* Numbers: mix both 32 bit halves; the top bit of the high word is dropped. */
#define hashnum(t, o)	hashrot((t), (o)->u32.lo, (o)->u32.hi&0x7fffffff)
/* GC references: mix the reference with a copy offset by a constant. */
#define hashgcref(t, r)	hashrot((t), gcrefu(r), gcrefu(r)-0x04c11db7)
27 | |||
28 | /* Scramble the bits of numbers and pointers. */ | ||
29 | static LJ_AINLINE Node *hashrot(const GCtab *t, uint32_t lo, uint32_t hi) | ||
30 | { | ||
31 | lo ^= hi; hi = lj_rol(hi, 14); | ||
32 | lo -= hi; hi = lj_rol(hi, 5); | ||
33 | hi ^= lo; hi -= lj_rol(lo, 27); | ||
34 | return hashmask(t, hi); | ||
35 | } | ||
36 | |||
37 | /* Hash an arbitrary key and return its anchor position in the hash table. */ | ||
38 | static Node *hashkey(const GCtab *t, cTValue *key) | ||
39 | { | ||
40 | if (tvisstr(key)) | ||
41 | return hashstr(t, strV(key)); | ||
42 | else if (tvisnum(key)) | ||
43 | return hashnum(t, key); | ||
44 | else if (tvisbool(key)) | ||
45 | return hashmask(t, boolV(key)); | ||
46 | else | ||
47 | return hashgcref(t, key->gcr); | ||
48 | /* Only hash 32 bits of lightuserdata on a 64 bit CPU. Good enough? */ | ||
49 | } | ||
50 | |||
51 | /* -- Table creation and destruction -------------------------------------- */ | ||
52 | |||
53 | /* Create new hash part for table. */ | ||
54 | static LJ_AINLINE void newhpart(lua_State *L, GCtab *t, uint32_t hbits) | ||
55 | { | ||
56 | uint32_t hsize; | ||
57 | Node *node; | ||
58 | lua_assert(hbits != 0); | ||
59 | if (hbits > LJ_MAX_HBITS) | ||
60 | lj_err_msg(L, LJ_ERR_TABOV); | ||
61 | hsize = 1u << hbits; | ||
62 | node = lj_mem_newvec(L, hsize, Node); | ||
63 | setmref(t->node, node); | ||
64 | t->hmask = hsize-1; | ||
65 | setmref(t->lastfree, &node[hsize]); | ||
66 | } | ||
67 | |||
68 | /* | ||
69 | ** Q: Why all of these copies of t->hmask, t->node etc. to local variables? | ||
70 | ** A: Because alias analysis for C is _really_ tough. | ||
71 | ** Even state-of-the-art C compilers won't produce good code without this. | ||
72 | */ | ||
73 | |||
74 | /* Clear hash part of table. */ | ||
75 | static LJ_AINLINE void clearhpart(GCtab *t) | ||
76 | { | ||
77 | uint32_t i, hmask = t->hmask; | ||
78 | Node *node = noderef(t->node); | ||
79 | lua_assert(t->hmask != 0); | ||
80 | for (i = 0; i <= hmask; i++) { | ||
81 | Node *n = &node[i]; | ||
82 | setmref(n->next, NULL); | ||
83 | setnilV(&n->key); | ||
84 | setnilV(&n->val); | ||
85 | } | ||
86 | } | ||
87 | |||
88 | /* Clear array part of table. */ | ||
89 | static LJ_AINLINE void clearapart(GCtab *t) | ||
90 | { | ||
91 | uint32_t i, asize = t->asize; | ||
92 | TValue *array = tvref(t->array); | ||
93 | for (i = 0; i < asize; i++) | ||
94 | setnilV(&array[i]); | ||
95 | } | ||
96 | |||
97 | /* Create a new table. Note: the slots are not initialized (yet). */ | ||
98 | static GCtab *newtab(lua_State *L, uint32_t asize, uint32_t hbits) | ||
99 | { | ||
100 | GCtab *t; | ||
101 | global_State *g; | ||
102 | /* First try to colocate the array part. */ | ||
103 | if (LJ_MAX_COLOSIZE && asize > 0 && asize <= LJ_MAX_COLOSIZE) { | ||
104 | /* This is ugly. (sizeof(GCtab)&7) != 0. So prepend the colocated array. */ | ||
105 | TValue *array = lj_mem_newt(L, sizetabcolo(asize), TValue); | ||
106 | t = cast(GCtab *, array + asize); | ||
107 | g = G(L); | ||
108 | setgcrefr(t->nextgc, g->gc.root); | ||
109 | setgcref(g->gc.root, obj2gco(t)); | ||
110 | newwhite(g, t); | ||
111 | t->gct = ~LJ_TTAB; | ||
112 | t->nomm = cast_byte(~0); | ||
113 | t->colo = (int8_t)asize; | ||
114 | setmref(t->array, array); | ||
115 | setgcrefnull(t->metatable); | ||
116 | t->asize = asize; | ||
117 | t->hmask = 0; | ||
118 | setmref(t->node, &g->nilnode); | ||
119 | setmref(t->lastfree, &g->nilnode); | ||
120 | } else { /* Otherwise separately allocate the array part. */ | ||
121 | t = lj_mem_newobj(L, GCtab); | ||
122 | t->gct = ~LJ_TTAB; | ||
123 | t->nomm = cast_byte(~0); | ||
124 | t->colo = 0; | ||
125 | setmref(t->array, NULL); | ||
126 | setgcrefnull(t->metatable); | ||
127 | t->asize = 0; /* In case the array allocation fails. */ | ||
128 | t->hmask = 0; | ||
129 | g = G(L); | ||
130 | setmref(t->node, &g->nilnode); | ||
131 | setmref(t->lastfree, &g->nilnode); | ||
132 | if (asize > 0) { | ||
133 | if (asize > LJ_MAX_ASIZE) | ||
134 | lj_err_msg(L, LJ_ERR_TABOV); | ||
135 | setmref(t->array, lj_mem_newvec(L, asize, TValue)); | ||
136 | t->asize = asize; | ||
137 | } | ||
138 | } | ||
139 | if (hbits) | ||
140 | newhpart(L, t, hbits); | ||
141 | return t; | ||
142 | } | ||
143 | |||
144 | /* Create a new table. | ||
145 | ** | ||
146 | ** IMPORTANT NOTE: The API differs from lua_createtable()! | ||
147 | ** | ||
148 | ** The array size is non-inclusive. E.g. asize=128 creates array slots | ||
149 | ** for 0..127, but not for 128. If you need slots 1..128, pass asize=129 | ||
150 | ** (slot 0 is wasted in this case). | ||
151 | ** | ||
152 | ** The hash size is given in hash bits. hbits=0 means no hash part. | ||
153 | ** hbits=1 creates 2 hash slots, hbits=2 creates 4 hash slots and so on. | ||
154 | */ | ||
155 | GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits) | ||
156 | { | ||
157 | GCtab *t = newtab(L, asize, hbits); | ||
158 | clearapart(t); | ||
159 | if (t->hmask > 0) clearhpart(t); | ||
160 | return t; | ||
161 | } | ||
162 | |||
163 | /* Duplicate a table. */ | ||
164 | GCtab *lj_tab_dup(lua_State *L, const GCtab *kt) | ||
165 | { | ||
166 | GCtab *t; | ||
167 | uint32_t asize, hmask; | ||
168 | t = newtab(L, kt->asize, kt->hmask > 0 ? lj_fls(kt->hmask)+1 : 0); | ||
169 | lua_assert(kt->asize == t->asize && kt->hmask == t->hmask); | ||
170 | t->nomm = 0; /* Keys with metamethod names may be present. */ | ||
171 | asize = kt->asize; | ||
172 | if (asize > 0) { | ||
173 | TValue *array = tvref(t->array); | ||
174 | TValue *karray = tvref(kt->array); | ||
175 | if (asize < 64) { /* An inlined loop beats memcpy for < 512 bytes. */ | ||
176 | uint32_t i; | ||
177 | for (i = 0; i < asize; i++) | ||
178 | copyTV(L, &array[i], &karray[i]); | ||
179 | } else { | ||
180 | memcpy(array, karray, asize*sizeof(TValue)); | ||
181 | } | ||
182 | } | ||
183 | hmask = kt->hmask; | ||
184 | if (hmask > 0) { | ||
185 | uint32_t i; | ||
186 | Node *node = noderef(t->node); | ||
187 | Node *knode = noderef(kt->node); | ||
188 | ptrdiff_t d = (char *)node - (char *)knode; | ||
189 | setmref(t->lastfree, (Node *)((char *)noderef(kt->lastfree) + d)); | ||
190 | for (i = 0; i <= hmask; i++) { | ||
191 | Node *kn = &knode[i]; | ||
192 | Node *n = &node[i]; | ||
193 | Node *next = nextnode(kn); | ||
194 | copyTV(L, &n->val, &kn->val); | ||
195 | copyTV(L, &n->key, &kn->key); | ||
196 | setmref(n->next, next == NULL? next : (Node *)((char *)next + d)); | ||
197 | } | ||
198 | } | ||
199 | return t; | ||
200 | } | ||
201 | |||
202 | /* Free a table. */ | ||
203 | void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t) | ||
204 | { | ||
205 | if (t->hmask > 0) | ||
206 | lj_mem_freevec(g, noderef(t->node), t->hmask+1, Node); | ||
207 | if (LJ_MAX_COLOSIZE && t->colo) { | ||
208 | ptrdiff_t n; | ||
209 | if (t->colo < 0 && t->asize > 0) /* Array part was separated. */ | ||
210 | lj_mem_freevec(g, tvref(t->array), t->asize, TValue); | ||
211 | n = t->colo & 0x7f; | ||
212 | lj_mem_free(g, (TValue *)t - n, sizetabcolo((uint32_t)n)); | ||
213 | } else { | ||
214 | if (t->asize > 0) | ||
215 | lj_mem_freevec(g, tvref(t->array), t->asize, TValue); | ||
216 | lj_mem_freet(g, t); | ||
217 | } | ||
218 | } | ||
219 | |||
220 | /* -- Table resizing ------------------------------------------------------ */ | ||
221 | |||
222 | /* Resize a table to fit the new array/hash part sizes. Steps: grow the array part (if asize is larger), install a new empty hash part, move array slots cut off by a shrink into the hash part, then reinsert all pairs of the old hash part and free it. Raises LJ_ERR_TABOV if asize exceeds LJ_MAX_ASIZE. */ | ||
223 | static void resizetab(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits) | ||
224 | { | ||
225 | Node *oldnode = noderef(t->node); /* Saved: t->node is replaced below. */ | ||
226 | uint32_t oldasize = t->asize; | ||
227 | uint32_t oldhmask = t->hmask; | ||
228 | if (asize > oldasize) { /* Array part grows? */ | ||
229 | TValue *array; | ||
230 | uint32_t i; | ||
231 | if (asize > LJ_MAX_ASIZE) | ||
232 | lj_err_msg(L, LJ_ERR_TABOV); | ||
233 | if (LJ_MAX_COLOSIZE && t->colo > 0) { | ||
234 | /* A colocated array must be separated and copied. */ | ||
235 | TValue *oarray = tvref(t->array); | ||
236 | array = lj_mem_newvec(L, asize, TValue); | ||
237 | t->colo = (int8_t)(t->colo | 0x80); /* Mark as separated (colo < 0). */ | ||
238 | for (i = 0; i < oldasize; i++) | ||
239 | copyTV(L, &array[i], &oarray[i]); | ||
240 | } else { | ||
241 | array = (TValue *)lj_mem_realloc(L, tvref(t->array), | ||
242 | oldasize*sizeof(TValue), asize*sizeof(TValue)); | ||
243 | } | ||
244 | setmref(t->array, array); | ||
245 | t->asize = asize; | ||
246 | for (i = oldasize; i < asize; i++) /* Clear newly allocated slots. */ | ||
247 | setnilV(&array[i]); | ||
248 | } | ||
249 | /* Create new (empty) hash part. The old one stays reachable via oldnode/oldhmask. */ | ||
250 | if (hbits) { | ||
251 | newhpart(L, t, hbits); | ||
252 | clearhpart(t); | ||
253 | } else { | ||
254 | global_State *g = G(L); | ||
255 | setmref(t->node, &g->nilnode); | ||
256 | setmref(t->lastfree, &g->nilnode); | ||
257 | t->hmask = 0; | ||
258 | } | ||
259 | if (asize < oldasize) { /* Array part shrinks? */ | ||
260 | TValue *array = tvref(t->array); | ||
261 | uint32_t i; | ||
262 | t->asize = asize; /* Note: This 'shrinks' even colocated arrays. */ | ||
263 | for (i = asize; i < oldasize; i++) /* Reinsert old array values. */ | ||
264 | if (!tvisnil(&array[i])) | ||
265 | copyTV(L, lj_tab_setinth(L, t, (int32_t)i), &array[i]); | ||
266 | /* Physically shrink only separated arrays (colo == 0: never colocated, colo < 0: separated). */ | ||
267 | if (LJ_MAX_COLOSIZE && t->colo <= 0) | ||
268 | setmref(t->array, lj_mem_realloc(L, array, | ||
269 | oldasize*sizeof(TValue), asize*sizeof(TValue))); | ||
270 | } | ||
271 | if (oldhmask > 0) { /* Reinsert pairs from old hash part. */ | ||
272 | global_State *g; | ||
273 | uint32_t i; | ||
274 | for (i = 0; i <= oldhmask; i++) { | ||
275 | Node *n = &oldnode[i]; | ||
276 | if (!tvisnil(&n->val)) /* Pairs with a nil value are dropped. */ | ||
277 | copyTV(L, lj_tab_set(L, t, &n->key), &n->val); | ||
278 | } | ||
279 | g = G(L); | ||
280 | lj_mem_freevec(g, oldnode, oldhmask+1, Node); | ||
281 | } | ||
282 | } | ||
283 | |||
284 | static uint32_t countint(cTValue *key, uint32_t *bins) | ||
285 | { | ||
286 | if (tvisnum(key)) { | ||
287 | lua_Number nk = numV(key); | ||
288 | int32_t k = lj_num2int(nk); | ||
289 | if ((uint32_t)k < LJ_MAX_ASIZE && nk == cast_num(k)) { | ||
290 | bins[(k > 2 ? lj_fls((uint32_t)(k-1)) : 0)]++; | ||
291 | return 1; | ||
292 | } | ||
293 | } | ||
294 | return 0; | ||
295 | } | ||
296 | |||
297 | static uint32_t countarray(const GCtab *t, uint32_t *bins) | ||
298 | { | ||
299 | uint32_t na, b, i; | ||
300 | if (t->asize == 0) return 0; | ||
301 | for (na = i = b = 0; b < LJ_MAX_ABITS; b++) { | ||
302 | uint32_t n, top = 2u << b; | ||
303 | TValue *array; | ||
304 | if (top >= t->asize) { | ||
305 | top = t->asize-1; | ||
306 | if (i > top) | ||
307 | break; | ||
308 | } | ||
309 | array = tvref(t->array); | ||
310 | for (n = 0; i <= top; i++) | ||
311 | if (!tvisnil(&array[i])) | ||
312 | n++; | ||
313 | bins[b] += n; | ||
314 | na += n; | ||
315 | } | ||
316 | return na; | ||
317 | } | ||
318 | |||
319 | static uint32_t counthash(const GCtab *t, uint32_t *bins, uint32_t *narray) | ||
320 | { | ||
321 | uint32_t total, na, i, hmask = t->hmask; | ||
322 | Node *node = noderef(t->node); | ||
323 | for (total = na = 0, i = 0; i <= hmask; i++) { | ||
324 | Node *n = &node[i]; | ||
325 | if (!tvisnil(&n->val)) { | ||
326 | na += countint(&n->key, bins); | ||
327 | total++; | ||
328 | } | ||
329 | } | ||
330 | *narray += na; | ||
331 | return total; | ||
332 | } | ||
333 | |||
/* Pick the array size from the per-bin counts of integer keys.
** On entry *narray holds the total number of integer keys. A bin boundary
** is accepted when the running sum of keys reaches at least 2^b. On exit
** *narray holds the chosen array size ((2^(b+1))+1 for the last accepted
** bin, or 0). Returns the number of keys covered by that size.
*/
static uint32_t bestasize(uint32_t bins[], uint32_t *narray)
{
  uint32_t nkeys = *narray;
  uint32_t sum = 0, covered = 0, size = 0, b;
  for (b = 0; (1u<<b) <= nkeys && sum != nkeys; b++) {
    if (bins[b] == 0)
      continue;
    sum += bins[b];
    if (sum >= (1u<<b)) {
      size = (2u<<b)+1;
      covered = sum;
    }
  }
  *narray = size;
  return covered;
}
345 | |||
346 | static void rehashtab(lua_State *L, GCtab *t, cTValue *ek) | ||
347 | { | ||
348 | uint32_t bins[LJ_MAX_ABITS]; | ||
349 | uint32_t total, asize, na, i; | ||
350 | for (i = 0; i < LJ_MAX_ABITS; i++) bins[i] = 0; | ||
351 | asize = countarray(t, bins); | ||
352 | total = 1 + asize + counthash(t, bins, &asize); | ||
353 | asize += countint(ek, bins); | ||
354 | na = bestasize(bins, &asize); | ||
355 | total -= na; | ||
356 | resizetab(L, t, asize, hsize2hbits(total)); | ||
357 | } | ||
358 | |||
359 | void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize) | ||
360 | { | ||
361 | resizetab(L, t, nasize+1, t->hmask > 0 ? lj_fls(t->hmask)+1 : 0); | ||
362 | } | ||
363 | |||
364 | /* -- Table getters ------------------------------------------------------- */ | ||
365 | |||
366 | cTValue *lj_tab_getinth(GCtab *t, int32_t key) | ||
367 | { | ||
368 | TValue k; | ||
369 | Node *n; | ||
370 | k.n = cast_num(key); | ||
371 | n = hashnum(t, &k); | ||
372 | do { | ||
373 | if (tvisnum(&n->key) && n->key.n == k.n) | ||
374 | return &n->val; | ||
375 | } while ((n = nextnode(n))); | ||
376 | return NULL; | ||
377 | } | ||
378 | |||
379 | cTValue *lj_tab_getstr(GCtab *t, GCstr *key) | ||
380 | { | ||
381 | Node *n = hashstr(t, key); | ||
382 | do { | ||
383 | if (tvisstr(&n->key) && strV(&n->key) == key) | ||
384 | return &n->val; | ||
385 | } while ((n = nextnode(n))); | ||
386 | return NULL; | ||
387 | } | ||
388 | |||
389 | cTValue *lj_tab_get(lua_State *L, GCtab *t, cTValue *key) | ||
390 | { | ||
391 | if (tvisstr(key)) { | ||
392 | cTValue *tv = lj_tab_getstr(t, strV(key)); | ||
393 | if (tv) | ||
394 | return tv; | ||
395 | } else if (tvisnum(key)) { | ||
396 | lua_Number nk = numV(key); | ||
397 | int32_t k = lj_num2int(nk); | ||
398 | if (nk == cast_num(k)) { | ||
399 | cTValue *tv = lj_tab_getint(t, k); | ||
400 | if (tv) | ||
401 | return tv; | ||
402 | } else { | ||
403 | goto genlookup; /* Else use the generic lookup. */ | ||
404 | } | ||
405 | } else if (!tvisnil(key)) { | ||
406 | Node *n; | ||
407 | genlookup: | ||
408 | n = hashkey(t, key); | ||
409 | do { | ||
410 | if (lj_obj_equal(&n->key, key)) | ||
411 | return &n->val; | ||
412 | } while ((n = nextnode(n))); | ||
413 | } | ||
414 | return niltv(L); | ||
415 | } | ||
416 | |||
417 | /* -- Table setters ------------------------------------------------------- */ | ||
418 | |||
419 | static Node *getfreepos(GCtab *t) | ||
420 | { | ||
421 | Node *node = noderef(t->node); | ||
422 | Node *lastfree = noderef(t->lastfree); | ||
423 | while (lastfree > node) { | ||
424 | lastfree--; | ||
425 | setmref(t->lastfree, lastfree); | ||
426 | if (tvisnil(&lastfree->key)) | ||
427 | return lastfree; | ||
428 | } | ||
429 | return NULL; /* could not find a free place */ | ||
430 | } | ||
431 | |||
432 | /* | ||
433 | ** Insert a new key into the hash part and return its value slot (which | ||
434 | ** is nil). First check whether the key's main position is free. If not, | ||
435 | ** check whether the colliding node is in its own main position: if it is | ||
436 | ** not, move the colliding node to a free node and put the new key into the | ||
437 | ** main position; otherwise the new key goes into a free node which is | ||
438 | ** chained behind the colliding node. If no free node is left, the table is | ||
438 | ** rehashed and the key is inserted with lj_tab_set(). | ||
438 | */ | ||
439 | TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key) | ||
440 | { | ||
441 | Node *mp = hashkey(t, key); | ||
442 | if (!tvisnil(&mp->val) || t->hmask == 0) { /* Main position in use, or hmask is 0 (no hash part). */ | ||
443 | Node *othern; | ||
444 | Node *n = getfreepos(t); /* get a free place */ | ||
445 | if (n == NULL) { /* cannot find a free place? */ | ||
446 | rehashtab(L, t, key); /* grow table */ | ||
447 | return lj_tab_set(L, t, key); /* re-insert key into grown table */ | ||
448 | } | ||
449 | lua_assert(n != &G(L)->nilnode); | ||
450 | othern = hashkey(t, &mp->key); | ||
451 | if (othern != mp) { /* is colliding node out of its main position? */ | ||
452 | /* yes; move colliding node into free position */ | ||
453 | while (noderef(othern->next) != mp) | ||
454 | othern = nextnode(othern); /* find previous */ | ||
455 | setmref(othern->next, n); /* redo the chain with `n' in place of `mp' */ | ||
456 | *n = *mp; /* copy colliding node into free pos. (mp->next also goes) */ | ||
457 | setmref(mp->next, NULL); /* now `mp' is free */ | ||
458 | setnilV(&mp->val); | ||
459 | } else { /* colliding node is in its own main position */ | ||
460 | /* new node will go into free position */ | ||
461 | setmrefr(n->next, mp->next); /* chain new position */ | ||
462 | setmref(mp->next, n); | ||
463 | mp = n; | ||
464 | } | ||
465 | } | ||
466 | mp->key.u64 = key->u64; /* Raw 64 bit copy of the key. */ | ||
467 | if (LJ_UNLIKELY(tvismzero(&mp->key))) /* Presumably tests for a -0 key; it is stored with all bits zero. */ | ||
468 | mp->key.u64 = 0; | ||
469 | lj_gc_barriert(L, t, key); | ||
470 | lua_assert(tvisnil(&mp->val)); | ||
471 | return &mp->val; | ||
472 | } | ||
473 | |||
474 | TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key) | ||
475 | { | ||
476 | TValue k; | ||
477 | Node *n; | ||
478 | k.n = cast_num(key); | ||
479 | n = hashnum(t, &k); | ||
480 | do { | ||
481 | if (tvisnum(&n->key) && n->key.n == k.n) | ||
482 | return &n->val; | ||
483 | } while ((n = nextnode(n))); | ||
484 | return lj_tab_newkey(L, t, &k); | ||
485 | } | ||
486 | |||
487 | TValue *lj_tab_setstr(lua_State *L, GCtab *t, GCstr *key) | ||
488 | { | ||
489 | TValue k; | ||
490 | Node *n = hashstr(t, key); | ||
491 | do { | ||
492 | if (tvisstr(&n->key) && strV(&n->key) == key) | ||
493 | return &n->val; | ||
494 | } while ((n = nextnode(n))); | ||
495 | setstrV(L, &k, key); | ||
496 | return lj_tab_newkey(L, t, &k); | ||
497 | } | ||
498 | |||
499 | TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key) | ||
500 | { | ||
501 | Node *n; | ||
502 | t->nomm = 0; /* Invalidate negative metamethod cache. */ | ||
503 | if (tvisstr(key)) { | ||
504 | return lj_tab_setstr(L, t, strV(key)); | ||
505 | } else if (tvisnum(key)) { | ||
506 | lua_Number nk = numV(key); | ||
507 | int32_t k = lj_num2int(nk); | ||
508 | if (nk == cast_num(k)) | ||
509 | return lj_tab_setint(L, t, k); | ||
510 | if (tvisnan(key)) | ||
511 | lj_err_msg(L, LJ_ERR_NANIDX); | ||
512 | /* Else use the generic lookup. */ | ||
513 | } else if (tvisnil(key)) { | ||
514 | lj_err_msg(L, LJ_ERR_NILIDX); | ||
515 | } | ||
516 | n = hashkey(t, key); | ||
517 | do { | ||
518 | if (lj_obj_equal(&n->key, key)) | ||
519 | return &n->val; | ||
520 | } while ((n = nextnode(n))); | ||
521 | return lj_tab_newkey(L, t, key); | ||
522 | } | ||
523 | |||
524 | /* -- Table traversal ----------------------------------------------------- */ | ||
525 | |||
526 | /* Get the traversal index of a key. */ | ||
527 | static uint32_t keyindex(lua_State *L, GCtab *t, cTValue *key) | ||
528 | { | ||
529 | if (tvisnum(key)) { | ||
530 | lua_Number nk = numV(key); | ||
531 | int32_t k = lj_num2int(nk); | ||
532 | if ((uint32_t)k < t->asize && nk == cast_num(k)) | ||
533 | return (uint32_t)k; /* Array key indexes: [0..t->asize-1] */ | ||
534 | } | ||
535 | if (!tvisnil(key)) { | ||
536 | Node *n = hashkey(t, key); | ||
537 | do { | ||
538 | if (lj_obj_equal(&n->key, key) || | ||
539 | (itype(&n->key) == LJ_TDEADKEY && tvisgcv(key) && | ||
540 | gcV(&n->key) == gcV(key))) | ||
541 | return t->asize + (uint32_t)(n - noderef(t->node)); | ||
542 | /* Hash key indexes: [t->asize..t->asize+t->nmask] */ | ||
543 | } while ((n = nextnode(n))); | ||
544 | lj_err_msg(L, LJ_ERR_NEXTIDX); | ||
545 | return 0; /* unreachable */ | ||
546 | } | ||
547 | return ~0u; /* A nil key starts the traversal. */ | ||
548 | } | ||
549 | |||
550 | /* Advance to the next step in a table traversal. */ | ||
551 | int lj_tab_next(lua_State *L, GCtab *t, TValue *key) | ||
552 | { | ||
553 | uint32_t i = keyindex(L, t, key); /* Find predecessor key index. */ | ||
554 | for (i++; i < t->asize; i++) /* First traverse the array keys. */ | ||
555 | if (!tvisnil(arrayslot(t, i))) { | ||
556 | setintV(key, i); | ||
557 | copyTV(L, key+1, arrayslot(t, i)); | ||
558 | return 1; | ||
559 | } | ||
560 | for (i -= t->asize; i <= t->hmask; i++) { /* Then traverse the hash keys. */ | ||
561 | Node *n = &noderef(t->node)[i]; | ||
562 | if (!tvisnil(&n->val)) { | ||
563 | copyTV(L, key, &n->key); | ||
564 | copyTV(L, key+1, &n->val); | ||
565 | return 1; | ||
566 | } | ||
567 | } | ||
568 | return 0; /* End of traversal. */ | ||
569 | } | ||
570 | |||
571 | /* -- Table length calculation -------------------------------------------- */ | ||
572 | |||
573 | static MSize unbound_search(GCtab *t, MSize j) | ||
574 | { | ||
575 | cTValue *tv; | ||
576 | MSize i = j; /* i is zero or a present index */ | ||
577 | j++; | ||
578 | /* find `i' and `j' such that i is present and j is not */ | ||
579 | while ((tv = lj_tab_getint(t, cast(int32_t, j))) && !tvisnil(tv)) { | ||
580 | i = j; | ||
581 | j *= 2; | ||
582 | if (j > (MSize)(INT_MAX-2)) { /* overflow? */ | ||
583 | /* table was built with bad purposes: resort to linear search */ | ||
584 | i = 1; | ||
585 | while ((tv = lj_tab_getint(t, cast(int32_t, i))) && !tvisnil(tv)) i++; | ||
586 | return i - 1; | ||
587 | } | ||
588 | } | ||
589 | /* now do a binary search between them */ | ||
590 | while (j - i > 1) { | ||
591 | MSize m = (i+j)/2; | ||
592 | cTValue *tvb = lj_tab_getint(t, cast(int32_t, m)); | ||
593 | if (tvb && !tvisnil(tvb)) i = m; else j = m; | ||
594 | } | ||
595 | return i; | ||
596 | } | ||
597 | |||
598 | /* | ||
599 | ** Try to find a boundary in table `t'. A `boundary' is an integer index | ||
600 | ** such that t[i] is non-nil and t[i+1] is nil (and 0 if t[1] is nil). | ||
601 | */ | ||
602 | MSize lj_tab_len(GCtab *t) | ||
603 | { | ||
604 | MSize j = (MSize)t->asize; | ||
605 | if (j > 1 && tvisnil(arrayslot(t, j-1))) { | ||
606 | MSize i = 1; | ||
607 | while (j - i > 1) { | ||
608 | MSize m = (i+j)/2; | ||
609 | if (tvisnil(arrayslot(t, m-1))) j = m; else i = m; | ||
610 | } | ||
611 | return i-1; | ||
612 | } | ||
613 | if (j) j--; | ||
614 | if (t->hmask <= 0) | ||
615 | return j; | ||
616 | return unbound_search(t, j); | ||
617 | } | ||
618 | |||
diff --git a/src/lj_tab.h b/src/lj_tab.h new file mode 100644 index 00000000..e9e8bcd1 --- /dev/null +++ b/src/lj_tab.h | |||
@@ -0,0 +1,41 @@ | |||
1 | /* | ||
2 | ** Table handling. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #ifndef _LJ_TAB_H | ||
7 | #define _LJ_TAB_H | ||
8 | |||
9 | #include "lj_obj.h" | ||
10 | |||
11 | #define hsize2hbits(s) ((s) ? ((s)==1 ? 1 : 1+lj_fls((uint32_t)((s)-1))) : 0) /* Hash bits for s hash slots: 0 for s == 0, 1 for s == 1, else 1+lj_fls(s-1). Evaluates s more than once. */ | ||
12 | |||
13 | LJ_FUNCA GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits); | ||
14 | LJ_FUNCA GCtab *lj_tab_dup(lua_State *L, const GCtab *kt); | ||
15 | LJ_FUNC void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t); | ||
16 | LJ_FUNCA void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize); | ||
17 | |||
18 | /* Caveat: all getters except lj_tab_get() can return NULL! */ | ||
19 | |||
20 | LJ_FUNCA cTValue *lj_tab_getinth(GCtab *t, int32_t key); | ||
21 | LJ_FUNC cTValue *lj_tab_getstr(GCtab *t, GCstr *key); | ||
22 | LJ_FUNCA cTValue *lj_tab_get(lua_State *L, GCtab *t, cTValue *key); | ||
23 | |||
24 | /* Caveat: all setters require a write barrier for the stored value. */ | ||
25 | |||
26 | LJ_FUNCA TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key); | ||
27 | LJ_FUNC TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key); | ||
28 | LJ_FUNC TValue *lj_tab_setstr(lua_State *L, GCtab *t, GCstr *key); | ||
29 | LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key); | ||
30 | |||
31 | #define inarray(t, key) ((MSize)(key) < (MSize)(t)->asize) /* Compared after a cast to MSize; assuming MSize is unsigned, negative keys are never in the array part. */ | ||
32 | #define arrayslot(t, i) (&tvref((t)->array)[(i)]) /* No bounds check. Note: lj_tab_getint() and lj_tab_setint() below evaluate t and key more than once. */ | ||
33 | #define lj_tab_getint(t, key) \ | ||
34 | (inarray((t), (key)) ? arrayslot((t), (key)) : lj_tab_getinth((t), (key))) | ||
35 | #define lj_tab_setint(L, t, key) \ | ||
36 | (inarray((t), (key)) ? arrayslot((t), (key)) : lj_tab_setinth(L, (t), (key))) | ||
37 | |||
38 | LJ_FUNCA int lj_tab_next(lua_State *L, GCtab *t, TValue *key); | ||
39 | LJ_FUNCA MSize lj_tab_len(GCtab *t); | ||
40 | |||
41 | #endif | ||
diff --git a/src/lj_target.h b/src/lj_target.h new file mode 100644 index 00000000..0b464d3f --- /dev/null +++ b/src/lj_target.h | |||
@@ -0,0 +1,132 @@ | |||
1 | /* | ||
2 | ** Definitions for target CPU. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #ifndef _LJ_TARGET_H | ||
7 | #define _LJ_TARGET_H | ||
8 | |||
9 | #include "lj_def.h" | ||
10 | #include "lj_arch.h" | ||
11 | |||
12 | /* -- Registers and spill slots ------------------------------------------- */ | ||
13 | |||
14 | /* Register type (uint8_t in ir->r). */ | ||
15 | typedef uint32_t Reg; | ||
16 | |||
17 | /* The hi-bit is NOT set for an allocated register. This means the value | ||
18 | ** can be directly used without masking. The hi-bit is set for a register | ||
19 | ** allocation hint or for RID_INIT. | ||
20 | */ | ||
21 | #define RID_NONE 0x80 | ||
22 | #define RID_MASK 0x7f | ||
23 | #define RID_INIT (RID_NONE|RID_MASK) | ||
24 | |||
25 | #define ra_noreg(r) ((r) & RID_NONE) | ||
26 | #define ra_hasreg(r) (!((r) & RID_NONE)) | ||
27 | |||
28 | /* The ra_hashint() macro assumes a previous test for ra_noreg(). */ | ||
29 | #define ra_hashint(r) ((r) != RID_INIT) | ||
30 | #define ra_gethint(r) ((Reg)((r) & RID_MASK)) | ||
31 | #define ra_sethint(rr, r) rr = (uint8_t)((r)|RID_NONE) /* Expands to a plain, unparenthesized assignment to rr: use it as a statement only. */ | ||
32 | #define ra_samehint(r1, r2) (ra_gethint((r1)^(r2)) == 0) /* True if the low 7 bits (RID_MASK) of r1 and r2 are equal. */ | ||
33 | |||
34 | /* Spill slot 0 means no spill slot has been allocated. */ | ||
35 | #define SPS_NONE 0 | ||
36 | |||
37 | #define ra_hasspill(s) ((s) != SPS_NONE) | ||
38 | |||
39 | /* Combined register and spill slot (uint16_t in ir->prev). */ | ||
40 | typedef uint32_t RegSP; | ||
41 | |||
42 | #define REGSP(r, s) ((r) + ((s) << 8)) /* Register in bits 0-7, spill slot from bit 8 up. */ | ||
43 | #define REGSP_HINT(r) ((r)|RID_NONE) | ||
44 | #define REGSP_INIT REGSP(RID_INIT, 0) | ||
45 | |||
46 | #define regsp_reg(rs) ((rs) & 255) | ||
47 | #define regsp_spill(rs) ((rs) >> 8) | ||
48 | #define regsp_used(rs) \ | ||
49 | (((rs) & ~REGSP(RID_MASK, 0)) != REGSP(RID_NONE, 0)) /* The low 7 bits are masked out first: false only if RID_NONE is set and the spill slot is 0. */ | ||
50 | |||
51 | /* -- Register sets ------------------------------------------------------- */ | ||
52 | |||
/* Bitset for registers. 32 registers suffice right now.
** Note that one set holds bits for both GPRs and FPRs.
*/
typedef uint32_t RegSet;

/* Set with only register r. */
#define RID2RSET(r)		(((RegSet)1) << (r))
#define RSET_EMPTY		0
/* Set with all registers in [lo, hi). Note: hi-lo must be less than 32. */
#define RSET_RANGE(lo, hi)	((RID2RSET((hi)-(lo))-1) << (lo))

/* The set argument is parenthesized everywhere, so that expressions like
** a|b can be passed without changing the result.
*/
#define rset_test(rs, r)	(((rs) >> (r)) & 1)
#define rset_set(rs, r)		((rs) |= RID2RSET(r))	/* Modifies rs. */
#define rset_clear(rs, r)	((rs) &= ~RID2RSET(r))	/* Modifies rs. */
#define rset_exclude(rs, r)	((rs) & ~RID2RSET(r))	/* rs is unchanged. */
/* Highest/lowest register in a set. The set must not be empty (presumably,
** since lj_fls/lj_ffs are bit scans -- confirm against their definition).
*/
#define rset_picktop(rs)	((Reg)lj_fls(rs))
#define rset_pickbot(rs)	((Reg)lj_ffs(rs))
68 | |||
69 | /* -- Register allocation cost -------------------------------------------- */ | ||
70 | |||
71 | /* The register allocation heuristic keeps track of the cost for allocating | ||
72 | ** a specific register: | ||
73 | ** | ||
74 | ** A free register (obviously) has a cost of 0 and a 1-bit in the free mask. | ||
75 | ** | ||
76 | ** An already allocated register has the (non-zero) IR reference in the lowest | ||
77 | ** bits and the result of a blended cost-model in the higher bits. | ||
78 | ** | ||
79 | ** The allocator first checks the free mask for a hit. Otherwise an (unrolled) | ||
80 | ** linear search for the minimum cost is used. The search doesn't need to | ||
81 | ** keep track of the position of the minimum, which makes it very fast. | ||
82 | ** The lowest bits of the minimum cost show the desired IR reference whose | ||
83 | ** register is the one to evict. | ||
84 | ** | ||
85 | ** Without the cost-model this degenerates to the standard heuristics for | ||
86 | ** (reverse) linear-scan register allocation. Since code generation is done | ||
87 | ** in reverse, a live interval extends from the last use to the first def. | ||
88 | ** For an SSA IR the IR reference is the first (and only) def and thus | ||
89 | ** trivially marks the end of the interval. The LSRA heuristics says to pick | ||
90 | ** the register whose live interval has the furthest extent, i.e. the lowest | ||
91 | ** IR reference in our case. | ||
92 | ** | ||
93 | ** A cost-model should take into account other factors, like spill-cost and | ||
94 | ** restore- or rematerialization-cost, which depend on the kind of instruction. | ||
95 | ** E.g. constants have zero spill costs, variant instructions have higher | ||
96 | ** costs than invariants and PHIs should preferably never be spilled. | ||
97 | ** | ||
98 | ** Here's a first cut at simple, but effective blended cost-model for R-LSRA: | ||
99 | ** - Due to careful design of the IR, constants already have lower IR | ||
100 | ** references than invariants and invariants have lower IR references | ||
101 | ** than variants. | ||
102 | ** - The cost in the upper 16 bits is the sum of the IR reference and a | ||
103 | ** weighted score. The score currently only takes into account whether | ||
104 | ** the IRT_ISPHI bit is set in the instruction type. | ||
105 | ** - The PHI weight is the minimum distance (in IR instructions) a PHI | ||
106 | ** reference has to be further apart from a non-PHI reference to be spilled. | ||
107 | ** - It should be a power of two (for speed) and must be between 2 and 32768. | ||
108 | ** Good values for the PHI weight seem to be between 40 and 150. | ||
109 | ** - Further study is required. | ||
110 | */ | ||
111 | #define REGCOST_PHI_WEIGHT 64 /* A power of two inside the range of good values given above. */ | ||
112 | |||
113 | /* Cost for allocating a specific register. */ | ||
114 | typedef uint32_t RegCost; | ||
115 | |||
116 | /* Note: assumes 16 bit IRRef1. */ | ||
117 | #define REGCOST(cost, ref) ((RegCost)(ref) + ((RegCost)(cost) << 16)) /* ref in the lower 16 bits, cost added from bit 16 up. */ | ||
118 | #define regcost_ref(rc) ((IRRef1)(rc)) /* Extracts the IR reference by truncation to IRRef1. */ | ||
119 | |||
120 | #define REGCOST_T(t) \ | ||
121 | ((RegCost)((t)&IRT_ISPHI) * (((RegCost)(REGCOST_PHI_WEIGHT)<<16)/IRT_ISPHI)) | ||
122 | #define REGCOST_REF_T(ref, t) (REGCOST((ref), (ref)) + REGCOST_T((t))) /* ref in both halves; REGCOST_T() adds the PHI weight to the upper half if IRT_ISPHI is set in t (assumes IRT_ISPHI is a single bit -- TODO confirm). */ | ||
123 | |||
124 | /* -- Target-specific definitions ----------------------------------------- */ | ||
125 | |||
126 | #if LJ_TARGET_X86ORX64 | ||
127 | #include "lj_target_x86.h" | ||
128 | #else | ||
129 | #error "Missing include for target CPU" | ||
130 | #endif | ||
131 | |||
132 | #endif | ||
diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h new file mode 100644 index 00000000..3ee4fa00 --- /dev/null +++ b/src/lj_target_x86.h | |||
@@ -0,0 +1,257 @@ | |||
1 | /* | ||
2 | ** Definitions for x86 and x64 CPUs. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #ifndef _LJ_TARGET_X86_H | ||
7 | #define _LJ_TARGET_X86_H | ||
8 | |||
9 | /* -- Register IDs -------------------------------------------------------- */ | ||
10 | |||
/* X-macro lists of all registers. The list order defines the register IDs. */
#if !LJ_64
#define GPRDEF(_) \
  _(EAX) _(ECX) _(EDX) _(EBX) _(ESP) _(EBP) _(ESI) _(EDI)
#define FPRDEF(_) \
  _(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7)
#else
#define GPRDEF(_) \
  _(EAX) _(ECX) _(EDX) _(EBX) _(ESP) _(EBP) _(ESI) _(EDI) \
  _(R8D) _(R9D) _(R10D) _(R11D) _(R12D) _(R13D) _(R14D) _(R15D)
#define FPRDEF(_) \
  _(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7) \
  _(XMM8) _(XMM9) _(XMM10) _(XMM11) _(XMM12) _(XMM13) _(XMM14) _(XMM15)
#endif

/* Turns a register name into its enumerator. */
#define RIDENUM(name)	RID_##name,

enum {
  /* All GPRs come first, directly followed by all FPRs. */
  GPRDEF(RIDENUM)
  FPRDEF(RIDENUM)
  RID_MAX,
  RID_MRM = RID_MAX,		/* Pseudo-id for ModRM operand. */

  /* Half-open register ranges [min, max) and the resulting counts. */
  RID_MIN_GPR = RID_EAX,
  RID_MIN_FPR = RID_XMM0,
  RID_MAX_GPR = RID_MIN_FPR,
  RID_MAX_FPR = RID_MAX,
  RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR,
  RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR,

  /* Calling conventions. */
  RID_RET = RID_EAX,

  /* These assignments must match with the *.dasc file(s): */
  RID_BASE = RID_EDX,		/* Interpreter BASE. */
  RID_PC = RID_ESI,		/* Interpreter PC. */
  RID_DISPATCH = RID_EBX	/* Interpreter DISPATCH table. */
};
49 | |||
50 | /* -- Register sets ------------------------------------------------------- */ | ||
51 | |||
/* All registers may be used, with the stack pointer as the only exception. */
#define RSET_GPR	(RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR)-RID2RSET(RID_ESP))
#define RSET_FPR	(RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR))
#define RSET_ALL	(RSET_GPR|RSET_FPR)

/* EAX, ECX and EDX: part of the scratch set for every ABI below. */
#define RSET_ACD	(RID2RSET(RID_EAX)|RID2RSET(RID_ECX)|RID2RSET(RID_EDX))

#if LJ_64

/* Note: this requires the use of FORCE_REX! */
#define RSET_GPR8	RSET_GPR

/* ABI-specific scratch register sets. */
#ifdef _WIN64
/* Windows x64 ABI. */
#define RSET_SCRATCH \
  (RSET_ACD|RSET_RANGE(RID_R8D, RID_R11D+1)|RSET_RANGE(RID_XMM0, RID_XMM5+1))
#else
/* The rest of the civilized x64 world has a common ABI. */
#define RSET_SCRATCH \
  (RSET_ACD|RSET_RANGE(RID_ESI, RID_R11D+1)|RSET_FPR)
#endif

/* Prefer the low 8 regs of each type to reduce REX prefixes. */
#undef rset_picktop
#define rset_picktop(rs)	(lj_fls(lj_bswap(rs)) ^ 0x18)

#else

/* x86: the 8 bit register set is the range RID_EAX up to and incl. RID_EBX. */
#define RSET_GPR8	(RSET_RANGE(RID_EAX, RID_EBX+1))

/* Common x86 ABI. */
#define RSET_SCRATCH	(RSET_ACD|RSET_FPR)

#endif
86 | |||
87 | /* -- Spill slots --------------------------------------------------------- */ | ||
88 | |||
/* Layout of the stack used by compiled machine code (after stack adjustment).
** The first three dword slots are temporaries for calls and asm_x87load.
*/
enum {
  SPS_TEMP1 = 0,
  SPS_TEMP2 = 1,
  SPS_TEMP3 = 2,
  SPS_FIRST = 3,	/* First spill slot for general use. */

  /* Fixed spill slots available in the interpreter frame.
  ** This definition must match with the *.dasc file(s).
  */
  SPS_FIXED = 6
};

/* Each spill slot is 32 bits wide. FPRs occupy an even/odd pair of slots. */
#define sps_scale(slot)		((int32_t)(slot) * 4)
/* Byte size of the slots beyond the fixed ones, rounded up to 4 slots. */
#define sps_adjust(as)		(sps_scale((3+as->evenspill-SPS_FIXED)&~3))
103 | |||
104 | /* -- Exit state ---------------------------------------------------------- */ | ||
105 | |||
106 | /* This definition must match with the *.dasc file(s). */ | ||
107 | typedef struct { | ||
108 | lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ | ||
109 | int32_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ | ||
110 | int32_t spill[256]; /* Spill slots. */ | ||
111 | } ExitState; | ||
112 | |||
113 | /* -- x86 ModRM operand encoding ------------------------------------------ */ | ||
114 | |||
/* Values for the top two bits of a ModRM byte (addressing mode) or of a
** SIB byte (index scale). Both fields share the same bit positions.
*/
typedef enum {
  /* ModRM mode field. */
  XM_OFS0 = 0x00,
  XM_OFS8 = 0x40,
  XM_OFS32 = 0x80,
  XM_REG = 0xc0,
  /* SIB scale field. */
  XM_SCALE1 = 0x00,
  XM_SCALE2 = 0x40,
  XM_SCALE4 = 0x80,
  XM_SCALE8 = 0xc0,
  /* Mask covering either field. */
  XM_MASK = 0xc0
} x86Mode;
120 | |||
/* Description of a variable ModRM operand: base + idx*scale + ofs. */
typedef struct {
  /* Offset. */
  int32_t ofs;
  /* Base register or RID_NONE. */
  uint8_t base;
  /* Index register or RID_NONE. */
  uint8_t idx;
  /* Index scale (XM_SCALE1 .. XM_SCALE8). */
  uint8_t scale;
} x86ModRM;
128 | |||
129 | /* -- Opcodes ------------------------------------------------------------- */ | ||
130 | |||
/* Constructors for variable-length x86 opcodes, packed into 32 bits.
** The least significant byte holds -(len+1); the opcode bytes fill the word
** from the top downwards, ending with the final opcode byte in the MSB.
*/
#define XO_(o)		((uint32_t)((0x##o<<24) + 0x0000fe))
#define XO_FPU(a,b)	((uint32_t)((0x##b<<24)+(0x##a<<16) + 0x00fd))
#define XO_0f(o)	((uint32_t)((0x##o<<24) + 0x0f00fd))
#define XO_66(o)	((uint32_t)((0x##o<<24) + 0x6600fd))
#define XO_660f(o)	((uint32_t)((0x##o<<24) + 0x0f66fc))
#define XO_f20f(o)	((uint32_t)((0x##o<<24) + 0x0ff2fc))
#define XO_f30f(o)	((uint32_t)((0x##o<<24) + 0x0ff3fc))
139 | |||
140 | /* This list of x86 opcodes is not intended to be complete. Opcodes are only | ||
141 | ** included when needed. Take a look at DynASM or jit.dis_x86 to see the | ||
142 | ** whole mess. | ||
143 | */ | ||
144 | typedef enum { | ||
145 | /* Fixed length opcodes. XI_* prefix. */ | ||
146 | XI_NOP = 0x90, | ||
147 | XI_CALL = 0xe8, | ||
148 | XI_JMP = 0xe9, | ||
149 | XI_JMPs = 0xeb, | ||
150 | XI_JCCs = 0x70, /* Really 7x. */ | ||
151 | XI_JCCn = 0x80, /* Really 0f8x. */ | ||
152 | XI_LEA = 0x8d, | ||
153 | XI_MOVri = 0xb8, /* Really b8+r. */ | ||
154 | XI_ARITHib = 0x80, | ||
155 | XI_ARITHi = 0x81, | ||
156 | XI_ARITHi8 = 0x83, | ||
157 | XI_PUSHi8 = 0x6a, | ||
158 | XI_TEST = 0x85, | ||
159 | XI_MOVmi = 0xc7, | ||
160 | XI_BSWAP = 0xc8, /* Really 0fc8+r. */ | ||
161 | |||
162 | /* Note: little-endian byte-order! */ | ||
163 | XI_FLDZ = 0xeed9, | ||
164 | XI_FLD1 = 0xe8d9, | ||
165 | XI_FLDLG2 = 0xecd9, | ||
166 | XI_FLDLN2 = 0xedd9, | ||
167 | XI_FPOP = 0xd8dd, /* Really fstp st0. */ | ||
168 | XI_FPOP1 = 0xd9dd, /* Really fstp st1. */ | ||
169 | XI_FRNDINT = 0xfcd9, | ||
170 | XI_FSIN = 0xfed9, | ||
171 | XI_FCOS = 0xffd9, | ||
172 | XI_FPTAN = 0xf2d9, | ||
173 | XI_FPATAN = 0xf3d9, | ||
174 | XI_FSCALE = 0xfdd9, | ||
175 | XI_FYL2X = 0xf1d9, | ||
176 | |||
177 | /* Variable-length opcodes. XO_* prefix. */ | ||
178 | XO_MOV = XO_(8b), | ||
179 | XO_MOVto = XO_(89), | ||
180 | XO_MOVtow = XO_66(89), | ||
181 | XO_MOVtob = XO_(88), | ||
182 | XO_MOVmi = XO_(c7), | ||
183 | XO_MOVmib = XO_(c6), | ||
184 | XO_LEA = XO_(8d), | ||
185 | XO_ARITHib = XO_(80), | ||
186 | XO_ARITHi = XO_(81), | ||
187 | XO_ARITHi8 = XO_(83), | ||
188 | XO_SHIFTi = XO_(c1), | ||
189 | XO_SHIFT1 = XO_(d1), | ||
190 | XO_SHIFTcl = XO_(d3), | ||
191 | XO_IMULi8 = XO_(6b), | ||
192 | XO_CMP = XO_(3b), | ||
193 | XO_TEST = XO_(85), | ||
194 | XO_GROUP3b = XO_(f6), | ||
195 | XO_GROUP3 = XO_(f7), | ||
196 | XO_MOVZXb = XO_0f(b6), | ||
197 | XO_MOVZXw = XO_0f(b7), | ||
198 | XO_MOVSXb = XO_0f(be), | ||
199 | XO_MOVSXw = XO_0f(bf), | ||
200 | |||
201 | XO_MOVSD = XO_f20f(10), | ||
202 | XO_MOVSDto = XO_f20f(11), | ||
203 | XO_MOVLPD = XO_660f(12), | ||
204 | XO_MOVAPS = XO_0f(28), | ||
205 | XO_XORPS = XO_0f(57), | ||
206 | XO_ANDPS = XO_0f(54), | ||
207 | XO_ADDSD = XO_f20f(58), | ||
208 | XO_SUBSD = XO_f20f(5c), | ||
209 | XO_MULSD = XO_f20f(59), | ||
210 | XO_DIVSD = XO_f20f(5e), | ||
211 | XO_SQRTSD = XO_f20f(51), | ||
212 | XO_MINSD = XO_f20f(5d), | ||
213 | XO_MAXSD = XO_f20f(5f), | ||
214 | XO_ROUNDSD = 0x0b3a0ffc, /* Really 66 0f 3a 0b. See asm_fpmath. */ | ||
215 | XO_UCOMISD = XO_660f(2e), | ||
216 | XO_CVTSI2SD = XO_f20f(2a), | ||
217 | XO_CVTSD2SI = XO_f20f(2d), | ||
218 | XO_CVTTSD2SI= XO_f20f(2c), | ||
219 | XO_MOVDto = XO_660f(7e), | ||
220 | |||
221 | XO_FLDq = XO_(dd), XOg_FLDq = 0, | ||
222 | XO_FILDd = XO_(db), XOg_FILDd = 0, | ||
223 | XO_FSTPq = XO_(dd), XOg_FSTPq = 3, | ||
224 | XO_FISTPq = XO_(df), XOg_FISTPq = 7, | ||
225 | } x86Op; | ||
226 | |||
/* An x86 opcode group packs three bytes into one word: bits 16 and up hold
** i8, bits 8..15 hold i and the low byte holds the group number g.
*/
typedef uint32_t x86Group;

#define XG_(i8, i, g)	((x86Group)((g) + ((i) << 8) + ((i8) << 16)))
/* Group for arithmetic with an immediate (XI_ARITHi8 and XI_ARITHi forms). */
#define XG_ARITHi(g)	XG_(XI_ARITHi8, XI_ARITHi, g)

/* Variable-length opcode for arithmetic operation a (shifted to bit 27). */
#define XO_ARITH(a)	((x86Op)(((a)<<27) + 0x030000fe))
234 | |||
/* Group numbers for the arithmetic opcodes. */
typedef enum {
  XOg_ADD = 0,
  XOg_OR,
  XOg_ADC,
  XOg_SBB,
  XOg_AND,
  XOg_SUB,
  XOg_XOR,
  XOg_CMP
} x86Arith;

/* Group numbers for the shift and rotate opcodes. */
typedef enum {
  XOg_ROL = 0,
  XOg_ROR,
  XOg_RCL,
  XOg_RCR,
  XOg_SHL,
  XOg_SHR,
  XOg_SAL,
  XOg_SAR
} x86Shift;

/* Group numbers for the group 3 opcodes. */
typedef enum {
  XOg_TEST = 0,
  XOg_TEST_,
  XOg_NOT,
  XOg_NEG,
  XOg_MUL,
  XOg_IMUL,
  XOg_DIV,
  XOg_IDIV
} x86Group3;
246 | |||
/* x86 condition codes, followed by their alternate names. */
typedef enum {
  CC_O = 0,
  CC_NO,
  CC_B,
  CC_NB,
  CC_E,
  CC_NE,
  CC_BE,
  CC_NBE,
  CC_S,
  CC_NS,
  CC_P,
  CC_NP,
  CC_L,
  CC_NL,
  CC_LE,
  CC_NLE,
  /* Aliases. */
  CC_C = CC_B,
  CC_NAE = CC_C,
  CC_NC = CC_NB,
  CC_AE = CC_NB,
  CC_Z = CC_E,
  CC_NZ = CC_NE,
  CC_NA = CC_BE,
  CC_A = CC_NBE,
  CC_PE = CC_P,
  CC_PO = CC_NP,
  CC_NGE = CC_L,
  CC_GE = CC_NL,
  CC_NG = CC_LE,
  CC_G = CC_NLE
} x86CC;
256 | |||
257 | #endif | ||
diff --git a/src/lj_trace.c b/src/lj_trace.c new file mode 100644 index 00000000..6ceb5633 --- /dev/null +++ b/src/lj_trace.c | |||
@@ -0,0 +1,591 @@ | |||
1 | /* | ||
2 | ** Trace management. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #define lj_trace_c | ||
7 | #define LUA_CORE | ||
8 | |||
9 | #include "lj_obj.h" | ||
10 | |||
11 | #if LJ_HASJIT | ||
12 | |||
13 | #include "lj_gc.h" | ||
14 | #include "lj_err.h" | ||
15 | #include "lj_str.h" | ||
16 | #include "lj_frame.h" | ||
17 | #include "lj_state.h" | ||
18 | #include "lj_bc.h" | ||
19 | #include "lj_ir.h" | ||
20 | #include "lj_jit.h" | ||
21 | #include "lj_iropt.h" | ||
22 | #include "lj_mcode.h" | ||
23 | #include "lj_trace.h" | ||
24 | #include "lj_snap.h" | ||
25 | #include "lj_gdbjit.h" | ||
26 | #include "lj_record.h" | ||
27 | #include "lj_asm.h" | ||
28 | #include "lj_dispatch.h" | ||
29 | #include "lj_vm.h" | ||
30 | #include "lj_vmevent.h" | ||
31 | #include "lj_target.h" | ||
32 | |||
33 | /* -- Error handling ------------------------------------------------------ */ | ||
34 | |||
35 | /* Synchronous abort with error message. */ | ||
36 | void lj_trace_err(jit_State *J, TraceError e) | ||
37 | { | ||
38 | setnilV(&J->errinfo); /* No error info. */ | ||
39 | setintV(J->L->top++, (int32_t)e); | ||
40 | lj_err_throw(J->L, LUA_ERRRUN); | ||
41 | } | ||
42 | |||
43 | /* Synchronous abort with error message and error info. */ | ||
44 | void lj_trace_err_info(jit_State *J, TraceError e) | ||
45 | { | ||
46 | setintV(J->L->top++, (int32_t)e); | ||
47 | lj_err_throw(J->L, LUA_ERRRUN); | ||
48 | } | ||
49 | |||
50 | /* -- Trace management ---------------------------------------------------- */ | ||
51 | |||
52 | /* The current trace is first assembled in J->cur. The variable length | ||
53 | ** arrays point to shared, growable buffers (J->irbuf etc.). The trace is | ||
54 | ** kept in this state until a new trace needs to be created. Then the current | ||
55 | ** trace and its data structures are copied to a new (compact) Trace object. | ||
56 | */ | ||
57 | |||
58 | /* Find a free trace number. */ | ||
59 | static TraceNo trace_findfree(jit_State *J) | ||
60 | { | ||
61 | MSize osz, lim; | ||
62 | if (J->freetrace == 0) | ||
63 | J->freetrace = 1; | ||
64 | for (; J->freetrace < J->sizetrace; J->freetrace++) | ||
65 | if (J->trace[J->freetrace] == NULL) | ||
66 | return J->freetrace++; | ||
67 | /* Need to grow trace array. */ | ||
68 | lim = (MSize)J->param[JIT_P_maxtrace] + 1; | ||
69 | if (lim < 2) lim = 2; else if (lim > 65535) lim = 65535; | ||
70 | osz = J->sizetrace; | ||
71 | if (osz >= lim) | ||
72 | return 0; /* Too many traces. */ | ||
73 | lj_mem_growvec(J->L, J->trace, J->sizetrace, lim, Trace *); | ||
74 | while (osz < J->sizetrace) | ||
75 | J->trace[osz++] = NULL; | ||
76 | return J->freetrace; | ||
77 | } | ||
78 | |||
79 | #define TRACE_COPYELEM(field, szfield, tp) \ | ||
80 | T2->field = (tp *)p; \ | ||
81 | memcpy(p, T->field, T->szfield*sizeof(tp)); \ | ||
82 | p += T->szfield*sizeof(tp); | ||
83 | |||
84 | /* Save a trace by copying and compacting it. */ | ||
85 | static Trace *trace_save(jit_State *J, Trace *T) | ||
86 | { | ||
87 | size_t sztr = ((sizeof(Trace)+7)&~7); | ||
88 | size_t szins = (T->nins-T->nk)*sizeof(IRIns); | ||
89 | size_t sz = sztr + szins + | ||
90 | T->nsnap*sizeof(SnapShot) + | ||
91 | T->nsnapmap*sizeof(IRRef2); | ||
92 | Trace *T2 = lj_mem_newt(J->L, (MSize)sz, Trace); | ||
93 | char *p = (char *)T2 + sztr; | ||
94 | memcpy(T2, T, sizeof(Trace)); | ||
95 | T2->ir = (IRIns *)p - T->nk; | ||
96 | memcpy(p, T->ir+T->nk, szins); | ||
97 | p += szins; | ||
98 | TRACE_COPYELEM(snap, nsnap, SnapShot) | ||
99 | TRACE_COPYELEM(snapmap, nsnapmap, IRRef2) | ||
100 | lj_gc_barriertrace(J2G(J), T); | ||
101 | return T2; | ||
102 | } | ||
103 | |||
104 | /* Free a trace. */ | ||
105 | static void trace_free(jit_State *J, TraceNo traceno) | ||
106 | { | ||
107 | lua_assert(traceno != 0); | ||
108 | if (traceno < J->freetrace) | ||
109 | J->freetrace = traceno; | ||
110 | lj_gdbjit_deltrace(J, J->trace[traceno]); | ||
111 | if (traceno == J->curtrace) { | ||
112 | lua_assert(J->trace[traceno] == &J->cur); | ||
113 | J->trace[traceno] = NULL; | ||
114 | J->curtrace = 0; | ||
115 | } else { | ||
116 | Trace *T = J->trace[traceno]; | ||
117 | lua_assert(T != NULL && T != &J->cur); | ||
118 | J->trace[traceno] = NULL; | ||
119 | lj_mem_free(J2G(J), T, | ||
120 | ((sizeof(Trace)+7)&~7) + (T->nins-T->nk)*sizeof(IRIns) + | ||
121 | T->nsnap*sizeof(SnapShot) + T->nsnapmap*sizeof(IRRef2)); | ||
122 | } | ||
123 | } | ||
124 | |||
125 | /* Free all traces associated with a prototype. No unpatching needed. */ | ||
126 | void lj_trace_freeproto(global_State *g, GCproto *pt) | ||
127 | { | ||
128 | jit_State *J = G2J(g); | ||
129 | TraceNo traceno; | ||
130 | /* Free all root traces. */ | ||
131 | for (traceno = pt->trace; traceno != 0; ) { | ||
132 | TraceNo side, nextroot = J->trace[traceno]->nextroot; | ||
133 | /* Free all side traces. */ | ||
134 | for (side = J->trace[traceno]->nextside; side != 0; ) { | ||
135 | TraceNo next = J->trace[side]->nextside; | ||
136 | trace_free(J, side); | ||
137 | side = next; | ||
138 | } | ||
139 | /* Now free the trace itself. */ | ||
140 | trace_free(J, traceno); | ||
141 | traceno = nextroot; | ||
142 | } | ||
143 | } | ||
144 | |||
145 | /* Re-enable compiling a prototype by unpatching any modified bytecode. */ | ||
146 | void lj_trace_reenableproto(GCproto *pt) | ||
147 | { | ||
148 | if ((pt->flags & PROTO_HAS_ILOOP)) { | ||
149 | BCIns *bc = pt->bc; | ||
150 | BCPos i, sizebc = pt->sizebc;; | ||
151 | pt->flags &= ~PROTO_HAS_ILOOP; | ||
152 | for (i = 0; i < sizebc; i++) { | ||
153 | BCOp op = bc_op(bc[i]); | ||
154 | if (op == BC_IFORL || op == BC_IITERL || op == BC_ILOOP) | ||
155 | setbc_op(&bc[i], (int)op+(int)BC_LOOP-(int)BC_ILOOP); | ||
156 | } | ||
157 | } | ||
158 | } | ||
159 | |||
160 | /* Unpatch the bytecode modified by a root trace. */ | ||
161 | static void trace_unpatch(jit_State *J, Trace *T) | ||
162 | { | ||
163 | BCOp op = bc_op(T->startins); | ||
164 | uint32_t pcofs = T->snap[0].mapofs + T->snap[0].nslots; | ||
165 | BCIns *pc = ((BCIns *)(uintptr_t)T->snapmap[pcofs]) - 1; | ||
166 | switch (op) { | ||
167 | case BC_FORL: | ||
168 | lua_assert(bc_op(*pc) == BC_JFORI); | ||
169 | setbc_op(pc, BC_FORI); /* Unpatch JFORI, too. */ | ||
170 | pc += bc_j(*pc); | ||
171 | lua_assert(bc_op(*pc) == BC_JFORL && J->trace[bc_d(*pc)] == T); | ||
172 | *pc = T->startins; | ||
173 | break; | ||
174 | case BC_LOOP: | ||
175 | lua_assert(bc_op(*pc) == BC_JLOOP && J->trace[bc_d(*pc)] == T); | ||
176 | *pc = T->startins; | ||
177 | break; | ||
178 | case BC_ITERL: | ||
179 | lua_assert(bc_op(*pc) == BC_JMP); | ||
180 | pc += bc_j(*pc)+2; | ||
181 | lua_assert(bc_op(*pc) == BC_JITERL && J->trace[bc_d(*pc)] == T); | ||
182 | *pc = T->startins; | ||
183 | break; | ||
184 | case BC_CALL: | ||
185 | lj_trace_err(J, LJ_TRERR_NYILNKF); | ||
186 | break; | ||
187 | case BC_JMP: /* No need to unpatch branches in parent traces (yet). */ | ||
188 | default: | ||
189 | lua_assert(0); | ||
190 | break; | ||
191 | } | ||
192 | } | ||
193 | |||
194 | /* Flush a root trace and any attached side traces. */ | ||
195 | void lj_trace_flush(jit_State *J, TraceNo traceno) | ||
196 | { | ||
197 | Trace *T = NULL; | ||
198 | GCproto *pt; | ||
199 | if (traceno > 0 && traceno <= J->sizetrace) | ||
200 | T = J->trace[traceno]; | ||
201 | if (T == NULL) | ||
202 | return; | ||
203 | pt = &gcref(T->startpt)->pt; | ||
204 | if (T->root == 0 && pt != NULL) { | ||
205 | TraceNo side; | ||
206 | /* First unpatch any modified bytecode. */ | ||
207 | trace_unpatch(J, T); | ||
208 | /* Unlink root trace from chain anchored in prototype. */ | ||
209 | if (pt->trace == traceno) { /* Trace is first in chain. Easy. */ | ||
210 | pt->trace = T->nextroot; | ||
211 | } else { /* Otherwise search in chain of root traces. */ | ||
212 | Trace *T2 = J->trace[pt->trace]; | ||
213 | while (T2->nextroot != traceno) { | ||
214 | lua_assert(T2->nextroot != 0); | ||
215 | T2 = J->trace[T2->nextroot]; | ||
216 | } | ||
217 | T2->nextroot = T->nextroot; /* Unlink from chain. */ | ||
218 | } | ||
219 | /* Free all side traces. */ | ||
220 | for (side = T->nextside; side != 0; ) { | ||
221 | TraceNo next = J->trace[side]->nextside; | ||
222 | trace_free(J, side); | ||
223 | side = next; | ||
224 | } | ||
225 | /* Now free the trace itself. */ | ||
226 | trace_free(J, traceno); | ||
227 | } /* Flush for non-root traces is currently ignored. */ | ||
228 | } | ||
229 | |||
230 | /* Flush all traces associated with a prototype. */ | ||
231 | void lj_trace_flushproto(global_State *g, GCproto *pt) | ||
232 | { | ||
233 | while (pt->trace != 0) | ||
234 | lj_trace_flush(G2J(g), pt->trace); | ||
235 | } | ||
236 | |||
237 | /* Flush all traces. */ | ||
238 | int lj_trace_flushall(lua_State *L) | ||
239 | { | ||
240 | jit_State *J = L2J(L); | ||
241 | ptrdiff_t i; | ||
242 | if ((J2G(J)->hookmask & HOOK_GC)) | ||
243 | return 1; | ||
244 | for (i = (ptrdiff_t)J->sizetrace-1; i > 0; i--) | ||
245 | lj_trace_flush(J, (TraceNo)i); | ||
246 | #ifdef LUA_USE_ASSERT | ||
247 | for (i = 0; i < (ptrdiff_t)J->sizetrace; i++) | ||
248 | lua_assert(J->trace[i] == NULL); | ||
249 | #endif | ||
250 | J->freetrace = 0; | ||
251 | /* Free the whole machine code and invalidate all exit stub groups. */ | ||
252 | lj_mcode_free(J); | ||
253 | memset(J->exitstubgroup, 0, sizeof(J->exitstubgroup)); | ||
254 | lj_vmevent_send(L, TRACE, | ||
255 | setstrV(L, L->top++, lj_str_newlit(L, "flush")); | ||
256 | ); | ||
257 | return 0; | ||
258 | } | ||
259 | |||
260 | /* Free everything associated with the JIT compiler state. */ | ||
261 | void lj_trace_freestate(global_State *g) | ||
262 | { | ||
263 | jit_State *J = G2J(g); | ||
264 | #ifdef LUA_USE_ASSERT | ||
265 | { /* This assumes all traces have already been freed. */ | ||
266 | ptrdiff_t i; | ||
267 | for (i = 0; i < (ptrdiff_t)J->sizetrace; i++) | ||
268 | lua_assert(J->trace[i] == NULL); | ||
269 | } | ||
270 | #endif | ||
271 | lj_mcode_free(J); | ||
272 | lj_ir_knum_freeall(J); | ||
273 | lj_mem_freevec(g, J->snapmapbuf, J->sizesnapmap, IRRef2); | ||
274 | lj_mem_freevec(g, J->snapbuf, J->sizesnap, SnapShot); | ||
275 | lj_mem_freevec(g, J->irbuf + J->irbotlim, J->irtoplim - J->irbotlim, IRIns); | ||
276 | lj_mem_freevec(g, J->trace, J->sizetrace, Trace *); | ||
277 | } | ||
278 | |||
279 | /* -- Trace compiler state machine ---------------------------------------- */ | ||
280 | |||
281 | /* Penalize a bytecode instruction by bumping its hot counter. */ | ||
282 | static void hotpenalty(jit_State *J, const BCIns *pc, TraceError e) | ||
283 | { | ||
284 | uint32_t i, val = HOTCOUNT_MIN_PENALTY; | ||
285 | for (i = 0; i < PENALTY_SLOTS; i++) | ||
286 | if (J->penalty[i].pc == pc) { | ||
287 | val = ((uint32_t)J->penalty[i].val << 1) + 1; | ||
288 | if (val > HOTCOUNT_MAX_PENALTY) val = HOTCOUNT_MAX_PENALTY; | ||
289 | goto setpenalty; | ||
290 | } | ||
291 | i = J->penaltyslot; | ||
292 | J->penaltyslot = (J->penaltyslot + 1) & (PENALTY_SLOTS-1); | ||
293 | J->penalty[i].pc = pc; | ||
294 | setpenalty: | ||
295 | J->penalty[i].val = (uint16_t)val; | ||
296 | J->penalty[i].reason = e; | ||
297 | hotcount_set(J2GG(J), pc+1, val); | ||
298 | } | ||
299 | |||
300 | /* Start tracing. */ | ||
301 | static void trace_start(jit_State *J) | ||
302 | { | ||
303 | lua_State *L; | ||
304 | |||
305 | if (J->curtrace != 0 && J->trace[J->curtrace] == &J->cur) { | ||
306 | J->trace[J->curtrace] = trace_save(J, &J->cur); /* Save current trace. */ | ||
307 | J->curtrace = 0; | ||
308 | } | ||
309 | |||
310 | if ((J->pt->flags & PROTO_NO_JIT)) { /* JIT disabled for this proto? */ | ||
311 | if (J->parent == 0) { | ||
312 | if (J->pc >= J->pt->bc) { | ||
313 | /* Lazy bytecode patching to disable hotcount events. */ | ||
314 | setbc_op(J->pc, (int)bc_op(*J->pc)+(int)BC_ILOOP-(int)BC_LOOP); | ||
315 | J->pt->flags |= PROTO_HAS_ILOOP; | ||
316 | } else { | ||
317 | /* NYI: lazy closure patching to disable hotcall events. */ | ||
318 | lua_assert(0); | ||
319 | } | ||
320 | } | ||
321 | J->state = LJ_TRACE_IDLE; /* Silently ignored. */ | ||
322 | return; | ||
323 | } | ||
324 | |||
325 | /* Get a new trace number. */ | ||
326 | J->curtrace = trace_findfree(J); | ||
327 | if (LJ_UNLIKELY(J->curtrace == 0)) { /* No free trace? */ | ||
328 | lua_assert((J2G(J)->hookmask & HOOK_GC) == 0); | ||
329 | lj_trace_flushall(J->L); | ||
330 | J->state = LJ_TRACE_IDLE; /* Silently ignored. */ | ||
331 | return; | ||
332 | } | ||
333 | J->trace[J->curtrace] = &J->cur; | ||
334 | |||
335 | /* Setup enough of the current trace to be able to send the vmevent. */ | ||
336 | memset(&J->cur, 0, sizeof(Trace)); | ||
337 | J->cur.nins = J->cur.nk = REF_BASE; | ||
338 | J->cur.ir = J->irbuf; | ||
339 | J->cur.snap = J->snapbuf; | ||
340 | J->cur.snapmap = J->snapmapbuf; | ||
341 | /* J->cur.nsnapmap = 0; */ | ||
342 | J->mergesnap = 0; | ||
343 | J->needsnap = 0; | ||
344 | J->guardemit.irt = 0; | ||
345 | |||
346 | L = J->L; | ||
347 | lj_vmevent_send(L, TRACE, | ||
348 | setstrV(L, L->top++, lj_str_newlit(L, "start")); | ||
349 | setintV(L->top++, J->curtrace); | ||
350 | setfuncV(L, L->top++, J->fn); | ||
351 | setintV(L->top++, J->pc - J->pt->bc + 1); | ||
352 | if (J->parent) { | ||
353 | setintV(L->top++, J->parent); | ||
354 | setintV(L->top++, J->exitno); | ||
355 | } | ||
356 | ); | ||
357 | lj_record_setup(J); | ||
358 | } | ||
359 | |||
360 | /* Stop tracing. */ | ||
361 | static void trace_stop(jit_State *J) | ||
362 | { | ||
363 | BCIns *pc = (BCIns *)J->startpc; /* Not const here. */ | ||
364 | BCOp op = bc_op(J->cur.startins); | ||
365 | GCproto *pt = &gcref(J->cur.startpt)->pt; | ||
366 | lua_State *L; | ||
367 | |||
368 | switch (op) { | ||
369 | case BC_FORL: | ||
370 | setbc_op(pc+bc_j(J->cur.startins), BC_JFORI); /* Patch FORI, too. */ | ||
371 | /* fallthrough */ | ||
372 | case BC_LOOP: | ||
373 | case BC_ITERL: | ||
374 | /* Patch bytecode of starting instruction in root trace. */ | ||
375 | setbc_op(pc, (int)op+(int)BC_JLOOP-(int)BC_LOOP); | ||
376 | setbc_d(pc, J->curtrace); | ||
377 | /* Add to root trace chain in prototype. */ | ||
378 | J->cur.nextroot = pt->trace; | ||
379 | pt->trace = (TraceNo1)J->curtrace; | ||
380 | break; | ||
381 | case BC_CALL: | ||
382 | lj_trace_err(J, LJ_TRERR_NYILNKF); | ||
383 | break; | ||
384 | case BC_JMP: | ||
385 | /* Patch exit branch in parent to side trace entry. */ | ||
386 | lua_assert(J->parent != 0 && J->cur.root != 0); | ||
387 | lj_asm_patchexit(J, J->trace[J->parent], J->exitno, J->cur.mcode); | ||
388 | /* Avoid compiling a side trace twice (stack resizing uses parent exit). */ | ||
389 | J->trace[J->parent]->snap[J->exitno].count = SNAPCOUNT_DONE; | ||
390 | /* Add to side trace chain in root trace. */ | ||
391 | { | ||
392 | Trace *root = J->trace[J->cur.root]; | ||
393 | root->nchild++; | ||
394 | J->cur.nextside = root->nextside; | ||
395 | root->nextside = (TraceNo1)J->curtrace; | ||
396 | } | ||
397 | break; | ||
398 | default: | ||
399 | lua_assert(0); | ||
400 | break; | ||
401 | } | ||
402 | |||
403 | /* Commit new mcode only after all patching is done. */ | ||
404 | lj_mcode_commit(J, J->cur.mcode); | ||
405 | lj_gdbjit_addtrace(J, &J->cur, J->curtrace); | ||
406 | |||
407 | L = J->L; | ||
408 | lj_vmevent_send(L, TRACE, | ||
409 | setstrV(L, L->top++, lj_str_newlit(L, "stop")); | ||
410 | setintV(L->top++, J->curtrace); | ||
411 | ); | ||
412 | } | ||
413 | |||
414 | /* Abort tracing. */ | ||
415 | static int trace_abort(jit_State *J) | ||
416 | { | ||
417 | lua_State *L = J->L; | ||
418 | TraceError e = LJ_TRERR_RECERR; | ||
419 | lj_mcode_abort(J); | ||
420 | if (tvisnum(L->top-1)) | ||
421 | e = (TraceError)lj_num2int(numV(L->top-1)); | ||
422 | if (e == LJ_TRERR_MCODELM) { | ||
423 | J->state = LJ_TRACE_ASM; | ||
424 | return 1; /* Retry ASM with new MCode area. */ | ||
425 | } | ||
426 | if (J->parent == 0) | ||
427 | hotpenalty(J, J->startpc, e); /* Penalize starting instruction. */ | ||
428 | if (J->curtrace) { /* Is there anything to abort? */ | ||
429 | ptrdiff_t errobj = savestack(L, L->top-1); /* Stack may be resized. */ | ||
430 | lj_vmevent_send(L, TRACE, | ||
431 | setstrV(L, L->top++, lj_str_newlit(L, "abort")); | ||
432 | setintV(L->top++, J->curtrace); | ||
433 | setfuncV(L, L->top++, J->fn); | ||
434 | setintV(L->top++, J->pc - J->pt->bc + 1); | ||
435 | copyTV(L, L->top++, restorestack(L, errobj)); | ||
436 | copyTV(L, L->top++, &J->errinfo); | ||
437 | ); | ||
438 | /* Drop aborted trace after the vmevent (which may still access it). */ | ||
439 | J->trace[J->curtrace] = NULL; | ||
440 | if (J->curtrace < J->freetrace) | ||
441 | J->freetrace = J->curtrace; | ||
442 | J->curtrace = 0; | ||
443 | } | ||
444 | L->top--; /* Remove error object */ | ||
445 | if (e == LJ_TRERR_MCODEAL) | ||
446 | lj_trace_flushall(L); | ||
447 | return 0; | ||
448 | } | ||
449 | |||
450 | /* State machine for the trace compiler. Protected callback. */ | ||
451 | static TValue *trace_state(lua_State *L, lua_CFunction dummy, void *ud) | ||
452 | { | ||
453 | jit_State *J = (jit_State *)ud; | ||
454 | UNUSED(dummy); | ||
455 | do { | ||
456 | switch (J->state) { | ||
457 | case LJ_TRACE_START: | ||
458 | J->state = LJ_TRACE_RECORD; /* trace_start() may change state. */ | ||
459 | trace_start(J); | ||
460 | lj_dispatch_update(J2G(J)); | ||
461 | break; | ||
462 | |||
463 | case LJ_TRACE_RECORD: | ||
464 | setvmstate(J2G(J), RECORD); | ||
465 | lj_vmevent_send(L, RECORD, | ||
466 | setintV(L->top++, J->curtrace); | ||
467 | setfuncV(L, L->top++, J->fn); | ||
468 | setintV(L->top++, J->pc - J->pt->bc + 1); | ||
469 | setintV(L->top++, J->framedepth); | ||
470 | if (bcmode_mm(bc_op(*J->pc)) == MM_call) { | ||
471 | cTValue *o = &L->base[bc_a(*J->pc)]; | ||
472 | if (bc_op(*J->pc) == BC_ITERC) o -= 3; | ||
473 | copyTV(L, L->top++, o); | ||
474 | } | ||
475 | ); | ||
476 | lj_record_ins(J); | ||
477 | break; | ||
478 | |||
479 | case LJ_TRACE_END: | ||
480 | J->loopref = 0; | ||
481 | if ((J->flags & JIT_F_OPT_LOOP) && J->cur.link == J->curtrace) { | ||
482 | setvmstate(J2G(J), OPT); | ||
483 | lj_opt_dce(J); | ||
484 | if (lj_opt_loop(J)) { /* Loop optimization failed? */ | ||
485 | J->loopref = J->cur.nins; | ||
486 | J->state = LJ_TRACE_RECORD; /* Try to continue recording. */ | ||
487 | break; | ||
488 | } | ||
489 | J->loopref = J->chain[IR_LOOP]; /* Needed by assembler. */ | ||
490 | } | ||
491 | J->state = LJ_TRACE_ASM; | ||
492 | break; | ||
493 | |||
494 | case LJ_TRACE_ASM: | ||
495 | setvmstate(J2G(J), ASM); | ||
496 | lj_asm_trace(J, &J->cur); | ||
497 | trace_stop(J); | ||
498 | setvmstate(J2G(J), INTERP); | ||
499 | J->state = LJ_TRACE_IDLE; | ||
500 | lj_dispatch_update(J2G(J)); | ||
501 | return NULL; | ||
502 | |||
503 | default: /* Trace aborted asynchronously. */ | ||
504 | setintV(L->top++, (int32_t)LJ_TRERR_RECERR); | ||
505 | /* fallthrough */ | ||
506 | case LJ_TRACE_ERR: | ||
507 | if (trace_abort(J)) | ||
508 | break; /* Retry. */ | ||
509 | setvmstate(J2G(J), INTERP); | ||
510 | J->state = LJ_TRACE_IDLE; | ||
511 | lj_dispatch_update(J2G(J)); | ||
512 | return NULL; | ||
513 | } | ||
514 | } while (J->state > LJ_TRACE_RECORD); | ||
515 | return NULL; | ||
516 | } | ||
517 | |||
518 | /* -- Event handling ------------------------------------------------------ */ | ||
519 | |||
520 | /* A bytecode instruction is about to be executed. Record it. */ | ||
521 | void lj_trace_ins(jit_State *J) | ||
522 | { | ||
523 | while (lj_vm_cpcall(J->L, trace_state, NULL, (void *)J) != 0) | ||
524 | J->state = LJ_TRACE_ERR; | ||
525 | } | ||
526 | |||
527 | /* Start recording a new trace. */ | ||
528 | static void trace_new(jit_State *J) | ||
529 | { | ||
530 | /* Only start a new trace if not inside __gc call or vmevent. */ | ||
531 | if (!(J2G(J)->hookmask & (HOOK_GC|HOOK_VMEVENT))) { | ||
532 | lua_assert(J->state == LJ_TRACE_IDLE); | ||
533 | J->state = LJ_TRACE_START; | ||
534 | J->fn = curr_func(J->L); | ||
535 | J->pt = funcproto(J->fn); | ||
536 | lj_trace_ins(J); | ||
537 | } | ||
538 | } | ||
539 | |||
540 | /* A hotcount triggered. Start recording a root trace. */ | ||
541 | void lj_trace_hot(jit_State *J, const BCIns *pc) | ||
542 | { | ||
543 | lua_State *L = J->L; | ||
544 | L->top = curr_topL(L); /* Only called from Lua and NRESULTS is not used. */ | ||
545 | hotcount_set(J2GG(J), pc, J->param[JIT_P_hotloop]+1); /* Reset hotcount. */ | ||
546 | J->parent = 0; /* Root trace. */ | ||
547 | J->exitno = 0; | ||
548 | J->pc = pc-1; /* The interpreter bytecode PC is offset by 1. */ | ||
549 | trace_new(J); | ||
550 | } | ||
551 | |||
552 | /* A trace exited. Restore interpreter state and check for hot exits. */ | ||
553 | void *lj_trace_exit(jit_State *J, void *exptr) | ||
554 | { | ||
555 | lua_State *L = J->L; | ||
556 | void *cf; | ||
557 | |||
558 | /* Restore interpreter state. */ | ||
559 | lj_snap_restore(J, exptr); | ||
560 | cf = cframe_raw(L->cframe); | ||
561 | cframe_pc(cf) = J->pc; | ||
562 | |||
563 | lj_vmevent_send(L, TEXIT, | ||
564 | ExitState *ex = (ExitState *)exptr; | ||
565 | uint32_t i; | ||
566 | lj_state_checkstack(L, 4+RID_NUM_GPR+RID_NUM_FPR+LUA_MINSTACK); | ||
567 | setintV(L->top++, J->parent); | ||
568 | setintV(L->top++, J->exitno); | ||
569 | setintV(L->top++, RID_NUM_GPR); | ||
570 | setintV(L->top++, RID_NUM_FPR); | ||
571 | for (i = 0; i < RID_NUM_GPR; i++) | ||
572 | setintV(L->top++, ex->gpr[i]); | ||
573 | for (i = 0; i < RID_NUM_FPR; i++) { | ||
574 | setnumV(L->top, ex->fpr[i]); | ||
575 | if (LJ_UNLIKELY(tvisnan(L->top))) | ||
576 | setnanV(L->top); | ||
577 | L->top++; | ||
578 | } | ||
579 | ); | ||
580 | |||
581 | { /* Check for a hot exit. */ | ||
582 | SnapShot *snap = &J->trace[J->parent]->snap[J->exitno]; | ||
583 | if (snap->count != SNAPCOUNT_DONE && | ||
584 | ++snap->count >= J->param[JIT_P_hotexit]) | ||
585 | trace_new(J); /* Start recording a side trace. */ | ||
586 | } | ||
587 | |||
588 | return cf; /* Return the interpreter C frame. */ | ||
589 | } | ||
590 | |||
591 | #endif | ||
diff --git a/src/lj_trace.h b/src/lj_trace.h new file mode 100644 index 00000000..9d8eb790 --- /dev/null +++ b/src/lj_trace.h | |||
@@ -0,0 +1,52 @@ | |||
1 | /* | ||
2 | ** Trace management. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #ifndef _LJ_TRACE_H | ||
7 | #define _LJ_TRACE_H | ||
8 | |||
9 | #if LJ_HASJIT | ||
10 | |||
11 | #include "lj_obj.h" | ||
12 | #include "lj_jit.h" | ||
13 | #include "lj_dispatch.h" | ||
14 | |||
15 | /* Trace errors. */ | ||
16 | typedef enum { | ||
17 | #define TREDEF(name, msg) LJ_TRERR_##name, | ||
18 | #include "lj_traceerr.h" | ||
19 | LJ_TRERR__MAX | ||
20 | } TraceError; | ||
21 | |||
22 | LJ_FUNC_NORET void lj_trace_err(jit_State *J, TraceError e); | ||
23 | LJ_FUNC_NORET void lj_trace_err_info(jit_State *J, TraceError e); | ||
24 | |||
25 | /* Trace management. */ | ||
26 | LJ_FUNC void lj_trace_freeproto(global_State *g, GCproto *pt); | ||
27 | LJ_FUNC void lj_trace_reenableproto(GCproto *pt); | ||
28 | LJ_FUNC void lj_trace_flushproto(global_State *g, GCproto *pt); | ||
29 | LJ_FUNC void lj_trace_flush(jit_State *J, TraceNo traceno); | ||
30 | LJ_FUNC int lj_trace_flushall(lua_State *L); | ||
31 | LJ_FUNC void lj_trace_freestate(global_State *g); | ||
32 | |||
33 | /* Event handling. */ | ||
34 | LJ_FUNC void lj_trace_ins(jit_State *J); | ||
35 | LJ_FUNCA void lj_trace_hot(jit_State *J, const BCIns *pc); | ||
36 | LJ_FUNCA void *lj_trace_exit(jit_State *J, void *exptr); | ||
37 | |||
38 | /* Signal asynchronous abort of trace or end of trace. */ | ||
39 | #define lj_trace_abort(g) (G2J(g)->state &= ~LJ_TRACE_ACTIVE) | ||
40 | #define lj_trace_end(J) (J->state = LJ_TRACE_END) | ||
41 | |||
42 | #else | ||
43 | |||
44 | #define lj_trace_flushall(L) (UNUSED(L), 0) | ||
45 | #define lj_trace_freestate(g) UNUSED(g) | ||
46 | #define lj_trace_freeproto(g, pt) (UNUSED(g), UNUSED(pt), (void)0) | ||
47 | #define lj_trace_abort(g) UNUSED(g) | ||
48 | #define lj_trace_end(J) UNUSED(J) | ||
49 | |||
50 | #endif | ||
51 | |||
52 | #endif | ||
diff --git a/src/lj_traceerr.h b/src/lj_traceerr.h new file mode 100644 index 00000000..2c8347b0 --- /dev/null +++ b/src/lj_traceerr.h | |||
@@ -0,0 +1,59 @@ | |||
1 | /* | ||
2 | ** Trace compiler error messages. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | /* This file may be included multiple times with different TREDEF macros. */ | ||
7 | |||
8 | /* Recording. */ | ||
9 | TREDEF(RECERR, "error thrown or hook called during recording") | ||
10 | TREDEF(TRACEOV, "trace too long") | ||
11 | TREDEF(STACKOV, "trace too deep") | ||
12 | TREDEF(SNAPOV, "too many snapshots") | ||
13 | TREDEF(NYIBC, "NYI: bytecode %d") | ||
14 | |||
15 | /* Recording loop ops. */ | ||
16 | TREDEF(LLEAVE, "leaving loop in root trace") | ||
17 | TREDEF(LINNER, "inner loop in root trace") | ||
18 | TREDEF(LUNROLL, "loop unroll limit reached") | ||
19 | TREDEF(LBLACKL, "blacklisted loop") | ||
20 | |||
21 | /* Recording calls/returns. */ | ||
22 | TREDEF(BADTYPE, "bad argument type") | ||
23 | TREDEF(CJITOFF, "call to JIT-disabled function") | ||
24 | TREDEF(CUNROLL, "call unroll limit reached") | ||
25 | TREDEF(NYIRECU, "NYI: recursive calls") | ||
26 | TREDEF(NYILNKF, "NYI: linking/patching function calls") | ||
27 | TREDEF(NYIVF, "NYI: vararg function") | ||
28 | TREDEF(NYICF, "NYI: C function %p") | ||
29 | TREDEF(NYIFF, "NYI: FastFunc %s") | ||
30 | TREDEF(NYIFFU, "NYI: unsupported variant of FastFunc %s") | ||
31 | TREDEF(NYIRETL, "NYI: return to lower frame") | ||
32 | |||
33 | /* Recording indexed load/store. */ | ||
34 | TREDEF(STORENN, "store with nil or NaN key") | ||
35 | TREDEF(NOMM, "missing metamethod") | ||
36 | TREDEF(IDXLOOP, "looping index lookup") | ||
37 | TREDEF(NYITMIX, "NYI: mixed sparse/dense table") | ||
38 | |||
39 | /* Optimizations. */ | ||
40 | TREDEF(GFAIL, "guard would always fail") | ||
41 | TREDEF(PHIOV, "too many PHIs") | ||
42 | TREDEF(TYPEINS, "persistent type instability") | ||
43 | |||
44 | /* Assembler. */ | ||
45 | TREDEF(MCODEAL, "failed to allocate mcode memory") | ||
46 | TREDEF(MCODEOV, "machine code too long") | ||
47 | TREDEF(MCODELM, "hit mcode limit (retrying)") | ||
48 | TREDEF(SPILLOV, "too many spill slots") | ||
49 | TREDEF(BADRA, "inconsistent register allocation") | ||
50 | TREDEF(NYIIR, "NYI: cannot assemble IR instruction %d") | ||
51 | TREDEF(NYIPHI, "NYI: PHI shuffling too complex") | ||
52 | TREDEF(NYICOAL, "NYI: register coalescing too complex") | ||
53 | TREDEF(NYIGCF, "NYI: gcstep sync with frames") | ||
54 | |||
55 | #undef TREDEF | ||
56 | |||
57 | /* Detecting unused error messages: | ||
58 | awk -F, '/^TREDEF/ { gsub(/TREDEF./, ""); printf "grep -q LJ_TRERR_%s *.[ch] || echo %s\n", $1, $1}' lj_traceerr.h | sh | ||
59 | */ | ||
diff --git a/src/lj_udata.c b/src/lj_udata.c new file mode 100644 index 00000000..863889c9 --- /dev/null +++ b/src/lj_udata.c | |||
@@ -0,0 +1,33 @@ | |||
1 | /* | ||
2 | ** Userdata handling. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #define lj_udata_c | ||
7 | #define LUA_CORE | ||
8 | |||
9 | #include "lj_obj.h" | ||
10 | #include "lj_gc.h" | ||
11 | #include "lj_udata.h" | ||
12 | |||
13 | GCudata *lj_udata_new(lua_State *L, MSize sz, GCtab *env) /* Allocate a userdata with sz payload bytes and environment table env. */ | ||
14 | { | ||
15 | GCudata *ud = lj_mem_newt(L, sizeof(GCudata) + sz, GCudata); /* Header and payload come from one allocation. */ | ||
16 | global_State *g = G(L); | ||
17 | newwhite(g, ud); /* Not finalized. */ | ||
18 | ud->gct = ~LJ_TUDATA; /* Object type field holds the complemented userdata tag. */ | ||
19 | ud->len = sz; /* Payload size as requested by the caller. */ | ||
20 | /* NOBARRIER: The GCudata is new (marked white). */ | ||
21 | setgcrefnull(ud->metatable); /* Starts without a metatable. */ | ||
22 | setgcref(ud->env, obj2gco(env)); | ||
23 | /* Chain to userdata list (after main thread). */ | ||
24 | setgcrefr(ud->nextgc, mainthread(g)->nextgc); | ||
25 | setgcref(mainthread(g)->nextgc, obj2gco(ud)); | ||
26 | return ud; /* The new userdata, already linked behind the main thread. */ | ||
27 | } | ||
28 | |||
29 | void LJ_FASTCALL lj_udata_free(global_State *g, GCudata *ud) /* Release the memory of userdata ud. */ | ||
30 | { | ||
31 | lj_mem_free(g, ud, sizeudata(ud)); /* Size passed to the allocator is whatever sizeudata() reports for ud. */ | ||
32 | } | ||
33 | |||
diff --git a/src/lj_udata.h b/src/lj_udata.h new file mode 100644 index 00000000..981304f8 --- /dev/null +++ b/src/lj_udata.h | |||
@@ -0,0 +1,14 @@ | |||
1 | /* | ||
2 | ** Userdata handling. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #ifndef _LJ_UDATA_H | ||
7 | #define _LJ_UDATA_H | ||
8 | |||
9 | #include "lj_obj.h" | ||
10 | |||
11 | LJ_FUNC GCudata *lj_udata_new(lua_State *L, MSize sz, GCtab *env); | ||
12 | LJ_FUNC void LJ_FASTCALL lj_udata_free(global_State *g, GCudata *ud); | ||
13 | |||
14 | #endif | ||
diff --git a/src/lj_vm.h b/src/lj_vm.h new file mode 100644 index 00000000..f50614bb --- /dev/null +++ b/src/lj_vm.h | |||
@@ -0,0 +1,66 @@ | |||
1 | /* | ||
2 | ** Assembler VM interface definitions. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #ifndef _LJ_VM_H | ||
7 | #define _LJ_VM_H | ||
8 | |||
9 | #include "lj_obj.h" | ||
10 | |||
11 | /* Entry points for ASM parts of VM. */ | ||
12 | LJ_ASMF void lj_vm_call(lua_State *L, TValue *base, int nres1); | ||
13 | LJ_ASMF int lj_vm_pcall(lua_State *L, TValue *base, int nres1, ptrdiff_t ef); | ||
14 | typedef TValue *(*lua_CPFunction)(lua_State *L, lua_CFunction func, void *ud); | ||
15 | LJ_ASMF int lj_vm_cpcall(lua_State *L, lua_CPFunction cp, lua_CFunction func, | ||
16 | void *ud); | ||
17 | LJ_ASMF int lj_vm_resume(lua_State *L, TValue *base, int nres1, ptrdiff_t ef); | ||
18 | LJ_ASMF_NORET void lj_vm_unwind_c(void *cframe, int errcode); | ||
19 | LJ_ASMF_NORET void lj_vm_unwind_ff(void *cframe); | ||
20 | |||
21 | /* Miscellaneous functions. */ | ||
22 | #if LJ_TARGET_X86ORX64 | ||
23 | LJ_ASMF int lj_vm_cpuid(uint32_t f, uint32_t res[4]); | ||
24 | #endif | ||
25 | LJ_ASMF double lj_vm_foldarith(double x, double y, int op); | ||
26 | LJ_ASMF double lj_vm_foldfpm(double x, int op); | ||
27 | |||
28 | /* Dispatch targets for recording and hooks. */ | ||
29 | LJ_ASMF void lj_vm_record(void); | ||
30 | LJ_ASMF void lj_vm_hook(void); | ||
31 | |||
32 | /* Trace exit handling. */ | ||
33 | LJ_ASMF void lj_vm_exit_handler(void); | ||
34 | LJ_ASMF void lj_vm_exit_interp(void); | ||
35 | |||
36 | /* Handlers callable from compiled code. */ | ||
37 | LJ_ASMF void lj_vm_floor(void); | ||
38 | LJ_ASMF void lj_vm_ceil(void); | ||
39 | LJ_ASMF void lj_vm_trunc(void); | ||
40 | LJ_ASMF void lj_vm_exp(void); | ||
41 | LJ_ASMF void lj_vm_exp2(void); | ||
42 | LJ_ASMF void lj_vm_pow(void); | ||
43 | LJ_ASMF void lj_vm_powi(void); | ||
44 | |||
45 | /* Call gates for functions. */ | ||
46 | LJ_ASMF void lj_gate_lf(void); | ||
47 | LJ_ASMF void lj_gate_lv(void); | ||
48 | LJ_ASMF void lj_gate_c(void); | ||
49 | |||
50 | /* Continuations for metamethods. */ | ||
51 | LJ_ASMF void lj_cont_cat(void); /* Continue with concatenation. */ | ||
52 | LJ_ASMF void lj_cont_ra(void); /* Store result in RA from instruction. */ | ||
53 | LJ_ASMF void lj_cont_nop(void); /* Do nothing, just continue execution. */ | ||
54 | LJ_ASMF void lj_cont_condt(void); /* Branch if result is true. */ | ||
55 | LJ_ASMF void lj_cont_condf(void); /* Branch if result is false. */ | ||
56 | |||
57 | /* Start of the ASM code. */ | ||
58 | LJ_ASMF void lj_vm_asm_begin(void); | ||
59 | |||
60 | /* Opcode handler offsets, relative to lj_vm_asm_begin. */ | ||
61 | LJ_ASMF const uint16_t lj_vm_op_ofs[]; | ||
62 | |||
63 | #define makeasmfunc(ofs) \ | ||
64 | ((ASMFunction)((char *)lj_vm_asm_begin + (ofs))) | ||
65 | |||
66 | #endif | ||
diff --git a/src/lj_vmevent.c b/src/lj_vmevent.c new file mode 100644 index 00000000..657eb8d7 --- /dev/null +++ b/src/lj_vmevent.c | |||
@@ -0,0 +1,56 @@ | |||
1 | /* | ||
2 | ** VM event handling. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #include <stdio.h> | ||
7 | |||
8 | #define lj_vmevent_c | ||
9 | #define LUA_CORE | ||
10 | |||
11 | #include "lj_obj.h" | ||
12 | #include "lj_str.h" | ||
13 | #include "lj_tab.h" | ||
14 | #include "lj_state.h" | ||
15 | #include "lj_dispatch.h" | ||
16 | #include "lj_vm.h" | ||
17 | #include "lj_vmevent.h" | ||
18 | |||
19 | ptrdiff_t lj_vmevent_prepare(lua_State *L, VMEvent ev) /* Push the handler for ev, if any. Returns the saved stack position after it, or 0 if there is no handler. */ | ||
20 | { | ||
21 | global_State *g = G(L); | ||
22 | GCstr *s = lj_str_newlit(L, LJ_VMEVENTS_REGKEY); | ||
23 | cTValue *tv = lj_tab_getstr(tabV(registry(L)), s); /* Look up the handler table in the registry. */ | ||
24 | if (tvistab(tv)) { | ||
25 | int hash = VMEVENT_HASH(ev); /* Event ID with its low 3 bits cleared. */ | ||
26 | tv = lj_tab_getint(tabV(tv), hash); /* Handlers are stored under that integer key. */ | ||
27 | if (tv && tvisfunc(tv)) { | ||
28 | lj_state_checkstack(L, LUA_MINSTACK); | ||
29 | setfuncV(L, L->top++, funcV(tv)); /* Push the handler function. */ | ||
30 | return savestack(L, L->top); /* Position of the first argument slot, saved via savestack(). */ | ||
31 | } | ||
32 | } | ||
33 | g->vmevmask &= ~VMEVENT_MASK(ev); /* No handler: cache this fact. */ | ||
34 | return 0; | ||
35 | } | ||
37 | void lj_vmevent_call(lua_State *L, ptrdiff_t argbase) /* Run the handler prepared at argbase in protected mode with all VM events disabled. */ | ||
38 | { | ||
39 | global_State *g = G(L); | ||
40 | uint8_t oldmask = g->vmevmask; | ||
41 | uint8_t oldh = hook_save(g); /* Saved here, put back by hook_restore() below. */ | ||
42 | int status; | ||
43 | g->vmevmask = 0; /* Disable all events. */ | ||
44 | hook_vmevent(g); /* NOTE(review): presumably marks a VM event handler as active -- definition not visible here. */ | ||
45 | status = lj_vm_pcall(L, restorestack(L, argbase), 0+1, 0); /* nres1 is 0+1; presumably "number of results + 1", i.e. none -- confirm. */ | ||
46 | if (LJ_UNLIKELY(status)) { | ||
47 | /* Really shouldn't use stderr here, but where else to complain? */ | ||
48 | L->top--; /* Step back to the topmost slot; it is printed as the message if it is a string. */ | ||
49 | fprintf(stderr, "VM handler failed: %s\n", | ||
50 | tvisstr(L->top) ? strVdata(L->top) : "?"); | ||
51 | } | ||
52 | hook_restore(g, oldh); | ||
53 | if (g->vmevmask != VMEVENT_NOCACHE) | ||
54 | g->vmevmask = oldmask; /* Restore event mask, unless it now equals VMEVENT_NOCACHE. */ | ||
55 | } | ||
56 | |||
diff --git a/src/lj_vmevent.h b/src/lj_vmevent.h new file mode 100644 index 00000000..9eaa52e1 --- /dev/null +++ b/src/lj_vmevent.h | |||
@@ -0,0 +1,49 @@ | |||
1 | /* | ||
2 | ** VM event handling. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #ifndef _LJ_VMEVENT_H | ||
7 | #define _LJ_VMEVENT_H | ||
8 | |||
9 | #include "lj_obj.h" | ||
10 | |||
11 | /* Registry key for VM event handler table. */ | ||
12 | #define LJ_VMEVENTS_REGKEY "_VMEVENTS" | ||
13 | #define LJ_VMEVENTS_HSIZE 4 | ||
14 | |||
15 | #define VMEVENT_MASK(ev) ((uint8_t)1 << ((int)(ev) & 7)) | ||
16 | #define VMEVENT_HASH(ev) ((int)(ev) & ~7) | ||
17 | #define VMEVENT_HASHIDX(h) ((int)(h) << 3) | ||
18 | #define VMEVENT_NOCACHE 255 | ||
19 | |||
20 | #define VMEVENT_DEF(name, hash) \ | ||
21 | LJ_VMEVENT_##name##_, \ | ||
22 | LJ_VMEVENT_##name = ((LJ_VMEVENT_##name##_) & 7)|((hash) << 3) | ||
23 | |||
24 | /* VM event IDs. */ | ||
25 | typedef enum { | ||
26 | VMEVENT_DEF(BC, 0x0000140b), | ||
27 | VMEVENT_DEF(TRACE, 0x10ea574d), | ||
28 | VMEVENT_DEF(RECORD, 0x5698231c), | ||
29 | VMEVENT_DEF(TEXIT, 0x12d984a7), | ||
30 | LJ_VMEVENT__MAX | ||
31 | } VMEvent; | ||
32 | |||
33 | #ifdef LUAJIT_DISABLE_VMEVENT | ||
34 | #define lj_vmevent_send(L, ev, args) UNUSED(L) | ||
35 | #else | ||
36 | #define lj_vmevent_send(L, ev, args) \ | ||
37 | if (G(L)->vmevmask & VMEVENT_MASK(LJ_VMEVENT_##ev)) { \ | ||
38 | ptrdiff_t argbase = lj_vmevent_prepare(L, LJ_VMEVENT_##ev); \ | ||
39 | if (argbase) { \ | ||
40 | args \ | ||
41 | lj_vmevent_call(L, argbase); \ | ||
42 | } \ | ||
43 | } | ||
44 | |||
45 | LJ_FUNC ptrdiff_t lj_vmevent_prepare(lua_State *L, VMEvent ev); | ||
46 | LJ_FUNC void lj_vmevent_call(lua_State *L, ptrdiff_t argbase); | ||
47 | #endif | ||
48 | |||
49 | #endif | ||
diff --git a/src/ljamalg.c b/src/ljamalg.c new file mode 100644 index 00000000..46d0e21f --- /dev/null +++ b/src/ljamalg.c | |||
@@ -0,0 +1,70 @@ | |||
1 | /* | ||
2 | ** LuaJIT core and libraries amalgamation. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | /* | ||
7 | +--------------------------------------------------------------------------+ | ||
8 | | WARNING: Compiling the amalgamation needs a lot of virtual memory | | ||
9 | | (around 160 MB with GCC 4.x)! If you don't have enough physical memory | | ||
10 | | your machine will start swapping to disk and the compile will not finish | | ||
11 | | within a reasonable amount of time. | | ||
12 | | So either compile on a bigger machine or use the non-amalgamated build. | | ||
13 | +--------------------------------------------------------------------------+ | ||
14 | */ | ||
15 | |||
16 | #define ljamalg_c | ||
17 | #define LUA_CORE | ||
18 | |||
19 | /* To get the mremap prototype. Must be defined before any system includes. */ | ||
20 | #if defined(__linux__) && !defined(_GNU_SOURCE) | ||
21 | #define _GNU_SOURCE | ||
22 | #endif | ||
23 | |||
24 | #include "lua.h" | ||
25 | #include "lauxlib.h" | ||
26 | |||
27 | #include "lj_gc.c" | ||
28 | #include "lj_err.c" | ||
29 | #include "lj_ctype.c" | ||
30 | #include "lj_bc.c" | ||
31 | #include "lj_obj.c" | ||
32 | #include "lj_str.c" | ||
33 | #include "lj_tab.c" | ||
34 | #include "lj_func.c" | ||
35 | #include "lj_udata.c" | ||
36 | #include "lj_meta.c" | ||
37 | #include "lj_state.c" | ||
38 | #include "lj_dispatch.c" | ||
39 | #include "lj_vmevent.c" | ||
40 | #include "lj_api.c" | ||
41 | #include "lj_lex.c" | ||
42 | #include "lj_parse.c" | ||
43 | #include "lj_lib.c" | ||
44 | #include "lj_ir.c" | ||
45 | #include "lj_opt_mem.c" | ||
46 | #include "lj_opt_fold.c" | ||
47 | #include "lj_opt_narrow.c" | ||
48 | #include "lj_opt_dce.c" | ||
49 | #include "lj_opt_loop.c" | ||
50 | #include "lj_mcode.c" | ||
51 | #include "lj_snap.c" | ||
52 | #include "lj_record.c" | ||
53 | #include "lj_asm.c" | ||
54 | #include "lj_trace.c" | ||
55 | #include "lj_gdbjit.c" | ||
56 | #include "lj_alloc.c" | ||
57 | |||
58 | #include "lib_aux.c" | ||
59 | #include "lib_base.c" | ||
60 | #include "lib_math.c" | ||
61 | #include "lib_string.c" | ||
62 | #include "lib_table.c" | ||
63 | #include "lib_io.c" | ||
64 | #include "lib_os.c" | ||
65 | #include "lib_package.c" | ||
66 | #include "lib_debug.c" | ||
67 | #include "lib_bit.c" | ||
68 | #include "lib_jit.c" | ||
69 | #include "lib_init.c" | ||
70 | |||
diff --git a/src/lua.h b/src/lua.h new file mode 100644 index 00000000..0e98b374 --- /dev/null +++ b/src/lua.h | |||
@@ -0,0 +1,388 @@ | |||
1 | /* | ||
2 | ** $Id: lua.h,v 1.218.1.5 2008/08/06 13:30:12 roberto Exp $ | ||
3 | ** Lua - An Extensible Extension Language | ||
4 | ** Lua.org, PUC-Rio, Brazil (http://www.lua.org) | ||
5 | ** See Copyright Notice at the end of this file | ||
6 | */ | ||
7 | |||
8 | |||
9 | #ifndef lua_h | ||
10 | #define lua_h | ||
11 | |||
12 | #include <stdarg.h> | ||
13 | #include <stddef.h> | ||
14 | |||
15 | |||
16 | #include "luaconf.h" | ||
17 | |||
18 | |||
19 | #define LUA_VERSION "Lua 5.1" | ||
20 | #define LUA_RELEASE "Lua 5.1.4" | ||
21 | #define LUA_VERSION_NUM 501 | ||
22 | #define LUA_COPYRIGHT "Copyright (C) 1994-2008 Lua.org, PUC-Rio" | ||
23 | #define LUA_AUTHORS "R. Ierusalimschy, L. H. de Figueiredo & W. Celes" | ||
24 | |||
25 | |||
26 | /* mark for precompiled code (`<esc>Lua') */ | ||
27 | #define LUA_SIGNATURE "\033Lua" | ||
28 | |||
29 | /* option for multiple returns in `lua_pcall' and `lua_call' */ | ||
30 | #define LUA_MULTRET (-1) | ||
31 | |||
32 | |||
33 | /* | ||
34 | ** pseudo-indices | ||
35 | */ | ||
36 | #define LUA_REGISTRYINDEX (-10000) | ||
37 | #define LUA_ENVIRONINDEX (-10001) | ||
38 | #define LUA_GLOBALSINDEX (-10002) | ||
39 | #define lua_upvalueindex(i) (LUA_GLOBALSINDEX-(i)) | ||
40 | |||
41 | |||
42 | /* thread status; 0 is OK */ | ||
43 | #define LUA_YIELD 1 | ||
44 | #define LUA_ERRRUN 2 | ||
45 | #define LUA_ERRSYNTAX 3 | ||
46 | #define LUA_ERRMEM 4 | ||
47 | #define LUA_ERRERR 5 | ||
48 | |||
49 | |||
50 | typedef struct lua_State lua_State; | ||
51 | |||
52 | typedef int (*lua_CFunction) (lua_State *L); | ||
53 | |||
54 | |||
55 | /* | ||
56 | ** functions that read/write blocks when loading/dumping Lua chunks | ||
57 | */ | ||
58 | typedef const char * (*lua_Reader) (lua_State *L, void *ud, size_t *sz); | ||
59 | |||
60 | typedef int (*lua_Writer) (lua_State *L, const void* p, size_t sz, void* ud); | ||
61 | |||
62 | |||
63 | /* | ||
64 | ** prototype for memory-allocation functions | ||
65 | */ | ||
66 | typedef void * (*lua_Alloc) (void *ud, void *ptr, size_t osize, size_t nsize); | ||
67 | |||
68 | |||
69 | /* | ||
70 | ** basic types | ||
71 | */ | ||
72 | #define LUA_TNONE (-1) | ||
73 | |||
74 | #define LUA_TNIL 0 | ||
75 | #define LUA_TBOOLEAN 1 | ||
76 | #define LUA_TLIGHTUSERDATA 2 | ||
77 | #define LUA_TNUMBER 3 | ||
78 | #define LUA_TSTRING 4 | ||
79 | #define LUA_TTABLE 5 | ||
80 | #define LUA_TFUNCTION 6 | ||
81 | #define LUA_TUSERDATA 7 | ||
82 | #define LUA_TTHREAD 8 | ||
83 | |||
84 | |||
85 | |||
86 | /* minimum Lua stack available to a C function */ | ||
87 | #define LUA_MINSTACK 20 | ||
88 | |||
89 | |||
90 | /* | ||
91 | ** generic extra include file | ||
92 | */ | ||
93 | #if defined(LUA_USER_H) | ||
94 | #include LUA_USER_H | ||
95 | #endif | ||
96 | |||
97 | |||
98 | /* type of numbers in Lua */ | ||
99 | typedef LUA_NUMBER lua_Number; | ||
100 | |||
101 | |||
102 | /* type for integer functions */ | ||
103 | typedef LUA_INTEGER lua_Integer; | ||
104 | |||
105 | |||
106 | |||
107 | /* | ||
108 | ** state manipulation | ||
109 | */ | ||
110 | LUA_API lua_State *(lua_newstate) (lua_Alloc f, void *ud); | ||
111 | LUA_API void (lua_close) (lua_State *L); | ||
112 | LUA_API lua_State *(lua_newthread) (lua_State *L); | ||
113 | |||
114 | LUA_API lua_CFunction (lua_atpanic) (lua_State *L, lua_CFunction panicf); | ||
115 | |||
116 | |||
117 | /* | ||
118 | ** basic stack manipulation | ||
119 | */ | ||
120 | LUA_API int (lua_gettop) (lua_State *L); | ||
121 | LUA_API void (lua_settop) (lua_State *L, int idx); | ||
122 | LUA_API void (lua_pushvalue) (lua_State *L, int idx); | ||
123 | LUA_API void (lua_remove) (lua_State *L, int idx); | ||
124 | LUA_API void (lua_insert) (lua_State *L, int idx); | ||
125 | LUA_API void (lua_replace) (lua_State *L, int idx); | ||
126 | LUA_API int (lua_checkstack) (lua_State *L, int sz); | ||
127 | |||
128 | LUA_API void (lua_xmove) (lua_State *from, lua_State *to, int n); | ||
129 | |||
130 | |||
131 | /* | ||
132 | ** access functions (stack -> C) | ||
133 | */ | ||
134 | |||
135 | LUA_API int (lua_isnumber) (lua_State *L, int idx); | ||
136 | LUA_API int (lua_isstring) (lua_State *L, int idx); | ||
137 | LUA_API int (lua_iscfunction) (lua_State *L, int idx); | ||
138 | LUA_API int (lua_isuserdata) (lua_State *L, int idx); | ||
139 | LUA_API int (lua_type) (lua_State *L, int idx); | ||
140 | LUA_API const char *(lua_typename) (lua_State *L, int tp); | ||
141 | |||
142 | LUA_API int (lua_equal) (lua_State *L, int idx1, int idx2); | ||
143 | LUA_API int (lua_rawequal) (lua_State *L, int idx1, int idx2); | ||
144 | LUA_API int (lua_lessthan) (lua_State *L, int idx1, int idx2); | ||
145 | |||
146 | LUA_API lua_Number (lua_tonumber) (lua_State *L, int idx); | ||
147 | LUA_API lua_Integer (lua_tointeger) (lua_State *L, int idx); | ||
148 | LUA_API int (lua_toboolean) (lua_State *L, int idx); | ||
149 | LUA_API const char *(lua_tolstring) (lua_State *L, int idx, size_t *len); | ||
150 | LUA_API size_t (lua_objlen) (lua_State *L, int idx); | ||
151 | LUA_API lua_CFunction (lua_tocfunction) (lua_State *L, int idx); | ||
152 | LUA_API void *(lua_touserdata) (lua_State *L, int idx); | ||
153 | LUA_API lua_State *(lua_tothread) (lua_State *L, int idx); | ||
154 | LUA_API const void *(lua_topointer) (lua_State *L, int idx); | ||
155 | |||
156 | |||
157 | /* | ||
158 | ** push functions (C -> stack) | ||
159 | */ | ||
160 | LUA_API void (lua_pushnil) (lua_State *L); | ||
161 | LUA_API void (lua_pushnumber) (lua_State *L, lua_Number n); | ||
162 | LUA_API void (lua_pushinteger) (lua_State *L, lua_Integer n); | ||
163 | LUA_API void (lua_pushlstring) (lua_State *L, const char *s, size_t l); | ||
164 | LUA_API void (lua_pushstring) (lua_State *L, const char *s); | ||
165 | LUA_API const char *(lua_pushvfstring) (lua_State *L, const char *fmt, | ||
166 | va_list argp); | ||
167 | LUA_API const char *(lua_pushfstring) (lua_State *L, const char *fmt, ...); | ||
168 | LUA_API void (lua_pushcclosure) (lua_State *L, lua_CFunction fn, int n); | ||
169 | LUA_API void (lua_pushboolean) (lua_State *L, int b); | ||
170 | LUA_API void (lua_pushlightuserdata) (lua_State *L, void *p); | ||
171 | LUA_API int (lua_pushthread) (lua_State *L); | ||
172 | |||
173 | |||
174 | /* | ||
175 | ** get functions (Lua -> stack) | ||
176 | */ | ||
177 | LUA_API void (lua_gettable) (lua_State *L, int idx); | ||
178 | LUA_API void (lua_getfield) (lua_State *L, int idx, const char *k); | ||
179 | LUA_API void (lua_rawget) (lua_State *L, int idx); | ||
180 | LUA_API void (lua_rawgeti) (lua_State *L, int idx, int n); | ||
181 | LUA_API void (lua_createtable) (lua_State *L, int narr, int nrec); | ||
182 | LUA_API void *(lua_newuserdata) (lua_State *L, size_t sz); | ||
183 | LUA_API int (lua_getmetatable) (lua_State *L, int objindex); | ||
184 | LUA_API void (lua_getfenv) (lua_State *L, int idx); | ||
185 | |||
186 | |||
187 | /* | ||
188 | ** set functions (stack -> Lua) | ||
189 | */ | ||
190 | LUA_API void (lua_settable) (lua_State *L, int idx); | ||
191 | LUA_API void (lua_setfield) (lua_State *L, int idx, const char *k); | ||
192 | LUA_API void (lua_rawset) (lua_State *L, int idx); | ||
193 | LUA_API void (lua_rawseti) (lua_State *L, int idx, int n); | ||
194 | LUA_API int (lua_setmetatable) (lua_State *L, int objindex); | ||
195 | LUA_API int (lua_setfenv) (lua_State *L, int idx); | ||
196 | |||
197 | |||
198 | /* | ||
199 | ** `load' and `call' functions (load and run Lua code) | ||
200 | */ | ||
201 | LUA_API void (lua_call) (lua_State *L, int nargs, int nresults); | ||
202 | LUA_API int (lua_pcall) (lua_State *L, int nargs, int nresults, int errfunc); | ||
203 | LUA_API int (lua_cpcall) (lua_State *L, lua_CFunction func, void *ud); | ||
204 | LUA_API int (lua_load) (lua_State *L, lua_Reader reader, void *dt, | ||
205 | const char *chunkname); | ||
206 | |||
207 | LUA_API int (lua_dump) (lua_State *L, lua_Writer writer, void *data); | ||
208 | |||
209 | |||
210 | /* | ||
211 | ** coroutine functions | ||
212 | */ | ||
213 | LUA_API int (lua_yield) (lua_State *L, int nresults); | ||
214 | LUA_API int (lua_resume) (lua_State *L, int narg); | ||
215 | LUA_API int (lua_status) (lua_State *L); | ||
216 | |||
217 | /* | ||
218 | ** garbage-collection function and options | ||
219 | */ | ||
220 | |||
221 | #define LUA_GCSTOP 0 | ||
222 | #define LUA_GCRESTART 1 | ||
223 | #define LUA_GCCOLLECT 2 | ||
224 | #define LUA_GCCOUNT 3 | ||
225 | #define LUA_GCCOUNTB 4 | ||
226 | #define LUA_GCSTEP 5 | ||
227 | #define LUA_GCSETPAUSE 6 | ||
228 | #define LUA_GCSETSTEPMUL 7 | ||
229 | |||
230 | LUA_API int (lua_gc) (lua_State *L, int what, int data); | ||
231 | |||
232 | |||
233 | /* | ||
234 | ** miscellaneous functions | ||
235 | */ | ||
236 | |||
237 | LUA_API int (lua_error) (lua_State *L); | ||
238 | |||
239 | LUA_API int (lua_next) (lua_State *L, int idx); | ||
240 | |||
241 | LUA_API void (lua_concat) (lua_State *L, int n); | ||
242 | |||
243 | LUA_API lua_Alloc (lua_getallocf) (lua_State *L, void **ud); | ||
244 | LUA_API void lua_setallocf (lua_State *L, lua_Alloc f, void *ud); | ||
245 | |||
246 | |||
247 | |||
248 | /* | ||
249 | ** =============================================================== | ||
250 | ** some useful macros | ||
251 | ** =============================================================== | ||
252 | */ | ||
253 | |||
254 | #define lua_pop(L,n) lua_settop(L, -(n)-1) | ||
255 | |||
256 | #define lua_newtable(L) lua_createtable(L, 0, 0) | ||
257 | |||
258 | #define lua_register(L,n,f) (lua_pushcfunction(L, (f)), lua_setglobal(L, (n))) | ||
259 | |||
260 | #define lua_pushcfunction(L,f) lua_pushcclosure(L, (f), 0) | ||
261 | |||
262 | #define lua_strlen(L,i) lua_objlen(L, (i)) | ||
263 | |||
264 | #define lua_isfunction(L,n) (lua_type(L, (n)) == LUA_TFUNCTION) | ||
265 | #define lua_istable(L,n) (lua_type(L, (n)) == LUA_TTABLE) | ||
266 | #define lua_islightuserdata(L,n) (lua_type(L, (n)) == LUA_TLIGHTUSERDATA) | ||
267 | #define lua_isnil(L,n) (lua_type(L, (n)) == LUA_TNIL) | ||
268 | #define lua_isboolean(L,n) (lua_type(L, (n)) == LUA_TBOOLEAN) | ||
269 | #define lua_isthread(L,n) (lua_type(L, (n)) == LUA_TTHREAD) | ||
270 | #define lua_isnone(L,n) (lua_type(L, (n)) == LUA_TNONE) | ||
271 | #define lua_isnoneornil(L, n) (lua_type(L, (n)) <= 0) | ||
272 | |||
273 | #define lua_pushliteral(L, s) \ | ||
274 | lua_pushlstring(L, "" s, (sizeof(s)/sizeof(char))-1) | ||
275 | |||
276 | #define lua_setglobal(L,s) lua_setfield(L, LUA_GLOBALSINDEX, (s)) | ||
277 | #define lua_getglobal(L,s) lua_getfield(L, LUA_GLOBALSINDEX, (s)) | ||
278 | |||
279 | #define lua_tostring(L,i) lua_tolstring(L, (i), NULL) | ||
280 | |||
281 | |||
282 | |||
283 | /* | ||
284 | ** compatibility macros and functions | ||
285 | */ | ||
286 | |||
287 | #define lua_open() luaL_newstate() | ||
288 | |||
289 | #define lua_getregistry(L) lua_pushvalue(L, LUA_REGISTRYINDEX) | ||
290 | |||
291 | #define lua_getgccount(L) lua_gc(L, LUA_GCCOUNT, 0) | ||
292 | |||
293 | #define lua_Chunkreader lua_Reader | ||
294 | #define lua_Chunkwriter lua_Writer | ||
295 | |||
296 | |||
297 | /* hack */ | ||
298 | LUA_API void lua_setlevel (lua_State *from, lua_State *to); | ||
299 | |||
300 | |||
301 | /* | ||
302 | ** {====================================================================== | ||
303 | ** Debug API | ||
304 | ** ======================================================================= | ||
305 | */ | ||
306 | |||
307 | |||
308 | /* | ||
309 | ** Event codes | ||
310 | */ | ||
311 | #define LUA_HOOKCALL 0 | ||
312 | #define LUA_HOOKRET 1 | ||
313 | #define LUA_HOOKLINE 2 | ||
314 | #define LUA_HOOKCOUNT 3 | ||
315 | #define LUA_HOOKTAILRET 4 | ||
316 | |||
317 | |||
318 | /* | ||
319 | ** Event masks | ||
320 | */ | ||
321 | #define LUA_MASKCALL (1 << LUA_HOOKCALL) | ||
322 | #define LUA_MASKRET (1 << LUA_HOOKRET) | ||
323 | #define LUA_MASKLINE (1 << LUA_HOOKLINE) | ||
324 | #define LUA_MASKCOUNT (1 << LUA_HOOKCOUNT) | ||
325 | |||
326 | typedef struct lua_Debug lua_Debug; /* activation record */ | ||
327 | |||
328 | |||
329 | /* Functions to be called by the debugger in specific events */ | ||
330 | typedef void (*lua_Hook) (lua_State *L, lua_Debug *ar); | ||
331 | |||
332 | |||
333 | LUA_API int lua_getstack (lua_State *L, int level, lua_Debug *ar); | ||
334 | LUA_API int lua_getinfo (lua_State *L, const char *what, lua_Debug *ar); | ||
335 | LUA_API const char *lua_getlocal (lua_State *L, const lua_Debug *ar, int n); | ||
336 | LUA_API const char *lua_setlocal (lua_State *L, const lua_Debug *ar, int n); | ||
337 | LUA_API const char *lua_getupvalue (lua_State *L, int funcindex, int n); | ||
338 | LUA_API const char *lua_setupvalue (lua_State *L, int funcindex, int n); | ||
339 | |||
340 | LUA_API int lua_sethook (lua_State *L, lua_Hook func, int mask, int count); | ||
341 | LUA_API lua_Hook lua_gethook (lua_State *L); | ||
342 | LUA_API int lua_gethookmask (lua_State *L); | ||
343 | LUA_API int lua_gethookcount (lua_State *L); | ||
344 | |||
345 | |||
346 | struct lua_Debug { | ||
347 | int event; | ||
348 | const char *name; /* (n) */ | ||
349 | const char *namewhat; /* (n) `global', `local', `field', `method' */ | ||
350 | const char *what; /* (S) `Lua', `C', `main', `tail' */ | ||
351 | const char *source; /* (S) */ | ||
352 | int currentline; /* (l) */ | ||
353 | int nups; /* (u) number of upvalues */ | ||
354 | int linedefined; /* (S) */ | ||
355 | int lastlinedefined; /* (S) */ | ||
356 | char short_src[LUA_IDSIZE]; /* (S) */ | ||
357 | /* private part */ | ||
358 | int i_ci; /* active function */ | ||
359 | }; | ||
360 | |||
361 | /* }====================================================================== */ | ||
362 | |||
363 | |||
364 | /****************************************************************************** | ||
365 | * Copyright (C) 1994-2008 Lua.org, PUC-Rio. All rights reserved. | ||
366 | * | ||
367 | * Permission is hereby granted, free of charge, to any person obtaining | ||
368 | * a copy of this software and associated documentation files (the | ||
369 | * "Software"), to deal in the Software without restriction, including | ||
370 | * without limitation the rights to use, copy, modify, merge, publish, | ||
371 | * distribute, sublicense, and/or sell copies of the Software, and to | ||
372 | * permit persons to whom the Software is furnished to do so, subject to | ||
373 | * the following conditions: | ||
374 | * | ||
375 | * The above copyright notice and this permission notice shall be | ||
376 | * included in all copies or substantial portions of the Software. | ||
377 | * | ||
378 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
379 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
380 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | ||
381 | * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | ||
382 | * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
383 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
384 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
385 | ******************************************************************************/ | ||
386 | |||
387 | |||
388 | #endif | ||
diff --git a/src/lua.hpp b/src/lua.hpp new file mode 100644 index 00000000..ec417f59 --- /dev/null +++ b/src/lua.hpp | |||
@@ -0,0 +1,9 @@ | |||
1 | // lua.hpp | ||
2 | // Lua header files for C++ | ||
3 | // <<extern "C">> not supplied automatically because Lua also compiles as C++ | ||
4 | |||
5 | extern "C" { | ||
6 | #include "lua.h" | ||
7 | #include "lualib.h" | ||
8 | #include "lauxlib.h" | ||
9 | } | ||
diff --git a/src/luaconf.h b/src/luaconf.h new file mode 100644 index 00000000..4d4f1099 --- /dev/null +++ b/src/luaconf.h | |||
@@ -0,0 +1,133 @@ | |||
1 | /* | ||
2 | ** Configuration header. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #ifndef luaconf_h | ||
7 | #define luaconf_h | ||
8 | |||
9 | #include <limits.h> | ||
10 | #include <stddef.h> | ||
11 | |||
/*
** Try to determine supported features for a couple of standard platforms.
**
** LUA_USE_WIN / LUA_USE_POSIX select the OS API family used by the rest of
** the sources; LUA_DL_DLL / LUA_DL_DLOPEN select the dynamic loader flavour.
** On unrecognized platforms none of these are defined.
**
** Solaris compilers predefine __sun together with __SVR4 rather than
** __solaris__, so both spellings are accepted.
*/
#if defined(_WIN32)
#define LUA_USE_WIN
#define LUA_DL_DLL
#elif defined(__linux__) || defined(__solaris__) || defined(__CYGWIN__) || \
      (defined(__sun) && defined(__SVR4)) || \
      defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
      (defined(__MACH__) && defined(__APPLE__))
#define LUA_USE_POSIX
#define LUA_DL_DLOPEN
#endif
22 | |||
/*
** Default path for loading Lua and C modules with require().
** Each default is a ';'-separated list of templates; the module name is
** substituted for '?'.
*/
#ifdef LUA_USE_WIN
/*
** In Windows, any exclamation mark ('!') in the path is replaced by the
** path of the directory of the executable file of the current process.
*/
#define LUA_LDIR "!\\lua\\"
#define LUA_CDIR "!\\"
/* Lua modules: current directory first, then the lua\ directory. */
#define LUA_PATH_DEFAULT \
  ".\\?.lua;" LUA_LDIR"?.lua;" LUA_LDIR"?\\init.lua;"
/* C modules: current directory first, then the executable's directory. */
#define LUA_CPATH_DEFAULT \
  ".\\?.dll;" LUA_CDIR"?.dll;" LUA_CDIR"loadall.dll"
#else
/*
** LUA_ROOT is hard-coded here: the top-level Makefile asks for this header
** to be modified whenever the installation PREFIX is changed.
** LUA_JDIR contains the full release name, so it is specific to this
** release; LUA_LDIR and LUA_CDIR use the Lua 5.1 directory names.
*/
#define LUA_ROOT "/usr/local/"
#define LUA_JDIR LUA_ROOT "share/luajit-2.0.0-beta1/"
#define LUA_LDIR LUA_ROOT "share/lua/5.1/"
#define LUA_CDIR LUA_ROOT "lib/lua/5.1/"
/* Lua modules: current directory, then LUA_JDIR, then LUA_LDIR. */
#define LUA_PATH_DEFAULT \
  "./?.lua;" LUA_JDIR"?.lua;" LUA_LDIR"?.lua;" LUA_LDIR"?/init.lua;"
/* C modules: current directory, then LUA_CDIR. */
#define LUA_CPATH_DEFAULT \
  "./?.so;" LUA_CDIR"?.so;" LUA_CDIR"loadall.so"
#endif
45 | |||
/* Environment variable names for path overrides and initialization code. */
#define LUA_PATH "LUA_PATH"
#define LUA_CPATH "LUA_CPATH"
#define LUA_INIT "LUA_INIT"

/* Special file system characters. */
#ifdef LUA_USE_WIN
#define LUA_DIRSEP "\\"
#else
#define LUA_DIRSEP "/"
#endif
#define LUA_PATHSEP ";"    /* Separates templates in a path. */
#define LUA_PATH_MARK "?"  /* Substitution point in a template. */
#define LUA_EXECDIR "!"    /* Executable's directory (see the Windows paths). */
#define LUA_IGMARK "-"
/*
** The five markers above joined into one string, one per line, in the
** order: directory separator, path separator, path mark, executable
** directory mark, ignore mark.
*/
#define LUA_PATH_CONFIG \
  LUA_DIRSEP "\n" LUA_PATHSEP "\n" LUA_PATH_MARK "\n" \
  LUA_EXECDIR "\n" LUA_IGMARK
64 | |||
/*
** Quoting in error messages.
** LUA_QL wraps a string literal in single quotes by literal concatenation,
** so its argument must itself be a string literal. LUA_QS is the quoted
** "%s" format specifier.
*/
#define LUA_QL(x) "'" x "'"
#define LUA_QS LUA_QL("%s")

/* Various tunables. */
#define LUAI_MAXSTACK 65500 /* Max. # of stack slots for a thread (<64K). */
#define LUAI_MAXCSTACK 8000 /* Max. # of stack slots for a C func (<10K). */
#define LUAI_GCPAUSE 200 /* Pause GC until memory is at 200%. */
#define LUAI_GCMUL 200 /* Run GC at 200% of allocation speed. */
#define LUA_MAXCAPTURES 32 /* Max. pattern captures. */

/* Compatibility with older library function names. */
#define LUA_COMPAT_MOD /* OLD: math.mod, NEW: math.fmod */
#define LUA_COMPAT_GFIND /* OLD: string.gfind, NEW: string.gmatch */

/*
** Configuration for the frontend (the luajit executable).
** Only visible to a translation unit that defines luajit_c before
** including this header.
*/
#if defined(luajit_c)
#define LUA_PROGNAME "luajit" /* Fallback frontend name. */
#define LUA_PROMPT "> " /* Interactive prompt. */
#define LUA_PROMPT2 ">> " /* Continuation prompt. */
#define LUA_MAXINPUT 512 /* Max. input line length. */
#endif

/*
** Note: changing the following defines breaks the Lua 5.1 ABI.
** BUFSIZ comes from <stdio.h>, which this header does not include itself;
** code expanding LUAL_BUFFERSIZE has to include it.
*/
#define LUA_INTEGER ptrdiff_t
#define LUA_IDSIZE 60 /* Size of lua_Debug.short_src. */
#define LUAL_BUFFERSIZE BUFSIZ /* Size of lauxlib and io.* buffers. */
92 | |||
/* The following defines are here only for compatibility with luaconf.h
** from the standard Lua distribution. They must not be changed for LuaJIT.
**
** lua_number2str and lua_str2number expand to sprintf() and strtod(); this
** header includes neither <stdio.h> nor <stdlib.h>, so users of these
** macros must do so.
*/
#define LUA_NUMBER_DOUBLE
#define LUA_NUMBER double
#define LUAI_UACNUMBER double
#define LUA_NUMBER_SCAN "%lf"
#define LUA_NUMBER_FMT "%.14g"
#define lua_number2str(s, n) sprintf((s), LUA_NUMBER_FMT, (n))
#define LUAI_MAXNUMBER2STR 32
#define lua_str2number(s, p) strtod((s), (p))
#define LUA_INTFRMLEN "l"
#define LUA_INTFRM_T long

/*
** Linkage of public API functions.
** With LUA_BUILD_AS_DLL, symbols are exported while compiling the core or
** the libraries (LUA_CORE or LUA_LIB defined) and imported otherwise.
** Without it, plain external linkage is used.
*/
#if defined(LUA_BUILD_AS_DLL)
#if defined(LUA_CORE) || defined(LUA_LIB)
#define LUA_API __declspec(dllexport)
#else
#define LUA_API __declspec(dllimport)
#endif
#else
#define LUA_API extern
#endif

/* Library functions use the same linkage as the core API. */
#define LUALIB_API LUA_API
119 | |||
/*
** Support for internal assertions.
** LUA_USE_ASSERT maps lua_assert() to the C assert(); when it is not
** defined, lua_assert is left undefined by this header.
** LUA_USE_APICHECK makes luai_apicheck() assert its condition; otherwise
** it only evaluates L to silence unused-argument warnings.
** Note that luai_apicheck expands to a brace-enclosed block, not an
** expression.
*/
#if defined(LUA_USE_ASSERT) || defined(LUA_USE_APICHECK)
#include <assert.h>
#endif
#ifdef LUA_USE_ASSERT
#define lua_assert(x) assert(x)
#endif
#ifdef LUA_USE_APICHECK
#define luai_apicheck(L, o) { (void)L; assert(o); }
#else
#define luai_apicheck(L, o) { (void)L; }
#endif
132 | |||
133 | #endif | ||
diff --git a/src/luajit.c b/src/luajit.c new file mode 100644 index 00000000..9153975b --- /dev/null +++ b/src/luajit.c | |||
@@ -0,0 +1,519 @@ | |||
1 | /* | ||
2 | ** LuaJIT frontend. Runs commands, scripts, read-eval-print (REPL) etc. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | ** | ||
5 | ** Major portions taken verbatim or adapted from the Lua interpreter. | ||
6 | ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h | ||
7 | */ | ||
8 | |||
9 | #include <signal.h> | ||
10 | #include <stdio.h> | ||
11 | #include <stdlib.h> | ||
12 | #include <string.h> | ||
13 | |||
14 | #define luajit_c | ||
15 | |||
16 | #include "lua.h" | ||
17 | #include "lauxlib.h" | ||
18 | #include "lualib.h" | ||
19 | #include "luajit.h" | ||
20 | |||
/*
** lua_stdin_is_tty() is non-zero if standard input is a terminal.
** POSIX uses isatty() on descriptor 0, Windows uses the CRT equivalent
** (Borland spells it without the leading underscore).
*/
#if defined(LUA_USE_POSIX)
#include <unistd.h>
#define lua_stdin_is_tty() isatty(0)
#elif defined(LUA_USE_WIN)
#include <io.h>
#ifdef __BORLANDC__
#define lua_stdin_is_tty() isatty(_fileno(stdin))
#else
#define lua_stdin_is_tty() _isatty(_fileno(stdin))
#endif
#else
/* No way to tell on other platforms: always report a terminal. */
#define lua_stdin_is_tty() 1
#endif

/* Lua state used by the SIGINT handler (laction); set in pmain. */
static lua_State *globalL = NULL;
/* Name prefixed to messages; replaced by argv[0] in pmain if non-empty. */
static const char *progname = LUA_PROGNAME;
37 | |||
38 | static void lstop(lua_State *L, lua_Debug *ar) | ||
39 | { | ||
40 | (void)ar; /* unused arg. */ | ||
41 | lua_sethook(L, NULL, 0, 0); | ||
42 | /* Avoid luaL_error -- a C hook doesn't add an extra frame. */ | ||
43 | luaL_where(L, 0); | ||
44 | lua_pushfstring(L, "%sinterrupted!", lua_tostring(L, -1)); | ||
45 | lua_error(L); | ||
46 | } | ||
47 | |||
48 | static void laction(int i) | ||
49 | { | ||
50 | signal(i, SIG_DFL); /* if another SIGINT happens before lstop, | ||
51 | terminate process (default action) */ | ||
52 | lua_sethook(globalL, lstop, LUA_MASKCALL | LUA_MASKRET | LUA_MASKCOUNT, 1); | ||
53 | } | ||
54 | |||
55 | static void print_usage(void) | ||
56 | { | ||
57 | fprintf(stderr, | ||
58 | "usage: %s [options] [script [args]].\n" | ||
59 | "Available options are:\n" | ||
60 | " -e stat execute string " LUA_QL("stat") "\n" | ||
61 | " -l name require library " LUA_QL("name") "\n" | ||
62 | " -j cmd perform LuaJIT control command\n" | ||
63 | " -O[lvl] set LuaJIT optimization level\n" | ||
64 | " -i enter interactive mode after executing " LUA_QL("script") "\n" | ||
65 | " -v show version information\n" | ||
66 | " -- stop handling options\n" | ||
67 | " - execute stdin and stop handling options\n" | ||
68 | , | ||
69 | progname); | ||
70 | fflush(stderr); | ||
71 | } | ||
72 | |||
/*
** Write "pname: msg\n" to stderr and flush it. The "pname: " prefix is
** omitted when pname is NULL.
*/
static void l_message(const char *pname, const char *msg)
{
  if (pname != NULL) {
    fputs(pname, stderr);
    fputs(": ", stderr);
  }
  fputs(msg, stderr);
  fputc('\n', stderr);
  fflush(stderr);
}
79 | |||
80 | static int report(lua_State *L, int status) | ||
81 | { | ||
82 | if (status && !lua_isnil(L, -1)) { | ||
83 | const char *msg = lua_tostring(L, -1); | ||
84 | if (msg == NULL) msg = "(error object is not a string)"; | ||
85 | l_message(progname, msg); | ||
86 | lua_pop(L, 1); | ||
87 | } | ||
88 | return status; | ||
89 | } | ||
90 | |||
91 | static int traceback(lua_State *L) | ||
92 | { | ||
93 | if (!lua_isstring(L, 1)) /* 'message' not a string? */ | ||
94 | return 1; /* keep it intact */ | ||
95 | lua_getfield(L, LUA_GLOBALSINDEX, "debug"); | ||
96 | if (!lua_istable(L, -1)) { | ||
97 | lua_pop(L, 1); | ||
98 | return 1; | ||
99 | } | ||
100 | lua_getfield(L, -1, "traceback"); | ||
101 | if (!lua_isfunction(L, -1)) { | ||
102 | lua_pop(L, 2); | ||
103 | return 1; | ||
104 | } | ||
105 | lua_pushvalue(L, 1); /* pass error message */ | ||
106 | lua_pushinteger(L, 2); /* skip this function and traceback */ | ||
107 | lua_call(L, 2, 1); /* call debug.traceback */ | ||
108 | return 1; | ||
109 | } | ||
110 | |||
111 | static int docall(lua_State *L, int narg, int clear) | ||
112 | { | ||
113 | int status; | ||
114 | int base = lua_gettop(L) - narg; /* function index */ | ||
115 | lua_pushcfunction(L, traceback); /* push traceback function */ | ||
116 | lua_insert(L, base); /* put it under chunk and args */ | ||
117 | signal(SIGINT, laction); | ||
118 | status = lua_pcall(L, narg, (clear ? 0 : LUA_MULTRET), base); | ||
119 | signal(SIGINT, SIG_DFL); | ||
120 | lua_remove(L, base); /* remove traceback function */ | ||
121 | /* force a complete garbage collection in case of errors */ | ||
122 | if (status != 0) lua_gc(L, LUA_GCCOLLECT, 0); | ||
123 | return status; | ||
124 | } | ||
125 | |||
126 | static void print_version(void) | ||
127 | { | ||
128 | fprintf(stderr, | ||
129 | LUAJIT_VERSION " -- " LUAJIT_COPYRIGHT ". " LUAJIT_URL "\n"); | ||
130 | } | ||
131 | |||
132 | static void print_jit_status(lua_State *L) | ||
133 | { | ||
134 | int n; | ||
135 | const char *s; | ||
136 | lua_getfield(L, LUA_REGISTRYINDEX, "_LOADED"); | ||
137 | lua_getfield(L, -1, "jit"); /* Get jit.* module table. */ | ||
138 | lua_remove(L, -2); | ||
139 | lua_getfield(L, -1, "status"); | ||
140 | lua_remove(L, -2); | ||
141 | n = lua_gettop(L); | ||
142 | lua_call(L, 0, LUA_MULTRET); | ||
143 | fputs(lua_toboolean(L, n) ? "JIT: ON" : "JIT: OFF", stderr); | ||
144 | for (n++; (s = lua_tostring(L, n)); n++) | ||
145 | fprintf(stderr, " %s", s); | ||
146 | fputs("\n", stdout); | ||
147 | } | ||
148 | |||
149 | static int getargs(lua_State *L, char **argv, int n) | ||
150 | { | ||
151 | int narg; | ||
152 | int i; | ||
153 | int argc = 0; | ||
154 | while (argv[argc]) argc++; /* count total number of arguments */ | ||
155 | narg = argc - (n + 1); /* number of arguments to the script */ | ||
156 | luaL_checkstack(L, narg + 3, "too many arguments to script"); | ||
157 | for (i = n+1; i < argc; i++) | ||
158 | lua_pushstring(L, argv[i]); | ||
159 | lua_createtable(L, narg, n + 1); | ||
160 | for (i = 0; i < argc; i++) { | ||
161 | lua_pushstring(L, argv[i]); | ||
162 | lua_rawseti(L, -2, i - n); | ||
163 | } | ||
164 | return narg; | ||
165 | } | ||
166 | |||
167 | static int dofile(lua_State *L, const char *name) | ||
168 | { | ||
169 | int status = luaL_loadfile(L, name) || docall(L, 0, 1); | ||
170 | return report(L, status); | ||
171 | } | ||
172 | |||
173 | static int dostring(lua_State *L, const char *s, const char *name) | ||
174 | { | ||
175 | int status = luaL_loadbuffer(L, s, strlen(s), name) || docall(L, 0, 1); | ||
176 | return report(L, status); | ||
177 | } | ||
178 | |||
179 | static int dolibrary(lua_State *L, const char *name) | ||
180 | { | ||
181 | lua_getglobal(L, "require"); | ||
182 | lua_pushstring(L, name); | ||
183 | return report(L, docall(L, 1, 1)); | ||
184 | } | ||
185 | |||
186 | static void write_prompt(lua_State *L, int firstline) | ||
187 | { | ||
188 | const char *p; | ||
189 | lua_getfield(L, LUA_GLOBALSINDEX, firstline ? "_PROMPT" : "_PROMPT2"); | ||
190 | p = lua_tostring(L, -1); | ||
191 | if (p == NULL) p = firstline ? LUA_PROMPT : LUA_PROMPT2; | ||
192 | fputs(p, stdout); | ||
193 | fflush(stdout); | ||
194 | lua_pop(L, 1); /* remove global */ | ||
195 | } | ||
196 | |||
197 | static int incomplete(lua_State *L, int status) | ||
198 | { | ||
199 | if (status == LUA_ERRSYNTAX) { | ||
200 | size_t lmsg; | ||
201 | const char *msg = lua_tolstring(L, -1, &lmsg); | ||
202 | const char *tp = msg + lmsg - (sizeof(LUA_QL("<eof>")) - 1); | ||
203 | if (strstr(msg, LUA_QL("<eof>")) == tp) { | ||
204 | lua_pop(L, 1); | ||
205 | return 1; | ||
206 | } | ||
207 | } | ||
208 | return 0; /* else... */ | ||
209 | } | ||
210 | |||
211 | static int pushline(lua_State *L, int firstline) | ||
212 | { | ||
213 | char buf[LUA_MAXINPUT]; | ||
214 | write_prompt(L, firstline); | ||
215 | if (fgets(buf, LUA_MAXINPUT, stdin)) { | ||
216 | size_t len = strlen(buf); | ||
217 | if (len > 0 && buf[len-1] == '\n') | ||
218 | buf[len-1] = '\0'; | ||
219 | if (firstline && buf[0] == '=') | ||
220 | lua_pushfstring(L, "return %s", buf+1); | ||
221 | else | ||
222 | lua_pushstring(L, buf); | ||
223 | return 1; | ||
224 | } | ||
225 | return 0; | ||
226 | } | ||
227 | |||
228 | static int loadline(lua_State *L) | ||
229 | { | ||
230 | int status; | ||
231 | lua_settop(L, 0); | ||
232 | if (!pushline(L, 1)) | ||
233 | return -1; /* no input */ | ||
234 | for (;;) { /* repeat until gets a complete line */ | ||
235 | status = luaL_loadbuffer(L, lua_tostring(L, 1), lua_strlen(L, 1), "=stdin"); | ||
236 | if (!incomplete(L, status)) break; /* cannot try to add lines? */ | ||
237 | if (!pushline(L, 0)) /* no more input? */ | ||
238 | return -1; | ||
239 | lua_pushliteral(L, "\n"); /* add a new line... */ | ||
240 | lua_insert(L, -2); /* ...between the two lines */ | ||
241 | lua_concat(L, 3); /* join them */ | ||
242 | } | ||
243 | lua_remove(L, 1); /* remove line */ | ||
244 | return status; | ||
245 | } | ||
246 | |||
247 | static void dotty(lua_State *L) | ||
248 | { | ||
249 | int status; | ||
250 | const char *oldprogname = progname; | ||
251 | progname = NULL; | ||
252 | while ((status = loadline(L)) != -1) { | ||
253 | if (status == 0) status = docall(L, 0, 0); | ||
254 | report(L, status); | ||
255 | if (status == 0 && lua_gettop(L) > 0) { /* any result to print? */ | ||
256 | lua_getglobal(L, "print"); | ||
257 | lua_insert(L, 1); | ||
258 | if (lua_pcall(L, lua_gettop(L)-1, 0, 0) != 0) | ||
259 | l_message(progname, | ||
260 | lua_pushfstring(L, "error calling " LUA_QL("print") " (%s)", | ||
261 | lua_tostring(L, -1))); | ||
262 | } | ||
263 | } | ||
264 | lua_settop(L, 0); /* clear stack */ | ||
265 | fputs("\n", stdout); | ||
266 | fflush(stdout); | ||
267 | progname = oldprogname; | ||
268 | } | ||
269 | |||
270 | static int handle_script(lua_State *L, char **argv, int n) | ||
271 | { | ||
272 | int status; | ||
273 | const char *fname; | ||
274 | int narg = getargs(L, argv, n); /* collect arguments */ | ||
275 | lua_setglobal(L, "arg"); | ||
276 | fname = argv[n]; | ||
277 | if (strcmp(fname, "-") == 0 && strcmp(argv[n-1], "--") != 0) | ||
278 | fname = NULL; /* stdin */ | ||
279 | status = luaL_loadfile(L, fname); | ||
280 | lua_insert(L, -(narg+1)); | ||
281 | if (status == 0) | ||
282 | status = docall(L, narg, 0); | ||
283 | else | ||
284 | lua_pop(L, narg); | ||
285 | return report(L, status); | ||
286 | } | ||
287 | |||
288 | /* Load add-on module. */ | ||
289 | static int loadjitmodule(lua_State *L, const char *notfound) | ||
290 | { | ||
291 | lua_getglobal(L, "require"); | ||
292 | lua_pushliteral(L, "jit."); | ||
293 | lua_pushvalue(L, -3); | ||
294 | lua_concat(L, 2); | ||
295 | if (lua_pcall(L, 1, 1, 0)) { | ||
296 | const char *msg = lua_tostring(L, -1); | ||
297 | if (msg && !strncmp(msg, "module ", 7)) { | ||
298 | err: | ||
299 | l_message(progname, notfound); | ||
300 | return 1; | ||
301 | } else { | ||
302 | return report(L, 1); | ||
303 | } | ||
304 | } | ||
305 | lua_getfield(L, -1, "start"); | ||
306 | if (lua_isnil(L, -1)) goto err; | ||
307 | lua_remove(L, -2); /* Drop module table. */ | ||
308 | return 0; | ||
309 | } | ||
310 | |||
311 | /* Run command with options. */ | ||
312 | static int runcmdopt(lua_State *L, const char *opt) | ||
313 | { | ||
314 | int narg = 0; | ||
315 | if (opt && *opt) { | ||
316 | for (;;) { /* Split arguments. */ | ||
317 | const char *p = strchr(opt, ','); | ||
318 | narg++; | ||
319 | if (!p) break; | ||
320 | if (p == opt) | ||
321 | lua_pushnil(L); | ||
322 | else | ||
323 | lua_pushlstring(L, opt, (size_t)(p - opt)); | ||
324 | opt = p + 1; | ||
325 | } | ||
326 | if (*opt) | ||
327 | lua_pushstring(L, opt); | ||
328 | else | ||
329 | lua_pushnil(L); | ||
330 | } | ||
331 | return report(L, lua_pcall(L, narg, 0, 0)); | ||
332 | } | ||
333 | |||
334 | /* JIT engine control command: try jit library first or load add-on module. */ | ||
335 | static int dojitcmd(lua_State *L, const char *cmd) | ||
336 | { | ||
337 | const char *opt = strchr(cmd, '='); | ||
338 | lua_pushlstring(L, cmd, opt ? (size_t)(opt - cmd) : strlen(cmd)); | ||
339 | lua_getfield(L, LUA_REGISTRYINDEX, "_LOADED"); | ||
340 | lua_getfield(L, -1, "jit"); /* Get jit.* module table. */ | ||
341 | lua_remove(L, -2); | ||
342 | lua_pushvalue(L, -2); | ||
343 | lua_gettable(L, -2); /* Lookup library function. */ | ||
344 | if (!lua_isfunction(L, -1)) { | ||
345 | lua_pop(L, 2); /* Drop non-function and jit.* table, keep module name. */ | ||
346 | if (loadjitmodule(L, "unknown luaJIT command")) | ||
347 | return 1; | ||
348 | } else { | ||
349 | lua_remove(L, -2); /* Drop jit.* table. */ | ||
350 | } | ||
351 | lua_remove(L, -2); /* Drop module name. */ | ||
352 | return runcmdopt(L, opt ? opt+1 : opt); | ||
353 | } | ||
354 | |||
355 | /* Optimization flags. */ | ||
356 | static int dojitopt(lua_State *L, const char *opt) | ||
357 | { | ||
358 | lua_getfield(L, LUA_REGISTRYINDEX, "_LOADED"); | ||
359 | lua_getfield(L, -1, "jit.opt"); /* Get jit.opt.* module table. */ | ||
360 | lua_remove(L, -2); | ||
361 | lua_getfield(L, -1, "start"); | ||
362 | lua_remove(L, -2); | ||
363 | return runcmdopt(L, opt); | ||
364 | } | ||
365 | |||
/* Make the enclosing function return -1 if an option has a tail ("-ix"). */
#define notail(x)	{if ((x)[2] != '\0') return -1;}

/*
** Scan the options in argv[1..] without running anything.
** Sets *pi, *pv and *pe when -i, -v and -e are seen (-i implies -v).
** Returns the index of the script name, 0 if there is no script, or -1
** for an invalid option or a missing option argument.
*/
static int collectargs(char **argv, int *pi, int *pv, int *pe)
{
  int i = 1;
  while (argv[i] != NULL) {
    const char *arg = argv[i];
    char opt;
    if (arg[0] != '-')  /* Not an option: this is the script. */
      return i;
    opt = arg[1];
    if (opt == '\0') {  /* A lone "-" means stdin as the script. */
      return i;
    } else if (opt == '-') {  /* "--" ends the options. */
      notail(arg);
      return (argv[i+1] != NULL ? i+1 : 0);
    } else if (opt == 'i' || opt == 'v') {
      notail(arg);
      if (opt == 'i')
	*pi = 1;
      *pv = 1;  /* Set for -v and for -i. */
    } else if (opt == 'e' || opt == 'j' || opt == 'l') {
      if (opt == 'e')
	*pe = 1;
      if (arg[2] == '\0') {  /* Argument is in the next argv slot. */
	i++;
	if (argv[i] == NULL) return -1;
      }
    } else if (opt != 'O') {  /* -O takes its argument inline, if any. */
      return -1;  /* Invalid option. */
    }
    i++;
  }
  return 0;
}
403 | |||
404 | static int runargs(lua_State *L, char **argv, int n) | ||
405 | { | ||
406 | int i; | ||
407 | for (i = 1; i < n; i++) { | ||
408 | if (argv[i] == NULL) continue; | ||
409 | lua_assert(argv[i][0] == '-'); | ||
410 | switch (argv[i][1]) { /* option */ | ||
411 | case 'e': { | ||
412 | const char *chunk = argv[i] + 2; | ||
413 | if (*chunk == '\0') chunk = argv[++i]; | ||
414 | lua_assert(chunk != NULL); | ||
415 | if (dostring(L, chunk, "=(command line)") != 0) | ||
416 | return 1; | ||
417 | break; | ||
418 | } | ||
419 | case 'l': { | ||
420 | const char *filename = argv[i] + 2; | ||
421 | if (*filename == '\0') filename = argv[++i]; | ||
422 | lua_assert(filename != NULL); | ||
423 | if (dolibrary(L, filename)) | ||
424 | return 1; /* stop if file fails */ | ||
425 | break; | ||
426 | } | ||
427 | case 'j': { /* LuaJIT extension */ | ||
428 | const char *cmd = argv[i] + 2; | ||
429 | if (*cmd == '\0') cmd = argv[++i]; | ||
430 | lua_assert(cmd != NULL); | ||
431 | if (dojitcmd(L, cmd)) | ||
432 | return 1; | ||
433 | break; | ||
434 | } | ||
435 | case 'O': /* LuaJIT extension */ | ||
436 | if (dojitopt(L, argv[i] + 2)) | ||
437 | return 1; | ||
438 | break; | ||
439 | default: break; | ||
440 | } | ||
441 | } | ||
442 | return 0; | ||
443 | } | ||
444 | |||
445 | static int handle_luainit(lua_State *L) | ||
446 | { | ||
447 | const char *init = getenv(LUA_INIT); | ||
448 | if (init == NULL) | ||
449 | return 0; /* status OK */ | ||
450 | else if (init[0] == '@') | ||
451 | return dofile(L, init+1); | ||
452 | else | ||
453 | return dostring(L, init, "=" LUA_INIT); | ||
454 | } | ||
455 | |||
/* Arguments and result exchanged between main and the protected pmain. */
struct Smain {
  int argc;  /* Argument count, as passed to main. */
  char **argv;  /* Argument vector, as passed to main. */
  int status;  /* Set by pmain: non-zero if any step failed. */
};
461 | |||
462 | static int pmain(lua_State *L) | ||
463 | { | ||
464 | struct Smain *s = (struct Smain *)lua_touserdata(L, 1); | ||
465 | char **argv = s->argv; | ||
466 | int script; | ||
467 | int has_i = 0, has_v = 0, has_e = 0; | ||
468 | globalL = L; | ||
469 | if (argv[0] && argv[0][0]) progname = argv[0]; | ||
470 | LUAJIT_VERSION_SYM(); /* linker-enforced version check */ | ||
471 | lua_gc(L, LUA_GCSTOP, 0); /* stop collector during initialization */ | ||
472 | luaL_openlibs(L); /* open libraries */ | ||
473 | lua_gc(L, LUA_GCRESTART, 0); | ||
474 | s->status = handle_luainit(L); | ||
475 | if (s->status != 0) return 0; | ||
476 | script = collectargs(argv, &has_i, &has_v, &has_e); | ||
477 | if (script < 0) { /* invalid args? */ | ||
478 | print_usage(); | ||
479 | s->status = 1; | ||
480 | return 0; | ||
481 | } | ||
482 | if (has_v) print_version(); | ||
483 | s->status = runargs(L, argv, (script > 0) ? script : s->argc); | ||
484 | if (s->status != 0) return 0; | ||
485 | if (script) | ||
486 | s->status = handle_script(L, argv, script); | ||
487 | if (s->status != 0) return 0; | ||
488 | if (has_i) { | ||
489 | print_jit_status(L); | ||
490 | dotty(L); | ||
491 | } else if (script == 0 && !has_e && !has_v) { | ||
492 | if (lua_stdin_is_tty()) { | ||
493 | print_version(); | ||
494 | print_jit_status(L); | ||
495 | dotty(L); | ||
496 | } else { | ||
497 | dofile(L, NULL); /* executes stdin as a file */ | ||
498 | } | ||
499 | } | ||
500 | return 0; | ||
501 | } | ||
502 | |||
503 | int main(int argc, char **argv) | ||
504 | { | ||
505 | int status; | ||
506 | struct Smain s; | ||
507 | lua_State *L = lua_open(); /* create state */ | ||
508 | if (L == NULL) { | ||
509 | l_message(argv[0], "cannot create state: not enough memory"); | ||
510 | return EXIT_FAILURE; | ||
511 | } | ||
512 | s.argc = argc; | ||
513 | s.argv = argv; | ||
514 | status = lua_cpcall(L, pmain, &s); | ||
515 | report(L, status); | ||
516 | lua_close(L); | ||
517 | return (status || s.status) ? EXIT_FAILURE : EXIT_SUCCESS; | ||
518 | } | ||
519 | |||
diff --git a/src/luajit.h b/src/luajit.h new file mode 100644 index 00000000..01913755 --- /dev/null +++ b/src/luajit.h | |||
@@ -0,0 +1,68 @@ | |||
1 | /* | ||
2 | ** LuaJIT -- a Just-In-Time Compiler for Lua. http://luajit.org/ | ||
3 | ** | ||
4 | ** Copyright (C) 2005-2009 Mike Pall. All rights reserved. | ||
5 | ** | ||
6 | ** Permission is hereby granted, free of charge, to any person obtaining | ||
7 | ** a copy of this software and associated documentation files (the | ||
8 | ** "Software"), to deal in the Software without restriction, including | ||
9 | ** without limitation the rights to use, copy, modify, merge, publish, | ||
10 | ** distribute, sublicense, and/or sell copies of the Software, and to | ||
11 | ** permit persons to whom the Software is furnished to do so, subject to | ||
12 | ** the following conditions: | ||
13 | ** | ||
14 | ** The above copyright notice and this permission notice shall be | ||
15 | ** included in all copies or substantial portions of the Software. | ||
16 | ** | ||
17 | ** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
18 | ** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
19 | ** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | ||
20 | ** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | ||
21 | ** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
22 | ** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
23 | ** SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
24 | ** | ||
25 | ** [ MIT license: http://www.opensource.org/licenses/mit-license.php ] | ||
26 | */ | ||
27 | |||
#ifndef _LUAJIT_H
#define _LUAJIT_H

#include "lua.h"

/*
** Version information. LUAJIT_VERSION_SYM is the name of a function whose
** symbol encodes the version (see its declaration at the end of this file).
*/
#define LUAJIT_VERSION "LuaJIT 2.0.0-beta1"
#define LUAJIT_VERSION_NUM 20000 /* Version 2.0.0 = 02.00.00. */
#define LUAJIT_VERSION_SYM luaJIT_version_2_0_0_beta1
#define LUAJIT_COPYRIGHT "Copyright (C) 2005-2009 Mike Pall"
#define LUAJIT_URL "http://luajit.org/"

/*
** Modes for luaJIT_setmode.
** The mode values below fit into LUAJIT_MODE_MASK (the low byte); the
** flags further down use bits above it.
*/
#define LUAJIT_MODE_MASK 0x00ff

enum {
  LUAJIT_MODE_ENGINE, /* Set mode for whole JIT engine. */
  LUAJIT_MODE_DEBUG, /* Set debug mode (idx = level). */

  LUAJIT_MODE_FUNC, /* Change mode for a function. */
  LUAJIT_MODE_ALLFUNC, /* Recurse into subroutine protos. */
  LUAJIT_MODE_ALLSUBFUNC, /* Change only the subroutines. */

  LUAJIT_MODE_TRACE, /* Flush a compiled trace. */

  LUAJIT_MODE_MAX /* Number of modes; not a mode itself. */
};

/* Flags or'ed in to the mode. */
#define LUAJIT_MODE_OFF 0x0000 /* Disable JIT compilation. */
#define LUAJIT_MODE_ON 0x0100 /* (Re-)enable JIT compilation. */
#define LUAJIT_MODE_FLUSH 0x0200 /* Flush JIT-compiled code. */

/* LuaJIT public C API. */

/*
** Control the JIT engine. 'mode' is one of the LUAJIT_MODE_* values or'ed
** with one of the flags above.
** NOTE(review): the meaning of 'idx' and of the return value is defined by
** the implementation, which is not part of this header -- confirm there.
*/
LUA_API int luaJIT_setmode(lua_State *L, int idx, int mode);

/* Enforce (dynamic) linker error for version mismatches. Call from main. */
LUA_API void LUAJIT_VERSION_SYM(void);

#endif
diff --git a/src/lualib.h b/src/lualib.h new file mode 100644 index 00000000..c1ceb613 --- /dev/null +++ b/src/lualib.h | |||
@@ -0,0 +1,41 @@ | |||
1 | /* | ||
2 | ** Standard library header. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
#ifndef _LUALIB_H
#define _LUALIB_H

#include "lua.h"

/* Type name used for file handle userdata. */
#define LUA_FILEHANDLE "FILE*"

/* Names of the standard library modules, one per luaopen_* function. */
#define LUA_COLIBNAME "coroutine"
#define LUA_MATHLIBNAME "math"
#define LUA_STRLIBNAME "string"
#define LUA_TABLIBNAME "table"
#define LUA_IOLIBNAME "io"
#define LUA_OSLIBNAME "os"
#define LUA_LOADLIBNAME "package"
#define LUA_DBLIBNAME "debug"
#define LUA_BITLIBNAME "bit"
#define LUA_JITLIBNAME "jit"

/* Openers for the individual libraries. */
LUALIB_API int luaopen_base(lua_State *L);
LUALIB_API int luaopen_math(lua_State *L);
LUALIB_API int luaopen_string(lua_State *L);
LUALIB_API int luaopen_table(lua_State *L);
LUALIB_API int luaopen_io(lua_State *L);
LUALIB_API int luaopen_os(lua_State *L);
LUALIB_API int luaopen_package(lua_State *L);
LUALIB_API int luaopen_debug(lua_State *L);
LUALIB_API int luaopen_bit(lua_State *L);
LUALIB_API int luaopen_jit(lua_State *L);

/* Opens the standard libraries in one call (used by the luajit frontend). */
LUALIB_API void luaL_openlibs(lua_State *L);

/* Fallback: assertions compile to nothing unless lua_assert is predefined. */
#ifndef lua_assert
#define lua_assert(x) ((void)0)
#endif

#endif
diff --git a/src/msvcbuild.bat b/src/msvcbuild.bat new file mode 100644 index 00000000..8bdc4d8a --- /dev/null +++ b/src/msvcbuild.bat | |||
@@ -0,0 +1,53 @@ | |||
@rem Script to build LuaJIT with MSVC.
@rem Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
@rem
@rem Open a "Visual Studio .NET Command Prompt", cd to this directory
@rem and run this script.
@rem Pass "amalg" as the first argument to build lua51.dll from ljamalg.c.
@rem
@rem The build stops at the first compile, link or buildvm step that fails.
@rem Intermediate files are only deleted after a successful build.

@if not defined INCLUDE goto :FAIL

@setlocal
@set LJCOMPILE=cl /nologo /c /MD /O2 /W3 /D_CRT_SECURE_NO_DEPRECATE
@set LJLINK=link /nologo
@set LJMT=mt /nologo
@set DASMDIR=..\dynasm
@set DASM=lua %DASMDIR%\dynasm.lua
@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c

@rem Regenerate the DynASM output only if it is missing. This step is
@rem conditional, so its exit status is not checked here.
if not exist buildvm_x86.h^
  %DASM% -LN -o buildvm_x86.h buildvm_x86.dasc

@rem Build the buildvm tool.
%LJCOMPILE% /I "." /I %DASMDIR% buildvm*.c
@if errorlevel 1 goto :BAD
%LJLINK% /out:buildvm.exe buildvm*.obj
@if errorlevel 1 goto :BAD
if exist buildvm.exe.manifest^
  %LJMT% -manifest buildvm.exe.manifest -outputresource:buildvm.exe

@rem Generate the VM object file and the library definition headers.
buildvm -m peobj -o lj_vm.obj
@if errorlevel 1 goto :BAD
buildvm -m ffdef -o lj_ffdef.h %ALL_LIB%
@if errorlevel 1 goto :BAD
buildvm -m libdef -o lj_libdef.h %ALL_LIB%
@if errorlevel 1 goto :BAD
buildvm -m recdef -o lj_recdef.h %ALL_LIB%
@if errorlevel 1 goto :BAD
buildvm -m vmdef -o ..\lib\vmdef.lua %ALL_LIB%
@if errorlevel 1 goto :BAD
buildvm -m folddef -o lj_folddef.h lj_opt_fold.c
@if errorlevel 1 goto :BAD

@rem Build lua51.dll, either from separate files or from the amalgamation.
@if "%1"=="amalg" goto :AMALGDLL
%LJCOMPILE% /DLUA_BUILD_AS_DLL lj_*.c lib_*.c
@if errorlevel 1 goto :BAD
%LJLINK% /DLL /out:lua51.dll lj_*.obj lib_*.obj
@if errorlevel 1 goto :BAD
@goto :MTDLL
:AMALGDLL
%LJCOMPILE% /DLUA_BUILD_AS_DLL ljamalg.c
@if errorlevel 1 goto :BAD
%LJLINK% /DLL /out:lua51.dll ljamalg.obj lj_vm.obj
@if errorlevel 1 goto :BAD
:MTDLL
if exist lua51.dll.manifest^
  %LJMT% -manifest lua51.dll.manifest -outputresource:lua51.dll;2

@rem Build the luajit.exe frontend against the DLL import library.
%LJCOMPILE% luajit.c
@if errorlevel 1 goto :BAD
%LJLINK% /out:luajit.exe luajit.obj lua51.lib
@if errorlevel 1 goto :BAD
if exist luajit.exe.manifest^
  %LJMT% -manifest luajit.exe.manifest -outputresource:luajit.exe

del *.obj *.manifest buildvm.exe

@goto :END
:BAD
@echo.
@echo *** Build FAILED -- please check the error messages above ***
@goto :END
:FAIL
@echo You must open a "Visual Studio .NET Command Prompt" to run this script
:END