diff options
182 files changed, 39665 insertions, 8095 deletions
@@ -14,9 +14,10 @@ | |||
14 | ############################################################################## | 14 | ############################################################################## |
15 | 15 | ||
16 | MAJVER= 2 | 16 | MAJVER= 2 |
17 | MINVER= 0 | 17 | MINVER= 1 |
18 | RELVER= 5 | 18 | RELVER= 0 |
19 | VERSION= $(MAJVER).$(MINVER).$(RELVER) | 19 | PREREL= -beta3 |
20 | VERSION= $(MAJVER).$(MINVER).$(RELVER)$(PREREL) | ||
20 | ABIVER= 5.1 | 21 | ABIVER= 5.1 |
21 | 22 | ||
22 | ############################################################################## | 23 | ############################################################################## |
@@ -84,8 +85,10 @@ FILE_SO= libluajit.so | |||
84 | FILE_MAN= luajit.1 | 85 | FILE_MAN= luajit.1 |
85 | FILE_PC= luajit.pc | 86 | FILE_PC= luajit.pc |
86 | FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h | 87 | FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h |
87 | FILES_JITLIB= bc.lua v.lua dump.lua dis_x86.lua dis_x64.lua dis_arm.lua \ | 88 | FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \ |
88 | dis_ppc.lua dis_mips.lua dis_mipsel.lua bcsave.lua vmdef.lua | 89 | dis_x86.lua dis_x64.lua dis_arm.lua dis_arm64.lua \ |
90 | dis_arm64be.lua dis_ppc.lua dis_mips.lua dis_mipsel.lua \ | ||
91 | dis_mips64.lua dis_mips64el.lua vmdef.lua | ||
89 | 92 | ||
90 | ifeq (,$(findstring Windows,$(OS))) | 93 | ifeq (,$(findstring Windows,$(OS))) |
91 | HOST_SYS:= $(shell uname -s) | 94 | HOST_SYS:= $(shell uname -s) |
@@ -115,7 +118,7 @@ install: $(INSTALL_DEP) | |||
115 | $(MKDIR) $(INSTALL_DIRS) | 118 | $(MKDIR) $(INSTALL_DIRS) |
116 | cd src && $(INSTALL_X) $(FILE_T) $(INSTALL_T) | 119 | cd src && $(INSTALL_X) $(FILE_T) $(INSTALL_T) |
117 | cd src && test -f $(FILE_A) && $(INSTALL_F) $(FILE_A) $(INSTALL_STATIC) || : | 120 | cd src && test -f $(FILE_A) && $(INSTALL_F) $(FILE_A) $(INSTALL_STATIC) || : |
118 | $(RM) $(INSTALL_TSYM) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) | 121 | $(RM) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) |
119 | cd src && test -f $(FILE_SO) && \ | 122 | cd src && test -f $(FILE_SO) && \ |
120 | $(INSTALL_X) $(FILE_SO) $(INSTALL_DYN) && \ | 123 | $(INSTALL_X) $(FILE_SO) $(INSTALL_DYN) && \ |
121 | ( $(LDCONFIG) $(INSTALL_LIB) || : ) && \ | 124 | ( $(LDCONFIG) $(INSTALL_LIB) || : ) && \ |
@@ -127,12 +130,18 @@ install: $(INSTALL_DEP) | |||
127 | $(RM) $(FILE_PC).tmp | 130 | $(RM) $(FILE_PC).tmp |
128 | cd src && $(INSTALL_F) $(FILES_INC) $(INSTALL_INC) | 131 | cd src && $(INSTALL_F) $(FILES_INC) $(INSTALL_INC) |
129 | cd src/jit && $(INSTALL_F) $(FILES_JITLIB) $(INSTALL_JITLIB) | 132 | cd src/jit && $(INSTALL_F) $(FILES_JITLIB) $(INSTALL_JITLIB) |
130 | $(SYMLINK) $(INSTALL_TNAME) $(INSTALL_TSYM) | ||
131 | @echo "==== Successfully installed LuaJIT $(VERSION) to $(PREFIX) ====" | 133 | @echo "==== Successfully installed LuaJIT $(VERSION) to $(PREFIX) ====" |
134 | @echo "" | ||
135 | @echo "Note: the development releases deliberately do NOT install a symlink for luajit" | ||
136 | @echo "You can do this now by running this command (with sudo):" | ||
137 | @echo "" | ||
138 | @echo " $(SYMLINK) $(INSTALL_TNAME) $(INSTALL_TSYM)" | ||
139 | @echo "" | ||
140 | |||
132 | 141 | ||
133 | uninstall: | 142 | uninstall: |
134 | @echo "==== Uninstalling LuaJIT $(VERSION) from $(PREFIX) ====" | 143 | @echo "==== Uninstalling LuaJIT $(VERSION) from $(PREFIX) ====" |
135 | $(UNINSTALL) $(INSTALL_TSYM) $(INSTALL_T) $(INSTALL_STATIC) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) $(INSTALL_MAN)/$(FILE_MAN) $(INSTALL_PC) | 144 | $(UNINSTALL) $(INSTALL_T) $(INSTALL_STATIC) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) $(INSTALL_MAN)/$(FILE_MAN) $(INSTALL_PC) |
136 | for file in $(FILES_JITLIB); do \ | 145 | for file in $(FILES_JITLIB); do \ |
137 | $(UNINSTALL) $(INSTALL_JITLIB)/$$file; \ | 146 | $(UNINSTALL) $(INSTALL_JITLIB)/$$file; \ |
138 | done | 147 | done |
@@ -1,5 +1,5 @@ | |||
1 | README for LuaJIT 2.0.5 | 1 | README for LuaJIT 2.1.0-beta3 |
2 | ----------------------- | 2 | ----------------------------- |
3 | 3 | ||
4 | LuaJIT is a Just-In-Time (JIT) compiler for the Lua programming language. | 4 | LuaJIT is a Just-In-Time (JIT) compiler for the Lua programming language. |
5 | 5 | ||
diff --git a/doc/changes.html b/doc/changes.html index a20295f2..5fc74f10 100644 --- a/doc/changes.html +++ b/doc/changes.html | |||
@@ -43,6 +43,8 @@ div.major { max-width: 600px; padding: 1em; margin: 1em 0 1em 0; } | |||
43 | <a href="ext_jit.html">jit.* Library</a> | 43 | <a href="ext_jit.html">jit.* Library</a> |
44 | </li><li> | 44 | </li><li> |
45 | <a href="ext_c_api.html">Lua/C API</a> | 45 | <a href="ext_c_api.html">Lua/C API</a> |
46 | </li><li> | ||
47 | <a href="ext_profiler.html">Profiler</a> | ||
46 | </li></ul> | 48 | </li></ul> |
47 | </li><li> | 49 | </li><li> |
48 | <a href="status.html">Status</a> | 50 | <a href="status.html">Status</a> |
@@ -71,6 +73,96 @@ to see whether newer versions are available. | |||
71 | </p> | 73 | </p> |
72 | 74 | ||
73 | <div class="major" style="background: #d0d0ff;"> | 75 | <div class="major" style="background: #d0d0ff;"> |
76 | <h2 id="LuaJIT-2.1.0-beta3">LuaJIT 2.1.0-beta3 — 2017-05-01</h2> | ||
77 | <ul> | ||
78 | <li>Rewrite memory block allocator.</li> | ||
79 | <li>Add various extensions from Lua 5.2/5.3.</li> | ||
80 | <li>Remove old Lua 5.0 compatibility defines.</li> | ||
81 | <li>Set arg table before evaluating <tt>LUA_INIT</tt> and <tt>-e</tt> chunks.</li> | ||
82 | <li>Fix FOLD rules for <tt>math.abs()</tt> and FP negation.</li> | ||
83 | <li>Fix soft-float <tt>math.abs()</tt> and negation.</li> | ||
84 | <li>Fix formatting of some small denormals at low precision.</li> | ||
85 | <li>LJ_GC64: Add JIT compiler support.</li> | ||
86 | <li>x64/LJ_GC64: Add JIT compiler backend.</li> | ||
87 | <li>x86/x64: Generate BMI2 shifts and rotates, if available.</li> | ||
88 | <li>Windows/x86: Add full exception interoperability.</li> | ||
89 | <li>ARM64: Add big-endian support.</li> | ||
90 | <li>ARM64: Add JIT compiler backend.</li> | ||
91 | <li>MIPS: Fix <tt>TSETR</tt> barrier.</li> | ||
92 | <li>MIPS: Support MIPS16 interlinking.</li> | ||
93 | <li>MIPS soft-float: Fix code generation for <tt>HREF</tt>.</li> | ||
94 | <li>MIPS64: Add MIPS64 hard-float JIT compiler backend.</li> | ||
95 | <li>MIPS64: Add MIPS64 hard-float/soft-float support to interpreter.</li> | ||
96 | <li>FFI: Compile bitfield loads/stores.</li> | ||
97 | <li>Various fixes common with the 2.0 branch.</li> | ||
98 | </ul> | ||
99 | |||
100 | <h2 id="LuaJIT-2.1.0-beta2">LuaJIT 2.1.0-beta2 — 2016-03-03</h2> | ||
101 | <ul> | ||
102 | <li>Enable trace stitching.</li> | ||
103 | <li>Use internal implementation for converting FP numbers to strings.</li> | ||
104 | <li>Parse Unicode escape <tt>'\u{XX...}'</tt> in string literals.</li> | ||
105 | <li>Add MIPS soft-float support.</li> | ||
106 | <li>Switch MIPS port to dual-number mode.</li> | ||
107 | <li>x86/x64: Add support for AES-NI, AVX and AVX2 to DynASM.</li> | ||
108 | <li>FFI: Add <tt>ssize_t</tt> declaration.</li> | ||
109 | <li>FFI: Parse <tt>#line NN</tt> and <tt>#NN</tt>.</li> | ||
110 | <li>Various minor fixes.</li> | ||
111 | </ul> | ||
112 | |||
113 | <h2 id="LuaJIT-2.1.0-beta1">LuaJIT 2.1.0-beta1 — 2015-08-25</h2> | ||
114 | <p> | ||
115 | This is a brief summary of the major changes in LuaJIT 2.1 compared to 2.0. | ||
116 | Please take a look at the commit history for more details. | ||
117 | </p> | ||
118 | <ul> | ||
119 | <li>Changes to the VM core: | ||
120 | <ul> | ||
121 | <li>Add low-overhead profiler (<tt>-jp</tt>).</li> | ||
122 | <li>Add <tt>LJ_GC64</tt> mode: 64 bit GC object references (really: 47 bit). Interpreter-only for now.</li> | ||
123 | <li>Add <tt>LJ_FR2</tt> mode: Two-slot frame info. Required by <tt>LJ_GC64</tt> mode.</li> | ||
124 | <li>Add <tt>table.new()</tt> and <tt>table.clear()</tt>.</li> | ||
125 | <li>Parse binary number literals (<tt>0bxxx</tt>).</li> | ||
126 | </ul></li> | ||
127 | <li>Improvements to the JIT compiler: | ||
128 | <ul> | ||
129 | <li>Add trace stitching (disabled for now).</li> | ||
130 | <li>Compile various builtins: <tt>string.char()</tt>, <tt>string.reverse()</tt>, <tt>string.lower()</tt>, <tt>string.upper()</tt>, <tt>string.rep()</tt>, <tt>string.format()</tt>, <tt>table.concat()</tt>, <tt>bit.tohex()</tt>, <tt>getfenv(0)</tt>, <tt>debug.getmetatable()</tt>.</li> | ||
131 | <li>Compile <tt>string.find()</tt> for fixed string searches (no patterns).</li> | ||
132 | <li>Compile <tt>BC_TSETM</tt>, e.g. <tt>{1,2,3,f()}</tt>.</li> | ||
133 | <li>Compile string concatenations (<tt>BC_CAT</tt>).</li> | ||
134 | <li>Compile <tt>__concat</tt> metamethod.</li> | ||
135 | <li>Various minor optimizations.</li> | ||
136 | </ul></li> | ||
137 | <li>Internal Changes: | ||
138 | <ul> | ||
139 | <li>Add support for embedding LuaJIT bytecode for builtins.</li> | ||
140 | <li>Replace various builtins with embedded bytecode.</li> | ||
141 | <li>Refactor string buffers and string formatting.</li> | ||
142 | <li>Remove obsolete non-truncating number to integer conversions.</li> | ||
143 | </ul></li> | ||
144 | <li>Ports: | ||
145 | <ul> | ||
146 | <li>Add Xbox One port (<tt>LJ_GC64</tt> mode).</li> | ||
147 | <li>ARM64: Add port of the interpreter (<tt>LJ_GC64</tt> mode).</li> | ||
148 | <li>x64: Add separate port of the interpreter to <tt>LJ_GC64</tt> mode.</li> | ||
149 | <li>x86/x64: Drop internal x87 math functions. Use libm functions.</li> | ||
150 | <li>x86: Remove x87 support from interpreter. SSE2 is mandatory now.</li> | ||
151 | <li>PPC/e500: Drop support for this architecture.</li> | ||
152 | </ul></li> | ||
153 | <li>FFI library: | ||
154 | <ul> | ||
155 | <li>FFI: Add 64 bit bitwise operations.</li> | ||
156 | <li>FFI: Compile VLA/VLS and large cdata allocations with default initialization.</li> | ||
157 | <li>FFI: Compile conversions from functions to function pointers.</li> | ||
158 | <li>FFI: Compile lightuserdata to <tt>void *</tt> conversion.</li> | ||
159 | <li>FFI: Compile <tt>ffi.gc(cdata, nil)</tt>, too.</li> | ||
160 | <li>FFI: Add <tt>ffi.typeinfo()</tt>.</li> | ||
161 | </ul></li> | ||
162 | </ul> | ||
163 | </div> | ||
164 | |||
165 | <div class="major" style="background: #ffffd0;"> | ||
74 | <h2 id="LuaJIT-2.0.5">LuaJIT 2.0.5 — 2017-05-01</h2> | 166 | <h2 id="LuaJIT-2.0.5">LuaJIT 2.0.5 — 2017-05-01</h2> |
75 | <ul> | 167 | <ul> |
76 | <li>Add workaround for MSVC 2015 stdio changes.</li> | 168 | <li>Add workaround for MSVC 2015 stdio changes.</li> |
@@ -80,7 +172,7 @@ to see whether newer versions are available. | |||
80 | <li>Remove internal <tt>__mode = "K"</tt> and replace with safe check.</li> | 172 | <li>Remove internal <tt>__mode = "K"</tt> and replace with safe check.</li> |
81 | <li>Add "proto" field to <tt>jit.util.funcinfo()</tt>.</li> | 173 | <li>Add "proto" field to <tt>jit.util.funcinfo()</tt>.</li> |
82 | <li>Fix GC step size calculation.</li> | 174 | <li>Fix GC step size calculation.</li> |
83 | <li>Initialize <tt>uv->immutable</tt> for upvalues of loaded chunks.</li> | 175 | <li>Initialize <tt>uv->immutable</tt> for upvalues of loaded chunks.</li> |
84 | <li>Fix for cdata vs. non-cdata arithmetics/comparisons.</li> | 176 | <li>Fix for cdata vs. non-cdata arithmetics/comparisons.</li> |
85 | <li>Drop leftover regs in 'for' iterator assignment, too.</li> | 177 | <li>Drop leftover regs in 'for' iterator assignment, too.</li> |
86 | <li>Fix PHI remarking in SINK pass.</li> | 178 | <li>Fix PHI remarking in SINK pass.</li> |
@@ -776,235 +868,6 @@ This matches the behavior of Lua 5.1, but not the specification.</li> | |||
776 | no point in listing differences over earlier versions.</li> | 868 | no point in listing differences over earlier versions.</li> |
777 | </ul> | 869 | </ul> |
778 | </div> | 870 | </div> |
779 | |||
780 | <div class="major" style="background: #ffff80;"> | ||
781 | <h2 id="LuaJIT-1.1.8">LuaJIT 1.1.8 — 2012-04-16</h2> | ||
782 | <ul> | ||
783 | <li>Merged with Lua 5.1.5. Also integrated fixes for all | ||
784 | <a href="http://www.lua.org/bugs.html#5.1.5"><span class="ext">»</span> currently known bugs in Lua 5.1.5</a>.</li> | ||
785 | </ul> | ||
786 | |||
787 | <h2 id="LuaJIT-1.1.7">LuaJIT 1.1.7 — 2011-05-05</h2> | ||
788 | <ul> | ||
789 | <li>Added fixes for the | ||
790 | <a href="http://www.lua.org/bugs.html#5.1.4"><span class="ext">»</span> currently known bugs in Lua 5.1.4</a>.</li> | ||
791 | </ul> | ||
792 | |||
793 | <h2 id="LuaJIT-1.1.6">LuaJIT 1.1.6 — 2010-03-28</h2> | ||
794 | <ul> | ||
795 | <li>Added fixes for the | ||
796 | <a href="http://www.lua.org/bugs.html#5.1.4"><span class="ext">»</span> currently known bugs in Lua 5.1.4</a>.</li> | ||
797 | <li>Removed wrong GC check in <tt>jit_createstate()</tt>. | ||
798 | Thanks to Tim Mensch.</li> | ||
799 | <li>Fixed bad assertions while compiling <tt>table.insert()</tt> and | ||
800 | <tt>table.remove()</tt>.</li> | ||
801 | </ul> | ||
802 | |||
803 | <h2 id="LuaJIT-1.1.5">LuaJIT 1.1.5 — 2008-10-25</h2> | ||
804 | <ul> | ||
805 | <li>Merged with Lua 5.1.4. Fixes all | ||
806 | <a href="http://www.lua.org/bugs.html#5.1.3"><span class="ext">»</span> known bugs in Lua 5.1.3</a>.</li> | ||
807 | </ul> | ||
808 | |||
809 | <h2 id="LuaJIT-1.1.4">LuaJIT 1.1.4 — 2008-02-05</h2> | ||
810 | <ul> | ||
811 | <li>Merged with Lua 5.1.3. Fixes all | ||
812 | <a href="http://www.lua.org/bugs.html#5.1.2"><span class="ext">»</span> known bugs in Lua 5.1.2</a>.</li> | ||
813 | <li>Fixed possible (but unlikely) stack corruption while compiling | ||
814 | <tt>k^x</tt> expressions.</li> | ||
815 | <li>Fixed DynASM template for cmpss instruction.</li> | ||
816 | </ul> | ||
817 | |||
818 | <h2 id="LuaJIT-1.1.3">LuaJIT 1.1.3 — 2007-05-24</h2> | ||
819 | <ul> | ||
820 | <li>Merged with Lua 5.1.2. Fixes all | ||
821 | <a href="http://www.lua.org/bugs.html#5.1.1"><span class="ext">»</span> known bugs in Lua 5.1.1</a>.</li> | ||
822 | <li>Merged pending Lua 5.1.x fixes: "return -nil" bug, spurious count hook call.</li> | ||
823 | <li>Remove a (sometimes) wrong assertion in <tt>luaJIT_findpc()</tt>.</li> | ||
824 | <li>DynASM now allows labels for displacements and <tt>.aword</tt>.</li> | ||
825 | <li>Fix some compiler warnings for DynASM glue (internal API change).</li> | ||
826 | <li>Correct naming for SSSE3 (temporarily known as SSE4) in DynASM and x86 disassembler.</li> | ||
827 | <li>The loadable debug modules now handle redirection to stdout | ||
828 | (e.g. <tt>-j trace=-</tt>).</li> | ||
829 | </ul> | ||
830 | |||
831 | <h2 id="LuaJIT-1.1.2">LuaJIT 1.1.2 — 2006-06-24</h2> | ||
832 | <ul> | ||
833 | <li>Fix MSVC inline assembly: use only local variables with | ||
834 | <tt>lua_number2int()</tt>.</li> | ||
835 | <li>Fix "attempt to call a thread value" bug on Mac OS X: | ||
836 | make values of consts used as lightuserdata keys unique | ||
837 | to avoid joining by the compiler/linker.</li> | ||
838 | </ul> | ||
839 | |||
840 | <h2 id="LuaJIT-1.1.1">LuaJIT 1.1.1 — 2006-06-20</h2> | ||
841 | <ul> | ||
842 | <li>Merged with Lua 5.1.1. Fixes all | ||
843 | <a href="http://www.lua.org/bugs.html#5.1"><span class="ext">»</span> known bugs in Lua 5.1</a>.</li> | ||
844 | <li>Enforce (dynamic) linker error for EXE/DLL version mismatches.</li> | ||
845 | <li>Minor changes to DynASM: faster pre-processing, smaller encoding | ||
846 | for some immediates.</li> | ||
847 | </ul> | ||
848 | <p> | ||
849 | This release is in sync with Coco 1.1.1 (see the | ||
850 | <a href="http://coco.luajit.org/changes.html"><span class="ext">»</span> Coco Change History</a>). | ||
851 | </p> | ||
852 | |||
853 | <h2 id="LuaJIT-1.1.0">LuaJIT 1.1.0 — 2006-03-13</h2> | ||
854 | <ul> | ||
855 | <li>Merged with Lua 5.1 (final).</li> | ||
856 | |||
857 | <li>New JIT call frame setup: | ||
858 | <ul> | ||
859 | <li>The C stack is kept 16 byte aligned (faster). | ||
860 | Mandatory for Mac OS X on Intel, too.</li> | ||
861 | <li>Faster calling conventions for internal C helper functions.</li> | ||
862 | <li>Better instruction scheduling for function prologue, OP_CALL and | ||
863 | OP_RETURN.</li> | ||
864 | </ul></li> | ||
865 | |||
866 | <li>Miscellaneous optimizations: | ||
867 | <ul> | ||
868 | <li>Faster loads of FP constants. Remove narrow-to-wide store-to-load | ||
869 | forwarding stalls.</li> | ||
870 | <li>Use (scalar) SSE2 ops (if the CPU supports it) to speed up slot moves | ||
871 | and FP to integer conversions.</li> | ||
872 | <li>Optimized the two-argument form of <tt>OP_CONCAT</tt> (<tt>a..b</tt>).</li> | ||
873 | <li>Inlined <tt>OP_MOD</tt> (<tt>a%b</tt>). | ||
874 | With better accuracy than the C variant, too.</li> | ||
875 | <li>Inlined <tt>OP_POW</tt> (<tt>a^b</tt>). Unroll <tt>x^k</tt> or | ||
876 | use <tt>k^x = 2^(log2(k)*x)</tt> or call <tt>pow()</tt>.</li> | ||
877 | </ul></li> | ||
878 | |||
879 | <li>Changes in the optimizer: | ||
880 | <ul> | ||
881 | <li>Improved hinting for table keys derived from table values | ||
882 | (<tt>t1[t2[x]]</tt>).</li> | ||
883 | <li>Lookup hinting now works with arbitrary object types and | ||
884 | supports index chains, too.</li> | ||
885 | <li>Generate type hints for arithmetic and comparison operators, | ||
886 | OP_LEN, OP_CONCAT and OP_FORPREP.</li> | ||
887 | <li>Remove several hint definitions in favour of a generic COMBINE hint.</li> | ||
888 | <li>Complete rewrite of <tt>jit.opt_inline</tt> module | ||
889 | (ex <tt>jit.opt_lib</tt>).</li> | ||
890 | </ul></li> | ||
891 | |||
892 | <li>Use adaptive deoptimization: | ||
893 | <ul> | ||
894 | <li>If runtime verification of a contract fails, the affected | ||
895 | instruction is recompiled and patched on-the-fly. | ||
896 | Regular programs will trigger deoptimization only occasionally.</li> | ||
897 | <li>This avoids generating code for uncommon fallback cases | ||
898 | most of the time. Generated code is up to 30% smaller compared to | ||
899 | LuaJIT 1.0.3.</li> | ||
900 | <li>Deoptimization is used for many opcodes and contracts: | ||
901 | <ul> | ||
902 | <li>OP_CALL, OP_TAILCALL: type mismatch for callable.</li> | ||
903 | <li>Inlined calls: closure mismatch, parameter number and type mismatches.</li> | ||
904 | <li>OP_GETTABLE, OP_SETTABLE: table or key type and range mismatches.</li> | ||
905 | <li>All arithmetic and comparison operators, OP_LEN, OP_CONCAT, | ||
906 | OP_FORPREP: operand type and range mismatches.</li> | ||
907 | </ul></li> | ||
908 | <li>Complete redesign of the debug and traceback info | ||
909 | (bytecode ↔ mcode) to support deoptimization. | ||
910 | Much more flexible and needs only 50% of the space.</li> | ||
911 | <li>The modules <tt>jit.trace</tt>, <tt>jit.dumphints</tt> and | ||
912 | <tt>jit.dump</tt> handle deoptimization.</li> | ||
913 | </ul></li> | ||
914 | |||
915 | <li>Inlined many popular library functions | ||
916 | (for commonly used arguments only): | ||
917 | <ul> | ||
918 | <li>Most <tt>math.*</tt> functions (the 18 most used ones) | ||
919 | [2x-10x faster].</li> | ||
920 | <li><tt>string.len</tt>, <tt>string.sub</tt> and <tt>string.char</tt> | ||
921 | [2x-10x faster].</li> | ||
922 | <li><tt>table.insert</tt>, <tt>table.remove</tt> and <tt>table.getn</tt> | ||
923 | [3x-5x faster].</li> | ||
924 | <li><tt>coroutine.yield</tt> and <tt>coroutine.resume</tt> | ||
925 | [3x-5x faster].</li> | ||
926 | <li><tt>pairs</tt>, <tt>ipairs</tt> and the corresponding iterators | ||
927 | [8x-15x faster].</li> | ||
928 | </ul></li> | ||
929 | |||
930 | <li>Changes in the core and loadable modules and the stand-alone executable: | ||
931 | <ul> | ||
932 | <li>Added <tt>jit.version</tt>, <tt>jit.version_num</tt> | ||
933 | and <tt>jit.arch</tt>.</li> | ||
934 | <li>Reorganized some internal API functions (<tt>jit.util.*mcode*</tt>).</li> | ||
935 | <li>The <tt>-j dump</tt> output now shows JSUB names, too.</li> | ||
936 | <li>New x86 disassembler module written in pure Lua. No dependency | ||
937 | on ndisasm anymore. Flexible API, very compact (500 lines) | ||
938 | and complete (x87, MMX, SSE, SSE2, SSE3, SSSE3, privileged instructions).</li> | ||
939 | <li><tt>luajit -v</tt> prints the LuaJIT version and copyright | ||
940 | on a separate line.</li> | ||
941 | </ul></li> | ||
942 | |||
943 | <li>Added SSE, SSE2, SSE3 and SSSE3 support to DynASM.</li> | ||
944 | <li>Miscellaneous doc changes. Added a section about | ||
945 | <a href="install.html#embedding">embedding LuaJIT</a>.</li> | ||
946 | </ul> | ||
947 | <p> | ||
948 | This release is in sync with Coco 1.1.0 (see the | ||
949 | <a href="http://coco.luajit.org/changes.html"><span class="ext">»</span> Coco Change History</a>). | ||
950 | </p> | ||
951 | </div> | ||
952 | |||
953 | <div class="major" style="background: #ffffd0;"> | ||
954 | <h2 id="LuaJIT-1.0.3">LuaJIT 1.0.3 — 2005-09-08</h2> | ||
955 | <ul> | ||
956 | <li>Even more docs.</li> | ||
957 | <li>Unified closure checks in <tt>jit.*</tt>.</li> | ||
958 | <li>Fixed some range checks in <tt>jit.util.*</tt>.</li> | ||
959 | <li>Fixed __newindex call originating from <tt>jit_settable_str()</tt>.</li> | ||
960 | <li>Merged with Lua 5.1 alpha (including early bug fixes).</li> | ||
961 | </ul> | ||
962 | <p> | ||
963 | This is the first public release of LuaJIT. | ||
964 | </p> | ||
965 | |||
966 | <h2 id="LuaJIT-1.0.2">LuaJIT 1.0.2 — 2005-09-02</h2> | ||
967 | <ul> | ||
968 | <li>Add support for flushing the Valgrind translation cache <br> | ||
969 | (<tt>MYCFLAGS= -DUSE_VALGRIND</tt>).</li> | ||
970 | <li>Add support for freeing executable mcode memory to the <tt>mmap()</tt>-based | ||
971 | variant for POSIX systems.</li> | ||
972 | <li>Reorganized the C function signature handling in | ||
973 | <tt>jit.opt_lib</tt>.</li> | ||
974 | <li>Changed to index-based hints for inlining C functions. | ||
975 | Still no support in the backend for inlining.</li> | ||
976 | <li>Hardcode <tt>HEAP_CREATE_ENABLE_EXECUTE</tt> value if undefined.</li> | ||
977 | <li>Misc. changes to the <tt>jit.*</tt> modules.</li> | ||
978 | <li>Misc. changes to the Makefiles.</li> | ||
979 | <li>Lots of new docs.</li> | ||
980 | <li>Complete doc reorg.</li> | ||
981 | </ul> | ||
982 | <p> | ||
983 | Not released because Lua 5.1 alpha came out today. | ||
984 | </p> | ||
985 | |||
986 | <h2 id="LuaJIT-1.0.1">LuaJIT 1.0.1 — 2005-08-31</h2> | ||
987 | <ul> | ||
988 | <li>Missing GC step in <tt>OP_CONCAT</tt>.</li> | ||
989 | <li>Fix result handling for C –> JIT calls.</li> | ||
990 | <li>Detect CPU feature bits.</li> | ||
991 | <li>Encode conditional moves (<tt>fucomip</tt>) only when supported.</li> | ||
992 | <li>Add fallback instructions for FP compares.</li> | ||
993 | <li>Add support for <tt>LUA_COMPAT_VARARG</tt>. Still disabled by default.</li> | ||
994 | <li>MSVC needs a specific place for the <tt>CALLBACK</tt> attribute | ||
995 | (David Burgess).</li> | ||
996 | <li>Misc. doc updates.</li> | ||
997 | </ul> | ||
998 | <p> | ||
999 | Interim non-public release. | ||
1000 | Special thanks to Adam D. Moss for reporting most of the bugs. | ||
1001 | </p> | ||
1002 | |||
1003 | <h2 id="LuaJIT-1.0.0">LuaJIT 1.0.0 — 2005-08-29</h2> | ||
1004 | <p> | ||
1005 | This is the initial non-public release of LuaJIT. | ||
1006 | </p> | ||
1007 | </div> | ||
1008 | <br class="flush"> | 871 | <br class="flush"> |
1009 | </div> | 872 | </div> |
1010 | <div id="foot"> | 873 | <div id="foot"> |
diff --git a/doc/contact.html b/doc/contact.html index fbab370c..c014dc9a 100644 --- a/doc/contact.html +++ b/doc/contact.html | |||
@@ -40,6 +40,8 @@ | |||
40 | <a href="ext_jit.html">jit.* Library</a> | 40 | <a href="ext_jit.html">jit.* Library</a> |
41 | </li><li> | 41 | </li><li> |
42 | <a href="ext_c_api.html">Lua/C API</a> | 42 | <a href="ext_c_api.html">Lua/C API</a> |
43 | </li><li> | ||
44 | <a href="ext_profiler.html">Profiler</a> | ||
43 | </li></ul> | 45 | </li></ul> |
44 | </li><li> | 46 | </li><li> |
45 | <a href="status.html">Status</a> | 47 | <a href="status.html">Status</a> |
diff --git a/doc/ext_c_api.html b/doc/ext_c_api.html index 88017ace..4f471526 100644 --- a/doc/ext_c_api.html +++ b/doc/ext_c_api.html | |||
@@ -40,6 +40,8 @@ | |||
40 | <a href="ext_jit.html">jit.* Library</a> | 40 | <a href="ext_jit.html">jit.* Library</a> |
41 | </li><li> | 41 | </li><li> |
42 | <a class="current" href="ext_c_api.html">Lua/C API</a> | 42 | <a class="current" href="ext_c_api.html">Lua/C API</a> |
43 | </li><li> | ||
44 | <a href="ext_profiler.html">Profiler</a> | ||
43 | </li></ul> | 45 | </li></ul> |
44 | </li><li> | 46 | </li><li> |
45 | <a href="status.html">Status</a> | 47 | <a href="status.html">Status</a> |
diff --git a/doc/ext_ffi.html b/doc/ext_ffi.html index fe74a1d8..6a2a4852 100644 --- a/doc/ext_ffi.html +++ b/doc/ext_ffi.html | |||
@@ -40,6 +40,8 @@ | |||
40 | <a href="ext_jit.html">jit.* Library</a> | 40 | <a href="ext_jit.html">jit.* Library</a> |
41 | </li><li> | 41 | </li><li> |
42 | <a href="ext_c_api.html">Lua/C API</a> | 42 | <a href="ext_c_api.html">Lua/C API</a> |
43 | </li><li> | ||
44 | <a href="ext_profiler.html">Profiler</a> | ||
43 | </li></ul> | 45 | </li></ul> |
44 | </li><li> | 46 | </li><li> |
45 | <a href="status.html">Status</a> | 47 | <a href="status.html">Status</a> |
diff --git a/doc/ext_ffi_api.html b/doc/ext_ffi_api.html index 40fc694b..ad19b76b 100644 --- a/doc/ext_ffi_api.html +++ b/doc/ext_ffi_api.html | |||
@@ -45,6 +45,8 @@ td.abiparam { font-weight: bold; width: 6em; } | |||
45 | <a href="ext_jit.html">jit.* Library</a> | 45 | <a href="ext_jit.html">jit.* Library</a> |
46 | </li><li> | 46 | </li><li> |
47 | <a href="ext_c_api.html">Lua/C API</a> | 47 | <a href="ext_c_api.html">Lua/C API</a> |
48 | </li><li> | ||
49 | <a href="ext_profiler.html">Profiler</a> | ||
48 | </li></ul> | 50 | </li></ul> |
49 | </li><li> | 51 | </li><li> |
50 | <a href="status.html">Status</a> | 52 | <a href="status.html">Status</a> |
@@ -465,6 +467,10 @@ otherwise. The following parameters are currently defined: | |||
465 | <td class="abiparam">eabi</td><td class="abidesc">EABI variant of the standard ABI</td></tr> | 467 | <td class="abiparam">eabi</td><td class="abidesc">EABI variant of the standard ABI</td></tr> |
466 | <tr class="odd"> | 468 | <tr class="odd"> |
467 | <td class="abiparam">win</td><td class="abidesc">Windows variant of the standard ABI</td></tr> | 469 | <td class="abiparam">win</td><td class="abidesc">Windows variant of the standard ABI</td></tr> |
470 | <tr class="even"> | ||
471 | <td class="abiparam">uwp</td><td class="abidesc">Universal Windows Platform</td></tr> | ||
472 | <tr class="odd"> | ||
473 | <td class="abiparam">gc64</td><td class="abidesc">64 bit GC references</td></tr> | ||
468 | </table> | 474 | </table> |
469 | 475 | ||
470 | <h3 id="ffi_os"><tt>ffi.os</tt></h3> | 476 | <h3 id="ffi_os"><tt>ffi.os</tt></h3> |
@@ -541,8 +547,8 @@ corresponding ctype. | |||
541 | The parser for Lua source code treats numeric literals with the | 547 | The parser for Lua source code treats numeric literals with the |
542 | suffixes <tt>LL</tt> or <tt>ULL</tt> as signed or unsigned 64 bit | 548 | suffixes <tt>LL</tt> or <tt>ULL</tt> as signed or unsigned 64 bit |
543 | integers. Case doesn't matter, but uppercase is recommended for | 549 | integers. Case doesn't matter, but uppercase is recommended for |
544 | readability. It handles both decimal (<tt>42LL</tt>) and hexadecimal | 550 | readability. It handles decimal (<tt>42LL</tt>), hexadecimal |
545 | (<tt>0x2aLL</tt>) literals. | 551 | (<tt>0x2aLL</tt>) and binary (<tt>0b101010LL</tt>) literals. |
546 | </p> | 552 | </p> |
547 | <p> | 553 | <p> |
548 | The imaginary part of complex numbers can be specified by suffixing | 554 | The imaginary part of complex numbers can be specified by suffixing |
diff --git a/doc/ext_ffi_semantics.html b/doc/ext_ffi_semantics.html index a21e5bd5..40575af8 100644 --- a/doc/ext_ffi_semantics.html +++ b/doc/ext_ffi_semantics.html | |||
@@ -45,6 +45,8 @@ td.convop { font-style: italic; width: 40%; } | |||
45 | <a href="ext_jit.html">jit.* Library</a> | 45 | <a href="ext_jit.html">jit.* Library</a> |
46 | </li><li> | 46 | </li><li> |
47 | <a href="ext_c_api.html">Lua/C API</a> | 47 | <a href="ext_c_api.html">Lua/C API</a> |
48 | </li><li> | ||
49 | <a href="ext_profiler.html">Profiler</a> | ||
48 | </li></ul> | 50 | </li></ul> |
49 | </li><li> | 51 | </li><li> |
50 | <a href="status.html">Status</a> | 52 | <a href="status.html">Status</a> |
@@ -182,6 +184,8 @@ a <tt>typedef</tt>, except re-declarations will be ignored): | |||
182 | <tt>uint16_t</tt>, <tt>uint32_t</tt>, <tt>uint64_t</tt>, | 184 | <tt>uint16_t</tt>, <tt>uint32_t</tt>, <tt>uint64_t</tt>, |
183 | <tt>intptr_t</tt>, <tt>uintptr_t</tt>.</li> | 185 | <tt>intptr_t</tt>, <tt>uintptr_t</tt>.</li> |
184 | 186 | ||
187 | <li>From <tt><unistd.h></tt> (POSIX): <tt>ssize_t</tt>.</li> | ||
188 | |||
185 | </ul> | 189 | </ul> |
186 | <p> | 190 | <p> |
187 | You're encouraged to use these types in preference to | 191 | You're encouraged to use these types in preference to |
@@ -729,6 +733,22 @@ You'll have to explicitly convert a 64 bit integer to a Lua | |||
729 | number (e.g. for regular floating-point calculations) with | 733 | number (e.g. for regular floating-point calculations) with |
730 | <tt>tonumber()</tt>. But note this may incur a precision loss.</li> | 734 | <tt>tonumber()</tt>. But note this may incur a precision loss.</li> |
731 | 735 | ||
736 | <li><b>64 bit bitwise operations</b>: the rules for 64 bit | ||
737 | arithmetic operators apply analogously.<br> | ||
738 | |||
739 | Unlike the other <tt>bit.*</tt> operations, <tt>bit.tobit()</tt> | ||
740 | converts a cdata number via <tt>int64_t</tt> to <tt>int32_t</tt> and | ||
741 | returns a Lua number.<br> | ||
742 | |||
743 | For <tt>bit.band()</tt>, <tt>bit.bor()</tt> and <tt>bit.bxor()</tt>, the | ||
744 | conversion to <tt>int64_t</tt> or <tt>uint64_t</tt> applies to | ||
745 | <em>all</em> arguments, if <em>any</em> argument is a cdata number.<br> | ||
746 | |||
747 | For all other operations, only the first argument is used to determine | ||
748 | the output type. This implies that a cdata number as a shift count for | ||
749 | shifts and rotates is accepted, but that alone does <em>not</em> cause | ||
750 | a cdata number output. | ||
751 | |||
732 | </ul> | 752 | </ul> |
733 | 753 | ||
734 | <h3 id="cdata_comp">Comparisons of cdata objects</h3> | 754 | <h3 id="cdata_comp">Comparisons of cdata objects</h3> |
@@ -1200,14 +1220,12 @@ The following operations are currently not compiled and may exhibit | |||
1200 | suboptimal performance, especially when used in inner loops: | 1220 | suboptimal performance, especially when used in inner loops: |
1201 | </p> | 1221 | </p> |
1202 | <ul> | 1222 | <ul> |
1203 | <li>Bitfield accesses and initializations.</li> | ||
1204 | <li>Vector operations.</li> | 1223 | <li>Vector operations.</li> |
1205 | <li>Table initializers.</li> | 1224 | <li>Table initializers.</li> |
1206 | <li>Initialization of nested <tt>struct</tt>/<tt>union</tt> types.</li> | 1225 | <li>Initialization of nested <tt>struct</tt>/<tt>union</tt> types.</li> |
1207 | <li>Allocations of variable-length arrays or structs.</li> | 1226 | <li>Non-default initialization of VLA/VLS or large C types |
1208 | <li>Allocations of C types with a size > 128 bytes or an | 1227 | (> 128 bytes or > 16 array elements).</li> |
1209 | alignment > 8 bytes.</li> | 1228 | <li>Bitfield initializations.</li> |
1210 | <li>Conversions from lightuserdata to <tt>void *</tt>.</li> | ||
1211 | <li>Pointer differences for element sizes that are not a power of | 1229 | <li>Pointer differences for element sizes that are not a power of |
1212 | two.</li> | 1230 | two.</li> |
1213 | <li>Calls to C functions with aggregates passed or returned by | 1231 | <li>Calls to C functions with aggregates passed or returned by |
@@ -1223,7 +1241,6 @@ value.</li> | |||
1223 | Other missing features: | 1241 | Other missing features: |
1224 | </p> | 1242 | </p> |
1225 | <ul> | 1243 | <ul> |
1226 | <li>Bit operations for 64 bit types.</li> | ||
1227 | <li>Arithmetic for <tt>complex</tt> numbers.</li> | 1244 | <li>Arithmetic for <tt>complex</tt> numbers.</li> |
1228 | <li>Passing structs by value to vararg C functions.</li> | 1245 | <li>Passing structs by value to vararg C functions.</li> |
1229 | <li><a href="extensions.html#exceptions">C++ exception interoperability</a> | 1246 | <li><a href="extensions.html#exceptions">C++ exception interoperability</a> |
diff --git a/doc/ext_ffi_tutorial.html b/doc/ext_ffi_tutorial.html index 7ca14018..e979ffea 100644 --- a/doc/ext_ffi_tutorial.html +++ b/doc/ext_ffi_tutorial.html | |||
@@ -47,6 +47,8 @@ td.idiomlua b { font-weight: normal; color: #2142bf; } | |||
47 | <a href="ext_jit.html">jit.* Library</a> | 47 | <a href="ext_jit.html">jit.* Library</a> |
48 | </li><li> | 48 | </li><li> |
49 | <a href="ext_c_api.html">Lua/C API</a> | 49 | <a href="ext_c_api.html">Lua/C API</a> |
50 | </li><li> | ||
51 | <a href="ext_profiler.html">Profiler</a> | ||
50 | </li></ul> | 52 | </li></ul> |
51 | </li><li> | 53 | </li><li> |
52 | <a href="status.html">Status</a> | 54 | <a href="status.html">Status</a> |
diff --git a/doc/ext_jit.html b/doc/ext_jit.html index 018b4396..3720d308 100644 --- a/doc/ext_jit.html +++ b/doc/ext_jit.html | |||
@@ -40,6 +40,8 @@ | |||
40 | <a class="current" href="ext_jit.html">jit.* Library</a> | 40 | <a class="current" href="ext_jit.html">jit.* Library</a> |
41 | </li><li> | 41 | </li><li> |
42 | <a href="ext_c_api.html">Lua/C API</a> | 42 | <a href="ext_c_api.html">Lua/C API</a> |
43 | </li><li> | ||
44 | <a href="ext_profiler.html">Profiler</a> | ||
43 | </li></ul> | 45 | </li></ul> |
44 | </li><li> | 46 | </li><li> |
45 | <a href="status.html">Status</a> | 47 | <a href="status.html">Status</a> |
@@ -150,7 +152,7 @@ Contains the target OS name: | |||
150 | <h3 id="jit_arch"><tt>jit.arch</tt></h3> | 152 | <h3 id="jit_arch"><tt>jit.arch</tt></h3> |
151 | <p> | 153 | <p> |
152 | Contains the target architecture name: | 154 | Contains the target architecture name: |
153 | "x86", "x64", "arm", "ppc", "ppcspe", or "mips". | 155 | "x86", "x64", "arm", "arm64", "ppc", "mips" or "mips64". |
154 | </p> | 156 | </p> |
155 | 157 | ||
156 | <h2 id="jit_opt"><tt>jit.opt.*</tt> — JIT compiler optimization control</h2> | 158 | <h2 id="jit_opt"><tt>jit.opt.*</tt> — JIT compiler optimization control</h2> |
diff --git a/doc/ext_profiler.html b/doc/ext_profiler.html new file mode 100644 index 00000000..b778cda4 --- /dev/null +++ b/doc/ext_profiler.html | |||
@@ -0,0 +1,364 @@ | |||
1 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> | ||
2 | <html> | ||
3 | <head> | ||
4 | <title>Profiler</title> | ||
5 | <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> | ||
6 | <meta name="Copyright" content="Copyright (C) 2005-2020"> | ||
7 | <meta name="Language" content="en"> | ||
8 | <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> | ||
9 | <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> | ||
10 | </head> | ||
11 | <body> | ||
12 | <div id="site"> | ||
13 | <a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> | ||
14 | </div> | ||
15 | <div id="head"> | ||
16 | <h1>Profiler</h1> | ||
17 | </div> | ||
18 | <div id="nav"> | ||
19 | <ul><li> | ||
20 | <a href="luajit.html">LuaJIT</a> | ||
21 | <ul><li> | ||
22 | <a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> | ||
23 | </li><li> | ||
24 | <a href="install.html">Installation</a> | ||
25 | </li><li> | ||
26 | <a href="running.html">Running</a> | ||
27 | </li></ul> | ||
28 | </li><li> | ||
29 | <a href="extensions.html">Extensions</a> | ||
30 | <ul><li> | ||
31 | <a href="ext_ffi.html">FFI Library</a> | ||
32 | <ul><li> | ||
33 | <a href="ext_ffi_tutorial.html">FFI Tutorial</a> | ||
34 | </li><li> | ||
35 | <a href="ext_ffi_api.html">ffi.* API</a> | ||
36 | </li><li> | ||
37 | <a href="ext_ffi_semantics.html">FFI Semantics</a> | ||
38 | </li></ul> | ||
39 | </li><li> | ||
40 | <a href="ext_jit.html">jit.* Library</a> | ||
41 | </li><li> | ||
42 | <a href="ext_c_api.html">Lua/C API</a> | ||
43 | </li><li> | ||
44 | <a class="current" href="ext_profiler.html">Profiler</a> | ||
45 | </li></ul> | ||
46 | </li><li> | ||
47 | <a href="status.html">Status</a> | ||
48 | <ul><li> | ||
49 | <a href="changes.html">Changes</a> | ||
50 | </li></ul> | ||
51 | </li><li> | ||
52 | <a href="faq.html">FAQ</a> | ||
53 | </li><li> | ||
54 | <a href="http://luajit.org/performance.html">Performance <span class="ext">»</span></a> | ||
55 | </li><li> | ||
56 | <a href="http://wiki.luajit.org/">Wiki <span class="ext">»</span></a> | ||
57 | </li><li> | ||
58 | <a href="http://luajit.org/list.html">Mailing List <span class="ext">»</span></a> | ||
59 | </li></ul> | ||
60 | </div> | ||
61 | <div id="main"> | ||
62 | <p> | ||
63 | LuaJIT has an integrated statistical profiler with very low overhead. It | ||
64 | allows sampling the currently executing stack and other parameters in | ||
65 | regular intervals. | ||
66 | </p> | ||
67 | <p> | ||
68 | The integrated profiler can be accessed from three levels: | ||
69 | </p> | ||
70 | <ul> | ||
71 | <li>The <a href="#hl_profiler">bundled high-level profiler</a>, invoked by the | ||
72 | <a href="#j_p"><tt>-jp</tt></a> command line option.</li> | ||
73 | <li>A <a href="#ll_lua_api">low-level Lua API</a> to control the profiler.</li> | ||
74 | <li>A <a href="#ll_c_api">low-level C API</a> to control the profiler.</li> | ||
75 | </ul> | ||
76 | |||
77 | <h2 id="hl_profiler">High-Level Profiler</h2> | ||
78 | <p> | ||
79 | The bundled high-level profiler offers basic profiling functionality. It | ||
80 | generates simple textual summaries or source code annotations. It can be | ||
81 | accessed with the <a href="#j_p"><tt>-jp</tt></a> command line option | ||
82 | or from Lua code by loading the underlying <tt>jit.p</tt> module. | ||
83 | </p> | ||
84 | <p> | ||
85 | To cut to the chase — run this to get a CPU usage profile by | ||
86 | function name: | ||
87 | </p> | ||
88 | <pre class="code"> | ||
89 | luajit -jp myapp.lua | ||
90 | </pre> | ||
91 | <p> | ||
92 | It's <em>not</em> a stated goal of the bundled profiler to add every | ||
93 | possible option or to cater for special profiling needs. The low-level | ||
94 | profiler APIs are documented below. They may be used by third-party | ||
95 | authors to implement advanced functionality, e.g. IDE integration or | ||
96 | graphical profilers. | ||
97 | </p> | ||
98 | <p> | ||
99 | Note: Sampling works for both interpreted and JIT-compiled code. The | ||
100 | results for JIT-compiled code may sometimes be surprising. LuaJIT | ||
101 | heavily optimizes and inlines Lua code — there's no simple | ||
102 | one-to-one correspondence between source code lines and the sampled | ||
103 | machine code. | ||
104 | </p> | ||
105 | |||
106 | <h3 id="j_p"><tt>-jp=[options[,output]]</tt></h3> | ||
107 | <p> | ||
108 | The <tt>-jp</tt> command line option starts the high-level profiler. | ||
109 | When the application run by the command line terminates, the profiler | ||
110 | stops and writes the results to <tt>stdout</tt> or to the specified | ||
111 | <tt>output</tt> file. | ||
112 | </p> | ||
113 | <p> | ||
114 | The <tt>options</tt> argument specifies how the profiling is to be | ||
115 | performed: | ||
116 | </p> | ||
117 | <ul> | ||
118 | <li><tt>f</tt> — Stack dump: function name, otherwise module:line. | ||
119 | This is the default mode.</li> | ||
120 | <li><tt>F</tt> — Stack dump: ditto, but dump module:name.</li> | ||
121 | <li><tt>l</tt> — Stack dump: module:line.</li> | ||
122 | <li><tt><number></tt> — Stack dump depth (callee ← | ||
123 | caller). Default: 1.</li> | ||
124 | <li><tt>-<number></tt> — Inverse stack dump depth (caller | ||
125 | → callee).</li> | ||
126 | <li><tt>s</tt> — Split stack dump after first stack level. Implies | ||
127 | depth ≥ 2 or depth ≤ -2.</li> | ||
128 | <li><tt>p</tt> — Show full path for module names.</li> | ||
129 | <li><tt>v</tt> — Show VM states.</li> | ||
130 | <li><tt>z</tt> — Show <a href="#jit_zone">zones</a>.</li> | ||
131 | <li><tt>r</tt> — Show raw sample counts. Default: show percentages.</li> | ||
132 | <li><tt>a</tt> — Annotate excerpts from source code files.</li> | ||
133 | <li><tt>A</tt> — Annotate complete source code files.</li> | ||
134 | <li><tt>G</tt> — Produce raw output suitable for graphical tools.</li> | ||
135 | <li><tt>m<number></tt> — Minimum sample percentage to be shown. | ||
136 | Default: 3%.</li> | ||
137 | <li><tt>i<number></tt> — Sampling interval in milliseconds. | ||
138 | Default: 10ms.<br> | ||
139 | Note: The actual sampling precision is OS-dependent.</li> | ||
140 | </ul> | ||
141 | <p> | ||
142 | The default output for <tt>-jp</tt> is a list of the most CPU consuming | ||
143 | spots in the application. Increasing the stack dump depth with (say) | ||
144 | <tt>-jp=2</tt> may help to point out the main callers or callees of | ||
145 | hotspots. But sample aggregation is still flat per unique stack dump. | ||
146 | </p> | ||
147 | <p> | ||
148 | To get a two-level view (split view) of callers/callees, use | ||
149 | <tt>-jp=s</tt> or <tt>-jp=-s</tt>. The percentages shown for the second | ||
150 | level are relative to the first level. | ||
151 | </p> | ||
152 | <p> | ||
153 | To see how much time is spent in each line relative to a function, use | ||
154 | <tt>-jp=fl</tt>. | ||
155 | </p> | ||
156 | <p> | ||
157 | To see how much time is spent in different VM states or | ||
158 | <a href="#jit_zone">zones</a>, use <tt>-jp=v</tt> or <tt>-jp=z</tt>. | ||
159 | </p> | ||
160 | <p> | ||
161 | Combinations of <tt>v/z</tt> with <tt>f/F/l</tt> produce two-level | ||
162 | views, e.g. <tt>-jp=vf</tt> or <tt>-jp=fv</tt>. This shows the time | ||
163 | spent in a VM state or zone vs. hotspots. This can be used to answer | ||
164 | questions like "Which time consuming functions are only interpreted?" or | ||
165 | "What's the garbage collector overhead for a specific function?". | ||
166 | </p> | ||
167 | <p> | ||
168 | Multiple options can be combined — but not all combinations make | ||
169 | sense, see above. E.g. <tt>-jp=3si4m1</tt> samples three stack levels | ||
170 | deep in 4ms intervals and shows a split view of the CPU consuming | ||
171 | functions and their callers with a 1% threshold. | ||
172 | </p> | ||
173 | <p> | ||
174 | Source code annotations produced by <tt>-jp=a</tt> or <tt>-jp=A</tt> are | ||
175 | always flat and at the line level. Obviously, the source code files need | ||
176 | to be readable by the profiler script. | ||
177 | </p> | ||
178 | <p> | ||
179 | The high-level profiler can also be started and stopped from Lua code with: | ||
180 | </p> | ||
181 | <pre class="code"> | ||
182 | require("jit.p").start(options, output) | ||
183 | ... | ||
184 | require("jit.p").stop() | ||
185 | </pre> | ||
186 | |||
187 | <h3 id="jit_zone"><tt>jit.zone</tt> — Zones</h3> | ||
188 | <p> | ||
189 | Zones can be used to provide information about different parts of an | ||
190 | application to the high-level profiler. E.g. a game could make use of an | ||
191 | <tt>"AI"</tt> zone, a <tt>"PHYS"</tt> zone, etc. Zones are hierarchical, | ||
192 | organized as a stack. | ||
193 | </p> | ||
194 | <p> | ||
195 | The <tt>jit.zone</tt> module needs to be loaded explicitly: | ||
196 | </p> | ||
197 | <pre class="code"> | ||
198 | local zone = require("jit.zone") | ||
199 | </pre> | ||
200 | <ul> | ||
201 | <li><tt>zone("name")</tt> pushes a named zone to the zone stack.</li> | ||
202 | <li><tt>zone()</tt> pops the current zone from the zone stack and | ||
203 | returns its name.</li> | ||
204 | <li><tt>zone:get()</tt> returns the current zone name or <tt>nil</tt>.</li> | ||
205 | <li><tt>zone:flush()</tt> flushes the zone stack.</li> | ||
206 | </ul> | ||
207 | <p> | ||
208 | To show the time spent in each zone use <tt>-jp=z</tt>. To show the time | ||
209 | spent relative to hotspots use e.g. <tt>-jp=zf</tt> or <tt>-jp=fz</tt>. | ||
210 | </p> | ||
211 | |||
212 | <h2 id="ll_lua_api">Low-level Lua API</h2> | ||
213 | <p> | ||
214 | The <tt>jit.profile</tt> module gives access to the low-level API of the | ||
215 | profiler from Lua code. This module needs to be loaded explicitly: | ||
216 | <pre class="code"> | ||
217 | local profile = require("jit.profile") | ||
218 | </pre> | ||
219 | <p> | ||
220 | This module can be used to implement your own higher-level profiler. | ||
221 | A typical profiling run starts the profiler, captures stack dumps in | ||
222 | the profiler callback, adds them to a hash table to aggregate the number | ||
223 | of samples, stops the profiler and then analyzes all of the captured | ||
224 | stack dumps. Other parameters can be sampled in the profiler callback, | ||
225 | too. But it's important not to spend too much time in the callback, | ||
226 | since this may skew the statistics. | ||
227 | </p> | ||
228 | |||
229 | <h3 id="profile_start"><tt>profile.start(mode, cb)</tt> | ||
230 | — Start profiler</h3> | ||
231 | <p> | ||
232 | This function starts the profiler. The <tt>mode</tt> argument is a | ||
233 | string holding options: | ||
234 | </p> | ||
235 | <ul> | ||
236 | <li><tt>f</tt> — Profile with precision down to the function level.</li> | ||
237 | <li><tt>l</tt> — Profile with precision down to the line level.</li> | ||
238 | <li><tt>i<number></tt> — Sampling interval in milliseconds (default | ||
239 | 10ms).<br> | ||
240 | Note: The actual sampling precision is OS-dependent. | ||
241 | </li> | ||
242 | </ul> | ||
243 | <p> | ||
244 | The <tt>cb</tt> argument is a callback function which is called with | ||
245 | three arguments: <tt>(thread, samples, vmstate)</tt>. The callback is | ||
246 | called on a separate coroutine; the <tt>thread</tt> argument is the | ||
247 | state that holds the stack to sample for profiling. Note: do | ||
248 | <em>not</em> modify the stack of that state or call functions on it. | ||
249 | </p> | ||
250 | <p> | ||
251 | <tt>samples</tt> gives the number of accumulated samples since the last | ||
252 | callback (usually 1). | ||
253 | </p> | ||
254 | <p> | ||
255 | <tt>vmstate</tt> holds the VM state at the time the profiling timer | ||
256 | triggered. This may or may not correspond to the state of the VM when | ||
257 | the profiling callback is called. The state is either <tt>'N'</tt> | ||
258 | native (compiled) code, <tt>'I'</tt> interpreted code, <tt>'C'</tt> | ||
259 | C code, <tt>'G'</tt> the garbage collector, or <tt>'J'</tt> the JIT | ||
260 | compiler. | ||
261 | </p> | ||
262 | |||
263 | <h3 id="profile_stop"><tt>profile.stop()</tt> | ||
264 | — Stop profiler</h3> | ||
265 | <p> | ||
266 | This function stops the profiler. | ||
267 | </p> | ||
268 | |||
269 | <h3 id="profile_dump"><tt>dump = profile.dumpstack([thread,] fmt, depth)</tt> | ||
270 | — Dump stack </h3> | ||
271 | <p> | ||
272 | This function allows taking stack dumps in an efficient manner. It | ||
273 | returns a string with a stack dump for the <tt>thread</tt> (coroutine), | ||
274 | formatted according to the <tt>fmt</tt> argument: | ||
275 | </p> | ||
276 | <ul> | ||
277 | <li><tt>p</tt> — Preserve the full path for module names. Otherwise | ||
278 | only the file name is used.</li> | ||
279 | <li><tt>f</tt> — Dump the function name if it can be derived. Otherwise | ||
280 | use module:line.</li> | ||
281 | <li><tt>F</tt> — Ditto, but dump module:name.</li> | ||
282 | <li><tt>l</tt> — Dump module:line.</li> | ||
283 | <li><tt>Z</tt> — Zap the following characters for the last dumped | ||
284 | frame.</li> | ||
285 | <li>All other characters are added verbatim to the output string.</li> | ||
286 | </ul> | ||
287 | <p> | ||
288 | The <tt>depth</tt> argument gives the number of frames to dump, starting | ||
289 | at the topmost frame of the thread. A negative number dumps the frames in | ||
290 | inverse order. | ||
291 | </p> | ||
292 | <p> | ||
293 | The first example prints a list of the current module names and line | ||
294 | numbers of up to 10 frames in separate lines. The second example prints | ||
295 | semicolon-separated function names for all frames (up to 100) in inverse | ||
296 | order: | ||
297 | </p> | ||
298 | <pre class="code"> | ||
299 | print(profile.dumpstack(thread, "l\n", 10)) | ||
300 | print(profile.dumpstack(thread, "fZ;", -100)) | ||
301 | </pre> | ||
302 | |||
303 | <h2 id="ll_c_api">Low-level C API</h2> | ||
304 | <p> | ||
305 | The profiler can be controlled directly from C code, e.g. for | ||
306 | use by IDEs. The declarations are in <tt>"luajit.h"</tt> (see | ||
307 | <a href="ext_c_api.html">Lua/C API</a> extensions). | ||
308 | </p> | ||
309 | |||
310 | <h3 id="luaJIT_profile_start"><tt>luaJIT_profile_start(L, mode, cb, data)</tt> | ||
311 | — Start profiler</h3> | ||
312 | <p> | ||
313 | This function starts the profiler. <a href="#profile_start">See | ||
314 | above</a> for a description of the <tt>mode</tt> argument. | ||
315 | </p> | ||
316 | <p> | ||
317 | The <tt>cb</tt> argument is a callback function with the following | ||
318 | declaration: | ||
319 | </p> | ||
320 | <pre class="code"> | ||
321 | typedef void (*luaJIT_profile_callback)(void *data, lua_State *L, | ||
322 | int samples, int vmstate); | ||
323 | </pre> | ||
324 | <p> | ||
325 | <tt>data</tt> is available for use by the callback. <tt>L</tt> is the | ||
326 | state that holds the stack to sample for profiling. Note: do | ||
327 | <em>not</em> modify this stack or call functions on this stack — | ||
328 | use a separate coroutine for this purpose. <a href="#profile_start">See | ||
329 | above</a> for a description of <tt>samples</tt> and <tt>vmstate</tt>. | ||
330 | </p> | ||
331 | |||
332 | <h3 id="luaJIT_profile_stop"><tt>luaJIT_profile_stop(L)</tt> | ||
333 | — Stop profiler</h3> | ||
334 | <p> | ||
335 | This function stops the profiler. | ||
336 | </p> | ||
337 | |||
338 | <h3 id="luaJIT_profile_dumpstack"><tt>p = luaJIT_profile_dumpstack(L, fmt, depth, len)</tt> | ||
339 | — Dump stack </h3> | ||
340 | <p> | ||
341 | This function allows taking stack dumps in an efficient manner. | ||
342 | <a href="#profile_dump">See above</a> for a description of <tt>fmt</tt> | ||
343 | and <tt>depth</tt>. | ||
344 | </p> | ||
345 | <p> | ||
346 | This function returns a <tt>const char *</tt> pointing to a | ||
347 | private string buffer of the profiler. The <tt>int *len</tt> | ||
348 | argument returns the length of the output string. The buffer is | ||
349 | overwritten on the next call and deallocated when the profiler stops. | ||
350 | You either need to consume the content immediately or copy it for later | ||
351 | use. | ||
352 | </p> | ||
353 | <br class="flush"> | ||
354 | </div> | ||
355 | <div id="foot"> | ||
356 | <hr class="hide"> | ||
357 | Copyright © 2005-2020 | ||
358 | <span class="noprint"> | ||
359 | · | ||
360 | <a href="contact.html">Contact</a> | ||
361 | </span> | ||
362 | </div> | ||
363 | </body> | ||
364 | </html> | ||
diff --git a/doc/extensions.html b/doc/extensions.html index 25d2f7fd..25764198 100644 --- a/doc/extensions.html +++ b/doc/extensions.html | |||
@@ -57,6 +57,8 @@ td.excinterop { | |||
57 | <a href="ext_jit.html">jit.* Library</a> | 57 | <a href="ext_jit.html">jit.* Library</a> |
58 | </li><li> | 58 | </li><li> |
59 | <a href="ext_c_api.html">Lua/C API</a> | 59 | <a href="ext_c_api.html">Lua/C API</a> |
60 | </li><li> | ||
61 | <a href="ext_profiler.html">Profiler</a> | ||
60 | </li></ul> | 62 | </li></ul> |
61 | </li><li> | 63 | </li><li> |
62 | <a href="status.html">Status</a> | 64 | <a href="status.html">Status</a> |
@@ -112,6 +114,9 @@ bit.lshift bit.rshift bit.arshift bit.rol bit.ror bit.bswap | |||
112 | This module is a LuaJIT built-in — you don't need to download or | 114 | This module is a LuaJIT built-in — you don't need to download or |
113 | install Lua BitOp. The Lua BitOp site has full documentation for all | 115 | install Lua BitOp. The Lua BitOp site has full documentation for all |
114 | <a href="http://bitop.luajit.org/api.html"><span class="ext">»</span> Lua BitOp API functions</a>. | 116 | <a href="http://bitop.luajit.org/api.html"><span class="ext">»</span> Lua BitOp API functions</a>. |
117 | The FFI adds support for | ||
118 | <a href="ext_ffi_semantics.html#cdata_arith">64 bit bitwise operations</a>, | ||
119 | using the same API functions. | ||
115 | </p> | 120 | </p> |
116 | <p> | 121 | <p> |
117 | Please make sure to <tt>require</tt> the module before using any of | 122 | Please make sure to <tt>require</tt> the module before using any of |
@@ -145,6 +150,11 @@ LuaJIT adds some | |||
145 | <a href="ext_c_api.html">extra functions to the Lua/C API</a>. | 150 | <a href="ext_c_api.html">extra functions to the Lua/C API</a>. |
146 | </p> | 151 | </p> |
147 | 152 | ||
153 | <h3 id="profiler">Profiler</h3> | ||
154 | <p> | ||
155 | LuaJIT has an <a href="ext_profiler.html">integrated profiler</a>. | ||
156 | </p> | ||
157 | |||
148 | <h2 id="library">Enhanced Standard Library Functions</h2> | 158 | <h2 id="library">Enhanced Standard Library Functions</h2> |
149 | 159 | ||
150 | <h3 id="xpcall"><tt>xpcall(f, err [,args...])</tt> passes arguments</h3> | 160 | <h3 id="xpcall"><tt>xpcall(f, err [,args...])</tt> passes arguments</h3> |
@@ -172,7 +182,7 @@ in <tt>"-inf"</tt>. | |||
172 | <h3 id="tonumber"><tt>tonumber()</tt> etc. use builtin string to number conversion</h3> | 182 | <h3 id="tonumber"><tt>tonumber()</tt> etc. use builtin string to number conversion</h3> |
173 | <p> | 183 | <p> |
174 | All string-to-number conversions consistently convert integer and | 184 | All string-to-number conversions consistently convert integer and |
175 | floating-point inputs in decimal and hexadecimal on all platforms. | 185 | floating-point inputs in decimal, hexadecimal and binary on all platforms. |
176 | <tt>strtod()</tt> is <em>not</em> used anymore, which avoids numerous | 186 | <tt>strtod()</tt> is <em>not</em> used anymore, which avoids numerous |
177 | problems with poor C library implementations. The builtin conversion | 187 | problems with poor C library implementations. The builtin conversion |
178 | function provides full precision according to the IEEE-754 standard, it | 188 | function provides full precision according to the IEEE-754 standard, it |
@@ -196,6 +206,36 @@ for dot releases (x.y.0 → x.y.1), but may change with major or | |||
196 | minor releases (2.0 → 2.1) or between any beta release. Foreign | 206 | minor releases (2.0 → 2.1) or between any beta release. Foreign |
197 | bytecode (e.g. from Lua 5.1) is incompatible and cannot be loaded. | 207 | bytecode (e.g. from Lua 5.1) is incompatible and cannot be loaded. |
198 | </p> | 208 | </p> |
209 | <p> | ||
210 | Note: <tt>LJ_GC64</tt> mode requires a different frame layout, which implies | ||
211 | a different, incompatible bytecode format for all 64 bit ports. This may be | ||
212 | rectified in the future. | ||
213 | </p> | ||
214 | |||
215 | <h3 id="table_new"><tt>table.new(narray, nhash)</tt> allocates a pre-sized table</h3> | ||
216 | <p> | ||
217 | An extra library function <tt>table.new()</tt> can be made available via | ||
218 | <tt>require("table.new")</tt>. This creates a pre-sized table, just like | ||
219 | the C API equivalent <tt>lua_createtable()</tt>. This is useful for big | ||
220 | tables if the final table size is known and automatic table resizing is | ||
221 | too expensive. | ||
222 | </p> | ||
223 | |||
224 | <h3 id="table_clear"><tt>table.clear(tab)</tt> clears a table</h3> | ||
225 | <p> | ||
226 | An extra library function <tt>table.clear()</tt> can be made available | ||
227 | via <tt>require("table.clear")</tt>. This clears all keys and values | ||
228 | from a table, but preserves the allocated array/hash sizes. This is | ||
229 | useful when a table, which is linked from multiple places, needs to be | ||
230 | cleared and/or when recycling a table for use by the same context. This | ||
231 | avoids managing backlinks, saves an allocation and the overhead of | ||
232 | incremental array/hash part growth. | ||
233 | </p> | ||
234 | <p> | ||
235 | Please note this function is meant for very specific situations. In most | ||
236 | cases it's better to replace the (usually single) link with a new table | ||
237 | and let the GC do its work. | ||
238 | </p> | ||
199 | 239 | ||
200 | <h3 id="math_random">Enhanced PRNG for <tt>math.random()</tt></h3> | 240 | <h3 id="math_random">Enhanced PRNG for <tt>math.random()</tt></h3> |
201 | <p> | 241 | <p> |
@@ -274,6 +314,26 @@ indexes for varargs.</li> | |||
274 | <li><tt>debug.getupvalue()</tt> and <tt>debug.setupvalue()</tt> handle | 314 | <li><tt>debug.getupvalue()</tt> and <tt>debug.setupvalue()</tt> handle |
275 | C functions.</li> | 315 | C functions.</li> |
276 | <li><tt>debug.upvalueid()</tt> and <tt>debug.upvaluejoin()</tt>.</li> | 316 | <li><tt>debug.upvalueid()</tt> and <tt>debug.upvaluejoin()</tt>.</li> |
317 | <li>Lua/C API extensions: | ||
318 | <tt>lua_version()</tt> | ||
319 | <tt>lua_upvalueid()</tt> | ||
320 | <tt>lua_upvaluejoin()</tt> | ||
321 | <tt>lua_loadx()</tt> | ||
322 | <tt>lua_copy()</tt> | ||
323 | <tt>lua_tonumberx()</tt> | ||
324 | <tt>lua_tointegerx()</tt> | ||
325 | <tt>luaL_fileresult()</tt> | ||
326 | <tt>luaL_execresult()</tt> | ||
327 | <tt>luaL_loadfilex()</tt> | ||
328 | <tt>luaL_loadbufferx()</tt> | ||
329 | <tt>luaL_traceback()</tt> | ||
330 | <tt>luaL_setfuncs()</tt> | ||
331 | <tt>luaL_pushmodule()</tt> | ||
332 | <tt>luaL_newlibtable()</tt> | ||
333 | <tt>luaL_newlib()</tt> | ||
334 | <tt>luaL_testudata()</tt> | ||
335 | <tt>luaL_setmetatable()</tt> | ||
336 | </li> | ||
277 | <li>Command line option <tt>-E</tt>.</li> | 337 | <li>Command line option <tt>-E</tt>.</li> |
278 | <li>Command line checks <tt>__tostring</tt> for errors.</li> | 338 | <li>Command line checks <tt>__tostring</tt> for errors.</li> |
279 | </ul> | 339 | </ul> |
@@ -299,6 +359,8 @@ exit status.</li> | |||
299 | <li><tt>debug.setmetatable()</tt> returns object.</li> | 359 | <li><tt>debug.setmetatable()</tt> returns object.</li> |
300 | <li><tt>debug.getuservalue()</tt> and <tt>debug.setuservalue()</tt>.</li> | 360 | <li><tt>debug.getuservalue()</tt> and <tt>debug.setuservalue()</tt>.</li> |
301 | <li>Remove <tt>math.mod()</tt>, <tt>string.gfind()</tt>.</li> | 361 | <li>Remove <tt>math.mod()</tt>, <tt>string.gfind()</tt>.</li> |
362 | <li><tt>package.searchers</tt>.</li> | ||
363 | <li><tt>module()</tt> returns the module table.</li> | ||
302 | </ul> | 364 | </ul> |
303 | <p> | 365 | <p> |
304 | Note: this provides only partial compatibility with Lua 5.2 at the | 366 | Note: this provides only partial compatibility with Lua 5.2 at the |
@@ -307,6 +369,21 @@ Lua 5.1, which prevents implementing features that would otherwise | |||
307 | break the Lua/C API and ABI (e.g. <tt>_ENV</tt>). | 369 | break the Lua/C API and ABI (e.g. <tt>_ENV</tt>). |
308 | </p> | 370 | </p> |
309 | 371 | ||
372 | <h2 id="lua53">Extensions from Lua 5.3</h2> | ||
373 | <p> | ||
374 | LuaJIT supports some extensions from Lua 5.3: | ||
375 | <ul> | ||
376 | <li>Unicode escape <tt>'\u{XX...}'</tt> embeds the UTF-8 encoding in string literals.</li> | ||
377 | <li>The argument table <tt>arg</tt> can be read (and modified) by <tt>LUA_INIT</tt> and <tt>-e</tt> chunks.</li> | ||
378 | <li><tt>io.read()</tt> and <tt>file:read()</tt> accept formats with or without a leading <tt>*</tt>.</li> | ||
379 | <li><tt>assert()</tt> accepts any type of error object.</li> | ||
380 | <li><tt>table.move(a1, f, e, t [,a2])</tt>.</li> | ||
381 | <li><tt>coroutine.isyieldable()</tt>.</li> | ||
382 | <li>Lua/C API extensions: | ||
383 | <tt>lua_isyieldable()</tt> | ||
384 | </li> | ||
385 | </ul> | ||
386 | |||
310 | <h2 id="exceptions">C++ Exception Interoperability</h2> | 387 | <h2 id="exceptions">C++ Exception Interoperability</h2> |
311 | <p> | 388 | <p> |
312 | LuaJIT has built-in support for interoperating with C++ exceptions. | 389 | LuaJIT has built-in support for interoperating with C++ exceptions. |
@@ -321,25 +398,30 @@ the toolchain used to compile LuaJIT: | |||
321 | </tr> | 398 | </tr> |
322 | <tr class="odd separate"> | 399 | <tr class="odd separate"> |
323 | <td class="excplatform">POSIX/x64, DWARF2 unwinding</td> | 400 | <td class="excplatform">POSIX/x64, DWARF2 unwinding</td> |
324 | <td class="exccompiler">GCC 4.3+</td> | 401 | <td class="exccompiler">GCC 4.3+, Clang</td> |
325 | <td class="excinterop"><b style="color: #00a000;">Full</b></td> | 402 | <td class="excinterop"><b style="color: #00a000;">Full</b></td> |
326 | </tr> | 403 | </tr> |
327 | <tr class="even"> | 404 | <tr class="even"> |
405 | <td class="excplatform">ARM <tt>-DLUAJIT_UNWIND_EXTERNAL</tt></td> | ||
406 | <td class="exccompiler">GCC, Clang</td> | ||
407 | <td class="excinterop"><b style="color: #00a000;">Full</b></td> | ||
408 | </tr> | ||
409 | <tr class="odd"> | ||
328 | <td class="excplatform">Other platforms, DWARF2 unwinding</td> | 410 | <td class="excplatform">Other platforms, DWARF2 unwinding</td> |
329 | <td class="exccompiler">GCC</td> | 411 | <td class="exccompiler">GCC, Clang</td> |
330 | <td class="excinterop"><b style="color: #c06000;">Limited</b></td> | 412 | <td class="excinterop"><b style="color: #c06000;">Limited</b></td> |
331 | </tr> | 413 | </tr> |
332 | <tr class="odd"> | 414 | <tr class="even"> |
333 | <td class="excplatform">Windows/x64</td> | 415 | <td class="excplatform">Windows/x64</td> |
334 | <td class="exccompiler">MSVC or WinSDK</td> | 416 | <td class="exccompiler">MSVC or WinSDK</td> |
335 | <td class="excinterop"><b style="color: #00a000;">Full</b></td> | 417 | <td class="excinterop"><b style="color: #00a000;">Full</b></td> |
336 | </tr> | 418 | </tr> |
337 | <tr class="even"> | 419 | <tr class="odd"> |
338 | <td class="excplatform">Windows/x86</td> | 420 | <td class="excplatform">Windows/x86</td> |
339 | <td class="exccompiler">Any</td> | 421 | <td class="exccompiler">Any</td> |
340 | <td class="excinterop"><b style="color: #a00000;">No</b></td> | 422 | <td class="excinterop"><b style="color: #00a000;">Full</b></td> |
341 | </tr> | 423 | </tr> |
342 | <tr class="odd"> | 424 | <tr class="even"> |
343 | <td class="excplatform">Other platforms</td> | 425 | <td class="excplatform">Other platforms</td> |
344 | <td class="exccompiler">Other compilers</td> | 426 | <td class="exccompiler">Other compilers</td> |
345 | <td class="excinterop"><b style="color: #a00000;">No</b></td> | 427 | <td class="excinterop"><b style="color: #a00000;">No</b></td> |
@@ -388,14 +470,6 @@ C++ destructors.</li> | |||
388 | <li>Lua errors <b>cannot</b> be caught on the C++ side.</li> | 470 | <li>Lua errors <b>cannot</b> be caught on the C++ side.</li> |
389 | <li>Throwing Lua errors across C++ frames will <b>not</b> call | 471 | <li>Throwing Lua errors across C++ frames will <b>not</b> call |
390 | C++ destructors.</li> | 472 | C++ destructors.</li> |
391 | <li>Additionally, on Windows/x86 with SEH-based C++ exceptions: | ||
392 | it's <b>not</b> safe to throw a Lua error across any frames containing | ||
393 | a C++ function with any try/catch construct or using variables with | ||
394 | (implicit) destructors. This also applies to any functions which may be | ||
395 | inlined in such a function. It doesn't matter whether <tt>lua_error()</tt> | ||
396 | is called inside or outside of a try/catch or whether any object actually | ||
397 | needs to be destroyed: the SEH chain is corrupted and this will eventually | ||
398 | lead to the termination of the process.</li> | ||
399 | </ul> | 473 | </ul> |
400 | <br class="flush"> | 474 | <br class="flush"> |
401 | </div> | 475 | </div> |
diff --git a/doc/faq.html b/doc/faq.html index be4d9f61..2031aa8a 100644 --- a/doc/faq.html +++ b/doc/faq.html | |||
@@ -43,6 +43,8 @@ dd { margin-left: 1.5em; } | |||
43 | <a href="ext_jit.html">jit.* Library</a> | 43 | <a href="ext_jit.html">jit.* Library</a> |
44 | </li><li> | 44 | </li><li> |
45 | <a href="ext_c_api.html">Lua/C API</a> | 45 | <a href="ext_c_api.html">Lua/C API</a> |
46 | </li><li> | ||
47 | <a href="ext_profiler.html">Profiler</a> | ||
46 | </li></ul> | 48 | </li></ul> |
47 | </li><li> | 49 | </li><li> |
48 | <a href="status.html">Status</a> | 50 | <a href="status.html">Status</a> |
diff --git a/doc/install.html b/doc/install.html index 68de0c10..9602831e 100644 --- a/doc/install.html +++ b/doc/install.html | |||
@@ -68,6 +68,8 @@ td.compatno { | |||
68 | <a href="ext_jit.html">jit.* Library</a> | 68 | <a href="ext_jit.html">jit.* Library</a> |
69 | </li><li> | 69 | </li><li> |
70 | <a href="ext_c_api.html">Lua/C API</a> | 70 | <a href="ext_c_api.html">Lua/C API</a> |
71 | </li><li> | ||
72 | <a href="ext_profiler.html">Profiler</a> | ||
71 | </li></ul> | 73 | </li></ul> |
72 | </li><li> | 74 | </li><li> |
73 | <a href="status.html">Status</a> | 75 | <a href="status.html">Status</a> |
@@ -111,17 +113,17 @@ operating systems, CPUs and compilers: | |||
111 | </tr> | 113 | </tr> |
112 | <tr class="odd separate"> | 114 | <tr class="odd separate"> |
113 | <td class="compatcpu">x86 (32 bit)</td> | 115 | <td class="compatcpu">x86 (32 bit)</td> |
114 | <td class="compatos">GCC 4.x+<br>GCC 3.4</td> | 116 | <td class="compatos">GCC 4.2+</td> |
115 | <td class="compatos">GCC 4.x+<br>GCC 3.4</td> | 117 | <td class="compatos">GCC 4.2+</td> |
116 | <td class="compatos">XCode 5.0+<br>Clang</td> | 118 | <td class="compatos">XCode 5.0+<br>Clang</td> |
117 | <td class="compatos">MSVC<br>MinGW, Cygwin</td> | 119 | <td class="compatos">MSVC<br>MinGW, Cygwin</td> |
118 | </tr> | 120 | </tr> |
119 | <tr class="even"> | 121 | <tr class="even"> |
120 | <td class="compatcpu">x64 (64 bit)</td> | 122 | <td class="compatcpu">x64 (64 bit)</td> |
121 | <td class="compatos">GCC 4.x+</td> | 123 | <td class="compatos">GCC 4.2+</td> |
122 | <td class="compatos">ORBIS (<a href="#ps4">PS4</a>)</td> | 124 | <td class="compatos">GCC 4.2+<br>ORBIS (<a href="#ps4">PS4</a>)</td> |
123 | <td class="compatos">XCode 5.0+<br>Clang</td> | 125 | <td class="compatos">XCode 5.0+<br>Clang</td> |
124 | <td class="compatos">MSVC</td> | 126 | <td class="compatos">MSVC<br>Durango (<a href="#xboxone">Xbox One</a>)</td> |
125 | </tr> | 127 | </tr> |
126 | <tr class="odd"> | 128 | <tr class="odd"> |
127 | <td class="compatcpu"><a href="#cross2">ARMv5+<br>ARM9E+</a></td> | 129 | <td class="compatcpu"><a href="#cross2">ARMv5+<br>ARM9E+</a></td> |
@@ -131,21 +133,21 @@ operating systems, CPUs and compilers: | |||
131 | <td class="compatos compatno"> </td> | 133 | <td class="compatos compatno"> </td> |
132 | </tr> | 134 | </tr> |
133 | <tr class="even"> | 135 | <tr class="even"> |
134 | <td class="compatcpu"><a href="#cross2">PPC</a></td> | 136 | <td class="compatcpu"><a href="#cross2">ARM64</a></td> |
135 | <td class="compatos">GCC 4.3+</td> | 137 | <td class="compatos">GCC 4.8+</td> |
136 | <td class="compatos">GCC 4.3+<br>GCC 4.1 (<a href="#ps3">PS3</a>)</td> | 138 | <td class="compatos compatno"> </td> |
139 | <td class="compatos">XCode 6.0+<br>Clang 3.5+</td> | ||
137 | <td class="compatos compatno"> </td> | 140 | <td class="compatos compatno"> </td> |
138 | <td class="compatos">XEDK (<a href="#xbox360">Xbox 360</a>)</td> | ||
139 | </tr> | 141 | </tr> |
140 | <tr class="odd"> | 142 | <tr class="odd"> |
141 | <td class="compatcpu"><a href="#cross2">PPC/e500v2</a></td> | 143 | <td class="compatcpu"><a href="#cross2">PPC</a></td> |
142 | <td class="compatos">GCC 4.3+</td> | ||
143 | <td class="compatos">GCC 4.3+</td> | 144 | <td class="compatos">GCC 4.3+</td> |
145 | <td class="compatos">GCC 4.3+<br>GCC 4.1 (<a href="#ps3">PS3</a>)</td> | ||
144 | <td class="compatos compatno"> </td> | 146 | <td class="compatos compatno"> </td> |
145 | <td class="compatos compatno"> </td> | 147 | <td class="compatos">XEDK (<a href="#xbox360">Xbox 360</a>)</td> |
146 | </tr> | 148 | </tr> |
147 | <tr class="even"> | 149 | <tr class="even"> |
148 | <td class="compatcpu"><a href="#cross2">MIPS</a></td> | 150 | <td class="compatcpu"><a href="#cross2">MIPS32<br>MIPS64</a></td> |
149 | <td class="compatos">GCC 4.3+</td> | 151 | <td class="compatos">GCC 4.3+</td> |
150 | <td class="compatos">GCC 4.3+</td> | 152 | <td class="compatos">GCC 4.3+</td> |
151 | <td class="compatos compatno"> </td> | 153 | <td class="compatos compatno"> </td> |
@@ -172,6 +174,13 @@ MSVC (Visual Studio).</li> | |||
172 | Please read the instructions given in these files, before changing | 174 | Please read the instructions given in these files, before changing |
173 | any settings. | 175 | any settings. |
174 | </p> | 176 | </p> |
177 | <p> | ||
178 | All LuaJIT 64 bit ports use 64 bit GC objects by default (<tt>LJ_GC64</tt>). | ||
179 | For x64, you can select the old 32-on-64 bit mode by adding | ||
180 | <tt>XCFLAGS=-DLUAJIT_DISABLE_GC64</tt> to the make command. | ||
181 | Please check the note about the | ||
182 | <a href="extensions.html#string_dump">bytecode format</a> differences, too. | ||
183 | </p> | ||
175 | 184 | ||
176 | <h2 id="posix">POSIX Systems (Linux, OSX, *BSD etc.)</h2> | 185 | <h2 id="posix">POSIX Systems (Linux, OSX, *BSD etc.)</h2> |
177 | <h3>Prerequisites</h3> | 186 | <h3>Prerequisites</h3> |
@@ -199,7 +208,7 @@ which is probably the default on your system, anyway. Simply run: | |||
199 | make | 208 | make |
200 | </pre> | 209 | </pre> |
201 | <p> | 210 | <p> |
202 | This always builds a native x86, x64 or PPC binary, depending on the host OS | 211 | This always builds a native binary, depending on the host OS |
203 | you're running this command on. Check the section on | 212 | you're running this command on. Check the section on |
204 | <a href="#cross">cross-compilation</a> for more options. | 213 | <a href="#cross">cross-compilation</a> for more options. |
205 | </p> | 214 | </p> |
@@ -297,25 +306,36 @@ directory where <tt>luajit.exe</tt> is installed | |||
297 | 306 | ||
298 | <h2 id="cross">Cross-compiling LuaJIT</h2> | 307 | <h2 id="cross">Cross-compiling LuaJIT</h2> |
299 | <p> | 308 | <p> |
309 | First, let's clear up some terminology: | ||
310 | </p> | ||
311 | <ul> | ||
312 | <li>Host: This is your development system, usually based on an x64 or x86 CPU.</li> | ||
313 | <li>Target: This is the target system you want LuaJIT to run on, e.g. Android/ARM.</li> | ||
314 | <li>Toolchain: This comprises a C compiler, linker, assembler and a matching C library.</li> | ||
315 | <li>Host (or system) toolchain: This is the toolchain used to build native binaries for your host system.</li> | ||
316 | <li>Cross-compile toolchain: This is the toolchain used to build binaries for the target system. They can only be run on the target system.</li> | ||
317 | </ul> | ||
318 | <p> | ||
300 | The GNU Makefile-based build system allows cross-compiling on any host | 319 | The GNU Makefile-based build system allows cross-compiling on any host |
301 | for any supported target, as long as both architectures have the same | 320 | for any supported target: |
302 | pointer size. If you want to cross-compile to any 32 bit target on an | ||
303 | x64 OS, you need to install the multilib development package (e.g. | ||
304 | <tt>libc6-dev-i386</tt> on Debian/Ubuntu) and build a 32 bit host part | ||
305 | (<tt>HOST_CC="gcc -m32"</tt>). | ||
306 | </p> | 321 | </p> |
322 | <ul> | ||
323 | <li>Yes, you need a toolchain for both your host <em>and</em> your target!</li> | ||
324 | <li>Both host and target architectures must have the same pointer size.</li> | ||
325 | <li>E.g. if you want to cross-compile to a 32 bit target on a 64 bit host, you need to install the multilib development package (e.g. <tt>libc6-dev-i386</tt> on Debian/Ubuntu) and build a 32 bit host part (<tt>HOST_CC="gcc -m32"</tt>).</li> | ||
326 | <li>64 bit targets always require compilation on a 64 bit host.</li> | ||
327 | </ul> | ||
307 | <p> | 328 | <p> |
308 | You need to specify <tt>TARGET_SYS</tt> whenever the host OS and the | 329 | You need to specify <tt>TARGET_SYS</tt> whenever the host OS and the |
309 | target OS differ, or you'll get assembler or linker errors. E.g. if | 330 | target OS differ, or you'll get assembler or linker errors: |
310 | you're compiling on a Windows or OSX host for embedded Linux or Android, | ||
311 | you need to add <tt>TARGET_SYS=Linux</tt> to the examples below. For a | ||
312 | minimal target OS, you may need to disable the built-in allocator in | ||
313 | <tt>src/Makefile</tt> and use <tt>TARGET_SYS=Other</tt>. Don't forget to | ||
314 | specify the same <tt>TARGET_SYS</tt> for the install step, too. | ||
315 | </p> | 331 | </p> |
332 | <ul> | ||
333 | <li>E.g. if you're compiling on a Windows or OSX host for embedded Linux or Android, you need to add <tt>TARGET_SYS=Linux</tt> to the examples below.</li> | ||
334 | <li>For a minimal target OS, you may need to disable the built-in allocator in <tt>src/Makefile</tt> and use <tt>TARGET_SYS=Other</tt>.</li> | ||
335 | <li>Don't forget to specify the same <tt>TARGET_SYS</tt> for the install step, too.</li> | ||
336 | </ul> | ||
316 | <p> | 337 | <p> |
317 | The examples below only show some popular targets — please check | 338 | Here are some examples where host and target have the same CPU: |
318 | the comments in <tt>src/Makefile</tt> for more details. | ||
319 | </p> | 339 | </p> |
320 | <pre class="code"> | 340 | <pre class="code"> |
321 | # Cross-compile to a 32 bit binary on a multilib x64 OS | 341 | # Cross-compile to a 32 bit binary on a multilib x64 OS |
@@ -333,34 +353,44 @@ use the canonical toolchain triplets for Linux. | |||
333 | </p> | 353 | </p> |
334 | <p> | 354 | <p> |
335 | Since there's often no easy way to detect CPU features at runtime, it's | 355 | Since there's often no easy way to detect CPU features at runtime, it's |
336 | important to compile with the proper CPU or architecture settings. You | 356 | important to compile with the proper CPU or architecture settings: |
337 | can specify these when building the toolchain yourself. Or add | 357 | </p> |
338 | <tt>-mcpu=...</tt> or <tt>-march=...</tt> to <tt>TARGET_CFLAGS</tt>. For | 358 | <ul> |
339 | ARM it's important to have the correct <tt>-mfloat-abi=...</tt> setting, | 359 | <li>The best way to get consistent results is to specify the correct settings when building the toolchain yourself.</li> |
340 | too. Otherwise LuaJIT may not run at the full performance of your target | 360 | <li>For a pre-built, generic toolchain add <tt>-mcpu=...</tt> or <tt>-march=...</tt> and other necessary flags to <tt>TARGET_CFLAGS</tt>.</li> |
341 | CPU. | 361 | <li>For ARM it's important to have the correct <tt>-mfloat-abi=...</tt> setting, too. Otherwise LuaJIT may not run at the full performance of your target CPU.</li> |
362 | <li>For MIPS it's important to select a supported ABI (o32 on MIPS32, n64 on MIPS64) and consistently compile your project either with hard-float or soft-float compiler settings.</li> | ||
363 | </ul> | ||
364 | <p> | ||
365 | Here are some examples for targets with a different CPU than the host: | ||
342 | </p> | 366 | </p> |
343 | <pre class="code"> | 367 | <pre class="code"> |
344 | # ARM soft-float | 368 | # ARM soft-float |
345 | make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabi- \ | 369 | make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabi- \ |
346 | TARGET_CFLAGS="-mfloat-abi=soft" | 370 | TARGET_CFLAGS="-mfloat-abi=soft" |
347 | 371 | ||
348 | # ARM soft-float ABI with VFP (example for Cortex-A8) | 372 | # ARM soft-float ABI with VFP (example for Cortex-A9) |
349 | make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabi- \ | 373 | make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabi- \ |
350 | TARGET_CFLAGS="-mcpu=cortex-a8 -mfloat-abi=softfp" | 374 | TARGET_CFLAGS="-mcpu=cortex-a9 -mfloat-abi=softfp" |
351 | 375 | ||
352 | # ARM hard-float ABI with VFP (armhf, requires recent toolchain) | 376 | # ARM hard-float ABI with VFP (armhf, most modern toolchains) |
353 | make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabihf- | 377 | make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabihf- |
354 | 378 | ||
379 | # ARM64 | ||
380 | make CROSS=aarch64-linux- | ||
381 | |||
355 | # PPC | 382 | # PPC |
356 | make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu- | 383 | make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu- |
357 | # PPC/e500v2 (fast interpreter only) | ||
358 | make HOST_CC="gcc -m32" CROSS=powerpc-e500v2-linux-gnuspe- | ||
359 | 384 | ||
360 | # MIPS big-endian | 385 | # MIPS32 big-endian |
361 | make HOST_CC="gcc -m32" CROSS=mips-linux- | 386 | make HOST_CC="gcc -m32" CROSS=mips-linux- |
362 | # MIPS little-endian | 387 | # MIPS32 little-endian |
363 | make HOST_CC="gcc -m32" CROSS=mipsel-linux- | 388 | make HOST_CC="gcc -m32" CROSS=mipsel-linux- |
389 | |||
390 | # MIPS64 big-endian | ||
391 | make CROSS=mips-linux- TARGET_CFLAGS="-mips64r2 -mabi=64" | ||
392 | # MIPS64 little-endian | ||
393 | make CROSS=mipsel-linux- TARGET_CFLAGS="-mips64r2 -mabi=64" | ||
364 | </pre> | 394 | </pre> |
365 | <p> | 395 | <p> |
366 | You can cross-compile for <b id="android">Android</b> using the <a href="http://developer.android.com/ndk/"><span class="ext">»</span> Android NDK</a>. | 396 | You can cross-compile for <b id="android">Android</b> using the <a href="http://developer.android.com/ndk/"><span class="ext">»</span> Android NDK</a>. |
@@ -368,8 +398,16 @@ Please adapt the environment variables to match the install locations and the | |||
368 | desired target platform. E.g. Android 4.1 corresponds to ABI level 16. | 398 | desired target platform. E.g. Android 4.1 corresponds to ABI level 16. |
369 | </p> | 399 | </p> |
370 | <pre class="code"> | 400 | <pre class="code"> |
371 | # Android/ARM, armeabi-v7a (ARMv7 VFP), Android 4.1+ (JB) | 401 | # Android/ARM64, aarch64, Android 5.0+ (L) |
402 | NDKDIR=/opt/android/ndk | ||
403 | NDKBIN=$NDKDIR/toolchains/llvm/prebuilt/linux-x86_64/bin | ||
404 | NDKCROSS=$NDKBIN/aarch64-linux-android- | ||
405 | NDKCC=$NDKBIN/aarch64-linux-android21-clang | ||
406 | make CROSS=$NDKCROSS \ | ||
407 | STATIC_CC=$NDKCC DYNAMIC_CC="$NDKCC -fPIC" \ | ||
408 | TARGET_LD=$NDKCC | ||
372 | 409 | ||
410 | # Android/ARM, armeabi-v7a (ARMv7 VFP), Android 4.1+ (JB) | ||
373 | NDKDIR=/opt/android/ndk | 411 | NDKDIR=/opt/android/ndk |
374 | NDKBIN=$NDKDIR/toolchains/llvm/prebuilt/linux-x86_64/bin | 412 | NDKBIN=$NDKDIR/toolchains/llvm/prebuilt/linux-x86_64/bin |
375 | NDKCROSS=$NDKBIN/arm-linux-androideabi- | 413 | NDKCROSS=$NDKBIN/arm-linux-androideabi- |
@@ -379,9 +417,23 @@ make HOST_CC="gcc -m32" CROSS=$NDKCROSS \ | |||
379 | TARGET_LD=$NDKCC | 417 | TARGET_LD=$NDKCC |
380 | </pre> | 418 | </pre> |
381 | <p> | 419 | <p> |
382 | Please use the LuaJIT 2.1 branch to compile for | 420 | You can cross-compile for <b id="ios">iOS 3.0+</b> (iPhone/iPad) using the <a href="http://developer.apple.com/ios/"><span class="ext">»</span> iOS SDK</a>: |
383 | <b id="ios">iOS</b> (iPhone/iPad). | ||
384 | </p> | 421 | </p> |
422 | <p style="font-size: 8pt;"> | ||
423 | Note: <b>the JIT compiler is disabled for iOS</b>, because regular iOS Apps | ||
424 | are not allowed to generate code at runtime. You'll only get the performance | ||
425 | of the LuaJIT interpreter on iOS. This is still faster than plain Lua, but | ||
426 | much slower than the JIT compiler. Please complain to Apple, not me. | ||
427 | Or use Android. :-p | ||
428 | </p> | ||
429 | <pre class="code"> | ||
430 | # iOS/ARM64 | ||
431 | ISDKP=$(xcrun --sdk iphoneos --show-sdk-path) | ||
432 | ICC=$(xcrun --sdk iphoneos --find clang) | ||
433 | ISDKF="-arch arm64 -isysroot $ISDKP" | ||
434 | make DEFAULT_CC=clang CROSS="$(dirname $ICC)/" \ | ||
435 | TARGET_FLAGS="$ISDKF" TARGET_SYS=iOS | ||
436 | </pre> | ||
385 | 437 | ||
386 | <h3 id="consoles">Cross-compiling for consoles</h3> | 438 | <h3 id="consoles">Cross-compiling for consoles</h3> |
387 | <p> | 439 | <p> |
@@ -437,6 +489,16 @@ the following commands: | |||
437 | cd src | 489 | cd src |
438 | xedkbuild | 490 | xedkbuild |
439 | </pre> | 491 | </pre> |
492 | <p> | ||
493 | To cross-compile for <b id="xboxone">Xbox One</b> from a Windows host, | ||
494 | open a "Visual Studio .NET Command Prompt" (64 bit host compiler), | ||
495 | <tt>cd</tt> to the directory where you've unpacked the sources and run | ||
496 | the following commands: | ||
497 | </p> | ||
498 | <pre class="code"> | ||
499 | cd src | ||
500 | xb1build | ||
501 | </pre> | ||
440 | 502 | ||
441 | <h2 id="embed">Embedding LuaJIT</h2> | 503 | <h2 id="embed">Embedding LuaJIT</h2> |
442 | <p> | 504 | <p> |
@@ -467,14 +529,11 @@ intend to load Lua/C modules at runtime. | |||
467 | </li> | 529 | </li> |
468 | <li> | 530 | <li> |
469 | If you're building a 64 bit application on OSX which links directly or | 531 | If you're building a 64 bit application on OSX which links directly or |
470 | indirectly against LuaJIT, you need to link your main executable | 532 | indirectly against LuaJIT which is not built for <tt>LJ_GC64</tt> mode, |
471 | with these flags: | 533 | you need to link your main executable with these flags: |
472 | <pre class="code"> | 534 | <pre class="code"> |
473 | -pagezero_size 10000 -image_base 100000000 | 535 | -pagezero_size 10000 -image_base 100000000 |
474 | </pre> | 536 | </pre> |
475 | Also, it's recommended to <tt>rebase</tt> all (self-compiled) shared libraries | ||
476 | which are loaded at runtime on OSX/x64 (e.g. C extension modules for Lua). | ||
477 | See: <tt>man rebase</tt> | ||
478 | </li> | 537 | </li> |
479 | </ul> | 538 | </ul> |
480 | <p>Additional hints for initializing LuaJIT using the C API functions:</p> | 539 | <p>Additional hints for initializing LuaJIT using the C API functions:</p> |
diff --git a/doc/luajit.html b/doc/luajit.html index 3f360a93..a3ffa476 100644 --- a/doc/luajit.html +++ b/doc/luajit.html | |||
@@ -125,6 +125,8 @@ table.feature small { | |||
125 | <a href="ext_jit.html">jit.* Library</a> | 125 | <a href="ext_jit.html">jit.* Library</a> |
126 | </li><li> | 126 | </li><li> |
127 | <a href="ext_c_api.html">Lua/C API</a> | 127 | <a href="ext_c_api.html">Lua/C API</a> |
128 | </li><li> | ||
129 | <a href="ext_profiler.html">Profiler</a> | ||
128 | </li></ul> | 130 | </li></ul> |
129 | </li><li> | 131 | </li><li> |
130 | <a href="status.html">Status</a> | 132 | <a href="status.html">Status</a> |
@@ -163,13 +165,13 @@ LuaJIT is Copyright © 2005-2020 Mike Pall, released under the | |||
163 | <tr><td><span style="font-size:90%;">Embedded</span></td><td>Android</td><td>iOS</td></tr> | 165 | <tr><td><span style="font-size:90%;">Embedded</span></td><td>Android</td><td>iOS</td></tr> |
164 | </table> | 166 | </table> |
165 | <table class="feature os os3"> | 167 | <table class="feature os os3"> |
166 | <tr><td>PS3</td><td>PS4</td><td>PS Vita</td><td>Xbox 360</td></tr> | 168 | <tr><td>PS3</td><td>PS4</td><td>PS Vita</td><td>Xbox 360</td><td>Xbox One</td></tr> |
167 | </table> | 169 | </table> |
168 | <table class="feature compiler"> | 170 | <table class="feature compiler"> |
169 | <tr><td>GCC</td><td>CLANG<br>LLVM</td><td>MSVC</td></tr> | 171 | <tr><td>GCC</td><td>Clang<br>LLVM</td><td>MSVC</td></tr> |
170 | </table> | 172 | </table> |
171 | <table class="feature cpu"> | 173 | <table class="feature cpu"> |
172 | <tr><td>x86</td><td>x64</td><td>ARM</td><td>PPC</td><td>e500</td><td>MIPS</td></tr> | 174 | <tr><td>x86<br>x64</td><td>ARM<br>ARM64</td><td>PPC</td><td>MIPS32<br>MIPS64</td></tr> |
173 | </table> | 175 | </table> |
174 | <table class="feature fcompat"> | 176 | <table class="feature fcompat"> |
175 | <tr><td>Lua 5.1<br>API+ABI</td><td>+ JIT</td><td>+ BitOp</td><td>+ FFI</td><td>Drop-in<br>DLL/.so</td></tr> | 177 | <tr><td>Lua 5.1<br>API+ABI</td><td>+ JIT</td><td>+ BitOp</td><td>+ FFI</td><td>Drop-in<br>DLL/.so</td></tr> |
diff --git a/doc/running.html b/doc/running.html index 5cfdcc5e..6f96e9d8 100644 --- a/doc/running.html +++ b/doc/running.html | |||
@@ -62,6 +62,8 @@ td.param_default { | |||
62 | <a href="ext_jit.html">jit.* Library</a> | 62 | <a href="ext_jit.html">jit.* Library</a> |
63 | </li><li> | 63 | </li><li> |
64 | <a href="ext_c_api.html">Lua/C API</a> | 64 | <a href="ext_c_api.html">Lua/C API</a> |
65 | </li><li> | ||
66 | <a href="ext_profiler.html">Profiler</a> | ||
65 | </li></ul> | 67 | </li></ul> |
66 | </li><li> | 68 | </li><li> |
67 | <a href="status.html">Status</a> | 69 | <a href="status.html">Status</a> |
@@ -177,6 +179,7 @@ Here are the available LuaJIT control commands: | |||
177 | <li id="j_flush"><tt>-jflush</tt> — Flushes the whole cache of compiled code.</li> | 179 | <li id="j_flush"><tt>-jflush</tt> — Flushes the whole cache of compiled code.</li> |
178 | <li id="j_v"><tt>-jv</tt> — Shows verbose information about the progress of the JIT compiler.</li> | 180 | <li id="j_v"><tt>-jv</tt> — Shows verbose information about the progress of the JIT compiler.</li> |
179 | <li id="j_dump"><tt>-jdump</tt> — Dumps the code and structures used in various compiler stages.</li> | 181 | <li id="j_dump"><tt>-jdump</tt> — Dumps the code and structures used in various compiler stages.</li> |
182 | <li id="j_p"><tt>-jp</tt> — Start the <a href="ext_profiler.html">integrated profiler</a>.</li> | ||
180 | </ul> | 183 | </ul> |
181 | <p> | 184 | <p> |
182 | The <tt>-jv</tt> and <tt>-jdump</tt> commands are extension modules | 185 | The <tt>-jv</tt> and <tt>-jdump</tt> commands are extension modules |
diff --git a/doc/status.html b/doc/status.html index 175f6a29..cb454db8 100644 --- a/doc/status.html +++ b/doc/status.html | |||
@@ -43,6 +43,8 @@ ul li { padding-bottom: 0.3em; } | |||
43 | <a href="ext_jit.html">jit.* Library</a> | 43 | <a href="ext_jit.html">jit.* Library</a> |
44 | </li><li> | 44 | </li><li> |
45 | <a href="ext_c_api.html">Lua/C API</a> | 45 | <a href="ext_c_api.html">Lua/C API</a> |
46 | </li><li> | ||
47 | <a href="ext_profiler.html">Profiler</a> | ||
46 | </li></ul> | 48 | </li></ul> |
47 | </li><li> | 49 | </li><li> |
48 | <a class="current" href="status.html">Status</a> | 50 | <a class="current" href="status.html">Status</a> |
@@ -94,6 +96,17 @@ handled correctly. The error may fall through an on-trace | |||
94 | <tt>lua_atpanic</tt> on x64. This issue will be fixed with the new | 96 | <tt>lua_atpanic</tt> on x64. This issue will be fixed with the new |
95 | garbage collector. | 97 | garbage collector. |
96 | </li> | 98 | </li> |
99 | <li> | ||
100 | LuaJIT on 64 bit systems provides a <b>limited range</b> of 47 bits for the | ||
101 | <b>legacy <tt>lightuserdata</tt></b> data type. | ||
102 | This is only relevant on x64 systems which use the negative part of the | ||
103 | virtual address space in user mode, e.g. Solaris/x64, and on ARM64 systems | ||
104 | configured with a 48 bit or 52 bit VA. | ||
105 | Avoid using <tt>lightuserdata</tt> to hold pointers that may point outside | ||
106 | of that range, e.g. variables on the stack. In general, avoid this data | ||
107 | type for new code and replace it with (much more performant) FFI bindings. | ||
108 | FFI cdata pointers can address the full 64 bit range. | ||
109 | </li> | ||
97 | </ul> | 110 | </ul> |
98 | <br class="flush"> | 111 | <br class="flush"> |
99 | </div> | 112 | </div> |
diff --git a/dynasm/dasm_arm.lua b/dynasm/dasm_arm.lua index 21fb5022..164980a1 100644 --- a/dynasm/dasm_arm.lua +++ b/dynasm/dasm_arm.lua | |||
@@ -9,9 +9,9 @@ | |||
9 | local _info = { | 9 | local _info = { |
10 | arch = "arm", | 10 | arch = "arm", |
11 | description = "DynASM ARM module", | 11 | description = "DynASM ARM module", |
12 | version = "1.3.0", | 12 | version = "1.4.0", |
13 | vernum = 10300, | 13 | vernum = 10400, |
14 | release = "2011-05-05", | 14 | release = "2015-10-18", |
15 | author = "Mike Pall", | 15 | author = "Mike Pall", |
16 | license = "MIT", | 16 | license = "MIT", |
17 | } | 17 | } |
diff --git a/dynasm/dasm_arm64.h b/dynasm/dasm_arm64.h new file mode 100644 index 00000000..3455981f --- /dev/null +++ b/dynasm/dasm_arm64.h | |||
@@ -0,0 +1,519 @@ | |||
1 | /* | ||
2 | ** DynASM ARM64 encoding engine. | ||
3 | ** Copyright (C) 2005-2020 Mike Pall. All rights reserved. | ||
4 | ** Released under the MIT license. See dynasm.lua for full copyright notice. | ||
5 | */ | ||
6 | |||
7 | #include <stddef.h> | ||
8 | #include <stdarg.h> | ||
9 | #include <string.h> | ||
10 | #include <stdlib.h> | ||
11 | |||
12 | #define DASM_ARCH "arm64" | ||
13 | |||
14 | #ifndef DASM_EXTERN | ||
15 | #define DASM_EXTERN(a,b,c,d) 0 | ||
16 | #endif | ||
17 | |||
18 | /* Action definitions. */ | ||
19 | enum { | ||
20 | DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT, | ||
21 | /* The following actions need a buffer position. */ | ||
22 | DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG, | ||
23 | /* The following actions also have an argument. */ | ||
24 | DASM_REL_PC, DASM_LABEL_PC, | ||
25 | DASM_IMM, DASM_IMM6, DASM_IMM12, DASM_IMM13W, DASM_IMM13X, DASM_IMML, | ||
26 | DASM__MAX | ||
27 | }; | ||
28 | |||
29 | /* Maximum number of section buffer positions for a single dasm_put() call. */ | ||
30 | #define DASM_MAXSECPOS 25 | ||
31 | |||
32 | /* DynASM encoder status codes. Action list offset or number are or'ed in. */ | ||
33 | #define DASM_S_OK 0x00000000 | ||
34 | #define DASM_S_NOMEM 0x01000000 | ||
35 | #define DASM_S_PHASE 0x02000000 | ||
36 | #define DASM_S_MATCH_SEC 0x03000000 | ||
37 | #define DASM_S_RANGE_I 0x11000000 | ||
38 | #define DASM_S_RANGE_SEC 0x12000000 | ||
39 | #define DASM_S_RANGE_LG 0x13000000 | ||
40 | #define DASM_S_RANGE_PC 0x14000000 | ||
41 | #define DASM_S_RANGE_REL 0x15000000 | ||
42 | #define DASM_S_UNDEF_LG 0x21000000 | ||
43 | #define DASM_S_UNDEF_PC 0x22000000 | ||
44 | |||
45 | /* Macros to convert positions (8 bit section + 24 bit index). */ | ||
46 | #define DASM_POS2IDX(pos) ((pos)&0x00ffffff) | ||
47 | #define DASM_POS2BIAS(pos) ((pos)&0xff000000) | ||
48 | #define DASM_SEC2POS(sec) ((sec)<<24) | ||
49 | #define DASM_POS2SEC(pos) ((pos)>>24) | ||
50 | #define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos)) | ||
51 | |||
52 | /* Action list type. */ | ||
53 | typedef const unsigned int *dasm_ActList; | ||
54 | |||
55 | /* Per-section structure. */ | ||
56 | typedef struct dasm_Section { | ||
57 | int *rbuf; /* Biased buffer pointer (negative section bias). */ | ||
58 | int *buf; /* True buffer pointer. */ | ||
59 | size_t bsize; /* Buffer size in bytes. */ | ||
60 | int pos; /* Biased buffer position. */ | ||
61 | int epos; /* End of biased buffer position - max single put. */ | ||
62 | int ofs; /* Byte offset into section. */ | ||
63 | } dasm_Section; | ||
64 | |||
65 | /* Core structure holding the DynASM encoding state. */ | ||
66 | struct dasm_State { | ||
67 | size_t psize; /* Allocated size of this structure. */ | ||
68 | dasm_ActList actionlist; /* Current actionlist pointer. */ | ||
69 | int *lglabels; /* Local/global chain/pos ptrs. */ | ||
70 | size_t lgsize; | ||
71 | int *pclabels; /* PC label chains/pos ptrs. */ | ||
72 | size_t pcsize; | ||
73 | void **globals; /* Array of globals (bias -10). */ | ||
74 | dasm_Section *section; /* Pointer to active section. */ | ||
75 | size_t codesize; /* Total size of all code sections. */ | ||
76 | int maxsection; /* 0 <= sectionidx < maxsection. */ | ||
77 | int status; /* Status code. */ | ||
78 | dasm_Section sections[1]; /* All sections. Alloc-extended. */ | ||
79 | }; | ||
80 | |||
81 | /* The size of the core structure depends on the max. number of sections. */ | ||
82 | #define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section)) | ||
83 | |||
84 | |||
85 | /* Initialize DynASM state. */ | ||
86 | void dasm_init(Dst_DECL, int maxsection) | ||
87 | { | ||
88 | dasm_State *D; | ||
89 | size_t psz = 0; | ||
90 | int i; | ||
91 | Dst_REF = NULL; | ||
92 | DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); | ||
93 | D = Dst_REF; | ||
94 | D->psize = psz; | ||
95 | D->lglabels = NULL; | ||
96 | D->lgsize = 0; | ||
97 | D->pclabels = NULL; | ||
98 | D->pcsize = 0; | ||
99 | D->globals = NULL; | ||
100 | D->maxsection = maxsection; | ||
101 | for (i = 0; i < maxsection; i++) { | ||
102 | D->sections[i].buf = NULL; /* Need this for pass3. */ | ||
103 | D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i); | ||
104 | D->sections[i].bsize = 0; | ||
105 | D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */ | ||
106 | } | ||
107 | } | ||
108 | |||
109 | /* Free DynASM state. */ | ||
110 | void dasm_free(Dst_DECL) | ||
111 | { | ||
112 | dasm_State *D = Dst_REF; | ||
113 | int i; | ||
114 | for (i = 0; i < D->maxsection; i++) | ||
115 | if (D->sections[i].buf) | ||
116 | DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize); | ||
117 | if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize); | ||
118 | if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize); | ||
119 | DASM_M_FREE(Dst, D, D->psize); | ||
120 | } | ||
121 | |||
122 | /* Setup global label array. Must be called before dasm_setup(). */ | ||
123 | void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) | ||
124 | { | ||
125 | dasm_State *D = Dst_REF; | ||
126 | D->globals = gl - 10; /* Negative bias to compensate for locals. */ | ||
127 | DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); | ||
128 | } | ||
129 | |||
130 | /* Grow PC label array. Can be called after dasm_setup(), too. */ | ||
131 | void dasm_growpc(Dst_DECL, unsigned int maxpc) | ||
132 | { | ||
133 | dasm_State *D = Dst_REF; | ||
134 | size_t osz = D->pcsize; | ||
135 | DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int)); | ||
136 | memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz); | ||
137 | } | ||
138 | |||
139 | /* Setup encoder. */ | ||
140 | void dasm_setup(Dst_DECL, const void *actionlist) | ||
141 | { | ||
142 | dasm_State *D = Dst_REF; | ||
143 | int i; | ||
144 | D->actionlist = (dasm_ActList)actionlist; | ||
145 | D->status = DASM_S_OK; | ||
146 | D->section = &D->sections[0]; | ||
147 | memset((void *)D->lglabels, 0, D->lgsize); | ||
148 | if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); | ||
149 | for (i = 0; i < D->maxsection; i++) { | ||
150 | D->sections[i].pos = DASM_SEC2POS(i); | ||
151 | D->sections[i].ofs = 0; | ||
152 | } | ||
153 | } | ||
154 | |||
155 | |||
156 | #ifdef DASM_CHECKS | ||
157 | #define CK(x, st) \ | ||
158 | do { if (!(x)) { \ | ||
159 | D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0) | ||
160 | #define CKPL(kind, st) \ | ||
161 | do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \ | ||
162 | D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0) | ||
163 | #else | ||
164 | #define CK(x, st) ((void)0) | ||
165 | #define CKPL(kind, st) ((void)0) | ||
166 | #endif | ||
167 | |||
168 | static int dasm_imm12(unsigned int n) | ||
169 | { | ||
170 | if ((n >> 12) == 0) | ||
171 | return n; | ||
172 | else if ((n & 0xff000fff) == 0) | ||
173 | return (n >> 12) | 0x1000; | ||
174 | else | ||
175 | return -1; | ||
176 | } | ||
177 | |||
178 | static int dasm_ffs(unsigned long long x) | ||
179 | { | ||
180 | int n = -1; | ||
181 | while (x) { x >>= 1; n++; } | ||
182 | return n; | ||
183 | } | ||
184 | |||
/*
** Try to express the 64 bit value hi:lo as a 13 bit pattern descriptor.
** The value must be a single run of set bits (after optional inversion when
** bit 0 is set), optionally replicated with a period of 2, 4, 8, 16 or 32
** bits. Returns the descriptor, or -1 if the value has no such form.
*/
static int dasm_imm13(int lo, int hi)
{
  unsigned long long n = (((unsigned long long)hi) << 32) | (unsigned int)lo;
  unsigned long long rep = 1ULL;  /* Multiplier that replicates one element. */
  unsigned long long a, b, c;
  int inv = 0, w = 64, s = 0xfff, xa, xb;

  /* Normalize so that bit 0 is clear; remember that we inverted. */
  if (n & 1) { n = ~n; inv = 1; }

  a = n & -n;              /* Lowest set bit: start of the first run. */
  b = (n+a) & -(n+a);      /* Lowest set bit after the first run. */
  c = (n+a-b) & -(n+a-b);  /* Start of the second run, if there is one. */
  xa = dasm_ffs(a);
  xb = dasm_ffs(b);

  if (c != 0) {
    /* More than one run: the distance between runs is the element width. */
    w = dasm_ffs(c) - xa;
    switch (w) {
    case 32: rep = 0x0000000100000001UL; break;
    case 16: rep = 0x0001000100010001UL; break;
    case 8:  rep = 0x0101010101010101UL; break;
    case 4:  rep = 0x1111111111111111UL; break;
    case 2:  rep = 0x5555555555555555UL; break;
    default: return -1;
    }
    s = (-2*w & 0x3f) - 1;
  } else if (a == 0) {
    return -1;  /* No bits set at all. */
  } else if (xb == -1) {
    xb = 64;  /* The single run extends up to bit 63. */
  }

  /* The replicated run must reproduce the value exactly. */
  if ((b-a) * rep != n) return -1;

  return inv ? (((w - xb) << 6) | (s+w+xa-xb))
             : (((w - xa) << 6) | (s+xb-xa));
}
215 | |||
216 | /* Pass 1: Store actions and args, link branches/labels, estimate offsets. Walks the action list from index `start` until DASM_STOP or DASM_SECTION; every action >= DASM_REL_PC consumes one int vararg (DASM_IMM13X consumes two). With DASM_CHECKS, failures are recorded in D->status and the function returns early. */ | ||
217 | void dasm_put(Dst_DECL, int start, ...) | ||
218 | { | ||
219 | va_list ap; | ||
220 | dasm_State *D = Dst_REF; | ||
221 | dasm_ActList p = D->actionlist + start; /* Next action word to process. */ | ||
222 | dasm_Section *sec = D->section; | ||
223 | int pos = sec->pos, ofs = sec->ofs; /* Buffer position and byte offset estimate of the current section. */ | ||
224 | int *b; | ||
225 | |||
226 | if (pos >= sec->epos) { /* Reached the usable end of the section buffer: grow it. */ | ||
227 | DASM_M_GROW(Dst, int, sec->buf, sec->bsize, | ||
228 | sec->bsize + 2*DASM_MAXSECPOS*sizeof(int)); | ||
229 | sec->rbuf = sec->buf - DASM_POS2BIAS(pos); /* Re-derive the biased pointer, the buffer may have moved. */ | ||
230 | sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos); | ||
231 | } | ||
232 | |||
233 | b = sec->rbuf; | ||
234 | b[pos++] = start; /* The first stored word is the action list offset. */ | ||
235 | |||
236 | va_start(ap, start); | ||
237 | while (1) { | ||
238 | unsigned int ins = *p++; | ||
239 | unsigned int action = (ins >> 16); /* Upper 16 bits select the action. */ | ||
240 | if (action >= DASM__MAX) { /* Not an action code: counted as one 4 byte word. */ | ||
241 | ofs += 4; | ||
242 | } else { | ||
243 | int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0; /* Actions from DASM_REL_PC upwards take an argument. */ | ||
244 | switch (action) { | ||
245 | case DASM_STOP: goto stop; | ||
246 | case DASM_SECTION: | ||
247 | n = (ins & 255); CK(n < D->maxsection, RANGE_SEC); | ||
248 | D->section = &D->sections[n]; goto stop; /* Switch the current section, then finish this call. */ | ||
249 | case DASM_ESC: p++; ofs += 4; break; /* Skip the following word and count it as 4 bytes. */ | ||
250 | case DASM_REL_EXT: break; | ||
251 | case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break; /* Add the full mask to the estimate and store it. */ | ||
252 | case DASM_REL_LG: | ||
253 | n = (ins & 2047) - 10; pl = D->lglabels + n; | ||
254 | /* Bkwd rel or global. */ | ||
255 | if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; } | ||
256 | pl += 10; n = *pl; | ||
257 | if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */ | ||
258 | goto linkrel; | ||
259 | case DASM_REL_PC: | ||
260 | pl = D->pclabels + n; CKPL(pc, PC); | ||
261 | putrel: | ||
262 | n = *pl; | ||
263 | if (n < 0) { /* Label exists. Get label pos and store it. */ | ||
264 | b[pos] = -n; | ||
265 | } else { | ||
266 | linkrel: | ||
267 | b[pos] = n; /* Else link to rel chain, anchored at label. */ | ||
268 | *pl = pos; | ||
269 | } | ||
270 | pos++; | ||
271 | break; | ||
272 | case DASM_LABEL_LG: | ||
273 | pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel; | ||
274 | case DASM_LABEL_PC: | ||
275 | pl = D->pclabels + n; CKPL(pc, PC); | ||
276 | putlabel: | ||
277 | n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */ | ||
278 | while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos; | ||
279 | } | ||
280 | *pl = -pos; /* Label exists now. */ | ||
281 | b[pos++] = ofs; /* Store pass1 offset estimate. */ | ||
282 | break; | ||
283 | case DASM_IMM: | ||
284 | CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I); /* Bits below the scale (ins bits 10..14) must be clear. */ | ||
285 | n >>= ((ins>>10)&31); | ||
286 | #ifdef DASM_CHECKS | ||
287 | if ((ins & 0x8000)) /* Bit 15 of ins selects the signed range check. */ | ||
288 | CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I); | ||
289 | else | ||
290 | CK((n>>((ins>>5)&31)) == 0, RANGE_I); | ||
291 | #endif | ||
292 | b[pos++] = n; | ||
293 | break; | ||
294 | case DASM_IMM6: | ||
295 | CK((n >> 6) == 0, RANGE_I); | ||
296 | b[pos++] = n; | ||
297 | break; | ||
298 | case DASM_IMM12: | ||
299 | CK(dasm_imm12((unsigned int)n) != -1, RANGE_I); | ||
300 | b[pos++] = n; | ||
301 | break; | ||
302 | case DASM_IMM13W: | ||
303 | CK(dasm_imm13(n, n) != -1, RANGE_I); /* The 32 bit value is checked duplicated into both halves. */ | ||
304 | b[pos++] = n; | ||
305 | break; | ||
306 | case DASM_IMM13X: { | ||
307 | int m = va_arg(ap, int); /* Second argument: passed as the hi half to dasm_imm13. */ | ||
308 | CK(dasm_imm13(n, m) != -1, RANGE_I); | ||
309 | b[pos++] = n; | ||
310 | b[pos++] = m; | ||
311 | break; | ||
312 | } | ||
313 | case DASM_IMML: { | ||
314 | #ifdef DASM_CHECKS | ||
315 | int scale = (p[-2] >> 30); /* Top two bits of the word preceding this action. */ | ||
316 | CK((!(n & ((1<<scale)-1)) && (unsigned int)(n>>scale) < 4096) || | ||
317 | (unsigned int)(n+256) < 512, RANGE_I); | ||
318 | #endif | ||
319 | b[pos++] = n; | ||
320 | break; | ||
321 | } | ||
322 | } | ||
323 | } | ||
324 | } | ||
325 | stop: | ||
326 | va_end(ap); | ||
327 | sec->pos = pos; /* Write the advanced position and offset back to the section. */ | ||
328 | sec->ofs = ofs; | ||
329 | } | ||
330 | #undef CK | ||
331 | |||
332 | /* Pass 2: Link sections, shrink aligns, fix label offsets. */ | ||
333 | int dasm_link(Dst_DECL, size_t *szp) | ||
334 | { | ||
335 | dasm_State *D = Dst_REF; | ||
336 | int secnum; | ||
337 | int ofs = 0; | ||
338 | |||
339 | #ifdef DASM_CHECKS | ||
340 | *szp = 0; | ||
341 | if (D->status != DASM_S_OK) return D->status; | ||
342 | { | ||
343 | int pc; | ||
344 | for (pc = 0; pc*sizeof(int) < D->pcsize; pc++) | ||
345 | if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc; | ||
346 | } | ||
347 | #endif | ||
348 | |||
349 | { /* Handle globals not defined in this translation unit. */ | ||
350 | int idx; | ||
351 | for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) { | ||
352 | int n = D->lglabels[idx]; | ||
353 | /* Undefined label: Collapse rel chain and replace with marker (< 0). */ | ||
354 | while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } | ||
355 | } | ||
356 | } | ||
357 | |||
358 | /* Combine all code sections. No support for data sections (yet). */ | ||
359 | for (secnum = 0; secnum < D->maxsection; secnum++) { | ||
360 | dasm_Section *sec = D->sections + secnum; | ||
361 | int *b = sec->rbuf; | ||
362 | int pos = DASM_SEC2POS(secnum); | ||
363 | int lastpos = sec->pos; | ||
364 | |||
365 | while (pos != lastpos) { | ||
366 | dasm_ActList p = D->actionlist + b[pos++]; | ||
367 | while (1) { | ||
368 | unsigned int ins = *p++; | ||
369 | unsigned int action = (ins >> 16); | ||
370 | switch (action) { | ||
371 | case DASM_STOP: case DASM_SECTION: goto stop; | ||
372 | case DASM_ESC: p++; break; | ||
373 | case DASM_REL_EXT: break; | ||
374 | case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break; | ||
375 | case DASM_REL_LG: case DASM_REL_PC: pos++; break; | ||
376 | case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; | ||
377 | case DASM_IMM: case DASM_IMM6: case DASM_IMM12: case DASM_IMM13W: | ||
378 | case DASM_IMML: pos++; break; | ||
379 | case DASM_IMM13X: pos += 2; break; | ||
380 | } | ||
381 | } | ||
382 | stop: (void)0; | ||
383 | } | ||
384 | ofs += sec->ofs; /* Next section starts right after current section. */ | ||
385 | } | ||
386 | |||
387 | D->codesize = ofs; /* Total size of all code sections */ | ||
388 | *szp = ofs; | ||
389 | return DASM_S_OK; | ||
390 | } | ||
391 | |||
392 | #ifdef DASM_CHECKS /* Checking variant of CK, compiled in only when DASM_CHECKS is defined. */ | ||
393 | #define CK(x, st) \ | ||
394 | do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0) /* If x is false, return status st combined with the index of the current action word; D->status is not touched. */ | ||
395 | #else /* Without DASM_CHECKS the macro expands to a no-op. */ | ||
396 | #define CK(x, st) ((void)0) | ||
397 | #endif | ||
398 | |||
399 | /* Pass 3: Encode sections. */ | ||
400 | int dasm_encode(Dst_DECL, void *buffer) | ||
401 | { | ||
402 | dasm_State *D = Dst_REF; | ||
403 | char *base = (char *)buffer; | ||
404 | unsigned int *cp = (unsigned int *)buffer; | ||
405 | int secnum; | ||
406 | |||
407 | /* Encode all code sections. No support for data sections (yet). */ | ||
408 | for (secnum = 0; secnum < D->maxsection; secnum++) { | ||
409 | dasm_Section *sec = D->sections + secnum; | ||
410 | int *b = sec->buf; | ||
411 | int *endb = sec->rbuf + sec->pos; | ||
412 | |||
413 | while (b != endb) { | ||
414 | dasm_ActList p = D->actionlist + *b++; | ||
415 | while (1) { | ||
416 | unsigned int ins = *p++; | ||
417 | unsigned int action = (ins >> 16); | ||
418 | int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0; | ||
419 | switch (action) { | ||
420 | case DASM_STOP: case DASM_SECTION: goto stop; | ||
421 | case DASM_ESC: *cp++ = *p++; break; | ||
422 | case DASM_REL_EXT: | ||
423 | n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins&2047), !(ins&2048)); | ||
424 | goto patchrel; | ||
425 | case DASM_ALIGN: | ||
426 | ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0xe1a00000; | ||
427 | break; | ||
428 | case DASM_REL_LG: | ||
429 | CK(n >= 0, UNDEF_LG); | ||
430 | /* fallthrough */ | ||
431 | case DASM_REL_PC: | ||
432 | CK(n >= 0, UNDEF_PC); | ||
433 | n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) + 4; | ||
434 | patchrel: | ||
435 | if (!(ins & 0xf800)) { /* B, BL */ | ||
436 | CK((n & 3) == 0 && ((n+0x08000000) >> 28) == 0, RANGE_REL); | ||
437 | cp[-1] |= ((n >> 2) & 0x03ffffff); | ||
438 | } else if ((ins & 0x800)) { /* B.cond, CBZ, CBNZ, LDR* literal */ | ||
439 | CK((n & 3) == 0 && ((n+0x00100000) >> 21) == 0, RANGE_REL); | ||
440 | cp[-1] |= ((n << 3) & 0x00ffffe0); | ||
441 | } else if ((ins & 0x3000) == 0x2000) { /* ADR */ | ||
442 | CK(((n+0x00100000) >> 21) == 0, RANGE_REL); | ||
443 | cp[-1] |= ((n << 3) & 0x00ffffe0) | ((n & 3) << 29); | ||
444 | } else if ((ins & 0x3000) == 0x3000) { /* ADRP */ | ||
445 | cp[-1] |= ((n >> 9) & 0x00ffffe0) | (((n >> 12) & 3) << 29); | ||
446 | } else if ((ins & 0x1000)) { /* TBZ, TBNZ */ | ||
447 | CK((n & 3) == 0 && ((n+0x00008000) >> 16) == 0, RANGE_REL); | ||
448 | cp[-1] |= ((n << 3) & 0x0007ffe0); | ||
449 | } | ||
450 | break; | ||
451 | case DASM_LABEL_LG: | ||
452 | ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n); | ||
453 | break; | ||
454 | case DASM_LABEL_PC: break; | ||
455 | case DASM_IMM: | ||
456 | cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31); | ||
457 | break; | ||
458 | case DASM_IMM6: | ||
459 | cp[-1] |= ((n&31) << 19) | ((n&32) << 26); | ||
460 | break; | ||
461 | case DASM_IMM12: | ||
462 | cp[-1] |= (dasm_imm12((unsigned int)n) << 10); | ||
463 | break; | ||
464 | case DASM_IMM13W: | ||
465 | cp[-1] |= (dasm_imm13(n, n) << 10); | ||
466 | break; | ||
467 | case DASM_IMM13X: | ||
468 | cp[-1] |= (dasm_imm13(n, *b++) << 10); | ||
469 | break; | ||
470 | case DASM_IMML: { | ||
471 | int scale = (p[-2] >> 30); | ||
472 | cp[-1] |= (!(n & ((1<<scale)-1)) && (unsigned int)(n>>scale) < 4096) ? | ||
473 | ((n << (10-scale)) | 0x01000000) : ((n & 511) << 12); | ||
474 | break; | ||
475 | } | ||
476 | default: *cp++ = ins; break; | ||
477 | } | ||
478 | } | ||
479 | stop: (void)0; | ||
480 | } | ||
481 | } | ||
482 | |||
483 | if (base + D->codesize != (char *)cp) /* Check for phase errors. */ | ||
484 | return DASM_S_PHASE; | ||
485 | return DASM_S_OK; | ||
486 | } | ||
487 | #undef CK | ||
488 | |||
489 | /* Get PC label offset. */ | ||
490 | int dasm_getpclabel(Dst_DECL, unsigned int pc) | ||
491 | { | ||
492 | dasm_State *D = Dst_REF; | ||
493 | if (pc*sizeof(int) < D->pcsize) { | ||
494 | int pos = D->pclabels[pc]; | ||
495 | if (pos < 0) return *DASM_POS2PTR(D, -pos); | ||
496 | if (pos > 0) return -1; /* Undefined. */ | ||
497 | } | ||
498 | return -2; /* Unused or out of range. */ | ||
499 | } | ||
500 | |||
#ifdef DASM_CHECKS
/*
** Optional sanity checker to call between isolated encoding steps.
** Flags a still pending local label 1..9 as undefined, otherwise clears the
** local label slots. If secmatch >= 0, the current section must be
** D->sections[secmatch]. Returns the resulting D->status.
*/
int dasm_checkstep(Dst_DECL, int secmatch)
{
  dasm_State *D = Dst_REF;
  int i;
  if (D->status != DASM_S_OK)
    return D->status;
  for (i = 1; i <= 9; i++) {
    if (D->lglabels[i] > 0) {
      D->status = DASM_S_UNDEF_LG|i;
      break;
    }
    D->lglabels[i] = 0;
  }
  if (D->status == DASM_S_OK && secmatch >= 0) {
    if (D->section != &D->sections[secmatch])
      D->status = DASM_S_MATCH_SEC|(D->section-D->sections);
  }
  return D->status;
}
#endif
519 | |||
diff --git a/dynasm/dasm_arm64.lua b/dynasm/dasm_arm64.lua new file mode 100644 index 00000000..d5640842 --- /dev/null +++ b/dynasm/dasm_arm64.lua | |||
@@ -0,0 +1,1166 @@ | |||
1 | ------------------------------------------------------------------------------ | ||
2 | -- DynASM ARM64 module. | ||
3 | -- | ||
4 | -- Copyright (C) 2005-2020 Mike Pall. All rights reserved. | ||
5 | -- See dynasm.lua for full copyright notice. | ||
6 | ------------------------------------------------------------------------------ | ||
7 | |||
-- Descriptive metadata for this module; stored in the exported table below.
local _info = {
  arch        = "arm",
  description = "DynASM ARM64 module",
  version     = "1.4.0",
  vernum      = 10400,
  release     = "2015-10-18",
  author      = "Mike Pall",
  license     = "MIT",
}

-- Table holding the glue functions exported by this arch-specific module.
local _M = {
  _info = _info,
}
21 | |||
22 | -- Cache library functions. | ||
23 | local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs | ||
24 | local assert, setmetatable, rawget = assert, setmetatable, rawget | ||
25 | local _s = string | ||
26 | local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char | ||
27 | local match, gmatch, gsub = _s.match, _s.gmatch, _s.gsub | ||
28 | local concat, sort, insert = table.concat, table.sort, table.insert | ||
29 | local bit = bit or require("bit") | ||
30 | local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift | ||
31 | local ror, tohex = bit.ror, bit.tohex | ||
32 | |||
33 | -- Inherited tables and callbacks. | ||
34 | local g_opt, g_arch | ||
35 | local wline, werror, wfatal, wwarn | ||
36 | |||
-- Names of the encoding engine actions, listed in action number order.
-- CHECK: Keep this in sync with the C code!
local action_names = {
  "STOP", "SECTION", "ESC", "REL_EXT",
  "ALIGN", "REL_LG", "LABEL_LG",
  "REL_PC", "LABEL_PC", "IMM", "IMM6", "IMM12", "IMM13W", "IMM13X", "IMML",
}

-- Upper bound on section buffer positions for a single dasm_put().
-- CHECK: Keep this in sync with the C code!
local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.

-- Reverse lookup: action name -> zero-based action number.
local map_action = {}
for idx = 1, #action_names do
  map_action[action_names[idx]] = idx - 1
end

-- Words of the action list collected so far.
local actlist = {}

-- Arguments for the pending dasm_put(); the first one is the offset into
-- the action list (initially 0).
local actargs = { 0 }

-- Section buffer positions used so far by the pending dasm_put().
local secpos = 1
63 | |||
64 | ------------------------------------------------------------------------------ | ||
65 | |||
-- Write a table of all action names with their numbers (hex and decimal)
-- to the stream `out`.
local function dumpactions(out)
  out:write("DynASM encoding engine action codes:\n")
  for _, name in ipairs(action_names) do
    local code = map_action[name]
    local line = format(" %-10s %02X %d\n", name, code, code)
    out:write(line)
  end
  out:write("\n")
end
75 | |||
-- Write action list buffer as a huge static C array.
--   out:  output stream (needs a :write() method).
--   name: C identifier used for the array.
-- Every word is emitted as one hex literal per line.
local function writeactions(out, name)
  local nn = #actlist
  -- An empty action list is emitted as a single STOP word, so the C array
  -- never has zero elements. The word must live at index 1, since that is
  -- the element written below.
  if nn == 0 then nn = 1; actlist[1] = map_action.STOP end
  out:write("static const unsigned int ", name, "[", nn, "] = {\n")
  for i = 1,nn-1 do
    assert(out:write("0x", tohex(actlist[i]), ",\n"))
  end
  -- The last element carries no trailing comma and closes the initializer.
  assert(out:write("0x", tohex(actlist[nn]), "\n};\n\n"))
end
86 | |||
87 | ------------------------------------------------------------------------------ | ||
88 | |||
-- Append one raw word to the action list.
-- The word must be an integer in the unsigned 32 bit range.
local function wputxw(n)
  local in_range = n >= 0 and n <= 0xffffffff and n % 1 == 0
  assert(in_range, "word out of range")
  insert(actlist, n)
end
94 | |||
-- Append an action word (action number in the upper half, `val` in the
-- lower half). If `a` is given it is queued as a dasm_put() argument.
-- The buffer position advances by `num` (default 1) when `a` or `num` is set.
local function waction(action, val, a, num)
  local code = assert(map_action[action], "bad action name `"..action.."'")
  wputxw(code * 0x10000 + (val or 0))
  if a then
    actargs[#actargs+1] = a
    secpos = secpos + (num or 1)
  elseif num then
    secpos = secpos + num
  end
end
102 | |||
-- Flush the pending action list as a dasm_put() call (needed before
-- intervening C code or on buffer position overflow).
-- Unless `term` is set, the list is terminated with a STOP action first.
local function wflush(term)
  if #actlist ~= actargs[1] then  -- Something was added since the last flush.
    if not term then
      waction("STOP")
    end
    local args = concat(actargs, ", ")
    wline(format("dasm_put(Dst, %s);", args), true)
    -- The next dasm_put() starts at the current end of the action list,
    -- and that offset occupies one buffer position by itself.
    actargs = { #actlist }
    secpos = 1
  end
end
111 | |||
-- Append an instruction word. Values up to 0x000fffff get an ESC action
-- in front of them.
local function wputw(n)
  if n <= 0x000fffff then
    waction("ESC")
  end
  wputxw(n)
end
117 | |||
-- Reserve the next action list slot with a placeholder and return its index.
local function wpos()
  local slot = #actlist + 1
  actlist[slot] = ""
  return slot
end
124 | |||
-- Fill a slot previously reserved with wpos().
-- Values up to 0x000fffff are stored escaped: the slot receives an ESC
-- action and the value itself is inserted right after it.
local function wputpos(pos, n)
  assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range")
  local word = n
  if n <= 0x000fffff then
    insert(actlist, pos+1, n)
    word = map_action.ESC * 0x10000
  end
  actlist[pos] = word
end
134 | |||
135 | ------------------------------------------------------------------------------ | ||
136 | |||
-- Global label name -> global label number.
-- Unknown names get the next free number (starting at 20) on first lookup.
local next_global = 20
local map_global = setmetatable({}, {
  __index = function(tab, label)
    if not match(label, "^[%a_][%w_]*$") then werror("bad global label") end
    local num = next_global
    if num > 2047 then werror("too many global labels") end
    next_global = num + 1
    tab[label] = num
    return num
  end,
})
147 | |||
-- List all global label names, in label number order, on the stream `out`.
local function dumpglobals(out, lvl)
  local names = {}
  for label, num in pairs(map_global) do
    names[num] = label
  end
  out:write("Global labels:\n")
  for num = 20, next_global-1 do
    out:write(format(" %s\n", names[num]))
  end
  out:write("\n")
end
158 | |||
-- Emit a C enum with one `prefix`-ed enumerator per global label, in label
-- number order, terminated by <prefix>_MAX.
local function writeglobals(out, prefix)
  local names = {}
  for label, num in pairs(map_global) do
    names[num] = label
  end
  out:write("enum {\n")
  for num = 20, next_global-1 do
    out:write(" ", prefix, names[num], ",\n")
  end
  out:write(" ", prefix, "_MAX\n};\n")
end
169 | |||
-- Emit a NULL-terminated C array called `name` that holds the global label
-- names in label number order.
local function writeglobalnames(out, name)
  local names = {}
  for label, num in pairs(map_global) do
    names[num] = label
  end
  out:write("static const char *const ", name, "[] = {\n")
  for num = 20, next_global-1 do
    out:write(" \"", names[num], "\",\n")
  end
  out:write(" (const char *)0\n};\n")
end
180 | |||
181 | ------------------------------------------------------------------------------ | ||
182 | |||
-- Extern label name -> extern label number.
-- Unknown names get the next free number (starting at 0) on first lookup;
-- map_extern_ keeps the reverse mapping number -> name.
local next_extern = 0
local map_extern_ = {}
local map_extern = setmetatable({}, {
  __index = function(tab, label)
    -- No restrictions on the name for now.
    local num = next_extern
    if num > 2047 then werror("too many extern labels") end
    next_extern = num + 1
    tab[label] = num
    map_extern_[num] = label
    return num
  end,
})
195 | |||
-- List all extern label names, in label number order, on the stream `out`.
local function dumpexterns(out, lvl)
  out:write("Extern labels:\n")
  for num = 0, next_extern-1 do
    local line = format(" %s\n", map_extern_[num])
    out:write(line)
  end
  out:write("\n")
end
204 | |||
-- Emit a NULL-terminated C array called `name` that holds the extern label
-- names in label number order.
local function writeexternnames(out, name)
  out:write("static const char *const ", name, "[] = {\n")
  for num = 0, next_extern-1 do
    local label = map_extern_[num]
    out:write(" \"", label, "\",\n")
  end
  out:write(" (const char *)0\n};\n")
end
213 | |||
214 | ------------------------------------------------------------------------------ | ||
215 | |||
216 | -- Arch-specific maps. | ||
217 | |||
-- External register name -> internal name.
local map_archdef = {
  xzr = "@x31",
  wzr = "@w31",
  lr  = "x30",
}

-- Internal register name -> external name (inverse of map_archdef).
local map_reg_rev = {
  ["@x31"] = "xzr",
  ["@w31"] = "wzr",
  x30      = "lr",
}

-- Type name -> { ctype, reg }
local map_type = {}
-- Type number (for Dt... macros).
local ctypenum = 0
226 | |||
-- Map an internal register name back to its external alias.
-- Names without an alias are returned unchanged.
function _M.revdef(s)
  local alias = map_reg_rev[s]
  if alias then
    return alias
  end
  return s
end
231 | |||
-- Shift operator name -> shift type code.
local map_shift = {
  lsl = 0,
  lsr = 1,
  asr = 2,
}

-- Extend operator name -> extend option code.
local map_extend = {
  uxtb = 0, sxtb = 4,
  uxth = 1, sxth = 5,
  uxtw = 2, sxtw = 6,
  uxtx = 3, sxtx = 7,
}

-- Condition name -> condition code. hs and lo are aliases of cs and cc.
local map_cond = {
  eq = 0, ne = 1,
  cs = 2, cc = 3,
  hs = 2, lo = 3,
  mi = 4, pl = 5,
  vs = 6, vc = 7,
  hi = 8, ls = 9,
  ge = 10, lt = 11,
  gt = 12, le = 13,
  al = 14,
}
244 | |||
245 | ------------------------------------------------------------------------------ | ||
246 | |||
247 | local parse_reg_type | ||
248 | |||
-- Parse a register operand and return its number plus the type entry, if any.
--   expr: a register name (optionally '@'-prefixed), a type name from
--         map_type, or "type:reg" to override the register of a type.
-- Returns: register number (0..31) and the map_type entry (or nil).
-- Side effect: the first register parsed sets parse_reg_type to its type
-- letter; later registers must have the same letter.
-- Raises (via werror) on a missing operand, a type without a register,
-- a register size mismatch or a bad register name.
local function parse_reg(expr)
  if not expr then werror("expected register name") end
  local tname, ovreg = match(expr, "^([%w_]+):(@?%l%d+)$")
  local tp = map_type[tname or expr]
  if tp then
    local reg = ovreg or tp.reg
    if not reg then
      werror("type `"..(tname or expr).."' needs a register override")
    end
    expr = reg
  end
  local ok31, rt, r = match(expr, "^(@?)([xwqdshb])([123]?[0-9])$")
  if r then
    r = tonumber(r)
    -- Numbers 0..30 are always fine. Number 31 needs the '@' prefix for
    -- w/x registers, but is a regular register for the other types.
    -- Anything above 31 does not fit the 5 bit register field and is
    -- rejected. The inner parentheses matter: without them the non-w/x
    -- alternative would also accept 32..39.
    local is_gpr = rt == "w" or rt == "x"
    if r <= 30 or (r == 31 and (ok31 ~= "" or not is_gpr)) then
      if not parse_reg_type then
        parse_reg_type = rt
      elseif parse_reg_type ~= rt then
        werror("register size mismatch")
      end
      return r, tp
    end
  end
  werror("bad register name `"..expr.."'")
end
274 | |||
-- Parse a base register operand.
-- Returns the register number shifted left by 5 (0x3e0 for "sp") and, for
-- anything but "sp", the type entry from parse_reg. The register must be
-- an x register; parse_reg_type is reset to false afterwards.
local function parse_reg_base(expr)
  if expr == "sp" then
    return 0x3e0
  end
  local regnum, tp = parse_reg(expr)
  if parse_reg_type ~= "x" then werror("bad register type") end
  parse_reg_type = false
  return shl(regnum, 5), tp
end
282 | |||
-- Environment used for chunks compiled by loadenv().
local parse_ctx = {}

-- Compile the string `s` with parse_ctx as its environment.
-- Uses loadstring + setfenv where setfenv exists, else the four-argument load.
local loadenv
if setfenv then
  loadenv = function(s)
    local code = loadstring(s, "")
    if code then setfenv(code, parse_ctx) end
    return code
  end
else
  loadenv = function(s)
    return load(s, "", nil, parse_ctx)
  end
end
292 | |||
-- Convert `n` to a number. Plain numerals go through tonumber(); anything
-- else is evaluated as an expression (some basic ops are aliases that need
-- simple arithmetic). Returns nil if neither works.
local function parse_number(n)
  local direct = tonumber(n)
  if direct then return direct end
  local chunk = loadenv("return "..n)
  if not chunk then return nil end
  local ok, value = pcall(chunk)
  if ok then return value end
  return nil
end
304 | |||
-- Parse a '#'-prefixed immediate operand.
--   bits:   width of the field.
--   shift:  bit position of the field in the instruction word.
--   scale:  number of low bits that must be zero and are shifted out.
--   signed: accept negative values (stored modulo 2^bits).
-- Returns the encoded field for a constant operand. For anything else an
-- IMM action is emitted and 0 is returned.
local function parse_imm(imm, bits, shift, scale, signed)
  imm = match(imm, "^#(.*)$")
  if not imm then werror("expected immediate operand") end
  local n = parse_number(imm)
  if not n then
    -- Not a constant: defer encoding to the C side.
    waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm)
    return 0
  end
  local m = sar(n, scale)
  if shl(m, scale) == n then
    if signed then
      local top = sar(m, bits-1)
      if top == 0 then return shl(m, shift) end
      if top == -1 then return shl(m + shl(1, bits), shift) end
    elseif sar(m, bits) == 0 then
      return shl(m, shift)
    end
  end
  werror("out of range immediate `"..imm.."'")
end
326 | |||
-- Parse a '#'-prefixed 12 bit immediate, optionally shifted left by 12.
-- Returns the encoded field for a constant operand. For anything else an
-- IMM12 action is emitted and 0 is returned.
local function parse_imm12(imm)
  imm = match(imm, "^#(.*)$")
  if not imm then werror("expected immediate operand") end
  local n = parse_number(imm)
  if not n then
    waction("IMM12", 0, imm)
    return 0
  end
  if shr(n, 12) == 0 then
    return shl(n, 10)  -- Unshifted form.
  end
  if band(n, 0xff000fff) == 0 then
    return shr(n, 2) + 0x00400000  -- Shifted form: only bits 12..23 set.
  end
  werror("out of range immediate `"..imm.."'")
end
343 | |||
-- Parse a '#'-prefixed bit pattern immediate.
-- A constant in the range 0..0xffffffff is encoded here: it is rendered as
-- a bit string (LSB first), extended to 64 positions, and must consist of
-- one run of ones repeated with a power-of-two period.
-- Anything else emits IMM13X (two arguments, lo and hi word) when
-- parse_reg_type is "x", else IMM13W, and returns 0.
local function parse_imm13(imm)
  imm = match(imm, "^#(.*)$")
  if not imm then werror("expected immediate operand") end
  local n = parse_number(imm)
  local r64 = parse_reg_type == "x"
  if n and n % 1 == 0 and n >= 0 and n <= 0xffffffff then
    -- Normalize so that bit 0 is clear; remember the inversion.
    local inv = band(n, 1) == 1
    if inv then n = bit.bnot(n) end
    local digits = {}
    for i = 1, 32 do
      digits[i] = band(n, 1)
      n = shr(n, 1)
    end
    local low = concat(digits)
    local high
    if r64 then
      high = (inv and "1" or "0"):rep(32)
    else
      high = low
    end
    local b = low..high
    local p0, p1, p0a, p1a = b:match("^(0+)(1+)(0*)(1*)")
    if p0 then
      local w
      if p1a == "" then
        w = r64 and 64 or 32
      else
        w = #p1 + #p0a
      end
      if band(w, w-1) == 0 and b == b:sub(1, w):rep(64/w) then
        local s = band(-2*w, 0x3f) - 1
        if w == 64 then s = s + 0x1000 end
        if inv then
          return shl(w-#p1-#p0, 16) + shl(s+w-#p1, 10)
        end
        return shl(w-#p0, 16) + shl(s+#p1, 10)
      end
    end
    werror("out of range immediate `"..imm.."'")
  elseif r64 then
    waction("IMM13X", 0, format("(unsigned int)(%s)", imm))
    actargs[#actargs+1] = format("(unsigned int)((unsigned long long)(%s)>>32)", imm)
    return 0
  else
    waction("IMM13W", 0, imm)
    return 0
  end
end
379 | |||
-- Parse a '#'-prefixed immediate in the range 0..63.
-- The low five bits go to bit 19 upwards, bit 5 maps to bit 31.
-- Anything that is not a constant emits an IMM6 action and returns 0.
local function parse_imm6(imm)
  imm = match(imm, "^#(.*)$")
  if not imm then werror("expected immediate operand") end
  local n = parse_number(imm)
  if not n then
    waction("IMM6", 0, imm)
    return 0
  end
  if n >= 0 and n <= 63 then
    local hibit = n >= 32 and 0x80000000 or 0
    return shl(band(n, 0x1f), 19) + hibit
  end
  werror("out of range immediate `"..imm.."'")
end
394 | |||
-- Encode the offset of a load/store. `imm` comes without the leading '#'.
-- A constant is encoded as a scaled, unsigned 12 bit offset if possible,
-- else as an unscaled, signed 9 bit offset. Anything else emits an IMML
-- action and returns 0.
local function parse_imm_load(imm, scale)
  local n = parse_number(imm)
  if not n then
    waction("IMML", 0, imm)
    return 0
  end
  local m = sar(n, scale)
  if shl(m, scale) == n and m >= 0 and m < 0x1000 then
    return shl(m, 10) + 0x01000000 -- Scaled, unsigned 12 bit offset.
  end
  if n >= -256 and n < 256 then
    return shl(band(n, 511), 12) -- Unscaled, signed 9 bit offset.
  end
  werror("out of range immediate `"..imm.."'")
end
410 | |||
-- Parse a '#'-prefixed floating-point immediate.
-- Only constants are supported. The value must have a 4 bit fraction and
-- an exponent that fits into 3 bits.
local function parse_fpimm(imm)
  imm = match(imm, "^#(.*)$")
  if not imm then werror("expected immediate operand") end
  local n = parse_number(imm)
  if not n then
    werror("NYI fpimm action")
  else
    local mant, exp = math.frexp(n)
    local sign, e2 = 0, band(exp-2, 7)
    if mant < 0 then
      mant = -mant
      sign = 0x00100000
    end
    mant = mant*32-16
    local frac_ok = mant % 1 == 0 and mant >= 0 and mant <= 15
    if frac_ok and sar(shl(e2, 29), 29)+2 == exp then
      return sign + shl(e2, 17) + shl(mant, 13)
    end
    werror("out of range immediate `"..imm.."'")
  end
end
428 | |||
-- Parse a shift operand of the form "<lsl|lsr|asr> #amount".
-- Returns the amount field (6 bits at bit 10) plus the shift type at bit 22.
local function parse_shift(expr)
  local opname, amount = match(expr, "^(%S+)%s*(.*)$")
  local kind = map_shift[opname]
  if not kind then werror("expected shift operand") end
  return parse_imm(amount, 6, 10, 0, false) + shl(kind, 22)
end
435 | |||
-- Parse "lsl #n" where n is a multiple of 16: 0/16/32/48 when
-- parse_reg_type is "x", else 0/16. Returns n shifted left by 17.
local function parse_lslx16(expr)
  local n = tonumber(match(expr, "^lsl%s*#(%d+)$"))
  if not n then werror("expected shift operand") end
  local forbidden = 0xffffffef
  if parse_reg_type == "x" then
    forbidden = 0xffffffcf
  end
  if band(n, forbidden) ~= 0 then
    werror("bad shift amount")
  end
  return shl(n, 17)
end
445 | |||
-- Parse an extend operand: an extend name or "lsl", optionally followed by
-- an immediate amount. "lsl" selects option 3 when parse_reg_type is "x",
-- else option 2. Returns the amount field (3 bits at bit 10) plus the
-- option at bit 13.
local function parse_extend(expr)
  local opname, amount = match(expr, "^(%S+)%s*(.*)$")
  local option
  if opname == "lsl" then
    option = parse_reg_type == "x" and 3 or 2
  else
    option = map_extend[opname]
  end
  if not option then werror("expected extend operand") end
  local field = 0
  if amount ~= "" then
    field = parse_imm(amount, 3, 10, 0, false)
  end
  return field + shl(option, 13)
end
456 | |||
-- Parse a condition name. The condition code is xor-ed with `inv` and
-- returned shifted left by 12.
local function parse_cond(expr, inv)
  local code = map_cond[expr]
  if not code then
    werror("expected condition operand")
  end
  return shl(bit.bxor(code, inv), 12)
end
462 | |||
-- Parse the address operand(s) of a single-register load/store, starting at
-- params[n], and return `op` with the addressing fields added.
-- Accepted forms:
--   type-based:   reg-or-type followed by a field expression (emits IMML)
--   post-index:   [base], #imm
--   pre-index:    [base, #imm]!
--   offset:       [base] or [base, #imm]
--   register:     [base, index {, extend/shift {#amount}}]
local function parse_load(params, nparams, n, op)
  if params[n+2] then werror("too many operands") end
  local pn, p2 = params[n], params[n+1]
  local addr, wb = match(pn, "^%[%s*(.-)%s*%](!?)$")
  if not addr then
    -- Not bracketed: only a typed base followed by a field expression is ok.
    if not p2 then
      local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$")
      if reg and tailr ~= "" then
        local base, tp = parse_reg_base(reg)
        if tp then
          waction("IMML", 0, format(tp.ctypefmt, tailr))
          return op + base
        end
      end
    end
    werror("expected address operand")
  end
  local scale = shr(op, 30)
  if p2 then
    -- Post-index.
    if wb == "!" then werror("bad use of '!'") end
    op = op + parse_reg_base(addr) + parse_imm(p2, 9, 12, 0, true) + 0x400
  elseif wb == "!" then
    -- Pre-index.
    local breg, disp = match(addr, "^([^,%s]*)%s*,%s*(.*)$")
    if not breg then werror("bad use of '!'") end
    op = op + parse_reg_base(breg) + parse_imm(disp, 9, 12, 0, true) + 0xc00
  else
    local breg, rest = match(addr, "^([^,%s]*)%s*(.*)$")
    op = op + parse_reg_base(breg)
    if rest == "" then
      -- Plain [base].
      if wb == "!" then werror("bad use of '!'") end
      op = op + 0x01000000
    else
      local imm = match(rest, "^,%s*#(.*)$")
      if imm then
        -- Immediate offset.
        op = op + parse_imm_load(imm, scale)
      else
        -- Register offset with optional extend/shift specifier.
        local idxreg, ext, amount =
          match(rest, "^,%s*([^,%s]*)%s*,?%s*(%S*)%s*(.*)$")
        op = op + shl(parse_reg(idxreg), 16) + 0x00200800
        if parse_reg_type ~= "x" and parse_reg_type ~= "w" then
          werror("bad index register type")
        end
        if ext == "" then
          if parse_reg_type ~= "x" then werror("bad index register type") end
          op = op + 0x6000
        else
          -- The amount may be absent, #0, or equal to the access size.
          if amount ~= "" and amount ~= "#0" then
            if amount == "#"..scale then
              op = op + 0x1000
            else
              werror("bad scale")
            end
          end
          if parse_reg_type == "x" then
            if ext == "lsl" and amount ~= "" then
              op = op + 0x6000
            elseif ext == "sxtx" then
              op = op + 0xe000
            else
              werror("bad extend/shift specifier")
            end
          elseif ext == "uxtw" then
            op = op + 0x4000
          elseif ext == "sxtw" then
            op = op + 0xc000
          else
            werror("bad extend/shift specifier")
          end
        end
      end
    end
  end
  return op
end
533 | |||
-- Parse the address operand(s) of a load/store pair, starting at params[n],
-- and return `op` with the addressing fields added.
-- The offset is a signed 7 bit field at bit 15, scaled by 2 if the top two
-- bits of `op` are zero and by 3 otherwise.
-- Accepted forms: typed base plus field expression (emits IMM),
-- [base], #imm (post-index), [base, #imm]! (pre-index), [base {, #imm}].
local function parse_load_pair(params, nparams, n, op)
  if params[n+2] then werror("too many operands") end
  local pn, p2 = params[n], params[n+1]
  local scale = 3
  if shr(op, 30) == 0 then scale = 2 end
  local addr, wb = match(pn, "^%[%s*(.-)%s*%](!?)$")
  if not addr then
    -- Not bracketed: only a typed base followed by a field expression is ok.
    if not p2 then
      local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$")
      if reg and tailr ~= "" then
        local base, tp = parse_reg_base(reg)
        if tp then
          waction("IMM", 32768+7*32+15+scale*1024, format(tp.ctypefmt, tailr))
          return op + base + 0x01000000
        end
      end
    end
    werror("expected address operand")
  end
  local mode
  if p2 then
    -- Post-index.
    if wb == "!" then werror("bad use of '!'") end
    mode = 0x00800000
  else
    -- Offset or pre-index; a missing displacement means #0.
    local breg, disp = match(addr, "^([^,%s]*)%s*,%s*(.*)$")
    if breg then
      addr, p2 = breg, disp
    else
      p2 = "#0"
    end
    mode = wb == "!" and 0x01800000 or 0x01000000
  end
  op = op + mode
  return op + parse_reg_base(addr) + parse_imm(p2, 7, 15, scale, true)
end
562 | |||
-- Classify a label operand.
-- `def` is true for a label definition and false for a label reference.
-- Returns the action suffix ("PC", "LG" or "EXT"), a number and, for PC
-- labels only, the label expression text. Anything else is reported through
-- werror().
local function parse_label(label, def)
  local lead = sub(label, 1, 2)
  if lead == "=>" then
    -- =>label (pc label reference)
    return "PC", 0, sub(label, 3)
  elseif lead == "->" then
    -- ->name (global label reference)
    return "LG", map_global[sub(label, 3)]
  end
  if def then
    -- [1-9] (local label definition)
    if match(label, "^[1-9]$") then
      return "LG", 10+tonumber(label)
    end
  else
    -- [<>][1-9] (local label reference). Fwd: 1-9, Bkwd: 11-19.
    local dir, lnum = match(label, "^([<>])([1-9])$")
    if dir then
      local bias = 10
      if dir == ">" then bias = 0 end
      return "LG", lnum + bias
    end
    -- extern label (extern label reference)
    local extname = match(label, "^extern%s+(%S+)$")
    if extname then
      return "EXT", map_extern[extname]
    end
  end
  werror("bad label `"..label.."'")
end
592 | |||
-- Map an opcode that takes a label operand to the relocation type bits
-- which are added to the REL_* action argument.
local function branch_type(op)
  if band(op, 0x7c000000) == 0x14000000 then
    return 0 -- B, BL
  end
  if shr(op, 24) == 0x54 or
     band(op, 0x7e000000) == 0x34000000 or
     band(op, 0x3b000000) == 0x18000000 then
    return 0x800 -- B.cond, CBZ, CBNZ, LDR* literal
  end
  if band(op, 0x7e000000) == 0x36000000 then
    return 0x1000 -- TBZ, TBNZ
  end
  local adrbits = band(op, 0x9f000000)
  if adrbits == 0x10000000 then
    return 0x2000 -- ADR
  end
  -- band() with a single argument normalizes the constant to the same
  -- signed 32 bit range as the masked opcode.
  if adrbits == band(0x90000000) then
    return 0x3000 -- ADRP
  end
  assert(false, "unknown branch type")
end
605 | |||
606 | ------------------------------------------------------------------------------ | ||
607 | |||
-- Forward declarations: both are assigned further down in the file.
local map_op, op_template

-- Build an opcode handler for an alias of the template named `opname`.
-- The handler first lets `f` rewrite the operand list in place and then
-- assembles the instruction with the aliased template. When called without
-- operands it returns a description pointing at the aliased opcode.
local function op_alias(opname, f)
  return function(params, nparams)
    if params then
      f(params, nparams)
      op_template(params, map_op[opname], nparams)
    else
      return "-> "..opname:sub(1, -3)
    end
  end
end
617 | |||
-- Operand rewrite for the sbfx/bfxil/ubfx aliases: replace the width in
-- operand 4 by the expression "#(lsb)+(width)-1". The leading character of
-- operands 3 and 4 (the "#") is dropped before they are combined.
local function alias_bfx(p)
  local lsb, width = p[3]:sub(2), p[4]:sub(2)
  p[4] = "#("..lsb..")+("..width..")-1"
end
621 | |||
-- Operand rewrite for the sbfiz/bfi/ubfiz aliases.
-- Operand 3 (lsb) becomes "(regsize - lsb) % regsize" and operand 4 (width)
-- becomes "width - 1", where regsize is 32 or 64 depending on the type of
-- the destination register in operand 1.
local function alias_bfiz(p)
  parse_reg(p[1])  -- Only called to set parse_reg_type.
  local bits = parse_reg_type == "w" and "32" or "64"
  -- Use (bits-x)%bits rather than -(x)%bits: the expression text may end up
  -- being evaluated as C code, where % of a negative value is negative.
  p[3] = "#("..bits.."-("..p[3]:sub(2).."))%"..bits
  p[4] = "#("..p[4]:sub(2)..")-1"
end
632 | |||
-- "lsl Rd, Rn, #sh" is assembled as an alias of ubfm: operand 3 becomes
-- "(regsize - sh) % regsize" and a 4th operand "regsize - 1 - sh" is added,
-- where regsize is 32 or 64 depending on the type of the destination.
local alias_lslimm = op_alias("ubfm_4", function(p)
  parse_reg(p[1])  -- Only called to set parse_reg_type.
  local sh = p[3]:sub(2)
  -- Use (bits-sh)%bits rather than -(sh)%bits: the expression text may end
  -- up being evaluated as C code, where % of a negative value is negative.
  if parse_reg_type == "w" then
    p[3] = "#(32-("..sh.."))%32"
    p[4] = "#31-("..sh..")"
  else
    p[3] = "#(64-("..sh.."))%64"
    p[4] = "#63-("..sh..")"
  end
end)
644 | |||
645 | -- Template strings for ARM64 instructions. Keys are "<opname>_<number of operands>" or "<opname>_*". A string value holds one or more '|'-separated alternatives, each made of 8 hex digits of base opcode followed by operand codes that parse_template() consumes one by one. Values may also be handler functions (see the op_alias() entries and lsl_3). | ||
646 | map_op = { | ||
647 | -- Basic data processing instructions. | ||
648 | add_3 = "0b000000DNMg|11000000pDpNIg|8b206000pDpNMx", | ||
649 | add_4 = "0b000000DNMSg|0b200000DNMXg|8b200000pDpNMXx|8b200000pDpNxMwX", | ||
650 | adds_3 = "2b000000DNMg|31000000DpNIg|ab206000DpNMx", | ||
651 | adds_4 = "2b000000DNMSg|2b200000DNMXg|ab200000DpNMXx|ab200000DpNxMwX", | ||
652 | cmn_2 = "2b00001fNMg|3100001fpNIg|ab20601fpNMx", | ||
653 | cmn_3 = "2b00001fNMSg|2b20001fNMXg|ab20001fpNMXx|ab20001fpNxMwX", | ||
654 | |||
655 | sub_3 = "4b000000DNMg|51000000pDpNIg|cb206000pDpNMx", | ||
656 | sub_4 = "4b000000DNMSg|4b200000DNMXg|cb200000pDpNMXx|cb200000pDpNxMwX", | ||
657 | subs_3 = "6b000000DNMg|71000000DpNIg|eb206000DpNMx", | ||
658 | subs_4 = "6b000000DNMSg|6b200000DNMXg|eb200000DpNMXx|eb200000DpNxMwX", | ||
659 | cmp_2 = "6b00001fNMg|7100001fpNIg|eb20601fpNMx", | ||
660 | cmp_3 = "6b00001fNMSg|6b20001fNMXg|eb20001fpNMXx|eb20001fpNxMwX", | ||
661 | |||
662 | neg_2 = "4b0003e0DMg", | ||
663 | neg_3 = "4b0003e0DMSg", | ||
664 | negs_2 = "6b0003e0DMg", | ||
665 | negs_3 = "6b0003e0DMSg", | ||
666 | |||
667 | adc_3 = "1a000000DNMg", | ||
668 | adcs_3 = "3a000000DNMg", | ||
669 | sbc_3 = "5a000000DNMg", | ||
670 | sbcs_3 = "7a000000DNMg", | ||
671 | ngc_2 = "5a0003e0DMg", | ||
672 | ngcs_2 = "7a0003e0DMg", | ||
673 | |||
674 | and_3 = "0a000000DNMg|12000000pDNig", | ||
675 | and_4 = "0a000000DNMSg", | ||
676 | orr_3 = "2a000000DNMg|32000000pDNig", | ||
677 | orr_4 = "2a000000DNMSg", | ||
678 | eor_3 = "4a000000DNMg|52000000pDNig", | ||
679 | eor_4 = "4a000000DNMSg", | ||
680 | ands_3 = "6a000000DNMg|72000000DNig", | ||
681 | ands_4 = "6a000000DNMSg", | ||
682 | tst_2 = "6a00001fNMg|7200001fNig", | ||
683 | tst_3 = "6a00001fNMSg", | ||
684 | |||
685 | bic_3 = "0a200000DNMg", | ||
686 | bic_4 = "0a200000DNMSg", | ||
687 | orn_3 = "2a200000DNMg", | ||
688 | orn_4 = "2a200000DNMSg", | ||
689 | eon_3 = "4a200000DNMg", | ||
690 | eon_4 = "4a200000DNMSg", | ||
691 | bics_3 = "6a200000DNMg", | ||
692 | bics_4 = "6a200000DNMSg", | ||
693 | |||
694 | movn_2 = "12800000DWg", | ||
695 | movn_3 = "12800000DWRg", | ||
696 | movz_2 = "52800000DWg", | ||
697 | movz_3 = "52800000DWRg", | ||
698 | movk_2 = "72800000DWg", | ||
699 | movk_3 = "72800000DWRg", | ||
700 | |||
701 | -- TODO: this doesn't cover all valid immediates for mov reg, #imm. | ||
702 | mov_2 = "2a0003e0DMg|52800000DW|320003e0pDig|11000000pDpNg", | ||
703 | mov_3 = "2a0003e0DMSg", | ||
704 | mvn_2 = "2a2003e0DMg", | ||
705 | mvn_3 = "2a2003e0DMSg", | ||
706 | |||
707 | adr_2 = "10000000DBx", | ||
708 | adrp_2 = "90000000DBx", | ||
709 | |||
710 | csel_4 = "1a800000DNMCg", | ||
711 | csinc_4 = "1a800400DNMCg", | ||
712 | csinv_4 = "5a800000DNMCg", | ||
713 | csneg_4 = "5a800400DNMCg", | ||
714 | cset_2 = "1a9f07e0Dcg", | ||
715 | csetm_2 = "5a9f03e0Dcg", | ||
716 | cinc_3 = "1a800400DNmcg", | ||
717 | cinv_3 = "5a800000DNmcg", | ||
718 | cneg_3 = "5a800400DNmcg", | ||
719 | |||
720 | ccmn_4 = "3a400000NMVCg|3a400800N5VCg", | ||
721 | ccmp_4 = "7a400000NMVCg|7a400800N5VCg", | ||
722 | |||
723 | madd_4 = "1b000000DNMAg", | ||
724 | msub_4 = "1b008000DNMAg", | ||
725 | mul_3 = "1b007c00DNMg", | ||
726 | mneg_3 = "1b00fc00DNMg", | ||
727 | |||
728 | smaddl_4 = "9b200000DxNMwAx", | ||
729 | smsubl_4 = "9b208000DxNMwAx", | ||
730 | smull_3 = "9b207c00DxNMw", | ||
731 | smnegl_3 = "9b20fc00DxNMw", | ||
732 | smulh_3 = "9b407c00DNMx", | ||
733 | umaddl_4 = "9ba00000DxNMwAx", | ||
734 | umsubl_4 = "9ba08000DxNMwAx", | ||
735 | umull_3 = "9ba07c00DxNMw", | ||
736 | umnegl_3 = "9ba0fc00DxNMw", | ||
737 | umulh_3 = "9bc07c00DNMx", | ||
738 | |||
739 | udiv_3 = "1ac00800DNMg", | ||
740 | sdiv_3 = "1ac00c00DNMg", | ||
741 | |||
742 | -- Bit operations. | ||
743 | sbfm_4 = "13000000DN12w|93400000DN12x", | ||
744 | bfm_4 = "33000000DN12w|b3400000DN12x", | ||
745 | ubfm_4 = "53000000DN12w|d3400000DN12x", | ||
746 | extr_4 = "13800000DNM2w|93c00000DNM2x", | ||
747 | |||
748 | sxtb_2 = "13001c00DNw|93401c00DNx", | ||
749 | sxth_2 = "13003c00DNw|93403c00DNx", | ||
750 | sxtw_2 = "93407c00DxNw", | ||
751 | uxtb_2 = "53001c00DNw", | ||
752 | uxth_2 = "53003c00DNw", | ||
753 | |||
754 | sbfx_4 = op_alias("sbfm_4", alias_bfx), | ||
755 | bfxil_4 = op_alias("bfm_4", alias_bfx), | ||
756 | ubfx_4 = op_alias("ubfm_4", alias_bfx), | ||
757 | sbfiz_4 = op_alias("sbfm_4", alias_bfiz), | ||
758 | bfi_4 = op_alias("bfm_4", alias_bfiz), | ||
759 | ubfiz_4 = op_alias("ubfm_4", alias_bfiz), | ||
760 | |||
761 | lsl_3 = function(params, nparams) | ||
762 | if params and params[3]:byte() == 35 then | ||
763 | return alias_lslimm(params, nparams) | ||
764 | else | ||
765 | return op_template(params, "1ac02000DNMg", nparams) | ||
766 | end | ||
767 | end, | ||
768 | lsr_3 = "1ac02400DNMg|53007c00DN1w|d340fc00DN1x", | ||
769 | asr_3 = "1ac02800DNMg|13007c00DN1w|9340fc00DN1x", | ||
770 | ror_3 = "1ac02c00DNMg|13800000DNm2w|93c00000DNm2x", | ||
771 | |||
772 | clz_2 = "5ac01000DNg", | ||
773 | cls_2 = "5ac01400DNg", | ||
774 | rbit_2 = "5ac00000DNg", | ||
775 | rev_2 = "5ac00800DNw|dac00c00DNx", | ||
776 | rev16_2 = "5ac00400DNg", | ||
777 | rev32_2 = "dac00800DNx", | ||
778 | |||
779 | -- Loads and stores. | ||
780 | ["strb_*"] = "38000000DwL", | ||
781 | ["ldrb_*"] = "38400000DwL", | ||
782 | ["ldrsb_*"] = "38c00000DwL|38800000DxL", | ||
783 | ["strh_*"] = "78000000DwL", | ||
784 | ["ldrh_*"] = "78400000DwL", | ||
785 | ["ldrsh_*"] = "78c00000DwL|78800000DxL", | ||
786 | ["str_*"] = "b8000000DwL|f8000000DxL|bc000000DsL|fc000000DdL", | ||
787 | ["ldr_*"] = "18000000DwB|58000000DxB|1c000000DsB|5c000000DdB|b8400000DwL|f8400000DxL|bc400000DsL|fc400000DdL", | ||
788 | ["ldrsw_*"] = "98000000DxB|b8800000DxL", | ||
789 | -- NOTE: ldur etc. are handled by ldr et al. | ||
790 | |||
791 | ["stp_*"] = "28000000DAwP|a8000000DAxP|2c000000DAsP|6c000000DAdP", | ||
792 | ["ldp_*"] = "28400000DAwP|a8400000DAxP|2c400000DAsP|6c400000DAdP", | ||
793 | ["ldpsw_*"] = "68400000DAxP", | ||
794 | |||
795 | -- Branches. | ||
796 | b_1 = "14000000B", | ||
797 | bl_1 = "94000000B", | ||
798 | blr_1 = "d63f0000Nx", | ||
799 | br_1 = "d61f0000Nx", | ||
800 | ret_0 = "d65f03c0", | ||
801 | ret_1 = "d65f0000Nx", | ||
802 | -- b.cond is added below: one "b<cond>_1" entry per condition in map_cond. | ||
803 | cbz_2 = "34000000DBg", | ||
804 | cbnz_2 = "35000000DBg", | ||
805 | tbz_3 = "36000000DTBw|36000000DTBx", | ||
806 | tbnz_3 = "37000000DTBw|37000000DTBx", | ||
807 | |||
808 | -- Miscellaneous instructions. | ||
809 | -- TODO: hlt, hvc, smc, svc, eret, dcps[123], drps, mrs, msr | ||
810 | -- TODO: sys, sysl, ic, dc, at, tlbi | ||
811 | -- TODO: hint, yield, wfe, wfi, sev, sevl | ||
812 | -- TODO: clrex, dsb, dmb, isb | ||
813 | nop_0 = "d503201f", | ||
814 | brk_0 = "d4200000", | ||
815 | brk_1 = "d4200000W", | ||
816 | |||
817 | -- Floating point instructions. | ||
818 | fmov_2 = "1e204000DNf|1e260000DwNs|1e270000DsNw|9e660000DxNd|9e670000DdNx|1e201000DFf", | ||
819 | fabs_2 = "1e20c000DNf", | ||
820 | fneg_2 = "1e214000DNf", | ||
821 | fsqrt_2 = "1e21c000DNf", | ||
822 | |||
823 | fcvt_2 = "1e22c000DdNs|1e624000DsNd", | ||
824 | |||
825 | -- TODO: half-precision and fixed-point conversions. | ||
826 | fcvtas_2 = "1e240000DwNs|9e240000DxNs|1e640000DwNd|9e640000DxNd", | ||
827 | fcvtau_2 = "1e250000DwNs|9e250000DxNs|1e650000DwNd|9e650000DxNd", | ||
828 | fcvtms_2 = "1e300000DwNs|9e300000DxNs|1e700000DwNd|9e700000DxNd", | ||
829 | fcvtmu_2 = "1e310000DwNs|9e310000DxNs|1e710000DwNd|9e710000DxNd", | ||
830 | fcvtns_2 = "1e200000DwNs|9e200000DxNs|1e600000DwNd|9e600000DxNd", | ||
831 | fcvtnu_2 = "1e210000DwNs|9e210000DxNs|1e610000DwNd|9e610000DxNd", | ||
832 | fcvtps_2 = "1e280000DwNs|9e280000DxNs|1e680000DwNd|9e680000DxNd", | ||
833 | fcvtpu_2 = "1e290000DwNs|9e290000DxNs|1e690000DwNd|9e690000DxNd", | ||
834 | fcvtzs_2 = "1e380000DwNs|9e380000DxNs|1e780000DwNd|9e780000DxNd", | ||
835 | fcvtzu_2 = "1e390000DwNs|9e390000DxNs|1e790000DwNd|9e790000DxNd", | ||
836 | |||
837 | scvtf_2 = "1e220000DsNw|9e220000DsNx|1e620000DdNw|9e620000DdNx", | ||
838 | ucvtf_2 = "1e230000DsNw|9e230000DsNx|1e630000DdNw|9e630000DdNx", | ||
839 | |||
840 | frintn_2 = "1e244000DNf", | ||
841 | frintp_2 = "1e24c000DNf", | ||
842 | frintm_2 = "1e254000DNf", | ||
843 | frintz_2 = "1e25c000DNf", | ||
844 | frinta_2 = "1e264000DNf", | ||
845 | frintx_2 = "1e274000DNf", | ||
846 | frinti_2 = "1e27c000DNf", | ||
847 | |||
848 | fadd_3 = "1e202800DNMf", | ||
849 | fsub_3 = "1e203800DNMf", | ||
850 | fmul_3 = "1e200800DNMf", | ||
851 | fnmul_3 = "1e208800DNMf", | ||
852 | fdiv_3 = "1e201800DNMf", | ||
853 | |||
854 | fmadd_4 = "1f000000DNMAf", | ||
855 | fmsub_4 = "1f008000DNMAf", | ||
856 | fnmadd_4 = "1f200000DNMAf", | ||
857 | fnmsub_4 = "1f208000DNMAf", | ||
858 | |||
859 | fmax_3 = "1e204800DNMf", | ||
860 | fmaxnm_3 = "1e206800DNMf", | ||
861 | fmin_3 = "1e205800DNMf", | ||
862 | fminnm_3 = "1e207800DNMf", | ||
863 | |||
864 | fcmp_2 = "1e202000NMf|1e202008NZf", | ||
865 | fcmpe_2 = "1e202010NMf|1e202018NZf", | ||
866 | |||
867 | fccmp_4 = "1e200400NMVCf", | ||
868 | fccmpe_4 = "1e200410NMVCf", | ||
869 | |||
870 | fcsel_4 = "1e200c00DNMCf", | ||
871 | |||
872 | -- TODO: crc32*, aes*, sha*, pmull | ||
873 | -- TODO: SIMD instructions. | ||
874 | } | ||
875 | |||
-- Register one conditional branch template "b<cond>_1" per entry of
-- map_cond: base opcode 0x54000000 plus the condition code, label operand.
for condname, condcode in pairs(map_cond) do
  local opname = "b"..condname.."_1"
  map_op[opname] = tohex(0x54000000+condcode).."B"
end
879 | |||
880 | ------------------------------------------------------------------------------ | ||
881 | |||
-- Assemble one instruction from a single template alternative.
-- The first 8 characters of `template` are the base opcode in hex. Every
-- following character is an operand code: most of them consume the next
-- entry of `params` and add its encoding to the opcode, the others check or
-- use the register type recorded in parse_reg_type. Mismatches are reported
-- through werror(). The finished opcode is stored at position `pos`.
local function parse_template(params, template, nparams, pos)
  local op = tonumber(sub(template, 1, 8), 16)
  local idx = 1  -- Index of the next operand to consume.

  parse_reg_type = false

  for code in gmatch(sub(template, 9), ".") do
    local arg = params[idx]

    -- Register operands, placed at different bit positions.
    if code == "D" then
      op = op + parse_reg(arg)
      idx = idx + 1
    elseif code == "N" then
      op = op + shl(parse_reg(arg), 5)
      idx = idx + 1
    elseif code == "M" then
      op = op + shl(parse_reg(arg), 16)
      idx = idx + 1
    elseif code == "A" then
      op = op + shl(parse_reg(arg), 10)
      idx = idx + 1
    elseif code == "m" then
      -- Re-use the previous operand; does not consume one.
      op = op + shl(parse_reg(params[idx-1]), 16)

    -- Register type handling.
    elseif code == "p" then
      -- Rewrite "sp" in place for the operand code that follows.
      if arg == "sp" then params[idx] = "@x31" end
    elseif code == "g" then
      if parse_reg_type == "x" then
        op = op + 0x80000000
      elseif parse_reg_type ~= "w" then
        werror("bad register type")
      end
      parse_reg_type = false
    elseif code == "f" then
      if parse_reg_type == "d" then
        op = op + 0x00400000
      elseif parse_reg_type ~= "s" then
        werror("bad register type")
      end
      parse_reg_type = false
    elseif code == "x" or code == "w" or code == "d" or code == "s" then
      if parse_reg_type ~= code then
        werror("register size mismatch")
      end
      parse_reg_type = false

    -- Address operands; these helpers index params themselves.
    elseif code == "L" then
      op = parse_load(params, nparams, idx, op)
    elseif code == "P" then
      op = parse_load_pair(params, nparams, idx, op)

    -- Label operand: emits a relocation action.
    elseif code == "B" then
      local mode, v, s = parse_label(arg, false)
      idx = idx + 1
      local m = branch_type(op)
      waction("REL_"..mode, v+m, s, 1)

    -- Immediate operands.
    elseif code == "I" then
      op = op + parse_imm12(arg)
      idx = idx + 1
    elseif code == "i" then
      op = op + parse_imm13(arg)
      idx = idx + 1
    elseif code == "W" then
      op = op + parse_imm(arg, 16, 5, 0, false)
      idx = idx + 1
    elseif code == "T" then
      op = op + parse_imm6(arg)
      idx = idx + 1
    elseif code == "1" then
      op = op + parse_imm(arg, 6, 16, 0, false)
      idx = idx + 1
    elseif code == "2" then
      op = op + parse_imm(arg, 6, 10, 0, false)
      idx = idx + 1
    elseif code == "5" then
      op = op + parse_imm(arg, 5, 16, 0, false)
      idx = idx + 1
    elseif code == "V" then
      op = op + parse_imm(arg, 4, 0, 0, false)
      idx = idx + 1
    elseif code == "F" then
      op = op + parse_fpimm(arg)
      idx = idx + 1
    elseif code == "Z" then
      if arg ~= "#0" and arg ~= "#0.0" then
        werror("expected zero immediate")
      end
      idx = idx + 1

    -- Shift, extend and condition operands.
    elseif code == "S" then
      op = op + parse_shift(arg)
      idx = idx + 1
    elseif code == "X" then
      op = op + parse_extend(arg)
      idx = idx + 1
    elseif code == "R" then
      op = op + parse_lslx16(arg)
      idx = idx + 1
    elseif code == "C" then
      op = op + parse_cond(arg, 0)
      idx = idx + 1
    elseif code == "c" then
      op = op + parse_cond(arg, 1)
      idx = idx + 1

    else
      assert(false)
    end
  end
  wputpos(pos, op)
end
975 | |||
-- Assemble an instruction from a template string with one or more
-- '|'-separated alternatives. The alternatives are tried in order; the first
-- one that parse_template() accepts wins. If all of them fail, the error of
-- the last one is re-raised. Called without operands, it returns the
-- template with every run of 8 hex digits removed.
op_template = function(params, template, nparams)
  if not params then return template:gsub("%x%x%x%x%x%x%x%x", "") end

  -- Limit number of section buffer positions used by a single dasm_put().
  -- A single opcode needs a maximum of 3 positions.
  if secpos+3 > maxsecpos then wflush() end
  local pos = wpos()
  -- Remember the current state so a failed alternative can be undone.
  local nact, narg, savedpos = #actlist, #actargs, secpos

  local ok, err
  for alt in gmatch(template, "[^|]+") do
    ok, err = pcall(parse_template, params, alt, nparams, pos)
    if ok then return end
    -- Roll back whatever the failed attempt appended.
    secpos = savedpos
    for i = 1, 3 do
      actlist[nact+i] = nil
      actargs[narg+i] = nil
    end
  end
  error(err, 0)
end

map_op[".template__"] = op_template
1001 | |||
1002 | ------------------------------------------------------------------------------ | ||
1003 | |||
-- Pseudo-opcode ".actionlist cvar": marks the output position where the
-- action list is written under the given C variable name.
map_op[".actionlist_1"] = function(params)
  if params then
    local name = params[1] -- No syntax check. You get to keep the pieces.
    wline(function(out) writeactions(out, name) end)
  else
    return "cvar"
  end
end
1010 | |||
-- Pseudo-opcode ".globals prefix": marks the output position where the
-- global enum is written using the given prefix.
map_op[".globals_1"] = function(params)
  if params then
    local prefix = params[1] -- No syntax check. You get to keep the pieces.
    wline(function(out) writeglobals(out, prefix) end)
  else
    return "prefix"
  end
end
1017 | |||
-- Pseudo-opcode ".globalnames cvar": marks the output position where the
-- global names are written under the given C variable name.
map_op[".globalnames_1"] = function(params)
  if params then
    local name = params[1] -- No syntax check. You get to keep the pieces.
    wline(function(out) writeglobalnames(out, name) end)
  else
    return "cvar"
  end
end
1024 | |||
-- Pseudo-opcode ".externnames cvar": marks the output position where the
-- extern names are written under the given C variable name.
map_op[".externnames_1"] = function(params)
  if params then
    local name = params[1] -- No syntax check. You get to keep the pieces.
    wline(function(out) writeexternnames(out, name) end)
  else
    return "cvar"
  end
end
1031 | |||
1032 | ------------------------------------------------------------------------------ | ||
1033 | |||
-- Label definition pseudo-opcode (converted from trailing colon form).
-- Emits a LABEL_* action for a local, global or pc label; an extern label
-- cannot be defined.
map_op[".label_1"] = function(params)
  if params then
    if secpos+1 > maxsecpos then wflush() end
    local kind, num, expr = parse_label(params[1], true)
    if kind == "EXT" then werror("bad label definition") end
    waction("LABEL_"..kind, num, expr, 1)
  else
    return "[1-9] | ->global | =>pcexpr"
  end
end
1042 | |||
1043 | ------------------------------------------------------------------------------ | ||
1044 | |||
-- Data storage pseudo-opcode ".long imm...": writes each operand as one
-- word. Operands must be numeric literals; negative values are wrapped by
-- adding 2^32.
map_op[".long_*"] = function(params)
  if not params then return "imm..." end
  for _, text in ipairs(params) do
    local word = tonumber(text)
    if not word then werror("bad immediate `"..text.."'") end
    if word < 0 then word = word + 2^32 end
    wputw(word)
    if secpos+2 > maxsecpos then wflush() end
  end
end
1056 | |||
-- Alignment pseudo-opcode ".align numpow2".
-- Accepts a power of 2 in the range (2 ... 256) and emits an ALIGN action
-- whose argument is the alignment minus one.
map_op[".align_1"] = function(params)
  if not params then return "numpow2" end
  if secpos+1 > maxsecpos then wflush() end
  local align = tonumber(params[1])
  if align then
    -- Compare against 2, 4, ..., 256.
    local pow2 = 2
    for _ = 1, 8 do
      if align == pow2 then
        waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1.
        return
      end
      pow2 = pow2 * 2
    end
  end
  werror("bad alignment")
end
1075 | |||
1076 | ------------------------------------------------------------------------------ | ||
1077 | |||
-- Pseudo-opcode ".type name, ctype [, reg]" for (primitive) type
-- definitions which map to C types. Registers the type in map_type, adds a
-- "#name" define for its size and emits a numbered Dt<N>() shortcut macro.
map_op[".type_3"] = function(params, nparams)
  if not params then
    if nparams == 2 then return "name, ctype" end
    return "name, ctype, reg"
  end
  local name, ctype, reg = params[1], params[2], params[3]
  if not match(name, "^[%a_][%w_]*$") then
    werror("bad type name `"..name.."'")
  end
  if map_type[name] then
    werror("duplicate type `"..name.."'")
  end
  -- Add #type to defines. A bit unclean to put it in map_archdef.
  map_archdef["#"..name] = "sizeof("..ctype..")"
  -- Add new type and emit shortcut define.
  local num = ctypenum + 1
  local entry = {
    ctype = ctype,
    ctypefmt = format("Dt%X(%%s)", num),
    reg = reg,
  }
  map_type[name] = entry
  wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype))
  ctypenum = num
end
-- The register operand is optional: the two operand form shares the handler.
map_op[".type_2"] = map_op[".type_3"]
1104 | |||
-- Write all type definitions from map_type to `out`, sorted by name, one
-- line per type with its C type and its register (if any).
local function dumptypes(out, lvl)
  local names = {}
  for name in pairs(map_type) do names[#names+1] = name end
  sort(names)
  out:write("Type definitions:\n")
  for i = 1, #names do
    local name = names[i]
    local tp = map_type[name]
    out:write(format(" %-20s %-20s %s\n", name, tp.ctype, tp.reg or ""))
  end
  out:write("\n")
end
1118 | |||
1119 | ------------------------------------------------------------------------------ | ||
1120 | |||
-- Switch to section number `num`: emit a SECTION action and flush right
-- away, since SECTION is a terminal action.
function _M.section(num)
  waction("SECTION", num)
  local terminal = true
  wflush(terminal)
end
1126 | |||
1127 | ------------------------------------------------------------------------------ | ||
1128 | |||
-- Dump the architecture description: a banner line built from the module
-- information, followed by the list of actions.
function _M.dumparch(out)
  local banner = format("DynASM %s version %s, released %s\n\n",
    _info.arch, _info.version, _info.release)
  out:write(banner)
  dumpactions(out)
end
1135 | |||
-- Dump all user defined elements in a fixed order: types, then globals,
-- then externs.
function _M.dumpdef(out, lvl)
  local dumpers = { dumptypes, dumpglobals, dumpexterns }
  for i = 1, 3 do
    dumpers[i](out, lvl)
  end
end
1142 | |||
1143 | ------------------------------------------------------------------------------ | ||
1144 | |||
-- Exchange callbacks with the DynASM core: store the four callbacks passed
-- in and hand back this module's flush function.
function _M.passcb(wl, we, wf, ww)
  wline = wl
  werror = we
  wfatal = wf
  wwarn = ww
  return wflush
end
1150 | |||
-- Set up the arch-specific module: remember the architecture name and the
-- option table passed in.
function _M.setup(arch, opt)
  g_arch = arch
  g_opt = opt
end
1155 | |||
-- Merge the core maps and the arch-specific maps: lookups that miss in
-- map_op fall back to the core opcode map, and lookups that miss in map_def
-- fall back to map_archdef. Returns both front tables.
function _M.mergemaps(map_coreop, map_def)
  local op_fallback = { __index = map_coreop }
  local def_fallback = { __index = map_archdef }
  setmetatable(map_op, op_fallback)
  setmetatable(map_def, def_fallback)
  return map_op, map_def
end

return _M
1165 | ------------------------------------------------------------------------------ | ||
1166 | |||
diff --git a/dynasm/dasm_mips.h b/dynasm/dasm_mips.h index 1b309edd..143c3cbe 100644 --- a/dynasm/dasm_mips.h +++ b/dynasm/dasm_mips.h | |||
@@ -21,7 +21,7 @@ enum { | |||
21 | /* The following actions need a buffer position. */ | 21 | /* The following actions need a buffer position. */ |
22 | DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG, | 22 | DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG, |
23 | /* The following actions also have an argument. */ | 23 | /* The following actions also have an argument. */ |
24 | DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, | 24 | DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMMS, |
25 | DASM__MAX | 25 | DASM__MAX |
26 | }; | 26 | }; |
27 | 27 | ||
@@ -231,7 +231,7 @@ void dasm_put(Dst_DECL, int start, ...) | |||
231 | *pl = -pos; /* Label exists now. */ | 231 | *pl = -pos; /* Label exists now. */ |
232 | b[pos++] = ofs; /* Store pass1 offset estimate. */ | 232 | b[pos++] = ofs; /* Store pass1 offset estimate. */ |
233 | break; | 233 | break; |
234 | case DASM_IMM: | 234 | case DASM_IMM: case DASM_IMMS: |
235 | #ifdef DASM_CHECKS | 235 | #ifdef DASM_CHECKS |
236 | CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I); | 236 | CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I); |
237 | #endif | 237 | #endif |
@@ -299,7 +299,7 @@ int dasm_link(Dst_DECL, size_t *szp) | |||
299 | case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break; | 299 | case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break; |
300 | case DASM_REL_LG: case DASM_REL_PC: pos++; break; | 300 | case DASM_REL_LG: case DASM_REL_PC: pos++; break; |
301 | case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; | 301 | case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; |
302 | case DASM_IMM: pos++; break; | 302 | case DASM_IMM: case DASM_IMMS: pos++; break; |
303 | } | 303 | } |
304 | } | 304 | } |
305 | stop: (void)0; | 305 | stop: (void)0; |
@@ -355,19 +355,23 @@ int dasm_encode(Dst_DECL, void *buffer) | |||
355 | CK(n >= 0, UNDEF_PC); | 355 | CK(n >= 0, UNDEF_PC); |
356 | n = *DASM_POS2PTR(D, n); | 356 | n = *DASM_POS2PTR(D, n); |
357 | if (ins & 2048) | 357 | if (ins & 2048) |
358 | n = n - (int)((char *)cp - base); | 358 | n = (n + (int)(size_t)base) & 0x0fffffff; |
359 | else | 359 | else |
360 | n = (n + (int)base) & 0x0fffffff; | 360 | n = n - (int)((char *)cp - base); |
361 | patchrel: | 361 | patchrel: { |
362 | unsigned int e = 16 + ((ins >> 12) & 15); | ||
362 | CK((n & 3) == 0 && | 363 | CK((n & 3) == 0 && |
363 | ((n + ((ins & 2048) ? 0x00020000 : 0)) >> | 364 | ((n + ((ins & 2048) ? 0 : (1<<(e+1)))) >> (e+2)) == 0, RANGE_REL); |
364 | ((ins & 2048) ? 18 : 28)) == 0, RANGE_REL); | 365 | cp[-1] |= ((n>>2) & ((1<<e)-1)); |
365 | cp[-1] |= ((n>>2) & ((ins & 2048) ? 0x0000ffff: 0x03ffffff)); | 366 | } |
366 | break; | 367 | break; |
367 | case DASM_LABEL_LG: | 368 | case DASM_LABEL_LG: |
368 | ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n); | 369 | ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n); |
369 | break; | 370 | break; |
370 | case DASM_LABEL_PC: break; | 371 | case DASM_LABEL_PC: break; |
372 | case DASM_IMMS: | ||
373 | cp[-1] |= ((n>>3) & 4); n &= 0x1f; | ||
374 | /* fallthrough */ | ||
371 | case DASM_IMM: | 375 | case DASM_IMM: |
372 | cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31); | 376 | cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31); |
373 | break; | 377 | break; |
diff --git a/dynasm/dasm_mips.lua b/dynasm/dasm_mips.lua index 9ef280e3..3e41df52 100644 --- a/dynasm/dasm_mips.lua +++ b/dynasm/dasm_mips.lua | |||
@@ -1,17 +1,20 @@ | |||
1 | ------------------------------------------------------------------------------ | 1 | ------------------------------------------------------------------------------ |
2 | -- DynASM MIPS module. | 2 | -- DynASM MIPS32/MIPS64 module. |
3 | -- | 3 | -- |
4 | -- Copyright (C) 2005-2020 Mike Pall. All rights reserved. | 4 | -- Copyright (C) 2005-2020 Mike Pall. All rights reserved. |
5 | -- See dynasm.lua for full copyright notice. | 5 | -- See dynasm.lua for full copyright notice. |
6 | ------------------------------------------------------------------------------ | 6 | ------------------------------------------------------------------------------ |
7 | 7 | ||
8 | local mips64 = mips64 | ||
9 | local mipsr6 = _map_def.MIPSR6 | ||
10 | |||
8 | -- Module information: | 11 | -- Module information: |
9 | local _info = { | 12 | local _info = { |
10 | arch = "mips", | 13 | arch = mips64 and "mips64" or "mips", |
11 | description = "DynASM MIPS module", | 14 | description = "DynASM MIPS32/MIPS64 module", |
12 | version = "1.3.0", | 15 | version = "1.4.0", |
13 | vernum = 10300, | 16 | vernum = 10400, |
14 | release = "2012-01-23", | 17 | release = "2020-01-20", |
15 | author = "Mike Pall", | 18 | author = "Mike Pall", |
16 | license = "MIT", | 19 | license = "MIT", |
17 | } | 20 | } |
@@ -27,7 +30,8 @@ local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char | |||
27 | local match, gmatch = _s.match, _s.gmatch | 30 | local match, gmatch = _s.match, _s.gmatch |
28 | local concat, sort = table.concat, table.sort | 31 | local concat, sort = table.concat, table.sort |
29 | local bit = bit or require("bit") | 32 | local bit = bit or require("bit") |
30 | local band, shl, sar, tohex = bit.band, bit.lshift, bit.arshift, bit.tohex | 33 | local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift |
34 | local tohex = bit.tohex | ||
31 | 35 | ||
32 | -- Inherited tables and callbacks. | 36 | -- Inherited tables and callbacks. |
33 | local g_opt, g_arch | 37 | local g_opt, g_arch |
@@ -38,7 +42,7 @@ local wline, werror, wfatal, wwarn | |||
38 | local action_names = { | 42 | local action_names = { |
39 | "STOP", "SECTION", "ESC", "REL_EXT", | 43 | "STOP", "SECTION", "ESC", "REL_EXT", |
40 | "ALIGN", "REL_LG", "LABEL_LG", | 44 | "ALIGN", "REL_LG", "LABEL_LG", |
41 | "REL_PC", "LABEL_PC", "IMM", | 45 | "REL_PC", "LABEL_PC", "IMM", "IMMS", |
42 | } | 46 | } |
43 | 47 | ||
44 | -- Maximum number of section buffer positions for dasm_put(). | 48 | -- Maximum number of section buffer positions for dasm_put(). |
@@ -235,7 +239,6 @@ local map_op = { | |||
235 | bne_3 = "14000000STB", | 239 | bne_3 = "14000000STB", |
236 | blez_2 = "18000000SB", | 240 | blez_2 = "18000000SB", |
237 | bgtz_2 = "1c000000SB", | 241 | bgtz_2 = "1c000000SB", |
238 | addi_3 = "20000000TSI", | ||
239 | li_2 = "24000000TI", | 242 | li_2 = "24000000TI", |
240 | addiu_3 = "24000000TSI", | 243 | addiu_3 = "24000000TSI", |
241 | slti_3 = "28000000TSI", | 244 | slti_3 = "28000000TSI", |
@@ -245,70 +248,52 @@ local map_op = { | |||
245 | ori_3 = "34000000TSU", | 248 | ori_3 = "34000000TSU", |
246 | xori_3 = "38000000TSU", | 249 | xori_3 = "38000000TSU", |
247 | lui_2 = "3c000000TU", | 250 | lui_2 = "3c000000TU", |
248 | beqzl_2 = "50000000SB", | 251 | daddiu_3 = mips64 and "64000000TSI", |
249 | beql_3 = "50000000STB", | 252 | ldl_2 = mips64 and "68000000TO", |
250 | bnezl_2 = "54000000SB", | 253 | ldr_2 = mips64 and "6c000000TO", |
251 | bnel_3 = "54000000STB", | ||
252 | blezl_2 = "58000000SB", | ||
253 | bgtzl_2 = "5c000000SB", | ||
254 | lb_2 = "80000000TO", | 254 | lb_2 = "80000000TO", |
255 | lh_2 = "84000000TO", | 255 | lh_2 = "84000000TO", |
256 | lwl_2 = "88000000TO", | ||
257 | lw_2 = "8c000000TO", | 256 | lw_2 = "8c000000TO", |
258 | lbu_2 = "90000000TO", | 257 | lbu_2 = "90000000TO", |
259 | lhu_2 = "94000000TO", | 258 | lhu_2 = "94000000TO", |
260 | lwr_2 = "98000000TO", | 259 | lwu_2 = mips64 and "9c000000TO", |
261 | sb_2 = "a0000000TO", | 260 | sb_2 = "a0000000TO", |
262 | sh_2 = "a4000000TO", | 261 | sh_2 = "a4000000TO", |
263 | swl_2 = "a8000000TO", | ||
264 | sw_2 = "ac000000TO", | 262 | sw_2 = "ac000000TO", |
265 | swr_2 = "b8000000TO", | ||
266 | cache_2 = "bc000000NO", | ||
267 | ll_2 = "c0000000TO", | ||
268 | lwc1_2 = "c4000000HO", | 263 | lwc1_2 = "c4000000HO", |
269 | pref_2 = "cc000000NO", | ||
270 | ldc1_2 = "d4000000HO", | 264 | ldc1_2 = "d4000000HO", |
271 | sc_2 = "e0000000TO", | 265 | ld_2 = mips64 and "dc000000TO", |
272 | swc1_2 = "e4000000HO", | 266 | swc1_2 = "e4000000HO", |
273 | sdc1_2 = "f4000000HO", | 267 | sdc1_2 = "f4000000HO", |
268 | sd_2 = mips64 and "fc000000TO", | ||
274 | 269 | ||
275 | -- Opcode SPECIAL. | 270 | -- Opcode SPECIAL. |
276 | nop_0 = "00000000", | 271 | nop_0 = "00000000", |
277 | sll_3 = "00000000DTA", | 272 | sll_3 = "00000000DTA", |
278 | movf_2 = "00000001DS", | 273 | sextw_2 = "00000000DT", |
279 | movf_3 = "00000001DSC", | ||
280 | movt_2 = "00010001DS", | ||
281 | movt_3 = "00010001DSC", | ||
282 | srl_3 = "00000002DTA", | 274 | srl_3 = "00000002DTA", |
283 | rotr_3 = "00200002DTA", | 275 | rotr_3 = "00200002DTA", |
284 | sra_3 = "00000003DTA", | 276 | sra_3 = "00000003DTA", |
285 | sllv_3 = "00000004DTS", | 277 | sllv_3 = "00000004DTS", |
286 | srlv_3 = "00000006DTS", | 278 | srlv_3 = "00000006DTS", |
287 | rotrv_3 = "00000046DTS", | 279 | rotrv_3 = "00000046DTS", |
280 | drotrv_3 = mips64 and "00000056DTS", | ||
288 | srav_3 = "00000007DTS", | 281 | srav_3 = "00000007DTS", |
289 | jr_1 = "00000008S", | ||
290 | jalr_1 = "0000f809S", | 282 | jalr_1 = "0000f809S", |
291 | jalr_2 = "00000009DS", | 283 | jalr_2 = "00000009DS", |
292 | movz_3 = "0000000aDST", | ||
293 | movn_3 = "0000000bDST", | ||
294 | syscall_0 = "0000000c", | 284 | syscall_0 = "0000000c", |
295 | syscall_1 = "0000000cY", | 285 | syscall_1 = "0000000cY", |
296 | break_0 = "0000000d", | 286 | break_0 = "0000000d", |
297 | break_1 = "0000000dY", | 287 | break_1 = "0000000dY", |
298 | sync_0 = "0000000f", | 288 | sync_0 = "0000000f", |
299 | mfhi_1 = "00000010D", | 289 | dsllv_3 = mips64 and "00000014DTS", |
300 | mthi_1 = "00000011S", | 290 | dsrlv_3 = mips64 and "00000016DTS", |
301 | mflo_1 = "00000012D", | 291 | dsrav_3 = mips64 and "00000017DTS", |
302 | mtlo_1 = "00000013S", | ||
303 | mult_2 = "00000018ST", | ||
304 | multu_2 = "00000019ST", | ||
305 | div_2 = "0000001aST", | ||
306 | divu_2 = "0000001bST", | ||
307 | add_3 = "00000020DST", | 292 | add_3 = "00000020DST", |
308 | move_2 = "00000021DS", | 293 | move_2 = mips64 and "00000025DS" or "00000021DS", |
309 | addu_3 = "00000021DST", | 294 | addu_3 = "00000021DST", |
310 | sub_3 = "00000022DST", | 295 | sub_3 = "00000022DST", |
311 | negu_2 = "00000023DT", | 296 | negu_2 = mips64 and "0000002fDT" or "00000023DT", |
312 | subu_3 = "00000023DST", | 297 | subu_3 = "00000023DST", |
313 | and_3 = "00000024DST", | 298 | and_3 = "00000024DST", |
314 | or_3 = "00000025DST", | 299 | or_3 = "00000025DST", |
@@ -317,6 +302,10 @@ local map_op = { | |||
317 | nor_3 = "00000027DST", | 302 | nor_3 = "00000027DST", |
318 | slt_3 = "0000002aDST", | 303 | slt_3 = "0000002aDST", |
319 | sltu_3 = "0000002bDST", | 304 | sltu_3 = "0000002bDST", |
305 | dadd_3 = mips64 and "0000002cDST", | ||
306 | daddu_3 = mips64 and "0000002dDST", | ||
307 | dsub_3 = mips64 and "0000002eDST", | ||
308 | dsubu_3 = mips64 and "0000002fDST", | ||
320 | tge_2 = "00000030ST", | 309 | tge_2 = "00000030ST", |
321 | tge_3 = "00000030STZ", | 310 | tge_3 = "00000030STZ", |
322 | tgeu_2 = "00000031ST", | 311 | tgeu_2 = "00000031ST", |
@@ -329,40 +318,36 @@ local map_op = { | |||
329 | teq_3 = "00000034STZ", | 318 | teq_3 = "00000034STZ", |
330 | tne_2 = "00000036ST", | 319 | tne_2 = "00000036ST", |
331 | tne_3 = "00000036STZ", | 320 | tne_3 = "00000036STZ", |
321 | dsll_3 = mips64 and "00000038DTa", | ||
322 | dsrl_3 = mips64 and "0000003aDTa", | ||
323 | drotr_3 = mips64 and "0020003aDTa", | ||
324 | dsra_3 = mips64 and "0000003bDTa", | ||
325 | dsll32_3 = mips64 and "0000003cDTA", | ||
326 | dsrl32_3 = mips64 and "0000003eDTA", | ||
327 | drotr32_3 = mips64 and "0020003eDTA", | ||
328 | dsra32_3 = mips64 and "0000003fDTA", | ||
332 | 329 | ||
333 | -- Opcode REGIMM. | 330 | -- Opcode REGIMM. |
334 | bltz_2 = "04000000SB", | 331 | bltz_2 = "04000000SB", |
335 | bgez_2 = "04010000SB", | 332 | bgez_2 = "04010000SB", |
336 | bltzl_2 = "04020000SB", | 333 | bltzl_2 = "04020000SB", |
337 | bgezl_2 = "04030000SB", | 334 | bgezl_2 = "04030000SB", |
338 | tgei_2 = "04080000SI", | ||
339 | tgeiu_2 = "04090000SI", | ||
340 | tlti_2 = "040a0000SI", | ||
341 | tltiu_2 = "040b0000SI", | ||
342 | teqi_2 = "040c0000SI", | ||
343 | tnei_2 = "040e0000SI", | ||
344 | bltzal_2 = "04100000SB", | ||
345 | bal_1 = "04110000B", | 335 | bal_1 = "04110000B", |
346 | bgezal_2 = "04110000SB", | ||
347 | bltzall_2 = "04120000SB", | ||
348 | bgezall_2 = "04130000SB", | ||
349 | synci_1 = "041f0000O", | 336 | synci_1 = "041f0000O", |
350 | 337 | ||
351 | -- Opcode SPECIAL2. | ||
352 | madd_2 = "70000000ST", | ||
353 | maddu_2 = "70000001ST", | ||
354 | mul_3 = "70000002DST", | ||
355 | msub_2 = "70000004ST", | ||
356 | msubu_2 = "70000005ST", | ||
357 | clz_2 = "70000020DS=", | ||
358 | clo_2 = "70000021DS=", | ||
359 | sdbbp_0 = "7000003f", | ||
360 | sdbbp_1 = "7000003fY", | ||
361 | |||
362 | -- Opcode SPECIAL3. | 338 | -- Opcode SPECIAL3. |
363 | ext_4 = "7c000000TSAM", -- Note: last arg is msbd = size-1 | 339 | ext_4 = "7c000000TSAM", -- Note: last arg is msbd = size-1 |
340 | dextm_4 = mips64 and "7c000001TSAM", -- Args: pos | size-1-32 | ||
341 | dextu_4 = mips64 and "7c000002TSAM", -- Args: pos-32 | size-1 | ||
342 | dext_4 = mips64 and "7c000003TSAM", -- Args: pos | size-1 | ||
343 | zextw_2 = mips64 and "7c00f803TS", | ||
364 | ins_4 = "7c000004TSAM", -- Note: last arg is msb = pos+size-1 | 344 | ins_4 = "7c000004TSAM", -- Note: last arg is msb = pos+size-1 |
345 | dinsm_4 = mips64 and "7c000005TSAM", -- Args: pos | pos+size-33 | ||
346 | dinsu_4 = mips64 and "7c000006TSAM", -- Args: pos-32 | pos+size-33 | ||
347 | dins_4 = mips64 and "7c000007TSAM", -- Args: pos | pos+size-1 | ||
365 | wsbh_2 = "7c0000a0DT", | 348 | wsbh_2 = "7c0000a0DT", |
349 | dsbh_2 = mips64 and "7c0000a4DT", | ||
350 | dshd_2 = mips64 and "7c000164DT", | ||
366 | seb_2 = "7c000420DT", | 351 | seb_2 = "7c000420DT", |
367 | seh_2 = "7c000620DT", | 352 | seh_2 = "7c000620DT", |
368 | rdhwr_2 = "7c00003bTD", | 353 | rdhwr_2 = "7c00003bTD", |
@@ -370,8 +355,12 @@ local map_op = { | |||
370 | -- Opcode COP0. | 355 | -- Opcode COP0. |
371 | mfc0_2 = "40000000TD", | 356 | mfc0_2 = "40000000TD", |
372 | mfc0_3 = "40000000TDW", | 357 | mfc0_3 = "40000000TDW", |
358 | dmfc0_2 = mips64 and "40200000TD", | ||
359 | dmfc0_3 = mips64 and "40200000TDW", | ||
373 | mtc0_2 = "40800000TD", | 360 | mtc0_2 = "40800000TD", |
374 | mtc0_3 = "40800000TDW", | 361 | mtc0_3 = "40800000TDW", |
362 | dmtc0_2 = mips64 and "40a00000TD", | ||
363 | dmtc0_3 = mips64 and "40a00000TDW", | ||
375 | rdpgpr_2 = "41400000DT", | 364 | rdpgpr_2 = "41400000DT", |
376 | di_0 = "41606000", | 365 | di_0 = "41606000", |
377 | di_1 = "41606000T", | 366 | di_1 = "41606000T", |
@@ -388,21 +377,14 @@ local map_op = { | |||
388 | 377 | ||
389 | -- Opcode COP1. | 378 | -- Opcode COP1. |
390 | mfc1_2 = "44000000TG", | 379 | mfc1_2 = "44000000TG", |
380 | dmfc1_2 = mips64 and "44200000TG", | ||
391 | cfc1_2 = "44400000TG", | 381 | cfc1_2 = "44400000TG", |
392 | mfhc1_2 = "44600000TG", | 382 | mfhc1_2 = "44600000TG", |
393 | mtc1_2 = "44800000TG", | 383 | mtc1_2 = "44800000TG", |
384 | dmtc1_2 = mips64 and "44a00000TG", | ||
394 | ctc1_2 = "44c00000TG", | 385 | ctc1_2 = "44c00000TG", |
395 | mthc1_2 = "44e00000TG", | 386 | mthc1_2 = "44e00000TG", |
396 | 387 | ||
397 | bc1f_1 = "45000000B", | ||
398 | bc1f_2 = "45000000CB", | ||
399 | bc1t_1 = "45010000B", | ||
400 | bc1t_2 = "45010000CB", | ||
401 | bc1fl_1 = "45020000B", | ||
402 | bc1fl_2 = "45020000CB", | ||
403 | bc1tl_1 = "45030000B", | ||
404 | bc1tl_2 = "45030000CB", | ||
405 | |||
406 | ["add.s_3"] = "46000000FGH", | 388 | ["add.s_3"] = "46000000FGH", |
407 | ["sub.s_3"] = "46000001FGH", | 389 | ["sub.s_3"] = "46000001FGH", |
408 | ["mul.s_3"] = "46000002FGH", | 390 | ["mul.s_3"] = "46000002FGH", |
@@ -419,51 +401,11 @@ local map_op = { | |||
419 | ["trunc.w.s_2"] = "4600000dFG", | 401 | ["trunc.w.s_2"] = "4600000dFG", |
420 | ["ceil.w.s_2"] = "4600000eFG", | 402 | ["ceil.w.s_2"] = "4600000eFG", |
421 | ["floor.w.s_2"] = "4600000fFG", | 403 | ["floor.w.s_2"] = "4600000fFG", |
422 | ["movf.s_2"] = "46000011FG", | ||
423 | ["movf.s_3"] = "46000011FGC", | ||
424 | ["movt.s_2"] = "46010011FG", | ||
425 | ["movt.s_3"] = "46010011FGC", | ||
426 | ["movz.s_3"] = "46000012FGT", | ||
427 | ["movn.s_3"] = "46000013FGT", | ||
428 | ["recip.s_2"] = "46000015FG", | 404 | ["recip.s_2"] = "46000015FG", |
429 | ["rsqrt.s_2"] = "46000016FG", | 405 | ["rsqrt.s_2"] = "46000016FG", |
430 | ["cvt.d.s_2"] = "46000021FG", | 406 | ["cvt.d.s_2"] = "46000021FG", |
431 | ["cvt.w.s_2"] = "46000024FG", | 407 | ["cvt.w.s_2"] = "46000024FG", |
432 | ["cvt.l.s_2"] = "46000025FG", | 408 | ["cvt.l.s_2"] = "46000025FG", |
433 | ["cvt.ps.s_3"] = "46000026FGH", | ||
434 | ["c.f.s_2"] = "46000030GH", | ||
435 | ["c.f.s_3"] = "46000030VGH", | ||
436 | ["c.un.s_2"] = "46000031GH", | ||
437 | ["c.un.s_3"] = "46000031VGH", | ||
438 | ["c.eq.s_2"] = "46000032GH", | ||
439 | ["c.eq.s_3"] = "46000032VGH", | ||
440 | ["c.ueq.s_2"] = "46000033GH", | ||
441 | ["c.ueq.s_3"] = "46000033VGH", | ||
442 | ["c.olt.s_2"] = "46000034GH", | ||
443 | ["c.olt.s_3"] = "46000034VGH", | ||
444 | ["c.ult.s_2"] = "46000035GH", | ||
445 | ["c.ult.s_3"] = "46000035VGH", | ||
446 | ["c.ole.s_2"] = "46000036GH", | ||
447 | ["c.ole.s_3"] = "46000036VGH", | ||
448 | ["c.ule.s_2"] = "46000037GH", | ||
449 | ["c.ule.s_3"] = "46000037VGH", | ||
450 | ["c.sf.s_2"] = "46000038GH", | ||
451 | ["c.sf.s_3"] = "46000038VGH", | ||
452 | ["c.ngle.s_2"] = "46000039GH", | ||
453 | ["c.ngle.s_3"] = "46000039VGH", | ||
454 | ["c.seq.s_2"] = "4600003aGH", | ||
455 | ["c.seq.s_3"] = "4600003aVGH", | ||
456 | ["c.ngl.s_2"] = "4600003bGH", | ||
457 | ["c.ngl.s_3"] = "4600003bVGH", | ||
458 | ["c.lt.s_2"] = "4600003cGH", | ||
459 | ["c.lt.s_3"] = "4600003cVGH", | ||
460 | ["c.nge.s_2"] = "4600003dGH", | ||
461 | ["c.nge.s_3"] = "4600003dVGH", | ||
462 | ["c.le.s_2"] = "4600003eGH", | ||
463 | ["c.le.s_3"] = "4600003eVGH", | ||
464 | ["c.ngt.s_2"] = "4600003fGH", | ||
465 | ["c.ngt.s_3"] = "4600003fVGH", | ||
466 | |||
467 | ["add.d_3"] = "46200000FGH", | 409 | ["add.d_3"] = "46200000FGH", |
468 | ["sub.d_3"] = "46200001FGH", | 410 | ["sub.d_3"] = "46200001FGH", |
469 | ["mul.d_3"] = "46200002FGH", | 411 | ["mul.d_3"] = "46200002FGH", |
@@ -480,130 +422,410 @@ local map_op = { | |||
480 | ["trunc.w.d_2"] = "4620000dFG", | 422 | ["trunc.w.d_2"] = "4620000dFG", |
481 | ["ceil.w.d_2"] = "4620000eFG", | 423 | ["ceil.w.d_2"] = "4620000eFG", |
482 | ["floor.w.d_2"] = "4620000fFG", | 424 | ["floor.w.d_2"] = "4620000fFG", |
483 | ["movf.d_2"] = "46200011FG", | ||
484 | ["movf.d_3"] = "46200011FGC", | ||
485 | ["movt.d_2"] = "46210011FG", | ||
486 | ["movt.d_3"] = "46210011FGC", | ||
487 | ["movz.d_3"] = "46200012FGT", | ||
488 | ["movn.d_3"] = "46200013FGT", | ||
489 | ["recip.d_2"] = "46200015FG", | 425 | ["recip.d_2"] = "46200015FG", |
490 | ["rsqrt.d_2"] = "46200016FG", | 426 | ["rsqrt.d_2"] = "46200016FG", |
491 | ["cvt.s.d_2"] = "46200020FG", | 427 | ["cvt.s.d_2"] = "46200020FG", |
492 | ["cvt.w.d_2"] = "46200024FG", | 428 | ["cvt.w.d_2"] = "46200024FG", |
493 | ["cvt.l.d_2"] = "46200025FG", | 429 | ["cvt.l.d_2"] = "46200025FG", |
494 | ["c.f.d_2"] = "46200030GH", | ||
495 | ["c.f.d_3"] = "46200030VGH", | ||
496 | ["c.un.d_2"] = "46200031GH", | ||
497 | ["c.un.d_3"] = "46200031VGH", | ||
498 | ["c.eq.d_2"] = "46200032GH", | ||
499 | ["c.eq.d_3"] = "46200032VGH", | ||
500 | ["c.ueq.d_2"] = "46200033GH", | ||
501 | ["c.ueq.d_3"] = "46200033VGH", | ||
502 | ["c.olt.d_2"] = "46200034GH", | ||
503 | ["c.olt.d_3"] = "46200034VGH", | ||
504 | ["c.ult.d_2"] = "46200035GH", | ||
505 | ["c.ult.d_3"] = "46200035VGH", | ||
506 | ["c.ole.d_2"] = "46200036GH", | ||
507 | ["c.ole.d_3"] = "46200036VGH", | ||
508 | ["c.ule.d_2"] = "46200037GH", | ||
509 | ["c.ule.d_3"] = "46200037VGH", | ||
510 | ["c.sf.d_2"] = "46200038GH", | ||
511 | ["c.sf.d_3"] = "46200038VGH", | ||
512 | ["c.ngle.d_2"] = "46200039GH", | ||
513 | ["c.ngle.d_3"] = "46200039VGH", | ||
514 | ["c.seq.d_2"] = "4620003aGH", | ||
515 | ["c.seq.d_3"] = "4620003aVGH", | ||
516 | ["c.ngl.d_2"] = "4620003bGH", | ||
517 | ["c.ngl.d_3"] = "4620003bVGH", | ||
518 | ["c.lt.d_2"] = "4620003cGH", | ||
519 | ["c.lt.d_3"] = "4620003cVGH", | ||
520 | ["c.nge.d_2"] = "4620003dGH", | ||
521 | ["c.nge.d_3"] = "4620003dVGH", | ||
522 | ["c.le.d_2"] = "4620003eGH", | ||
523 | ["c.le.d_3"] = "4620003eVGH", | ||
524 | ["c.ngt.d_2"] = "4620003fGH", | ||
525 | ["c.ngt.d_3"] = "4620003fVGH", | ||
526 | |||
527 | ["add.ps_3"] = "46c00000FGH", | ||
528 | ["sub.ps_3"] = "46c00001FGH", | ||
529 | ["mul.ps_3"] = "46c00002FGH", | ||
530 | ["abs.ps_2"] = "46c00005FG", | ||
531 | ["mov.ps_2"] = "46c00006FG", | ||
532 | ["neg.ps_2"] = "46c00007FG", | ||
533 | ["movf.ps_2"] = "46c00011FG", | ||
534 | ["movf.ps_3"] = "46c00011FGC", | ||
535 | ["movt.ps_2"] = "46c10011FG", | ||
536 | ["movt.ps_3"] = "46c10011FGC", | ||
537 | ["movz.ps_3"] = "46c00012FGT", | ||
538 | ["movn.ps_3"] = "46c00013FGT", | ||
539 | ["cvt.s.pu_2"] = "46c00020FG", | ||
540 | ["cvt.s.pl_2"] = "46c00028FG", | ||
541 | ["pll.ps_3"] = "46c0002cFGH", | ||
542 | ["plu.ps_3"] = "46c0002dFGH", | ||
543 | ["pul.ps_3"] = "46c0002eFGH", | ||
544 | ["puu.ps_3"] = "46c0002fFGH", | ||
545 | ["c.f.ps_2"] = "46c00030GH", | ||
546 | ["c.f.ps_3"] = "46c00030VGH", | ||
547 | ["c.un.ps_2"] = "46c00031GH", | ||
548 | ["c.un.ps_3"] = "46c00031VGH", | ||
549 | ["c.eq.ps_2"] = "46c00032GH", | ||
550 | ["c.eq.ps_3"] = "46c00032VGH", | ||
551 | ["c.ueq.ps_2"] = "46c00033GH", | ||
552 | ["c.ueq.ps_3"] = "46c00033VGH", | ||
553 | ["c.olt.ps_2"] = "46c00034GH", | ||
554 | ["c.olt.ps_3"] = "46c00034VGH", | ||
555 | ["c.ult.ps_2"] = "46c00035GH", | ||
556 | ["c.ult.ps_3"] = "46c00035VGH", | ||
557 | ["c.ole.ps_2"] = "46c00036GH", | ||
558 | ["c.ole.ps_3"] = "46c00036VGH", | ||
559 | ["c.ule.ps_2"] = "46c00037GH", | ||
560 | ["c.ule.ps_3"] = "46c00037VGH", | ||
561 | ["c.sf.ps_2"] = "46c00038GH", | ||
562 | ["c.sf.ps_3"] = "46c00038VGH", | ||
563 | ["c.ngle.ps_2"] = "46c00039GH", | ||
564 | ["c.ngle.ps_3"] = "46c00039VGH", | ||
565 | ["c.seq.ps_2"] = "46c0003aGH", | ||
566 | ["c.seq.ps_3"] = "46c0003aVGH", | ||
567 | ["c.ngl.ps_2"] = "46c0003bGH", | ||
568 | ["c.ngl.ps_3"] = "46c0003bVGH", | ||
569 | ["c.lt.ps_2"] = "46c0003cGH", | ||
570 | ["c.lt.ps_3"] = "46c0003cVGH", | ||
571 | ["c.nge.ps_2"] = "46c0003dGH", | ||
572 | ["c.nge.ps_3"] = "46c0003dVGH", | ||
573 | ["c.le.ps_2"] = "46c0003eGH", | ||
574 | ["c.le.ps_3"] = "46c0003eVGH", | ||
575 | ["c.ngt.ps_2"] = "46c0003fGH", | ||
576 | ["c.ngt.ps_3"] = "46c0003fVGH", | ||
577 | |||
578 | ["cvt.s.w_2"] = "46800020FG", | 430 | ["cvt.s.w_2"] = "46800020FG", |
579 | ["cvt.d.w_2"] = "46800021FG", | 431 | ["cvt.d.w_2"] = "46800021FG", |
580 | |||
581 | ["cvt.s.l_2"] = "46a00020FG", | 432 | ["cvt.s.l_2"] = "46a00020FG", |
582 | ["cvt.d.l_2"] = "46a00021FG", | 433 | ["cvt.d.l_2"] = "46a00021FG", |
583 | |||
584 | -- Opcode COP1X. | ||
585 | lwxc1_2 = "4c000000FX", | ||
586 | ldxc1_2 = "4c000001FX", | ||
587 | luxc1_2 = "4c000005FX", | ||
588 | swxc1_2 = "4c000008FX", | ||
589 | sdxc1_2 = "4c000009FX", | ||
590 | suxc1_2 = "4c00000dFX", | ||
591 | prefx_2 = "4c00000fMX", | ||
592 | ["alnv.ps_4"] = "4c00001eFGHS", | ||
593 | ["madd.s_4"] = "4c000020FRGH", | ||
594 | ["madd.d_4"] = "4c000021FRGH", | ||
595 | ["madd.ps_4"] = "4c000026FRGH", | ||
596 | ["msub.s_4"] = "4c000028FRGH", | ||
597 | ["msub.d_4"] = "4c000029FRGH", | ||
598 | ["msub.ps_4"] = "4c00002eFRGH", | ||
599 | ["nmadd.s_4"] = "4c000030FRGH", | ||
600 | ["nmadd.d_4"] = "4c000031FRGH", | ||
601 | ["nmadd.ps_4"] = "4c000036FRGH", | ||
602 | ["nmsub.s_4"] = "4c000038FRGH", | ||
603 | ["nmsub.d_4"] = "4c000039FRGH", | ||
604 | ["nmsub.ps_4"] = "4c00003eFRGH", | ||
605 | } | 434 | } |
606 | 435 | ||
436 | if mipsr6 then -- Instructions added with MIPSR6. | ||
437 | |||
438 | for k,v in pairs({ | ||
439 | |||
440 | -- Add immediate to upper bits. | ||
441 | aui_3 = "3c000000TSI", | ||
442 | daui_3 = mips64 and "74000000TSI", | ||
443 | dahi_2 = mips64 and "04060000SI", | ||
444 | dati_2 = mips64 and "041e0000SI", | ||
445 | |||
446 | -- TODO: addiupc, auipc, aluipc, lwpc, lwupc, ldpc. | ||
447 | |||
448 | -- Compact branches. | ||
449 | blezalc_2 = "18000000TB", -- rt != 0. | ||
450 | bgezalc_2 = "18000000T=SB", -- rt != 0. | ||
451 | bgtzalc_2 = "1c000000TB", -- rt != 0. | ||
452 | bltzalc_2 = "1c000000T=SB", -- rt != 0. | ||
453 | |||
454 | blezc_2 = "58000000TB", -- rt != 0. | ||
455 | bgezc_2 = "58000000T=SB", -- rt != 0. | ||
456 | bgec_3 = "58000000STB", -- rs != rt. | ||
457 | blec_3 = "58000000TSB", -- rt != rs. | ||
458 | |||
459 | bgtzc_2 = "5c000000TB", -- rt != 0. | ||
460 | bltzc_2 = "5c000000T=SB", -- rt != 0. | ||
461 | bltc_3 = "5c000000STB", -- rs != rt. | ||
462 | bgtc_3 = "5c000000TSB", -- rt != rs. | ||
463 | |||
464 | bgeuc_3 = "18000000STB", -- rs != rt. | ||
465 | bleuc_3 = "18000000TSB", -- rt != rs. | ||
466 | bltuc_3 = "1c000000STB", -- rs != rt. | ||
467 | bgtuc_3 = "1c000000TSB", -- rt != rs. | ||
468 | |||
469 | beqzalc_2 = "20000000TB", -- rt != 0. | ||
470 | bnezalc_2 = "60000000TB", -- rt != 0. | ||
471 | beqc_3 = "20000000STB", -- rs < rt. | ||
472 | bnec_3 = "60000000STB", -- rs < rt. | ||
473 | bovc_3 = "20000000STB", -- rs >= rt. | ||
474 | bnvc_3 = "60000000STB", -- rs >= rt. | ||
475 | |||
476 | beqzc_2 = "d8000000SK", -- rs != 0. | ||
477 | bnezc_2 = "f8000000SK", -- rs != 0. | ||
478 | jic_2 = "d8000000TI", | ||
479 | jialc_2 = "f8000000TI", | ||
480 | bc_1 = "c8000000L", | ||
481 | balc_1 = "e8000000L", | ||
482 | |||
483 | -- Opcode SPECIAL. | ||
484 | jr_1 = "00000009S", | ||
485 | sdbbp_0 = "0000000e", | ||
486 | sdbbp_1 = "0000000eY", | ||
487 | lsa_4 = "00000005DSTA", | ||
488 | dlsa_4 = mips64 and "00000015DSTA", | ||
489 | seleqz_3 = "00000035DST", | ||
490 | selnez_3 = "00000037DST", | ||
491 | clz_2 = "00000050DS", | ||
492 | clo_2 = "00000051DS", | ||
493 | dclz_2 = mips64 and "00000052DS", | ||
494 | dclo_2 = mips64 and "00000053DS", | ||
495 | mul_3 = "00000098DST", | ||
496 | muh_3 = "000000d8DST", | ||
497 | mulu_3 = "00000099DST", | ||
498 | muhu_3 = "000000d9DST", | ||
499 | div_3 = "0000009aDST", | ||
500 | mod_3 = "000000daDST", | ||
501 | divu_3 = "0000009bDST", | ||
502 | modu_3 = "000000dbDST", | ||
503 | dmul_3 = mips64 and "0000009cDST", | ||
504 | dmuh_3 = mips64 and "000000dcDST", | ||
505 | dmulu_3 = mips64 and "0000009dDST", | ||
506 | dmuhu_3 = mips64 and "000000ddDST", | ||
507 | ddiv_3 = mips64 and "0000009eDST", | ||
508 | dmod_3 = mips64 and "000000deDST", | ||
509 | ddivu_3 = mips64 and "0000009fDST", | ||
510 | dmodu_3 = mips64 and "000000dfDST", | ||
511 | |||
512 | -- Opcode SPECIAL3. | ||
513 | align_4 = "7c000220DSTA", | ||
514 | dalign_4 = mips64 and "7c000224DSTA", | ||
515 | bitswap_2 = "7c000020DT", | ||
516 | dbitswap_2 = mips64 and "7c000024DT", | ||
517 | |||
518 | -- Opcode COP1. | ||
519 | bc1eqz_2 = "45200000HB", | ||
520 | bc1nez_2 = "45a00000HB", | ||
521 | |||
522 | ["sel.s_3"] = "46000010FGH", | ||
523 | ["seleqz.s_3"] = "46000014FGH", | ||
524 | ["selnez.s_3"] = "46000017FGH", | ||
525 | ["maddf.s_3"] = "46000018FGH", | ||
526 | ["msubf.s_3"] = "46000019FGH", | ||
527 | ["rint.s_2"] = "4600001aFG", | ||
528 | ["class.s_2"] = "4600001bFG", | ||
529 | ["min.s_3"] = "4600001cFGH", | ||
530 | ["mina.s_3"] = "4600001dFGH", | ||
531 | ["max.s_3"] = "4600001eFGH", | ||
532 | ["maxa.s_3"] = "4600001fFGH", | ||
533 | ["cmp.af.s_3"] = "46800000FGH", | ||
534 | ["cmp.un.s_3"] = "46800001FGH", | ||
535 | ["cmp.or.s_3"] = "46800011FGH", | ||
536 | ["cmp.eq.s_3"] = "46800002FGH", | ||
537 | ["cmp.une.s_3"] = "46800012FGH", | ||
538 | ["cmp.ueq.s_3"] = "46800003FGH", | ||
539 | ["cmp.ne.s_3"] = "46800013FGH", | ||
540 | ["cmp.lt.s_3"] = "46800004FGH", | ||
541 | ["cmp.ult.s_3"] = "46800005FGH", | ||
542 | ["cmp.le.s_3"] = "46800006FGH", | ||
543 | ["cmp.ule.s_3"] = "46800007FGH", | ||
544 | ["cmp.saf.s_3"] = "46800008FGH", | ||
545 | ["cmp.sun.s_3"] = "46800009FGH", | ||
546 | ["cmp.sor.s_3"] = "46800019FGH", | ||
547 | ["cmp.seq.s_3"] = "4680000aFGH", | ||
548 | ["cmp.sune.s_3"] = "4680001aFGH", | ||
549 | ["cmp.sueq.s_3"] = "4680000bFGH", | ||
550 | ["cmp.sne.s_3"] = "4680001bFGH", | ||
551 | ["cmp.slt.s_3"] = "4680000cFGH", | ||
552 | ["cmp.sult.s_3"] = "4680000dFGH", | ||
553 | ["cmp.sle.s_3"] = "4680000eFGH", | ||
554 | ["cmp.sule.s_3"] = "4680000fFGH", | ||
555 | |||
556 | ["sel.d_3"] = "46200010FGH", | ||
557 | ["seleqz.d_3"] = "46200014FGH", | ||
558 | ["selnez.d_3"] = "46200017FGH", | ||
559 | ["maddf.d_3"] = "46200018FGH", | ||
560 | ["msubf.d_3"] = "46200019FGH", | ||
561 | ["rint.d_2"] = "4620001aFG", | ||
562 | ["class.d_2"] = "4620001bFG", | ||
563 | ["min.d_3"] = "4620001cFGH", | ||
564 | ["mina.d_3"] = "4620001dFGH", | ||
565 | ["max.d_3"] = "4620001eFGH", | ||
566 | ["maxa.d_3"] = "4620001fFGH", | ||
567 | ["cmp.af.d_3"] = "46a00000FGH", | ||
568 | ["cmp.un.d_3"] = "46a00001FGH", | ||
569 | ["cmp.or.d_3"] = "46a00011FGH", | ||
570 | ["cmp.eq.d_3"] = "46a00002FGH", | ||
571 | ["cmp.une.d_3"] = "46a00012FGH", | ||
572 | ["cmp.ueq.d_3"] = "46a00003FGH", | ||
573 | ["cmp.ne.d_3"] = "46a00013FGH", | ||
574 | ["cmp.lt.d_3"] = "46a00004FGH", | ||
575 | ["cmp.ult.d_3"] = "46a00005FGH", | ||
576 | ["cmp.le.d_3"] = "46a00006FGH", | ||
577 | ["cmp.ule.d_3"] = "46a00007FGH", | ||
578 | ["cmp.saf.d_3"] = "46a00008FGH", | ||
579 | ["cmp.sun.d_3"] = "46a00009FGH", | ||
580 | ["cmp.sor.d_3"] = "46a00019FGH", | ||
581 | ["cmp.seq.d_3"] = "46a0000aFGH", | ||
582 | ["cmp.sune.d_3"] = "46a0001aFGH", | ||
583 | ["cmp.sueq.d_3"] = "46a0000bFGH", | ||
584 | ["cmp.sne.d_3"] = "46a0001bFGH", | ||
585 | ["cmp.slt.d_3"] = "46a0000cFGH", | ||
586 | ["cmp.sult.d_3"] = "46a0000dFGH", | ||
587 | ["cmp.sle.d_3"] = "46a0000eFGH", | ||
588 | ["cmp.sule.d_3"] = "46a0000fFGH", | ||
589 | |||
590 | }) do map_op[k] = v end | ||
591 | |||
592 | else -- Instructions removed by MIPSR6. | ||
593 | |||
594 | for k,v in pairs({ | ||
595 | -- Traps, don't use. | ||
596 | addi_3 = "20000000TSI", | ||
597 | daddi_3 = mips64 and "60000000TSI", | ||
598 | |||
599 | -- Branch on likely, don't use. | ||
600 | beqzl_2 = "50000000SB", | ||
601 | beql_3 = "50000000STB", | ||
602 | bnezl_2 = "54000000SB", | ||
603 | bnel_3 = "54000000STB", | ||
604 | blezl_2 = "58000000SB", | ||
605 | bgtzl_2 = "5c000000SB", | ||
606 | |||
607 | lwl_2 = "88000000TO", | ||
608 | lwr_2 = "98000000TO", | ||
609 | swl_2 = "a8000000TO", | ||
610 | sdl_2 = mips64 and "b0000000TO", | ||
611 | sdr_2 = mips64 and "b1000000TO", | ||
612 | swr_2 = "b8000000TO", | ||
613 | cache_2 = "bc000000NO", | ||
614 | ll_2 = "c0000000TO", | ||
615 | pref_2 = "cc000000NO", | ||
616 | sc_2 = "e0000000TO", | ||
617 | scd_2 = mips64 and "f0000000TO", | ||
618 | |||
619 | -- Opcode SPECIAL. | ||
620 | movf_2 = "00000001DS", | ||
621 | movf_3 = "00000001DSC", | ||
622 | movt_2 = "00010001DS", | ||
623 | movt_3 = "00010001DSC", | ||
624 | jr_1 = "00000008S", | ||
625 | movz_3 = "0000000aDST", | ||
626 | movn_3 = "0000000bDST", | ||
627 | mfhi_1 = "00000010D", | ||
628 | mthi_1 = "00000011S", | ||
629 | mflo_1 = "00000012D", | ||
630 | mtlo_1 = "00000013S", | ||
631 | mult_2 = "00000018ST", | ||
632 | multu_2 = "00000019ST", | ||
633 | div_3 = "0000001aST", | ||
634 | divu_3 = "0000001bST", | ||
635 | ddiv_3 = mips64 and "0000001eST", | ||
636 | ddivu_3 = mips64 and "0000001fST", | ||
637 | dmult_2 = mips64 and "0000001cST", | ||
638 | dmultu_2 = mips64 and "0000001dST", | ||
639 | |||
640 | -- Opcode REGIMM. | ||
641 | tgei_2 = "04080000SI", | ||
642 | tgeiu_2 = "04090000SI", | ||
643 | tlti_2 = "040a0000SI", | ||
644 | tltiu_2 = "040b0000SI", | ||
645 | teqi_2 = "040c0000SI", | ||
646 | tnei_2 = "040e0000SI", | ||
647 | bltzal_2 = "04100000SB", | ||
648 | bgezal_2 = "04110000SB", | ||
649 | bltzall_2 = "04120000SB", | ||
650 | bgezall_2 = "04130000SB", | ||
651 | |||
652 | -- Opcode SPECIAL2. | ||
653 | madd_2 = "70000000ST", | ||
654 | maddu_2 = "70000001ST", | ||
655 | mul_3 = "70000002DST", | ||
656 | msub_2 = "70000004ST", | ||
657 | msubu_2 = "70000005ST", | ||
658 | clz_2 = "70000020D=TS", | ||
659 | clo_2 = "70000021D=TS", | ||
660 | dclz_2 = mips64 and "70000024D=TS", | ||
661 | dclo_2 = mips64 and "70000025D=TS", | ||
662 | sdbbp_0 = "7000003f", | ||
663 | sdbbp_1 = "7000003fY", | ||
664 | |||
665 | -- Opcode COP1. | ||
666 | bc1f_1 = "45000000B", | ||
667 | bc1f_2 = "45000000CB", | ||
668 | bc1t_1 = "45010000B", | ||
669 | bc1t_2 = "45010000CB", | ||
670 | bc1fl_1 = "45020000B", | ||
671 | bc1fl_2 = "45020000CB", | ||
672 | bc1tl_1 = "45030000B", | ||
673 | bc1tl_2 = "45030000CB", | ||
674 | |||
675 | ["movf.s_2"] = "46000011FG", | ||
676 | ["movf.s_3"] = "46000011FGC", | ||
677 | ["movt.s_2"] = "46010011FG", | ||
678 | ["movt.s_3"] = "46010011FGC", | ||
679 | ["movz.s_3"] = "46000012FGT", | ||
680 | ["movn.s_3"] = "46000013FGT", | ||
681 | ["cvt.ps.s_3"] = "46000026FGH", | ||
682 | ["c.f.s_2"] = "46000030GH", | ||
683 | ["c.f.s_3"] = "46000030VGH", | ||
684 | ["c.un.s_2"] = "46000031GH", | ||
685 | ["c.un.s_3"] = "46000031VGH", | ||
686 | ["c.eq.s_2"] = "46000032GH", | ||
687 | ["c.eq.s_3"] = "46000032VGH", | ||
688 | ["c.ueq.s_2"] = "46000033GH", | ||
689 | ["c.ueq.s_3"] = "46000033VGH", | ||
690 | ["c.olt.s_2"] = "46000034GH", | ||
691 | ["c.olt.s_3"] = "46000034VGH", | ||
692 | ["c.ult.s_2"] = "46000035GH", | ||
693 | ["c.ult.s_3"] = "46000035VGH", | ||
694 | ["c.ole.s_2"] = "46000036GH", | ||
695 | ["c.ole.s_3"] = "46000036VGH", | ||
696 | ["c.ule.s_2"] = "46000037GH", | ||
697 | ["c.ule.s_3"] = "46000037VGH", | ||
698 | ["c.sf.s_2"] = "46000038GH", | ||
699 | ["c.sf.s_3"] = "46000038VGH", | ||
700 | ["c.ngle.s_2"] = "46000039GH", | ||
701 | ["c.ngle.s_3"] = "46000039VGH", | ||
702 | ["c.seq.s_2"] = "4600003aGH", | ||
703 | ["c.seq.s_3"] = "4600003aVGH", | ||
704 | ["c.ngl.s_2"] = "4600003bGH", | ||
705 | ["c.ngl.s_3"] = "4600003bVGH", | ||
706 | ["c.lt.s_2"] = "4600003cGH", | ||
707 | ["c.lt.s_3"] = "4600003cVGH", | ||
708 | ["c.nge.s_2"] = "4600003dGH", | ||
709 | ["c.nge.s_3"] = "4600003dVGH", | ||
710 | ["c.le.s_2"] = "4600003eGH", | ||
711 | ["c.le.s_3"] = "4600003eVGH", | ||
712 | ["c.ngt.s_2"] = "4600003fGH", | ||
713 | ["c.ngt.s_3"] = "4600003fVGH", | ||
714 | ["movf.d_2"] = "46200011FG", | ||
715 | ["movf.d_3"] = "46200011FGC", | ||
716 | ["movt.d_2"] = "46210011FG", | ||
717 | ["movt.d_3"] = "46210011FGC", | ||
718 | ["movz.d_3"] = "46200012FGT", | ||
719 | ["movn.d_3"] = "46200013FGT", | ||
720 | ["c.f.d_2"] = "46200030GH", | ||
721 | ["c.f.d_3"] = "46200030VGH", | ||
722 | ["c.un.d_2"] = "46200031GH", | ||
723 | ["c.un.d_3"] = "46200031VGH", | ||
724 | ["c.eq.d_2"] = "46200032GH", | ||
725 | ["c.eq.d_3"] = "46200032VGH", | ||
726 | ["c.ueq.d_2"] = "46200033GH", | ||
727 | ["c.ueq.d_3"] = "46200033VGH", | ||
728 | ["c.olt.d_2"] = "46200034GH", | ||
729 | ["c.olt.d_3"] = "46200034VGH", | ||
730 | ["c.ult.d_2"] = "46200035GH", | ||
731 | ["c.ult.d_3"] = "46200035VGH", | ||
732 | ["c.ole.d_2"] = "46200036GH", | ||
733 | ["c.ole.d_3"] = "46200036VGH", | ||
734 | ["c.ule.d_2"] = "46200037GH", | ||
735 | ["c.ule.d_3"] = "46200037VGH", | ||
736 | ["c.sf.d_2"] = "46200038GH", | ||
737 | ["c.sf.d_3"] = "46200038VGH", | ||
738 | ["c.ngle.d_2"] = "46200039GH", | ||
739 | ["c.ngle.d_3"] = "46200039VGH", | ||
740 | ["c.seq.d_2"] = "4620003aGH", | ||
741 | ["c.seq.d_3"] = "4620003aVGH", | ||
742 | ["c.ngl.d_2"] = "4620003bGH", | ||
743 | ["c.ngl.d_3"] = "4620003bVGH", | ||
744 | ["c.lt.d_2"] = "4620003cGH", | ||
745 | ["c.lt.d_3"] = "4620003cVGH", | ||
746 | ["c.nge.d_2"] = "4620003dGH", | ||
747 | ["c.nge.d_3"] = "4620003dVGH", | ||
748 | ["c.le.d_2"] = "4620003eGH", | ||
749 | ["c.le.d_3"] = "4620003eVGH", | ||
750 | ["c.ngt.d_2"] = "4620003fGH", | ||
751 | ["c.ngt.d_3"] = "4620003fVGH", | ||
752 | ["add.ps_3"] = "46c00000FGH", | ||
753 | ["sub.ps_3"] = "46c00001FGH", | ||
754 | ["mul.ps_3"] = "46c00002FGH", | ||
755 | ["abs.ps_2"] = "46c00005FG", | ||
756 | ["mov.ps_2"] = "46c00006FG", | ||
757 | ["neg.ps_2"] = "46c00007FG", | ||
758 | ["movf.ps_2"] = "46c00011FG", | ||
759 | ["movf.ps_3"] = "46c00011FGC", | ||
760 | ["movt.ps_2"] = "46c10011FG", | ||
761 | ["movt.ps_3"] = "46c10011FGC", | ||
762 | ["movz.ps_3"] = "46c00012FGT", | ||
763 | ["movn.ps_3"] = "46c00013FGT", | ||
764 | ["cvt.s.pu_2"] = "46c00020FG", | ||
765 | ["cvt.s.pl_2"] = "46c00028FG", | ||
766 | ["pll.ps_3"] = "46c0002cFGH", | ||
767 | ["plu.ps_3"] = "46c0002dFGH", | ||
768 | ["pul.ps_3"] = "46c0002eFGH", | ||
769 | ["puu.ps_3"] = "46c0002fFGH", | ||
770 | ["c.f.ps_2"] = "46c00030GH", | ||
771 | ["c.f.ps_3"] = "46c00030VGH", | ||
772 | ["c.un.ps_2"] = "46c00031GH", | ||
773 | ["c.un.ps_3"] = "46c00031VGH", | ||
774 | ["c.eq.ps_2"] = "46c00032GH", | ||
775 | ["c.eq.ps_3"] = "46c00032VGH", | ||
776 | ["c.ueq.ps_2"] = "46c00033GH", | ||
777 | ["c.ueq.ps_3"] = "46c00033VGH", | ||
778 | ["c.olt.ps_2"] = "46c00034GH", | ||
779 | ["c.olt.ps_3"] = "46c00034VGH", | ||
780 | ["c.ult.ps_2"] = "46c00035GH", | ||
781 | ["c.ult.ps_3"] = "46c00035VGH", | ||
782 | ["c.ole.ps_2"] = "46c00036GH", | ||
783 | ["c.ole.ps_3"] = "46c00036VGH", | ||
784 | ["c.ule.ps_2"] = "46c00037GH", | ||
785 | ["c.ule.ps_3"] = "46c00037VGH", | ||
786 | ["c.sf.ps_2"] = "46c00038GH", | ||
787 | ["c.sf.ps_3"] = "46c00038VGH", | ||
788 | ["c.ngle.ps_2"] = "46c00039GH", | ||
789 | ["c.ngle.ps_3"] = "46c00039VGH", | ||
790 | ["c.seq.ps_2"] = "46c0003aGH", | ||
791 | ["c.seq.ps_3"] = "46c0003aVGH", | ||
792 | ["c.ngl.ps_2"] = "46c0003bGH", | ||
793 | ["c.ngl.ps_3"] = "46c0003bVGH", | ||
794 | ["c.lt.ps_2"] = "46c0003cGH", | ||
795 | ["c.lt.ps_3"] = "46c0003cVGH", | ||
796 | ["c.nge.ps_2"] = "46c0003dGH", | ||
797 | ["c.nge.ps_3"] = "46c0003dVGH", | ||
798 | ["c.le.ps_2"] = "46c0003eGH", | ||
799 | ["c.le.ps_3"] = "46c0003eVGH", | ||
800 | ["c.ngt.ps_2"] = "46c0003fGH", | ||
801 | ["c.ngt.ps_3"] = "46c0003fVGH", | ||
802 | |||
803 | -- Opcode COP1X. | ||
804 | lwxc1_2 = "4c000000FX", | ||
805 | ldxc1_2 = "4c000001FX", | ||
806 | luxc1_2 = "4c000005FX", | ||
807 | swxc1_2 = "4c000008FX", | ||
808 | sdxc1_2 = "4c000009FX", | ||
809 | suxc1_2 = "4c00000dFX", | ||
810 | prefx_2 = "4c00000fMX", | ||
811 | ["alnv.ps_4"] = "4c00001eFGHS", | ||
812 | ["madd.s_4"] = "4c000020FRGH", | ||
813 | ["madd.d_4"] = "4c000021FRGH", | ||
814 | ["madd.ps_4"] = "4c000026FRGH", | ||
815 | ["msub.s_4"] = "4c000028FRGH", | ||
816 | ["msub.d_4"] = "4c000029FRGH", | ||
817 | ["msub.ps_4"] = "4c00002eFRGH", | ||
818 | ["nmadd.s_4"] = "4c000030FRGH", | ||
819 | ["nmadd.d_4"] = "4c000031FRGH", | ||
820 | ["nmadd.ps_4"] = "4c000036FRGH", | ||
821 | ["nmsub.s_4"] = "4c000038FRGH", | ||
822 | ["nmsub.d_4"] = "4c000039FRGH", | ||
823 | ["nmsub.ps_4"] = "4c00003eFRGH", | ||
824 | |||
825 | }) do map_op[k] = v end | ||
826 | |||
827 | end | ||
828 | |||
607 | ------------------------------------------------------------------------------ | 829 | ------------------------------------------------------------------------------ |
608 | 830 | ||
609 | local function parse_gpr(expr) | 831 | local function parse_gpr(expr) |
@@ -633,7 +855,7 @@ local function parse_fpr(expr) | |||
633 | werror("bad register name `"..expr.."'") | 855 | werror("bad register name `"..expr.."'") |
634 | end | 856 | end |
635 | 857 | ||
636 | local function parse_imm(imm, bits, shift, scale, signed) | 858 | local function parse_imm(imm, bits, shift, scale, signed, action) |
637 | local n = tonumber(imm) | 859 | local n = tonumber(imm) |
638 | if n then | 860 | if n then |
639 | local m = sar(n, scale) | 861 | local m = sar(n, scale) |
@@ -651,7 +873,8 @@ local function parse_imm(imm, bits, shift, scale, signed) | |||
651 | match(imm, "^([%w_]+):([rf][1-3]?[0-9])$") then | 873 | match(imm, "^([%w_]+):([rf][1-3]?[0-9])$") then |
652 | werror("expected immediate operand, got register") | 874 | werror("expected immediate operand, got register") |
653 | else | 875 | else |
654 | waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm) | 876 | waction(action or "IMM", |
877 | (signed and 32768 or 0)+shl(scale, 10)+shl(bits, 5)+shift, imm) | ||
655 | return 0 | 878 | return 0 |
656 | end | 879 | end |
657 | end | 880 | end |
@@ -756,13 +979,18 @@ map_op[".template__"] = function(params, template, nparams) | |||
756 | op = op + parse_disp(params[n]); n = n + 1 | 979 | op = op + parse_disp(params[n]); n = n + 1 |
757 | elseif p == "X" then | 980 | elseif p == "X" then |
758 | op = op + parse_index(params[n]); n = n + 1 | 981 | op = op + parse_index(params[n]); n = n + 1 |
759 | elseif p == "B" or p == "J" then | 982 | elseif p == "B" or p == "J" or p == "K" or p == "L" then |
760 | local mode, m, s = parse_label(params[n], false) | 983 | local mode, m, s = parse_label(params[n], false) |
761 | if p == "B" then m = m + 2048 end | 984 | if p == "J" then m = m + 0xa800 |
985 | elseif p == "K" then m = m + 0x5000 | ||
986 | elseif p == "L" then m = m + 0xa000 end | ||
762 | waction("REL_"..mode, m, s, 1) | 987 | waction("REL_"..mode, m, s, 1) |
763 | n = n + 1 | 988 | n = n + 1 |
764 | elseif p == "A" then | 989 | elseif p == "A" then |
765 | op = op + parse_imm(params[n], 5, 6, 0, false); n = n + 1 | 990 | op = op + parse_imm(params[n], 5, 6, 0, false); n = n + 1 |
991 | elseif p == "a" then | ||
992 | local m = parse_imm(params[n], 6, 6, 0, false, "IMMS"); n = n + 1 | ||
993 | op = op + band(m, 0x7c0) + band(shr(m, 9), 4) | ||
766 | elseif p == "M" then | 994 | elseif p == "M" then |
767 | op = op + parse_imm(params[n], 5, 11, 0, false); n = n + 1 | 995 | op = op + parse_imm(params[n], 5, 11, 0, false); n = n + 1 |
768 | elseif p == "N" then | 996 | elseif p == "N" then |
@@ -778,7 +1006,7 @@ map_op[".template__"] = function(params, template, nparams) | |||
778 | elseif p == "Z" then | 1006 | elseif p == "Z" then |
779 | op = op + parse_imm(params[n], 10, 6, 0, false); n = n + 1 | 1007 | op = op + parse_imm(params[n], 10, 6, 0, false); n = n + 1 |
780 | elseif p == "=" then | 1008 | elseif p == "=" then |
781 | op = op + shl(band(op, 0xf800), 5) -- Copy D to T for clz, clo. | 1009 | n = n - 1 -- Re-use previous parameter for next template char. |
782 | else | 1010 | else |
783 | assert(false) | 1011 | assert(false) |
784 | end | 1012 | end |
diff --git a/dynasm/dasm_mips64.lua b/dynasm/dasm_mips64.lua new file mode 100644 index 00000000..0aae291b --- /dev/null +++ b/dynasm/dasm_mips64.lua | |||
@@ -0,0 +1,12 @@ | |||
1 | ------------------------------------------------------------------------------ | ||
2 | -- DynASM MIPS64 module. | ||
3 | -- | ||
4 | -- Copyright (C) 2005-2020 Mike Pall. All rights reserved. | ||
5 | -- See dynasm.lua for full copyright notice. | ||
6 | ------------------------------------------------------------------------------ | ||
7 | -- This module just sets 64 bit mode for the combined MIPS/MIPS64 module. | ||
8 | -- All the interesting stuff is there. | ||
9 | ------------------------------------------------------------------------------ | ||
10 | |||
-- Set the 64 bit mode flag first, then hand over to the combined module.
-- NOTE(review): presumably dasm_mips reads this global while loading -- confirm.
11 | mips64 = true -- Using a global is an ugly, but effective solution. | ||
12 | return require("dasm_mips") | ||
diff --git a/dynasm/dasm_ppc.h b/dynasm/dasm_ppc.h index 699d5c31..6e7cc7ab 100644 --- a/dynasm/dasm_ppc.h +++ b/dynasm/dasm_ppc.h | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | ** DynASM PPC encoding engine. | 2 | ** DynASM PPC/PPC64 encoding engine. |
3 | ** Copyright (C) 2005-2020 Mike Pall. All rights reserved. | 3 | ** Copyright (C) 2005-2020 Mike Pall. All rights reserved. |
4 | ** Released under the MIT license. See dynasm.lua for full copyright notice. | 4 | ** Released under the MIT license. See dynasm.lua for full copyright notice. |
5 | */ | 5 | */ |
@@ -21,7 +21,7 @@ enum { | |||
21 | /* The following actions need a buffer position. */ | 21 | /* The following actions need a buffer position. */ |
22 | DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG, | 22 | DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG, |
23 | /* The following actions also have an argument. */ | 23 | /* The following actions also have an argument. */ |
24 | DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, | 24 | DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMMSH, |
25 | DASM__MAX | 25 | DASM__MAX |
26 | }; | 26 | }; |
27 | 27 | ||
@@ -244,6 +244,10 @@ void dasm_put(Dst_DECL, int start, ...) | |||
244 | #endif | 244 | #endif |
245 | b[pos++] = n; | 245 | b[pos++] = n; |
246 | break; | 246 | break; |
247 | case DASM_IMMSH: | ||
248 | CK((n >> 6) == 0, RANGE_I); | ||
249 | b[pos++] = n; | ||
250 | break; | ||
247 | } | 251 | } |
248 | } | 252 | } |
249 | } | 253 | } |
@@ -299,7 +303,7 @@ int dasm_link(Dst_DECL, size_t *szp) | |||
299 | case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break; | 303 | case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break; |
300 | case DASM_REL_LG: case DASM_REL_PC: pos++; break; | 304 | case DASM_REL_LG: case DASM_REL_PC: pos++; break; |
301 | case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; | 305 | case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; |
302 | case DASM_IMM: pos++; break; | 306 | case DASM_IMM: case DASM_IMMSH: pos++; break; |
303 | } | 307 | } |
304 | } | 308 | } |
305 | stop: (void)0; | 309 | stop: (void)0; |
@@ -367,6 +371,9 @@ int dasm_encode(Dst_DECL, void *buffer) | |||
367 | case DASM_IMM: | 371 | case DASM_IMM: |
368 | cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31); | 372 | cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31); |
369 | break; | 373 | break; |
374 | case DASM_IMMSH: | ||
375 | cp[-1] |= (ins & 1) ? ((n&31)<<11)|((n&32)>>4) : ((n&31)<<6)|(n&32); | ||
376 | break; | ||
370 | default: *cp++ = ins; break; | 377 | default: *cp++ = ins; break; |
371 | } | 378 | } |
372 | } | 379 | } |
diff --git a/dynasm/dasm_ppc.lua b/dynasm/dasm_ppc.lua index 05981760..20634e13 100644 --- a/dynasm/dasm_ppc.lua +++ b/dynasm/dasm_ppc.lua | |||
@@ -1,17 +1,19 @@ | |||
1 | ------------------------------------------------------------------------------ | 1 | ------------------------------------------------------------------------------ |
2 | -- DynASM PPC module. | 2 | -- DynASM PPC/PPC64 module. |
3 | -- | 3 | -- |
4 | -- Copyright (C) 2005-2020 Mike Pall. All rights reserved. | 4 | -- Copyright (C) 2005-2020 Mike Pall. All rights reserved. |
5 | -- See dynasm.lua for full copyright notice. | 5 | -- See dynasm.lua for full copyright notice. |
6 | -- | ||
7 | -- Support for various extensions contributed by Caio Souza Oliveira. | ||
6 | ------------------------------------------------------------------------------ | 8 | ------------------------------------------------------------------------------ |
7 | 9 | ||
8 | -- Module information: | 10 | -- Module information: |
9 | local _info = { | 11 | local _info = { |
10 | arch = "ppc", | 12 | arch = "ppc", |
11 | description = "DynASM PPC module", | 13 | description = "DynASM PPC module", |
12 | version = "1.3.0", | 14 | version = "1.4.0", |
13 | vernum = 10300, | 15 | vernum = 10400, |
14 | release = "2011-05-05", | 16 | release = "2015-10-18", |
15 | author = "Mike Pall", | 17 | author = "Mike Pall", |
16 | license = "MIT", | 18 | license = "MIT", |
17 | } | 19 | } |
@@ -39,7 +41,7 @@ local wline, werror, wfatal, wwarn | |||
39 | local action_names = { | 41 | local action_names = { |
40 | "STOP", "SECTION", "ESC", "REL_EXT", | 42 | "STOP", "SECTION", "ESC", "REL_EXT", |
41 | "ALIGN", "REL_LG", "LABEL_LG", | 43 | "ALIGN", "REL_LG", "LABEL_LG", |
42 | "REL_PC", "LABEL_PC", "IMM", | 44 | "REL_PC", "LABEL_PC", "IMM", "IMMSH" |
43 | } | 45 | } |
44 | 46 | ||
45 | -- Maximum number of section buffer positions for dasm_put(). | 47 | -- Maximum number of section buffer positions for dasm_put(). |
@@ -228,8 +230,18 @@ local map_cond = { | |||
228 | 230 | ||
229 | ------------------------------------------------------------------------------ | 231 | ------------------------------------------------------------------------------ |
230 | 232 | ||
233 | local map_op, op_template | ||
234 | |||
-- Build the handler for an alias of the opcode template stored under
-- map_op[opname].
--   opname: map_op key of the target instruction, e.g. "rlwinm_5".
--   f:      callback that rewrites the params table in place into the
--           operand list expected by the target template.
-- Returns a function(params, nparams) that is stored as a map_op entry.
235 | local function op_alias(opname, f) | ||
236 | return function(params, nparams) | ||
-- Called without params: only describe the alias as "-> <target>".
-- sub(1, -3) drops the last two characters of opname (the "_N" suffix).
237 | if not params then return "-> "..opname:sub(1, -3) end | ||
-- Let the callback rewrite the operands; nparams is passed along as well.
238 | f(params, nparams) | ||
-- NOTE(review): nparams is still the operand count of the alias here, not
-- that of the target template -- presumably op_template tolerates it; confirm.
239 | op_template(params, map_op[opname], nparams) | ||
240 | end | ||
241 | end | ||
242 | |||
231 | -- Template strings for PPC instructions. | 243 | -- Template strings for PPC instructions. |
232 | local map_op = { | 244 | map_op = { |
233 | tdi_3 = "08000000ARI", | 245 | tdi_3 = "08000000ARI", |
234 | twi_3 = "0c000000ARI", | 246 | twi_3 = "0c000000ARI", |
235 | mulli_3 = "1c000000RRI", | 247 | mulli_3 = "1c000000RRI", |
@@ -297,6 +309,250 @@ local map_op = { | |||
297 | std_2 = "f8000000RD", | 309 | std_2 = "f8000000RD", |
298 | stdu_2 = "f8000001RD", | 310 | stdu_2 = "f8000001RD", |
299 | 311 | ||
312 | subi_3 = op_alias("addi_3", function(p) p[3] = "-("..p[3]..")" end), | ||
313 | subis_3 = op_alias("addis_3", function(p) p[3] = "-("..p[3]..")" end), | ||
314 | subic_3 = op_alias("addic_3", function(p) p[3] = "-("..p[3]..")" end), | ||
315 | ["subic._3"] = op_alias("addic._3", function(p) p[3] = "-("..p[3]..")" end), | ||
316 | |||
317 | rotlwi_3 = op_alias("rlwinm_5", function(p) | ||
318 | p[4] = "0"; p[5] = "31" | ||
319 | end), | ||
320 | rotrwi_3 = op_alias("rlwinm_5", function(p) | ||
321 | p[3] = "32-("..p[3]..")"; p[4] = "0"; p[5] = "31" | ||
322 | end), | ||
323 | rotlw_3 = op_alias("rlwnm_5", function(p) | ||
324 | p[4] = "0"; p[5] = "31" | ||
325 | end), | ||
326 | slwi_3 = op_alias("rlwinm_5", function(p) | ||
327 | p[5] = "31-("..p[3]..")"; p[4] = "0" | ||
328 | end), | ||
329 | srwi_3 = op_alias("rlwinm_5", function(p) | ||
330 | p[4] = p[3]; p[3] = "32-("..p[3]..")"; p[5] = "31" | ||
331 | end), | ||
332 | clrlwi_3 = op_alias("rlwinm_5", function(p) | ||
333 | p[4] = p[3]; p[3] = "0"; p[5] = "31" | ||
334 | end), | ||
335 | clrrwi_3 = op_alias("rlwinm_5", function(p) | ||
336 | p[5] = "31-("..p[3]..")"; p[3] = "0"; p[4] = "0" | ||
337 | end), | ||
338 | |||
339 | -- Primary opcode 4: | ||
340 | mulhhwu_3 = "10000010RRR.", | ||
341 | machhwu_3 = "10000018RRR.", | ||
342 | mulhhw_3 = "10000050RRR.", | ||
343 | nmachhw_3 = "1000005cRRR.", | ||
344 | machhwsu_3 = "10000098RRR.", | ||
345 | machhws_3 = "100000d8RRR.", | ||
346 | nmachhws_3 = "100000dcRRR.", | ||
347 | mulchwu_3 = "10000110RRR.", | ||
348 | macchwu_3 = "10000118RRR.", | ||
349 | mulchw_3 = "10000150RRR.", | ||
350 | macchw_3 = "10000158RRR.", | ||
351 | nmacchw_3 = "1000015cRRR.", | ||
352 | macchwsu_3 = "10000198RRR.", | ||
353 | macchws_3 = "100001d8RRR.", | ||
354 | nmacchws_3 = "100001dcRRR.", | ||
355 | mullhw_3 = "10000350RRR.", | ||
356 | maclhw_3 = "10000358RRR.", | ||
357 | nmaclhw_3 = "1000035cRRR.", | ||
358 | maclhwsu_3 = "10000398RRR.", | ||
359 | maclhws_3 = "100003d8RRR.", | ||
360 | nmaclhws_3 = "100003dcRRR.", | ||
361 | machhwuo_3 = "10000418RRR.", | ||
362 | nmachhwo_3 = "1000045cRRR.", | ||
363 | machhwsuo_3 = "10000498RRR.", | ||
364 | machhwso_3 = "100004d8RRR.", | ||
365 | nmachhwso_3 = "100004dcRRR.", | ||
366 | macchwuo_3 = "10000518RRR.", | ||
367 | macchwo_3 = "10000558RRR.", | ||
368 | nmacchwo_3 = "1000055cRRR.", | ||
369 | macchwsuo_3 = "10000598RRR.", | ||
370 | macchwso_3 = "100005d8RRR.", | ||
371 | nmacchwso_3 = "100005dcRRR.", | ||
372 | maclhwo_3 = "10000758RRR.", | ||
373 | nmaclhwo_3 = "1000075cRRR.", | ||
374 | maclhwsuo_3 = "10000798RRR.", | ||
375 | maclhwso_3 = "100007d8RRR.", | ||
376 | nmaclhwso_3 = "100007dcRRR.", | ||
377 | |||
378 | vaddubm_3 = "10000000VVV", | ||
379 | vmaxub_3 = "10000002VVV", | ||
380 | vrlb_3 = "10000004VVV", | ||
381 | vcmpequb_3 = "10000006VVV", | ||
382 | vmuloub_3 = "10000008VVV", | ||
383 | vaddfp_3 = "1000000aVVV", | ||
384 | vmrghb_3 = "1000000cVVV", | ||
385 | vpkuhum_3 = "1000000eVVV", | ||
386 | vmhaddshs_4 = "10000020VVVV", | ||
387 | vmhraddshs_4 = "10000021VVVV", | ||
388 | vmladduhm_4 = "10000022VVVV", | ||
389 | vmsumubm_4 = "10000024VVVV", | ||
390 | vmsummbm_4 = "10000025VVVV", | ||
391 | vmsumuhm_4 = "10000026VVVV", | ||
392 | vmsumuhs_4 = "10000027VVVV", | ||
393 | vmsumshm_4 = "10000028VVVV", | ||
394 | vmsumshs_4 = "10000029VVVV", | ||
395 | vsel_4 = "1000002aVVVV", | ||
396 | vperm_4 = "1000002bVVVV", | ||
397 | vsldoi_4 = "1000002cVVVP", | ||
398 | vpermxor_4 = "1000002dVVVV", | ||
399 | vmaddfp_4 = "1000002eVVVV~", | ||
400 | vnmsubfp_4 = "1000002fVVVV~", | ||
401 | vaddeuqm_4 = "1000003cVVVV", | ||
402 | vaddecuq_4 = "1000003dVVVV", | ||
403 | vsubeuqm_4 = "1000003eVVVV", | ||
404 | vsubecuq_4 = "1000003fVVVV", | ||
405 | vadduhm_3 = "10000040VVV", | ||
406 | vmaxuh_3 = "10000042VVV", | ||
407 | vrlh_3 = "10000044VVV", | ||
408 | vcmpequh_3 = "10000046VVV", | ||
409 | vmulouh_3 = "10000048VVV", | ||
410 | vsubfp_3 = "1000004aVVV", | ||
411 | vmrghh_3 = "1000004cVVV", | ||
412 | vpkuwum_3 = "1000004eVVV", | ||
413 | vadduwm_3 = "10000080VVV", | ||
414 | vmaxuw_3 = "10000082VVV", | ||
415 | vrlw_3 = "10000084VVV", | ||
416 | vcmpequw_3 = "10000086VVV", | ||
417 | vmulouw_3 = "10000088VVV", | ||
418 | vmuluwm_3 = "10000089VVV", | ||
419 | vmrghw_3 = "1000008cVVV", | ||
420 | vpkuhus_3 = "1000008eVVV", | ||
421 | vaddudm_3 = "100000c0VVV", | ||
422 | vmaxud_3 = "100000c2VVV", | ||
423 | vrld_3 = "100000c4VVV", | ||
424 | vcmpeqfp_3 = "100000c6VVV", | ||
425 | vcmpequd_3 = "100000c7VVV", | ||
426 | vpkuwus_3 = "100000ceVVV", | ||
427 | vadduqm_3 = "10000100VVV", | ||
428 | vmaxsb_3 = "10000102VVV", | ||
429 | vslb_3 = "10000104VVV", | ||
430 | vmulosb_3 = "10000108VVV", | ||
431 | vrefp_2 = "1000010aV-V", | ||
432 | vmrglb_3 = "1000010cVVV", | ||
433 | vpkshus_3 = "1000010eVVV", | ||
434 | vaddcuq_3 = "10000140VVV", | ||
435 | vmaxsh_3 = "10000142VVV", | ||
436 | vslh_3 = "10000144VVV", | ||
437 | vmulosh_3 = "10000148VVV", | ||
438 | vrsqrtefp_2 = "1000014aV-V", | ||
439 | vmrglh_3 = "1000014cVVV", | ||
440 | vpkswus_3 = "1000014eVVV", | ||
441 | vaddcuw_3 = "10000180VVV", | ||
442 | vmaxsw_3 = "10000182VVV", | ||
443 | vslw_3 = "10000184VVV", | ||
444 | vmulosw_3 = "10000188VVV", | ||
445 | vexptefp_2 = "1000018aV-V", | ||
446 | vmrglw_3 = "1000018cVVV", | ||
447 | vpkshss_3 = "1000018eVVV", | ||
448 | vmaxsd_3 = "100001c2VVV", | ||
449 | vsl_3 = "100001c4VVV", | ||
450 | vcmpgefp_3 = "100001c6VVV", | ||
451 | vlogefp_2 = "100001caV-V", | ||
452 | vpkswss_3 = "100001ceVVV", | ||
453 | vadduhs_3 = "10000240VVV", | ||
454 | vminuh_3 = "10000242VVV", | ||
455 | vsrh_3 = "10000244VVV", | ||
456 | vcmpgtuh_3 = "10000246VVV", | ||
457 | vmuleuh_3 = "10000248VVV", | ||
458 | vrfiz_2 = "1000024aV-V", | ||
459 | vsplth_3 = "1000024cVV3", | ||
460 | vupkhsh_2 = "1000024eV-V", | ||
461 | vminuw_3 = "10000282VVV", | ||
462 | vminud_3 = "100002c2VVV", | ||
463 | vcmpgtud_3 = "100002c7VVV", | ||
464 | vrfim_2 = "100002caV-V", | ||
465 | vcmpgtsb_3 = "10000306VVV", | ||
466 | vcfux_3 = "1000030aVVA~", | ||
467 | vaddshs_3 = "10000340VVV", | ||
468 | vminsh_3 = "10000342VVV", | ||
469 | vsrah_3 = "10000344VVV", | ||
470 | vcmpgtsh_3 = "10000346VVV", | ||
471 | vmulesh_3 = "10000348VVV", | ||
472 | vcfsx_3 = "1000034aVVA~", | ||
473 | vspltish_2 = "1000034cVS", | ||
474 | vupkhpx_2 = "1000034eV-V", | ||
475 | vaddsws_3 = "10000380VVV", | ||
476 | vminsw_3 = "10000382VVV", | ||
477 | vsraw_3 = "10000384VVV", | ||
478 | vcmpgtsw_3 = "10000386VVV", | ||
479 | vmulesw_3 = "10000388VVV", | ||
480 | vctuxs_3 = "1000038aVVA~", | ||
481 | vspltisw_2 = "1000038cVS", | ||
482 | vminsd_3 = "100003c2VVV", | ||
483 | vsrad_3 = "100003c4VVV", | ||
484 | vcmpbfp_3 = "100003c6VVV", | ||
485 | vcmpgtsd_3 = "100003c7VVV", | ||
486 | vctsxs_3 = "100003caVVA~", | ||
487 | vupklpx_2 = "100003ceV-V", | ||
488 | vsububm_3 = "10000400VVV", | ||
489 | ["bcdadd._4"] = "10000401VVVy.", | ||
490 | vavgub_3 = "10000402VVV", | ||
491 | vand_3 = "10000404VVV", | ||
492 | ["vcmpequb._3"] = "10000406VVV", | ||
493 | vmaxfp_3 = "1000040aVVV", | ||
494 | vsubuhm_3 = "10000440VVV", | ||
495 | ["bcdsub._4"] = "10000441VVVy.", | ||
496 | vavguh_3 = "10000442VVV", | ||
497 | vandc_3 = "10000444VVV", | ||
498 | ["vcmpequh._3"] = "10000446VVV", | ||
499 | vminfp_3 = "1000044aVVV", | ||
500 | vpkudum_3 = "1000044eVVV", | ||
501 | vsubuwm_3 = "10000480VVV", | ||
502 | vavguw_3 = "10000482VVV", | ||
503 | vor_3 = "10000484VVV", | ||
504 | ["vcmpequw._3"] = "10000486VVV", | ||
505 | vpmsumw_3 = "10000488VVV", | ||
506 | ["vcmpeqfp._3"] = "100004c6VVV", | ||
507 | ["vcmpequd._3"] = "100004c7VVV", | ||
508 | vpkudus_3 = "100004ceVVV", | ||
509 | vavgsb_3 = "10000502VVV", | ||
510 | vavgsh_3 = "10000542VVV", | ||
511 | vorc_3 = "10000544VVV", | ||
512 | vbpermq_3 = "1000054cVVV", | ||
513 | vpksdus_3 = "1000054eVVV", | ||
514 | vavgsw_3 = "10000582VVV", | ||
515 | vsld_3 = "100005c4VVV", | ||
516 | ["vcmpgefp._3"] = "100005c6VVV", | ||
517 | vpksdss_3 = "100005ceVVV", | ||
518 | vsububs_3 = "10000600VVV", | ||
519 | mfvscr_1 = "10000604V--", | ||
520 | vsum4ubs_3 = "10000608VVV", | ||
521 | vsubuhs_3 = "10000640VVV", | ||
522 | mtvscr_1 = "10000644--V", | ||
523 | ["vcmpgtuh._3"] = "10000646VVV", | ||
524 | vsum4shs_3 = "10000648VVV", | ||
525 | vupkhsw_2 = "1000064eV-V", | ||
526 | vsubuws_3 = "10000680VVV", | ||
527 | vshasigmaw_4 = "10000682VVYp", | ||
528 | veqv_3 = "10000684VVV", | ||
529 | vsum2sws_3 = "10000688VVV", | ||
530 | vmrgow_3 = "1000068cVVV", | ||
531 | vshasigmad_4 = "100006c2VVYp", | ||
532 | vsrd_3 = "100006c4VVV", | ||
533 | ["vcmpgtud._3"] = "100006c7VVV", | ||
534 | vupklsw_2 = "100006ceV-V", | ||
535 | vupkslw_2 = "100006ceV-V", | ||
536 | vsubsbs_3 = "10000700VVV", | ||
537 | vclzb_2 = "10000702V-V", | ||
538 | vpopcntb_2 = "10000703V-V", | ||
539 | ["vcmpgtsb._3"] = "10000706VVV", | ||
540 | vsum4sbs_3 = "10000708VVV", | ||
541 | vsubshs_3 = "10000740VVV", | ||
542 | vclzh_2 = "10000742V-V", | ||
543 | vpopcnth_2 = "10000743V-V", | ||
544 | ["vcmpgtsh._3"] = "10000746VVV", | ||
545 | vsubsws_3 = "10000780VVV", | ||
546 | vclzw_2 = "10000782V-V", | ||
547 | vpopcntw_2 = "10000783V-V", | ||
548 | ["vcmpgtsw._3"] = "10000786VVV", | ||
549 | vsumsws_3 = "10000788VVV", | ||
550 | vmrgew_3 = "1000078cVVV", | ||
551 | vclzd_2 = "100007c2V-V", | ||
552 | vpopcntd_2 = "100007c3V-V", | ||
553 | ["vcmpbfp._3"] = "100007c6VVV", | ||
554 | ["vcmpgtsd._3"] = "100007c7VVV", | ||
555 | |||
300 | -- Primary opcode 19: | 556 | -- Primary opcode 19: |
301 | mcrf_2 = "4c000000XX", | 557 | mcrf_2 = "4c000000XX", |
302 | isync_0 = "4c00012c", | 558 | isync_0 = "4c00012c", |
@@ -316,6 +572,8 @@ local map_op = { | |||
316 | bclrl_2 = "4c000021AA", | 572 | bclrl_2 = "4c000021AA", |
317 | bcctr_2 = "4c000420AA", | 573 | bcctr_2 = "4c000420AA", |
318 | bcctrl_2 = "4c000421AA", | 574 | bcctrl_2 = "4c000421AA", |
575 | bctar_2 = "4c000460AA", | ||
576 | bctarl_2 = "4c000461AA", | ||
319 | blr_0 = "4e800020", | 577 | blr_0 = "4e800020", |
320 | blrl_0 = "4e800021", | 578 | blrl_0 = "4e800021", |
321 | bctr_0 = "4e800420", | 579 | bctr_0 = "4e800420", |
@@ -327,6 +585,7 @@ local map_op = { | |||
327 | cmpd_3 = "7c200000XRR", | 585 | cmpd_3 = "7c200000XRR", |
328 | cmpd_2 = "7c200000-RR", | 586 | cmpd_2 = "7c200000-RR", |
329 | tw_3 = "7c000008ARR", | 587 | tw_3 = "7c000008ARR", |
588 | lvsl_3 = "7c00000cVRR", | ||
330 | subfc_3 = "7c000010RRR.", | 589 | subfc_3 = "7c000010RRR.", |
331 | subc_3 = "7c000010RRR~.", | 590 | subc_3 = "7c000010RRR~.", |
332 | mulhdu_3 = "7c000012RRR.", | 591 | mulhdu_3 = "7c000012RRR.", |
@@ -351,50 +610,68 @@ local map_op = { | |||
351 | cmplw_2 = "7c000040-RR", | 610 | cmplw_2 = "7c000040-RR", |
352 | cmpld_3 = "7c200040XRR", | 611 | cmpld_3 = "7c200040XRR", |
353 | cmpld_2 = "7c200040-RR", | 612 | cmpld_2 = "7c200040-RR", |
613 | lvsr_3 = "7c00004cVRR", | ||
354 | subf_3 = "7c000050RRR.", | 614 | subf_3 = "7c000050RRR.", |
355 | sub_3 = "7c000050RRR~.", | 615 | sub_3 = "7c000050RRR~.", |
616 | lbarx_3 = "7c000068RR0R", | ||
356 | ldux_3 = "7c00006aRR0R", | 617 | ldux_3 = "7c00006aRR0R", |
357 | dcbst_2 = "7c00006c-RR", | 618 | dcbst_2 = "7c00006c-RR", |
358 | lwzux_3 = "7c00006eRR0R", | 619 | lwzux_3 = "7c00006eRR0R", |
359 | cntlzd_2 = "7c000074RR~", | 620 | cntlzd_2 = "7c000074RR~", |
360 | andc_3 = "7c000078RR~R.", | 621 | andc_3 = "7c000078RR~R.", |
361 | td_3 = "7c000088ARR", | 622 | td_3 = "7c000088ARR", |
623 | lvewx_3 = "7c00008eVRR", | ||
362 | mulhd_3 = "7c000092RRR.", | 624 | mulhd_3 = "7c000092RRR.", |
625 | addg6s_3 = "7c000094RRR", | ||
363 | mulhw_3 = "7c000096RRR.", | 626 | mulhw_3 = "7c000096RRR.", |
627 | dlmzb_3 = "7c00009cRR~R.", | ||
364 | ldarx_3 = "7c0000a8RR0R", | 628 | ldarx_3 = "7c0000a8RR0R", |
365 | dcbf_2 = "7c0000ac-RR", | 629 | dcbf_2 = "7c0000ac-RR", |
366 | lbzx_3 = "7c0000aeRR0R", | 630 | lbzx_3 = "7c0000aeRR0R", |
631 | lvx_3 = "7c0000ceVRR", | ||
367 | neg_2 = "7c0000d0RR.", | 632 | neg_2 = "7c0000d0RR.", |
633 | lharx_3 = "7c0000e8RR0R", | ||
368 | lbzux_3 = "7c0000eeRR0R", | 634 | lbzux_3 = "7c0000eeRR0R", |
369 | popcntb_2 = "7c0000f4RR~", | 635 | popcntb_2 = "7c0000f4RR~", |
370 | not_2 = "7c0000f8RR~%.", | 636 | not_2 = "7c0000f8RR~%.", |
371 | nor_3 = "7c0000f8RR~R.", | 637 | nor_3 = "7c0000f8RR~R.", |
638 | stvebx_3 = "7c00010eVRR", | ||
372 | subfe_3 = "7c000110RRR.", | 639 | subfe_3 = "7c000110RRR.", |
373 | sube_3 = "7c000110RRR~.", | 640 | sube_3 = "7c000110RRR~.", |
374 | adde_3 = "7c000114RRR.", | 641 | adde_3 = "7c000114RRR.", |
375 | stdx_3 = "7c00012aRR0R", | 642 | stdx_3 = "7c00012aRR0R", |
376 | stwcx_3 = "7c00012cRR0R.", | 643 | ["stwcx._3"] = "7c00012dRR0R.", |
377 | stwx_3 = "7c00012eRR0R", | 644 | stwx_3 = "7c00012eRR0R", |
378 | prtyw_2 = "7c000134RR~", | 645 | prtyw_2 = "7c000134RR~", |
646 | stvehx_3 = "7c00014eVRR", | ||
379 | stdux_3 = "7c00016aRR0R", | 647 | stdux_3 = "7c00016aRR0R", |
648 | ["stqcx._3"] = "7c00016dR:R0R.", | ||
380 | stwux_3 = "7c00016eRR0R", | 649 | stwux_3 = "7c00016eRR0R", |
381 | prtyd_2 = "7c000174RR~", | 650 | prtyd_2 = "7c000174RR~", |
651 | stvewx_3 = "7c00018eVRR", | ||
382 | subfze_2 = "7c000190RR.", | 652 | subfze_2 = "7c000190RR.", |
383 | addze_2 = "7c000194RR.", | 653 | addze_2 = "7c000194RR.", |
384 | stdcx_3 = "7c0001acRR0R.", | 654 | ["stdcx._3"] = "7c0001adRR0R.", |
385 | stbx_3 = "7c0001aeRR0R", | 655 | stbx_3 = "7c0001aeRR0R", |
656 | stvx_3 = "7c0001ceVRR", | ||
386 | subfme_2 = "7c0001d0RR.", | 657 | subfme_2 = "7c0001d0RR.", |
387 | mulld_3 = "7c0001d2RRR.", | 658 | mulld_3 = "7c0001d2RRR.", |
388 | addme_2 = "7c0001d4RR.", | 659 | addme_2 = "7c0001d4RR.", |
389 | mullw_3 = "7c0001d6RRR.", | 660 | mullw_3 = "7c0001d6RRR.", |
390 | dcbtst_2 = "7c0001ec-RR", | 661 | dcbtst_2 = "7c0001ec-RR", |
391 | stbux_3 = "7c0001eeRR0R", | 662 | stbux_3 = "7c0001eeRR0R", |
663 | bpermd_3 = "7c0001f8RR~R", | ||
664 | lvepxl_3 = "7c00020eVRR", | ||
392 | add_3 = "7c000214RRR.", | 665 | add_3 = "7c000214RRR.", |
666 | lqarx_3 = "7c000228R:R0R", | ||
393 | dcbt_2 = "7c00022c-RR", | 667 | dcbt_2 = "7c00022c-RR", |
394 | lhzx_3 = "7c00022eRR0R", | 668 | lhzx_3 = "7c00022eRR0R", |
669 | cdtbcd_2 = "7c000234RR~", | ||
395 | eqv_3 = "7c000238RR~R.", | 670 | eqv_3 = "7c000238RR~R.", |
671 | lvepx_3 = "7c00024eVRR", | ||
396 | eciwx_3 = "7c00026cRR0R", | 672 | eciwx_3 = "7c00026cRR0R", |
397 | lhzux_3 = "7c00026eRR0R", | 673 | lhzux_3 = "7c00026eRR0R", |
674 | cbcdtd_2 = "7c000274RR~", | ||
398 | xor_3 = "7c000278RR~R.", | 675 | xor_3 = "7c000278RR~R.", |
399 | mfspefscr_1 = "7c0082a6R", | 676 | mfspefscr_1 = "7c0082a6R", |
400 | mfxer_1 = "7c0102a6R", | 677 | mfxer_1 = "7c0102a6R", |
@@ -404,8 +681,12 @@ local map_op = { | |||
404 | lhax_3 = "7c0002aeRR0R", | 681 | lhax_3 = "7c0002aeRR0R", |
405 | mftb_1 = "7c0c42e6R", | 682 | mftb_1 = "7c0c42e6R", |
406 | mftbu_1 = "7c0d42e6R", | 683 | mftbu_1 = "7c0d42e6R", |
684 | lvxl_3 = "7c0002ceVRR", | ||
407 | lwaux_3 = "7c0002eaRR0R", | 685 | lwaux_3 = "7c0002eaRR0R", |
408 | lhaux_3 = "7c0002eeRR0R", | 686 | lhaux_3 = "7c0002eeRR0R", |
687 | popcntw_2 = "7c0002f4RR~", | ||
688 | divdeu_3 = "7c000312RRR.", | ||
689 | divweu_3 = "7c000316RRR.", | ||
409 | sthx_3 = "7c00032eRR0R", | 690 | sthx_3 = "7c00032eRR0R", |
410 | orc_3 = "7c000338RR~R.", | 691 | orc_3 = "7c000338RR~R.", |
411 | ecowx_3 = "7c00036cRR0R", | 692 | ecowx_3 = "7c00036cRR0R", |
@@ -420,10 +701,14 @@ local map_op = { | |||
420 | mtctr_1 = "7c0903a6R", | 701 | mtctr_1 = "7c0903a6R", |
421 | dcbi_2 = "7c0003ac-RR", | 702 | dcbi_2 = "7c0003ac-RR", |
422 | nand_3 = "7c0003b8RR~R.", | 703 | nand_3 = "7c0003b8RR~R.", |
704 | dsn_2 = "7c0003c6-RR", | ||
705 | stvxl_3 = "7c0003ceVRR", | ||
423 | divd_3 = "7c0003d2RRR.", | 706 | divd_3 = "7c0003d2RRR.", |
424 | divw_3 = "7c0003d6RRR.", | 707 | divw_3 = "7c0003d6RRR.", |
708 | popcntd_2 = "7c0003f4RR~", | ||
425 | cmpb_3 = "7c0003f8RR~R.", | 709 | cmpb_3 = "7c0003f8RR~R.", |
426 | mcrxr_1 = "7c000400X", | 710 | mcrxr_1 = "7c000400X", |
711 | lbdx_3 = "7c000406RRR", | ||
427 | subfco_3 = "7c000410RRR.", | 712 | subfco_3 = "7c000410RRR.", |
428 | subco_3 = "7c000410RRR~.", | 713 | subco_3 = "7c000410RRR~.", |
429 | addco_3 = "7c000414RRR.", | 714 | addco_3 = "7c000414RRR.", |
@@ -433,16 +718,20 @@ local map_op = { | |||
433 | lfsx_3 = "7c00042eFR0R", | 718 | lfsx_3 = "7c00042eFR0R", |
434 | srw_3 = "7c000430RR~R.", | 719 | srw_3 = "7c000430RR~R.", |
435 | srd_3 = "7c000436RR~R.", | 720 | srd_3 = "7c000436RR~R.", |
721 | lhdx_3 = "7c000446RRR", | ||
436 | subfo_3 = "7c000450RRR.", | 722 | subfo_3 = "7c000450RRR.", |
437 | subo_3 = "7c000450RRR~.", | 723 | subo_3 = "7c000450RRR~.", |
438 | lfsux_3 = "7c00046eFR0R", | 724 | lfsux_3 = "7c00046eFR0R", |
725 | lwdx_3 = "7c000486RRR", | ||
439 | lswi_3 = "7c0004aaRR0A", | 726 | lswi_3 = "7c0004aaRR0A", |
440 | sync_0 = "7c0004ac", | 727 | sync_0 = "7c0004ac", |
441 | lwsync_0 = "7c2004ac", | 728 | lwsync_0 = "7c2004ac", |
442 | ptesync_0 = "7c4004ac", | 729 | ptesync_0 = "7c4004ac", |
443 | lfdx_3 = "7c0004aeFR0R", | 730 | lfdx_3 = "7c0004aeFR0R", |
731 | lddx_3 = "7c0004c6RRR", | ||
444 | nego_2 = "7c0004d0RR.", | 732 | nego_2 = "7c0004d0RR.", |
445 | lfdux_3 = "7c0004eeFR0R", | 733 | lfdux_3 = "7c0004eeFR0R", |
734 | stbdx_3 = "7c000506RRR", | ||
446 | subfeo_3 = "7c000510RRR.", | 735 | subfeo_3 = "7c000510RRR.", |
447 | subeo_3 = "7c000510RRR~.", | 736 | subeo_3 = "7c000510RRR~.", |
448 | addeo_3 = "7c000514RRR.", | 737 | addeo_3 = "7c000514RRR.", |
@@ -450,27 +739,42 @@ local map_op = { | |||
450 | stswx_3 = "7c00052aRR0R", | 739 | stswx_3 = "7c00052aRR0R", |
451 | stwbrx_3 = "7c00052cRR0R", | 740 | stwbrx_3 = "7c00052cRR0R", |
452 | stfsx_3 = "7c00052eFR0R", | 741 | stfsx_3 = "7c00052eFR0R", |
742 | sthdx_3 = "7c000546RRR", | ||
743 | ["stbcx._3"] = "7c00056dRRR", | ||
453 | stfsux_3 = "7c00056eFR0R", | 744 | stfsux_3 = "7c00056eFR0R", |
745 | stwdx_3 = "7c000586RRR", | ||
454 | subfzeo_2 = "7c000590RR.", | 746 | subfzeo_2 = "7c000590RR.", |
455 | addzeo_2 = "7c000594RR.", | 747 | addzeo_2 = "7c000594RR.", |
456 | stswi_3 = "7c0005aaRR0A", | 748 | stswi_3 = "7c0005aaRR0A", |
749 | ["sthcx._3"] = "7c0005adRRR", | ||
457 | stfdx_3 = "7c0005aeFR0R", | 750 | stfdx_3 = "7c0005aeFR0R", |
751 | stddx_3 = "7c0005c6RRR", | ||
458 | subfmeo_2 = "7c0005d0RR.", | 752 | subfmeo_2 = "7c0005d0RR.", |
459 | mulldo_3 = "7c0005d2RRR.", | 753 | mulldo_3 = "7c0005d2RRR.", |
460 | addmeo_2 = "7c0005d4RR.", | 754 | addmeo_2 = "7c0005d4RR.", |
461 | mullwo_3 = "7c0005d6RRR.", | 755 | mullwo_3 = "7c0005d6RRR.", |
462 | dcba_2 = "7c0005ec-RR", | 756 | dcba_2 = "7c0005ec-RR", |
463 | stfdux_3 = "7c0005eeFR0R", | 757 | stfdux_3 = "7c0005eeFR0R", |
758 | stvepxl_3 = "7c00060eVRR", | ||
464 | addo_3 = "7c000614RRR.", | 759 | addo_3 = "7c000614RRR.", |
465 | lhbrx_3 = "7c00062cRR0R", | 760 | lhbrx_3 = "7c00062cRR0R", |
761 | lfdpx_3 = "7c00062eF:RR", | ||
466 | sraw_3 = "7c000630RR~R.", | 762 | sraw_3 = "7c000630RR~R.", |
467 | srad_3 = "7c000634RR~R.", | 763 | srad_3 = "7c000634RR~R.", |
764 | lfddx_3 = "7c000646FRR", | ||
765 | stvepx_3 = "7c00064eVRR", | ||
468 | srawi_3 = "7c000670RR~A.", | 766 | srawi_3 = "7c000670RR~A.", |
469 | sradi_3 = "7c000674RR~H.", | 767 | sradi_3 = "7c000674RR~H.", |
470 | eieio_0 = "7c0006ac", | 768 | eieio_0 = "7c0006ac", |
471 | lfiwax_3 = "7c0006aeFR0R", | 769 | lfiwax_3 = "7c0006aeFR0R", |
770 | divdeuo_3 = "7c000712RRR.", | ||
771 | divweuo_3 = "7c000716RRR.", | ||
472 | sthbrx_3 = "7c00072cRR0R", | 772 | sthbrx_3 = "7c00072cRR0R", |
773 | stfdpx_3 = "7c00072eF:RR", | ||
473 | extsh_2 = "7c000734RR~.", | 774 | extsh_2 = "7c000734RR~.", |
775 | stfddx_3 = "7c000746FRR", | ||
776 | divdeo_3 = "7c000752RRR.", | ||
777 | divweo_3 = "7c000756RRR.", | ||
474 | extsb_2 = "7c000774RR~.", | 778 | extsb_2 = "7c000774RR~.", |
475 | divduo_3 = "7c000792RRR.", | 779 | divduo_3 = "7c000792RRR.", |
476 | divwou_3 = "7c000796RRR.", | 780 | divwou_3 = "7c000796RRR.", |
@@ -481,6 +785,40 @@ local map_op = { | |||
481 | divwo_3 = "7c0007d6RRR.", | 785 | divwo_3 = "7c0007d6RRR.", |
482 | dcbz_2 = "7c0007ec-RR", | 786 | dcbz_2 = "7c0007ec-RR", |
483 | 787 | ||
788 | ["tbegin._1"] = "7c00051d1", | ||
789 | ["tbegin._0"] = "7c00051d", | ||
790 | ["tend._1"] = "7c00055dY", | ||
791 | ["tend._0"] = "7c00055d", | ||
792 | ["tendall._0"] = "7e00055d", | ||
793 | tcheck_1 = "7c00059cX", | ||
794 | ["tsr._1"] = "7c0005dd1", | ||
795 | ["tsuspend._0"] = "7c0005dd", | ||
796 | ["tresume._0"] = "7c2005dd", | ||
797 | ["tabortwc._3"] = "7c00061dARR", | ||
798 | ["tabortdc._3"] = "7c00065dARR", | ||
799 | ["tabortwci._3"] = "7c00069dARS", | ||
800 | ["tabortdci._3"] = "7c0006ddARS", | ||
801 | ["tabort._1"] = "7c00071d-R-", | ||
802 | ["treclaim._1"] = "7c00075d-R", | ||
803 | ["trechkpt._0"] = "7c0007dd", | ||
804 | |||
805 | lxsiwzx_3 = "7c000018QRR", | ||
806 | lxsiwax_3 = "7c000098QRR", | ||
807 | mfvsrd_2 = "7c000066-Rq", | ||
808 | mfvsrwz_2 = "7c0000e6-Rq", | ||
809 | stxsiwx_3 = "7c000118QRR", | ||
810 | mtvsrd_2 = "7c000166QR", | ||
811 | mtvsrwa_2 = "7c0001a6QR", | ||
812 | lxvdsx_3 = "7c000298QRR", | ||
813 | lxsspx_3 = "7c000418QRR", | ||
814 | lxsdx_3 = "7c000498QRR", | ||
815 | stxsspx_3 = "7c000518QRR", | ||
816 | stxsdx_3 = "7c000598QRR", | ||
817 | lxvw4x_3 = "7c000618QRR", | ||
818 | lxvd2x_3 = "7c000698QRR", | ||
819 | stxvw4x_3 = "7c000718QRR", | ||
820 | stxvd2x_3 = "7c000798QRR", | ||
821 | |||
484 | -- Primary opcode 30: | 822 | -- Primary opcode 30: |
485 | rldicl_4 = "78000000RR~HM.", | 823 | rldicl_4 = "78000000RR~HM.", |
486 | rldicr_4 = "78000004RR~HM.", | 824 | rldicr_4 = "78000004RR~HM.", |
@@ -489,6 +827,34 @@ local map_op = { | |||
489 | rldcl_4 = "78000010RR~RM.", | 827 | rldcl_4 = "78000010RR~RM.", |
490 | rldcr_4 = "78000012RR~RM.", | 828 | rldcr_4 = "78000012RR~RM.", |
491 | 829 | ||
830 | rotldi_3 = op_alias("rldicl_4", function(p) | ||
831 | p[4] = "0" | ||
832 | end), | ||
833 | rotrdi_3 = op_alias("rldicl_4", function(p) | ||
834 | p[3] = "64-("..p[3]..")"; p[4] = "0" | ||
835 | end), | ||
836 | rotld_3 = op_alias("rldcl_4", function(p) | ||
837 | p[4] = "0" | ||
838 | end), | ||
839 | sldi_3 = op_alias("rldicr_4", function(p) | ||
840 | p[4] = "63-("..p[3]..")" | ||
841 | end), | ||
842 | srdi_3 = op_alias("rldicl_4", function(p) | ||
843 | p[4] = p[3]; p[3] = "64-("..p[3]..")" | ||
844 | end), | ||
845 | clrldi_3 = op_alias("rldicl_4", function(p) | ||
846 | p[4] = p[3]; p[3] = "0" | ||
847 | end), | ||
848 | clrrdi_3 = op_alias("rldicr_4", function(p) | ||
849 | p[4] = "63-("..p[3]..")"; p[3] = "0" | ||
850 | end), | ||
851 | |||
852 | -- Primary opcode 56: | ||
853 | lq_2 = "e0000000R:D", -- NYI: displacement must be divisible by 16. | ||
854 | |||
855 | -- Primary opcode 57: | ||
856 | lfdp_2 = "e4000000F:D", -- NYI: displacement must be divisible by 4. | ||
857 | |||
492 | -- Primary opcode 59: | 858 | -- Primary opcode 59: |
493 | fdivs_3 = "ec000024FFF.", | 859 | fdivs_3 = "ec000024FFF.", |
494 | fsubs_3 = "ec000028FFF.", | 860 | fsubs_3 = "ec000028FFF.", |
@@ -501,6 +867,200 @@ local map_op = { | |||
501 | fmadds_4 = "ec00003aFFFF~.", | 867 | fmadds_4 = "ec00003aFFFF~.", |
502 | fnmsubs_4 = "ec00003cFFFF~.", | 868 | fnmsubs_4 = "ec00003cFFFF~.", |
503 | fnmadds_4 = "ec00003eFFFF~.", | 869 | fnmadds_4 = "ec00003eFFFF~.", |
870 | fcfids_2 = "ec00069cF-F.", | ||
871 | fcfidus_2 = "ec00079cF-F.", | ||
872 | |||
873 | dadd_3 = "ec000004FFF.", | ||
874 | dqua_4 = "ec000006FFFZ.", | ||
875 | dmul_3 = "ec000044FFF.", | ||
876 | drrnd_4 = "ec000046FFFZ.", | ||
877 | dscli_3 = "ec000084FF6.", | ||
878 | dquai_4 = "ec000086SF~FZ.", | ||
879 | dscri_3 = "ec0000c4FF6.", | ||
880 | drintx_4 = "ec0000c61F~FZ.", | ||
881 | dcmpo_3 = "ec000104XFF", | ||
882 | dtstex_3 = "ec000144XFF", | ||
883 | dtstdc_3 = "ec000184XF6", | ||
884 | dtstdg_3 = "ec0001c4XF6", | ||
885 | drintn_4 = "ec0001c61F~FZ.", | ||
886 | dctdp_2 = "ec000204F-F.", | ||
887 | dctfix_2 = "ec000244F-F.", | ||
888 | ddedpd_3 = "ec000284ZF~F.", | ||
889 | dxex_2 = "ec0002c4F-F.", | ||
890 | dsub_3 = "ec000404FFF.", | ||
891 | ddiv_3 = "ec000444FFF.", | ||
892 | dcmpu_3 = "ec000504XFF", | ||
893 | dtstsf_3 = "ec000544XFF", | ||
894 | drsp_2 = "ec000604F-F.", | ||
895 | dcffix_2 = "ec000644F-F.", | ||
896 | denbcd_3 = "ec000684YF~F.", | ||
897 | diex_3 = "ec0006c4FFF.", | ||
898 | |||
899 | -- Primary opcode 60: | ||
900 | xsaddsp_3 = "f0000000QQQ", | ||
901 | xsmaddasp_3 = "f0000008QQQ", | ||
902 | xxsldwi_4 = "f0000010QQQz", | ||
903 | xsrsqrtesp_2 = "f0000028Q-Q", | ||
904 | xssqrtsp_2 = "f000002cQ-Q", | ||
905 | xxsel_4 = "f0000030QQQQ", | ||
906 | xssubsp_3 = "f0000040QQQ", | ||
907 | xsmaddmsp_3 = "f0000048QQQ", | ||
908 | xxpermdi_4 = "f0000050QQQz", | ||
909 | xsresp_2 = "f0000068Q-Q", | ||
910 | xsmulsp_3 = "f0000080QQQ", | ||
911 | xsmsubasp_3 = "f0000088QQQ", | ||
912 | xxmrghw_3 = "f0000090QQQ", | ||
913 | xsdivsp_3 = "f00000c0QQQ", | ||
914 | xsmsubmsp_3 = "f00000c8QQQ", | ||
915 | xsadddp_3 = "f0000100QQQ", | ||
916 | xsmaddadp_3 = "f0000108QQQ", | ||
917 | xscmpudp_3 = "f0000118XQQ", | ||
918 | xscvdpuxws_2 = "f0000120Q-Q", | ||
919 | xsrdpi_2 = "f0000124Q-Q", | ||
920 | xsrsqrtedp_2 = "f0000128Q-Q", | ||
921 | xssqrtdp_2 = "f000012cQ-Q", | ||
922 | xssubdp_3 = "f0000140QQQ", | ||
923 | xsmaddmdp_3 = "f0000148QQQ", | ||
924 | xscmpodp_3 = "f0000158XQQ", | ||
925 | xscvdpsxws_2 = "f0000160Q-Q", | ||
926 | xsrdpiz_2 = "f0000164Q-Q", | ||
927 | xsredp_2 = "f0000168Q-Q", | ||
928 | xsmuldp_3 = "f0000180QQQ", | ||
929 | xsmsubadp_3 = "f0000188QQQ", | ||
930 | xxmrglw_3 = "f0000190QQQ", | ||
931 | xsrdpip_2 = "f00001a4Q-Q", | ||
932 | xstsqrtdp_2 = "f00001a8X-Q", | ||
933 | xsrdpic_2 = "f00001acQ-Q", | ||
934 | xsdivdp_3 = "f00001c0QQQ", | ||
935 | xsmsubmdp_3 = "f00001c8QQQ", | ||
936 | xsrdpim_2 = "f00001e4Q-Q", | ||
937 | xstdivdp_3 = "f00001e8XQQ", | ||
938 | xvaddsp_3 = "f0000200QQQ", | ||
939 | xvmaddasp_3 = "f0000208QQQ", | ||
940 | xvcmpeqsp_3 = "f0000218QQQ", | ||
941 | xvcvspuxws_2 = "f0000220Q-Q", | ||
942 | xvrspi_2 = "f0000224Q-Q", | ||
943 | xvrsqrtesp_2 = "f0000228Q-Q", | ||
944 | xvsqrtsp_2 = "f000022cQ-Q", | ||
945 | xvsubsp_3 = "f0000240QQQ", | ||
946 | xvmaddmsp_3 = "f0000248QQQ", | ||
947 | xvcmpgtsp_3 = "f0000258QQQ", | ||
948 | xvcvspsxws_2 = "f0000260Q-Q", | ||
949 | xvrspiz_2 = "f0000264Q-Q", | ||
950 | xvresp_2 = "f0000268Q-Q", | ||
951 | xvmulsp_3 = "f0000280QQQ", | ||
952 | xvmsubasp_3 = "f0000288QQQ", | ||
953 | xxspltw_3 = "f0000290QQg~", | ||
954 | xvcmpgesp_3 = "f0000298QQQ", | ||
955 | xvcvuxwsp_2 = "f00002a0Q-Q", | ||
956 | xvrspip_2 = "f00002a4Q-Q", | ||
957 | xvtsqrtsp_2 = "f00002a8X-Q", | ||
958 | xvrspic_2 = "f00002acQ-Q", | ||
959 | xvdivsp_3 = "f00002c0QQQ", | ||
960 | xvmsubmsp_3 = "f00002c8QQQ", | ||
961 | xvcvsxwsp_2 = "f00002e0Q-Q", | ||
962 | xvrspim_2 = "f00002e4Q-Q", | ||
963 | xvtdivsp_3 = "f00002e8XQQ", | ||
964 | xvadddp_3 = "f0000300QQQ", | ||
965 | xvmaddadp_3 = "f0000308QQQ", | ||
966 | xvcmpeqdp_3 = "f0000318QQQ", | ||
967 | xvcvdpuxws_2 = "f0000320Q-Q", | ||
968 | xvrdpi_2 = "f0000324Q-Q", | ||
969 | xvrsqrtedp_2 = "f0000328Q-Q", | ||
970 | xvsqrtdp_2 = "f000032cQ-Q", | ||
971 | xvsubdp_3 = "f0000340QQQ", | ||
972 | xvmaddmdp_3 = "f0000348QQQ", | ||
973 | xvcmpgtdp_3 = "f0000358QQQ", | ||
974 | xvcvdpsxws_2 = "f0000360Q-Q", | ||
975 | xvrdpiz_2 = "f0000364Q-Q", | ||
976 | xvredp_2 = "f0000368Q-Q", | ||
977 | xvmuldp_3 = "f0000380QQQ", | ||
978 | xvmsubadp_3 = "f0000388QQQ", | ||
979 | xvcmpgedp_3 = "f0000398QQQ", | ||
980 | xvcvuxwdp_2 = "f00003a0Q-Q", | ||
981 | xvrdpip_2 = "f00003a4Q-Q", | ||
982 | xvtsqrtdp_2 = "f00003a8X-Q", | ||
983 | xvrdpic_2 = "f00003acQ-Q", | ||
984 | xvdivdp_3 = "f00003c0QQQ", | ||
985 | xvmsubmdp_3 = "f00003c8QQQ", | ||
986 | xvcvsxwdp_2 = "f00003e0Q-Q", | ||
987 | xvrdpim_2 = "f00003e4Q-Q", | ||
988 | xvtdivdp_3 = "f00003e8XQQ", | ||
989 | xsnmaddasp_3 = "f0000408QQQ", | ||
990 | xxland_3 = "f0000410QQQ", | ||
991 | xscvdpsp_2 = "f0000424Q-Q", | ||
992 | xscvdpspn_2 = "f000042cQ-Q", | ||
993 | xsnmaddmsp_3 = "f0000448QQQ", | ||
994 | xxlandc_3 = "f0000450QQQ", | ||
995 | xsrsp_2 = "f0000464Q-Q", | ||
996 | xsnmsubasp_3 = "f0000488QQQ", | ||
997 | xxlor_3 = "f0000490QQQ", | ||
998 | xscvuxdsp_2 = "f00004a0Q-Q", | ||
999 | xsnmsubmsp_3 = "f00004c8QQQ", | ||
1000 | xxlxor_3 = "f00004d0QQQ", | ||
1001 | xscvsxdsp_2 = "f00004e0Q-Q", | ||
1002 | xsmaxdp_3 = "f0000500QQQ", | ||
1003 | xsnmaddadp_3 = "f0000508QQQ", | ||
1004 | xxlnor_3 = "f0000510QQQ", | ||
1005 | xscvdpuxds_2 = "f0000520Q-Q", | ||
1006 | xscvspdp_2 = "f0000524Q-Q", | ||
1007 | xscvspdpn_2 = "f000052cQ-Q", | ||
1008 | xsmindp_3 = "f0000540QQQ", | ||
1009 | xsnmaddmdp_3 = "f0000548QQQ", | ||
1010 | xxlorc_3 = "f0000550QQQ", | ||
1011 | xscvdpsxds_2 = "f0000560Q-Q", | ||
1012 | xsabsdp_2 = "f0000564Q-Q", | ||
1013 | xscpsgndp_3 = "f0000580QQQ", | ||
1014 | xsnmsubadp_3 = "f0000588QQQ", | ||
1015 | xxlnand_3 = "f0000590QQQ", | ||
1016 | xscvuxddp_2 = "f00005a0Q-Q", | ||
1017 | xsnabsdp_2 = "f00005a4Q-Q", | ||
1018 | xsnmsubmdp_3 = "f00005c8QQQ", | ||
1019 | xxleqv_3 = "f00005d0QQQ", | ||
1020 | xscvsxddp_2 = "f00005e0Q-Q", | ||
1021 | xsnegdp_2 = "f00005e4Q-Q", | ||
1022 | xvmaxsp_3 = "f0000600QQQ", | ||
1023 | xvnmaddasp_3 = "f0000608QQQ", | ||
1024 | ["xvcmpeqsp._3"] = "f0000618QQQ", | ||
1025 | xvcvspuxds_2 = "f0000620Q-Q", | ||
1026 | xvcvdpsp_2 = "f0000624Q-Q", | ||
1027 | xvminsp_3 = "f0000640QQQ", | ||
1028 | xvnmaddmsp_3 = "f0000648QQQ", | ||
1029 | ["xvcmpgtsp._3"] = "f0000658QQQ", | ||
1030 | xvcvspsxds_2 = "f0000660Q-Q", | ||
1031 | xvabssp_2 = "f0000664Q-Q", | ||
1032 | xvcpsgnsp_3 = "f0000680QQQ", | ||
1033 | xvnmsubasp_3 = "f0000688QQQ", | ||
1034 | ["xvcmpgesp._3"] = "f0000698QQQ", | ||
1035 | xvcvuxdsp_2 = "f00006a0Q-Q", | ||
1036 | xvnabssp_2 = "f00006a4Q-Q", | ||
1037 | xvnmsubmsp_3 = "f00006c8QQQ", | ||
1038 | xvcvsxdsp_2 = "f00006e0Q-Q", | ||
1039 | xvnegsp_2 = "f00006e4Q-Q", | ||
1040 | xvmaxdp_3 = "f0000700QQQ", | ||
1041 | xvnmaddadp_3 = "f0000708QQQ", | ||
1042 | ["xvcmpeqdp._3"] = "f0000718QQQ", | ||
1043 | xvcvdpuxds_2 = "f0000720Q-Q", | ||
1044 | xvcvspdp_2 = "f0000724Q-Q", | ||
1045 | xvmindp_3 = "f0000740QQQ", | ||
1046 | xvnmaddmdp_3 = "f0000748QQQ", | ||
1047 | ["xvcmpgtdp._3"] = "f0000758QQQ", | ||
1048 | xvcvdpsxds_2 = "f0000760Q-Q", | ||
1049 | xvabsdp_2 = "f0000764Q-Q", | ||
1050 | xvcpsgndp_3 = "f0000780QQQ", | ||
1051 | xvnmsubadp_3 = "f0000788QQQ", | ||
1052 | ["xvcmpgedp._3"] = "f0000798QQQ", | ||
1053 | xvcvuxddp_2 = "f00007a0Q-Q", | ||
1054 | xvnabsdp_2 = "f00007a4Q-Q", | ||
1055 | xvnmsubmdp_3 = "f00007c8QQQ", | ||
1056 | xvcvsxddp_2 = "f00007e0Q-Q", | ||
1057 | xvnegdp_2 = "f00007e4Q-Q", | ||
1058 | |||
1059 | -- Primary opcode 61: | ||
1060 | stfdp_2 = "f4000000F:D", -- NYI: displacement must be divisible by 4. | ||
1061 | |||
1062 | -- Primary opcode 62: | ||
1063 | stq_2 = "f8000002R:D", -- NYI: displacement must be divisible by 8. | ||
504 | 1064 | ||
505 | -- Primary opcode 63: | 1065 | -- Primary opcode 63: |
506 | fdiv_3 = "fc000024FFF.", | 1066 | fdiv_3 = "fc000024FFF.", |
@@ -526,8 +1086,12 @@ local map_op = { | |||
526 | frsp_2 = "fc000018F-F.", | 1086 | frsp_2 = "fc000018F-F.", |
527 | fctiw_2 = "fc00001cF-F.", | 1087 | fctiw_2 = "fc00001cF-F.", |
528 | fctiwz_2 = "fc00001eF-F.", | 1088 | fctiwz_2 = "fc00001eF-F.", |
1089 | ftdiv_2 = "fc000100X-F.", | ||
1090 | fctiwu_2 = "fc00011cF-F.", | ||
1091 | fctiwuz_2 = "fc00011eF-F.", | ||
529 | mtfsfi_2 = "fc00010cAA", -- NYI: upshift. | 1092 | mtfsfi_2 = "fc00010cAA", -- NYI: upshift. |
530 | fnabs_2 = "fc000110F-F.", | 1093 | fnabs_2 = "fc000110F-F.", |
1094 | ftsqrt_2 = "fc000140X-F.", | ||
531 | fabs_2 = "fc000210F-F.", | 1095 | fabs_2 = "fc000210F-F.", |
532 | frin_2 = "fc000310F-F.", | 1096 | frin_2 = "fc000310F-F.", |
533 | friz_2 = "fc000350F-F.", | 1097 | friz_2 = "fc000350F-F.", |
@@ -537,7 +1101,38 @@ local map_op = { | |||
537 | -- NYI: mtfsf, mtfsb0, mtfsb1. | 1101 | -- NYI: mtfsf, mtfsb0, mtfsb1. |
538 | fctid_2 = "fc00065cF-F.", | 1102 | fctid_2 = "fc00065cF-F.", |
539 | fctidz_2 = "fc00065eF-F.", | 1103 | fctidz_2 = "fc00065eF-F.", |
1104 | fmrgow_3 = "fc00068cFFF", | ||
540 | fcfid_2 = "fc00069cF-F.", | 1105 | fcfid_2 = "fc00069cF-F.", |
1106 | fctidu_2 = "fc00075cF-F.", | ||
1107 | fctiduz_2 = "fc00075eF-F.", | ||
1108 | fmrgew_3 = "fc00078cFFF", | ||
1109 | fcfidu_2 = "fc00079cF-F.", | ||
1110 | |||
1111 | daddq_3 = "fc000004F:F:F:.", | ||
1112 | dquaq_4 = "fc000006F:F:F:Z.", | ||
1113 | dmulq_3 = "fc000044F:F:F:.", | ||
1114 | drrndq_4 = "fc000046F:F:F:Z.", | ||
1115 | dscliq_3 = "fc000084F:F:6.", | ||
1116 | dquaiq_4 = "fc000086SF:~F:Z.", | ||
1117 | dscriq_3 = "fc0000c4F:F:6.", | ||
1118 | drintxq_4 = "fc0000c61F:~F:Z.", | ||
1119 | dcmpoq_3 = "fc000104XF:F:", | ||
1120 | dtstexq_3 = "fc000144XF:F:", | ||
1121 | dtstdcq_3 = "fc000184XF:6", | ||
1122 | dtstdgq_3 = "fc0001c4XF:6", | ||
1123 | drintnq_4 = "fc0001c61F:~F:Z.", | ||
1124 | dctqpq_2 = "fc000204F:-F:.", | ||
1125 | dctfixq_2 = "fc000244F:-F:.", | ||
1126 | ddedpdq_3 = "fc000284ZF:~F:.", | ||
1127 | dxexq_2 = "fc0002c4F:-F:.", | ||
1128 | dsubq_3 = "fc000404F:F:F:.", | ||
1129 | ddivq_3 = "fc000444F:F:F:.", | ||
1130 | dcmpuq_3 = "fc000504XF:F:", | ||
1131 | dtstsfq_3 = "fc000544XF:F:", | ||
1132 | drdpq_2 = "fc000604F:-F:.", | ||
1133 | dcffixq_2 = "fc000644F:-F:.", | ||
1134 | denbcdq_3 = "fc000684YF:~F:.", | ||
1135 | diexq_3 = "fc0006c4F:FF:.", | ||
541 | 1136 | ||
542 | -- Primary opcode 4, SPE APU extension: | 1137 | -- Primary opcode 4, SPE APU extension: |
543 | evaddw_3 = "10000200RRR", | 1138 | evaddw_3 = "10000200RRR", |
@@ -822,7 +1417,7 @@ local map_op = { | |||
822 | do | 1417 | do |
823 | local t = {} | 1418 | local t = {} |
824 | for k,v in pairs(map_op) do | 1419 | for k,v in pairs(map_op) do |
825 | if sub(v, -1) == "." then | 1420 | if type(v) == "string" and sub(v, -1) == "." then |
826 | local v2 = sub(v, 1, 7)..char(byte(v, 8)+1)..sub(v, 9, -2) | 1421 | local v2 = sub(v, 1, 7)..char(byte(v, 8)+1)..sub(v, 9, -2) |
827 | t[sub(k, 1, -3).."."..sub(k, -2)] = v2 | 1422 | t[sub(k, 1, -3).."."..sub(k, -2)] = v2 |
828 | end | 1423 | end |
@@ -884,6 +1479,24 @@ local function parse_fpr(expr) | |||
884 | werror("bad register name `"..expr.."'") | 1479 | werror("bad register name `"..expr.."'") |
885 | end | 1480 | end |
886 | 1481 | ||
1482 | local function parse_vr(expr) | ||
1483 | local r = match(expr, "^v([1-3]?[0-9])$") | ||
1484 | if r then | ||
1485 | r = tonumber(r) | ||
1486 | if r <= 31 then return r end | ||
1487 | end | ||
1488 | werror("bad register name `"..expr.."'") | ||
1489 | end | ||
1490 | |||
1491 | local function parse_vs(expr) | ||
1492 | local r = match(expr, "^vs([1-6]?[0-9])$") | ||
1493 | if r then | ||
1494 | r = tonumber(r) | ||
1495 | if r <= 63 then return r end | ||
1496 | end | ||
1497 | werror("bad register name `"..expr.."'") | ||
1498 | end | ||
1499 | |||
887 | local function parse_cr(expr) | 1500 | local function parse_cr(expr) |
888 | local r = match(expr, "^cr([0-7])$") | 1501 | local r = match(expr, "^cr([0-7])$") |
889 | if r then return tonumber(r) end | 1502 | if r then return tonumber(r) end |
@@ -900,8 +1513,30 @@ local function parse_cond(expr) | |||
900 | werror("bad condition bit name `"..expr.."'") | 1513 | werror("bad condition bit name `"..expr.."'") |
901 | end | 1514 | end |
902 | 1515 | ||
1516 | local parse_ctx = {} | ||
1517 | |||
1518 | local loadenv = setfenv and function(s) | ||
1519 | local code = loadstring(s, "") | ||
1520 | if code then setfenv(code, parse_ctx) end | ||
1521 | return code | ||
1522 | end or function(s) | ||
1523 | return load(s, "", nil, parse_ctx) | ||
1524 | end | ||
1525 | |||
1526 | -- Try to parse simple arithmetic, too, since some basic ops are aliases. | ||
1527 | local function parse_number(n) | ||
1528 | local x = tonumber(n) | ||
1529 | if x then return x end | ||
1530 | local code = loadenv("return "..n) | ||
1531 | if code then | ||
1532 | local ok, y = pcall(code) | ||
1533 | if ok then return y end | ||
1534 | end | ||
1535 | return nil | ||
1536 | end | ||
1537 | |||
903 | local function parse_imm(imm, bits, shift, scale, signed) | 1538 | local function parse_imm(imm, bits, shift, scale, signed) |
904 | local n = tonumber(imm) | 1539 | local n = parse_number(imm) |
905 | if n then | 1540 | if n then |
906 | local m = sar(n, scale) | 1541 | local m = sar(n, scale) |
907 | if shl(m, scale) == n then | 1542 | if shl(m, scale) == n then |
@@ -914,7 +1549,8 @@ local function parse_imm(imm, bits, shift, scale, signed) | |||
914 | end | 1549 | end |
915 | end | 1550 | end |
916 | werror("out of range immediate `"..imm.."'") | 1551 | werror("out of range immediate `"..imm.."'") |
917 | elseif match(imm, "^r([1-3]?[0-9])$") or | 1552 | elseif match(imm, "^[rfv]([1-3]?[0-9])$") or |
1553 | match(imm, "^vs([1-6]?[0-9])$") or | ||
918 | match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then | 1554 | match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then |
919 | werror("expected immediate operand, got register") | 1555 | werror("expected immediate operand, got register") |
920 | else | 1556 | else |
@@ -924,11 +1560,11 @@ local function parse_imm(imm, bits, shift, scale, signed) | |||
924 | end | 1560 | end |
925 | 1561 | ||
926 | local function parse_shiftmask(imm, isshift) | 1562 | local function parse_shiftmask(imm, isshift) |
927 | local n = tonumber(imm) | 1563 | local n = parse_number(imm) |
928 | if n then | 1564 | if n then |
929 | if shr(n, 6) == 0 then | 1565 | if shr(n, 6) == 0 then |
930 | local lsb = band(imm, 31) | 1566 | local lsb = band(n, 31) |
931 | local msb = imm - lsb | 1567 | local msb = n - lsb |
932 | return isshift and (shl(lsb, 11)+shr(msb, 4)) or (shl(lsb, 6)+msb) | 1568 | return isshift and (shl(lsb, 11)+shr(msb, 4)) or (shl(lsb, 6)+msb) |
933 | end | 1569 | end |
934 | werror("out of range immediate `"..imm.."'") | 1570 | werror("out of range immediate `"..imm.."'") |
@@ -936,7 +1572,8 @@ local function parse_shiftmask(imm, isshift) | |||
936 | match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then | 1572 | match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then |
937 | werror("expected immediate operand, got register") | 1573 | werror("expected immediate operand, got register") |
938 | else | 1574 | else |
939 | werror("NYI: parameterized 64 bit shift/mask") | 1575 | waction("IMMSH", isshift and 1 or 0, imm) |
1576 | return 0; | ||
940 | end | 1577 | end |
941 | end | 1578 | end |
942 | 1579 | ||
@@ -1011,7 +1648,7 @@ end | |||
1011 | ------------------------------------------------------------------------------ | 1648 | ------------------------------------------------------------------------------ |
1012 | 1649 | ||
1013 | -- Handle opcodes defined with template strings. | 1650 | -- Handle opcodes defined with template strings. |
1014 | map_op[".template__"] = function(params, template, nparams) | 1651 | op_template = function(params, template, nparams) |
1015 | if not params then return sub(template, 9) end | 1652 | if not params then return sub(template, 9) end |
1016 | local op = tonumber(sub(template, 1, 8), 16) | 1653 | local op = tonumber(sub(template, 1, 8), 16) |
1017 | local n, rs = 1, 26 | 1654 | local n, rs = 1, 26 |
@@ -1027,6 +1664,15 @@ map_op[".template__"] = function(params, template, nparams) | |||
1027 | rs = rs - 5; op = op + shl(parse_gpr(params[n]), rs); n = n + 1 | 1664 | rs = rs - 5; op = op + shl(parse_gpr(params[n]), rs); n = n + 1 |
1028 | elseif p == "F" then | 1665 | elseif p == "F" then |
1029 | rs = rs - 5; op = op + shl(parse_fpr(params[n]), rs); n = n + 1 | 1666 | rs = rs - 5; op = op + shl(parse_fpr(params[n]), rs); n = n + 1 |
1667 | elseif p == "V" then | ||
1668 | rs = rs - 5; op = op + shl(parse_vr(params[n]), rs); n = n + 1 | ||
1669 | elseif p == "Q" then | ||
1670 | local vs = parse_vs(params[n]); n = n + 1; rs = rs - 5 | ||
1671 | local sh = rs == 6 and 2 or 3 + band(shr(rs, 1), 3) | ||
1672 | op = op + shl(band(vs, 31), rs) + shr(band(vs, 32), sh) | ||
1673 | elseif p == "q" then | ||
1674 | local vs = parse_vs(params[n]); n = n + 1 | ||
1675 | op = op + shl(band(vs, 31), 21) + shr(band(vs, 32), 5) | ||
1030 | elseif p == "A" then | 1676 | elseif p == "A" then |
1031 | rs = rs - 5; op = op + parse_imm(params[n], 5, rs, 0, false); n = n + 1 | 1677 | rs = rs - 5; op = op + parse_imm(params[n], 5, rs, 0, false); n = n + 1 |
1032 | elseif p == "S" then | 1678 | elseif p == "S" then |
@@ -1047,6 +1693,26 @@ map_op[".template__"] = function(params, template, nparams) | |||
1047 | rs = rs - 5; op = op + shl(parse_cond(params[n]), rs); n = n + 1 | 1693 | rs = rs - 5; op = op + shl(parse_cond(params[n]), rs); n = n + 1 |
1048 | elseif p == "X" then | 1694 | elseif p == "X" then |
1049 | rs = rs - 5; op = op + shl(parse_cr(params[n]), rs+2); n = n + 1 | 1695 | rs = rs - 5; op = op + shl(parse_cr(params[n]), rs+2); n = n + 1 |
1696 | elseif p == "1" then | ||
1697 | rs = rs - 5; op = op + parse_imm(params[n], 1, rs, 0, false); n = n + 1 | ||
1698 | elseif p == "g" then | ||
1699 | rs = rs - 5; op = op + parse_imm(params[n], 2, rs, 0, false); n = n + 1 | ||
1700 | elseif p == "3" then | ||
1701 | rs = rs - 5; op = op + parse_imm(params[n], 3, rs, 0, false); n = n + 1 | ||
1702 | elseif p == "P" then | ||
1703 | rs = rs - 5; op = op + parse_imm(params[n], 4, rs, 0, false); n = n + 1 | ||
1704 | elseif p == "p" then | ||
1705 | op = op + parse_imm(params[n], 4, rs, 0, false); n = n + 1 | ||
1706 | elseif p == "6" then | ||
1707 | rs = rs - 6; op = op + parse_imm(params[n], 6, rs, 0, false); n = n + 1 | ||
1708 | elseif p == "Y" then | ||
1709 | rs = rs - 5; op = op + parse_imm(params[n], 1, rs+4, 0, false); n = n + 1 | ||
1710 | elseif p == "y" then | ||
1711 | rs = rs - 5; op = op + parse_imm(params[n], 1, rs+3, 0, false); n = n + 1 | ||
1712 | elseif p == "Z" then | ||
1713 | rs = rs - 5; op = op + parse_imm(params[n], 2, rs+3, 0, false); n = n + 1 | ||
1714 | elseif p == "z" then | ||
1715 | rs = rs - 5; op = op + parse_imm(params[n], 2, rs+2, 0, false); n = n + 1 | ||
1050 | elseif p == "W" then | 1716 | elseif p == "W" then |
1051 | op = op + parse_cr(params[n]); n = n + 1 | 1717 | op = op + parse_cr(params[n]); n = n + 1 |
1052 | elseif p == "G" then | 1718 | elseif p == "G" then |
@@ -1071,6 +1737,8 @@ map_op[".template__"] = function(params, template, nparams) | |||
1071 | local lo = band(op, mm) | 1737 | local lo = band(op, mm) |
1072 | local hi = band(op, shl(mm, 5)) | 1738 | local hi = band(op, shl(mm, 5)) |
1073 | op = op - lo - hi + shl(lo, 5) + shr(hi, 5) | 1739 | op = op - lo - hi + shl(lo, 5) + shr(hi, 5) |
1740 | elseif p == ":" then | ||
1741 | if band(shr(op, rs), 1) ~= 0 then werror("register pair expected") end | ||
1074 | elseif p == "-" then | 1742 | elseif p == "-" then |
1075 | rs = rs - 5 | 1743 | rs = rs - 5 |
1076 | elseif p == "." then | 1744 | elseif p == "." then |
@@ -1082,6 +1750,8 @@ map_op[".template__"] = function(params, template, nparams) | |||
1082 | wputpos(pos, op) | 1750 | wputpos(pos, op) |
1083 | end | 1751 | end |
1084 | 1752 | ||
1753 | map_op[".template__"] = op_template | ||
1754 | |||
1085 | ------------------------------------------------------------------------------ | 1755 | ------------------------------------------------------------------------------ |
1086 | 1756 | ||
1087 | -- Pseudo-opcode to mark the position where the action list is to be emitted. | 1757 | -- Pseudo-opcode to mark the position where the action list is to be emitted. |
diff --git a/dynasm/dasm_proto.h b/dynasm/dasm_proto.h index a7278e85..ba038e87 100644 --- a/dynasm/dasm_proto.h +++ b/dynasm/dasm_proto.h | |||
@@ -10,8 +10,8 @@ | |||
10 | #include <stddef.h> | 10 | #include <stddef.h> |
11 | #include <stdarg.h> | 11 | #include <stdarg.h> |
12 | 12 | ||
13 | #define DASM_IDENT "DynASM 1.3.0" | 13 | #define DASM_IDENT "DynASM 1.4.0" |
14 | #define DASM_VERSION 10300 /* 1.3.0 */ | 14 | #define DASM_VERSION 10400 /* 1.4.0 */ |
15 | 15 | ||
16 | #ifndef Dst_DECL | 16 | #ifndef Dst_DECL |
17 | #define Dst_DECL dasm_State **Dst | 17 | #define Dst_DECL dasm_State **Dst |
diff --git a/dynasm/dasm_x86.h b/dynasm/dasm_x86.h index 84b9d17f..edaddf54 100644 --- a/dynasm/dasm_x86.h +++ b/dynasm/dasm_x86.h | |||
@@ -170,7 +170,7 @@ void dasm_put(Dst_DECL, int start, ...) | |||
170 | dasm_State *D = Dst_REF; | 170 | dasm_State *D = Dst_REF; |
171 | dasm_ActList p = D->actionlist + start; | 171 | dasm_ActList p = D->actionlist + start; |
172 | dasm_Section *sec = D->section; | 172 | dasm_Section *sec = D->section; |
173 | int pos = sec->pos, ofs = sec->ofs, mrm = 4; | 173 | int pos = sec->pos, ofs = sec->ofs, mrm = -1; |
174 | int *b; | 174 | int *b; |
175 | 175 | ||
176 | if (pos >= sec->epos) { | 176 | if (pos >= sec->epos) { |
@@ -193,7 +193,7 @@ void dasm_put(Dst_DECL, int start, ...) | |||
193 | b[pos++] = n; | 193 | b[pos++] = n; |
194 | switch (action) { | 194 | switch (action) { |
195 | case DASM_DISP: | 195 | case DASM_DISP: |
196 | if (n == 0) { if ((mrm&7) == 4) mrm = p[-2]; if ((mrm&7) != 5) break; } | 196 | if (n == 0) { if (mrm < 0) mrm = p[-2]; if ((mrm&7) != 5) break; } |
197 | /* fallthrough */ | 197 | /* fallthrough */ |
198 | case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob; /* fallthrough */ | 198 | case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob; /* fallthrough */ |
199 | case DASM_REL_A: /* Assumes ptrdiff_t is int. !x64 */ | 199 | case DASM_REL_A: /* Assumes ptrdiff_t is int. !x64 */ |
@@ -204,11 +204,17 @@ void dasm_put(Dst_DECL, int start, ...) | |||
204 | case DASM_IMM_W: CK((n&-65536) == 0, RANGE_I); ofs += 2; break; | 204 | case DASM_IMM_W: CK((n&-65536) == 0, RANGE_I); ofs += 2; break; |
205 | case DASM_SPACE: p++; ofs += n; break; | 205 | case DASM_SPACE: p++; ofs += n; break; |
206 | case DASM_SETLABEL: b[pos-2] = -0x40000000; break; /* Neg. label ofs. */ | 206 | case DASM_SETLABEL: b[pos-2] = -0x40000000; break; /* Neg. label ofs. */ |
207 | case DASM_VREG: CK((n&-8) == 0 && (n != 4 || (*p&1) == 0), RANGE_VREG); | 207 | case DASM_VREG: CK((n&-16) == 0 && (n != 4 || (*p>>5) != 2), RANGE_VREG); |
208 | if (*p++ == 1 && *p == DASM_DISP) mrm = n; | 208 | if (*p < 0x40 && p[1] == DASM_DISP) mrm = n; |
209 | if (*p < 0x20 && (n&7) == 4) ofs++; | ||
210 | switch ((*p++ >> 3) & 3) { | ||
211 | case 3: n |= b[pos-3]; /* fallthrough */ | ||
212 | case 2: n |= b[pos-2]; /* fallthrough */ | ||
213 | case 1: if (n <= 7) { b[pos-1] |= 0x10; ofs--; } | ||
214 | } | ||
209 | continue; | 215 | continue; |
210 | } | 216 | } |
211 | mrm = 4; | 217 | mrm = -1; |
212 | } else { | 218 | } else { |
213 | int *pl, n; | 219 | int *pl, n; |
214 | switch (action) { | 220 | switch (action) { |
@@ -399,7 +405,27 @@ int dasm_encode(Dst_DECL, void *buffer) | |||
399 | case DASM_IMM_WB: if (((n+128)&-256) == 0) goto db; else mark = NULL; | 405 | case DASM_IMM_WB: if (((n+128)&-256) == 0) goto db; else mark = NULL; |
400 | /* fallthrough */ | 406 | /* fallthrough */ |
401 | case DASM_IMM_W: dasmw(n); break; | 407 | case DASM_IMM_W: dasmw(n); break; |
402 | case DASM_VREG: { int t = *p++; if (t >= 2) n<<=3; cp[-1] |= n; break; } | 408 | case DASM_VREG: { |
409 | int t = *p++; | ||
410 | unsigned char *ex = cp - (t&7); | ||
411 | if ((n & 8) && t < 0xa0) { | ||
412 | if (*ex & 0x80) ex[1] ^= 0x20 << (t>>6); else *ex ^= 1 << (t>>6); | ||
413 | n &= 7; | ||
414 | } else if (n & 0x10) { | ||
415 | if (*ex & 0x80) { | ||
416 | *ex = 0xc5; ex[1] = (ex[1] & 0x80) | ex[2]; ex += 2; | ||
417 | } | ||
418 | while (++ex < cp) ex[-1] = *ex; | ||
419 | if (mark) mark--; | ||
420 | cp--; | ||
421 | n &= 7; | ||
422 | } | ||
423 | if (t >= 0xc0) n <<= 4; | ||
424 | else if (t >= 0x40) n <<= 3; | ||
425 | else if (n == 4 && t < 0x20) { cp[-1] ^= n; *cp++ = 0x20; } | ||
426 | cp[-1] ^= n; | ||
427 | break; | ||
428 | } | ||
403 | case DASM_REL_LG: p++; if (n >= 0) goto rel_pc; | 429 | case DASM_REL_LG: p++; if (n >= 0) goto rel_pc; |
404 | b++; n = (int)(ptrdiff_t)D->globals[-n]; | 430 | b++; n = (int)(ptrdiff_t)D->globals[-n]; |
405 | /* fallthrough */ | 431 | /* fallthrough */ |
diff --git a/dynasm/dasm_x86.lua b/dynasm/dasm_x86.lua index 13aa68ff..c5c8c17b 100644 --- a/dynasm/dasm_x86.lua +++ b/dynasm/dasm_x86.lua | |||
@@ -11,9 +11,9 @@ local x64 = x64 | |||
11 | local _info = { | 11 | local _info = { |
12 | arch = x64 and "x64" or "x86", | 12 | arch = x64 and "x64" or "x86", |
13 | description = "DynASM x86/x64 module", | 13 | description = "DynASM x86/x64 module", |
14 | version = "1.3.0", | 14 | version = "1.4.0", |
15 | vernum = 10300, | 15 | vernum = 10400, |
16 | release = "2011-05-05", | 16 | release = "2015-10-18", |
17 | author = "Mike Pall", | 17 | author = "Mike Pall", |
18 | license = "MIT", | 18 | license = "MIT", |
19 | } | 19 | } |
@@ -27,9 +27,9 @@ local assert, unpack, setmetatable = assert, unpack or table.unpack, setmetatabl | |||
27 | local _s = string | 27 | local _s = string |
28 | local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char | 28 | local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char |
29 | local find, match, gmatch, gsub = _s.find, _s.match, _s.gmatch, _s.gsub | 29 | local find, match, gmatch, gsub = _s.find, _s.match, _s.gmatch, _s.gsub |
30 | local concat, sort = table.concat, table.sort | 30 | local concat, sort, remove = table.concat, table.sort, table.remove |
31 | local bit = bit or require("bit") | 31 | local bit = bit or require("bit") |
32 | local band, shl, shr = bit.band, bit.lshift, bit.rshift | 32 | local band, bxor, shl, shr = bit.band, bit.bxor, bit.lshift, bit.rshift |
33 | 33 | ||
34 | -- Inherited tables and callbacks. | 34 | -- Inherited tables and callbacks. |
35 | local g_opt, g_arch | 35 | local g_opt, g_arch |
@@ -41,7 +41,7 @@ local action_names = { | |||
41 | -- int arg, 1 buffer pos: | 41 | -- int arg, 1 buffer pos: |
42 | "DISP", "IMM_S", "IMM_B", "IMM_W", "IMM_D", "IMM_WB", "IMM_DB", | 42 | "DISP", "IMM_S", "IMM_B", "IMM_W", "IMM_D", "IMM_WB", "IMM_DB", |
43 | -- action arg (1 byte), int arg, 1 buffer pos (reg/num): | 43 | -- action arg (1 byte), int arg, 1 buffer pos (reg/num): |
44 | "VREG", "SPACE", -- !x64: VREG support NYI. | 44 | "VREG", "SPACE", |
45 | -- ptrdiff_t arg, 1 buffer pos (address): !x64 | 45 | -- ptrdiff_t arg, 1 buffer pos (address): !x64 |
46 | "SETLABEL", "REL_A", | 46 | "SETLABEL", "REL_A", |
47 | -- action arg (1 byte) or int arg, 2 buffer pos (link, offset): | 47 | -- action arg (1 byte) or int arg, 2 buffer pos (link, offset): |
@@ -83,6 +83,21 @@ local actargs = { 0 } | |||
83 | -- Current number of section buffer positions for dasm_put(). | 83 | -- Current number of section buffer positions for dasm_put(). |
84 | local secpos = 1 | 84 | local secpos = 1 |
85 | 85 | ||
86 | -- VREG kind encodings, pre-shifted by 5 bits. | ||
87 | local map_vreg = { | ||
88 | ["modrm.rm.m"] = 0x00, | ||
89 | ["modrm.rm.r"] = 0x20, | ||
90 | ["opcode"] = 0x20, | ||
91 | ["sib.base"] = 0x20, | ||
92 | ["sib.index"] = 0x40, | ||
93 | ["modrm.reg"] = 0x80, | ||
94 | ["vex.v"] = 0xa0, | ||
95 | ["imm.hi"] = 0xc0, | ||
96 | } | ||
97 | |||
98 | -- Current number of VREG actions contributing to REX/VEX shrinkage. | ||
99 | local vreg_shrink_count = 0 | ||
100 | |||
86 | ------------------------------------------------------------------------------ | 101 | ------------------------------------------------------------------------------ |
87 | 102 | ||
88 | -- Compute action numbers for action names. | 103 | -- Compute action numbers for action names. |
@@ -134,6 +149,21 @@ local function waction(action, a, num) | |||
134 | if a or num then secpos = secpos + (num or 1) end | 149 | if a or num then secpos = secpos + (num or 1) end |
135 | end | 150 | end |
136 | 151 | ||
152 | -- Optionally add a VREG action. | ||
153 | local function wvreg(kind, vreg, psz, sk, defer) | ||
154 | if not vreg then return end | ||
155 | waction("VREG", vreg) | ||
156 | local b = assert(map_vreg[kind], "bad vreg kind `"..vreg.."'") | ||
157 | if b < (sk or 0) then | ||
158 | vreg_shrink_count = vreg_shrink_count + 1 | ||
159 | end | ||
160 | if not defer then | ||
161 | b = b + vreg_shrink_count * 8 | ||
162 | vreg_shrink_count = 0 | ||
163 | end | ||
164 | wputxb(b + (psz or 0)) | ||
165 | end | ||
166 | |||
137 | -- Add call to embedded DynASM C code. | 167 | -- Add call to embedded DynASM C code. |
138 | local function wcall(func, args) | 168 | local function wcall(func, args) |
139 | wline(format("dasm_%s(Dst, %s);", func, concat(args, ", ")), true) | 169 | wline(format("dasm_%s(Dst, %s);", func, concat(args, ", ")), true) |
@@ -299,7 +329,7 @@ local function mkrmap(sz, cl, names) | |||
299 | local iname = format("@%s%x%s", sz, i, needrex and "R" or "") | 329 | local iname = format("@%s%x%s", sz, i, needrex and "R" or "") |
300 | if needrex then map_reg_needrex[iname] = true end | 330 | if needrex then map_reg_needrex[iname] = true end |
301 | local name | 331 | local name |
302 | if sz == "o" then name = format("xmm%d", i) | 332 | if sz == "o" or sz == "y" then name = format("%s%d", cl, i) |
303 | elseif sz == "f" then name = format("st%d", i) | 333 | elseif sz == "f" then name = format("st%d", i) |
304 | else name = format("r%d%s", i, sz == addrsize and "" or sz) end | 334 | else name = format("r%d%s", i, sz == addrsize and "" or sz) end |
305 | map_archdef[name] = iname | 335 | map_archdef[name] = iname |
@@ -326,6 +356,7 @@ mkrmap("w", "Rw", {"ax", "cx", "dx", "bx", "sp", "bp", "si", "di"}) | |||
326 | mkrmap("b", "Rb", {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"}) | 356 | mkrmap("b", "Rb", {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"}) |
327 | map_reg_valid_index[map_archdef.esp] = false | 357 | map_reg_valid_index[map_archdef.esp] = false |
328 | if x64 then map_reg_valid_index[map_archdef.rsp] = false end | 358 | if x64 then map_reg_valid_index[map_archdef.rsp] = false end |
359 | if x64 then map_reg_needrex[map_archdef.Rb] = true end | ||
329 | map_archdef["Ra"] = "@"..addrsize | 360 | map_archdef["Ra"] = "@"..addrsize |
330 | 361 | ||
331 | -- FP registers (internally tword sized, but use "f" as operand size). | 362 | -- FP registers (internally tword sized, but use "f" as operand size). |
@@ -334,21 +365,24 @@ mkrmap("f", "Rf") | |||
334 | -- SSE registers (oword sized, but qword and dword accessible). | 365 | -- SSE registers (oword sized, but qword and dword accessible). |
335 | mkrmap("o", "xmm") | 366 | mkrmap("o", "xmm") |
336 | 367 | ||
368 | -- AVX registers (yword sized, but oword, qword and dword accessible). | ||
369 | mkrmap("y", "ymm") | ||
370 | |||
337 | -- Operand size prefixes to codes. | 371 | -- Operand size prefixes to codes. |
338 | local map_opsize = { | 372 | local map_opsize = { |
339 | byte = "b", word = "w", dword = "d", qword = "q", oword = "o", tword = "t", | 373 | byte = "b", word = "w", dword = "d", qword = "q", oword = "o", yword = "y", |
340 | aword = addrsize, | 374 | tword = "t", aword = addrsize, |
341 | } | 375 | } |
342 | 376 | ||
343 | -- Operand size code to number. | 377 | -- Operand size code to number. |
344 | local map_opsizenum = { | 378 | local map_opsizenum = { |
345 | b = 1, w = 2, d = 4, q = 8, o = 16, t = 10, | 379 | b = 1, w = 2, d = 4, q = 8, o = 16, y = 32, t = 10, |
346 | } | 380 | } |
347 | 381 | ||
348 | -- Operand size code to name. | 382 | -- Operand size code to name. |
349 | local map_opsizename = { | 383 | local map_opsizename = { |
350 | b = "byte", w = "word", d = "dword", q = "qword", o = "oword", t = "tword", | 384 | b = "byte", w = "word", d = "dword", q = "qword", o = "oword", y = "yword", |
351 | f = "fpword", | 385 | t = "tword", f = "fpword", |
352 | } | 386 | } |
353 | 387 | ||
354 | -- Valid index register scale factors. | 388 | -- Valid index register scale factors. |
@@ -460,9 +494,45 @@ local function wputszarg(sz, n) | |||
460 | end | 494 | end |
461 | 495 | ||
462 | -- Put multi-byte opcode with operand-size dependent modifications. | 496 | -- Put multi-byte opcode with operand-size dependent modifications. |
463 | local function wputop(sz, op, rex) | 497 | local function wputop(sz, op, rex, vex, vregr, vregxb) |
498 | local psz, sk = 0, nil | ||
499 | if vex then | ||
500 | local tail | ||
501 | if vex.m == 1 and band(rex, 11) == 0 then | ||
502 | if x64 and vregxb then | ||
503 | sk = map_vreg["modrm.reg"] | ||
504 | else | ||
505 | wputb(0xc5) | ||
506 | tail = shl(bxor(band(rex, 4), 4), 5) | ||
507 | psz = 3 | ||
508 | end | ||
509 | end | ||
510 | if not tail then | ||
511 | wputb(0xc4) | ||
512 | wputb(shl(bxor(band(rex, 7), 7), 5) + vex.m) | ||
513 | tail = shl(band(rex, 8), 4) | ||
514 | psz = 4 | ||
515 | end | ||
516 | local reg, vreg = 0, nil | ||
517 | if vex.v then | ||
518 | reg = vex.v.reg | ||
519 | if not reg then werror("bad vex operand") end | ||
520 | if reg < 0 then reg = 0; vreg = vex.v.vreg end | ||
521 | end | ||
522 | if sz == "y" or vex.l then tail = tail + 4 end | ||
523 | wputb(tail + shl(bxor(reg, 15), 3) + vex.p) | ||
524 | wvreg("vex.v", vreg) | ||
525 | rex = 0 | ||
526 | if op >= 256 then werror("bad vex opcode") end | ||
527 | else | ||
528 | if rex ~= 0 then | ||
529 | if not x64 then werror("bad operand size") end | ||
530 | elseif (vregr or vregxb) and x64 then | ||
531 | rex = 0x10 | ||
532 | sk = map_vreg["vex.v"] | ||
533 | end | ||
534 | end | ||
464 | local r | 535 | local r |
465 | if rex ~= 0 and not x64 then werror("bad operand size") end | ||
466 | if sz == "w" then wputb(102) end | 536 | if sz == "w" then wputb(102) end |
467 | -- Needs >32 bit numbers, but only for crc32 eax, word [ebx] | 537 | -- Needs >32 bit numbers, but only for crc32 eax, word [ebx] |
468 | if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end | 538 | if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end |
@@ -471,20 +541,20 @@ local function wputop(sz, op, rex) | |||
471 | if rex ~= 0 then | 541 | if rex ~= 0 then |
472 | local opc3 = band(op, 0xffff00) | 542 | local opc3 = band(op, 0xffff00) |
473 | if opc3 == 0x0f3a00 or opc3 == 0x0f3800 then | 543 | if opc3 == 0x0f3a00 or opc3 == 0x0f3800 then |
474 | wputb(64 + band(rex, 15)); rex = 0 | 544 | wputb(64 + band(rex, 15)); rex = 0; psz = 2 |
475 | end | 545 | end |
476 | end | 546 | end |
477 | wputb(shr(op, 16)); op = band(op, 0xffff) | 547 | wputb(shr(op, 16)); op = band(op, 0xffff); psz = psz + 1 |
478 | end | 548 | end |
479 | if op >= 256 then | 549 | if op >= 256 then |
480 | local b = shr(op, 8) | 550 | local b = shr(op, 8) |
481 | if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0 end | 551 | if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0; psz = 2 end |
482 | wputb(b) | 552 | wputb(b); op = band(op, 255); psz = psz + 1 |
483 | op = band(op, 255) | ||
484 | end | 553 | end |
485 | if rex ~= 0 then wputb(64 + band(rex, 15)) end | 554 | if rex ~= 0 then wputb(64 + band(rex, 15)); psz = 2 end |
486 | if sz == "b" then op = op - 1 end | 555 | if sz == "b" then op = op - 1 end |
487 | wputb(op) | 556 | wputb(op) |
557 | return psz, sk | ||
488 | end | 558 | end |
489 | 559 | ||
490 | -- Put ModRM or SIB formatted byte. | 560 | -- Put ModRM or SIB formatted byte. |
@@ -494,7 +564,7 @@ local function wputmodrm(m, s, rm, vs, vrm) | |||
494 | end | 564 | end |
495 | 565 | ||
496 | -- Put ModRM/SIB plus optional displacement. | 566 | -- Put ModRM/SIB plus optional displacement. |
497 | local function wputmrmsib(t, imark, s, vsreg) | 567 | local function wputmrmsib(t, imark, s, vsreg, psz, sk) |
498 | local vreg, vxreg | 568 | local vreg, vxreg |
499 | local reg, xreg = t.reg, t.xreg | 569 | local reg, xreg = t.reg, t.xreg |
500 | if reg and reg < 0 then reg = 0; vreg = t.vreg end | 570 | if reg and reg < 0 then reg = 0; vreg = t.vreg end |
@@ -504,8 +574,8 @@ local function wputmrmsib(t, imark, s, vsreg) | |||
504 | -- Register mode. | 574 | -- Register mode. |
505 | if sub(t.mode, 1, 1) == "r" then | 575 | if sub(t.mode, 1, 1) == "r" then |
506 | wputmodrm(3, s, reg) | 576 | wputmodrm(3, s, reg) |
507 | if vsreg then waction("VREG", vsreg); wputxb(2) end | 577 | wvreg("modrm.reg", vsreg, psz+1, sk, vreg) |
508 | if vreg then waction("VREG", vreg); wputxb(0) end | 578 | wvreg("modrm.rm.r", vreg, psz+1, sk) |
509 | return | 579 | return |
510 | end | 580 | end |
511 | 581 | ||
@@ -519,21 +589,22 @@ local function wputmrmsib(t, imark, s, vsreg) | |||
519 | -- [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp) | 589 | -- [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp) |
520 | wputmodrm(0, s, 4) | 590 | wputmodrm(0, s, 4) |
521 | if imark == "I" then waction("MARK") end | 591 | if imark == "I" then waction("MARK") end |
522 | if vsreg then waction("VREG", vsreg); wputxb(2) end | 592 | wvreg("modrm.reg", vsreg, psz+1, sk, vxreg) |
523 | wputmodrm(t.xsc, xreg, 5) | 593 | wputmodrm(t.xsc, xreg, 5) |
524 | if vxreg then waction("VREG", vxreg); wputxb(3) end | 594 | wvreg("sib.index", vxreg, psz+2, sk) |
525 | else | 595 | else |
526 | -- Pure 32 bit displacement. | 596 | -- Pure 32 bit displacement. |
527 | if x64 and tdisp ~= "table" then | 597 | if x64 and tdisp ~= "table" then |
528 | wputmodrm(0, s, 4) -- [disp] -> (0, s, esp) (0, esp, ebp) | 598 | wputmodrm(0, s, 4) -- [disp] -> (0, s, esp) (0, esp, ebp) |
599 | wvreg("modrm.reg", vsreg, psz+1, sk) | ||
529 | if imark == "I" then waction("MARK") end | 600 | if imark == "I" then waction("MARK") end |
530 | wputmodrm(0, 4, 5) | 601 | wputmodrm(0, 4, 5) |
531 | else | 602 | else |
532 | riprel = x64 | 603 | riprel = x64 |
533 | wputmodrm(0, s, 5) -- [disp|rip-label] -> (0, s, ebp) | 604 | wputmodrm(0, s, 5) -- [disp|rip-label] -> (0, s, ebp) |
605 | wvreg("modrm.reg", vsreg, psz+1, sk) | ||
534 | if imark == "I" then waction("MARK") end | 606 | if imark == "I" then waction("MARK") end |
535 | end | 607 | end |
536 | if vsreg then waction("VREG", vsreg); wputxb(2) end | ||
537 | end | 608 | end |
538 | if riprel then -- Emit rip-relative displacement. | 609 | if riprel then -- Emit rip-relative displacement. |
539 | if match("UWSiI", imark) then | 610 | if match("UWSiI", imark) then |
@@ -561,16 +632,16 @@ local function wputmrmsib(t, imark, s, vsreg) | |||
561 | if xreg or band(reg, 7) == 4 then | 632 | if xreg or band(reg, 7) == 4 then |
562 | wputmodrm(m or 2, s, 4) -- ModRM. | 633 | wputmodrm(m or 2, s, 4) -- ModRM. |
563 | if m == nil or imark == "I" then waction("MARK") end | 634 | if m == nil or imark == "I" then waction("MARK") end |
564 | if vsreg then waction("VREG", vsreg); wputxb(2) end | 635 | wvreg("modrm.reg", vsreg, psz+1, sk, vxreg or vreg) |
565 | wputmodrm(t.xsc or 0, xreg or 4, reg) -- SIB. | 636 | wputmodrm(t.xsc or 0, xreg or 4, reg) -- SIB. |
566 | if vxreg then waction("VREG", vxreg); wputxb(3) end | 637 | wvreg("sib.index", vxreg, psz+2, sk, vreg) |
567 | if vreg then waction("VREG", vreg); wputxb(1) end | 638 | wvreg("sib.base", vreg, psz+2, sk) |
568 | else | 639 | else |
569 | wputmodrm(m or 2, s, reg) -- ModRM. | 640 | wputmodrm(m or 2, s, reg) -- ModRM. |
570 | if (imark == "I" and (m == 1 or m == 2)) or | 641 | if (imark == "I" and (m == 1 or m == 2)) or |
571 | (m == nil and (vsreg or vreg)) then waction("MARK") end | 642 | (m == nil and (vsreg or vreg)) then waction("MARK") end |
572 | if vsreg then waction("VREG", vsreg); wputxb(2) end | 643 | wvreg("modrm.reg", vsreg, psz+1, sk, vreg) |
573 | if vreg then waction("VREG", vreg); wputxb(1) end | 644 | wvreg("modrm.rm.m", vreg, psz+1, sk) |
574 | end | 645 | end |
575 | 646 | ||
576 | -- Put displacement. | 647 | -- Put displacement. |
@@ -881,9 +952,16 @@ end | |||
881 | -- "m"/"M" generates ModRM/SIB from the 1st/2nd operand. | 952 | -- "m"/"M" generates ModRM/SIB from the 1st/2nd operand. |
882 | -- The spare 3 bits are either filled with the last hex digit or | 953 | -- The spare 3 bits are either filled with the last hex digit or |
883 | -- the result from a previous "r"/"R". The opcode is restored. | 954 | -- the result from a previous "r"/"R". The opcode is restored. |
955 | -- "u" Use VEX encoding, vvvv unused. | ||
956 | -- "v"/"V" Use VEX encoding, vvvv from 1st/2nd operand (the operand is | ||
957 | -- removed from the list used by future characters). | ||
958 | -- "w" Use VEX encoding, vvvv from 3rd operand. | ||
959 | -- "L" Force VEX.L | ||
884 | -- | 960 | -- |
885 | -- All of the following characters force a flush of the opcode: | 961 | -- All of the following characters force a flush of the opcode: |
886 | -- "o"/"O" stores a pure 32 bit disp (offset) from the 1st/2nd operand. | 962 | -- "o"/"O" stores a pure 32 bit disp (offset) from the 1st/2nd operand. |
963 | -- "s" stores a 4 bit immediate from the last register operand, | ||
964 | -- followed by 4 zero bits. | ||
887 | -- "S" stores a signed 8 bit immediate from the last operand. | 965 | -- "S" stores a signed 8 bit immediate from the last operand. |
888 | -- "U" stores an unsigned 8 bit immediate from the last operand. | 966 | -- "U" stores an unsigned 8 bit immediate from the last operand. |
889 | -- "W" stores an unsigned 16 bit immediate from the last operand. | 967 | -- "W" stores an unsigned 16 bit immediate from the last operand. |
@@ -1226,46 +1304,14 @@ local map_op = { | |||
1226 | movups_2 = "rmo:0F10rM|mro:0F11Rm", | 1304 | movups_2 = "rmo:0F10rM|mro:0F11Rm", |
1227 | orpd_2 = "rmo:660F56rM", | 1305 | orpd_2 = "rmo:660F56rM", |
1228 | orps_2 = "rmo:0F56rM", | 1306 | orps_2 = "rmo:0F56rM", |
1229 | packssdw_2 = "rmo:660F6BrM", | ||
1230 | packsswb_2 = "rmo:660F63rM", | ||
1231 | packuswb_2 = "rmo:660F67rM", | ||
1232 | paddb_2 = "rmo:660FFCrM", | ||
1233 | paddd_2 = "rmo:660FFErM", | ||
1234 | paddq_2 = "rmo:660FD4rM", | ||
1235 | paddsb_2 = "rmo:660FECrM", | ||
1236 | paddsw_2 = "rmo:660FEDrM", | ||
1237 | paddusb_2 = "rmo:660FDCrM", | ||
1238 | paddusw_2 = "rmo:660FDDrM", | ||
1239 | paddw_2 = "rmo:660FFDrM", | ||
1240 | pand_2 = "rmo:660FDBrM", | ||
1241 | pandn_2 = "rmo:660FDFrM", | ||
1242 | pause_0 = "F390", | 1307 | pause_0 = "F390", |
1243 | pavgb_2 = "rmo:660FE0rM", | ||
1244 | pavgw_2 = "rmo:660FE3rM", | ||
1245 | pcmpeqb_2 = "rmo:660F74rM", | ||
1246 | pcmpeqd_2 = "rmo:660F76rM", | ||
1247 | pcmpeqw_2 = "rmo:660F75rM", | ||
1248 | pcmpgtb_2 = "rmo:660F64rM", | ||
1249 | pcmpgtd_2 = "rmo:660F66rM", | ||
1250 | pcmpgtw_2 = "rmo:660F65rM", | ||
1251 | pextrw_3 = "rri/do:660FC5rMU|xri/wo:660F3A15nRmU", -- Mem op: SSE4.1 only. | 1308 | pextrw_3 = "rri/do:660FC5rMU|xri/wo:660F3A15nRmU", -- Mem op: SSE4.1 only. |
1252 | pinsrw_3 = "rri/od:660FC4rMU|rxi/ow:", | 1309 | pinsrw_3 = "rri/od:660FC4rMU|rxi/ow:", |
1253 | pmaddwd_2 = "rmo:660FF5rM", | ||
1254 | pmaxsw_2 = "rmo:660FEErM", | ||
1255 | pmaxub_2 = "rmo:660FDErM", | ||
1256 | pminsw_2 = "rmo:660FEArM", | ||
1257 | pminub_2 = "rmo:660FDArM", | ||
1258 | pmovmskb_2 = "rr/do:660FD7rM", | 1310 | pmovmskb_2 = "rr/do:660FD7rM", |
1259 | pmulhuw_2 = "rmo:660FE4rM", | ||
1260 | pmulhw_2 = "rmo:660FE5rM", | ||
1261 | pmullw_2 = "rmo:660FD5rM", | ||
1262 | pmuludq_2 = "rmo:660FF4rM", | ||
1263 | por_2 = "rmo:660FEBrM", | ||
1264 | prefetchnta_1 = "xb:n0F180m", | 1311 | prefetchnta_1 = "xb:n0F180m", |
1265 | prefetcht0_1 = "xb:n0F181m", | 1312 | prefetcht0_1 = "xb:n0F181m", |
1266 | prefetcht1_1 = "xb:n0F182m", | 1313 | prefetcht1_1 = "xb:n0F182m", |
1267 | prefetcht2_1 = "xb:n0F183m", | 1314 | prefetcht2_1 = "xb:n0F183m", |
1268 | psadbw_2 = "rmo:660FF6rM", | ||
1269 | pshufd_3 = "rmio:660F70rMU", | 1315 | pshufd_3 = "rmio:660F70rMU", |
1270 | pshufhw_3 = "rmio:F30F70rMU", | 1316 | pshufhw_3 = "rmio:F30F70rMU", |
1271 | pshuflw_3 = "rmio:F20F70rMU", | 1317 | pshuflw_3 = "rmio:F20F70rMU", |
@@ -1279,23 +1325,6 @@ local map_op = { | |||
1279 | psrldq_2 = "rio:660F733mU", | 1325 | psrldq_2 = "rio:660F733mU", |
1280 | psrlq_2 = "rmo:660FD3rM|rio:660F732mU", | 1326 | psrlq_2 = "rmo:660FD3rM|rio:660F732mU", |
1281 | psrlw_2 = "rmo:660FD1rM|rio:660F712mU", | 1327 | psrlw_2 = "rmo:660FD1rM|rio:660F712mU", |
1282 | psubb_2 = "rmo:660FF8rM", | ||
1283 | psubd_2 = "rmo:660FFArM", | ||
1284 | psubq_2 = "rmo:660FFBrM", | ||
1285 | psubsb_2 = "rmo:660FE8rM", | ||
1286 | psubsw_2 = "rmo:660FE9rM", | ||
1287 | psubusb_2 = "rmo:660FD8rM", | ||
1288 | psubusw_2 = "rmo:660FD9rM", | ||
1289 | psubw_2 = "rmo:660FF9rM", | ||
1290 | punpckhbw_2 = "rmo:660F68rM", | ||
1291 | punpckhdq_2 = "rmo:660F6ArM", | ||
1292 | punpckhqdq_2 = "rmo:660F6DrM", | ||
1293 | punpckhwd_2 = "rmo:660F69rM", | ||
1294 | punpcklbw_2 = "rmo:660F60rM", | ||
1295 | punpckldq_2 = "rmo:660F62rM", | ||
1296 | punpcklqdq_2 = "rmo:660F6CrM", | ||
1297 | punpcklwd_2 = "rmo:660F61rM", | ||
1298 | pxor_2 = "rmo:660FEFrM", | ||
1299 | rcpps_2 = "rmo:0F53rM", | 1328 | rcpps_2 = "rmo:0F53rM", |
1300 | rcpss_2 = "rro:F30F53rM|rx/od:", | 1329 | rcpss_2 = "rro:F30F53rM|rx/od:", |
1301 | rsqrtps_2 = "rmo:0F52rM", | 1330 | rsqrtps_2 = "rmo:0F52rM", |
@@ -1413,6 +1442,327 @@ local map_op = { | |||
1413 | movntsd_2 = "xr/qo:nF20F2BRm", | 1442 | movntsd_2 = "xr/qo:nF20F2BRm", |
1414 | movntss_2 = "xr/do:F30F2BRm", | 1443 | movntss_2 = "xr/do:F30F2BRm", |
1415 | -- popcnt is also in SSE4.2 | 1444 | -- popcnt is also in SSE4.2 |
1445 | |||
1446 | -- AES-NI | ||
1447 | aesdec_2 = "rmo:660F38DErM", | ||
1448 | aesdeclast_2 = "rmo:660F38DFrM", | ||
1449 | aesenc_2 = "rmo:660F38DCrM", | ||
1450 | aesenclast_2 = "rmo:660F38DDrM", | ||
1451 | aesimc_2 = "rmo:660F38DBrM", | ||
1452 | aeskeygenassist_3 = "rmio:660F3ADFrMU", | ||
1453 | pclmulqdq_3 = "rmio:660F3A44rMU", | ||
1454 | |||
1455 | -- AVX FP ops | ||
1456 | vaddsubpd_3 = "rrmoy:660FVD0rM", | ||
1457 | vaddsubps_3 = "rrmoy:F20FVD0rM", | ||
1458 | vandpd_3 = "rrmoy:660FV54rM", | ||
1459 | vandps_3 = "rrmoy:0FV54rM", | ||
1460 | vandnpd_3 = "rrmoy:660FV55rM", | ||
1461 | vandnps_3 = "rrmoy:0FV55rM", | ||
1462 | vblendpd_4 = "rrmioy:660F3AV0DrMU", | ||
1463 | vblendps_4 = "rrmioy:660F3AV0CrMU", | ||
1464 | vblendvpd_4 = "rrmroy:660F3AV4BrMs", | ||
1465 | vblendvps_4 = "rrmroy:660F3AV4ArMs", | ||
1466 | vbroadcastf128_2 = "rx/yo:660F38u1ArM", | ||
1467 | vcmppd_4 = "rrmioy:660FVC2rMU", | ||
1468 | vcmpps_4 = "rrmioy:0FVC2rMU", | ||
1469 | vcmpsd_4 = "rrrio:F20FVC2rMU|rrxi/ooq:", | ||
1470 | vcmpss_4 = "rrrio:F30FVC2rMU|rrxi/ood:", | ||
1471 | vcomisd_2 = "rro:660Fu2FrM|rx/oq:", | ||
1472 | vcomiss_2 = "rro:0Fu2FrM|rx/od:", | ||
1473 | vcvtdq2pd_2 = "rro:F30FuE6rM|rx/oq:|rm/yo:", | ||
1474 | vcvtdq2ps_2 = "rmoy:0Fu5BrM", | ||
1475 | vcvtpd2dq_2 = "rmoy:F20FuE6rM", | ||
1476 | vcvtpd2ps_2 = "rmoy:660Fu5ArM", | ||
1477 | vcvtps2dq_2 = "rmoy:660Fu5BrM", | ||
1478 | vcvtps2pd_2 = "rro:0Fu5ArM|rx/oq:|rm/yo:", | ||
1479 | vcvtsd2si_2 = "rr/do:F20Fu2DrM|rx/dq:|rr/qo:|rxq:", | ||
1480 | vcvtsd2ss_3 = "rrro:F20FV5ArM|rrx/ooq:", | ||
1481 | vcvtsi2sd_3 = "rrm/ood:F20FV2ArM|rrm/ooq:F20FVX2ArM", | ||
1482 | vcvtsi2ss_3 = "rrm/ood:F30FV2ArM|rrm/ooq:F30FVX2ArM", | ||
1483 | vcvtss2sd_3 = "rrro:F30FV5ArM|rrx/ood:", | ||
1484 | vcvtss2si_2 = "rr/do:F30Fu2DrM|rxd:|rr/qo:|rx/qd:", | ||
1485 | vcvttpd2dq_2 = "rmo:660FuE6rM|rm/oy:660FuLE6rM", | ||
1486 | vcvttps2dq_2 = "rmoy:F30Fu5BrM", | ||
1487 | vcvttsd2si_2 = "rr/do:F20Fu2CrM|rx/dq:|rr/qo:|rxq:", | ||
1488 | vcvttss2si_2 = "rr/do:F30Fu2CrM|rxd:|rr/qo:|rx/qd:", | ||
1489 | vdppd_4 = "rrmio:660F3AV41rMU", | ||
1490 | vdpps_4 = "rrmioy:660F3AV40rMU", | ||
1491 | vextractf128_3 = "mri/oy:660F3AuL19RmU", | ||
1492 | vextractps_3 = "mri/do:660F3Au17RmU", | ||
1493 | vhaddpd_3 = "rrmoy:660FV7CrM", | ||
1494 | vhaddps_3 = "rrmoy:F20FV7CrM", | ||
1495 | vhsubpd_3 = "rrmoy:660FV7DrM", | ||
1496 | vhsubps_3 = "rrmoy:F20FV7DrM", | ||
1497 | vinsertf128_4 = "rrmi/yyo:660F3AV18rMU", | ||
1498 | vinsertps_4 = "rrrio:660F3AV21rMU|rrxi/ood:", | ||
1499 | vldmxcsr_1 = "xd:0FuAE2m", | ||
1500 | vmaskmovps_3 = "rrxoy:660F38V2CrM|xrroy:660F38V2ERm", | ||
1501 | vmaskmovpd_3 = "rrxoy:660F38V2DrM|xrroy:660F38V2FRm", | ||
1502 | vmovapd_2 = "rmoy:660Fu28rM|mroy:660Fu29Rm", | ||
1503 | vmovaps_2 = "rmoy:0Fu28rM|mroy:0Fu29Rm", | ||
1504 | vmovd_2 = "rm/od:660Fu6ErM|rm/oq:660FuX6ErM|mr/do:660Fu7ERm|mr/qo:", | ||
1505 | vmovq_2 = "rro:F30Fu7ErM|rx/oq:|xr/qo:660FuD6Rm", | ||
1506 | vmovddup_2 = "rmy:F20Fu12rM|rro:|rx/oq:", | ||
1507 | vmovhlps_3 = "rrro:0FV12rM", | ||
1508 | vmovhpd_2 = "xr/qo:660Fu17Rm", | ||
1509 | vmovhpd_3 = "rrx/ooq:660FV16rM", | ||
1510 | vmovhps_2 = "xr/qo:0Fu17Rm", | ||
1511 | vmovhps_3 = "rrx/ooq:0FV16rM", | ||
1512 | vmovlhps_3 = "rrro:0FV16rM", | ||
1513 | vmovlpd_2 = "xr/qo:660Fu13Rm", | ||
1514 | vmovlpd_3 = "rrx/ooq:660FV12rM", | ||
1515 | vmovlps_2 = "xr/qo:0Fu13Rm", | ||
1516 | vmovlps_3 = "rrx/ooq:0FV12rM", | ||
1517 | vmovmskpd_2 = "rr/do:660Fu50rM|rr/dy:660FuL50rM", | ||
1518 | vmovmskps_2 = "rr/do:0Fu50rM|rr/dy:0FuL50rM", | ||
1519 | vmovntpd_2 = "xroy:660Fu2BRm", | ||
1520 | vmovntps_2 = "xroy:0Fu2BRm", | ||
1521 | vmovsd_2 = "rx/oq:F20Fu10rM|xr/qo:F20Fu11Rm", | ||
1522 | vmovsd_3 = "rrro:F20FV10rM", | ||
1523 | vmovshdup_2 = "rmoy:F30Fu16rM", | ||
1524 | vmovsldup_2 = "rmoy:F30Fu12rM", | ||
1525 | vmovss_2 = "rx/od:F30Fu10rM|xr/do:F30Fu11Rm", | ||
1526 | vmovss_3 = "rrro:F30FV10rM", | ||
1527 | vmovupd_2 = "rmoy:660Fu10rM|mroy:660Fu11Rm", | ||
1528 | vmovups_2 = "rmoy:0Fu10rM|mroy:0Fu11Rm", | ||
1529 | vorpd_3 = "rrmoy:660FV56rM", | ||
1530 | vorps_3 = "rrmoy:0FV56rM", | ||
1531 | vpermilpd_3 = "rrmoy:660F38V0DrM|rmioy:660F3Au05rMU", | ||
1532 | vpermilps_3 = "rrmoy:660F38V0CrM|rmioy:660F3Au04rMU", | ||
1533 | vperm2f128_4 = "rrmiy:660F3AV06rMU", | ||
1534 | vptestpd_2 = "rmoy:660F38u0FrM", | ||
1535 | vptestps_2 = "rmoy:660F38u0ErM", | ||
1536 | vrcpps_2 = "rmoy:0Fu53rM", | ||
1537 | vrcpss_3 = "rrro:F30FV53rM|rrx/ood:", | ||
1538 | vrsqrtps_2 = "rmoy:0Fu52rM", | ||
1539 | vrsqrtss_3 = "rrro:F30FV52rM|rrx/ood:", | ||
1540 | vroundpd_3 = "rmioy:660F3Au09rMU", | ||
1541 | vroundps_3 = "rmioy:660F3Au08rMU", | ||
1542 | vroundsd_4 = "rrrio:660F3AV0BrMU|rrxi/ooq:", | ||
1543 | vroundss_4 = "rrrio:660F3AV0ArMU|rrxi/ood:", | ||
1544 | vshufpd_4 = "rrmioy:660FVC6rMU", | ||
1545 | vshufps_4 = "rrmioy:0FVC6rMU", | ||
1546 | vsqrtps_2 = "rmoy:0Fu51rM", | ||
1547 | vsqrtss_2 = "rro:F30Fu51rM|rx/od:", | ||
1548 | vsqrtpd_2 = "rmoy:660Fu51rM", | ||
1549 | vsqrtsd_2 = "rro:F20Fu51rM|rx/oq:", | ||
1550 | vstmxcsr_1 = "xd:0FuAE3m", | ||
1551 | vucomisd_2 = "rro:660Fu2ErM|rx/oq:", | ||
1552 | vucomiss_2 = "rro:0Fu2ErM|rx/od:", | ||
1553 | vunpckhpd_3 = "rrmoy:660FV15rM", | ||
1554 | vunpckhps_3 = "rrmoy:0FV15rM", | ||
1555 | vunpcklpd_3 = "rrmoy:660FV14rM", | ||
1556 | vunpcklps_3 = "rrmoy:0FV14rM", | ||
1557 | vxorpd_3 = "rrmoy:660FV57rM", | ||
1558 | vxorps_3 = "rrmoy:0FV57rM", | ||
1559 | vzeroall_0 = "0FuL77", | ||
1560 | vzeroupper_0 = "0Fu77", | ||
1561 | |||
1562 | -- AVX2 FP ops | ||
1563 | vbroadcastss_2 = "rx/od:660F38u18rM|rx/yd:|rro:|rr/yo:", | ||
1564 | vbroadcastsd_2 = "rx/yq:660F38u19rM|rr/yo:", | ||
1565 | -- *vgather* (!vsib) | ||
1566 | vpermpd_3 = "rmiy:660F3AuX01rMU", | ||
1567 | vpermps_3 = "rrmy:660F38V16rM", | ||
1568 | |||
1569 | -- AVX, AVX2 integer ops | ||
1570 | -- In general, xmm requires AVX, ymm requires AVX2. | ||
1571 | vaesdec_3 = "rrmo:660F38VDErM", | ||
1572 | vaesdeclast_3 = "rrmo:660F38VDFrM", | ||
1573 | vaesenc_3 = "rrmo:660F38VDCrM", | ||
1574 | vaesenclast_3 = "rrmo:660F38VDDrM", | ||
1575 | vaesimc_2 = "rmo:660F38uDBrM", | ||
1576 | vaeskeygenassist_3 = "rmio:660F3AuDFrMU", | ||
1577 | vlddqu_2 = "rxoy:F20FuF0rM", | ||
1578 | vmaskmovdqu_2 = "rro:660FuF7rM", | ||
1579 | vmovdqa_2 = "rmoy:660Fu6FrM|mroy:660Fu7FRm", | ||
1580 | vmovdqu_2 = "rmoy:F30Fu6FrM|mroy:F30Fu7FRm", | ||
1581 | vmovntdq_2 = "xroy:660FuE7Rm", | ||
1582 | vmovntdqa_2 = "rxoy:660F38u2ArM", | ||
1583 | vmpsadbw_4 = "rrmioy:660F3AV42rMU", | ||
1584 | vpabsb_2 = "rmoy:660F38u1CrM", | ||
1585 | vpabsd_2 = "rmoy:660F38u1ErM", | ||
1586 | vpabsw_2 = "rmoy:660F38u1DrM", | ||
1587 | vpackusdw_3 = "rrmoy:660F38V2BrM", | ||
1588 | vpalignr_4 = "rrmioy:660F3AV0FrMU", | ||
1589 | vpblendvb_4 = "rrmroy:660F3AV4CrMs", | ||
1590 | vpblendw_4 = "rrmioy:660F3AV0ErMU", | ||
1591 | vpclmulqdq_4 = "rrmio:660F3AV44rMU", | ||
1592 | vpcmpeqq_3 = "rrmoy:660F38V29rM", | ||
1593 | vpcmpestri_3 = "rmio:660F3Au61rMU", | ||
1594 | vpcmpestrm_3 = "rmio:660F3Au60rMU", | ||
1595 | vpcmpgtq_3 = "rrmoy:660F38V37rM", | ||
1596 | vpcmpistri_3 = "rmio:660F3Au63rMU", | ||
1597 | vpcmpistrm_3 = "rmio:660F3Au62rMU", | ||
1598 | vpextrb_3 = "rri/do:660F3Au14nRmU|rri/qo:|xri/bo:", | ||
1599 | vpextrw_3 = "rri/do:660FuC5rMU|xri/wo:660F3Au15nRmU", | ||
1600 | vpextrd_3 = "mri/do:660F3Au16RmU", | ||
1601 | vpextrq_3 = "mri/qo:660F3Au16RmU", | ||
1602 | vphaddw_3 = "rrmoy:660F38V01rM", | ||
1603 | vphaddd_3 = "rrmoy:660F38V02rM", | ||
1604 | vphaddsw_3 = "rrmoy:660F38V03rM", | ||
1605 | vphminposuw_2 = "rmo:660F38u41rM", | ||
1606 | vphsubw_3 = "rrmoy:660F38V05rM", | ||
1607 | vphsubd_3 = "rrmoy:660F38V06rM", | ||
1608 | vphsubsw_3 = "rrmoy:660F38V07rM", | ||
1609 | vpinsrb_4 = "rrri/ood:660F3AV20rMU|rrxi/oob:", | ||
1610 | vpinsrw_4 = "rrri/ood:660FVC4rMU|rrxi/oow:", | ||
1611 | vpinsrd_4 = "rrmi/ood:660F3AV22rMU", | ||
1612 | vpinsrq_4 = "rrmi/ooq:660F3AVX22rMU", | ||
1613 | vpmaddubsw_3 = "rrmoy:660F38V04rM", | ||
1614 | vpmaxsb_3 = "rrmoy:660F38V3CrM", | ||
1615 | vpmaxsd_3 = "rrmoy:660F38V3DrM", | ||
1616 | vpmaxuw_3 = "rrmoy:660F38V3ErM", | ||
1617 | vpmaxud_3 = "rrmoy:660F38V3FrM", | ||
1618 | vpminsb_3 = "rrmoy:660F38V38rM", | ||
1619 | vpminsd_3 = "rrmoy:660F38V39rM", | ||
1620 | vpminuw_3 = "rrmoy:660F38V3ArM", | ||
1621 | vpminud_3 = "rrmoy:660F38V3BrM", | ||
1622 | vpmovmskb_2 = "rr/do:660FuD7rM|rr/dy:660FuLD7rM", | ||
1623 | vpmovsxbw_2 = "rroy:660F38u20rM|rx/oq:|rx/yo:", | ||
1624 | vpmovsxbd_2 = "rroy:660F38u21rM|rx/od:|rx/yq:", | ||
1625 | vpmovsxbq_2 = "rroy:660F38u22rM|rx/ow:|rx/yd:", | ||
1626 | vpmovsxwd_2 = "rroy:660F38u23rM|rx/oq:|rx/yo:", | ||
1627 | vpmovsxwq_2 = "rroy:660F38u24rM|rx/od:|rx/yq:", | ||
1628 | vpmovsxdq_2 = "rroy:660F38u25rM|rx/oq:|rx/yo:", | ||
1629 | vpmovzxbw_2 = "rroy:660F38u30rM|rx/oq:|rx/yo:", | ||
1630 | vpmovzxbd_2 = "rroy:660F38u31rM|rx/od:|rx/yq:", | ||
1631 | vpmovzxbq_2 = "rroy:660F38u32rM|rx/ow:|rx/yd:", | ||
1632 | vpmovzxwd_2 = "rroy:660F38u33rM|rx/oq:|rx/yo:", | ||
1633 | vpmovzxwq_2 = "rroy:660F38u34rM|rx/od:|rx/yq:", | ||
1634 | vpmovzxdq_2 = "rroy:660F38u35rM|rx/oq:|rx/yo:", | ||
1635 | vpmuldq_3 = "rrmoy:660F38V28rM", | ||
1636 | vpmulhrsw_3 = "rrmoy:660F38V0BrM", | ||
1637 | vpmulld_3 = "rrmoy:660F38V40rM", | ||
1638 | vpshufb_3 = "rrmoy:660F38V00rM", | ||
1639 | vpshufd_3 = "rmioy:660Fu70rMU", | ||
1640 | vpshufhw_3 = "rmioy:F30Fu70rMU", | ||
1641 | vpshuflw_3 = "rmioy:F20Fu70rMU", | ||
1642 | vpsignb_3 = "rrmoy:660F38V08rM", | ||
1643 | vpsignw_3 = "rrmoy:660F38V09rM", | ||
1644 | vpsignd_3 = "rrmoy:660F38V0ArM", | ||
1645 | vpslldq_3 = "rrioy:660Fv737mU", | ||
1646 | vpsllw_3 = "rrmoy:660FVF1rM|rrioy:660Fv716mU", | ||
1647 | vpslld_3 = "rrmoy:660FVF2rM|rrioy:660Fv726mU", | ||
1648 | vpsllq_3 = "rrmoy:660FVF3rM|rrioy:660Fv736mU", | ||
1649 | vpsraw_3 = "rrmoy:660FVE1rM|rrioy:660Fv714mU", | ||
1650 | vpsrad_3 = "rrmoy:660FVE2rM|rrioy:660Fv724mU", | ||
1651 | vpsrldq_3 = "rrioy:660Fv733mU", | ||
1652 | vpsrlw_3 = "rrmoy:660FVD1rM|rrioy:660Fv712mU", | ||
1653 | vpsrld_3 = "rrmoy:660FVD2rM|rrioy:660Fv722mU", | ||
1654 | vpsrlq_3 = "rrmoy:660FVD3rM|rrioy:660Fv732mU", | ||
1655 | vptest_2 = "rmoy:660F38u17rM", | ||
1656 | |||
1657 | -- AVX2 integer ops | ||
1658 | vbroadcasti128_2 = "rx/yo:660F38u5ArM", | ||
1659 | vinserti128_4 = "rrmi/yyo:660F3AV38rMU", | ||
1660 | vextracti128_3 = "mri/oy:660F3AuL39RmU", | ||
1661 | vpblendd_4 = "rrmioy:660F3AV02rMU", | ||
1662 | vpbroadcastb_2 = "rro:660F38u78rM|rx/ob:|rr/yo:|rx/yb:", | ||
1663 | vpbroadcastw_2 = "rro:660F38u79rM|rx/ow:|rr/yo:|rx/yw:", | ||
1664 | vpbroadcastd_2 = "rro:660F38u58rM|rx/od:|rr/yo:|rx/yd:", | ||
1665 | vpbroadcastq_2 = "rro:660F38u59rM|rx/oq:|rr/yo:|rx/yq:", | ||
1666 | vpermd_3 = "rrmy:660F38V36rM", | ||
1667 | vpermq_3 = "rmiy:660F3AuX00rMU", | ||
1668 | -- *vpgather* (!vsib) | ||
1669 | vperm2i128_4 = "rrmiy:660F3AV46rMU", | ||
1670 | vpmaskmovd_3 = "rrxoy:660F38V8CrM|xrroy:660F38V8ERm", | ||
1671 | vpmaskmovq_3 = "rrxoy:660F38VX8CrM|xrroy:660F38VX8ERm", | ||
1672 | vpsllvd_3 = "rrmoy:660F38V47rM", | ||
1673 | vpsllvq_3 = "rrmoy:660F38VX47rM", | ||
1674 | vpsravd_3 = "rrmoy:660F38V46rM", | ||
1675 | vpsrlvd_3 = "rrmoy:660F38V45rM", | ||
1676 | vpsrlvq_3 = "rrmoy:660F38VX45rM", | ||
1677 | |||
1678 | -- Intel ADX | ||
1679 | adcx_2 = "rmqd:660F38F6rM", | ||
1680 | adox_2 = "rmqd:F30F38F6rM", | ||
1681 | |||
1682 | -- BMI1 | ||
1683 | andn_3 = "rrmqd:0F38VF2rM", | ||
1684 | bextr_3 = "rmrqd:0F38wF7rM", | ||
1685 | blsi_2 = "rmqd:0F38vF33m", | ||
1686 | blsmsk_2 = "rmqd:0F38vF32m", | ||
1687 | blsr_2 = "rmqd:0F38vF31m", | ||
1688 | tzcnt_2 = "rmqdw:F30FBCrM", | ||
1689 | |||
1690 | -- BMI2 | ||
1691 | bzhi_3 = "rmrqd:0F38wF5rM", | ||
1692 | mulx_3 = "rrmqd:F20F38VF6rM", | ||
1693 | pdep_3 = "rrmqd:F20F38VF5rM", | ||
1694 | pext_3 = "rrmqd:F30F38VF5rM", | ||
1695 | rorx_3 = "rmSqd:F20F3AuF0rMS", | ||
1696 | sarx_3 = "rmrqd:F30F38wF7rM", | ||
1697 | shrx_3 = "rmrqd:F20F38wF7rM", | ||
1698 | shlx_3 = "rmrqd:660F38wF7rM", | ||
1699 | |||
1700 | -- FMA3 | ||
1701 | vfmaddsub132pd_3 = "rrmoy:660F38VX96rM", | ||
1702 | vfmaddsub132ps_3 = "rrmoy:660F38V96rM", | ||
1703 | vfmaddsub213pd_3 = "rrmoy:660F38VXA6rM", | ||
1704 | vfmaddsub213ps_3 = "rrmoy:660F38VA6rM", | ||
1705 | vfmaddsub231pd_3 = "rrmoy:660F38VXB6rM", | ||
1706 | vfmaddsub231ps_3 = "rrmoy:660F38VB6rM", | ||
1707 | |||
1708 | vfmsubadd132pd_3 = "rrmoy:660F38VX97rM", | ||
1709 | vfmsubadd132ps_3 = "rrmoy:660F38V97rM", | ||
1710 | vfmsubadd213pd_3 = "rrmoy:660F38VXA7rM", | ||
1711 | vfmsubadd213ps_3 = "rrmoy:660F38VA7rM", | ||
1712 | vfmsubadd231pd_3 = "rrmoy:660F38VXB7rM", | ||
1713 | vfmsubadd231ps_3 = "rrmoy:660F38VB7rM", | ||
1714 | |||
1715 | vfmadd132pd_3 = "rrmoy:660F38VX98rM", | ||
1716 | vfmadd132ps_3 = "rrmoy:660F38V98rM", | ||
1717 | vfmadd132sd_3 = "rrro:660F38VX99rM|rrx/ooq:", | ||
1718 | vfmadd132ss_3 = "rrro:660F38V99rM|rrx/ood:", | ||
1719 | vfmadd213pd_3 = "rrmoy:660F38VXA8rM", | ||
1720 | vfmadd213ps_3 = "rrmoy:660F38VA8rM", | ||
1721 | vfmadd213sd_3 = "rrro:660F38VXA9rM|rrx/ooq:", | ||
1722 | vfmadd213ss_3 = "rrro:660F38VA9rM|rrx/ood:", | ||
1723 | vfmadd231pd_3 = "rrmoy:660F38VXB8rM", | ||
1724 | vfmadd231ps_3 = "rrmoy:660F38VB8rM", | ||
1725 | vfmadd231sd_3 = "rrro:660F38VXB9rM|rrx/ooq:", | ||
1726 | vfmadd231ss_3 = "rrro:660F38VB9rM|rrx/ood:", | ||
1727 | |||
1728 | vfmsub132pd_3 = "rrmoy:660F38VX9ArM", | ||
1729 | vfmsub132ps_3 = "rrmoy:660F38V9ArM", | ||
1730 | vfmsub132sd_3 = "rrro:660F38VX9BrM|rrx/ooq:", | ||
1731 | vfmsub132ss_3 = "rrro:660F38V9BrM|rrx/ood:", | ||
1732 | vfmsub213pd_3 = "rrmoy:660F38VXAArM", | ||
1733 | vfmsub213ps_3 = "rrmoy:660F38VAArM", | ||
1734 | vfmsub213sd_3 = "rrro:660F38VXABrM|rrx/ooq:", | ||
1735 | vfmsub213ss_3 = "rrro:660F38VABrM|rrx/ood:", | ||
1736 | vfmsub231pd_3 = "rrmoy:660F38VXBArM", | ||
1737 | vfmsub231ps_3 = "rrmoy:660F38VBArM", | ||
1738 | vfmsub231sd_3 = "rrro:660F38VXBBrM|rrx/ooq:", | ||
1739 | vfmsub231ss_3 = "rrro:660F38VBBrM|rrx/ood:", | ||
1740 | |||
1741 | vfnmadd132pd_3 = "rrmoy:660F38VX9CrM", | ||
1742 | vfnmadd132ps_3 = "rrmoy:660F38V9CrM", | ||
1743 | vfnmadd132sd_3 = "rrro:660F38VX9DrM|rrx/ooq:", | ||
1744 | vfnmadd132ss_3 = "rrro:660F38V9DrM|rrx/ood:", | ||
1745 | vfnmadd213pd_3 = "rrmoy:660F38VXACrM", | ||
1746 | vfnmadd213ps_3 = "rrmoy:660F38VACrM", | ||
1747 | vfnmadd213sd_3 = "rrro:660F38VXADrM|rrx/ooq:", | ||
1748 | vfnmadd213ss_3 = "rrro:660F38VADrM|rrx/ood:", | ||
1749 | vfnmadd231pd_3 = "rrmoy:660F38VXBCrM", | ||
1750 | vfnmadd231ps_3 = "rrmoy:660F38VBCrM", | ||
1751 | vfnmadd231sd_3 = "rrro:660F38VXBDrM|rrx/ooq:", | ||
1752 | vfnmadd231ss_3 = "rrro:660F38VBDrM|rrx/ood:", | ||
1753 | |||
1754 | vfnmsub132pd_3 = "rrmoy:660F38VX9ErM", | ||
1755 | vfnmsub132ps_3 = "rrmoy:660F38V9ErM", | ||
1756 | vfnmsub132sd_3 = "rrro:660F38VX9FrM|rrx/ooq:", | ||
1757 | vfnmsub132ss_3 = "rrro:660F38V9FrM|rrx/ood:", | ||
1758 | vfnmsub213pd_3 = "rrmoy:660F38VXAErM", | ||
1759 | vfnmsub213ps_3 = "rrmoy:660F38VAErM", | ||
1760 | vfnmsub213sd_3 = "rrro:660F38VXAFrM|rrx/ooq:", | ||
1761 | vfnmsub213ss_3 = "rrro:660F38VAFrM|rrx/ood:", | ||
1762 | vfnmsub231pd_3 = "rrmoy:660F38VXBErM", | ||
1763 | vfnmsub231ps_3 = "rrmoy:660F38VBErM", | ||
1764 | vfnmsub231sd_3 = "rrro:660F38VXBFrM|rrx/ooq:", | ||
1765 | vfnmsub231ss_3 = "rrro:660F38VBFrM|rrx/ood:", | ||
1416 | } | 1766 | } |
1417 | 1767 | ||
1418 | ------------------------------------------------------------------------------ | 1768 | ------------------------------------------------------------------------------ |
@@ -1463,28 +1813,58 @@ for cc,n in pairs{ b=0, e=1, be=2, u=3, nb=4, ne=5, nbe=6, nu=7 } do | |||
1463 | map_op["fcmov"..cc.."_2"] = format("Fff:%04XR", nc) -- P6+ | 1813 | map_op["fcmov"..cc.."_2"] = format("Fff:%04XR", nc) -- P6+ |
1464 | end | 1814 | end |
1465 | 1815 | ||
1466 | -- SSE FP arithmetic ops. | 1816 | -- SSE / AVX FP arithmetic ops. |
1467 | for name,n in pairs{ sqrt = 1, add = 8, mul = 9, | 1817 | for name,n in pairs{ sqrt = 1, add = 8, mul = 9, |
1468 | sub = 12, min = 13, div = 14, max = 15 } do | 1818 | sub = 12, min = 13, div = 14, max = 15 } do |
1469 | map_op[name.."ps_2"] = format("rmo:0F5%XrM", n) | 1819 | map_op[name.."ps_2"] = format("rmo:0F5%XrM", n) |
1470 | map_op[name.."ss_2"] = format("rro:F30F5%XrM|rx/od:", n) | 1820 | map_op[name.."ss_2"] = format("rro:F30F5%XrM|rx/od:", n) |
1471 | map_op[name.."pd_2"] = format("rmo:660F5%XrM", n) | 1821 | map_op[name.."pd_2"] = format("rmo:660F5%XrM", n) |
1472 | map_op[name.."sd_2"] = format("rro:F20F5%XrM|rx/oq:", n) | 1822 | map_op[name.."sd_2"] = format("rro:F20F5%XrM|rx/oq:", n) |
1823 | if n ~= 1 then | ||
1824 | map_op["v"..name.."ps_3"] = format("rrmoy:0FV5%XrM", n) | ||
1825 | map_op["v"..name.."ss_3"] = format("rrro:F30FV5%XrM|rrx/ood:", n) | ||
1826 | map_op["v"..name.."pd_3"] = format("rrmoy:660FV5%XrM", n) | ||
1827 | map_op["v"..name.."sd_3"] = format("rrro:F20FV5%XrM|rrx/ooq:", n) | ||
1828 | end | ||
1829 | end | ||
1830 | |||
1831 | -- SSE2 / AVX / AVX2 integer arithmetic ops (66 0F leaf). | ||
1832 | for name,n in pairs{ | ||
1833 | paddb = 0xFC, paddw = 0xFD, paddd = 0xFE, paddq = 0xD4, | ||
1834 | paddsb = 0xEC, paddsw = 0xED, packssdw = 0x6B, | ||
1835 | packsswb = 0x63, packuswb = 0x67, paddusb = 0xDC, | ||
1836 | paddusw = 0xDD, pand = 0xDB, pandn = 0xDF, pavgb = 0xE0, | ||
1837 | pavgw = 0xE3, pcmpeqb = 0x74, pcmpeqd = 0x76, | ||
1838 | pcmpeqw = 0x75, pcmpgtb = 0x64, pcmpgtd = 0x66, | ||
1839 | pcmpgtw = 0x65, pmaddwd = 0xF5, pmaxsw = 0xEE, | ||
1840 | pmaxub = 0xDE, pminsw = 0xEA, pminub = 0xDA, | ||
1841 | pmulhuw = 0xE4, pmulhw = 0xE5, pmullw = 0xD5, | ||
1842 | pmuludq = 0xF4, por = 0xEB, psadbw = 0xF6, psubb = 0xF8, | ||
1843 | psubw = 0xF9, psubd = 0xFA, psubq = 0xFB, psubsb = 0xE8, | ||
1844 | psubsw = 0xE9, psubusb = 0xD8, psubusw = 0xD9, | ||
1845 | punpckhbw = 0x68, punpckhwd = 0x69, punpckhdq = 0x6A, | ||
1846 | punpckhqdq = 0x6D, punpcklbw = 0x60, punpcklwd = 0x61, | ||
1847 | punpckldq = 0x62, punpcklqdq = 0x6C, pxor = 0xEF | ||
1848 | } do | ||
1849 | map_op[name.."_2"] = format("rmo:660F%02XrM", n) | ||
1850 | map_op["v"..name.."_3"] = format("rrmoy:660FV%02XrM", n) | ||
1473 | end | 1851 | end |
1474 | 1852 | ||
1475 | ------------------------------------------------------------------------------ | 1853 | ------------------------------------------------------------------------------ |
1476 | 1854 | ||
1855 | local map_vexarg = { u = false, v = 1, V = 2, w = 3 } | ||
1856 | |||
1477 | -- Process pattern string. | 1857 | -- Process pattern string. |
1478 | local function dopattern(pat, args, sz, op, needrex) | 1858 | local function dopattern(pat, args, sz, op, needrex) |
1479 | local digit, addin | 1859 | local digit, addin, vex |
1480 | local opcode = 0 | 1860 | local opcode = 0 |
1481 | local szov = sz | 1861 | local szov = sz |
1482 | local narg = 1 | 1862 | local narg = 1 |
1483 | local rex = 0 | 1863 | local rex = 0 |
1484 | 1864 | ||
1485 | -- Limit number of section buffer positions used by a single dasm_put(). | 1865 | -- Limit number of section buffer positions used by a single dasm_put(). |
1486 | -- A single opcode needs a maximum of 5 positions. | 1866 | -- A single opcode needs a maximum of 6 positions. |
1487 | if secpos+5 > maxsecpos then wflush() end | 1867 | if secpos+6 > maxsecpos then wflush() end |
1488 | 1868 | ||
1489 | -- Process each character. | 1869 | -- Process each character. |
1490 | for c in gmatch(pat.."|", ".") do | 1870 | for c in gmatch(pat.."|", ".") do |
@@ -1498,6 +1878,8 @@ local function dopattern(pat, args, sz, op, needrex) | |||
1498 | szov = nil | 1878 | szov = nil |
1499 | elseif c == "X" then -- Force REX.W. | 1879 | elseif c == "X" then -- Force REX.W. |
1500 | rex = 8 | 1880 | rex = 8 |
1881 | elseif c == "L" then -- Force VEX.L. | ||
1882 | vex.l = true | ||
1501 | elseif c == "r" then -- Merge 1st operand regno. into opcode. | 1883 | elseif c == "r" then -- Merge 1st operand regno. into opcode. |
1502 | addin = args[1]; opcode = opcode + (addin.reg % 8) | 1884 | addin = args[1]; opcode = opcode + (addin.reg % 8) |
1503 | if narg < 2 then narg = 2 end | 1885 | if narg < 2 then narg = 2 end |
@@ -1521,21 +1903,42 @@ local function dopattern(pat, args, sz, op, needrex) | |||
1521 | if t.xreg and t.xreg > 7 then rex = rex + 2 end | 1903 | if t.xreg and t.xreg > 7 then rex = rex + 2 end |
1522 | if s > 7 then rex = rex + 4 end | 1904 | if s > 7 then rex = rex + 4 end |
1523 | if needrex then rex = rex + 16 end | 1905 | if needrex then rex = rex + 16 end |
1524 | wputop(szov, opcode, rex); opcode = nil | 1906 | local psz, sk = wputop(szov, opcode, rex, vex, s < 0, t.vreg or t.vxreg) |
1907 | opcode = nil | ||
1525 | local imark = sub(pat, -1) -- Force a mark (ugly). | 1908 | local imark = sub(pat, -1) -- Force a mark (ugly). |
1526 | -- Put ModRM/SIB with regno/last digit as spare. | 1909 | -- Put ModRM/SIB with regno/last digit as spare. |
1527 | wputmrmsib(t, imark, s, addin and addin.vreg) | 1910 | wputmrmsib(t, imark, s, addin and addin.vreg, psz, sk) |
1528 | addin = nil | 1911 | addin = nil |
1912 | elseif map_vexarg[c] ~= nil then -- Encode using VEX prefix | ||
1913 | local b = band(opcode, 255); opcode = shr(opcode, 8) | ||
1914 | local m = 1 | ||
1915 | if b == 0x38 then m = 2 | ||
1916 | elseif b == 0x3a then m = 3 end | ||
1917 | if m ~= 1 then b = band(opcode, 255); opcode = shr(opcode, 8) end | ||
1918 | if b ~= 0x0f then | ||
1919 | werror("expected `0F', `0F38', or `0F3A' to precede `"..c.. | ||
1920 | "' in pattern `"..pat.."' for `"..op.."'") | ||
1921 | end | ||
1922 | local v = map_vexarg[c] | ||
1923 | if v then v = remove(args, v) end | ||
1924 | b = band(opcode, 255) | ||
1925 | local p = 0 | ||
1926 | if b == 0x66 then p = 1 | ||
1927 | elseif b == 0xf3 then p = 2 | ||
1928 | elseif b == 0xf2 then p = 3 end | ||
1929 | if p ~= 0 then opcode = shr(opcode, 8) end | ||
1930 | if opcode ~= 0 then wputop(nil, opcode, 0); opcode = 0 end | ||
1931 | vex = { m = m, p = p, v = v } | ||
1529 | else | 1932 | else |
1530 | if opcode then -- Flush opcode. | 1933 | if opcode then -- Flush opcode. |
1531 | if szov == "q" and rex == 0 then rex = rex + 8 end | 1934 | if szov == "q" and rex == 0 then rex = rex + 8 end |
1532 | if needrex then rex = rex + 16 end | 1935 | if needrex then rex = rex + 16 end |
1533 | if addin and addin.reg == -1 then | 1936 | if addin and addin.reg == -1 then |
1534 | wputop(szov, opcode - 7, rex) | 1937 | local psz, sk = wputop(szov, opcode - 7, rex, vex, true) |
1535 | waction("VREG", addin.vreg); wputxb(0) | 1938 | wvreg("opcode", addin.vreg, psz, sk) |
1536 | else | 1939 | else |
1537 | if addin and addin.reg > 7 then rex = rex + 1 end | 1940 | if addin and addin.reg > 7 then rex = rex + 1 end |
1538 | wputop(szov, opcode, rex) | 1941 | wputop(szov, opcode, rex, vex) |
1539 | end | 1942 | end |
1540 | opcode = nil | 1943 | opcode = nil |
1541 | end | 1944 | end |
@@ -1572,6 +1975,14 @@ local function dopattern(pat, args, sz, op, needrex) | |||
1572 | else | 1975 | else |
1573 | wputlabel("REL_", imm, 2) | 1976 | wputlabel("REL_", imm, 2) |
1574 | end | 1977 | end |
1978 | elseif c == "s" then | ||
1979 | local reg = a.reg | ||
1980 | if reg < 0 then | ||
1981 | wputb(0) | ||
1982 | wvreg("imm.hi", a.vreg) | ||
1983 | else | ||
1984 | wputb(shl(reg, 4)) | ||
1985 | end | ||
1575 | else | 1986 | else |
1576 | werror("bad char `"..c.."' in pattern `"..pat.."' for `"..op.."'") | 1987 | werror("bad char `"..c.."' in pattern `"..pat.."' for `"..op.."'") |
1577 | end | 1988 | end |
@@ -1648,11 +2059,14 @@ map_op[".template__"] = function(params, template, nparams) | |||
1648 | if pat == "" then pat = lastpat else lastpat = pat end | 2059 | if pat == "" then pat = lastpat else lastpat = pat end |
1649 | if matchtm(tm, args) then | 2060 | if matchtm(tm, args) then |
1650 | local prefix = sub(szm, 1, 1) | 2061 | local prefix = sub(szm, 1, 1) |
1651 | if prefix == "/" then -- Match both operand sizes. | 2062 | if prefix == "/" then -- Exactly match leading operand sizes. |
1652 | if args[1].opsize == sub(szm, 2, 2) and | 2063 | for i = #szm,1,-1 do |
1653 | args[2].opsize == sub(szm, 3, 3) then | 2064 | if i == 1 then |
1654 | dopattern(pat, args, sz, params.op, needrex) -- Process pattern. | 2065 | dopattern(pat, args, sz, params.op, needrex) -- Process pattern. |
1655 | return | 2066 | return |
2067 | elseif args[i-1].opsize ~= sub(szm, i, i) then | ||
2068 | break | ||
2069 | end | ||
1656 | end | 2070 | end |
1657 | else -- Match common operand size. | 2071 | else -- Match common operand size. |
1658 | local szp = sz | 2072 | local szp = sz |
@@ -1717,8 +2131,8 @@ if x64 then | |||
1717 | rex = a.reg > 7 and 9 or 8 | 2131 | rex = a.reg > 7 and 9 or 8 |
1718 | end | 2132 | end |
1719 | end | 2133 | end |
1720 | wputop(sz, opcode, rex) | 2134 | local psz, sk = wputop(sz, opcode, rex, nil, vreg) |
1721 | if vreg then waction("VREG", vreg); wputxb(0) end | 2135 | wvreg("opcode", vreg, psz, sk) |
1722 | waction("IMM_D", format("(unsigned int)(%s)", op64)) | 2136 | waction("IMM_D", format("(unsigned int)(%s)", op64)) |
1723 | waction("IMM_D", format("(unsigned int)((%s)>>32)", op64)) | 2137 | waction("IMM_D", format("(unsigned int)((%s)>>32)", op64)) |
1724 | end | 2138 | end |
diff --git a/dynasm/dynasm.lua b/dynasm/dynasm.lua index 5fda425b..8e85af24 100644 --- a/dynasm/dynasm.lua +++ b/dynasm/dynasm.lua | |||
@@ -10,9 +10,9 @@ | |||
10 | local _info = { | 10 | local _info = { |
11 | name = "DynASM", | 11 | name = "DynASM", |
12 | description = "A dynamic assembler for code generation engines", | 12 | description = "A dynamic assembler for code generation engines", |
13 | version = "1.3.0", | 13 | version = "1.4.0", |
14 | vernum = 10300, | 14 | vernum = 10400, |
15 | release = "2011-05-05", | 15 | release = "2015-10-18", |
16 | author = "Mike Pall", | 16 | author = "Mike Pall", |
17 | url = "http://luajit.org/dynasm.html", | 17 | url = "http://luajit.org/dynasm.html", |
18 | license = "MIT", | 18 | license = "MIT", |
@@ -630,6 +630,7 @@ end | |||
630 | -- Load architecture-specific module. | 630 | -- Load architecture-specific module. |
631 | local function loadarch(arch) | 631 | local function loadarch(arch) |
632 | if not match(arch, "^[%w_]+$") then return "bad arch name" end | 632 | if not match(arch, "^[%w_]+$") then return "bad arch name" end |
633 | _G._map_def = map_def | ||
633 | local ok, m_arch = pcall(require, "dasm_"..arch) | 634 | local ok, m_arch = pcall(require, "dasm_"..arch) |
634 | if not ok then return "cannot load module: "..m_arch end | 635 | if not ok then return "cannot load module: "..m_arch end |
635 | g_arch = m_arch | 636 | g_arch = m_arch |
diff --git a/etc/luajit.pc b/etc/luajit.pc index 36840ab8..a78f1746 100644 --- a/etc/luajit.pc +++ b/etc/luajit.pc | |||
@@ -1,8 +1,8 @@ | |||
1 | # Package information for LuaJIT to be used by pkg-config. | 1 | # Package information for LuaJIT to be used by pkg-config. |
2 | majver=2 | 2 | majver=2 |
3 | minver=0 | 3 | minver=1 |
4 | relver=5 | 4 | relver=0 |
5 | version=${majver}.${minver}.${relver} | 5 | version=${majver}.${minver}.${relver}-beta3 |
6 | abiver=5.1 | 6 | abiver=5.1 |
7 | 7 | ||
8 | prefix=/usr/local | 8 | prefix=/usr/local |
diff --git a/src/.gitignore b/src/.gitignore index fc94e82c..1a30573c 100644 --- a/src/.gitignore +++ b/src/.gitignore | |||
@@ -4,4 +4,4 @@ lj_ffdef.h | |||
4 | lj_libdef.h | 4 | lj_libdef.h |
5 | lj_recdef.h | 5 | lj_recdef.h |
6 | lj_folddef.h | 6 | lj_folddef.h |
7 | lj_vm.s | 7 | lj_vm.[sS] |
diff --git a/src/Makefile b/src/Makefile index a588dc3d..77e0d537 100644 --- a/src/Makefile +++ b/src/Makefile | |||
@@ -11,8 +11,8 @@ | |||
11 | ############################################################################## | 11 | ############################################################################## |
12 | 12 | ||
13 | MAJVER= 2 | 13 | MAJVER= 2 |
14 | MINVER= 0 | 14 | MINVER= 1 |
15 | RELVER= 5 | 15 | RELVER= 0 |
16 | ABIVER= 5.1 | 16 | ABIVER= 5.1 |
17 | NODOTABIVER= 51 | 17 | NODOTABIVER= 51 |
18 | 18 | ||
@@ -44,17 +44,14 @@ CCOPT= -O2 -fomit-frame-pointer | |||
44 | # | 44 | # |
45 | # Target-specific compiler options: | 45 | # Target-specific compiler options: |
46 | # | 46 | # |
47 | # x86 only: it's recommended to compile at least for i686. Better yet, | ||
48 | # compile for an architecture that has SSE2, too (-msse -msse2). | ||
49 | # | ||
50 | # x86/x64 only: For GCC 4.2 or higher and if you don't intend to distribute | 47 | # x86/x64 only: For GCC 4.2 or higher and if you don't intend to distribute |
51 | # the binaries to a different machine you could also use: -march=native | 48 | # the binaries to a different machine you could also use: -march=native |
52 | # | 49 | # |
53 | CCOPT_x86= -march=i686 | 50 | CCOPT_x86= -march=i686 -msse -msse2 -mfpmath=sse |
54 | CCOPT_x64= | 51 | CCOPT_x64= |
55 | CCOPT_arm= | 52 | CCOPT_arm= |
53 | CCOPT_arm64= | ||
56 | CCOPT_ppc= | 54 | CCOPT_ppc= |
57 | CCOPT_ppcspe= | ||
58 | CCOPT_mips= | 55 | CCOPT_mips= |
59 | # | 56 | # |
60 | CCDEBUG= | 57 | CCDEBUG= |
@@ -113,6 +110,9 @@ XCFLAGS= | |||
113 | #XCFLAGS+= -DLUAJIT_NUMMODE=1 | 110 | #XCFLAGS+= -DLUAJIT_NUMMODE=1 |
114 | #XCFLAGS+= -DLUAJIT_NUMMODE=2 | 111 | #XCFLAGS+= -DLUAJIT_NUMMODE=2 |
115 | # | 112 | # |
113 | # Disable LJ_GC64 mode for x64. | ||
114 | #XCFLAGS+= -DLUAJIT_DISABLE_GC64 | ||
115 | # | ||
116 | ############################################################################## | 116 | ############################################################################## |
117 | 117 | ||
118 | ############################################################################## | 118 | ############################################################################## |
@@ -124,8 +124,8 @@ XCFLAGS= | |||
124 | # | 124 | # |
125 | # Use the system provided memory allocator (realloc) instead of the | 125 | # Use the system provided memory allocator (realloc) instead of the |
126 | # bundled memory allocator. This is slower, but sometimes helpful for | 126 | # bundled memory allocator. This is slower, but sometimes helpful for |
127 | # debugging. This option cannot be enabled on x64, since realloc usually | 127 | # debugging. This option cannot be enabled on x64 without GC64, since |
128 | # doesn't return addresses in the right address range. | 128 | # realloc usually doesn't return addresses in the right address range. |
129 | # OTOH this option is mandatory for Valgrind's memcheck tool on x64 and | 129 | # OTOH this option is mandatory for Valgrind's memcheck tool on x64 and |
130 | # the only way to get useful results from it for all other architectures. | 130 | # the only way to get useful results from it for all other architectures. |
131 | #XCFLAGS+= -DLUAJIT_USE_SYSMALLOC | 131 | #XCFLAGS+= -DLUAJIT_USE_SYSMALLOC |
@@ -189,7 +189,8 @@ endif | |||
189 | # make HOST_CC="gcc -m32" CROSS=i586-mingw32msvc- TARGET_SYS=Windows | 189 | # make HOST_CC="gcc -m32" CROSS=i586-mingw32msvc- TARGET_SYS=Windows |
190 | # make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu- | 190 | # make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu- |
191 | 191 | ||
192 | CCOPTIONS= $(CCDEBUG) $(CCOPT) $(CCWARN) $(XCFLAGS) $(CFLAGS) | 192 | ASOPTIONS= $(CCOPT) $(CCWARN) $(XCFLAGS) $(CFLAGS) |
193 | CCOPTIONS= $(CCDEBUG) $(ASOPTIONS) | ||
193 | LDOPTIONS= $(CCDEBUG) $(LDFLAGS) | 194 | LDOPTIONS= $(CCDEBUG) $(LDFLAGS) |
194 | 195 | ||
195 | HOST_CC= $(CC) | 196 | HOST_CC= $(CC) |
@@ -229,6 +230,7 @@ TARGET_XLDFLAGS= | |||
229 | TARGET_XLIBS= -lm | 230 | TARGET_XLIBS= -lm |
230 | TARGET_TCFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS) | 231 | TARGET_TCFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS) |
231 | TARGET_ACFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS) | 232 | TARGET_ACFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS) |
233 | TARGET_ASFLAGS= $(ASOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS) | ||
232 | TARGET_ALDFLAGS= $(LDOPTIONS) $(TARGET_XLDFLAGS) $(TARGET_FLAGS) $(TARGET_LDFLAGS) | 234 | TARGET_ALDFLAGS= $(LDOPTIONS) $(TARGET_XLDFLAGS) $(TARGET_FLAGS) $(TARGET_LDFLAGS) |
233 | TARGET_ASHLDFLAGS= $(LDOPTIONS) $(TARGET_XSHLDFLAGS) $(TARGET_FLAGS) $(TARGET_SHLDFLAGS) | 235 | TARGET_ASHLDFLAGS= $(LDOPTIONS) $(TARGET_XSHLDFLAGS) $(TARGET_FLAGS) $(TARGET_SHLDFLAGS) |
234 | TARGET_ALIBS= $(TARGET_XLIBS) $(LIBS) $(TARGET_LIBS) | 236 | TARGET_ALIBS= $(TARGET_XLIBS) $(LIBS) $(TARGET_LIBS) |
@@ -243,17 +245,29 @@ else | |||
243 | ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH))) | 245 | ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH))) |
244 | TARGET_LJARCH= arm | 246 | TARGET_LJARCH= arm |
245 | else | 247 | else |
248 | ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH))) | ||
249 | ifneq (,$(findstring __AARCH64EB__ ,$(TARGET_TESTARCH))) | ||
250 | TARGET_ARCH= -D__AARCH64EB__=1 | ||
251 | endif | ||
252 | TARGET_LJARCH= arm64 | ||
253 | else | ||
246 | ifneq (,$(findstring LJ_TARGET_PPC ,$(TARGET_TESTARCH))) | 254 | ifneq (,$(findstring LJ_TARGET_PPC ,$(TARGET_TESTARCH))) |
255 | ifneq (,$(findstring LJ_LE 1,$(TARGET_TESTARCH))) | ||
256 | TARGET_ARCH= -DLJ_ARCH_ENDIAN=LUAJIT_LE | ||
257 | else | ||
258 | TARGET_ARCH= -DLJ_ARCH_ENDIAN=LUAJIT_BE | ||
259 | endif | ||
247 | TARGET_LJARCH= ppc | 260 | TARGET_LJARCH= ppc |
248 | else | 261 | else |
249 | ifneq (,$(findstring LJ_TARGET_PPCSPE ,$(TARGET_TESTARCH))) | ||
250 | TARGET_LJARCH= ppcspe | ||
251 | else | ||
252 | ifneq (,$(findstring LJ_TARGET_MIPS ,$(TARGET_TESTARCH))) | 262 | ifneq (,$(findstring LJ_TARGET_MIPS ,$(TARGET_TESTARCH))) |
253 | ifneq (,$(findstring MIPSEL ,$(TARGET_TESTARCH))) | 263 | ifneq (,$(findstring MIPSEL ,$(TARGET_TESTARCH))) |
254 | TARGET_ARCH= -D__MIPSEL__=1 | 264 | TARGET_ARCH= -D__MIPSEL__=1 |
255 | endif | 265 | endif |
256 | TARGET_LJARCH= mips | 266 | ifneq (,$(findstring LJ_TARGET_MIPS64 ,$(TARGET_TESTARCH))) |
267 | TARGET_LJARCH= mips64 | ||
268 | else | ||
269 | TARGET_LJARCH= mips | ||
270 | endif | ||
257 | else | 271 | else |
258 | $(error Unsupported target architecture) | 272 | $(error Unsupported target architecture) |
259 | endif | 273 | endif |
@@ -267,6 +281,7 @@ ifneq (,$(findstring LJ_TARGET_PS3 1,$(TARGET_TESTARCH))) | |||
267 | TARGET_SYS= PS3 | 281 | TARGET_SYS= PS3 |
268 | TARGET_ARCH+= -D__CELLOS_LV2__ | 282 | TARGET_ARCH+= -D__CELLOS_LV2__ |
269 | TARGET_XCFLAGS+= -DLUAJIT_USE_SYSMALLOC | 283 | TARGET_XCFLAGS+= -DLUAJIT_USE_SYSMALLOC |
284 | TARGET_XLIBS+= -lpthread | ||
270 | endif | 285 | endif |
271 | 286 | ||
272 | TARGET_XCFLAGS+= $(CCOPT_$(TARGET_LJARCH)) | 287 | TARGET_XCFLAGS+= $(CCOPT_$(TARGET_LJARCH)) |
@@ -320,6 +335,9 @@ ifeq (iOS,$(TARGET_SYS)) | |||
320 | TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC | 335 | TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC |
321 | TARGET_DYNXLDOPTS= | 336 | TARGET_DYNXLDOPTS= |
322 | TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER) | 337 | TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER) |
338 | ifeq (arm64,$(TARGET_LJARCH)) | ||
339 | TARGET_XCFLAGS+= -fno-omit-frame-pointer | ||
340 | endif | ||
323 | else | 341 | else |
324 | ifneq (SunOS,$(TARGET_SYS)) | 342 | ifneq (SunOS,$(TARGET_SYS)) |
325 | ifneq (PS3,$(TARGET_SYS)) | 343 | ifneq (PS3,$(TARGET_SYS)) |
@@ -380,6 +398,11 @@ DASM_XFLAGS= | |||
380 | DASM_AFLAGS= | 398 | DASM_AFLAGS= |
381 | DASM_ARCH= $(TARGET_LJARCH) | 399 | DASM_ARCH= $(TARGET_LJARCH) |
382 | 400 | ||
401 | ifneq (,$(findstring LJ_LE 1,$(TARGET_TESTARCH))) | ||
402 | DASM_AFLAGS+= -D ENDIAN_LE | ||
403 | else | ||
404 | DASM_AFLAGS+= -D ENDIAN_BE | ||
405 | endif | ||
383 | ifneq (,$(findstring LJ_ARCH_BITS 64,$(TARGET_TESTARCH))) | 406 | ifneq (,$(findstring LJ_ARCH_BITS 64,$(TARGET_TESTARCH))) |
384 | DASM_AFLAGS+= -D P64 | 407 | DASM_AFLAGS+= -D P64 |
385 | endif | 408 | endif |
@@ -412,19 +435,19 @@ DASM_AFLAGS+= -D VER=$(subst LJ_ARCH_VERSION_,,$(filter LJ_ARCH_VERSION_%,$(subs | |||
412 | ifeq (Windows,$(TARGET_SYS)) | 435 | ifeq (Windows,$(TARGET_SYS)) |
413 | DASM_AFLAGS+= -D WIN | 436 | DASM_AFLAGS+= -D WIN |
414 | endif | 437 | endif |
415 | ifeq (x86,$(TARGET_LJARCH)) | ||
416 | ifneq (,$(findstring __SSE2__ 1,$(TARGET_TESTARCH))) | ||
417 | DASM_AFLAGS+= -D SSE | ||
418 | endif | ||
419 | else | ||
420 | ifeq (x64,$(TARGET_LJARCH)) | 438 | ifeq (x64,$(TARGET_LJARCH)) |
421 | DASM_ARCH= x86 | 439 | ifeq (,$(findstring LJ_FR2 1,$(TARGET_TESTARCH))) |
440 | DASM_ARCH= x86 | ||
441 | endif | ||
422 | else | 442 | else |
423 | ifeq (arm,$(TARGET_LJARCH)) | 443 | ifeq (arm,$(TARGET_LJARCH)) |
424 | ifeq (iOS,$(TARGET_SYS)) | 444 | ifeq (iOS,$(TARGET_SYS)) |
425 | DASM_AFLAGS+= -D IOS | 445 | DASM_AFLAGS+= -D IOS |
426 | endif | 446 | endif |
427 | else | 447 | else |
448 | ifneq (,$(findstring LJ_TARGET_MIPSR6 ,$(TARGET_TESTARCH))) | ||
449 | DASM_AFLAGS+= -D MIPSR6 | ||
450 | endif | ||
428 | ifeq (ppc,$(TARGET_LJARCH)) | 451 | ifeq (ppc,$(TARGET_LJARCH)) |
429 | ifneq (,$(findstring LJ_ARCH_SQRT 1,$(TARGET_TESTARCH))) | 452 | ifneq (,$(findstring LJ_ARCH_SQRT 1,$(TARGET_TESTARCH))) |
430 | DASM_AFLAGS+= -D SQRT | 453 | DASM_AFLAGS+= -D SQRT |
@@ -432,7 +455,7 @@ ifeq (ppc,$(TARGET_LJARCH)) | |||
432 | ifneq (,$(findstring LJ_ARCH_ROUND 1,$(TARGET_TESTARCH))) | 455 | ifneq (,$(findstring LJ_ARCH_ROUND 1,$(TARGET_TESTARCH))) |
433 | DASM_AFLAGS+= -D ROUND | 456 | DASM_AFLAGS+= -D ROUND |
434 | endif | 457 | endif |
435 | ifneq (,$(findstring LJ_ARCH_PPC64 1,$(TARGET_TESTARCH))) | 458 | ifneq (,$(findstring LJ_ARCH_PPC32ON64 1,$(TARGET_TESTARCH))) |
436 | DASM_AFLAGS+= -D GPR64 | 459 | DASM_AFLAGS+= -D GPR64 |
437 | endif | 460 | endif |
438 | ifeq (PS3,$(TARGET_SYS)) | 461 | ifeq (PS3,$(TARGET_SYS)) |
@@ -441,7 +464,6 @@ ifeq (ppc,$(TARGET_LJARCH)) | |||
441 | endif | 464 | endif |
442 | endif | 465 | endif |
443 | endif | 466 | endif |
444 | endif | ||
445 | 467 | ||
446 | DASM_FLAGS= $(DASM_XFLAGS) $(DASM_AFLAGS) | 468 | DASM_FLAGS= $(DASM_XFLAGS) $(DASM_AFLAGS) |
447 | DASM_DASC= vm_$(DASM_ARCH).dasc | 469 | DASM_DASC= vm_$(DASM_ARCH).dasc |
@@ -454,7 +476,7 @@ BUILDVM_X= $(BUILDVM_T) | |||
454 | HOST_O= $(MINILUA_O) $(BUILDVM_O) | 476 | HOST_O= $(MINILUA_O) $(BUILDVM_O) |
455 | HOST_T= $(MINILUA_T) $(BUILDVM_T) | 477 | HOST_T= $(MINILUA_T) $(BUILDVM_T) |
456 | 478 | ||
457 | LJVM_S= lj_vm.s | 479 | LJVM_S= lj_vm.S |
458 | LJVM_O= lj_vm.o | 480 | LJVM_O= lj_vm.o |
459 | LJVM_BOUT= $(LJVM_S) | 481 | LJVM_BOUT= $(LJVM_S) |
460 | LJVM_MODE= elfasm | 482 | LJVM_MODE= elfasm |
@@ -463,10 +485,11 @@ LJLIB_O= lib_base.o lib_math.o lib_bit.o lib_string.o lib_table.o \ | |||
463 | lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o | 485 | lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o |
464 | LJLIB_C= $(LJLIB_O:.o=.c) | 486 | LJLIB_C= $(LJLIB_O:.o=.c) |
465 | 487 | ||
466 | LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o \ | 488 | LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \ |
467 | lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \ | 489 | lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \ |
468 | lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_strscan.o \ | 490 | lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_strscan.o \ |
469 | lj_api.o lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \ | 491 | lj_strfmt.o lj_strfmt_num.o lj_api.o lj_profile.o \ |
492 | lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \ | ||
470 | lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \ | 493 | lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \ |
471 | lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \ | 494 | lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \ |
472 | lj_mcode.o lj_snap.o lj_record.o lj_crecord.o lj_ffrecord.o \ | 495 | lj_mcode.o lj_snap.o lj_record.o lj_crecord.o lj_ffrecord.o \ |
@@ -586,6 +609,10 @@ amalg: | |||
586 | clean: | 609 | clean: |
587 | $(HOST_RM) $(ALL_RM) | 610 | $(HOST_RM) $(ALL_RM) |
588 | 611 | ||
612 | libbc: | ||
613 | ./$(LUAJIT_T) host/genlibbc.lua -o host/buildvm_libbc.h $(LJLIB_C) | ||
614 | $(MAKE) all | ||
615 | |||
589 | depend: | 616 | depend: |
590 | @for file in $(ALL_HDRGEN); do \ | 617 | @for file in $(ALL_HDRGEN); do \ |
591 | test -f $$file || touch $$file; \ | 618 | test -f $$file || touch $$file; \ |
@@ -600,7 +627,7 @@ depend: | |||
600 | test -s $$file || $(HOST_RM) $$file; \ | 627 | test -s $$file || $(HOST_RM) $$file; \ |
601 | done | 628 | done |
602 | 629 | ||
603 | .PHONY: default all amalg clean depend | 630 | .PHONY: default all amalg clean libbc depend |
604 | 631 | ||
605 | ############################################################################## | 632 | ############################################################################## |
606 | # Rules for generated files. | 633 | # Rules for generated files. |
@@ -610,7 +637,7 @@ $(MINILUA_T): $(MINILUA_O) | |||
610 | $(E) "HOSTLINK $@" | 637 | $(E) "HOSTLINK $@" |
611 | $(Q)$(HOST_CC) $(HOST_ALDFLAGS) -o $@ $(MINILUA_O) $(MINILUA_LIBS) $(HOST_ALIBS) | 638 | $(Q)$(HOST_CC) $(HOST_ALDFLAGS) -o $@ $(MINILUA_O) $(MINILUA_LIBS) $(HOST_ALIBS) |
612 | 639 | ||
613 | host/buildvm_arch.h: $(DASM_DASC) $(DASM_DEP) | 640 | host/buildvm_arch.h: $(DASM_DASC) $(DASM_DEP) $(DASM_DIR)/*.lua |
614 | $(E) "DYNASM $@" | 641 | $(E) "DYNASM $@" |
615 | $(Q)$(DASM) $(DASM_FLAGS) -o $@ $(DASM_DASC) | 642 | $(Q)$(DASM) $(DASM_FLAGS) -o $@ $(DASM_DASC) |
616 | 643 | ||
@@ -657,10 +684,10 @@ lj_folddef.h: $(BUILDVM_T) lj_opt_fold.c | |||
657 | $(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) -c -o $(@:.o=_dyn.o) $< | 684 | $(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) -c -o $(@:.o=_dyn.o) $< |
658 | $(Q)$(TARGET_CC) $(TARGET_ACFLAGS) -c -o $@ $< | 685 | $(Q)$(TARGET_CC) $(TARGET_ACFLAGS) -c -o $@ $< |
659 | 686 | ||
660 | %.o: %.s | 687 | %.o: %.S |
661 | $(E) "ASM $@" | 688 | $(E) "ASM $@" |
662 | $(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) -c -o $(@:.o=_dyn.o) $< | 689 | $(Q)$(TARGET_DYNCC) $(TARGET_ASFLAGS) -c -o $(@:.o=_dyn.o) $< |
663 | $(Q)$(TARGET_CC) $(TARGET_ACFLAGS) -c -o $@ $< | 690 | $(Q)$(TARGET_CC) $(TARGET_ASFLAGS) -c -o $@ $< |
664 | 691 | ||
665 | $(LUAJIT_O): | 692 | $(LUAJIT_O): |
666 | $(E) "CC $@" | 693 | $(E) "CC $@" |
diff --git a/src/Makefile.dep b/src/Makefile.dep index 9e14d617..2b1cb5ef 100644 --- a/src/Makefile.dep +++ b/src/Makefile.dep | |||
@@ -3,45 +3,49 @@ lib_aux.o: lib_aux.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \ | |||
3 | lj_dispatch.h lj_bc.h lj_traceerr.h lj_lib.h lj_alloc.h | 3 | lj_dispatch.h lj_bc.h lj_traceerr.h lj_lib.h lj_alloc.h |
4 | lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ | 4 | lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ |
5 | lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h \ | 5 | lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h \ |
6 | lj_tab.h lj_meta.h lj_state.h lj_ctype.h lj_cconv.h lj_bc.h lj_ff.h \ | 6 | lj_tab.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cconv.h \ |
7 | lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h lj_strscan.h \ | 7 | lj_ff.h lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h lj_strscan.h \ |
8 | lj_lib.h lj_libdef.h | 8 | lj_strfmt.h lj_lib.h lj_libdef.h |
9 | lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ | 9 | lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ |
10 | lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_lib.h lj_libdef.h | 10 | lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_strscan.h \ |
11 | lj_strfmt.h lj_ctype.h lj_cdata.h lj_cconv.h lj_carith.h lj_ff.h \ | ||
12 | lj_ffdef.h lj_lib.h lj_libdef.h | ||
11 | lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ | 13 | lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ |
12 | lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_lib.h \ | 14 | lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_lib.h \ |
13 | lj_libdef.h | 15 | lj_libdef.h |
14 | lib_ffi.o: lib_ffi.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ | 16 | lib_ffi.o: lib_ffi.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ |
15 | lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h \ | 17 | lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h \ |
16 | lj_ctype.h lj_cparse.h lj_cdata.h lj_cconv.h lj_carith.h lj_ccall.h \ | 18 | lj_ctype.h lj_cparse.h lj_cdata.h lj_cconv.h lj_carith.h lj_ccall.h \ |
17 | lj_ccallback.h lj_clib.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h | 19 | lj_ccallback.h lj_clib.h lj_strfmt.h lj_ff.h lj_ffdef.h lj_lib.h \ |
20 | lj_libdef.h | ||
18 | lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h | 21 | lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h |
19 | lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ | 22 | lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ |
20 | lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_state.h lj_ff.h \ | 23 | lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_state.h \ |
21 | lj_ffdef.h lj_lib.h lj_libdef.h | 24 | lj_strfmt.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h |
22 | lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h \ | 25 | lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ |
23 | lj_obj.h lj_def.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h \ | 26 | lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h \ |
24 | lj_bc.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_target.h \ | 27 | lj_state.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \ |
25 | lj_target_*.h lj_dispatch.h lj_vm.h lj_vmevent.h lj_lib.h luajit.h \ | 28 | lj_target.h lj_target_*.h lj_trace.h lj_dispatch.h lj_traceerr.h \ |
26 | lj_libdef.h | 29 | lj_vm.h lj_vmevent.h lj_lib.h luajit.h lj_libdef.h |
27 | lib_math.o: lib_math.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ | 30 | lib_math.o: lib_math.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ |
28 | lj_def.h lj_arch.h lj_lib.h lj_vm.h lj_libdef.h | 31 | lj_def.h lj_arch.h lj_lib.h lj_vm.h lj_libdef.h |
29 | lib_os.o: lib_os.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ | 32 | lib_os.o: lib_os.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ |
30 | lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lj_libdef.h | 33 | lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_lib.h \ |
34 | lj_libdef.h | ||
31 | lib_package.o: lib_package.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ | 35 | lib_package.o: lib_package.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ |
32 | lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h | 36 | lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h |
33 | lib_string.o: lib_string.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ | 37 | lib_string.o: lib_string.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ |
34 | lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h \ | 38 | lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \ |
35 | lj_meta.h lj_state.h lj_ff.h lj_ffdef.h lj_bcdump.h lj_lex.h lj_char.h \ | 39 | lj_tab.h lj_meta.h lj_state.h lj_ff.h lj_ffdef.h lj_bcdump.h lj_lex.h \ |
36 | lj_lib.h lj_libdef.h | 40 | lj_char.h lj_strfmt.h lj_lib.h lj_libdef.h |
37 | lib_table.o: lib_table.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ | 41 | lib_table.o: lib_table.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ |
38 | lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_lib.h \ | 42 | lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \ |
39 | lj_libdef.h | 43 | lj_tab.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h |
40 | lj_alloc.o: lj_alloc.c lj_def.h lua.h luaconf.h lj_arch.h lj_alloc.h | 44 | lj_alloc.o: lj_alloc.c lj_def.h lua.h luaconf.h lj_arch.h lj_alloc.h |
41 | lj_api.o: lj_api.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ | 45 | lj_api.o: lj_api.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ |
42 | lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h lj_udata.h \ | 46 | lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h lj_udata.h \ |
43 | lj_meta.h lj_state.h lj_bc.h lj_frame.h lj_trace.h lj_jit.h lj_ir.h \ | 47 | lj_meta.h lj_state.h lj_bc.h lj_frame.h lj_trace.h lj_jit.h lj_ir.h \ |
44 | lj_dispatch.h lj_traceerr.h lj_vm.h lj_strscan.h | 48 | lj_dispatch.h lj_traceerr.h lj_vm.h lj_strscan.h lj_strfmt.h |
45 | lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ | 49 | lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ |
46 | lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h \ | 50 | lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h \ |
47 | lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h lj_traceerr.h \ | 51 | lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h lj_traceerr.h \ |
@@ -50,17 +54,20 @@ lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ | |||
50 | lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h \ | 54 | lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h \ |
51 | lj_bcdef.h | 55 | lj_bcdef.h |
52 | lj_bcread.o: lj_bcread.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 56 | lj_bcread.o: lj_bcread.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
53 | lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_bc.h lj_ctype.h \ | 57 | lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_bc.h \ |
54 | lj_cdata.h lualib.h lj_lex.h lj_bcdump.h lj_state.h | 58 | lj_ctype.h lj_cdata.h lualib.h lj_lex.h lj_bcdump.h lj_state.h \ |
59 | lj_strfmt.h | ||
55 | lj_bcwrite.o: lj_bcwrite.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 60 | lj_bcwrite.o: lj_bcwrite.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
56 | lj_gc.h lj_str.h lj_bc.h lj_ctype.h lj_dispatch.h lj_jit.h lj_ir.h \ | 61 | lj_gc.h lj_buf.h lj_str.h lj_bc.h lj_ctype.h lj_dispatch.h lj_jit.h \ |
57 | lj_bcdump.h lj_lex.h lj_err.h lj_errmsg.h lj_vm.h | 62 | lj_ir.h lj_strfmt.h lj_bcdump.h lj_lex.h lj_err.h lj_errmsg.h lj_vm.h |
63 | lj_buf.o: lj_buf.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ | ||
64 | lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_strfmt.h | ||
58 | lj_carith.o: lj_carith.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 65 | lj_carith.o: lj_carith.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
59 | lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_meta.h lj_ctype.h lj_cconv.h \ | 66 | lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_meta.h lj_ir.h lj_ctype.h \ |
60 | lj_cdata.h lj_carith.h | 67 | lj_cconv.h lj_cdata.h lj_carith.h lj_strscan.h |
61 | lj_ccall.o: lj_ccall.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 68 | lj_ccall.o: lj_ccall.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
62 | lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cconv.h \ | 69 | lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_cconv.h lj_cdata.h \ |
63 | lj_cdata.h lj_ccall.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \ | 70 | lj_ccall.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \ |
64 | lj_traceerr.h | 71 | lj_traceerr.h |
65 | lj_ccallback.o: lj_ccallback.c lj_obj.h lua.h luaconf.h lj_def.h \ | 72 | lj_ccallback.o: lj_ccallback.c lj_obj.h lua.h luaconf.h lj_def.h \ |
66 | lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_state.h lj_frame.h \ | 73 | lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_state.h lj_frame.h \ |
@@ -71,107 +78,118 @@ lj_cconv.o: lj_cconv.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | |||
71 | lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_gc.h lj_cdata.h lj_cconv.h \ | 78 | lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_gc.h lj_cdata.h lj_cconv.h \ |
72 | lj_ccallback.h | 79 | lj_ccallback.h |
73 | lj_cdata.o: lj_cdata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 80 | lj_cdata.o: lj_cdata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
74 | lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cconv.h \ | 81 | lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_cconv.h lj_cdata.h |
75 | lj_cdata.h | ||
76 | lj_char.o: lj_char.c lj_char.h lj_def.h lua.h luaconf.h | 82 | lj_char.o: lj_char.c lj_char.h lj_def.h lua.h luaconf.h |
77 | lj_clib.o: lj_clib.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ | 83 | lj_clib.o: lj_clib.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ |
78 | lj_err.h lj_errmsg.h lj_tab.h lj_str.h lj_udata.h lj_ctype.h lj_cconv.h \ | 84 | lj_err.h lj_errmsg.h lj_tab.h lj_str.h lj_udata.h lj_ctype.h lj_cconv.h \ |
79 | lj_cdata.h lj_clib.h | 85 | lj_cdata.h lj_clib.h lj_strfmt.h |
80 | lj_cparse.o: lj_cparse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 86 | lj_cparse.o: lj_cparse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
81 | lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_ctype.h lj_cparse.h lj_frame.h \ | 87 | lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_ctype.h lj_cparse.h \ |
82 | lj_bc.h lj_vm.h lj_char.h lj_strscan.h | 88 | lj_frame.h lj_bc.h lj_vm.h lj_char.h lj_strscan.h lj_strfmt.h |
83 | lj_crecord.o: lj_crecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 89 | lj_crecord.o: lj_crecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
84 | lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h \ | 90 | lj_err.h lj_errmsg.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_gc.h \ |
85 | lj_gc.h lj_cdata.h lj_cparse.h lj_cconv.h lj_clib.h lj_ccall.h lj_ff.h \ | 91 | lj_cdata.h lj_cparse.h lj_cconv.h lj_carith.h lj_clib.h lj_ccall.h \ |
86 | lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ | 92 | lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ |
87 | lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h \ | 93 | lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h \ |
88 | lj_crecord.h | 94 | lj_crecord.h lj_strfmt.h |
89 | lj_ctype.o: lj_ctype.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 95 | lj_ctype.o: lj_ctype.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
90 | lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_ccallback.h | 96 | lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_strfmt.h lj_ctype.h \ |
97 | lj_ccallback.h lj_buf.h | ||
91 | lj_debug.o: lj_debug.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 98 | lj_debug.o: lj_debug.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
92 | lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_state.h lj_frame.h \ | 99 | lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_gc.h lj_str.h lj_tab.h \ |
93 | lj_bc.h lj_vm.h lj_jit.h lj_ir.h | 100 | lj_state.h lj_frame.h lj_bc.h lj_strfmt.h lj_jit.h lj_ir.h |
94 | lj_dispatch.o: lj_dispatch.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 101 | lj_dispatch.o: lj_dispatch.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
95 | lj_err.h lj_errmsg.h lj_func.h lj_str.h lj_tab.h lj_meta.h lj_debug.h \ | 102 | lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_func.h lj_tab.h \ |
96 | lj_state.h lj_frame.h lj_bc.h lj_ff.h lj_ffdef.h lj_jit.h lj_ir.h \ | 103 | lj_meta.h lj_debug.h lj_state.h lj_frame.h lj_bc.h lj_ff.h lj_ffdef.h \ |
97 | lj_ccallback.h lj_ctype.h lj_gc.h lj_trace.h lj_dispatch.h lj_traceerr.h \ | 104 | lj_strfmt.h lj_jit.h lj_ir.h lj_ccallback.h lj_ctype.h lj_trace.h \ |
98 | lj_vm.h luajit.h | 105 | lj_dispatch.h lj_traceerr.h lj_profile.h lj_vm.h luajit.h |
99 | lj_err.o: lj_err.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_err.h \ | 106 | lj_err.o: lj_err.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_err.h \ |
100 | lj_errmsg.h lj_debug.h lj_str.h lj_func.h lj_state.h lj_frame.h lj_bc.h \ | 107 | lj_errmsg.h lj_debug.h lj_str.h lj_func.h lj_state.h lj_frame.h lj_bc.h \ |
101 | lj_ff.h lj_ffdef.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \ | 108 | lj_ff.h lj_ffdef.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \ |
102 | lj_traceerr.h lj_vm.h | 109 | lj_traceerr.h lj_vm.h lj_strfmt.h |
103 | lj_ffrecord.o: lj_ffrecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 110 | lj_ffrecord.o: lj_ffrecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
104 | lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ff.h \ | 111 | lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ff.h \ |
105 | lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ | 112 | lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ |
106 | lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_crecord.h \ | 113 | lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_crecord.h \ |
107 | lj_vm.h lj_strscan.h lj_recdef.h | 114 | lj_vm.h lj_strscan.h lj_strfmt.h lj_recdef.h |
108 | lj_func.o: lj_func.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ | 115 | lj_func.o: lj_func.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ |
109 | lj_func.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \ | 116 | lj_func.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \ |
110 | lj_traceerr.h lj_vm.h | 117 | lj_traceerr.h lj_vm.h |
111 | lj_gc.o: lj_gc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ | 118 | lj_gc.o: lj_gc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ |
112 | lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_udata.h lj_meta.h \ | 119 | lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_func.h lj_udata.h \ |
113 | lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h lj_trace.h lj_jit.h \ | 120 | lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h lj_trace.h \ |
114 | lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h | 121 | lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h |
115 | lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 122 | lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
116 | lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_frame.h lj_bc.h lj_jit.h \ | 123 | lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_frame.h lj_bc.h lj_buf.h \ |
117 | lj_ir.h lj_dispatch.h | 124 | lj_str.h lj_strfmt.h lj_jit.h lj_ir.h lj_dispatch.h |
118 | lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ | 125 | lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ |
119 | lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ | 126 | lj_buf.h lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \ |
120 | lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h lj_carith.h \ | 127 | lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h \ |
121 | lj_vm.h lj_strscan.h lj_lib.h | 128 | lj_carith.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lib.h |
122 | lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ | 129 | lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ |
123 | lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h lualib.h \ | 130 | lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h \ |
124 | lj_state.h lj_lex.h lj_parse.h lj_char.h lj_strscan.h | 131 | lualib.h lj_state.h lj_lex.h lj_parse.h lj_char.h lj_strscan.h \ |
132 | lj_strfmt.h | ||
125 | lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \ | 133 | lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \ |
126 | lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_bc.h \ | 134 | lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_bc.h \ |
127 | lj_dispatch.h lj_jit.h lj_ir.h lj_vm.h lj_strscan.h lj_lib.h | 135 | lj_dispatch.h lj_jit.h lj_ir.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lex.h \ |
136 | lj_bcdump.h lj_lib.h | ||
128 | lj_load.o: lj_load.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \ | 137 | lj_load.o: lj_load.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \ |
129 | lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_func.h lj_frame.h \ | 138 | lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_func.h \ |
130 | lj_bc.h lj_vm.h lj_lex.h lj_bcdump.h lj_parse.h | 139 | lj_frame.h lj_bc.h lj_vm.h lj_lex.h lj_bcdump.h lj_parse.h |
131 | lj_mcode.o: lj_mcode.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 140 | lj_mcode.o: lj_mcode.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
132 | lj_gc.h lj_err.h lj_errmsg.h lj_jit.h lj_ir.h lj_mcode.h lj_trace.h \ | 141 | lj_gc.h lj_err.h lj_errmsg.h lj_jit.h lj_ir.h lj_mcode.h lj_trace.h \ |
133 | lj_dispatch.h lj_bc.h lj_traceerr.h lj_vm.h | 142 | lj_dispatch.h lj_bc.h lj_traceerr.h lj_vm.h |
134 | lj_meta.o: lj_meta.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ | 143 | lj_meta.o: lj_meta.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ |
135 | lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \ | 144 | lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_meta.h lj_frame.h \ |
136 | lj_vm.h lj_strscan.h | 145 | lj_bc.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lib.h |
137 | lj_obj.o: lj_obj.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h | 146 | lj_obj.o: lj_obj.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h |
138 | lj_opt_dce.o: lj_opt_dce.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 147 | lj_opt_dce.o: lj_opt_dce.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
139 | lj_ir.h lj_jit.h lj_iropt.h | 148 | lj_ir.h lj_jit.h lj_iropt.h |
140 | lj_opt_fold.o: lj_opt_fold.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 149 | lj_opt_fold.o: lj_opt_fold.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
141 | lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \ | 150 | lj_buf.h lj_gc.h lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h \ |
142 | lj_bc.h lj_traceerr.h lj_ctype.h lj_gc.h lj_carith.h lj_vm.h \ | 151 | lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h \ |
143 | lj_strscan.h lj_folddef.h | 152 | lj_carith.h lj_vm.h lj_strscan.h lj_strfmt.h lj_folddef.h |
144 | lj_opt_loop.o: lj_opt_loop.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 153 | lj_opt_loop.o: lj_opt_loop.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
145 | lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \ | 154 | lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_ir.h lj_jit.h \ |
146 | lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h lj_vm.h | 155 | lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h \ |
156 | lj_vm.h | ||
147 | lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 157 | lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
148 | lj_tab.h lj_ir.h lj_jit.h lj_iropt.h | 158 | lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_ircall.h |
149 | lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \ | 159 | lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \ |
150 | lj_arch.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \ | 160 | lj_arch.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \ |
151 | lj_traceerr.h lj_vm.h lj_strscan.h | 161 | lj_traceerr.h lj_vm.h lj_strscan.h |
152 | lj_opt_sink.o: lj_opt_sink.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 162 | lj_opt_sink.o: lj_opt_sink.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
153 | lj_ir.h lj_jit.h lj_iropt.h lj_target.h lj_target_*.h | 163 | lj_ir.h lj_jit.h lj_iropt.h lj_target.h lj_target_*.h |
154 | lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \ | 164 | lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \ |
155 | lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_ircall.h \ | 165 | lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_ir.h \ |
156 | lj_iropt.h lj_vm.h | 166 | lj_jit.h lj_ircall.h lj_iropt.h lj_dispatch.h lj_bc.h lj_vm.h |
157 | lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 167 | lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
158 | lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h \ | 168 | lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_str.h lj_tab.h \ |
159 | lj_state.h lj_bc.h lj_ctype.h lj_lex.h lj_parse.h lj_vm.h lj_vmevent.h | 169 | lj_func.h lj_state.h lj_bc.h lj_ctype.h lj_strfmt.h lj_lex.h lj_parse.h \ |
170 | lj_vm.h lj_vmevent.h | ||
171 | lj_profile.o: lj_profile.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | ||
172 | lj_buf.h lj_gc.h lj_str.h lj_frame.h lj_bc.h lj_debug.h lj_dispatch.h \ | ||
173 | lj_jit.h lj_ir.h lj_trace.h lj_traceerr.h lj_profile.h luajit.h | ||
160 | lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 174 | lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
161 | lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \ | 175 | lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \ |
162 | lj_ctype.h lj_gc.h lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h \ | 176 | lj_ctype.h lj_gc.h lj_ff.h lj_ffdef.h lj_debug.h lj_ir.h lj_jit.h \ |
163 | lj_iropt.h lj_trace.h lj_dispatch.h lj_traceerr.h lj_record.h \ | 177 | lj_ircall.h lj_iropt.h lj_trace.h lj_dispatch.h lj_traceerr.h \ |
164 | lj_ffrecord.h lj_snap.h lj_vm.h | 178 | lj_record.h lj_ffrecord.h lj_snap.h lj_vm.h |
165 | lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ | 179 | lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ |
166 | lj_tab.h lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h \ | 180 | lj_tab.h lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h \ |
167 | lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h \ | 181 | lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h \ |
168 | lj_target_*.h lj_ctype.h lj_cdata.h | 182 | lj_target_*.h lj_ctype.h lj_cdata.h |
169 | lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 183 | lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
170 | lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_meta.h \ | 184 | lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_func.h \ |
171 | lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_trace.h lj_jit.h lj_ir.h \ | 185 | lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_trace.h lj_jit.h \ |
172 | lj_dispatch.h lj_traceerr.h lj_vm.h lj_lex.h lj_alloc.h | 186 | lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_lex.h lj_alloc.h luajit.h |
173 | lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ | 187 | lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ |
174 | lj_err.h lj_errmsg.h lj_str.h lj_state.h lj_char.h | 188 | lj_err.h lj_errmsg.h lj_str.h lj_char.h |
189 | lj_strfmt.o: lj_strfmt.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | ||
190 | lj_buf.h lj_gc.h lj_str.h lj_state.h lj_char.h lj_strfmt.h | ||
191 | lj_strfmt_num.o: lj_strfmt_num.c lj_obj.h lua.h luaconf.h lj_def.h \ | ||
192 | lj_arch.h lj_buf.h lj_gc.h lj_str.h lj_strfmt.h | ||
175 | lj_strscan.o: lj_strscan.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 193 | lj_strscan.o: lj_strscan.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
176 | lj_char.h lj_strscan.h | 194 | lj_char.h lj_strscan.h |
177 | lj_tab.o: lj_tab.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ | 195 | lj_tab.o: lj_tab.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ |
@@ -189,26 +207,27 @@ lj_vmevent.o: lj_vmevent.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | |||
189 | lj_vmmath.o: lj_vmmath.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 207 | lj_vmmath.o: lj_vmmath.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
190 | lj_ir.h lj_vm.h | 208 | lj_ir.h lj_vm.h |
191 | ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \ | 209 | ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \ |
192 | lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h \ | 210 | lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h \ |
193 | lj_udata.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h \ | 211 | lj_func.h lj_udata.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h \ |
194 | lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_err.c \ | 212 | lj_cdata.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h \ |
195 | lj_debug.h lj_ff.h lj_ffdef.h lj_char.c lj_char.h lj_bc.c lj_bcdef.h \ | 213 | lj_vm.h lj_err.c lj_debug.h lj_ff.h lj_ffdef.h lj_strfmt.h lj_char.c \ |
196 | lj_obj.c lj_str.c lj_tab.c lj_func.c lj_udata.c lj_meta.c lj_strscan.h \ | 214 | lj_char.h lj_bc.c lj_bcdef.h lj_obj.c lj_buf.c lj_str.c lj_tab.c \ |
197 | lj_debug.c lj_state.c lj_lex.h lj_alloc.h lj_dispatch.c lj_ccallback.h \ | 215 | lj_func.c lj_udata.c lj_meta.c lj_strscan.h lj_lib.h lj_debug.c \ |
198 | luajit.h lj_vmevent.c lj_vmevent.h lj_vmmath.c lj_strscan.c lj_api.c \ | 216 | lj_state.c lj_lex.h lj_alloc.h luajit.h lj_dispatch.c lj_ccallback.h \ |
199 | lj_lex.c lualib.h lj_parse.h lj_parse.c lj_bcread.c lj_bcdump.h \ | 217 | lj_profile.h lj_vmevent.c lj_vmevent.h lj_vmmath.c lj_strscan.c \ |
200 | lj_bcwrite.c lj_load.c lj_ctype.c lj_cdata.c lj_cconv.h lj_cconv.c \ | 218 | lj_strfmt.c lj_strfmt_num.c lj_api.c lj_profile.c lj_lex.c lualib.h \ |
201 | lj_ccall.c lj_ccall.h lj_ccallback.c lj_target.h lj_target_*.h \ | 219 | lj_parse.h lj_parse.c lj_bcread.c lj_bcdump.h lj_bcwrite.c lj_load.c \ |
202 | lj_mcode.h lj_carith.c lj_carith.h lj_clib.c lj_clib.h lj_cparse.c \ | 220 | lj_ctype.c lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h \ |
203 | lj_cparse.h lj_lib.c lj_lib.h lj_ir.c lj_ircall.h lj_iropt.h \ | 221 | lj_ccallback.c lj_target.h lj_target_*.h lj_mcode.h lj_carith.c \ |
204 | lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c lj_opt_dce.c \ | 222 | lj_carith.h lj_clib.c lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c \ |
205 | lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c lj_mcode.c \ | 223 | lj_ircall.h lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h \ |
206 | lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \ | 224 | lj_opt_narrow.c lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c \ |
207 | lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h \ | 225 | lj_opt_sink.c lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h \ |
208 | lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c \ | 226 | lj_crecord.c lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h \ |
209 | lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c \ | 227 | lj_emit_*.h lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c \ |
210 | lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c lib_ffi.c \ | 228 | lib_aux.c lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c \ |
211 | lib_init.c | 229 | lib_io.c lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c \ |
230 | lib_ffi.c lib_init.c | ||
212 | luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h | 231 | luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h |
213 | host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \ | 232 | host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \ |
214 | lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \ | 233 | lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \ |
@@ -220,7 +239,8 @@ host/buildvm_asm.o: host/buildvm_asm.c host/buildvm.h lj_def.h lua.h luaconf.h \ | |||
220 | host/buildvm_fold.o: host/buildvm_fold.c host/buildvm.h lj_def.h lua.h \ | 239 | host/buildvm_fold.o: host/buildvm_fold.c host/buildvm.h lj_def.h lua.h \ |
221 | luaconf.h lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_ir.h lj_obj.h | 240 | luaconf.h lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_ir.h lj_obj.h |
222 | host/buildvm_lib.o: host/buildvm_lib.c host/buildvm.h lj_def.h lua.h luaconf.h \ | 241 | host/buildvm_lib.o: host/buildvm_lib.c host/buildvm.h lj_def.h lua.h luaconf.h \ |
223 | lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_lib.h lj_obj.h | 242 | lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_bc.h lj_lib.h lj_obj.h \ |
243 | host/buildvm_libbc.h | ||
224 | host/buildvm_peobj.o: host/buildvm_peobj.c host/buildvm.h lj_def.h lua.h \ | 244 | host/buildvm_peobj.o: host/buildvm_peobj.c host/buildvm.h lj_def.h lua.h \ |
225 | luaconf.h lj_arch.h lj_bc.h lj_def.h lj_arch.h | 245 | luaconf.h lj_arch.h lj_bc.h lj_def.h lj_arch.h |
226 | host/minilua.o: host/minilua.c | 246 | host/minilua.o: host/minilua.c |
diff --git a/src/host/buildvm.c b/src/host/buildvm.c index 05e0dbdb..27e14d57 100644 --- a/src/host/buildvm.c +++ b/src/host/buildvm.c | |||
@@ -59,10 +59,10 @@ static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type); | |||
59 | #include "../dynasm/dasm_x86.h" | 59 | #include "../dynasm/dasm_x86.h" |
60 | #elif LJ_TARGET_ARM | 60 | #elif LJ_TARGET_ARM |
61 | #include "../dynasm/dasm_arm.h" | 61 | #include "../dynasm/dasm_arm.h" |
62 | #elif LJ_TARGET_ARM64 | ||
63 | #include "../dynasm/dasm_arm64.h" | ||
62 | #elif LJ_TARGET_PPC | 64 | #elif LJ_TARGET_PPC |
63 | #include "../dynasm/dasm_ppc.h" | 65 | #include "../dynasm/dasm_ppc.h" |
64 | #elif LJ_TARGET_PPCSPE | ||
65 | #include "../dynasm/dasm_ppc.h" | ||
66 | #elif LJ_TARGET_MIPS | 66 | #elif LJ_TARGET_MIPS |
67 | #include "../dynasm/dasm_mips.h" | 67 | #include "../dynasm/dasm_mips.h" |
68 | #else | 68 | #else |
@@ -110,11 +110,11 @@ static const char *sym_decorate(BuildCtx *ctx, | |||
110 | if (p) { | 110 | if (p) { |
111 | #if LJ_TARGET_X86ORX64 | 111 | #if LJ_TARGET_X86ORX64 |
112 | if (!LJ_64 && (ctx->mode == BUILD_coffasm || ctx->mode == BUILD_peobj)) | 112 | if (!LJ_64 && (ctx->mode == BUILD_coffasm || ctx->mode == BUILD_peobj)) |
113 | name[0] = '@'; | 113 | name[0] = name[1] == 'R' ? '_' : '@'; /* Just for _RtlUnwind@16. */ |
114 | else | 114 | else |
115 | *p = '\0'; | 115 | *p = '\0'; |
116 | #elif (LJ_TARGET_PPC || LJ_TARGET_PPCSPE) && !LJ_TARGET_CONSOLE | 116 | #elif LJ_TARGET_PPC && !LJ_TARGET_CONSOLE |
117 | /* Keep @plt. */ | 117 | /* Keep @plt etc. */ |
118 | #else | 118 | #else |
119 | *p = '\0'; | 119 | *p = '\0'; |
120 | #endif | 120 | #endif |
@@ -179,6 +179,7 @@ static int build_code(BuildCtx *ctx) | |||
179 | ctx->nreloc = 0; | 179 | ctx->nreloc = 0; |
180 | 180 | ||
181 | ctx->globnames = globnames; | 181 | ctx->globnames = globnames; |
182 | ctx->extnames = extnames; | ||
182 | ctx->relocsym = (const char **)malloc(NRELOCSYM*sizeof(const char *)); | 183 | ctx->relocsym = (const char **)malloc(NRELOCSYM*sizeof(const char *)); |
183 | ctx->nrelocsym = 0; | 184 | ctx->nrelocsym = 0; |
184 | for (i = 0; i < (int)NRELOCSYM; i++) relocmap[i] = -1; | 185 | for (i = 0; i < (int)NRELOCSYM; i++) relocmap[i] = -1; |
@@ -320,20 +321,20 @@ static void emit_vmdef(BuildCtx *ctx) | |||
320 | char buf[80]; | 321 | char buf[80]; |
321 | int i; | 322 | int i; |
322 | fprintf(ctx->fp, "-- This is a generated file. DO NOT EDIT!\n\n"); | 323 | fprintf(ctx->fp, "-- This is a generated file. DO NOT EDIT!\n\n"); |
323 | fprintf(ctx->fp, "module(...)\n\n"); | 324 | fprintf(ctx->fp, "return {\n\n"); |
324 | 325 | ||
325 | fprintf(ctx->fp, "bcnames = \""); | 326 | fprintf(ctx->fp, "bcnames = \""); |
326 | for (i = 0; bc_names[i]; i++) fprintf(ctx->fp, "%-6s", bc_names[i]); | 327 | for (i = 0; bc_names[i]; i++) fprintf(ctx->fp, "%-6s", bc_names[i]); |
327 | fprintf(ctx->fp, "\"\n\n"); | 328 | fprintf(ctx->fp, "\",\n\n"); |
328 | 329 | ||
329 | fprintf(ctx->fp, "irnames = \""); | 330 | fprintf(ctx->fp, "irnames = \""); |
330 | for (i = 0; ir_names[i]; i++) fprintf(ctx->fp, "%-6s", ir_names[i]); | 331 | for (i = 0; ir_names[i]; i++) fprintf(ctx->fp, "%-6s", ir_names[i]); |
331 | fprintf(ctx->fp, "\"\n\n"); | 332 | fprintf(ctx->fp, "\",\n\n"); |
332 | 333 | ||
333 | fprintf(ctx->fp, "irfpm = { [0]="); | 334 | fprintf(ctx->fp, "irfpm = { [0]="); |
334 | for (i = 0; irfpm_names[i]; i++) | 335 | for (i = 0; irfpm_names[i]; i++) |
335 | fprintf(ctx->fp, "\"%s\", ", lower(buf, irfpm_names[i])); | 336 | fprintf(ctx->fp, "\"%s\", ", lower(buf, irfpm_names[i])); |
336 | fprintf(ctx->fp, "}\n\n"); | 337 | fprintf(ctx->fp, "},\n\n"); |
337 | 338 | ||
338 | fprintf(ctx->fp, "irfield = { [0]="); | 339 | fprintf(ctx->fp, "irfield = { [0]="); |
339 | for (i = 0; irfield_names[i]; i++) { | 340 | for (i = 0; irfield_names[i]; i++) { |
@@ -343,17 +344,17 @@ static void emit_vmdef(BuildCtx *ctx) | |||
343 | if (p) *p = '.'; | 344 | if (p) *p = '.'; |
344 | fprintf(ctx->fp, "\"%s\", ", buf); | 345 | fprintf(ctx->fp, "\"%s\", ", buf); |
345 | } | 346 | } |
346 | fprintf(ctx->fp, "}\n\n"); | 347 | fprintf(ctx->fp, "},\n\n"); |
347 | 348 | ||
348 | fprintf(ctx->fp, "ircall = {\n[0]="); | 349 | fprintf(ctx->fp, "ircall = {\n[0]="); |
349 | for (i = 0; ircall_names[i]; i++) | 350 | for (i = 0; ircall_names[i]; i++) |
350 | fprintf(ctx->fp, "\"%s\",\n", ircall_names[i]); | 351 | fprintf(ctx->fp, "\"%s\",\n", ircall_names[i]); |
351 | fprintf(ctx->fp, "}\n\n"); | 352 | fprintf(ctx->fp, "},\n\n"); |
352 | 353 | ||
353 | fprintf(ctx->fp, "traceerr = {\n[0]="); | 354 | fprintf(ctx->fp, "traceerr = {\n[0]="); |
354 | for (i = 0; trace_errors[i]; i++) | 355 | for (i = 0; trace_errors[i]; i++) |
355 | fprintf(ctx->fp, "\"%s\",\n", trace_errors[i]); | 356 | fprintf(ctx->fp, "\"%s\",\n", trace_errors[i]); |
356 | fprintf(ctx->fp, "}\n\n"); | 357 | fprintf(ctx->fp, "},\n\n"); |
357 | } | 358 | } |
358 | 359 | ||
359 | /* -- Argument parsing ---------------------------------------------------- */ | 360 | /* -- Argument parsing ---------------------------------------------------- */ |
@@ -490,6 +491,7 @@ int main(int argc, char **argv) | |||
490 | case BUILD_vmdef: | 491 | case BUILD_vmdef: |
491 | emit_vmdef(ctx); | 492 | emit_vmdef(ctx); |
492 | emit_lib(ctx); | 493 | emit_lib(ctx); |
494 | fprintf(ctx->fp, "}\n\n"); | ||
493 | break; | 495 | break; |
494 | case BUILD_ffdef: | 496 | case BUILD_ffdef: |
495 | case BUILD_libdef: | 497 | case BUILD_libdef: |
diff --git a/src/host/buildvm.h b/src/host/buildvm.h index a440cfc3..3fdff65b 100644 --- a/src/host/buildvm.h +++ b/src/host/buildvm.h | |||
@@ -82,6 +82,7 @@ typedef struct BuildCtx { | |||
82 | const char *beginsym; | 82 | const char *beginsym; |
83 | /* Strings generated by DynASM. */ | 83 | /* Strings generated by DynASM. */ |
84 | const char *const *globnames; | 84 | const char *const *globnames; |
85 | const char *const *extnames; | ||
85 | const char *dasm_ident; | 86 | const char *dasm_ident; |
86 | const char *dasm_arch; | 87 | const char *dasm_arch; |
87 | /* Relocations. */ | 88 | /* Relocations. */ |
diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c index 2cb7d451..b9cfa049 100644 --- a/src/host/buildvm_asm.c +++ b/src/host/buildvm_asm.c | |||
@@ -51,8 +51,8 @@ static const char *const jccnames[] = { | |||
51 | "js", "jns", "jpe", "jpo", "jl", "jge", "jle", "jg" | 51 | "js", "jns", "jpe", "jpo", "jl", "jge", "jle", "jg" |
52 | }; | 52 | }; |
53 | 53 | ||
54 | /* Emit relocation for the incredibly stupid OSX assembler. */ | 54 | /* Emit x86/x64 text relocations. */ |
55 | static void emit_asm_reloc_mach(BuildCtx *ctx, uint8_t *cp, int n, | 55 | static void emit_asm_reloc_text(BuildCtx *ctx, uint8_t *cp, int n, |
56 | const char *sym) | 56 | const char *sym) |
57 | { | 57 | { |
58 | const char *opname = NULL; | 58 | const char *opname = NULL; |
@@ -71,6 +71,20 @@ err: | |||
71 | exit(1); | 71 | exit(1); |
72 | } | 72 | } |
73 | emit_asm_bytes(ctx, cp, n); | 73 | emit_asm_bytes(ctx, cp, n); |
74 | if (strncmp(sym+(*sym == '_'), LABEL_PREFIX, sizeof(LABEL_PREFIX)-1)) { | ||
75 | /* Various fixups for external symbols outside of our binary. */ | ||
76 | if (ctx->mode == BUILD_elfasm) { | ||
77 | if (LJ_32) | ||
78 | fprintf(ctx->fp, "#if __PIC__\n\t%s lj_wrap_%s\n#else\n", opname, sym); | ||
79 | fprintf(ctx->fp, "\t%s %s@PLT\n", opname, sym); | ||
80 | if (LJ_32) | ||
81 | fprintf(ctx->fp, "#endif\n"); | ||
82 | return; | ||
83 | } else if (LJ_32 && ctx->mode == BUILD_machasm) { | ||
84 | fprintf(ctx->fp, "\t%s L%s$stub\n", opname, sym); | ||
85 | return; | ||
86 | } | ||
87 | } | ||
74 | fprintf(ctx->fp, "\t%s %s\n", opname, sym); | 88 | fprintf(ctx->fp, "\t%s %s\n", opname, sym); |
75 | } | 89 | } |
76 | #else | 90 | #else |
@@ -79,10 +93,14 @@ static void emit_asm_words(BuildCtx *ctx, uint8_t *p, int n) | |||
79 | { | 93 | { |
80 | int i; | 94 | int i; |
81 | for (i = 0; i < n; i += 4) { | 95 | for (i = 0; i < n; i += 4) { |
96 | uint32_t ins = *(uint32_t *)(p+i); | ||
97 | #if LJ_TARGET_ARM64 && LJ_BE | ||
98 | ins = lj_bswap(ins); /* ARM64 instructions are always little-endian. */ | ||
99 | #endif | ||
82 | if ((i & 15) == 0) | 100 | if ((i & 15) == 0) |
83 | fprintf(ctx->fp, "\t.long 0x%08x", *(uint32_t *)(p+i)); | 101 | fprintf(ctx->fp, "\t.long 0x%08x", ins); |
84 | else | 102 | else |
85 | fprintf(ctx->fp, ",0x%08x", *(uint32_t *)(p+i)); | 103 | fprintf(ctx->fp, ",0x%08x", ins); |
86 | if ((i & 15) == 12) putc('\n', ctx->fp); | 104 | if ((i & 15) == 12) putc('\n', ctx->fp); |
87 | } | 105 | } |
88 | if ((n & 15) != 0) putc('\n', ctx->fp); | 106 | if ((n & 15) != 0) putc('\n', ctx->fp); |
@@ -107,7 +125,16 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n, | |||
107 | ins, sym); | 125 | ins, sym); |
108 | exit(1); | 126 | exit(1); |
109 | } | 127 | } |
110 | #elif LJ_TARGET_PPC || LJ_TARGET_PPCSPE | 128 | #elif LJ_TARGET_ARM64 |
129 | if ((ins >> 26) == 0x25u) { | ||
130 | fprintf(ctx->fp, "\tbl %s\n", sym); | ||
131 | } else { | ||
132 | fprintf(stderr, | ||
133 | "Error: unsupported opcode %08x for %s symbol relocation.\n", | ||
134 | ins, sym); | ||
135 | exit(1); | ||
136 | } | ||
137 | #elif LJ_TARGET_PPC | ||
111 | #if LJ_TARGET_PS3 | 138 | #if LJ_TARGET_PS3 |
112 | #define TOCPREFIX "." | 139 | #define TOCPREFIX "." |
113 | #else | 140 | #else |
@@ -228,11 +255,20 @@ void emit_asm(BuildCtx *ctx) | |||
228 | 255 | ||
229 | #if LJ_TARGET_ARM && defined(__GNUC__) && !LJ_NO_UNWIND | 256 | #if LJ_TARGET_ARM && defined(__GNUC__) && !LJ_NO_UNWIND |
230 | /* This should really be moved into buildvm_arm.dasc. */ | 257 | /* This should really be moved into buildvm_arm.dasc. */ |
258 | #if LJ_ARCH_HASFPU | ||
259 | fprintf(ctx->fp, | ||
260 | ".fnstart\n" | ||
261 | ".save {r5, r6, r7, r8, r9, r10, r11, lr}\n" | ||
262 | ".vsave {d8-d15}\n" | ||
263 | ".save {r4}\n" | ||
264 | ".pad #28\n"); | ||
265 | #else | ||
231 | fprintf(ctx->fp, | 266 | fprintf(ctx->fp, |
232 | ".fnstart\n" | 267 | ".fnstart\n" |
233 | ".save {r4, r5, r6, r7, r8, r9, r10, r11, lr}\n" | 268 | ".save {r4, r5, r6, r7, r8, r9, r10, r11, lr}\n" |
234 | ".pad #28\n"); | 269 | ".pad #28\n"); |
235 | #endif | 270 | #endif |
271 | #endif | ||
236 | #if LJ_TARGET_MIPS | 272 | #if LJ_TARGET_MIPS |
237 | fprintf(ctx->fp, ".set nomips16\n.abicalls\n.set noreorder\n.set nomacro\n"); | 273 | fprintf(ctx->fp, ".set nomips16\n.abicalls\n.set noreorder\n.set nomacro\n"); |
238 | #endif | 274 | #endif |
@@ -255,8 +291,9 @@ void emit_asm(BuildCtx *ctx) | |||
255 | BuildReloc *r = &ctx->reloc[rel]; | 291 | BuildReloc *r = &ctx->reloc[rel]; |
256 | int n = r->ofs - ofs; | 292 | int n = r->ofs - ofs; |
257 | #if LJ_TARGET_X86ORX64 | 293 | #if LJ_TARGET_X86ORX64 |
258 | if (ctx->mode == BUILD_machasm && r->type != 0) { | 294 | if (r->type != 0 && |
259 | emit_asm_reloc_mach(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]); | 295 | (ctx->mode == BUILD_elfasm || ctx->mode == BUILD_machasm)) { |
296 | emit_asm_reloc_text(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]); | ||
260 | } else { | 297 | } else { |
261 | emit_asm_bytes(ctx, ctx->code+ofs, n); | 298 | emit_asm_bytes(ctx, ctx->code+ofs, n); |
262 | emit_asm_reloc(ctx, r->type, ctx->relocsym[r->sym]); | 299 | emit_asm_reloc(ctx, r->type, ctx->relocsym[r->sym]); |
@@ -290,10 +327,7 @@ void emit_asm(BuildCtx *ctx) | |||
290 | #if !(LJ_TARGET_PS3 || LJ_TARGET_PSVITA) | 327 | #if !(LJ_TARGET_PS3 || LJ_TARGET_PSVITA) |
291 | fprintf(ctx->fp, "\t.section .note.GNU-stack,\"\"," ELFASM_PX "progbits\n"); | 328 | fprintf(ctx->fp, "\t.section .note.GNU-stack,\"\"," ELFASM_PX "progbits\n"); |
292 | #endif | 329 | #endif |
293 | #if LJ_TARGET_PPCSPE | 330 | #if LJ_TARGET_PPC && !LJ_TARGET_PS3 && !LJ_ABI_SOFTFP |
294 | /* Soft-float ABI + SPE. */ | ||
295 | fprintf(ctx->fp, "\t.gnu_attribute 4, 2\n\t.gnu_attribute 8, 3\n"); | ||
296 | #elif LJ_TARGET_PPC && !LJ_TARGET_PS3 | ||
297 | /* Hard-float ABI. */ | 331 | /* Hard-float ABI. */ |
298 | fprintf(ctx->fp, "\t.gnu_attribute 4, 1\n"); | 332 | fprintf(ctx->fp, "\t.gnu_attribute 4, 1\n"); |
299 | #endif | 333 | #endif |
diff --git a/src/host/buildvm_lib.c b/src/host/buildvm_lib.c index 3c64626c..88014b23 100644 --- a/src/host/buildvm_lib.c +++ b/src/host/buildvm_lib.c | |||
@@ -5,7 +5,9 @@ | |||
5 | 5 | ||
6 | #include "buildvm.h" | 6 | #include "buildvm.h" |
7 | #include "lj_obj.h" | 7 | #include "lj_obj.h" |
8 | #include "lj_bc.h" | ||
8 | #include "lj_lib.h" | 9 | #include "lj_lib.h" |
10 | #include "buildvm_libbc.h" | ||
9 | 11 | ||
10 | /* Context for library definitions. */ | 12 | /* Context for library definitions. */ |
11 | static uint8_t obuf[8192]; | 13 | static uint8_t obuf[8192]; |
@@ -151,6 +153,62 @@ static void libdef_func(BuildCtx *ctx, char *p, int arg) | |||
151 | regfunc = REGFUNC_OK; | 153 | regfunc = REGFUNC_OK; |
152 | } | 154 | } |
153 | 155 | ||
/*
** Decode one ULEB128-encoded unsigned 32 bit value.
**
** p   Points at the first byte of the encoding.
** vv  Receives the decoded value.
**
** Returns a pointer to the first byte after the encoding.
*/
static uint8_t *libdef_uleb128(uint8_t *p, uint32_t *vv)
{
  uint32_t v = *p++;
  if (v >= 0x80) {
    int sh = 0;
    v &= 0x7f;
    do {
      sh += 7;
      /*
      ** Shift as uint32_t: the promoted int would overflow for the 5th byte
      ** (shift by 28). Payload bits beyond bit 31 cannot be represented and
      ** are ignored, which also avoids an out-of-range shift count on
      ** over-long (malformed) encodings.
      */
      if (sh < 32)
	v |= (uint32_t)(*p & 0x7f) << sh;
    } while (*p++ >= 0x80);
  }
  *vv = v;
  return p;
}
166 | |||
167 | static void libdef_fixupbc(uint8_t *p) | ||
168 | { | ||
169 | uint32_t i, sizebc; | ||
170 | p += 4; | ||
171 | p = libdef_uleb128(p, &sizebc); | ||
172 | p = libdef_uleb128(p, &sizebc); | ||
173 | p = libdef_uleb128(p, &sizebc); | ||
174 | for (i = 0; i < sizebc; i++, p += 4) { | ||
175 | uint8_t op = p[libbc_endian ? 3 : 0]; | ||
176 | uint8_t ra = p[libbc_endian ? 2 : 1]; | ||
177 | uint8_t rc = p[libbc_endian ? 1 : 2]; | ||
178 | uint8_t rb = p[libbc_endian ? 0 : 3]; | ||
179 | if (!LJ_DUALNUM && op == BC_ISTYPE && rc == ~LJ_TNUMX+1) { | ||
180 | op = BC_ISNUM; rc++; | ||
181 | } | ||
182 | p[LJ_ENDIAN_SELECT(0, 3)] = op; | ||
183 | p[LJ_ENDIAN_SELECT(1, 2)] = ra; | ||
184 | p[LJ_ENDIAN_SELECT(2, 1)] = rc; | ||
185 | p[LJ_ENDIAN_SELECT(3, 0)] = rb; | ||
186 | } | ||
187 | } | ||
188 | |||
189 | static void libdef_lua(BuildCtx *ctx, char *p, int arg) | ||
190 | { | ||
191 | UNUSED(arg); | ||
192 | if (ctx->mode == BUILD_libdef) { | ||
193 | int i; | ||
194 | for (i = 0; libbc_map[i].name != NULL; i++) { | ||
195 | if (!strcmp(libbc_map[i].name, p)) { | ||
196 | int ofs = libbc_map[i].ofs; | ||
197 | int len = libbc_map[i+1].ofs - ofs; | ||
198 | obuf[2]++; /* Bump hash table size. */ | ||
199 | *optr++ = LIBINIT_LUA; | ||
200 | libdef_name(p, 0); | ||
201 | memcpy(optr, libbc_code + ofs, len); | ||
202 | libdef_fixupbc(optr); | ||
203 | optr += len; | ||
204 | return; | ||
205 | } | ||
206 | } | ||
207 | fprintf(stderr, "Error: missing libbc definition for %s\n", p); | ||
208 | exit(1); | ||
209 | } | ||
210 | } | ||
211 | |||
154 | static uint32_t find_rec(char *name) | 212 | static uint32_t find_rec(char *name) |
155 | { | 213 | { |
156 | char *p = (char *)obuf; | 214 | char *p = (char *)obuf; |
@@ -277,6 +335,7 @@ static const LibDefHandler libdef_handlers[] = { | |||
277 | { "CF(", ")", libdef_func, LIBINIT_CF }, | 335 | { "CF(", ")", libdef_func, LIBINIT_CF }, |
278 | { "ASM(", ")", libdef_func, LIBINIT_ASM }, | 336 | { "ASM(", ")", libdef_func, LIBINIT_ASM }, |
279 | { "ASM_(", ")", libdef_func, LIBINIT_ASM_ }, | 337 | { "ASM_(", ")", libdef_func, LIBINIT_ASM_ }, |
338 | { "LUA(", ")", libdef_lua, 0 }, | ||
280 | { "REC(", ")", libdef_rec, 0 }, | 339 | { "REC(", ")", libdef_rec, 0 }, |
281 | { "PUSH(", ")", libdef_push, 0 }, | 340 | { "PUSH(", ")", libdef_push, 0 }, |
282 | { "SET(", ")", libdef_set, 0 }, | 341 | { "SET(", ")", libdef_set, 0 }, |
@@ -373,7 +432,7 @@ void emit_lib(BuildCtx *ctx) | |||
373 | "#ifndef FF_NUM_ASMFUNC\n#define FF_NUM_ASMFUNC %d\n#endif\n\n", | 432 | "#ifndef FF_NUM_ASMFUNC\n#define FF_NUM_ASMFUNC %d\n#endif\n\n", |
374 | ffasmfunc); | 433 | ffasmfunc); |
375 | } else if (ctx->mode == BUILD_vmdef) { | 434 | } else if (ctx->mode == BUILD_vmdef) { |
376 | fprintf(ctx->fp, "}\n\n"); | 435 | fprintf(ctx->fp, "},\n\n"); |
377 | } else if (ctx->mode == BUILD_bcdef) { | 436 | } else if (ctx->mode == BUILD_bcdef) { |
378 | int i; | 437 | int i; |
379 | fprintf(ctx->fp, "\n};\n\n"); | 438 | fprintf(ctx->fp, "\n};\n\n"); |
diff --git a/src/host/buildvm_libbc.h b/src/host/buildvm_libbc.h new file mode 100644 index 00000000..b2600bd5 --- /dev/null +++ b/src/host/buildvm_libbc.h | |||
@@ -0,0 +1,56 @@ | |||
1 | /* This is a generated file. DO NOT EDIT! */ | ||
2 | |||
3 | static const int libbc_endian = 0; | ||
4 | |||
5 | static const uint8_t libbc_code[] = { | ||
6 | #if LJ_FR2 | ||
7 | 0,1,2,0,0,1,2,24,1,0,0,76,1,2,0,241,135,158,166,3,220,203,178,130,4,0,1,2,0, | ||
8 | 0,1,2,24,1,0,0,76,1,2,0,243,244,148,165,20,198,190,199,252,3,0,1,2,0,0,0,3, | ||
9 | 16,0,5,0,21,1,0,0,76,1,2,0,0,2,10,0,0,0,15,16,0,12,0,16,1,9,0,41,2,1,0,21,3, | ||
10 | 0,0,41,4,1,0,77,2,8,128,18,6,1,0,18,8,5,0,59,9,5,0,66,6,3,2,10,6,0,0,88,7,1, | ||
11 | 128,76,6,2,0,79,2,248,127,75,0,1,0,0,2,11,0,0,0,16,16,0,12,0,16,1,9,0,43,2, | ||
12 | 0,0,18,3,0,0,41,4,0,0,88,5,7,128,18,7,1,0,18,9,5,0,18,10,6,0,66,7,3,2,10,7, | ||
13 | 0,0,88,8,1,128,76,7,2,0,70,5,3,3,82,5,247,127,75,0,1,0,0,1,2,0,0,0,3,16,0,12, | ||
14 | 0,21,1,0,0,76,1,2,0,0,2,10,0,0,2,30,16,0,12,0,21,2,0,0,11,1,0,0,88,3,7,128, | ||
15 | 8,2,0,0,88,3,23,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2,0,88,3,18,128,16,1,14, | ||
16 | 0,41,3,1,0,3,3,1,0,88,3,14,128,3,1,2,0,88,3,12,128,59,3,1,0,22,4,1,1,18,5,2, | ||
17 | 0,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,252,127,43,4,0,0,64,4, | ||
18 | 2,0,76,3,2,0,75,0,1,0,0,2,0,5,12,0,0,0,35,16,0,12,0,16,1,14,0,16,2,14,0,16, | ||
19 | 3,14,0,11,4,0,0,88,5,1,128,18,4,0,0,16,4,12,0,3,1,2,0,88,5,24,128,33,5,1,3, | ||
20 | 0,2,3,0,88,6,4,128,2,3,1,0,88,6,2,128,4,4,0,0,88,6,9,128,18,6,1,0,18,7,2,0, | ||
21 | 41,8,1,0,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79,6,252,127,88,6,8,128, | ||
22 | 18,6,2,0,18,7,1,0,41,8,255,255,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79, | ||
23 | 6,252,127,76,4,2,0,0 | ||
24 | #else | ||
25 | 0,1,2,0,0,1,2,24,1,0,0,76,1,2,0,241,135,158,166,3,220,203,178,130,4,0,1,2,0, | ||
26 | 0,1,2,24,1,0,0,76,1,2,0,243,244,148,165,20,198,190,199,252,3,0,1,2,0,0,0,3, | ||
27 | 16,0,5,0,21,1,0,0,76,1,2,0,0,2,9,0,0,0,15,16,0,12,0,16,1,9,0,41,2,1,0,21,3, | ||
28 | 0,0,41,4,1,0,77,2,8,128,18,6,1,0,18,7,5,0,59,8,5,0,66,6,3,2,10,6,0,0,88,7,1, | ||
29 | 128,76,6,2,0,79,2,248,127,75,0,1,0,0,2,10,0,0,0,16,16,0,12,0,16,1,9,0,43,2, | ||
30 | 0,0,18,3,0,0,41,4,0,0,88,5,7,128,18,7,1,0,18,8,5,0,18,9,6,0,66,7,3,2,10,7,0, | ||
31 | 0,88,8,1,128,76,7,2,0,70,5,3,3,82,5,247,127,75,0,1,0,0,1,2,0,0,0,3,16,0,12, | ||
32 | 0,21,1,0,0,76,1,2,0,0,2,10,0,0,2,30,16,0,12,0,21,2,0,0,11,1,0,0,88,3,7,128, | ||
33 | 8,2,0,0,88,3,23,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2,0,88,3,18,128,16,1,14, | ||
34 | 0,41,3,1,0,3,3,1,0,88,3,14,128,3,1,2,0,88,3,12,128,59,3,1,0,22,4,1,1,18,5,2, | ||
35 | 0,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,252,127,43,4,0,0,64,4, | ||
36 | 2,0,76,3,2,0,75,0,1,0,0,2,0,5,12,0,0,0,35,16,0,12,0,16,1,14,0,16,2,14,0,16, | ||
37 | 3,14,0,11,4,0,0,88,5,1,128,18,4,0,0,16,4,12,0,3,1,2,0,88,5,24,128,33,5,1,3, | ||
38 | 0,2,3,0,88,6,4,128,2,3,1,0,88,6,2,128,4,4,0,0,88,6,9,128,18,6,1,0,18,7,2,0, | ||
39 | 41,8,1,0,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79,6,252,127,88,6,8,128, | ||
40 | 18,6,2,0,18,7,1,0,41,8,255,255,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79, | ||
41 | 6,252,127,76,4,2,0,0 | ||
42 | #endif | ||
43 | }; | ||
44 | |||
45 | static const struct { const char *name; int ofs; } libbc_map[] = { | ||
46 | {"math_deg",0}, | ||
47 | {"math_rad",25}, | ||
48 | {"string_len",50}, | ||
49 | {"table_foreachi",69}, | ||
50 | {"table_foreach",136}, | ||
51 | {"table_getn",207}, | ||
52 | {"table_remove",226}, | ||
53 | {"table_move",355}, | ||
54 | {NULL,502} | ||
55 | }; | ||
56 | |||
diff --git a/src/host/buildvm_peobj.c b/src/host/buildvm_peobj.c index 876b0add..01f9dac4 100644 --- a/src/host/buildvm_peobj.c +++ b/src/host/buildvm_peobj.c | |||
@@ -9,7 +9,7 @@ | |||
9 | #include "buildvm.h" | 9 | #include "buildvm.h" |
10 | #include "lj_bc.h" | 10 | #include "lj_bc.h" |
11 | 11 | ||
12 | #if LJ_TARGET_X86ORX64 || LJ_TARGET_PPC | 12 | #if LJ_TARGET_X86ORX64 |
13 | 13 | ||
14 | /* Context for PE object emitter. */ | 14 | /* Context for PE object emitter. */ |
15 | static char *strtab; | 15 | static char *strtab; |
@@ -93,12 +93,6 @@ typedef struct PEsymaux { | |||
93 | #define PEOBJ_RELOC_ADDR32NB 0x03 | 93 | #define PEOBJ_RELOC_ADDR32NB 0x03 |
94 | #define PEOBJ_RELOC_OFS 0 | 94 | #define PEOBJ_RELOC_OFS 0 |
95 | #define PEOBJ_TEXT_FLAGS 0x60500020 /* 60=r+x, 50=align16, 20=code. */ | 95 | #define PEOBJ_TEXT_FLAGS 0x60500020 /* 60=r+x, 50=align16, 20=code. */ |
96 | #elif LJ_TARGET_PPC | ||
97 | #define PEOBJ_ARCH_TARGET 0x01f2 | ||
98 | #define PEOBJ_RELOC_REL32 0x06 | ||
99 | #define PEOBJ_RELOC_DIR32 0x02 | ||
100 | #define PEOBJ_RELOC_OFS (-4) | ||
101 | #define PEOBJ_TEXT_FLAGS 0x60400020 /* 60=r+x, 40=align8, 20=code. */ | ||
102 | #endif | 96 | #endif |
103 | 97 | ||
104 | /* Section numbers (0-based). */ | 98 | /* Section numbers (0-based). */ |
@@ -109,6 +103,8 @@ enum { | |||
109 | #if LJ_TARGET_X64 | 103 | #if LJ_TARGET_X64 |
110 | PEOBJ_SECT_PDATA, | 104 | PEOBJ_SECT_PDATA, |
111 | PEOBJ_SECT_XDATA, | 105 | PEOBJ_SECT_XDATA, |
106 | #elif LJ_TARGET_X86 | ||
107 | PEOBJ_SECT_SXDATA, | ||
112 | #endif | 108 | #endif |
113 | PEOBJ_SECT_RDATA_Z, | 109 | PEOBJ_SECT_RDATA_Z, |
114 | PEOBJ_NSECTIONS | 110 | PEOBJ_NSECTIONS |
@@ -208,6 +204,13 @@ void emit_peobj(BuildCtx *ctx) | |||
208 | sofs += (pesect[PEOBJ_SECT_XDATA].nreloc = 1) * PEOBJ_RELOC_SIZE; | 204 | sofs += (pesect[PEOBJ_SECT_XDATA].nreloc = 1) * PEOBJ_RELOC_SIZE; |
209 | /* Flags: 40 = read, 30 = align4, 40 = initialized data. */ | 205 | /* Flags: 40 = read, 30 = align4, 40 = initialized data. */ |
210 | pesect[PEOBJ_SECT_XDATA].flags = 0x40300040; | 206 | pesect[PEOBJ_SECT_XDATA].flags = 0x40300040; |
207 | #elif LJ_TARGET_X86 | ||
208 | memcpy(pesect[PEOBJ_SECT_SXDATA].name, ".sxdata", sizeof(".sxdata")-1); | ||
209 | pesect[PEOBJ_SECT_SXDATA].ofs = sofs; | ||
210 | sofs += (pesect[PEOBJ_SECT_SXDATA].size = 4); | ||
211 | pesect[PEOBJ_SECT_SXDATA].relocofs = sofs; | ||
212 | /* Flags: 40 = read, 30 = align4, 02 = lnk_info, 40 = initialized data. */ | ||
213 | pesect[PEOBJ_SECT_SXDATA].flags = 0x40300240; | ||
211 | #endif | 214 | #endif |
212 | 215 | ||
213 | memcpy(pesect[PEOBJ_SECT_RDATA_Z].name, ".rdata$Z", sizeof(".rdata$Z")-1); | 216 | memcpy(pesect[PEOBJ_SECT_RDATA_Z].name, ".rdata$Z", sizeof(".rdata$Z")-1); |
@@ -232,7 +235,7 @@ void emit_peobj(BuildCtx *ctx) | |||
232 | nrsym = ctx->nrelocsym; | 235 | nrsym = ctx->nrelocsym; |
233 | pehdr.nsyms = 1+PEOBJ_NSECTIONS*2 + 1+ctx->nsym + nrsym; | 236 | pehdr.nsyms = 1+PEOBJ_NSECTIONS*2 + 1+ctx->nsym + nrsym; |
234 | #if LJ_TARGET_X64 | 237 | #if LJ_TARGET_X64 |
235 | pehdr.nsyms += 1; /* Symbol for lj_err_unwind_win64. */ | 238 | pehdr.nsyms += 1; /* Symbol for lj_err_unwind_win. */ |
236 | #endif | 239 | #endif |
237 | 240 | ||
238 | /* Write PE object header and all sections. */ | 241 | /* Write PE object header and all sections. */ |
@@ -242,15 +245,8 @@ void emit_peobj(BuildCtx *ctx) | |||
242 | /* Write .text section. */ | 245 | /* Write .text section. */ |
243 | host_endian.u = 1; | 246 | host_endian.u = 1; |
244 | if (host_endian.b != LJ_ENDIAN_SELECT(1, 0)) { | 247 | if (host_endian.b != LJ_ENDIAN_SELECT(1, 0)) { |
245 | #if LJ_TARGET_PPC | ||
246 | uint32_t *p = (uint32_t *)ctx->code; | ||
247 | int n = (int)(ctx->codesz >> 2); | ||
248 | for (i = 0; i < n; i++, p++) | ||
249 | *p = lj_bswap(*p); /* Byteswap .text section. */ | ||
250 | #else | ||
251 | fprintf(stderr, "Error: different byte order for host and target\n"); | 248 | fprintf(stderr, "Error: different byte order for host and target\n"); |
252 | exit(1); | 249 | exit(1); |
253 | #endif | ||
254 | } | 250 | } |
255 | owrite(ctx, ctx->code, ctx->codesz); | 251 | owrite(ctx, ctx->code, ctx->codesz); |
256 | for (i = 0; i < ctx->nreloc; i++) { | 252 | for (i = 0; i < ctx->nreloc; i++) { |
@@ -312,6 +308,19 @@ void emit_peobj(BuildCtx *ctx) | |||
312 | reloc.type = PEOBJ_RELOC_ADDR32NB; | 308 | reloc.type = PEOBJ_RELOC_ADDR32NB; |
313 | owrite(ctx, &reloc, PEOBJ_RELOC_SIZE); | 309 | owrite(ctx, &reloc, PEOBJ_RELOC_SIZE); |
314 | } | 310 | } |
311 | #elif LJ_TARGET_X86 | ||
312 | /* Write .sxdata section. */ | ||
313 | for (i = 0; i < nrsym; i++) { | ||
314 | if (!strcmp(ctx->relocsym[i], "_lj_err_unwind_win")) { | ||
315 | uint32_t symidx = 1+2+i; | ||
316 | owrite(ctx, &symidx, 4); | ||
317 | break; | ||
318 | } | ||
319 | } | ||
320 | if (i == nrsym) { | ||
321 | fprintf(stderr, "Error: extern lj_err_unwind_win not used\n"); | ||
322 | exit(1); | ||
323 | } | ||
315 | #endif | 324 | #endif |
316 | 325 | ||
317 | /* Write .rdata$Z section. */ | 326 | /* Write .rdata$Z section. */ |
@@ -333,8 +342,10 @@ void emit_peobj(BuildCtx *ctx) | |||
333 | #if LJ_TARGET_X64 | 342 | #if LJ_TARGET_X64 |
334 | emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_PDATA); | 343 | emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_PDATA); |
335 | emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_XDATA); | 344 | emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_XDATA); |
336 | emit_peobj_sym(ctx, "lj_err_unwind_win64", 0, | 345 | emit_peobj_sym(ctx, "lj_err_unwind_win", 0, |
337 | PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN); | 346 | PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN); |
347 | #elif LJ_TARGET_X86 | ||
348 | emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_SXDATA); | ||
338 | #endif | 349 | #endif |
339 | 350 | ||
340 | emit_peobj_sym(ctx, ctx->beginsym, 0, | 351 | emit_peobj_sym(ctx, ctx->beginsym, 0, |
diff --git a/src/host/genlibbc.lua b/src/host/genlibbc.lua new file mode 100644 index 00000000..56899546 --- /dev/null +++ b/src/host/genlibbc.lua | |||
@@ -0,0 +1,197 @@ | |||
1 | ---------------------------------------------------------------------------- | ||
2 | -- Lua script to dump the bytecode of the library functions written in Lua. | ||
3 | -- The resulting 'buildvm_libbc.h' is used for the build process of LuaJIT. | ||
4 | ---------------------------------------------------------------------------- | ||
5 | -- Copyright (C) 2005-2020 Mike Pall. All rights reserved. | ||
6 | -- Released under the MIT license. See Copyright Notice in luajit.h | ||
7 | ---------------------------------------------------------------------------- | ||
8 | |||
9 | local ffi = require("ffi") | ||
10 | local bit = require("bit") | ||
11 | local vmdef = require("jit.vmdef") | ||
12 | local bcnames = vmdef.bcnames | ||
13 | |||
14 | local format = string.format | ||
15 | |||
16 | local isbe = (string.byte(string.dump(function() end), 5) % 2 == 1) | ||
17 | |||
-- Print a usage line to stderr and exit with status 1.
-- arg: the script argument table; arg[0], when present, is shown as the
-- program name, otherwise "genlibbc" is used.
local function usage(arg)
  local prog = "genlibbc"
  if arg and arg[0] then
    prog = arg[0]
  end
  io.stderr:write("Usage: ", prog, " [-o buildvm_libbc.h] lib_*.c\n")
  os.exit(1)
end
23 | |||
-- Parse the command line.
-- Returns the output file name: the argument following a leading "-o",
-- or "-" when no "-o" option is given. A leading "-o <file>" pair is
-- removed from arg in place, so only the input file names remain.
-- Calls usage() when there are no arguments or "-o" lacks a file name.
local function parse_arg(arg)
  if not (arg and arg[1]) then
    usage(arg)
  end
  if arg[1] ~= "-o" then
    return "-"
  end
  local outfile = arg[2]
  if not outfile then usage(arg) end
  -- Drop "-o" and its value.
  table.remove(arg, 1)
  table.remove(arg, 1)
  return outfile
end
37 | |||
-- Read all named files and return their contents concatenated in order.
-- names: array of file names. Raises an error if a file cannot be opened.
-- Returns "" for an empty list.
local function read_files(names)
  -- Collect the pieces and join once, instead of growing a string with
  -- repeated concatenation inside the loop.
  local parts = {}
  for i, name in ipairs(names) do
    local fp = assert(io.open(name))
    parts[i] = fp:read("*a")
    fp:close()
  end
  return table.concat(parts)
end
47 | |||
-- Rewrite the pseudo-Lua of a library definition into compilable Lua.
--   CHECK_<tp>(var) becomes "var=<n>", where <n> is a unique marker number
--                   counting up from -29999; fixup[<n>] = { "CHECK", tp }.
--   PAIRS(var)      becomes "nil, var, 0" and sets fixup.PAIRS = true.
-- Returns the code prefixed with "return " and the fixup table.
local function transform_lua(code)
  local fixup = {}
  local marker = -30000

  local function on_check(tp, var)
    marker = marker + 1
    fixup[marker] = { "CHECK", tp }
    return ("%s=%d"):format(var, marker)
  end

  local function on_pairs(var)
    fixup.PAIRS = true
    return ("nil, %s, 0"):format(var)
  end

  -- Plain assignment keeps only the first result of gsub (the string).
  code = string.gsub(code, "CHECK_(%w*)%((.-)%)", on_check)
  code = string.gsub(code, "PAIRS%((.-)%)", on_pairs)
  return "return "..code, fixup
end
62 | |||
-- Decode one ULEB128 value at p (indexed with p[0] and advanced with p + 1).
-- Returns the advanced pointer and the decoded value.
local function read_uleb128(p)
  local v = p[0]
  p = p + 1
  if v < 128 then
    return p, v
  end
  -- Multi-byte encoding: strip the continuation bit of the first byte and
  -- accumulate 7 payload bits per following byte.
  v = v - 128
  local sh = 7
  while true do
    local r = p[0]
    p = p + 1
    v = v + bit.lshift(bit.band(r, 127), sh)
    sh = sh + 7
    if r < 128 then break end
  end
  return p, v
end
76 | |||
-- Operand values stored for CHECK_<tp> fixups, keyed by type name.
-- ORDER LJ_T
local name2itype = {
  str = 5,
  func = 9,
  tab = 12,
  int = 14,
  num = 15,
}

-- Map bytecode name -> opcode number. bcnames is a string of fixed-width
-- 6-character fields; padding spaces are removed from each name.
local BC = {}
for pos = 1, #bcnames - 5, 6 do
  local name = string.gsub(string.sub(bcnames, pos, pos + 5), " ", "")
  BC[name] = (pos - 1) / 6
end

-- Byte positions of the op/ra/rc/rb fields within a 4-byte instruction,
-- depending on the byte order of the dumping VM.
local xop, xra, xrc, xrb
if isbe then
  xop, xra, xrc, xrb = 3, 2, 1, 0
else
  xop, xra, xrc, xrb = 0, 1, 2, 3
end
88 | |||
-- Patch the bytecode of a dumped function according to the fixup table
-- produced by transform_lua().
--   * KSHORT with a marker operand found in fixup becomes ISNUM/ISTYPE
--     with the type operand from name2itype.
--   * TGETV/TSETV whose rb operand is a register checked with CHECK_tab
--     becomes TGETR/TSETR.
--   * ITERC becomes ITERN when the source used PAIRS().
-- Returns the n bytes following the first length field (presumably the
-- function prototype without the dump header -- confirm against the
-- bytecode dump format).
local function fixup_dump(dump, fixup)
  local buf = ffi.new("uint8_t[?]", #dump+1, dump)
  -- Skip 5 leading bytes, then read the length n of the data to return.
  local p, n = read_uleb128(buf+5)
  local start = p
  -- Skip 4 bytes and two ULEB128 fields; the third field is the number
  -- of 4-byte instructions.
  p = read_uleb128(p + 4)
  p = read_uleb128(p)
  local sizebc
  p, sizebc = read_uleb128(p)

  local rawtab = {}  -- Registers that passed a CHECK_tab.
  for _ = 1, sizebc do
    local op = p[xop]
    if op == BC.KSHORT then
      -- Sign-extend the 16 bit operand to recover the negative marker.
      local rd = bit.arshift(bit.lshift(p[xrc] + 256*p[xrb], 16), 16)
      local f = fixup[rd]
      if f then
        if f[1] ~= "CHECK" then
          error("unhandled fixup type: "..f[1])
        end
        local tp = f[2]
        if tp == "tab" then rawtab[p[xra]] = true end
        if tp == "num" then
          p[xop] = BC.ISNUM
        else
          p[xop] = BC.ISTYPE
        end
        p[xrb] = 0
        p[xrc] = name2itype[tp]
      end
    elseif op == BC.TGETV and rawtab[p[xrb]] then
      p[xop] = BC.TGETR
    elseif op == BC.TSETV and rawtab[p[xrb]] then
      p[xop] = BC.TSETR
    elseif op == BC.ITERC and fixup.PAIRS then
      p[xop] = BC.ITERN
    end
    p = p + 4
  end
  return ffi.string(start, n)
end
134 | |||
-- Collect all LJLIB_LUA(name) definitions from the C source text.
-- Each definition is followed by a C comment holding the pseudo-Lua code.
-- Returns a table that maps name -> fixed-up bytecode string and also
-- lists the names in order of appearance in its array part.
local function find_defs(src)
  local defs = {}
  local pattern = "LJLIB_LUA%(([^)]*)%)%s*/%*(.-)%*/"
  for name, code in string.gmatch(src, pattern) do
    local tcode, fixup = transform_lua(code)
    -- Compile with an empty table passed as the chunk environment, then
    -- run the chunk to obtain the function it returns.
    local env = {}
    local chunk = assert(load(tcode, "", nil, env))
    local func = chunk()
    -- Second argument 'true' requests a stripped dump.
    defs[name] = fixup_dump(string.dump(func, true), fixup)
    defs[#defs+1] = name
  end
  return defs
end
146 | |||
-- Generate the text of buildvm_libbc.h.
-- defs: table from find_defs() (name -> bytecode string, plus the names
-- in its array part). The header contains the libbc_endian flag, all
-- bytecode as one byte array (terminated by an extra 0) and a map from
-- each name to its start offset, terminated by a NULL entry holding the
-- total length.
local function gen_header(defs)
  local out, nout = {}, 0
  local function put(x)
    nout = nout + 1
    out[nout] = x
  end

  put("/* This is a generated file. DO NOT EDIT! */\n\n")
  put("static const int libbc_endian = ")
  put(isbe and 1 or 0)
  put(";\n\n")

  -- Concatenate the bytecode of all functions in definition order.
  local chunks = {}
  for k, name in ipairs(defs) do
    chunks[k] = defs[name]
  end
  local blob = table.concat(chunks)

  put("static const uint8_t libbc_code[] = {\n")
  local col = 0
  for pos = 1, #blob do
    local byte = string.byte(blob, pos)
    put(byte); put(",")
    -- Track the line width (digits plus comma); wrap at 75 or more.
    col = col + #tostring(byte) + 1
    if col >= 75 then
      col = 0
      put("\n")
    end
  end
  put("0\n};\n\n")

  put("static const struct { const char *name; int ofs; } libbc_map[] = {\n")
  local ofs = 0
  for _, name in ipairs(defs) do
    put('{"'); put(name); put('",'); put(ofs); put('},\n')
    ofs = ofs + #defs[name]
  end
  put("{NULL,"); put(ofs); put("}\n};\n\n")

  return table.concat(out)
end
174 | |||
-- Write data to the named file, or to stdout when name is "-".
-- An existing file whose contents already equal data is left untouched,
-- so it is not rewritten. Raises an error if writing fails.
local function write_file(name, data)
  if name == "-" then
    assert(io.write(data))
    assert(io.flush())
    return
  end

  -- Compare against the current contents, if the file can be opened.
  local oldfp = io.open(name)
  if oldfp then
    local old = oldfp:read("*a")
    oldfp:close()
    if old == data then return end
  end

  local fp = assert(io.open(name, "w"))
  assert(fp:write(data))
  assert(fp:close())
end
191 | |||
-- Driver: parse the command line, read the sources, extract and compile
-- the definitions and write the generated header.
-- parse_arg() must run first: it strips "-o <file>" from arg, leaving only
-- the input file names for read_files().
local out_name = parse_arg(arg)
local lib_src = read_files(arg)
local hdr_text = gen_header(find_defs(lib_src))
write_file(out_name, hdr_text)
197 | |||
diff --git a/src/jit/bc.lua b/src/jit/bc.lua index 7ca4c61c..45ba40e2 100644 --- a/src/jit/bc.lua +++ b/src/jit/bc.lua | |||
@@ -41,7 +41,7 @@ | |||
41 | 41 | ||
42 | -- Cache some library functions and objects. | 42 | -- Cache some library functions and objects. |
43 | local jit = require("jit") | 43 | local jit = require("jit") |
44 | assert(jit.version_num == 20005, "LuaJIT core/library version mismatch") | 44 | assert(jit.version_num == 20100, "LuaJIT core/library version mismatch") |
45 | local jutil = require("jit.util") | 45 | local jutil = require("jit.util") |
46 | local vmdef = require("jit.vmdef") | 46 | local vmdef = require("jit.vmdef") |
47 | local bit = require("bit") | 47 | local bit = require("bit") |
@@ -179,13 +179,12 @@ local function bcliston(outfile) | |||
179 | end | 179 | end |
180 | 180 | ||
181 | -- Public module functions. | 181 | -- Public module functions. |
182 | module(...) | 182 | return { |
183 | 183 | line = bcline, | |
184 | line = bcline | 184 | dump = bcdump, |
185 | dump = bcdump | 185 | targets = bctargets, |
186 | targets = bctargets | 186 | on = bcliston, |
187 | 187 | off = bclistoff, | |
188 | on = bcliston | 188 | start = bcliston -- For -j command line option. |
189 | off = bclistoff | 189 | } |
190 | start = bcliston -- For -j command line option. | ||
191 | 190 | ||
diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua index 58351c16..42d7240b 100644 --- a/src/jit/bcsave.lua +++ b/src/jit/bcsave.lua | |||
@@ -11,12 +11,16 @@ | |||
11 | ------------------------------------------------------------------------------ | 11 | ------------------------------------------------------------------------------ |
12 | 12 | ||
13 | local jit = require("jit") | 13 | local jit = require("jit") |
14 | assert(jit.version_num == 20005, "LuaJIT core/library version mismatch") | 14 | assert(jit.version_num == 20100, "LuaJIT core/library version mismatch") |
15 | local bit = require("bit") | 15 | local bit = require("bit") |
16 | 16 | ||
17 | -- Symbol name prefix for LuaJIT bytecode. | 17 | -- Symbol name prefix for LuaJIT bytecode. |
18 | local LJBC_PREFIX = "luaJIT_BC_" | 18 | local LJBC_PREFIX = "luaJIT_BC_" |
19 | 19 | ||
20 | local type, assert = type, assert | ||
21 | local format = string.format | ||
22 | local tremove, tconcat = table.remove, table.concat | ||
23 | |||
20 | ------------------------------------------------------------------------------ | 24 | ------------------------------------------------------------------------------ |
21 | 25 | ||
22 | local function usage() | 26 | local function usage() |
@@ -63,8 +67,18 @@ local map_type = { | |||
63 | } | 67 | } |
64 | 68 | ||
65 | local map_arch = { | 69 | local map_arch = { |
66 | x86 = true, x64 = true, arm = true, ppc = true, ppcspe = true, | 70 | x86 = { e = "le", b = 32, m = 3, p = 0x14c, }, |
67 | mips = true, mipsel = true, | 71 | x64 = { e = "le", b = 64, m = 62, p = 0x8664, }, |
72 | arm = { e = "le", b = 32, m = 40, p = 0x1c0, }, | ||
73 | arm64 = { e = "le", b = 64, m = 183, p = 0xaa64, }, | ||
74 | arm64be = { e = "be", b = 64, m = 183, }, | ||
75 | ppc = { e = "be", b = 32, m = 20, }, | ||
76 | mips = { e = "be", b = 32, m = 8, f = 0x50001006, }, | ||
77 | mipsel = { e = "le", b = 32, m = 8, f = 0x50001006, }, | ||
78 | mips64 = { e = "be", b = 64, m = 8, f = 0x80000007, }, | ||
79 | mips64el = { e = "le", b = 64, m = 8, f = 0x80000007, }, | ||
80 | mips64r6 = { e = "be", b = 64, m = 8, f = 0xa0000407, }, | ||
81 | mips64r6el = { e = "le", b = 64, m = 8, f = 0xa0000407, }, | ||
68 | } | 82 | } |
69 | 83 | ||
70 | local map_os = { | 84 | local map_os = { |
@@ -73,33 +87,33 @@ local map_os = { | |||
73 | } | 87 | } |
74 | 88 | ||
75 | local function checkarg(str, map, err) | 89 | local function checkarg(str, map, err) |
76 | str = string.lower(str) | 90 | str = str:lower() |
77 | local s = check(map[str], "unknown ", err) | 91 | local s = check(map[str], "unknown ", err) |
78 | return s == true and str or s | 92 | return type(s) == "string" and s or str |
79 | end | 93 | end |
80 | 94 | ||
81 | local function detecttype(str) | 95 | local function detecttype(str) |
82 | local ext = string.match(string.lower(str), "%.(%a+)$") | 96 | local ext = str:lower():match("%.(%a+)$") |
83 | return map_type[ext] or "raw" | 97 | return map_type[ext] or "raw" |
84 | end | 98 | end |
85 | 99 | ||
86 | local function checkmodname(str) | 100 | local function checkmodname(str) |
87 | check(string.match(str, "^[%w_.%-]+$"), "bad module name") | 101 | check(str:match("^[%w_.%-]+$"), "bad module name") |
88 | return string.gsub(str, "[%.%-]", "_") | 102 | return str:gsub("[%.%-]", "_") |
89 | end | 103 | end |
90 | 104 | ||
91 | local function detectmodname(str) | 105 | local function detectmodname(str) |
92 | if type(str) == "string" then | 106 | if type(str) == "string" then |
93 | local tail = string.match(str, "[^/\\]+$") | 107 | local tail = str:match("[^/\\]+$") |
94 | if tail then str = tail end | 108 | if tail then str = tail end |
95 | local head = string.match(str, "^(.*)%.[^.]*$") | 109 | local head = str:match("^(.*)%.[^.]*$") |
96 | if head then str = head end | 110 | if head then str = head end |
97 | str = string.match(str, "^[%w_.%-]+") | 111 | str = str:match("^[%w_.%-]+") |
98 | else | 112 | else |
99 | str = nil | 113 | str = nil |
100 | end | 114 | end |
101 | check(str, "cannot derive module name, use -n name") | 115 | check(str, "cannot derive module name, use -n name") |
102 | return string.gsub(str, "[%.%-]", "_") | 116 | return str:gsub("[%.%-]", "_") |
103 | end | 117 | end |
104 | 118 | ||
105 | ------------------------------------------------------------------------------ | 119 | ------------------------------------------------------------------------------ |
@@ -118,19 +132,19 @@ end | |||
118 | local function bcsave_c(ctx, output, s) | 132 | local function bcsave_c(ctx, output, s) |
119 | local fp = savefile(output, "w") | 133 | local fp = savefile(output, "w") |
120 | if ctx.type == "c" then | 134 | if ctx.type == "c" then |
121 | fp:write(string.format([[ | 135 | fp:write(format([[ |
122 | #ifdef __cplusplus | 136 | #ifdef __cplusplus |
123 | extern "C" | 137 | extern "C" |
124 | #endif | 138 | #endif |
125 | #ifdef _WIN32 | 139 | #ifdef _WIN32 |
126 | __declspec(dllexport) | 140 | __declspec(dllexport) |
127 | #endif | 141 | #endif |
128 | const char %s%s[] = { | 142 | const unsigned char %s%s[] = { |
129 | ]], LJBC_PREFIX, ctx.modname)) | 143 | ]], LJBC_PREFIX, ctx.modname)) |
130 | else | 144 | else |
131 | fp:write(string.format([[ | 145 | fp:write(format([[ |
132 | #define %s%s_SIZE %d | 146 | #define %s%s_SIZE %d |
133 | static const char %s%s[] = { | 147 | static const unsigned char %s%s[] = { |
134 | ]], LJBC_PREFIX, ctx.modname, #s, LJBC_PREFIX, ctx.modname)) | 148 | ]], LJBC_PREFIX, ctx.modname, #s, LJBC_PREFIX, ctx.modname)) |
135 | end | 149 | end |
136 | local t, n, m = {}, 0, 0 | 150 | local t, n, m = {}, 0, 0 |
@@ -138,13 +152,13 @@ static const char %s%s[] = { | |||
138 | local b = tostring(string.byte(s, i)) | 152 | local b = tostring(string.byte(s, i)) |
139 | m = m + #b + 1 | 153 | m = m + #b + 1 |
140 | if m > 78 then | 154 | if m > 78 then |
141 | fp:write(table.concat(t, ",", 1, n), ",\n") | 155 | fp:write(tconcat(t, ",", 1, n), ",\n") |
142 | n, m = 0, #b + 1 | 156 | n, m = 0, #b + 1 |
143 | end | 157 | end |
144 | n = n + 1 | 158 | n = n + 1 |
145 | t[n] = b | 159 | t[n] = b |
146 | end | 160 | end |
147 | bcsave_tail(fp, output, table.concat(t, ",", 1, n).."\n};\n") | 161 | bcsave_tail(fp, output, tconcat(t, ",", 1, n).."\n};\n") |
148 | end | 162 | end |
149 | 163 | ||
150 | local function bcsave_elfobj(ctx, output, s, ffi) | 164 | local function bcsave_elfobj(ctx, output, s, ffi) |
@@ -199,12 +213,8 @@ typedef struct { | |||
199 | } ELF64obj; | 213 | } ELF64obj; |
200 | ]] | 214 | ]] |
201 | local symname = LJBC_PREFIX..ctx.modname | 215 | local symname = LJBC_PREFIX..ctx.modname |
202 | local is64, isbe = false, false | 216 | local ai = assert(map_arch[ctx.arch]) |
203 | if ctx.arch == "x64" then | 217 | local is64, isbe = ai.b == 64, ai.e == "be" |
204 | is64 = true | ||
205 | elseif ctx.arch == "ppc" or ctx.arch == "ppcspe" or ctx.arch == "mips" then | ||
206 | isbe = true | ||
207 | end | ||
208 | 218 | ||
209 | -- Handle different host/target endianess. | 219 | -- Handle different host/target endianess. |
210 | local function f32(x) return x end | 220 | local function f32(x) return x end |
@@ -237,10 +247,8 @@ typedef struct { | |||
237 | hdr.eendian = isbe and 2 or 1 | 247 | hdr.eendian = isbe and 2 or 1 |
238 | hdr.eversion = 1 | 248 | hdr.eversion = 1 |
239 | hdr.type = f16(1) | 249 | hdr.type = f16(1) |
240 | hdr.machine = f16(({ x86=3, x64=62, arm=40, ppc=20, ppcspe=20, mips=8, mipsel=8 })[ctx.arch]) | 250 | hdr.machine = f16(ai.m) |
241 | if ctx.arch == "mips" or ctx.arch == "mipsel" then | 251 | hdr.flags = f32(ai.f or 0) |
242 | hdr.flags = f32(0x50001006) | ||
243 | end | ||
244 | hdr.version = f32(1) | 252 | hdr.version = f32(1) |
245 | hdr.shofs = fofs(ffi.offsetof(o, "sect")) | 253 | hdr.shofs = fofs(ffi.offsetof(o, "sect")) |
246 | hdr.ehsize = f16(ffi.sizeof(hdr)) | 254 | hdr.ehsize = f16(ffi.sizeof(hdr)) |
@@ -336,12 +344,8 @@ typedef struct { | |||
336 | } PEobj; | 344 | } PEobj; |
337 | ]] | 345 | ]] |
338 | local symname = LJBC_PREFIX..ctx.modname | 346 | local symname = LJBC_PREFIX..ctx.modname |
339 | local is64 = false | 347 | local ai = assert(map_arch[ctx.arch]) |
340 | if ctx.arch == "x86" then | 348 | local is64 = ai.b == 64 |
341 | symname = "_"..symname | ||
342 | elseif ctx.arch == "x64" then | ||
343 | is64 = true | ||
344 | end | ||
345 | local symexport = " /EXPORT:"..symname..",DATA " | 349 | local symexport = " /EXPORT:"..symname..",DATA " |
346 | 350 | ||
347 | -- The file format is always little-endian. Swap if the host is big-endian. | 351 | -- The file format is always little-endian. Swap if the host is big-endian. |
@@ -355,7 +359,7 @@ typedef struct { | |||
355 | -- Create PE object and fill in header. | 359 | -- Create PE object and fill in header. |
356 | local o = ffi.new("PEobj") | 360 | local o = ffi.new("PEobj") |
357 | local hdr = o.hdr | 361 | local hdr = o.hdr |
358 | hdr.arch = f16(({ x86=0x14c, x64=0x8664, arm=0x1c0, ppc=0x1f2, mips=0x366, mipsel=0x366 })[ctx.arch]) | 362 | hdr.arch = f16(assert(ai.p)) |
359 | hdr.nsects = f16(2) | 363 | hdr.nsects = f16(2) |
360 | hdr.symtabofs = f32(ffi.offsetof(o, "sym0")) | 364 | hdr.symtabofs = f32(ffi.offsetof(o, "sym0")) |
361 | hdr.nsyms = f32(6) | 365 | hdr.nsyms = f32(6) |
@@ -477,13 +481,13 @@ typedef struct { | |||
477 | } mach_obj_64; | 481 | } mach_obj_64; |
478 | typedef struct { | 482 | typedef struct { |
479 | mach_fat_header fat; | 483 | mach_fat_header fat; |
480 | mach_fat_arch fat_arch[4]; | 484 | mach_fat_arch fat_arch[2]; |
481 | struct { | 485 | struct { |
482 | mach_header hdr; | 486 | mach_header hdr; |
483 | mach_segment_command seg; | 487 | mach_segment_command seg; |
484 | mach_section sec; | 488 | mach_section sec; |
485 | mach_symtab_command sym; | 489 | mach_symtab_command sym; |
486 | } arch[4]; | 490 | } arch[2]; |
487 | mach_nlist sym_entry; | 491 | mach_nlist sym_entry; |
488 | uint8_t space[4096]; | 492 | uint8_t space[4096]; |
489 | } mach_fat_obj; | 493 | } mach_fat_obj; |
@@ -494,6 +498,8 @@ typedef struct { | |||
494 | is64, align, mobj = true, 8, "mach_obj_64" | 498 | is64, align, mobj = true, 8, "mach_obj_64" |
495 | elseif ctx.arch == "arm" then | 499 | elseif ctx.arch == "arm" then |
496 | isfat, mobj = true, "mach_fat_obj" | 500 | isfat, mobj = true, "mach_fat_obj" |
501 | elseif ctx.arch == "arm64" then | ||
502 | is64, align, isfat, mobj = true, 8, true, "mach_fat_obj" | ||
497 | else | 503 | else |
498 | check(ctx.arch == "x86", "unsupported architecture for OSX") | 504 | check(ctx.arch == "x86", "unsupported architecture for OSX") |
499 | end | 505 | end |
@@ -503,8 +509,8 @@ typedef struct { | |||
503 | -- Create Mach-O object and fill in header. | 509 | -- Create Mach-O object and fill in header. |
504 | local o = ffi.new(mobj) | 510 | local o = ffi.new(mobj) |
505 | local mach_size = aligned(ffi.offsetof(o, "space")+#symname+2, align) | 511 | local mach_size = aligned(ffi.offsetof(o, "space")+#symname+2, align) |
506 | local cputype = ({ x86={7}, x64={0x01000007}, arm={7,12,12,12} })[ctx.arch] | 512 | local cputype = ({ x86={7}, x64={0x01000007}, arm={7,12}, arm64={0x01000007,0x0100000c} })[ctx.arch] |
507 | local cpusubtype = ({ x86={3}, x64={3}, arm={3,6,9,11} })[ctx.arch] | 513 | local cpusubtype = ({ x86={3}, x64={3}, arm={3,9}, arm64={3,0} })[ctx.arch] |
508 | if isfat then | 514 | if isfat then |
509 | o.fat.magic = be32(0xcafebabe) | 515 | o.fat.magic = be32(0xcafebabe) |
510 | o.fat.nfat_arch = be32(#cpusubtype) | 516 | o.fat.nfat_arch = be32(#cpusubtype) |
@@ -603,16 +609,16 @@ local function docmd(...) | |||
603 | local n = 1 | 609 | local n = 1 |
604 | local list = false | 610 | local list = false |
605 | local ctx = { | 611 | local ctx = { |
606 | strip = true, arch = jit.arch, os = string.lower(jit.os), | 612 | strip = true, arch = jit.arch, os = jit.os:lower(), |
607 | type = false, modname = false, | 613 | type = false, modname = false, |
608 | } | 614 | } |
609 | while n <= #arg do | 615 | while n <= #arg do |
610 | local a = arg[n] | 616 | local a = arg[n] |
611 | if type(a) == "string" and string.sub(a, 1, 1) == "-" and a ~= "-" then | 617 | if type(a) == "string" and a:sub(1, 1) == "-" and a ~= "-" then |
612 | table.remove(arg, n) | 618 | tremove(arg, n) |
613 | if a == "--" then break end | 619 | if a == "--" then break end |
614 | for m=2,#a do | 620 | for m=2,#a do |
615 | local opt = string.sub(a, m, m) | 621 | local opt = a:sub(m, m) |
616 | if opt == "l" then | 622 | if opt == "l" then |
617 | list = true | 623 | list = true |
618 | elseif opt == "s" then | 624 | elseif opt == "s" then |
@@ -625,13 +631,13 @@ local function docmd(...) | |||
625 | if n ~= 1 then usage() end | 631 | if n ~= 1 then usage() end |
626 | arg[1] = check(loadstring(arg[1])) | 632 | arg[1] = check(loadstring(arg[1])) |
627 | elseif opt == "n" then | 633 | elseif opt == "n" then |
628 | ctx.modname = checkmodname(table.remove(arg, n)) | 634 | ctx.modname = checkmodname(tremove(arg, n)) |
629 | elseif opt == "t" then | 635 | elseif opt == "t" then |
630 | ctx.type = checkarg(table.remove(arg, n), map_type, "file type") | 636 | ctx.type = checkarg(tremove(arg, n), map_type, "file type") |
631 | elseif opt == "a" then | 637 | elseif opt == "a" then |
632 | ctx.arch = checkarg(table.remove(arg, n), map_arch, "architecture") | 638 | ctx.arch = checkarg(tremove(arg, n), map_arch, "architecture") |
633 | elseif opt == "o" then | 639 | elseif opt == "o" then |
634 | ctx.os = checkarg(table.remove(arg, n), map_os, "OS name") | 640 | ctx.os = checkarg(tremove(arg, n), map_os, "OS name") |
635 | else | 641 | else |
636 | usage() | 642 | usage() |
637 | end | 643 | end |
@@ -653,7 +659,7 @@ end | |||
653 | ------------------------------------------------------------------------------ | 659 | ------------------------------------------------------------------------------ |
654 | 660 | ||
655 | -- Public module functions. | 661 | -- Public module functions. |
656 | module(...) | 662 | return { |
657 | 663 | start = docmd -- Process -b command line option. | |
658 | start = docmd -- Process -b command line option. | 664 | } |
659 | 665 | ||
diff --git a/src/jit/dis_arm.lua b/src/jit/dis_arm.lua index 152d91bb..cafd2f74 100644 --- a/src/jit/dis_arm.lua +++ b/src/jit/dis_arm.lua | |||
@@ -658,7 +658,7 @@ local function disass_block(ctx, ofs, len) | |||
658 | end | 658 | end |
659 | 659 | ||
660 | -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). | 660 | -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). |
661 | local function create_(code, addr, out) | 661 | local function create(code, addr, out) |
662 | local ctx = {} | 662 | local ctx = {} |
663 | ctx.code = code | 663 | ctx.code = code |
664 | ctx.addr = addr or 0 | 664 | ctx.addr = addr or 0 |
@@ -670,20 +670,20 @@ local function create_(code, addr, out) | |||
670 | end | 670 | end |
671 | 671 | ||
672 | -- Simple API: disassemble code (a string) at address and output via out. | 672 | -- Simple API: disassemble code (a string) at address and output via out. |
673 | local function disass_(code, addr, out) | 673 | local function disass(code, addr, out) |
674 | create_(code, addr, out):disass() | 674 | create(code, addr, out):disass() |
675 | end | 675 | end |
676 | 676 | ||
677 | -- Return register name for RID. | 677 | -- Return register name for RID. |
678 | local function regname_(r) | 678 | local function regname(r) |
679 | if r < 16 then return map_gpr[r] end | 679 | if r < 16 then return map_gpr[r] end |
680 | return "d"..(r-16) | 680 | return "d"..(r-16) |
681 | end | 681 | end |
682 | 682 | ||
683 | -- Public module functions. | 683 | -- Public module functions. |
684 | module(...) | 684 | return { |
685 | 685 | create = create, | |
686 | create = create_ | 686 | disass = disass, |
687 | disass = disass_ | 687 | regname = regname |
688 | regname = regname_ | 688 | } |
689 | 689 | ||
diff --git a/src/jit/dis_arm64.lua b/src/jit/dis_arm64.lua new file mode 100644 index 00000000..d1596ebc --- /dev/null +++ b/src/jit/dis_arm64.lua | |||
@@ -0,0 +1,1216 @@ | |||
1 | ---------------------------------------------------------------------------- | ||
2 | -- LuaJIT ARM64 disassembler module. | ||
3 | -- | ||
4 | -- Copyright (C) 2005-2020 Mike Pall. All rights reserved. | ||
5 | -- Released under the MIT license. See Copyright Notice in luajit.h | ||
6 | -- | ||
7 | -- Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. | ||
8 | -- Sponsored by Cisco Systems, Inc. | ||
9 | ---------------------------------------------------------------------------- | ||
10 | -- This is a helper module used by the LuaJIT machine code dumper module. | ||
11 | -- | ||
12 | -- It disassembles most user-mode AArch64 instructions. | ||
13 | -- NYI: Advanced SIMD and VFP instructions. | ||
14 | ------------------------------------------------------------------------------ | ||
15 | |||
16 | local type = type | ||
17 | local sub, byte, format = string.sub, string.byte, string.format | ||
18 | local match, gmatch, gsub = string.match, string.gmatch, string.gsub | ||
19 | local concat = table.concat | ||
20 | local bit = require("bit") | ||
21 | local band, bor, bxor, tohex = bit.band, bit.bor, bit.bxor, bit.tohex | ||
22 | local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift | ||
23 | local ror = bit.ror | ||
24 | |||
25 | ------------------------------------------------------------------------------ | ||
26 | -- Opcode maps | ||
27 | ------------------------------------------------------------------------------ | ||
28 | |||
-- Decode table for the adr/adrp patterns, keyed on bit 31 (shift = 31, mask = 1).
-- NOTE(review): presumably the decoder indexes each table with
-- band(rshift(op, shift), mask); the decoder is outside this view -- confirm.
local map_adr = { -- PC-relative addressing.
  shift = 31, mask = 1,
  [0] = "adrDBx", "adrpDBx"
}
33 | |||
-- Decode table for add/subtract immediate patterns, keyed on the 2-bit field
-- at bits 29..30 (shift = 29, mask = 3).
-- NOTE(review): presumably indexed by band(rshift(op, shift), mask) in the
-- decoder, which is not visible here -- confirm.
local map_addsubi = { -- Add/subtract immediate.
  shift = 29, mask = 3,
  [0] = "add|movDNIg", "adds|cmnD0NIg", "subDNIg", "subs|cmpD0NIg",
}
38 | |||
-- Decode table for logical immediate patterns. The outer level is keyed on
-- bit 31; for value 0 a second level keyed on bit 22 rejects value 1 (false).
-- Both leaf tables are keyed on bits 29..30 and hold the same four patterns.
-- NOTE(review): presumably indexed by band(rshift(op, shift), mask) in the
-- decoder, which is not visible here -- confirm.
local map_logi = { -- Logical immediate.
  shift = 31, mask = 1,
  [0] = {
    shift = 22, mask = 1,
    [0] = {
      shift = 29, mask = 3,
      [0] = "andDNig", "orr|movDN0ig", "eorDNig", "ands|tstD0Nig"
    },
    false -- unallocated
  },
  {
    shift = 29, mask = 3,
    [0] = "andDNig", "orr|movDN0ig", "eorDNig", "ands|tstD0Nig"
  }
}
54 | |||
-- Decode table for move wide immediate patterns. Keyed on bit 31, then (for
-- value 0 only) on bit 22, then on bits 29..30. Index 1 of each leaf table is
-- false, i.e. there is no pattern for that field value.
-- NOTE(review): presumably indexed by band(rshift(op, shift), mask) in the
-- decoder, which is not visible here -- confirm.
local map_movwi = { -- Move wide immediate.
  shift = 31, mask = 1,
  [0] = {
    shift = 22, mask = 1,
    [0] = {
      shift = 29, mask = 3,
      [0] = "movnDWRg", false, "movz|movDYRg", "movkDWRg"
    }, false -- unallocated
  },
  {
    shift = 29, mask = 3,
    [0] = "movnDWRg", false, "movz|movDYRg", "movkDWRg"
  },
}
69 | |||
-- Decode table for bitfield patterns. Keyed on bit 31, then bit 22, then
-- bits 29..30. The two halves differ only in the trailing w/x of the patterns.
-- NOTE(review): presumably indexed by band(rshift(op, shift), mask) in the
-- decoder, which is not visible here -- confirm.
local map_bitf = { -- Bitfield.
  shift = 31, mask = 1,
  [0] = {
    shift = 22, mask = 1,
    -- Only field value 0 has an entry on this side.
    [0] = {
      shift = 29, mask = 3,
      [0] = "sbfm|sbfiz|sbfx|asr|sxtw|sxth|sxtbDN12w",
      "bfm|bfi|bfxilDN13w",
      "ubfm|ubfiz|ubfx|lsr|lsl|uxth|uxtbDN12w"
    }
  },
  {
    shift = 22, mask = 1,
    -- No [0] key: the nested table is positional, so it sits at index 1 and
    -- field value 0 has no entry on this side.
    {
      shift = 29, mask = 3,
      [0] = "sbfm|sbfiz|sbfx|asr|sxtw|sxth|sxtbDN12x",
      "bfm|bfi|bfxilDN13x",
      "ubfm|ubfiz|ubfx|lsr|lsl|uxth|uxtbDN12x"
    }
  }
}
91 | |||
-- Top-level table for the "data processing - immediate" group. Keyed on
-- bits 23..25 (shift = 23, mask = 7); indices 0..6 refer to the sub-tables
-- defined above (index 3 is false), index 7 holds the extr/ror patterns.
-- NOTE(review): presumably indexed by band(rshift(op, shift), mask) in the
-- decoder, which is not visible here -- confirm.
local map_datai = { -- Data processing - immediate.
  shift = 23, mask = 7,
  [0] = map_adr, map_adr, map_addsubi, false,
  map_logi, map_movwi, map_bitf,
  {
    -- Sparse mask: only the listed field values have a pattern.
    shift = 15, mask = 0x1c0c1,
    [0] = "extr|rorDNM4w", [0x10080] = "extr|rorDNM4x",
    [0x10081] = "extr|rorDNM4x"
  }
}
102 | |||
-- Decode table for logical (shifted register) patterns. Keyed on bit 31; the
-- value-0 side is additionally keyed on bit 15 (value 1 is false). Below that
-- the tables are keyed on bits 29..30 and then on the 3-bit field at
-- bits 21..23. In every leaf table indices 6 and 7 use a pattern without the
-- "S" letter that the other six entries carry.
-- NOTE(review): presumably indexed by band(rshift(op, shift), mask) in the
-- decoder, which is not visible here -- confirm.
local map_logsr = { -- Logical, shifted register.
  shift = 31, mask = 1,
  [0] = {
    shift = 15, mask = 1,
    [0] = {
      shift = 29, mask = 3,
      [0] = {
        shift = 21, mask = 7,
        [0] = "andDNMSg", "bicDNMSg", "andDNMSg", "bicDNMSg",
        "andDNMSg", "bicDNMSg", "andDNMg", "bicDNMg"
      },
      {
        shift = 21, mask = 7,
        [0] ="orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0MSg", "orn|mvnDN0MSg",
        "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0Mg", "orn|mvnDN0Mg"
      },
      {
        shift = 21, mask = 7,
        [0] = "eorDNMSg", "eonDNMSg", "eorDNMSg", "eonDNMSg",
        "eorDNMSg", "eonDNMSg", "eorDNMg", "eonDNMg"
      },
      {
        shift = 21, mask = 7,
        [0] = "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMSg", "bicsDNMSg",
        "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMg", "bicsDNMg"
      }
    },
    false -- unallocated
  },
  {
    shift = 29, mask = 3,
    [0] = {
      shift = 21, mask = 7,
      [0] = "andDNMSg", "bicDNMSg", "andDNMSg", "bicDNMSg",
      "andDNMSg", "bicDNMSg", "andDNMg", "bicDNMg"
    },
    {
      shift = 21, mask = 7,
      [0] = "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0MSg", "orn|mvnDN0MSg",
      "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0Mg", "orn|mvnDN0Mg"
    },
    {
      shift = 21, mask = 7,
      [0] = "eorDNMSg", "eonDNMSg", "eorDNMSg", "eonDNMSg",
      "eorDNMSg", "eonDNMSg", "eorDNMg", "eonDNMg"
    },
    {
      shift = 21, mask = 7,
      [0] = "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMSg", "bicsDNMSg",
      "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMg", "bicsDNMg"
    }
  }
}
156 | |||
-- Shared sub-table for add/subtract (shifted register) patterns; it is
-- referenced by map_addsubsh. Keyed on bit 31; the value-0 side is also keyed
-- on bit 15 (value 1 is false). Below that: bits 29..30, then bits 22..23.
-- In every leaf table index 3 uses a pattern without the "S" letter.
-- NOTE(review): presumably indexed by band(rshift(op, shift), mask) in the
-- decoder, which is not visible here -- confirm.
local map_assh = {
  shift = 31, mask = 1,
  [0] = {
    shift = 15, mask = 1,
    [0] = {
      shift = 29, mask = 3,
      [0] = {
        shift = 22, mask = 3,
        [0] = "addDNMSg", "addDNMSg", "addDNMSg", "addDNMg"
      },
      {
        shift = 22, mask = 3,
        [0] = "adds|cmnD0NMSg", "adds|cmnD0NMSg",
        "adds|cmnD0NMSg", "adds|cmnD0NMg"
      },
      {
        shift = 22, mask = 3,
        [0] = "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0Mg"
      },
      {
        shift = 22, mask = 3,
        [0] = "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0MzSg",
        "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0Mzg"
      },
    },
    false -- unallocated
  },
  {
    shift = 29, mask = 3,
    [0] = {
      shift = 22, mask = 3,
      [0] = "addDNMSg", "addDNMSg", "addDNMSg", "addDNMg"
    },
    {
      shift = 22, mask = 3,
      [0] = "adds|cmnD0NMSg", "adds|cmnD0NMSg", "adds|cmnD0NMSg",
      "adds|cmnD0NMg"
    },
    {
      shift = 22, mask = 3,
      [0] = "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0Mg"
    },
    {
      shift = 22, mask = 3,
      [0] = "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0MzSg",
      "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0Mzg"
    }
  }
}
206 | |||
-- Keyed on bits 22..23. Values 0..2 all resolve to map_assh; value 3 has no
-- entry.
-- NOTE(review): presumably indexed by band(rshift(op, shift), mask) in the
-- decoder, which is not visible here -- confirm.
local map_addsubsh = { -- Add/subtract, shifted register.
  shift = 22, mask = 3,
  [0] = map_assh, map_assh, map_assh
}
211 | |||
-- Keyed on bits 22..23 (only value 0 has an entry), then on bits 29..30.
-- NOTE(review): presumably indexed by band(rshift(op, shift), mask) in the
-- decoder, which is not visible here -- confirm.
local map_addsubex = { -- Add/subtract, extended register.
  shift = 22, mask = 3,
  [0] = {
    shift = 29, mask = 3,
    [0] = "addDNMXg", "adds|cmnD0NMXg", "subDNMXg", "subs|cmpD0NMzXg",
  }
}
219 | |||
-- Keyed on the 6-bit field at bits 10..15 (only value 0 has an entry), then
-- on bits 29..30.
-- NOTE(review): presumably indexed by band(rshift(op, shift), mask) in the
-- decoder, which is not visible here -- confirm.
local map_addsubc = { -- Add/subtract, with carry.
  shift = 10, mask = 63,
  [0] = {
    shift = 29, mask = 3,
    [0] = "adcDNMg", "adcsDNMg", "sbc|ngcDN0Mg", "sbcs|ngcsDN0Mg",
  }
}
227 | |||
-- Decode table for conditional compare patterns. Keyed on bit 4 (only value 0
-- has an entry), then on bits 10..11 (values 0 and 2), then on bits 29..30.
-- NOTE(review): presumably indexed by band(rshift(op, shift), mask) in the
-- decoder, which is not visible here -- confirm.
local map_ccomp = {
  shift = 4, mask = 1,
  [0] = {
    shift = 10, mask = 3,
    [0] = { -- Conditional compare register.
      shift = 29, mask = 3,
      -- No [0] key: the entries are positional, i.e. indices 1, 2 and 3.
      "ccmnNMVCg", false, "ccmpNMVCg",
    },
    [2] = { -- Conditional compare immediate.
      shift = 29, mask = 3,
      -- No [0] key: the entries are positional, i.e. indices 1, 2 and 3.
      "ccmnN5VCg", false, "ccmpN5VCg",
    }
  }
}
242 | |||
-- Keyed on bit 11 (only value 0 has an entry), then on bit 10, then on
-- bits 29..30, where the odd indices are false.
-- NOTE(review): presumably indexed by band(rshift(op, shift), mask) in the
-- decoder, which is not visible here -- confirm.
local map_csel = { -- Conditional select.
  shift = 11, mask = 1,
  [0] = {
    shift = 10, mask = 1,
    [0] = {
      shift = 29, mask = 3,
      [0] = "cselDNMzCg", false, "csinv|cinv|csetmDNMcg", false,
    },
    {
      shift = 29, mask = 3,
      [0] = "csinc|cinc|csetDNMcg", false, "csneg|cnegDNMcg", false,
    }
  }
}
257 | |||
-- Keyed on bit 29 (only value 0 has an entry), then on bit 31, then on the
-- 11-bit field at bits 10..20. The two leaf tables differ at indices 2 and 3.
-- NOTE(review): presumably indexed by band(rshift(op, shift), mask) in the
-- decoder, which is not visible here -- confirm.
local map_data1s = { -- Data processing, 1 source.
  shift = 29, mask = 1,
  [0] = {
    shift = 31, mask = 1,
    [0] = {
      shift = 10, mask = 0x7ff,
      [0] = "rbitDNg", "rev16DNg", "revDNw", false, "clzDNg", "clsDNg"
    },
    {
      shift = 10, mask = 0x7ff,
      [0] = "rbitDNg", "rev16DNg", "rev32DNx", "revDNx", "clzDNg", "clsDNg"
    }
  }
}
272 | |||
-- Keyed on bit 29 (only value 0 has an entry), then on bits 10..15.
-- NOTE(review): presumably indexed by band(rshift(op, shift), mask) in the
-- decoder, which is not visible here -- confirm.
local map_data2s = { -- Data processing, 2 sources.
  shift = 29, mask = 1,
  [0] = {
    shift = 10, mask = 63,
    -- No [0] key: the list is positional and starts at index 1, so
    -- "udivDNMg" is index 2 and "lslDNMg" is index 8.
    false, "udivDNMg", "sdivDNMg", false, false, false, false, "lslDNMg",
    "lsrDNMg", "asrDNMg", "rorDNMg"
  }
}
281 | |||
-- Keyed on bits 29..31. Index 0 is further keyed on bits 21..23 (only value 0)
-- and bit 15; indices 1..3 are false; index 4 is keyed on bit 15 and then on
-- bits 21..23.
-- NOTE(review): presumably indexed by band(rshift(op, shift), mask) in the
-- decoder, which is not visible here -- confirm.
local map_data3s = { -- Data processing, 3 sources.
  shift = 29, mask = 7,
  [0] = {
    shift = 21, mask = 7,
    [0] = {
      shift = 15, mask = 1,
      [0] = "madd|mulDNMA0g", "msub|mnegDNMA0g"
    }
  }, false, false, false,
  {
    shift = 15, mask = 1,
    [0] = {
      shift = 21, mask = 7,
      [0] = "madd|mulDNMA0g", "smaddl|smullDxNMwA0x", "smulhDNMx", false,
      false, "umaddl|umullDxNMwA0x", "umulhDNMx"
    },
    {
      shift = 21, mask = 7,
      [0] = "msub|mnegDNMA0g", "smsubl|smneglDxNMwA0x", false, false,
      false, "umsubl|umneglDxNMwA0x"
    }
  }
}
305 | |||
-- Top-level table for the "data processing, register" group. Keyed on bit 28.
-- Value 0 dispatches on bit 24 and then bit 21 to the logical and
-- add/subtract tables. Value 1 dispatches on the 4-bit field at bits 21..24,
-- where indices 8..15 all refer to map_data3s.
-- NOTE(review): presumably indexed by band(rshift(op, shift), mask) in the
-- decoder, which is not visible here -- confirm.
local map_datar = { -- Data processing, register.
  shift = 28, mask = 1,
  [0] = {
    shift = 24, mask = 1,
    [0] = map_logsr,
    {
      shift = 21, mask = 1,
      [0] = map_addsubsh, map_addsubex
    }
  },
  {
    shift = 21, mask = 15,
    [0] = map_addsubc, false, map_ccomp, false, map_csel, false,
    {
      shift = 30, mask = 1,
      [0] = map_data2s, map_data1s
    },
    false, map_data3s, map_data3s, map_data3s, map_data3s, map_data3s,
    map_data3s, map_data3s, map_data3s
  }
}
327 | |||
-- Keyed on bit 26, then on bits 30..31. Field values without an entry (3 on
-- the first side, 2 and 3 on the second) have no pattern.
-- NOTE(review): presumably indexed by band(rshift(op, shift), mask) in the
-- decoder, which is not visible here -- confirm.
local map_lrl = { -- Load register, literal.
  shift = 26, mask = 1,
  [0] = {
    shift = 30, mask = 3,
    [0] = "ldrDwB", "ldrDxB", "ldrswDxB"
  },
  {
    shift = 30, mask = 3,
    [0] = "ldrDsB", "ldrDdB"
  }
}
339 | |||
-- Keyed on bits 30..31, then on bit 26, then on bits 22..23. For outer
-- indices 0 and 1 only the bit-26 = 0 side has an entry.
-- NOTE(review): presumably indexed by band(rshift(op, shift), mask) in the
-- decoder, which is not visible here -- confirm.
local map_lsriind = { -- Load/store register, immediate pre/post-indexed.
  shift = 30, mask = 3,
  [0] = {
    shift = 26, mask = 1,
    [0] = {
      shift = 22, mask = 3,
      [0] = "strbDwzL", "ldrbDwzL", "ldrsbDxzL", "ldrsbDwzL"
    }
  },
  {
    shift = 26, mask = 1,
    [0] = {
      shift = 22, mask = 3,
      [0] = "strhDwzL", "ldrhDwzL", "ldrshDxzL", "ldrshDwzL"
    }
  },
  {
    shift = 26, mask = 1,
    [0] = {
      shift = 22, mask = 3,
      [0] = "strDwzL", "ldrDwzL", "ldrswDxzL"
    },
    {
      shift = 22, mask = 3,
      [0] = "strDszL", "ldrDszL"
    }
  },
  {
    shift = 26, mask = 1,
    [0] = {
      shift = 22, mask = 3,
      [0] = "strDxzL", "ldrDxzL"
    },
    {
      shift = 22, mask = 3,
      [0] = "strDdzL", "ldrDdzL"
    }
  }
}
379 | |||
-- Decode table for load/store register with immediate or register offset.
-- Keyed on bit 21. Value 0 dispatches on bits 10..11: index 0 is the unscaled
-- immediate table, indices 1 and 3 refer to map_lsriind, index 2 is false.
-- Value 1 dispatches on bits 10..11 with an entry for value 2 only.
-- NOTE(review): presumably indexed by band(rshift(op, shift), mask) in the
-- decoder, which is not visible here -- confirm.
local map_lsriro = {
  shift = 21, mask = 1,
  [0] = { -- Load/store register immediate.
    shift = 10, mask = 3,
    [0] = { -- Unscaled immediate.
      shift = 26, mask = 1,
      [0] = {
        shift = 30, mask = 3,
        [0] = {
          shift = 22, mask = 3,
          [0] = "sturbDwK", "ldurbDwK"
        },
        {
          shift = 22, mask = 3,
          [0] = "sturhDwK", "ldurhDwK"
        },
        {
          shift = 22, mask = 3,
          [0] = "sturDwK", "ldurDwK"
        },
        {
          shift = 22, mask = 3,
          [0] = "sturDxK", "ldurDxK"
        }
      }
    }, map_lsriind, false, map_lsriind
  },
  { -- Load/store register, register offset.
    shift = 10, mask = 3,
    [2] = {
      shift = 26, mask = 1,
      [0] = {
        shift = 30, mask = 3,
        [0] = {
          shift = 22, mask = 3,
          [0] = "strbDwO", "ldrbDwO", "ldrsbDxO", "ldrsbDwO"
        },
        {
          shift = 22, mask = 3,
          [0] = "strhDwO", "ldrhDwO", "ldrshDxO", "ldrshDwO"
        },
        {
          shift = 22, mask = 3,
          [0] = "strDwO", "ldrDwO", "ldrswDxO"
        },
        {
          shift = 22, mask = 3,
          [0] = "strDxO", "ldrDxO"
        }
      },
      {
        shift = 30, mask = 3,
        [2] = {
          shift = 22, mask = 3,
          [0] = "strDsO", "ldrDsO"
        },
        [3] = {
          shift = 22, mask = 3,
          [0] = "strDdO", "ldrDdO"
        }
      }
    }
  }
}
444 | |||
-- Keyed on bit 22 (index 0 holds the stp patterns, index 1 the ldp/ldpsw
-- patterns), then on bits 30..31, then on bit 26.
-- NOTE(review): presumably indexed by band(rshift(op, shift), mask) in the
-- decoder, which is not visible here -- confirm.
local map_lsp = { -- Load/store register pair, offset.
  shift = 22, mask = 1,
  [0] = {
    shift = 30, mask = 3,
    [0] = {
      shift = 26, mask = 1,
      [0] = "stpDzAzwP", "stpDzAzsP",
    },
    {
      shift = 26, mask = 1,
      -- No [0] key: positional, so this pattern is index 1 and field value 0
      -- has no entry.
      "stpDzAzdP"
    },
    {
      shift = 26, mask = 1,
      [0] = "stpDzAzxP"
    }
  },
  {
    shift = 30, mask = 3,
    [0] = {
      shift = 26, mask = 1,
      [0] = "ldpDzAzwP", "ldpDzAzsP",
    },
    {
      shift = 26, mask = 1,
      [0] = "ldpswDAxP", "ldpDzAzdP"
    },
    {
      shift = 26, mask = 1,
      [0] = "ldpDzAzxP"
    }
  }
}
478 | |||
-- Top-level table for the loads and stores group. The mask 0x31 applied after
-- shift 24 selects bits 24, 28 and 29, so only the listed sparse keys exist.
-- NOTE(review): presumably indexed by band(rshift(op, shift), mask) in the
-- decoder, which is not visible here -- confirm.
local map_ls = { -- Loads and stores.
  shift = 24, mask = 0x31,
  [0x10] = map_lrl, [0x30] = map_lsriro,
  [0x20] = {
    shift = 23, mask = 3,
    -- Positional entries: indices 1..3; field value 0 has no entry.
    map_lsp, map_lsp, map_lsp
  },
  [0x21] = {
    shift = 23, mask = 3,
    -- Positional entries: indices 1..3; field value 0 has no entry.
    map_lsp, map_lsp, map_lsp
  },
  [0x31] = {
    shift = 26, mask = 1,
    [0] = {
      shift = 30, mask = 3,
      [0] = {
        shift = 22, mask = 3,
        [0] = "strbDwzU", "ldrbDwzU"
      },
      {
        shift = 22, mask = 3,
        [0] = "strhDwzU", "ldrhDwzU"
      },
      {
        shift = 22, mask = 3,
        [0] = "strDwzU", "ldrDwzU"
      },
      {
        shift = 22, mask = 3,
        [0] = "strDxzU", "ldrDxzU"
      }
    },
    {
      shift = 30, mask = 3,
      [2] = {
        shift = 22, mask = 3,
        [0] = "strDszU", "ldrDszU"
      },
      [3] = {
        shift = 22, mask = 3,
        [0] = "strDdzU", "ldrDdzU"
      }
    }
  },
}
524 | |||
-- Top-level table for the SIMD/FP data-processing group. Keyed on bits 28..30;
-- only index 1 (the positional entry commented "001") is populated. Below it
-- the table is keyed on bit 24: index 0 requires bit 21 == 1 and then fans
-- out on bits 10..11 and bits 12..15 to the individual FP classes; index 1
-- holds the 3-source patterns.
-- NOTE(review): presumably indexed by band(rshift(op, shift), mask) in the
-- decoder, which is not visible here -- confirm.
local map_datafp = { -- Data processing, SIMD and FP.
  shift = 28, mask = 7,
  { -- 001
    shift = 24, mask = 1,
    [0] = {
      shift = 21, mask = 1,
      -- Positional entry (index 1): field value 0 has no entry.
      {
        shift = 10, mask = 3,
        [0] = {
          shift = 12, mask = 1,
          [0] = {
            shift = 13, mask = 1,
            [0] = {
              shift = 14, mask = 1,
              [0] = {
                shift = 15, mask = 1,
                [0] = { -- FP/int conversion.
                  shift = 31, mask = 1,
                  [0] = {
                    shift = 16, mask = 0xff,
                    [0x20] = "fcvtnsDwNs", [0x21] = "fcvtnuDwNs",
                    [0x22] = "scvtfDsNw", [0x23] = "ucvtfDsNw",
                    [0x24] = "fcvtasDwNs", [0x25] = "fcvtauDwNs",
                    [0x26] = "fmovDwNs", [0x27] = "fmovDsNw",
                    [0x28] = "fcvtpsDwNs", [0x29] = "fcvtpuDwNs",
                    [0x30] = "fcvtmsDwNs", [0x31] = "fcvtmuDwNs",
                    [0x38] = "fcvtzsDwNs", [0x39] = "fcvtzuDwNs",
                    [0x60] = "fcvtnsDwNd", [0x61] = "fcvtnuDwNd",
                    [0x62] = "scvtfDdNw", [0x63] = "ucvtfDdNw",
                    [0x64] = "fcvtasDwNd", [0x65] = "fcvtauDwNd",
                    [0x68] = "fcvtpsDwNd", [0x69] = "fcvtpuDwNd",
                    [0x70] = "fcvtmsDwNd", [0x71] = "fcvtmuDwNd",
                    [0x78] = "fcvtzsDwNd", [0x79] = "fcvtzuDwNd"
                  },
                  {
                    shift = 16, mask = 0xff,
                    [0x20] = "fcvtnsDxNs", [0x21] = "fcvtnuDxNs",
                    [0x22] = "scvtfDsNx", [0x23] = "ucvtfDsNx",
                    [0x24] = "fcvtasDxNs", [0x25] = "fcvtauDxNs",
                    [0x28] = "fcvtpsDxNs", [0x29] = "fcvtpuDxNs",
                    [0x30] = "fcvtmsDxNs", [0x31] = "fcvtmuDxNs",
                    [0x38] = "fcvtzsDxNs", [0x39] = "fcvtzuDxNs",
                    [0x60] = "fcvtnsDxNd", [0x61] = "fcvtnuDxNd",
                    [0x62] = "scvtfDdNx", [0x63] = "ucvtfDdNx",
                    [0x64] = "fcvtasDxNd", [0x65] = "fcvtauDxNd",
                    [0x66] = "fmovDxNd", [0x67] = "fmovDdNx",
                    [0x68] = "fcvtpsDxNd", [0x69] = "fcvtpuDxNd",
                    [0x70] = "fcvtmsDxNd", [0x71] = "fcvtmuDxNd",
                    [0x78] = "fcvtzsDxNd", [0x79] = "fcvtzuDxNd"
                  }
                }
              },
              { -- FP data-processing, 1 source.
                shift = 31, mask = 1,
                [0] = {
                  shift = 22, mask = 3,
                  [0] = {
                    shift = 15, mask = 63,
                    [0] = "fmovDNf", "fabsDNf", "fnegDNf",
                    "fsqrtDNf", false, "fcvtDdNs", false, false,
                    "frintnDNf", "frintpDNf", "frintmDNf", "frintzDNf",
                    "frintaDNf", false, "frintxDNf", "frintiDNf",
                  },
                  {
                    shift = 15, mask = 63,
                    [0] = "fmovDNf", "fabsDNf", "fnegDNf",
                    "fsqrtDNf", "fcvtDsNd", false, false, false,
                    "frintnDNf", "frintpDNf", "frintmDNf", "frintzDNf",
                    "frintaDNf", false, "frintxDNf", "frintiDNf",
                  }
                }
              }
            },
            { -- FP compare.
              shift = 31, mask = 1,
              [0] = {
                shift = 14, mask = 3,
                [0] = {
                  shift = 23, mask = 1,
                  [0] = {
                    shift = 0, mask = 31,
                    [0] = "fcmpNMf", [8] = "fcmpNZf",
                    [16] = "fcmpeNMf", [24] = "fcmpeNZf",
                  }
                }
              }
            }
          },
          { -- FP immediate.
            shift = 31, mask = 1,
            [0] = {
              shift = 5, mask = 31,
              [0] = {
                shift = 23, mask = 1,
                [0] = "fmovDFf"
              }
            }
          }
        },
        { -- FP conditional compare.
          shift = 31, mask = 1,
          [0] = {
            shift = 23, mask = 1,
            [0] = {
              shift = 4, mask = 1,
              [0] = "fccmpNMVCf", "fccmpeNMVCf"
            }
          }
        },
        { -- FP data-processing, 2 sources.
          shift = 31, mask = 1,
          [0] = {
            shift = 23, mask = 1,
            [0] = {
              shift = 12, mask = 15,
              [0] = "fmulDNMf", "fdivDNMf", "faddDNMf", "fsubDNMf",
              "fmaxDNMf", "fminDNMf", "fmaxnmDNMf", "fminnmDNMf",
              "fnmulDNMf"
            }
          }
        },
        { -- FP conditional select.
          shift = 31, mask = 1,
          [0] = {
            shift = 23, mask = 1,
            [0] = "fcselDNMCf"
          }
        }
      }
    },
    { -- FP data-processing, 3 sources.
      shift = 31, mask = 1,
      [0] = {
        shift = 15, mask = 1,
        [0] = {
          -- mask = 5 selects bits 21 and 23 only (possible values 0, 1, 4, 5).
          shift = 21, mask = 5,
          [0] = "fmaddDNMAf", "fnmaddDNMAf"
        },
        {
          shift = 21, mask = 5,
          [0] = "fmsubDNMAf", "fnmsubDNMAf"
        }
      }
    }
  }
}
671 | |||
-- Top-level table for branches, exception generation and system patterns.
-- Keyed on bits 29..31: indices 0 and 4 are direct patterns, 1 and 5 the
-- compare/test-and-branch tables, 2 the conditional branches, 3 is false and
-- 6 dispatches on bits 24..25 to three sparse tables that match on large
-- parts of the instruction word (shift = 0).
-- NOTE(review): presumably indexed by band(rshift(op, shift), mask) in the
-- decoder, which is not visible here -- confirm.
local map_br = { -- Branches, exception generating and system instructions.
  shift = 29, mask = 7,
  [0] = "bB",
  { -- Compare & branch, immediate.
    shift = 24, mask = 3,
    [0] = "cbzDBg", "cbnzDBg", "tbzDTBw", "tbnzDTBw"
  },
  { -- Conditional branch, immediate.
    shift = 24, mask = 3,
    [0] = {
      shift = 4, mask = 1,
      [0] = {
        -- Indices 0..14 only; field value 15 has no entry.
        shift = 0, mask = 15,
        [0] = "beqB", "bneB", "bhsB", "bloB", "bmiB", "bplB", "bvsB", "bvcB",
        "bhiB", "blsB", "bgeB", "bltB", "bgtB", "bleB", "balB"
      }
    }
  }, false, "blB",
  { -- Compare & branch, immediate.
    shift = 24, mask = 3,
    [0] = "cbzDBg", "cbnzDBg", "tbzDTBx", "tbnzDTBx"
  },
  {
    shift = 24, mask = 3,
    [0] = { -- Exception generation.
      shift = 0, mask = 0xe0001f,
      [0x200000] = "brkW"
    },
    { -- System instructions.
      shift = 0, mask = 0x3fffff,
      [0x03201f] = "nop"
    },
    { -- Unconditional branch, register.
      shift = 0, mask = 0xfffc1f,
      [0x1f0000] = "brNx", [0x3f0000] = "blrNx",
      [0x5f0000] = "retNx"
    },
  }
}
711 | |||
-- Root decode table. Keyed on the 4-bit field at bits 25..28
-- (shift = 25, mask = 15); indices 0..3 are false, the rest refer to the
-- group tables defined above.
-- NOTE(review): presumably the decoder starts here and indexes with
-- band(rshift(op, shift), mask); the decoder is not visible here -- confirm.
local map_init = {
  shift = 25, mask = 15,
  [0] = false, false, false, false, map_ls, map_datar, map_ls, map_datafp,
  map_datai, map_datai, map_br, map_br, map_ls, map_datar, map_ls, map_datafp
}
717 | |||
718 | ------------------------------------------------------------------------------ | ||
719 | |||
-- Register name tables, one per register class letter (x, w, d, s), each
-- indexed by register number 0..31. Names are the class letter followed by
-- the number, except that number 31 is "sp" in class x and "wsp" in class w.
local map_regs = {}
do
  local classes = { "x", "w", "d", "s" }
  for c = 1, #classes do
    local prefix = classes[c]
    local names = {}
    for regnum = 0, 31 do
      names[regnum] = prefix..regnum
    end
    map_regs[prefix] = names
  end
  -- Number 31 of the two integer classes gets a stack pointer name.
  map_regs.x[31] = "sp"
  map_regs.w[31] = "wsp"
end
732 | |||
-- Condition code mnemonics, indexed by the values 0..14. Index 15 has no
-- entry.
local map_cond = {
  [0] = "eq", [1] = "ne", [2] = "cs", [3] = "cc",
  [4] = "mi", [5] = "pl", [6] = "vs", [7] = "vc",
  [8] = "hi", [9] = "ls", [10] = "ge", [11] = "lt",
  [12] = "gt", [13] = "le", [14] = "al",
}
737 | |||
-- Shift type mnemonics, indexed by the 2-bit shift type value.
-- Value 3 ("ror") is a valid shift type for A64 logical shifted-register
-- instructions, so the table now covers all four values.
-- NOTE(review): the logical shifted-register patterns in this file drop the
-- shift operand for that field value, so the new entry presumably is only
-- consulted once those patterns pass the shift through -- confirm.
local map_shift = { [0] = "lsl", "lsr", "asr", "ror" }
739 | |||
-- Extend mnemonics, indexed by the values 0..7: the unsigned forms first,
-- then the signed forms.
local map_extend = {
  [0] = "uxtb", [1] = "uxth", [2] = "uxtw", [3] = "uxtx",
  [4] = "sxtb", [5] = "sxth", [6] = "sxtw", [7] = "sxtx",
}
743 | |||
744 | ------------------------------------------------------------------------------ | ||
745 | |||
-- Emit one disassembly line through ctx.out and advance ctx.pos by 4.
--   ctx:      context table; reads pos, addr, rel, symtab, hexdump, op, out.
--   text:     opcode text, left-justified in a 5 character column.
--   operands: array of operand strings, joined with ", ".
-- If ctx.rel is set and ctx.symtab has an entry for it, "\t->" plus that
-- entry is appended. The raw opcode word is included when ctx.hexdump > 0.
-- NOTE(review): this text comes from a rendered diff that may have collapsed
-- runs of spaces; confirm the spacing inside the two format strings against
-- the repository file.
local function putop(ctx, text, operands)
  local pos = ctx.pos
  local sym = ctx.rel and ctx.symtab[ctx.rel]
  local extra = sym and ("\t->"..sym) or ""
  local line
  if ctx.hexdump > 0 then
    line = format("%08x %s %-5s %s%s\n",
      ctx.addr+pos, tohex(ctx.op), text, concat(operands, ", "), extra)
  else
    line = format("%08x %-5s %s%s\n",
      ctx.addr+pos, text, concat(operands, ", "), extra)
  end
  ctx.out(line)
  ctx.pos = pos + 4
end
765 | |||
-- Fallback for opcodes without a pattern: emit the raw instruction word
-- ctx.op as a ".long 0x<hex>" line via putop.
local function unknown(ctx)
  local word = "0x"..tohex(ctx.op)
  return putop(ctx, ".long", { word })
end
770 | |||
-- Return the name of register number regnum. The register class is the first
-- of the letters x, w, d or s that the Lua pattern p.."%w-([xwds])" captures
-- in the pattern string pat; the name is then looked up in map_regs.
local function match_reg(p, pat, regnum)
  local class = match(pat, p.."%w-([xwds])")
  return map_regs[class][regnum]
end
774 | |||
-- Format x as hexadecimal without a prefix. Negative values go through
-- tohex(), all other values through format("%x"), which yields no leading
-- zeros.
local function fmt_hex32(x)
  return x < 0 and tohex(x) or format("%x", x)
end
782 | |||
-- Multipliers used by decode_imm13 to repeat a narrow element across a 32-bit
-- word, indexed by len (1..5). Entry 5 is 1, i.e. no repetition.
local imm13_rep = { 0x55555555, 0x11111111, 0x01010101, 0x00010001, 0x00000001 }

-- Decode the 13-bit immediate held in bits 10..22 of the instruction word op
-- and return it as a hexadecimal string without prefix.
-- Fields read: imms = bits 10..15, immr = bits 16..21, bit 22 selects the
-- two-word (lo/hi) path, bit 31 decides whether a 32-bit result is emitted
-- twice.
-- NOTE(review): presumably this serves the immediate operand of the logical
-- immediate patterns; the caller is outside this view -- confirm.
local function decode_imm13(op)
  local imms = band(rshift(op, 10), 63)
  local immr = band(rshift(op, 16), 63)
  if band(op, 0x00400000) == 0 then
    -- Bit 22 clear: the pattern is built inside one 32-bit word.
    -- len is derived from the leading bits of imms:
    -- >= 60 -> 1, >= 56 -> 2, >= 48 -> 3, >= 32 -> 4, otherwise 5.
    local len = 5
    if imms >= 56 then
      if imms >= 60 then len = 1 else len = 2 end
    elseif imms >= 48 then len = 3 elseif imms >= 32 then len = 4 end
    -- l = 2^len - 1; it masks both imms and immr.
    local l = lshift(1, len)-1
    local s = band(imms, l)
    local r = band(immr, l)
    -- s+1 low one-bits, rotated right by r within 32 bits.
    local imm = ror(rshift(-1, 31-s), r)
    -- For len < 5: keep the low l bits and add the word shifted right by 31-l,
    -- which brings down the bits the rotation moved to the top.
    if len ~= 5 then imm = band(imm, lshift(1, l)-1) + rshift(imm, 31-l) end
    -- Repeat the element across the word.
    imm = imm * imm13_rep[len]
    local ix = fmt_hex32(imm)
    if rshift(op, 31) ~= 0 then
      -- Bit 31 set: append the same 32 bits again as hex digits.
      return ix..tohex(imm)
    else
      return ix
    end
  else
    -- Bit 22 set: the value is kept as two 32-bit halves lo and hi.
    -- imms < 32: lo gets imms+1 low one-bits and hi stays 0; otherwise lo
    -- stays all ones and hi gets imms-31 low one-bits.
    local lo, hi = -1, 0
    if imms < 32 then lo = rshift(-1, 31-imms) else hi = rshift(-1, 63-imms) end
    if immr ~= 0 then
      -- Rotate both halves, then exchange between them the bits selected by
      -- the mask x (x is 0 when immr == 32).
      lo, hi = ror(lo, immr), ror(hi, immr)
      local x = immr == 32 and 0 or band(bxor(lo, hi), lshift(-1, 32-immr))
      lo, hi = bxor(lo, x), bxor(hi, x)
      -- A rotation by 32 or more also swaps the two halves.
      if immr >= 32 then lo, hi = hi, lo end
    end
    if hi ~= 0 then
      return fmt_hex32(hi)..tohex(lo)
    else
      return fmt_hex32(lo)
    end
  end
end
821 | |||
-- Extract the signed PC-relative immediate from op; the field layout depends
-- on the instruction name. The "B" operand handler adds the result to the
-- instruction address.
822 | local function parse_immpc(op, name) | ||
-- b/bl: bits 0-25, sign-extended and multiplied by 4.
823 | if name == "b" or name == "bl" then | ||
824 | return arshift(lshift(op, 6), 4) | ||
-- adr/adrp: sign-extended bits 5-23 shifted left by 2, ORed with bits 29-30.
825 | elseif name == "adr" or name == "adrp" then | ||
826 | local immlo = band(rshift(op, 29), 3) | ||
827 | local immhi = lshift(arshift(lshift(op, 8), 13), 2) | ||
828 | return bor(immhi, immlo) | ||
-- tbz/tbnz: bits 5-18, sign-extended and multiplied by 4.
829 | elseif name == "tbz" or name == "tbnz" then | ||
830 | return lshift(arshift(lshift(op, 13), 18), 2) | ||
-- Everything else: bits 5-23, sign-extended and multiplied by 4.
831 | else | ||
832 | return lshift(arshift(lshift(op, 8), 13), 2) | ||
833 | end | ||
834 | end | ||
835 | |||
-- Expand the 8-bit floating-point immediate at op bits 13-20 into a Lua number.
-- Returns sign * frac * 2^exp, where sign comes from bit 20, frac is 16 plus
-- the 4-bit field at bits 13-16, and exp is derived from bits 17-19.
836 | local function parse_fpimm8(op) | ||
837 | local sign = band(op, 0x100000) == 0 and 1 or -1 | ||
838 | local exp = bxor(rshift(arshift(lshift(op, 12), 5), 24), 0x80) - 131 | ||
839 | local frac = 16+band(rshift(op, 13), 15) | ||
840 | return sign * frac * 2^exp | ||
841 | end | ||
842 | |||
-- Predicate used by the "2" operand handler: when it returns true, the handler
-- selects its second alternate name and rewrites the operand to imms - immr + 1.
-- sf: non-zero for the 64-bit form; uns: non-zero for the unsigned variant.
-- Returns false when imms < immr, when imms is 31 or 63, or when immr == 0 and
-- imms is one of the widths special-cased below.
843 | local function prefer_bfx(sf, uns, imms, immr) | ||
844 | if imms < immr or imms == 31 or imms == 63 then | ||
845 | return false | ||
846 | end | ||
847 | if immr == 0 then | ||
848 | if sf == 0 and (imms == 7 or imms == 15) then | ||
849 | return false | ||
850 | end | ||
851 | if sf ~= 0 and uns == 0 and (imms == 7 or imms == 15 or imms == 31) then | ||
852 | return false | ||
853 | end | ||
854 | end | ||
855 | return true | ||
856 | end | ||
857 | |||
858 | -- Disassemble a single instruction. | ||
-- Reads the 4 bytes at ctx.pos as a little-endian 32-bit word, walks map_init
-- and its sub-tables (each indexed via its own shift/mask, with `_` as the
-- fallback entry) until a pattern string is found, then interprets the pattern
-- one character at a time to build the operand list. Encodings with no pattern
-- are emitted through unknown(ctx). Output and the ctx.pos advance happen in putop().
-- Pattern layout: name, optional "|alt[|alt...]" alternate names, optional
-- ".suffix", then operand codes.
859 | local function disass_ins(ctx) | ||
860 | local pos = ctx.pos | ||
861 | local b0, b1, b2, b3 = byte(ctx.code, pos+1, pos+4) | ||
862 | local op = bor(lshift(b3, 24), lshift(b2, 16), lshift(b1, 8), b0) | ||
863 | local operands = {} | ||
864 | local suffix = "" | ||
865 | local last, name, pat | ||
866 | local map_reg | ||
867 | ctx.op = op | ||
868 | ctx.rel = nil | ||
869 | last = nil | ||
870 | local opat | ||
871 | opat = map_init[band(rshift(op, 25), 15)] | ||
872 | while type(opat) ~= "string" do | ||
873 | if not opat then return unknown(ctx) end | ||
874 | opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._ | ||
875 | end | ||
876 | name, pat = match(opat, "^([a-z0-9]*)(.*)") | ||
877 | local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)") | ||
878 | if altname then pat = pat2 end | ||
879 | if sub(pat, 1, 1) == "." then | ||
880 | local s2, p2 = match(pat, "^([a-z0-9.]*)(.*)") | ||
881 | suffix = suffix..s2 | ||
882 | pat = p2 | ||
883 | end | ||
884 | |||
-- A "g" or "f" in the pattern selects one register table for all D/N/M/A
-- operands: op bit 31 picks x over w, op bit 22 picks d over s.
885 | local rt = match(pat, "[gf]") | ||
886 | if rt then | ||
887 | if rt == "g" then | ||
888 | map_reg = band(op, 0x80000000) ~= 0 and map_regs.x or map_regs.w | ||
889 | else | ||
890 | map_reg = band(op, 0x400000) ~= 0 and map_regs.d or map_regs.s | ||
891 | end | ||
892 | end | ||
893 | |||
894 | local second0, immr | ||
895 | |||
-- Each handler may set x (the operand to append) and may also rewrite name,
-- last or earlier operands when an alias form applies.
896 | for p in gmatch(pat, ".") do | ||
897 | local x = nil | ||
898 | if p == "D" then | ||
899 | local regnum = band(op, 31) | ||
900 | x = rt and map_reg[regnum] or match_reg(p, pat, regnum) | ||
901 | elseif p == "N" then | ||
902 | local regnum = band(rshift(op, 5), 31) | ||
903 | x = rt and map_reg[regnum] or match_reg(p, pat, regnum) | ||
904 | elseif p == "M" then | ||
905 | local regnum = band(rshift(op, 16), 31) | ||
906 | x = rt and map_reg[regnum] or match_reg(p, pat, regnum) | ||
907 | elseif p == "A" then | ||
908 | local regnum = band(rshift(op, 10), 31) | ||
909 | x = rt and map_reg[regnum] or match_reg(p, pat, regnum) | ||
910 | elseif p == "B" then | ||
911 | local addr = ctx.addr + pos + parse_immpc(op, name) | ||
912 | ctx.rel = addr | ||
913 | x = "0x"..tohex(addr) | ||
914 | elseif p == "T" then | ||
915 | x = bor(band(rshift(op, 26), 32), band(rshift(op, 19), 31)) | ||
916 | elseif p == "V" then | ||
917 | x = band(op, 15) | ||
918 | elseif p == "C" then | ||
919 | x = map_cond[band(rshift(op, 12), 15)] | ||
920 | elseif p == "c" then | ||
921 | local rn = band(rshift(op, 5), 31) | ||
922 | local rm = band(rshift(op, 16), 31) | ||
923 | local cond = band(rshift(op, 12), 15) | ||
924 | local invc = bxor(cond, 1) | ||
925 | x = map_cond[cond] | ||
926 | if altname and cond ~= 14 and cond ~= 15 then | ||
927 | local a1, a2 = match(altname, "([^|]*)|(.*)") | ||
928 | if rn == rm then | ||
929 | local n = #operands | ||
930 | operands[n] = nil | ||
931 | x = map_cond[invc] | ||
932 | if rn ~= 31 then | ||
933 | if a1 then name = a1 else name = altname end | ||
934 | else | ||
935 | operands[n-1] = nil | ||
936 | name = a2 | ||
937 | end | ||
938 | end | ||
939 | end | ||
940 | elseif p == "W" then | ||
941 | x = band(rshift(op, 5), 0xffff) | ||
942 | elseif p == "Y" then | ||
943 | x = band(rshift(op, 5), 0xffff) | ||
944 | local hw = band(rshift(op, 21), 3) | ||
945 | if altname and (hw == 0 or x ~= 0) then | ||
946 | name = altname | ||
947 | end | ||
948 | elseif p == "L" then | ||
949 | local rn = map_regs.x[band(rshift(op, 5), 31)] | ||
950 | local imm9 = arshift(lshift(op, 11), 23) | ||
951 | if band(op, 0x800) ~= 0 then | ||
952 | x = "["..rn..", #"..imm9.."]!" | ||
953 | else | ||
954 | x = "["..rn.."], #"..imm9 | ||
955 | end | ||
956 | elseif p == "U" then | ||
957 | local rn = map_regs.x[band(rshift(op, 5), 31)] | ||
958 | local sz = band(rshift(op, 30), 3) | ||
-- imm12 (op bits 10-21) is an unsigned offset scaled by the access size, so it
-- is extracted with a logical shift; an arithmetic shift would sign-extend
-- bit 21 and print large offsets as negative.
959 | local imm12 = lshift(rshift(lshift(op, 10), 20), sz) | ||
960 | if imm12 ~= 0 then | ||
961 | x = "["..rn..", #"..imm12.."]" | ||
962 | else | ||
963 | x = "["..rn.."]" | ||
964 | end | ||
965 | elseif p == "K" then | ||
966 | local rn = map_regs.x[band(rshift(op, 5), 31)] | ||
967 | local imm9 = arshift(lshift(op, 11), 23) | ||
968 | if imm9 ~= 0 then | ||
969 | x = "["..rn..", #"..imm9.."]" | ||
970 | else | ||
971 | x = "["..rn.."]" | ||
972 | end | ||
973 | elseif p == "O" then | ||
974 | local rn, rm = map_regs.x[band(rshift(op, 5), 31)] | ||
975 | local m = band(rshift(op, 13), 1) | ||
976 | if m == 0 then | ||
977 | rm = map_regs.w[band(rshift(op, 16), 31)] | ||
978 | else | ||
979 | rm = map_regs.x[band(rshift(op, 16), 31)] | ||
980 | end | ||
981 | x = "["..rn..", "..rm | ||
982 | local opt = band(rshift(op, 13), 7) | ||
983 | local s = band(rshift(op, 12), 1) | ||
984 | local sz = band(rshift(op, 30), 3) | ||
985 | -- extension to be applied | ||
986 | if opt == 3 then | ||
987 | if s == 0 then x = x.."]" | ||
988 | else x = x..", lsl #"..sz.."]" end | ||
989 | elseif opt == 2 or opt == 6 or opt == 7 then | ||
990 | if s == 0 then x = x..", "..map_extend[opt].."]" | ||
991 | else x = x..", "..map_extend[opt].." #"..sz.."]" end | ||
992 | else | ||
993 | x = x.."]" | ||
994 | end | ||
995 | elseif p == "P" then | ||
996 | local opcv, sh = rshift(op, 26), 2 | ||
997 | if opcv >= 0x2a then sh = 4 elseif opcv >= 0x1b then sh = 3 end | ||
998 | local imm7 = lshift(arshift(lshift(op, 10), 25), sh) | ||
999 | local rn = map_regs.x[band(rshift(op, 5), 31)] | ||
1000 | local ind = band(rshift(op, 23), 3) | ||
1001 | if ind == 1 then | ||
1002 | x = "["..rn.."], #"..imm7 | ||
1003 | elseif ind == 2 then | ||
1004 | if imm7 == 0 then | ||
1005 | x = "["..rn.."]" | ||
1006 | else | ||
1007 | x = "["..rn..", #"..imm7.."]" | ||
1008 | end | ||
1009 | elseif ind == 3 then | ||
1010 | x = "["..rn..", #"..imm7.."]!" | ||
1011 | end | ||
1012 | elseif p == "I" then | ||
1013 | local shf = band(rshift(op, 22), 3) | ||
1014 | local imm12 = band(rshift(op, 10), 0x0fff) | ||
1015 | local rn, rd = band(rshift(op, 5), 31), band(op, 31) | ||
1016 | if altname == "mov" and shf == 0 and imm12 == 0 and (rn == 31 or rd == 31) then | ||
1017 | name = altname | ||
1018 | x = nil | ||
1019 | elseif shf == 0 then | ||
1020 | x = imm12 | ||
1021 | elseif shf == 1 then | ||
1022 | x = imm12..", lsl #12" | ||
1023 | end | ||
1024 | elseif p == "i" then | ||
1025 | x = "#0x"..decode_imm13(op) | ||
1026 | elseif p == "1" then | ||
1027 | immr = band(rshift(op, 16), 63) | ||
1028 | x = immr | ||
1029 | elseif p == "2" then | ||
1030 | x = band(rshift(op, 10), 63) | ||
1031 | if altname then | ||
1032 | local a1, a2, a3, a4, a5, a6 = | ||
1033 | match(altname, "([^|]*)|([^|]*)|([^|]*)|([^|]*)|([^|]*)|(.*)") | ||
1034 | local sf = band(rshift(op, 26), 32) | ||
1035 | local uns = band(rshift(op, 30), 1) | ||
1036 | if prefer_bfx(sf, uns, x, immr) then | ||
1037 | name = a2 | ||
1038 | x = x - immr + 1 | ||
1039 | elseif immr == 0 and x == 7 then | ||
1040 | local n = #operands | ||
1041 | operands[n] = nil | ||
1042 | if sf ~= 0 then | ||
1043 | operands[n-1] = gsub(operands[n-1], "x", "w") | ||
1044 | end | ||
1045 | last = operands[n-1] | ||
1046 | name = a6 | ||
1047 | x = nil | ||
1048 | elseif immr == 0 and x == 15 then | ||
1049 | local n = #operands | ||
1050 | operands[n] = nil | ||
1051 | if sf ~= 0 then | ||
1052 | operands[n-1] = gsub(operands[n-1], "x", "w") | ||
1053 | end | ||
1054 | last = operands[n-1] | ||
1055 | name = a5 | ||
1056 | x = nil | ||
1057 | elseif x == 31 or x == 63 then | ||
1058 | if x == 31 and immr == 0 and name == "sbfm" then | ||
1059 | name = a4 | ||
1060 | local n = #operands | ||
1061 | operands[n] = nil | ||
1062 | if sf ~= 0 then | ||
1063 | operands[n-1] = gsub(operands[n-1], "x", "w") | ||
1064 | end | ||
1065 | last = operands[n-1] | ||
1066 | else | ||
1067 | name = a3 | ||
1068 | end | ||
1069 | x = nil | ||
1070 | elseif band(x, 31) ~= 31 and immr == x+1 and name == "ubfm" then | ||
1071 | name = a4 | ||
1072 | last = "#"..(sf+32 - immr) | ||
1073 | operands[#operands] = last | ||
1074 | x = nil | ||
1075 | elseif x < immr then | ||
1076 | name = a1 | ||
1077 | last = "#"..(sf+32 - immr) | ||
1078 | operands[#operands] = last | ||
1079 | x = x + 1 | ||
1080 | end | ||
1081 | end | ||
1082 | elseif p == "3" then | ||
1083 | x = band(rshift(op, 10), 63) | ||
1084 | if altname then | ||
1085 | local a1, a2 = match(altname, "([^|]*)|(.*)") | ||
1086 | if x < immr then | ||
1087 | name = a1 | ||
1088 | local sf = band(rshift(op, 26), 32) | ||
1089 | last = "#"..(sf+32 - immr) | ||
1090 | operands[#operands] = last | ||
1091 | x = x + 1 | ||
1092 | elseif x >= immr then | ||
1093 | name = a2 | ||
1094 | x = x - immr + 1 | ||
1095 | end | ||
1096 | end | ||
1097 | elseif p == "4" then | ||
1098 | x = band(rshift(op, 10), 63) | ||
1099 | local rn = band(rshift(op, 5), 31) | ||
1100 | local rm = band(rshift(op, 16), 31) | ||
1101 | if altname and rn == rm then | ||
1102 | local n = #operands | ||
1103 | operands[n] = nil | ||
1104 | last = operands[n-1] | ||
1105 | name = altname | ||
1106 | end | ||
1107 | elseif p == "5" then | ||
1108 | x = band(rshift(op, 16), 31) | ||
1109 | elseif p == "S" then | ||
1110 | x = band(rshift(op, 10), 63) | ||
1111 | if x == 0 then x = nil | ||
1112 | else x = map_shift[band(rshift(op, 22), 3)].." #"..x end | ||
1113 | elseif p == "X" then | ||
1114 | local opt = band(rshift(op, 13), 7) | ||
1115 | -- Width specifier <R>. | ||
1116 | if opt ~= 3 and opt ~= 7 then | ||
1117 | last = map_regs.w[band(rshift(op, 16), 31)] | ||
1118 | operands[#operands] = last | ||
1119 | end | ||
1120 | x = band(rshift(op, 10), 7) | ||
1121 | -- Extension. | ||
1122 | if opt == 2 + band(rshift(op, 31), 1) and | ||
1123 | band(rshift(op, second0 and 5 or 0), 31) == 31 then | ||
1124 | if x == 0 then x = nil | ||
1125 | else x = "lsl #"..x end | ||
1126 | else | ||
1127 | if x == 0 then x = map_extend[band(rshift(op, 13), 7)] | ||
1128 | else x = map_extend[band(rshift(op, 13), 7)].." #"..x end | ||
1129 | end | ||
1130 | elseif p == "R" then | ||
1131 | x = band(rshift(op,21), 3) | ||
1132 | if x == 0 then x = nil | ||
1133 | else x = "lsl #"..x*16 end | ||
1134 | elseif p == "z" then | ||
1135 | local n = #operands | ||
1136 | if operands[n] == "sp" then operands[n] = "xzr" | ||
1137 | elseif operands[n] == "wsp" then operands[n] = "wzr" | ||
1138 | end | ||
1139 | elseif p == "Z" then | ||
1140 | x = 0 | ||
1141 | elseif p == "F" then | ||
1142 | x = parse_fpimm8(op) | ||
1143 | elseif p == "g" or p == "f" or p == "x" or p == "w" or | ||
1144 | p == "d" or p == "s" then | ||
1145 | -- These are handled in D/N/M/A. | ||
1146 | elseif p == "0" then | ||
1147 | if last == "sp" or last == "wsp" then | ||
1148 | local n = #operands | ||
1149 | operands[n] = nil | ||
1150 | last = operands[n-1] | ||
1151 | if altname then | ||
1152 | local a1, a2 = match(altname, "([^|]*)|(.*)") | ||
1153 | if not a1 then | ||
1154 | name = altname | ||
1155 | elseif second0 then | ||
1156 | name, altname = a2, a1 | ||
1157 | else | ||
1158 | name, altname = a1, a2 | ||
1159 | end | ||
1160 | end | ||
1161 | end | ||
1162 | second0 = true | ||
1163 | else | ||
1164 | assert(false) | ||
1165 | end | ||
-- Numeric operands are rendered as "#<n>"; `last` keeps the raw value for
-- later handlers.
1166 | if x then | ||
1167 | last = x | ||
1168 | if type(x) == "number" then x = "#"..x end | ||
1169 | operands[#operands+1] = x | ||
1170 | end | ||
1171 | end | ||
1172 | |||
1173 | return putop(ctx, name..suffix, operands) | ||
1174 | end | ||
1175 | |||
1176 | ------------------------------------------------------------------------------ | ||
1177 | |||
1178 | -- Disassemble a block of code. | ||
-- ofs: starting offset into ctx.code (defaults to 0).
-- len: number of bytes to decode; when omitted, decoding runs to #ctx.code.
-- Resets ctx.pos and ctx.rel, then calls disass_ins until ctx.pos reaches the stop offset.
1179 | local function disass_block(ctx, ofs, len) | ||
1180 | if not ofs then ofs = 0 end | ||
1181 | local stop = len and ofs+len or #ctx.code | ||
1182 | ctx.pos = ofs | ||
1183 | ctx.rel = nil | ||
1184 | while ctx.pos < stop do disass_ins(ctx) end | ||
1185 | end | ||
1186 | |||
1187 | -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). | ||
-- code: string holding the machine code; addr: base address (defaults to 0);
-- out: output function (defaults to io.write).
-- The context starts with an empty symtab and hexdump = 8 (any value > 0
-- makes putop include the raw opcode word).
1188 | local function create(code, addr, out) | ||
1189 | local ctx = {} | ||
1190 | ctx.code = code | ||
1191 | ctx.addr = addr or 0 | ||
1192 | ctx.out = out or io.write | ||
1193 | ctx.symtab = {} | ||
1194 | ctx.disass = disass_block | ||
1195 | ctx.hexdump = 8 | ||
1196 | return ctx | ||
1197 | end | ||
1198 | |||
1199 | -- Simple API: disassemble code (a string) at address and output via out. | ||
-- Builds a throw-away context with create() and disassembles the whole string.
1200 | local function disass(code, addr, out) | ||
1201 | create(code, addr, out):disass() | ||
1202 | end | ||
1203 | |||
1204 | -- Return register name for RID. | ||
-- IDs below 32 index map_regs.x; any other ID indexes map_regs.d with r-32.
1205 | local function regname(r) | ||
1206 | if r < 32 then return map_regs.x[r] end | ||
1207 | return map_regs.d[r-32] | ||
1208 | end | ||
1209 | |||
1210 | -- Public module functions. | ||
-- create: build a reusable context; disass: one-shot disassembly;
-- regname: register ID to name.
1211 | return { | ||
1212 | create = create, | ||
1213 | disass = disass, | ||
1214 | regname = regname | ||
1215 | } | ||
1216 | |||
diff --git a/src/jit/dis_arm64be.lua b/src/jit/dis_arm64be.lua new file mode 100644 index 00000000..9f4077af --- /dev/null +++ b/src/jit/dis_arm64be.lua | |||
@@ -0,0 +1,12 @@ | |||
1 | ---------------------------------------------------------------------------- | ||
2 | -- LuaJIT ARM64BE disassembler wrapper module. | ||
3 | -- | ||
4 | -- Copyright (C) 2005-2020 Mike Pall. All rights reserved. | ||
5 | -- Released under the MIT license. See Copyright Notice in luajit.h | ||
6 | ---------------------------------------------------------------------------- | ||
7 | -- ARM64 instructions are always little-endian. So just forward to the | ||
8 | -- common ARM64 disassembler module. All the interesting stuff is there. | ||
9 | ------------------------------------------------------------------------------ | ||
10 | |||
-- `...` is the chunk's vararg (the module name when loaded through require).
-- The pattern ".*%." keeps everything up to and including the last dot, or ""
-- when there is none, so "dis_arm64" is required under the same prefix.
11 | return require((string.match(..., ".*%.") or "").."dis_arm64") | ||
12 | |||
diff --git a/src/jit/dis_mips.lua b/src/jit/dis_mips.lua index c720b537..791ac91d 100644 --- a/src/jit/dis_mips.lua +++ b/src/jit/dis_mips.lua | |||
@@ -19,13 +19,34 @@ local band, bor, tohex = bit.band, bit.bor, bit.tohex | |||
19 | local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift | 19 | local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift |
20 | 20 | ||
21 | ------------------------------------------------------------------------------ | 21 | ------------------------------------------------------------------------------ |
22 | -- Primary and extended opcode maps | 22 | -- Extended opcode maps common to all MIPS releases |
23 | ------------------------------------------------------------------------------ | 23 | ------------------------------------------------------------------------------ |
24 | 24 | ||
25 | local map_movci = { shift = 16, mask = 1, [0] = "movfDSC", "movtDSC", } | ||
26 | local map_srl = { shift = 21, mask = 1, [0] = "srlDTA", "rotrDTA", } | 25 | local map_srl = { shift = 21, mask = 1, [0] = "srlDTA", "rotrDTA", } |
27 | local map_srlv = { shift = 6, mask = 1, [0] = "srlvDTS", "rotrvDTS", } | 26 | local map_srlv = { shift = 6, mask = 1, [0] = "srlvDTS", "rotrvDTS", } |
28 | 27 | ||
-- COP0 opcode map. Like every map table here, `shift` and `mask` select the
-- opcode field used as the index into the numeric entries.
-- Bit 25 chooses between the first sub-map (indexed by bits 21-24) and the
-- second sub-map (indexed by bits 0-5).
28 | local map_cop0 = { | ||
29 | shift = 25, mask = 1, | ||
30 | [0] = { | ||
31 | shift = 21, mask = 15, | ||
32 | [0] = "mfc0TDW", [4] = "mtc0TDW", | ||
33 | [10] = "rdpgprDT", | ||
34 | [11] = { shift = 5, mask = 1, [0] = "diT0", "eiT0", }, | ||
35 | [14] = "wrpgprDT", | ||
36 | }, { | ||
37 | shift = 0, mask = 63, | ||
38 | [1] = "tlbr", [2] = "tlbwi", [6] = "tlbwr", [8] = "tlbp", | ||
39 | [24] = "eret", [31] = "deret", | ||
40 | [32] = "wait", | ||
41 | }, | ||
42 | } | ||
43 | |||
44 | ------------------------------------------------------------------------------ | ||
45 | -- Primary and extended opcode maps for MIPS R1-R5 | ||
46 | ------------------------------------------------------------------------------ | ||
47 | |||
48 | local map_movci = { shift = 16, mask = 1, [0] = "movfDSC", "movtDSC", } | ||
49 | |||
29 | local map_special = { | 50 | local map_special = { |
30 | shift = 0, mask = 63, | 51 | shift = 0, mask = 63, |
31 | [0] = { shift = 0, mask = -1, [0] = "nop", _ = "sllDTA" }, | 52 | [0] = { shift = 0, mask = -1, [0] = "nop", _ = "sllDTA" }, |
@@ -34,15 +55,17 @@ local map_special = { | |||
34 | "jrS", "jalrD1S", "movzDST", "movnDST", | 55 | "jrS", "jalrD1S", "movzDST", "movnDST", |
35 | "syscallY", "breakY", false, "sync", | 56 | "syscallY", "breakY", false, "sync", |
36 | "mfhiD", "mthiS", "mfloD", "mtloS", | 57 | "mfhiD", "mthiS", "mfloD", "mtloS", |
37 | false, false, false, false, | 58 | "dsllvDST", false, "dsrlvDST", "dsravDST", |
38 | "multST", "multuST", "divST", "divuST", | 59 | "multST", "multuST", "divST", "divuST", |
39 | false, false, false, false, | 60 | "dmultST", "dmultuST", "ddivST", "ddivuST", |
40 | "addDST", "addu|moveDST0", "subDST", "subu|neguDS0T", | 61 | "addDST", "addu|moveDST0", "subDST", "subu|neguDS0T", |
41 | "andDST", "orDST", "xorDST", "nor|notDST0", | 62 | "andDST", "or|moveDST0", "xorDST", "nor|notDST0", |
42 | false, false, "sltDST", "sltuDST", | 63 | false, false, "sltDST", "sltuDST", |
43 | false, false, false, false, | 64 | "daddDST", "dadduDST", "dsubDST", "dsubuDST", |
44 | "tgeSTZ", "tgeuSTZ", "tltSTZ", "tltuSTZ", | 65 | "tgeSTZ", "tgeuSTZ", "tltSTZ", "tltuSTZ", |
45 | "teqSTZ", false, "tneSTZ", | 66 | "teqSTZ", false, "tneSTZ", false, |
67 | "dsllDTA", false, "dsrlDTA", "dsraDTA", | ||
68 | "dsll32DTA", false, "dsrl32DTA", "dsra32DTA", | ||
46 | } | 69 | } |
47 | 70 | ||
48 | local map_special2 = { | 71 | local map_special2 = { |
@@ -60,11 +83,17 @@ local map_bshfl = { | |||
60 | [24] = "sehDT", | 83 | [24] = "sehDT", |
61 | } | 84 | } |
62 | 85 | ||
-- Sub-map for special3 entry 36, indexed by op bits 6-10.
86 | local map_dbshfl = { | ||
87 | shift = 6, mask = 31, | ||
88 | [2] = "dsbhDT", | ||
89 | [5] = "dshdDT", | ||
90 | } | ||
91 | |||
63 | local map_special3 = { | 92 | local map_special3 = { |
64 | shift = 0, mask = 63, | 93 | shift = 0, mask = 63, |
65 | [0] = "extTSAK", [4] = "insTSAL", | 94 | [0] = "extTSAK", [1] = "dextmTSAP", [3] = "dextTSAK", |
66 | [32] = map_bshfl, | 95 | [4] = "insTSAL", [6] = "dinsuTSEQ", [7] = "dinsTSAL", |
67 | [59] = "rdhwrTD", | 96 | [32] = map_bshfl, [36] = map_dbshfl, [59] = "rdhwrTD", |
68 | } | 97 | } |
69 | 98 | ||
70 | local map_regimm = { | 99 | local map_regimm = { |
@@ -79,22 +108,6 @@ local map_regimm = { | |||
79 | false, false, false, "synciSO", | 108 | false, false, false, "synciSO", |
80 | } | 109 | } |
81 | 110 | ||
82 | local map_cop0 = { | ||
83 | shift = 25, mask = 1, | ||
84 | [0] = { | ||
85 | shift = 21, mask = 15, | ||
86 | [0] = "mfc0TDW", [4] = "mtc0TDW", | ||
87 | [10] = "rdpgprDT", | ||
88 | [11] = { shift = 5, mask = 1, [0] = "diT0", "eiT0", }, | ||
89 | [14] = "wrpgprDT", | ||
90 | }, { | ||
91 | shift = 0, mask = 63, | ||
92 | [1] = "tlbr", [2] = "tlbwi", [6] = "tlbwr", [8] = "tlbp", | ||
93 | [24] = "eret", [31] = "deret", | ||
94 | [32] = "wait", | ||
95 | }, | ||
96 | } | ||
97 | |||
98 | local map_cop1s = { | 111 | local map_cop1s = { |
99 | shift = 0, mask = 63, | 112 | shift = 0, mask = 63, |
100 | [0] = "add.sFGH", "sub.sFGH", "mul.sFGH", "div.sFGH", | 113 | [0] = "add.sFGH", "sub.sFGH", "mul.sFGH", "div.sFGH", |
@@ -178,8 +191,8 @@ local map_cop1bc = { | |||
178 | 191 | ||
179 | local map_cop1 = { | 192 | local map_cop1 = { |
180 | shift = 21, mask = 31, | 193 | shift = 21, mask = 31, |
181 | [0] = "mfc1TG", false, "cfc1TG", "mfhc1TG", | 194 | [0] = "mfc1TG", "dmfc1TG", "cfc1TG", "mfhc1TG", |
182 | "mtc1TG", false, "ctc1TG", "mthc1TG", | 195 | "mtc1TG", "dmtc1TG", "ctc1TG", "mthc1TG", |
183 | map_cop1bc, false, false, false, | 196 | map_cop1bc, false, false, false, |
184 | false, false, false, false, | 197 | false, false, false, false, |
185 | map_cop1s, map_cop1d, false, false, | 198 | map_cop1s, map_cop1d, false, false, |
@@ -213,16 +226,218 @@ local map_pri = { | |||
213 | "andiTSU", "ori|liTS0U", "xoriTSU", "luiTU", | 226 | "andiTSU", "ori|liTS0U", "xoriTSU", "luiTU", |
214 | map_cop0, map_cop1, false, map_cop1x, | 227 | map_cop0, map_cop1, false, map_cop1x, |
215 | "beql|beqzlST0B", "bnel|bnezlST0B", "blezlSB", "bgtzlSB", | 228 | "beql|beqzlST0B", "bnel|bnezlST0B", "blezlSB", "bgtzlSB", |
216 | false, false, false, false, | 229 | "daddiTSI", "daddiuTSI", false, false, |
217 | map_special2, false, false, map_special3, | 230 | map_special2, "jalxJ", false, map_special3, |
218 | "lbTSO", "lhTSO", "lwlTSO", "lwTSO", | 231 | "lbTSO", "lhTSO", "lwlTSO", "lwTSO", |
219 | "lbuTSO", "lhuTSO", "lwrTSO", false, | 232 | "lbuTSO", "lhuTSO", "lwrTSO", false, |
220 | "sbTSO", "shTSO", "swlTSO", "swTSO", | 233 | "sbTSO", "shTSO", "swlTSO", "swTSO", |
221 | false, false, "swrTSO", "cacheNSO", | 234 | false, false, "swrTSO", "cacheNSO", |
222 | "llTSO", "lwc1HSO", "lwc2TSO", "prefNSO", | 235 | "llTSO", "lwc1HSO", "lwc2TSO", "prefNSO", |
223 | false, "ldc1HSO", "ldc2TSO", false, | 236 | false, "ldc1HSO", "ldc2TSO", "ldTSO", |
224 | "scTSO", "swc1HSO", "swc2TSO", false, | 237 | "scTSO", "swc1HSO", "swc2TSO", false, |
225 | false, "sdc1HSO", "sdc2TSO", false, | 238 | false, "sdc1HSO", "sdc2TSO", "sdTSO", |
239 | } | ||
240 | |||
241 | ------------------------------------------------------------------------------ | ||
242 | -- Primary and extended opcode maps for MIPS R6 | ||
243 | ------------------------------------------------------------------------------ | ||
244 | |||
-- R6 multiply/divide sub-maps, each indexed by the 2-bit field at op bits 6-7.
-- Only values 2 and 3 are defined; other values have no entry and no `_`
-- fallback, so the lookup yields nil.
245 | local map_mul_r6 = { shift = 6, mask = 3, [2] = "mulDST", [3] = "muhDST" } | ||
246 | local map_mulu_r6 = { shift = 6, mask = 3, [2] = "muluDST", [3] = "muhuDST" } | ||
247 | local map_div_r6 = { shift = 6, mask = 3, [2] = "divDST", [3] = "modDST" } | ||
248 | local map_divu_r6 = { shift = 6, mask = 3, [2] = "divuDST", [3] = "moduDST" } | ||
249 | local map_dmul_r6 = { shift = 6, mask = 3, [2] = "dmulDST", [3] = "dmuhDST" } | ||
250 | local map_dmulu_r6 = { shift = 6, mask = 3, [2] = "dmuluDST", [3] = "dmuhuDST" } | ||
251 | local map_ddiv_r6 = { shift = 6, mask = 3, [2] = "ddivDST", [3] = "dmodDST" } | ||
252 | local map_ddivu_r6 = { shift = 6, mask = 3, [2] = "ddivuDST", [3] = "dmoduDST" } | ||
253 | |||
-- R6 SPECIAL opcode map (primary opcode 0 in map_pri_r6), indexed by the
-- function field at op bits 0-5. Entries are pattern strings, nested sub-maps,
-- or false for unassigned slots.
254 | local map_special_r6 = { | ||
255 | shift = 0, mask = 63, | ||
-- mask = -1 keeps the whole word: an all-zero word is "nop", everything else
-- falls through `_` to sll.
256 | [0] = { shift = 0, mask = -1, [0] = "nop", _ = "sllDTA" }, | ||
257 | false, map_srl, "sraDTA", | ||
258 | "sllvDTS", false, map_srlv, "sravDTS", | ||
259 | "jrS", "jalrD1S", false, false, | ||
260 | "syscallY", "breakY", false, "sync", | ||
261 | "clzDS", "cloDS", "dclzDS", "dcloDS", | ||
262 | "dsllvDST", "dlsaDSTA", "dsrlvDST", "dsravDST", | ||
263 | map_mul_r6, map_mulu_r6, map_div_r6, map_divu_r6, | ||
264 | map_dmul_r6, map_dmulu_r6, map_ddiv_r6, map_ddivu_r6, | ||
265 | "addDST", "addu|moveDST0", "subDST", "subu|neguDS0T", | ||
266 | "andDST", "or|moveDST0", "xorDST", "nor|notDST0", | ||
267 | false, false, "sltDST", "sltuDST", | ||
268 | "daddDST", "dadduDST", "dsubDST", "dsubuDST", | ||
269 | "tgeSTZ", "tgeuSTZ", "tltSTZ", "tltuSTZ", | ||
270 | "teqSTZ", "seleqzDST", "tneSTZ", "selnezDST", | ||
271 | "dsllDTA", false, "dsrlDTA", "dsraDTA", | ||
272 | "dsll32DTA", false, "dsrl32DTA", "dsra32DTA", | ||
273 | } | ||
274 | |||
-- R6 variant of the special3 entry-32 sub-map. Op bits 9-10 are checked first:
-- value 1 is align, every other value falls through `_` to a sub-map indexed
-- by op bits 6-10.
275 | local map_bshfl_r6 = { | ||
276 | shift = 9, mask = 3, | ||
277 | [1] = "alignDSTa", | ||
278 | _ = { | ||
279 | shift = 6, mask = 31, | ||
280 | [0] = "bitswapDT", | ||
281 | [2] = "wsbhDT", | ||
282 | [16] = "sebDT", | ||
283 | [24] = "sehDT", | ||
284 | } | ||
285 | } | ||
286 | |||
-- R6 variant of the special3 entry-36 sub-map, with the same two-level layout
-- as map_bshfl_r6: op bits 9-10 first, then `_` falls through to a sub-map
-- indexed by op bits 6-10.
287 | local map_dbshfl_r6 = { | ||
288 | shift = 9, mask = 3, | ||
289 | [1] = "dalignDSTa", | ||
290 | _ = { | ||
291 | shift = 6, mask = 31, | ||
292 | [0] = "dbitswapDT", | ||
293 | [2] = "dsbhDT", | ||
294 | [5] = "dshdDT", | ||
295 | } | ||
296 | } | ||
297 | |||
-- R6 SPECIAL3 opcode map (primary opcode 31 in map_pri_r6), indexed by op bits 0-5.
298 | local map_special3_r6 = { | ||
299 | shift = 0, mask = 63, | ||
300 | [0] = "extTSAK", [1] = "dextmTSAP", [3] = "dextTSAK", | ||
301 | [4] = "insTSAL", [6] = "dinsuTSEQ", [7] = "dinsTSAL", | ||
302 | [32] = map_bshfl_r6, [36] = map_dbshfl_r6, [59] = "rdhwrTD", | ||
303 | } | ||
304 | |||
-- R6 REGIMM opcode map (primary opcode 1 in map_pri_r6), indexed by op bits 16-20.
305 | local map_regimm_r6 = { | ||
306 | shift = 16, mask = 31, | ||
307 | [0] = "bltzSB", [1] = "bgezSB", | ||
308 | [6] = "dahiSI", [30] = "datiSI", | ||
309 | [23] = "sigrieI", [31] = "synciSO", | ||
310 | } | ||
311 | |||
-- R6 PC-relative opcode map (primary opcode 59 in map_pri_r6). Op bits 19-20
-- are decoded first; value 3 is refined by bit 18 and then by bits 16-17.
312 | local map_pcrel_r6 = { | ||
313 | shift = 19, mask = 3, | ||
314 | [0] = "addiupcS2", "lwpcS2", "lwupcS2", { | ||
315 | shift = 18, mask = 1, | ||
316 | [0] = "ldpcS3", { shift = 16, mask = 3, [2] = "auipcSI", [3] = "aluipcSI" } | ||
317 | } | ||
318 | } | ||
319 | |||
-- R6 COP1 map at index 16 of map_cop1_r6, indexed by op bits 0-5
-- (false = unassigned).
-- NOTE(review): the ".s" names suggest single-precision format - confirm.
320 | local map_cop1s_r6 = { | ||
321 | shift = 0, mask = 63, | ||
322 | [0] = "add.sFGH", "sub.sFGH", "mul.sFGH", "div.sFGH", | ||
323 | "sqrt.sFG", "abs.sFG", "mov.sFG", "neg.sFG", | ||
324 | "round.l.sFG", "trunc.l.sFG", "ceil.l.sFG", "floor.l.sFG", | ||
325 | "round.w.sFG", "trunc.w.sFG", "ceil.w.sFG", "floor.w.sFG", | ||
326 | "sel.sFGH", false, false, false, | ||
327 | "seleqz.sFGH", "recip.sFG", "rsqrt.sFG", "selnez.sFGH", | ||
328 | "maddf.sFGH", "msubf.sFGH", "rint.sFG", "class.sFG", | ||
329 | "min.sFGH", "mina.sFGH", "max.sFGH", "maxa.sFGH", | ||
330 | false, "cvt.d.sFG", false, false, | ||
331 | "cvt.w.sFG", "cvt.l.sFG", | ||
332 | } | ||
333 | |||
-- R6 COP1 map at index 17 of map_cop1_r6, indexed by op bits 0-5
-- (false = unassigned).
-- NOTE(review): the ".d" names suggest double-precision format - confirm.
334 | local map_cop1d_r6 = { | ||
335 | shift = 0, mask = 63, | ||
336 | [0] = "add.dFGH", "sub.dFGH", "mul.dFGH", "div.dFGH", | ||
337 | "sqrt.dFG", "abs.dFG", "mov.dFG", "neg.dFG", | ||
338 | "round.l.dFG", "trunc.l.dFG", "ceil.l.dFG", "floor.l.dFG", | ||
339 | "round.w.dFG", "trunc.w.dFG", "ceil.w.dFG", "floor.w.dFG", | ||
340 | "sel.dFGH", false, false, false, | ||
341 | "seleqz.dFGH", "recip.dFG", "rsqrt.dFG", "selnez.dFGH", | ||
342 | "maddf.dFGH", "msubf.dFGH", "rint.dFG", "class.dFG", | ||
343 | "min.dFGH", "mina.dFGH", "max.dFGH", "maxa.dFGH", | ||
344 | "cvt.s.dFG", false, false, false, | ||
345 | "cvt.w.dFG", "cvt.l.dFG", | ||
346 | } | ||
347 | |||
-- R6 COP1 map at index 20 of map_cop1_r6, indexed by op bits 0-5: the cmp.*.s
-- compare family plus the conversions from .w at indices 32 and 33.
348 | local map_cop1w_r6 = { | ||
349 | shift = 0, mask = 63, | ||
350 | [0] = "cmp.af.sFGH", "cmp.un.sFGH", "cmp.eq.sFGH", "cmp.ueq.sFGH", | ||
351 | "cmp.lt.sFGH", "cmp.ult.sFGH", "cmp.le.sFGH", "cmp.ule.sFGH", | ||
352 | "cmp.saf.sFGH", "cmp.sun.sFGH", "cmp.seq.sFGH", "cmp.sueq.sFGH", | ||
353 | "cmp.slt.sFGH", "cmp.sult.sFGH", "cmp.sle.sFGH", "cmp.sule.sFGH", | ||
354 | false, "cmp.or.sFGH", "cmp.une.sFGH", "cmp.ne.sFGH", | ||
355 | false, false, false, false, | ||
356 | false, "cmp.sor.sFGH", "cmp.sune.sFGH", "cmp.sne.sFGH", | ||
357 | false, false, false, false, | ||
358 | "cvt.s.wFG", "cvt.d.wFG", | ||
359 | } | ||
360 | |||
-- R6 COP1 map at index 21 of map_cop1_r6, indexed by op bits 0-5: the cmp.*.d
-- compare family plus the conversions from .l at indices 32 and 33.
361 | local map_cop1l_r6 = { | ||
362 | shift = 0, mask = 63, | ||
363 | [0] = "cmp.af.dFGH", "cmp.un.dFGH", "cmp.eq.dFGH", "cmp.ueq.dFGH", | ||
364 | "cmp.lt.dFGH", "cmp.ult.dFGH", "cmp.le.dFGH", "cmp.ule.dFGH", | ||
365 | "cmp.saf.dFGH", "cmp.sun.dFGH", "cmp.seq.dFGH", "cmp.sueq.dFGH", | ||
366 | "cmp.slt.dFGH", "cmp.sult.dFGH", "cmp.sle.dFGH", "cmp.sule.dFGH", | ||
367 | false, "cmp.or.dFGH", "cmp.une.dFGH", "cmp.ne.dFGH", | ||
368 | false, false, false, false, | ||
369 | false, "cmp.sor.dFGH", "cmp.sune.dFGH", "cmp.sne.dFGH", | ||
370 | false, false, false, false, | ||
371 | "cvt.s.lFG", "cvt.d.lFG", | ||
372 | } | ||
373 | |||
-- R6 COP1 opcode map (primary opcode 17 in map_pri_r6), indexed by op bits 21-25.
-- Indices 16, 17, 20 and 21 dispatch to the format-specific sub-maps above.
374 | local map_cop1_r6 = { | ||
375 | shift = 21, mask = 31, | ||
376 | [0] = "mfc1TG", "dmfc1TG", "cfc1TG", "mfhc1TG", | ||
377 | "mtc1TG", "dmtc1TG", "ctc1TG", "mthc1TG", | ||
378 | false, "bc1eqzHB", false, false, | ||
379 | false, "bc1nezHB", false, false, | ||
380 | map_cop1s_r6, map_cop1d_r6, false, false, | ||
381 | map_cop1w_r6, map_cop1l_r6, | ||
382 | } | ||
383 | |||
-- Index selector for maps that carry a `maprs` field. disass_ins calls it with
-- rs (op bits 21-25) and rt (op bits 16-20) and uses the result as the map index.
-- Returns 0 when rt == 0, else 1 when rs == 0, else 2 when rs == rt, else 3.
384 | local function maprs_popTS(rs, rt) | ||
385 | if rt == 0 then return 0 elseif rs == 0 then return 1 | ||
386 | elseif rs == rt then return 2 else return 3 end | ||
387 | end | ||
388 | |||
-- R6 maps indexed through maprs_popTS (result 0..3).
-- map_pop26_r6 and map_pop27_r6 define no [0] entry, so the rt == 0 case
-- yields nil and disass_ins reports the word as unknown.
389 | local map_pop06_r6 = { | ||
390 | maprs = maprs_popTS, [0] = "blezSB", "blezalcTB", "bgezalcTB", "bgeucSTB" | ||
391 | } | ||
392 | local map_pop07_r6 = { | ||
393 | maprs = maprs_popTS, [0] = "bgtzSB", "bgtzalcTB", "bltzalcTB", "bltucSTB" | ||
394 | } | ||
395 | local map_pop26_r6 = { | ||
396 | maprs = maprs_popTS, "blezcTB", "bgezcTB", "bgecSTB" | ||
397 | } | ||
398 | local map_pop27_r6 = { | ||
399 | maprs = maprs_popTS, "bgtzcTB", "bltzcTB", "bltcSTB" | ||
400 | } | ||
401 | |||
-- Index selector that looks only at rs: 0 when rs == 0, otherwise 1.
-- rt is accepted for call compatibility and ignored.
402 | local function maprs_popS(rs, rt) | ||
403 | if rs == 0 then return 0 else return 1 end | ||
404 | end | ||
405 | |||
-- R6 maps indexed through maprs_popS: entry 0 when rs == 0, entry 1 otherwise.
406 | local map_pop66_r6 = { | ||
407 | maprs = maprs_popS, [0] = "jicTI", "beqzcSb" | ||
408 | } | ||
409 | local map_pop76_r6 = { | ||
410 | maprs = maprs_popS, [0] = "jialcTI", "bnezcSb" | ||
411 | } | ||
412 | |||
-- Index selector comparing rs with rt: 0 when rs >= rt, else 1 when rs == 0,
-- else 2.
413 | local function maprs_popST(rs, rt) | ||
414 | if rs >= rt then return 0 elseif rs == 0 then return 1 else return 2 end | ||
415 | end | ||
416 | |||
-- R6 maps indexed through maprs_popST (result 0..2).
417 | local map_pop10_r6 = { | ||
418 | maprs = maprs_popST, [0] = "bovcSTB", "beqzalcTB", "beqcSTB" | ||
419 | } | ||
420 | local map_pop30_r6 = { | ||
421 | maprs = maprs_popST, [0] = "bnvcSTB", "bnezalcTB", "bnecSTB" | ||
422 | } | ||
423 | |||
-- R6 primary opcode map. disass_ins indexes the selected primary map with the
-- top 6 bits of the word (rshift(op, 26)), giving 64 slots laid out four per row.
-- Entries are pattern strings, sub-maps, or false for unassigned opcodes.
424 | local map_pri_r6 = { | ||
425 | [0] = map_special_r6, map_regimm_r6, "jJ", "jalJ", | ||
426 | "beq|beqz|bST00B", "bne|bnezST0B", map_pop06_r6, map_pop07_r6, | ||
427 | map_pop10_r6, "addiu|liTS0I", "sltiTSI", "sltiuTSI", | ||
428 | "andiTSU", "ori|liTS0U", "xoriTSU", "aui|luiTS0U", | ||
429 | map_cop0, map_cop1_r6, false, false, | ||
430 | false, false, map_pop26_r6, map_pop27_r6, | ||
431 | map_pop30_r6, "daddiuTSI", false, false, | ||
432 | false, "dauiTSI", false, map_special3_r6, | ||
433 | "lbTSO", "lhTSO", false, "lwTSO", | ||
434 | "lbuTSO", "lhuTSO", false, false, | ||
435 | "sbTSO", "shTSO", false, "swTSO", | ||
436 | false, false, false, false, | ||
437 | false, "lwc1HSO", "bc#", false, | ||
438 | false, "ldc1HSO", map_pop66_r6, "ldTSO", | ||
439 | false, "swc1HSO", "balc#", map_pcrel_r6, | ||
440 | false, "sdc1HSO", map_pop76_r6, "sdTSO", | ||
441 | } | ||
227 | 442 | ||
228 | ------------------------------------------------------------------------------ | 443 | ------------------------------------------------------------------------------ |
@@ -279,10 +494,14 @@ local function disass_ins(ctx) | |||
279 | ctx.op = op | 494 | ctx.op = op |
280 | ctx.rel = nil | 495 | ctx.rel = nil |
281 | 496 | ||
282 | local opat = map_pri[rshift(op, 26)] | 497 | local opat = ctx.map_pri[rshift(op, 26)] |
283 | while type(opat) ~= "string" do | 498 | while type(opat) ~= "string" do |
284 | if not opat then return unknown(ctx) end | 499 | if not opat then return unknown(ctx) end |
285 | opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._ | 500 | if opat.maprs then |
501 | opat = opat[opat.maprs(band(rshift(op,21),31), band(rshift(op,16),31))] | ||
502 | else | ||
503 | opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._ | ||
504 | end | ||
286 | end | 505 | end |
287 | local name, pat = match(opat, "^([a-z0-9_.]*)(.*)") | 506 | local name, pat = match(opat, "^([a-z0-9_.]*)(.*)") |
288 | local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)") | 507 | local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)") |
@@ -306,6 +525,10 @@ local function disass_ins(ctx) | |||
306 | x = "f"..band(rshift(op, 21), 31) | 525 | x = "f"..band(rshift(op, 21), 31) |
307 | elseif p == "A" then | 526 | elseif p == "A" then |
308 | x = band(rshift(op, 6), 31) | 527 | x = band(rshift(op, 6), 31) |
528 | elseif p == "a" then | ||
529 | x = band(rshift(op, 6), 7) | ||
530 | elseif p == "E" then | ||
531 | x = band(rshift(op, 6), 31) + 32 | ||
309 | elseif p == "M" then | 532 | elseif p == "M" then |
310 | x = band(rshift(op, 11), 31) | 533 | x = band(rshift(op, 11), 31) |
311 | elseif p == "N" then | 534 | elseif p == "N" then |
@@ -315,10 +538,18 @@ local function disass_ins(ctx) | |||
315 | if x == 0 then x = nil end | 538 | if x == 0 then x = nil end |
316 | elseif p == "K" then | 539 | elseif p == "K" then |
317 | x = band(rshift(op, 11), 31) + 1 | 540 | x = band(rshift(op, 11), 31) + 1 |
541 | elseif p == "P" then | ||
542 | x = band(rshift(op, 11), 31) + 33 | ||
318 | elseif p == "L" then | 543 | elseif p == "L" then |
319 | x = band(rshift(op, 11), 31) - last + 1 | 544 | x = band(rshift(op, 11), 31) - last + 1 |
545 | elseif p == "Q" then | ||
546 | x = band(rshift(op, 11), 31) - last + 33 | ||
320 | elseif p == "I" then | 547 | elseif p == "I" then |
321 | x = arshift(lshift(op, 16), 16) | 548 | x = arshift(lshift(op, 16), 16) |
549 | elseif p == "2" then | ||
550 | x = arshift(lshift(op, 13), 11) | ||
551 | elseif p == "3" then | ||
552 | x = arshift(lshift(op, 14), 11) | ||
322 | elseif p == "U" then | 553 | elseif p == "U" then |
323 | x = band(op, 0xffff) | 554 | x = band(op, 0xffff) |
324 | elseif p == "O" then | 555 | elseif p == "O" then |
@@ -328,13 +559,22 @@ local function disass_ins(ctx) | |||
328 | local index = map_gpr[band(rshift(op, 16), 31)] | 559 | local index = map_gpr[band(rshift(op, 16), 31)] |
329 | operands[#operands] = format("%s(%s)", index, last) | 560 | operands[#operands] = format("%s(%s)", index, last) |
330 | elseif p == "B" then | 561 | elseif p == "B" then |
331 | x = ctx.addr + ctx.pos + arshift(lshift(op, 16), 16)*4 + 4 | 562 | x = ctx.addr + ctx.pos + arshift(lshift(op, 16), 14) + 4 |
563 | ctx.rel = x | ||
564 | x = format("0x%08x", x) | ||
565 | elseif p == "b" then | ||
566 | x = ctx.addr + ctx.pos + arshift(lshift(op, 11), 9) + 4 | ||
332 | ctx.rel = x | 567 | ctx.rel = x |
333 | x = "0x"..tohex(x) | 568 | x = format("0x%08x", x) |
569 | elseif p == "#" then | ||
570 | x = ctx.addr + ctx.pos + arshift(lshift(op, 6), 4) + 4 | ||
571 | ctx.rel = x | ||
572 | x = format("0x%08x", x) | ||
334 | elseif p == "J" then | 573 | elseif p == "J" then |
335 | x = band(ctx.addr + ctx.pos, 0xf0000000) + band(op, 0x03ffffff)*4 | 574 | local a = ctx.addr + ctx.pos |
575 | x = a - band(a, 0x0fffffff) + band(op, 0x03ffffff)*4 | ||
336 | ctx.rel = x | 576 | ctx.rel = x |
337 | x = "0x"..tohex(x) | 577 | x = format("0x%08x", x) |
338 | elseif p == "V" then | 578 | elseif p == "V" then |
339 | x = band(rshift(op, 8), 7) | 579 | x = band(rshift(op, 8), 7) |
340 | if x == 0 then x = nil end | 580 | if x == 0 then x = nil end |
@@ -384,7 +624,7 @@ local function disass_block(ctx, ofs, len) | |||
384 | end | 624 | end |
385 | 625 | ||
386 | -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). | 626 | -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). |
387 | local function create_(code, addr, out) | 627 | local function create(code, addr, out) |
388 | local ctx = {} | 628 | local ctx = {} |
389 | ctx.code = code | 629 | ctx.code = code |
390 | ctx.addr = addr or 0 | 630 | ctx.addr = addr or 0 |
@@ -393,36 +633,62 @@ local function create_(code, addr, out) | |||
393 | ctx.disass = disass_block | 633 | ctx.disass = disass_block |
394 | ctx.hexdump = 8 | 634 | ctx.hexdump = 8 |
395 | ctx.get = get_be | 635 | ctx.get = get_be |
636 | ctx.map_pri = map_pri | ||
637 | return ctx | ||
638 | end | ||
639 | |||
640 | local function create_el(code, addr, out) | ||
641 | local ctx = create(code, addr, out) | ||
642 | ctx.get = get_le | ||
643 | return ctx | ||
644 | end | ||
645 | |||
646 | local function create_r6(code, addr, out) | ||
647 | local ctx = create(code, addr, out) | ||
648 | ctx.map_pri = map_pri_r6 | ||
396 | return ctx | 649 | return ctx |
397 | end | 650 | end |
398 | 651 | ||
399 | local function create_el_(code, addr, out) | 652 | local function create_r6_el(code, addr, out) |
400 | local ctx = create_(code, addr, out) | 653 | local ctx = create(code, addr, out) |
401 | ctx.get = get_le | 654 | ctx.get = get_le |
655 | ctx.map_pri = map_pri_r6 | ||
402 | return ctx | 656 | return ctx |
403 | end | 657 | end |
404 | 658 | ||
405 | -- Simple API: disassemble code (a string) at address and output via out. | 659 | -- Simple API: disassemble code (a string) at address and output via out. |
406 | local function disass_(code, addr, out) | 660 | local function disass(code, addr, out) |
407 | create_(code, addr, out):disass() | 661 | create(code, addr, out):disass() |
662 | end | ||
663 | |||
664 | local function disass_el(code, addr, out) | ||
665 | create_el(code, addr, out):disass() | ||
408 | end | 666 | end |
409 | 667 | ||
410 | local function disass_el_(code, addr, out) | 668 | local function disass_r6(code, addr, out) |
411 | create_el_(code, addr, out):disass() | 669 | create_r6(code, addr, out):disass() |
670 | end | ||
671 | |||
672 | local function disass_r6_el(code, addr, out) | ||
673 | create_r6_el(code, addr, out):disass() | ||
412 | end | 674 | end |
413 | 675 | ||
414 | -- Return register name for RID. | 676 | -- Return register name for RID. |
415 | local function regname_(r) | 677 | local function regname(r) |
416 | if r < 32 then return map_gpr[r] end | 678 | if r < 32 then return map_gpr[r] end |
417 | return "f"..(r-32) | 679 | return "f"..(r-32) |
418 | end | 680 | end |
419 | 681 | ||
420 | -- Public module functions. | 682 | -- Public module functions. |
421 | module(...) | 683 | return { |
422 | 684 | create = create, | |
423 | create = create_ | 685 | create_el = create_el, |
424 | create_el = create_el_ | 686 | create_r6 = create_r6, |
425 | disass = disass_ | 687 | create_r6_el = create_r6_el, |
426 | disass_el = disass_el_ | 688 | disass = disass, |
427 | regname = regname_ | 689 | disass_el = disass_el, |
690 | disass_r6 = disass_r6, | ||
691 | disass_r6_el = disass_r6_el, | ||
692 | regname = regname | ||
693 | } | ||
428 | 694 | ||
diff --git a/src/jit/dis_mips64.lua b/src/jit/dis_mips64.lua new file mode 100644 index 00000000..018e6058 --- /dev/null +++ b/src/jit/dis_mips64.lua | |||
@@ -0,0 +1,17 @@ | |||
1 | ---------------------------------------------------------------------------- | ||
2 | -- LuaJIT MIPS64 disassembler wrapper module. | ||
3 | -- | ||
4 | -- Copyright (C) 2005-2020 Mike Pall. All rights reserved. | ||
5 | -- Released under the MIT license. See Copyright Notice in luajit.h | ||
6 | ---------------------------------------------------------------------------- | ||
7 | -- This module just exports the big-endian functions from the | ||
8 | -- MIPS disassembler module. All the interesting stuff is there. | ||
9 | ------------------------------------------------------------------------------ | ||
10 | |||
11 | local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips") | ||
12 | return { | ||
13 | create = dis_mips.create, | ||
14 | disass = dis_mips.disass, | ||
15 | regname = dis_mips.regname | ||
16 | } | ||
17 | |||
diff --git a/src/jit/dis_mips64el.lua b/src/jit/dis_mips64el.lua new file mode 100644 index 00000000..ef3af475 --- /dev/null +++ b/src/jit/dis_mips64el.lua | |||
@@ -0,0 +1,17 @@ | |||
1 | ---------------------------------------------------------------------------- | ||
2 | -- LuaJIT MIPS64EL disassembler wrapper module. | ||
3 | -- | ||
4 | -- Copyright (C) 2005-2020 Mike Pall. All rights reserved. | ||
5 | -- Released under the MIT license. See Copyright Notice in luajit.h | ||
6 | ---------------------------------------------------------------------------- | ||
7 | -- This module just exports the little-endian functions from the | ||
8 | -- MIPS disassembler module. All the interesting stuff is there. | ||
9 | ------------------------------------------------------------------------------ | ||
10 | |||
11 | local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips") | ||
12 | return { | ||
13 | create = dis_mips.create_el, | ||
14 | disass = dis_mips.disass_el, | ||
15 | regname = dis_mips.regname | ||
16 | } | ||
17 | |||
diff --git a/src/jit/dis_mips64r6.lua b/src/jit/dis_mips64r6.lua new file mode 100644 index 00000000..2bfc2429 --- /dev/null +++ b/src/jit/dis_mips64r6.lua | |||
@@ -0,0 +1,17 @@ | |||
1 | ---------------------------------------------------------------------------- | ||
2 | -- LuaJIT MIPS64R6 disassembler wrapper module. | ||
3 | -- | ||
4 | -- Copyright (C) 2005-2020 Mike Pall. All rights reserved. | ||
5 | -- Released under the MIT license. See Copyright Notice in luajit.h | ||
6 | ---------------------------------------------------------------------------- | ||
7 | -- This module just exports the r6 big-endian functions from the | ||
8 | -- MIPS disassembler module. All the interesting stuff is there. | ||
9 | ------------------------------------------------------------------------------ | ||
10 | |||
11 | local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips") | ||
12 | return { | ||
13 | create = dis_mips.create_r6, | ||
14 | disass = dis_mips.disass_r6, | ||
15 | regname = dis_mips.regname | ||
16 | } | ||
17 | |||
diff --git a/src/jit/dis_mips64r6el.lua b/src/jit/dis_mips64r6el.lua new file mode 100644 index 00000000..30597552 --- /dev/null +++ b/src/jit/dis_mips64r6el.lua | |||
@@ -0,0 +1,17 @@ | |||
1 | ---------------------------------------------------------------------------- | ||
2 | -- LuaJIT MIPS64R6EL disassembler wrapper module. | ||
3 | -- | ||
4 | -- Copyright (C) 2005-2020 Mike Pall. All rights reserved. | ||
5 | -- Released under the MIT license. See Copyright Notice in luajit.h | ||
6 | ---------------------------------------------------------------------------- | ||
7 | -- This module just exports the r6 little-endian functions from the | ||
8 | -- MIPS disassembler module. All the interesting stuff is there. | ||
9 | ------------------------------------------------------------------------------ | ||
10 | |||
11 | local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips") | ||
12 | return { | ||
13 | create = dis_mips.create_r6_el, | ||
14 | disass = dis_mips.disass_r6_el, | ||
15 | regname = dis_mips.regname | ||
16 | } | ||
17 | |||
diff --git a/src/jit/dis_mipsel.lua b/src/jit/dis_mipsel.lua index a2d05690..a6bb9565 100644 --- a/src/jit/dis_mipsel.lua +++ b/src/jit/dis_mipsel.lua | |||
@@ -8,13 +8,10 @@ | |||
8 | -- MIPS disassembler module. All the interesting stuff is there. | 8 | -- MIPS disassembler module. All the interesting stuff is there. |
9 | ------------------------------------------------------------------------------ | 9 | ------------------------------------------------------------------------------ |
10 | 10 | ||
11 | local require = require | 11 | local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips") |
12 | 12 | return { | |
13 | module(...) | 13 | create = dis_mips.create_el, |
14 | 14 | disass = dis_mips.disass_el, | |
15 | local dis_mips = require(_PACKAGE.."dis_mips") | 15 | regname = dis_mips.regname |
16 | 16 | } | |
17 | create = dis_mips.create_el | ||
18 | disass = dis_mips.disass_el | ||
19 | regname = dis_mips.regname | ||
20 | 17 | ||
diff --git a/src/jit/dis_ppc.lua b/src/jit/dis_ppc.lua index dfc6cbce..31d7a4d5 100644 --- a/src/jit/dis_ppc.lua +++ b/src/jit/dis_ppc.lua | |||
@@ -560,7 +560,7 @@ local function disass_block(ctx, ofs, len) | |||
560 | end | 560 | end |
561 | 561 | ||
562 | -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). | 562 | -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). |
563 | local function create_(code, addr, out) | 563 | local function create(code, addr, out) |
564 | local ctx = {} | 564 | local ctx = {} |
565 | ctx.code = code | 565 | ctx.code = code |
566 | ctx.addr = addr or 0 | 566 | ctx.addr = addr or 0 |
@@ -572,20 +572,20 @@ local function create_(code, addr, out) | |||
572 | end | 572 | end |
573 | 573 | ||
574 | -- Simple API: disassemble code (a string) at address and output via out. | 574 | -- Simple API: disassemble code (a string) at address and output via out. |
575 | local function disass_(code, addr, out) | 575 | local function disass(code, addr, out) |
576 | create_(code, addr, out):disass() | 576 | create(code, addr, out):disass() |
577 | end | 577 | end |
578 | 578 | ||
579 | -- Return register name for RID. | 579 | -- Return register name for RID. |
580 | local function regname_(r) | 580 | local function regname(r) |
581 | if r < 32 then return map_gpr[r] end | 581 | if r < 32 then return map_gpr[r] end |
582 | return "f"..(r-32) | 582 | return "f"..(r-32) |
583 | end | 583 | end |
584 | 584 | ||
585 | -- Public module functions. | 585 | -- Public module functions. |
586 | module(...) | 586 | return { |
587 | 587 | create = create, | |
588 | create = create_ | 588 | disass = disass, |
589 | disass = disass_ | 589 | regname = regname |
590 | regname = regname_ | 590 | } |
591 | 591 | ||
diff --git a/src/jit/dis_x64.lua b/src/jit/dis_x64.lua index 1027b5a1..88032f1e 100644 --- a/src/jit/dis_x64.lua +++ b/src/jit/dis_x64.lua | |||
@@ -8,13 +8,10 @@ | |||
8 | -- x86/x64 disassembler module. All the interesting stuff is there. | 8 | -- x86/x64 disassembler module. All the interesting stuff is there. |
9 | ------------------------------------------------------------------------------ | 9 | ------------------------------------------------------------------------------ |
10 | 10 | ||
11 | local require = require | 11 | local dis_x86 = require((string.match(..., ".*%.") or "").."dis_x86") |
12 | 12 | return { | |
13 | module(...) | 13 | create = dis_x86.create64, |
14 | 14 | disass = dis_x86.disass64, | |
15 | local dis_x86 = require(_PACKAGE.."dis_x86") | 15 | regname = dis_x86.regname64 |
16 | 16 | } | |
17 | create = dis_x86.create64 | ||
18 | disass = dis_x86.disass64 | ||
19 | regname = dis_x86.regname64 | ||
20 | 17 | ||
diff --git a/src/jit/dis_x86.lua b/src/jit/dis_x86.lua index 9246820d..364a3184 100644 --- a/src/jit/dis_x86.lua +++ b/src/jit/dis_x86.lua | |||
@@ -15,19 +15,20 @@ | |||
15 | -- Intel and AMD manuals. The supported instruction set is quite extensive | 15 | -- Intel and AMD manuals. The supported instruction set is quite extensive |
16 | -- and reflects what a current generation Intel or AMD CPU implements in | 16 | -- and reflects what a current generation Intel or AMD CPU implements in |
17 | -- 32 bit and 64 bit mode. Yes, this includes MMX, SSE, SSE2, SSE3, SSSE3, | 17 | -- 32 bit and 64 bit mode. Yes, this includes MMX, SSE, SSE2, SSE3, SSSE3, |
18 | -- SSE4.1, SSE4.2, SSE4a and even privileged and hypervisor (VMX/SVM) | 18 | -- SSE4.1, SSE4.2, SSE4a, AVX, AVX2 and even privileged and hypervisor |
19 | -- instructions. | 19 | -- (VMX/SVM) instructions. |
20 | -- | 20 | -- |
21 | -- Notes: | 21 | -- Notes: |
22 | -- * The (useless) a16 prefix, 3DNow and pre-586 opcodes are unsupported. | 22 | -- * The (useless) a16 prefix, 3DNow and pre-586 opcodes are unsupported. |
23 | -- * No attempt at optimization has been made -- it's fast enough for my needs. | 23 | -- * No attempt at optimization has been made -- it's fast enough for my needs. |
24 | -- * The public API may change when more architectures are added. | ||
25 | ------------------------------------------------------------------------------ | 24 | ------------------------------------------------------------------------------ |
26 | 25 | ||
27 | local type = type | 26 | local type = type |
28 | local sub, byte, format = string.sub, string.byte, string.format | 27 | local sub, byte, format = string.sub, string.byte, string.format |
29 | local match, gmatch, gsub = string.match, string.gmatch, string.gsub | 28 | local match, gmatch, gsub = string.match, string.gmatch, string.gsub |
30 | local lower, rep = string.lower, string.rep | 29 | local lower, rep = string.lower, string.rep |
30 | local bit = require("bit") | ||
31 | local tohex = bit.tohex | ||
31 | 32 | ||
32 | -- Map for 1st opcode byte in 32 bit mode. Ugly? Well ... read on. | 33 | -- Map for 1st opcode byte in 32 bit mode. Ugly? Well ... read on. |
33 | local map_opc1_32 = { | 34 | local map_opc1_32 = { |
@@ -76,7 +77,7 @@ local map_opc1_32 = { | |||
76 | "movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi", | 77 | "movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi", |
77 | "movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI", | 78 | "movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI", |
78 | --Cx | 79 | --Cx |
79 | "shift!Bmu","shift!Vmu","retBw","ret","$lesVrm","$ldsVrm","movBmi","movVmi", | 80 | "shift!Bmu","shift!Vmu","retBw","ret","vex*3$lesVrm","vex*2$ldsVrm","movBmi","movVmi", |
80 | "enterBwu","leave","retfBw","retf","int3","intBu","into","iretVS", | 81 | "enterBwu","leave","retfBw","retf","int3","intBu","into","iretVS", |
81 | --Dx | 82 | --Dx |
82 | "shift!Bm1","shift!Vm1","shift!Bmc","shift!Vmc","aamBu","aadBu","salc","xlatb", | 83 | "shift!Bm1","shift!Vm1","shift!Bmc","shift!Vmc","aamBu","aadBu","salc","xlatb", |
@@ -101,7 +102,7 @@ local map_opc1_64 = setmetatable({ | |||
101 | [0x44]="rex*r", [0x45]="rex*rb", [0x46]="rex*rx", [0x47]="rex*rxb", | 102 | [0x44]="rex*r", [0x45]="rex*rb", [0x46]="rex*rx", [0x47]="rex*rxb", |
102 | [0x48]="rex*w", [0x49]="rex*wb", [0x4a]="rex*wx", [0x4b]="rex*wxb", | 103 | [0x48]="rex*w", [0x49]="rex*wb", [0x4a]="rex*wx", [0x4b]="rex*wxb", |
103 | [0x4c]="rex*wr", [0x4d]="rex*wrb", [0x4e]="rex*wrx", [0x4f]="rex*wrxb", | 104 | [0x4c]="rex*wr", [0x4d]="rex*wrb", [0x4e]="rex*wrx", [0x4f]="rex*wrxb", |
104 | [0x82]=false, [0x9a]=false, [0xc4]=false, [0xc5]=false, [0xce]=false, | 105 | [0x82]=false, [0x9a]=false, [0xc4]="vex*3", [0xc5]="vex*2", [0xce]=false, |
105 | [0xd4]=false, [0xd5]=false, [0xd6]=false, [0xea]=false, | 106 | [0xd4]=false, [0xd5]=false, [0xd6]=false, [0xea]=false, |
106 | }, { __index = map_opc1_32 }) | 107 | }, { __index = map_opc1_32 }) |
107 | 108 | ||
@@ -112,12 +113,12 @@ local map_opc2 = { | |||
112 | [0]="sldt!Dmp","sgdt!Ump","larVrm","lslVrm",nil,"syscall","clts","sysret", | 113 | [0]="sldt!Dmp","sgdt!Ump","larVrm","lslVrm",nil,"syscall","clts","sysret", |
113 | "invd","wbinvd",nil,"ud1",nil,"$prefetch!Bm","femms","3dnowMrmu", | 114 | "invd","wbinvd",nil,"ud1",nil,"$prefetch!Bm","femms","3dnowMrmu", |
114 | --1x | 115 | --1x |
115 | "movupsXrm|movssXrm|movupdXrm|movsdXrm", | 116 | "movupsXrm|movssXrvm|movupdXrm|movsdXrvm", |
116 | "movupsXmr|movssXmr|movupdXmr|movsdXmr", | 117 | "movupsXmr|movssXmvr|movupdXmr|movsdXmvr", |
117 | "movhlpsXrm$movlpsXrm|movsldupXrm|movlpdXrm|movddupXrm", | 118 | "movhlpsXrm$movlpsXrm|movsldupXrm|movlpdXrm|movddupXrm", |
118 | "movlpsXmr||movlpdXmr", | 119 | "movlpsXmr||movlpdXmr", |
119 | "unpcklpsXrm||unpcklpdXrm", | 120 | "unpcklpsXrvm||unpcklpdXrvm", |
120 | "unpckhpsXrm||unpckhpdXrm", | 121 | "unpckhpsXrvm||unpckhpdXrvm", |
121 | "movlhpsXrm$movhpsXrm|movshdupXrm|movhpdXrm", | 122 | "movlhpsXrm$movhpsXrm|movshdupXrm|movhpdXrm", |
122 | "movhpsXmr||movhpdXmr", | 123 | "movhpsXmr||movhpdXmr", |
123 | "$prefetcht!Bm","hintnopVm","hintnopVm","hintnopVm", | 124 | "$prefetcht!Bm","hintnopVm","hintnopVm","hintnopVm", |
@@ -126,7 +127,7 @@ local map_opc2 = { | |||
126 | "movUmx$","movUmy$","movUxm$","movUym$","movUmz$",nil,"movUzm$",nil, | 127 | "movUmx$","movUmy$","movUxm$","movUym$","movUmz$",nil,"movUzm$",nil, |
127 | "movapsXrm||movapdXrm", | 128 | "movapsXrm||movapdXrm", |
128 | "movapsXmr||movapdXmr", | 129 | "movapsXmr||movapdXmr", |
129 | "cvtpi2psXrMm|cvtsi2ssXrVmt|cvtpi2pdXrMm|cvtsi2sdXrVmt", | 130 | "cvtpi2psXrMm|cvtsi2ssXrvVmt|cvtpi2pdXrMm|cvtsi2sdXrvVmt", |
130 | "movntpsXmr|movntssXmr|movntpdXmr|movntsdXmr", | 131 | "movntpsXmr|movntssXmr|movntpdXmr|movntsdXmr", |
131 | "cvttps2piMrXm|cvttss2siVrXm|cvttpd2piMrXm|cvttsd2siVrXm", | 132 | "cvttps2piMrXm|cvttss2siVrXm|cvttpd2piMrXm|cvttsd2siVrXm", |
132 | "cvtps2piMrXm|cvtss2siVrXm|cvtpd2piMrXm|cvtsd2siVrXm", | 133 | "cvtps2piMrXm|cvtss2siVrXm|cvtpd2piMrXm|cvtsd2siVrXm", |
@@ -142,27 +143,27 @@ local map_opc2 = { | |||
142 | "cmovlVrm","cmovgeVrm","cmovleVrm","cmovgVrm", | 143 | "cmovlVrm","cmovgeVrm","cmovleVrm","cmovgVrm", |
143 | --5x | 144 | --5x |
144 | "movmskpsVrXm$||movmskpdVrXm$","sqrtpsXrm|sqrtssXrm|sqrtpdXrm|sqrtsdXrm", | 145 | "movmskpsVrXm$||movmskpdVrXm$","sqrtpsXrm|sqrtssXrm|sqrtpdXrm|sqrtsdXrm", |
145 | "rsqrtpsXrm|rsqrtssXrm","rcppsXrm|rcpssXrm", | 146 | "rsqrtpsXrm|rsqrtssXrvm","rcppsXrm|rcpssXrvm", |
146 | "andpsXrm||andpdXrm","andnpsXrm||andnpdXrm", | 147 | "andpsXrvm||andpdXrvm","andnpsXrvm||andnpdXrvm", |
147 | "orpsXrm||orpdXrm","xorpsXrm||xorpdXrm", | 148 | "orpsXrvm||orpdXrvm","xorpsXrvm||xorpdXrvm", |
148 | "addpsXrm|addssXrm|addpdXrm|addsdXrm","mulpsXrm|mulssXrm|mulpdXrm|mulsdXrm", | 149 | "addpsXrvm|addssXrvm|addpdXrvm|addsdXrvm","mulpsXrvm|mulssXrvm|mulpdXrvm|mulsdXrvm", |
149 | "cvtps2pdXrm|cvtss2sdXrm|cvtpd2psXrm|cvtsd2ssXrm", | 150 | "cvtps2pdXrm|cvtss2sdXrvm|cvtpd2psXrm|cvtsd2ssXrvm", |
150 | "cvtdq2psXrm|cvttps2dqXrm|cvtps2dqXrm", | 151 | "cvtdq2psXrm|cvttps2dqXrm|cvtps2dqXrm", |
151 | "subpsXrm|subssXrm|subpdXrm|subsdXrm","minpsXrm|minssXrm|minpdXrm|minsdXrm", | 152 | "subpsXrvm|subssXrvm|subpdXrvm|subsdXrvm","minpsXrvm|minssXrvm|minpdXrvm|minsdXrvm", |
152 | "divpsXrm|divssXrm|divpdXrm|divsdXrm","maxpsXrm|maxssXrm|maxpdXrm|maxsdXrm", | 153 | "divpsXrvm|divssXrvm|divpdXrvm|divsdXrvm","maxpsXrvm|maxssXrvm|maxpdXrvm|maxsdXrvm", |
153 | --6x | 154 | --6x |
154 | "punpcklbwPrm","punpcklwdPrm","punpckldqPrm","packsswbPrm", | 155 | "punpcklbwPrvm","punpcklwdPrvm","punpckldqPrvm","packsswbPrvm", |
155 | "pcmpgtbPrm","pcmpgtwPrm","pcmpgtdPrm","packuswbPrm", | 156 | "pcmpgtbPrvm","pcmpgtwPrvm","pcmpgtdPrvm","packuswbPrvm", |
156 | "punpckhbwPrm","punpckhwdPrm","punpckhdqPrm","packssdwPrm", | 157 | "punpckhbwPrvm","punpckhwdPrvm","punpckhdqPrvm","packssdwPrvm", |
157 | "||punpcklqdqXrm","||punpckhqdqXrm", | 158 | "||punpcklqdqXrvm","||punpckhqdqXrvm", |
158 | "movPrVSm","movqMrm|movdquXrm|movdqaXrm", | 159 | "movPrVSm","movqMrm|movdquXrm|movdqaXrm", |
159 | --7x | 160 | --7x |
160 | "pshufwMrmu|pshufhwXrmu|pshufdXrmu|pshuflwXrmu","pshiftw!Pmu", | 161 | "pshufwMrmu|pshufhwXrmu|pshufdXrmu|pshuflwXrmu","pshiftw!Pvmu", |
161 | "pshiftd!Pmu","pshiftq!Mmu||pshiftdq!Xmu", | 162 | "pshiftd!Pvmu","pshiftq!Mvmu||pshiftdq!Xvmu", |
162 | "pcmpeqbPrm","pcmpeqwPrm","pcmpeqdPrm","emms|", | 163 | "pcmpeqbPrvm","pcmpeqwPrvm","pcmpeqdPrvm","emms*|", |
163 | "vmreadUmr||extrqXmuu$|insertqXrmuu$","vmwriteUrm||extrqXrm$|insertqXrm$", | 164 | "vmreadUmr||extrqXmuu$|insertqXrmuu$","vmwriteUrm||extrqXrm$|insertqXrm$", |
164 | nil,nil, | 165 | nil,nil, |
165 | "||haddpdXrm|haddpsXrm","||hsubpdXrm|hsubpsXrm", | 166 | "||haddpdXrvm|haddpsXrvm","||hsubpdXrvm|hsubpsXrvm", |
166 | "movVSmMr|movqXrm|movVSmXr","movqMmr|movdquXmr|movdqaXmr", | 167 | "movVSmMr|movqXrm|movVSmXr","movqMmr|movdquXmr|movdqaXmr", |
167 | --8x | 168 | --8x |
168 | "joVj","jnoVj","jbVj","jnbVj","jzVj","jnzVj","jbeVj","jaVj", | 169 | "joVj","jnoVj","jbVj","jnbVj","jzVj","jnzVj","jbeVj","jaVj", |
@@ -180,27 +181,27 @@ nil,nil, | |||
180 | "bsfVrm","bsrVrm|lzcntVrm|bsrWrm","movsxVrBmt","movsxVrWmt", | 181 | "bsfVrm","bsrVrm|lzcntVrm|bsrWrm","movsxVrBmt","movsxVrWmt", |
181 | --Cx | 182 | --Cx |
182 | "xaddBmr","xaddVmr", | 183 | "xaddBmr","xaddVmr", |
183 | "cmppsXrmu|cmpssXrmu|cmppdXrmu|cmpsdXrmu","$movntiVmr|", | 184 | "cmppsXrvmu|cmpssXrvmu|cmppdXrvmu|cmpsdXrvmu","$movntiVmr|", |
184 | "pinsrwPrWmu","pextrwDrPmu", | 185 | "pinsrwPrvWmu","pextrwDrPmu", |
185 | "shufpsXrmu||shufpdXrmu","$cmpxchg!Qmp", | 186 | "shufpsXrvmu||shufpdXrvmu","$cmpxchg!Qmp", |
186 | "bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR", | 187 | "bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR", |
187 | --Dx | 188 | --Dx |
188 | "||addsubpdXrm|addsubpsXrm","psrlwPrm","psrldPrm","psrlqPrm", | 189 | "||addsubpdXrvm|addsubpsXrvm","psrlwPrvm","psrldPrvm","psrlqPrvm", |
189 | "paddqPrm","pmullwPrm", | 190 | "paddqPrvm","pmullwPrvm", |
190 | "|movq2dqXrMm|movqXmr|movdq2qMrXm$","pmovmskbVrMm||pmovmskbVrXm", | 191 | "|movq2dqXrMm|movqXmr|movdq2qMrXm$","pmovmskbVrMm||pmovmskbVrXm", |
191 | "psubusbPrm","psubuswPrm","pminubPrm","pandPrm", | 192 | "psubusbPrvm","psubuswPrvm","pminubPrvm","pandPrvm", |
192 | "paddusbPrm","padduswPrm","pmaxubPrm","pandnPrm", | 193 | "paddusbPrvm","padduswPrvm","pmaxubPrvm","pandnPrvm", |
193 | --Ex | 194 | --Ex |
194 | "pavgbPrm","psrawPrm","psradPrm","pavgwPrm", | 195 | "pavgbPrvm","psrawPrvm","psradPrvm","pavgwPrvm", |
195 | "pmulhuwPrm","pmulhwPrm", | 196 | "pmulhuwPrvm","pmulhwPrvm", |
196 | "|cvtdq2pdXrm|cvttpd2dqXrm|cvtpd2dqXrm","$movntqMmr||$movntdqXmr", | 197 | "|cvtdq2pdXrm|cvttpd2dqXrm|cvtpd2dqXrm","$movntqMmr||$movntdqXmr", |
197 | "psubsbPrm","psubswPrm","pminswPrm","porPrm", | 198 | "psubsbPrvm","psubswPrvm","pminswPrvm","porPrvm", |
198 | "paddsbPrm","paddswPrm","pmaxswPrm","pxorPrm", | 199 | "paddsbPrvm","paddswPrvm","pmaxswPrvm","pxorPrvm", |
199 | --Fx | 200 | --Fx |
200 | "|||lddquXrm","psllwPrm","pslldPrm","psllqPrm", | 201 | "|||lddquXrm","psllwPrvm","pslldPrvm","psllqPrvm", |
201 | "pmuludqPrm","pmaddwdPrm","psadbwPrm","maskmovqMrm||maskmovdquXrm$", | 202 | "pmuludqPrvm","pmaddwdPrvm","psadbwPrvm","maskmovqMrm||maskmovdquXrm$", |
202 | "psubbPrm","psubwPrm","psubdPrm","psubqPrm", | 203 | "psubbPrvm","psubwPrvm","psubdPrvm","psubqPrvm", |
203 | "paddbPrm","paddwPrm","padddPrm","ud", | 204 | "paddbPrvm","paddwPrvm","padddPrvm","ud", |
204 | } | 205 | } |
205 | assert(map_opc2[255] == "ud") | 206 | assert(map_opc2[255] == "ud") |
206 | 207 | ||
@@ -208,49 +209,91 @@ assert(map_opc2[255] == "ud") | |||
208 | local map_opc3 = { | 209 | local map_opc3 = { |
209 | ["38"] = { -- [66] 0f 38 xx | 210 | ["38"] = { -- [66] 0f 38 xx |
210 | --0x | 211 | --0x |
211 | [0]="pshufbPrm","phaddwPrm","phadddPrm","phaddswPrm", | 212 | [0]="pshufbPrvm","phaddwPrvm","phadddPrvm","phaddswPrvm", |
212 | "pmaddubswPrm","phsubwPrm","phsubdPrm","phsubswPrm", | 213 | "pmaddubswPrvm","phsubwPrvm","phsubdPrvm","phsubswPrvm", |
213 | "psignbPrm","psignwPrm","psigndPrm","pmulhrswPrm", | 214 | "psignbPrvm","psignwPrvm","psigndPrvm","pmulhrswPrvm", |
214 | nil,nil,nil,nil, | 215 | "||permilpsXrvm","||permilpdXrvm",nil,nil, |
215 | --1x | 216 | --1x |
216 | "||pblendvbXrma",nil,nil,nil, | 217 | "||pblendvbXrma",nil,nil,nil, |
217 | "||blendvpsXrma","||blendvpdXrma",nil,"||ptestXrm", | 218 | "||blendvpsXrma","||blendvpdXrma","||permpsXrvm","||ptestXrm", |
218 | nil,nil,nil,nil, | 219 | "||broadcastssXrm","||broadcastsdXrm","||broadcastf128XrlXm",nil, |
219 | "pabsbPrm","pabswPrm","pabsdPrm",nil, | 220 | "pabsbPrm","pabswPrm","pabsdPrm",nil, |
220 | --2x | 221 | --2x |
221 | "||pmovsxbwXrm","||pmovsxbdXrm","||pmovsxbqXrm","||pmovsxwdXrm", | 222 | "||pmovsxbwXrm","||pmovsxbdXrm","||pmovsxbqXrm","||pmovsxwdXrm", |
222 | "||pmovsxwqXrm","||pmovsxdqXrm",nil,nil, | 223 | "||pmovsxwqXrm","||pmovsxdqXrm",nil,nil, |
223 | "||pmuldqXrm","||pcmpeqqXrm","||$movntdqaXrm","||packusdwXrm", | 224 | "||pmuldqXrvm","||pcmpeqqXrvm","||$movntdqaXrm","||packusdwXrvm", |
224 | nil,nil,nil,nil, | 225 | "||maskmovpsXrvm","||maskmovpdXrvm","||maskmovpsXmvr","||maskmovpdXmvr", |
225 | --3x | 226 | --3x |
226 | "||pmovzxbwXrm","||pmovzxbdXrm","||pmovzxbqXrm","||pmovzxwdXrm", | 227 | "||pmovzxbwXrm","||pmovzxbdXrm","||pmovzxbqXrm","||pmovzxwdXrm", |
227 | "||pmovzxwqXrm","||pmovzxdqXrm",nil,"||pcmpgtqXrm", | 228 | "||pmovzxwqXrm","||pmovzxdqXrm","||permdXrvm","||pcmpgtqXrvm", |
228 | "||pminsbXrm","||pminsdXrm","||pminuwXrm","||pminudXrm", | 229 | "||pminsbXrvm","||pminsdXrvm","||pminuwXrvm","||pminudXrvm", |
229 | "||pmaxsbXrm","||pmaxsdXrm","||pmaxuwXrm","||pmaxudXrm", | 230 | "||pmaxsbXrvm","||pmaxsdXrvm","||pmaxuwXrvm","||pmaxudXrvm", |
230 | --4x | 231 | --4x |
231 | "||pmulddXrm","||phminposuwXrm", | 232 | "||pmulddXrvm","||phminposuwXrm",nil,nil, |
233 | nil,"||psrlvVSXrvm","||psravdXrvm","||psllvVSXrvm", | ||
234 | --5x | ||
235 | [0x58] = "||pbroadcastdXrlXm",[0x59] = "||pbroadcastqXrlXm", | ||
236 | [0x5a] = "||broadcasti128XrlXm", | ||
237 | --7x | ||
238 | [0x78] = "||pbroadcastbXrlXm",[0x79] = "||pbroadcastwXrlXm", | ||
239 | --8x | ||
240 | [0x8c] = "||pmaskmovXrvVSm", | ||
241 | [0x8e] = "||pmaskmovVSmXvr", | ||
242 | --9x | ||
243 | [0x96] = "||fmaddsub132pHXrvm",[0x97] = "||fmsubadd132pHXrvm", | ||
244 | [0x98] = "||fmadd132pHXrvm",[0x99] = "||fmadd132sHXrvm", | ||
245 | [0x9a] = "||fmsub132pHXrvm",[0x9b] = "||fmsub132sHXrvm", | ||
246 | [0x9c] = "||fnmadd132pHXrvm",[0x9d] = "||fnmadd132sHXrvm", | ||
247 | [0x9e] = "||fnmsub132pHXrvm",[0x9f] = "||fnmsub132sHXrvm", | ||
248 | --Ax | ||
249 | [0xa6] = "||fmaddsub213pHXrvm",[0xa7] = "||fmsubadd213pHXrvm", | ||
250 | [0xa8] = "||fmadd213pHXrvm",[0xa9] = "||fmadd213sHXrvm", | ||
251 | [0xaa] = "||fmsub213pHXrvm",[0xab] = "||fmsub213sHXrvm", | ||
252 | [0xac] = "||fnmadd213pHXrvm",[0xad] = "||fnmadd213sHXrvm", | ||
253 | [0xae] = "||fnmsub213pHXrvm",[0xaf] = "||fnmsub213sHXrvm", | ||
254 | --Bx | ||
255 | [0xb6] = "||fmaddsub231pHXrvm",[0xb7] = "||fmsubadd231pHXrvm", | ||
256 | [0xb8] = "||fmadd231pHXrvm",[0xb9] = "||fmadd231sHXrvm", | ||
257 | [0xba] = "||fmsub231pHXrvm",[0xbb] = "||fmsub231sHXrvm", | ||
258 | [0xbc] = "||fnmadd231pHXrvm",[0xbd] = "||fnmadd231sHXrvm", | ||
259 | [0xbe] = "||fnmsub231pHXrvm",[0xbf] = "||fnmsub231sHXrvm", | ||
260 | --Dx | ||
261 | [0xdc] = "||aesencXrvm", [0xdd] = "||aesenclastXrvm", | ||
262 | [0xde] = "||aesdecXrvm", [0xdf] = "||aesdeclastXrvm", | ||
232 | --Fx | 263 | --Fx |
233 | [0xf0] = "|||crc32TrBmt",[0xf1] = "|||crc32TrVmt", | 264 | [0xf0] = "|||crc32TrBmt",[0xf1] = "|||crc32TrVmt", |
265 | [0xf7] = "| sarxVrmv| shlxVrmv| shrxVrmv", | ||
234 | }, | 266 | }, |
235 | 267 | ||
236 | ["3a"] = { -- [66] 0f 3a xx | 268 | ["3a"] = { -- [66] 0f 3a xx |
237 | --0x | 269 | --0x |
238 | [0x00]=nil,nil,nil,nil,nil,nil,nil,nil, | 270 | [0x00]="||permqXrmu","||permpdXrmu","||pblenddXrvmu",nil, |
239 | "||roundpsXrmu","||roundpdXrmu","||roundssXrmu","||roundsdXrmu", | 271 | "||permilpsXrmu","||permilpdXrmu","||perm2f128Xrvmu",nil, |
240 | "||blendpsXrmu","||blendpdXrmu","||pblendwXrmu","palignrPrmu", | 272 | "||roundpsXrmu","||roundpdXrmu","||roundssXrvmu","||roundsdXrvmu", |
273 | "||blendpsXrvmu","||blendpdXrvmu","||pblendwXrvmu","palignrPrvmu", | ||
241 | --1x | 274 | --1x |
242 | nil,nil,nil,nil, | 275 | nil,nil,nil,nil, |
243 | "||pextrbVmXru","||pextrwVmXru","||pextrVmSXru","||extractpsVmXru", | 276 | "||pextrbVmXru","||pextrwVmXru","||pextrVmSXru","||extractpsVmXru", |
244 | nil,nil,nil,nil,nil,nil,nil,nil, | 277 | "||insertf128XrvlXmu","||extractf128XlXmYru",nil,nil, |
278 | nil,nil,nil,nil, | ||
245 | --2x | 279 | --2x |
246 | "||pinsrbXrVmu","||insertpsXrmu","||pinsrXrVmuS",nil, | 280 | "||pinsrbXrvVmu","||insertpsXrvmu","||pinsrXrvVmuS",nil, |
281 | --3x | ||
282 | [0x38] = "||inserti128Xrvmu",[0x39] = "||extracti128XlXmYru", | ||
247 | --4x | 283 | --4x |
248 | [0x40] = "||dppsXrmu", | 284 | [0x40] = "||dppsXrvmu", |
249 | [0x41] = "||dppdXrmu", | 285 | [0x41] = "||dppdXrvmu", |
250 | [0x42] = "||mpsadbwXrmu", | 286 | [0x42] = "||mpsadbwXrvmu", |
287 | [0x44] = "||pclmulqdqXrvmu", | ||
288 | [0x46] = "||perm2i128Xrvmu", | ||
289 | [0x4a] = "||blendvpsXrvmb",[0x4b] = "||blendvpdXrvmb", | ||
290 | [0x4c] = "||pblendvbXrvmb", | ||
251 | --6x | 291 | --6x |
252 | [0x60] = "||pcmpestrmXrmu",[0x61] = "||pcmpestriXrmu", | 292 | [0x60] = "||pcmpestrmXrmu",[0x61] = "||pcmpestriXrmu", |
253 | [0x62] = "||pcmpistrmXrmu",[0x63] = "||pcmpistriXrmu", | 293 | [0x62] = "||pcmpistrmXrmu",[0x63] = "||pcmpistriXrmu", |
294 | [0xdf] = "||aeskeygenassistXrmu", | ||
295 | --Fx | ||
296 | [0xf0] = "||| rorxVrmu", | ||
254 | }, | 297 | }, |
255 | } | 298 | } |
256 | 299 | ||
@@ -354,17 +397,19 @@ local map_regs = { | |||
354 | "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" }, -- No x64 ext! | 397 | "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" }, -- No x64 ext! |
355 | X = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", | 398 | X = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", |
356 | "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15" }, | 399 | "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15" }, |
400 | Y = { "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", | ||
401 | "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15" }, | ||
357 | } | 402 | } |
358 | local map_segregs = { "es", "cs", "ss", "ds", "fs", "gs", "segr6", "segr7" } | 403 | local map_segregs = { "es", "cs", "ss", "ds", "fs", "gs", "segr6", "segr7" } |
359 | 404 | ||
360 | -- Maps for size names. | 405 | -- Maps for size names. |
361 | local map_sz2n = { | 406 | local map_sz2n = { |
362 | B = 1, W = 2, D = 4, Q = 8, M = 8, X = 16, | 407 | B = 1, W = 2, D = 4, Q = 8, M = 8, X = 16, Y = 32, |
363 | } | 408 | } |
364 | local map_sz2prefix = { | 409 | local map_sz2prefix = { |
365 | B = "byte", W = "word", D = "dword", | 410 | B = "byte", W = "word", D = "dword", |
366 | Q = "qword", | 411 | Q = "qword", |
367 | M = "qword", X = "xword", | 412 | M = "qword", X = "xword", Y = "yword", |
368 | F = "dword", G = "qword", -- No need for sizes/register names for these two. | 413 | F = "dword", G = "qword", -- No need for sizes/register names for these two. |
369 | } | 414 | } |
370 | 415 | ||
@@ -387,10 +432,13 @@ local function putop(ctx, text, operands) | |||
387 | if ctx.rep then text = ctx.rep.." "..text; ctx.rep = false end | 432 | if ctx.rep then text = ctx.rep.." "..text; ctx.rep = false end |
388 | if ctx.rex then | 433 | if ctx.rex then |
389 | local t = (ctx.rexw and "w" or "")..(ctx.rexr and "r" or "").. | 434 | local t = (ctx.rexw and "w" or "")..(ctx.rexr and "r" or "").. |
390 | (ctx.rexx and "x" or "")..(ctx.rexb and "b" or "") | 435 | (ctx.rexx and "x" or "")..(ctx.rexb and "b" or "").. |
391 | if t ~= "" then text = "rex."..t.." "..text end | 436 | (ctx.vexl and "l" or "") |
437 | if ctx.vexv and ctx.vexv ~= 0 then t = t.."v"..ctx.vexv end | ||
438 | if t ~= "" then text = ctx.rex.."."..t.." "..gsub(text, "^ ", "") | ||
439 | elseif ctx.rex == "vex" then text = gsub("v"..text, "^v ", "") end | ||
392 | ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false | 440 | ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false |
393 | ctx.rex = false | 441 | ctx.rex = false; ctx.vexl = false; ctx.vexv = false |
394 | end | 442 | end |
395 | if ctx.seg then | 443 | if ctx.seg then |
396 | local text2, n = gsub(text, "%[", "["..ctx.seg..":") | 444 | local text2, n = gsub(text, "%[", "["..ctx.seg..":") |
@@ -405,6 +453,7 @@ local function putop(ctx, text, operands) | |||
405 | end | 453 | end |
406 | ctx.out(format("%08x %s%s\n", ctx.addr+ctx.start, hex, text)) | 454 | ctx.out(format("%08x %s%s\n", ctx.addr+ctx.start, hex, text)) |
407 | ctx.mrm = false | 455 | ctx.mrm = false |
456 | ctx.vexv = false | ||
408 | ctx.start = pos | 457 | ctx.start = pos |
409 | ctx.imm = nil | 458 | ctx.imm = nil |
410 | end | 459 | end |
@@ -413,7 +462,7 @@ end | |||
413 | local function clearprefixes(ctx) | 462 | local function clearprefixes(ctx) |
414 | ctx.o16 = false; ctx.seg = false; ctx.lock = false; ctx.rep = false | 463 | ctx.o16 = false; ctx.seg = false; ctx.lock = false; ctx.rep = false |
415 | ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false | 464 | ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false |
416 | ctx.rex = false; ctx.a32 = false | 465 | ctx.rex = false; ctx.a32 = false; ctx.vexl = false |
417 | end | 466 | end |
418 | 467 | ||
419 | -- Fallback for incomplete opcodes at the end. | 468 | -- Fallback for incomplete opcodes at the end. |
@@ -450,9 +499,9 @@ end | |||
450 | -- Process pattern string and generate the operands. | 499 | -- Process pattern string and generate the operands. |
451 | local function putpat(ctx, name, pat) | 500 | local function putpat(ctx, name, pat) |
452 | local operands, regs, sz, mode, sp, rm, sc, rx, sdisp | 501 | local operands, regs, sz, mode, sp, rm, sc, rx, sdisp |
453 | local code, pos, stop = ctx.code, ctx.pos, ctx.stop | 502 | local code, pos, stop, vexl = ctx.code, ctx.pos, ctx.stop, ctx.vexl |
454 | 503 | ||
455 | -- Chars used: 1DFGIMPQRSTUVWXacdfgijmoprstuwxyz | 504 | -- Chars used: 1DFGHIMPQRSTUVWXYabcdfgijlmoprstuvwxyz |
456 | for p in gmatch(pat, ".") do | 505 | for p in gmatch(pat, ".") do |
457 | local x = nil | 506 | local x = nil |
458 | if p == "V" or p == "U" then | 507 | if p == "V" or p == "U" then |
@@ -467,12 +516,17 @@ local function putpat(ctx, name, pat) | |||
467 | elseif p == "B" then | 516 | elseif p == "B" then |
468 | sz = "B" | 517 | sz = "B" |
469 | regs = ctx.rex and map_regs.B64 or map_regs.B | 518 | regs = ctx.rex and map_regs.B64 or map_regs.B |
470 | elseif match(p, "[WDQMXFG]") then | 519 | elseif match(p, "[WDQMXYFG]") then |
471 | sz = p | 520 | sz = p |
521 | if sz == "X" and vexl then sz = "Y"; ctx.vexl = false end | ||
472 | regs = map_regs[sz] | 522 | regs = map_regs[sz] |
473 | elseif p == "P" then | 523 | elseif p == "P" then |
474 | sz = ctx.o16 and "X" or "M"; ctx.o16 = false | 524 | sz = ctx.o16 and "X" or "M"; ctx.o16 = false |
525 | if sz == "X" and vexl then sz = "Y"; ctx.vexl = false end | ||
475 | regs = map_regs[sz] | 526 | regs = map_regs[sz] |
527 | elseif p == "H" then | ||
528 | name = name..(ctx.rexw and "d" or "s") | ||
529 | ctx.rexw = false | ||
476 | elseif p == "S" then | 530 | elseif p == "S" then |
477 | name = name..lower(sz) | 531 | name = name..lower(sz) |
478 | elseif p == "s" then | 532 | elseif p == "s" then |
@@ -484,6 +538,10 @@ local function putpat(ctx, name, pat) | |||
484 | local imm = getimm(ctx, pos, 1); if not imm then return end | 538 | local imm = getimm(ctx, pos, 1); if not imm then return end |
485 | x = format("0x%02x", imm) | 539 | x = format("0x%02x", imm) |
486 | pos = pos+1 | 540 | pos = pos+1 |
541 | elseif p == "b" then | ||
542 | local imm = getimm(ctx, pos, 1); if not imm then return end | ||
543 | x = regs[imm/16+1] | ||
544 | pos = pos+1 | ||
487 | elseif p == "w" then | 545 | elseif p == "w" then |
488 | local imm = getimm(ctx, pos, 2); if not imm then return end | 546 | local imm = getimm(ctx, pos, 2); if not imm then return end |
489 | x = format("0x%x", imm) | 547 | x = format("0x%x", imm) |
@@ -532,7 +590,7 @@ local function putpat(ctx, name, pat) | |||
532 | local lo = imm % 0x1000000 | 590 | local lo = imm % 0x1000000 |
533 | x = format("0x%02x%06x", (imm-lo) / 0x1000000, lo) | 591 | x = format("0x%02x%06x", (imm-lo) / 0x1000000, lo) |
534 | else | 592 | else |
535 | x = format("0x%08x", imm) | 593 | x = "0x"..tohex(imm) |
536 | end | 594 | end |
537 | elseif p == "R" then | 595 | elseif p == "R" then |
538 | local r = byte(code, pos-1, pos-1)%8 | 596 | local r = byte(code, pos-1, pos-1)%8 |
@@ -616,8 +674,13 @@ local function putpat(ctx, name, pat) | |||
616 | else | 674 | else |
617 | x = "CR"..sp | 675 | x = "CR"..sp |
618 | end | 676 | end |
677 | elseif p == "v" then | ||
678 | if ctx.vexv then | ||
679 | x = regs[ctx.vexv+1]; ctx.vexv = false | ||
680 | end | ||
619 | elseif p == "y" then x = "DR"..sp | 681 | elseif p == "y" then x = "DR"..sp |
620 | elseif p == "z" then x = "TR"..sp | 682 | elseif p == "z" then x = "TR"..sp |
683 | elseif p == "l" then vexl = false | ||
621 | elseif p == "t" then | 684 | elseif p == "t" then |
622 | else | 685 | else |
623 | error("bad pattern `"..pat.."'") | 686 | error("bad pattern `"..pat.."'") |
@@ -692,7 +755,8 @@ map_act = { | |||
692 | B = putpat, W = putpat, D = putpat, Q = putpat, | 755 | B = putpat, W = putpat, D = putpat, Q = putpat, |
693 | V = putpat, U = putpat, T = putpat, | 756 | V = putpat, U = putpat, T = putpat, |
694 | M = putpat, X = putpat, P = putpat, | 757 | M = putpat, X = putpat, P = putpat, |
695 | F = putpat, G = putpat, | 758 | F = putpat, G = putpat, Y = putpat, |
759 | H = putpat, | ||
696 | 760 | ||
697 | -- Collect prefixes. | 761 | -- Collect prefixes. |
698 | [":"] = function(ctx, name, pat) | 762 | [":"] = function(ctx, name, pat) |
@@ -753,15 +817,68 @@ map_act = { | |||
753 | 817 | ||
754 | -- REX prefix. | 818 | -- REX prefix. |
755 | rex = function(ctx, name, pat) | 819 | rex = function(ctx, name, pat) |
756 | if ctx.rex then return unknown(ctx) end -- Only 1 REX prefix allowed. | 820 | if ctx.rex then return unknown(ctx) end -- Only 1 REX or VEX prefix allowed. |
757 | for p in gmatch(pat, ".") do ctx["rex"..p] = true end | 821 | for p in gmatch(pat, ".") do ctx["rex"..p] = true end |
758 | ctx.rex = true | 822 | ctx.rex = "rex" |
823 | end, | ||
824 | |||
825 | -- VEX prefix. | ||
826 | vex = function(ctx, name, pat) | ||
827 | if ctx.rex then return unknown(ctx) end -- Only 1 REX or VEX prefix allowed. | ||
828 | ctx.rex = "vex" | ||
829 | local pos = ctx.pos | ||
830 | if ctx.mrm then | ||
831 | ctx.mrm = nil | ||
832 | pos = pos-1 | ||
833 | end | ||
834 | local b = byte(ctx.code, pos, pos) | ||
835 | if not b then return incomplete(ctx) end | ||
836 | pos = pos+1 | ||
837 | if b < 128 then ctx.rexr = true end | ||
838 | local m = 1 | ||
839 | if pat == "3" then | ||
840 | m = b%32; b = (b-m)/32 | ||
841 | local nb = b%2; b = (b-nb)/2 | ||
842 | if nb == 0 then ctx.rexb = true end | ||
843 | local nx = b%2 | ||
844 | if nx == 0 then ctx.rexx = true end | ||
845 | b = byte(ctx.code, pos, pos) | ||
846 | if not b then return incomplete(ctx) end | ||
847 | pos = pos+1 | ||
848 | if b >= 128 then ctx.rexw = true end | ||
849 | end | ||
850 | ctx.pos = pos | ||
851 | local map | ||
852 | if m == 1 then map = map_opc2 | ||
853 | elseif m == 2 then map = map_opc3["38"] | ||
854 | elseif m == 3 then map = map_opc3["3a"] | ||
855 | else return unknown(ctx) end | ||
856 | local p = b%4; b = (b-p)/4 | ||
857 | if p == 1 then ctx.o16 = "o16" | ||
858 | elseif p == 2 then ctx.rep = "rep" | ||
859 | elseif p == 3 then ctx.rep = "repne" end | ||
860 | local l = b%2; b = (b-l)/2 | ||
861 | if l ~= 0 then ctx.vexl = true end | ||
862 | ctx.vexv = (-1-b)%16 | ||
863 | return dispatchmap(ctx, map) | ||
759 | end, | 864 | end, |
760 | 865 | ||
761 | -- Special case for nop with REX prefix. | 866 | -- Special case for nop with REX prefix. |
762 | nop = function(ctx, name, pat) | 867 | nop = function(ctx, name, pat) |
763 | return dispatch(ctx, ctx.rex and pat or "nop") | 868 | return dispatch(ctx, ctx.rex and pat or "nop") |
764 | end, | 869 | end, |
870 | |||
871 | -- Special case for 0F 77. | ||
872 | emms = function(ctx, name, pat) | ||
873 | if ctx.rex ~= "vex" then | ||
874 | return putop(ctx, "emms") | ||
875 | elseif ctx.vexl then | ||
876 | ctx.vexl = false | ||
877 | return putop(ctx, "zeroall") | ||
878 | else | ||
879 | return putop(ctx, "zeroupper") | ||
880 | end | ||
881 | end, | ||
765 | } | 882 | } |
766 | 883 | ||
767 | ------------------------------------------------------------------------------ | 884 | ------------------------------------------------------------------------------ |
@@ -782,7 +899,7 @@ local function disass_block(ctx, ofs, len) | |||
782 | end | 899 | end |
783 | 900 | ||
784 | -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). | 901 | -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). |
785 | local function create_(code, addr, out) | 902 | local function create(code, addr, out) |
786 | local ctx = {} | 903 | local ctx = {} |
787 | ctx.code = code | 904 | ctx.code = code |
788 | ctx.addr = (addr or 0) - 1 | 905 | ctx.addr = (addr or 0) - 1 |
@@ -796,8 +913,8 @@ local function create_(code, addr, out) | |||
796 | return ctx | 913 | return ctx |
797 | end | 914 | end |
798 | 915 | ||
799 | local function create64_(code, addr, out) | 916 | local function create64(code, addr, out) |
800 | local ctx = create_(code, addr, out) | 917 | local ctx = create(code, addr, out) |
801 | ctx.x64 = true | 918 | ctx.x64 = true |
802 | ctx.map1 = map_opc1_64 | 919 | ctx.map1 = map_opc1_64 |
803 | ctx.aregs = map_regs.Q | 920 | ctx.aregs = map_regs.Q |
@@ -805,32 +922,32 @@ local function create64_(code, addr, out) | |||
805 | end | 922 | end |
806 | 923 | ||
807 | -- Simple API: disassemble code (a string) at address and output via out. | 924 | -- Simple API: disassemble code (a string) at address and output via out. |
808 | local function disass_(code, addr, out) | 925 | local function disass(code, addr, out) |
809 | create_(code, addr, out):disass() | 926 | create(code, addr, out):disass() |
810 | end | 927 | end |
811 | 928 | ||
812 | local function disass64_(code, addr, out) | 929 | local function disass64(code, addr, out) |
813 | create64_(code, addr, out):disass() | 930 | create64(code, addr, out):disass() |
814 | end | 931 | end |
815 | 932 | ||
816 | -- Return register name for RID. | 933 | -- Return register name for RID. |
817 | local function regname_(r) | 934 | local function regname(r) |
818 | if r < 8 then return map_regs.D[r+1] end | 935 | if r < 8 then return map_regs.D[r+1] end |
819 | return map_regs.X[r-7] | 936 | return map_regs.X[r-7] |
820 | end | 937 | end |
821 | 938 | ||
822 | local function regname64_(r) | 939 | local function regname64(r) |
823 | if r < 16 then return map_regs.Q[r+1] end | 940 | if r < 16 then return map_regs.Q[r+1] end |
824 | return map_regs.X[r-15] | 941 | return map_regs.X[r-15] |
825 | end | 942 | end |
826 | 943 | ||
827 | -- Public module functions. | 944 | -- Public module functions. |
828 | module(...) | 945 | return { |
829 | 946 | create = create, | |
830 | create = create_ | 947 | create64 = create64, |
831 | create64 = create64_ | 948 | disass = disass, |
832 | disass = disass_ | 949 | disass64 = disass64, |
833 | disass64 = disass64_ | 950 | regname = regname, |
834 | regname = regname_ | 951 | regname64 = regname64 |
835 | regname64 = regname64_ | 952 | } |
836 | 953 | ||
diff --git a/src/jit/dump.lua b/src/jit/dump.lua index 6a2632c3..0cb38b58 100644 --- a/src/jit/dump.lua +++ b/src/jit/dump.lua | |||
@@ -55,7 +55,7 @@ | |||
55 | 55 | ||
56 | -- Cache some library functions and objects. | 56 | -- Cache some library functions and objects. |
57 | local jit = require("jit") | 57 | local jit = require("jit") |
58 | assert(jit.version_num == 20005, "LuaJIT core/library version mismatch") | 58 | assert(jit.version_num == 20100, "LuaJIT core/library version mismatch") |
59 | local jutil = require("jit.util") | 59 | local jutil = require("jit.util") |
60 | local vmdef = require("jit.vmdef") | 60 | local vmdef = require("jit.vmdef") |
61 | local funcinfo, funcbc = jutil.funcinfo, jutil.funcbc | 61 | local funcinfo, funcbc = jutil.funcinfo, jutil.funcbc |
@@ -63,7 +63,7 @@ local traceinfo, traceir, tracek = jutil.traceinfo, jutil.traceir, jutil.tracek | |||
63 | local tracemc, tracesnap = jutil.tracemc, jutil.tracesnap | 63 | local tracemc, tracesnap = jutil.tracemc, jutil.tracesnap |
64 | local traceexitstub, ircalladdr = jutil.traceexitstub, jutil.ircalladdr | 64 | local traceexitstub, ircalladdr = jutil.traceexitstub, jutil.ircalladdr |
65 | local bit = require("bit") | 65 | local bit = require("bit") |
66 | local band, shr = bit.band, bit.rshift | 66 | local band, shr, tohex = bit.band, bit.rshift, bit.tohex |
67 | local sub, gsub, format = string.sub, string.gsub, string.format | 67 | local sub, gsub, format = string.sub, string.gsub, string.format |
68 | local byte, rep = string.byte, string.rep | 68 | local byte, rep = string.byte, string.rep |
69 | local type, tostring = type, tostring | 69 | local type, tostring = type, tostring |
@@ -85,12 +85,13 @@ local nexitsym = 0 | |||
85 | local function fillsymtab_tr(tr, nexit) | 85 | local function fillsymtab_tr(tr, nexit) |
86 | local t = {} | 86 | local t = {} |
87 | symtabmt.__index = t | 87 | symtabmt.__index = t |
88 | if jit.arch == "mips" or jit.arch == "mipsel" then | 88 | if jit.arch:sub(1, 4) == "mips" then |
89 | t[traceexitstub(tr, 0)] = "exit" | 89 | t[traceexitstub(tr, 0)] = "exit" |
90 | return | 90 | return |
91 | end | 91 | end |
92 | for i=0,nexit-1 do | 92 | for i=0,nexit-1 do |
93 | local addr = traceexitstub(tr, i) | 93 | local addr = traceexitstub(tr, i) |
94 | if addr < 0 then addr = addr + 2^32 end | ||
94 | t[addr] = tostring(i) | 95 | t[addr] = tostring(i) |
95 | end | 96 | end |
96 | local addr = traceexitstub(tr, nexit) | 97 | local addr = traceexitstub(tr, nexit) |
@@ -104,7 +105,10 @@ local function fillsymtab(tr, nexit) | |||
104 | local ircall = vmdef.ircall | 105 | local ircall = vmdef.ircall |
105 | for i=0,#ircall do | 106 | for i=0,#ircall do |
106 | local addr = ircalladdr(i) | 107 | local addr = ircalladdr(i) |
107 | if addr ~= 0 then t[addr] = ircall[i] end | 108 | if addr ~= 0 then |
109 | if addr < 0 then addr = addr + 2^32 end | ||
110 | t[addr] = ircall[i] | ||
111 | end | ||
108 | end | 112 | end |
109 | end | 113 | end |
110 | if nexitsym == 1000000 then -- Per-trace exit stubs. | 114 | if nexitsym == 1000000 then -- Per-trace exit stubs. |
@@ -118,6 +122,7 @@ local function fillsymtab(tr, nexit) | |||
118 | nexit = 1000000 | 122 | nexit = 1000000 |
119 | break | 123 | break |
120 | end | 124 | end |
125 | if addr < 0 then addr = addr + 2^32 end | ||
121 | t[addr] = tostring(i) | 126 | t[addr] = tostring(i) |
122 | end | 127 | end |
123 | nexitsym = nexit | 128 | nexitsym = nexit |
@@ -136,6 +141,7 @@ local function dump_mcode(tr) | |||
136 | local mcode, addr, loop = tracemc(tr) | 141 | local mcode, addr, loop = tracemc(tr) |
137 | if not mcode then return end | 142 | if not mcode then return end |
138 | if not disass then disass = require("jit.dis_"..jit.arch) end | 143 | if not disass then disass = require("jit.dis_"..jit.arch) end |
144 | if addr < 0 then addr = addr + 2^32 end | ||
139 | out:write("---- TRACE ", tr, " mcode ", #mcode, "\n") | 145 | out:write("---- TRACE ", tr, " mcode ", #mcode, "\n") |
140 | local ctx = disass.create(mcode, addr, dumpwrite) | 146 | local ctx = disass.create(mcode, addr, dumpwrite) |
141 | ctx.hexdump = 0 | 147 | ctx.hexdump = 0 |
@@ -270,8 +276,7 @@ local litname = { | |||
270 | ["CONV "] = setmetatable({}, { __index = function(t, mode) | 276 | ["CONV "] = setmetatable({}, { __index = function(t, mode) |
271 | local s = irtype[band(mode, 31)] | 277 | local s = irtype[band(mode, 31)] |
272 | s = irtype[band(shr(mode, 5), 31)].."."..s | 278 | s = irtype[band(shr(mode, 5), 31)].."."..s |
273 | if band(mode, 0x400) ~= 0 then s = s.." trunc" | 279 | if band(mode, 0x800) ~= 0 then s = s.." sext" end |
274 | elseif band(mode, 0x800) ~= 0 then s = s.." sext" end | ||
275 | local c = shr(mode, 14) | 280 | local c = shr(mode, 14) |
276 | if c == 2 then s = s.." index" elseif c == 3 then s = s.." check" end | 281 | if c == 2 then s = s.." index" elseif c == 3 then s = s.." check" end |
277 | t[mode] = s | 282 | t[mode] = s |
@@ -280,6 +285,8 @@ local litname = { | |||
280 | ["FLOAD "] = vmdef.irfield, | 285 | ["FLOAD "] = vmdef.irfield, |
281 | ["FREF "] = vmdef.irfield, | 286 | ["FREF "] = vmdef.irfield, |
282 | ["FPMATH"] = vmdef.irfpm, | 287 | ["FPMATH"] = vmdef.irfpm, |
288 | ["BUFHDR"] = { [0] = "RESET", "APPEND" }, | ||
289 | ["TOSTR "] = { [0] = "INT", "NUM", "CHAR" }, | ||
283 | } | 290 | } |
284 | 291 | ||
285 | local function ctlsub(c) | 292 | local function ctlsub(c) |
@@ -303,15 +310,17 @@ local function fmtfunc(func, pc) | |||
303 | end | 310 | end |
304 | end | 311 | end |
305 | 312 | ||
306 | local function formatk(tr, idx) | 313 | local function formatk(tr, idx, sn) |
307 | local k, t, slot = tracek(tr, idx) | 314 | local k, t, slot = tracek(tr, idx) |
308 | local tn = type(k) | 315 | local tn = type(k) |
309 | local s | 316 | local s |
310 | if tn == "number" then | 317 | if tn == "number" then |
311 | if k == 2^52+2^51 then | 318 | if band(sn or 0, 0x30000) ~= 0 then |
319 | s = band(sn, 0x20000) ~= 0 and "contpc" or "ftsz" | ||
320 | elseif k == 2^52+2^51 then | ||
312 | s = "bias" | 321 | s = "bias" |
313 | else | 322 | else |
314 | s = format("%+.14g", k) | 323 | s = format(0 < k and k < 0x1p-1026 and "%+a" or "%+.14g", k) |
315 | end | 324 | end |
316 | elseif tn == "string" then | 325 | elseif tn == "string" then |
317 | s = format(#k > 20 and '"%.20s"~' or '"%s"', gsub(k, "%c", ctlsub)) | 326 | s = format(#k > 20 and '"%.20s"~' or '"%s"', gsub(k, "%c", ctlsub)) |
@@ -329,6 +338,8 @@ local function formatk(tr, idx) | |||
329 | elseif t == 21 then -- int64_t | 338 | elseif t == 21 then -- int64_t |
330 | s = sub(tostring(k), 1, -3) | 339 | s = sub(tostring(k), 1, -3) |
331 | if sub(s, 1, 1) ~= "-" then s = "+"..s end | 340 | if sub(s, 1, 1) ~= "-" then s = "+"..s end |
341 | elseif sn == 0x1057fff then -- SNAP(1, SNAP_FRAME | SNAP_NORESTORE, REF_NIL) | ||
342 | return "----" -- Special case for LJ_FR2 slot 1. | ||
332 | else | 343 | else |
333 | s = tostring(k) -- For primitives. | 344 | s = tostring(k) -- For primitives. |
334 | end | 345 | end |
@@ -347,7 +358,7 @@ local function printsnap(tr, snap) | |||
347 | n = n + 1 | 358 | n = n + 1 |
348 | local ref = band(sn, 0xffff) - 0x8000 -- REF_BIAS | 359 | local ref = band(sn, 0xffff) - 0x8000 -- REF_BIAS |
349 | if ref < 0 then | 360 | if ref < 0 then |
350 | out:write(formatk(tr, ref)) | 361 | out:write(formatk(tr, ref, sn)) |
351 | elseif band(sn, 0x80000) ~= 0 then -- SNAP_SOFTFPNUM | 362 | elseif band(sn, 0x80000) ~= 0 then -- SNAP_SOFTFPNUM |
352 | out:write(colorize(format("%04d/%04d", ref, ref+1), 14)) | 363 | out:write(colorize(format("%04d/%04d", ref, ref+1), 14)) |
353 | else | 364 | else |
@@ -545,7 +556,7 @@ local function dump_trace(what, tr, func, pc, otr, oex) | |||
545 | if what == "start" then | 556 | if what == "start" then |
546 | if dumpmode.H then out:write('<pre class="ljdump">\n') end | 557 | if dumpmode.H then out:write('<pre class="ljdump">\n') end |
547 | out:write("---- TRACE ", tr, " ", what) | 558 | out:write("---- TRACE ", tr, " ", what) |
548 | if otr then out:write(" ", otr, "/", oex) end | 559 | if otr then out:write(" ", otr, "/", oex == -1 and "stitch" or oex) end |
549 | out:write(" ", fmtfunc(func, pc), "\n") | 560 | out:write(" ", fmtfunc(func, pc), "\n") |
550 | elseif what == "stop" or what == "abort" then | 561 | elseif what == "stop" or what == "abort" then |
551 | out:write("---- TRACE ", tr, " ", what) | 562 | out:write("---- TRACE ", tr, " ", what) |
@@ -608,7 +619,7 @@ local function dump_texit(tr, ex, ngpr, nfpr, ...) | |||
608 | end | 619 | end |
609 | else | 620 | else |
610 | for i=1,ngpr do | 621 | for i=1,ngpr do |
611 | out:write(format(" %08x", regs[i])) | 622 | out:write(" ", tohex(regs[i])) |
612 | if i % 8 == 0 then out:write("\n") end | 623 | if i % 8 == 0 then out:write("\n") end |
613 | end | 624 | end |
614 | end | 625 | end |
@@ -693,9 +704,9 @@ local function dumpon(opt, outfile) | |||
693 | end | 704 | end |
694 | 705 | ||
695 | -- Public module functions. | 706 | -- Public module functions. |
696 | module(...) | 707 | return { |
697 | 708 | on = dumpon, | |
698 | on = dumpon | 709 | off = dumpoff, |
699 | off = dumpoff | 710 | start = dumpon -- For -j command line option. |
700 | start = dumpon -- For -j command line option. | 711 | } |
701 | 712 | ||
diff --git a/src/jit/p.lua b/src/jit/p.lua new file mode 100644 index 00000000..ac3ec40a --- /dev/null +++ b/src/jit/p.lua | |||
@@ -0,0 +1,311 @@ | |||
1 | ---------------------------------------------------------------------------- | ||
2 | -- LuaJIT profiler. | ||
3 | -- | ||
4 | -- Copyright (C) 2005-2020 Mike Pall. All rights reserved. | ||
5 | -- Released under the MIT license. See Copyright Notice in luajit.h | ||
6 | ---------------------------------------------------------------------------- | ||
7 | -- | ||
8 | -- This module is a simple command line interface to the built-in | ||
9 | -- low-overhead profiler of LuaJIT. | ||
10 | -- | ||
11 | -- The lower-level API of the profiler is accessible via the "jit.profile" | ||
12 | -- module or the luaJIT_profile_* C API. | ||
13 | -- | ||
14 | -- Example usage: | ||
15 | -- | ||
16 | -- luajit -jp myapp.lua | ||
17 | -- luajit -jp=s myapp.lua | ||
18 | -- luajit -jp=-s myapp.lua | ||
19 | -- luajit -jp=vl myapp.lua | ||
20 | -- luajit -jp=G,profile.txt myapp.lua | ||
21 | -- | ||
22 | -- The following dump features are available: | ||
23 | -- | ||
24 | -- f Stack dump: function name, otherwise module:line. Default mode. | ||
25 | -- F Stack dump: ditto, but always prepend module. | ||
26 | -- l Stack dump: module:line. | ||
27 | -- <number> stack dump depth (callee < caller). Default: 1. | ||
28 | -- -<number> Inverse stack dump depth (caller > callee). | ||
29 | -- s Split stack dump after first stack level. Implies abs(depth) >= 2. | ||
30 | -- p Show full path for module names. | ||
31 | -- v Show VM states. Can be combined with stack dumps, e.g. vf or fv. | ||
32 | -- z Show zones. Can be combined with stack dumps, e.g. zf or fz. | ||
33 | -- r Show raw sample counts. Default: show percentages. | ||
34 | -- a Annotate excerpts from source code files. | ||
35 | -- A Annotate complete source code files. | ||
36 | -- G Produce raw output suitable for graphical tools (e.g. flame graphs). | ||
37 | -- m<number> Minimum sample percentage to be shown. Default: 3. | ||
38 | -- i<number> Sampling interval in milliseconds. Default: 10. | ||
39 | -- | ||
40 | ---------------------------------------------------------------------------- | ||
41 | |||
42 | -- Cache some library functions and objects. | ||
43 | local jit = require("jit") | ||
44 | assert(jit.version_num == 20100, "LuaJIT core/library version mismatch") | ||
45 | local profile = require("jit.profile") | ||
46 | local vmdef = require("jit.vmdef") | ||
47 | local math = math | ||
48 | local pairs, ipairs, tonumber, floor = pairs, ipairs, tonumber, math.floor | ||
49 | local sort, format = table.sort, string.format | ||
50 | local stdout = io.stdout | ||
51 | local zone -- Load jit.zone module on demand. | ||
52 | |||
53 | -- Output file handle. | ||
54 | local out | ||
55 | |||
56 | ------------------------------------------------------------------------------ | ||
57 | |||
58 | local prof_ud | ||
59 | local prof_states, prof_split, prof_min, prof_raw, prof_fmt, prof_depth | ||
60 | local prof_ann, prof_count1, prof_count2, prof_samples | ||
61 | |||
62 | local map_vmmode = { | ||
63 | N = "Compiled", | ||
64 | I = "Interpreted", | ||
65 | C = "C code", | ||
66 | G = "Garbage Collector", | ||
67 | J = "JIT Compiler", | ||
68 | } | ||
69 | |||
70 | -- Profiler callback. | ||
71 | local function prof_cb(th, samples, vmmode) | ||
72 | prof_samples = prof_samples + samples | ||
73 | local key_stack, key_stack2, key_state | ||
74 | -- Collect keys for sample. | ||
75 | if prof_states then | ||
76 | if prof_states == "v" then | ||
77 | key_state = map_vmmode[vmmode] or vmmode | ||
78 | else | ||
79 | key_state = zone:get() or "(none)" | ||
80 | end | ||
81 | end | ||
82 | if prof_fmt then | ||
83 | key_stack = profile.dumpstack(th, prof_fmt, prof_depth) | ||
84 | key_stack = key_stack:gsub("%[builtin#(%d+)%]", function(x) | ||
85 | return vmdef.ffnames[tonumber(x)] | ||
86 | end) | ||
87 | if prof_split == 2 then | ||
88 | local k1, k2 = key_stack:match("(.-) [<>] (.*)") | ||
89 | if k2 then key_stack, key_stack2 = k1, k2 end | ||
90 | elseif prof_split == 3 then | ||
91 | key_stack2 = profile.dumpstack(th, "l", 1) | ||
92 | end | ||
93 | end | ||
94 | -- Order keys. | ||
95 | local k1, k2 | ||
96 | if prof_split == 1 then | ||
97 | if key_state then | ||
98 | k1 = key_state | ||
99 | if key_stack then k2 = key_stack end | ||
100 | end | ||
101 | elseif key_stack then | ||
102 | k1 = key_stack | ||
103 | if key_stack2 then k2 = key_stack2 elseif key_state then k2 = key_state end | ||
104 | end | ||
105 | -- Coalesce samples in one or two levels. | ||
106 | if k1 then | ||
107 | local t1 = prof_count1 | ||
108 | t1[k1] = (t1[k1] or 0) + samples | ||
109 | if k2 then | ||
110 | local t2 = prof_count2 | ||
111 | local t3 = t2[k1] | ||
112 | if not t3 then t3 = {}; t2[k1] = t3 end | ||
113 | t3[k2] = (t3[k2] or 0) + samples | ||
114 | end | ||
115 | end | ||
116 | end | ||
117 | |||
118 | ------------------------------------------------------------------------------ | ||
119 | |||
120 | -- Show top N list. | ||
121 | local function prof_top(count1, count2, samples, indent) | ||
122 | local t, n = {}, 0 | ||
123 | for k in pairs(count1) do | ||
124 | n = n + 1 | ||
125 | t[n] = k | ||
126 | end | ||
127 | sort(t, function(a, b) return count1[a] > count1[b] end) | ||
128 | for i=1,n do | ||
129 | local k = t[i] | ||
130 | local v = count1[k] | ||
131 | local pct = floor(v*100/samples + 0.5) | ||
132 | if pct < prof_min then break end | ||
133 | if not prof_raw then | ||
134 | out:write(format("%s%2d%% %s\n", indent, pct, k)) | ||
135 | elseif prof_raw == "r" then | ||
136 | out:write(format("%s%5d %s\n", indent, v, k)) | ||
137 | else | ||
138 | out:write(format("%s %d\n", k, v)) | ||
139 | end | ||
140 | if count2 then | ||
141 | local r = count2[k] | ||
142 | if r then | ||
143 | prof_top(r, nil, v, (prof_split == 3 or prof_split == 1) and " -- " or | ||
144 | (prof_depth < 0 and " -> " or " <- ")) | ||
145 | end | ||
146 | end | ||
147 | end | ||
148 | end | ||
149 | |||
150 | -- Annotate source code | ||
151 | local function prof_annotate(count1, samples) | ||
152 | local files = {} | ||
153 | local ms = 0 | ||
154 | for k, v in pairs(count1) do | ||
155 | local pct = floor(v*100/samples + 0.5) | ||
156 | ms = math.max(ms, v) | ||
157 | if pct >= prof_min then | ||
158 | local file, line = k:match("^(.*):(%d+)$") | ||
159 | if not file then file = k; line = 0 end | ||
160 | local fl = files[file] | ||
161 | if not fl then fl = {}; files[file] = fl; files[#files+1] = file end | ||
162 | line = tonumber(line) | ||
163 | fl[line] = prof_raw and v or pct | ||
164 | end | ||
165 | end | ||
166 | sort(files) | ||
167 | local fmtv, fmtn = " %3d%% | %s\n", " | %s\n" | ||
168 | if prof_raw then | ||
169 | local n = math.max(5, math.ceil(math.log10(ms))) | ||
170 | fmtv = "%"..n.."d | %s\n" | ||
171 | fmtn = (" "):rep(n).." | %s\n" | ||
172 | end | ||
173 | local ann = prof_ann | ||
174 | for _, file in ipairs(files) do | ||
175 | local f0 = file:byte() | ||
176 | if f0 == 40 or f0 == 91 then | ||
177 | out:write(format("\n====== %s ======\n[Cannot annotate non-file]\n", file)) | ||
178 | break | ||
179 | end | ||
180 | local fp, err = io.open(file) | ||
181 | if not fp then | ||
182 | out:write(format("====== ERROR: %s: %s\n", file, err)) | ||
183 | break | ||
184 | end | ||
185 | out:write(format("\n====== %s ======\n", file)) | ||
186 | local fl = files[file] | ||
187 | local n, show = 1, false | ||
188 | if ann ~= 0 then | ||
189 | for i=1,ann do | ||
190 | if fl[i] then show = true; out:write("@@ 1 @@\n"); break end | ||
191 | end | ||
192 | end | ||
193 | for line in fp:lines() do | ||
194 | if line:byte() == 27 then | ||
195 | out:write("[Cannot annotate bytecode file]\n") | ||
196 | break | ||
197 | end | ||
198 | local v = fl[n] | ||
199 | if ann ~= 0 then | ||
200 | local v2 = fl[n+ann] | ||
201 | if show then | ||
202 | if v2 then show = n+ann elseif v then show = n | ||
203 | elseif show+ann < n then show = false end | ||
204 | elseif v2 then | ||
205 | show = n+ann | ||
206 | out:write(format("@@ %d @@\n", n)) | ||
207 | end | ||
208 | if not show then goto next end | ||
209 | end | ||
210 | if v then | ||
211 | out:write(format(fmtv, v, line)) | ||
212 | else | ||
213 | out:write(format(fmtn, line)) | ||
214 | end | ||
215 | ::next:: | ||
216 | n = n + 1 | ||
217 | end | ||
218 | fp:close() | ||
219 | end | ||
220 | end | ||
221 | |||
222 | ------------------------------------------------------------------------------ | ||
223 | |||
224 | -- Finish profiling and dump result. | ||
225 | local function prof_finish() | ||
226 | if prof_ud then | ||
227 | profile.stop() | ||
228 | local samples = prof_samples | ||
229 | if samples == 0 then | ||
230 | if prof_raw ~= true then out:write("[No samples collected]\n") end | ||
231 | return | ||
232 | end | ||
233 | if prof_ann then | ||
234 | prof_annotate(prof_count1, samples) | ||
235 | else | ||
236 | prof_top(prof_count1, prof_count2, samples, "") | ||
237 | end | ||
238 | prof_count1 = nil | ||
239 | prof_count2 = nil | ||
240 | prof_ud = nil | ||
241 | end | ||
242 | end | ||
243 | |||
244 | -- Start profiling. | ||
245 | local function prof_start(mode) | ||
246 | local interval = "" | ||
247 | mode = mode:gsub("i%d*", function(s) interval = s; return "" end) | ||
248 | prof_min = 3 | ||
249 | mode = mode:gsub("m(%d+)", function(s) prof_min = tonumber(s); return "" end) | ||
250 | prof_depth = 1 | ||
251 | mode = mode:gsub("%-?%d+", function(s) prof_depth = tonumber(s); return "" end) | ||
252 | local m = {} | ||
253 | for c in mode:gmatch(".") do m[c] = c end | ||
254 | prof_states = m.z or m.v | ||
255 | if prof_states == "z" then zone = require("jit.zone") end | ||
256 | local scope = m.l or m.f or m.F or (prof_states and "" or "f") | ||
257 | local flags = (m.p or "") | ||
258 | prof_raw = m.r | ||
259 | if m.s then | ||
260 | prof_split = 2 | ||
261 | if prof_depth == -1 or m["-"] then prof_depth = -2 | ||
262 | elseif prof_depth == 1 then prof_depth = 2 end | ||
263 | elseif mode:find("[fF].*l") then | ||
264 | scope = "l" | ||
265 | prof_split = 3 | ||
266 | else | ||
267 | prof_split = (scope == "" or mode:find("[zv].*[lfF]")) and 1 or 0 | ||
268 | end | ||
269 | prof_ann = m.A and 0 or (m.a and 3) | ||
270 | if prof_ann then | ||
271 | scope = "l" | ||
272 | prof_fmt = "pl" | ||
273 | prof_split = 0 | ||
274 | prof_depth = 1 | ||
275 | elseif m.G and scope ~= "" then | ||
276 | prof_fmt = flags..scope.."Z;" | ||
277 | prof_depth = -100 | ||
278 | prof_raw = true | ||
279 | prof_min = 0 | ||
280 | elseif scope == "" then | ||
281 | prof_fmt = false | ||
282 | else | ||
283 | local sc = prof_split == 3 and m.f or m.F or scope | ||
284 | prof_fmt = flags..sc..(prof_depth >= 0 and "Z < " or "Z > ") | ||
285 | end | ||
286 | prof_count1 = {} | ||
287 | prof_count2 = {} | ||
288 | prof_samples = 0 | ||
289 | profile.start(scope:lower()..interval, prof_cb) | ||
290 | prof_ud = newproxy(true) | ||
291 | getmetatable(prof_ud).__gc = prof_finish | ||
292 | end | ||
293 | |||
294 | ------------------------------------------------------------------------------ | ||
295 | |||
296 | local function start(mode, outfile) | ||
297 | if not outfile then outfile = os.getenv("LUAJIT_PROFILEFILE") end | ||
298 | if outfile then | ||
299 | out = outfile == "-" and stdout or assert(io.open(outfile, "w")) | ||
300 | else | ||
301 | out = stdout | ||
302 | end | ||
303 | prof_start(mode or "f") | ||
304 | end | ||
305 | |||
306 | -- Public module functions. | ||
307 | return { | ||
308 | start = start, -- For -j command line option. | ||
309 | stop = prof_finish | ||
310 | } | ||
311 | |||
diff --git a/src/jit/v.lua b/src/jit/v.lua index 9696f67f..e37466c6 100644 --- a/src/jit/v.lua +++ b/src/jit/v.lua | |||
@@ -59,7 +59,7 @@ | |||
59 | 59 | ||
60 | -- Cache some library functions and objects. | 60 | -- Cache some library functions and objects. |
61 | local jit = require("jit") | 61 | local jit = require("jit") |
62 | assert(jit.version_num == 20005, "LuaJIT core/library version mismatch") | 62 | assert(jit.version_num == 20100, "LuaJIT core/library version mismatch") |
63 | local jutil = require("jit.util") | 63 | local jutil = require("jit.util") |
64 | local vmdef = require("jit.vmdef") | 64 | local vmdef = require("jit.vmdef") |
65 | local funcinfo, traceinfo = jutil.funcinfo, jutil.traceinfo | 65 | local funcinfo, traceinfo = jutil.funcinfo, jutil.traceinfo |
@@ -99,7 +99,7 @@ end | |||
99 | local function dump_trace(what, tr, func, pc, otr, oex) | 99 | local function dump_trace(what, tr, func, pc, otr, oex) |
100 | if what == "start" then | 100 | if what == "start" then |
101 | startloc = fmtfunc(func, pc) | 101 | startloc = fmtfunc(func, pc) |
102 | startex = otr and "("..otr.."/"..oex..") " or "" | 102 | startex = otr and "("..otr.."/"..(oex == -1 and "stitch" or oex)..") " or "" |
103 | else | 103 | else |
104 | if what == "abort" then | 104 | if what == "abort" then |
105 | local loc = fmtfunc(func, pc) | 105 | local loc = fmtfunc(func, pc) |
@@ -116,6 +116,9 @@ local function dump_trace(what, tr, func, pc, otr, oex) | |||
116 | if ltype == "interpreter" then | 116 | if ltype == "interpreter" then |
117 | out:write(format("[TRACE %3s %s%s -- fallback to interpreter]\n", | 117 | out:write(format("[TRACE %3s %s%s -- fallback to interpreter]\n", |
118 | tr, startex, startloc)) | 118 | tr, startex, startloc)) |
119 | elseif ltype == "stitch" then | ||
120 | out:write(format("[TRACE %3s %s%s %s %s]\n", | ||
121 | tr, startex, startloc, ltype, fmtfunc(func, pc))) | ||
119 | elseif link == tr or link == 0 then | 122 | elseif link == tr or link == 0 then |
120 | out:write(format("[TRACE %3s %s%s %s]\n", | 123 | out:write(format("[TRACE %3s %s%s %s]\n", |
121 | tr, startex, startloc, ltype)) | 124 | tr, startex, startloc, ltype)) |
@@ -159,9 +162,9 @@ local function dumpon(outfile) | |||
159 | end | 162 | end |
160 | 163 | ||
161 | -- Public module functions. | 164 | -- Public module functions. |
162 | module(...) | 165 | return { |
163 | 166 | on = dumpon, | |
164 | on = dumpon | 167 | off = dumpoff, |
165 | off = dumpoff | 168 | start = dumpon -- For -j command line option. |
166 | start = dumpon -- For -j command line option. | 169 | } |
167 | 170 | ||
diff --git a/src/jit/zone.lua b/src/jit/zone.lua new file mode 100644 index 00000000..a8b4f0ae --- /dev/null +++ b/src/jit/zone.lua | |||
@@ -0,0 +1,45 @@ | |||
1 | ---------------------------------------------------------------------------- | ||
2 | -- LuaJIT profiler zones. | ||
3 | -- | ||
4 | -- Copyright (C) 2005-2020 Mike Pall. All rights reserved. | ||
5 | -- Released under the MIT license. See Copyright Notice in luajit.h | ||
6 | ---------------------------------------------------------------------------- | ||
7 | -- | ||
8 | -- This module implements a simple hierarchical zone model. | ||
9 | -- | ||
10 | -- Example usage: | ||
11 | -- | ||
12 | -- local zone = require("jit.zone") | ||
13 | -- zone("AI") | ||
14 | -- ... | ||
15 | -- zone("A*") | ||
16 | -- ... | ||
17 | -- print(zone:get()) --> "A*" | ||
18 | -- ... | ||
19 | -- zone() | ||
20 | -- ... | ||
21 | -- print(zone:get()) --> "AI" | ||
22 | -- ... | ||
23 | -- zone() | ||
24 | -- | ||
25 | ---------------------------------------------------------------------------- | ||
26 | |||
27 | local remove = table.remove | ||
28 | |||
29 | return setmetatable({ | ||
30 | flush = function(t) | ||
31 | for i=#t,1,-1 do t[i] = nil end | ||
32 | end, | ||
33 | get = function(t) | ||
34 | return t[#t] | ||
35 | end | ||
36 | }, { | ||
37 | __call = function(t, zone) | ||
38 | if zone then | ||
39 | t[#t+1] = zone | ||
40 | else | ||
41 | return (assert(remove(t), "empty zone stack")) | ||
42 | end | ||
43 | end | ||
44 | }) | ||
45 | |||
diff --git a/src/lauxlib.h b/src/lauxlib.h index fed1491b..a44f0272 100644 --- a/src/lauxlib.h +++ b/src/lauxlib.h | |||
@@ -15,9 +15,6 @@ | |||
15 | #include "lua.h" | 15 | #include "lua.h" |
16 | 16 | ||
17 | 17 | ||
18 | #define luaL_getn(L,i) ((int)lua_objlen(L, i)) | ||
19 | #define luaL_setn(L,i,j) ((void)0) /* no op! */ | ||
20 | |||
21 | /* extra error code for `luaL_load' */ | 18 | /* extra error code for `luaL_load' */ |
22 | #define LUA_ERRFILE (LUA_ERRERR+1) | 19 | #define LUA_ERRFILE (LUA_ERRERR+1) |
23 | 20 | ||
@@ -58,6 +55,10 @@ LUALIB_API int (luaL_error) (lua_State *L, const char *fmt, ...); | |||
58 | LUALIB_API int (luaL_checkoption) (lua_State *L, int narg, const char *def, | 55 | LUALIB_API int (luaL_checkoption) (lua_State *L, int narg, const char *def, |
59 | const char *const lst[]); | 56 | const char *const lst[]); |
60 | 57 | ||
58 | /* pre-defined references */ | ||
59 | #define LUA_NOREF (-2) | ||
60 | #define LUA_REFNIL (-1) | ||
61 | |||
61 | LUALIB_API int (luaL_ref) (lua_State *L, int t); | 62 | LUALIB_API int (luaL_ref) (lua_State *L, int t); |
62 | LUALIB_API void (luaL_unref) (lua_State *L, int t, int ref); | 63 | LUALIB_API void (luaL_unref) (lua_State *L, int t, int ref); |
63 | 64 | ||
@@ -84,6 +85,11 @@ LUALIB_API int (luaL_loadbufferx) (lua_State *L, const char *buff, size_t sz, | |||
84 | const char *name, const char *mode); | 85 | const char *name, const char *mode); |
85 | LUALIB_API void luaL_traceback (lua_State *L, lua_State *L1, const char *msg, | 86 | LUALIB_API void luaL_traceback (lua_State *L, lua_State *L1, const char *msg, |
86 | int level); | 87 | int level); |
88 | LUALIB_API void (luaL_setfuncs) (lua_State *L, const luaL_Reg *l, int nup); | ||
89 | LUALIB_API void (luaL_pushmodule) (lua_State *L, const char *modname, | ||
90 | int sizehint); | ||
91 | LUALIB_API void *(luaL_testudata) (lua_State *L, int ud, const char *tname); | ||
92 | LUALIB_API void (luaL_setmetatable) (lua_State *L, const char *tname); | ||
87 | 93 | ||
88 | 94 | ||
89 | /* | 95 | /* |
@@ -113,6 +119,11 @@ LUALIB_API void luaL_traceback (lua_State *L, lua_State *L1, const char *msg, | |||
113 | 119 | ||
114 | #define luaL_opt(L,f,n,d) (lua_isnoneornil(L,(n)) ? (d) : f(L,(n))) | 120 | #define luaL_opt(L,f,n,d) (lua_isnoneornil(L,(n)) ? (d) : f(L,(n))) |
115 | 121 | ||
122 | /* From Lua 5.2. */ | ||
123 | #define luaL_newlibtable(L, l) \ | ||
124 | lua_createtable(L, 0, sizeof(l)/sizeof((l)[0]) - 1) | ||
125 | #define luaL_newlib(L, l) (luaL_newlibtable(L, l), luaL_setfuncs(L, l, 0)) | ||
126 | |||
116 | /* | 127 | /* |
117 | ** {====================================================== | 128 | ** {====================================================== |
118 | ** Generic Buffer manipulation | 129 | ** Generic Buffer manipulation |
@@ -147,21 +158,4 @@ LUALIB_API void (luaL_pushresult) (luaL_Buffer *B); | |||
147 | 158 | ||
148 | /* }====================================================== */ | 159 | /* }====================================================== */ |
149 | 160 | ||
150 | |||
151 | /* compatibility with ref system */ | ||
152 | |||
153 | /* pre-defined references */ | ||
154 | #define LUA_NOREF (-2) | ||
155 | #define LUA_REFNIL (-1) | ||
156 | |||
157 | #define lua_ref(L,lock) ((lock) ? luaL_ref(L, LUA_REGISTRYINDEX) : \ | ||
158 | (lua_pushstring(L, "unlocked references are obsolete"), lua_error(L), 0)) | ||
159 | |||
160 | #define lua_unref(L,ref) luaL_unref(L, LUA_REGISTRYINDEX, (ref)) | ||
161 | |||
162 | #define lua_getref(L,ref) lua_rawgeti(L, LUA_REGISTRYINDEX, (ref)) | ||
163 | |||
164 | |||
165 | #define luaL_reg luaL_Reg | ||
166 | |||
167 | #endif | 161 | #endif |
diff --git a/src/lib_aux.c b/src/lib_aux.c index f29ca848..8f10e23c 100644 --- a/src/lib_aux.c +++ b/src/lib_aux.c | |||
@@ -107,38 +107,36 @@ LUALIB_API const char *luaL_findtable(lua_State *L, int idx, | |||
107 | static int libsize(const luaL_Reg *l) | 107 | static int libsize(const luaL_Reg *l) |
108 | { | 108 | { |
109 | int size = 0; | 109 | int size = 0; |
110 | for (; l->name; l++) size++; | 110 | for (; l && l->name; l++) size++; |
111 | return size; | 111 | return size; |
112 | } | 112 | } |
113 | 113 | ||
114 | LUALIB_API void luaL_pushmodule(lua_State *L, const char *modname, int sizehint) | ||
115 | { | ||
116 | luaL_findtable(L, LUA_REGISTRYINDEX, "_LOADED", 16); | ||
117 | lua_getfield(L, -1, modname); | ||
118 | if (!lua_istable(L, -1)) { | ||
119 | lua_pop(L, 1); | ||
120 | if (luaL_findtable(L, LUA_GLOBALSINDEX, modname, sizehint) != NULL) | ||
121 | lj_err_callerv(L, LJ_ERR_BADMODN, modname); | ||
122 | lua_pushvalue(L, -1); | ||
123 | lua_setfield(L, -3, modname); /* _LOADED[modname] = new table. */ | ||
124 | } | ||
125 | lua_remove(L, -2); /* Remove _LOADED table. */ | ||
126 | } | ||
127 | |||
114 | LUALIB_API void luaL_openlib(lua_State *L, const char *libname, | 128 | LUALIB_API void luaL_openlib(lua_State *L, const char *libname, |
115 | const luaL_Reg *l, int nup) | 129 | const luaL_Reg *l, int nup) |
116 | { | 130 | { |
117 | lj_lib_checkfpu(L); | 131 | lj_lib_checkfpu(L); |
118 | if (libname) { | 132 | if (libname) { |
119 | int size = libsize(l); | 133 | luaL_pushmodule(L, libname, libsize(l)); |
120 | /* check whether lib already exists */ | 134 | lua_insert(L, -(nup + 1)); /* Move module table below upvalues. */ |
121 | luaL_findtable(L, LUA_REGISTRYINDEX, "_LOADED", 16); | ||
122 | lua_getfield(L, -1, libname); /* get _LOADED[libname] */ | ||
123 | if (!lua_istable(L, -1)) { /* not found? */ | ||
124 | lua_pop(L, 1); /* remove previous result */ | ||
125 | /* try global variable (and create one if it does not exist) */ | ||
126 | if (luaL_findtable(L, LUA_GLOBALSINDEX, libname, size) != NULL) | ||
127 | lj_err_callerv(L, LJ_ERR_BADMODN, libname); | ||
128 | lua_pushvalue(L, -1); | ||
129 | lua_setfield(L, -3, libname); /* _LOADED[libname] = new table */ | ||
130 | } | ||
131 | lua_remove(L, -2); /* remove _LOADED table */ | ||
132 | lua_insert(L, -(nup+1)); /* move library table to below upvalues */ | ||
133 | } | 135 | } |
134 | for (; l->name; l++) { | 136 | if (l) |
135 | int i; | 137 | luaL_setfuncs(L, l, nup); |
136 | for (i = 0; i < nup; i++) /* copy upvalues to the top */ | 138 | else |
137 | lua_pushvalue(L, -nup); | 139 | lua_pop(L, nup); /* Remove upvalues. */ |
138 | lua_pushcclosure(L, l->func, nup); | ||
139 | lua_setfield(L, -(nup+2), l->name); | ||
140 | } | ||
141 | lua_pop(L, nup); /* remove upvalues */ | ||
142 | } | 140 | } |
143 | 141 | ||
144 | LUALIB_API void luaL_register(lua_State *L, const char *libname, | 142 | LUALIB_API void luaL_register(lua_State *L, const char *libname, |
@@ -147,6 +145,19 @@ LUALIB_API void luaL_register(lua_State *L, const char *libname, | |||
147 | luaL_openlib(L, libname, l, 0); | 145 | luaL_openlib(L, libname, l, 0); |
148 | } | 146 | } |
149 | 147 | ||
148 | LUALIB_API void luaL_setfuncs(lua_State *L, const luaL_Reg *l, int nup) | ||
149 | { | ||
150 | luaL_checkstack(L, nup, "too many upvalues"); | ||
151 | for (; l->name; l++) { | ||
152 | int i; | ||
153 | for (i = 0; i < nup; i++) /* Copy upvalues to the top. */ | ||
154 | lua_pushvalue(L, -nup); | ||
155 | lua_pushcclosure(L, l->func, nup); | ||
156 | lua_setfield(L, -(nup + 2), l->name); | ||
157 | } | ||
158 | lua_pop(L, nup); /* Remove upvalues. */ | ||
159 | } | ||
160 | |||
150 | LUALIB_API const char *luaL_gsub(lua_State *L, const char *s, | 161 | LUALIB_API const char *luaL_gsub(lua_State *L, const char *s, |
151 | const char *p, const char *r) | 162 | const char *p, const char *r) |
152 | { | 163 | { |
@@ -207,8 +218,15 @@ LUALIB_API char *luaL_prepbuffer(luaL_Buffer *B) | |||
207 | 218 | ||
208 | LUALIB_API void luaL_addlstring(luaL_Buffer *B, const char *s, size_t l) | 219 | LUALIB_API void luaL_addlstring(luaL_Buffer *B, const char *s, size_t l) |
209 | { | 220 | { |
210 | while (l--) | 221 | if (l <= bufffree(B)) { |
211 | luaL_addchar(B, *s++); | 222 | memcpy(B->p, s, l); |
223 | B->p += l; | ||
224 | } else { | ||
225 | emptybuffer(B); | ||
226 | lua_pushlstring(B->L, s, l); | ||
227 | B->lvl++; | ||
228 | adjuststack(B); | ||
229 | } | ||
212 | } | 230 | } |
213 | 231 | ||
214 | LUALIB_API void luaL_addstring(luaL_Buffer *B, const char *s) | 232 | LUALIB_API void luaL_addstring(luaL_Buffer *B, const char *s) |
@@ -302,7 +320,7 @@ static int panic(lua_State *L) | |||
302 | 320 | ||
303 | #ifdef LUAJIT_USE_SYSMALLOC | 321 | #ifdef LUAJIT_USE_SYSMALLOC |
304 | 322 | ||
305 | #if LJ_64 && !defined(LUAJIT_USE_VALGRIND) | 323 | #if LJ_64 && !LJ_GC64 && !defined(LUAJIT_USE_VALGRIND) |
306 | #error "Must use builtin allocator for 64 bit target" | 324 | #error "Must use builtin allocator for 64 bit target" |
307 | #endif | 325 | #endif |
308 | 326 | ||
@@ -334,7 +352,7 @@ LUALIB_API lua_State *luaL_newstate(void) | |||
334 | lua_State *L; | 352 | lua_State *L; |
335 | void *ud = lj_alloc_create(); | 353 | void *ud = lj_alloc_create(); |
336 | if (ud == NULL) return NULL; | 354 | if (ud == NULL) return NULL; |
337 | #if LJ_64 | 355 | #if LJ_64 && !LJ_GC64 |
338 | L = lj_state_newstate(lj_alloc_f, ud); | 356 | L = lj_state_newstate(lj_alloc_f, ud); |
339 | #else | 357 | #else |
340 | L = lua_newstate(lj_alloc_f, ud); | 358 | L = lua_newstate(lj_alloc_f, ud); |
@@ -343,7 +361,7 @@ LUALIB_API lua_State *luaL_newstate(void) | |||
343 | return L; | 361 | return L; |
344 | } | 362 | } |
345 | 363 | ||
346 | #if LJ_64 | 364 | #if LJ_64 && !LJ_GC64 |
347 | LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) | 365 | LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) |
348 | { | 366 | { |
349 | UNUSED(f); UNUSED(ud); | 367 | UNUSED(f); UNUSED(ud); |
diff --git a/src/lib_base.c b/src/lib_base.c index dae61fe1..54e9e2b0 100644 --- a/src/lib_base.c +++ b/src/lib_base.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include "lj_tab.h" | 23 | #include "lj_tab.h" |
24 | #include "lj_meta.h" | 24 | #include "lj_meta.h" |
25 | #include "lj_state.h" | 25 | #include "lj_state.h" |
26 | #include "lj_frame.h" | ||
26 | #if LJ_HASFFI | 27 | #if LJ_HASFFI |
27 | #include "lj_ctype.h" | 28 | #include "lj_ctype.h" |
28 | #include "lj_cconv.h" | 29 | #include "lj_cconv.h" |
@@ -32,6 +33,7 @@ | |||
32 | #include "lj_dispatch.h" | 33 | #include "lj_dispatch.h" |
33 | #include "lj_char.h" | 34 | #include "lj_char.h" |
34 | #include "lj_strscan.h" | 35 | #include "lj_strscan.h" |
36 | #include "lj_strfmt.h" | ||
35 | #include "lj_lib.h" | 37 | #include "lj_lib.h" |
36 | 38 | ||
37 | /* -- Base library: checks ------------------------------------------------ */ | 39 | /* -- Base library: checks ------------------------------------------------ */ |
@@ -40,13 +42,13 @@ | |||
40 | 42 | ||
41 | LJLIB_ASM(assert) LJLIB_REC(.) | 43 | LJLIB_ASM(assert) LJLIB_REC(.) |
42 | { | 44 | { |
43 | GCstr *s; | ||
44 | lj_lib_checkany(L, 1); | 45 | lj_lib_checkany(L, 1); |
45 | s = lj_lib_optstr(L, 2); | 46 | if (L->top == L->base+1) |
46 | if (s) | ||
47 | lj_err_callermsg(L, strdata(s)); | ||
48 | else | ||
49 | lj_err_caller(L, LJ_ERR_ASSERT); | 47 | lj_err_caller(L, LJ_ERR_ASSERT); |
48 | else if (tvisstr(L->base+1) || tvisnumber(L->base+1)) | ||
49 | lj_err_callermsg(L, strdata(lj_lib_checkstr(L, 2))); | ||
50 | else | ||
51 | lj_err_run(L); | ||
50 | return FFH_UNREACHABLE; | 52 | return FFH_UNREACHABLE; |
51 | } | 53 | } |
52 | 54 | ||
@@ -86,10 +88,11 @@ static int ffh_pairs(lua_State *L, MMS mm) | |||
86 | cTValue *mo = lj_meta_lookup(L, o, mm); | 88 | cTValue *mo = lj_meta_lookup(L, o, mm); |
87 | if ((LJ_52 || tviscdata(o)) && !tvisnil(mo)) { | 89 | if ((LJ_52 || tviscdata(o)) && !tvisnil(mo)) { |
88 | L->top = o+1; /* Only keep one argument. */ | 90 | L->top = o+1; /* Only keep one argument. */ |
89 | copyTV(L, L->base-1, mo); /* Replace callable. */ | 91 | copyTV(L, L->base-1-LJ_FR2, mo); /* Replace callable. */ |
90 | return FFH_TAILCALL; | 92 | return FFH_TAILCALL; |
91 | } else { | 93 | } else { |
92 | if (!tvistab(o)) lj_err_argt(L, 1, LUA_TTABLE); | 94 | if (!tvistab(o)) lj_err_argt(L, 1, LUA_TTABLE); |
95 | if (LJ_FR2) { copyTV(L, o-1, o); o--; } | ||
93 | setfuncV(L, o-1, funcV(lj_lib_upvalue(L, 1))); | 96 | setfuncV(L, o-1, funcV(lj_lib_upvalue(L, 1))); |
94 | if (mm == MM_pairs) setnilV(o+1); else setintV(o+1, 0); | 97 | if (mm == MM_pairs) setnilV(o+1); else setintV(o+1, 0); |
95 | return FFH_RES(3); | 98 | return FFH_RES(3); |
@@ -100,7 +103,7 @@ static int ffh_pairs(lua_State *L, MMS mm) | |||
100 | #endif | 103 | #endif |
101 | 104 | ||
102 | LJLIB_PUSH(lastcl) | 105 | LJLIB_PUSH(lastcl) |
103 | LJLIB_ASM(pairs) | 106 | LJLIB_ASM(pairs) LJLIB_REC(xpairs 0) |
104 | { | 107 | { |
105 | return ffh_pairs(L, MM_pairs); | 108 | return ffh_pairs(L, MM_pairs); |
106 | } | 109 | } |
@@ -113,7 +116,7 @@ LJLIB_NOREGUV LJLIB_ASM(ipairs_aux) LJLIB_REC(.) | |||
113 | } | 116 | } |
114 | 117 | ||
115 | LJLIB_PUSH(lastcl) | 118 | LJLIB_PUSH(lastcl) |
116 | LJLIB_ASM(ipairs) LJLIB_REC(.) | 119 | LJLIB_ASM(ipairs) LJLIB_REC(xpairs 1) |
117 | { | 120 | { |
118 | return ffh_pairs(L, MM_ipairs); | 121 | return ffh_pairs(L, MM_ipairs); |
119 | } | 122 | } |
@@ -131,11 +134,11 @@ LJLIB_ASM(setmetatable) LJLIB_REC(.) | |||
131 | lj_err_caller(L, LJ_ERR_PROTMT); | 134 | lj_err_caller(L, LJ_ERR_PROTMT); |
132 | setgcref(t->metatable, obj2gco(mt)); | 135 | setgcref(t->metatable, obj2gco(mt)); |
133 | if (mt) { lj_gc_objbarriert(L, t, mt); } | 136 | if (mt) { lj_gc_objbarriert(L, t, mt); } |
134 | settabV(L, L->base-1, t); | 137 | settabV(L, L->base-1-LJ_FR2, t); |
135 | return FFH_RES(1); | 138 | return FFH_RES(1); |
136 | } | 139 | } |
137 | 140 | ||
138 | LJLIB_CF(getfenv) | 141 | LJLIB_CF(getfenv) LJLIB_REC(.) |
139 | { | 142 | { |
140 | GCfunc *fn; | 143 | GCfunc *fn; |
141 | cTValue *o = L->base; | 144 | cTValue *o = L->base; |
@@ -144,6 +147,7 @@ LJLIB_CF(getfenv) | |||
144 | o = lj_debug_frame(L, level, &level); | 147 | o = lj_debug_frame(L, level, &level); |
145 | if (o == NULL) | 148 | if (o == NULL) |
146 | lj_err_arg(L, 1, LJ_ERR_INVLVL); | 149 | lj_err_arg(L, 1, LJ_ERR_INVLVL); |
150 | if (LJ_FR2) o--; | ||
147 | } | 151 | } |
148 | fn = &gcval(o)->fn; | 152 | fn = &gcval(o)->fn; |
149 | settabV(L, L->top++, isluafunc(fn) ? tabref(fn->l.env) : tabref(L->env)); | 153 | settabV(L, L->top++, isluafunc(fn) ? tabref(fn->l.env) : tabref(L->env)); |
@@ -165,6 +169,7 @@ LJLIB_CF(setfenv) | |||
165 | o = lj_debug_frame(L, level, &level); | 169 | o = lj_debug_frame(L, level, &level); |
166 | if (o == NULL) | 170 | if (o == NULL) |
167 | lj_err_arg(L, 1, LJ_ERR_INVLVL); | 171 | lj_err_arg(L, 1, LJ_ERR_INVLVL); |
172 | if (LJ_FR2) o--; | ||
168 | } | 173 | } |
169 | fn = &gcval(o)->fn; | 174 | fn = &gcval(o)->fn; |
170 | if (!isluafunc(fn)) | 175 | if (!isluafunc(fn)) |
@@ -257,7 +262,7 @@ LJLIB_ASM(tonumber) LJLIB_REC(.) | |||
257 | if (base == 10) { | 262 | if (base == 10) { |
258 | TValue *o = lj_lib_checkany(L, 1); | 263 | TValue *o = lj_lib_checkany(L, 1); |
259 | if (lj_strscan_numberobj(o)) { | 264 | if (lj_strscan_numberobj(o)) { |
260 | copyTV(L, L->base-1, o); | 265 | copyTV(L, L->base-1-LJ_FR2, o); |
261 | return FFH_RES(1); | 266 | return FFH_RES(1); |
262 | } | 267 | } |
263 | #if LJ_HASFFI | 268 | #if LJ_HASFFI |
@@ -270,11 +275,11 @@ LJLIB_ASM(tonumber) LJLIB_REC(.) | |||
270 | ct->size <= 4 && !(ct->size == 4 && (ct->info & CTF_UNSIGNED))) { | 275 | ct->size <= 4 && !(ct->size == 4 && (ct->info & CTF_UNSIGNED))) { |
271 | int32_t i; | 276 | int32_t i; |
272 | lj_cconv_ct_tv(cts, ctype_get(cts, CTID_INT32), (uint8_t *)&i, o, 0); | 277 | lj_cconv_ct_tv(cts, ctype_get(cts, CTID_INT32), (uint8_t *)&i, o, 0); |
273 | setintV(L->base-1, i); | 278 | setintV(L->base-1-LJ_FR2, i); |
274 | return FFH_RES(1); | 279 | return FFH_RES(1); |
275 | } | 280 | } |
276 | lj_cconv_ct_tv(cts, ctype_get(cts, CTID_DOUBLE), | 281 | lj_cconv_ct_tv(cts, ctype_get(cts, CTID_DOUBLE), |
277 | (uint8_t *)&(L->base-1)->n, o, 0); | 282 | (uint8_t *)&(L->base-1-LJ_FR2)->n, o, 0); |
278 | return FFH_RES(1); | 283 | return FFH_RES(1); |
279 | } | 284 | } |
280 | } | 285 | } |
@@ -282,53 +287,46 @@ LJLIB_ASM(tonumber) LJLIB_REC(.) | |||
282 | } else { | 287 | } else { |
283 | const char *p = strdata(lj_lib_checkstr(L, 1)); | 288 | const char *p = strdata(lj_lib_checkstr(L, 1)); |
284 | char *ep; | 289 | char *ep; |
290 | unsigned int neg = 0; | ||
285 | unsigned long ul; | 291 | unsigned long ul; |
286 | if (base < 2 || base > 36) | 292 | if (base < 2 || base > 36) |
287 | lj_err_arg(L, 2, LJ_ERR_BASERNG); | 293 | lj_err_arg(L, 2, LJ_ERR_BASERNG); |
288 | ul = strtoul(p, &ep, base); | 294 | while (lj_char_isspace((unsigned char)(*p))) p++; |
289 | if (p != ep) { | 295 | if (*p == '-') { p++; neg = 1; } else if (*p == '+') { p++; } |
290 | while (lj_char_isspace((unsigned char)(*ep))) ep++; | 296 | if (lj_char_isalnum((unsigned char)(*p))) { |
291 | if (*ep == '\0') { | 297 | ul = strtoul(p, &ep, base); |
292 | if (LJ_DUALNUM && LJ_LIKELY(ul < 0x80000000u)) | 298 | if (p != ep) { |
293 | setintV(L->base-1, (int32_t)ul); | 299 | while (lj_char_isspace((unsigned char)(*ep))) ep++; |
294 | else | 300 | if (*ep == '\0') { |
295 | setnumV(L->base-1, (lua_Number)ul); | 301 | if (LJ_DUALNUM && LJ_LIKELY(ul < 0x80000000u+neg)) { |
296 | return FFH_RES(1); | 302 | if (neg) ul = -ul; |
303 | setintV(L->base-1-LJ_FR2, (int32_t)ul); | ||
304 | } else { | ||
305 | lua_Number n = (lua_Number)ul; | ||
306 | if (neg) n = -n; | ||
307 | setnumV(L->base-1-LJ_FR2, n); | ||
308 | } | ||
309 | return FFH_RES(1); | ||
310 | } | ||
297 | } | 311 | } |
298 | } | 312 | } |
299 | } | 313 | } |
300 | setnilV(L->base-1); | 314 | setnilV(L->base-1-LJ_FR2); |
301 | return FFH_RES(1); | 315 | return FFH_RES(1); |
302 | } | 316 | } |
303 | 317 | ||
304 | LJLIB_PUSH("nil") | ||
305 | LJLIB_PUSH("false") | ||
306 | LJLIB_PUSH("true") | ||
307 | LJLIB_ASM(tostring) LJLIB_REC(.) | 318 | LJLIB_ASM(tostring) LJLIB_REC(.) |
308 | { | 319 | { |
309 | TValue *o = lj_lib_checkany(L, 1); | 320 | TValue *o = lj_lib_checkany(L, 1); |
310 | cTValue *mo; | 321 | cTValue *mo; |
311 | L->top = o+1; /* Only keep one argument. */ | 322 | L->top = o+1; /* Only keep one argument. */ |
312 | if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) { | 323 | if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) { |
313 | copyTV(L, L->base-1, mo); /* Replace callable. */ | 324 | copyTV(L, L->base-1-LJ_FR2, mo); /* Replace callable. */ |
314 | return FFH_TAILCALL; | 325 | return FFH_TAILCALL; |
315 | } else { | ||
316 | GCstr *s; | ||
317 | if (tvisnumber(o)) { | ||
318 | s = lj_str_fromnumber(L, o); | ||
319 | } else if (tvispri(o)) { | ||
320 | s = strV(lj_lib_upvalue(L, -(int32_t)itype(o))); | ||
321 | } else { | ||
322 | if (tvisfunc(o) && isffunc(funcV(o))) | ||
323 | lua_pushfstring(L, "function: builtin#%d", funcV(o)->c.ffid); | ||
324 | else | ||
325 | lua_pushfstring(L, "%s: %p", lj_typename(o), lua_topointer(L, 1)); | ||
326 | /* Note: lua_pushfstring calls the GC which may invalidate o. */ | ||
327 | s = strV(L->top-1); | ||
328 | } | ||
329 | setstrV(L, L->base-1, s); | ||
330 | return FFH_RES(1); | ||
331 | } | 326 | } |
327 | lj_gc_check(L); | ||
328 | setstrV(L, L->base-1-LJ_FR2, lj_strfmt_obj(L, L->base)); | ||
329 | return FFH_RES(1); | ||
332 | } | 330 | } |
333 | 331 | ||
334 | /* -- Base library: throw and catch errors -------------------------------- */ | 332 | /* -- Base library: throw and catch errors -------------------------------- */ |
@@ -357,7 +355,7 @@ LJLIB_ASM_(xpcall) LJLIB_REC(.) | |||
357 | 355 | ||
358 | static int load_aux(lua_State *L, int status, int envarg) | 356 | static int load_aux(lua_State *L, int status, int envarg) |
359 | { | 357 | { |
360 | if (status == 0) { | 358 | if (status == LUA_OK) { |
361 | if (tvistab(L->base+envarg-1)) { | 359 | if (tvistab(L->base+envarg-1)) { |
362 | GCfunc *fn = funcV(L->top-1); | 360 | GCfunc *fn = funcV(L->top-1); |
363 | GCtab *t = tabV(L->base+envarg-1); | 361 | GCtab *t = tabV(L->base+envarg-1); |
@@ -430,7 +428,7 @@ LJLIB_CF(dofile) | |||
430 | GCstr *fname = lj_lib_optstr(L, 1); | 428 | GCstr *fname = lj_lib_optstr(L, 1); |
431 | setnilV(L->top); | 429 | setnilV(L->top); |
432 | L->top = L->base+1; | 430 | L->top = L->base+1; |
433 | if (luaL_loadfile(L, fname ? strdata(fname) : NULL) != 0) | 431 | if (luaL_loadfile(L, fname ? strdata(fname) : NULL) != LUA_OK) |
434 | lua_error(L); | 432 | lua_error(L); |
435 | lua_call(L, 0, LUA_MULTRET); | 433 | lua_call(L, 0, LUA_MULTRET); |
436 | return (int)(L->top - L->base) - 1; | 434 | return (int)(L->top - L->base) - 1; |
@@ -440,20 +438,20 @@ LJLIB_CF(dofile) | |||
440 | 438 | ||
441 | LJLIB_CF(gcinfo) | 439 | LJLIB_CF(gcinfo) |
442 | { | 440 | { |
443 | setintV(L->top++, (G(L)->gc.total >> 10)); | 441 | setintV(L->top++, (int32_t)(G(L)->gc.total >> 10)); |
444 | return 1; | 442 | return 1; |
445 | } | 443 | } |
446 | 444 | ||
447 | LJLIB_CF(collectgarbage) | 445 | LJLIB_CF(collectgarbage) |
448 | { | 446 | { |
449 | int opt = lj_lib_checkopt(L, 1, LUA_GCCOLLECT, /* ORDER LUA_GC* */ | 447 | int opt = lj_lib_checkopt(L, 1, LUA_GCCOLLECT, /* ORDER LUA_GC* */ |
450 | "\4stop\7restart\7collect\5count\1\377\4step\10setpause\12setstepmul"); | 448 | "\4stop\7restart\7collect\5count\1\377\4step\10setpause\12setstepmul\1\377\11isrunning"); |
451 | int32_t data = lj_lib_optint(L, 2, 0); | 449 | int32_t data = lj_lib_optint(L, 2, 0); |
452 | if (opt == LUA_GCCOUNT) { | 450 | if (opt == LUA_GCCOUNT) { |
453 | setnumV(L->top, (lua_Number)G(L)->gc.total/1024.0); | 451 | setnumV(L->top, (lua_Number)G(L)->gc.total/1024.0); |
454 | } else { | 452 | } else { |
455 | int res = lua_gc(L, opt, data); | 453 | int res = lua_gc(L, opt, data); |
456 | if (opt == LUA_GCSTEP) | 454 | if (opt == LUA_GCSTEP || opt == LUA_GCISRUNNING) |
457 | setboolV(L->top, res); | 455 | setboolV(L->top, res); |
458 | else | 456 | else |
459 | setintV(L->top, res); | 457 | setintV(L->top, res); |
@@ -505,23 +503,14 @@ LJLIB_CF(print) | |||
505 | tv = L->top-1; | 503 | tv = L->top-1; |
506 | } | 504 | } |
507 | shortcut = (tvisfunc(tv) && funcV(tv)->c.ffid == FF_tostring) | 505 | shortcut = (tvisfunc(tv) && funcV(tv)->c.ffid == FF_tostring) |
508 | && !gcrefu(basemt_it(G(L), LJ_TNUMX)); | 506 | && !gcrefu(basemt_it(G(L), LJ_TNUMX)); |
509 | for (i = 0; i < nargs; i++) { | 507 | for (i = 0; i < nargs; i++) { |
508 | cTValue *o = &L->base[i]; | ||
510 | const char *str; | 509 | const char *str; |
511 | size_t size; | 510 | size_t size; |
512 | cTValue *o = &L->base[i]; | 511 | MSize len; |
513 | if (shortcut && tvisstr(o)) { | 512 | if (shortcut && (str = lj_strfmt_wstrnum(L, o, &len)) != NULL) { |
514 | str = strVdata(o); | 513 | size = len; |
515 | size = strV(o)->len; | ||
516 | } else if (shortcut && tvisint(o)) { | ||
517 | char buf[LJ_STR_INTBUF]; | ||
518 | char *p = lj_str_bufint(buf, intV(o)); | ||
519 | size = (size_t)(buf+LJ_STR_INTBUF-p); | ||
520 | str = p; | ||
521 | } else if (shortcut && tvisnum(o)) { | ||
522 | char buf[LJ_STR_NUMBUF]; | ||
523 | size = lj_str_bufnum(buf, o); | ||
524 | str = buf; | ||
525 | } else { | 514 | } else { |
526 | copyTV(L, L->top+1, o); | 515 | copyTV(L, L->top+1, o); |
527 | copyTV(L, L->top, L->top-1); | 516 | copyTV(L, L->top, L->top-1); |
@@ -558,8 +547,8 @@ LJLIB_CF(coroutine_status) | |||
558 | co = threadV(L->base); | 547 | co = threadV(L->base); |
559 | if (co == L) s = "running"; | 548 | if (co == L) s = "running"; |
560 | else if (co->status == LUA_YIELD) s = "suspended"; | 549 | else if (co->status == LUA_YIELD) s = "suspended"; |
561 | else if (co->status != 0) s = "dead"; | 550 | else if (co->status != LUA_OK) s = "dead"; |
562 | else if (co->base > tvref(co->stack)+1) s = "normal"; | 551 | else if (co->base > tvref(co->stack)+1+LJ_FR2) s = "normal"; |
563 | else if (co->top == co->base) s = "dead"; | 552 | else if (co->top == co->base) s = "dead"; |
564 | else s = "suspended"; | 553 | else s = "suspended"; |
565 | lua_pushstring(L, s); | 554 | lua_pushstring(L, s); |
@@ -579,6 +568,12 @@ LJLIB_CF(coroutine_running) | |||
579 | #endif | 568 | #endif |
580 | } | 569 | } |
581 | 570 | ||
571 | LJLIB_CF(coroutine_isyieldable) | ||
572 | { | ||
573 | setboolV(L->top++, cframe_canyield(L->cframe)); | ||
574 | return 1; | ||
575 | } | ||
576 | |||
582 | LJLIB_CF(coroutine_create) | 577 | LJLIB_CF(coroutine_create) |
583 | { | 578 | { |
584 | lua_State *L1; | 579 | lua_State *L1; |
@@ -598,11 +593,11 @@ LJLIB_ASM(coroutine_yield) | |||
598 | static int ffh_resume(lua_State *L, lua_State *co, int wrap) | 593 | static int ffh_resume(lua_State *L, lua_State *co, int wrap) |
599 | { | 594 | { |
600 | if (co->cframe != NULL || co->status > LUA_YIELD || | 595 | if (co->cframe != NULL || co->status > LUA_YIELD || |
601 | (co->status == 0 && co->top == co->base)) { | 596 | (co->status == LUA_OK && co->top == co->base)) { |
602 | ErrMsg em = co->cframe ? LJ_ERR_CORUN : LJ_ERR_CODEAD; | 597 | ErrMsg em = co->cframe ? LJ_ERR_CORUN : LJ_ERR_CODEAD; |
603 | if (wrap) lj_err_caller(L, em); | 598 | if (wrap) lj_err_caller(L, em); |
604 | setboolV(L->base-1, 0); | 599 | setboolV(L->base-1-LJ_FR2, 0); |
605 | setstrV(L, L->base, lj_err_str(L, em)); | 600 | setstrV(L, L->base-LJ_FR2, lj_err_str(L, em)); |
606 | return FFH_RES(2); | 601 | return FFH_RES(2); |
607 | } | 602 | } |
608 | lj_state_growstack(co, (MSize)(L->top - L->base)); | 603 | lj_state_growstack(co, (MSize)(L->top - L->base)); |
@@ -643,9 +638,10 @@ static void setpc_wrap_aux(lua_State *L, GCfunc *fn); | |||
643 | 638 | ||
644 | LJLIB_CF(coroutine_wrap) | 639 | LJLIB_CF(coroutine_wrap) |
645 | { | 640 | { |
641 | GCfunc *fn; | ||
646 | lj_cf_coroutine_create(L); | 642 | lj_cf_coroutine_create(L); |
647 | lj_lib_pushcc(L, lj_ffh_coroutine_wrap_aux, FF_coroutine_wrap_aux, 1); | 643 | fn = lj_lib_pushcc(L, lj_ffh_coroutine_wrap_aux, FF_coroutine_wrap_aux, 1); |
648 | setpc_wrap_aux(L, funcV(L->top-1)); | 644 | setpc_wrap_aux(L, fn); |
649 | return 1; | 645 | return 1; |
650 | } | 646 | } |
651 | 647 | ||
diff --git a/src/lib_bit.c b/src/lib_bit.c index c374d7a0..c4911450 100644 --- a/src/lib_bit.c +++ b/src/lib_bit.c | |||
@@ -12,26 +12,99 @@ | |||
12 | 12 | ||
13 | #include "lj_obj.h" | 13 | #include "lj_obj.h" |
14 | #include "lj_err.h" | 14 | #include "lj_err.h" |
15 | #include "lj_str.h" | 15 | #include "lj_buf.h" |
16 | #include "lj_strscan.h" | ||
17 | #include "lj_strfmt.h" | ||
18 | #if LJ_HASFFI | ||
19 | #include "lj_ctype.h" | ||
20 | #include "lj_cdata.h" | ||
21 | #include "lj_cconv.h" | ||
22 | #include "lj_carith.h" | ||
23 | #endif | ||
24 | #include "lj_ff.h" | ||
16 | #include "lj_lib.h" | 25 | #include "lj_lib.h" |
17 | 26 | ||
18 | /* ------------------------------------------------------------------------ */ | 27 | /* ------------------------------------------------------------------------ */ |
19 | 28 | ||
20 | #define LJLIB_MODULE_bit | 29 | #define LJLIB_MODULE_bit |
21 | 30 | ||
22 | LJLIB_ASM(bit_tobit) LJLIB_REC(bit_unary IR_TOBIT) | 31 | #if LJ_HASFFI |
32 | static int bit_result64(lua_State *L, CTypeID id, uint64_t x) | ||
23 | { | 33 | { |
34 | GCcdata *cd = lj_cdata_new_(L, id, 8); | ||
35 | *(uint64_t *)cdataptr(cd) = x; | ||
36 | setcdataV(L, L->base-1-LJ_FR2, cd); | ||
37 | return FFH_RES(1); | ||
38 | } | ||
39 | #else | ||
40 | static int32_t bit_checkbit(lua_State *L, int narg) | ||
41 | { | ||
42 | TValue *o = L->base + narg-1; | ||
43 | if (!(o < L->top && lj_strscan_numberobj(o))) | ||
44 | lj_err_argt(L, narg, LUA_TNUMBER); | ||
45 | if (LJ_LIKELY(tvisint(o))) { | ||
46 | return intV(o); | ||
47 | } else { | ||
48 | int32_t i = lj_num2bit(numV(o)); | ||
49 | if (LJ_DUALNUM) setintV(o, i); | ||
50 | return i; | ||
51 | } | ||
52 | } | ||
53 | #endif | ||
54 | |||
55 | LJLIB_ASM(bit_tobit) LJLIB_REC(bit_tobit) | ||
56 | { | ||
57 | #if LJ_HASFFI | ||
58 | CTypeID id = 0; | ||
59 | setintV(L->base-1-LJ_FR2, (int32_t)lj_carith_check64(L, 1, &id)); | ||
60 | return FFH_RES(1); | ||
61 | #else | ||
62 | lj_lib_checknumber(L, 1); | ||
63 | return FFH_RETRY; | ||
64 | #endif | ||
65 | } | ||
66 | |||
67 | LJLIB_ASM(bit_bnot) LJLIB_REC(bit_unary IR_BNOT) | ||
68 | { | ||
69 | #if LJ_HASFFI | ||
70 | CTypeID id = 0; | ||
71 | uint64_t x = lj_carith_check64(L, 1, &id); | ||
72 | return id ? bit_result64(L, id, ~x) : FFH_RETRY; | ||
73 | #else | ||
24 | lj_lib_checknumber(L, 1); | 74 | lj_lib_checknumber(L, 1); |
25 | return FFH_RETRY; | 75 | return FFH_RETRY; |
76 | #endif | ||
77 | } | ||
78 | |||
79 | LJLIB_ASM(bit_bswap) LJLIB_REC(bit_unary IR_BSWAP) | ||
80 | { | ||
81 | #if LJ_HASFFI | ||
82 | CTypeID id = 0; | ||
83 | uint64_t x = lj_carith_check64(L, 1, &id); | ||
84 | return id ? bit_result64(L, id, lj_bswap64(x)) : FFH_RETRY; | ||
85 | #else | ||
86 | lj_lib_checknumber(L, 1); | ||
87 | return FFH_RETRY; | ||
88 | #endif | ||
26 | } | 89 | } |
27 | LJLIB_ASM_(bit_bnot) LJLIB_REC(bit_unary IR_BNOT) | ||
28 | LJLIB_ASM_(bit_bswap) LJLIB_REC(bit_unary IR_BSWAP) | ||
29 | 90 | ||
30 | LJLIB_ASM(bit_lshift) LJLIB_REC(bit_shift IR_BSHL) | 91 | LJLIB_ASM(bit_lshift) LJLIB_REC(bit_shift IR_BSHL) |
31 | { | 92 | { |
93 | #if LJ_HASFFI | ||
94 | CTypeID id = 0, id2 = 0; | ||
95 | uint64_t x = lj_carith_check64(L, 1, &id); | ||
96 | int32_t sh = (int32_t)lj_carith_check64(L, 2, &id2); | ||
97 | if (id) { | ||
98 | x = lj_carith_shift64(x, sh, curr_func(L)->c.ffid - (int)FF_bit_lshift); | ||
99 | return bit_result64(L, id, x); | ||
100 | } | ||
101 | if (id2) setintV(L->base+1, sh); | ||
102 | return FFH_RETRY; | ||
103 | #else | ||
32 | lj_lib_checknumber(L, 1); | 104 | lj_lib_checknumber(L, 1); |
33 | lj_lib_checkbit(L, 2); | 105 | bit_checkbit(L, 2); |
34 | return FFH_RETRY; | 106 | return FFH_RETRY; |
107 | #endif | ||
35 | } | 108 | } |
36 | LJLIB_ASM_(bit_rshift) LJLIB_REC(bit_shift IR_BSHR) | 109 | LJLIB_ASM_(bit_rshift) LJLIB_REC(bit_shift IR_BSHR) |
37 | LJLIB_ASM_(bit_arshift) LJLIB_REC(bit_shift IR_BSAR) | 110 | LJLIB_ASM_(bit_arshift) LJLIB_REC(bit_shift IR_BSAR) |
@@ -40,25 +113,58 @@ LJLIB_ASM_(bit_ror) LJLIB_REC(bit_shift IR_BROR) | |||
40 | 113 | ||
41 | LJLIB_ASM(bit_band) LJLIB_REC(bit_nary IR_BAND) | 114 | LJLIB_ASM(bit_band) LJLIB_REC(bit_nary IR_BAND) |
42 | { | 115 | { |
116 | #if LJ_HASFFI | ||
117 | CTypeID id = 0; | ||
118 | TValue *o = L->base, *top = L->top; | ||
119 | int i = 0; | ||
120 | do { lj_carith_check64(L, ++i, &id); } while (++o < top); | ||
121 | if (id) { | ||
122 | CTState *cts = ctype_cts(L); | ||
123 | CType *ct = ctype_get(cts, id); | ||
124 | int op = curr_func(L)->c.ffid - (int)FF_bit_bor; | ||
125 | uint64_t x, y = op >= 0 ? 0 : ~(uint64_t)0; | ||
126 | o = L->base; | ||
127 | do { | ||
128 | lj_cconv_ct_tv(cts, ct, (uint8_t *)&x, o, 0); | ||
129 | if (op < 0) y &= x; else if (op == 0) y |= x; else y ^= x; | ||
130 | } while (++o < top); | ||
131 | return bit_result64(L, id, y); | ||
132 | } | ||
133 | return FFH_RETRY; | ||
134 | #else | ||
43 | int i = 0; | 135 | int i = 0; |
44 | do { lj_lib_checknumber(L, ++i); } while (L->base+i < L->top); | 136 | do { lj_lib_checknumber(L, ++i); } while (L->base+i < L->top); |
45 | return FFH_RETRY; | 137 | return FFH_RETRY; |
138 | #endif | ||
46 | } | 139 | } |
47 | LJLIB_ASM_(bit_bor) LJLIB_REC(bit_nary IR_BOR) | 140 | LJLIB_ASM_(bit_bor) LJLIB_REC(bit_nary IR_BOR) |
48 | LJLIB_ASM_(bit_bxor) LJLIB_REC(bit_nary IR_BXOR) | 141 | LJLIB_ASM_(bit_bxor) LJLIB_REC(bit_nary IR_BXOR) |
49 | 142 | ||
50 | /* ------------------------------------------------------------------------ */ | 143 | /* ------------------------------------------------------------------------ */ |
51 | 144 | ||
52 | LJLIB_CF(bit_tohex) | 145 | LJLIB_CF(bit_tohex) LJLIB_REC(.) |
53 | { | 146 | { |
54 | uint32_t b = (uint32_t)lj_lib_checkbit(L, 1); | 147 | #if LJ_HASFFI |
55 | int32_t i, n = L->base+1 >= L->top ? 8 : lj_lib_checkbit(L, 2); | 148 | CTypeID id = 0, id2 = 0; |
56 | const char *hexdigits = "0123456789abcdef"; | 149 | uint64_t b = lj_carith_check64(L, 1, &id); |
57 | char buf[8]; | 150 | int32_t n = L->base+1>=L->top ? (id ? 16 : 8) : |
58 | if (n < 0) { n = -n; hexdigits = "0123456789ABCDEF"; } | 151 | (int32_t)lj_carith_check64(L, 2, &id2); |
59 | if (n > 8) n = 8; | 152 | #else |
60 | for (i = n; --i >= 0; ) { buf[i] = hexdigits[b & 15]; b >>= 4; } | 153 | uint32_t b = (uint32_t)bit_checkbit(L, 1); |
61 | lua_pushlstring(L, buf, (size_t)n); | 154 | int32_t n = L->base+1>=L->top ? 8 : bit_checkbit(L, 2); |
155 | #endif | ||
156 | SBuf *sb = lj_buf_tmp_(L); | ||
157 | SFormat sf = (STRFMT_UINT|STRFMT_T_HEX); | ||
158 | if (n < 0) { n = -n; sf |= STRFMT_F_UPPER; } | ||
159 | sf |= ((SFormat)((n+1)&255) << STRFMT_SH_PREC); | ||
160 | #if LJ_HASFFI | ||
161 | if (n < 16) b &= ((uint64_t)1 << 4*n)-1; | ||
162 | #else | ||
163 | if (n < 8) b &= (1u << 4*n)-1; | ||
164 | #endif | ||
165 | sb = lj_strfmt_putfxint(sb, sf, b); | ||
166 | setstrV(L, L->top-1, lj_buf_str(L, sb)); | ||
167 | lj_gc_check(L); | ||
62 | return 1; | 168 | return 1; |
63 | } | 169 | } |
64 | 170 | ||
diff --git a/src/lib_debug.c b/src/lib_debug.c index a485ff8e..5bcabe7d 100644 --- a/src/lib_debug.c +++ b/src/lib_debug.c | |||
@@ -29,7 +29,7 @@ LJLIB_CF(debug_getregistry) | |||
29 | return 1; | 29 | return 1; |
30 | } | 30 | } |
31 | 31 | ||
32 | LJLIB_CF(debug_getmetatable) | 32 | LJLIB_CF(debug_getmetatable) LJLIB_REC(.) |
33 | { | 33 | { |
34 | lj_lib_checkany(L, 1); | 34 | lj_lib_checkany(L, 1); |
35 | if (!lua_getmetatable(L, 1)) { | 35 | if (!lua_getmetatable(L, 1)) { |
@@ -283,13 +283,13 @@ LJLIB_CF(debug_setuservalue) | |||
283 | 283 | ||
284 | /* ------------------------------------------------------------------------ */ | 284 | /* ------------------------------------------------------------------------ */ |
285 | 285 | ||
286 | static const char KEY_HOOK = 'h'; | 286 | #define KEY_HOOK ((void *)0x3004) |
287 | 287 | ||
288 | static void hookf(lua_State *L, lua_Debug *ar) | 288 | static void hookf(lua_State *L, lua_Debug *ar) |
289 | { | 289 | { |
290 | static const char *const hooknames[] = | 290 | static const char *const hooknames[] = |
291 | {"call", "return", "line", "count", "tail return"}; | 291 | {"call", "return", "line", "count", "tail return"}; |
292 | lua_pushlightuserdata(L, (void *)&KEY_HOOK); | 292 | lua_pushlightuserdata(L, KEY_HOOK); |
293 | lua_rawget(L, LUA_REGISTRYINDEX); | 293 | lua_rawget(L, LUA_REGISTRYINDEX); |
294 | if (lua_isfunction(L, -1)) { | 294 | if (lua_isfunction(L, -1)) { |
295 | lua_pushstring(L, hooknames[(int)ar->event]); | 295 | lua_pushstring(L, hooknames[(int)ar->event]); |
@@ -334,7 +334,7 @@ LJLIB_CF(debug_sethook) | |||
334 | count = luaL_optint(L, arg+3, 0); | 334 | count = luaL_optint(L, arg+3, 0); |
335 | func = hookf; mask = makemask(smask, count); | 335 | func = hookf; mask = makemask(smask, count); |
336 | } | 336 | } |
337 | lua_pushlightuserdata(L, (void *)&KEY_HOOK); | 337 | lua_pushlightuserdata(L, KEY_HOOK); |
338 | lua_pushvalue(L, arg+1); | 338 | lua_pushvalue(L, arg+1); |
339 | lua_rawset(L, LUA_REGISTRYINDEX); | 339 | lua_rawset(L, LUA_REGISTRYINDEX); |
340 | lua_sethook(L, func, mask, count); | 340 | lua_sethook(L, func, mask, count); |
@@ -349,7 +349,7 @@ LJLIB_CF(debug_gethook) | |||
349 | if (hook != NULL && hook != hookf) { /* external hook? */ | 349 | if (hook != NULL && hook != hookf) { /* external hook? */ |
350 | lua_pushliteral(L, "external hook"); | 350 | lua_pushliteral(L, "external hook"); |
351 | } else { | 351 | } else { |
352 | lua_pushlightuserdata(L, (void *)&KEY_HOOK); | 352 | lua_pushlightuserdata(L, KEY_HOOK); |
353 | lua_rawget(L, LUA_REGISTRYINDEX); /* get hook */ | 353 | lua_rawget(L, LUA_REGISTRYINDEX); /* get hook */ |
354 | } | 354 | } |
355 | lua_pushstring(L, unmakemask(mask, buff)); | 355 | lua_pushstring(L, unmakemask(mask, buff)); |
diff --git a/src/lib_ffi.c b/src/lib_ffi.c index 5851eea5..16fecacb 100644 --- a/src/lib_ffi.c +++ b/src/lib_ffi.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include "lj_ccall.h" | 29 | #include "lj_ccall.h" |
30 | #include "lj_ccallback.h" | 30 | #include "lj_ccallback.h" |
31 | #include "lj_clib.h" | 31 | #include "lj_clib.h" |
32 | #include "lj_strfmt.h" | ||
32 | #include "lj_ff.h" | 33 | #include "lj_ff.h" |
33 | #include "lj_lib.h" | 34 | #include "lj_lib.h" |
34 | 35 | ||
@@ -137,7 +138,7 @@ static int ffi_index_meta(lua_State *L, CTState *cts, CType *ct, MMS mm) | |||
137 | } | 138 | } |
138 | } | 139 | } |
139 | copyTV(L, base, L->top); | 140 | copyTV(L, base, L->top); |
140 | tv = L->top-1; | 141 | tv = L->top-1-LJ_FR2; |
141 | } | 142 | } |
142 | return lj_meta_tailcall(L, tv); | 143 | return lj_meta_tailcall(L, tv); |
143 | } | 144 | } |
@@ -318,7 +319,7 @@ LJLIB_CF(ffi_meta___tostring) | |||
318 | } | 319 | } |
319 | } | 320 | } |
320 | } | 321 | } |
321 | lj_str_pushf(L, msg, strdata(lj_ctype_repr(L, id, NULL)), p); | 322 | lj_strfmt_pushf(L, msg, strdata(lj_ctype_repr(L, id, NULL)), p); |
322 | checkgc: | 323 | checkgc: |
323 | lj_gc_check(L); | 324 | lj_gc_check(L); |
324 | return 1; | 325 | return 1; |
@@ -504,10 +505,7 @@ LJLIB_CF(ffi_new) LJLIB_REC(.) | |||
504 | } | 505 | } |
505 | if (sz == CTSIZE_INVALID) | 506 | if (sz == CTSIZE_INVALID) |
506 | lj_err_arg(L, 1, LJ_ERR_FFI_INVSIZE); | 507 | lj_err_arg(L, 1, LJ_ERR_FFI_INVSIZE); |
507 | if (!(info & CTF_VLA) && ctype_align(info) <= CT_MEMALIGN) | 508 | cd = lj_cdata_newx(cts, id, sz, info); |
508 | cd = lj_cdata_new(cts, id, sz); | ||
509 | else | ||
510 | cd = lj_cdata_newv(cts, id, sz, ctype_align(info)); | ||
511 | setcdataV(L, o-1, cd); /* Anchor the uninitialized cdata. */ | 509 | setcdataV(L, o-1, cd); /* Anchor the uninitialized cdata. */ |
512 | lj_cconv_ct_init(cts, ct, sz, cdataptr(cd), | 510 | lj_cconv_ct_init(cts, ct, sz, cdataptr(cd), |
513 | o, (MSize)(L->top - o)); /* Initialize cdata. */ | 511 | o, (MSize)(L->top - o)); /* Initialize cdata. */ |
@@ -558,6 +556,31 @@ LJLIB_CF(ffi_typeof) LJLIB_REC(.) | |||
558 | return 1; | 556 | return 1; |
559 | } | 557 | } |
560 | 558 | ||
559 | /* Internal and unsupported API. */ | ||
560 | LJLIB_CF(ffi_typeinfo) | ||
561 | { | ||
562 | CTState *cts = ctype_cts(L); | ||
563 | CTypeID id = (CTypeID)ffi_checkint(L, 1); | ||
564 | if (id > 0 && id < cts->top) { | ||
565 | CType *ct = ctype_get(cts, id); | ||
566 | GCtab *t; | ||
567 | lua_createtable(L, 0, 4); /* Increment hash size if fields are added. */ | ||
568 | t = tabV(L->top-1); | ||
569 | setintV(lj_tab_setstr(L, t, lj_str_newlit(L, "info")), (int32_t)ct->info); | ||
570 | if (ct->size != CTSIZE_INVALID) | ||
571 | setintV(lj_tab_setstr(L, t, lj_str_newlit(L, "size")), (int32_t)ct->size); | ||
572 | if (ct->sib) | ||
573 | setintV(lj_tab_setstr(L, t, lj_str_newlit(L, "sib")), (int32_t)ct->sib); | ||
574 | if (gcref(ct->name)) { | ||
575 | GCstr *s = gco2str(gcref(ct->name)); | ||
576 | setstrV(L, lj_tab_setstr(L, t, lj_str_newlit(L, "name")), s); | ||
577 | } | ||
578 | lj_gc_check(L); | ||
579 | return 1; | ||
580 | } | ||
581 | return 0; | ||
582 | } | ||
583 | |||
561 | LJLIB_CF(ffi_istype) LJLIB_REC(.) | 584 | LJLIB_CF(ffi_istype) LJLIB_REC(.) |
562 | { | 585 | { |
563 | CTState *cts = ctype_cts(L); | 586 | CTState *cts = ctype_cts(L); |
@@ -697,44 +720,47 @@ LJLIB_CF(ffi_fill) LJLIB_REC(.) | |||
697 | return 0; | 720 | return 0; |
698 | } | 721 | } |
699 | 722 | ||
700 | #define H_(le, be) LJ_ENDIAN_SELECT(0x##le, 0x##be) | ||
701 | |||
702 | /* Test ABI string. */ | 723 | /* Test ABI string. */ |
703 | LJLIB_CF(ffi_abi) LJLIB_REC(.) | 724 | LJLIB_CF(ffi_abi) LJLIB_REC(.) |
704 | { | 725 | { |
705 | GCstr *s = lj_lib_checkstr(L, 1); | 726 | GCstr *s = lj_lib_checkstr(L, 1); |
706 | int b = 0; | 727 | int b = lj_cparse_case(s, |
707 | switch (s->hash) { | ||
708 | #if LJ_64 | 728 | #if LJ_64 |
709 | case H_(849858eb,ad35fd06): b = 1; break; /* 64bit */ | 729 | "\00564bit" |
710 | #else | 730 | #else |
711 | case H_(662d3c79,d0e22477): b = 1; break; /* 32bit */ | 731 | "\00532bit" |
712 | #endif | 732 | #endif |
713 | #if LJ_ARCH_HASFPU | 733 | #if LJ_ARCH_HASFPU |
714 | case H_(e33ee463,e33ee463): b = 1; break; /* fpu */ | 734 | "\003fpu" |
715 | #endif | 735 | #endif |
716 | #if LJ_ABI_SOFTFP | 736 | #if LJ_ABI_SOFTFP |
717 | case H_(61211a23,c2e8c81c): b = 1; break; /* softfp */ | 737 | "\006softfp" |
718 | #else | 738 | #else |
719 | case H_(539417a8,8ce0812f): b = 1; break; /* hardfp */ | 739 | "\006hardfp" |
720 | #endif | 740 | #endif |
721 | #if LJ_ABI_EABI | 741 | #if LJ_ABI_EABI |
722 | case H_(2182df8f,f2ed1152): b = 1; break; /* eabi */ | 742 | "\004eabi" |
723 | #endif | 743 | #endif |
724 | #if LJ_ABI_WIN | 744 | #if LJ_ABI_WIN |
725 | case H_(4ab624a8,4ab624a8): b = 1; break; /* win */ | 745 | "\003win" |
726 | #endif | 746 | #endif |
727 | case H_(3af93066,1f001464): b = 1; break; /* le/be */ | 747 | #if LJ_TARGET_UWP |
728 | default: | 748 | "\003uwp" |
729 | break; | 749 | #endif |
730 | } | 750 | #if LJ_LE |
751 | "\002le" | ||
752 | #else | ||
753 | "\002be" | ||
754 | #endif | ||
755 | #if LJ_GC64 | ||
756 | "\004gc64" | ||
757 | #endif | ||
758 | ) >= 0; | ||
731 | setboolV(L->top-1, b); | 759 | setboolV(L->top-1, b); |
732 | setboolV(&G(L)->tmptv2, b); /* Remember for trace recorder. */ | 760 | setboolV(&G(L)->tmptv2, b); /* Remember for trace recorder. */ |
733 | return 1; | 761 | return 1; |
734 | } | 762 | } |
735 | 763 | ||
736 | #undef H_ | ||
737 | |||
738 | LJLIB_PUSH(top-8) LJLIB_SET(!) /* Store reference to miscmap table. */ | 764 | LJLIB_PUSH(top-8) LJLIB_SET(!) /* Store reference to miscmap table. */ |
739 | 765 | ||
740 | LJLIB_CF(ffi_metatype) | 766 | LJLIB_CF(ffi_metatype) |
@@ -768,19 +794,11 @@ LJLIB_CF(ffi_gc) LJLIB_REC(.) | |||
768 | GCcdata *cd = ffi_checkcdata(L, 1); | 794 | GCcdata *cd = ffi_checkcdata(L, 1); |
769 | TValue *fin = lj_lib_checkany(L, 2); | 795 | TValue *fin = lj_lib_checkany(L, 2); |
770 | CTState *cts = ctype_cts(L); | 796 | CTState *cts = ctype_cts(L); |
771 | GCtab *t = cts->finalizer; | ||
772 | CType *ct = ctype_raw(cts, cd->ctypeid); | 797 | CType *ct = ctype_raw(cts, cd->ctypeid); |
773 | if (!(ctype_isptr(ct->info) || ctype_isstruct(ct->info) || | 798 | if (!(ctype_isptr(ct->info) || ctype_isstruct(ct->info) || |
774 | ctype_isrefarray(ct->info))) | 799 | ctype_isrefarray(ct->info))) |
775 | lj_err_arg(L, 1, LJ_ERR_FFI_INVTYPE); | 800 | lj_err_arg(L, 1, LJ_ERR_FFI_INVTYPE); |
776 | if (gcref(t->metatable)) { /* Update finalizer table, if still enabled. */ | 801 | lj_cdata_setfin(L, cd, gcval(fin), itype(fin)); |
777 | copyTV(L, lj_tab_set(L, t, L->base), fin); | ||
778 | lj_gc_anybarriert(L, t); | ||
779 | if (!tvisnil(fin)) | ||
780 | cd->marked |= LJ_GC_CDATA_FIN; | ||
781 | else | ||
782 | cd->marked &= ~LJ_GC_CDATA_FIN; | ||
783 | } | ||
784 | L->top = L->base+1; /* Pass through the cdata object. */ | 802 | L->top = L->base+1; /* Pass through the cdata object. */ |
785 | return 1; | 803 | return 1; |
786 | } | 804 | } |
diff --git a/src/lib_io.c b/src/lib_io.c index f13cf048..5e9d0d66 100644 --- a/src/lib_io.c +++ b/src/lib_io.c | |||
@@ -19,8 +19,10 @@ | |||
19 | #include "lj_obj.h" | 19 | #include "lj_obj.h" |
20 | #include "lj_gc.h" | 20 | #include "lj_gc.h" |
21 | #include "lj_err.h" | 21 | #include "lj_err.h" |
22 | #include "lj_buf.h" | ||
22 | #include "lj_str.h" | 23 | #include "lj_str.h" |
23 | #include "lj_state.h" | 24 | #include "lj_state.h" |
25 | #include "lj_strfmt.h" | ||
24 | #include "lj_ff.h" | 26 | #include "lj_ff.h" |
25 | #include "lj_lib.h" | 27 | #include "lj_lib.h" |
26 | 28 | ||
@@ -84,7 +86,7 @@ static IOFileUD *io_file_open(lua_State *L, const char *mode) | |||
84 | IOFileUD *iof = io_file_new(L); | 86 | IOFileUD *iof = io_file_new(L); |
85 | iof->fp = fopen(fname, mode); | 87 | iof->fp = fopen(fname, mode); |
86 | if (iof->fp == NULL) | 88 | if (iof->fp == NULL) |
87 | luaL_argerror(L, 1, lj_str_pushf(L, "%s: %s", fname, strerror(errno))); | 89 | luaL_argerror(L, 1, lj_strfmt_pushf(L, "%s: %s", fname, strerror(errno))); |
88 | return iof; | 90 | return iof; |
89 | } | 91 | } |
90 | 92 | ||
@@ -97,7 +99,7 @@ static int io_file_close(lua_State *L, IOFileUD *iof) | |||
97 | int stat = -1; | 99 | int stat = -1; |
98 | #if LJ_TARGET_POSIX | 100 | #if LJ_TARGET_POSIX |
99 | stat = pclose(iof->fp); | 101 | stat = pclose(iof->fp); |
100 | #elif LJ_TARGET_WINDOWS | 102 | #elif LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE && !LJ_TARGET_UWP |
101 | stat = _pclose(iof->fp); | 103 | stat = _pclose(iof->fp); |
102 | #else | 104 | #else |
103 | lua_assert(0); | 105 | lua_assert(0); |
@@ -145,7 +147,7 @@ static int io_file_readline(lua_State *L, FILE *fp, MSize chop) | |||
145 | MSize m = LUAL_BUFFERSIZE, n = 0, ok = 0; | 147 | MSize m = LUAL_BUFFERSIZE, n = 0, ok = 0; |
146 | char *buf; | 148 | char *buf; |
147 | for (;;) { | 149 | for (;;) { |
148 | buf = lj_str_needbuf(L, &G(L)->tmpbuf, m); | 150 | buf = lj_buf_tmp(L, m); |
149 | if (fgets(buf+n, m-n, fp) == NULL) break; | 151 | if (fgets(buf+n, m-n, fp) == NULL) break; |
150 | n += (MSize)strlen(buf+n); | 152 | n += (MSize)strlen(buf+n); |
151 | ok |= n; | 153 | ok |= n; |
@@ -161,7 +163,7 @@ static void io_file_readall(lua_State *L, FILE *fp) | |||
161 | { | 163 | { |
162 | MSize m, n; | 164 | MSize m, n; |
163 | for (m = LUAL_BUFFERSIZE, n = 0; ; m += m) { | 165 | for (m = LUAL_BUFFERSIZE, n = 0; ; m += m) { |
164 | char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, m); | 166 | char *buf = lj_buf_tmp(L, m); |
165 | n += (MSize)fread(buf+n, 1, m-n, fp); | 167 | n += (MSize)fread(buf+n, 1, m-n, fp); |
166 | if (n != m) { | 168 | if (n != m) { |
167 | setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n)); | 169 | setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n)); |
@@ -174,7 +176,7 @@ static void io_file_readall(lua_State *L, FILE *fp) | |||
174 | static int io_file_readlen(lua_State *L, FILE *fp, MSize m) | 176 | static int io_file_readlen(lua_State *L, FILE *fp, MSize m) |
175 | { | 177 | { |
176 | if (m) { | 178 | if (m) { |
177 | char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, m); | 179 | char *buf = lj_buf_tmp(L, m); |
178 | MSize n = (MSize)fread(buf, 1, m, fp); | 180 | MSize n = (MSize)fread(buf, 1, m, fp); |
179 | setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n)); | 181 | setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n)); |
180 | lj_gc_check(L); | 182 | lj_gc_check(L); |
@@ -201,13 +203,12 @@ static int io_file_read(lua_State *L, FILE *fp, int start) | |||
201 | for (n = start; nargs-- && ok; n++) { | 203 | for (n = start; nargs-- && ok; n++) { |
202 | if (tvisstr(L->base+n)) { | 204 | if (tvisstr(L->base+n)) { |
203 | const char *p = strVdata(L->base+n); | 205 | const char *p = strVdata(L->base+n); |
204 | if (p[0] != '*') | 206 | if (p[0] == '*') p++; |
205 | lj_err_arg(L, n+1, LJ_ERR_INVOPT); | 207 | if (p[0] == 'n') |
206 | if (p[1] == 'n') | ||
207 | ok = io_file_readnum(L, fp); | 208 | ok = io_file_readnum(L, fp); |
208 | else if ((p[1] & ~0x20) == 'L') | 209 | else if ((p[0] & ~0x20) == 'L') |
209 | ok = io_file_readline(L, fp, (p[1] == 'l')); | 210 | ok = io_file_readline(L, fp, (p[0] == 'l')); |
210 | else if (p[1] == 'a') | 211 | else if (p[0] == 'a') |
211 | io_file_readall(L, fp); | 212 | io_file_readall(L, fp); |
212 | else | 213 | else |
213 | lj_err_arg(L, n+1, LJ_ERR_INVFMT); | 214 | lj_err_arg(L, n+1, LJ_ERR_INVFMT); |
@@ -230,19 +231,11 @@ static int io_file_write(lua_State *L, FILE *fp, int start) | |||
230 | cTValue *tv; | 231 | cTValue *tv; |
231 | int status = 1; | 232 | int status = 1; |
232 | for (tv = L->base+start; tv < L->top; tv++) { | 233 | for (tv = L->base+start; tv < L->top; tv++) { |
233 | if (tvisstr(tv)) { | 234 | MSize len; |
234 | MSize len = strV(tv)->len; | 235 | const char *p = lj_strfmt_wstrnum(L, tv, &len); |
235 | status = status && (fwrite(strVdata(tv), 1, len, fp) == len); | 236 | if (!p) |
236 | } else if (tvisint(tv)) { | ||
237 | char buf[LJ_STR_INTBUF]; | ||
238 | char *p = lj_str_bufint(buf, intV(tv)); | ||
239 | size_t len = (size_t)(buf+LJ_STR_INTBUF-p); | ||
240 | status = status && (fwrite(p, 1, len, fp) == len); | ||
241 | } else if (tvisnum(tv)) { | ||
242 | status = status && (fprintf(fp, LUA_NUMBER_FMT, numV(tv)) > 0); | ||
243 | } else { | ||
244 | lj_err_argt(L, (int)(tv - L->base) + 1, LUA_TSTRING); | 237 | lj_err_argt(L, (int)(tv - L->base) + 1, LUA_TSTRING); |
245 | } | 238 | status = status && (fwrite(p, 1, len, fp) == len); |
246 | } | 239 | } |
247 | if (LJ_52 && status) { | 240 | if (LJ_52 && status) { |
248 | L->top = L->base+1; | 241 | L->top = L->base+1; |
@@ -413,7 +406,7 @@ LJLIB_CF(io_open) | |||
413 | 406 | ||
414 | LJLIB_CF(io_popen) | 407 | LJLIB_CF(io_popen) |
415 | { | 408 | { |
416 | #if LJ_TARGET_POSIX || LJ_TARGET_WINDOWS | 409 | #if LJ_TARGET_POSIX || (LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE && !LJ_TARGET_UWP) |
417 | const char *fname = strdata(lj_lib_checkstr(L, 1)); | 410 | const char *fname = strdata(lj_lib_checkstr(L, 1)); |
418 | GCstr *s = lj_lib_optstr(L, 2); | 411 | GCstr *s = lj_lib_optstr(L, 2); |
419 | const char *mode = s ? strdata(s) : "r"; | 412 | const char *mode = s ? strdata(s) : "r"; |
diff --git a/src/lib_jit.c b/src/lib_jit.c index 6e98229e..c97b0d53 100644 --- a/src/lib_jit.c +++ b/src/lib_jit.c | |||
@@ -10,13 +10,17 @@ | |||
10 | #include "lauxlib.h" | 10 | #include "lauxlib.h" |
11 | #include "lualib.h" | 11 | #include "lualib.h" |
12 | 12 | ||
13 | #include "lj_arch.h" | ||
14 | #include "lj_obj.h" | 13 | #include "lj_obj.h" |
14 | #include "lj_gc.h" | ||
15 | #include "lj_err.h" | 15 | #include "lj_err.h" |
16 | #include "lj_debug.h" | 16 | #include "lj_debug.h" |
17 | #include "lj_str.h" | 17 | #include "lj_str.h" |
18 | #include "lj_tab.h" | 18 | #include "lj_tab.h" |
19 | #include "lj_state.h" | ||
19 | #include "lj_bc.h" | 20 | #include "lj_bc.h" |
21 | #if LJ_HASFFI | ||
22 | #include "lj_ctype.h" | ||
23 | #endif | ||
20 | #if LJ_HASJIT | 24 | #if LJ_HASJIT |
21 | #include "lj_ir.h" | 25 | #include "lj_ir.h" |
22 | #include "lj_jit.h" | 26 | #include "lj_jit.h" |
@@ -24,6 +28,7 @@ | |||
24 | #include "lj_iropt.h" | 28 | #include "lj_iropt.h" |
25 | #include "lj_target.h" | 29 | #include "lj_target.h" |
26 | #endif | 30 | #endif |
31 | #include "lj_trace.h" | ||
27 | #include "lj_dispatch.h" | 32 | #include "lj_dispatch.h" |
28 | #include "lj_vm.h" | 33 | #include "lj_vm.h" |
29 | #include "lj_vmevent.h" | 34 | #include "lj_vmevent.h" |
@@ -280,7 +285,7 @@ static GCtrace *jit_checktrace(lua_State *L) | |||
280 | /* Names of link types. ORDER LJ_TRLINK */ | 285 | /* Names of link types. ORDER LJ_TRLINK */ |
281 | static const char *const jit_trlinkname[] = { | 286 | static const char *const jit_trlinkname[] = { |
282 | "none", "root", "loop", "tail-recursion", "up-recursion", "down-recursion", | 287 | "none", "root", "loop", "tail-recursion", "up-recursion", "down-recursion", |
283 | "interpreter", "return" | 288 | "interpreter", "return", "stitch" |
284 | }; | 289 | }; |
285 | 290 | ||
286 | /* local info = jit.util.traceinfo(tr) */ | 291 | /* local info = jit.util.traceinfo(tr) */ |
@@ -333,6 +338,13 @@ LJLIB_CF(jit_util_tracek) | |||
333 | slot = ir->op2; | 338 | slot = ir->op2; |
334 | ir = &T->ir[ir->op1]; | 339 | ir = &T->ir[ir->op1]; |
335 | } | 340 | } |
341 | #if LJ_HASFFI | ||
342 | if (ir->o == IR_KINT64 && !ctype_ctsG(G(L))) { | ||
343 | ptrdiff_t oldtop = savestack(L, L->top); | ||
344 | luaopen_ffi(L); /* Load FFI library on-demand. */ | ||
345 | L->top = restorestack(L, oldtop); | ||
346 | } | ||
347 | #endif | ||
336 | lj_ir_kvalue(L, L->top-2, ir); | 348 | lj_ir_kvalue(L, L->top-2, ir); |
337 | setintV(L->top-1, (int32_t)irt_type(ir->t)); | 349 | setintV(L->top-1, (int32_t)irt_type(ir->t)); |
338 | if (slot == -1) | 350 | if (slot == -1) |
@@ -417,6 +429,12 @@ LJLIB_CF(jit_util_ircalladdr) | |||
417 | 429 | ||
418 | #include "lj_libdef.h" | 430 | #include "lj_libdef.h" |
419 | 431 | ||
432 | static int luaopen_jit_util(lua_State *L) | ||
433 | { | ||
434 | LJ_LIB_REG(L, NULL, jit_util); | ||
435 | return 1; | ||
436 | } | ||
437 | |||
420 | /* -- jit.opt module ------------------------------------------------------ */ | 438 | /* -- jit.opt module ------------------------------------------------------ */ |
421 | 439 | ||
422 | #if LJ_HASJIT | 440 | #if LJ_HASJIT |
@@ -514,6 +532,104 @@ LJLIB_CF(jit_opt_start) | |||
514 | 532 | ||
515 | #endif | 533 | #endif |
516 | 534 | ||
535 | /* -- jit.profile module -------------------------------------------------- */ | ||
536 | |||
537 | #if LJ_HASPROFILE | ||
538 | |||
539 | #define LJLIB_MODULE_jit_profile | ||
540 | |||
541 | /* Not loaded by default, use: local profile = require("jit.profile") */ | ||
542 | |||
543 | static const char KEY_PROFILE_THREAD = 't'; | ||
544 | static const char KEY_PROFILE_FUNC = 'f'; | ||
545 | |||
546 | static void jit_profile_callback(lua_State *L2, lua_State *L, int samples, | ||
547 | int vmstate) | ||
548 | { | ||
549 | TValue key; | ||
550 | cTValue *tv; | ||
551 | setlightudV(&key, (void *)&KEY_PROFILE_FUNC); | ||
552 | tv = lj_tab_get(L, tabV(registry(L)), &key); | ||
553 | if (tvisfunc(tv)) { | ||
554 | char vmst = (char)vmstate; | ||
555 | int status; | ||
556 | setfuncV(L2, L2->top++, funcV(tv)); | ||
557 | setthreadV(L2, L2->top++, L); | ||
558 | setintV(L2->top++, samples); | ||
559 | setstrV(L2, L2->top++, lj_str_new(L2, &vmst, 1)); | ||
560 | status = lua_pcall(L2, 3, 0, 0); /* callback(thread, samples, vmstate) */ | ||
561 | if (status) { | ||
562 | if (G(L2)->panic) G(L2)->panic(L2); | ||
563 | exit(EXIT_FAILURE); | ||
564 | } | ||
565 | lj_trace_abort(G(L2)); | ||
566 | } | ||
567 | } | ||
568 | |||
569 | /* profile.start(mode, cb) */ | ||
570 | LJLIB_CF(jit_profile_start) | ||
571 | { | ||
572 | GCtab *registry = tabV(registry(L)); | ||
573 | GCstr *mode = lj_lib_optstr(L, 1); | ||
574 | GCfunc *func = lj_lib_checkfunc(L, 2); | ||
575 | lua_State *L2 = lua_newthread(L); /* Thread that runs profiler callback. */ | ||
576 | TValue key; | ||
577 | /* Anchor thread and function in registry. */ | ||
578 | setlightudV(&key, (void *)&KEY_PROFILE_THREAD); | ||
579 | setthreadV(L, lj_tab_set(L, registry, &key), L2); | ||
580 | setlightudV(&key, (void *)&KEY_PROFILE_FUNC); | ||
581 | setfuncV(L, lj_tab_set(L, registry, &key), func); | ||
582 | lj_gc_anybarriert(L, registry); | ||
583 | luaJIT_profile_start(L, mode ? strdata(mode) : "", | ||
584 | (luaJIT_profile_callback)jit_profile_callback, L2); | ||
585 | return 0; | ||
586 | } | ||
587 | |||
588 | /* profile.stop() */ | ||
589 | LJLIB_CF(jit_profile_stop) | ||
590 | { | ||
591 | GCtab *registry; | ||
592 | TValue key; | ||
593 | luaJIT_profile_stop(L); | ||
594 | registry = tabV(registry(L)); | ||
595 | setlightudV(&key, (void *)&KEY_PROFILE_THREAD); | ||
596 | setnilV(lj_tab_set(L, registry, &key)); | ||
597 | setlightudV(&key, (void *)&KEY_PROFILE_FUNC); | ||
598 | setnilV(lj_tab_set(L, registry, &key)); | ||
599 | lj_gc_anybarriert(L, registry); | ||
600 | return 0; | ||
601 | } | ||
602 | |||
603 | /* dump = profile.dumpstack([thread,] fmt, depth) */ | ||
604 | LJLIB_CF(jit_profile_dumpstack) | ||
605 | { | ||
606 | lua_State *L2 = L; | ||
607 | int arg = 0; | ||
608 | size_t len; | ||
609 | int depth; | ||
610 | GCstr *fmt; | ||
611 | const char *p; | ||
612 | if (L->top > L->base && tvisthread(L->base)) { | ||
613 | L2 = threadV(L->base); | ||
614 | arg = 1; | ||
615 | } | ||
616 | fmt = lj_lib_checkstr(L, arg+1); | ||
617 | depth = lj_lib_checkint(L, arg+2); | ||
618 | p = luaJIT_profile_dumpstack(L2, strdata(fmt), depth, &len); | ||
619 | lua_pushlstring(L, p, len); | ||
620 | return 1; | ||
621 | } | ||
622 | |||
623 | #include "lj_libdef.h" | ||
624 | |||
625 | static int luaopen_jit_profile(lua_State *L) | ||
626 | { | ||
627 | LJ_LIB_REG(L, NULL, jit_profile); | ||
628 | return 1; | ||
629 | } | ||
630 | |||
631 | #endif | ||
632 | |||
517 | /* -- JIT compiler initialization ----------------------------------------- */ | 633 | /* -- JIT compiler initialization ----------------------------------------- */ |
518 | 634 | ||
519 | #if LJ_HASJIT | 635 | #if LJ_HASJIT |
@@ -539,38 +655,31 @@ static uint32_t jit_cpudetect(lua_State *L) | |||
539 | uint32_t features[4]; | 655 | uint32_t features[4]; |
540 | if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) { | 656 | if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) { |
541 | #if !LJ_HASJIT | 657 | #if !LJ_HASJIT |
542 | #define JIT_F_CMOV 1 | ||
543 | #define JIT_F_SSE2 2 | 658 | #define JIT_F_SSE2 2 |
544 | #endif | 659 | #endif |
545 | flags |= ((features[3] >> 15)&1) * JIT_F_CMOV; | ||
546 | flags |= ((features[3] >> 26)&1) * JIT_F_SSE2; | 660 | flags |= ((features[3] >> 26)&1) * JIT_F_SSE2; |
547 | #if LJ_HASJIT | 661 | #if LJ_HASJIT |
548 | flags |= ((features[2] >> 0)&1) * JIT_F_SSE3; | 662 | flags |= ((features[2] >> 0)&1) * JIT_F_SSE3; |
549 | flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1; | 663 | flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1; |
550 | if (vendor[2] == 0x6c65746e) { /* Intel. */ | 664 | if (vendor[2] == 0x6c65746e) { /* Intel. */ |
551 | if ((features[0] & 0x0ff00f00) == 0x00000f00) /* P4. */ | 665 | if ((features[0] & 0x0fff0ff0) == 0x000106c0) /* Atom. */ |
552 | flags |= JIT_F_P4; /* Currently unused. */ | ||
553 | else if ((features[0] & 0x0fff0ff0) == 0x000106c0) /* Atom. */ | ||
554 | flags |= JIT_F_LEA_AGU; | 666 | flags |= JIT_F_LEA_AGU; |
555 | } else if (vendor[2] == 0x444d4163) { /* AMD. */ | 667 | } else if (vendor[2] == 0x444d4163) { /* AMD. */ |
556 | uint32_t fam = (features[0] & 0x0ff00f00); | 668 | uint32_t fam = (features[0] & 0x0ff00f00); |
557 | if (fam == 0x00000f00) /* K8. */ | ||
558 | flags |= JIT_F_SPLIT_XMM; | ||
559 | if (fam >= 0x00000f00) /* K8, K10. */ | 669 | if (fam >= 0x00000f00) /* K8, K10. */ |
560 | flags |= JIT_F_PREFER_IMUL; | 670 | flags |= JIT_F_PREFER_IMUL; |
561 | } | 671 | } |
672 | if (vendor[0] >= 7) { | ||
673 | uint32_t xfeatures[4]; | ||
674 | lj_vm_cpuid(7, xfeatures); | ||
675 | flags |= ((xfeatures[1] >> 8)&1) * JIT_F_BMI2; | ||
676 | } | ||
562 | #endif | 677 | #endif |
563 | } | 678 | } |
564 | /* Check for required instruction set support on x86 (unnecessary on x64). */ | 679 | /* Check for required instruction set support on x86 (unnecessary on x64). */ |
565 | #if LJ_TARGET_X86 | 680 | #if LJ_TARGET_X86 |
566 | #if !defined(LUAJIT_CPU_NOCMOV) | ||
567 | if (!(flags & JIT_F_CMOV)) | ||
568 | luaL_error(L, "CPU not supported"); | ||
569 | #endif | ||
570 | #if defined(LUAJIT_CPU_SSE2) | ||
571 | if (!(flags & JIT_F_SSE2)) | 681 | if (!(flags & JIT_F_SSE2)) |
572 | luaL_error(L, "CPU does not support SSE2 (recompile without -DLUAJIT_CPU_SSE2)"); | 682 | luaL_error(L, "CPU with SSE2 required"); |
573 | #endif | ||
574 | #endif | 683 | #endif |
575 | #elif LJ_TARGET_ARM | 684 | #elif LJ_TARGET_ARM |
576 | #if LJ_HASJIT | 685 | #if LJ_HASJIT |
@@ -592,6 +701,8 @@ static uint32_t jit_cpudetect(lua_State *L) | |||
592 | ver >= 60 ? JIT_F_ARMV6_ : 0; | 701 | ver >= 60 ? JIT_F_ARMV6_ : 0; |
593 | flags |= LJ_ARCH_HASFPU == 0 ? 0 : ver >= 70 ? JIT_F_VFPV3 : JIT_F_VFPV2; | 702 | flags |= LJ_ARCH_HASFPU == 0 ? 0 : ver >= 70 ? JIT_F_VFPV3 : JIT_F_VFPV2; |
594 | #endif | 703 | #endif |
704 | #elif LJ_TARGET_ARM64 | ||
705 | /* No optional CPU features to detect (for now). */ | ||
595 | #elif LJ_TARGET_PPC | 706 | #elif LJ_TARGET_PPC |
596 | #if LJ_HASJIT | 707 | #if LJ_HASJIT |
597 | #if LJ_ARCH_SQRT | 708 | #if LJ_ARCH_SQRT |
@@ -601,21 +712,23 @@ static uint32_t jit_cpudetect(lua_State *L) | |||
601 | flags |= JIT_F_ROUND; | 712 | flags |= JIT_F_ROUND; |
602 | #endif | 713 | #endif |
603 | #endif | 714 | #endif |
604 | #elif LJ_TARGET_PPCSPE | ||
605 | /* Nothing to do. */ | ||
606 | #elif LJ_TARGET_MIPS | 715 | #elif LJ_TARGET_MIPS |
607 | #if LJ_HASJIT | 716 | #if LJ_HASJIT |
608 | /* Compile-time MIPS CPU detection. */ | 717 | /* Compile-time MIPS CPU detection. */ |
609 | #if LJ_ARCH_VERSION >= 20 | 718 | #if LJ_ARCH_VERSION >= 20 |
610 | flags |= JIT_F_MIPS32R2; | 719 | flags |= JIT_F_MIPSXXR2; |
611 | #endif | 720 | #endif |
612 | /* Runtime MIPS CPU detection. */ | 721 | /* Runtime MIPS CPU detection. */ |
613 | #if defined(__GNUC__) | 722 | #if defined(__GNUC__) |
614 | if (!(flags & JIT_F_MIPS32R2)) { | 723 | if (!(flags & JIT_F_MIPSXXR2)) { |
615 | int x; | 724 | int x; |
725 | #ifdef __mips16 | ||
726 | x = 0; /* Runtime detection is difficult. Ensure optimal -march flags. */ | ||
727 | #else | ||
616 | /* On MIPS32R1 rotr is treated as srl. rotr r2,r2,1 -> srl r2,r2,1. */ | 728 | /* On MIPS32R1 rotr is treated as srl. rotr r2,r2,1 -> srl r2,r2,1. */ |
617 | __asm__("li $2, 1\n\t.long 0x00221042\n\tmove %0, $2" : "=r"(x) : : "$2"); | 729 | __asm__("li $2, 1\n\t.long 0x00221042\n\tmove %0, $2" : "=r"(x) : : "$2"); |
618 | if (x) flags |= JIT_F_MIPS32R2; /* Either 0x80000000 (R2) or 0 (R1). */ | 730 | #endif |
731 | if (x) flags |= JIT_F_MIPSXXR2; /* Either 0x80000000 (R2) or 0 (R1). */ | ||
619 | } | 732 | } |
620 | #endif | 733 | #endif |
621 | #endif | 734 | #endif |
@@ -632,11 +745,7 @@ static void jit_init(lua_State *L) | |||
632 | uint32_t flags = jit_cpudetect(L); | 745 | uint32_t flags = jit_cpudetect(L); |
633 | #if LJ_HASJIT | 746 | #if LJ_HASJIT |
634 | jit_State *J = L2J(L); | 747 | jit_State *J = L2J(L); |
635 | #if LJ_TARGET_X86 | 748 | J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT; |
636 | /* Silently turn off the JIT compiler on CPUs without SSE2. */ | ||
637 | if ((flags & JIT_F_SSE2)) | ||
638 | #endif | ||
639 | J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT; | ||
640 | memcpy(J->param, jit_param_default, sizeof(J->param)); | 749 | memcpy(J->param, jit_param_default, sizeof(J->param)); |
641 | lj_dispatch_update(G(L)); | 750 | lj_dispatch_update(G(L)); |
642 | #else | 751 | #else |
@@ -646,19 +755,23 @@ static void jit_init(lua_State *L) | |||
646 | 755 | ||
647 | LUALIB_API int luaopen_jit(lua_State *L) | 756 | LUALIB_API int luaopen_jit(lua_State *L) |
648 | { | 757 | { |
758 | jit_init(L); | ||
649 | lua_pushliteral(L, LJ_OS_NAME); | 759 | lua_pushliteral(L, LJ_OS_NAME); |
650 | lua_pushliteral(L, LJ_ARCH_NAME); | 760 | lua_pushliteral(L, LJ_ARCH_NAME); |
651 | lua_pushinteger(L, LUAJIT_VERSION_NUM); | 761 | lua_pushinteger(L, LUAJIT_VERSION_NUM); |
652 | lua_pushliteral(L, LUAJIT_VERSION); | 762 | lua_pushliteral(L, LUAJIT_VERSION); |
653 | LJ_LIB_REG(L, LUA_JITLIBNAME, jit); | 763 | LJ_LIB_REG(L, LUA_JITLIBNAME, jit); |
764 | #if LJ_HASPROFILE | ||
765 | lj_lib_prereg(L, LUA_JITLIBNAME ".profile", luaopen_jit_profile, | ||
766 | tabref(L->env)); | ||
767 | #endif | ||
654 | #ifndef LUAJIT_DISABLE_JITUTIL | 768 | #ifndef LUAJIT_DISABLE_JITUTIL |
655 | LJ_LIB_REG(L, "jit.util", jit_util); | 769 | lj_lib_prereg(L, LUA_JITLIBNAME ".util", luaopen_jit_util, tabref(L->env)); |
656 | #endif | 770 | #endif |
657 | #if LJ_HASJIT | 771 | #if LJ_HASJIT |
658 | LJ_LIB_REG(L, "jit.opt", jit_opt); | 772 | LJ_LIB_REG(L, "jit.opt", jit_opt); |
659 | #endif | 773 | #endif |
660 | L->top -= 2; | 774 | L->top -= 2; |
661 | jit_init(L); | ||
662 | return 1; | 775 | return 1; |
663 | } | 776 | } |
664 | 777 | ||
diff --git a/src/lib_math.c b/src/lib_math.c index 9d324d7e..3fd466ca 100644 --- a/src/lib_math.c +++ b/src/lib_math.c | |||
@@ -47,12 +47,6 @@ LJLIB_ASM_(math_tanh) LJLIB_REC(math_htrig IRCALL_tanh) | |||
47 | LJLIB_ASM_(math_frexp) | 47 | LJLIB_ASM_(math_frexp) |
48 | LJLIB_ASM_(math_modf) LJLIB_REC(.) | 48 | LJLIB_ASM_(math_modf) LJLIB_REC(.) |
49 | 49 | ||
50 | LJLIB_PUSH(57.29577951308232) | ||
51 | LJLIB_ASM_(math_deg) LJLIB_REC(math_degrad) | ||
52 | |||
53 | LJLIB_PUSH(0.017453292519943295) | ||
54 | LJLIB_ASM_(math_rad) LJLIB_REC(math_degrad) | ||
55 | |||
56 | LJLIB_ASM(math_log) LJLIB_REC(math_log) | 50 | LJLIB_ASM(math_log) LJLIB_REC(math_log) |
57 | { | 51 | { |
58 | double x = lj_lib_checknum(L, 1); | 52 | double x = lj_lib_checknum(L, 1); |
@@ -63,12 +57,15 @@ LJLIB_ASM(math_log) LJLIB_REC(math_log) | |||
63 | #else | 57 | #else |
64 | x = lj_vm_log2(x); y = 1.0 / lj_vm_log2(y); | 58 | x = lj_vm_log2(x); y = 1.0 / lj_vm_log2(y); |
65 | #endif | 59 | #endif |
66 | setnumV(L->base-1, x*y); /* Do NOT join the expression to x / y. */ | 60 | setnumV(L->base-1-LJ_FR2, x*y); /* Do NOT join the expression to x / y. */ |
67 | return FFH_RES(1); | 61 | return FFH_RES(1); |
68 | } | 62 | } |
69 | return FFH_RETRY; | 63 | return FFH_RETRY; |
70 | } | 64 | } |
71 | 65 | ||
66 | LJLIB_LUA(math_deg) /* function(x) return x * 57.29577951308232 end */ | ||
67 | LJLIB_LUA(math_rad) /* function(x) return x * 0.017453292519943295 end */ | ||
68 | |||
72 | LJLIB_ASM(math_atan2) LJLIB_REC(.) | 69 | LJLIB_ASM(math_atan2) LJLIB_REC(.) |
73 | { | 70 | { |
74 | lj_lib_checknum(L, 1); | 71 | lj_lib_checknum(L, 1); |
@@ -224,10 +221,6 @@ LUALIB_API int luaopen_math(lua_State *L) | |||
224 | rs = (RandomState *)lua_newuserdata(L, sizeof(RandomState)); | 221 | rs = (RandomState *)lua_newuserdata(L, sizeof(RandomState)); |
225 | rs->valid = 0; /* Use lazy initialization to save some time on startup. */ | 222 | rs->valid = 0; /* Use lazy initialization to save some time on startup. */ |
226 | LJ_LIB_REG(L, LUA_MATHLIBNAME, math); | 223 | LJ_LIB_REG(L, LUA_MATHLIBNAME, math); |
227 | #if defined(LUA_COMPAT_MOD) && !LJ_52 | ||
228 | lua_getfield(L, -1, "fmod"); | ||
229 | lua_setfield(L, -2, "mod"); | ||
230 | #endif | ||
231 | return 1; | 224 | return 1; |
232 | } | 225 | } |
233 | 226 | ||
diff --git a/src/lib_os.c b/src/lib_os.c index a8e1708f..47893766 100644 --- a/src/lib_os.c +++ b/src/lib_os.c | |||
@@ -17,7 +17,10 @@ | |||
17 | #include "lualib.h" | 17 | #include "lualib.h" |
18 | 18 | ||
19 | #include "lj_obj.h" | 19 | #include "lj_obj.h" |
20 | #include "lj_gc.h" | ||
20 | #include "lj_err.h" | 21 | #include "lj_err.h" |
22 | #include "lj_buf.h" | ||
23 | #include "lj_str.h" | ||
21 | #include "lj_lib.h" | 24 | #include "lj_lib.h" |
22 | 25 | ||
23 | #if LJ_TARGET_POSIX | 26 | #if LJ_TARGET_POSIX |
@@ -188,7 +191,7 @@ LJLIB_CF(os_date) | |||
188 | #endif | 191 | #endif |
189 | } | 192 | } |
190 | if (stm == NULL) { /* Invalid date? */ | 193 | if (stm == NULL) { /* Invalid date? */ |
191 | setnilV(L->top-1); | 194 | setnilV(L->top++); |
192 | } else if (strcmp(s, "*t") == 0) { | 195 | } else if (strcmp(s, "*t") == 0) { |
193 | lua_createtable(L, 0, 9); /* 9 = number of fields */ | 196 | lua_createtable(L, 0, 9); /* 9 = number of fields */ |
194 | setfield(L, "sec", stm->tm_sec); | 197 | setfield(L, "sec", stm->tm_sec); |
@@ -200,23 +203,25 @@ LJLIB_CF(os_date) | |||
200 | setfield(L, "wday", stm->tm_wday+1); | 203 | setfield(L, "wday", stm->tm_wday+1); |
201 | setfield(L, "yday", stm->tm_yday+1); | 204 | setfield(L, "yday", stm->tm_yday+1); |
202 | setboolfield(L, "isdst", stm->tm_isdst); | 205 | setboolfield(L, "isdst", stm->tm_isdst); |
203 | } else { | 206 | } else if (*s) { |
204 | char cc[3]; | 207 | SBuf *sb = &G(L)->tmpbuf; |
205 | luaL_Buffer b; | 208 | MSize sz = 0, retry = 4; |
206 | cc[0] = '%'; cc[2] = '\0'; | 209 | const char *q; |
207 | luaL_buffinit(L, &b); | 210 | for (q = s; *q; q++) |
208 | for (; *s; s++) { | 211 | sz += (*q == '%') ? 30 : 1; /* Overflow doesn't matter. */ |
209 | if (*s != '%' || *(s + 1) == '\0') { /* No conversion specifier? */ | 212 | setsbufL(sb, L); |
210 | luaL_addchar(&b, *s); | 213 | while (retry--) { /* Limit growth for invalid format or empty result. */ |
211 | } else { | 214 | char *buf = lj_buf_need(sb, sz); |
212 | size_t reslen; | 215 | size_t len = strftime(buf, sbufsz(sb), s, stm); |
213 | char buff[200]; /* Should be big enough for any conversion result. */ | 216 | if (len) { |
214 | cc[1] = *(++s); | 217 | setstrV(L, L->top++, lj_str_new(L, buf, len)); |
215 | reslen = strftime(buff, sizeof(buff), cc, stm); | 218 | lj_gc_check(L); |
216 | luaL_addlstring(&b, buff, reslen); | 219 | break; |
217 | } | 220 | } |
221 | sz += (sz|1); | ||
218 | } | 222 | } |
219 | luaL_pushresult(&b); | 223 | } else { |
224 | setstrV(L, L->top++, &G(L)->strempty); | ||
220 | } | 225 | } |
221 | return 1; | 226 | return 1; |
222 | } | 227 | } |
diff --git a/src/lib_package.c b/src/lib_package.c index a8bdcf17..5d8eb25d 100644 --- a/src/lib_package.c +++ b/src/lib_package.c | |||
@@ -76,6 +76,20 @@ static const char *ll_bcsym(void *lib, const char *sym) | |||
76 | BOOL WINAPI GetModuleHandleExA(DWORD, LPCSTR, HMODULE*); | 76 | BOOL WINAPI GetModuleHandleExA(DWORD, LPCSTR, HMODULE*); |
77 | #endif | 77 | #endif |
78 | 78 | ||
79 | #if LJ_TARGET_UWP | ||
80 | void *LJ_WIN_LOADLIBA(const char *path) | ||
81 | { | ||
82 | DWORD err = GetLastError(); | ||
83 | wchar_t wpath[256]; | ||
84 | HANDLE lib = NULL; | ||
85 | if (MultiByteToWideChar(CP_ACP, 0, path, -1, wpath, 256) > 0) { | ||
86 | lib = LoadPackagedLibrary(wpath, 0); | ||
87 | } | ||
88 | SetLastError(err); | ||
89 | return lib; | ||
90 | } | ||
91 | #endif | ||
92 | |||
79 | #undef setprogdir | 93 | #undef setprogdir |
80 | 94 | ||
81 | static void setprogdir(lua_State *L) | 95 | static void setprogdir(lua_State *L) |
@@ -96,9 +110,17 @@ static void setprogdir(lua_State *L) | |||
96 | static void pusherror(lua_State *L) | 110 | static void pusherror(lua_State *L) |
97 | { | 111 | { |
98 | DWORD error = GetLastError(); | 112 | DWORD error = GetLastError(); |
113 | #if LJ_TARGET_XBOXONE | ||
114 | wchar_t wbuffer[128]; | ||
115 | char buffer[128*2]; | ||
116 | if (FormatMessageW(FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_FROM_SYSTEM, | ||
117 | NULL, error, 0, wbuffer, sizeof(wbuffer)/sizeof(wchar_t), NULL) && | ||
118 | WideCharToMultiByte(CP_ACP, 0, wbuffer, 128, buffer, 128*2, NULL, NULL)) | ||
119 | #else | ||
99 | char buffer[128]; | 120 | char buffer[128]; |
100 | if (FormatMessageA(FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_FROM_SYSTEM, | 121 | if (FormatMessageA(FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_FROM_SYSTEM, |
101 | NULL, error, 0, buffer, sizeof(buffer), NULL)) | 122 | NULL, error, 0, buffer, sizeof(buffer), NULL)) |
123 | #endif | ||
102 | lua_pushstring(L, buffer); | 124 | lua_pushstring(L, buffer); |
103 | else | 125 | else |
104 | lua_pushfstring(L, "system error %d\n", error); | 126 | lua_pushfstring(L, "system error %d\n", error); |
@@ -111,7 +133,7 @@ static void ll_unloadlib(void *lib) | |||
111 | 133 | ||
112 | static void *ll_load(lua_State *L, const char *path, int gl) | 134 | static void *ll_load(lua_State *L, const char *path, int gl) |
113 | { | 135 | { |
114 | HINSTANCE lib = LoadLibraryA(path); | 136 | HINSTANCE lib = LJ_WIN_LOADLIBA(path); |
115 | if (lib == NULL) pusherror(L); | 137 | if (lib == NULL) pusherror(L); |
116 | UNUSED(gl); | 138 | UNUSED(gl); |
117 | return lib; | 139 | return lib; |
@@ -124,17 +146,25 @@ static lua_CFunction ll_sym(lua_State *L, void *lib, const char *sym) | |||
124 | return f; | 146 | return f; |
125 | } | 147 | } |
126 | 148 | ||
149 | #if LJ_TARGET_UWP | ||
150 | EXTERN_C IMAGE_DOS_HEADER __ImageBase; | ||
151 | #endif | ||
152 | |||
127 | static const char *ll_bcsym(void *lib, const char *sym) | 153 | static const char *ll_bcsym(void *lib, const char *sym) |
128 | { | 154 | { |
129 | if (lib) { | 155 | if (lib) { |
130 | return (const char *)GetProcAddress((HINSTANCE)lib, sym); | 156 | return (const char *)GetProcAddress((HINSTANCE)lib, sym); |
131 | } else { | 157 | } else { |
158 | #if LJ_TARGET_UWP | ||
159 | return (const char *)GetProcAddress((HINSTANCE)&__ImageBase, sym); | ||
160 | #else | ||
132 | HINSTANCE h = GetModuleHandleA(NULL); | 161 | HINSTANCE h = GetModuleHandleA(NULL); |
133 | const char *p = (const char *)GetProcAddress(h, sym); | 162 | const char *p = (const char *)GetProcAddress(h, sym); |
134 | if (p == NULL && GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, | 163 | if (p == NULL && GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, |
135 | (const char *)ll_bcsym, &h)) | 164 | (const char *)ll_bcsym, &h)) |
136 | p = (const char *)GetProcAddress(h, sym); | 165 | p = (const char *)GetProcAddress(h, sym); |
137 | return p; | 166 | return p; |
167 | #endif | ||
138 | } | 168 | } |
139 | } | 169 | } |
140 | 170 | ||
@@ -185,8 +215,7 @@ static void **ll_register(lua_State *L, const char *path) | |||
185 | lua_pop(L, 1); | 215 | lua_pop(L, 1); |
186 | plib = (void **)lua_newuserdata(L, sizeof(void *)); | 216 | plib = (void **)lua_newuserdata(L, sizeof(void *)); |
187 | *plib = NULL; | 217 | *plib = NULL; |
188 | luaL_getmetatable(L, "_LOADLIB"); | 218 | luaL_setmetatable(L, "_LOADLIB"); |
189 | lua_setmetatable(L, -2); | ||
190 | lua_pushfstring(L, "LOADLIB: %s", path); | 219 | lua_pushfstring(L, "LOADLIB: %s", path); |
191 | lua_pushvalue(L, -2); | 220 | lua_pushvalue(L, -2); |
192 | lua_settable(L, LUA_REGISTRYINDEX); | 221 | lua_settable(L, LUA_REGISTRYINDEX); |
@@ -391,8 +420,7 @@ static int lj_cf_package_loader_preload(lua_State *L) | |||
391 | 420 | ||
392 | /* ------------------------------------------------------------------------ */ | 421 | /* ------------------------------------------------------------------------ */ |
393 | 422 | ||
394 | static const int sentinel_ = 0; | 423 | #define sentinel ((void *)0x4004) |
395 | #define sentinel ((void *)&sentinel_) | ||
396 | 424 | ||
397 | static int lj_cf_package_require(lua_State *L) | 425 | static int lj_cf_package_require(lua_State *L) |
398 | { | 426 | { |
@@ -482,29 +510,19 @@ static void modinit(lua_State *L, const char *modname) | |||
482 | static int lj_cf_package_module(lua_State *L) | 510 | static int lj_cf_package_module(lua_State *L) |
483 | { | 511 | { |
484 | const char *modname = luaL_checkstring(L, 1); | 512 | const char *modname = luaL_checkstring(L, 1); |
485 | int loaded = lua_gettop(L) + 1; /* index of _LOADED table */ | 513 | int lastarg = (int)(L->top - L->base); |
486 | lua_getfield(L, LUA_REGISTRYINDEX, "_LOADED"); | 514 | luaL_pushmodule(L, modname, 1); |
487 | lua_getfield(L, loaded, modname); /* get _LOADED[modname] */ | ||
488 | if (!lua_istable(L, -1)) { /* not found? */ | ||
489 | lua_pop(L, 1); /* remove previous result */ | ||
490 | /* try global variable (and create one if it does not exist) */ | ||
491 | if (luaL_findtable(L, LUA_GLOBALSINDEX, modname, 1) != NULL) | ||
492 | lj_err_callerv(L, LJ_ERR_BADMODN, modname); | ||
493 | lua_pushvalue(L, -1); | ||
494 | lua_setfield(L, loaded, modname); /* _LOADED[modname] = new table */ | ||
495 | } | ||
496 | /* check whether table already has a _NAME field */ | ||
497 | lua_getfield(L, -1, "_NAME"); | 515 | lua_getfield(L, -1, "_NAME"); |
498 | if (!lua_isnil(L, -1)) { /* is table an initialized module? */ | 516 | if (!lua_isnil(L, -1)) { /* Module already initialized? */ |
499 | lua_pop(L, 1); | 517 | lua_pop(L, 1); |
500 | } else { /* no; initialize it */ | 518 | } else { |
501 | lua_pop(L, 1); | 519 | lua_pop(L, 1); |
502 | modinit(L, modname); | 520 | modinit(L, modname); |
503 | } | 521 | } |
504 | lua_pushvalue(L, -1); | 522 | lua_pushvalue(L, -1); |
505 | setfenv(L); | 523 | setfenv(L); |
506 | dooptions(L, loaded - 1); | 524 | dooptions(L, lastarg); |
507 | return 0; | 525 | return LJ_52; |
508 | } | 526 | } |
509 | 527 | ||
510 | static int lj_cf_package_seeall(lua_State *L) | 528 | static int lj_cf_package_seeall(lua_State *L) |
@@ -575,13 +593,16 @@ LUALIB_API int luaopen_package(lua_State *L) | |||
575 | lj_lib_pushcf(L, lj_cf_package_unloadlib, 1); | 593 | lj_lib_pushcf(L, lj_cf_package_unloadlib, 1); |
576 | lua_setfield(L, -2, "__gc"); | 594 | lua_setfield(L, -2, "__gc"); |
577 | luaL_register(L, LUA_LOADLIBNAME, package_lib); | 595 | luaL_register(L, LUA_LOADLIBNAME, package_lib); |
578 | lua_pushvalue(L, -1); | 596 | lua_copy(L, -1, LUA_ENVIRONINDEX); |
579 | lua_replace(L, LUA_ENVIRONINDEX); | ||
580 | lua_createtable(L, sizeof(package_loaders)/sizeof(package_loaders[0])-1, 0); | 597 | lua_createtable(L, sizeof(package_loaders)/sizeof(package_loaders[0])-1, 0); |
581 | for (i = 0; package_loaders[i] != NULL; i++) { | 598 | for (i = 0; package_loaders[i] != NULL; i++) { |
582 | lj_lib_pushcf(L, package_loaders[i], 1); | 599 | lj_lib_pushcf(L, package_loaders[i], 1); |
583 | lua_rawseti(L, -2, i+1); | 600 | lua_rawseti(L, -2, i+1); |
584 | } | 601 | } |
602 | #if LJ_52 | ||
603 | lua_pushvalue(L, -1); | ||
604 | lua_setfield(L, -3, "searchers"); | ||
605 | #endif | ||
585 | lua_setfield(L, -2, "loaders"); | 606 | lua_setfield(L, -2, "loaders"); |
586 | lua_getfield(L, LUA_REGISTRYINDEX, "LUA_NOENV"); | 607 | lua_getfield(L, LUA_REGISTRYINDEX, "LUA_NOENV"); |
587 | noenv = lua_toboolean(L, -1); | 608 | noenv = lua_toboolean(L, -1); |
diff --git a/src/lib_string.c b/src/lib_string.c index d0b79160..6b88ee9b 100644 --- a/src/lib_string.c +++ b/src/lib_string.c | |||
@@ -6,8 +6,6 @@ | |||
6 | ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h | 6 | ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h |
7 | */ | 7 | */ |
8 | 8 | ||
9 | #include <stdio.h> | ||
10 | |||
11 | #define lib_string_c | 9 | #define lib_string_c |
12 | #define LUA_LIB | 10 | #define LUA_LIB |
13 | 11 | ||
@@ -18,6 +16,7 @@ | |||
18 | #include "lj_obj.h" | 16 | #include "lj_obj.h" |
19 | #include "lj_gc.h" | 17 | #include "lj_gc.h" |
20 | #include "lj_err.h" | 18 | #include "lj_err.h" |
19 | #include "lj_buf.h" | ||
21 | #include "lj_str.h" | 20 | #include "lj_str.h" |
22 | #include "lj_tab.h" | 21 | #include "lj_tab.h" |
23 | #include "lj_meta.h" | 22 | #include "lj_meta.h" |
@@ -25,17 +24,19 @@ | |||
25 | #include "lj_ff.h" | 24 | #include "lj_ff.h" |
26 | #include "lj_bcdump.h" | 25 | #include "lj_bcdump.h" |
27 | #include "lj_char.h" | 26 | #include "lj_char.h" |
27 | #include "lj_strfmt.h" | ||
28 | #include "lj_lib.h" | 28 | #include "lj_lib.h" |
29 | 29 | ||
30 | /* ------------------------------------------------------------------------ */ | 30 | /* ------------------------------------------------------------------------ */ |
31 | 31 | ||
32 | #define LJLIB_MODULE_string | 32 | #define LJLIB_MODULE_string |
33 | 33 | ||
34 | LJLIB_ASM(string_len) LJLIB_REC(.) | 34 | LJLIB_LUA(string_len) /* |
35 | { | 35 | function(s) |
36 | lj_lib_checkstr(L, 1); | 36 | CHECK_str(s) |
37 | return FFH_RETRY; | 37 | return #s |
38 | } | 38 | end |
39 | */ | ||
39 | 40 | ||
40 | LJLIB_ASM(string_byte) LJLIB_REC(string_range 0) | 41 | LJLIB_ASM(string_byte) LJLIB_REC(string_range 0) |
41 | { | 42 | { |
@@ -57,21 +58,21 @@ LJLIB_ASM(string_byte) LJLIB_REC(string_range 0) | |||
57 | lj_state_checkstack(L, (MSize)n); | 58 | lj_state_checkstack(L, (MSize)n); |
58 | p = (const unsigned char *)strdata(s) + start; | 59 | p = (const unsigned char *)strdata(s) + start; |
59 | for (i = 0; i < n; i++) | 60 | for (i = 0; i < n; i++) |
60 | setintV(L->base + i-1, p[i]); | 61 | setintV(L->base + i-1-LJ_FR2, p[i]); |
61 | return FFH_RES(n); | 62 | return FFH_RES(n); |
62 | } | 63 | } |
63 | 64 | ||
64 | LJLIB_ASM(string_char) | 65 | LJLIB_ASM(string_char) LJLIB_REC(.) |
65 | { | 66 | { |
66 | int i, nargs = (int)(L->top - L->base); | 67 | int i, nargs = (int)(L->top - L->base); |
67 | char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, (MSize)nargs); | 68 | char *buf = lj_buf_tmp(L, (MSize)nargs); |
68 | for (i = 1; i <= nargs; i++) { | 69 | for (i = 1; i <= nargs; i++) { |
69 | int32_t k = lj_lib_checkint(L, i); | 70 | int32_t k = lj_lib_checkint(L, i); |
70 | if (!checku8(k)) | 71 | if (!checku8(k)) |
71 | lj_err_arg(L, i, LJ_ERR_BADVAL); | 72 | lj_err_arg(L, i, LJ_ERR_BADVAL); |
72 | buf[i-1] = (char)k; | 73 | buf[i-1] = (char)k; |
73 | } | 74 | } |
74 | setstrV(L, L->base-1, lj_str_new(L, buf, (size_t)nargs)); | 75 | setstrV(L, L->base-1-LJ_FR2, lj_str_new(L, buf, (size_t)nargs)); |
75 | return FFH_RES(1); | 76 | return FFH_RES(1); |
76 | } | 77 | } |
77 | 78 | ||
@@ -83,68 +84,38 @@ LJLIB_ASM(string_sub) LJLIB_REC(string_range 1) | |||
83 | return FFH_RETRY; | 84 | return FFH_RETRY; |
84 | } | 85 | } |
85 | 86 | ||
86 | LJLIB_ASM(string_rep) | 87 | LJLIB_CF(string_rep) LJLIB_REC(.) |
87 | { | 88 | { |
88 | GCstr *s = lj_lib_checkstr(L, 1); | 89 | GCstr *s = lj_lib_checkstr(L, 1); |
89 | int32_t k = lj_lib_checkint(L, 2); | 90 | int32_t rep = lj_lib_checkint(L, 2); |
90 | GCstr *sep = lj_lib_optstr(L, 3); | 91 | GCstr *sep = lj_lib_optstr(L, 3); |
91 | int32_t len = (int32_t)s->len; | 92 | SBuf *sb = lj_buf_tmp_(L); |
92 | global_State *g = G(L); | 93 | if (sep && rep > 1) { |
93 | int64_t tlen; | 94 | GCstr *s2 = lj_buf_cat2str(L, sep, s); |
94 | const char *src; | 95 | lj_buf_reset(sb); |
95 | char *buf; | 96 | lj_buf_putstr(sb, s); |
96 | if (k <= 0) { | 97 | s = s2; |
97 | empty: | 98 | rep--; |
98 | setstrV(L, L->base-1, &g->strempty); | ||
99 | return FFH_RES(1); | ||
100 | } | ||
101 | if (sep) { | ||
102 | tlen = (int64_t)len + sep->len; | ||
103 | if (tlen > LJ_MAX_STR) | ||
104 | lj_err_caller(L, LJ_ERR_STROV); | ||
105 | tlen *= k; | ||
106 | if (tlen > LJ_MAX_STR) | ||
107 | lj_err_caller(L, LJ_ERR_STROV); | ||
108 | } else { | ||
109 | tlen = (int64_t)k * len; | ||
110 | if (tlen > LJ_MAX_STR) | ||
111 | lj_err_caller(L, LJ_ERR_STROV); | ||
112 | } | ||
113 | if (tlen == 0) goto empty; | ||
114 | buf = lj_str_needbuf(L, &g->tmpbuf, (MSize)tlen); | ||
115 | src = strdata(s); | ||
116 | if (sep) { | ||
117 | tlen -= sep->len; /* Ignore trailing separator. */ | ||
118 | if (k > 1) { /* Paste one string and one separator. */ | ||
119 | int32_t i; | ||
120 | i = 0; while (i < len) *buf++ = src[i++]; | ||
121 | src = strdata(sep); len = sep->len; | ||
122 | i = 0; while (i < len) *buf++ = src[i++]; | ||
123 | src = g->tmpbuf.buf; len += s->len; k--; /* Now copy that k-1 times. */ | ||
124 | } | ||
125 | } | 99 | } |
126 | do { | 100 | sb = lj_buf_putstr_rep(sb, s, rep); |
127 | int32_t i = 0; | 101 | setstrV(L, L->top-1, lj_buf_str(L, sb)); |
128 | do { *buf++ = src[i++]; } while (i < len); | 102 | lj_gc_check(L); |
129 | } while (--k > 0); | 103 | return 1; |
130 | setstrV(L, L->base-1, lj_str_new(L, g->tmpbuf.buf, (size_t)tlen)); | ||
131 | return FFH_RES(1); | ||
132 | } | 104 | } |
133 | 105 | ||
134 | LJLIB_ASM(string_reverse) | 106 | LJLIB_ASM(string_reverse) LJLIB_REC(string_op IRCALL_lj_buf_putstr_reverse) |
135 | { | 107 | { |
136 | GCstr *s = lj_lib_checkstr(L, 1); | 108 | lj_lib_checkstr(L, 1); |
137 | lj_str_needbuf(L, &G(L)->tmpbuf, s->len); | ||
138 | return FFH_RETRY; | 109 | return FFH_RETRY; |
139 | } | 110 | } |
140 | LJLIB_ASM_(string_lower) | 111 | LJLIB_ASM_(string_lower) LJLIB_REC(string_op IRCALL_lj_buf_putstr_lower) |
141 | LJLIB_ASM_(string_upper) | 112 | LJLIB_ASM_(string_upper) LJLIB_REC(string_op IRCALL_lj_buf_putstr_upper) |
142 | 113 | ||
143 | /* ------------------------------------------------------------------------ */ | 114 | /* ------------------------------------------------------------------------ */ |
144 | 115 | ||
145 | static int writer_buf(lua_State *L, const void *p, size_t size, void *b) | 116 | static int writer_buf(lua_State *L, const void *p, size_t size, void *sb) |
146 | { | 117 | { |
147 | luaL_addlstring((luaL_Buffer *)b, (const char *)p, size); | 118 | lj_buf_putmem((SBuf *)sb, p, (MSize)size); |
148 | UNUSED(L); | 119 | UNUSED(L); |
149 | return 0; | 120 | return 0; |
150 | } | 121 | } |
@@ -153,12 +124,12 @@ LJLIB_CF(string_dump) | |||
153 | { | 124 | { |
154 | GCfunc *fn = lj_lib_checkfunc(L, 1); | 125 | GCfunc *fn = lj_lib_checkfunc(L, 1); |
155 | int strip = L->base+1 < L->top && tvistruecond(L->base+1); | 126 | int strip = L->base+1 < L->top && tvistruecond(L->base+1); |
156 | luaL_Buffer b; | 127 | SBuf *sb = lj_buf_tmp_(L); /* Assumes lj_bcwrite() doesn't use tmpbuf. */ |
157 | L->top = L->base+1; | 128 | L->top = L->base+1; |
158 | luaL_buffinit(L, &b); | 129 | if (!isluafunc(fn) || lj_bcwrite(L, funcproto(fn), writer_buf, sb, strip)) |
159 | if (!isluafunc(fn) || lj_bcwrite(L, funcproto(fn), writer_buf, &b, strip)) | ||
160 | lj_err_caller(L, LJ_ERR_STRDUMP); | 130 | lj_err_caller(L, LJ_ERR_STRDUMP); |
161 | luaL_pushresult(&b); | 131 | setstrV(L, L->top-1, lj_buf_str(L, sb)); |
132 | lj_gc_check(L); | ||
162 | return 1; | 133 | return 1; |
163 | } | 134 | } |
164 | 135 | ||
@@ -183,7 +154,6 @@ typedef struct MatchState { | |||
183 | } MatchState; | 154 | } MatchState; |
184 | 155 | ||
185 | #define L_ESC '%' | 156 | #define L_ESC '%' |
186 | #define SPECIALS "^$*+?.([%-" | ||
187 | 157 | ||
188 | static int check_capture(MatchState *ms, int l) | 158 | static int check_capture(MatchState *ms, int l) |
189 | { | 159 | { |
@@ -450,30 +420,6 @@ static const char *match(MatchState *ms, const char *s, const char *p) | |||
450 | return s; | 420 | return s; |
451 | } | 421 | } |
452 | 422 | ||
453 | static const char *lmemfind(const char *s1, size_t l1, | ||
454 | const char *s2, size_t l2) | ||
455 | { | ||
456 | if (l2 == 0) { | ||
457 | return s1; /* empty strings are everywhere */ | ||
458 | } else if (l2 > l1) { | ||
459 | return NULL; /* avoids a negative `l1' */ | ||
460 | } else { | ||
461 | const char *init; /* to search for a `*s2' inside `s1' */ | ||
462 | l2--; /* 1st char will be checked by `memchr' */ | ||
463 | l1 = l1-l2; /* `s2' cannot be found after that */ | ||
464 | while (l1 > 0 && (init = (const char *)memchr(s1, *s2, l1)) != NULL) { | ||
465 | init++; /* 1st char is already checked */ | ||
466 | if (memcmp(init, s2+1, l2) == 0) { | ||
467 | return init-1; | ||
468 | } else { /* correct `l1' and `s1' to try again */ | ||
469 | l1 -= (size_t)(init-s1); | ||
470 | s1 = init; | ||
471 | } | ||
472 | } | ||
473 | return NULL; /* not found */ | ||
474 | } | ||
475 | } | ||
476 | |||
477 | static void push_onecapture(MatchState *ms, int i, const char *s, const char *e) | 423 | static void push_onecapture(MatchState *ms, int i, const char *s, const char *e) |
478 | { | 424 | { |
479 | if (i >= ms->level) { | 425 | if (i >= ms->level) { |
@@ -501,64 +447,60 @@ static int push_captures(MatchState *ms, const char *s, const char *e) | |||
501 | return nlevels; /* number of strings pushed */ | 447 | return nlevels; /* number of strings pushed */ |
502 | } | 448 | } |
503 | 449 | ||
504 | static ptrdiff_t posrelat(ptrdiff_t pos, size_t len) | ||
505 | { | ||
506 | /* relative string position: negative means back from end */ | ||
507 | if (pos < 0) pos += (ptrdiff_t)len + 1; | ||
508 | return (pos >= 0) ? pos : 0; | ||
509 | } | ||
510 | |||
511 | static int str_find_aux(lua_State *L, int find) | 450 | static int str_find_aux(lua_State *L, int find) |
512 | { | 451 | { |
513 | size_t l1, l2; | 452 | GCstr *s = lj_lib_checkstr(L, 1); |
514 | const char *s = luaL_checklstring(L, 1, &l1); | 453 | GCstr *p = lj_lib_checkstr(L, 2); |
515 | const char *p = luaL_checklstring(L, 2, &l2); | 454 | int32_t start = lj_lib_optint(L, 3, 1); |
516 | ptrdiff_t init = posrelat(luaL_optinteger(L, 3, 1), l1) - 1; | 455 | MSize st; |
517 | if (init < 0) { | 456 | if (start < 0) start += (int32_t)s->len; else start--; |
518 | init = 0; | 457 | if (start < 0) start = 0; |
519 | } else if ((size_t)(init) > l1) { | 458 | st = (MSize)start; |
459 | if (st > s->len) { | ||
520 | #if LJ_52 | 460 | #if LJ_52 |
521 | setnilV(L->top-1); | 461 | setnilV(L->top-1); |
522 | return 1; | 462 | return 1; |
523 | #else | 463 | #else |
524 | init = (ptrdiff_t)l1; | 464 | st = s->len; |
525 | #endif | 465 | #endif |
526 | } | 466 | } |
527 | if (find && (lua_toboolean(L, 4) || /* explicit request? */ | 467 | if (find && ((L->base+3 < L->top && tvistruecond(L->base+3)) || |
528 | strpbrk(p, SPECIALS) == NULL)) { /* or no special characters? */ | 468 | !lj_str_haspattern(p))) { /* Search for fixed string. */ |
529 | /* do a plain search */ | 469 | const char *q = lj_str_find(strdata(s)+st, strdata(p), s->len-st, p->len); |
530 | const char *s2 = lmemfind(s+init, l1-(size_t)init, p, l2); | 470 | if (q) { |
531 | if (s2) { | 471 | setintV(L->top-2, (int32_t)(q-strdata(s)) + 1); |
532 | lua_pushinteger(L, s2-s+1); | 472 | setintV(L->top-1, (int32_t)(q-strdata(s)) + (int32_t)p->len); |
533 | lua_pushinteger(L, s2-s+(ptrdiff_t)l2); | ||
534 | return 2; | 473 | return 2; |
535 | } | 474 | } |
536 | } else { | 475 | } else { /* Search for pattern. */ |
537 | MatchState ms; | 476 | MatchState ms; |
538 | int anchor = (*p == '^') ? (p++, 1) : 0; | 477 | const char *pstr = strdata(p); |
539 | const char *s1=s+init; | 478 | const char *sstr = strdata(s) + st; |
479 | int anchor = 0; | ||
480 | if (*pstr == '^') { pstr++; anchor = 1; } | ||
540 | ms.L = L; | 481 | ms.L = L; |
541 | ms.src_init = s; | 482 | ms.src_init = strdata(s); |
542 | ms.src_end = s+l1; | 483 | ms.src_end = strdata(s) + s->len; |
543 | do { | 484 | do { /* Loop through string and try to match the pattern. */ |
544 | const char *res; | 485 | const char *q; |
545 | ms.level = ms.depth = 0; | 486 | ms.level = ms.depth = 0; |
546 | if ((res=match(&ms, s1, p)) != NULL) { | 487 | q = match(&ms, sstr, pstr); |
488 | if (q) { | ||
547 | if (find) { | 489 | if (find) { |
548 | lua_pushinteger(L, s1-s+1); /* start */ | 490 | setintV(L->top++, (int32_t)(sstr-(strdata(s)-1))); |
549 | lua_pushinteger(L, res-s); /* end */ | 491 | setintV(L->top++, (int32_t)(q-strdata(s))); |
550 | return push_captures(&ms, NULL, 0) + 2; | 492 | return push_captures(&ms, NULL, NULL) + 2; |
551 | } else { | 493 | } else { |
552 | return push_captures(&ms, s1, res); | 494 | return push_captures(&ms, sstr, q); |
553 | } | 495 | } |
554 | } | 496 | } |
555 | } while (s1++ < ms.src_end && !anchor); | 497 | } while (sstr++ < ms.src_end && !anchor); |
556 | } | 498 | } |
557 | lua_pushnil(L); /* not found */ | 499 | setnilV(L->top-1); /* Not found. */ |
558 | return 1; | 500 | return 1; |
559 | } | 501 | } |
560 | 502 | ||
561 | LJLIB_CF(string_find) | 503 | LJLIB_CF(string_find) LJLIB_REC(.) |
562 | { | 504 | { |
563 | return str_find_aux(L, 1); | 505 | return str_find_aux(L, 1); |
564 | } | 506 | } |
@@ -698,222 +640,91 @@ LJLIB_CF(string_gsub) | |||
698 | 640 | ||
699 | /* ------------------------------------------------------------------------ */ | 641 | /* ------------------------------------------------------------------------ */ |
700 | 642 | ||
701 | /* maximum size of each formatted item (> len(format('%99.99f', -1e308))) */ | 643 | /* Emulate tostring() inline. */ |
702 | #define MAX_FMTITEM 512 | 644 | static GCstr *string_fmt_tostring(lua_State *L, int arg, int retry) |
703 | /* valid flags in a format specification */ | ||
704 | #define FMT_FLAGS "-+ #0" | ||
705 | /* | ||
706 | ** maximum size of each format specification (such as '%-099.99d') | ||
707 | ** (+10 accounts for %99.99x plus margin of error) | ||
708 | */ | ||
709 | #define MAX_FMTSPEC (sizeof(FMT_FLAGS) + sizeof(LUA_INTFRMLEN) + 10) | ||
710 | |||
711 | static void addquoted(lua_State *L, luaL_Buffer *b, int arg) | ||
712 | { | ||
713 | GCstr *str = lj_lib_checkstr(L, arg); | ||
714 | int32_t len = (int32_t)str->len; | ||
715 | const char *s = strdata(str); | ||
716 | luaL_addchar(b, '"'); | ||
717 | while (len--) { | ||
718 | uint32_t c = uchar(*s); | ||
719 | if (c == '"' || c == '\\' || c == '\n') { | ||
720 | luaL_addchar(b, '\\'); | ||
721 | } else if (lj_char_iscntrl(c)) { /* This can only be 0-31 or 127. */ | ||
722 | uint32_t d; | ||
723 | luaL_addchar(b, '\\'); | ||
724 | if (c >= 100 || lj_char_isdigit(uchar(s[1]))) { | ||
725 | luaL_addchar(b, '0'+(c >= 100)); if (c >= 100) c -= 100; | ||
726 | goto tens; | ||
727 | } else if (c >= 10) { | ||
728 | tens: | ||
729 | d = (c * 205) >> 11; c -= d * 10; luaL_addchar(b, '0'+d); | ||
730 | } | ||
731 | c += '0'; | ||
732 | } | ||
733 | luaL_addchar(b, c); | ||
734 | s++; | ||
735 | } | ||
736 | luaL_addchar(b, '"'); | ||
737 | } | ||
738 | |||
739 | static const char *scanformat(lua_State *L, const char *strfrmt, char *form) | ||
740 | { | ||
741 | const char *p = strfrmt; | ||
742 | while (*p != '\0' && strchr(FMT_FLAGS, *p) != NULL) p++; /* skip flags */ | ||
743 | if ((size_t)(p - strfrmt) >= sizeof(FMT_FLAGS)) | ||
744 | lj_err_caller(L, LJ_ERR_STRFMTR); | ||
745 | if (lj_char_isdigit(uchar(*p))) p++; /* skip width */ | ||
746 | if (lj_char_isdigit(uchar(*p))) p++; /* (2 digits at most) */ | ||
747 | if (*p == '.') { | ||
748 | p++; | ||
749 | if (lj_char_isdigit(uchar(*p))) p++; /* skip precision */ | ||
750 | if (lj_char_isdigit(uchar(*p))) p++; /* (2 digits at most) */ | ||
751 | } | ||
752 | if (lj_char_isdigit(uchar(*p))) | ||
753 | lj_err_caller(L, LJ_ERR_STRFMTW); | ||
754 | *(form++) = '%'; | ||
755 | strncpy(form, strfrmt, (size_t)(p - strfrmt + 1)); | ||
756 | form += p - strfrmt + 1; | ||
757 | *form = '\0'; | ||
758 | return p; | ||
759 | } | ||
760 | |||
761 | static void addintlen(char *form) | ||
762 | { | ||
763 | size_t l = strlen(form); | ||
764 | char spec = form[l - 1]; | ||
765 | strcpy(form + l - 1, LUA_INTFRMLEN); | ||
766 | form[l + sizeof(LUA_INTFRMLEN) - 2] = spec; | ||
767 | form[l + sizeof(LUA_INTFRMLEN) - 1] = '\0'; | ||
768 | } | ||
769 | |||
770 | static unsigned LUA_INTFRM_T num2intfrm(lua_State *L, int arg) | ||
771 | { | ||
772 | if (sizeof(LUA_INTFRM_T) == 4) { | ||
773 | return (LUA_INTFRM_T)lj_lib_checkbit(L, arg); | ||
774 | } else { | ||
775 | cTValue *o; | ||
776 | lj_lib_checknumber(L, arg); | ||
777 | o = L->base+arg-1; | ||
778 | if (tvisint(o)) | ||
779 | return (LUA_INTFRM_T)intV(o); | ||
780 | else | ||
781 | return (LUA_INTFRM_T)numV(o); | ||
782 | } | ||
783 | } | ||
784 | |||
785 | static unsigned LUA_INTFRM_T num2uintfrm(lua_State *L, int arg) | ||
786 | { | ||
787 | if (sizeof(LUA_INTFRM_T) == 4) { | ||
788 | return (unsigned LUA_INTFRM_T)lj_lib_checkbit(L, arg); | ||
789 | } else { | ||
790 | cTValue *o; | ||
791 | lj_lib_checknumber(L, arg); | ||
792 | o = L->base+arg-1; | ||
793 | if (tvisint(o)) | ||
794 | return (unsigned LUA_INTFRM_T)intV(o); | ||
795 | else if ((int32_t)o->u32.hi < 0) | ||
796 | return (unsigned LUA_INTFRM_T)(LUA_INTFRM_T)numV(o); | ||
797 | else | ||
798 | return (unsigned LUA_INTFRM_T)numV(o); | ||
799 | } | ||
800 | } | ||
801 | |||
802 | static GCstr *meta_tostring(lua_State *L, int arg) | ||
803 | { | 645 | { |
804 | TValue *o = L->base+arg-1; | 646 | TValue *o = L->base+arg-1; |
805 | cTValue *mo; | 647 | cTValue *mo; |
806 | lua_assert(o < L->top); /* Caller already checks for existence. */ | 648 | lua_assert(o < L->top); /* Caller already checks for existence. */ |
807 | if (LJ_LIKELY(tvisstr(o))) | 649 | if (LJ_LIKELY(tvisstr(o))) |
808 | return strV(o); | 650 | return strV(o); |
809 | if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) { | 651 | if (retry != 2 && !tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) { |
810 | copyTV(L, L->top++, mo); | 652 | copyTV(L, L->top++, mo); |
811 | copyTV(L, L->top++, o); | 653 | copyTV(L, L->top++, o); |
812 | lua_call(L, 1, 1); | 654 | lua_call(L, 1, 1); |
813 | L->top--; | 655 | copyTV(L, L->base+arg-1, --L->top); |
814 | if (tvisstr(L->top)) | 656 | return NULL; /* Buffer may be overwritten, retry. */ |
815 | return strV(L->top); | ||
816 | o = L->base+arg-1; | ||
817 | copyTV(L, o, L->top); | ||
818 | } | ||
819 | if (tvisnumber(o)) { | ||
820 | return lj_str_fromnumber(L, o); | ||
821 | } else if (tvisnil(o)) { | ||
822 | return lj_str_newlit(L, "nil"); | ||
823 | } else if (tvisfalse(o)) { | ||
824 | return lj_str_newlit(L, "false"); | ||
825 | } else if (tvistrue(o)) { | ||
826 | return lj_str_newlit(L, "true"); | ||
827 | } else { | ||
828 | if (tvisfunc(o) && isffunc(funcV(o))) | ||
829 | lj_str_pushf(L, "function: builtin#%d", funcV(o)->c.ffid); | ||
830 | else | ||
831 | lj_str_pushf(L, "%s: %p", lj_typename(o), lua_topointer(L, arg)); | ||
832 | L->top--; | ||
833 | return strV(L->top); | ||
834 | } | 657 | } |
835 | } | 658 | return lj_strfmt_obj(L, o); |
836 | 659 | } | |
837 | LJLIB_CF(string_format) | 660 | |
838 | { | 661 | LJLIB_CF(string_format) LJLIB_REC(.) |
839 | int arg = 1, top = (int)(L->top - L->base); | 662 | { |
840 | GCstr *fmt = lj_lib_checkstr(L, arg); | 663 | int arg, top = (int)(L->top - L->base); |
841 | const char *strfrmt = strdata(fmt); | 664 | GCstr *fmt; |
842 | const char *strfrmt_end = strfrmt + fmt->len; | 665 | SBuf *sb; |
843 | luaL_Buffer b; | 666 | FormatState fs; |
844 | luaL_buffinit(L, &b); | 667 | SFormat sf; |
845 | while (strfrmt < strfrmt_end) { | 668 | int retry = 0; |
846 | if (*strfrmt != L_ESC) { | 669 | again: |
847 | luaL_addchar(&b, *strfrmt++); | 670 | arg = 1; |
848 | } else if (*++strfrmt == L_ESC) { | 671 | sb = lj_buf_tmp_(L); |
849 | luaL_addchar(&b, *strfrmt++); /* %% */ | 672 | fmt = lj_lib_checkstr(L, arg); |
850 | } else { /* format item */ | 673 | lj_strfmt_init(&fs, strdata(fmt), fmt->len); |
851 | char form[MAX_FMTSPEC]; /* to store the format (`%...') */ | 674 | while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) { |
852 | char buff[MAX_FMTITEM]; /* to store the formatted item */ | 675 | if (sf == STRFMT_LIT) { |
853 | int n = 0; | 676 | lj_buf_putmem(sb, fs.str, fs.len); |
677 | } else if (sf == STRFMT_ERR) { | ||
678 | lj_err_callerv(L, LJ_ERR_STRFMT, strdata(lj_str_new(L, fs.str, fs.len))); | ||
679 | } else { | ||
854 | if (++arg > top) | 680 | if (++arg > top) |
855 | luaL_argerror(L, arg, lj_obj_typename[0]); | 681 | luaL_argerror(L, arg, lj_obj_typename[0]); |
856 | strfrmt = scanformat(L, strfrmt, form); | 682 | switch (STRFMT_TYPE(sf)) { |
857 | switch (*strfrmt++) { | 683 | case STRFMT_INT: |
858 | case 'c': | 684 | if (tvisint(L->base+arg-1)) { |
859 | n = sprintf(buff, form, lj_lib_checkint(L, arg)); | 685 | int32_t k = intV(L->base+arg-1); |
686 | if (sf == STRFMT_INT) | ||
687 | lj_strfmt_putint(sb, k); /* Shortcut for plain %d. */ | ||
688 | else | ||
689 | lj_strfmt_putfxint(sb, sf, k); | ||
690 | } else { | ||
691 | lj_strfmt_putfnum_int(sb, sf, lj_lib_checknum(L, arg)); | ||
692 | } | ||
860 | break; | 693 | break; |
861 | case 'd': case 'i': | 694 | case STRFMT_UINT: |
862 | addintlen(form); | 695 | if (tvisint(L->base+arg-1)) |
863 | n = sprintf(buff, form, num2intfrm(L, arg)); | 696 | lj_strfmt_putfxint(sb, sf, intV(L->base+arg-1)); |
697 | else | ||
698 | lj_strfmt_putfnum_uint(sb, sf, lj_lib_checknum(L, arg)); | ||
864 | break; | 699 | break; |
865 | case 'o': case 'u': case 'x': case 'X': | 700 | case STRFMT_NUM: |
866 | addintlen(form); | 701 | lj_strfmt_putfnum(sb, sf, lj_lib_checknum(L, arg)); |
867 | n = sprintf(buff, form, num2uintfrm(L, arg)); | ||
868 | break; | 702 | break; |
869 | case 'e': case 'E': case 'f': case 'g': case 'G': case 'a': case 'A': { | 703 | case STRFMT_STR: { |
870 | TValue tv; | 704 | GCstr *str = string_fmt_tostring(L, arg, retry); |
871 | tv.n = lj_lib_checknum(L, arg); | 705 | if (str == NULL) |
872 | if (LJ_UNLIKELY((tv.u32.hi << 1) >= 0xffe00000)) { | 706 | retry = 1; |
873 | /* Canonicalize output of non-finite values. */ | 707 | else if ((sf & STRFMT_T_QUOTED)) |
874 | char *p, nbuf[LJ_STR_NUMBUF]; | 708 | lj_strfmt_putquoted(sb, str); /* No formatting. */ |
875 | size_t len = lj_str_bufnum(nbuf, &tv); | 709 | else |
876 | if (strfrmt[-1] < 'a') { | 710 | lj_strfmt_putfstr(sb, sf, str); |
877 | nbuf[len-3] = nbuf[len-3] - 0x20; | ||
878 | nbuf[len-2] = nbuf[len-2] - 0x20; | ||
879 | nbuf[len-1] = nbuf[len-1] - 0x20; | ||
880 | } | ||
881 | nbuf[len] = '\0'; | ||
882 | for (p = form; *p < 'A' && *p != '.'; p++) ; | ||
883 | *p++ = 's'; *p = '\0'; | ||
884 | n = sprintf(buff, form, nbuf); | ||
885 | break; | ||
886 | } | ||
887 | n = sprintf(buff, form, (double)tv.n); | ||
888 | break; | 711 | break; |
889 | } | 712 | } |
890 | case 'q': | 713 | case STRFMT_CHAR: |
891 | addquoted(L, &b, arg); | 714 | lj_strfmt_putfchar(sb, sf, lj_lib_checkint(L, arg)); |
892 | continue; | 715 | break; |
893 | case 'p': | 716 | case STRFMT_PTR: /* No formatting. */ |
894 | lj_str_pushf(L, "%p", lua_topointer(L, arg)); | 717 | lj_strfmt_putptr(sb, lj_obj_ptr(L->base+arg-1)); |
895 | luaL_addvalue(&b); | ||
896 | continue; | ||
897 | case 's': { | ||
898 | GCstr *str = meta_tostring(L, arg); | ||
899 | if (!strchr(form, '.') && str->len >= 100) { | ||
900 | /* no precision and string is too long to be formatted; | ||
901 | keep original string */ | ||
902 | setstrV(L, L->top++, str); | ||
903 | luaL_addvalue(&b); | ||
904 | continue; | ||
905 | } | ||
906 | n = sprintf(buff, form, strdata(str)); | ||
907 | break; | 718 | break; |
908 | } | ||
909 | default: | 719 | default: |
910 | lj_err_callerv(L, LJ_ERR_STRFMTO, *(strfrmt -1)); | 720 | lua_assert(0); |
911 | break; | 721 | break; |
912 | } | 722 | } |
913 | luaL_addlstring(&b, buff, n); | ||
914 | } | 723 | } |
915 | } | 724 | } |
916 | luaL_pushresult(&b); | 725 | if (retry++ == 1) goto again; |
726 | setstrV(L, L->top-1, lj_buf_str(L, sb)); | ||
727 | lj_gc_check(L); | ||
917 | return 1; | 728 | return 1; |
918 | } | 729 | } |
919 | 730 | ||
@@ -926,10 +737,6 @@ LUALIB_API int luaopen_string(lua_State *L) | |||
926 | GCtab *mt; | 737 | GCtab *mt; |
927 | global_State *g; | 738 | global_State *g; |
928 | LJ_LIB_REG(L, LUA_STRLIBNAME, string); | 739 | LJ_LIB_REG(L, LUA_STRLIBNAME, string); |
929 | #if defined(LUA_COMPAT_GFIND) && !LJ_52 | ||
930 | lua_getfield(L, -1, "gmatch"); | ||
931 | lua_setfield(L, -2, "gfind"); | ||
932 | #endif | ||
933 | mt = lj_tab_new(L, 0, 1); | 740 | mt = lj_tab_new(L, 0, 1); |
934 | /* NOBARRIER: basemt is a GC root. */ | 741 | /* NOBARRIER: basemt is a GC root. */ |
935 | g = G(L); | 742 | g = G(L); |
diff --git a/src/lib_table.c b/src/lib_table.c index 9842513b..4e612146 100644 --- a/src/lib_table.c +++ b/src/lib_table.c | |||
@@ -16,57 +16,43 @@ | |||
16 | #include "lj_obj.h" | 16 | #include "lj_obj.h" |
17 | #include "lj_gc.h" | 17 | #include "lj_gc.h" |
18 | #include "lj_err.h" | 18 | #include "lj_err.h" |
19 | #include "lj_buf.h" | ||
19 | #include "lj_tab.h" | 20 | #include "lj_tab.h" |
21 | #include "lj_ff.h" | ||
20 | #include "lj_lib.h" | 22 | #include "lj_lib.h" |
21 | 23 | ||
22 | /* ------------------------------------------------------------------------ */ | 24 | /* ------------------------------------------------------------------------ */ |
23 | 25 | ||
24 | #define LJLIB_MODULE_table | 26 | #define LJLIB_MODULE_table |
25 | 27 | ||
26 | LJLIB_CF(table_foreachi) | 28 | LJLIB_LUA(table_foreachi) /* |
27 | { | 29 | function(t, f) |
28 | GCtab *t = lj_lib_checktab(L, 1); | 30 | CHECK_tab(t) |
29 | GCfunc *func = lj_lib_checkfunc(L, 2); | 31 | CHECK_func(f) |
30 | MSize i, n = lj_tab_len(t); | 32 | for i=1,#t do |
31 | for (i = 1; i <= n; i++) { | 33 | local r = f(i, t[i]) |
32 | cTValue *val; | 34 | if r ~= nil then return r end |
33 | setfuncV(L, L->top, func); | 35 | end |
34 | setintV(L->top+1, i); | 36 | end |
35 | val = lj_tab_getint(t, (int32_t)i); | 37 | */ |
36 | if (val) { copyTV(L, L->top+2, val); } else { setnilV(L->top+2); } | ||
37 | L->top += 3; | ||
38 | lua_call(L, 2, 1); | ||
39 | if (!tvisnil(L->top-1)) | ||
40 | return 1; | ||
41 | L->top--; | ||
42 | } | ||
43 | return 0; | ||
44 | } | ||
45 | 38 | ||
46 | LJLIB_CF(table_foreach) | 39 | LJLIB_LUA(table_foreach) /* |
47 | { | 40 | function(t, f) |
48 | GCtab *t = lj_lib_checktab(L, 1); | 41 | CHECK_tab(t) |
49 | GCfunc *func = lj_lib_checkfunc(L, 2); | 42 | CHECK_func(f) |
50 | L->top = L->base+3; | 43 | for k, v in PAIRS(t) do |
51 | setnilV(L->top-1); | 44 | local r = f(k, v) |
52 | while (lj_tab_next(L, t, L->top-1)) { | 45 | if r ~= nil then return r end |
53 | copyTV(L, L->top+2, L->top); | 46 | end |
54 | copyTV(L, L->top+1, L->top-1); | 47 | end |
55 | setfuncV(L, L->top, func); | 48 | */ |
56 | L->top += 3; | ||
57 | lua_call(L, 2, 1); | ||
58 | if (!tvisnil(L->top-1)) | ||
59 | return 1; | ||
60 | L->top--; | ||
61 | } | ||
62 | return 0; | ||
63 | } | ||
64 | 49 | ||
65 | LJLIB_ASM(table_getn) LJLIB_REC(.) | 50 | LJLIB_LUA(table_getn) /* |
66 | { | 51 | function(t) |
67 | lj_lib_checktab(L, 1); | 52 | CHECK_tab(t) |
68 | return FFH_UNREACHABLE; | 53 | return #t |
69 | } | 54 | end |
55 | */ | ||
70 | 56 | ||
71 | LJLIB_CF(table_maxn) | 57 | LJLIB_CF(table_maxn) |
72 | { | 58 | { |
@@ -119,52 +105,67 @@ LJLIB_CF(table_insert) LJLIB_REC(.) | |||
119 | return 0; | 105 | return 0; |
120 | } | 106 | } |
121 | 107 | ||
122 | LJLIB_CF(table_remove) LJLIB_REC(.) | 108 | LJLIB_LUA(table_remove) /* |
123 | { | 109 | function(t, pos) |
124 | GCtab *t = lj_lib_checktab(L, 1); | 110 | CHECK_tab(t) |
125 | int32_t e = (int32_t)lj_tab_len(t); | 111 | local len = #t |
126 | int32_t pos = lj_lib_optint(L, 2, e); | 112 | if pos == nil then |
127 | if (!(1 <= pos && pos <= e)) /* Nothing to remove? */ | 113 | if len ~= 0 then |
128 | return 0; | 114 | local old = t[len] |
129 | lua_rawgeti(L, 1, pos); /* Get previous value. */ | 115 | t[len] = nil |
130 | /* NOBARRIER: This just moves existing elements around. */ | 116 | return old |
131 | for (; pos < e; pos++) { | 117 | end |
132 | cTValue *src = lj_tab_getint(t, pos+1); | 118 | else |
133 | TValue *dst = lj_tab_setint(L, t, pos); | 119 | CHECK_int(pos) |
134 | if (src) { | 120 | if pos >= 1 and pos <= len then |
135 | copyTV(L, dst, src); | 121 | local old = t[pos] |
136 | } else { | 122 | for i=pos+1,len do |
137 | setnilV(dst); | 123 | t[i-1] = t[i] |
138 | } | 124 | end |
139 | } | 125 | t[len] = nil |
140 | setnilV(lj_tab_setint(L, t, e)); /* Remove (last) value. */ | 126 | return old |
141 | return 1; /* Return previous value. */ | 127 | end |
142 | } | 128 | end |
129 | end | ||
130 | */ | ||
131 | |||
132 | LJLIB_LUA(table_move) /* | ||
133 | function(a1, f, e, t, a2) | ||
134 | CHECK_tab(a1) | ||
135 | CHECK_int(f) | ||
136 | CHECK_int(e) | ||
137 | CHECK_int(t) | ||
138 | if a2 == nil then a2 = a1 end | ||
139 | CHECK_tab(a2) | ||
140 | if e >= f then | ||
141 | local d = t - f | ||
142 | if t > e or t <= f or a2 ~= a1 then | ||
143 | for i=f,e do a2[i+d] = a1[i] end | ||
144 | else | ||
145 | for i=e,f,-1 do a2[i+d] = a1[i] end | ||
146 | end | ||
147 | end | ||
148 | return a2 | ||
149 | end | ||
150 | */ | ||
143 | 151 | ||
144 | LJLIB_CF(table_concat) | 152 | LJLIB_CF(table_concat) LJLIB_REC(.) |
145 | { | 153 | { |
146 | luaL_Buffer b; | ||
147 | GCtab *t = lj_lib_checktab(L, 1); | 154 | GCtab *t = lj_lib_checktab(L, 1); |
148 | GCstr *sep = lj_lib_optstr(L, 2); | 155 | GCstr *sep = lj_lib_optstr(L, 2); |
149 | MSize seplen = sep ? sep->len : 0; | ||
150 | int32_t i = lj_lib_optint(L, 3, 1); | 156 | int32_t i = lj_lib_optint(L, 3, 1); |
151 | int32_t e = (L->base+3 < L->top && !tvisnil(L->base+3)) ? | 157 | int32_t e = (L->base+3 < L->top && !tvisnil(L->base+3)) ? |
152 | lj_lib_checkint(L, 4) : (int32_t)lj_tab_len(t); | 158 | lj_lib_checkint(L, 4) : (int32_t)lj_tab_len(t); |
153 | luaL_buffinit(L, &b); | 159 | SBuf *sb = lj_buf_tmp_(L); |
154 | if (i <= e) { | 160 | SBuf *sbx = lj_buf_puttab(sb, t, sep, i, e); |
155 | for (;;) { | 161 | if (LJ_UNLIKELY(!sbx)) { /* Error: bad element type. */ |
156 | cTValue *o; | 162 | int32_t idx = (int32_t)(intptr_t)sbufP(sb); |
157 | lua_rawgeti(L, 1, i); | 163 | cTValue *o = lj_tab_getint(t, idx); |
158 | o = L->top-1; | 164 | lj_err_callerv(L, LJ_ERR_TABCAT, |
159 | if (!(tvisstr(o) || tvisnumber(o))) | 165 | lj_obj_itypename[o ? itypemap(o) : ~LJ_TNIL], idx); |
160 | lj_err_callerv(L, LJ_ERR_TABCAT, lj_typename(o), i); | ||
161 | luaL_addvalue(&b); | ||
162 | if (i++ == e) break; | ||
163 | if (seplen) | ||
164 | luaL_addlstring(&b, strdata(sep), seplen); | ||
165 | } | ||
166 | } | 166 | } |
167 | luaL_pushresult(&b); | 167 | setstrV(L, L->top-1, lj_buf_str(L, sbx)); |
168 | lj_gc_check(L); | ||
168 | return 1; | 169 | return 1; |
169 | } | 170 | } |
170 | 171 | ||
@@ -284,6 +285,30 @@ LJLIB_CF(table_pack) | |||
284 | } | 285 | } |
285 | #endif | 286 | #endif |
286 | 287 | ||
288 | LJLIB_NOREG LJLIB_CF(table_new) LJLIB_REC(.) | ||
289 | { | ||
290 | int32_t a = lj_lib_checkint(L, 1); | ||
291 | int32_t h = lj_lib_checkint(L, 2); | ||
292 | lua_createtable(L, a, h); | ||
293 | return 1; | ||
294 | } | ||
295 | |||
296 | LJLIB_NOREG LJLIB_CF(table_clear) LJLIB_REC(.) | ||
297 | { | ||
298 | lj_tab_clear(lj_lib_checktab(L, 1)); | ||
299 | return 0; | ||
300 | } | ||
301 | |||
302 | static int luaopen_table_new(lua_State *L) | ||
303 | { | ||
304 | return lj_lib_postreg(L, lj_cf_table_new, FF_table_new, "new"); | ||
305 | } | ||
306 | |||
307 | static int luaopen_table_clear(lua_State *L) | ||
308 | { | ||
309 | return lj_lib_postreg(L, lj_cf_table_clear, FF_table_clear, "clear"); | ||
310 | } | ||
311 | |||
287 | /* ------------------------------------------------------------------------ */ | 312 | /* ------------------------------------------------------------------------ */ |
288 | 313 | ||
289 | #include "lj_libdef.h" | 314 | #include "lj_libdef.h" |
@@ -295,6 +320,8 @@ LUALIB_API int luaopen_table(lua_State *L) | |||
295 | lua_getglobal(L, "unpack"); | 320 | lua_getglobal(L, "unpack"); |
296 | lua_setfield(L, -2, "unpack"); | 321 | lua_setfield(L, -2, "unpack"); |
297 | #endif | 322 | #endif |
323 | lj_lib_prereg(L, LUA_TABLIBNAME ".new", luaopen_table_new, tabV(L->top-1)); | ||
324 | lj_lib_prereg(L, LUA_TABLIBNAME ".clear", luaopen_table_clear, tabV(L->top-1)); | ||
298 | return 1; | 325 | return 1; |
299 | } | 326 | } |
300 | 327 | ||
diff --git a/src/lj_alloc.c b/src/lj_alloc.c index dc64dca9..33a2eb8f 100644 --- a/src/lj_alloc.c +++ b/src/lj_alloc.c | |||
@@ -72,13 +72,56 @@ | |||
72 | 72 | ||
73 | #define IS_DIRECT_BIT (SIZE_T_ONE) | 73 | #define IS_DIRECT_BIT (SIZE_T_ONE) |
74 | 74 | ||
75 | |||
76 | /* Determine system-specific block allocation method. */ | ||
75 | #if LJ_TARGET_WINDOWS | 77 | #if LJ_TARGET_WINDOWS |
76 | 78 | ||
77 | #define WIN32_LEAN_AND_MEAN | 79 | #define WIN32_LEAN_AND_MEAN |
78 | #include <windows.h> | 80 | #include <windows.h> |
79 | 81 | ||
82 | #define LJ_ALLOC_VIRTUALALLOC 1 | ||
83 | |||
84 | #if LJ_64 && !LJ_GC64 | ||
85 | #define LJ_ALLOC_NTAVM 1 | ||
86 | #endif | ||
87 | |||
88 | #else | ||
89 | |||
90 | #include <errno.h> | ||
91 | /* If this include fails, then rebuild with: -DLUAJIT_USE_SYSMALLOC */ | ||
92 | #include <sys/mman.h> | ||
93 | |||
94 | #define LJ_ALLOC_MMAP 1 | ||
95 | |||
80 | #if LJ_64 | 96 | #if LJ_64 |
81 | 97 | ||
98 | #define LJ_ALLOC_MMAP_PROBE 1 | ||
99 | |||
100 | #if LJ_GC64 | ||
101 | #define LJ_ALLOC_MBITS 47 /* 128 TB in LJ_GC64 mode. */ | ||
102 | #elif LJ_TARGET_X64 && LJ_HASJIT | ||
103 | /* Due to limitations in the x64 compiler backend. */ | ||
104 | #define LJ_ALLOC_MBITS 31 /* 2 GB on x64 with !LJ_GC64. */ | ||
105 | #else | ||
106 | #define LJ_ALLOC_MBITS 32 /* 4 GB on other archs with !LJ_GC64. */ | ||
107 | #endif | ||
108 | |||
109 | #endif | ||
110 | |||
111 | #if LJ_64 && !LJ_GC64 && defined(MAP_32BIT) | ||
112 | #define LJ_ALLOC_MMAP32 1 | ||
113 | #endif | ||
114 | |||
115 | #if LJ_TARGET_LINUX | ||
116 | #define LJ_ALLOC_MREMAP 1 | ||
117 | #endif | ||
118 | |||
119 | #endif | ||
120 | |||
121 | |||
122 | #if LJ_ALLOC_VIRTUALALLOC | ||
123 | |||
124 | #if LJ_ALLOC_NTAVM | ||
82 | /* Undocumented, but hey, that's what we all love so much about Windows. */ | 125 | /* Undocumented, but hey, that's what we all love so much about Windows. */ |
83 | typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG zbits, | 126 | typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG zbits, |
84 | size_t *size, ULONG alloctype, ULONG prot); | 127 | size_t *size, ULONG alloctype, ULONG prot); |
@@ -89,14 +132,15 @@ static PNTAVM ntavm; | |||
89 | */ | 132 | */ |
90 | #define NTAVM_ZEROBITS 1 | 133 | #define NTAVM_ZEROBITS 1 |
91 | 134 | ||
92 | static void INIT_MMAP(void) | 135 | static void init_mmap(void) |
93 | { | 136 | { |
94 | ntavm = (PNTAVM)GetProcAddress(GetModuleHandleA("ntdll.dll"), | 137 | ntavm = (PNTAVM)GetProcAddress(GetModuleHandleA("ntdll.dll"), |
95 | "NtAllocateVirtualMemory"); | 138 | "NtAllocateVirtualMemory"); |
96 | } | 139 | } |
140 | #define INIT_MMAP() init_mmap() | ||
97 | 141 | ||
98 | /* Win64 32 bit MMAP via NtAllocateVirtualMemory. */ | 142 | /* Win64 32 bit MMAP via NtAllocateVirtualMemory. */ |
99 | static LJ_AINLINE void *CALL_MMAP(size_t size) | 143 | static void *CALL_MMAP(size_t size) |
100 | { | 144 | { |
101 | DWORD olderr = GetLastError(); | 145 | DWORD olderr = GetLastError(); |
102 | void *ptr = NULL; | 146 | void *ptr = NULL; |
@@ -107,7 +151,7 @@ static LJ_AINLINE void *CALL_MMAP(size_t size) | |||
107 | } | 151 | } |
108 | 152 | ||
109 | /* For direct MMAP, use MEM_TOP_DOWN to minimize interference */ | 153 | /* For direct MMAP, use MEM_TOP_DOWN to minimize interference */ |
110 | static LJ_AINLINE void *DIRECT_MMAP(size_t size) | 154 | static void *DIRECT_MMAP(size_t size) |
111 | { | 155 | { |
112 | DWORD olderr = GetLastError(); | 156 | DWORD olderr = GetLastError(); |
113 | void *ptr = NULL; | 157 | void *ptr = NULL; |
@@ -119,23 +163,21 @@ static LJ_AINLINE void *DIRECT_MMAP(size_t size) | |||
119 | 163 | ||
120 | #else | 164 | #else |
121 | 165 | ||
122 | #define INIT_MMAP() ((void)0) | ||
123 | |||
124 | /* Win32 MMAP via VirtualAlloc */ | 166 | /* Win32 MMAP via VirtualAlloc */ |
125 | static LJ_AINLINE void *CALL_MMAP(size_t size) | 167 | static void *CALL_MMAP(size_t size) |
126 | { | 168 | { |
127 | DWORD olderr = GetLastError(); | 169 | DWORD olderr = GetLastError(); |
128 | void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); | 170 | void *ptr = LJ_WIN_VALLOC(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); |
129 | SetLastError(olderr); | 171 | SetLastError(olderr); |
130 | return ptr ? ptr : MFAIL; | 172 | return ptr ? ptr : MFAIL; |
131 | } | 173 | } |
132 | 174 | ||
133 | /* For direct MMAP, use MEM_TOP_DOWN to minimize interference */ | 175 | /* For direct MMAP, use MEM_TOP_DOWN to minimize interference */ |
134 | static LJ_AINLINE void *DIRECT_MMAP(size_t size) | 176 | static void *DIRECT_MMAP(size_t size) |
135 | { | 177 | { |
136 | DWORD olderr = GetLastError(); | 178 | DWORD olderr = GetLastError(); |
137 | void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, | 179 | void *ptr = LJ_WIN_VALLOC(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, |
138 | PAGE_READWRITE); | 180 | PAGE_READWRITE); |
139 | SetLastError(olderr); | 181 | SetLastError(olderr); |
140 | return ptr ? ptr : MFAIL; | 182 | return ptr ? ptr : MFAIL; |
141 | } | 183 | } |
@@ -143,7 +185,7 @@ static LJ_AINLINE void *DIRECT_MMAP(size_t size) | |||
143 | #endif | 185 | #endif |
144 | 186 | ||
145 | /* This function supports releasing coalesed segments */ | 187 | /* This function supports releasing coalesed segments */ |
146 | static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size) | 188 | static int CALL_MUNMAP(void *ptr, size_t size) |
147 | { | 189 | { |
148 | DWORD olderr = GetLastError(); | 190 | DWORD olderr = GetLastError(); |
149 | MEMORY_BASIC_INFORMATION minfo; | 191 | MEMORY_BASIC_INFORMATION minfo; |
@@ -163,10 +205,7 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size) | |||
163 | return 0; | 205 | return 0; |
164 | } | 206 | } |
165 | 207 | ||
166 | #else | 208 | #elif LJ_ALLOC_MMAP |
167 | |||
168 | #include <errno.h> | ||
169 | #include <sys/mman.h> | ||
170 | 209 | ||
171 | #define MMAP_PROT (PROT_READ|PROT_WRITE) | 210 | #define MMAP_PROT (PROT_READ|PROT_WRITE) |
172 | #if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) | 211 | #if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) |
@@ -174,105 +213,152 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size) | |||
174 | #endif | 213 | #endif |
175 | #define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS) | 214 | #define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS) |
176 | 215 | ||
177 | #if LJ_64 | 216 | #if LJ_ALLOC_MMAP_PROBE |
178 | /* 64 bit mode needs special support for allocating memory in the lower 2GB. */ | ||
179 | |||
180 | #if defined(MAP_32BIT) | ||
181 | 217 | ||
182 | #if defined(__sun__) | 218 | #ifdef MAP_TRYFIXED |
183 | #define MMAP_REGION_START ((uintptr_t)0x1000) | 219 | #define MMAP_FLAGS_PROBE (MMAP_FLAGS|MAP_TRYFIXED) |
184 | #else | 220 | #else |
185 | /* Actually this only gives us max. 1GB in current Linux kernels. */ | 221 | #define MMAP_FLAGS_PROBE MMAP_FLAGS |
186 | #define MMAP_REGION_START ((uintptr_t)0) | ||
187 | #endif | 222 | #endif |
188 | 223 | ||
189 | static LJ_AINLINE void *CALL_MMAP(size_t size) | 224 | #define LJ_ALLOC_MMAP_PROBE_MAX 30 |
190 | { | 225 | #define LJ_ALLOC_MMAP_PROBE_LINEAR 5 |
191 | int olderr = errno; | ||
192 | void *ptr = mmap((void *)MMAP_REGION_START, size, MMAP_PROT, MAP_32BIT|MMAP_FLAGS, -1, 0); | ||
193 | errno = olderr; | ||
194 | return ptr; | ||
195 | } | ||
196 | 226 | ||
197 | #elif LJ_TARGET_OSX || LJ_TARGET_PS4 || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__sun__) || LJ_TARGET_CYGWIN | 227 | #define LJ_ALLOC_MMAP_PROBE_LOWER ((uintptr_t)0x4000) |
198 | 228 | ||
199 | /* OSX and FreeBSD mmap() use a naive first-fit linear search. | 229 | /* No point in a giant ifdef mess. Just try to open /dev/urandom. |
200 | ** That's perfect for us. Except that -pagezero_size must be set for OSX, | 230 | ** It doesn't really matter if this fails, since we get some ASLR bits from |
201 | ** otherwise the lower 4GB are blocked. And the 32GB RLIMIT_DATA needs | 231 | ** every unsuitable allocation, too. And we prefer linear allocation, anyway. |
202 | ** to be reduced to 250MB on FreeBSD. | ||
203 | */ | 232 | */ |
204 | #if LJ_TARGET_OSX || defined(__DragonFly__) | 233 | #include <fcntl.h> |
205 | #define MMAP_REGION_START ((uintptr_t)0x10000) | 234 | #include <unistd.h> |
206 | #elif LJ_TARGET_PS4 | ||
207 | #define MMAP_REGION_START ((uintptr_t)0x4000) | ||
208 | #else | ||
209 | #define MMAP_REGION_START ((uintptr_t)0x10000000) | ||
210 | #endif | ||
211 | #define MMAP_REGION_END ((uintptr_t)0x80000000) | ||
212 | 235 | ||
213 | #if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4 | 236 | static uintptr_t mmap_probe_seed(void) |
214 | #include <sys/resource.h> | 237 | { |
215 | #endif | 238 | uintptr_t val; |
239 | int fd = open("/dev/urandom", O_RDONLY); | ||
240 | if (fd != -1) { | ||
241 | int ok = ((size_t)read(fd, &val, sizeof(val)) == sizeof(val)); | ||
242 | (void)close(fd); | ||
243 | if (ok) return val; | ||
244 | } | ||
245 | return 1; /* Punt. */ | ||
246 | } | ||
216 | 247 | ||
217 | static LJ_AINLINE void *CALL_MMAP(size_t size) | 248 | static void *mmap_probe(size_t size) |
218 | { | 249 | { |
219 | int olderr = errno; | ||
220 | /* Hint for next allocation. Doesn't need to be thread-safe. */ | 250 | /* Hint for next allocation. Doesn't need to be thread-safe. */ |
221 | static uintptr_t alloc_hint = MMAP_REGION_START; | 251 | static uintptr_t hint_addr = 0; |
222 | int retry = 0; | 252 | static uintptr_t hint_prng = 0; |
223 | #if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4 | 253 | int olderr = errno; |
224 | static int rlimit_modified = 0; | 254 | int retry; |
225 | if (LJ_UNLIKELY(rlimit_modified == 0)) { | 255 | for (retry = 0; retry < LJ_ALLOC_MMAP_PROBE_MAX; retry++) { |
226 | struct rlimit rlim; | 256 | void *p = mmap((void *)hint_addr, size, MMAP_PROT, MMAP_FLAGS_PROBE, -1, 0); |
227 | rlim.rlim_cur = rlim.rlim_max = MMAP_REGION_START; | 257 | uintptr_t addr = (uintptr_t)p; |
228 | setrlimit(RLIMIT_DATA, &rlim); /* Ignore result. May fail below. */ | 258 | if ((addr >> LJ_ALLOC_MBITS) == 0 && addr >= LJ_ALLOC_MMAP_PROBE_LOWER && |
229 | rlimit_modified = 1; | 259 | ((addr + size) >> LJ_ALLOC_MBITS) == 0) { |
230 | } | 260 | /* We got a suitable address. Bump the hint address. */ |
231 | #endif | 261 | hint_addr = addr + size; |
232 | for (;;) { | ||
233 | void *p = mmap((void *)alloc_hint, size, MMAP_PROT, MMAP_FLAGS, -1, 0); | ||
234 | if ((uintptr_t)p >= MMAP_REGION_START && | ||
235 | (uintptr_t)p + size < MMAP_REGION_END) { | ||
236 | alloc_hint = (uintptr_t)p + size; | ||
237 | errno = olderr; | 262 | errno = olderr; |
238 | return p; | 263 | return p; |
239 | } | 264 | } |
240 | if (p != CMFAIL) munmap(p, size); | 265 | if (p != MFAIL) { |
241 | #if defined(__sun__) || defined(__DragonFly__) | 266 | munmap(p, size); |
242 | alloc_hint += 0x1000000; /* Need near-exhaustive linear scan. */ | 267 | } else if (errno == ENOMEM) { |
243 | if (alloc_hint + size < MMAP_REGION_END) continue; | 268 | return MFAIL; |
244 | #endif | 269 | } |
245 | if (retry) break; | 270 | if (hint_addr) { |
246 | retry = 1; | 271 | /* First, try linear probing. */ |
247 | alloc_hint = MMAP_REGION_START; | 272 | if (retry < LJ_ALLOC_MMAP_PROBE_LINEAR) { |
273 | hint_addr += 0x1000000; | ||
274 | if (((hint_addr + size) >> LJ_ALLOC_MBITS) != 0) | ||
275 | hint_addr = 0; | ||
276 | continue; | ||
277 | } else if (retry == LJ_ALLOC_MMAP_PROBE_LINEAR) { | ||
278 | /* Next, try a no-hint probe to get back an ASLR address. */ | ||
279 | hint_addr = 0; | ||
280 | continue; | ||
281 | } | ||
282 | } | ||
283 | /* Finally, try pseudo-random probing. */ | ||
284 | if (LJ_UNLIKELY(hint_prng == 0)) { | ||
285 | hint_prng = mmap_probe_seed(); | ||
286 | } | ||
287 | /* The unsuitable address we got has some ASLR PRNG bits. */ | ||
288 | hint_addr ^= addr & ~((uintptr_t)(LJ_PAGESIZE-1)); | ||
289 | do { /* The PRNG itself is very weak, but see above. */ | ||
290 | hint_prng = hint_prng * 1103515245 + 12345; | ||
291 | hint_addr ^= hint_prng * (uintptr_t)LJ_PAGESIZE; | ||
292 | hint_addr &= (((uintptr_t)1 << LJ_ALLOC_MBITS)-1); | ||
293 | } while (hint_addr < LJ_ALLOC_MMAP_PROBE_LOWER); | ||
248 | } | 294 | } |
249 | errno = olderr; | 295 | errno = olderr; |
250 | return CMFAIL; | 296 | return MFAIL; |
251 | } | 297 | } |
252 | 298 | ||
299 | #endif | ||
300 | |||
301 | #if LJ_ALLOC_MMAP32 | ||
302 | |||
303 | #if defined(__sun__) | ||
304 | #define LJ_ALLOC_MMAP32_START ((uintptr_t)0x1000) | ||
253 | #else | 305 | #else |
306 | #define LJ_ALLOC_MMAP32_START ((uintptr_t)0) | ||
307 | #endif | ||
254 | 308 | ||
255 | #error "NYI: need an equivalent of MAP_32BIT for this 64 bit OS" | 309 | static void *mmap_map32(size_t size) |
310 | { | ||
311 | #if LJ_ALLOC_MMAP_PROBE | ||
312 | static int fallback = 0; | ||
313 | if (fallback) | ||
314 | return mmap_probe(size); | ||
315 | #endif | ||
316 | { | ||
317 | int olderr = errno; | ||
318 | void *ptr = mmap((void *)LJ_ALLOC_MMAP32_START, size, MMAP_PROT, MAP_32BIT|MMAP_FLAGS, -1, 0); | ||
319 | errno = olderr; | ||
320 | /* This only allows 1GB on Linux. So fallback to probing to get 2GB. */ | ||
321 | #if LJ_ALLOC_MMAP_PROBE | ||
322 | if (ptr == MFAIL) { | ||
323 | fallback = 1; | ||
324 | return mmap_probe(size); | ||
325 | } | ||
326 | #endif | ||
327 | return ptr; | ||
328 | } | ||
329 | } | ||
256 | 330 | ||
257 | #endif | 331 | #endif |
258 | 332 | ||
333 | #if LJ_ALLOC_MMAP32 | ||
334 | #define CALL_MMAP(size) mmap_map32(size) | ||
335 | #elif LJ_ALLOC_MMAP_PROBE | ||
336 | #define CALL_MMAP(size) mmap_probe(size) | ||
259 | #else | 337 | #else |
260 | 338 | static void *CALL_MMAP(size_t size) | |
261 | /* 32 bit mode is easy. */ | ||
262 | static LJ_AINLINE void *CALL_MMAP(size_t size) | ||
263 | { | 339 | { |
264 | int olderr = errno; | 340 | int olderr = errno; |
265 | void *ptr = mmap(NULL, size, MMAP_PROT, MMAP_FLAGS, -1, 0); | 341 | void *ptr = mmap(NULL, size, MMAP_PROT, MMAP_FLAGS, -1, 0); |
266 | errno = olderr; | 342 | errno = olderr; |
267 | return ptr; | 343 | return ptr; |
268 | } | 344 | } |
269 | |||
270 | #endif | 345 | #endif |
271 | 346 | ||
272 | #define INIT_MMAP() ((void)0) | 347 | #if LJ_64 && !LJ_GC64 && ((defined(__FreeBSD__) && __FreeBSD__ < 10) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4 |
273 | #define DIRECT_MMAP(s) CALL_MMAP(s) | 348 | |
349 | #include <sys/resource.h> | ||
350 | |||
351 | static void init_mmap(void) | ||
352 | { | ||
353 | struct rlimit rlim; | ||
354 | rlim.rlim_cur = rlim.rlim_max = 0x10000; | ||
355 | setrlimit(RLIMIT_DATA, &rlim); /* Ignore result. May fail later. */ | ||
356 | } | ||
357 | #define INIT_MMAP() init_mmap() | ||
274 | 358 | ||
275 | static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size) | 359 | #endif |
360 | |||
361 | static int CALL_MUNMAP(void *ptr, size_t size) | ||
276 | { | 362 | { |
277 | int olderr = errno; | 363 | int olderr = errno; |
278 | int ret = munmap(ptr, size); | 364 | int ret = munmap(ptr, size); |
@@ -280,10 +366,9 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size) | |||
280 | return ret; | 366 | return ret; |
281 | } | 367 | } |
282 | 368 | ||
283 | #if LJ_TARGET_LINUX | 369 | #if LJ_ALLOC_MREMAP |
284 | /* Need to define _GNU_SOURCE to get the mremap prototype. */ | 370 | /* Need to define _GNU_SOURCE to get the mremap prototype. */ |
285 | static LJ_AINLINE void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, | 371 | static void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, int flags) |
286 | int flags) | ||
287 | { | 372 | { |
288 | int olderr = errno; | 373 | int olderr = errno; |
289 | ptr = mremap(ptr, osz, nsz, flags); | 374 | ptr = mremap(ptr, osz, nsz, flags); |
@@ -294,7 +379,7 @@ static LJ_AINLINE void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, | |||
294 | #define CALL_MREMAP(addr, osz, nsz, mv) CALL_MREMAP_((addr), (osz), (nsz), (mv)) | 379 | #define CALL_MREMAP(addr, osz, nsz, mv) CALL_MREMAP_((addr), (osz), (nsz), (mv)) |
295 | #define CALL_MREMAP_NOMOVE 0 | 380 | #define CALL_MREMAP_NOMOVE 0 |
296 | #define CALL_MREMAP_MAYMOVE 1 | 381 | #define CALL_MREMAP_MAYMOVE 1 |
297 | #if LJ_64 | 382 | #if LJ_64 && !LJ_GC64 |
298 | #define CALL_MREMAP_MV CALL_MREMAP_NOMOVE | 383 | #define CALL_MREMAP_MV CALL_MREMAP_NOMOVE |
299 | #else | 384 | #else |
300 | #define CALL_MREMAP_MV CALL_MREMAP_MAYMOVE | 385 | #define CALL_MREMAP_MV CALL_MREMAP_MAYMOVE |
@@ -303,6 +388,15 @@ static LJ_AINLINE void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, | |||
303 | 388 | ||
304 | #endif | 389 | #endif |
305 | 390 | ||
391 | |||
392 | #ifndef INIT_MMAP | ||
393 | #define INIT_MMAP() ((void)0) | ||
394 | #endif | ||
395 | |||
396 | #ifndef DIRECT_MMAP | ||
397 | #define DIRECT_MMAP(s) CALL_MMAP(s) | ||
398 | #endif | ||
399 | |||
306 | #ifndef CALL_MREMAP | 400 | #ifndef CALL_MREMAP |
307 | #define CALL_MREMAP(addr, osz, nsz, mv) ((void)osz, MFAIL) | 401 | #define CALL_MREMAP(addr, osz, nsz, mv) ((void)osz, MFAIL) |
308 | #endif | 402 | #endif |
diff --git a/src/lj_api.c b/src/lj_api.c index e2d7e533..974b5643 100644 --- a/src/lj_api.c +++ b/src/lj_api.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include "lj_trace.h" | 24 | #include "lj_trace.h" |
25 | #include "lj_vm.h" | 25 | #include "lj_vm.h" |
26 | #include "lj_strscan.h" | 26 | #include "lj_strscan.h" |
27 | #include "lj_strfmt.h" | ||
27 | 28 | ||
28 | /* -- Common helper functions --------------------------------------------- */ | 29 | /* -- Common helper functions --------------------------------------------- */ |
29 | 30 | ||
@@ -111,6 +112,13 @@ LUA_API void lua_xmove(lua_State *from, lua_State *to, int n) | |||
111 | from->top = f; | 112 | from->top = f; |
112 | } | 113 | } |
113 | 114 | ||
115 | LUA_API const lua_Number *lua_version(lua_State *L) | ||
116 | { | ||
117 | static const lua_Number version = LUA_VERSION_NUM; | ||
118 | UNUSED(L); | ||
119 | return &version; | ||
120 | } | ||
121 | |||
114 | /* -- Stack manipulation -------------------------------------------------- */ | 122 | /* -- Stack manipulation -------------------------------------------------- */ |
115 | 123 | ||
116 | LUA_API int lua_gettop(lua_State *L) | 124 | LUA_API int lua_gettop(lua_State *L) |
@@ -151,30 +159,40 @@ LUA_API void lua_insert(lua_State *L, int idx) | |||
151 | copyTV(L, p, L->top); | 159 | copyTV(L, p, L->top); |
152 | } | 160 | } |
153 | 161 | ||
154 | LUA_API void lua_replace(lua_State *L, int idx) | 162 | static void copy_slot(lua_State *L, TValue *f, int idx) |
155 | { | 163 | { |
156 | api_checknelems(L, 1); | ||
157 | if (idx == LUA_GLOBALSINDEX) { | 164 | if (idx == LUA_GLOBALSINDEX) { |
158 | api_check(L, tvistab(L->top-1)); | 165 | api_check(L, tvistab(f)); |
159 | /* NOBARRIER: A thread (i.e. L) is never black. */ | 166 | /* NOBARRIER: A thread (i.e. L) is never black. */ |
160 | setgcref(L->env, obj2gco(tabV(L->top-1))); | 167 | setgcref(L->env, obj2gco(tabV(f))); |
161 | } else if (idx == LUA_ENVIRONINDEX) { | 168 | } else if (idx == LUA_ENVIRONINDEX) { |
162 | GCfunc *fn = curr_func(L); | 169 | GCfunc *fn = curr_func(L); |
163 | if (fn->c.gct != ~LJ_TFUNC) | 170 | if (fn->c.gct != ~LJ_TFUNC) |
164 | lj_err_msg(L, LJ_ERR_NOENV); | 171 | lj_err_msg(L, LJ_ERR_NOENV); |
165 | api_check(L, tvistab(L->top-1)); | 172 | api_check(L, tvistab(f)); |
166 | setgcref(fn->c.env, obj2gco(tabV(L->top-1))); | 173 | setgcref(fn->c.env, obj2gco(tabV(f))); |
167 | lj_gc_barrier(L, fn, L->top-1); | 174 | lj_gc_barrier(L, fn, f); |
168 | } else { | 175 | } else { |
169 | TValue *o = index2adr(L, idx); | 176 | TValue *o = index2adr(L, idx); |
170 | api_checkvalidindex(L, o); | 177 | api_checkvalidindex(L, o); |
171 | copyTV(L, o, L->top-1); | 178 | copyTV(L, o, f); |
172 | if (idx < LUA_GLOBALSINDEX) /* Need a barrier for upvalues. */ | 179 | if (idx < LUA_GLOBALSINDEX) /* Need a barrier for upvalues. */ |
173 | lj_gc_barrier(L, curr_func(L), L->top-1); | 180 | lj_gc_barrier(L, curr_func(L), f); |
174 | } | 181 | } |
182 | } | ||
183 | |||
184 | LUA_API void lua_replace(lua_State *L, int idx) | ||
185 | { | ||
186 | api_checknelems(L, 1); | ||
187 | copy_slot(L, L->top - 1, idx); | ||
175 | L->top--; | 188 | L->top--; |
176 | } | 189 | } |
177 | 190 | ||
191 | LUA_API void lua_copy(lua_State *L, int fromidx, int toidx) | ||
192 | { | ||
193 | copy_slot(L, index2adr(L, fromidx), toidx); | ||
194 | } | ||
195 | |||
178 | LUA_API void lua_pushvalue(lua_State *L, int idx) | 196 | LUA_API void lua_pushvalue(lua_State *L, int idx) |
179 | { | 197 | { |
180 | copyTV(L, L->top, index2adr(L, idx)); | 198 | copyTV(L, L->top, index2adr(L, idx)); |
@@ -188,7 +206,7 @@ LUA_API int lua_type(lua_State *L, int idx) | |||
188 | cTValue *o = index2adr(L, idx); | 206 | cTValue *o = index2adr(L, idx); |
189 | if (tvisnumber(o)) { | 207 | if (tvisnumber(o)) { |
190 | return LUA_TNUMBER; | 208 | return LUA_TNUMBER; |
191 | #if LJ_64 | 209 | #if LJ_64 && !LJ_GC64 |
192 | } else if (tvislightud(o)) { | 210 | } else if (tvislightud(o)) { |
193 | return LUA_TLIGHTUSERDATA; | 211 | return LUA_TLIGHTUSERDATA; |
194 | #endif | 212 | #endif |
@@ -268,7 +286,7 @@ LUA_API int lua_equal(lua_State *L, int idx1, int idx2) | |||
268 | return 0; | 286 | return 0; |
269 | } else if (tvispri(o1)) { | 287 | } else if (tvispri(o1)) { |
270 | return o1 != niltv(L) && o2 != niltv(L); | 288 | return o1 != niltv(L) && o2 != niltv(L); |
271 | #if LJ_64 | 289 | #if LJ_64 && !LJ_GC64 |
272 | } else if (tvislightud(o1)) { | 290 | } else if (tvislightud(o1)) { |
273 | return o1->u64 == o2->u64; | 291 | return o1->u64 == o2->u64; |
274 | #endif | 292 | #endif |
@@ -283,8 +301,8 @@ LUA_API int lua_equal(lua_State *L, int idx1, int idx2) | |||
283 | } else { | 301 | } else { |
284 | L->top = base+2; | 302 | L->top = base+2; |
285 | lj_vm_call(L, base, 1+1); | 303 | lj_vm_call(L, base, 1+1); |
286 | L->top -= 2; | 304 | L->top -= 2+LJ_FR2; |
287 | return tvistruecond(L->top+1); | 305 | return tvistruecond(L->top+1+LJ_FR2); |
288 | } | 306 | } |
289 | } | 307 | } |
290 | } | 308 | } |
@@ -306,8 +324,8 @@ LUA_API int lua_lessthan(lua_State *L, int idx1, int idx2) | |||
306 | } else { | 324 | } else { |
307 | L->top = base+2; | 325 | L->top = base+2; |
308 | lj_vm_call(L, base, 1+1); | 326 | lj_vm_call(L, base, 1+1); |
309 | L->top -= 2; | 327 | L->top -= 2+LJ_FR2; |
310 | return tvistruecond(L->top+1); | 328 | return tvistruecond(L->top+1+LJ_FR2); |
311 | } | 329 | } |
312 | } | 330 | } |
313 | } | 331 | } |
@@ -324,6 +342,22 @@ LUA_API lua_Number lua_tonumber(lua_State *L, int idx) | |||
324 | return 0; | 342 | return 0; |
325 | } | 343 | } |
326 | 344 | ||
345 | LUA_API lua_Number lua_tonumberx(lua_State *L, int idx, int *ok) | ||
346 | { | ||
347 | cTValue *o = index2adr(L, idx); | ||
348 | TValue tmp; | ||
349 | if (LJ_LIKELY(tvisnumber(o))) { | ||
350 | if (ok) *ok = 1; | ||
351 | return numberVnum(o); | ||
352 | } else if (tvisstr(o) && lj_strscan_num(strV(o), &tmp)) { | ||
353 | if (ok) *ok = 1; | ||
354 | return numV(&tmp); | ||
355 | } else { | ||
356 | if (ok) *ok = 0; | ||
357 | return 0; | ||
358 | } | ||
359 | } | ||
360 | |||
327 | LUALIB_API lua_Number luaL_checknumber(lua_State *L, int idx) | 361 | LUALIB_API lua_Number luaL_checknumber(lua_State *L, int idx) |
328 | { | 362 | { |
329 | cTValue *o = index2adr(L, idx); | 363 | cTValue *o = index2adr(L, idx); |
@@ -361,9 +395,38 @@ LUA_API lua_Integer lua_tointeger(lua_State *L, int idx) | |||
361 | if (!(tvisstr(o) && lj_strscan_number(strV(o), &tmp))) | 395 | if (!(tvisstr(o) && lj_strscan_number(strV(o), &tmp))) |
362 | return 0; | 396 | return 0; |
363 | if (tvisint(&tmp)) | 397 | if (tvisint(&tmp)) |
364 | return (lua_Integer)intV(&tmp); | 398 | return intV(&tmp); |
399 | n = numV(&tmp); | ||
400 | } | ||
401 | #if LJ_64 | ||
402 | return (lua_Integer)n; | ||
403 | #else | ||
404 | return lj_num2int(n); | ||
405 | #endif | ||
406 | } | ||
407 | |||
408 | LUA_API lua_Integer lua_tointegerx(lua_State *L, int idx, int *ok) | ||
409 | { | ||
410 | cTValue *o = index2adr(L, idx); | ||
411 | TValue tmp; | ||
412 | lua_Number n; | ||
413 | if (LJ_LIKELY(tvisint(o))) { | ||
414 | if (ok) *ok = 1; | ||
415 | return intV(o); | ||
416 | } else if (LJ_LIKELY(tvisnum(o))) { | ||
417 | n = numV(o); | ||
418 | } else { | ||
419 | if (!(tvisstr(o) && lj_strscan_number(strV(o), &tmp))) { | ||
420 | if (ok) *ok = 0; | ||
421 | return 0; | ||
422 | } | ||
423 | if (tvisint(&tmp)) { | ||
424 | if (ok) *ok = 1; | ||
425 | return intV(&tmp); | ||
426 | } | ||
365 | n = numV(&tmp); | 427 | n = numV(&tmp); |
366 | } | 428 | } |
429 | if (ok) *ok = 1; | ||
367 | #if LJ_64 | 430 | #if LJ_64 |
368 | return (lua_Integer)n; | 431 | return (lua_Integer)n; |
369 | #else | 432 | #else |
@@ -434,7 +497,7 @@ LUA_API const char *lua_tolstring(lua_State *L, int idx, size_t *len) | |||
434 | } else if (tvisnumber(o)) { | 497 | } else if (tvisnumber(o)) { |
435 | lj_gc_check(L); | 498 | lj_gc_check(L); |
436 | o = index2adr(L, idx); /* GC may move the stack. */ | 499 | o = index2adr(L, idx); /* GC may move the stack. */ |
437 | s = lj_str_fromnumber(L, o); | 500 | s = lj_strfmt_number(L, o); |
438 | setstrV(L, o, s); | 501 | setstrV(L, o, s); |
439 | } else { | 502 | } else { |
440 | if (len != NULL) *len = 0; | 503 | if (len != NULL) *len = 0; |
@@ -453,7 +516,7 @@ LUALIB_API const char *luaL_checklstring(lua_State *L, int idx, size_t *len) | |||
453 | } else if (tvisnumber(o)) { | 516 | } else if (tvisnumber(o)) { |
454 | lj_gc_check(L); | 517 | lj_gc_check(L); |
455 | o = index2adr(L, idx); /* GC may move the stack. */ | 518 | o = index2adr(L, idx); /* GC may move the stack. */ |
456 | s = lj_str_fromnumber(L, o); | 519 | s = lj_strfmt_number(L, o); |
457 | setstrV(L, o, s); | 520 | setstrV(L, o, s); |
458 | } else { | 521 | } else { |
459 | lj_err_argt(L, idx, LUA_TSTRING); | 522 | lj_err_argt(L, idx, LUA_TSTRING); |
@@ -475,7 +538,7 @@ LUALIB_API const char *luaL_optlstring(lua_State *L, int idx, | |||
475 | } else if (tvisnumber(o)) { | 538 | } else if (tvisnumber(o)) { |
476 | lj_gc_check(L); | 539 | lj_gc_check(L); |
477 | o = index2adr(L, idx); /* GC may move the stack. */ | 540 | o = index2adr(L, idx); /* GC may move the stack. */ |
478 | s = lj_str_fromnumber(L, o); | 541 | s = lj_strfmt_number(L, o); |
479 | setstrV(L, o, s); | 542 | setstrV(L, o, s); |
480 | } else { | 543 | } else { |
481 | lj_err_argt(L, idx, LUA_TSTRING); | 544 | lj_err_argt(L, idx, LUA_TSTRING); |
@@ -507,7 +570,7 @@ LUA_API size_t lua_objlen(lua_State *L, int idx) | |||
507 | } else if (tvisudata(o)) { | 570 | } else if (tvisudata(o)) { |
508 | return udataV(o)->len; | 571 | return udataV(o)->len; |
509 | } else if (tvisnumber(o)) { | 572 | } else if (tvisnumber(o)) { |
510 | GCstr *s = lj_str_fromnumber(L, o); | 573 | GCstr *s = lj_strfmt_number(L, o); |
511 | setstrV(L, o, s); | 574 | setstrV(L, o, s); |
512 | return s->len; | 575 | return s->len; |
513 | } else { | 576 | } else { |
@@ -545,17 +608,7 @@ LUA_API lua_State *lua_tothread(lua_State *L, int idx) | |||
545 | 608 | ||
546 | LUA_API const void *lua_topointer(lua_State *L, int idx) | 609 | LUA_API const void *lua_topointer(lua_State *L, int idx) |
547 | { | 610 | { |
548 | cTValue *o = index2adr(L, idx); | 611 | return lj_obj_ptr(index2adr(L, idx)); |
549 | if (tvisudata(o)) | ||
550 | return uddata(udataV(o)); | ||
551 | else if (tvislightud(o)) | ||
552 | return lightudV(o); | ||
553 | else if (tviscdata(o)) | ||
554 | return cdataptr(cdataV(o)); | ||
555 | else if (tvisgcv(o)) | ||
556 | return gcV(o); | ||
557 | else | ||
558 | return NULL; | ||
559 | } | 612 | } |
560 | 613 | ||
561 | /* -- Stack setters (object creation) ------------------------------------- */ | 614 | /* -- Stack setters (object creation) ------------------------------------- */ |
@@ -606,7 +659,7 @@ LUA_API const char *lua_pushvfstring(lua_State *L, const char *fmt, | |||
606 | va_list argp) | 659 | va_list argp) |
607 | { | 660 | { |
608 | lj_gc_check(L); | 661 | lj_gc_check(L); |
609 | return lj_str_pushvf(L, fmt, argp); | 662 | return lj_strfmt_pushvf(L, fmt, argp); |
610 | } | 663 | } |
611 | 664 | ||
612 | LUA_API const char *lua_pushfstring(lua_State *L, const char *fmt, ...) | 665 | LUA_API const char *lua_pushfstring(lua_State *L, const char *fmt, ...) |
@@ -615,7 +668,7 @@ LUA_API const char *lua_pushfstring(lua_State *L, const char *fmt, ...) | |||
615 | va_list argp; | 668 | va_list argp; |
616 | lj_gc_check(L); | 669 | lj_gc_check(L); |
617 | va_start(argp, fmt); | 670 | va_start(argp, fmt); |
618 | ret = lj_str_pushvf(L, fmt, argp); | 671 | ret = lj_strfmt_pushvf(L, fmt, argp); |
619 | va_end(argp); | 672 | va_end(argp); |
620 | return ret; | 673 | return ret; |
621 | } | 674 | } |
@@ -649,10 +702,8 @@ LUA_API void lua_pushlightuserdata(lua_State *L, void *p) | |||
649 | 702 | ||
650 | LUA_API void lua_createtable(lua_State *L, int narray, int nrec) | 703 | LUA_API void lua_createtable(lua_State *L, int narray, int nrec) |
651 | { | 704 | { |
652 | GCtab *t; | ||
653 | lj_gc_check(L); | 705 | lj_gc_check(L); |
654 | t = lj_tab_new(L, (uint32_t)(narray > 0 ? narray+1 : 0), hsize2hbits(nrec)); | 706 | settabV(L, L->top, lj_tab_new_ah(L, narray, nrec)); |
655 | settabV(L, L->top, t); | ||
656 | incr_top(L); | 707 | incr_top(L); |
657 | } | 708 | } |
658 | 709 | ||
@@ -715,8 +766,8 @@ LUA_API void lua_concat(lua_State *L, int n) | |||
715 | n -= (int)(L->top - top); | 766 | n -= (int)(L->top - top); |
716 | L->top = top+2; | 767 | L->top = top+2; |
717 | lj_vm_call(L, top, 1+1); | 768 | lj_vm_call(L, top, 1+1); |
718 | L->top--; | 769 | L->top -= 1+LJ_FR2; |
719 | copyTV(L, L->top-1, L->top); | 770 | copyTV(L, L->top-1, L->top+LJ_FR2); |
720 | } while (--n > 0); | 771 | } while (--n > 0); |
721 | } else if (n == 0) { /* Push empty string. */ | 772 | } else if (n == 0) { /* Push empty string. */ |
722 | setstrV(L, L->top, &G(L)->strempty); | 773 | setstrV(L, L->top, &G(L)->strempty); |
@@ -735,8 +786,8 @@ LUA_API void lua_gettable(lua_State *L, int idx) | |||
735 | if (v == NULL) { | 786 | if (v == NULL) { |
736 | L->top += 2; | 787 | L->top += 2; |
737 | lj_vm_call(L, L->top-2, 1+1); | 788 | lj_vm_call(L, L->top-2, 1+1); |
738 | L->top -= 2; | 789 | L->top -= 2+LJ_FR2; |
739 | v = L->top+1; | 790 | v = L->top+1+LJ_FR2; |
740 | } | 791 | } |
741 | copyTV(L, L->top-1, v); | 792 | copyTV(L, L->top-1, v); |
742 | } | 793 | } |
@@ -751,8 +802,8 @@ LUA_API void lua_getfield(lua_State *L, int idx, const char *k) | |||
751 | if (v == NULL) { | 802 | if (v == NULL) { |
752 | L->top += 2; | 803 | L->top += 2; |
753 | lj_vm_call(L, L->top-2, 1+1); | 804 | lj_vm_call(L, L->top-2, 1+1); |
754 | L->top -= 2; | 805 | L->top -= 2+LJ_FR2; |
755 | v = L->top+1; | 806 | v = L->top+1+LJ_FR2; |
756 | } | 807 | } |
757 | copyTV(L, L->top, v); | 808 | copyTV(L, L->top, v); |
758 | incr_top(L); | 809 | incr_top(L); |
@@ -869,7 +920,7 @@ LUA_API void lua_upvaluejoin(lua_State *L, int idx1, int n1, int idx2, int n2) | |||
869 | lj_gc_objbarrier(L, fn1, gcref(fn1->l.uvptr[n1])); | 920 | lj_gc_objbarrier(L, fn1, gcref(fn1->l.uvptr[n1])); |
870 | } | 921 | } |
871 | 922 | ||
872 | LUALIB_API void *luaL_checkudata(lua_State *L, int idx, const char *tname) | 923 | LUALIB_API void *luaL_testudata(lua_State *L, int idx, const char *tname) |
873 | { | 924 | { |
874 | cTValue *o = index2adr(L, idx); | 925 | cTValue *o = index2adr(L, idx); |
875 | if (tvisudata(o)) { | 926 | if (tvisudata(o)) { |
@@ -878,8 +929,14 @@ LUALIB_API void *luaL_checkudata(lua_State *L, int idx, const char *tname) | |||
878 | if (tv && tvistab(tv) && tabV(tv) == tabref(ud->metatable)) | 929 | if (tv && tvistab(tv) && tabV(tv) == tabref(ud->metatable)) |
879 | return uddata(ud); | 930 | return uddata(ud); |
880 | } | 931 | } |
881 | lj_err_argtype(L, idx, tname); | 932 | return NULL; /* value is not a userdata with a metatable */ |
882 | return NULL; /* unreachable */ | 933 | } |
934 | |||
935 | LUALIB_API void *luaL_checkudata(lua_State *L, int idx, const char *tname) | ||
936 | { | ||
937 | void *p = luaL_testudata(L, idx, tname); | ||
938 | if (!p) lj_err_argtype(L, idx, tname); | ||
939 | return p; | ||
883 | } | 940 | } |
884 | 941 | ||
885 | /* -- Object setters ------------------------------------------------------ */ | 942 | /* -- Object setters ------------------------------------------------------ */ |
@@ -893,13 +950,14 @@ LUA_API void lua_settable(lua_State *L, int idx) | |||
893 | o = lj_meta_tset(L, t, L->top-2); | 950 | o = lj_meta_tset(L, t, L->top-2); |
894 | if (o) { | 951 | if (o) { |
895 | /* NOBARRIER: lj_meta_tset ensures the table is not black. */ | 952 | /* NOBARRIER: lj_meta_tset ensures the table is not black. */ |
896 | copyTV(L, o, L->top-1); | ||
897 | L->top -= 2; | 953 | L->top -= 2; |
954 | copyTV(L, o, L->top+1); | ||
898 | } else { | 955 | } else { |
899 | L->top += 3; | 956 | TValue *base = L->top; |
900 | copyTV(L, L->top-1, L->top-6); | 957 | copyTV(L, base+2, base-3-2*LJ_FR2); |
901 | lj_vm_call(L, L->top-3, 0+1); | 958 | L->top = base+3; |
902 | L->top -= 3; | 959 | lj_vm_call(L, base, 0+1); |
960 | L->top -= 3+LJ_FR2; | ||
903 | } | 961 | } |
904 | } | 962 | } |
905 | 963 | ||
@@ -913,14 +971,14 @@ LUA_API void lua_setfield(lua_State *L, int idx, const char *k) | |||
913 | setstrV(L, &key, lj_str_newz(L, k)); | 971 | setstrV(L, &key, lj_str_newz(L, k)); |
914 | o = lj_meta_tset(L, t, &key); | 972 | o = lj_meta_tset(L, t, &key); |
915 | if (o) { | 973 | if (o) { |
916 | L->top--; | ||
917 | /* NOBARRIER: lj_meta_tset ensures the table is not black. */ | 974 | /* NOBARRIER: lj_meta_tset ensures the table is not black. */ |
918 | copyTV(L, o, L->top); | 975 | copyTV(L, o, --L->top); |
919 | } else { | 976 | } else { |
920 | L->top += 3; | 977 | TValue *base = L->top; |
921 | copyTV(L, L->top-1, L->top-6); | 978 | copyTV(L, base+2, base-3-2*LJ_FR2); |
922 | lj_vm_call(L, L->top-3, 0+1); | 979 | L->top = base+3; |
923 | L->top -= 2; | 980 | lj_vm_call(L, base, 0+1); |
981 | L->top -= 2+LJ_FR2; | ||
924 | } | 982 | } |
925 | } | 983 | } |
926 | 984 | ||
@@ -987,6 +1045,12 @@ LUA_API int lua_setmetatable(lua_State *L, int idx) | |||
987 | return 1; | 1045 | return 1; |
988 | } | 1046 | } |
989 | 1047 | ||
1048 | LUALIB_API void luaL_setmetatable(lua_State *L, const char *tname) | ||
1049 | { | ||
1050 | lua_getfield(L, LUA_REGISTRYINDEX, tname); | ||
1051 | lua_setmetatable(L, -2); | ||
1052 | } | ||
1053 | |||
990 | LUA_API int lua_setfenv(lua_State *L, int idx) | 1054 | LUA_API int lua_setfenv(lua_State *L, int idx) |
991 | { | 1055 | { |
992 | cTValue *o = index2adr(L, idx); | 1056 | cTValue *o = index2adr(L, idx); |
@@ -1027,11 +1091,24 @@ LUA_API const char *lua_setupvalue(lua_State *L, int idx, int n) | |||
1027 | 1091 | ||
1028 | /* -- Calls --------------------------------------------------------------- */ | 1092 | /* -- Calls --------------------------------------------------------------- */ |
1029 | 1093 | ||
1094 | #if LJ_FR2 | ||
1095 | static TValue *api_call_base(lua_State *L, int nargs) | ||
1096 | { | ||
1097 | TValue *o = L->top, *base = o - nargs; | ||
1098 | L->top = o+1; | ||
1099 | for (; o > base; o--) copyTV(L, o, o-1); | ||
1100 | setnilV(o); | ||
1101 | return o+1; | ||
1102 | } | ||
1103 | #else | ||
1104 | #define api_call_base(L, nargs) (L->top - (nargs)) | ||
1105 | #endif | ||
1106 | |||
1030 | LUA_API void lua_call(lua_State *L, int nargs, int nresults) | 1107 | LUA_API void lua_call(lua_State *L, int nargs, int nresults) |
1031 | { | 1108 | { |
1032 | api_check(L, L->status == 0 || L->status == LUA_ERRERR); | 1109 | api_check(L, L->status == LUA_OK || L->status == LUA_ERRERR); |
1033 | api_checknelems(L, nargs+1); | 1110 | api_checknelems(L, nargs+1); |
1034 | lj_vm_call(L, L->top - nargs, nresults+1); | 1111 | lj_vm_call(L, api_call_base(L, nargs), nresults+1); |
1035 | } | 1112 | } |
1036 | 1113 | ||
1037 | LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc) | 1114 | LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc) |
@@ -1040,7 +1117,7 @@ LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc) | |||
1040 | uint8_t oldh = hook_save(g); | 1117 | uint8_t oldh = hook_save(g); |
1041 | ptrdiff_t ef; | 1118 | ptrdiff_t ef; |
1042 | int status; | 1119 | int status; |
1043 | api_check(L, L->status == 0 || L->status == LUA_ERRERR); | 1120 | api_check(L, L->status == LUA_OK || L->status == LUA_ERRERR); |
1044 | api_checknelems(L, nargs+1); | 1121 | api_checknelems(L, nargs+1); |
1045 | if (errfunc == 0) { | 1122 | if (errfunc == 0) { |
1046 | ef = 0; | 1123 | ef = 0; |
@@ -1049,7 +1126,7 @@ LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc) | |||
1049 | api_checkvalidindex(L, o); | 1126 | api_checkvalidindex(L, o); |
1050 | ef = savestack(L, o); | 1127 | ef = savestack(L, o); |
1051 | } | 1128 | } |
1052 | status = lj_vm_pcall(L, L->top - nargs, nresults+1, ef); | 1129 | status = lj_vm_pcall(L, api_call_base(L, nargs), nresults+1, ef); |
1053 | if (status) hook_restore(g, oldh); | 1130 | if (status) hook_restore(g, oldh); |
1054 | return status; | 1131 | return status; |
1055 | } | 1132 | } |
@@ -1057,12 +1134,14 @@ LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc) | |||
1057 | static TValue *cpcall(lua_State *L, lua_CFunction func, void *ud) | 1134 | static TValue *cpcall(lua_State *L, lua_CFunction func, void *ud) |
1058 | { | 1135 | { |
1059 | GCfunc *fn = lj_func_newC(L, 0, getcurrenv(L)); | 1136 | GCfunc *fn = lj_func_newC(L, 0, getcurrenv(L)); |
1137 | TValue *top = L->top; | ||
1060 | fn->c.f = func; | 1138 | fn->c.f = func; |
1061 | setfuncV(L, L->top, fn); | 1139 | setfuncV(L, top++, fn); |
1062 | setlightudV(L->top+1, checklightudptr(L, ud)); | 1140 | if (LJ_FR2) setnilV(top++); |
1141 | setlightudV(top++, checklightudptr(L, ud)); | ||
1063 | cframe_nres(L->cframe) = 1+0; /* Zero results. */ | 1142 | cframe_nres(L->cframe) = 1+0; /* Zero results. */ |
1064 | L->top += 2; | 1143 | L->top = top; |
1065 | return L->top-1; /* Now call the newly allocated C function. */ | 1144 | return top-1; /* Now call the newly allocated C function. */ |
1066 | } | 1145 | } |
1067 | 1146 | ||
1068 | LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud) | 1147 | LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud) |
@@ -1070,7 +1149,7 @@ LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud) | |||
1070 | global_State *g = G(L); | 1149 | global_State *g = G(L); |
1071 | uint8_t oldh = hook_save(g); | 1150 | uint8_t oldh = hook_save(g); |
1072 | int status; | 1151 | int status; |
1073 | api_check(L, L->status == 0 || L->status == LUA_ERRERR); | 1152 | api_check(L, L->status == LUA_OK || L->status == LUA_ERRERR); |
1074 | status = lj_vm_cpcall(L, func, ud, cpcall); | 1153 | status = lj_vm_cpcall(L, func, ud, cpcall); |
1075 | if (status) hook_restore(g, oldh); | 1154 | if (status) hook_restore(g, oldh); |
1076 | return status; | 1155 | return status; |
@@ -1079,10 +1158,11 @@ LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud) | |||
1079 | LUALIB_API int luaL_callmeta(lua_State *L, int idx, const char *field) | 1158 | LUALIB_API int luaL_callmeta(lua_State *L, int idx, const char *field) |
1080 | { | 1159 | { |
1081 | if (luaL_getmetafield(L, idx, field)) { | 1160 | if (luaL_getmetafield(L, idx, field)) { |
1082 | TValue *base = L->top--; | 1161 | TValue *top = L->top--; |
1083 | copyTV(L, base, index2adr(L, idx)); | 1162 | if (LJ_FR2) setnilV(top++); |
1084 | L->top = base+1; | 1163 | copyTV(L, top++, index2adr(L, idx)); |
1085 | lj_vm_call(L, base, 1+1); | 1164 | L->top = top; |
1165 | lj_vm_call(L, top-1, 1+1); | ||
1086 | return 1; | 1166 | return 1; |
1087 | } | 1167 | } |
1088 | return 0; | 1168 | return 0; |
@@ -1090,6 +1170,11 @@ LUALIB_API int luaL_callmeta(lua_State *L, int idx, const char *field) | |||
1090 | 1170 | ||
1091 | /* -- Coroutine yield and resume ------------------------------------------ */ | 1171 | /* -- Coroutine yield and resume ------------------------------------------ */ |
1092 | 1172 | ||
1173 | LUA_API int lua_isyieldable(lua_State *L) | ||
1174 | { | ||
1175 | return cframe_canyield(L->cframe); | ||
1176 | } | ||
1177 | |||
1093 | LUA_API int lua_yield(lua_State *L, int nresults) | 1178 | LUA_API int lua_yield(lua_State *L, int nresults) |
1094 | { | 1179 | { |
1095 | void *cf = L->cframe; | 1180 | void *cf = L->cframe; |
@@ -1109,12 +1194,14 @@ LUA_API int lua_yield(lua_State *L, int nresults) | |||
1109 | } else { /* Yield from hook: add a pseudo-frame. */ | 1194 | } else { /* Yield from hook: add a pseudo-frame. */ |
1110 | TValue *top = L->top; | 1195 | TValue *top = L->top; |
1111 | hook_leave(g); | 1196 | hook_leave(g); |
1112 | top->u64 = cframe_multres(cf); | 1197 | (top++)->u64 = cframe_multres(cf); |
1113 | setcont(top+1, lj_cont_hook); | 1198 | setcont(top, lj_cont_hook); |
1114 | setframe_pc(top+1, cframe_pc(cf)-1); | 1199 | if (LJ_FR2) top++; |
1115 | setframe_gc(top+2, obj2gco(L)); | 1200 | setframe_pc(top, cframe_pc(cf)-1); |
1116 | setframe_ftsz(top+2, (int)((char *)(top+3)-(char *)L->base)+FRAME_CONT); | 1201 | if (LJ_FR2) top++; |
1117 | L->top = L->base = top+3; | 1202 | setframe_gc(top, obj2gco(L), LJ_TTHREAD); |
1203 | setframe_ftsz(top, ((char *)(top+1)-(char *)L->base)+FRAME_CONT); | ||
1204 | L->top = L->base = top+1; | ||
1118 | #if LJ_TARGET_X64 | 1205 | #if LJ_TARGET_X64 |
1119 | lj_err_throw(L, LUA_YIELD); | 1206 | lj_err_throw(L, LUA_YIELD); |
1120 | #else | 1207 | #else |
@@ -1131,7 +1218,9 @@ LUA_API int lua_yield(lua_State *L, int nresults) | |||
1131 | LUA_API int lua_resume(lua_State *L, int nargs) | 1218 | LUA_API int lua_resume(lua_State *L, int nargs) |
1132 | { | 1219 | { |
1133 | if (L->cframe == NULL && L->status <= LUA_YIELD) | 1220 | if (L->cframe == NULL && L->status <= LUA_YIELD) |
1134 | return lj_vm_resume(L, L->top - nargs, 0, 0); | 1221 | return lj_vm_resume(L, |
1222 | L->status == LUA_OK ? api_call_base(L, nargs) : L->top - nargs, | ||
1223 | 0, 0); | ||
1135 | L->top = L->base; | 1224 | L->top = L->base; |
1136 | setstrV(L, L->top, lj_err_str(L, LJ_ERR_COSUSP)); | 1225 | setstrV(L, L->top, lj_err_str(L, LJ_ERR_COSUSP)); |
1137 | incr_top(L); | 1226 | incr_top(L); |
@@ -1161,7 +1250,7 @@ LUA_API int lua_gc(lua_State *L, int what, int data) | |||
1161 | res = (int)(g->gc.total & 0x3ff); | 1250 | res = (int)(g->gc.total & 0x3ff); |
1162 | break; | 1251 | break; |
1163 | case LUA_GCSTEP: { | 1252 | case LUA_GCSTEP: { |
1164 | MSize a = (MSize)data << 10; | 1253 | GCSize a = (GCSize)data << 10; |
1165 | g->gc.threshold = (a <= g->gc.total) ? (g->gc.total - a) : 0; | 1254 | g->gc.threshold = (a <= g->gc.total) ? (g->gc.total - a) : 0; |
1166 | while (g->gc.total >= g->gc.threshold) | 1255 | while (g->gc.total >= g->gc.threshold) |
1167 | if (lj_gc_step(L) > 0) { | 1256 | if (lj_gc_step(L) > 0) { |
@@ -1178,6 +1267,9 @@ LUA_API int lua_gc(lua_State *L, int what, int data) | |||
1178 | res = (int)(g->gc.stepmul); | 1267 | res = (int)(g->gc.stepmul); |
1179 | g->gc.stepmul = (MSize)data; | 1268 | g->gc.stepmul = (MSize)data; |
1180 | break; | 1269 | break; |
1270 | case LUA_GCISRUNNING: | ||
1271 | res = (g->gc.threshold != LJ_MAX_MEM); | ||
1272 | break; | ||
1181 | default: | 1273 | default: |
1182 | res = -1; /* Invalid option. */ | 1274 | res = -1; /* Invalid option. */ |
1183 | } | 1275 | } |
diff --git a/src/lj_arch.h b/src/lj_arch.h index 320ccf97..027b39ce 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h | |||
@@ -19,12 +19,16 @@ | |||
19 | #define LUAJIT_ARCH_x64 2 | 19 | #define LUAJIT_ARCH_x64 2 |
20 | #define LUAJIT_ARCH_ARM 3 | 20 | #define LUAJIT_ARCH_ARM 3 |
21 | #define LUAJIT_ARCH_arm 3 | 21 | #define LUAJIT_ARCH_arm 3 |
22 | #define LUAJIT_ARCH_PPC 4 | 22 | #define LUAJIT_ARCH_ARM64 4 |
23 | #define LUAJIT_ARCH_ppc 4 | 23 | #define LUAJIT_ARCH_arm64 4 |
24 | #define LUAJIT_ARCH_PPCSPE 5 | 24 | #define LUAJIT_ARCH_PPC 5 |
25 | #define LUAJIT_ARCH_ppcspe 5 | 25 | #define LUAJIT_ARCH_ppc 5 |
26 | #define LUAJIT_ARCH_MIPS 6 | 26 | #define LUAJIT_ARCH_MIPS 6 |
27 | #define LUAJIT_ARCH_mips 6 | 27 | #define LUAJIT_ARCH_mips 6 |
28 | #define LUAJIT_ARCH_MIPS32 6 | ||
29 | #define LUAJIT_ARCH_mips32 6 | ||
30 | #define LUAJIT_ARCH_MIPS64 7 | ||
31 | #define LUAJIT_ARCH_mips64 7 | ||
28 | 32 | ||
29 | /* Target OS. */ | 33 | /* Target OS. */ |
30 | #define LUAJIT_OS_OTHER 0 | 34 | #define LUAJIT_OS_OTHER 0 |
@@ -43,14 +47,14 @@ | |||
43 | #define LUAJIT_TARGET LUAJIT_ARCH_X64 | 47 | #define LUAJIT_TARGET LUAJIT_ARCH_X64 |
44 | #elif defined(__arm__) || defined(__arm) || defined(__ARM__) || defined(__ARM) | 48 | #elif defined(__arm__) || defined(__arm) || defined(__ARM__) || defined(__ARM) |
45 | #define LUAJIT_TARGET LUAJIT_ARCH_ARM | 49 | #define LUAJIT_TARGET LUAJIT_ARCH_ARM |
50 | #elif defined(__aarch64__) | ||
51 | #define LUAJIT_TARGET LUAJIT_ARCH_ARM64 | ||
46 | #elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC) | 52 | #elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC) |
47 | #ifdef __NO_FPRS__ | ||
48 | #define LUAJIT_TARGET LUAJIT_ARCH_PPCSPE | ||
49 | #else | ||
50 | #define LUAJIT_TARGET LUAJIT_ARCH_PPC | 53 | #define LUAJIT_TARGET LUAJIT_ARCH_PPC |
51 | #endif | 54 | #elif defined(__mips64__) || defined(__mips64) || defined(__MIPS64__) || defined(__MIPS64) |
55 | #define LUAJIT_TARGET LUAJIT_ARCH_MIPS64 | ||
52 | #elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS) | 56 | #elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS) |
53 | #define LUAJIT_TARGET LUAJIT_ARCH_MIPS | 57 | #define LUAJIT_TARGET LUAJIT_ARCH_MIPS32 |
54 | #else | 58 | #else |
55 | #error "No support for this architecture (yet)" | 59 | #error "No support for this architecture (yet)" |
56 | #endif | 60 | #endif |
@@ -70,7 +74,7 @@ | |||
70 | defined(__NetBSD__) || defined(__OpenBSD__) || \ | 74 | defined(__NetBSD__) || defined(__OpenBSD__) || \ |
71 | defined(__DragonFly__)) && !defined(__ORBIS__) | 75 | defined(__DragonFly__)) && !defined(__ORBIS__) |
72 | #define LUAJIT_OS LUAJIT_OS_BSD | 76 | #define LUAJIT_OS LUAJIT_OS_BSD |
73 | #elif (defined(__sun__) && defined(__svr4__)) | 77 | #elif (defined(__sun__) && defined(__svr4__)) || defined(__HAIKU__) |
74 | #define LUAJIT_OS LUAJIT_OS_POSIX | 78 | #define LUAJIT_OS LUAJIT_OS_POSIX |
75 | #elif defined(__CYGWIN__) | 79 | #elif defined(__CYGWIN__) |
76 | #define LJ_TARGET_CYGWIN 1 | 80 | #define LJ_TARGET_CYGWIN 1 |
@@ -99,7 +103,7 @@ | |||
99 | #define LJ_TARGET_WINDOWS (LUAJIT_OS == LUAJIT_OS_WINDOWS) | 103 | #define LJ_TARGET_WINDOWS (LUAJIT_OS == LUAJIT_OS_WINDOWS) |
100 | #define LJ_TARGET_LINUX (LUAJIT_OS == LUAJIT_OS_LINUX) | 104 | #define LJ_TARGET_LINUX (LUAJIT_OS == LUAJIT_OS_LINUX) |
101 | #define LJ_TARGET_OSX (LUAJIT_OS == LUAJIT_OS_OSX) | 105 | #define LJ_TARGET_OSX (LUAJIT_OS == LUAJIT_OS_OSX) |
102 | #define LJ_TARGET_IOS (LJ_TARGET_OSX && LUAJIT_TARGET == LUAJIT_ARCH_ARM) | 106 | #define LJ_TARGET_IOS (LJ_TARGET_OSX && (LUAJIT_TARGET == LUAJIT_ARCH_ARM || LUAJIT_TARGET == LUAJIT_ARCH_ARM64)) |
103 | #define LJ_TARGET_POSIX (LUAJIT_OS > LUAJIT_OS_WINDOWS) | 107 | #define LJ_TARGET_POSIX (LUAJIT_OS > LUAJIT_OS_WINDOWS) |
104 | #define LJ_TARGET_DLOPEN LJ_TARGET_POSIX | 108 | #define LJ_TARGET_DLOPEN LJ_TARGET_POSIX |
105 | 109 | ||
@@ -125,6 +129,19 @@ | |||
125 | #define LJ_TARGET_CONSOLE 1 | 129 | #define LJ_TARGET_CONSOLE 1 |
126 | #endif | 130 | #endif |
127 | 131 | ||
132 | #ifdef _DURANGO | ||
133 | #define LJ_TARGET_XBOXONE 1 | ||
134 | #define LJ_TARGET_CONSOLE 1 | ||
135 | #define LJ_TARGET_GC64 1 | ||
136 | #endif | ||
137 | |||
138 | #ifdef _UWP | ||
139 | #define LJ_TARGET_UWP 1 | ||
140 | #if LUAJIT_TARGET == LUAJIT_ARCH_X64 | ||
141 | #define LJ_TARGET_GC64 1 | ||
142 | #endif | ||
143 | #endif | ||
144 | |||
128 | #define LJ_NUMMODE_SINGLE 0 /* Single-number mode only. */ | 145 | #define LJ_NUMMODE_SINGLE 0 /* Single-number mode only. */ |
129 | #define LJ_NUMMODE_SINGLE_DUAL 1 /* Default to single-number mode. */ | 146 | #define LJ_NUMMODE_SINGLE_DUAL 1 /* Default to single-number mode. */ |
130 | #define LJ_NUMMODE_DUAL 2 /* Dual-number mode only. */ | 147 | #define LJ_NUMMODE_DUAL 2 /* Dual-number mode only. */ |
@@ -167,6 +184,9 @@ | |||
167 | #define LJ_TARGET_MASKROT 1 | 184 | #define LJ_TARGET_MASKROT 1 |
168 | #define LJ_TARGET_UNALIGNED 1 | 185 | #define LJ_TARGET_UNALIGNED 1 |
169 | #define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE_DUAL | 186 | #define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE_DUAL |
187 | #ifndef LUAJIT_DISABLE_GC64 | ||
188 | #define LJ_TARGET_GC64 1 | ||
189 | #endif | ||
170 | 190 | ||
171 | #elif LUAJIT_TARGET == LUAJIT_ARCH_ARM | 191 | #elif LUAJIT_TARGET == LUAJIT_ARCH_ARM |
172 | 192 | ||
@@ -188,7 +208,7 @@ | |||
188 | #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ | 208 | #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ |
189 | #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL | 209 | #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL |
190 | 210 | ||
191 | #if __ARM_ARCH____ARM_ARCH_8__ || __ARM_ARCH_8A__ | 211 | #if __ARM_ARCH_8__ || __ARM_ARCH_8A__ |
192 | #define LJ_ARCH_VERSION 80 | 212 | #define LJ_ARCH_VERSION 80 |
193 | #elif __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__ | 213 | #elif __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__ |
194 | #define LJ_ARCH_VERSION 70 | 214 | #define LJ_ARCH_VERSION 70 |
@@ -200,22 +220,84 @@ | |||
200 | #define LJ_ARCH_VERSION 50 | 220 | #define LJ_ARCH_VERSION 50 |
201 | #endif | 221 | #endif |
202 | 222 | ||
223 | #elif LUAJIT_TARGET == LUAJIT_ARCH_ARM64 | ||
224 | |||
225 | #define LJ_ARCH_BITS 64 | ||
226 | #if defined(__AARCH64EB__) | ||
227 | #define LJ_ARCH_NAME "arm64be" | ||
228 | #define LJ_ARCH_ENDIAN LUAJIT_BE | ||
229 | #else | ||
230 | #define LJ_ARCH_NAME "arm64" | ||
231 | #define LJ_ARCH_ENDIAN LUAJIT_LE | ||
232 | #endif | ||
233 | #define LJ_TARGET_ARM64 1 | ||
234 | #define LJ_TARGET_EHRETREG 0 | ||
235 | #define LJ_TARGET_JUMPRANGE 27 /* +-2^27 = +-128MB */ | ||
236 | #define LJ_TARGET_MASKSHIFT 1 | ||
237 | #define LJ_TARGET_MASKROT 1 | ||
238 | #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ | ||
239 | #define LJ_TARGET_GC64 1 | ||
240 | #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL | ||
241 | |||
242 | #define LJ_ARCH_VERSION 80 | ||
243 | |||
203 | #elif LUAJIT_TARGET == LUAJIT_ARCH_PPC | 244 | #elif LUAJIT_TARGET == LUAJIT_ARCH_PPC |
204 | 245 | ||
205 | #define LJ_ARCH_NAME "ppc" | 246 | #ifndef LJ_ARCH_ENDIAN |
247 | #if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__ | ||
248 | #define LJ_ARCH_ENDIAN LUAJIT_LE | ||
249 | #else | ||
250 | #define LJ_ARCH_ENDIAN LUAJIT_BE | ||
251 | #endif | ||
252 | #endif | ||
253 | |||
206 | #if _LP64 | 254 | #if _LP64 |
207 | #define LJ_ARCH_BITS 64 | 255 | #define LJ_ARCH_BITS 64 |
256 | #if LJ_ARCH_ENDIAN == LUAJIT_LE | ||
257 | #define LJ_ARCH_NAME "ppc64le" | ||
258 | #else | ||
259 | #define LJ_ARCH_NAME "ppc64" | ||
260 | #endif | ||
208 | #else | 261 | #else |
209 | #define LJ_ARCH_BITS 32 | 262 | #define LJ_ARCH_BITS 32 |
263 | #define LJ_ARCH_NAME "ppc" | ||
264 | |||
265 | #if !defined(LJ_ARCH_HASFPU) | ||
266 | #if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE) | ||
267 | #define LJ_ARCH_HASFPU 0 | ||
268 | #else | ||
269 | #define LJ_ARCH_HASFPU 1 | ||
210 | #endif | 270 | #endif |
211 | #define LJ_ARCH_ENDIAN LUAJIT_BE | 271 | #endif |
272 | |||
273 | #if !defined(LJ_ABI_SOFTFP) | ||
274 | #if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE) | ||
275 | #define LJ_ABI_SOFTFP 1 | ||
276 | #else | ||
277 | #define LJ_ABI_SOFTFP 0 | ||
278 | #endif | ||
279 | #endif | ||
280 | #endif | ||
281 | |||
282 | #if LJ_ABI_SOFTFP | ||
283 | #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL | ||
284 | #else | ||
285 | #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE | ||
286 | #endif | ||
287 | |||
212 | #define LJ_TARGET_PPC 1 | 288 | #define LJ_TARGET_PPC 1 |
213 | #define LJ_TARGET_EHRETREG 3 | 289 | #define LJ_TARGET_EHRETREG 3 |
214 | #define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */ | 290 | #define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */ |
215 | #define LJ_TARGET_MASKSHIFT 0 | 291 | #define LJ_TARGET_MASKSHIFT 0 |
216 | #define LJ_TARGET_MASKROT 1 | 292 | #define LJ_TARGET_MASKROT 1 |
217 | #define LJ_TARGET_UNIFYROT 1 /* Want only IR_BROL. */ | 293 | #define LJ_TARGET_UNIFYROT 1 /* Want only IR_BROL. */ |
218 | #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE | 294 | |
295 | #if LJ_TARGET_CONSOLE | ||
296 | #define LJ_ARCH_PPC32ON64 1 | ||
297 | #define LJ_ARCH_NOFFI 1 | ||
298 | #elif LJ_ARCH_BITS == 64 | ||
299 | #error "No support for PPC64" | ||
300 | #endif | ||
219 | 301 | ||
220 | #if _ARCH_PWR7 | 302 | #if _ARCH_PWR7 |
221 | #define LJ_ARCH_VERSION 70 | 303 | #define LJ_ARCH_VERSION 70 |
@@ -230,10 +312,6 @@ | |||
230 | #else | 312 | #else |
231 | #define LJ_ARCH_VERSION 0 | 313 | #define LJ_ARCH_VERSION 0 |
232 | #endif | 314 | #endif |
233 | #if __PPC64__ || __powerpc64__ || LJ_TARGET_CONSOLE | ||
234 | #define LJ_ARCH_PPC64 1 | ||
235 | #define LJ_ARCH_NOFFI 1 | ||
236 | #endif | ||
237 | #if _ARCH_PPCSQ | 315 | #if _ARCH_PPCSQ |
238 | #define LJ_ARCH_SQRT 1 | 316 | #define LJ_ARCH_SQRT 1 |
239 | #endif | 317 | #endif |
@@ -247,44 +325,79 @@ | |||
247 | #define LJ_ARCH_XENON 1 | 325 | #define LJ_ARCH_XENON 1 |
248 | #endif | 326 | #endif |
249 | 327 | ||
250 | #elif LUAJIT_TARGET == LUAJIT_ARCH_PPCSPE | 328 | #elif LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 || LUAJIT_TARGET == LUAJIT_ARCH_MIPS64 |
251 | |||
252 | #define LJ_ARCH_NAME "ppcspe" | ||
253 | #define LJ_ARCH_BITS 32 | ||
254 | #define LJ_ARCH_ENDIAN LUAJIT_BE | ||
255 | #ifndef LJ_ABI_SOFTFP | ||
256 | #define LJ_ABI_SOFTFP 1 | ||
257 | #endif | ||
258 | #define LJ_ABI_EABI 1 | ||
259 | #define LJ_TARGET_PPCSPE 1 | ||
260 | #define LJ_TARGET_EHRETREG 3 | ||
261 | #define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */ | ||
262 | #define LJ_TARGET_MASKSHIFT 0 | ||
263 | #define LJ_TARGET_MASKROT 1 | ||
264 | #define LJ_TARGET_UNIFYROT 1 /* Want only IR_BROL. */ | ||
265 | #define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE | ||
266 | #define LJ_ARCH_NOFFI 1 /* NYI: comparisons, calls. */ | ||
267 | #define LJ_ARCH_NOJIT 1 | ||
268 | |||
269 | #elif LUAJIT_TARGET == LUAJIT_ARCH_MIPS | ||
270 | 329 | ||
271 | #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) | 330 | #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) |
331 | #if __mips_isa_rev >= 6 | ||
332 | #define LJ_TARGET_MIPSR6 1 | ||
333 | #define LJ_TARGET_UNALIGNED 1 | ||
334 | #endif | ||
335 | #if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 | ||
336 | #if LJ_TARGET_MIPSR6 | ||
337 | #define LJ_ARCH_NAME "mips32r6el" | ||
338 | #else | ||
272 | #define LJ_ARCH_NAME "mipsel" | 339 | #define LJ_ARCH_NAME "mipsel" |
340 | #endif | ||
341 | #else | ||
342 | #if LJ_TARGET_MIPSR6 | ||
343 | #define LJ_ARCH_NAME "mips64r6el" | ||
344 | #else | ||
345 | #define LJ_ARCH_NAME "mips64el" | ||
346 | #endif | ||
347 | #endif | ||
273 | #define LJ_ARCH_ENDIAN LUAJIT_LE | 348 | #define LJ_ARCH_ENDIAN LUAJIT_LE |
274 | #else | 349 | #else |
350 | #if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 | ||
351 | #if LJ_TARGET_MIPSR6 | ||
352 | #define LJ_ARCH_NAME "mips32r6" | ||
353 | #else | ||
275 | #define LJ_ARCH_NAME "mips" | 354 | #define LJ_ARCH_NAME "mips" |
355 | #endif | ||
356 | #else | ||
357 | #if LJ_TARGET_MIPSR6 | ||
358 | #define LJ_ARCH_NAME "mips64r6" | ||
359 | #else | ||
360 | #define LJ_ARCH_NAME "mips64" | ||
361 | #endif | ||
362 | #endif | ||
276 | #define LJ_ARCH_ENDIAN LUAJIT_BE | 363 | #define LJ_ARCH_ENDIAN LUAJIT_BE |
277 | #endif | 364 | #endif |
365 | |||
366 | #if !defined(LJ_ARCH_HASFPU) | ||
367 | #ifdef __mips_soft_float | ||
368 | #define LJ_ARCH_HASFPU 0 | ||
369 | #else | ||
370 | #define LJ_ARCH_HASFPU 1 | ||
371 | #endif | ||
372 | #endif | ||
373 | |||
374 | #if !defined(LJ_ABI_SOFTFP) | ||
375 | #ifdef __mips_soft_float | ||
376 | #define LJ_ABI_SOFTFP 1 | ||
377 | #else | ||
378 | #define LJ_ABI_SOFTFP 0 | ||
379 | #endif | ||
380 | #endif | ||
381 | |||
382 | #if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 | ||
278 | #define LJ_ARCH_BITS 32 | 383 | #define LJ_ARCH_BITS 32 |
384 | #define LJ_TARGET_MIPS32 1 | ||
385 | #else | ||
386 | #define LJ_ARCH_BITS 64 | ||
387 | #define LJ_TARGET_MIPS64 1 | ||
388 | #define LJ_TARGET_GC64 1 | ||
389 | #endif | ||
279 | #define LJ_TARGET_MIPS 1 | 390 | #define LJ_TARGET_MIPS 1 |
280 | #define LJ_TARGET_EHRETREG 4 | 391 | #define LJ_TARGET_EHRETREG 4 |
281 | #define LJ_TARGET_JUMPRANGE 27 /* 2*2^27 = 256MB-aligned region */ | 392 | #define LJ_TARGET_JUMPRANGE 27 /* 2*2^27 = 256MB-aligned region */ |
282 | #define LJ_TARGET_MASKSHIFT 1 | 393 | #define LJ_TARGET_MASKSHIFT 1 |
283 | #define LJ_TARGET_MASKROT 1 | 394 | #define LJ_TARGET_MASKROT 1 |
284 | #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ | 395 | #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ |
285 | #define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE | 396 | #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL |
286 | 397 | ||
287 | #if _MIPS_ARCH_MIPS32R2 | 398 | #if LJ_TARGET_MIPSR6 |
399 | #define LJ_ARCH_VERSION 60 | ||
400 | #elif _MIPS_ARCH_MIPS32R2 || _MIPS_ARCH_MIPS64R2 | ||
288 | #define LJ_ARCH_VERSION 20 | 401 | #define LJ_ARCH_VERSION 20 |
289 | #else | 402 | #else |
290 | #define LJ_ARCH_VERSION 10 | 403 | #define LJ_ARCH_VERSION 10 |
@@ -312,6 +425,16 @@ | |||
312 | #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2) | 425 | #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2) |
313 | #error "Need at least GCC 4.2 or newer" | 426 | #error "Need at least GCC 4.2 or newer" |
314 | #endif | 427 | #endif |
428 | #elif LJ_TARGET_ARM64 | ||
429 | #if __clang__ | ||
430 | #if ((__clang_major__ < 3) || ((__clang_major__ == 3) && __clang_minor__ < 5)) && !defined(__NX_TOOLCHAIN_MAJOR__) | ||
431 | #error "Need at least Clang 3.5 or newer" | ||
432 | #endif | ||
433 | #else | ||
434 | #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 8) | ||
435 | #error "Need at least GCC 4.8 or newer" | ||
436 | #endif | ||
437 | #endif | ||
315 | #elif !LJ_TARGET_PS3 | 438 | #elif !LJ_TARGET_PS3 |
316 | #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 3) | 439 | #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 3) |
317 | #error "Need at least GCC 4.3 or newer" | 440 | #error "Need at least GCC 4.3 or newer" |
@@ -335,22 +458,29 @@ | |||
335 | #if !(__ARM_EABI__ || LJ_TARGET_IOS) | 458 | #if !(__ARM_EABI__ || LJ_TARGET_IOS) |
336 | #error "Only ARM EABI or iOS 3.0+ ABI is supported" | 459 | #error "Only ARM EABI or iOS 3.0+ ABI is supported" |
337 | #endif | 460 | #endif |
338 | #elif LJ_TARGET_PPC || LJ_TARGET_PPCSPE | 461 | #elif LJ_TARGET_ARM64 |
339 | #if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE) | 462 | #if defined(_ILP32) |
340 | #error "No support for PowerPC CPUs without double-precision FPU" | 463 | #error "No support for ILP32 model on ARM64" |
341 | #endif | 464 | #endif |
465 | #elif LJ_TARGET_PPC | ||
342 | #if defined(_LITTLE_ENDIAN) && (!defined(_BYTE_ORDER) || (_BYTE_ORDER == _LITTLE_ENDIAN)) | 466 | #if defined(_LITTLE_ENDIAN) && (!defined(_BYTE_ORDER) || (_BYTE_ORDER == _LITTLE_ENDIAN)) |
343 | #error "No support for little-endian PowerPC" | 467 | #error "No support for little-endian PPC32" |
468 | #endif | ||
469 | #if defined(__NO_FPRS__) && !defined(_SOFT_FLOAT) | ||
470 | #error "No support for PPC/e500 anymore (use LuaJIT 2.0)" | ||
344 | #endif | 471 | #endif |
345 | #if defined(_LP64) | 472 | #elif LJ_TARGET_MIPS32 |
346 | #error "No support for PowerPC 64 bit mode" | 473 | #if !((defined(_MIPS_SIM_ABI32) && _MIPS_SIM == _MIPS_SIM_ABI32) || (defined(_ABIO32) && _MIPS_SIM == _ABIO32)) |
474 | #error "Only o32 ABI supported for MIPS32" | ||
347 | #endif | 475 | #endif |
348 | #elif LJ_TARGET_MIPS | 476 | #if LJ_TARGET_MIPSR6 |
349 | #if defined(__mips_soft_float) | 477 | /* Not that useful, since most available r6 CPUs are 64 bit. */ |
350 | #error "No support for MIPS CPUs without FPU" | 478 | #error "No support for MIPS32R6" |
351 | #endif | 479 | #endif |
352 | #if defined(_LP64) | 480 | #elif LJ_TARGET_MIPS64 |
353 | #error "No support for MIPS64" | 481 | #if !((defined(_MIPS_SIM_ABI64) && _MIPS_SIM == _MIPS_SIM_ABI64) || (defined(_ABI64) && _MIPS_SIM == _ABI64)) |
482 | /* MIPS32ON64 aka n32 ABI support might be desirable, but difficult. */ | ||
483 | #error "Only n64 ABI supported for MIPS64" | ||
354 | #endif | 484 | #endif |
355 | #endif | 485 | #endif |
356 | #endif | 486 | #endif |
@@ -376,6 +506,20 @@ | |||
376 | #endif | 506 | #endif |
377 | #endif | 507 | #endif |
378 | 508 | ||
509 | /* 64 bit GC references. */ | ||
510 | #if LJ_TARGET_GC64 | ||
511 | #define LJ_GC64 1 | ||
512 | #else | ||
513 | #define LJ_GC64 0 | ||
514 | #endif | ||
515 | |||
516 | /* 2-slot frame info. */ | ||
517 | #if LJ_GC64 | ||
518 | #define LJ_FR2 1 | ||
519 | #else | ||
520 | #define LJ_FR2 0 | ||
521 | #endif | ||
522 | |||
379 | /* Disable or enable the JIT compiler. */ | 523 | /* Disable or enable the JIT compiler. */ |
380 | #if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT) | 524 | #if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT) |
381 | #define LJ_HASJIT 0 | 525 | #define LJ_HASJIT 0 |
@@ -390,6 +534,21 @@ | |||
390 | #define LJ_HASFFI 1 | 534 | #define LJ_HASFFI 1 |
391 | #endif | 535 | #endif |
392 | 536 | ||
537 | #if defined(LUAJIT_DISABLE_PROFILE) | ||
538 | #define LJ_HASPROFILE 0 | ||
539 | #elif LJ_TARGET_POSIX | ||
540 | #define LJ_HASPROFILE 1 | ||
541 | #define LJ_PROFILE_SIGPROF 1 | ||
542 | #elif LJ_TARGET_PS3 | ||
543 | #define LJ_HASPROFILE 1 | ||
544 | #define LJ_PROFILE_PTHREAD 1 | ||
545 | #elif LJ_TARGET_WINDOWS || LJ_TARGET_XBOX360 | ||
546 | #define LJ_HASPROFILE 1 | ||
547 | #define LJ_PROFILE_WTHREAD 1 | ||
548 | #else | ||
549 | #define LJ_HASPROFILE 0 | ||
550 | #endif | ||
551 | |||
393 | #ifndef LJ_ARCH_HASFPU | 552 | #ifndef LJ_ARCH_HASFPU |
394 | #define LJ_ARCH_HASFPU 1 | 553 | #define LJ_ARCH_HASFPU 1 |
395 | #endif | 554 | #endif |
@@ -397,6 +556,7 @@ | |||
397 | #define LJ_ABI_SOFTFP 0 | 556 | #define LJ_ABI_SOFTFP 0 |
398 | #endif | 557 | #endif |
399 | #define LJ_SOFTFP (!LJ_ARCH_HASFPU) | 558 | #define LJ_SOFTFP (!LJ_ARCH_HASFPU) |
559 | #define LJ_SOFTFP32 (LJ_SOFTFP && LJ_32) | ||
400 | 560 | ||
401 | #if LJ_ARCH_ENDIAN == LUAJIT_BE | 561 | #if LJ_ARCH_ENDIAN == LUAJIT_BE |
402 | #define LJ_LE 0 | 562 | #define LJ_LE 0 |
@@ -422,11 +582,11 @@ | |||
422 | #define LJ_TARGET_UNALIGNED 0 | 582 | #define LJ_TARGET_UNALIGNED 0 |
423 | #endif | 583 | #endif |
424 | 584 | ||
425 | /* Various workarounds for embedded operating systems. */ | 585 | /* Various workarounds for embedded operating systems or weak C runtimes. */ |
426 | #if (defined(__ANDROID__) && !defined(LJ_TARGET_X86ORX64)) || defined(__symbian__) || LJ_TARGET_XBOX360 | 586 | #if defined(__ANDROID__) || defined(__symbian__) || LJ_TARGET_XBOX360 || LJ_TARGET_WINDOWS |
427 | #define LUAJIT_NO_LOG2 | 587 | #define LUAJIT_NO_LOG2 |
428 | #endif | 588 | #endif |
429 | #if defined(__symbian__) | 589 | #if defined(__symbian__) || LJ_TARGET_WINDOWS |
430 | #define LUAJIT_NO_EXP2 | 590 | #define LUAJIT_NO_EXP2 |
431 | #endif | 591 | #endif |
432 | #if LJ_TARGET_CONSOLE || (LJ_TARGET_IOS && __IPHONE_OS_VERSION_MIN_REQUIRED >= __IPHONE_8_0) | 592 | #if LJ_TARGET_CONSOLE || (LJ_TARGET_IOS && __IPHONE_OS_VERSION_MIN_REQUIRED >= __IPHONE_8_0) |
@@ -442,6 +602,18 @@ | |||
442 | #define LJ_NO_UNWIND 1 | 602 | #define LJ_NO_UNWIND 1 |
443 | #endif | 603 | #endif |
444 | 604 | ||
605 | #if LJ_TARGET_WINDOWS | ||
606 | #if LJ_TARGET_UWP | ||
607 | #define LJ_WIN_VALLOC VirtualAllocFromApp | ||
608 | #define LJ_WIN_VPROTECT VirtualProtectFromApp | ||
609 | extern void *LJ_WIN_LOADLIBA(const char *path); | ||
610 | #else | ||
611 | #define LJ_WIN_VALLOC VirtualAlloc | ||
612 | #define LJ_WIN_VPROTECT VirtualProtect | ||
613 | #define LJ_WIN_LOADLIBA(path) LoadLibraryExA((path), NULL, 0) | ||
614 | #endif | ||
615 | #endif | ||
616 | |||
445 | /* Compatibility with Lua 5.1 vs. 5.2. */ | 617 | /* Compatibility with Lua 5.1 vs. 5.2. */ |
446 | #ifdef LUAJIT_ENABLE_LUA52COMPAT | 618 | #ifdef LUAJIT_ENABLE_LUA52COMPAT |
447 | #define LJ_52 1 | 619 | #define LJ_52 1 |
diff --git a/src/lj_asm.c b/src/lj_asm.c index 9b17421e..68d28fb0 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c | |||
@@ -90,7 +90,7 @@ typedef struct ASMState { | |||
90 | MCode *realign; /* Realign loop if not NULL. */ | 90 | MCode *realign; /* Realign loop if not NULL. */ |
91 | 91 | ||
92 | #ifdef RID_NUM_KREF | 92 | #ifdef RID_NUM_KREF |
93 | int32_t krefk[RID_NUM_KREF]; | 93 | intptr_t krefk[RID_NUM_KREF]; |
94 | #endif | 94 | #endif |
95 | IRRef1 phireg[RID_MAX]; /* PHI register references. */ | 95 | IRRef1 phireg[RID_MAX]; /* PHI register references. */ |
96 | uint16_t parentmap[LJ_MAX_JSLOTS]; /* Parent instruction to RegSP map. */ | 96 | uint16_t parentmap[LJ_MAX_JSLOTS]; /* Parent instruction to RegSP map. */ |
@@ -143,7 +143,7 @@ static LJ_AINLINE void checkmclim(ASMState *as) | |||
143 | #define ra_krefreg(ref) ((Reg)(RID_MIN_KREF + (Reg)(ref))) | 143 | #define ra_krefreg(ref) ((Reg)(RID_MIN_KREF + (Reg)(ref))) |
144 | #define ra_krefk(as, ref) (as->krefk[(ref)]) | 144 | #define ra_krefk(as, ref) (as->krefk[(ref)]) |
145 | 145 | ||
146 | static LJ_AINLINE void ra_setkref(ASMState *as, Reg r, int32_t k) | 146 | static LJ_AINLINE void ra_setkref(ASMState *as, Reg r, intptr_t k) |
147 | { | 147 | { |
148 | IRRef ref = (IRRef)(r - RID_MIN_KREF); | 148 | IRRef ref = (IRRef)(r - RID_MIN_KREF); |
149 | as->krefk[ref] = k; | 149 | as->krefk[ref] = k; |
@@ -170,6 +170,8 @@ IRFLDEF(FLOFS) | |||
170 | #include "lj_emit_x86.h" | 170 | #include "lj_emit_x86.h" |
171 | #elif LJ_TARGET_ARM | 171 | #elif LJ_TARGET_ARM |
172 | #include "lj_emit_arm.h" | 172 | #include "lj_emit_arm.h" |
173 | #elif LJ_TARGET_ARM64 | ||
174 | #include "lj_emit_arm64.h" | ||
173 | #elif LJ_TARGET_PPC | 175 | #elif LJ_TARGET_PPC |
174 | #include "lj_emit_ppc.h" | 176 | #include "lj_emit_ppc.h" |
175 | #elif LJ_TARGET_MIPS | 177 | #elif LJ_TARGET_MIPS |
@@ -178,6 +180,12 @@ IRFLDEF(FLOFS) | |||
178 | #error "Missing instruction emitter for target CPU" | 180 | #error "Missing instruction emitter for target CPU" |
179 | #endif | 181 | #endif |
180 | 182 | ||
183 | /* Generic load/store of register from/to stack slot. */ | ||
184 | #define emit_spload(as, ir, r, ofs) \ | ||
185 | emit_loadofs(as, ir, (r), RID_SP, (ofs)) | ||
186 | #define emit_spstore(as, ir, r, ofs) \ | ||
187 | emit_storeofs(as, ir, (r), RID_SP, (ofs)) | ||
188 | |||
181 | /* -- Register allocator debugging ---------------------------------------- */ | 189 | /* -- Register allocator debugging ---------------------------------------- */ |
182 | 190 | ||
183 | /* #define LUAJIT_DEBUG_RA */ | 191 | /* #define LUAJIT_DEBUG_RA */ |
@@ -315,7 +323,11 @@ static Reg ra_rematk(ASMState *as, IRRef ref) | |||
315 | lua_assert(!rset_test(as->freeset, r)); | 323 | lua_assert(!rset_test(as->freeset, r)); |
316 | ra_free(as, r); | 324 | ra_free(as, r); |
317 | ra_modified(as, r); | 325 | ra_modified(as, r); |
326 | #if LJ_64 | ||
327 | emit_loadu64(as, r, ra_krefk(as, ref)); | ||
328 | #else | ||
318 | emit_loadi(as, r, ra_krefk(as, ref)); | 329 | emit_loadi(as, r, ra_krefk(as, ref)); |
330 | #endif | ||
319 | return r; | 331 | return r; |
320 | } | 332 | } |
321 | ir = IR(ref); | 333 | ir = IR(ref); |
@@ -325,9 +337,9 @@ static Reg ra_rematk(ASMState *as, IRRef ref) | |||
325 | ra_modified(as, r); | 337 | ra_modified(as, r); |
326 | ir->r = RID_INIT; /* Do not keep any hint. */ | 338 | ir->r = RID_INIT; /* Do not keep any hint. */ |
327 | RA_DBGX((as, "remat $i $r", ir, r)); | 339 | RA_DBGX((as, "remat $i $r", ir, r)); |
328 | #if !LJ_SOFTFP | 340 | #if !LJ_SOFTFP32 |
329 | if (ir->o == IR_KNUM) { | 341 | if (ir->o == IR_KNUM) { |
330 | emit_loadn(as, r, ir_knum(ir)); | 342 | emit_loadk64(as, r, ir); |
331 | } else | 343 | } else |
332 | #endif | 344 | #endif |
333 | if (emit_canremat(REF_BASE) && ir->o == IR_BASE) { | 345 | if (emit_canremat(REF_BASE) && ir->o == IR_BASE) { |
@@ -335,10 +347,16 @@ static Reg ra_rematk(ASMState *as, IRRef ref) | |||
335 | emit_getgl(as, r, jit_base); | 347 | emit_getgl(as, r, jit_base); |
336 | } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) { | 348 | } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) { |
337 | lua_assert(irt_isnil(ir->t)); /* REF_NIL stores ASMREF_L register. */ | 349 | lua_assert(irt_isnil(ir->t)); /* REF_NIL stores ASMREF_L register. */ |
338 | emit_getgl(as, r, jit_L); | 350 | emit_getgl(as, r, cur_L); |
339 | #if LJ_64 | 351 | #if LJ_64 |
340 | } else if (ir->o == IR_KINT64) { | 352 | } else if (ir->o == IR_KINT64) { |
341 | emit_loadu64(as, r, ir_kint64(ir)->u64); | 353 | emit_loadu64(as, r, ir_kint64(ir)->u64); |
354 | #if LJ_GC64 | ||
355 | } else if (ir->o == IR_KGC) { | ||
356 | emit_loadu64(as, r, (uintptr_t)ir_kgc(ir)); | ||
357 | } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { | ||
358 | emit_loadu64(as, r, (uintptr_t)ir_kptr(ir)); | ||
359 | #endif | ||
342 | #endif | 360 | #endif |
343 | } else { | 361 | } else { |
344 | lua_assert(ir->o == IR_KINT || ir->o == IR_KGC || | 362 | lua_assert(ir->o == IR_KINT || ir->o == IR_KGC || |
@@ -511,7 +529,7 @@ static void ra_evictk(ASMState *as) | |||
511 | 529 | ||
512 | #ifdef RID_NUM_KREF | 530 | #ifdef RID_NUM_KREF |
513 | /* Allocate a register for a constant. */ | 531 | /* Allocate a register for a constant. */ |
514 | static Reg ra_allock(ASMState *as, int32_t k, RegSet allow) | 532 | static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow) |
515 | { | 533 | { |
516 | /* First try to find a register which already holds the same constant. */ | 534 | /* First try to find a register which already holds the same constant. */ |
517 | RegSet pick, work = ~as->freeset & RSET_GPR; | 535 | RegSet pick, work = ~as->freeset & RSET_GPR; |
@@ -520,9 +538,31 @@ static Reg ra_allock(ASMState *as, int32_t k, RegSet allow) | |||
520 | IRRef ref; | 538 | IRRef ref; |
521 | r = rset_pickbot(work); | 539 | r = rset_pickbot(work); |
522 | ref = regcost_ref(as->cost[r]); | 540 | ref = regcost_ref(as->cost[r]); |
541 | #if LJ_64 | ||
542 | if (ref < ASMREF_L) { | ||
543 | if (ra_iskref(ref)) { | ||
544 | if (k == ra_krefk(as, ref)) | ||
545 | return r; | ||
546 | } else { | ||
547 | IRIns *ir = IR(ref); | ||
548 | if ((ir->o == IR_KINT64 && k == (int64_t)ir_kint64(ir)->u64) || | ||
549 | #if LJ_GC64 | ||
550 | (ir->o == IR_KINT && k == ir->i) || | ||
551 | (ir->o == IR_KGC && k == (intptr_t)ir_kgc(ir)) || | ||
552 | ((ir->o == IR_KPTR || ir->o == IR_KKPTR) && | ||
553 | k == (intptr_t)ir_kptr(ir)) | ||
554 | #else | ||
555 | (ir->o != IR_KINT64 && k == ir->i) | ||
556 | #endif | ||
557 | ) | ||
558 | return r; | ||
559 | } | ||
560 | } | ||
561 | #else | ||
523 | if (ref < ASMREF_L && | 562 | if (ref < ASMREF_L && |
524 | k == (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i)) | 563 | k == (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i)) |
525 | return r; | 564 | return r; |
565 | #endif | ||
526 | rset_clear(work, r); | 566 | rset_clear(work, r); |
527 | } | 567 | } |
528 | pick = as->freeset & allow; | 568 | pick = as->freeset & allow; |
@@ -542,7 +582,7 @@ static Reg ra_allock(ASMState *as, int32_t k, RegSet allow) | |||
542 | } | 582 | } |
543 | 583 | ||
544 | /* Allocate a specific register for a constant. */ | 584 | /* Allocate a specific register for a constant. */ |
545 | static void ra_allockreg(ASMState *as, int32_t k, Reg r) | 585 | static void ra_allockreg(ASMState *as, intptr_t k, Reg r) |
546 | { | 586 | { |
547 | Reg kr = ra_allock(as, k, RID2RSET(r)); | 587 | Reg kr = ra_allock(as, k, RID2RSET(r)); |
548 | if (kr != r) { | 588 | if (kr != r) { |
@@ -612,10 +652,20 @@ static Reg ra_alloc1(ASMState *as, IRRef ref, RegSet allow) | |||
612 | return r; | 652 | return r; |
613 | } | 653 | } |
614 | 654 | ||
655 | /* Add a register rename to the IR. */ | ||
656 | static void ra_addrename(ASMState *as, Reg down, IRRef ref, SnapNo snapno) | ||
657 | { | ||
658 | IRRef ren; | ||
659 | lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, snapno); | ||
660 | ren = tref_ref(lj_ir_emit(as->J)); | ||
661 | as->J->cur.ir[ren].r = (uint8_t)down; | ||
662 | as->J->cur.ir[ren].s = SPS_NONE; | ||
663 | } | ||
664 | |||
615 | /* Rename register allocation and emit move. */ | 665 | /* Rename register allocation and emit move. */ |
616 | static void ra_rename(ASMState *as, Reg down, Reg up) | 666 | static void ra_rename(ASMState *as, Reg down, Reg up) |
617 | { | 667 | { |
618 | IRRef ren, ref = regcost_ref(as->cost[up] = as->cost[down]); | 668 | IRRef ref = regcost_ref(as->cost[up] = as->cost[down]); |
619 | IRIns *ir = IR(ref); | 669 | IRIns *ir = IR(ref); |
620 | ir->r = (uint8_t)up; | 670 | ir->r = (uint8_t)up; |
621 | as->cost[down] = 0; | 671 | as->cost[down] = 0; |
@@ -628,11 +678,7 @@ static void ra_rename(ASMState *as, Reg down, Reg up) | |||
628 | RA_DBGX((as, "rename $f $r $r", regcost_ref(as->cost[up]), down, up)); | 678 | RA_DBGX((as, "rename $f $r $r", regcost_ref(as->cost[up]), down, up)); |
629 | emit_movrr(as, ir, down, up); /* Backwards codegen needs inverse move. */ | 679 | emit_movrr(as, ir, down, up); /* Backwards codegen needs inverse move. */ |
630 | if (!ra_hasspill(IR(ref)->s)) { /* Add the rename to the IR. */ | 680 | if (!ra_hasspill(IR(ref)->s)) { /* Add the rename to the IR. */ |
631 | lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, as->snapno); | 681 | ra_addrename(as, down, ref, as->snapno); |
632 | ren = tref_ref(lj_ir_emit(as->J)); | ||
633 | as->ir = as->T->ir; /* The IR may have been reallocated. */ | ||
634 | IR(ren)->r = (uint8_t)down; | ||
635 | IR(ren)->s = SPS_NONE; | ||
636 | } | 682 | } |
637 | } | 683 | } |
638 | 684 | ||
@@ -682,18 +728,22 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref) | |||
682 | if (ra_noreg(left)) { | 728 | if (ra_noreg(left)) { |
683 | if (irref_isk(lref)) { | 729 | if (irref_isk(lref)) { |
684 | if (ir->o == IR_KNUM) { | 730 | if (ir->o == IR_KNUM) { |
685 | cTValue *tv = ir_knum(ir); | ||
686 | /* FP remat needs a load except for +0. Still better than eviction. */ | 731 | /* FP remat needs a load except for +0. Still better than eviction. */ |
687 | if (tvispzero(tv) || !(as->freeset & RSET_FPR)) { | 732 | if (tvispzero(ir_knum(ir)) || !(as->freeset & RSET_FPR)) { |
688 | emit_loadn(as, dest, tv); | 733 | emit_loadk64(as, dest, ir); |
689 | return; | 734 | return; |
690 | } | 735 | } |
691 | #if LJ_64 | 736 | #if LJ_64 |
692 | } else if (ir->o == IR_KINT64) { | 737 | } else if (ir->o == IR_KINT64) { |
693 | emit_loadu64(as, dest, ir_kint64(ir)->u64); | 738 | emit_loadk64(as, dest, ir); |
739 | return; | ||
740 | #if LJ_GC64 | ||
741 | } else if (ir->o == IR_KGC || ir->o == IR_KPTR || ir->o == IR_KKPTR) { | ||
742 | emit_loadk64(as, dest, ir); | ||
694 | return; | 743 | return; |
695 | #endif | 744 | #endif |
696 | } else { | 745 | #endif |
746 | } else if (ir->o != IR_KPRI) { | ||
697 | lua_assert(ir->o == IR_KINT || ir->o == IR_KGC || | 747 | lua_assert(ir->o == IR_KINT || ir->o == IR_KGC || |
698 | ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL); | 748 | ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL); |
699 | emit_loadi(as, dest, ir->i); | 749 | emit_loadi(as, dest, ir->i); |
@@ -934,7 +984,7 @@ static void asm_snap_prep(ASMState *as) | |||
934 | } else { | 984 | } else { |
935 | /* Process any renames above the highwater mark. */ | 985 | /* Process any renames above the highwater mark. */ |
936 | for (; as->snaprename < as->T->nins; as->snaprename++) { | 986 | for (; as->snaprename < as->T->nins; as->snaprename++) { |
937 | IRIns *ir = IR(as->snaprename); | 987 | IRIns *ir = &as->T->ir[as->snaprename]; |
938 | if (asm_snap_checkrename(as, ir->op1)) | 988 | if (asm_snap_checkrename(as, ir->op1)) |
939 | ir->op2 = REF_BIAS-1; /* Kill rename. */ | 989 | ir->op2 = REF_BIAS-1; /* Kill rename. */ |
940 | } | 990 | } |
@@ -943,44 +993,6 @@ static void asm_snap_prep(ASMState *as) | |||
943 | 993 | ||
944 | /* -- Miscellaneous helpers ----------------------------------------------- */ | 994 | /* -- Miscellaneous helpers ----------------------------------------------- */ |
945 | 995 | ||
946 | /* Collect arguments from CALL* and CARG instructions. */ | ||
947 | static void asm_collectargs(ASMState *as, IRIns *ir, | ||
948 | const CCallInfo *ci, IRRef *args) | ||
949 | { | ||
950 | uint32_t n = CCI_NARGS(ci); | ||
951 | lua_assert(n <= CCI_NARGS_MAX*2); /* Account for split args. */ | ||
952 | if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; } | ||
953 | while (n-- > 1) { | ||
954 | ir = IR(ir->op1); | ||
955 | lua_assert(ir->o == IR_CARG); | ||
956 | args[n] = ir->op2 == REF_NIL ? 0 : ir->op2; | ||
957 | } | ||
958 | args[0] = ir->op1 == REF_NIL ? 0 : ir->op1; | ||
959 | lua_assert(IR(ir->op1)->o != IR_CARG); | ||
960 | } | ||
961 | |||
962 | /* Reconstruct CCallInfo flags for CALLX*. */ | ||
963 | static uint32_t asm_callx_flags(ASMState *as, IRIns *ir) | ||
964 | { | ||
965 | uint32_t nargs = 0; | ||
966 | if (ir->op1 != REF_NIL) { /* Count number of arguments first. */ | ||
967 | IRIns *ira = IR(ir->op1); | ||
968 | nargs++; | ||
969 | while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); } | ||
970 | } | ||
971 | #if LJ_HASFFI | ||
972 | if (IR(ir->op2)->o == IR_CARG) { /* Copy calling convention info. */ | ||
973 | CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i; | ||
974 | CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id); | ||
975 | nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0); | ||
976 | #if LJ_TARGET_X86 | ||
977 | nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT); | ||
978 | #endif | ||
979 | } | ||
980 | #endif | ||
981 | return (nargs | (ir->t.irt << CCI_OTSHIFT)); | ||
982 | } | ||
983 | |||
984 | /* Calculate stack adjustment. */ | 996 | /* Calculate stack adjustment. */ |
985 | static int32_t asm_stack_adjust(ASMState *as) | 997 | static int32_t asm_stack_adjust(ASMState *as) |
986 | { | 998 | { |
@@ -1004,7 +1016,11 @@ static uint32_t ir_khash(IRIns *ir) | |||
1004 | } else { | 1016 | } else { |
1005 | lua_assert(irt_isgcv(ir->t)); | 1017 | lua_assert(irt_isgcv(ir->t)); |
1006 | lo = u32ptr(ir_kgc(ir)); | 1018 | lo = u32ptr(ir_kgc(ir)); |
1019 | #if LJ_GC64 | ||
1020 | hi = (uint32_t)(u64ptr(ir_kgc(ir)) >> 32) | (irt_toitype(ir->t) << 15); | ||
1021 | #else | ||
1007 | hi = lo + HASH_BIAS; | 1022 | hi = lo + HASH_BIAS; |
1023 | #endif | ||
1008 | } | 1024 | } |
1009 | return hashrot(lo, hi); | 1025 | return hashrot(lo, hi); |
1010 | } | 1026 | } |
@@ -1065,6 +1081,259 @@ static void asm_gcstep(ASMState *as, IRIns *ir) | |||
1065 | as->gcsteps = 0x80000000; /* Prevent implicit GC check further up. */ | 1081 | as->gcsteps = 0x80000000; /* Prevent implicit GC check further up. */ |
1066 | } | 1082 | } |
1067 | 1083 | ||
1084 | /* -- Buffer operations --------------------------------------------------- */ | ||
1085 | |||
1086 | static void asm_tvptr(ASMState *as, Reg dest, IRRef ref); | ||
1087 | |||
1088 | static void asm_bufhdr(ASMState *as, IRIns *ir) | ||
1089 | { | ||
1090 | Reg sb = ra_dest(as, ir, RSET_GPR); | ||
1091 | if ((ir->op2 & IRBUFHDR_APPEND)) { | ||
1092 | /* Rematerialize const buffer pointer instead of likely spill. */ | ||
1093 | IRIns *irp = IR(ir->op1); | ||
1094 | if (!(ra_hasreg(irp->r) || irp == ir-1 || | ||
1095 | (irp == ir-2 && !ra_used(ir-1)))) { | ||
1096 | while (!(irp->o == IR_BUFHDR && !(irp->op2 & IRBUFHDR_APPEND))) | ||
1097 | irp = IR(irp->op1); | ||
1098 | if (irref_isk(irp->op1)) { | ||
1099 | ra_weak(as, ra_allocref(as, ir->op1, RSET_GPR)); | ||
1100 | ir = irp; | ||
1101 | } | ||
1102 | } | ||
1103 | } else { | ||
1104 | Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); | ||
1105 | /* Passing ir isn't strictly correct, but it's an IRT_PGC, too. */ | ||
1106 | emit_storeofs(as, ir, tmp, sb, offsetof(SBuf, p)); | ||
1107 | emit_loadofs(as, ir, tmp, sb, offsetof(SBuf, b)); | ||
1108 | } | ||
1109 | #if LJ_TARGET_X86ORX64 | ||
1110 | ra_left(as, sb, ir->op1); | ||
1111 | #else | ||
1112 | ra_leftov(as, sb, ir->op1); | ||
1113 | #endif | ||
1114 | } | ||
1115 | |||
1116 | static void asm_bufput(ASMState *as, IRIns *ir) | ||
1117 | { | ||
1118 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_putstr]; | ||
1119 | IRRef args[3]; | ||
1120 | IRIns *irs; | ||
1121 | int kchar = -129; | ||
1122 | args[0] = ir->op1; /* SBuf * */ | ||
1123 | args[1] = ir->op2; /* GCstr * */ | ||
1124 | irs = IR(ir->op2); | ||
1125 | lua_assert(irt_isstr(irs->t)); | ||
1126 | if (irs->o == IR_KGC) { | ||
1127 | GCstr *s = ir_kstr(irs); | ||
1128 | if (s->len == 1) { /* Optimize put of single-char string constant. */ | ||
1129 | kchar = (int8_t)strdata(s)[0]; /* Signed! */ | ||
1130 | args[1] = ASMREF_TMP1; /* int, truncated to char */ | ||
1131 | ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar]; | ||
1132 | } | ||
1133 | } else if (mayfuse(as, ir->op2) && ra_noreg(irs->r)) { | ||
1134 | if (irs->o == IR_TOSTR) { /* Fuse number to string conversions. */ | ||
1135 | if (irs->op2 == IRTOSTR_NUM) { | ||
1136 | args[1] = ASMREF_TMP1; /* TValue * */ | ||
1137 | ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putnum]; | ||
1138 | } else { | ||
1139 | lua_assert(irt_isinteger(IR(irs->op1)->t)); | ||
1140 | args[1] = irs->op1; /* int */ | ||
1141 | if (irs->op2 == IRTOSTR_INT) | ||
1142 | ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putint]; | ||
1143 | else | ||
1144 | ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar]; | ||
1145 | } | ||
1146 | } else if (irs->o == IR_SNEW) { /* Fuse string allocation. */ | ||
1147 | args[1] = irs->op1; /* const void * */ | ||
1148 | args[2] = irs->op2; /* MSize */ | ||
1149 | ci = &lj_ir_callinfo[IRCALL_lj_buf_putmem]; | ||
1150 | } | ||
1151 | } | ||
1152 | asm_setupresult(as, ir, ci); /* SBuf * */ | ||
1153 | asm_gencall(as, ci, args); | ||
1154 | if (args[1] == ASMREF_TMP1) { | ||
1155 | Reg tmp = ra_releasetmp(as, ASMREF_TMP1); | ||
1156 | if (kchar == -129) | ||
1157 | asm_tvptr(as, tmp, irs->op1); | ||
1158 | else | ||
1159 | ra_allockreg(as, kchar, tmp); | ||
1160 | } | ||
1161 | } | ||
1162 | |||
1163 | static void asm_bufstr(ASMState *as, IRIns *ir) | ||
1164 | { | ||
1165 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_tostr]; | ||
1166 | IRRef args[1]; | ||
1167 | args[0] = ir->op1; /* SBuf *sb */ | ||
1168 | as->gcsteps++; | ||
1169 | asm_setupresult(as, ir, ci); /* GCstr * */ | ||
1170 | asm_gencall(as, ci, args); | ||
1171 | } | ||
1172 | |||
1173 | /* -- Type conversions ---------------------------------------------------- */ | ||
1174 | |||
1175 | static void asm_tostr(ASMState *as, IRIns *ir) | ||
1176 | { | ||
1177 | const CCallInfo *ci; | ||
1178 | IRRef args[2]; | ||
1179 | args[0] = ASMREF_L; | ||
1180 | as->gcsteps++; | ||
1181 | if (ir->op2 == IRTOSTR_NUM) { | ||
1182 | args[1] = ASMREF_TMP1; /* cTValue * */ | ||
1183 | ci = &lj_ir_callinfo[IRCALL_lj_strfmt_num]; | ||
1184 | } else { | ||
1185 | args[1] = ir->op1; /* int32_t k */ | ||
1186 | if (ir->op2 == IRTOSTR_INT) | ||
1187 | ci = &lj_ir_callinfo[IRCALL_lj_strfmt_int]; | ||
1188 | else | ||
1189 | ci = &lj_ir_callinfo[IRCALL_lj_strfmt_char]; | ||
1190 | } | ||
1191 | asm_setupresult(as, ir, ci); /* GCstr * */ | ||
1192 | asm_gencall(as, ci, args); | ||
1193 | if (ir->op2 == IRTOSTR_NUM) | ||
1194 | asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1); | ||
1195 | } | ||
1196 | |||
1197 | #if LJ_32 && LJ_HASFFI && !LJ_SOFTFP && !LJ_TARGET_X86 | ||
1198 | static void asm_conv64(ASMState *as, IRIns *ir) | ||
1199 | { | ||
1200 | IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK); | ||
1201 | IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH); | ||
1202 | IRCallID id; | ||
1203 | IRRef args[2]; | ||
1204 | lua_assert((ir-1)->o == IR_CONV && ir->o == IR_HIOP); | ||
1205 | args[LJ_BE] = (ir-1)->op1; | ||
1206 | args[LJ_LE] = ir->op1; | ||
1207 | if (st == IRT_NUM || st == IRT_FLOAT) { | ||
1208 | id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64); | ||
1209 | ir--; | ||
1210 | } else { | ||
1211 | id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64); | ||
1212 | } | ||
1213 | { | ||
1214 | #if LJ_TARGET_ARM && !LJ_ABI_SOFTFP | ||
1215 | CCallInfo cim = lj_ir_callinfo[id], *ci = &cim; | ||
1216 | cim.flags |= CCI_VARARG; /* These calls don't use the hard-float ABI! */ | ||
1217 | #else | ||
1218 | const CCallInfo *ci = &lj_ir_callinfo[id]; | ||
1219 | #endif | ||
1220 | asm_setupresult(as, ir, ci); | ||
1221 | asm_gencall(as, ci, args); | ||
1222 | } | ||
1223 | } | ||
1224 | #endif | ||
1225 | |||
1226 | /* -- Memory references --------------------------------------------------- */ | ||
1227 | |||
1228 | static void asm_newref(ASMState *as, IRIns *ir) | ||
1229 | { | ||
1230 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey]; | ||
1231 | IRRef args[3]; | ||
1232 | if (ir->r == RID_SINK) | ||
1233 | return; | ||
1234 | args[0] = ASMREF_L; /* lua_State *L */ | ||
1235 | args[1] = ir->op1; /* GCtab *t */ | ||
1236 | args[2] = ASMREF_TMP1; /* cTValue *key */ | ||
1237 | asm_setupresult(as, ir, ci); /* TValue * */ | ||
1238 | asm_gencall(as, ci, args); | ||
1239 | asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2); | ||
1240 | } | ||
1241 | |||
1242 | static void asm_lref(ASMState *as, IRIns *ir) | ||
1243 | { | ||
1244 | Reg r = ra_dest(as, ir, RSET_GPR); | ||
1245 | #if LJ_TARGET_X86ORX64 | ||
1246 | ra_left(as, r, ASMREF_L); | ||
1247 | #else | ||
1248 | ra_leftov(as, r, ASMREF_L); | ||
1249 | #endif | ||
1250 | } | ||
1251 | |||
1252 | /* -- Calls --------------------------------------------------------------- */ | ||
1253 | |||
1254 | /* Collect arguments from CALL* and CARG instructions. */ | ||
1255 | static void asm_collectargs(ASMState *as, IRIns *ir, | ||
1256 | const CCallInfo *ci, IRRef *args) | ||
1257 | { | ||
1258 | uint32_t n = CCI_XNARGS(ci); | ||
1259 | lua_assert(n <= CCI_NARGS_MAX*2); /* Account for split args. */ | ||
1260 | if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; } | ||
1261 | while (n-- > 1) { | ||
1262 | ir = IR(ir->op1); | ||
1263 | lua_assert(ir->o == IR_CARG); | ||
1264 | args[n] = ir->op2 == REF_NIL ? 0 : ir->op2; | ||
1265 | } | ||
1266 | args[0] = ir->op1 == REF_NIL ? 0 : ir->op1; | ||
1267 | lua_assert(IR(ir->op1)->o != IR_CARG); | ||
1268 | } | ||
1269 | |||
1270 | /* Reconstruct CCallInfo flags for CALLX*. */ | ||
1271 | static uint32_t asm_callx_flags(ASMState *as, IRIns *ir) | ||
1272 | { | ||
1273 | uint32_t nargs = 0; | ||
1274 | if (ir->op1 != REF_NIL) { /* Count number of arguments first. */ | ||
1275 | IRIns *ira = IR(ir->op1); | ||
1276 | nargs++; | ||
1277 | while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); } | ||
1278 | } | ||
1279 | #if LJ_HASFFI | ||
1280 | if (IR(ir->op2)->o == IR_CARG) { /* Copy calling convention info. */ | ||
1281 | CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i; | ||
1282 | CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id); | ||
1283 | nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0); | ||
1284 | #if LJ_TARGET_X86 | ||
1285 | nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT); | ||
1286 | #endif | ||
1287 | } | ||
1288 | #endif | ||
1289 | return (nargs | (ir->t.irt << CCI_OTSHIFT)); | ||
1290 | } | ||
1291 | |||
1292 | static void asm_callid(ASMState *as, IRIns *ir, IRCallID id) | ||
1293 | { | ||
1294 | const CCallInfo *ci = &lj_ir_callinfo[id]; | ||
1295 | IRRef args[2]; | ||
1296 | args[0] = ir->op1; | ||
1297 | args[1] = ir->op2; | ||
1298 | asm_setupresult(as, ir, ci); | ||
1299 | asm_gencall(as, ci, args); | ||
1300 | } | ||
1301 | |||
1302 | static void asm_call(ASMState *as, IRIns *ir) | ||
1303 | { | ||
1304 | IRRef args[CCI_NARGS_MAX]; | ||
1305 | const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; | ||
1306 | asm_collectargs(as, ir, ci, args); | ||
1307 | asm_setupresult(as, ir, ci); | ||
1308 | asm_gencall(as, ci, args); | ||
1309 | } | ||
1310 | |||
1311 | #if !LJ_SOFTFP32 | ||
1312 | static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref) | ||
1313 | { | ||
1314 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow]; | ||
1315 | IRRef args[2]; | ||
1316 | args[0] = lref; | ||
1317 | args[1] = rref; | ||
1318 | asm_setupresult(as, ir, ci); | ||
1319 | asm_gencall(as, ci, args); | ||
1320 | } | ||
1321 | |||
1322 | static int asm_fpjoin_pow(ASMState *as, IRIns *ir) | ||
1323 | { | ||
1324 | IRIns *irp = IR(ir->op1); | ||
1325 | if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) { | ||
1326 | IRIns *irpp = IR(irp->op1); | ||
1327 | if (irpp == ir-2 && irpp->o == IR_FPMATH && | ||
1328 | irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) { | ||
1329 | asm_fppow(as, ir, irpp->op1, irp->op2); | ||
1330 | return 1; | ||
1331 | } | ||
1332 | } | ||
1333 | return 0; | ||
1334 | } | ||
1335 | #endif | ||
1336 | |||
1068 | /* -- PHI and loop handling ----------------------------------------------- */ | 1337 | /* -- PHI and loop handling ----------------------------------------------- */ |
1069 | 1338 | ||
1070 | /* Break a PHI cycle by renaming to a free register (evict if needed). */ | 1339 | /* Break a PHI cycle by renaming to a free register (evict if needed). */ |
@@ -1250,12 +1519,7 @@ static void asm_phi_fixup(ASMState *as) | |||
1250 | irt_clearmark(ir->t); | 1519 | irt_clearmark(ir->t); |
1251 | /* Left PHI gained a spill slot before the loop? */ | 1520 | /* Left PHI gained a spill slot before the loop? */ |
1252 | if (ra_hasspill(ir->s)) { | 1521 | if (ra_hasspill(ir->s)) { |
1253 | IRRef ren; | 1522 | ra_addrename(as, r, lref, as->loopsnapno); |
1254 | lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), lref, as->loopsnapno); | ||
1255 | ren = tref_ref(lj_ir_emit(as->J)); | ||
1256 | as->ir = as->T->ir; /* The IR may have been reallocated. */ | ||
1257 | IR(ren)->r = (uint8_t)r; | ||
1258 | IR(ren)->s = SPS_NONE; | ||
1259 | } | 1523 | } |
1260 | } | 1524 | } |
1261 | rset_clear(work, r); | 1525 | rset_clear(work, r); |
@@ -1330,6 +1594,8 @@ static void asm_loop(ASMState *as) | |||
1330 | #include "lj_asm_x86.h" | 1594 | #include "lj_asm_x86.h" |
1331 | #elif LJ_TARGET_ARM | 1595 | #elif LJ_TARGET_ARM |
1332 | #include "lj_asm_arm.h" | 1596 | #include "lj_asm_arm.h" |
1597 | #elif LJ_TARGET_ARM64 | ||
1598 | #include "lj_asm_arm64.h" | ||
1333 | #elif LJ_TARGET_PPC | 1599 | #elif LJ_TARGET_PPC |
1334 | #include "lj_asm_ppc.h" | 1600 | #include "lj_asm_ppc.h" |
1335 | #elif LJ_TARGET_MIPS | 1601 | #elif LJ_TARGET_MIPS |
@@ -1338,6 +1604,136 @@ static void asm_loop(ASMState *as) | |||
1338 | #error "Missing assembler for target CPU" | 1604 | #error "Missing assembler for target CPU" |
1339 | #endif | 1605 | #endif |
1340 | 1606 | ||
1607 | /* -- Instruction dispatch ------------------------------------------------ */ | ||
1608 | |||
1609 | /* Assemble a single instruction. */ | ||
1610 | static void asm_ir(ASMState *as, IRIns *ir) | ||
1611 | { | ||
1612 | switch ((IROp)ir->o) { | ||
1613 | /* Miscellaneous ops. */ | ||
1614 | case IR_LOOP: asm_loop(as); break; | ||
1615 | case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break; | ||
1616 | case IR_USE: | ||
1617 | ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break; | ||
1618 | case IR_PHI: asm_phi(as, ir); break; | ||
1619 | case IR_HIOP: asm_hiop(as, ir); break; | ||
1620 | case IR_GCSTEP: asm_gcstep(as, ir); break; | ||
1621 | case IR_PROF: asm_prof(as, ir); break; | ||
1622 | |||
1623 | /* Guarded assertions. */ | ||
1624 | case IR_LT: case IR_GE: case IR_LE: case IR_GT: | ||
1625 | case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT: | ||
1626 | case IR_ABC: | ||
1627 | asm_comp(as, ir); | ||
1628 | break; | ||
1629 | case IR_EQ: case IR_NE: | ||
1630 | if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) { | ||
1631 | as->curins--; | ||
1632 | asm_href(as, ir-1, (IROp)ir->o); | ||
1633 | } else { | ||
1634 | asm_equal(as, ir); | ||
1635 | } | ||
1636 | break; | ||
1637 | |||
1638 | case IR_RETF: asm_retf(as, ir); break; | ||
1639 | |||
1640 | /* Bit ops. */ | ||
1641 | case IR_BNOT: asm_bnot(as, ir); break; | ||
1642 | case IR_BSWAP: asm_bswap(as, ir); break; | ||
1643 | case IR_BAND: asm_band(as, ir); break; | ||
1644 | case IR_BOR: asm_bor(as, ir); break; | ||
1645 | case IR_BXOR: asm_bxor(as, ir); break; | ||
1646 | case IR_BSHL: asm_bshl(as, ir); break; | ||
1647 | case IR_BSHR: asm_bshr(as, ir); break; | ||
1648 | case IR_BSAR: asm_bsar(as, ir); break; | ||
1649 | case IR_BROL: asm_brol(as, ir); break; | ||
1650 | case IR_BROR: asm_bror(as, ir); break; | ||
1651 | |||
1652 | /* Arithmetic ops. */ | ||
1653 | case IR_ADD: asm_add(as, ir); break; | ||
1654 | case IR_SUB: asm_sub(as, ir); break; | ||
1655 | case IR_MUL: asm_mul(as, ir); break; | ||
1656 | case IR_MOD: asm_mod(as, ir); break; | ||
1657 | case IR_NEG: asm_neg(as, ir); break; | ||
1658 | #if LJ_SOFTFP32 | ||
1659 | case IR_DIV: case IR_POW: case IR_ABS: | ||
1660 | case IR_ATAN2: case IR_LDEXP: case IR_FPMATH: case IR_TOBIT: | ||
1661 | lua_assert(0); /* Unused for LJ_SOFTFP32. */ | ||
1662 | break; | ||
1663 | #else | ||
1664 | case IR_DIV: asm_div(as, ir); break; | ||
1665 | case IR_POW: asm_pow(as, ir); break; | ||
1666 | case IR_ABS: asm_abs(as, ir); break; | ||
1667 | case IR_ATAN2: asm_atan2(as, ir); break; | ||
1668 | case IR_LDEXP: asm_ldexp(as, ir); break; | ||
1669 | case IR_FPMATH: asm_fpmath(as, ir); break; | ||
1670 | case IR_TOBIT: asm_tobit(as, ir); break; | ||
1671 | #endif | ||
1672 | case IR_MIN: asm_min(as, ir); break; | ||
1673 | case IR_MAX: asm_max(as, ir); break; | ||
1674 | |||
1675 | /* Overflow-checking arithmetic ops. */ | ||
1676 | case IR_ADDOV: asm_addov(as, ir); break; | ||
1677 | case IR_SUBOV: asm_subov(as, ir); break; | ||
1678 | case IR_MULOV: asm_mulov(as, ir); break; | ||
1679 | |||
1680 | /* Memory references. */ | ||
1681 | case IR_AREF: asm_aref(as, ir); break; | ||
1682 | case IR_HREF: asm_href(as, ir, 0); break; | ||
1683 | case IR_HREFK: asm_hrefk(as, ir); break; | ||
1684 | case IR_NEWREF: asm_newref(as, ir); break; | ||
1685 | case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break; | ||
1686 | case IR_FREF: asm_fref(as, ir); break; | ||
1687 | case IR_STRREF: asm_strref(as, ir); break; | ||
1688 | case IR_LREF: asm_lref(as, ir); break; | ||
1689 | |||
1690 | /* Loads and stores. */ | ||
1691 | case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: | ||
1692 | asm_ahuvload(as, ir); | ||
1693 | break; | ||
1694 | case IR_FLOAD: asm_fload(as, ir); break; | ||
1695 | case IR_XLOAD: asm_xload(as, ir); break; | ||
1696 | case IR_SLOAD: asm_sload(as, ir); break; | ||
1697 | |||
1698 | case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break; | ||
1699 | case IR_FSTORE: asm_fstore(as, ir); break; | ||
1700 | case IR_XSTORE: asm_xstore(as, ir); break; | ||
1701 | |||
1702 | /* Allocations. */ | ||
1703 | case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break; | ||
1704 | case IR_TNEW: asm_tnew(as, ir); break; | ||
1705 | case IR_TDUP: asm_tdup(as, ir); break; | ||
1706 | case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break; | ||
1707 | |||
1708 | /* Buffer operations. */ | ||
1709 | case IR_BUFHDR: asm_bufhdr(as, ir); break; | ||
1710 | case IR_BUFPUT: asm_bufput(as, ir); break; | ||
1711 | case IR_BUFSTR: asm_bufstr(as, ir); break; | ||
1712 | |||
1713 | /* Write barriers. */ | ||
1714 | case IR_TBAR: asm_tbar(as, ir); break; | ||
1715 | case IR_OBAR: asm_obar(as, ir); break; | ||
1716 | |||
1717 | /* Type conversions. */ | ||
1718 | case IR_CONV: asm_conv(as, ir); break; | ||
1719 | case IR_TOSTR: asm_tostr(as, ir); break; | ||
1720 | case IR_STRTO: asm_strto(as, ir); break; | ||
1721 | |||
1722 | /* Calls. */ | ||
1723 | case IR_CALLA: | ||
1724 | as->gcsteps++; | ||
1725 | /* fallthrough */ | ||
1726 | case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break; | ||
1727 | case IR_CALLXS: asm_callx(as, ir); break; | ||
1728 | case IR_CARG: break; | ||
1729 | |||
1730 | default: | ||
1731 | setintV(&as->J->errinfo, ir->o); | ||
1732 | lj_trace_err_info(as->J, LJ_TRERR_NYIIR); | ||
1733 | break; | ||
1734 | } | ||
1735 | } | ||
1736 | |||
1341 | /* -- Head of trace ------------------------------------------------------- */ | 1737 | /* -- Head of trace ------------------------------------------------------- */ |
1342 | 1738 | ||
1343 | /* Head of a root trace. */ | 1739 | /* Head of a root trace. */ |
@@ -1536,7 +1932,7 @@ static BCReg asm_baseslot(ASMState *as, SnapShot *snap, int *gotframe) | |||
1536 | SnapEntry sn = map[n-1]; | 1932 | SnapEntry sn = map[n-1]; |
1537 | if ((sn & SNAP_FRAME)) { | 1933 | if ((sn & SNAP_FRAME)) { |
1538 | *gotframe = 1; | 1934 | *gotframe = 1; |
1539 | return snap_slot(sn); | 1935 | return snap_slot(sn) - LJ_FR2; |
1540 | } | 1936 | } |
1541 | } | 1937 | } |
1542 | return 0; | 1938 | return 0; |
@@ -1556,19 +1952,23 @@ static void asm_tail_link(ASMState *as) | |||
1556 | 1952 | ||
1557 | if (as->T->link == 0) { | 1953 | if (as->T->link == 0) { |
1558 | /* Setup fixed registers for exit to interpreter. */ | 1954 | /* Setup fixed registers for exit to interpreter. */ |
1559 | const BCIns *pc = snap_pc(as->T->snapmap[snap->mapofs + snap->nent]); | 1955 | const BCIns *pc = snap_pc(&as->T->snapmap[snap->mapofs + snap->nent]); |
1560 | int32_t mres; | 1956 | int32_t mres; |
1561 | if (bc_op(*pc) == BC_JLOOP) { /* NYI: find a better way to do this. */ | 1957 | if (bc_op(*pc) == BC_JLOOP) { /* NYI: find a better way to do this. */ |
1562 | BCIns *retpc = &traceref(as->J, bc_d(*pc))->startins; | 1958 | BCIns *retpc = &traceref(as->J, bc_d(*pc))->startins; |
1563 | if (bc_isret(bc_op(*retpc))) | 1959 | if (bc_isret(bc_op(*retpc))) |
1564 | pc = retpc; | 1960 | pc = retpc; |
1565 | } | 1961 | } |
1962 | #if LJ_GC64 | ||
1963 | emit_loadu64(as, RID_LPC, u64ptr(pc)); | ||
1964 | #else | ||
1566 | ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH); | 1965 | ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH); |
1567 | ra_allockreg(as, i32ptr(pc), RID_LPC); | 1966 | ra_allockreg(as, i32ptr(pc), RID_LPC); |
1568 | mres = (int32_t)(snap->nslots - baseslot); | 1967 | #endif |
1968 | mres = (int32_t)(snap->nslots - baseslot - LJ_FR2); | ||
1569 | switch (bc_op(*pc)) { | 1969 | switch (bc_op(*pc)) { |
1570 | case BC_CALLM: case BC_CALLMT: | 1970 | case BC_CALLM: case BC_CALLMT: |
1571 | mres -= (int32_t)(1 + bc_a(*pc) + bc_c(*pc)); break; | 1971 | mres -= (int32_t)(1 + LJ_FR2 + bc_a(*pc) + bc_c(*pc)); break; |
1572 | case BC_RETM: mres -= (int32_t)(bc_a(*pc) + bc_d(*pc)); break; | 1972 | case BC_RETM: mres -= (int32_t)(bc_a(*pc) + bc_d(*pc)); break; |
1573 | case BC_TSETM: mres -= (int32_t)bc_a(*pc); break; | 1973 | case BC_TSETM: mres -= (int32_t)bc_a(*pc); break; |
1574 | default: if (bc_op(*pc) < BC_FUNCF) mres = 0; break; | 1974 | default: if (bc_op(*pc) < BC_FUNCF) mres = 0; break; |
@@ -1580,6 +1980,11 @@ static void asm_tail_link(ASMState *as) | |||
1580 | } | 1980 | } |
1581 | emit_addptr(as, RID_BASE, 8*(int32_t)baseslot); | 1981 | emit_addptr(as, RID_BASE, 8*(int32_t)baseslot); |
1582 | 1982 | ||
1983 | if (as->J->ktrace) { /* Patch ktrace slot with the final GCtrace pointer. */ | ||
1984 | setgcref(IR(as->J->ktrace)[LJ_GC64].gcr, obj2gco(as->J->curfinal)); | ||
1985 | IR(as->J->ktrace)->o = IR_KGC; | ||
1986 | } | ||
1987 | |||
1583 | /* Sync the interpreter state with the on-trace state. */ | 1988 | /* Sync the interpreter state with the on-trace state. */ |
1584 | asm_stack_restore(as, snap); | 1989 | asm_stack_restore(as, snap); |
1585 | 1990 | ||
@@ -1605,17 +2010,23 @@ static void asm_setup_regsp(ASMState *as) | |||
1605 | ra_setup(as); | 2010 | ra_setup(as); |
1606 | 2011 | ||
1607 | /* Clear reg/sp for constants. */ | 2012 | /* Clear reg/sp for constants. */ |
1608 | for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++) | 2013 | for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++) { |
1609 | ir->prev = REGSP_INIT; | 2014 | ir->prev = REGSP_INIT; |
2015 | if (irt_is64(ir->t) && ir->o != IR_KNULL) { | ||
2016 | #if LJ_GC64 | ||
2017 | /* The false-positive of irt_is64() for ASMREF_L (REF_NIL) is OK here. */ | ||
2018 | ir->i = 0; /* Will become non-zero only for RIP-relative addresses. */ | ||
2019 | #else | ||
2020 | /* Make life easier for backends by putting address of constant in i. */ | ||
2021 | ir->i = (int32_t)(intptr_t)(ir+1); | ||
2022 | #endif | ||
2023 | ir++; | ||
2024 | } | ||
2025 | } | ||
1610 | 2026 | ||
1611 | /* REF_BASE is used for implicit references to the BASE register. */ | 2027 | /* REF_BASE is used for implicit references to the BASE register. */ |
1612 | lastir->prev = REGSP_HINT(RID_BASE); | 2028 | lastir->prev = REGSP_HINT(RID_BASE); |
1613 | 2029 | ||
1614 | ir = IR(nins-1); | ||
1615 | if (ir->o == IR_RENAME) { | ||
1616 | do { ir--; nins--; } while (ir->o == IR_RENAME); | ||
1617 | T->nins = nins; /* Remove any renames left over from ASM restart. */ | ||
1618 | } | ||
1619 | as->snaprename = nins; | 2030 | as->snaprename = nins; |
1620 | as->snapref = nins; | 2031 | as->snapref = nins; |
1621 | as->snapno = T->nsnap; | 2032 | as->snapno = T->nsnap; |
@@ -1676,7 +2087,7 @@ static void asm_setup_regsp(ASMState *as) | |||
1676 | as->modset |= RSET_SCRATCH; | 2087 | as->modset |= RSET_SCRATCH; |
1677 | continue; | 2088 | continue; |
1678 | } | 2089 | } |
1679 | case IR_CALLN: case IR_CALLL: case IR_CALLS: { | 2090 | case IR_CALLN: case IR_CALLA: case IR_CALLL: case IR_CALLS: { |
1680 | const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; | 2091 | const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; |
1681 | ir->prev = asm_setup_call_slots(as, ir, ci); | 2092 | ir->prev = asm_setup_call_slots(as, ir, ci); |
1682 | if (inloop) | 2093 | if (inloop) |
@@ -1701,8 +2112,8 @@ static void asm_setup_regsp(ASMState *as) | |||
1701 | ir->prev = REGSP_HINT(RID_FPRET); | 2112 | ir->prev = REGSP_HINT(RID_FPRET); |
1702 | continue; | 2113 | continue; |
1703 | } | 2114 | } |
1704 | /* fallthrough */ | ||
1705 | #endif | 2115 | #endif |
2116 | /* fallthrough */ | ||
1706 | case IR_CALLN: case IR_CALLXS: | 2117 | case IR_CALLN: case IR_CALLXS: |
1707 | #if LJ_SOFTFP | 2118 | #if LJ_SOFTFP |
1708 | case IR_MIN: case IR_MAX: | 2119 | case IR_MIN: case IR_MAX: |
@@ -1721,11 +2132,23 @@ static void asm_setup_regsp(ASMState *as) | |||
1721 | #endif | 2132 | #endif |
1722 | /* fallthrough */ | 2133 | /* fallthrough */ |
1723 | /* C calls evict all scratch regs and return results in RID_RET. */ | 2134 | /* C calls evict all scratch regs and return results in RID_RET. */ |
1724 | case IR_SNEW: case IR_XSNEW: case IR_NEWREF: | 2135 | case IR_SNEW: case IR_XSNEW: case IR_NEWREF: case IR_BUFPUT: |
1725 | if (REGARG_NUMGPR < 3 && as->evenspill < 3) | 2136 | if (REGARG_NUMGPR < 3 && as->evenspill < 3) |
1726 | as->evenspill = 3; /* lj_str_new and lj_tab_newkey need 3 args. */ | 2137 | as->evenspill = 3; /* lj_str_new and lj_tab_newkey need 3 args. */ |
2138 | #if LJ_TARGET_X86 && LJ_HASFFI | ||
2139 | if (0) { | ||
2140 | case IR_CNEW: | ||
2141 | if (ir->op2 != REF_NIL && as->evenspill < 4) | ||
2142 | as->evenspill = 4; /* lj_cdata_newv needs 4 args. */ | ||
2143 | } | ||
2144 | /* fallthrough */ | ||
2145 | #else | ||
2146 | /* fallthrough */ | ||
2147 | case IR_CNEW: | ||
2148 | #endif | ||
1727 | /* fallthrough */ | 2149 | /* fallthrough */ |
1728 | case IR_TNEW: case IR_TDUP: case IR_CNEW: case IR_CNEWI: case IR_TOSTR: | 2150 | case IR_TNEW: case IR_TDUP: case IR_CNEWI: case IR_TOSTR: |
2151 | case IR_BUFSTR: | ||
1729 | ir->prev = REGSP_HINT(RID_RET); | 2152 | ir->prev = REGSP_HINT(RID_RET); |
1730 | if (inloop) | 2153 | if (inloop) |
1731 | as->modset = RSET_SCRATCH; | 2154 | as->modset = RSET_SCRATCH; |
@@ -1734,21 +2157,27 @@ static void asm_setup_regsp(ASMState *as) | |||
1734 | if (inloop) | 2157 | if (inloop) |
1735 | as->modset = RSET_SCRATCH; | 2158 | as->modset = RSET_SCRATCH; |
1736 | break; | 2159 | break; |
1737 | #if !LJ_TARGET_X86ORX64 && !LJ_SOFTFP | 2160 | #if !LJ_SOFTFP |
1738 | case IR_ATAN2: case IR_LDEXP: | 2161 | case IR_ATAN2: |
2162 | #if LJ_TARGET_X86 | ||
2163 | if (as->evenspill < 4) /* Leave room to call atan2(). */ | ||
2164 | as->evenspill = 4; | ||
2165 | #endif | ||
2166 | #if !LJ_TARGET_X86ORX64 | ||
2167 | case IR_LDEXP: | ||
2168 | #endif | ||
1739 | #endif | 2169 | #endif |
2170 | /* fallthrough */ | ||
1740 | case IR_POW: | 2171 | case IR_POW: |
1741 | if (!LJ_SOFTFP && irt_isnum(ir->t)) { | 2172 | if (!LJ_SOFTFP && irt_isnum(ir->t)) { |
1742 | #if LJ_TARGET_X86ORX64 | ||
1743 | ir->prev = REGSP_HINT(RID_XMM0); | ||
1744 | if (inloop) | 2173 | if (inloop) |
1745 | as->modset |= RSET_RANGE(RID_XMM0, RID_XMM1+1)|RID2RSET(RID_EAX); | 2174 | as->modset |= RSET_SCRATCH; |
2175 | #if LJ_TARGET_X86 | ||
2176 | break; | ||
1746 | #else | 2177 | #else |
1747 | ir->prev = REGSP_HINT(RID_FPRET); | 2178 | ir->prev = REGSP_HINT(RID_FPRET); |
1748 | if (inloop) | ||
1749 | as->modset |= RSET_SCRATCH; | ||
1750 | #endif | ||
1751 | continue; | 2179 | continue; |
2180 | #endif | ||
1752 | } | 2181 | } |
1753 | /* fallthrough */ /* for integer POW */ | 2182 | /* fallthrough */ /* for integer POW */ |
1754 | case IR_DIV: case IR_MOD: | 2183 | case IR_DIV: case IR_MOD: |
@@ -1761,31 +2190,34 @@ static void asm_setup_regsp(ASMState *as) | |||
1761 | break; | 2190 | break; |
1762 | case IR_FPMATH: | 2191 | case IR_FPMATH: |
1763 | #if LJ_TARGET_X86ORX64 | 2192 | #if LJ_TARGET_X86ORX64 |
1764 | if (ir->op2 == IRFPM_EXP2) { /* May be joined to lj_vm_pow_sse. */ | 2193 | if (ir->op2 <= IRFPM_TRUNC) { |
1765 | ir->prev = REGSP_HINT(RID_XMM0); | 2194 | if (!(as->flags & JIT_F_SSE4_1)) { |
1766 | #if !LJ_64 | 2195 | ir->prev = REGSP_HINT(RID_XMM0); |
1767 | if (as->evenspill < 4) /* Leave room for 16 byte scratch area. */ | 2196 | if (inloop) |
2197 | as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX); | ||
2198 | continue; | ||
2199 | } | ||
2200 | break; | ||
2201 | } else if (ir->op2 == IRFPM_EXP2 && !LJ_64) { | ||
2202 | if (as->evenspill < 4) /* Leave room to call pow(). */ | ||
1768 | as->evenspill = 4; | 2203 | as->evenspill = 4; |
1769 | #endif | ||
1770 | if (inloop) | ||
1771 | as->modset |= RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX); | ||
1772 | continue; | ||
1773 | } else if (ir->op2 <= IRFPM_TRUNC && !(as->flags & JIT_F_SSE4_1)) { | ||
1774 | ir->prev = REGSP_HINT(RID_XMM0); | ||
1775 | if (inloop) | ||
1776 | as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX); | ||
1777 | continue; | ||
1778 | } | 2204 | } |
2205 | #endif | ||
2206 | if (inloop) | ||
2207 | as->modset |= RSET_SCRATCH; | ||
2208 | #if LJ_TARGET_X86 | ||
1779 | break; | 2209 | break; |
1780 | #else | 2210 | #else |
1781 | ir->prev = REGSP_HINT(RID_FPRET); | 2211 | ir->prev = REGSP_HINT(RID_FPRET); |
1782 | if (inloop) | ||
1783 | as->modset |= RSET_SCRATCH; | ||
1784 | continue; | 2212 | continue; |
1785 | #endif | 2213 | #endif |
1786 | #if LJ_TARGET_X86ORX64 | 2214 | #if LJ_TARGET_X86ORX64 |
1787 | /* Non-constant shift counts need to be in RID_ECX on x86/x64. */ | 2215 | /* Non-constant shift counts need to be in RID_ECX on x86/x64. */ |
1788 | case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR: | 2216 | case IR_BSHL: case IR_BSHR: case IR_BSAR: |
2217 | if ((as->flags & JIT_F_BMI2)) /* Except if BMI2 is available. */ | ||
2218 | break; | ||
2219 | /* fallthrough */ | ||
2220 | case IR_BROL: case IR_BROR: | ||
1789 | if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) { | 2221 | if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) { |
1790 | IR(ir->op2)->r = REGSP_HINT(RID_ECX); | 2222 | IR(ir->op2)->r = REGSP_HINT(RID_ECX); |
1791 | if (inloop) | 2223 | if (inloop) |
@@ -1831,14 +2263,25 @@ void lj_asm_trace(jit_State *J, GCtrace *T) | |||
1831 | ASMState *as = &as_; | 2263 | ASMState *as = &as_; |
1832 | MCode *origtop; | 2264 | MCode *origtop; |
1833 | 2265 | ||
2266 | /* Remove nops/renames left over from ASM restart due to LJ_TRERR_MCODELM. */ | ||
2267 | { | ||
2268 | IRRef nins = T->nins; | ||
2269 | IRIns *ir = &T->ir[nins-1]; | ||
2270 | if (ir->o == IR_NOP || ir->o == IR_RENAME) { | ||
2271 | do { ir--; nins--; } while (ir->o == IR_NOP || ir->o == IR_RENAME); | ||
2272 | T->nins = nins; | ||
2273 | } | ||
2274 | } | ||
2275 | |||
1834 | /* Ensure an initialized instruction beyond the last one for HIOP checks. */ | 2276 | /* Ensure an initialized instruction beyond the last one for HIOP checks. */ |
1835 | J->cur.nins = lj_ir_nextins(J); | 2277 | /* This also allows one RENAME to be added without reallocating curfinal. */ |
1836 | J->cur.ir[J->cur.nins].o = IR_NOP; | 2278 | as->orignins = lj_ir_nextins(J); |
2279 | J->cur.ir[as->orignins].o = IR_NOP; | ||
1837 | 2280 | ||
1838 | /* Setup initial state. Copy some fields to reduce indirections. */ | 2281 | /* Setup initial state. Copy some fields to reduce indirections. */ |
1839 | as->J = J; | 2282 | as->J = J; |
1840 | as->T = T; | 2283 | as->T = T; |
1841 | as->ir = T->ir; | 2284 | J->curfinal = lj_trace_alloc(J->L, T); /* This copies the IR, too. */ |
1842 | as->flags = J->flags; | 2285 | as->flags = J->flags; |
1843 | as->loopref = J->loopref; | 2286 | as->loopref = J->loopref; |
1844 | as->realign = NULL; | 2287 | as->realign = NULL; |
@@ -1851,12 +2294,41 @@ void lj_asm_trace(jit_State *J, GCtrace *T) | |||
1851 | as->mclim = as->mcbot + MCLIM_REDZONE; | 2294 | as->mclim = as->mcbot + MCLIM_REDZONE; |
1852 | asm_setup_target(as); | 2295 | asm_setup_target(as); |
1853 | 2296 | ||
1854 | do { | 2297 | /* |
2298 | ** This is a loop, because the MCode may have to be (re-)assembled | ||
2299 | ** multiple times: | ||
2300 | ** | ||
2301 | ** 1. as->realign is set (and the assembly aborted), if the arch-specific | ||
2302 | ** backend wants the MCode to be aligned differently. | ||
2303 | ** | ||
2304 | ** This is currently only the case on x86/x64, where small loops get | ||
2305 | ** an aligned loop body plus a short branch. Not much effort is wasted, | ||
2306 | ** because the abort happens very quickly and only once. | ||
2307 | ** | ||
2308 | ** 2. The IR is immovable, since the MCode embeds pointers to various | ||
2309 | ** constants inside the IR. But RENAMEs may need to be added to the IR | ||
2310 | ** during assembly, which might grow and reallocate the IR. We check | ||
2311 | ** at the end if the IR (in J->cur.ir) has actually grown, resize the | ||
2312 | ** copy (in J->curfinal.ir) and try again. | ||
2313 | ** | ||
2314 | ** 95% of all traces have zero RENAMEs, 3% have one RENAME, 1.5% have | ||
2315 | ** 2 RENAMEs and only 0.5% have more than that. That's why we opt to | ||
2316 | ** always have one spare slot in the IR (see above), which means we | ||
2317 | ** have to redo the assembly for only ~2% of all traces. | ||
2318 | ** | ||
2319 | ** Very, very rarely, this needs to be done repeatedly, since the | ||
2320 | ** location of constants inside the IR (actually, reachability from | ||
2321 | ** a global pointer) may affect register allocation and thus the | ||
2322 | ** number of RENAMEs. | ||
2323 | */ | ||
2324 | for (;;) { | ||
1855 | as->mcp = as->mctop; | 2325 | as->mcp = as->mctop; |
1856 | #ifdef LUA_USE_ASSERT | 2326 | #ifdef LUA_USE_ASSERT |
1857 | as->mcp_prev = as->mcp; | 2327 | as->mcp_prev = as->mcp; |
1858 | #endif | 2328 | #endif |
1859 | as->curins = T->nins; | 2329 | as->ir = J->curfinal->ir; /* Use the copied IR. */ |
2330 | as->curins = J->cur.nins = as->orignins; | ||
2331 | |||
1860 | RA_DBG_START(); | 2332 | RA_DBG_START(); |
1861 | RA_DBGX((as, "===== STOP =====")); | 2333 | RA_DBGX((as, "===== STOP =====")); |
1862 | 2334 | ||
@@ -1884,22 +2356,40 @@ void lj_asm_trace(jit_State *J, GCtrace *T) | |||
1884 | checkmclim(as); | 2356 | checkmclim(as); |
1885 | asm_ir(as, ir); | 2357 | asm_ir(as, ir); |
1886 | } | 2358 | } |
1887 | } while (as->realign); /* Retry in case the MCode needs to be realigned. */ | ||
1888 | 2359 | ||
1889 | /* Emit head of trace. */ | 2360 | if (as->realign && J->curfinal->nins >= T->nins) |
1890 | RA_DBG_REF(); | 2361 | continue; /* Retry in case only the MCode needs to be realigned. */ |
1891 | checkmclim(as); | 2362 | |
1892 | if (as->gcsteps > 0) { | 2363 | /* Emit head of trace. */ |
1893 | as->curins = as->T->snap[0].ref; | 2364 | RA_DBG_REF(); |
1894 | asm_snap_prep(as); /* The GC check is a guard. */ | 2365 | checkmclim(as); |
1895 | asm_gc_check(as); | 2366 | if (as->gcsteps > 0) { |
2367 | as->curins = as->T->snap[0].ref; | ||
2368 | asm_snap_prep(as); /* The GC check is a guard. */ | ||
2369 | asm_gc_check(as); | ||
2370 | as->curins = as->stopins; | ||
2371 | } | ||
2372 | ra_evictk(as); | ||
2373 | if (as->parent) | ||
2374 | asm_head_side(as); | ||
2375 | else | ||
2376 | asm_head_root(as); | ||
2377 | asm_phi_fixup(as); | ||
2378 | |||
2379 | if (J->curfinal->nins >= T->nins) { /* IR didn't grow? */ | ||
2380 | lua_assert(J->curfinal->nk == T->nk); | ||
2381 | memcpy(J->curfinal->ir + as->orignins, T->ir + as->orignins, | ||
2382 | (T->nins - as->orignins) * sizeof(IRIns)); /* Copy RENAMEs. */ | ||
2383 | T->nins = J->curfinal->nins; | ||
2384 | break; /* Done. */ | ||
2385 | } | ||
2386 | |||
2387 | /* Otherwise try again with a bigger IR. */ | ||
2388 | lj_trace_free(J2G(J), J->curfinal); | ||
2389 | J->curfinal = NULL; /* In case lj_trace_alloc() OOMs. */ | ||
2390 | J->curfinal = lj_trace_alloc(J->L, T); | ||
2391 | as->realign = NULL; | ||
1896 | } | 2392 | } |
1897 | ra_evictk(as); | ||
1898 | if (as->parent) | ||
1899 | asm_head_side(as); | ||
1900 | else | ||
1901 | asm_head_root(as); | ||
1902 | asm_phi_fixup(as); | ||
1903 | 2393 | ||
1904 | RA_DBGX((as, "===== START ====")); | 2394 | RA_DBGX((as, "===== START ====")); |
1905 | RA_DBG_FLUSH(); | 2395 | RA_DBG_FLUSH(); |
@@ -1912,6 +2402,9 @@ void lj_asm_trace(jit_State *J, GCtrace *T) | |||
1912 | if (!as->loopref) | 2402 | if (!as->loopref) |
1913 | asm_tail_fixup(as, T->link); /* Note: this may change as->mctop! */ | 2403 | asm_tail_fixup(as, T->link); /* Note: this may change as->mctop! */ |
1914 | T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp); | 2404 | T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp); |
2405 | #if LJ_TARGET_MCODE_FIXUP | ||
2406 | asm_mcode_fixup(T->mcode, T->szmcode); | ||
2407 | #endif | ||
1915 | lj_mcode_sync(T->mcode, origtop); | 2408 | lj_mcode_sync(T->mcode, origtop); |
1916 | } | 2409 | } |
1917 | 2410 | ||
diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h index 087530b2..9d055c81 100644 --- a/src/lj_asm_arm.h +++ b/src/lj_asm_arm.h | |||
@@ -338,7 +338,7 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, ARMIns ai, ARMIns air) | |||
338 | /* Generate a call to a C function. */ | 338 | /* Generate a call to a C function. */ |
339 | static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) | 339 | static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) |
340 | { | 340 | { |
341 | uint32_t n, nargs = CCI_NARGS(ci); | 341 | uint32_t n, nargs = CCI_XNARGS(ci); |
342 | int32_t ofs = 0; | 342 | int32_t ofs = 0; |
343 | #if LJ_SOFTFP | 343 | #if LJ_SOFTFP |
344 | Reg gpr = REGARG_FIRSTGPR; | 344 | Reg gpr = REGARG_FIRSTGPR; |
@@ -453,15 +453,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) | |||
453 | UNUSED(ci); | 453 | UNUSED(ci); |
454 | } | 454 | } |
455 | 455 | ||
456 | static void asm_call(ASMState *as, IRIns *ir) | ||
457 | { | ||
458 | IRRef args[CCI_NARGS_MAX]; | ||
459 | const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; | ||
460 | asm_collectargs(as, ir, ci, args); | ||
461 | asm_setupresult(as, ir, ci); | ||
462 | asm_gencall(as, ci, args); | ||
463 | } | ||
464 | |||
465 | static void asm_callx(ASMState *as, IRIns *ir) | 456 | static void asm_callx(ASMState *as, IRIns *ir) |
466 | { | 457 | { |
467 | IRRef args[CCI_NARGS_MAX*2]; | 458 | IRRef args[CCI_NARGS_MAX*2]; |
@@ -490,7 +481,7 @@ static void asm_retf(ASMState *as, IRIns *ir) | |||
490 | { | 481 | { |
491 | Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); | 482 | Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); |
492 | void *pc = ir_kptr(IR(ir->op2)); | 483 | void *pc = ir_kptr(IR(ir->op2)); |
493 | int32_t delta = 1+bc_a(*((const BCIns *)pc - 1)); | 484 | int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); |
494 | as->topslot -= (BCReg)delta; | 485 | as->topslot -= (BCReg)delta; |
495 | if ((int32_t)as->topslot < 0) as->topslot = 0; | 486 | if ((int32_t)as->topslot < 0) as->topslot = 0; |
496 | irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ | 487 | irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ |
@@ -601,31 +592,6 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
601 | } | 592 | } |
602 | } | 593 | } |
603 | 594 | ||
604 | #if !LJ_SOFTFP && LJ_HASFFI | ||
605 | static void asm_conv64(ASMState *as, IRIns *ir) | ||
606 | { | ||
607 | IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK); | ||
608 | IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH); | ||
609 | IRCallID id; | ||
610 | CCallInfo ci; | ||
611 | IRRef args[2]; | ||
612 | args[0] = (ir-1)->op1; | ||
613 | args[1] = ir->op1; | ||
614 | if (st == IRT_NUM || st == IRT_FLOAT) { | ||
615 | id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64); | ||
616 | ir--; | ||
617 | } else { | ||
618 | id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64); | ||
619 | } | ||
620 | ci = lj_ir_callinfo[id]; | ||
621 | #if !LJ_ABI_SOFTFP | ||
622 | ci.flags |= CCI_VARARG; /* These calls don't use the hard-float ABI! */ | ||
623 | #endif | ||
624 | asm_setupresult(as, ir, &ci); | ||
625 | asm_gencall(as, &ci, args); | ||
626 | } | ||
627 | #endif | ||
628 | |||
629 | static void asm_strto(ASMState *as, IRIns *ir) | 595 | static void asm_strto(ASMState *as, IRIns *ir) |
630 | { | 596 | { |
631 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; | 597 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; |
@@ -689,6 +655,8 @@ static void asm_strto(ASMState *as, IRIns *ir) | |||
689 | emit_opk(as, ARMI_ADD, tmp, RID_SP, ofs, RSET_GPR); | 655 | emit_opk(as, ARMI_ADD, tmp, RID_SP, ofs, RSET_GPR); |
690 | } | 656 | } |
691 | 657 | ||
658 | /* -- Memory references --------------------------------------------------- */ | ||
659 | |||
692 | /* Get pointer to TValue. */ | 660 | /* Get pointer to TValue. */ |
693 | static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) | 661 | static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) |
694 | { | 662 | { |
@@ -714,7 +682,7 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) | |||
714 | Reg src = ra_alloc1(as, ref, allow); | 682 | Reg src = ra_alloc1(as, ref, allow); |
715 | emit_lso(as, ARMI_STR, src, RID_SP, 0); | 683 | emit_lso(as, ARMI_STR, src, RID_SP, 0); |
716 | } | 684 | } |
717 | if ((ir+1)->o == IR_HIOP) | 685 | if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) |
718 | type = ra_alloc1(as, ref+1, allow); | 686 | type = ra_alloc1(as, ref+1, allow); |
719 | else | 687 | else |
720 | type = ra_allock(as, irt_toitype(ir->t), allow); | 688 | type = ra_allock(as, irt_toitype(ir->t), allow); |
@@ -722,27 +690,6 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) | |||
722 | } | 690 | } |
723 | } | 691 | } |
724 | 692 | ||
725 | static void asm_tostr(ASMState *as, IRIns *ir) | ||
726 | { | ||
727 | IRRef args[2]; | ||
728 | args[0] = ASMREF_L; | ||
729 | as->gcsteps++; | ||
730 | if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) { | ||
731 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum]; | ||
732 | args[1] = ASMREF_TMP1; /* const lua_Number * */ | ||
733 | asm_setupresult(as, ir, ci); /* GCstr * */ | ||
734 | asm_gencall(as, ci, args); | ||
735 | asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1); | ||
736 | } else { | ||
737 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint]; | ||
738 | args[1] = ir->op1; /* int32_t k */ | ||
739 | asm_setupresult(as, ir, ci); /* GCstr * */ | ||
740 | asm_gencall(as, ci, args); | ||
741 | } | ||
742 | } | ||
743 | |||
744 | /* -- Memory references --------------------------------------------------- */ | ||
745 | |||
746 | static void asm_aref(ASMState *as, IRIns *ir) | 693 | static void asm_aref(ASMState *as, IRIns *ir) |
747 | { | 694 | { |
748 | Reg dest = ra_dest(as, ir, RSET_GPR); | 695 | Reg dest = ra_dest(as, ir, RSET_GPR); |
@@ -960,20 +907,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir) | |||
960 | emit_opk(as, ARMI_ADD, dest, node, ofs, RSET_GPR); | 907 | emit_opk(as, ARMI_ADD, dest, node, ofs, RSET_GPR); |
961 | } | 908 | } |
962 | 909 | ||
963 | static void asm_newref(ASMState *as, IRIns *ir) | ||
964 | { | ||
965 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey]; | ||
966 | IRRef args[3]; | ||
967 | if (ir->r == RID_SINK) | ||
968 | return; | ||
969 | args[0] = ASMREF_L; /* lua_State *L */ | ||
970 | args[1] = ir->op1; /* GCtab *t */ | ||
971 | args[2] = ASMREF_TMP1; /* cTValue *key */ | ||
972 | asm_setupresult(as, ir, ci); /* TValue * */ | ||
973 | asm_gencall(as, ci, args); | ||
974 | asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2); | ||
975 | } | ||
976 | |||
977 | static void asm_uref(ASMState *as, IRIns *ir) | 910 | static void asm_uref(ASMState *as, IRIns *ir) |
978 | { | 911 | { |
979 | Reg dest = ra_dest(as, ir, RSET_GPR); | 912 | Reg dest = ra_dest(as, ir, RSET_GPR); |
@@ -1064,22 +997,26 @@ static ARMIns asm_fxstoreins(IRIns *ir) | |||
1064 | 997 | ||
1065 | static void asm_fload(ASMState *as, IRIns *ir) | 998 | static void asm_fload(ASMState *as, IRIns *ir) |
1066 | { | 999 | { |
1067 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1000 | if (ir->op1 == REF_NIL) { |
1068 | Reg idx = ra_alloc1(as, ir->op1, RSET_GPR); | 1001 | lua_assert(!ra_used(ir)); /* We can end up here if DCE is turned off. */ |
1069 | ARMIns ai = asm_fxloadins(ir); | 1002 | } else { |
1070 | int32_t ofs; | 1003 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1071 | if (ir->op2 == IRFL_TAB_ARRAY) { | 1004 | Reg idx = ra_alloc1(as, ir->op1, RSET_GPR); |
1072 | ofs = asm_fuseabase(as, ir->op1); | 1005 | ARMIns ai = asm_fxloadins(ir); |
1073 | if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ | 1006 | int32_t ofs; |
1074 | emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx); | 1007 | if (ir->op2 == IRFL_TAB_ARRAY) { |
1075 | return; | 1008 | ofs = asm_fuseabase(as, ir->op1); |
1009 | if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ | ||
1010 | emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx); | ||
1011 | return; | ||
1012 | } | ||
1076 | } | 1013 | } |
1014 | ofs = field_ofs[ir->op2]; | ||
1015 | if ((ai & 0x04000000)) | ||
1016 | emit_lso(as, ai, dest, idx, ofs); | ||
1017 | else | ||
1018 | emit_lsox(as, ai, dest, idx, ofs); | ||
1077 | } | 1019 | } |
1078 | ofs = field_ofs[ir->op2]; | ||
1079 | if ((ai & 0x04000000)) | ||
1080 | emit_lso(as, ai, dest, idx, ofs); | ||
1081 | else | ||
1082 | emit_lsox(as, ai, dest, idx, ofs); | ||
1083 | } | 1020 | } |
1084 | 1021 | ||
1085 | static void asm_fstore(ASMState *as, IRIns *ir) | 1022 | static void asm_fstore(ASMState *as, IRIns *ir) |
@@ -1105,7 +1042,7 @@ static void asm_xload(ASMState *as, IRIns *ir) | |||
1105 | asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); | 1042 | asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); |
1106 | } | 1043 | } |
1107 | 1044 | ||
1108 | static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) | 1045 | static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) |
1109 | { | 1046 | { |
1110 | if (ir->r != RID_SINK) { | 1047 | if (ir->r != RID_SINK) { |
1111 | Reg src = ra_alloc1(as, ir->op2, | 1048 | Reg src = ra_alloc1(as, ir->op2, |
@@ -1115,6 +1052,8 @@ static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) | |||
1115 | } | 1052 | } |
1116 | } | 1053 | } |
1117 | 1054 | ||
1055 | #define asm_xstore(as, ir) asm_xstore_(as, ir, 0) | ||
1056 | |||
1118 | static void asm_ahuvload(ASMState *as, IRIns *ir) | 1057 | static void asm_ahuvload(ASMState *as, IRIns *ir) |
1119 | { | 1058 | { |
1120 | int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP); | 1059 | int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP); |
@@ -1272,19 +1211,16 @@ dotypecheck: | |||
1272 | static void asm_cnew(ASMState *as, IRIns *ir) | 1211 | static void asm_cnew(ASMState *as, IRIns *ir) |
1273 | { | 1212 | { |
1274 | CTState *cts = ctype_ctsG(J2G(as->J)); | 1213 | CTState *cts = ctype_ctsG(J2G(as->J)); |
1275 | CTypeID ctypeid = (CTypeID)IR(ir->op1)->i; | 1214 | CTypeID id = (CTypeID)IR(ir->op1)->i; |
1276 | CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ? | 1215 | CTSize sz; |
1277 | lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i; | 1216 | CTInfo info = lj_ctype_info(cts, id, &sz); |
1278 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; | 1217 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; |
1279 | IRRef args[2]; | 1218 | IRRef args[4]; |
1280 | RegSet allow = (RSET_GPR & ~RSET_SCRATCH); | 1219 | RegSet allow = (RSET_GPR & ~RSET_SCRATCH); |
1281 | RegSet drop = RSET_SCRATCH; | 1220 | RegSet drop = RSET_SCRATCH; |
1282 | lua_assert(sz != CTSIZE_INVALID); | 1221 | lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL)); |
1283 | 1222 | ||
1284 | args[0] = ASMREF_L; /* lua_State *L */ | ||
1285 | args[1] = ASMREF_TMP1; /* MSize size */ | ||
1286 | as->gcsteps++; | 1223 | as->gcsteps++; |
1287 | |||
1288 | if (ra_hasreg(ir->r)) | 1224 | if (ra_hasreg(ir->r)) |
1289 | rset_clear(drop, ir->r); /* Dest reg handled below. */ | 1225 | rset_clear(drop, ir->r); /* Dest reg handled below. */ |
1290 | ra_evictset(as, drop); | 1226 | ra_evictset(as, drop); |
@@ -1306,16 +1242,28 @@ static void asm_cnew(ASMState *as, IRIns *ir) | |||
1306 | if (ofs == sizeof(GCcdata)) break; | 1242 | if (ofs == sizeof(GCcdata)) break; |
1307 | ofs -= 4; ir--; | 1243 | ofs -= 4; ir--; |
1308 | } | 1244 | } |
1245 | } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ | ||
1246 | ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; | ||
1247 | args[0] = ASMREF_L; /* lua_State *L */ | ||
1248 | args[1] = ir->op1; /* CTypeID id */ | ||
1249 | args[2] = ir->op2; /* CTSize sz */ | ||
1250 | args[3] = ASMREF_TMP1; /* CTSize align */ | ||
1251 | asm_gencall(as, ci, args); | ||
1252 | emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info)); | ||
1253 | return; | ||
1309 | } | 1254 | } |
1255 | |||
1310 | /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ | 1256 | /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ |
1311 | { | 1257 | { |
1312 | uint32_t k = emit_isk12(ARMI_MOV, ctypeid); | 1258 | uint32_t k = emit_isk12(ARMI_MOV, id); |
1313 | Reg r = k ? RID_R1 : ra_allock(as, ctypeid, allow); | 1259 | Reg r = k ? RID_R1 : ra_allock(as, id, allow); |
1314 | emit_lso(as, ARMI_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct)); | 1260 | emit_lso(as, ARMI_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct)); |
1315 | emit_lsox(as, ARMI_STRH, r, RID_RET, offsetof(GCcdata, ctypeid)); | 1261 | emit_lsox(as, ARMI_STRH, r, RID_RET, offsetof(GCcdata, ctypeid)); |
1316 | emit_d(as, ARMI_MOV|ARMI_K12|~LJ_TCDATA, RID_TMP); | 1262 | emit_d(as, ARMI_MOV|ARMI_K12|~LJ_TCDATA, RID_TMP); |
1317 | if (k) emit_d(as, ARMI_MOV^k, RID_R1); | 1263 | if (k) emit_d(as, ARMI_MOV^k, RID_R1); |
1318 | } | 1264 | } |
1265 | args[0] = ASMREF_L; /* lua_State *L */ | ||
1266 | args[1] = ASMREF_TMP1; /* MSize size */ | ||
1319 | asm_gencall(as, ci, args); | 1267 | asm_gencall(as, ci, args); |
1320 | ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), | 1268 | ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), |
1321 | ra_releasetmp(as, ASMREF_TMP1)); | 1269 | ra_releasetmp(as, ASMREF_TMP1)); |
@@ -1392,23 +1340,38 @@ static void asm_fpunary(ASMState *as, IRIns *ir, ARMIns ai) | |||
1392 | emit_dm(as, ai, (dest & 15), (left & 15)); | 1340 | emit_dm(as, ai, (dest & 15), (left & 15)); |
1393 | } | 1341 | } |
1394 | 1342 | ||
1395 | static int asm_fpjoin_pow(ASMState *as, IRIns *ir) | 1343 | static void asm_callround(ASMState *as, IRIns *ir, int id) |
1396 | { | 1344 | { |
1397 | IRIns *irp = IR(ir->op1); | 1345 | /* The modified regs must match with the *.dasc implementation. */ |
1398 | if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) { | 1346 | RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)| |
1399 | IRIns *irpp = IR(irp->op1); | 1347 | RID2RSET(RID_R3)|RID2RSET(RID_R12); |
1400 | if (irpp == ir-2 && irpp->o == IR_FPMATH && | 1348 | RegSet of; |
1401 | irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) { | 1349 | Reg dest, src; |
1402 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow]; | 1350 | ra_evictset(as, drop); |
1403 | IRRef args[2]; | 1351 | dest = ra_dest(as, ir, RSET_FPR); |
1404 | args[0] = irpp->op1; | 1352 | emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15)); |
1405 | args[1] = irp->op2; | 1353 | emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf : |
1406 | asm_setupresult(as, ir, ci); | 1354 | id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf : |
1407 | asm_gencall(as, ci, args); | 1355 | (void *)lj_vm_trunc_sf); |
1408 | return 1; | 1356 | /* Workaround to protect argument GPRs from being used for remat. */ |
1409 | } | 1357 | of = as->freeset; |
1410 | } | 1358 | as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1); |
1411 | return 0; | 1359 | as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L); |
1360 | src = ra_alloc1(as, ir->op1, RSET_FPR); /* May alloc GPR to remat FPR. */ | ||
1361 | as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1)); | ||
1362 | emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15)); | ||
1363 | } | ||
1364 | |||
1365 | static void asm_fpmath(ASMState *as, IRIns *ir) | ||
1366 | { | ||
1367 | if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) | ||
1368 | return; | ||
1369 | if (ir->op2 <= IRFPM_TRUNC) | ||
1370 | asm_callround(as, ir, ir->op2); | ||
1371 | else if (ir->op2 == IRFPM_SQRT) | ||
1372 | asm_fpunary(as, ir, ARMI_VSQRT_D); | ||
1373 | else | ||
1374 | asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); | ||
1412 | } | 1375 | } |
1413 | #endif | 1376 | #endif |
1414 | 1377 | ||
@@ -1474,19 +1437,6 @@ static void asm_intop_s(ASMState *as, IRIns *ir, ARMIns ai) | |||
1474 | asm_intop(as, ir, asm_drop_cmp0(as, ai)); | 1437 | asm_intop(as, ir, asm_drop_cmp0(as, ai)); |
1475 | } | 1438 | } |
1476 | 1439 | ||
1477 | static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai) | ||
1478 | { | ||
1479 | ai = asm_drop_cmp0(as, ai); | ||
1480 | if (ir->op2 == 0) { | ||
1481 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
1482 | uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR); | ||
1483 | emit_d(as, ai^m, dest); | ||
1484 | } else { | ||
1485 | /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */ | ||
1486 | asm_intop(as, ir, ai); | ||
1487 | } | ||
1488 | } | ||
1489 | |||
1490 | static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai) | 1440 | static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai) |
1491 | { | 1441 | { |
1492 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1442 | Reg dest = ra_dest(as, ir, RSET_GPR); |
@@ -1552,6 +1502,20 @@ static void asm_mul(ASMState *as, IRIns *ir) | |||
1552 | asm_intmul(as, ir); | 1502 | asm_intmul(as, ir); |
1553 | } | 1503 | } |
1554 | 1504 | ||
1505 | #define asm_addov(as, ir) asm_add(as, ir) | ||
1506 | #define asm_subov(as, ir) asm_sub(as, ir) | ||
1507 | #define asm_mulov(as, ir) asm_mul(as, ir) | ||
1508 | |||
1509 | #if !LJ_SOFTFP | ||
1510 | #define asm_div(as, ir) asm_fparith(as, ir, ARMI_VDIV_D) | ||
1511 | #define asm_pow(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi) | ||
1512 | #define asm_abs(as, ir) asm_fpunary(as, ir, ARMI_VABS_D) | ||
1513 | #define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2) | ||
1514 | #define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) | ||
1515 | #endif | ||
1516 | |||
1517 | #define asm_mod(as, ir) asm_callid(as, ir, IRCALL_lj_vm_modi) | ||
1518 | |||
1555 | static void asm_neg(ASMState *as, IRIns *ir) | 1519 | static void asm_neg(ASMState *as, IRIns *ir) |
1556 | { | 1520 | { |
1557 | #if !LJ_SOFTFP | 1521 | #if !LJ_SOFTFP |
@@ -1563,41 +1527,22 @@ static void asm_neg(ASMState *as, IRIns *ir) | |||
1563 | asm_intneg(as, ir, ARMI_RSB); | 1527 | asm_intneg(as, ir, ARMI_RSB); |
1564 | } | 1528 | } |
1565 | 1529 | ||
1566 | static void asm_callid(ASMState *as, IRIns *ir, IRCallID id) | 1530 | static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai) |
1567 | { | 1531 | { |
1568 | const CCallInfo *ci = &lj_ir_callinfo[id]; | 1532 | ai = asm_drop_cmp0(as, ai); |
1569 | IRRef args[2]; | 1533 | if (ir->op2 == 0) { |
1570 | args[0] = ir->op1; | 1534 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1571 | args[1] = ir->op2; | 1535 | uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR); |
1572 | asm_setupresult(as, ir, ci); | 1536 | emit_d(as, ai^m, dest); |
1573 | asm_gencall(as, ci, args); | 1537 | } else { |
1538 | /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */ | ||
1539 | asm_intop(as, ir, ai); | ||
1540 | } | ||
1574 | } | 1541 | } |
1575 | 1542 | ||
1576 | #if !LJ_SOFTFP | 1543 | #define asm_bnot(as, ir) asm_bitop(as, ir, ARMI_MVN) |
1577 | static void asm_callround(ASMState *as, IRIns *ir, int id) | ||
1578 | { | ||
1579 | /* The modified regs must match with the *.dasc implementation. */ | ||
1580 | RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)| | ||
1581 | RID2RSET(RID_R3)|RID2RSET(RID_R12); | ||
1582 | RegSet of; | ||
1583 | Reg dest, src; | ||
1584 | ra_evictset(as, drop); | ||
1585 | dest = ra_dest(as, ir, RSET_FPR); | ||
1586 | emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15)); | ||
1587 | emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf : | ||
1588 | id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf : | ||
1589 | (void *)lj_vm_trunc_sf); | ||
1590 | /* Workaround to protect argument GPRs from being used for remat. */ | ||
1591 | of = as->freeset; | ||
1592 | as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1); | ||
1593 | as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L); | ||
1594 | src = ra_alloc1(as, ir->op1, RSET_FPR); /* May alloc GPR to remat FPR. */ | ||
1595 | as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1)); | ||
1596 | emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15)); | ||
1597 | } | ||
1598 | #endif | ||
1599 | 1544 | ||
1600 | static void asm_bitswap(ASMState *as, IRIns *ir) | 1545 | static void asm_bswap(ASMState *as, IRIns *ir) |
1601 | { | 1546 | { |
1602 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1547 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1603 | Reg left = ra_alloc1(as, ir->op1, RSET_GPR); | 1548 | Reg left = ra_alloc1(as, ir->op1, RSET_GPR); |
@@ -1614,6 +1559,10 @@ static void asm_bitswap(ASMState *as, IRIns *ir) | |||
1614 | } | 1559 | } |
1615 | } | 1560 | } |
1616 | 1561 | ||
1562 | #define asm_band(as, ir) asm_bitop(as, ir, ARMI_AND) | ||
1563 | #define asm_bor(as, ir) asm_bitop(as, ir, ARMI_ORR) | ||
1564 | #define asm_bxor(as, ir) asm_bitop(as, ir, ARMI_EOR) | ||
1565 | |||
1617 | static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh) | 1566 | static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh) |
1618 | { | 1567 | { |
1619 | if (irref_isk(ir->op2)) { /* Constant shifts. */ | 1568 | if (irref_isk(ir->op2)) { /* Constant shifts. */ |
@@ -1631,6 +1580,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh) | |||
1631 | } | 1580 | } |
1632 | } | 1581 | } |
1633 | 1582 | ||
1583 | #define asm_bshl(as, ir) asm_bitshift(as, ir, ARMSH_LSL) | ||
1584 | #define asm_bshr(as, ir) asm_bitshift(as, ir, ARMSH_LSR) | ||
1585 | #define asm_bsar(as, ir) asm_bitshift(as, ir, ARMSH_ASR) | ||
1586 | #define asm_bror(as, ir) asm_bitshift(as, ir, ARMSH_ROR) | ||
1587 | #define asm_brol(as, ir) lua_assert(0) | ||
1588 | |||
1634 | static void asm_intmin_max(ASMState *as, IRIns *ir, int cc) | 1589 | static void asm_intmin_max(ASMState *as, IRIns *ir, int cc) |
1635 | { | 1590 | { |
1636 | uint32_t kcmp = 0, kmov = 0; | 1591 | uint32_t kcmp = 0, kmov = 0; |
@@ -1704,6 +1659,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int cc, int fcc) | |||
1704 | asm_intmin_max(as, ir, cc); | 1659 | asm_intmin_max(as, ir, cc); |
1705 | } | 1660 | } |
1706 | 1661 | ||
1662 | #define asm_min(as, ir) asm_min_max(as, ir, CC_GT, CC_HI) | ||
1663 | #define asm_max(as, ir) asm_min_max(as, ir, CC_LT, CC_LO) | ||
1664 | |||
1707 | /* -- Comparisons --------------------------------------------------------- */ | 1665 | /* -- Comparisons --------------------------------------------------------- */ |
1708 | 1666 | ||
1709 | /* Map of comparisons to flags. ORDER IR. */ | 1667 | /* Map of comparisons to flags. ORDER IR. */ |
@@ -1819,6 +1777,18 @@ notst: | |||
1819 | as->flagmcp = as->mcp; /* Allow elimination of the compare. */ | 1777 | as->flagmcp = as->mcp; /* Allow elimination of the compare. */ |
1820 | } | 1778 | } |
1821 | 1779 | ||
1780 | static void asm_comp(ASMState *as, IRIns *ir) | ||
1781 | { | ||
1782 | #if !LJ_SOFTFP | ||
1783 | if (irt_isnum(ir->t)) | ||
1784 | asm_fpcomp(as, ir); | ||
1785 | else | ||
1786 | #endif | ||
1787 | asm_intcomp(as, ir); | ||
1788 | } | ||
1789 | |||
1790 | #define asm_equal(as, ir) asm_comp(as, ir) | ||
1791 | |||
1822 | #if LJ_HASFFI | 1792 | #if LJ_HASFFI |
1823 | /* 64 bit integer comparisons. */ | 1793 | /* 64 bit integer comparisons. */ |
1824 | static void asm_int64comp(ASMState *as, IRIns *ir) | 1794 | static void asm_int64comp(ASMState *as, IRIns *ir) |
@@ -1893,7 +1863,7 @@ static void asm_hiop(ASMState *as, IRIns *ir) | |||
1893 | #endif | 1863 | #endif |
1894 | } else if ((ir-1)->o == IR_XSTORE) { | 1864 | } else if ((ir-1)->o == IR_XSTORE) { |
1895 | if ((ir-1)->r != RID_SINK) | 1865 | if ((ir-1)->r != RID_SINK) |
1896 | asm_xstore(as, ir, 4); | 1866 | asm_xstore_(as, ir, 4); |
1897 | return; | 1867 | return; |
1898 | } | 1868 | } |
1899 | if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ | 1869 | if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ |
@@ -1941,6 +1911,16 @@ static void asm_hiop(ASMState *as, IRIns *ir) | |||
1941 | #endif | 1911 | #endif |
1942 | } | 1912 | } |
1943 | 1913 | ||
1914 | /* -- Profiling ----------------------------------------------------------- */ | ||
1915 | |||
1916 | static void asm_prof(ASMState *as, IRIns *ir) | ||
1917 | { | ||
1918 | UNUSED(ir); | ||
1919 | asm_guardcc(as, CC_NE); | ||
1920 | emit_n(as, ARMI_TST|ARMI_K12|HOOK_PROFILE, RID_TMP); | ||
1921 | emit_lsptr(as, ARMI_LDRB, RID_TMP, (void *)&J2G(as->J)->hookmask); | ||
1922 | } | ||
1923 | |||
1944 | /* -- Stack handling ------------------------------------------------------ */ | 1924 | /* -- Stack handling ------------------------------------------------------ */ |
1945 | 1925 | ||
1946 | /* Check Lua stack size for overflow. Use exit handler as fallback. */ | 1926 | /* Check Lua stack size for overflow. Use exit handler as fallback. */ |
@@ -1970,7 +1950,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot, | |||
1970 | emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, | 1950 | emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, |
1971 | (int32_t)offsetof(lua_State, maxstack)); | 1951 | (int32_t)offsetof(lua_State, maxstack)); |
1972 | if (irp) { /* Must not spill arbitrary registers in head of side trace. */ | 1952 | if (irp) { /* Must not spill arbitrary registers in head of side trace. */ |
1973 | int32_t i = i32ptr(&J2G(as->J)->jit_L); | 1953 | int32_t i = i32ptr(&J2G(as->J)->cur_L); |
1974 | if (ra_hasspill(irp->s)) | 1954 | if (ra_hasspill(irp->s)) |
1975 | emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s)); | 1955 | emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s)); |
1976 | emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095)); | 1956 | emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095)); |
@@ -1978,7 +1958,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot, | |||
1978 | emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0); /* Save temp. register. */ | 1958 | emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0); /* Save temp. register. */ |
1979 | emit_loadi(as, RID_TMP, (i & ~4095)); | 1959 | emit_loadi(as, RID_TMP, (i & ~4095)); |
1980 | } else { | 1960 | } else { |
1981 | emit_getgl(as, RID_TMP, jit_L); | 1961 | emit_getgl(as, RID_TMP, cur_L); |
1982 | } | 1962 | } |
1983 | } | 1963 | } |
1984 | 1964 | ||
@@ -2087,13 +2067,13 @@ static void asm_loop_fixup(ASMState *as) | |||
2087 | 2067 | ||
2088 | /* -- Head of trace ------------------------------------------------------- */ | 2068 | /* -- Head of trace ------------------------------------------------------- */ |
2089 | 2069 | ||
2090 | /* Reload L register from g->jit_L. */ | 2070 | /* Reload L register from g->cur_L. */ |
2091 | static void asm_head_lreg(ASMState *as) | 2071 | static void asm_head_lreg(ASMState *as) |
2092 | { | 2072 | { |
2093 | IRIns *ir = IR(ASMREF_L); | 2073 | IRIns *ir = IR(ASMREF_L); |
2094 | if (ra_used(ir)) { | 2074 | if (ra_used(ir)) { |
2095 | Reg r = ra_dest(as, ir, RSET_GPR); | 2075 | Reg r = ra_dest(as, ir, RSET_GPR); |
2096 | emit_getgl(as, r, jit_L); | 2076 | emit_getgl(as, r, cur_L); |
2097 | ra_evictk(as); | 2077 | ra_evictk(as); |
2098 | } | 2078 | } |
2099 | } | 2079 | } |
@@ -2164,143 +2144,13 @@ static void asm_tail_prep(ASMState *as) | |||
2164 | *p = 0; /* Prevent load/store merging. */ | 2144 | *p = 0; /* Prevent load/store merging. */ |
2165 | } | 2145 | } |
2166 | 2146 | ||
2167 | /* -- Instruction dispatch ------------------------------------------------ */ | ||
2168 | |||
2169 | /* Assemble a single instruction. */ | ||
2170 | static void asm_ir(ASMState *as, IRIns *ir) | ||
2171 | { | ||
2172 | switch ((IROp)ir->o) { | ||
2173 | /* Miscellaneous ops. */ | ||
2174 | case IR_LOOP: asm_loop(as); break; | ||
2175 | case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break; | ||
2176 | case IR_USE: | ||
2177 | ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break; | ||
2178 | case IR_PHI: asm_phi(as, ir); break; | ||
2179 | case IR_HIOP: asm_hiop(as, ir); break; | ||
2180 | case IR_GCSTEP: asm_gcstep(as, ir); break; | ||
2181 | |||
2182 | /* Guarded assertions. */ | ||
2183 | case IR_EQ: case IR_NE: | ||
2184 | if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) { | ||
2185 | as->curins--; | ||
2186 | asm_href(as, ir-1, (IROp)ir->o); | ||
2187 | break; | ||
2188 | } | ||
2189 | /* fallthrough */ | ||
2190 | case IR_LT: case IR_GE: case IR_LE: case IR_GT: | ||
2191 | case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT: | ||
2192 | case IR_ABC: | ||
2193 | #if !LJ_SOFTFP | ||
2194 | if (irt_isnum(ir->t)) { asm_fpcomp(as, ir); break; } | ||
2195 | #endif | ||
2196 | asm_intcomp(as, ir); | ||
2197 | break; | ||
2198 | |||
2199 | case IR_RETF: asm_retf(as, ir); break; | ||
2200 | |||
2201 | /* Bit ops. */ | ||
2202 | case IR_BNOT: asm_bitop(as, ir, ARMI_MVN); break; | ||
2203 | case IR_BSWAP: asm_bitswap(as, ir); break; | ||
2204 | |||
2205 | case IR_BAND: asm_bitop(as, ir, ARMI_AND); break; | ||
2206 | case IR_BOR: asm_bitop(as, ir, ARMI_ORR); break; | ||
2207 | case IR_BXOR: asm_bitop(as, ir, ARMI_EOR); break; | ||
2208 | |||
2209 | case IR_BSHL: asm_bitshift(as, ir, ARMSH_LSL); break; | ||
2210 | case IR_BSHR: asm_bitshift(as, ir, ARMSH_LSR); break; | ||
2211 | case IR_BSAR: asm_bitshift(as, ir, ARMSH_ASR); break; | ||
2212 | case IR_BROR: asm_bitshift(as, ir, ARMSH_ROR); break; | ||
2213 | case IR_BROL: lua_assert(0); break; | ||
2214 | |||
2215 | /* Arithmetic ops. */ | ||
2216 | case IR_ADD: case IR_ADDOV: asm_add(as, ir); break; | ||
2217 | case IR_SUB: case IR_SUBOV: asm_sub(as, ir); break; | ||
2218 | case IR_MUL: case IR_MULOV: asm_mul(as, ir); break; | ||
2219 | case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break; | ||
2220 | case IR_NEG: asm_neg(as, ir); break; | ||
2221 | |||
2222 | #if LJ_SOFTFP | ||
2223 | case IR_DIV: case IR_POW: case IR_ABS: | ||
2224 | case IR_ATAN2: case IR_LDEXP: case IR_FPMATH: case IR_TOBIT: | ||
2225 | lua_assert(0); /* Unused for LJ_SOFTFP. */ | ||
2226 | break; | ||
2227 | #else | ||
2228 | case IR_DIV: asm_fparith(as, ir, ARMI_VDIV_D); break; | ||
2229 | case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break; | ||
2230 | case IR_ABS: asm_fpunary(as, ir, ARMI_VABS_D); break; | ||
2231 | case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break; | ||
2232 | case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break; | ||
2233 | case IR_FPMATH: | ||
2234 | if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) | ||
2235 | break; | ||
2236 | if (ir->op2 <= IRFPM_TRUNC) | ||
2237 | asm_callround(as, ir, ir->op2); | ||
2238 | else if (ir->op2 == IRFPM_SQRT) | ||
2239 | asm_fpunary(as, ir, ARMI_VSQRT_D); | ||
2240 | else | ||
2241 | asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); | ||
2242 | break; | ||
2243 | case IR_TOBIT: asm_tobit(as, ir); break; | ||
2244 | #endif | ||
2245 | |||
2246 | case IR_MIN: asm_min_max(as, ir, CC_GT, CC_HI); break; | ||
2247 | case IR_MAX: asm_min_max(as, ir, CC_LT, CC_LO); break; | ||
2248 | |||
2249 | /* Memory references. */ | ||
2250 | case IR_AREF: asm_aref(as, ir); break; | ||
2251 | case IR_HREF: asm_href(as, ir, 0); break; | ||
2252 | case IR_HREFK: asm_hrefk(as, ir); break; | ||
2253 | case IR_NEWREF: asm_newref(as, ir); break; | ||
2254 | case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break; | ||
2255 | case IR_FREF: asm_fref(as, ir); break; | ||
2256 | case IR_STRREF: asm_strref(as, ir); break; | ||
2257 | |||
2258 | /* Loads and stores. */ | ||
2259 | case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: | ||
2260 | asm_ahuvload(as, ir); | ||
2261 | break; | ||
2262 | case IR_FLOAD: asm_fload(as, ir); break; | ||
2263 | case IR_XLOAD: asm_xload(as, ir); break; | ||
2264 | case IR_SLOAD: asm_sload(as, ir); break; | ||
2265 | |||
2266 | case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break; | ||
2267 | case IR_FSTORE: asm_fstore(as, ir); break; | ||
2268 | case IR_XSTORE: asm_xstore(as, ir, 0); break; | ||
2269 | |||
2270 | /* Allocations. */ | ||
2271 | case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break; | ||
2272 | case IR_TNEW: asm_tnew(as, ir); break; | ||
2273 | case IR_TDUP: asm_tdup(as, ir); break; | ||
2274 | case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break; | ||
2275 | |||
2276 | /* Write barriers. */ | ||
2277 | case IR_TBAR: asm_tbar(as, ir); break; | ||
2278 | case IR_OBAR: asm_obar(as, ir); break; | ||
2279 | |||
2280 | /* Type conversions. */ | ||
2281 | case IR_CONV: asm_conv(as, ir); break; | ||
2282 | case IR_TOSTR: asm_tostr(as, ir); break; | ||
2283 | case IR_STRTO: asm_strto(as, ir); break; | ||
2284 | |||
2285 | /* Calls. */ | ||
2286 | case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break; | ||
2287 | case IR_CALLXS: asm_callx(as, ir); break; | ||
2288 | case IR_CARG: break; | ||
2289 | |||
2290 | default: | ||
2291 | setintV(&as->J->errinfo, ir->o); | ||
2292 | lj_trace_err_info(as->J, LJ_TRERR_NYIIR); | ||
2293 | break; | ||
2294 | } | ||
2295 | } | ||
2296 | |||
2297 | /* -- Trace setup --------------------------------------------------------- */ | 2147 | /* -- Trace setup --------------------------------------------------------- */ |
2298 | 2148 | ||
2299 | /* Ensure there are enough stack slots for call arguments. */ | 2149 | /* Ensure there are enough stack slots for call arguments. */ |
2300 | static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) | 2150 | static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) |
2301 | { | 2151 | { |
2302 | IRRef args[CCI_NARGS_MAX*2]; | 2152 | IRRef args[CCI_NARGS_MAX*2]; |
2303 | uint32_t i, nargs = (int)CCI_NARGS(ci); | 2153 | uint32_t i, nargs = CCI_XNARGS(ci); |
2304 | int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR, fprodd = 0; | 2154 | int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR, fprodd = 0; |
2305 | asm_collectargs(as, ir, ci, args); | 2155 | asm_collectargs(as, ir, ci, args); |
2306 | for (i = 0; i < nargs; i++) { | 2156 | for (i = 0; i < nargs; i++) { |
diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h new file mode 100644 index 00000000..ce2100c9 --- /dev/null +++ b/src/lj_asm_arm64.h | |||
@@ -0,0 +1,2043 @@ | |||
1 | /* | ||
2 | ** ARM64 IR assembler (SSA IR -> machine code). | ||
3 | ** Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h | ||
4 | ** | ||
5 | ** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. | ||
6 | ** Sponsored by Cisco Systems, Inc. | ||
7 | */ | ||
8 | |||
9 | /* -- Register allocator extensions --------------------------------------- */ | ||
10 | |||
11 | /* Allocate a register with a hint. Returns the register holding ref; the hint is only recorded when ref has no register and no hint yet and iscrossref() is false. */ | ||
12 | static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow) | ||
13 | { | ||
14 | Reg r = IR(ref)->r; | ||
15 | if (ra_noreg(r)) { /* ref has no register yet: allocate one from allow below. */ | ||
16 | if (!ra_hashint(r) && !iscrossref(as, ref)) | ||
17 | ra_sethint(IR(ref)->r, hint); /* Propagate register hint. */ | ||
18 | r = ra_allocref(as, ref, allow); | ||
19 | } | ||
20 | ra_noweak(as, r); | ||
21 | return r; | ||
22 | } | ||
23 | |||
24 | /* Allocate two source registers for three-operand instructions. Both are returned in one value: left in the low 8 bits, right shifted up by 8. */ | ||
25 | static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) | ||
26 | { | ||
27 | IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); | ||
28 | Reg left = irl->r, right = irr->r; | ||
29 | if (ra_hasreg(left)) { /* Left operand already has a register. */ | ||
30 | ra_noweak(as, left); | ||
31 | if (ra_noreg(right)) | ||
32 | right = ra_allocref(as, ir->op2, rset_exclude(allow, left)); | ||
33 | else | ||
34 | ra_noweak(as, right); | ||
35 | } else if (ra_hasreg(right)) { /* Only the right operand has a register. */ | ||
36 | ra_noweak(as, right); | ||
37 | left = ra_allocref(as, ir->op1, rset_exclude(allow, right)); | ||
38 | } else if (ra_hashint(right)) { /* Neither has a register, but right carries a hint: allocate it first. */ | ||
39 | right = ra_allocref(as, ir->op2, allow); | ||
40 | left = ra_alloc1(as, ir->op1, rset_exclude(allow, right)); | ||
41 | } else { /* Neither has a register: allocate left first, then right from the remaining set. */ | ||
42 | left = ra_allocref(as, ir->op1, allow); | ||
43 | right = ra_alloc1(as, ir->op2, rset_exclude(allow, left)); | ||
44 | } | ||
45 | return left | (right << 8); | ||
46 | } | ||
47 | |||
48 | /* -- Guard handling ------------------------------------------------------ */ | ||
49 | |||
50 | /* Setup all needed exit stubs. Writes nexits+3 instructions downwards from as->mctop and then moves as->mctop to the first of them. */ | ||
51 | static void asm_exitstub_setup(ASMState *as, ExitNo nexits) | ||
52 | { | ||
53 | ExitNo i; | ||
54 | MCode *mxp = as->mctop; | ||
55 | if (mxp - (nexits + 3 + MCLIM_REDZONE) < as->mclim) /* Stubs plus red zone would drop below as->mclim. */ | ||
56 | asm_mclimit(as); | ||
57 | /* 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno; bl <1; bl <1; ... */ | ||
58 | for (i = nexits-1; (int32_t)i >= 0; i--) /* One BL per exit, highest exit number first (highest address). */ | ||
59 | *--mxp = A64I_LE(A64I_BL | A64F_S26(-3-i)); /* Offset -3-i reaches back to the STR at label 1. */ | ||
60 | *--mxp = A64I_LE(A64I_MOVZw | A64F_U16(as->T->traceno)); | ||
61 | mxp--; /* Step down first: the BL offset below is computed relative to its own slot. */ | ||
62 | *mxp = A64I_LE(A64I_BL | A64F_S26(((MCode *)(void *)lj_vm_exit_handler-mxp))); | ||
63 | *--mxp = A64I_LE(A64I_STRx | A64F_D(RID_LR) | A64F_N(RID_SP)); | ||
64 | as->mctop = mxp; | ||
65 | } | ||
66 | |||
67 | static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno) | ||
68 | { /* Returns the address of the stub branch for exitno: as->mctop plus the 3 leading instructions written by asm_exitstub_setup(). */ | ||
69 | /* Keep this in-sync with exitstub_trace_addr(). */ | ||
70 | return as->mctop + exitno + 3; | ||
71 | } | ||
72 | |||
73 | /* Emit conditional branch to exit for guard. The target is the exit stub of the current snapshot (as->snapno). */ | ||
74 | static void asm_guardcc(ASMState *as, A64CC cc) | ||
75 | { | ||
76 | MCode *target = asm_exitstub_addr(as, as->snapno); | ||
77 | MCode *p = as->mcp; | ||
78 | if (LJ_UNLIKELY(p == as->invmcp)) { /* Special case: the current position is the one recorded in as->invmcp. */ | ||
79 | as->loopinv = 1; | ||
80 | *p = A64I_B | A64F_S26(target-p); /* Overwrite the instruction at p with an unconditional branch to the exit. */ | ||
81 | emit_cond_branch(as, cc^1, p-1); /* NOTE(review): cc^1 is presumably the inverted condition -- confirm. */ | ||
82 | return; | ||
83 | } | ||
84 | emit_cond_branch(as, cc, target); | ||
85 | } | ||
86 | |||
87 | /* Emit test and branch instruction to exit for guard. Tests the given bit of register r; the target is the exit stub of the current snapshot. */ | ||
88 | static void asm_guardtnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit) | ||
89 | { | ||
90 | MCode *target = asm_exitstub_addr(as, as->snapno); | ||
91 | MCode *p = as->mcp; | ||
92 | if (LJ_UNLIKELY(p == as->invmcp)) { /* Special case: the current position is the one recorded in as->invmcp. */ | ||
93 | as->loopinv = 1; | ||
94 | *p = A64I_B | A64F_S26(target-p); /* Overwrite the instruction at p with an unconditional branch to the exit. */ | ||
95 | emit_tnb(as, ai^0x01000000u, r, bit, p-1); /* NOTE(review): flipping bit 24 presumably selects the opposite test-and-branch -- confirm. */ | ||
96 | return; | ||
97 | } | ||
98 | emit_tnb(as, ai, r, bit, target); | ||
99 | } | ||
100 | |||
101 | /* Emit compare and branch instruction to exit for guard. Operates on register r; the target is the exit stub of the current snapshot. */ | ||
102 | static void asm_guardcnb(ASMState *as, A64Ins ai, Reg r) | ||
103 | { | ||
104 | MCode *target = asm_exitstub_addr(as, as->snapno); | ||
105 | MCode *p = as->mcp; | ||
106 | if (LJ_UNLIKELY(p == as->invmcp)) { /* Special case: the current position is the one recorded in as->invmcp. */ | ||
107 | as->loopinv = 1; | ||
108 | *p = A64I_B | A64F_S26(target-p); /* Overwrite the instruction at p with an unconditional branch to the exit. */ | ||
109 | emit_cnb(as, ai^0x01000000u, r, p-1); /* NOTE(review): flipping bit 24 presumably selects the opposite compare-and-branch -- confirm. */ | ||
110 | return; | ||
111 | } | ||
112 | emit_cnb(as, ai, r, target); | ||
113 | } | ||
114 | |||
115 | /* -- Operand fusion ------------------------------------------------------ */ | ||
116 | |||
117 | /* Limit linear search to this distance. Avoids O(n^2) behavior. */ | ||
118 | #define CONFLICT_SEARCH_LIM 31 | ||
119 | |||
120 | static int asm_isk32(ASMState *as, IRRef ref, int32_t *k) | ||
121 | { /* Returns 1 and stores the value in *k if ref is a constant usable as a 32 bit value; otherwise returns 0 and leaves *k untouched. */ | ||
122 | if (irref_isk(ref)) { | ||
123 | IRIns *ir = IR(ref); | ||
124 | if (ir->o == IR_KNULL || !irt_is64(ir->t)) { /* KNULL or a non-64 bit type: take ir->i as is. */ | ||
125 | *k = ir->i; | ||
126 | return 1; | ||
127 | } else if (checki32((int64_t)ir_k64(ir)->u64)) { /* 64 bit constant: accept only if checki32() passes. */ | ||
128 | *k = (int32_t)ir_k64(ir)->u64; | ||
129 | return 1; | ||
130 | } | ||
131 | } | ||
132 | return 0; | ||
133 | } | ||
134 | |||
135 | /* Check if there's no conflicting instruction between curins and ref. Returns 1 if none is found, 0 on a conflict or if ref is more than CONFLICT_SEARCH_LIM instructions away. */ | ||
136 | static int noconflict(ASMState *as, IRRef ref, IROp conflict) | ||
137 | { | ||
138 | IRIns *ir = as->ir; | ||
139 | IRRef i = as->curins; | ||
140 | if (i > ref + CONFLICT_SEARCH_LIM) | ||
141 | return 0; /* Give up, ref is too far away. */ | ||
142 | while (--i > ref) /* Walk backwards over the instructions strictly between ref and curins. */ | ||
143 | if (ir[i].o == conflict) | ||
144 | return 0; /* Conflict found. */ | ||
145 | return 1; /* Ok, no conflict. */ | ||
146 | } | ||
147 | |||
148 | /* Fuse the array base of colocated arrays. Returns sizeof(GCtab) as the offset to use when fusing is possible, else 0. */ | ||
149 | static int32_t asm_fuseabase(ASMState *as, IRRef ref) | ||
150 | { | ||
151 | IRIns *ir = IR(ref); | ||
152 | if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE && /* Only for a TNEW whose op1 does not exceed LJ_MAX_COLOSIZE ... */ | ||
153 | !neverfuse(as) && noconflict(as, ref, IR_NEWREF)) /* ... with fusing allowed and no NEWREF between ref and curins. */ | ||
154 | return (int32_t)sizeof(GCtab); | ||
155 | return 0; | ||
156 | } | ||
157 | |||
158 | #define FUSE_REG 0x40000000 | ||
159 | |||
160 | /* Fuse array/hash/upvalue reference into register+offset operand. Returns the base register; *ofsp receives an immediate offset, or FUSE_REG combined with an index register, or 0 if nothing was fused. */ | ||
161 | static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow, | ||
162 | A64Ins ins) | ||
163 | { | ||
164 | IRIns *ir = IR(ref); | ||
165 | if (ra_noreg(ir->r)) { /* Only try to fuse if the reference has no register of its own. */ | ||
166 | if (ir->o == IR_AREF) { | ||
167 | if (mayfuse(as, ref)) { | ||
168 | if (irref_isk(ir->op2)) { /* Constant index: try base register + immediate offset. */ | ||
169 | IRRef tab = IR(ir->op1)->op1; | ||
170 | int32_t ofs = asm_fuseabase(as, tab); | ||
171 | IRRef refa = ofs ? tab : ir->op1; /* With a fused array base the table itself becomes the base ref. */ | ||
172 | ofs += 8*IR(ir->op2)->i; /* Constant index scaled by 8. */ | ||
173 | if (emit_checkofs(ins, ofs)) { | ||
174 | *ofsp = ofs; | ||
175 | return ra_alloc1(as, refa, allow); | ||
176 | } | ||
177 | } else { /* Variable index: hand back base and index registers. */ | ||
178 | Reg base = ra_alloc1(as, ir->op1, allow); | ||
179 | *ofsp = FUSE_REG|ra_alloc1(as, ir->op2, rset_exclude(allow, base)); | ||
180 | return base; | ||
181 | } | ||
182 | } | ||
183 | } else if (ir->o == IR_HREFK) { | ||
184 | if (mayfuse(as, ref)) { | ||
185 | int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node)); /* op2 of the instruction referenced by ir->op2, scaled by sizeof(Node). */ | ||
186 | if (emit_checkofs(ins, ofs)) { | ||
187 | *ofsp = ofs; | ||
188 | return ra_alloc1(as, ir->op1, allow); | ||
189 | } | ||
190 | } | ||
191 | } else if (ir->o == IR_UREFC) { | ||
192 | if (irref_isk(ir->op1)) { /* Constant function: address the upvalue's tv relative to RID_GL. */ | ||
193 | GCfunc *fn = ir_kfunc(IR(ir->op1)); | ||
194 | GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv; | ||
195 | int64_t ofs = glofs(as, &uv->tv); | ||
196 | if (emit_checkofs(ins, ofs)) { | ||
197 | *ofsp = (int32_t)ofs; | ||
198 | return RID_GL; | ||
199 | } | ||
200 | } | ||
201 | } | ||
202 | } | ||
203 | *ofsp = 0; /* Nothing fused: use the reference's own register with offset 0. */ | ||
204 | return ra_alloc1(as, ref, allow); | ||
205 | } | ||
206 | |||
207 | /* Fuse m operand into arithmetic/logic instructions. Returns the encoded operand bits: a plain register, an immediate from emit_isk12()/emit_isk13(), or a shifted/extended register. */ | ||
208 | static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow) | ||
209 | { | ||
210 | IRIns *ir = IR(ref); | ||
211 | if (ra_hasreg(ir->r)) { /* Already in a register: use it directly. */ | ||
212 | ra_noweak(as, ir->r); | ||
213 | return A64F_M(ir->r); | ||
214 | } else if (irref_isk(ref)) { /* Constant: try to encode it as an immediate. */ | ||
215 | uint32_t m; | ||
216 | int64_t k = get_k64val(ir); | ||
217 | if ((ai & 0x1f000000) == 0x0a000000) /* NOTE(review): this opcode pattern presumably identifies logical instructions -- confirm. */ | ||
218 | m = emit_isk13(k, irt_is64(ir->t)); | ||
219 | else | ||
220 | m = emit_isk12(k); | ||
221 | if (m) /* Zero means not encodable: fall through to the register path at the end. */ | ||
222 | return m; | ||
223 | } else if (mayfuse(as, ref)) { | ||
224 | if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR && irref_isk(ir->op2)) || | ||
225 | (ir->o == IR_ADD && ir->op1 == ir->op2)) { /* Shift by constant, or x+x which is handled as a left shift by 1. */ | ||
226 | A64Shift sh = ir->o == IR_BSHR ? A64SH_LSR : | ||
227 | ir->o == IR_BSAR ? A64SH_ASR : A64SH_LSL; | ||
228 | int shift = ir->o == IR_ADD ? 1 : | ||
229 | (IR(ir->op2)->i & (irt_is64(ir->t) ? 63 : 31)); | ||
230 | IRIns *irl = IR(ir->op1); | ||
231 | if (sh == A64SH_LSL && /* Left shift (at most 4) of a sign-extending int to i64 CONV: use the SXTW extended register form. */ | ||
232 | irl->o == IR_CONV && | ||
233 | irl->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT) && | ||
234 | shift <= 4 && | ||
235 | canfuse(as, irl)) { | ||
236 | Reg m = ra_alloc1(as, irl->op1, allow); | ||
237 | return A64F_M(m) | A64F_EXSH(A64EX_SXTW, shift); | ||
238 | } else { /* Plain shifted register operand. */ | ||
239 | Reg m = ra_alloc1(as, ir->op1, allow); | ||
240 | return A64F_M(m) | A64F_SH(sh, shift); | ||
241 | } | ||
242 | } else if (ir->o == IR_CONV && /* Unshifted sign-extending int to i64 CONV: SXTW extended register. */ | ||
243 | ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)) { | ||
244 | Reg m = ra_alloc1(as, ir->op1, allow); | ||
245 | return A64F_M(m) | A64F_EX(A64EX_SXTW); | ||
246 | } | ||
247 | } | ||
248 | return A64F_M(ra_allocref(as, ref, allow)); /* Fallback: allocate a register for ref itself. */ | ||
249 | } | ||
250 | |||
251 | /* Fuse XLOAD/XSTORE reference into load/store operand. Emits the load/store ai for rd itself, using the best addressing form found for ref. */ | ||
252 | static void asm_fusexref(ASMState *as, A64Ins ai, Reg rd, IRRef ref, | ||
253 | RegSet allow) | ||
254 | { | ||
255 | IRIns *ir = IR(ref); | ||
256 | Reg base; | ||
257 | int32_t ofs = 0; | ||
258 | if (ra_noreg(ir->r) && canfuse(as, ir)) { | ||
259 | if (ir->o == IR_ADD) { | ||
260 | if (asm_isk32(as, ir->op2, &ofs) && emit_checkofs(ai, ofs)) { /* base + constant that passes emit_checkofs(). */ | ||
261 | ref = ir->op1; | ||
262 | } else { /* base + index in registers. */ | ||
263 | Reg rn, rm; | ||
264 | IRRef lref = ir->op1, rref = ir->op2; | ||
265 | IRIns *irl = IR(lref); | ||
266 | if (mayfuse(as, irl->op1)) { | ||
267 | unsigned int shift = 4; /* NOTE(review): presumably a 'no shift found' sentinel, since ai >> 30 below is at most 3 for a 32 bit ai -- confirm. */ | ||
268 | if (irl->o == IR_BSHL && irref_isk(irl->op2)) { | ||
269 | shift = (IR(irl->op2)->i & 63); | ||
270 | } else if (irl->o == IR_ADD && irl->op1 == irl->op2) { /* x+x counts as a left shift by 1. */ | ||
271 | shift = 1; | ||
272 | } | ||
273 | if ((ai >> 30) == shift) { /* Shift must equal the top two bits of ai to be folded into the addressing mode. */ | ||
274 | lref = irl->op1; | ||
275 | irl = IR(lref); | ||
276 | ai |= A64I_LS_SH; | ||
277 | } | ||
278 | } | ||
279 | if (irl->o == IR_CONV && /* Index is a sign-extending int to i64 CONV: use the SXTW index form instead. */ | ||
280 | irl->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT) && | ||
281 | canfuse(as, irl)) { | ||
282 | lref = irl->op1; | ||
283 | ai |= A64I_LS_SXTWx; | ||
284 | } else { | ||
285 | ai |= A64I_LS_LSLx; | ||
286 | } | ||
287 | rm = ra_alloc1(as, lref, allow); | ||
288 | rn = ra_alloc1(as, rref, rset_exclude(allow, rm)); | ||
289 | emit_dnm(as, (ai^A64I_LS_R), (rd & 31), rn, rm); | ||
290 | return; | ||
291 | } | ||
292 | } else if (ir->o == IR_STRREF) { | ||
293 | if (asm_isk32(as, ir->op2, &ofs)) { /* Constant on either side becomes the offset, the other operand the base. */ | ||
294 | ref = ir->op1; | ||
295 | } else if (asm_isk32(as, ir->op1, &ofs)) { | ||
296 | ref = ir->op2; | ||
297 | } else { /* Neither operand is a usable 32 bit constant: add them into rd, then access via rd + offset. */ | ||
298 | Reg refk = irref_isk(ir->op1) ? ir->op1 : ir->op2; /* Holds an IR reference despite being declared as Reg. */ | ||
299 | Reg refv = irref_isk(ir->op1) ? ir->op2 : ir->op1; /* Holds an IR reference despite being declared as Reg. */ | ||
300 | Reg rn = ra_alloc1(as, refv, allow); | ||
301 | IRIns *irr = IR(refk); | ||
302 | uint32_t m; | ||
303 | if (irr+1 == ir && !ra_used(irr) && /* refk is the directly preceding, so far unused ADD with a constant op2. */ | ||
304 | irr->o == IR_ADD && irref_isk(irr->op2)) { | ||
305 | ofs = sizeof(GCstr) + IR(irr->op2)->i; /* Fold that ADD's constant into the offset. */ | ||
306 | if (emit_checkofs(ai, ofs)) { | ||
307 | Reg rm = ra_alloc1(as, irr->op1, rset_exclude(allow, rn)); | ||
308 | m = A64F_M(rm) | A64F_EX(A64EX_SXTW); | ||
309 | goto skipopm; | ||
310 | } | ||
311 | } | ||
312 | m = asm_fuseopm(as, 0, refk, rset_exclude(allow, rn)); | ||
313 | ofs = sizeof(GCstr); | ||
314 | skipopm: | ||
315 | emit_lso(as, ai, rd, rd, ofs); | ||
316 | emit_dn(as, A64I_ADDx^m, rd, rn); | ||
317 | return; | ||
318 | } | ||
319 | ofs += sizeof(GCstr); /* Constant offset plus sizeof(GCstr). */ | ||
320 | if (!emit_checkofs(ai, ofs)) { /* Offset not encodable: put it in a register and use the register-indexed form. */ | ||
321 | Reg rn = ra_alloc1(as, ref, allow); | ||
322 | Reg rm = ra_allock(as, ofs, rset_exclude(allow, rn)); | ||
323 | emit_dnm(as, (ai^A64I_LS_R)|A64I_LS_UXTWx, rd, rn, rm); | ||
324 | return; | ||
325 | } | ||
326 | } | ||
327 | } | ||
328 | base = ra_alloc1(as, ref, allow); /* Default form: base register + immediate offset (0 unless set above). */ | ||
329 | emit_lso(as, ai, (rd & 31), base, ofs); | ||
330 | } | ||
331 | |||
332 | /* Fuse FP multiply-add/sub. Returns 1 if the fused instruction was emitted, else 0. ai is used when the MUL is the left operand, air when it is the right operand. */ | ||
333 | static int asm_fusemadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air) | ||
334 | { | ||
335 | IRRef lref = ir->op1, rref = ir->op2; | ||
336 | IRIns *irm; | ||
337 | if (lref != rref && | ||
338 | ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) && /* Left operand is a fusable MUL without a register. */ | ||
339 | ra_noreg(irm->r)) || | ||
340 | (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) && /* Else the right operand is: rref then names the other operand and ai is replaced by air. */ | ||
341 | (rref = lref, ai = air, ra_noreg(irm->r))))) { | ||
342 | Reg dest = ra_dest(as, ir, RSET_FPR); | ||
343 | Reg add = ra_hintalloc(as, rref, dest, RSET_FPR); | ||
344 | Reg left = ra_alloc2(as, irm, | ||
345 | rset_exclude(rset_exclude(RSET_FPR, dest), add)); | ||
346 | Reg right = (left >> 8); left &= 255; /* Unpack the two registers returned by ra_alloc2(). */ | ||
347 | emit_dnma(as, ai, (dest & 31), (left & 31), (right & 31), (add & 31)); | ||
348 | return 1; | ||
349 | } | ||
350 | return 0; | ||
351 | } | ||
352 | |||
353 | /* Fuse BAND + BSHL/BSHR into UBFM. Returns 1 if the fused instruction was emitted, else 0. */ | ||
354 | static int asm_fuseandshift(ASMState *as, IRIns *ir) | ||
355 | { | ||
356 | IRIns *irl = IR(ir->op1); | ||
357 | lua_assert(ir->o == IR_BAND); | ||
358 | if (canfuse(as, irl) && irref_isk(ir->op2)) { /* Needs a fusable left operand and a constant mask. */ | ||
359 | uint64_t mask = get_k64val(IR(ir->op2)); | ||
360 | if (irref_isk(irl->op2) && (irl->o == IR_BSHR || irl->o == IR_BSHL)) { | ||
361 | int32_t shmask = irt_is64(irl->t) ? 63 : 31; | ||
362 | int32_t shift = (IR(irl->op2)->i & shmask); | ||
363 | int32_t imms = shift; | ||
364 | if (irl->o == IR_BSHL) { /* Left shift: drop the shifted-in mask bits and use the negated shift (mod width) as rotate amount. */ | ||
365 | mask >>= shift; | ||
366 | shift = (shmask-shift+1) & shmask; | ||
367 | imms = 0; | ||
368 | } | ||
369 | if (mask && !((mask+1) & mask)) { /* Contiguous 1-bits at the bottom. */ | ||
370 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
371 | Reg left = ra_alloc1(as, irl->op1, RSET_GPR); | ||
372 | A64Ins ai = shmask == 63 ? A64I_UBFMx : A64I_UBFMw; | ||
373 | imms += 63 - emit_clz64(mask); /* Add 63 minus the leading zero count reported by emit_clz64(). */ | ||
374 | if (imms > shmask) imms = shmask; /* Clamp to the operand width. */ | ||
375 | emit_dn(as, ai | A64F_IMMS(imms) | A64F_IMMR(shift), dest, left); | ||
376 | return 1; | ||
377 | } | ||
378 | } | ||
379 | } | ||
380 | return 0; | ||
381 | } | ||
382 | |||
383 | /* Fuse BOR(BSHL, BSHR) into EXTR/ROR. Returns 1 if the fused instruction was emitted, else 0. */ | ||
384 | static int asm_fuseorshift(ASMState *as, IRIns *ir) | ||
385 | { | ||
386 | IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); | ||
387 | lua_assert(ir->o == IR_BOR); | ||
388 | if (canfuse(as, irl) && canfuse(as, irr) && /* Both operands fusable, one a BSHL and the other a BSHR. */ | ||
389 | ((irl->o == IR_BSHR && irr->o == IR_BSHL) || | ||
390 | (irl->o == IR_BSHL && irr->o == IR_BSHR))) { | ||
391 | if (irref_isk(irl->op2) && irref_isk(irr->op2)) { /* Both shift counts must be constants. */ | ||
392 | IRRef lref = irl->op1, rref = irr->op1; | ||
393 | uint32_t lshift = IR(irl->op2)->i, rshift = IR(irr->op2)->i; | ||
394 | if (irl->o == IR_BSHR) { /* BSHR needs to be the right operand. */ | ||
395 | uint32_t tmp2; | ||
396 | IRRef tmp1 = lref; lref = rref; rref = tmp1; | ||
397 | tmp2 = lshift; lshift = rshift; rshift = tmp2; | ||
398 | } | ||
399 | if (rshift + lshift == (irt_is64(ir->t) ? 64 : 32)) { /* Shift counts must add up to the operand width. */ | ||
400 | A64Ins ai = irt_is64(ir->t) ? A64I_EXTRx : A64I_EXTRw; | ||
401 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
402 | Reg left = ra_alloc1(as, lref, RSET_GPR); | ||
403 | Reg right = ra_alloc1(as, rref, rset_exclude(RSET_GPR, left)); | ||
404 | emit_dnm(as, ai | A64F_IMMS(rshift), dest, left, right); | ||
405 | return 1; | ||
406 | } | ||
407 | } | ||
408 | } | ||
409 | return 0; | ||
410 | } | ||
411 | |||
412 | /* -- Calls --------------------------------------------------------------- */ | ||
413 | |||
414 | /* Generate a call to a C function. Arguments are assigned to GPR/FPR argument registers first; the remaining ones are stored to 8 byte stack slots at increasing offsets starting at 0. */ | ||
415 | static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) | ||
416 | { | ||
417 | uint32_t n, nargs = CCI_XNARGS(ci); | ||
418 | int32_t ofs = 0; | ||
419 | Reg gpr, fpr = REGARG_FIRSTFPR; | ||
420 | if ((void *)ci->func) /* With a null ci->func no call instruction is emitted here. */ | ||
421 | emit_call(as, (void *)ci->func); | ||
422 | for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++) /* Reset the cost entry of every GPR argument register. */ | ||
423 | as->cost[gpr] = REGCOST(~0u, ASMREF_L); | ||
424 | gpr = REGARG_FIRSTGPR; | ||
425 | for (n = 0; n < nargs; n++) { /* Setup args. */ | ||
426 | IRRef ref = args[n]; | ||
427 | IRIns *ir = IR(ref); | ||
428 | if (ref) { /* Zero refs are skipped without consuming a register or slot. */ | ||
429 | if (irt_isfp(ir->t)) { | ||
430 | if (fpr <= REGARG_LASTFPR) { | ||
431 | lua_assert(rset_test(as->freeset, fpr)); /* Must have been evicted. */ | ||
432 | ra_leftov(as, fpr, ref); | ||
433 | fpr++; | ||
434 | } else { /* Out of FPR argument registers: pass on the stack. */ | ||
435 | Reg r = ra_alloc1(as, ref, RSET_FPR); | ||
436 | emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_isnum(ir->t)) ? 4 : 0)); /* On big-endian a non-num FP value goes at +4 within its slot. */ | ||
437 | ofs += 8; | ||
438 | } | ||
439 | } else { | ||
440 | if (gpr <= REGARG_LASTGPR) { | ||
441 | lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. */ | ||
442 | ra_leftov(as, gpr, ref); | ||
443 | gpr++; | ||
444 | } else { /* Out of GPR argument registers: pass on the stack. */ | ||
445 | Reg r = ra_alloc1(as, ref, RSET_GPR); | ||
446 | emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_is64(ir->t)) ? 4 : 0)); /* On big-endian a non-64 bit value goes at +4 within its slot. */ | ||
447 | ofs += 8; | ||
448 | } | ||
449 | } | ||
450 | } | ||
451 | } | ||
452 | } | ||
453 | |||
454 | /* Setup result reg/sp for call. Evict scratch regs. The result is bound to RID_RET, RID_FPRET, or (for CCI_CASTU64 FP results) moved from RID_RET into an FPR. */ | ||
455 | static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) | ||
456 | { | ||
457 | RegSet drop = RSET_SCRATCH; | ||
458 | if (ra_hasreg(ir->r)) | ||
459 | rset_clear(drop, ir->r); /* Dest reg handled below. */ | ||
460 | ra_evictset(as, drop); /* Evictions must be performed first. */ | ||
461 | if (ra_used(ir)) { /* Nothing more to do if the result is unused. */ | ||
462 | lua_assert(!irt_ispri(ir->t)); | ||
463 | if (irt_isfp(ir->t)) { | ||
464 | if (ci->flags & CCI_CASTU64) { /* FP result arrives in the GPR RID_RET: FMOV it into the destination FPR. */ | ||
465 | Reg dest = ra_dest(as, ir, RSET_FPR) & 31; | ||
466 | emit_dn(as, irt_isnum(ir->t) ? A64I_FMOV_D_R : A64I_FMOV_S_R, | ||
467 | dest, RID_RET); | ||
468 | } else { | ||
469 | ra_destreg(as, ir, RID_FPRET); | ||
470 | } | ||
471 | } else { | ||
472 | ra_destreg(as, ir, RID_RET); | ||
473 | } | ||
474 | } | ||
475 | UNUSED(ci); /* ci is read above via ci->flags, so this looks redundant here. */ | ||
476 | } | ||
477 | |||
478 | static void asm_callx(ASMState *as, IRIns *ir) | ||
479 | { /* Assemble a call whose target is given by ir->op2: a constant address is called via asm_gencall(), any other target via a BLR emitted here. */ | ||
480 | IRRef args[CCI_NARGS_MAX*2]; | ||
481 | CCallInfo ci; | ||
482 | IRRef func; | ||
483 | IRIns *irf; | ||
484 | ci.flags = asm_callx_flags(as, ir); | ||
485 | asm_collectargs(as, ir, &ci, args); | ||
486 | asm_setupresult(as, ir, &ci); | ||
487 | func = ir->op2; irf = IR(func); | ||
488 | if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } /* Look through a CARG wrapper: the function is its op1. */ | ||
489 | if (irref_isk(func)) { /* Call to constant address. */ | ||
490 | ci.func = (ASMFunction)(ir_k64(irf)->u64); | ||
491 | } else { /* Need a non-argument register for indirect calls. */ | ||
492 | Reg freg = ra_alloc1(as, func, RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED); | ||
493 | emit_n(as, A64I_BLR, freg); | ||
494 | ci.func = (ASMFunction)(void *)0; /* Null func: asm_gencall() then emits no call of its own. */ | ||
495 | } | ||
496 | asm_gencall(as, &ci, args); | ||
497 | } | ||
498 | |||
499 | /* -- Returns ------------------------------------------------------------- */ | ||
500 | |||
501 | /* Return to lower frame. Guard that it goes to the right spot. The guard compares the 8 byte value at base-8 with the constant pc from ir->op2 and exits if they differ. */ | ||
502 | static void asm_retf(ASMState *as, IRIns *ir) | ||
503 | { | ||
504 | Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); | ||
505 | void *pc = ir_kptr(IR(ir->op2)); | ||
506 | int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); /* Slot delta from operand A of the bytecode instruction preceding pc. */ | ||
507 | as->topslot -= (BCReg)delta; | ||
508 | if ((int32_t)as->topslot < 0) as->topslot = 0; /* Clamp at zero. */ | ||
509 | irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ | ||
510 | /* Need to force a spill on REF_BASE now to update the stack slot. */ | ||
511 | emit_lso(as, A64I_STRx, base, RID_SP, ra_spill(as, IR(REF_BASE))); | ||
512 | emit_setgl(as, base, jit_base); | ||
513 | emit_addptr(as, base, -8*delta); /* Move base down by delta slots of 8 bytes. */ | ||
514 | asm_guardcc(as, CC_NE); | ||
515 | emit_nm(as, A64I_CMPx, RID_TMP, | ||
516 | ra_allock(as, i64ptr(pc), rset_exclude(RSET_GPR, base))); | ||
517 | emit_lso(as, A64I_LDRx, RID_TMP, base, -8); /* NOTE(review): this load feeds the CMP emitted above it, so emit_* calls appear to be listed in reverse execution order -- confirm. */ | ||
518 | } | ||
519 | |||
520 | /* -- Type conversions ---------------------------------------------------- */ | ||
521 | |||
522 | static void asm_tointg(ASMState *as, IRIns *ir, Reg left) | ||
523 | { /* Guarded double to int32 conversion. Data flow: dest = int32(left), tmp = double(dest), compare tmp with left, exit on CC_NE; the emit calls below list these steps last-to-first. */ | ||
524 | Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); | ||
525 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
526 | asm_guardcc(as, CC_NE); | ||
527 | emit_nm(as, A64I_FCMPd, (tmp & 31), (left & 31)); | ||
528 | emit_dn(as, A64I_FCVT_F64_S32, (tmp & 31), dest); | ||
529 | emit_dn(as, A64I_FCVT_S32_F64, dest, (left & 31)); | ||
530 | } | ||
531 | |||
532 | static void asm_tobit(ASMState *as, IRIns *ir) | ||
533 | { /* Assemble TOBIT: tmp = op1 + op2 as an FP add, then dest receives tmp via A64I_FMOV_R_S. */ | ||
534 | RegSet allow = RSET_FPR; | ||
535 | Reg left = ra_alloc1(as, ir->op1, allow); | ||
536 | Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left)); /* rset_clear() also updates allow, so later picks exclude left. */ | ||
537 | Reg tmp = ra_scratch(as, rset_clear(allow, right)); | ||
538 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
539 | emit_dn(as, A64I_FMOV_R_S, dest, (tmp & 31)); | ||
540 | emit_dnm(as, A64I_FADDd, (tmp & 31), (left & 31), (right & 31)); | ||
541 | } | ||
542 | |||
543 | static void asm_conv(ASMState *as, IRIns *ir) | ||
544 | { /* Assemble CONV. The source type comes from ir->op2, the destination type from ir->t; cases are FP to FP, int to FP, FP to int, 8/16 bit extension, and 32/64 bit integer casts. */ | ||
545 | IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); | ||
546 | int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64); | ||
547 | int stfp = (st == IRT_NUM || st == IRT_FLOAT); | ||
548 | IRRef lref = ir->op1; | ||
549 | lua_assert(irt_type(ir->t) != st); | ||
550 | if (irt_isfp(ir->t)) { | ||
551 | Reg dest = ra_dest(as, ir, RSET_FPR); | ||
552 | if (stfp) { /* FP to FP conversion. */ | ||
553 | emit_dn(as, st == IRT_NUM ? A64I_FCVT_F32_F64 : A64I_FCVT_F64_F32, | ||
554 | (dest & 31), (ra_alloc1(as, lref, RSET_FPR) & 31)); | ||
555 | } else { /* Integer to FP conversion. */ | ||
556 | Reg left = ra_alloc1(as, lref, RSET_GPR); | ||
557 | A64Ins ai = irt_isfloat(ir->t) ? /* Pick by destination (float/double), source width (bit st of IRT_IS64) and signedness. */ | ||
558 | (((IRT_IS64 >> st) & 1) ? | ||
559 | (st == IRT_I64 ? A64I_FCVT_F32_S64 : A64I_FCVT_F32_U64) : | ||
560 | (st == IRT_INT ? A64I_FCVT_F32_S32 : A64I_FCVT_F32_U32)) : | ||
561 | (((IRT_IS64 >> st) & 1) ? | ||
562 | (st == IRT_I64 ? A64I_FCVT_F64_S64 : A64I_FCVT_F64_U64) : | ||
563 | (st == IRT_INT ? A64I_FCVT_F64_S32 : A64I_FCVT_F64_U32)); | ||
564 | emit_dn(as, ai, (dest & 31), left); | ||
565 | } | ||
566 | } else if (stfp) { /* FP to integer conversion. */ | ||
567 | if (irt_isguard(ir->t)) { | ||
568 | /* Checked conversions are only supported from number to int. */ | ||
569 | lua_assert(irt_isint(ir->t) && st == IRT_NUM); | ||
570 | asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); | ||
571 | } else { | ||
572 | Reg left = ra_alloc1(as, lref, RSET_FPR); | ||
573 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
574 | A64Ins ai = irt_is64(ir->t) ? /* Pick by destination width, source (num/float) and destination signedness. */ | ||
575 | (st == IRT_NUM ? | ||
576 | (irt_isi64(ir->t) ? A64I_FCVT_S64_F64 : A64I_FCVT_U64_F64) : | ||
577 | (irt_isi64(ir->t) ? A64I_FCVT_S64_F32 : A64I_FCVT_U64_F32)) : | ||
578 | (st == IRT_NUM ? | ||
579 | (irt_isint(ir->t) ? A64I_FCVT_S32_F64 : A64I_FCVT_U32_F64) : | ||
580 | (irt_isint(ir->t) ? A64I_FCVT_S32_F32 : A64I_FCVT_U32_F32)); | ||
581 | emit_dn(as, ai, dest, (left & 31)); | ||
582 | } | ||
583 | } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ | ||
584 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
585 | Reg left = ra_alloc1(as, lref, RSET_GPR); | ||
586 | A64Ins ai = st == IRT_I8 ? A64I_SXTBw : | ||
587 | st == IRT_U8 ? A64I_UXTBw : | ||
588 | st == IRT_I16 ? A64I_SXTHw : A64I_UXTHw; | ||
589 | lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); | ||
590 | emit_dn(as, ai, dest, left); | ||
591 | } else { /* Remaining integer to integer conversions. */ | ||
592 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
593 | if (irt_is64(ir->t)) { | ||
594 | if (st64 || !(ir->op2 & IRCONV_SEXT)) { | ||
595 | /* 64/64 bit no-op (cast) or 32 to 64 bit zero extension. */ | ||
596 | ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ | ||
597 | } else { /* 32 to 64 bit sign extension. */ | ||
598 | Reg left = ra_alloc1(as, lref, RSET_GPR); | ||
599 | emit_dn(as, A64I_SXTW, dest, left); | ||
600 | } | ||
601 | } else { | ||
602 | if (st64) { | ||
603 | /* This is either a 32 bit reg/reg mov which zeroes the hiword | ||
604 | ** or a load of the loword from a 64 bit address. | ||
605 | */ | ||
606 | Reg left = ra_alloc1(as, lref, RSET_GPR); | ||
607 | emit_dm(as, A64I_MOVw, dest, left); | ||
608 | } else { /* 32/32 bit no-op (cast). */ | ||
609 | ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ | ||
610 | } | ||
611 | } | ||
612 | } | ||
613 | } | ||
614 | |||
615 | static void asm_strto(ASMState *as, IRIns *ir) | ||
616 | { /* Assemble STRTO: call lj_strscan_num(str, n) with n pointing at sp+ofs, guard on its return value in RID_RET via A64I_CBZ, and fetch the result from the stack if it is needed in a register. */ | ||
617 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; | ||
618 | IRRef args[2]; | ||
619 | Reg dest = 0, tmp; | ||
620 | int destused = ra_used(ir); | ||
621 | int32_t ofs = 0; | ||
622 | ra_evictset(as, RSET_SCRATCH); | ||
623 | if (destused) { | ||
624 | if (ra_hasspill(ir->s)) { /* Result has a spill slot: point the callee's output at it. */ | ||
625 | ofs = sps_scale(ir->s); | ||
626 | destused = 0; /* Suppresses the load from [sp, 0] below. */ | ||
627 | if (ra_hasreg(ir->r)) { /* A register is assigned too: free it and load it from the spill slot. */ | ||
628 | ra_free(as, ir->r); | ||
629 | ra_modified(as, ir->r); | ||
630 | emit_spload(as, ir, ir->r, ofs); | ||
631 | } | ||
632 | } else { | ||
633 | dest = ra_dest(as, ir, RSET_FPR); | ||
634 | } | ||
635 | } | ||
636 | if (destused) | ||
637 | emit_lso(as, A64I_LDRd, (dest & 31), RID_SP, 0); /* Load the result from offset 0 of sp (ofs is still 0 on this path). */ | ||
638 | asm_guardcnb(as, A64I_CBZ, RID_RET); | ||
639 | args[0] = ir->op1; /* GCstr *str */ | ||
640 | args[1] = ASMREF_TMP1; /* TValue *n */ | ||
641 | asm_gencall(as, ci, args); | ||
642 | tmp = ra_releasetmp(as, ASMREF_TMP1); | ||
643 | emit_opk(as, A64I_ADDx, tmp, RID_SP, ofs, RSET_GPR); /* tmp = sp + ofs, the value of the second call argument. */ | ||
644 | } | ||
645 | |||
646 | /* -- Memory references --------------------------------------------------- */ | ||
647 | |||
648 | /* Store tagged value for ref at base+ofs. */ | ||
649 | static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref) | ||
650 | { | ||
651 | RegSet allow = rset_exclude(RSET_GPR, base); | ||
652 | IRIns *ir = IR(ref); | ||
653 | lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); | ||
654 | if (irref_isk(ref)) { | ||
655 | TValue k; | ||
656 | lj_ir_kvalue(as->J->L, &k, ir); | ||
657 | emit_lso(as, A64I_STRx, ra_allock(as, k.u64, allow), base, ofs); | ||
658 | } else { | ||
659 | Reg src = ra_alloc1(as, ref, allow); | ||
660 | rset_clear(allow, src); | ||
661 | if (irt_isinteger(ir->t)) { | ||
662 | Reg type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow); | ||
663 | emit_lso(as, A64I_STRx, RID_TMP, base, ofs); | ||
664 | emit_dnm(as, A64I_ADDx | A64F_EX(A64EX_UXTW), RID_TMP, type, src); | ||
665 | } else { | ||
666 | Reg type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); | ||
667 | emit_lso(as, A64I_STRx, RID_TMP, base, ofs); | ||
668 | emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), RID_TMP, src, type); | ||
669 | } | ||
670 | } | ||
671 | } | ||
672 | |||
673 | /* Get pointer to TValue. */ | ||
674 | static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) | ||
675 | { | ||
676 | IRIns *ir = IR(ref); | ||
677 | if (irt_isnum(ir->t)) { | ||
678 | if (irref_isk(ref)) { | ||
679 | /* Use the number constant itself as a TValue. */ | ||
680 | ra_allockreg(as, i64ptr(ir_knum(ir)), dest); | ||
681 | } else { | ||
682 | /* Otherwise force a spill and use the spill slot. */ | ||
683 | emit_opk(as, A64I_ADDx, dest, RID_SP, ra_spill(as, ir), RSET_GPR); | ||
684 | } | ||
685 | } else { | ||
686 | /* Otherwise use g->tmptv to hold the TValue. */ | ||
687 | asm_tvstore64(as, dest, 0, ref); | ||
688 | ra_allockreg(as, i64ptr(&J2G(as->J)->tmptv), dest); | ||
689 | } | ||
690 | } | ||
691 | |||
692 | static void asm_aref(ASMState *as, IRIns *ir) | ||
693 | { | ||
694 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
695 | Reg idx, base; | ||
696 | if (irref_isk(ir->op2)) { | ||
697 | IRRef tab = IR(ir->op1)->op1; | ||
698 | int32_t ofs = asm_fuseabase(as, tab); | ||
699 | IRRef refa = ofs ? tab : ir->op1; | ||
700 | uint32_t k = emit_isk12(ofs + 8*IR(ir->op2)->i); | ||
701 | if (k) { | ||
702 | base = ra_alloc1(as, refa, RSET_GPR); | ||
703 | emit_dn(as, A64I_ADDx^k, dest, base); | ||
704 | return; | ||
705 | } | ||
706 | } | ||
707 | base = ra_alloc1(as, ir->op1, RSET_GPR); | ||
708 | idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); | ||
709 | emit_dnm(as, A64I_ADDx | A64F_EXSH(A64EX_UXTW, 3), dest, base, idx); | ||
710 | } | ||
711 | |||
712 | /* Inlined hash lookup. Specialized for key type and for const keys. | ||
713 | ** The equivalent C code is: | ||
714 | ** Node *n = hashkey(t, key); | ||
715 | ** do { | ||
716 | ** if (lj_obj_equal(&n->key, key)) return &n->val; | ||
717 | ** } while ((n = nextnode(n))); | ||
718 | ** return niltv(L); | ||
719 | */ | ||
720 | static void asm_href(ASMState *as, IRIns *ir, IROp merge) | ||
721 | { | ||
722 | RegSet allow = RSET_GPR; | ||
723 | int destused = ra_used(ir); | ||
724 | Reg dest = ra_dest(as, ir, allow); | ||
725 | Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); | ||
726 | Reg key = 0, tmp = RID_TMP; | ||
727 | Reg ftmp = RID_NONE, type = RID_NONE, scr = RID_NONE, tisnum = RID_NONE; | ||
728 | IRRef refkey = ir->op2; | ||
729 | IRIns *irkey = IR(refkey); | ||
730 | int isk = irref_isk(ir->op2); | ||
731 | IRType1 kt = irkey->t; | ||
732 | uint32_t k = 0; | ||
733 | uint32_t khash; | ||
734 | MCLabel l_end, l_loop, l_next; | ||
735 | rset_clear(allow, tab); | ||
736 | |||
737 | if (!isk) { | ||
738 | key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow); | ||
739 | rset_clear(allow, key); | ||
740 | if (!irt_isstr(kt)) { | ||
741 | tmp = ra_scratch(as, allow); | ||
742 | rset_clear(allow, tmp); | ||
743 | } | ||
744 | } else if (irt_isnum(kt)) { | ||
745 | int64_t val = (int64_t)ir_knum(irkey)->u64; | ||
746 | if (!(k = emit_isk12(val))) { | ||
747 | key = ra_allock(as, val, allow); | ||
748 | rset_clear(allow, key); | ||
749 | } | ||
750 | } else if (!irt_ispri(kt)) { | ||
751 | if (!(k = emit_isk12(irkey->i))) { | ||
752 | key = ra_alloc1(as, refkey, allow); | ||
753 | rset_clear(allow, key); | ||
754 | } | ||
755 | } | ||
756 | |||
757 | /* Allocate constants early. */ | ||
758 | if (irt_isnum(kt)) { | ||
759 | if (!isk) { | ||
760 | tisnum = ra_allock(as, LJ_TISNUM << 15, allow); | ||
761 | ftmp = ra_scratch(as, rset_exclude(RSET_FPR, key)); | ||
762 | rset_clear(allow, tisnum); | ||
763 | } | ||
764 | } else if (irt_isaddr(kt)) { | ||
765 | if (isk) { | ||
766 | int64_t kk = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64; | ||
767 | scr = ra_allock(as, kk, allow); | ||
768 | } else { | ||
769 | scr = ra_scratch(as, allow); | ||
770 | } | ||
771 | rset_clear(allow, scr); | ||
772 | } else { | ||
773 | lua_assert(irt_ispri(kt) && !irt_isnil(kt)); | ||
774 | type = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow); | ||
775 | scr = ra_scratch(as, rset_clear(allow, type)); | ||
776 | rset_clear(allow, scr); | ||
777 | } | ||
778 | |||
779 | /* Key not found in chain: jump to exit (if merged) or load niltv. */ | ||
780 | l_end = emit_label(as); | ||
781 | as->invmcp = NULL; | ||
782 | if (merge == IR_NE) | ||
783 | asm_guardcc(as, CC_AL); | ||
784 | else if (destused) | ||
785 | emit_loada(as, dest, niltvg(J2G(as->J))); | ||
786 | |||
787 | /* Follow hash chain until the end. */ | ||
788 | l_loop = --as->mcp; | ||
789 | emit_n(as, A64I_CMPx^A64I_K12^0, dest); | ||
790 | emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next)); | ||
791 | l_next = emit_label(as); | ||
792 | |||
793 | /* Type and value comparison. */ | ||
794 | if (merge == IR_EQ) | ||
795 | asm_guardcc(as, CC_EQ); | ||
796 | else | ||
797 | emit_cond_branch(as, CC_EQ, l_end); | ||
798 | |||
799 | if (irt_isnum(kt)) { | ||
800 | if (isk) { | ||
801 | /* Assumes -0.0 is already canonicalized to +0.0. */ | ||
802 | if (k) | ||
803 | emit_n(as, A64I_CMPx^k, tmp); | ||
804 | else | ||
805 | emit_nm(as, A64I_CMPx, key, tmp); | ||
806 | emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64)); | ||
807 | } else { | ||
808 | emit_nm(as, A64I_FCMPd, key, ftmp); | ||
809 | emit_dn(as, A64I_FMOV_D_R, (ftmp & 31), (tmp & 31)); | ||
810 | emit_cond_branch(as, CC_LO, l_next); | ||
811 | emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), tisnum, tmp); | ||
812 | emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.n)); | ||
813 | } | ||
814 | } else if (irt_isaddr(kt)) { | ||
815 | if (isk) { | ||
816 | emit_nm(as, A64I_CMPx, scr, tmp); | ||
817 | emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64)); | ||
818 | } else { | ||
819 | emit_nm(as, A64I_CMPx, tmp, scr); | ||
820 | emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key.u64)); | ||
821 | } | ||
822 | } else { | ||
823 | emit_nm(as, A64I_CMPw, scr, type); | ||
824 | emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key)); | ||
825 | } | ||
826 | |||
827 | *l_loop = A64I_BCC | A64F_S19(as->mcp - l_loop) | CC_NE; | ||
828 | if (!isk && irt_isaddr(kt)) { | ||
829 | type = ra_allock(as, (int32_t)irt_toitype(kt), allow); | ||
830 | emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), tmp, key, type); | ||
831 | rset_clear(allow, type); | ||
832 | } | ||
833 | /* Load main position relative to tab->node into dest. */ | ||
834 | khash = isk ? ir_khash(irkey) : 1; | ||
835 | if (khash == 0) { | ||
836 | emit_lso(as, A64I_LDRx, dest, tab, offsetof(GCtab, node)); | ||
837 | } else { | ||
838 | emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 3), dest, tmp, dest); | ||
839 | emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 1), dest, dest, dest); | ||
840 | emit_lso(as, A64I_LDRx, tmp, tab, offsetof(GCtab, node)); | ||
841 | if (isk) { | ||
842 | Reg tmphash = ra_allock(as, khash, allow); | ||
843 | emit_dnm(as, A64I_ANDw, dest, dest, tmphash); | ||
844 | emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask)); | ||
845 | } else if (irt_isstr(kt)) { | ||
846 | /* Fetch of str->hash is cheaper than ra_allock. */ | ||
847 | emit_dnm(as, A64I_ANDw, dest, dest, tmp); | ||
848 | emit_lso(as, A64I_LDRw, tmp, key, offsetof(GCstr, hash)); | ||
849 | emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask)); | ||
850 | } else { /* Must match with hash*() in lj_tab.c. */ | ||
851 | emit_dnm(as, A64I_ANDw, dest, dest, tmp); | ||
852 | emit_lso(as, A64I_LDRw, tmp, tab, offsetof(GCtab, hmask)); | ||
853 | emit_dnm(as, A64I_SUBw, dest, dest, tmp); | ||
854 | emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT3)), tmp, tmp, tmp); | ||
855 | emit_dnm(as, A64I_EORw, dest, dest, tmp); | ||
856 | emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT2)), dest, dest, dest); | ||
857 | emit_dnm(as, A64I_SUBw, tmp, tmp, dest); | ||
858 | emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT1)), dest, dest, dest); | ||
859 | emit_dnm(as, A64I_EORw, tmp, tmp, dest); | ||
860 | if (irt_isnum(kt)) { | ||
861 | emit_dnm(as, A64I_ADDw, dest, dest, dest); | ||
862 | emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest); | ||
863 | emit_dm(as, A64I_MOVw, tmp, dest); | ||
864 | emit_dn(as, A64I_FMOV_R_D, dest, (key & 31)); | ||
865 | } else { | ||
866 | checkmclim(as); | ||
867 | emit_dm(as, A64I_MOVw, tmp, key); | ||
868 | emit_dnm(as, A64I_EORw, dest, dest, | ||
869 | ra_allock(as, irt_toitype(kt) << 15, allow)); | ||
870 | emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest); | ||
871 | emit_dm(as, A64I_MOVx, dest, key); | ||
872 | } | ||
873 | } | ||
874 | } | ||
875 | } | ||
876 | |||
877 | static void asm_hrefk(ASMState *as, IRIns *ir) | ||
878 | { | ||
879 | IRIns *kslot = IR(ir->op2); | ||
880 | IRIns *irkey = IR(kslot->op1); | ||
881 | int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node)); | ||
882 | int32_t kofs = ofs + (int32_t)offsetof(Node, key); | ||
883 | int bigofs = !emit_checkofs(A64I_LDRx, ofs); | ||
884 | Reg dest = (ra_used(ir) || bigofs) ? ra_dest(as, ir, RSET_GPR) : RID_NONE; | ||
885 | Reg node = ra_alloc1(as, ir->op1, RSET_GPR); | ||
886 | Reg key, idx = node; | ||
887 | RegSet allow = rset_exclude(RSET_GPR, node); | ||
888 | uint64_t k; | ||
889 | lua_assert(ofs % sizeof(Node) == 0); | ||
890 | if (bigofs) { | ||
891 | idx = dest; | ||
892 | rset_clear(allow, dest); | ||
893 | kofs = (int32_t)offsetof(Node, key); | ||
894 | } else if (ra_hasreg(dest)) { | ||
895 | emit_opk(as, A64I_ADDx, dest, node, ofs, allow); | ||
896 | } | ||
897 | asm_guardcc(as, CC_NE); | ||
898 | if (irt_ispri(irkey->t)) { | ||
899 | k = ~((int64_t)~irt_toitype(irkey->t) << 47); | ||
900 | } else if (irt_isnum(irkey->t)) { | ||
901 | k = ir_knum(irkey)->u64; | ||
902 | } else { | ||
903 | k = ((uint64_t)irt_toitype(irkey->t) << 47) | (uint64_t)ir_kgc(irkey); | ||
904 | } | ||
905 | key = ra_scratch(as, allow); | ||
906 | emit_nm(as, A64I_CMPx, key, ra_allock(as, k, rset_exclude(allow, key))); | ||
907 | emit_lso(as, A64I_LDRx, key, idx, kofs); | ||
908 | if (bigofs) | ||
909 | emit_opk(as, A64I_ADDx, dest, node, ofs, RSET_GPR); | ||
910 | } | ||
911 | |||
912 | static void asm_uref(ASMState *as, IRIns *ir) | ||
913 | { | ||
914 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
915 | if (irref_isk(ir->op1)) { | ||
916 | GCfunc *fn = ir_kfunc(IR(ir->op1)); | ||
917 | MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; | ||
918 | emit_lsptr(as, A64I_LDRx, dest, v); | ||
919 | } else { | ||
920 | Reg uv = ra_scratch(as, RSET_GPR); | ||
921 | Reg func = ra_alloc1(as, ir->op1, RSET_GPR); | ||
922 | if (ir->o == IR_UREFC) { | ||
923 | asm_guardcc(as, CC_NE); | ||
924 | emit_n(as, (A64I_CMPx^A64I_K12) | A64F_U12(1), RID_TMP); | ||
925 | emit_opk(as, A64I_ADDx, dest, uv, | ||
926 | (int32_t)offsetof(GCupval, tv), RSET_GPR); | ||
927 | emit_lso(as, A64I_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed)); | ||
928 | } else { | ||
929 | emit_lso(as, A64I_LDRx, dest, uv, (int32_t)offsetof(GCupval, v)); | ||
930 | } | ||
931 | emit_lso(as, A64I_LDRx, uv, func, | ||
932 | (int32_t)offsetof(GCfuncL, uvptr) + 8*(int32_t)(ir->op2 >> 8)); | ||
933 | } | ||
934 | } | ||
935 | |||
936 | static void asm_fref(ASMState *as, IRIns *ir) | ||
937 | { | ||
938 | UNUSED(as); UNUSED(ir); | ||
939 | lua_assert(!ra_used(ir)); | ||
940 | } | ||
941 | |||
942 | static void asm_strref(ASMState *as, IRIns *ir) | ||
943 | { | ||
944 | RegSet allow = RSET_GPR; | ||
945 | Reg dest = ra_dest(as, ir, allow); | ||
946 | Reg base = ra_alloc1(as, ir->op1, allow); | ||
947 | IRIns *irr = IR(ir->op2); | ||
948 | int32_t ofs = sizeof(GCstr); | ||
949 | uint32_t m; | ||
950 | rset_clear(allow, base); | ||
951 | if (irref_isk(ir->op2) && (m = emit_isk12(ofs + irr->i))) { | ||
952 | emit_dn(as, A64I_ADDx^m, dest, base); | ||
953 | } else { | ||
954 | emit_dn(as, (A64I_ADDx^A64I_K12) | A64F_U12(ofs), dest, dest); | ||
955 | emit_dnm(as, A64I_ADDx, dest, base, ra_alloc1(as, ir->op2, allow)); | ||
956 | } | ||
957 | } | ||
958 | |||
959 | /* -- Loads and stores ---------------------------------------------------- */ | ||
960 | |||
961 | static A64Ins asm_fxloadins(IRIns *ir) | ||
962 | { | ||
963 | switch (irt_type(ir->t)) { | ||
964 | case IRT_I8: return A64I_LDRB ^ A64I_LS_S; | ||
965 | case IRT_U8: return A64I_LDRB; | ||
966 | case IRT_I16: return A64I_LDRH ^ A64I_LS_S; | ||
967 | case IRT_U16: return A64I_LDRH; | ||
968 | case IRT_NUM: return A64I_LDRd; | ||
969 | case IRT_FLOAT: return A64I_LDRs; | ||
970 | default: return irt_is64(ir->t) ? A64I_LDRx : A64I_LDRw; | ||
971 | } | ||
972 | } | ||
973 | |||
974 | static A64Ins asm_fxstoreins(IRIns *ir) | ||
975 | { | ||
976 | switch (irt_type(ir->t)) { | ||
977 | case IRT_I8: case IRT_U8: return A64I_STRB; | ||
978 | case IRT_I16: case IRT_U16: return A64I_STRH; | ||
979 | case IRT_NUM: return A64I_STRd; | ||
980 | case IRT_FLOAT: return A64I_STRs; | ||
981 | default: return irt_is64(ir->t) ? A64I_STRx : A64I_STRw; | ||
982 | } | ||
983 | } | ||
984 | |||
985 | static void asm_fload(ASMState *as, IRIns *ir) | ||
986 | { | ||
987 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
988 | Reg idx; | ||
989 | A64Ins ai = asm_fxloadins(ir); | ||
990 | int32_t ofs; | ||
991 | if (ir->op1 == REF_NIL) { | ||
992 | idx = RID_GL; | ||
993 | ofs = (ir->op2 << 2) - GG_OFS(g); | ||
994 | } else { | ||
995 | idx = ra_alloc1(as, ir->op1, RSET_GPR); | ||
996 | if (ir->op2 == IRFL_TAB_ARRAY) { | ||
997 | ofs = asm_fuseabase(as, ir->op1); | ||
998 | if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ | ||
999 | emit_dn(as, (A64I_ADDx^A64I_K12) | A64F_U12(ofs), dest, idx); | ||
1000 | return; | ||
1001 | } | ||
1002 | } | ||
1003 | ofs = field_ofs[ir->op2]; | ||
1004 | } | ||
1005 | emit_lso(as, ai, (dest & 31), idx, ofs); | ||
1006 | } | ||
1007 | |||
1008 | static void asm_fstore(ASMState *as, IRIns *ir) | ||
1009 | { | ||
1010 | if (ir->r != RID_SINK) { | ||
1011 | Reg src = ra_alloc1(as, ir->op2, RSET_GPR); | ||
1012 | IRIns *irf = IR(ir->op1); | ||
1013 | Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); | ||
1014 | int32_t ofs = field_ofs[irf->op2]; | ||
1015 | emit_lso(as, asm_fxstoreins(ir), (src & 31), idx, ofs); | ||
1016 | } | ||
1017 | } | ||
1018 | |||
1019 | static void asm_xload(ASMState *as, IRIns *ir) | ||
1020 | { | ||
1021 | Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); | ||
1022 | lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); | ||
1023 | asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR); | ||
1024 | } | ||
1025 | |||
1026 | static void asm_xstore(ASMState *as, IRIns *ir) | ||
1027 | { | ||
1028 | if (ir->r != RID_SINK) { | ||
1029 | Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); | ||
1030 | asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, | ||
1031 | rset_exclude(RSET_GPR, src)); | ||
1032 | } | ||
1033 | } | ||
1034 | |||
1035 | static void asm_ahuvload(ASMState *as, IRIns *ir) | ||
1036 | { | ||
1037 | Reg idx, tmp, type; | ||
1038 | int32_t ofs = 0; | ||
1039 | RegSet gpr = RSET_GPR, allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR; | ||
1040 | lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) || | ||
1041 | irt_isint(ir->t)); | ||
1042 | if (ra_used(ir)) { | ||
1043 | Reg dest = ra_dest(as, ir, allow); | ||
1044 | tmp = irt_isnum(ir->t) ? ra_scratch(as, rset_clear(gpr, dest)) : dest; | ||
1045 | if (irt_isaddr(ir->t)) { | ||
1046 | emit_dn(as, A64I_ANDx^emit_isk13(LJ_GCVMASK, 1), dest, dest); | ||
1047 | } else if (irt_isnum(ir->t)) { | ||
1048 | emit_dn(as, A64I_FMOV_D_R, (dest & 31), tmp); | ||
1049 | } else if (irt_isint(ir->t)) { | ||
1050 | emit_dm(as, A64I_MOVw, dest, dest); | ||
1051 | } | ||
1052 | } else { | ||
1053 | tmp = ra_scratch(as, gpr); | ||
1054 | } | ||
1055 | type = ra_scratch(as, rset_clear(gpr, tmp)); | ||
1056 | idx = asm_fuseahuref(as, ir->op1, &ofs, rset_clear(gpr, type), A64I_LDRx); | ||
1057 | /* Always do the type check, even if the load result is unused. */ | ||
1058 | asm_guardcc(as, irt_isnum(ir->t) ? CC_LS : CC_NE); | ||
1059 | if (irt_type(ir->t) >= IRT_NUM) { | ||
1060 | lua_assert(irt_isinteger(ir->t) || irt_isnum(ir->t)); | ||
1061 | emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), | ||
1062 | ra_allock(as, LJ_TISNUM << 15, rset_exclude(gpr, idx)), tmp); | ||
1063 | } else if (irt_isaddr(ir->t)) { | ||
1064 | emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(ir->t)), type); | ||
1065 | emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp); | ||
1066 | } else if (irt_isnil(ir->t)) { | ||
1067 | emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp); | ||
1068 | } else { | ||
1069 | emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), | ||
1070 | ra_allock(as, (irt_toitype(ir->t) << 15) | 0x7fff, allow), tmp); | ||
1071 | } | ||
1072 | if (ofs & FUSE_REG) | ||
1073 | emit_dnm(as, (A64I_LDRx^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, tmp, idx, (ofs & 31)); | ||
1074 | else | ||
1075 | emit_lso(as, A64I_LDRx, tmp, idx, ofs); | ||
1076 | } | ||
1077 | |||
1078 | static void asm_ahustore(ASMState *as, IRIns *ir) | ||
1079 | { | ||
1080 | if (ir->r != RID_SINK) { | ||
1081 | RegSet allow = RSET_GPR; | ||
1082 | Reg idx, src = RID_NONE, tmp = RID_TMP, type = RID_NONE; | ||
1083 | int32_t ofs = 0; | ||
1084 | if (irt_isnum(ir->t)) { | ||
1085 | src = ra_alloc1(as, ir->op2, RSET_FPR); | ||
1086 | idx = asm_fuseahuref(as, ir->op1, &ofs, allow, A64I_STRd); | ||
1087 | if (ofs & FUSE_REG) | ||
1088 | emit_dnm(as, (A64I_STRd^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, (src & 31), idx, (ofs &31)); | ||
1089 | else | ||
1090 | emit_lso(as, A64I_STRd, (src & 31), idx, ofs); | ||
1091 | } else { | ||
1092 | if (!irt_ispri(ir->t)) { | ||
1093 | src = ra_alloc1(as, ir->op2, allow); | ||
1094 | rset_clear(allow, src); | ||
1095 | if (irt_isinteger(ir->t)) | ||
1096 | type = ra_allock(as, (uint64_t)(int32_t)LJ_TISNUM << 47, allow); | ||
1097 | else | ||
1098 | type = ra_allock(as, irt_toitype(ir->t), allow); | ||
1099 | } else { | ||
1100 | tmp = type = ra_allock(as, ~((int64_t)~irt_toitype(ir->t)<<47), allow); | ||
1101 | } | ||
1102 | idx = asm_fuseahuref(as, ir->op1, &ofs, rset_exclude(allow, type), | ||
1103 | A64I_STRx); | ||
1104 | if (ofs & FUSE_REG) | ||
1105 | emit_dnm(as, (A64I_STRx^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, tmp, idx, (ofs & 31)); | ||
1106 | else | ||
1107 | emit_lso(as, A64I_STRx, tmp, idx, ofs); | ||
1108 | if (ra_hasreg(src)) { | ||
1109 | if (irt_isinteger(ir->t)) { | ||
1110 | emit_dnm(as, A64I_ADDx | A64F_EX(A64EX_UXTW), tmp, type, src); | ||
1111 | } else { | ||
1112 | emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), tmp, src, type); | ||
1113 | } | ||
1114 | } | ||
1115 | } | ||
1116 | } | ||
1117 | } | ||
1118 | |||
1119 | static void asm_sload(ASMState *as, IRIns *ir) | ||
1120 | { | ||
1121 | int32_t ofs = 8*((int32_t)ir->op1-2); | ||
1122 | IRType1 t = ir->t; | ||
1123 | Reg dest = RID_NONE, base; | ||
1124 | RegSet allow = RSET_GPR; | ||
1125 | lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ | ||
1126 | lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK)); | ||
1127 | if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { | ||
1128 | dest = ra_scratch(as, RSET_FPR); | ||
1129 | asm_tointg(as, ir, dest); | ||
1130 | t.irt = IRT_NUM; /* Continue with a regular number type check. */ | ||
1131 | } else if (ra_used(ir)) { | ||
1132 | Reg tmp = RID_NONE; | ||
1133 | if ((ir->op2 & IRSLOAD_CONVERT)) | ||
1134 | tmp = ra_scratch(as, irt_isint(t) ? RSET_FPR : RSET_GPR); | ||
1135 | lua_assert((irt_isnum(t)) || irt_isint(t) || irt_isaddr(t)); | ||
1136 | dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : allow); | ||
1137 | base = ra_alloc1(as, REF_BASE, rset_clear(allow, dest)); | ||
1138 | if (irt_isaddr(t)) { | ||
1139 | emit_dn(as, A64I_ANDx^emit_isk13(LJ_GCVMASK, 1), dest, dest); | ||
1140 | } else if ((ir->op2 & IRSLOAD_CONVERT)) { | ||
1141 | if (irt_isint(t)) { | ||
1142 | emit_dn(as, A64I_FCVT_S32_F64, dest, (tmp & 31)); | ||
1143 | /* If value is already loaded for type check, move it to FPR. */ | ||
1144 | if ((ir->op2 & IRSLOAD_TYPECHECK)) | ||
1145 | emit_dn(as, A64I_FMOV_D_R, (tmp & 31), dest); | ||
1146 | else | ||
1147 | dest = tmp; | ||
1148 | t.irt = IRT_NUM; /* Check for original type. */ | ||
1149 | } else { | ||
1150 | emit_dn(as, A64I_FCVT_F64_S32, (dest & 31), tmp); | ||
1151 | dest = tmp; | ||
1152 | t.irt = IRT_INT; /* Check for original type. */ | ||
1153 | } | ||
1154 | } else if (irt_isint(t) && (ir->op2 & IRSLOAD_TYPECHECK)) { | ||
1155 | emit_dm(as, A64I_MOVw, dest, dest); | ||
1156 | } | ||
1157 | goto dotypecheck; | ||
1158 | } | ||
1159 | base = ra_alloc1(as, REF_BASE, allow); | ||
1160 | dotypecheck: | ||
1161 | rset_clear(allow, base); | ||
1162 | if ((ir->op2 & IRSLOAD_TYPECHECK)) { | ||
1163 | Reg tmp; | ||
1164 | if (ra_hasreg(dest) && rset_test(RSET_GPR, dest)) { | ||
1165 | tmp = dest; | ||
1166 | } else { | ||
1167 | tmp = ra_scratch(as, allow); | ||
1168 | rset_clear(allow, tmp); | ||
1169 | } | ||
1170 | if (irt_isnum(t) && !(ir->op2 & IRSLOAD_CONVERT)) | ||
1171 | emit_dn(as, A64I_FMOV_D_R, (dest & 31), tmp); | ||
1172 | /* Need type check, even if the load result is unused. */ | ||
1173 | asm_guardcc(as, irt_isnum(t) ? CC_LS : CC_NE); | ||
1174 | if (irt_type(t) >= IRT_NUM) { | ||
1175 | lua_assert(irt_isinteger(t) || irt_isnum(t)); | ||
1176 | emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), | ||
1177 | ra_allock(as, LJ_TISNUM << 15, allow), tmp); | ||
1178 | } else if (irt_isnil(t)) { | ||
1179 | emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp); | ||
1180 | } else if (irt_ispri(t)) { | ||
1181 | emit_nm(as, A64I_CMPx, | ||
1182 | ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow), tmp); | ||
1183 | } else { | ||
1184 | Reg type = ra_scratch(as, allow); | ||
1185 | emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(t)), type); | ||
1186 | emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp); | ||
1187 | } | ||
1188 | emit_lso(as, A64I_LDRx, tmp, base, ofs); | ||
1189 | return; | ||
1190 | } | ||
1191 | if (ra_hasreg(dest)) { | ||
1192 | emit_lso(as, irt_isnum(t) ? A64I_LDRd : | ||
1193 | (irt_isint(t) ? A64I_LDRw : A64I_LDRx), (dest & 31), base, | ||
1194 | ofs ^ ((LJ_BE && irt_isint(t) ? 4 : 0))); | ||
1195 | } | ||
1196 | } | ||
1197 | |||
1198 | /* -- Allocations --------------------------------------------------------- */ | ||
1199 | |||
#if LJ_HASFFI
/* Allocate a cdata object (CNEW/CNEWI).
** CNEWI allocates via lj_mem_newgco() and stores the 4 or 8 byte initial
** value behind the GCcdata header. CNEW with a size operand calls
** lj_cdata_newv() instead. For the lj_mem_newgco() path, the gct and
** ctypeid header fields are initialized inline after the call.
*/
static void asm_cnew(ASMState *as, IRIns *ir)
{
  CTState *cts = ctype_ctsG(J2G(as->J));
  CTypeID id = (CTypeID)IR(ir->op1)->i;
  CTSize sz;
  CTInfo info = lj_ctype_info(cts, id, &sz);
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
  IRRef args[4];
  RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
  lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));

  as->gcsteps++;
  asm_setupresult(as, ir, ci);  /* GCcdata * */
  if (ir->o == IR_CNEWI) {
    /* Initialize immutable cdata object. */
    int32_t payload = sizeof(GCcdata);
    Reg init = ra_alloc1(as, ir->op2, allow);
    lua_assert(sz == 4 || sz == 8);
    emit_lso(as, sz == 8 ? A64I_STRx : A64I_STRw, init, RID_RET, payload);
  } else if (ir->op2 != REF_NIL) {
    /* Create VLA/VLS/aligned cdata. */
    ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
    args[0] = ASMREF_L;     /* lua_State *L */
    args[1] = ir->op1;      /* CTypeID id   */
    args[2] = ir->op2;      /* CTSize sz    */
    args[3] = ASMREF_TMP1;  /* CTSize align */
    asm_gencall(as, ci, args);
    emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
    return;
  }

  /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
  {
    int smallid = (id < 65536);  /* Does the id fit a single MOVZ? */
    Reg idreg;
    if (smallid)
      idreg = RID_X1;
    else
      idreg = ra_allock(as, id, allow);
    emit_lso(as, A64I_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct));
    emit_lso(as, A64I_STRH, idreg, RID_RET, offsetof(GCcdata, ctypeid));
    emit_d(as, A64I_MOVZw | A64F_U16(~LJ_TCDATA), RID_TMP);
    if (smallid)
      emit_d(as, A64I_MOVZw | A64F_U16(id), RID_X1);
  }
  args[0] = ASMREF_L;     /* lua_State *L */
  args[1] = ASMREF_TMP1;  /* MSize size   */
  asm_gencall(as, ci, args);
  ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
               ra_releasetmp(as, ASMREF_TMP1));
}
#else
#define asm_cnew(as, ir)	((void)0)
#endif
1248 | |||
1249 | /* -- Write barriers ------------------------------------------------------ */ | ||
1250 | |||
1251 | static void asm_tbar(ASMState *as, IRIns *ir) | ||
1252 | { | ||
1253 | Reg tab = ra_alloc1(as, ir->op1, RSET_GPR); | ||
1254 | Reg link = ra_scratch(as, rset_exclude(RSET_GPR, tab)); | ||
1255 | Reg gr = ra_allock(as, i64ptr(J2G(as->J)), | ||
1256 | rset_exclude(rset_exclude(RSET_GPR, tab), link)); | ||
1257 | Reg mark = RID_TMP; | ||
1258 | MCLabel l_end = emit_label(as); | ||
1259 | emit_lso(as, A64I_STRx, link, tab, (int32_t)offsetof(GCtab, gclist)); | ||
1260 | emit_lso(as, A64I_STRB, mark, tab, (int32_t)offsetof(GCtab, marked)); | ||
1261 | emit_lso(as, A64I_STRx, tab, gr, | ||
1262 | (int32_t)offsetof(global_State, gc.grayagain)); | ||
1263 | emit_dn(as, A64I_ANDw^emit_isk13(~LJ_GC_BLACK, 0), mark, mark); | ||
1264 | emit_lso(as, A64I_LDRx, link, gr, | ||
1265 | (int32_t)offsetof(global_State, gc.grayagain)); | ||
1266 | emit_cond_branch(as, CC_EQ, l_end); | ||
1267 | emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), mark); | ||
1268 | emit_lso(as, A64I_LDRB, mark, tab, (int32_t)offsetof(GCtab, marked)); | ||
1269 | } | ||
1270 | |||
1271 | static void asm_obar(ASMState *as, IRIns *ir) | ||
1272 | { | ||
1273 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv]; | ||
1274 | IRRef args[2]; | ||
1275 | MCLabel l_end; | ||
1276 | RegSet allow = RSET_GPR; | ||
1277 | Reg obj, val, tmp; | ||
1278 | /* No need for other object barriers (yet). */ | ||
1279 | lua_assert(IR(ir->op1)->o == IR_UREFC); | ||
1280 | ra_evictset(as, RSET_SCRATCH); | ||
1281 | l_end = emit_label(as); | ||
1282 | args[0] = ASMREF_TMP1; /* global_State *g */ | ||
1283 | args[1] = ir->op1; /* TValue *tv */ | ||
1284 | asm_gencall(as, ci, args); | ||
1285 | ra_allockreg(as, i64ptr(J2G(as->J)), ra_releasetmp(as, ASMREF_TMP1) ); | ||
1286 | obj = IR(ir->op1)->r; | ||
1287 | tmp = ra_scratch(as, rset_exclude(allow, obj)); | ||
1288 | emit_cond_branch(as, CC_EQ, l_end); | ||
1289 | emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), tmp); | ||
1290 | emit_cond_branch(as, CC_EQ, l_end); | ||
1291 | emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_WHITES, 0), RID_TMP); | ||
1292 | val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj)); | ||
1293 | emit_lso(as, A64I_LDRB, tmp, obj, | ||
1294 | (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)); | ||
1295 | emit_lso(as, A64I_LDRB, RID_TMP, val, (int32_t)offsetof(GChead, marked)); | ||
1296 | } | ||
1297 | |||
1298 | /* -- Arithmetic and logic operations ------------------------------------- */ | ||
1299 | |||
1300 | static void asm_fparith(ASMState *as, IRIns *ir, A64Ins ai) | ||
1301 | { | ||
1302 | Reg dest = ra_dest(as, ir, RSET_FPR); | ||
1303 | Reg right, left = ra_alloc2(as, ir, RSET_FPR); | ||
1304 | right = (left >> 8); left &= 255; | ||
1305 | emit_dnm(as, ai, (dest & 31), (left & 31), (right & 31)); | ||
1306 | } | ||
1307 | |||
1308 | static void asm_fpunary(ASMState *as, IRIns *ir, A64Ins ai) | ||
1309 | { | ||
1310 | Reg dest = ra_dest(as, ir, RSET_FPR); | ||
1311 | Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); | ||
1312 | emit_dn(as, ai, (dest & 31), (left & 31)); | ||
1313 | } | ||
1314 | |||
1315 | static void asm_fpmath(ASMState *as, IRIns *ir) | ||
1316 | { | ||
1317 | IRFPMathOp fpm = (IRFPMathOp)ir->op2; | ||
1318 | if (fpm == IRFPM_SQRT) { | ||
1319 | asm_fpunary(as, ir, A64I_FSQRTd); | ||
1320 | } else if (fpm <= IRFPM_TRUNC) { | ||
1321 | asm_fpunary(as, ir, fpm == IRFPM_FLOOR ? A64I_FRINTMd : | ||
1322 | fpm == IRFPM_CEIL ? A64I_FRINTPd : A64I_FRINTZd); | ||
1323 | } else if (fpm == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) { | ||
1324 | return; | ||
1325 | } else { | ||
1326 | asm_callid(as, ir, IRCALL_lj_vm_floor + fpm); | ||
1327 | } | ||
1328 | } | ||
1329 | |||
1330 | static int asm_swapops(ASMState *as, IRRef lref, IRRef rref) | ||
1331 | { | ||
1332 | IRIns *ir; | ||
1333 | if (irref_isk(rref)) | ||
1334 | return 0; /* Don't swap constants to the left. */ | ||
1335 | if (irref_isk(lref)) | ||
1336 | return 1; /* But swap constants to the right. */ | ||
1337 | ir = IR(rref); | ||
1338 | if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR) || | ||
1339 | (ir->o == IR_ADD && ir->op1 == ir->op2) || | ||
1340 | (ir->o == IR_CONV && ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT))) | ||
1341 | return 0; /* Don't swap fusable operands to the left. */ | ||
1342 | ir = IR(lref); | ||
1343 | if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR) || | ||
1344 | (ir->o == IR_ADD && ir->op1 == ir->op2) || | ||
1345 | (ir->o == IR_CONV && ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT))) | ||
1346 | return 1; /* But swap fusable operands to the right. */ | ||
1347 | return 0; /* Otherwise don't swap. */ | ||
1348 | } | ||
1349 | |||
1350 | static void asm_intop(ASMState *as, IRIns *ir, A64Ins ai) | ||
1351 | { | ||
1352 | IRRef lref = ir->op1, rref = ir->op2; | ||
1353 | Reg left, dest = ra_dest(as, ir, RSET_GPR); | ||
1354 | uint32_t m; | ||
1355 | if ((ai & ~A64I_S) != A64I_SUBw && asm_swapops(as, lref, rref)) { | ||
1356 | IRRef tmp = lref; lref = rref; rref = tmp; | ||
1357 | } | ||
1358 | left = ra_hintalloc(as, lref, dest, RSET_GPR); | ||
1359 | if (irt_is64(ir->t)) ai |= A64I_X; | ||
1360 | m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left)); | ||
1361 | if (irt_isguard(ir->t)) { /* For IR_ADDOV etc. */ | ||
1362 | asm_guardcc(as, CC_VS); | ||
1363 | ai |= A64I_S; | ||
1364 | } | ||
1365 | emit_dn(as, ai^m, dest, left); | ||
1366 | } | ||
1367 | |||
1368 | static void asm_intop_s(ASMState *as, IRIns *ir, A64Ins ai) | ||
1369 | { | ||
1370 | if (as->flagmcp == as->mcp) { /* Drop cmp r, #0. */ | ||
1371 | as->flagmcp = NULL; | ||
1372 | as->mcp++; | ||
1373 | ai |= A64I_S; | ||
1374 | } | ||
1375 | asm_intop(as, ir, ai); | ||
1376 | } | ||
1377 | |||
1378 | static void asm_intneg(ASMState *as, IRIns *ir) | ||
1379 | { | ||
1380 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
1381 | Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); | ||
1382 | emit_dm(as, irt_is64(ir->t) ? A64I_NEGx : A64I_NEGw, dest, left); | ||
1383 | } | ||
1384 | |||
1385 | /* NYI: use add/shift for MUL(OV) with constants. FOLD only does 2^k. */ | ||
1386 | static void asm_intmul(ASMState *as, IRIns *ir) | ||
1387 | { | ||
1388 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
1389 | Reg left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, dest)); | ||
1390 | Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); | ||
1391 | if (irt_isguard(ir->t)) { /* IR_MULOV */ | ||
1392 | asm_guardcc(as, CC_NE); | ||
1393 | emit_dm(as, A64I_MOVw, dest, dest); /* Zero-extend. */ | ||
1394 | emit_nm(as, A64I_CMPw | A64F_SH(A64SH_ASR, 31), RID_TMP, dest); | ||
1395 | emit_dn(as, A64I_ASRx | A64F_IMMR(32), RID_TMP, dest); | ||
1396 | emit_dnm(as, A64I_SMULL, dest, right, left); | ||
1397 | } else { | ||
1398 | emit_dnm(as, irt_is64(ir->t) ? A64I_MULx : A64I_MULw, dest, left, right); | ||
1399 | } | ||
1400 | } | ||
1401 | |||
1402 | static void asm_add(ASMState *as, IRIns *ir) | ||
1403 | { | ||
1404 | if (irt_isnum(ir->t)) { | ||
1405 | if (!asm_fusemadd(as, ir, A64I_FMADDd, A64I_FMADDd)) | ||
1406 | asm_fparith(as, ir, A64I_FADDd); | ||
1407 | return; | ||
1408 | } | ||
1409 | asm_intop_s(as, ir, A64I_ADDw); | ||
1410 | } | ||
1411 | |||
1412 | static void asm_sub(ASMState *as, IRIns *ir) | ||
1413 | { | ||
1414 | if (irt_isnum(ir->t)) { | ||
1415 | if (!asm_fusemadd(as, ir, A64I_FNMSUBd, A64I_FMSUBd)) | ||
1416 | asm_fparith(as, ir, A64I_FSUBd); | ||
1417 | return; | ||
1418 | } | ||
1419 | asm_intop_s(as, ir, A64I_SUBw); | ||
1420 | } | ||
1421 | |||
1422 | static void asm_mul(ASMState *as, IRIns *ir) | ||
1423 | { | ||
1424 | if (irt_isnum(ir->t)) { | ||
1425 | asm_fparith(as, ir, A64I_FMULd); | ||
1426 | return; | ||
1427 | } | ||
1428 | asm_intmul(as, ir); | ||
1429 | } | ||
1430 | |||
1431 | static void asm_div(ASMState *as, IRIns *ir) | ||
1432 | { | ||
1433 | #if LJ_HASFFI | ||
1434 | if (!irt_isnum(ir->t)) | ||
1435 | asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 : | ||
1436 | IRCALL_lj_carith_divu64); | ||
1437 | else | ||
1438 | #endif | ||
1439 | asm_fparith(as, ir, A64I_FDIVd); | ||
1440 | } | ||
1441 | |||
1442 | static void asm_pow(ASMState *as, IRIns *ir) | ||
1443 | { | ||
1444 | #if LJ_HASFFI | ||
1445 | if (!irt_isnum(ir->t)) | ||
1446 | asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : | ||
1447 | IRCALL_lj_carith_powu64); | ||
1448 | else | ||
1449 | #endif | ||
1450 | asm_callid(as, ir, IRCALL_lj_vm_powi); | ||
1451 | } | ||
1452 | |||
/* Overflow-checking variants share the plain arithmetic entry points. */
#define asm_addov(as, ir)		asm_add(as, ir)
#define asm_subov(as, ir)		asm_sub(as, ir)
#define asm_mulov(as, ir)		asm_mul(as, ir)

/* ABS is a single FP instruction; ATAN2 and LDEXP are calls. */
#define asm_abs(as, ir)			asm_fpunary(as, ir, A64I_FABS)
#define asm_atan2(as, ir)		asm_callid(as, ir, IRCALL_atan2)
#define asm_ldexp(as, ir)		asm_callid(as, ir, IRCALL_ldexp)
1460 | |||
1461 | static void asm_mod(ASMState *as, IRIns *ir) | ||
1462 | { | ||
1463 | #if LJ_HASFFI | ||
1464 | if (!irt_isint(ir->t)) | ||
1465 | asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : | ||
1466 | IRCALL_lj_carith_modu64); | ||
1467 | else | ||
1468 | #endif | ||
1469 | asm_callid(as, ir, IRCALL_lj_vm_modi); | ||
1470 | } | ||
1471 | |||
1472 | static void asm_neg(ASMState *as, IRIns *ir) | ||
1473 | { | ||
1474 | if (irt_isnum(ir->t)) { | ||
1475 | asm_fpunary(as, ir, A64I_FNEGd); | ||
1476 | return; | ||
1477 | } | ||
1478 | asm_intneg(as, ir); | ||
1479 | } | ||
1480 | |||
1481 | static void asm_band(ASMState *as, IRIns *ir) | ||
1482 | { | ||
1483 | A64Ins ai = A64I_ANDw; | ||
1484 | if (asm_fuseandshift(as, ir)) | ||
1485 | return; | ||
1486 | if (as->flagmcp == as->mcp) { | ||
1487 | /* Try to drop cmp r, #0. */ | ||
1488 | as->flagmcp = NULL; | ||
1489 | as->mcp++; | ||
1490 | ai = A64I_ANDSw; | ||
1491 | } | ||
1492 | asm_intop(as, ir, ai); | ||
1493 | } | ||
1494 | |||
1495 | static void asm_borbxor(ASMState *as, IRIns *ir, A64Ins ai) | ||
1496 | { | ||
1497 | IRRef lref = ir->op1, rref = ir->op2; | ||
1498 | IRIns *irl = IR(lref), *irr = IR(rref); | ||
1499 | if ((canfuse(as, irl) && irl->o == IR_BNOT && !irref_isk(rref)) || | ||
1500 | (canfuse(as, irr) && irr->o == IR_BNOT && !irref_isk(lref))) { | ||
1501 | Reg left, dest = ra_dest(as, ir, RSET_GPR); | ||
1502 | uint32_t m; | ||
1503 | if (irl->o == IR_BNOT) { | ||
1504 | IRRef tmp = lref; lref = rref; rref = tmp; | ||
1505 | } | ||
1506 | left = ra_alloc1(as, lref, RSET_GPR); | ||
1507 | ai |= A64I_ON; | ||
1508 | if (irt_is64(ir->t)) ai |= A64I_X; | ||
1509 | m = asm_fuseopm(as, ai, IR(rref)->op1, rset_exclude(RSET_GPR, left)); | ||
1510 | emit_dn(as, ai^m, dest, left); | ||
1511 | } else { | ||
1512 | asm_intop(as, ir, ai); | ||
1513 | } | ||
1514 | } | ||
1515 | |||
1516 | static void asm_bor(ASMState *as, IRIns *ir) | ||
1517 | { | ||
1518 | if (asm_fuseorshift(as, ir)) | ||
1519 | return; | ||
1520 | asm_borbxor(as, ir, A64I_ORRw); | ||
1521 | } | ||
1522 | |||
1523 | #define asm_bxor(as, ir) asm_borbxor(as, ir, A64I_EORw) | ||
1524 | |||
1525 | static void asm_bnot(ASMState *as, IRIns *ir) | ||
1526 | { | ||
1527 | A64Ins ai = A64I_MVNw; | ||
1528 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
1529 | uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR); | ||
1530 | if (irt_is64(ir->t)) ai |= A64I_X; | ||
1531 | emit_d(as, ai^m, dest); | ||
1532 | } | ||
1533 | |||
1534 | static void asm_bswap(ASMState *as, IRIns *ir) | ||
1535 | { | ||
1536 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
1537 | Reg left = ra_alloc1(as, ir->op1, RSET_GPR); | ||
1538 | emit_dn(as, irt_is64(ir->t) ? A64I_REVx : A64I_REVw, dest, left); | ||
1539 | } | ||
1540 | |||
1541 | static void asm_bitshift(ASMState *as, IRIns *ir, A64Ins ai, A64Shift sh) | ||
1542 | { | ||
1543 | int32_t shmask = irt_is64(ir->t) ? 63 : 31; | ||
1544 | if (irref_isk(ir->op2)) { /* Constant shifts. */ | ||
1545 | Reg left, dest = ra_dest(as, ir, RSET_GPR); | ||
1546 | int32_t shift = (IR(ir->op2)->i & shmask); | ||
1547 | IRIns *irl = IR(ir->op1); | ||
1548 | if (shmask == 63) ai += A64I_UBFMx - A64I_UBFMw; | ||
1549 | |||
1550 | /* Fuse BSHL + BSHR/BSAR into UBFM/SBFM aka UBFX/SBFX/UBFIZ/SBFIZ. */ | ||
1551 | if ((sh == A64SH_LSR || sh == A64SH_ASR) && canfuse(as, irl)) { | ||
1552 | if (irl->o == IR_BSHL && irref_isk(irl->op2)) { | ||
1553 | int32_t shift2 = (IR(irl->op2)->i & shmask); | ||
1554 | shift = ((shift - shift2) & shmask); | ||
1555 | shmask -= shift2; | ||
1556 | ir = irl; | ||
1557 | } | ||
1558 | } | ||
1559 | |||
1560 | left = ra_alloc1(as, ir->op1, RSET_GPR); | ||
1561 | switch (sh) { | ||
1562 | case A64SH_LSL: | ||
1563 | emit_dn(as, ai | A64F_IMMS(shmask-shift) | | ||
1564 | A64F_IMMR((shmask-shift+1)&shmask), dest, left); | ||
1565 | break; | ||
1566 | case A64SH_LSR: case A64SH_ASR: | ||
1567 | emit_dn(as, ai | A64F_IMMS(shmask) | A64F_IMMR(shift), dest, left); | ||
1568 | break; | ||
1569 | case A64SH_ROR: | ||
1570 | emit_dnm(as, ai | A64F_IMMS(shift), dest, left, left); | ||
1571 | break; | ||
1572 | } | ||
1573 | } else { /* Variable-length shifts. */ | ||
1574 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
1575 | Reg left = ra_alloc1(as, ir->op1, RSET_GPR); | ||
1576 | Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); | ||
1577 | emit_dnm(as, (shmask == 63 ? A64I_SHRx : A64I_SHRw) | A64F_BSH(sh), dest, left, right); | ||
1578 | } | ||
1579 | } | ||
1580 | |||
/* Shift/rotate entry points: opcode for constant counts plus shift kind. */
#define asm_bshl(as, ir)	asm_bitshift(as, ir, A64I_UBFMw, A64SH_LSL)
#define asm_bshr(as, ir)	asm_bitshift(as, ir, A64I_UBFMw, A64SH_LSR)
#define asm_bsar(as, ir)	asm_bitshift(as, ir, A64I_SBFMw, A64SH_ASR)
#define asm_bror(as, ir)	asm_bitshift(as, ir, A64I_EXTRw, A64SH_ROR)
/* BROL is not expected to reach this backend. */
#define asm_brol(as, ir)	lua_assert(0)
1586 | |||
1587 | static void asm_intmin_max(ASMState *as, IRIns *ir, A64CC cc) | ||
1588 | { | ||
1589 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
1590 | Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); | ||
1591 | Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); | ||
1592 | emit_dnm(as, A64I_CSELw|A64F_CC(cc), dest, left, right); | ||
1593 | emit_nm(as, A64I_CMPw, left, right); | ||
1594 | } | ||
1595 | |||
1596 | static void asm_fpmin_max(ASMState *as, IRIns *ir, A64CC fcc) | ||
1597 | { | ||
1598 | Reg dest = (ra_dest(as, ir, RSET_FPR) & 31); | ||
1599 | Reg right, left = ra_alloc2(as, ir, RSET_FPR); | ||
1600 | right = ((left >> 8) & 31); left &= 31; | ||
1601 | emit_dnm(as, A64I_FCSELd | A64F_CC(fcc), dest, left, right); | ||
1602 | emit_nm(as, A64I_FCMPd, left, right); | ||
1603 | } | ||
1604 | |||
1605 | static void asm_min_max(ASMState *as, IRIns *ir, A64CC cc, A64CC fcc) | ||
1606 | { | ||
1607 | if (irt_isnum(ir->t)) | ||
1608 | asm_fpmin_max(as, ir, fcc); | ||
1609 | else | ||
1610 | asm_intmin_max(as, ir, cc); | ||
1611 | } | ||
1612 | |||
1613 | #define asm_max(as, ir) asm_min_max(as, ir, CC_GT, CC_HI) | ||
1614 | #define asm_min(as, ir) asm_min_max(as, ir, CC_LT, CC_LO) | ||
1615 | |||
1616 | /* -- Comparisons --------------------------------------------------------- */ | ||
1617 | |||
1618 | /* Map of comparisons to flags. ORDER IR. */ | ||
1619 | static const uint8_t asm_compmap[IR_ABC+1] = { | ||
1620 | /* op FP swp int cc FP cc */ | ||
1621 | /* LT */ CC_GE + (CC_HS << 4), | ||
1622 | /* GE x */ CC_LT + (CC_HI << 4), | ||
1623 | /* LE */ CC_GT + (CC_HI << 4), | ||
1624 | /* GT x */ CC_LE + (CC_HS << 4), | ||
1625 | /* ULT x */ CC_HS + (CC_LS << 4), | ||
1626 | /* UGE */ CC_LO + (CC_LO << 4), | ||
1627 | /* ULE x */ CC_HI + (CC_LO << 4), | ||
1628 | /* UGT */ CC_LS + (CC_LS << 4), | ||
1629 | /* EQ */ CC_NE + (CC_NE << 4), | ||
1630 | /* NE */ CC_EQ + (CC_EQ << 4), | ||
1631 | /* ABC */ CC_LS + (CC_LS << 4) /* Same as UGT. */ | ||
1632 | }; | ||
1633 | |||
1634 | /* FP comparisons. */ | ||
1635 | static void asm_fpcomp(ASMState *as, IRIns *ir) | ||
1636 | { | ||
1637 | Reg left, right; | ||
1638 | A64Ins ai; | ||
1639 | int swp = ((ir->o ^ (ir->o >> 2)) & ~(ir->o >> 3) & 1); | ||
1640 | if (!swp && irref_isk(ir->op2) && ir_knum(IR(ir->op2))->u64 == 0) { | ||
1641 | left = (ra_alloc1(as, ir->op1, RSET_FPR) & 31); | ||
1642 | right = 0; | ||
1643 | ai = A64I_FCMPZd; | ||
1644 | } else { | ||
1645 | left = ra_alloc2(as, ir, RSET_FPR); | ||
1646 | if (swp) { | ||
1647 | right = (left & 31); left = ((left >> 8) & 31); | ||
1648 | } else { | ||
1649 | right = ((left >> 8) & 31); left &= 31; | ||
1650 | } | ||
1651 | ai = A64I_FCMPd; | ||
1652 | } | ||
1653 | asm_guardcc(as, (asm_compmap[ir->o] >> 4)); | ||
1654 | emit_nm(as, ai, left, right); | ||
1655 | } | ||
1656 | |||
1657 | /* Integer comparisons. */ | ||
1658 | static void asm_intcomp(ASMState *as, IRIns *ir) | ||
1659 | { | ||
1660 | A64CC oldcc, cc = (asm_compmap[ir->o] & 15); | ||
1661 | A64Ins ai = irt_is64(ir->t) ? A64I_CMPx : A64I_CMPw; | ||
1662 | IRRef lref = ir->op1, rref = ir->op2; | ||
1663 | Reg left; | ||
1664 | uint32_t m; | ||
1665 | int cmpprev0 = 0; | ||
1666 | lua_assert(irt_is64(ir->t) || irt_isint(ir->t) || | ||
1667 | irt_isu32(ir->t) || irt_isaddr(ir->t) || irt_isu8(ir->t)); | ||
1668 | if (asm_swapops(as, lref, rref)) { | ||
1669 | IRRef tmp = lref; lref = rref; rref = tmp; | ||
1670 | if (cc >= CC_GE) cc ^= 7; /* LT <-> GT, LE <-> GE */ | ||
1671 | else if (cc > CC_NE) cc ^= 11; /* LO <-> HI, LS <-> HS */ | ||
1672 | } | ||
1673 | oldcc = cc; | ||
1674 | if (irref_isk(rref) && get_k64val(IR(rref)) == 0) { | ||
1675 | IRIns *irl = IR(lref); | ||
1676 | if (cc == CC_GE) cc = CC_PL; | ||
1677 | else if (cc == CC_LT) cc = CC_MI; | ||
1678 | else if (cc > CC_NE) goto nocombine; /* Other conds don't work with tst. */ | ||
1679 | cmpprev0 = (irl+1 == ir); | ||
1680 | /* Combine and-cmp-bcc into tbz/tbnz or and-cmp into tst. */ | ||
1681 | if (cmpprev0 && irl->o == IR_BAND && !ra_used(irl)) { | ||
1682 | IRRef blref = irl->op1, brref = irl->op2; | ||
1683 | uint32_t m2 = 0; | ||
1684 | Reg bleft; | ||
1685 | if (asm_swapops(as, blref, brref)) { | ||
1686 | Reg tmp = blref; blref = brref; brref = tmp; | ||
1687 | } | ||
1688 | if (irref_isk(brref)) { | ||
1689 | uint64_t k = get_k64val(IR(brref)); | ||
1690 | if (k && !(k & (k-1)) && (cc == CC_EQ || cc == CC_NE)) { | ||
1691 | asm_guardtnb(as, cc == CC_EQ ? A64I_TBZ : A64I_TBNZ, | ||
1692 | ra_alloc1(as, blref, RSET_GPR), emit_ctz64(k)); | ||
1693 | return; | ||
1694 | } | ||
1695 | m2 = emit_isk13(k, irt_is64(irl->t)); | ||
1696 | } | ||
1697 | bleft = ra_alloc1(as, blref, RSET_GPR); | ||
1698 | ai = (irt_is64(irl->t) ? A64I_TSTx : A64I_TSTw); | ||
1699 | if (!m2) | ||
1700 | m2 = asm_fuseopm(as, ai, brref, rset_exclude(RSET_GPR, bleft)); | ||
1701 | asm_guardcc(as, cc); | ||
1702 | emit_n(as, ai^m2, bleft); | ||
1703 | return; | ||
1704 | } | ||
1705 | if (cc == CC_EQ || cc == CC_NE) { | ||
1706 | /* Combine cmp-bcc into cbz/cbnz. */ | ||
1707 | ai = cc == CC_EQ ? A64I_CBZ : A64I_CBNZ; | ||
1708 | if (irt_is64(ir->t)) ai |= A64I_X; | ||
1709 | asm_guardcnb(as, ai, ra_alloc1(as, lref, RSET_GPR)); | ||
1710 | return; | ||
1711 | } | ||
1712 | } | ||
1713 | nocombine: | ||
1714 | left = ra_alloc1(as, lref, RSET_GPR); | ||
1715 | m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left)); | ||
1716 | asm_guardcc(as, cc); | ||
1717 | emit_n(as, ai^m, left); | ||
1718 | /* Signed comparison with zero and referencing previous ins? */ | ||
1719 | if (cmpprev0 && (oldcc <= CC_NE || oldcc >= CC_GE)) | ||
1720 | as->flagmcp = as->mcp; /* Allow elimination of the compare. */ | ||
1721 | } | ||
1722 | |||
1723 | static void asm_comp(ASMState *as, IRIns *ir) | ||
1724 | { | ||
1725 | if (irt_isnum(ir->t)) | ||
1726 | asm_fpcomp(as, ir); | ||
1727 | else | ||
1728 | asm_intcomp(as, ir); | ||
1729 | } | ||
1730 | |||
1731 | #define asm_equal(as, ir) asm_comp(as, ir) | ||
1732 | |||
1733 | /* -- Support for 64 bit ops in 32 bit mode ------------------------------- */ | ||
1734 | |||
1735 | /* Hiword op of a split 64 bit op. Previous op must be the loword op. */ | ||
1736 | static void asm_hiop(ASMState *as, IRIns *ir) | ||
1737 | { | ||
1738 | UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused on 64 bit. */ | ||
1739 | } | ||
1740 | |||
1741 | /* -- Profiling ----------------------------------------------------------- */ | ||
1742 | |||
1743 | static void asm_prof(ASMState *as, IRIns *ir) | ||
1744 | { | ||
1745 | uint32_t k = emit_isk13(HOOK_PROFILE, 0); | ||
1746 | lua_assert(k != 0); | ||
1747 | UNUSED(ir); | ||
1748 | asm_guardcc(as, CC_NE); | ||
1749 | emit_n(as, A64I_TSTw^k, RID_TMP); | ||
1750 | emit_lsptr(as, A64I_LDRB, RID_TMP, (void *)&J2G(as->J)->hookmask); | ||
1751 | } | ||
1752 | |||
1753 | /* -- Stack handling ------------------------------------------------------ */ | ||
1754 | |||
1755 | /* Check Lua stack size for overflow. Use exit handler as fallback. */ | ||
1756 | static void asm_stack_check(ASMState *as, BCReg topslot, | ||
1757 | IRIns *irp, RegSet allow, ExitNo exitno) | ||
1758 | { | ||
1759 | Reg pbase; | ||
1760 | uint32_t k; | ||
1761 | if (irp) { | ||
1762 | if (!ra_hasspill(irp->s)) { | ||
1763 | pbase = irp->r; | ||
1764 | lua_assert(ra_hasreg(pbase)); | ||
1765 | } else if (allow) { | ||
1766 | pbase = rset_pickbot(allow); | ||
1767 | } else { | ||
1768 | pbase = RID_RET; | ||
1769 | emit_lso(as, A64I_LDRx, RID_RET, RID_SP, 0); /* Restore temp register. */ | ||
1770 | } | ||
1771 | } else { | ||
1772 | pbase = RID_BASE; | ||
1773 | } | ||
1774 | emit_cond_branch(as, CC_LS, asm_exitstub_addr(as, exitno)); | ||
1775 | k = emit_isk12((8*topslot)); | ||
1776 | lua_assert(k); | ||
1777 | emit_n(as, A64I_CMPx^k, RID_TMP); | ||
1778 | emit_dnm(as, A64I_SUBx, RID_TMP, RID_TMP, pbase); | ||
1779 | emit_lso(as, A64I_LDRx, RID_TMP, RID_TMP, | ||
1780 | (int32_t)offsetof(lua_State, maxstack)); | ||
1781 | if (irp) { /* Must not spill arbitrary registers in head of side trace. */ | ||
1782 | if (ra_hasspill(irp->s)) | ||
1783 | emit_lso(as, A64I_LDRx, pbase, RID_SP, sps_scale(irp->s)); | ||
1784 | emit_lso(as, A64I_LDRx, RID_TMP, RID_GL, glofs(as, &J2G(as->J)->cur_L)); | ||
1785 | if (ra_hasspill(irp->s) && !allow) | ||
1786 | emit_lso(as, A64I_STRx, RID_RET, RID_SP, 0); /* Save temp register. */ | ||
1787 | } else { | ||
1788 | emit_getgl(as, RID_TMP, cur_L); | ||
1789 | } | ||
1790 | } | ||
1791 | |||
1792 | /* Restore Lua stack from on-trace state. */ | ||
1793 | static void asm_stack_restore(ASMState *as, SnapShot *snap) | ||
1794 | { | ||
1795 | SnapEntry *map = &as->T->snapmap[snap->mapofs]; | ||
1796 | #ifdef LUA_USE_ASSERT | ||
1797 | SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2]; | ||
1798 | #endif | ||
1799 | MSize n, nent = snap->nent; | ||
1800 | /* Store the value of all modified slots to the Lua stack. */ | ||
1801 | for (n = 0; n < nent; n++) { | ||
1802 | SnapEntry sn = map[n]; | ||
1803 | BCReg s = snap_slot(sn); | ||
1804 | int32_t ofs = 8*((int32_t)s-1-LJ_FR2); | ||
1805 | IRRef ref = snap_ref(sn); | ||
1806 | IRIns *ir = IR(ref); | ||
1807 | if ((sn & SNAP_NORESTORE)) | ||
1808 | continue; | ||
1809 | if (irt_isnum(ir->t)) { | ||
1810 | Reg src = ra_alloc1(as, ref, RSET_FPR); | ||
1811 | emit_lso(as, A64I_STRd, (src & 31), RID_BASE, ofs); | ||
1812 | } else { | ||
1813 | asm_tvstore64(as, RID_BASE, ofs, ref); | ||
1814 | } | ||
1815 | checkmclim(as); | ||
1816 | } | ||
1817 | lua_assert(map + nent == flinks); | ||
1818 | } | ||
1819 | |||
1820 | /* -- GC handling --------------------------------------------------------- */ | ||
1821 | |||
1822 | /* Check GC threshold and do one or more GC steps. */ | ||
1823 | static void asm_gc_check(ASMState *as) | ||
1824 | { | ||
1825 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit]; | ||
1826 | IRRef args[2]; | ||
1827 | MCLabel l_end; | ||
1828 | Reg tmp1, tmp2; | ||
1829 | ra_evictset(as, RSET_SCRATCH); | ||
1830 | l_end = emit_label(as); | ||
1831 | /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */ | ||
1832 | asm_guardcnb(as, A64I_CBNZ, RID_RET); /* Assumes asm_snap_prep() is done. */ | ||
1833 | args[0] = ASMREF_TMP1; /* global_State *g */ | ||
1834 | args[1] = ASMREF_TMP2; /* MSize steps */ | ||
1835 | asm_gencall(as, ci, args); | ||
1836 | tmp1 = ra_releasetmp(as, ASMREF_TMP1); | ||
1837 | tmp2 = ra_releasetmp(as, ASMREF_TMP2); | ||
1838 | emit_loadi(as, tmp2, as->gcsteps); | ||
1839 | /* Jump around GC step if GC total < GC threshold. */ | ||
1840 | emit_cond_branch(as, CC_LS, l_end); | ||
1841 | emit_nm(as, A64I_CMPx, RID_TMP, tmp2); | ||
1842 | emit_lso(as, A64I_LDRx, tmp2, tmp1, | ||
1843 | (int32_t)offsetof(global_State, gc.threshold)); | ||
1844 | emit_lso(as, A64I_LDRx, RID_TMP, tmp1, | ||
1845 | (int32_t)offsetof(global_State, gc.total)); | ||
1846 | ra_allockreg(as, i64ptr(J2G(as->J)), tmp1); | ||
1847 | as->gcsteps = 0; | ||
1848 | checkmclim(as); | ||
1849 | } | ||
1850 | |||
1851 | /* -- Loop handling ------------------------------------------------------- */ | ||
1852 | |||
1853 | /* Fixup the loop branch. */ | ||
1854 | static void asm_loop_fixup(ASMState *as) | ||
1855 | { | ||
1856 | MCode *p = as->mctop; | ||
1857 | MCode *target = as->mcp; | ||
1858 | if (as->loopinv) { /* Inverted loop branch? */ | ||
1859 | uint32_t mask = (p[-2] & 0x7e000000) == 0x36000000 ? 0x3fffu : 0x7ffffu; | ||
1860 | ptrdiff_t delta = target - (p - 2); | ||
1861 | /* asm_guard* already inverted the bcc/tnb/cnb and patched the final b. */ | ||
1862 | p[-2] |= ((uint32_t)delta & mask) << 5; | ||
1863 | } else { | ||
1864 | ptrdiff_t delta = target - (p - 1); | ||
1865 | p[-1] = A64I_B | A64F_S26(delta); | ||
1866 | } | ||
1867 | } | ||
1868 | |||
1869 | /* -- Head of trace ------------------------------------------------------- */ | ||
1870 | |||
1871 | /* Reload L register from g->cur_L. */ | ||
1872 | static void asm_head_lreg(ASMState *as) | ||
1873 | { | ||
1874 | IRIns *ir = IR(ASMREF_L); | ||
1875 | if (ra_used(ir)) { | ||
1876 | Reg r = ra_dest(as, ir, RSET_GPR); | ||
1877 | emit_getgl(as, r, cur_L); | ||
1878 | ra_evictk(as); | ||
1879 | } | ||
1880 | } | ||
1881 | |||
1882 | /* Coalesce BASE register for a root trace. */ | ||
1883 | static void asm_head_root_base(ASMState *as) | ||
1884 | { | ||
1885 | IRIns *ir; | ||
1886 | asm_head_lreg(as); | ||
1887 | ir = IR(REF_BASE); | ||
1888 | if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t))) | ||
1889 | ra_spill(as, ir); | ||
1890 | ra_destreg(as, ir, RID_BASE); | ||
1891 | } | ||
1892 | |||
1893 | /* Coalesce BASE register for a side trace. */ | ||
1894 | static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow) | ||
1895 | { | ||
1896 | IRIns *ir; | ||
1897 | asm_head_lreg(as); | ||
1898 | ir = IR(REF_BASE); | ||
1899 | if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t))) | ||
1900 | ra_spill(as, ir); | ||
1901 | if (ra_hasspill(irp->s)) { | ||
1902 | rset_clear(allow, ra_dest(as, ir, allow)); | ||
1903 | } else { | ||
1904 | Reg r = irp->r; | ||
1905 | lua_assert(ra_hasreg(r)); | ||
1906 | rset_clear(allow, r); | ||
1907 | if (r != ir->r && !rset_test(as->freeset, r)) | ||
1908 | ra_restore(as, regcost_ref(as->cost[r])); | ||
1909 | ra_destreg(as, ir, r); | ||
1910 | } | ||
1911 | return allow; | ||
1912 | } | ||
1913 | |||
1914 | /* -- Tail of trace ------------------------------------------------------- */ | ||
1915 | |||
1916 | /* Fixup the tail code. */ | ||
1917 | static void asm_tail_fixup(ASMState *as, TraceNo lnk) | ||
1918 | { | ||
1919 | MCode *p = as->mctop; | ||
1920 | MCode *target; | ||
1921 | /* Undo the sp adjustment in BC_JLOOP when exiting to the interpreter. */ | ||
1922 | int32_t spadj = as->T->spadjust + (lnk ? 0 : sps_scale(SPS_FIXED)); | ||
1923 | if (spadj == 0) { | ||
1924 | *--p = A64I_LE(A64I_NOP); | ||
1925 | as->mctop = p; | ||
1926 | } else { | ||
1927 | /* Patch stack adjustment. */ | ||
1928 | uint32_t k = emit_isk12(spadj); | ||
1929 | lua_assert(k); | ||
1930 | p[-2] = (A64I_ADDx^k) | A64F_D(RID_SP) | A64F_N(RID_SP); | ||
1931 | } | ||
1932 | /* Patch exit branch. */ | ||
1933 | target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; | ||
1934 | p[-1] = A64I_B | A64F_S26((target-p)+1); | ||
1935 | } | ||
1936 | |||
1937 | /* Prepare tail of code. */ | ||
1938 | static void asm_tail_prep(ASMState *as) | ||
1939 | { | ||
1940 | MCode *p = as->mctop - 1; /* Leave room for exit branch. */ | ||
1941 | if (as->loopref) { | ||
1942 | as->invmcp = as->mcp = p; | ||
1943 | } else { | ||
1944 | as->mcp = p-1; /* Leave room for stack pointer adjustment. */ | ||
1945 | as->invmcp = NULL; | ||
1946 | } | ||
1947 | *p = 0; /* Prevent load/store merging. */ | ||
1948 | } | ||
1949 | |||
1950 | /* -- Trace setup --------------------------------------------------------- */ | ||
1951 | |||
1952 | /* Ensure there are enough stack slots for call arguments. */ | ||
1953 | static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) | ||
1954 | { | ||
1955 | IRRef args[CCI_NARGS_MAX*2]; | ||
1956 | uint32_t i, nargs = CCI_XNARGS(ci); | ||
1957 | int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; | ||
1958 | asm_collectargs(as, ir, ci, args); | ||
1959 | for (i = 0; i < nargs; i++) { | ||
1960 | if (args[i] && irt_isfp(IR(args[i])->t)) { | ||
1961 | if (nfpr > 0) nfpr--; else nslots += 2; | ||
1962 | } else { | ||
1963 | if (ngpr > 0) ngpr--; else nslots += 2; | ||
1964 | } | ||
1965 | } | ||
1966 | if (nslots > as->evenspill) /* Leave room for args in stack slots. */ | ||
1967 | as->evenspill = nslots; | ||
1968 | return REGSP_HINT(RID_RET); | ||
1969 | } | ||
1970 | |||
1971 | static void asm_setup_target(ASMState *as) | ||
1972 | { | ||
1973 | /* May need extra exit for asm_stack_check on side traces. */ | ||
1974 | asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0)); | ||
1975 | } | ||
1976 | |||
#if LJ_BE
/* ARM64 instructions are always little-endian. Swap for ARM64BE.
** Byte-swaps every instruction word in [mcode, mcode+size), size in bytes.
*/
static void asm_mcode_fixup(MCode *mcode, MSize size)
{
  MCode *end = (MCode *)((char *)mcode + size);
  for (; mcode < end; mcode++)
    *mcode = lj_bswap(*mcode);
}
#define LJ_TARGET_MCODE_FIXUP	1
#endif
1989 | |||
1990 | /* -- Trace patching ------------------------------------------------------ */ | ||
1991 | |||
1992 | /* Patch exit jumps of existing machine code to a new target. */ | ||
1993 | void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) | ||
1994 | { | ||
1995 | MCode *p = T->mcode; | ||
1996 | MCode *pe = (MCode *)((char *)p + T->szmcode); | ||
1997 | MCode *cstart = NULL; | ||
1998 | MCode *mcarea = lj_mcode_patch(J, p, 0); | ||
1999 | MCode *px = exitstub_trace_addr(T, exitno); | ||
2000 | /* Note: this assumes a trace exit is only ever patched once. */ | ||
2001 | for (; p < pe; p++) { | ||
2002 | /* Look for exitstub branch, replace with branch to target. */ | ||
2003 | ptrdiff_t delta = target - p; | ||
2004 | MCode ins = A64I_LE(*p); | ||
2005 | if ((ins & 0xff000000u) == 0x54000000u && | ||
2006 | ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) { | ||
2007 | /* Patch bcc, if within range. */ | ||
2008 | if (A64F_S_OK(delta, 19)) { | ||
2009 | *p = A64I_LE((ins & 0xff00001fu) | A64F_S19(delta)); | ||
2010 | if (!cstart) cstart = p; | ||
2011 | } | ||
2012 | } else if ((ins & 0xfc000000u) == 0x14000000u && | ||
2013 | ((ins ^ (px-p)) & 0x03ffffffu) == 0) { | ||
2014 | /* Patch b. */ | ||
2015 | lua_assert(A64F_S_OK(delta, 26)); | ||
2016 | *p = A64I_LE((ins & 0xfc000000u) | A64F_S26(delta)); | ||
2017 | if (!cstart) cstart = p; | ||
2018 | } else if ((ins & 0x7e000000u) == 0x34000000u && | ||
2019 | ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) { | ||
2020 | /* Patch cbz/cbnz, if within range. */ | ||
2021 | if (A64F_S_OK(delta, 19)) { | ||
2022 | *p = A64I_LE((ins & 0xff00001fu) | A64F_S19(delta)); | ||
2023 | if (!cstart) cstart = p; | ||
2024 | } | ||
2025 | } else if ((ins & 0x7e000000u) == 0x36000000u && | ||
2026 | ((ins ^ ((px-p)<<5)) & 0x0007ffe0u) == 0) { | ||
2027 | /* Patch tbz/tbnz, if within range. */ | ||
2028 | if (A64F_S_OK(delta, 14)) { | ||
2029 | *p = A64I_LE((ins & 0xfff8001fu) | A64F_S14(delta)); | ||
2030 | if (!cstart) cstart = p; | ||
2031 | } | ||
2032 | } | ||
2033 | } | ||
2034 | { /* Always patch long-range branch in exit stub itself. */ | ||
2035 | ptrdiff_t delta = target - px; | ||
2036 | lua_assert(A64F_S_OK(delta, 26)); | ||
2037 | *px = A64I_B | A64F_S26(delta); | ||
2038 | if (!cstart) cstart = px; | ||
2039 | } | ||
2040 | lj_mcode_sync(cstart, px+1); | ||
2041 | lj_mcode_patch(J, mcarea, 1); | ||
2042 | } | ||
2043 | |||
diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h index 190a55eb..9309b781 100644 --- a/src/lj_asm_mips.h +++ b/src/lj_asm_mips.h | |||
@@ -23,7 +23,7 @@ static Reg ra_alloc1z(ASMState *as, IRRef ref, RegSet allow) | |||
23 | { | 23 | { |
24 | Reg r = IR(ref)->r; | 24 | Reg r = IR(ref)->r; |
25 | if (ra_noreg(r)) { | 25 | if (ra_noreg(r)) { |
26 | if (!(allow & RSET_FPR) && irref_isk(ref) && IR(ref)->i == 0) | 26 | if (!(allow & RSET_FPR) && irref_isk(ref) && get_kval(IR(ref)) == 0) |
27 | return RID_ZERO; | 27 | return RID_ZERO; |
28 | r = ra_allocref(as, ref, allow); | 28 | r = ra_allocref(as, ref, allow); |
29 | } else { | 29 | } else { |
@@ -101,7 +101,12 @@ static void asm_guard(ASMState *as, MIPSIns mi, Reg rs, Reg rt) | |||
101 | as->invmcp = NULL; | 101 | as->invmcp = NULL; |
102 | as->loopinv = 1; | 102 | as->loopinv = 1; |
103 | as->mcp = p+1; | 103 | as->mcp = p+1; |
104 | #if !LJ_TARGET_MIPSR6 | ||
104 | mi = mi ^ ((mi>>28) == 1 ? 0x04000000u : 0x00010000u); /* Invert cond. */ | 105 | mi = mi ^ ((mi>>28) == 1 ? 0x04000000u : 0x00010000u); /* Invert cond. */ |
106 | #else | ||
107 | mi = mi ^ ((mi>>28) == 1 ? 0x04000000u : | ||
108 | (mi>>28) == 4 ? 0x00800000u : 0x00010000u); /* Invert cond. */ | ||
109 | #endif | ||
105 | target = p; /* Patch target later in asm_loop_fixup. */ | 110 | target = p; /* Patch target later in asm_loop_fixup. */ |
106 | } | 111 | } |
107 | emit_ti(as, MIPSI_LI, RID_TMP, as->snapno); | 112 | emit_ti(as, MIPSI_LI, RID_TMP, as->snapno); |
@@ -165,9 +170,9 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow) | |||
165 | } else if (ir->o == IR_UREFC) { | 170 | } else if (ir->o == IR_UREFC) { |
166 | if (irref_isk(ir->op1)) { | 171 | if (irref_isk(ir->op1)) { |
167 | GCfunc *fn = ir_kfunc(IR(ir->op1)); | 172 | GCfunc *fn = ir_kfunc(IR(ir->op1)); |
168 | int32_t ofs = i32ptr(&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv); | 173 | intptr_t ofs = (intptr_t)&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv; |
169 | int32_t jgl = (intptr_t)J2G(as->J); | 174 | intptr_t jgl = (intptr_t)J2G(as->J); |
170 | if ((uint32_t)(ofs-jgl) < 65536) { | 175 | if ((uintptr_t)(ofs-jgl) < 65536) { |
171 | *ofsp = ofs-jgl-32768; | 176 | *ofsp = ofs-jgl-32768; |
172 | return RID_JGL; | 177 | return RID_JGL; |
173 | } else { | 178 | } else { |
@@ -189,20 +194,21 @@ static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref, | |||
189 | Reg base; | 194 | Reg base; |
190 | if (ra_noreg(ir->r) && canfuse(as, ir)) { | 195 | if (ra_noreg(ir->r) && canfuse(as, ir)) { |
191 | if (ir->o == IR_ADD) { | 196 | if (ir->o == IR_ADD) { |
192 | int32_t ofs2; | 197 | intptr_t ofs2; |
193 | if (irref_isk(ir->op2) && (ofs2 = ofs + IR(ir->op2)->i, checki16(ofs2))) { | 198 | if (irref_isk(ir->op2) && (ofs2 = ofs + get_kval(IR(ir->op2)), |
199 | checki16(ofs2))) { | ||
194 | ref = ir->op1; | 200 | ref = ir->op1; |
195 | ofs = ofs2; | 201 | ofs = (int32_t)ofs2; |
196 | } | 202 | } |
197 | } else if (ir->o == IR_STRREF) { | 203 | } else if (ir->o == IR_STRREF) { |
198 | int32_t ofs2 = 65536; | 204 | intptr_t ofs2 = 65536; |
199 | lua_assert(ofs == 0); | 205 | lua_assert(ofs == 0); |
200 | ofs = (int32_t)sizeof(GCstr); | 206 | ofs = (int32_t)sizeof(GCstr); |
201 | if (irref_isk(ir->op2)) { | 207 | if (irref_isk(ir->op2)) { |
202 | ofs2 = ofs + IR(ir->op2)->i; | 208 | ofs2 = ofs + get_kval(IR(ir->op2)); |
203 | ref = ir->op1; | 209 | ref = ir->op1; |
204 | } else if (irref_isk(ir->op1)) { | 210 | } else if (irref_isk(ir->op1)) { |
205 | ofs2 = ofs + IR(ir->op1)->i; | 211 | ofs2 = ofs + get_kval(IR(ir->op1)); |
206 | ref = ir->op2; | 212 | ref = ir->op2; |
207 | } | 213 | } |
208 | if (!checki16(ofs2)) { | 214 | if (!checki16(ofs2)) { |
@@ -210,7 +216,7 @@ static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref, | |||
210 | Reg right, left = ra_alloc2(as, ir, allow); | 216 | Reg right, left = ra_alloc2(as, ir, allow); |
211 | right = (left >> 8); left &= 255; | 217 | right = (left >> 8); left &= 255; |
212 | emit_hsi(as, mi, rt, RID_TMP, ofs); | 218 | emit_hsi(as, mi, rt, RID_TMP, ofs); |
213 | emit_dst(as, MIPSI_ADDU, RID_TMP, left, right); | 219 | emit_dst(as, MIPSI_AADDU, RID_TMP, left, right); |
214 | return; | 220 | return; |
215 | } | 221 | } |
216 | ofs = ofs2; | 222 | ofs = ofs2; |
@@ -225,29 +231,41 @@ static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref, | |||
225 | /* Generate a call to a C function. */ | 231 | /* Generate a call to a C function. */ |
226 | static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) | 232 | static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) |
227 | { | 233 | { |
228 | uint32_t n, nargs = CCI_NARGS(ci); | 234 | uint32_t n, nargs = CCI_XNARGS(ci); |
229 | int32_t ofs = 16; | 235 | int32_t ofs = LJ_32 ? 16 : 0; |
236 | #if LJ_SOFTFP | ||
237 | Reg gpr = REGARG_FIRSTGPR; | ||
238 | #else | ||
230 | Reg gpr, fpr = REGARG_FIRSTFPR; | 239 | Reg gpr, fpr = REGARG_FIRSTFPR; |
240 | #endif | ||
231 | if ((void *)ci->func) | 241 | if ((void *)ci->func) |
232 | emit_call(as, (void *)ci->func); | 242 | emit_call(as, (void *)ci->func, 1); |
243 | #if !LJ_SOFTFP | ||
233 | for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++) | 244 | for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++) |
234 | as->cost[gpr] = REGCOST(~0u, ASMREF_L); | 245 | as->cost[gpr] = REGCOST(~0u, ASMREF_L); |
235 | gpr = REGARG_FIRSTGPR; | 246 | gpr = REGARG_FIRSTGPR; |
247 | #endif | ||
236 | for (n = 0; n < nargs; n++) { /* Setup args. */ | 248 | for (n = 0; n < nargs; n++) { /* Setup args. */ |
237 | IRRef ref = args[n]; | 249 | IRRef ref = args[n]; |
238 | if (ref) { | 250 | if (ref) { |
239 | IRIns *ir = IR(ref); | 251 | IRIns *ir = IR(ref); |
252 | #if !LJ_SOFTFP | ||
240 | if (irt_isfp(ir->t) && fpr <= REGARG_LASTFPR && | 253 | if (irt_isfp(ir->t) && fpr <= REGARG_LASTFPR && |
241 | !(ci->flags & CCI_VARARG)) { | 254 | !(ci->flags & CCI_VARARG)) { |
242 | lua_assert(rset_test(as->freeset, fpr)); /* Already evicted. */ | 255 | lua_assert(rset_test(as->freeset, fpr)); /* Already evicted. */ |
243 | ra_leftov(as, fpr, ref); | 256 | ra_leftov(as, fpr, ref); |
244 | fpr += 2; | 257 | fpr += LJ_32 ? 2 : 1; |
245 | gpr += irt_isnum(ir->t) ? 2 : 1; | 258 | gpr += (LJ_32 && irt_isnum(ir->t)) ? 2 : 1; |
246 | } else { | 259 | } else |
260 | #endif | ||
261 | { | ||
262 | #if LJ_32 && !LJ_SOFTFP | ||
247 | fpr = REGARG_LASTFPR+1; | 263 | fpr = REGARG_LASTFPR+1; |
248 | if (irt_isnum(ir->t)) gpr = (gpr+1) & ~1; | 264 | #endif |
265 | if (LJ_32 && irt_isnum(ir->t)) gpr = (gpr+1) & ~1; | ||
249 | if (gpr <= REGARG_LASTGPR) { | 266 | if (gpr <= REGARG_LASTGPR) { |
250 | lua_assert(rset_test(as->freeset, gpr)); /* Already evicted. */ | 267 | lua_assert(rset_test(as->freeset, gpr)); /* Already evicted. */ |
268 | #if !LJ_SOFTFP | ||
251 | if (irt_isfp(ir->t)) { | 269 | if (irt_isfp(ir->t)) { |
252 | RegSet of = as->freeset; | 270 | RegSet of = as->freeset; |
253 | Reg r; | 271 | Reg r; |
@@ -256,31 +274,55 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) | |||
256 | r = ra_alloc1(as, ref, RSET_FPR); | 274 | r = ra_alloc1(as, ref, RSET_FPR); |
257 | as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1)); | 275 | as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1)); |
258 | if (irt_isnum(ir->t)) { | 276 | if (irt_isnum(ir->t)) { |
277 | #if LJ_32 | ||
259 | emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?0:1), r+1); | 278 | emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?0:1), r+1); |
260 | emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?1:0), r); | 279 | emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?1:0), r); |
261 | lua_assert(rset_test(as->freeset, gpr+1)); /* Already evicted. */ | 280 | lua_assert(rset_test(as->freeset, gpr+1)); /* Already evicted. */ |
262 | gpr += 2; | 281 | gpr += 2; |
282 | #else | ||
283 | emit_tg(as, MIPSI_DMFC1, gpr, r); | ||
284 | gpr++; fpr++; | ||
285 | #endif | ||
263 | } else if (irt_isfloat(ir->t)) { | 286 | } else if (irt_isfloat(ir->t)) { |
264 | emit_tg(as, MIPSI_MFC1, gpr, r); | 287 | emit_tg(as, MIPSI_MFC1, gpr, r); |
265 | gpr++; | 288 | gpr++; |
289 | #if LJ_64 | ||
290 | fpr++; | ||
291 | #endif | ||
266 | } | 292 | } |
267 | } else { | 293 | } else |
294 | #endif | ||
295 | { | ||
268 | ra_leftov(as, gpr, ref); | 296 | ra_leftov(as, gpr, ref); |
269 | gpr++; | 297 | gpr++; |
298 | #if LJ_64 && !LJ_SOFTFP | ||
299 | fpr++; | ||
300 | #endif | ||
270 | } | 301 | } |
271 | } else { | 302 | } else { |
272 | Reg r = ra_alloc1z(as, ref, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); | 303 | Reg r = ra_alloc1z(as, ref, !LJ_SOFTFP && irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); |
304 | #if LJ_32 | ||
273 | if (irt_isnum(ir->t)) ofs = (ofs + 4) & ~4; | 305 | if (irt_isnum(ir->t)) ofs = (ofs + 4) & ~4; |
274 | emit_spstore(as, ir, r, ofs); | 306 | emit_spstore(as, ir, r, ofs); |
275 | ofs += irt_isnum(ir->t) ? 8 : 4; | 307 | ofs += irt_isnum(ir->t) ? 8 : 4; |
308 | #else | ||
309 | emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_isfp(ir->t) && !irt_is64(ir->t)) ? 4 : 0)); | ||
310 | ofs += 8; | ||
311 | #endif | ||
276 | } | 312 | } |
277 | } | 313 | } |
278 | } else { | 314 | } else { |
315 | #if !LJ_SOFTFP | ||
279 | fpr = REGARG_LASTFPR+1; | 316 | fpr = REGARG_LASTFPR+1; |
280 | if (gpr <= REGARG_LASTGPR) | 317 | #endif |
318 | if (gpr <= REGARG_LASTGPR) { | ||
281 | gpr++; | 319 | gpr++; |
282 | else | 320 | #if LJ_64 && !LJ_SOFTFP |
283 | ofs += 4; | 321 | fpr++; |
322 | #endif | ||
323 | } else { | ||
324 | ofs += LJ_32 ? 4 : 8; | ||
325 | } | ||
284 | } | 326 | } |
285 | checkmclim(as); | 327 | checkmclim(as); |
286 | } | 328 | } |
@@ -290,50 +332,57 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) | |||
290 | static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) | 332 | static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) |
291 | { | 333 | { |
292 | RegSet drop = RSET_SCRATCH; | 334 | RegSet drop = RSET_SCRATCH; |
335 | #if LJ_32 | ||
293 | int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); | 336 | int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); |
337 | #endif | ||
338 | #if !LJ_SOFTFP | ||
294 | if ((ci->flags & CCI_NOFPRCLOBBER)) | 339 | if ((ci->flags & CCI_NOFPRCLOBBER)) |
295 | drop &= ~RSET_FPR; | 340 | drop &= ~RSET_FPR; |
341 | #endif | ||
296 | if (ra_hasreg(ir->r)) | 342 | if (ra_hasreg(ir->r)) |
297 | rset_clear(drop, ir->r); /* Dest reg handled below. */ | 343 | rset_clear(drop, ir->r); /* Dest reg handled below. */ |
344 | #if LJ_32 | ||
298 | if (hiop && ra_hasreg((ir+1)->r)) | 345 | if (hiop && ra_hasreg((ir+1)->r)) |
299 | rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ | 346 | rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ |
347 | #endif | ||
300 | ra_evictset(as, drop); /* Evictions must be performed first. */ | 348 | ra_evictset(as, drop); /* Evictions must be performed first. */ |
301 | if (ra_used(ir)) { | 349 | if (ra_used(ir)) { |
302 | lua_assert(!irt_ispri(ir->t)); | 350 | lua_assert(!irt_ispri(ir->t)); |
303 | if (irt_isfp(ir->t)) { | 351 | if (!LJ_SOFTFP && irt_isfp(ir->t)) { |
304 | if ((ci->flags & CCI_CASTU64)) { | 352 | if ((ci->flags & CCI_CASTU64)) { |
305 | int32_t ofs = sps_scale(ir->s); | 353 | int32_t ofs = sps_scale(ir->s); |
306 | Reg dest = ir->r; | 354 | Reg dest = ir->r; |
307 | if (ra_hasreg(dest)) { | 355 | if (ra_hasreg(dest)) { |
308 | ra_free(as, dest); | 356 | ra_free(as, dest); |
309 | ra_modified(as, dest); | 357 | ra_modified(as, dest); |
358 | #if LJ_32 | ||
310 | emit_tg(as, MIPSI_MTC1, RID_RETHI, dest+1); | 359 | emit_tg(as, MIPSI_MTC1, RID_RETHI, dest+1); |
311 | emit_tg(as, MIPSI_MTC1, RID_RETLO, dest); | 360 | emit_tg(as, MIPSI_MTC1, RID_RETLO, dest); |
361 | #else | ||
362 | emit_tg(as, MIPSI_DMTC1, RID_RET, dest); | ||
363 | #endif | ||
312 | } | 364 | } |
313 | if (ofs) { | 365 | if (ofs) { |
366 | #if LJ_32 | ||
314 | emit_tsi(as, MIPSI_SW, RID_RETLO, RID_SP, ofs+(LJ_BE?4:0)); | 367 | emit_tsi(as, MIPSI_SW, RID_RETLO, RID_SP, ofs+(LJ_BE?4:0)); |
315 | emit_tsi(as, MIPSI_SW, RID_RETHI, RID_SP, ofs+(LJ_BE?0:4)); | 368 | emit_tsi(as, MIPSI_SW, RID_RETHI, RID_SP, ofs+(LJ_BE?0:4)); |
369 | #else | ||
370 | emit_tsi(as, MIPSI_SD, RID_RET, RID_SP, ofs); | ||
371 | #endif | ||
316 | } | 372 | } |
317 | } else { | 373 | } else { |
318 | ra_destreg(as, ir, RID_FPRET); | 374 | ra_destreg(as, ir, RID_FPRET); |
319 | } | 375 | } |
376 | #if LJ_32 | ||
320 | } else if (hiop) { | 377 | } else if (hiop) { |
321 | ra_destpair(as, ir); | 378 | ra_destpair(as, ir); |
379 | #endif | ||
322 | } else { | 380 | } else { |
323 | ra_destreg(as, ir, RID_RET); | 381 | ra_destreg(as, ir, RID_RET); |
324 | } | 382 | } |
325 | } | 383 | } |
326 | } | 384 | } |
327 | 385 | ||
328 | static void asm_call(ASMState *as, IRIns *ir) | ||
329 | { | ||
330 | IRRef args[CCI_NARGS_MAX]; | ||
331 | const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; | ||
332 | asm_collectargs(as, ir, ci, args); | ||
333 | asm_setupresult(as, ir, ci); | ||
334 | asm_gencall(as, ci, args); | ||
335 | } | ||
336 | |||
337 | static void asm_callx(ASMState *as, IRIns *ir) | 386 | static void asm_callx(ASMState *as, IRIns *ir) |
338 | { | 387 | { |
339 | IRRef args[CCI_NARGS_MAX*2]; | 388 | IRRef args[CCI_NARGS_MAX*2]; |
@@ -346,7 +395,7 @@ static void asm_callx(ASMState *as, IRIns *ir) | |||
346 | func = ir->op2; irf = IR(func); | 395 | func = ir->op2; irf = IR(func); |
347 | if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } | 396 | if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } |
348 | if (irref_isk(func)) { /* Call to constant address. */ | 397 | if (irref_isk(func)) { /* Call to constant address. */ |
349 | ci.func = (ASMFunction)(void *)(irf->i); | 398 | ci.func = (ASMFunction)(void *)get_kval(irf); |
350 | } else { /* Need specific register for indirect calls. */ | 399 | } else { /* Need specific register for indirect calls. */ |
351 | Reg r = ra_alloc1(as, func, RID2RSET(RID_CFUNCADDR)); | 400 | Reg r = ra_alloc1(as, func, RID2RSET(RID_CFUNCADDR)); |
352 | MCode *p = as->mcp; | 401 | MCode *p = as->mcp; |
@@ -361,27 +410,23 @@ static void asm_callx(ASMState *as, IRIns *ir) | |||
361 | asm_gencall(as, &ci, args); | 410 | asm_gencall(as, &ci, args); |
362 | } | 411 | } |
363 | 412 | ||
364 | static void asm_callid(ASMState *as, IRIns *ir, IRCallID id) | 413 | #if !LJ_SOFTFP |
365 | { | ||
366 | const CCallInfo *ci = &lj_ir_callinfo[id]; | ||
367 | IRRef args[2]; | ||
368 | args[0] = ir->op1; | ||
369 | args[1] = ir->op2; | ||
370 | asm_setupresult(as, ir, ci); | ||
371 | asm_gencall(as, ci, args); | ||
372 | } | ||
373 | |||
374 | static void asm_callround(ASMState *as, IRIns *ir, IRCallID id) | 414 | static void asm_callround(ASMState *as, IRIns *ir, IRCallID id) |
375 | { | 415 | { |
376 | /* The modified regs must match with the *.dasc implementation. */ | 416 | /* The modified regs must match with the *.dasc implementation. */ |
377 | RegSet drop = RID2RSET(RID_R1)|RID2RSET(RID_R12)|RID2RSET(RID_FPRET)| | 417 | RegSet drop = RID2RSET(RID_R1)|RID2RSET(RID_R12)|RID2RSET(RID_FPRET)| |
378 | RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(REGARG_FIRSTFPR); | 418 | RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(REGARG_FIRSTFPR) |
419 | #if LJ_TARGET_MIPSR6 | ||
420 | |RID2RSET(RID_F21) | ||
421 | #endif | ||
422 | ; | ||
379 | if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); | 423 | if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); |
380 | ra_evictset(as, drop); | 424 | ra_evictset(as, drop); |
381 | ra_destreg(as, ir, RID_FPRET); | 425 | ra_destreg(as, ir, RID_FPRET); |
382 | emit_call(as, (void *)lj_ir_callinfo[id].func); | 426 | emit_call(as, (void *)lj_ir_callinfo[id].func, 0); |
383 | ra_leftov(as, REGARG_FIRSTFPR, ir->op1); | 427 | ra_leftov(as, REGARG_FIRSTFPR, ir->op1); |
384 | } | 428 | } |
429 | #endif | ||
385 | 430 | ||
386 | /* -- Returns ------------------------------------------------------------- */ | 431 | /* -- Returns ------------------------------------------------------------- */ |
387 | 432 | ||
@@ -390,25 +435,31 @@ static void asm_retf(ASMState *as, IRIns *ir) | |||
390 | { | 435 | { |
391 | Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); | 436 | Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); |
392 | void *pc = ir_kptr(IR(ir->op2)); | 437 | void *pc = ir_kptr(IR(ir->op2)); |
393 | int32_t delta = 1+bc_a(*((const BCIns *)pc - 1)); | 438 | int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); |
394 | as->topslot -= (BCReg)delta; | 439 | as->topslot -= (BCReg)delta; |
395 | if ((int32_t)as->topslot < 0) as->topslot = 0; | 440 | if ((int32_t)as->topslot < 0) as->topslot = 0; |
396 | irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ | 441 | irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ |
397 | emit_setgl(as, base, jit_base); | 442 | emit_setgl(as, base, jit_base); |
398 | emit_addptr(as, base, -8*delta); | 443 | emit_addptr(as, base, -8*delta); |
399 | asm_guard(as, MIPSI_BNE, RID_TMP, | 444 | asm_guard(as, MIPSI_BNE, RID_TMP, |
400 | ra_allock(as, i32ptr(pc), rset_exclude(RSET_GPR, base))); | 445 | ra_allock(as, igcptr(pc), rset_exclude(RSET_GPR, base))); |
401 | emit_tsi(as, MIPSI_LW, RID_TMP, base, -8); | 446 | emit_tsi(as, MIPSI_AL, RID_TMP, base, -8); |
402 | } | 447 | } |
403 | 448 | ||
404 | /* -- Type conversions ---------------------------------------------------- */ | 449 | /* -- Type conversions ---------------------------------------------------- */ |
405 | 450 | ||
451 | #if !LJ_SOFTFP | ||
406 | static void asm_tointg(ASMState *as, IRIns *ir, Reg left) | 452 | static void asm_tointg(ASMState *as, IRIns *ir, Reg left) |
407 | { | 453 | { |
408 | Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); | 454 | Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); |
409 | Reg dest = ra_dest(as, ir, RSET_GPR); | 455 | Reg dest = ra_dest(as, ir, RSET_GPR); |
456 | #if !LJ_TARGET_MIPSR6 | ||
410 | asm_guard(as, MIPSI_BC1F, 0, 0); | 457 | asm_guard(as, MIPSI_BC1F, 0, 0); |
411 | emit_fgh(as, MIPSI_C_EQ_D, 0, tmp, left); | 458 | emit_fgh(as, MIPSI_C_EQ_D, 0, tmp, left); |
459 | #else | ||
460 | asm_guard(as, MIPSI_BC1EQZ, 0, (tmp&31)); | ||
461 | emit_fgh(as, MIPSI_CMP_EQ_D, tmp, tmp, left); | ||
462 | #endif | ||
412 | emit_fg(as, MIPSI_CVT_D_W, tmp, tmp); | 463 | emit_fg(as, MIPSI_CVT_D_W, tmp, tmp); |
413 | emit_tg(as, MIPSI_MFC1, dest, tmp); | 464 | emit_tg(as, MIPSI_MFC1, dest, tmp); |
414 | emit_fg(as, MIPSI_CVT_W_D, tmp, left); | 465 | emit_fg(as, MIPSI_CVT_W_D, tmp, left); |
@@ -424,15 +475,53 @@ static void asm_tobit(ASMState *as, IRIns *ir) | |||
424 | emit_tg(as, MIPSI_MFC1, dest, tmp); | 475 | emit_tg(as, MIPSI_MFC1, dest, tmp); |
425 | emit_fgh(as, MIPSI_ADD_D, tmp, left, right); | 476 | emit_fgh(as, MIPSI_ADD_D, tmp, left, right); |
426 | } | 477 | } |
478 | #elif LJ_64 /* && LJ_SOFTFP */ | ||
479 | static void asm_tointg(ASMState *as, IRIns *ir, Reg r) | ||
480 | { | ||
481 | /* The modified regs must match with the *.dasc implementation. */ | ||
482 | RegSet drop = RID2RSET(REGARG_FIRSTGPR)|RID2RSET(RID_RET)|RID2RSET(RID_RET+1)| | ||
483 | RID2RSET(RID_R1)|RID2RSET(RID_R12); | ||
484 | if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); | ||
485 | ra_evictset(as, drop); | ||
486 | /* Return values are in RID_RET (converted value) and RID_RET+1 (status). */ | ||
487 | ra_destreg(as, ir, RID_RET); | ||
488 | asm_guard(as, MIPSI_BNE, RID_RET+1, RID_ZERO); | ||
489 | emit_call(as, (void *)lj_ir_callinfo[IRCALL_lj_vm_tointg].func, 0); | ||
490 | if (r == RID_NONE) | ||
491 | ra_leftov(as, REGARG_FIRSTGPR, ir->op1); | ||
492 | else if (r != REGARG_FIRSTGPR) | ||
493 | emit_move(as, REGARG_FIRSTGPR, r); | ||
494 | } | ||
495 | |||
496 | static void asm_tobit(ASMState *as, IRIns *ir) | ||
497 | { | ||
498 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
499 | emit_dta(as, MIPSI_SLL, dest, dest, 0); | ||
500 | asm_callid(as, ir, IRCALL_lj_vm_tobit); | ||
501 | } | ||
502 | #endif | ||
427 | 503 | ||
428 | static void asm_conv(ASMState *as, IRIns *ir) | 504 | static void asm_conv(ASMState *as, IRIns *ir) |
429 | { | 505 | { |
430 | IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); | 506 | IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); |
507 | #if !LJ_SOFTFP32 | ||
431 | int stfp = (st == IRT_NUM || st == IRT_FLOAT); | 508 | int stfp = (st == IRT_NUM || st == IRT_FLOAT); |
509 | #endif | ||
510 | #if LJ_64 | ||
511 | int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64); | ||
512 | #endif | ||
432 | IRRef lref = ir->op1; | 513 | IRRef lref = ir->op1; |
433 | lua_assert(irt_type(ir->t) != st); | 514 | #if LJ_32 |
434 | lua_assert(!(irt_isint64(ir->t) || | 515 | lua_assert(!(irt_isint64(ir->t) || |
435 | (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */ | 516 | (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */ |
517 | #endif | ||
518 | #if LJ_SOFTFP32 | ||
519 | /* FP conversions are handled by SPLIT. */ | ||
520 | lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT)); | ||
521 | /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */ | ||
522 | #else | ||
523 | lua_assert(irt_type(ir->t) != st); | ||
524 | #if !LJ_SOFTFP | ||
436 | if (irt_isfp(ir->t)) { | 525 | if (irt_isfp(ir->t)) { |
437 | Reg dest = ra_dest(as, ir, RSET_FPR); | 526 | Reg dest = ra_dest(as, ir, RSET_FPR); |
438 | if (stfp) { /* FP to FP conversion. */ | 527 | if (stfp) { /* FP to FP conversion. */ |
@@ -448,16 +537,44 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
448 | emit_fgh(as, MIPSI_ADD_D, dest, dest, tmp); | 537 | emit_fgh(as, MIPSI_ADD_D, dest, dest, tmp); |
449 | emit_fg(as, MIPSI_CVT_D_W, dest, dest); | 538 | emit_fg(as, MIPSI_CVT_D_W, dest, dest); |
450 | emit_lsptr(as, MIPSI_LDC1, (tmp & 31), | 539 | emit_lsptr(as, MIPSI_LDC1, (tmp & 31), |
451 | (void *)lj_ir_k64_find(as->J, U64x(41e00000,00000000)), | 540 | (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR); |
452 | RSET_GPR); | ||
453 | emit_tg(as, MIPSI_MTC1, RID_TMP, dest); | 541 | emit_tg(as, MIPSI_MTC1, RID_TMP, dest); |
454 | emit_dst(as, MIPSI_XOR, RID_TMP, RID_TMP, left); | 542 | emit_dst(as, MIPSI_XOR, RID_TMP, RID_TMP, left); |
455 | emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000); | 543 | emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000); |
544 | #if LJ_64 | ||
545 | } else if(st == IRT_U64) { /* U64 to FP conversion. */ | ||
546 | /* if (x >= 1u<<63) y = (double)(int64_t)(x&(1u<<63)-1) + pow(2.0, 63) */ | ||
547 | Reg left = ra_alloc1(as, lref, RSET_GPR); | ||
548 | Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, dest)); | ||
549 | MCLabel l_end = emit_label(as); | ||
550 | if (irt_isfloat(ir->t)) { | ||
551 | emit_fgh(as, MIPSI_ADD_S, dest, dest, tmp); | ||
552 | emit_lsptr(as, MIPSI_LWC1, (tmp & 31), (void *)&as->J->k32[LJ_K32_2P63], | ||
553 | rset_exclude(RSET_GPR, left)); | ||
554 | emit_fg(as, MIPSI_CVT_S_L, dest, dest); | ||
555 | } else { | ||
556 | emit_fgh(as, MIPSI_ADD_D, dest, dest, tmp); | ||
557 | emit_lsptr(as, MIPSI_LDC1, (tmp & 31), (void *)&as->J->k64[LJ_K64_2P63], | ||
558 | rset_exclude(RSET_GPR, left)); | ||
559 | emit_fg(as, MIPSI_CVT_D_L, dest, dest); | ||
560 | } | ||
561 | emit_branch(as, MIPSI_BGEZ, left, RID_ZERO, l_end); | ||
562 | emit_tg(as, MIPSI_DMTC1, RID_TMP, dest); | ||
563 | emit_tsml(as, MIPSI_DEXTM, RID_TMP, left, 30, 0); | ||
564 | #endif | ||
456 | } else { /* Integer to FP conversion. */ | 565 | } else { /* Integer to FP conversion. */ |
457 | Reg left = ra_alloc1(as, lref, RSET_GPR); | 566 | Reg left = ra_alloc1(as, lref, RSET_GPR); |
567 | #if LJ_32 | ||
458 | emit_fg(as, irt_isfloat(ir->t) ? MIPSI_CVT_S_W : MIPSI_CVT_D_W, | 568 | emit_fg(as, irt_isfloat(ir->t) ? MIPSI_CVT_S_W : MIPSI_CVT_D_W, |
459 | dest, dest); | 569 | dest, dest); |
460 | emit_tg(as, MIPSI_MTC1, left, dest); | 570 | emit_tg(as, MIPSI_MTC1, left, dest); |
571 | #else | ||
572 | MIPSIns mi = irt_isfloat(ir->t) ? | ||
573 | (st64 ? MIPSI_CVT_S_L : MIPSI_CVT_S_W) : | ||
574 | (st64 ? MIPSI_CVT_D_L : MIPSI_CVT_D_W); | ||
575 | emit_fg(as, mi, dest, dest); | ||
576 | emit_tg(as, st64 ? MIPSI_DMTC1 : MIPSI_MTC1, left, dest); | ||
577 | #endif | ||
461 | } | 578 | } |
462 | } else if (stfp) { /* FP to integer conversion. */ | 579 | } else if (stfp) { /* FP to integer conversion. */ |
463 | if (irt_isguard(ir->t)) { | 580 | if (irt_isguard(ir->t)) { |
@@ -468,7 +585,7 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
468 | Reg dest = ra_dest(as, ir, RSET_GPR); | 585 | Reg dest = ra_dest(as, ir, RSET_GPR); |
469 | Reg left = ra_alloc1(as, lref, RSET_FPR); | 586 | Reg left = ra_alloc1(as, lref, RSET_FPR); |
470 | Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); | 587 | Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); |
471 | if (irt_isu32(ir->t)) { | 588 | if (irt_isu32(ir->t)) { /* FP to U32 conversion. */ |
472 | /* y = (int)floor(x - 2147483648.0) ^ 0x80000000 */ | 589 | /* y = (int)floor(x - 2147483648.0) ^ 0x80000000 */ |
473 | emit_dst(as, MIPSI_XOR, dest, dest, RID_TMP); | 590 | emit_dst(as, MIPSI_XOR, dest, dest, RID_TMP); |
474 | emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000); | 591 | emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000); |
@@ -479,25 +596,111 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
479 | tmp, left, tmp); | 596 | tmp, left, tmp); |
480 | if (st == IRT_FLOAT) | 597 | if (st == IRT_FLOAT) |
481 | emit_lsptr(as, MIPSI_LWC1, (tmp & 31), | 598 | emit_lsptr(as, MIPSI_LWC1, (tmp & 31), |
482 | (void *)lj_ir_k64_find(as->J, U64x(4f000000,4f000000)), | 599 | (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR); |
483 | RSET_GPR); | ||
484 | else | 600 | else |
485 | emit_lsptr(as, MIPSI_LDC1, (tmp & 31), | 601 | emit_lsptr(as, MIPSI_LDC1, (tmp & 31), |
486 | (void *)lj_ir_k64_find(as->J, U64x(41e00000,00000000)), | 602 | (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR); |
487 | RSET_GPR); | 603 | #if LJ_64 |
604 | } else if (irt_isu64(ir->t)) { /* FP to U64 conversion. */ | ||
605 | MCLabel l_end; | ||
606 | emit_tg(as, MIPSI_DMFC1, dest, tmp); | ||
607 | l_end = emit_label(as); | ||
608 | /* For inputs >= 2^63 add -2^64 and convert again. */ | ||
609 | if (st == IRT_NUM) { | ||
610 | emit_fg(as, MIPSI_TRUNC_L_D, tmp, tmp); | ||
611 | emit_fgh(as, MIPSI_ADD_D, tmp, left, tmp); | ||
612 | emit_lsptr(as, MIPSI_LDC1, (tmp & 31), | ||
613 | (void *)&as->J->k64[LJ_K64_M2P64], | ||
614 | rset_exclude(RSET_GPR, dest)); | ||
615 | emit_fg(as, MIPSI_TRUNC_L_D, tmp, left); /* Delay slot. */ | ||
616 | #if !LJ_TARGET_MIPSR6 | ||
617 | emit_branch(as, MIPSI_BC1T, 0, 0, l_end); | ||
618 | emit_fgh(as, MIPSI_C_OLT_D, 0, left, tmp); | ||
619 | #else | ||
620 | emit_branch(as, MIPSI_BC1NEZ, 0, (left&31), l_end); | ||
621 | emit_fgh(as, MIPSI_CMP_LT_D, left, left, tmp); | ||
622 | #endif | ||
623 | emit_lsptr(as, MIPSI_LDC1, (tmp & 31), | ||
624 | (void *)&as->J->k64[LJ_K64_2P63], | ||
625 | rset_exclude(RSET_GPR, dest)); | ||
626 | } else { | ||
627 | emit_fg(as, MIPSI_TRUNC_L_S, tmp, tmp); | ||
628 | emit_fgh(as, MIPSI_ADD_S, tmp, left, tmp); | ||
629 | emit_lsptr(as, MIPSI_LWC1, (tmp & 31), | ||
630 | (void *)&as->J->k32[LJ_K32_M2P64], | ||
631 | rset_exclude(RSET_GPR, dest)); | ||
632 | emit_fg(as, MIPSI_TRUNC_L_S, tmp, left); /* Delay slot. */ | ||
633 | #if !LJ_TARGET_MIPSR6 | ||
634 | emit_branch(as, MIPSI_BC1T, 0, 0, l_end); | ||
635 | emit_fgh(as, MIPSI_C_OLT_S, 0, left, tmp); | ||
636 | #else | ||
637 | emit_branch(as, MIPSI_BC1NEZ, 0, (left&31), l_end); | ||
638 | emit_fgh(as, MIPSI_CMP_LT_S, left, left, tmp); | ||
639 | #endif | ||
640 | emit_lsptr(as, MIPSI_LWC1, (tmp & 31), | ||
641 | (void *)&as->J->k32[LJ_K32_2P63], | ||
642 | rset_exclude(RSET_GPR, dest)); | ||
643 | } | ||
644 | #endif | ||
488 | } else { | 645 | } else { |
646 | #if LJ_32 | ||
489 | emit_tg(as, MIPSI_MFC1, dest, tmp); | 647 | emit_tg(as, MIPSI_MFC1, dest, tmp); |
490 | emit_fg(as, st == IRT_FLOAT ? MIPSI_TRUNC_W_S : MIPSI_TRUNC_W_D, | 648 | emit_fg(as, st == IRT_FLOAT ? MIPSI_TRUNC_W_S : MIPSI_TRUNC_W_D, |
491 | tmp, left); | 649 | tmp, left); |
650 | #else | ||
651 | MIPSIns mi = irt_is64(ir->t) ? | ||
652 | (st == IRT_NUM ? MIPSI_TRUNC_L_D : MIPSI_TRUNC_L_S) : | ||
653 | (st == IRT_NUM ? MIPSI_TRUNC_W_D : MIPSI_TRUNC_W_S); | ||
654 | emit_tg(as, irt_is64(ir->t) ? MIPSI_DMFC1 : MIPSI_MFC1, dest, left); | ||
655 | emit_fg(as, mi, left, left); | ||
656 | #endif | ||
492 | } | 657 | } |
493 | } | 658 | } |
494 | } else { | 659 | } else |
660 | #else | ||
661 | if (irt_isfp(ir->t)) { | ||
662 | #if LJ_64 && LJ_HASFFI | ||
663 | if (stfp) { /* FP to FP conversion. */ | ||
664 | asm_callid(as, ir, irt_isnum(ir->t) ? IRCALL_softfp_f2d : | ||
665 | IRCALL_softfp_d2f); | ||
666 | } else { /* Integer to FP conversion. */ | ||
667 | IRCallID cid = ((IRT_IS64 >> st) & 1) ? | ||
668 | (irt_isnum(ir->t) ? | ||
669 | (st == IRT_I64 ? IRCALL_fp64_l2d : IRCALL_fp64_ul2d) : | ||
670 | (st == IRT_I64 ? IRCALL_fp64_l2f : IRCALL_fp64_ul2f)) : | ||
671 | (irt_isnum(ir->t) ? | ||
672 | (st == IRT_INT ? IRCALL_softfp_i2d : IRCALL_softfp_ui2d) : | ||
673 | (st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f)); | ||
674 | asm_callid(as, ir, cid); | ||
675 | } | ||
676 | #else | ||
677 | asm_callid(as, ir, IRCALL_softfp_i2d); | ||
678 | #endif | ||
679 | } else if (stfp) { /* FP to integer conversion. */ | ||
680 | if (irt_isguard(ir->t)) { | ||
681 | /* Checked conversions are only supported from number to int. */ | ||
682 | lua_assert(irt_isint(ir->t) && st == IRT_NUM); | ||
683 | asm_tointg(as, ir, RID_NONE); | ||
684 | } else { | ||
685 | IRCallID cid = irt_is64(ir->t) ? | ||
686 | ((st == IRT_NUM) ? | ||
687 | (irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul) : | ||
688 | (irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul)) : | ||
689 | ((st == IRT_NUM) ? | ||
690 | (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) : | ||
691 | (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui)); | ||
692 | asm_callid(as, ir, cid); | ||
693 | } | ||
694 | } else | ||
695 | #endif | ||
696 | #endif | ||
697 | { | ||
495 | Reg dest = ra_dest(as, ir, RSET_GPR); | 698 | Reg dest = ra_dest(as, ir, RSET_GPR); |
496 | if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ | 699 | if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ |
497 | Reg left = ra_alloc1(as, ir->op1, RSET_GPR); | 700 | Reg left = ra_alloc1(as, ir->op1, RSET_GPR); |
498 | lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); | 701 | lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); |
499 | if ((ir->op2 & IRCONV_SEXT)) { | 702 | if ((ir->op2 & IRCONV_SEXT)) { |
500 | if ((as->flags & JIT_F_MIPS32R2)) { | 703 | if (LJ_64 || (as->flags & JIT_F_MIPSXXR2)) { |
501 | emit_dst(as, st == IRT_I8 ? MIPSI_SEB : MIPSI_SEH, dest, 0, left); | 704 | emit_dst(as, st == IRT_I8 ? MIPSI_SEB : MIPSI_SEH, dest, 0, left); |
502 | } else { | 705 | } else { |
503 | uint32_t shift = st == IRT_I8 ? 24 : 16; | 706 | uint32_t shift = st == IRT_I8 ? 24 : 16; |
@@ -509,49 +712,108 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
509 | (int32_t)(st == IRT_U8 ? 0xff : 0xffff)); | 712 | (int32_t)(st == IRT_U8 ? 0xff : 0xffff)); |
510 | } | 713 | } |
511 | } else { /* 32/64 bit integer conversions. */ | 714 | } else { /* 32/64 bit integer conversions. */ |
715 | #if LJ_32 | ||
512 | /* Only need to handle 32/32 bit no-op (cast) on 32 bit archs. */ | 716 | /* Only need to handle 32/32 bit no-op (cast) on 32 bit archs. */ |
513 | ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ | 717 | ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ |
718 | #else | ||
719 | if (irt_is64(ir->t)) { | ||
720 | if (st64) { | ||
721 | /* 64/64 bit no-op (cast)*/ | ||
722 | ra_leftov(as, dest, lref); | ||
723 | } else { | ||
724 | Reg left = ra_alloc1(as, lref, RSET_GPR); | ||
725 | if ((ir->op2 & IRCONV_SEXT)) { /* 32 to 64 bit sign extension. */ | ||
726 | emit_dta(as, MIPSI_SLL, dest, left, 0); | ||
727 | } else { /* 32 to 64 bit zero extension. */ | ||
728 | emit_tsml(as, MIPSI_DEXT, dest, left, 31, 0); | ||
729 | } | ||
730 | } | ||
731 | } else { | ||
732 | if (st64) { | ||
733 | /* This is either a 32 bit reg/reg mov which zeroes the hiword | ||
734 | ** or a load of the loword from a 64 bit address. | ||
735 | */ | ||
736 | Reg left = ra_alloc1(as, lref, RSET_GPR); | ||
737 | emit_tsml(as, MIPSI_DEXT, dest, left, 31, 0); | ||
738 | } else { /* 32/32 bit no-op (cast). */ | ||
739 | /* Do nothing, but may need to move regs. */ | ||
740 | ra_leftov(as, dest, lref); | ||
741 | } | ||
742 | } | ||
743 | #endif | ||
514 | } | 744 | } |
515 | } | 745 | } |
516 | } | 746 | } |
517 | 747 | ||
518 | #if LJ_HASFFI | ||
519 | static void asm_conv64(ASMState *as, IRIns *ir) | ||
520 | { | ||
521 | IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK); | ||
522 | IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH); | ||
523 | IRCallID id; | ||
524 | const CCallInfo *ci; | ||
525 | IRRef args[2]; | ||
526 | args[LJ_BE?0:1] = ir->op1; | ||
527 | args[LJ_BE?1:0] = (ir-1)->op1; | ||
528 | if (st == IRT_NUM || st == IRT_FLOAT) { | ||
529 | id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64); | ||
530 | ir--; | ||
531 | } else { | ||
532 | id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64); | ||
533 | } | ||
534 | ci = &lj_ir_callinfo[id]; | ||
535 | asm_setupresult(as, ir, ci); | ||
536 | asm_gencall(as, ci, args); | ||
537 | } | ||
538 | #endif | ||
539 | |||
540 | static void asm_strto(ASMState *as, IRIns *ir) | 748 | static void asm_strto(ASMState *as, IRIns *ir) |
541 | { | 749 | { |
542 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; | 750 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; |
543 | IRRef args[2]; | 751 | IRRef args[2]; |
752 | int32_t ofs = 0; | ||
753 | #if LJ_SOFTFP32 | ||
754 | ra_evictset(as, RSET_SCRATCH); | ||
755 | if (ra_used(ir)) { | ||
756 | if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) && | ||
757 | (ir->s & 1) == LJ_BE && (ir->s ^ 1) == (ir+1)->s) { | ||
758 | int i; | ||
759 | for (i = 0; i < 2; i++) { | ||
760 | Reg r = (ir+i)->r; | ||
761 | if (ra_hasreg(r)) { | ||
762 | ra_free(as, r); | ||
763 | ra_modified(as, r); | ||
764 | emit_spload(as, ir+i, r, sps_scale((ir+i)->s)); | ||
765 | } | ||
766 | } | ||
767 | ofs = sps_scale(ir->s & ~1); | ||
768 | } else { | ||
769 | Reg rhi = ra_dest(as, ir+1, RSET_GPR); | ||
770 | Reg rlo = ra_dest(as, ir, rset_exclude(RSET_GPR, rhi)); | ||
771 | emit_tsi(as, MIPSI_LW, rhi, RID_SP, ofs+(LJ_BE?0:4)); | ||
772 | emit_tsi(as, MIPSI_LW, rlo, RID_SP, ofs+(LJ_BE?4:0)); | ||
773 | } | ||
774 | } | ||
775 | #else | ||
544 | RegSet drop = RSET_SCRATCH; | 776 | RegSet drop = RSET_SCRATCH; |
545 | if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */ | 777 | if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */ |
546 | ra_evictset(as, drop); | 778 | ra_evictset(as, drop); |
779 | ofs = sps_scale(ir->s); | ||
780 | #endif | ||
547 | asm_guard(as, MIPSI_BEQ, RID_RET, RID_ZERO); /* Test return status. */ | 781 | asm_guard(as, MIPSI_BEQ, RID_RET, RID_ZERO); /* Test return status. */ |
548 | args[0] = ir->op1; /* GCstr *str */ | 782 | args[0] = ir->op1; /* GCstr *str */ |
549 | args[1] = ASMREF_TMP1; /* TValue *n */ | 783 | args[1] = ASMREF_TMP1; /* TValue *n */ |
550 | asm_gencall(as, ci, args); | 784 | asm_gencall(as, ci, args); |
551 | /* Store the result to the spill slot or temp slots. */ | 785 | /* Store the result to the spill slot or temp slots. */ |
552 | emit_tsi(as, MIPSI_ADDIU, ra_releasetmp(as, ASMREF_TMP1), | 786 | emit_tsi(as, MIPSI_AADDIU, ra_releasetmp(as, ASMREF_TMP1), |
553 | RID_SP, sps_scale(ir->s)); | 787 | RID_SP, ofs); |
788 | } | ||
789 | |||
790 | /* -- Memory references --------------------------------------------------- */ | ||
791 | |||
792 | #if LJ_64 | ||
793 | /* Store tagged value for ref at base+ofs. */ | ||
794 | static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref) | ||
795 | { | ||
796 | RegSet allow = rset_exclude(RSET_GPR, base); | ||
797 | IRIns *ir = IR(ref); | ||
798 | lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); | ||
799 | if (irref_isk(ref)) { | ||
800 | TValue k; | ||
801 | lj_ir_kvalue(as->J->L, &k, ir); | ||
802 | emit_tsi(as, MIPSI_SD, ra_allock(as, (int64_t)k.u64, allow), base, ofs); | ||
803 | } else { | ||
804 | Reg src = ra_alloc1(as, ref, allow); | ||
805 | Reg type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, | ||
806 | rset_exclude(allow, src)); | ||
807 | emit_tsi(as, MIPSI_SD, RID_TMP, base, ofs); | ||
808 | if (irt_isinteger(ir->t)) { | ||
809 | emit_dst(as, MIPSI_DADDU, RID_TMP, RID_TMP, type); | ||
810 | emit_tsml(as, MIPSI_DEXT, RID_TMP, src, 31, 0); | ||
811 | } else { | ||
812 | emit_dst(as, MIPSI_DADDU, RID_TMP, src, type); | ||
813 | } | ||
814 | } | ||
554 | } | 815 | } |
816 | #endif | ||
555 | 817 | ||
556 | /* Get pointer to TValue. */ | 818 | /* Get pointer to TValue. */ |
557 | static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) | 819 | static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) |
@@ -559,44 +821,32 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) | |||
559 | IRIns *ir = IR(ref); | 821 | IRIns *ir = IR(ref); |
560 | if (irt_isnum(ir->t)) { | 822 | if (irt_isnum(ir->t)) { |
561 | if (irref_isk(ref)) /* Use the number constant itself as a TValue. */ | 823 | if (irref_isk(ref)) /* Use the number constant itself as a TValue. */ |
562 | ra_allockreg(as, i32ptr(ir_knum(ir)), dest); | 824 | ra_allockreg(as, igcptr(ir_knum(ir)), dest); |
563 | else /* Otherwise force a spill and use the spill slot. */ | 825 | else /* Otherwise force a spill and use the spill slot. */ |
564 | emit_tsi(as, MIPSI_ADDIU, dest, RID_SP, ra_spill(as, ir)); | 826 | emit_tsi(as, MIPSI_AADDIU, dest, RID_SP, ra_spill(as, ir)); |
565 | } else { | 827 | } else { |
566 | /* Otherwise use g->tmptv to hold the TValue. */ | 828 | /* Otherwise use g->tmptv to hold the TValue. */ |
829 | #if LJ_32 | ||
567 | RegSet allow = rset_exclude(RSET_GPR, dest); | 830 | RegSet allow = rset_exclude(RSET_GPR, dest); |
568 | Reg type; | 831 | Reg type; |
569 | emit_tsi(as, MIPSI_ADDIU, dest, RID_JGL, offsetof(global_State, tmptv)-32768); | 832 | emit_tsi(as, MIPSI_ADDIU, dest, RID_JGL, (int32_t)(offsetof(global_State, tmptv)-32768)); |
570 | if (!irt_ispri(ir->t)) { | 833 | if (!irt_ispri(ir->t)) { |
571 | Reg src = ra_alloc1(as, ref, allow); | 834 | Reg src = ra_alloc1(as, ref, allow); |
572 | emit_setgl(as, src, tmptv.gcr); | 835 | emit_setgl(as, src, tmptv.gcr); |
573 | } | 836 | } |
574 | type = ra_allock(as, irt_toitype(ir->t), allow); | 837 | if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) |
838 | type = ra_alloc1(as, ref+1, allow); | ||
839 | else | ||
840 | type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); | ||
575 | emit_setgl(as, type, tmptv.it); | 841 | emit_setgl(as, type, tmptv.it); |
842 | #else | ||
843 | asm_tvstore64(as, dest, 0, ref); | ||
844 | emit_tsi(as, MIPSI_DADDIU, dest, RID_JGL, | ||
845 | (int32_t)(offsetof(global_State, tmptv)-32768)); | ||
846 | #endif | ||
576 | } | 847 | } |
577 | } | 848 | } |
578 | 849 | ||
579 | static void asm_tostr(ASMState *as, IRIns *ir) | ||
580 | { | ||
581 | IRRef args[2]; | ||
582 | args[0] = ASMREF_L; | ||
583 | as->gcsteps++; | ||
584 | if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) { | ||
585 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum]; | ||
586 | args[1] = ASMREF_TMP1; /* const lua_Number * */ | ||
587 | asm_setupresult(as, ir, ci); /* GCstr * */ | ||
588 | asm_gencall(as, ci, args); | ||
589 | asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1); | ||
590 | } else { | ||
591 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint]; | ||
592 | args[1] = ir->op1; /* int32_t k */ | ||
593 | asm_setupresult(as, ir, ci); /* GCstr * */ | ||
594 | asm_gencall(as, ci, args); | ||
595 | } | ||
596 | } | ||
597 | |||
598 | /* -- Memory references --------------------------------------------------- */ | ||
599 | |||
600 | static void asm_aref(ASMState *as, IRIns *ir) | 850 | static void asm_aref(ASMState *as, IRIns *ir) |
601 | { | 851 | { |
602 | Reg dest = ra_dest(as, ir, RSET_GPR); | 852 | Reg dest = ra_dest(as, ir, RSET_GPR); |
@@ -608,14 +858,18 @@ static void asm_aref(ASMState *as, IRIns *ir) | |||
608 | ofs += 8*IR(ir->op2)->i; | 858 | ofs += 8*IR(ir->op2)->i; |
609 | if (checki16(ofs)) { | 859 | if (checki16(ofs)) { |
610 | base = ra_alloc1(as, refa, RSET_GPR); | 860 | base = ra_alloc1(as, refa, RSET_GPR); |
611 | emit_tsi(as, MIPSI_ADDIU, dest, base, ofs); | 861 | emit_tsi(as, MIPSI_AADDIU, dest, base, ofs); |
612 | return; | 862 | return; |
613 | } | 863 | } |
614 | } | 864 | } |
615 | base = ra_alloc1(as, ir->op1, RSET_GPR); | 865 | base = ra_alloc1(as, ir->op1, RSET_GPR); |
616 | idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); | 866 | idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); |
617 | emit_dst(as, MIPSI_ADDU, dest, RID_TMP, base); | 867 | #if !LJ_TARGET_MIPSR6 |
868 | emit_dst(as, MIPSI_AADDU, dest, RID_TMP, base); | ||
618 | emit_dta(as, MIPSI_SLL, RID_TMP, idx, 3); | 869 | emit_dta(as, MIPSI_SLL, RID_TMP, idx, 3); |
870 | #else | ||
871 | emit_dst(as, MIPSI_ALSA | MIPSF_A(3-1), dest, idx, base); | ||
872 | #endif | ||
619 | } | 873 | } |
620 | 874 | ||
621 | /* Inlined hash lookup. Specialized for key type and for const keys. | 875 | /* Inlined hash lookup. Specialized for key type and for const keys. |
@@ -626,51 +880,109 @@ static void asm_aref(ASMState *as, IRIns *ir) | |||
626 | ** } while ((n = nextnode(n))); | 880 | ** } while ((n = nextnode(n))); |
627 | ** return niltv(L); | 881 | ** return niltv(L); |
628 | */ | 882 | */ |
629 | static void asm_href(ASMState *as, IRIns *ir) | 883 | static void asm_href(ASMState *as, IRIns *ir, IROp merge) |
630 | { | 884 | { |
631 | RegSet allow = RSET_GPR; | 885 | RegSet allow = RSET_GPR; |
632 | int destused = ra_used(ir); | 886 | int destused = ra_used(ir); |
633 | Reg dest = ra_dest(as, ir, allow); | 887 | Reg dest = ra_dest(as, ir, allow); |
634 | Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); | 888 | Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); |
635 | Reg key = RID_NONE, type = RID_NONE, tmpnum = RID_NONE, tmp1 = RID_TMP, tmp2; | 889 | Reg key = RID_NONE, type = RID_NONE, tmpnum = RID_NONE, tmp1 = RID_TMP, tmp2; |
890 | #if LJ_64 | ||
891 | Reg cmp64 = RID_NONE; | ||
892 | #endif | ||
636 | IRRef refkey = ir->op2; | 893 | IRRef refkey = ir->op2; |
637 | IRIns *irkey = IR(refkey); | 894 | IRIns *irkey = IR(refkey); |
895 | int isk = irref_isk(refkey); | ||
638 | IRType1 kt = irkey->t; | 896 | IRType1 kt = irkey->t; |
639 | uint32_t khash; | 897 | uint32_t khash; |
640 | MCLabel l_end, l_loop, l_next; | 898 | MCLabel l_end, l_loop, l_next; |
641 | 899 | ||
642 | rset_clear(allow, tab); | 900 | rset_clear(allow, tab); |
643 | if (irt_isnum(kt)) { | 901 | #if LJ_SOFTFP32 |
902 | if (!isk) { | ||
903 | key = ra_alloc1(as, refkey, allow); | ||
904 | rset_clear(allow, key); | ||
905 | if (irkey[1].o == IR_HIOP) { | ||
906 | if (ra_hasreg((irkey+1)->r)) { | ||
907 | type = tmpnum = (irkey+1)->r; | ||
908 | tmp1 = ra_scratch(as, allow); | ||
909 | rset_clear(allow, tmp1); | ||
910 | ra_noweak(as, tmpnum); | ||
911 | } else { | ||
912 | type = tmpnum = ra_allocref(as, refkey+1, allow); | ||
913 | } | ||
914 | rset_clear(allow, tmpnum); | ||
915 | } else { | ||
916 | type = ra_allock(as, (int32_t)irt_toitype(irkey->t), allow); | ||
917 | rset_clear(allow, type); | ||
918 | } | ||
919 | } | ||
920 | #else | ||
921 | if (!LJ_SOFTFP && irt_isnum(kt)) { | ||
644 | key = ra_alloc1(as, refkey, RSET_FPR); | 922 | key = ra_alloc1(as, refkey, RSET_FPR); |
645 | tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); | 923 | tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); |
646 | } else if (!irt_ispri(kt)) { | 924 | } else if (!irt_ispri(kt)) { |
647 | key = ra_alloc1(as, refkey, allow); | 925 | key = ra_alloc1(as, refkey, allow); |
648 | rset_clear(allow, key); | 926 | rset_clear(allow, key); |
649 | type = ra_allock(as, irt_toitype(irkey->t), allow); | 927 | #if LJ_32 |
928 | type = ra_allock(as, (int32_t)irt_toitype(irkey->t), allow); | ||
650 | rset_clear(allow, type); | 929 | rset_clear(allow, type); |
930 | #endif | ||
651 | } | 931 | } |
932 | #endif | ||
652 | tmp2 = ra_scratch(as, allow); | 933 | tmp2 = ra_scratch(as, allow); |
653 | rset_clear(allow, tmp2); | 934 | rset_clear(allow, tmp2); |
935 | #if LJ_64 | ||
936 | if (LJ_SOFTFP || !irt_isnum(kt)) { | ||
937 | /* Allocate cmp64 register used for 64-bit comparisons */ | ||
938 | if (LJ_SOFTFP && irt_isnum(kt)) { | ||
939 | cmp64 = key; | ||
940 | } else if (!isk && irt_isaddr(kt)) { | ||
941 | cmp64 = tmp2; | ||
942 | } else { | ||
943 | int64_t k; | ||
944 | if (isk && irt_isaddr(kt)) { | ||
945 | k = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64; | ||
946 | } else { | ||
947 | lua_assert(irt_ispri(kt) && !irt_isnil(kt)); | ||
948 | k = ~((int64_t)~irt_toitype(ir->t) << 47); | ||
949 | } | ||
950 | cmp64 = ra_allock(as, k, allow); | ||
951 | rset_clear(allow, cmp64); | ||
952 | } | ||
953 | } | ||
954 | #endif | ||
654 | 955 | ||
655 | /* Key not found in chain: load niltv. */ | 956 | /* Key not found in chain: jump to exit (if merged) or load niltv. */ |
656 | l_end = emit_label(as); | 957 | l_end = emit_label(as); |
657 | if (destused) | 958 | as->invmcp = NULL; |
959 | if (merge == IR_NE) | ||
960 | asm_guard(as, MIPSI_B, RID_ZERO, RID_ZERO); | ||
961 | else if (destused) | ||
658 | emit_loada(as, dest, niltvg(J2G(as->J))); | 962 | emit_loada(as, dest, niltvg(J2G(as->J))); |
659 | else | ||
660 | *--as->mcp = MIPSI_NOP; | ||
661 | /* Follow hash chain until the end. */ | 963 | /* Follow hash chain until the end. */ |
662 | emit_move(as, dest, tmp1); | 964 | emit_move(as, dest, tmp1); |
663 | l_loop = --as->mcp; | 965 | l_loop = --as->mcp; |
664 | emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, next)); | 966 | emit_tsi(as, MIPSI_AL, tmp1, dest, (int32_t)offsetof(Node, next)); |
665 | l_next = emit_label(as); | 967 | l_next = emit_label(as); |
666 | 968 | ||
667 | /* Type and value comparison. */ | 969 | /* Type and value comparison. */ |
668 | if (irt_isnum(kt)) { | 970 | if (merge == IR_EQ) { /* Must match asm_guard(). */ |
971 | emit_ti(as, MIPSI_LI, RID_TMP, as->snapno); | ||
972 | l_end = asm_exitstub_addr(as); | ||
973 | } | ||
974 | if (!LJ_SOFTFP && irt_isnum(kt)) { | ||
975 | #if !LJ_TARGET_MIPSR6 | ||
669 | emit_branch(as, MIPSI_BC1T, 0, 0, l_end); | 976 | emit_branch(as, MIPSI_BC1T, 0, 0, l_end); |
670 | emit_fgh(as, MIPSI_C_EQ_D, 0, tmpnum, key); | 977 | emit_fgh(as, MIPSI_C_EQ_D, 0, tmpnum, key); |
671 | emit_tg(as, MIPSI_MFC1, tmp1, key+1); | 978 | #else |
979 | emit_branch(as, MIPSI_BC1NEZ, 0, (tmpnum&31), l_end); | ||
980 | emit_fgh(as, MIPSI_CMP_EQ_D, tmpnum, tmpnum, key); | ||
981 | #endif | ||
982 | *--as->mcp = MIPSI_NOP; /* Avoid NaN comparison overhead. */ | ||
672 | emit_branch(as, MIPSI_BEQ, tmp1, RID_ZERO, l_next); | 983 | emit_branch(as, MIPSI_BEQ, tmp1, RID_ZERO, l_next); |
673 | emit_tsi(as, MIPSI_SLTIU, tmp1, tmp1, (int32_t)LJ_TISNUM); | 984 | emit_tsi(as, MIPSI_SLTIU, tmp1, tmp1, (int32_t)LJ_TISNUM); |
985 | #if LJ_32 | ||
674 | emit_hsi(as, MIPSI_LDC1, tmpnum, dest, (int32_t)offsetof(Node, key.n)); | 986 | emit_hsi(as, MIPSI_LDC1, tmpnum, dest, (int32_t)offsetof(Node, key.n)); |
675 | } else { | 987 | } else { |
676 | if (irt_ispri(kt)) { | 988 | if (irt_ispri(kt)) { |
@@ -683,24 +995,39 @@ static void asm_href(ASMState *as, IRIns *ir) | |||
683 | } | 995 | } |
684 | emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, key.it)); | 996 | emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, key.it)); |
685 | *l_loop = MIPSI_BNE | MIPSF_S(tmp1) | ((as->mcp-l_loop-1) & 0xffffu); | 997 | *l_loop = MIPSI_BNE | MIPSF_S(tmp1) | ((as->mcp-l_loop-1) & 0xffffu); |
998 | #else | ||
999 | emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 15); | ||
1000 | emit_tg(as, MIPSI_DMTC1, tmp1, tmpnum); | ||
1001 | emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64)); | ||
1002 | } else { | ||
1003 | emit_branch(as, MIPSI_BEQ, tmp1, cmp64, l_end); | ||
1004 | emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64)); | ||
1005 | } | ||
1006 | *l_loop = MIPSI_BNE | MIPSF_S(tmp1) | ((as->mcp-l_loop-1) & 0xffffu); | ||
1007 | if (!isk && irt_isaddr(kt)) { | ||
1008 | type = ra_allock(as, (int64_t)irt_toitype(kt) << 47, allow); | ||
1009 | emit_dst(as, MIPSI_DADDU, tmp2, key, type); | ||
1010 | rset_clear(allow, type); | ||
1011 | } | ||
1012 | #endif | ||
686 | 1013 | ||
687 | /* Load main position relative to tab->node into dest. */ | 1014 | /* Load main position relative to tab->node into dest. */ |
688 | khash = irref_isk(refkey) ? ir_khash(irkey) : 1; | 1015 | khash = isk ? ir_khash(irkey) : 1; |
689 | if (khash == 0) { | 1016 | if (khash == 0) { |
690 | emit_tsi(as, MIPSI_LW, dest, tab, (int32_t)offsetof(GCtab, node)); | 1017 | emit_tsi(as, MIPSI_AL, dest, tab, (int32_t)offsetof(GCtab, node)); |
691 | } else { | 1018 | } else { |
692 | Reg tmphash = tmp1; | 1019 | Reg tmphash = tmp1; |
693 | if (irref_isk(refkey)) | 1020 | if (isk) |
694 | tmphash = ra_allock(as, khash, allow); | 1021 | tmphash = ra_allock(as, khash, allow); |
695 | emit_dst(as, MIPSI_ADDU, dest, dest, tmp1); | 1022 | emit_dst(as, MIPSI_AADDU, dest, dest, tmp1); |
696 | lua_assert(sizeof(Node) == 24); | 1023 | lua_assert(sizeof(Node) == 24); |
697 | emit_dst(as, MIPSI_SUBU, tmp1, tmp2, tmp1); | 1024 | emit_dst(as, MIPSI_SUBU, tmp1, tmp2, tmp1); |
698 | emit_dta(as, MIPSI_SLL, tmp1, tmp1, 3); | 1025 | emit_dta(as, MIPSI_SLL, tmp1, tmp1, 3); |
699 | emit_dta(as, MIPSI_SLL, tmp2, tmp1, 5); | 1026 | emit_dta(as, MIPSI_SLL, tmp2, tmp1, 5); |
700 | emit_dst(as, MIPSI_AND, tmp1, tmp2, tmphash); | 1027 | emit_dst(as, MIPSI_AND, tmp1, tmp2, tmphash); |
701 | emit_tsi(as, MIPSI_LW, dest, tab, (int32_t)offsetof(GCtab, node)); | 1028 | emit_tsi(as, MIPSI_AL, dest, tab, (int32_t)offsetof(GCtab, node)); |
702 | emit_tsi(as, MIPSI_LW, tmp2, tab, (int32_t)offsetof(GCtab, hmask)); | 1029 | emit_tsi(as, MIPSI_LW, tmp2, tab, (int32_t)offsetof(GCtab, hmask)); |
703 | if (irref_isk(refkey)) { | 1030 | if (isk) { |
704 | /* Nothing to do. */ | 1031 | /* Nothing to do. */ |
705 | } else if (irt_isstr(kt)) { | 1032 | } else if (irt_isstr(kt)) { |
706 | emit_tsi(as, MIPSI_LW, tmp1, key, (int32_t)offsetof(GCstr, hash)); | 1033 | emit_tsi(as, MIPSI_LW, tmp1, key, (int32_t)offsetof(GCstr, hash)); |
@@ -710,9 +1037,10 @@ static void asm_href(ASMState *as, IRIns *ir) | |||
710 | emit_dst(as, MIPSI_XOR, tmp1, tmp1, tmp2); | 1037 | emit_dst(as, MIPSI_XOR, tmp1, tmp1, tmp2); |
711 | emit_rotr(as, tmp1, tmp1, dest, (-HASH_ROT2-HASH_ROT1)&31); | 1038 | emit_rotr(as, tmp1, tmp1, dest, (-HASH_ROT2-HASH_ROT1)&31); |
712 | emit_dst(as, MIPSI_SUBU, tmp2, tmp2, dest); | 1039 | emit_dst(as, MIPSI_SUBU, tmp2, tmp2, dest); |
713 | if (irt_isnum(kt)) { | 1040 | #if LJ_32 |
1041 | if (LJ_SOFTFP ? (irkey[1].o == IR_HIOP) : irt_isnum(kt)) { | ||
714 | emit_dst(as, MIPSI_XOR, tmp2, tmp2, tmp1); | 1042 | emit_dst(as, MIPSI_XOR, tmp2, tmp2, tmp1); |
715 | if ((as->flags & JIT_F_MIPS32R2)) { | 1043 | if ((as->flags & JIT_F_MIPSXXR2)) { |
716 | emit_dta(as, MIPSI_ROTR, dest, tmp1, (-HASH_ROT1)&31); | 1044 | emit_dta(as, MIPSI_ROTR, dest, tmp1, (-HASH_ROT1)&31); |
717 | } else { | 1045 | } else { |
718 | emit_dst(as, MIPSI_OR, dest, dest, tmp1); | 1046 | emit_dst(as, MIPSI_OR, dest, dest, tmp1); |
@@ -720,13 +1048,35 @@ static void asm_href(ASMState *as, IRIns *ir) | |||
720 | emit_dta(as, MIPSI_SRL, dest, tmp1, (-HASH_ROT1)&31); | 1048 | emit_dta(as, MIPSI_SRL, dest, tmp1, (-HASH_ROT1)&31); |
721 | } | 1049 | } |
722 | emit_dst(as, MIPSI_ADDU, tmp1, tmp1, tmp1); | 1050 | emit_dst(as, MIPSI_ADDU, tmp1, tmp1, tmp1); |
1051 | #if LJ_SOFTFP | ||
1052 | emit_ds(as, MIPSI_MOVE, tmp1, type); | ||
1053 | emit_ds(as, MIPSI_MOVE, tmp2, key); | ||
1054 | #else | ||
723 | emit_tg(as, MIPSI_MFC1, tmp2, key); | 1055 | emit_tg(as, MIPSI_MFC1, tmp2, key); |
724 | emit_tg(as, MIPSI_MFC1, tmp1, key+1); | 1056 | emit_tg(as, MIPSI_MFC1, tmp1, key+1); |
1057 | #endif | ||
725 | } else { | 1058 | } else { |
726 | emit_dst(as, MIPSI_XOR, tmp2, key, tmp1); | 1059 | emit_dst(as, MIPSI_XOR, tmp2, key, tmp1); |
727 | emit_rotr(as, dest, tmp1, tmp2, (-HASH_ROT1)&31); | 1060 | emit_rotr(as, dest, tmp1, tmp2, (-HASH_ROT1)&31); |
728 | emit_dst(as, MIPSI_ADDU, tmp1, key, ra_allock(as, HASH_BIAS, allow)); | 1061 | emit_dst(as, MIPSI_ADDU, tmp1, key, ra_allock(as, HASH_BIAS, allow)); |
729 | } | 1062 | } |
1063 | #else | ||
1064 | emit_dst(as, MIPSI_XOR, tmp2, tmp2, tmp1); | ||
1065 | emit_dta(as, MIPSI_ROTR, dest, tmp1, (-HASH_ROT1)&31); | ||
1066 | if (irt_isnum(kt)) { | ||
1067 | emit_dst(as, MIPSI_ADDU, tmp1, tmp1, tmp1); | ||
1068 | emit_dta(as, MIPSI_DSRA32, tmp1, LJ_SOFTFP ? key : tmp1, 0); | ||
1069 | emit_dta(as, MIPSI_SLL, tmp2, LJ_SOFTFP ? key : tmp1, 0); | ||
1070 | #if !LJ_SOFTFP | ||
1071 | emit_tg(as, MIPSI_DMFC1, tmp1, key); | ||
1072 | #endif | ||
1073 | } else { | ||
1074 | checkmclim(as); | ||
1075 | emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 0); | ||
1076 | emit_dta(as, MIPSI_SLL, tmp2, key, 0); | ||
1077 | emit_dst(as, MIPSI_DADDU, tmp1, key, type); | ||
1078 | } | ||
1079 | #endif | ||
730 | } | 1080 | } |
731 | } | 1081 | } |
732 | } | 1082 | } |
@@ -739,17 +1089,24 @@ static void asm_hrefk(ASMState *as, IRIns *ir) | |||
739 | int32_t kofs = ofs + (int32_t)offsetof(Node, key); | 1089 | int32_t kofs = ofs + (int32_t)offsetof(Node, key); |
740 | Reg dest = (ra_used(ir)||ofs > 32736) ? ra_dest(as, ir, RSET_GPR) : RID_NONE; | 1090 | Reg dest = (ra_used(ir)||ofs > 32736) ? ra_dest(as, ir, RSET_GPR) : RID_NONE; |
741 | Reg node = ra_alloc1(as, ir->op1, RSET_GPR); | 1091 | Reg node = ra_alloc1(as, ir->op1, RSET_GPR); |
742 | Reg key = RID_NONE, type = RID_TMP, idx = node; | ||
743 | RegSet allow = rset_exclude(RSET_GPR, node); | 1092 | RegSet allow = rset_exclude(RSET_GPR, node); |
1093 | Reg idx = node; | ||
1094 | #if LJ_32 | ||
1095 | Reg key = RID_NONE, type = RID_TMP; | ||
744 | int32_t lo, hi; | 1096 | int32_t lo, hi; |
1097 | #else | ||
1098 | Reg key = ra_scratch(as, allow); | ||
1099 | int64_t k; | ||
1100 | #endif | ||
745 | lua_assert(ofs % sizeof(Node) == 0); | 1101 | lua_assert(ofs % sizeof(Node) == 0); |
746 | if (ofs > 32736) { | 1102 | if (ofs > 32736) { |
747 | idx = dest; | 1103 | idx = dest; |
748 | rset_clear(allow, dest); | 1104 | rset_clear(allow, dest); |
749 | kofs = (int32_t)offsetof(Node, key); | 1105 | kofs = (int32_t)offsetof(Node, key); |
750 | } else if (ra_hasreg(dest)) { | 1106 | } else if (ra_hasreg(dest)) { |
751 | emit_tsi(as, MIPSI_ADDIU, dest, node, ofs); | 1107 | emit_tsi(as, MIPSI_AADDIU, dest, node, ofs); |
752 | } | 1108 | } |
1109 | #if LJ_32 | ||
753 | if (!irt_ispri(irkey->t)) { | 1110 | if (!irt_ispri(irkey->t)) { |
754 | key = ra_scratch(as, allow); | 1111 | key = ra_scratch(as, allow); |
755 | rset_clear(allow, key); | 1112 | rset_clear(allow, key); |
@@ -768,22 +1125,20 @@ nolo: | |||
768 | asm_guard(as, MIPSI_BNE, type, hi ? ra_allock(as, hi, allow) : RID_ZERO); | 1125 | asm_guard(as, MIPSI_BNE, type, hi ? ra_allock(as, hi, allow) : RID_ZERO); |
769 | if (ra_hasreg(key)) emit_tsi(as, MIPSI_LW, key, idx, kofs+(LJ_BE?4:0)); | 1126 | if (ra_hasreg(key)) emit_tsi(as, MIPSI_LW, key, idx, kofs+(LJ_BE?4:0)); |
770 | emit_tsi(as, MIPSI_LW, type, idx, kofs+(LJ_BE?0:4)); | 1127 | emit_tsi(as, MIPSI_LW, type, idx, kofs+(LJ_BE?0:4)); |
771 | if (ofs > 32736) | 1128 | #else |
772 | emit_tsi(as, MIPSI_ADDU, dest, node, ra_allock(as, ofs, allow)); | 1129 | if (irt_ispri(irkey->t)) { |
773 | } | 1130 | lua_assert(!irt_isnil(irkey->t)); |
774 | 1131 | k = ~((int64_t)~irt_toitype(irkey->t) << 47); | |
775 | static void asm_newref(ASMState *as, IRIns *ir) | 1132 | } else if (irt_isnum(irkey->t)) { |
776 | { | 1133 | k = (int64_t)ir_knum(irkey)->u64; |
777 | if (ir->r != RID_SINK) { | 1134 | } else { |
778 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey]; | 1135 | k = ((int64_t)irt_toitype(irkey->t) << 47) | (int64_t)ir_kgc(irkey); |
779 | IRRef args[3]; | ||
780 | args[0] = ASMREF_L; /* lua_State *L */ | ||
781 | args[1] = ir->op1; /* GCtab *t */ | ||
782 | args[2] = ASMREF_TMP1; /* cTValue *key */ | ||
783 | asm_setupresult(as, ir, ci); /* TValue * */ | ||
784 | asm_gencall(as, ci, args); | ||
785 | asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2); | ||
786 | } | 1136 | } |
1137 | asm_guard(as, MIPSI_BNE, key, ra_allock(as, k, allow)); | ||
1138 | emit_tsi(as, MIPSI_LD, key, idx, kofs); | ||
1139 | #endif | ||
1140 | if (ofs > 32736) | ||
1141 | emit_tsi(as, MIPSI_AADDU, dest, node, ra_allock(as, ofs, allow)); | ||
787 | } | 1142 | } |
788 | 1143 | ||
789 | static void asm_uref(ASMState *as, IRIns *ir) | 1144 | static void asm_uref(ASMState *as, IRIns *ir) |
@@ -792,19 +1147,19 @@ static void asm_uref(ASMState *as, IRIns *ir) | |||
792 | if (irref_isk(ir->op1)) { | 1147 | if (irref_isk(ir->op1)) { |
793 | GCfunc *fn = ir_kfunc(IR(ir->op1)); | 1148 | GCfunc *fn = ir_kfunc(IR(ir->op1)); |
794 | MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; | 1149 | MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; |
795 | emit_lsptr(as, MIPSI_LW, dest, v, RSET_GPR); | 1150 | emit_lsptr(as, MIPSI_AL, dest, v, RSET_GPR); |
796 | } else { | 1151 | } else { |
797 | Reg uv = ra_scratch(as, RSET_GPR); | 1152 | Reg uv = ra_scratch(as, RSET_GPR); |
798 | Reg func = ra_alloc1(as, ir->op1, RSET_GPR); | 1153 | Reg func = ra_alloc1(as, ir->op1, RSET_GPR); |
799 | if (ir->o == IR_UREFC) { | 1154 | if (ir->o == IR_UREFC) { |
800 | asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); | 1155 | asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); |
801 | emit_tsi(as, MIPSI_ADDIU, dest, uv, (int32_t)offsetof(GCupval, tv)); | 1156 | emit_tsi(as, MIPSI_AADDIU, dest, uv, (int32_t)offsetof(GCupval, tv)); |
802 | emit_tsi(as, MIPSI_LBU, RID_TMP, uv, (int32_t)offsetof(GCupval, closed)); | 1157 | emit_tsi(as, MIPSI_LBU, RID_TMP, uv, (int32_t)offsetof(GCupval, closed)); |
803 | } else { | 1158 | } else { |
804 | emit_tsi(as, MIPSI_LW, dest, uv, (int32_t)offsetof(GCupval, v)); | 1159 | emit_tsi(as, MIPSI_AL, dest, uv, (int32_t)offsetof(GCupval, v)); |
805 | } | 1160 | } |
806 | emit_tsi(as, MIPSI_LW, uv, func, | 1161 | emit_tsi(as, MIPSI_AL, uv, func, (int32_t)offsetof(GCfuncL, uvptr) + |
807 | (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8)); | 1162 | (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8)); |
808 | } | 1163 | } |
809 | } | 1164 | } |
810 | 1165 | ||
@@ -816,6 +1171,7 @@ static void asm_fref(ASMState *as, IRIns *ir) | |||
816 | 1171 | ||
817 | static void asm_strref(ASMState *as, IRIns *ir) | 1172 | static void asm_strref(ASMState *as, IRIns *ir) |
818 | { | 1173 | { |
1174 | #if LJ_32 | ||
819 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1175 | Reg dest = ra_dest(as, ir, RSET_GPR); |
820 | IRRef ref = ir->op2, refk = ir->op1; | 1176 | IRRef ref = ir->op2, refk = ir->op1; |
821 | int32_t ofs = (int32_t)sizeof(GCstr); | 1177 | int32_t ofs = (int32_t)sizeof(GCstr); |
@@ -847,6 +1203,20 @@ static void asm_strref(ASMState *as, IRIns *ir) | |||
847 | else | 1203 | else |
848 | emit_dst(as, MIPSI_ADDU, dest, r, | 1204 | emit_dst(as, MIPSI_ADDU, dest, r, |
849 | ra_allock(as, ofs, rset_exclude(RSET_GPR, r))); | 1205 | ra_allock(as, ofs, rset_exclude(RSET_GPR, r))); |
1206 | #else | ||
1207 | RegSet allow = RSET_GPR; | ||
1208 | Reg dest = ra_dest(as, ir, allow); | ||
1209 | Reg base = ra_alloc1(as, ir->op1, allow); | ||
1210 | IRIns *irr = IR(ir->op2); | ||
1211 | int32_t ofs = sizeof(GCstr); | ||
1212 | rset_clear(allow, base); | ||
1213 | if (irref_isk(ir->op2) && checki16(ofs + irr->i)) { | ||
1214 | emit_tsi(as, MIPSI_DADDIU, dest, base, ofs + irr->i); | ||
1215 | } else { | ||
1216 | emit_tsi(as, MIPSI_DADDIU, dest, dest, ofs); | ||
1217 | emit_dst(as, MIPSI_DADDU, dest, base, ra_alloc1(as, ir->op2, allow)); | ||
1218 | } | ||
1219 | #endif | ||
850 | } | 1220 | } |
851 | 1221 | ||
852 | /* -- Loads and stores ---------------------------------------------------- */ | 1222 | /* -- Loads and stores ---------------------------------------------------- */ |
@@ -858,9 +1228,11 @@ static MIPSIns asm_fxloadins(IRIns *ir) | |||
858 | case IRT_U8: return MIPSI_LBU; | 1228 | case IRT_U8: return MIPSI_LBU; |
859 | case IRT_I16: return MIPSI_LH; | 1229 | case IRT_I16: return MIPSI_LH; |
860 | case IRT_U16: return MIPSI_LHU; | 1230 | case IRT_U16: return MIPSI_LHU; |
861 | case IRT_NUM: return MIPSI_LDC1; | 1231 | case IRT_NUM: lua_assert(!LJ_SOFTFP32); if (!LJ_SOFTFP) return MIPSI_LDC1; |
862 | case IRT_FLOAT: return MIPSI_LWC1; | 1232 | /* fallthrough */ |
863 | default: return MIPSI_LW; | 1233 | case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_LWC1; |
1234 | /* fallthrough */ | ||
1235 | default: return (LJ_64 && irt_is64(ir->t)) ? MIPSI_LD : MIPSI_LW; | ||
864 | } | 1236 | } |
865 | } | 1237 | } |
866 | 1238 | ||
@@ -869,26 +1241,34 @@ static MIPSIns asm_fxstoreins(IRIns *ir) | |||
869 | switch (irt_type(ir->t)) { | 1241 | switch (irt_type(ir->t)) { |
870 | case IRT_I8: case IRT_U8: return MIPSI_SB; | 1242 | case IRT_I8: case IRT_U8: return MIPSI_SB; |
871 | case IRT_I16: case IRT_U16: return MIPSI_SH; | 1243 | case IRT_I16: case IRT_U16: return MIPSI_SH; |
872 | case IRT_NUM: return MIPSI_SDC1; | 1244 | case IRT_NUM: lua_assert(!LJ_SOFTFP32); if (!LJ_SOFTFP) return MIPSI_SDC1; |
873 | case IRT_FLOAT: return MIPSI_SWC1; | 1245 | /* fallthrough */ |
874 | default: return MIPSI_SW; | 1246 | case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_SWC1; |
1247 | /* fallthrough */ | ||
1248 | default: return (LJ_64 && irt_is64(ir->t)) ? MIPSI_SD : MIPSI_SW; | ||
875 | } | 1249 | } |
876 | } | 1250 | } |
877 | 1251 | ||
878 | static void asm_fload(ASMState *as, IRIns *ir) | 1252 | static void asm_fload(ASMState *as, IRIns *ir) |
879 | { | 1253 | { |
880 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1254 | Reg dest = ra_dest(as, ir, RSET_GPR); |
881 | Reg idx = ra_alloc1(as, ir->op1, RSET_GPR); | ||
882 | MIPSIns mi = asm_fxloadins(ir); | 1255 | MIPSIns mi = asm_fxloadins(ir); |
1256 | Reg idx; | ||
883 | int32_t ofs; | 1257 | int32_t ofs; |
884 | if (ir->op2 == IRFL_TAB_ARRAY) { | 1258 | if (ir->op1 == REF_NIL) { |
885 | ofs = asm_fuseabase(as, ir->op1); | 1259 | idx = RID_JGL; |
886 | if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ | 1260 | ofs = (ir->op2 << 2) - 32768 - GG_OFS(g); |
887 | emit_tsi(as, MIPSI_ADDIU, dest, idx, ofs); | 1261 | } else { |
888 | return; | 1262 | idx = ra_alloc1(as, ir->op1, RSET_GPR); |
1263 | if (ir->op2 == IRFL_TAB_ARRAY) { | ||
1264 | ofs = asm_fuseabase(as, ir->op1); | ||
1265 | if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ | ||
1266 | emit_tsi(as, MIPSI_AADDIU, dest, idx, ofs); | ||
1267 | return; | ||
1268 | } | ||
889 | } | 1269 | } |
1270 | ofs = field_ofs[ir->op2]; | ||
890 | } | 1271 | } |
891 | ofs = field_ofs[ir->op2]; | ||
892 | lua_assert(!irt_isfp(ir->t)); | 1272 | lua_assert(!irt_isfp(ir->t)); |
893 | emit_tsi(as, mi, dest, idx, ofs); | 1273 | emit_tsi(as, mi, dest, idx, ofs); |
894 | } | 1274 | } |
@@ -908,43 +1288,79 @@ static void asm_fstore(ASMState *as, IRIns *ir) | |||
908 | 1288 | ||
909 | static void asm_xload(ASMState *as, IRIns *ir) | 1289 | static void asm_xload(ASMState *as, IRIns *ir) |
910 | { | 1290 | { |
911 | Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); | 1291 | Reg dest = ra_dest(as, ir, |
912 | lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); | 1292 | (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); |
1293 | lua_assert(LJ_TARGET_UNALIGNED || !(ir->op2 & IRXLOAD_UNALIGNED)); | ||
913 | asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); | 1294 | asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); |
914 | } | 1295 | } |
915 | 1296 | ||
916 | static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) | 1297 | static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) |
917 | { | 1298 | { |
918 | if (ir->r != RID_SINK) { | 1299 | if (ir->r != RID_SINK) { |
919 | Reg src = ra_alloc1z(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); | 1300 | Reg src = ra_alloc1z(as, ir->op2, |
1301 | (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); | ||
920 | asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, | 1302 | asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, |
921 | rset_exclude(RSET_GPR, src), ofs); | 1303 | rset_exclude(RSET_GPR, src), ofs); |
922 | } | 1304 | } |
923 | } | 1305 | } |
924 | 1306 | ||
1307 | #define asm_xstore(as, ir) asm_xstore_(as, ir, 0) | ||
1308 | |||
925 | static void asm_ahuvload(ASMState *as, IRIns *ir) | 1309 | static void asm_ahuvload(ASMState *as, IRIns *ir) |
926 | { | 1310 | { |
927 | IRType1 t = ir->t; | 1311 | int hiop = (LJ_SOFTFP32 && (ir+1)->o == IR_HIOP); |
928 | Reg dest = RID_NONE, type = RID_TMP, idx; | 1312 | Reg dest = RID_NONE, type = RID_TMP, idx; |
929 | RegSet allow = RSET_GPR; | 1313 | RegSet allow = RSET_GPR; |
930 | int32_t ofs = 0; | 1314 | int32_t ofs = 0; |
1315 | IRType1 t = ir->t; | ||
1316 | if (hiop) { | ||
1317 | t.irt = IRT_NUM; | ||
1318 | if (ra_used(ir+1)) { | ||
1319 | type = ra_dest(as, ir+1, allow); | ||
1320 | rset_clear(allow, type); | ||
1321 | } | ||
1322 | } | ||
931 | if (ra_used(ir)) { | 1323 | if (ra_used(ir)) { |
932 | lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); | 1324 | lua_assert((LJ_SOFTFP32 ? 0 : irt_isnum(ir->t)) || |
933 | dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR); | 1325 | irt_isint(ir->t) || irt_isaddr(ir->t)); |
1326 | dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); | ||
934 | rset_clear(allow, dest); | 1327 | rset_clear(allow, dest); |
1328 | #if LJ_64 | ||
1329 | if (irt_isaddr(t)) | ||
1330 | emit_tsml(as, MIPSI_DEXTM, dest, dest, 14, 0); | ||
1331 | else if (irt_isint(t)) | ||
1332 | emit_dta(as, MIPSI_SLL, dest, dest, 0); | ||
1333 | #endif | ||
935 | } | 1334 | } |
936 | idx = asm_fuseahuref(as, ir->op1, &ofs, allow); | 1335 | idx = asm_fuseahuref(as, ir->op1, &ofs, allow); |
937 | rset_clear(allow, idx); | 1336 | rset_clear(allow, idx); |
938 | if (irt_isnum(t)) { | 1337 | if (irt_isnum(t)) { |
939 | asm_guard(as, MIPSI_BEQ, type, RID_ZERO); | 1338 | asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); |
940 | emit_tsi(as, MIPSI_SLTIU, type, type, (int32_t)LJ_TISNUM); | 1339 | emit_tsi(as, MIPSI_SLTIU, RID_TMP, type, (int32_t)LJ_TISNUM); |
941 | if (ra_hasreg(dest)) | ||
942 | emit_hsi(as, MIPSI_LDC1, dest, idx, ofs); | ||
943 | } else { | 1340 | } else { |
944 | asm_guard(as, MIPSI_BNE, type, ra_allock(as, irt_toitype(t), allow)); | 1341 | asm_guard(as, MIPSI_BNE, type, |
945 | if (ra_hasreg(dest)) emit_tsi(as, MIPSI_LW, dest, idx, ofs+(LJ_BE?4:0)); | 1342 | ra_allock(as, (int32_t)irt_toitype(t), allow)); |
1343 | } | ||
1344 | #if LJ_32 | ||
1345 | if (ra_hasreg(dest)) { | ||
1346 | if (!LJ_SOFTFP && irt_isnum(t)) | ||
1347 | emit_hsi(as, MIPSI_LDC1, dest, idx, ofs); | ||
1348 | else | ||
1349 | emit_tsi(as, MIPSI_LW, dest, idx, ofs+(LJ_BE?4:0)); | ||
946 | } | 1350 | } |
947 | emit_tsi(as, MIPSI_LW, type, idx, ofs+(LJ_BE?0:4)); | 1351 | emit_tsi(as, MIPSI_LW, type, idx, ofs+(LJ_BE?0:4)); |
1352 | #else | ||
1353 | if (ra_hasreg(dest)) { | ||
1354 | if (!LJ_SOFTFP && irt_isnum(t)) { | ||
1355 | emit_hsi(as, MIPSI_LDC1, dest, idx, ofs); | ||
1356 | dest = type; | ||
1357 | } | ||
1358 | } else { | ||
1359 | dest = type; | ||
1360 | } | ||
1361 | emit_dta(as, MIPSI_DSRA32, type, dest, 15); | ||
1362 | emit_tsi(as, MIPSI_LD, dest, idx, ofs); | ||
1363 | #endif | ||
948 | } | 1364 | } |
949 | 1365 | ||
950 | static void asm_ahustore(ASMState *as, IRIns *ir) | 1366 | static void asm_ahustore(ASMState *as, IRIns *ir) |
@@ -954,81 +1370,176 @@ static void asm_ahustore(ASMState *as, IRIns *ir) | |||
954 | int32_t ofs = 0; | 1370 | int32_t ofs = 0; |
955 | if (ir->r == RID_SINK) | 1371 | if (ir->r == RID_SINK) |
956 | return; | 1372 | return; |
957 | if (irt_isnum(ir->t)) { | 1373 | if (!LJ_SOFTFP32 && irt_isnum(ir->t)) { |
958 | src = ra_alloc1(as, ir->op2, RSET_FPR); | 1374 | src = ra_alloc1(as, ir->op2, LJ_SOFTFP ? RSET_GPR : RSET_FPR); |
1375 | idx = asm_fuseahuref(as, ir->op1, &ofs, allow); | ||
1376 | emit_hsi(as, LJ_SOFTFP ? MIPSI_SD : MIPSI_SDC1, src, idx, ofs); | ||
959 | } else { | 1377 | } else { |
1378 | #if LJ_32 | ||
960 | if (!irt_ispri(ir->t)) { | 1379 | if (!irt_ispri(ir->t)) { |
961 | src = ra_alloc1(as, ir->op2, allow); | 1380 | src = ra_alloc1(as, ir->op2, allow); |
962 | rset_clear(allow, src); | 1381 | rset_clear(allow, src); |
963 | } | 1382 | } |
964 | type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); | 1383 | if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) |
1384 | type = ra_alloc1(as, (ir+1)->op2, allow); | ||
1385 | else | ||
1386 | type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); | ||
965 | rset_clear(allow, type); | 1387 | rset_clear(allow, type); |
966 | } | 1388 | idx = asm_fuseahuref(as, ir->op1, &ofs, allow); |
967 | idx = asm_fuseahuref(as, ir->op1, &ofs, allow); | ||
968 | if (irt_isnum(ir->t)) { | ||
969 | emit_hsi(as, MIPSI_SDC1, src, idx, ofs); | ||
970 | } else { | ||
971 | if (ra_hasreg(src)) | 1389 | if (ra_hasreg(src)) |
972 | emit_tsi(as, MIPSI_SW, src, idx, ofs+(LJ_BE?4:0)); | 1390 | emit_tsi(as, MIPSI_SW, src, idx, ofs+(LJ_BE?4:0)); |
973 | emit_tsi(as, MIPSI_SW, type, idx, ofs+(LJ_BE?0:4)); | 1391 | emit_tsi(as, MIPSI_SW, type, idx, ofs+(LJ_BE?0:4)); |
1392 | #else | ||
1393 | Reg tmp = RID_TMP; | ||
1394 | if (irt_ispri(ir->t)) { | ||
1395 | tmp = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow); | ||
1396 | rset_clear(allow, tmp); | ||
1397 | } else { | ||
1398 | src = ra_alloc1(as, ir->op2, allow); | ||
1399 | rset_clear(allow, src); | ||
1400 | type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow); | ||
1401 | rset_clear(allow, type); | ||
1402 | } | ||
1403 | idx = asm_fuseahuref(as, ir->op1, &ofs, allow); | ||
1404 | emit_tsi(as, MIPSI_SD, tmp, idx, ofs); | ||
1405 | if (ra_hasreg(src)) { | ||
1406 | if (irt_isinteger(ir->t)) { | ||
1407 | emit_dst(as, MIPSI_DADDU, tmp, tmp, type); | ||
1408 | emit_tsml(as, MIPSI_DEXT, tmp, src, 31, 0); | ||
1409 | } else { | ||
1410 | emit_dst(as, MIPSI_DADDU, tmp, src, type); | ||
1411 | } | ||
1412 | } | ||
1413 | #endif | ||
974 | } | 1414 | } |
975 | } | 1415 | } |
976 | 1416 | ||
977 | static void asm_sload(ASMState *as, IRIns *ir) | 1417 | static void asm_sload(ASMState *as, IRIns *ir) |
978 | { | 1418 | { |
979 | int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0); | ||
980 | IRType1 t = ir->t; | ||
981 | Reg dest = RID_NONE, type = RID_NONE, base; | 1419 | Reg dest = RID_NONE, type = RID_NONE, base; |
982 | RegSet allow = RSET_GPR; | 1420 | RegSet allow = RSET_GPR; |
1421 | IRType1 t = ir->t; | ||
1422 | #if LJ_32 | ||
1423 | int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0); | ||
1424 | int hiop = (LJ_SOFTFP32 && (ir+1)->o == IR_HIOP); | ||
1425 | if (hiop) | ||
1426 | t.irt = IRT_NUM; | ||
1427 | #else | ||
1428 | int32_t ofs = 8*((int32_t)ir->op1-2); | ||
1429 | #endif | ||
983 | lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ | 1430 | lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ |
984 | lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK)); | 1431 | lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK)); |
985 | lua_assert(!irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME))); | 1432 | #if LJ_SOFTFP32 |
1433 | lua_assert(!(ir->op2 & IRSLOAD_CONVERT)); /* Handled by LJ_SOFTFP SPLIT. */ | ||
1434 | if (hiop && ra_used(ir+1)) { | ||
1435 | type = ra_dest(as, ir+1, allow); | ||
1436 | rset_clear(allow, type); | ||
1437 | } | ||
1438 | #else | ||
986 | if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { | 1439 | if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { |
987 | dest = ra_scratch(as, RSET_FPR); | 1440 | dest = ra_scratch(as, LJ_SOFTFP ? allow : RSET_FPR); |
988 | asm_tointg(as, ir, dest); | 1441 | asm_tointg(as, ir, dest); |
989 | t.irt = IRT_NUM; /* Continue with a regular number type check. */ | 1442 | t.irt = IRT_NUM; /* Continue with a regular number type check. */ |
990 | } else if (ra_used(ir)) { | 1443 | } else |
991 | lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); | 1444 | #endif |
992 | dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR); | 1445 | if (ra_used(ir)) { |
1446 | lua_assert((LJ_SOFTFP32 ? 0 : irt_isnum(ir->t)) || | ||
1447 | irt_isint(ir->t) || irt_isaddr(ir->t)); | ||
1448 | dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); | ||
993 | rset_clear(allow, dest); | 1449 | rset_clear(allow, dest); |
994 | base = ra_alloc1(as, REF_BASE, allow); | 1450 | base = ra_alloc1(as, REF_BASE, allow); |
995 | rset_clear(allow, base); | 1451 | rset_clear(allow, base); |
996 | if ((ir->op2 & IRSLOAD_CONVERT)) { | 1452 | if (!LJ_SOFTFP32 && (ir->op2 & IRSLOAD_CONVERT)) { |
997 | if (irt_isint(t)) { | 1453 | if (irt_isint(t)) { |
998 | Reg tmp = ra_scratch(as, RSET_FPR); | 1454 | Reg tmp = ra_scratch(as, LJ_SOFTFP ? RSET_GPR : RSET_FPR); |
1455 | #if LJ_SOFTFP | ||
1456 | ra_evictset(as, rset_exclude(RSET_SCRATCH, dest)); | ||
1457 | ra_destreg(as, ir, RID_RET); | ||
1458 | emit_call(as, (void *)lj_ir_callinfo[IRCALL_softfp_d2i].func, 0); | ||
1459 | if (tmp != REGARG_FIRSTGPR) | ||
1460 | emit_move(as, REGARG_FIRSTGPR, tmp); | ||
1461 | #else | ||
999 | emit_tg(as, MIPSI_MFC1, dest, tmp); | 1462 | emit_tg(as, MIPSI_MFC1, dest, tmp); |
1000 | emit_fg(as, MIPSI_CVT_W_D, tmp, tmp); | 1463 | emit_fg(as, MIPSI_TRUNC_W_D, tmp, tmp); |
1464 | #endif | ||
1001 | dest = tmp; | 1465 | dest = tmp; |
1002 | t.irt = IRT_NUM; /* Check for original type. */ | 1466 | t.irt = IRT_NUM; /* Check for original type. */ |
1003 | } else { | 1467 | } else { |
1004 | Reg tmp = ra_scratch(as, RSET_GPR); | 1468 | Reg tmp = ra_scratch(as, RSET_GPR); |
1469 | #if LJ_SOFTFP | ||
1470 | ra_evictset(as, rset_exclude(RSET_SCRATCH, dest)); | ||
1471 | ra_destreg(as, ir, RID_RET); | ||
1472 | emit_call(as, (void *)lj_ir_callinfo[IRCALL_softfp_i2d].func, 0); | ||
1473 | emit_dta(as, MIPSI_SLL, REGARG_FIRSTGPR, tmp, 0); | ||
1474 | #else | ||
1005 | emit_fg(as, MIPSI_CVT_D_W, dest, dest); | 1475 | emit_fg(as, MIPSI_CVT_D_W, dest, dest); |
1006 | emit_tg(as, MIPSI_MTC1, tmp, dest); | 1476 | emit_tg(as, MIPSI_MTC1, tmp, dest); |
1477 | #endif | ||
1007 | dest = tmp; | 1478 | dest = tmp; |
1008 | t.irt = IRT_INT; /* Check for original type. */ | 1479 | t.irt = IRT_INT; /* Check for original type. */ |
1009 | } | 1480 | } |
1010 | } | 1481 | } |
1482 | #if LJ_64 | ||
1483 | else if (irt_isaddr(t)) { | ||
1484 | /* Clear type from pointers. */ | ||
1485 | emit_tsml(as, MIPSI_DEXTM, dest, dest, 14, 0); | ||
1486 | } else if (irt_isint(t) && (ir->op2 & IRSLOAD_TYPECHECK)) { | ||
1487 | /* Sign-extend integers. */ | ||
1488 | emit_dta(as, MIPSI_SLL, dest, dest, 0); | ||
1489 | } | ||
1490 | #endif | ||
1011 | goto dotypecheck; | 1491 | goto dotypecheck; |
1012 | } | 1492 | } |
1013 | base = ra_alloc1(as, REF_BASE, allow); | 1493 | base = ra_alloc1(as, REF_BASE, allow); |
1014 | rset_clear(allow, base); | 1494 | rset_clear(allow, base); |
1015 | dotypecheck: | 1495 | dotypecheck: |
1016 | if (irt_isnum(t)) { | 1496 | #if LJ_32 |
1017 | if ((ir->op2 & IRSLOAD_TYPECHECK)) { | 1497 | if ((ir->op2 & IRSLOAD_TYPECHECK)) { |
1018 | asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); | 1498 | if (ra_noreg(type)) |
1019 | emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)LJ_TISNUM); | ||
1020 | type = RID_TMP; | 1499 | type = RID_TMP; |
1021 | } | 1500 | if (irt_isnum(t)) { |
1022 | if (ra_hasreg(dest)) emit_hsi(as, MIPSI_LDC1, dest, base, ofs); | 1501 | asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); |
1023 | } else { | 1502 | emit_tsi(as, MIPSI_SLTIU, RID_TMP, type, (int32_t)LJ_TISNUM); |
1024 | if ((ir->op2 & IRSLOAD_TYPECHECK)) { | 1503 | } else { |
1025 | Reg ktype = ra_allock(as, irt_toitype(t), allow); | 1504 | Reg ktype = ra_allock(as, irt_toitype(t), allow); |
1026 | asm_guard(as, MIPSI_BNE, RID_TMP, ktype); | 1505 | asm_guard(as, MIPSI_BNE, type, ktype); |
1027 | type = RID_TMP; | 1506 | } |
1507 | } | ||
1508 | if (ra_hasreg(dest)) { | ||
1509 | if (!LJ_SOFTFP && irt_isnum(t)) | ||
1510 | emit_hsi(as, MIPSI_LDC1, dest, base, ofs); | ||
1511 | else | ||
1512 | emit_tsi(as, MIPSI_LW, dest, base, ofs ^ (LJ_BE?4:0)); | ||
1513 | } | ||
1514 | if (ra_hasreg(type)) | ||
1515 | emit_tsi(as, MIPSI_LW, type, base, ofs ^ (LJ_BE?0:4)); | ||
1516 | #else | ||
1517 | if ((ir->op2 & IRSLOAD_TYPECHECK)) { | ||
1518 | type = dest < RID_MAX_GPR ? dest : RID_TMP; | ||
1519 | if (irt_ispri(t)) { | ||
1520 | asm_guard(as, MIPSI_BNE, type, | ||
1521 | ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow)); | ||
1522 | } else { | ||
1523 | if (irt_isnum(t)) { | ||
1524 | asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); | ||
1525 | emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)LJ_TISNUM); | ||
1526 | if (!LJ_SOFTFP && ra_hasreg(dest)) | ||
1527 | emit_hsi(as, MIPSI_LDC1, dest, base, ofs); | ||
1528 | } else { | ||
1529 | asm_guard(as, MIPSI_BNE, RID_TMP, | ||
1530 | ra_allock(as, (int32_t)irt_toitype(t), allow)); | ||
1531 | } | ||
1532 | emit_dta(as, MIPSI_DSRA32, RID_TMP, type, 15); | ||
1028 | } | 1533 | } |
1029 | if (ra_hasreg(dest)) emit_tsi(as, MIPSI_LW, dest, base, ofs ^ (LJ_BE?4:0)); | 1534 | emit_tsi(as, MIPSI_LD, type, base, ofs); |
1535 | } else if (ra_hasreg(dest)) { | ||
1536 | if (!LJ_SOFTFP && irt_isnum(t)) | ||
1537 | emit_hsi(as, MIPSI_LDC1, dest, base, ofs); | ||
1538 | else | ||
1539 | emit_tsi(as, irt_isint(t) ? MIPSI_LW : MIPSI_LD, dest, base, | ||
1540 | ofs ^ ((LJ_BE && irt_isint(t)) ? 4 : 0)); | ||
1030 | } | 1541 | } |
1031 | if (ra_hasreg(type)) emit_tsi(as, MIPSI_LW, type, base, ofs ^ (LJ_BE?0:4)); | 1542 | #endif |
1032 | } | 1543 | } |
1033 | 1544 | ||
1034 | /* -- Allocations --------------------------------------------------------- */ | 1545 | /* -- Allocations --------------------------------------------------------- */ |
@@ -1037,19 +1548,15 @@ dotypecheck: | |||
1037 | static void asm_cnew(ASMState *as, IRIns *ir) | 1548 | static void asm_cnew(ASMState *as, IRIns *ir) |
1038 | { | 1549 | { |
1039 | CTState *cts = ctype_ctsG(J2G(as->J)); | 1550 | CTState *cts = ctype_ctsG(J2G(as->J)); |
1040 | CTypeID ctypeid = (CTypeID)IR(ir->op1)->i; | 1551 | CTypeID id = (CTypeID)IR(ir->op1)->i; |
1041 | CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ? | 1552 | CTSize sz; |
1042 | lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i; | 1553 | CTInfo info = lj_ctype_info(cts, id, &sz); |
1043 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; | 1554 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; |
1044 | IRRef args[2]; | 1555 | IRRef args[4]; |
1045 | RegSet allow = (RSET_GPR & ~RSET_SCRATCH); | ||
1046 | RegSet drop = RSET_SCRATCH; | 1556 | RegSet drop = RSET_SCRATCH; |
1047 | lua_assert(sz != CTSIZE_INVALID); | 1557 | lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL)); |
1048 | 1558 | ||
1049 | args[0] = ASMREF_L; /* lua_State *L */ | ||
1050 | args[1] = ASMREF_TMP1; /* MSize size */ | ||
1051 | as->gcsteps++; | 1559 | as->gcsteps++; |
1052 | |||
1053 | if (ra_hasreg(ir->r)) | 1560 | if (ra_hasreg(ir->r)) |
1054 | rset_clear(drop, ir->r); /* Dest reg handled below. */ | 1561 | rset_clear(drop, ir->r); /* Dest reg handled below. */ |
1055 | ra_evictset(as, drop); | 1562 | ra_evictset(as, drop); |
@@ -1058,8 +1565,9 @@ static void asm_cnew(ASMState *as, IRIns *ir) | |||
1058 | 1565 | ||
1059 | /* Initialize immutable cdata object. */ | 1566 | /* Initialize immutable cdata object. */ |
1060 | if (ir->o == IR_CNEWI) { | 1567 | if (ir->o == IR_CNEWI) { |
1568 | RegSet allow = (RSET_GPR & ~RSET_SCRATCH); | ||
1569 | #if LJ_32 | ||
1061 | int32_t ofs = sizeof(GCcdata); | 1570 | int32_t ofs = sizeof(GCcdata); |
1062 | lua_assert(sz == 4 || sz == 8); | ||
1063 | if (sz == 8) { | 1571 | if (sz == 8) { |
1064 | ofs += 4; | 1572 | ofs += 4; |
1065 | lua_assert((ir+1)->o == IR_HIOP); | 1573 | lua_assert((ir+1)->o == IR_HIOP); |
@@ -1072,12 +1580,29 @@ static void asm_cnew(ASMState *as, IRIns *ir) | |||
1072 | if (ofs == sizeof(GCcdata)) break; | 1580 | if (ofs == sizeof(GCcdata)) break; |
1073 | ofs -= 4; if (LJ_BE) ir++; else ir--; | 1581 | ofs -= 4; if (LJ_BE) ir++; else ir--; |
1074 | } | 1582 | } |
1583 | #else | ||
1584 | emit_tsi(as, sz == 8 ? MIPSI_SD : MIPSI_SW, ra_alloc1(as, ir->op2, allow), | ||
1585 | RID_RET, sizeof(GCcdata)); | ||
1586 | #endif | ||
1587 | lua_assert(sz == 4 || sz == 8); | ||
1588 | } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ | ||
1589 | ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; | ||
1590 | args[0] = ASMREF_L; /* lua_State *L */ | ||
1591 | args[1] = ir->op1; /* CTypeID id */ | ||
1592 | args[2] = ir->op2; /* CTSize sz */ | ||
1593 | args[3] = ASMREF_TMP1; /* CTSize align */ | ||
1594 | asm_gencall(as, ci, args); | ||
1595 | emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info)); | ||
1596 | return; | ||
1075 | } | 1597 | } |
1598 | |||
1076 | /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ | 1599 | /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ |
1077 | emit_tsi(as, MIPSI_SB, RID_RET+1, RID_RET, offsetof(GCcdata, gct)); | 1600 | emit_tsi(as, MIPSI_SB, RID_RET+1, RID_RET, offsetof(GCcdata, gct)); |
1078 | emit_tsi(as, MIPSI_SH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid)); | 1601 | emit_tsi(as, MIPSI_SH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid)); |
1079 | emit_ti(as, MIPSI_LI, RID_RET+1, ~LJ_TCDATA); | 1602 | emit_ti(as, MIPSI_LI, RID_RET+1, ~LJ_TCDATA); |
1080 | emit_ti(as, MIPSI_LI, RID_TMP, ctypeid); /* Lower 16 bit used. Sign-ext ok. */ | 1603 | emit_ti(as, MIPSI_LI, RID_TMP, id); /* Lower 16 bit used. Sign-ext ok. */ |
1604 | args[0] = ASMREF_L; /* lua_State *L */ | ||
1605 | args[1] = ASMREF_TMP1; /* MSize size */ | ||
1081 | asm_gencall(as, ci, args); | 1606 | asm_gencall(as, ci, args); |
1082 | ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), | 1607 | ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), |
1083 | ra_releasetmp(as, ASMREF_TMP1)); | 1608 | ra_releasetmp(as, ASMREF_TMP1)); |
@@ -1094,7 +1619,7 @@ static void asm_tbar(ASMState *as, IRIns *ir) | |||
1094 | Reg mark = ra_scratch(as, rset_exclude(RSET_GPR, tab)); | 1619 | Reg mark = ra_scratch(as, rset_exclude(RSET_GPR, tab)); |
1095 | Reg link = RID_TMP; | 1620 | Reg link = RID_TMP; |
1096 | MCLabel l_end = emit_label(as); | 1621 | MCLabel l_end = emit_label(as); |
1097 | emit_tsi(as, MIPSI_SW, link, tab, (int32_t)offsetof(GCtab, gclist)); | 1622 | emit_tsi(as, MIPSI_AS, link, tab, (int32_t)offsetof(GCtab, gclist)); |
1098 | emit_tsi(as, MIPSI_SB, mark, tab, (int32_t)offsetof(GCtab, marked)); | 1623 | emit_tsi(as, MIPSI_SB, mark, tab, (int32_t)offsetof(GCtab, marked)); |
1099 | emit_setgl(as, tab, gc.grayagain); | 1624 | emit_setgl(as, tab, gc.grayagain); |
1100 | emit_getgl(as, link, gc.grayagain); | 1625 | emit_getgl(as, link, gc.grayagain); |
@@ -1117,7 +1642,7 @@ static void asm_obar(ASMState *as, IRIns *ir) | |||
1117 | args[0] = ASMREF_TMP1; /* global_State *g */ | 1642 | args[0] = ASMREF_TMP1; /* global_State *g */ |
1118 | args[1] = ir->op1; /* TValue *tv */ | 1643 | args[1] = ir->op1; /* TValue *tv */ |
1119 | asm_gencall(as, ci, args); | 1644 | asm_gencall(as, ci, args); |
1120 | emit_tsi(as, MIPSI_ADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); | 1645 | emit_tsi(as, MIPSI_AADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); |
1121 | obj = IR(ir->op1)->r; | 1646 | obj = IR(ir->op1)->r; |
1122 | tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj)); | 1647 | tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj)); |
1123 | emit_branch(as, MIPSI_BEQ, RID_TMP, RID_ZERO, l_end); | 1648 | emit_branch(as, MIPSI_BEQ, RID_TMP, RID_ZERO, l_end); |
@@ -1132,6 +1657,7 @@ static void asm_obar(ASMState *as, IRIns *ir) | |||
1132 | 1657 | ||
1133 | /* -- Arithmetic and logic operations ------------------------------------- */ | 1658 | /* -- Arithmetic and logic operations ------------------------------------- */ |
1134 | 1659 | ||
1660 | #if !LJ_SOFTFP | ||
1135 | static void asm_fparith(ASMState *as, IRIns *ir, MIPSIns mi) | 1661 | static void asm_fparith(ASMState *as, IRIns *ir, MIPSIns mi) |
1136 | { | 1662 | { |
1137 | Reg dest = ra_dest(as, ir, RSET_FPR); | 1663 | Reg dest = ra_dest(as, ir, RSET_FPR); |
@@ -1146,83 +1672,180 @@ static void asm_fpunary(ASMState *as, IRIns *ir, MIPSIns mi) | |||
1146 | Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); | 1672 | Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); |
1147 | emit_fg(as, mi, dest, left); | 1673 | emit_fg(as, mi, dest, left); |
1148 | } | 1674 | } |
1675 | #endif | ||
1149 | 1676 | ||
1150 | static int asm_fpjoin_pow(ASMState *as, IRIns *ir) | 1677 | #if !LJ_SOFTFP32 |
1151 | { | 1678 | static void asm_fpmath(ASMState *as, IRIns *ir) |
1152 | IRIns *irp = IR(ir->op1); | 1679 | { |
1153 | if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) { | 1680 | if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) |
1154 | IRIns *irpp = IR(irp->op1); | 1681 | return; |
1155 | if (irpp == ir-2 && irpp->o == IR_FPMATH && | 1682 | #if !LJ_SOFTFP |
1156 | irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) { | 1683 | if (ir->op2 <= IRFPM_TRUNC) |
1157 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow]; | 1684 | asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2); |
1158 | IRRef args[2]; | 1685 | else if (ir->op2 == IRFPM_SQRT) |
1159 | args[0] = irpp->op1; | 1686 | asm_fpunary(as, ir, MIPSI_SQRT_D); |
1160 | args[1] = irp->op2; | 1687 | else |
1161 | asm_setupresult(as, ir, ci); | 1688 | #endif |
1162 | asm_gencall(as, ci, args); | 1689 | asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); |
1163 | return 1; | ||
1164 | } | ||
1165 | } | ||
1166 | return 0; | ||
1167 | } | 1690 | } |
1691 | #endif | ||
1692 | |||
1693 | #if !LJ_SOFTFP | ||
1694 | #define asm_fpadd(as, ir) asm_fparith(as, ir, MIPSI_ADD_D) | ||
1695 | #define asm_fpsub(as, ir) asm_fparith(as, ir, MIPSI_SUB_D) | ||
1696 | #define asm_fpmul(as, ir) asm_fparith(as, ir, MIPSI_MUL_D) | ||
1697 | #elif LJ_64 /* && LJ_SOFTFP */ | ||
1698 | #define asm_fpadd(as, ir) asm_callid(as, ir, IRCALL_softfp_add) | ||
1699 | #define asm_fpsub(as, ir) asm_callid(as, ir, IRCALL_softfp_sub) | ||
1700 | #define asm_fpmul(as, ir) asm_callid(as, ir, IRCALL_softfp_mul) | ||
1701 | #endif | ||
1168 | 1702 | ||
1169 | static void asm_add(ASMState *as, IRIns *ir) | 1703 | static void asm_add(ASMState *as, IRIns *ir) |
1170 | { | 1704 | { |
1171 | if (irt_isnum(ir->t)) { | 1705 | IRType1 t = ir->t; |
1172 | asm_fparith(as, ir, MIPSI_ADD_D); | 1706 | #if !LJ_SOFTFP32 |
1173 | } else { | 1707 | if (irt_isnum(t)) { |
1708 | asm_fpadd(as, ir); | ||
1709 | } else | ||
1710 | #endif | ||
1711 | { | ||
1712 | /* TODO MIPSR6: Fuse ADD(BSHL(a,1-4),b) or ADD(ADD(a,a),b) to MIPSI_ALSA. */ | ||
1174 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1713 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1175 | Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); | 1714 | Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); |
1176 | if (irref_isk(ir->op2)) { | 1715 | if (irref_isk(ir->op2)) { |
1177 | int32_t k = IR(ir->op2)->i; | 1716 | intptr_t k = get_kval(IR(ir->op2)); |
1178 | if (checki16(k)) { | 1717 | if (checki16(k)) { |
1179 | emit_tsi(as, MIPSI_ADDIU, dest, left, k); | 1718 | emit_tsi(as, (LJ_64 && irt_is64(t)) ? MIPSI_DADDIU : MIPSI_ADDIU, dest, |
1719 | left, k); | ||
1180 | return; | 1720 | return; |
1181 | } | 1721 | } |
1182 | } | 1722 | } |
1183 | right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); | 1723 | right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); |
1184 | emit_dst(as, MIPSI_ADDU, dest, left, right); | 1724 | emit_dst(as, (LJ_64 && irt_is64(t)) ? MIPSI_DADDU : MIPSI_ADDU, dest, |
1725 | left, right); | ||
1185 | } | 1726 | } |
1186 | } | 1727 | } |
1187 | 1728 | ||
1188 | static void asm_sub(ASMState *as, IRIns *ir) | 1729 | static void asm_sub(ASMState *as, IRIns *ir) |
1189 | { | 1730 | { |
1731 | #if !LJ_SOFTFP32 | ||
1190 | if (irt_isnum(ir->t)) { | 1732 | if (irt_isnum(ir->t)) { |
1191 | asm_fparith(as, ir, MIPSI_SUB_D); | 1733 | asm_fpsub(as, ir); |
1192 | } else { | 1734 | } else |
1735 | #endif | ||
1736 | { | ||
1193 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1737 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1194 | Reg right, left = ra_alloc2(as, ir, RSET_GPR); | 1738 | Reg right, left = ra_alloc2(as, ir, RSET_GPR); |
1195 | right = (left >> 8); left &= 255; | 1739 | right = (left >> 8); left &= 255; |
1196 | emit_dst(as, MIPSI_SUBU, dest, left, right); | 1740 | emit_dst(as, (LJ_64 && irt_is64(ir->t)) ? MIPSI_DSUBU : MIPSI_SUBU, dest, |
1741 | left, right); | ||
1197 | } | 1742 | } |
1198 | } | 1743 | } |
1199 | 1744 | ||
1200 | static void asm_mul(ASMState *as, IRIns *ir) | 1745 | static void asm_mul(ASMState *as, IRIns *ir) |
1201 | { | 1746 | { |
1747 | #if !LJ_SOFTFP32 | ||
1202 | if (irt_isnum(ir->t)) { | 1748 | if (irt_isnum(ir->t)) { |
1203 | asm_fparith(as, ir, MIPSI_MUL_D); | 1749 | asm_fpmul(as, ir); |
1204 | } else { | 1750 | } else |
1751 | #endif | ||
1752 | { | ||
1205 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1753 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1206 | Reg right, left = ra_alloc2(as, ir, RSET_GPR); | 1754 | Reg right, left = ra_alloc2(as, ir, RSET_GPR); |
1207 | right = (left >> 8); left &= 255; | 1755 | right = (left >> 8); left &= 255; |
1208 | emit_dst(as, MIPSI_MUL, dest, left, right); | 1756 | if (LJ_64 && irt_is64(ir->t)) { |
1757 | #if !LJ_TARGET_MIPSR6 | ||
1758 | emit_dst(as, MIPSI_MFLO, dest, 0, 0); | ||
1759 | emit_dst(as, MIPSI_DMULT, 0, left, right); | ||
1760 | #else | ||
1761 | emit_dst(as, MIPSI_DMUL, dest, left, right); | ||
1762 | #endif | ||
1763 | } else { | ||
1764 | emit_dst(as, MIPSI_MUL, dest, left, right); | ||
1765 | } | ||
1209 | } | 1766 | } |
1210 | } | 1767 | } |
1211 | 1768 | ||
1769 | static void asm_mod(ASMState *as, IRIns *ir) | ||
1770 | { | ||
1771 | #if LJ_64 && LJ_HASFFI | ||
1772 | if (!irt_isint(ir->t)) | ||
1773 | asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : | ||
1774 | IRCALL_lj_carith_modu64); | ||
1775 | else | ||
1776 | #endif | ||
1777 | asm_callid(as, ir, IRCALL_lj_vm_modi); | ||
1778 | } | ||
1779 | |||
1780 | #if !LJ_SOFTFP32 | ||
1781 | static void asm_pow(ASMState *as, IRIns *ir) | ||
1782 | { | ||
1783 | #if LJ_64 && LJ_HASFFI | ||
1784 | if (!irt_isnum(ir->t)) | ||
1785 | asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : | ||
1786 | IRCALL_lj_carith_powu64); | ||
1787 | else | ||
1788 | #endif | ||
1789 | asm_callid(as, ir, IRCALL_lj_vm_powi); | ||
1790 | } | ||
1791 | |||
1792 | static void asm_div(ASMState *as, IRIns *ir) | ||
1793 | { | ||
1794 | #if LJ_64 && LJ_HASFFI | ||
1795 | if (!irt_isnum(ir->t)) | ||
1796 | asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 : | ||
1797 | IRCALL_lj_carith_divu64); | ||
1798 | else | ||
1799 | #endif | ||
1800 | #if !LJ_SOFTFP | ||
1801 | asm_fparith(as, ir, MIPSI_DIV_D); | ||
1802 | #else | ||
1803 | asm_callid(as, ir, IRCALL_softfp_div); | ||
1804 | #endif | ||
1805 | } | ||
1806 | #endif | ||
1807 | |||
1212 | static void asm_neg(ASMState *as, IRIns *ir) | 1808 | static void asm_neg(ASMState *as, IRIns *ir) |
1213 | { | 1809 | { |
1810 | #if !LJ_SOFTFP | ||
1214 | if (irt_isnum(ir->t)) { | 1811 | if (irt_isnum(ir->t)) { |
1215 | asm_fpunary(as, ir, MIPSI_NEG_D); | 1812 | asm_fpunary(as, ir, MIPSI_NEG_D); |
1216 | } else { | 1813 | } else |
1814 | #elif LJ_64 /* && LJ_SOFTFP */ | ||
1815 | if (irt_isnum(ir->t)) { | ||
1816 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
1817 | Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); | ||
1818 | emit_dst(as, MIPSI_XOR, dest, left, | ||
1819 | ra_allock(as, 0x8000000000000000ll, rset_exclude(RSET_GPR, dest))); | ||
1820 | } else | ||
1821 | #endif | ||
1822 | { | ||
1217 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1823 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1218 | Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); | 1824 | Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); |
1219 | emit_dst(as, MIPSI_SUBU, dest, RID_ZERO, left); | 1825 | emit_dst(as, (LJ_64 && irt_is64(ir->t)) ? MIPSI_DSUBU : MIPSI_SUBU, dest, |
1826 | RID_ZERO, left); | ||
1220 | } | 1827 | } |
1221 | } | 1828 | } |
1222 | 1829 | ||
1830 | #if !LJ_SOFTFP | ||
1831 | #define asm_abs(as, ir) asm_fpunary(as, ir, MIPSI_ABS_D) | ||
1832 | #elif LJ_64 /* && LJ_SOFTFP */ | ||
1833 | static void asm_abs(ASMState *as, IRIns *ir) | ||
1834 | { | ||
1835 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
1836 | Reg left = ra_alloc1(as, ir->op1, RSET_GPR); | ||
1837 | emit_tsml(as, MIPSI_DEXTM, dest, left, 30, 0); | ||
1838 | } | ||
1839 | #endif | ||
1840 | |||
1841 | #define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2) | ||
1842 | #define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) | ||
1843 | |||
1223 | static void asm_arithov(ASMState *as, IRIns *ir) | 1844 | static void asm_arithov(ASMState *as, IRIns *ir) |
1224 | { | 1845 | { |
1846 | /* TODO MIPSR6: bovc/bnvc. Caveat: no delay slot to load RID_TMP. */ | ||
1225 | Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR); | 1847 | Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR); |
1848 | lua_assert(!irt_is64(ir->t)); | ||
1226 | if (irref_isk(ir->op2)) { | 1849 | if (irref_isk(ir->op2)) { |
1227 | int k = IR(ir->op2)->i; | 1850 | int k = IR(ir->op2)->i; |
1228 | if (ir->o == IR_SUBOV) k = -k; | 1851 | if (ir->o == IR_SUBOV) k = -k; |
@@ -1253,16 +1876,29 @@ static void asm_arithov(ASMState *as, IRIns *ir) | |||
1253 | emit_move(as, RID_TMP, dest == left ? left : right); | 1876 | emit_move(as, RID_TMP, dest == left ? left : right); |
1254 | } | 1877 | } |
1255 | 1878 | ||
1879 | #define asm_addov(as, ir) asm_arithov(as, ir) | ||
1880 | #define asm_subov(as, ir) asm_arithov(as, ir) | ||
1881 | |||
1256 | static void asm_mulov(ASMState *as, IRIns *ir) | 1882 | static void asm_mulov(ASMState *as, IRIns *ir) |
1257 | { | 1883 | { |
1258 | #if LJ_DUALNUM | 1884 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1259 | #error "NYI: MULOV" | 1885 | Reg tmp, right, left = ra_alloc2(as, ir, RSET_GPR); |
1886 | right = (left >> 8); left &= 255; | ||
1887 | tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR, left), | ||
1888 | right), dest)); | ||
1889 | asm_guard(as, MIPSI_BNE, RID_TMP, tmp); | ||
1890 | emit_dta(as, MIPSI_SRA, RID_TMP, dest, 31); | ||
1891 | #if !LJ_TARGET_MIPSR6 | ||
1892 | emit_dst(as, MIPSI_MFHI, tmp, 0, 0); | ||
1893 | emit_dst(as, MIPSI_MFLO, dest, 0, 0); | ||
1894 | emit_dst(as, MIPSI_MULT, 0, left, right); | ||
1260 | #else | 1895 | #else |
1261 | UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused in single-number mode. */ | 1896 | emit_dst(as, MIPSI_MUL, dest, left, right); |
1897 | emit_dst(as, MIPSI_MUH, tmp, left, right); | ||
1262 | #endif | 1898 | #endif |
1263 | } | 1899 | } |
1264 | 1900 | ||
1265 | #if LJ_HASFFI | 1901 | #if LJ_32 && LJ_HASFFI |
1266 | static void asm_add64(ASMState *as, IRIns *ir) | 1902 | static void asm_add64(ASMState *as, IRIns *ir) |
1267 | { | 1903 | { |
1268 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1904 | Reg dest = ra_dest(as, ir, RSET_GPR); |
@@ -1346,7 +1982,7 @@ static void asm_neg64(ASMState *as, IRIns *ir) | |||
1346 | } | 1982 | } |
1347 | #endif | 1983 | #endif |
1348 | 1984 | ||
1349 | static void asm_bitnot(ASMState *as, IRIns *ir) | 1985 | static void asm_bnot(ASMState *as, IRIns *ir) |
1350 | { | 1986 | { |
1351 | Reg left, right, dest = ra_dest(as, ir, RSET_GPR); | 1987 | Reg left, right, dest = ra_dest(as, ir, RSET_GPR); |
1352 | IRIns *irl = IR(ir->op1); | 1988 | IRIns *irl = IR(ir->op1); |
@@ -1360,11 +1996,12 @@ static void asm_bitnot(ASMState *as, IRIns *ir) | |||
1360 | emit_dst(as, MIPSI_NOR, dest, left, right); | 1996 | emit_dst(as, MIPSI_NOR, dest, left, right); |
1361 | } | 1997 | } |
1362 | 1998 | ||
1363 | static void asm_bitswap(ASMState *as, IRIns *ir) | 1999 | static void asm_bswap(ASMState *as, IRIns *ir) |
1364 | { | 2000 | { |
1365 | Reg dest = ra_dest(as, ir, RSET_GPR); | 2001 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1366 | Reg left = ra_alloc1(as, ir->op1, RSET_GPR); | 2002 | Reg left = ra_alloc1(as, ir->op1, RSET_GPR); |
1367 | if ((as->flags & JIT_F_MIPS32R2)) { | 2003 | #if LJ_32 |
2004 | if ((as->flags & JIT_F_MIPSXXR2)) { | ||
1368 | emit_dta(as, MIPSI_ROTR, dest, RID_TMP, 16); | 2005 | emit_dta(as, MIPSI_ROTR, dest, RID_TMP, 16); |
1369 | emit_dst(as, MIPSI_WSBH, RID_TMP, 0, left); | 2006 | emit_dst(as, MIPSI_WSBH, RID_TMP, 0, left); |
1370 | } else { | 2007 | } else { |
@@ -1379,6 +2016,15 @@ static void asm_bitswap(ASMState *as, IRIns *ir) | |||
1379 | emit_dta(as, MIPSI_SRL, tmp, left, 24); | 2016 | emit_dta(as, MIPSI_SRL, tmp, left, 24); |
1380 | emit_dta(as, MIPSI_SLL, RID_TMP, left, 24); | 2017 | emit_dta(as, MIPSI_SLL, RID_TMP, left, 24); |
1381 | } | 2018 | } |
2019 | #else | ||
2020 | if (irt_is64(ir->t)) { | ||
2021 | emit_dst(as, MIPSI_DSHD, dest, 0, RID_TMP); | ||
2022 | emit_dst(as, MIPSI_DSBH, RID_TMP, 0, left); | ||
2023 | } else { | ||
2024 | emit_dta(as, MIPSI_ROTR, dest, RID_TMP, 16); | ||
2025 | emit_dst(as, MIPSI_WSBH, RID_TMP, 0, left); | ||
2026 | } | ||
2027 | #endif | ||
1382 | } | 2028 | } |
1383 | 2029 | ||
1384 | static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik) | 2030 | static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik) |
@@ -1386,7 +2032,7 @@ static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik) | |||
1386 | Reg dest = ra_dest(as, ir, RSET_GPR); | 2032 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1387 | Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); | 2033 | Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); |
1388 | if (irref_isk(ir->op2)) { | 2034 | if (irref_isk(ir->op2)) { |
1389 | int32_t k = IR(ir->op2)->i; | 2035 | intptr_t k = get_kval(IR(ir->op2)); |
1390 | if (checku16(k)) { | 2036 | if (checku16(k)) { |
1391 | emit_tsi(as, mik, dest, left, k); | 2037 | emit_tsi(as, mik, dest, left, k); |
1392 | return; | 2038 | return; |
@@ -1396,22 +2042,34 @@ static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik) | |||
1396 | emit_dst(as, mi, dest, left, right); | 2042 | emit_dst(as, mi, dest, left, right); |
1397 | } | 2043 | } |
1398 | 2044 | ||
2045 | #define asm_band(as, ir) asm_bitop(as, ir, MIPSI_AND, MIPSI_ANDI) | ||
2046 | #define asm_bor(as, ir) asm_bitop(as, ir, MIPSI_OR, MIPSI_ORI) | ||
2047 | #define asm_bxor(as, ir) asm_bitop(as, ir, MIPSI_XOR, MIPSI_XORI) | ||
2048 | |||
1399 | static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik) | 2049 | static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik) |
1400 | { | 2050 | { |
1401 | Reg dest = ra_dest(as, ir, RSET_GPR); | 2051 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1402 | if (irref_isk(ir->op2)) { /* Constant shifts. */ | 2052 | if (irref_isk(ir->op2)) { /* Constant shifts. */ |
1403 | uint32_t shift = (uint32_t)(IR(ir->op2)->i & 31); | 2053 | uint32_t shift = (uint32_t)IR(ir->op2)->i; |
1404 | emit_dta(as, mik, dest, ra_hintalloc(as, ir->op1, dest, RSET_GPR), shift); | 2054 | if (LJ_64 && irt_is64(ir->t)) mik |= (shift & 32) ? MIPSI_D32 : MIPSI_D; |
2055 | emit_dta(as, mik, dest, ra_hintalloc(as, ir->op1, dest, RSET_GPR), | ||
2056 | (shift & 31)); | ||
1405 | } else { | 2057 | } else { |
1406 | Reg right, left = ra_alloc2(as, ir, RSET_GPR); | 2058 | Reg right, left = ra_alloc2(as, ir, RSET_GPR); |
1407 | right = (left >> 8); left &= 255; | 2059 | right = (left >> 8); left &= 255; |
2060 | if (LJ_64 && irt_is64(ir->t)) mi |= MIPSI_DV; | ||
1408 | emit_dst(as, mi, dest, right, left); /* Shift amount is in rs. */ | 2061 | emit_dst(as, mi, dest, right, left); /* Shift amount is in rs. */ |
1409 | } | 2062 | } |
1410 | } | 2063 | } |
1411 | 2064 | ||
1412 | static void asm_bitror(ASMState *as, IRIns *ir) | 2065 | #define asm_bshl(as, ir) asm_bitshift(as, ir, MIPSI_SLLV, MIPSI_SLL) |
2066 | #define asm_bshr(as, ir) asm_bitshift(as, ir, MIPSI_SRLV, MIPSI_SRL) | ||
2067 | #define asm_bsar(as, ir) asm_bitshift(as, ir, MIPSI_SRAV, MIPSI_SRA) | ||
2068 | #define asm_brol(as, ir) lua_assert(0) | ||
2069 | |||
2070 | static void asm_bror(ASMState *as, IRIns *ir) | ||
1413 | { | 2071 | { |
1414 | if ((as->flags & JIT_F_MIPS32R2)) { | 2072 | if (LJ_64 || (as->flags & JIT_F_MIPSXXR2)) { |
1415 | asm_bitshift(as, ir, MIPSI_ROTRV, MIPSI_ROTR); | 2073 | asm_bitshift(as, ir, MIPSI_ROTRV, MIPSI_ROTR); |
1416 | } else { | 2074 | } else { |
1417 | Reg dest = ra_dest(as, ir, RSET_GPR); | 2075 | Reg dest = ra_dest(as, ir, RSET_GPR); |
@@ -1430,12 +2088,38 @@ static void asm_bitror(ASMState *as, IRIns *ir) | |||
1430 | } | 2088 | } |
1431 | } | 2089 | } |
1432 | 2090 | ||
2091 | #if LJ_SOFTFP | ||
2092 | static void asm_sfpmin_max(ASMState *as, IRIns *ir) | ||
2093 | { | ||
2094 | CCallInfo ci = lj_ir_callinfo[(IROp)ir->o == IR_MIN ? IRCALL_lj_vm_sfmin : IRCALL_lj_vm_sfmax]; | ||
2095 | #if LJ_64 | ||
2096 | IRRef args[2]; | ||
2097 | args[0] = ir->op1; | ||
2098 | args[1] = ir->op2; | ||
2099 | #else | ||
2100 | IRRef args[4]; | ||
2101 | args[0^LJ_BE] = ir->op1; | ||
2102 | args[1^LJ_BE] = (ir+1)->op1; | ||
2103 | args[2^LJ_BE] = ir->op2; | ||
2104 | args[3^LJ_BE] = (ir+1)->op2; | ||
2105 | #endif | ||
2106 | asm_setupresult(as, ir, &ci); | ||
2107 | emit_call(as, (void *)ci.func, 0); | ||
2108 | ci.func = NULL; | ||
2109 | asm_gencall(as, &ci, args); | ||
2110 | } | ||
2111 | #endif | ||
2112 | |||
1433 | static void asm_min_max(ASMState *as, IRIns *ir, int ismax) | 2113 | static void asm_min_max(ASMState *as, IRIns *ir, int ismax) |
1434 | { | 2114 | { |
1435 | if (irt_isnum(ir->t)) { | 2115 | if (!LJ_SOFTFP32 && irt_isnum(ir->t)) { |
2116 | #if LJ_SOFTFP | ||
2117 | asm_sfpmin_max(as, ir); | ||
2118 | #else | ||
1436 | Reg dest = ra_dest(as, ir, RSET_FPR); | 2119 | Reg dest = ra_dest(as, ir, RSET_FPR); |
1437 | Reg right, left = ra_alloc2(as, ir, RSET_FPR); | 2120 | Reg right, left = ra_alloc2(as, ir, RSET_FPR); |
1438 | right = (left >> 8); left &= 255; | 2121 | right = (left >> 8); left &= 255; |
2122 | #if !LJ_TARGET_MIPSR6 | ||
1439 | if (dest == left) { | 2123 | if (dest == left) { |
1440 | emit_fg(as, MIPSI_MOVT_D, dest, right); | 2124 | emit_fg(as, MIPSI_MOVT_D, dest, right); |
1441 | } else { | 2125 | } else { |
@@ -1443,42 +2127,143 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax) | |||
1443 | if (dest != right) emit_fg(as, MIPSI_MOV_D, dest, right); | 2127 | if (dest != right) emit_fg(as, MIPSI_MOV_D, dest, right); |
1444 | } | 2128 | } |
1445 | emit_fgh(as, MIPSI_C_OLT_D, 0, ismax ? left : right, ismax ? right : left); | 2129 | emit_fgh(as, MIPSI_C_OLT_D, 0, ismax ? left : right, ismax ? right : left); |
2130 | #else | ||
2131 | emit_fgh(as, ismax ? MIPSI_MAX_D : MIPSI_MIN_D, dest, left, right); | ||
2132 | #endif | ||
2133 | #endif | ||
1446 | } else { | 2134 | } else { |
1447 | Reg dest = ra_dest(as, ir, RSET_GPR); | 2135 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1448 | Reg right, left = ra_alloc2(as, ir, RSET_GPR); | 2136 | Reg right, left = ra_alloc2(as, ir, RSET_GPR); |
1449 | right = (left >> 8); left &= 255; | 2137 | right = (left >> 8); left &= 255; |
1450 | if (dest == left) { | 2138 | if (left == right) { |
1451 | emit_dst(as, MIPSI_MOVN, dest, right, RID_TMP); | 2139 | if (dest != left) emit_move(as, dest, left); |
1452 | } else { | 2140 | } else { |
1453 | emit_dst(as, MIPSI_MOVZ, dest, left, RID_TMP); | 2141 | #if !LJ_TARGET_MIPSR6 |
1454 | if (dest != right) emit_move(as, dest, right); | 2142 | if (dest == left) { |
2143 | emit_dst(as, MIPSI_MOVN, dest, right, RID_TMP); | ||
2144 | } else { | ||
2145 | emit_dst(as, MIPSI_MOVZ, dest, left, RID_TMP); | ||
2146 | if (dest != right) emit_move(as, dest, right); | ||
2147 | } | ||
2148 | #else | ||
2149 | emit_dst(as, MIPSI_OR, dest, dest, RID_TMP); | ||
2150 | if (dest != right) { | ||
2151 | emit_dst(as, MIPSI_SELNEZ, RID_TMP, right, RID_TMP); | ||
2152 | emit_dst(as, MIPSI_SELEQZ, dest, left, RID_TMP); | ||
2153 | } else { | ||
2154 | emit_dst(as, MIPSI_SELEQZ, RID_TMP, left, RID_TMP); | ||
2155 | emit_dst(as, MIPSI_SELNEZ, dest, right, RID_TMP); | ||
2156 | } | ||
2157 | #endif | ||
2158 | emit_dst(as, MIPSI_SLT, RID_TMP, | ||
2159 | ismax ? left : right, ismax ? right : left); | ||
1455 | } | 2160 | } |
1456 | emit_dst(as, MIPSI_SLT, RID_TMP, | ||
1457 | ismax ? left : right, ismax ? right : left); | ||
1458 | } | 2161 | } |
1459 | } | 2162 | } |
1460 | 2163 | ||
2164 | #define asm_min(as, ir) asm_min_max(as, ir, 0) | ||
2165 | #define asm_max(as, ir) asm_min_max(as, ir, 1) | ||
2166 | |||
1461 | /* -- Comparisons --------------------------------------------------------- */ | 2167 | /* -- Comparisons --------------------------------------------------------- */ |
1462 | 2168 | ||
2169 | #if LJ_SOFTFP | ||
2170 | /* SFP comparisons. */ | ||
2171 | static void asm_sfpcomp(ASMState *as, IRIns *ir) | ||
2172 | { | ||
2173 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp]; | ||
2174 | RegSet drop = RSET_SCRATCH; | ||
2175 | Reg r; | ||
2176 | #if LJ_64 | ||
2177 | IRRef args[2]; | ||
2178 | args[0] = ir->op1; | ||
2179 | args[1] = ir->op2; | ||
2180 | #else | ||
2181 | IRRef args[4]; | ||
2182 | args[LJ_LE ? 0 : 1] = ir->op1; args[LJ_LE ? 1 : 0] = (ir+1)->op1; | ||
2183 | args[LJ_LE ? 2 : 3] = ir->op2; args[LJ_LE ? 3 : 2] = (ir+1)->op2; | ||
2184 | #endif | ||
2185 | |||
2186 | for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+(LJ_64?1:3); r++) { | ||
2187 | if (!rset_test(as->freeset, r) && | ||
2188 | regcost_ref(as->cost[r]) == args[r-REGARG_FIRSTGPR]) | ||
2189 | rset_clear(drop, r); | ||
2190 | } | ||
2191 | ra_evictset(as, drop); | ||
2192 | |||
2193 | asm_setupresult(as, ir, ci); | ||
2194 | |||
2195 | switch ((IROp)ir->o) { | ||
2196 | case IR_LT: | ||
2197 | asm_guard(as, MIPSI_BGEZ, RID_RET, 0); | ||
2198 | break; | ||
2199 | case IR_ULT: | ||
2200 | asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP); | ||
2201 | emit_loadi(as, RID_TMP, 1); | ||
2202 | asm_guard(as, MIPSI_BEQ, RID_RET, RID_ZERO); | ||
2203 | break; | ||
2204 | case IR_GE: | ||
2205 | asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP); | ||
2206 | emit_loadi(as, RID_TMP, 2); | ||
2207 | asm_guard(as, MIPSI_BLTZ, RID_RET, 0); | ||
2208 | break; | ||
2209 | case IR_LE: | ||
2210 | asm_guard(as, MIPSI_BGTZ, RID_RET, 0); | ||
2211 | break; | ||
2212 | case IR_GT: | ||
2213 | asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP); | ||
2214 | emit_loadi(as, RID_TMP, 2); | ||
2215 | asm_guard(as, MIPSI_BLEZ, RID_RET, 0); | ||
2216 | break; | ||
2217 | case IR_UGE: | ||
2218 | asm_guard(as, MIPSI_BLTZ, RID_RET, 0); | ||
2219 | break; | ||
2220 | case IR_ULE: | ||
2221 | asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP); | ||
2222 | emit_loadi(as, RID_TMP, 1); | ||
2223 | break; | ||
2224 | case IR_UGT: case IR_ABC: | ||
2225 | asm_guard(as, MIPSI_BLEZ, RID_RET, 0); | ||
2226 | break; | ||
2227 | case IR_EQ: case IR_NE: | ||
2228 | asm_guard(as, (ir->o & 1) ? MIPSI_BEQ : MIPSI_BNE, RID_RET, RID_ZERO); | ||
2229 | default: | ||
2230 | break; | ||
2231 | } | ||
2232 | asm_gencall(as, ci, args); | ||
2233 | } | ||
2234 | #endif | ||
2235 | |||
1463 | static void asm_comp(ASMState *as, IRIns *ir) | 2236 | static void asm_comp(ASMState *as, IRIns *ir) |
1464 | { | 2237 | { |
1465 | /* ORDER IR: LT GE LE GT ULT UGE ULE UGT. */ | 2238 | /* ORDER IR: LT GE LE GT ULT UGE ULE UGT. */ |
1466 | IROp op = ir->o; | 2239 | IROp op = ir->o; |
1467 | if (irt_isnum(ir->t)) { | 2240 | if (!LJ_SOFTFP32 && irt_isnum(ir->t)) { |
2241 | #if LJ_SOFTFP | ||
2242 | asm_sfpcomp(as, ir); | ||
2243 | #else | ||
2244 | #if !LJ_TARGET_MIPSR6 | ||
1468 | Reg right, left = ra_alloc2(as, ir, RSET_FPR); | 2245 | Reg right, left = ra_alloc2(as, ir, RSET_FPR); |
1469 | right = (left >> 8); left &= 255; | 2246 | right = (left >> 8); left &= 255; |
1470 | asm_guard(as, (op&1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0); | 2247 | asm_guard(as, (op&1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0); |
1471 | emit_fgh(as, MIPSI_C_OLT_D + ((op&3) ^ ((op>>2)&1)), 0, left, right); | 2248 | emit_fgh(as, MIPSI_C_OLT_D + ((op&3) ^ ((op>>2)&1)), 0, left, right); |
2249 | #else | ||
2250 | Reg tmp, right, left = ra_alloc2(as, ir, RSET_FPR); | ||
2251 | right = (left >> 8); left &= 255; | ||
2252 | tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_FPR, left), right)); | ||
2253 | asm_guard(as, (op&1) ? MIPSI_BC1NEZ : MIPSI_BC1EQZ, 0, (tmp&31)); | ||
2254 | emit_fgh(as, MIPSI_CMP_LT_D + ((op&3) ^ ((op>>2)&1)), tmp, left, right); | ||
2255 | #endif | ||
2256 | #endif | ||
1472 | } else { | 2257 | } else { |
1473 | Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR); | 2258 | Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR); |
1474 | if (op == IR_ABC) op = IR_UGT; | 2259 | if (op == IR_ABC) op = IR_UGT; |
1475 | if ((op&4) == 0 && irref_isk(ir->op2) && IR(ir->op2)->i == 0) { | 2260 | if ((op&4) == 0 && irref_isk(ir->op2) && get_kval(IR(ir->op2)) == 0) { |
1476 | MIPSIns mi = (op&2) ? ((op&1) ? MIPSI_BLEZ : MIPSI_BGTZ) : | 2261 | MIPSIns mi = (op&2) ? ((op&1) ? MIPSI_BLEZ : MIPSI_BGTZ) : |
1477 | ((op&1) ? MIPSI_BLTZ : MIPSI_BGEZ); | 2262 | ((op&1) ? MIPSI_BLTZ : MIPSI_BGEZ); |
1478 | asm_guard(as, mi, left, 0); | 2263 | asm_guard(as, mi, left, 0); |
1479 | } else { | 2264 | } else { |
1480 | if (irref_isk(ir->op2)) { | 2265 | if (irref_isk(ir->op2)) { |
1481 | int32_t k = IR(ir->op2)->i; | 2266 | intptr_t k = get_kval(IR(ir->op2)); |
1482 | if ((op&2)) k++; | 2267 | if ((op&2)) k++; |
1483 | if (checki16(k)) { | 2268 | if (checki16(k)) { |
1484 | asm_guard(as, (op&1) ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO); | 2269 | asm_guard(as, (op&1) ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO); |
@@ -1495,19 +2280,28 @@ static void asm_comp(ASMState *as, IRIns *ir) | |||
1495 | } | 2280 | } |
1496 | } | 2281 | } |
1497 | 2282 | ||
1498 | static void asm_compeq(ASMState *as, IRIns *ir) | 2283 | static void asm_equal(ASMState *as, IRIns *ir) |
1499 | { | 2284 | { |
1500 | Reg right, left = ra_alloc2(as, ir, irt_isnum(ir->t) ? RSET_FPR : RSET_GPR); | 2285 | Reg right, left = ra_alloc2(as, ir, (!LJ_SOFTFP && irt_isnum(ir->t)) ? |
2286 | RSET_FPR : RSET_GPR); | ||
1501 | right = (left >> 8); left &= 255; | 2287 | right = (left >> 8); left &= 255; |
1502 | if (irt_isnum(ir->t)) { | 2288 | if (!LJ_SOFTFP32 && irt_isnum(ir->t)) { |
2289 | #if LJ_SOFTFP | ||
2290 | asm_sfpcomp(as, ir); | ||
2291 | #elif !LJ_TARGET_MIPSR6 | ||
1503 | asm_guard(as, (ir->o & 1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0); | 2292 | asm_guard(as, (ir->o & 1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0); |
1504 | emit_fgh(as, MIPSI_C_EQ_D, 0, left, right); | 2293 | emit_fgh(as, MIPSI_C_EQ_D, 0, left, right); |
2294 | #else | ||
2295 | Reg tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_FPR, left), right)); | ||
2296 | asm_guard(as, (ir->o & 1) ? MIPSI_BC1NEZ : MIPSI_BC1EQZ, 0, (tmp&31)); | ||
2297 | emit_fgh(as, MIPSI_CMP_EQ_D, tmp, left, right); | ||
2298 | #endif | ||
1505 | } else { | 2299 | } else { |
1506 | asm_guard(as, (ir->o & 1) ? MIPSI_BEQ : MIPSI_BNE, left, right); | 2300 | asm_guard(as, (ir->o & 1) ? MIPSI_BEQ : MIPSI_BNE, left, right); |
1507 | } | 2301 | } |
1508 | } | 2302 | } |
1509 | 2303 | ||
1510 | #if LJ_HASFFI | 2304 | #if LJ_32 && LJ_HASFFI |
1511 | /* 64 bit integer comparisons. */ | 2305 | /* 64 bit integer comparisons. */ |
1512 | static void asm_comp64(ASMState *as, IRIns *ir) | 2306 | static void asm_comp64(ASMState *as, IRIns *ir) |
1513 | { | 2307 | { |
@@ -1549,41 +2343,79 @@ static void asm_comp64eq(ASMState *as, IRIns *ir) | |||
1549 | /* Hiword op of a split 64 bit op. Previous op must be the loword op. */ | 2343 | /* Hiword op of a split 64 bit op. Previous op must be the loword op. */ |
1550 | static void asm_hiop(ASMState *as, IRIns *ir) | 2344 | static void asm_hiop(ASMState *as, IRIns *ir) |
1551 | { | 2345 | { |
1552 | #if LJ_HASFFI | 2346 | #if LJ_32 && (LJ_HASFFI || LJ_SOFTFP) |
1553 | /* HIOP is marked as a store because it needs its own DCE logic. */ | 2347 | /* HIOP is marked as a store because it needs its own DCE logic. */ |
1554 | int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ | 2348 | int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ |
1555 | if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; | 2349 | if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; |
1556 | if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ | 2350 | if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ |
1557 | as->curins--; /* Always skip the CONV. */ | 2351 | as->curins--; /* Always skip the CONV. */ |
2352 | #if LJ_HASFFI && !LJ_SOFTFP | ||
1558 | if (usehi || uselo) | 2353 | if (usehi || uselo) |
1559 | asm_conv64(as, ir); | 2354 | asm_conv64(as, ir); |
1560 | return; | 2355 | return; |
2356 | #endif | ||
1561 | } else if ((ir-1)->o < IR_EQ) { /* 64 bit integer comparisons. ORDER IR. */ | 2357 | } else if ((ir-1)->o < IR_EQ) { /* 64 bit integer comparisons. ORDER IR. */ |
1562 | as->curins--; /* Always skip the loword comparison. */ | 2358 | as->curins--; /* Always skip the loword comparison. */ |
2359 | #if LJ_SOFTFP | ||
2360 | if (!irt_isint(ir->t)) { | ||
2361 | asm_sfpcomp(as, ir-1); | ||
2362 | return; | ||
2363 | } | ||
2364 | #endif | ||
2365 | #if LJ_HASFFI | ||
1563 | asm_comp64(as, ir); | 2366 | asm_comp64(as, ir); |
2367 | #endif | ||
1564 | return; | 2368 | return; |
1565 | } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */ | 2369 | } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */ |
1566 | as->curins--; /* Always skip the loword comparison. */ | 2370 | as->curins--; /* Always skip the loword comparison. */ |
2371 | #if LJ_SOFTFP | ||
2372 | if (!irt_isint(ir->t)) { | ||
2373 | asm_sfpcomp(as, ir-1); | ||
2374 | return; | ||
2375 | } | ||
2376 | #endif | ||
2377 | #if LJ_HASFFI | ||
1567 | asm_comp64eq(as, ir); | 2378 | asm_comp64eq(as, ir); |
2379 | #endif | ||
1568 | return; | 2380 | return; |
2381 | #if LJ_SOFTFP | ||
2382 | } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) { | ||
2383 | as->curins--; /* Always skip the loword min/max. */ | ||
2384 | if (uselo || usehi) | ||
2385 | asm_sfpmin_max(as, ir-1); | ||
2386 | return; | ||
2387 | #endif | ||
1569 | } else if ((ir-1)->o == IR_XSTORE) { | 2388 | } else if ((ir-1)->o == IR_XSTORE) { |
1570 | as->curins--; /* Handle both stores here. */ | 2389 | as->curins--; /* Handle both stores here. */ |
1571 | if ((ir-1)->r != RID_SINK) { | 2390 | if ((ir-1)->r != RID_SINK) { |
1572 | asm_xstore(as, ir, LJ_LE ? 4 : 0); | 2391 | asm_xstore_(as, ir, LJ_LE ? 4 : 0); |
1573 | asm_xstore(as, ir-1, LJ_LE ? 0 : 4); | 2392 | asm_xstore_(as, ir-1, LJ_LE ? 0 : 4); |
1574 | } | 2393 | } |
1575 | return; | 2394 | return; |
1576 | } | 2395 | } |
1577 | if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ | 2396 | if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ |
1578 | switch ((ir-1)->o) { | 2397 | switch ((ir-1)->o) { |
2398 | #if LJ_HASFFI | ||
1579 | case IR_ADD: as->curins--; asm_add64(as, ir); break; | 2399 | case IR_ADD: as->curins--; asm_add64(as, ir); break; |
1580 | case IR_SUB: as->curins--; asm_sub64(as, ir); break; | 2400 | case IR_SUB: as->curins--; asm_sub64(as, ir); break; |
1581 | case IR_NEG: as->curins--; asm_neg64(as, ir); break; | 2401 | case IR_NEG: as->curins--; asm_neg64(as, ir); break; |
2402 | #endif | ||
2403 | #if LJ_SOFTFP | ||
2404 | case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: | ||
2405 | case IR_STRTO: | ||
2406 | if (!uselo) | ||
2407 | ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */ | ||
2408 | break; | ||
2409 | #endif | ||
1582 | case IR_CALLN: | 2410 | case IR_CALLN: |
2411 | case IR_CALLS: | ||
1583 | case IR_CALLXS: | 2412 | case IR_CALLXS: |
1584 | if (!uselo) | 2413 | if (!uselo) |
1585 | ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ | 2414 | ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ |
1586 | break; | 2415 | break; |
2416 | #if LJ_SOFTFP | ||
2417 | case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: | ||
2418 | #endif | ||
1587 | case IR_CNEWI: | 2419 | case IR_CNEWI: |
1588 | /* Nothing to do here. Handled by lo op itself. */ | 2420 | /* Nothing to do here. Handled by lo op itself. */ |
1589 | break; | 2421 | break; |
@@ -1594,6 +2426,17 @@ static void asm_hiop(ASMState *as, IRIns *ir) | |||
1594 | #endif | 2426 | #endif |
1595 | } | 2427 | } |
1596 | 2428 | ||
2429 | /* -- Profiling ----------------------------------------------------------- */ | ||
2430 | |||
2431 | static void asm_prof(ASMState *as, IRIns *ir) | ||
2432 | { | ||
2433 | UNUSED(ir); | ||
2434 | asm_guard(as, MIPSI_BNE, RID_TMP, RID_ZERO); | ||
2435 | emit_tsi(as, MIPSI_ANDI, RID_TMP, RID_TMP, HOOK_PROFILE); | ||
2436 | emit_lsglptr(as, MIPSI_LBU, RID_TMP, | ||
2437 | (int32_t)offsetof(global_State, hookmask)); | ||
2438 | } | ||
2439 | |||
1597 | /* -- Stack handling ------------------------------------------------------ */ | 2440 | /* -- Stack handling ------------------------------------------------------ */ |
1598 | 2441 | ||
1599 | /* Check Lua stack size for overflow. Use exit handler as fallback. */ | 2442 | /* Check Lua stack size for overflow. Use exit handler as fallback. */ |
@@ -1604,46 +2447,67 @@ static void asm_stack_check(ASMState *as, BCReg topslot, | |||
1604 | Reg tmp, pbase = irp ? (ra_hasreg(irp->r) ? irp->r : RID_TMP) : RID_BASE; | 2447 | Reg tmp, pbase = irp ? (ra_hasreg(irp->r) ? irp->r : RID_TMP) : RID_BASE; |
1605 | ExitNo oldsnap = as->snapno; | 2448 | ExitNo oldsnap = as->snapno; |
1606 | rset_clear(allow, pbase); | 2449 | rset_clear(allow, pbase); |
2450 | #if LJ_32 | ||
1607 | tmp = allow ? rset_pickbot(allow) : | 2451 | tmp = allow ? rset_pickbot(allow) : |
1608 | (pbase == RID_RETHI ? RID_RETLO : RID_RETHI); | 2452 | (pbase == RID_RETHI ? RID_RETLO : RID_RETHI); |
2453 | #else | ||
2454 | tmp = allow ? rset_pickbot(allow) : RID_RET; | ||
2455 | #endif | ||
1609 | as->snapno = exitno; | 2456 | as->snapno = exitno; |
1610 | asm_guard(as, MIPSI_BNE, RID_TMP, RID_ZERO); | 2457 | asm_guard(as, MIPSI_BNE, RID_TMP, RID_ZERO); |
1611 | as->snapno = oldsnap; | 2458 | as->snapno = oldsnap; |
1612 | if (allow == RSET_EMPTY) /* Restore temp. register. */ | 2459 | if (allow == RSET_EMPTY) /* Restore temp. register. */ |
1613 | emit_tsi(as, MIPSI_LW, tmp, RID_SP, 0); | 2460 | emit_tsi(as, MIPSI_AL, tmp, RID_SP, 0); |
1614 | else | 2461 | else |
1615 | ra_modified(as, tmp); | 2462 | ra_modified(as, tmp); |
1616 | emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)(8*topslot)); | 2463 | emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)(8*topslot)); |
1617 | emit_dst(as, MIPSI_SUBU, RID_TMP, tmp, pbase); | 2464 | emit_dst(as, MIPSI_ASUBU, RID_TMP, tmp, pbase); |
1618 | emit_tsi(as, MIPSI_LW, tmp, tmp, offsetof(lua_State, maxstack)); | 2465 | emit_tsi(as, MIPSI_AL, tmp, tmp, offsetof(lua_State, maxstack)); |
1619 | if (pbase == RID_TMP) | 2466 | if (pbase == RID_TMP) |
1620 | emit_getgl(as, RID_TMP, jit_base); | 2467 | emit_getgl(as, RID_TMP, jit_base); |
1621 | emit_getgl(as, tmp, jit_L); | 2468 | emit_getgl(as, tmp, cur_L); |
1622 | if (allow == RSET_EMPTY) /* Spill temp. register. */ | 2469 | if (allow == RSET_EMPTY) /* Spill temp. register. */ |
1623 | emit_tsi(as, MIPSI_SW, tmp, RID_SP, 0); | 2470 | emit_tsi(as, MIPSI_AS, tmp, RID_SP, 0); |
1624 | } | 2471 | } |
1625 | 2472 | ||
1626 | /* Restore Lua stack from on-trace state. */ | 2473 | /* Restore Lua stack from on-trace state. */ |
1627 | static void asm_stack_restore(ASMState *as, SnapShot *snap) | 2474 | static void asm_stack_restore(ASMState *as, SnapShot *snap) |
1628 | { | 2475 | { |
1629 | SnapEntry *map = &as->T->snapmap[snap->mapofs]; | 2476 | SnapEntry *map = &as->T->snapmap[snap->mapofs]; |
1630 | SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1]; | 2477 | #if LJ_32 || defined(LUA_USE_ASSERT) |
2478 | SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2]; | ||
2479 | #endif | ||
1631 | MSize n, nent = snap->nent; | 2480 | MSize n, nent = snap->nent; |
1632 | /* Store the value of all modified slots to the Lua stack. */ | 2481 | /* Store the value of all modified slots to the Lua stack. */ |
1633 | for (n = 0; n < nent; n++) { | 2482 | for (n = 0; n < nent; n++) { |
1634 | SnapEntry sn = map[n]; | 2483 | SnapEntry sn = map[n]; |
1635 | BCReg s = snap_slot(sn); | 2484 | BCReg s = snap_slot(sn); |
1636 | int32_t ofs = 8*((int32_t)s-1); | 2485 | int32_t ofs = 8*((int32_t)s-1-LJ_FR2); |
1637 | IRRef ref = snap_ref(sn); | 2486 | IRRef ref = snap_ref(sn); |
1638 | IRIns *ir = IR(ref); | 2487 | IRIns *ir = IR(ref); |
1639 | if ((sn & SNAP_NORESTORE)) | 2488 | if ((sn & SNAP_NORESTORE)) |
1640 | continue; | 2489 | continue; |
1641 | if (irt_isnum(ir->t)) { | 2490 | if (irt_isnum(ir->t)) { |
2491 | #if LJ_SOFTFP32 | ||
2492 | Reg tmp; | ||
2493 | RegSet allow = rset_exclude(RSET_GPR, RID_BASE); | ||
2494 | lua_assert(irref_isk(ref)); /* LJ_SOFTFP: must be a number constant. */ | ||
2495 | tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, allow); | ||
2496 | emit_tsi(as, MIPSI_SW, tmp, RID_BASE, ofs+(LJ_BE?4:0)); | ||
2497 | if (rset_test(as->freeset, tmp+1)) allow = RID2RSET(tmp+1); | ||
2498 | tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, allow); | ||
2499 | emit_tsi(as, MIPSI_SW, tmp, RID_BASE, ofs+(LJ_BE?0:4)); | ||
2500 | #elif LJ_SOFTFP /* && LJ_64 */ | ||
2501 | Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); | ||
2502 | emit_tsi(as, MIPSI_SD, src, RID_BASE, ofs); | ||
2503 | #else | ||
1642 | Reg src = ra_alloc1(as, ref, RSET_FPR); | 2504 | Reg src = ra_alloc1(as, ref, RSET_FPR); |
1643 | emit_hsi(as, MIPSI_SDC1, src, RID_BASE, ofs); | 2505 | emit_hsi(as, MIPSI_SDC1, src, RID_BASE, ofs); |
2506 | #endif | ||
1644 | } else { | 2507 | } else { |
1645 | Reg type; | 2508 | #if LJ_32 |
1646 | RegSet allow = rset_exclude(RSET_GPR, RID_BASE); | 2509 | RegSet allow = rset_exclude(RSET_GPR, RID_BASE); |
2510 | Reg type; | ||
1647 | lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); | 2511 | lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); |
1648 | if (!irt_ispri(ir->t)) { | 2512 | if (!irt_ispri(ir->t)) { |
1649 | Reg src = ra_alloc1(as, ref, allow); | 2513 | Reg src = ra_alloc1(as, ref, allow); |
@@ -1653,10 +2517,17 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) | |||
1653 | if ((sn & (SNAP_CONT|SNAP_FRAME))) { | 2517 | if ((sn & (SNAP_CONT|SNAP_FRAME))) { |
1654 | if (s == 0) continue; /* Do not overwrite link to previous frame. */ | 2518 | if (s == 0) continue; /* Do not overwrite link to previous frame. */ |
1655 | type = ra_allock(as, (int32_t)(*flinks--), allow); | 2519 | type = ra_allock(as, (int32_t)(*flinks--), allow); |
2520 | #if LJ_SOFTFP | ||
2521 | } else if ((sn & SNAP_SOFTFPNUM)) { | ||
2522 | type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE)); | ||
2523 | #endif | ||
1656 | } else { | 2524 | } else { |
1657 | type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); | 2525 | type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); |
1658 | } | 2526 | } |
1659 | emit_tsi(as, MIPSI_SW, type, RID_BASE, ofs+(LJ_BE?0:4)); | 2527 | emit_tsi(as, MIPSI_SW, type, RID_BASE, ofs+(LJ_BE?0:4)); |
2528 | #else | ||
2529 | asm_tvstore64(as, RID_BASE, ofs, ref); | ||
2530 | #endif | ||
1660 | } | 2531 | } |
1661 | checkmclim(as); | 2532 | checkmclim(as); |
1662 | } | 2533 | } |
@@ -1680,7 +2551,7 @@ static void asm_gc_check(ASMState *as) | |||
1680 | args[0] = ASMREF_TMP1; /* global_State *g */ | 2551 | args[0] = ASMREF_TMP1; /* global_State *g */ |
1681 | args[1] = ASMREF_TMP2; /* MSize steps */ | 2552 | args[1] = ASMREF_TMP2; /* MSize steps */ |
1682 | asm_gencall(as, ci, args); | 2553 | asm_gencall(as, ci, args); |
1683 | emit_tsi(as, MIPSI_ADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); | 2554 | emit_tsi(as, MIPSI_AADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); |
1684 | tmp = ra_releasetmp(as, ASMREF_TMP2); | 2555 | tmp = ra_releasetmp(as, ASMREF_TMP2); |
1685 | emit_loadi(as, tmp, as->gcsteps); | 2556 | emit_loadi(as, tmp, as->gcsteps); |
1686 | /* Jump around GC step if GC total < GC threshold. */ | 2557 | /* Jump around GC step if GC total < GC threshold. */ |
@@ -1755,7 +2626,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk) | |||
1755 | MCode *target = lnk ? traceref(as->J,lnk)->mcode : (MCode *)lj_vm_exit_interp; | 2626 | MCode *target = lnk ? traceref(as->J,lnk)->mcode : (MCode *)lj_vm_exit_interp; |
1756 | int32_t spadj = as->T->spadjust; | 2627 | int32_t spadj = as->T->spadjust; |
1757 | MCode *p = as->mctop-1; | 2628 | MCode *p = as->mctop-1; |
1758 | *p = spadj ? (MIPSI_ADDIU|MIPSF_T(RID_SP)|MIPSF_S(RID_SP)|spadj) : MIPSI_NOP; | 2629 | *p = spadj ? (MIPSI_AADDIU|MIPSF_T(RID_SP)|MIPSF_S(RID_SP)|spadj) : MIPSI_NOP; |
1759 | p[-1] = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu); | 2630 | p[-1] = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu); |
1760 | } | 2631 | } |
1761 | 2632 | ||
@@ -1766,139 +2637,26 @@ static void asm_tail_prep(ASMState *as) | |||
1766 | as->invmcp = as->loopref ? as->mcp : NULL; | 2637 | as->invmcp = as->loopref ? as->mcp : NULL; |
1767 | } | 2638 | } |
1768 | 2639 | ||
1769 | /* -- Instruction dispatch ------------------------------------------------ */ | ||
1770 | |||
1771 | /* Assemble a single instruction. */ | ||
1772 | static void asm_ir(ASMState *as, IRIns *ir) | ||
1773 | { | ||
1774 | switch ((IROp)ir->o) { | ||
1775 | /* Miscellaneous ops. */ | ||
1776 | case IR_LOOP: asm_loop(as); break; | ||
1777 | case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break; | ||
1778 | case IR_USE: | ||
1779 | ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break; | ||
1780 | case IR_PHI: asm_phi(as, ir); break; | ||
1781 | case IR_HIOP: asm_hiop(as, ir); break; | ||
1782 | case IR_GCSTEP: asm_gcstep(as, ir); break; | ||
1783 | |||
1784 | /* Guarded assertions. */ | ||
1785 | case IR_EQ: case IR_NE: asm_compeq(as, ir); break; | ||
1786 | case IR_LT: case IR_GE: case IR_LE: case IR_GT: | ||
1787 | case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT: | ||
1788 | case IR_ABC: | ||
1789 | asm_comp(as, ir); | ||
1790 | break; | ||
1791 | |||
1792 | case IR_RETF: asm_retf(as, ir); break; | ||
1793 | |||
1794 | /* Bit ops. */ | ||
1795 | case IR_BNOT: asm_bitnot(as, ir); break; | ||
1796 | case IR_BSWAP: asm_bitswap(as, ir); break; | ||
1797 | |||
1798 | case IR_BAND: asm_bitop(as, ir, MIPSI_AND, MIPSI_ANDI); break; | ||
1799 | case IR_BOR: asm_bitop(as, ir, MIPSI_OR, MIPSI_ORI); break; | ||
1800 | case IR_BXOR: asm_bitop(as, ir, MIPSI_XOR, MIPSI_XORI); break; | ||
1801 | |||
1802 | case IR_BSHL: asm_bitshift(as, ir, MIPSI_SLLV, MIPSI_SLL); break; | ||
1803 | case IR_BSHR: asm_bitshift(as, ir, MIPSI_SRLV, MIPSI_SRL); break; | ||
1804 | case IR_BSAR: asm_bitshift(as, ir, MIPSI_SRAV, MIPSI_SRA); break; | ||
1805 | case IR_BROL: lua_assert(0); break; | ||
1806 | case IR_BROR: asm_bitror(as, ir); break; | ||
1807 | |||
1808 | /* Arithmetic ops. */ | ||
1809 | case IR_ADD: asm_add(as, ir); break; | ||
1810 | case IR_SUB: asm_sub(as, ir); break; | ||
1811 | case IR_MUL: asm_mul(as, ir); break; | ||
1812 | case IR_DIV: asm_fparith(as, ir, MIPSI_DIV_D); break; | ||
1813 | case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break; | ||
1814 | case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break; | ||
1815 | case IR_NEG: asm_neg(as, ir); break; | ||
1816 | |||
1817 | case IR_ABS: asm_fpunary(as, ir, MIPSI_ABS_D); break; | ||
1818 | case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break; | ||
1819 | case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break; | ||
1820 | case IR_MIN: asm_min_max(as, ir, 0); break; | ||
1821 | case IR_MAX: asm_min_max(as, ir, 1); break; | ||
1822 | case IR_FPMATH: | ||
1823 | if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) | ||
1824 | break; | ||
1825 | if (ir->op2 <= IRFPM_TRUNC) | ||
1826 | asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2); | ||
1827 | else if (ir->op2 == IRFPM_SQRT) | ||
1828 | asm_fpunary(as, ir, MIPSI_SQRT_D); | ||
1829 | else | ||
1830 | asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); | ||
1831 | break; | ||
1832 | |||
1833 | /* Overflow-checking arithmetic ops. */ | ||
1834 | case IR_ADDOV: asm_arithov(as, ir); break; | ||
1835 | case IR_SUBOV: asm_arithov(as, ir); break; | ||
1836 | case IR_MULOV: asm_mulov(as, ir); break; | ||
1837 | |||
1838 | /* Memory references. */ | ||
1839 | case IR_AREF: asm_aref(as, ir); break; | ||
1840 | case IR_HREF: asm_href(as, ir); break; | ||
1841 | case IR_HREFK: asm_hrefk(as, ir); break; | ||
1842 | case IR_NEWREF: asm_newref(as, ir); break; | ||
1843 | case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break; | ||
1844 | case IR_FREF: asm_fref(as, ir); break; | ||
1845 | case IR_STRREF: asm_strref(as, ir); break; | ||
1846 | |||
1847 | /* Loads and stores. */ | ||
1848 | case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: | ||
1849 | asm_ahuvload(as, ir); | ||
1850 | break; | ||
1851 | case IR_FLOAD: asm_fload(as, ir); break; | ||
1852 | case IR_XLOAD: asm_xload(as, ir); break; | ||
1853 | case IR_SLOAD: asm_sload(as, ir); break; | ||
1854 | |||
1855 | case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break; | ||
1856 | case IR_FSTORE: asm_fstore(as, ir); break; | ||
1857 | case IR_XSTORE: asm_xstore(as, ir, 0); break; | ||
1858 | |||
1859 | /* Allocations. */ | ||
1860 | case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break; | ||
1861 | case IR_TNEW: asm_tnew(as, ir); break; | ||
1862 | case IR_TDUP: asm_tdup(as, ir); break; | ||
1863 | case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break; | ||
1864 | |||
1865 | /* Write barriers. */ | ||
1866 | case IR_TBAR: asm_tbar(as, ir); break; | ||
1867 | case IR_OBAR: asm_obar(as, ir); break; | ||
1868 | |||
1869 | /* Type conversions. */ | ||
1870 | case IR_CONV: asm_conv(as, ir); break; | ||
1871 | case IR_TOBIT: asm_tobit(as, ir); break; | ||
1872 | case IR_TOSTR: asm_tostr(as, ir); break; | ||
1873 | case IR_STRTO: asm_strto(as, ir); break; | ||
1874 | |||
1875 | /* Calls. */ | ||
1876 | case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break; | ||
1877 | case IR_CALLXS: asm_callx(as, ir); break; | ||
1878 | case IR_CARG: break; | ||
1879 | |||
1880 | default: | ||
1881 | setintV(&as->J->errinfo, ir->o); | ||
1882 | lj_trace_err_info(as->J, LJ_TRERR_NYIIR); | ||
1883 | break; | ||
1884 | } | ||
1885 | } | ||
1886 | |||
1887 | /* -- Trace setup --------------------------------------------------------- */ | 2640 | /* -- Trace setup --------------------------------------------------------- */ |
1888 | 2641 | ||
1889 | /* Ensure there are enough stack slots for call arguments. */ | 2642 | /* Ensure there are enough stack slots for call arguments. */ |
1890 | static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) | 2643 | static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) |
1891 | { | 2644 | { |
1892 | IRRef args[CCI_NARGS_MAX*2]; | 2645 | IRRef args[CCI_NARGS_MAX*2]; |
1893 | uint32_t i, nargs = (int)CCI_NARGS(ci); | 2646 | uint32_t i, nargs = CCI_XNARGS(ci); |
2647 | #if LJ_32 | ||
1894 | int nslots = 4, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; | 2648 | int nslots = 4, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; |
2649 | #else | ||
2650 | int nslots = 0, ngpr = REGARG_NUMGPR; | ||
2651 | #endif | ||
1895 | asm_collectargs(as, ir, ci, args); | 2652 | asm_collectargs(as, ir, ci, args); |
1896 | for (i = 0; i < nargs; i++) { | 2653 | for (i = 0; i < nargs; i++) { |
1897 | if (args[i] && irt_isfp(IR(args[i])->t) && | 2654 | #if LJ_32 |
2655 | if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t) && | ||
1898 | nfpr > 0 && !(ci->flags & CCI_VARARG)) { | 2656 | nfpr > 0 && !(ci->flags & CCI_VARARG)) { |
1899 | nfpr--; | 2657 | nfpr--; |
1900 | ngpr -= irt_isnum(IR(args[i])->t) ? 2 : 1; | 2658 | ngpr -= irt_isnum(IR(args[i])->t) ? 2 : 1; |
1901 | } else if (args[i] && irt_isnum(IR(args[i])->t)) { | 2659 | } else if (!LJ_SOFTFP && args[i] && irt_isnum(IR(args[i])->t)) { |
1902 | nfpr = 0; | 2660 | nfpr = 0; |
1903 | ngpr = ngpr & ~1; | 2661 | ngpr = ngpr & ~1; |
1904 | if (ngpr > 0) ngpr -= 2; else nslots = (nslots+3) & ~1; | 2662 | if (ngpr > 0) ngpr -= 2; else nslots = (nslots+3) & ~1; |
@@ -1906,6 +2664,9 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) | |||
1906 | nfpr = 0; | 2664 | nfpr = 0; |
1907 | if (ngpr > 0) ngpr--; else nslots++; | 2665 | if (ngpr > 0) ngpr--; else nslots++; |
1908 | } | 2666 | } |
2667 | #else | ||
2668 | if (ngpr > 0) ngpr--; else nslots += 2; | ||
2669 | #endif | ||
1909 | } | 2670 | } |
1910 | if (nslots > as->evenspill) /* Leave room for args in stack slots. */ | 2671 | if (nslots > as->evenspill) /* Leave room for args in stack slots. */ |
1911 | as->evenspill = nslots; | 2672 | as->evenspill = nslots; |
@@ -1936,7 +2697,12 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) | |||
1936 | if (((p[-1] ^ (px-p)) & 0xffffu) == 0 && | 2697 | if (((p[-1] ^ (px-p)) & 0xffffu) == 0 && |
1937 | ((p[-1] & 0xf0000000u) == MIPSI_BEQ || | 2698 | ((p[-1] & 0xf0000000u) == MIPSI_BEQ || |
1938 | (p[-1] & 0xfc1e0000u) == MIPSI_BLTZ || | 2699 | (p[-1] & 0xfc1e0000u) == MIPSI_BLTZ || |
1939 | (p[-1] & 0xffe00000u) == MIPSI_BC1F)) { | 2700 | #if !LJ_TARGET_MIPSR6 |
2701 | (p[-1] & 0xffe00000u) == MIPSI_BC1F | ||
2702 | #else | ||
2703 | (p[-1] & 0xff600000u) == MIPSI_BC1EQZ | ||
2704 | #endif | ||
2705 | )) { | ||
1940 | ptrdiff_t delta = target - p; | 2706 | ptrdiff_t delta = target - p; |
1941 | if (((delta + 0x8000) >> 16) == 0) { /* Patch in-range branch. */ | 2707 | if (((delta + 0x8000) >> 16) == 0) { /* Patch in-range branch. */ |
1942 | patchbranch: | 2708 | patchbranch: |
diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h index dc092db2..8fa8c8ef 100644 --- a/src/lj_asm_ppc.h +++ b/src/lj_asm_ppc.h | |||
@@ -226,6 +226,7 @@ static void asm_fusexrefx(ASMState *as, PPCIns pi, Reg rt, IRRef ref, | |||
226 | emit_tab(as, pi, rt, left, right); | 226 | emit_tab(as, pi, rt, left, right); |
227 | } | 227 | } |
228 | 228 | ||
229 | #if !LJ_SOFTFP | ||
229 | /* Fuse to multiply-add/sub instruction. */ | 230 | /* Fuse to multiply-add/sub instruction. */ |
230 | static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir) | 231 | static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir) |
231 | { | 232 | { |
@@ -245,21 +246,26 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir) | |||
245 | } | 246 | } |
246 | return 0; | 247 | return 0; |
247 | } | 248 | } |
249 | #endif | ||
248 | 250 | ||
249 | /* -- Calls --------------------------------------------------------------- */ | 251 | /* -- Calls --------------------------------------------------------------- */ |
250 | 252 | ||
251 | /* Generate a call to a C function. */ | 253 | /* Generate a call to a C function. */ |
252 | static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) | 254 | static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) |
253 | { | 255 | { |
254 | uint32_t n, nargs = CCI_NARGS(ci); | 256 | uint32_t n, nargs = CCI_XNARGS(ci); |
255 | int32_t ofs = 8; | 257 | int32_t ofs = 8; |
256 | Reg gpr = REGARG_FIRSTGPR, fpr = REGARG_FIRSTFPR; | 258 | Reg gpr = REGARG_FIRSTGPR; |
259 | #if !LJ_SOFTFP | ||
260 | Reg fpr = REGARG_FIRSTFPR; | ||
261 | #endif | ||
257 | if ((void *)ci->func) | 262 | if ((void *)ci->func) |
258 | emit_call(as, (void *)ci->func); | 263 | emit_call(as, (void *)ci->func); |
259 | for (n = 0; n < nargs; n++) { /* Setup args. */ | 264 | for (n = 0; n < nargs; n++) { /* Setup args. */ |
260 | IRRef ref = args[n]; | 265 | IRRef ref = args[n]; |
261 | if (ref) { | 266 | if (ref) { |
262 | IRIns *ir = IR(ref); | 267 | IRIns *ir = IR(ref); |
268 | #if !LJ_SOFTFP | ||
263 | if (irt_isfp(ir->t)) { | 269 | if (irt_isfp(ir->t)) { |
264 | if (fpr <= REGARG_LASTFPR) { | 270 | if (fpr <= REGARG_LASTFPR) { |
265 | lua_assert(rset_test(as->freeset, fpr)); /* Already evicted. */ | 271 | lua_assert(rset_test(as->freeset, fpr)); /* Already evicted. */ |
@@ -271,7 +277,9 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) | |||
271 | emit_spstore(as, ir, r, ofs); | 277 | emit_spstore(as, ir, r, ofs); |
272 | ofs += irt_isnum(ir->t) ? 8 : 4; | 278 | ofs += irt_isnum(ir->t) ? 8 : 4; |
273 | } | 279 | } |
274 | } else { | 280 | } else |
281 | #endif | ||
282 | { | ||
275 | if (gpr <= REGARG_LASTGPR) { | 283 | if (gpr <= REGARG_LASTGPR) { |
276 | lua_assert(rset_test(as->freeset, gpr)); /* Already evicted. */ | 284 | lua_assert(rset_test(as->freeset, gpr)); /* Already evicted. */ |
277 | ra_leftov(as, gpr, ref); | 285 | ra_leftov(as, gpr, ref); |
@@ -290,8 +298,10 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) | |||
290 | } | 298 | } |
291 | checkmclim(as); | 299 | checkmclim(as); |
292 | } | 300 | } |
301 | #if !LJ_SOFTFP | ||
293 | if ((ci->flags & CCI_VARARG)) /* Vararg calls need to know about FPR use. */ | 302 | if ((ci->flags & CCI_VARARG)) /* Vararg calls need to know about FPR use. */ |
294 | emit_tab(as, fpr == REGARG_FIRSTFPR ? PPCI_CRXOR : PPCI_CREQV, 6, 6, 6); | 303 | emit_tab(as, fpr == REGARG_FIRSTFPR ? PPCI_CRXOR : PPCI_CREQV, 6, 6, 6); |
304 | #endif | ||
295 | } | 305 | } |
296 | 306 | ||
297 | /* Setup result reg/sp for call. Evict scratch regs. */ | 307 | /* Setup result reg/sp for call. Evict scratch regs. */ |
@@ -299,8 +309,10 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) | |||
299 | { | 309 | { |
300 | RegSet drop = RSET_SCRATCH; | 310 | RegSet drop = RSET_SCRATCH; |
301 | int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); | 311 | int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); |
312 | #if !LJ_SOFTFP | ||
302 | if ((ci->flags & CCI_NOFPRCLOBBER)) | 313 | if ((ci->flags & CCI_NOFPRCLOBBER)) |
303 | drop &= ~RSET_FPR; | 314 | drop &= ~RSET_FPR; |
315 | #endif | ||
304 | if (ra_hasreg(ir->r)) | 316 | if (ra_hasreg(ir->r)) |
305 | rset_clear(drop, ir->r); /* Dest reg handled below. */ | 317 | rset_clear(drop, ir->r); /* Dest reg handled below. */ |
306 | if (hiop && ra_hasreg((ir+1)->r)) | 318 | if (hiop && ra_hasreg((ir+1)->r)) |
@@ -308,7 +320,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) | |||
308 | ra_evictset(as, drop); /* Evictions must be performed first. */ | 320 | ra_evictset(as, drop); /* Evictions must be performed first. */ |
309 | if (ra_used(ir)) { | 321 | if (ra_used(ir)) { |
310 | lua_assert(!irt_ispri(ir->t)); | 322 | lua_assert(!irt_ispri(ir->t)); |
311 | if (irt_isfp(ir->t)) { | 323 | if (!LJ_SOFTFP && irt_isfp(ir->t)) { |
312 | if ((ci->flags & CCI_CASTU64)) { | 324 | if ((ci->flags & CCI_CASTU64)) { |
313 | /* Use spill slot or temp slots. */ | 325 | /* Use spill slot or temp slots. */ |
314 | int32_t ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP; | 326 | int32_t ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP; |
@@ -323,23 +335,16 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) | |||
323 | } else { | 335 | } else { |
324 | ra_destreg(as, ir, RID_FPRET); | 336 | ra_destreg(as, ir, RID_FPRET); |
325 | } | 337 | } |
338 | #if LJ_32 | ||
326 | } else if (hiop) { | 339 | } else if (hiop) { |
327 | ra_destpair(as, ir); | 340 | ra_destpair(as, ir); |
341 | #endif | ||
328 | } else { | 342 | } else { |
329 | ra_destreg(as, ir, RID_RET); | 343 | ra_destreg(as, ir, RID_RET); |
330 | } | 344 | } |
331 | } | 345 | } |
332 | } | 346 | } |
333 | 347 | ||
334 | static void asm_call(ASMState *as, IRIns *ir) | ||
335 | { | ||
336 | IRRef args[CCI_NARGS_MAX]; | ||
337 | const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; | ||
338 | asm_collectargs(as, ir, ci, args); | ||
339 | asm_setupresult(as, ir, ci); | ||
340 | asm_gencall(as, ci, args); | ||
341 | } | ||
342 | |||
343 | static void asm_callx(ASMState *as, IRIns *ir) | 348 | static void asm_callx(ASMState *as, IRIns *ir) |
344 | { | 349 | { |
345 | IRRef args[CCI_NARGS_MAX*2]; | 350 | IRRef args[CCI_NARGS_MAX*2]; |
@@ -352,7 +357,7 @@ static void asm_callx(ASMState *as, IRIns *ir) | |||
352 | func = ir->op2; irf = IR(func); | 357 | func = ir->op2; irf = IR(func); |
353 | if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } | 358 | if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } |
354 | if (irref_isk(func)) { /* Call to constant address. */ | 359 | if (irref_isk(func)) { /* Call to constant address. */ |
355 | ci.func = (ASMFunction)(void *)(irf->i); | 360 | ci.func = (ASMFunction)(void *)(intptr_t)(irf->i); |
356 | } else { /* Need a non-argument register for indirect calls. */ | 361 | } else { /* Need a non-argument register for indirect calls. */ |
357 | RegSet allow = RSET_GPR & ~RSET_RANGE(RID_R0, REGARG_LASTGPR+1); | 362 | RegSet allow = RSET_GPR & ~RSET_RANGE(RID_R0, REGARG_LASTGPR+1); |
358 | Reg freg = ra_alloc1(as, func, allow); | 363 | Reg freg = ra_alloc1(as, func, allow); |
@@ -363,16 +368,6 @@ static void asm_callx(ASMState *as, IRIns *ir) | |||
363 | asm_gencall(as, &ci, args); | 368 | asm_gencall(as, &ci, args); |
364 | } | 369 | } |
365 | 370 | ||
366 | static void asm_callid(ASMState *as, IRIns *ir, IRCallID id) | ||
367 | { | ||
368 | const CCallInfo *ci = &lj_ir_callinfo[id]; | ||
369 | IRRef args[2]; | ||
370 | args[0] = ir->op1; | ||
371 | args[1] = ir->op2; | ||
372 | asm_setupresult(as, ir, ci); | ||
373 | asm_gencall(as, ci, args); | ||
374 | } | ||
375 | |||
376 | /* -- Returns ------------------------------------------------------------- */ | 371 | /* -- Returns ------------------------------------------------------------- */ |
377 | 372 | ||
378 | /* Return to lower frame. Guard that it goes to the right spot. */ | 373 | /* Return to lower frame. Guard that it goes to the right spot. */ |
@@ -380,7 +375,7 @@ static void asm_retf(ASMState *as, IRIns *ir) | |||
380 | { | 375 | { |
381 | Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); | 376 | Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); |
382 | void *pc = ir_kptr(IR(ir->op2)); | 377 | void *pc = ir_kptr(IR(ir->op2)); |
383 | int32_t delta = 1+bc_a(*((const BCIns *)pc - 1)); | 378 | int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); |
384 | as->topslot -= (BCReg)delta; | 379 | as->topslot -= (BCReg)delta; |
385 | if ((int32_t)as->topslot < 0) as->topslot = 0; | 380 | if ((int32_t)as->topslot < 0) as->topslot = 0; |
386 | irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ | 381 | irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ |
@@ -394,6 +389,7 @@ static void asm_retf(ASMState *as, IRIns *ir) | |||
394 | 389 | ||
395 | /* -- Type conversions ---------------------------------------------------- */ | 390 | /* -- Type conversions ---------------------------------------------------- */ |
396 | 391 | ||
392 | #if !LJ_SOFTFP | ||
397 | static void asm_tointg(ASMState *as, IRIns *ir, Reg left) | 393 | static void asm_tointg(ASMState *as, IRIns *ir, Reg left) |
398 | { | 394 | { |
399 | RegSet allow = RSET_FPR; | 395 | RegSet allow = RSET_FPR; |
@@ -410,8 +406,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left) | |||
410 | emit_asi(as, PPCI_XORIS, RID_TMP, dest, 0x8000); | 406 | emit_asi(as, PPCI_XORIS, RID_TMP, dest, 0x8000); |
411 | emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); | 407 | emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); |
412 | emit_lsptr(as, PPCI_LFS, (fbias & 31), | 408 | emit_lsptr(as, PPCI_LFS, (fbias & 31), |
413 | (void *)lj_ir_k64_find(as->J, U64x(59800004,59800000)), | 409 | (void *)&as->J->k32[LJ_K32_2P52_2P31], RSET_GPR); |
414 | RSET_GPR); | ||
415 | emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); | 410 | emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); |
416 | emit_fb(as, PPCI_FCTIWZ, tmp, left); | 411 | emit_fb(as, PPCI_FCTIWZ, tmp, left); |
417 | } | 412 | } |
@@ -427,15 +422,23 @@ static void asm_tobit(ASMState *as, IRIns *ir) | |||
427 | emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); | 422 | emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); |
428 | emit_fab(as, PPCI_FADD, tmp, left, right); | 423 | emit_fab(as, PPCI_FADD, tmp, left, right); |
429 | } | 424 | } |
425 | #endif | ||
430 | 426 | ||
431 | static void asm_conv(ASMState *as, IRIns *ir) | 427 | static void asm_conv(ASMState *as, IRIns *ir) |
432 | { | 428 | { |
433 | IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); | 429 | IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); |
430 | #if !LJ_SOFTFP | ||
434 | int stfp = (st == IRT_NUM || st == IRT_FLOAT); | 431 | int stfp = (st == IRT_NUM || st == IRT_FLOAT); |
432 | #endif | ||
435 | IRRef lref = ir->op1; | 433 | IRRef lref = ir->op1; |
436 | lua_assert(irt_type(ir->t) != st); | ||
437 | lua_assert(!(irt_isint64(ir->t) || | 434 | lua_assert(!(irt_isint64(ir->t) || |
438 | (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */ | 435 | (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */ |
436 | #if LJ_SOFTFP | ||
437 | /* FP conversions are handled by SPLIT. */ | ||
438 | lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT)); | ||
439 | /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */ | ||
440 | #else | ||
441 | lua_assert(irt_type(ir->t) != st); | ||
439 | if (irt_isfp(ir->t)) { | 442 | if (irt_isfp(ir->t)) { |
440 | Reg dest = ra_dest(as, ir, RSET_FPR); | 443 | Reg dest = ra_dest(as, ir, RSET_FPR); |
441 | if (stfp) { /* FP to FP conversion. */ | 444 | if (stfp) { /* FP to FP conversion. */ |
@@ -450,13 +453,11 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
450 | Reg left = ra_alloc1(as, lref, allow); | 453 | Reg left = ra_alloc1(as, lref, allow); |
451 | Reg hibias = ra_allock(as, 0x43300000, rset_clear(allow, left)); | 454 | Reg hibias = ra_allock(as, 0x43300000, rset_clear(allow, left)); |
452 | Reg fbias = ra_scratch(as, rset_exclude(RSET_FPR, dest)); | 455 | Reg fbias = ra_scratch(as, rset_exclude(RSET_FPR, dest)); |
453 | const float *kbias; | ||
454 | if (irt_isfloat(ir->t)) emit_fb(as, PPCI_FRSP, dest, dest); | 456 | if (irt_isfloat(ir->t)) emit_fb(as, PPCI_FRSP, dest, dest); |
455 | emit_fab(as, PPCI_FSUB, dest, dest, fbias); | 457 | emit_fab(as, PPCI_FSUB, dest, dest, fbias); |
456 | emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP); | 458 | emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP); |
457 | kbias = (const float *)lj_ir_k64_find(as->J, U64x(59800004,59800000)); | 459 | emit_lsptr(as, PPCI_LFS, (fbias & 31), |
458 | if (st == IRT_U32) kbias++; | 460 | &as->J->k32[st == IRT_U32 ? LJ_K32_2P52 : LJ_K32_2P52_2P31], |
459 | emit_lsptr(as, PPCI_LFS, (fbias & 31), (void *)kbias, | ||
460 | rset_clear(allow, hibias)); | 461 | rset_clear(allow, hibias)); |
461 | emit_tai(as, PPCI_STW, st == IRT_U32 ? left : RID_TMP, | 462 | emit_tai(as, PPCI_STW, st == IRT_U32 ? left : RID_TMP, |
462 | RID_SP, SPOFS_TMPLO); | 463 | RID_SP, SPOFS_TMPLO); |
@@ -489,15 +490,16 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
489 | emit_fb(as, PPCI_FCTIWZ, tmp, tmp); | 490 | emit_fb(as, PPCI_FCTIWZ, tmp, tmp); |
490 | emit_fab(as, PPCI_FSUB, tmp, left, tmp); | 491 | emit_fab(as, PPCI_FSUB, tmp, left, tmp); |
491 | emit_lsptr(as, PPCI_LFS, (tmp & 31), | 492 | emit_lsptr(as, PPCI_LFS, (tmp & 31), |
492 | (void *)lj_ir_k64_find(as->J, U64x(4f000000,00000000)), | 493 | (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR); |
493 | RSET_GPR); | ||
494 | } else { | 494 | } else { |
495 | emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); | 495 | emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); |
496 | emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); | 496 | emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); |
497 | emit_fb(as, PPCI_FCTIWZ, tmp, left); | 497 | emit_fb(as, PPCI_FCTIWZ, tmp, left); |
498 | } | 498 | } |
499 | } | 499 | } |
500 | } else { | 500 | } else |
501 | #endif | ||
502 | { | ||
501 | Reg dest = ra_dest(as, ir, RSET_GPR); | 503 | Reg dest = ra_dest(as, ir, RSET_GPR); |
502 | if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ | 504 | if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ |
503 | Reg left = ra_alloc1(as, ir->op1, RSET_GPR); | 505 | Reg left = ra_alloc1(as, ir->op1, RSET_GPR); |
@@ -513,46 +515,50 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
513 | } | 515 | } |
514 | } | 516 | } |
515 | 517 | ||
516 | #if LJ_HASFFI | ||
517 | static void asm_conv64(ASMState *as, IRIns *ir) | ||
518 | { | ||
519 | IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK); | ||
520 | IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH); | ||
521 | IRCallID id; | ||
522 | const CCallInfo *ci; | ||
523 | IRRef args[2]; | ||
524 | args[0] = ir->op1; | ||
525 | args[1] = (ir-1)->op1; | ||
526 | if (st == IRT_NUM || st == IRT_FLOAT) { | ||
527 | id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64); | ||
528 | ir--; | ||
529 | } else { | ||
530 | id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64); | ||
531 | } | ||
532 | ci = &lj_ir_callinfo[id]; | ||
533 | asm_setupresult(as, ir, ci); | ||
534 | asm_gencall(as, ci, args); | ||
535 | } | ||
536 | #endif | ||
537 | |||
538 | static void asm_strto(ASMState *as, IRIns *ir) | 518 | static void asm_strto(ASMState *as, IRIns *ir) |
539 | { | 519 | { |
540 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; | 520 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; |
541 | IRRef args[2]; | 521 | IRRef args[2]; |
542 | int32_t ofs; | 522 | int32_t ofs = SPOFS_TMP; |
523 | #if LJ_SOFTFP | ||
524 | ra_evictset(as, RSET_SCRATCH); | ||
525 | if (ra_used(ir)) { | ||
526 | if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) && | ||
527 | (ir->s & 1) == LJ_BE && (ir->s ^ 1) == (ir+1)->s) { | ||
528 | int i; | ||
529 | for (i = 0; i < 2; i++) { | ||
530 | Reg r = (ir+i)->r; | ||
531 | if (ra_hasreg(r)) { | ||
532 | ra_free(as, r); | ||
533 | ra_modified(as, r); | ||
534 | emit_spload(as, ir+i, r, sps_scale((ir+i)->s)); | ||
535 | } | ||
536 | } | ||
537 | ofs = sps_scale(ir->s & ~1); | ||
538 | } else { | ||
539 | Reg rhi = ra_dest(as, ir+1, RSET_GPR); | ||
540 | Reg rlo = ra_dest(as, ir, rset_exclude(RSET_GPR, rhi)); | ||
541 | emit_tai(as, PPCI_LWZ, rhi, RID_SP, ofs); | ||
542 | emit_tai(as, PPCI_LWZ, rlo, RID_SP, ofs+4); | ||
543 | } | ||
544 | } | ||
545 | #else | ||
543 | RegSet drop = RSET_SCRATCH; | 546 | RegSet drop = RSET_SCRATCH; |
544 | if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */ | 547 | if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */ |
545 | ra_evictset(as, drop); | 548 | ra_evictset(as, drop); |
549 | if (ir->s) ofs = sps_scale(ir->s); | ||
550 | #endif | ||
546 | asm_guardcc(as, CC_EQ); | 551 | asm_guardcc(as, CC_EQ); |
547 | emit_ai(as, PPCI_CMPWI, RID_RET, 0); /* Test return status. */ | 552 | emit_ai(as, PPCI_CMPWI, RID_RET, 0); /* Test return status. */ |
548 | args[0] = ir->op1; /* GCstr *str */ | 553 | args[0] = ir->op1; /* GCstr *str */ |
549 | args[1] = ASMREF_TMP1; /* TValue *n */ | 554 | args[1] = ASMREF_TMP1; /* TValue *n */ |
550 | asm_gencall(as, ci, args); | 555 | asm_gencall(as, ci, args); |
551 | /* Store the result to the spill slot or temp slots. */ | 556 | /* Store the result to the spill slot or temp slots. */ |
552 | ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP; | ||
553 | emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs); | 557 | emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs); |
554 | } | 558 | } |
555 | 559 | ||
560 | /* -- Memory references --------------------------------------------------- */ | ||
561 | |||
556 | /* Get pointer to TValue. */ | 562 | /* Get pointer to TValue. */ |
557 | static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) | 563 | static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) |
558 | { | 564 | { |
@@ -566,37 +572,19 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) | |||
566 | /* Otherwise use g->tmptv to hold the TValue. */ | 572 | /* Otherwise use g->tmptv to hold the TValue. */ |
567 | RegSet allow = rset_exclude(RSET_GPR, dest); | 573 | RegSet allow = rset_exclude(RSET_GPR, dest); |
568 | Reg type; | 574 | Reg type; |
569 | emit_tai(as, PPCI_ADDI, dest, RID_JGL, offsetof(global_State, tmptv)-32768); | 575 | emit_tai(as, PPCI_ADDI, dest, RID_JGL, (int32_t)offsetof(global_State, tmptv)-32768); |
570 | if (!irt_ispri(ir->t)) { | 576 | if (!irt_ispri(ir->t)) { |
571 | Reg src = ra_alloc1(as, ref, allow); | 577 | Reg src = ra_alloc1(as, ref, allow); |
572 | emit_setgl(as, src, tmptv.gcr); | 578 | emit_setgl(as, src, tmptv.gcr); |
573 | } | 579 | } |
574 | type = ra_allock(as, irt_toitype(ir->t), allow); | 580 | if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) |
581 | type = ra_alloc1(as, ref+1, allow); | ||
582 | else | ||
583 | type = ra_allock(as, irt_toitype(ir->t), allow); | ||
575 | emit_setgl(as, type, tmptv.it); | 584 | emit_setgl(as, type, tmptv.it); |
576 | } | 585 | } |
577 | } | 586 | } |
578 | 587 | ||
579 | static void asm_tostr(ASMState *as, IRIns *ir) | ||
580 | { | ||
581 | IRRef args[2]; | ||
582 | args[0] = ASMREF_L; | ||
583 | as->gcsteps++; | ||
584 | if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) { | ||
585 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum]; | ||
586 | args[1] = ASMREF_TMP1; /* const lua_Number * */ | ||
587 | asm_setupresult(as, ir, ci); /* GCstr * */ | ||
588 | asm_gencall(as, ci, args); | ||
589 | asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1); | ||
590 | } else { | ||
591 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint]; | ||
592 | args[1] = ir->op1; /* int32_t k */ | ||
593 | asm_setupresult(as, ir, ci); /* GCstr * */ | ||
594 | asm_gencall(as, ci, args); | ||
595 | } | ||
596 | } | ||
597 | |||
598 | /* -- Memory references --------------------------------------------------- */ | ||
599 | |||
600 | static void asm_aref(ASMState *as, IRIns *ir) | 588 | static void asm_aref(ASMState *as, IRIns *ir) |
601 | { | 589 | { |
602 | Reg dest = ra_dest(as, ir, RSET_GPR); | 590 | Reg dest = ra_dest(as, ir, RSET_GPR); |
@@ -636,11 +624,27 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) | |||
636 | Reg tisnum = RID_NONE, tmpnum = RID_NONE; | 624 | Reg tisnum = RID_NONE, tmpnum = RID_NONE; |
637 | IRRef refkey = ir->op2; | 625 | IRRef refkey = ir->op2; |
638 | IRIns *irkey = IR(refkey); | 626 | IRIns *irkey = IR(refkey); |
627 | int isk = irref_isk(refkey); | ||
639 | IRType1 kt = irkey->t; | 628 | IRType1 kt = irkey->t; |
640 | uint32_t khash; | 629 | uint32_t khash; |
641 | MCLabel l_end, l_loop, l_next; | 630 | MCLabel l_end, l_loop, l_next; |
642 | 631 | ||
643 | rset_clear(allow, tab); | 632 | rset_clear(allow, tab); |
633 | #if LJ_SOFTFP | ||
634 | if (!isk) { | ||
635 | key = ra_alloc1(as, refkey, allow); | ||
636 | rset_clear(allow, key); | ||
637 | if (irkey[1].o == IR_HIOP) { | ||
638 | if (ra_hasreg((irkey+1)->r)) { | ||
639 | tmpnum = (irkey+1)->r; | ||
640 | ra_noweak(as, tmpnum); | ||
641 | } else { | ||
642 | tmpnum = ra_allocref(as, refkey+1, allow); | ||
643 | } | ||
644 | rset_clear(allow, tmpnum); | ||
645 | } | ||
646 | } | ||
647 | #else | ||
644 | if (irt_isnum(kt)) { | 648 | if (irt_isnum(kt)) { |
645 | key = ra_alloc1(as, refkey, RSET_FPR); | 649 | key = ra_alloc1(as, refkey, RSET_FPR); |
646 | tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); | 650 | tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); |
@@ -650,6 +654,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) | |||
650 | key = ra_alloc1(as, refkey, allow); | 654 | key = ra_alloc1(as, refkey, allow); |
651 | rset_clear(allow, key); | 655 | rset_clear(allow, key); |
652 | } | 656 | } |
657 | #endif | ||
653 | tmp2 = ra_scratch(as, allow); | 658 | tmp2 = ra_scratch(as, allow); |
654 | rset_clear(allow, tmp2); | 659 | rset_clear(allow, tmp2); |
655 | 660 | ||
@@ -672,7 +677,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) | |||
672 | asm_guardcc(as, CC_EQ); | 677 | asm_guardcc(as, CC_EQ); |
673 | else | 678 | else |
674 | emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end); | 679 | emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end); |
675 | if (irt_isnum(kt)) { | 680 | if (!LJ_SOFTFP && irt_isnum(kt)) { |
676 | emit_fab(as, PPCI_FCMPU, 0, tmpnum, key); | 681 | emit_fab(as, PPCI_FCMPU, 0, tmpnum, key); |
677 | emit_condbranch(as, PPCI_BC, CC_GE, l_next); | 682 | emit_condbranch(as, PPCI_BC, CC_GE, l_next); |
678 | emit_ab(as, PPCI_CMPLW, tmp1, tisnum); | 683 | emit_ab(as, PPCI_CMPLW, tmp1, tisnum); |
@@ -682,7 +687,10 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) | |||
682 | emit_ab(as, PPCI_CMPW, tmp2, key); | 687 | emit_ab(as, PPCI_CMPW, tmp2, key); |
683 | emit_condbranch(as, PPCI_BC, CC_NE, l_next); | 688 | emit_condbranch(as, PPCI_BC, CC_NE, l_next); |
684 | } | 689 | } |
685 | emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t)); | 690 | if (LJ_SOFTFP && ra_hasreg(tmpnum)) |
691 | emit_ab(as, PPCI_CMPW, tmp1, tmpnum); | ||
692 | else | ||
693 | emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t)); | ||
686 | if (!irt_ispri(kt)) | 694 | if (!irt_ispri(kt)) |
687 | emit_tai(as, PPCI_LWZ, tmp2, dest, (int32_t)offsetof(Node, key.gcr)); | 695 | emit_tai(as, PPCI_LWZ, tmp2, dest, (int32_t)offsetof(Node, key.gcr)); |
688 | } | 696 | } |
@@ -691,19 +699,19 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) | |||
691 | (((char *)as->mcp-(char *)l_loop) & 0xffffu); | 699 | (((char *)as->mcp-(char *)l_loop) & 0xffffu); |
692 | 700 | ||
693 | /* Load main position relative to tab->node into dest. */ | 701 | /* Load main position relative to tab->node into dest. */ |
694 | khash = irref_isk(refkey) ? ir_khash(irkey) : 1; | 702 | khash = isk ? ir_khash(irkey) : 1; |
695 | if (khash == 0) { | 703 | if (khash == 0) { |
696 | emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node)); | 704 | emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node)); |
697 | } else { | 705 | } else { |
698 | Reg tmphash = tmp1; | 706 | Reg tmphash = tmp1; |
699 | if (irref_isk(refkey)) | 707 | if (isk) |
700 | tmphash = ra_allock(as, khash, allow); | 708 | tmphash = ra_allock(as, khash, allow); |
701 | emit_tab(as, PPCI_ADD, dest, dest, tmp1); | 709 | emit_tab(as, PPCI_ADD, dest, dest, tmp1); |
702 | emit_tai(as, PPCI_MULLI, tmp1, tmp1, sizeof(Node)); | 710 | emit_tai(as, PPCI_MULLI, tmp1, tmp1, sizeof(Node)); |
703 | emit_asb(as, PPCI_AND, tmp1, tmp2, tmphash); | 711 | emit_asb(as, PPCI_AND, tmp1, tmp2, tmphash); |
704 | emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node)); | 712 | emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node)); |
705 | emit_tai(as, PPCI_LWZ, tmp2, tab, (int32_t)offsetof(GCtab, hmask)); | 713 | emit_tai(as, PPCI_LWZ, tmp2, tab, (int32_t)offsetof(GCtab, hmask)); |
706 | if (irref_isk(refkey)) { | 714 | if (isk) { |
707 | /* Nothing to do. */ | 715 | /* Nothing to do. */ |
708 | } else if (irt_isstr(kt)) { | 716 | } else if (irt_isstr(kt)) { |
709 | emit_tai(as, PPCI_LWZ, tmp1, key, (int32_t)offsetof(GCstr, hash)); | 717 | emit_tai(as, PPCI_LWZ, tmp1, key, (int32_t)offsetof(GCstr, hash)); |
@@ -713,13 +721,19 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) | |||
713 | emit_asb(as, PPCI_XOR, tmp1, tmp1, tmp2); | 721 | emit_asb(as, PPCI_XOR, tmp1, tmp1, tmp2); |
714 | emit_rotlwi(as, tmp1, tmp1, (HASH_ROT2+HASH_ROT1)&31); | 722 | emit_rotlwi(as, tmp1, tmp1, (HASH_ROT2+HASH_ROT1)&31); |
715 | emit_tab(as, PPCI_SUBF, tmp2, dest, tmp2); | 723 | emit_tab(as, PPCI_SUBF, tmp2, dest, tmp2); |
716 | if (irt_isnum(kt)) { | 724 | if (LJ_SOFTFP ? (irkey[1].o == IR_HIOP) : irt_isnum(kt)) { |
725 | #if LJ_SOFTFP | ||
726 | emit_asb(as, PPCI_XOR, tmp2, key, tmp1); | ||
727 | emit_rotlwi(as, dest, tmp1, HASH_ROT1); | ||
728 | emit_tab(as, PPCI_ADD, tmp1, tmpnum, tmpnum); | ||
729 | #else | ||
717 | int32_t ofs = ra_spill(as, irkey); | 730 | int32_t ofs = ra_spill(as, irkey); |
718 | emit_asb(as, PPCI_XOR, tmp2, tmp2, tmp1); | 731 | emit_asb(as, PPCI_XOR, tmp2, tmp2, tmp1); |
719 | emit_rotlwi(as, dest, tmp1, HASH_ROT1); | 732 | emit_rotlwi(as, dest, tmp1, HASH_ROT1); |
720 | emit_tab(as, PPCI_ADD, tmp1, tmp1, tmp1); | 733 | emit_tab(as, PPCI_ADD, tmp1, tmp1, tmp1); |
721 | emit_tai(as, PPCI_LWZ, tmp2, RID_SP, ofs+4); | 734 | emit_tai(as, PPCI_LWZ, tmp2, RID_SP, ofs+4); |
722 | emit_tai(as, PPCI_LWZ, tmp1, RID_SP, ofs); | 735 | emit_tai(as, PPCI_LWZ, tmp1, RID_SP, ofs); |
736 | #endif | ||
723 | } else { | 737 | } else { |
724 | emit_asb(as, PPCI_XOR, tmp2, key, tmp1); | 738 | emit_asb(as, PPCI_XOR, tmp2, key, tmp1); |
725 | emit_rotlwi(as, dest, tmp1, HASH_ROT1); | 739 | emit_rotlwi(as, dest, tmp1, HASH_ROT1); |
@@ -773,20 +787,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir) | |||
773 | } | 787 | } |
774 | } | 788 | } |
775 | 789 | ||
776 | static void asm_newref(ASMState *as, IRIns *ir) | ||
777 | { | ||
778 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey]; | ||
779 | IRRef args[3]; | ||
780 | if (ir->r == RID_SINK) | ||
781 | return; | ||
782 | args[0] = ASMREF_L; /* lua_State *L */ | ||
783 | args[1] = ir->op1; /* GCtab *t */ | ||
784 | args[2] = ASMREF_TMP1; /* cTValue *key */ | ||
785 | asm_setupresult(as, ir, ci); /* TValue * */ | ||
786 | asm_gencall(as, ci, args); | ||
787 | asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2); | ||
788 | } | ||
789 | |||
790 | static void asm_uref(ASMState *as, IRIns *ir) | 790 | static void asm_uref(ASMState *as, IRIns *ir) |
791 | { | 791 | { |
792 | Reg dest = ra_dest(as, ir, RSET_GPR); | 792 | Reg dest = ra_dest(as, ir, RSET_GPR); |
@@ -860,8 +860,8 @@ static PPCIns asm_fxloadins(IRIns *ir) | |||
860 | case IRT_U8: return PPCI_LBZ; | 860 | case IRT_U8: return PPCI_LBZ; |
861 | case IRT_I16: return PPCI_LHA; | 861 | case IRT_I16: return PPCI_LHA; |
862 | case IRT_U16: return PPCI_LHZ; | 862 | case IRT_U16: return PPCI_LHZ; |
863 | case IRT_NUM: return PPCI_LFD; | 863 | case IRT_NUM: lua_assert(!LJ_SOFTFP); return PPCI_LFD; |
864 | case IRT_FLOAT: return PPCI_LFS; | 864 | case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_LFS; |
865 | default: return PPCI_LWZ; | 865 | default: return PPCI_LWZ; |
866 | } | 866 | } |
867 | } | 867 | } |
@@ -871,8 +871,8 @@ static PPCIns asm_fxstoreins(IRIns *ir) | |||
871 | switch (irt_type(ir->t)) { | 871 | switch (irt_type(ir->t)) { |
872 | case IRT_I8: case IRT_U8: return PPCI_STB; | 872 | case IRT_I8: case IRT_U8: return PPCI_STB; |
873 | case IRT_I16: case IRT_U16: return PPCI_STH; | 873 | case IRT_I16: case IRT_U16: return PPCI_STH; |
874 | case IRT_NUM: return PPCI_STFD; | 874 | case IRT_NUM: lua_assert(!LJ_SOFTFP); return PPCI_STFD; |
875 | case IRT_FLOAT: return PPCI_STFS; | 875 | case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_STFS; |
876 | default: return PPCI_STW; | 876 | default: return PPCI_STW; |
877 | } | 877 | } |
878 | } | 878 | } |
@@ -880,17 +880,23 @@ static PPCIns asm_fxstoreins(IRIns *ir) | |||
880 | static void asm_fload(ASMState *as, IRIns *ir) | 880 | static void asm_fload(ASMState *as, IRIns *ir) |
881 | { | 881 | { |
882 | Reg dest = ra_dest(as, ir, RSET_GPR); | 882 | Reg dest = ra_dest(as, ir, RSET_GPR); |
883 | Reg idx = ra_alloc1(as, ir->op1, RSET_GPR); | ||
884 | PPCIns pi = asm_fxloadins(ir); | 883 | PPCIns pi = asm_fxloadins(ir); |
884 | Reg idx; | ||
885 | int32_t ofs; | 885 | int32_t ofs; |
886 | if (ir->op2 == IRFL_TAB_ARRAY) { | 886 | if (ir->op1 == REF_NIL) { |
887 | ofs = asm_fuseabase(as, ir->op1); | 887 | idx = RID_JGL; |
888 | if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ | 888 | ofs = (ir->op2 << 2) - 32768; |
889 | emit_tai(as, PPCI_ADDI, dest, idx, ofs); | 889 | } else { |
890 | return; | 890 | idx = ra_alloc1(as, ir->op1, RSET_GPR); |
891 | if (ir->op2 == IRFL_TAB_ARRAY) { | ||
892 | ofs = asm_fuseabase(as, ir->op1); | ||
893 | if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ | ||
894 | emit_tai(as, PPCI_ADDI, dest, idx, ofs); | ||
895 | return; | ||
896 | } | ||
891 | } | 897 | } |
898 | ofs = field_ofs[ir->op2]; | ||
892 | } | 899 | } |
893 | ofs = field_ofs[ir->op2]; | ||
894 | lua_assert(!irt_isi8(ir->t)); | 900 | lua_assert(!irt_isi8(ir->t)); |
895 | emit_tai(as, pi, dest, idx, ofs); | 901 | emit_tai(as, pi, dest, idx, ofs); |
896 | } | 902 | } |
@@ -909,14 +915,15 @@ static void asm_fstore(ASMState *as, IRIns *ir) | |||
909 | 915 | ||
910 | static void asm_xload(ASMState *as, IRIns *ir) | 916 | static void asm_xload(ASMState *as, IRIns *ir) |
911 | { | 917 | { |
912 | Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); | 918 | Reg dest = ra_dest(as, ir, |
919 | (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); | ||
913 | lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); | 920 | lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); |
914 | if (irt_isi8(ir->t)) | 921 | if (irt_isi8(ir->t)) |
915 | emit_as(as, PPCI_EXTSB, dest, dest); | 922 | emit_as(as, PPCI_EXTSB, dest, dest); |
916 | asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); | 923 | asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); |
917 | } | 924 | } |
918 | 925 | ||
919 | static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) | 926 | static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) |
920 | { | 927 | { |
921 | IRIns *irb; | 928 | IRIns *irb; |
922 | if (ir->r == RID_SINK) | 929 | if (ir->r == RID_SINK) |
@@ -927,22 +934,34 @@ static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) | |||
927 | Reg src = ra_alloc1(as, irb->op1, RSET_GPR); | 934 | Reg src = ra_alloc1(as, irb->op1, RSET_GPR); |
928 | asm_fusexrefx(as, PPCI_STWBRX, src, ir->op1, rset_exclude(RSET_GPR, src)); | 935 | asm_fusexrefx(as, PPCI_STWBRX, src, ir->op1, rset_exclude(RSET_GPR, src)); |
929 | } else { | 936 | } else { |
930 | Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); | 937 | Reg src = ra_alloc1(as, ir->op2, |
938 | (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); | ||
931 | asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, | 939 | asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, |
932 | rset_exclude(RSET_GPR, src), ofs); | 940 | rset_exclude(RSET_GPR, src), ofs); |
933 | } | 941 | } |
934 | } | 942 | } |
935 | 943 | ||
944 | #define asm_xstore(as, ir) asm_xstore_(as, ir, 0) | ||
945 | |||
936 | static void asm_ahuvload(ASMState *as, IRIns *ir) | 946 | static void asm_ahuvload(ASMState *as, IRIns *ir) |
937 | { | 947 | { |
938 | IRType1 t = ir->t; | 948 | IRType1 t = ir->t; |
939 | Reg dest = RID_NONE, type = RID_TMP, tmp = RID_TMP, idx; | 949 | Reg dest = RID_NONE, type = RID_TMP, tmp = RID_TMP, idx; |
940 | RegSet allow = RSET_GPR; | 950 | RegSet allow = RSET_GPR; |
941 | int32_t ofs = AHUREF_LSX; | 951 | int32_t ofs = AHUREF_LSX; |
952 | if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) { | ||
953 | t.irt = IRT_NUM; | ||
954 | if (ra_used(ir+1)) { | ||
955 | type = ra_dest(as, ir+1, allow); | ||
956 | rset_clear(allow, type); | ||
957 | } | ||
958 | ofs = 0; | ||
959 | } | ||
942 | if (ra_used(ir)) { | 960 | if (ra_used(ir)) { |
943 | lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); | 961 | lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || |
944 | if (!irt_isnum(t)) ofs = 0; | 962 | irt_isint(ir->t) || irt_isaddr(ir->t)); |
945 | dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR); | 963 | if (LJ_SOFTFP || !irt_isnum(t)) ofs = 0; |
964 | dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); | ||
946 | rset_clear(allow, dest); | 965 | rset_clear(allow, dest); |
947 | } | 966 | } |
948 | idx = asm_fuseahuref(as, ir->op1, &ofs, allow); | 967 | idx = asm_fuseahuref(as, ir->op1, &ofs, allow); |
@@ -951,12 +970,13 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) | |||
951 | asm_guardcc(as, CC_GE); | 970 | asm_guardcc(as, CC_GE); |
952 | emit_ab(as, PPCI_CMPLW, type, tisnum); | 971 | emit_ab(as, PPCI_CMPLW, type, tisnum); |
953 | if (ra_hasreg(dest)) { | 972 | if (ra_hasreg(dest)) { |
954 | if (ofs == AHUREF_LSX) { | 973 | if (!LJ_SOFTFP && ofs == AHUREF_LSX) { |
955 | tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, | 974 | tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, |
956 | (idx&255)), (idx>>8))); | 975 | (idx&255)), (idx>>8))); |
957 | emit_fab(as, PPCI_LFDX, dest, (idx&255), tmp); | 976 | emit_fab(as, PPCI_LFDX, dest, (idx&255), tmp); |
958 | } else { | 977 | } else { |
959 | emit_fai(as, PPCI_LFD, dest, idx, ofs); | 978 | emit_fai(as, LJ_SOFTFP ? PPCI_LWZ : PPCI_LFD, dest, idx, |
979 | ofs+4*LJ_SOFTFP); | ||
960 | } | 980 | } |
961 | } | 981 | } |
962 | } else { | 982 | } else { |
@@ -979,7 +999,7 @@ static void asm_ahustore(ASMState *as, IRIns *ir) | |||
979 | int32_t ofs = AHUREF_LSX; | 999 | int32_t ofs = AHUREF_LSX; |
980 | if (ir->r == RID_SINK) | 1000 | if (ir->r == RID_SINK) |
981 | return; | 1001 | return; |
982 | if (irt_isnum(ir->t)) { | 1002 | if (!LJ_SOFTFP && irt_isnum(ir->t)) { |
983 | src = ra_alloc1(as, ir->op2, RSET_FPR); | 1003 | src = ra_alloc1(as, ir->op2, RSET_FPR); |
984 | } else { | 1004 | } else { |
985 | if (!irt_ispri(ir->t)) { | 1005 | if (!irt_ispri(ir->t)) { |
@@ -987,11 +1007,14 @@ static void asm_ahustore(ASMState *as, IRIns *ir) | |||
987 | rset_clear(allow, src); | 1007 | rset_clear(allow, src); |
988 | ofs = 0; | 1008 | ofs = 0; |
989 | } | 1009 | } |
990 | type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); | 1010 | if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) |
1011 | type = ra_alloc1(as, (ir+1)->op2, allow); | ||
1012 | else | ||
1013 | type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); | ||
991 | rset_clear(allow, type); | 1014 | rset_clear(allow, type); |
992 | } | 1015 | } |
993 | idx = asm_fuseahuref(as, ir->op1, &ofs, allow); | 1016 | idx = asm_fuseahuref(as, ir->op1, &ofs, allow); |
994 | if (irt_isnum(ir->t)) { | 1017 | if (!LJ_SOFTFP && irt_isnum(ir->t)) { |
995 | if (ofs == AHUREF_LSX) { | 1018 | if (ofs == AHUREF_LSX) { |
996 | emit_fab(as, PPCI_STFDX, src, (idx&255), RID_TMP); | 1019 | emit_fab(as, PPCI_STFDX, src, (idx&255), RID_TMP); |
997 | emit_slwi(as, RID_TMP, (idx>>8), 3); | 1020 | emit_slwi(as, RID_TMP, (idx>>8), 3); |
@@ -1016,21 +1039,33 @@ static void asm_sload(ASMState *as, IRIns *ir) | |||
1016 | IRType1 t = ir->t; | 1039 | IRType1 t = ir->t; |
1017 | Reg dest = RID_NONE, type = RID_NONE, base; | 1040 | Reg dest = RID_NONE, type = RID_NONE, base; |
1018 | RegSet allow = RSET_GPR; | 1041 | RegSet allow = RSET_GPR; |
1042 | int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP); | ||
1043 | if (hiop) | ||
1044 | t.irt = IRT_NUM; | ||
1019 | lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ | 1045 | lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ |
1020 | lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK)); | 1046 | lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK)); |
1021 | lua_assert(LJ_DUALNUM || | 1047 | lua_assert(LJ_DUALNUM || |
1022 | !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME))); | 1048 | !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME))); |
1049 | #if LJ_SOFTFP | ||
1050 | lua_assert(!(ir->op2 & IRSLOAD_CONVERT)); /* Handled by LJ_SOFTFP SPLIT. */ | ||
1051 | if (hiop && ra_used(ir+1)) { | ||
1052 | type = ra_dest(as, ir+1, allow); | ||
1053 | rset_clear(allow, type); | ||
1054 | } | ||
1055 | #else | ||
1023 | if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { | 1056 | if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { |
1024 | dest = ra_scratch(as, RSET_FPR); | 1057 | dest = ra_scratch(as, RSET_FPR); |
1025 | asm_tointg(as, ir, dest); | 1058 | asm_tointg(as, ir, dest); |
1026 | t.irt = IRT_NUM; /* Continue with a regular number type check. */ | 1059 | t.irt = IRT_NUM; /* Continue with a regular number type check. */ |
1027 | } else if (ra_used(ir)) { | 1060 | } else |
1061 | #endif | ||
1062 | if (ra_used(ir)) { | ||
1028 | lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); | 1063 | lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); |
1029 | dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR); | 1064 | dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); |
1030 | rset_clear(allow, dest); | 1065 | rset_clear(allow, dest); |
1031 | base = ra_alloc1(as, REF_BASE, allow); | 1066 | base = ra_alloc1(as, REF_BASE, allow); |
1032 | rset_clear(allow, base); | 1067 | rset_clear(allow, base); |
1033 | if ((ir->op2 & IRSLOAD_CONVERT)) { | 1068 | if (!LJ_SOFTFP && (ir->op2 & IRSLOAD_CONVERT)) { |
1034 | if (irt_isint(t)) { | 1069 | if (irt_isint(t)) { |
1035 | emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); | 1070 | emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); |
1036 | dest = ra_scratch(as, RSET_FPR); | 1071 | dest = ra_scratch(as, RSET_FPR); |
@@ -1044,7 +1079,7 @@ static void asm_sload(ASMState *as, IRIns *ir) | |||
1044 | emit_fab(as, PPCI_FSUB, dest, dest, fbias); | 1079 | emit_fab(as, PPCI_FSUB, dest, dest, fbias); |
1045 | emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP); | 1080 | emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP); |
1046 | emit_lsptr(as, PPCI_LFS, (fbias & 31), | 1081 | emit_lsptr(as, PPCI_LFS, (fbias & 31), |
1047 | (void *)lj_ir_k64_find(as->J, U64x(59800004,59800000)), | 1082 | (void *)&as->J->k32[LJ_K32_2P52_2P31], |
1048 | rset_clear(allow, hibias)); | 1083 | rset_clear(allow, hibias)); |
1049 | emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPLO); | 1084 | emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPLO); |
1050 | emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI); | 1085 | emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI); |
@@ -1062,10 +1097,13 @@ dotypecheck: | |||
1062 | if ((ir->op2 & IRSLOAD_TYPECHECK)) { | 1097 | if ((ir->op2 & IRSLOAD_TYPECHECK)) { |
1063 | Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, allow); | 1098 | Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, allow); |
1064 | asm_guardcc(as, CC_GE); | 1099 | asm_guardcc(as, CC_GE); |
1065 | emit_ab(as, PPCI_CMPLW, RID_TMP, tisnum); | 1100 | #if !LJ_SOFTFP |
1066 | type = RID_TMP; | 1101 | type = RID_TMP; |
1102 | #endif | ||
1103 | emit_ab(as, PPCI_CMPLW, type, tisnum); | ||
1067 | } | 1104 | } |
1068 | if (ra_hasreg(dest)) emit_fai(as, PPCI_LFD, dest, base, ofs-4); | 1105 | if (ra_hasreg(dest)) emit_fai(as, LJ_SOFTFP ? PPCI_LWZ : PPCI_LFD, dest, |
1106 | base, ofs-(LJ_SOFTFP?0:4)); | ||
1069 | } else { | 1107 | } else { |
1070 | if ((ir->op2 & IRSLOAD_TYPECHECK)) { | 1108 | if ((ir->op2 & IRSLOAD_TYPECHECK)) { |
1071 | asm_guardcc(as, CC_NE); | 1109 | asm_guardcc(as, CC_NE); |
@@ -1083,19 +1121,15 @@ dotypecheck: | |||
1083 | static void asm_cnew(ASMState *as, IRIns *ir) | 1121 | static void asm_cnew(ASMState *as, IRIns *ir) |
1084 | { | 1122 | { |
1085 | CTState *cts = ctype_ctsG(J2G(as->J)); | 1123 | CTState *cts = ctype_ctsG(J2G(as->J)); |
1086 | CTypeID ctypeid = (CTypeID)IR(ir->op1)->i; | 1124 | CTypeID id = (CTypeID)IR(ir->op1)->i; |
1087 | CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ? | 1125 | CTSize sz; |
1088 | lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i; | 1126 | CTInfo info = lj_ctype_info(cts, id, &sz); |
1089 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; | 1127 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; |
1090 | IRRef args[2]; | 1128 | IRRef args[4]; |
1091 | RegSet allow = (RSET_GPR & ~RSET_SCRATCH); | ||
1092 | RegSet drop = RSET_SCRATCH; | 1129 | RegSet drop = RSET_SCRATCH; |
1093 | lua_assert(sz != CTSIZE_INVALID); | 1130 | lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL)); |
1094 | 1131 | ||
1095 | args[0] = ASMREF_L; /* lua_State *L */ | ||
1096 | args[1] = ASMREF_TMP1; /* MSize size */ | ||
1097 | as->gcsteps++; | 1132 | as->gcsteps++; |
1098 | |||
1099 | if (ra_hasreg(ir->r)) | 1133 | if (ra_hasreg(ir->r)) |
1100 | rset_clear(drop, ir->r); /* Dest reg handled below. */ | 1134 | rset_clear(drop, ir->r); /* Dest reg handled below. */ |
1101 | ra_evictset(as, drop); | 1135 | ra_evictset(as, drop); |
@@ -1104,6 +1138,7 @@ static void asm_cnew(ASMState *as, IRIns *ir) | |||
1104 | 1138 | ||
1105 | /* Initialize immutable cdata object. */ | 1139 | /* Initialize immutable cdata object. */ |
1106 | if (ir->o == IR_CNEWI) { | 1140 | if (ir->o == IR_CNEWI) { |
1141 | RegSet allow = (RSET_GPR & ~RSET_SCRATCH); | ||
1107 | int32_t ofs = sizeof(GCcdata); | 1142 | int32_t ofs = sizeof(GCcdata); |
1108 | lua_assert(sz == 4 || sz == 8); | 1143 | lua_assert(sz == 4 || sz == 8); |
1109 | if (sz == 8) { | 1144 | if (sz == 8) { |
@@ -1117,12 +1152,24 @@ static void asm_cnew(ASMState *as, IRIns *ir) | |||
1117 | if (ofs == sizeof(GCcdata)) break; | 1152 | if (ofs == sizeof(GCcdata)) break; |
1118 | ofs -= 4; ir++; | 1153 | ofs -= 4; ir++; |
1119 | } | 1154 | } |
1155 | } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ | ||
1156 | ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; | ||
1157 | args[0] = ASMREF_L; /* lua_State *L */ | ||
1158 | args[1] = ir->op1; /* CTypeID id */ | ||
1159 | args[2] = ir->op2; /* CTSize sz */ | ||
1160 | args[3] = ASMREF_TMP1; /* CTSize align */ | ||
1161 | asm_gencall(as, ci, args); | ||
1162 | emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info)); | ||
1163 | return; | ||
1120 | } | 1164 | } |
1165 | |||
1121 | /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ | 1166 | /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ |
1122 | emit_tai(as, PPCI_STB, RID_RET+1, RID_RET, offsetof(GCcdata, gct)); | 1167 | emit_tai(as, PPCI_STB, RID_RET+1, RID_RET, offsetof(GCcdata, gct)); |
1123 | emit_tai(as, PPCI_STH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid)); | 1168 | emit_tai(as, PPCI_STH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid)); |
1124 | emit_ti(as, PPCI_LI, RID_RET+1, ~LJ_TCDATA); | 1169 | emit_ti(as, PPCI_LI, RID_RET+1, ~LJ_TCDATA); |
1125 | emit_ti(as, PPCI_LI, RID_TMP, ctypeid); /* Lower 16 bit used. Sign-ext ok. */ | 1170 | emit_ti(as, PPCI_LI, RID_TMP, id); /* Lower 16 bit used. Sign-ext ok. */ |
1171 | args[0] = ASMREF_L; /* lua_State *L */ | ||
1172 | args[1] = ASMREF_TMP1; /* MSize size */ | ||
1126 | asm_gencall(as, ci, args); | 1173 | asm_gencall(as, ci, args); |
1127 | ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), | 1174 | ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), |
1128 | ra_releasetmp(as, ASMREF_TMP1)); | 1175 | ra_releasetmp(as, ASMREF_TMP1)); |
@@ -1178,6 +1225,7 @@ static void asm_obar(ASMState *as, IRIns *ir) | |||
1178 | 1225 | ||
1179 | /* -- Arithmetic and logic operations ------------------------------------- */ | 1226 | /* -- Arithmetic and logic operations ------------------------------------- */ |
1180 | 1227 | ||
1228 | #if !LJ_SOFTFP | ||
1181 | static void asm_fparith(ASMState *as, IRIns *ir, PPCIns pi) | 1229 | static void asm_fparith(ASMState *as, IRIns *ir, PPCIns pi) |
1182 | { | 1230 | { |
1183 | Reg dest = ra_dest(as, ir, RSET_FPR); | 1231 | Reg dest = ra_dest(as, ir, RSET_FPR); |
@@ -1196,31 +1244,26 @@ static void asm_fpunary(ASMState *as, IRIns *ir, PPCIns pi) | |||
1196 | emit_fb(as, pi, dest, left); | 1244 | emit_fb(as, pi, dest, left); |
1197 | } | 1245 | } |
1198 | 1246 | ||
1199 | static int asm_fpjoin_pow(ASMState *as, IRIns *ir) | 1247 | static void asm_fpmath(ASMState *as, IRIns *ir) |
1200 | { | 1248 | { |
1201 | IRIns *irp = IR(ir->op1); | 1249 | if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) |
1202 | if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) { | 1250 | return; |
1203 | IRIns *irpp = IR(irp->op1); | 1251 | if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT)) |
1204 | if (irpp == ir-2 && irpp->o == IR_FPMATH && | 1252 | asm_fpunary(as, ir, PPCI_FSQRT); |
1205 | irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) { | 1253 | else |
1206 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow]; | 1254 | asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); |
1207 | IRRef args[2]; | ||
1208 | args[0] = irpp->op1; | ||
1209 | args[1] = irp->op2; | ||
1210 | asm_setupresult(as, ir, ci); | ||
1211 | asm_gencall(as, ci, args); | ||
1212 | return 1; | ||
1213 | } | ||
1214 | } | ||
1215 | return 0; | ||
1216 | } | 1255 | } |
1256 | #endif | ||
1217 | 1257 | ||
1218 | static void asm_add(ASMState *as, IRIns *ir) | 1258 | static void asm_add(ASMState *as, IRIns *ir) |
1219 | { | 1259 | { |
1260 | #if !LJ_SOFTFP | ||
1220 | if (irt_isnum(ir->t)) { | 1261 | if (irt_isnum(ir->t)) { |
1221 | if (!asm_fusemadd(as, ir, PPCI_FMADD, PPCI_FMADD)) | 1262 | if (!asm_fusemadd(as, ir, PPCI_FMADD, PPCI_FMADD)) |
1222 | asm_fparith(as, ir, PPCI_FADD); | 1263 | asm_fparith(as, ir, PPCI_FADD); |
1223 | } else { | 1264 | } else |
1265 | #endif | ||
1266 | { | ||
1224 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1267 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1225 | Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); | 1268 | Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); |
1226 | PPCIns pi; | 1269 | PPCIns pi; |
@@ -1259,10 +1302,13 @@ static void asm_add(ASMState *as, IRIns *ir) | |||
1259 | 1302 | ||
1260 | static void asm_sub(ASMState *as, IRIns *ir) | 1303 | static void asm_sub(ASMState *as, IRIns *ir) |
1261 | { | 1304 | { |
1305 | #if !LJ_SOFTFP | ||
1262 | if (irt_isnum(ir->t)) { | 1306 | if (irt_isnum(ir->t)) { |
1263 | if (!asm_fusemadd(as, ir, PPCI_FMSUB, PPCI_FNMSUB)) | 1307 | if (!asm_fusemadd(as, ir, PPCI_FMSUB, PPCI_FNMSUB)) |
1264 | asm_fparith(as, ir, PPCI_FSUB); | 1308 | asm_fparith(as, ir, PPCI_FSUB); |
1265 | } else { | 1309 | } else |
1310 | #endif | ||
1311 | { | ||
1266 | PPCIns pi = PPCI_SUBF; | 1312 | PPCIns pi = PPCI_SUBF; |
1267 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1313 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1268 | Reg left, right; | 1314 | Reg left, right; |
@@ -1288,9 +1334,12 @@ static void asm_sub(ASMState *as, IRIns *ir) | |||
1288 | 1334 | ||
1289 | static void asm_mul(ASMState *as, IRIns *ir) | 1335 | static void asm_mul(ASMState *as, IRIns *ir) |
1290 | { | 1336 | { |
1337 | #if !LJ_SOFTFP | ||
1291 | if (irt_isnum(ir->t)) { | 1338 | if (irt_isnum(ir->t)) { |
1292 | asm_fparith(as, ir, PPCI_FMUL); | 1339 | asm_fparith(as, ir, PPCI_FMUL); |
1293 | } else { | 1340 | } else |
1341 | #endif | ||
1342 | { | ||
1294 | PPCIns pi = PPCI_MULLW; | 1343 | PPCIns pi = PPCI_MULLW; |
1295 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1344 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1296 | Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); | 1345 | Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); |
@@ -1312,11 +1361,18 @@ static void asm_mul(ASMState *as, IRIns *ir) | |||
1312 | } | 1361 | } |
1313 | } | 1362 | } |
1314 | 1363 | ||
1364 | #define asm_div(as, ir) asm_fparith(as, ir, PPCI_FDIV) | ||
1365 | #define asm_mod(as, ir) asm_callid(as, ir, IRCALL_lj_vm_modi) | ||
1366 | #define asm_pow(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi) | ||
1367 | |||
1315 | static void asm_neg(ASMState *as, IRIns *ir) | 1368 | static void asm_neg(ASMState *as, IRIns *ir) |
1316 | { | 1369 | { |
1370 | #if !LJ_SOFTFP | ||
1317 | if (irt_isnum(ir->t)) { | 1371 | if (irt_isnum(ir->t)) { |
1318 | asm_fpunary(as, ir, PPCI_FNEG); | 1372 | asm_fpunary(as, ir, PPCI_FNEG); |
1319 | } else { | 1373 | } else |
1374 | #endif | ||
1375 | { | ||
1320 | Reg dest, left; | 1376 | Reg dest, left; |
1321 | PPCIns pi = PPCI_NEG; | 1377 | PPCIns pi = PPCI_NEG; |
1322 | if (as->flagmcp == as->mcp) { | 1378 | if (as->flagmcp == as->mcp) { |
@@ -1330,6 +1386,10 @@ static void asm_neg(ASMState *as, IRIns *ir) | |||
1330 | } | 1386 | } |
1331 | } | 1387 | } |
1332 | 1388 | ||
1389 | #define asm_abs(as, ir) asm_fpunary(as, ir, PPCI_FABS) | ||
1390 | #define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2) | ||
1391 | #define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) | ||
1392 | |||
1333 | static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi) | 1393 | static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi) |
1334 | { | 1394 | { |
1335 | Reg dest, left, right; | 1395 | Reg dest, left, right; |
@@ -1345,6 +1405,10 @@ static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi) | |||
1345 | emit_tab(as, pi|PPCF_DOT, dest, left, right); | 1405 | emit_tab(as, pi|PPCF_DOT, dest, left, right); |
1346 | } | 1406 | } |
1347 | 1407 | ||
1408 | #define asm_addov(as, ir) asm_arithov(as, ir, PPCI_ADDO) | ||
1409 | #define asm_subov(as, ir) asm_arithov(as, ir, PPCI_SUBFO) | ||
1410 | #define asm_mulov(as, ir) asm_arithov(as, ir, PPCI_MULLWO) | ||
1411 | |||
1348 | #if LJ_HASFFI | 1412 | #if LJ_HASFFI |
1349 | static void asm_add64(ASMState *as, IRIns *ir) | 1413 | static void asm_add64(ASMState *as, IRIns *ir) |
1350 | { | 1414 | { |
@@ -1424,7 +1488,7 @@ static void asm_neg64(ASMState *as, IRIns *ir) | |||
1424 | } | 1488 | } |
1425 | #endif | 1489 | #endif |
1426 | 1490 | ||
1427 | static void asm_bitnot(ASMState *as, IRIns *ir) | 1491 | static void asm_bnot(ASMState *as, IRIns *ir) |
1428 | { | 1492 | { |
1429 | Reg dest, left, right; | 1493 | Reg dest, left, right; |
1430 | PPCIns pi = PPCI_NOR; | 1494 | PPCIns pi = PPCI_NOR; |
@@ -1451,7 +1515,7 @@ nofuse: | |||
1451 | emit_asb(as, pi, dest, left, right); | 1515 | emit_asb(as, pi, dest, left, right); |
1452 | } | 1516 | } |
1453 | 1517 | ||
1454 | static void asm_bitswap(ASMState *as, IRIns *ir) | 1518 | static void asm_bswap(ASMState *as, IRIns *ir) |
1455 | { | 1519 | { |
1456 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1520 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1457 | IRIns *irx; | 1521 | IRIns *irx; |
@@ -1472,32 +1536,6 @@ static void asm_bitswap(ASMState *as, IRIns *ir) | |||
1472 | } | 1536 | } |
1473 | } | 1537 | } |
1474 | 1538 | ||
1475 | static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) | ||
1476 | { | ||
1477 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
1478 | Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); | ||
1479 | if (irref_isk(ir->op2)) { | ||
1480 | int32_t k = IR(ir->op2)->i; | ||
1481 | Reg tmp = left; | ||
1482 | if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) { | ||
1483 | if (!checku16(k)) { | ||
1484 | emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16)); | ||
1485 | if ((k & 0xffff) == 0) return; | ||
1486 | } | ||
1487 | emit_asi(as, pik, dest, left, k); | ||
1488 | return; | ||
1489 | } | ||
1490 | } | ||
1491 | /* May fail due to spills/restores above, but simplifies the logic. */ | ||
1492 | if (as->flagmcp == as->mcp) { | ||
1493 | as->flagmcp = NULL; | ||
1494 | as->mcp++; | ||
1495 | pi |= PPCF_DOT; | ||
1496 | } | ||
1497 | right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); | ||
1498 | emit_asb(as, pi, dest, left, right); | ||
1499 | } | ||
1500 | |||
1501 | /* Fuse BAND with contiguous bitmask and a shift to rlwinm. */ | 1539 | /* Fuse BAND with contiguous bitmask and a shift to rlwinm. */ |
1502 | static void asm_fuseandsh(ASMState *as, PPCIns pi, int32_t mask, IRRef ref) | 1540 | static void asm_fuseandsh(ASMState *as, PPCIns pi, int32_t mask, IRRef ref) |
1503 | { | 1541 | { |
@@ -1528,7 +1566,7 @@ nofuse: | |||
1528 | *--as->mcp = pi | PPCF_T(left); | 1566 | *--as->mcp = pi | PPCF_T(left); |
1529 | } | 1567 | } |
1530 | 1568 | ||
1531 | static void asm_bitand(ASMState *as, IRIns *ir) | 1569 | static void asm_band(ASMState *as, IRIns *ir) |
1532 | { | 1570 | { |
1533 | Reg dest, left, right; | 1571 | Reg dest, left, right; |
1534 | IRRef lref = ir->op1; | 1572 | IRRef lref = ir->op1; |
@@ -1583,6 +1621,35 @@ static void asm_bitand(ASMState *as, IRIns *ir) | |||
1583 | emit_asb(as, PPCI_AND ^ dot, dest, left, right); | 1621 | emit_asb(as, PPCI_AND ^ dot, dest, left, right); |
1584 | } | 1622 | } |
1585 | 1623 | ||
1624 | static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) | ||
1625 | { | ||
1626 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
1627 | Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); | ||
1628 | if (irref_isk(ir->op2)) { | ||
1629 | int32_t k = IR(ir->op2)->i; | ||
1630 | Reg tmp = left; | ||
1631 | if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) { | ||
1632 | if (!checku16(k)) { | ||
1633 | emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16)); | ||
1634 | if ((k & 0xffff) == 0) return; | ||
1635 | } | ||
1636 | emit_asi(as, pik, dest, left, k); | ||
1637 | return; | ||
1638 | } | ||
1639 | } | ||
1640 | /* May fail due to spills/restores above, but simplifies the logic. */ | ||
1641 | if (as->flagmcp == as->mcp) { | ||
1642 | as->flagmcp = NULL; | ||
1643 | as->mcp++; | ||
1644 | pi |= PPCF_DOT; | ||
1645 | } | ||
1646 | right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); | ||
1647 | emit_asb(as, pi, dest, left, right); | ||
1648 | } | ||
1649 | |||
1650 | #define asm_bor(as, ir) asm_bitop(as, ir, PPCI_OR, PPCI_ORI) | ||
1651 | #define asm_bxor(as, ir) asm_bitop(as, ir, PPCI_XOR, PPCI_XORI) | ||
1652 | |||
1586 | static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) | 1653 | static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) |
1587 | { | 1654 | { |
1588 | Reg dest, left; | 1655 | Reg dest, left; |
@@ -1608,9 +1675,48 @@ static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) | |||
1608 | } | 1675 | } |
1609 | } | 1676 | } |
1610 | 1677 | ||
1678 | #define asm_bshl(as, ir) asm_bitshift(as, ir, PPCI_SLW, 0) | ||
1679 | #define asm_bshr(as, ir) asm_bitshift(as, ir, PPCI_SRW, 1) | ||
1680 | #define asm_bsar(as, ir) asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI) | ||
1681 | #define asm_brol(as, ir) \ | ||
1682 | asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31), \ | ||
1683 | PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31)) | ||
1684 | #define asm_bror(as, ir) lua_assert(0) | ||
1685 | |||
1686 | #if LJ_SOFTFP | ||
1687 | static void asm_sfpmin_max(ASMState *as, IRIns *ir) | ||
1688 | { | ||
1689 | CCallInfo ci = lj_ir_callinfo[IRCALL_softfp_cmp]; | ||
1690 | IRRef args[4]; | ||
1691 | MCLabel l_right, l_end; | ||
1692 | Reg desthi = ra_dest(as, ir, RSET_GPR), destlo = ra_dest(as, ir+1, RSET_GPR); | ||
1693 | Reg righthi, lefthi = ra_alloc2(as, ir, RSET_GPR); | ||
1694 | Reg rightlo, leftlo = ra_alloc2(as, ir+1, RSET_GPR); | ||
1695 | PPCCC cond = (IROp)ir->o == IR_MIN ? CC_EQ : CC_NE; | ||
1696 | righthi = (lefthi >> 8); lefthi &= 255; | ||
1697 | rightlo = (leftlo >> 8); leftlo &= 255; | ||
1698 | args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1; | ||
1699 | args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2; | ||
1700 | l_end = emit_label(as); | ||
1701 | if (desthi != righthi) emit_mr(as, desthi, righthi); | ||
1702 | if (destlo != rightlo) emit_mr(as, destlo, rightlo); | ||
1703 | l_right = emit_label(as); | ||
1704 | if (l_end != l_right) emit_jmp(as, l_end); | ||
1705 | if (desthi != lefthi) emit_mr(as, desthi, lefthi); | ||
1706 | if (destlo != leftlo) emit_mr(as, destlo, leftlo); | ||
1707 | if (l_right == as->mcp+1) { | ||
1708 | cond ^= 4; l_right = l_end; ++as->mcp; | ||
1709 | } | ||
1710 | emit_condbranch(as, PPCI_BC, cond, l_right); | ||
1711 | ra_evictset(as, RSET_SCRATCH); | ||
1712 | emit_cmpi(as, RID_RET, 1); | ||
1713 | asm_gencall(as, &ci, args); | ||
1714 | } | ||
1715 | #endif | ||
1716 | |||
1611 | static void asm_min_max(ASMState *as, IRIns *ir, int ismax) | 1717 | static void asm_min_max(ASMState *as, IRIns *ir, int ismax) |
1612 | { | 1718 | { |
1613 | if (irt_isnum(ir->t)) { | 1719 | if (!LJ_SOFTFP && irt_isnum(ir->t)) { |
1614 | Reg dest = ra_dest(as, ir, RSET_FPR); | 1720 | Reg dest = ra_dest(as, ir, RSET_FPR); |
1615 | Reg tmp = dest; | 1721 | Reg tmp = dest; |
1616 | Reg right, left = ra_alloc2(as, ir, RSET_FPR); | 1722 | Reg right, left = ra_alloc2(as, ir, RSET_FPR); |
@@ -1638,6 +1744,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax) | |||
1638 | } | 1744 | } |
1639 | } | 1745 | } |
1640 | 1746 | ||
1747 | #define asm_min(as, ir) asm_min_max(as, ir, 0) | ||
1748 | #define asm_max(as, ir) asm_min_max(as, ir, 1) | ||
1749 | |||
1641 | /* -- Comparisons --------------------------------------------------------- */ | 1750 | /* -- Comparisons --------------------------------------------------------- */ |
1642 | 1751 | ||
1643 | #define CC_UNSIGNED 0x08 /* Unsigned integer comparison. */ | 1752 | #define CC_UNSIGNED 0x08 /* Unsigned integer comparison. */ |
@@ -1695,7 +1804,7 @@ static void asm_intcomp_(ASMState *as, IRRef lref, IRRef rref, Reg cr, PPCCC cc) | |||
1695 | static void asm_comp(ASMState *as, IRIns *ir) | 1804 | static void asm_comp(ASMState *as, IRIns *ir) |
1696 | { | 1805 | { |
1697 | PPCCC cc = asm_compmap[ir->o]; | 1806 | PPCCC cc = asm_compmap[ir->o]; |
1698 | if (irt_isnum(ir->t)) { | 1807 | if (!LJ_SOFTFP && irt_isnum(ir->t)) { |
1699 | Reg right, left = ra_alloc2(as, ir, RSET_FPR); | 1808 | Reg right, left = ra_alloc2(as, ir, RSET_FPR); |
1700 | right = (left >> 8); left &= 255; | 1809 | right = (left >> 8); left &= 255; |
1701 | asm_guardcc(as, (cc >> 4)); | 1810 | asm_guardcc(as, (cc >> 4)); |
@@ -1714,6 +1823,46 @@ static void asm_comp(ASMState *as, IRIns *ir) | |||
1714 | } | 1823 | } |
1715 | } | 1824 | } |
1716 | 1825 | ||
1826 | #define asm_equal(as, ir) asm_comp(as, ir) | ||
1827 | |||
1828 | #if LJ_SOFTFP | ||
1829 | /* SFP comparisons. */ | ||
1830 | static void asm_sfpcomp(ASMState *as, IRIns *ir) | ||
1831 | { | ||
1832 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp]; | ||
1833 | RegSet drop = RSET_SCRATCH; | ||
1834 | Reg r; | ||
1835 | IRRef args[4]; | ||
1836 | args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1; | ||
1837 | args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2; | ||
1838 | |||
1839 | for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+3; r++) { | ||
1840 | if (!rset_test(as->freeset, r) && | ||
1841 | regcost_ref(as->cost[r]) == args[r-REGARG_FIRSTGPR]) | ||
1842 | rset_clear(drop, r); | ||
1843 | } | ||
1844 | ra_evictset(as, drop); | ||
1845 | asm_setupresult(as, ir, ci); | ||
1846 | switch ((IROp)ir->o) { | ||
1847 | case IR_ULT: | ||
1848 | asm_guardcc(as, CC_EQ); | ||
1849 | emit_ai(as, PPCI_CMPWI, RID_RET, 0); | ||
1850 | case IR_ULE: | ||
1851 | asm_guardcc(as, CC_EQ); | ||
1852 | emit_ai(as, PPCI_CMPWI, RID_RET, 1); | ||
1853 | break; | ||
1854 | case IR_GE: case IR_GT: | ||
1855 | asm_guardcc(as, CC_EQ); | ||
1856 | emit_ai(as, PPCI_CMPWI, RID_RET, 2); | ||
1857 | default: | ||
1858 | asm_guardcc(as, (asm_compmap[ir->o] & 0xf)); | ||
1859 | emit_ai(as, PPCI_CMPWI, RID_RET, 0); | ||
1860 | break; | ||
1861 | } | ||
1862 | asm_gencall(as, ci, args); | ||
1863 | } | ||
1864 | #endif | ||
1865 | |||
1717 | #if LJ_HASFFI | 1866 | #if LJ_HASFFI |
1718 | /* 64 bit integer comparisons. */ | 1867 | /* 64 bit integer comparisons. */ |
1719 | static void asm_comp64(ASMState *as, IRIns *ir) | 1868 | static void asm_comp64(ASMState *as, IRIns *ir) |
@@ -1743,37 +1892,67 @@ static void asm_comp64(ASMState *as, IRIns *ir) | |||
1743 | /* Hiword op of a split 64 bit op. Previous op must be the loword op. */ | 1892 | /* Hiword op of a split 64 bit op. Previous op must be the loword op. */ |
1744 | static void asm_hiop(ASMState *as, IRIns *ir) | 1893 | static void asm_hiop(ASMState *as, IRIns *ir) |
1745 | { | 1894 | { |
1746 | #if LJ_HASFFI | 1895 | #if LJ_HASFFI || LJ_SOFTFP |
1747 | /* HIOP is marked as a store because it needs its own DCE logic. */ | 1896 | /* HIOP is marked as a store because it needs its own DCE logic. */ |
1748 | int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ | 1897 | int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ |
1749 | if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; | 1898 | if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; |
1750 | if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ | 1899 | if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ |
1751 | as->curins--; /* Always skip the CONV. */ | 1900 | as->curins--; /* Always skip the CONV. */ |
1901 | #if LJ_HASFFI && !LJ_SOFTFP | ||
1752 | if (usehi || uselo) | 1902 | if (usehi || uselo) |
1753 | asm_conv64(as, ir); | 1903 | asm_conv64(as, ir); |
1754 | return; | 1904 | return; |
1905 | #endif | ||
1755 | } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */ | 1906 | } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */ |
1756 | as->curins--; /* Always skip the loword comparison. */ | 1907 | as->curins--; /* Always skip the loword comparison. */ |
1908 | #if LJ_SOFTFP | ||
1909 | if (!irt_isint(ir->t)) { | ||
1910 | asm_sfpcomp(as, ir-1); | ||
1911 | return; | ||
1912 | } | ||
1913 | #endif | ||
1914 | #if LJ_HASFFI | ||
1757 | asm_comp64(as, ir); | 1915 | asm_comp64(as, ir); |
1916 | #endif | ||
1917 | return; | ||
1918 | #if LJ_SOFTFP | ||
1919 | } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) { | ||
1920 | as->curins--; /* Always skip the loword min/max. */ | ||
1921 | if (uselo || usehi) | ||
1922 | asm_sfpmin_max(as, ir-1); | ||
1758 | return; | 1923 | return; |
1924 | #endif | ||
1759 | } else if ((ir-1)->o == IR_XSTORE) { | 1925 | } else if ((ir-1)->o == IR_XSTORE) { |
1760 | as->curins--; /* Handle both stores here. */ | 1926 | as->curins--; /* Handle both stores here. */ |
1761 | if ((ir-1)->r != RID_SINK) { | 1927 | if ((ir-1)->r != RID_SINK) { |
1762 | asm_xstore(as, ir, 0); | 1928 | asm_xstore_(as, ir, 0); |
1763 | asm_xstore(as, ir-1, 4); | 1929 | asm_xstore_(as, ir-1, 4); |
1764 | } | 1930 | } |
1765 | return; | 1931 | return; |
1766 | } | 1932 | } |
1767 | if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ | 1933 | if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ |
1768 | switch ((ir-1)->o) { | 1934 | switch ((ir-1)->o) { |
1935 | #if LJ_HASFFI | ||
1769 | case IR_ADD: as->curins--; asm_add64(as, ir); break; | 1936 | case IR_ADD: as->curins--; asm_add64(as, ir); break; |
1770 | case IR_SUB: as->curins--; asm_sub64(as, ir); break; | 1937 | case IR_SUB: as->curins--; asm_sub64(as, ir); break; |
1771 | case IR_NEG: as->curins--; asm_neg64(as, ir); break; | 1938 | case IR_NEG: as->curins--; asm_neg64(as, ir); break; |
1939 | #endif | ||
1940 | #if LJ_SOFTFP | ||
1941 | case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: | ||
1942 | case IR_STRTO: | ||
1943 | if (!uselo) | ||
1944 | ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */ | ||
1945 | break; | ||
1946 | #endif | ||
1772 | case IR_CALLN: | 1947 | case IR_CALLN: |
1948 | case IR_CALLS: | ||
1773 | case IR_CALLXS: | 1949 | case IR_CALLXS: |
1774 | if (!uselo) | 1950 | if (!uselo) |
1775 | ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ | 1951 | ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ |
1776 | break; | 1952 | break; |
1953 | #if LJ_SOFTFP | ||
1954 | case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: | ||
1955 | #endif | ||
1777 | case IR_CNEWI: | 1956 | case IR_CNEWI: |
1778 | /* Nothing to do here. Handled by lo op itself. */ | 1957 | /* Nothing to do here. Handled by lo op itself. */ |
1779 | break; | 1958 | break; |
@@ -1784,6 +1963,17 @@ static void asm_hiop(ASMState *as, IRIns *ir) | |||
1784 | #endif | 1963 | #endif |
1785 | } | 1964 | } |
1786 | 1965 | ||
1966 | /* -- Profiling ----------------------------------------------------------- */ | ||
1967 | |||
1968 | static void asm_prof(ASMState *as, IRIns *ir) | ||
1969 | { | ||
1970 | UNUSED(ir); | ||
1971 | asm_guardcc(as, CC_NE); | ||
1972 | emit_asi(as, PPCI_ANDIDOT, RID_TMP, RID_TMP, HOOK_PROFILE); | ||
1973 | emit_lsglptr(as, PPCI_LBZ, RID_TMP, | ||
1974 | (int32_t)offsetof(global_State, hookmask)); | ||
1975 | } | ||
1976 | |||
1787 | /* -- Stack handling ------------------------------------------------------ */ | 1977 | /* -- Stack handling ------------------------------------------------------ */ |
1788 | 1978 | ||
1789 | /* Check Lua stack size for overflow. Use exit handler as fallback. */ | 1979 | /* Check Lua stack size for overflow. Use exit handler as fallback. */ |
@@ -1805,7 +1995,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot, | |||
1805 | emit_tai(as, PPCI_LWZ, tmp, tmp, offsetof(lua_State, maxstack)); | 1995 | emit_tai(as, PPCI_LWZ, tmp, tmp, offsetof(lua_State, maxstack)); |
1806 | if (pbase == RID_TMP) | 1996 | if (pbase == RID_TMP) |
1807 | emit_getgl(as, RID_TMP, jit_base); | 1997 | emit_getgl(as, RID_TMP, jit_base); |
1808 | emit_getgl(as, tmp, jit_L); | 1998 | emit_getgl(as, tmp, cur_L); |
1809 | if (allow == RSET_EMPTY) /* Spill temp. register. */ | 1999 | if (allow == RSET_EMPTY) /* Spill temp. register. */ |
1810 | emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPW); | 2000 | emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPW); |
1811 | } | 2001 | } |
@@ -1826,8 +2016,19 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) | |||
1826 | if ((sn & SNAP_NORESTORE)) | 2016 | if ((sn & SNAP_NORESTORE)) |
1827 | continue; | 2017 | continue; |
1828 | if (irt_isnum(ir->t)) { | 2018 | if (irt_isnum(ir->t)) { |
2019 | #if LJ_SOFTFP | ||
2020 | Reg tmp; | ||
2021 | RegSet allow = rset_exclude(RSET_GPR, RID_BASE); | ||
2022 | lua_assert(irref_isk(ref)); /* LJ_SOFTFP: must be a number constant. */ | ||
2023 | tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, allow); | ||
2024 | emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?4:0)); | ||
2025 | if (rset_test(as->freeset, tmp+1)) allow = RID2RSET(tmp+1); | ||
2026 | tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, allow); | ||
2027 | emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?0:4)); | ||
2028 | #else | ||
1829 | Reg src = ra_alloc1(as, ref, RSET_FPR); | 2029 | Reg src = ra_alloc1(as, ref, RSET_FPR); |
1830 | emit_fai(as, PPCI_STFD, src, RID_BASE, ofs); | 2030 | emit_fai(as, PPCI_STFD, src, RID_BASE, ofs); |
2031 | #endif | ||
1831 | } else { | 2032 | } else { |
1832 | Reg type; | 2033 | Reg type; |
1833 | RegSet allow = rset_exclude(RSET_GPR, RID_BASE); | 2034 | RegSet allow = rset_exclude(RSET_GPR, RID_BASE); |
@@ -1840,6 +2041,10 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) | |||
1840 | if ((sn & (SNAP_CONT|SNAP_FRAME))) { | 2041 | if ((sn & (SNAP_CONT|SNAP_FRAME))) { |
1841 | if (s == 0) continue; /* Do not overwrite link to previous frame. */ | 2042 | if (s == 0) continue; /* Do not overwrite link to previous frame. */ |
1842 | type = ra_allock(as, (int32_t)(*flinks--), allow); | 2043 | type = ra_allock(as, (int32_t)(*flinks--), allow); |
2044 | #if LJ_SOFTFP | ||
2045 | } else if ((sn & SNAP_SOFTFPNUM)) { | ||
2046 | type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE)); | ||
2047 | #endif | ||
1843 | } else { | 2048 | } else { |
1844 | type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); | 2049 | type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); |
1845 | } | 2050 | } |
@@ -1966,147 +2171,25 @@ static void asm_tail_prep(ASMState *as) | |||
1966 | } | 2171 | } |
1967 | } | 2172 | } |
1968 | 2173 | ||
1969 | /* -- Instruction dispatch ------------------------------------------------ */ | ||
1970 | |||
1971 | /* Assemble a single instruction. */ | ||
1972 | static void asm_ir(ASMState *as, IRIns *ir) | ||
1973 | { | ||
1974 | switch ((IROp)ir->o) { | ||
1975 | /* Miscellaneous ops. */ | ||
1976 | case IR_LOOP: asm_loop(as); break; | ||
1977 | case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break; | ||
1978 | case IR_USE: | ||
1979 | ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break; | ||
1980 | case IR_PHI: asm_phi(as, ir); break; | ||
1981 | case IR_HIOP: asm_hiop(as, ir); break; | ||
1982 | case IR_GCSTEP: asm_gcstep(as, ir); break; | ||
1983 | |||
1984 | /* Guarded assertions. */ | ||
1985 | case IR_EQ: case IR_NE: | ||
1986 | if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) { | ||
1987 | as->curins--; | ||
1988 | asm_href(as, ir-1, (IROp)ir->o); | ||
1989 | break; | ||
1990 | } | ||
1991 | /* fallthrough */ | ||
1992 | case IR_LT: case IR_GE: case IR_LE: case IR_GT: | ||
1993 | case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT: | ||
1994 | case IR_ABC: | ||
1995 | asm_comp(as, ir); | ||
1996 | break; | ||
1997 | |||
1998 | case IR_RETF: asm_retf(as, ir); break; | ||
1999 | |||
2000 | /* Bit ops. */ | ||
2001 | case IR_BNOT: asm_bitnot(as, ir); break; | ||
2002 | case IR_BSWAP: asm_bitswap(as, ir); break; | ||
2003 | |||
2004 | case IR_BAND: asm_bitand(as, ir); break; | ||
2005 | case IR_BOR: asm_bitop(as, ir, PPCI_OR, PPCI_ORI); break; | ||
2006 | case IR_BXOR: asm_bitop(as, ir, PPCI_XOR, PPCI_XORI); break; | ||
2007 | |||
2008 | case IR_BSHL: asm_bitshift(as, ir, PPCI_SLW, 0); break; | ||
2009 | case IR_BSHR: asm_bitshift(as, ir, PPCI_SRW, 1); break; | ||
2010 | case IR_BSAR: asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI); break; | ||
2011 | case IR_BROL: asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31), | ||
2012 | PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31)); break; | ||
2013 | case IR_BROR: lua_assert(0); break; | ||
2014 | |||
2015 | /* Arithmetic ops. */ | ||
2016 | case IR_ADD: asm_add(as, ir); break; | ||
2017 | case IR_SUB: asm_sub(as, ir); break; | ||
2018 | case IR_MUL: asm_mul(as, ir); break; | ||
2019 | case IR_DIV: asm_fparith(as, ir, PPCI_FDIV); break; | ||
2020 | case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break; | ||
2021 | case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break; | ||
2022 | case IR_NEG: asm_neg(as, ir); break; | ||
2023 | |||
2024 | case IR_ABS: asm_fpunary(as, ir, PPCI_FABS); break; | ||
2025 | case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break; | ||
2026 | case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break; | ||
2027 | case IR_MIN: asm_min_max(as, ir, 0); break; | ||
2028 | case IR_MAX: asm_min_max(as, ir, 1); break; | ||
2029 | case IR_FPMATH: | ||
2030 | if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) | ||
2031 | break; | ||
2032 | if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT)) | ||
2033 | asm_fpunary(as, ir, PPCI_FSQRT); | ||
2034 | else | ||
2035 | asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); | ||
2036 | break; | ||
2037 | |||
2038 | /* Overflow-checking arithmetic ops. */ | ||
2039 | case IR_ADDOV: asm_arithov(as, ir, PPCI_ADDO); break; | ||
2040 | case IR_SUBOV: asm_arithov(as, ir, PPCI_SUBFO); break; | ||
2041 | case IR_MULOV: asm_arithov(as, ir, PPCI_MULLWO); break; | ||
2042 | |||
2043 | /* Memory references. */ | ||
2044 | case IR_AREF: asm_aref(as, ir); break; | ||
2045 | case IR_HREF: asm_href(as, ir, 0); break; | ||
2046 | case IR_HREFK: asm_hrefk(as, ir); break; | ||
2047 | case IR_NEWREF: asm_newref(as, ir); break; | ||
2048 | case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break; | ||
2049 | case IR_FREF: asm_fref(as, ir); break; | ||
2050 | case IR_STRREF: asm_strref(as, ir); break; | ||
2051 | |||
2052 | /* Loads and stores. */ | ||
2053 | case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: | ||
2054 | asm_ahuvload(as, ir); | ||
2055 | break; | ||
2056 | case IR_FLOAD: asm_fload(as, ir); break; | ||
2057 | case IR_XLOAD: asm_xload(as, ir); break; | ||
2058 | case IR_SLOAD: asm_sload(as, ir); break; | ||
2059 | |||
2060 | case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break; | ||
2061 | case IR_FSTORE: asm_fstore(as, ir); break; | ||
2062 | case IR_XSTORE: asm_xstore(as, ir, 0); break; | ||
2063 | |||
2064 | /* Allocations. */ | ||
2065 | case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break; | ||
2066 | case IR_TNEW: asm_tnew(as, ir); break; | ||
2067 | case IR_TDUP: asm_tdup(as, ir); break; | ||
2068 | case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break; | ||
2069 | |||
2070 | /* Write barriers. */ | ||
2071 | case IR_TBAR: asm_tbar(as, ir); break; | ||
2072 | case IR_OBAR: asm_obar(as, ir); break; | ||
2073 | |||
2074 | /* Type conversions. */ | ||
2075 | case IR_CONV: asm_conv(as, ir); break; | ||
2076 | case IR_TOBIT: asm_tobit(as, ir); break; | ||
2077 | case IR_TOSTR: asm_tostr(as, ir); break; | ||
2078 | case IR_STRTO: asm_strto(as, ir); break; | ||
2079 | |||
2080 | /* Calls. */ | ||
2081 | case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break; | ||
2082 | case IR_CALLXS: asm_callx(as, ir); break; | ||
2083 | case IR_CARG: break; | ||
2084 | |||
2085 | default: | ||
2086 | setintV(&as->J->errinfo, ir->o); | ||
2087 | lj_trace_err_info(as->J, LJ_TRERR_NYIIR); | ||
2088 | break; | ||
2089 | } | ||
2090 | } | ||
2091 | |||
2092 | /* -- Trace setup --------------------------------------------------------- */ | 2174 | /* -- Trace setup --------------------------------------------------------- */ |
2093 | 2175 | ||
2094 | /* Ensure there are enough stack slots for call arguments. */ | 2176 | /* Ensure there are enough stack slots for call arguments. */ |
2095 | static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) | 2177 | static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) |
2096 | { | 2178 | { |
2097 | IRRef args[CCI_NARGS_MAX*2]; | 2179 | IRRef args[CCI_NARGS_MAX*2]; |
2098 | uint32_t i, nargs = (int)CCI_NARGS(ci); | 2180 | uint32_t i, nargs = CCI_XNARGS(ci); |
2099 | int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; | 2181 | int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; |
2100 | asm_collectargs(as, ir, ci, args); | 2182 | asm_collectargs(as, ir, ci, args); |
2101 | for (i = 0; i < nargs; i++) | 2183 | for (i = 0; i < nargs; i++) |
2102 | if (args[i] && irt_isfp(IR(args[i])->t)) { | 2184 | if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t)) { |
2103 | if (nfpr > 0) nfpr--; else nslots = (nslots+3) & ~1; | 2185 | if (nfpr > 0) nfpr--; else nslots = (nslots+3) & ~1; |
2104 | } else { | 2186 | } else { |
2105 | if (ngpr > 0) ngpr--; else nslots++; | 2187 | if (ngpr > 0) ngpr--; else nslots++; |
2106 | } | 2188 | } |
2107 | if (nslots > as->evenspill) /* Leave room for args in stack slots. */ | 2189 | if (nslots > as->evenspill) /* Leave room for args in stack slots. */ |
2108 | as->evenspill = nslots; | 2190 | as->evenspill = nslots; |
2109 | return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET); | 2191 | return (!LJ_SOFTFP && irt_isfp(ir->t)) ? REGSP_HINT(RID_FPRET) : |
2192 | REGSP_HINT(RID_RET); | ||
2110 | } | 2193 | } |
2111 | 2194 | ||
2112 | static void asm_setup_target(ASMState *as) | 2195 | static void asm_setup_target(ASMState *as) |
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index 2c38d1ec..21b510ca 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h | |||
@@ -21,12 +21,14 @@ static MCode *asm_exitstub_gen(ASMState *as, ExitNo group) | |||
21 | } | 21 | } |
22 | /* Push the high byte of the exitno for each exit stub group. */ | 22 | /* Push the high byte of the exitno for each exit stub group. */ |
23 | *mxp++ = XI_PUSHi8; *mxp++ = (MCode)((group*EXITSTUBS_PER_GROUP)>>8); | 23 | *mxp++ = XI_PUSHi8; *mxp++ = (MCode)((group*EXITSTUBS_PER_GROUP)>>8); |
24 | #if !LJ_GC64 | ||
24 | /* Store DISPATCH at original stack slot 0. Account for the two push ops. */ | 25 | /* Store DISPATCH at original stack slot 0. Account for the two push ops. */ |
25 | *mxp++ = XI_MOVmi; | 26 | *mxp++ = XI_MOVmi; |
26 | *mxp++ = MODRM(XM_OFS8, 0, RID_ESP); | 27 | *mxp++ = MODRM(XM_OFS8, 0, RID_ESP); |
27 | *mxp++ = MODRM(XM_SCALE1, RID_ESP, RID_ESP); | 28 | *mxp++ = MODRM(XM_SCALE1, RID_ESP, RID_ESP); |
28 | *mxp++ = 2*sizeof(void *); | 29 | *mxp++ = 2*sizeof(void *); |
29 | *(int32_t *)mxp = ptr2addr(J2GG(as->J)->dispatch); mxp += 4; | 30 | *(int32_t *)mxp = ptr2addr(J2GG(as->J)->dispatch); mxp += 4; |
31 | #endif | ||
30 | /* Jump to exit handler which fills in the ExitState. */ | 32 | /* Jump to exit handler which fills in the ExitState. */ |
31 | *mxp++ = XI_JMP; mxp += 4; | 33 | *mxp++ = XI_JMP; mxp += 4; |
32 | *((int32_t *)(mxp-4)) = jmprel(mxp, (MCode *)(void *)lj_vm_exit_handler); | 34 | *((int32_t *)(mxp-4)) = jmprel(mxp, (MCode *)(void *)lj_vm_exit_handler); |
@@ -62,10 +64,14 @@ static void asm_guardcc(ASMState *as, int cc) | |||
62 | target = p; | 64 | target = p; |
63 | cc ^= 1; | 65 | cc ^= 1; |
64 | if (as->realign) { | 66 | if (as->realign) { |
67 | if (LJ_GC64 && LJ_UNLIKELY(as->mrm.base == RID_RIP)) | ||
68 | as->mrm.ofs += 2; /* Fixup RIP offset for pending fused load. */ | ||
65 | emit_sjcc(as, cc, target); | 69 | emit_sjcc(as, cc, target); |
66 | return; | 70 | return; |
67 | } | 71 | } |
68 | } | 72 | } |
73 | if (LJ_GC64 && LJ_UNLIKELY(as->mrm.base == RID_RIP)) | ||
74 | as->mrm.ofs += 6; /* Fixup RIP offset for pending fused load. */ | ||
69 | emit_jcc(as, cc, target); | 75 | emit_jcc(as, cc, target); |
70 | } | 76 | } |
71 | 77 | ||
@@ -79,6 +85,15 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k) | |||
79 | { | 85 | { |
80 | if (irref_isk(ref)) { | 86 | if (irref_isk(ref)) { |
81 | IRIns *ir = IR(ref); | 87 | IRIns *ir = IR(ref); |
88 | #if LJ_GC64 | ||
89 | if (ir->o == IR_KNULL || !irt_is64(ir->t)) { | ||
90 | *k = ir->i; | ||
91 | return 1; | ||
92 | } else if (checki32((int64_t)ir_k64(ir)->u64)) { | ||
93 | *k = (int32_t)ir_k64(ir)->u64; | ||
94 | return 1; | ||
95 | } | ||
96 | #else | ||
82 | if (ir->o != IR_KINT64) { | 97 | if (ir->o != IR_KINT64) { |
83 | *k = ir->i; | 98 | *k = ir->i; |
84 | return 1; | 99 | return 1; |
@@ -86,6 +101,7 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k) | |||
86 | *k = (int32_t)ir_kint64(ir)->u64; | 101 | *k = (int32_t)ir_kint64(ir)->u64; |
87 | return 1; | 102 | return 1; |
88 | } | 103 | } |
104 | #endif | ||
89 | } | 105 | } |
90 | return 0; | 106 | return 0; |
91 | } | 107 | } |
@@ -185,9 +201,19 @@ static void asm_fuseahuref(ASMState *as, IRRef ref, RegSet allow) | |||
185 | if (irref_isk(ir->op1)) { | 201 | if (irref_isk(ir->op1)) { |
186 | GCfunc *fn = ir_kfunc(IR(ir->op1)); | 202 | GCfunc *fn = ir_kfunc(IR(ir->op1)); |
187 | GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv; | 203 | GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv; |
204 | #if LJ_GC64 | ||
205 | int64_t ofs = dispofs(as, &uv->tv); | ||
206 | if (checki32(ofs) && checki32(ofs+4)) { | ||
207 | as->mrm.ofs = (int32_t)ofs; | ||
208 | as->mrm.base = RID_DISPATCH; | ||
209 | as->mrm.idx = RID_NONE; | ||
210 | return; | ||
211 | } | ||
212 | #else | ||
188 | as->mrm.ofs = ptr2addr(&uv->tv); | 213 | as->mrm.ofs = ptr2addr(&uv->tv); |
189 | as->mrm.base = as->mrm.idx = RID_NONE; | 214 | as->mrm.base = as->mrm.idx = RID_NONE; |
190 | return; | 215 | return; |
216 | #endif | ||
191 | } | 217 | } |
192 | break; | 218 | break; |
193 | default: | 219 | default: |
@@ -205,14 +231,40 @@ static void asm_fuseahuref(ASMState *as, IRRef ref, RegSet allow) | |||
205 | static void asm_fusefref(ASMState *as, IRIns *ir, RegSet allow) | 231 | static void asm_fusefref(ASMState *as, IRIns *ir, RegSet allow) |
206 | { | 232 | { |
207 | lua_assert(ir->o == IR_FLOAD || ir->o == IR_FREF); | 233 | lua_assert(ir->o == IR_FLOAD || ir->o == IR_FREF); |
208 | as->mrm.ofs = field_ofs[ir->op2]; | ||
209 | as->mrm.idx = RID_NONE; | 234 | as->mrm.idx = RID_NONE; |
235 | if (ir->op1 == REF_NIL) { | ||
236 | #if LJ_GC64 | ||
237 | as->mrm.ofs = (int32_t)(ir->op2 << 2) - GG_OFS(dispatch); | ||
238 | as->mrm.base = RID_DISPATCH; | ||
239 | #else | ||
240 | as->mrm.ofs = (int32_t)(ir->op2 << 2) + ptr2addr(J2GG(as->J)); | ||
241 | as->mrm.base = RID_NONE; | ||
242 | #endif | ||
243 | return; | ||
244 | } | ||
245 | as->mrm.ofs = field_ofs[ir->op2]; | ||
210 | if (irref_isk(ir->op1)) { | 246 | if (irref_isk(ir->op1)) { |
211 | as->mrm.ofs += IR(ir->op1)->i; | 247 | IRIns *op1 = IR(ir->op1); |
248 | #if LJ_GC64 | ||
249 | if (ir->op1 == REF_NIL) { | ||
250 | as->mrm.ofs -= GG_OFS(dispatch); | ||
251 | as->mrm.base = RID_DISPATCH; | ||
252 | return; | ||
253 | } else if (op1->o == IR_KPTR || op1->o == IR_KKPTR) { | ||
254 | intptr_t ofs = dispofs(as, ir_kptr(op1)); | ||
255 | if (checki32(as->mrm.ofs + ofs)) { | ||
256 | as->mrm.ofs += (int32_t)ofs; | ||
257 | as->mrm.base = RID_DISPATCH; | ||
258 | return; | ||
259 | } | ||
260 | } | ||
261 | #else | ||
262 | as->mrm.ofs += op1->i; | ||
212 | as->mrm.base = RID_NONE; | 263 | as->mrm.base = RID_NONE; |
213 | } else { | 264 | return; |
214 | as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow); | 265 | #endif |
215 | } | 266 | } |
267 | as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow); | ||
216 | } | 268 | } |
217 | 269 | ||
218 | /* Fuse string reference into memory operand. */ | 270 | /* Fuse string reference into memory operand. */ |
@@ -223,7 +275,7 @@ static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow) | |||
223 | as->mrm.base = as->mrm.idx = RID_NONE; | 275 | as->mrm.base = as->mrm.idx = RID_NONE; |
224 | as->mrm.scale = XM_SCALE1; | 276 | as->mrm.scale = XM_SCALE1; |
225 | as->mrm.ofs = sizeof(GCstr); | 277 | as->mrm.ofs = sizeof(GCstr); |
226 | if (irref_isk(ir->op1)) { | 278 | if (!LJ_GC64 && irref_isk(ir->op1)) { |
227 | as->mrm.ofs += IR(ir->op1)->i; | 279 | as->mrm.ofs += IR(ir->op1)->i; |
228 | } else { | 280 | } else { |
229 | Reg r = ra_alloc1(as, ir->op1, allow); | 281 | Reg r = ra_alloc1(as, ir->op1, allow); |
@@ -255,10 +307,20 @@ static void asm_fusexref(ASMState *as, IRRef ref, RegSet allow) | |||
255 | IRIns *ir = IR(ref); | 307 | IRIns *ir = IR(ref); |
256 | as->mrm.idx = RID_NONE; | 308 | as->mrm.idx = RID_NONE; |
257 | if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { | 309 | if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { |
310 | #if LJ_GC64 | ||
311 | intptr_t ofs = dispofs(as, ir_kptr(ir)); | ||
312 | if (checki32(ofs)) { | ||
313 | as->mrm.ofs = (int32_t)ofs; | ||
314 | as->mrm.base = RID_DISPATCH; | ||
315 | return; | ||
316 | } | ||
317 | } if (0) { | ||
318 | #else | ||
258 | as->mrm.ofs = ir->i; | 319 | as->mrm.ofs = ir->i; |
259 | as->mrm.base = RID_NONE; | 320 | as->mrm.base = RID_NONE; |
260 | } else if (ir->o == IR_STRREF) { | 321 | } else if (ir->o == IR_STRREF) { |
261 | asm_fusestrref(as, ir, allow); | 322 | asm_fusestrref(as, ir, allow); |
323 | #endif | ||
262 | } else { | 324 | } else { |
263 | as->mrm.ofs = 0; | 325 | as->mrm.ofs = 0; |
264 | if (canfuse(as, ir) && ir->o == IR_ADD && ra_noreg(ir->r)) { | 326 | if (canfuse(as, ir) && ir->o == IR_ADD && ra_noreg(ir->r)) { |
@@ -301,7 +363,46 @@ static void asm_fusexref(ASMState *as, IRRef ref, RegSet allow) | |||
301 | } | 363 | } |
302 | } | 364 | } |
303 | 365 | ||
304 | /* Fuse load into memory operand. */ | 366 | /* Fuse load of 64 bit IR constant into memory operand. */ |
367 | static Reg asm_fuseloadk64(ASMState *as, IRIns *ir) | ||
368 | { | ||
369 | const uint64_t *k = &ir_k64(ir)->u64; | ||
370 | if (!LJ_GC64 || checki32((intptr_t)k)) { | ||
371 | as->mrm.ofs = ptr2addr(k); | ||
372 | as->mrm.base = RID_NONE; | ||
373 | #if LJ_GC64 | ||
374 | } else if (checki32(dispofs(as, k))) { | ||
375 | as->mrm.ofs = (int32_t)dispofs(as, k); | ||
376 | as->mrm.base = RID_DISPATCH; | ||
377 | } else if (checki32(mcpofs(as, k)) && checki32(mcpofs(as, k+1)) && | ||
378 | checki32(mctopofs(as, k)) && checki32(mctopofs(as, k+1))) { | ||
379 | as->mrm.ofs = (int32_t)mcpofs(as, k); | ||
380 | as->mrm.base = RID_RIP; | ||
381 | } else { | ||
382 | if (ir->i) { | ||
383 | lua_assert(*k == *(uint64_t*)(as->mctop - ir->i)); | ||
384 | } else { | ||
385 | while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3; | ||
386 | *(uint64_t*)as->mcbot = *k; | ||
387 | ir->i = (int32_t)(as->mctop - as->mcbot); | ||
388 | as->mcbot += 8; | ||
389 | as->mclim = as->mcbot + MCLIM_REDZONE; | ||
390 | lj_mcode_commitbot(as->J, as->mcbot); | ||
391 | } | ||
392 | as->mrm.ofs = (int32_t)mcpofs(as, as->mctop - ir->i); | ||
393 | as->mrm.base = RID_RIP; | ||
394 | #endif | ||
395 | } | ||
396 | as->mrm.idx = RID_NONE; | ||
397 | return RID_MRM; | ||
398 | } | ||
399 | |||
400 | /* Fuse load into memory operand. | ||
401 | ** | ||
402 | ** Important caveat: this may emit RIP-relative loads! So don't place any | ||
403 | ** code emitters between this function and the use of its result. | ||
404 | ** The only permitted exception is asm_guardcc(). | ||
405 | */ | ||
305 | static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) | 406 | static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) |
306 | { | 407 | { |
307 | IRIns *ir = IR(ref); | 408 | IRIns *ir = IR(ref); |
@@ -320,26 +421,35 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) | |||
320 | if (ir->o == IR_KNUM) { | 421 | if (ir->o == IR_KNUM) { |
321 | RegSet avail = as->freeset & ~as->modset & RSET_FPR; | 422 | RegSet avail = as->freeset & ~as->modset & RSET_FPR; |
322 | lua_assert(allow != RSET_EMPTY); | 423 | lua_assert(allow != RSET_EMPTY); |
323 | if (!(avail & (avail-1))) { /* Fuse if less than two regs available. */ | 424 | if (!(avail & (avail-1))) /* Fuse if less than two regs available. */ |
324 | as->mrm.ofs = ptr2addr(ir_knum(ir)); | 425 | return asm_fuseloadk64(as, ir); |
325 | as->mrm.base = as->mrm.idx = RID_NONE; | ||
326 | return RID_MRM; | ||
327 | } | ||
328 | } else if (ref == REF_BASE || ir->o == IR_KINT64) { | 426 | } else if (ref == REF_BASE || ir->o == IR_KINT64) { |
329 | RegSet avail = as->freeset & ~as->modset & RSET_GPR; | 427 | RegSet avail = as->freeset & ~as->modset & RSET_GPR; |
330 | lua_assert(allow != RSET_EMPTY); | 428 | lua_assert(allow != RSET_EMPTY); |
331 | if (!(avail & (avail-1))) { /* Fuse if less than two regs available. */ | 429 | if (!(avail & (avail-1))) { /* Fuse if less than two regs available. */ |
332 | as->mrm.ofs = ptr2addr(ref == REF_BASE ? (void *)&J2G(as->J)->jit_base : (void *)ir_kint64(ir)); | 430 | if (ref == REF_BASE) { |
333 | as->mrm.base = as->mrm.idx = RID_NONE; | 431 | #if LJ_GC64 |
334 | return RID_MRM; | 432 | as->mrm.ofs = (int32_t)dispofs(as, &J2G(as->J)->jit_base); |
433 | as->mrm.base = RID_DISPATCH; | ||
434 | #else | ||
435 | as->mrm.ofs = ptr2addr(&J2G(as->J)->jit_base); | ||
436 | as->mrm.base = RID_NONE; | ||
437 | #endif | ||
438 | as->mrm.idx = RID_NONE; | ||
439 | return RID_MRM; | ||
440 | } else { | ||
441 | return asm_fuseloadk64(as, ir); | ||
442 | } | ||
335 | } | 443 | } |
336 | } else if (mayfuse(as, ref)) { | 444 | } else if (mayfuse(as, ref)) { |
337 | RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR; | 445 | RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR; |
338 | if (ir->o == IR_SLOAD) { | 446 | if (ir->o == IR_SLOAD) { |
339 | if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) && | 447 | if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) && |
340 | noconflict(as, ref, IR_RETF, 0)) { | 448 | noconflict(as, ref, IR_RETF, 0) && |
449 | !(LJ_GC64 && irt_isaddr(ir->t))) { | ||
341 | as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow); | 450 | as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow); |
342 | as->mrm.ofs = 8*((int32_t)ir->op1-1) + ((ir->op2&IRSLOAD_FRAME)?4:0); | 451 | as->mrm.ofs = 8*((int32_t)ir->op1-1-LJ_FR2) + |
452 | (!LJ_FR2 && (ir->op2 & IRSLOAD_FRAME) ? 4 : 0); | ||
343 | as->mrm.idx = RID_NONE; | 453 | as->mrm.idx = RID_NONE; |
344 | return RID_MRM; | 454 | return RID_MRM; |
345 | } | 455 | } |
@@ -351,7 +461,8 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) | |||
351 | return RID_MRM; | 461 | return RID_MRM; |
352 | } | 462 | } |
353 | } else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) { | 463 | } else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) { |
354 | if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0)) { | 464 | if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0) && |
465 | !(LJ_GC64 && irt_isaddr(ir->t))) { | ||
355 | asm_fuseahuref(as, ir->op1, xallow); | 466 | asm_fuseahuref(as, ir->op1, xallow); |
356 | return RID_MRM; | 467 | return RID_MRM; |
357 | } | 468 | } |
@@ -364,11 +475,15 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) | |||
364 | asm_fusexref(as, ir->op1, xallow); | 475 | asm_fusexref(as, ir->op1, xallow); |
365 | return RID_MRM; | 476 | return RID_MRM; |
366 | } | 477 | } |
367 | } else if (ir->o == IR_VLOAD) { | 478 | } else if (ir->o == IR_VLOAD && !(LJ_GC64 && irt_isaddr(ir->t))) { |
368 | asm_fuseahuref(as, ir->op1, xallow); | 479 | asm_fuseahuref(as, ir->op1, xallow); |
369 | return RID_MRM; | 480 | return RID_MRM; |
370 | } | 481 | } |
371 | } | 482 | } |
483 | if (ir->o == IR_FLOAD && ir->op1 == REF_NIL) { | ||
484 | asm_fusefref(as, ir, RSET_EMPTY); | ||
485 | return RID_MRM; | ||
486 | } | ||
372 | if (!(as->freeset & allow) && !emit_canremat(ref) && | 487 | if (!(as->freeset & allow) && !emit_canremat(ref) && |
373 | (allow == RSET_EMPTY || ra_hasspill(ir->s) || iscrossref(as, ref))) | 488 | (allow == RSET_EMPTY || ra_hasspill(ir->s) || iscrossref(as, ref))) |
374 | goto fusespill; | 489 | goto fusespill; |
@@ -392,7 +507,7 @@ static Reg asm_fuseloadm(ASMState *as, IRRef ref, RegSet allow, int is64) | |||
392 | /* Count the required number of stack slots for a call. */ | 507 | /* Count the required number of stack slots for a call. */ |
393 | static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args) | 508 | static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args) |
394 | { | 509 | { |
395 | uint32_t i, nargs = CCI_NARGS(ci); | 510 | uint32_t i, nargs = CCI_XNARGS(ci); |
396 | int nslots = 0; | 511 | int nslots = 0; |
397 | #if LJ_64 | 512 | #if LJ_64 |
398 | if (LJ_ABI_WIN) { | 513 | if (LJ_ABI_WIN) { |
@@ -425,7 +540,7 @@ static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args) | |||
425 | /* Generate a call to a C function. */ | 540 | /* Generate a call to a C function. */ |
426 | static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) | 541 | static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) |
427 | { | 542 | { |
428 | uint32_t n, nargs = CCI_NARGS(ci); | 543 | uint32_t n, nargs = CCI_XNARGS(ci); |
429 | int32_t ofs = STACKARG_OFS; | 544 | int32_t ofs = STACKARG_OFS; |
430 | #if LJ_64 | 545 | #if LJ_64 |
431 | uint32_t gprs = REGARG_GPRS; | 546 | uint32_t gprs = REGARG_GPRS; |
@@ -485,8 +600,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) | |||
485 | if (r) { /* Argument is in a register. */ | 600 | if (r) { /* Argument is in a register. */ |
486 | if (r < RID_MAX_GPR && ref < ASMREF_TMP1) { | 601 | if (r < RID_MAX_GPR && ref < ASMREF_TMP1) { |
487 | #if LJ_64 | 602 | #if LJ_64 |
488 | if (ir->o == IR_KINT64) | 603 | if (LJ_GC64 ? !(ir->o == IR_KINT || ir->o == IR_KNULL) : ir->o == IR_KINT64) |
489 | emit_loadu64(as, r, ir_kint64(ir)->u64); | 604 | emit_loadu64(as, r, ir_k64(ir)->u64); |
490 | else | 605 | else |
491 | #endif | 606 | #endif |
492 | emit_loadi(as, r, ir->i); | 607 | emit_loadi(as, r, ir->i); |
@@ -560,7 +675,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) | |||
560 | if (ra_hasreg(dest)) { | 675 | if (ra_hasreg(dest)) { |
561 | ra_free(as, dest); | 676 | ra_free(as, dest); |
562 | ra_modified(as, dest); | 677 | ra_modified(as, dest); |
563 | emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, | 678 | emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, |
564 | dest, RID_ESP, ofs); | 679 | dest, RID_ESP, ofs); |
565 | } | 680 | } |
566 | if ((ci->flags & CCI_CASTU64)) { | 681 | if ((ci->flags & CCI_CASTU64)) { |
@@ -584,15 +699,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) | |||
584 | } | 699 | } |
585 | } | 700 | } |
586 | 701 | ||
587 | static void asm_call(ASMState *as, IRIns *ir) | ||
588 | { | ||
589 | IRRef args[CCI_NARGS_MAX]; | ||
590 | const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; | ||
591 | asm_collectargs(as, ir, ci, args); | ||
592 | asm_setupresult(as, ir, ci); | ||
593 | asm_gencall(as, ci, args); | ||
594 | } | ||
595 | |||
596 | /* Return a constant function pointer or NULL for indirect calls. */ | 702 | /* Return a constant function pointer or NULL for indirect calls. */ |
597 | static void *asm_callx_func(ASMState *as, IRIns *irf, IRRef func) | 703 | static void *asm_callx_func(ASMState *as, IRIns *irf, IRRef func) |
598 | { | 704 | { |
@@ -651,15 +757,23 @@ static void asm_callx(ASMState *as, IRIns *ir) | |||
651 | static void asm_retf(ASMState *as, IRIns *ir) | 757 | static void asm_retf(ASMState *as, IRIns *ir) |
652 | { | 758 | { |
653 | Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); | 759 | Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); |
760 | #if LJ_FR2 | ||
761 | Reg rpc = ra_scratch(as, rset_exclude(RSET_GPR, base)); | ||
762 | #endif | ||
654 | void *pc = ir_kptr(IR(ir->op2)); | 763 | void *pc = ir_kptr(IR(ir->op2)); |
655 | int32_t delta = 1+bc_a(*((const BCIns *)pc - 1)); | 764 | int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); |
656 | as->topslot -= (BCReg)delta; | 765 | as->topslot -= (BCReg)delta; |
657 | if ((int32_t)as->topslot < 0) as->topslot = 0; | 766 | if ((int32_t)as->topslot < 0) as->topslot = 0; |
658 | irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ | 767 | irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ |
659 | emit_setgl(as, base, jit_base); | 768 | emit_setgl(as, base, jit_base); |
660 | emit_addptr(as, base, -8*delta); | 769 | emit_addptr(as, base, -8*delta); |
661 | asm_guardcc(as, CC_NE); | 770 | asm_guardcc(as, CC_NE); |
771 | #if LJ_FR2 | ||
772 | emit_rmro(as, XO_CMP, rpc|REX_GC64, base, -8); | ||
773 | emit_loadu64(as, rpc, u64ptr(pc)); | ||
774 | #else | ||
662 | emit_gmroi(as, XG_ARITHi(XOg_CMP), base, -4, ptr2addr(pc)); | 775 | emit_gmroi(as, XG_ARITHi(XOg_CMP), base, -4, ptr2addr(pc)); |
776 | #endif | ||
663 | } | 777 | } |
664 | 778 | ||
665 | /* -- Type conversions ---------------------------------------------------- */ | 779 | /* -- Type conversions ---------------------------------------------------- */ |
@@ -672,8 +786,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left) | |||
672 | asm_guardcc(as, CC_NE); | 786 | asm_guardcc(as, CC_NE); |
673 | emit_rr(as, XO_UCOMISD, left, tmp); | 787 | emit_rr(as, XO_UCOMISD, left, tmp); |
674 | emit_rr(as, XO_CVTSI2SD, tmp, dest); | 788 | emit_rr(as, XO_CVTSI2SD, tmp, dest); |
675 | if (!(as->flags & JIT_F_SPLIT_XMM)) | 789 | emit_rr(as, XO_XORPS, tmp, tmp); /* Avoid partial register stall. */ |
676 | emit_rr(as, XO_XORPS, tmp, tmp); /* Avoid partial register stall. */ | ||
677 | emit_rr(as, XO_CVTTSD2SI, dest, left); | 790 | emit_rr(as, XO_CVTTSD2SI, dest, left); |
678 | /* Can't fuse since left is needed twice. */ | 791 | /* Can't fuse since left is needed twice. */ |
679 | } | 792 | } |
@@ -684,8 +797,9 @@ static void asm_tobit(ASMState *as, IRIns *ir) | |||
684 | Reg tmp = ra_noreg(IR(ir->op1)->r) ? | 797 | Reg tmp = ra_noreg(IR(ir->op1)->r) ? |
685 | ra_alloc1(as, ir->op1, RSET_FPR) : | 798 | ra_alloc1(as, ir->op1, RSET_FPR) : |
686 | ra_scratch(as, RSET_FPR); | 799 | ra_scratch(as, RSET_FPR); |
687 | Reg right = asm_fuseload(as, ir->op2, rset_exclude(RSET_FPR, tmp)); | 800 | Reg right; |
688 | emit_rr(as, XO_MOVDto, tmp, dest); | 801 | emit_rr(as, XO_MOVDto, tmp, dest); |
802 | right = asm_fuseload(as, ir->op2, rset_exclude(RSET_FPR, tmp)); | ||
689 | emit_mrm(as, XO_ADDSD, tmp, right); | 803 | emit_mrm(as, XO_ADDSD, tmp, right); |
690 | ra_left(as, tmp, ir->op1); | 804 | ra_left(as, tmp, ir->op1); |
691 | } | 805 | } |
@@ -706,13 +820,13 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
706 | if (left == dest) return; /* Avoid the XO_XORPS. */ | 820 | if (left == dest) return; /* Avoid the XO_XORPS. */ |
707 | } else if (LJ_32 && st == IRT_U32) { /* U32 to FP conversion on x86. */ | 821 | } else if (LJ_32 && st == IRT_U32) { /* U32 to FP conversion on x86. */ |
708 | /* number = (2^52+2^51 .. u32) - (2^52+2^51) */ | 822 | /* number = (2^52+2^51 .. u32) - (2^52+2^51) */ |
709 | cTValue *k = lj_ir_k64_find(as->J, U64x(43380000,00000000)); | 823 | cTValue *k = &as->J->k64[LJ_K64_TOBIT]; |
710 | Reg bias = ra_scratch(as, rset_exclude(RSET_FPR, dest)); | 824 | Reg bias = ra_scratch(as, rset_exclude(RSET_FPR, dest)); |
711 | if (irt_isfloat(ir->t)) | 825 | if (irt_isfloat(ir->t)) |
712 | emit_rr(as, XO_CVTSD2SS, dest, dest); | 826 | emit_rr(as, XO_CVTSD2SS, dest, dest); |
713 | emit_rr(as, XO_SUBSD, dest, bias); /* Subtract 2^52+2^51 bias. */ | 827 | emit_rr(as, XO_SUBSD, dest, bias); /* Subtract 2^52+2^51 bias. */ |
714 | emit_rr(as, XO_XORPS, dest, bias); /* Merge bias and integer. */ | 828 | emit_rr(as, XO_XORPS, dest, bias); /* Merge bias and integer. */ |
715 | emit_loadn(as, bias, k); | 829 | emit_rma(as, XO_MOVSD, bias, k); |
716 | emit_mrm(as, XO_MOVD, dest, asm_fuseload(as, lref, RSET_GPR)); | 830 | emit_mrm(as, XO_MOVD, dest, asm_fuseload(as, lref, RSET_GPR)); |
717 | return; | 831 | return; |
718 | } else { /* Integer to FP conversion. */ | 832 | } else { /* Integer to FP conversion. */ |
@@ -721,7 +835,7 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
721 | asm_fuseloadm(as, lref, RSET_GPR, st64); | 835 | asm_fuseloadm(as, lref, RSET_GPR, st64); |
722 | if (LJ_64 && st == IRT_U64) { | 836 | if (LJ_64 && st == IRT_U64) { |
723 | MCLabel l_end = emit_label(as); | 837 | MCLabel l_end = emit_label(as); |
724 | const void *k = lj_ir_k64_find(as->J, U64x(43f00000,00000000)); | 838 | cTValue *k = &as->J->k64[LJ_K64_2P64]; |
725 | emit_rma(as, XO_ADDSD, dest, k); /* Add 2^64 to compensate. */ | 839 | emit_rma(as, XO_ADDSD, dest, k); /* Add 2^64 to compensate. */ |
726 | emit_sjcc(as, CC_NS, l_end); | 840 | emit_sjcc(as, CC_NS, l_end); |
727 | emit_rr(as, XO_TEST, left|REX_64, left); /* Check if u64 >= 2^63. */ | 841 | emit_rr(as, XO_TEST, left|REX_64, left); /* Check if u64 >= 2^63. */ |
@@ -729,8 +843,7 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
729 | emit_mrm(as, irt_isnum(ir->t) ? XO_CVTSI2SD : XO_CVTSI2SS, | 843 | emit_mrm(as, irt_isnum(ir->t) ? XO_CVTSI2SD : XO_CVTSI2SS, |
730 | dest|((LJ_64 && (st64 || st == IRT_U32)) ? REX_64 : 0), left); | 844 | dest|((LJ_64 && (st64 || st == IRT_U32)) ? REX_64 : 0), left); |
731 | } | 845 | } |
732 | if (!(as->flags & JIT_F_SPLIT_XMM)) | 846 | emit_rr(as, XO_XORPS, dest, dest); /* Avoid partial register stall. */ |
733 | emit_rr(as, XO_XORPS, dest, dest); /* Avoid partial register stall. */ | ||
734 | } else if (stfp) { /* FP to integer conversion. */ | 847 | } else if (stfp) { /* FP to integer conversion. */ |
735 | if (irt_isguard(ir->t)) { | 848 | if (irt_isguard(ir->t)) { |
736 | /* Checked conversions are only supported from number to int. */ | 849 | /* Checked conversions are only supported from number to int. */ |
@@ -738,9 +851,7 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
738 | asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); | 851 | asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); |
739 | } else { | 852 | } else { |
740 | Reg dest = ra_dest(as, ir, RSET_GPR); | 853 | Reg dest = ra_dest(as, ir, RSET_GPR); |
741 | x86Op op = st == IRT_NUM ? | 854 | x86Op op = st == IRT_NUM ? XO_CVTTSD2SI : XO_CVTTSS2SI; |
742 | ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSD2SI : XO_CVTSD2SI) : | ||
743 | ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSS2SI : XO_CVTSS2SI); | ||
744 | if (LJ_64 ? irt_isu64(ir->t) : irt_isu32(ir->t)) { | 855 | if (LJ_64 ? irt_isu64(ir->t) : irt_isu32(ir->t)) { |
745 | /* LJ_64: For inputs >= 2^63 add -2^64, convert again. */ | 856 | /* LJ_64: For inputs >= 2^63 add -2^64, convert again. */ |
746 | /* LJ_32: For inputs >= 2^31 add -2^31, convert again and add 2^31. */ | 857 | /* LJ_32: For inputs >= 2^31 add -2^31, convert again and add 2^31. */ |
@@ -751,23 +862,20 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
751 | emit_gri(as, XG_ARITHi(XOg_ADD), dest, (int32_t)0x80000000); | 862 | emit_gri(as, XG_ARITHi(XOg_ADD), dest, (int32_t)0x80000000); |
752 | emit_rr(as, op, dest|REX_64, tmp); | 863 | emit_rr(as, op, dest|REX_64, tmp); |
753 | if (st == IRT_NUM) | 864 | if (st == IRT_NUM) |
754 | emit_rma(as, XO_ADDSD, tmp, lj_ir_k64_find(as->J, | 865 | emit_rma(as, XO_ADDSD, tmp, &as->J->k64[LJ_K64_M2P64_31]); |
755 | LJ_64 ? U64x(c3f00000,00000000) : U64x(c1e00000,00000000))); | ||
756 | else | 866 | else |
757 | emit_rma(as, XO_ADDSS, tmp, lj_ir_k64_find(as->J, | 867 | emit_rma(as, XO_ADDSS, tmp, &as->J->k32[LJ_K32_M2P64_31]); |
758 | LJ_64 ? U64x(00000000,df800000) : U64x(00000000,cf000000))); | ||
759 | emit_sjcc(as, CC_NS, l_end); | 868 | emit_sjcc(as, CC_NS, l_end); |
760 | emit_rr(as, XO_TEST, dest|REX_64, dest); /* Check if dest negative. */ | 869 | emit_rr(as, XO_TEST, dest|REX_64, dest); /* Check if dest negative. */ |
761 | emit_rr(as, op, dest|REX_64, tmp); | 870 | emit_rr(as, op, dest|REX_64, tmp); |
762 | ra_left(as, tmp, lref); | 871 | ra_left(as, tmp, lref); |
763 | } else { | 872 | } else { |
764 | Reg left = asm_fuseload(as, lref, RSET_FPR); | ||
765 | if (LJ_64 && irt_isu32(ir->t)) | 873 | if (LJ_64 && irt_isu32(ir->t)) |
766 | emit_rr(as, XO_MOV, dest, dest); /* Zero hiword. */ | 874 | emit_rr(as, XO_MOV, dest, dest); /* Zero hiword. */ |
767 | emit_mrm(as, op, | 875 | emit_mrm(as, op, |
768 | dest|((LJ_64 && | 876 | dest|((LJ_64 && |
769 | (irt_is64(ir->t) || irt_isu32(ir->t))) ? REX_64 : 0), | 877 | (irt_is64(ir->t) || irt_isu32(ir->t))) ? REX_64 : 0), |
770 | left); | 878 | asm_fuseload(as, lref, RSET_FPR)); |
771 | } | 879 | } |
772 | } | 880 | } |
773 | } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ | 881 | } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ |
@@ -834,16 +942,14 @@ static void asm_conv_fp_int64(ASMState *as, IRIns *ir) | |||
834 | if (ra_hasreg(dest)) { | 942 | if (ra_hasreg(dest)) { |
835 | ra_free(as, dest); | 943 | ra_free(as, dest); |
836 | ra_modified(as, dest); | 944 | ra_modified(as, dest); |
837 | emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, | 945 | emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, dest, RID_ESP, ofs); |
838 | dest, RID_ESP, ofs); | ||
839 | } | 946 | } |
840 | emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd, | 947 | emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd, |
841 | irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs); | 948 | irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs); |
842 | if (((ir-1)->op2 & IRCONV_SRCMASK) == IRT_U64) { | 949 | if (((ir-1)->op2 & IRCONV_SRCMASK) == IRT_U64) { |
843 | /* For inputs in [2^63,2^64-1] add 2^64 to compensate. */ | 950 | /* For inputs in [2^63,2^64-1] add 2^64 to compensate. */ |
844 | MCLabel l_end = emit_label(as); | 951 | MCLabel l_end = emit_label(as); |
845 | emit_rma(as, XO_FADDq, XOg_FADDq, | 952 | emit_rma(as, XO_FADDq, XOg_FADDq, &as->J->k64[LJ_K64_2P64]); |
846 | lj_ir_k64_find(as->J, U64x(43f00000,00000000))); | ||
847 | emit_sjcc(as, CC_NS, l_end); | 953 | emit_sjcc(as, CC_NS, l_end); |
848 | emit_rr(as, XO_TEST, hi, hi); /* Check if u64 >= 2^63. */ | 954 | emit_rr(as, XO_TEST, hi, hi); /* Check if u64 >= 2^63. */ |
849 | } else { | 955 | } else { |
@@ -863,7 +969,6 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir) | |||
863 | Reg lo, hi; | 969 | Reg lo, hi; |
864 | lua_assert(st == IRT_NUM || st == IRT_FLOAT); | 970 | lua_assert(st == IRT_NUM || st == IRT_FLOAT); |
865 | lua_assert(dt == IRT_I64 || dt == IRT_U64); | 971 | lua_assert(dt == IRT_I64 || dt == IRT_U64); |
866 | lua_assert(((ir-1)->op2 & IRCONV_TRUNC)); | ||
867 | hi = ra_dest(as, ir, RSET_GPR); | 972 | hi = ra_dest(as, ir, RSET_GPR); |
868 | lo = ra_dest(as, ir-1, rset_exclude(RSET_GPR, hi)); | 973 | lo = ra_dest(as, ir-1, rset_exclude(RSET_GPR, hi)); |
869 | if (ra_used(ir-1)) emit_rmro(as, XO_MOV, lo, RID_ESP, 0); | 974 | if (ra_used(ir-1)) emit_rmro(as, XO_MOV, lo, RID_ESP, 0); |
@@ -884,8 +989,7 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir) | |||
884 | emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0); | 989 | emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0); |
885 | else | 990 | else |
886 | emit_rmro(as, XO_FISTPq, XOg_FISTPq, RID_ESP, 0); | 991 | emit_rmro(as, XO_FISTPq, XOg_FISTPq, RID_ESP, 0); |
887 | emit_rma(as, XO_FADDq, XOg_FADDq, | 992 | emit_rma(as, XO_FADDq, XOg_FADDq, &as->J->k64[LJ_K64_M2P64]); |
888 | lj_ir_k64_find(as->J, U64x(c3f00000,00000000))); | ||
889 | emit_sjcc(as, CC_NS, l_pop); | 993 | emit_sjcc(as, CC_NS, l_pop); |
890 | emit_rr(as, XO_TEST, hi, hi); /* Check if out-of-range (2^63). */ | 994 | emit_rr(as, XO_TEST, hi, hi); /* Check if out-of-range (2^63). */ |
891 | } | 995 | } |
@@ -906,6 +1010,14 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir) | |||
906 | st == IRT_NUM ? XOg_FLDq: XOg_FLDd, | 1010 | st == IRT_NUM ? XOg_FLDq: XOg_FLDd, |
907 | asm_fuseload(as, ir->op1, RSET_EMPTY)); | 1011 | asm_fuseload(as, ir->op1, RSET_EMPTY)); |
908 | } | 1012 | } |
1013 | |||
1014 | static void asm_conv64(ASMState *as, IRIns *ir) | ||
1015 | { | ||
1016 | if (irt_isfp(ir->t)) | ||
1017 | asm_conv_fp_int64(as, ir); | ||
1018 | else | ||
1019 | asm_conv_int64_fp(as, ir); | ||
1020 | } | ||
909 | #endif | 1021 | #endif |
910 | 1022 | ||
911 | static void asm_strto(ASMState *as, IRIns *ir) | 1023 | static void asm_strto(ASMState *as, IRIns *ir) |
@@ -927,54 +1039,60 @@ static void asm_strto(ASMState *as, IRIns *ir) | |||
927 | RID_ESP, sps_scale(ir->s)); | 1039 | RID_ESP, sps_scale(ir->s)); |
928 | } | 1040 | } |
929 | 1041 | ||
930 | static void asm_tostr(ASMState *as, IRIns *ir) | 1042 | /* -- Memory references --------------------------------------------------- */ |
1043 | |||
1044 | /* Get pointer to TValue. */ | ||
1045 | static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) | ||
931 | { | 1046 | { |
932 | IRIns *irl = IR(ir->op1); | 1047 | IRIns *ir = IR(ref); |
933 | IRRef args[2]; | 1048 | if (irt_isnum(ir->t)) { |
934 | args[0] = ASMREF_L; | 1049 | /* For numbers use the constant itself or a spill slot as a TValue. */ |
935 | as->gcsteps++; | 1050 | if (irref_isk(ref)) |
936 | if (irt_isnum(irl->t)) { | 1051 | emit_loada(as, dest, ir_knum(ir)); |
937 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum]; | 1052 | else |
938 | args[1] = ASMREF_TMP1; /* const lua_Number * */ | 1053 | emit_rmro(as, XO_LEA, dest|REX_64, RID_ESP, ra_spill(as, ir)); |
939 | asm_setupresult(as, ir, ci); /* GCstr * */ | ||
940 | asm_gencall(as, ci, args); | ||
941 | emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1)|REX_64, | ||
942 | RID_ESP, ra_spill(as, irl)); | ||
943 | } else { | 1054 | } else { |
944 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint]; | 1055 | /* Otherwise use g->tmptv to hold the TValue. */ |
945 | args[1] = ir->op1; /* int32_t k */ | 1056 | #if LJ_GC64 |
946 | asm_setupresult(as, ir, ci); /* GCstr * */ | 1057 | if (irref_isk(ref)) { |
947 | asm_gencall(as, ci, args); | 1058 | TValue k; |
1059 | lj_ir_kvalue(as->J->L, &k, ir); | ||
1060 | emit_movmroi(as, dest, 4, k.u32.hi); | ||
1061 | emit_movmroi(as, dest, 0, k.u32.lo); | ||
1062 | } else { | ||
1063 | /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */ | ||
1064 | Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest)); | ||
1065 | if (irt_is64(ir->t)) { | ||
1066 | emit_u32(as, irt_toitype(ir->t) << 15); | ||
1067 | emit_rmro(as, XO_ARITHi, XOg_OR, dest, 4); | ||
1068 | } else { | ||
1069 | /* Currently, no caller passes integers that might end up here. */ | ||
1070 | emit_movmroi(as, dest, 4, (irt_toitype(ir->t) << 15)); | ||
1071 | } | ||
1072 | emit_movtomro(as, REX_64IR(ir, src), dest, 0); | ||
1073 | } | ||
1074 | #else | ||
1075 | if (!irref_isk(ref)) { | ||
1076 | Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest)); | ||
1077 | emit_movtomro(as, REX_64IR(ir, src), dest, 0); | ||
1078 | } else if (!irt_ispri(ir->t)) { | ||
1079 | emit_movmroi(as, dest, 0, ir->i); | ||
1080 | } | ||
1081 | if (!(LJ_64 && irt_islightud(ir->t))) | ||
1082 | emit_movmroi(as, dest, 4, irt_toitype(ir->t)); | ||
1083 | #endif | ||
1084 | emit_loada(as, dest, &J2G(as->J)->tmptv); | ||
948 | } | 1085 | } |
949 | } | 1086 | } |
950 | 1087 | ||
951 | /* -- Memory references --------------------------------------------------- */ | ||
952 | |||
953 | static void asm_aref(ASMState *as, IRIns *ir) | 1088 | static void asm_aref(ASMState *as, IRIns *ir) |
954 | { | 1089 | { |
955 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1090 | Reg dest = ra_dest(as, ir, RSET_GPR); |
956 | asm_fusearef(as, ir, RSET_GPR); | 1091 | asm_fusearef(as, ir, RSET_GPR); |
957 | if (!(as->mrm.idx == RID_NONE && as->mrm.ofs == 0)) | 1092 | if (!(as->mrm.idx == RID_NONE && as->mrm.ofs == 0)) |
958 | emit_mrm(as, XO_LEA, dest, RID_MRM); | 1093 | emit_mrm(as, XO_LEA, dest|REX_GC64, RID_MRM); |
959 | else if (as->mrm.base != dest) | 1094 | else if (as->mrm.base != dest) |
960 | emit_rr(as, XO_MOV, dest, as->mrm.base); | 1095 | emit_rr(as, XO_MOV, dest|REX_GC64, as->mrm.base); |
961 | } | ||
962 | |||
963 | /* Merge NE(HREF, niltv) check. */ | ||
964 | static MCode *merge_href_niltv(ASMState *as, IRIns *ir) | ||
965 | { | ||
966 | /* Assumes nothing else generates NE of HREF. */ | ||
967 | if ((ir[1].o == IR_NE || ir[1].o == IR_EQ) && ir[1].op1 == as->curins && | ||
968 | ra_hasreg(ir->r)) { | ||
969 | MCode *p = as->mcp; | ||
970 | p += (LJ_64 && *p != XI_ARITHi) ? 7+6 : 6+6; | ||
971 | /* Ensure no loop branch inversion happened. */ | ||
972 | if (p[-6] == 0x0f && p[-5] == XI_JCCn+(CC_NE^(ir[1].o & 1))) { | ||
973 | as->mcp = p; /* Kill cmp reg, imm32 + jz exit. */ | ||
974 | return p + *(int32_t *)(p-4); /* Return exit address. */ | ||
975 | } | ||
976 | } | ||
977 | return NULL; | ||
978 | } | 1096 | } |
979 | 1097 | ||
980 | /* Inlined hash lookup. Specialized for key type and for const keys. | 1098 | /* Inlined hash lookup. Specialized for key type and for const keys. |
@@ -985,10 +1103,10 @@ static MCode *merge_href_niltv(ASMState *as, IRIns *ir) | |||
985 | ** } while ((n = nextnode(n))); | 1103 | ** } while ((n = nextnode(n))); |
986 | ** return niltv(L); | 1104 | ** return niltv(L); |
987 | */ | 1105 | */ |
988 | static void asm_href(ASMState *as, IRIns *ir) | 1106 | static void asm_href(ASMState *as, IRIns *ir, IROp merge) |
989 | { | 1107 | { |
990 | MCode *nilexit = merge_href_niltv(as, ir); /* Do this before any restores. */ | ||
991 | RegSet allow = RSET_GPR; | 1108 | RegSet allow = RSET_GPR; |
1109 | int destused = ra_used(ir); | ||
992 | Reg dest = ra_dest(as, ir, allow); | 1110 | Reg dest = ra_dest(as, ir, allow); |
993 | Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); | 1111 | Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); |
994 | Reg key = RID_NONE, tmp = RID_NONE; | 1112 | Reg key = RID_NONE, tmp = RID_NONE; |
@@ -1001,28 +1119,26 @@ static void asm_href(ASMState *as, IRIns *ir) | |||
1001 | if (!isk) { | 1119 | if (!isk) { |
1002 | rset_clear(allow, tab); | 1120 | rset_clear(allow, tab); |
1003 | key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow); | 1121 | key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow); |
1004 | if (!irt_isstr(kt)) | 1122 | if (LJ_GC64 || !irt_isstr(kt)) |
1005 | tmp = ra_scratch(as, rset_exclude(allow, key)); | 1123 | tmp = ra_scratch(as, rset_exclude(allow, key)); |
1006 | } | 1124 | } |
1007 | 1125 | ||
1008 | /* Key not found in chain: jump to exit (if merged with NE) or load niltv. */ | 1126 | /* Key not found in chain: jump to exit (if merged) or load niltv. */ |
1009 | l_end = emit_label(as); | 1127 | l_end = emit_label(as); |
1010 | if (nilexit && ir[1].o == IR_NE) { | 1128 | if (merge == IR_NE) |
1011 | emit_jcc(as, CC_E, nilexit); /* XI_JMP is not found by lj_asm_patchexit. */ | 1129 | asm_guardcc(as, CC_E); /* XI_JMP is not found by lj_asm_patchexit. */ |
1012 | nilexit = NULL; | 1130 | else if (destused) |
1013 | } else { | ||
1014 | emit_loada(as, dest, niltvg(J2G(as->J))); | 1131 | emit_loada(as, dest, niltvg(J2G(as->J))); |
1015 | } | ||
1016 | 1132 | ||
1017 | /* Follow hash chain until the end. */ | 1133 | /* Follow hash chain until the end. */ |
1018 | l_loop = emit_sjcc_label(as, CC_NZ); | 1134 | l_loop = emit_sjcc_label(as, CC_NZ); |
1019 | emit_rr(as, XO_TEST, dest, dest); | 1135 | emit_rr(as, XO_TEST, dest|REX_GC64, dest); |
1020 | emit_rmro(as, XO_MOV, dest, dest, offsetof(Node, next)); | 1136 | emit_rmro(as, XO_MOV, dest|REX_GC64, dest, offsetof(Node, next)); |
1021 | l_next = emit_label(as); | 1137 | l_next = emit_label(as); |
1022 | 1138 | ||
1023 | /* Type and value comparison. */ | 1139 | /* Type and value comparison. */ |
1024 | if (nilexit) | 1140 | if (merge == IR_EQ) |
1025 | emit_jcc(as, CC_E, nilexit); | 1141 | asm_guardcc(as, CC_E); |
1026 | else | 1142 | else |
1027 | emit_sjcc(as, CC_E, l_end); | 1143 | emit_sjcc(as, CC_E, l_end); |
1028 | if (irt_isnum(kt)) { | 1144 | if (irt_isnum(kt)) { |
@@ -1038,7 +1154,7 @@ static void asm_href(ASMState *as, IRIns *ir) | |||
1038 | emit_rmro(as, XO_UCOMISD, key, dest, offsetof(Node, key.n)); | 1154 | emit_rmro(as, XO_UCOMISD, key, dest, offsetof(Node, key.n)); |
1039 | emit_sjcc(as, CC_AE, l_next); | 1155 | emit_sjcc(as, CC_AE, l_next); |
1040 | /* The type check avoids NaN penalties and complaints from Valgrind. */ | 1156 | /* The type check avoids NaN penalties and complaints from Valgrind. */ |
1041 | #if LJ_64 | 1157 | #if LJ_64 && !LJ_GC64 |
1042 | emit_u32(as, LJ_TISNUM); | 1158 | emit_u32(as, LJ_TISNUM); |
1043 | emit_rmro(as, XO_ARITHi, XOg_CMP, dest, offsetof(Node, key.it)); | 1159 | emit_rmro(as, XO_ARITHi, XOg_CMP, dest, offsetof(Node, key.it)); |
1044 | #else | 1160 | #else |
@@ -1046,10 +1162,28 @@ static void asm_href(ASMState *as, IRIns *ir) | |||
1046 | emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it)); | 1162 | emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it)); |
1047 | #endif | 1163 | #endif |
1048 | } | 1164 | } |
1049 | #if LJ_64 | 1165 | #if LJ_64 && !LJ_GC64 |
1050 | } else if (irt_islightud(kt)) { | 1166 | } else if (irt_islightud(kt)) { |
1051 | emit_rmro(as, XO_CMP, key|REX_64, dest, offsetof(Node, key.u64)); | 1167 | emit_rmro(as, XO_CMP, key|REX_64, dest, offsetof(Node, key.u64)); |
1052 | #endif | 1168 | #endif |
1169 | #if LJ_GC64 | ||
1170 | } else if (irt_isaddr(kt)) { | ||
1171 | if (isk) { | ||
1172 | TValue k; | ||
1173 | k.u64 = ((uint64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64; | ||
1174 | emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.u32.lo), | ||
1175 | k.u32.lo); | ||
1176 | emit_sjcc(as, CC_NE, l_next); | ||
1177 | emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.u32.hi), | ||
1178 | k.u32.hi); | ||
1179 | } else { | ||
1180 | emit_rmro(as, XO_CMP, tmp|REX_64, dest, offsetof(Node, key.u64)); | ||
1181 | } | ||
1182 | } else { | ||
1183 | lua_assert(irt_ispri(kt) && !irt_isnil(kt)); | ||
1184 | emit_u32(as, (irt_toitype(kt)<<15)|0x7fff); | ||
1185 | emit_rmro(as, XO_ARITHi, XOg_CMP, dest, offsetof(Node, key.it)); | ||
1186 | #else | ||
1053 | } else { | 1187 | } else { |
1054 | if (!irt_ispri(kt)) { | 1188 | if (!irt_ispri(kt)) { |
1055 | lua_assert(irt_isaddr(kt)); | 1189 | lua_assert(irt_isaddr(kt)); |
@@ -1063,16 +1197,23 @@ static void asm_href(ASMState *as, IRIns *ir) | |||
1063 | lua_assert(!irt_isnil(kt)); | 1197 | lua_assert(!irt_isnil(kt)); |
1064 | emit_i8(as, irt_toitype(kt)); | 1198 | emit_i8(as, irt_toitype(kt)); |
1065 | emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it)); | 1199 | emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it)); |
1200 | #endif | ||
1066 | } | 1201 | } |
1067 | emit_sfixup(as, l_loop); | 1202 | emit_sfixup(as, l_loop); |
1068 | checkmclim(as); | 1203 | checkmclim(as); |
1204 | #if LJ_GC64 | ||
1205 | if (!isk && irt_isaddr(kt)) { | ||
1206 | emit_rr(as, XO_OR, tmp|REX_64, key); | ||
1207 | emit_loadu64(as, tmp, (uint64_t)irt_toitype(kt) << 47); | ||
1208 | } | ||
1209 | #endif | ||
1069 | 1210 | ||
1070 | /* Load main position relative to tab->node into dest. */ | 1211 | /* Load main position relative to tab->node into dest. */ |
1071 | khash = isk ? ir_khash(irkey) : 1; | 1212 | khash = isk ? ir_khash(irkey) : 1; |
1072 | if (khash == 0) { | 1213 | if (khash == 0) { |
1073 | emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, node)); | 1214 | emit_rmro(as, XO_MOV, dest|REX_GC64, tab, offsetof(GCtab, node)); |
1074 | } else { | 1215 | } else { |
1075 | emit_rmro(as, XO_ARITH(XOg_ADD), dest, tab, offsetof(GCtab, node)); | 1216 | emit_rmro(as, XO_ARITH(XOg_ADD), dest|REX_GC64, tab, offsetof(GCtab,node)); |
1076 | if ((as->flags & JIT_F_PREFER_IMUL)) { | 1217 | if ((as->flags & JIT_F_PREFER_IMUL)) { |
1077 | emit_i8(as, sizeof(Node)); | 1218 | emit_i8(as, sizeof(Node)); |
1078 | emit_rr(as, XO_IMULi8, dest, dest); | 1219 | emit_rr(as, XO_IMULi8, dest, dest); |
@@ -1107,7 +1248,19 @@ static void asm_href(ASMState *as, IRIns *ir) | |||
1107 | #endif | 1248 | #endif |
1108 | } else { | 1249 | } else { |
1109 | emit_rr(as, XO_MOV, tmp, key); | 1250 | emit_rr(as, XO_MOV, tmp, key); |
1251 | #if LJ_GC64 | ||
1252 | checkmclim(as); | ||
1253 | emit_gri(as, XG_ARITHi(XOg_XOR), dest, irt_toitype(kt) << 15); | ||
1254 | if ((as->flags & JIT_F_BMI2)) { | ||
1255 | emit_i8(as, 32); | ||
1256 | emit_mrm(as, XV_RORX|VEX_64, dest, key); | ||
1257 | } else { | ||
1258 | emit_shifti(as, XOg_SHR|REX_64, dest, 32); | ||
1259 | emit_rr(as, XO_MOV, dest|REX_64, key|REX_64); | ||
1260 | } | ||
1261 | #else | ||
1110 | emit_rmro(as, XO_LEA, dest, key, HASH_BIAS); | 1262 | emit_rmro(as, XO_LEA, dest, key, HASH_BIAS); |
1263 | #endif | ||
1111 | } | 1264 | } |
1112 | } | 1265 | } |
1113 | } | 1266 | } |
@@ -1127,11 +1280,11 @@ static void asm_hrefk(ASMState *as, IRIns *ir) | |||
1127 | if (ra_hasreg(dest)) { | 1280 | if (ra_hasreg(dest)) { |
1128 | if (ofs != 0) { | 1281 | if (ofs != 0) { |
1129 | if (dest == node && !(as->flags & JIT_F_LEA_AGU)) | 1282 | if (dest == node && !(as->flags & JIT_F_LEA_AGU)) |
1130 | emit_gri(as, XG_ARITHi(XOg_ADD), dest, ofs); | 1283 | emit_gri(as, XG_ARITHi(XOg_ADD), dest|REX_GC64, ofs); |
1131 | else | 1284 | else |
1132 | emit_rmro(as, XO_LEA, dest, node, ofs); | 1285 | emit_rmro(as, XO_LEA, dest|REX_GC64, node, ofs); |
1133 | } else if (dest != node) { | 1286 | } else if (dest != node) { |
1134 | emit_rr(as, XO_MOV, dest, node); | 1287 | emit_rr(as, XO_MOV, dest|REX_GC64, node); |
1135 | } | 1288 | } |
1136 | } | 1289 | } |
1137 | asm_guardcc(as, CC_NE); | 1290 | asm_guardcc(as, CC_NE); |
@@ -1143,13 +1296,24 @@ static void asm_hrefk(ASMState *as, IRIns *ir) | |||
1143 | lua_assert(irt_isnum(irkey->t) || irt_isgcv(irkey->t)); | 1296 | lua_assert(irt_isnum(irkey->t) || irt_isgcv(irkey->t)); |
1144 | /* Assumes -0.0 is already canonicalized to +0.0. */ | 1297 | /* Assumes -0.0 is already canonicalized to +0.0. */ |
1145 | emit_loadu64(as, key, irt_isnum(irkey->t) ? ir_knum(irkey)->u64 : | 1298 | emit_loadu64(as, key, irt_isnum(irkey->t) ? ir_knum(irkey)->u64 : |
1299 | #if LJ_GC64 | ||
1300 | ((uint64_t)irt_toitype(irkey->t) << 47) | | ||
1301 | (uint64_t)ir_kgc(irkey)); | ||
1302 | #else | ||
1146 | ((uint64_t)irt_toitype(irkey->t) << 32) | | 1303 | ((uint64_t)irt_toitype(irkey->t) << 32) | |
1147 | (uint64_t)(uint32_t)ptr2addr(ir_kgc(irkey))); | 1304 | (uint64_t)(uint32_t)ptr2addr(ir_kgc(irkey))); |
1305 | #endif | ||
1148 | } else { | 1306 | } else { |
1149 | lua_assert(!irt_isnil(irkey->t)); | 1307 | lua_assert(!irt_isnil(irkey->t)); |
1308 | #if LJ_GC64 | ||
1309 | emit_i32(as, (irt_toitype(irkey->t)<<15)|0x7fff); | ||
1310 | emit_rmro(as, XO_ARITHi, XOg_CMP, node, | ||
1311 | ofs + (int32_t)offsetof(Node, key.it)); | ||
1312 | #else | ||
1150 | emit_i8(as, irt_toitype(irkey->t)); | 1313 | emit_i8(as, irt_toitype(irkey->t)); |
1151 | emit_rmro(as, XO_ARITHi8, XOg_CMP, node, | 1314 | emit_rmro(as, XO_ARITHi8, XOg_CMP, node, |
1152 | ofs + (int32_t)offsetof(Node, key.it)); | 1315 | ofs + (int32_t)offsetof(Node, key.it)); |
1316 | #endif | ||
1153 | } | 1317 | } |
1154 | #else | 1318 | #else |
1155 | l_exit = emit_label(as); | 1319 | l_exit = emit_label(as); |
@@ -1178,61 +1342,27 @@ static void asm_hrefk(ASMState *as, IRIns *ir) | |||
1178 | #endif | 1342 | #endif |
1179 | } | 1343 | } |
1180 | 1344 | ||
1181 | static void asm_newref(ASMState *as, IRIns *ir) | ||
1182 | { | ||
1183 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey]; | ||
1184 | IRRef args[3]; | ||
1185 | IRIns *irkey; | ||
1186 | Reg tmp; | ||
1187 | if (ir->r == RID_SINK) | ||
1188 | return; | ||
1189 | args[0] = ASMREF_L; /* lua_State *L */ | ||
1190 | args[1] = ir->op1; /* GCtab *t */ | ||
1191 | args[2] = ASMREF_TMP1; /* cTValue *key */ | ||
1192 | asm_setupresult(as, ir, ci); /* TValue * */ | ||
1193 | asm_gencall(as, ci, args); | ||
1194 | tmp = ra_releasetmp(as, ASMREF_TMP1); | ||
1195 | irkey = IR(ir->op2); | ||
1196 | if (irt_isnum(irkey->t)) { | ||
1197 | /* For numbers use the constant itself or a spill slot as a TValue. */ | ||
1198 | if (irref_isk(ir->op2)) | ||
1199 | emit_loada(as, tmp, ir_knum(irkey)); | ||
1200 | else | ||
1201 | emit_rmro(as, XO_LEA, tmp|REX_64, RID_ESP, ra_spill(as, irkey)); | ||
1202 | } else { | ||
1203 | /* Otherwise use g->tmptv to hold the TValue. */ | ||
1204 | if (!irref_isk(ir->op2)) { | ||
1205 | Reg src = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, tmp)); | ||
1206 | emit_movtomro(as, REX_64IR(irkey, src), tmp, 0); | ||
1207 | } else if (!irt_ispri(irkey->t)) { | ||
1208 | emit_movmroi(as, tmp, 0, irkey->i); | ||
1209 | } | ||
1210 | if (!(LJ_64 && irt_islightud(irkey->t))) | ||
1211 | emit_movmroi(as, tmp, 4, irt_toitype(irkey->t)); | ||
1212 | emit_loada(as, tmp, &J2G(as->J)->tmptv); | ||
1213 | } | ||
1214 | } | ||
1215 | |||
1216 | static void asm_uref(ASMState *as, IRIns *ir) | 1345 | static void asm_uref(ASMState *as, IRIns *ir) |
1217 | { | 1346 | { |
1218 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1347 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1219 | if (irref_isk(ir->op1)) { | 1348 | if (irref_isk(ir->op1)) { |
1220 | GCfunc *fn = ir_kfunc(IR(ir->op1)); | 1349 | GCfunc *fn = ir_kfunc(IR(ir->op1)); |
1221 | MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; | 1350 | MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; |
1222 | emit_rma(as, XO_MOV, dest, v); | 1351 | emit_rma(as, XO_MOV, dest|REX_GC64, v); |
1223 | } else { | 1352 | } else { |
1224 | Reg uv = ra_scratch(as, RSET_GPR); | 1353 | Reg uv = ra_scratch(as, RSET_GPR); |
1225 | Reg func = ra_alloc1(as, ir->op1, RSET_GPR); | 1354 | Reg func = ra_alloc1(as, ir->op1, RSET_GPR); |
1226 | if (ir->o == IR_UREFC) { | 1355 | if (ir->o == IR_UREFC) { |
1227 | emit_rmro(as, XO_LEA, dest, uv, offsetof(GCupval, tv)); | 1356 | emit_rmro(as, XO_LEA, dest|REX_GC64, uv, offsetof(GCupval, tv)); |
1228 | asm_guardcc(as, CC_NE); | 1357 | asm_guardcc(as, CC_NE); |
1229 | emit_i8(as, 1); | 1358 | emit_i8(as, 1); |
1230 | emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed)); | 1359 | emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed)); |
1231 | } else { | 1360 | } else { |
1232 | emit_rmro(as, XO_MOV, dest, uv, offsetof(GCupval, v)); | 1361 | emit_rmro(as, XO_MOV, dest|REX_GC64, uv, offsetof(GCupval, v)); |
1233 | } | 1362 | } |
1234 | emit_rmro(as, XO_MOV, uv, func, | 1363 | emit_rmro(as, XO_MOV, uv|REX_GC64, func, |
1235 | (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8)); | 1364 | (int32_t)offsetof(GCfuncL, uvptr) + |
1365 | (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8)); | ||
1236 | } | 1366 | } |
1237 | } | 1367 | } |
1238 | 1368 | ||
@@ -1250,9 +1380,9 @@ static void asm_strref(ASMState *as, IRIns *ir) | |||
1250 | if (as->mrm.base == RID_NONE) | 1380 | if (as->mrm.base == RID_NONE) |
1251 | emit_loadi(as, dest, as->mrm.ofs); | 1381 | emit_loadi(as, dest, as->mrm.ofs); |
1252 | else if (as->mrm.base == dest && as->mrm.idx == RID_NONE) | 1382 | else if (as->mrm.base == dest && as->mrm.idx == RID_NONE) |
1253 | emit_gri(as, XG_ARITHi(XOg_ADD), dest, as->mrm.ofs); | 1383 | emit_gri(as, XG_ARITHi(XOg_ADD), dest|REX_GC64, as->mrm.ofs); |
1254 | else | 1384 | else |
1255 | emit_mrm(as, XO_LEA, dest, RID_MRM); | 1385 | emit_mrm(as, XO_LEA, dest|REX_GC64, RID_MRM); |
1256 | } | 1386 | } |
1257 | 1387 | ||
1258 | /* -- Loads and stores ---------------------------------------------------- */ | 1388 | /* -- Loads and stores ---------------------------------------------------- */ |
@@ -1271,7 +1401,7 @@ static void asm_fxload(ASMState *as, IRIns *ir) | |||
1271 | case IRT_U8: xo = XO_MOVZXb; break; | 1401 | case IRT_U8: xo = XO_MOVZXb; break; |
1272 | case IRT_I16: xo = XO_MOVSXw; break; | 1402 | case IRT_I16: xo = XO_MOVSXw; break; |
1273 | case IRT_U16: xo = XO_MOVZXw; break; | 1403 | case IRT_U16: xo = XO_MOVZXw; break; |
1274 | case IRT_NUM: xo = XMM_MOVRM(as); break; | 1404 | case IRT_NUM: xo = XO_MOVSD; break; |
1275 | case IRT_FLOAT: xo = XO_MOVSS; break; | 1405 | case IRT_FLOAT: xo = XO_MOVSS; break; |
1276 | default: | 1406 | default: |
1277 | if (LJ_64 && irt_is64(ir->t)) | 1407 | if (LJ_64 && irt_is64(ir->t)) |
@@ -1284,6 +1414,9 @@ static void asm_fxload(ASMState *as, IRIns *ir) | |||
1284 | emit_mrm(as, xo, dest, RID_MRM); | 1414 | emit_mrm(as, xo, dest, RID_MRM); |
1285 | } | 1415 | } |
1286 | 1416 | ||
1417 | #define asm_fload(as, ir) asm_fxload(as, ir) | ||
1418 | #define asm_xload(as, ir) asm_fxload(as, ir) | ||
1419 | |||
1287 | static void asm_fxstore(ASMState *as, IRIns *ir) | 1420 | static void asm_fxstore(ASMState *as, IRIns *ir) |
1288 | { | 1421 | { |
1289 | RegSet allow = RSET_GPR; | 1422 | RegSet allow = RSET_GPR; |
@@ -1318,7 +1451,7 @@ static void asm_fxstore(ASMState *as, IRIns *ir) | |||
1318 | case IRT_I16: case IRT_U16: xo = XO_MOVtow; break; | 1451 | case IRT_I16: case IRT_U16: xo = XO_MOVtow; break; |
1319 | case IRT_NUM: xo = XO_MOVSDto; break; | 1452 | case IRT_NUM: xo = XO_MOVSDto; break; |
1320 | case IRT_FLOAT: xo = XO_MOVSSto; break; | 1453 | case IRT_FLOAT: xo = XO_MOVSSto; break; |
1321 | #if LJ_64 | 1454 | #if LJ_64 && !LJ_GC64 |
1322 | case IRT_LIGHTUD: lua_assert(0); /* NYI: mask 64 bit lightuserdata. */ | 1455 | case IRT_LIGHTUD: lua_assert(0); /* NYI: mask 64 bit lightuserdata. */ |
1323 | #endif | 1456 | #endif |
1324 | default: | 1457 | default: |
@@ -1347,7 +1480,10 @@ static void asm_fxstore(ASMState *as, IRIns *ir) | |||
1347 | } | 1480 | } |
1348 | } | 1481 | } |
1349 | 1482 | ||
1350 | #if LJ_64 | 1483 | #define asm_fstore(as, ir) asm_fxstore(as, ir) |
1484 | #define asm_xstore(as, ir) asm_fxstore(as, ir) | ||
1485 | |||
1486 | #if LJ_64 && !LJ_GC64 | ||
1351 | static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck) | 1487 | static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck) |
1352 | { | 1488 | { |
1353 | if (ra_used(ir) || typecheck) { | 1489 | if (ra_used(ir) || typecheck) { |
@@ -1369,9 +1505,12 @@ static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck) | |||
1369 | 1505 | ||
1370 | static void asm_ahuvload(ASMState *as, IRIns *ir) | 1506 | static void asm_ahuvload(ASMState *as, IRIns *ir) |
1371 | { | 1507 | { |
1508 | #if LJ_GC64 | ||
1509 | Reg tmp = RID_NONE; | ||
1510 | #endif | ||
1372 | lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) || | 1511 | lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) || |
1373 | (LJ_DUALNUM && irt_isint(ir->t))); | 1512 | (LJ_DUALNUM && irt_isint(ir->t))); |
1374 | #if LJ_64 | 1513 | #if LJ_64 && !LJ_GC64 |
1375 | if (irt_islightud(ir->t)) { | 1514 | if (irt_islightud(ir->t)) { |
1376 | Reg dest = asm_load_lightud64(as, ir, 1); | 1515 | Reg dest = asm_load_lightud64(as, ir, 1); |
1377 | if (ra_hasreg(dest)) { | 1516 | if (ra_hasreg(dest)) { |
@@ -1385,20 +1524,64 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) | |||
1385 | RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR; | 1524 | RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR; |
1386 | Reg dest = ra_dest(as, ir, allow); | 1525 | Reg dest = ra_dest(as, ir, allow); |
1387 | asm_fuseahuref(as, ir->op1, RSET_GPR); | 1526 | asm_fuseahuref(as, ir->op1, RSET_GPR); |
1388 | emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XMM_MOVRM(as), dest, RID_MRM); | 1527 | #if LJ_GC64 |
1528 | if (irt_isaddr(ir->t)) { | ||
1529 | emit_shifti(as, XOg_SHR|REX_64, dest, 17); | ||
1530 | asm_guardcc(as, CC_NE); | ||
1531 | emit_i8(as, irt_toitype(ir->t)); | ||
1532 | emit_rr(as, XO_ARITHi8, XOg_CMP, dest); | ||
1533 | emit_i8(as, XI_O16); | ||
1534 | if ((as->flags & JIT_F_BMI2)) { | ||
1535 | emit_i8(as, 47); | ||
1536 | emit_mrm(as, XV_RORX|VEX_64, dest, RID_MRM); | ||
1537 | } else { | ||
1538 | emit_shifti(as, XOg_ROR|REX_64, dest, 47); | ||
1539 | emit_mrm(as, XO_MOV, dest|REX_64, RID_MRM); | ||
1540 | } | ||
1541 | return; | ||
1542 | } else | ||
1543 | #endif | ||
1544 | emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XO_MOVSD, dest, RID_MRM); | ||
1389 | } else { | 1545 | } else { |
1390 | asm_fuseahuref(as, ir->op1, RSET_GPR); | 1546 | RegSet gpr = RSET_GPR; |
1547 | #if LJ_GC64 | ||
1548 | if (irt_isaddr(ir->t)) { | ||
1549 | tmp = ra_scratch(as, RSET_GPR); | ||
1550 | gpr = rset_exclude(gpr, tmp); | ||
1551 | } | ||
1552 | #endif | ||
1553 | asm_fuseahuref(as, ir->op1, gpr); | ||
1391 | } | 1554 | } |
1392 | /* Always do the type check, even if the load result is unused. */ | 1555 | /* Always do the type check, even if the load result is unused. */ |
1393 | as->mrm.ofs += 4; | 1556 | as->mrm.ofs += 4; |
1394 | asm_guardcc(as, irt_isnum(ir->t) ? CC_AE : CC_NE); | 1557 | asm_guardcc(as, irt_isnum(ir->t) ? CC_AE : CC_NE); |
1395 | if (LJ_64 && irt_type(ir->t) >= IRT_NUM) { | 1558 | if (LJ_64 && irt_type(ir->t) >= IRT_NUM) { |
1396 | lua_assert(irt_isinteger(ir->t) || irt_isnum(ir->t)); | 1559 | lua_assert(irt_isinteger(ir->t) || irt_isnum(ir->t)); |
1560 | #if LJ_GC64 | ||
1561 | emit_u32(as, LJ_TISNUM << 15); | ||
1562 | #else | ||
1397 | emit_u32(as, LJ_TISNUM); | 1563 | emit_u32(as, LJ_TISNUM); |
1564 | #endif | ||
1398 | emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM); | 1565 | emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM); |
1566 | #if LJ_GC64 | ||
1567 | } else if (irt_isaddr(ir->t)) { | ||
1568 | as->mrm.ofs -= 4; | ||
1569 | emit_i8(as, irt_toitype(ir->t)); | ||
1570 | emit_mrm(as, XO_ARITHi8, XOg_CMP, tmp); | ||
1571 | emit_shifti(as, XOg_SAR|REX_64, tmp, 47); | ||
1572 | emit_mrm(as, XO_MOV, tmp|REX_64, RID_MRM); | ||
1573 | } else if (irt_isnil(ir->t)) { | ||
1574 | as->mrm.ofs -= 4; | ||
1575 | emit_i8(as, -1); | ||
1576 | emit_mrm(as, XO_ARITHi8, XOg_CMP|REX_64, RID_MRM); | ||
1577 | } else { | ||
1578 | emit_u32(as, (irt_toitype(ir->t) << 15) | 0x7fff); | ||
1579 | emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM); | ||
1580 | #else | ||
1399 | } else { | 1581 | } else { |
1400 | emit_i8(as, irt_toitype(ir->t)); | 1582 | emit_i8(as, irt_toitype(ir->t)); |
1401 | emit_mrm(as, XO_ARITHi8, XOg_CMP, RID_MRM); | 1583 | emit_mrm(as, XO_ARITHi8, XOg_CMP, RID_MRM); |
1584 | #endif | ||
1402 | } | 1585 | } |
1403 | } | 1586 | } |
1404 | 1587 | ||
@@ -1410,12 +1593,28 @@ static void asm_ahustore(ASMState *as, IRIns *ir) | |||
1410 | Reg src = ra_alloc1(as, ir->op2, RSET_FPR); | 1593 | Reg src = ra_alloc1(as, ir->op2, RSET_FPR); |
1411 | asm_fuseahuref(as, ir->op1, RSET_GPR); | 1594 | asm_fuseahuref(as, ir->op1, RSET_GPR); |
1412 | emit_mrm(as, XO_MOVSDto, src, RID_MRM); | 1595 | emit_mrm(as, XO_MOVSDto, src, RID_MRM); |
1413 | #if LJ_64 | 1596 | #if LJ_64 && !LJ_GC64 |
1414 | } else if (irt_islightud(ir->t)) { | 1597 | } else if (irt_islightud(ir->t)) { |
1415 | Reg src = ra_alloc1(as, ir->op2, RSET_GPR); | 1598 | Reg src = ra_alloc1(as, ir->op2, RSET_GPR); |
1416 | asm_fuseahuref(as, ir->op1, rset_exclude(RSET_GPR, src)); | 1599 | asm_fuseahuref(as, ir->op1, rset_exclude(RSET_GPR, src)); |
1417 | emit_mrm(as, XO_MOVto, src|REX_64, RID_MRM); | 1600 | emit_mrm(as, XO_MOVto, src|REX_64, RID_MRM); |
1418 | #endif | 1601 | #endif |
1602 | #if LJ_GC64 | ||
1603 | } else if (irref_isk(ir->op2)) { | ||
1604 | TValue k; | ||
1605 | lj_ir_kvalue(as->J->L, &k, IR(ir->op2)); | ||
1606 | asm_fuseahuref(as, ir->op1, RSET_GPR); | ||
1607 | if (tvisnil(&k)) { | ||
1608 | emit_i32(as, -1); | ||
1609 | emit_mrm(as, XO_MOVmi, REX_64, RID_MRM); | ||
1610 | } else { | ||
1611 | emit_u32(as, k.u32.lo); | ||
1612 | emit_mrm(as, XO_MOVmi, 0, RID_MRM); | ||
1613 | as->mrm.ofs += 4; | ||
1614 | emit_u32(as, k.u32.hi); | ||
1615 | emit_mrm(as, XO_MOVmi, 0, RID_MRM); | ||
1616 | } | ||
1617 | #endif | ||
1419 | } else { | 1618 | } else { |
1420 | IRIns *irr = IR(ir->op2); | 1619 | IRIns *irr = IR(ir->op2); |
1421 | RegSet allow = RSET_GPR; | 1620 | RegSet allow = RSET_GPR; |
@@ -1426,6 +1625,17 @@ static void asm_ahustore(ASMState *as, IRIns *ir) | |||
1426 | } | 1625 | } |
1427 | asm_fuseahuref(as, ir->op1, allow); | 1626 | asm_fuseahuref(as, ir->op1, allow); |
1428 | if (ra_hasreg(src)) { | 1627 | if (ra_hasreg(src)) { |
1628 | #if LJ_GC64 | ||
1629 | if (!(LJ_DUALNUM && irt_isinteger(ir->t))) { | ||
1630 | /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */ | ||
1631 | as->mrm.ofs += 4; | ||
1632 | emit_u32(as, irt_toitype(ir->t) << 15); | ||
1633 | emit_mrm(as, XO_ARITHi, XOg_OR, RID_MRM); | ||
1634 | as->mrm.ofs -= 4; | ||
1635 | emit_mrm(as, XO_MOVto, src|REX_64, RID_MRM); | ||
1636 | return; | ||
1637 | } | ||
1638 | #endif | ||
1429 | emit_mrm(as, XO_MOVto, src, RID_MRM); | 1639 | emit_mrm(as, XO_MOVto, src, RID_MRM); |
1430 | } else if (!irt_ispri(irr->t)) { | 1640 | } else if (!irt_ispri(irr->t)) { |
1431 | lua_assert(irt_isaddr(ir->t) || (LJ_DUALNUM && irt_isinteger(ir->t))); | 1641 | lua_assert(irt_isaddr(ir->t) || (LJ_DUALNUM && irt_isinteger(ir->t))); |
@@ -1433,14 +1643,20 @@ static void asm_ahustore(ASMState *as, IRIns *ir) | |||
1433 | emit_mrm(as, XO_MOVmi, 0, RID_MRM); | 1643 | emit_mrm(as, XO_MOVmi, 0, RID_MRM); |
1434 | } | 1644 | } |
1435 | as->mrm.ofs += 4; | 1645 | as->mrm.ofs += 4; |
1646 | #if LJ_GC64 | ||
1647 | lua_assert(LJ_DUALNUM && irt_isinteger(ir->t)); | ||
1648 | emit_i32(as, LJ_TNUMX << 15); | ||
1649 | #else | ||
1436 | emit_i32(as, (int32_t)irt_toitype(ir->t)); | 1650 | emit_i32(as, (int32_t)irt_toitype(ir->t)); |
1651 | #endif | ||
1437 | emit_mrm(as, XO_MOVmi, 0, RID_MRM); | 1652 | emit_mrm(as, XO_MOVmi, 0, RID_MRM); |
1438 | } | 1653 | } |
1439 | } | 1654 | } |
1440 | 1655 | ||
1441 | static void asm_sload(ASMState *as, IRIns *ir) | 1656 | static void asm_sload(ASMState *as, IRIns *ir) |
1442 | { | 1657 | { |
1443 | int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0); | 1658 | int32_t ofs = 8*((int32_t)ir->op1-1-LJ_FR2) + |
1659 | (!LJ_FR2 && (ir->op2 & IRSLOAD_FRAME) ? 4 : 0); | ||
1444 | IRType1 t = ir->t; | 1660 | IRType1 t = ir->t; |
1445 | Reg base; | 1661 | Reg base; |
1446 | lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ | 1662 | lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ |
@@ -1451,9 +1667,9 @@ static void asm_sload(ASMState *as, IRIns *ir) | |||
1451 | Reg left = ra_scratch(as, RSET_FPR); | 1667 | Reg left = ra_scratch(as, RSET_FPR); |
1452 | asm_tointg(as, ir, left); /* Frees dest reg. Do this before base alloc. */ | 1668 | asm_tointg(as, ir, left); /* Frees dest reg. Do this before base alloc. */ |
1453 | base = ra_alloc1(as, REF_BASE, RSET_GPR); | 1669 | base = ra_alloc1(as, REF_BASE, RSET_GPR); |
1454 | emit_rmro(as, XMM_MOVRM(as), left, base, ofs); | 1670 | emit_rmro(as, XO_MOVSD, left, base, ofs); |
1455 | t.irt = IRT_NUM; /* Continue with a regular number type check. */ | 1671 | t.irt = IRT_NUM; /* Continue with a regular number type check. */ |
1456 | #if LJ_64 | 1672 | #if LJ_64 && !LJ_GC64 |
1457 | } else if (irt_islightud(t)) { | 1673 | } else if (irt_islightud(t)) { |
1458 | Reg dest = asm_load_lightud64(as, ir, (ir->op2 & IRSLOAD_TYPECHECK)); | 1674 | Reg dest = asm_load_lightud64(as, ir, (ir->op2 & IRSLOAD_TYPECHECK)); |
1459 | if (ra_hasreg(dest)) { | 1675 | if (ra_hasreg(dest)) { |
@@ -1469,11 +1685,39 @@ static void asm_sload(ASMState *as, IRIns *ir) | |||
1469 | lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); | 1685 | lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); |
1470 | if ((ir->op2 & IRSLOAD_CONVERT)) { | 1686 | if ((ir->op2 & IRSLOAD_CONVERT)) { |
1471 | t.irt = irt_isint(t) ? IRT_NUM : IRT_INT; /* Check for original type. */ | 1687 | t.irt = irt_isint(t) ? IRT_NUM : IRT_INT; /* Check for original type. */ |
1472 | emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTSD2SI, dest, base, ofs); | 1688 | emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTTSD2SI, dest, base, ofs); |
1473 | } else if (irt_isnum(t)) { | ||
1474 | emit_rmro(as, XMM_MOVRM(as), dest, base, ofs); | ||
1475 | } else { | 1689 | } else { |
1476 | emit_rmro(as, XO_MOV, dest, base, ofs); | 1690 | #if LJ_GC64 |
1691 | if (irt_isaddr(t)) { | ||
1692 | /* LJ_GC64 type check + tag removal without BMI2 and with BMI2: | ||
1693 | ** | ||
1694 | ** mov r64, [addr] rorx r64, [addr], 47 | ||
1695 | ** ror r64, 47 | ||
1696 | ** cmp r16, itype cmp r16, itype | ||
1697 | ** jne ->exit jne ->exit | ||
1698 | ** shr r64, 16 shr r64, 16 | ||
1699 | */ | ||
1700 | emit_shifti(as, XOg_SHR|REX_64, dest, 17); | ||
1701 | if ((ir->op2 & IRSLOAD_TYPECHECK)) { | ||
1702 | asm_guardcc(as, CC_NE); | ||
1703 | emit_i8(as, irt_toitype(t)); | ||
1704 | emit_rr(as, XO_ARITHi8, XOg_CMP, dest); | ||
1705 | emit_i8(as, XI_O16); | ||
1706 | } | ||
1707 | if ((as->flags & JIT_F_BMI2)) { | ||
1708 | emit_i8(as, 47); | ||
1709 | emit_rmro(as, XV_RORX|VEX_64, dest, base, ofs); | ||
1710 | } else { | ||
1711 | if ((ir->op2 & IRSLOAD_TYPECHECK)) | ||
1712 | emit_shifti(as, XOg_ROR|REX_64, dest, 47); | ||
1713 | else | ||
1714 | emit_shifti(as, XOg_SHL|REX_64, dest, 17); | ||
1715 | emit_rmro(as, XO_MOV, dest|REX_64, base, ofs); | ||
1716 | } | ||
1717 | return; | ||
1718 | } else | ||
1719 | #endif | ||
1720 | emit_rmro(as, irt_isnum(t) ? XO_MOVSD : XO_MOV, dest, base, ofs); | ||
1477 | } | 1721 | } |
1478 | } else { | 1722 | } else { |
1479 | if (!(ir->op2 & IRSLOAD_TYPECHECK)) | 1723 | if (!(ir->op2 & IRSLOAD_TYPECHECK)) |
@@ -1485,11 +1729,42 @@ static void asm_sload(ASMState *as, IRIns *ir) | |||
1485 | asm_guardcc(as, irt_isnum(t) ? CC_AE : CC_NE); | 1729 | asm_guardcc(as, irt_isnum(t) ? CC_AE : CC_NE); |
1486 | if (LJ_64 && irt_type(t) >= IRT_NUM) { | 1730 | if (LJ_64 && irt_type(t) >= IRT_NUM) { |
1487 | lua_assert(irt_isinteger(t) || irt_isnum(t)); | 1731 | lua_assert(irt_isinteger(t) || irt_isnum(t)); |
1732 | #if LJ_GC64 | ||
1733 | emit_u32(as, LJ_TISNUM << 15); | ||
1734 | #else | ||
1488 | emit_u32(as, LJ_TISNUM); | 1735 | emit_u32(as, LJ_TISNUM); |
1736 | #endif | ||
1737 | emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4); | ||
1738 | #if LJ_GC64 | ||
1739 | } else if (irt_isnil(t)) { | ||
1740 | /* LJ_GC64 type check for nil: | ||
1741 | ** | ||
1742 | ** cmp qword [addr], -1 | ||
1743 | ** jne ->exit | ||
1744 | */ | ||
1745 | emit_i8(as, -1); | ||
1746 | emit_rmro(as, XO_ARITHi8, XOg_CMP|REX_64, base, ofs); | ||
1747 | } else if (irt_ispri(t)) { | ||
1748 | emit_u32(as, (irt_toitype(t) << 15) | 0x7fff); | ||
1489 | emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4); | 1749 | emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4); |
1490 | } else { | 1750 | } else { |
1751 | /* LJ_GC64 type check only: | ||
1752 | ** | ||
1753 | ** mov r64, [addr] | ||
1754 | ** sar r64, 47 | ||
1755 | ** cmp r32, itype | ||
1756 | ** jne ->exit | ||
1757 | */ | ||
1758 | Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, base)); | ||
1759 | emit_i8(as, irt_toitype(t)); | ||
1760 | emit_rr(as, XO_ARITHi8, XOg_CMP, tmp); | ||
1761 | emit_shifti(as, XOg_SAR|REX_64, tmp, 47); | ||
1762 | emit_rmro(as, XO_MOV, tmp|REX_64, base, ofs); | ||
1763 | #else | ||
1764 | } else { | ||
1491 | emit_i8(as, irt_toitype(t)); | 1765 | emit_i8(as, irt_toitype(t)); |
1492 | emit_rmro(as, XO_ARITHi8, XOg_CMP, base, ofs+4); | 1766 | emit_rmro(as, XO_ARITHi8, XOg_CMP, base, ofs+4); |
1767 | #endif | ||
1493 | } | 1768 | } |
1494 | } | 1769 | } |
1495 | } | 1770 | } |
@@ -1500,15 +1775,13 @@ static void asm_sload(ASMState *as, IRIns *ir) | |||
1500 | static void asm_cnew(ASMState *as, IRIns *ir) | 1775 | static void asm_cnew(ASMState *as, IRIns *ir) |
1501 | { | 1776 | { |
1502 | CTState *cts = ctype_ctsG(J2G(as->J)); | 1777 | CTState *cts = ctype_ctsG(J2G(as->J)); |
1503 | CTypeID ctypeid = (CTypeID)IR(ir->op1)->i; | 1778 | CTypeID id = (CTypeID)IR(ir->op1)->i; |
1504 | CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ? | 1779 | CTSize sz; |
1505 | lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i; | 1780 | CTInfo info = lj_ctype_info(cts, id, &sz); |
1506 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; | 1781 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; |
1507 | IRRef args[2]; | 1782 | IRRef args[4]; |
1508 | lua_assert(sz != CTSIZE_INVALID); | 1783 | lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL)); |
1509 | 1784 | ||
1510 | args[0] = ASMREF_L; /* lua_State *L */ | ||
1511 | args[1] = ASMREF_TMP1; /* MSize size */ | ||
1512 | as->gcsteps++; | 1785 | as->gcsteps++; |
1513 | asm_setupresult(as, ir, ci); /* GCcdata * */ | 1786 | asm_setupresult(as, ir, ci); /* GCcdata * */ |
1514 | 1787 | ||
@@ -1519,8 +1792,9 @@ static void asm_cnew(ASMState *as, IRIns *ir) | |||
1519 | Reg r64 = sz == 8 ? REX_64 : 0; | 1792 | Reg r64 = sz == 8 ? REX_64 : 0; |
1520 | if (irref_isk(ir->op2)) { | 1793 | if (irref_isk(ir->op2)) { |
1521 | IRIns *irk = IR(ir->op2); | 1794 | IRIns *irk = IR(ir->op2); |
1522 | uint64_t k = irk->o == IR_KINT64 ? ir_k64(irk)->u64 : | 1795 | uint64_t k = (irk->o == IR_KINT64 || |
1523 | (uint64_t)(uint32_t)irk->i; | 1796 | (LJ_GC64 && (irk->o == IR_KPTR || irk->o == IR_KKPTR))) ? |
1797 | ir_k64(irk)->u64 : (uint64_t)(uint32_t)irk->i; | ||
1524 | if (sz == 4 || checki32((int64_t)k)) { | 1798 | if (sz == 4 || checki32((int64_t)k)) { |
1525 | emit_i32(as, (int32_t)k); | 1799 | emit_i32(as, (int32_t)k); |
1526 | emit_rmro(as, XO_MOVmi, r64, RID_RET, sizeof(GCcdata)); | 1800 | emit_rmro(as, XO_MOVmi, r64, RID_RET, sizeof(GCcdata)); |
@@ -1551,15 +1825,26 @@ static void asm_cnew(ASMState *as, IRIns *ir) | |||
1551 | } while (1); | 1825 | } while (1); |
1552 | #endif | 1826 | #endif |
1553 | lua_assert(sz == 4 || sz == 8); | 1827 | lua_assert(sz == 4 || sz == 8); |
1828 | } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ | ||
1829 | ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; | ||
1830 | args[0] = ASMREF_L; /* lua_State *L */ | ||
1831 | args[1] = ir->op1; /* CTypeID id */ | ||
1832 | args[2] = ir->op2; /* CTSize sz */ | ||
1833 | args[3] = ASMREF_TMP1; /* CTSize align */ | ||
1834 | asm_gencall(as, ci, args); | ||
1835 | emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info)); | ||
1836 | return; | ||
1554 | } | 1837 | } |
1555 | 1838 | ||
1556 | /* Combine initialization of marked, gct and ctypeid. */ | 1839 | /* Combine initialization of marked, gct and ctypeid. */ |
1557 | emit_movtomro(as, RID_ECX, RID_RET, offsetof(GCcdata, marked)); | 1840 | emit_movtomro(as, RID_ECX, RID_RET, offsetof(GCcdata, marked)); |
1558 | emit_gri(as, XG_ARITHi(XOg_OR), RID_ECX, | 1841 | emit_gri(as, XG_ARITHi(XOg_OR), RID_ECX, |
1559 | (int32_t)((~LJ_TCDATA<<8)+(ctypeid<<16))); | 1842 | (int32_t)((~LJ_TCDATA<<8)+(id<<16))); |
1560 | emit_gri(as, XG_ARITHi(XOg_AND), RID_ECX, LJ_GC_WHITES); | 1843 | emit_gri(as, XG_ARITHi(XOg_AND), RID_ECX, LJ_GC_WHITES); |
1561 | emit_opgl(as, XO_MOVZXb, RID_ECX, gc.currentwhite); | 1844 | emit_opgl(as, XO_MOVZXb, RID_ECX, gc.currentwhite); |
1562 | 1845 | ||
1846 | args[0] = ASMREF_L; /* lua_State *L */ | ||
1847 | args[1] = ASMREF_TMP1; /* MSize size */ | ||
1563 | asm_gencall(as, ci, args); | 1848 | asm_gencall(as, ci, args); |
1564 | emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)(sz+sizeof(GCcdata))); | 1849 | emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)(sz+sizeof(GCcdata))); |
1565 | } | 1850 | } |
@@ -1574,7 +1859,7 @@ static void asm_tbar(ASMState *as, IRIns *ir) | |||
1574 | Reg tab = ra_alloc1(as, ir->op1, RSET_GPR); | 1859 | Reg tab = ra_alloc1(as, ir->op1, RSET_GPR); |
1575 | Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, tab)); | 1860 | Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, tab)); |
1576 | MCLabel l_end = emit_label(as); | 1861 | MCLabel l_end = emit_label(as); |
1577 | emit_movtomro(as, tmp, tab, offsetof(GCtab, gclist)); | 1862 | emit_movtomro(as, tmp|REX_GC64, tab, offsetof(GCtab, gclist)); |
1578 | emit_setgl(as, tab, gc.grayagain); | 1863 | emit_setgl(as, tab, gc.grayagain); |
1579 | emit_getgl(as, tmp, gc.grayagain); | 1864 | emit_getgl(as, tmp, gc.grayagain); |
1580 | emit_i8(as, ~LJ_GC_BLACK); | 1865 | emit_i8(as, ~LJ_GC_BLACK); |
@@ -1637,36 +1922,9 @@ static void asm_x87load(ASMState *as, IRRef ref) | |||
1637 | } | 1922 | } |
1638 | } | 1923 | } |
1639 | 1924 | ||
1640 | /* Try to rejoin pow from EXP2, MUL and LOG2 (if still unsplit). */ | ||
1641 | static int fpmjoin_pow(ASMState *as, IRIns *ir) | ||
1642 | { | ||
1643 | IRIns *irp = IR(ir->op1); | ||
1644 | if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) { | ||
1645 | IRIns *irpp = IR(irp->op1); | ||
1646 | if (irpp == ir-2 && irpp->o == IR_FPMATH && | ||
1647 | irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) { | ||
1648 | /* The modified regs must match with the *.dasc implementation. */ | ||
1649 | RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX); | ||
1650 | IRIns *irx; | ||
1651 | if (ra_hasreg(ir->r)) | ||
1652 | rset_clear(drop, ir->r); /* Dest reg handled below. */ | ||
1653 | ra_evictset(as, drop); | ||
1654 | ra_destreg(as, ir, RID_XMM0); | ||
1655 | emit_call(as, lj_vm_pow_sse); | ||
1656 | irx = IR(irpp->op1); | ||
1657 | if (ra_noreg(irx->r) && ra_gethint(irx->r) == RID_XMM1) | ||
1658 | irx->r = RID_INIT; /* Avoid allocating xmm1 for x. */ | ||
1659 | ra_left(as, RID_XMM0, irpp->op1); | ||
1660 | ra_left(as, RID_XMM1, irp->op2); | ||
1661 | return 1; | ||
1662 | } | ||
1663 | } | ||
1664 | return 0; | ||
1665 | } | ||
1666 | |||
1667 | static void asm_fpmath(ASMState *as, IRIns *ir) | 1925 | static void asm_fpmath(ASMState *as, IRIns *ir) |
1668 | { | 1926 | { |
1669 | IRFPMathOp fpm = ir->o == IR_FPMATH ? (IRFPMathOp)ir->op2 : IRFPM_OTHER; | 1927 | IRFPMathOp fpm = (IRFPMathOp)ir->op2; |
1670 | if (fpm == IRFPM_SQRT) { | 1928 | if (fpm == IRFPM_SQRT) { |
1671 | Reg dest = ra_dest(as, ir, RSET_FPR); | 1929 | Reg dest = ra_dest(as, ir, RSET_FPR); |
1672 | Reg left = asm_fuseload(as, ir->op1, RSET_FPR); | 1930 | Reg left = asm_fuseload(as, ir->op1, RSET_FPR); |
@@ -1697,51 +1955,29 @@ static void asm_fpmath(ASMState *as, IRIns *ir) | |||
1697 | fpm == IRFPM_CEIL ? lj_vm_ceil_sse : lj_vm_trunc_sse); | 1955 | fpm == IRFPM_CEIL ? lj_vm_ceil_sse : lj_vm_trunc_sse); |
1698 | ra_left(as, RID_XMM0, ir->op1); | 1956 | ra_left(as, RID_XMM0, ir->op1); |
1699 | } | 1957 | } |
1700 | } else if (fpm == IRFPM_EXP2 && fpmjoin_pow(as, ir)) { | 1958 | } else if (fpm == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) { |
1701 | /* Rejoined to pow(). */ | 1959 | /* Rejoined to pow(). */ |
1702 | } else { /* Handle x87 ops. */ | 1960 | } else { |
1703 | int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */ | 1961 | asm_callid(as, ir, IRCALL_lj_vm_floor + fpm); |
1704 | Reg dest = ir->r; | 1962 | } |
1705 | if (ra_hasreg(dest)) { | 1963 | } |
1706 | ra_free(as, dest); | 1964 | |
1707 | ra_modified(as, dest); | 1965 | #define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2) |
1708 | emit_rmro(as, XMM_MOVRM(as), dest, RID_ESP, ofs); | 1966 | |
1709 | } | 1967 | static void asm_ldexp(ASMState *as, IRIns *ir) |
1710 | emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs); | 1968 | { |
1711 | switch (fpm) { /* st0 = lj_vm_*(st0) */ | 1969 | int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */ |
1712 | case IRFPM_EXP: emit_call(as, lj_vm_exp_x87); break; | 1970 | Reg dest = ir->r; |
1713 | case IRFPM_EXP2: emit_call(as, lj_vm_exp2_x87); break; | 1971 | if (ra_hasreg(dest)) { |
1714 | case IRFPM_SIN: emit_x87op(as, XI_FSIN); break; | 1972 | ra_free(as, dest); |
1715 | case IRFPM_COS: emit_x87op(as, XI_FCOS); break; | 1973 | ra_modified(as, dest); |
1716 | case IRFPM_TAN: emit_x87op(as, XI_FPOP); emit_x87op(as, XI_FPTAN); break; | 1974 | emit_rmro(as, XO_MOVSD, dest, RID_ESP, ofs); |
1717 | case IRFPM_LOG: case IRFPM_LOG2: case IRFPM_LOG10: | ||
1718 | /* Note: the use of fyl2xp1 would be pointless here. When computing | ||
1719 | ** log(1.0+eps) the precision is already lost after 1.0 is added. | ||
1720 | ** Subtracting 1.0 won't recover it. OTOH math.log1p would make sense. | ||
1721 | */ | ||
1722 | emit_x87op(as, XI_FYL2X); break; | ||
1723 | case IRFPM_OTHER: | ||
1724 | switch (ir->o) { | ||
1725 | case IR_ATAN2: | ||
1726 | emit_x87op(as, XI_FPATAN); asm_x87load(as, ir->op2); break; | ||
1727 | case IR_LDEXP: | ||
1728 | emit_x87op(as, XI_FPOP1); emit_x87op(as, XI_FSCALE); break; | ||
1729 | default: lua_assert(0); break; | ||
1730 | } | ||
1731 | break; | ||
1732 | default: lua_assert(0); break; | ||
1733 | } | ||
1734 | asm_x87load(as, ir->op1); | ||
1735 | switch (fpm) { | ||
1736 | case IRFPM_LOG: emit_x87op(as, XI_FLDLN2); break; | ||
1737 | case IRFPM_LOG2: emit_x87op(as, XI_FLD1); break; | ||
1738 | case IRFPM_LOG10: emit_x87op(as, XI_FLDLG2); break; | ||
1739 | case IRFPM_OTHER: | ||
1740 | if (ir->o == IR_LDEXP) asm_x87load(as, ir->op2); | ||
1741 | break; | ||
1742 | default: break; | ||
1743 | } | ||
1744 | } | 1975 | } |
1976 | emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs); | ||
1977 | emit_x87op(as, XI_FPOP1); | ||
1978 | emit_x87op(as, XI_FSCALE); | ||
1979 | asm_x87load(as, ir->op1); | ||
1980 | asm_x87load(as, ir->op2); | ||
1745 | } | 1981 | } |
1746 | 1982 | ||
1747 | static void asm_fppowi(ASMState *as, IRIns *ir) | 1983 | static void asm_fppowi(ASMState *as, IRIns *ir) |
@@ -1757,26 +1993,15 @@ static void asm_fppowi(ASMState *as, IRIns *ir) | |||
1757 | ra_left(as, RID_EAX, ir->op2); | 1993 | ra_left(as, RID_EAX, ir->op2); |
1758 | } | 1994 | } |
1759 | 1995 | ||
1760 | #if LJ_64 && LJ_HASFFI | 1996 | static void asm_pow(ASMState *as, IRIns *ir) |
1761 | static void asm_arith64(ASMState *as, IRIns *ir, IRCallID id) | ||
1762 | { | 1997 | { |
1763 | const CCallInfo *ci = &lj_ir_callinfo[id]; | 1998 | #if LJ_64 && LJ_HASFFI |
1764 | IRRef args[2]; | 1999 | if (!irt_isnum(ir->t)) |
1765 | args[0] = ir->op1; | 2000 | asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : |
1766 | args[1] = ir->op2; | 2001 | IRCALL_lj_carith_powu64); |
1767 | asm_setupresult(as, ir, ci); | 2002 | else |
1768 | asm_gencall(as, ci, args); | ||
1769 | } | ||
1770 | #endif | 2003 | #endif |
1771 | 2004 | asm_fppowi(as, ir); | |
1772 | static void asm_intmod(ASMState *as, IRIns *ir) | ||
1773 | { | ||
1774 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_vm_modi]; | ||
1775 | IRRef args[2]; | ||
1776 | args[0] = ir->op1; | ||
1777 | args[1] = ir->op2; | ||
1778 | asm_setupresult(as, ir, ci); | ||
1779 | asm_gencall(as, ci, args); | ||
1780 | } | 2005 | } |
1781 | 2006 | ||
1782 | static int asm_swapops(ASMState *as, IRIns *ir) | 2007 | static int asm_swapops(ASMState *as, IRIns *ir) |
@@ -1959,6 +2184,44 @@ static void asm_add(ASMState *as, IRIns *ir) | |||
1959 | asm_intarith(as, ir, XOg_ADD); | 2184 | asm_intarith(as, ir, XOg_ADD); |
1960 | } | 2185 | } |
1961 | 2186 | ||
2187 | static void asm_sub(ASMState *as, IRIns *ir) | ||
2188 | { | ||
2189 | if (irt_isnum(ir->t)) | ||
2190 | asm_fparith(as, ir, XO_SUBSD); | ||
2191 | else /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */ | ||
2192 | asm_intarith(as, ir, XOg_SUB); | ||
2193 | } | ||
2194 | |||
2195 | static void asm_mul(ASMState *as, IRIns *ir) | ||
2196 | { | ||
2197 | if (irt_isnum(ir->t)) | ||
2198 | asm_fparith(as, ir, XO_MULSD); | ||
2199 | else | ||
2200 | asm_intarith(as, ir, XOg_X_IMUL); | ||
2201 | } | ||
2202 | |||
2203 | static void asm_div(ASMState *as, IRIns *ir) | ||
2204 | { | ||
2205 | #if LJ_64 && LJ_HASFFI | ||
2206 | if (!irt_isnum(ir->t)) | ||
2207 | asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 : | ||
2208 | IRCALL_lj_carith_divu64); | ||
2209 | else | ||
2210 | #endif | ||
2211 | asm_fparith(as, ir, XO_DIVSD); | ||
2212 | } | ||
2213 | |||
2214 | static void asm_mod(ASMState *as, IRIns *ir) | ||
2215 | { | ||
2216 | #if LJ_64 && LJ_HASFFI | ||
2217 | if (!irt_isint(ir->t)) | ||
2218 | asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : | ||
2219 | IRCALL_lj_carith_modu64); | ||
2220 | else | ||
2221 | #endif | ||
2222 | asm_callid(as, ir, IRCALL_lj_vm_modi); | ||
2223 | } | ||
2224 | |||
1962 | static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg) | 2225 | static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg) |
1963 | { | 2226 | { |
1964 | Reg dest = ra_dest(as, ir, RSET_GPR); | 2227 | Reg dest = ra_dest(as, ir, RSET_GPR); |
@@ -1966,7 +2229,17 @@ static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg) | |||
1966 | ra_left(as, dest, ir->op1); | 2229 | ra_left(as, dest, ir->op1); |
1967 | } | 2230 | } |
1968 | 2231 | ||
1969 | static void asm_min_max(ASMState *as, IRIns *ir, int cc) | 2232 | static void asm_neg(ASMState *as, IRIns *ir) |
2233 | { | ||
2234 | if (irt_isnum(ir->t)) | ||
2235 | asm_fparith(as, ir, XO_XORPS); | ||
2236 | else | ||
2237 | asm_neg_not(as, ir, XOg_NEG); | ||
2238 | } | ||
2239 | |||
2240 | #define asm_abs(as, ir) asm_fparith(as, ir, XO_ANDPS) | ||
2241 | |||
2242 | static void asm_intmin_max(ASMState *as, IRIns *ir, int cc) | ||
1970 | { | 2243 | { |
1971 | Reg right, dest = ra_dest(as, ir, RSET_GPR); | 2244 | Reg right, dest = ra_dest(as, ir, RSET_GPR); |
1972 | IRRef lref = ir->op1, rref = ir->op2; | 2245 | IRRef lref = ir->op1, rref = ir->op2; |
@@ -1977,7 +2250,30 @@ static void asm_min_max(ASMState *as, IRIns *ir, int cc) | |||
1977 | ra_left(as, dest, lref); | 2250 | ra_left(as, dest, lref); |
1978 | } | 2251 | } |
1979 | 2252 | ||
1980 | static void asm_bitswap(ASMState *as, IRIns *ir) | 2253 | static void asm_min(ASMState *as, IRIns *ir) |
2254 | { | ||
2255 | if (irt_isnum(ir->t)) | ||
2256 | asm_fparith(as, ir, XO_MINSD); | ||
2257 | else | ||
2258 | asm_intmin_max(as, ir, CC_G); | ||
2259 | } | ||
2260 | |||
2261 | static void asm_max(ASMState *as, IRIns *ir) | ||
2262 | { | ||
2263 | if (irt_isnum(ir->t)) | ||
2264 | asm_fparith(as, ir, XO_MAXSD); | ||
2265 | else | ||
2266 | asm_intmin_max(as, ir, CC_L); | ||
2267 | } | ||
2268 | |||
2269 | /* Note: don't use LEA for overflow-checking arithmetic! */ | ||
2270 | #define asm_addov(as, ir) asm_intarith(as, ir, XOg_ADD) | ||
2271 | #define asm_subov(as, ir) asm_intarith(as, ir, XOg_SUB) | ||
2272 | #define asm_mulov(as, ir) asm_intarith(as, ir, XOg_X_IMUL) | ||
2273 | |||
2274 | #define asm_bnot(as, ir) asm_neg_not(as, ir, XOg_NOT) | ||
2275 | |||
2276 | static void asm_bswap(ASMState *as, IRIns *ir) | ||
1981 | { | 2277 | { |
1982 | Reg dest = ra_dest(as, ir, RSET_GPR); | 2278 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1983 | as->mcp = emit_op(XO_BSWAP + ((dest&7) << 24), | 2279 | as->mcp = emit_op(XO_BSWAP + ((dest&7) << 24), |
@@ -1985,7 +2281,11 @@ static void asm_bitswap(ASMState *as, IRIns *ir) | |||
1985 | ra_left(as, dest, ir->op1); | 2281 | ra_left(as, dest, ir->op1); |
1986 | } | 2282 | } |
1987 | 2283 | ||
1988 | static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs) | 2284 | #define asm_band(as, ir) asm_intarith(as, ir, XOg_AND) |
2285 | #define asm_bor(as, ir) asm_intarith(as, ir, XOg_OR) | ||
2286 | #define asm_bxor(as, ir) asm_intarith(as, ir, XOg_XOR) | ||
2287 | |||
2288 | static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs, x86Op xv) | ||
1989 | { | 2289 | { |
1990 | IRRef rref = ir->op2; | 2290 | IRRef rref = ir->op2; |
1991 | IRIns *irr = IR(rref); | 2291 | IRIns *irr = IR(rref); |
@@ -1994,11 +2294,27 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs) | |||
1994 | int shift; | 2294 | int shift; |
1995 | dest = ra_dest(as, ir, RSET_GPR); | 2295 | dest = ra_dest(as, ir, RSET_GPR); |
1996 | shift = irr->i & (irt_is64(ir->t) ? 63 : 31); | 2296 | shift = irr->i & (irt_is64(ir->t) ? 63 : 31); |
2297 | if (!xv && shift && (as->flags & JIT_F_BMI2)) { | ||
2298 | Reg left = asm_fuseloadm(as, ir->op1, RSET_GPR, irt_is64(ir->t)); | ||
2299 | if (left != dest) { /* BMI2 rotate right by constant. */ | ||
2300 | emit_i8(as, xs == XOg_ROL ? -shift : shift); | ||
2301 | emit_mrm(as, VEX_64IR(ir, XV_RORX), dest, left); | ||
2302 | return; | ||
2303 | } | ||
2304 | } | ||
1997 | switch (shift) { | 2305 | switch (shift) { |
1998 | case 0: break; | 2306 | case 0: break; |
1999 | case 1: emit_rr(as, XO_SHIFT1, REX_64IR(ir, xs), dest); break; | 2307 | case 1: emit_rr(as, XO_SHIFT1, REX_64IR(ir, xs), dest); break; |
2000 | default: emit_shifti(as, REX_64IR(ir, xs), dest, shift); break; | 2308 | default: emit_shifti(as, REX_64IR(ir, xs), dest, shift); break; |
2001 | } | 2309 | } |
2310 | } else if ((as->flags & JIT_F_BMI2) && xv) { /* BMI2 variable shifts. */ | ||
2311 | Reg left, right; | ||
2312 | dest = ra_dest(as, ir, RSET_GPR); | ||
2313 | right = ra_alloc1(as, rref, RSET_GPR); | ||
2314 | left = asm_fuseloadm(as, ir->op1, rset_exclude(RSET_GPR, right), | ||
2315 | irt_is64(ir->t)); | ||
2316 | emit_mrm(as, VEX_64IR(ir, xv) ^ (right << 19), dest, left); | ||
2317 | return; | ||
2002 | } else { /* Variable shifts implicitly use register cl (i.e. ecx). */ | 2318 | } else { /* Variable shifts implicitly use register cl (i.e. ecx). */ |
2003 | Reg right; | 2319 | Reg right; |
2004 | dest = ra_dest(as, ir, rset_exclude(RSET_GPR, RID_ECX)); | 2320 | dest = ra_dest(as, ir, rset_exclude(RSET_GPR, RID_ECX)); |
@@ -2024,6 +2340,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs) | |||
2024 | */ | 2340 | */ |
2025 | } | 2341 | } |
2026 | 2342 | ||
2343 | #define asm_bshl(as, ir) asm_bitshift(as, ir, XOg_SHL, XV_SHLX) | ||
2344 | #define asm_bshr(as, ir) asm_bitshift(as, ir, XOg_SHR, XV_SHRX) | ||
2345 | #define asm_bsar(as, ir) asm_bitshift(as, ir, XOg_SAR, XV_SARX) | ||
2346 | #define asm_brol(as, ir) asm_bitshift(as, ir, XOg_ROL, 0) | ||
2347 | #define asm_bror(as, ir) asm_bitshift(as, ir, XOg_ROR, 0) | ||
2348 | |||
2027 | /* -- Comparisons --------------------------------------------------------- */ | 2349 | /* -- Comparisons --------------------------------------------------------- */ |
2028 | 2350 | ||
2029 | /* Virtual flags for unordered FP comparisons. */ | 2351 | /* Virtual flags for unordered FP comparisons. */ |
@@ -2050,8 +2372,9 @@ static const uint16_t asm_compmap[IR_ABC+1] = { | |||
2050 | }; | 2372 | }; |
2051 | 2373 | ||
2052 | /* FP and integer comparisons. */ | 2374 | /* FP and integer comparisons. */ |
2053 | static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc) | 2375 | static void asm_comp(ASMState *as, IRIns *ir) |
2054 | { | 2376 | { |
2377 | uint32_t cc = asm_compmap[ir->o]; | ||
2055 | if (irt_isnum(ir->t)) { | 2378 | if (irt_isnum(ir->t)) { |
2056 | IRRef lref = ir->op1; | 2379 | IRRef lref = ir->op1; |
2057 | IRRef rref = ir->op2; | 2380 | IRRef rref = ir->op2; |
@@ -2072,7 +2395,6 @@ static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc) | |||
2072 | cc ^= (VCC_PS|(5<<4)); /* A <-> B, AE <-> BE, PS <-> none */ | 2395 | cc ^= (VCC_PS|(5<<4)); /* A <-> B, AE <-> BE, PS <-> none */ |
2073 | } | 2396 | } |
2074 | left = ra_alloc1(as, lref, RSET_FPR); | 2397 | left = ra_alloc1(as, lref, RSET_FPR); |
2075 | right = asm_fuseload(as, rref, rset_exclude(RSET_FPR, left)); | ||
2076 | l_around = emit_label(as); | 2398 | l_around = emit_label(as); |
2077 | asm_guardcc(as, cc >> 4); | 2399 | asm_guardcc(as, cc >> 4); |
2078 | if (cc & VCC_P) { /* Extra CC_P branch required? */ | 2400 | if (cc & VCC_P) { /* Extra CC_P branch required? */ |
@@ -2089,6 +2411,7 @@ static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc) | |||
2089 | emit_jcc(as, CC_P, as->mcp); | 2411 | emit_jcc(as, CC_P, as->mcp); |
2090 | } | 2412 | } |
2091 | } | 2413 | } |
2414 | right = asm_fuseload(as, rref, rset_exclude(RSET_FPR, left)); | ||
2092 | emit_mrm(as, XO_UCOMISD, left, right); | 2415 | emit_mrm(as, XO_UCOMISD, left, right); |
2093 | } else { | 2416 | } else { |
2094 | IRRef lref = ir->op1, rref = ir->op2; | 2417 | IRRef lref = ir->op1, rref = ir->op2; |
@@ -2206,6 +2529,8 @@ static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc) | |||
2206 | } | 2529 | } |
2207 | } | 2530 | } |
2208 | 2531 | ||
2532 | #define asm_equal(as, ir) asm_comp(as, ir) | ||
2533 | |||
2209 | #if LJ_32 && LJ_HASFFI | 2534 | #if LJ_32 && LJ_HASFFI |
2210 | /* 64 bit integer comparisons in 32 bit mode. */ | 2535 | /* 64 bit integer comparisons in 32 bit mode. */ |
2211 | static void asm_comp_int64(ASMState *as, IRIns *ir) | 2536 | static void asm_comp_int64(ASMState *as, IRIns *ir) |
@@ -2288,13 +2613,9 @@ static void asm_hiop(ASMState *as, IRIns *ir) | |||
2288 | int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ | 2613 | int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ |
2289 | if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; | 2614 | if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; |
2290 | if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ | 2615 | if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ |
2291 | if (usehi || uselo) { | ||
2292 | if (irt_isfp(ir->t)) | ||
2293 | asm_conv_fp_int64(as, ir); | ||
2294 | else | ||
2295 | asm_conv_int64_fp(as, ir); | ||
2296 | } | ||
2297 | as->curins--; /* Always skip the CONV. */ | 2616 | as->curins--; /* Always skip the CONV. */ |
2617 | if (usehi || uselo) | ||
2618 | asm_conv64(as, ir); | ||
2298 | return; | 2619 | return; |
2299 | } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */ | 2620 | } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */ |
2300 | asm_comp_int64(as, ir); | 2621 | asm_comp_int64(as, ir); |
@@ -2343,6 +2664,16 @@ static void asm_hiop(ASMState *as, IRIns *ir) | |||
2343 | #endif | 2664 | #endif |
2344 | } | 2665 | } |
2345 | 2666 | ||
2667 | /* -- Profiling ----------------------------------------------------------- */ | ||
2668 | |||
2669 | static void asm_prof(ASMState *as, IRIns *ir) | ||
2670 | { | ||
2671 | UNUSED(ir); | ||
2672 | asm_guardcc(as, CC_NE); | ||
2673 | emit_i8(as, HOOK_PROFILE); | ||
2674 | emit_rma(as, XO_GROUP3b, XOg_TEST, &J2G(as->J)->hookmask); | ||
2675 | } | ||
2676 | |||
2346 | /* -- Stack handling ------------------------------------------------------ */ | 2677 | /* -- Stack handling ------------------------------------------------------ */ |
2347 | 2678 | ||
2348 | /* Check Lua stack size for overflow. Use exit handler as fallback. */ | 2679 | /* Check Lua stack size for overflow. Use exit handler as fallback. */ |
@@ -2357,14 +2688,19 @@ static void asm_stack_check(ASMState *as, BCReg topslot, | |||
2357 | emit_rmro(as, XO_MOV, r|REX_64, RID_ESP, 0); | 2688 | emit_rmro(as, XO_MOV, r|REX_64, RID_ESP, 0); |
2358 | else | 2689 | else |
2359 | ra_modified(as, r); | 2690 | ra_modified(as, r); |
2360 | emit_gri(as, XG_ARITHi(XOg_CMP), r, (int32_t)(8*topslot)); | 2691 | emit_gri(as, XG_ARITHi(XOg_CMP), r|REX_GC64, (int32_t)(8*topslot)); |
2361 | if (ra_hasreg(pbase) && pbase != r) | 2692 | if (ra_hasreg(pbase) && pbase != r) |
2362 | emit_rr(as, XO_ARITH(XOg_SUB), r, pbase); | 2693 | emit_rr(as, XO_ARITH(XOg_SUB), r|REX_GC64, pbase); |
2363 | else | 2694 | else |
2695 | #if LJ_GC64 | ||
2696 | emit_rmro(as, XO_ARITH(XOg_SUB), r|REX_64, RID_DISPATCH, | ||
2697 | (int32_t)dispofs(as, &J2G(as->J)->jit_base)); | ||
2698 | #else | ||
2364 | emit_rmro(as, XO_ARITH(XOg_SUB), r, RID_NONE, | 2699 | emit_rmro(as, XO_ARITH(XOg_SUB), r, RID_NONE, |
2365 | ptr2addr(&J2G(as->J)->jit_base)); | 2700 | ptr2addr(&J2G(as->J)->jit_base)); |
2366 | emit_rmro(as, XO_MOV, r, r, offsetof(lua_State, maxstack)); | 2701 | #endif |
2367 | emit_getgl(as, r, jit_L); | 2702 | emit_rmro(as, XO_MOV, r|REX_GC64, r, offsetof(lua_State, maxstack)); |
2703 | emit_getgl(as, r, cur_L); | ||
2368 | if (allow == RSET_EMPTY) /* Spill temp. register. */ | 2704 | if (allow == RSET_EMPTY) /* Spill temp. register. */ |
2369 | emit_rmro(as, XO_MOVto, r|REX_64, RID_ESP, 0); | 2705 | emit_rmro(as, XO_MOVto, r|REX_64, RID_ESP, 0); |
2370 | } | 2706 | } |
@@ -2373,13 +2709,15 @@ static void asm_stack_check(ASMState *as, BCReg topslot, | |||
2373 | static void asm_stack_restore(ASMState *as, SnapShot *snap) | 2709 | static void asm_stack_restore(ASMState *as, SnapShot *snap) |
2374 | { | 2710 | { |
2375 | SnapEntry *map = &as->T->snapmap[snap->mapofs]; | 2711 | SnapEntry *map = &as->T->snapmap[snap->mapofs]; |
2376 | SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1]; | 2712 | #if !LJ_FR2 || defined(LUA_USE_ASSERT) |
2713 | SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2]; | ||
2714 | #endif | ||
2377 | MSize n, nent = snap->nent; | 2715 | MSize n, nent = snap->nent; |
2378 | /* Store the value of all modified slots to the Lua stack. */ | 2716 | /* Store the value of all modified slots to the Lua stack. */ |
2379 | for (n = 0; n < nent; n++) { | 2717 | for (n = 0; n < nent; n++) { |
2380 | SnapEntry sn = map[n]; | 2718 | SnapEntry sn = map[n]; |
2381 | BCReg s = snap_slot(sn); | 2719 | BCReg s = snap_slot(sn); |
2382 | int32_t ofs = 8*((int32_t)s-1); | 2720 | int32_t ofs = 8*((int32_t)s-1-LJ_FR2); |
2383 | IRRef ref = snap_ref(sn); | 2721 | IRRef ref = snap_ref(sn); |
2384 | IRIns *ir = IR(ref); | 2722 | IRIns *ir = IR(ref); |
2385 | if ((sn & SNAP_NORESTORE)) | 2723 | if ((sn & SNAP_NORESTORE)) |
@@ -2392,16 +2730,44 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) | |||
2392 | (LJ_DUALNUM && irt_isinteger(ir->t))); | 2730 | (LJ_DUALNUM && irt_isinteger(ir->t))); |
2393 | if (!irref_isk(ref)) { | 2731 | if (!irref_isk(ref)) { |
2394 | Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); | 2732 | Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); |
2733 | #if LJ_GC64 | ||
2734 | if (irt_is64(ir->t)) { | ||
2735 | /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */ | ||
2736 | emit_u32(as, irt_toitype(ir->t) << 15); | ||
2737 | emit_rmro(as, XO_ARITHi, XOg_OR, RID_BASE, ofs+4); | ||
2738 | } else if (LJ_DUALNUM && irt_isinteger(ir->t)) { | ||
2739 | emit_movmroi(as, RID_BASE, ofs+4, LJ_TISNUM << 15); | ||
2740 | } else { | ||
2741 | emit_movmroi(as, RID_BASE, ofs+4, (irt_toitype(ir->t)<<15)|0x7fff); | ||
2742 | } | ||
2743 | #endif | ||
2395 | emit_movtomro(as, REX_64IR(ir, src), RID_BASE, ofs); | 2744 | emit_movtomro(as, REX_64IR(ir, src), RID_BASE, ofs); |
2745 | #if LJ_GC64 | ||
2746 | } else { | ||
2747 | TValue k; | ||
2748 | lj_ir_kvalue(as->J->L, &k, ir); | ||
2749 | if (tvisnil(&k)) { | ||
2750 | emit_i32(as, -1); | ||
2751 | emit_rmro(as, XO_MOVmi, REX_64, RID_BASE, ofs); | ||
2752 | } else { | ||
2753 | emit_movmroi(as, RID_BASE, ofs+4, k.u32.hi); | ||
2754 | emit_movmroi(as, RID_BASE, ofs, k.u32.lo); | ||
2755 | } | ||
2756 | #else | ||
2396 | } else if (!irt_ispri(ir->t)) { | 2757 | } else if (!irt_ispri(ir->t)) { |
2397 | emit_movmroi(as, RID_BASE, ofs, ir->i); | 2758 | emit_movmroi(as, RID_BASE, ofs, ir->i); |
2759 | #endif | ||
2398 | } | 2760 | } |
2399 | if ((sn & (SNAP_CONT|SNAP_FRAME))) { | 2761 | if ((sn & (SNAP_CONT|SNAP_FRAME))) { |
2762 | #if !LJ_FR2 | ||
2400 | if (s != 0) /* Do not overwrite link to previous frame. */ | 2763 | if (s != 0) /* Do not overwrite link to previous frame. */ |
2401 | emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*flinks--)); | 2764 | emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*flinks--)); |
2765 | #endif | ||
2766 | #if !LJ_GC64 | ||
2402 | } else { | 2767 | } else { |
2403 | if (!(LJ_64 && irt_islightud(ir->t))) | 2768 | if (!(LJ_64 && irt_islightud(ir->t))) |
2404 | emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t)); | 2769 | emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t)); |
2770 | #endif | ||
2405 | } | 2771 | } |
2406 | } | 2772 | } |
2407 | checkmclim(as); | 2773 | checkmclim(as); |
@@ -2427,11 +2793,15 @@ static void asm_gc_check(ASMState *as) | |||
2427 | args[1] = ASMREF_TMP2; /* MSize steps */ | 2793 | args[1] = ASMREF_TMP2; /* MSize steps */ |
2428 | asm_gencall(as, ci, args); | 2794 | asm_gencall(as, ci, args); |
2429 | tmp = ra_releasetmp(as, ASMREF_TMP1); | 2795 | tmp = ra_releasetmp(as, ASMREF_TMP1); |
2796 | #if LJ_GC64 | ||
2797 | emit_rmro(as, XO_LEA, tmp|REX_64, RID_DISPATCH, GG_DISP2G); | ||
2798 | #else | ||
2430 | emit_loada(as, tmp, J2G(as->J)); | 2799 | emit_loada(as, tmp, J2G(as->J)); |
2800 | #endif | ||
2431 | emit_loadi(as, ra_releasetmp(as, ASMREF_TMP2), as->gcsteps); | 2801 | emit_loadi(as, ra_releasetmp(as, ASMREF_TMP2), as->gcsteps); |
2432 | /* Jump around GC step if GC total < GC threshold. */ | 2802 | /* Jump around GC step if GC total < GC threshold. */ |
2433 | emit_sjcc(as, CC_B, l_end); | 2803 | emit_sjcc(as, CC_B, l_end); |
2434 | emit_opgl(as, XO_ARITH(XOg_CMP), tmp, gc.threshold); | 2804 | emit_opgl(as, XO_ARITH(XOg_CMP), tmp|REX_GC64, gc.threshold); |
2435 | emit_getgl(as, tmp, gc.total); | 2805 | emit_getgl(as, tmp, gc.total); |
2436 | as->gcsteps = 0; | 2806 | as->gcsteps = 0; |
2437 | checkmclim(as); | 2807 | checkmclim(as); |
@@ -2496,7 +2866,7 @@ static void asm_head_root_base(ASMState *as) | |||
2496 | if (rset_test(as->modset, r) || irt_ismarked(ir->t)) | 2866 | if (rset_test(as->modset, r) || irt_ismarked(ir->t)) |
2497 | ir->r = RID_INIT; /* No inheritance for modified BASE register. */ | 2867 | ir->r = RID_INIT; /* No inheritance for modified BASE register. */ |
2498 | if (r != RID_BASE) | 2868 | if (r != RID_BASE) |
2499 | emit_rr(as, XO_MOV, r, RID_BASE); | 2869 | emit_rr(as, XO_MOV, r|REX_GC64, RID_BASE); |
2500 | } | 2870 | } |
2501 | } | 2871 | } |
2502 | 2872 | ||
@@ -2512,8 +2882,9 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow) | |||
2512 | if (irp->r == r) { | 2882 | if (irp->r == r) { |
2513 | rset_clear(allow, r); /* Mark same BASE register as coalesced. */ | 2883 | rset_clear(allow, r); /* Mark same BASE register as coalesced. */ |
2514 | } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) { | 2884 | } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) { |
2885 | /* Move from coalesced parent reg. */ | ||
2515 | rset_clear(allow, irp->r); | 2886 | rset_clear(allow, irp->r); |
2516 | emit_rr(as, XO_MOV, r, irp->r); /* Move from coalesced parent reg. */ | 2887 | emit_rr(as, XO_MOV, r|REX_GC64, irp->r); |
2517 | } else { | 2888 | } else { |
2518 | emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */ | 2889 | emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */ |
2519 | } | 2890 | } |
@@ -2592,163 +2963,6 @@ static void asm_tail_prep(ASMState *as) | |||
2592 | } | 2963 | } |
2593 | } | 2964 | } |
2594 | 2965 | ||
2595 | /* -- Instruction dispatch ------------------------------------------------ */ | ||
2596 | |||
2597 | /* Assemble a single instruction. */ | ||
2598 | static void asm_ir(ASMState *as, IRIns *ir) | ||
2599 | { | ||
2600 | switch ((IROp)ir->o) { | ||
2601 | /* Miscellaneous ops. */ | ||
2602 | case IR_LOOP: asm_loop(as); break; | ||
2603 | case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break; | ||
2604 | case IR_USE: | ||
2605 | ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break; | ||
2606 | case IR_PHI: asm_phi(as, ir); break; | ||
2607 | case IR_HIOP: asm_hiop(as, ir); break; | ||
2608 | case IR_GCSTEP: asm_gcstep(as, ir); break; | ||
2609 | |||
2610 | /* Guarded assertions. */ | ||
2611 | case IR_LT: case IR_GE: case IR_LE: case IR_GT: | ||
2612 | case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT: | ||
2613 | case IR_EQ: case IR_NE: case IR_ABC: | ||
2614 | asm_comp(as, ir, asm_compmap[ir->o]); | ||
2615 | break; | ||
2616 | |||
2617 | case IR_RETF: asm_retf(as, ir); break; | ||
2618 | |||
2619 | /* Bit ops. */ | ||
2620 | case IR_BNOT: asm_neg_not(as, ir, XOg_NOT); break; | ||
2621 | case IR_BSWAP: asm_bitswap(as, ir); break; | ||
2622 | |||
2623 | case IR_BAND: asm_intarith(as, ir, XOg_AND); break; | ||
2624 | case IR_BOR: asm_intarith(as, ir, XOg_OR); break; | ||
2625 | case IR_BXOR: asm_intarith(as, ir, XOg_XOR); break; | ||
2626 | |||
2627 | case IR_BSHL: asm_bitshift(as, ir, XOg_SHL); break; | ||
2628 | case IR_BSHR: asm_bitshift(as, ir, XOg_SHR); break; | ||
2629 | case IR_BSAR: asm_bitshift(as, ir, XOg_SAR); break; | ||
2630 | case IR_BROL: asm_bitshift(as, ir, XOg_ROL); break; | ||
2631 | case IR_BROR: asm_bitshift(as, ir, XOg_ROR); break; | ||
2632 | |||
2633 | /* Arithmetic ops. */ | ||
2634 | case IR_ADD: asm_add(as, ir); break; | ||
2635 | case IR_SUB: | ||
2636 | if (irt_isnum(ir->t)) | ||
2637 | asm_fparith(as, ir, XO_SUBSD); | ||
2638 | else /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */ | ||
2639 | asm_intarith(as, ir, XOg_SUB); | ||
2640 | break; | ||
2641 | case IR_MUL: | ||
2642 | if (irt_isnum(ir->t)) | ||
2643 | asm_fparith(as, ir, XO_MULSD); | ||
2644 | else | ||
2645 | asm_intarith(as, ir, XOg_X_IMUL); | ||
2646 | break; | ||
2647 | case IR_DIV: | ||
2648 | #if LJ_64 && LJ_HASFFI | ||
2649 | if (!irt_isnum(ir->t)) | ||
2650 | asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 : | ||
2651 | IRCALL_lj_carith_divu64); | ||
2652 | else | ||
2653 | #endif | ||
2654 | asm_fparith(as, ir, XO_DIVSD); | ||
2655 | break; | ||
2656 | case IR_MOD: | ||
2657 | #if LJ_64 && LJ_HASFFI | ||
2658 | if (!irt_isint(ir->t)) | ||
2659 | asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : | ||
2660 | IRCALL_lj_carith_modu64); | ||
2661 | else | ||
2662 | #endif | ||
2663 | asm_intmod(as, ir); | ||
2664 | break; | ||
2665 | |||
2666 | case IR_NEG: | ||
2667 | if (irt_isnum(ir->t)) | ||
2668 | asm_fparith(as, ir, XO_XORPS); | ||
2669 | else | ||
2670 | asm_neg_not(as, ir, XOg_NEG); | ||
2671 | break; | ||
2672 | case IR_ABS: asm_fparith(as, ir, XO_ANDPS); break; | ||
2673 | |||
2674 | case IR_MIN: | ||
2675 | if (irt_isnum(ir->t)) | ||
2676 | asm_fparith(as, ir, XO_MINSD); | ||
2677 | else | ||
2678 | asm_min_max(as, ir, CC_G); | ||
2679 | break; | ||
2680 | case IR_MAX: | ||
2681 | if (irt_isnum(ir->t)) | ||
2682 | asm_fparith(as, ir, XO_MAXSD); | ||
2683 | else | ||
2684 | asm_min_max(as, ir, CC_L); | ||
2685 | break; | ||
2686 | |||
2687 | case IR_FPMATH: case IR_ATAN2: case IR_LDEXP: | ||
2688 | asm_fpmath(as, ir); | ||
2689 | break; | ||
2690 | case IR_POW: | ||
2691 | #if LJ_64 && LJ_HASFFI | ||
2692 | if (!irt_isnum(ir->t)) | ||
2693 | asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : | ||
2694 | IRCALL_lj_carith_powu64); | ||
2695 | else | ||
2696 | #endif | ||
2697 | asm_fppowi(as, ir); | ||
2698 | break; | ||
2699 | |||
2700 | /* Overflow-checking arithmetic ops. Note: don't use LEA here! */ | ||
2701 | case IR_ADDOV: asm_intarith(as, ir, XOg_ADD); break; | ||
2702 | case IR_SUBOV: asm_intarith(as, ir, XOg_SUB); break; | ||
2703 | case IR_MULOV: asm_intarith(as, ir, XOg_X_IMUL); break; | ||
2704 | |||
2705 | /* Memory references. */ | ||
2706 | case IR_AREF: asm_aref(as, ir); break; | ||
2707 | case IR_HREF: asm_href(as, ir); break; | ||
2708 | case IR_HREFK: asm_hrefk(as, ir); break; | ||
2709 | case IR_NEWREF: asm_newref(as, ir); break; | ||
2710 | case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break; | ||
2711 | case IR_FREF: asm_fref(as, ir); break; | ||
2712 | case IR_STRREF: asm_strref(as, ir); break; | ||
2713 | |||
2714 | /* Loads and stores. */ | ||
2715 | case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: | ||
2716 | asm_ahuvload(as, ir); | ||
2717 | break; | ||
2718 | case IR_FLOAD: case IR_XLOAD: asm_fxload(as, ir); break; | ||
2719 | case IR_SLOAD: asm_sload(as, ir); break; | ||
2720 | |||
2721 | case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break; | ||
2722 | case IR_FSTORE: case IR_XSTORE: asm_fxstore(as, ir); break; | ||
2723 | |||
2724 | /* Allocations. */ | ||
2725 | case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break; | ||
2726 | case IR_TNEW: asm_tnew(as, ir); break; | ||
2727 | case IR_TDUP: asm_tdup(as, ir); break; | ||
2728 | case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break; | ||
2729 | |||
2730 | /* Write barriers. */ | ||
2731 | case IR_TBAR: asm_tbar(as, ir); break; | ||
2732 | case IR_OBAR: asm_obar(as, ir); break; | ||
2733 | |||
2734 | /* Type conversions. */ | ||
2735 | case IR_TOBIT: asm_tobit(as, ir); break; | ||
2736 | case IR_CONV: asm_conv(as, ir); break; | ||
2737 | case IR_TOSTR: asm_tostr(as, ir); break; | ||
2738 | case IR_STRTO: asm_strto(as, ir); break; | ||
2739 | |||
2740 | /* Calls. */ | ||
2741 | case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break; | ||
2742 | case IR_CALLXS: asm_callx(as, ir); break; | ||
2743 | case IR_CARG: break; | ||
2744 | |||
2745 | default: | ||
2746 | setintV(&as->J->errinfo, ir->o); | ||
2747 | lj_trace_err_info(as->J, LJ_TRERR_NYIIR); | ||
2748 | break; | ||
2749 | } | ||
2750 | } | ||
2751 | |||
2752 | /* -- Trace setup --------------------------------------------------------- */ | 2966 | /* -- Trace setup --------------------------------------------------------- */ |
2753 | 2967 | ||
2754 | /* Ensure there are enough stack slots for call arguments. */ | 2968 | /* Ensure there are enough stack slots for call arguments. */ |
@@ -2771,6 +2985,7 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) | |||
2771 | static void asm_setup_target(ASMState *as) | 2985 | static void asm_setup_target(ASMState *as) |
2772 | { | 2986 | { |
2773 | asm_exitstub_setup(as, as->T->nsnap); | 2987 | asm_exitstub_setup(as, as->T->nsnap); |
2988 | as->mrm.base = 0; | ||
2774 | } | 2989 | } |
2775 | 2990 | ||
2776 | /* -- Trace patching ------------------------------------------------------ */ | 2991 | /* -- Trace patching ------------------------------------------------------ */ |
@@ -2883,13 +3098,19 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) | |||
2883 | MSize len = T->szmcode; | 3098 | MSize len = T->szmcode; |
2884 | MCode *px = exitstub_addr(J, exitno) - 6; | 3099 | MCode *px = exitstub_addr(J, exitno) - 6; |
2885 | MCode *pe = p+len-6; | 3100 | MCode *pe = p+len-6; |
2886 | uint32_t stateaddr = u32ptr(&J2G(J)->vmstate); | 3101 | #if LJ_GC64 |
3102 | uint32_t statei = (uint32_t)(GG_OFS(g.vmstate) - GG_OFS(dispatch)); | ||
3103 | #else | ||
3104 | uint32_t statei = u32ptr(&J2G(J)->vmstate); | ||
3105 | #endif | ||
2887 | if (len > 5 && p[len-5] == XI_JMP && p+len-6 + *(int32_t *)(p+len-4) == px) | 3106 | if (len > 5 && p[len-5] == XI_JMP && p+len-6 + *(int32_t *)(p+len-4) == px) |
2888 | *(int32_t *)(p+len-4) = jmprel(p+len, target); | 3107 | *(int32_t *)(p+len-4) = jmprel(p+len, target); |
2889 | /* Do not patch parent exit for a stack check. Skip beyond vmstate update. */ | 3108 | /* Do not patch parent exit for a stack check. Skip beyond vmstate update. */ |
2890 | for (; p < pe; p += asm_x86_inslen(p)) | 3109 | for (; p < pe; p += asm_x86_inslen(p)) { |
2891 | if (*(uint32_t *)(p+(LJ_64 ? 3 : 2)) == stateaddr && p[0] == XI_MOVmi) | 3110 | intptr_t ofs = LJ_GC64 ? (p[0] & 0xf0) == 0x40 : LJ_64; |
3111 | if (*(uint32_t *)(p+2+ofs) == statei && p[ofs+LJ_GC64-LJ_64] == XI_MOVmi) | ||
2892 | break; | 3112 | break; |
3113 | } | ||
2893 | lua_assert(p < pe); | 3114 | lua_assert(p < pe); |
2894 | for (; p < pe; p += asm_x86_inslen(p)) | 3115 | for (; p < pe; p += asm_x86_inslen(p)) |
2895 | if ((*(uint16_t *)p & 0xf0ff) == 0x800f && p + *(int32_t *)(p+2) == px) | 3116 | if ((*(uint16_t *)p & 0xf0ff) == 0x800f && p + *(int32_t *)(p+2) == px) |
diff --git a/src/lj_bc.h b/src/lj_bc.h index 8fd7a2ed..44c78f83 100644 --- a/src/lj_bc.h +++ b/src/lj_bc.h | |||
@@ -89,6 +89,8 @@ | |||
89 | _(ISFC, dst, ___, var, ___) \ | 89 | _(ISFC, dst, ___, var, ___) \ |
90 | _(IST, ___, ___, var, ___) \ | 90 | _(IST, ___, ___, var, ___) \ |
91 | _(ISF, ___, ___, var, ___) \ | 91 | _(ISF, ___, ___, var, ___) \ |
92 | _(ISTYPE, var, ___, lit, ___) \ | ||
93 | _(ISNUM, var, ___, lit, ___) \ | ||
92 | \ | 94 | \ |
93 | /* Unary ops. */ \ | 95 | /* Unary ops. */ \ |
94 | _(MOV, dst, ___, var, ___) \ | 96 | _(MOV, dst, ___, var, ___) \ |
@@ -143,10 +145,12 @@ | |||
143 | _(TGETV, dst, var, var, index) \ | 145 | _(TGETV, dst, var, var, index) \ |
144 | _(TGETS, dst, var, str, index) \ | 146 | _(TGETS, dst, var, str, index) \ |
145 | _(TGETB, dst, var, lit, index) \ | 147 | _(TGETB, dst, var, lit, index) \ |
148 | _(TGETR, dst, var, var, index) \ | ||
146 | _(TSETV, var, var, var, newindex) \ | 149 | _(TSETV, var, var, var, newindex) \ |
147 | _(TSETS, var, var, str, newindex) \ | 150 | _(TSETS, var, var, str, newindex) \ |
148 | _(TSETB, var, var, lit, newindex) \ | 151 | _(TSETB, var, var, lit, newindex) \ |
149 | _(TSETM, base, ___, num, newindex) \ | 152 | _(TSETM, base, ___, num, newindex) \ |
153 | _(TSETR, var, var, var, newindex) \ | ||
150 | \ | 154 | \ |
151 | /* Calls and vararg handling. T = tail call. */ \ | 155 | /* Calls and vararg handling. T = tail call. */ \ |
152 | _(CALLM, base, lit, lit, call) \ | 156 | _(CALLM, base, lit, lit, call) \ |
diff --git a/src/lj_bcdump.h b/src/lj_bcdump.h index ad564619..f458d41f 100644 --- a/src/lj_bcdump.h +++ b/src/lj_bcdump.h | |||
@@ -36,14 +36,15 @@ | |||
36 | /* If you perform *any* kind of private modifications to the bytecode itself | 36 | /* If you perform *any* kind of private modifications to the bytecode itself |
37 | ** or to the dump format, you *must* set BCDUMP_VERSION to 0x80 or higher. | 37 | ** or to the dump format, you *must* set BCDUMP_VERSION to 0x80 or higher. |
38 | */ | 38 | */ |
39 | #define BCDUMP_VERSION 1 | 39 | #define BCDUMP_VERSION 2 |
40 | 40 | ||
41 | /* Compatibility flags. */ | 41 | /* Compatibility flags. */ |
42 | #define BCDUMP_F_BE 0x01 | 42 | #define BCDUMP_F_BE 0x01 |
43 | #define BCDUMP_F_STRIP 0x02 | 43 | #define BCDUMP_F_STRIP 0x02 |
44 | #define BCDUMP_F_FFI 0x04 | 44 | #define BCDUMP_F_FFI 0x04 |
45 | #define BCDUMP_F_FR2 0x08 | ||
45 | 46 | ||
46 | #define BCDUMP_F_KNOWN (BCDUMP_F_FFI*2-1) | 47 | #define BCDUMP_F_KNOWN (BCDUMP_F_FR2*2-1) |
47 | 48 | ||
48 | /* Type codes for the GC constants of a prototype. Plus length for strings. */ | 49 | /* Type codes for the GC constants of a prototype. Plus length for strings. */ |
49 | enum { | 50 | enum { |
@@ -61,6 +62,7 @@ enum { | |||
61 | 62 | ||
62 | LJ_FUNC int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, | 63 | LJ_FUNC int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, |
63 | void *data, int strip); | 64 | void *data, int strip); |
65 | LJ_FUNC GCproto *lj_bcread_proto(LexState *ls); | ||
64 | LJ_FUNC GCproto *lj_bcread(LexState *ls); | 66 | LJ_FUNC GCproto *lj_bcread(LexState *ls); |
65 | 67 | ||
66 | #endif | 68 | #endif |
diff --git a/src/lj_bcread.c b/src/lj_bcread.c index b88794eb..1585272f 100644 --- a/src/lj_bcread.c +++ b/src/lj_bcread.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include "lj_obj.h" | 9 | #include "lj_obj.h" |
10 | #include "lj_gc.h" | 10 | #include "lj_gc.h" |
11 | #include "lj_err.h" | 11 | #include "lj_err.h" |
12 | #include "lj_buf.h" | ||
12 | #include "lj_str.h" | 13 | #include "lj_str.h" |
13 | #include "lj_tab.h" | 14 | #include "lj_tab.h" |
14 | #include "lj_bc.h" | 15 | #include "lj_bc.h" |
@@ -20,6 +21,7 @@ | |||
20 | #include "lj_lex.h" | 21 | #include "lj_lex.h" |
21 | #include "lj_bcdump.h" | 22 | #include "lj_bcdump.h" |
22 | #include "lj_state.h" | 23 | #include "lj_state.h" |
24 | #include "lj_strfmt.h" | ||
23 | 25 | ||
24 | /* Reuse some lexer fields for our own purposes. */ | 26 | /* Reuse some lexer fields for our own purposes. */ |
25 | #define bcread_flags(ls) ls->level | 27 | #define bcread_flags(ls) ls->level |
@@ -38,85 +40,74 @@ static LJ_NOINLINE void bcread_error(LexState *ls, ErrMsg em) | |||
38 | const char *name = ls->chunkarg; | 40 | const char *name = ls->chunkarg; |
39 | if (*name == BCDUMP_HEAD1) name = "(binary)"; | 41 | if (*name == BCDUMP_HEAD1) name = "(binary)"; |
40 | else if (*name == '@' || *name == '=') name++; | 42 | else if (*name == '@' || *name == '=') name++; |
41 | lj_str_pushf(L, "%s: %s", name, err2msg(em)); | 43 | lj_strfmt_pushf(L, "%s: %s", name, err2msg(em)); |
42 | lj_err_throw(L, LUA_ERRSYNTAX); | 44 | lj_err_throw(L, LUA_ERRSYNTAX); |
43 | } | 45 | } |
44 | 46 | ||
45 | /* Resize input buffer. */ | 47 | /* Refill buffer. */ |
46 | static void bcread_resize(LexState *ls, MSize len) | ||
47 | { | ||
48 | if (ls->sb.sz < len) { | ||
49 | MSize sz = ls->sb.sz * 2; | ||
50 | while (len > sz) sz = sz * 2; | ||
51 | lj_str_resizebuf(ls->L, &ls->sb, sz); | ||
52 | /* Caveat: this may change ls->sb.buf which may affect ls->p. */ | ||
53 | } | ||
54 | } | ||
55 | |||
56 | /* Refill buffer if needed. */ | ||
57 | static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need) | 48 | static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need) |
58 | { | 49 | { |
59 | lua_assert(len != 0); | 50 | lua_assert(len != 0); |
60 | if (len > LJ_MAX_MEM || ls->current < 0) | 51 | if (len > LJ_MAX_BUF || ls->c < 0) |
61 | bcread_error(ls, LJ_ERR_BCBAD); | 52 | bcread_error(ls, LJ_ERR_BCBAD); |
62 | do { | 53 | do { |
63 | const char *buf; | 54 | const char *buf; |
64 | size_t size; | 55 | size_t sz; |
65 | if (ls->n) { /* Copy remainder to buffer. */ | 56 | char *p = sbufB(&ls->sb); |
66 | if (ls->sb.n) { /* Move down in buffer. */ | 57 | MSize n = (MSize)(ls->pe - ls->p); |
67 | lua_assert(ls->p + ls->n == ls->sb.buf + ls->sb.n); | 58 | if (n) { /* Copy remainder to buffer. */ |
68 | if (ls->n != ls->sb.n) | 59 | if (sbuflen(&ls->sb)) { /* Move down in buffer. */ |
69 | memmove(ls->sb.buf, ls->p, ls->n); | 60 | lua_assert(ls->pe == sbufP(&ls->sb)); |
61 | if (ls->p != p) memmove(p, ls->p, n); | ||
70 | } else { /* Copy from buffer provided by reader. */ | 62 | } else { /* Copy from buffer provided by reader. */ |
71 | bcread_resize(ls, len); | 63 | p = lj_buf_need(&ls->sb, len); |
72 | memcpy(ls->sb.buf, ls->p, ls->n); | 64 | memcpy(p, ls->p, n); |
73 | } | 65 | } |
74 | ls->p = ls->sb.buf; | 66 | ls->p = p; |
67 | ls->pe = p + n; | ||
75 | } | 68 | } |
76 | ls->sb.n = ls->n; | 69 | setsbufP(&ls->sb, p + n); |
77 | buf = ls->rfunc(ls->L, ls->rdata, &size); /* Get more data from reader. */ | 70 | buf = ls->rfunc(ls->L, ls->rdata, &sz); /* Get more data from reader. */ |
78 | if (buf == NULL || size == 0) { /* EOF? */ | 71 | if (buf == NULL || sz == 0) { /* EOF? */ |
79 | if (need) bcread_error(ls, LJ_ERR_BCBAD); | 72 | if (need) bcread_error(ls, LJ_ERR_BCBAD); |
80 | ls->current = -1; /* Only bad if we get called again. */ | 73 | ls->c = -1; /* Only bad if we get called again. */ |
81 | break; | 74 | break; |
82 | } | 75 | } |
83 | if (size >= LJ_MAX_MEM - ls->sb.n) lj_err_mem(ls->L); | 76 | if (sz >= LJ_MAX_BUF - n) lj_err_mem(ls->L); |
84 | if (ls->sb.n) { /* Append to buffer. */ | 77 | if (n) { /* Append to buffer. */ |
85 | MSize n = ls->sb.n + (MSize)size; | 78 | n += (MSize)sz; |
86 | bcread_resize(ls, n < len ? len : n); | 79 | p = lj_buf_need(&ls->sb, n < len ? len : n); |
87 | memcpy(ls->sb.buf + ls->sb.n, buf, size); | 80 | memcpy(sbufP(&ls->sb), buf, sz); |
88 | ls->n = ls->sb.n = n; | 81 | setsbufP(&ls->sb, p + n); |
89 | ls->p = ls->sb.buf; | 82 | ls->p = p; |
83 | ls->pe = p + n; | ||
90 | } else { /* Return buffer provided by reader. */ | 84 | } else { /* Return buffer provided by reader. */ |
91 | ls->n = (MSize)size; | ||
92 | ls->p = buf; | 85 | ls->p = buf; |
86 | ls->pe = buf + sz; | ||
93 | } | 87 | } |
94 | } while (ls->n < len); | 88 | } while ((MSize)(ls->pe - ls->p) < len); |
95 | } | 89 | } |
96 | 90 | ||
97 | /* Need a certain number of bytes. */ | 91 | /* Need a certain number of bytes. */ |
98 | static LJ_AINLINE void bcread_need(LexState *ls, MSize len) | 92 | static LJ_AINLINE void bcread_need(LexState *ls, MSize len) |
99 | { | 93 | { |
100 | if (LJ_UNLIKELY(ls->n < len)) | 94 | if (LJ_UNLIKELY((MSize)(ls->pe - ls->p) < len)) |
101 | bcread_fill(ls, len, 1); | 95 | bcread_fill(ls, len, 1); |
102 | } | 96 | } |
103 | 97 | ||
104 | /* Want to read up to a certain number of bytes, but may need less. */ | 98 | /* Want to read up to a certain number of bytes, but may need less. */ |
105 | static LJ_AINLINE void bcread_want(LexState *ls, MSize len) | 99 | static LJ_AINLINE void bcread_want(LexState *ls, MSize len) |
106 | { | 100 | { |
107 | if (LJ_UNLIKELY(ls->n < len)) | 101 | if (LJ_UNLIKELY((MSize)(ls->pe - ls->p) < len)) |
108 | bcread_fill(ls, len, 0); | 102 | bcread_fill(ls, len, 0); |
109 | } | 103 | } |
110 | 104 | ||
111 | #define bcread_dec(ls) check_exp(ls->n > 0, ls->n--) | ||
112 | #define bcread_consume(ls, len) check_exp(ls->n >= (len), ls->n -= (len)) | ||
113 | |||
114 | /* Return memory block from buffer. */ | 105 | /* Return memory block from buffer. */ |
115 | static uint8_t *bcread_mem(LexState *ls, MSize len) | 106 | static LJ_AINLINE uint8_t *bcread_mem(LexState *ls, MSize len) |
116 | { | 107 | { |
117 | uint8_t *p = (uint8_t *)ls->p; | 108 | uint8_t *p = (uint8_t *)ls->p; |
118 | bcread_consume(ls, len); | 109 | ls->p += len; |
119 | ls->p = (char *)p + len; | 110 | lua_assert(ls->p <= ls->pe); |
120 | return p; | 111 | return p; |
121 | } | 112 | } |
122 | 113 | ||
@@ -129,25 +120,15 @@ static void bcread_block(LexState *ls, void *q, MSize len) | |||
129 | /* Read byte from buffer. */ | 120 | /* Read byte from buffer. */ |
130 | static LJ_AINLINE uint32_t bcread_byte(LexState *ls) | 121 | static LJ_AINLINE uint32_t bcread_byte(LexState *ls) |
131 | { | 122 | { |
132 | bcread_dec(ls); | 123 | lua_assert(ls->p < ls->pe); |
133 | return (uint32_t)(uint8_t)*ls->p++; | 124 | return (uint32_t)(uint8_t)*ls->p++; |
134 | } | 125 | } |
135 | 126 | ||
136 | /* Read ULEB128 value from buffer. */ | 127 | /* Read ULEB128 value from buffer. */ |
137 | static uint32_t bcread_uleb128(LexState *ls) | 128 | static LJ_AINLINE uint32_t bcread_uleb128(LexState *ls) |
138 | { | 129 | { |
139 | const uint8_t *p = (const uint8_t *)ls->p; | 130 | uint32_t v = lj_buf_ruleb128(&ls->p); |
140 | uint32_t v = *p++; | 131 | lua_assert(ls->p <= ls->pe); |
141 | if (LJ_UNLIKELY(v >= 0x80)) { | ||
142 | int sh = 0; | ||
143 | v &= 0x7f; | ||
144 | do { | ||
145 | v |= ((*p & 0x7f) << (sh += 7)); | ||
146 | bcread_dec(ls); | ||
147 | } while (*p++ >= 0x80); | ||
148 | } | ||
149 | bcread_dec(ls); | ||
150 | ls->p = (char *)p; | ||
151 | return v; | 132 | return v; |
152 | } | 133 | } |
153 | 134 | ||
@@ -161,11 +142,10 @@ static uint32_t bcread_uleb128_33(LexState *ls) | |||
161 | v &= 0x3f; | 142 | v &= 0x3f; |
162 | do { | 143 | do { |
163 | v |= ((*p & 0x7f) << (sh += 7)); | 144 | v |= ((*p & 0x7f) << (sh += 7)); |
164 | bcread_dec(ls); | ||
165 | } while (*p++ >= 0x80); | 145 | } while (*p++ >= 0x80); |
166 | } | 146 | } |
167 | bcread_dec(ls); | ||
168 | ls->p = (char *)p; | 147 | ls->p = (char *)p; |
148 | lua_assert(ls->p <= ls->pe); | ||
169 | return v; | 149 | return v; |
170 | } | 150 | } |
171 | 151 | ||
@@ -213,7 +193,7 @@ static void bcread_ktabk(LexState *ls, TValue *o) | |||
213 | o->u32.hi = bcread_uleb128(ls); | 193 | o->u32.hi = bcread_uleb128(ls); |
214 | } else { | 194 | } else { |
215 | lua_assert(tp <= BCDUMP_KTAB_TRUE); | 195 | lua_assert(tp <= BCDUMP_KTAB_TRUE); |
216 | setitype(o, ~tp); | 196 | setpriV(o, ~tp); |
217 | } | 197 | } |
218 | } | 198 | } |
219 | 199 | ||
@@ -327,25 +307,13 @@ static void bcread_uv(LexState *ls, GCproto *pt, MSize sizeuv) | |||
327 | } | 307 | } |
328 | 308 | ||
329 | /* Read a prototype. */ | 309 | /* Read a prototype. */ |
330 | static GCproto *bcread_proto(LexState *ls) | 310 | GCproto *lj_bcread_proto(LexState *ls) |
331 | { | 311 | { |
332 | GCproto *pt; | 312 | GCproto *pt; |
333 | MSize framesize, numparams, flags, sizeuv, sizekgc, sizekn, sizebc, sizept; | 313 | MSize framesize, numparams, flags, sizeuv, sizekgc, sizekn, sizebc, sizept; |
334 | MSize ofsk, ofsuv, ofsdbg; | 314 | MSize ofsk, ofsuv, ofsdbg; |
335 | MSize sizedbg = 0; | 315 | MSize sizedbg = 0; |
336 | BCLine firstline = 0, numline = 0; | 316 | BCLine firstline = 0, numline = 0; |
337 | MSize len, startn; | ||
338 | |||
339 | /* Read length. */ | ||
340 | if (ls->n > 0 && ls->p[0] == 0) { /* Shortcut EOF. */ | ||
341 | ls->n--; ls->p++; | ||
342 | return NULL; | ||
343 | } | ||
344 | bcread_want(ls, 5); | ||
345 | len = bcread_uleb128(ls); | ||
346 | if (!len) return NULL; /* EOF */ | ||
347 | bcread_need(ls, len); | ||
348 | startn = ls->n; | ||
349 | 317 | ||
350 | /* Read prototype header. */ | 318 | /* Read prototype header. */ |
351 | flags = bcread_byte(ls); | 319 | flags = bcread_byte(ls); |
@@ -414,9 +382,6 @@ static GCproto *bcread_proto(LexState *ls) | |||
414 | setmref(pt->uvinfo, NULL); | 382 | setmref(pt->uvinfo, NULL); |
415 | setmref(pt->varinfo, NULL); | 383 | setmref(pt->varinfo, NULL); |
416 | } | 384 | } |
417 | |||
418 | if (len != startn - ls->n) | ||
419 | bcread_error(ls, LJ_ERR_BCBAD); | ||
420 | return pt; | 385 | return pt; |
421 | } | 386 | } |
422 | 387 | ||
@@ -430,6 +395,7 @@ static int bcread_header(LexState *ls) | |||
430 | bcread_byte(ls) != BCDUMP_VERSION) return 0; | 395 | bcread_byte(ls) != BCDUMP_VERSION) return 0; |
431 | bcread_flags(ls) = flags = bcread_uleb128(ls); | 396 | bcread_flags(ls) = flags = bcread_uleb128(ls); |
432 | if ((flags & ~(BCDUMP_F_KNOWN)) != 0) return 0; | 397 | if ((flags & ~(BCDUMP_F_KNOWN)) != 0) return 0; |
398 | if ((flags & BCDUMP_F_FR2) != LJ_FR2*BCDUMP_F_FR2) return 0; | ||
433 | if ((flags & BCDUMP_F_FFI)) { | 399 | if ((flags & BCDUMP_F_FFI)) { |
434 | #if LJ_HASFFI | 400 | #if LJ_HASFFI |
435 | lua_State *L = ls->L; | 401 | lua_State *L = ls->L; |
@@ -456,19 +422,33 @@ static int bcread_header(LexState *ls) | |||
456 | GCproto *lj_bcread(LexState *ls) | 422 | GCproto *lj_bcread(LexState *ls) |
457 | { | 423 | { |
458 | lua_State *L = ls->L; | 424 | lua_State *L = ls->L; |
459 | lua_assert(ls->current == BCDUMP_HEAD1); | 425 | lua_assert(ls->c == BCDUMP_HEAD1); |
460 | bcread_savetop(L, ls, L->top); | 426 | bcread_savetop(L, ls, L->top); |
461 | lj_str_resetbuf(&ls->sb); | 427 | lj_buf_reset(&ls->sb); |
462 | /* Check for a valid bytecode dump header. */ | 428 | /* Check for a valid bytecode dump header. */ |
463 | if (!bcread_header(ls)) | 429 | if (!bcread_header(ls)) |
464 | bcread_error(ls, LJ_ERR_BCFMT); | 430 | bcread_error(ls, LJ_ERR_BCFMT); |
465 | for (;;) { /* Process all prototypes in the bytecode dump. */ | 431 | for (;;) { /* Process all prototypes in the bytecode dump. */ |
466 | GCproto *pt = bcread_proto(ls); | 432 | GCproto *pt; |
467 | if (!pt) break; | 433 | MSize len; |
434 | const char *startp; | ||
435 | /* Read length. */ | ||
436 | if (ls->p < ls->pe && ls->p[0] == 0) { /* Shortcut EOF. */ | ||
437 | ls->p++; | ||
438 | break; | ||
439 | } | ||
440 | bcread_want(ls, 5); | ||
441 | len = bcread_uleb128(ls); | ||
442 | if (!len) break; /* EOF */ | ||
443 | bcread_need(ls, len); | ||
444 | startp = ls->p; | ||
445 | pt = lj_bcread_proto(ls); | ||
446 | if (ls->p != startp + len) | ||
447 | bcread_error(ls, LJ_ERR_BCBAD); | ||
468 | setprotoV(L, L->top, pt); | 448 | setprotoV(L, L->top, pt); |
469 | incr_top(L); | 449 | incr_top(L); |
470 | } | 450 | } |
471 | if ((ls->n && !ls->endmark) || L->top-1 != bcread_oldtop(L, ls)) | 451 | if ((ls->pe != ls->p && !ls->endmark) || L->top-1 != bcread_oldtop(L, ls)) |
472 | bcread_error(ls, LJ_ERR_BCBAD); | 452 | bcread_error(ls, LJ_ERR_BCBAD); |
473 | /* Pop off last prototype. */ | 453 | /* Pop off last prototype. */ |
474 | L->top--; | 454 | L->top--; |
diff --git a/src/lj_bcwrite.c b/src/lj_bcwrite.c index f57fcfd6..dd38289e 100644 --- a/src/lj_bcwrite.c +++ b/src/lj_bcwrite.c | |||
@@ -8,7 +8,7 @@ | |||
8 | 8 | ||
9 | #include "lj_obj.h" | 9 | #include "lj_obj.h" |
10 | #include "lj_gc.h" | 10 | #include "lj_gc.h" |
11 | #include "lj_str.h" | 11 | #include "lj_buf.h" |
12 | #include "lj_bc.h" | 12 | #include "lj_bc.h" |
13 | #if LJ_HASFFI | 13 | #if LJ_HASFFI |
14 | #include "lj_ctype.h" | 14 | #include "lj_ctype.h" |
@@ -17,13 +17,13 @@ | |||
17 | #include "lj_dispatch.h" | 17 | #include "lj_dispatch.h" |
18 | #include "lj_jit.h" | 18 | #include "lj_jit.h" |
19 | #endif | 19 | #endif |
20 | #include "lj_strfmt.h" | ||
20 | #include "lj_bcdump.h" | 21 | #include "lj_bcdump.h" |
21 | #include "lj_vm.h" | 22 | #include "lj_vm.h" |
22 | 23 | ||
23 | /* Context for bytecode writer. */ | 24 | /* Context for bytecode writer. */ |
24 | typedef struct BCWriteCtx { | 25 | typedef struct BCWriteCtx { |
25 | SBuf sb; /* Output buffer. */ | 26 | SBuf sb; /* Output buffer. */ |
26 | lua_State *L; /* Lua state. */ | ||
27 | GCproto *pt; /* Root prototype. */ | 27 | GCproto *pt; /* Root prototype. */ |
28 | lua_Writer wfunc; /* Writer callback. */ | 28 | lua_Writer wfunc; /* Writer callback. */ |
29 | void *wdata; /* Writer callback data. */ | 29 | void *wdata; /* Writer callback data. */ |
@@ -31,85 +31,44 @@ typedef struct BCWriteCtx { | |||
31 | int status; /* Status from writer callback. */ | 31 | int status; /* Status from writer callback. */ |
32 | } BCWriteCtx; | 32 | } BCWriteCtx; |
33 | 33 | ||
34 | /* -- Output buffer handling ---------------------------------------------- */ | ||
35 | |||
36 | /* Resize buffer if needed. */ | ||
37 | static LJ_NOINLINE void bcwrite_resize(BCWriteCtx *ctx, MSize len) | ||
38 | { | ||
39 | MSize sz = ctx->sb.sz * 2; | ||
40 | while (ctx->sb.n + len > sz) sz = sz * 2; | ||
41 | lj_str_resizebuf(ctx->L, &ctx->sb, sz); | ||
42 | } | ||
43 | |||
44 | /* Need a certain amount of buffer space. */ | ||
45 | static LJ_AINLINE void bcwrite_need(BCWriteCtx *ctx, MSize len) | ||
46 | { | ||
47 | if (LJ_UNLIKELY(ctx->sb.n + len > ctx->sb.sz)) | ||
48 | bcwrite_resize(ctx, len); | ||
49 | } | ||
50 | |||
51 | /* Add memory block to buffer. */ | ||
52 | static void bcwrite_block(BCWriteCtx *ctx, const void *p, MSize len) | ||
53 | { | ||
54 | uint8_t *q = (uint8_t *)(ctx->sb.buf + ctx->sb.n); | ||
55 | MSize i; | ||
56 | ctx->sb.n += len; | ||
57 | for (i = 0; i < len; i++) q[i] = ((uint8_t *)p)[i]; | ||
58 | } | ||
59 | |||
60 | /* Add byte to buffer. */ | ||
61 | static LJ_AINLINE void bcwrite_byte(BCWriteCtx *ctx, uint8_t b) | ||
62 | { | ||
63 | ctx->sb.buf[ctx->sb.n++] = b; | ||
64 | } | ||
65 | |||
66 | /* Add ULEB128 value to buffer. */ | ||
67 | static void bcwrite_uleb128(BCWriteCtx *ctx, uint32_t v) | ||
68 | { | ||
69 | MSize n = ctx->sb.n; | ||
70 | uint8_t *p = (uint8_t *)ctx->sb.buf; | ||
71 | for (; v >= 0x80; v >>= 7) | ||
72 | p[n++] = (uint8_t)((v & 0x7f) | 0x80); | ||
73 | p[n++] = (uint8_t)v; | ||
74 | ctx->sb.n = n; | ||
75 | } | ||
76 | |||
77 | /* -- Bytecode writer ----------------------------------------------------- */ | 34 | /* -- Bytecode writer ----------------------------------------------------- */ |
78 | 35 | ||
79 | /* Write a single constant key/value of a template table. */ | 36 | /* Write a single constant key/value of a template table. */ |
80 | static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow) | 37 | static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow) |
81 | { | 38 | { |
82 | bcwrite_need(ctx, 1+10); | 39 | char *p = lj_buf_more(&ctx->sb, 1+10); |
83 | if (tvisstr(o)) { | 40 | if (tvisstr(o)) { |
84 | const GCstr *str = strV(o); | 41 | const GCstr *str = strV(o); |
85 | MSize len = str->len; | 42 | MSize len = str->len; |
86 | bcwrite_need(ctx, 5+len); | 43 | p = lj_buf_more(&ctx->sb, 5+len); |
87 | bcwrite_uleb128(ctx, BCDUMP_KTAB_STR+len); | 44 | p = lj_strfmt_wuleb128(p, BCDUMP_KTAB_STR+len); |
88 | bcwrite_block(ctx, strdata(str), len); | 45 | p = lj_buf_wmem(p, strdata(str), len); |
89 | } else if (tvisint(o)) { | 46 | } else if (tvisint(o)) { |
90 | bcwrite_byte(ctx, BCDUMP_KTAB_INT); | 47 | *p++ = BCDUMP_KTAB_INT; |
91 | bcwrite_uleb128(ctx, intV(o)); | 48 | p = lj_strfmt_wuleb128(p, intV(o)); |
92 | } else if (tvisnum(o)) { | 49 | } else if (tvisnum(o)) { |
93 | if (!LJ_DUALNUM && narrow) { /* Narrow number constants to integers. */ | 50 | if (!LJ_DUALNUM && narrow) { /* Narrow number constants to integers. */ |
94 | lua_Number num = numV(o); | 51 | lua_Number num = numV(o); |
95 | int32_t k = lj_num2int(num); | 52 | int32_t k = lj_num2int(num); |
96 | if (num == (lua_Number)k) { /* -0 is never a constant. */ | 53 | if (num == (lua_Number)k) { /* -0 is never a constant. */ |
97 | bcwrite_byte(ctx, BCDUMP_KTAB_INT); | 54 | *p++ = BCDUMP_KTAB_INT; |
98 | bcwrite_uleb128(ctx, k); | 55 | p = lj_strfmt_wuleb128(p, k); |
56 | setsbufP(&ctx->sb, p); | ||
99 | return; | 57 | return; |
100 | } | 58 | } |
101 | } | 59 | } |
102 | bcwrite_byte(ctx, BCDUMP_KTAB_NUM); | 60 | *p++ = BCDUMP_KTAB_NUM; |
103 | bcwrite_uleb128(ctx, o->u32.lo); | 61 | p = lj_strfmt_wuleb128(p, o->u32.lo); |
104 | bcwrite_uleb128(ctx, o->u32.hi); | 62 | p = lj_strfmt_wuleb128(p, o->u32.hi); |
105 | } else { | 63 | } else { |
106 | lua_assert(tvispri(o)); | 64 | lua_assert(tvispri(o)); |
107 | bcwrite_byte(ctx, BCDUMP_KTAB_NIL+~itype(o)); | 65 | *p++ = BCDUMP_KTAB_NIL+~itype(o); |
108 | } | 66 | } |
67 | setsbufP(&ctx->sb, p); | ||
109 | } | 68 | } |
110 | 69 | ||
111 | /* Write a template table. */ | 70 | /* Write a template table. */ |
112 | static void bcwrite_ktab(BCWriteCtx *ctx, const GCtab *t) | 71 | static void bcwrite_ktab(BCWriteCtx *ctx, char *p, const GCtab *t) |
113 | { | 72 | { |
114 | MSize narray = 0, nhash = 0; | 73 | MSize narray = 0, nhash = 0; |
115 | if (t->asize > 0) { /* Determine max. length of array part. */ | 74 | if (t->asize > 0) { /* Determine max. length of array part. */ |
@@ -127,8 +86,9 @@ static void bcwrite_ktab(BCWriteCtx *ctx, const GCtab *t) | |||
127 | nhash += !tvisnil(&node[i].val); | 86 | nhash += !tvisnil(&node[i].val); |
128 | } | 87 | } |
129 | /* Write number of array slots and hash slots. */ | 88 | /* Write number of array slots and hash slots. */ |
130 | bcwrite_uleb128(ctx, narray); | 89 | p = lj_strfmt_wuleb128(p, narray); |
131 | bcwrite_uleb128(ctx, nhash); | 90 | p = lj_strfmt_wuleb128(p, nhash); |
91 | setsbufP(&ctx->sb, p); | ||
132 | if (narray) { /* Write array entries (may contain nil). */ | 92 | if (narray) { /* Write array entries (may contain nil). */ |
133 | MSize i; | 93 | MSize i; |
134 | TValue *o = tvref(t->array); | 94 | TValue *o = tvref(t->array); |
@@ -155,6 +115,7 @@ static void bcwrite_kgc(BCWriteCtx *ctx, GCproto *pt) | |||
155 | for (i = 0; i < sizekgc; i++, kr++) { | 115 | for (i = 0; i < sizekgc; i++, kr++) { |
156 | GCobj *o = gcref(*kr); | 116 | GCobj *o = gcref(*kr); |
157 | MSize tp, need = 1; | 117 | MSize tp, need = 1; |
118 | char *p; | ||
158 | /* Determine constant type and needed size. */ | 119 | /* Determine constant type and needed size. */ |
159 | if (o->gch.gct == ~LJ_TSTR) { | 120 | if (o->gch.gct == ~LJ_TSTR) { |
160 | tp = BCDUMP_KGC_STR + gco2str(o)->len; | 121 | tp = BCDUMP_KGC_STR + gco2str(o)->len; |
@@ -181,24 +142,26 @@ static void bcwrite_kgc(BCWriteCtx *ctx, GCproto *pt) | |||
181 | need = 1+2*5; | 142 | need = 1+2*5; |
182 | } | 143 | } |
183 | /* Write constant type. */ | 144 | /* Write constant type. */ |
184 | bcwrite_need(ctx, need); | 145 | p = lj_buf_more(&ctx->sb, need); |
185 | bcwrite_uleb128(ctx, tp); | 146 | p = lj_strfmt_wuleb128(p, tp); |
186 | /* Write constant data (if any). */ | 147 | /* Write constant data (if any). */ |
187 | if (tp >= BCDUMP_KGC_STR) { | 148 | if (tp >= BCDUMP_KGC_STR) { |
188 | bcwrite_block(ctx, strdata(gco2str(o)), gco2str(o)->len); | 149 | p = lj_buf_wmem(p, strdata(gco2str(o)), gco2str(o)->len); |
189 | } else if (tp == BCDUMP_KGC_TAB) { | 150 | } else if (tp == BCDUMP_KGC_TAB) { |
190 | bcwrite_ktab(ctx, gco2tab(o)); | 151 | bcwrite_ktab(ctx, p, gco2tab(o)); |
152 | continue; | ||
191 | #if LJ_HASFFI | 153 | #if LJ_HASFFI |
192 | } else if (tp != BCDUMP_KGC_CHILD) { | 154 | } else if (tp != BCDUMP_KGC_CHILD) { |
193 | cTValue *p = (TValue *)cdataptr(gco2cd(o)); | 155 | cTValue *q = (TValue *)cdataptr(gco2cd(o)); |
194 | bcwrite_uleb128(ctx, p[0].u32.lo); | 156 | p = lj_strfmt_wuleb128(p, q[0].u32.lo); |
195 | bcwrite_uleb128(ctx, p[0].u32.hi); | 157 | p = lj_strfmt_wuleb128(p, q[0].u32.hi); |
196 | if (tp == BCDUMP_KGC_COMPLEX) { | 158 | if (tp == BCDUMP_KGC_COMPLEX) { |
197 | bcwrite_uleb128(ctx, p[1].u32.lo); | 159 | p = lj_strfmt_wuleb128(p, q[1].u32.lo); |
198 | bcwrite_uleb128(ctx, p[1].u32.hi); | 160 | p = lj_strfmt_wuleb128(p, q[1].u32.hi); |
199 | } | 161 | } |
200 | #endif | 162 | #endif |
201 | } | 163 | } |
164 | setsbufP(&ctx->sb, p); | ||
202 | } | 165 | } |
203 | } | 166 | } |
204 | 167 | ||
@@ -207,7 +170,7 @@ static void bcwrite_knum(BCWriteCtx *ctx, GCproto *pt) | |||
207 | { | 170 | { |
208 | MSize i, sizekn = pt->sizekn; | 171 | MSize i, sizekn = pt->sizekn; |
209 | cTValue *o = mref(pt->k, TValue); | 172 | cTValue *o = mref(pt->k, TValue); |
210 | bcwrite_need(ctx, 10*sizekn); | 173 | char *p = lj_buf_more(&ctx->sb, 10*sizekn); |
211 | for (i = 0; i < sizekn; i++, o++) { | 174 | for (i = 0; i < sizekn; i++, o++) { |
212 | int32_t k; | 175 | int32_t k; |
213 | if (tvisint(o)) { | 176 | if (tvisint(o)) { |
@@ -220,55 +183,55 @@ static void bcwrite_knum(BCWriteCtx *ctx, GCproto *pt) | |||
220 | k = lj_num2int(num); | 183 | k = lj_num2int(num); |
221 | if (num == (lua_Number)k) { /* -0 is never a constant. */ | 184 | if (num == (lua_Number)k) { /* -0 is never a constant. */ |
222 | save_int: | 185 | save_int: |
223 | bcwrite_uleb128(ctx, 2*(uint32_t)k | ((uint32_t)k & 0x80000000u)); | 186 | p = lj_strfmt_wuleb128(p, 2*(uint32_t)k | ((uint32_t)k&0x80000000u)); |
224 | if (k < 0) { | 187 | if (k < 0) |
225 | char *p = &ctx->sb.buf[ctx->sb.n-1]; | 188 | p[-1] = (p[-1] & 7) | ((k>>27) & 0x18); |
226 | *p = (*p & 7) | ((k>>27) & 0x18); | ||
227 | } | ||
228 | continue; | 189 | continue; |
229 | } | 190 | } |
230 | } | 191 | } |
231 | bcwrite_uleb128(ctx, 1+(2*o->u32.lo | (o->u32.lo & 0x80000000u))); | 192 | p = lj_strfmt_wuleb128(p, 1+(2*o->u32.lo | (o->u32.lo & 0x80000000u))); |
232 | if (o->u32.lo >= 0x80000000u) { | 193 | if (o->u32.lo >= 0x80000000u) |
233 | char *p = &ctx->sb.buf[ctx->sb.n-1]; | 194 | p[-1] = (p[-1] & 7) | ((o->u32.lo>>27) & 0x18); |
234 | *p = (*p & 7) | ((o->u32.lo>>27) & 0x18); | 195 | p = lj_strfmt_wuleb128(p, o->u32.hi); |
235 | } | ||
236 | bcwrite_uleb128(ctx, o->u32.hi); | ||
237 | } | 196 | } |
238 | } | 197 | } |
198 | setsbufP(&ctx->sb, p); | ||
239 | } | 199 | } |
240 | 200 | ||
241 | /* Write bytecode instructions. */ | 201 | /* Write bytecode instructions. */ |
242 | static void bcwrite_bytecode(BCWriteCtx *ctx, GCproto *pt) | 202 | static char *bcwrite_bytecode(BCWriteCtx *ctx, char *p, GCproto *pt) |
243 | { | 203 | { |
244 | MSize nbc = pt->sizebc-1; /* Omit the [JI]FUNC* header. */ | 204 | MSize nbc = pt->sizebc-1; /* Omit the [JI]FUNC* header. */ |
245 | #if LJ_HASJIT | 205 | #if LJ_HASJIT |
246 | uint8_t *p = (uint8_t *)&ctx->sb.buf[ctx->sb.n]; | 206 | uint8_t *q = (uint8_t *)p; |
247 | #endif | 207 | #endif |
248 | bcwrite_block(ctx, proto_bc(pt)+1, nbc*(MSize)sizeof(BCIns)); | 208 | p = lj_buf_wmem(p, proto_bc(pt)+1, nbc*(MSize)sizeof(BCIns)); |
209 | UNUSED(ctx); | ||
249 | #if LJ_HASJIT | 210 | #if LJ_HASJIT |
250 | /* Unpatch modified bytecode containing ILOOP/JLOOP etc. */ | 211 | /* Unpatch modified bytecode containing ILOOP/JLOOP etc. */ |
251 | if ((pt->flags & PROTO_ILOOP) || pt->trace) { | 212 | if ((pt->flags & PROTO_ILOOP) || pt->trace) { |
252 | jit_State *J = L2J(ctx->L); | 213 | jit_State *J = L2J(sbufL(&ctx->sb)); |
253 | MSize i; | 214 | MSize i; |
254 | for (i = 0; i < nbc; i++, p += sizeof(BCIns)) { | 215 | for (i = 0; i < nbc; i++, q += sizeof(BCIns)) { |
255 | BCOp op = (BCOp)p[LJ_ENDIAN_SELECT(0, 3)]; | 216 | BCOp op = (BCOp)q[LJ_ENDIAN_SELECT(0, 3)]; |
256 | if (op == BC_IFORL || op == BC_IITERL || op == BC_ILOOP || | 217 | if (op == BC_IFORL || op == BC_IITERL || op == BC_ILOOP || |
257 | op == BC_JFORI) { | 218 | op == BC_JFORI) { |
258 | p[LJ_ENDIAN_SELECT(0, 3)] = (uint8_t)(op-BC_IFORL+BC_FORL); | 219 | q[LJ_ENDIAN_SELECT(0, 3)] = (uint8_t)(op-BC_IFORL+BC_FORL); |
259 | } else if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) { | 220 | } else if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) { |
260 | BCReg rd = p[LJ_ENDIAN_SELECT(2, 1)] + (p[LJ_ENDIAN_SELECT(3, 0)] << 8); | 221 | BCReg rd = q[LJ_ENDIAN_SELECT(2, 1)] + (q[LJ_ENDIAN_SELECT(3, 0)] << 8); |
261 | memcpy(p, &traceref(J, rd)->startins, 4); | 222 | memcpy(q, &traceref(J, rd)->startins, 4); |
262 | } | 223 | } |
263 | } | 224 | } |
264 | } | 225 | } |
265 | #endif | 226 | #endif |
227 | return p; | ||
266 | } | 228 | } |
267 | 229 | ||
268 | /* Write prototype. */ | 230 | /* Write prototype. */ |
269 | static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt) | 231 | static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt) |
270 | { | 232 | { |
271 | MSize sizedbg = 0; | 233 | MSize sizedbg = 0; |
234 | char *p; | ||
272 | 235 | ||
273 | /* Recursively write children of prototype. */ | 236 | /* Recursively write children of prototype. */ |
274 | if ((pt->flags & PROTO_CHILD)) { | 237 | if ((pt->flags & PROTO_CHILD)) { |
@@ -282,31 +245,32 @@ static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt) | |||
282 | } | 245 | } |
283 | 246 | ||
284 | /* Start writing the prototype info to a buffer. */ | 247 | /* Start writing the prototype info to a buffer. */ |
285 | lj_str_resetbuf(&ctx->sb); | 248 | p = lj_buf_need(&ctx->sb, |
286 | ctx->sb.n = 5; /* Leave room for final size. */ | 249 | 5+4+6*5+(pt->sizebc-1)*(MSize)sizeof(BCIns)+pt->sizeuv*2); |
287 | bcwrite_need(ctx, 4+6*5+(pt->sizebc-1)*(MSize)sizeof(BCIns)+pt->sizeuv*2); | 250 | p += 5; /* Leave room for final size. */ |
288 | 251 | ||
289 | /* Write prototype header. */ | 252 | /* Write prototype header. */ |
290 | bcwrite_byte(ctx, (pt->flags & (PROTO_CHILD|PROTO_VARARG|PROTO_FFI))); | 253 | *p++ = (pt->flags & (PROTO_CHILD|PROTO_VARARG|PROTO_FFI)); |
291 | bcwrite_byte(ctx, pt->numparams); | 254 | *p++ = pt->numparams; |
292 | bcwrite_byte(ctx, pt->framesize); | 255 | *p++ = pt->framesize; |
293 | bcwrite_byte(ctx, pt->sizeuv); | 256 | *p++ = pt->sizeuv; |
294 | bcwrite_uleb128(ctx, pt->sizekgc); | 257 | p = lj_strfmt_wuleb128(p, pt->sizekgc); |
295 | bcwrite_uleb128(ctx, pt->sizekn); | 258 | p = lj_strfmt_wuleb128(p, pt->sizekn); |
296 | bcwrite_uleb128(ctx, pt->sizebc-1); | 259 | p = lj_strfmt_wuleb128(p, pt->sizebc-1); |
297 | if (!ctx->strip) { | 260 | if (!ctx->strip) { |
298 | if (proto_lineinfo(pt)) | 261 | if (proto_lineinfo(pt)) |
299 | sizedbg = pt->sizept - (MSize)((char *)proto_lineinfo(pt) - (char *)pt); | 262 | sizedbg = pt->sizept - (MSize)((char *)proto_lineinfo(pt) - (char *)pt); |
300 | bcwrite_uleb128(ctx, sizedbg); | 263 | p = lj_strfmt_wuleb128(p, sizedbg); |
301 | if (sizedbg) { | 264 | if (sizedbg) { |
302 | bcwrite_uleb128(ctx, pt->firstline); | 265 | p = lj_strfmt_wuleb128(p, pt->firstline); |
303 | bcwrite_uleb128(ctx, pt->numline); | 266 | p = lj_strfmt_wuleb128(p, pt->numline); |
304 | } | 267 | } |
305 | } | 268 | } |
306 | 269 | ||
307 | /* Write bytecode instructions and upvalue refs. */ | 270 | /* Write bytecode instructions and upvalue refs. */ |
308 | bcwrite_bytecode(ctx, pt); | 271 | p = bcwrite_bytecode(ctx, p, pt); |
309 | bcwrite_block(ctx, proto_uv(pt), pt->sizeuv*2); | 272 | p = lj_buf_wmem(p, proto_uv(pt), pt->sizeuv*2); |
273 | setsbufP(&ctx->sb, p); | ||
310 | 274 | ||
311 | /* Write constants. */ | 275 | /* Write constants. */ |
312 | bcwrite_kgc(ctx, pt); | 276 | bcwrite_kgc(ctx, pt); |
@@ -314,18 +278,19 @@ static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt) | |||
314 | 278 | ||
315 | /* Write debug info, if not stripped. */ | 279 | /* Write debug info, if not stripped. */ |
316 | if (sizedbg) { | 280 | if (sizedbg) { |
317 | bcwrite_need(ctx, sizedbg); | 281 | p = lj_buf_more(&ctx->sb, sizedbg); |
318 | bcwrite_block(ctx, proto_lineinfo(pt), sizedbg); | 282 | p = lj_buf_wmem(p, proto_lineinfo(pt), sizedbg); |
283 | setsbufP(&ctx->sb, p); | ||
319 | } | 284 | } |
320 | 285 | ||
321 | /* Pass buffer to writer function. */ | 286 | /* Pass buffer to writer function. */ |
322 | if (ctx->status == 0) { | 287 | if (ctx->status == 0) { |
323 | MSize n = ctx->sb.n - 5; | 288 | MSize n = sbuflen(&ctx->sb) - 5; |
324 | MSize nn = (lj_fls(n)+8)*9 >> 6; | 289 | MSize nn = (lj_fls(n)+8)*9 >> 6; |
325 | ctx->sb.n = 5 - nn; | 290 | char *q = sbufB(&ctx->sb) + (5 - nn); |
326 | bcwrite_uleb128(ctx, n); /* Fill in final size. */ | 291 | p = lj_strfmt_wuleb128(q, n); /* Fill in final size. */ |
327 | lua_assert(ctx->sb.n == 5); | 292 | lua_assert(p == sbufB(&ctx->sb) + 5); |
328 | ctx->status = ctx->wfunc(ctx->L, ctx->sb.buf+5-nn, nn+n, ctx->wdata); | 293 | ctx->status = ctx->wfunc(sbufL(&ctx->sb), q, nn+n, ctx->wdata); |
329 | } | 294 | } |
330 | } | 295 | } |
331 | 296 | ||
@@ -335,20 +300,21 @@ static void bcwrite_header(BCWriteCtx *ctx) | |||
335 | GCstr *chunkname = proto_chunkname(ctx->pt); | 300 | GCstr *chunkname = proto_chunkname(ctx->pt); |
336 | const char *name = strdata(chunkname); | 301 | const char *name = strdata(chunkname); |
337 | MSize len = chunkname->len; | 302 | MSize len = chunkname->len; |
338 | lj_str_resetbuf(&ctx->sb); | 303 | char *p = lj_buf_need(&ctx->sb, 5+5+len); |
339 | bcwrite_need(ctx, 5+5+len); | 304 | *p++ = BCDUMP_HEAD1; |
340 | bcwrite_byte(ctx, BCDUMP_HEAD1); | 305 | *p++ = BCDUMP_HEAD2; |
341 | bcwrite_byte(ctx, BCDUMP_HEAD2); | 306 | *p++ = BCDUMP_HEAD3; |
342 | bcwrite_byte(ctx, BCDUMP_HEAD3); | 307 | *p++ = BCDUMP_VERSION; |
343 | bcwrite_byte(ctx, BCDUMP_VERSION); | 308 | *p++ = (ctx->strip ? BCDUMP_F_STRIP : 0) + |
344 | bcwrite_byte(ctx, (ctx->strip ? BCDUMP_F_STRIP : 0) + | 309 | LJ_BE*BCDUMP_F_BE + |
345 | (LJ_BE ? BCDUMP_F_BE : 0) + | 310 | ((ctx->pt->flags & PROTO_FFI) ? BCDUMP_F_FFI : 0) + |
346 | ((ctx->pt->flags & PROTO_FFI) ? BCDUMP_F_FFI : 0)); | 311 | LJ_FR2*BCDUMP_F_FR2; |
347 | if (!ctx->strip) { | 312 | if (!ctx->strip) { |
348 | bcwrite_uleb128(ctx, len); | 313 | p = lj_strfmt_wuleb128(p, len); |
349 | bcwrite_block(ctx, name, len); | 314 | p = lj_buf_wmem(p, name, len); |
350 | } | 315 | } |
351 | ctx->status = ctx->wfunc(ctx->L, ctx->sb.buf, ctx->sb.n, ctx->wdata); | 316 | ctx->status = ctx->wfunc(sbufL(&ctx->sb), sbufB(&ctx->sb), |
317 | (MSize)(p - sbufB(&ctx->sb)), ctx->wdata); | ||
352 | } | 318 | } |
353 | 319 | ||
354 | /* Write footer of bytecode dump. */ | 320 | /* Write footer of bytecode dump. */ |
@@ -356,7 +322,7 @@ static void bcwrite_footer(BCWriteCtx *ctx) | |||
356 | { | 322 | { |
357 | if (ctx->status == 0) { | 323 | if (ctx->status == 0) { |
358 | uint8_t zero = 0; | 324 | uint8_t zero = 0; |
359 | ctx->status = ctx->wfunc(ctx->L, &zero, 1, ctx->wdata); | 325 | ctx->status = ctx->wfunc(sbufL(&ctx->sb), &zero, 1, ctx->wdata); |
360 | } | 326 | } |
361 | } | 327 | } |
362 | 328 | ||
@@ -364,8 +330,8 @@ static void bcwrite_footer(BCWriteCtx *ctx) | |||
364 | static TValue *cpwriter(lua_State *L, lua_CFunction dummy, void *ud) | 330 | static TValue *cpwriter(lua_State *L, lua_CFunction dummy, void *ud) |
365 | { | 331 | { |
366 | BCWriteCtx *ctx = (BCWriteCtx *)ud; | 332 | BCWriteCtx *ctx = (BCWriteCtx *)ud; |
367 | UNUSED(dummy); | 333 | UNUSED(L); UNUSED(dummy); |
368 | lj_str_resizebuf(L, &ctx->sb, 1024); /* Avoids resize for most prototypes. */ | 334 | lj_buf_need(&ctx->sb, 1024); /* Avoids resize for most prototypes. */ |
369 | bcwrite_header(ctx); | 335 | bcwrite_header(ctx); |
370 | bcwrite_proto(ctx, ctx->pt); | 336 | bcwrite_proto(ctx, ctx->pt); |
371 | bcwrite_footer(ctx); | 337 | bcwrite_footer(ctx); |
@@ -378,16 +344,15 @@ int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, void *data, | |||
378 | { | 344 | { |
379 | BCWriteCtx ctx; | 345 | BCWriteCtx ctx; |
380 | int status; | 346 | int status; |
381 | ctx.L = L; | ||
382 | ctx.pt = pt; | 347 | ctx.pt = pt; |
383 | ctx.wfunc = writer; | 348 | ctx.wfunc = writer; |
384 | ctx.wdata = data; | 349 | ctx.wdata = data; |
385 | ctx.strip = strip; | 350 | ctx.strip = strip; |
386 | ctx.status = 0; | 351 | ctx.status = 0; |
387 | lj_str_initbuf(&ctx.sb); | 352 | lj_buf_init(L, &ctx.sb); |
388 | status = lj_vm_cpcall(L, NULL, &ctx, cpwriter); | 353 | status = lj_vm_cpcall(L, NULL, &ctx, cpwriter); |
389 | if (status == 0) status = ctx.status; | 354 | if (status == 0) status = ctx.status; |
390 | lj_str_freebuf(G(ctx.L), &ctx.sb); | 355 | lj_buf_free(G(sbufL(&ctx.sb)), &ctx.sb); |
391 | return status; | 356 | return status; |
392 | } | 357 | } |
393 | 358 | ||
diff --git a/src/lj_buf.c b/src/lj_buf.c new file mode 100644 index 00000000..c8778016 --- /dev/null +++ b/src/lj_buf.c | |||
@@ -0,0 +1,232 @@ | |||
1 | /* | ||
2 | ** Buffer handling. | ||
3 | ** Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #define lj_buf_c | ||
7 | #define LUA_CORE | ||
8 | |||
9 | #include "lj_obj.h" | ||
10 | #include "lj_gc.h" | ||
11 | #include "lj_err.h" | ||
12 | #include "lj_buf.h" | ||
13 | #include "lj_str.h" | ||
14 | #include "lj_tab.h" | ||
15 | #include "lj_strfmt.h" | ||
16 | |||
17 | /* -- Buffer management --------------------------------------------------- */ | ||
18 | |||
19 | static void buf_grow(SBuf *sb, MSize sz) | ||
20 | { | ||
21 | MSize osz = sbufsz(sb), len = sbuflen(sb), nsz = osz; | ||
22 | char *b; | ||
23 | if (nsz < LJ_MIN_SBUF) nsz = LJ_MIN_SBUF; | ||
24 | while (nsz < sz) nsz += nsz; | ||
25 | b = (char *)lj_mem_realloc(sbufL(sb), sbufB(sb), osz, nsz); | ||
26 | setmref(sb->b, b); | ||
27 | setmref(sb->p, b + len); | ||
28 | setmref(sb->e, b + nsz); | ||
29 | } | ||
30 | |||
31 | LJ_NOINLINE char *LJ_FASTCALL lj_buf_need2(SBuf *sb, MSize sz) | ||
32 | { | ||
33 | lua_assert(sz > sbufsz(sb)); | ||
34 | if (LJ_UNLIKELY(sz > LJ_MAX_BUF)) | ||
35 | lj_err_mem(sbufL(sb)); | ||
36 | buf_grow(sb, sz); | ||
37 | return sbufB(sb); | ||
38 | } | ||
39 | |||
40 | LJ_NOINLINE char *LJ_FASTCALL lj_buf_more2(SBuf *sb, MSize sz) | ||
41 | { | ||
42 | MSize len = sbuflen(sb); | ||
43 | lua_assert(sz > sbufleft(sb)); | ||
44 | if (LJ_UNLIKELY(sz > LJ_MAX_BUF || len + sz > LJ_MAX_BUF)) | ||
45 | lj_err_mem(sbufL(sb)); | ||
46 | buf_grow(sb, len + sz); | ||
47 | return sbufP(sb); | ||
48 | } | ||
49 | |||
50 | void LJ_FASTCALL lj_buf_shrink(lua_State *L, SBuf *sb) | ||
51 | { | ||
52 | char *b = sbufB(sb); | ||
53 | MSize osz = (MSize)(sbufE(sb) - b); | ||
54 | if (osz > 2*LJ_MIN_SBUF) { | ||
55 | MSize n = (MSize)(sbufP(sb) - b); | ||
56 | b = lj_mem_realloc(L, b, osz, (osz >> 1)); | ||
57 | setmref(sb->b, b); | ||
58 | setmref(sb->p, b + n); | ||
59 | setmref(sb->e, b + (osz >> 1)); | ||
60 | } | ||
61 | } | ||
62 | |||
63 | char * LJ_FASTCALL lj_buf_tmp(lua_State *L, MSize sz) | ||
64 | { | ||
65 | SBuf *sb = &G(L)->tmpbuf; | ||
66 | setsbufL(sb, L); | ||
67 | return lj_buf_need(sb, sz); | ||
68 | } | ||
69 | |||
70 | /* -- Low-level buffer put operations ------------------------------------- */ | ||
71 | |||
72 | SBuf *lj_buf_putmem(SBuf *sb, const void *q, MSize len) | ||
73 | { | ||
74 | char *p = lj_buf_more(sb, len); | ||
75 | p = lj_buf_wmem(p, q, len); | ||
76 | setsbufP(sb, p); | ||
77 | return sb; | ||
78 | } | ||
79 | |||
80 | SBuf * LJ_FASTCALL lj_buf_putchar(SBuf *sb, int c) | ||
81 | { | ||
82 | char *p = lj_buf_more(sb, 1); | ||
83 | *p++ = (char)c; | ||
84 | setsbufP(sb, p); | ||
85 | return sb; | ||
86 | } | ||
87 | |||
88 | SBuf * LJ_FASTCALL lj_buf_putstr(SBuf *sb, GCstr *s) | ||
89 | { | ||
90 | MSize len = s->len; | ||
91 | char *p = lj_buf_more(sb, len); | ||
92 | p = lj_buf_wmem(p, strdata(s), len); | ||
93 | setsbufP(sb, p); | ||
94 | return sb; | ||
95 | } | ||
96 | |||
97 | /* -- High-level buffer put operations ------------------------------------ */ | ||
98 | |||
99 | SBuf * LJ_FASTCALL lj_buf_putstr_reverse(SBuf *sb, GCstr *s) | ||
100 | { | ||
101 | MSize len = s->len; | ||
102 | char *p = lj_buf_more(sb, len), *e = p+len; | ||
103 | const char *q = strdata(s)+len-1; | ||
104 | while (p < e) | ||
105 | *p++ = *q--; | ||
106 | setsbufP(sb, p); | ||
107 | return sb; | ||
108 | } | ||
109 | |||
110 | SBuf * LJ_FASTCALL lj_buf_putstr_lower(SBuf *sb, GCstr *s) | ||
111 | { | ||
112 | MSize len = s->len; | ||
113 | char *p = lj_buf_more(sb, len), *e = p+len; | ||
114 | const char *q = strdata(s); | ||
115 | for (; p < e; p++, q++) { | ||
116 | uint32_t c = *(unsigned char *)q; | ||
117 | #if LJ_TARGET_PPC | ||
118 | *p = c + ((c >= 'A' && c <= 'Z') << 5); | ||
119 | #else | ||
120 | if (c >= 'A' && c <= 'Z') c += 0x20; | ||
121 | *p = c; | ||
122 | #endif | ||
123 | } | ||
124 | setsbufP(sb, p); | ||
125 | return sb; | ||
126 | } | ||
127 | |||
128 | SBuf * LJ_FASTCALL lj_buf_putstr_upper(SBuf *sb, GCstr *s) | ||
129 | { | ||
130 | MSize len = s->len; | ||
131 | char *p = lj_buf_more(sb, len), *e = p+len; | ||
132 | const char *q = strdata(s); | ||
133 | for (; p < e; p++, q++) { | ||
134 | uint32_t c = *(unsigned char *)q; | ||
135 | #if LJ_TARGET_PPC | ||
136 | *p = c - ((c >= 'a' && c <= 'z') << 5); | ||
137 | #else | ||
138 | if (c >= 'a' && c <= 'z') c -= 0x20; | ||
139 | *p = c; | ||
140 | #endif | ||
141 | } | ||
142 | setsbufP(sb, p); | ||
143 | return sb; | ||
144 | } | ||
145 | |||
146 | SBuf *lj_buf_putstr_rep(SBuf *sb, GCstr *s, int32_t rep) | ||
147 | { | ||
148 | MSize len = s->len; | ||
149 | if (rep > 0 && len) { | ||
150 | uint64_t tlen = (uint64_t)rep * len; | ||
151 | char *p; | ||
152 | if (LJ_UNLIKELY(tlen > LJ_MAX_STR)) | ||
153 | lj_err_mem(sbufL(sb)); | ||
154 | p = lj_buf_more(sb, (MSize)tlen); | ||
155 | if (len == 1) { /* Optimize a common case. */ | ||
156 | uint32_t c = strdata(s)[0]; | ||
157 | do { *p++ = c; } while (--rep > 0); | ||
158 | } else { | ||
159 | const char *e = strdata(s) + len; | ||
160 | do { | ||
161 | const char *q = strdata(s); | ||
162 | do { *p++ = *q++; } while (q < e); | ||
163 | } while (--rep > 0); | ||
164 | } | ||
165 | setsbufP(sb, p); | ||
166 | } | ||
167 | return sb; | ||
168 | } | ||
169 | |||
170 | SBuf *lj_buf_puttab(SBuf *sb, GCtab *t, GCstr *sep, int32_t i, int32_t e) | ||
171 | { | ||
172 | MSize seplen = sep ? sep->len : 0; | ||
173 | if (i <= e) { | ||
174 | for (;;) { | ||
175 | cTValue *o = lj_tab_getint(t, i); | ||
176 | char *p; | ||
177 | if (!o) { | ||
178 | badtype: /* Error: bad element type. */ | ||
179 | setsbufP(sb, (void *)(intptr_t)i); /* Store failing index. */ | ||
180 | return NULL; | ||
181 | } else if (tvisstr(o)) { | ||
182 | MSize len = strV(o)->len; | ||
183 | p = lj_buf_wmem(lj_buf_more(sb, len + seplen), strVdata(o), len); | ||
184 | } else if (tvisint(o)) { | ||
185 | p = lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT+seplen), intV(o)); | ||
186 | } else if (tvisnum(o)) { | ||
187 | p = lj_buf_more(lj_strfmt_putfnum(sb, STRFMT_G14, numV(o)), seplen); | ||
188 | } else { | ||
189 | goto badtype; | ||
190 | } | ||
191 | if (i++ == e) { | ||
192 | setsbufP(sb, p); | ||
193 | break; | ||
194 | } | ||
195 | if (seplen) p = lj_buf_wmem(p, strdata(sep), seplen); | ||
196 | setsbufP(sb, p); | ||
197 | } | ||
198 | } | ||
199 | return sb; | ||
200 | } | ||
201 | |||
202 | /* -- Miscellaneous buffer operations ------------------------------------- */ | ||
203 | |||
204 | GCstr * LJ_FASTCALL lj_buf_tostr(SBuf *sb) | ||
205 | { | ||
206 | return lj_str_new(sbufL(sb), sbufB(sb), sbuflen(sb)); | ||
207 | } | ||
208 | |||
209 | /* Concatenate two strings. */ | ||
210 | GCstr *lj_buf_cat2str(lua_State *L, GCstr *s1, GCstr *s2) | ||
211 | { | ||
212 | MSize len1 = s1->len, len2 = s2->len; | ||
213 | char *buf = lj_buf_tmp(L, len1 + len2); | ||
214 | memcpy(buf, strdata(s1), len1); | ||
215 | memcpy(buf+len1, strdata(s2), len2); | ||
216 | return lj_str_new(L, buf, len1 + len2); | ||
217 | } | ||
218 | |||
219 | /* Read ULEB128 from buffer. */ | ||
220 | uint32_t LJ_FASTCALL lj_buf_ruleb128(const char **pp) | ||
221 | { | ||
222 | const uint8_t *p = (const uint8_t *)*pp; | ||
223 | uint32_t v = *p++; | ||
224 | if (LJ_UNLIKELY(v >= 0x80)) { | ||
225 | int sh = 0; | ||
226 | v &= 0x7f; | ||
227 | do { v |= ((*p & 0x7f) << (sh += 7)); } while (*p++ >= 0x80); | ||
228 | } | ||
229 | *pp = (const char *)p; | ||
230 | return v; | ||
231 | } | ||
232 | |||
diff --git a/src/lj_buf.h b/src/lj_buf.h new file mode 100644 index 00000000..dab13bd2 --- /dev/null +++ b/src/lj_buf.h | |||
@@ -0,0 +1,103 @@ | |||
1 | /* | ||
2 | ** Buffer handling. | ||
3 | ** Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #ifndef _LJ_BUF_H | ||
7 | #define _LJ_BUF_H | ||
8 | |||
9 | #include "lj_obj.h" | ||
10 | #include "lj_gc.h" | ||
11 | #include "lj_str.h" | ||
12 | |||
13 | /* Resizable string buffers. Struct definition in lj_obj.h. */ | ||
14 | #define sbufB(sb) (mref((sb)->b, char)) | ||
15 | #define sbufP(sb) (mref((sb)->p, char)) | ||
16 | #define sbufE(sb) (mref((sb)->e, char)) | ||
17 | #define sbufL(sb) (mref((sb)->L, lua_State)) | ||
18 | #define sbufsz(sb) ((MSize)(sbufE((sb)) - sbufB((sb)))) | ||
19 | #define sbuflen(sb) ((MSize)(sbufP((sb)) - sbufB((sb)))) | ||
20 | #define sbufleft(sb) ((MSize)(sbufE((sb)) - sbufP((sb)))) | ||
21 | #define setsbufP(sb, q) (setmref((sb)->p, (q))) | ||
22 | #define setsbufL(sb, l) (setmref((sb)->L, (l))) | ||
23 | |||
24 | /* Buffer management */ | ||
25 | LJ_FUNC char *LJ_FASTCALL lj_buf_need2(SBuf *sb, MSize sz); | ||
26 | LJ_FUNC char *LJ_FASTCALL lj_buf_more2(SBuf *sb, MSize sz); | ||
27 | LJ_FUNC void LJ_FASTCALL lj_buf_shrink(lua_State *L, SBuf *sb); | ||
28 | LJ_FUNC char * LJ_FASTCALL lj_buf_tmp(lua_State *L, MSize sz); | ||
29 | |||
30 | static LJ_AINLINE void lj_buf_init(lua_State *L, SBuf *sb) | ||
31 | { | ||
32 | setsbufL(sb, L); | ||
33 | setmref(sb->p, NULL); setmref(sb->e, NULL); setmref(sb->b, NULL); | ||
34 | } | ||
35 | |||
36 | static LJ_AINLINE void lj_buf_reset(SBuf *sb) | ||
37 | { | ||
38 | setmrefr(sb->p, sb->b); | ||
39 | } | ||
40 | |||
41 | static LJ_AINLINE SBuf *lj_buf_tmp_(lua_State *L) | ||
42 | { | ||
43 | SBuf *sb = &G(L)->tmpbuf; | ||
44 | setsbufL(sb, L); | ||
45 | lj_buf_reset(sb); | ||
46 | return sb; | ||
47 | } | ||
48 | |||
49 | static LJ_AINLINE void lj_buf_free(global_State *g, SBuf *sb) | ||
50 | { | ||
51 | lj_mem_free(g, sbufB(sb), sbufsz(sb)); | ||
52 | } | ||
53 | |||
54 | static LJ_AINLINE char *lj_buf_need(SBuf *sb, MSize sz) | ||
55 | { | ||
56 | if (LJ_UNLIKELY(sz > sbufsz(sb))) | ||
57 | return lj_buf_need2(sb, sz); | ||
58 | return sbufB(sb); | ||
59 | } | ||
60 | |||
61 | static LJ_AINLINE char *lj_buf_more(SBuf *sb, MSize sz) | ||
62 | { | ||
63 | if (LJ_UNLIKELY(sz > sbufleft(sb))) | ||
64 | return lj_buf_more2(sb, sz); | ||
65 | return sbufP(sb); | ||
66 | } | ||
67 | |||
68 | /* Low-level buffer put operations */ | ||
69 | LJ_FUNC SBuf *lj_buf_putmem(SBuf *sb, const void *q, MSize len); | ||
70 | LJ_FUNC SBuf * LJ_FASTCALL lj_buf_putchar(SBuf *sb, int c); | ||
71 | LJ_FUNC SBuf * LJ_FASTCALL lj_buf_putstr(SBuf *sb, GCstr *s); | ||
72 | |||
73 | static LJ_AINLINE char *lj_buf_wmem(char *p, const void *q, MSize len) | ||
74 | { | ||
75 | return (char *)memcpy(p, q, len) + len; | ||
76 | } | ||
77 | |||
78 | static LJ_AINLINE void lj_buf_putb(SBuf *sb, int c) | ||
79 | { | ||
80 | char *p = lj_buf_more(sb, 1); | ||
81 | *p++ = (char)c; | ||
82 | setsbufP(sb, p); | ||
83 | } | ||
84 | |||
85 | /* High-level buffer put operations */ | ||
86 | LJ_FUNCA SBuf * LJ_FASTCALL lj_buf_putstr_reverse(SBuf *sb, GCstr *s); | ||
87 | LJ_FUNCA SBuf * LJ_FASTCALL lj_buf_putstr_lower(SBuf *sb, GCstr *s); | ||
88 | LJ_FUNCA SBuf * LJ_FASTCALL lj_buf_putstr_upper(SBuf *sb, GCstr *s); | ||
89 | LJ_FUNC SBuf *lj_buf_putstr_rep(SBuf *sb, GCstr *s, int32_t rep); | ||
90 | LJ_FUNC SBuf *lj_buf_puttab(SBuf *sb, GCtab *t, GCstr *sep, | ||
91 | int32_t i, int32_t e); | ||
92 | |||
93 | /* Miscellaneous buffer operations */ | ||
94 | LJ_FUNCA GCstr * LJ_FASTCALL lj_buf_tostr(SBuf *sb); | ||
95 | LJ_FUNC GCstr *lj_buf_cat2str(lua_State *L, GCstr *s1, GCstr *s2); | ||
96 | LJ_FUNC uint32_t LJ_FASTCALL lj_buf_ruleb128(const char **pp); | ||
97 | |||
98 | static LJ_AINLINE GCstr *lj_buf_str(lua_State *L, SBuf *sb) | ||
99 | { | ||
100 | return lj_str_new(L, sbufB(sb), sbuflen(sb)); | ||
101 | } | ||
102 | |||
103 | #endif | ||
diff --git a/src/lj_carith.c b/src/lj_carith.c index b33b1f36..cb7e8db6 100644 --- a/src/lj_carith.c +++ b/src/lj_carith.c | |||
@@ -11,10 +11,12 @@ | |||
11 | #include "lj_err.h" | 11 | #include "lj_err.h" |
12 | #include "lj_tab.h" | 12 | #include "lj_tab.h" |
13 | #include "lj_meta.h" | 13 | #include "lj_meta.h" |
14 | #include "lj_ir.h" | ||
14 | #include "lj_ctype.h" | 15 | #include "lj_ctype.h" |
15 | #include "lj_cconv.h" | 16 | #include "lj_cconv.h" |
16 | #include "lj_cdata.h" | 17 | #include "lj_cdata.h" |
17 | #include "lj_carith.h" | 18 | #include "lj_carith.h" |
19 | #include "lj_strscan.h" | ||
18 | 20 | ||
19 | /* -- C data arithmetic --------------------------------------------------- */ | 21 | /* -- C data arithmetic --------------------------------------------------- */ |
20 | 22 | ||
@@ -281,6 +283,79 @@ int lj_carith_len(lua_State *L) | |||
281 | return lj_carith_meta(L, cts, &ca, MM_len); | 283 | return lj_carith_meta(L, cts, &ca, MM_len); |
282 | } | 284 | } |
283 | 285 | ||
286 | /* -- 64 bit bit operations helpers --------------------------------------- */ | ||
287 | |||
288 | #if LJ_64 | ||
289 | #define B64DEF(name) \ | ||
290 | static LJ_AINLINE uint64_t lj_carith_##name(uint64_t x, int32_t sh) | ||
291 | #else | ||
292 | /* Not inlined on 32 bit archs, since some of these are quite lengthy. */ | ||
293 | #define B64DEF(name) \ | ||
294 | uint64_t LJ_NOINLINE lj_carith_##name(uint64_t x, int32_t sh) | ||
295 | #endif | ||
296 | |||
297 | B64DEF(shl64) { return x << (sh&63); } | ||
298 | B64DEF(shr64) { return x >> (sh&63); } | ||
299 | B64DEF(sar64) { return (uint64_t)((int64_t)x >> (sh&63)); } | ||
300 | B64DEF(rol64) { return lj_rol(x, (sh&63)); } | ||
301 | B64DEF(ror64) { return lj_ror(x, (sh&63)); } | ||
302 | |||
303 | #undef B64DEF | ||
304 | |||
305 | uint64_t lj_carith_shift64(uint64_t x, int32_t sh, int op) | ||
306 | { | ||
307 | switch (op) { | ||
308 | case IR_BSHL-IR_BSHL: x = lj_carith_shl64(x, sh); break; | ||
309 | case IR_BSHR-IR_BSHL: x = lj_carith_shr64(x, sh); break; | ||
310 | case IR_BSAR-IR_BSHL: x = lj_carith_sar64(x, sh); break; | ||
311 | case IR_BROL-IR_BSHL: x = lj_carith_rol64(x, sh); break; | ||
312 | case IR_BROR-IR_BSHL: x = lj_carith_ror64(x, sh); break; | ||
313 | default: lua_assert(0); break; | ||
314 | } | ||
315 | return x; | ||
316 | } | ||
317 | |||
318 | /* Equivalent to lj_lib_checkbit(), but handles cdata. */ | ||
319 | uint64_t lj_carith_check64(lua_State *L, int narg, CTypeID *id) | ||
320 | { | ||
321 | TValue *o = L->base + narg-1; | ||
322 | if (o >= L->top) { | ||
323 | err: | ||
324 | lj_err_argt(L, narg, LUA_TNUMBER); | ||
325 | } else if (LJ_LIKELY(tvisnumber(o))) { | ||
326 | /* Handled below. */ | ||
327 | } else if (tviscdata(o)) { | ||
328 | CTState *cts = ctype_cts(L); | ||
329 | uint8_t *sp = (uint8_t *)cdataptr(cdataV(o)); | ||
330 | CTypeID sid = cdataV(o)->ctypeid; | ||
331 | CType *s = ctype_get(cts, sid); | ||
332 | uint64_t x; | ||
333 | if (ctype_isref(s->info)) { | ||
334 | sp = *(void **)sp; | ||
335 | sid = ctype_cid(s->info); | ||
336 | } | ||
337 | s = ctype_raw(cts, sid); | ||
338 | if (ctype_isenum(s->info)) s = ctype_child(cts, s); | ||
339 | if ((s->info & (CTMASK_NUM|CTF_BOOL|CTF_FP|CTF_UNSIGNED)) == | ||
340 | CTINFO(CT_NUM, CTF_UNSIGNED) && s->size == 8) | ||
341 | *id = CTID_UINT64; /* Use uint64_t, since it has the highest rank. */ | ||
342 | else if (!*id) | ||
343 | *id = CTID_INT64; /* Use int64_t, unless already set. */ | ||
344 | lj_cconv_ct_ct(cts, ctype_get(cts, *id), s, | ||
345 | (uint8_t *)&x, sp, CCF_ARG(narg)); | ||
346 | return x; | ||
347 | } else if (!(tvisstr(o) && lj_strscan_number(strV(o), o))) { | ||
348 | goto err; | ||
349 | } | ||
350 | if (LJ_LIKELY(tvisint(o))) { | ||
351 | return (uint32_t)intV(o); | ||
352 | } else { | ||
353 | int32_t i = lj_num2bit(numV(o)); | ||
354 | if (LJ_DUALNUM) setintV(o, i); | ||
355 | return (uint32_t)i; | ||
356 | } | ||
357 | } | ||
358 | |||
284 | /* -- 64 bit integer arithmetic helpers ----------------------------------- */ | 359 | /* -- 64 bit integer arithmetic helpers ----------------------------------- */ |
285 | 360 | ||
286 | #if LJ_32 && LJ_HASJIT | 361 | #if LJ_32 && LJ_HASJIT |
diff --git a/src/lj_carith.h b/src/lj_carith.h index bac3e1a4..af6225ae 100644 --- a/src/lj_carith.h +++ b/src/lj_carith.h | |||
@@ -13,6 +13,16 @@ | |||
13 | LJ_FUNC int lj_carith_op(lua_State *L, MMS mm); | 13 | LJ_FUNC int lj_carith_op(lua_State *L, MMS mm); |
14 | LJ_FUNC int lj_carith_len(lua_State *L); | 14 | LJ_FUNC int lj_carith_len(lua_State *L); |
15 | 15 | ||
16 | #if LJ_32 | ||
17 | LJ_FUNC uint64_t lj_carith_shl64(uint64_t x, int32_t sh); | ||
18 | LJ_FUNC uint64_t lj_carith_shr64(uint64_t x, int32_t sh); | ||
19 | LJ_FUNC uint64_t lj_carith_sar64(uint64_t x, int32_t sh); | ||
20 | LJ_FUNC uint64_t lj_carith_rol64(uint64_t x, int32_t sh); | ||
21 | LJ_FUNC uint64_t lj_carith_ror64(uint64_t x, int32_t sh); | ||
22 | #endif | ||
23 | LJ_FUNC uint64_t lj_carith_shift64(uint64_t x, int32_t sh, int op); | ||
24 | LJ_FUNC uint64_t lj_carith_check64(lua_State *L, int narg, CTypeID *id); | ||
25 | |||
16 | #if LJ_32 && LJ_HASJIT | 26 | #if LJ_32 && LJ_HASJIT |
17 | LJ_FUNC int64_t lj_carith_mul64(int64_t x, int64_t k); | 27 | LJ_FUNC int64_t lj_carith_mul64(int64_t x, int64_t k); |
18 | #endif | 28 | #endif |
diff --git a/src/lj_ccall.c b/src/lj_ccall.c index fe1e0a3a..a9b81aa5 100644 --- a/src/lj_ccall.c +++ b/src/lj_ccall.c | |||
@@ -9,7 +9,6 @@ | |||
9 | 9 | ||
10 | #include "lj_gc.h" | 10 | #include "lj_gc.h" |
11 | #include "lj_err.h" | 11 | #include "lj_err.h" |
12 | #include "lj_str.h" | ||
13 | #include "lj_tab.h" | 12 | #include "lj_tab.h" |
14 | #include "lj_ctype.h" | 13 | #include "lj_ctype.h" |
15 | #include "lj_cconv.h" | 14 | #include "lj_cconv.h" |
@@ -291,56 +290,84 @@ | |||
291 | #define CCALL_HANDLE_RET \ | 290 | #define CCALL_HANDLE_RET \ |
292 | if ((ct->info & CTF_VARARG)) sp = (uint8_t *)&cc->gpr[0]; | 291 | if ((ct->info & CTF_VARARG)) sp = (uint8_t *)&cc->gpr[0]; |
293 | 292 | ||
294 | #elif LJ_TARGET_PPC | 293 | #elif LJ_TARGET_ARM64 |
295 | /* -- PPC calling conventions --------------------------------------------- */ | 294 | /* -- ARM64 calling conventions ------------------------------------------- */ |
296 | 295 | ||
297 | #define CCALL_HANDLE_STRUCTRET \ | 296 | #define CCALL_HANDLE_STRUCTRET \ |
298 | cc->retref = 1; /* Return all structs by reference. */ \ | 297 | cc->retref = !ccall_classify_struct(cts, ctr); \ |
299 | cc->gpr[ngpr++] = (GPRArg)dp; | 298 | if (cc->retref) cc->retp = dp; |
299 | |||
300 | #define CCALL_HANDLE_STRUCTRET2 \ | ||
301 | unsigned int cl = ccall_classify_struct(cts, ctr); \ | ||
302 | if ((cl & 4)) { /* Combine float HFA from separate registers. */ \ | ||
303 | CTSize i = (cl >> 8) - 1; \ | ||
304 | do { ((uint32_t *)dp)[i] = cc->fpr[i].lo; } while (i--); \ | ||
305 | } else { \ | ||
306 | if (cl > 1) sp = (uint8_t *)&cc->fpr[0]; \ | ||
307 | memcpy(dp, sp, ctr->size); \ | ||
308 | } | ||
300 | 309 | ||
301 | #define CCALL_HANDLE_COMPLEXRET \ | 310 | #define CCALL_HANDLE_COMPLEXRET \ |
302 | /* Complex values are returned in 2 or 4 GPRs. */ \ | 311 | /* Complex values are returned in one or two FPRs. */ \ |
303 | cc->retref = 0; | 312 | cc->retref = 0; |
304 | 313 | ||
305 | #define CCALL_HANDLE_COMPLEXRET2 \ | 314 | #define CCALL_HANDLE_COMPLEXRET2 \ |
306 | memcpy(dp, sp, ctr->size); /* Copy complex from GPRs. */ | 315 | if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \ |
316 | ((float *)dp)[0] = cc->fpr[0].f; \ | ||
317 | ((float *)dp)[1] = cc->fpr[1].f; \ | ||
318 | } else { /* Copy complex double from FPRs. */ \ | ||
319 | ((double *)dp)[0] = cc->fpr[0].d; \ | ||
320 | ((double *)dp)[1] = cc->fpr[1].d; \ | ||
321 | } | ||
307 | 322 | ||
308 | #define CCALL_HANDLE_STRUCTARG \ | 323 | #define CCALL_HANDLE_STRUCTARG \ |
309 | rp = cdataptr(lj_cdata_new(cts, did, sz)); \ | 324 | unsigned int cl = ccall_classify_struct(cts, d); \ |
310 | sz = CTSIZE_PTR; /* Pass all structs by reference. */ | 325 | if (cl == 0) { /* Pass struct by reference. */ \ |
326 | rp = cdataptr(lj_cdata_new(cts, did, sz)); \ | ||
327 | sz = CTSIZE_PTR; \ | ||
328 | } else if (cl > 1) { /* Pass struct in FPRs or on stack. */ \ | ||
329 | isfp = (cl & 4) ? 2 : 1; \ | ||
330 | } /* else: Pass struct in GPRs or on stack. */ | ||
311 | 331 | ||
312 | #define CCALL_HANDLE_COMPLEXARG \ | 332 | #define CCALL_HANDLE_COMPLEXARG \ |
313 | /* Pass complex by value in 2 or 4 GPRs. */ | 333 | /* Pass complex by value in separate (!) FPRs or on stack. */ \ |
334 | isfp = sz == 2*sizeof(float) ? 2 : 1; | ||
314 | 335 | ||
315 | #define CCALL_HANDLE_REGARG \ | 336 | #define CCALL_HANDLE_REGARG \ |
316 | if (isfp) { /* Try to pass argument in FPRs. */ \ | 337 | if (LJ_TARGET_IOS && isva) { \ |
317 | if (nfpr + 1 <= CCALL_NARG_FPR) { \ | 338 | /* IOS: All variadic arguments are on the stack. */ \ |
339 | } else if (isfp) { /* Try to pass argument in FPRs. */ \ | ||
340 | int n2 = ctype_isvector(d->info) ? 1 : n*isfp; \ | ||
341 | if (nfpr + n2 <= CCALL_NARG_FPR) { \ | ||
318 | dp = &cc->fpr[nfpr]; \ | 342 | dp = &cc->fpr[nfpr]; \ |
319 | nfpr += 1; \ | 343 | nfpr += n2; \ |
320 | d = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \ | ||
321 | goto done; \ | 344 | goto done; \ |
345 | } else { \ | ||
346 | nfpr = CCALL_NARG_FPR; /* Prevent reordering. */ \ | ||
347 | if (LJ_TARGET_IOS && d->size < 8) goto err_nyi; \ | ||
322 | } \ | 348 | } \ |
323 | } else { /* Try to pass argument in GPRs. */ \ | 349 | } else { /* Try to pass argument in GPRs. */ \ |
324 | if (n > 1) { \ | 350 | if (!LJ_TARGET_IOS && (d->info & CTF_ALIGN) > CTALIGN_PTR) \ |
325 | lua_assert(n == 2 || n == 4); /* int64_t or complex (float). */ \ | 351 | ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \ |
326 | if (ctype_isinteger(d->info)) \ | ||
327 | ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \ | ||
328 | else if (ngpr + n > maxgpr) \ | ||
329 | ngpr = maxgpr; /* Prevent reordering. */ \ | ||
330 | } \ | ||
331 | if (ngpr + n <= maxgpr) { \ | 352 | if (ngpr + n <= maxgpr) { \ |
332 | dp = &cc->gpr[ngpr]; \ | 353 | dp = &cc->gpr[ngpr]; \ |
333 | ngpr += n; \ | 354 | ngpr += n; \ |
334 | goto done; \ | 355 | goto done; \ |
356 | } else { \ | ||
357 | ngpr = maxgpr; /* Prevent reordering. */ \ | ||
358 | if (LJ_TARGET_IOS && d->size < 8) goto err_nyi; \ | ||
335 | } \ | 359 | } \ |
336 | } | 360 | } |
337 | 361 | ||
362 | #if LJ_BE | ||
338 | #define CCALL_HANDLE_RET \ | 363 | #define CCALL_HANDLE_RET \ |
339 | if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ | 364 | if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ |
340 | ctr = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ | 365 | sp = (uint8_t *)&cc->fpr[0].f; |
366 | #endif | ||
341 | 367 | ||
342 | #elif LJ_TARGET_PPCSPE | 368 | |
343 | /* -- PPC/SPE calling conventions ----------------------------------------- */ | 369 | #elif LJ_TARGET_PPC |
370 | /* -- PPC calling conventions --------------------------------------------- */ | ||
344 | 371 | ||
345 | #define CCALL_HANDLE_STRUCTRET \ | 372 | #define CCALL_HANDLE_STRUCTRET \ |
346 | cc->retref = 1; /* Return all structs by reference. */ \ | 373 | cc->retref = 1; /* Return all structs by reference. */ \ |
@@ -360,12 +387,12 @@ | |||
360 | #define CCALL_HANDLE_COMPLEXARG \ | 387 | #define CCALL_HANDLE_COMPLEXARG \ |
361 | /* Pass complex by value in 2 or 4 GPRs. */ | 388 | /* Pass complex by value in 2 or 4 GPRs. */ |
362 | 389 | ||
363 | /* PPC/SPE has a softfp ABI. */ | 390 | #define CCALL_HANDLE_GPR \ |
364 | #define CCALL_HANDLE_REGARG \ | 391 | /* Try to pass argument in GPRs. */ \ |
365 | if (n > 1) { /* Doesn't fit in a single GPR? */ \ | 392 | if (n > 1) { \ |
366 | lua_assert(n == 2 || n == 4); /* int64_t, double or complex (float). */ \ | 393 | lua_assert(n == 2 || n == 4); /* int64_t or complex (float). */ \ |
367 | if (n == 2) \ | 394 | if (ctype_isinteger(d->info) || ctype_isfp(d->info)) \ |
368 | ngpr = (ngpr + 1u) & ~1u; /* Only align 64 bit value to regpair. */ \ | 395 | ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \ |
369 | else if (ngpr + n > maxgpr) \ | 396 | else if (ngpr + n > maxgpr) \ |
370 | ngpr = maxgpr; /* Prevent reordering. */ \ | 397 | ngpr = maxgpr; /* Prevent reordering. */ \ |
371 | } \ | 398 | } \ |
@@ -373,10 +400,32 @@ | |||
373 | dp = &cc->gpr[ngpr]; \ | 400 | dp = &cc->gpr[ngpr]; \ |
374 | ngpr += n; \ | 401 | ngpr += n; \ |
375 | goto done; \ | 402 | goto done; \ |
403 | } \ | ||
404 | |||
405 | #if LJ_ABI_SOFTFP | ||
406 | #define CCALL_HANDLE_REGARG CCALL_HANDLE_GPR | ||
407 | #else | ||
408 | #define CCALL_HANDLE_REGARG \ | ||
409 | if (isfp) { /* Try to pass argument in FPRs. */ \ | ||
410 | if (nfpr + 1 <= CCALL_NARG_FPR) { \ | ||
411 | dp = &cc->fpr[nfpr]; \ | ||
412 | nfpr += 1; \ | ||
413 | d = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \ | ||
414 | goto done; \ | ||
415 | } \ | ||
416 | } else { \ | ||
417 | CCALL_HANDLE_GPR \ | ||
376 | } | 418 | } |
419 | #endif | ||
377 | 420 | ||
378 | #elif LJ_TARGET_MIPS | 421 | #if !LJ_ABI_SOFTFP |
379 | /* -- MIPS calling conventions -------------------------------------------- */ | 422 | #define CCALL_HANDLE_RET \ |
423 | if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ | ||
424 | ctr = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ | ||
425 | #endif | ||
426 | |||
427 | #elif LJ_TARGET_MIPS32 | ||
428 | /* -- MIPS o32 calling conventions ---------------------------------------- */ | ||
380 | 429 | ||
381 | #define CCALL_HANDLE_STRUCTRET \ | 430 | #define CCALL_HANDLE_STRUCTRET \ |
382 | cc->retref = 1; /* Return all structs by reference. */ \ | 431 | cc->retref = 1; /* Return all structs by reference. */ \ |
@@ -386,6 +435,18 @@ | |||
386 | /* Complex values are returned in 1 or 2 FPRs. */ \ | 435 | /* Complex values are returned in 1 or 2 FPRs. */ \ |
387 | cc->retref = 0; | 436 | cc->retref = 0; |
388 | 437 | ||
438 | #if LJ_ABI_SOFTFP | ||
439 | #define CCALL_HANDLE_COMPLEXRET2 \ | ||
440 | if (ctr->size == 2*sizeof(float)) { /* Copy complex float from GPRs. */ \ | ||
441 | ((intptr_t *)dp)[0] = cc->gpr[0]; \ | ||
442 | ((intptr_t *)dp)[1] = cc->gpr[1]; \ | ||
443 | } else { /* Copy complex double from GPRs. */ \ | ||
444 | ((intptr_t *)dp)[0] = cc->gpr[0]; \ | ||
445 | ((intptr_t *)dp)[1] = cc->gpr[1]; \ | ||
446 | ((intptr_t *)dp)[2] = cc->gpr[2]; \ | ||
447 | ((intptr_t *)dp)[3] = cc->gpr[3]; \ | ||
448 | } | ||
449 | #else | ||
389 | #define CCALL_HANDLE_COMPLEXRET2 \ | 450 | #define CCALL_HANDLE_COMPLEXRET2 \ |
390 | if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \ | 451 | if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \ |
391 | ((float *)dp)[0] = cc->fpr[0].f; \ | 452 | ((float *)dp)[0] = cc->fpr[0].f; \ |
@@ -394,6 +455,7 @@ | |||
394 | ((double *)dp)[0] = cc->fpr[0].d; \ | 455 | ((double *)dp)[0] = cc->fpr[0].d; \ |
395 | ((double *)dp)[1] = cc->fpr[1].d; \ | 456 | ((double *)dp)[1] = cc->fpr[1].d; \ |
396 | } | 457 | } |
458 | #endif | ||
397 | 459 | ||
398 | #define CCALL_HANDLE_STRUCTARG \ | 460 | #define CCALL_HANDLE_STRUCTARG \ |
399 | /* Pass all structs by value in registers and/or on the stack. */ | 461 | /* Pass all structs by value in registers and/or on the stack. */ |
@@ -401,6 +463,22 @@ | |||
401 | #define CCALL_HANDLE_COMPLEXARG \ | 463 | #define CCALL_HANDLE_COMPLEXARG \ |
402 | /* Pass complex by value in 2 or 4 GPRs. */ | 464 | /* Pass complex by value in 2 or 4 GPRs. */ |
403 | 465 | ||
466 | #define CCALL_HANDLE_GPR \ | ||
467 | if ((d->info & CTF_ALIGN) > CTALIGN_PTR) \ | ||
468 | ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \ | ||
469 | if (ngpr < maxgpr) { \ | ||
470 | dp = &cc->gpr[ngpr]; \ | ||
471 | if (ngpr + n > maxgpr) { \ | ||
472 | nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \ | ||
473 | if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \ | ||
474 | ngpr = maxgpr; \ | ||
475 | } else { \ | ||
476 | ngpr += n; \ | ||
477 | } \ | ||
478 | goto done; \ | ||
479 | } | ||
480 | |||
481 | #if !LJ_ABI_SOFTFP /* MIPS32 hard-float */ | ||
404 | #define CCALL_HANDLE_REGARG \ | 482 | #define CCALL_HANDLE_REGARG \ |
405 | if (isfp && nfpr < CCALL_NARG_FPR && !(ct->info & CTF_VARARG)) { \ | 483 | if (isfp && nfpr < CCALL_NARG_FPR && !(ct->info & CTF_VARARG)) { \ |
406 | /* Try to pass argument in FPRs. */ \ | 484 | /* Try to pass argument in FPRs. */ \ |
@@ -409,25 +487,91 @@ | |||
409 | goto done; \ | 487 | goto done; \ |
410 | } else { /* Try to pass argument in GPRs. */ \ | 488 | } else { /* Try to pass argument in GPRs. */ \ |
411 | nfpr = CCALL_NARG_FPR; \ | 489 | nfpr = CCALL_NARG_FPR; \ |
412 | if ((d->info & CTF_ALIGN) > CTALIGN_PTR) \ | 490 | CCALL_HANDLE_GPR \ |
413 | ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \ | 491 | } |
414 | if (ngpr < maxgpr) { \ | 492 | #else /* MIPS32 soft-float */ |
415 | dp = &cc->gpr[ngpr]; \ | 493 | #define CCALL_HANDLE_REGARG CCALL_HANDLE_GPR |
416 | if (ngpr + n > maxgpr) { \ | 494 | #endif |
417 | nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \ | 495 | |
418 | if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \ | 496 | #if !LJ_ABI_SOFTFP |
419 | ngpr = maxgpr; \ | 497 | /* On MIPS64 soft-float, position of float return values is endian-dependant. */ |
420 | } else { \ | 498 | #define CCALL_HANDLE_RET \ |
421 | ngpr += n; \ | 499 | if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ |
422 | } \ | 500 | sp = (uint8_t *)&cc->fpr[0].f; |
423 | goto done; \ | 501 | #endif |
424 | } \ | 502 | |
503 | #elif LJ_TARGET_MIPS64 | ||
504 | /* -- MIPS n64 calling conventions ---------------------------------------- */ | ||
505 | |||
506 | #define CCALL_HANDLE_STRUCTRET \ | ||
507 | cc->retref = !(sz <= 16); \ | ||
508 | if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp; | ||
509 | |||
510 | #define CCALL_HANDLE_STRUCTRET2 \ | ||
511 | ccall_copy_struct(cc, ctr, dp, sp, ccall_classify_struct(cts, ctr, ct)); | ||
512 | |||
513 | #define CCALL_HANDLE_COMPLEXRET \ | ||
514 | /* Complex values are returned in 1 or 2 FPRs. */ \ | ||
515 | cc->retref = 0; | ||
516 | |||
517 | #if LJ_ABI_SOFTFP /* MIPS64 soft-float */ | ||
518 | |||
519 | #define CCALL_HANDLE_COMPLEXRET2 \ | ||
520 | if (ctr->size == 2*sizeof(float)) { /* Copy complex float from GPRs. */ \ | ||
521 | ((intptr_t *)dp)[0] = cc->gpr[0]; \ | ||
522 | } else { /* Copy complex double from GPRs. */ \ | ||
523 | ((intptr_t *)dp)[0] = cc->gpr[0]; \ | ||
524 | ((intptr_t *)dp)[1] = cc->gpr[1]; \ | ||
525 | } | ||
526 | |||
527 | #define CCALL_HANDLE_COMPLEXARG \ | ||
528 | /* Pass complex by value in 2 or 4 GPRs. */ | ||
529 | |||
530 | /* Position of soft-float 'float' return value depends on endianness. */ | ||
531 | #define CCALL_HANDLE_RET \ | ||
532 | if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ | ||
533 | sp = (uint8_t *)cc->gpr + LJ_ENDIAN_SELECT(0, 4); | ||
534 | |||
535 | #else /* MIPS64 hard-float */ | ||
536 | |||
537 | #define CCALL_HANDLE_COMPLEXRET2 \ | ||
538 | if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \ | ||
539 | ((float *)dp)[0] = cc->fpr[0].f; \ | ||
540 | ((float *)dp)[1] = cc->fpr[1].f; \ | ||
541 | } else { /* Copy complex double from FPRs. */ \ | ||
542 | ((double *)dp)[0] = cc->fpr[0].d; \ | ||
543 | ((double *)dp)[1] = cc->fpr[1].d; \ | ||
544 | } | ||
545 | |||
546 | #define CCALL_HANDLE_COMPLEXARG \ | ||
547 | if (sz == 2*sizeof(float)) { \ | ||
548 | isfp = 2; \ | ||
549 | if (ngpr < maxgpr) \ | ||
550 | sz *= 2; \ | ||
425 | } | 551 | } |
426 | 552 | ||
427 | #define CCALL_HANDLE_RET \ | 553 | #define CCALL_HANDLE_RET \ |
428 | if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ | 554 | if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ |
429 | sp = (uint8_t *)&cc->fpr[0].f; | 555 | sp = (uint8_t *)&cc->fpr[0].f; |
430 | 556 | ||
557 | #endif | ||
558 | |||
559 | #define CCALL_HANDLE_STRUCTARG \ | ||
560 | /* Pass all structs by value in registers and/or on the stack. */ | ||
561 | |||
562 | #define CCALL_HANDLE_REGARG \ | ||
563 | if (ngpr < maxgpr) { \ | ||
564 | dp = &cc->gpr[ngpr]; \ | ||
565 | if (ngpr + n > maxgpr) { \ | ||
566 | nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \ | ||
567 | if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \ | ||
568 | ngpr = maxgpr; \ | ||
569 | } else { \ | ||
570 | ngpr += n; \ | ||
571 | } \ | ||
572 | goto done; \ | ||
573 | } | ||
574 | |||
431 | #else | 575 | #else |
432 | #error "Missing calling convention definitions for this architecture" | 576 | #error "Missing calling convention definitions for this architecture" |
433 | #endif | 577 | #endif |
@@ -621,6 +765,125 @@ noth: /* Not a homogeneous float/double aggregate. */ | |||
621 | 765 | ||
622 | #endif | 766 | #endif |
623 | 767 | ||
768 | /* -- ARM64 ABI struct classification ------------------------------------- */ | ||
769 | |||
770 | #if LJ_TARGET_ARM64 | ||
771 | |||
772 | /* Classify a struct based on its fields. */ | ||
773 | static unsigned int ccall_classify_struct(CTState *cts, CType *ct) | ||
774 | { | ||
775 | CTSize sz = ct->size; | ||
776 | unsigned int r = 0, n = 0, isu = (ct->info & CTF_UNION); | ||
777 | while (ct->sib) { | ||
778 | CType *sct; | ||
779 | ct = ctype_get(cts, ct->sib); | ||
780 | if (ctype_isfield(ct->info)) { | ||
781 | sct = ctype_rawchild(cts, ct); | ||
782 | if (ctype_isfp(sct->info)) { | ||
783 | r |= sct->size; | ||
784 | if (!isu) n++; else if (n == 0) n = 1; | ||
785 | } else if (ctype_iscomplex(sct->info)) { | ||
786 | r |= (sct->size >> 1); | ||
787 | if (!isu) n += 2; else if (n < 2) n = 2; | ||
788 | } else if (ctype_isstruct(sct->info)) { | ||
789 | goto substruct; | ||
790 | } else { | ||
791 | goto noth; | ||
792 | } | ||
793 | } else if (ctype_isbitfield(ct->info)) { | ||
794 | goto noth; | ||
795 | } else if (ctype_isxattrib(ct->info, CTA_SUBTYPE)) { | ||
796 | sct = ctype_rawchild(cts, ct); | ||
797 | substruct: | ||
798 | if (sct->size > 0) { | ||
799 | unsigned int s = ccall_classify_struct(cts, sct); | ||
800 | if (s <= 1) goto noth; | ||
801 | r |= (s & 255); | ||
802 | if (!isu) n += (s >> 8); else if (n < (s >>8)) n = (s >> 8); | ||
803 | } | ||
804 | } | ||
805 | } | ||
806 | if ((r == 4 || r == 8) && n <= 4) | ||
807 | return r + (n << 8); | ||
808 | noth: /* Not a homogeneous float/double aggregate. */ | ||
809 | return (sz <= 16); /* Return structs of size <= 16 in GPRs. */ | ||
810 | } | ||
811 | |||
812 | #endif | ||
813 | |||
814 | /* -- MIPS64 ABI struct classification ---------------------------- */ | ||
815 | |||
816 | #if LJ_TARGET_MIPS64 | ||
817 | |||
818 | #define FTYPE_FLOAT 1 | ||
819 | #define FTYPE_DOUBLE 2 | ||
820 | |||
821 | /* Classify FP fields (max. 2) and their types. */ | ||
822 | static unsigned int ccall_classify_struct(CTState *cts, CType *ct, CType *ctf) | ||
823 | { | ||
824 | int n = 0, ft = 0; | ||
825 | if ((ctf->info & CTF_VARARG) || (ct->info & CTF_UNION)) | ||
826 | goto noth; | ||
827 | while (ct->sib) { | ||
828 | CType *sct; | ||
829 | ct = ctype_get(cts, ct->sib); | ||
830 | if (n == 2) { | ||
831 | goto noth; | ||
832 | } else if (ctype_isfield(ct->info)) { | ||
833 | sct = ctype_rawchild(cts, ct); | ||
834 | if (ctype_isfp(sct->info)) { | ||
835 | ft |= (sct->size == 4 ? FTYPE_FLOAT : FTYPE_DOUBLE) << 2*n; | ||
836 | n++; | ||
837 | } else { | ||
838 | goto noth; | ||
839 | } | ||
840 | } else if (ctype_isbitfield(ct->info) || | ||
841 | ctype_isxattrib(ct->info, CTA_SUBTYPE)) { | ||
842 | goto noth; | ||
843 | } | ||
844 | } | ||
845 | if (n <= 2) | ||
846 | return ft; | ||
847 | noth: /* Not a homogeneous float/double aggregate. */ | ||
848 | return 0; /* Struct is in GPRs. */ | ||
849 | } | ||
850 | |||
851 | static void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp, | ||
852 | int ft) | ||
853 | { | ||
854 | if (LJ_ABI_SOFTFP ? ft : | ||
855 | ((ft & 3) == FTYPE_FLOAT || (ft >> 2) == FTYPE_FLOAT)) { | ||
856 | int i, ofs = 0; | ||
857 | for (i = 0; ft != 0; i++, ft >>= 2) { | ||
858 | if ((ft & 3) == FTYPE_FLOAT) { | ||
859 | #if LJ_ABI_SOFTFP | ||
860 | /* The 2nd FP struct result is in CARG1 (gpr[2]) and not CRET2. */ | ||
861 | memcpy((uint8_t *)dp + ofs, | ||
862 | (uint8_t *)&cc->gpr[2*i] + LJ_ENDIAN_SELECT(0, 4), 4); | ||
863 | #else | ||
864 | *(float *)((uint8_t *)dp + ofs) = cc->fpr[i].f; | ||
865 | #endif | ||
866 | ofs += 4; | ||
867 | } else { | ||
868 | ofs = (ofs + 7) & ~7; /* 64 bit alignment. */ | ||
869 | #if LJ_ABI_SOFTFP | ||
870 | *(intptr_t *)((uint8_t *)dp + ofs) = cc->gpr[2*i]; | ||
871 | #else | ||
872 | *(double *)((uint8_t *)dp + ofs) = cc->fpr[i].d; | ||
873 | #endif | ||
874 | ofs += 8; | ||
875 | } | ||
876 | } | ||
877 | } else { | ||
878 | #if !LJ_ABI_SOFTFP | ||
879 | if (ft) sp = (uint8_t *)&cc->fpr[0]; | ||
880 | #endif | ||
881 | memcpy(dp, sp, ctr->size); | ||
882 | } | ||
883 | } | ||
884 | |||
885 | #endif | ||
886 | |||
624 | /* -- Common C call handling ---------------------------------------------- */ | 887 | /* -- Common C call handling ---------------------------------------------- */ |
625 | 888 | ||
626 | /* Infer the destination CTypeID for a vararg argument. */ | 889 | /* Infer the destination CTypeID for a vararg argument. */ |
@@ -788,6 +1051,19 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, | |||
788 | *(int32_t *)dp = d->size == 1 ? (int32_t)*(int8_t *)dp : | 1051 | *(int32_t *)dp = d->size == 1 ? (int32_t)*(int8_t *)dp : |
789 | (int32_t)*(int16_t *)dp; | 1052 | (int32_t)*(int16_t *)dp; |
790 | } | 1053 | } |
1054 | #if LJ_TARGET_ARM64 && LJ_BE | ||
1055 | if (isfp && d->size == sizeof(float)) | ||
1056 | ((float *)dp)[1] = ((float *)dp)[0]; /* Floats occupy high slot. */ | ||
1057 | #endif | ||
1058 | #if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) | ||
1059 | if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info) | ||
1060 | #if LJ_TARGET_MIPS64 | ||
1061 | || (isfp && nsp == 0) | ||
1062 | #endif | ||
1063 | ) && d->size <= 4) { | ||
1064 | *(int64_t *)dp = (int64_t)*(int32_t *)dp; /* Sign-extend to 64 bit. */ | ||
1065 | } | ||
1066 | #endif | ||
791 | #if LJ_TARGET_X64 && LJ_ABI_WIN | 1067 | #if LJ_TARGET_X64 && LJ_ABI_WIN |
792 | if (isva) { /* Windows/x64 mirrors varargs in both register sets. */ | 1068 | if (isva) { /* Windows/x64 mirrors varargs in both register sets. */ |
793 | if (nfpr == ngpr) | 1069 | if (nfpr == ngpr) |
@@ -803,13 +1079,19 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, | |||
803 | cc->fpr[nfpr-1].d[0] = cc->fpr[nfpr-2].d[1]; /* Split complex double. */ | 1079 | cc->fpr[nfpr-1].d[0] = cc->fpr[nfpr-2].d[1]; /* Split complex double. */ |
804 | cc->fpr[nfpr-2].d[1] = 0; | 1080 | cc->fpr[nfpr-2].d[1] = 0; |
805 | } | 1081 | } |
1082 | #elif LJ_TARGET_ARM64 || (LJ_TARGET_MIPS64 && !LJ_ABI_SOFTFP) | ||
1083 | if (isfp == 2 && (uint8_t *)dp < (uint8_t *)cc->stack) { | ||
1084 | /* Split float HFA or complex float into separate registers. */ | ||
1085 | CTSize i = (sz >> 2) - 1; | ||
1086 | do { ((uint64_t *)dp)[i] = ((uint32_t *)dp)[i]; } while (i--); | ||
1087 | } | ||
806 | #else | 1088 | #else |
807 | UNUSED(isfp); | 1089 | UNUSED(isfp); |
808 | #endif | 1090 | #endif |
809 | } | 1091 | } |
810 | if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too few arguments. */ | 1092 | if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too few arguments. */ |
811 | 1093 | ||
812 | #if LJ_TARGET_X64 || LJ_TARGET_PPC | 1094 | #if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP) |
813 | cc->nfpr = nfpr; /* Required for vararg functions. */ | 1095 | cc->nfpr = nfpr; /* Required for vararg functions. */ |
814 | #endif | 1096 | #endif |
815 | cc->nsp = nsp; | 1097 | cc->nsp = nsp; |
@@ -844,7 +1126,8 @@ static int ccall_get_results(lua_State *L, CTState *cts, CType *ct, | |||
844 | CCALL_HANDLE_COMPLEXRET2 | 1126 | CCALL_HANDLE_COMPLEXRET2 |
845 | return 1; /* One GC step. */ | 1127 | return 1; /* One GC step. */ |
846 | } | 1128 | } |
847 | if (LJ_BE && ctype_isinteger_or_bool(ctr->info) && ctr->size < CTSIZE_PTR) | 1129 | if (LJ_BE && ctr->size < CTSIZE_PTR && |
1130 | (ctype_isinteger_or_bool(ctr->info) || ctype_isenum(ctr->info))) | ||
848 | sp += (CTSIZE_PTR - ctr->size); | 1131 | sp += (CTSIZE_PTR - ctr->size); |
849 | #if CCALL_NUM_FPR | 1132 | #if CCALL_NUM_FPR |
850 | if (ctype_isfp(ctr->info) || ctype_isvector(ctr->info)) | 1133 | if (ctype_isfp(ctr->info) || ctype_isvector(ctr->info)) |
diff --git a/src/lj_ccall.h b/src/lj_ccall.h index 5f6d5101..8b3abdf9 100644 --- a/src/lj_ccall.h +++ b/src/lj_ccall.h | |||
@@ -68,35 +68,56 @@ typedef union FPRArg { | |||
68 | float f[2]; | 68 | float f[2]; |
69 | } FPRArg; | 69 | } FPRArg; |
70 | 70 | ||
71 | #elif LJ_TARGET_PPC | 71 | #elif LJ_TARGET_ARM64 |
72 | 72 | ||
73 | #define CCALL_NARG_GPR 8 | 73 | #define CCALL_NARG_GPR 8 |
74 | #define CCALL_NRET_GPR 2 | ||
74 | #define CCALL_NARG_FPR 8 | 75 | #define CCALL_NARG_FPR 8 |
76 | #define CCALL_NRET_FPR 4 | ||
77 | #define CCALL_SPS_FREE 0 | ||
78 | |||
79 | typedef intptr_t GPRArg; | ||
80 | typedef union FPRArg { | ||
81 | double d; | ||
82 | struct { LJ_ENDIAN_LOHI(float f; , float g;) }; | ||
83 | struct { LJ_ENDIAN_LOHI(uint32_t lo; , uint32_t hi;) }; | ||
84 | } FPRArg; | ||
85 | |||
86 | #elif LJ_TARGET_PPC | ||
87 | |||
88 | #define CCALL_NARG_GPR 8 | ||
89 | #define CCALL_NARG_FPR (LJ_ABI_SOFTFP ? 0 : 8) | ||
75 | #define CCALL_NRET_GPR 4 /* For complex double. */ | 90 | #define CCALL_NRET_GPR 4 /* For complex double. */ |
76 | #define CCALL_NRET_FPR 1 | 91 | #define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 1) |
77 | #define CCALL_SPS_EXTRA 4 | 92 | #define CCALL_SPS_EXTRA 4 |
78 | #define CCALL_SPS_FREE 0 | 93 | #define CCALL_SPS_FREE 0 |
79 | 94 | ||
80 | typedef intptr_t GPRArg; | 95 | typedef intptr_t GPRArg; |
81 | typedef double FPRArg; | 96 | typedef double FPRArg; |
82 | 97 | ||
83 | #elif LJ_TARGET_PPCSPE | 98 | #elif LJ_TARGET_MIPS32 |
84 | 99 | ||
85 | #define CCALL_NARG_GPR 8 | 100 | #define CCALL_NARG_GPR 4 |
86 | #define CCALL_NARG_FPR 0 | 101 | #define CCALL_NARG_FPR (LJ_ABI_SOFTFP ? 0 : 2) |
87 | #define CCALL_NRET_GPR 4 /* For softfp complex double. */ | 102 | #define CCALL_NRET_GPR (LJ_ABI_SOFTFP ? 4 : 2) |
88 | #define CCALL_NRET_FPR 0 | 103 | #define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 2) |
89 | #define CCALL_SPS_FREE 0 /* NYI */ | 104 | #define CCALL_SPS_EXTRA 7 |
105 | #define CCALL_SPS_FREE 1 | ||
90 | 106 | ||
91 | typedef intptr_t GPRArg; | 107 | typedef intptr_t GPRArg; |
108 | typedef union FPRArg { | ||
109 | double d; | ||
110 | struct { LJ_ENDIAN_LOHI(float f; , float g;) }; | ||
111 | } FPRArg; | ||
92 | 112 | ||
93 | #elif LJ_TARGET_MIPS | 113 | #elif LJ_TARGET_MIPS64 |
94 | 114 | ||
95 | #define CCALL_NARG_GPR 4 | 115 | /* FP args are positional and overlay the GPR array. */ |
96 | #define CCALL_NARG_FPR 2 | 116 | #define CCALL_NARG_GPR 8 |
117 | #define CCALL_NARG_FPR 0 | ||
97 | #define CCALL_NRET_GPR 2 | 118 | #define CCALL_NRET_GPR 2 |
98 | #define CCALL_NRET_FPR 2 | 119 | #define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 2) |
99 | #define CCALL_SPS_EXTRA 7 | 120 | #define CCALL_SPS_EXTRA 3 |
100 | #define CCALL_SPS_FREE 1 | 121 | #define CCALL_SPS_FREE 1 |
101 | 122 | ||
102 | typedef intptr_t GPRArg; | 123 | typedef intptr_t GPRArg; |
@@ -145,6 +166,8 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState { | |||
145 | uint8_t nfpr; /* Number of arguments in FPRs. */ | 166 | uint8_t nfpr; /* Number of arguments in FPRs. */ |
146 | #elif LJ_TARGET_X86 | 167 | #elif LJ_TARGET_X86 |
147 | uint8_t resx87; /* Result on x87 stack: 1:float, 2:double. */ | 168 | uint8_t resx87; /* Result on x87 stack: 1:float, 2:double. */ |
169 | #elif LJ_TARGET_ARM64 | ||
170 | void *retp; /* Aggregate return pointer in x8. */ | ||
148 | #elif LJ_TARGET_PPC | 171 | #elif LJ_TARGET_PPC |
149 | uint8_t nfpr; /* Number of arguments in FPRs. */ | 172 | uint8_t nfpr; /* Number of arguments in FPRs. */ |
150 | #endif | 173 | #endif |
diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c index 26377f82..4edd8a35 100644 --- a/src/lj_ccallback.c +++ b/src/lj_ccallback.c | |||
@@ -27,7 +27,7 @@ | |||
27 | 27 | ||
28 | #if LJ_OS_NOJIT | 28 | #if LJ_OS_NOJIT |
29 | 29 | ||
30 | /* Disabled callback support. */ | 30 | /* Callbacks disabled. */ |
31 | #define CALLBACK_SLOT2OFS(slot) (0*(slot)) | 31 | #define CALLBACK_SLOT2OFS(slot) (0*(slot)) |
32 | #define CALLBACK_OFS2SLOT(ofs) (0*(ofs)) | 32 | #define CALLBACK_OFS2SLOT(ofs) (0*(ofs)) |
33 | #define CALLBACK_MAX_SLOT 0 | 33 | #define CALLBACK_MAX_SLOT 0 |
@@ -35,7 +35,7 @@ | |||
35 | #elif LJ_TARGET_X86ORX64 | 35 | #elif LJ_TARGET_X86ORX64 |
36 | 36 | ||
37 | #define CALLBACK_MCODE_HEAD (LJ_64 ? 8 : 0) | 37 | #define CALLBACK_MCODE_HEAD (LJ_64 ? 8 : 0) |
38 | #define CALLBACK_MCODE_GROUP (-2+1+2+5+(LJ_64 ? 6 : 5)) | 38 | #define CALLBACK_MCODE_GROUP (-2+1+2+(LJ_GC64 ? 10 : 5)+(LJ_64 ? 6 : 5)) |
39 | 39 | ||
40 | #define CALLBACK_SLOT2OFS(slot) \ | 40 | #define CALLBACK_SLOT2OFS(slot) \ |
41 | (CALLBACK_MCODE_HEAD + CALLBACK_MCODE_GROUP*((slot)/32) + 4*(slot)) | 41 | (CALLBACK_MCODE_HEAD + CALLBACK_MCODE_GROUP*((slot)/32) + 4*(slot)) |
@@ -54,23 +54,22 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs) | |||
54 | #elif LJ_TARGET_ARM | 54 | #elif LJ_TARGET_ARM |
55 | 55 | ||
56 | #define CALLBACK_MCODE_HEAD 32 | 56 | #define CALLBACK_MCODE_HEAD 32 |
57 | #define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + 8*(slot)) | 57 | |
58 | #define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/8) | 58 | #elif LJ_TARGET_ARM64 |
59 | #define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE)) | 59 | |
60 | #define CALLBACK_MCODE_HEAD 32 | ||
60 | 61 | ||
61 | #elif LJ_TARGET_PPC | 62 | #elif LJ_TARGET_PPC |
62 | 63 | ||
63 | #define CALLBACK_MCODE_HEAD 24 | 64 | #define CALLBACK_MCODE_HEAD 24 |
64 | #define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + 8*(slot)) | ||
65 | #define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/8) | ||
66 | #define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE)) | ||
67 | 65 | ||
68 | #elif LJ_TARGET_MIPS | 66 | #elif LJ_TARGET_MIPS32 |
69 | 67 | ||
70 | #define CALLBACK_MCODE_HEAD 24 | 68 | #define CALLBACK_MCODE_HEAD 20 |
71 | #define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + 8*(slot)) | 69 | |
72 | #define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/8) | 70 | #elif LJ_TARGET_MIPS64 |
73 | #define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE)) | 71 | |
72 | #define CALLBACK_MCODE_HEAD 52 | ||
74 | 73 | ||
75 | #else | 74 | #else |
76 | 75 | ||
@@ -81,6 +80,12 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs) | |||
81 | 80 | ||
82 | #endif | 81 | #endif |
83 | 82 | ||
83 | #ifndef CALLBACK_SLOT2OFS | ||
84 | #define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + 8*(slot)) | ||
85 | #define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/8) | ||
86 | #define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE)) | ||
87 | #endif | ||
88 | |||
84 | /* Convert callback slot number to callback function pointer. */ | 89 | /* Convert callback slot number to callback function pointer. */ |
85 | static void *callback_slot2ptr(CTState *cts, MSize slot) | 90 | static void *callback_slot2ptr(CTState *cts, MSize slot) |
86 | { | 91 | { |
@@ -119,8 +124,13 @@ static void callback_mcode_init(global_State *g, uint8_t *page) | |||
119 | /* push ebp/rbp; mov ah, slot>>8; mov ebp, &g. */ | 124 | /* push ebp/rbp; mov ah, slot>>8; mov ebp, &g. */ |
120 | *p++ = XI_PUSH + RID_EBP; | 125 | *p++ = XI_PUSH + RID_EBP; |
121 | *p++ = XI_MOVrib | (RID_EAX+4); *p++ = (uint8_t)(slot >> 8); | 126 | *p++ = XI_MOVrib | (RID_EAX+4); *p++ = (uint8_t)(slot >> 8); |
127 | #if LJ_GC64 | ||
128 | *p++ = 0x48; *p++ = XI_MOVri | RID_EBP; | ||
129 | *(uint64_t *)p = (uint64_t)(g); p += 8; | ||
130 | #else | ||
122 | *p++ = XI_MOVri | RID_EBP; | 131 | *p++ = XI_MOVri | RID_EBP; |
123 | *(int32_t *)p = i32ptr(g); p += 4; | 132 | *(int32_t *)p = i32ptr(g); p += 4; |
133 | #endif | ||
124 | #if LJ_64 | 134 | #if LJ_64 |
125 | /* jmp [rip-pageofs] where lj_vm_ffi_callback is stored. */ | 135 | /* jmp [rip-pageofs] where lj_vm_ffi_callback is stored. */ |
126 | *p++ = XI_GROUP5; *p++ = XM_OFS0 + (XOg_JMP<<3) + RID_EBP; | 136 | *p++ = XI_GROUP5; *p++ = XM_OFS0 + (XOg_JMP<<3) + RID_EBP; |
@@ -157,6 +167,26 @@ static void callback_mcode_init(global_State *g, uint32_t *page) | |||
157 | } | 167 | } |
158 | lua_assert(p - page <= CALLBACK_MCODE_SIZE); | 168 | lua_assert(p - page <= CALLBACK_MCODE_SIZE); |
159 | } | 169 | } |
170 | #elif LJ_TARGET_ARM64 | ||
171 | static void callback_mcode_init(global_State *g, uint32_t *page) | ||
172 | { | ||
173 | uint32_t *p = page; | ||
174 | void *target = (void *)lj_vm_ffi_callback; | ||
175 | MSize slot; | ||
176 | *p++ = A64I_LE(A64I_LDRLx | A64F_D(RID_X11) | A64F_S19(4)); | ||
177 | *p++ = A64I_LE(A64I_LDRLx | A64F_D(RID_X10) | A64F_S19(5)); | ||
178 | *p++ = A64I_LE(A64I_BR | A64F_N(RID_X11)); | ||
179 | *p++ = A64I_LE(A64I_NOP); | ||
180 | ((void **)p)[0] = target; | ||
181 | ((void **)p)[1] = g; | ||
182 | p += 4; | ||
183 | for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { | ||
184 | *p++ = A64I_LE(A64I_MOVZw | A64F_D(RID_X9) | A64F_U16(slot)); | ||
185 | *p = A64I_LE(A64I_B | A64F_S26((page-p) & 0x03ffffffu)); | ||
186 | p++; | ||
187 | } | ||
188 | lua_assert(p - page <= CALLBACK_MCODE_SIZE); | ||
189 | } | ||
160 | #elif LJ_TARGET_PPC | 190 | #elif LJ_TARGET_PPC |
161 | static void callback_mcode_init(global_State *g, uint32_t *page) | 191 | static void callback_mcode_init(global_State *g, uint32_t *page) |
162 | { | 192 | { |
@@ -180,14 +210,27 @@ static void callback_mcode_init(global_State *g, uint32_t *page) | |||
180 | static void callback_mcode_init(global_State *g, uint32_t *page) | 210 | static void callback_mcode_init(global_State *g, uint32_t *page) |
181 | { | 211 | { |
182 | uint32_t *p = page; | 212 | uint32_t *p = page; |
183 | void *target = (void *)lj_vm_ffi_callback; | 213 | uintptr_t target = (uintptr_t)(void *)lj_vm_ffi_callback; |
214 | uintptr_t ug = (uintptr_t)(void *)g; | ||
184 | MSize slot; | 215 | MSize slot; |
185 | *p++ = MIPSI_SW | MIPSF_T(RID_R1)|MIPSF_S(RID_SP) | 0; | 216 | #if LJ_TARGET_MIPS32 |
186 | *p++ = MIPSI_LUI | MIPSF_T(RID_R3) | (u32ptr(target) >> 16); | 217 | *p++ = MIPSI_LUI | MIPSF_T(RID_R3) | (target >> 16); |
187 | *p++ = MIPSI_LUI | MIPSF_T(RID_R2) | (u32ptr(g) >> 16); | 218 | *p++ = MIPSI_LUI | MIPSF_T(RID_R2) | (ug >> 16); |
188 | *p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) |(u32ptr(target)&0xffff); | 219 | #else |
220 | *p++ = MIPSI_LUI | MIPSF_T(RID_R3) | (target >> 48); | ||
221 | *p++ = MIPSI_LUI | MIPSF_T(RID_R2) | (ug >> 48); | ||
222 | *p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) | ((target >> 32) & 0xffff); | ||
223 | *p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | ((ug >> 32) & 0xffff); | ||
224 | *p++ = MIPSI_DSLL | MIPSF_D(RID_R3)|MIPSF_T(RID_R3) | MIPSF_A(16); | ||
225 | *p++ = MIPSI_DSLL | MIPSF_D(RID_R2)|MIPSF_T(RID_R2) | MIPSF_A(16); | ||
226 | *p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) | ((target >> 16) & 0xffff); | ||
227 | *p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | ((ug >> 16) & 0xffff); | ||
228 | *p++ = MIPSI_DSLL | MIPSF_D(RID_R3)|MIPSF_T(RID_R3) | MIPSF_A(16); | ||
229 | *p++ = MIPSI_DSLL | MIPSF_D(RID_R2)|MIPSF_T(RID_R2) | MIPSF_A(16); | ||
230 | #endif | ||
231 | *p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) | (target & 0xffff); | ||
189 | *p++ = MIPSI_JR | MIPSF_S(RID_R3); | 232 | *p++ = MIPSI_JR | MIPSF_S(RID_R3); |
190 | *p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | (u32ptr(g)&0xffff); | 233 | *p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | (ug & 0xffff); |
191 | for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { | 234 | for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { |
192 | *p = MIPSI_B | ((page-p-1) & 0x0000ffffu); | 235 | *p = MIPSI_B | ((page-p-1) & 0x0000ffffu); |
193 | p++; | 236 | p++; |
@@ -224,7 +267,7 @@ static void callback_mcode_new(CTState *cts) | |||
224 | if (CALLBACK_MAX_SLOT == 0) | 267 | if (CALLBACK_MAX_SLOT == 0) |
225 | lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV); | 268 | lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV); |
226 | #if LJ_TARGET_WINDOWS | 269 | #if LJ_TARGET_WINDOWS |
227 | p = VirtualAlloc(NULL, sz, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); | 270 | p = LJ_WIN_VALLOC(NULL, sz, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); |
228 | if (!p) | 271 | if (!p) |
229 | lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV); | 272 | lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV); |
230 | #elif LJ_TARGET_POSIX | 273 | #elif LJ_TARGET_POSIX |
@@ -242,7 +285,7 @@ static void callback_mcode_new(CTState *cts) | |||
242 | #if LJ_TARGET_WINDOWS | 285 | #if LJ_TARGET_WINDOWS |
243 | { | 286 | { |
244 | DWORD oprot; | 287 | DWORD oprot; |
245 | VirtualProtect(p, sz, PAGE_EXECUTE_READ, &oprot); | 288 | LJ_WIN_VPROTECT(p, sz, PAGE_EXECUTE_READ, &oprot); |
246 | } | 289 | } |
247 | #elif LJ_TARGET_POSIX | 290 | #elif LJ_TARGET_POSIX |
248 | mprotect(p, sz, (PROT_READ|PROT_EXEC)); | 291 | mprotect(p, sz, (PROT_READ|PROT_EXEC)); |
@@ -351,33 +394,77 @@ void lj_ccallback_mcode_free(CTState *cts) | |||
351 | goto done; \ | 394 | goto done; \ |
352 | } CALLBACK_HANDLE_REGARG_FP2 | 395 | } CALLBACK_HANDLE_REGARG_FP2 |
353 | 396 | ||
354 | #elif LJ_TARGET_PPC | 397 | #elif LJ_TARGET_ARM64 |
355 | 398 | ||
356 | #define CALLBACK_HANDLE_REGARG \ | 399 | #define CALLBACK_HANDLE_REGARG \ |
357 | if (isfp) { \ | 400 | if (isfp) { \ |
358 | if (nfpr + 1 <= CCALL_NARG_FPR) { \ | 401 | if (nfpr + n <= CCALL_NARG_FPR) { \ |
359 | sp = &cts->cb.fpr[nfpr++]; \ | 402 | sp = &cts->cb.fpr[nfpr]; \ |
360 | cta = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \ | 403 | nfpr += n; \ |
361 | goto done; \ | 404 | goto done; \ |
405 | } else { \ | ||
406 | nfpr = CCALL_NARG_FPR; /* Prevent reordering. */ \ | ||
362 | } \ | 407 | } \ |
363 | } else { /* Try to pass argument in GPRs. */ \ | 408 | } else { \ |
364 | if (n > 1) { \ | 409 | if (!LJ_TARGET_IOS && n > 1) \ |
365 | lua_assert(ctype_isinteger(cta->info) && n == 2); /* int64_t. */ \ | 410 | ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \ |
366 | ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \ | ||
367 | } \ | ||
368 | if (ngpr + n <= maxgpr) { \ | 411 | if (ngpr + n <= maxgpr) { \ |
369 | sp = &cts->cb.gpr[ngpr]; \ | 412 | sp = &cts->cb.gpr[ngpr]; \ |
370 | ngpr += n; \ | 413 | ngpr += n; \ |
371 | goto done; \ | 414 | goto done; \ |
415 | } else { \ | ||
416 | ngpr = CCALL_NARG_GPR; /* Prevent reordering. */ \ | ||
417 | } \ | ||
418 | } | ||
419 | |||
420 | #elif LJ_TARGET_PPC | ||
421 | |||
422 | #define CALLBACK_HANDLE_GPR \ | ||
423 | if (n > 1) { \ | ||
424 | lua_assert(((LJ_ABI_SOFTFP && ctype_isnum(cta->info)) || /* double. */ \ | ||
425 | ctype_isinteger(cta->info)) && n == 2); /* int64_t. */ \ | ||
426 | ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \ | ||
427 | } \ | ||
428 | if (ngpr + n <= maxgpr) { \ | ||
429 | sp = &cts->cb.gpr[ngpr]; \ | ||
430 | ngpr += n; \ | ||
431 | goto done; \ | ||
432 | } | ||
433 | |||
434 | #if LJ_ABI_SOFTFP | ||
435 | #define CALLBACK_HANDLE_REGARG \ | ||
436 | CALLBACK_HANDLE_GPR \ | ||
437 | UNUSED(isfp); | ||
438 | #else | ||
439 | #define CALLBACK_HANDLE_REGARG \ | ||
440 | if (isfp) { \ | ||
441 | if (nfpr + 1 <= CCALL_NARG_FPR) { \ | ||
442 | sp = &cts->cb.fpr[nfpr++]; \ | ||
443 | cta = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \ | ||
444 | goto done; \ | ||
372 | } \ | 445 | } \ |
446 | } else { /* Try to pass argument in GPRs. */ \ | ||
447 | CALLBACK_HANDLE_GPR \ | ||
373 | } | 448 | } |
449 | #endif | ||
374 | 450 | ||
451 | #if !LJ_ABI_SOFTFP | ||
375 | #define CALLBACK_HANDLE_RET \ | 452 | #define CALLBACK_HANDLE_RET \ |
376 | if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ | 453 | if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ |
377 | *(double *)dp = *(float *)dp; /* FPRs always hold doubles. */ | 454 | *(double *)dp = *(float *)dp; /* FPRs always hold doubles. */ |
455 | #endif | ||
378 | 456 | ||
379 | #elif LJ_TARGET_MIPS | 457 | #elif LJ_TARGET_MIPS32 |
380 | 458 | ||
459 | #define CALLBACK_HANDLE_GPR \ | ||
460 | if (n > 1) ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \ | ||
461 | if (ngpr + n <= maxgpr) { \ | ||
462 | sp = &cts->cb.gpr[ngpr]; \ | ||
463 | ngpr += n; \ | ||
464 | goto done; \ | ||
465 | } | ||
466 | |||
467 | #if !LJ_ABI_SOFTFP /* MIPS32 hard-float */ | ||
381 | #define CALLBACK_HANDLE_REGARG \ | 468 | #define CALLBACK_HANDLE_REGARG \ |
382 | if (isfp && nfpr < CCALL_NARG_FPR) { /* Try to pass argument in FPRs. */ \ | 469 | if (isfp && nfpr < CCALL_NARG_FPR) { /* Try to pass argument in FPRs. */ \ |
383 | sp = (void *)((uint8_t *)&cts->cb.fpr[nfpr] + ((LJ_BE && n==1) ? 4 : 0)); \ | 470 | sp = (void *)((uint8_t *)&cts->cb.fpr[nfpr] + ((LJ_BE && n==1) ? 4 : 0)); \ |
@@ -385,13 +472,36 @@ void lj_ccallback_mcode_free(CTState *cts) | |||
385 | goto done; \ | 472 | goto done; \ |
386 | } else { /* Try to pass argument in GPRs. */ \ | 473 | } else { /* Try to pass argument in GPRs. */ \ |
387 | nfpr = CCALL_NARG_FPR; \ | 474 | nfpr = CCALL_NARG_FPR; \ |
388 | if (n > 1) ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \ | 475 | CALLBACK_HANDLE_GPR \ |
389 | if (ngpr + n <= maxgpr) { \ | 476 | } |
390 | sp = &cts->cb.gpr[ngpr]; \ | 477 | #else /* MIPS32 soft-float */ |
391 | ngpr += n; \ | 478 | #define CALLBACK_HANDLE_REGARG \ |
392 | goto done; \ | 479 | CALLBACK_HANDLE_GPR \ |
393 | } \ | 480 | UNUSED(isfp); |
481 | #endif | ||
482 | |||
483 | #define CALLBACK_HANDLE_RET \ | ||
484 | if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ | ||
485 | ((float *)dp)[1] = *(float *)dp; | ||
486 | |||
487 | #elif LJ_TARGET_MIPS64 | ||
488 | |||
489 | #if !LJ_ABI_SOFTFP /* MIPS64 hard-float */ | ||
490 | #define CALLBACK_HANDLE_REGARG \ | ||
491 | if (ngpr + n <= maxgpr) { \ | ||
492 | sp = isfp ? (void*) &cts->cb.fpr[ngpr] : (void*) &cts->cb.gpr[ngpr]; \ | ||
493 | ngpr += n; \ | ||
494 | goto done; \ | ||
394 | } | 495 | } |
496 | #else /* MIPS64 soft-float */ | ||
497 | #define CALLBACK_HANDLE_REGARG \ | ||
498 | if (ngpr + n <= maxgpr) { \ | ||
499 | UNUSED(isfp); \ | ||
500 | sp = (void*) &cts->cb.gpr[ngpr]; \ | ||
501 | ngpr += n; \ | ||
502 | goto done; \ | ||
503 | } | ||
504 | #endif | ||
395 | 505 | ||
396 | #define CALLBACK_HANDLE_RET \ | 506 | #define CALLBACK_HANDLE_RET \ |
397 | if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ | 507 | if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ |
@@ -411,6 +521,7 @@ static void callback_conv_args(CTState *cts, lua_State *L) | |||
411 | int gcsteps = 0; | 521 | int gcsteps = 0; |
412 | CType *ct; | 522 | CType *ct; |
413 | GCfunc *fn; | 523 | GCfunc *fn; |
524 | int fntp; | ||
414 | MSize ngpr = 0, nsp = 0, maxgpr = CCALL_NARG_GPR; | 525 | MSize ngpr = 0, nsp = 0, maxgpr = CCALL_NARG_GPR; |
415 | #if CCALL_NARG_FPR | 526 | #if CCALL_NARG_FPR |
416 | MSize nfpr = 0; | 527 | MSize nfpr = 0; |
@@ -421,18 +532,27 @@ static void callback_conv_args(CTState *cts, lua_State *L) | |||
421 | 532 | ||
422 | if (slot < cts->cb.sizeid && (id = cts->cb.cbid[slot]) != 0) { | 533 | if (slot < cts->cb.sizeid && (id = cts->cb.cbid[slot]) != 0) { |
423 | ct = ctype_get(cts, id); | 534 | ct = ctype_get(cts, id); |
424 | rid = ctype_cid(ct->info); | 535 | rid = ctype_cid(ct->info); /* Return type. x86: +(spadj<<16). */ |
425 | fn = funcV(lj_tab_getint(cts->miscmap, (int32_t)slot)); | 536 | fn = funcV(lj_tab_getint(cts->miscmap, (int32_t)slot)); |
537 | fntp = LJ_TFUNC; | ||
426 | } else { /* Must set up frame first, before throwing the error. */ | 538 | } else { /* Must set up frame first, before throwing the error. */ |
427 | ct = NULL; | 539 | ct = NULL; |
428 | rid = 0; | 540 | rid = 0; |
429 | fn = (GCfunc *)L; | 541 | fn = (GCfunc *)L; |
542 | fntp = LJ_TTHREAD; | ||
543 | } | ||
544 | /* Continuation returns from callback. */ | ||
545 | if (LJ_FR2) { | ||
546 | (o++)->u64 = LJ_CONT_FFI_CALLBACK; | ||
547 | (o++)->u64 = rid; | ||
548 | o++; | ||
549 | } else { | ||
550 | o->u32.lo = LJ_CONT_FFI_CALLBACK; | ||
551 | o->u32.hi = rid; | ||
552 | o++; | ||
430 | } | 553 | } |
431 | o->u32.lo = LJ_CONT_FFI_CALLBACK; /* Continuation returns from callback. */ | 554 | setframe_gc(o, obj2gco(fn), fntp); |
432 | o->u32.hi = rid; /* Return type. x86: +(spadj<<16). */ | 555 | setframe_ftsz(o, ((char *)(o+1) - (char *)L->base) + FRAME_CONT); |
433 | o++; | ||
434 | setframe_gc(o, obj2gco(fn)); | ||
435 | setframe_ftsz(o, (int)((char *)(o+1) - (char *)L->base) + FRAME_CONT); | ||
436 | L->top = L->base = ++o; | 556 | L->top = L->base = ++o; |
437 | if (!ct) | 557 | if (!ct) |
438 | lj_err_caller(cts->L, LJ_ERR_FFI_BADCBACK); | 558 | lj_err_caller(cts->L, LJ_ERR_FFI_BADCBACK); |
@@ -474,7 +594,11 @@ static void callback_conv_args(CTState *cts, lua_State *L) | |||
474 | nsp += n; | 594 | nsp += n; |
475 | 595 | ||
476 | done: | 596 | done: |
477 | if (LJ_BE && cta->size < CTSIZE_PTR) | 597 | if (LJ_BE && cta->size < CTSIZE_PTR |
598 | #if LJ_TARGET_MIPS64 | ||
599 | && !(isfp && nsp) | ||
600 | #endif | ||
601 | ) | ||
478 | sp = (void *)((uint8_t *)sp + CTSIZE_PTR-cta->size); | 602 | sp = (void *)((uint8_t *)sp + CTSIZE_PTR-cta->size); |
479 | gcsteps += lj_cconv_tv_ct(cts, cta, 0, o++, sp); | 603 | gcsteps += lj_cconv_tv_ct(cts, cta, 0, o++, sp); |
480 | } | 604 | } |
@@ -483,9 +607,14 @@ static void callback_conv_args(CTState *cts, lua_State *L) | |||
483 | L->top = o; | 607 | L->top = o; |
484 | #if LJ_TARGET_X86 | 608 | #if LJ_TARGET_X86 |
485 | /* Store stack adjustment for returns from non-cdecl callbacks. */ | 609 | /* Store stack adjustment for returns from non-cdecl callbacks. */ |
486 | if (ctype_cconv(ct->info) != CTCC_CDECL) | 610 | if (ctype_cconv(ct->info) != CTCC_CDECL) { |
611 | #if LJ_FR2 | ||
612 | (L->base-3)->u64 |= (nsp << (16+2)); | ||
613 | #else | ||
487 | (L->base-2)->u32.hi |= (nsp << (16+2)); | 614 | (L->base-2)->u32.hi |= (nsp << (16+2)); |
488 | #endif | 615 | #endif |
616 | } | ||
617 | #endif | ||
489 | while (gcsteps-- > 0) | 618 | while (gcsteps-- > 0) |
490 | lj_gc_check(L); | 619 | lj_gc_check(L); |
491 | } | 620 | } |
@@ -493,7 +622,11 @@ static void callback_conv_args(CTState *cts, lua_State *L) | |||
493 | /* Convert Lua object to callback result. */ | 622 | /* Convert Lua object to callback result. */ |
494 | static void callback_conv_result(CTState *cts, lua_State *L, TValue *o) | 623 | static void callback_conv_result(CTState *cts, lua_State *L, TValue *o) |
495 | { | 624 | { |
625 | #if LJ_FR2 | ||
626 | CType *ctr = ctype_raw(cts, (uint16_t)(L->base-3)->u64); | ||
627 | #else | ||
496 | CType *ctr = ctype_raw(cts, (uint16_t)(L->base-2)->u32.hi); | 628 | CType *ctr = ctype_raw(cts, (uint16_t)(L->base-2)->u32.hi); |
629 | #endif | ||
497 | #if LJ_TARGET_X86 | 630 | #if LJ_TARGET_X86 |
498 | cts->cb.gpr[2] = 0; | 631 | cts->cb.gpr[2] = 0; |
499 | #endif | 632 | #endif |
@@ -503,6 +636,10 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o) | |||
503 | if (ctype_isfp(ctr->info)) | 636 | if (ctype_isfp(ctr->info)) |
504 | dp = (uint8_t *)&cts->cb.fpr[0]; | 637 | dp = (uint8_t *)&cts->cb.fpr[0]; |
505 | #endif | 638 | #endif |
639 | #if LJ_TARGET_ARM64 && LJ_BE | ||
640 | if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) | ||
641 | dp = (uint8_t *)&cts->cb.fpr[0].f[1]; | ||
642 | #endif | ||
506 | lj_cconv_ct_tv(cts, ctr, dp, o, 0); | 643 | lj_cconv_ct_tv(cts, ctr, dp, o, 0); |
507 | #ifdef CALLBACK_HANDLE_RET | 644 | #ifdef CALLBACK_HANDLE_RET |
508 | CALLBACK_HANDLE_RET | 645 | CALLBACK_HANDLE_RET |
@@ -516,6 +653,12 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o) | |||
516 | *(int32_t *)dp = ctr->size == 1 ? (int32_t)*(int8_t *)dp : | 653 | *(int32_t *)dp = ctr->size == 1 ? (int32_t)*(int8_t *)dp : |
517 | (int32_t)*(int16_t *)dp; | 654 | (int32_t)*(int16_t *)dp; |
518 | } | 655 | } |
656 | #if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) | ||
657 | /* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */ | ||
658 | if (ctr->size <= 4 && | ||
659 | (LJ_ABI_SOFTFP || ctype_isinteger_or_bool(ctr->info))) | ||
660 | *(int64_t *)dp = (int64_t)*(int32_t *)dp; | ||
661 | #endif | ||
519 | #if LJ_TARGET_X86 | 662 | #if LJ_TARGET_X86 |
520 | if (ctype_isfp(ctr->info)) | 663 | if (ctype_isfp(ctr->info)) |
521 | cts->cb.gpr[2] = ctr->size == sizeof(float) ? 1 : 2; | 664 | cts->cb.gpr[2] = ctr->size == sizeof(float) ? 1 : 2; |
@@ -529,7 +672,7 @@ lua_State * LJ_FASTCALL lj_ccallback_enter(CTState *cts, void *cf) | |||
529 | lua_State *L = cts->L; | 672 | lua_State *L = cts->L; |
530 | global_State *g = cts->g; | 673 | global_State *g = cts->g; |
531 | lua_assert(L != NULL); | 674 | lua_assert(L != NULL); |
532 | if (gcref(g->jit_L)) { | 675 | if (tvref(g->jit_base)) { |
533 | setstrV(L, L->top++, lj_err_str(L, LJ_ERR_FFI_BADCBACK)); | 676 | setstrV(L, L->top++, lj_err_str(L, LJ_ERR_FFI_BADCBACK)); |
534 | if (g->panic) g->panic(L); | 677 | if (g->panic) g->panic(L); |
535 | exit(EXIT_FAILURE); | 678 | exit(EXIT_FAILURE); |
@@ -562,9 +705,9 @@ void LJ_FASTCALL lj_ccallback_leave(CTState *cts, TValue *o) | |||
562 | } | 705 | } |
563 | callback_conv_result(cts, L, o); | 706 | callback_conv_result(cts, L, o); |
564 | /* Finally drop C frame and continuation frame. */ | 707 | /* Finally drop C frame and continuation frame. */ |
565 | L->cframe = cframe_prev(L->cframe); | 708 | L->top -= 2+2*LJ_FR2; |
566 | L->top -= 2; | ||
567 | L->base = obase; | 709 | L->base = obase; |
710 | L->cframe = cframe_prev(L->cframe); | ||
568 | cts->cb.slot = 0; /* Blacklist C function that called the callback. */ | 711 | cts->cb.slot = 0; /* Blacklist C function that called the callback. */ |
569 | } | 712 | } |
570 | 713 | ||
diff --git a/src/lj_cconv.c b/src/lj_cconv.c index 99776b0e..03ed0ce2 100644 --- a/src/lj_cconv.c +++ b/src/lj_cconv.c | |||
@@ -448,8 +448,10 @@ int lj_cconv_tv_bf(CTState *cts, CType *s, TValue *o, uint8_t *sp) | |||
448 | setintV(o, (int32_t)val); | 448 | setintV(o, (int32_t)val); |
449 | } | 449 | } |
450 | } else { | 450 | } else { |
451 | uint32_t b = (val >> pos) & 1; | ||
451 | lua_assert(bsz == 1); | 452 | lua_assert(bsz == 1); |
452 | setboolV(o, (val >> pos) & 1); | 453 | setboolV(o, b); |
454 | setboolV(&cts->g->tmptv2, b); /* Remember for trace recorder. */ | ||
453 | } | 455 | } |
454 | return 0; /* No GC step needed. */ | 456 | return 0; /* No GC step needed. */ |
455 | } | 457 | } |
diff --git a/src/lj_cdata.c b/src/lj_cdata.c index 4aeb0ce3..10d9423d 100644 --- a/src/lj_cdata.c +++ b/src/lj_cdata.c | |||
@@ -9,7 +9,6 @@ | |||
9 | 9 | ||
10 | #include "lj_gc.h" | 10 | #include "lj_gc.h" |
11 | #include "lj_err.h" | 11 | #include "lj_err.h" |
12 | #include "lj_str.h" | ||
13 | #include "lj_tab.h" | 12 | #include "lj_tab.h" |
14 | #include "lj_ctype.h" | 13 | #include "lj_ctype.h" |
15 | #include "lj_cconv.h" | 14 | #include "lj_cconv.h" |
@@ -27,12 +26,12 @@ GCcdata *lj_cdata_newref(CTState *cts, const void *p, CTypeID id) | |||
27 | } | 26 | } |
28 | 27 | ||
29 | /* Allocate variable-sized or specially aligned C data object. */ | 28 | /* Allocate variable-sized or specially aligned C data object. */ |
30 | GCcdata *lj_cdata_newv(CTState *cts, CTypeID id, CTSize sz, CTSize align) | 29 | GCcdata *lj_cdata_newv(lua_State *L, CTypeID id, CTSize sz, CTSize align) |
31 | { | 30 | { |
32 | global_State *g; | 31 | global_State *g; |
33 | MSize extra = sizeof(GCcdataVar) + sizeof(GCcdata) + | 32 | MSize extra = sizeof(GCcdataVar) + sizeof(GCcdata) + |
34 | (align > CT_MEMALIGN ? (1u<<align) - (1u<<CT_MEMALIGN) : 0); | 33 | (align > CT_MEMALIGN ? (1u<<align) - (1u<<CT_MEMALIGN) : 0); |
35 | char *p = lj_mem_newt(cts->L, extra + sz, char); | 34 | char *p = lj_mem_newt(L, extra + sz, char); |
36 | uintptr_t adata = (uintptr_t)p + sizeof(GCcdataVar) + sizeof(GCcdata); | 35 | uintptr_t adata = (uintptr_t)p + sizeof(GCcdataVar) + sizeof(GCcdata); |
37 | uintptr_t almask = (1u << align) - 1u; | 36 | uintptr_t almask = (1u << align) - 1u; |
38 | GCcdata *cd = (GCcdata *)(((adata + almask) & ~almask) - sizeof(GCcdata)); | 37 | GCcdata *cd = (GCcdata *)(((adata + almask) & ~almask) - sizeof(GCcdata)); |
@@ -40,7 +39,7 @@ GCcdata *lj_cdata_newv(CTState *cts, CTypeID id, CTSize sz, CTSize align) | |||
40 | cdatav(cd)->offset = (uint16_t)((char *)cd - p); | 39 | cdatav(cd)->offset = (uint16_t)((char *)cd - p); |
41 | cdatav(cd)->extra = extra; | 40 | cdatav(cd)->extra = extra; |
42 | cdatav(cd)->len = sz; | 41 | cdatav(cd)->len = sz; |
43 | g = cts->g; | 42 | g = G(L); |
44 | setgcrefr(cd->nextgc, g->gc.root); | 43 | setgcrefr(cd->nextgc, g->gc.root); |
45 | setgcref(g->gc.root, obj2gco(cd)); | 44 | setgcref(g->gc.root, obj2gco(cd)); |
46 | newwhite(g, obj2gco(cd)); | 45 | newwhite(g, obj2gco(cd)); |
@@ -50,6 +49,15 @@ GCcdata *lj_cdata_newv(CTState *cts, CTypeID id, CTSize sz, CTSize align) | |||
50 | return cd; | 49 | return cd; |
51 | } | 50 | } |
52 | 51 | ||
52 | /* Allocate arbitrary C data object. */ | ||
53 | GCcdata *lj_cdata_newx(CTState *cts, CTypeID id, CTSize sz, CTInfo info) | ||
54 | { | ||
55 | if (!(info & CTF_VLA) && ctype_align(info) <= CT_MEMALIGN) | ||
56 | return lj_cdata_new(cts, id, sz); | ||
57 | else | ||
58 | return lj_cdata_newv(cts->L, id, sz, ctype_align(info)); | ||
59 | } | ||
60 | |||
53 | /* Free a C data object. */ | 61 | /* Free a C data object. */ |
54 | void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd) | 62 | void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd) |
55 | { | 63 | { |
@@ -76,21 +84,22 @@ void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd) | |||
76 | } | 84 | } |
77 | } | 85 | } |
78 | 86 | ||
79 | TValue * LJ_FASTCALL lj_cdata_setfin(lua_State *L, GCcdata *cd) | 87 | void lj_cdata_setfin(lua_State *L, GCcdata *cd, GCobj *obj, uint32_t it) |
80 | { | 88 | { |
81 | global_State *g = G(L); | 89 | GCtab *t = ctype_ctsG(G(L))->finalizer; |
82 | GCtab *t = ctype_ctsG(g)->finalizer; | ||
83 | if (gcref(t->metatable)) { | 90 | if (gcref(t->metatable)) { |
84 | /* Add cdata to finalizer table, if still enabled. */ | 91 | /* Add cdata to finalizer table, if still enabled. */ |
85 | TValue *tv, tmp; | 92 | TValue *tv, tmp; |
86 | setcdataV(L, &tmp, cd); | 93 | setcdataV(L, &tmp, cd); |
87 | lj_gc_anybarriert(L, t); | 94 | lj_gc_anybarriert(L, t); |
88 | tv = lj_tab_set(L, t, &tmp); | 95 | tv = lj_tab_set(L, t, &tmp); |
89 | cd->marked |= LJ_GC_CDATA_FIN; | 96 | if (it == LJ_TNIL) { |
90 | return tv; | 97 | setnilV(tv); |
91 | } else { | 98 | cd->marked &= ~LJ_GC_CDATA_FIN; |
92 | /* Otherwise return dummy TValue. */ | 99 | } else { |
93 | return &g->tmptv; | 100 | setgcV(L, tv, obj, it); |
101 | cd->marked |= LJ_GC_CDATA_FIN; | ||
102 | } | ||
94 | } | 103 | } |
95 | } | 104 | } |
96 | 105 | ||
@@ -123,7 +132,12 @@ collect_attrib: | |||
123 | idx = (ptrdiff_t)intV(key); | 132 | idx = (ptrdiff_t)intV(key); |
124 | goto integer_key; | 133 | goto integer_key; |
125 | } else if (tvisnum(key)) { /* Numeric key. */ | 134 | } else if (tvisnum(key)) { /* Numeric key. */ |
126 | idx = LJ_64 ? (ptrdiff_t)numV(key) : (ptrdiff_t)lj_num2int(numV(key)); | 135 | #ifdef _MSC_VER |
136 | /* Workaround for MSVC bug. */ | ||
137 | volatile | ||
138 | #endif | ||
139 | lua_Number n = numV(key); | ||
140 | idx = LJ_64 ? (ptrdiff_t)n : (ptrdiff_t)lj_num2int(n); | ||
127 | integer_key: | 141 | integer_key: |
128 | if (ctype_ispointer(ct->info)) { | 142 | if (ctype_ispointer(ct->info)) { |
129 | CTSize sz = lj_ctype_size(cts, ctype_cid(ct->info)); /* Element size. */ | 143 | CTSize sz = lj_ctype_size(cts, ctype_cid(ct->info)); /* Element size. */ |
diff --git a/src/lj_cdata.h b/src/lj_cdata.h index 2ce90bdf..c1089e64 100644 --- a/src/lj_cdata.h +++ b/src/lj_cdata.h | |||
@@ -58,11 +58,14 @@ static LJ_AINLINE GCcdata *lj_cdata_new_(lua_State *L, CTypeID id, CTSize sz) | |||
58 | } | 58 | } |
59 | 59 | ||
60 | LJ_FUNC GCcdata *lj_cdata_newref(CTState *cts, const void *pp, CTypeID id); | 60 | LJ_FUNC GCcdata *lj_cdata_newref(CTState *cts, const void *pp, CTypeID id); |
61 | LJ_FUNC GCcdata *lj_cdata_newv(CTState *cts, CTypeID id, CTSize sz, | 61 | LJ_FUNC GCcdata *lj_cdata_newv(lua_State *L, CTypeID id, CTSize sz, |
62 | CTSize align); | 62 | CTSize align); |
63 | LJ_FUNC GCcdata *lj_cdata_newx(CTState *cts, CTypeID id, CTSize sz, | ||
64 | CTInfo info); | ||
63 | 65 | ||
64 | LJ_FUNC void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd); | 66 | LJ_FUNC void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd); |
65 | LJ_FUNCA TValue * LJ_FASTCALL lj_cdata_setfin(lua_State *L, GCcdata *cd); | 67 | LJ_FUNC void lj_cdata_setfin(lua_State *L, GCcdata *cd, GCobj *obj, |
68 | uint32_t it); | ||
66 | 69 | ||
67 | LJ_FUNC CType *lj_cdata_index(CTState *cts, GCcdata *cd, cTValue *key, | 70 | LJ_FUNC CType *lj_cdata_index(CTState *cts, GCcdata *cd, cTValue *key, |
68 | uint8_t **pp, CTInfo *qual); | 71 | uint8_t **pp, CTInfo *qual); |
diff --git a/src/lj_clib.c b/src/lj_clib.c index df20aca3..2ea6ff45 100644 --- a/src/lj_clib.c +++ b/src/lj_clib.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include "lj_cconv.h" | 16 | #include "lj_cconv.h" |
17 | #include "lj_cdata.h" | 17 | #include "lj_cdata.h" |
18 | #include "lj_clib.h" | 18 | #include "lj_clib.h" |
19 | #include "lj_strfmt.h" | ||
19 | 20 | ||
20 | /* -- OS-specific functions ----------------------------------------------- */ | 21 | /* -- OS-specific functions ----------------------------------------------- */ |
21 | 22 | ||
@@ -61,7 +62,7 @@ static const char *clib_extname(lua_State *L, const char *name) | |||
61 | #endif | 62 | #endif |
62 | ) { | 63 | ) { |
63 | if (!strchr(name, '.')) { | 64 | if (!strchr(name, '.')) { |
64 | name = lj_str_pushf(L, CLIB_SOEXT, name); | 65 | name = lj_strfmt_pushf(L, CLIB_SOEXT, name); |
65 | L->top--; | 66 | L->top--; |
66 | #if LJ_TARGET_CYGWIN | 67 | #if LJ_TARGET_CYGWIN |
67 | } else { | 68 | } else { |
@@ -70,7 +71,7 @@ static const char *clib_extname(lua_State *L, const char *name) | |||
70 | } | 71 | } |
71 | if (!(name[0] == CLIB_SOPREFIX[0] && name[1] == CLIB_SOPREFIX[1] && | 72 | if (!(name[0] == CLIB_SOPREFIX[0] && name[1] == CLIB_SOPREFIX[1] && |
72 | name[2] == CLIB_SOPREFIX[2])) { | 73 | name[2] == CLIB_SOPREFIX[2])) { |
73 | name = lj_str_pushf(L, CLIB_SOPREFIX "%s", name); | 74 | name = lj_strfmt_pushf(L, CLIB_SOPREFIX "%s", name); |
74 | L->top--; | 75 | L->top--; |
75 | } | 76 | } |
76 | } | 77 | } |
@@ -158,11 +159,13 @@ BOOL WINAPI GetModuleHandleExA(DWORD, LPCSTR, HMODULE*); | |||
158 | /* Default libraries. */ | 159 | /* Default libraries. */ |
159 | enum { | 160 | enum { |
160 | CLIB_HANDLE_EXE, | 161 | CLIB_HANDLE_EXE, |
162 | #if !LJ_TARGET_UWP | ||
161 | CLIB_HANDLE_DLL, | 163 | CLIB_HANDLE_DLL, |
162 | CLIB_HANDLE_CRT, | 164 | CLIB_HANDLE_CRT, |
163 | CLIB_HANDLE_KERNEL32, | 165 | CLIB_HANDLE_KERNEL32, |
164 | CLIB_HANDLE_USER32, | 166 | CLIB_HANDLE_USER32, |
165 | CLIB_HANDLE_GDI32, | 167 | CLIB_HANDLE_GDI32, |
168 | #endif | ||
166 | CLIB_HANDLE_MAX | 169 | CLIB_HANDLE_MAX |
167 | }; | 170 | }; |
168 | 171 | ||
@@ -172,11 +175,19 @@ LJ_NORET LJ_NOINLINE static void clib_error(lua_State *L, const char *fmt, | |||
172 | const char *name) | 175 | const char *name) |
173 | { | 176 | { |
174 | DWORD err = GetLastError(); | 177 | DWORD err = GetLastError(); |
178 | #if LJ_TARGET_XBOXONE | ||
179 | wchar_t wbuf[128]; | ||
180 | char buf[128*2]; | ||
181 | if (!FormatMessageW(FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_FROM_SYSTEM, | ||
182 | NULL, err, 0, wbuf, sizeof(wbuf)/sizeof(wchar_t), NULL) || | ||
183 | !WideCharToMultiByte(CP_ACP, 0, wbuf, 128, buf, 128*2, NULL, NULL)) | ||
184 | #else | ||
175 | char buf[128]; | 185 | char buf[128]; |
176 | if (!FormatMessageA(FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_FROM_SYSTEM, | 186 | if (!FormatMessageA(FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_FROM_SYSTEM, |
177 | NULL, err, 0, buf, sizeof(buf), NULL)) | 187 | NULL, err, 0, buf, sizeof(buf), NULL)) |
188 | #endif | ||
178 | buf[0] = '\0'; | 189 | buf[0] = '\0'; |
179 | lj_err_callermsg(L, lj_str_pushf(L, fmt, name, buf)); | 190 | lj_err_callermsg(L, lj_strfmt_pushf(L, fmt, name, buf)); |
180 | } | 191 | } |
181 | 192 | ||
182 | static int clib_needext(const char *s) | 193 | static int clib_needext(const char *s) |
@@ -191,7 +202,7 @@ static int clib_needext(const char *s) | |||
191 | static const char *clib_extname(lua_State *L, const char *name) | 202 | static const char *clib_extname(lua_State *L, const char *name) |
192 | { | 203 | { |
193 | if (clib_needext(name)) { | 204 | if (clib_needext(name)) { |
194 | name = lj_str_pushf(L, "%s.dll", name); | 205 | name = lj_strfmt_pushf(L, "%s.dll", name); |
195 | L->top--; | 206 | L->top--; |
196 | } | 207 | } |
197 | return name; | 208 | return name; |
@@ -200,7 +211,7 @@ static const char *clib_extname(lua_State *L, const char *name) | |||
200 | static void *clib_loadlib(lua_State *L, const char *name, int global) | 211 | static void *clib_loadlib(lua_State *L, const char *name, int global) |
201 | { | 212 | { |
202 | DWORD oldwerr = GetLastError(); | 213 | DWORD oldwerr = GetLastError(); |
203 | void *h = (void *)LoadLibraryA(clib_extname(L, name)); | 214 | void *h = LJ_WIN_LOADLIBA(clib_extname(L, name)); |
204 | if (!h) clib_error(L, "cannot load module " LUA_QS ": %s", name); | 215 | if (!h) clib_error(L, "cannot load module " LUA_QS ": %s", name); |
205 | SetLastError(oldwerr); | 216 | SetLastError(oldwerr); |
206 | UNUSED(global); | 217 | UNUSED(global); |
@@ -210,6 +221,7 @@ static void *clib_loadlib(lua_State *L, const char *name, int global) | |||
210 | static void clib_unloadlib(CLibrary *cl) | 221 | static void clib_unloadlib(CLibrary *cl) |
211 | { | 222 | { |
212 | if (cl->handle == CLIB_DEFHANDLE) { | 223 | if (cl->handle == CLIB_DEFHANDLE) { |
224 | #if !LJ_TARGET_UWP | ||
213 | MSize i; | 225 | MSize i; |
214 | for (i = CLIB_HANDLE_KERNEL32; i < CLIB_HANDLE_MAX; i++) { | 226 | for (i = CLIB_HANDLE_KERNEL32; i < CLIB_HANDLE_MAX; i++) { |
215 | void *h = clib_def_handle[i]; | 227 | void *h = clib_def_handle[i]; |
@@ -218,11 +230,16 @@ static void clib_unloadlib(CLibrary *cl) | |||
218 | FreeLibrary((HINSTANCE)h); | 230 | FreeLibrary((HINSTANCE)h); |
219 | } | 231 | } |
220 | } | 232 | } |
233 | #endif | ||
221 | } else if (cl->handle) { | 234 | } else if (cl->handle) { |
222 | FreeLibrary((HINSTANCE)cl->handle); | 235 | FreeLibrary((HINSTANCE)cl->handle); |
223 | } | 236 | } |
224 | } | 237 | } |
225 | 238 | ||
239 | #if LJ_TARGET_UWP | ||
240 | EXTERN_C IMAGE_DOS_HEADER __ImageBase; | ||
241 | #endif | ||
242 | |||
226 | static void *clib_getsym(CLibrary *cl, const char *name) | 243 | static void *clib_getsym(CLibrary *cl, const char *name) |
227 | { | 244 | { |
228 | void *p = NULL; | 245 | void *p = NULL; |
@@ -231,6 +248,9 @@ static void *clib_getsym(CLibrary *cl, const char *name) | |||
231 | for (i = 0; i < CLIB_HANDLE_MAX; i++) { | 248 | for (i = 0; i < CLIB_HANDLE_MAX; i++) { |
232 | HINSTANCE h = (HINSTANCE)clib_def_handle[i]; | 249 | HINSTANCE h = (HINSTANCE)clib_def_handle[i]; |
233 | if (!(void *)h) { /* Resolve default library handles (once). */ | 250 | if (!(void *)h) { /* Resolve default library handles (once). */ |
251 | #if LJ_TARGET_UWP | ||
252 | h = (HINSTANCE)&__ImageBase; | ||
253 | #else | ||
234 | switch (i) { | 254 | switch (i) { |
235 | case CLIB_HANDLE_EXE: GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, NULL, &h); break; | 255 | case CLIB_HANDLE_EXE: GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, NULL, &h); break; |
236 | case CLIB_HANDLE_DLL: | 256 | case CLIB_HANDLE_DLL: |
@@ -241,11 +261,12 @@ static void *clib_getsym(CLibrary *cl, const char *name) | |||
241 | GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, | 261 | GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, |
242 | (const char *)&_fmode, &h); | 262 | (const char *)&_fmode, &h); |
243 | break; | 263 | break; |
244 | case CLIB_HANDLE_KERNEL32: h = LoadLibraryA("kernel32.dll"); break; | 264 | case CLIB_HANDLE_KERNEL32: h = LJ_WIN_LOADLIBA("kernel32.dll"); break; |
245 | case CLIB_HANDLE_USER32: h = LoadLibraryA("user32.dll"); break; | 265 | case CLIB_HANDLE_USER32: h = LJ_WIN_LOADLIBA("user32.dll"); break; |
246 | case CLIB_HANDLE_GDI32: h = LoadLibraryA("gdi32.dll"); break; | 266 | case CLIB_HANDLE_GDI32: h = LJ_WIN_LOADLIBA("gdi32.dll"); break; |
247 | } | 267 | } |
248 | if (!h) continue; | 268 | if (!h) continue; |
269 | #endif | ||
249 | clib_def_handle[i] = (void *)h; | 270 | clib_def_handle[i] = (void *)h; |
250 | } | 271 | } |
251 | p = (void *)GetProcAddress(h, name); | 272 | p = (void *)GetProcAddress(h, name); |
@@ -264,7 +285,7 @@ static void *clib_getsym(CLibrary *cl, const char *name) | |||
264 | LJ_NORET LJ_NOINLINE static void clib_error(lua_State *L, const char *fmt, | 285 | LJ_NORET LJ_NOINLINE static void clib_error(lua_State *L, const char *fmt, |
265 | const char *name) | 286 | const char *name) |
266 | { | 287 | { |
267 | lj_err_callermsg(L, lj_str_pushf(L, fmt, name, "no support for this OS")); | 288 | lj_err_callermsg(L, lj_strfmt_pushf(L, fmt, name, "no support for this OS")); |
268 | } | 289 | } |
269 | 290 | ||
270 | static void *clib_loadlib(lua_State *L, const char *name, int global) | 291 | static void *clib_loadlib(lua_State *L, const char *name, int global) |
@@ -348,7 +369,7 @@ TValue *lj_clib_index(lua_State *L, CLibrary *cl, GCstr *name) | |||
348 | CTInfo cconv = ctype_cconv(ct->info); | 369 | CTInfo cconv = ctype_cconv(ct->info); |
349 | if (cconv == CTCC_FASTCALL || cconv == CTCC_STDCALL) { | 370 | if (cconv == CTCC_FASTCALL || cconv == CTCC_STDCALL) { |
350 | CTSize sz = clib_func_argsize(cts, ct); | 371 | CTSize sz = clib_func_argsize(cts, ct); |
351 | const char *symd = lj_str_pushf(L, | 372 | const char *symd = lj_strfmt_pushf(L, |
352 | cconv == CTCC_FASTCALL ? "@%s@%d" : "_%s@%d", | 373 | cconv == CTCC_FASTCALL ? "@%s@%d" : "_%s@%d", |
353 | sym, sz); | 374 | sym, sz); |
354 | L->top--; | 375 | L->top--; |
diff --git a/src/lj_cparse.c b/src/lj_cparse.c index 50bb76ad..70b82af3 100644 --- a/src/lj_cparse.c +++ b/src/lj_cparse.c | |||
@@ -9,13 +9,14 @@ | |||
9 | 9 | ||
10 | #include "lj_gc.h" | 10 | #include "lj_gc.h" |
11 | #include "lj_err.h" | 11 | #include "lj_err.h" |
12 | #include "lj_str.h" | 12 | #include "lj_buf.h" |
13 | #include "lj_ctype.h" | 13 | #include "lj_ctype.h" |
14 | #include "lj_cparse.h" | 14 | #include "lj_cparse.h" |
15 | #include "lj_frame.h" | 15 | #include "lj_frame.h" |
16 | #include "lj_vm.h" | 16 | #include "lj_vm.h" |
17 | #include "lj_char.h" | 17 | #include "lj_char.h" |
18 | #include "lj_strscan.h" | 18 | #include "lj_strscan.h" |
19 | #include "lj_strfmt.h" | ||
19 | 20 | ||
20 | /* | 21 | /* |
21 | ** Important note: this is NOT a validating C parser! This is a minimal | 22 | ** Important note: this is NOT a validating C parser! This is a minimal |
@@ -27,6 +28,24 @@ | |||
27 | ** If in doubt, please check the input against your favorite C compiler. | 28 | ** If in doubt, please check the input against your favorite C compiler. |
28 | */ | 29 | */ |
29 | 30 | ||
31 | /* -- Miscellaneous ------------------------------------------------------- */ | ||
32 | |||
33 | /* Match string against a C literal. */ | ||
34 | #define cp_str_is(str, k) \ | ||
35 | ((str)->len == sizeof(k)-1 && !memcmp(strdata(str), k, sizeof(k)-1)) | ||
36 | |||
37 | /* Check string against a linear list of matches. */ | ||
38 | int lj_cparse_case(GCstr *str, const char *match) | ||
39 | { | ||
40 | MSize len; | ||
41 | int n; | ||
42 | for (n = 0; (len = (MSize)*match++); n++, match += len) { | ||
43 | if (str->len == len && !memcmp(match, strdata(str), len)) | ||
44 | return n; | ||
45 | } | ||
46 | return -1; | ||
47 | } | ||
48 | |||
30 | /* -- C lexer ------------------------------------------------------------- */ | 49 | /* -- C lexer ------------------------------------------------------------- */ |
31 | 50 | ||
32 | /* C lexer token names. */ | 51 | /* C lexer token names. */ |
@@ -46,9 +65,9 @@ static const char *cp_tok2str(CPState *cp, CPToken tok) | |||
46 | if (tok > CTOK_OFS) | 65 | if (tok > CTOK_OFS) |
47 | return ctoknames[tok-CTOK_OFS-1]; | 66 | return ctoknames[tok-CTOK_OFS-1]; |
48 | else if (!lj_char_iscntrl(tok)) | 67 | else if (!lj_char_iscntrl(tok)) |
49 | return lj_str_pushf(cp->L, "%c", tok); | 68 | return lj_strfmt_pushf(cp->L, "%c", tok); |
50 | else | 69 | else |
51 | return lj_str_pushf(cp->L, "char(%d)", tok); | 70 | return lj_strfmt_pushf(cp->L, "char(%d)", tok); |
52 | } | 71 | } |
53 | 72 | ||
54 | /* End-of-line? */ | 73 | /* End-of-line? */ |
@@ -85,24 +104,10 @@ static LJ_NOINLINE CPChar cp_get_bs(CPState *cp) | |||
85 | return cp_get(cp); | 104 | return cp_get(cp); |
86 | } | 105 | } |
87 | 106 | ||
88 | /* Grow save buffer. */ | ||
89 | static LJ_NOINLINE void cp_save_grow(CPState *cp, CPChar c) | ||
90 | { | ||
91 | MSize newsize; | ||
92 | if (cp->sb.sz >= CPARSE_MAX_BUF/2) | ||
93 | cp_err(cp, LJ_ERR_XELEM); | ||
94 | newsize = cp->sb.sz * 2; | ||
95 | lj_str_resizebuf(cp->L, &cp->sb, newsize); | ||
96 | cp->sb.buf[cp->sb.n++] = (char)c; | ||
97 | } | ||
98 | |||
99 | /* Save character in buffer. */ | 107 | /* Save character in buffer. */ |
100 | static LJ_AINLINE void cp_save(CPState *cp, CPChar c) | 108 | static LJ_AINLINE void cp_save(CPState *cp, CPChar c) |
101 | { | 109 | { |
102 | if (LJ_UNLIKELY(cp->sb.n + 1 > cp->sb.sz)) | 110 | lj_buf_putb(&cp->sb, c); |
103 | cp_save_grow(cp, c); | ||
104 | else | ||
105 | cp->sb.buf[cp->sb.n++] = (char)c; | ||
106 | } | 111 | } |
107 | 112 | ||
108 | /* Skip line break. Handles "\n", "\r", "\r\n" or "\n\r". */ | 113 | /* Skip line break. Handles "\n", "\r", "\r\n" or "\n\r". */ |
@@ -122,20 +127,20 @@ LJ_NORET static void cp_errmsg(CPState *cp, CPToken tok, ErrMsg em, ...) | |||
122 | tokstr = NULL; | 127 | tokstr = NULL; |
123 | } else if (tok == CTOK_IDENT || tok == CTOK_INTEGER || tok == CTOK_STRING || | 128 | } else if (tok == CTOK_IDENT || tok == CTOK_INTEGER || tok == CTOK_STRING || |
124 | tok >= CTOK_FIRSTDECL) { | 129 | tok >= CTOK_FIRSTDECL) { |
125 | if (cp->sb.n == 0) cp_save(cp, '$'); | 130 | if (sbufP(&cp->sb) == sbufB(&cp->sb)) cp_save(cp, '$'); |
126 | cp_save(cp, '\0'); | 131 | cp_save(cp, '\0'); |
127 | tokstr = cp->sb.buf; | 132 | tokstr = sbufB(&cp->sb); |
128 | } else { | 133 | } else { |
129 | tokstr = cp_tok2str(cp, tok); | 134 | tokstr = cp_tok2str(cp, tok); |
130 | } | 135 | } |
131 | L = cp->L; | 136 | L = cp->L; |
132 | va_start(argp, em); | 137 | va_start(argp, em); |
133 | msg = lj_str_pushvf(L, err2msg(em), argp); | 138 | msg = lj_strfmt_pushvf(L, err2msg(em), argp); |
134 | va_end(argp); | 139 | va_end(argp); |
135 | if (tokstr) | 140 | if (tokstr) |
136 | msg = lj_str_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tokstr); | 141 | msg = lj_strfmt_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tokstr); |
137 | if (cp->linenumber > 1) | 142 | if (cp->linenumber > 1) |
138 | msg = lj_str_pushf(L, "%s at line %d", msg, cp->linenumber); | 143 | msg = lj_strfmt_pushf(L, "%s at line %d", msg, cp->linenumber); |
139 | lj_err_callermsg(L, msg); | 144 | lj_err_callermsg(L, msg); |
140 | } | 145 | } |
141 | 146 | ||
@@ -164,7 +169,7 @@ static CPToken cp_number(CPState *cp) | |||
164 | TValue o; | 169 | TValue o; |
165 | do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp))); | 170 | do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp))); |
166 | cp_save(cp, '\0'); | 171 | cp_save(cp, '\0'); |
167 | fmt = lj_strscan_scan((const uint8_t *)cp->sb.buf, &o, STRSCAN_OPT_C); | 172 | fmt = lj_strscan_scan((const uint8_t *)sbufB(&cp->sb), &o, STRSCAN_OPT_C); |
168 | if (fmt == STRSCAN_INT) cp->val.id = CTID_INT32; | 173 | if (fmt == STRSCAN_INT) cp->val.id = CTID_INT32; |
169 | else if (fmt == STRSCAN_U32) cp->val.id = CTID_UINT32; | 174 | else if (fmt == STRSCAN_U32) cp->val.id = CTID_UINT32; |
170 | else if (!(cp->mode & CPARSE_MODE_SKIP)) | 175 | else if (!(cp->mode & CPARSE_MODE_SKIP)) |
@@ -177,7 +182,7 @@ static CPToken cp_number(CPState *cp) | |||
177 | static CPToken cp_ident(CPState *cp) | 182 | static CPToken cp_ident(CPState *cp) |
178 | { | 183 | { |
179 | do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp))); | 184 | do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp))); |
180 | cp->str = lj_str_new(cp->L, cp->sb.buf, cp->sb.n); | 185 | cp->str = lj_buf_str(cp->L, &cp->sb); |
181 | cp->val.id = lj_ctype_getname(cp->cts, &cp->ct, cp->str, cp->tmask); | 186 | cp->val.id = lj_ctype_getname(cp->cts, &cp->ct, cp->str, cp->tmask); |
182 | if (ctype_type(cp->ct->info) == CT_KW) | 187 | if (ctype_type(cp->ct->info) == CT_KW) |
183 | return ctype_cid(cp->ct->info); | 188 | return ctype_cid(cp->ct->info); |
@@ -263,11 +268,11 @@ static CPToken cp_string(CPState *cp) | |||
263 | } | 268 | } |
264 | cp_get(cp); | 269 | cp_get(cp); |
265 | if (delim == '"') { | 270 | if (delim == '"') { |
266 | cp->str = lj_str_new(cp->L, cp->sb.buf, cp->sb.n); | 271 | cp->str = lj_buf_str(cp->L, &cp->sb); |
267 | return CTOK_STRING; | 272 | return CTOK_STRING; |
268 | } else { | 273 | } else { |
269 | if (cp->sb.n != 1) cp_err_token(cp, '\''); | 274 | if (sbuflen(&cp->sb) != 1) cp_err_token(cp, '\''); |
270 | cp->val.i32 = (int32_t)(char)cp->sb.buf[0]; | 275 | cp->val.i32 = (int32_t)(char)*sbufB(&cp->sb); |
271 | cp->val.id = CTID_INT32; | 276 | cp->val.id = CTID_INT32; |
272 | return CTOK_INTEGER; | 277 | return CTOK_INTEGER; |
273 | } | 278 | } |
@@ -296,7 +301,7 @@ static void cp_comment_cpp(CPState *cp) | |||
296 | /* Lexical scanner for C. Only a minimal subset is implemented. */ | 301 | /* Lexical scanner for C. Only a minimal subset is implemented. */ |
297 | static CPToken cp_next_(CPState *cp) | 302 | static CPToken cp_next_(CPState *cp) |
298 | { | 303 | { |
299 | lj_str_resetbuf(&cp->sb); | 304 | lj_buf_reset(&cp->sb); |
300 | for (;;) { | 305 | for (;;) { |
301 | if (lj_char_isident(cp->c)) | 306 | if (lj_char_isident(cp->c)) |
302 | return lj_char_isdigit(cp->c) ? cp_number(cp) : cp_ident(cp); | 307 | return lj_char_isdigit(cp->c) ? cp_number(cp) : cp_ident(cp); |
@@ -385,8 +390,7 @@ static void cp_init(CPState *cp) | |||
385 | cp->depth = 0; | 390 | cp->depth = 0; |
386 | cp->curpack = 0; | 391 | cp->curpack = 0; |
387 | cp->packstack[0] = 255; | 392 | cp->packstack[0] = 255; |
388 | lj_str_initbuf(&cp->sb); | 393 | lj_buf_init(cp->L, &cp->sb); |
389 | lj_str_resizebuf(cp->L, &cp->sb, LJ_MIN_SBUF); | ||
390 | lua_assert(cp->p != NULL); | 394 | lua_assert(cp->p != NULL); |
391 | cp_get(cp); /* Read-ahead first char. */ | 395 | cp_get(cp); /* Read-ahead first char. */ |
392 | cp->tok = 0; | 396 | cp->tok = 0; |
@@ -398,7 +402,7 @@ static void cp_init(CPState *cp) | |||
398 | static void cp_cleanup(CPState *cp) | 402 | static void cp_cleanup(CPState *cp) |
399 | { | 403 | { |
400 | global_State *g = G(cp->L); | 404 | global_State *g = G(cp->L); |
401 | lj_str_freebuf(g, &cp->sb); | 405 | lj_buf_free(g, &cp->sb); |
402 | } | 406 | } |
403 | 407 | ||
404 | /* Check and consume optional token. */ | 408 | /* Check and consume optional token. */ |
@@ -953,8 +957,6 @@ static CTypeID cp_decl_intern(CPState *cp, CPDecl *decl) | |||
953 | 957 | ||
954 | /* -- C declaration parser ------------------------------------------------ */ | 958 | /* -- C declaration parser ------------------------------------------------ */ |
955 | 959 | ||
956 | #define H_(le, be) LJ_ENDIAN_SELECT(0x##le, 0x##be) | ||
957 | |||
958 | /* Reset declaration state to declaration specifier. */ | 960 | /* Reset declaration state to declaration specifier. */ |
959 | static void cp_decl_reset(CPDecl *decl) | 961 | static void cp_decl_reset(CPDecl *decl) |
960 | { | 962 | { |
@@ -1031,7 +1033,7 @@ static void cp_decl_asm(CPState *cp, CPDecl *decl) | |||
1031 | if (cp->tok == CTOK_STRING) { | 1033 | if (cp->tok == CTOK_STRING) { |
1032 | GCstr *str = cp->str; | 1034 | GCstr *str = cp->str; |
1033 | while (cp_next(cp) == CTOK_STRING) { | 1035 | while (cp_next(cp) == CTOK_STRING) { |
1034 | lj_str_pushf(cp->L, "%s%s", strdata(str), strdata(cp->str)); | 1036 | lj_strfmt_pushf(cp->L, "%s%s", strdata(str), strdata(cp->str)); |
1035 | cp->L->top--; | 1037 | cp->L->top--; |
1036 | str = strV(cp->L->top); | 1038 | str = strV(cp->L->top); |
1037 | } | 1039 | } |
@@ -1083,44 +1085,57 @@ static void cp_decl_gccattribute(CPState *cp, CPDecl *decl) | |||
1083 | if (cp->tok == CTOK_IDENT) { | 1085 | if (cp->tok == CTOK_IDENT) { |
1084 | GCstr *attrstr = cp->str; | 1086 | GCstr *attrstr = cp->str; |
1085 | cp_next(cp); | 1087 | cp_next(cp); |
1086 | switch (attrstr->hash) { | 1088 | switch (lj_cparse_case(attrstr, |
1087 | case H_(64a9208e,8ce14319): case H_(8e6331b2,95a282af): /* aligned */ | 1089 | "\007aligned" "\013__aligned__" |
1090 | "\006packed" "\012__packed__" | ||
1091 | "\004mode" "\010__mode__" | ||
1092 | "\013vector_size" "\017__vector_size__" | ||
1093 | #if LJ_TARGET_X86 | ||
1094 | "\007regparm" "\013__regparm__" | ||
1095 | "\005cdecl" "\011__cdecl__" | ||
1096 | "\010thiscall" "\014__thiscall__" | ||
1097 | "\010fastcall" "\014__fastcall__" | ||
1098 | "\007stdcall" "\013__stdcall__" | ||
1099 | "\012sseregparm" "\016__sseregparm__" | ||
1100 | #endif | ||
1101 | )) { | ||
1102 | case 0: case 1: /* aligned */ | ||
1088 | cp_decl_align(cp, decl); | 1103 | cp_decl_align(cp, decl); |
1089 | break; | 1104 | break; |
1090 | case H_(42eb47de,f0ede26c): case H_(29f48a09,cf383e0c): /* packed */ | 1105 | case 2: case 3: /* packed */ |
1091 | decl->attr |= CTFP_PACKED; | 1106 | decl->attr |= CTFP_PACKED; |
1092 | break; | 1107 | break; |
1093 | case H_(0a84eef6,8dfab04c): case H_(995cf92c,d5696591): /* mode */ | 1108 | case 4: case 5: /* mode */ |
1094 | cp_decl_mode(cp, decl); | 1109 | cp_decl_mode(cp, decl); |
1095 | break; | 1110 | break; |
1096 | case H_(0ab31997,2d5213fa): case H_(bf875611,200e9990): /* vector_size */ | 1111 | case 6: case 7: /* vector_size */ |
1097 | { | 1112 | { |
1098 | CTSize vsize = cp_decl_sizeattr(cp); | 1113 | CTSize vsize = cp_decl_sizeattr(cp); |
1099 | if (vsize) CTF_INSERT(decl->attr, VSIZEP, lj_fls(vsize)); | 1114 | if (vsize) CTF_INSERT(decl->attr, VSIZEP, lj_fls(vsize)); |
1100 | } | 1115 | } |
1101 | break; | 1116 | break; |
1102 | #if LJ_TARGET_X86 | 1117 | #if LJ_TARGET_X86 |
1103 | case H_(5ad22db8,c689b848): case H_(439150fa,65ea78cb): /* regparm */ | 1118 | case 8: case 9: /* regparm */ |
1104 | CTF_INSERT(decl->fattr, REGPARM, cp_decl_sizeattr(cp)); | 1119 | CTF_INSERT(decl->fattr, REGPARM, cp_decl_sizeattr(cp)); |
1105 | decl->fattr |= CTFP_CCONV; | 1120 | decl->fattr |= CTFP_CCONV; |
1106 | break; | 1121 | break; |
1107 | case H_(18fc0b98,7ff4c074): case H_(4e62abed,0a747424): /* cdecl */ | 1122 | case 10: case 11: /* cdecl */ |
1108 | CTF_INSERT(decl->fattr, CCONV, CTCC_CDECL); | 1123 | CTF_INSERT(decl->fattr, CCONV, CTCC_CDECL); |
1109 | decl->fattr |= CTFP_CCONV; | 1124 | decl->fattr |= CTFP_CCONV; |
1110 | break; | 1125 | break; |
1111 | case H_(72b2e41b,494c5a44): case H_(f2356d59,f25fc9bd): /* thiscall */ | 1126 | case 12: case 13: /* thiscall */ |
1112 | CTF_INSERT(decl->fattr, CCONV, CTCC_THISCALL); | 1127 | CTF_INSERT(decl->fattr, CCONV, CTCC_THISCALL); |
1113 | decl->fattr |= CTFP_CCONV; | 1128 | decl->fattr |= CTFP_CCONV; |
1114 | break; | 1129 | break; |
1115 | case H_(0d0ffc42,ab746f88): case H_(21c54ba1,7f0ca7e3): /* fastcall */ | 1130 | case 14: case 15: /* fastcall */ |
1116 | CTF_INSERT(decl->fattr, CCONV, CTCC_FASTCALL); | 1131 | CTF_INSERT(decl->fattr, CCONV, CTCC_FASTCALL); |
1117 | decl->fattr |= CTFP_CCONV; | 1132 | decl->fattr |= CTFP_CCONV; |
1118 | break; | 1133 | break; |
1119 | case H_(ef76b040,9412e06a): case H_(de56697b,c750e6e1): /* stdcall */ | 1134 | case 16: case 17: /* stdcall */ |
1120 | CTF_INSERT(decl->fattr, CCONV, CTCC_STDCALL); | 1135 | CTF_INSERT(decl->fattr, CCONV, CTCC_STDCALL); |
1121 | decl->fattr |= CTFP_CCONV; | 1136 | decl->fattr |= CTFP_CCONV; |
1122 | break; | 1137 | break; |
1123 | case H_(ea78b622,f234bd8e): case H_(252ffb06,8d50f34b): /* sseregparm */ | 1138 | case 18: case 19: /* sseregparm */ |
1124 | decl->fattr |= CTF_SSEREGPARM; | 1139 | decl->fattr |= CTF_SSEREGPARM; |
1125 | decl->fattr |= CTFP_CCONV; | 1140 | decl->fattr |= CTFP_CCONV; |
1126 | break; | 1141 | break; |
@@ -1152,16 +1167,13 @@ static void cp_decl_msvcattribute(CPState *cp, CPDecl *decl) | |||
1152 | while (cp->tok == CTOK_IDENT) { | 1167 | while (cp->tok == CTOK_IDENT) { |
1153 | GCstr *attrstr = cp->str; | 1168 | GCstr *attrstr = cp->str; |
1154 | cp_next(cp); | 1169 | cp_next(cp); |
1155 | switch (attrstr->hash) { | 1170 | if (cp_str_is(attrstr, "align")) { |
1156 | case H_(bc2395fa,98f267f8): /* align */ | ||
1157 | cp_decl_align(cp, decl); | 1171 | cp_decl_align(cp, decl); |
1158 | break; | 1172 | } else { /* Ignore all other attributes. */ |
1159 | default: /* Ignore all other attributes. */ | ||
1160 | if (cp_opt(cp, '(')) { | 1173 | if (cp_opt(cp, '(')) { |
1161 | while (cp->tok != ')' && cp->tok != CTOK_EOF) cp_next(cp); | 1174 | while (cp->tok != ')' && cp->tok != CTOK_EOF) cp_next(cp); |
1162 | cp_check(cp, ')'); | 1175 | cp_check(cp, ')'); |
1163 | } | 1176 | } |
1164 | break; | ||
1165 | } | 1177 | } |
1166 | } | 1178 | } |
1167 | cp_check(cp, ')'); | 1179 | cp_check(cp, ')'); |
@@ -1741,17 +1753,16 @@ static CTypeID cp_decl_abstract(CPState *cp) | |||
1741 | static void cp_pragma(CPState *cp, BCLine pragmaline) | 1753 | static void cp_pragma(CPState *cp, BCLine pragmaline) |
1742 | { | 1754 | { |
1743 | cp_next(cp); | 1755 | cp_next(cp); |
1744 | if (cp->tok == CTOK_IDENT && | 1756 | if (cp->tok == CTOK_IDENT && cp_str_is(cp->str, "pack")) { |
1745 | cp->str->hash == H_(e79b999f,42ca3e85)) { /* pack */ | ||
1746 | cp_next(cp); | 1757 | cp_next(cp); |
1747 | cp_check(cp, '('); | 1758 | cp_check(cp, '('); |
1748 | if (cp->tok == CTOK_IDENT) { | 1759 | if (cp->tok == CTOK_IDENT) { |
1749 | if (cp->str->hash == H_(738e923c,a1b65954)) { /* push */ | 1760 | if (cp_str_is(cp->str, "push")) { |
1750 | if (cp->curpack < CPARSE_MAX_PACKSTACK) { | 1761 | if (cp->curpack < CPARSE_MAX_PACKSTACK) { |
1751 | cp->packstack[cp->curpack+1] = cp->packstack[cp->curpack]; | 1762 | cp->packstack[cp->curpack+1] = cp->packstack[cp->curpack]; |
1752 | cp->curpack++; | 1763 | cp->curpack++; |
1753 | } | 1764 | } |
1754 | } else if (cp->str->hash == H_(6c71cf27,6c71cf27)) { /* pop */ | 1765 | } else if (cp_str_is(cp->str, "pop")) { |
1755 | if (cp->curpack > 0) cp->curpack--; | 1766 | if (cp->curpack > 0) cp->curpack--; |
1756 | } else { | 1767 | } else { |
1757 | cp_errmsg(cp, cp->tok, LJ_ERR_XSYMBOL); | 1768 | cp_errmsg(cp, cp->tok, LJ_ERR_XSYMBOL); |
@@ -1773,6 +1784,16 @@ static void cp_pragma(CPState *cp, BCLine pragmaline) | |||
1773 | } | 1784 | } |
1774 | } | 1785 | } |
1775 | 1786 | ||
1787 | /* Handle line number. */ | ||
1788 | static void cp_line(CPState *cp, BCLine hashline) | ||
1789 | { | ||
1790 | BCLine newline = cp->val.u32; | ||
1791 | /* TODO: Handle file name and include it in error messages. */ | ||
1792 | while (cp->tok != CTOK_EOF && cp->linenumber == hashline) | ||
1793 | cp_next(cp); | ||
1794 | cp->linenumber = newline; | ||
1795 | } | ||
1796 | |||
1776 | /* Parse multiple C declarations of types or extern identifiers. */ | 1797 | /* Parse multiple C declarations of types or extern identifiers. */ |
1777 | static void cp_decl_multi(CPState *cp) | 1798 | static void cp_decl_multi(CPState *cp) |
1778 | { | 1799 | { |
@@ -1785,12 +1806,21 @@ static void cp_decl_multi(CPState *cp) | |||
1785 | continue; | 1806 | continue; |
1786 | } | 1807 | } |
1787 | if (cp->tok == '#') { /* Workaround, since we have no preprocessor, yet. */ | 1808 | if (cp->tok == '#') { /* Workaround, since we have no preprocessor, yet. */ |
1788 | BCLine pragmaline = cp->linenumber; | 1809 | BCLine hashline = cp->linenumber; |
1789 | if (!(cp_next(cp) == CTOK_IDENT && | 1810 | CPToken tok = cp_next(cp); |
1790 | cp->str->hash == H_(f5e6b4f8,1d509107))) /* pragma */ | 1811 | if (tok == CTOK_INTEGER) { |
1812 | cp_line(cp, hashline); | ||
1813 | continue; | ||
1814 | } else if (tok == CTOK_IDENT && cp_str_is(cp->str, "line")) { | ||
1815 | if (cp_next(cp) != CTOK_INTEGER) cp_err_token(cp, tok); | ||
1816 | cp_line(cp, hashline); | ||
1817 | continue; | ||
1818 | } else if (tok == CTOK_IDENT && cp_str_is(cp->str, "pragma")) { | ||
1819 | cp_pragma(cp, hashline); | ||
1820 | continue; | ||
1821 | } else { | ||
1791 | cp_errmsg(cp, cp->tok, LJ_ERR_XSYMBOL); | 1822 | cp_errmsg(cp, cp->tok, LJ_ERR_XSYMBOL); |
1792 | cp_pragma(cp, pragmaline); | 1823 | } |
1793 | continue; | ||
1794 | } | 1824 | } |
1795 | scl = cp_decl_spec(cp, &decl, CDF_TYPEDEF|CDF_EXTERN|CDF_STATIC); | 1825 | scl = cp_decl_spec(cp, &decl, CDF_TYPEDEF|CDF_EXTERN|CDF_STATIC); |
1796 | if ((cp->tok == ';' || cp->tok == CTOK_EOF) && | 1826 | if ((cp->tok == ';' || cp->tok == CTOK_EOF) && |
@@ -1856,8 +1886,6 @@ static void cp_decl_single(CPState *cp) | |||
1856 | if (cp->tok != CTOK_EOF) cp_err_token(cp, CTOK_EOF); | 1886 | if (cp->tok != CTOK_EOF) cp_err_token(cp, CTOK_EOF); |
1857 | } | 1887 | } |
1858 | 1888 | ||
1859 | #undef H_ | ||
1860 | |||
1861 | /* ------------------------------------------------------------------------ */ | 1889 | /* ------------------------------------------------------------------------ */ |
1862 | 1890 | ||
1863 | /* Protected callback for C parser. */ | 1891 | /* Protected callback for C parser. */ |
diff --git a/src/lj_cparse.h b/src/lj_cparse.h index 87eb3ff4..5f667a7c 100644 --- a/src/lj_cparse.h +++ b/src/lj_cparse.h | |||
@@ -60,6 +60,8 @@ typedef struct CPState { | |||
60 | 60 | ||
61 | LJ_FUNC int lj_cparse(CPState *cp); | 61 | LJ_FUNC int lj_cparse(CPState *cp); |
62 | 62 | ||
63 | LJ_FUNC int lj_cparse_case(GCstr *str, const char *match); | ||
64 | |||
63 | #endif | 65 | #endif |
64 | 66 | ||
65 | #endif | 67 | #endif |
diff --git a/src/lj_crecord.c b/src/lj_crecord.c index 99344b79..6e999cc9 100644 --- a/src/lj_crecord.c +++ b/src/lj_crecord.c | |||
@@ -11,13 +11,13 @@ | |||
11 | #if LJ_HASJIT && LJ_HASFFI | 11 | #if LJ_HASJIT && LJ_HASFFI |
12 | 12 | ||
13 | #include "lj_err.h" | 13 | #include "lj_err.h" |
14 | #include "lj_str.h" | ||
15 | #include "lj_tab.h" | 14 | #include "lj_tab.h" |
16 | #include "lj_frame.h" | 15 | #include "lj_frame.h" |
17 | #include "lj_ctype.h" | 16 | #include "lj_ctype.h" |
18 | #include "lj_cdata.h" | 17 | #include "lj_cdata.h" |
19 | #include "lj_cparse.h" | 18 | #include "lj_cparse.h" |
20 | #include "lj_cconv.h" | 19 | #include "lj_cconv.h" |
20 | #include "lj_carith.h" | ||
21 | #include "lj_clib.h" | 21 | #include "lj_clib.h" |
22 | #include "lj_ccall.h" | 22 | #include "lj_ccall.h" |
23 | #include "lj_ff.h" | 23 | #include "lj_ff.h" |
@@ -31,6 +31,7 @@ | |||
31 | #include "lj_snap.h" | 31 | #include "lj_snap.h" |
32 | #include "lj_crecord.h" | 32 | #include "lj_crecord.h" |
33 | #include "lj_dispatch.h" | 33 | #include "lj_dispatch.h" |
34 | #include "lj_strfmt.h" | ||
34 | 35 | ||
35 | /* Some local macros to save typing. Undef'd at the end. */ | 36 | /* Some local macros to save typing. Undef'd at the end. */ |
36 | #define IR(ref) (&J->cur.ir[(ref)]) | 37 | #define IR(ref) (&J->cur.ir[(ref)]) |
@@ -211,7 +212,7 @@ static void crec_copy_emit(jit_State *J, CRecMemList *ml, MSize mlp, | |||
211 | ml[i].trval = emitir(IRT(IR_XLOAD, ml[i].tp), trsptr, 0); | 212 | ml[i].trval = emitir(IRT(IR_XLOAD, ml[i].tp), trsptr, 0); |
212 | ml[i].trofs = trofs; | 213 | ml[i].trofs = trofs; |
213 | i++; | 214 | i++; |
214 | rwin += (LJ_SOFTFP && ml[i].tp == IRT_NUM) ? 2 : 1; | 215 | rwin += (LJ_SOFTFP32 && ml[i].tp == IRT_NUM) ? 2 : 1; |
215 | if (rwin >= CREC_COPY_REGWIN || i >= mlp) { /* Flush buffered stores. */ | 216 | if (rwin >= CREC_COPY_REGWIN || i >= mlp) { /* Flush buffered stores. */ |
216 | rwin = 0; | 217 | rwin = 0; |
217 | for ( ; j < i; j++) { | 218 | for ( ; j < i; j++) { |
@@ -441,7 +442,7 @@ static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp, | |||
441 | /* fallthrough */ | 442 | /* fallthrough */ |
442 | case CCX(I, F): | 443 | case CCX(I, F): |
443 | if (dt == IRT_CDATA || st == IRT_CDATA) goto err_nyi; | 444 | if (dt == IRT_CDATA || st == IRT_CDATA) goto err_nyi; |
444 | sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, IRCONV_TRUNC|IRCONV_ANY); | 445 | sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, IRCONV_ANY); |
445 | goto xstore; | 446 | goto xstore; |
446 | case CCX(I, P): | 447 | case CCX(I, P): |
447 | case CCX(I, A): | 448 | case CCX(I, A): |
@@ -521,7 +522,7 @@ static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp, | |||
521 | if (st == IRT_CDATA) goto err_nyi; | 522 | if (st == IRT_CDATA) goto err_nyi; |
522 | /* The signed conversion is cheaper. x64 really has 47 bit pointers. */ | 523 | /* The signed conversion is cheaper. x64 really has 47 bit pointers. */ |
523 | sp = emitconv(sp, (LJ_64 && dsize == 8) ? IRT_I64 : IRT_U32, | 524 | sp = emitconv(sp, (LJ_64 && dsize == 8) ? IRT_I64 : IRT_U32, |
524 | st, IRCONV_TRUNC|IRCONV_ANY); | 525 | st, IRCONV_ANY); |
525 | goto xstore; | 526 | goto xstore; |
526 | 527 | ||
527 | /* Destination is an array. */ | 528 | /* Destination is an array. */ |
@@ -640,12 +641,23 @@ static TRef crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, cTValue *sval) | |||
640 | sp = emitir(IRT(IR_ADD, IRT_PTR), sp, lj_ir_kintp(J, sizeof(GCstr))); | 641 | sp = emitir(IRT(IR_ADD, IRT_PTR), sp, lj_ir_kintp(J, sizeof(GCstr))); |
641 | sid = CTID_A_CCHAR; | 642 | sid = CTID_A_CCHAR; |
642 | } | 643 | } |
643 | } else { /* NYI: tref_istab(sp), tref_islightud(sp). */ | 644 | } else if (tref_islightud(sp)) { |
645 | #if LJ_64 | ||
646 | sp = emitir(IRT(IR_BAND, IRT_P64), sp, | ||
647 | lj_ir_kint64(J, U64x(00007fff,ffffffff))); | ||
648 | #endif | ||
649 | } else { /* NYI: tref_istab(sp). */ | ||
644 | IRType t; | 650 | IRType t; |
645 | sid = argv2cdata(J, sp, sval)->ctypeid; | 651 | sid = argv2cdata(J, sp, sval)->ctypeid; |
646 | s = ctype_raw(cts, sid); | 652 | s = ctype_raw(cts, sid); |
647 | svisnz = cdataptr(cdataV(sval)); | 653 | svisnz = cdataptr(cdataV(sval)); |
648 | t = crec_ct2irt(cts, s); | 654 | if (ctype_isfunc(s->info)) { |
655 | sid = lj_ctype_intern(cts, CTINFO(CT_PTR, CTALIGN_PTR|sid), CTSIZE_PTR); | ||
656 | s = ctype_get(cts, sid); | ||
657 | t = IRT_PTR; | ||
658 | } else { | ||
659 | t = crec_ct2irt(cts, s); | ||
660 | } | ||
649 | if (ctype_isptr(s->info)) { | 661 | if (ctype_isptr(s->info)) { |
650 | sp = emitir(IRT(IR_FLOAD, t), sp, IRFL_CDATA_PTR); | 662 | sp = emitir(IRT(IR_FLOAD, t), sp, IRFL_CDATA_PTR); |
651 | if (ctype_isref(s->info)) { | 663 | if (ctype_isref(s->info)) { |
@@ -700,6 +712,19 @@ static TRef crec_reassoc_ofs(jit_State *J, TRef tr, ptrdiff_t *ofsp, MSize sz) | |||
700 | return tr; | 712 | return tr; |
701 | } | 713 | } |
702 | 714 | ||
715 | /* Tailcall to function. */ | ||
716 | static void crec_tailcall(jit_State *J, RecordFFData *rd, cTValue *tv) | ||
717 | { | ||
718 | TRef kfunc = lj_ir_kfunc(J, funcV(tv)); | ||
719 | #if LJ_FR2 | ||
720 | J->base[-2] = kfunc; | ||
721 | J->base[-1] = TREF_FRAME; | ||
722 | #else | ||
723 | J->base[-1] = kfunc | TREF_FRAME; | ||
724 | #endif | ||
725 | rd->nres = -1; /* Pending tailcall. */ | ||
726 | } | ||
727 | |||
703 | /* Record ctype __index/__newindex metamethods. */ | 728 | /* Record ctype __index/__newindex metamethods. */ |
704 | static void crec_index_meta(jit_State *J, CTState *cts, CType *ct, | 729 | static void crec_index_meta(jit_State *J, CTState *cts, CType *ct, |
705 | RecordFFData *rd) | 730 | RecordFFData *rd) |
@@ -709,8 +734,7 @@ static void crec_index_meta(jit_State *J, CTState *cts, CType *ct, | |||
709 | if (!tv) | 734 | if (!tv) |
710 | lj_trace_err(J, LJ_TRERR_BADTYPE); | 735 | lj_trace_err(J, LJ_TRERR_BADTYPE); |
711 | if (tvisfunc(tv)) { | 736 | if (tvisfunc(tv)) { |
712 | J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME; | 737 | crec_tailcall(J, rd, tv); |
713 | rd->nres = -1; /* Pending tailcall. */ | ||
714 | } else if (rd->data == 0 && tvistab(tv) && tref_isstr(J->base[1])) { | 738 | } else if (rd->data == 0 && tvistab(tv) && tref_isstr(J->base[1])) { |
715 | /* Specialize to result of __index lookup. */ | 739 | /* Specialize to result of __index lookup. */ |
716 | cTValue *o = lj_tab_get(J->L, tabV(tv), &rd->argv[1]); | 740 | cTValue *o = lj_tab_get(J->L, tabV(tv), &rd->argv[1]); |
@@ -727,6 +751,48 @@ static void crec_index_meta(jit_State *J, CTState *cts, CType *ct, | |||
727 | } | 751 | } |
728 | } | 752 | } |
729 | 753 | ||
754 | /* Record bitfield load/store. */ | ||
755 | static void crec_index_bf(jit_State *J, RecordFFData *rd, TRef ptr, CTInfo info) | ||
756 | { | ||
757 | IRType t = IRT_I8 + 2*lj_fls(ctype_bitcsz(info)) + ((info&CTF_UNSIGNED)?1:0); | ||
758 | TRef tr = emitir(IRT(IR_XLOAD, t), ptr, 0); | ||
759 | CTSize pos = ctype_bitpos(info), bsz = ctype_bitbsz(info), shift = 32 - bsz; | ||
760 | lua_assert(t <= IRT_U32); /* NYI: 64 bit bitfields. */ | ||
761 | if (rd->data == 0) { /* __index metamethod. */ | ||
762 | if ((info & CTF_BOOL)) { | ||
763 | tr = emitir(IRTI(IR_BAND), tr, lj_ir_kint(J, (int32_t)((1u << pos)))); | ||
764 | /* Assume not equal to zero. Fixup and emit pending guard later. */ | ||
765 | lj_ir_set(J, IRTGI(IR_NE), tr, lj_ir_kint(J, 0)); | ||
766 | J->postproc = LJ_POST_FIXGUARD; | ||
767 | tr = TREF_TRUE; | ||
768 | } else if (!(info & CTF_UNSIGNED)) { | ||
769 | tr = emitir(IRTI(IR_BSHL), tr, lj_ir_kint(J, shift - pos)); | ||
770 | tr = emitir(IRTI(IR_BSAR), tr, lj_ir_kint(J, shift)); | ||
771 | } else { | ||
772 | lua_assert(bsz < 32); /* Full-size fields cannot end up here. */ | ||
773 | tr = emitir(IRTI(IR_BSHR), tr, lj_ir_kint(J, pos)); | ||
774 | tr = emitir(IRTI(IR_BAND), tr, lj_ir_kint(J, (int32_t)((1u << bsz)-1))); | ||
775 | /* We can omit the U32 to NUM conversion, since bsz < 32. */ | ||
776 | } | ||
777 | J->base[0] = tr; | ||
778 | } else { /* __newindex metamethod. */ | ||
779 | CTState *cts = ctype_ctsG(J2G(J)); | ||
780 | CType *ct = ctype_get(cts, | ||
781 | (info & CTF_BOOL) ? CTID_BOOL : | ||
782 | (info & CTF_UNSIGNED) ? CTID_UINT32 : CTID_INT32); | ||
783 | int32_t mask = (int32_t)(((1u << bsz)-1) << pos); | ||
784 | TRef sp = crec_ct_tv(J, ct, 0, J->base[2], &rd->argv[2]); | ||
785 | sp = emitir(IRTI(IR_BSHL), sp, lj_ir_kint(J, pos)); | ||
786 | /* Use of the target type avoids forwarding conversions. */ | ||
787 | sp = emitir(IRT(IR_BAND, t), sp, lj_ir_kint(J, mask)); | ||
788 | tr = emitir(IRT(IR_BAND, t), tr, lj_ir_kint(J, (int32_t)~mask)); | ||
789 | tr = emitir(IRT(IR_BOR, t), tr, sp); | ||
790 | emitir(IRT(IR_XSTORE, t), ptr, tr); | ||
791 | rd->nres = 0; | ||
792 | J->needsnap = 1; | ||
793 | } | ||
794 | } | ||
795 | |||
730 | void LJ_FASTCALL recff_cdata_index(jit_State *J, RecordFFData *rd) | 796 | void LJ_FASTCALL recff_cdata_index(jit_State *J, RecordFFData *rd) |
731 | { | 797 | { |
732 | TRef idx, ptr = J->base[0]; | 798 | TRef idx, ptr = J->base[0]; |
@@ -801,6 +867,7 @@ again: | |||
801 | CType *fct; | 867 | CType *fct; |
802 | fct = lj_ctype_getfield(cts, ct, name, &fofs); | 868 | fct = lj_ctype_getfield(cts, ct, name, &fofs); |
803 | if (fct) { | 869 | if (fct) { |
870 | ofs += (ptrdiff_t)fofs; | ||
804 | /* Always specialize to the field name. */ | 871 | /* Always specialize to the field name. */ |
805 | emitir(IRTG(IR_EQ, IRT_STR), idx, lj_ir_kstr(J, name)); | 872 | emitir(IRTG(IR_EQ, IRT_STR), idx, lj_ir_kstr(J, name)); |
806 | if (ctype_isconstval(fct->info)) { | 873 | if (ctype_isconstval(fct->info)) { |
@@ -812,12 +879,14 @@ again: | |||
812 | J->base[0] = lj_ir_kint(J, (int32_t)fct->size); | 879 | J->base[0] = lj_ir_kint(J, (int32_t)fct->size); |
813 | return; /* Interpreter will throw for newindex. */ | 880 | return; /* Interpreter will throw for newindex. */ |
814 | } else if (ctype_isbitfield(fct->info)) { | 881 | } else if (ctype_isbitfield(fct->info)) { |
815 | lj_trace_err(J, LJ_TRERR_NYICONV); | 882 | if (ofs) |
883 | ptr = emitir(IRT(IR_ADD, IRT_PTR), ptr, lj_ir_kintp(J, ofs)); | ||
884 | crec_index_bf(J, rd, ptr, fct->info); | ||
885 | return; | ||
816 | } else { | 886 | } else { |
817 | lua_assert(ctype_isfield(fct->info)); | 887 | lua_assert(ctype_isfield(fct->info)); |
818 | sid = ctype_cid(fct->info); | 888 | sid = ctype_cid(fct->info); |
819 | } | 889 | } |
820 | ofs += (ptrdiff_t)fofs; | ||
821 | } | 890 | } |
822 | } else if (ctype_iscomplex(ct->info)) { | 891 | } else if (ctype_iscomplex(ct->info)) { |
823 | if (name->len == 2 && | 892 | if (name->len == 2 && |
@@ -867,21 +936,17 @@ again: | |||
867 | } | 936 | } |
868 | 937 | ||
869 | /* Record setting a finalizer. */ | 938 | /* Record setting a finalizer. */ |
870 | static void crec_finalizer(jit_State *J, TRef trcd, cTValue *fin) | 939 | static void crec_finalizer(jit_State *J, TRef trcd, TRef trfin, cTValue *fin) |
871 | { | 940 | { |
872 | TRef trlo = lj_ir_call(J, IRCALL_lj_cdata_setfin, trcd); | 941 | if (tvisgcv(fin)) { |
873 | TRef trhi = emitir(IRT(IR_ADD, IRT_P32), trlo, lj_ir_kint(J, 4)); | 942 | if (!trfin) trfin = lj_ir_kptr(J, gcval(fin)); |
874 | if (LJ_BE) { TRef tmp = trlo; trlo = trhi; trhi = tmp; } | 943 | } else if (tvisnil(fin)) { |
875 | if (tvisfunc(fin)) { | 944 | trfin = lj_ir_kptr(J, NULL); |
876 | emitir(IRT(IR_XSTORE, IRT_P32), trlo, lj_ir_kfunc(J, funcV(fin))); | ||
877 | emitir(IRTI(IR_XSTORE), trhi, lj_ir_kint(J, LJ_TFUNC)); | ||
878 | } else if (tviscdata(fin)) { | ||
879 | emitir(IRT(IR_XSTORE, IRT_P32), trlo, | ||
880 | lj_ir_kgc(J, obj2gco(cdataV(fin)), IRT_CDATA)); | ||
881 | emitir(IRTI(IR_XSTORE), trhi, lj_ir_kint(J, LJ_TCDATA)); | ||
882 | } else { | 945 | } else { |
883 | lj_trace_err(J, LJ_TRERR_BADTYPE); | 946 | lj_trace_err(J, LJ_TRERR_BADTYPE); |
884 | } | 947 | } |
948 | lj_ir_call(J, IRCALL_lj_cdata_setfin, trcd, | ||
949 | trfin, lj_ir_kint(J, (int32_t)itype(fin))); | ||
885 | J->needsnap = 1; | 950 | J->needsnap = 1; |
886 | } | 951 | } |
887 | 952 | ||
@@ -892,10 +957,8 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id) | |||
892 | CTSize sz; | 957 | CTSize sz; |
893 | CTInfo info = lj_ctype_info(cts, id, &sz); | 958 | CTInfo info = lj_ctype_info(cts, id, &sz); |
894 | CType *d = ctype_raw(cts, id); | 959 | CType *d = ctype_raw(cts, id); |
895 | TRef trid; | 960 | TRef trcd, trid = lj_ir_kint(J, id); |
896 | if (!sz || sz > 128 || (info & CTF_VLA) || ctype_align(info) > CT_MEMALIGN) | 961 | cTValue *fin; |
897 | lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: large/special allocations. */ | ||
898 | trid = lj_ir_kint(J, id); | ||
899 | /* Use special instruction to box pointer or 32/64 bit integer. */ | 962 | /* Use special instruction to box pointer or 32/64 bit integer. */ |
900 | if (ctype_isptr(info) || (ctype_isinteger(info) && (sz == 4 || sz == 8))) { | 963 | if (ctype_isptr(info) || (ctype_isinteger(info) && (sz == 4 || sz == 8))) { |
901 | TRef sp = J->base[1] ? crec_ct_tv(J, d, 0, J->base[1], &rd->argv[1]) : | 964 | TRef sp = J->base[1] ? crec_ct_tv(J, d, 0, J->base[1], &rd->argv[1]) : |
@@ -903,11 +966,36 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id) | |||
903 | sz == 4 ? lj_ir_kint(J, 0) : | 966 | sz == 4 ? lj_ir_kint(J, 0) : |
904 | (lj_needsplit(J), lj_ir_kint64(J, 0)); | 967 | (lj_needsplit(J), lj_ir_kint64(J, 0)); |
905 | J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), trid, sp); | 968 | J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), trid, sp); |
969 | return; | ||
906 | } else { | 970 | } else { |
907 | TRef trcd = emitir(IRTG(IR_CNEW, IRT_CDATA), trid, TREF_NIL); | 971 | TRef trsz = TREF_NIL; |
908 | cTValue *fin; | 972 | if ((info & CTF_VLA)) { /* Calculate VLA/VLS size at runtime. */ |
909 | J->base[0] = trcd; | 973 | CTSize sz0, sz1; |
910 | if (J->base[1] && !J->base[2] && | 974 | if (!J->base[1] || J->base[2]) |
975 | lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: init VLA/VLS. */ | ||
976 | trsz = crec_ct_tv(J, ctype_get(cts, CTID_INT32), 0, | ||
977 | J->base[1], &rd->argv[1]); | ||
978 | sz0 = lj_ctype_vlsize(cts, d, 0); | ||
979 | sz1 = lj_ctype_vlsize(cts, d, 1); | ||
980 | trsz = emitir(IRTGI(IR_MULOV), trsz, lj_ir_kint(J, (int32_t)(sz1-sz0))); | ||
981 | trsz = emitir(IRTGI(IR_ADDOV), trsz, lj_ir_kint(J, (int32_t)sz0)); | ||
982 | J->base[1] = 0; /* Simplify logic below. */ | ||
983 | } else if (ctype_align(info) > CT_MEMALIGN) { | ||
984 | trsz = lj_ir_kint(J, sz); | ||
985 | } | ||
986 | trcd = emitir(IRTG(IR_CNEW, IRT_CDATA), trid, trsz); | ||
987 | if (sz > 128 || (info & CTF_VLA)) { | ||
988 | TRef dp; | ||
989 | CTSize align; | ||
990 | special: /* Only handle bulk zero-fill for large/VLA/VLS types. */ | ||
991 | if (J->base[1]) | ||
992 | lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: init large/VLA/VLS types. */ | ||
993 | dp = emitir(IRT(IR_ADD, IRT_PTR), trcd, lj_ir_kintp(J, sizeof(GCcdata))); | ||
994 | if (trsz == TREF_NIL) trsz = lj_ir_kint(J, sz); | ||
995 | align = ctype_align(info); | ||
996 | if (align < CT_MEMALIGN) align = CT_MEMALIGN; | ||
997 | crec_fill(J, dp, trsz, lj_ir_kint(J, 0), (1u << align)); | ||
998 | } else if (J->base[1] && !J->base[2] && | ||
911 | !lj_cconv_multi_init(cts, d, &rd->argv[1])) { | 999 | !lj_cconv_multi_init(cts, d, &rd->argv[1])) { |
912 | goto single_init; | 1000 | goto single_init; |
913 | } else if (ctype_isarray(d->info)) { | 1001 | } else if (ctype_isarray(d->info)) { |
@@ -918,8 +1006,9 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id) | |||
918 | TValue *sval = &tv; | 1006 | TValue *sval = &tv; |
919 | MSize i; | 1007 | MSize i; |
920 | tv.u64 = 0; | 1008 | tv.u64 = 0; |
921 | if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info))) | 1009 | if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info)) || |
922 | lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: init array of aggregates. */ | 1010 | esize * CREC_FILL_MAXUNROLL < sz) |
1011 | goto special; | ||
923 | for (i = 1, ofs = 0; ofs < sz; ofs += esize) { | 1012 | for (i = 1, ofs = 0; ofs < sz; ofs += esize) { |
924 | TRef dp = emitir(IRT(IR_ADD, IRT_PTR), trcd, | 1013 | TRef dp = emitir(IRT(IR_ADD, IRT_PTR), trcd, |
925 | lj_ir_kintp(J, ofs + sizeof(GCcdata))); | 1014 | lj_ir_kintp(J, ofs + sizeof(GCcdata))); |
@@ -976,11 +1065,12 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id) | |||
976 | crec_ct_tv(J, d, dp, lj_ir_kint(J, 0), &tv); | 1065 | crec_ct_tv(J, d, dp, lj_ir_kint(J, 0), &tv); |
977 | } | 1066 | } |
978 | } | 1067 | } |
979 | /* Handle __gc metamethod. */ | ||
980 | fin = lj_ctype_meta(cts, id, MM_gc); | ||
981 | if (fin) | ||
982 | crec_finalizer(J, trcd, fin); | ||
983 | } | 1068 | } |
1069 | J->base[0] = trcd; | ||
1070 | /* Handle __gc metamethod. */ | ||
1071 | fin = lj_ctype_meta(cts, id, MM_gc); | ||
1072 | if (fin) | ||
1073 | crec_finalizer(J, trcd, 0, fin); | ||
984 | } | 1074 | } |
985 | 1075 | ||
986 | /* Record argument conversions. */ | 1076 | /* Record argument conversions. */ |
@@ -1040,7 +1130,7 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd, | |||
1040 | else | 1130 | else |
1041 | tr = emitconv(tr, IRT_INT, d->size==1 ? IRT_I8 : IRT_I16,IRCONV_SEXT); | 1131 | tr = emitconv(tr, IRT_INT, d->size==1 ? IRT_I8 : IRT_I16,IRCONV_SEXT); |
1042 | } | 1132 | } |
1043 | } else if (LJ_SOFTFP && ctype_isfp(d->info) && d->size > 4) { | 1133 | } else if (LJ_SOFTFP32 && ctype_isfp(d->info) && d->size > 4) { |
1044 | lj_needsplit(J); | 1134 | lj_needsplit(J); |
1045 | } | 1135 | } |
1046 | #if LJ_TARGET_X86 | 1136 | #if LJ_TARGET_X86 |
@@ -1086,20 +1176,20 @@ static void crec_snap_caller(jit_State *J) | |||
1086 | lua_State *L = J->L; | 1176 | lua_State *L = J->L; |
1087 | TValue *base = L->base, *top = L->top; | 1177 | TValue *base = L->base, *top = L->top; |
1088 | const BCIns *pc = J->pc; | 1178 | const BCIns *pc = J->pc; |
1089 | TRef ftr = J->base[-1]; | 1179 | TRef ftr = J->base[-1-LJ_FR2]; |
1090 | ptrdiff_t delta; | 1180 | ptrdiff_t delta; |
1091 | if (!frame_islua(base-1) || J->framedepth <= 0) | 1181 | if (!frame_islua(base-1) || J->framedepth <= 0) |
1092 | lj_trace_err(J, LJ_TRERR_NYICALL); | 1182 | lj_trace_err(J, LJ_TRERR_NYICALL); |
1093 | J->pc = frame_pc(base-1); delta = 1+bc_a(J->pc[-1]); | 1183 | J->pc = frame_pc(base-1); delta = 1+LJ_FR2+bc_a(J->pc[-1]); |
1094 | L->top = base; L->base = base - delta; | 1184 | L->top = base; L->base = base - delta; |
1095 | J->base[-1] = TREF_FALSE; | 1185 | J->base[-1-LJ_FR2] = TREF_FALSE; |
1096 | J->base -= delta; J->baseslot -= (BCReg)delta; | 1186 | J->base -= delta; J->baseslot -= (BCReg)delta; |
1097 | J->maxslot = (BCReg)delta; J->framedepth--; | 1187 | J->maxslot = (BCReg)delta-LJ_FR2; J->framedepth--; |
1098 | lj_snap_add(J); | 1188 | lj_snap_add(J); |
1099 | L->base = base; L->top = top; | 1189 | L->base = base; L->top = top; |
1100 | J->framedepth++; J->maxslot = 1; | 1190 | J->framedepth++; J->maxslot = 1; |
1101 | J->base += delta; J->baseslot += (BCReg)delta; | 1191 | J->base += delta; J->baseslot += (BCReg)delta; |
1102 | J->base[-1] = ftr; J->pc = pc; | 1192 | J->base[-1-LJ_FR2] = ftr; J->pc = pc; |
1103 | } | 1193 | } |
1104 | 1194 | ||
1105 | /* Record function call. */ | 1195 | /* Record function call. */ |
@@ -1191,8 +1281,7 @@ void LJ_FASTCALL recff_cdata_call(jit_State *J, RecordFFData *rd) | |||
1191 | tv = lj_ctype_meta(cts, ctype_isptr(ct->info) ? ctype_cid(ct->info) : id, mm); | 1281 | tv = lj_ctype_meta(cts, ctype_isptr(ct->info) ? ctype_cid(ct->info) : id, mm); |
1192 | if (tv) { | 1282 | if (tv) { |
1193 | if (tvisfunc(tv)) { | 1283 | if (tvisfunc(tv)) { |
1194 | J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME; | 1284 | crec_tailcall(J, rd, tv); |
1195 | rd->nres = -1; /* Pending tailcall. */ | ||
1196 | return; | 1285 | return; |
1197 | } | 1286 | } |
1198 | } else if (mm == MM_new) { | 1287 | } else if (mm == MM_new) { |
@@ -1233,7 +1322,7 @@ static TRef crec_arith_int64(jit_State *J, TRef *sp, CType **s, MMS mm) | |||
1233 | for (i = 0; i < 2; i++) { | 1322 | for (i = 0; i < 2; i++) { |
1234 | IRType st = tref_type(sp[i]); | 1323 | IRType st = tref_type(sp[i]); |
1235 | if (st == IRT_NUM || st == IRT_FLOAT) | 1324 | if (st == IRT_NUM || st == IRT_FLOAT) |
1236 | sp[i] = emitconv(sp[i], dt, st, IRCONV_TRUNC|IRCONV_ANY); | 1325 | sp[i] = emitconv(sp[i], dt, st, IRCONV_ANY); |
1237 | else if (!(st == IRT_I64 || st == IRT_U64)) | 1326 | else if (!(st == IRT_I64 || st == IRT_U64)) |
1238 | sp[i] = emitconv(sp[i], dt, IRT_INT, | 1327 | sp[i] = emitconv(sp[i], dt, IRT_INT, |
1239 | (s[i]->info & CTF_UNSIGNED) ? 0 : IRCONV_SEXT); | 1328 | (s[i]->info & CTF_UNSIGNED) ? 0 : IRCONV_SEXT); |
@@ -1302,15 +1391,14 @@ static TRef crec_arith_ptr(jit_State *J, TRef *sp, CType **s, MMS mm) | |||
1302 | CTypeID id; | 1391 | CTypeID id; |
1303 | #if LJ_64 | 1392 | #if LJ_64 |
1304 | if (t == IRT_NUM || t == IRT_FLOAT) | 1393 | if (t == IRT_NUM || t == IRT_FLOAT) |
1305 | tr = emitconv(tr, IRT_INTP, t, IRCONV_TRUNC|IRCONV_ANY); | 1394 | tr = emitconv(tr, IRT_INTP, t, IRCONV_ANY); |
1306 | else if (!(t == IRT_I64 || t == IRT_U64)) | 1395 | else if (!(t == IRT_I64 || t == IRT_U64)) |
1307 | tr = emitconv(tr, IRT_INTP, IRT_INT, | 1396 | tr = emitconv(tr, IRT_INTP, IRT_INT, |
1308 | ((t - IRT_I8) & 1) ? 0 : IRCONV_SEXT); | 1397 | ((t - IRT_I8) & 1) ? 0 : IRCONV_SEXT); |
1309 | #else | 1398 | #else |
1310 | if (!tref_typerange(sp[1], IRT_I8, IRT_U32)) { | 1399 | if (!tref_typerange(sp[1], IRT_I8, IRT_U32)) { |
1311 | tr = emitconv(tr, IRT_INTP, t, | 1400 | tr = emitconv(tr, IRT_INTP, t, |
1312 | (t == IRT_NUM || t == IRT_FLOAT) ? | 1401 | (t == IRT_NUM || t == IRT_FLOAT) ? IRCONV_ANY : 0); |
1313 | IRCONV_TRUNC|IRCONV_ANY : 0); | ||
1314 | } | 1402 | } |
1315 | #endif | 1403 | #endif |
1316 | tr = emitir(IRT(IR_MUL, IRT_INTP), tr, lj_ir_kintp(J, sz)); | 1404 | tr = emitir(IRT(IR_MUL, IRT_INTP), tr, lj_ir_kintp(J, sz)); |
@@ -1342,8 +1430,7 @@ static TRef crec_arith_meta(jit_State *J, TRef *sp, CType **s, CTState *cts, | |||
1342 | } | 1430 | } |
1343 | if (tv) { | 1431 | if (tv) { |
1344 | if (tvisfunc(tv)) { | 1432 | if (tvisfunc(tv)) { |
1345 | J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME; | 1433 | crec_tailcall(J, rd, tv); |
1346 | rd->nres = -1; /* Pending tailcall. */ | ||
1347 | return 0; | 1434 | return 0; |
1348 | } /* NYI: non-function metamethods. */ | 1435 | } /* NYI: non-function metamethods. */ |
1349 | } else if ((MMS)rd->data == MM_eq) { /* Fallback cdata pointer comparison. */ | 1436 | } else if ((MMS)rd->data == MM_eq) { /* Fallback cdata pointer comparison. */ |
@@ -1453,8 +1540,7 @@ void LJ_FASTCALL recff_cdata_arith(jit_State *J, RecordFFData *rd) | |||
1453 | !irt_isguard(J->guardemit)) { | 1540 | !irt_isguard(J->guardemit)) { |
1454 | const BCIns *pc = frame_contpc(J->L->base-1) - 1; | 1541 | const BCIns *pc = frame_contpc(J->L->base-1) - 1; |
1455 | if (bc_op(*pc) <= BC_ISNEP) { | 1542 | if (bc_op(*pc) <= BC_ISNEP) { |
1456 | setframe_pc(&J2G(J)->tmptv, pc); | 1543 | J2G(J)->tmptv.u64 = (uint64_t)(uintptr_t)pc; |
1457 | J2G(J)->tmptv.u32.lo = ((tref_istrue(tr) ^ bc_op(*pc)) & 1); | ||
1458 | J->postproc = LJ_POST_FIXCOMP; | 1544 | J->postproc = LJ_POST_FIXCOMP; |
1459 | } | 1545 | } |
1460 | } | 1546 | } |
@@ -1643,7 +1729,139 @@ void LJ_FASTCALL recff_ffi_xof(jit_State *J, RecordFFData *rd) | |||
1643 | void LJ_FASTCALL recff_ffi_gc(jit_State *J, RecordFFData *rd) | 1729 | void LJ_FASTCALL recff_ffi_gc(jit_State *J, RecordFFData *rd) |
1644 | { | 1730 | { |
1645 | argv2cdata(J, J->base[0], &rd->argv[0]); | 1731 | argv2cdata(J, J->base[0], &rd->argv[0]); |
1646 | crec_finalizer(J, J->base[0], &rd->argv[1]); | 1732 | if (!J->base[1]) |
1733 | lj_trace_err(J, LJ_TRERR_BADTYPE); | ||
1734 | crec_finalizer(J, J->base[0], J->base[1], &rd->argv[1]); | ||
1735 | } | ||
1736 | |||
1737 | /* -- 64 bit bit.* library functions -------------------------------------- */ | ||
1738 | |||
1739 | /* Determine bit operation type from argument type. */ | ||
1740 | static CTypeID crec_bit64_type(CTState *cts, cTValue *tv) | ||
1741 | { | ||
1742 | if (tviscdata(tv)) { | ||
1743 | CType *ct = lj_ctype_rawref(cts, cdataV(tv)->ctypeid); | ||
1744 | if (ctype_isenum(ct->info)) ct = ctype_child(cts, ct); | ||
1745 | if ((ct->info & (CTMASK_NUM|CTF_BOOL|CTF_FP|CTF_UNSIGNED)) == | ||
1746 | CTINFO(CT_NUM, CTF_UNSIGNED) && ct->size == 8) | ||
1747 | return CTID_UINT64; /* Use uint64_t, since it has the highest rank. */ | ||
1748 | return CTID_INT64; /* Otherwise use int64_t. */ | ||
1749 | } | ||
1750 | return 0; /* Use regular 32 bit ops. */ | ||
1751 | } | ||
1752 | |||
1753 | void LJ_FASTCALL recff_bit64_tobit(jit_State *J, RecordFFData *rd) | ||
1754 | { | ||
1755 | CTState *cts = ctype_ctsG(J2G(J)); | ||
1756 | TRef tr = crec_ct_tv(J, ctype_get(cts, CTID_INT64), 0, | ||
1757 | J->base[0], &rd->argv[0]); | ||
1758 | if (!tref_isinteger(tr)) | ||
1759 | tr = emitconv(tr, IRT_INT, tref_type(tr), 0); | ||
1760 | J->base[0] = tr; | ||
1761 | } | ||
1762 | |||
1763 | int LJ_FASTCALL recff_bit64_unary(jit_State *J, RecordFFData *rd) | ||
1764 | { | ||
1765 | CTState *cts = ctype_ctsG(J2G(J)); | ||
1766 | CTypeID id = crec_bit64_type(cts, &rd->argv[0]); | ||
1767 | if (id) { | ||
1768 | TRef tr = crec_ct_tv(J, ctype_get(cts, id), 0, J->base[0], &rd->argv[0]); | ||
1769 | tr = emitir(IRT(rd->data, id-CTID_INT64+IRT_I64), tr, 0); | ||
1770 | J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr); | ||
1771 | return 1; | ||
1772 | } | ||
1773 | return 0; | ||
1774 | } | ||
1775 | |||
1776 | int LJ_FASTCALL recff_bit64_nary(jit_State *J, RecordFFData *rd) | ||
1777 | { | ||
1778 | CTState *cts = ctype_ctsG(J2G(J)); | ||
1779 | CTypeID id = 0; | ||
1780 | MSize i; | ||
1781 | for (i = 0; J->base[i] != 0; i++) { | ||
1782 | CTypeID aid = crec_bit64_type(cts, &rd->argv[i]); | ||
1783 | if (id < aid) id = aid; /* Determine highest type rank of all arguments. */ | ||
1784 | } | ||
1785 | if (id) { | ||
1786 | CType *ct = ctype_get(cts, id); | ||
1787 | uint32_t ot = IRT(rd->data, id-CTID_INT64+IRT_I64); | ||
1788 | TRef tr = crec_ct_tv(J, ct, 0, J->base[0], &rd->argv[0]); | ||
1789 | for (i = 1; J->base[i] != 0; i++) { | ||
1790 | TRef tr2 = crec_ct_tv(J, ct, 0, J->base[i], &rd->argv[i]); | ||
1791 | tr = emitir(ot, tr, tr2); | ||
1792 | } | ||
1793 | J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr); | ||
1794 | return 1; | ||
1795 | } | ||
1796 | return 0; | ||
1797 | } | ||
1798 | |||
1799 | int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd) | ||
1800 | { | ||
1801 | CTState *cts = ctype_ctsG(J2G(J)); | ||
1802 | CTypeID id; | ||
1803 | TRef tsh = 0; | ||
1804 | if (J->base[0] && tref_iscdata(J->base[1])) { | ||
1805 | tsh = crec_ct_tv(J, ctype_get(cts, CTID_INT64), 0, | ||
1806 | J->base[1], &rd->argv[1]); | ||
1807 | if (!tref_isinteger(tsh)) | ||
1808 | tsh = emitconv(tsh, IRT_INT, tref_type(tsh), 0); | ||
1809 | J->base[1] = tsh; | ||
1810 | } | ||
1811 | id = crec_bit64_type(cts, &rd->argv[0]); | ||
1812 | if (id) { | ||
1813 | TRef tr = crec_ct_tv(J, ctype_get(cts, id), 0, J->base[0], &rd->argv[0]); | ||
1814 | uint32_t op = rd->data; | ||
1815 | if (!tsh) tsh = lj_opt_narrow_tobit(J, J->base[1]); | ||
1816 | if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) && | ||
1817 | !tref_isk(tsh)) | ||
1818 | tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 63)); | ||
1819 | #ifdef LJ_TARGET_UNIFYROT | ||
1820 | if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) { | ||
1821 | op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR; | ||
1822 | tsh = emitir(IRTI(IR_NEG), tsh, tsh); | ||
1823 | } | ||
1824 | #endif | ||
1825 | tr = emitir(IRT(op, id-CTID_INT64+IRT_I64), tr, tsh); | ||
1826 | J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr); | ||
1827 | return 1; | ||
1828 | } | ||
1829 | return 0; | ||
1830 | } | ||
1831 | |||
1832 | TRef recff_bit64_tohex(jit_State *J, RecordFFData *rd, TRef hdr) | ||
1833 | { | ||
1834 | CTState *cts = ctype_ctsG(J2G(J)); | ||
1835 | CTypeID id = crec_bit64_type(cts, &rd->argv[0]); | ||
1836 | TRef tr, trsf = J->base[1]; | ||
1837 | SFormat sf = (STRFMT_UINT|STRFMT_T_HEX); | ||
1838 | int32_t n; | ||
1839 | if (trsf) { | ||
1840 | CTypeID id2 = 0; | ||
1841 | n = (int32_t)lj_carith_check64(J->L, 2, &id2); | ||
1842 | if (id2) | ||
1843 | trsf = crec_ct_tv(J, ctype_get(cts, CTID_INT32), 0, trsf, &rd->argv[1]); | ||
1844 | else | ||
1845 | trsf = lj_opt_narrow_tobit(J, trsf); | ||
1846 | emitir(IRTGI(IR_EQ), trsf, lj_ir_kint(J, n)); /* Specialize to n. */ | ||
1847 | } else { | ||
1848 | n = id ? 16 : 8; | ||
1849 | } | ||
1850 | if (n < 0) { n = -n; sf |= STRFMT_F_UPPER; } | ||
1851 | sf |= ((SFormat)((n+1)&255) << STRFMT_SH_PREC); | ||
1852 | if (id) { | ||
1853 | tr = crec_ct_tv(J, ctype_get(cts, id), 0, J->base[0], &rd->argv[0]); | ||
1854 | if (n < 16) | ||
1855 | tr = emitir(IRT(IR_BAND, IRT_U64), tr, | ||
1856 | lj_ir_kint64(J, ((uint64_t)1 << 4*n)-1)); | ||
1857 | } else { | ||
1858 | tr = lj_opt_narrow_tobit(J, J->base[0]); | ||
1859 | if (n < 8) | ||
1860 | tr = emitir(IRTI(IR_BAND), tr, lj_ir_kint(J, (int32_t)((1u << 4*n)-1))); | ||
1861 | tr = emitconv(tr, IRT_U64, IRT_INT, 0); /* No sign-extension. */ | ||
1862 | lj_needsplit(J); | ||
1863 | } | ||
1864 | return lj_ir_call(J, IRCALL_lj_strfmt_putfxint, hdr, lj_ir_kint(J, sf), tr); | ||
1647 | } | 1865 | } |
1648 | 1866 | ||
1649 | /* -- Miscellaneous library functions ------------------------------------- */ | 1867 | /* -- Miscellaneous library functions ------------------------------------- */ |
diff --git a/src/lj_crecord.h b/src/lj_crecord.h index 941c8adb..4a8465ad 100644 --- a/src/lj_crecord.h +++ b/src/lj_crecord.h | |||
@@ -25,6 +25,13 @@ LJ_FUNC void LJ_FASTCALL recff_ffi_istype(jit_State *J, RecordFFData *rd); | |||
25 | LJ_FUNC void LJ_FASTCALL recff_ffi_abi(jit_State *J, RecordFFData *rd); | 25 | LJ_FUNC void LJ_FASTCALL recff_ffi_abi(jit_State *J, RecordFFData *rd); |
26 | LJ_FUNC void LJ_FASTCALL recff_ffi_xof(jit_State *J, RecordFFData *rd); | 26 | LJ_FUNC void LJ_FASTCALL recff_ffi_xof(jit_State *J, RecordFFData *rd); |
27 | LJ_FUNC void LJ_FASTCALL recff_ffi_gc(jit_State *J, RecordFFData *rd); | 27 | LJ_FUNC void LJ_FASTCALL recff_ffi_gc(jit_State *J, RecordFFData *rd); |
28 | |||
29 | LJ_FUNC void LJ_FASTCALL recff_bit64_tobit(jit_State *J, RecordFFData *rd); | ||
30 | LJ_FUNC int LJ_FASTCALL recff_bit64_unary(jit_State *J, RecordFFData *rd); | ||
31 | LJ_FUNC int LJ_FASTCALL recff_bit64_nary(jit_State *J, RecordFFData *rd); | ||
32 | LJ_FUNC int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd); | ||
33 | LJ_FUNC TRef recff_bit64_tohex(jit_State *J, RecordFFData *rd, TRef hdr); | ||
34 | |||
28 | LJ_FUNC void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd); | 35 | LJ_FUNC void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd); |
29 | #endif | 36 | #endif |
30 | 37 | ||
diff --git a/src/lj_ctype.c b/src/lj_ctype.c index a338e8e6..7e96e1bc 100644 --- a/src/lj_ctype.c +++ b/src/lj_ctype.c | |||
@@ -11,8 +11,10 @@ | |||
11 | #include "lj_err.h" | 11 | #include "lj_err.h" |
12 | #include "lj_str.h" | 12 | #include "lj_str.h" |
13 | #include "lj_tab.h" | 13 | #include "lj_tab.h" |
14 | #include "lj_strfmt.h" | ||
14 | #include "lj_ctype.h" | 15 | #include "lj_ctype.h" |
15 | #include "lj_ccallback.h" | 16 | #include "lj_ccallback.h" |
17 | #include "lj_buf.h" | ||
16 | 18 | ||
17 | /* -- C type definitions -------------------------------------------------- */ | 19 | /* -- C type definitions -------------------------------------------------- */ |
18 | 20 | ||
@@ -37,6 +39,8 @@ | |||
37 | _("uint64_t", UINT64) \ | 39 | _("uint64_t", UINT64) \ |
38 | _("intptr_t", INT_PSZ) \ | 40 | _("intptr_t", INT_PSZ) \ |
39 | _("uintptr_t", UINT_PSZ) \ | 41 | _("uintptr_t", UINT_PSZ) \ |
42 | /* From POSIX. */ \ | ||
43 | _("ssize_t", INT_PSZ) \ | ||
40 | /* End of typedef list. */ | 44 | /* End of typedef list. */ |
41 | 45 | ||
42 | /* Keywords (only the ones we actually care for). */ | 46 | /* Keywords (only the ones we actually care for). */ |
@@ -568,19 +572,18 @@ GCstr *lj_ctype_repr_int64(lua_State *L, uint64_t n, int isunsigned) | |||
568 | /* Convert complex to string with 'i' or 'I' suffix. */ | 572 | /* Convert complex to string with 'i' or 'I' suffix. */ |
569 | GCstr *lj_ctype_repr_complex(lua_State *L, void *sp, CTSize size) | 573 | GCstr *lj_ctype_repr_complex(lua_State *L, void *sp, CTSize size) |
570 | { | 574 | { |
571 | char buf[2*LJ_STR_NUMBUF+2+1]; | 575 | SBuf *sb = lj_buf_tmp_(L); |
572 | TValue re, im; | 576 | TValue re, im; |
573 | size_t len; | ||
574 | if (size == 2*sizeof(double)) { | 577 | if (size == 2*sizeof(double)) { |
575 | re.n = *(double *)sp; im.n = ((double *)sp)[1]; | 578 | re.n = *(double *)sp; im.n = ((double *)sp)[1]; |
576 | } else { | 579 | } else { |
577 | re.n = (double)*(float *)sp; im.n = (double)((float *)sp)[1]; | 580 | re.n = (double)*(float *)sp; im.n = (double)((float *)sp)[1]; |
578 | } | 581 | } |
579 | len = lj_str_bufnum(buf, &re); | 582 | lj_strfmt_putfnum(sb, STRFMT_G14, re.n); |
580 | if (!(im.u32.hi & 0x80000000u) || im.n != im.n) buf[len++] = '+'; | 583 | if (!(im.u32.hi & 0x80000000u) || im.n != im.n) lj_buf_putchar(sb, '+'); |
581 | len += lj_str_bufnum(buf+len, &im); | 584 | lj_strfmt_putfnum(sb, STRFMT_G14, im.n); |
582 | buf[len] = buf[len-1] >= 'a' ? 'I' : 'i'; | 585 | lj_buf_putchar(sb, sbufP(sb)[-1] >= 'a' ? 'I' : 'i'); |
583 | return lj_str_new(L, buf, len+1); | 586 | return lj_buf_str(L, sb); |
584 | } | 587 | } |
585 | 588 | ||
586 | /* -- C type state -------------------------------------------------------- */ | 589 | /* -- C type state -------------------------------------------------------- */ |
diff --git a/src/lj_ctype.h b/src/lj_ctype.h index 8066214f..73cefef8 100644 --- a/src/lj_ctype.h +++ b/src/lj_ctype.h | |||
@@ -263,7 +263,7 @@ typedef struct CTState { | |||
263 | /* -- Predefined types ---------------------------------------------------- */ | 263 | /* -- Predefined types ---------------------------------------------------- */ |
264 | 264 | ||
265 | /* Target-dependent types. */ | 265 | /* Target-dependent types. */ |
266 | #if LJ_TARGET_PPC || LJ_TARGET_PPCSPE | 266 | #if LJ_TARGET_PPC |
267 | #define CTTYDEFP(_) \ | 267 | #define CTTYDEFP(_) \ |
268 | _(LINT32, 4, CT_NUM, CTF_LONG|CTALIGN(2)) | 268 | _(LINT32, 4, CT_NUM, CTF_LONG|CTALIGN(2)) |
269 | #else | 269 | #else |
diff --git a/src/lj_debug.c b/src/lj_debug.c index 04fecfaf..70f77c74 100644 --- a/src/lj_debug.c +++ b/src/lj_debug.c | |||
@@ -9,12 +9,12 @@ | |||
9 | #include "lj_obj.h" | 9 | #include "lj_obj.h" |
10 | #include "lj_err.h" | 10 | #include "lj_err.h" |
11 | #include "lj_debug.h" | 11 | #include "lj_debug.h" |
12 | #include "lj_str.h" | 12 | #include "lj_buf.h" |
13 | #include "lj_tab.h" | 13 | #include "lj_tab.h" |
14 | #include "lj_state.h" | 14 | #include "lj_state.h" |
15 | #include "lj_frame.h" | 15 | #include "lj_frame.h" |
16 | #include "lj_bc.h" | 16 | #include "lj_bc.h" |
17 | #include "lj_vm.h" | 17 | #include "lj_strfmt.h" |
18 | #if LJ_HASJIT | 18 | #if LJ_HASJIT |
19 | #include "lj_jit.h" | 19 | #include "lj_jit.h" |
20 | #endif | 20 | #endif |
@@ -24,11 +24,11 @@ | |||
24 | /* Get frame corresponding to a level. */ | 24 | /* Get frame corresponding to a level. */ |
25 | cTValue *lj_debug_frame(lua_State *L, int level, int *size) | 25 | cTValue *lj_debug_frame(lua_State *L, int level, int *size) |
26 | { | 26 | { |
27 | cTValue *frame, *nextframe, *bot = tvref(L->stack); | 27 | cTValue *frame, *nextframe, *bot = tvref(L->stack)+LJ_FR2; |
28 | /* Traverse frames backwards. */ | 28 | /* Traverse frames backwards. */ |
29 | for (nextframe = frame = L->base-1; frame > bot; ) { | 29 | for (nextframe = frame = L->base-1; frame > bot; ) { |
30 | if (frame_gc(frame) == obj2gco(L)) | 30 | if (frame_gc(frame) == obj2gco(L)) |
31 | level++; /* Skip dummy frames. See lj_meta_call(). */ | 31 | level++; /* Skip dummy frames. See lj_err_optype_call(). */ |
32 | if (level-- == 0) { | 32 | if (level-- == 0) { |
33 | *size = (int)(nextframe - frame); | 33 | *size = (int)(nextframe - frame); |
34 | return frame; /* Level found. */ | 34 | return frame; /* Level found. */ |
@@ -87,8 +87,7 @@ static BCPos debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe) | |||
87 | if (frame_islua(f)) { | 87 | if (frame_islua(f)) { |
88 | f = frame_prevl(f); | 88 | f = frame_prevl(f); |
89 | } else { | 89 | } else { |
90 | if (frame_isc(f) || (LJ_HASFFI && frame_iscont(f) && | 90 | if (frame_isc(f) || (frame_iscont(f) && frame_iscont_fficb(f))) |
91 | (f-1)->u32.lo == LJ_CONT_FFI_CALLBACK)) | ||
92 | cf = cframe_raw(cframe_prev(cf)); | 91 | cf = cframe_raw(cframe_prev(cf)); |
93 | f = frame_prevd(f); | 92 | f = frame_prevd(f); |
94 | } | 93 | } |
@@ -142,38 +141,25 @@ static BCLine debug_frameline(lua_State *L, GCfunc *fn, cTValue *nextframe) | |||
142 | 141 | ||
143 | /* -- Variable names ------------------------------------------------------ */ | 142 | /* -- Variable names ------------------------------------------------------ */ |
144 | 143 | ||
145 | /* Read ULEB128 value. */ | ||
146 | static uint32_t debug_read_uleb128(const uint8_t **pp) | ||
147 | { | ||
148 | const uint8_t *p = *pp; | ||
149 | uint32_t v = *p++; | ||
150 | if (LJ_UNLIKELY(v >= 0x80)) { | ||
151 | int sh = 0; | ||
152 | v &= 0x7f; | ||
153 | do { v |= ((*p & 0x7f) << (sh += 7)); } while (*p++ >= 0x80); | ||
154 | } | ||
155 | *pp = p; | ||
156 | return v; | ||
157 | } | ||
158 | |||
159 | /* Get name of a local variable from slot number and PC. */ | 144 | /* Get name of a local variable from slot number and PC. */ |
160 | static const char *debug_varname(const GCproto *pt, BCPos pc, BCReg slot) | 145 | static const char *debug_varname(const GCproto *pt, BCPos pc, BCReg slot) |
161 | { | 146 | { |
162 | const uint8_t *p = proto_varinfo(pt); | 147 | const char *p = (const char *)proto_varinfo(pt); |
163 | if (p) { | 148 | if (p) { |
164 | BCPos lastpc = 0; | 149 | BCPos lastpc = 0; |
165 | for (;;) { | 150 | for (;;) { |
166 | const char *name = (const char *)p; | 151 | const char *name = p; |
167 | uint32_t vn = *p++; | 152 | uint32_t vn = *(const uint8_t *)p; |
168 | BCPos startpc, endpc; | 153 | BCPos startpc, endpc; |
169 | if (vn < VARNAME__MAX) { | 154 | if (vn < VARNAME__MAX) { |
170 | if (vn == VARNAME_END) break; /* End of varinfo. */ | 155 | if (vn == VARNAME_END) break; /* End of varinfo. */ |
171 | } else { | 156 | } else { |
172 | while (*p++) ; /* Skip over variable name string. */ | 157 | do { p++; } while (*(const uint8_t *)p); /* Skip over variable name. */ |
173 | } | 158 | } |
174 | lastpc = startpc = lastpc + debug_read_uleb128(&p); | 159 | p++; |
160 | lastpc = startpc = lastpc + lj_buf_ruleb128(&p); | ||
175 | if (startpc > pc) break; | 161 | if (startpc > pc) break; |
176 | endpc = startpc + debug_read_uleb128(&p); | 162 | endpc = startpc + lj_buf_ruleb128(&p); |
177 | if (pc < endpc && slot-- == 0) { | 163 | if (pc < endpc && slot-- == 0) { |
178 | if (vn < VARNAME__MAX) { | 164 | if (vn < VARNAME__MAX) { |
179 | #define VARNAMESTR(name, str) str "\0" | 165 | #define VARNAMESTR(name, str) str "\0" |
@@ -198,7 +184,7 @@ static TValue *debug_localname(lua_State *L, const lua_Debug *ar, | |||
198 | TValue *nextframe = size ? frame + size : NULL; | 184 | TValue *nextframe = size ? frame + size : NULL; |
199 | GCfunc *fn = frame_func(frame); | 185 | GCfunc *fn = frame_func(frame); |
200 | BCPos pc = debug_framepc(L, fn, nextframe); | 186 | BCPos pc = debug_framepc(L, fn, nextframe); |
201 | if (!nextframe) nextframe = L->top; | 187 | if (!nextframe) nextframe = L->top+LJ_FR2; |
202 | if ((int)slot1 < 0) { /* Negative slot number is for varargs. */ | 188 | if ((int)slot1 < 0) { /* Negative slot number is for varargs. */ |
203 | if (pc != NO_BCPOS) { | 189 | if (pc != NO_BCPOS) { |
204 | GCproto *pt = funcproto(fn); | 190 | GCproto *pt = funcproto(fn); |
@@ -208,7 +194,7 @@ static TValue *debug_localname(lua_State *L, const lua_Debug *ar, | |||
208 | nextframe = frame; | 194 | nextframe = frame; |
209 | frame = frame_prevd(frame); | 195 | frame = frame_prevd(frame); |
210 | } | 196 | } |
211 | if (frame + slot1 < nextframe) { | 197 | if (frame + slot1+LJ_FR2 < nextframe) { |
212 | *name = "(*vararg)"; | 198 | *name = "(*vararg)"; |
213 | return frame+slot1; | 199 | return frame+slot1; |
214 | } | 200 | } |
@@ -219,7 +205,7 @@ static TValue *debug_localname(lua_State *L, const lua_Debug *ar, | |||
219 | if (pc != NO_BCPOS && | 205 | if (pc != NO_BCPOS && |
220 | (*name = debug_varname(funcproto(fn), pc, slot1-1)) != NULL) | 206 | (*name = debug_varname(funcproto(fn), pc, slot1-1)) != NULL) |
221 | ; | 207 | ; |
222 | else if (slot1 > 0 && frame + slot1 < nextframe) | 208 | else if (slot1 > 0 && frame + slot1+LJ_FR2 < nextframe) |
223 | *name = "(*temporary)"; | 209 | *name = "(*temporary)"; |
224 | return frame+slot1; | 210 | return frame+slot1; |
225 | } | 211 | } |
@@ -282,7 +268,7 @@ restart: | |||
282 | *name = strdata(gco2str(proto_kgc(pt, ~(ptrdiff_t)bc_c(ins)))); | 268 | *name = strdata(gco2str(proto_kgc(pt, ~(ptrdiff_t)bc_c(ins)))); |
283 | if (ip > proto_bc(pt)) { | 269 | if (ip > proto_bc(pt)) { |
284 | BCIns insp = ip[-1]; | 270 | BCIns insp = ip[-1]; |
285 | if (bc_op(insp) == BC_MOV && bc_a(insp) == ra+1 && | 271 | if (bc_op(insp) == BC_MOV && bc_a(insp) == ra+1+LJ_FR2 && |
286 | bc_d(insp) == bc_b(ins)) | 272 | bc_d(insp) == bc_b(ins)) |
287 | return "method"; | 273 | return "method"; |
288 | } | 274 | } |
@@ -299,12 +285,12 @@ restart: | |||
299 | } | 285 | } |
300 | 286 | ||
301 | /* Deduce function name from caller of a frame. */ | 287 | /* Deduce function name from caller of a frame. */ |
302 | const char *lj_debug_funcname(lua_State *L, TValue *frame, const char **name) | 288 | const char *lj_debug_funcname(lua_State *L, cTValue *frame, const char **name) |
303 | { | 289 | { |
304 | TValue *pframe; | 290 | cTValue *pframe; |
305 | GCfunc *fn; | 291 | GCfunc *fn; |
306 | BCPos pc; | 292 | BCPos pc; |
307 | if (frame <= tvref(L->stack)) | 293 | if (frame <= tvref(L->stack)+LJ_FR2) |
308 | return NULL; | 294 | return NULL; |
309 | if (frame_isvarg(frame)) | 295 | if (frame_isvarg(frame)) |
310 | frame = frame_prevd(frame); | 296 | frame = frame_prevd(frame); |
@@ -330,7 +316,7 @@ const char *lj_debug_funcname(lua_State *L, TValue *frame, const char **name) | |||
330 | /* -- Source code locations ----------------------------------------------- */ | 316 | /* -- Source code locations ----------------------------------------------- */ |
331 | 317 | ||
332 | /* Generate shortened source name. */ | 318 | /* Generate shortened source name. */ |
333 | void lj_debug_shortname(char *out, GCstr *str) | 319 | void lj_debug_shortname(char *out, GCstr *str, BCLine line) |
334 | { | 320 | { |
335 | const char *src = strdata(str); | 321 | const char *src = strdata(str); |
336 | if (*src == '=') { | 322 | if (*src == '=') { |
@@ -344,11 +330,11 @@ void lj_debug_shortname(char *out, GCstr *str) | |||
344 | *out++ = '.'; *out++ = '.'; *out++ = '.'; | 330 | *out++ = '.'; *out++ = '.'; *out++ = '.'; |
345 | } | 331 | } |
346 | strcpy(out, src); | 332 | strcpy(out, src); |
347 | } else { /* Output [string "string"]. */ | 333 | } else { /* Output [string "string"] or [builtin:name]. */ |
348 | size_t len; /* Length, up to first control char. */ | 334 | size_t len; /* Length, up to first control char. */ |
349 | for (len = 0; len < LUA_IDSIZE-12; len++) | 335 | for (len = 0; len < LUA_IDSIZE-12; len++) |
350 | if (((const unsigned char *)src)[len] < ' ') break; | 336 | if (((const unsigned char *)src)[len] < ' ') break; |
351 | strcpy(out, "[string \""); out += 9; | 337 | strcpy(out, line == ~(BCLine)0 ? "[builtin:" : "[string \""); out += 9; |
352 | if (src[len] != '\0') { /* Must truncate? */ | 338 | if (src[len] != '\0') { /* Must truncate? */ |
353 | if (len > LUA_IDSIZE-15) len = LUA_IDSIZE-15; | 339 | if (len > LUA_IDSIZE-15) len = LUA_IDSIZE-15; |
354 | strncpy(out, src, len); out += len; | 340 | strncpy(out, src, len); out += len; |
@@ -356,7 +342,7 @@ void lj_debug_shortname(char *out, GCstr *str) | |||
356 | } else { | 342 | } else { |
357 | strcpy(out, src); out += len; | 343 | strcpy(out, src); out += len; |
358 | } | 344 | } |
359 | strcpy(out, "\"]"); | 345 | strcpy(out, line == ~(BCLine)0 ? "]" : "\"]"); |
360 | } | 346 | } |
361 | } | 347 | } |
362 | 348 | ||
@@ -369,14 +355,15 @@ void lj_debug_addloc(lua_State *L, const char *msg, | |||
369 | if (isluafunc(fn)) { | 355 | if (isluafunc(fn)) { |
370 | BCLine line = debug_frameline(L, fn, nextframe); | 356 | BCLine line = debug_frameline(L, fn, nextframe); |
371 | if (line >= 0) { | 357 | if (line >= 0) { |
358 | GCproto *pt = funcproto(fn); | ||
372 | char buf[LUA_IDSIZE]; | 359 | char buf[LUA_IDSIZE]; |
373 | lj_debug_shortname(buf, proto_chunkname(funcproto(fn))); | 360 | lj_debug_shortname(buf, proto_chunkname(pt), pt->firstline); |
374 | lj_str_pushf(L, "%s:%d: %s", buf, line, msg); | 361 | lj_strfmt_pushf(L, "%s:%d: %s", buf, line, msg); |
375 | return; | 362 | return; |
376 | } | 363 | } |
377 | } | 364 | } |
378 | } | 365 | } |
379 | lj_str_pushf(L, "%s", msg); | 366 | lj_strfmt_pushf(L, "%s", msg); |
380 | } | 367 | } |
381 | 368 | ||
382 | /* Push location string for a bytecode position to Lua stack. */ | 369 | /* Push location string for a bytecode position to Lua stack. */ |
@@ -386,20 +373,22 @@ void lj_debug_pushloc(lua_State *L, GCproto *pt, BCPos pc) | |||
386 | const char *s = strdata(name); | 373 | const char *s = strdata(name); |
387 | MSize i, len = name->len; | 374 | MSize i, len = name->len; |
388 | BCLine line = lj_debug_line(pt, pc); | 375 | BCLine line = lj_debug_line(pt, pc); |
389 | if (*s == '@') { | 376 | if (pt->firstline == ~(BCLine)0) { |
377 | lj_strfmt_pushf(L, "builtin:%s", s); | ||
378 | } else if (*s == '@') { | ||
390 | s++; len--; | 379 | s++; len--; |
391 | for (i = len; i > 0; i--) | 380 | for (i = len; i > 0; i--) |
392 | if (s[i] == '/' || s[i] == '\\') { | 381 | if (s[i] == '/' || s[i] == '\\') { |
393 | s += i+1; | 382 | s += i+1; |
394 | break; | 383 | break; |
395 | } | 384 | } |
396 | lj_str_pushf(L, "%s:%d", s, line); | 385 | lj_strfmt_pushf(L, "%s:%d", s, line); |
397 | } else if (len > 40) { | 386 | } else if (len > 40) { |
398 | lj_str_pushf(L, "%p:%d", pt, line); | 387 | lj_strfmt_pushf(L, "%p:%d", pt, line); |
399 | } else if (*s == '=') { | 388 | } else if (*s == '=') { |
400 | lj_str_pushf(L, "%s:%d", s+1, line); | 389 | lj_strfmt_pushf(L, "%s:%d", s+1, line); |
401 | } else { | 390 | } else { |
402 | lj_str_pushf(L, "\"%s\":%d", s, line); | 391 | lj_strfmt_pushf(L, "\"%s\":%d", s, line); |
403 | } | 392 | } |
404 | } | 393 | } |
405 | 394 | ||
@@ -462,7 +451,7 @@ int lj_debug_getinfo(lua_State *L, const char *what, lj_Debug *ar, int ext) | |||
462 | BCLine firstline = pt->firstline; | 451 | BCLine firstline = pt->firstline; |
463 | GCstr *name = proto_chunkname(pt); | 452 | GCstr *name = proto_chunkname(pt); |
464 | ar->source = strdata(name); | 453 | ar->source = strdata(name); |
465 | lj_debug_shortname(ar->short_src, name); | 454 | lj_debug_shortname(ar->short_src, name, pt->firstline); |
466 | ar->linedefined = (int)firstline; | 455 | ar->linedefined = (int)firstline; |
467 | ar->lastlinedefined = (int)(firstline + pt->numline); | 456 | ar->lastlinedefined = (int)(firstline + pt->numline); |
468 | ar->what = (firstline || !pt->numline) ? "Lua" : "main"; | 457 | ar->what = (firstline || !pt->numline) ? "Lua" : "main"; |
@@ -552,6 +541,111 @@ LUA_API int lua_getstack(lua_State *L, int level, lua_Debug *ar) | |||
552 | } | 541 | } |
553 | } | 542 | } |
554 | 543 | ||
544 | #if LJ_HASPROFILE | ||
545 | /* Put the chunkname into a buffer. */ | ||
546 | static int debug_putchunkname(SBuf *sb, GCproto *pt, int pathstrip) | ||
547 | { | ||
548 | GCstr *name = proto_chunkname(pt); | ||
549 | const char *p = strdata(name); | ||
550 | if (pt->firstline == ~(BCLine)0) { | ||
551 | lj_buf_putmem(sb, "[builtin:", 9); | ||
552 | lj_buf_putstr(sb, name); | ||
553 | lj_buf_putb(sb, ']'); | ||
554 | return 0; | ||
555 | } | ||
556 | if (*p == '=' || *p == '@') { | ||
557 | MSize len = name->len-1; | ||
558 | p++; | ||
559 | if (pathstrip) { | ||
560 | int i; | ||
561 | for (i = len-1; i >= 0; i--) | ||
562 | if (p[i] == '/' || p[i] == '\\') { | ||
563 | len -= i+1; | ||
564 | p = p+i+1; | ||
565 | break; | ||
566 | } | ||
567 | } | ||
568 | lj_buf_putmem(sb, p, len); | ||
569 | } else { | ||
570 | lj_buf_putmem(sb, "[string]", 8); | ||
571 | } | ||
572 | return 1; | ||
573 | } | ||
574 | |||
575 | /* Put a compact stack dump into a buffer. */ | ||
576 | void lj_debug_dumpstack(lua_State *L, SBuf *sb, const char *fmt, int depth) | ||
577 | { | ||
578 | int level = 0, dir = 1, pathstrip = 1; | ||
579 | MSize lastlen = 0; | ||
580 | if (depth < 0) { level = ~depth; depth = dir = -1; } /* Reverse frames. */ | ||
581 | while (level != depth) { /* Loop through all frame. */ | ||
582 | int size; | ||
583 | cTValue *frame = lj_debug_frame(L, level, &size); | ||
584 | if (frame) { | ||
585 | cTValue *nextframe = size ? frame+size : NULL; | ||
586 | GCfunc *fn = frame_func(frame); | ||
587 | const uint8_t *p = (const uint8_t *)fmt; | ||
588 | int c; | ||
589 | while ((c = *p++)) { | ||
590 | switch (c) { | ||
591 | case 'p': /* Preserve full path. */ | ||
592 | pathstrip = 0; | ||
593 | break; | ||
594 | case 'F': case 'f': { /* Dump function name. */ | ||
595 | const char *name; | ||
596 | const char *what = lj_debug_funcname(L, frame, &name); | ||
597 | if (what) { | ||
598 | if (c == 'F' && isluafunc(fn)) { /* Dump module:name for 'F'. */ | ||
599 | GCproto *pt = funcproto(fn); | ||
600 | if (pt->firstline != ~(BCLine)0) { /* Not a bytecode builtin. */ | ||
601 | debug_putchunkname(sb, pt, pathstrip); | ||
602 | lj_buf_putb(sb, ':'); | ||
603 | } | ||
604 | } | ||
605 | lj_buf_putmem(sb, name, (MSize)strlen(name)); | ||
606 | break; | ||
607 | } /* else: can't derive a name, dump module:line. */ | ||
608 | } | ||
609 | /* fallthrough */ | ||
610 | case 'l': /* Dump module:line. */ | ||
611 | if (isluafunc(fn)) { | ||
612 | GCproto *pt = funcproto(fn); | ||
613 | if (debug_putchunkname(sb, pt, pathstrip)) { | ||
614 | /* Regular Lua function. */ | ||
615 | BCLine line = c == 'l' ? debug_frameline(L, fn, nextframe) : | ||
616 | pt->firstline; | ||
617 | lj_buf_putb(sb, ':'); | ||
618 | lj_strfmt_putint(sb, line >= 0 ? line : pt->firstline); | ||
619 | } | ||
620 | } else if (isffunc(fn)) { /* Dump numbered builtins. */ | ||
621 | lj_buf_putmem(sb, "[builtin#", 9); | ||
622 | lj_strfmt_putint(sb, fn->c.ffid); | ||
623 | lj_buf_putb(sb, ']'); | ||
624 | } else { /* Dump C function address. */ | ||
625 | lj_buf_putb(sb, '@'); | ||
626 | lj_strfmt_putptr(sb, fn->c.f); | ||
627 | } | ||
628 | break; | ||
629 | case 'Z': /* Zap trailing separator. */ | ||
630 | lastlen = sbuflen(sb); | ||
631 | break; | ||
632 | default: | ||
633 | lj_buf_putb(sb, c); | ||
634 | break; | ||
635 | } | ||
636 | } | ||
637 | } else if (dir == 1) { | ||
638 | break; | ||
639 | } else { | ||
640 | level -= size; /* Reverse frame order: quickly skip missing level. */ | ||
641 | } | ||
642 | level += dir; | ||
643 | } | ||
644 | if (lastlen) | ||
645 | setsbufP(sb, sbufB(sb) + lastlen); /* Zap trailing separator. */ | ||
646 | } | ||
647 | #endif | ||
648 | |||
555 | /* Number of frames for the leading and trailing part of a traceback. */ | 649 | /* Number of frames for the leading and trailing part of a traceback. */ |
556 | #define TRACEBACK_LEVELS1 12 | 650 | #define TRACEBACK_LEVELS1 12 |
557 | #define TRACEBACK_LEVELS2 10 | 651 | #define TRACEBACK_LEVELS2 10 |
diff --git a/src/lj_debug.h b/src/lj_debug.h index 75ea927c..cc7e93d2 100644 --- a/src/lj_debug.h +++ b/src/lj_debug.h | |||
@@ -32,14 +32,18 @@ LJ_FUNC const char *lj_debug_uvname(GCproto *pt, uint32_t idx); | |||
32 | LJ_FUNC const char *lj_debug_uvnamev(cTValue *o, uint32_t idx, TValue **tvp); | 32 | LJ_FUNC const char *lj_debug_uvnamev(cTValue *o, uint32_t idx, TValue **tvp); |
33 | LJ_FUNC const char *lj_debug_slotname(GCproto *pt, const BCIns *pc, | 33 | LJ_FUNC const char *lj_debug_slotname(GCproto *pt, const BCIns *pc, |
34 | BCReg slot, const char **name); | 34 | BCReg slot, const char **name); |
35 | LJ_FUNC const char *lj_debug_funcname(lua_State *L, TValue *frame, | 35 | LJ_FUNC const char *lj_debug_funcname(lua_State *L, cTValue *frame, |
36 | const char **name); | 36 | const char **name); |
37 | LJ_FUNC void lj_debug_shortname(char *out, GCstr *str); | 37 | LJ_FUNC void lj_debug_shortname(char *out, GCstr *str, BCLine line); |
38 | LJ_FUNC void lj_debug_addloc(lua_State *L, const char *msg, | 38 | LJ_FUNC void lj_debug_addloc(lua_State *L, const char *msg, |
39 | cTValue *frame, cTValue *nextframe); | 39 | cTValue *frame, cTValue *nextframe); |
40 | LJ_FUNC void lj_debug_pushloc(lua_State *L, GCproto *pt, BCPos pc); | 40 | LJ_FUNC void lj_debug_pushloc(lua_State *L, GCproto *pt, BCPos pc); |
41 | LJ_FUNC int lj_debug_getinfo(lua_State *L, const char *what, lj_Debug *ar, | 41 | LJ_FUNC int lj_debug_getinfo(lua_State *L, const char *what, lj_Debug *ar, |
42 | int ext); | 42 | int ext); |
43 | #if LJ_HASPROFILE | ||
44 | LJ_FUNC void lj_debug_dumpstack(lua_State *L, SBuf *sb, const char *fmt, | ||
45 | int depth); | ||
46 | #endif | ||
43 | 47 | ||
44 | /* Fixed internal variable names. */ | 48 | /* Fixed internal variable names. */ |
45 | #define VARNAMEDEF(_) \ | 49 | #define VARNAMEDEF(_) \ |
diff --git a/src/lj_def.h b/src/lj_def.h index b5e26d69..75aaeb79 100644 --- a/src/lj_def.h +++ b/src/lj_def.h | |||
@@ -46,10 +46,14 @@ typedef unsigned int uintptr_t; | |||
46 | #include <stdlib.h> | 46 | #include <stdlib.h> |
47 | 47 | ||
48 | /* Various VM limits. */ | 48 | /* Various VM limits. */ |
49 | #define LJ_MAX_MEM 0x7fffff00 /* Max. total memory allocation. */ | 49 | #define LJ_MAX_MEM32 0x7fffff00 /* Max. 32 bit memory allocation. */ |
50 | #define LJ_MAX_MEM64 ((uint64_t)1<<47) /* Max. 64 bit memory allocation. */ | ||
51 | /* Max. total memory allocation. */ | ||
52 | #define LJ_MAX_MEM (LJ_GC64 ? LJ_MAX_MEM64 : LJ_MAX_MEM32) | ||
50 | #define LJ_MAX_ALLOC LJ_MAX_MEM /* Max. individual allocation length. */ | 53 | #define LJ_MAX_ALLOC LJ_MAX_MEM /* Max. individual allocation length. */ |
51 | #define LJ_MAX_STR LJ_MAX_MEM /* Max. string length. */ | 54 | #define LJ_MAX_STR LJ_MAX_MEM32 /* Max. string length. */ |
52 | #define LJ_MAX_UDATA LJ_MAX_MEM /* Max. userdata length. */ | 55 | #define LJ_MAX_BUF LJ_MAX_MEM32 /* Max. buffer length. */ |
56 | #define LJ_MAX_UDATA LJ_MAX_MEM32 /* Max. userdata length. */ | ||
53 | 57 | ||
54 | #define LJ_MAX_STRTAB (1<<26) /* Max. string table size. */ | 58 | #define LJ_MAX_STRTAB (1<<26) /* Max. string table size. */ |
55 | #define LJ_MAX_HBITS 26 /* Max. hash bits. */ | 59 | #define LJ_MAX_HBITS 26 /* Max. hash bits. */ |
@@ -57,7 +61,7 @@ typedef unsigned int uintptr_t; | |||
57 | #define LJ_MAX_ASIZE ((1<<(LJ_MAX_ABITS-1))+1) /* Max. array part size. */ | 61 | #define LJ_MAX_ASIZE ((1<<(LJ_MAX_ABITS-1))+1) /* Max. array part size. */ |
58 | #define LJ_MAX_COLOSIZE 16 /* Max. elems for colocated array. */ | 62 | #define LJ_MAX_COLOSIZE 16 /* Max. elems for colocated array. */ |
59 | 63 | ||
60 | #define LJ_MAX_LINE LJ_MAX_MEM /* Max. source code line number. */ | 64 | #define LJ_MAX_LINE LJ_MAX_MEM32 /* Max. source code line number. */ |
61 | #define LJ_MAX_XLEVEL 200 /* Max. syntactic nesting level. */ | 65 | #define LJ_MAX_XLEVEL 200 /* Max. syntactic nesting level. */ |
62 | #define LJ_MAX_BCINS (1<<26) /* Max. # of bytecode instructions. */ | 66 | #define LJ_MAX_BCINS (1<<26) /* Max. # of bytecode instructions. */ |
63 | #define LJ_MAX_SLOTS 250 /* Max. # of slots in a Lua func. */ | 67 | #define LJ_MAX_SLOTS 250 /* Max. # of slots in a Lua func. */ |
@@ -65,7 +69,7 @@ typedef unsigned int uintptr_t; | |||
65 | #define LJ_MAX_UPVAL 60 /* Max. # of upvalues. */ | 69 | #define LJ_MAX_UPVAL 60 /* Max. # of upvalues. */ |
66 | 70 | ||
67 | #define LJ_MAX_IDXCHAIN 100 /* __index/__newindex chain limit. */ | 71 | #define LJ_MAX_IDXCHAIN 100 /* __index/__newindex chain limit. */ |
68 | #define LJ_STACK_EXTRA 5 /* Extra stack space (metamethods). */ | 72 | #define LJ_STACK_EXTRA (5+2*LJ_FR2) /* Extra stack space (metamethods). */ |
69 | 73 | ||
70 | #define LJ_NUM_CBPAGE 1 /* Number of FFI callback pages. */ | 74 | #define LJ_NUM_CBPAGE 1 /* Number of FFI callback pages. */ |
71 | 75 | ||
@@ -76,7 +80,6 @@ typedef unsigned int uintptr_t; | |||
76 | #define LJ_MIN_SBUF 32 /* Min. string buffer length. */ | 80 | #define LJ_MIN_SBUF 32 /* Min. string buffer length. */ |
77 | #define LJ_MIN_VECSZ 8 /* Min. size for growable vectors. */ | 81 | #define LJ_MIN_VECSZ 8 /* Min. size for growable vectors. */ |
78 | #define LJ_MIN_IRSZ 32 /* Min. size for growable IR. */ | 82 | #define LJ_MIN_IRSZ 32 /* Min. size for growable IR. */ |
79 | #define LJ_MIN_K64SZ 16 /* Min. size for chained K64Array. */ | ||
80 | 83 | ||
81 | /* JIT compiler limits. */ | 84 | /* JIT compiler limits. */ |
82 | #define LJ_MAX_JSLOTS 250 /* Max. # of stack slots for a trace. */ | 85 | #define LJ_MAX_JSLOTS 250 /* Max. # of stack slots for a trace. */ |
@@ -91,6 +94,9 @@ typedef unsigned int uintptr_t; | |||
91 | #define U64x(hi, lo) (((uint64_t)0x##hi << 32) + (uint64_t)0x##lo) | 94 | #define U64x(hi, lo) (((uint64_t)0x##hi << 32) + (uint64_t)0x##lo) |
92 | #define i32ptr(p) ((int32_t)(intptr_t)(void *)(p)) | 95 | #define i32ptr(p) ((int32_t)(intptr_t)(void *)(p)) |
93 | #define u32ptr(p) ((uint32_t)(intptr_t)(void *)(p)) | 96 | #define u32ptr(p) ((uint32_t)(intptr_t)(void *)(p)) |
97 | #define i64ptr(p) ((int64_t)(intptr_t)(void *)(p)) | ||
98 | #define u64ptr(p) ((uint64_t)(intptr_t)(void *)(p)) | ||
99 | #define igcptr(p) (LJ_GC64 ? i64ptr(p) : i32ptr(p)) | ||
94 | 100 | ||
95 | #define checki8(x) ((x) == (int32_t)(int8_t)(x)) | 101 | #define checki8(x) ((x) == (int32_t)(int8_t)(x)) |
96 | #define checku8(x) ((x) == (int32_t)(uint8_t)(x)) | 102 | #define checku8(x) ((x) == (int32_t)(uint8_t)(x)) |
@@ -99,6 +105,8 @@ typedef unsigned int uintptr_t; | |||
99 | #define checki32(x) ((x) == (int32_t)(x)) | 105 | #define checki32(x) ((x) == (int32_t)(x)) |
100 | #define checku32(x) ((x) == (uint32_t)(x)) | 106 | #define checku32(x) ((x) == (uint32_t)(x)) |
101 | #define checkptr32(x) ((uintptr_t)(x) == (uint32_t)(uintptr_t)(x)) | 107 | #define checkptr32(x) ((uintptr_t)(x) == (uint32_t)(uintptr_t)(x)) |
108 | #define checkptr47(x) (((uint64_t)(uintptr_t)(x) >> 47) == 0) | ||
109 | #define checkptrGC(x) (LJ_GC64 ? checkptr47((x)) : LJ_64 ? checkptr32((x)) :1) | ||
102 | 110 | ||
103 | /* Every half-decent C compiler transforms this into a rotate instruction. */ | 111 | /* Every half-decent C compiler transforms this into a rotate instruction. */ |
104 | #define lj_rol(x, n) (((x)<<(n)) | ((x)>>(-(int)(n)&(8*sizeof(x)-1)))) | 112 | #define lj_rol(x, n) (((x)<<(n)) | ((x)>>(-(int)(n)&(8*sizeof(x)-1)))) |
diff --git a/src/lj_dispatch.c b/src/lj_dispatch.c index 644e9028..8553438c 100644 --- a/src/lj_dispatch.c +++ b/src/lj_dispatch.c | |||
@@ -8,6 +8,7 @@ | |||
8 | 8 | ||
9 | #include "lj_obj.h" | 9 | #include "lj_obj.h" |
10 | #include "lj_err.h" | 10 | #include "lj_err.h" |
11 | #include "lj_buf.h" | ||
11 | #include "lj_func.h" | 12 | #include "lj_func.h" |
12 | #include "lj_str.h" | 13 | #include "lj_str.h" |
13 | #include "lj_tab.h" | 14 | #include "lj_tab.h" |
@@ -17,6 +18,7 @@ | |||
17 | #include "lj_frame.h" | 18 | #include "lj_frame.h" |
18 | #include "lj_bc.h" | 19 | #include "lj_bc.h" |
19 | #include "lj_ff.h" | 20 | #include "lj_ff.h" |
21 | #include "lj_strfmt.h" | ||
20 | #if LJ_HASJIT | 22 | #if LJ_HASJIT |
21 | #include "lj_jit.h" | 23 | #include "lj_jit.h" |
22 | #endif | 24 | #endif |
@@ -25,6 +27,9 @@ | |||
25 | #endif | 27 | #endif |
26 | #include "lj_trace.h" | 28 | #include "lj_trace.h" |
27 | #include "lj_dispatch.h" | 29 | #include "lj_dispatch.h" |
30 | #if LJ_HASPROFILE | ||
31 | #include "lj_profile.h" | ||
32 | #endif | ||
28 | #include "lj_vm.h" | 33 | #include "lj_vm.h" |
29 | #include "luajit.h" | 34 | #include "luajit.h" |
30 | 35 | ||
@@ -37,6 +42,12 @@ LJ_STATIC_ASSERT(GG_NUM_ASMFF == FF_NUM_ASMFUNC); | |||
37 | #include <math.h> | 42 | #include <math.h> |
38 | LJ_FUNCA_NORET void LJ_FASTCALL lj_ffh_coroutine_wrap_err(lua_State *L, | 43 | LJ_FUNCA_NORET void LJ_FASTCALL lj_ffh_coroutine_wrap_err(lua_State *L, |
39 | lua_State *co); | 44 | lua_State *co); |
45 | #if !LJ_HASJIT | ||
46 | #define lj_dispatch_stitch lj_dispatch_ins | ||
47 | #endif | ||
48 | #if !LJ_HASPROFILE | ||
49 | #define lj_dispatch_profile lj_dispatch_ins | ||
50 | #endif | ||
40 | 51 | ||
41 | #define GOTFUNC(name) (ASMFunction)name, | 52 | #define GOTFUNC(name) (ASMFunction)name, |
42 | static const ASMFunction dispatch_got[] = { | 53 | static const ASMFunction dispatch_got[] = { |
@@ -64,7 +75,7 @@ void lj_dispatch_init(GG_State *GG) | |||
64 | for (i = 0; i < GG_NUM_ASMFF; i++) | 75 | for (i = 0; i < GG_NUM_ASMFF; i++) |
65 | GG->bcff[i] = BCINS_AD(BC__MAX+i, 0, 0); | 76 | GG->bcff[i] = BCINS_AD(BC__MAX+i, 0, 0); |
66 | #if LJ_TARGET_MIPS | 77 | #if LJ_TARGET_MIPS |
67 | memcpy(GG->got, dispatch_got, LJ_GOT__MAX*4); | 78 | memcpy(GG->got, dispatch_got, LJ_GOT__MAX*sizeof(ASMFunction *)); |
68 | #endif | 79 | #endif |
69 | } | 80 | } |
70 | 81 | ||
@@ -82,11 +93,12 @@ void lj_dispatch_init_hotcount(global_State *g) | |||
82 | #endif | 93 | #endif |
83 | 94 | ||
84 | /* Internal dispatch mode bits. */ | 95 | /* Internal dispatch mode bits. */ |
85 | #define DISPMODE_JIT 0x01 /* JIT compiler on. */ | 96 | #define DISPMODE_CALL 0x01 /* Override call dispatch. */ |
86 | #define DISPMODE_REC 0x02 /* Recording active. */ | 97 | #define DISPMODE_RET 0x02 /* Override return dispatch. */ |
87 | #define DISPMODE_INS 0x04 /* Override instruction dispatch. */ | 98 | #define DISPMODE_INS 0x04 /* Override instruction dispatch. */ |
88 | #define DISPMODE_CALL 0x08 /* Override call dispatch. */ | 99 | #define DISPMODE_JIT 0x10 /* JIT compiler on. */ |
89 | #define DISPMODE_RET 0x10 /* Override return dispatch. */ | 100 | #define DISPMODE_REC 0x20 /* Recording active. */ |
101 | #define DISPMODE_PROF 0x40 /* Profiling active. */ | ||
90 | 102 | ||
91 | /* Update dispatch table depending on various flags. */ | 103 | /* Update dispatch table depending on various flags. */ |
92 | void lj_dispatch_update(global_State *g) | 104 | void lj_dispatch_update(global_State *g) |
@@ -98,6 +110,9 @@ void lj_dispatch_update(global_State *g) | |||
98 | mode |= G2J(g)->state != LJ_TRACE_IDLE ? | 110 | mode |= G2J(g)->state != LJ_TRACE_IDLE ? |
99 | (DISPMODE_REC|DISPMODE_INS|DISPMODE_CALL) : 0; | 111 | (DISPMODE_REC|DISPMODE_INS|DISPMODE_CALL) : 0; |
100 | #endif | 112 | #endif |
113 | #if LJ_HASPROFILE | ||
114 | mode |= (g->hookmask & HOOK_PROFILE) ? (DISPMODE_PROF|DISPMODE_INS) : 0; | ||
115 | #endif | ||
101 | mode |= (g->hookmask & (LUA_MASKLINE|LUA_MASKCOUNT)) ? DISPMODE_INS : 0; | 116 | mode |= (g->hookmask & (LUA_MASKLINE|LUA_MASKCOUNT)) ? DISPMODE_INS : 0; |
102 | mode |= (g->hookmask & LUA_MASKCALL) ? DISPMODE_CALL : 0; | 117 | mode |= (g->hookmask & LUA_MASKCALL) ? DISPMODE_CALL : 0; |
103 | mode |= (g->hookmask & LUA_MASKRET) ? DISPMODE_RET : 0; | 118 | mode |= (g->hookmask & LUA_MASKRET) ? DISPMODE_RET : 0; |
@@ -126,9 +141,9 @@ void lj_dispatch_update(global_State *g) | |||
126 | disp[GG_LEN_DDISP+BC_LOOP] = f_loop; | 141 | disp[GG_LEN_DDISP+BC_LOOP] = f_loop; |
127 | 142 | ||
128 | /* Set dynamic instruction dispatch. */ | 143 | /* Set dynamic instruction dispatch. */ |
129 | if ((oldmode ^ mode) & (DISPMODE_REC|DISPMODE_INS)) { | 144 | if ((oldmode ^ mode) & (DISPMODE_PROF|DISPMODE_REC|DISPMODE_INS)) { |
130 | /* Need to update the whole table. */ | 145 | /* Need to update the whole table. */ |
131 | if (!(mode & (DISPMODE_REC|DISPMODE_INS))) { /* No ins dispatch? */ | 146 | if (!(mode & DISPMODE_INS)) { /* No ins dispatch? */ |
132 | /* Copy static dispatch table to dynamic dispatch table. */ | 147 | /* Copy static dispatch table to dynamic dispatch table. */ |
133 | memcpy(&disp[0], &disp[GG_LEN_DDISP], GG_LEN_SDISP*sizeof(ASMFunction)); | 148 | memcpy(&disp[0], &disp[GG_LEN_DDISP], GG_LEN_SDISP*sizeof(ASMFunction)); |
134 | /* Overwrite with dynamic return dispatch. */ | 149 | /* Overwrite with dynamic return dispatch. */ |
@@ -140,12 +155,13 @@ void lj_dispatch_update(global_State *g) | |||
140 | } | 155 | } |
141 | } else { | 156 | } else { |
142 | /* The recording dispatch also checks for hooks. */ | 157 | /* The recording dispatch also checks for hooks. */ |
143 | ASMFunction f = (mode & DISPMODE_REC) ? lj_vm_record : lj_vm_inshook; | 158 | ASMFunction f = (mode & DISPMODE_PROF) ? lj_vm_profhook : |
159 | (mode & DISPMODE_REC) ? lj_vm_record : lj_vm_inshook; | ||
144 | uint32_t i; | 160 | uint32_t i; |
145 | for (i = 0; i < GG_LEN_SDISP; i++) | 161 | for (i = 0; i < GG_LEN_SDISP; i++) |
146 | disp[i] = f; | 162 | disp[i] = f; |
147 | } | 163 | } |
148 | } else if (!(mode & (DISPMODE_REC|DISPMODE_INS))) { | 164 | } else if (!(mode & DISPMODE_INS)) { |
149 | /* Otherwise set dynamic counting ins. */ | 165 | /* Otherwise set dynamic counting ins. */ |
150 | disp[BC_FORL] = f_forl; | 166 | disp[BC_FORL] = f_forl; |
151 | disp[BC_ITERL] = f_iterl; | 167 | disp[BC_ITERL] = f_iterl; |
@@ -251,7 +267,7 @@ int luaJIT_setmode(lua_State *L, int idx, int mode) | |||
251 | case LUAJIT_MODE_FUNC: | 267 | case LUAJIT_MODE_FUNC: |
252 | case LUAJIT_MODE_ALLFUNC: | 268 | case LUAJIT_MODE_ALLFUNC: |
253 | case LUAJIT_MODE_ALLSUBFUNC: { | 269 | case LUAJIT_MODE_ALLSUBFUNC: { |
254 | cTValue *tv = idx == 0 ? frame_prev(L->base-1) : | 270 | cTValue *tv = idx == 0 ? frame_prev(L->base-1)-LJ_FR2 : |
255 | idx > 0 ? L->base + (idx-1) : L->top + idx; | 271 | idx > 0 ? L->base + (idx-1) : L->top + idx; |
256 | GCproto *pt; | 272 | GCproto *pt; |
257 | if ((idx == 0 || tvisfunc(tv)) && isluafunc(&gcval(tv)->fn)) | 273 | if ((idx == 0 || tvisfunc(tv)) && isluafunc(&gcval(tv)->fn)) |
@@ -352,10 +368,19 @@ static void callhook(lua_State *L, int event, BCLine line) | |||
352 | /* Top frame, nextframe = NULL. */ | 368 | /* Top frame, nextframe = NULL. */ |
353 | ar.i_ci = (int)((L->base-1) - tvref(L->stack)); | 369 | ar.i_ci = (int)((L->base-1) - tvref(L->stack)); |
354 | lj_state_checkstack(L, 1+LUA_MINSTACK); | 370 | lj_state_checkstack(L, 1+LUA_MINSTACK); |
371 | #if LJ_HASPROFILE && !LJ_PROFILE_SIGPROF | ||
372 | lj_profile_hook_enter(g); | ||
373 | #else | ||
355 | hook_enter(g); | 374 | hook_enter(g); |
375 | #endif | ||
356 | hookf(L, &ar); | 376 | hookf(L, &ar); |
357 | lua_assert(hook_active(g)); | 377 | lua_assert(hook_active(g)); |
378 | setgcref(g->cur_L, obj2gco(L)); | ||
379 | #if LJ_HASPROFILE && !LJ_PROFILE_SIGPROF | ||
380 | lj_profile_hook_leave(g); | ||
381 | #else | ||
358 | hook_leave(g); | 382 | hook_leave(g); |
383 | #endif | ||
359 | } | 384 | } |
360 | } | 385 | } |
361 | 386 | ||
@@ -368,7 +393,7 @@ static BCReg cur_topslot(GCproto *pt, const BCIns *pc, uint32_t nres) | |||
368 | if (bc_op(ins) == BC_UCLO) | 393 | if (bc_op(ins) == BC_UCLO) |
369 | ins = pc[bc_j(ins)]; | 394 | ins = pc[bc_j(ins)]; |
370 | switch (bc_op(ins)) { | 395 | switch (bc_op(ins)) { |
371 | case BC_CALLM: case BC_CALLMT: return bc_a(ins) + bc_c(ins) + nres-1+1; | 396 | case BC_CALLM: case BC_CALLMT: return bc_a(ins) + bc_c(ins) + nres-1+1+LJ_FR2; |
372 | case BC_RETM: return bc_a(ins) + bc_d(ins) + nres-1; | 397 | case BC_RETM: return bc_a(ins) + bc_d(ins) + nres-1; |
373 | case BC_TSETM: return bc_a(ins) + nres-1; | 398 | case BC_TSETM: return bc_a(ins) + nres-1; |
374 | default: return pt->framesize; | 399 | default: return pt->framesize; |
@@ -492,3 +517,41 @@ out: | |||
492 | return makeasmfunc(lj_bc_ofs[op]); /* Return static dispatch target. */ | 517 | return makeasmfunc(lj_bc_ofs[op]); /* Return static dispatch target. */ |
493 | } | 518 | } |
494 | 519 | ||
520 | #if LJ_HASJIT | ||
521 | /* Stitch a new trace. */ | ||
522 | void LJ_FASTCALL lj_dispatch_stitch(jit_State *J, const BCIns *pc) | ||
523 | { | ||
524 | ERRNO_SAVE | ||
525 | lua_State *L = J->L; | ||
526 | void *cf = cframe_raw(L->cframe); | ||
527 | const BCIns *oldpc = cframe_pc(cf); | ||
528 | setcframe_pc(cf, pc); | ||
529 | /* Before dispatch, have to bias PC by 1. */ | ||
530 | L->top = L->base + cur_topslot(curr_proto(L), pc+1, cframe_multres_n(cf)); | ||
531 | lj_trace_stitch(J, pc-1); /* Point to the CALL instruction. */ | ||
532 | setcframe_pc(cf, oldpc); | ||
533 | ERRNO_RESTORE | ||
534 | } | ||
535 | #endif | ||
536 | |||
537 | #if LJ_HASPROFILE | ||
538 | /* Profile dispatch. */ | ||
539 | void LJ_FASTCALL lj_dispatch_profile(lua_State *L, const BCIns *pc) | ||
540 | { | ||
541 | ERRNO_SAVE | ||
542 | GCfunc *fn = curr_func(L); | ||
543 | GCproto *pt = funcproto(fn); | ||
544 | void *cf = cframe_raw(L->cframe); | ||
545 | const BCIns *oldpc = cframe_pc(cf); | ||
546 | global_State *g; | ||
547 | setcframe_pc(cf, pc); | ||
548 | L->top = L->base + cur_topslot(pt, pc, cframe_multres_n(cf)); | ||
549 | lj_profile_interpreter(L); | ||
550 | setcframe_pc(cf, oldpc); | ||
551 | g = G(L); | ||
552 | setgcref(g->cur_L, obj2gco(L)); | ||
553 | setvmstate(g, INTERP); | ||
554 | ERRNO_RESTORE | ||
555 | } | ||
556 | #endif | ||
557 | |||
diff --git a/src/lj_dispatch.h b/src/lj_dispatch.h index 17bf93da..4ea6e85d 100644 --- a/src/lj_dispatch.h +++ b/src/lj_dispatch.h | |||
@@ -14,6 +14,22 @@ | |||
14 | 14 | ||
15 | #if LJ_TARGET_MIPS | 15 | #if LJ_TARGET_MIPS |
16 | /* Need our own global offset table for the dreaded MIPS calling conventions. */ | 16 | /* Need our own global offset table for the dreaded MIPS calling conventions. */ |
17 | |||
18 | #ifndef _LJ_VM_H | ||
19 | LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b); | ||
20 | #endif | ||
21 | |||
22 | #if LJ_SOFTFP | ||
23 | #ifndef _LJ_IRCALL_H | ||
24 | extern double __adddf3(double a, double b); | ||
25 | extern double __subdf3(double a, double b); | ||
26 | extern double __muldf3(double a, double b); | ||
27 | extern double __divdf3(double a, double b); | ||
28 | #endif | ||
29 | #define SFGOTDEF(_) _(sqrt) _(__adddf3) _(__subdf3) _(__muldf3) _(__divdf3) | ||
30 | #else | ||
31 | #define SFGOTDEF(_) | ||
32 | #endif | ||
17 | #if LJ_HASJIT | 33 | #if LJ_HASJIT |
18 | #define JITGOTDEF(_) _(lj_trace_exit) _(lj_trace_hot) | 34 | #define JITGOTDEF(_) _(lj_trace_exit) _(lj_trace_hot) |
19 | #else | 35 | #else |
@@ -28,16 +44,19 @@ | |||
28 | #define GOTDEF(_) \ | 44 | #define GOTDEF(_) \ |
29 | _(floor) _(ceil) _(trunc) _(log) _(log10) _(exp) _(sin) _(cos) _(tan) \ | 45 | _(floor) _(ceil) _(trunc) _(log) _(log10) _(exp) _(sin) _(cos) _(tan) \ |
30 | _(asin) _(acos) _(atan) _(sinh) _(cosh) _(tanh) _(frexp) _(modf) _(atan2) \ | 46 | _(asin) _(acos) _(atan) _(sinh) _(cosh) _(tanh) _(frexp) _(modf) _(atan2) \ |
31 | _(pow) _(fmod) _(ldexp) \ | 47 | _(pow) _(fmod) _(ldexp) _(lj_vm_modi) \ |
32 | _(lj_dispatch_call) _(lj_dispatch_ins) _(lj_err_throw) \ | 48 | _(lj_dispatch_call) _(lj_dispatch_ins) _(lj_dispatch_stitch) \ |
49 | _(lj_dispatch_profile) _(lj_err_throw) \ | ||
33 | _(lj_ffh_coroutine_wrap_err) _(lj_func_closeuv) _(lj_func_newL_gc) \ | 50 | _(lj_ffh_coroutine_wrap_err) _(lj_func_closeuv) _(lj_func_newL_gc) \ |
34 | _(lj_gc_barrieruv) _(lj_gc_step) _(lj_gc_step_fixtop) _(lj_meta_arith) \ | 51 | _(lj_gc_barrieruv) _(lj_gc_step) _(lj_gc_step_fixtop) _(lj_meta_arith) \ |
35 | _(lj_meta_call) _(lj_meta_cat) _(lj_meta_comp) _(lj_meta_equal) \ | 52 | _(lj_meta_call) _(lj_meta_cat) _(lj_meta_comp) _(lj_meta_equal) \ |
36 | _(lj_meta_for) _(lj_meta_len) _(lj_meta_tget) _(lj_meta_tset) \ | 53 | _(lj_meta_for) _(lj_meta_istype) _(lj_meta_len) _(lj_meta_tget) \ |
37 | _(lj_state_growstack) _(lj_str_fromnum) _(lj_str_fromnumber) _(lj_str_new) \ | 54 | _(lj_meta_tset) _(lj_state_growstack) _(lj_strfmt_number) \ |
38 | _(lj_tab_dup) _(lj_tab_get) _(lj_tab_getinth) _(lj_tab_len) _(lj_tab_new) \ | 55 | _(lj_str_new) _(lj_tab_dup) _(lj_tab_get) _(lj_tab_getinth) _(lj_tab_len) \ |
39 | _(lj_tab_newkey) _(lj_tab_next) _(lj_tab_reasize) \ | 56 | _(lj_tab_new) _(lj_tab_newkey) _(lj_tab_next) _(lj_tab_reasize) \ |
40 | JITGOTDEF(_) FFIGOTDEF(_) | 57 | _(lj_tab_setinth) _(lj_buf_putstr_reverse) _(lj_buf_putstr_lower) \ |
58 | _(lj_buf_putstr_upper) _(lj_buf_tostr) \ | ||
59 | JITGOTDEF(_) FFIGOTDEF(_) SFGOTDEF(_) | ||
41 | 60 | ||
42 | enum { | 61 | enum { |
43 | #define GOTENUM(name) LJ_GOT_##name, | 62 | #define GOTENUM(name) LJ_GOT_##name, |
@@ -60,7 +79,7 @@ typedef uint16_t HotCount; | |||
60 | #define HOTCOUNT_CALL 1 | 79 | #define HOTCOUNT_CALL 1 |
61 | 80 | ||
62 | /* This solves a circular dependency problem -- bump as needed. Sigh. */ | 81 | /* This solves a circular dependency problem -- bump as needed. Sigh. */ |
63 | #define GG_NUM_ASMFF 62 | 82 | #define GG_NUM_ASMFF 57 |
64 | 83 | ||
65 | #define GG_LEN_DDISP (BC__MAX + GG_NUM_ASMFF) | 84 | #define GG_LEN_DDISP (BC__MAX + GG_NUM_ASMFF) |
66 | #define GG_LEN_SDISP BC_FUNCF | 85 | #define GG_LEN_SDISP BC_FUNCF |
@@ -88,6 +107,7 @@ typedef struct GG_State { | |||
88 | #define J2G(J) (&J2GG(J)->g) | 107 | #define J2G(J) (&J2GG(J)->g) |
89 | #define G2J(gl) (&G2GG(gl)->J) | 108 | #define G2J(gl) (&G2GG(gl)->J) |
90 | #define L2J(L) (&L2GG(L)->J) | 109 | #define L2J(L) (&L2GG(L)->J) |
110 | #define GG_G2J (GG_OFS(J) - GG_OFS(g)) | ||
91 | #define GG_G2DISP (GG_OFS(dispatch) - GG_OFS(g)) | 111 | #define GG_G2DISP (GG_OFS(dispatch) - GG_OFS(g)) |
92 | #define GG_DISP2G (GG_OFS(g) - GG_OFS(dispatch)) | 112 | #define GG_DISP2G (GG_OFS(g) - GG_OFS(dispatch)) |
93 | #define GG_DISP2J (GG_OFS(J) - GG_OFS(dispatch)) | 113 | #define GG_DISP2J (GG_OFS(J) - GG_OFS(dispatch)) |
@@ -109,7 +129,12 @@ LJ_FUNC void lj_dispatch_update(global_State *g); | |||
109 | /* Instruction dispatch callback for hooks or when recording. */ | 129 | /* Instruction dispatch callback for hooks or when recording. */ |
110 | LJ_FUNCA void LJ_FASTCALL lj_dispatch_ins(lua_State *L, const BCIns *pc); | 130 | LJ_FUNCA void LJ_FASTCALL lj_dispatch_ins(lua_State *L, const BCIns *pc); |
111 | LJ_FUNCA ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns*pc); | 131 | LJ_FUNCA ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns*pc); |
112 | LJ_FUNCA void LJ_FASTCALL lj_dispatch_return(lua_State *L, const BCIns *pc); | 132 | #if LJ_HASJIT |
133 | LJ_FUNCA void LJ_FASTCALL lj_dispatch_stitch(jit_State *J, const BCIns *pc); | ||
134 | #endif | ||
135 | #if LJ_HASPROFILE | ||
136 | LJ_FUNCA void LJ_FASTCALL lj_dispatch_profile(lua_State *L, const BCIns *pc); | ||
137 | #endif | ||
113 | 138 | ||
114 | #if LJ_HASFFI && !defined(_BUILDVM_H) | 139 | #if LJ_HASFFI && !defined(_BUILDVM_H) |
115 | /* Save/restore errno and GetLastError() around hooks, exits and recording. */ | 140 | /* Save/restore errno and GetLastError() around hooks, exits and recording. */ |
diff --git a/src/lj_emit_arm.h b/src/lj_emit_arm.h index 6a136e51..25561549 100644 --- a/src/lj_emit_arm.h +++ b/src/lj_emit_arm.h | |||
@@ -207,7 +207,7 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i) | |||
207 | 207 | ||
208 | #define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr))) | 208 | #define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr))) |
209 | 209 | ||
210 | static Reg ra_allock(ASMState *as, int32_t k, RegSet allow); | 210 | static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); |
211 | 211 | ||
212 | /* Get/set from constant pointer. */ | 212 | /* Get/set from constant pointer. */ |
213 | static void emit_lsptr(ASMState *as, ARMIns ai, Reg r, void *p) | 213 | static void emit_lsptr(ASMState *as, ARMIns ai, Reg r, void *p) |
@@ -219,8 +219,9 @@ static void emit_lsptr(ASMState *as, ARMIns ai, Reg r, void *p) | |||
219 | 219 | ||
220 | #if !LJ_SOFTFP | 220 | #if !LJ_SOFTFP |
221 | /* Load a number constant into an FPR. */ | 221 | /* Load a number constant into an FPR. */ |
222 | static void emit_loadn(ASMState *as, Reg r, cTValue *tv) | 222 | static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) |
223 | { | 223 | { |
224 | cTValue *tv = ir_knum(ir); | ||
224 | int32_t i; | 225 | int32_t i; |
225 | if ((as->flags & JIT_F_VFPV3) && !tv->u32.lo) { | 226 | if ((as->flags & JIT_F_VFPV3) && !tv->u32.lo) { |
226 | uint32_t hi = tv->u32.hi; | 227 | uint32_t hi = tv->u32.hi; |
@@ -308,30 +309,30 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) | |||
308 | emit_dm(as, ARMI_MOV, dst, src); | 309 | emit_dm(as, ARMI_MOV, dst, src); |
309 | } | 310 | } |
310 | 311 | ||
311 | /* Generic load of register from stack slot. */ | 312 | /* Generic load of register with base and (small) offset address. */ |
312 | static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs) | 313 | static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) |
313 | { | 314 | { |
314 | #if LJ_SOFTFP | 315 | #if LJ_SOFTFP |
315 | lua_assert(!irt_isnum(ir->t)); UNUSED(ir); | 316 | lua_assert(!irt_isnum(ir->t)); UNUSED(ir); |
316 | #else | 317 | #else |
317 | if (r >= RID_MAX_GPR) | 318 | if (r >= RID_MAX_GPR) |
318 | emit_vlso(as, irt_isnum(ir->t) ? ARMI_VLDR_D : ARMI_VLDR_S, r, RID_SP, ofs); | 319 | emit_vlso(as, irt_isnum(ir->t) ? ARMI_VLDR_D : ARMI_VLDR_S, r, base, ofs); |
319 | else | 320 | else |
320 | #endif | 321 | #endif |
321 | emit_lso(as, ARMI_LDR, r, RID_SP, ofs); | 322 | emit_lso(as, ARMI_LDR, r, base, ofs); |
322 | } | 323 | } |
323 | 324 | ||
324 | /* Generic store of register to stack slot. */ | 325 | /* Generic store of register with base and (small) offset address. */ |
325 | static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs) | 326 | static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) |
326 | { | 327 | { |
327 | #if LJ_SOFTFP | 328 | #if LJ_SOFTFP |
328 | lua_assert(!irt_isnum(ir->t)); UNUSED(ir); | 329 | lua_assert(!irt_isnum(ir->t)); UNUSED(ir); |
329 | #else | 330 | #else |
330 | if (r >= RID_MAX_GPR) | 331 | if (r >= RID_MAX_GPR) |
331 | emit_vlso(as, irt_isnum(ir->t) ? ARMI_VSTR_D : ARMI_VSTR_S, r, RID_SP, ofs); | 332 | emit_vlso(as, irt_isnum(ir->t) ? ARMI_VSTR_D : ARMI_VSTR_S, r, base, ofs); |
332 | else | 333 | else |
333 | #endif | 334 | #endif |
334 | emit_lso(as, ARMI_STR, r, RID_SP, ofs); | 335 | emit_lso(as, ARMI_STR, r, base, ofs); |
335 | } | 336 | } |
336 | 337 | ||
337 | /* Emit an arithmetic/logic operation with a constant operand. */ | 338 | /* Emit an arithmetic/logic operation with a constant operand. */ |
diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h new file mode 100644 index 00000000..f09c0f3a --- /dev/null +++ b/src/lj_emit_arm64.h | |||
@@ -0,0 +1,419 @@ | |||
1 | /* | ||
2 | ** ARM64 instruction emitter. | ||
3 | ** Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h | ||
4 | ** | ||
5 | ** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. | ||
6 | ** Sponsored by Cisco Systems, Inc. | ||
7 | */ | ||
8 | |||
9 | /* -- Constant encoding --------------------------------------------------- */ | ||
10 | |||
11 | static uint64_t get_k64val(IRIns *ir) | ||
12 | { | ||
13 | if (ir->o == IR_KINT64) { | ||
14 | return ir_kint64(ir)->u64; | ||
15 | } else if (ir->o == IR_KGC) { | ||
16 | return (uint64_t)ir_kgc(ir); | ||
17 | } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { | ||
18 | return (uint64_t)ir_kptr(ir); | ||
19 | } else { | ||
20 | lua_assert(ir->o == IR_KINT || ir->o == IR_KNULL); | ||
21 | return ir->i; /* Sign-extended. */ | ||
22 | } | ||
23 | } | ||
24 | |||
25 | /* Encode constant in K12 format for data processing instructions. */ | ||
26 | static uint32_t emit_isk12(int64_t n) | ||
27 | { | ||
28 | uint64_t k = (n < 0) ? -n : n; | ||
29 | uint32_t m = (n < 0) ? 0x40000000 : 0; | ||
30 | if (k < 0x1000) { | ||
31 | return A64I_K12|m|A64F_U12(k); | ||
32 | } else if ((k & 0xfff000) == k) { | ||
33 | return A64I_K12|m|0x400000|A64F_U12(k>>12); | ||
34 | } | ||
35 | return 0; | ||
36 | } | ||
37 | |||
38 | #define emit_clz64(n) __builtin_clzll(n) | ||
39 | #define emit_ctz64(n) __builtin_ctzll(n) | ||
40 | |||
41 | /* Encode constant in K13 format for logical data processing instructions. */ | ||
42 | static uint32_t emit_isk13(uint64_t n, int is64) | ||
43 | { | ||
44 | int inv = 0, w = 128, lz, tz; | ||
45 | if (n & 1) { n = ~n; w = 64; inv = 1; } /* Avoid wrap-around of ones. */ | ||
46 | if (!n) return 0; /* Neither all-zero nor all-ones are allowed. */ | ||
47 | do { /* Find the repeat width. */ | ||
48 | if (is64 && (uint32_t)(n^(n>>32))) break; | ||
49 | n = (uint32_t)n; | ||
50 | if (!n) return 0; /* Ditto when passing n=0xffffffff and is64=0. */ | ||
51 | w = 32; if ((n^(n>>16)) & 0xffff) break; | ||
52 | n = n & 0xffff; w = 16; if ((n^(n>>8)) & 0xff) break; | ||
53 | n = n & 0xff; w = 8; if ((n^(n>>4)) & 0xf) break; | ||
54 | n = n & 0xf; w = 4; if ((n^(n>>2)) & 0x3) break; | ||
55 | n = n & 0x3; w = 2; | ||
56 | } while (0); | ||
57 | lz = emit_clz64(n); | ||
58 | tz = emit_ctz64(n); | ||
59 | if ((int64_t)(n << lz) >> (lz+tz) != -1ll) return 0; /* Non-contiguous? */ | ||
60 | if (inv) | ||
61 | return A64I_K13 | (((lz-w) & 127) << 16) | (((lz+tz-w-1) & 63) << 10); | ||
62 | else | ||
63 | return A64I_K13 | ((w-tz) << 16) | (((63-lz-tz-w-w) & 63) << 10); | ||
64 | } | ||
65 | |||
66 | static uint32_t emit_isfpk64(uint64_t n) | ||
67 | { | ||
68 | uint64_t etop9 = ((n >> 54) & 0x1ff); | ||
69 | if ((n << 16) == 0 && (etop9 == 0x100 || etop9 == 0x0ff)) { | ||
70 | return (uint32_t)(((n >> 48) & 0x7f) | ((n >> 56) & 0x80)); | ||
71 | } | ||
72 | return ~0u; | ||
73 | } | ||
74 | |||
75 | /* -- Emit basic instructions --------------------------------------------- */ | ||
76 | |||
77 | static void emit_dnma(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm, Reg ra) | ||
78 | { | ||
79 | *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_M(rm) | A64F_A(ra); | ||
80 | } | ||
81 | |||
82 | static void emit_dnm(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm) | ||
83 | { | ||
84 | *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_M(rm); | ||
85 | } | ||
86 | |||
87 | static void emit_dm(ASMState *as, A64Ins ai, Reg rd, Reg rm) | ||
88 | { | ||
89 | *--as->mcp = ai | A64F_D(rd) | A64F_M(rm); | ||
90 | } | ||
91 | |||
92 | static void emit_dn(ASMState *as, A64Ins ai, Reg rd, Reg rn) | ||
93 | { | ||
94 | *--as->mcp = ai | A64F_D(rd) | A64F_N(rn); | ||
95 | } | ||
96 | |||
97 | static void emit_nm(ASMState *as, A64Ins ai, Reg rn, Reg rm) | ||
98 | { | ||
99 | *--as->mcp = ai | A64F_N(rn) | A64F_M(rm); | ||
100 | } | ||
101 | |||
102 | static void emit_d(ASMState *as, A64Ins ai, Reg rd) | ||
103 | { | ||
104 | *--as->mcp = ai | A64F_D(rd); | ||
105 | } | ||
106 | |||
107 | static void emit_n(ASMState *as, A64Ins ai, Reg rn) | ||
108 | { | ||
109 | *--as->mcp = ai | A64F_N(rn); | ||
110 | } | ||
111 | |||
112 | static int emit_checkofs(A64Ins ai, int64_t ofs) | ||
113 | { | ||
114 | int scale = (ai >> 30) & 3; | ||
115 | if (ofs < 0 || (ofs & ((1<<scale)-1))) { | ||
116 | return (ofs >= -256 && ofs <= 255) ? -1 : 0; | ||
117 | } else { | ||
118 | return (ofs < (4096<<scale)) ? 1 : 0; | ||
119 | } | ||
120 | } | ||
121 | |||
122 | static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int64_t ofs) | ||
123 | { | ||
124 | int ot = emit_checkofs(ai, ofs), sc = (ai >> 30) & 3; | ||
125 | lua_assert(ot); | ||
126 | /* Combine LDR/STR pairs to LDP/STP. */ | ||
127 | if ((sc == 2 || sc == 3) && | ||
128 | (!(ai & 0x400000) || rd != rn) && | ||
129 | as->mcp != as->mcloop) { | ||
130 | uint32_t prev = *as->mcp & ~A64F_D(31); | ||
131 | int ofsm = ofs - (1<<sc), ofsp = ofs + (1<<sc); | ||
132 | A64Ins aip; | ||
133 | if (prev == (ai | A64F_N(rn) | A64F_U12(ofsm>>sc)) || | ||
134 | prev == ((ai^A64I_LS_U) | A64F_N(rn) | A64F_S9(ofsm&0x1ff))) { | ||
135 | aip = (A64F_A(rd) | A64F_D(*as->mcp & 31)); | ||
136 | } else if (prev == (ai | A64F_N(rn) | A64F_U12(ofsp>>sc)) || | ||
137 | prev == ((ai^A64I_LS_U) | A64F_N(rn) | A64F_S9(ofsp&0x1ff))) { | ||
138 | aip = (A64F_D(rd) | A64F_A(*as->mcp & 31)); | ||
139 | ofsm = ofs; | ||
140 | } else { | ||
141 | goto nopair; | ||
142 | } | ||
143 | if (ofsm >= (int)((unsigned int)-64<<sc) && ofsm <= (63<<sc)) { | ||
144 | *as->mcp = aip | A64F_N(rn) | ((ofsm >> sc) << 15) | | ||
145 | (ai ^ ((ai == A64I_LDRx || ai == A64I_STRx) ? 0x50000000 : 0x90000000)); | ||
146 | return; | ||
147 | } | ||
148 | } | ||
149 | nopair: | ||
150 | if (ot == 1) | ||
151 | *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_U12(ofs >> sc); | ||
152 | else | ||
153 | *--as->mcp = (ai^A64I_LS_U) | A64F_D(rd) | A64F_N(rn) | A64F_S9(ofs & 0x1ff); | ||
154 | } | ||
155 | |||
156 | /* -- Emit loads/stores --------------------------------------------------- */ | ||
157 | |||
158 | /* Prefer rematerialization of BASE/L from global_State over spills. */ | ||
159 | #define emit_canremat(ref) ((ref) <= ASMREF_L) | ||
160 | |||
161 | /* Try to find an N-step delta relative to other consts with N < lim. */ | ||
162 | static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim) | ||
163 | { | ||
164 | RegSet work = ~as->freeset & RSET_GPR; | ||
165 | if (lim <= 1) return 0; /* Can't beat that. */ | ||
166 | while (work) { | ||
167 | Reg r = rset_picktop(work); | ||
168 | IRRef ref = regcost_ref(as->cost[r]); | ||
169 | lua_assert(r != rd); | ||
170 | if (ref < REF_TRUE) { | ||
171 | uint64_t kx = ra_iskref(ref) ? (uint64_t)ra_krefk(as, ref) : | ||
172 | get_k64val(IR(ref)); | ||
173 | int64_t delta = (int64_t)(k - kx); | ||
174 | if (delta == 0) { | ||
175 | emit_dm(as, A64I_MOVx, rd, r); | ||
176 | return 1; | ||
177 | } else { | ||
178 | uint32_t k12 = emit_isk12(delta < 0 ? -delta : delta); | ||
179 | if (k12) { | ||
180 | emit_dn(as, (delta < 0 ? A64I_SUBx : A64I_ADDx)^k12, rd, r); | ||
181 | return 1; | ||
182 | } | ||
183 | /* Do other ops or multi-step deltas pay off? Probably not. | ||
184 | ** E.g. XOR rarely helps with pointer consts. | ||
185 | */ | ||
186 | } | ||
187 | } | ||
188 | rset_clear(work, r); | ||
189 | } | ||
190 | return 0; /* Failed. */ | ||
191 | } | ||
192 | |||
193 | static void emit_loadk(ASMState *as, Reg rd, uint64_t u64, int is64) | ||
194 | { | ||
195 | uint32_t k13 = emit_isk13(u64, is64); | ||
196 | if (k13) { /* Can the constant be represented as a bitmask immediate? */ | ||
197 | emit_dn(as, (is64|A64I_ORRw)^k13, rd, RID_ZERO); | ||
198 | } else { | ||
199 | int i, zeros = 0, ones = 0, neg; | ||
200 | if (!is64) u64 = (int64_t)(int32_t)u64; /* Sign-extend. */ | ||
201 | /* Count homogeneous 16 bit fragments. */ | ||
202 | for (i = 0; i < 4; i++) { | ||
203 | uint64_t frag = (u64 >> i*16) & 0xffff; | ||
204 | zeros += (frag == 0); | ||
205 | ones += (frag == 0xffff); | ||
206 | } | ||
207 | neg = ones > zeros; /* Use MOVN if it pays off. */ | ||
208 | if (!emit_kdelta(as, rd, u64, 4 - (neg ? ones : zeros))) { | ||
209 | int shift = 0, lshift = 0; | ||
210 | uint64_t n64 = neg ? ~u64 : u64; | ||
211 | if (n64 != 0) { | ||
212 | /* Find first/last fragment to be filled. */ | ||
213 | shift = (63-emit_clz64(n64)) & ~15; | ||
214 | lshift = emit_ctz64(n64) & ~15; | ||
215 | } | ||
216 | /* MOVK requires the original value (u64). */ | ||
217 | while (shift > lshift) { | ||
218 | uint32_t u16 = (u64 >> shift) & 0xffff; | ||
219 | /* Skip fragments that are correctly filled by MOVN/MOVZ. */ | ||
220 | if (u16 != (neg ? 0xffff : 0)) | ||
221 | emit_d(as, is64 | A64I_MOVKw | A64F_U16(u16) | A64F_LSL16(shift), rd); | ||
222 | shift -= 16; | ||
223 | } | ||
224 | /* But MOVN needs an inverted value (n64). */ | ||
225 | emit_d(as, (neg ? A64I_MOVNx : A64I_MOVZx) | | ||
226 | A64F_U16((n64 >> lshift) & 0xffff) | A64F_LSL16(lshift), rd); | ||
227 | } | ||
228 | } | ||
229 | } | ||
230 | |||
231 | /* Load a 32 bit constant into a GPR. */ | ||
232 | #define emit_loadi(as, rd, i) emit_loadk(as, rd, i, 0) | ||
233 | |||
234 | /* Load a 64 bit constant into a GPR. */ | ||
235 | #define emit_loadu64(as, rd, i) emit_loadk(as, rd, i, A64I_X) | ||
236 | |||
237 | #define emit_loada(as, r, addr) emit_loadu64(as, (r), (uintptr_t)(addr)) | ||
238 | |||
239 | #define glofs(as, k) \ | ||
240 | ((intptr_t)((uintptr_t)(k) - (uintptr_t)&J2GG(as->J)->g)) | ||
241 | #define mcpofs(as, k) \ | ||
242 | ((intptr_t)((uintptr_t)(k) - (uintptr_t)(as->mcp - 1))) | ||
243 | #define checkmcpofs(as, k) \ | ||
244 | (A64F_S_OK(mcpofs(as, k)>>2, 19)) | ||
245 | |||
246 | static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); | ||
247 | |||
248 | /* Get/set from constant pointer. */ | ||
249 | static void emit_lsptr(ASMState *as, A64Ins ai, Reg r, void *p) | ||
250 | { | ||
251 | /* First, check if ip + offset is in range. */ | ||
252 | if ((ai & 0x00400000) && checkmcpofs(as, p)) { | ||
253 | emit_d(as, A64I_LDRLx | A64F_S19(mcpofs(as, p)>>2), r); | ||
254 | } else { | ||
255 | Reg base = RID_GL; /* Next, try GL + offset. */ | ||
256 | int64_t ofs = glofs(as, p); | ||
257 | if (!emit_checkofs(ai, ofs)) { /* Else split up into base reg + offset. */ | ||
258 | int64_t i64 = i64ptr(p); | ||
259 | base = ra_allock(as, (i64 & ~0x7fffull), rset_exclude(RSET_GPR, r)); | ||
260 | ofs = i64 & 0x7fffull; | ||
261 | } | ||
262 | emit_lso(as, ai, r, base, ofs); | ||
263 | } | ||
264 | } | ||
265 | |||
266 | /* Load 64 bit IR constant into register. */ | ||
267 | static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) | ||
268 | { | ||
269 | const uint64_t *k = &ir_k64(ir)->u64; | ||
270 | int64_t ofs; | ||
271 | if (r >= RID_MAX_GPR) { | ||
272 | uint32_t fpk = emit_isfpk64(*k); | ||
273 | if (fpk != ~0u) { | ||
274 | emit_d(as, A64I_FMOV_DI | A64F_FP8(fpk), (r & 31)); | ||
275 | return; | ||
276 | } | ||
277 | } | ||
278 | ofs = glofs(as, k); | ||
279 | if (emit_checkofs(A64I_LDRx, ofs)) { | ||
280 | emit_lso(as, r >= RID_MAX_GPR ? A64I_LDRd : A64I_LDRx, | ||
281 | (r & 31), RID_GL, ofs); | ||
282 | } else { | ||
283 | if (r >= RID_MAX_GPR) { | ||
284 | emit_dn(as, A64I_FMOV_D_R, (r & 31), RID_TMP); | ||
285 | r = RID_TMP; | ||
286 | } | ||
287 | if (checkmcpofs(as, k)) | ||
288 | emit_d(as, A64I_LDRLx | A64F_S19(mcpofs(as, k)>>2), r); | ||
289 | else | ||
290 | emit_loadu64(as, r, *k); | ||
291 | } | ||
292 | } | ||
293 | |||
294 | /* Get/set global_State fields. */ | ||
295 | #define emit_getgl(as, r, field) \ | ||
296 | emit_lsptr(as, A64I_LDRx, (r), (void *)&J2G(as->J)->field) | ||
297 | #define emit_setgl(as, r, field) \ | ||
298 | emit_lsptr(as, A64I_STRx, (r), (void *)&J2G(as->J)->field) | ||
299 | |||
300 | /* Trace number is determined from pc of exit instruction. */ | ||
301 | #define emit_setvmstate(as, i) UNUSED(i) | ||
302 | |||
303 | /* -- Emit control-flow instructions -------------------------------------- */ | ||
304 | |||
305 | /* Label for internal jumps. */ | ||
306 | typedef MCode *MCLabel; | ||
307 | |||
308 | /* Return label pointing to current PC. */ | ||
309 | #define emit_label(as) ((as)->mcp) | ||
310 | |||
311 | static void emit_cond_branch(ASMState *as, A64CC cond, MCode *target) | ||
312 | { | ||
313 | MCode *p = --as->mcp; | ||
314 | ptrdiff_t delta = target - p; | ||
315 | lua_assert(A64F_S_OK(delta, 19)); | ||
316 | *p = A64I_BCC | A64F_S19(delta) | cond; | ||
317 | } | ||
318 | |||
319 | static void emit_branch(ASMState *as, A64Ins ai, MCode *target) | ||
320 | { | ||
321 | MCode *p = --as->mcp; | ||
322 | ptrdiff_t delta = target - p; | ||
323 | lua_assert(A64F_S_OK(delta, 26)); | ||
324 | *p = ai | A64F_S26(delta); | ||
325 | } | ||
326 | |||
327 | static void emit_tnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit, MCode *target) | ||
328 | { | ||
329 | MCode *p = --as->mcp; | ||
330 | ptrdiff_t delta = target - p; | ||
331 | lua_assert(bit < 63 && A64F_S_OK(delta, 14)); | ||
332 | if (bit > 31) ai |= A64I_X; | ||
333 | *p = ai | A64F_BIT(bit & 31) | A64F_S14(delta) | r; | ||
334 | } | ||
335 | |||
336 | static void emit_cnb(ASMState *as, A64Ins ai, Reg r, MCode *target) | ||
337 | { | ||
338 | MCode *p = --as->mcp; | ||
339 | ptrdiff_t delta = target - p; | ||
340 | lua_assert(A64F_S_OK(delta, 19)); | ||
341 | *p = ai | A64F_S19(delta) | r; | ||
342 | } | ||
343 | |||
344 | #define emit_jmp(as, target) emit_branch(as, A64I_B, (target)) | ||
345 | |||
346 | static void emit_call(ASMState *as, void *target) | ||
347 | { | ||
348 | MCode *p = --as->mcp; | ||
349 | ptrdiff_t delta = (char *)target - (char *)p; | ||
350 | if (A64F_S_OK(delta>>2, 26)) { | ||
351 | *p = A64I_BL | A64F_S26(delta>>2); | ||
352 | } else { /* Target out of range: need indirect call. But don't use R0-R7. */ | ||
353 | Reg r = ra_allock(as, i64ptr(target), | ||
354 | RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED); | ||
355 | *p = A64I_BLR | A64F_N(r); | ||
356 | } | ||
357 | } | ||
358 | |||
359 | /* -- Emit generic operations --------------------------------------------- */ | ||
360 | |||
361 | /* Generic move between two regs. */ | ||
362 | static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) | ||
363 | { | ||
364 | if (dst >= RID_MAX_GPR) { | ||
365 | emit_dn(as, irt_isnum(ir->t) ? A64I_FMOV_D : A64I_FMOV_S, | ||
366 | (dst & 31), (src & 31)); | ||
367 | return; | ||
368 | } | ||
369 | if (as->mcp != as->mcloop) { /* Swap early registers for loads/stores. */ | ||
370 | MCode ins = *as->mcp, swp = (src^dst); | ||
371 | if ((ins & 0xbf800000) == 0xb9000000) { | ||
372 | if (!((ins ^ (dst << 5)) & 0x000003e0)) | ||
373 | *as->mcp = ins ^ (swp << 5); /* Swap N in load/store. */ | ||
374 | if (!(ins & 0x00400000) && !((ins ^ dst) & 0x0000001f)) | ||
375 | *as->mcp = ins ^ swp; /* Swap D in store. */ | ||
376 | } | ||
377 | } | ||
378 | emit_dm(as, A64I_MOVx, dst, src); | ||
379 | } | ||
380 | |||
381 | /* Generic load of register with base and (small) offset address. */ | ||
382 | static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) | ||
383 | { | ||
384 | if (r >= RID_MAX_GPR) | ||
385 | emit_lso(as, irt_isnum(ir->t) ? A64I_LDRd : A64I_LDRs, (r & 31), base, ofs); | ||
386 | else | ||
387 | emit_lso(as, irt_is64(ir->t) ? A64I_LDRx : A64I_LDRw, r, base, ofs); | ||
388 | } | ||
389 | |||
390 | /* Generic store of register with base and (small) offset address. */ | ||
391 | static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) | ||
392 | { | ||
393 | if (r >= RID_MAX_GPR) | ||
394 | emit_lso(as, irt_isnum(ir->t) ? A64I_STRd : A64I_STRs, (r & 31), base, ofs); | ||
395 | else | ||
396 | emit_lso(as, irt_is64(ir->t) ? A64I_STRx : A64I_STRw, r, base, ofs); | ||
397 | } | ||
398 | |||
399 | /* Emit an arithmetic operation with a constant operand. */ | ||
400 | static void emit_opk(ASMState *as, A64Ins ai, Reg dest, Reg src, | ||
401 | int32_t i, RegSet allow) | ||
402 | { | ||
403 | uint32_t k = emit_isk12(i); | ||
404 | if (k) | ||
405 | emit_dn(as, ai^k, dest, src); | ||
406 | else | ||
407 | emit_dnm(as, ai, dest, src, ra_allock(as, i, allow)); | ||
408 | } | ||
409 | |||
410 | /* Add offset to pointer. */ | ||
411 | static void emit_addptr(ASMState *as, Reg r, int32_t ofs) | ||
412 | { | ||
413 | if (ofs) | ||
414 | emit_opk(as, ofs < 0 ? A64I_SUBx : A64I_ADDx, r, r, | ||
415 | ofs < 0 ? -ofs : ofs, rset_exclude(RSET_GPR, r)); | ||
416 | } | ||
417 | |||
418 | #define emit_spsub(as, ofs) emit_addptr(as, RID_SP, -(ofs)) | ||
419 | |||
diff --git a/src/lj_emit_mips.h b/src/lj_emit_mips.h index f3dcd1dd..bdabcf16 100644 --- a/src/lj_emit_mips.h +++ b/src/lj_emit_mips.h | |||
@@ -3,6 +3,30 @@ | |||
3 | ** Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h | 3 | ** Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h |
4 | */ | 4 | */ |
5 | 5 | ||
6 | #if LJ_64 | ||
7 | static intptr_t get_k64val(IRIns *ir) | ||
8 | { | ||
9 | if (ir->o == IR_KINT64) { | ||
10 | return (intptr_t)ir_kint64(ir)->u64; | ||
11 | } else if (ir->o == IR_KGC) { | ||
12 | return (intptr_t)ir_kgc(ir); | ||
13 | } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { | ||
14 | return (intptr_t)ir_kptr(ir); | ||
15 | } else if (LJ_SOFTFP && ir->o == IR_KNUM) { | ||
16 | return (intptr_t)ir_knum(ir)->u64; | ||
17 | } else { | ||
18 | lua_assert(ir->o == IR_KINT || ir->o == IR_KNULL); | ||
19 | return ir->i; /* Sign-extended. */ | ||
20 | } | ||
21 | } | ||
22 | #endif | ||
23 | |||
24 | #if LJ_64 | ||
25 | #define get_kval(ir) get_k64val(ir) | ||
26 | #else | ||
27 | #define get_kval(ir) ((ir)->i) | ||
28 | #endif | ||
29 | |||
6 | /* -- Emit basic instructions --------------------------------------------- */ | 30 | /* -- Emit basic instructions --------------------------------------------- */ |
7 | 31 | ||
8 | static void emit_dst(ASMState *as, MIPSIns mi, Reg rd, Reg rs, Reg rt) | 32 | static void emit_dst(ASMState *as, MIPSIns mi, Reg rd, Reg rs, Reg rt) |
@@ -35,7 +59,7 @@ static void emit_fgh(ASMState *as, MIPSIns mi, Reg rf, Reg rg, Reg rh) | |||
35 | 59 | ||
36 | static void emit_rotr(ASMState *as, Reg dest, Reg src, Reg tmp, uint32_t shift) | 60 | static void emit_rotr(ASMState *as, Reg dest, Reg src, Reg tmp, uint32_t shift) |
37 | { | 61 | { |
38 | if ((as->flags & JIT_F_MIPS32R2)) { | 62 | if (LJ_64 || (as->flags & JIT_F_MIPSXXR2)) { |
39 | emit_dta(as, MIPSI_ROTR, dest, src, shift); | 63 | emit_dta(as, MIPSI_ROTR, dest, src, shift); |
40 | } else { | 64 | } else { |
41 | emit_dst(as, MIPSI_OR, dest, dest, tmp); | 65 | emit_dst(as, MIPSI_OR, dest, dest, tmp); |
@@ -44,13 +68,21 @@ static void emit_rotr(ASMState *as, Reg dest, Reg src, Reg tmp, uint32_t shift) | |||
44 | } | 68 | } |
45 | } | 69 | } |
46 | 70 | ||
71 | #if LJ_64 | ||
72 | static void emit_tsml(ASMState *as, MIPSIns mi, Reg rt, Reg rs, uint32_t msb, | ||
73 | uint32_t lsb) | ||
74 | { | ||
75 | *--as->mcp = mi | MIPSF_T(rt) | MIPSF_S(rs) | MIPSF_M(msb) | MIPSF_L(lsb); | ||
76 | } | ||
77 | #endif | ||
78 | |||
47 | /* -- Emit loads/stores --------------------------------------------------- */ | 79 | /* -- Emit loads/stores --------------------------------------------------- */ |
48 | 80 | ||
49 | /* Prefer rematerialization of BASE/L from global_State over spills. */ | 81 | /* Prefer rematerialization of BASE/L from global_State over spills. */ |
50 | #define emit_canremat(ref) ((ref) <= REF_BASE) | 82 | #define emit_canremat(ref) ((ref) <= REF_BASE) |
51 | 83 | ||
52 | /* Try to find a one step delta relative to another constant. */ | 84 | /* Try to find a one step delta relative to another constant. */ |
53 | static int emit_kdelta1(ASMState *as, Reg t, int32_t i) | 85 | static int emit_kdelta1(ASMState *as, Reg t, intptr_t i) |
54 | { | 86 | { |
55 | RegSet work = ~as->freeset & RSET_GPR; | 87 | RegSet work = ~as->freeset & RSET_GPR; |
56 | while (work) { | 88 | while (work) { |
@@ -58,9 +90,10 @@ static int emit_kdelta1(ASMState *as, Reg t, int32_t i) | |||
58 | IRRef ref = regcost_ref(as->cost[r]); | 90 | IRRef ref = regcost_ref(as->cost[r]); |
59 | lua_assert(r != t); | 91 | lua_assert(r != t); |
60 | if (ref < ASMREF_L) { | 92 | if (ref < ASMREF_L) { |
61 | int32_t delta = i - (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i); | 93 | intptr_t delta = (intptr_t)((uintptr_t)i - |
94 | (uintptr_t)(ra_iskref(ref) ? ra_krefk(as, ref) : get_kval(IR(ref)))); | ||
62 | if (checki16(delta)) { | 95 | if (checki16(delta)) { |
63 | emit_tsi(as, MIPSI_ADDIU, t, r, delta); | 96 | emit_tsi(as, MIPSI_AADDIU, t, r, delta); |
64 | return 1; | 97 | return 1; |
65 | } | 98 | } |
66 | } | 99 | } |
@@ -76,8 +109,8 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i) | |||
76 | emit_ti(as, MIPSI_LI, r, i); | 109 | emit_ti(as, MIPSI_LI, r, i); |
77 | } else { | 110 | } else { |
78 | if ((i & 0xffff)) { | 111 | if ((i & 0xffff)) { |
79 | int32_t jgl = i32ptr(J2G(as->J)); | 112 | intptr_t jgl = (intptr_t)(void *)J2G(as->J); |
80 | if ((uint32_t)(i-jgl) < 65536) { | 113 | if ((uintptr_t)(i-jgl) < 65536) { |
81 | emit_tsi(as, MIPSI_ADDIU, r, RID_JGL, i-jgl-32768); | 114 | emit_tsi(as, MIPSI_ADDIU, r, RID_JGL, i-jgl-32768); |
82 | return; | 115 | return; |
83 | } else if (emit_kdelta1(as, r, i)) { | 116 | } else if (emit_kdelta1(as, r, i)) { |
@@ -92,16 +125,49 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i) | |||
92 | } | 125 | } |
93 | } | 126 | } |
94 | 127 | ||
128 | #if LJ_64 | ||
129 | /* Load a 64 bit constant into a GPR. */ | ||
130 | static void emit_loadu64(ASMState *as, Reg r, uint64_t u64) | ||
131 | { | ||
132 | if (checki32((int64_t)u64)) { | ||
133 | emit_loadi(as, r, (int32_t)u64); | ||
134 | } else { | ||
135 | uint64_t delta = u64 - (uint64_t)(void *)J2G(as->J); | ||
136 | if (delta < 65536) { | ||
137 | emit_tsi(as, MIPSI_DADDIU, r, RID_JGL, (int32_t)(delta-32768)); | ||
138 | } else if (emit_kdelta1(as, r, (intptr_t)u64)) { | ||
139 | return; | ||
140 | } else { | ||
141 | /* TODO MIPSR6: Use DAHI & DATI. Caveat: sign-extension. */ | ||
142 | if ((u64 & 0xffff)) { | ||
143 | emit_tsi(as, MIPSI_ORI, r, r, u64 & 0xffff); | ||
144 | } | ||
145 | if (((u64 >> 16) & 0xffff)) { | ||
146 | emit_dta(as, MIPSI_DSLL, r, r, 16); | ||
147 | emit_tsi(as, MIPSI_ORI, r, r, (u64 >> 16) & 0xffff); | ||
148 | emit_dta(as, MIPSI_DSLL, r, r, 16); | ||
149 | } else { | ||
150 | emit_dta(as, MIPSI_DSLL32, r, r, 0); | ||
151 | } | ||
152 | emit_loadi(as, r, (int32_t)(u64 >> 32)); | ||
153 | } | ||
154 | /* TODO: There are probably more optimization opportunities. */ | ||
155 | } | ||
156 | } | ||
157 | |||
158 | #define emit_loada(as, r, addr) emit_loadu64(as, (r), u64ptr((addr))) | ||
159 | #else | ||
95 | #define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr))) | 160 | #define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr))) |
161 | #endif | ||
96 | 162 | ||
97 | static Reg ra_allock(ASMState *as, int32_t k, RegSet allow); | 163 | static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); |
98 | static void ra_allockreg(ASMState *as, int32_t k, Reg r); | 164 | static void ra_allockreg(ASMState *as, intptr_t k, Reg r); |
99 | 165 | ||
100 | /* Get/set from constant pointer. */ | 166 | /* Get/set from constant pointer. */ |
101 | static void emit_lsptr(ASMState *as, MIPSIns mi, Reg r, void *p, RegSet allow) | 167 | static void emit_lsptr(ASMState *as, MIPSIns mi, Reg r, void *p, RegSet allow) |
102 | { | 168 | { |
103 | int32_t jgl = i32ptr(J2G(as->J)); | 169 | intptr_t jgl = (intptr_t)(J2G(as->J)); |
104 | int32_t i = i32ptr(p); | 170 | intptr_t i = (intptr_t)(p); |
105 | Reg base; | 171 | Reg base; |
106 | if ((uint32_t)(i-jgl) < 65536) { | 172 | if ((uint32_t)(i-jgl) < 65536) { |
107 | i = i-jgl-32768; | 173 | i = i-jgl-32768; |
@@ -112,8 +178,24 @@ static void emit_lsptr(ASMState *as, MIPSIns mi, Reg r, void *p, RegSet allow) | |||
112 | emit_tsi(as, mi, r, base, i); | 178 | emit_tsi(as, mi, r, base, i); |
113 | } | 179 | } |
114 | 180 | ||
115 | #define emit_loadn(as, r, tv) \ | 181 | #if LJ_64 |
116 | emit_lsptr(as, MIPSI_LDC1, ((r) & 31), (void *)(tv), RSET_GPR) | 182 | static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) |
183 | { | ||
184 | const uint64_t *k = &ir_k64(ir)->u64; | ||
185 | Reg r64 = r; | ||
186 | if (rset_test(RSET_FPR, r)) { | ||
187 | r64 = RID_TMP; | ||
188 | emit_tg(as, MIPSI_DMTC1, r64, r); | ||
189 | } | ||
190 | if ((uint32_t)((intptr_t)k-(intptr_t)J2G(as->J)) < 65536) | ||
191 | emit_lsptr(as, MIPSI_LD, r64, (void *)k, 0); | ||
192 | else | ||
193 | emit_loadu64(as, r64, *k); | ||
194 | } | ||
195 | #else | ||
196 | #define emit_loadk64(as, r, ir) \ | ||
197 | emit_lsptr(as, MIPSI_LDC1, ((r) & 31), (void *)&ir_knum((ir))->u64, RSET_GPR) | ||
198 | #endif | ||
117 | 199 | ||
118 | /* Get/set global_State fields. */ | 200 | /* Get/set global_State fields. */ |
119 | static void emit_lsglptr(ASMState *as, MIPSIns mi, Reg r, int32_t ofs) | 201 | static void emit_lsglptr(ASMState *as, MIPSIns mi, Reg r, int32_t ofs) |
@@ -122,9 +204,9 @@ static void emit_lsglptr(ASMState *as, MIPSIns mi, Reg r, int32_t ofs) | |||
122 | } | 204 | } |
123 | 205 | ||
124 | #define emit_getgl(as, r, field) \ | 206 | #define emit_getgl(as, r, field) \ |
125 | emit_lsglptr(as, MIPSI_LW, (r), (int32_t)offsetof(global_State, field)) | 207 | emit_lsglptr(as, MIPSI_AL, (r), (int32_t)offsetof(global_State, field)) |
126 | #define emit_setgl(as, r, field) \ | 208 | #define emit_setgl(as, r, field) \ |
127 | emit_lsglptr(as, MIPSI_SW, (r), (int32_t)offsetof(global_State, field)) | 209 | emit_lsglptr(as, MIPSI_AS, (r), (int32_t)offsetof(global_State, field)) |
128 | 210 | ||
129 | /* Trace number is determined from per-trace exit stubs. */ | 211 | /* Trace number is determined from per-trace exit stubs. */ |
130 | #define emit_setvmstate(as, i) UNUSED(i) | 212 | #define emit_setvmstate(as, i) UNUSED(i) |
@@ -152,16 +234,31 @@ static void emit_jmp(ASMState *as, MCode *target) | |||
152 | emit_branch(as, MIPSI_B, RID_ZERO, RID_ZERO, (target)); | 234 | emit_branch(as, MIPSI_B, RID_ZERO, RID_ZERO, (target)); |
153 | } | 235 | } |
154 | 236 | ||
155 | static void emit_call(ASMState *as, void *target) | 237 | static void emit_call(ASMState *as, void *target, int needcfa) |
156 | { | 238 | { |
157 | MCode *p = as->mcp; | 239 | MCode *p = as->mcp; |
158 | *--p = MIPSI_NOP; | 240 | #if LJ_TARGET_MIPSR6 |
159 | if ((((uintptr_t)target ^ (uintptr_t)p) >> 28) == 0) | 241 | ptrdiff_t delta = (char *)target - (char *)p; |
242 | if ((((delta>>2) + 0x02000000) >> 26) == 0) { /* Try compact call first. */ | ||
243 | *--p = MIPSI_BALC | (((uintptr_t)delta >>2) & 0x03ffffffu); | ||
244 | as->mcp = p; | ||
245 | return; | ||
246 | } | ||
247 | #endif | ||
248 | *--p = MIPSI_NOP; /* Delay slot. */ | ||
249 | if ((((uintptr_t)target ^ (uintptr_t)p) >> 28) == 0) { | ||
250 | #if !LJ_TARGET_MIPSR6 | ||
251 | *--p = (((uintptr_t)target & 1) ? MIPSI_JALX : MIPSI_JAL) | | ||
252 | (((uintptr_t)target >>2) & 0x03ffffffu); | ||
253 | #else | ||
160 | *--p = MIPSI_JAL | (((uintptr_t)target >>2) & 0x03ffffffu); | 254 | *--p = MIPSI_JAL | (((uintptr_t)target >>2) & 0x03ffffffu); |
161 | else /* Target out of range: need indirect call. */ | 255 | #endif |
256 | } else { /* Target out of range: need indirect call. */ | ||
162 | *--p = MIPSI_JALR | MIPSF_S(RID_CFUNCADDR); | 257 | *--p = MIPSI_JALR | MIPSF_S(RID_CFUNCADDR); |
258 | needcfa = 1; | ||
259 | } | ||
163 | as->mcp = p; | 260 | as->mcp = p; |
164 | ra_allockreg(as, i32ptr(target), RID_CFUNCADDR); | 261 | if (needcfa) ra_allockreg(as, (intptr_t)target, RID_CFUNCADDR); |
165 | } | 262 | } |
166 | 263 | ||
167 | /* -- Emit generic operations --------------------------------------------- */ | 264 | /* -- Emit generic operations --------------------------------------------- */ |
@@ -178,24 +275,24 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) | |||
178 | emit_fg(as, irt_isnum(ir->t) ? MIPSI_MOV_D : MIPSI_MOV_S, dst, src); | 275 | emit_fg(as, irt_isnum(ir->t) ? MIPSI_MOV_D : MIPSI_MOV_S, dst, src); |
179 | } | 276 | } |
180 | 277 | ||
181 | /* Generic load of register from stack slot. */ | 278 | /* Generic load of register with base and (small) offset address. */ |
182 | static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs) | 279 | static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) |
183 | { | 280 | { |
184 | if (r < RID_MAX_GPR) | 281 | if (r < RID_MAX_GPR) |
185 | emit_tsi(as, MIPSI_LW, r, RID_SP, ofs); | 282 | emit_tsi(as, irt_is64(ir->t) ? MIPSI_LD : MIPSI_LW, r, base, ofs); |
186 | else | 283 | else |
187 | emit_tsi(as, irt_isnum(ir->t) ? MIPSI_LDC1 : MIPSI_LWC1, | 284 | emit_tsi(as, irt_isnum(ir->t) ? MIPSI_LDC1 : MIPSI_LWC1, |
188 | (r & 31), RID_SP, ofs); | 285 | (r & 31), base, ofs); |
189 | } | 286 | } |
190 | 287 | ||
191 | /* Generic store of register to stack slot. */ | 288 | /* Generic store of register with base and (small) offset address. */ |
192 | static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs) | 289 | static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) |
193 | { | 290 | { |
194 | if (r < RID_MAX_GPR) | 291 | if (r < RID_MAX_GPR) |
195 | emit_tsi(as, MIPSI_SW, r, RID_SP, ofs); | 292 | emit_tsi(as, irt_is64(ir->t) ? MIPSI_SD : MIPSI_SW, r, base, ofs); |
196 | else | 293 | else |
197 | emit_tsi(as, irt_isnum(ir->t) ? MIPSI_SDC1 : MIPSI_SWC1, | 294 | emit_tsi(as, irt_isnum(ir->t) ? MIPSI_SDC1 : MIPSI_SWC1, |
198 | (r&31), RID_SP, ofs); | 295 | (r&31), base, ofs); |
199 | } | 296 | } |
200 | 297 | ||
201 | /* Add offset to pointer. */ | 298 | /* Add offset to pointer. */ |
@@ -203,7 +300,7 @@ static void emit_addptr(ASMState *as, Reg r, int32_t ofs) | |||
203 | { | 300 | { |
204 | if (ofs) { | 301 | if (ofs) { |
205 | lua_assert(checki16(ofs)); | 302 | lua_assert(checki16(ofs)); |
206 | emit_tsi(as, MIPSI_ADDIU, r, r, ofs); | 303 | emit_tsi(as, MIPSI_AADDIU, r, r, ofs); |
207 | } | 304 | } |
208 | } | 305 | } |
209 | 306 | ||
diff --git a/src/lj_emit_ppc.h b/src/lj_emit_ppc.h index e993d294..69765528 100644 --- a/src/lj_emit_ppc.h +++ b/src/lj_emit_ppc.h | |||
@@ -98,7 +98,7 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i) | |||
98 | 98 | ||
99 | #define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr))) | 99 | #define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr))) |
100 | 100 | ||
101 | static Reg ra_allock(ASMState *as, int32_t k, RegSet allow); | 101 | static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); |
102 | 102 | ||
103 | /* Get/set from constant pointer. */ | 103 | /* Get/set from constant pointer. */ |
104 | static void emit_lsptr(ASMState *as, PPCIns pi, Reg r, void *p, RegSet allow) | 104 | static void emit_lsptr(ASMState *as, PPCIns pi, Reg r, void *p, RegSet allow) |
@@ -115,8 +115,8 @@ static void emit_lsptr(ASMState *as, PPCIns pi, Reg r, void *p, RegSet allow) | |||
115 | emit_tai(as, pi, r, base, i); | 115 | emit_tai(as, pi, r, base, i); |
116 | } | 116 | } |
117 | 117 | ||
118 | #define emit_loadn(as, r, tv) \ | 118 | #define emit_loadk64(as, r, ir) \ |
119 | emit_lsptr(as, PPCI_LFD, ((r) & 31), (void *)(tv), RSET_GPR) | 119 | emit_lsptr(as, PPCI_LFD, ((r) & 31), (void *)&ir_knum((ir))->u64, RSET_GPR) |
120 | 120 | ||
121 | /* Get/set global_State fields. */ | 121 | /* Get/set global_State fields. */ |
122 | static void emit_lsglptr(ASMState *as, PPCIns pi, Reg r, int32_t ofs) | 122 | static void emit_lsglptr(ASMState *as, PPCIns pi, Reg r, int32_t ofs) |
@@ -186,22 +186,22 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) | |||
186 | emit_fb(as, PPCI_FMR, dst, src); | 186 | emit_fb(as, PPCI_FMR, dst, src); |
187 | } | 187 | } |
188 | 188 | ||
189 | /* Generic load of register from stack slot. */ | 189 | /* Generic load of register with base and (small) offset address. */ |
190 | static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs) | 190 | static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) |
191 | { | 191 | { |
192 | if (r < RID_MAX_GPR) | 192 | if (r < RID_MAX_GPR) |
193 | emit_tai(as, PPCI_LWZ, r, RID_SP, ofs); | 193 | emit_tai(as, PPCI_LWZ, r, base, ofs); |
194 | else | 194 | else |
195 | emit_fai(as, irt_isnum(ir->t) ? PPCI_LFD : PPCI_LFS, r, RID_SP, ofs); | 195 | emit_fai(as, irt_isnum(ir->t) ? PPCI_LFD : PPCI_LFS, r, base, ofs); |
196 | } | 196 | } |
197 | 197 | ||
198 | /* Generic store of register to stack slot. */ | 198 | /* Generic store of register with base and (small) offset address. */ |
199 | static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs) | 199 | static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) |
200 | { | 200 | { |
201 | if (r < RID_MAX_GPR) | 201 | if (r < RID_MAX_GPR) |
202 | emit_tai(as, PPCI_STW, r, RID_SP, ofs); | 202 | emit_tai(as, PPCI_STW, r, base, ofs); |
203 | else | 203 | else |
204 | emit_fai(as, irt_isnum(ir->t) ? PPCI_STFD : PPCI_STFS, r, RID_SP, ofs); | 204 | emit_fai(as, irt_isnum(ir->t) ? PPCI_STFD : PPCI_STFS, r, base, ofs); |
205 | } | 205 | } |
206 | 206 | ||
207 | /* Emit a compare (for equality) with a constant operand. */ | 207 | /* Emit a compare (for equality) with a constant operand. */ |
diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h index 7bbc695b..bc4391a0 100644 --- a/src/lj_emit_x86.h +++ b/src/lj_emit_x86.h | |||
@@ -13,10 +13,17 @@ | |||
13 | if (rex != 0x40) *--(p) = rex; } | 13 | if (rex != 0x40) *--(p) = rex; } |
14 | #define FORCE_REX 0x200 | 14 | #define FORCE_REX 0x200 |
15 | #define REX_64 (FORCE_REX|0x080000) | 15 | #define REX_64 (FORCE_REX|0x080000) |
16 | #define VEX_64 0x800000 | ||
16 | #else | 17 | #else |
17 | #define REXRB(p, rr, rb) ((void)0) | 18 | #define REXRB(p, rr, rb) ((void)0) |
18 | #define FORCE_REX 0 | 19 | #define FORCE_REX 0 |
19 | #define REX_64 0 | 20 | #define REX_64 0 |
21 | #define VEX_64 0 | ||
22 | #endif | ||
23 | #if LJ_GC64 | ||
24 | #define REX_GC64 REX_64 | ||
25 | #else | ||
26 | #define REX_GC64 0 | ||
20 | #endif | 27 | #endif |
21 | 28 | ||
22 | #define emit_i8(as, i) (*--as->mcp = (MCode)(i)) | 29 | #define emit_i8(as, i) (*--as->mcp = (MCode)(i)) |
@@ -31,6 +38,13 @@ static LJ_AINLINE MCode *emit_op(x86Op xo, Reg rr, Reg rb, Reg rx, | |||
31 | MCode *p, int delta) | 38 | MCode *p, int delta) |
32 | { | 39 | { |
33 | int n = (int8_t)xo; | 40 | int n = (int8_t)xo; |
41 | if (n == -60) { /* VEX-encoded instruction */ | ||
42 | #if LJ_64 | ||
43 | xo ^= (((rr>>1)&4)+((rx>>2)&2)+((rb>>3)&1))<<13; | ||
44 | #endif | ||
45 | *(uint32_t *)(p+delta-5) = (uint32_t)xo; | ||
46 | return p+delta-5; | ||
47 | } | ||
34 | #if defined(__GNUC__) | 48 | #if defined(__GNUC__) |
35 | if (__builtin_constant_p(xo) && n == -2) | 49 | if (__builtin_constant_p(xo) && n == -2) |
36 | p[delta-2] = (MCode)(xo >> 24); | 50 | p[delta-2] = (MCode)(xo >> 24); |
@@ -85,26 +99,17 @@ static int32_t ptr2addr(const void *p) | |||
85 | #define ptr2addr(p) (i32ptr((p))) | 99 | #define ptr2addr(p) (i32ptr((p))) |
86 | #endif | 100 | #endif |
87 | 101 | ||
88 | /* op r, [addr] */ | ||
89 | static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr) | ||
90 | { | ||
91 | MCode *p = as->mcp; | ||
92 | *(int32_t *)(p-4) = ptr2addr(addr); | ||
93 | #if LJ_64 | ||
94 | p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP); | ||
95 | as->mcp = emit_opm(xo, XM_OFS0, rr, RID_ESP, p, -5); | ||
96 | #else | ||
97 | as->mcp = emit_opm(xo, XM_OFS0, rr, RID_EBP, p, -4); | ||
98 | #endif | ||
99 | } | ||
100 | |||
101 | /* op r, [base+ofs] */ | 102 | /* op r, [base+ofs] */ |
102 | static void emit_rmro(ASMState *as, x86Op xo, Reg rr, Reg rb, int32_t ofs) | 103 | static void emit_rmro(ASMState *as, x86Op xo, Reg rr, Reg rb, int32_t ofs) |
103 | { | 104 | { |
104 | MCode *p = as->mcp; | 105 | MCode *p = as->mcp; |
105 | x86Mode mode; | 106 | x86Mode mode; |
106 | if (ra_hasreg(rb)) { | 107 | if (ra_hasreg(rb)) { |
107 | if (ofs == 0 && (rb&7) != RID_EBP) { | 108 | if (LJ_GC64 && rb == RID_RIP) { |
109 | mode = XM_OFS0; | ||
110 | p -= 4; | ||
111 | *(int32_t *)p = ofs; | ||
112 | } else if (ofs == 0 && (rb&7) != RID_EBP) { | ||
108 | mode = XM_OFS0; | 113 | mode = XM_OFS0; |
109 | } else if (checki8(ofs)) { | 114 | } else if (checki8(ofs)) { |
110 | *--p = (MCode)ofs; | 115 | *--p = (MCode)ofs; |
@@ -202,6 +207,11 @@ static void emit_mrm(ASMState *as, x86Op xo, Reg rr, Reg rb) | |||
202 | *--p = MODRM(XM_SCALE1, RID_ESP, RID_EBP); | 207 | *--p = MODRM(XM_SCALE1, RID_ESP, RID_EBP); |
203 | rb = RID_ESP; | 208 | rb = RID_ESP; |
204 | #endif | 209 | #endif |
210 | } else if (LJ_GC64 && rb == RID_RIP) { | ||
211 | lua_assert(as->mrm.idx == RID_NONE); | ||
212 | mode = XM_OFS0; | ||
213 | p -= 4; | ||
214 | *(int32_t *)p = as->mrm.ofs; | ||
205 | } else { | 215 | } else { |
206 | if (as->mrm.ofs == 0 && (rb&7) != RID_EBP) { | 216 | if (as->mrm.ofs == 0 && (rb&7) != RID_EBP) { |
207 | mode = XM_OFS0; | 217 | mode = XM_OFS0; |
@@ -241,10 +251,6 @@ static void emit_gmrmi(ASMState *as, x86Group xg, Reg rb, int32_t i) | |||
241 | 251 | ||
242 | /* -- Emit loads/stores --------------------------------------------------- */ | 252 | /* -- Emit loads/stores --------------------------------------------------- */ |
243 | 253 | ||
244 | /* Instruction selection for XMM moves. */ | ||
245 | #define XMM_MOVRR(as) ((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVSD : XO_MOVAPS) | ||
246 | #define XMM_MOVRM(as) ((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVLPD : XO_MOVSD) | ||
247 | |||
248 | /* mov [base+ofs], i */ | 254 | /* mov [base+ofs], i */ |
249 | static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i) | 255 | static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i) |
250 | { | 256 | { |
@@ -259,8 +265,8 @@ static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i) | |||
259 | /* Get/set global_State fields. */ | 265 | /* Get/set global_State fields. */ |
260 | #define emit_opgl(as, xo, r, field) \ | 266 | #define emit_opgl(as, xo, r, field) \ |
261 | emit_rma(as, (xo), (r), (void *)&J2G(as->J)->field) | 267 | emit_rma(as, (xo), (r), (void *)&J2G(as->J)->field) |
262 | #define emit_getgl(as, r, field) emit_opgl(as, XO_MOV, (r), field) | 268 | #define emit_getgl(as, r, field) emit_opgl(as, XO_MOV, (r)|REX_GC64, field) |
263 | #define emit_setgl(as, r, field) emit_opgl(as, XO_MOVto, (r), field) | 269 | #define emit_setgl(as, r, field) emit_opgl(as, XO_MOVto, (r)|REX_GC64, field) |
264 | 270 | ||
265 | #define emit_setvmstate(as, i) \ | 271 | #define emit_setvmstate(as, i) \ |
266 | (emit_i32(as, i), emit_opgl(as, XO_MOVmi, 0, vmstate)) | 272 | (emit_i32(as, i), emit_opgl(as, XO_MOVmi, 0, vmstate)) |
@@ -285,9 +291,21 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i) | |||
285 | } | 291 | } |
286 | } | 292 | } |
287 | 293 | ||
294 | #if LJ_GC64 | ||
295 | #define dispofs(as, k) \ | ||
296 | ((intptr_t)((uintptr_t)(k) - (uintptr_t)J2GG(as->J)->dispatch)) | ||
297 | #define mcpofs(as, k) \ | ||
298 | ((intptr_t)((uintptr_t)(k) - (uintptr_t)as->mcp)) | ||
299 | #define mctopofs(as, k) \ | ||
300 | ((intptr_t)((uintptr_t)(k) - (uintptr_t)as->mctop)) | ||
301 | /* mov r, addr */ | ||
302 | #define emit_loada(as, r, addr) \ | ||
303 | emit_loadu64(as, (r), (uintptr_t)(addr)) | ||
304 | #else | ||
288 | /* mov r, addr */ | 305 | /* mov r, addr */ |
289 | #define emit_loada(as, r, addr) \ | 306 | #define emit_loada(as, r, addr) \ |
290 | emit_loadi(as, (r), ptr2addr((addr))) | 307 | emit_loadi(as, (r), ptr2addr((addr))) |
308 | #endif | ||
291 | 309 | ||
292 | #if LJ_64 | 310 | #if LJ_64 |
293 | /* mov r, imm64 or shorter 32 bit extended load. */ | 311 | /* mov r, imm64 or shorter 32 bit extended load. */ |
@@ -299,6 +317,15 @@ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64) | |||
299 | MCode *p = as->mcp; | 317 | MCode *p = as->mcp; |
300 | *(int32_t *)(p-4) = (int32_t)u64; | 318 | *(int32_t *)(p-4) = (int32_t)u64; |
301 | as->mcp = emit_opm(XO_MOVmi, XM_REG, REX_64, r, p, -4); | 319 | as->mcp = emit_opm(XO_MOVmi, XM_REG, REX_64, r, p, -4); |
320 | #if LJ_GC64 | ||
321 | } else if (checki32(dispofs(as, u64))) { | ||
322 | emit_rmro(as, XO_LEA, r|REX_64, RID_DISPATCH, (int32_t)dispofs(as, u64)); | ||
323 | } else if (checki32(mcpofs(as, u64)) && checki32(mctopofs(as, u64))) { | ||
324 | /* Since as->realign assumes the code size doesn't change, check | ||
325 | ** RIP-relative addressing reachability for both as->mcp and as->mctop. | ||
326 | */ | ||
327 | emit_rmro(as, XO_LEA, r|REX_64, RID_RIP, (int32_t)mcpofs(as, u64)); | ||
328 | #endif | ||
302 | } else { /* Full-size 64 bit load. */ | 329 | } else { /* Full-size 64 bit load. */ |
303 | MCode *p = as->mcp; | 330 | MCode *p = as->mcp; |
304 | *(uint64_t *)(p-8) = u64; | 331 | *(uint64_t *)(p-8) = u64; |
@@ -310,13 +337,89 @@ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64) | |||
310 | } | 337 | } |
311 | #endif | 338 | #endif |
312 | 339 | ||
313 | /* movsd r, [&tv->n] / xorps r, r */ | 340 | /* op r, [addr] */ |
314 | static void emit_loadn(ASMState *as, Reg r, cTValue *tv) | 341 | static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr) |
315 | { | 342 | { |
316 | if (tvispzero(tv)) /* Use xor only for +0. */ | 343 | #if LJ_GC64 |
317 | emit_rr(as, XO_XORPS, r, r); | 344 | if (checki32(dispofs(as, addr))) { |
318 | else | 345 | emit_rmro(as, xo, rr, RID_DISPATCH, (int32_t)dispofs(as, addr)); |
319 | emit_rma(as, XMM_MOVRM(as), r, &tv->n); | 346 | } else if (checki32(mcpofs(as, addr)) && checki32(mctopofs(as, addr))) { |
347 | emit_rmro(as, xo, rr, RID_RIP, (int32_t)mcpofs(as, addr)); | ||
348 | } else if (!checki32((intptr_t)addr)) { | ||
349 | Reg ra = (rr & 15); | ||
350 | if (xo != XO_MOV) { | ||
351 | /* We can't allocate a register here. Use and restore DISPATCH. Ugly. */ | ||
352 | uint64_t dispaddr = (uintptr_t)J2GG(as->J)->dispatch; | ||
353 | uint8_t i8 = xo == XO_GROUP3b ? *as->mcp++ : 0; | ||
354 | ra = RID_DISPATCH; | ||
355 | if (checku32(dispaddr)) { | ||
356 | emit_loadi(as, ra, (int32_t)dispaddr); | ||
357 | } else { /* Full-size 64 bit load. */ | ||
358 | MCode *p = as->mcp; | ||
359 | *(uint64_t *)(p-8) = dispaddr; | ||
360 | p[-9] = (MCode)(XI_MOVri+(ra&7)); | ||
361 | p[-10] = 0x48 + ((ra>>3)&1); | ||
362 | p -= 10; | ||
363 | as->mcp = p; | ||
364 | } | ||
365 | if (xo == XO_GROUP3b) emit_i8(as, i8); | ||
366 | } | ||
367 | emit_rmro(as, xo, rr, ra, 0); | ||
368 | emit_loadu64(as, ra, (uintptr_t)addr); | ||
369 | } else | ||
370 | #endif | ||
371 | { | ||
372 | MCode *p = as->mcp; | ||
373 | *(int32_t *)(p-4) = ptr2addr(addr); | ||
374 | #if LJ_64 | ||
375 | p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP); | ||
376 | as->mcp = emit_opm(xo, XM_OFS0, rr, RID_ESP, p, -5); | ||
377 | #else | ||
378 | as->mcp = emit_opm(xo, XM_OFS0, rr, RID_EBP, p, -4); | ||
379 | #endif | ||
380 | } | ||
381 | } | ||
382 | |||
383 | /* Load 64 bit IR constant into register. */ | ||
384 | static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) | ||
385 | { | ||
386 | Reg r64; | ||
387 | x86Op xo; | ||
388 | const uint64_t *k = &ir_k64(ir)->u64; | ||
389 | if (rset_test(RSET_FPR, r)) { | ||
390 | r64 = r; | ||
391 | xo = XO_MOVSD; | ||
392 | } else { | ||
393 | r64 = r | REX_64; | ||
394 | xo = XO_MOV; | ||
395 | } | ||
396 | if (*k == 0) { | ||
397 | emit_rr(as, rset_test(RSET_FPR, r) ? XO_XORPS : XO_ARITH(XOg_XOR), r, r); | ||
398 | #if LJ_GC64 | ||
399 | } else if (checki32((intptr_t)k) || checki32(dispofs(as, k)) || | ||
400 | (checki32(mcpofs(as, k)) && checki32(mctopofs(as, k)))) { | ||
401 | emit_rma(as, xo, r64, k); | ||
402 | } else { | ||
403 | if (ir->i) { | ||
404 | lua_assert(*k == *(uint64_t*)(as->mctop - ir->i)); | ||
405 | } else if (as->curins <= as->stopins && rset_test(RSET_GPR, r)) { | ||
406 | emit_loadu64(as, r, *k); | ||
407 | return; | ||
408 | } else { | ||
409 | /* If all else fails, add the FP constant at the MCode area bottom. */ | ||
410 | while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3; | ||
411 | *(uint64_t *)as->mcbot = *k; | ||
412 | ir->i = (int32_t)(as->mctop - as->mcbot); | ||
413 | as->mcbot += 8; | ||
414 | as->mclim = as->mcbot + MCLIM_REDZONE; | ||
415 | lj_mcode_commitbot(as->J, as->mcbot); | ||
416 | } | ||
417 | emit_rmro(as, xo, r64, RID_RIP, (int32_t)mcpofs(as, as->mctop - ir->i)); | ||
418 | #else | ||
419 | } else { | ||
420 | emit_rma(as, xo, r64, k); | ||
421 | #endif | ||
422 | } | ||
320 | } | 423 | } |
321 | 424 | ||
322 | /* -- Emit control-flow instructions -------------------------------------- */ | 425 | /* -- Emit control-flow instructions -------------------------------------- */ |
@@ -418,8 +521,10 @@ static void emit_call_(ASMState *as, MCode *target) | |||
418 | /* Use 64 bit operations to handle 64 bit IR types. */ | 521 | /* Use 64 bit operations to handle 64 bit IR types. */ |
419 | #if LJ_64 | 522 | #if LJ_64 |
420 | #define REX_64IR(ir, r) ((r) + (irt_is64((ir)->t) ? REX_64 : 0)) | 523 | #define REX_64IR(ir, r) ((r) + (irt_is64((ir)->t) ? REX_64 : 0)) |
524 | #define VEX_64IR(ir, r) ((r) + (irt_is64((ir)->t) ? VEX_64 : 0)) | ||
421 | #else | 525 | #else |
422 | #define REX_64IR(ir, r) (r) | 526 | #define REX_64IR(ir, r) (r) |
527 | #define VEX_64IR(ir, r) (r) | ||
423 | #endif | 528 | #endif |
424 | 529 | ||
425 | /* Generic move between two regs. */ | 530 | /* Generic move between two regs. */ |
@@ -429,25 +534,25 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) | |||
429 | if (dst < RID_MAX_GPR) | 534 | if (dst < RID_MAX_GPR) |
430 | emit_rr(as, XO_MOV, REX_64IR(ir, dst), src); | 535 | emit_rr(as, XO_MOV, REX_64IR(ir, dst), src); |
431 | else | 536 | else |
432 | emit_rr(as, XMM_MOVRR(as), dst, src); | 537 | emit_rr(as, XO_MOVAPS, dst, src); |
433 | } | 538 | } |
434 | 539 | ||
435 | /* Generic load of register from stack slot. */ | 540 | /* Generic load of register with base and (small) offset address. */ |
436 | static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs) | 541 | static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) |
437 | { | 542 | { |
438 | if (r < RID_MAX_GPR) | 543 | if (r < RID_MAX_GPR) |
439 | emit_rmro(as, XO_MOV, REX_64IR(ir, r), RID_ESP, ofs); | 544 | emit_rmro(as, XO_MOV, REX_64IR(ir, r), base, ofs); |
440 | else | 545 | else |
441 | emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, r, RID_ESP, ofs); | 546 | emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, r, base, ofs); |
442 | } | 547 | } |
443 | 548 | ||
444 | /* Generic store of register to stack slot. */ | 549 | /* Generic store of register with base and (small) offset address. */ |
445 | static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs) | 550 | static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) |
446 | { | 551 | { |
447 | if (r < RID_MAX_GPR) | 552 | if (r < RID_MAX_GPR) |
448 | emit_rmro(as, XO_MOVto, REX_64IR(ir, r), RID_ESP, ofs); | 553 | emit_rmro(as, XO_MOVto, REX_64IR(ir, r), base, ofs); |
449 | else | 554 | else |
450 | emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSDto : XO_MOVSSto, r, RID_ESP, ofs); | 555 | emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSDto : XO_MOVSSto, r, base, ofs); |
451 | } | 556 | } |
452 | 557 | ||
453 | /* Add offset to pointer. */ | 558 | /* Add offset to pointer. */ |
@@ -455,9 +560,9 @@ static void emit_addptr(ASMState *as, Reg r, int32_t ofs) | |||
455 | { | 560 | { |
456 | if (ofs) { | 561 | if (ofs) { |
457 | if ((as->flags & JIT_F_LEA_AGU)) | 562 | if ((as->flags & JIT_F_LEA_AGU)) |
458 | emit_rmro(as, XO_LEA, r, r, ofs); | 563 | emit_rmro(as, XO_LEA, r|REX_GC64, r, ofs); |
459 | else | 564 | else |
460 | emit_gri(as, XG_ARITHi(XOg_ADD), r, ofs); | 565 | emit_gri(as, XG_ARITHi(XOg_ADD), r|REX_GC64, ofs); |
461 | } | 566 | } |
462 | } | 567 | } |
463 | 568 | ||
diff --git a/src/lj_err.c b/src/lj_err.c index caa7487f..52498932 100644 --- a/src/lj_err.c +++ b/src/lj_err.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include "lj_ff.h" | 16 | #include "lj_ff.h" |
17 | #include "lj_trace.h" | 17 | #include "lj_trace.h" |
18 | #include "lj_vm.h" | 18 | #include "lj_vm.h" |
19 | #include "lj_strfmt.h" | ||
19 | 20 | ||
20 | /* | 21 | /* |
21 | ** LuaJIT can either use internal or external frame unwinding: | 22 | ** LuaJIT can either use internal or external frame unwinding: |
@@ -45,7 +46,8 @@ | |||
45 | ** the wrapper function feature. Lua errors thrown through C++ frames | 46 | ** the wrapper function feature. Lua errors thrown through C++ frames |
46 | ** cannot be caught by C++ code and C++ destructors are not run. | 47 | ** cannot be caught by C++ code and C++ destructors are not run. |
47 | ** | 48 | ** |
48 | ** EXT is the default on x64 systems, INT is the default on all other systems. | 49 | ** EXT is the default on x64 systems and on Windows, INT is the default on all |
50 | ** other systems. | ||
49 | ** | 51 | ** |
50 | ** EXT can be manually enabled on POSIX systems using GCC and DWARF2 stack | 52 | ** EXT can be manually enabled on POSIX systems using GCC and DWARF2 stack |
51 | ** unwinding with -DLUAJIT_UNWIND_EXTERNAL. *All* C code must be compiled | 53 | ** unwinding with -DLUAJIT_UNWIND_EXTERNAL. *All* C code must be compiled |
@@ -54,7 +56,6 @@ | |||
54 | ** and all C libraries that have callbacks which may be used to call back | 56 | ** and all C libraries that have callbacks which may be used to call back |
55 | ** into Lua. C++ code must *not* be compiled with -fno-exceptions. | 57 | ** into Lua. C++ code must *not* be compiled with -fno-exceptions. |
56 | ** | 58 | ** |
57 | ** EXT cannot be enabled on WIN32 since system exceptions use code-driven SEH. | ||
58 | ** EXT is mandatory on WIN64 since the calling convention has an abundance | 59 | ** EXT is mandatory on WIN64 since the calling convention has an abundance |
59 | ** of callee-saved registers (rbx, rbp, rsi, rdi, r12-r15, xmm6-xmm15). | 60 | ** of callee-saved registers (rbx, rbp, rsi, rdi, r12-r15, xmm6-xmm15). |
60 | ** The POSIX/x64 interpreter only saves r12/r13 for INT (e.g. PS4). | 61 | ** The POSIX/x64 interpreter only saves r12/r13 for INT (e.g. PS4). |
@@ -62,7 +63,7 @@ | |||
62 | 63 | ||
63 | #if defined(__GNUC__) && (LJ_TARGET_X64 || defined(LUAJIT_UNWIND_EXTERNAL)) && !LJ_NO_UNWIND | 64 | #if defined(__GNUC__) && (LJ_TARGET_X64 || defined(LUAJIT_UNWIND_EXTERNAL)) && !LJ_NO_UNWIND |
64 | #define LJ_UNWIND_EXT 1 | 65 | #define LJ_UNWIND_EXT 1 |
65 | #elif LJ_TARGET_X64 && LJ_TARGET_WINDOWS | 66 | #elif LJ_TARGET_WINDOWS |
66 | #define LJ_UNWIND_EXT 1 | 67 | #define LJ_UNWIND_EXT 1 |
67 | #endif | 68 | #endif |
68 | 69 | ||
@@ -98,14 +99,14 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode) | |||
98 | TValue *top = restorestack(L, -nres); | 99 | TValue *top = restorestack(L, -nres); |
99 | if (frame < top) { /* Frame reached? */ | 100 | if (frame < top) { /* Frame reached? */ |
100 | if (errcode) { | 101 | if (errcode) { |
101 | L->cframe = cframe_prev(cf); | ||
102 | L->base = frame+1; | 102 | L->base = frame+1; |
103 | L->cframe = cframe_prev(cf); | ||
103 | unwindstack(L, top); | 104 | unwindstack(L, top); |
104 | } | 105 | } |
105 | return cf; | 106 | return cf; |
106 | } | 107 | } |
107 | } | 108 | } |
108 | if (frame <= tvref(L->stack)) | 109 | if (frame <= tvref(L->stack)+LJ_FR2) |
109 | break; | 110 | break; |
110 | switch (frame_typep(frame)) { | 111 | switch (frame_typep(frame)) { |
111 | case FRAME_LUA: /* Lua frame. */ | 112 | case FRAME_LUA: /* Lua frame. */ |
@@ -113,14 +114,12 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode) | |||
113 | frame = frame_prevl(frame); | 114 | frame = frame_prevl(frame); |
114 | break; | 115 | break; |
115 | case FRAME_C: /* C frame. */ | 116 | case FRAME_C: /* C frame. */ |
116 | #if LJ_HASFFI | ||
117 | unwind_c: | 117 | unwind_c: |
118 | #endif | ||
119 | #if LJ_UNWIND_EXT | 118 | #if LJ_UNWIND_EXT |
120 | if (errcode) { | 119 | if (errcode) { |
121 | L->cframe = cframe_prev(cf); | ||
122 | L->base = frame_prevd(frame) + 1; | 120 | L->base = frame_prevd(frame) + 1; |
123 | unwindstack(L, frame); | 121 | L->cframe = cframe_prev(cf); |
122 | unwindstack(L, frame - LJ_FR2); | ||
124 | } else if (cf != stopcf) { | 123 | } else if (cf != stopcf) { |
125 | cf = cframe_prev(cf); | 124 | cf = cframe_prev(cf); |
126 | frame = frame_prevd(frame); | 125 | frame = frame_prevd(frame); |
@@ -143,16 +142,14 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode) | |||
143 | return cf; | 142 | return cf; |
144 | } | 143 | } |
145 | if (errcode) { | 144 | if (errcode) { |
146 | L->cframe = cframe_prev(cf); | ||
147 | L->base = frame_prevd(frame) + 1; | 145 | L->base = frame_prevd(frame) + 1; |
148 | unwindstack(L, frame); | 146 | L->cframe = cframe_prev(cf); |
147 | unwindstack(L, frame - LJ_FR2); | ||
149 | } | 148 | } |
150 | return cf; | 149 | return cf; |
151 | case FRAME_CONT: /* Continuation frame. */ | 150 | case FRAME_CONT: /* Continuation frame. */ |
152 | #if LJ_HASFFI | 151 | if (frame_iscont_fficb(frame)) |
153 | if ((frame-1)->u32.lo == LJ_CONT_FFI_CALLBACK) | ||
154 | goto unwind_c; | 152 | goto unwind_c; |
155 | #endif | ||
156 | /* fallthrough */ | 153 | /* fallthrough */ |
157 | case FRAME_VARG: /* Vararg frame. */ | 154 | case FRAME_VARG: /* Vararg frame. */ |
158 | frame = frame_prevd(frame); | 155 | frame = frame_prevd(frame); |
@@ -166,8 +163,8 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode) | |||
166 | } | 163 | } |
167 | if (frame_typep(frame) == FRAME_PCALL) | 164 | if (frame_typep(frame) == FRAME_PCALL) |
168 | hook_leave(G(L)); | 165 | hook_leave(G(L)); |
169 | L->cframe = cf; | ||
170 | L->base = frame_prevd(frame) + 1; | 166 | L->base = frame_prevd(frame) + 1; |
167 | L->cframe = cf; | ||
171 | unwindstack(L, L->base); | 168 | unwindstack(L, L->base); |
172 | } | 169 | } |
173 | return (void *)((intptr_t)cf | CFRAME_UNWIND_FF); | 170 | return (void *)((intptr_t)cf | CFRAME_UNWIND_FF); |
@@ -175,8 +172,8 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode) | |||
175 | } | 172 | } |
176 | /* No C frame. */ | 173 | /* No C frame. */ |
177 | if (errcode) { | 174 | if (errcode) { |
175 | L->base = tvref(L->stack)+1+LJ_FR2; | ||
178 | L->cframe = NULL; | 176 | L->cframe = NULL; |
179 | L->base = tvref(L->stack)+1; | ||
180 | unwindstack(L, L->base); | 177 | unwindstack(L, L->base); |
181 | if (G(L)->panic) | 178 | if (G(L)->panic) |
182 | G(L)->panic(L); | 179 | G(L)->panic(L); |
@@ -194,13 +191,6 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode) | |||
194 | ** since various OS, distros and compilers mess up the header installation. | 191 | ** since various OS, distros and compilers mess up the header installation. |
195 | */ | 192 | */ |
196 | 193 | ||
197 | typedef struct _Unwind_Exception | ||
198 | { | ||
199 | uint64_t exclass; | ||
200 | void (*excleanup)(int, struct _Unwind_Exception *); | ||
201 | uintptr_t p1, p2; | ||
202 | } __attribute__((__aligned__)) _Unwind_Exception; | ||
203 | |||
204 | typedef struct _Unwind_Context _Unwind_Context; | 194 | typedef struct _Unwind_Context _Unwind_Context; |
205 | 195 | ||
206 | #define _URC_OK 0 | 196 | #define _URC_OK 0 |
@@ -210,8 +200,20 @@ typedef struct _Unwind_Context _Unwind_Context; | |||
210 | #define _URC_CONTINUE_UNWIND 8 | 200 | #define _URC_CONTINUE_UNWIND 8 |
211 | #define _URC_FAILURE 9 | 201 | #define _URC_FAILURE 9 |
212 | 202 | ||
203 | #define LJ_UEXCLASS 0x4c55414a49543200ULL /* LUAJIT2\0 */ | ||
204 | #define LJ_UEXCLASS_MAKE(c) (LJ_UEXCLASS | (uint64_t)(c)) | ||
205 | #define LJ_UEXCLASS_CHECK(cl) (((cl) ^ LJ_UEXCLASS) <= 0xff) | ||
206 | #define LJ_UEXCLASS_ERRCODE(cl) ((int)((cl) & 0xff)) | ||
207 | |||
213 | #if !LJ_TARGET_ARM | 208 | #if !LJ_TARGET_ARM |
214 | 209 | ||
210 | typedef struct _Unwind_Exception | ||
211 | { | ||
212 | uint64_t exclass; | ||
213 | void (*excleanup)(int, struct _Unwind_Exception *); | ||
214 | uintptr_t p1, p2; | ||
215 | } __attribute__((__aligned__)) _Unwind_Exception; | ||
216 | |||
215 | extern uintptr_t _Unwind_GetCFA(_Unwind_Context *); | 217 | extern uintptr_t _Unwind_GetCFA(_Unwind_Context *); |
216 | extern void _Unwind_SetGR(_Unwind_Context *, int, uintptr_t); | 218 | extern void _Unwind_SetGR(_Unwind_Context *, int, uintptr_t); |
217 | extern void _Unwind_SetIP(_Unwind_Context *, uintptr_t); | 219 | extern void _Unwind_SetIP(_Unwind_Context *, uintptr_t); |
@@ -223,11 +225,6 @@ extern int _Unwind_RaiseException(_Unwind_Exception *); | |||
223 | #define _UA_HANDLER_FRAME 4 | 225 | #define _UA_HANDLER_FRAME 4 |
224 | #define _UA_FORCE_UNWIND 8 | 226 | #define _UA_FORCE_UNWIND 8 |
225 | 227 | ||
226 | #define LJ_UEXCLASS 0x4c55414a49543200ULL /* LUAJIT2\0 */ | ||
227 | #define LJ_UEXCLASS_MAKE(c) (LJ_UEXCLASS | (uint64_t)(c)) | ||
228 | #define LJ_UEXCLASS_CHECK(cl) (((cl) ^ LJ_UEXCLASS) <= 0xff) | ||
229 | #define LJ_UEXCLASS_ERRCODE(cl) ((int)((cl) & 0xff)) | ||
230 | |||
231 | /* DWARF2 personality handler referenced from interpreter .eh_frame. */ | 228 | /* DWARF2 personality handler referenced from interpreter .eh_frame. */ |
232 | LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions, | 229 | LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions, |
233 | uint64_t uexclass, _Unwind_Exception *uex, _Unwind_Context *ctx) | 230 | uint64_t uexclass, _Unwind_Exception *uex, _Unwind_Context *ctx) |
@@ -301,10 +298,22 @@ static void err_raise_ext(int errcode) | |||
301 | } | 298 | } |
302 | #endif | 299 | #endif |
303 | 300 | ||
304 | #else | 301 | #else /* LJ_TARGET_ARM */ |
305 | 302 | ||
306 | extern void _Unwind_DeleteException(void *); | 303 | #define _US_VIRTUAL_UNWIND_FRAME 0 |
307 | extern int __gnu_unwind_frame (void *, _Unwind_Context *); | 304 | #define _US_UNWIND_FRAME_STARTING 1 |
305 | #define _US_ACTION_MASK 3 | ||
306 | #define _US_FORCE_UNWIND 8 | ||
307 | |||
308 | typedef struct _Unwind_Control_Block _Unwind_Control_Block; | ||
309 | |||
310 | struct _Unwind_Control_Block { | ||
311 | uint64_t exclass; | ||
312 | uint32_t misc[20]; | ||
313 | }; | ||
314 | |||
315 | extern int _Unwind_RaiseException(_Unwind_Control_Block *); | ||
316 | extern int __gnu_unwind_frame(_Unwind_Control_Block *, _Unwind_Context *); | ||
308 | extern int _Unwind_VRS_Set(_Unwind_Context *, int, uint32_t, int, void *); | 317 | extern int _Unwind_VRS_Set(_Unwind_Context *, int, uint32_t, int, void *); |
309 | extern int _Unwind_VRS_Get(_Unwind_Context *, int, uint32_t, int, void *); | 318 | extern int _Unwind_VRS_Get(_Unwind_Context *, int, uint32_t, int, void *); |
310 | 319 | ||
@@ -320,35 +329,58 @@ static inline void _Unwind_SetGR(_Unwind_Context *ctx, int r, uint32_t v) | |||
320 | _Unwind_VRS_Set(ctx, 0, r, 0, &v); | 329 | _Unwind_VRS_Set(ctx, 0, r, 0, &v); |
321 | } | 330 | } |
322 | 331 | ||
323 | #define _US_VIRTUAL_UNWIND_FRAME 0 | 332 | extern void lj_vm_unwind_ext(void); |
324 | #define _US_UNWIND_FRAME_STARTING 1 | ||
325 | #define _US_ACTION_MASK 3 | ||
326 | #define _US_FORCE_UNWIND 8 | ||
327 | 333 | ||
328 | /* ARM unwinder personality handler referenced from interpreter .ARM.extab. */ | 334 | /* ARM unwinder personality handler referenced from interpreter .ARM.extab. */ |
329 | LJ_FUNCA int lj_err_unwind_arm(int state, void *ucb, _Unwind_Context *ctx) | 335 | LJ_FUNCA int lj_err_unwind_arm(int state, _Unwind_Control_Block *ucb, |
336 | _Unwind_Context *ctx) | ||
330 | { | 337 | { |
331 | void *cf = (void *)_Unwind_GetGR(ctx, 13); | 338 | void *cf = (void *)_Unwind_GetGR(ctx, 13); |
332 | lua_State *L = cframe_L(cf); | 339 | lua_State *L = cframe_L(cf); |
333 | if ((state & _US_ACTION_MASK) == _US_VIRTUAL_UNWIND_FRAME) { | 340 | int errcode; |
334 | setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP)); | 341 | |
342 | switch ((state & _US_ACTION_MASK)) { | ||
343 | case _US_VIRTUAL_UNWIND_FRAME: | ||
344 | if ((state & _US_FORCE_UNWIND)) break; | ||
335 | return _URC_HANDLER_FOUND; | 345 | return _URC_HANDLER_FOUND; |
336 | } | 346 | case _US_UNWIND_FRAME_STARTING: |
337 | if ((state&(_US_ACTION_MASK|_US_FORCE_UNWIND)) == _US_UNWIND_FRAME_STARTING) { | 347 | if (LJ_UEXCLASS_CHECK(ucb->exclass)) { |
338 | _Unwind_DeleteException(ucb); | 348 | errcode = LJ_UEXCLASS_ERRCODE(ucb->exclass); |
339 | _Unwind_SetGR(ctx, 15, (uint32_t)(void *)lj_err_throw); | 349 | } else { |
340 | _Unwind_SetGR(ctx, 0, (uint32_t)L); | 350 | errcode = LUA_ERRRUN; |
341 | _Unwind_SetGR(ctx, 1, (uint32_t)LUA_ERRRUN); | 351 | setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP)); |
352 | } | ||
353 | cf = err_unwind(L, cf, errcode); | ||
354 | if ((state & _US_FORCE_UNWIND) || cf == NULL) break; | ||
355 | _Unwind_SetGR(ctx, 15, (uint32_t)lj_vm_unwind_ext); | ||
356 | _Unwind_SetGR(ctx, 0, (uint32_t)ucb); | ||
357 | _Unwind_SetGR(ctx, 1, (uint32_t)errcode); | ||
358 | _Unwind_SetGR(ctx, 2, cframe_unwind_ff(cf) ? | ||
359 | (uint32_t)lj_vm_unwind_ff_eh : | ||
360 | (uint32_t)lj_vm_unwind_c_eh); | ||
342 | return _URC_INSTALL_CONTEXT; | 361 | return _URC_INSTALL_CONTEXT; |
362 | default: | ||
363 | return _URC_FAILURE; | ||
343 | } | 364 | } |
344 | if (__gnu_unwind_frame(ucb, ctx) != _URC_OK) | 365 | if (__gnu_unwind_frame(ucb, ctx) != _URC_OK) |
345 | return _URC_FAILURE; | 366 | return _URC_FAILURE; |
346 | return _URC_CONTINUE_UNWIND; | 367 | return _URC_CONTINUE_UNWIND; |
347 | } | 368 | } |
348 | 369 | ||
370 | #if LJ_UNWIND_EXT | ||
371 | static __thread _Unwind_Control_Block static_uex; | ||
372 | |||
373 | static void err_raise_ext(int errcode) | ||
374 | { | ||
375 | memset(&static_uex, 0, sizeof(static_uex)); | ||
376 | static_uex.exclass = LJ_UEXCLASS_MAKE(errcode); | ||
377 | _Unwind_RaiseException(&static_uex); | ||
378 | } | ||
349 | #endif | 379 | #endif |
350 | 380 | ||
351 | #elif LJ_TARGET_X64 && LJ_ABI_WIN | 381 | #endif /* LJ_TARGET_ARM */ |
382 | |||
383 | #elif LJ_ABI_WIN | ||
352 | 384 | ||
353 | /* | 385 | /* |
354 | ** Someone in Redmond owes me several days of my life. A lot of this is | 386 | ** Someone in Redmond owes me several days of my life. A lot of this is |
@@ -366,6 +398,7 @@ LJ_FUNCA int lj_err_unwind_arm(int state, void *ucb, _Unwind_Context *ctx) | |||
366 | #define WIN32_LEAN_AND_MEAN | 398 | #define WIN32_LEAN_AND_MEAN |
367 | #include <windows.h> | 399 | #include <windows.h> |
368 | 400 | ||
401 | #if LJ_TARGET_X64 | ||
369 | /* Taken from: http://www.nynaeve.net/?p=99 */ | 402 | /* Taken from: http://www.nynaeve.net/?p=99 */ |
370 | typedef struct UndocumentedDispatcherContext { | 403 | typedef struct UndocumentedDispatcherContext { |
371 | ULONG64 ControlPc; | 404 | ULONG64 ControlPc; |
@@ -380,11 +413,14 @@ typedef struct UndocumentedDispatcherContext { | |||
380 | ULONG ScopeIndex; | 413 | ULONG ScopeIndex; |
381 | ULONG Fill0; | 414 | ULONG Fill0; |
382 | } UndocumentedDispatcherContext; | 415 | } UndocumentedDispatcherContext; |
416 | #else | ||
417 | typedef void *UndocumentedDispatcherContext; | ||
418 | #endif | ||
383 | 419 | ||
384 | /* Another wild guess. */ | 420 | /* Another wild guess. */ |
385 | extern void __DestructExceptionObject(EXCEPTION_RECORD *rec, int nothrow); | 421 | extern void __DestructExceptionObject(EXCEPTION_RECORD *rec, int nothrow); |
386 | 422 | ||
387 | #ifdef MINGW_SDK_INIT | 423 | #if LJ_TARGET_X64 && defined(MINGW_SDK_INIT) |
388 | /* Workaround for broken MinGW64 declaration. */ | 424 | /* Workaround for broken MinGW64 declaration. */ |
389 | VOID RtlUnwindEx_FIXED(PVOID,PVOID,PVOID,PVOID,PVOID,PVOID) asm("RtlUnwindEx"); | 425 | VOID RtlUnwindEx_FIXED(PVOID,PVOID,PVOID,PVOID,PVOID,PVOID) asm("RtlUnwindEx"); |
390 | #define RtlUnwindEx RtlUnwindEx_FIXED | 426 | #define RtlUnwindEx RtlUnwindEx_FIXED |
@@ -398,10 +434,15 @@ VOID RtlUnwindEx_FIXED(PVOID,PVOID,PVOID,PVOID,PVOID,PVOID) asm("RtlUnwindEx"); | |||
398 | #define LJ_EXCODE_CHECK(cl) (((cl) ^ LJ_EXCODE) <= 0xff) | 434 | #define LJ_EXCODE_CHECK(cl) (((cl) ^ LJ_EXCODE) <= 0xff) |
399 | #define LJ_EXCODE_ERRCODE(cl) ((int)((cl) & 0xff)) | 435 | #define LJ_EXCODE_ERRCODE(cl) ((int)((cl) & 0xff)) |
400 | 436 | ||
401 | /* Win64 exception handler for interpreter frame. */ | 437 | /* Windows exception handler for interpreter frame. */ |
402 | LJ_FUNCA EXCEPTION_DISPOSITION lj_err_unwind_win64(EXCEPTION_RECORD *rec, | 438 | LJ_FUNCA int lj_err_unwind_win(EXCEPTION_RECORD *rec, |
403 | void *cf, CONTEXT *ctx, UndocumentedDispatcherContext *dispatch) | 439 | void *f, CONTEXT *ctx, UndocumentedDispatcherContext *dispatch) |
404 | { | 440 | { |
441 | #if LJ_TARGET_X64 | ||
442 | void *cf = f; | ||
443 | #else | ||
444 | void *cf = (char *)f - CFRAME_OFS_SEH; | ||
445 | #endif | ||
405 | lua_State *L = cframe_L(cf); | 446 | lua_State *L = cframe_L(cf); |
406 | int errcode = LJ_EXCODE_CHECK(rec->ExceptionCode) ? | 447 | int errcode = LJ_EXCODE_CHECK(rec->ExceptionCode) ? |
407 | LJ_EXCODE_ERRCODE(rec->ExceptionCode) : LUA_ERRRUN; | 448 | LJ_EXCODE_ERRCODE(rec->ExceptionCode) : LUA_ERRRUN; |
@@ -419,8 +460,9 @@ LJ_FUNCA EXCEPTION_DISPOSITION lj_err_unwind_win64(EXCEPTION_RECORD *rec, | |||
419 | setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP)); | 460 | setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP)); |
420 | } else if (!LJ_EXCODE_CHECK(rec->ExceptionCode)) { | 461 | } else if (!LJ_EXCODE_CHECK(rec->ExceptionCode)) { |
421 | /* Don't catch access violations etc. */ | 462 | /* Don't catch access violations etc. */ |
422 | return ExceptionContinueSearch; | 463 | return 1; /* ExceptionContinueSearch */ |
423 | } | 464 | } |
465 | #if LJ_TARGET_X64 | ||
424 | /* Unwind the stack and call all handlers for all lower C frames | 466 | /* Unwind the stack and call all handlers for all lower C frames |
425 | ** (including ourselves) again with EH_UNWINDING set. Then set | 467 | ** (including ourselves) again with EH_UNWINDING set. Then set |
426 | ** rsp = cf, rax = errcode and jump to the specified target. | 468 | ** rsp = cf, rax = errcode and jump to the specified target. |
@@ -430,9 +472,21 @@ LJ_FUNCA EXCEPTION_DISPOSITION lj_err_unwind_win64(EXCEPTION_RECORD *rec, | |||
430 | lj_vm_unwind_c_eh), | 472 | lj_vm_unwind_c_eh), |
431 | rec, (void *)(uintptr_t)errcode, ctx, dispatch->HistoryTable); | 473 | rec, (void *)(uintptr_t)errcode, ctx, dispatch->HistoryTable); |
432 | /* RtlUnwindEx should never return. */ | 474 | /* RtlUnwindEx should never return. */ |
475 | #else | ||
476 | UNUSED(ctx); | ||
477 | UNUSED(dispatch); | ||
478 | /* Call all handlers for all lower C frames (including ourselves) again | ||
479 | ** with EH_UNWINDING set. Then call the specified function, passing cf | ||
480 | ** and errcode. | ||
481 | */ | ||
482 | lj_vm_rtlunwind(cf, (void *)rec, | ||
483 | (cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ? | ||
484 | (void *)lj_vm_unwind_ff : (void *)lj_vm_unwind_c, errcode); | ||
485 | /* lj_vm_rtlunwind does not return. */ | ||
486 | #endif | ||
433 | } | 487 | } |
434 | } | 488 | } |
435 | return ExceptionContinueSearch; | 489 | return 1; /* ExceptionContinueSearch */ |
436 | } | 490 | } |
437 | 491 | ||
438 | /* Raise Windows exception. */ | 492 | /* Raise Windows exception. */ |
@@ -450,8 +504,8 @@ LJ_NOINLINE void LJ_FASTCALL lj_err_throw(lua_State *L, int errcode) | |||
450 | { | 504 | { |
451 | global_State *g = G(L); | 505 | global_State *g = G(L); |
452 | lj_trace_abort(g); | 506 | lj_trace_abort(g); |
453 | setgcrefnull(g->jit_L); | 507 | setmref(g->jit_base, NULL); |
454 | L->status = 0; | 508 | L->status = LUA_OK; |
455 | #if LJ_UNWIND_EXT | 509 | #if LJ_UNWIND_EXT |
456 | err_raise_ext(errcode); | 510 | err_raise_ext(errcode); |
457 | /* | 511 | /* |
@@ -495,7 +549,7 @@ LJ_NOINLINE void lj_err_mem(lua_State *L) | |||
495 | /* Find error function for runtime errors. Requires an extra stack traversal. */ | 549 | /* Find error function for runtime errors. Requires an extra stack traversal. */ |
496 | static ptrdiff_t finderrfunc(lua_State *L) | 550 | static ptrdiff_t finderrfunc(lua_State *L) |
497 | { | 551 | { |
498 | cTValue *frame = L->base-1, *bot = tvref(L->stack); | 552 | cTValue *frame = L->base-1, *bot = tvref(L->stack)+LJ_FR2; |
499 | void *cf = L->cframe; | 553 | void *cf = L->cframe; |
500 | while (frame > bot && cf) { | 554 | while (frame > bot && cf) { |
501 | while (cframe_nres(cframe_raw(cf)) < 0) { /* cframe without frame? */ | 555 | while (cframe_nres(cframe_raw(cf)) < 0) { /* cframe without frame? */ |
@@ -519,10 +573,8 @@ static ptrdiff_t finderrfunc(lua_State *L) | |||
519 | frame = frame_prevd(frame); | 573 | frame = frame_prevd(frame); |
520 | break; | 574 | break; |
521 | case FRAME_CONT: | 575 | case FRAME_CONT: |
522 | #if LJ_HASFFI | 576 | if (frame_iscont_fficb(frame)) |
523 | if ((frame-1)->u32.lo == LJ_CONT_FFI_CALLBACK) | ||
524 | cf = cframe_prev(cf); | 577 | cf = cframe_prev(cf); |
525 | #endif | ||
526 | frame = frame_prevd(frame); | 578 | frame = frame_prevd(frame); |
527 | break; | 579 | break; |
528 | case FRAME_CP: | 580 | case FRAME_CP: |
@@ -533,8 +585,8 @@ static ptrdiff_t finderrfunc(lua_State *L) | |||
533 | break; | 585 | break; |
534 | case FRAME_PCALL: | 586 | case FRAME_PCALL: |
535 | case FRAME_PCALLH: | 587 | case FRAME_PCALLH: |
536 | if (frame_ftsz(frame) >= (ptrdiff_t)(2*sizeof(TValue))) /* xpcall? */ | 588 | if (frame_func(frame_prevd(frame))->c.ffid == FF_xpcall) |
537 | return savestack(L, frame-1); /* Point to xpcall's errorfunc. */ | 589 | return savestack(L, frame_prevd(frame)+1); /* xpcall's errorfunc. */ |
538 | return 0; | 590 | return 0; |
539 | default: | 591 | default: |
540 | lua_assert(0); | 592 | lua_assert(0); |
@@ -557,8 +609,9 @@ LJ_NOINLINE void lj_err_run(lua_State *L) | |||
557 | lj_err_throw(L, LUA_ERRERR); | 609 | lj_err_throw(L, LUA_ERRERR); |
558 | } | 610 | } |
559 | L->status = LUA_ERRERR; | 611 | L->status = LUA_ERRERR; |
560 | copyTV(L, top, top-1); | 612 | copyTV(L, top+LJ_FR2, top-1); |
561 | copyTV(L, top-1, errfunc); | 613 | copyTV(L, top-1, errfunc); |
614 | if (LJ_FR2) setnilV(top++); | ||
562 | L->top = top+1; | 615 | L->top = top+1; |
563 | lj_vm_call(L, top, 1+1); /* Stack: |errfunc|msg| -> |msg| */ | 616 | lj_vm_call(L, top, 1+1); /* Stack: |errfunc|msg| -> |msg| */ |
564 | } | 617 | } |
@@ -572,7 +625,7 @@ LJ_NORET LJ_NOINLINE static void err_msgv(lua_State *L, ErrMsg em, ...) | |||
572 | va_list argp; | 625 | va_list argp; |
573 | va_start(argp, em); | 626 | va_start(argp, em); |
574 | if (curr_funcisL(L)) L->top = curr_topL(L); | 627 | if (curr_funcisL(L)) L->top = curr_topL(L); |
575 | msg = lj_str_pushvf(L, err2msg(em), argp); | 628 | msg = lj_strfmt_pushvf(L, err2msg(em), argp); |
576 | va_end(argp); | 629 | va_end(argp); |
577 | lj_debug_addloc(L, msg, L->base-1, NULL); | 630 | lj_debug_addloc(L, msg, L->base-1, NULL); |
578 | lj_err_run(L); | 631 | lj_err_run(L); |
@@ -590,11 +643,11 @@ LJ_NOINLINE void lj_err_lex(lua_State *L, GCstr *src, const char *tok, | |||
590 | { | 643 | { |
591 | char buff[LUA_IDSIZE]; | 644 | char buff[LUA_IDSIZE]; |
592 | const char *msg; | 645 | const char *msg; |
593 | lj_debug_shortname(buff, src); | 646 | lj_debug_shortname(buff, src, line); |
594 | msg = lj_str_pushvf(L, err2msg(em), argp); | 647 | msg = lj_strfmt_pushvf(L, err2msg(em), argp); |
595 | msg = lj_str_pushf(L, "%s:%d: %s", buff, line, msg); | 648 | msg = lj_strfmt_pushf(L, "%s:%d: %s", buff, line, msg); |
596 | if (tok) | 649 | if (tok) |
597 | lj_str_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tok); | 650 | lj_strfmt_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tok); |
598 | lj_err_throw(L, LUA_ERRSYNTAX); | 651 | lj_err_throw(L, LUA_ERRSYNTAX); |
599 | } | 652 | } |
600 | 653 | ||
@@ -633,8 +686,9 @@ LJ_NOINLINE void lj_err_optype_call(lua_State *L, TValue *o) | |||
633 | const BCIns *pc = cframe_Lpc(L); | 686 | const BCIns *pc = cframe_Lpc(L); |
634 | if (((ptrdiff_t)pc & FRAME_TYPE) != FRAME_LUA) { | 687 | if (((ptrdiff_t)pc & FRAME_TYPE) != FRAME_LUA) { |
635 | const char *tname = lj_typename(o); | 688 | const char *tname = lj_typename(o); |
689 | if (LJ_FR2) o++; | ||
636 | setframe_pc(o, pc); | 690 | setframe_pc(o, pc); |
637 | setframe_gc(o, obj2gco(L)); | 691 | setframe_gc(o, obj2gco(L), LJ_TTHREAD); |
638 | L->top = L->base = o+1; | 692 | L->top = L->base = o+1; |
639 | err_msgv(L, LJ_ERR_BADCALL, tname); | 693 | err_msgv(L, LJ_ERR_BADCALL, tname); |
640 | } | 694 | } |
@@ -649,13 +703,10 @@ LJ_NOINLINE void lj_err_callermsg(lua_State *L, const char *msg) | |||
649 | if (frame_islua(frame)) { | 703 | if (frame_islua(frame)) { |
650 | pframe = frame_prevl(frame); | 704 | pframe = frame_prevl(frame); |
651 | } else if (frame_iscont(frame)) { | 705 | } else if (frame_iscont(frame)) { |
652 | #if LJ_HASFFI | 706 | if (frame_iscont_fficb(frame)) { |
653 | if ((frame-1)->u32.lo == LJ_CONT_FFI_CALLBACK) { | ||
654 | pframe = frame; | 707 | pframe = frame; |
655 | frame = NULL; | 708 | frame = NULL; |
656 | } else | 709 | } else { |
657 | #endif | ||
658 | { | ||
659 | pframe = frame_prevd(frame); | 710 | pframe = frame_prevd(frame); |
660 | #if LJ_HASFFI | 711 | #if LJ_HASFFI |
661 | /* Remove frame for FFI metamethods. */ | 712 | /* Remove frame for FFI metamethods. */ |
@@ -678,7 +729,7 @@ LJ_NOINLINE void lj_err_callerv(lua_State *L, ErrMsg em, ...) | |||
678 | const char *msg; | 729 | const char *msg; |
679 | va_list argp; | 730 | va_list argp; |
680 | va_start(argp, em); | 731 | va_start(argp, em); |
681 | msg = lj_str_pushvf(L, err2msg(em), argp); | 732 | msg = lj_strfmt_pushvf(L, err2msg(em), argp); |
682 | va_end(argp); | 733 | va_end(argp); |
683 | lj_err_callermsg(L, msg); | 734 | lj_err_callermsg(L, msg); |
684 | } | 735 | } |
@@ -698,9 +749,9 @@ LJ_NORET LJ_NOINLINE static void err_argmsg(lua_State *L, int narg, | |||
698 | if (narg < 0 && narg > LUA_REGISTRYINDEX) | 749 | if (narg < 0 && narg > LUA_REGISTRYINDEX) |
699 | narg = (int)(L->top - L->base) + narg + 1; | 750 | narg = (int)(L->top - L->base) + narg + 1; |
700 | if (ftype && ftype[3] == 'h' && --narg == 0) /* Check for "method". */ | 751 | if (ftype && ftype[3] == 'h' && --narg == 0) /* Check for "method". */ |
701 | msg = lj_str_pushf(L, err2msg(LJ_ERR_BADSELF), fname, msg); | 752 | msg = lj_strfmt_pushf(L, err2msg(LJ_ERR_BADSELF), fname, msg); |
702 | else | 753 | else |
703 | msg = lj_str_pushf(L, err2msg(LJ_ERR_BADARG), narg, fname, msg); | 754 | msg = lj_strfmt_pushf(L, err2msg(LJ_ERR_BADARG), narg, fname, msg); |
704 | lj_err_callermsg(L, msg); | 755 | lj_err_callermsg(L, msg); |
705 | } | 756 | } |
706 | 757 | ||
@@ -710,7 +761,7 @@ LJ_NOINLINE void lj_err_argv(lua_State *L, int narg, ErrMsg em, ...) | |||
710 | const char *msg; | 761 | const char *msg; |
711 | va_list argp; | 762 | va_list argp; |
712 | va_start(argp, em); | 763 | va_start(argp, em); |
713 | msg = lj_str_pushvf(L, err2msg(em), argp); | 764 | msg = lj_strfmt_pushvf(L, err2msg(em), argp); |
714 | va_end(argp); | 765 | va_end(argp); |
715 | err_argmsg(L, narg, msg); | 766 | err_argmsg(L, narg, msg); |
716 | } | 767 | } |
@@ -740,7 +791,7 @@ LJ_NOINLINE void lj_err_argtype(lua_State *L, int narg, const char *xname) | |||
740 | TValue *o = narg < 0 ? L->top + narg : L->base + narg-1; | 791 | TValue *o = narg < 0 ? L->top + narg : L->base + narg-1; |
741 | tname = o < L->top ? lj_typename(o) : lj_obj_typename[0]; | 792 | tname = o < L->top ? lj_typename(o) : lj_obj_typename[0]; |
742 | } | 793 | } |
743 | msg = lj_str_pushf(L, err2msg(LJ_ERR_BADTYPE), xname, tname); | 794 | msg = lj_strfmt_pushf(L, err2msg(LJ_ERR_BADTYPE), xname, tname); |
744 | err_argmsg(L, narg, msg); | 795 | err_argmsg(L, narg, msg); |
745 | } | 796 | } |
746 | 797 | ||
@@ -790,7 +841,7 @@ LUALIB_API int luaL_error(lua_State *L, const char *fmt, ...) | |||
790 | const char *msg; | 841 | const char *msg; |
791 | va_list argp; | 842 | va_list argp; |
792 | va_start(argp, fmt); | 843 | va_start(argp, fmt); |
793 | msg = lj_str_pushvf(L, fmt, argp); | 844 | msg = lj_strfmt_pushvf(L, fmt, argp); |
794 | va_end(argp); | 845 | va_end(argp); |
795 | lj_err_callermsg(L, msg); | 846 | lj_err_callermsg(L, msg); |
796 | return 0; /* unreachable */ | 847 | return 0; /* unreachable */ |
diff --git a/src/lj_errmsg.h b/src/lj_errmsg.h index 35b5edd5..efb7c3f3 100644 --- a/src/lj_errmsg.h +++ b/src/lj_errmsg.h | |||
@@ -96,9 +96,7 @@ ERRDEF(STRPATX, "pattern too complex") | |||
96 | ERRDEF(STRCAPI, "invalid capture index") | 96 | ERRDEF(STRCAPI, "invalid capture index") |
97 | ERRDEF(STRCAPN, "too many captures") | 97 | ERRDEF(STRCAPN, "too many captures") |
98 | ERRDEF(STRCAPU, "unfinished capture") | 98 | ERRDEF(STRCAPU, "unfinished capture") |
99 | ERRDEF(STRFMTO, "invalid option " LUA_QL("%%%c") " to " LUA_QL("format")) | 99 | ERRDEF(STRFMT, "invalid option " LUA_QS " to " LUA_QL("format")) |
100 | ERRDEF(STRFMTR, "invalid format (repeated flags)") | ||
101 | ERRDEF(STRFMTW, "invalid format (width or precision too long)") | ||
102 | ERRDEF(STRGSRV, "invalid replacement value (a %s)") | 100 | ERRDEF(STRGSRV, "invalid replacement value (a %s)") |
103 | ERRDEF(BADMODN, "name conflict for module " LUA_QS) | 101 | ERRDEF(BADMODN, "name conflict for module " LUA_QS) |
104 | #if LJ_HASJIT | 102 | #if LJ_HASJIT |
@@ -118,7 +116,6 @@ ERRDEF(JITOPT, "unknown or malformed optimization flag " LUA_QS) | |||
118 | /* Lexer/parser errors. */ | 116 | /* Lexer/parser errors. */ |
119 | ERRDEF(XMODE, "attempt to load chunk with wrong mode") | 117 | ERRDEF(XMODE, "attempt to load chunk with wrong mode") |
120 | ERRDEF(XNEAR, "%s near " LUA_QS) | 118 | ERRDEF(XNEAR, "%s near " LUA_QS) |
121 | ERRDEF(XELEM, "lexical element too long") | ||
122 | ERRDEF(XLINES, "chunk has too many lines") | 119 | ERRDEF(XLINES, "chunk has too many lines") |
123 | ERRDEF(XLEVELS, "chunk has too many syntax levels") | 120 | ERRDEF(XLEVELS, "chunk has too many syntax levels") |
124 | ERRDEF(XNUMBER, "malformed number") | 121 | ERRDEF(XNUMBER, "malformed number") |
diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c index 1d428590..5282217f 100644 --- a/src/lj_ffrecord.c +++ b/src/lj_ffrecord.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include "lj_dispatch.h" | 27 | #include "lj_dispatch.h" |
28 | #include "lj_vm.h" | 28 | #include "lj_vm.h" |
29 | #include "lj_strscan.h" | 29 | #include "lj_strscan.h" |
30 | #include "lj_strfmt.h" | ||
30 | 31 | ||
31 | /* Some local macros to save typing. Undef'd at the end. */ | 32 | /* Some local macros to save typing. Undef'd at the end. */ |
32 | #define IR(ref) (&J->cur.ir[(ref)]) | 33 | #define IR(ref) (&J->cur.ir[(ref)]) |
@@ -79,10 +80,7 @@ static GCstr *argv2str(jit_State *J, TValue *o) | |||
79 | GCstr *s; | 80 | GCstr *s; |
80 | if (!tvisnumber(o)) | 81 | if (!tvisnumber(o)) |
81 | lj_trace_err(J, LJ_TRERR_BADTYPE); | 82 | lj_trace_err(J, LJ_TRERR_BADTYPE); |
82 | if (tvisint(o)) | 83 | s = lj_strfmt_number(J->L, o); |
83 | s = lj_str_fromint(J->L, intV(o)); | ||
84 | else | ||
85 | s = lj_str_fromnum(J->L, &o->n); | ||
86 | setstrV(J->L, o, s); | 84 | setstrV(J->L, o, s); |
87 | return s; | 85 | return s; |
88 | } | 86 | } |
@@ -98,27 +96,90 @@ static ptrdiff_t results_wanted(jit_State *J) | |||
98 | return -1; | 96 | return -1; |
99 | } | 97 | } |
100 | 98 | ||
101 | /* Throw error for unsupported variant of fast function. */ | 99 | /* Trace stitching: add continuation below frame to start a new trace. */ |
102 | LJ_NORET static void recff_nyiu(jit_State *J) | 100 | static void recff_stitch(jit_State *J) |
103 | { | 101 | { |
104 | setfuncV(J->L, &J->errinfo, J->fn); | 102 | ASMFunction cont = lj_cont_stitch; |
105 | lj_trace_err_info(J, LJ_TRERR_NYIFFU); | 103 | lua_State *L = J->L; |
104 | TValue *base = L->base; | ||
105 | BCReg nslot = J->maxslot + 1 + LJ_FR2; | ||
106 | TValue *nframe = base + 1 + LJ_FR2; | ||
107 | const BCIns *pc = frame_pc(base-1); | ||
108 | TValue *pframe = frame_prevl(base-1); | ||
109 | |||
110 | /* Move func + args up in Lua stack and insert continuation. */ | ||
111 | memmove(&base[1], &base[-1-LJ_FR2], sizeof(TValue)*nslot); | ||
112 | setframe_ftsz(nframe, ((char *)nframe - (char *)pframe) + FRAME_CONT); | ||
113 | setcont(base-LJ_FR2, cont); | ||
114 | setframe_pc(base, pc); | ||
115 | setnilV(base-1-LJ_FR2); /* Incorrect, but rec_check_slots() won't run anymore. */ | ||
116 | L->base += 2 + LJ_FR2; | ||
117 | L->top += 2 + LJ_FR2; | ||
118 | |||
119 | /* Ditto for the IR. */ | ||
120 | memmove(&J->base[1], &J->base[-1-LJ_FR2], sizeof(TRef)*nslot); | ||
121 | #if LJ_FR2 | ||
122 | J->base[2] = TREF_FRAME; | ||
123 | J->base[-1] = lj_ir_k64(J, IR_KNUM, u64ptr(contptr(cont))); | ||
124 | J->base[0] = lj_ir_k64(J, IR_KNUM, u64ptr(pc)) | TREF_CONT; | ||
125 | #else | ||
126 | J->base[0] = lj_ir_kptr(J, contptr(cont)) | TREF_CONT; | ||
127 | #endif | ||
128 | J->ktrace = tref_ref((J->base[-1-LJ_FR2] = lj_ir_ktrace(J))); | ||
129 | J->base += 2 + LJ_FR2; | ||
130 | J->baseslot += 2 + LJ_FR2; | ||
131 | J->framedepth++; | ||
132 | |||
133 | lj_record_stop(J, LJ_TRLINK_STITCH, 0); | ||
134 | |||
135 | /* Undo Lua stack changes. */ | ||
136 | memmove(&base[-1-LJ_FR2], &base[1], sizeof(TValue)*nslot); | ||
137 | setframe_pc(base-1, pc); | ||
138 | L->base -= 2 + LJ_FR2; | ||
139 | L->top -= 2 + LJ_FR2; | ||
106 | } | 140 | } |
107 | 141 | ||
108 | /* Fallback handler for all fast functions that are not recorded (yet). */ | 142 | /* Fallback handler for fast functions that are not recorded (yet). */ |
109 | static void LJ_FASTCALL recff_nyi(jit_State *J, RecordFFData *rd) | 143 | static void LJ_FASTCALL recff_nyi(jit_State *J, RecordFFData *rd) |
110 | { | 144 | { |
111 | setfuncV(J->L, &J->errinfo, J->fn); | 145 | if (J->cur.nins < (IRRef)J->param[JIT_P_minstitch] + REF_BASE) { |
112 | lj_trace_err_info(J, LJ_TRERR_NYIFF); | 146 | lj_trace_err_info(J, LJ_TRERR_TRACEUV); |
113 | UNUSED(rd); | 147 | } else { |
148 | /* Can only stitch from Lua call. */ | ||
149 | if (J->framedepth && frame_islua(J->L->base-1)) { | ||
150 | BCOp op = bc_op(*frame_pc(J->L->base-1)); | ||
151 | /* Stitched trace cannot start with *M op with variable # of args. */ | ||
152 | if (!(op == BC_CALLM || op == BC_CALLMT || | ||
153 | op == BC_RETM || op == BC_TSETM)) { | ||
154 | switch (J->fn->c.ffid) { | ||
155 | case FF_error: | ||
156 | case FF_debug_sethook: | ||
157 | case FF_jit_flush: | ||
158 | break; /* Don't stitch across special builtins. */ | ||
159 | default: | ||
160 | recff_stitch(J); /* Use trace stitching. */ | ||
161 | rd->nres = -1; | ||
162 | return; | ||
163 | } | ||
164 | } | ||
165 | } | ||
166 | /* Otherwise stop trace and return to interpreter. */ | ||
167 | lj_record_stop(J, LJ_TRLINK_RETURN, 0); | ||
168 | rd->nres = -1; | ||
169 | } | ||
114 | } | 170 | } |
115 | 171 | ||
116 | /* C functions can have arbitrary side-effects and are not recorded (yet). */ | 172 | /* Fallback handler for unsupported variants of fast functions. */ |
117 | static void LJ_FASTCALL recff_c(jit_State *J, RecordFFData *rd) | 173 | #define recff_nyiu recff_nyi |
174 | |||
175 | /* Must stop the trace for classic C functions with arbitrary side-effects. */ | ||
176 | #define recff_c recff_nyi | ||
177 | |||
178 | /* Emit BUFHDR for the global temporary buffer. */ | ||
179 | static TRef recff_bufhdr(jit_State *J) | ||
118 | { | 180 | { |
119 | setfuncV(J->L, &J->errinfo, J->fn); | 181 | return emitir(IRT(IR_BUFHDR, IRT_PGC), |
120 | lj_trace_err_info(J, LJ_TRERR_NYICF); | 182 | lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET); |
121 | UNUSED(rd); | ||
122 | } | 183 | } |
123 | 184 | ||
124 | /* -- Base library fast functions ----------------------------------------- */ | 185 | /* -- Base library fast functions ----------------------------------------- */ |
@@ -135,7 +196,7 @@ static void LJ_FASTCALL recff_type(jit_State *J, RecordFFData *rd) | |||
135 | uint32_t t; | 196 | uint32_t t; |
136 | if (tvisnumber(&rd->argv[0])) | 197 | if (tvisnumber(&rd->argv[0])) |
137 | t = ~LJ_TNUMX; | 198 | t = ~LJ_TNUMX; |
138 | else if (LJ_64 && tvislightud(&rd->argv[0])) | 199 | else if (LJ_64 && !LJ_GC64 && tvislightud(&rd->argv[0])) |
139 | t = ~LJ_TLIGHTUD; | 200 | t = ~LJ_TLIGHTUD; |
140 | else | 201 | else |
141 | t = ~itype(&rd->argv[0]); | 202 | t = ~itype(&rd->argv[0]); |
@@ -167,7 +228,7 @@ static void LJ_FASTCALL recff_setmetatable(jit_State *J, RecordFFData *rd) | |||
167 | ix.tab = tr; | 228 | ix.tab = tr; |
168 | copyTV(J->L, &ix.tabv, &rd->argv[0]); | 229 | copyTV(J->L, &ix.tabv, &rd->argv[0]); |
169 | lj_record_mm_lookup(J, &ix, MM_metatable); /* Guard for no __metatable. */ | 230 | lj_record_mm_lookup(J, &ix, MM_metatable); /* Guard for no __metatable. */ |
170 | fref = emitir(IRT(IR_FREF, IRT_P32), tr, IRFL_TAB_META); | 231 | fref = emitir(IRT(IR_FREF, IRT_PGC), tr, IRFL_TAB_META); |
171 | mtref = tref_isnil(mt) ? lj_ir_knull(J, IRT_TAB) : mt; | 232 | mtref = tref_isnil(mt) ? lj_ir_knull(J, IRT_TAB) : mt; |
172 | emitir(IRT(IR_FSTORE, IRT_TAB), fref, mtref); | 233 | emitir(IRT(IR_FSTORE, IRT_TAB), fref, mtref); |
173 | if (!tref_isnil(mt)) | 234 | if (!tref_isnil(mt)) |
@@ -233,7 +294,7 @@ int32_t lj_ffrecord_select_mode(jit_State *J, TRef tr, TValue *tv) | |||
233 | if (strV(tv)->len == 1) { | 294 | if (strV(tv)->len == 1) { |
234 | emitir(IRTG(IR_EQ, IRT_STR), tr, lj_ir_kstr(J, strV(tv))); | 295 | emitir(IRTG(IR_EQ, IRT_STR), tr, lj_ir_kstr(J, strV(tv))); |
235 | } else { | 296 | } else { |
236 | TRef trptr = emitir(IRT(IR_STRREF, IRT_P32), tr, lj_ir_kint(J, 0)); | 297 | TRef trptr = emitir(IRT(IR_STRREF, IRT_PGC), tr, lj_ir_kint(J, 0)); |
237 | TRef trchar = emitir(IRT(IR_XLOAD, IRT_U8), trptr, IRXLOAD_READONLY); | 298 | TRef trchar = emitir(IRT(IR_XLOAD, IRT_U8), trptr, IRXLOAD_READONLY); |
238 | emitir(IRTG(IR_EQ, IRT_INT), trchar, lj_ir_kint(J, '#')); | 299 | emitir(IRTG(IR_EQ, IRT_INT), trchar, lj_ir_kint(J, '#')); |
239 | } | 300 | } |
@@ -263,7 +324,8 @@ static void LJ_FASTCALL recff_select(jit_State *J, RecordFFData *rd) | |||
263 | J->base[i] = J->base[start+i]; | 324 | J->base[i] = J->base[start+i]; |
264 | } /* else: Interpreter will throw. */ | 325 | } /* else: Interpreter will throw. */ |
265 | } else { | 326 | } else { |
266 | recff_nyiu(J); | 327 | recff_nyiu(J, rd); |
328 | return; | ||
267 | } | 329 | } |
268 | } /* else: Interpreter will throw. */ | 330 | } /* else: Interpreter will throw. */ |
269 | } | 331 | } |
@@ -274,14 +336,18 @@ static void LJ_FASTCALL recff_tonumber(jit_State *J, RecordFFData *rd) | |||
274 | TRef base = J->base[1]; | 336 | TRef base = J->base[1]; |
275 | if (tr && !tref_isnil(base)) { | 337 | if (tr && !tref_isnil(base)) { |
276 | base = lj_opt_narrow_toint(J, base); | 338 | base = lj_opt_narrow_toint(J, base); |
277 | if (!tref_isk(base) || IR(tref_ref(base))->i != 10) | 339 | if (!tref_isk(base) || IR(tref_ref(base))->i != 10) { |
278 | recff_nyiu(J); | 340 | recff_nyiu(J, rd); |
341 | return; | ||
342 | } | ||
279 | } | 343 | } |
280 | if (tref_isnumber_str(tr)) { | 344 | if (tref_isnumber_str(tr)) { |
281 | if (tref_isstr(tr)) { | 345 | if (tref_isstr(tr)) { |
282 | TValue tmp; | 346 | TValue tmp; |
283 | if (!lj_strscan_num(strV(&rd->argv[0]), &tmp)) | 347 | if (!lj_strscan_num(strV(&rd->argv[0]), &tmp)) { |
284 | recff_nyiu(J); /* Would need an inverted STRTO for this case. */ | 348 | recff_nyiu(J, rd); /* Would need an inverted STRTO for this case. */ |
349 | return; | ||
350 | } | ||
285 | tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0); | 351 | tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0); |
286 | } | 352 | } |
287 | #if LJ_HASFFI | 353 | #if LJ_HASFFI |
@@ -313,10 +379,10 @@ static int recff_metacall(jit_State *J, RecordFFData *rd, MMS mm) | |||
313 | int errcode; | 379 | int errcode; |
314 | TValue argv0; | 380 | TValue argv0; |
315 | /* Temporarily insert metamethod below object. */ | 381 | /* Temporarily insert metamethod below object. */ |
316 | J->base[1] = J->base[0]; | 382 | J->base[1+LJ_FR2] = J->base[0]; |
317 | J->base[0] = ix.mobj; | 383 | J->base[0] = ix.mobj; |
318 | copyTV(J->L, &argv0, &rd->argv[0]); | 384 | copyTV(J->L, &argv0, &rd->argv[0]); |
319 | copyTV(J->L, &rd->argv[1], &rd->argv[0]); | 385 | copyTV(J->L, &rd->argv[1+LJ_FR2], &rd->argv[0]); |
320 | copyTV(J->L, &rd->argv[0], &ix.mobjv); | 386 | copyTV(J->L, &rd->argv[0], &ix.mobjv); |
321 | /* Need to protect lj_record_tailcall because it may throw. */ | 387 | /* Need to protect lj_record_tailcall because it may throw. */ |
322 | errcode = lj_vm_cpcall(J->L, NULL, J, recff_metacall_cp); | 388 | errcode = lj_vm_cpcall(J->L, NULL, J, recff_metacall_cp); |
@@ -336,13 +402,15 @@ static void LJ_FASTCALL recff_tostring(jit_State *J, RecordFFData *rd) | |||
336 | if (tref_isstr(tr)) { | 402 | if (tref_isstr(tr)) { |
337 | /* Ignore __tostring in the string base metatable. */ | 403 | /* Ignore __tostring in the string base metatable. */ |
338 | /* Pass on result in J->base[0]. */ | 404 | /* Pass on result in J->base[0]. */ |
339 | } else if (!recff_metacall(J, rd, MM_tostring)) { | 405 | } else if (tr && !recff_metacall(J, rd, MM_tostring)) { |
340 | if (tref_isnumber(tr)) { | 406 | if (tref_isnumber(tr)) { |
341 | J->base[0] = emitir(IRT(IR_TOSTR, IRT_STR), tr, 0); | 407 | J->base[0] = emitir(IRT(IR_TOSTR, IRT_STR), tr, |
408 | tref_isnum(tr) ? IRTOSTR_NUM : IRTOSTR_INT); | ||
342 | } else if (tref_ispri(tr)) { | 409 | } else if (tref_ispri(tr)) { |
343 | J->base[0] = lj_ir_kstr(J, strV(&J->fn->c.upvalue[tref_type(tr)])); | 410 | J->base[0] = lj_ir_kstr(J, lj_strfmt_obj(J->L, &rd->argv[0])); |
344 | } else { | 411 | } else { |
345 | recff_nyiu(J); | 412 | recff_nyiu(J, rd); |
413 | return; | ||
346 | } | 414 | } |
347 | } | 415 | } |
348 | } | 416 | } |
@@ -364,15 +432,15 @@ static void LJ_FASTCALL recff_ipairs_aux(jit_State *J, RecordFFData *rd) | |||
364 | } /* else: Interpreter will throw. */ | 432 | } /* else: Interpreter will throw. */ |
365 | } | 433 | } |
366 | 434 | ||
367 | static void LJ_FASTCALL recff_ipairs(jit_State *J, RecordFFData *rd) | 435 | static void LJ_FASTCALL recff_xpairs(jit_State *J, RecordFFData *rd) |
368 | { | 436 | { |
369 | TRef tr = J->base[0]; | 437 | TRef tr = J->base[0]; |
370 | if (!((LJ_52 || (LJ_HASFFI && tref_iscdata(tr))) && | 438 | if (!((LJ_52 || (LJ_HASFFI && tref_iscdata(tr))) && |
371 | recff_metacall(J, rd, MM_ipairs))) { | 439 | recff_metacall(J, rd, MM_pairs + rd->data))) { |
372 | if (tref_istab(tr)) { | 440 | if (tref_istab(tr)) { |
373 | J->base[0] = lj_ir_kfunc(J, funcV(&J->fn->c.upvalue[0])); | 441 | J->base[0] = lj_ir_kfunc(J, funcV(&J->fn->c.upvalue[0])); |
374 | J->base[1] = tr; | 442 | J->base[1] = tr; |
375 | J->base[2] = lj_ir_kint(J, 0); | 443 | J->base[2] = rd->data ? lj_ir_kint(J, 0) : TREF_NIL; |
376 | rd->nres = 3; | 444 | rd->nres = 3; |
377 | } /* else: Interpreter will throw. */ | 445 | } /* else: Interpreter will throw. */ |
378 | } | 446 | } |
@@ -381,6 +449,10 @@ static void LJ_FASTCALL recff_ipairs(jit_State *J, RecordFFData *rd) | |||
381 | static void LJ_FASTCALL recff_pcall(jit_State *J, RecordFFData *rd) | 449 | static void LJ_FASTCALL recff_pcall(jit_State *J, RecordFFData *rd) |
382 | { | 450 | { |
383 | if (J->maxslot >= 1) { | 451 | if (J->maxslot >= 1) { |
452 | #if LJ_FR2 | ||
453 | /* Shift function arguments up. */ | ||
454 | memmove(J->base + 1, J->base, sizeof(TRef) * J->maxslot); | ||
455 | #endif | ||
384 | lj_record_call(J, 0, J->maxslot - 1); | 456 | lj_record_call(J, 0, J->maxslot - 1); |
385 | rd->nres = -1; /* Pending call. */ | 457 | rd->nres = -1; /* Pending call. */ |
386 | } /* else: Interpreter will throw. */ | 458 | } /* else: Interpreter will throw. */ |
@@ -406,6 +478,10 @@ static void LJ_FASTCALL recff_xpcall(jit_State *J, RecordFFData *rd) | |||
406 | copyTV(J->L, &argv1, &rd->argv[1]); | 478 | copyTV(J->L, &argv1, &rd->argv[1]); |
407 | copyTV(J->L, &rd->argv[0], &argv1); | 479 | copyTV(J->L, &rd->argv[0], &argv1); |
408 | copyTV(J->L, &rd->argv[1], &argv0); | 480 | copyTV(J->L, &rd->argv[1], &argv0); |
481 | #if LJ_FR2 | ||
482 | /* Shift function arguments up. */ | ||
483 | memmove(J->base + 2, J->base + 1, sizeof(TRef) * (J->maxslot-1)); | ||
484 | #endif | ||
409 | /* Need to protect lj_record_call because it may throw. */ | 485 | /* Need to protect lj_record_call because it may throw. */ |
410 | errcode = lj_vm_cpcall(J->L, NULL, J, recff_xpcall_cp); | 486 | errcode = lj_vm_cpcall(J->L, NULL, J, recff_xpcall_cp); |
411 | /* Always undo Lua stack swap to avoid confusing the interpreter. */ | 487 | /* Always undo Lua stack swap to avoid confusing the interpreter. */ |
@@ -417,12 +493,24 @@ static void LJ_FASTCALL recff_xpcall(jit_State *J, RecordFFData *rd) | |||
417 | } /* else: Interpreter will throw. */ | 493 | } /* else: Interpreter will throw. */ |
418 | } | 494 | } |
419 | 495 | ||
496 | static void LJ_FASTCALL recff_getfenv(jit_State *J, RecordFFData *rd) | ||
497 | { | ||
498 | TRef tr = J->base[0]; | ||
499 | /* Only support getfenv(0) for now. */ | ||
500 | if (tref_isint(tr) && tref_isk(tr) && IR(tref_ref(tr))->i == 0) { | ||
501 | TRef trl = emitir(IRT(IR_LREF, IRT_THREAD), 0, 0); | ||
502 | J->base[0] = emitir(IRT(IR_FLOAD, IRT_TAB), trl, IRFL_THREAD_ENV); | ||
503 | return; | ||
504 | } | ||
505 | recff_nyiu(J, rd); | ||
506 | } | ||
507 | |||
420 | /* -- Math library fast functions ----------------------------------------- */ | 508 | /* -- Math library fast functions ----------------------------------------- */ |
421 | 509 | ||
422 | static void LJ_FASTCALL recff_math_abs(jit_State *J, RecordFFData *rd) | 510 | static void LJ_FASTCALL recff_math_abs(jit_State *J, RecordFFData *rd) |
423 | { | 511 | { |
424 | TRef tr = lj_ir_tonum(J, J->base[0]); | 512 | TRef tr = lj_ir_tonum(J, J->base[0]); |
425 | J->base[0] = emitir(IRTN(IR_ABS), tr, lj_ir_knum_abs(J)); | 513 | J->base[0] = emitir(IRTN(IR_ABS), tr, lj_ir_ksimd(J, LJ_KSIMD_ABS)); |
426 | UNUSED(rd); | 514 | UNUSED(rd); |
427 | } | 515 | } |
428 | 516 | ||
@@ -529,14 +617,6 @@ static void LJ_FASTCALL recff_math_modf(jit_State *J, RecordFFData *rd) | |||
529 | rd->nres = 2; | 617 | rd->nres = 2; |
530 | } | 618 | } |
531 | 619 | ||
532 | static void LJ_FASTCALL recff_math_degrad(jit_State *J, RecordFFData *rd) | ||
533 | { | ||
534 | TRef tr = lj_ir_tonum(J, J->base[0]); | ||
535 | TRef trm = lj_ir_knum(J, numV(&J->fn->c.upvalue[0])); | ||
536 | J->base[0] = emitir(IRTN(IR_MUL), tr, trm); | ||
537 | UNUSED(rd); | ||
538 | } | ||
539 | |||
540 | static void LJ_FASTCALL recff_math_pow(jit_State *J, RecordFFData *rd) | 620 | static void LJ_FASTCALL recff_math_pow(jit_State *J, RecordFFData *rd) |
541 | { | 621 | { |
542 | J->base[0] = lj_opt_narrow_pow(J, J->base[0], J->base[1], | 622 | J->base[0] = lj_opt_narrow_pow(J, J->base[0], J->base[1], |
@@ -591,48 +671,105 @@ static void LJ_FASTCALL recff_math_random(jit_State *J, RecordFFData *rd) | |||
591 | 671 | ||
592 | /* -- Bit library fast functions ------------------------------------------ */ | 672 | /* -- Bit library fast functions ------------------------------------------ */ |
593 | 673 | ||
594 | /* Record unary bit.tobit, bit.bnot, bit.bswap. */ | 674 | /* Record bit.tobit. */ |
675 | static void LJ_FASTCALL recff_bit_tobit(jit_State *J, RecordFFData *rd) | ||
676 | { | ||
677 | TRef tr = J->base[0]; | ||
678 | #if LJ_HASFFI | ||
679 | if (tref_iscdata(tr)) { recff_bit64_tobit(J, rd); return; } | ||
680 | #endif | ||
681 | J->base[0] = lj_opt_narrow_tobit(J, tr); | ||
682 | UNUSED(rd); | ||
683 | } | ||
684 | |||
685 | /* Record unary bit.bnot, bit.bswap. */ | ||
595 | static void LJ_FASTCALL recff_bit_unary(jit_State *J, RecordFFData *rd) | 686 | static void LJ_FASTCALL recff_bit_unary(jit_State *J, RecordFFData *rd) |
596 | { | 687 | { |
597 | TRef tr = lj_opt_narrow_tobit(J, J->base[0]); | 688 | #if LJ_HASFFI |
598 | J->base[0] = (rd->data == IR_TOBIT) ? tr : emitir(IRTI(rd->data), tr, 0); | 689 | if (recff_bit64_unary(J, rd)) |
690 | return; | ||
691 | #endif | ||
692 | J->base[0] = emitir(IRTI(rd->data), lj_opt_narrow_tobit(J, J->base[0]), 0); | ||
599 | } | 693 | } |
600 | 694 | ||
601 | /* Record N-ary bit.band, bit.bor, bit.bxor. */ | 695 | /* Record N-ary bit.band, bit.bor, bit.bxor. */ |
602 | static void LJ_FASTCALL recff_bit_nary(jit_State *J, RecordFFData *rd) | 696 | static void LJ_FASTCALL recff_bit_nary(jit_State *J, RecordFFData *rd) |
603 | { | 697 | { |
604 | TRef tr = lj_opt_narrow_tobit(J, J->base[0]); | 698 | #if LJ_HASFFI |
605 | uint32_t op = rd->data; | 699 | if (recff_bit64_nary(J, rd)) |
606 | BCReg i; | 700 | return; |
607 | for (i = 1; J->base[i] != 0; i++) | 701 | #endif |
608 | tr = emitir(IRTI(op), tr, lj_opt_narrow_tobit(J, J->base[i])); | 702 | { |
609 | J->base[0] = tr; | 703 | TRef tr = lj_opt_narrow_tobit(J, J->base[0]); |
704 | uint32_t ot = IRTI(rd->data); | ||
705 | BCReg i; | ||
706 | for (i = 1; J->base[i] != 0; i++) | ||
707 | tr = emitir(ot, tr, lj_opt_narrow_tobit(J, J->base[i])); | ||
708 | J->base[0] = tr; | ||
709 | } | ||
610 | } | 710 | } |
611 | 711 | ||
612 | /* Record bit shifts. */ | 712 | /* Record bit shifts. */ |
613 | static void LJ_FASTCALL recff_bit_shift(jit_State *J, RecordFFData *rd) | 713 | static void LJ_FASTCALL recff_bit_shift(jit_State *J, RecordFFData *rd) |
614 | { | 714 | { |
615 | TRef tr = lj_opt_narrow_tobit(J, J->base[0]); | 715 | #if LJ_HASFFI |
616 | TRef tsh = lj_opt_narrow_tobit(J, J->base[1]); | 716 | if (recff_bit64_shift(J, rd)) |
617 | IROp op = (IROp)rd->data; | 717 | return; |
618 | if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) && | 718 | #endif |
619 | !tref_isk(tsh)) | 719 | { |
620 | tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 31)); | 720 | TRef tr = lj_opt_narrow_tobit(J, J->base[0]); |
721 | TRef tsh = lj_opt_narrow_tobit(J, J->base[1]); | ||
722 | IROp op = (IROp)rd->data; | ||
723 | if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) && | ||
724 | !tref_isk(tsh)) | ||
725 | tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 31)); | ||
621 | #ifdef LJ_TARGET_UNIFYROT | 726 | #ifdef LJ_TARGET_UNIFYROT |
622 | if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) { | 727 | if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) { |
623 | op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR; | 728 | op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR; |
624 | tsh = emitir(IRTI(IR_NEG), tsh, tsh); | 729 | tsh = emitir(IRTI(IR_NEG), tsh, tsh); |
730 | } | ||
731 | #endif | ||
732 | J->base[0] = emitir(IRTI(op), tr, tsh); | ||
625 | } | 733 | } |
734 | } | ||
735 | |||
736 | static void LJ_FASTCALL recff_bit_tohex(jit_State *J, RecordFFData *rd) | ||
737 | { | ||
738 | #if LJ_HASFFI | ||
739 | TRef hdr = recff_bufhdr(J); | ||
740 | TRef tr = recff_bit64_tohex(J, rd, hdr); | ||
741 | J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr); | ||
742 | #else | ||
743 | recff_nyiu(J, rd); /* Don't bother working around this NYI. */ | ||
626 | #endif | 744 | #endif |
627 | J->base[0] = emitir(IRTI(op), tr, tsh); | ||
628 | } | 745 | } |
629 | 746 | ||
630 | /* -- String library fast functions --------------------------------------- */ | 747 | /* -- String library fast functions --------------------------------------- */ |
631 | 748 | ||
632 | static void LJ_FASTCALL recff_string_len(jit_State *J, RecordFFData *rd) | 749 | /* Specialize to relative starting position for string. */ |
750 | static TRef recff_string_start(jit_State *J, GCstr *s, int32_t *st, TRef tr, | ||
751 | TRef trlen, TRef tr0) | ||
633 | { | 752 | { |
634 | J->base[0] = emitir(IRTI(IR_FLOAD), lj_ir_tostr(J, J->base[0]), IRFL_STR_LEN); | 753 | int32_t start = *st; |
635 | UNUSED(rd); | 754 | if (start < 0) { |
755 | emitir(IRTGI(IR_LT), tr, tr0); | ||
756 | tr = emitir(IRTI(IR_ADD), trlen, tr); | ||
757 | start = start + (int32_t)s->len; | ||
758 | emitir(start < 0 ? IRTGI(IR_LT) : IRTGI(IR_GE), tr, tr0); | ||
759 | if (start < 0) { | ||
760 | tr = tr0; | ||
761 | start = 0; | ||
762 | } | ||
763 | } else if (start == 0) { | ||
764 | emitir(IRTGI(IR_EQ), tr, tr0); | ||
765 | tr = tr0; | ||
766 | } else { | ||
767 | tr = emitir(IRTI(IR_ADD), tr, lj_ir_kint(J, -1)); | ||
768 | emitir(IRTGI(IR_GE), tr, tr0); | ||
769 | start--; | ||
770 | } | ||
771 | *st = start; | ||
772 | return tr; | ||
636 | } | 773 | } |
637 | 774 | ||
638 | /* Handle string.byte (rd->data = 0) and string.sub (rd->data = 1). */ | 775 | /* Handle string.byte (rd->data = 0) and string.sub (rd->data = 1). */ |
@@ -679,39 +816,21 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd) | |||
679 | } else if ((MSize)end <= str->len) { | 816 | } else if ((MSize)end <= str->len) { |
680 | emitir(IRTGI(IR_ULE), trend, trlen); | 817 | emitir(IRTGI(IR_ULE), trend, trlen); |
681 | } else { | 818 | } else { |
682 | emitir(IRTGI(IR_GT), trend, trlen); | 819 | emitir(IRTGI(IR_UGT), trend, trlen); |
683 | end = (int32_t)str->len; | 820 | end = (int32_t)str->len; |
684 | trend = trlen; | 821 | trend = trlen; |
685 | } | 822 | } |
686 | if (start < 0) { | 823 | trstart = recff_string_start(J, str, &start, trstart, trlen, tr0); |
687 | emitir(IRTGI(IR_LT), trstart, tr0); | ||
688 | trstart = emitir(IRTI(IR_ADD), trlen, trstart); | ||
689 | start = start+(int32_t)str->len; | ||
690 | emitir(start < 0 ? IRTGI(IR_LT) : IRTGI(IR_GE), trstart, tr0); | ||
691 | if (start < 0) { | ||
692 | trstart = tr0; | ||
693 | start = 0; | ||
694 | } | ||
695 | } else { | ||
696 | if (start == 0) { | ||
697 | emitir(IRTGI(IR_EQ), trstart, tr0); | ||
698 | trstart = tr0; | ||
699 | } else { | ||
700 | trstart = emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, -1)); | ||
701 | emitir(IRTGI(IR_GE), trstart, tr0); | ||
702 | start--; | ||
703 | } | ||
704 | } | ||
705 | if (rd->data) { /* Return string.sub result. */ | 824 | if (rd->data) { /* Return string.sub result. */ |
706 | if (end - start >= 0) { | 825 | if (end - start >= 0) { |
707 | /* Also handle empty range here, to avoid extra traces. */ | 826 | /* Also handle empty range here, to avoid extra traces. */ |
708 | TRef trptr, trslen = emitir(IRTI(IR_SUB), trend, trstart); | 827 | TRef trptr, trslen = emitir(IRTI(IR_SUB), trend, trstart); |
709 | emitir(IRTGI(IR_GE), trslen, tr0); | 828 | emitir(IRTGI(IR_GE), trslen, tr0); |
710 | trptr = emitir(IRT(IR_STRREF, IRT_P32), trstr, trstart); | 829 | trptr = emitir(IRT(IR_STRREF, IRT_PGC), trstr, trstart); |
711 | J->base[0] = emitir(IRT(IR_SNEW, IRT_STR), trptr, trslen); | 830 | J->base[0] = emitir(IRT(IR_SNEW, IRT_STR), trptr, trslen); |
712 | } else { /* Range underflow: return empty string. */ | 831 | } else { /* Range underflow: return empty string. */ |
713 | emitir(IRTGI(IR_LT), trend, trstart); | 832 | emitir(IRTGI(IR_LT), trend, trstart); |
714 | J->base[0] = lj_ir_kstr(J, lj_str_new(J->L, strdata(str), 0)); | 833 | J->base[0] = lj_ir_kstr(J, &J2G(J)->strempty); |
715 | } | 834 | } |
716 | } else { /* Return string.byte result(s). */ | 835 | } else { /* Return string.byte result(s). */ |
717 | ptrdiff_t i, len = end - start; | 836 | ptrdiff_t i, len = end - start; |
@@ -723,7 +842,7 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd) | |||
723 | rd->nres = len; | 842 | rd->nres = len; |
724 | for (i = 0; i < len; i++) { | 843 | for (i = 0; i < len; i++) { |
725 | TRef tmp = emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, (int32_t)i)); | 844 | TRef tmp = emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, (int32_t)i)); |
726 | tmp = emitir(IRT(IR_STRREF, IRT_P32), trstr, tmp); | 845 | tmp = emitir(IRT(IR_STRREF, IRT_PGC), trstr, tmp); |
727 | J->base[i] = emitir(IRT(IR_XLOAD, IRT_U8), tmp, IRXLOAD_READONLY); | 846 | J->base[i] = emitir(IRT(IR_XLOAD, IRT_U8), tmp, IRXLOAD_READONLY); |
728 | } | 847 | } |
729 | } else { /* Empty range or range underflow: return no results. */ | 848 | } else { /* Empty range or range underflow: return no results. */ |
@@ -733,48 +852,203 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd) | |||
733 | } | 852 | } |
734 | } | 853 | } |
735 | 854 | ||
736 | /* -- Table library fast functions ---------------------------------------- */ | 855 | static void LJ_FASTCALL recff_string_char(jit_State *J, RecordFFData *rd) |
737 | |||
738 | static void LJ_FASTCALL recff_table_getn(jit_State *J, RecordFFData *rd) | ||
739 | { | 856 | { |
740 | if (tref_istab(J->base[0])) | 857 | TRef k255 = lj_ir_kint(J, 255); |
741 | J->base[0] = lj_ir_call(J, IRCALL_lj_tab_len, J->base[0]); | 858 | BCReg i; |
742 | /* else: Interpreter will throw. */ | 859 | for (i = 0; J->base[i] != 0; i++) { /* Convert char values to strings. */ |
860 | TRef tr = lj_opt_narrow_toint(J, J->base[i]); | ||
861 | emitir(IRTGI(IR_ULE), tr, k255); | ||
862 | J->base[i] = emitir(IRT(IR_TOSTR, IRT_STR), tr, IRTOSTR_CHAR); | ||
863 | } | ||
864 | if (i > 1) { /* Concatenate the strings, if there's more than one. */ | ||
865 | TRef hdr = recff_bufhdr(J), tr = hdr; | ||
866 | for (i = 0; J->base[i] != 0; i++) | ||
867 | tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, J->base[i]); | ||
868 | J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr); | ||
869 | } else if (i == 0) { | ||
870 | J->base[0] = lj_ir_kstr(J, &J2G(J)->strempty); | ||
871 | } | ||
743 | UNUSED(rd); | 872 | UNUSED(rd); |
744 | } | 873 | } |
745 | 874 | ||
746 | static void LJ_FASTCALL recff_table_remove(jit_State *J, RecordFFData *rd) | 875 | static void LJ_FASTCALL recff_string_rep(jit_State *J, RecordFFData *rd) |
747 | { | 876 | { |
748 | TRef tab = J->base[0]; | 877 | TRef str = lj_ir_tostr(J, J->base[0]); |
749 | rd->nres = 0; | 878 | TRef rep = lj_opt_narrow_toint(J, J->base[1]); |
750 | if (tref_istab(tab)) { | 879 | TRef hdr, tr, str2 = 0; |
751 | if (tref_isnil(J->base[1])) { /* Simple pop: t[#t] = nil */ | 880 | if (!tref_isnil(J->base[2])) { |
752 | TRef trlen = lj_ir_call(J, IRCALL_lj_tab_len, tab); | 881 | TRef sep = lj_ir_tostr(J, J->base[2]); |
753 | GCtab *t = tabV(&rd->argv[0]); | 882 | int32_t vrep = argv2int(J, &rd->argv[1]); |
754 | MSize len = lj_tab_len(t); | 883 | emitir(IRTGI(vrep > 1 ? IR_GT : IR_LE), rep, lj_ir_kint(J, 1)); |
755 | emitir(IRTGI(len ? IR_NE : IR_EQ), trlen, lj_ir_kint(J, 0)); | 884 | if (vrep > 1) { |
756 | if (len) { | 885 | TRef hdr2 = recff_bufhdr(J); |
757 | RecordIndex ix; | 886 | TRef tr2 = emitir(IRT(IR_BUFPUT, IRT_PGC), hdr2, sep); |
758 | ix.tab = tab; | 887 | tr2 = emitir(IRT(IR_BUFPUT, IRT_PGC), tr2, str); |
759 | ix.key = trlen; | 888 | str2 = emitir(IRT(IR_BUFSTR, IRT_STR), tr2, hdr2); |
760 | settabV(J->L, &ix.tabv, t); | 889 | } |
761 | setintV(&ix.keyv, len); | 890 | } |
762 | ix.idxchain = 0; | 891 | tr = hdr = recff_bufhdr(J); |
763 | if (results_wanted(J) != 0) { /* Specialize load only if needed. */ | 892 | if (str2) { |
764 | ix.val = 0; | 893 | tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, str); |
765 | J->base[0] = lj_record_idx(J, &ix); /* Load previous value. */ | 894 | str = str2; |
766 | rd->nres = 1; | 895 | rep = emitir(IRTI(IR_ADD), rep, lj_ir_kint(J, -1)); |
767 | /* Assumes ix.key/ix.tab is not modified for raw lj_record_idx(). */ | 896 | } |
768 | } | 897 | tr = lj_ir_call(J, IRCALL_lj_buf_putstr_rep, tr, str, rep); |
769 | ix.val = TREF_NIL; | 898 | J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr); |
770 | lj_record_idx(J, &ix); /* Remove value. */ | 899 | } |
900 | |||
901 | static void LJ_FASTCALL recff_string_op(jit_State *J, RecordFFData *rd) | ||
902 | { | ||
903 | TRef str = lj_ir_tostr(J, J->base[0]); | ||
904 | TRef hdr = recff_bufhdr(J); | ||
905 | TRef tr = lj_ir_call(J, rd->data, hdr, str); | ||
906 | J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr); | ||
907 | } | ||
908 | |||
909 | static void LJ_FASTCALL recff_string_find(jit_State *J, RecordFFData *rd) | ||
910 | { | ||
911 | TRef trstr = lj_ir_tostr(J, J->base[0]); | ||
912 | TRef trpat = lj_ir_tostr(J, J->base[1]); | ||
913 | TRef trlen = emitir(IRTI(IR_FLOAD), trstr, IRFL_STR_LEN); | ||
914 | TRef tr0 = lj_ir_kint(J, 0); | ||
915 | TRef trstart; | ||
916 | GCstr *str = argv2str(J, &rd->argv[0]); | ||
917 | GCstr *pat = argv2str(J, &rd->argv[1]); | ||
918 | int32_t start; | ||
919 | J->needsnap = 1; | ||
920 | if (tref_isnil(J->base[2])) { | ||
921 | trstart = lj_ir_kint(J, 1); | ||
922 | start = 1; | ||
923 | } else { | ||
924 | trstart = lj_opt_narrow_toint(J, J->base[2]); | ||
925 | start = argv2int(J, &rd->argv[2]); | ||
926 | } | ||
927 | trstart = recff_string_start(J, str, &start, trstart, trlen, tr0); | ||
928 | if ((MSize)start <= str->len) { | ||
929 | emitir(IRTGI(IR_ULE), trstart, trlen); | ||
930 | } else { | ||
931 | emitir(IRTGI(IR_UGT), trstart, trlen); | ||
932 | #if LJ_52 | ||
933 | J->base[0] = TREF_NIL; | ||
934 | return; | ||
935 | #else | ||
936 | trstart = trlen; | ||
937 | start = str->len; | ||
938 | #endif | ||
939 | } | ||
940 | /* Fixed arg or no pattern matching chars? (Specialized to pattern string.) */ | ||
941 | if ((J->base[2] && tref_istruecond(J->base[3])) || | ||
942 | (emitir(IRTG(IR_EQ, IRT_STR), trpat, lj_ir_kstr(J, pat)), | ||
943 | !lj_str_haspattern(pat))) { /* Search for fixed string. */ | ||
944 | TRef trsptr = emitir(IRT(IR_STRREF, IRT_PGC), trstr, trstart); | ||
945 | TRef trpptr = emitir(IRT(IR_STRREF, IRT_PGC), trpat, tr0); | ||
946 | TRef trslen = emitir(IRTI(IR_SUB), trlen, trstart); | ||
947 | TRef trplen = emitir(IRTI(IR_FLOAD), trpat, IRFL_STR_LEN); | ||
948 | TRef tr = lj_ir_call(J, IRCALL_lj_str_find, trsptr, trpptr, trslen, trplen); | ||
949 | TRef trp0 = lj_ir_kkptr(J, NULL); | ||
950 | if (lj_str_find(strdata(str)+(MSize)start, strdata(pat), | ||
951 | str->len-(MSize)start, pat->len)) { | ||
952 | TRef pos; | ||
953 | emitir(IRTG(IR_NE, IRT_PGC), tr, trp0); | ||
954 | /* Recompute offset. trsptr may not point into trstr after folding. */ | ||
955 | pos = emitir(IRTI(IR_ADD), emitir(IRTI(IR_SUB), tr, trsptr), trstart); | ||
956 | J->base[0] = emitir(IRTI(IR_ADD), pos, lj_ir_kint(J, 1)); | ||
957 | J->base[1] = emitir(IRTI(IR_ADD), pos, trplen); | ||
958 | rd->nres = 2; | ||
959 | } else { | ||
960 | emitir(IRTG(IR_EQ, IRT_PGC), tr, trp0); | ||
961 | J->base[0] = TREF_NIL; | ||
962 | } | ||
963 | } else { /* Search for pattern. */ | ||
964 | recff_nyiu(J, rd); | ||
965 | return; | ||
966 | } | ||
967 | } | ||
968 | |||
969 | static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd) | ||
970 | { | ||
971 | TRef trfmt = lj_ir_tostr(J, J->base[0]); | ||
972 | GCstr *fmt = argv2str(J, &rd->argv[0]); | ||
973 | int arg = 1; | ||
974 | TRef hdr, tr; | ||
975 | FormatState fs; | ||
976 | SFormat sf; | ||
977 | /* Specialize to the format string. */ | ||
978 | emitir(IRTG(IR_EQ, IRT_STR), trfmt, lj_ir_kstr(J, fmt)); | ||
979 | tr = hdr = recff_bufhdr(J); | ||
980 | lj_strfmt_init(&fs, strdata(fmt), fmt->len); | ||
981 | while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) { /* Parse format. */ | ||
982 | TRef tra = sf == STRFMT_LIT ? 0 : J->base[arg++]; | ||
983 | TRef trsf = lj_ir_kint(J, (int32_t)sf); | ||
984 | IRCallID id; | ||
985 | switch (STRFMT_TYPE(sf)) { | ||
986 | case STRFMT_LIT: | ||
987 | tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, | ||
988 | lj_ir_kstr(J, lj_str_new(J->L, fs.str, fs.len))); | ||
989 | break; | ||
990 | case STRFMT_INT: | ||
991 | id = IRCALL_lj_strfmt_putfnum_int; | ||
992 | handle_int: | ||
993 | if (!tref_isinteger(tra)) | ||
994 | goto handle_num; | ||
995 | if (sf == STRFMT_INT) { /* Shortcut for plain %d. */ | ||
996 | tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, | ||
997 | emitir(IRT(IR_TOSTR, IRT_STR), tra, IRTOSTR_INT)); | ||
998 | } else { | ||
999 | #if LJ_HASFFI | ||
1000 | tra = emitir(IRT(IR_CONV, IRT_U64), tra, | ||
1001 | (IRT_INT|(IRT_U64<<5)|IRCONV_SEXT)); | ||
1002 | tr = lj_ir_call(J, IRCALL_lj_strfmt_putfxint, tr, trsf, tra); | ||
1003 | lj_needsplit(J); | ||
1004 | #else | ||
1005 | recff_nyiu(J, rd); /* Don't bother working around this NYI. */ | ||
1006 | return; | ||
1007 | #endif | ||
1008 | } | ||
1009 | break; | ||
1010 | case STRFMT_UINT: | ||
1011 | id = IRCALL_lj_strfmt_putfnum_uint; | ||
1012 | goto handle_int; | ||
1013 | case STRFMT_NUM: | ||
1014 | id = IRCALL_lj_strfmt_putfnum; | ||
1015 | handle_num: | ||
1016 | tra = lj_ir_tonum(J, tra); | ||
1017 | tr = lj_ir_call(J, id, tr, trsf, tra); | ||
1018 | if (LJ_SOFTFP32) lj_needsplit(J); | ||
1019 | break; | ||
1020 | case STRFMT_STR: | ||
1021 | if (!tref_isstr(tra)) { | ||
1022 | recff_nyiu(J, rd); /* NYI: __tostring and non-string types for %s. */ | ||
1023 | return; | ||
771 | } | 1024 | } |
772 | } else { /* Complex case: remove in the middle. */ | 1025 | if (sf == STRFMT_STR) /* Shortcut for plain %s. */ |
773 | recff_nyiu(J); | 1026 | tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, tra); |
1027 | else if ((sf & STRFMT_T_QUOTED)) | ||
1028 | tr = lj_ir_call(J, IRCALL_lj_strfmt_putquoted, tr, tra); | ||
1029 | else | ||
1030 | tr = lj_ir_call(J, IRCALL_lj_strfmt_putfstr, tr, trsf, tra); | ||
1031 | break; | ||
1032 | case STRFMT_CHAR: | ||
1033 | tra = lj_opt_narrow_toint(J, tra); | ||
1034 | if (sf == STRFMT_CHAR) /* Shortcut for plain %c. */ | ||
1035 | tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, | ||
1036 | emitir(IRT(IR_TOSTR, IRT_STR), tra, IRTOSTR_CHAR)); | ||
1037 | else | ||
1038 | tr = lj_ir_call(J, IRCALL_lj_strfmt_putfchar, tr, trsf, tra); | ||
1039 | break; | ||
1040 | case STRFMT_PTR: /* NYI */ | ||
1041 | case STRFMT_ERR: | ||
1042 | default: | ||
1043 | recff_nyiu(J, rd); | ||
1044 | return; | ||
774 | } | 1045 | } |
775 | } /* else: Interpreter will throw. */ | 1046 | } |
1047 | J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr); | ||
776 | } | 1048 | } |
777 | 1049 | ||
1050 | /* -- Table library fast functions ---------------------------------------- */ | ||
1051 | |||
778 | static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd) | 1052 | static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd) |
779 | { | 1053 | { |
780 | RecordIndex ix; | 1054 | RecordIndex ix; |
@@ -791,11 +1065,49 @@ static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd) | |||
791 | ix.idxchain = 0; | 1065 | ix.idxchain = 0; |
792 | lj_record_idx(J, &ix); /* Set new value. */ | 1066 | lj_record_idx(J, &ix); /* Set new value. */ |
793 | } else { /* Complex case: insert in the middle. */ | 1067 | } else { /* Complex case: insert in the middle. */ |
794 | recff_nyiu(J); | 1068 | recff_nyiu(J, rd); |
1069 | return; | ||
795 | } | 1070 | } |
796 | } /* else: Interpreter will throw. */ | 1071 | } /* else: Interpreter will throw. */ |
797 | } | 1072 | } |
798 | 1073 | ||
1074 | static void LJ_FASTCALL recff_table_concat(jit_State *J, RecordFFData *rd) | ||
1075 | { | ||
1076 | TRef tab = J->base[0]; | ||
1077 | if (tref_istab(tab)) { | ||
1078 | TRef sep = !tref_isnil(J->base[1]) ? | ||
1079 | lj_ir_tostr(J, J->base[1]) : lj_ir_knull(J, IRT_STR); | ||
1080 | TRef tri = (J->base[1] && !tref_isnil(J->base[2])) ? | ||
1081 | lj_opt_narrow_toint(J, J->base[2]) : lj_ir_kint(J, 1); | ||
1082 | TRef tre = (J->base[1] && J->base[2] && !tref_isnil(J->base[3])) ? | ||
1083 | lj_opt_narrow_toint(J, J->base[3]) : | ||
1084 | lj_ir_call(J, IRCALL_lj_tab_len, tab); | ||
1085 | TRef hdr = recff_bufhdr(J); | ||
1086 | TRef tr = lj_ir_call(J, IRCALL_lj_buf_puttab, hdr, tab, sep, tri, tre); | ||
1087 | emitir(IRTG(IR_NE, IRT_PTR), tr, lj_ir_kptr(J, NULL)); | ||
1088 | J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr); | ||
1089 | } /* else: Interpreter will throw. */ | ||
1090 | UNUSED(rd); | ||
1091 | } | ||
1092 | |||
1093 | static void LJ_FASTCALL recff_table_new(jit_State *J, RecordFFData *rd) | ||
1094 | { | ||
1095 | TRef tra = lj_opt_narrow_toint(J, J->base[0]); | ||
1096 | TRef trh = lj_opt_narrow_toint(J, J->base[1]); | ||
1097 | J->base[0] = lj_ir_call(J, IRCALL_lj_tab_new_ah, tra, trh); | ||
1098 | UNUSED(rd); | ||
1099 | } | ||
1100 | |||
1101 | static void LJ_FASTCALL recff_table_clear(jit_State *J, RecordFFData *rd) | ||
1102 | { | ||
1103 | TRef tr = J->base[0]; | ||
1104 | if (tref_istab(tr)) { | ||
1105 | rd->nres = 0; | ||
1106 | lj_ir_call(J, IRCALL_lj_tab_clear, tr); | ||
1107 | J->needsnap = 1; | ||
1108 | } /* else: Interpreter will throw. */ | ||
1109 | } | ||
1110 | |||
799 | /* -- I/O library fast functions ------------------------------------------ */ | 1111 | /* -- I/O library fast functions ------------------------------------------ */ |
800 | 1112 | ||
801 | /* Get FILE* for I/O function. Any I/O error aborts recording, so there's | 1113 | /* Get FILE* for I/O function. Any I/O error aborts recording, so there's |
@@ -805,8 +1117,13 @@ static TRef recff_io_fp(jit_State *J, TRef *udp, int32_t id) | |||
805 | { | 1117 | { |
806 | TRef tr, ud, fp; | 1118 | TRef tr, ud, fp; |
807 | if (id) { /* io.func() */ | 1119 | if (id) { /* io.func() */ |
1120 | #if LJ_GC64 | ||
1121 | /* TODO: fix ARM32 asm_fload(), so we can use this for all archs. */ | ||
1122 | ud = lj_ir_ggfload(J, IRT_UDATA, GG_OFS(g.gcroot[id])); | ||
1123 | #else | ||
808 | tr = lj_ir_kptr(J, &J2G(J)->gcroot[id]); | 1124 | tr = lj_ir_kptr(J, &J2G(J)->gcroot[id]); |
809 | ud = emitir(IRT(IR_XLOAD, IRT_UDATA), tr, 0); | 1125 | ud = emitir(IRT(IR_XLOAD, IRT_UDATA), tr, 0); |
1126 | #endif | ||
810 | } else { /* fp:method() */ | 1127 | } else { /* fp:method() */ |
811 | ud = J->base[0]; | 1128 | ud = J->base[0]; |
812 | if (!tref_isudata(ud)) | 1129 | if (!tref_isudata(ud)) |
@@ -828,10 +1145,13 @@ static void LJ_FASTCALL recff_io_write(jit_State *J, RecordFFData *rd) | |||
828 | ptrdiff_t i = rd->data == 0 ? 1 : 0; | 1145 | ptrdiff_t i = rd->data == 0 ? 1 : 0; |
829 | for (; J->base[i]; i++) { | 1146 | for (; J->base[i]; i++) { |
830 | TRef str = lj_ir_tostr(J, J->base[i]); | 1147 | TRef str = lj_ir_tostr(J, J->base[i]); |
831 | TRef buf = emitir(IRT(IR_STRREF, IRT_P32), str, zero); | 1148 | TRef buf = emitir(IRT(IR_STRREF, IRT_PGC), str, zero); |
832 | TRef len = emitir(IRTI(IR_FLOAD), str, IRFL_STR_LEN); | 1149 | TRef len = emitir(IRTI(IR_FLOAD), str, IRFL_STR_LEN); |
833 | if (tref_isk(len) && IR(tref_ref(len))->i == 1) { | 1150 | if (tref_isk(len) && IR(tref_ref(len))->i == 1) { |
834 | TRef tr = emitir(IRT(IR_XLOAD, IRT_U8), buf, IRXLOAD_READONLY); | 1151 | IRIns *irs = IR(tref_ref(str)); |
1152 | TRef tr = (irs->o == IR_TOSTR && irs->op2 == IRTOSTR_CHAR) ? | ||
1153 | irs->op1 : | ||
1154 | emitir(IRT(IR_XLOAD, IRT_U8), buf, IRXLOAD_READONLY); | ||
835 | tr = lj_ir_call(J, IRCALL_fputc, tr, fp); | 1155 | tr = lj_ir_call(J, IRCALL_fputc, tr, fp); |
836 | if (results_wanted(J) != 0) /* Check result only if not ignored. */ | 1156 | if (results_wanted(J) != 0) /* Check result only if not ignored. */ |
837 | emitir(IRTGI(IR_NE), tr, lj_ir_kint(J, -1)); | 1157 | emitir(IRTGI(IR_NE), tr, lj_ir_kint(J, -1)); |
@@ -853,6 +1173,28 @@ static void LJ_FASTCALL recff_io_flush(jit_State *J, RecordFFData *rd) | |||
853 | J->base[0] = TREF_TRUE; | 1173 | J->base[0] = TREF_TRUE; |
854 | } | 1174 | } |
855 | 1175 | ||
1176 | /* -- Debug library fast functions ---------------------------------------- */ | ||
1177 | |||
1178 | static void LJ_FASTCALL recff_debug_getmetatable(jit_State *J, RecordFFData *rd) | ||
1179 | { | ||
1180 | GCtab *mt; | ||
1181 | TRef mtref; | ||
1182 | TRef tr = J->base[0]; | ||
1183 | if (tref_istab(tr)) { | ||
1184 | mt = tabref(tabV(&rd->argv[0])->metatable); | ||
1185 | mtref = emitir(IRT(IR_FLOAD, IRT_TAB), tr, IRFL_TAB_META); | ||
1186 | } else if (tref_isudata(tr)) { | ||
1187 | mt = tabref(udataV(&rd->argv[0])->metatable); | ||
1188 | mtref = emitir(IRT(IR_FLOAD, IRT_TAB), tr, IRFL_UDATA_META); | ||
1189 | } else { | ||
1190 | mt = tabref(basemt_obj(J2G(J), &rd->argv[0])); | ||
1191 | J->base[0] = mt ? lj_ir_ktab(J, mt) : TREF_NIL; | ||
1192 | return; | ||
1193 | } | ||
1194 | emitir(IRTG(mt ? IR_NE : IR_EQ, IRT_TAB), mtref, lj_ir_knull(J, IRT_TAB)); | ||
1195 | J->base[0] = mt ? mtref : TREF_NIL; | ||
1196 | } | ||
1197 | |||
856 | /* -- Record calls to fast functions -------------------------------------- */ | 1198 | /* -- Record calls to fast functions -------------------------------------- */ |
857 | 1199 | ||
858 | #include "lj_recdef.h" | 1200 | #include "lj_recdef.h" |
diff --git a/src/lj_frame.h b/src/lj_frame.h index e78a28a5..599a2d1c 100644 --- a/src/lj_frame.h +++ b/src/lj_frame.h | |||
@@ -11,7 +11,16 @@ | |||
11 | 11 | ||
12 | /* -- Lua stack frame ----------------------------------------------------- */ | 12 | /* -- Lua stack frame ----------------------------------------------------- */ |
13 | 13 | ||
14 | /* Frame type markers in callee function slot (callee base-1). */ | 14 | /* Frame type markers in LSB of PC (4-byte aligned) or delta (8-byte aligned: |
15 | ** | ||
16 | ** PC 00 Lua frame | ||
17 | ** delta 001 C frame | ||
18 | ** delta 010 Continuation frame | ||
19 | ** delta 011 Lua vararg frame | ||
20 | ** delta 101 cpcall() frame | ||
21 | ** delta 110 ff pcall() frame | ||
22 | ** delta 111 ff pcall() frame with active hook | ||
23 | */ | ||
15 | enum { | 24 | enum { |
16 | FRAME_LUA, FRAME_C, FRAME_CONT, FRAME_VARG, | 25 | FRAME_LUA, FRAME_C, FRAME_CONT, FRAME_VARG, |
17 | FRAME_LUAP, FRAME_CP, FRAME_PCALL, FRAME_PCALLH | 26 | FRAME_LUAP, FRAME_CP, FRAME_PCALL, FRAME_PCALLH |
@@ -21,9 +30,47 @@ enum { | |||
21 | #define FRAME_TYPEP (FRAME_TYPE|FRAME_P) | 30 | #define FRAME_TYPEP (FRAME_TYPE|FRAME_P) |
22 | 31 | ||
23 | /* Macros to access and modify Lua frames. */ | 32 | /* Macros to access and modify Lua frames. */ |
33 | #if LJ_FR2 | ||
34 | /* Two-slot frame info, required for 64 bit PC/GCRef: | ||
35 | ** | ||
36 | ** base-2 base-1 | base base+1 ... | ||
37 | ** [func PC/delta/ft] | [slots ...] | ||
38 | ** ^-- frame | ^-- base ^-- top | ||
39 | ** | ||
40 | ** Continuation frames: | ||
41 | ** | ||
42 | ** base-4 base-3 base-2 base-1 | base base+1 ... | ||
43 | ** [cont PC ] [func PC/delta/ft] | [slots ...] | ||
44 | ** ^-- frame | ^-- base ^-- top | ||
45 | */ | ||
46 | #define frame_gc(f) (gcval((f)-1)) | ||
47 | #define frame_ftsz(f) ((ptrdiff_t)(f)->ftsz) | ||
48 | #define frame_pc(f) ((const BCIns *)frame_ftsz(f)) | ||
49 | #define setframe_gc(f, p, tp) (setgcVraw((f)-1, (p), (tp))) | ||
50 | #define setframe_ftsz(f, sz) ((f)->ftsz = (sz)) | ||
51 | #define setframe_pc(f, pc) ((f)->ftsz = (int64_t)(intptr_t)(pc)) | ||
52 | #else | ||
53 | /* One-slot frame info, sufficient for 32 bit PC/GCRef: | ||
54 | ** | ||
55 | ** base-1 | base base+1 ... | ||
56 | ** lo hi | | ||
57 | ** [func | PC/delta/ft] | [slots ...] | ||
58 | ** ^-- frame | ^-- base ^-- top | ||
59 | ** | ||
60 | ** Continuation frames: | ||
61 | ** | ||
62 | ** base-2 base-1 | base base+1 ... | ||
63 | ** lo hi lo hi | | ||
64 | ** [cont | PC] [func | PC/delta/ft] | [slots ...] | ||
65 | ** ^-- frame | ^-- base ^-- top | ||
66 | */ | ||
24 | #define frame_gc(f) (gcref((f)->fr.func)) | 67 | #define frame_gc(f) (gcref((f)->fr.func)) |
25 | #define frame_func(f) (&frame_gc(f)->fn) | 68 | #define frame_ftsz(f) ((ptrdiff_t)(f)->fr.tp.ftsz) |
26 | #define frame_ftsz(f) ((f)->fr.tp.ftsz) | 69 | #define frame_pc(f) (mref((f)->fr.tp.pcr, const BCIns)) |
70 | #define setframe_gc(f, p, tp) (setgcref((f)->fr.func, (p)), UNUSED(tp)) | ||
71 | #define setframe_ftsz(f, sz) ((f)->fr.tp.ftsz = (int32_t)(sz)) | ||
72 | #define setframe_pc(f, pc) (setmref((f)->fr.tp.pcr, (pc))) | ||
73 | #endif | ||
27 | 74 | ||
28 | #define frame_type(f) (frame_ftsz(f) & FRAME_TYPE) | 75 | #define frame_type(f) (frame_ftsz(f) & FRAME_TYPE) |
29 | #define frame_typep(f) (frame_ftsz(f) & FRAME_TYPEP) | 76 | #define frame_typep(f) (frame_ftsz(f) & FRAME_TYPEP) |
@@ -33,33 +80,53 @@ enum { | |||
33 | #define frame_isvarg(f) (frame_typep(f) == FRAME_VARG) | 80 | #define frame_isvarg(f) (frame_typep(f) == FRAME_VARG) |
34 | #define frame_ispcall(f) ((frame_ftsz(f) & 6) == FRAME_PCALL) | 81 | #define frame_ispcall(f) ((frame_ftsz(f) & 6) == FRAME_PCALL) |
35 | 82 | ||
36 | #define frame_pc(f) (mref((f)->fr.tp.pcr, const BCIns)) | 83 | #define frame_func(f) (&frame_gc(f)->fn) |
84 | #define frame_delta(f) (frame_ftsz(f) >> 3) | ||
85 | #define frame_sized(f) (frame_ftsz(f) & ~FRAME_TYPEP) | ||
86 | |||
87 | enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */ | ||
88 | |||
89 | #if LJ_FR2 | ||
90 | #define frame_contpc(f) (frame_pc((f)-2)) | ||
91 | #define frame_contv(f) (((f)-3)->u64) | ||
92 | #else | ||
37 | #define frame_contpc(f) (frame_pc((f)-1)) | 93 | #define frame_contpc(f) (frame_pc((f)-1)) |
38 | #if LJ_64 | 94 | #define frame_contv(f) (((f)-1)->u32.lo) |
95 | #endif | ||
96 | #if LJ_FR2 | ||
97 | #define frame_contf(f) ((ASMFunction)(uintptr_t)((f)-3)->u64) | ||
98 | #elif LJ_64 | ||
39 | #define frame_contf(f) \ | 99 | #define frame_contf(f) \ |
40 | ((ASMFunction)(void *)((intptr_t)lj_vm_asm_begin + \ | 100 | ((ASMFunction)(void *)((intptr_t)lj_vm_asm_begin + \ |
41 | (intptr_t)(int32_t)((f)-1)->u32.lo)) | 101 | (intptr_t)(int32_t)((f)-1)->u32.lo)) |
42 | #else | 102 | #else |
43 | #define frame_contf(f) ((ASMFunction)gcrefp(((f)-1)->gcr, void)) | 103 | #define frame_contf(f) ((ASMFunction)gcrefp(((f)-1)->gcr, void)) |
44 | #endif | 104 | #endif |
45 | #define frame_delta(f) (frame_ftsz(f) >> 3) | 105 | #define frame_iscont_fficb(f) \ |
46 | #define frame_sized(f) (frame_ftsz(f) & ~FRAME_TYPEP) | 106 | (LJ_HASFFI && frame_contv(f) == LJ_CONT_FFI_CALLBACK) |
47 | 107 | ||
48 | #define frame_prevl(f) ((f) - (1+bc_a(frame_pc(f)[-1]))) | 108 | #define frame_prevl(f) ((f) - (1+LJ_FR2+bc_a(frame_pc(f)[-1]))) |
49 | #define frame_prevd(f) ((TValue *)((char *)(f) - frame_sized(f))) | 109 | #define frame_prevd(f) ((TValue *)((char *)(f) - frame_sized(f))) |
50 | #define frame_prev(f) (frame_islua(f)?frame_prevl(f):frame_prevd(f)) | 110 | #define frame_prev(f) (frame_islua(f)?frame_prevl(f):frame_prevd(f)) |
51 | /* Note: this macro does not skip over FRAME_VARG. */ | 111 | /* Note: this macro does not skip over FRAME_VARG. */ |
52 | 112 | ||
53 | #define setframe_pc(f, pc) (setmref((f)->fr.tp.pcr, (pc))) | ||
54 | #define setframe_ftsz(f, sz) ((f)->fr.tp.ftsz = (sz)) | ||
55 | #define setframe_gc(f, p) (setgcref((f)->fr.func, (p))) | ||
56 | |||
57 | /* -- C stack frame ------------------------------------------------------- */ | 113 | /* -- C stack frame ------------------------------------------------------- */ |
58 | 114 | ||
59 | /* Macros to access and modify the C stack frame chain. */ | 115 | /* Macros to access and modify the C stack frame chain. */ |
60 | 116 | ||
61 | /* These definitions must match with the arch-specific *.dasc files. */ | 117 | /* These definitions must match with the arch-specific *.dasc files. */ |
62 | #if LJ_TARGET_X86 | 118 | #if LJ_TARGET_X86 |
119 | #if LJ_ABI_WIN | ||
120 | #define CFRAME_OFS_ERRF (19*4) | ||
121 | #define CFRAME_OFS_NRES (18*4) | ||
122 | #define CFRAME_OFS_PREV (17*4) | ||
123 | #define CFRAME_OFS_L (16*4) | ||
124 | #define CFRAME_OFS_SEH (9*4) | ||
125 | #define CFRAME_OFS_PC (6*4) | ||
126 | #define CFRAME_OFS_MULTRES (5*4) | ||
127 | #define CFRAME_SIZE (16*4) | ||
128 | #define CFRAME_SHIFT_MULTRES 0 | ||
129 | #else | ||
63 | #define CFRAME_OFS_ERRF (15*4) | 130 | #define CFRAME_OFS_ERRF (15*4) |
64 | #define CFRAME_OFS_NRES (14*4) | 131 | #define CFRAME_OFS_NRES (14*4) |
65 | #define CFRAME_OFS_PREV (13*4) | 132 | #define CFRAME_OFS_PREV (13*4) |
@@ -68,24 +135,41 @@ enum { | |||
68 | #define CFRAME_OFS_MULTRES (5*4) | 135 | #define CFRAME_OFS_MULTRES (5*4) |
69 | #define CFRAME_SIZE (12*4) | 136 | #define CFRAME_SIZE (12*4) |
70 | #define CFRAME_SHIFT_MULTRES 0 | 137 | #define CFRAME_SHIFT_MULTRES 0 |
138 | #endif | ||
71 | #elif LJ_TARGET_X64 | 139 | #elif LJ_TARGET_X64 |
72 | #if LJ_ABI_WIN | 140 | #if LJ_ABI_WIN |
73 | #define CFRAME_OFS_PREV (13*8) | 141 | #define CFRAME_OFS_PREV (13*8) |
142 | #if LJ_GC64 | ||
143 | #define CFRAME_OFS_PC (12*8) | ||
144 | #define CFRAME_OFS_L (11*8) | ||
145 | #define CFRAME_OFS_ERRF (21*4) | ||
146 | #define CFRAME_OFS_NRES (20*4) | ||
147 | #define CFRAME_OFS_MULTRES (8*4) | ||
148 | #else | ||
74 | #define CFRAME_OFS_PC (25*4) | 149 | #define CFRAME_OFS_PC (25*4) |
75 | #define CFRAME_OFS_L (24*4) | 150 | #define CFRAME_OFS_L (24*4) |
76 | #define CFRAME_OFS_ERRF (23*4) | 151 | #define CFRAME_OFS_ERRF (23*4) |
77 | #define CFRAME_OFS_NRES (22*4) | 152 | #define CFRAME_OFS_NRES (22*4) |
78 | #define CFRAME_OFS_MULTRES (21*4) | 153 | #define CFRAME_OFS_MULTRES (21*4) |
154 | #endif | ||
79 | #define CFRAME_SIZE (10*8) | 155 | #define CFRAME_SIZE (10*8) |
80 | #define CFRAME_SIZE_JIT (CFRAME_SIZE + 9*16 + 4*8) | 156 | #define CFRAME_SIZE_JIT (CFRAME_SIZE + 9*16 + 4*8) |
81 | #define CFRAME_SHIFT_MULTRES 0 | 157 | #define CFRAME_SHIFT_MULTRES 0 |
82 | #else | 158 | #else |
83 | #define CFRAME_OFS_PREV (4*8) | 159 | #define CFRAME_OFS_PREV (4*8) |
160 | #if LJ_GC64 | ||
161 | #define CFRAME_OFS_PC (3*8) | ||
162 | #define CFRAME_OFS_L (2*8) | ||
163 | #define CFRAME_OFS_ERRF (3*4) | ||
164 | #define CFRAME_OFS_NRES (2*4) | ||
165 | #define CFRAME_OFS_MULTRES (0*4) | ||
166 | #else | ||
84 | #define CFRAME_OFS_PC (7*4) | 167 | #define CFRAME_OFS_PC (7*4) |
85 | #define CFRAME_OFS_L (6*4) | 168 | #define CFRAME_OFS_L (6*4) |
86 | #define CFRAME_OFS_ERRF (5*4) | 169 | #define CFRAME_OFS_ERRF (5*4) |
87 | #define CFRAME_OFS_NRES (4*4) | 170 | #define CFRAME_OFS_NRES (4*4) |
88 | #define CFRAME_OFS_MULTRES (1*4) | 171 | #define CFRAME_OFS_MULTRES (1*4) |
172 | #endif | ||
89 | #if LJ_NO_UNWIND | 173 | #if LJ_NO_UNWIND |
90 | #define CFRAME_SIZE (12*8) | 174 | #define CFRAME_SIZE (12*8) |
91 | #else | 175 | #else |
@@ -107,6 +191,15 @@ enum { | |||
107 | #define CFRAME_SIZE 64 | 191 | #define CFRAME_SIZE 64 |
108 | #endif | 192 | #endif |
109 | #define CFRAME_SHIFT_MULTRES 3 | 193 | #define CFRAME_SHIFT_MULTRES 3 |
194 | #elif LJ_TARGET_ARM64 | ||
195 | #define CFRAME_OFS_ERRF 196 | ||
196 | #define CFRAME_OFS_NRES 200 | ||
197 | #define CFRAME_OFS_PREV 160 | ||
198 | #define CFRAME_OFS_L 176 | ||
199 | #define CFRAME_OFS_PC 168 | ||
200 | #define CFRAME_OFS_MULTRES 192 | ||
201 | #define CFRAME_SIZE 208 | ||
202 | #define CFRAME_SHIFT_MULTRES 3 | ||
110 | #elif LJ_TARGET_PPC | 203 | #elif LJ_TARGET_PPC |
111 | #if LJ_TARGET_XBOX360 | 204 | #if LJ_TARGET_XBOX360 |
112 | #define CFRAME_OFS_ERRF 424 | 205 | #define CFRAME_OFS_ERRF 424 |
@@ -117,7 +210,7 @@ enum { | |||
117 | #define CFRAME_OFS_MULTRES 408 | 210 | #define CFRAME_OFS_MULTRES 408 |
118 | #define CFRAME_SIZE 384 | 211 | #define CFRAME_SIZE 384 |
119 | #define CFRAME_SHIFT_MULTRES 3 | 212 | #define CFRAME_SHIFT_MULTRES 3 |
120 | #elif LJ_ARCH_PPC64 | 213 | #elif LJ_ARCH_PPC32ON64 |
121 | #define CFRAME_OFS_ERRF 472 | 214 | #define CFRAME_OFS_ERRF 472 |
122 | #define CFRAME_OFS_NRES 468 | 215 | #define CFRAME_OFS_NRES 468 |
123 | #define CFRAME_OFS_PREV 448 | 216 | #define CFRAME_OFS_PREV 448 |
@@ -133,26 +226,43 @@ enum { | |||
133 | #define CFRAME_OFS_L 36 | 226 | #define CFRAME_OFS_L 36 |
134 | #define CFRAME_OFS_PC 32 | 227 | #define CFRAME_OFS_PC 32 |
135 | #define CFRAME_OFS_MULTRES 28 | 228 | #define CFRAME_OFS_MULTRES 28 |
136 | #define CFRAME_SIZE 272 | 229 | #define CFRAME_SIZE (LJ_ARCH_HASFPU ? 272 : 128) |
137 | #define CFRAME_SHIFT_MULTRES 3 | 230 | #define CFRAME_SHIFT_MULTRES 3 |
138 | #endif | 231 | #endif |
139 | #elif LJ_TARGET_PPCSPE | 232 | #elif LJ_TARGET_MIPS32 |
140 | #define CFRAME_OFS_ERRF 28 | 233 | #if LJ_ARCH_HASFPU |
141 | #define CFRAME_OFS_NRES 24 | ||
142 | #define CFRAME_OFS_PREV 20 | ||
143 | #define CFRAME_OFS_L 16 | ||
144 | #define CFRAME_OFS_PC 12 | ||
145 | #define CFRAME_OFS_MULTRES 8 | ||
146 | #define CFRAME_SIZE 184 | ||
147 | #define CFRAME_SHIFT_MULTRES 3 | ||
148 | #elif LJ_TARGET_MIPS | ||
149 | #define CFRAME_OFS_ERRF 124 | 234 | #define CFRAME_OFS_ERRF 124 |
150 | #define CFRAME_OFS_NRES 120 | 235 | #define CFRAME_OFS_NRES 120 |
151 | #define CFRAME_OFS_PREV 116 | 236 | #define CFRAME_OFS_PREV 116 |
152 | #define CFRAME_OFS_L 112 | 237 | #define CFRAME_OFS_L 112 |
238 | #define CFRAME_SIZE 112 | ||
239 | #else | ||
240 | #define CFRAME_OFS_ERRF 76 | ||
241 | #define CFRAME_OFS_NRES 72 | ||
242 | #define CFRAME_OFS_PREV 68 | ||
243 | #define CFRAME_OFS_L 64 | ||
244 | #define CFRAME_SIZE 64 | ||
245 | #endif | ||
153 | #define CFRAME_OFS_PC 20 | 246 | #define CFRAME_OFS_PC 20 |
154 | #define CFRAME_OFS_MULTRES 16 | 247 | #define CFRAME_OFS_MULTRES 16 |
155 | #define CFRAME_SIZE 112 | 248 | #define CFRAME_SHIFT_MULTRES 3 |
249 | #elif LJ_TARGET_MIPS64 | ||
250 | #if LJ_ARCH_HASFPU | ||
251 | #define CFRAME_OFS_ERRF 188 | ||
252 | #define CFRAME_OFS_NRES 184 | ||
253 | #define CFRAME_OFS_PREV 176 | ||
254 | #define CFRAME_OFS_L 168 | ||
255 | #define CFRAME_OFS_PC 160 | ||
256 | #define CFRAME_SIZE 192 | ||
257 | #else | ||
258 | #define CFRAME_OFS_ERRF 124 | ||
259 | #define CFRAME_OFS_NRES 120 | ||
260 | #define CFRAME_OFS_PREV 112 | ||
261 | #define CFRAME_OFS_L 104 | ||
262 | #define CFRAME_OFS_PC 96 | ||
263 | #define CFRAME_SIZE 128 | ||
264 | #endif | ||
265 | #define CFRAME_OFS_MULTRES 0 | ||
156 | #define CFRAME_SHIFT_MULTRES 3 | 266 | #define CFRAME_SHIFT_MULTRES 3 |
157 | #else | 267 | #else |
158 | #error "Missing CFRAME_* definitions for this architecture" | 268 | #error "Missing CFRAME_* definitions for this architecture" |
diff --git a/src/lj_gc.c b/src/lj_gc.c index 86fcd6eb..81439aab 100644 --- a/src/lj_gc.c +++ b/src/lj_gc.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include "lj_obj.h" | 12 | #include "lj_obj.h" |
13 | #include "lj_gc.h" | 13 | #include "lj_gc.h" |
14 | #include "lj_err.h" | 14 | #include "lj_err.h" |
15 | #include "lj_buf.h" | ||
15 | #include "lj_str.h" | 16 | #include "lj_str.h" |
16 | #include "lj_tab.h" | 17 | #include "lj_tab.h" |
17 | #include "lj_func.h" | 18 | #include "lj_func.h" |
@@ -24,6 +25,7 @@ | |||
24 | #include "lj_cdata.h" | 25 | #include "lj_cdata.h" |
25 | #endif | 26 | #endif |
26 | #include "lj_trace.h" | 27 | #include "lj_trace.h" |
28 | #include "lj_dispatch.h" | ||
27 | #include "lj_vm.h" | 29 | #include "lj_vm.h" |
28 | 30 | ||
29 | #define GCSTEPSIZE 1024u | 31 | #define GCSTEPSIZE 1024u |
@@ -68,7 +70,7 @@ static void gc_mark(global_State *g, GCobj *o) | |||
68 | gray2black(o); /* Closed upvalues are never gray. */ | 70 | gray2black(o); /* Closed upvalues are never gray. */ |
69 | } else if (gct != ~LJ_TSTR && gct != ~LJ_TCDATA) { | 71 | } else if (gct != ~LJ_TSTR && gct != ~LJ_TCDATA) { |
70 | lua_assert(gct == ~LJ_TFUNC || gct == ~LJ_TTAB || | 72 | lua_assert(gct == ~LJ_TFUNC || gct == ~LJ_TTAB || |
71 | gct == ~LJ_TTHREAD || gct == ~LJ_TPROTO); | 73 | gct == ~LJ_TTHREAD || gct == ~LJ_TPROTO || gct == ~LJ_TTRACE); |
72 | setgcrefr(o->gch.gclist, g->gc.gray); | 74 | setgcrefr(o->gch.gclist, g->gc.gray); |
73 | setgcref(g->gc.gray, o); | 75 | setgcref(g->gc.gray, o); |
74 | } | 76 | } |
@@ -244,6 +246,8 @@ static void gc_traverse_trace(global_State *g, GCtrace *T) | |||
244 | IRIns *ir = &T->ir[ref]; | 246 | IRIns *ir = &T->ir[ref]; |
245 | if (ir->o == IR_KGC) | 247 | if (ir->o == IR_KGC) |
246 | gc_markobj(g, ir_kgc(ir)); | 248 | gc_markobj(g, ir_kgc(ir)); |
249 | if (irt_is64(ir->t) && ir->o != IR_KNULL) | ||
250 | ref++; | ||
247 | } | 251 | } |
248 | if (T->link) gc_marktrace(g, T->link); | 252 | if (T->link) gc_marktrace(g, T->link); |
249 | if (T->nextroot) gc_marktrace(g, T->nextroot); | 253 | if (T->nextroot) gc_marktrace(g, T->nextroot); |
@@ -274,12 +278,12 @@ static MSize gc_traverse_frames(global_State *g, lua_State *th) | |||
274 | { | 278 | { |
275 | TValue *frame, *top = th->top-1, *bot = tvref(th->stack); | 279 | TValue *frame, *top = th->top-1, *bot = tvref(th->stack); |
276 | /* Note: extra vararg frame not skipped, marks function twice (harmless). */ | 280 | /* Note: extra vararg frame not skipped, marks function twice (harmless). */ |
277 | for (frame = th->base-1; frame > bot; frame = frame_prev(frame)) { | 281 | for (frame = th->base-1; frame > bot+LJ_FR2; frame = frame_prev(frame)) { |
278 | GCfunc *fn = frame_func(frame); | 282 | GCfunc *fn = frame_func(frame); |
279 | TValue *ftop = frame; | 283 | TValue *ftop = frame; |
280 | if (isluafunc(fn)) ftop += funcproto(fn)->framesize; | 284 | if (isluafunc(fn)) ftop += funcproto(fn)->framesize; |
281 | if (ftop > top) top = ftop; | 285 | if (ftop > top) top = ftop; |
282 | gc_markobj(g, fn); /* Need to mark hidden function (or L). */ | 286 | if (!LJ_FR2) gc_markobj(g, fn); /* Need to mark hidden function (or L). */ |
283 | } | 287 | } |
284 | top++; /* Correct bias of -1 (frame == base-1). */ | 288 | top++; /* Correct bias of -1 (frame == base-1). */ |
285 | if (top > tvref(th->maxstack)) top = tvref(th->maxstack); | 289 | if (top > tvref(th->maxstack)) top = tvref(th->maxstack); |
@@ -290,7 +294,7 @@ static MSize gc_traverse_frames(global_State *g, lua_State *th) | |||
290 | static void gc_traverse_thread(global_State *g, lua_State *th) | 294 | static void gc_traverse_thread(global_State *g, lua_State *th) |
291 | { | 295 | { |
292 | TValue *o, *top = th->top; | 296 | TValue *o, *top = th->top; |
293 | for (o = tvref(th->stack)+1; o < top; o++) | 297 | for (o = tvref(th->stack)+1+LJ_FR2; o < top; o++) |
294 | gc_marktv(g, o); | 298 | gc_marktv(g, o); |
295 | if (g->gc.state == GCSatomic) { | 299 | if (g->gc.state == GCSatomic) { |
296 | top = tvref(th->stack) + th->stacksize; | 300 | top = tvref(th->stack) + th->stacksize; |
@@ -355,15 +359,6 @@ static size_t gc_propagate_gray(global_State *g) | |||
355 | 359 | ||
356 | /* -- Sweep phase --------------------------------------------------------- */ | 360 | /* -- Sweep phase --------------------------------------------------------- */ |
357 | 361 | ||
358 | /* Try to shrink some common data structures. */ | ||
359 | static void gc_shrink(global_State *g, lua_State *L) | ||
360 | { | ||
361 | if (g->strnum <= (g->strmask >> 2) && g->strmask > LJ_MIN_STRTAB*2-1) | ||
362 | lj_str_resize(L, g->strmask >> 1); /* Shrink string table. */ | ||
363 | if (g->tmpbuf.sz > LJ_MIN_SBUF*2) | ||
364 | lj_str_resizebuf(L, &g->tmpbuf, g->tmpbuf.sz >> 1); /* Shrink temp buf. */ | ||
365 | } | ||
366 | |||
367 | /* Type of GC free functions. */ | 362 | /* Type of GC free functions. */ |
368 | typedef void (LJ_FASTCALL *GCFreeFunc)(global_State *g, GCobj *o); | 363 | typedef void (LJ_FASTCALL *GCFreeFunc)(global_State *g, GCobj *o); |
369 | 364 | ||
@@ -389,7 +384,7 @@ static const GCFreeFunc gc_freefunc[] = { | |||
389 | }; | 384 | }; |
390 | 385 | ||
391 | /* Full sweep of a GC list. */ | 386 | /* Full sweep of a GC list. */ |
392 | #define gc_fullsweep(g, p) gc_sweep(g, (p), LJ_MAX_MEM) | 387 | #define gc_fullsweep(g, p) gc_sweep(g, (p), ~(uint32_t)0) |
393 | 388 | ||
394 | /* Partial sweep of a GC list. */ | 389 | /* Partial sweep of a GC list. */ |
395 | static GCRef *gc_sweep(global_State *g, GCRef *p, uint32_t lim) | 390 | static GCRef *gc_sweep(global_State *g, GCRef *p, uint32_t lim) |
@@ -467,18 +462,21 @@ static void gc_call_finalizer(global_State *g, lua_State *L, | |||
467 | { | 462 | { |
468 | /* Save and restore lots of state around the __gc callback. */ | 463 | /* Save and restore lots of state around the __gc callback. */ |
469 | uint8_t oldh = hook_save(g); | 464 | uint8_t oldh = hook_save(g); |
470 | MSize oldt = g->gc.threshold; | 465 | GCSize oldt = g->gc.threshold; |
471 | int errcode; | 466 | int errcode; |
472 | TValue *top; | 467 | TValue *top; |
473 | lj_trace_abort(g); | 468 | lj_trace_abort(g); |
474 | top = L->top; | ||
475 | L->top = top+2; | ||
476 | hook_entergc(g); /* Disable hooks and new traces during __gc. */ | 469 | hook_entergc(g); /* Disable hooks and new traces during __gc. */ |
470 | if (LJ_HASPROFILE && (oldh & HOOK_PROFILE)) lj_dispatch_update(g); | ||
477 | g->gc.threshold = LJ_MAX_MEM; /* Prevent GC steps. */ | 471 | g->gc.threshold = LJ_MAX_MEM; /* Prevent GC steps. */ |
478 | copyTV(L, top, mo); | 472 | top = L->top; |
479 | setgcV(L, top+1, o, ~o->gch.gct); | 473 | copyTV(L, top++, mo); |
480 | errcode = lj_vm_pcall(L, top+1, 1+0, -1); /* Stack: |mo|o| -> | */ | 474 | if (LJ_FR2) setnilV(top++); |
475 | setgcV(L, top, o, ~o->gch.gct); | ||
476 | L->top = top+1; | ||
477 | errcode = lj_vm_pcall(L, top, 1+0, -1); /* Stack: |mo|o| -> | */ | ||
481 | hook_restore(g, oldh); | 478 | hook_restore(g, oldh); |
479 | if (LJ_HASPROFILE && (oldh & HOOK_PROFILE)) lj_dispatch_update(g); | ||
482 | g->gc.threshold = oldt; /* Restore GC threshold. */ | 480 | g->gc.threshold = oldt; /* Restore GC threshold. */ |
483 | if (errcode) | 481 | if (errcode) |
484 | lj_err_throw(L, errcode); /* Propagate errors. */ | 482 | lj_err_throw(L, errcode); /* Propagate errors. */ |
@@ -490,7 +488,7 @@ static void gc_finalize(lua_State *L) | |||
490 | global_State *g = G(L); | 488 | global_State *g = G(L); |
491 | GCobj *o = gcnext(gcref(g->gc.mmudata)); | 489 | GCobj *o = gcnext(gcref(g->gc.mmudata)); |
492 | cTValue *mo; | 490 | cTValue *mo; |
493 | lua_assert(gcref(g->jit_L) == NULL); /* Must not be called on trace. */ | 491 | lua_assert(tvref(g->jit_base) == NULL); /* Must not be called on trace. */ |
494 | /* Unchain from list of userdata to be finalized. */ | 492 | /* Unchain from list of userdata to be finalized. */ |
495 | if (o == gcref(g->gc.mmudata)) | 493 | if (o == gcref(g->gc.mmudata)) |
496 | setgcrefnull(g->gc.mmudata); | 494 | setgcrefnull(g->gc.mmudata); |
@@ -599,11 +597,13 @@ static void atomic(global_State *g, lua_State *L) | |||
599 | /* All marking done, clear weak tables. */ | 597 | /* All marking done, clear weak tables. */ |
600 | gc_clearweak(gcref(g->gc.weak)); | 598 | gc_clearweak(gcref(g->gc.weak)); |
601 | 599 | ||
600 | lj_buf_shrink(L, &g->tmpbuf); /* Shrink temp buffer. */ | ||
601 | |||
602 | /* Prepare for sweep phase. */ | 602 | /* Prepare for sweep phase. */ |
603 | g->gc.currentwhite = (uint8_t)otherwhite(g); /* Flip current white. */ | 603 | g->gc.currentwhite = (uint8_t)otherwhite(g); /* Flip current white. */ |
604 | g->strempty.marked = g->gc.currentwhite; | 604 | g->strempty.marked = g->gc.currentwhite; |
605 | setmref(g->gc.sweep, &g->gc.root); | 605 | setmref(g->gc.sweep, &g->gc.root); |
606 | g->gc.estimate = g->gc.total - (MSize)udsize; /* Initial estimate. */ | 606 | g->gc.estimate = g->gc.total - (GCSize)udsize; /* Initial estimate. */ |
607 | } | 607 | } |
608 | 608 | ||
609 | /* GC state machine. Returns a cost estimate for each step performed. */ | 609 | /* GC state machine. Returns a cost estimate for each step performed. */ |
@@ -620,14 +620,14 @@ static size_t gc_onestep(lua_State *L) | |||
620 | g->gc.state = GCSatomic; /* End of mark phase. */ | 620 | g->gc.state = GCSatomic; /* End of mark phase. */ |
621 | return 0; | 621 | return 0; |
622 | case GCSatomic: | 622 | case GCSatomic: |
623 | if (gcref(g->jit_L)) /* Don't run atomic phase on trace. */ | 623 | if (tvref(g->jit_base)) /* Don't run atomic phase on trace. */ |
624 | return LJ_MAX_MEM; | 624 | return LJ_MAX_MEM; |
625 | atomic(g, L); | 625 | atomic(g, L); |
626 | g->gc.state = GCSsweepstring; /* Start of sweep phase. */ | 626 | g->gc.state = GCSsweepstring; /* Start of sweep phase. */ |
627 | g->gc.sweepstr = 0; | 627 | g->gc.sweepstr = 0; |
628 | return 0; | 628 | return 0; |
629 | case GCSsweepstring: { | 629 | case GCSsweepstring: { |
630 | MSize old = g->gc.total; | 630 | GCSize old = g->gc.total; |
631 | gc_fullsweep(g, &g->strhash[g->gc.sweepstr++]); /* Sweep one chain. */ | 631 | gc_fullsweep(g, &g->strhash[g->gc.sweepstr++]); /* Sweep one chain. */ |
632 | if (g->gc.sweepstr > g->strmask) | 632 | if (g->gc.sweepstr > g->strmask) |
633 | g->gc.state = GCSsweep; /* All string hash chains sweeped. */ | 633 | g->gc.state = GCSsweep; /* All string hash chains sweeped. */ |
@@ -636,12 +636,13 @@ static size_t gc_onestep(lua_State *L) | |||
636 | return GCSWEEPCOST; | 636 | return GCSWEEPCOST; |
637 | } | 637 | } |
638 | case GCSsweep: { | 638 | case GCSsweep: { |
639 | MSize old = g->gc.total; | 639 | GCSize old = g->gc.total; |
640 | setmref(g->gc.sweep, gc_sweep(g, mref(g->gc.sweep, GCRef), GCSWEEPMAX)); | 640 | setmref(g->gc.sweep, gc_sweep(g, mref(g->gc.sweep, GCRef), GCSWEEPMAX)); |
641 | lua_assert(old >= g->gc.total); | 641 | lua_assert(old >= g->gc.total); |
642 | g->gc.estimate -= old - g->gc.total; | 642 | g->gc.estimate -= old - g->gc.total; |
643 | if (gcref(*mref(g->gc.sweep, GCRef)) == NULL) { | 643 | if (gcref(*mref(g->gc.sweep, GCRef)) == NULL) { |
644 | gc_shrink(g, L); | 644 | if (g->strnum <= (g->strmask >> 2) && g->strmask > LJ_MIN_STRTAB*2-1) |
645 | lj_str_resize(L, g->strmask >> 1); /* Shrink string table. */ | ||
645 | if (gcref(g->gc.mmudata)) { /* Need any finalizations? */ | 646 | if (gcref(g->gc.mmudata)) { /* Need any finalizations? */ |
646 | g->gc.state = GCSfinalize; | 647 | g->gc.state = GCSfinalize; |
647 | #if LJ_HASFFI | 648 | #if LJ_HASFFI |
@@ -656,7 +657,7 @@ static size_t gc_onestep(lua_State *L) | |||
656 | } | 657 | } |
657 | case GCSfinalize: | 658 | case GCSfinalize: |
658 | if (gcref(g->gc.mmudata) != NULL) { | 659 | if (gcref(g->gc.mmudata) != NULL) { |
659 | if (gcref(g->jit_L)) /* Don't call finalizers on trace. */ | 660 | if (tvref(g->jit_base)) /* Don't call finalizers on trace. */ |
660 | return LJ_MAX_MEM; | 661 | return LJ_MAX_MEM; |
661 | gc_finalize(L); /* Finalize one userdata object. */ | 662 | gc_finalize(L); /* Finalize one userdata object. */ |
662 | if (g->gc.estimate > GCFINALIZECOST) | 663 | if (g->gc.estimate > GCFINALIZECOST) |
@@ -679,7 +680,7 @@ static size_t gc_onestep(lua_State *L) | |||
679 | int LJ_FASTCALL lj_gc_step(lua_State *L) | 680 | int LJ_FASTCALL lj_gc_step(lua_State *L) |
680 | { | 681 | { |
681 | global_State *g = G(L); | 682 | global_State *g = G(L); |
682 | MSize lim; | 683 | GCSize lim; |
683 | int32_t ostate = g->vmstate; | 684 | int32_t ostate = g->vmstate; |
684 | setvmstate(g, GC); | 685 | setvmstate(g, GC); |
685 | lim = (GCSTEPSIZE/100) * g->gc.stepmul; | 686 | lim = (GCSTEPSIZE/100) * g->gc.stepmul; |
@@ -688,13 +689,13 @@ int LJ_FASTCALL lj_gc_step(lua_State *L) | |||
688 | if (g->gc.total > g->gc.threshold) | 689 | if (g->gc.total > g->gc.threshold) |
689 | g->gc.debt += g->gc.total - g->gc.threshold; | 690 | g->gc.debt += g->gc.total - g->gc.threshold; |
690 | do { | 691 | do { |
691 | lim -= (MSize)gc_onestep(L); | 692 | lim -= (GCSize)gc_onestep(L); |
692 | if (g->gc.state == GCSpause) { | 693 | if (g->gc.state == GCSpause) { |
693 | g->gc.threshold = (g->gc.estimate/100) * g->gc.pause; | 694 | g->gc.threshold = (g->gc.estimate/100) * g->gc.pause; |
694 | g->vmstate = ostate; | 695 | g->vmstate = ostate; |
695 | return 1; /* Finished a GC cycle. */ | 696 | return 1; /* Finished a GC cycle. */ |
696 | } | 697 | } |
697 | } while ((int32_t)lim > 0); | 698 | } while (sizeof(lim) == 8 ? ((int64_t)lim > 0) : ((int32_t)lim > 0)); |
698 | if (g->gc.debt < GCSTEPSIZE) { | 699 | if (g->gc.debt < GCSTEPSIZE) { |
699 | g->gc.threshold = g->gc.total + GCSTEPSIZE; | 700 | g->gc.threshold = g->gc.total + GCSTEPSIZE; |
700 | g->vmstate = ostate; | 701 | g->vmstate = ostate; |
@@ -718,8 +719,8 @@ void LJ_FASTCALL lj_gc_step_fixtop(lua_State *L) | |||
718 | /* Perform multiple GC steps. Called from JIT-compiled code. */ | 719 | /* Perform multiple GC steps. Called from JIT-compiled code. */ |
719 | int LJ_FASTCALL lj_gc_step_jit(global_State *g, MSize steps) | 720 | int LJ_FASTCALL lj_gc_step_jit(global_State *g, MSize steps) |
720 | { | 721 | { |
721 | lua_State *L = gco2th(gcref(g->jit_L)); | 722 | lua_State *L = gco2th(gcref(g->cur_L)); |
722 | L->base = mref(G(L)->jit_base, TValue); | 723 | L->base = tvref(G(L)->jit_base); |
723 | L->top = curr_topL(L); | 724 | L->top = curr_topL(L); |
724 | while (steps-- > 0 && lj_gc_step(L) == 0) | 725 | while (steps-- > 0 && lj_gc_step(L) == 0) |
725 | ; | 726 | ; |
@@ -813,7 +814,7 @@ void lj_gc_barriertrace(global_State *g, uint32_t traceno) | |||
813 | /* -- Allocator ----------------------------------------------------------- */ | 814 | /* -- Allocator ----------------------------------------------------------- */ |
814 | 815 | ||
815 | /* Call pluggable memory allocator to allocate or resize a fragment. */ | 816 | /* Call pluggable memory allocator to allocate or resize a fragment. */ |
816 | void *lj_mem_realloc(lua_State *L, void *p, MSize osz, MSize nsz) | 817 | void *lj_mem_realloc(lua_State *L, void *p, GCSize osz, GCSize nsz) |
817 | { | 818 | { |
818 | global_State *g = G(L); | 819 | global_State *g = G(L); |
819 | lua_assert((osz == 0) == (p == NULL)); | 820 | lua_assert((osz == 0) == (p == NULL)); |
@@ -821,19 +822,19 @@ void *lj_mem_realloc(lua_State *L, void *p, MSize osz, MSize nsz) | |||
821 | if (p == NULL && nsz > 0) | 822 | if (p == NULL && nsz > 0) |
822 | lj_err_mem(L); | 823 | lj_err_mem(L); |
823 | lua_assert((nsz == 0) == (p == NULL)); | 824 | lua_assert((nsz == 0) == (p == NULL)); |
824 | lua_assert(checkptr32(p)); | 825 | lua_assert(checkptrGC(p)); |
825 | g->gc.total = (g->gc.total - osz) + nsz; | 826 | g->gc.total = (g->gc.total - osz) + nsz; |
826 | return p; | 827 | return p; |
827 | } | 828 | } |
828 | 829 | ||
829 | /* Allocate new GC object and link it to the root set. */ | 830 | /* Allocate new GC object and link it to the root set. */ |
830 | void * LJ_FASTCALL lj_mem_newgco(lua_State *L, MSize size) | 831 | void * LJ_FASTCALL lj_mem_newgco(lua_State *L, GCSize size) |
831 | { | 832 | { |
832 | global_State *g = G(L); | 833 | global_State *g = G(L); |
833 | GCobj *o = (GCobj *)g->allocf(g->allocd, NULL, 0, size); | 834 | GCobj *o = (GCobj *)g->allocf(g->allocd, NULL, 0, size); |
834 | if (o == NULL) | 835 | if (o == NULL) |
835 | lj_err_mem(L); | 836 | lj_err_mem(L); |
836 | lua_assert(checkptr32(o)); | 837 | lua_assert(checkptrGC(o)); |
837 | g->gc.total += size; | 838 | g->gc.total += size; |
838 | setgcrefr(o->gch.nextgc, g->gc.root); | 839 | setgcrefr(o->gch.nextgc, g->gc.root); |
839 | setgcref(g->gc.root, o); | 840 | setgcref(g->gc.root, o); |
diff --git a/src/lj_gc.h b/src/lj_gc.h index e42dbcf0..1725c639 100644 --- a/src/lj_gc.h +++ b/src/lj_gc.h | |||
@@ -107,8 +107,8 @@ static LJ_AINLINE void lj_gc_barrierback(global_State *g, GCtab *t) | |||
107 | lj_gc_barrierf(G(L), obj2gco(p), obj2gco(o)); } | 107 | lj_gc_barrierf(G(L), obj2gco(p), obj2gco(o)); } |
108 | 108 | ||
109 | /* Allocator. */ | 109 | /* Allocator. */ |
110 | LJ_FUNC void *lj_mem_realloc(lua_State *L, void *p, MSize osz, MSize nsz); | 110 | LJ_FUNC void *lj_mem_realloc(lua_State *L, void *p, GCSize osz, GCSize nsz); |
111 | LJ_FUNC void * LJ_FASTCALL lj_mem_newgco(lua_State *L, MSize size); | 111 | LJ_FUNC void * LJ_FASTCALL lj_mem_newgco(lua_State *L, GCSize size); |
112 | LJ_FUNC void *lj_mem_grow(lua_State *L, void *p, | 112 | LJ_FUNC void *lj_mem_grow(lua_State *L, void *p, |
113 | MSize *szp, MSize lim, MSize esz); | 113 | MSize *szp, MSize lim, MSize esz); |
114 | 114 | ||
@@ -116,13 +116,13 @@ LJ_FUNC void *lj_mem_grow(lua_State *L, void *p, | |||
116 | 116 | ||
117 | static LJ_AINLINE void lj_mem_free(global_State *g, void *p, size_t osize) | 117 | static LJ_AINLINE void lj_mem_free(global_State *g, void *p, size_t osize) |
118 | { | 118 | { |
119 | g->gc.total -= (MSize)osize; | 119 | g->gc.total -= (GCSize)osize; |
120 | g->allocf(g->allocd, p, osize, 0); | 120 | g->allocf(g->allocd, p, osize, 0); |
121 | } | 121 | } |
122 | 122 | ||
123 | #define lj_mem_newvec(L, n, t) ((t *)lj_mem_new(L, (MSize)((n)*sizeof(t)))) | 123 | #define lj_mem_newvec(L, n, t) ((t *)lj_mem_new(L, (GCSize)((n)*sizeof(t)))) |
124 | #define lj_mem_reallocvec(L, p, on, n, t) \ | 124 | #define lj_mem_reallocvec(L, p, on, n, t) \ |
125 | ((p) = (t *)lj_mem_realloc(L, p, (on)*sizeof(t), (MSize)((n)*sizeof(t)))) | 125 | ((p) = (t *)lj_mem_realloc(L, p, (on)*sizeof(t), (GCSize)((n)*sizeof(t)))) |
126 | #define lj_mem_growvec(L, p, n, m, t) \ | 126 | #define lj_mem_growvec(L, p, n, m, t) \ |
127 | ((p) = (t *)lj_mem_grow(L, (p), &(n), (m), (MSize)sizeof(t))) | 127 | ((p) = (t *)lj_mem_grow(L, (p), &(n), (m), (MSize)sizeof(t))) |
128 | #define lj_mem_freevec(g, p, n, t) lj_mem_free(g, (p), (n)*sizeof(t)) | 128 | #define lj_mem_freevec(g, p, n, t) lj_mem_free(g, (p), (n)*sizeof(t)) |
diff --git a/src/lj_gdbjit.c b/src/lj_gdbjit.c index c2a9e901..a20d9ae2 100644 --- a/src/lj_gdbjit.c +++ b/src/lj_gdbjit.c | |||
@@ -14,6 +14,8 @@ | |||
14 | #include "lj_err.h" | 14 | #include "lj_err.h" |
15 | #include "lj_debug.h" | 15 | #include "lj_debug.h" |
16 | #include "lj_frame.h" | 16 | #include "lj_frame.h" |
17 | #include "lj_buf.h" | ||
18 | #include "lj_strfmt.h" | ||
17 | #include "lj_jit.h" | 19 | #include "lj_jit.h" |
18 | #include "lj_dispatch.h" | 20 | #include "lj_dispatch.h" |
19 | 21 | ||
@@ -294,6 +296,9 @@ enum { | |||
294 | #elif LJ_TARGET_ARM | 296 | #elif LJ_TARGET_ARM |
295 | DW_REG_SP = 13, | 297 | DW_REG_SP = 13, |
296 | DW_REG_RA = 14, | 298 | DW_REG_RA = 14, |
299 | #elif LJ_TARGET_ARM64 | ||
300 | DW_REG_SP = 31, | ||
301 | DW_REG_RA = 30, | ||
297 | #elif LJ_TARGET_PPC | 302 | #elif LJ_TARGET_PPC |
298 | DW_REG_SP = 1, | 303 | DW_REG_SP = 1, |
299 | DW_REG_RA = 65, | 304 | DW_REG_RA = 65, |
@@ -372,6 +377,8 @@ static const ELFheader elfhdr_template = { | |||
372 | .machine = 62, | 377 | .machine = 62, |
373 | #elif LJ_TARGET_ARM | 378 | #elif LJ_TARGET_ARM |
374 | .machine = 40, | 379 | .machine = 40, |
380 | #elif LJ_TARGET_ARM64 | ||
381 | .machine = 183, | ||
375 | #elif LJ_TARGET_PPC | 382 | #elif LJ_TARGET_PPC |
376 | .machine = 20, | 383 | .machine = 20, |
377 | #elif LJ_TARGET_MIPS | 384 | #elif LJ_TARGET_MIPS |
@@ -428,16 +435,6 @@ static void gdbjit_catnum(GDBJITctx *ctx, uint32_t n) | |||
428 | *ctx->p++ = '0' + n; | 435 | *ctx->p++ = '0' + n; |
429 | } | 436 | } |
430 | 437 | ||
431 | /* Add a ULEB128 value. */ | ||
432 | static void gdbjit_uleb128(GDBJITctx *ctx, uint32_t v) | ||
433 | { | ||
434 | uint8_t *p = ctx->p; | ||
435 | for (; v >= 0x80; v >>= 7) | ||
436 | *p++ = (uint8_t)((v & 0x7f) | 0x80); | ||
437 | *p++ = (uint8_t)v; | ||
438 | ctx->p = p; | ||
439 | } | ||
440 | |||
441 | /* Add a SLEB128 value. */ | 438 | /* Add a SLEB128 value. */ |
442 | static void gdbjit_sleb128(GDBJITctx *ctx, int32_t v) | 439 | static void gdbjit_sleb128(GDBJITctx *ctx, int32_t v) |
443 | { | 440 | { |
@@ -454,7 +451,7 @@ static void gdbjit_sleb128(GDBJITctx *ctx, int32_t v) | |||
454 | #define DU16(x) (*(uint16_t *)p = (x), p += 2) | 451 | #define DU16(x) (*(uint16_t *)p = (x), p += 2) |
455 | #define DU32(x) (*(uint32_t *)p = (x), p += 4) | 452 | #define DU32(x) (*(uint32_t *)p = (x), p += 4) |
456 | #define DADDR(x) (*(uintptr_t *)p = (x), p += sizeof(uintptr_t)) | 453 | #define DADDR(x) (*(uintptr_t *)p = (x), p += sizeof(uintptr_t)) |
457 | #define DUV(x) (ctx->p = p, gdbjit_uleb128(ctx, (x)), p = ctx->p) | 454 | #define DUV(x) (p = (uint8_t *)lj_strfmt_wuleb128((char *)p, (x))) |
458 | #define DSV(x) (ctx->p = p, gdbjit_sleb128(ctx, (x)), p = ctx->p) | 455 | #define DSV(x) (ctx->p = p, gdbjit_sleb128(ctx, (x)), p = ctx->p) |
459 | #define DSTR(str) (ctx->p = p, gdbjit_strz(ctx, (str)), p = ctx->p) | 456 | #define DSTR(str) (ctx->p = p, gdbjit_strz(ctx, (str)), p = ctx->p) |
460 | #define DALIGNNOP(s) while ((uintptr_t)p & ((s)-1)) *p++ = DW_CFA_nop | 457 | #define DALIGNNOP(s) while ((uintptr_t)p & ((s)-1)) *p++ = DW_CFA_nop |
@@ -564,13 +561,20 @@ static void LJ_FASTCALL gdbjit_ehframe(GDBJITctx *ctx) | |||
564 | DB(DW_CFA_offset|DW_REG_15); DUV(4); | 561 | DB(DW_CFA_offset|DW_REG_15); DUV(4); |
565 | DB(DW_CFA_offset|DW_REG_14); DUV(5); | 562 | DB(DW_CFA_offset|DW_REG_14); DUV(5); |
566 | /* Extra registers saved for JIT-compiled code. */ | 563 | /* Extra registers saved for JIT-compiled code. */ |
567 | DB(DW_CFA_offset|DW_REG_13); DUV(9); | 564 | DB(DW_CFA_offset|DW_REG_13); DUV(LJ_GC64 ? 10 : 9); |
568 | DB(DW_CFA_offset|DW_REG_12); DUV(10); | 565 | DB(DW_CFA_offset|DW_REG_12); DUV(LJ_GC64 ? 11 : 10); |
569 | #elif LJ_TARGET_ARM | 566 | #elif LJ_TARGET_ARM |
570 | { | 567 | { |
571 | int i; | 568 | int i; |
572 | for (i = 11; i >= 4; i--) { DB(DW_CFA_offset|i); DUV(2+(11-i)); } | 569 | for (i = 11; i >= 4; i--) { DB(DW_CFA_offset|i); DUV(2+(11-i)); } |
573 | } | 570 | } |
571 | #elif LJ_TARGET_ARM64 | ||
572 | { | ||
573 | int i; | ||
574 | DB(DW_CFA_offset|31); DUV(2); | ||
575 | for (i = 28; i >= 19; i--) { DB(DW_CFA_offset|i); DUV(3+(28-i)); } | ||
576 | for (i = 15; i >= 8; i--) { DB(DW_CFA_offset|32|i); DUV(28-i); } | ||
577 | } | ||
574 | #elif LJ_TARGET_PPC | 578 | #elif LJ_TARGET_PPC |
575 | { | 579 | { |
576 | int i; | 580 | int i; |
@@ -727,6 +731,20 @@ static void gdbjit_buildobj(GDBJITctx *ctx) | |||
727 | 731 | ||
728 | /* -- Interface to GDB JIT API -------------------------------------------- */ | 732 | /* -- Interface to GDB JIT API -------------------------------------------- */ |
729 | 733 | ||
734 | static int gdbjit_lock; | ||
735 | |||
736 | static void gdbjit_lock_acquire() | ||
737 | { | ||
738 | while (__sync_lock_test_and_set(&gdbjit_lock, 1)) { | ||
739 | /* Just spin; futexes or pthreads aren't worth the portability cost. */ | ||
740 | } | ||
741 | } | ||
742 | |||
743 | static void gdbjit_lock_release() | ||
744 | { | ||
745 | __sync_lock_release(&gdbjit_lock); | ||
746 | } | ||
747 | |||
730 | /* Add new entry to GDB JIT symbol chain. */ | 748 | /* Add new entry to GDB JIT symbol chain. */ |
731 | static void gdbjit_newentry(lua_State *L, GDBJITctx *ctx) | 749 | static void gdbjit_newentry(lua_State *L, GDBJITctx *ctx) |
732 | { | 750 | { |
@@ -738,6 +756,7 @@ static void gdbjit_newentry(lua_State *L, GDBJITctx *ctx) | |||
738 | ctx->T->gdbjit_entry = (void *)eo; | 756 | ctx->T->gdbjit_entry = (void *)eo; |
739 | /* Link new entry to chain and register it. */ | 757 | /* Link new entry to chain and register it. */ |
740 | eo->entry.prev_entry = NULL; | 758 | eo->entry.prev_entry = NULL; |
759 | gdbjit_lock_acquire(); | ||
741 | eo->entry.next_entry = __jit_debug_descriptor.first_entry; | 760 | eo->entry.next_entry = __jit_debug_descriptor.first_entry; |
742 | if (eo->entry.next_entry) | 761 | if (eo->entry.next_entry) |
743 | eo->entry.next_entry->prev_entry = &eo->entry; | 762 | eo->entry.next_entry->prev_entry = &eo->entry; |
@@ -747,6 +766,7 @@ static void gdbjit_newentry(lua_State *L, GDBJITctx *ctx) | |||
747 | __jit_debug_descriptor.relevant_entry = &eo->entry; | 766 | __jit_debug_descriptor.relevant_entry = &eo->entry; |
748 | __jit_debug_descriptor.action_flag = GDBJIT_REGISTER; | 767 | __jit_debug_descriptor.action_flag = GDBJIT_REGISTER; |
749 | __jit_debug_register_code(); | 768 | __jit_debug_register_code(); |
769 | gdbjit_lock_release(); | ||
750 | } | 770 | } |
751 | 771 | ||
752 | /* Add debug info for newly compiled trace and notify GDB. */ | 772 | /* Add debug info for newly compiled trace and notify GDB. */ |
@@ -778,6 +798,7 @@ void lj_gdbjit_deltrace(jit_State *J, GCtrace *T) | |||
778 | { | 798 | { |
779 | GDBJITentryobj *eo = (GDBJITentryobj *)T->gdbjit_entry; | 799 | GDBJITentryobj *eo = (GDBJITentryobj *)T->gdbjit_entry; |
780 | if (eo) { | 800 | if (eo) { |
801 | gdbjit_lock_acquire(); | ||
781 | if (eo->entry.prev_entry) | 802 | if (eo->entry.prev_entry) |
782 | eo->entry.prev_entry->next_entry = eo->entry.next_entry; | 803 | eo->entry.prev_entry->next_entry = eo->entry.next_entry; |
783 | else | 804 | else |
@@ -787,6 +808,7 @@ void lj_gdbjit_deltrace(jit_State *J, GCtrace *T) | |||
787 | __jit_debug_descriptor.relevant_entry = &eo->entry; | 808 | __jit_debug_descriptor.relevant_entry = &eo->entry; |
788 | __jit_debug_descriptor.action_flag = GDBJIT_UNREGISTER; | 809 | __jit_debug_descriptor.action_flag = GDBJIT_UNREGISTER; |
789 | __jit_debug_register_code(); | 810 | __jit_debug_register_code(); |
811 | gdbjit_lock_release(); | ||
790 | lj_mem_free(J2G(J), eo, eo->sz); | 812 | lj_mem_free(J2G(J), eo, eo->sz); |
791 | } | 813 | } |
792 | } | 814 | } |
diff --git a/src/lj_ir.c b/src/lj_ir.c index 38f289cb..1dd25f23 100644 --- a/src/lj_ir.c +++ b/src/lj_ir.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #if LJ_HASJIT | 15 | #if LJ_HASJIT |
16 | 16 | ||
17 | #include "lj_gc.h" | 17 | #include "lj_gc.h" |
18 | #include "lj_buf.h" | ||
18 | #include "lj_str.h" | 19 | #include "lj_str.h" |
19 | #include "lj_tab.h" | 20 | #include "lj_tab.h" |
20 | #include "lj_ir.h" | 21 | #include "lj_ir.h" |
@@ -29,6 +30,7 @@ | |||
29 | #endif | 30 | #endif |
30 | #include "lj_vm.h" | 31 | #include "lj_vm.h" |
31 | #include "lj_strscan.h" | 32 | #include "lj_strscan.h" |
33 | #include "lj_strfmt.h" | ||
32 | #include "lj_lib.h" | 34 | #include "lj_lib.h" |
33 | 35 | ||
34 | /* Some local macros to save typing. Undef'd at the end. */ | 36 | /* Some local macros to save typing. Undef'd at the end. */ |
@@ -89,7 +91,7 @@ static void lj_ir_growbot(jit_State *J) | |||
89 | IRIns *baseir = J->irbuf + J->irbotlim; | 91 | IRIns *baseir = J->irbuf + J->irbotlim; |
90 | MSize szins = J->irtoplim - J->irbotlim; | 92 | MSize szins = J->irtoplim - J->irbotlim; |
91 | lua_assert(szins != 0); | 93 | lua_assert(szins != 0); |
92 | lua_assert(J->cur.nk == J->irbotlim); | 94 | lua_assert(J->cur.nk == J->irbotlim || J->cur.nk-1 == J->irbotlim); |
93 | if (J->cur.nins + (szins >> 1) < J->irtoplim) { | 95 | if (J->cur.nins + (szins >> 1) < J->irtoplim) { |
94 | /* More than half of the buffer is free on top: shift up by a quarter. */ | 96 | /* More than half of the buffer is free on top: shift up by a quarter. */ |
95 | MSize ofs = szins >> 2; | 97 | MSize ofs = szins >> 2; |
@@ -143,6 +145,16 @@ TRef lj_ir_call(jit_State *J, IRCallID id, ...) | |||
143 | return emitir(CCI_OPTYPE(ci), tr, id); | 145 | return emitir(CCI_OPTYPE(ci), tr, id); |
144 | } | 146 | } |
145 | 147 | ||
148 | /* Load field of type t from GG_State + offset. Must be 32 bit aligned. */ | ||
149 | LJ_FUNC TRef lj_ir_ggfload(jit_State *J, IRType t, uintptr_t ofs) | ||
150 | { | ||
151 | lua_assert((ofs & 3) == 0); | ||
152 | ofs >>= 2; | ||
153 | lua_assert(ofs >= IRFL__MAX && ofs <= 0x3ff); /* 10 bit FOLD key limit. */ | ||
154 | lj_ir_set(J, IRT(IR_FLOAD, t), REF_NIL, ofs); | ||
155 | return lj_opt_fold(J); | ||
156 | } | ||
157 | |||
146 | /* -- Interning of constants ---------------------------------------------- */ | 158 | /* -- Interning of constants ---------------------------------------------- */ |
147 | 159 | ||
148 | /* | 160 | /* |
@@ -163,6 +175,24 @@ static LJ_AINLINE IRRef ir_nextk(jit_State *J) | |||
163 | return ref; | 175 | return ref; |
164 | } | 176 | } |
165 | 177 | ||
178 | /* Get ref of next 64 bit IR constant and optionally grow IR. | ||
179 | ** Note: this may invalidate all IRIns *! | ||
180 | */ | ||
181 | static LJ_AINLINE IRRef ir_nextk64(jit_State *J) | ||
182 | { | ||
183 | IRRef ref = J->cur.nk - 2; | ||
184 | lua_assert(J->state != LJ_TRACE_ASM); | ||
185 | if (LJ_UNLIKELY(ref < J->irbotlim)) lj_ir_growbot(J); | ||
186 | J->cur.nk = ref; | ||
187 | return ref; | ||
188 | } | ||
189 | |||
190 | #if LJ_GC64 | ||
191 | #define ir_nextkgc ir_nextk64 | ||
192 | #else | ||
193 | #define ir_nextkgc ir_nextk | ||
194 | #endif | ||
195 | |||
166 | /* Intern int32_t constant. */ | 196 | /* Intern int32_t constant. */ |
167 | TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k) | 197 | TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k) |
168 | { | 198 | { |
@@ -182,79 +212,21 @@ found: | |||
182 | return TREF(ref, IRT_INT); | 212 | return TREF(ref, IRT_INT); |
183 | } | 213 | } |
184 | 214 | ||
185 | /* The MRef inside the KNUM/KINT64 IR instructions holds the address of the | 215 | /* Intern 64 bit constant, given by its 64 bit pattern. */ |
186 | ** 64 bit constant. The constants themselves are stored in a chained array | 216 | TRef lj_ir_k64(jit_State *J, IROp op, uint64_t u64) |
187 | ** and shared across traces. | ||
188 | ** | ||
189 | ** Rationale for choosing this data structure: | ||
190 | ** - The address of the constants is embedded in the generated machine code | ||
191 | ** and must never move. A resizable array or hash table wouldn't work. | ||
192 | ** - Most apps need very few non-32 bit integer constants (less than a dozen). | ||
193 | ** - Linear search is hard to beat in terms of speed and low complexity. | ||
194 | */ | ||
195 | typedef struct K64Array { | ||
196 | MRef next; /* Pointer to next list. */ | ||
197 | MSize numk; /* Number of used elements in this array. */ | ||
198 | TValue k[LJ_MIN_K64SZ]; /* Array of constants. */ | ||
199 | } K64Array; | ||
200 | |||
201 | /* Free all chained arrays. */ | ||
202 | void lj_ir_k64_freeall(jit_State *J) | ||
203 | { | ||
204 | K64Array *k; | ||
205 | for (k = mref(J->k64, K64Array); k; ) { | ||
206 | K64Array *next = mref(k->next, K64Array); | ||
207 | lj_mem_free(J2G(J), k, sizeof(K64Array)); | ||
208 | k = next; | ||
209 | } | ||
210 | } | ||
211 | |||
212 | /* Find 64 bit constant in chained array or add it. */ | ||
213 | cTValue *lj_ir_k64_find(jit_State *J, uint64_t u64) | ||
214 | { | ||
215 | K64Array *k, *kp = NULL; | ||
216 | TValue *ntv; | ||
217 | MSize idx; | ||
218 | /* Search for the constant in the whole chain of arrays. */ | ||
219 | for (k = mref(J->k64, K64Array); k; k = mref(k->next, K64Array)) { | ||
220 | kp = k; /* Remember previous element in list. */ | ||
221 | for (idx = 0; idx < k->numk; idx++) { /* Search one array. */ | ||
222 | TValue *tv = &k->k[idx]; | ||
223 | if (tv->u64 == u64) /* Needed for +-0/NaN/absmask. */ | ||
224 | return tv; | ||
225 | } | ||
226 | } | ||
227 | /* Constant was not found, need to add it. */ | ||
228 | if (!(kp && kp->numk < LJ_MIN_K64SZ)) { /* Allocate a new array. */ | ||
229 | K64Array *kn = lj_mem_newt(J->L, sizeof(K64Array), K64Array); | ||
230 | setmref(kn->next, NULL); | ||
231 | kn->numk = 0; | ||
232 | if (kp) | ||
233 | setmref(kp->next, kn); /* Chain to the end of the list. */ | ||
234 | else | ||
235 | setmref(J->k64, kn); /* Link first array. */ | ||
236 | kp = kn; | ||
237 | } | ||
238 | ntv = &kp->k[kp->numk++]; /* Add to current array. */ | ||
239 | ntv->u64 = u64; | ||
240 | return ntv; | ||
241 | } | ||
242 | |||
243 | /* Intern 64 bit constant, given by its address. */ | ||
244 | TRef lj_ir_k64(jit_State *J, IROp op, cTValue *tv) | ||
245 | { | 217 | { |
246 | IRIns *ir, *cir = J->cur.ir; | 218 | IRIns *ir, *cir = J->cur.ir; |
247 | IRRef ref; | 219 | IRRef ref; |
248 | IRType t = op == IR_KNUM ? IRT_NUM : IRT_I64; | 220 | IRType t = op == IR_KNUM ? IRT_NUM : IRT_I64; |
249 | for (ref = J->chain[op]; ref; ref = cir[ref].prev) | 221 | for (ref = J->chain[op]; ref; ref = cir[ref].prev) |
250 | if (ir_k64(&cir[ref]) == tv) | 222 | if (ir_k64(&cir[ref])->u64 == u64) |
251 | goto found; | 223 | goto found; |
252 | ref = ir_nextk(J); | 224 | ref = ir_nextk64(J); |
253 | ir = IR(ref); | 225 | ir = IR(ref); |
254 | lua_assert(checkptr32(tv)); | 226 | ir[1].tv.u64 = u64; |
255 | setmref(ir->ptr, tv); | ||
256 | ir->t.irt = t; | 227 | ir->t.irt = t; |
257 | ir->o = op; | 228 | ir->o = op; |
229 | ir->op12 = 0; | ||
258 | ir->prev = J->chain[op]; | 230 | ir->prev = J->chain[op]; |
259 | J->chain[op] = (IRRef1)ref; | 231 | J->chain[op] = (IRRef1)ref; |
260 | found: | 232 | found: |
@@ -264,13 +236,13 @@ found: | |||
264 | /* Intern FP constant, given by its 64 bit pattern. */ | 236 | /* Intern FP constant, given by its 64 bit pattern. */ |
265 | TRef lj_ir_knum_u64(jit_State *J, uint64_t u64) | 237 | TRef lj_ir_knum_u64(jit_State *J, uint64_t u64) |
266 | { | 238 | { |
267 | return lj_ir_k64(J, IR_KNUM, lj_ir_k64_find(J, u64)); | 239 | return lj_ir_k64(J, IR_KNUM, u64); |
268 | } | 240 | } |
269 | 241 | ||
270 | /* Intern 64 bit integer constant. */ | 242 | /* Intern 64 bit integer constant. */ |
271 | TRef lj_ir_kint64(jit_State *J, uint64_t u64) | 243 | TRef lj_ir_kint64(jit_State *J, uint64_t u64) |
272 | { | 244 | { |
273 | return lj_ir_k64(J, IR_KINT64, lj_ir_k64_find(J, u64)); | 245 | return lj_ir_k64(J, IR_KINT64, u64); |
274 | } | 246 | } |
275 | 247 | ||
276 | /* Check whether a number is int and return it. -0 is NOT considered an int. */ | 248 | /* Check whether a number is int and return it. -0 is NOT considered an int. */ |
@@ -309,10 +281,11 @@ TRef lj_ir_kgc(jit_State *J, GCobj *o, IRType t) | |||
309 | for (ref = J->chain[IR_KGC]; ref; ref = cir[ref].prev) | 281 | for (ref = J->chain[IR_KGC]; ref; ref = cir[ref].prev) |
310 | if (ir_kgc(&cir[ref]) == o) | 282 | if (ir_kgc(&cir[ref]) == o) |
311 | goto found; | 283 | goto found; |
312 | ref = ir_nextk(J); | 284 | ref = ir_nextkgc(J); |
313 | ir = IR(ref); | 285 | ir = IR(ref); |
314 | /* NOBARRIER: Current trace is a GC root. */ | 286 | /* NOBARRIER: Current trace is a GC root. */ |
315 | setgcref(ir->gcr, o); | 287 | ir->op12 = 0; |
288 | setgcref(ir[LJ_GC64].gcr, o); | ||
316 | ir->t.irt = (uint8_t)t; | 289 | ir->t.irt = (uint8_t)t; |
317 | ir->o = IR_KGC; | 290 | ir->o = IR_KGC; |
318 | ir->prev = J->chain[IR_KGC]; | 291 | ir->prev = J->chain[IR_KGC]; |
@@ -321,24 +294,44 @@ found: | |||
321 | return TREF(ref, t); | 294 | return TREF(ref, t); |
322 | } | 295 | } |
323 | 296 | ||
324 | /* Intern 32 bit pointer constant. */ | 297 | /* Allocate GCtrace constant placeholder (no interning). */ |
298 | TRef lj_ir_ktrace(jit_State *J) | ||
299 | { | ||
300 | IRRef ref = ir_nextkgc(J); | ||
301 | IRIns *ir = IR(ref); | ||
302 | lua_assert(irt_toitype_(IRT_P64) == LJ_TTRACE); | ||
303 | ir->t.irt = IRT_P64; | ||
304 | ir->o = LJ_GC64 ? IR_KNUM : IR_KNULL; /* Not IR_KGC yet, but same size. */ | ||
305 | ir->op12 = 0; | ||
306 | ir->prev = 0; | ||
307 | return TREF(ref, IRT_P64); | ||
308 | } | ||
309 | |||
310 | /* Intern pointer constant. */ | ||
325 | TRef lj_ir_kptr_(jit_State *J, IROp op, void *ptr) | 311 | TRef lj_ir_kptr_(jit_State *J, IROp op, void *ptr) |
326 | { | 312 | { |
327 | IRIns *ir, *cir = J->cur.ir; | 313 | IRIns *ir, *cir = J->cur.ir; |
328 | IRRef ref; | 314 | IRRef ref; |
329 | lua_assert((void *)(intptr_t)i32ptr(ptr) == ptr); | 315 | #if LJ_64 && !LJ_GC64 |
316 | lua_assert((void *)(uintptr_t)u32ptr(ptr) == ptr); | ||
317 | #endif | ||
330 | for (ref = J->chain[op]; ref; ref = cir[ref].prev) | 318 | for (ref = J->chain[op]; ref; ref = cir[ref].prev) |
331 | if (mref(cir[ref].ptr, void) == ptr) | 319 | if (ir_kptr(&cir[ref]) == ptr) |
332 | goto found; | 320 | goto found; |
321 | #if LJ_GC64 | ||
322 | ref = ir_nextk64(J); | ||
323 | #else | ||
333 | ref = ir_nextk(J); | 324 | ref = ir_nextk(J); |
325 | #endif | ||
334 | ir = IR(ref); | 326 | ir = IR(ref); |
335 | setmref(ir->ptr, ptr); | 327 | ir->op12 = 0; |
336 | ir->t.irt = IRT_P32; | 328 | setmref(ir[LJ_GC64].ptr, ptr); |
329 | ir->t.irt = IRT_PGC; | ||
337 | ir->o = op; | 330 | ir->o = op; |
338 | ir->prev = J->chain[op]; | 331 | ir->prev = J->chain[op]; |
339 | J->chain[op] = (IRRef1)ref; | 332 | J->chain[op] = (IRRef1)ref; |
340 | found: | 333 | found: |
341 | return TREF(ref, IRT_P32); | 334 | return TREF(ref, IRT_PGC); |
342 | } | 335 | } |
343 | 336 | ||
344 | /* Intern typed NULL constant. */ | 337 | /* Intern typed NULL constant. */ |
@@ -390,12 +383,11 @@ void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir) | |||
390 | UNUSED(L); | 383 | UNUSED(L); |
391 | lua_assert(ir->o != IR_KSLOT); /* Common mistake. */ | 384 | lua_assert(ir->o != IR_KSLOT); /* Common mistake. */ |
392 | switch (ir->o) { | 385 | switch (ir->o) { |
393 | case IR_KPRI: setitype(tv, irt_toitype(ir->t)); break; | 386 | case IR_KPRI: setpriV(tv, irt_toitype(ir->t)); break; |
394 | case IR_KINT: setintV(tv, ir->i); break; | 387 | case IR_KINT: setintV(tv, ir->i); break; |
395 | case IR_KGC: setgcV(L, tv, ir_kgc(ir), irt_toitype(ir->t)); break; | 388 | case IR_KGC: setgcV(L, tv, ir_kgc(ir), irt_toitype(ir->t)); break; |
396 | case IR_KPTR: case IR_KKPTR: case IR_KNULL: | 389 | case IR_KPTR: case IR_KKPTR: setlightudV(tv, ir_kptr(ir)); break; |
397 | setlightudV(tv, mref(ir->ptr, void)); | 390 | case IR_KNULL: setlightudV(tv, NULL); break; |
398 | break; | ||
399 | case IR_KNUM: setnumV(tv, ir_knum(ir)->n); break; | 391 | case IR_KNUM: setnumV(tv, ir_knum(ir)->n); break; |
400 | #if LJ_HASFFI | 392 | #if LJ_HASFFI |
401 | case IR_KINT64: { | 393 | case IR_KINT64: { |
@@ -443,7 +435,8 @@ TRef LJ_FASTCALL lj_ir_tostr(jit_State *J, TRef tr) | |||
443 | if (!tref_isstr(tr)) { | 435 | if (!tref_isstr(tr)) { |
444 | if (!tref_isnumber(tr)) | 436 | if (!tref_isnumber(tr)) |
445 | lj_trace_err(J, LJ_TRERR_BADTYPE); | 437 | lj_trace_err(J, LJ_TRERR_BADTYPE); |
446 | tr = emitir(IRT(IR_TOSTR, IRT_STR), tr, 0); | 438 | tr = emitir(IRT(IR_TOSTR, IRT_STR), tr, |
439 | tref_isnum(tr) ? IRTOSTR_NUM : IRTOSTR_INT); | ||
447 | } | 440 | } |
448 | return tr; | 441 | return tr; |
449 | } | 442 | } |
diff --git a/src/lj_ir.h b/src/lj_ir.h index f91d6d0e..6bbe0a33 100644 --- a/src/lj_ir.h +++ b/src/lj_ir.h | |||
@@ -40,6 +40,7 @@ | |||
40 | _(USE, S , ref, ___) \ | 40 | _(USE, S , ref, ___) \ |
41 | _(PHI, S , ref, ref) \ | 41 | _(PHI, S , ref, ref) \ |
42 | _(RENAME, S , ref, lit) \ | 42 | _(RENAME, S , ref, lit) \ |
43 | _(PROF, S , ___, ___) \ | ||
43 | \ | 44 | \ |
44 | /* Constants. */ \ | 45 | /* Constants. */ \ |
45 | _(KPRI, N , ___, ___) \ | 46 | _(KPRI, N , ___, ___) \ |
@@ -96,6 +97,7 @@ | |||
96 | _(UREFC, LW, ref, lit) \ | 97 | _(UREFC, LW, ref, lit) \ |
97 | _(FREF, R , ref, lit) \ | 98 | _(FREF, R , ref, lit) \ |
98 | _(STRREF, N , ref, ref) \ | 99 | _(STRREF, N , ref, ref) \ |
100 | _(LREF, L , ___, ___) \ | ||
99 | \ | 101 | \ |
100 | /* Loads and Stores. These must be in the same order. */ \ | 102 | /* Loads and Stores. These must be in the same order. */ \ |
101 | _(ALOAD, L , ref, ___) \ | 103 | _(ALOAD, L , ref, ___) \ |
@@ -120,6 +122,11 @@ | |||
120 | _(CNEW, AW, ref, ref) \ | 122 | _(CNEW, AW, ref, ref) \ |
121 | _(CNEWI, NW, ref, ref) /* CSE is ok, not marked as A. */ \ | 123 | _(CNEWI, NW, ref, ref) /* CSE is ok, not marked as A. */ \ |
122 | \ | 124 | \ |
125 | /* Buffer operations. */ \ | ||
126 | _(BUFHDR, L , ref, lit) \ | ||
127 | _(BUFPUT, L , ref, ref) \ | ||
128 | _(BUFSTR, A , ref, ref) \ | ||
129 | \ | ||
123 | /* Barriers. */ \ | 130 | /* Barriers. */ \ |
124 | _(TBAR, S , ref, ___) \ | 131 | _(TBAR, S , ref, ___) \ |
125 | _(OBAR, S , ref, ref) \ | 132 | _(OBAR, S , ref, ref) \ |
@@ -128,11 +135,12 @@ | |||
128 | /* Type conversions. */ \ | 135 | /* Type conversions. */ \ |
129 | _(CONV, NW, ref, lit) \ | 136 | _(CONV, NW, ref, lit) \ |
130 | _(TOBIT, N , ref, ref) \ | 137 | _(TOBIT, N , ref, ref) \ |
131 | _(TOSTR, N , ref, ___) \ | 138 | _(TOSTR, N , ref, lit) \ |
132 | _(STRTO, N , ref, ___) \ | 139 | _(STRTO, N , ref, ___) \ |
133 | \ | 140 | \ |
134 | /* Calls. */ \ | 141 | /* Calls. */ \ |
135 | _(CALLN, N , ref, lit) \ | 142 | _(CALLN, N , ref, lit) \ |
143 | _(CALLA, A , ref, lit) \ | ||
136 | _(CALLL, L , ref, lit) \ | 144 | _(CALLL, L , ref, lit) \ |
137 | _(CALLS, S , ref, lit) \ | 145 | _(CALLS, S , ref, lit) \ |
138 | _(CALLXS, S , ref, ref) \ | 146 | _(CALLXS, S , ref, ref) \ |
@@ -186,6 +194,8 @@ IRFPMDEF(FPMENUM) | |||
186 | _(STR_LEN, offsetof(GCstr, len)) \ | 194 | _(STR_LEN, offsetof(GCstr, len)) \ |
187 | _(FUNC_ENV, offsetof(GCfunc, l.env)) \ | 195 | _(FUNC_ENV, offsetof(GCfunc, l.env)) \ |
188 | _(FUNC_PC, offsetof(GCfunc, l.pc)) \ | 196 | _(FUNC_PC, offsetof(GCfunc, l.pc)) \ |
197 | _(FUNC_FFID, offsetof(GCfunc, l.ffid)) \ | ||
198 | _(THREAD_ENV, offsetof(lua_State, env)) \ | ||
189 | _(TAB_META, offsetof(GCtab, metatable)) \ | 199 | _(TAB_META, offsetof(GCtab, metatable)) \ |
190 | _(TAB_ARRAY, offsetof(GCtab, array)) \ | 200 | _(TAB_ARRAY, offsetof(GCtab, array)) \ |
191 | _(TAB_NODE, offsetof(GCtab, node)) \ | 201 | _(TAB_NODE, offsetof(GCtab, node)) \ |
@@ -210,7 +220,7 @@ IRFLDEF(FLENUM) | |||
210 | 220 | ||
211 | /* SLOAD mode bits, stored in op2. */ | 221 | /* SLOAD mode bits, stored in op2. */ |
212 | #define IRSLOAD_PARENT 0x01 /* Coalesce with parent trace. */ | 222 | #define IRSLOAD_PARENT 0x01 /* Coalesce with parent trace. */ |
213 | #define IRSLOAD_FRAME 0x02 /* Load hiword of frame. */ | 223 | #define IRSLOAD_FRAME 0x02 /* Load 32 bits of ftsz. */ |
214 | #define IRSLOAD_TYPECHECK 0x04 /* Needs type check. */ | 224 | #define IRSLOAD_TYPECHECK 0x04 /* Needs type check. */ |
215 | #define IRSLOAD_CONVERT 0x08 /* Number to integer conversion. */ | 225 | #define IRSLOAD_CONVERT 0x08 /* Number to integer conversion. */ |
216 | #define IRSLOAD_READONLY 0x10 /* Read-only, omit slot store. */ | 226 | #define IRSLOAD_READONLY 0x10 /* Read-only, omit slot store. */ |
@@ -221,13 +231,16 @@ IRFLDEF(FLENUM) | |||
221 | #define IRXLOAD_VOLATILE 2 /* Load from volatile data. */ | 231 | #define IRXLOAD_VOLATILE 2 /* Load from volatile data. */ |
222 | #define IRXLOAD_UNALIGNED 4 /* Unaligned load. */ | 232 | #define IRXLOAD_UNALIGNED 4 /* Unaligned load. */ |
223 | 233 | ||
234 | /* BUFHDR mode, stored in op2. */ | ||
235 | #define IRBUFHDR_RESET 0 /* Reset buffer. */ | ||
236 | #define IRBUFHDR_APPEND 1 /* Append to buffer. */ | ||
237 | |||
224 | /* CONV mode, stored in op2. */ | 238 | /* CONV mode, stored in op2. */ |
225 | #define IRCONV_SRCMASK 0x001f /* Source IRType. */ | 239 | #define IRCONV_SRCMASK 0x001f /* Source IRType. */ |
226 | #define IRCONV_DSTMASK 0x03e0 /* Dest. IRType (also in ir->t). */ | 240 | #define IRCONV_DSTMASK 0x03e0 /* Dest. IRType (also in ir->t). */ |
227 | #define IRCONV_DSH 5 | 241 | #define IRCONV_DSH 5 |
228 | #define IRCONV_NUM_INT ((IRT_NUM<<IRCONV_DSH)|IRT_INT) | 242 | #define IRCONV_NUM_INT ((IRT_NUM<<IRCONV_DSH)|IRT_INT) |
229 | #define IRCONV_INT_NUM ((IRT_INT<<IRCONV_DSH)|IRT_NUM) | 243 | #define IRCONV_INT_NUM ((IRT_INT<<IRCONV_DSH)|IRT_NUM) |
230 | #define IRCONV_TRUNC 0x0400 /* Truncate number to integer. */ | ||
231 | #define IRCONV_SEXT 0x0800 /* Sign-extend integer to integer. */ | 244 | #define IRCONV_SEXT 0x0800 /* Sign-extend integer to integer. */ |
232 | #define IRCONV_MODEMASK 0x0fff | 245 | #define IRCONV_MODEMASK 0x0fff |
233 | #define IRCONV_CONVMASK 0xf000 | 246 | #define IRCONV_CONVMASK 0xf000 |
@@ -238,6 +251,11 @@ IRFLDEF(FLENUM) | |||
238 | #define IRCONV_INDEX (2<<IRCONV_CSH) /* Check + special backprop rules. */ | 251 | #define IRCONV_INDEX (2<<IRCONV_CSH) /* Check + special backprop rules. */ |
239 | #define IRCONV_CHECK (3<<IRCONV_CSH) /* Number checked for integerness. */ | 252 | #define IRCONV_CHECK (3<<IRCONV_CSH) /* Number checked for integerness. */ |
240 | 253 | ||
254 | /* TOSTR mode, stored in op2. */ | ||
255 | #define IRTOSTR_INT 0 /* Convert integer to string. */ | ||
256 | #define IRTOSTR_NUM 1 /* Convert number to string. */ | ||
257 | #define IRTOSTR_CHAR 2 /* Convert char value to string. */ | ||
258 | |||
241 | /* -- IR operands --------------------------------------------------------- */ | 259 | /* -- IR operands --------------------------------------------------------- */ |
242 | 260 | ||
243 | /* IR operand mode (2 bit). */ | 261 | /* IR operand mode (2 bit). */ |
@@ -276,7 +294,9 @@ LJ_DATA const uint8_t lj_ir_mode[IR__MAX+1]; | |||
276 | 294 | ||
277 | /* -- IR instruction types ------------------------------------------------ */ | 295 | /* -- IR instruction types ------------------------------------------------ */ |
278 | 296 | ||
279 | /* Map of itypes to non-negative numbers. ORDER LJ_T. | 297 | #define IRTSIZE_PGC (LJ_GC64 ? 8 : 4) |
298 | |||
299 | /* Map of itypes to non-negative numbers and their sizes. ORDER LJ_T. | ||
280 | ** LJ_TUPVAL/LJ_TTRACE never appear in a TValue. Use these itypes for | 300 | ** LJ_TUPVAL/LJ_TTRACE never appear in a TValue. Use these itypes for |
281 | ** IRT_P32 and IRT_P64, which never escape the IR. | 301 | ** IRT_P32 and IRT_P64, which never escape the IR. |
282 | ** The various integers are only used in the IR and can only escape to | 302 | ** The various integers are only used in the IR and can only escape to |
@@ -284,12 +304,13 @@ LJ_DATA const uint8_t lj_ir_mode[IR__MAX+1]; | |||
284 | ** contiguous and next to IRT_NUM (see the typerange macros below). | 304 | ** contiguous and next to IRT_NUM (see the typerange macros below). |
285 | */ | 305 | */ |
286 | #define IRTDEF(_) \ | 306 | #define IRTDEF(_) \ |
287 | _(NIL, 4) _(FALSE, 4) _(TRUE, 4) _(LIGHTUD, LJ_64 ? 8 : 4) _(STR, 4) \ | 307 | _(NIL, 4) _(FALSE, 4) _(TRUE, 4) _(LIGHTUD, LJ_64 ? 8 : 4) \ |
288 | _(P32, 4) _(THREAD, 4) _(PROTO, 4) _(FUNC, 4) _(P64, 8) _(CDATA, 4) \ | 308 | _(STR, IRTSIZE_PGC) _(P32, 4) _(THREAD, IRTSIZE_PGC) _(PROTO, IRTSIZE_PGC) \ |
289 | _(TAB, 4) _(UDATA, 4) \ | 309 | _(FUNC, IRTSIZE_PGC) _(P64, 8) _(CDATA, IRTSIZE_PGC) _(TAB, IRTSIZE_PGC) \ |
310 | _(UDATA, IRTSIZE_PGC) \ | ||
290 | _(FLOAT, 4) _(NUM, 8) _(I8, 1) _(U8, 1) _(I16, 2) _(U16, 2) \ | 311 | _(FLOAT, 4) _(NUM, 8) _(I8, 1) _(U8, 1) _(I16, 2) _(U16, 2) \ |
291 | _(INT, 4) _(U32, 4) _(I64, 8) _(U64, 8) \ | 312 | _(INT, 4) _(U32, 4) _(I64, 8) _(U64, 8) \ |
292 | _(SOFTFP, 4) /* There is room for 9 more types. */ | 313 | _(SOFTFP, 4) /* There is room for 8 more types. */ |
293 | 314 | ||
294 | /* IR result type and flags (8 bit). */ | 315 | /* IR result type and flags (8 bit). */ |
295 | typedef enum { | 316 | typedef enum { |
@@ -300,6 +321,8 @@ IRTDEF(IRTENUM) | |||
300 | 321 | ||
301 | /* Native pointer type and the corresponding integer type. */ | 322 | /* Native pointer type and the corresponding integer type. */ |
302 | IRT_PTR = LJ_64 ? IRT_P64 : IRT_P32, | 323 | IRT_PTR = LJ_64 ? IRT_P64 : IRT_P32, |
324 | IRT_PGC = LJ_GC64 ? IRT_P64 : IRT_P32, | ||
325 | IRT_IGC = LJ_GC64 ? IRT_I64 : IRT_INT, | ||
303 | IRT_INTP = LJ_64 ? IRT_I64 : IRT_INT, | 326 | IRT_INTP = LJ_64 ? IRT_I64 : IRT_INT, |
304 | IRT_UINTP = LJ_64 ? IRT_U64 : IRT_U32, | 327 | IRT_UINTP = LJ_64 ? IRT_U64 : IRT_U32, |
305 | 328 | ||
@@ -353,7 +376,14 @@ typedef struct IRType1 { uint8_t irt; } IRType1; | |||
353 | #define irt_isaddr(t) (irt_typerange((t), IRT_LIGHTUD, IRT_UDATA)) | 376 | #define irt_isaddr(t) (irt_typerange((t), IRT_LIGHTUD, IRT_UDATA)) |
354 | #define irt_isint64(t) (irt_typerange((t), IRT_I64, IRT_U64)) | 377 | #define irt_isint64(t) (irt_typerange((t), IRT_I64, IRT_U64)) |
355 | 378 | ||
356 | #if LJ_64 | 379 | #if LJ_GC64 |
380 | /* Include IRT_NIL, so IR(ASMREF_L) (aka REF_NIL) is considered 64 bit. */ | ||
381 | #define IRT_IS64 \ | ||
382 | ((1u<<IRT_NUM)|(1u<<IRT_I64)|(1u<<IRT_U64)|(1u<<IRT_P64)|\ | ||
383 | (1u<<IRT_LIGHTUD)|(1u<<IRT_STR)|(1u<<IRT_THREAD)|(1u<<IRT_PROTO)|\ | ||
384 | (1u<<IRT_FUNC)|(1u<<IRT_CDATA)|(1u<<IRT_TAB)|(1u<<IRT_UDATA)|\ | ||
385 | (1u<<IRT_NIL)) | ||
386 | #elif LJ_64 | ||
357 | #define IRT_IS64 \ | 387 | #define IRT_IS64 \ |
358 | ((1u<<IRT_NUM)|(1u<<IRT_I64)|(1u<<IRT_U64)|(1u<<IRT_P64)|(1u<<IRT_LIGHTUD)) | 388 | ((1u<<IRT_NUM)|(1u<<IRT_I64)|(1u<<IRT_U64)|(1u<<IRT_P64)|(1u<<IRT_LIGHTUD)) |
359 | #else | 389 | #else |
@@ -374,7 +404,7 @@ static LJ_AINLINE IRType itype2irt(const TValue *tv) | |||
374 | return IRT_INT; | 404 | return IRT_INT; |
375 | else if (tvisnum(tv)) | 405 | else if (tvisnum(tv)) |
376 | return IRT_NUM; | 406 | return IRT_NUM; |
377 | #if LJ_64 | 407 | #if LJ_64 && !LJ_GC64 |
378 | else if (tvislightud(tv)) | 408 | else if (tvislightud(tv)) |
379 | return IRT_LIGHTUD; | 409 | return IRT_LIGHTUD; |
380 | #endif | 410 | #endif |
@@ -384,7 +414,7 @@ static LJ_AINLINE IRType itype2irt(const TValue *tv) | |||
384 | 414 | ||
385 | static LJ_AINLINE uint32_t irt_toitype_(IRType t) | 415 | static LJ_AINLINE uint32_t irt_toitype_(IRType t) |
386 | { | 416 | { |
387 | lua_assert(!LJ_64 || t != IRT_LIGHTUD); | 417 | lua_assert(!LJ_64 || LJ_GC64 || t != IRT_LIGHTUD); |
388 | if (LJ_DUALNUM && t > IRT_NUM) { | 418 | if (LJ_DUALNUM && t > IRT_NUM) { |
389 | return LJ_TISNUM; | 419 | return LJ_TISNUM; |
390 | } else { | 420 | } else { |
@@ -464,6 +494,7 @@ typedef uint32_t TRef; | |||
464 | #define tref_isnil(tr) (tref_istype((tr), IRT_NIL)) | 494 | #define tref_isnil(tr) (tref_istype((tr), IRT_NIL)) |
465 | #define tref_isfalse(tr) (tref_istype((tr), IRT_FALSE)) | 495 | #define tref_isfalse(tr) (tref_istype((tr), IRT_FALSE)) |
466 | #define tref_istrue(tr) (tref_istype((tr), IRT_TRUE)) | 496 | #define tref_istrue(tr) (tref_istype((tr), IRT_TRUE)) |
497 | #define tref_islightud(tr) (tref_istype((tr), IRT_LIGHTUD)) | ||
467 | #define tref_isstr(tr) (tref_istype((tr), IRT_STR)) | 498 | #define tref_isstr(tr) (tref_istype((tr), IRT_STR)) |
468 | #define tref_isfunc(tr) (tref_istype((tr), IRT_FUNC)) | 499 | #define tref_isfunc(tr) (tref_istype((tr), IRT_FUNC)) |
469 | #define tref_iscdata(tr) (tref_istype((tr), IRT_CDATA)) | 500 | #define tref_iscdata(tr) (tref_istype((tr), IRT_CDATA)) |
@@ -496,7 +527,9 @@ typedef uint32_t TRef; | |||
496 | ** +-------+-------+---+---+---+---+ | 527 | ** +-------+-------+---+---+---+---+ |
497 | ** | op1 | op2 | t | o | r | s | | 528 | ** | op1 | op2 | t | o | r | s | |
498 | ** +-------+-------+---+---+---+---+ | 529 | ** +-------+-------+---+---+---+---+ |
499 | ** | op12/i/gco | ot | prev | (alternative fields in union) | 530 | ** | op12/i/gco32 | ot | prev | (alternative fields in union) |
531 | ** +-------+-------+---+---+---+---+ | ||
532 | ** | TValue/gco64 | (2nd IR slot for 64 bit constants) | ||
500 | ** +---------------+-------+-------+ | 533 | ** +---------------+-------+-------+ |
501 | ** 32 16 16 | 534 | ** 32 16 16 |
502 | ** | 535 | ** |
@@ -524,21 +557,27 @@ typedef union IRIns { | |||
524 | ) | 557 | ) |
525 | }; | 558 | }; |
526 | int32_t i; /* 32 bit signed integer literal (overlaps op12). */ | 559 | int32_t i; /* 32 bit signed integer literal (overlaps op12). */ |
527 | GCRef gcr; /* GCobj constant (overlaps op12). */ | 560 | GCRef gcr; /* GCobj constant (overlaps op12 or entire slot). */ |
528 | MRef ptr; /* Pointer constant (overlaps op12). */ | 561 | MRef ptr; /* Pointer constant (overlaps op12 or entire slot). */ |
562 | TValue tv; /* TValue constant (overlaps entire slot). */ | ||
529 | } IRIns; | 563 | } IRIns; |
530 | 564 | ||
531 | #define ir_kgc(ir) check_exp((ir)->o == IR_KGC, gcref((ir)->gcr)) | 565 | #define ir_isk64(ir) \ |
566 | ((ir)->o == IR_KNUM || (ir)->o == IR_KINT64 || \ | ||
567 | (LJ_GC64 && \ | ||
568 | ((ir)->o == IR_KGC || (ir)->o == IR_KPTR || (ir)->o == IR_KKPTR))) | ||
569 | |||
570 | #define ir_kgc(ir) check_exp((ir)->o == IR_KGC, gcref((ir)[LJ_GC64].gcr)) | ||
532 | #define ir_kstr(ir) (gco2str(ir_kgc((ir)))) | 571 | #define ir_kstr(ir) (gco2str(ir_kgc((ir)))) |
533 | #define ir_ktab(ir) (gco2tab(ir_kgc((ir)))) | 572 | #define ir_ktab(ir) (gco2tab(ir_kgc((ir)))) |
534 | #define ir_kfunc(ir) (gco2func(ir_kgc((ir)))) | 573 | #define ir_kfunc(ir) (gco2func(ir_kgc((ir)))) |
535 | #define ir_kcdata(ir) (gco2cd(ir_kgc((ir)))) | 574 | #define ir_kcdata(ir) (gco2cd(ir_kgc((ir)))) |
536 | #define ir_knum(ir) check_exp((ir)->o == IR_KNUM, mref((ir)->ptr, cTValue)) | 575 | #define ir_knum(ir) check_exp((ir)->o == IR_KNUM, &(ir)[1].tv) |
537 | #define ir_kint64(ir) check_exp((ir)->o == IR_KINT64, mref((ir)->ptr,cTValue)) | 576 | #define ir_kint64(ir) check_exp((ir)->o == IR_KINT64, &(ir)[1].tv) |
538 | #define ir_k64(ir) \ | 577 | #define ir_k64(ir) check_exp(ir_isk64(ir), &(ir)[1].tv) |
539 | check_exp((ir)->o == IR_KNUM || (ir)->o == IR_KINT64, mref((ir)->ptr,cTValue)) | ||
540 | #define ir_kptr(ir) \ | 578 | #define ir_kptr(ir) \ |
541 | check_exp((ir)->o == IR_KPTR || (ir)->o == IR_KKPTR, mref((ir)->ptr, void)) | 579 | check_exp((ir)->o == IR_KPTR || (ir)->o == IR_KKPTR, \ |
580 | mref((ir)[LJ_GC64].ptr, void)) | ||
542 | 581 | ||
543 | /* A store or any other op with a non-weak guard has a side-effect. */ | 582 | /* A store or any other op with a non-weak guard has a side-effect. */ |
544 | static LJ_AINLINE int ir_sideeff(IRIns *ir) | 583 | static LJ_AINLINE int ir_sideeff(IRIns *ir) |
diff --git a/src/lj_ircall.h b/src/lj_ircall.h index aae9adbb..f4f3f781 100644 --- a/src/lj_ircall.h +++ b/src/lj_ircall.h | |||
@@ -16,7 +16,7 @@ typedef struct CCallInfo { | |||
16 | uint32_t flags; /* Number of arguments and flags. */ | 16 | uint32_t flags; /* Number of arguments and flags. */ |
17 | } CCallInfo; | 17 | } CCallInfo; |
18 | 18 | ||
19 | #define CCI_NARGS(ci) ((ci)->flags & 0xff) /* Extract # of args. */ | 19 | #define CCI_NARGS(ci) ((ci)->flags & 0xff) /* # of args. */ |
20 | #define CCI_NARGS_MAX 32 /* Max. # of args. */ | 20 | #define CCI_NARGS_MAX 32 /* Max. # of args. */ |
21 | 21 | ||
22 | #define CCI_OTSHIFT 16 | 22 | #define CCI_OTSHIFT 16 |
@@ -25,6 +25,7 @@ typedef struct CCallInfo { | |||
25 | #define CCI_OP(ci) ((ci)->flags >> CCI_OPSHIFT) /* Get op. */ | 25 | #define CCI_OP(ci) ((ci)->flags >> CCI_OPSHIFT) /* Get op. */ |
26 | 26 | ||
27 | #define CCI_CALL_N (IR_CALLN << CCI_OPSHIFT) | 27 | #define CCI_CALL_N (IR_CALLN << CCI_OPSHIFT) |
28 | #define CCI_CALL_A (IR_CALLA << CCI_OPSHIFT) | ||
28 | #define CCI_CALL_L (IR_CALLL << CCI_OPSHIFT) | 29 | #define CCI_CALL_L (IR_CALLL << CCI_OPSHIFT) |
29 | #define CCI_CALL_S (IR_CALLS << CCI_OPSHIFT) | 30 | #define CCI_CALL_S (IR_CALLS << CCI_OPSHIFT) |
30 | #define CCI_CALL_FN (CCI_CALL_N|CCI_CC_FASTCALL) | 31 | #define CCI_CALL_FN (CCI_CALL_N|CCI_CC_FASTCALL) |
@@ -45,6 +46,17 @@ typedef struct CCallInfo { | |||
45 | #define CCI_CC_FASTCALL 0x2000 /* Fastcall calling convention. */ | 46 | #define CCI_CC_FASTCALL 0x2000 /* Fastcall calling convention. */ |
46 | #define CCI_CC_STDCALL 0x3000 /* Stdcall calling convention. */ | 47 | #define CCI_CC_STDCALL 0x3000 /* Stdcall calling convention. */ |
47 | 48 | ||
49 | /* Extra args for SOFTFP, SPLIT 64 bit. */ | ||
50 | #define CCI_XARGS_SHIFT 14 | ||
51 | #define CCI_XARGS(ci) (((ci)->flags >> CCI_XARGS_SHIFT) & 3) | ||
52 | #define CCI_XA (1u << CCI_XARGS_SHIFT) | ||
53 | |||
54 | #if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI) | ||
55 | #define CCI_XNARGS(ci) (CCI_NARGS((ci)) + CCI_XARGS((ci))) | ||
56 | #else | ||
57 | #define CCI_XNARGS(ci) CCI_NARGS((ci)) | ||
58 | #endif | ||
59 | |||
48 | /* Helpers for conditional function definitions. */ | 60 | /* Helpers for conditional function definitions. */ |
49 | #define IRCALLCOND_ANY(x) x | 61 | #define IRCALLCOND_ANY(x) x |
50 | 62 | ||
@@ -66,6 +78,18 @@ typedef struct CCallInfo { | |||
66 | #define IRCALLCOND_SOFTFP_FFI(x) NULL | 78 | #define IRCALLCOND_SOFTFP_FFI(x) NULL |
67 | #endif | 79 | #endif |
68 | 80 | ||
81 | #if LJ_SOFTFP && LJ_TARGET_MIPS | ||
82 | #define IRCALLCOND_SOFTFP_MIPS(x) x | ||
83 | #else | ||
84 | #define IRCALLCOND_SOFTFP_MIPS(x) NULL | ||
85 | #endif | ||
86 | |||
87 | #if LJ_SOFTFP && LJ_TARGET_MIPS64 | ||
88 | #define IRCALLCOND_SOFTFP_MIPS64(x) x | ||
89 | #else | ||
90 | #define IRCALLCOND_SOFTFP_MIPS64(x) NULL | ||
91 | #endif | ||
92 | |||
69 | #define LJ_NEED_FP64 (LJ_TARGET_ARM || LJ_TARGET_PPC || LJ_TARGET_MIPS) | 93 | #define LJ_NEED_FP64 (LJ_TARGET_ARM || LJ_TARGET_PPC || LJ_TARGET_MIPS) |
70 | 94 | ||
71 | #if LJ_HASFFI && (LJ_SOFTFP || LJ_NEED_FP64) | 95 | #if LJ_HASFFI && (LJ_SOFTFP || LJ_NEED_FP64) |
@@ -87,92 +111,135 @@ typedef struct CCallInfo { | |||
87 | #endif | 111 | #endif |
88 | 112 | ||
89 | #if LJ_SOFTFP | 113 | #if LJ_SOFTFP |
90 | #define ARG1_FP 2 /* Treat as 2 32 bit arguments. */ | 114 | #define XA_FP CCI_XA |
115 | #define XA2_FP (CCI_XA+CCI_XA) | ||
91 | #else | 116 | #else |
92 | #define ARG1_FP 1 | 117 | #define XA_FP 0 |
118 | #define XA2_FP 0 | ||
119 | #endif | ||
120 | |||
121 | #if LJ_SOFTFP32 | ||
122 | #define XA_FP32 CCI_XA | ||
123 | #define XA2_FP32 (CCI_XA+CCI_XA) | ||
124 | #else | ||
125 | #define XA_FP32 0 | ||
126 | #define XA2_FP32 0 | ||
93 | #endif | 127 | #endif |
94 | 128 | ||
95 | #if LJ_32 | 129 | #if LJ_32 |
96 | #define ARG2_64 4 /* Treat as 4 32 bit arguments. */ | 130 | #define XA_64 CCI_XA |
131 | #define XA2_64 (CCI_XA+CCI_XA) | ||
97 | #else | 132 | #else |
98 | #define ARG2_64 2 | 133 | #define XA_64 0 |
134 | #define XA2_64 0 | ||
99 | #endif | 135 | #endif |
100 | 136 | ||
101 | /* Function definitions for CALL* instructions. */ | 137 | /* Function definitions for CALL* instructions. */ |
102 | #define IRCALLDEF(_) \ | 138 | #define IRCALLDEF(_) \ |
103 | _(ANY, lj_str_cmp, 2, FN, INT, CCI_NOFPRCLOBBER) \ | 139 | _(ANY, lj_str_cmp, 2, FN, INT, CCI_NOFPRCLOBBER) \ |
140 | _(ANY, lj_str_find, 4, N, PGC, 0) \ | ||
104 | _(ANY, lj_str_new, 3, S, STR, CCI_L) \ | 141 | _(ANY, lj_str_new, 3, S, STR, CCI_L) \ |
105 | _(ANY, lj_strscan_num, 2, FN, INT, 0) \ | 142 | _(ANY, lj_strscan_num, 2, FN, INT, 0) \ |
106 | _(ANY, lj_str_fromint, 2, FN, STR, CCI_L) \ | 143 | _(ANY, lj_strfmt_int, 2, FN, STR, CCI_L) \ |
107 | _(ANY, lj_str_fromnum, 2, FN, STR, CCI_L) \ | 144 | _(ANY, lj_strfmt_num, 2, FN, STR, CCI_L) \ |
145 | _(ANY, lj_strfmt_char, 2, FN, STR, CCI_L) \ | ||
146 | _(ANY, lj_strfmt_putint, 2, FL, PGC, 0) \ | ||
147 | _(ANY, lj_strfmt_putnum, 2, FL, PGC, 0) \ | ||
148 | _(ANY, lj_strfmt_putquoted, 2, FL, PGC, 0) \ | ||
149 | _(ANY, lj_strfmt_putfxint, 3, L, PGC, XA_64) \ | ||
150 | _(ANY, lj_strfmt_putfnum_int, 3, L, PGC, XA_FP) \ | ||
151 | _(ANY, lj_strfmt_putfnum_uint, 3, L, PGC, XA_FP) \ | ||
152 | _(ANY, lj_strfmt_putfnum, 3, L, PGC, XA_FP) \ | ||
153 | _(ANY, lj_strfmt_putfstr, 3, L, PGC, 0) \ | ||
154 | _(ANY, lj_strfmt_putfchar, 3, L, PGC, 0) \ | ||
155 | _(ANY, lj_buf_putmem, 3, S, PGC, 0) \ | ||
156 | _(ANY, lj_buf_putstr, 2, FL, PGC, 0) \ | ||
157 | _(ANY, lj_buf_putchar, 2, FL, PGC, 0) \ | ||
158 | _(ANY, lj_buf_putstr_reverse, 2, FL, PGC, 0) \ | ||
159 | _(ANY, lj_buf_putstr_lower, 2, FL, PGC, 0) \ | ||
160 | _(ANY, lj_buf_putstr_upper, 2, FL, PGC, 0) \ | ||
161 | _(ANY, lj_buf_putstr_rep, 3, L, PGC, 0) \ | ||
162 | _(ANY, lj_buf_puttab, 5, L, PGC, 0) \ | ||
163 | _(ANY, lj_buf_tostr, 1, FL, STR, 0) \ | ||
164 | _(ANY, lj_tab_new_ah, 3, A, TAB, CCI_L) \ | ||
108 | _(ANY, lj_tab_new1, 2, FS, TAB, CCI_L) \ | 165 | _(ANY, lj_tab_new1, 2, FS, TAB, CCI_L) \ |
109 | _(ANY, lj_tab_dup, 2, FS, TAB, CCI_L) \ | 166 | _(ANY, lj_tab_dup, 2, FS, TAB, CCI_L) \ |
110 | _(ANY, lj_tab_newkey, 3, S, P32, CCI_L) \ | 167 | _(ANY, lj_tab_clear, 1, FS, NIL, 0) \ |
168 | _(ANY, lj_tab_newkey, 3, S, PGC, CCI_L) \ | ||
111 | _(ANY, lj_tab_len, 1, FL, INT, 0) \ | 169 | _(ANY, lj_tab_len, 1, FL, INT, 0) \ |
112 | _(ANY, lj_gc_step_jit, 2, FS, NIL, CCI_L) \ | 170 | _(ANY, lj_gc_step_jit, 2, FS, NIL, CCI_L) \ |
113 | _(ANY, lj_gc_barrieruv, 2, FS, NIL, 0) \ | 171 | _(ANY, lj_gc_barrieruv, 2, FS, NIL, 0) \ |
114 | _(ANY, lj_mem_newgco, 2, FS, P32, CCI_L) \ | 172 | _(ANY, lj_mem_newgco, 2, FS, PGC, CCI_L) \ |
115 | _(ANY, lj_math_random_step, 1, FS, NUM, CCI_CASTU64) \ | 173 | _(ANY, lj_math_random_step, 1, FS, NUM, CCI_CASTU64) \ |
116 | _(ANY, lj_vm_modi, 2, FN, INT, 0) \ | 174 | _(ANY, lj_vm_modi, 2, FN, INT, 0) \ |
117 | _(ANY, sinh, ARG1_FP, N, NUM, 0) \ | 175 | _(ANY, sinh, 1, N, NUM, XA_FP) \ |
118 | _(ANY, cosh, ARG1_FP, N, NUM, 0) \ | 176 | _(ANY, cosh, 1, N, NUM, XA_FP) \ |
119 | _(ANY, tanh, ARG1_FP, N, NUM, 0) \ | 177 | _(ANY, tanh, 1, N, NUM, XA_FP) \ |
120 | _(ANY, fputc, 2, S, INT, 0) \ | 178 | _(ANY, fputc, 2, S, INT, 0) \ |
121 | _(ANY, fwrite, 4, S, INT, 0) \ | 179 | _(ANY, fwrite, 4, S, INT, 0) \ |
122 | _(ANY, fflush, 1, S, INT, 0) \ | 180 | _(ANY, fflush, 1, S, INT, 0) \ |
123 | /* ORDER FPM */ \ | 181 | /* ORDER FPM */ \ |
124 | _(FPMATH, lj_vm_floor, ARG1_FP, N, NUM, 0) \ | 182 | _(FPMATH, lj_vm_floor, 1, N, NUM, XA_FP) \ |
125 | _(FPMATH, lj_vm_ceil, ARG1_FP, N, NUM, 0) \ | 183 | _(FPMATH, lj_vm_ceil, 1, N, NUM, XA_FP) \ |
126 | _(FPMATH, lj_vm_trunc, ARG1_FP, N, NUM, 0) \ | 184 | _(FPMATH, lj_vm_trunc, 1, N, NUM, XA_FP) \ |
127 | _(FPMATH, sqrt, ARG1_FP, N, NUM, 0) \ | 185 | _(FPMATH, sqrt, 1, N, NUM, XA_FP) \ |
128 | _(FPMATH, exp, ARG1_FP, N, NUM, 0) \ | 186 | _(ANY, exp, 1, N, NUM, XA_FP) \ |
129 | _(FPMATH, lj_vm_exp2, ARG1_FP, N, NUM, 0) \ | 187 | _(ANY, lj_vm_exp2, 1, N, NUM, XA_FP) \ |
130 | _(FPMATH, log, ARG1_FP, N, NUM, 0) \ | 188 | _(ANY, log, 1, N, NUM, XA_FP) \ |
131 | _(FPMATH, lj_vm_log2, ARG1_FP, N, NUM, 0) \ | 189 | _(ANY, lj_vm_log2, 1, N, NUM, XA_FP) \ |
132 | _(FPMATH, log10, ARG1_FP, N, NUM, 0) \ | 190 | _(ANY, log10, 1, N, NUM, XA_FP) \ |
133 | _(FPMATH, sin, ARG1_FP, N, NUM, 0) \ | 191 | _(ANY, sin, 1, N, NUM, XA_FP) \ |
134 | _(FPMATH, cos, ARG1_FP, N, NUM, 0) \ | 192 | _(ANY, cos, 1, N, NUM, XA_FP) \ |
135 | _(FPMATH, tan, ARG1_FP, N, NUM, 0) \ | 193 | _(ANY, tan, 1, N, NUM, XA_FP) \ |
136 | _(FPMATH, lj_vm_powi, ARG1_FP+1, N, NUM, 0) \ | 194 | _(ANY, lj_vm_powi, 2, N, NUM, XA_FP) \ |
137 | _(FPMATH, pow, ARG1_FP*2, N, NUM, 0) \ | 195 | _(ANY, pow, 2, N, NUM, XA2_FP) \ |
138 | _(FPMATH, atan2, ARG1_FP*2, N, NUM, 0) \ | 196 | _(ANY, atan2, 2, N, NUM, XA2_FP) \ |
139 | _(FPMATH, ldexp, ARG1_FP+1, N, NUM, 0) \ | 197 | _(ANY, ldexp, 2, N, NUM, XA_FP) \ |
140 | _(SOFTFP, lj_vm_tobit, 2, N, INT, 0) \ | 198 | _(SOFTFP, lj_vm_tobit, 1, N, INT, XA_FP32) \ |
141 | _(SOFTFP, softfp_add, 4, N, NUM, 0) \ | 199 | _(SOFTFP, softfp_add, 2, N, NUM, XA2_FP32) \ |
142 | _(SOFTFP, softfp_sub, 4, N, NUM, 0) \ | 200 | _(SOFTFP, softfp_sub, 2, N, NUM, XA2_FP32) \ |
143 | _(SOFTFP, softfp_mul, 4, N, NUM, 0) \ | 201 | _(SOFTFP, softfp_mul, 2, N, NUM, XA2_FP32) \ |
144 | _(SOFTFP, softfp_div, 4, N, NUM, 0) \ | 202 | _(SOFTFP, softfp_div, 2, N, NUM, XA2_FP32) \ |
145 | _(SOFTFP, softfp_cmp, 4, N, NIL, 0) \ | 203 | _(SOFTFP, softfp_cmp, 2, N, NIL, XA2_FP32) \ |
146 | _(SOFTFP, softfp_i2d, 1, N, NUM, 0) \ | 204 | _(SOFTFP, softfp_i2d, 1, N, NUM, 0) \ |
147 | _(SOFTFP, softfp_d2i, 2, N, INT, 0) \ | 205 | _(SOFTFP, softfp_d2i, 1, N, INT, XA_FP32) \ |
206 | _(SOFTFP_MIPS, lj_vm_sfmin, 2, N, NUM, XA2_FP32) \ | ||
207 | _(SOFTFP_MIPS, lj_vm_sfmax, 2, N, NUM, XA2_FP32) \ | ||
208 | _(SOFTFP_MIPS64, lj_vm_tointg, 1, N, INT, 0) \ | ||
148 | _(SOFTFP_FFI, softfp_ui2d, 1, N, NUM, 0) \ | 209 | _(SOFTFP_FFI, softfp_ui2d, 1, N, NUM, 0) \ |
149 | _(SOFTFP_FFI, softfp_f2d, 1, N, NUM, 0) \ | 210 | _(SOFTFP_FFI, softfp_f2d, 1, N, NUM, 0) \ |
150 | _(SOFTFP_FFI, softfp_d2ui, 2, N, INT, 0) \ | 211 | _(SOFTFP_FFI, softfp_d2ui, 1, N, INT, XA_FP32) \ |
151 | _(SOFTFP_FFI, softfp_d2f, 2, N, FLOAT, 0) \ | 212 | _(SOFTFP_FFI, softfp_d2f, 1, N, FLOAT, XA_FP32) \ |
152 | _(SOFTFP_FFI, softfp_i2f, 1, N, FLOAT, 0) \ | 213 | _(SOFTFP_FFI, softfp_i2f, 1, N, FLOAT, 0) \ |
153 | _(SOFTFP_FFI, softfp_ui2f, 1, N, FLOAT, 0) \ | 214 | _(SOFTFP_FFI, softfp_ui2f, 1, N, FLOAT, 0) \ |
154 | _(SOFTFP_FFI, softfp_f2i, 1, N, INT, 0) \ | 215 | _(SOFTFP_FFI, softfp_f2i, 1, N, INT, 0) \ |
155 | _(SOFTFP_FFI, softfp_f2ui, 1, N, INT, 0) \ | 216 | _(SOFTFP_FFI, softfp_f2ui, 1, N, INT, 0) \ |
156 | _(FP64_FFI, fp64_l2d, 2, N, NUM, 0) \ | 217 | _(FP64_FFI, fp64_l2d, 1, N, NUM, XA_64) \ |
157 | _(FP64_FFI, fp64_ul2d, 2, N, NUM, 0) \ | 218 | _(FP64_FFI, fp64_ul2d, 1, N, NUM, XA_64) \ |
158 | _(FP64_FFI, fp64_l2f, 2, N, FLOAT, 0) \ | 219 | _(FP64_FFI, fp64_l2f, 1, N, FLOAT, XA_64) \ |
159 | _(FP64_FFI, fp64_ul2f, 2, N, FLOAT, 0) \ | 220 | _(FP64_FFI, fp64_ul2f, 1, N, FLOAT, XA_64) \ |
160 | _(FP64_FFI, fp64_d2l, ARG1_FP, N, I64, 0) \ | 221 | _(FP64_FFI, fp64_d2l, 1, N, I64, XA_FP) \ |
161 | _(FP64_FFI, fp64_d2ul, ARG1_FP, N, U64, 0) \ | 222 | _(FP64_FFI, fp64_d2ul, 1, N, U64, XA_FP) \ |
162 | _(FP64_FFI, fp64_f2l, 1, N, I64, 0) \ | 223 | _(FP64_FFI, fp64_f2l, 1, N, I64, 0) \ |
163 | _(FP64_FFI, fp64_f2ul, 1, N, U64, 0) \ | 224 | _(FP64_FFI, fp64_f2ul, 1, N, U64, 0) \ |
164 | _(FFI, lj_carith_divi64, ARG2_64, N, I64, CCI_NOFPRCLOBBER) \ | 225 | _(FFI, lj_carith_divi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \ |
165 | _(FFI, lj_carith_divu64, ARG2_64, N, U64, CCI_NOFPRCLOBBER) \ | 226 | _(FFI, lj_carith_divu64, 2, N, U64, XA2_64|CCI_NOFPRCLOBBER) \ |
166 | _(FFI, lj_carith_modi64, ARG2_64, N, I64, CCI_NOFPRCLOBBER) \ | 227 | _(FFI, lj_carith_modi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \ |
167 | _(FFI, lj_carith_modu64, ARG2_64, N, U64, CCI_NOFPRCLOBBER) \ | 228 | _(FFI, lj_carith_modu64, 2, N, U64, XA2_64|CCI_NOFPRCLOBBER) \ |
168 | _(FFI, lj_carith_powi64, ARG2_64, N, I64, CCI_NOFPRCLOBBER) \ | 229 | _(FFI, lj_carith_powi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \ |
169 | _(FFI, lj_carith_powu64, ARG2_64, N, U64, CCI_NOFPRCLOBBER) \ | 230 | _(FFI, lj_carith_powu64, 2, N, U64, XA2_64|CCI_NOFPRCLOBBER) \ |
170 | _(FFI, lj_cdata_setfin, 2, FN, P32, CCI_L) \ | 231 | _(FFI, lj_cdata_newv, 4, S, CDATA, CCI_L) \ |
171 | _(FFI, strlen, 1, L, INTP, 0) \ | 232 | _(FFI, lj_cdata_setfin, 4, S, NIL, CCI_L) \ |
172 | _(FFI, memcpy, 3, S, PTR, 0) \ | 233 | _(FFI, strlen, 1, L, INTP, 0) \ |
173 | _(FFI, memset, 3, S, PTR, 0) \ | 234 | _(FFI, memcpy, 3, S, PTR, 0) \ |
174 | _(FFI, lj_vm_errno, 0, S, INT, CCI_NOFPRCLOBBER) \ | 235 | _(FFI, memset, 3, S, PTR, 0) \ |
175 | _(FFI32, lj_carith_mul64, ARG2_64, N, I64, CCI_NOFPRCLOBBER) | 236 | _(FFI, lj_vm_errno, 0, S, INT, CCI_NOFPRCLOBBER) \ |
237 | _(FFI32, lj_carith_mul64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \ | ||
238 | _(FFI32, lj_carith_shl64, 2, N, U64, XA_64|CCI_NOFPRCLOBBER) \ | ||
239 | _(FFI32, lj_carith_shr64, 2, N, U64, XA_64|CCI_NOFPRCLOBBER) \ | ||
240 | _(FFI32, lj_carith_sar64, 2, N, U64, XA_64|CCI_NOFPRCLOBBER) \ | ||
241 | _(FFI32, lj_carith_rol64, 2, N, U64, XA_64|CCI_NOFPRCLOBBER) \ | ||
242 | _(FFI32, lj_carith_ror64, 2, N, U64, XA_64|CCI_NOFPRCLOBBER) \ | ||
176 | \ | 243 | \ |
177 | /* End of list. */ | 244 | /* End of list. */ |
178 | 245 | ||
@@ -220,6 +287,22 @@ LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1]; | |||
220 | #define fp64_f2l __aeabi_f2lz | 287 | #define fp64_f2l __aeabi_f2lz |
221 | #define fp64_f2ul __aeabi_f2ulz | 288 | #define fp64_f2ul __aeabi_f2ulz |
222 | #endif | 289 | #endif |
290 | #elif LJ_TARGET_MIPS || LJ_TARGET_PPC | ||
291 | #define softfp_add __adddf3 | ||
292 | #define softfp_sub __subdf3 | ||
293 | #define softfp_mul __muldf3 | ||
294 | #define softfp_div __divdf3 | ||
295 | #define softfp_cmp __ledf2 | ||
296 | #define softfp_i2d __floatsidf | ||
297 | #define softfp_d2i __fixdfsi | ||
298 | #define softfp_ui2d __floatunsidf | ||
299 | #define softfp_f2d __extendsfdf2 | ||
300 | #define softfp_d2ui __fixunsdfsi | ||
301 | #define softfp_d2f __truncdfsf2 | ||
302 | #define softfp_i2f __floatsisf | ||
303 | #define softfp_ui2f __floatunsisf | ||
304 | #define softfp_f2i __fixsfsi | ||
305 | #define softfp_f2ui __fixunssfsi | ||
223 | #else | 306 | #else |
224 | #error "Missing soft-float definitions for target architecture" | 307 | #error "Missing soft-float definitions for target architecture" |
225 | #endif | 308 | #endif |
@@ -240,6 +323,10 @@ extern float softfp_ui2f(uint32_t a); | |||
240 | extern int32_t softfp_f2i(float a); | 323 | extern int32_t softfp_f2i(float a); |
241 | extern uint32_t softfp_f2ui(float a); | 324 | extern uint32_t softfp_f2ui(float a); |
242 | #endif | 325 | #endif |
326 | #if LJ_TARGET_MIPS | ||
327 | extern double lj_vm_sfmin(double a, double b); | ||
328 | extern double lj_vm_sfmax(double a, double b); | ||
329 | #endif | ||
243 | #endif | 330 | #endif |
244 | 331 | ||
245 | #if LJ_HASFFI && LJ_NEED_FP64 && !(LJ_TARGET_ARM && LJ_SOFTFP) | 332 | #if LJ_HASFFI && LJ_NEED_FP64 && !(LJ_TARGET_ARM && LJ_SOFTFP) |
diff --git a/src/lj_iropt.h b/src/lj_iropt.h index cf5b4d1f..02d6b946 100644 --- a/src/lj_iropt.h +++ b/src/lj_iropt.h | |||
@@ -36,11 +36,11 @@ static LJ_AINLINE IRRef lj_ir_nextins(jit_State *J) | |||
36 | return ref; | 36 | return ref; |
37 | } | 37 | } |
38 | 38 | ||
39 | LJ_FUNC TRef lj_ir_ggfload(jit_State *J, IRType t, uintptr_t ofs); | ||
40 | |||
39 | /* Interning of constants. */ | 41 | /* Interning of constants. */ |
40 | LJ_FUNC TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k); | 42 | LJ_FUNC TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k); |
41 | LJ_FUNC void lj_ir_k64_freeall(jit_State *J); | 43 | LJ_FUNC TRef lj_ir_k64(jit_State *J, IROp op, uint64_t u64); |
42 | LJ_FUNC TRef lj_ir_k64(jit_State *J, IROp op, cTValue *tv); | ||
43 | LJ_FUNC cTValue *lj_ir_k64_find(jit_State *J, uint64_t u64); | ||
44 | LJ_FUNC TRef lj_ir_knum_u64(jit_State *J, uint64_t u64); | 44 | LJ_FUNC TRef lj_ir_knum_u64(jit_State *J, uint64_t u64); |
45 | LJ_FUNC TRef lj_ir_knumint(jit_State *J, lua_Number n); | 45 | LJ_FUNC TRef lj_ir_knumint(jit_State *J, lua_Number n); |
46 | LJ_FUNC TRef lj_ir_kint64(jit_State *J, uint64_t u64); | 46 | LJ_FUNC TRef lj_ir_kint64(jit_State *J, uint64_t u64); |
@@ -48,6 +48,7 @@ LJ_FUNC TRef lj_ir_kgc(jit_State *J, GCobj *o, IRType t); | |||
48 | LJ_FUNC TRef lj_ir_kptr_(jit_State *J, IROp op, void *ptr); | 48 | LJ_FUNC TRef lj_ir_kptr_(jit_State *J, IROp op, void *ptr); |
49 | LJ_FUNC TRef lj_ir_knull(jit_State *J, IRType t); | 49 | LJ_FUNC TRef lj_ir_knull(jit_State *J, IRType t); |
50 | LJ_FUNC TRef lj_ir_kslot(jit_State *J, TRef key, IRRef slot); | 50 | LJ_FUNC TRef lj_ir_kslot(jit_State *J, TRef key, IRRef slot); |
51 | LJ_FUNC TRef lj_ir_ktrace(jit_State *J); | ||
51 | 52 | ||
52 | #if LJ_64 | 53 | #if LJ_64 |
53 | #define lj_ir_kintp(J, k) lj_ir_kint64(J, (uint64_t)(k)) | 54 | #define lj_ir_kintp(J, k) lj_ir_kint64(J, (uint64_t)(k)) |
@@ -74,8 +75,8 @@ static LJ_AINLINE TRef lj_ir_knum(jit_State *J, lua_Number n) | |||
74 | #define lj_ir_knum_tobit(J) lj_ir_knum_u64(J, U64x(43380000,00000000)) | 75 | #define lj_ir_knum_tobit(J) lj_ir_knum_u64(J, U64x(43380000,00000000)) |
75 | 76 | ||
76 | /* Special 128 bit SIMD constants. */ | 77 | /* Special 128 bit SIMD constants. */ |
77 | #define lj_ir_knum_abs(J) lj_ir_k64(J, IR_KNUM, LJ_KSIMD(J, LJ_KSIMD_ABS)) | 78 | #define lj_ir_ksimd(J, idx) \ |
78 | #define lj_ir_knum_neg(J) lj_ir_k64(J, IR_KNUM, LJ_KSIMD(J, LJ_KSIMD_NEG)) | 79 | lj_ir_ggfload(J, IRT_NUM, (uintptr_t)LJ_KSIMD(J, idx) - (uintptr_t)J2GG(J)) |
79 | 80 | ||
80 | /* Access to constants. */ | 81 | /* Access to constants. */ |
81 | LJ_FUNC void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir); | 82 | LJ_FUNC void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir); |
@@ -149,7 +150,7 @@ LJ_FUNC IRType lj_opt_narrow_forl(jit_State *J, cTValue *forbase); | |||
149 | /* Optimization passes. */ | 150 | /* Optimization passes. */ |
150 | LJ_FUNC void lj_opt_dce(jit_State *J); | 151 | LJ_FUNC void lj_opt_dce(jit_State *J); |
151 | LJ_FUNC int lj_opt_loop(jit_State *J); | 152 | LJ_FUNC int lj_opt_loop(jit_State *J); |
152 | #if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) | 153 | #if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI) |
153 | LJ_FUNC void lj_opt_split(jit_State *J); | 154 | LJ_FUNC void lj_opt_split(jit_State *J); |
154 | #else | 155 | #else |
155 | #define lj_opt_split(J) UNUSED(J) | 156 | #define lj_opt_split(J) UNUSED(J) |
diff --git a/src/lj_jit.h b/src/lj_jit.h index 0e1c4827..f179f17f 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h | |||
@@ -14,18 +14,16 @@ | |||
14 | 14 | ||
15 | /* CPU-specific JIT engine flags. */ | 15 | /* CPU-specific JIT engine flags. */ |
16 | #if LJ_TARGET_X86ORX64 | 16 | #if LJ_TARGET_X86ORX64 |
17 | #define JIT_F_CMOV 0x00000010 | 17 | #define JIT_F_SSE2 0x00000010 |
18 | #define JIT_F_SSE2 0x00000020 | 18 | #define JIT_F_SSE3 0x00000020 |
19 | #define JIT_F_SSE3 0x00000040 | 19 | #define JIT_F_SSE4_1 0x00000040 |
20 | #define JIT_F_SSE4_1 0x00000080 | 20 | #define JIT_F_PREFER_IMUL 0x00000080 |
21 | #define JIT_F_P4 0x00000100 | 21 | #define JIT_F_LEA_AGU 0x00000100 |
22 | #define JIT_F_PREFER_IMUL 0x00000200 | 22 | #define JIT_F_BMI2 0x00000200 |
23 | #define JIT_F_SPLIT_XMM 0x00000400 | ||
24 | #define JIT_F_LEA_AGU 0x00000800 | ||
25 | 23 | ||
26 | /* Names for the CPU-specific flags. Must match the order above. */ | 24 | /* Names for the CPU-specific flags. Must match the order above. */ |
27 | #define JIT_F_CPU_FIRST JIT_F_CMOV | 25 | #define JIT_F_CPU_FIRST JIT_F_SSE2 |
28 | #define JIT_F_CPUSTRING "\4CMOV\4SSE2\4SSE3\6SSE4.1\2P4\3AMD\2K8\4ATOM" | 26 | #define JIT_F_CPUSTRING "\4SSE2\4SSE3\6SSE4.1\3AMD\4ATOM\4BMI2" |
29 | #elif LJ_TARGET_ARM | 27 | #elif LJ_TARGET_ARM |
30 | #define JIT_F_ARMV6_ 0x00000010 | 28 | #define JIT_F_ARMV6_ 0x00000010 |
31 | #define JIT_F_ARMV6T2_ 0x00000020 | 29 | #define JIT_F_ARMV6T2_ 0x00000020 |
@@ -48,11 +46,23 @@ | |||
48 | #define JIT_F_CPU_FIRST JIT_F_SQRT | 46 | #define JIT_F_CPU_FIRST JIT_F_SQRT |
49 | #define JIT_F_CPUSTRING "\4SQRT\5ROUND" | 47 | #define JIT_F_CPUSTRING "\4SQRT\5ROUND" |
50 | #elif LJ_TARGET_MIPS | 48 | #elif LJ_TARGET_MIPS |
51 | #define JIT_F_MIPS32R2 0x00000010 | 49 | #define JIT_F_MIPSXXR2 0x00000010 |
52 | 50 | ||
53 | /* Names for the CPU-specific flags. Must match the order above. */ | 51 | /* Names for the CPU-specific flags. Must match the order above. */ |
54 | #define JIT_F_CPU_FIRST JIT_F_MIPS32R2 | 52 | #define JIT_F_CPU_FIRST JIT_F_MIPSXXR2 |
53 | #if LJ_TARGET_MIPS32 | ||
54 | #if LJ_TARGET_MIPSR6 | ||
55 | #define JIT_F_CPUSTRING "\010MIPS32R6" | ||
56 | #else | ||
55 | #define JIT_F_CPUSTRING "\010MIPS32R2" | 57 | #define JIT_F_CPUSTRING "\010MIPS32R2" |
58 | #endif | ||
59 | #else | ||
60 | #if LJ_TARGET_MIPSR6 | ||
61 | #define JIT_F_CPUSTRING "\010MIPS64R6" | ||
62 | #else | ||
63 | #define JIT_F_CPUSTRING "\010MIPS64R2" | ||
64 | #endif | ||
65 | #endif | ||
56 | #else | 66 | #else |
57 | #define JIT_F_CPU_FIRST 0 | 67 | #define JIT_F_CPU_FIRST 0 |
58 | #define JIT_F_CPUSTRING "" | 68 | #define JIT_F_CPUSTRING "" |
@@ -100,6 +110,7 @@ | |||
100 | _(\012, maxirconst, 500) /* Max. # of IR constants of a trace. */ \ | 110 | _(\012, maxirconst, 500) /* Max. # of IR constants of a trace. */ \ |
101 | _(\007, maxside, 100) /* Max. # of side traces of a root trace. */ \ | 111 | _(\007, maxside, 100) /* Max. # of side traces of a root trace. */ \ |
102 | _(\007, maxsnap, 500) /* Max. # of snapshots for a trace. */ \ | 112 | _(\007, maxsnap, 500) /* Max. # of snapshots for a trace. */ \ |
113 | _(\011, minstitch, 0) /* Min. # of IR ins for a stitched trace. */ \ | ||
103 | \ | 114 | \ |
104 | _(\007, hotloop, 56) /* # of iter. to detect a hot loop/call. */ \ | 115 | _(\007, hotloop, 56) /* # of iter. to detect a hot loop/call. */ \ |
105 | _(\007, hotexit, 10) /* # of taken exits to start a side trace. */ \ | 116 | _(\007, hotexit, 10) /* # of taken exits to start a side trace. */ \ |
@@ -186,14 +197,26 @@ LJ_STATIC_ASSERT(SNAP_CONT == TREF_CONT); | |||
186 | #define SNAP(slot, flags, ref) (((SnapEntry)(slot) << 24) + (flags) + (ref)) | 197 | #define SNAP(slot, flags, ref) (((SnapEntry)(slot) << 24) + (flags) + (ref)) |
187 | #define SNAP_TR(slot, tr) \ | 198 | #define SNAP_TR(slot, tr) \ |
188 | (((SnapEntry)(slot) << 24) + ((tr) & (TREF_CONT|TREF_FRAME|TREF_REFMASK))) | 199 | (((SnapEntry)(slot) << 24) + ((tr) & (TREF_CONT|TREF_FRAME|TREF_REFMASK))) |
200 | #if !LJ_FR2 | ||
189 | #define SNAP_MKPC(pc) ((SnapEntry)u32ptr(pc)) | 201 | #define SNAP_MKPC(pc) ((SnapEntry)u32ptr(pc)) |
202 | #endif | ||
190 | #define SNAP_MKFTSZ(ftsz) ((SnapEntry)(ftsz)) | 203 | #define SNAP_MKFTSZ(ftsz) ((SnapEntry)(ftsz)) |
191 | #define snap_ref(sn) ((sn) & 0xffff) | 204 | #define snap_ref(sn) ((sn) & 0xffff) |
192 | #define snap_slot(sn) ((BCReg)((sn) >> 24)) | 205 | #define snap_slot(sn) ((BCReg)((sn) >> 24)) |
193 | #define snap_isframe(sn) ((sn) & SNAP_FRAME) | 206 | #define snap_isframe(sn) ((sn) & SNAP_FRAME) |
194 | #define snap_pc(sn) ((const BCIns *)(uintptr_t)(sn)) | ||
195 | #define snap_setref(sn, ref) (((sn) & (0xffff0000&~SNAP_NORESTORE)) | (ref)) | 207 | #define snap_setref(sn, ref) (((sn) & (0xffff0000&~SNAP_NORESTORE)) | (ref)) |
196 | 208 | ||
209 | static LJ_AINLINE const BCIns *snap_pc(SnapEntry *sn) | ||
210 | { | ||
211 | #if LJ_FR2 | ||
212 | uint64_t pcbase; | ||
213 | memcpy(&pcbase, sn, sizeof(uint64_t)); | ||
214 | return (const BCIns *)(pcbase >> 8); | ||
215 | #else | ||
216 | return (const BCIns *)(uintptr_t)*sn; | ||
217 | #endif | ||
218 | } | ||
219 | |||
197 | /* Snapshot and exit numbers. */ | 220 | /* Snapshot and exit numbers. */ |
198 | typedef uint32_t SnapNo; | 221 | typedef uint32_t SnapNo; |
199 | typedef uint32_t ExitNo; | 222 | typedef uint32_t ExitNo; |
@@ -211,7 +234,8 @@ typedef enum { | |||
211 | LJ_TRLINK_UPREC, /* Up-recursion. */ | 234 | LJ_TRLINK_UPREC, /* Up-recursion. */ |
212 | LJ_TRLINK_DOWNREC, /* Down-recursion. */ | 235 | LJ_TRLINK_DOWNREC, /* Down-recursion. */ |
213 | LJ_TRLINK_INTERP, /* Fallback to interpreter. */ | 236 | LJ_TRLINK_INTERP, /* Fallback to interpreter. */ |
214 | LJ_TRLINK_RETURN /* Return to interpreter. */ | 237 | LJ_TRLINK_RETURN, /* Return to interpreter. */ |
238 | LJ_TRLINK_STITCH /* Trace stitching. */ | ||
215 | } TraceLink; | 239 | } TraceLink; |
216 | 240 | ||
217 | /* Trace object. */ | 241 | /* Trace object. */ |
@@ -219,6 +243,9 @@ typedef struct GCtrace { | |||
219 | GCHeader; | 243 | GCHeader; |
220 | uint16_t nsnap; /* Number of snapshots. */ | 244 | uint16_t nsnap; /* Number of snapshots. */ |
221 | IRRef nins; /* Next IR instruction. Biased with REF_BIAS. */ | 245 | IRRef nins; /* Next IR instruction. Biased with REF_BIAS. */ |
246 | #if LJ_GC64 | ||
247 | uint32_t unused_gc64; | ||
248 | #endif | ||
222 | GCRef gclist; | 249 | GCRef gclist; |
223 | IRIns *ir; /* IR instructions/constants. Biased with REF_BIAS. */ | 250 | IRIns *ir; /* IR instructions/constants. Biased with REF_BIAS. */ |
224 | IRRef nk; /* Lowest IR constant. Biased with REF_BIAS. */ | 251 | IRRef nk; /* Lowest IR constant. Biased with REF_BIAS. */ |
@@ -294,6 +321,16 @@ typedef struct ScEvEntry { | |||
294 | uint8_t dir; /* Direction. 1: +, 0: -. */ | 321 | uint8_t dir; /* Direction. 1: +, 0: -. */ |
295 | } ScEvEntry; | 322 | } ScEvEntry; |
296 | 323 | ||
324 | /* Reverse bytecode map (IRRef -> PC). Only for selected instructions. */ | ||
325 | typedef struct RBCHashEntry { | ||
326 | MRef pc; /* Bytecode PC. */ | ||
327 | GCRef pt; /* Prototype. */ | ||
328 | IRRef ref; /* IR reference. */ | ||
329 | } RBCHashEntry; | ||
330 | |||
331 | /* Number of slots in the reverse bytecode hash table. Must be a power of 2. */ | ||
332 | #define RBCHASH_SLOTS 8 | ||
333 | |||
297 | /* 128 bit SIMD constants. */ | 334 | /* 128 bit SIMD constants. */ |
298 | enum { | 335 | enum { |
299 | LJ_KSIMD_ABS, | 336 | LJ_KSIMD_ABS, |
@@ -301,12 +338,51 @@ enum { | |||
301 | LJ_KSIMD__MAX | 338 | LJ_KSIMD__MAX |
302 | }; | 339 | }; |
303 | 340 | ||
341 | enum { | ||
342 | #if LJ_TARGET_X86ORX64 | ||
343 | LJ_K64_TOBIT, /* 2^52 + 2^51 */ | ||
344 | LJ_K64_2P64, /* 2^64 */ | ||
345 | LJ_K64_M2P64, /* -2^64 */ | ||
346 | #if LJ_32 | ||
347 | LJ_K64_M2P64_31, /* -2^64 or -2^31 */ | ||
348 | #else | ||
349 | LJ_K64_M2P64_31 = LJ_K64_M2P64, | ||
350 | #endif | ||
351 | #endif | ||
352 | #if LJ_TARGET_MIPS | ||
353 | LJ_K64_2P31, /* 2^31 */ | ||
354 | #if LJ_64 | ||
355 | LJ_K64_2P63, /* 2^63 */ | ||
356 | LJ_K64_M2P64, /* -2^64 */ | ||
357 | #endif | ||
358 | #endif | ||
359 | LJ_K64__MAX, | ||
360 | }; | ||
361 | |||
362 | enum { | ||
363 | #if LJ_TARGET_X86ORX64 | ||
364 | LJ_K32_M2P64_31, /* -2^64 or -2^31 */ | ||
365 | #endif | ||
366 | #if LJ_TARGET_PPC | ||
367 | LJ_K32_2P52_2P31, /* 2^52 + 2^31 */ | ||
368 | LJ_K32_2P52, /* 2^52 */ | ||
369 | #endif | ||
370 | #if LJ_TARGET_PPC || LJ_TARGET_MIPS | ||
371 | LJ_K32_2P31, /* 2^31 */ | ||
372 | #endif | ||
373 | #if LJ_TARGET_MIPS64 | ||
374 | LJ_K32_2P63, /* 2^63 */ | ||
375 | LJ_K32_M2P64, /* -2^64 */ | ||
376 | #endif | ||
377 | LJ_K32__MAX | ||
378 | }; | ||
379 | |||
304 | /* Get 16 byte aligned pointer to SIMD constant. */ | 380 | /* Get 16 byte aligned pointer to SIMD constant. */ |
305 | #define LJ_KSIMD(J, n) \ | 381 | #define LJ_KSIMD(J, n) \ |
306 | ((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15)) | 382 | ((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15)) |
307 | 383 | ||
308 | /* Set/reset flag to activate the SPLIT pass for the current trace. */ | 384 | /* Set/reset flag to activate the SPLIT pass for the current trace. */ |
309 | #if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) | 385 | #if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI) |
310 | #define lj_needsplit(J) (J->needsplit = 1) | 386 | #define lj_needsplit(J) (J->needsplit = 1) |
311 | #define lj_resetsplit(J) (J->needsplit = 0) | 387 | #define lj_resetsplit(J) (J->needsplit = 0) |
312 | #else | 388 | #else |
@@ -317,13 +393,14 @@ enum { | |||
317 | /* Fold state is used to fold instructions on-the-fly. */ | 393 | /* Fold state is used to fold instructions on-the-fly. */ |
318 | typedef struct FoldState { | 394 | typedef struct FoldState { |
319 | IRIns ins; /* Currently emitted instruction. */ | 395 | IRIns ins; /* Currently emitted instruction. */ |
320 | IRIns left; /* Instruction referenced by left operand. */ | 396 | IRIns left[2]; /* Instruction referenced by left operand. */ |
321 | IRIns right; /* Instruction referenced by right operand. */ | 397 | IRIns right[2]; /* Instruction referenced by right operand. */ |
322 | } FoldState; | 398 | } FoldState; |
323 | 399 | ||
324 | /* JIT compiler state. */ | 400 | /* JIT compiler state. */ |
325 | typedef struct jit_State { | 401 | typedef struct jit_State { |
326 | GCtrace cur; /* Current trace. */ | 402 | GCtrace cur; /* Current trace. */ |
403 | GCtrace *curfinal; /* Final address of current trace (set during asm). */ | ||
327 | 404 | ||
328 | lua_State *L; /* Current Lua state. */ | 405 | lua_State *L; /* Current Lua state. */ |
329 | const BCIns *pc; /* Current PC. */ | 406 | const BCIns *pc; /* Current PC. */ |
@@ -353,8 +430,9 @@ typedef struct jit_State { | |||
353 | int32_t framedepth; /* Current frame depth. */ | 430 | int32_t framedepth; /* Current frame depth. */ |
354 | int32_t retdepth; /* Return frame depth (count of RETF). */ | 431 | int32_t retdepth; /* Return frame depth (count of RETF). */ |
355 | 432 | ||
356 | MRef k64; /* Pointer to chained array of 64 bit constants. */ | ||
357 | TValue ksimd[LJ_KSIMD__MAX*2+1]; /* 16 byte aligned SIMD constants. */ | 433 | TValue ksimd[LJ_KSIMD__MAX*2+1]; /* 16 byte aligned SIMD constants. */ |
434 | TValue k64[LJ_K64__MAX]; /* Common 8 byte constants used by backends. */ | ||
435 | uint32_t k32[LJ_K32__MAX]; /* Ditto for 4 byte constants. */ | ||
358 | 436 | ||
359 | IRIns *irbuf; /* Temp. IR instruction buffer. Biased with REF_BIAS. */ | 437 | IRIns *irbuf; /* Temp. IR instruction buffer. Biased with REF_BIAS. */ |
360 | IRRef irtoplim; /* Upper limit of instuction buffer (biased). */ | 438 | IRRef irtoplim; /* Upper limit of instuction buffer (biased). */ |
@@ -367,13 +445,15 @@ typedef struct jit_State { | |||
367 | MSize sizesnapmap; /* Size of temp. snapshot map buffer. */ | 445 | MSize sizesnapmap; /* Size of temp. snapshot map buffer. */ |
368 | 446 | ||
369 | PostProc postproc; /* Required post-processing after execution. */ | 447 | PostProc postproc; /* Required post-processing after execution. */ |
370 | #if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) | 448 | #if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI) |
371 | int needsplit; /* Need SPLIT pass. */ | 449 | uint8_t needsplit; /* Need SPLIT pass. */ |
372 | #endif | 450 | #endif |
451 | uint8_t retryrec; /* Retry recording. */ | ||
373 | 452 | ||
374 | GCRef *trace; /* Array of traces. */ | 453 | GCRef *trace; /* Array of traces. */ |
375 | TraceNo freetrace; /* Start of scan for next free trace. */ | 454 | TraceNo freetrace; /* Start of scan for next free trace. */ |
376 | MSize sizetrace; /* Size of trace array. */ | 455 | MSize sizetrace; /* Size of trace array. */ |
456 | IRRef1 ktrace; /* Reference to KGC with GCtrace. */ | ||
377 | 457 | ||
378 | IRRef1 chain[IR__MAX]; /* IR instruction skip-list chain anchors. */ | 458 | IRRef1 chain[IR__MAX]; /* IR instruction skip-list chain anchors. */ |
379 | TRef slot[LJ_MAX_JSLOTS+LJ_STACK_EXTRA]; /* Stack slot map. */ | 459 | TRef slot[LJ_MAX_JSLOTS+LJ_STACK_EXTRA]; /* Stack slot map. */ |
@@ -386,6 +466,10 @@ typedef struct jit_State { | |||
386 | uint32_t penaltyslot; /* Round-robin index into penalty slots. */ | 466 | uint32_t penaltyslot; /* Round-robin index into penalty slots. */ |
387 | uint32_t prngstate; /* PRNG state. */ | 467 | uint32_t prngstate; /* PRNG state. */ |
388 | 468 | ||
469 | #ifdef LUAJIT_ENABLE_TABLE_BUMP | ||
470 | RBCHashEntry rbchash[RBCHASH_SLOTS]; /* Reverse bytecode map. */ | ||
471 | #endif | ||
472 | |||
389 | BPropEntry bpropcache[BPROP_SLOTS]; /* Backpropagation cache slots. */ | 473 | BPropEntry bpropcache[BPROP_SLOTS]; /* Backpropagation cache slots. */ |
390 | uint32_t bpropslot; /* Round-robin index into bpropcache slots. */ | 474 | uint32_t bpropslot; /* Round-robin index into bpropcache slots. */ |
391 | 475 | ||
@@ -406,6 +490,12 @@ typedef struct jit_State { | |||
406 | size_t szallmcarea; /* Total size of all allocated mcode areas. */ | 490 | size_t szallmcarea; /* Total size of all allocated mcode areas. */ |
407 | 491 | ||
408 | TValue errinfo; /* Additional info element for trace errors. */ | 492 | TValue errinfo; /* Additional info element for trace errors. */ |
493 | |||
494 | #if LJ_HASPROFILE | ||
495 | GCproto *prev_pt; /* Previous prototype. */ | ||
496 | BCLine prev_line; /* Previous line. */ | ||
497 | int prof_mode; /* Profiling mode: 0, 'f', 'l'. */ | ||
498 | #endif | ||
409 | } | 499 | } |
410 | #if LJ_TARGET_ARM | 500 | #if LJ_TARGET_ARM |
411 | LJ_ALIGN(16) /* For DISPATCH-relative addresses in assembler part. */ | 501 | LJ_ALIGN(16) /* For DISPATCH-relative addresses in assembler part. */ |
diff --git a/src/lj_lex.c b/src/lj_lex.c index ca942583..05a2efc3 100644 --- a/src/lj_lex.c +++ b/src/lj_lex.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include "lj_obj.h" | 12 | #include "lj_obj.h" |
13 | #include "lj_gc.h" | 13 | #include "lj_gc.h" |
14 | #include "lj_err.h" | 14 | #include "lj_err.h" |
15 | #include "lj_buf.h" | ||
15 | #include "lj_str.h" | 16 | #include "lj_str.h" |
16 | #if LJ_HASFFI | 17 | #if LJ_HASFFI |
17 | #include "lj_tab.h" | 18 | #include "lj_tab.h" |
@@ -24,6 +25,7 @@ | |||
24 | #include "lj_parse.h" | 25 | #include "lj_parse.h" |
25 | #include "lj_char.h" | 26 | #include "lj_char.h" |
26 | #include "lj_strscan.h" | 27 | #include "lj_strscan.h" |
28 | #include "lj_strfmt.h" | ||
27 | 29 | ||
28 | /* Lua lexer token names. */ | 30 | /* Lua lexer token names. */ |
29 | static const char *const tokennames[] = { | 31 | static const char *const tokennames[] = { |
@@ -37,54 +39,54 @@ TKDEF(TKSTR1, TKSTR2) | |||
37 | 39 | ||
38 | /* -- Buffer handling ----------------------------------------------------- */ | 40 | /* -- Buffer handling ----------------------------------------------------- */ |
39 | 41 | ||
40 | #define char2int(c) ((int)(uint8_t)(c)) | 42 | #define LEX_EOF (-1) |
41 | #define next(ls) \ | 43 | #define lex_iseol(ls) (ls->c == '\n' || ls->c == '\r') |
42 | (ls->current = (ls->n--) > 0 ? char2int(*ls->p++) : fillbuf(ls)) | ||
43 | #define save_and_next(ls) (save(ls, ls->current), next(ls)) | ||
44 | #define currIsNewline(ls) (ls->current == '\n' || ls->current == '\r') | ||
45 | #define END_OF_STREAM (-1) | ||
46 | 44 | ||
47 | static int fillbuf(LexState *ls) | 45 | /* Get more input from reader. */ |
46 | static LJ_NOINLINE LexChar lex_more(LexState *ls) | ||
48 | { | 47 | { |
49 | size_t sz; | 48 | size_t sz; |
50 | const char *buf = ls->rfunc(ls->L, ls->rdata, &sz); | 49 | const char *p = ls->rfunc(ls->L, ls->rdata, &sz); |
51 | if (buf == NULL || sz == 0) return END_OF_STREAM; | 50 | if (p == NULL || sz == 0) return LEX_EOF; |
52 | if (sz >= LJ_MAX_MEM) { | 51 | if (sz >= LJ_MAX_BUF) { |
53 | if (sz != ~(size_t)0) lj_err_mem(ls->L); | 52 | if (sz != ~(size_t)0) lj_err_mem(ls->L); |
53 | sz = ~(uintptr_t)0 - (uintptr_t)p; | ||
54 | if (sz >= LJ_MAX_BUF) sz = LJ_MAX_BUF-1; | ||
54 | ls->endmark = 1; | 55 | ls->endmark = 1; |
55 | } | 56 | } |
56 | ls->n = (MSize)sz - 1; | 57 | ls->pe = p + sz; |
57 | ls->p = buf; | 58 | ls->p = p + 1; |
58 | return char2int(*(ls->p++)); | 59 | return (LexChar)(uint8_t)p[0]; |
59 | } | 60 | } |
60 | 61 | ||
61 | static LJ_NOINLINE void save_grow(LexState *ls, int c) | 62 | /* Get next character. */ |
63 | static LJ_AINLINE LexChar lex_next(LexState *ls) | ||
62 | { | 64 | { |
63 | MSize newsize; | 65 | return (ls->c = ls->p < ls->pe ? (LexChar)(uint8_t)*ls->p++ : lex_more(ls)); |
64 | if (ls->sb.sz >= LJ_MAX_STR/2) | ||
65 | lj_lex_error(ls, 0, LJ_ERR_XELEM); | ||
66 | newsize = ls->sb.sz * 2; | ||
67 | lj_str_resizebuf(ls->L, &ls->sb, newsize); | ||
68 | ls->sb.buf[ls->sb.n++] = (char)c; | ||
69 | } | 66 | } |
70 | 67 | ||
71 | static LJ_AINLINE void save(LexState *ls, int c) | 68 | /* Save character. */ |
69 | static LJ_AINLINE void lex_save(LexState *ls, LexChar c) | ||
72 | { | 70 | { |
73 | if (LJ_UNLIKELY(ls->sb.n + 1 > ls->sb.sz)) | 71 | lj_buf_putb(&ls->sb, c); |
74 | save_grow(ls, c); | ||
75 | else | ||
76 | ls->sb.buf[ls->sb.n++] = (char)c; | ||
77 | } | 72 | } |
78 | 73 | ||
79 | static void inclinenumber(LexState *ls) | 74 | /* Save previous character and get next character. */ |
75 | static LJ_AINLINE LexChar lex_savenext(LexState *ls) | ||
80 | { | 76 | { |
81 | int old = ls->current; | 77 | lex_save(ls, ls->c); |
82 | lua_assert(currIsNewline(ls)); | 78 | return lex_next(ls); |
83 | next(ls); /* skip `\n' or `\r' */ | 79 | } |
84 | if (currIsNewline(ls) && ls->current != old) | 80 | |
85 | next(ls); /* skip `\n\r' or `\r\n' */ | 81 | /* Skip line break. Handles "\n", "\r", "\r\n" or "\n\r". */ |
82 | static void lex_newline(LexState *ls) | ||
83 | { | ||
84 | LexChar old = ls->c; | ||
85 | lua_assert(lex_iseol(ls)); | ||
86 | lex_next(ls); /* Skip "\n" or "\r". */ | ||
87 | if (lex_iseol(ls) && ls->c != old) lex_next(ls); /* Skip "\n\r" or "\r\n". */ | ||
86 | if (++ls->linenumber >= LJ_MAX_LINE) | 88 | if (++ls->linenumber >= LJ_MAX_LINE) |
87 | lj_lex_error(ls, ls->token, LJ_ERR_XLINES); | 89 | lj_lex_error(ls, ls->tok, LJ_ERR_XLINES); |
88 | } | 90 | } |
89 | 91 | ||
90 | /* -- Scanner for terminals ----------------------------------------------- */ | 92 | /* -- Scanner for terminals ----------------------------------------------- */ |
@@ -93,19 +95,17 @@ static void inclinenumber(LexState *ls) | |||
93 | static void lex_number(LexState *ls, TValue *tv) | 95 | static void lex_number(LexState *ls, TValue *tv) |
94 | { | 96 | { |
95 | StrScanFmt fmt; | 97 | StrScanFmt fmt; |
96 | int c, xp = 'e'; | 98 | LexChar c, xp = 'e'; |
97 | lua_assert(lj_char_isdigit(ls->current)); | 99 | lua_assert(lj_char_isdigit(ls->c)); |
98 | if ((c = ls->current) == '0') { | 100 | if ((c = ls->c) == '0' && (lex_savenext(ls) | 0x20) == 'x') |
99 | save_and_next(ls); | 101 | xp = 'p'; |
100 | if ((ls->current | 0x20) == 'x') xp = 'p'; | 102 | while (lj_char_isident(ls->c) || ls->c == '.' || |
101 | } | 103 | ((ls->c == '-' || ls->c == '+') && (c | 0x20) == xp)) { |
102 | while (lj_char_isident(ls->current) || ls->current == '.' || | 104 | c = ls->c; |
103 | ((ls->current == '-' || ls->current == '+') && (c | 0x20) == xp)) { | 105 | lex_savenext(ls); |
104 | c = ls->current; | ||
105 | save_and_next(ls); | ||
106 | } | 106 | } |
107 | save(ls, '\0'); | 107 | lex_save(ls, '\0'); |
108 | fmt = lj_strscan_scan((const uint8_t *)ls->sb.buf, tv, | 108 | fmt = lj_strscan_scan((const uint8_t *)sbufB(&ls->sb), tv, |
109 | (LJ_DUALNUM ? STRSCAN_OPT_TOINT : STRSCAN_OPT_TONUM) | | 109 | (LJ_DUALNUM ? STRSCAN_OPT_TOINT : STRSCAN_OPT_TONUM) | |
110 | (LJ_HASFFI ? (STRSCAN_OPT_LL|STRSCAN_OPT_IMAG) : 0)); | 110 | (LJ_HASFFI ? (STRSCAN_OPT_LL|STRSCAN_OPT_IMAG) : 0)); |
111 | if (LJ_DUALNUM && fmt == STRSCAN_INT) { | 111 | if (LJ_DUALNUM && fmt == STRSCAN_INT) { |
@@ -138,60 +138,60 @@ static void lex_number(LexState *ls, TValue *tv) | |||
138 | } | 138 | } |
139 | } | 139 | } |
140 | 140 | ||
141 | static int skip_sep(LexState *ls) | 141 | /* Skip equal signs for "[=...=[" and "]=...=]" and return their count. */ |
142 | static int lex_skipeq(LexState *ls) | ||
142 | { | 143 | { |
143 | int count = 0; | 144 | int count = 0; |
144 | int s = ls->current; | 145 | LexChar s = ls->c; |
145 | lua_assert(s == '[' || s == ']'); | 146 | lua_assert(s == '[' || s == ']'); |
146 | save_and_next(ls); | 147 | while (lex_savenext(ls) == '=' && count < 0x20000000) |
147 | while (ls->current == '=' && count < 0x20000000) { | ||
148 | save_and_next(ls); | ||
149 | count++; | 148 | count++; |
150 | } | 149 | return (ls->c == s) ? count : (-count) - 1; |
151 | return (ls->current == s) ? count : (-count) - 1; | ||
152 | } | 150 | } |
153 | 151 | ||
154 | static void read_long_string(LexState *ls, TValue *tv, int sep) | 152 | /* Parse a long string or long comment (tv set to NULL). */ |
153 | static void lex_longstring(LexState *ls, TValue *tv, int sep) | ||
155 | { | 154 | { |
156 | save_and_next(ls); /* skip 2nd `[' */ | 155 | lex_savenext(ls); /* Skip second '['. */ |
157 | if (currIsNewline(ls)) /* string starts with a newline? */ | 156 | if (lex_iseol(ls)) /* Skip initial newline. */ |
158 | inclinenumber(ls); /* skip it */ | 157 | lex_newline(ls); |
159 | for (;;) { | 158 | for (;;) { |
160 | switch (ls->current) { | 159 | switch (ls->c) { |
161 | case END_OF_STREAM: | 160 | case LEX_EOF: |
162 | lj_lex_error(ls, TK_eof, tv ? LJ_ERR_XLSTR : LJ_ERR_XLCOM); | 161 | lj_lex_error(ls, TK_eof, tv ? LJ_ERR_XLSTR : LJ_ERR_XLCOM); |
163 | break; | 162 | break; |
164 | case ']': | 163 | case ']': |
165 | if (skip_sep(ls) == sep) { | 164 | if (lex_skipeq(ls) == sep) { |
166 | save_and_next(ls); /* skip 2nd `]' */ | 165 | lex_savenext(ls); /* Skip second ']'. */ |
167 | goto endloop; | 166 | goto endloop; |
168 | } | 167 | } |
169 | break; | 168 | break; |
170 | case '\n': | 169 | case '\n': |
171 | case '\r': | 170 | case '\r': |
172 | save(ls, '\n'); | 171 | lex_save(ls, '\n'); |
173 | inclinenumber(ls); | 172 | lex_newline(ls); |
174 | if (!tv) lj_str_resetbuf(&ls->sb); /* avoid wasting space */ | 173 | if (!tv) lj_buf_reset(&ls->sb); /* Don't waste space for comments. */ |
175 | break; | 174 | break; |
176 | default: | 175 | default: |
177 | if (tv) save_and_next(ls); | 176 | lex_savenext(ls); |
178 | else next(ls); | ||
179 | break; | 177 | break; |
180 | } | 178 | } |
181 | } endloop: | 179 | } endloop: |
182 | if (tv) { | 180 | if (tv) { |
183 | GCstr *str = lj_parse_keepstr(ls, ls->sb.buf + (2 + (MSize)sep), | 181 | GCstr *str = lj_parse_keepstr(ls, sbufB(&ls->sb) + (2 + (MSize)sep), |
184 | ls->sb.n - 2*(2 + (MSize)sep)); | 182 | sbuflen(&ls->sb) - 2*(2 + (MSize)sep)); |
185 | setstrV(ls->L, tv, str); | 183 | setstrV(ls->L, tv, str); |
186 | } | 184 | } |
187 | } | 185 | } |
188 | 186 | ||
189 | static void read_string(LexState *ls, int delim, TValue *tv) | 187 | /* Parse a string. */ |
188 | static void lex_string(LexState *ls, TValue *tv) | ||
190 | { | 189 | { |
191 | save_and_next(ls); | 190 | LexChar delim = ls->c; /* Delimiter is '\'' or '"'. */ |
192 | while (ls->current != delim) { | 191 | lex_savenext(ls); |
193 | switch (ls->current) { | 192 | while (ls->c != delim) { |
194 | case END_OF_STREAM: | 193 | switch (ls->c) { |
194 | case LEX_EOF: | ||
195 | lj_lex_error(ls, TK_eof, LJ_ERR_XSTR); | 195 | lj_lex_error(ls, TK_eof, LJ_ERR_XSTR); |
196 | continue; | 196 | continue; |
197 | case '\n': | 197 | case '\n': |
@@ -199,7 +199,7 @@ static void read_string(LexState *ls, int delim, TValue *tv) | |||
199 | lj_lex_error(ls, TK_string, LJ_ERR_XSTR); | 199 | lj_lex_error(ls, TK_string, LJ_ERR_XSTR); |
200 | continue; | 200 | continue; |
201 | case '\\': { | 201 | case '\\': { |
202 | int c = next(ls); /* Skip the '\\'. */ | 202 | LexChar c = lex_next(ls); /* Skip the '\\'. */ |
203 | switch (c) { | 203 | switch (c) { |
204 | case 'a': c = '\a'; break; | 204 | case 'a': c = '\a'; break; |
205 | case 'b': c = '\b'; break; | 205 | case 'b': c = '\b'; break; |
@@ -209,111 +209,139 @@ static void read_string(LexState *ls, int delim, TValue *tv) | |||
209 | case 't': c = '\t'; break; | 209 | case 't': c = '\t'; break; |
210 | case 'v': c = '\v'; break; | 210 | case 'v': c = '\v'; break; |
211 | case 'x': /* Hexadecimal escape '\xXX'. */ | 211 | case 'x': /* Hexadecimal escape '\xXX'. */ |
212 | c = (next(ls) & 15u) << 4; | 212 | c = (lex_next(ls) & 15u) << 4; |
213 | if (!lj_char_isdigit(ls->current)) { | 213 | if (!lj_char_isdigit(ls->c)) { |
214 | if (!lj_char_isxdigit(ls->current)) goto err_xesc; | 214 | if (!lj_char_isxdigit(ls->c)) goto err_xesc; |
215 | c += 9 << 4; | 215 | c += 9 << 4; |
216 | } | 216 | } |
217 | c += (next(ls) & 15u); | 217 | c += (lex_next(ls) & 15u); |
218 | if (!lj_char_isdigit(ls->current)) { | 218 | if (!lj_char_isdigit(ls->c)) { |
219 | if (!lj_char_isxdigit(ls->current)) goto err_xesc; | 219 | if (!lj_char_isxdigit(ls->c)) goto err_xesc; |
220 | c += 9; | 220 | c += 9; |
221 | } | 221 | } |
222 | break; | 222 | break; |
223 | case 'u': /* Unicode escape '\u{XX...}'. */ | ||
224 | if (lex_next(ls) != '{') goto err_xesc; | ||
225 | lex_next(ls); | ||
226 | c = 0; | ||
227 | do { | ||
228 | c = (c << 4) | (ls->c & 15u); | ||
229 | if (!lj_char_isdigit(ls->c)) { | ||
230 | if (!lj_char_isxdigit(ls->c)) goto err_xesc; | ||
231 | c += 9; | ||
232 | } | ||
233 | if (c >= 0x110000) goto err_xesc; /* Out of Unicode range. */ | ||
234 | } while (lex_next(ls) != '}'); | ||
235 | if (c < 0x800) { | ||
236 | if (c < 0x80) break; | ||
237 | lex_save(ls, 0xc0 | (c >> 6)); | ||
238 | } else { | ||
239 | if (c >= 0x10000) { | ||
240 | lex_save(ls, 0xf0 | (c >> 18)); | ||
241 | lex_save(ls, 0x80 | ((c >> 12) & 0x3f)); | ||
242 | } else { | ||
243 | if (c >= 0xd800 && c < 0xe000) goto err_xesc; /* No surrogates. */ | ||
244 | lex_save(ls, 0xe0 | (c >> 12)); | ||
245 | } | ||
246 | lex_save(ls, 0x80 | ((c >> 6) & 0x3f)); | ||
247 | } | ||
248 | c = 0x80 | (c & 0x3f); | ||
249 | break; | ||
223 | case 'z': /* Skip whitespace. */ | 250 | case 'z': /* Skip whitespace. */ |
224 | next(ls); | 251 | lex_next(ls); |
225 | while (lj_char_isspace(ls->current)) | 252 | while (lj_char_isspace(ls->c)) |
226 | if (currIsNewline(ls)) inclinenumber(ls); else next(ls); | 253 | if (lex_iseol(ls)) lex_newline(ls); else lex_next(ls); |
227 | continue; | 254 | continue; |
228 | case '\n': case '\r': save(ls, '\n'); inclinenumber(ls); continue; | 255 | case '\n': case '\r': lex_save(ls, '\n'); lex_newline(ls); continue; |
229 | case '\\': case '\"': case '\'': break; | 256 | case '\\': case '\"': case '\'': break; |
230 | case END_OF_STREAM: continue; | 257 | case LEX_EOF: continue; |
231 | default: | 258 | default: |
232 | if (!lj_char_isdigit(c)) | 259 | if (!lj_char_isdigit(c)) |
233 | goto err_xesc; | 260 | goto err_xesc; |
234 | c -= '0'; /* Decimal escape '\ddd'. */ | 261 | c -= '0'; /* Decimal escape '\ddd'. */ |
235 | if (lj_char_isdigit(next(ls))) { | 262 | if (lj_char_isdigit(lex_next(ls))) { |
236 | c = c*10 + (ls->current - '0'); | 263 | c = c*10 + (ls->c - '0'); |
237 | if (lj_char_isdigit(next(ls))) { | 264 | if (lj_char_isdigit(lex_next(ls))) { |
238 | c = c*10 + (ls->current - '0'); | 265 | c = c*10 + (ls->c - '0'); |
239 | if (c > 255) { | 266 | if (c > 255) { |
240 | err_xesc: | 267 | err_xesc: |
241 | lj_lex_error(ls, TK_string, LJ_ERR_XESC); | 268 | lj_lex_error(ls, TK_string, LJ_ERR_XESC); |
242 | } | 269 | } |
243 | next(ls); | 270 | lex_next(ls); |
244 | } | 271 | } |
245 | } | 272 | } |
246 | save(ls, c); | 273 | lex_save(ls, c); |
247 | continue; | 274 | continue; |
248 | } | 275 | } |
249 | save(ls, c); | 276 | lex_save(ls, c); |
250 | next(ls); | 277 | lex_next(ls); |
251 | continue; | 278 | continue; |
252 | } | 279 | } |
253 | default: | 280 | default: |
254 | save_and_next(ls); | 281 | lex_savenext(ls); |
255 | break; | 282 | break; |
256 | } | 283 | } |
257 | } | 284 | } |
258 | save_and_next(ls); /* skip delimiter */ | 285 | lex_savenext(ls); /* Skip trailing delimiter. */ |
259 | setstrV(ls->L, tv, lj_parse_keepstr(ls, ls->sb.buf + 1, ls->sb.n - 2)); | 286 | setstrV(ls->L, tv, |
287 | lj_parse_keepstr(ls, sbufB(&ls->sb)+1, sbuflen(&ls->sb)-2)); | ||
260 | } | 288 | } |
261 | 289 | ||
262 | /* -- Main lexical scanner ------------------------------------------------ */ | 290 | /* -- Main lexical scanner ------------------------------------------------ */ |
263 | 291 | ||
264 | static int llex(LexState *ls, TValue *tv) | 292 | /* Get next lexical token. */ |
293 | static LexToken lex_scan(LexState *ls, TValue *tv) | ||
265 | { | 294 | { |
266 | lj_str_resetbuf(&ls->sb); | 295 | lj_buf_reset(&ls->sb); |
267 | for (;;) { | 296 | for (;;) { |
268 | if (lj_char_isident(ls->current)) { | 297 | if (lj_char_isident(ls->c)) { |
269 | GCstr *s; | 298 | GCstr *s; |
270 | if (lj_char_isdigit(ls->current)) { /* Numeric literal. */ | 299 | if (lj_char_isdigit(ls->c)) { /* Numeric literal. */ |
271 | lex_number(ls, tv); | 300 | lex_number(ls, tv); |
272 | return TK_number; | 301 | return TK_number; |
273 | } | 302 | } |
274 | /* Identifier or reserved word. */ | 303 | /* Identifier or reserved word. */ |
275 | do { | 304 | do { |
276 | save_and_next(ls); | 305 | lex_savenext(ls); |
277 | } while (lj_char_isident(ls->current)); | 306 | } while (lj_char_isident(ls->c)); |
278 | s = lj_parse_keepstr(ls, ls->sb.buf, ls->sb.n); | 307 | s = lj_parse_keepstr(ls, sbufB(&ls->sb), sbuflen(&ls->sb)); |
279 | setstrV(ls->L, tv, s); | 308 | setstrV(ls->L, tv, s); |
280 | if (s->reserved > 0) /* Reserved word? */ | 309 | if (s->reserved > 0) /* Reserved word? */ |
281 | return TK_OFS + s->reserved; | 310 | return TK_OFS + s->reserved; |
282 | return TK_name; | 311 | return TK_name; |
283 | } | 312 | } |
284 | switch (ls->current) { | 313 | switch (ls->c) { |
285 | case '\n': | 314 | case '\n': |
286 | case '\r': | 315 | case '\r': |
287 | inclinenumber(ls); | 316 | lex_newline(ls); |
288 | continue; | 317 | continue; |
289 | case ' ': | 318 | case ' ': |
290 | case '\t': | 319 | case '\t': |
291 | case '\v': | 320 | case '\v': |
292 | case '\f': | 321 | case '\f': |
293 | next(ls); | 322 | lex_next(ls); |
294 | continue; | 323 | continue; |
295 | case '-': | 324 | case '-': |
296 | next(ls); | 325 | lex_next(ls); |
297 | if (ls->current != '-') return '-'; | 326 | if (ls->c != '-') return '-'; |
298 | /* else is a comment */ | 327 | lex_next(ls); |
299 | next(ls); | 328 | if (ls->c == '[') { /* Long comment "--[=*[...]=*]". */ |
300 | if (ls->current == '[') { | 329 | int sep = lex_skipeq(ls); |
301 | int sep = skip_sep(ls); | 330 | lj_buf_reset(&ls->sb); /* `lex_skipeq' may dirty the buffer */ |
302 | lj_str_resetbuf(&ls->sb); /* `skip_sep' may dirty the buffer */ | ||
303 | if (sep >= 0) { | 331 | if (sep >= 0) { |
304 | read_long_string(ls, NULL, sep); /* long comment */ | 332 | lex_longstring(ls, NULL, sep); |
305 | lj_str_resetbuf(&ls->sb); | 333 | lj_buf_reset(&ls->sb); |
306 | continue; | 334 | continue; |
307 | } | 335 | } |
308 | } | 336 | } |
309 | /* else short comment */ | 337 | /* Short comment "--.*\n". */ |
310 | while (!currIsNewline(ls) && ls->current != END_OF_STREAM) | 338 | while (!lex_iseol(ls) && ls->c != LEX_EOF) |
311 | next(ls); | 339 | lex_next(ls); |
312 | continue; | 340 | continue; |
313 | case '[': { | 341 | case '[': { |
314 | int sep = skip_sep(ls); | 342 | int sep = lex_skipeq(ls); |
315 | if (sep >= 0) { | 343 | if (sep >= 0) { |
316 | read_long_string(ls, tv, sep); | 344 | lex_longstring(ls, tv, sep); |
317 | return TK_string; | 345 | return TK_string; |
318 | } else if (sep == -1) { | 346 | } else if (sep == -1) { |
319 | return '['; | 347 | return '['; |
@@ -323,44 +351,43 @@ static int llex(LexState *ls, TValue *tv) | |||
323 | } | 351 | } |
324 | } | 352 | } |
325 | case '=': | 353 | case '=': |
326 | next(ls); | 354 | lex_next(ls); |
327 | if (ls->current != '=') return '='; else { next(ls); return TK_eq; } | 355 | if (ls->c != '=') return '='; else { lex_next(ls); return TK_eq; } |
328 | case '<': | 356 | case '<': |
329 | next(ls); | 357 | lex_next(ls); |
330 | if (ls->current != '=') return '<'; else { next(ls); return TK_le; } | 358 | if (ls->c != '=') return '<'; else { lex_next(ls); return TK_le; } |
331 | case '>': | 359 | case '>': |
332 | next(ls); | 360 | lex_next(ls); |
333 | if (ls->current != '=') return '>'; else { next(ls); return TK_ge; } | 361 | if (ls->c != '=') return '>'; else { lex_next(ls); return TK_ge; } |
334 | case '~': | 362 | case '~': |
335 | next(ls); | 363 | lex_next(ls); |
336 | if (ls->current != '=') return '~'; else { next(ls); return TK_ne; } | 364 | if (ls->c != '=') return '~'; else { lex_next(ls); return TK_ne; } |
337 | case ':': | 365 | case ':': |
338 | next(ls); | 366 | lex_next(ls); |
339 | if (ls->current != ':') return ':'; else { next(ls); return TK_label; } | 367 | if (ls->c != ':') return ':'; else { lex_next(ls); return TK_label; } |
340 | case '"': | 368 | case '"': |
341 | case '\'': | 369 | case '\'': |
342 | read_string(ls, ls->current, tv); | 370 | lex_string(ls, tv); |
343 | return TK_string; | 371 | return TK_string; |
344 | case '.': | 372 | case '.': |
345 | save_and_next(ls); | 373 | if (lex_savenext(ls) == '.') { |
346 | if (ls->current == '.') { | 374 | lex_next(ls); |
347 | next(ls); | 375 | if (ls->c == '.') { |
348 | if (ls->current == '.') { | 376 | lex_next(ls); |
349 | next(ls); | ||
350 | return TK_dots; /* ... */ | 377 | return TK_dots; /* ... */ |
351 | } | 378 | } |
352 | return TK_concat; /* .. */ | 379 | return TK_concat; /* .. */ |
353 | } else if (!lj_char_isdigit(ls->current)) { | 380 | } else if (!lj_char_isdigit(ls->c)) { |
354 | return '.'; | 381 | return '.'; |
355 | } else { | 382 | } else { |
356 | lex_number(ls, tv); | 383 | lex_number(ls, tv); |
357 | return TK_number; | 384 | return TK_number; |
358 | } | 385 | } |
359 | case END_OF_STREAM: | 386 | case LEX_EOF: |
360 | return TK_eof; | 387 | return TK_eof; |
361 | default: { | 388 | default: { |
362 | int c = ls->current; | 389 | LexChar c = ls->c; |
363 | next(ls); | 390 | lex_next(ls); |
364 | return c; /* Single-char tokens (+ - / ...). */ | 391 | return c; /* Single-char tokens (+ - / ...). */ |
365 | } | 392 | } |
366 | } | 393 | } |
@@ -375,36 +402,33 @@ int lj_lex_setup(lua_State *L, LexState *ls) | |||
375 | int header = 0; | 402 | int header = 0; |
376 | ls->L = L; | 403 | ls->L = L; |
377 | ls->fs = NULL; | 404 | ls->fs = NULL; |
378 | ls->n = 0; | 405 | ls->pe = ls->p = NULL; |
379 | ls->p = NULL; | ||
380 | ls->vstack = NULL; | 406 | ls->vstack = NULL; |
381 | ls->sizevstack = 0; | 407 | ls->sizevstack = 0; |
382 | ls->vtop = 0; | 408 | ls->vtop = 0; |
383 | ls->bcstack = NULL; | 409 | ls->bcstack = NULL; |
384 | ls->sizebcstack = 0; | 410 | ls->sizebcstack = 0; |
385 | ls->token = 0; | 411 | ls->tok = 0; |
386 | ls->lookahead = TK_eof; /* No look-ahead token. */ | 412 | ls->lookahead = TK_eof; /* No look-ahead token. */ |
387 | ls->linenumber = 1; | 413 | ls->linenumber = 1; |
388 | ls->lastline = 1; | 414 | ls->lastline = 1; |
389 | ls->endmark = 0; | 415 | ls->endmark = 0; |
390 | lj_str_resizebuf(ls->L, &ls->sb, LJ_MIN_SBUF); | 416 | lex_next(ls); /* Read-ahead first char. */ |
391 | next(ls); /* Read-ahead first char. */ | 417 | if (ls->c == 0xef && ls->p + 2 <= ls->pe && (uint8_t)ls->p[0] == 0xbb && |
392 | if (ls->current == 0xef && ls->n >= 2 && char2int(ls->p[0]) == 0xbb && | 418 | (uint8_t)ls->p[1] == 0xbf) { /* Skip UTF-8 BOM (if buffered). */ |
393 | char2int(ls->p[1]) == 0xbf) { /* Skip UTF-8 BOM (if buffered). */ | ||
394 | ls->n -= 2; | ||
395 | ls->p += 2; | 419 | ls->p += 2; |
396 | next(ls); | 420 | lex_next(ls); |
397 | header = 1; | 421 | header = 1; |
398 | } | 422 | } |
399 | if (ls->current == '#') { /* Skip POSIX #! header line. */ | 423 | if (ls->c == '#') { /* Skip POSIX #! header line. */ |
400 | do { | 424 | do { |
401 | next(ls); | 425 | lex_next(ls); |
402 | if (ls->current == END_OF_STREAM) return 0; | 426 | if (ls->c == LEX_EOF) return 0; |
403 | } while (!currIsNewline(ls)); | 427 | } while (!lex_iseol(ls)); |
404 | inclinenumber(ls); | 428 | lex_newline(ls); |
405 | header = 1; | 429 | header = 1; |
406 | } | 430 | } |
407 | if (ls->current == LUA_SIGNATURE[0]) { /* Bytecode dump. */ | 431 | if (ls->c == LUA_SIGNATURE[0]) { /* Bytecode dump. */ |
408 | if (header) { | 432 | if (header) { |
409 | /* | 433 | /* |
410 | ** Loading bytecode with an extra header is disabled for security | 434 | ** Loading bytecode with an extra header is disabled for security |
@@ -426,55 +450,60 @@ void lj_lex_cleanup(lua_State *L, LexState *ls) | |||
426 | global_State *g = G(L); | 450 | global_State *g = G(L); |
427 | lj_mem_freevec(g, ls->bcstack, ls->sizebcstack, BCInsLine); | 451 | lj_mem_freevec(g, ls->bcstack, ls->sizebcstack, BCInsLine); |
428 | lj_mem_freevec(g, ls->vstack, ls->sizevstack, VarInfo); | 452 | lj_mem_freevec(g, ls->vstack, ls->sizevstack, VarInfo); |
429 | lj_str_freebuf(g, &ls->sb); | 453 | lj_buf_free(g, &ls->sb); |
430 | } | 454 | } |
431 | 455 | ||
456 | /* Return next lexical token. */ | ||
432 | void lj_lex_next(LexState *ls) | 457 | void lj_lex_next(LexState *ls) |
433 | { | 458 | { |
434 | ls->lastline = ls->linenumber; | 459 | ls->lastline = ls->linenumber; |
435 | if (LJ_LIKELY(ls->lookahead == TK_eof)) { /* No lookahead token? */ | 460 | if (LJ_LIKELY(ls->lookahead == TK_eof)) { /* No lookahead token? */ |
436 | ls->token = llex(ls, &ls->tokenval); /* Get next token. */ | 461 | ls->tok = lex_scan(ls, &ls->tokval); /* Get next token. */ |
437 | } else { /* Otherwise return lookahead token. */ | 462 | } else { /* Otherwise return lookahead token. */ |
438 | ls->token = ls->lookahead; | 463 | ls->tok = ls->lookahead; |
439 | ls->lookahead = TK_eof; | 464 | ls->lookahead = TK_eof; |
440 | ls->tokenval = ls->lookaheadval; | 465 | ls->tokval = ls->lookaheadval; |
441 | } | 466 | } |
442 | } | 467 | } |
443 | 468 | ||
469 | /* Look ahead for the next token. */ | ||
444 | LexToken lj_lex_lookahead(LexState *ls) | 470 | LexToken lj_lex_lookahead(LexState *ls) |
445 | { | 471 | { |
446 | lua_assert(ls->lookahead == TK_eof); | 472 | lua_assert(ls->lookahead == TK_eof); |
447 | ls->lookahead = llex(ls, &ls->lookaheadval); | 473 | ls->lookahead = lex_scan(ls, &ls->lookaheadval); |
448 | return ls->lookahead; | 474 | return ls->lookahead; |
449 | } | 475 | } |
450 | 476 | ||
451 | const char *lj_lex_token2str(LexState *ls, LexToken token) | 477 | /* Convert token to string. */ |
478 | const char *lj_lex_token2str(LexState *ls, LexToken tok) | ||
452 | { | 479 | { |
453 | if (token > TK_OFS) | 480 | if (tok > TK_OFS) |
454 | return tokennames[token-TK_OFS-1]; | 481 | return tokennames[tok-TK_OFS-1]; |
455 | else if (!lj_char_iscntrl(token)) | 482 | else if (!lj_char_iscntrl(tok)) |
456 | return lj_str_pushf(ls->L, "%c", token); | 483 | return lj_strfmt_pushf(ls->L, "%c", tok); |
457 | else | 484 | else |
458 | return lj_str_pushf(ls->L, "char(%d)", token); | 485 | return lj_strfmt_pushf(ls->L, "char(%d)", tok); |
459 | } | 486 | } |
460 | 487 | ||
461 | void lj_lex_error(LexState *ls, LexToken token, ErrMsg em, ...) | 488 | /* Lexer error. */ |
489 | void lj_lex_error(LexState *ls, LexToken tok, ErrMsg em, ...) | ||
462 | { | 490 | { |
463 | const char *tok; | 491 | const char *tokstr; |
464 | va_list argp; | 492 | va_list argp; |
465 | if (token == 0) { | 493 | if (tok == 0) { |
466 | tok = NULL; | 494 | tokstr = NULL; |
467 | } else if (token == TK_name || token == TK_string || token == TK_number) { | 495 | } else if (tok == TK_name || tok == TK_string || tok == TK_number) { |
468 | save(ls, '\0'); | 496 | lex_save(ls, '\0'); |
469 | tok = ls->sb.buf; | 497 | tokstr = sbufB(&ls->sb); |
470 | } else { | 498 | } else { |
471 | tok = lj_lex_token2str(ls, token); | 499 | tokstr = lj_lex_token2str(ls, tok); |
472 | } | 500 | } |
473 | va_start(argp, em); | 501 | va_start(argp, em); |
474 | lj_err_lex(ls->L, ls->chunkname, tok, ls->linenumber, em, argp); | 502 | lj_err_lex(ls->L, ls->chunkname, tokstr, ls->linenumber, em, argp); |
475 | va_end(argp); | 503 | va_end(argp); |
476 | } | 504 | } |
477 | 505 | ||
506 | /* Initialize strings for reserved words. */ | ||
478 | void lj_lex_init(lua_State *L) | 507 | void lj_lex_init(lua_State *L) |
479 | { | 508 | { |
480 | uint32_t i; | 509 | uint32_t i; |
diff --git a/src/lj_lex.h b/src/lj_lex.h index b1bc4876..8665aa2a 100644 --- a/src/lj_lex.h +++ b/src/lj_lex.h | |||
@@ -30,7 +30,8 @@ TKDEF(TKENUM1, TKENUM2) | |||
30 | TK_RESERVED = TK_while - TK_OFS | 30 | TK_RESERVED = TK_while - TK_OFS |
31 | }; | 31 | }; |
32 | 32 | ||
33 | typedef int LexToken; | 33 | typedef int LexChar; /* Lexical character. Unsigned ext. from char. */ |
34 | typedef int LexToken; /* Lexical token. */ | ||
34 | 35 | ||
35 | /* Combined bytecode ins/line. Only used during bytecode generation. */ | 36 | /* Combined bytecode ins/line. Only used during bytecode generation. */ |
36 | typedef struct BCInsLine { | 37 | typedef struct BCInsLine { |
@@ -51,13 +52,13 @@ typedef struct VarInfo { | |||
51 | typedef struct LexState { | 52 | typedef struct LexState { |
52 | struct FuncState *fs; /* Current FuncState. Defined in lj_parse.c. */ | 53 | struct FuncState *fs; /* Current FuncState. Defined in lj_parse.c. */ |
53 | struct lua_State *L; /* Lua state. */ | 54 | struct lua_State *L; /* Lua state. */ |
54 | TValue tokenval; /* Current token value. */ | 55 | TValue tokval; /* Current token value. */ |
55 | TValue lookaheadval; /* Lookahead token value. */ | 56 | TValue lookaheadval; /* Lookahead token value. */ |
56 | int current; /* Current character (charint). */ | ||
57 | LexToken token; /* Current token. */ | ||
58 | LexToken lookahead; /* Lookahead token. */ | ||
59 | MSize n; /* Bytes left in input buffer. */ | ||
60 | const char *p; /* Current position in input buffer. */ | 57 | const char *p; /* Current position in input buffer. */ |
58 | const char *pe; /* End of input buffer. */ | ||
59 | LexChar c; /* Current character. */ | ||
60 | LexToken tok; /* Current token. */ | ||
61 | LexToken lookahead; /* Lookahead token. */ | ||
61 | SBuf sb; /* String buffer for tokens. */ | 62 | SBuf sb; /* String buffer for tokens. */ |
62 | lua_Reader rfunc; /* Reader callback. */ | 63 | lua_Reader rfunc; /* Reader callback. */ |
63 | void *rdata; /* Reader callback data. */ | 64 | void *rdata; /* Reader callback data. */ |
@@ -79,8 +80,8 @@ LJ_FUNC int lj_lex_setup(lua_State *L, LexState *ls); | |||
79 | LJ_FUNC void lj_lex_cleanup(lua_State *L, LexState *ls); | 80 | LJ_FUNC void lj_lex_cleanup(lua_State *L, LexState *ls); |
80 | LJ_FUNC void lj_lex_next(LexState *ls); | 81 | LJ_FUNC void lj_lex_next(LexState *ls); |
81 | LJ_FUNC LexToken lj_lex_lookahead(LexState *ls); | 82 | LJ_FUNC LexToken lj_lex_lookahead(LexState *ls); |
82 | LJ_FUNC const char *lj_lex_token2str(LexState *ls, LexToken token); | 83 | LJ_FUNC const char *lj_lex_token2str(LexState *ls, LexToken tok); |
83 | LJ_FUNC_NORET void lj_lex_error(LexState *ls, LexToken token, ErrMsg em, ...); | 84 | LJ_FUNC_NORET void lj_lex_error(LexState *ls, LexToken tok, ErrMsg em, ...); |
84 | LJ_FUNC void lj_lex_init(lua_State *L); | 85 | LJ_FUNC void lj_lex_init(lua_State *L); |
85 | 86 | ||
86 | #endif | 87 | #endif |
diff --git a/src/lj_lib.c b/src/lj_lib.c index 0c91a1c8..56fb6555 100644 --- a/src/lj_lib.c +++ b/src/lj_lib.c | |||
@@ -18,6 +18,9 @@ | |||
18 | #include "lj_dispatch.h" | 18 | #include "lj_dispatch.h" |
19 | #include "lj_vm.h" | 19 | #include "lj_vm.h" |
20 | #include "lj_strscan.h" | 20 | #include "lj_strscan.h" |
21 | #include "lj_strfmt.h" | ||
22 | #include "lj_lex.h" | ||
23 | #include "lj_bcdump.h" | ||
21 | #include "lj_lib.h" | 24 | #include "lj_lib.h" |
22 | 25 | ||
23 | /* -- Library initialization ---------------------------------------------- */ | 26 | /* -- Library initialization ---------------------------------------------- */ |
@@ -43,6 +46,28 @@ static GCtab *lib_create_table(lua_State *L, const char *libname, int hsize) | |||
43 | return tabV(L->top-1); | 46 | return tabV(L->top-1); |
44 | } | 47 | } |
45 | 48 | ||
49 | static const uint8_t *lib_read_lfunc(lua_State *L, const uint8_t *p, GCtab *tab) | ||
50 | { | ||
51 | int len = *p++; | ||
52 | GCstr *name = lj_str_new(L, (const char *)p, len); | ||
53 | LexState ls; | ||
54 | GCproto *pt; | ||
55 | GCfunc *fn; | ||
56 | memset(&ls, 0, sizeof(ls)); | ||
57 | ls.L = L; | ||
58 | ls.p = (const char *)(p+len); | ||
59 | ls.pe = (const char *)~(uintptr_t)0; | ||
60 | ls.c = -1; | ||
61 | ls.level = (BCDUMP_F_STRIP|(LJ_BE*BCDUMP_F_BE)); | ||
62 | ls.chunkname = name; | ||
63 | pt = lj_bcread_proto(&ls); | ||
64 | pt->firstline = ~(BCLine)0; | ||
65 | fn = lj_func_newL_empty(L, pt, tabref(L->env)); | ||
66 | /* NOBARRIER: See below for common barrier. */ | ||
67 | setfuncV(L, lj_tab_setstr(L, tab, name), fn); | ||
68 | return (const uint8_t *)ls.p; | ||
69 | } | ||
70 | |||
46 | void lj_lib_register(lua_State *L, const char *libname, | 71 | void lj_lib_register(lua_State *L, const char *libname, |
47 | const uint8_t *p, const lua_CFunction *cf) | 72 | const uint8_t *p, const lua_CFunction *cf) |
48 | { | 73 | { |
@@ -87,6 +112,9 @@ void lj_lib_register(lua_State *L, const char *libname, | |||
87 | ofn = fn; | 112 | ofn = fn; |
88 | } else { | 113 | } else { |
89 | switch (tag | len) { | 114 | switch (tag | len) { |
115 | case LIBINIT_LUA: | ||
116 | p = lib_read_lfunc(L, p, tab); | ||
117 | break; | ||
90 | case LIBINIT_SET: | 118 | case LIBINIT_SET: |
91 | L->top -= 2; | 119 | L->top -= 2; |
92 | if (tvisstr(L->top+1) && strV(L->top+1)->len == 0) | 120 | if (tvisstr(L->top+1) && strV(L->top+1)->len == 0) |
@@ -120,6 +148,37 @@ void lj_lib_register(lua_State *L, const char *libname, | |||
120 | } | 148 | } |
121 | } | 149 | } |
122 | 150 | ||
151 | /* Push internal function on the stack. */ | ||
152 | GCfunc *lj_lib_pushcc(lua_State *L, lua_CFunction f, int id, int n) | ||
153 | { | ||
154 | GCfunc *fn; | ||
155 | lua_pushcclosure(L, f, n); | ||
156 | fn = funcV(L->top-1); | ||
157 | fn->c.ffid = (uint8_t)id; | ||
158 | setmref(fn->c.pc, &G(L)->bc_cfunc_int); | ||
159 | return fn; | ||
160 | } | ||
161 | |||
162 | void lj_lib_prereg(lua_State *L, const char *name, lua_CFunction f, GCtab *env) | ||
163 | { | ||
164 | luaL_findtable(L, LUA_REGISTRYINDEX, "_PRELOAD", 4); | ||
165 | lua_pushcfunction(L, f); | ||
166 | /* NOBARRIER: The function is new (marked white). */ | ||
167 | setgcref(funcV(L->top-1)->c.env, obj2gco(env)); | ||
168 | lua_setfield(L, -2, name); | ||
169 | L->top--; | ||
170 | } | ||
171 | |||
172 | int lj_lib_postreg(lua_State *L, lua_CFunction cf, int id, const char *name) | ||
173 | { | ||
174 | GCfunc *fn = lj_lib_pushcf(L, cf, id); | ||
175 | GCtab *t = tabref(curr_func(L)->c.env); /* Reference to parent table. */ | ||
176 | setfuncV(L, lj_tab_setstr(L, t, lj_str_newz(L, name)), fn); | ||
177 | lj_gc_anybarriert(L, t); | ||
178 | setfuncV(L, L->top++, fn); | ||
179 | return 1; | ||
180 | } | ||
181 | |||
123 | /* -- Type checks --------------------------------------------------------- */ | 182 | /* -- Type checks --------------------------------------------------------- */ |
124 | 183 | ||
125 | TValue *lj_lib_checkany(lua_State *L, int narg) | 184 | TValue *lj_lib_checkany(lua_State *L, int narg) |
@@ -137,7 +196,7 @@ GCstr *lj_lib_checkstr(lua_State *L, int narg) | |||
137 | if (LJ_LIKELY(tvisstr(o))) { | 196 | if (LJ_LIKELY(tvisstr(o))) { |
138 | return strV(o); | 197 | return strV(o); |
139 | } else if (tvisnumber(o)) { | 198 | } else if (tvisnumber(o)) { |
140 | GCstr *s = lj_str_fromnumber(L, o); | 199 | GCstr *s = lj_strfmt_number(L, o); |
141 | setstrV(L, o, s); | 200 | setstrV(L, o, s); |
142 | return s; | 201 | return s; |
143 | } | 202 | } |
@@ -196,20 +255,6 @@ int32_t lj_lib_optint(lua_State *L, int narg, int32_t def) | |||
196 | return (o < L->top && !tvisnil(o)) ? lj_lib_checkint(L, narg) : def; | 255 | return (o < L->top && !tvisnil(o)) ? lj_lib_checkint(L, narg) : def; |
197 | } | 256 | } |
198 | 257 | ||
199 | int32_t lj_lib_checkbit(lua_State *L, int narg) | ||
200 | { | ||
201 | TValue *o = L->base + narg-1; | ||
202 | if (!(o < L->top && lj_strscan_numberobj(o))) | ||
203 | lj_err_argt(L, narg, LUA_TNUMBER); | ||
204 | if (LJ_LIKELY(tvisint(o))) { | ||
205 | return intV(o); | ||
206 | } else { | ||
207 | int32_t i = lj_num2bit(numV(o)); | ||
208 | if (LJ_DUALNUM) setintV(o, i); | ||
209 | return i; | ||
210 | } | ||
211 | } | ||
212 | |||
213 | GCfunc *lj_lib_checkfunc(lua_State *L, int narg) | 258 | GCfunc *lj_lib_checkfunc(lua_State *L, int narg) |
214 | { | 259 | { |
215 | TValue *o = L->base + narg-1; | 260 | TValue *o = L->base + narg-1; |
diff --git a/src/lj_lib.h b/src/lj_lib.h index 754e7444..83778b83 100644 --- a/src/lj_lib.h +++ b/src/lj_lib.h | |||
@@ -41,15 +41,22 @@ LJ_FUNC void lj_lib_checknumber(lua_State *L, int narg); | |||
41 | LJ_FUNC lua_Number lj_lib_checknum(lua_State *L, int narg); | 41 | LJ_FUNC lua_Number lj_lib_checknum(lua_State *L, int narg); |
42 | LJ_FUNC int32_t lj_lib_checkint(lua_State *L, int narg); | 42 | LJ_FUNC int32_t lj_lib_checkint(lua_State *L, int narg); |
43 | LJ_FUNC int32_t lj_lib_optint(lua_State *L, int narg, int32_t def); | 43 | LJ_FUNC int32_t lj_lib_optint(lua_State *L, int narg, int32_t def); |
44 | LJ_FUNC int32_t lj_lib_checkbit(lua_State *L, int narg); | ||
45 | LJ_FUNC GCfunc *lj_lib_checkfunc(lua_State *L, int narg); | 44 | LJ_FUNC GCfunc *lj_lib_checkfunc(lua_State *L, int narg); |
46 | LJ_FUNC GCtab *lj_lib_checktab(lua_State *L, int narg); | 45 | LJ_FUNC GCtab *lj_lib_checktab(lua_State *L, int narg); |
47 | LJ_FUNC GCtab *lj_lib_checktabornil(lua_State *L, int narg); | 46 | LJ_FUNC GCtab *lj_lib_checktabornil(lua_State *L, int narg); |
48 | LJ_FUNC int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst); | 47 | LJ_FUNC int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst); |
49 | 48 | ||
50 | /* Avoid including lj_frame.h. */ | 49 | /* Avoid including lj_frame.h. */ |
50 | #if LJ_GC64 | ||
51 | #define lj_lib_upvalue(L, n) \ | ||
52 | (&gcval(L->base-2)->fn.c.upvalue[(n)-1]) | ||
53 | #elif LJ_FR2 | ||
54 | #define lj_lib_upvalue(L, n) \ | ||
55 | (&gcref((L->base-2)->gcr)->fn.c.upvalue[(n)-1]) | ||
56 | #else | ||
51 | #define lj_lib_upvalue(L, n) \ | 57 | #define lj_lib_upvalue(L, n) \ |
52 | (&gcref((L->base-1)->fr.func)->fn.c.upvalue[(n)-1]) | 58 | (&gcref((L->base-1)->fr.func)->fn.c.upvalue[(n)-1]) |
59 | #endif | ||
53 | 60 | ||
54 | #if LJ_TARGET_WINDOWS | 61 | #if LJ_TARGET_WINDOWS |
55 | #define lj_lib_checkfpu(L) \ | 62 | #define lj_lib_checkfpu(L) \ |
@@ -60,23 +67,14 @@ LJ_FUNC int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst); | |||
60 | #define lj_lib_checkfpu(L) UNUSED(L) | 67 | #define lj_lib_checkfpu(L) UNUSED(L) |
61 | #endif | 68 | #endif |
62 | 69 | ||
63 | /* Push internal function on the stack. */ | 70 | LJ_FUNC GCfunc *lj_lib_pushcc(lua_State *L, lua_CFunction f, int id, int n); |
64 | static LJ_AINLINE void lj_lib_pushcc(lua_State *L, lua_CFunction f, | ||
65 | int id, int n) | ||
66 | { | ||
67 | GCfunc *fn; | ||
68 | lua_pushcclosure(L, f, n); | ||
69 | fn = funcV(L->top-1); | ||
70 | fn->c.ffid = (uint8_t)id; | ||
71 | setmref(fn->c.pc, &G(L)->bc_cfunc_int); | ||
72 | } | ||
73 | |||
74 | #define lj_lib_pushcf(L, fn, id) (lj_lib_pushcc(L, (fn), (id), 0)) | 71 | #define lj_lib_pushcf(L, fn, id) (lj_lib_pushcc(L, (fn), (id), 0)) |
75 | 72 | ||
76 | /* Library function declarations. Scanned by buildvm. */ | 73 | /* Library function declarations. Scanned by buildvm. */ |
77 | #define LJLIB_CF(name) static int lj_cf_##name(lua_State *L) | 74 | #define LJLIB_CF(name) static int lj_cf_##name(lua_State *L) |
78 | #define LJLIB_ASM(name) static int lj_ffh_##name(lua_State *L) | 75 | #define LJLIB_ASM(name) static int lj_ffh_##name(lua_State *L) |
79 | #define LJLIB_ASM_(name) | 76 | #define LJLIB_ASM_(name) |
77 | #define LJLIB_LUA(name) | ||
80 | #define LJLIB_SET(name) | 78 | #define LJLIB_SET(name) |
81 | #define LJLIB_PUSH(arg) | 79 | #define LJLIB_PUSH(arg) |
82 | #define LJLIB_REC(handler) | 80 | #define LJLIB_REC(handler) |
@@ -88,6 +86,10 @@ static LJ_AINLINE void lj_lib_pushcc(lua_State *L, lua_CFunction f, | |||
88 | 86 | ||
89 | LJ_FUNC void lj_lib_register(lua_State *L, const char *libname, | 87 | LJ_FUNC void lj_lib_register(lua_State *L, const char *libname, |
90 | const uint8_t *init, const lua_CFunction *cf); | 88 | const uint8_t *init, const lua_CFunction *cf); |
89 | LJ_FUNC void lj_lib_prereg(lua_State *L, const char *name, lua_CFunction f, | ||
90 | GCtab *env); | ||
91 | LJ_FUNC int lj_lib_postreg(lua_State *L, lua_CFunction cf, int id, | ||
92 | const char *name); | ||
91 | 93 | ||
92 | /* Library init data tags. */ | 94 | /* Library init data tags. */ |
93 | #define LIBINIT_LENMASK 0x3f | 95 | #define LIBINIT_LENMASK 0x3f |
@@ -96,7 +98,8 @@ LJ_FUNC void lj_lib_register(lua_State *L, const char *libname, | |||
96 | #define LIBINIT_ASM 0x40 | 98 | #define LIBINIT_ASM 0x40 |
97 | #define LIBINIT_ASM_ 0x80 | 99 | #define LIBINIT_ASM_ 0x80 |
98 | #define LIBINIT_STRING 0xc0 | 100 | #define LIBINIT_STRING 0xc0 |
99 | #define LIBINIT_MAXSTR 0x39 | 101 | #define LIBINIT_MAXSTR 0x38 |
102 | #define LIBINIT_LUA 0xf9 | ||
100 | #define LIBINIT_SET 0xfa | 103 | #define LIBINIT_SET 0xfa |
101 | #define LIBINIT_NUMBER 0xfb | 104 | #define LIBINIT_NUMBER 0xfb |
102 | #define LIBINIT_COPY 0xfc | 105 | #define LIBINIT_COPY 0xfc |
diff --git a/src/lj_load.c b/src/lj_load.c index ec6f0aba..746bf428 100644 --- a/src/lj_load.c +++ b/src/lj_load.c | |||
@@ -15,7 +15,7 @@ | |||
15 | #include "lj_obj.h" | 15 | #include "lj_obj.h" |
16 | #include "lj_gc.h" | 16 | #include "lj_gc.h" |
17 | #include "lj_err.h" | 17 | #include "lj_err.h" |
18 | #include "lj_str.h" | 18 | #include "lj_buf.h" |
19 | #include "lj_func.h" | 19 | #include "lj_func.h" |
20 | #include "lj_frame.h" | 20 | #include "lj_frame.h" |
21 | #include "lj_vm.h" | 21 | #include "lj_vm.h" |
@@ -54,7 +54,7 @@ LUA_API int lua_loadx(lua_State *L, lua_Reader reader, void *data, | |||
54 | ls.rdata = data; | 54 | ls.rdata = data; |
55 | ls.chunkarg = chunkname ? chunkname : "?"; | 55 | ls.chunkarg = chunkname ? chunkname : "?"; |
56 | ls.mode = mode; | 56 | ls.mode = mode; |
57 | lj_str_initbuf(&ls.sb); | 57 | lj_buf_init(L, &ls.sb); |
58 | status = lj_vm_cpcall(L, NULL, &ls, cpparser); | 58 | status = lj_vm_cpcall(L, NULL, &ls, cpparser); |
59 | lj_lex_cleanup(L, &ls); | 59 | lj_lex_cleanup(L, &ls); |
60 | lj_gc_check(L); | 60 | lj_gc_check(L); |
diff --git a/src/lj_mcode.c b/src/lj_mcode.c index 02ade1d4..bc3e922f 100644 --- a/src/lj_mcode.c +++ b/src/lj_mcode.c | |||
@@ -66,8 +66,8 @@ void lj_mcode_sync(void *start, void *end) | |||
66 | 66 | ||
67 | static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, DWORD prot) | 67 | static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, DWORD prot) |
68 | { | 68 | { |
69 | void *p = VirtualAlloc((void *)hint, sz, | 69 | void *p = LJ_WIN_VALLOC((void *)hint, sz, |
70 | MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, prot); | 70 | MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, prot); |
71 | if (!p && !hint) | 71 | if (!p && !hint) |
72 | lj_trace_err(J, LJ_TRERR_MCODEAL); | 72 | lj_trace_err(J, LJ_TRERR_MCODEAL); |
73 | return p; | 73 | return p; |
@@ -82,7 +82,7 @@ static void mcode_free(jit_State *J, void *p, size_t sz) | |||
82 | static int mcode_setprot(void *p, size_t sz, DWORD prot) | 82 | static int mcode_setprot(void *p, size_t sz, DWORD prot) |
83 | { | 83 | { |
84 | DWORD oprot; | 84 | DWORD oprot; |
85 | return !VirtualProtect(p, sz, prot, &oprot); | 85 | return !LJ_WIN_VPROTECT(p, sz, prot, &oprot); |
86 | } | 86 | } |
87 | 87 | ||
88 | #elif LJ_TARGET_POSIX | 88 | #elif LJ_TARGET_POSIX |
@@ -221,8 +221,8 @@ static void *mcode_alloc(jit_State *J, size_t sz) | |||
221 | */ | 221 | */ |
222 | #if LJ_TARGET_MIPS | 222 | #if LJ_TARGET_MIPS |
223 | /* Use the middle of the 256MB-aligned region. */ | 223 | /* Use the middle of the 256MB-aligned region. */ |
224 | uintptr_t target = ((uintptr_t)(void *)lj_vm_exit_handler & 0xf0000000u) + | 224 | uintptr_t target = ((uintptr_t)(void *)lj_vm_exit_handler & |
225 | 0x08000000u; | 225 | ~(uintptr_t)0x0fffffffu) + 0x08000000u; |
226 | #else | 226 | #else |
227 | uintptr_t target = (uintptr_t)(void *)lj_vm_exit_handler & ~(uintptr_t)0xffff; | 227 | uintptr_t target = (uintptr_t)(void *)lj_vm_exit_handler & ~(uintptr_t)0xffff; |
228 | #endif | 228 | #endif |
@@ -255,7 +255,7 @@ static void *mcode_alloc(jit_State *J, size_t sz) | |||
255 | /* All memory addresses are reachable by relative jumps. */ | 255 | /* All memory addresses are reachable by relative jumps. */ |
256 | static void *mcode_alloc(jit_State *J, size_t sz) | 256 | static void *mcode_alloc(jit_State *J, size_t sz) |
257 | { | 257 | { |
258 | #ifdef __OpenBSD__ | 258 | #if defined(__OpenBSD__) || LJ_TARGET_UWP |
259 | /* Allow better executable memory allocation for OpenBSD W^X mode. */ | 259 | /* Allow better executable memory allocation for OpenBSD W^X mode. */ |
260 | void *p = mcode_alloc_at(J, 0, sz, MCPROT_RUN); | 260 | void *p = mcode_alloc_at(J, 0, sz, MCPROT_RUN); |
261 | if (p && mcode_setprot(p, sz, MCPROT_GEN)) { | 261 | if (p && mcode_setprot(p, sz, MCPROT_GEN)) { |
diff --git a/src/lj_meta.c b/src/lj_meta.c index 6affc18b..7391ff00 100644 --- a/src/lj_meta.c +++ b/src/lj_meta.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include "lj_obj.h" | 12 | #include "lj_obj.h" |
13 | #include "lj_gc.h" | 13 | #include "lj_gc.h" |
14 | #include "lj_err.h" | 14 | #include "lj_err.h" |
15 | #include "lj_buf.h" | ||
15 | #include "lj_str.h" | 16 | #include "lj_str.h" |
16 | #include "lj_tab.h" | 17 | #include "lj_tab.h" |
17 | #include "lj_meta.h" | 18 | #include "lj_meta.h" |
@@ -19,6 +20,8 @@ | |||
19 | #include "lj_bc.h" | 20 | #include "lj_bc.h" |
20 | #include "lj_vm.h" | 21 | #include "lj_vm.h" |
21 | #include "lj_strscan.h" | 22 | #include "lj_strscan.h" |
23 | #include "lj_strfmt.h" | ||
24 | #include "lj_lib.h" | ||
22 | 25 | ||
23 | /* -- Metamethod handling ------------------------------------------------- */ | 26 | /* -- Metamethod handling ------------------------------------------------- */ |
24 | 27 | ||
@@ -77,12 +80,16 @@ int lj_meta_tailcall(lua_State *L, cTValue *tv) | |||
77 | TValue *base = L->base; | 80 | TValue *base = L->base; |
78 | TValue *top = L->top; | 81 | TValue *top = L->top; |
79 | const BCIns *pc = frame_pc(base-1); /* Preserve old PC from frame. */ | 82 | const BCIns *pc = frame_pc(base-1); /* Preserve old PC from frame. */ |
80 | copyTV(L, base-1, tv); /* Replace frame with new object. */ | 83 | copyTV(L, base-1-LJ_FR2, tv); /* Replace frame with new object. */ |
81 | top->u32.lo = LJ_CONT_TAILCALL; | 84 | if (LJ_FR2) |
82 | setframe_pc(top, pc); | 85 | (top++)->u64 = LJ_CONT_TAILCALL; |
83 | setframe_gc(top+1, obj2gco(L)); /* Dummy frame object. */ | 86 | else |
84 | setframe_ftsz(top+1, (int)((char *)(top+2) - (char *)base) + FRAME_CONT); | 87 | top->u32.lo = LJ_CONT_TAILCALL; |
85 | L->base = L->top = top+2; | 88 | setframe_pc(top++, pc); |
89 | if (LJ_FR2) top++; | ||
90 | setframe_gc(top, obj2gco(L), LJ_TTHREAD); /* Dummy frame object. */ | ||
91 | setframe_ftsz(top, ((char *)(top+1) - (char *)base) + FRAME_CONT); | ||
92 | L->base = L->top = top+1; | ||
86 | /* | 93 | /* |
87 | ** before: [old_mo|PC] [... ...] | 94 | ** before: [old_mo|PC] [... ...] |
88 | ** ^base ^top | 95 | ** ^base ^top |
@@ -113,11 +120,13 @@ static TValue *mmcall(lua_State *L, ASMFunction cont, cTValue *mo, | |||
113 | */ | 120 | */ |
114 | TValue *top = L->top; | 121 | TValue *top = L->top; |
115 | if (curr_funcisL(L)) top = curr_topL(L); | 122 | if (curr_funcisL(L)) top = curr_topL(L); |
116 | setcont(top, cont); /* Assembler VM stores PC in upper word. */ | 123 | setcont(top++, cont); /* Assembler VM stores PC in upper word or FR2. */ |
117 | copyTV(L, top+1, mo); /* Store metamethod and two arguments. */ | 124 | if (LJ_FR2) setnilV(top++); |
118 | copyTV(L, top+2, a); | 125 | copyTV(L, top++, mo); /* Store metamethod and two arguments. */ |
119 | copyTV(L, top+3, b); | 126 | if (LJ_FR2) setnilV(top++); |
120 | return top+2; /* Return new base. */ | 127 | copyTV(L, top, a); |
128 | copyTV(L, top+1, b); | ||
129 | return top; /* Return new base. */ | ||
121 | } | 130 | } |
122 | 131 | ||
123 | /* -- C helpers for some instructions, called from assembler VM ----------- */ | 132 | /* -- C helpers for some instructions, called from assembler VM ----------- */ |
@@ -225,27 +234,14 @@ TValue *lj_meta_arith(lua_State *L, TValue *ra, cTValue *rb, cTValue *rc, | |||
225 | } | 234 | } |
226 | } | 235 | } |
227 | 236 | ||
228 | /* In-place coercion of a number to a string. */ | ||
229 | static LJ_AINLINE int tostring(lua_State *L, TValue *o) | ||
230 | { | ||
231 | if (tvisstr(o)) { | ||
232 | return 1; | ||
233 | } else if (tvisnumber(o)) { | ||
234 | setstrV(L, o, lj_str_fromnumber(L, o)); | ||
235 | return 1; | ||
236 | } else { | ||
237 | return 0; | ||
238 | } | ||
239 | } | ||
240 | |||
241 | /* Helper for CAT. Coercion, iterative concat, __concat metamethod. */ | 237 | /* Helper for CAT. Coercion, iterative concat, __concat metamethod. */ |
242 | TValue *lj_meta_cat(lua_State *L, TValue *top, int left) | 238 | TValue *lj_meta_cat(lua_State *L, TValue *top, int left) |
243 | { | 239 | { |
244 | int fromc = 0; | 240 | int fromc = 0; |
245 | if (left < 0) { left = -left; fromc = 1; } | 241 | if (left < 0) { left = -left; fromc = 1; } |
246 | do { | 242 | do { |
247 | int n = 1; | 243 | if (!(tvisstr(top) || tvisnumber(top)) || |
248 | if (!(tvisstr(top-1) || tvisnumber(top-1)) || !tostring(L, top)) { | 244 | !(tvisstr(top-1) || tvisnumber(top-1))) { |
249 | cTValue *mo = lj_meta_lookup(L, top-1, MM_concat); | 245 | cTValue *mo = lj_meta_lookup(L, top-1, MM_concat); |
250 | if (tvisnil(mo)) { | 246 | if (tvisnil(mo)) { |
251 | mo = lj_meta_lookup(L, top, MM_concat); | 247 | mo = lj_meta_lookup(L, top, MM_concat); |
@@ -266,13 +262,12 @@ TValue *lj_meta_cat(lua_State *L, TValue *top, int left) | |||
266 | ** after mm: [...][CAT stack ...] <--push-- [result] | 262 | ** after mm: [...][CAT stack ...] <--push-- [result] |
267 | ** next step: [...][CAT stack .............] | 263 | ** next step: [...][CAT stack .............] |
268 | */ | 264 | */ |
269 | copyTV(L, top+2, top); /* Careful with the order of stack copies! */ | 265 | copyTV(L, top+2*LJ_FR2+2, top); /* Carefully ordered stack copies! */ |
270 | copyTV(L, top+1, top-1); | 266 | copyTV(L, top+2*LJ_FR2+1, top-1); |
271 | copyTV(L, top, mo); | 267 | copyTV(L, top+LJ_FR2, mo); |
272 | setcont(top-1, lj_cont_cat); | 268 | setcont(top-1, lj_cont_cat); |
269 | if (LJ_FR2) { setnilV(top); setnilV(top+2); top += 2; } | ||
273 | return top+1; /* Trigger metamethod call. */ | 270 | return top+1; /* Trigger metamethod call. */ |
274 | } else if (strV(top)->len == 0) { /* Shortcut. */ | ||
275 | (void)tostring(L, top-1); | ||
276 | } else { | 271 | } else { |
277 | /* Pick as many strings as possible from the top and concatenate them: | 272 | /* Pick as many strings as possible from the top and concatenate them: |
278 | ** | 273 | ** |
@@ -281,27 +276,28 @@ TValue *lj_meta_cat(lua_State *L, TValue *top, int left) | |||
281 | ** concat: [...][CAT stack ...] [result] | 276 | ** concat: [...][CAT stack ...] [result] |
282 | ** next step: [...][CAT stack ............] | 277 | ** next step: [...][CAT stack ............] |
283 | */ | 278 | */ |
284 | MSize tlen = strV(top)->len; | 279 | TValue *e, *o = top; |
285 | char *buffer; | 280 | uint64_t tlen = tvisstr(o) ? strV(o)->len : STRFMT_MAXBUF_NUM; |
286 | int i; | 281 | SBuf *sb; |
287 | for (n = 1; n <= left && tostring(L, top-n); n++) { | 282 | do { |
288 | MSize len = strV(top-n)->len; | 283 | o--; tlen += tvisstr(o) ? strV(o)->len : STRFMT_MAXBUF_NUM; |
289 | if (len >= LJ_MAX_STR - tlen) | 284 | } while (--left > 0 && (tvisstr(o-1) || tvisnumber(o-1))); |
290 | lj_err_msg(L, LJ_ERR_STROV); | 285 | if (tlen >= LJ_MAX_STR) lj_err_msg(L, LJ_ERR_STROV); |
291 | tlen += len; | 286 | sb = lj_buf_tmp_(L); |
292 | } | 287 | lj_buf_more(sb, (MSize)tlen); |
293 | buffer = lj_str_needbuf(L, &G(L)->tmpbuf, tlen); | 288 | for (e = top, top = o; o <= e; o++) { |
294 | n--; | 289 | if (tvisstr(o)) { |
295 | tlen = 0; | 290 | GCstr *s = strV(o); |
296 | for (i = n; i >= 0; i--) { | 291 | MSize len = s->len; |
297 | MSize len = strV(top-i)->len; | 292 | lj_buf_putmem(sb, strdata(s), len); |
298 | memcpy(buffer + tlen, strVdata(top-i), len); | 293 | } else if (tvisint(o)) { |
299 | tlen += len; | 294 | lj_strfmt_putint(sb, intV(o)); |
295 | } else { | ||
296 | lj_strfmt_putfnum(sb, STRFMT_G14, numV(o)); | ||
297 | } | ||
300 | } | 298 | } |
301 | setstrV(L, top-n, lj_str_new(L, buffer, tlen)); | 299 | setstrV(L, top, lj_buf_str(L, sb)); |
302 | } | 300 | } |
303 | left -= n; | ||
304 | top -= n; | ||
305 | } while (left >= 1); | 301 | } while (left >= 1); |
306 | if (LJ_UNLIKELY(G(L)->gc.total >= G(L)->gc.threshold)) { | 302 | if (LJ_UNLIKELY(G(L)->gc.total >= G(L)->gc.threshold)) { |
307 | if (!fromc) L->top = curr_topL(L); | 303 | if (!fromc) L->top = curr_topL(L); |
@@ -338,12 +334,14 @@ TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne) | |||
338 | return (TValue *)(intptr_t)ne; | 334 | return (TValue *)(intptr_t)ne; |
339 | } | 335 | } |
340 | top = curr_top(L); | 336 | top = curr_top(L); |
341 | setcont(top, ne ? lj_cont_condf : lj_cont_condt); | 337 | setcont(top++, ne ? lj_cont_condf : lj_cont_condt); |
342 | copyTV(L, top+1, mo); | 338 | if (LJ_FR2) setnilV(top++); |
339 | copyTV(L, top++, mo); | ||
340 | if (LJ_FR2) setnilV(top++); | ||
343 | it = ~(uint32_t)o1->gch.gct; | 341 | it = ~(uint32_t)o1->gch.gct; |
344 | setgcV(L, top+2, o1, it); | 342 | setgcV(L, top, o1, it); |
345 | setgcV(L, top+3, o2, it); | 343 | setgcV(L, top+1, o2, it); |
346 | return top+2; /* Trigger metamethod call. */ | 344 | return top; /* Trigger metamethod call. */ |
347 | } | 345 | } |
348 | return (TValue *)(intptr_t)ne; | 346 | return (TValue *)(intptr_t)ne; |
349 | } | 347 | } |
@@ -366,7 +364,7 @@ TValue * LJ_FASTCALL lj_meta_equal_cd(lua_State *L, BCIns ins) | |||
366 | o2 = &mref(curr_proto(L)->k, cTValue)[bc_d(ins)]; | 364 | o2 = &mref(curr_proto(L)->k, cTValue)[bc_d(ins)]; |
367 | } else { | 365 | } else { |
368 | lua_assert(op == BC_ISEQP); | 366 | lua_assert(op == BC_ISEQP); |
369 | setitype(&tv, ~bc_d(ins)); | 367 | setpriV(&tv, ~bc_d(ins)); |
370 | o2 = &tv; | 368 | o2 = &tv; |
371 | } | 369 | } |
372 | mo = lj_meta_lookup(L, o1mm, MM_eq); | 370 | mo = lj_meta_lookup(L, o1mm, MM_eq); |
@@ -423,6 +421,18 @@ TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op) | |||
423 | } | 421 | } |
424 | } | 422 | } |
425 | 423 | ||
424 | /* Helper for ISTYPE and ISNUM. Implicit coercion or error. */ | ||
425 | void lj_meta_istype(lua_State *L, BCReg ra, BCReg tp) | ||
426 | { | ||
427 | L->top = curr_topL(L); | ||
428 | ra++; tp--; | ||
429 | lua_assert(LJ_DUALNUM || tp != ~LJ_TNUMX); /* ISTYPE -> ISNUM broken. */ | ||
430 | if (LJ_DUALNUM && tp == ~LJ_TNUMX) lj_lib_checkint(L, ra); | ||
431 | else if (tp == ~LJ_TNUMX+1) lj_lib_checknum(L, ra); | ||
432 | else if (tp == ~LJ_TSTR) lj_lib_checkstr(L, ra); | ||
433 | else lj_err_argtype(L, ra, lj_obj_itypename[tp]); | ||
434 | } | ||
435 | |||
426 | /* Helper for calls. __call metamethod. */ | 436 | /* Helper for calls. __call metamethod. */ |
427 | void lj_meta_call(lua_State *L, TValue *func, TValue *top) | 437 | void lj_meta_call(lua_State *L, TValue *func, TValue *top) |
428 | { | 438 | { |
@@ -430,7 +440,8 @@ void lj_meta_call(lua_State *L, TValue *func, TValue *top) | |||
430 | TValue *p; | 440 | TValue *p; |
431 | if (!tvisfunc(mo)) | 441 | if (!tvisfunc(mo)) |
432 | lj_err_optype_call(L, func); | 442 | lj_err_optype_call(L, func); |
433 | for (p = top; p > func; p--) copyTV(L, p, p-1); | 443 | for (p = top; p > func+2*LJ_FR2; p--) copyTV(L, p, p-1); |
444 | if (LJ_FR2) copyTV(L, func+2, func); | ||
434 | copyTV(L, func, mo); | 445 | copyTV(L, func, mo); |
435 | } | 446 | } |
436 | 447 | ||
diff --git a/src/lj_meta.h b/src/lj_meta.h index bd911e94..d6d31924 100644 --- a/src/lj_meta.h +++ b/src/lj_meta.h | |||
@@ -31,6 +31,7 @@ LJ_FUNCA TValue * LJ_FASTCALL lj_meta_len(lua_State *L, cTValue *o); | |||
31 | LJ_FUNCA TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne); | 31 | LJ_FUNCA TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne); |
32 | LJ_FUNCA TValue * LJ_FASTCALL lj_meta_equal_cd(lua_State *L, BCIns ins); | 32 | LJ_FUNCA TValue * LJ_FASTCALL lj_meta_equal_cd(lua_State *L, BCIns ins); |
33 | LJ_FUNCA TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op); | 33 | LJ_FUNCA TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op); |
34 | LJ_FUNCA void lj_meta_istype(lua_State *L, BCReg ra, BCReg tp); | ||
34 | LJ_FUNCA void lj_meta_call(lua_State *L, TValue *func, TValue *top); | 35 | LJ_FUNCA void lj_meta_call(lua_State *L, TValue *func, TValue *top); |
35 | LJ_FUNCA void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *o); | 36 | LJ_FUNCA void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *o); |
36 | 37 | ||
diff --git a/src/lj_obj.c b/src/lj_obj.c index 9cdce625..5d16e0e5 100644 --- a/src/lj_obj.c +++ b/src/lj_obj.c | |||
@@ -20,7 +20,7 @@ LJ_DATADEF const char *const lj_obj_itypename[] = { /* ORDER LJ_T */ | |||
20 | }; | 20 | }; |
21 | 21 | ||
22 | /* Compare two objects without calling metamethods. */ | 22 | /* Compare two objects without calling metamethods. */ |
23 | int lj_obj_equal(cTValue *o1, cTValue *o2) | 23 | int LJ_FASTCALL lj_obj_equal(cTValue *o1, cTValue *o2) |
24 | { | 24 | { |
25 | if (itype(o1) == itype(o2)) { | 25 | if (itype(o1) == itype(o2)) { |
26 | if (tvispri(o1)) | 26 | if (tvispri(o1)) |
@@ -33,3 +33,18 @@ int lj_obj_equal(cTValue *o1, cTValue *o2) | |||
33 | return numberVnum(o1) == numberVnum(o2); | 33 | return numberVnum(o1) == numberVnum(o2); |
34 | } | 34 | } |
35 | 35 | ||
36 | /* Return pointer to object or its object data. */ | ||
37 | const void * LJ_FASTCALL lj_obj_ptr(cTValue *o) | ||
38 | { | ||
39 | if (tvisudata(o)) | ||
40 | return uddata(udataV(o)); | ||
41 | else if (tvislightud(o)) | ||
42 | return lightudV(o); | ||
43 | else if (LJ_HASFFI && tviscdata(o)) | ||
44 | return cdataptr(cdataV(o)); | ||
45 | else if (tvisgcv(o)) | ||
46 | return gcV(o); | ||
47 | else | ||
48 | return NULL; | ||
49 | } | ||
50 | |||
diff --git a/src/lj_obj.h b/src/lj_obj.h index 5c3c88fc..7d582949 100644 --- a/src/lj_obj.h +++ b/src/lj_obj.h | |||
@@ -15,42 +15,75 @@ | |||
15 | 15 | ||
16 | /* -- Memory references (32 bit address space) ---------------------------- */ | 16 | /* -- Memory references (32 bit address space) ---------------------------- */ |
17 | 17 | ||
18 | /* Memory size. */ | 18 | /* Memory and GC object sizes. */ |
19 | typedef uint32_t MSize; | 19 | typedef uint32_t MSize; |
20 | #if LJ_GC64 | ||
21 | typedef uint64_t GCSize; | ||
22 | #else | ||
23 | typedef uint32_t GCSize; | ||
24 | #endif | ||
20 | 25 | ||
21 | /* Memory reference */ | 26 | /* Memory reference */ |
22 | typedef struct MRef { | 27 | typedef struct MRef { |
28 | #if LJ_GC64 | ||
29 | uint64_t ptr64; /* True 64 bit pointer. */ | ||
30 | #else | ||
23 | uint32_t ptr32; /* Pseudo 32 bit pointer. */ | 31 | uint32_t ptr32; /* Pseudo 32 bit pointer. */ |
32 | #endif | ||
24 | } MRef; | 33 | } MRef; |
25 | 34 | ||
35 | #if LJ_GC64 | ||
36 | #define mref(r, t) ((t *)(void *)(r).ptr64) | ||
37 | |||
38 | #define setmref(r, p) ((r).ptr64 = (uint64_t)(void *)(p)) | ||
39 | #define setmrefr(r, v) ((r).ptr64 = (v).ptr64) | ||
40 | #else | ||
26 | #define mref(r, t) ((t *)(void *)(uintptr_t)(r).ptr32) | 41 | #define mref(r, t) ((t *)(void *)(uintptr_t)(r).ptr32) |
27 | 42 | ||
28 | #define setmref(r, p) ((r).ptr32 = (uint32_t)(uintptr_t)(void *)(p)) | 43 | #define setmref(r, p) ((r).ptr32 = (uint32_t)(uintptr_t)(void *)(p)) |
29 | #define setmrefr(r, v) ((r).ptr32 = (v).ptr32) | 44 | #define setmrefr(r, v) ((r).ptr32 = (v).ptr32) |
45 | #endif | ||
30 | 46 | ||
31 | /* -- GC object references (32 bit address space) ------------------------- */ | 47 | /* -- GC object references (32 bit address space) ------------------------- */ |
32 | 48 | ||
33 | /* GCobj reference */ | 49 | /* GCobj reference */ |
34 | typedef struct GCRef { | 50 | typedef struct GCRef { |
51 | #if LJ_GC64 | ||
52 | uint64_t gcptr64; /* True 64 bit pointer. */ | ||
53 | #else | ||
35 | uint32_t gcptr32; /* Pseudo 32 bit pointer. */ | 54 | uint32_t gcptr32; /* Pseudo 32 bit pointer. */ |
55 | #endif | ||
36 | } GCRef; | 56 | } GCRef; |
37 | 57 | ||
38 | /* Common GC header for all collectable objects. */ | 58 | /* Common GC header for all collectable objects. */ |
39 | #define GCHeader GCRef nextgc; uint8_t marked; uint8_t gct | 59 | #define GCHeader GCRef nextgc; uint8_t marked; uint8_t gct |
40 | /* This occupies 6 bytes, so use the next 2 bytes for non-32 bit fields. */ | 60 | /* This occupies 6 bytes, so use the next 2 bytes for non-32 bit fields. */ |
41 | 61 | ||
62 | #if LJ_GC64 | ||
63 | #define gcref(r) ((GCobj *)(r).gcptr64) | ||
64 | #define gcrefp(r, t) ((t *)(void *)(r).gcptr64) | ||
65 | #define gcrefu(r) ((r).gcptr64) | ||
66 | #define gcrefeq(r1, r2) ((r1).gcptr64 == (r2).gcptr64) | ||
67 | |||
68 | #define setgcref(r, gc) ((r).gcptr64 = (uint64_t)&(gc)->gch) | ||
69 | #define setgcreft(r, gc, it) \ | ||
70 | (r).gcptr64 = (uint64_t)&(gc)->gch | (((uint64_t)(it)) << 47) | ||
71 | #define setgcrefp(r, p) ((r).gcptr64 = (uint64_t)(p)) | ||
72 | #define setgcrefnull(r) ((r).gcptr64 = 0) | ||
73 | #define setgcrefr(r, v) ((r).gcptr64 = (v).gcptr64) | ||
74 | #else | ||
42 | #define gcref(r) ((GCobj *)(uintptr_t)(r).gcptr32) | 75 | #define gcref(r) ((GCobj *)(uintptr_t)(r).gcptr32) |
43 | #define gcrefp(r, t) ((t *)(void *)(uintptr_t)(r).gcptr32) | 76 | #define gcrefp(r, t) ((t *)(void *)(uintptr_t)(r).gcptr32) |
44 | #define gcrefu(r) ((r).gcptr32) | 77 | #define gcrefu(r) ((r).gcptr32) |
45 | #define gcrefi(r) ((int32_t)(r).gcptr32) | ||
46 | #define gcrefeq(r1, r2) ((r1).gcptr32 == (r2).gcptr32) | 78 | #define gcrefeq(r1, r2) ((r1).gcptr32 == (r2).gcptr32) |
47 | #define gcnext(gc) (gcref((gc)->gch.nextgc)) | ||
48 | 79 | ||
49 | #define setgcref(r, gc) ((r).gcptr32 = (uint32_t)(uintptr_t)&(gc)->gch) | 80 | #define setgcref(r, gc) ((r).gcptr32 = (uint32_t)(uintptr_t)&(gc)->gch) |
50 | #define setgcrefi(r, i) ((r).gcptr32 = (uint32_t)(i)) | ||
51 | #define setgcrefp(r, p) ((r).gcptr32 = (uint32_t)(uintptr_t)(p)) | 81 | #define setgcrefp(r, p) ((r).gcptr32 = (uint32_t)(uintptr_t)(p)) |
52 | #define setgcrefnull(r) ((r).gcptr32 = 0) | 82 | #define setgcrefnull(r) ((r).gcptr32 = 0) |
53 | #define setgcrefr(r, v) ((r).gcptr32 = (v).gcptr32) | 83 | #define setgcrefr(r, v) ((r).gcptr32 = (v).gcptr32) |
84 | #endif | ||
85 | |||
86 | #define gcnext(gc) (gcref((gc)->gch.nextgc)) | ||
54 | 87 | ||
55 | /* IMPORTANT NOTE: | 88 | /* IMPORTANT NOTE: |
56 | ** | 89 | ** |
@@ -119,11 +152,12 @@ typedef int32_t BCLine; /* Bytecode line number. */ | |||
119 | /* Internal assembler functions. Never call these directly from C. */ | 152 | /* Internal assembler functions. Never call these directly from C. */ |
120 | typedef void (*ASMFunction)(void); | 153 | typedef void (*ASMFunction)(void); |
121 | 154 | ||
122 | /* Resizable string buffer. Need this here, details in lj_str.h. */ | 155 | /* Resizable string buffer. Need this here, details in lj_buf.h. */ |
123 | typedef struct SBuf { | 156 | typedef struct SBuf { |
124 | char *buf; /* String buffer base. */ | 157 | MRef p; /* String buffer pointer. */ |
125 | MSize n; /* String buffer length. */ | 158 | MRef e; /* String buffer end pointer. */ |
126 | MSize sz; /* String buffer size. */ | 159 | MRef b; /* String buffer base. */ |
160 | MRef L; /* lua_State, used for buffer resizing. */ | ||
127 | } SBuf; | 161 | } SBuf; |
128 | 162 | ||
129 | /* -- Tags and values ----------------------------------------------------- */ | 163 | /* -- Tags and values ----------------------------------------------------- */ |
@@ -131,13 +165,23 @@ typedef struct SBuf { | |||
131 | /* Frame link. */ | 165 | /* Frame link. */ |
132 | typedef union { | 166 | typedef union { |
133 | int32_t ftsz; /* Frame type and size of previous frame. */ | 167 | int32_t ftsz; /* Frame type and size of previous frame. */ |
134 | MRef pcr; /* Overlaps PC for Lua frames. */ | 168 | MRef pcr; /* Or PC for Lua frames. */ |
135 | } FrameLink; | 169 | } FrameLink; |
136 | 170 | ||
137 | /* Tagged value. */ | 171 | /* Tagged value. */ |
138 | typedef LJ_ALIGN(8) union TValue { | 172 | typedef LJ_ALIGN(8) union TValue { |
139 | uint64_t u64; /* 64 bit pattern overlaps number. */ | 173 | uint64_t u64; /* 64 bit pattern overlaps number. */ |
140 | lua_Number n; /* Number object overlaps split tag/value object. */ | 174 | lua_Number n; /* Number object overlaps split tag/value object. */ |
175 | #if LJ_GC64 | ||
176 | GCRef gcr; /* GCobj reference with tag. */ | ||
177 | int64_t it64; | ||
178 | struct { | ||
179 | LJ_ENDIAN_LOHI( | ||
180 | int32_t i; /* Integer value. */ | ||
181 | , uint32_t it; /* Internal object tag. Must overlap MSW of number. */ | ||
182 | ) | ||
183 | }; | ||
184 | #else | ||
141 | struct { | 185 | struct { |
142 | LJ_ENDIAN_LOHI( | 186 | LJ_ENDIAN_LOHI( |
143 | union { | 187 | union { |
@@ -147,12 +191,17 @@ typedef LJ_ALIGN(8) union TValue { | |||
147 | , uint32_t it; /* Internal object tag. Must overlap MSW of number. */ | 191 | , uint32_t it; /* Internal object tag. Must overlap MSW of number. */ |
148 | ) | 192 | ) |
149 | }; | 193 | }; |
194 | #endif | ||
195 | #if LJ_FR2 | ||
196 | int64_t ftsz; /* Frame type and size of previous frame, or PC. */ | ||
197 | #else | ||
150 | struct { | 198 | struct { |
151 | LJ_ENDIAN_LOHI( | 199 | LJ_ENDIAN_LOHI( |
152 | GCRef func; /* Function for next frame (or dummy L). */ | 200 | GCRef func; /* Function for next frame (or dummy L). */ |
153 | , FrameLink tp; /* Link to previous frame. */ | 201 | , FrameLink tp; /* Link to previous frame. */ |
154 | ) | 202 | ) |
155 | } fr; | 203 | } fr; |
204 | #endif | ||
156 | struct { | 205 | struct { |
157 | LJ_ENDIAN_LOHI( | 206 | LJ_ENDIAN_LOHI( |
158 | uint32_t lo; /* Lower 32 bits of number. */ | 207 | uint32_t lo; /* Lower 32 bits of number. */ |
@@ -172,6 +221,8 @@ typedef const TValue cTValue; | |||
172 | 221 | ||
173 | /* Internal object tags. | 222 | /* Internal object tags. |
174 | ** | 223 | ** |
224 | ** Format for 32 bit GC references (!LJ_GC64): | ||
225 | ** | ||
175 | ** Internal tags overlap the MSW of a number object (must be a double). | 226 | ** Internal tags overlap the MSW of a number object (must be a double). |
176 | ** Interpreted as a double these are special NaNs. The FPU only generates | 227 | ** Interpreted as a double these are special NaNs. The FPU only generates |
177 | ** one type of NaN (0xfff8_0000_0000_0000). So MSWs > 0xfff80000 are available | 228 | ** one type of NaN (0xfff8_0000_0000_0000). So MSWs > 0xfff80000 are available |
@@ -186,6 +237,18 @@ typedef const TValue cTValue; | |||
186 | ** int (LJ_DUALNUM)| itype | int | | 237 | ** int (LJ_DUALNUM)| itype | int | |
187 | ** number -------double------ | 238 | ** number -------double------ |
188 | ** | 239 | ** |
240 | ** Format for 64 bit GC references (LJ_GC64): | ||
241 | ** | ||
242 | ** The upper 13 bits must be 1 (0xfff8...) for a special NaN. The next | ||
243 | ** 4 bits hold the internal tag. The lowest 47 bits either hold a pointer, | ||
244 | ** a zero-extended 32 bit integer or all bits set to 1 for primitive types. | ||
245 | ** | ||
246 | ** ------MSW------.------LSW------ | ||
247 | ** primitive types |1..1|itype|1..................1| | ||
248 | ** GC objects/lightud |1..1|itype|-------GCRef--------| | ||
249 | ** int (LJ_DUALNUM) |1..1|itype|0..0|-----int-------| | ||
250 | ** number ------------double------------- | ||
251 | ** | ||
189 | ** ORDER LJ_T | 252 | ** ORDER LJ_T |
190 | ** Primitive types nil/false/true must be first, lightuserdata next. | 253 | ** Primitive types nil/false/true must be first, lightuserdata next. |
191 | ** GC objects are at the end, table/userdata must be lowest. | 254 | ** GC objects are at the end, table/userdata must be lowest. |
@@ -208,7 +271,7 @@ typedef const TValue cTValue; | |||
208 | #define LJ_TNUMX (~13u) | 271 | #define LJ_TNUMX (~13u) |
209 | 272 | ||
210 | /* Integers have itype == LJ_TISNUM doubles have itype < LJ_TISNUM */ | 273 | /* Integers have itype == LJ_TISNUM doubles have itype < LJ_TISNUM */ |
211 | #if LJ_64 | 274 | #if LJ_64 && !LJ_GC64 |
212 | #define LJ_TISNUM 0xfffeffffu | 275 | #define LJ_TISNUM 0xfffeffffu |
213 | #else | 276 | #else |
214 | #define LJ_TISNUM LJ_TNUMX | 277 | #define LJ_TISNUM LJ_TNUMX |
@@ -218,6 +281,10 @@ typedef const TValue cTValue; | |||
218 | #define LJ_TISGCV (LJ_TSTR+1) | 281 | #define LJ_TISGCV (LJ_TSTR+1) |
219 | #define LJ_TISTABUD LJ_TTAB | 282 | #define LJ_TISTABUD LJ_TTAB |
220 | 283 | ||
284 | #if LJ_GC64 | ||
285 | #define LJ_GCVMASK (((uint64_t)1 << 47) - 1) | ||
286 | #endif | ||
287 | |||
221 | /* -- String object ------------------------------------------------------- */ | 288 | /* -- String object ------------------------------------------------------- */ |
222 | 289 | ||
223 | /* String object header. String payload follows. */ | 290 | /* String object header. String payload follows. */ |
@@ -291,6 +358,9 @@ typedef struct GCproto { | |||
291 | uint8_t numparams; /* Number of parameters. */ | 358 | uint8_t numparams; /* Number of parameters. */ |
292 | uint8_t framesize; /* Fixed frame size. */ | 359 | uint8_t framesize; /* Fixed frame size. */ |
293 | MSize sizebc; /* Number of bytecode instructions. */ | 360 | MSize sizebc; /* Number of bytecode instructions. */ |
361 | #if LJ_GC64 | ||
362 | uint32_t unused_gc64; | ||
363 | #endif | ||
294 | GCRef gclist; | 364 | GCRef gclist; |
295 | MRef k; /* Split constant array (points to the middle). */ | 365 | MRef k; /* Split constant array (points to the middle). */ |
296 | MRef uv; /* Upvalue list. local slot|0x8000 or parent uv idx. */ | 366 | MRef uv; /* Upvalue list. local slot|0x8000 or parent uv idx. */ |
@@ -402,7 +472,9 @@ typedef struct Node { | |||
402 | TValue val; /* Value object. Must be first field. */ | 472 | TValue val; /* Value object. Must be first field. */ |
403 | TValue key; /* Key object. */ | 473 | TValue key; /* Key object. */ |
404 | MRef next; /* Hash chain. */ | 474 | MRef next; /* Hash chain. */ |
475 | #if !LJ_GC64 | ||
405 | MRef freetop; /* Top of free elements (stored in t->node[0]). */ | 476 | MRef freetop; /* Top of free elements (stored in t->node[0]). */ |
477 | #endif | ||
406 | } Node; | 478 | } Node; |
407 | 479 | ||
408 | LJ_STATIC_ASSERT(offsetof(Node, val) == 0); | 480 | LJ_STATIC_ASSERT(offsetof(Node, val) == 0); |
@@ -417,12 +489,22 @@ typedef struct GCtab { | |||
417 | MRef node; /* Hash part. */ | 489 | MRef node; /* Hash part. */ |
418 | uint32_t asize; /* Size of array part (keys [0, asize-1]). */ | 490 | uint32_t asize; /* Size of array part (keys [0, asize-1]). */ |
419 | uint32_t hmask; /* Hash part mask (size of hash part - 1). */ | 491 | uint32_t hmask; /* Hash part mask (size of hash part - 1). */ |
492 | #if LJ_GC64 | ||
493 | MRef freetop; /* Top of free elements. */ | ||
494 | #endif | ||
420 | } GCtab; | 495 | } GCtab; |
421 | 496 | ||
422 | #define sizetabcolo(n) ((n)*sizeof(TValue) + sizeof(GCtab)) | 497 | #define sizetabcolo(n) ((n)*sizeof(TValue) + sizeof(GCtab)) |
423 | #define tabref(r) (&gcref((r))->tab) | 498 | #define tabref(r) (&gcref((r))->tab) |
424 | #define noderef(r) (mref((r), Node)) | 499 | #define noderef(r) (mref((r), Node)) |
425 | #define nextnode(n) (mref((n)->next, Node)) | 500 | #define nextnode(n) (mref((n)->next, Node)) |
501 | #if LJ_GC64 | ||
502 | #define getfreetop(t, n) (noderef((t)->freetop)) | ||
503 | #define setfreetop(t, n, v) (setmref((t)->freetop, (v))) | ||
504 | #else | ||
505 | #define getfreetop(t, n) (noderef((n)->freetop)) | ||
506 | #define setfreetop(t, n, v) (setmref((n)->freetop, (v))) | ||
507 | #endif | ||
426 | 508 | ||
427 | /* -- State objects ------------------------------------------------------- */ | 509 | /* -- State objects ------------------------------------------------------- */ |
428 | 510 | ||
@@ -489,8 +571,8 @@ typedef enum { | |||
489 | #define mmname_str(g, mm) (strref((g)->gcroot[GCROOT_MMNAME+(mm)])) | 571 | #define mmname_str(g, mm) (strref((g)->gcroot[GCROOT_MMNAME+(mm)])) |
490 | 572 | ||
491 | typedef struct GCState { | 573 | typedef struct GCState { |
492 | MSize total; /* Memory currently allocated. */ | 574 | GCSize total; /* Memory currently allocated. */ |
493 | MSize threshold; /* Memory threshold. */ | 575 | GCSize threshold; /* Memory threshold. */ |
494 | uint8_t currentwhite; /* Current white color. */ | 576 | uint8_t currentwhite; /* Current white color. */ |
495 | uint8_t state; /* GC state. */ | 577 | uint8_t state; /* GC state. */ |
496 | uint8_t nocdatafin; /* No cdata finalizer called. */ | 578 | uint8_t nocdatafin; /* No cdata finalizer called. */ |
@@ -502,9 +584,9 @@ typedef struct GCState { | |||
502 | GCRef grayagain; /* List of objects for atomic traversal. */ | 584 | GCRef grayagain; /* List of objects for atomic traversal. */ |
503 | GCRef weak; /* List of weak tables (to be cleared). */ | 585 | GCRef weak; /* List of weak tables (to be cleared). */ |
504 | GCRef mmudata; /* List of userdata (to be finalized). */ | 586 | GCRef mmudata; /* List of userdata (to be finalized). */ |
587 | GCSize debt; /* Debt (how much GC is behind schedule). */ | ||
588 | GCSize estimate; /* Estimate of memory actually in use. */ | ||
505 | MSize stepmul; /* Incremental GC step granularity. */ | 589 | MSize stepmul; /* Incremental GC step granularity. */ |
506 | MSize debt; /* Debt (how much GC is behind schedule). */ | ||
507 | MSize estimate; /* Estimate of memory actually in use. */ | ||
508 | MSize pause; /* Pause between successive GC cycles. */ | 590 | MSize pause; /* Pause between successive GC cycles. */ |
509 | } GCState; | 591 | } GCState; |
510 | 592 | ||
@@ -516,8 +598,8 @@ typedef struct global_State { | |||
516 | lua_Alloc allocf; /* Memory allocator. */ | 598 | lua_Alloc allocf; /* Memory allocator. */ |
517 | void *allocd; /* Memory allocator data. */ | 599 | void *allocd; /* Memory allocator data. */ |
518 | GCState gc; /* Garbage collector. */ | 600 | GCState gc; /* Garbage collector. */ |
519 | SBuf tmpbuf; /* Temporary buffer for string concatenation. */ | 601 | volatile int32_t vmstate; /* VM state or current JIT code trace number. */ |
520 | Node nilnode; /* Fallback 1-element hash part (nil key and value). */ | 602 | SBuf tmpbuf; /* Temporary string buffer. */ |
521 | GCstr strempty; /* Empty string. */ | 603 | GCstr strempty; /* Empty string. */ |
522 | uint8_t stremptyz; /* Zero terminator of empty string. */ | 604 | uint8_t stremptyz; /* Zero terminator of empty string. */ |
523 | uint8_t hookmask; /* Hook mask. */ | 605 | uint8_t hookmask; /* Hook mask. */ |
@@ -526,17 +608,17 @@ typedef struct global_State { | |||
526 | GCRef mainthref; /* Link to main thread. */ | 608 | GCRef mainthref; /* Link to main thread. */ |
527 | TValue registrytv; /* Anchor for registry. */ | 609 | TValue registrytv; /* Anchor for registry. */ |
528 | TValue tmptv, tmptv2; /* Temporary TValues. */ | 610 | TValue tmptv, tmptv2; /* Temporary TValues. */ |
611 | Node nilnode; /* Fallback 1-element hash part (nil key and value). */ | ||
529 | GCupval uvhead; /* Head of double-linked list of all open upvalues. */ | 612 | GCupval uvhead; /* Head of double-linked list of all open upvalues. */ |
530 | int32_t hookcount; /* Instruction hook countdown. */ | 613 | int32_t hookcount; /* Instruction hook countdown. */ |
531 | int32_t hookcstart; /* Start count for instruction hook counter. */ | 614 | int32_t hookcstart; /* Start count for instruction hook counter. */ |
532 | lua_Hook hookf; /* Hook function. */ | 615 | lua_Hook hookf; /* Hook function. */ |
533 | lua_CFunction wrapf; /* Wrapper for C function calls. */ | 616 | lua_CFunction wrapf; /* Wrapper for C function calls. */ |
534 | lua_CFunction panic; /* Called as a last resort for errors. */ | 617 | lua_CFunction panic; /* Called as a last resort for errors. */ |
535 | volatile int32_t vmstate; /* VM state or current JIT code trace number. */ | ||
536 | BCIns bc_cfunc_int; /* Bytecode for internal C function calls. */ | 618 | BCIns bc_cfunc_int; /* Bytecode for internal C function calls. */ |
537 | BCIns bc_cfunc_ext; /* Bytecode for external C function calls. */ | 619 | BCIns bc_cfunc_ext; /* Bytecode for external C function calls. */ |
538 | GCRef jit_L; /* Current JIT code lua_State or NULL. */ | 620 | GCRef cur_L; /* Currently executing lua_State. */ |
539 | MRef jit_base; /* Current JIT code L->base. */ | 621 | MRef jit_base; /* Current JIT code L->base or NULL. */ |
540 | MRef ctype_state; /* Pointer to C type state. */ | 622 | MRef ctype_state; /* Pointer to C type state. */ |
541 | GCRef gcroot[GCROOT_MAX]; /* GC roots. */ | 623 | GCRef gcroot[GCROOT_MAX]; /* GC roots. */ |
542 | } global_State; | 624 | } global_State; |
@@ -553,9 +635,11 @@ typedef struct global_State { | |||
553 | #define HOOK_ACTIVE_SHIFT 4 | 635 | #define HOOK_ACTIVE_SHIFT 4 |
554 | #define HOOK_VMEVENT 0x20 | 636 | #define HOOK_VMEVENT 0x20 |
555 | #define HOOK_GC 0x40 | 637 | #define HOOK_GC 0x40 |
638 | #define HOOK_PROFILE 0x80 | ||
556 | #define hook_active(g) ((g)->hookmask & HOOK_ACTIVE) | 639 | #define hook_active(g) ((g)->hookmask & HOOK_ACTIVE) |
557 | #define hook_enter(g) ((g)->hookmask |= HOOK_ACTIVE) | 640 | #define hook_enter(g) ((g)->hookmask |= HOOK_ACTIVE) |
558 | #define hook_entergc(g) ((g)->hookmask |= (HOOK_ACTIVE|HOOK_GC)) | 641 | #define hook_entergc(g) \ |
642 | ((g)->hookmask = ((g)->hookmask | (HOOK_ACTIVE|HOOK_GC)) & ~HOOK_PROFILE) | ||
559 | #define hook_vmevent(g) ((g)->hookmask |= (HOOK_ACTIVE|HOOK_VMEVENT)) | 643 | #define hook_vmevent(g) ((g)->hookmask |= (HOOK_ACTIVE|HOOK_VMEVENT)) |
560 | #define hook_leave(g) ((g)->hookmask &= ~HOOK_ACTIVE) | 644 | #define hook_leave(g) ((g)->hookmask &= ~HOOK_ACTIVE) |
561 | #define hook_save(g) ((g)->hookmask & ~HOOK_EVENTMASK) | 645 | #define hook_save(g) ((g)->hookmask & ~HOOK_EVENTMASK) |
@@ -583,7 +667,13 @@ struct lua_State { | |||
583 | #define registry(L) (&G(L)->registrytv) | 667 | #define registry(L) (&G(L)->registrytv) |
584 | 668 | ||
585 | /* Macros to access the currently executing (Lua) function. */ | 669 | /* Macros to access the currently executing (Lua) function. */ |
670 | #if LJ_GC64 | ||
671 | #define curr_func(L) (&gcval(L->base-2)->fn) | ||
672 | #elif LJ_FR2 | ||
673 | #define curr_func(L) (&gcref((L->base-2)->gcr)->fn) | ||
674 | #else | ||
586 | #define curr_func(L) (&gcref((L->base-1)->fr.func)->fn) | 675 | #define curr_func(L) (&gcref((L->base-1)->fr.func)->fn) |
676 | #endif | ||
587 | #define curr_funcisL(L) (isluafunc(curr_func(L))) | 677 | #define curr_funcisL(L) (isluafunc(curr_func(L))) |
588 | #define curr_proto(L) (funcproto(curr_func(L))) | 678 | #define curr_proto(L) (funcproto(curr_func(L))) |
589 | #define curr_topL(L) (L->base + curr_proto(L)->framesize) | 679 | #define curr_topL(L) (L->base + curr_proto(L)->framesize) |
@@ -647,12 +737,17 @@ typedef union GCobj { | |||
647 | #endif | 737 | #endif |
648 | 738 | ||
649 | /* Macros to test types. */ | 739 | /* Macros to test types. */ |
740 | #if LJ_GC64 | ||
741 | #define itype(o) ((uint32_t)((o)->it64 >> 47)) | ||
742 | #define tvisnil(o) ((o)->it64 == -1) | ||
743 | #else | ||
650 | #define itype(o) ((o)->it) | 744 | #define itype(o) ((o)->it) |
651 | #define tvisnil(o) (itype(o) == LJ_TNIL) | 745 | #define tvisnil(o) (itype(o) == LJ_TNIL) |
746 | #endif | ||
652 | #define tvisfalse(o) (itype(o) == LJ_TFALSE) | 747 | #define tvisfalse(o) (itype(o) == LJ_TFALSE) |
653 | #define tvistrue(o) (itype(o) == LJ_TTRUE) | 748 | #define tvistrue(o) (itype(o) == LJ_TTRUE) |
654 | #define tvisbool(o) (tvisfalse(o) || tvistrue(o)) | 749 | #define tvisbool(o) (tvisfalse(o) || tvistrue(o)) |
655 | #if LJ_64 | 750 | #if LJ_64 && !LJ_GC64 |
656 | #define tvislightud(o) (((int32_t)itype(o) >> 15) == -2) | 751 | #define tvislightud(o) (((int32_t)itype(o) >> 15) == -2) |
657 | #else | 752 | #else |
658 | #define tvislightud(o) (itype(o) == LJ_TLIGHTUD) | 753 | #define tvislightud(o) (itype(o) == LJ_TLIGHTUD) |
@@ -686,7 +781,7 @@ typedef union GCobj { | |||
686 | #define rawnumequal(o1, o2) ((o1)->u64 == (o2)->u64) | 781 | #define rawnumequal(o1, o2) ((o1)->u64 == (o2)->u64) |
687 | 782 | ||
688 | /* Macros to convert type ids. */ | 783 | /* Macros to convert type ids. */ |
689 | #if LJ_64 | 784 | #if LJ_64 && !LJ_GC64 |
690 | #define itypemap(o) \ | 785 | #define itypemap(o) \ |
691 | (tvisnumber(o) ? ~LJ_TNUMX : tvislightud(o) ? ~LJ_TLIGHTUD : ~itype(o)) | 786 | (tvisnumber(o) ? ~LJ_TNUMX : tvislightud(o) ? ~LJ_TLIGHTUD : ~itype(o)) |
692 | #else | 787 | #else |
@@ -694,8 +789,12 @@ typedef union GCobj { | |||
694 | #endif | 789 | #endif |
695 | 790 | ||
696 | /* Macros to get tagged values. */ | 791 | /* Macros to get tagged values. */ |
792 | #if LJ_GC64 | ||
793 | #define gcval(o) ((GCobj *)(gcrefu((o)->gcr) & LJ_GCVMASK)) | ||
794 | #else | ||
697 | #define gcval(o) (gcref((o)->gcr)) | 795 | #define gcval(o) (gcref((o)->gcr)) |
698 | #define boolV(o) check_exp(tvisbool(o), (LJ_TFALSE - (o)->it)) | 796 | #endif |
797 | #define boolV(o) check_exp(tvisbool(o), (LJ_TFALSE - itype(o))) | ||
699 | #if LJ_64 | 798 | #if LJ_64 |
700 | #define lightudV(o) \ | 799 | #define lightudV(o) \ |
701 | check_exp(tvislightud(o), (void *)((o)->u64 & U64x(00007fff,ffffffff))) | 800 | check_exp(tvislightud(o), (void *)((o)->u64 & U64x(00007fff,ffffffff))) |
@@ -714,13 +813,23 @@ typedef union GCobj { | |||
714 | #define intV(o) check_exp(tvisint(o), (int32_t)(o)->i) | 813 | #define intV(o) check_exp(tvisint(o), (int32_t)(o)->i) |
715 | 814 | ||
716 | /* Macros to set tagged values. */ | 815 | /* Macros to set tagged values. */ |
816 | #if LJ_GC64 | ||
817 | #define setitype(o, i) ((o)->it = ((i) << 15)) | ||
818 | #define setnilV(o) ((o)->it64 = -1) | ||
819 | #define setpriV(o, x) ((o)->it64 = (int64_t)~((uint64_t)~(x)<<47)) | ||
820 | #define setboolV(o, x) ((o)->it64 = (int64_t)~((uint64_t)((x)+1)<<47)) | ||
821 | #else | ||
717 | #define setitype(o, i) ((o)->it = (i)) | 822 | #define setitype(o, i) ((o)->it = (i)) |
718 | #define setnilV(o) ((o)->it = LJ_TNIL) | 823 | #define setnilV(o) ((o)->it = LJ_TNIL) |
719 | #define setboolV(o, x) ((o)->it = LJ_TFALSE-(uint32_t)(x)) | 824 | #define setboolV(o, x) ((o)->it = LJ_TFALSE-(uint32_t)(x)) |
825 | #define setpriV(o, i) (setitype((o), (i))) | ||
826 | #endif | ||
720 | 827 | ||
721 | static LJ_AINLINE void setlightudV(TValue *o, void *p) | 828 | static LJ_AINLINE void setlightudV(TValue *o, void *p) |
722 | { | 829 | { |
723 | #if LJ_64 | 830 | #if LJ_GC64 |
831 | o->u64 = (uint64_t)p | (((uint64_t)LJ_TLIGHTUD) << 47); | ||
832 | #elif LJ_64 | ||
724 | o->u64 = (uint64_t)p | (((uint64_t)0xffff) << 48); | 833 | o->u64 = (uint64_t)p | (((uint64_t)0xffff) << 48); |
725 | #else | 834 | #else |
726 | setgcrefp(o->gcr, p); setitype(o, LJ_TLIGHTUD); | 835 | setgcrefp(o->gcr, p); setitype(o, LJ_TLIGHTUD); |
@@ -730,20 +839,39 @@ static LJ_AINLINE void setlightudV(TValue *o, void *p) | |||
730 | #if LJ_64 | 839 | #if LJ_64 |
731 | #define checklightudptr(L, p) \ | 840 | #define checklightudptr(L, p) \ |
732 | (((uint64_t)(p) >> 47) ? (lj_err_msg(L, LJ_ERR_BADLU), NULL) : (p)) | 841 | (((uint64_t)(p) >> 47) ? (lj_err_msg(L, LJ_ERR_BADLU), NULL) : (p)) |
842 | #else | ||
843 | #define checklightudptr(L, p) (p) | ||
844 | #endif | ||
845 | |||
846 | #if LJ_FR2 | ||
847 | #define contptr(f) ((void *)(f)) | ||
848 | #define setcont(o, f) ((o)->u64 = (uint64_t)(uintptr_t)contptr(f)) | ||
849 | #elif LJ_64 | ||
850 | #define contptr(f) \ | ||
851 | ((void *)(uintptr_t)(uint32_t)((intptr_t)(f) - (intptr_t)lj_vm_asm_begin)) | ||
733 | #define setcont(o, f) \ | 852 | #define setcont(o, f) \ |
734 | ((o)->u64 = (uint64_t)(void *)(f) - (uint64_t)lj_vm_asm_begin) | 853 | ((o)->u64 = (uint64_t)(void *)(f) - (uint64_t)lj_vm_asm_begin) |
735 | #else | 854 | #else |
736 | #define checklightudptr(L, p) (p) | 855 | #define contptr(f) ((void *)(f)) |
737 | #define setcont(o, f) setlightudV((o), (void *)(f)) | 856 | #define setcont(o, f) setlightudV((o), contptr(f)) |
738 | #endif | 857 | #endif |
739 | 858 | ||
740 | #define tvchecklive(L, o) \ | 859 | #define tvchecklive(L, o) \ |
741 | UNUSED(L), lua_assert(!tvisgcv(o) || \ | 860 | UNUSED(L), lua_assert(!tvisgcv(o) || \ |
742 | ((~itype(o) == gcval(o)->gch.gct) && !isdead(G(L), gcval(o)))) | 861 | ((~itype(o) == gcval(o)->gch.gct) && !isdead(G(L), gcval(o)))) |
743 | 862 | ||
744 | static LJ_AINLINE void setgcV(lua_State *L, TValue *o, GCobj *v, uint32_t itype) | 863 | static LJ_AINLINE void setgcVraw(TValue *o, GCobj *v, uint32_t itype) |
745 | { | 864 | { |
746 | setgcref(o->gcr, v); setitype(o, itype); tvchecklive(L, o); | 865 | #if LJ_GC64 |
866 | setgcreft(o->gcr, v, itype); | ||
867 | #else | ||
868 | setgcref(o->gcr, v); setitype(o, itype); | ||
869 | #endif | ||
870 | } | ||
871 | |||
872 | static LJ_AINLINE void setgcV(lua_State *L, TValue *o, GCobj *v, uint32_t it) | ||
873 | { | ||
874 | setgcVraw(o, v, it); tvchecklive(L, o); | ||
747 | } | 875 | } |
748 | 876 | ||
749 | #define define_setV(name, type, tag) \ | 877 | #define define_setV(name, type, tag) \ |
@@ -797,6 +925,9 @@ static LJ_AINLINE void copyTV(lua_State *L, TValue *o1, const TValue *o2) | |||
797 | 925 | ||
798 | #if LJ_SOFTFP | 926 | #if LJ_SOFTFP |
799 | LJ_ASMF int32_t lj_vm_tobit(double x); | 927 | LJ_ASMF int32_t lj_vm_tobit(double x); |
928 | #if LJ_TARGET_MIPS64 | ||
929 | LJ_ASMF int32_t lj_vm_tointg(double x); | ||
930 | #endif | ||
800 | #endif | 931 | #endif |
801 | 932 | ||
802 | static LJ_AINLINE int32_t lj_num2bit(lua_Number n) | 933 | static LJ_AINLINE int32_t lj_num2bit(lua_Number n) |
@@ -810,11 +941,7 @@ static LJ_AINLINE int32_t lj_num2bit(lua_Number n) | |||
810 | #endif | 941 | #endif |
811 | } | 942 | } |
812 | 943 | ||
813 | #if LJ_TARGET_X86 && !defined(__SSE2__) | ||
814 | #define lj_num2int(n) lj_num2bit((n)) | ||
815 | #else | ||
816 | #define lj_num2int(n) ((int32_t)(n)) | 944 | #define lj_num2int(n) ((int32_t)(n)) |
817 | #endif | ||
818 | 945 | ||
819 | /* | 946 | /* |
820 | ** This must match the JIT backend behavior. In particular for archs | 947 | ** This must match the JIT backend behavior. In particular for archs |
@@ -859,6 +986,7 @@ LJ_DATA const char *const lj_obj_itypename[~LJ_TNUMX+1]; | |||
859 | #define lj_typename(o) (lj_obj_itypename[itypemap(o)]) | 986 | #define lj_typename(o) (lj_obj_itypename[itypemap(o)]) |
860 | 987 | ||
861 | /* Compare two objects without calling metamethods. */ | 988 | /* Compare two objects without calling metamethods. */ |
862 | LJ_FUNC int lj_obj_equal(cTValue *o1, cTValue *o2); | 989 | LJ_FUNC int LJ_FASTCALL lj_obj_equal(cTValue *o1, cTValue *o2); |
990 | LJ_FUNC const void * LJ_FASTCALL lj_obj_ptr(cTValue *o); | ||
863 | 991 | ||
864 | #endif | 992 | #endif |
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index 928d3852..b4d05a26 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c | |||
@@ -14,18 +14,21 @@ | |||
14 | 14 | ||
15 | #if LJ_HASJIT | 15 | #if LJ_HASJIT |
16 | 16 | ||
17 | #include "lj_buf.h" | ||
17 | #include "lj_str.h" | 18 | #include "lj_str.h" |
18 | #include "lj_tab.h" | 19 | #include "lj_tab.h" |
19 | #include "lj_ir.h" | 20 | #include "lj_ir.h" |
20 | #include "lj_jit.h" | 21 | #include "lj_jit.h" |
22 | #include "lj_ircall.h" | ||
21 | #include "lj_iropt.h" | 23 | #include "lj_iropt.h" |
22 | #include "lj_trace.h" | 24 | #include "lj_trace.h" |
23 | #if LJ_HASFFI | 25 | #if LJ_HASFFI |
24 | #include "lj_ctype.h" | 26 | #include "lj_ctype.h" |
25 | #endif | ||
26 | #include "lj_carith.h" | 27 | #include "lj_carith.h" |
28 | #endif | ||
27 | #include "lj_vm.h" | 29 | #include "lj_vm.h" |
28 | #include "lj_strscan.h" | 30 | #include "lj_strscan.h" |
31 | #include "lj_strfmt.h" | ||
29 | 32 | ||
30 | /* Here's a short description how the FOLD engine processes instructions: | 33 | /* Here's a short description how the FOLD engine processes instructions: |
31 | ** | 34 | ** |
@@ -133,8 +136,8 @@ | |||
133 | /* Some local macros to save typing. Undef'd at the end. */ | 136 | /* Some local macros to save typing. Undef'd at the end. */ |
134 | #define IR(ref) (&J->cur.ir[(ref)]) | 137 | #define IR(ref) (&J->cur.ir[(ref)]) |
135 | #define fins (&J->fold.ins) | 138 | #define fins (&J->fold.ins) |
136 | #define fleft (&J->fold.left) | 139 | #define fleft (J->fold.left) |
137 | #define fright (&J->fold.right) | 140 | #define fright (J->fold.right) |
138 | #define knumleft (ir_knum(fleft)->n) | 141 | #define knumleft (ir_knum(fleft)->n) |
139 | #define knumright (ir_knum(fright)->n) | 142 | #define knumright (ir_knum(fright)->n) |
140 | 143 | ||
@@ -155,13 +158,14 @@ typedef IRRef (LJ_FASTCALL *FoldFunc)(jit_State *J); | |||
155 | 158 | ||
156 | /* Barrier to prevent folding across a GC step. | 159 | /* Barrier to prevent folding across a GC step. |
157 | ** GC steps can only happen at the head of a trace and at LOOP. | 160 | ** GC steps can only happen at the head of a trace and at LOOP. |
158 | ** And the GC is only driven forward if there is at least one allocation. | 161 | ** And the GC is only driven forward if there's at least one allocation. |
159 | */ | 162 | */ |
160 | #define gcstep_barrier(J, ref) \ | 163 | #define gcstep_barrier(J, ref) \ |
161 | ((ref) < J->chain[IR_LOOP] && \ | 164 | ((ref) < J->chain[IR_LOOP] && \ |
162 | (J->chain[IR_SNEW] || J->chain[IR_XSNEW] || \ | 165 | (J->chain[IR_SNEW] || J->chain[IR_XSNEW] || \ |
163 | J->chain[IR_TNEW] || J->chain[IR_TDUP] || \ | 166 | J->chain[IR_TNEW] || J->chain[IR_TDUP] || \ |
164 | J->chain[IR_CNEW] || J->chain[IR_CNEWI] || J->chain[IR_TOSTR])) | 167 | J->chain[IR_CNEW] || J->chain[IR_CNEWI] || \ |
168 | J->chain[IR_BUFSTR] || J->chain[IR_TOSTR] || J->chain[IR_CALLA])) | ||
165 | 169 | ||
166 | /* -- Constant folding for FP numbers ------------------------------------- */ | 170 | /* -- Constant folding for FP numbers ------------------------------------- */ |
167 | 171 | ||
@@ -169,8 +173,6 @@ LJFOLD(ADD KNUM KNUM) | |||
169 | LJFOLD(SUB KNUM KNUM) | 173 | LJFOLD(SUB KNUM KNUM) |
170 | LJFOLD(MUL KNUM KNUM) | 174 | LJFOLD(MUL KNUM KNUM) |
171 | LJFOLD(DIV KNUM KNUM) | 175 | LJFOLD(DIV KNUM KNUM) |
172 | LJFOLD(NEG KNUM KNUM) | ||
173 | LJFOLD(ABS KNUM KNUM) | ||
174 | LJFOLD(ATAN2 KNUM KNUM) | 176 | LJFOLD(ATAN2 KNUM KNUM) |
175 | LJFOLD(LDEXP KNUM KNUM) | 177 | LJFOLD(LDEXP KNUM KNUM) |
176 | LJFOLD(MIN KNUM KNUM) | 178 | LJFOLD(MIN KNUM KNUM) |
@@ -183,6 +185,15 @@ LJFOLDF(kfold_numarith) | |||
183 | return lj_ir_knum(J, y); | 185 | return lj_ir_knum(J, y); |
184 | } | 186 | } |
185 | 187 | ||
188 | LJFOLD(NEG KNUM FLOAD) | ||
189 | LJFOLD(ABS KNUM FLOAD) | ||
190 | LJFOLDF(kfold_numabsneg) | ||
191 | { | ||
192 | lua_Number a = knumleft; | ||
193 | lua_Number y = lj_vm_foldarith(a, a, fins->o - IR_ADD); | ||
194 | return lj_ir_knum(J, y); | ||
195 | } | ||
196 | |||
186 | LJFOLD(LDEXP KNUM KINT) | 197 | LJFOLD(LDEXP KNUM KINT) |
187 | LJFOLDF(kfold_ldexp) | 198 | LJFOLDF(kfold_ldexp) |
188 | { | 199 | { |
@@ -336,15 +347,18 @@ LJFOLDF(kfold_intcomp0) | |||
336 | static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op) | 347 | static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op) |
337 | { | 348 | { |
338 | switch (op) { | 349 | switch (op) { |
339 | #if LJ_64 || LJ_HASFFI | 350 | #if LJ_HASFFI |
340 | case IR_ADD: k1 += k2; break; | 351 | case IR_ADD: k1 += k2; break; |
341 | case IR_SUB: k1 -= k2; break; | 352 | case IR_SUB: k1 -= k2; break; |
342 | #endif | ||
343 | #if LJ_HASFFI | ||
344 | case IR_MUL: k1 *= k2; break; | 353 | case IR_MUL: k1 *= k2; break; |
345 | case IR_BAND: k1 &= k2; break; | 354 | case IR_BAND: k1 &= k2; break; |
346 | case IR_BOR: k1 |= k2; break; | 355 | case IR_BOR: k1 |= k2; break; |
347 | case IR_BXOR: k1 ^= k2; break; | 356 | case IR_BXOR: k1 ^= k2; break; |
357 | case IR_BSHL: k1 <<= (k2 & 63); break; | ||
358 | case IR_BSHR: k1 = (int32_t)((uint32_t)k1 >> (k2 & 63)); break; | ||
359 | case IR_BSAR: k1 >>= (k2 & 63); break; | ||
360 | case IR_BROL: k1 = (int32_t)lj_rol((uint32_t)k1, (k2 & 63)); break; | ||
361 | case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 63)); break; | ||
348 | #endif | 362 | #endif |
349 | default: UNUSED(k2); lua_assert(0); break; | 363 | default: UNUSED(k2); lua_assert(0); break; |
350 | } | 364 | } |
@@ -392,20 +406,10 @@ LJFOLD(BROL KINT64 KINT) | |||
392 | LJFOLD(BROR KINT64 KINT) | 406 | LJFOLD(BROR KINT64 KINT) |
393 | LJFOLDF(kfold_int64shift) | 407 | LJFOLDF(kfold_int64shift) |
394 | { | 408 | { |
395 | #if LJ_HASFFI || LJ_64 | 409 | #if LJ_HASFFI |
396 | uint64_t k = ir_k64(fleft)->u64; | 410 | uint64_t k = ir_k64(fleft)->u64; |
397 | int32_t sh = (fright->i & 63); | 411 | int32_t sh = (fright->i & 63); |
398 | switch ((IROp)fins->o) { | 412 | return INT64FOLD(lj_carith_shift64(k, sh, fins->o - IR_BSHL)); |
399 | case IR_BSHL: k <<= sh; break; | ||
400 | #if LJ_HASFFI | ||
401 | case IR_BSHR: k >>= sh; break; | ||
402 | case IR_BSAR: k = (uint64_t)((int64_t)k >> sh); break; | ||
403 | case IR_BROL: k = lj_rol(k, sh); break; | ||
404 | case IR_BROR: k = lj_ror(k, sh); break; | ||
405 | #endif | ||
406 | default: lua_assert(0); break; | ||
407 | } | ||
408 | return INT64FOLD(k); | ||
409 | #else | 413 | #else |
410 | UNUSED(J); lua_assert(0); return FAILFOLD; | 414 | UNUSED(J); lua_assert(0); return FAILFOLD; |
411 | #endif | 415 | #endif |
@@ -510,7 +514,7 @@ LJFOLDF(kfold_strref_snew) | |||
510 | PHIBARRIER(ir); | 514 | PHIBARRIER(ir); |
511 | fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2); /* Clobbers fins! */ | 515 | fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2); /* Clobbers fins! */ |
512 | fins->op1 = str; | 516 | fins->op1 = str; |
513 | fins->ot = IRT(IR_STRREF, IRT_P32); | 517 | fins->ot = IRT(IR_STRREF, IRT_PGC); |
514 | return RETRYFOLD; | 518 | return RETRYFOLD; |
515 | } | 519 | } |
516 | } | 520 | } |
@@ -528,6 +532,180 @@ LJFOLDF(kfold_strcmp) | |||
528 | return NEXTFOLD; | 532 | return NEXTFOLD; |
529 | } | 533 | } |
530 | 534 | ||
535 | /* -- Constant folding and forwarding for buffers ------------------------- */ | ||
536 | |||
537 | /* | ||
538 | ** Buffer ops perform stores, but their effect is limited to the buffer | ||
539 | ** itself. Also, buffer ops are chained: a use of an op implies a use of | ||
540 | ** all other ops up the chain. Conversely, if an op is unused, all ops | ||
541 | ** up the chain can go unused. This largely eliminates the need to treat | ||
542 | ** them as stores. | ||
543 | ** | ||
544 | ** Alas, treating them as normal (IRM_N) ops doesn't work, because they | ||
545 | ** cannot be CSEd in isolation. CSE for IRM_N is implicitly done in LOOP | ||
546 | ** or if FOLD is disabled. | ||
547 | ** | ||
548 | ** The compromise is to declare them as loads, emit them like stores and | ||
549 | ** CSE whole chains manually when the BUFSTR is to be emitted. Any chain | ||
550 | ** fragments left over from CSE are eliminated by DCE. | ||
551 | */ | ||
552 | |||
553 | /* BUFHDR is emitted like a store, see below. */ | ||
554 | |||
555 | LJFOLD(BUFPUT BUFHDR BUFSTR) | ||
556 | LJFOLDF(bufput_append) | ||
557 | { | ||
558 | /* New buffer, no other buffer op in between and same buffer? */ | ||
559 | if ((J->flags & JIT_F_OPT_FWD) && | ||
560 | !(fleft->op2 & IRBUFHDR_APPEND) && | ||
561 | fleft->prev == fright->op2 && | ||
562 | fleft->op1 == IR(fright->op2)->op1) { | ||
563 | IRRef ref = fins->op1; | ||
564 | IR(ref)->op2 = (fleft->op2 | IRBUFHDR_APPEND); /* Modify BUFHDR. */ | ||
565 | IR(ref)->op1 = fright->op1; | ||
566 | return ref; | ||
567 | } | ||
568 | return EMITFOLD; /* Always emit, CSE later. */ | ||
569 | } | ||
570 | |||
571 | LJFOLD(BUFPUT any any) | ||
572 | LJFOLDF(bufput_kgc) | ||
573 | { | ||
574 | if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fright->o == IR_KGC) { | ||
575 | GCstr *s2 = ir_kstr(fright); | ||
576 | if (s2->len == 0) { /* Empty string? */ | ||
577 | return LEFTFOLD; | ||
578 | } else { | ||
579 | if (fleft->o == IR_BUFPUT && irref_isk(fleft->op2) && | ||
580 | !irt_isphi(fleft->t)) { /* Join two constant string puts in a row. */ | ||
581 | GCstr *s1 = ir_kstr(IR(fleft->op2)); | ||
582 | IRRef kref = lj_ir_kstr(J, lj_buf_cat2str(J->L, s1, s2)); | ||
583 | /* lj_ir_kstr() may realloc the IR and invalidate any IRIns *. */ | ||
584 | IR(fins->op1)->op2 = kref; /* Modify previous BUFPUT. */ | ||
585 | return fins->op1; | ||
586 | } | ||
587 | } | ||
588 | } | ||
589 | return EMITFOLD; /* Always emit, CSE later. */ | ||
590 | } | ||
591 | |||
592 | LJFOLD(BUFSTR any any) | ||
593 | LJFOLDF(bufstr_kfold_cse) | ||
594 | { | ||
595 | lua_assert(fleft->o == IR_BUFHDR || fleft->o == IR_BUFPUT || | ||
596 | fleft->o == IR_CALLL); | ||
597 | if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) { | ||
598 | if (fleft->o == IR_BUFHDR) { /* No put operations? */ | ||
599 | if (!(fleft->op2 & IRBUFHDR_APPEND)) /* Empty buffer? */ | ||
600 | return lj_ir_kstr(J, &J2G(J)->strempty); | ||
601 | fins->op1 = fleft->op1; | ||
602 | fins->op2 = fleft->prev; /* Relies on checks in bufput_append. */ | ||
603 | return CSEFOLD; | ||
604 | } else if (fleft->o == IR_BUFPUT) { | ||
605 | IRIns *irb = IR(fleft->op1); | ||
606 | if (irb->o == IR_BUFHDR && !(irb->op2 & IRBUFHDR_APPEND)) | ||
607 | return fleft->op2; /* Shortcut for a single put operation. */ | ||
608 | } | ||
609 | } | ||
610 | /* Try to CSE the whole chain. */ | ||
611 | if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) { | ||
612 | IRRef ref = J->chain[IR_BUFSTR]; | ||
613 | while (ref) { | ||
614 | IRIns *irs = IR(ref), *ira = fleft, *irb = IR(irs->op1); | ||
615 | while (ira->o == irb->o && ira->op2 == irb->op2) { | ||
616 | lua_assert(ira->o == IR_BUFHDR || ira->o == IR_BUFPUT || | ||
617 | ira->o == IR_CALLL || ira->o == IR_CARG); | ||
618 | if (ira->o == IR_BUFHDR && !(ira->op2 & IRBUFHDR_APPEND)) | ||
619 | return ref; /* CSE succeeded. */ | ||
620 | if (ira->o == IR_CALLL && ira->op2 == IRCALL_lj_buf_puttab) | ||
621 | break; | ||
622 | ira = IR(ira->op1); | ||
623 | irb = IR(irb->op1); | ||
624 | } | ||
625 | ref = irs->prev; | ||
626 | } | ||
627 | } | ||
628 | return EMITFOLD; /* No CSE possible. */ | ||
629 | } | ||
630 | |||
631 | LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_reverse) | ||
632 | LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_upper) | ||
633 | LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_lower) | ||
634 | LJFOLD(CALLL CARG IRCALL_lj_strfmt_putquoted) | ||
635 | LJFOLDF(bufput_kfold_op) | ||
636 | { | ||
637 | if (irref_isk(fleft->op2)) { | ||
638 | const CCallInfo *ci = &lj_ir_callinfo[fins->op2]; | ||
639 | SBuf *sb = lj_buf_tmp_(J->L); | ||
640 | sb = ((SBuf * (LJ_FASTCALL *)(SBuf *, GCstr *))ci->func)(sb, | ||
641 | ir_kstr(IR(fleft->op2))); | ||
642 | fins->o = IR_BUFPUT; | ||
643 | fins->op1 = fleft->op1; | ||
644 | fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb)); | ||
645 | return RETRYFOLD; | ||
646 | } | ||
647 | return EMITFOLD; /* Always emit, CSE later. */ | ||
648 | } | ||
649 | |||
650 | LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_rep) | ||
651 | LJFOLDF(bufput_kfold_rep) | ||
652 | { | ||
653 | if (irref_isk(fleft->op2)) { | ||
654 | IRIns *irc = IR(fleft->op1); | ||
655 | if (irref_isk(irc->op2)) { | ||
656 | SBuf *sb = lj_buf_tmp_(J->L); | ||
657 | sb = lj_buf_putstr_rep(sb, ir_kstr(IR(irc->op2)), IR(fleft->op2)->i); | ||
658 | fins->o = IR_BUFPUT; | ||
659 | fins->op1 = irc->op1; | ||
660 | fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb)); | ||
661 | return RETRYFOLD; | ||
662 | } | ||
663 | } | ||
664 | return EMITFOLD; /* Always emit, CSE later. */ | ||
665 | } | ||
666 | |||
667 | LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfxint) | ||
668 | LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum_int) | ||
669 | LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum_uint) | ||
670 | LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum) | ||
671 | LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfstr) | ||
672 | LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfchar) | ||
673 | LJFOLDF(bufput_kfold_fmt) | ||
674 | { | ||
675 | IRIns *irc = IR(fleft->op1); | ||
676 | lua_assert(irref_isk(irc->op2)); /* SFormat must be const. */ | ||
677 | if (irref_isk(fleft->op2)) { | ||
678 | SFormat sf = (SFormat)IR(irc->op2)->i; | ||
679 | IRIns *ira = IR(fleft->op2); | ||
680 | SBuf *sb = lj_buf_tmp_(J->L); | ||
681 | switch (fins->op2) { | ||
682 | case IRCALL_lj_strfmt_putfxint: | ||
683 | sb = lj_strfmt_putfxint(sb, sf, ir_k64(ira)->u64); | ||
684 | break; | ||
685 | case IRCALL_lj_strfmt_putfstr: | ||
686 | sb = lj_strfmt_putfstr(sb, sf, ir_kstr(ira)); | ||
687 | break; | ||
688 | case IRCALL_lj_strfmt_putfchar: | ||
689 | sb = lj_strfmt_putfchar(sb, sf, ira->i); | ||
690 | break; | ||
691 | case IRCALL_lj_strfmt_putfnum_int: | ||
692 | case IRCALL_lj_strfmt_putfnum_uint: | ||
693 | case IRCALL_lj_strfmt_putfnum: | ||
694 | default: { | ||
695 | const CCallInfo *ci = &lj_ir_callinfo[fins->op2]; | ||
696 | sb = ((SBuf * (*)(SBuf *, SFormat, lua_Number))ci->func)(sb, sf, | ||
697 | ir_knum(ira)->n); | ||
698 | break; | ||
699 | } | ||
700 | } | ||
701 | fins->o = IR_BUFPUT; | ||
702 | fins->op1 = irc->op1; | ||
703 | fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb)); | ||
704 | return RETRYFOLD; | ||
705 | } | ||
706 | return EMITFOLD; /* Always emit, CSE later. */ | ||
707 | } | ||
708 | |||
531 | /* -- Constant folding of pointer arithmetic ------------------------------ */ | 709 | /* -- Constant folding of pointer arithmetic ------------------------------ */ |
532 | 710 | ||
533 | LJFOLD(ADD KGC KINT) | 711 | LJFOLD(ADD KGC KINT) |
@@ -648,27 +826,22 @@ LJFOLD(CONV KNUM IRCONV_INT_NUM) | |||
648 | LJFOLDF(kfold_conv_knum_int_num) | 826 | LJFOLDF(kfold_conv_knum_int_num) |
649 | { | 827 | { |
650 | lua_Number n = knumleft; | 828 | lua_Number n = knumleft; |
651 | if (!(fins->op2 & IRCONV_TRUNC)) { | 829 | int32_t k = lj_num2int(n); |
652 | int32_t k = lj_num2int(n); | 830 | if (irt_isguard(fins->t) && n != (lua_Number)k) { |
653 | if (irt_isguard(fins->t) && n != (lua_Number)k) { | 831 | /* We're about to create a guard which always fails, like CONV +1.5. |
654 | /* We're about to create a guard which always fails, like CONV +1.5. | 832 | ** Some pathological loops cause this during LICM, e.g.: |
655 | ** Some pathological loops cause this during LICM, e.g.: | 833 | ** local x,k,t = 0,1.5,{1,[1.5]=2} |
656 | ** local x,k,t = 0,1.5,{1,[1.5]=2} | 834 | ** for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end |
657 | ** for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end | 835 | ** assert(x == 300) |
658 | ** assert(x == 300) | 836 | */ |
659 | */ | 837 | return FAILFOLD; |
660 | return FAILFOLD; | ||
661 | } | ||
662 | return INTFOLD(k); | ||
663 | } else { | ||
664 | return INTFOLD((int32_t)n); | ||
665 | } | 838 | } |
839 | return INTFOLD(k); | ||
666 | } | 840 | } |
667 | 841 | ||
668 | LJFOLD(CONV KNUM IRCONV_U32_NUM) | 842 | LJFOLD(CONV KNUM IRCONV_U32_NUM) |
669 | LJFOLDF(kfold_conv_knum_u32_num) | 843 | LJFOLDF(kfold_conv_knum_u32_num) |
670 | { | 844 | { |
671 | lua_assert((fins->op2 & IRCONV_TRUNC)); | ||
672 | #ifdef _MSC_VER | 845 | #ifdef _MSC_VER |
673 | { /* Workaround for MSVC bug. */ | 846 | { /* Workaround for MSVC bug. */ |
674 | volatile uint32_t u = (uint32_t)knumleft; | 847 | volatile uint32_t u = (uint32_t)knumleft; |
@@ -682,27 +855,27 @@ LJFOLDF(kfold_conv_knum_u32_num) | |||
682 | LJFOLD(CONV KNUM IRCONV_I64_NUM) | 855 | LJFOLD(CONV KNUM IRCONV_I64_NUM) |
683 | LJFOLDF(kfold_conv_knum_i64_num) | 856 | LJFOLDF(kfold_conv_knum_i64_num) |
684 | { | 857 | { |
685 | lua_assert((fins->op2 & IRCONV_TRUNC)); | ||
686 | return INT64FOLD((uint64_t)(int64_t)knumleft); | 858 | return INT64FOLD((uint64_t)(int64_t)knumleft); |
687 | } | 859 | } |
688 | 860 | ||
689 | LJFOLD(CONV KNUM IRCONV_U64_NUM) | 861 | LJFOLD(CONV KNUM IRCONV_U64_NUM) |
690 | LJFOLDF(kfold_conv_knum_u64_num) | 862 | LJFOLDF(kfold_conv_knum_u64_num) |
691 | { | 863 | { |
692 | lua_assert((fins->op2 & IRCONV_TRUNC)); | ||
693 | return INT64FOLD(lj_num2u64(knumleft)); | 864 | return INT64FOLD(lj_num2u64(knumleft)); |
694 | } | 865 | } |
695 | 866 | ||
696 | LJFOLD(TOSTR KNUM) | 867 | LJFOLD(TOSTR KNUM any) |
697 | LJFOLDF(kfold_tostr_knum) | 868 | LJFOLDF(kfold_tostr_knum) |
698 | { | 869 | { |
699 | return lj_ir_kstr(J, lj_str_fromnum(J->L, &knumleft)); | 870 | return lj_ir_kstr(J, lj_strfmt_num(J->L, ir_knum(fleft))); |
700 | } | 871 | } |
701 | 872 | ||
702 | LJFOLD(TOSTR KINT) | 873 | LJFOLD(TOSTR KINT any) |
703 | LJFOLDF(kfold_tostr_kint) | 874 | LJFOLDF(kfold_tostr_kint) |
704 | { | 875 | { |
705 | return lj_ir_kstr(J, lj_str_fromint(J->L, fleft->i)); | 876 | return lj_ir_kstr(J, fins->op2 == IRTOSTR_INT ? |
877 | lj_strfmt_int(J->L, fleft->i) : | ||
878 | lj_strfmt_char(J->L, fleft->i)); | ||
706 | } | 879 | } |
707 | 880 | ||
708 | LJFOLD(STRTO KGC) | 881 | LJFOLD(STRTO KGC) |
@@ -750,13 +923,13 @@ LJFOLDF(shortcut_round) | |||
750 | return NEXTFOLD; | 923 | return NEXTFOLD; |
751 | } | 924 | } |
752 | 925 | ||
753 | LJFOLD(ABS ABS KNUM) | 926 | LJFOLD(ABS ABS FLOAD) |
754 | LJFOLDF(shortcut_left) | 927 | LJFOLDF(shortcut_left) |
755 | { | 928 | { |
756 | return LEFTFOLD; /* f(g(x)) ==> g(x) */ | 929 | return LEFTFOLD; /* f(g(x)) ==> g(x) */ |
757 | } | 930 | } |
758 | 931 | ||
759 | LJFOLD(ABS NEG KNUM) | 932 | LJFOLD(ABS NEG FLOAD) |
760 | LJFOLDF(shortcut_dropleft) | 933 | LJFOLDF(shortcut_dropleft) |
761 | { | 934 | { |
762 | PHIBARRIER(fleft); | 935 | PHIBARRIER(fleft); |
@@ -837,8 +1010,10 @@ LJFOLDF(simplify_nummuldiv_k) | |||
837 | if (n == 1.0) { /* x o 1 ==> x */ | 1010 | if (n == 1.0) { /* x o 1 ==> x */ |
838 | return LEFTFOLD; | 1011 | return LEFTFOLD; |
839 | } else if (n == -1.0) { /* x o -1 ==> -x */ | 1012 | } else if (n == -1.0) { /* x o -1 ==> -x */ |
1013 | IRRef op1 = fins->op1; | ||
1014 | fins->op2 = (IRRef1)lj_ir_ksimd(J, LJ_KSIMD_NEG); /* Modifies fins. */ | ||
1015 | fins->op1 = op1; | ||
840 | fins->o = IR_NEG; | 1016 | fins->o = IR_NEG; |
841 | fins->op2 = (IRRef1)lj_ir_knum_neg(J); | ||
842 | return RETRYFOLD; | 1017 | return RETRYFOLD; |
843 | } else if (fins->o == IR_MUL && n == 2.0) { /* x * 2 ==> x + x */ | 1018 | } else if (fins->o == IR_MUL && n == 2.0) { /* x * 2 ==> x + x */ |
844 | fins->o = IR_ADD; | 1019 | fins->o = IR_ADD; |
@@ -1205,7 +1380,9 @@ static TRef simplify_intmul_k(jit_State *J, int32_t k) | |||
1205 | ** But this is mainly intended for simple address arithmetic. | 1380 | ** But this is mainly intended for simple address arithmetic. |
1206 | ** Also it's easier for the backend to optimize the original multiplies. | 1381 | ** Also it's easier for the backend to optimize the original multiplies. |
1207 | */ | 1382 | */ |
1208 | if (k == 1) { /* i * 1 ==> i */ | 1383 | if (k == 0) { /* i * 0 ==> 0 */ |
1384 | return RIGHTFOLD; | ||
1385 | } else if (k == 1) { /* i * 1 ==> i */ | ||
1209 | return LEFTFOLD; | 1386 | return LEFTFOLD; |
1210 | } else if ((k & (k-1)) == 0) { /* i * 2^k ==> i << k */ | 1387 | } else if ((k & (k-1)) == 0) { /* i * 2^k ==> i << k */ |
1211 | fins->o = IR_BSHL; | 1388 | fins->o = IR_BSHL; |
@@ -1218,9 +1395,7 @@ static TRef simplify_intmul_k(jit_State *J, int32_t k) | |||
1218 | LJFOLD(MUL any KINT) | 1395 | LJFOLD(MUL any KINT) |
1219 | LJFOLDF(simplify_intmul_k32) | 1396 | LJFOLDF(simplify_intmul_k32) |
1220 | { | 1397 | { |
1221 | if (fright->i == 0) /* i * 0 ==> 0 */ | 1398 | if (fright->i >= 0) |
1222 | return INTFOLD(0); | ||
1223 | else if (fright->i > 0) | ||
1224 | return simplify_intmul_k(J, fright->i); | 1399 | return simplify_intmul_k(J, fright->i); |
1225 | return NEXTFOLD; | 1400 | return NEXTFOLD; |
1226 | } | 1401 | } |
@@ -1228,14 +1403,13 @@ LJFOLDF(simplify_intmul_k32) | |||
1228 | LJFOLD(MUL any KINT64) | 1403 | LJFOLD(MUL any KINT64) |
1229 | LJFOLDF(simplify_intmul_k64) | 1404 | LJFOLDF(simplify_intmul_k64) |
1230 | { | 1405 | { |
1231 | if (ir_kint64(fright)->u64 == 0) /* i * 0 ==> 0 */ | 1406 | #if LJ_HASFFI |
1232 | return INT64FOLD(0); | 1407 | if (ir_kint64(fright)->u64 < 0x80000000u) |
1233 | #if LJ_64 | ||
1234 | /* NYI: SPLIT for BSHL and 32 bit backend support. */ | ||
1235 | else if (ir_kint64(fright)->u64 < 0x80000000u) | ||
1236 | return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64); | 1408 | return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64); |
1237 | #endif | ||
1238 | return NEXTFOLD; | 1409 | return NEXTFOLD; |
1410 | #else | ||
1411 | UNUSED(J); lua_assert(0); return FAILFOLD; | ||
1412 | #endif | ||
1239 | } | 1413 | } |
1240 | 1414 | ||
1241 | LJFOLD(MOD any KINT) | 1415 | LJFOLD(MOD any KINT) |
@@ -1491,6 +1665,14 @@ LJFOLDF(simplify_shiftk_andk) | |||
1491 | fins->op2 = (IRRef1)lj_ir_kint(J, k); | 1665 | fins->op2 = (IRRef1)lj_ir_kint(J, k); |
1492 | fins->ot = IRTI(IR_BAND); | 1666 | fins->ot = IRTI(IR_BAND); |
1493 | return RETRYFOLD; | 1667 | return RETRYFOLD; |
1668 | } else if (irk->o == IR_KINT64) { | ||
1669 | uint64_t k = kfold_int64arith(ir_k64(irk)->u64, fright->i, (IROp)fins->o); | ||
1670 | IROpT ot = fleft->ot; | ||
1671 | fins->op1 = fleft->op1; | ||
1672 | fins->op1 = (IRRef1)lj_opt_fold(J); | ||
1673 | fins->op2 = (IRRef1)lj_ir_kint64(J, k); | ||
1674 | fins->ot = ot; | ||
1675 | return RETRYFOLD; | ||
1494 | } | 1676 | } |
1495 | return NEXTFOLD; | 1677 | return NEXTFOLD; |
1496 | } | 1678 | } |
@@ -1506,6 +1688,47 @@ LJFOLDF(simplify_andk_shiftk) | |||
1506 | return NEXTFOLD; | 1688 | return NEXTFOLD; |
1507 | } | 1689 | } |
1508 | 1690 | ||
1691 | LJFOLD(BAND BOR KINT) | ||
1692 | LJFOLD(BOR BAND KINT) | ||
1693 | LJFOLDF(simplify_andor_k) | ||
1694 | { | ||
1695 | IRIns *irk = IR(fleft->op2); | ||
1696 | PHIBARRIER(fleft); | ||
1697 | if (irk->o == IR_KINT) { | ||
1698 | int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o); | ||
1699 | /* (i | k1) & k2 ==> i & k2, if (k1 & k2) == 0. */ | ||
1700 | /* (i & k1) | k2 ==> i | k2, if (k1 | k2) == -1. */ | ||
1701 | if (k == (fins->o == IR_BAND ? 0 : -1)) { | ||
1702 | fins->op1 = fleft->op1; | ||
1703 | return RETRYFOLD; | ||
1704 | } | ||
1705 | } | ||
1706 | return NEXTFOLD; | ||
1707 | } | ||
1708 | |||
1709 | LJFOLD(BAND BOR KINT64) | ||
1710 | LJFOLD(BOR BAND KINT64) | ||
1711 | LJFOLDF(simplify_andor_k64) | ||
1712 | { | ||
1713 | #if LJ_HASFFI | ||
1714 | IRIns *irk = IR(fleft->op2); | ||
1715 | PHIBARRIER(fleft); | ||
1716 | if (irk->o == IR_KINT64) { | ||
1717 | uint64_t k = kfold_int64arith(ir_k64(irk)->u64, | ||
1718 | ir_k64(fright)->u64, (IROp)fins->o); | ||
1719 | /* (i | k1) & k2 ==> i & k2, if (k1 & k2) == 0. */ | ||
1720 | /* (i & k1) | k2 ==> i | k2, if (k1 | k2) == -1. */ | ||
1721 | if (k == (fins->o == IR_BAND ? (uint64_t)0 : ~(uint64_t)0)) { | ||
1722 | fins->op1 = fleft->op1; | ||
1723 | return RETRYFOLD; | ||
1724 | } | ||
1725 | } | ||
1726 | return NEXTFOLD; | ||
1727 | #else | ||
1728 | UNUSED(J); lua_assert(0); return FAILFOLD; | ||
1729 | #endif | ||
1730 | } | ||
1731 | |||
1509 | /* -- Reassociation ------------------------------------------------------- */ | 1732 | /* -- Reassociation ------------------------------------------------------- */ |
1510 | 1733 | ||
1511 | LJFOLD(ADD ADD KINT) | 1734 | LJFOLD(ADD ADD KINT) |
@@ -1535,7 +1758,7 @@ LJFOLD(BOR BOR KINT64) | |||
1535 | LJFOLD(BXOR BXOR KINT64) | 1758 | LJFOLD(BXOR BXOR KINT64) |
1536 | LJFOLDF(reassoc_intarith_k64) | 1759 | LJFOLDF(reassoc_intarith_k64) |
1537 | { | 1760 | { |
1538 | #if LJ_HASFFI || LJ_64 | 1761 | #if LJ_HASFFI |
1539 | IRIns *irk = IR(fleft->op2); | 1762 | IRIns *irk = IR(fleft->op2); |
1540 | if (irk->o == IR_KINT64) { | 1763 | if (irk->o == IR_KINT64) { |
1541 | uint64_t k = kfold_int64arith(ir_k64(irk)->u64, | 1764 | uint64_t k = kfold_int64arith(ir_k64(irk)->u64, |
@@ -1953,6 +2176,7 @@ LJFOLDF(fwd_href_tdup) | |||
1953 | ** an aliased table, as it may invalidate all of the pointers and fields. | 2176 | ** an aliased table, as it may invalidate all of the pointers and fields. |
1954 | ** Only HREF needs the NEWREF check -- AREF and HREFK already depend on | 2177 | ** Only HREF needs the NEWREF check -- AREF and HREFK already depend on |
1955 | ** FLOADs. And NEWREF itself is treated like a store (see below). | 2178 | ** FLOADs. And NEWREF itself is treated like a store (see below). |
2179 | ** LREF is constant (per trace) since coroutine switches are not inlined. | ||
1956 | */ | 2180 | */ |
1957 | LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE) | 2181 | LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE) |
1958 | LJFOLDF(fload_tab_tnew_asize) | 2182 | LJFOLDF(fload_tab_tnew_asize) |
@@ -2016,6 +2240,14 @@ LJFOLDF(fload_str_len_snew) | |||
2016 | return NEXTFOLD; | 2240 | return NEXTFOLD; |
2017 | } | 2241 | } |
2018 | 2242 | ||
2243 | LJFOLD(FLOAD TOSTR IRFL_STR_LEN) | ||
2244 | LJFOLDF(fload_str_len_tostr) | ||
2245 | { | ||
2246 | if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fleft->op2 == IRTOSTR_CHAR) | ||
2247 | return INTFOLD(1); | ||
2248 | return NEXTFOLD; | ||
2249 | } | ||
2250 | |||
2019 | /* The C type ID of cdata objects is immutable. */ | 2251 | /* The C type ID of cdata objects is immutable. */ |
2020 | LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID) | 2252 | LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID) |
2021 | LJFOLDF(fload_cdata_typeid_kgc) | 2253 | LJFOLDF(fload_cdata_typeid_kgc) |
@@ -2062,6 +2294,8 @@ LJFOLDF(fload_cdata_ptr_int64_cnew) | |||
2062 | } | 2294 | } |
2063 | 2295 | ||
2064 | LJFOLD(FLOAD any IRFL_STR_LEN) | 2296 | LJFOLD(FLOAD any IRFL_STR_LEN) |
2297 | LJFOLD(FLOAD any IRFL_FUNC_ENV) | ||
2298 | LJFOLD(FLOAD any IRFL_THREAD_ENV) | ||
2065 | LJFOLD(FLOAD any IRFL_CDATA_CTYPEID) | 2299 | LJFOLD(FLOAD any IRFL_CDATA_CTYPEID) |
2066 | LJFOLD(FLOAD any IRFL_CDATA_PTR) | 2300 | LJFOLD(FLOAD any IRFL_CDATA_PTR) |
2067 | LJFOLD(FLOAD any IRFL_CDATA_INT) | 2301 | LJFOLD(FLOAD any IRFL_CDATA_INT) |
@@ -2127,6 +2361,17 @@ LJFOLDF(barrier_tnew_tdup) | |||
2127 | return DROPFOLD; | 2361 | return DROPFOLD; |
2128 | } | 2362 | } |
2129 | 2363 | ||
2364 | /* -- Profiling ----------------------------------------------------------- */ | ||
2365 | |||
2366 | LJFOLD(PROF any any) | ||
2367 | LJFOLDF(prof) | ||
2368 | { | ||
2369 | IRRef ref = J->chain[IR_PROF]; | ||
2370 | if (ref+1 == J->cur.nins) /* Drop neighbouring IR_PROF. */ | ||
2371 | return ref; | ||
2372 | return EMITFOLD; | ||
2373 | } | ||
2374 | |||
2130 | /* -- Stores and allocations ---------------------------------------------- */ | 2375 | /* -- Stores and allocations ---------------------------------------------- */ |
2131 | 2376 | ||
2132 | /* Stores and allocations cannot be folded or passed on to CSE in general. | 2377 | /* Stores and allocations cannot be folded or passed on to CSE in general. |
@@ -2149,8 +2394,9 @@ LJFOLD(XSTORE any any) | |||
2149 | LJFOLDX(lj_opt_dse_xstore) | 2394 | LJFOLDX(lj_opt_dse_xstore) |
2150 | 2395 | ||
2151 | LJFOLD(NEWREF any any) /* Treated like a store. */ | 2396 | LJFOLD(NEWREF any any) /* Treated like a store. */ |
2152 | LJFOLD(CALLS any any) | 2397 | LJFOLD(CALLA any any) |
2153 | LJFOLD(CALLL any any) /* Safeguard fallback. */ | 2398 | LJFOLD(CALLL any any) /* Safeguard fallback. */ |
2399 | LJFOLD(CALLS any any) | ||
2154 | LJFOLD(CALLXS any any) | 2400 | LJFOLD(CALLXS any any) |
2155 | LJFOLD(XBAR) | 2401 | LJFOLD(XBAR) |
2156 | LJFOLD(RETF any any) /* Modifies BASE. */ | 2402 | LJFOLD(RETF any any) /* Modifies BASE. */ |
@@ -2158,6 +2404,7 @@ LJFOLD(TNEW any any) | |||
2158 | LJFOLD(TDUP any) | 2404 | LJFOLD(TDUP any) |
2159 | LJFOLD(CNEW any any) | 2405 | LJFOLD(CNEW any any) |
2160 | LJFOLD(XSNEW any any) | 2406 | LJFOLD(XSNEW any any) |
2407 | LJFOLD(BUFHDR any any) | ||
2161 | LJFOLDX(lj_ir_emit) | 2408 | LJFOLDX(lj_ir_emit) |
2162 | 2409 | ||
2163 | /* ------------------------------------------------------------------------ */ | 2410 | /* ------------------------------------------------------------------------ */ |
@@ -2209,10 +2456,14 @@ retry: | |||
2209 | if (fins->op1 >= J->cur.nk) { | 2456 | if (fins->op1 >= J->cur.nk) { |
2210 | key += (uint32_t)IR(fins->op1)->o << 10; | 2457 | key += (uint32_t)IR(fins->op1)->o << 10; |
2211 | *fleft = *IR(fins->op1); | 2458 | *fleft = *IR(fins->op1); |
2459 | if (fins->op1 < REF_TRUE) | ||
2460 | fleft[1] = IR(fins->op1)[1]; | ||
2212 | } | 2461 | } |
2213 | if (fins->op2 >= J->cur.nk) { | 2462 | if (fins->op2 >= J->cur.nk) { |
2214 | key += (uint32_t)IR(fins->op2)->o; | 2463 | key += (uint32_t)IR(fins->op2)->o; |
2215 | *fright = *IR(fins->op2); | 2464 | *fright = *IR(fins->op2); |
2465 | if (fins->op2 < REF_TRUE) | ||
2466 | fright[1] = IR(fins->op2)[1]; | ||
2216 | } else { | 2467 | } else { |
2217 | key += (fins->op2 & 0x3ffu); /* Literal mask. Must include IRCONV_*MASK. */ | 2468 | key += (fins->op2 & 0x3ffu); /* Literal mask. Must include IRCONV_*MASK. */ |
2218 | } | 2469 | } |
diff --git a/src/lj_opt_loop.c b/src/lj_opt_loop.c index d5e1eb13..c5919ca0 100644 --- a/src/lj_opt_loop.c +++ b/src/lj_opt_loop.c | |||
@@ -11,7 +11,7 @@ | |||
11 | #if LJ_HASJIT | 11 | #if LJ_HASJIT |
12 | 12 | ||
13 | #include "lj_err.h" | 13 | #include "lj_err.h" |
14 | #include "lj_str.h" | 14 | #include "lj_buf.h" |
15 | #include "lj_ir.h" | 15 | #include "lj_ir.h" |
16 | #include "lj_jit.h" | 16 | #include "lj_jit.h" |
17 | #include "lj_iropt.h" | 17 | #include "lj_iropt.h" |
@@ -254,9 +254,16 @@ static void loop_subst_snap(jit_State *J, SnapShot *osnap, | |||
254 | J->cur.nsnapmap = (uint32_t)(nmap - J->cur.snapmap); | 254 | J->cur.nsnapmap = (uint32_t)(nmap - J->cur.snapmap); |
255 | } | 255 | } |
256 | 256 | ||
257 | typedef struct LoopState { | ||
258 | jit_State *J; | ||
259 | IRRef1 *subst; | ||
260 | MSize sizesubst; | ||
261 | } LoopState; | ||
262 | |||
257 | /* Unroll loop. */ | 263 | /* Unroll loop. */ |
258 | static void loop_unroll(jit_State *J) | 264 | static void loop_unroll(LoopState *lps) |
259 | { | 265 | { |
266 | jit_State *J = lps->J; | ||
260 | IRRef1 phi[LJ_MAX_PHI]; | 267 | IRRef1 phi[LJ_MAX_PHI]; |
261 | uint32_t nphi = 0; | 268 | uint32_t nphi = 0; |
262 | IRRef1 *subst; | 269 | IRRef1 *subst; |
@@ -265,13 +272,13 @@ static void loop_unroll(jit_State *J) | |||
265 | SnapEntry *loopmap, *psentinel; | 272 | SnapEntry *loopmap, *psentinel; |
266 | IRRef ins, invar; | 273 | IRRef ins, invar; |
267 | 274 | ||
268 | /* Use temp buffer for substitution table. | 275 | /* Allocate substitution table. |
269 | ** Only non-constant refs in [REF_BIAS,invar) are valid indexes. | 276 | ** Only non-constant refs in [REF_BIAS,invar) are valid indexes. |
270 | ** Caveat: don't call into the VM or run the GC or the buffer may be gone. | ||
271 | */ | 277 | */ |
272 | invar = J->cur.nins; | 278 | invar = J->cur.nins; |
273 | subst = (IRRef1 *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, | 279 | lps->sizesubst = invar - REF_BIAS; |
274 | (invar-REF_BIAS)*sizeof(IRRef1)) - REF_BIAS; | 280 | lps->subst = lj_mem_newvec(J->L, lps->sizesubst, IRRef1); |
281 | subst = lps->subst - REF_BIAS; | ||
275 | subst[REF_BASE] = REF_BASE; | 282 | subst[REF_BASE] = REF_BASE; |
276 | 283 | ||
277 | /* LOOP separates the pre-roll from the loop body. */ | 284 | /* LOOP separates the pre-roll from the loop body. */ |
@@ -396,7 +403,7 @@ static void loop_undo(jit_State *J, IRRef ins, SnapNo nsnap, MSize nsnapmap) | |||
396 | static TValue *cploop_opt(lua_State *L, lua_CFunction dummy, void *ud) | 403 | static TValue *cploop_opt(lua_State *L, lua_CFunction dummy, void *ud) |
397 | { | 404 | { |
398 | UNUSED(L); UNUSED(dummy); | 405 | UNUSED(L); UNUSED(dummy); |
399 | loop_unroll((jit_State *)ud); | 406 | loop_unroll((LoopState *)ud); |
400 | return NULL; | 407 | return NULL; |
401 | } | 408 | } |
402 | 409 | ||
@@ -406,7 +413,13 @@ int lj_opt_loop(jit_State *J) | |||
406 | IRRef nins = J->cur.nins; | 413 | IRRef nins = J->cur.nins; |
407 | SnapNo nsnap = J->cur.nsnap; | 414 | SnapNo nsnap = J->cur.nsnap; |
408 | MSize nsnapmap = J->cur.nsnapmap; | 415 | MSize nsnapmap = J->cur.nsnapmap; |
409 | int errcode = lj_vm_cpcall(J->L, NULL, J, cploop_opt); | 416 | LoopState lps; |
417 | int errcode; | ||
418 | lps.J = J; | ||
419 | lps.subst = NULL; | ||
420 | lps.sizesubst = 0; | ||
421 | errcode = lj_vm_cpcall(J->L, NULL, &lps, cploop_opt); | ||
422 | lj_mem_freevec(J2G(J), lps.subst, lps.sizesubst, IRRef1); | ||
410 | if (LJ_UNLIKELY(errcode)) { | 423 | if (LJ_UNLIKELY(errcode)) { |
411 | lua_State *L = J->L; | 424 | lua_State *L = J->L; |
412 | if (errcode == LUA_ERRRUN && tvisnumber(L->top-1)) { /* Trace error? */ | 425 | if (errcode == LUA_ERRRUN && tvisnumber(L->top-1)) { /* Trace error? */ |
diff --git a/src/lj_opt_mem.c b/src/lj_opt_mem.c index 281f29ad..079f7cfe 100644 --- a/src/lj_opt_mem.c +++ b/src/lj_opt_mem.c | |||
@@ -17,12 +17,13 @@ | |||
17 | #include "lj_ir.h" | 17 | #include "lj_ir.h" |
18 | #include "lj_jit.h" | 18 | #include "lj_jit.h" |
19 | #include "lj_iropt.h" | 19 | #include "lj_iropt.h" |
20 | #include "lj_ircall.h" | ||
20 | 21 | ||
21 | /* Some local macros to save typing. Undef'd at the end. */ | 22 | /* Some local macros to save typing. Undef'd at the end. */ |
22 | #define IR(ref) (&J->cur.ir[(ref)]) | 23 | #define IR(ref) (&J->cur.ir[(ref)]) |
23 | #define fins (&J->fold.ins) | 24 | #define fins (&J->fold.ins) |
24 | #define fleft (&J->fold.left) | 25 | #define fleft (J->fold.left) |
25 | #define fright (&J->fold.right) | 26 | #define fright (J->fold.right) |
26 | 27 | ||
27 | /* | 28 | /* |
28 | ** Caveat #1: return value is not always a TRef -- only use with tref_ref(). | 29 | ** Caveat #1: return value is not always a TRef -- only use with tref_ref(). |
@@ -309,7 +310,21 @@ int LJ_FASTCALL lj_opt_fwd_href_nokey(jit_State *J) | |||
309 | return 1; /* No conflict. Can fold to niltv. */ | 310 | return 1; /* No conflict. Can fold to niltv. */ |
310 | } | 311 | } |
311 | 312 | ||
312 | /* Check whether there's no aliasing NEWREF for the left operand. */ | 313 | /* Check whether there's no aliasing table.clear. */ |
314 | static int fwd_aa_tab_clear(jit_State *J, IRRef lim, IRRef ta) | ||
315 | { | ||
316 | IRRef ref = J->chain[IR_CALLS]; | ||
317 | while (ref > lim) { | ||
318 | IRIns *calls = IR(ref); | ||
319 | if (calls->op2 == IRCALL_lj_tab_clear && | ||
320 | (ta == calls->op1 || aa_table(J, ta, calls->op1) != ALIAS_NO)) | ||
321 | return 0; /* Conflict. */ | ||
322 | ref = calls->prev; | ||
323 | } | ||
324 | return 1; /* No conflict. Can safely FOLD/CSE. */ | ||
325 | } | ||
326 | |||
327 | /* Check whether there's no aliasing NEWREF/table.clear for the left operand. */ | ||
313 | int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim) | 328 | int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim) |
314 | { | 329 | { |
315 | IRRef ta = fins->op1; | 330 | IRRef ta = fins->op1; |
@@ -320,7 +335,7 @@ int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim) | |||
320 | return 0; /* Conflict. */ | 335 | return 0; /* Conflict. */ |
321 | ref = newref->prev; | 336 | ref = newref->prev; |
322 | } | 337 | } |
323 | return 1; /* No conflict. Can safely FOLD/CSE. */ | 338 | return fwd_aa_tab_clear(J, lim, ta); |
324 | } | 339 | } |
325 | 340 | ||
326 | /* ASTORE/HSTORE elimination. */ | 341 | /* ASTORE/HSTORE elimination. */ |
@@ -855,6 +870,10 @@ TRef LJ_FASTCALL lj_opt_fwd_tab_len(jit_State *J) | |||
855 | ref = store->prev; | 870 | ref = store->prev; |
856 | } | 871 | } |
857 | 872 | ||
873 | /* Search for aliasing table.clear. */ | ||
874 | if (!fwd_aa_tab_clear(J, lim, tab)) | ||
875 | return lj_ir_emit(J); | ||
876 | |||
858 | /* Try to find a matching load. Below the conflicting store, if any. */ | 877 | /* Try to find a matching load. Below the conflicting store, if any. */ |
859 | return lj_opt_cselim(J, lim); | 878 | return lj_opt_cselim(J, lim); |
860 | } | 879 | } |
diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c index 28d3c255..ef0599c9 100644 --- a/src/lj_opt_narrow.c +++ b/src/lj_opt_narrow.c | |||
@@ -555,7 +555,7 @@ TRef lj_opt_narrow_unm(jit_State *J, TRef rc, TValue *vc) | |||
555 | return emitir(IRTGI(IR_SUBOV), lj_ir_kint(J, 0), rc); | 555 | return emitir(IRTGI(IR_SUBOV), lj_ir_kint(J, 0), rc); |
556 | rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT); | 556 | rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT); |
557 | } | 557 | } |
558 | return emitir(IRTN(IR_NEG), rc, lj_ir_knum_neg(J)); | 558 | return emitir(IRTN(IR_NEG), rc, lj_ir_ksimd(J, LJ_KSIMD_NEG)); |
559 | } | 559 | } |
560 | 560 | ||
561 | /* Narrowing of modulo operator. */ | 561 | /* Narrowing of modulo operator. */ |
diff --git a/src/lj_opt_sink.c b/src/lj_opt_sink.c index df7f58af..c5323b11 100644 --- a/src/lj_opt_sink.c +++ b/src/lj_opt_sink.c | |||
@@ -165,8 +165,8 @@ static void sink_remark_phi(jit_State *J) | |||
165 | /* Sweep instructions and tag sunken allocations and stores. */ | 165 | /* Sweep instructions and tag sunken allocations and stores. */ |
166 | static void sink_sweep_ins(jit_State *J) | 166 | static void sink_sweep_ins(jit_State *J) |
167 | { | 167 | { |
168 | IRIns *ir, *irfirst = IR(J->cur.nk); | 168 | IRIns *ir, *irbase = IR(REF_BASE); |
169 | for (ir = IR(J->cur.nins-1) ; ir >= irfirst; ir--) { | 169 | for (ir = IR(J->cur.nins-1) ; ir >= irbase; ir--) { |
170 | switch (ir->o) { | 170 | switch (ir->o) { |
171 | case IR_ASTORE: case IR_HSTORE: case IR_FSTORE: case IR_XSTORE: { | 171 | case IR_ASTORE: case IR_HSTORE: case IR_FSTORE: case IR_XSTORE: { |
172 | IRIns *ira = sink_checkalloc(J, ir); | 172 | IRIns *ira = sink_checkalloc(J, ir); |
@@ -216,6 +216,13 @@ static void sink_sweep_ins(jit_State *J) | |||
216 | break; | 216 | break; |
217 | } | 217 | } |
218 | } | 218 | } |
219 | for (ir = IR(J->cur.nk); ir < irbase; ir++) { | ||
220 | irt_clearmark(ir->t); | ||
221 | ir->prev = REGSP_INIT; | ||
222 | /* The false-positive of irt_is64() for ASMREF_L (REF_NIL) is OK here. */ | ||
223 | if (irt_is64(ir->t) && ir->o != IR_KNULL) | ||
224 | ir++; | ||
225 | } | ||
219 | } | 226 | } |
220 | 227 | ||
221 | /* Allocation sinking and store sinking. | 228 | /* Allocation sinking and store sinking. |
diff --git a/src/lj_opt_split.c b/src/lj_opt_split.c index a517fa8a..ee7cf0f9 100644 --- a/src/lj_opt_split.c +++ b/src/lj_opt_split.c | |||
@@ -8,14 +8,15 @@ | |||
8 | 8 | ||
9 | #include "lj_obj.h" | 9 | #include "lj_obj.h" |
10 | 10 | ||
11 | #if LJ_HASJIT && (LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) | 11 | #if LJ_HASJIT && (LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) |
12 | 12 | ||
13 | #include "lj_err.h" | 13 | #include "lj_err.h" |
14 | #include "lj_str.h" | 14 | #include "lj_buf.h" |
15 | #include "lj_ir.h" | 15 | #include "lj_ir.h" |
16 | #include "lj_jit.h" | 16 | #include "lj_jit.h" |
17 | #include "lj_ircall.h" | 17 | #include "lj_ircall.h" |
18 | #include "lj_iropt.h" | 18 | #include "lj_iropt.h" |
19 | #include "lj_dispatch.h" | ||
19 | #include "lj_vm.h" | 20 | #include "lj_vm.h" |
20 | 21 | ||
21 | /* SPLIT pass: | 22 | /* SPLIT pass: |
@@ -139,6 +140,7 @@ static IRRef split_call_l(jit_State *J, IRRef1 *hisubst, IRIns *oir, | |||
139 | ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id); | 140 | ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id); |
140 | return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); | 141 | return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); |
141 | } | 142 | } |
143 | #endif | ||
142 | 144 | ||
143 | /* Emit a CALLN with one split 64 bit argument and a 32 bit argument. */ | 145 | /* Emit a CALLN with one split 64 bit argument and a 32 bit argument. */ |
144 | static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir, | 146 | static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir, |
@@ -155,7 +157,6 @@ static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir, | |||
155 | ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id); | 157 | ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id); |
156 | return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); | 158 | return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); |
157 | } | 159 | } |
158 | #endif | ||
159 | 160 | ||
160 | /* Emit a CALLN with two split 64 bit arguments. */ | 161 | /* Emit a CALLN with two split 64 bit arguments. */ |
161 | static IRRef split_call_ll(jit_State *J, IRRef1 *hisubst, IRIns *oir, | 162 | static IRRef split_call_ll(jit_State *J, IRRef1 *hisubst, IRIns *oir, |
@@ -192,9 +193,121 @@ static IRRef split_ptr(jit_State *J, IRIns *oir, IRRef ref) | |||
192 | nref = ir->op1; | 193 | nref = ir->op1; |
193 | if (ofs == 0) return nref; | 194 | if (ofs == 0) return nref; |
194 | } | 195 | } |
195 | return split_emit(J, IRTI(IR_ADD), nref, lj_ir_kint(J, ofs)); | 196 | return split_emit(J, IRT(IR_ADD, IRT_PTR), nref, lj_ir_kint(J, ofs)); |
197 | } | ||
198 | |||
199 | #if LJ_HASFFI | ||
200 | static IRRef split_bitshift(jit_State *J, IRRef1 *hisubst, | ||
201 | IRIns *oir, IRIns *nir, IRIns *ir) | ||
202 | { | ||
203 | IROp op = ir->o; | ||
204 | IRRef kref = nir->op2; | ||
205 | if (irref_isk(kref)) { /* Optimize constant shifts. */ | ||
206 | int32_t k = (IR(kref)->i & 63); | ||
207 | IRRef lo = nir->op1, hi = hisubst[ir->op1]; | ||
208 | if (op == IR_BROL || op == IR_BROR) { | ||
209 | if (op == IR_BROR) k = (-k & 63); | ||
210 | if (k >= 32) { IRRef t = lo; lo = hi; hi = t; k -= 32; } | ||
211 | if (k == 0) { | ||
212 | passthrough: | ||
213 | J->cur.nins--; | ||
214 | ir->prev = lo; | ||
215 | return hi; | ||
216 | } else { | ||
217 | TRef k1, k2; | ||
218 | IRRef t1, t2, t3, t4; | ||
219 | J->cur.nins--; | ||
220 | k1 = lj_ir_kint(J, k); | ||
221 | k2 = lj_ir_kint(J, (-k & 31)); | ||
222 | t1 = split_emit(J, IRTI(IR_BSHL), lo, k1); | ||
223 | t2 = split_emit(J, IRTI(IR_BSHL), hi, k1); | ||
224 | t3 = split_emit(J, IRTI(IR_BSHR), lo, k2); | ||
225 | t4 = split_emit(J, IRTI(IR_BSHR), hi, k2); | ||
226 | ir->prev = split_emit(J, IRTI(IR_BOR), t1, t4); | ||
227 | return split_emit(J, IRTI(IR_BOR), t2, t3); | ||
228 | } | ||
229 | } else if (k == 0) { | ||
230 | goto passthrough; | ||
231 | } else if (k < 32) { | ||
232 | if (op == IR_BSHL) { | ||
233 | IRRef t1 = split_emit(J, IRTI(IR_BSHL), hi, kref); | ||
234 | IRRef t2 = split_emit(J, IRTI(IR_BSHR), lo, lj_ir_kint(J, (-k&31))); | ||
235 | return split_emit(J, IRTI(IR_BOR), t1, t2); | ||
236 | } else { | ||
237 | IRRef t1 = ir->prev, t2; | ||
238 | lua_assert(op == IR_BSHR || op == IR_BSAR); | ||
239 | nir->o = IR_BSHR; | ||
240 | t2 = split_emit(J, IRTI(IR_BSHL), hi, lj_ir_kint(J, (-k&31))); | ||
241 | ir->prev = split_emit(J, IRTI(IR_BOR), t1, t2); | ||
242 | return split_emit(J, IRTI(op), hi, kref); | ||
243 | } | ||
244 | } else { | ||
245 | if (op == IR_BSHL) { | ||
246 | if (k == 32) | ||
247 | J->cur.nins--; | ||
248 | else | ||
249 | lo = ir->prev; | ||
250 | ir->prev = lj_ir_kint(J, 0); | ||
251 | return lo; | ||
252 | } else { | ||
253 | lua_assert(op == IR_BSHR || op == IR_BSAR); | ||
254 | if (k == 32) { | ||
255 | J->cur.nins--; | ||
256 | ir->prev = hi; | ||
257 | } else { | ||
258 | nir->op1 = hi; | ||
259 | } | ||
260 | if (op == IR_BSHR) | ||
261 | return lj_ir_kint(J, 0); | ||
262 | else | ||
263 | return split_emit(J, IRTI(IR_BSAR), hi, lj_ir_kint(J, 31)); | ||
264 | } | ||
265 | } | ||
266 | } | ||
267 | return split_call_li(J, hisubst, oir, ir, | ||
268 | op - IR_BSHL + IRCALL_lj_carith_shl64); | ||
196 | } | 269 | } |
197 | 270 | ||
271 | static IRRef split_bitop(jit_State *J, IRRef1 *hisubst, | ||
272 | IRIns *nir, IRIns *ir) | ||
273 | { | ||
274 | IROp op = ir->o; | ||
275 | IRRef hi, kref = nir->op2; | ||
276 | if (irref_isk(kref)) { /* Optimize bit operations with lo constant. */ | ||
277 | int32_t k = IR(kref)->i; | ||
278 | if (k == 0 || k == -1) { | ||
279 | if (op == IR_BAND) k = ~k; | ||
280 | if (k == 0) { | ||
281 | J->cur.nins--; | ||
282 | ir->prev = nir->op1; | ||
283 | } else if (op == IR_BXOR) { | ||
284 | nir->o = IR_BNOT; | ||
285 | nir->op2 = 0; | ||
286 | } else { | ||
287 | J->cur.nins--; | ||
288 | ir->prev = kref; | ||
289 | } | ||
290 | } | ||
291 | } | ||
292 | hi = hisubst[ir->op1]; | ||
293 | kref = hisubst[ir->op2]; | ||
294 | if (irref_isk(kref)) { /* Optimize bit operations with hi constant. */ | ||
295 | int32_t k = IR(kref)->i; | ||
296 | if (k == 0 || k == -1) { | ||
297 | if (op == IR_BAND) k = ~k; | ||
298 | if (k == 0) { | ||
299 | return hi; | ||
300 | } else if (op == IR_BXOR) { | ||
301 | return split_emit(J, IRTI(IR_BNOT), hi, 0); | ||
302 | } else { | ||
303 | return kref; | ||
304 | } | ||
305 | } | ||
306 | } | ||
307 | return split_emit(J, IRTI(op), hi, kref); | ||
308 | } | ||
309 | #endif | ||
310 | |||
198 | /* Substitute references of a snapshot. */ | 311 | /* Substitute references of a snapshot. */ |
199 | static void split_subst_snap(jit_State *J, SnapShot *snap, IRIns *oir) | 312 | static void split_subst_snap(jit_State *J, SnapShot *snap, IRIns *oir) |
200 | { | 313 | { |
@@ -214,7 +327,7 @@ static void split_ir(jit_State *J) | |||
214 | IRRef nins = J->cur.nins, nk = J->cur.nk; | 327 | IRRef nins = J->cur.nins, nk = J->cur.nk; |
215 | MSize irlen = nins - nk; | 328 | MSize irlen = nins - nk; |
216 | MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1)); | 329 | MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1)); |
217 | IRIns *oir = (IRIns *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, need); | 330 | IRIns *oir = (IRIns *)lj_buf_tmp(J->L, need); |
218 | IRRef1 *hisubst; | 331 | IRRef1 *hisubst; |
219 | IRRef ref, snref; | 332 | IRRef ref, snref; |
220 | SnapShot *snap; | 333 | SnapShot *snap; |
@@ -241,6 +354,8 @@ static void split_ir(jit_State *J) | |||
241 | ir->prev = ref; /* Identity substitution for loword. */ | 354 | ir->prev = ref; /* Identity substitution for loword. */ |
242 | hisubst[ref] = 0; | 355 | hisubst[ref] = 0; |
243 | } | 356 | } |
357 | if (irt_is64(ir->t) && ir->o != IR_KNULL) | ||
358 | ref++; | ||
244 | } | 359 | } |
245 | 360 | ||
246 | /* Process old IR instructions. */ | 361 | /* Process old IR instructions. */ |
@@ -321,7 +436,8 @@ static void split_ir(jit_State *J) | |||
321 | nir->o = IR_CONV; /* Pass through loword. */ | 436 | nir->o = IR_CONV; /* Pass through loword. */ |
322 | nir->op2 = (IRT_INT << 5) | IRT_INT; | 437 | nir->op2 = (IRT_INT << 5) | IRT_INT; |
323 | hi = split_emit(J, IRT(ir->o == IR_NEG ? IR_BXOR : IR_BAND, IRT_SOFTFP), | 438 | hi = split_emit(J, IRT(ir->o == IR_NEG ? IR_BXOR : IR_BAND, IRT_SOFTFP), |
324 | hisubst[ir->op1], hisubst[ir->op2]); | 439 | hisubst[ir->op1], |
440 | lj_ir_kint(J, (int32_t)(0x7fffffffu + (ir->o == IR_NEG)))); | ||
325 | break; | 441 | break; |
326 | case IR_SLOAD: | 442 | case IR_SLOAD: |
327 | if ((nir->op2 & IRSLOAD_CONVERT)) { /* Convert from int to number. */ | 443 | if ((nir->op2 & IRSLOAD_CONVERT)) { /* Convert from int to number. */ |
@@ -336,15 +452,24 @@ static void split_ir(jit_State *J) | |||
336 | case IR_STRTO: | 452 | case IR_STRTO: |
337 | hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); | 453 | hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); |
338 | break; | 454 | break; |
455 | case IR_FLOAD: | ||
456 | lua_assert(ir->op1 == REF_NIL); | ||
457 | hi = lj_ir_kint(J, *(int32_t*)((char*)J2GG(J) + ir->op2 + LJ_LE*4)); | ||
458 | nir->op2 += LJ_BE*4; | ||
459 | break; | ||
339 | case IR_XLOAD: { | 460 | case IR_XLOAD: { |
340 | IRIns inslo = *nir; /* Save/undo the emit of the lo XLOAD. */ | 461 | IRIns inslo = *nir; /* Save/undo the emit of the lo XLOAD. */ |
341 | J->cur.nins--; | 462 | J->cur.nins--; |
342 | hi = split_ptr(J, oir, ir->op1); /* Insert the hiref ADD. */ | 463 | hi = split_ptr(J, oir, ir->op1); /* Insert the hiref ADD. */ |
464 | #if LJ_BE | ||
465 | hi = split_emit(J, IRT(IR_XLOAD, IRT_INT), hi, ir->op2); | ||
466 | inslo.t.irt = IRT_SOFTFP | (inslo.t.irt & IRT_GUARD); | ||
467 | #endif | ||
343 | nref = lj_ir_nextins(J); | 468 | nref = lj_ir_nextins(J); |
344 | nir = IR(nref); | 469 | nir = IR(nref); |
345 | *nir = inslo; /* Re-emit lo XLOAD immediately before hi XLOAD. */ | 470 | *nir = inslo; /* Re-emit lo XLOAD. */ |
346 | hi = split_emit(J, IRT(IR_XLOAD, IRT_SOFTFP), hi, ir->op2); | ||
347 | #if LJ_LE | 471 | #if LJ_LE |
472 | hi = split_emit(J, IRT(IR_XLOAD, IRT_SOFTFP), hi, ir->op2); | ||
348 | ir->prev = nref; | 473 | ir->prev = nref; |
349 | #else | 474 | #else |
350 | ir->prev = hi; hi = nref; | 475 | ir->prev = hi; hi = nref; |
@@ -438,6 +563,19 @@ static void split_ir(jit_State *J) | |||
438 | irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : | 563 | irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : |
439 | IRCALL_lj_carith_powu64); | 564 | IRCALL_lj_carith_powu64); |
440 | break; | 565 | break; |
566 | case IR_BNOT: | ||
567 | hi = split_emit(J, IRTI(IR_BNOT), hiref, 0); | ||
568 | break; | ||
569 | case IR_BSWAP: | ||
570 | ir->prev = split_emit(J, IRTI(IR_BSWAP), hiref, 0); | ||
571 | hi = nref; | ||
572 | break; | ||
573 | case IR_BAND: case IR_BOR: case IR_BXOR: | ||
574 | hi = split_bitop(J, hisubst, nir, ir); | ||
575 | break; | ||
576 | case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR: | ||
577 | hi = split_bitshift(J, hisubst, oir, nir, ir); | ||
578 | break; | ||
441 | case IR_FLOAD: | 579 | case IR_FLOAD: |
442 | lua_assert(ir->op2 == IRFL_CDATA_INT64); | 580 | lua_assert(ir->op2 == IRFL_CDATA_INT64); |
443 | hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4); | 581 | hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4); |
diff --git a/src/lj_parse.c b/src/lj_parse.c index 74dd5706..68f3789e 100644 --- a/src/lj_parse.c +++ b/src/lj_parse.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include "lj_gc.h" | 13 | #include "lj_gc.h" |
14 | #include "lj_err.h" | 14 | #include "lj_err.h" |
15 | #include "lj_debug.h" | 15 | #include "lj_debug.h" |
16 | #include "lj_buf.h" | ||
16 | #include "lj_str.h" | 17 | #include "lj_str.h" |
17 | #include "lj_tab.h" | 18 | #include "lj_tab.h" |
18 | #include "lj_func.h" | 19 | #include "lj_func.h" |
@@ -21,6 +22,7 @@ | |||
21 | #if LJ_HASFFI | 22 | #if LJ_HASFFI |
22 | #include "lj_ctype.h" | 23 | #include "lj_ctype.h" |
23 | #endif | 24 | #endif |
25 | #include "lj_strfmt.h" | ||
24 | #include "lj_lex.h" | 26 | #include "lj_lex.h" |
25 | #include "lj_parse.h" | 27 | #include "lj_parse.h" |
26 | #include "lj_vm.h" | 28 | #include "lj_vm.h" |
@@ -165,12 +167,12 @@ LJ_STATIC_ASSERT((int)BC_MODVV-(int)BC_ADDVV == (int)OPR_MOD-(int)OPR_ADD); | |||
165 | 167 | ||
166 | LJ_NORET LJ_NOINLINE static void err_syntax(LexState *ls, ErrMsg em) | 168 | LJ_NORET LJ_NOINLINE static void err_syntax(LexState *ls, ErrMsg em) |
167 | { | 169 | { |
168 | lj_lex_error(ls, ls->token, em); | 170 | lj_lex_error(ls, ls->tok, em); |
169 | } | 171 | } |
170 | 172 | ||
171 | LJ_NORET LJ_NOINLINE static void err_token(LexState *ls, LexToken token) | 173 | LJ_NORET LJ_NOINLINE static void err_token(LexState *ls, LexToken tok) |
172 | { | 174 | { |
173 | lj_lex_error(ls, ls->token, LJ_ERR_XTOKEN, lj_lex_token2str(ls, token)); | 175 | lj_lex_error(ls, ls->tok, LJ_ERR_XTOKEN, lj_lex_token2str(ls, tok)); |
174 | } | 176 | } |
175 | 177 | ||
176 | LJ_NORET static void err_limit(FuncState *fs, uint32_t limit, const char *what) | 178 | LJ_NORET static void err_limit(FuncState *fs, uint32_t limit, const char *what) |
@@ -660,16 +662,16 @@ static void bcemit_method(FuncState *fs, ExpDesc *e, ExpDesc *key) | |||
660 | BCReg idx, func, obj = expr_toanyreg(fs, e); | 662 | BCReg idx, func, obj = expr_toanyreg(fs, e); |
661 | expr_free(fs, e); | 663 | expr_free(fs, e); |
662 | func = fs->freereg; | 664 | func = fs->freereg; |
663 | bcemit_AD(fs, BC_MOV, func+1, obj); /* Copy object to first argument. */ | 665 | bcemit_AD(fs, BC_MOV, func+1+LJ_FR2, obj); /* Copy object to 1st argument. */ |
664 | lua_assert(expr_isstrk(key)); | 666 | lua_assert(expr_isstrk(key)); |
665 | idx = const_str(fs, key); | 667 | idx = const_str(fs, key); |
666 | if (idx <= BCMAX_C) { | 668 | if (idx <= BCMAX_C) { |
667 | bcreg_reserve(fs, 2); | 669 | bcreg_reserve(fs, 2+LJ_FR2); |
668 | bcemit_ABC(fs, BC_TGETS, func, obj, idx); | 670 | bcemit_ABC(fs, BC_TGETS, func, obj, idx); |
669 | } else { | 671 | } else { |
670 | bcreg_reserve(fs, 3); | 672 | bcreg_reserve(fs, 3+LJ_FR2); |
671 | bcemit_AD(fs, BC_KSTR, func+2, idx); | 673 | bcemit_AD(fs, BC_KSTR, func+2+LJ_FR2, idx); |
672 | bcemit_ABC(fs, BC_TGETV, func, obj, func+2); | 674 | bcemit_ABC(fs, BC_TGETV, func, obj, func+2+LJ_FR2); |
673 | fs->freereg--; | 675 | fs->freereg--; |
674 | } | 676 | } |
675 | e->u.s.info = func; | 677 | e->u.s.info = func; |
@@ -983,7 +985,7 @@ static void bcemit_unop(FuncState *fs, BCOp op, ExpDesc *e) | |||
983 | /* Check and consume optional token. */ | 985 | /* Check and consume optional token. */ |
984 | static int lex_opt(LexState *ls, LexToken tok) | 986 | static int lex_opt(LexState *ls, LexToken tok) |
985 | { | 987 | { |
986 | if (ls->token == tok) { | 988 | if (ls->tok == tok) { |
987 | lj_lex_next(ls); | 989 | lj_lex_next(ls); |
988 | return 1; | 990 | return 1; |
989 | } | 991 | } |
@@ -993,7 +995,7 @@ static int lex_opt(LexState *ls, LexToken tok) | |||
993 | /* Check and consume token. */ | 995 | /* Check and consume token. */ |
994 | static void lex_check(LexState *ls, LexToken tok) | 996 | static void lex_check(LexState *ls, LexToken tok) |
995 | { | 997 | { |
996 | if (ls->token != tok) | 998 | if (ls->tok != tok) |
997 | err_token(ls, tok); | 999 | err_token(ls, tok); |
998 | lj_lex_next(ls); | 1000 | lj_lex_next(ls); |
999 | } | 1001 | } |
@@ -1007,7 +1009,7 @@ static void lex_match(LexState *ls, LexToken what, LexToken who, BCLine line) | |||
1007 | } else { | 1009 | } else { |
1008 | const char *swhat = lj_lex_token2str(ls, what); | 1010 | const char *swhat = lj_lex_token2str(ls, what); |
1009 | const char *swho = lj_lex_token2str(ls, who); | 1011 | const char *swho = lj_lex_token2str(ls, who); |
1010 | lj_lex_error(ls, ls->token, LJ_ERR_XMATCH, swhat, swho, line); | 1012 | lj_lex_error(ls, ls->tok, LJ_ERR_XMATCH, swhat, swho, line); |
1011 | } | 1013 | } |
1012 | } | 1014 | } |
1013 | } | 1015 | } |
@@ -1016,9 +1018,9 @@ static void lex_match(LexState *ls, LexToken what, LexToken who, BCLine line) | |||
1016 | static GCstr *lex_str(LexState *ls) | 1018 | static GCstr *lex_str(LexState *ls) |
1017 | { | 1019 | { |
1018 | GCstr *s; | 1020 | GCstr *s; |
1019 | if (ls->token != TK_name && (LJ_52 || ls->token != TK_goto)) | 1021 | if (ls->tok != TK_name && (LJ_52 || ls->tok != TK_goto)) |
1020 | err_token(ls, TK_name); | 1022 | err_token(ls, TK_name); |
1021 | s = strV(&ls->tokenval); | 1023 | s = strV(&ls->tokval); |
1022 | lj_lex_next(ls); | 1024 | lj_lex_next(ls); |
1023 | return s; | 1025 | return s; |
1024 | } | 1026 | } |
@@ -1433,78 +1435,46 @@ static void fs_fixup_line(FuncState *fs, GCproto *pt, | |||
1433 | } | 1435 | } |
1434 | } | 1436 | } |
1435 | 1437 | ||
1436 | /* Resize buffer if needed. */ | ||
1437 | static LJ_NOINLINE void fs_buf_resize(LexState *ls, MSize len) | ||
1438 | { | ||
1439 | MSize sz = ls->sb.sz * 2; | ||
1440 | while (ls->sb.n + len > sz) sz = sz * 2; | ||
1441 | lj_str_resizebuf(ls->L, &ls->sb, sz); | ||
1442 | } | ||
1443 | |||
1444 | static LJ_AINLINE void fs_buf_need(LexState *ls, MSize len) | ||
1445 | { | ||
1446 | if (LJ_UNLIKELY(ls->sb.n + len > ls->sb.sz)) | ||
1447 | fs_buf_resize(ls, len); | ||
1448 | } | ||
1449 | |||
1450 | /* Add string to buffer. */ | ||
1451 | static void fs_buf_str(LexState *ls, const char *str, MSize len) | ||
1452 | { | ||
1453 | char *p = ls->sb.buf + ls->sb.n; | ||
1454 | MSize i; | ||
1455 | ls->sb.n += len; | ||
1456 | for (i = 0; i < len; i++) p[i] = str[i]; | ||
1457 | } | ||
1458 | |||
1459 | /* Add ULEB128 value to buffer. */ | ||
1460 | static void fs_buf_uleb128(LexState *ls, uint32_t v) | ||
1461 | { | ||
1462 | MSize n = ls->sb.n; | ||
1463 | uint8_t *p = (uint8_t *)ls->sb.buf; | ||
1464 | for (; v >= 0x80; v >>= 7) | ||
1465 | p[n++] = (uint8_t)((v & 0x7f) | 0x80); | ||
1466 | p[n++] = (uint8_t)v; | ||
1467 | ls->sb.n = n; | ||
1468 | } | ||
1469 | |||
1470 | /* Prepare variable info for prototype. */ | 1438 | /* Prepare variable info for prototype. */ |
1471 | static size_t fs_prep_var(LexState *ls, FuncState *fs, size_t *ofsvar) | 1439 | static size_t fs_prep_var(LexState *ls, FuncState *fs, size_t *ofsvar) |
1472 | { | 1440 | { |
1473 | VarInfo *vs =ls->vstack, *ve; | 1441 | VarInfo *vs =ls->vstack, *ve; |
1474 | MSize i, n; | 1442 | MSize i, n; |
1475 | BCPos lastpc; | 1443 | BCPos lastpc; |
1476 | lj_str_resetbuf(&ls->sb); /* Copy to temp. string buffer. */ | 1444 | lj_buf_reset(&ls->sb); /* Copy to temp. string buffer. */ |
1477 | /* Store upvalue names. */ | 1445 | /* Store upvalue names. */ |
1478 | for (i = 0, n = fs->nuv; i < n; i++) { | 1446 | for (i = 0, n = fs->nuv; i < n; i++) { |
1479 | GCstr *s = strref(vs[fs->uvmap[i]].name); | 1447 | GCstr *s = strref(vs[fs->uvmap[i]].name); |
1480 | MSize len = s->len+1; | 1448 | MSize len = s->len+1; |
1481 | fs_buf_need(ls, len); | 1449 | char *p = lj_buf_more(&ls->sb, len); |
1482 | fs_buf_str(ls, strdata(s), len); | 1450 | p = lj_buf_wmem(p, strdata(s), len); |
1451 | setsbufP(&ls->sb, p); | ||
1483 | } | 1452 | } |
1484 | *ofsvar = ls->sb.n; | 1453 | *ofsvar = sbuflen(&ls->sb); |
1485 | lastpc = 0; | 1454 | lastpc = 0; |
1486 | /* Store local variable names and compressed ranges. */ | 1455 | /* Store local variable names and compressed ranges. */ |
1487 | for (ve = vs + ls->vtop, vs += fs->vbase; vs < ve; vs++) { | 1456 | for (ve = vs + ls->vtop, vs += fs->vbase; vs < ve; vs++) { |
1488 | if (!gola_isgotolabel(vs)) { | 1457 | if (!gola_isgotolabel(vs)) { |
1489 | GCstr *s = strref(vs->name); | 1458 | GCstr *s = strref(vs->name); |
1490 | BCPos startpc; | 1459 | BCPos startpc; |
1460 | char *p; | ||
1491 | if ((uintptr_t)s < VARNAME__MAX) { | 1461 | if ((uintptr_t)s < VARNAME__MAX) { |
1492 | fs_buf_need(ls, 1 + 2*5); | 1462 | p = lj_buf_more(&ls->sb, 1 + 2*5); |
1493 | ls->sb.buf[ls->sb.n++] = (uint8_t)(uintptr_t)s; | 1463 | *p++ = (char)(uintptr_t)s; |
1494 | } else { | 1464 | } else { |
1495 | MSize len = s->len+1; | 1465 | MSize len = s->len+1; |
1496 | fs_buf_need(ls, len + 2*5); | 1466 | p = lj_buf_more(&ls->sb, len + 2*5); |
1497 | fs_buf_str(ls, strdata(s), len); | 1467 | p = lj_buf_wmem(p, strdata(s), len); |
1498 | } | 1468 | } |
1499 | startpc = vs->startpc; | 1469 | startpc = vs->startpc; |
1500 | fs_buf_uleb128(ls, startpc-lastpc); | 1470 | p = lj_strfmt_wuleb128(p, startpc-lastpc); |
1501 | fs_buf_uleb128(ls, vs->endpc-startpc); | 1471 | p = lj_strfmt_wuleb128(p, vs->endpc-startpc); |
1472 | setsbufP(&ls->sb, p); | ||
1502 | lastpc = startpc; | 1473 | lastpc = startpc; |
1503 | } | 1474 | } |
1504 | } | 1475 | } |
1505 | fs_buf_need(ls, 1); | 1476 | lj_buf_putb(&ls->sb, '\0'); /* Terminator for varinfo. */ |
1506 | ls->sb.buf[ls->sb.n++] = '\0'; /* Terminator for varinfo. */ | 1477 | return sbuflen(&ls->sb); |
1507 | return ls->sb.n; | ||
1508 | } | 1478 | } |
1509 | 1479 | ||
1510 | /* Fixup variable info for prototype. */ | 1480 | /* Fixup variable info for prototype. */ |
@@ -1512,7 +1482,7 @@ static void fs_fixup_var(LexState *ls, GCproto *pt, uint8_t *p, size_t ofsvar) | |||
1512 | { | 1482 | { |
1513 | setmref(pt->uvinfo, p); | 1483 | setmref(pt->uvinfo, p); |
1514 | setmref(pt->varinfo, (char *)p + ofsvar); | 1484 | setmref(pt->varinfo, (char *)p + ofsvar); |
1515 | memcpy(p, ls->sb.buf, ls->sb.n); /* Copy from temp. string buffer. */ | 1485 | memcpy(p, sbufB(&ls->sb), sbuflen(&ls->sb)); /* Copy from temp. buffer. */ |
1516 | } | 1486 | } |
1517 | #else | 1487 | #else |
1518 | 1488 | ||
@@ -1621,7 +1591,7 @@ static GCproto *fs_finish(LexState *ls, BCLine line) | |||
1621 | L->top--; /* Pop table of constants. */ | 1591 | L->top--; /* Pop table of constants. */ |
1622 | ls->vtop = fs->vbase; /* Reset variable stack. */ | 1592 | ls->vtop = fs->vbase; /* Reset variable stack. */ |
1623 | ls->fs = fs->prev; | 1593 | ls->fs = fs->prev; |
1624 | lua_assert(ls->fs != NULL || ls->token == TK_eof); | 1594 | lua_assert(ls->fs != NULL || ls->tok == TK_eof); |
1625 | return pt; | 1595 | return pt; |
1626 | } | 1596 | } |
1627 | 1597 | ||
@@ -1718,10 +1688,9 @@ static void expr_bracket(LexState *ls, ExpDesc *v) | |||
1718 | static void expr_kvalue(TValue *v, ExpDesc *e) | 1688 | static void expr_kvalue(TValue *v, ExpDesc *e) |
1719 | { | 1689 | { |
1720 | if (e->k <= VKTRUE) { | 1690 | if (e->k <= VKTRUE) { |
1721 | setitype(v, ~(uint32_t)e->k); | 1691 | setpriV(v, ~(uint32_t)e->k); |
1722 | } else if (e->k == VKSTR) { | 1692 | } else if (e->k == VKSTR) { |
1723 | setgcref(v->gcr, obj2gco(e->u.sval)); | 1693 | setgcVraw(v, obj2gco(e->u.sval), LJ_TSTR); |
1724 | setitype(v, LJ_TSTR); | ||
1725 | } else { | 1694 | } else { |
1726 | lua_assert(tvisnumber(expr_numtv(e))); | 1695 | lua_assert(tvisnumber(expr_numtv(e))); |
1727 | *v = *expr_numtv(e); | 1696 | *v = *expr_numtv(e); |
@@ -1743,15 +1712,15 @@ static void expr_table(LexState *ls, ExpDesc *e) | |||
1743 | bcreg_reserve(fs, 1); | 1712 | bcreg_reserve(fs, 1); |
1744 | freg++; | 1713 | freg++; |
1745 | lex_check(ls, '{'); | 1714 | lex_check(ls, '{'); |
1746 | while (ls->token != '}') { | 1715 | while (ls->tok != '}') { |
1747 | ExpDesc key, val; | 1716 | ExpDesc key, val; |
1748 | vcall = 0; | 1717 | vcall = 0; |
1749 | if (ls->token == '[') { | 1718 | if (ls->tok == '[') { |
1750 | expr_bracket(ls, &key); /* Already calls expr_toval. */ | 1719 | expr_bracket(ls, &key); /* Already calls expr_toval. */ |
1751 | if (!expr_isk(&key)) expr_index(fs, e, &key); | 1720 | if (!expr_isk(&key)) expr_index(fs, e, &key); |
1752 | if (expr_isnumk(&key) && expr_numiszero(&key)) needarr = 1; else nhash++; | 1721 | if (expr_isnumk(&key) && expr_numiszero(&key)) needarr = 1; else nhash++; |
1753 | lex_check(ls, '='); | 1722 | lex_check(ls, '='); |
1754 | } else if ((ls->token == TK_name || (!LJ_52 && ls->token == TK_goto)) && | 1723 | } else if ((ls->tok == TK_name || (!LJ_52 && ls->tok == TK_goto)) && |
1755 | lj_lex_lookahead(ls) == '=') { | 1724 | lj_lex_lookahead(ls) == '=') { |
1756 | expr_str(ls, &key); | 1725 | expr_str(ls, &key); |
1757 | lex_check(ls, '='); | 1726 | lex_check(ls, '='); |
@@ -1844,11 +1813,11 @@ static BCReg parse_params(LexState *ls, int needself) | |||
1844 | lex_check(ls, '('); | 1813 | lex_check(ls, '('); |
1845 | if (needself) | 1814 | if (needself) |
1846 | var_new_lit(ls, nparams++, "self"); | 1815 | var_new_lit(ls, nparams++, "self"); |
1847 | if (ls->token != ')') { | 1816 | if (ls->tok != ')') { |
1848 | do { | 1817 | do { |
1849 | if (ls->token == TK_name || (!LJ_52 && ls->token == TK_goto)) { | 1818 | if (ls->tok == TK_name || (!LJ_52 && ls->tok == TK_goto)) { |
1850 | var_new(ls, nparams++, lex_str(ls)); | 1819 | var_new(ls, nparams++, lex_str(ls)); |
1851 | } else if (ls->token == TK_dots) { | 1820 | } else if (ls->tok == TK_dots) { |
1852 | lj_lex_next(ls); | 1821 | lj_lex_next(ls); |
1853 | fs->flags |= PROTO_VARARG; | 1822 | fs->flags |= PROTO_VARARG; |
1854 | break; | 1823 | break; |
@@ -1882,7 +1851,7 @@ static void parse_body(LexState *ls, ExpDesc *e, int needself, BCLine line) | |||
1882 | fs.bclim = pfs->bclim - pfs->pc; | 1851 | fs.bclim = pfs->bclim - pfs->pc; |
1883 | bcemit_AD(&fs, BC_FUNCF, 0, 0); /* Placeholder. */ | 1852 | bcemit_AD(&fs, BC_FUNCF, 0, 0); /* Placeholder. */ |
1884 | parse_chunk(ls); | 1853 | parse_chunk(ls); |
1885 | if (ls->token != TK_end) lex_match(ls, TK_end, TK_function, line); | 1854 | if (ls->tok != TK_end) lex_match(ls, TK_end, TK_function, line); |
1886 | pt = fs_finish(ls, (ls->lastline = ls->linenumber)); | 1855 | pt = fs_finish(ls, (ls->lastline = ls->linenumber)); |
1887 | pfs->bcbase = ls->bcstack + oldbase; /* May have been reallocated. */ | 1856 | pfs->bcbase = ls->bcstack + oldbase; /* May have been reallocated. */ |
1888 | pfs->bclim = (BCPos)(ls->sizebcstack - oldbase); | 1857 | pfs->bclim = (BCPos)(ls->sizebcstack - oldbase); |
@@ -1921,13 +1890,13 @@ static void parse_args(LexState *ls, ExpDesc *e) | |||
1921 | BCIns ins; | 1890 | BCIns ins; |
1922 | BCReg base; | 1891 | BCReg base; |
1923 | BCLine line = ls->linenumber; | 1892 | BCLine line = ls->linenumber; |
1924 | if (ls->token == '(') { | 1893 | if (ls->tok == '(') { |
1925 | #if !LJ_52 | 1894 | #if !LJ_52 |
1926 | if (line != ls->lastline) | 1895 | if (line != ls->lastline) |
1927 | err_syntax(ls, LJ_ERR_XAMBIG); | 1896 | err_syntax(ls, LJ_ERR_XAMBIG); |
1928 | #endif | 1897 | #endif |
1929 | lj_lex_next(ls); | 1898 | lj_lex_next(ls); |
1930 | if (ls->token == ')') { /* f(). */ | 1899 | if (ls->tok == ')') { /* f(). */ |
1931 | args.k = VVOID; | 1900 | args.k = VVOID; |
1932 | } else { | 1901 | } else { |
1933 | expr_list(ls, &args); | 1902 | expr_list(ls, &args); |
@@ -1935,11 +1904,11 @@ static void parse_args(LexState *ls, ExpDesc *e) | |||
1935 | setbc_b(bcptr(fs, &args), 0); /* Pass on multiple results. */ | 1904 | setbc_b(bcptr(fs, &args), 0); /* Pass on multiple results. */ |
1936 | } | 1905 | } |
1937 | lex_match(ls, ')', '(', line); | 1906 | lex_match(ls, ')', '(', line); |
1938 | } else if (ls->token == '{') { | 1907 | } else if (ls->tok == '{') { |
1939 | expr_table(ls, &args); | 1908 | expr_table(ls, &args); |
1940 | } else if (ls->token == TK_string) { | 1909 | } else if (ls->tok == TK_string) { |
1941 | expr_init(&args, VKSTR, 0); | 1910 | expr_init(&args, VKSTR, 0); |
1942 | args.u.sval = strV(&ls->tokenval); | 1911 | args.u.sval = strV(&ls->tokval); |
1943 | lj_lex_next(ls); | 1912 | lj_lex_next(ls); |
1944 | } else { | 1913 | } else { |
1945 | err_syntax(ls, LJ_ERR_XFUNARG); | 1914 | err_syntax(ls, LJ_ERR_XFUNARG); |
@@ -1948,11 +1917,11 @@ static void parse_args(LexState *ls, ExpDesc *e) | |||
1948 | lua_assert(e->k == VNONRELOC); | 1917 | lua_assert(e->k == VNONRELOC); |
1949 | base = e->u.s.info; /* Base register for call. */ | 1918 | base = e->u.s.info; /* Base register for call. */ |
1950 | if (args.k == VCALL) { | 1919 | if (args.k == VCALL) { |
1951 | ins = BCINS_ABC(BC_CALLM, base, 2, args.u.s.aux - base - 1); | 1920 | ins = BCINS_ABC(BC_CALLM, base, 2, args.u.s.aux - base - 1 - LJ_FR2); |
1952 | } else { | 1921 | } else { |
1953 | if (args.k != VVOID) | 1922 | if (args.k != VVOID) |
1954 | expr_tonextreg(fs, &args); | 1923 | expr_tonextreg(fs, &args); |
1955 | ins = BCINS_ABC(BC_CALL, base, 2, fs->freereg - base); | 1924 | ins = BCINS_ABC(BC_CALL, base, 2, fs->freereg - base - LJ_FR2); |
1956 | } | 1925 | } |
1957 | expr_init(e, VCALL, bcemit_INS(fs, ins)); | 1926 | expr_init(e, VCALL, bcemit_INS(fs, ins)); |
1958 | e->u.s.aux = base; | 1927 | e->u.s.aux = base; |
@@ -1965,33 +1934,34 @@ static void expr_primary(LexState *ls, ExpDesc *v) | |||
1965 | { | 1934 | { |
1966 | FuncState *fs = ls->fs; | 1935 | FuncState *fs = ls->fs; |
1967 | /* Parse prefix expression. */ | 1936 | /* Parse prefix expression. */ |
1968 | if (ls->token == '(') { | 1937 | if (ls->tok == '(') { |
1969 | BCLine line = ls->linenumber; | 1938 | BCLine line = ls->linenumber; |
1970 | lj_lex_next(ls); | 1939 | lj_lex_next(ls); |
1971 | expr(ls, v); | 1940 | expr(ls, v); |
1972 | lex_match(ls, ')', '(', line); | 1941 | lex_match(ls, ')', '(', line); |
1973 | expr_discharge(ls->fs, v); | 1942 | expr_discharge(ls->fs, v); |
1974 | } else if (ls->token == TK_name || (!LJ_52 && ls->token == TK_goto)) { | 1943 | } else if (ls->tok == TK_name || (!LJ_52 && ls->tok == TK_goto)) { |
1975 | var_lookup(ls, v); | 1944 | var_lookup(ls, v); |
1976 | } else { | 1945 | } else { |
1977 | err_syntax(ls, LJ_ERR_XSYMBOL); | 1946 | err_syntax(ls, LJ_ERR_XSYMBOL); |
1978 | } | 1947 | } |
1979 | for (;;) { /* Parse multiple expression suffixes. */ | 1948 | for (;;) { /* Parse multiple expression suffixes. */ |
1980 | if (ls->token == '.') { | 1949 | if (ls->tok == '.') { |
1981 | expr_field(ls, v); | 1950 | expr_field(ls, v); |
1982 | } else if (ls->token == '[') { | 1951 | } else if (ls->tok == '[') { |
1983 | ExpDesc key; | 1952 | ExpDesc key; |
1984 | expr_toanyreg(fs, v); | 1953 | expr_toanyreg(fs, v); |
1985 | expr_bracket(ls, &key); | 1954 | expr_bracket(ls, &key); |
1986 | expr_index(fs, v, &key); | 1955 | expr_index(fs, v, &key); |
1987 | } else if (ls->token == ':') { | 1956 | } else if (ls->tok == ':') { |
1988 | ExpDesc key; | 1957 | ExpDesc key; |
1989 | lj_lex_next(ls); | 1958 | lj_lex_next(ls); |
1990 | expr_str(ls, &key); | 1959 | expr_str(ls, &key); |
1991 | bcemit_method(fs, v, &key); | 1960 | bcemit_method(fs, v, &key); |
1992 | parse_args(ls, v); | 1961 | parse_args(ls, v); |
1993 | } else if (ls->token == '(' || ls->token == TK_string || ls->token == '{') { | 1962 | } else if (ls->tok == '(' || ls->tok == TK_string || ls->tok == '{') { |
1994 | expr_tonextreg(fs, v); | 1963 | expr_tonextreg(fs, v); |
1964 | if (LJ_FR2) bcreg_reserve(fs, 1); | ||
1995 | parse_args(ls, v); | 1965 | parse_args(ls, v); |
1996 | } else { | 1966 | } else { |
1997 | break; | 1967 | break; |
@@ -2002,14 +1972,14 @@ static void expr_primary(LexState *ls, ExpDesc *v) | |||
2002 | /* Parse simple expression. */ | 1972 | /* Parse simple expression. */ |
2003 | static void expr_simple(LexState *ls, ExpDesc *v) | 1973 | static void expr_simple(LexState *ls, ExpDesc *v) |
2004 | { | 1974 | { |
2005 | switch (ls->token) { | 1975 | switch (ls->tok) { |
2006 | case TK_number: | 1976 | case TK_number: |
2007 | expr_init(v, (LJ_HASFFI && tviscdata(&ls->tokenval)) ? VKCDATA : VKNUM, 0); | 1977 | expr_init(v, (LJ_HASFFI && tviscdata(&ls->tokval)) ? VKCDATA : VKNUM, 0); |
2008 | copyTV(ls->L, &v->u.nval, &ls->tokenval); | 1978 | copyTV(ls->L, &v->u.nval, &ls->tokval); |
2009 | break; | 1979 | break; |
2010 | case TK_string: | 1980 | case TK_string: |
2011 | expr_init(v, VKSTR, 0); | 1981 | expr_init(v, VKSTR, 0); |
2012 | v->u.sval = strV(&ls->tokenval); | 1982 | v->u.sval = strV(&ls->tokval); |
2013 | break; | 1983 | break; |
2014 | case TK_nil: | 1984 | case TK_nil: |
2015 | expr_init(v, VKNIL, 0); | 1985 | expr_init(v, VKNIL, 0); |
@@ -2097,11 +2067,11 @@ static BinOpr expr_binop(LexState *ls, ExpDesc *v, uint32_t limit); | |||
2097 | static void expr_unop(LexState *ls, ExpDesc *v) | 2067 | static void expr_unop(LexState *ls, ExpDesc *v) |
2098 | { | 2068 | { |
2099 | BCOp op; | 2069 | BCOp op; |
2100 | if (ls->token == TK_not) { | 2070 | if (ls->tok == TK_not) { |
2101 | op = BC_NOT; | 2071 | op = BC_NOT; |
2102 | } else if (ls->token == '-') { | 2072 | } else if (ls->tok == '-') { |
2103 | op = BC_UNM; | 2073 | op = BC_UNM; |
2104 | } else if (ls->token == '#') { | 2074 | } else if (ls->tok == '#') { |
2105 | op = BC_LEN; | 2075 | op = BC_LEN; |
2106 | } else { | 2076 | } else { |
2107 | expr_simple(ls, v); | 2077 | expr_simple(ls, v); |
@@ -2118,7 +2088,7 @@ static BinOpr expr_binop(LexState *ls, ExpDesc *v, uint32_t limit) | |||
2118 | BinOpr op; | 2088 | BinOpr op; |
2119 | synlevel_begin(ls); | 2089 | synlevel_begin(ls); |
2120 | expr_unop(ls, v); | 2090 | expr_unop(ls, v); |
2121 | op = token2binop(ls->token); | 2091 | op = token2binop(ls->tok); |
2122 | while (op != OPR_NOBINOPR && priority[op].left > limit) { | 2092 | while (op != OPR_NOBINOPR && priority[op].left > limit) { |
2123 | ExpDesc v2; | 2093 | ExpDesc v2; |
2124 | BinOpr nextop; | 2094 | BinOpr nextop; |
@@ -2307,9 +2277,9 @@ static void parse_func(LexState *ls, BCLine line) | |||
2307 | lj_lex_next(ls); /* Skip 'function'. */ | 2277 | lj_lex_next(ls); /* Skip 'function'. */ |
2308 | /* Parse function name. */ | 2278 | /* Parse function name. */ |
2309 | var_lookup(ls, &v); | 2279 | var_lookup(ls, &v); |
2310 | while (ls->token == '.') /* Multiple dot-separated fields. */ | 2280 | while (ls->tok == '.') /* Multiple dot-separated fields. */ |
2311 | expr_field(ls, &v); | 2281 | expr_field(ls, &v); |
2312 | if (ls->token == ':') { /* Optional colon to signify method call. */ | 2282 | if (ls->tok == ':') { /* Optional colon to signify method call. */ |
2313 | needself = 1; | 2283 | needself = 1; |
2314 | expr_field(ls, &v); | 2284 | expr_field(ls, &v); |
2315 | } | 2285 | } |
@@ -2322,9 +2292,9 @@ static void parse_func(LexState *ls, BCLine line) | |||
2322 | /* -- Control transfer statements ----------------------------------------- */ | 2292 | /* -- Control transfer statements ----------------------------------------- */ |
2323 | 2293 | ||
2324 | /* Check for end of block. */ | 2294 | /* Check for end of block. */ |
2325 | static int endofblock(LexToken token) | 2295 | static int parse_isend(LexToken tok) |
2326 | { | 2296 | { |
2327 | switch (token) { | 2297 | switch (tok) { |
2328 | case TK_else: case TK_elseif: case TK_end: case TK_until: case TK_eof: | 2298 | case TK_else: case TK_elseif: case TK_end: case TK_until: case TK_eof: |
2329 | return 1; | 2299 | return 1; |
2330 | default: | 2300 | default: |
@@ -2339,7 +2309,7 @@ static void parse_return(LexState *ls) | |||
2339 | FuncState *fs = ls->fs; | 2309 | FuncState *fs = ls->fs; |
2340 | lj_lex_next(ls); /* Skip 'return'. */ | 2310 | lj_lex_next(ls); /* Skip 'return'. */ |
2341 | fs->flags |= PROTO_HAS_RETURN; | 2311 | fs->flags |= PROTO_HAS_RETURN; |
2342 | if (endofblock(ls->token) || ls->token == ';') { /* Bare return. */ | 2312 | if (parse_isend(ls->tok) || ls->tok == ';') { /* Bare return. */ |
2343 | ins = BCINS_AD(BC_RET0, 0, 1); | 2313 | ins = BCINS_AD(BC_RET0, 0, 1); |
2344 | } else { /* Return with one or more values. */ | 2314 | } else { /* Return with one or more values. */ |
2345 | ExpDesc e; /* Receives the _last_ expression in the list. */ | 2315 | ExpDesc e; /* Receives the _last_ expression in the list. */ |
@@ -2405,18 +2375,18 @@ static void parse_label(LexState *ls) | |||
2405 | lex_check(ls, TK_label); | 2375 | lex_check(ls, TK_label); |
2406 | /* Recursively parse trailing statements: labels and ';' (Lua 5.2 only). */ | 2376 | /* Recursively parse trailing statements: labels and ';' (Lua 5.2 only). */ |
2407 | for (;;) { | 2377 | for (;;) { |
2408 | if (ls->token == TK_label) { | 2378 | if (ls->tok == TK_label) { |
2409 | synlevel_begin(ls); | 2379 | synlevel_begin(ls); |
2410 | parse_label(ls); | 2380 | parse_label(ls); |
2411 | synlevel_end(ls); | 2381 | synlevel_end(ls); |
2412 | } else if (LJ_52 && ls->token == ';') { | 2382 | } else if (LJ_52 && ls->tok == ';') { |
2413 | lj_lex_next(ls); | 2383 | lj_lex_next(ls); |
2414 | } else { | 2384 | } else { |
2415 | break; | 2385 | break; |
2416 | } | 2386 | } |
2417 | } | 2387 | } |
2418 | /* Trailing label is considered to be outside of scope. */ | 2388 | /* Trailing label is considered to be outside of scope. */ |
2419 | if (endofblock(ls->token) && ls->token != TK_until) | 2389 | if (parse_isend(ls->tok) && ls->tok != TK_until) |
2420 | ls->vstack[idx].slot = fs->bl->nactvar; | 2390 | ls->vstack[idx].slot = fs->bl->nactvar; |
2421 | gola_resolve(ls, fs->bl, idx); | 2391 | gola_resolve(ls, fs->bl, idx); |
2422 | } | 2392 | } |
@@ -2572,7 +2542,8 @@ static void parse_for_iter(LexState *ls, GCstr *indexname) | |||
2572 | lex_check(ls, TK_in); | 2542 | lex_check(ls, TK_in); |
2573 | line = ls->linenumber; | 2543 | line = ls->linenumber; |
2574 | assign_adjust(ls, 3, expr_list(ls, &e), &e); | 2544 | assign_adjust(ls, 3, expr_list(ls, &e), &e); |
2575 | bcreg_bump(fs, 3); /* The iterator needs another 3 slots (func + 2 args). */ | 2545 | /* The iterator needs another 3 [4] slots (func [pc] | state ctl). */ |
2546 | bcreg_bump(fs, 3+LJ_FR2); | ||
2576 | isnext = (nvars <= 5 && predict_next(ls, fs, exprpc)); | 2547 | isnext = (nvars <= 5 && predict_next(ls, fs, exprpc)); |
2577 | var_add(ls, 3); /* Hidden control variables. */ | 2548 | var_add(ls, 3); /* Hidden control variables. */ |
2578 | lex_check(ls, TK_do); | 2549 | lex_check(ls, TK_do); |
@@ -2600,9 +2571,9 @@ static void parse_for(LexState *ls, BCLine line) | |||
2600 | fscope_begin(fs, &bl, FSCOPE_LOOP); | 2571 | fscope_begin(fs, &bl, FSCOPE_LOOP); |
2601 | lj_lex_next(ls); /* Skip 'for'. */ | 2572 | lj_lex_next(ls); /* Skip 'for'. */ |
2602 | varname = lex_str(ls); /* Get first variable name. */ | 2573 | varname = lex_str(ls); /* Get first variable name. */ |
2603 | if (ls->token == '=') | 2574 | if (ls->tok == '=') |
2604 | parse_for_num(ls, varname, line); | 2575 | parse_for_num(ls, varname, line); |
2605 | else if (ls->token == ',' || ls->token == TK_in) | 2576 | else if (ls->tok == ',' || ls->tok == TK_in) |
2606 | parse_for_iter(ls, varname); | 2577 | parse_for_iter(ls, varname); |
2607 | else | 2578 | else |
2608 | err_syntax(ls, LJ_ERR_XFOR); | 2579 | err_syntax(ls, LJ_ERR_XFOR); |
@@ -2628,12 +2599,12 @@ static void parse_if(LexState *ls, BCLine line) | |||
2628 | BCPos flist; | 2599 | BCPos flist; |
2629 | BCPos escapelist = NO_JMP; | 2600 | BCPos escapelist = NO_JMP; |
2630 | flist = parse_then(ls); | 2601 | flist = parse_then(ls); |
2631 | while (ls->token == TK_elseif) { /* Parse multiple 'elseif' blocks. */ | 2602 | while (ls->tok == TK_elseif) { /* Parse multiple 'elseif' blocks. */ |
2632 | jmp_append(fs, &escapelist, bcemit_jmp(fs)); | 2603 | jmp_append(fs, &escapelist, bcemit_jmp(fs)); |
2633 | jmp_tohere(fs, flist); | 2604 | jmp_tohere(fs, flist); |
2634 | flist = parse_then(ls); | 2605 | flist = parse_then(ls); |
2635 | } | 2606 | } |
2636 | if (ls->token == TK_else) { /* Parse optional 'else' block. */ | 2607 | if (ls->tok == TK_else) { /* Parse optional 'else' block. */ |
2637 | jmp_append(fs, &escapelist, bcemit_jmp(fs)); | 2608 | jmp_append(fs, &escapelist, bcemit_jmp(fs)); |
2638 | jmp_tohere(fs, flist); | 2609 | jmp_tohere(fs, flist); |
2639 | lj_lex_next(ls); /* Skip 'else'. */ | 2610 | lj_lex_next(ls); /* Skip 'else'. */ |
@@ -2651,7 +2622,7 @@ static void parse_if(LexState *ls, BCLine line) | |||
2651 | static int parse_stmt(LexState *ls) | 2622 | static int parse_stmt(LexState *ls) |
2652 | { | 2623 | { |
2653 | BCLine line = ls->linenumber; | 2624 | BCLine line = ls->linenumber; |
2654 | switch (ls->token) { | 2625 | switch (ls->tok) { |
2655 | case TK_if: | 2626 | case TK_if: |
2656 | parse_if(ls, line); | 2627 | parse_if(ls, line); |
2657 | break; | 2628 | break; |
@@ -2710,7 +2681,7 @@ static void parse_chunk(LexState *ls) | |||
2710 | { | 2681 | { |
2711 | int islast = 0; | 2682 | int islast = 0; |
2712 | synlevel_begin(ls); | 2683 | synlevel_begin(ls); |
2713 | while (!islast && !endofblock(ls->token)) { | 2684 | while (!islast && !parse_isend(ls->tok)) { |
2714 | islast = parse_stmt(ls); | 2685 | islast = parse_stmt(ls); |
2715 | lex_opt(ls, ';'); | 2686 | lex_opt(ls, ';'); |
2716 | lua_assert(ls->fs->framesize >= ls->fs->freereg && | 2687 | lua_assert(ls->fs->framesize >= ls->fs->freereg && |
@@ -2745,7 +2716,7 @@ GCproto *lj_parse(LexState *ls) | |||
2745 | bcemit_AD(&fs, BC_FUNCV, 0, 0); /* Placeholder. */ | 2716 | bcemit_AD(&fs, BC_FUNCV, 0, 0); /* Placeholder. */ |
2746 | lj_lex_next(ls); /* Read-ahead first token. */ | 2717 | lj_lex_next(ls); /* Read-ahead first token. */ |
2747 | parse_chunk(ls); | 2718 | parse_chunk(ls); |
2748 | if (ls->token != TK_eof) | 2719 | if (ls->tok != TK_eof) |
2749 | err_token(ls, TK_eof); | 2720 | err_token(ls, TK_eof); |
2750 | pt = fs_finish(ls, ls->linenumber); | 2721 | pt = fs_finish(ls, ls->linenumber); |
2751 | L->top--; /* Drop chunkname. */ | 2722 | L->top--; /* Drop chunkname. */ |
diff --git a/src/lj_profile.c b/src/lj_profile.c new file mode 100644 index 00000000..2fe40858 --- /dev/null +++ b/src/lj_profile.c | |||
@@ -0,0 +1,368 @@ | |||
1 | /* | ||
2 | ** Low-overhead profiling. | ||
3 | ** Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #define lj_profile_c | ||
7 | #define LUA_CORE | ||
8 | |||
9 | #include "lj_obj.h" | ||
10 | |||
11 | #if LJ_HASPROFILE | ||
12 | |||
13 | #include "lj_buf.h" | ||
14 | #include "lj_frame.h" | ||
15 | #include "lj_debug.h" | ||
16 | #include "lj_dispatch.h" | ||
17 | #if LJ_HASJIT | ||
18 | #include "lj_jit.h" | ||
19 | #include "lj_trace.h" | ||
20 | #endif | ||
21 | #include "lj_profile.h" | ||
22 | |||
23 | #include "luajit.h" | ||
24 | |||
25 | #if LJ_PROFILE_SIGPROF | ||
26 | |||
27 | #include <sys/time.h> | ||
28 | #include <signal.h> | ||
29 | #define profile_lock(ps) UNUSED(ps) | ||
30 | #define profile_unlock(ps) UNUSED(ps) | ||
31 | |||
32 | #elif LJ_PROFILE_PTHREAD | ||
33 | |||
34 | #include <pthread.h> | ||
35 | #include <time.h> | ||
36 | #if LJ_TARGET_PS3 | ||
37 | #include <sys/timer.h> | ||
38 | #endif | ||
39 | #define profile_lock(ps) pthread_mutex_lock(&ps->lock) | ||
40 | #define profile_unlock(ps) pthread_mutex_unlock(&ps->lock) | ||
41 | |||
42 | #elif LJ_PROFILE_WTHREAD | ||
43 | |||
44 | #define WIN32_LEAN_AND_MEAN | ||
45 | #if LJ_TARGET_XBOX360 | ||
46 | #include <xtl.h> | ||
47 | #include <xbox.h> | ||
48 | #else | ||
49 | #include <windows.h> | ||
50 | #endif | ||
51 | typedef unsigned int (WINAPI *WMM_TPFUNC)(unsigned int); | ||
52 | #define profile_lock(ps) EnterCriticalSection(&ps->lock) | ||
53 | #define profile_unlock(ps) LeaveCriticalSection(&ps->lock) | ||
54 | |||
55 | #endif | ||
56 | |||
57 | /* Profiler state. */ | ||
58 | typedef struct ProfileState { | ||
59 | global_State *g; /* VM state that started the profiler. */ | ||
60 | luaJIT_profile_callback cb; /* Profiler callback. */ | ||
61 | void *data; /* Profiler callback data. */ | ||
62 | SBuf sb; /* String buffer for stack dumps. */ | ||
63 | int interval; /* Sample interval in milliseconds. */ | ||
64 | int samples; /* Number of samples for next callback. */ | ||
65 | int vmstate; /* VM state when profile timer triggered. */ | ||
66 | #if LJ_PROFILE_SIGPROF | ||
67 | struct sigaction oldsa; /* Previous SIGPROF state. */ | ||
68 | #elif LJ_PROFILE_PTHREAD | ||
69 | pthread_mutex_t lock; /* g->hookmask update lock. */ | ||
70 | pthread_t thread; /* Timer thread. */ | ||
71 | int abort; /* Abort timer thread. */ | ||
72 | #elif LJ_PROFILE_WTHREAD | ||
73 | #if LJ_TARGET_WINDOWS | ||
74 | HINSTANCE wmm; /* WinMM library handle. */ | ||
75 | WMM_TPFUNC wmm_tbp; /* WinMM timeBeginPeriod function. */ | ||
76 | WMM_TPFUNC wmm_tep; /* WinMM timeEndPeriod function. */ | ||
77 | #endif | ||
78 | CRITICAL_SECTION lock; /* g->hookmask update lock. */ | ||
79 | HANDLE thread; /* Timer thread. */ | ||
80 | int abort; /* Abort timer thread. */ | ||
81 | #endif | ||
82 | } ProfileState; | ||
83 | |||
84 | /* Sadly, we have to use a static profiler state. | ||
85 | ** | ||
86 | ** The SIGPROF variant needs a static pointer to the global state, anyway. | ||
87 | ** And it would be hard to extend for multiple threads. You can still use | ||
88 | ** multiple VMs in multiple threads, but only profile one at a time. | ||
89 | */ | ||
90 | static ProfileState profile_state; | ||
91 | |||
92 | /* Default sample interval in milliseconds. */ | ||
93 | #define LJ_PROFILE_INTERVAL_DEFAULT 10 | ||
94 | |||
95 | /* -- Profiler/hook interaction ------------------------------------------- */ | ||
96 | |||
97 | #if !LJ_PROFILE_SIGPROF | ||
98 | void LJ_FASTCALL lj_profile_hook_enter(global_State *g) | ||
99 | { | ||
100 | ProfileState *ps = &profile_state; | ||
101 | if (ps->g) { | ||
102 | profile_lock(ps); | ||
103 | hook_enter(g); | ||
104 | profile_unlock(ps); | ||
105 | } else { | ||
106 | hook_enter(g); | ||
107 | } | ||
108 | } | ||
109 | |||
110 | void LJ_FASTCALL lj_profile_hook_leave(global_State *g) | ||
111 | { | ||
112 | ProfileState *ps = &profile_state; | ||
113 | if (ps->g) { | ||
114 | profile_lock(ps); | ||
115 | hook_leave(g); | ||
116 | profile_unlock(ps); | ||
117 | } else { | ||
118 | hook_leave(g); | ||
119 | } | ||
120 | } | ||
121 | #endif | ||
122 | |||
123 | /* -- Profile callbacks --------------------------------------------------- */ | ||
124 | |||
125 | /* Callback from profile hook (HOOK_PROFILE already cleared). */ | ||
126 | void LJ_FASTCALL lj_profile_interpreter(lua_State *L) | ||
127 | { | ||
128 | ProfileState *ps = &profile_state; | ||
129 | global_State *g = G(L); | ||
130 | uint8_t mask; | ||
131 | profile_lock(ps); | ||
132 | mask = (g->hookmask & ~HOOK_PROFILE); | ||
133 | if (!(mask & HOOK_VMEVENT)) { | ||
134 | int samples = ps->samples; | ||
135 | ps->samples = 0; | ||
136 | g->hookmask = HOOK_VMEVENT; | ||
137 | lj_dispatch_update(g); | ||
138 | profile_unlock(ps); | ||
139 | ps->cb(ps->data, L, samples, ps->vmstate); /* Invoke user callback. */ | ||
140 | profile_lock(ps); | ||
141 | mask |= (g->hookmask & HOOK_PROFILE); | ||
142 | } | ||
143 | g->hookmask = mask; | ||
144 | lj_dispatch_update(g); | ||
145 | profile_unlock(ps); | ||
146 | } | ||
147 | |||
148 | /* Trigger profile hook. Asynchronous call from OS-specific profile timer. */ | ||
149 | static void profile_trigger(ProfileState *ps) | ||
150 | { | ||
151 | global_State *g = ps->g; | ||
152 | uint8_t mask; | ||
153 | profile_lock(ps); | ||
154 | ps->samples++; /* Always increment number of samples. */ | ||
155 | mask = g->hookmask; | ||
156 | if (!(mask & (HOOK_PROFILE|HOOK_VMEVENT|HOOK_GC))) { /* Set profile hook. */ | ||
157 | int st = g->vmstate; | ||
158 | ps->vmstate = st >= 0 ? 'N' : | ||
159 | st == ~LJ_VMST_INTERP ? 'I' : | ||
160 | st == ~LJ_VMST_C ? 'C' : | ||
161 | st == ~LJ_VMST_GC ? 'G' : 'J'; | ||
162 | g->hookmask = (mask | HOOK_PROFILE); | ||
163 | lj_dispatch_update(g); | ||
164 | } | ||
165 | profile_unlock(ps); | ||
166 | } | ||
167 | |||
168 | /* -- OS-specific profile timer handling ---------------------------------- */ | ||
169 | |||
170 | #if LJ_PROFILE_SIGPROF | ||
171 | |||
172 | /* SIGPROF handler. */ | ||
173 | static void profile_signal(int sig) | ||
174 | { | ||
175 | UNUSED(sig); | ||
176 | profile_trigger(&profile_state); | ||
177 | } | ||
178 | |||
179 | /* Start profiling timer. */ | ||
180 | static void profile_timer_start(ProfileState *ps) | ||
181 | { | ||
182 | int interval = ps->interval; | ||
183 | struct itimerval tm; | ||
184 | struct sigaction sa; | ||
185 | tm.it_value.tv_sec = tm.it_interval.tv_sec = interval / 1000; | ||
186 | tm.it_value.tv_usec = tm.it_interval.tv_usec = (interval % 1000) * 1000; | ||
187 | setitimer(ITIMER_PROF, &tm, NULL); | ||
188 | sa.sa_flags = SA_RESTART; | ||
189 | sa.sa_handler = profile_signal; | ||
190 | sigemptyset(&sa.sa_mask); | ||
191 | sigaction(SIGPROF, &sa, &ps->oldsa); | ||
192 | } | ||
193 | |||
194 | /* Stop profiling timer. */ | ||
195 | static void profile_timer_stop(ProfileState *ps) | ||
196 | { | ||
197 | struct itimerval tm; | ||
198 | tm.it_value.tv_sec = tm.it_interval.tv_sec = 0; | ||
199 | tm.it_value.tv_usec = tm.it_interval.tv_usec = 0; | ||
200 | setitimer(ITIMER_PROF, &tm, NULL); | ||
201 | sigaction(SIGPROF, &ps->oldsa, NULL); | ||
202 | } | ||
203 | |||
204 | #elif LJ_PROFILE_PTHREAD | ||
205 | |||
206 | /* POSIX timer thread. */ | ||
207 | static void *profile_thread(ProfileState *ps) | ||
208 | { | ||
209 | int interval = ps->interval; | ||
210 | #if !LJ_TARGET_PS3 | ||
211 | struct timespec ts; | ||
212 | ts.tv_sec = interval / 1000; | ||
213 | ts.tv_nsec = (interval % 1000) * 1000000; | ||
214 | #endif | ||
215 | while (1) { | ||
216 | #if LJ_TARGET_PS3 | ||
217 | sys_timer_usleep(interval * 1000); | ||
218 | #else | ||
219 | nanosleep(&ts, NULL); | ||
220 | #endif | ||
221 | if (ps->abort) break; | ||
222 | profile_trigger(ps); | ||
223 | } | ||
224 | return NULL; | ||
225 | } | ||
226 | |||
227 | /* Start profiling timer thread. */ | ||
228 | static void profile_timer_start(ProfileState *ps) | ||
229 | { | ||
230 | pthread_mutex_init(&ps->lock, 0); | ||
231 | ps->abort = 0; | ||
232 | pthread_create(&ps->thread, NULL, (void *(*)(void *))profile_thread, ps); | ||
233 | } | ||
234 | |||
235 | /* Stop profiling timer thread. */ | ||
236 | static void profile_timer_stop(ProfileState *ps) | ||
237 | { | ||
238 | ps->abort = 1; | ||
239 | pthread_join(ps->thread, NULL); | ||
240 | pthread_mutex_destroy(&ps->lock); | ||
241 | } | ||
242 | |||
243 | #elif LJ_PROFILE_WTHREAD | ||
244 | |||
245 | /* Windows timer thread. */ | ||
246 | static DWORD WINAPI profile_thread(void *psx) | ||
247 | { | ||
248 | ProfileState *ps = (ProfileState *)psx; | ||
249 | int interval = ps->interval; | ||
250 | #if LJ_TARGET_WINDOWS && !LJ_TARGET_UWP | ||
251 | ps->wmm_tbp(interval); | ||
252 | #endif | ||
253 | while (1) { | ||
254 | Sleep(interval); | ||
255 | if (ps->abort) break; | ||
256 | profile_trigger(ps); | ||
257 | } | ||
258 | #if LJ_TARGET_WINDOWS && !LJ_TARGET_UWP | ||
259 | ps->wmm_tep(interval); | ||
260 | #endif | ||
261 | return 0; | ||
262 | } | ||
263 | |||
264 | /* Start profiling timer thread. */ | ||
265 | static void profile_timer_start(ProfileState *ps) | ||
266 | { | ||
267 | #if LJ_TARGET_WINDOWS && !LJ_TARGET_UWP | ||
268 | if (!ps->wmm) { /* Load WinMM library on-demand. */ | ||
269 | ps->wmm = LJ_WIN_LOADLIBA("winmm.dll"); | ||
270 | if (ps->wmm) { | ||
271 | ps->wmm_tbp = (WMM_TPFUNC)GetProcAddress(ps->wmm, "timeBeginPeriod"); | ||
272 | ps->wmm_tep = (WMM_TPFUNC)GetProcAddress(ps->wmm, "timeEndPeriod"); | ||
273 | if (!ps->wmm_tbp || !ps->wmm_tep) { | ||
274 | ps->wmm = NULL; | ||
275 | return; | ||
276 | } | ||
277 | } | ||
278 | } | ||
279 | #endif | ||
280 | InitializeCriticalSection(&ps->lock); | ||
281 | ps->abort = 0; | ||
282 | ps->thread = CreateThread(NULL, 0, profile_thread, ps, 0, NULL); | ||
283 | } | ||
284 | |||
285 | /* Stop profiling timer thread. */ | ||
286 | static void profile_timer_stop(ProfileState *ps) | ||
287 | { | ||
288 | ps->abort = 1; | ||
289 | WaitForSingleObject(ps->thread, INFINITE); | ||
290 | DeleteCriticalSection(&ps->lock); | ||
291 | } | ||
292 | |||
293 | #endif | ||
294 | |||
295 | /* -- Public profiling API ------------------------------------------------ */ | ||
296 | |||
297 | /* Start profiling. */ | ||
298 | LUA_API void luaJIT_profile_start(lua_State *L, const char *mode, | ||
299 | luaJIT_profile_callback cb, void *data) | ||
300 | { | ||
301 | ProfileState *ps = &profile_state; | ||
302 | int interval = LJ_PROFILE_INTERVAL_DEFAULT; | ||
303 | while (*mode) { | ||
304 | int m = *mode++; | ||
305 | switch (m) { | ||
306 | case 'i': | ||
307 | interval = 0; | ||
308 | while (*mode >= '0' && *mode <= '9') | ||
309 | interval = interval * 10 + (*mode++ - '0'); | ||
310 | if (interval <= 0) interval = 1; | ||
311 | break; | ||
312 | #if LJ_HASJIT | ||
313 | case 'l': case 'f': | ||
314 | L2J(L)->prof_mode = m; | ||
315 | lj_trace_flushall(L); | ||
316 | break; | ||
317 | #endif | ||
318 | default: /* Ignore unknown mode chars. */ | ||
319 | break; | ||
320 | } | ||
321 | } | ||
322 | if (ps->g) { | ||
323 | luaJIT_profile_stop(L); | ||
324 | if (ps->g) return; /* Profiler in use by another VM. */ | ||
325 | } | ||
326 | ps->g = G(L); | ||
327 | ps->interval = interval; | ||
328 | ps->cb = cb; | ||
329 | ps->data = data; | ||
330 | ps->samples = 0; | ||
331 | lj_buf_init(L, &ps->sb); | ||
332 | profile_timer_start(ps); | ||
333 | } | ||
334 | |||
335 | /* Stop profiling. */ | ||
336 | LUA_API void luaJIT_profile_stop(lua_State *L) | ||
337 | { | ||
338 | ProfileState *ps = &profile_state; | ||
339 | global_State *g = ps->g; | ||
340 | if (G(L) == g) { /* Only stop profiler if started by this VM. */ | ||
341 | profile_timer_stop(ps); | ||
342 | g->hookmask &= ~HOOK_PROFILE; | ||
343 | lj_dispatch_update(g); | ||
344 | #if LJ_HASJIT | ||
345 | G2J(g)->prof_mode = 0; | ||
346 | lj_trace_flushall(L); | ||
347 | #endif | ||
348 | lj_buf_free(g, &ps->sb); | ||
349 | setmref(ps->sb.b, NULL); | ||
350 | setmref(ps->sb.e, NULL); | ||
351 | ps->g = NULL; | ||
352 | } | ||
353 | } | ||
354 | |||
355 | /* Return a compact stack dump. */ | ||
356 | LUA_API const char *luaJIT_profile_dumpstack(lua_State *L, const char *fmt, | ||
357 | int depth, size_t *len) | ||
358 | { | ||
359 | ProfileState *ps = &profile_state; | ||
360 | SBuf *sb = &ps->sb; | ||
361 | setsbufL(sb, L); | ||
362 | lj_buf_reset(sb); | ||
363 | lj_debug_dumpstack(L, sb, fmt, depth); | ||
364 | *len = (size_t)sbuflen(sb); | ||
365 | return sbufB(sb); | ||
366 | } | ||
367 | |||
368 | #endif | ||
diff --git a/src/lj_profile.h b/src/lj_profile.h new file mode 100644 index 00000000..db69eb9e --- /dev/null +++ b/src/lj_profile.h | |||
@@ -0,0 +1,21 @@ | |||
1 | /* | ||
2 | ** Low-overhead profiling. | ||
3 | ** Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #ifndef _LJ_PROFILE_H | ||
7 | #define _LJ_PROFILE_H | ||
8 | |||
9 | #include "lj_obj.h" | ||
10 | |||
11 | #if LJ_HASPROFILE | ||
12 | |||
13 | LJ_FUNC void LJ_FASTCALL lj_profile_interpreter(lua_State *L); | ||
14 | #if !LJ_PROFILE_SIGPROF | ||
15 | LJ_FUNC void LJ_FASTCALL lj_profile_hook_enter(global_State *g); | ||
16 | LJ_FUNC void LJ_FASTCALL lj_profile_hook_leave(global_State *g); | ||
17 | #endif | ||
18 | |||
19 | #endif | ||
20 | |||
21 | #endif | ||
diff --git a/src/lj_record.c b/src/lj_record.c index 69822f54..8eec0071 100644 --- a/src/lj_record.c +++ b/src/lj_record.c | |||
@@ -20,6 +20,9 @@ | |||
20 | #endif | 20 | #endif |
21 | #include "lj_bc.h" | 21 | #include "lj_bc.h" |
22 | #include "lj_ff.h" | 22 | #include "lj_ff.h" |
23 | #if LJ_HASPROFILE | ||
24 | #include "lj_debug.h" | ||
25 | #endif | ||
23 | #include "lj_ir.h" | 26 | #include "lj_ir.h" |
24 | #include "lj_jit.h" | 27 | #include "lj_jit.h" |
25 | #include "lj_ircall.h" | 28 | #include "lj_ircall.h" |
@@ -48,7 +51,7 @@ static void rec_check_ir(jit_State *J) | |||
48 | { | 51 | { |
49 | IRRef i, nins = J->cur.nins, nk = J->cur.nk; | 52 | IRRef i, nins = J->cur.nins, nk = J->cur.nk; |
50 | lua_assert(nk <= REF_BIAS && nins >= REF_BIAS && nins < 65536); | 53 | lua_assert(nk <= REF_BIAS && nins >= REF_BIAS && nins < 65536); |
51 | for (i = nins-1; i >= nk; i--) { | 54 | for (i = nk; i < nins; i++) { |
52 | IRIns *ir = IR(i); | 55 | IRIns *ir = IR(i); |
53 | uint32_t mode = lj_ir_mode[ir->o]; | 56 | uint32_t mode = lj_ir_mode[ir->o]; |
54 | IRRef op1 = ir->op1; | 57 | IRRef op1 = ir->op1; |
@@ -58,7 +61,10 @@ static void rec_check_ir(jit_State *J) | |||
58 | case IRMref: lua_assert(op1 >= nk); | 61 | case IRMref: lua_assert(op1 >= nk); |
59 | lua_assert(i >= REF_BIAS ? op1 < i : op1 > i); break; | 62 | lua_assert(i >= REF_BIAS ? op1 < i : op1 > i); break; |
60 | case IRMlit: break; | 63 | case IRMlit: break; |
61 | case IRMcst: lua_assert(i < REF_BIAS); continue; | 64 | case IRMcst: lua_assert(i < REF_BIAS); |
65 | if (irt_is64(ir->t) && ir->o != IR_KNULL) | ||
66 | i++; | ||
67 | continue; | ||
62 | } | 68 | } |
63 | switch (irm_op2(mode)) { | 69 | switch (irm_op2(mode)) { |
64 | case IRMnone: lua_assert(op2 == 0); break; | 70 | case IRMnone: lua_assert(op2 == 0); break; |
@@ -81,30 +87,48 @@ static void rec_check_slots(jit_State *J) | |||
81 | BCReg s, nslots = J->baseslot + J->maxslot; | 87 | BCReg s, nslots = J->baseslot + J->maxslot; |
82 | int32_t depth = 0; | 88 | int32_t depth = 0; |
83 | cTValue *base = J->L->base - J->baseslot; | 89 | cTValue *base = J->L->base - J->baseslot; |
84 | lua_assert(J->baseslot >= 1); | 90 | lua_assert(J->baseslot >= 1+LJ_FR2); |
85 | lua_assert(J->baseslot == 1 || (J->slot[J->baseslot-1] & TREF_FRAME)); | 91 | lua_assert(J->baseslot == 1+LJ_FR2 || (J->slot[J->baseslot-1] & TREF_FRAME)); |
86 | lua_assert(nslots <= LJ_MAX_JSLOTS); | 92 | lua_assert(nslots <= LJ_MAX_JSLOTS); |
87 | for (s = 0; s < nslots; s++) { | 93 | for (s = 0; s < nslots; s++) { |
88 | TRef tr = J->slot[s]; | 94 | TRef tr = J->slot[s]; |
89 | if (tr) { | 95 | if (tr) { |
90 | cTValue *tv = &base[s]; | 96 | cTValue *tv = &base[s]; |
91 | IRRef ref = tref_ref(tr); | 97 | IRRef ref = tref_ref(tr); |
92 | IRIns *ir; | 98 | IRIns *ir = NULL; /* Silence compiler. */ |
93 | lua_assert(ref >= J->cur.nk && ref < J->cur.nins); | 99 | if (!LJ_FR2 || ref || !(tr & (TREF_FRAME | TREF_CONT))) { |
94 | ir = IR(ref); | 100 | lua_assert(ref >= J->cur.nk && ref < J->cur.nins); |
95 | lua_assert(irt_t(ir->t) == tref_t(tr)); | 101 | ir = IR(ref); |
102 | lua_assert(irt_t(ir->t) == tref_t(tr)); | ||
103 | } | ||
96 | if (s == 0) { | 104 | if (s == 0) { |
97 | lua_assert(tref_isfunc(tr)); | 105 | lua_assert(tref_isfunc(tr)); |
106 | #if LJ_FR2 | ||
107 | } else if (s == 1) { | ||
108 | lua_assert((tr & ~TREF_FRAME) == 0); | ||
109 | #endif | ||
98 | } else if ((tr & TREF_FRAME)) { | 110 | } else if ((tr & TREF_FRAME)) { |
99 | GCfunc *fn = gco2func(frame_gc(tv)); | 111 | GCfunc *fn = gco2func(frame_gc(tv)); |
100 | BCReg delta = (BCReg)(tv - frame_prev(tv)); | 112 | BCReg delta = (BCReg)(tv - frame_prev(tv)); |
113 | #if LJ_FR2 | ||
114 | if (ref) | ||
115 | lua_assert(ir_knum(ir)->u64 == tv->u64); | ||
116 | tr = J->slot[s-1]; | ||
117 | ir = IR(tref_ref(tr)); | ||
118 | #endif | ||
101 | lua_assert(tref_isfunc(tr)); | 119 | lua_assert(tref_isfunc(tr)); |
102 | if (tref_isk(tr)) lua_assert(fn == ir_kfunc(ir)); | 120 | if (tref_isk(tr)) lua_assert(fn == ir_kfunc(ir)); |
103 | lua_assert(s > delta ? (J->slot[s-delta] & TREF_FRAME) : (s == delta)); | 121 | lua_assert(s > delta + LJ_FR2 ? (J->slot[s-delta] & TREF_FRAME) |
122 | : (s == delta + LJ_FR2)); | ||
104 | depth++; | 123 | depth++; |
105 | } else if ((tr & TREF_CONT)) { | 124 | } else if ((tr & TREF_CONT)) { |
125 | #if LJ_FR2 | ||
126 | if (ref) | ||
127 | lua_assert(ir_knum(ir)->u64 == tv->u64); | ||
128 | #else | ||
106 | lua_assert(ir_kptr(ir) == gcrefp(tv->gcr, void)); | 129 | lua_assert(ir_kptr(ir) == gcrefp(tv->gcr, void)); |
107 | lua_assert((J->slot[s+1] & TREF_FRAME)); | 130 | #endif |
131 | lua_assert((J->slot[s+1+LJ_FR2] & TREF_FRAME)); | ||
108 | depth++; | 132 | depth++; |
109 | } else { | 133 | } else { |
110 | if (tvisnumber(tv)) | 134 | if (tvisnumber(tv)) |
@@ -156,10 +180,10 @@ static TRef sload(jit_State *J, int32_t slot) | |||
156 | /* Get TRef for current function. */ | 180 | /* Get TRef for current function. */ |
157 | static TRef getcurrf(jit_State *J) | 181 | static TRef getcurrf(jit_State *J) |
158 | { | 182 | { |
159 | if (J->base[-1]) | 183 | if (J->base[-1-LJ_FR2]) |
160 | return J->base[-1]; | 184 | return J->base[-1-LJ_FR2]; |
161 | lua_assert(J->baseslot == 1); | 185 | lua_assert(J->baseslot == 1+LJ_FR2); |
162 | return sloadt(J, -1, IRT_FUNC, IRSLOAD_READONLY); | 186 | return sloadt(J, -1-LJ_FR2, IRT_FUNC, IRSLOAD_READONLY); |
163 | } | 187 | } |
164 | 188 | ||
165 | /* Compare for raw object equality. | 189 | /* Compare for raw object equality. |
@@ -230,8 +254,12 @@ static void canonicalize_slots(jit_State *J) | |||
230 | } | 254 | } |
231 | 255 | ||
232 | /* Stop recording. */ | 256 | /* Stop recording. */ |
233 | static void rec_stop(jit_State *J, TraceLink linktype, TraceNo lnk) | 257 | void lj_record_stop(jit_State *J, TraceLink linktype, TraceNo lnk) |
234 | { | 258 | { |
259 | #ifdef LUAJIT_ENABLE_TABLE_BUMP | ||
260 | if (J->retryrec) | ||
261 | lj_trace_err(J, LJ_TRERR_RETRY); | ||
262 | #endif | ||
235 | lj_trace_end(J); | 263 | lj_trace_end(J); |
236 | J->cur.linktype = (uint8_t)linktype; | 264 | J->cur.linktype = (uint8_t)linktype; |
237 | J->cur.link = (uint16_t)lnk; | 265 | J->cur.link = (uint16_t)lnk; |
@@ -499,8 +527,7 @@ static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl) | |||
499 | static LoopEvent rec_iterl(jit_State *J, const BCIns iterins) | 527 | static LoopEvent rec_iterl(jit_State *J, const BCIns iterins) |
500 | { | 528 | { |
501 | BCReg ra = bc_a(iterins); | 529 | BCReg ra = bc_a(iterins); |
502 | lua_assert(J->base[ra] != 0); | 530 | if (!tref_isnil(getslot(J, ra))) { /* Looping back? */ |
503 | if (!tref_isnil(J->base[ra])) { /* Looping back? */ | ||
504 | J->base[ra-1] = J->base[ra]; /* Copy result of ITERC to control var. */ | 531 | J->base[ra-1] = J->base[ra]; /* Copy result of ITERC to control var. */ |
505 | J->maxslot = ra-1+bc_b(J->pc[-1]); | 532 | J->maxslot = ra-1+bc_b(J->pc[-1]); |
506 | J->pc += bc_j(iterins)+1; | 533 | J->pc += bc_j(iterins)+1; |
@@ -538,12 +565,12 @@ static int innerloopleft(jit_State *J, const BCIns *pc) | |||
538 | /* Handle the case when an interpreted loop op is hit. */ | 565 | /* Handle the case when an interpreted loop op is hit. */ |
539 | static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev) | 566 | static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev) |
540 | { | 567 | { |
541 | if (J->parent == 0) { | 568 | if (J->parent == 0 && J->exitno == 0) { |
542 | if (pc == J->startpc && J->framedepth + J->retdepth == 0) { | 569 | if (pc == J->startpc && J->framedepth + J->retdepth == 0) { |
543 | /* Same loop? */ | 570 | /* Same loop? */ |
544 | if (ev == LOOPEV_LEAVE) /* Must loop back to form a root trace. */ | 571 | if (ev == LOOPEV_LEAVE) /* Must loop back to form a root trace. */ |
545 | lj_trace_err(J, LJ_TRERR_LLEAVE); | 572 | lj_trace_err(J, LJ_TRERR_LLEAVE); |
546 | rec_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Looping root trace. */ | 573 | lj_record_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Looping trace. */ |
547 | } else if (ev != LOOPEV_LEAVE) { /* Entering inner loop? */ | 574 | } else if (ev != LOOPEV_LEAVE) { /* Entering inner loop? */ |
548 | /* It's usually better to abort here and wait until the inner loop | 575 | /* It's usually better to abort here and wait until the inner loop |
549 | ** is traced. But if the inner loop repeatedly didn't loop back, | 576 | ** is traced. But if the inner loop repeatedly didn't loop back, |
@@ -568,18 +595,64 @@ static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev) | |||
568 | /* Handle the case when an already compiled loop op is hit. */ | 595 | /* Handle the case when an already compiled loop op is hit. */ |
569 | static void rec_loop_jit(jit_State *J, TraceNo lnk, LoopEvent ev) | 596 | static void rec_loop_jit(jit_State *J, TraceNo lnk, LoopEvent ev) |
570 | { | 597 | { |
571 | if (J->parent == 0) { /* Root trace hit an inner loop. */ | 598 | if (J->parent == 0 && J->exitno == 0) { /* Root trace hit an inner loop. */ |
572 | /* Better let the inner loop spawn a side trace back here. */ | 599 | /* Better let the inner loop spawn a side trace back here. */ |
573 | lj_trace_err(J, LJ_TRERR_LINNER); | 600 | lj_trace_err(J, LJ_TRERR_LINNER); |
574 | } else if (ev != LOOPEV_LEAVE) { /* Side trace enters a compiled loop. */ | 601 | } else if (ev != LOOPEV_LEAVE) { /* Side trace enters a compiled loop. */ |
575 | J->instunroll = 0; /* Cannot continue across a compiled loop op. */ | 602 | J->instunroll = 0; /* Cannot continue across a compiled loop op. */ |
576 | if (J->pc == J->startpc && J->framedepth + J->retdepth == 0) | 603 | if (J->pc == J->startpc && J->framedepth + J->retdepth == 0) |
577 | rec_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Form an extra loop. */ | 604 | lj_record_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Form extra loop. */ |
578 | else | 605 | else |
579 | rec_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the loop. */ | 606 | lj_record_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the loop. */ |
580 | } /* Side trace continues across a loop that's left or not entered. */ | 607 | } /* Side trace continues across a loop that's left or not entered. */ |
581 | } | 608 | } |
582 | 609 | ||
610 | /* -- Record profiler hook checks ----------------------------------------- */ | ||
611 | |||
612 | #if LJ_HASPROFILE | ||
613 | |||
614 | /* Need to insert profiler hook check? */ | ||
615 | static int rec_profile_need(jit_State *J, GCproto *pt, const BCIns *pc) | ||
616 | { | ||
617 | GCproto *ppt; | ||
618 | lua_assert(J->prof_mode == 'f' || J->prof_mode == 'l'); | ||
619 | if (!pt) | ||
620 | return 0; | ||
621 | ppt = J->prev_pt; | ||
622 | J->prev_pt = pt; | ||
623 | if (pt != ppt && ppt) { | ||
624 | J->prev_line = -1; | ||
625 | return 1; | ||
626 | } | ||
627 | if (J->prof_mode == 'l') { | ||
628 | BCLine line = lj_debug_line(pt, proto_bcpos(pt, pc)); | ||
629 | BCLine pline = J->prev_line; | ||
630 | J->prev_line = line; | ||
631 | if (pline != line) | ||
632 | return 1; | ||
633 | } | ||
634 | return 0; | ||
635 | } | ||
636 | |||
637 | static void rec_profile_ins(jit_State *J, const BCIns *pc) | ||
638 | { | ||
639 | if (J->prof_mode && rec_profile_need(J, J->pt, pc)) { | ||
640 | emitir(IRTG(IR_PROF, IRT_NIL), 0, 0); | ||
641 | lj_snap_add(J); | ||
642 | } | ||
643 | } | ||
644 | |||
645 | static void rec_profile_ret(jit_State *J) | ||
646 | { | ||
647 | if (J->prof_mode == 'f') { | ||
648 | emitir(IRTG(IR_PROF, IRT_NIL), 0, 0); | ||
649 | J->prev_pt = NULL; | ||
650 | lj_snap_add(J); | ||
651 | } | ||
652 | } | ||
653 | |||
654 | #endif | ||
655 | |||
583 | /* -- Record calls and returns -------------------------------------------- */ | 656 | /* -- Record calls and returns -------------------------------------------- */ |
584 | 657 | ||
585 | /* Specialize to the runtime value of the called function or its prototype. */ | 658 | /* Specialize to the runtime value of the called function or its prototype. */ |
@@ -590,11 +663,26 @@ static TRef rec_call_specialize(jit_State *J, GCfunc *fn, TRef tr) | |||
590 | GCproto *pt = funcproto(fn); | 663 | GCproto *pt = funcproto(fn); |
591 | /* Too many closures created? Probably not a monomorphic function. */ | 664 | /* Too many closures created? Probably not a monomorphic function. */ |
592 | if (pt->flags >= PROTO_CLC_POLY) { /* Specialize to prototype instead. */ | 665 | if (pt->flags >= PROTO_CLC_POLY) { /* Specialize to prototype instead. */ |
593 | TRef trpt = emitir(IRT(IR_FLOAD, IRT_P32), tr, IRFL_FUNC_PC); | 666 | TRef trpt = emitir(IRT(IR_FLOAD, IRT_PGC), tr, IRFL_FUNC_PC); |
594 | emitir(IRTG(IR_EQ, IRT_P32), trpt, lj_ir_kptr(J, proto_bc(pt))); | 667 | emitir(IRTG(IR_EQ, IRT_PGC), trpt, lj_ir_kptr(J, proto_bc(pt))); |
595 | (void)lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); /* Prevent GC of proto. */ | 668 | (void)lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); /* Prevent GC of proto. */ |
596 | return tr; | 669 | return tr; |
597 | } | 670 | } |
671 | } else { | ||
672 | /* Don't specialize to non-monomorphic builtins. */ | ||
673 | switch (fn->c.ffid) { | ||
674 | case FF_coroutine_wrap_aux: | ||
675 | case FF_string_gmatch_aux: | ||
676 | /* NYI: io_file_iter doesn't have an ffid, yet. */ | ||
677 | { /* Specialize to the ffid. */ | ||
678 | TRef trid = emitir(IRT(IR_FLOAD, IRT_U8), tr, IRFL_FUNC_FFID); | ||
679 | emitir(IRTG(IR_EQ, IRT_INT), trid, lj_ir_kint(J, fn->c.ffid)); | ||
680 | } | ||
681 | return tr; | ||
682 | default: | ||
683 | /* NYI: don't specialize to non-monomorphic C functions. */ | ||
684 | break; | ||
685 | } | ||
598 | } | 686 | } |
599 | /* Otherwise specialize to the function (closure) value itself. */ | 687 | /* Otherwise specialize to the function (closure) value itself. */ |
600 | kfunc = lj_ir_kfunc(J, fn); | 688 | kfunc = lj_ir_kfunc(J, fn); |
@@ -607,21 +695,31 @@ static void rec_call_setup(jit_State *J, BCReg func, ptrdiff_t nargs) | |||
607 | { | 695 | { |
608 | RecordIndex ix; | 696 | RecordIndex ix; |
609 | TValue *functv = &J->L->base[func]; | 697 | TValue *functv = &J->L->base[func]; |
610 | TRef *fbase = &J->base[func]; | 698 | TRef kfunc, *fbase = &J->base[func]; |
611 | ptrdiff_t i; | 699 | ptrdiff_t i; |
612 | for (i = 0; i <= nargs; i++) | 700 | (void)getslot(J, func); /* Ensure func has a reference. */ |
613 | (void)getslot(J, func+i); /* Ensure func and all args have a reference. */ | 701 | for (i = 1; i <= nargs; i++) |
702 | (void)getslot(J, func+LJ_FR2+i); /* Ensure all args have a reference. */ | ||
614 | if (!tref_isfunc(fbase[0])) { /* Resolve __call metamethod. */ | 703 | if (!tref_isfunc(fbase[0])) { /* Resolve __call metamethod. */ |
615 | ix.tab = fbase[0]; | 704 | ix.tab = fbase[0]; |
616 | copyTV(J->L, &ix.tabv, functv); | 705 | copyTV(J->L, &ix.tabv, functv); |
617 | if (!lj_record_mm_lookup(J, &ix, MM_call) || !tref_isfunc(ix.mobj)) | 706 | if (!lj_record_mm_lookup(J, &ix, MM_call) || !tref_isfunc(ix.mobj)) |
618 | lj_trace_err(J, LJ_TRERR_NOMM); | 707 | lj_trace_err(J, LJ_TRERR_NOMM); |
619 | for (i = ++nargs; i > 0; i--) /* Shift arguments up. */ | 708 | for (i = ++nargs; i > LJ_FR2; i--) /* Shift arguments up. */ |
620 | fbase[i] = fbase[i-1]; | 709 | fbase[i+LJ_FR2] = fbase[i+LJ_FR2-1]; |
710 | #if LJ_FR2 | ||
711 | fbase[2] = fbase[0]; | ||
712 | #endif | ||
621 | fbase[0] = ix.mobj; /* Replace function. */ | 713 | fbase[0] = ix.mobj; /* Replace function. */ |
622 | functv = &ix.mobjv; | 714 | functv = &ix.mobjv; |
623 | } | 715 | } |
624 | fbase[0] = TREF_FRAME | rec_call_specialize(J, funcV(functv), fbase[0]); | 716 | kfunc = rec_call_specialize(J, funcV(functv), fbase[0]); |
717 | #if LJ_FR2 | ||
718 | fbase[0] = kfunc; | ||
719 | fbase[1] = TREF_FRAME; | ||
720 | #else | ||
721 | fbase[0] = kfunc | TREF_FRAME; | ||
722 | #endif | ||
625 | J->maxslot = (BCReg)nargs; | 723 | J->maxslot = (BCReg)nargs; |
626 | } | 724 | } |
627 | 725 | ||
@@ -631,8 +729,8 @@ void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs) | |||
631 | rec_call_setup(J, func, nargs); | 729 | rec_call_setup(J, func, nargs); |
632 | /* Bump frame. */ | 730 | /* Bump frame. */ |
633 | J->framedepth++; | 731 | J->framedepth++; |
634 | J->base += func+1; | 732 | J->base += func+1+LJ_FR2; |
635 | J->baseslot += func+1; | 733 | J->baseslot += func+1+LJ_FR2; |
636 | if (J->baseslot + J->maxslot >= LJ_MAX_JSLOTS) | 734 | if (J->baseslot + J->maxslot >= LJ_MAX_JSLOTS) |
637 | lj_trace_err(J, LJ_TRERR_STACKOV); | 735 | lj_trace_err(J, LJ_TRERR_STACKOV); |
638 | } | 736 | } |
@@ -650,7 +748,9 @@ void lj_record_tailcall(jit_State *J, BCReg func, ptrdiff_t nargs) | |||
650 | func += cbase; | 748 | func += cbase; |
651 | } | 749 | } |
652 | /* Move func + args down. */ | 750 | /* Move func + args down. */ |
653 | memmove(&J->base[-1], &J->base[func], sizeof(TRef)*(J->maxslot+1)); | 751 | if (LJ_FR2 && J->baseslot == 2) |
752 | J->base[func+1] = TREF_FRAME; | ||
753 | memmove(&J->base[-1-LJ_FR2], &J->base[func], sizeof(TRef)*(J->maxslot+1+LJ_FR2)); | ||
654 | /* Note: the new TREF_FRAME is now at J->base[-1] (even for slot #0). */ | 754 | /* Note: the new TREF_FRAME is now at J->base[-1] (even for slot #0). */ |
655 | /* Tailcalls can form a loop, so count towards the loop unroll limit. */ | 755 | /* Tailcalls can form a loop, so count towards the loop unroll limit. */ |
656 | if (++J->tailcalled > J->loopunroll) | 756 | if (++J->tailcalled > J->loopunroll) |
@@ -680,6 +780,8 @@ static int check_downrec_unroll(jit_State *J, GCproto *pt) | |||
680 | return 0; | 780 | return 0; |
681 | } | 781 | } |
682 | 782 | ||
783 | static TRef rec_cat(jit_State *J, BCReg baseslot, BCReg topslot); | ||
784 | |||
683 | /* Record return. */ | 785 | /* Record return. */ |
684 | void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) | 786 | void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) |
685 | { | 787 | { |
@@ -691,7 +793,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) | |||
691 | BCReg cbase = (BCReg)frame_delta(frame); | 793 | BCReg cbase = (BCReg)frame_delta(frame); |
692 | if (--J->framedepth <= 0) | 794 | if (--J->framedepth <= 0) |
693 | lj_trace_err(J, LJ_TRERR_NYIRETL); | 795 | lj_trace_err(J, LJ_TRERR_NYIRETL); |
694 | lua_assert(J->baseslot > 1); | 796 | lua_assert(J->baseslot > 1+LJ_FR2); |
695 | gotresults++; | 797 | gotresults++; |
696 | rbase += cbase; | 798 | rbase += cbase; |
697 | J->baseslot -= (BCReg)cbase; | 799 | J->baseslot -= (BCReg)cbase; |
@@ -702,19 +804,20 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) | |||
702 | /* Return to lower frame via interpreter for unhandled cases. */ | 804 | /* Return to lower frame via interpreter for unhandled cases. */ |
703 | if (J->framedepth == 0 && J->pt && bc_isret(bc_op(*J->pc)) && | 805 | if (J->framedepth == 0 && J->pt && bc_isret(bc_op(*J->pc)) && |
704 | (!frame_islua(frame) || | 806 | (!frame_islua(frame) || |
705 | (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))))) { | 807 | (J->parent == 0 && J->exitno == 0 && |
808 | !bc_isret(bc_op(J->cur.startins))))) { | ||
706 | /* NYI: specialize to frame type and return directly, not via RET*. */ | 809 | /* NYI: specialize to frame type and return directly, not via RET*. */ |
707 | for (i = 0; i < (ptrdiff_t)rbase; i++) | 810 | for (i = 0; i < (ptrdiff_t)rbase; i++) |
708 | J->base[i] = 0; /* Purge dead slots. */ | 811 | J->base[i] = 0; /* Purge dead slots. */ |
709 | J->maxslot = rbase + (BCReg)gotresults; | 812 | J->maxslot = rbase + (BCReg)gotresults; |
710 | rec_stop(J, LJ_TRLINK_RETURN, 0); /* Return to interpreter. */ | 813 | lj_record_stop(J, LJ_TRLINK_RETURN, 0); /* Return to interpreter. */ |
711 | return; | 814 | return; |
712 | } | 815 | } |
713 | if (frame_isvarg(frame)) { | 816 | if (frame_isvarg(frame)) { |
714 | BCReg cbase = (BCReg)frame_delta(frame); | 817 | BCReg cbase = (BCReg)frame_delta(frame); |
715 | if (--J->framedepth < 0) /* NYI: return of vararg func to lower frame. */ | 818 | if (--J->framedepth < 0) /* NYI: return of vararg func to lower frame. */ |
716 | lj_trace_err(J, LJ_TRERR_NYIRETL); | 819 | lj_trace_err(J, LJ_TRERR_NYIRETL); |
717 | lua_assert(J->baseslot > 1); | 820 | lua_assert(J->baseslot > 1+LJ_FR2); |
718 | rbase += cbase; | 821 | rbase += cbase; |
719 | J->baseslot -= (BCReg)cbase; | 822 | J->baseslot -= (BCReg)cbase; |
720 | J->base -= cbase; | 823 | J->base -= cbase; |
@@ -724,27 +827,28 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) | |||
724 | BCIns callins = *(frame_pc(frame)-1); | 827 | BCIns callins = *(frame_pc(frame)-1); |
725 | ptrdiff_t nresults = bc_b(callins) ? (ptrdiff_t)bc_b(callins)-1 :gotresults; | 828 | ptrdiff_t nresults = bc_b(callins) ? (ptrdiff_t)bc_b(callins)-1 :gotresults; |
726 | BCReg cbase = bc_a(callins); | 829 | BCReg cbase = bc_a(callins); |
727 | GCproto *pt = funcproto(frame_func(frame - (cbase+1))); | 830 | GCproto *pt = funcproto(frame_func(frame - (cbase+1+LJ_FR2))); |
728 | if ((pt->flags & PROTO_NOJIT)) | 831 | if ((pt->flags & PROTO_NOJIT)) |
729 | lj_trace_err(J, LJ_TRERR_CJITOFF); | 832 | lj_trace_err(J, LJ_TRERR_CJITOFF); |
730 | if (J->framedepth == 0 && J->pt && frame == J->L->base - 1) { | 833 | if (J->framedepth == 0 && J->pt && frame == J->L->base - 1) { |
731 | if (check_downrec_unroll(J, pt)) { | 834 | if (check_downrec_unroll(J, pt)) { |
732 | J->maxslot = (BCReg)(rbase + gotresults); | 835 | J->maxslot = (BCReg)(rbase + gotresults); |
733 | lj_snap_purge(J); | 836 | lj_snap_purge(J); |
734 | rec_stop(J, LJ_TRLINK_DOWNREC, J->cur.traceno); /* Down-recursion. */ | 837 | lj_record_stop(J, LJ_TRLINK_DOWNREC, J->cur.traceno); /* Down-rec. */ |
735 | return; | 838 | return; |
736 | } | 839 | } |
737 | lj_snap_add(J); | 840 | lj_snap_add(J); |
738 | } | 841 | } |
739 | for (i = 0; i < nresults; i++) /* Adjust results. */ | 842 | for (i = 0; i < nresults; i++) /* Adjust results. */ |
740 | J->base[i-1] = i < gotresults ? J->base[rbase+i] : TREF_NIL; | 843 | J->base[i-1-LJ_FR2] = i < gotresults ? J->base[rbase+i] : TREF_NIL; |
741 | J->maxslot = cbase+(BCReg)nresults; | 844 | J->maxslot = cbase+(BCReg)nresults; |
742 | if (J->framedepth > 0) { /* Return to a frame that is part of the trace. */ | 845 | if (J->framedepth > 0) { /* Return to a frame that is part of the trace. */ |
743 | J->framedepth--; | 846 | J->framedepth--; |
744 | lua_assert(J->baseslot > cbase+1); | 847 | lua_assert(J->baseslot > cbase+1+LJ_FR2); |
745 | J->baseslot -= cbase+1; | 848 | J->baseslot -= cbase+1+LJ_FR2; |
746 | J->base -= cbase+1; | 849 | J->base -= cbase+1+LJ_FR2; |
747 | } else if (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))) { | 850 | } else if (J->parent == 0 && J->exitno == 0 && |
851 | !bc_isret(bc_op(J->cur.startins))) { | ||
748 | /* Return to lower frame would leave the loop in a root trace. */ | 852 | /* Return to lower frame would leave the loop in a root trace. */ |
749 | lj_trace_err(J, LJ_TRERR_LLEAVE); | 853 | lj_trace_err(J, LJ_TRERR_LLEAVE); |
750 | } else if (J->needsnap) { /* Tailcalled to ff with side-effects. */ | 854 | } else if (J->needsnap) { /* Tailcalled to ff with side-effects. */ |
@@ -752,13 +856,13 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) | |||
752 | } else { /* Return to lower frame. Guard for the target we return to. */ | 856 | } else { /* Return to lower frame. Guard for the target we return to. */ |
753 | TRef trpt = lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); | 857 | TRef trpt = lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); |
754 | TRef trpc = lj_ir_kptr(J, (void *)frame_pc(frame)); | 858 | TRef trpc = lj_ir_kptr(J, (void *)frame_pc(frame)); |
755 | emitir(IRTG(IR_RETF, IRT_P32), trpt, trpc); | 859 | emitir(IRTG(IR_RETF, IRT_PGC), trpt, trpc); |
756 | J->retdepth++; | 860 | J->retdepth++; |
757 | J->needsnap = 1; | 861 | J->needsnap = 1; |
758 | lua_assert(J->baseslot == 1); | 862 | lua_assert(J->baseslot == 1+LJ_FR2); |
759 | /* Shift result slots up and clear the slots of the new frame below. */ | 863 | /* Shift result slots up and clear the slots of the new frame below. */ |
760 | memmove(J->base + cbase, J->base-1, sizeof(TRef)*nresults); | 864 | memmove(J->base + cbase, J->base-1-LJ_FR2, sizeof(TRef)*nresults); |
761 | memset(J->base-1, 0, sizeof(TRef)*(cbase+1)); | 865 | memset(J->base-1-LJ_FR2, 0, sizeof(TRef)*(cbase+1+LJ_FR2)); |
762 | } | 866 | } |
763 | } else if (frame_iscont(frame)) { /* Return to continuation frame. */ | 867 | } else if (frame_iscont(frame)) { /* Return to continuation frame. */ |
764 | ASMFunction cont = frame_contf(frame); | 868 | ASMFunction cont = frame_contf(frame); |
@@ -767,16 +871,40 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) | |||
767 | lj_trace_err(J, LJ_TRERR_NYIRETL); | 871 | lj_trace_err(J, LJ_TRERR_NYIRETL); |
768 | J->baseslot -= (BCReg)cbase; | 872 | J->baseslot -= (BCReg)cbase; |
769 | J->base -= cbase; | 873 | J->base -= cbase; |
770 | J->maxslot = cbase-2; | 874 | J->maxslot = cbase-(2<<LJ_FR2); |
771 | if (cont == lj_cont_ra) { | 875 | if (cont == lj_cont_ra) { |
772 | /* Copy result to destination slot. */ | 876 | /* Copy result to destination slot. */ |
773 | BCReg dst = bc_a(*(frame_contpc(frame)-1)); | 877 | BCReg dst = bc_a(*(frame_contpc(frame)-1)); |
774 | J->base[dst] = gotresults ? J->base[cbase+rbase] : TREF_NIL; | 878 | J->base[dst] = gotresults ? J->base[cbase+rbase] : TREF_NIL; |
775 | if (dst >= J->maxslot) J->maxslot = dst+1; | 879 | if (dst >= J->maxslot) { |
880 | J->maxslot = dst+1; | ||
881 | } | ||
776 | } else if (cont == lj_cont_nop) { | 882 | } else if (cont == lj_cont_nop) { |
777 | /* Nothing to do here. */ | 883 | /* Nothing to do here. */ |
778 | } else if (cont == lj_cont_cat) { | 884 | } else if (cont == lj_cont_cat) { |
779 | lua_assert(0); | 885 | BCReg bslot = bc_b(*(frame_contpc(frame)-1)); |
886 | TRef tr = gotresults ? J->base[cbase+rbase] : TREF_NIL; | ||
887 | if (bslot != J->maxslot) { /* Concatenate the remainder. */ | ||
888 | TValue *b = J->L->base, save; /* Simulate lower frame and result. */ | ||
889 | J->base[J->maxslot] = tr; | ||
890 | copyTV(J->L, &save, b-(2<<LJ_FR2)); | ||
891 | if (gotresults) | ||
892 | copyTV(J->L, b-(2<<LJ_FR2), b+rbase); | ||
893 | else | ||
894 | setnilV(b-(2<<LJ_FR2)); | ||
895 | J->L->base = b - cbase; | ||
896 | tr = rec_cat(J, bslot, cbase-(2<<LJ_FR2)); | ||
897 | b = J->L->base + cbase; /* Undo. */ | ||
898 | J->L->base = b; | ||
899 | copyTV(J->L, b-(2<<LJ_FR2), &save); | ||
900 | } | ||
901 | if (tr) { /* Store final result. */ | ||
902 | BCReg dst = bc_a(*(frame_contpc(frame)-1)); | ||
903 | J->base[dst] = tr; | ||
904 | if (dst >= J->maxslot) { | ||
905 | J->maxslot = dst+1; | ||
906 | } | ||
907 | } /* Otherwise continue with another __concat call. */ | ||
780 | } else { | 908 | } else { |
781 | /* Result type already specialized. */ | 909 | /* Result type already specialized. */ |
782 | lua_assert(cont == lj_cont_condf || cont == lj_cont_condt); | 910 | lua_assert(cont == lj_cont_condf || cont == lj_cont_condt); |
@@ -784,7 +912,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) | |||
784 | } else { | 912 | } else { |
785 | lj_trace_err(J, LJ_TRERR_NYIRETL); /* NYI: handle return to C frame. */ | 913 | lj_trace_err(J, LJ_TRERR_NYIRETL); /* NYI: handle return to C frame. */ |
786 | } | 914 | } |
787 | lua_assert(J->baseslot >= 1); | 915 | lua_assert(J->baseslot >= 1+LJ_FR2); |
788 | } | 916 | } |
789 | 917 | ||
790 | /* -- Metamethod handling ------------------------------------------------- */ | 918 | /* -- Metamethod handling ------------------------------------------------- */ |
@@ -792,19 +920,17 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) | |||
792 | /* Prepare to record call to metamethod. */ | 920 | /* Prepare to record call to metamethod. */ |
793 | static BCReg rec_mm_prep(jit_State *J, ASMFunction cont) | 921 | static BCReg rec_mm_prep(jit_State *J, ASMFunction cont) |
794 | { | 922 | { |
795 | BCReg s, top = curr_proto(J->L)->framesize; | 923 | BCReg s, top = cont == lj_cont_cat ? J->maxslot : curr_proto(J->L)->framesize; |
796 | TRef trcont; | 924 | #if LJ_FR2 |
797 | setcont(&J->L->base[top], cont); | 925 | J->base[top] = lj_ir_k64(J, IR_KNUM, u64ptr(contptr(cont))); |
798 | #if LJ_64 | 926 | J->base[top+1] = TREF_CONT; |
799 | trcont = lj_ir_kptr(J, (void *)((int64_t)cont - (int64_t)lj_vm_asm_begin)); | ||
800 | #else | 927 | #else |
801 | trcont = lj_ir_kptr(J, (void *)cont); | 928 | J->base[top] = lj_ir_kptr(J, contptr(cont)) | TREF_CONT; |
802 | #endif | 929 | #endif |
803 | J->base[top] = trcont | TREF_CONT; | ||
804 | J->framedepth++; | 930 | J->framedepth++; |
805 | for (s = J->maxslot; s < top; s++) | 931 | for (s = J->maxslot; s < top; s++) |
806 | J->base[s] = 0; /* Clear frame gap to avoid resurrecting previous refs. */ | 932 | J->base[s] = 0; /* Clear frame gap to avoid resurrecting previous refs. */ |
807 | return top+1; | 933 | return top+1+LJ_FR2; |
808 | } | 934 | } |
809 | 935 | ||
810 | /* Record metamethod lookup. */ | 936 | /* Record metamethod lookup. */ |
@@ -823,7 +949,7 @@ int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm) | |||
823 | cTValue *mo; | 949 | cTValue *mo; |
824 | if (LJ_HASFFI && udtype == UDTYPE_FFI_CLIB) { | 950 | if (LJ_HASFFI && udtype == UDTYPE_FFI_CLIB) { |
825 | /* Specialize to the C library namespace object. */ | 951 | /* Specialize to the C library namespace object. */ |
826 | emitir(IRTG(IR_EQ, IRT_P32), ix->tab, lj_ir_kptr(J, udataV(&ix->tabv))); | 952 | emitir(IRTG(IR_EQ, IRT_PGC), ix->tab, lj_ir_kptr(J, udataV(&ix->tabv))); |
827 | } else { | 953 | } else { |
828 | /* Specialize to the type of userdata. */ | 954 | /* Specialize to the type of userdata. */ |
829 | TRef tr = emitir(IRT(IR_FLOAD, IRT_U8), ix->tab, IRFL_UDATA_UDTYPE); | 955 | TRef tr = emitir(IRT(IR_FLOAD, IRT_U8), ix->tab, IRFL_UDATA_UDTYPE); |
@@ -852,7 +978,13 @@ int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm) | |||
852 | } | 978 | } |
853 | /* The cdata metatable is treated as immutable. */ | 979 | /* The cdata metatable is treated as immutable. */ |
854 | if (LJ_HASFFI && tref_iscdata(ix->tab)) goto immutable_mt; | 980 | if (LJ_HASFFI && tref_iscdata(ix->tab)) goto immutable_mt; |
981 | #if LJ_GC64 | ||
982 | /* TODO: fix ARM32 asm_fload(), so we can use this for all archs. */ | ||
983 | ix->mt = mix.tab = lj_ir_ggfload(J, IRT_TAB, | ||
984 | GG_OFS(g.gcroot[GCROOT_BASEMT+itypemap(&ix->tabv)])); | ||
985 | #else | ||
855 | ix->mt = mix.tab = lj_ir_ktab(J, mt); | 986 | ix->mt = mix.tab = lj_ir_ktab(J, mt); |
987 | #endif | ||
856 | goto nocheck; | 988 | goto nocheck; |
857 | } | 989 | } |
858 | ix->mt = mt ? mix.tab : TREF_NIL; | 990 | ix->mt = mt ? mix.tab : TREF_NIL; |
@@ -879,12 +1011,12 @@ nocheck: | |||
879 | static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm) | 1011 | static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm) |
880 | { | 1012 | { |
881 | /* Set up metamethod call first to save ix->tab and ix->tabv. */ | 1013 | /* Set up metamethod call first to save ix->tab and ix->tabv. */ |
882 | BCReg func = rec_mm_prep(J, lj_cont_ra); | 1014 | BCReg func = rec_mm_prep(J, mm == MM_concat ? lj_cont_cat : lj_cont_ra); |
883 | TRef *base = J->base + func; | 1015 | TRef *base = J->base + func; |
884 | TValue *basev = J->L->base + func; | 1016 | TValue *basev = J->L->base + func; |
885 | base[1] = ix->tab; base[2] = ix->key; | 1017 | base[1+LJ_FR2] = ix->tab; base[2+LJ_FR2] = ix->key; |
886 | copyTV(J->L, basev+1, &ix->tabv); | 1018 | copyTV(J->L, basev+1+LJ_FR2, &ix->tabv); |
887 | copyTV(J->L, basev+2, &ix->keyv); | 1019 | copyTV(J->L, basev+2+LJ_FR2, &ix->keyv); |
888 | if (!lj_record_mm_lookup(J, ix, mm)) { /* Lookup mm on 1st operand. */ | 1020 | if (!lj_record_mm_lookup(J, ix, mm)) { /* Lookup mm on 1st operand. */ |
889 | if (mm != MM_unm) { | 1021 | if (mm != MM_unm) { |
890 | ix->tab = ix->key; | 1022 | ix->tab = ix->key; |
@@ -896,6 +1028,9 @@ static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm) | |||
896 | } | 1028 | } |
897 | ok: | 1029 | ok: |
898 | base[0] = ix->mobj; | 1030 | base[0] = ix->mobj; |
1031 | #if LJ_FR2 | ||
1032 | base[1] = 0; | ||
1033 | #endif | ||
899 | copyTV(J->L, basev+0, &ix->mobjv); | 1034 | copyTV(J->L, basev+0, &ix->mobjv); |
900 | lj_record_call(J, func, 2); | 1035 | lj_record_call(J, func, 2); |
901 | return 0; /* No result yet. */ | 1036 | return 0; /* No result yet. */ |
@@ -912,6 +1047,8 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv) | |||
912 | TRef *base = J->base + func; | 1047 | TRef *base = J->base + func; |
913 | TValue *basev = J->L->base + func; | 1048 | TValue *basev = J->L->base + func; |
914 | base[0] = ix.mobj; copyTV(J->L, basev+0, &ix.mobjv); | 1049 | base[0] = ix.mobj; copyTV(J->L, basev+0, &ix.mobjv); |
1050 | base += LJ_FR2; | ||
1051 | basev += LJ_FR2; | ||
915 | base[1] = tr; copyTV(J->L, basev+1, tv); | 1052 | base[1] = tr; copyTV(J->L, basev+1, tv); |
916 | #if LJ_52 | 1053 | #if LJ_52 |
917 | base[2] = tr; copyTV(J->L, basev+2, tv); | 1054 | base[2] = tr; copyTV(J->L, basev+2, tv); |
@@ -931,10 +1068,10 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv) | |||
931 | static void rec_mm_callcomp(jit_State *J, RecordIndex *ix, int op) | 1068 | static void rec_mm_callcomp(jit_State *J, RecordIndex *ix, int op) |
932 | { | 1069 | { |
933 | BCReg func = rec_mm_prep(J, (op&1) ? lj_cont_condf : lj_cont_condt); | 1070 | BCReg func = rec_mm_prep(J, (op&1) ? lj_cont_condf : lj_cont_condt); |
934 | TRef *base = J->base + func; | 1071 | TRef *base = J->base + func + LJ_FR2; |
935 | TValue *tv = J->L->base + func; | 1072 | TValue *tv = J->L->base + func + LJ_FR2; |
936 | base[0] = ix->mobj; base[1] = ix->val; base[2] = ix->key; | 1073 | base[-LJ_FR2] = ix->mobj; base[1] = ix->val; base[2] = ix->key; |
937 | copyTV(J->L, tv+0, &ix->mobjv); | 1074 | copyTV(J->L, tv-LJ_FR2, &ix->mobjv); |
938 | copyTV(J->L, tv+1, &ix->valv); | 1075 | copyTV(J->L, tv+1, &ix->valv); |
939 | copyTV(J->L, tv+2, &ix->keyv); | 1076 | copyTV(J->L, tv+2, &ix->keyv); |
940 | lj_record_call(J, func, 2); | 1077 | lj_record_call(J, func, 2); |
@@ -1041,6 +1178,72 @@ static void rec_mm_comp_cdata(jit_State *J, RecordIndex *ix, int op, MMS mm) | |||
1041 | 1178 | ||
1042 | /* -- Indexed access ------------------------------------------------------ */ | 1179 | /* -- Indexed access ------------------------------------------------------ */ |
1043 | 1180 | ||
1181 | #ifdef LUAJIT_ENABLE_TABLE_BUMP | ||
1182 | /* Bump table allocations in bytecode when they grow during recording. */ | ||
1183 | static void rec_idx_bump(jit_State *J, RecordIndex *ix) | ||
1184 | { | ||
1185 | RBCHashEntry *rbc = &J->rbchash[(ix->tab & (RBCHASH_SLOTS-1))]; | ||
1186 | if (tref_ref(ix->tab) == rbc->ref) { | ||
1187 | const BCIns *pc = mref(rbc->pc, const BCIns); | ||
1188 | GCtab *tb = tabV(&ix->tabv); | ||
1189 | uint32_t nhbits; | ||
1190 | IRIns *ir; | ||
1191 | if (!tvisnil(&ix->keyv)) | ||
1192 | (void)lj_tab_set(J->L, tb, &ix->keyv); /* Grow table right now. */ | ||
1193 | nhbits = tb->hmask > 0 ? lj_fls(tb->hmask)+1 : 0; | ||
1194 | ir = IR(tref_ref(ix->tab)); | ||
1195 | if (ir->o == IR_TNEW) { | ||
1196 | uint32_t ah = bc_d(*pc); | ||
1197 | uint32_t asize = ah & 0x7ff, hbits = ah >> 11; | ||
1198 | if (nhbits > hbits) hbits = nhbits; | ||
1199 | if (tb->asize > asize) { | ||
1200 | asize = tb->asize <= 0x7ff ? tb->asize : 0x7ff; | ||
1201 | } | ||
1202 | if ((asize | (hbits<<11)) != ah) { /* Has the size changed? */ | ||
1203 | /* Patch bytecode, but continue recording (for more patching). */ | ||
1204 | setbc_d(pc, (asize | (hbits<<11))); | ||
1205 | /* Patching TNEW operands is only safe if the trace is aborted. */ | ||
1206 | ir->op1 = asize; ir->op2 = hbits; | ||
1207 | J->retryrec = 1; /* Abort the trace at the end of recording. */ | ||
1208 | } | ||
1209 | } else if (ir->o == IR_TDUP) { | ||
1210 | GCtab *tpl = gco2tab(proto_kgc(&gcref(rbc->pt)->pt, ~(ptrdiff_t)bc_d(*pc))); | ||
1211 | /* Grow template table, but preserve keys with nil values. */ | ||
1212 | if ((tb->asize > tpl->asize && (1u << nhbits)-1 == tpl->hmask) || | ||
1213 | (tb->asize == tpl->asize && (1u << nhbits)-1 > tpl->hmask)) { | ||
1214 | Node *node = noderef(tpl->node); | ||
1215 | uint32_t i, hmask = tpl->hmask, asize; | ||
1216 | TValue *array; | ||
1217 | for (i = 0; i <= hmask; i++) { | ||
1218 | if (!tvisnil(&node[i].key) && tvisnil(&node[i].val)) | ||
1219 | settabV(J->L, &node[i].val, tpl); | ||
1220 | } | ||
1221 | if (!tvisnil(&ix->keyv) && tref_isk(ix->key)) { | ||
1222 | TValue *o = lj_tab_set(J->L, tpl, &ix->keyv); | ||
1223 | if (tvisnil(o)) settabV(J->L, o, tpl); | ||
1224 | } | ||
1225 | lj_tab_resize(J->L, tpl, tb->asize, nhbits); | ||
1226 | node = noderef(tpl->node); | ||
1227 | hmask = tpl->hmask; | ||
1228 | for (i = 0; i <= hmask; i++) { | ||
1229 | /* This is safe, since template tables only hold immutable values. */ | ||
1230 | if (tvistab(&node[i].val)) | ||
1231 | setnilV(&node[i].val); | ||
1232 | } | ||
1233 | /* The shape of the table may have changed. Clean up array part, too. */ | ||
1234 | asize = tpl->asize; | ||
1235 | array = tvref(tpl->array); | ||
1236 | for (i = 0; i < asize; i++) { | ||
1237 | if (tvistab(&array[i])) | ||
1238 | setnilV(&array[i]); | ||
1239 | } | ||
1240 | J->retryrec = 1; /* Abort the trace at the end of recording. */ | ||
1241 | } | ||
1242 | } | ||
1243 | } | ||
1244 | } | ||
1245 | #endif | ||
1246 | |||
1044 | /* Record bounds-check. */ | 1247 | /* Record bounds-check. */ |
1045 | static void rec_idx_abc(jit_State *J, TRef asizeref, TRef ikey, uint32_t asize) | 1248 | static void rec_idx_abc(jit_State *J, TRef asizeref, TRef ikey, uint32_t asize) |
1046 | { | 1249 | { |
@@ -1080,11 +1283,14 @@ static void rec_idx_abc(jit_State *J, TRef asizeref, TRef ikey, uint32_t asize) | |||
1080 | } | 1283 | } |
1081 | 1284 | ||
1082 | /* Record indexed key lookup. */ | 1285 | /* Record indexed key lookup. */ |
1083 | static TRef rec_idx_key(jit_State *J, RecordIndex *ix) | 1286 | static TRef rec_idx_key(jit_State *J, RecordIndex *ix, IRRef *rbref, |
1287 | IRType1 *rbguard) | ||
1084 | { | 1288 | { |
1085 | TRef key; | 1289 | TRef key; |
1086 | GCtab *t = tabV(&ix->tabv); | 1290 | GCtab *t = tabV(&ix->tabv); |
1087 | ix->oldv = lj_tab_get(J->L, t, &ix->keyv); /* Lookup previous value. */ | 1291 | ix->oldv = lj_tab_get(J->L, t, &ix->keyv); /* Lookup previous value. */ |
1292 | *rbref = 0; | ||
1293 | rbguard->irt = 0; | ||
1088 | 1294 | ||
1089 | /* Integer keys are looked up in the array part first. */ | 1295 | /* Integer keys are looked up in the array part first. */ |
1090 | key = ix->key; | 1296 | key = ix->key; |
@@ -1098,8 +1304,8 @@ static TRef rec_idx_key(jit_State *J, RecordIndex *ix) | |||
1098 | if ((MSize)k < t->asize) { /* Currently an array key? */ | 1304 | if ((MSize)k < t->asize) { /* Currently an array key? */ |
1099 | TRef arrayref; | 1305 | TRef arrayref; |
1100 | rec_idx_abc(J, asizeref, ikey, t->asize); | 1306 | rec_idx_abc(J, asizeref, ikey, t->asize); |
1101 | arrayref = emitir(IRT(IR_FLOAD, IRT_P32), ix->tab, IRFL_TAB_ARRAY); | 1307 | arrayref = emitir(IRT(IR_FLOAD, IRT_PGC), ix->tab, IRFL_TAB_ARRAY); |
1102 | return emitir(IRT(IR_AREF, IRT_P32), arrayref, ikey); | 1308 | return emitir(IRT(IR_AREF, IRT_PGC), arrayref, ikey); |
1103 | } else { /* Currently not in array (may be an array extension)? */ | 1309 | } else { /* Currently not in array (may be an array extension)? */ |
1104 | emitir(IRTGI(IR_ULE), asizeref, ikey); /* Inv. bounds check. */ | 1310 | emitir(IRTGI(IR_ULE), asizeref, ikey); /* Inv. bounds check. */ |
1105 | if (k == 0 && tref_isk(key)) | 1311 | if (k == 0 && tref_isk(key)) |
@@ -1134,16 +1340,18 @@ static TRef rec_idx_key(jit_State *J, RecordIndex *ix) | |||
1134 | MSize hslot = (MSize)((char *)ix->oldv - (char *)&noderef(t->node)[0].val); | 1340 | MSize hslot = (MSize)((char *)ix->oldv - (char *)&noderef(t->node)[0].val); |
1135 | if (t->hmask > 0 && hslot <= t->hmask*(MSize)sizeof(Node) && | 1341 | if (t->hmask > 0 && hslot <= t->hmask*(MSize)sizeof(Node) && |
1136 | hslot <= 65535*(MSize)sizeof(Node)) { | 1342 | hslot <= 65535*(MSize)sizeof(Node)) { |
1137 | TRef node, kslot; | 1343 | TRef node, kslot, hm; |
1138 | TRef hm = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_HMASK); | 1344 | *rbref = J->cur.nins; /* Mark possible rollback point. */ |
1345 | *rbguard = J->guardemit; | ||
1346 | hm = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_HMASK); | ||
1139 | emitir(IRTGI(IR_EQ), hm, lj_ir_kint(J, (int32_t)t->hmask)); | 1347 | emitir(IRTGI(IR_EQ), hm, lj_ir_kint(J, (int32_t)t->hmask)); |
1140 | node = emitir(IRT(IR_FLOAD, IRT_P32), ix->tab, IRFL_TAB_NODE); | 1348 | node = emitir(IRT(IR_FLOAD, IRT_PGC), ix->tab, IRFL_TAB_NODE); |
1141 | kslot = lj_ir_kslot(J, key, hslot / sizeof(Node)); | 1349 | kslot = lj_ir_kslot(J, key, hslot / sizeof(Node)); |
1142 | return emitir(IRTG(IR_HREFK, IRT_P32), node, kslot); | 1350 | return emitir(IRTG(IR_HREFK, IRT_PGC), node, kslot); |
1143 | } | 1351 | } |
1144 | } | 1352 | } |
1145 | /* Fall back to a regular hash lookup. */ | 1353 | /* Fall back to a regular hash lookup. */ |
1146 | return emitir(IRT(IR_HREF, IRT_P32), ix->tab, key); | 1354 | return emitir(IRT(IR_HREF, IRT_PGC), ix->tab, key); |
1147 | } | 1355 | } |
1148 | 1356 | ||
1149 | /* Determine whether a key is NOT one of the fast metamethod names. */ | 1357 | /* Determine whether a key is NOT one of the fast metamethod names. */ |
@@ -1168,6 +1376,8 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) | |||
1168 | { | 1376 | { |
1169 | TRef xref; | 1377 | TRef xref; |
1170 | IROp xrefop, loadop; | 1378 | IROp xrefop, loadop; |
1379 | IRRef rbref; | ||
1380 | IRType1 rbguard; | ||
1171 | cTValue *oldv; | 1381 | cTValue *oldv; |
1172 | 1382 | ||
1173 | while (!tref_istab(ix->tab)) { /* Handle non-table lookup. */ | 1383 | while (!tref_istab(ix->tab)) { /* Handle non-table lookup. */ |
@@ -1178,10 +1388,10 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) | |||
1178 | handlemm: | 1388 | handlemm: |
1179 | if (tref_isfunc(ix->mobj)) { /* Handle metamethod call. */ | 1389 | if (tref_isfunc(ix->mobj)) { /* Handle metamethod call. */ |
1180 | BCReg func = rec_mm_prep(J, ix->val ? lj_cont_nop : lj_cont_ra); | 1390 | BCReg func = rec_mm_prep(J, ix->val ? lj_cont_nop : lj_cont_ra); |
1181 | TRef *base = J->base + func; | 1391 | TRef *base = J->base + func + LJ_FR2; |
1182 | TValue *tv = J->L->base + func; | 1392 | TValue *tv = J->L->base + func + LJ_FR2; |
1183 | base[0] = ix->mobj; base[1] = ix->tab; base[2] = ix->key; | 1393 | base[-LJ_FR2] = ix->mobj; base[1] = ix->tab; base[2] = ix->key; |
1184 | setfuncV(J->L, tv+0, funcV(&ix->mobjv)); | 1394 | setfuncV(J->L, tv-LJ_FR2, funcV(&ix->mobjv)); |
1185 | copyTV(J->L, tv+1, &ix->tabv); | 1395 | copyTV(J->L, tv+1, &ix->tabv); |
1186 | copyTV(J->L, tv+2, &ix->keyv); | 1396 | copyTV(J->L, tv+2, &ix->keyv); |
1187 | if (ix->val) { | 1397 | if (ix->val) { |
@@ -1213,7 +1423,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) | |||
1213 | } | 1423 | } |
1214 | 1424 | ||
1215 | /* Record the key lookup. */ | 1425 | /* Record the key lookup. */ |
1216 | xref = rec_idx_key(J, ix); | 1426 | xref = rec_idx_key(J, ix, &rbref, &rbguard); |
1217 | xrefop = IR(tref_ref(xref))->o; | 1427 | xrefop = IR(tref_ref(xref))->o; |
1218 | loadop = xrefop == IR_AREF ? IR_ALOAD : IR_HLOAD; | 1428 | loadop = xrefop == IR_AREF ? IR_ALOAD : IR_HLOAD; |
1219 | /* The lj_meta_tset() inconsistency is gone, but better play safe. */ | 1429 | /* The lj_meta_tset() inconsistency is gone, but better play safe. */ |
@@ -1223,11 +1433,15 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) | |||
1223 | IRType t = itype2irt(oldv); | 1433 | IRType t = itype2irt(oldv); |
1224 | TRef res; | 1434 | TRef res; |
1225 | if (oldv == niltvg(J2G(J))) { | 1435 | if (oldv == niltvg(J2G(J))) { |
1226 | emitir(IRTG(IR_EQ, IRT_P32), xref, lj_ir_kkptr(J, niltvg(J2G(J)))); | 1436 | emitir(IRTG(IR_EQ, IRT_PGC), xref, lj_ir_kkptr(J, niltvg(J2G(J)))); |
1227 | res = TREF_NIL; | 1437 | res = TREF_NIL; |
1228 | } else { | 1438 | } else { |
1229 | res = emitir(IRTG(loadop, t), xref, 0); | 1439 | res = emitir(IRTG(loadop, t), xref, 0); |
1230 | } | 1440 | } |
1441 | if (tref_ref(res) < rbref) { /* HREFK + load forwarded? */ | ||
1442 | lj_ir_rollback(J, rbref); /* Rollback to eliminate hmask guard. */ | ||
1443 | J->guardemit = rbguard; | ||
1444 | } | ||
1231 | if (t == IRT_NIL && ix->idxchain && lj_record_mm_lookup(J, ix, MM_index)) | 1445 | if (t == IRT_NIL && ix->idxchain && lj_record_mm_lookup(J, ix, MM_index)) |
1232 | goto handlemm; | 1446 | goto handlemm; |
1233 | if (irtype_ispri(t)) res = TREF_PRI(t); /* Canonicalize primitives. */ | 1447 | if (irtype_ispri(t)) res = TREF_PRI(t); /* Canonicalize primitives. */ |
@@ -1235,6 +1449,10 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) | |||
1235 | } else { /* Indexed store. */ | 1449 | } else { /* Indexed store. */ |
1236 | GCtab *mt = tabref(tabV(&ix->tabv)->metatable); | 1450 | GCtab *mt = tabref(tabV(&ix->tabv)->metatable); |
1237 | int keybarrier = tref_isgcv(ix->key) && !tref_isnil(ix->val); | 1451 | int keybarrier = tref_isgcv(ix->key) && !tref_isnil(ix->val); |
1452 | if (tref_ref(xref) < rbref) { /* HREFK forwarded? */ | ||
1453 | lj_ir_rollback(J, rbref); /* Rollback to eliminate hmask guard. */ | ||
1454 | J->guardemit = rbguard; | ||
1455 | } | ||
1238 | if (tvisnil(oldv)) { /* Previous value was nil? */ | 1456 | if (tvisnil(oldv)) { /* Previous value was nil? */ |
1239 | /* Need to duplicate the hasmm check for the early guards. */ | 1457 | /* Need to duplicate the hasmm check for the early guards. */ |
1240 | int hasmm = 0; | 1458 | int hasmm = 0; |
@@ -1245,7 +1463,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) | |||
1245 | if (hasmm) | 1463 | if (hasmm) |
1246 | emitir(IRTG(loadop, IRT_NIL), xref, 0); /* Guard for nil value. */ | 1464 | emitir(IRTG(loadop, IRT_NIL), xref, 0); /* Guard for nil value. */ |
1247 | else if (xrefop == IR_HREF) | 1465 | else if (xrefop == IR_HREF) |
1248 | emitir(IRTG(oldv == niltvg(J2G(J)) ? IR_EQ : IR_NE, IRT_P32), | 1466 | emitir(IRTG(oldv == niltvg(J2G(J)) ? IR_EQ : IR_NE, IRT_PGC), |
1249 | xref, lj_ir_kkptr(J, niltvg(J2G(J)))); | 1467 | xref, lj_ir_kkptr(J, niltvg(J2G(J)))); |
1250 | if (ix->idxchain && lj_record_mm_lookup(J, ix, MM_newindex)) { | 1468 | if (ix->idxchain && lj_record_mm_lookup(J, ix, MM_newindex)) { |
1251 | lua_assert(hasmm); | 1469 | lua_assert(hasmm); |
@@ -1256,13 +1474,17 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) | |||
1256 | TRef key = ix->key; | 1474 | TRef key = ix->key; |
1257 | if (tref_isinteger(key)) /* NEWREF needs a TValue as a key. */ | 1475 | if (tref_isinteger(key)) /* NEWREF needs a TValue as a key. */ |
1258 | key = emitir(IRTN(IR_CONV), key, IRCONV_NUM_INT); | 1476 | key = emitir(IRTN(IR_CONV), key, IRCONV_NUM_INT); |
1259 | xref = emitir(IRT(IR_NEWREF, IRT_P32), ix->tab, key); | 1477 | xref = emitir(IRT(IR_NEWREF, IRT_PGC), ix->tab, key); |
1260 | keybarrier = 0; /* NEWREF already takes care of the key barrier. */ | 1478 | keybarrier = 0; /* NEWREF already takes care of the key barrier. */ |
1479 | #ifdef LUAJIT_ENABLE_TABLE_BUMP | ||
1480 | if ((J->flags & JIT_F_OPT_SINK)) /* Avoid a separate flag. */ | ||
1481 | rec_idx_bump(J, ix); | ||
1482 | #endif | ||
1261 | } | 1483 | } |
1262 | } else if (!lj_opt_fwd_wasnonnil(J, loadop, tref_ref(xref))) { | 1484 | } else if (!lj_opt_fwd_wasnonnil(J, loadop, tref_ref(xref))) { |
1263 | /* Cannot derive that the previous value was non-nil, must do checks. */ | 1485 | /* Cannot derive that the previous value was non-nil, must do checks. */ |
1264 | if (xrefop == IR_HREF) /* Guard against store to niltv. */ | 1486 | if (xrefop == IR_HREF) /* Guard against store to niltv. */ |
1265 | emitir(IRTG(IR_NE, IRT_P32), xref, lj_ir_kkptr(J, niltvg(J2G(J)))); | 1487 | emitir(IRTG(IR_NE, IRT_PGC), xref, lj_ir_kkptr(J, niltvg(J2G(J)))); |
1266 | if (ix->idxchain) { /* Metamethod lookup required? */ | 1488 | if (ix->idxchain) { /* Metamethod lookup required? */ |
1267 | /* A check for NULL metatable is cheaper (hoistable) than a load. */ | 1489 | /* A check for NULL metatable is cheaper (hoistable) than a load. */ |
1268 | if (!mt) { | 1490 | if (!mt) { |
@@ -1284,7 +1506,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) | |||
1284 | emitir(IRT(IR_TBAR, IRT_NIL), ix->tab, 0); | 1506 | emitir(IRT(IR_TBAR, IRT_NIL), ix->tab, 0); |
1285 | /* Invalidate neg. metamethod cache for stores with certain string keys. */ | 1507 | /* Invalidate neg. metamethod cache for stores with certain string keys. */ |
1286 | if (!nommstr(J, ix->key)) { | 1508 | if (!nommstr(J, ix->key)) { |
1287 | TRef fref = emitir(IRT(IR_FREF, IRT_P32), ix->tab, IRFL_TAB_NOMM); | 1509 | TRef fref = emitir(IRT(IR_FREF, IRT_PGC), ix->tab, IRFL_TAB_NOMM); |
1288 | emitir(IRT(IR_FSTORE, IRT_U8), fref, lj_ir_kint(J, 0)); | 1510 | emitir(IRT(IR_FSTORE, IRT_U8), fref, lj_ir_kint(J, 0)); |
1289 | } | 1511 | } |
1290 | J->needsnap = 1; | 1512 | J->needsnap = 1; |
@@ -1292,6 +1514,31 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) | |||
1292 | } | 1514 | } |
1293 | } | 1515 | } |
1294 | 1516 | ||
1517 | static void rec_tsetm(jit_State *J, BCReg ra, BCReg rn, int32_t i) | ||
1518 | { | ||
1519 | RecordIndex ix; | ||
1520 | cTValue *basev = J->L->base; | ||
1521 | GCtab *t = tabV(&basev[ra-1]); | ||
1522 | settabV(J->L, &ix.tabv, t); | ||
1523 | ix.tab = getslot(J, ra-1); | ||
1524 | ix.idxchain = 0; | ||
1525 | #ifdef LUAJIT_ENABLE_TABLE_BUMP | ||
1526 | if ((J->flags & JIT_F_OPT_SINK)) { | ||
1527 | if (t->asize < i+rn-ra) | ||
1528 | lj_tab_reasize(J->L, t, i+rn-ra); | ||
1529 | setnilV(&ix.keyv); | ||
1530 | rec_idx_bump(J, &ix); | ||
1531 | } | ||
1532 | #endif | ||
1533 | for (; ra < rn; i++, ra++) { | ||
1534 | setintV(&ix.keyv, i); | ||
1535 | ix.key = lj_ir_kint(J, i); | ||
1536 | copyTV(J->L, &ix.valv, &basev[ra]); | ||
1537 | ix.val = getslot(J, ra); | ||
1538 | lj_record_idx(J, &ix); | ||
1539 | } | ||
1540 | } | ||
1541 | |||
1295 | /* -- Upvalue access ------------------------------------------------------ */ | 1542 | /* -- Upvalue access ------------------------------------------------------ */ |
1296 | 1543 | ||
1297 | /* Check whether upvalue is immutable and ok to constify. */ | 1544 | /* Check whether upvalue is immutable and ok to constify. */ |
@@ -1334,7 +1581,11 @@ static TRef rec_upvalue(jit_State *J, uint32_t uv, TRef val) | |||
1334 | goto noconstify; | 1581 | goto noconstify; |
1335 | kfunc = lj_ir_kfunc(J, J->fn); | 1582 | kfunc = lj_ir_kfunc(J, J->fn); |
1336 | emitir(IRTG(IR_EQ, IRT_FUNC), fn, kfunc); | 1583 | emitir(IRTG(IR_EQ, IRT_FUNC), fn, kfunc); |
1337 | J->base[-1] = TREF_FRAME | kfunc; | 1584 | #if LJ_FR2 |
1585 | J->base[-2] = kfunc; | ||
1586 | #else | ||
1587 | J->base[-1] = kfunc | TREF_FRAME; | ||
1588 | #endif | ||
1338 | fn = kfunc; | 1589 | fn = kfunc; |
1339 | } | 1590 | } |
1340 | tr = lj_record_constify(J, uvval(uvp)); | 1591 | tr = lj_record_constify(J, uvval(uvp)); |
@@ -1345,16 +1596,16 @@ noconstify: | |||
1345 | /* Note: this effectively limits LJ_MAX_UPVAL to 127. */ | 1596 | /* Note: this effectively limits LJ_MAX_UPVAL to 127. */ |
1346 | uv = (uv << 8) | (hashrot(uvp->dhash, uvp->dhash + HASH_BIAS) & 0xff); | 1597 | uv = (uv << 8) | (hashrot(uvp->dhash, uvp->dhash + HASH_BIAS) & 0xff); |
1347 | if (!uvp->closed) { | 1598 | if (!uvp->closed) { |
1348 | uref = tref_ref(emitir(IRTG(IR_UREFO, IRT_P32), fn, uv)); | 1599 | uref = tref_ref(emitir(IRTG(IR_UREFO, IRT_PGC), fn, uv)); |
1349 | /* In current stack? */ | 1600 | /* In current stack? */ |
1350 | if (uvval(uvp) >= tvref(J->L->stack) && | 1601 | if (uvval(uvp) >= tvref(J->L->stack) && |
1351 | uvval(uvp) < tvref(J->L->maxstack)) { | 1602 | uvval(uvp) < tvref(J->L->maxstack)) { |
1352 | int32_t slot = (int32_t)(uvval(uvp) - (J->L->base - J->baseslot)); | 1603 | int32_t slot = (int32_t)(uvval(uvp) - (J->L->base - J->baseslot)); |
1353 | if (slot >= 0) { /* Aliases an SSA slot? */ | 1604 | if (slot >= 0) { /* Aliases an SSA slot? */ |
1354 | emitir(IRTG(IR_EQ, IRT_P32), | 1605 | emitir(IRTG(IR_EQ, IRT_PGC), |
1355 | REF_BASE, | 1606 | REF_BASE, |
1356 | emitir(IRT(IR_ADD, IRT_P32), uref, | 1607 | emitir(IRT(IR_ADD, IRT_PGC), uref, |
1357 | lj_ir_kint(J, (slot - 1) * -8))); | 1608 | lj_ir_kint(J, (slot - 1 - LJ_FR2) * -8))); |
1358 | slot -= (int32_t)J->baseslot; /* Note: slot number may be negative! */ | 1609 | slot -= (int32_t)J->baseslot; /* Note: slot number may be negative! */ |
1359 | if (val == 0) { | 1610 | if (val == 0) { |
1360 | return getslot(J, slot); | 1611 | return getslot(J, slot); |
@@ -1365,12 +1616,12 @@ noconstify: | |||
1365 | } | 1616 | } |
1366 | } | 1617 | } |
1367 | } | 1618 | } |
1368 | emitir(IRTG(IR_UGT, IRT_P32), | 1619 | emitir(IRTG(IR_UGT, IRT_PGC), |
1369 | emitir(IRT(IR_SUB, IRT_P32), uref, REF_BASE), | 1620 | emitir(IRT(IR_SUB, IRT_PGC), uref, REF_BASE), |
1370 | lj_ir_kint(J, (J->baseslot + J->maxslot) * 8)); | 1621 | lj_ir_kint(J, (J->baseslot + J->maxslot) * 8)); |
1371 | } else { | 1622 | } else { |
1372 | needbarrier = 1; | 1623 | needbarrier = 1; |
1373 | uref = tref_ref(emitir(IRTG(IR_UREFC, IRT_P32), fn, uv)); | 1624 | uref = tref_ref(emitir(IRTG(IR_UREFC, IRT_PGC), fn, uv)); |
1374 | } | 1625 | } |
1375 | if (val == 0) { /* Upvalue load */ | 1626 | if (val == 0) { /* Upvalue load */ |
1376 | IRType t = itype2irt(uvval(uvp)); | 1627 | IRType t = itype2irt(uvval(uvp)); |
@@ -1409,9 +1660,9 @@ static void check_call_unroll(jit_State *J, TraceNo lnk) | |||
1409 | if (count + J->tailcalled > J->param[JIT_P_recunroll]) { | 1660 | if (count + J->tailcalled > J->param[JIT_P_recunroll]) { |
1410 | J->pc++; | 1661 | J->pc++; |
1411 | if (J->framedepth + J->retdepth == 0) | 1662 | if (J->framedepth + J->retdepth == 0) |
1412 | rec_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Tail-recursion. */ | 1663 | lj_record_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Tail-rec. */ |
1413 | else | 1664 | else |
1414 | rec_stop(J, LJ_TRLINK_UPREC, J->cur.traceno); /* Up-recursion. */ | 1665 | lj_record_stop(J, LJ_TRLINK_UPREC, J->cur.traceno); /* Up-recursion. */ |
1415 | } | 1666 | } |
1416 | } else { | 1667 | } else { |
1417 | if (count > J->param[JIT_P_callunroll]) { | 1668 | if (count > J->param[JIT_P_callunroll]) { |
@@ -1445,11 +1696,14 @@ static void rec_func_setup(jit_State *J) | |||
1445 | static void rec_func_vararg(jit_State *J) | 1696 | static void rec_func_vararg(jit_State *J) |
1446 | { | 1697 | { |
1447 | GCproto *pt = J->pt; | 1698 | GCproto *pt = J->pt; |
1448 | BCReg s, fixargs, vframe = J->maxslot+1; | 1699 | BCReg s, fixargs, vframe = J->maxslot+1+LJ_FR2; |
1449 | lua_assert((pt->flags & PROTO_VARARG)); | 1700 | lua_assert((pt->flags & PROTO_VARARG)); |
1450 | if (J->baseslot + vframe + pt->framesize >= LJ_MAX_JSLOTS) | 1701 | if (J->baseslot + vframe + pt->framesize >= LJ_MAX_JSLOTS) |
1451 | lj_trace_err(J, LJ_TRERR_STACKOV); | 1702 | lj_trace_err(J, LJ_TRERR_STACKOV); |
1452 | J->base[vframe-1] = J->base[-1]; /* Copy function up. */ | 1703 | J->base[vframe-1-LJ_FR2] = J->base[-1-LJ_FR2]; /* Copy function up. */ |
1704 | #if LJ_FR2 | ||
1705 | J->base[vframe-1] = TREF_FRAME; | ||
1706 | #endif | ||
1453 | /* Copy fixarg slots up and set their original slots to nil. */ | 1707 | /* Copy fixarg slots up and set their original slots to nil. */ |
1454 | fixargs = pt->numparams < J->maxslot ? pt->numparams : J->maxslot; | 1708 | fixargs = pt->numparams < J->maxslot ? pt->numparams : J->maxslot; |
1455 | for (s = 0; s < fixargs; s++) { | 1709 | for (s = 0; s < fixargs; s++) { |
@@ -1485,9 +1739,9 @@ static void rec_func_jit(jit_State *J, TraceNo lnk) | |||
1485 | } | 1739 | } |
1486 | J->instunroll = 0; /* Cannot continue across a compiled function. */ | 1740 | J->instunroll = 0; /* Cannot continue across a compiled function. */ |
1487 | if (J->pc == J->startpc && J->framedepth + J->retdepth == 0) | 1741 | if (J->pc == J->startpc && J->framedepth + J->retdepth == 0) |
1488 | rec_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Extra tail-recursion. */ | 1742 | lj_record_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Extra tail-rec. */ |
1489 | else | 1743 | else |
1490 | rec_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the function. */ | 1744 | lj_record_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the function. */ |
1491 | } | 1745 | } |
1492 | 1746 | ||
1493 | /* -- Vararg handling ----------------------------------------------------- */ | 1747 | /* -- Vararg handling ----------------------------------------------------- */ |
@@ -1511,8 +1765,10 @@ static int select_detect(jit_State *J) | |||
1511 | static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) | 1765 | static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) |
1512 | { | 1766 | { |
1513 | int32_t numparams = J->pt->numparams; | 1767 | int32_t numparams = J->pt->numparams; |
1514 | ptrdiff_t nvararg = frame_delta(J->L->base-1) - numparams - 1; | 1768 | ptrdiff_t nvararg = frame_delta(J->L->base-1) - numparams - 1 - LJ_FR2; |
1515 | lua_assert(frame_isvarg(J->L->base-1)); | 1769 | lua_assert(frame_isvarg(J->L->base-1)); |
1770 | if (LJ_FR2 && dst > J->maxslot) | ||
1771 | J->base[dst-1] = 0; /* Prevent resurrection of unrelated slot. */ | ||
1516 | if (J->framedepth > 0) { /* Simple case: varargs defined on-trace. */ | 1772 | if (J->framedepth > 0) { /* Simple case: varargs defined on-trace. */ |
1517 | ptrdiff_t i; | 1773 | ptrdiff_t i; |
1518 | if (nvararg < 0) nvararg = 0; | 1774 | if (nvararg < 0) nvararg = 0; |
@@ -1523,10 +1779,10 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) | |||
1523 | J->maxslot = dst + (BCReg)nresults; | 1779 | J->maxslot = dst + (BCReg)nresults; |
1524 | } | 1780 | } |
1525 | for (i = 0; i < nresults; i++) | 1781 | for (i = 0; i < nresults; i++) |
1526 | J->base[dst+i] = i < nvararg ? getslot(J, i - nvararg - 1) : TREF_NIL; | 1782 | J->base[dst+i] = i < nvararg ? getslot(J, i - nvararg - 1 - LJ_FR2) : TREF_NIL; |
1527 | } else { /* Unknown number of varargs passed to trace. */ | 1783 | } else { /* Unknown number of varargs passed to trace. */ |
1528 | TRef fr = emitir(IRTI(IR_SLOAD), 0, IRSLOAD_READONLY|IRSLOAD_FRAME); | 1784 | TRef fr = emitir(IRTI(IR_SLOAD), LJ_FR2, IRSLOAD_READONLY|IRSLOAD_FRAME); |
1529 | int32_t frofs = 8*(1+numparams)+FRAME_VARG; | 1785 | int32_t frofs = 8*(1+LJ_FR2+numparams)+FRAME_VARG; |
1530 | if (nresults >= 0) { /* Known fixed number of results. */ | 1786 | if (nresults >= 0) { /* Known fixed number of results. */ |
1531 | ptrdiff_t i; | 1787 | ptrdiff_t i; |
1532 | if (nvararg > 0) { | 1788 | if (nvararg > 0) { |
@@ -1535,12 +1791,13 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) | |||
1535 | if (nvararg >= nresults) | 1791 | if (nvararg >= nresults) |
1536 | emitir(IRTGI(IR_GE), fr, lj_ir_kint(J, frofs+8*(int32_t)nresults)); | 1792 | emitir(IRTGI(IR_GE), fr, lj_ir_kint(J, frofs+8*(int32_t)nresults)); |
1537 | else | 1793 | else |
1538 | emitir(IRTGI(IR_EQ), fr, lj_ir_kint(J, frame_ftsz(J->L->base-1))); | 1794 | emitir(IRTGI(IR_EQ), fr, |
1539 | vbase = emitir(IRTI(IR_SUB), REF_BASE, fr); | 1795 | lj_ir_kint(J, (int32_t)frame_ftsz(J->L->base-1))); |
1540 | vbase = emitir(IRT(IR_ADD, IRT_P32), vbase, lj_ir_kint(J, frofs-8)); | 1796 | vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr); |
1797 | vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase, lj_ir_kint(J, frofs-8)); | ||
1541 | for (i = 0; i < nload; i++) { | 1798 | for (i = 0; i < nload; i++) { |
1542 | IRType t = itype2irt(&J->L->base[i-1-nvararg]); | 1799 | IRType t = itype2irt(&J->L->base[i-1-LJ_FR2-nvararg]); |
1543 | TRef aref = emitir(IRT(IR_AREF, IRT_P32), | 1800 | TRef aref = emitir(IRT(IR_AREF, IRT_PGC), |
1544 | vbase, lj_ir_kint(J, (int32_t)i)); | 1801 | vbase, lj_ir_kint(J, (int32_t)i)); |
1545 | TRef tr = emitir(IRTG(IR_VLOAD, t), aref, 0); | 1802 | TRef tr = emitir(IRTG(IR_VLOAD, t), aref, 0); |
1546 | if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */ | 1803 | if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */ |
@@ -1586,15 +1843,16 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) | |||
1586 | } | 1843 | } |
1587 | if (idx != 0 && idx <= nvararg) { | 1844 | if (idx != 0 && idx <= nvararg) { |
1588 | IRType t; | 1845 | IRType t; |
1589 | TRef aref, vbase = emitir(IRTI(IR_SUB), REF_BASE, fr); | 1846 | TRef aref, vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr); |
1590 | vbase = emitir(IRT(IR_ADD, IRT_P32), vbase, lj_ir_kint(J, frofs-8)); | 1847 | vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase, |
1591 | t = itype2irt(&J->L->base[idx-2-nvararg]); | 1848 | lj_ir_kint(J, frofs-(8<<LJ_FR2))); |
1592 | aref = emitir(IRT(IR_AREF, IRT_P32), vbase, tridx); | 1849 | t = itype2irt(&J->L->base[idx-2-LJ_FR2-nvararg]); |
1850 | aref = emitir(IRT(IR_AREF, IRT_PGC), vbase, tridx); | ||
1593 | tr = emitir(IRTG(IR_VLOAD, t), aref, 0); | 1851 | tr = emitir(IRTG(IR_VLOAD, t), aref, 0); |
1594 | if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */ | 1852 | if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */ |
1595 | } | 1853 | } |
1596 | J->base[dst-2] = tr; | 1854 | J->base[dst-2-LJ_FR2] = tr; |
1597 | J->maxslot = dst-1; | 1855 | J->maxslot = dst-1-LJ_FR2; |
1598 | J->bcskip = 2; /* Skip CALLM + select. */ | 1856 | J->bcskip = 2; /* Skip CALLM + select. */ |
1599 | } else { | 1857 | } else { |
1600 | nyivarg: | 1858 | nyivarg: |
@@ -1612,8 +1870,63 @@ static TRef rec_tnew(jit_State *J, uint32_t ah) | |||
1612 | { | 1870 | { |
1613 | uint32_t asize = ah & 0x7ff; | 1871 | uint32_t asize = ah & 0x7ff; |
1614 | uint32_t hbits = ah >> 11; | 1872 | uint32_t hbits = ah >> 11; |
1873 | TRef tr; | ||
1615 | if (asize == 0x7ff) asize = 0x801; | 1874 | if (asize == 0x7ff) asize = 0x801; |
1616 | return emitir(IRTG(IR_TNEW, IRT_TAB), asize, hbits); | 1875 | tr = emitir(IRTG(IR_TNEW, IRT_TAB), asize, hbits); |
1876 | #ifdef LUAJIT_ENABLE_TABLE_BUMP | ||
1877 | J->rbchash[(tr & (RBCHASH_SLOTS-1))].ref = tref_ref(tr); | ||
1878 | setmref(J->rbchash[(tr & (RBCHASH_SLOTS-1))].pc, J->pc); | ||
1879 | setgcref(J->rbchash[(tr & (RBCHASH_SLOTS-1))].pt, obj2gco(J->pt)); | ||
1880 | #endif | ||
1881 | return tr; | ||
1882 | } | ||
1883 | |||
1884 | /* -- Concatenation ------------------------------------------------------- */ | ||
1885 | |||
1886 | static TRef rec_cat(jit_State *J, BCReg baseslot, BCReg topslot) | ||
1887 | { | ||
1888 | TRef *top = &J->base[topslot]; | ||
1889 | TValue savetv[5]; | ||
1890 | BCReg s; | ||
1891 | RecordIndex ix; | ||
1892 | lua_assert(baseslot < topslot); | ||
1893 | for (s = baseslot; s <= topslot; s++) | ||
1894 | (void)getslot(J, s); /* Ensure all arguments have a reference. */ | ||
1895 | if (tref_isnumber_str(top[0]) && tref_isnumber_str(top[-1])) { | ||
1896 | TRef tr, hdr, *trp, *xbase, *base = &J->base[baseslot]; | ||
1897 | /* First convert numbers to strings. */ | ||
1898 | for (trp = top; trp >= base; trp--) { | ||
1899 | if (tref_isnumber(*trp)) | ||
1900 | *trp = emitir(IRT(IR_TOSTR, IRT_STR), *trp, | ||
1901 | tref_isnum(*trp) ? IRTOSTR_NUM : IRTOSTR_INT); | ||
1902 | else if (!tref_isstr(*trp)) | ||
1903 | break; | ||
1904 | } | ||
1905 | xbase = ++trp; | ||
1906 | tr = hdr = emitir(IRT(IR_BUFHDR, IRT_PGC), | ||
1907 | lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET); | ||
1908 | do { | ||
1909 | tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, *trp++); | ||
1910 | } while (trp <= top); | ||
1911 | tr = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr); | ||
1912 | J->maxslot = (BCReg)(xbase - J->base); | ||
1913 | if (xbase == base) return tr; /* Return simple concatenation result. */ | ||
1914 | /* Pass partial result. */ | ||
1915 | topslot = J->maxslot--; | ||
1916 | *xbase = tr; | ||
1917 | top = xbase; | ||
1918 | setstrV(J->L, &ix.keyv, &J2G(J)->strempty); /* Simulate string result. */ | ||
1919 | } else { | ||
1920 | J->maxslot = topslot-1; | ||
1921 | copyTV(J->L, &ix.keyv, &J->L->base[topslot]); | ||
1922 | } | ||
1923 | copyTV(J->L, &ix.tabv, &J->L->base[topslot-1]); | ||
1924 | ix.tab = top[-1]; | ||
1925 | ix.key = top[0]; | ||
1926 | memcpy(savetv, &J->L->base[topslot-1], sizeof(savetv)); /* Save slots. */ | ||
1927 | rec_mm_arith(J, &ix, MM_concat); /* Call __concat metamethod. */ | ||
1928 | memcpy(&J->L->base[topslot-1], savetv, sizeof(savetv)); /* Restore slots. */ | ||
1929 | return 0; /* No result yet. */ | ||
1617 | } | 1930 | } |
1618 | 1931 | ||
1619 | /* -- Record bytecode ops ------------------------------------------------- */ | 1932 | /* -- Record bytecode ops ------------------------------------------------- */ |
@@ -1634,7 +1947,15 @@ static void rec_comp_fixup(jit_State *J, const BCIns *pc, int cond) | |||
1634 | const BCIns *npc = pc + 2 + (cond ? bc_j(jmpins) : 0); | 1947 | const BCIns *npc = pc + 2 + (cond ? bc_j(jmpins) : 0); |
1635 | SnapShot *snap = &J->cur.snap[J->cur.nsnap-1]; | 1948 | SnapShot *snap = &J->cur.snap[J->cur.nsnap-1]; |
1636 | /* Set PC to opposite target to avoid re-recording the comp. in side trace. */ | 1949 | /* Set PC to opposite target to avoid re-recording the comp. in side trace. */ |
1950 | #if LJ_FR2 | ||
1951 | SnapEntry *flink = &J->cur.snapmap[snap->mapofs + snap->nent]; | ||
1952 | uint64_t pcbase; | ||
1953 | memcpy(&pcbase, flink, sizeof(uint64_t)); | ||
1954 | pcbase = (pcbase & 0xff) | (u64ptr(npc) << 8); | ||
1955 | memcpy(flink, &pcbase, sizeof(uint64_t)); | ||
1956 | #else | ||
1637 | J->cur.snapmap[snap->mapofs + snap->nent] = SNAP_MKPC(npc); | 1957 | J->cur.snapmap[snap->mapofs + snap->nent] = SNAP_MKPC(npc); |
1958 | #endif | ||
1638 | J->needsnap = 1; | 1959 | J->needsnap = 1; |
1639 | if (bc_a(jmpins) < J->maxslot) J->maxslot = bc_a(jmpins); | 1960 | if (bc_a(jmpins) < J->maxslot) J->maxslot = bc_a(jmpins); |
1640 | lj_snap_shrink(J); /* Shrink last snapshot if possible. */ | 1961 | lj_snap_shrink(J); /* Shrink last snapshot if possible. */ |
@@ -1654,7 +1975,7 @@ void lj_record_ins(jit_State *J) | |||
1654 | if (LJ_UNLIKELY(J->postproc != LJ_POST_NONE)) { | 1975 | if (LJ_UNLIKELY(J->postproc != LJ_POST_NONE)) { |
1655 | switch (J->postproc) { | 1976 | switch (J->postproc) { |
1656 | case LJ_POST_FIXCOMP: /* Fixup comparison. */ | 1977 | case LJ_POST_FIXCOMP: /* Fixup comparison. */ |
1657 | pc = frame_pc(&J2G(J)->tmptv); | 1978 | pc = (const BCIns *)(uintptr_t)J2G(J)->tmptv.u64; |
1658 | rec_comp_fixup(J, pc, (!tvistruecond(&J2G(J)->tmptv2) ^ (bc_op(*pc)&1))); | 1979 | rec_comp_fixup(J, pc, (!tvistruecond(&J2G(J)->tmptv2) ^ (bc_op(*pc)&1))); |
1659 | /* fallthrough */ | 1980 | /* fallthrough */ |
1660 | case LJ_POST_FIXGUARD: /* Fixup and emit pending guard. */ | 1981 | case LJ_POST_FIXGUARD: /* Fixup and emit pending guard. */ |
@@ -1722,6 +2043,10 @@ void lj_record_ins(jit_State *J) | |||
1722 | rec_check_ir(J); | 2043 | rec_check_ir(J); |
1723 | #endif | 2044 | #endif |
1724 | 2045 | ||
2046 | #if LJ_HASPROFILE | ||
2047 | rec_profile_ins(J, pc); | ||
2048 | #endif | ||
2049 | |||
1725 | /* Keep a copy of the runtime values of var/num/str operands. */ | 2050 | /* Keep a copy of the runtime values of var/num/str operands. */ |
1726 | #define rav (&ix.valv) | 2051 | #define rav (&ix.valv) |
1727 | #define rbv (&ix.tabv) | 2052 | #define rbv (&ix.tabv) |
@@ -1748,7 +2073,7 @@ void lj_record_ins(jit_State *J) | |||
1748 | switch (bcmode_c(op)) { | 2073 | switch (bcmode_c(op)) { |
1749 | case BCMvar: | 2074 | case BCMvar: |
1750 | copyTV(J->L, rcv, &lbase[rc]); ix.key = rc = getslot(J, rc); break; | 2075 | copyTV(J->L, rcv, &lbase[rc]); ix.key = rc = getslot(J, rc); break; |
1751 | case BCMpri: setitype(rcv, ~rc); ix.key = rc = TREF_PRI(IRT_NIL+rc); break; | 2076 | case BCMpri: setpriV(rcv, ~rc); ix.key = rc = TREF_PRI(IRT_NIL+rc); break; |
1752 | case BCMnum: { cTValue *tv = proto_knumtv(J->pt, rc); | 2077 | case BCMnum: { cTValue *tv = proto_knumtv(J->pt, rc); |
1753 | copyTV(J->L, rcv, tv); ix.key = rc = tvisint(tv) ? lj_ir_kint(J, intV(tv)) : | 2078 | copyTV(J->L, rcv, tv); ix.key = rc = tvisint(tv) ? lj_ir_kint(J, intV(tv)) : |
1754 | lj_ir_knumint(J, numV(tv)); } break; | 2079 | lj_ir_knumint(J, numV(tv)); } break; |
@@ -1843,6 +2168,18 @@ void lj_record_ins(jit_State *J) | |||
1843 | J->maxslot = bc_a(pc[1]); /* Shrink used slots. */ | 2168 | J->maxslot = bc_a(pc[1]); /* Shrink used slots. */ |
1844 | break; | 2169 | break; |
1845 | 2170 | ||
2171 | case BC_ISTYPE: case BC_ISNUM: | ||
2172 | /* These coercions need to correspond with lj_meta_istype(). */ | ||
2173 | if (LJ_DUALNUM && rc == ~LJ_TNUMX+1) | ||
2174 | ra = lj_opt_narrow_toint(J, ra); | ||
2175 | else if (rc == ~LJ_TNUMX+2) | ||
2176 | ra = lj_ir_tonum(J, ra); | ||
2177 | else if (rc == ~LJ_TSTR+1) | ||
2178 | ra = lj_ir_tostr(J, ra); | ||
2179 | /* else: type specialization suffices. */ | ||
2180 | J->base[bc_a(ins)] = ra; | ||
2181 | break; | ||
2182 | |||
1846 | /* -- Unary ops --------------------------------------------------------- */ | 2183 | /* -- Unary ops --------------------------------------------------------- */ |
1847 | 2184 | ||
1848 | case BC_NOT: | 2185 | case BC_NOT: |
@@ -1906,11 +2243,23 @@ void lj_record_ins(jit_State *J) | |||
1906 | rc = rec_mm_arith(J, &ix, MM_pow); | 2243 | rc = rec_mm_arith(J, &ix, MM_pow); |
1907 | break; | 2244 | break; |
1908 | 2245 | ||
2246 | /* -- Miscellaneous ops ------------------------------------------------- */ | ||
2247 | |||
2248 | case BC_CAT: | ||
2249 | rc = rec_cat(J, rb, rc); | ||
2250 | break; | ||
2251 | |||
1909 | /* -- Constant and move ops --------------------------------------------- */ | 2252 | /* -- Constant and move ops --------------------------------------------- */ |
1910 | 2253 | ||
1911 | case BC_MOV: | 2254 | case BC_MOV: |
1912 | /* Clear gap of method call to avoid resurrecting previous refs. */ | 2255 | /* Clear gap of method call to avoid resurrecting previous refs. */ |
1913 | if (ra > J->maxslot) J->base[ra-1] = 0; | 2256 | if (ra > J->maxslot) { |
2257 | #if LJ_FR2 | ||
2258 | memset(J->base + J->maxslot, 0, (ra - J->maxslot) * sizeof(TRef)); | ||
2259 | #else | ||
2260 | J->base[ra-1] = 0; | ||
2261 | #endif | ||
2262 | } | ||
1914 | break; | 2263 | break; |
1915 | case BC_KSTR: case BC_KNUM: case BC_KPRI: | 2264 | case BC_KSTR: case BC_KNUM: case BC_KPRI: |
1916 | break; | 2265 | break; |
@@ -1918,6 +2267,8 @@ void lj_record_ins(jit_State *J) | |||
1918 | rc = lj_ir_kint(J, (int32_t)(int16_t)rc); | 2267 | rc = lj_ir_kint(J, (int32_t)(int16_t)rc); |
1919 | break; | 2268 | break; |
1920 | case BC_KNIL: | 2269 | case BC_KNIL: |
2270 | if (LJ_FR2 && ra > J->maxslot) | ||
2271 | J->base[ra-1] = 0; | ||
1921 | while (ra <= rc) | 2272 | while (ra <= rc) |
1922 | J->base[ra++] = TREF_NIL; | 2273 | J->base[ra++] = TREF_NIL; |
1923 | if (rc >= J->maxslot) J->maxslot = rc+1; | 2274 | if (rc >= J->maxslot) J->maxslot = rc+1; |
@@ -1954,6 +2305,14 @@ void lj_record_ins(jit_State *J) | |||
1954 | ix.idxchain = LJ_MAX_IDXCHAIN; | 2305 | ix.idxchain = LJ_MAX_IDXCHAIN; |
1955 | rc = lj_record_idx(J, &ix); | 2306 | rc = lj_record_idx(J, &ix); |
1956 | break; | 2307 | break; |
2308 | case BC_TGETR: case BC_TSETR: | ||
2309 | ix.idxchain = 0; | ||
2310 | rc = lj_record_idx(J, &ix); | ||
2311 | break; | ||
2312 | |||
2313 | case BC_TSETM: | ||
2314 | rec_tsetm(J, ra, (BCReg)(J->L->top - J->L->base), (int32_t)rcv->u32.lo); | ||
2315 | break; | ||
1957 | 2316 | ||
1958 | case BC_TNEW: | 2317 | case BC_TNEW: |
1959 | rc = rec_tnew(J, rc); | 2318 | rc = rec_tnew(J, rc); |
@@ -1961,33 +2320,38 @@ void lj_record_ins(jit_State *J) | |||
1961 | case BC_TDUP: | 2320 | case BC_TDUP: |
1962 | rc = emitir(IRTG(IR_TDUP, IRT_TAB), | 2321 | rc = emitir(IRTG(IR_TDUP, IRT_TAB), |
1963 | lj_ir_ktab(J, gco2tab(proto_kgc(J->pt, ~(ptrdiff_t)rc))), 0); | 2322 | lj_ir_ktab(J, gco2tab(proto_kgc(J->pt, ~(ptrdiff_t)rc))), 0); |
2323 | #ifdef LUAJIT_ENABLE_TABLE_BUMP | ||
2324 | J->rbchash[(rc & (RBCHASH_SLOTS-1))].ref = tref_ref(rc); | ||
2325 | setmref(J->rbchash[(rc & (RBCHASH_SLOTS-1))].pc, pc); | ||
2326 | setgcref(J->rbchash[(rc & (RBCHASH_SLOTS-1))].pt, obj2gco(J->pt)); | ||
2327 | #endif | ||
1964 | break; | 2328 | break; |
1965 | 2329 | ||
1966 | /* -- Calls and vararg handling ----------------------------------------- */ | 2330 | /* -- Calls and vararg handling ----------------------------------------- */ |
1967 | 2331 | ||
1968 | case BC_ITERC: | 2332 | case BC_ITERC: |
1969 | J->base[ra] = getslot(J, ra-3); | 2333 | J->base[ra] = getslot(J, ra-3); |
1970 | J->base[ra+1] = getslot(J, ra-2); | 2334 | J->base[ra+1+LJ_FR2] = getslot(J, ra-2); |
1971 | J->base[ra+2] = getslot(J, ra-1); | 2335 | J->base[ra+2+LJ_FR2] = getslot(J, ra-1); |
1972 | { /* Do the actual copy now because lj_record_call needs the values. */ | 2336 | { /* Do the actual copy now because lj_record_call needs the values. */ |
1973 | TValue *b = &J->L->base[ra]; | 2337 | TValue *b = &J->L->base[ra]; |
1974 | copyTV(J->L, b, b-3); | 2338 | copyTV(J->L, b, b-3); |
1975 | copyTV(J->L, b+1, b-2); | 2339 | copyTV(J->L, b+1+LJ_FR2, b-2); |
1976 | copyTV(J->L, b+2, b-1); | 2340 | copyTV(J->L, b+2+LJ_FR2, b-1); |
1977 | } | 2341 | } |
1978 | lj_record_call(J, ra, (ptrdiff_t)rc-1); | 2342 | lj_record_call(J, ra, (ptrdiff_t)rc-1); |
1979 | break; | 2343 | break; |
1980 | 2344 | ||
1981 | /* L->top is set to L->base+ra+rc+NARGS-1+1. See lj_dispatch_ins(). */ | 2345 | /* L->top is set to L->base+ra+rc+NARGS-1+1. See lj_dispatch_ins(). */ |
1982 | case BC_CALLM: | 2346 | case BC_CALLM: |
1983 | rc = (BCReg)(J->L->top - J->L->base) - ra; | 2347 | rc = (BCReg)(J->L->top - J->L->base) - ra - LJ_FR2; |
1984 | /* fallthrough */ | 2348 | /* fallthrough */ |
1985 | case BC_CALL: | 2349 | case BC_CALL: |
1986 | lj_record_call(J, ra, (ptrdiff_t)rc-1); | 2350 | lj_record_call(J, ra, (ptrdiff_t)rc-1); |
1987 | break; | 2351 | break; |
1988 | 2352 | ||
1989 | case BC_CALLMT: | 2353 | case BC_CALLMT: |
1990 | rc = (BCReg)(J->L->top - J->L->base) - ra; | 2354 | rc = (BCReg)(J->L->top - J->L->base) - ra - LJ_FR2; |
1991 | /* fallthrough */ | 2355 | /* fallthrough */ |
1992 | case BC_CALLT: | 2356 | case BC_CALLT: |
1993 | lj_record_tailcall(J, ra, (ptrdiff_t)rc-1); | 2357 | lj_record_tailcall(J, ra, (ptrdiff_t)rc-1); |
@@ -2004,6 +2368,9 @@ void lj_record_ins(jit_State *J) | |||
2004 | rc = (BCReg)(J->L->top - J->L->base) - ra + 1; | 2368 | rc = (BCReg)(J->L->top - J->L->base) - ra + 1; |
2005 | /* fallthrough */ | 2369 | /* fallthrough */ |
2006 | case BC_RET: case BC_RET0: case BC_RET1: | 2370 | case BC_RET: case BC_RET0: case BC_RET1: |
2371 | #if LJ_HASPROFILE | ||
2372 | rec_profile_ret(J); | ||
2373 | #endif | ||
2007 | lj_record_ret(J, ra, (ptrdiff_t)rc-1); | 2374 | lj_record_ret(J, ra, (ptrdiff_t)rc-1); |
2008 | break; | 2375 | break; |
2009 | 2376 | ||
@@ -2016,7 +2383,7 @@ void lj_record_ins(jit_State *J) | |||
2016 | case BC_JFORI: | 2383 | case BC_JFORI: |
2017 | lua_assert(bc_op(pc[(ptrdiff_t)rc-BCBIAS_J]) == BC_JFORL); | 2384 | lua_assert(bc_op(pc[(ptrdiff_t)rc-BCBIAS_J]) == BC_JFORL); |
2018 | if (rec_for(J, pc, 0) != LOOPEV_LEAVE) /* Link to existing loop. */ | 2385 | if (rec_for(J, pc, 0) != LOOPEV_LEAVE) /* Link to existing loop. */ |
2019 | rec_stop(J, LJ_TRLINK_ROOT, bc_d(pc[(ptrdiff_t)rc-BCBIAS_J])); | 2386 | lj_record_stop(J, LJ_TRLINK_ROOT, bc_d(pc[(ptrdiff_t)rc-BCBIAS_J])); |
2020 | /* Continue tracing if the loop is not entered. */ | 2387 | /* Continue tracing if the loop is not entered. */ |
2021 | break; | 2388 | break; |
2022 | 2389 | ||
@@ -2083,10 +2450,8 @@ void lj_record_ins(jit_State *J) | |||
2083 | /* fallthrough */ | 2450 | /* fallthrough */ |
2084 | case BC_ITERN: | 2451 | case BC_ITERN: |
2085 | case BC_ISNEXT: | 2452 | case BC_ISNEXT: |
2086 | case BC_CAT: | ||
2087 | case BC_UCLO: | 2453 | case BC_UCLO: |
2088 | case BC_FNEW: | 2454 | case BC_FNEW: |
2089 | case BC_TSETM: | ||
2090 | setintV(&J->errinfo, (int32_t)op); | 2455 | setintV(&J->errinfo, (int32_t)op); |
2091 | lj_trace_err_info(J, LJ_TRERR_NYIBC); | 2456 | lj_trace_err_info(J, LJ_TRERR_NYIBC); |
2092 | break; | 2457 | break; |
@@ -2095,15 +2460,21 @@ void lj_record_ins(jit_State *J) | |||
2095 | /* rc == 0 if we have no result yet, e.g. pending __index metamethod call. */ | 2460 | /* rc == 0 if we have no result yet, e.g. pending __index metamethod call. */ |
2096 | if (bcmode_a(op) == BCMdst && rc) { | 2461 | if (bcmode_a(op) == BCMdst && rc) { |
2097 | J->base[ra] = rc; | 2462 | J->base[ra] = rc; |
2098 | if (ra >= J->maxslot) J->maxslot = ra+1; | 2463 | if (ra >= J->maxslot) { |
2464 | #if LJ_FR2 | ||
2465 | if (ra > J->maxslot) J->base[ra-1] = 0; | ||
2466 | #endif | ||
2467 | J->maxslot = ra+1; | ||
2468 | } | ||
2099 | } | 2469 | } |
2100 | 2470 | ||
2101 | #undef rav | 2471 | #undef rav |
2102 | #undef rbv | 2472 | #undef rbv |
2103 | #undef rcv | 2473 | #undef rcv |
2104 | 2474 | ||
2105 | /* Limit the number of recorded IR instructions. */ | 2475 | /* Limit the number of recorded IR instructions and constants. */ |
2106 | if (J->cur.nins > REF_FIRST+(IRRef)J->param[JIT_P_maxrecord]) | 2476 | if (J->cur.nins > REF_FIRST+(IRRef)J->param[JIT_P_maxrecord] || |
2477 | J->cur.nk < REF_BIAS-(IRRef)J->param[JIT_P_maxirconst]) | ||
2107 | lj_trace_err(J, LJ_TRERR_TRACEOV); | 2478 | lj_trace_err(J, LJ_TRERR_TRACEOV); |
2108 | } | 2479 | } |
2109 | 2480 | ||
@@ -2152,6 +2523,12 @@ static const BCIns *rec_setup_root(jit_State *J) | |||
2152 | J->maxslot = J->pt->numparams; | 2523 | J->maxslot = J->pt->numparams; |
2153 | pc++; | 2524 | pc++; |
2154 | break; | 2525 | break; |
2526 | case BC_CALLM: | ||
2527 | case BC_CALL: | ||
2528 | case BC_ITERC: | ||
2529 | /* No bytecode range check for stitched traces. */ | ||
2530 | pc++; | ||
2531 | break; | ||
2155 | default: | 2532 | default: |
2156 | lua_assert(0); | 2533 | lua_assert(0); |
2157 | break; | 2534 | break; |
@@ -2167,11 +2544,14 @@ void lj_record_setup(jit_State *J) | |||
2167 | /* Initialize state related to current trace. */ | 2544 | /* Initialize state related to current trace. */ |
2168 | memset(J->slot, 0, sizeof(J->slot)); | 2545 | memset(J->slot, 0, sizeof(J->slot)); |
2169 | memset(J->chain, 0, sizeof(J->chain)); | 2546 | memset(J->chain, 0, sizeof(J->chain)); |
2547 | #ifdef LUAJIT_ENABLE_TABLE_BUMP | ||
2548 | memset(J->rbchash, 0, sizeof(J->rbchash)); | ||
2549 | #endif | ||
2170 | memset(J->bpropcache, 0, sizeof(J->bpropcache)); | 2550 | memset(J->bpropcache, 0, sizeof(J->bpropcache)); |
2171 | J->scev.idx = REF_NIL; | 2551 | J->scev.idx = REF_NIL; |
2172 | setmref(J->scev.pc, NULL); | 2552 | setmref(J->scev.pc, NULL); |
2173 | 2553 | ||
2174 | J->baseslot = 1; /* Invoking function is at base[-1]. */ | 2554 | J->baseslot = 1+LJ_FR2; /* Invoking function is at base[-1-LJ_FR2]. */ |
2175 | J->base = J->slot + J->baseslot; | 2555 | J->base = J->slot + J->baseslot; |
2176 | J->maxslot = 0; | 2556 | J->maxslot = 0; |
2177 | J->framedepth = 0; | 2557 | J->framedepth = 0; |
@@ -2186,7 +2566,7 @@ void lj_record_setup(jit_State *J) | |||
2186 | J->bc_extent = ~(MSize)0; | 2566 | J->bc_extent = ~(MSize)0; |
2187 | 2567 | ||
2188 | /* Emit instructions for fixed references. Also triggers initial IR alloc. */ | 2568 | /* Emit instructions for fixed references. Also triggers initial IR alloc. */ |
2189 | emitir_raw(IRT(IR_BASE, IRT_P32), J->parent, J->exitno); | 2569 | emitir_raw(IRT(IR_BASE, IRT_PGC), J->parent, J->exitno); |
2190 | for (i = 0; i <= 2; i++) { | 2570 | for (i = 0; i <= 2; i++) { |
2191 | IRIns *ir = IR(REF_NIL-i); | 2571 | IRIns *ir = IR(REF_NIL-i); |
2192 | ir->i = 0; | 2572 | ir->i = 0; |
@@ -2220,7 +2600,7 @@ void lj_record_setup(jit_State *J) | |||
2220 | if (traceref(J, J->cur.root)->nchild >= J->param[JIT_P_maxside] || | 2600 | if (traceref(J, J->cur.root)->nchild >= J->param[JIT_P_maxside] || |
2221 | T->snap[J->exitno].count >= J->param[JIT_P_hotexit] + | 2601 | T->snap[J->exitno].count >= J->param[JIT_P_hotexit] + |
2222 | J->param[JIT_P_tryside]) { | 2602 | J->param[JIT_P_tryside]) { |
2223 | rec_stop(J, LJ_TRLINK_INTERP, 0); | 2603 | lj_record_stop(J, LJ_TRLINK_INTERP, 0); |
2224 | } | 2604 | } |
2225 | } else { /* Root trace. */ | 2605 | } else { /* Root trace. */ |
2226 | J->cur.root = 0; | 2606 | J->cur.root = 0; |
@@ -2232,9 +2612,15 @@ void lj_record_setup(jit_State *J) | |||
2232 | lj_snap_add(J); | 2612 | lj_snap_add(J); |
2233 | if (bc_op(J->cur.startins) == BC_FORL) | 2613 | if (bc_op(J->cur.startins) == BC_FORL) |
2234 | rec_for_loop(J, J->pc-1, &J->scev, 1); | 2614 | rec_for_loop(J, J->pc-1, &J->scev, 1); |
2615 | else if (bc_op(J->cur.startins) == BC_ITERC) | ||
2616 | J->startpc = NULL; | ||
2235 | if (1 + J->pt->framesize >= LJ_MAX_JSLOTS) | 2617 | if (1 + J->pt->framesize >= LJ_MAX_JSLOTS) |
2236 | lj_trace_err(J, LJ_TRERR_STACKOV); | 2618 | lj_trace_err(J, LJ_TRERR_STACKOV); |
2237 | } | 2619 | } |
2620 | #if LJ_HASPROFILE | ||
2621 | J->prev_pt = NULL; | ||
2622 | J->prev_line = -1; | ||
2623 | #endif | ||
2238 | #ifdef LUAJIT_ENABLE_CHECKHOOK | 2624 | #ifdef LUAJIT_ENABLE_CHECKHOOK |
2239 | /* Regularly check for instruction/line hooks from compiled code and | 2625 | /* Regularly check for instruction/line hooks from compiled code and |
2240 | ** exit to the interpreter if the hooks are set. | 2626 | ** exit to the interpreter if the hooks are set. |
diff --git a/src/lj_record.h b/src/lj_record.h index 8ef8b2a7..e7d24fae 100644 --- a/src/lj_record.h +++ b/src/lj_record.h | |||
@@ -28,6 +28,7 @@ typedef struct RecordIndex { | |||
28 | 28 | ||
29 | LJ_FUNC int lj_record_objcmp(jit_State *J, TRef a, TRef b, | 29 | LJ_FUNC int lj_record_objcmp(jit_State *J, TRef a, TRef b, |
30 | cTValue *av, cTValue *bv); | 30 | cTValue *av, cTValue *bv); |
31 | LJ_FUNC void lj_record_stop(jit_State *J, TraceLink linktype, TraceNo lnk); | ||
31 | LJ_FUNC TRef lj_record_constify(jit_State *J, cTValue *o); | 32 | LJ_FUNC TRef lj_record_constify(jit_State *J, cTValue *o); |
32 | 33 | ||
33 | LJ_FUNC void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs); | 34 | LJ_FUNC void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs); |
diff --git a/src/lj_snap.c b/src/lj_snap.c index de8068ac..a47c0e3e 100644 --- a/src/lj_snap.c +++ b/src/lj_snap.c | |||
@@ -68,10 +68,22 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots) | |||
68 | for (s = 0; s < nslots; s++) { | 68 | for (s = 0; s < nslots; s++) { |
69 | TRef tr = J->slot[s]; | 69 | TRef tr = J->slot[s]; |
70 | IRRef ref = tref_ref(tr); | 70 | IRRef ref = tref_ref(tr); |
71 | #if LJ_FR2 | ||
72 | if (s == 1) { /* Ignore slot 1 in LJ_FR2 mode, except if tailcalled. */ | ||
73 | if ((tr & TREF_FRAME)) | ||
74 | map[n++] = SNAP(1, SNAP_FRAME | SNAP_NORESTORE, REF_NIL); | ||
75 | continue; | ||
76 | } | ||
77 | if ((tr & (TREF_FRAME | TREF_CONT)) && !ref) { | ||
78 | cTValue *base = J->L->base - J->baseslot; | ||
79 | tr = J->slot[s] = (tr & 0xff0000) | lj_ir_k64(J, IR_KNUM, base[s].u64); | ||
80 | ref = tref_ref(tr); | ||
81 | } | ||
82 | #endif | ||
71 | if (ref) { | 83 | if (ref) { |
72 | SnapEntry sn = SNAP_TR(s, tr); | 84 | SnapEntry sn = SNAP_TR(s, tr); |
73 | IRIns *ir = &J->cur.ir[ref]; | 85 | IRIns *ir = &J->cur.ir[ref]; |
74 | if (!(sn & (SNAP_CONT|SNAP_FRAME)) && | 86 | if ((LJ_FR2 || !(sn & (SNAP_CONT|SNAP_FRAME))) && |
75 | ir->o == IR_SLOAD && ir->op1 == s && ref > retf) { | 87 | ir->o == IR_SLOAD && ir->op1 == s && ref > retf) { |
76 | /* No need to snapshot unmodified non-inherited slots. */ | 88 | /* No need to snapshot unmodified non-inherited slots. */ |
77 | if (!(ir->op2 & IRSLOAD_INHERIT)) | 89 | if (!(ir->op2 & IRSLOAD_INHERIT)) |
@@ -81,7 +93,7 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots) | |||
81 | (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT) | 93 | (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT) |
82 | sn |= SNAP_NORESTORE; | 94 | sn |= SNAP_NORESTORE; |
83 | } | 95 | } |
84 | if (LJ_SOFTFP && irt_isnum(ir->t)) | 96 | if (LJ_SOFTFP32 && irt_isnum(ir->t)) |
85 | sn |= SNAP_SOFTFPNUM; | 97 | sn |= SNAP_SOFTFPNUM; |
86 | map[n++] = sn; | 98 | map[n++] = sn; |
87 | } | 99 | } |
@@ -90,32 +102,51 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots) | |||
90 | } | 102 | } |
91 | 103 | ||
92 | /* Add frame links at the end of the snapshot. */ | 104 | /* Add frame links at the end of the snapshot. */ |
93 | static BCReg snapshot_framelinks(jit_State *J, SnapEntry *map) | 105 | static MSize snapshot_framelinks(jit_State *J, SnapEntry *map, uint8_t *topslot) |
94 | { | 106 | { |
95 | cTValue *frame = J->L->base - 1; | 107 | cTValue *frame = J->L->base - 1; |
96 | cTValue *lim = J->L->base - J->baseslot; | 108 | cTValue *lim = J->L->base - J->baseslot + LJ_FR2; |
97 | cTValue *ftop = frame + funcproto(frame_func(frame))->framesize; | 109 | GCfunc *fn = frame_func(frame); |
110 | cTValue *ftop = isluafunc(fn) ? (frame+funcproto(fn)->framesize) : J->L->top; | ||
111 | #if LJ_FR2 | ||
112 | uint64_t pcbase = (u64ptr(J->pc) << 8) | (J->baseslot - 2); | ||
113 | lua_assert(2 <= J->baseslot && J->baseslot <= 257); | ||
114 | memcpy(map, &pcbase, sizeof(uint64_t)); | ||
115 | #else | ||
98 | MSize f = 0; | 116 | MSize f = 0; |
99 | map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */ | 117 | map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */ |
118 | #endif | ||
100 | while (frame > lim) { /* Backwards traversal of all frames above base. */ | 119 | while (frame > lim) { /* Backwards traversal of all frames above base. */ |
101 | if (frame_islua(frame)) { | 120 | if (frame_islua(frame)) { |
121 | #if !LJ_FR2 | ||
102 | map[f++] = SNAP_MKPC(frame_pc(frame)); | 122 | map[f++] = SNAP_MKPC(frame_pc(frame)); |
123 | #endif | ||
103 | frame = frame_prevl(frame); | 124 | frame = frame_prevl(frame); |
104 | } else if (frame_iscont(frame)) { | 125 | } else if (frame_iscont(frame)) { |
126 | #if !LJ_FR2 | ||
105 | map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); | 127 | map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); |
106 | map[f++] = SNAP_MKPC(frame_contpc(frame)); | 128 | map[f++] = SNAP_MKPC(frame_contpc(frame)); |
129 | #endif | ||
107 | frame = frame_prevd(frame); | 130 | frame = frame_prevd(frame); |
108 | } else { | 131 | } else { |
109 | lua_assert(!frame_isc(frame)); | 132 | lua_assert(!frame_isc(frame)); |
133 | #if !LJ_FR2 | ||
110 | map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); | 134 | map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); |
135 | #endif | ||
111 | frame = frame_prevd(frame); | 136 | frame = frame_prevd(frame); |
112 | continue; | 137 | continue; |
113 | } | 138 | } |
114 | if (frame + funcproto(frame_func(frame))->framesize > ftop) | 139 | if (frame + funcproto(frame_func(frame))->framesize > ftop) |
115 | ftop = frame + funcproto(frame_func(frame))->framesize; | 140 | ftop = frame + funcproto(frame_func(frame))->framesize; |
116 | } | 141 | } |
142 | *topslot = (uint8_t)(ftop - lim); | ||
143 | #if LJ_FR2 | ||
144 | lua_assert(sizeof(SnapEntry) * 2 == sizeof(uint64_t)); | ||
145 | return 2; | ||
146 | #else | ||
117 | lua_assert(f == (MSize)(1 + J->framedepth)); | 147 | lua_assert(f == (MSize)(1 + J->framedepth)); |
118 | return (BCReg)(ftop - lim); | 148 | return f; |
149 | #endif | ||
119 | } | 150 | } |
120 | 151 | ||
121 | /* Take a snapshot of the current stack. */ | 152 | /* Take a snapshot of the current stack. */ |
@@ -125,16 +156,16 @@ static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap) | |||
125 | MSize nent; | 156 | MSize nent; |
126 | SnapEntry *p; | 157 | SnapEntry *p; |
127 | /* Conservative estimate. */ | 158 | /* Conservative estimate. */ |
128 | lj_snap_grow_map(J, nsnapmap + nslots + (MSize)J->framedepth+1); | 159 | lj_snap_grow_map(J, nsnapmap + nslots + (MSize)(LJ_FR2?2:J->framedepth+1)); |
129 | p = &J->cur.snapmap[nsnapmap]; | 160 | p = &J->cur.snapmap[nsnapmap]; |
130 | nent = snapshot_slots(J, p, nslots); | 161 | nent = snapshot_slots(J, p, nslots); |
131 | snap->topslot = (uint8_t)snapshot_framelinks(J, p + nent); | 162 | snap->nent = (uint8_t)nent; |
163 | nent += snapshot_framelinks(J, p + nent, &snap->topslot); | ||
132 | snap->mapofs = (uint32_t)nsnapmap; | 164 | snap->mapofs = (uint32_t)nsnapmap; |
133 | snap->ref = (IRRef1)J->cur.nins; | 165 | snap->ref = (IRRef1)J->cur.nins; |
134 | snap->nent = (uint8_t)nent; | ||
135 | snap->nslots = (uint8_t)nslots; | 166 | snap->nslots = (uint8_t)nslots; |
136 | snap->count = 0; | 167 | snap->count = 0; |
137 | J->cur.nsnapmap = (uint32_t)(nsnapmap + nent + 1 + J->framedepth); | 168 | J->cur.nsnapmap = (uint32_t)(nsnapmap + nent); |
138 | } | 169 | } |
139 | 170 | ||
140 | /* Add or merge a snapshot. */ | 171 | /* Add or merge a snapshot. */ |
@@ -143,8 +174,8 @@ void lj_snap_add(jit_State *J) | |||
143 | MSize nsnap = J->cur.nsnap; | 174 | MSize nsnap = J->cur.nsnap; |
144 | MSize nsnapmap = J->cur.nsnapmap; | 175 | MSize nsnapmap = J->cur.nsnapmap; |
145 | /* Merge if no ins. inbetween or if requested and no guard inbetween. */ | 176 | /* Merge if no ins. inbetween or if requested and no guard inbetween. */ |
146 | if (J->mergesnap ? !irt_isguard(J->guardemit) : | 177 | if ((nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins) || |
147 | (nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins)) { | 178 | (J->mergesnap && !irt_isguard(J->guardemit))) { |
148 | if (nsnap == 1) { /* But preserve snap #0 PC. */ | 179 | if (nsnap == 1) { /* But preserve snap #0 PC. */ |
149 | emitir_raw(IRT(IR_NOP, IRT_NIL), 0, 0); | 180 | emitir_raw(IRT(IR_NOP, IRT_NIL), 0, 0); |
150 | goto nomerge; | 181 | goto nomerge; |
@@ -237,7 +268,8 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf, | |||
237 | case BCMbase: | 268 | case BCMbase: |
238 | if (op >= BC_CALLM && op <= BC_VARG) { | 269 | if (op >= BC_CALLM && op <= BC_VARG) { |
239 | BCReg top = (op == BC_CALLM || op == BC_CALLMT || bc_c(ins) == 0) ? | 270 | BCReg top = (op == BC_CALLM || op == BC_CALLMT || bc_c(ins) == 0) ? |
240 | maxslot : (bc_a(ins) + bc_c(ins)); | 271 | maxslot : (bc_a(ins) + bc_c(ins)+LJ_FR2); |
272 | if (LJ_FR2) DEF_SLOT(bc_a(ins)+1); | ||
241 | s = bc_a(ins) - ((op == BC_ITERC || op == BC_ITERN) ? 3 : 0); | 273 | s = bc_a(ins) - ((op == BC_ITERC || op == BC_ITERN) ? 3 : 0); |
242 | for (; s < top; s++) USE_SLOT(s); | 274 | for (; s < top; s++) USE_SLOT(s); |
243 | for (; s < maxslot; s++) DEF_SLOT(s); | 275 | for (; s < maxslot; s++) DEF_SLOT(s); |
@@ -281,8 +313,8 @@ void lj_snap_shrink(jit_State *J) | |||
281 | MSize n, m, nlim, nent = snap->nent; | 313 | MSize n, m, nlim, nent = snap->nent; |
282 | uint8_t udf[SNAP_USEDEF_SLOTS]; | 314 | uint8_t udf[SNAP_USEDEF_SLOTS]; |
283 | BCReg maxslot = J->maxslot; | 315 | BCReg maxslot = J->maxslot; |
284 | BCReg minslot = snap_usedef(J, udf, snap_pc(map[nent]), maxslot); | ||
285 | BCReg baseslot = J->baseslot; | 316 | BCReg baseslot = J->baseslot; |
317 | BCReg minslot = snap_usedef(J, udf, snap_pc(&map[nent]), maxslot); | ||
286 | maxslot += baseslot; | 318 | maxslot += baseslot; |
287 | minslot += baseslot; | 319 | minslot += baseslot; |
288 | snap->nslots = (uint8_t)maxslot; | 320 | snap->nslots = (uint8_t)maxslot; |
@@ -342,7 +374,7 @@ IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir) | |||
342 | break; | 374 | break; |
343 | } | 375 | } |
344 | } | 376 | } |
345 | } else if (LJ_SOFTFP && ir->o == IR_HIOP) { | 377 | } else if (LJ_SOFTFP32 && ir->o == IR_HIOP) { |
346 | ref++; | 378 | ref++; |
347 | } else if (ir->o == IR_PVAL) { | 379 | } else if (ir->o == IR_PVAL) { |
348 | ref = ir->op1 + REF_BIAS; | 380 | ref = ir->op1 + REF_BIAS; |
@@ -368,8 +400,8 @@ static TRef snap_replay_const(jit_State *J, IRIns *ir) | |||
368 | case IR_KPRI: return TREF_PRI(irt_type(ir->t)); | 400 | case IR_KPRI: return TREF_PRI(irt_type(ir->t)); |
369 | case IR_KINT: return lj_ir_kint(J, ir->i); | 401 | case IR_KINT: return lj_ir_kint(J, ir->i); |
370 | case IR_KGC: return lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t)); | 402 | case IR_KGC: return lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t)); |
371 | case IR_KNUM: return lj_ir_k64(J, IR_KNUM, ir_knum(ir)); | 403 | case IR_KNUM: case IR_KINT64: |
372 | case IR_KINT64: return lj_ir_k64(J, IR_KINT64, ir_kint64(ir)); | 404 | return lj_ir_k64(J, (IROp)ir->o, ir_k64(ir)->u64); |
373 | case IR_KPTR: return lj_ir_kptr(J, ir_kptr(ir)); /* Continuation. */ | 405 | case IR_KPTR: return lj_ir_kptr(J, ir_kptr(ir)); /* Continuation. */ |
374 | default: lua_assert(0); return TREF_NIL; break; | 406 | default: lua_assert(0); return TREF_NIL; break; |
375 | } | 407 | } |
@@ -442,7 +474,11 @@ void lj_snap_replay(jit_State *J, GCtrace *T) | |||
442 | goto setslot; | 474 | goto setslot; |
443 | bloomset(seen, ref); | 475 | bloomset(seen, ref); |
444 | if (irref_isk(ref)) { | 476 | if (irref_isk(ref)) { |
445 | tr = snap_replay_const(J, ir); | 477 | /* See special treatment of LJ_FR2 slot 1 in snapshot_slots() above. */ |
478 | if (LJ_FR2 && (sn == SNAP(1, SNAP_FRAME | SNAP_NORESTORE, REF_NIL))) | ||
479 | tr = 0; | ||
480 | else | ||
481 | tr = snap_replay_const(J, ir); | ||
446 | } else if (!regsp_used(ir->prev)) { | 482 | } else if (!regsp_used(ir->prev)) { |
447 | pass23 = 1; | 483 | pass23 = 1; |
448 | lua_assert(s != 0); | 484 | lua_assert(s != 0); |
@@ -450,13 +486,13 @@ void lj_snap_replay(jit_State *J, GCtrace *T) | |||
450 | } else { | 486 | } else { |
451 | IRType t = irt_type(ir->t); | 487 | IRType t = irt_type(ir->t); |
452 | uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT; | 488 | uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT; |
453 | if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM; | 489 | if (LJ_SOFTFP32 && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM; |
454 | if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY); | 490 | if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY); |
455 | tr = emitir_raw(IRT(IR_SLOAD, t), s, mode); | 491 | tr = emitir_raw(IRT(IR_SLOAD, t), s, mode); |
456 | } | 492 | } |
457 | setslot: | 493 | setslot: |
458 | J->slot[s] = tr | (sn&(SNAP_CONT|SNAP_FRAME)); /* Same as TREF_* flags. */ | 494 | J->slot[s] = tr | (sn&(SNAP_CONT|SNAP_FRAME)); /* Same as TREF_* flags. */ |
459 | J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && s); | 495 | J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && (s != LJ_FR2)); |
460 | if ((sn & SNAP_FRAME)) | 496 | if ((sn & SNAP_FRAME)) |
461 | J->baseslot = s+1; | 497 | J->baseslot = s+1; |
462 | } | 498 | } |
@@ -484,7 +520,7 @@ void lj_snap_replay(jit_State *J, GCtrace *T) | |||
484 | if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) { | 520 | if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) { |
485 | if (snap_pref(J, T, map, nent, seen, irs->op2) == 0) | 521 | if (snap_pref(J, T, map, nent, seen, irs->op2) == 0) |
486 | snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1); | 522 | snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1); |
487 | else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) && | 523 | else if ((LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) && |
488 | irs+1 < irlast && (irs+1)->o == IR_HIOP) | 524 | irs+1 < irlast && (irs+1)->o == IR_HIOP) |
489 | snap_pref(J, T, map, nent, seen, (irs+1)->op2); | 525 | snap_pref(J, T, map, nent, seen, (irs+1)->op2); |
490 | } | 526 | } |
@@ -543,17 +579,16 @@ void lj_snap_replay(jit_State *J, GCtrace *T) | |||
543 | lua_assert(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT); | 579 | lua_assert(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT); |
544 | val = snap_pref(J, T, map, nent, seen, irc->op1); | 580 | val = snap_pref(J, T, map, nent, seen, irc->op1); |
545 | val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT); | 581 | val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT); |
546 | } else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) && | 582 | } else if ((LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) && |
547 | irs+1 < irlast && (irs+1)->o == IR_HIOP) { | 583 | irs+1 < irlast && (irs+1)->o == IR_HIOP) { |
548 | IRType t = IRT_I64; | 584 | IRType t = IRT_I64; |
549 | if (LJ_SOFTFP && irt_type((irs+1)->t) == IRT_SOFTFP) | 585 | if (LJ_SOFTFP32 && irt_type((irs+1)->t) == IRT_SOFTFP) |
550 | t = IRT_NUM; | 586 | t = IRT_NUM; |
551 | lj_needsplit(J); | 587 | lj_needsplit(J); |
552 | if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) { | 588 | if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) { |
553 | uint64_t k = (uint32_t)T->ir[irs->op2].i + | 589 | uint64_t k = (uint32_t)T->ir[irs->op2].i + |
554 | ((uint64_t)T->ir[(irs+1)->op2].i << 32); | 590 | ((uint64_t)T->ir[(irs+1)->op2].i << 32); |
555 | val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM, | 591 | val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM, k); |
556 | lj_ir_k64_find(J, k)); | ||
557 | } else { | 592 | } else { |
558 | val = emitir_raw(IRT(IR_HIOP, t), val, | 593 | val = emitir_raw(IRT(IR_HIOP, t), val, |
559 | snap_pref(J, T, map, nent, seen, (irs+1)->op2)); | 594 | snap_pref(J, T, map, nent, seen, (irs+1)->op2)); |
@@ -600,17 +635,18 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, | |||
600 | int32_t *sps = &ex->spill[regsp_spill(rs)]; | 635 | int32_t *sps = &ex->spill[regsp_spill(rs)]; |
601 | if (irt_isinteger(t)) { | 636 | if (irt_isinteger(t)) { |
602 | setintV(o, *sps); | 637 | setintV(o, *sps); |
603 | #if !LJ_SOFTFP | 638 | #if !LJ_SOFTFP32 |
604 | } else if (irt_isnum(t)) { | 639 | } else if (irt_isnum(t)) { |
605 | o->u64 = *(uint64_t *)sps; | 640 | o->u64 = *(uint64_t *)sps; |
606 | #endif | 641 | #endif |
607 | } else if (LJ_64 && irt_islightud(t)) { | 642 | #if LJ_64 && !LJ_GC64 |
643 | } else if (irt_islightud(t)) { | ||
608 | /* 64 bit lightuserdata which may escape already has the tag bits. */ | 644 | /* 64 bit lightuserdata which may escape already has the tag bits. */ |
609 | o->u64 = *(uint64_t *)sps; | 645 | o->u64 = *(uint64_t *)sps; |
646 | #endif | ||
610 | } else { | 647 | } else { |
611 | lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */ | 648 | lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */ |
612 | setgcrefi(o->gcr, *sps); | 649 | setgcV(J->L, o, (GCobj *)(uintptr_t)*(GCSize *)sps, irt_toitype(t)); |
613 | setitype(o, irt_toitype(t)); | ||
614 | } | 650 | } |
615 | } else { /* Restore from register. */ | 651 | } else { /* Restore from register. */ |
616 | Reg r = regsp_reg(rs); | 652 | Reg r = regsp_reg(rs); |
@@ -624,14 +660,19 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, | |||
624 | #if !LJ_SOFTFP | 660 | #if !LJ_SOFTFP |
625 | } else if (irt_isnum(t)) { | 661 | } else if (irt_isnum(t)) { |
626 | setnumV(o, ex->fpr[r-RID_MIN_FPR]); | 662 | setnumV(o, ex->fpr[r-RID_MIN_FPR]); |
663 | #elif LJ_64 /* && LJ_SOFTFP */ | ||
664 | } else if (irt_isnum(t)) { | ||
665 | o->u64 = ex->gpr[r-RID_MIN_GPR]; | ||
627 | #endif | 666 | #endif |
628 | } else if (LJ_64 && irt_islightud(t)) { | 667 | #if LJ_64 && !LJ_GC64 |
629 | /* 64 bit lightuserdata which may escape already has the tag bits. */ | 668 | } else if (irt_is64(t)) { |
669 | /* 64 bit values that already have the tag bits. */ | ||
630 | o->u64 = ex->gpr[r-RID_MIN_GPR]; | 670 | o->u64 = ex->gpr[r-RID_MIN_GPR]; |
671 | #endif | ||
672 | } else if (irt_ispri(t)) { | ||
673 | setpriV(o, irt_toitype(t)); | ||
631 | } else { | 674 | } else { |
632 | if (!irt_ispri(t)) | 675 | setgcV(J->L, o, (GCobj *)ex->gpr[r-RID_MIN_GPR], irt_toitype(t)); |
633 | setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]); | ||
634 | setitype(o, irt_toitype(t)); | ||
635 | } | 676 | } |
636 | } | 677 | } |
637 | } | 678 | } |
@@ -647,8 +688,8 @@ static void snap_restoredata(GCtrace *T, ExitState *ex, | |||
647 | int32_t *src; | 688 | int32_t *src; |
648 | uint64_t tmp; | 689 | uint64_t tmp; |
649 | if (irref_isk(ref)) { | 690 | if (irref_isk(ref)) { |
650 | if (ir->o == IR_KNUM || ir->o == IR_KINT64) { | 691 | if (ir_isk64(ir)) { |
651 | src = mref(ir->ptr, int32_t); | 692 | src = (int32_t *)&ir[1]; |
652 | } else if (sz == 8) { | 693 | } else if (sz == 8) { |
653 | tmp = (uint64_t)(uint32_t)ir->i; | 694 | tmp = (uint64_t)(uint32_t)ir->i; |
654 | src = (int32_t *)&tmp; | 695 | src = (int32_t *)&tmp; |
@@ -685,8 +726,9 @@ static void snap_restoredata(GCtrace *T, ExitState *ex, | |||
685 | #else | 726 | #else |
686 | if (LJ_BE && sz == 4) src++; | 727 | if (LJ_BE && sz == 4) src++; |
687 | #endif | 728 | #endif |
688 | } | 729 | } else |
689 | #endif | 730 | #endif |
731 | if (LJ_64 && LJ_BE && sz == 4) src++; | ||
690 | } | 732 | } |
691 | } | 733 | } |
692 | lua_assert(sz == 1 || sz == 2 || sz == 4 || sz == 8); | 734 | lua_assert(sz == 1 || sz == 2 || sz == 4 || sz == 8); |
@@ -708,8 +750,9 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, | |||
708 | if (ir->o == IR_CNEW || ir->o == IR_CNEWI) { | 750 | if (ir->o == IR_CNEW || ir->o == IR_CNEWI) { |
709 | CTState *cts = ctype_cts(J->L); | 751 | CTState *cts = ctype_cts(J->L); |
710 | CTypeID id = (CTypeID)T->ir[ir->op1].i; | 752 | CTypeID id = (CTypeID)T->ir[ir->op1].i; |
711 | CTSize sz = lj_ctype_size(cts, id); | 753 | CTSize sz; |
712 | GCcdata *cd = lj_cdata_new(cts, id, sz); | 754 | CTInfo info = lj_ctype_info(cts, id, &sz); |
755 | GCcdata *cd = lj_cdata_newx(cts, id, sz, info); | ||
713 | setcdataV(J->L, o, cd); | 756 | setcdataV(J->L, o, cd); |
714 | if (ir->o == IR_CNEWI) { | 757 | if (ir->o == IR_CNEWI) { |
715 | uint8_t *p = (uint8_t *)cdataptr(cd); | 758 | uint8_t *p = (uint8_t *)cdataptr(cd); |
@@ -773,7 +816,7 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, | |||
773 | val = lj_tab_set(J->L, t, &tmp); | 816 | val = lj_tab_set(J->L, t, &tmp); |
774 | /* NOBARRIER: The table is new (marked white). */ | 817 | /* NOBARRIER: The table is new (marked white). */ |
775 | snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, val); | 818 | snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, val); |
776 | if (LJ_SOFTFP && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) { | 819 | if (LJ_SOFTFP32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) { |
777 | snap_restoreval(J, T, ex, snapno, rfilt, (irs+1)->op2, &tmp); | 820 | snap_restoreval(J, T, ex, snapno, rfilt, (irs+1)->op2, &tmp); |
778 | val->u32.hi = tmp.u32.lo; | 821 | val->u32.hi = tmp.u32.lo; |
779 | } | 822 | } |
@@ -791,11 +834,15 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) | |||
791 | SnapShot *snap = &T->snap[snapno]; | 834 | SnapShot *snap = &T->snap[snapno]; |
792 | MSize n, nent = snap->nent; | 835 | MSize n, nent = snap->nent; |
793 | SnapEntry *map = &T->snapmap[snap->mapofs]; | 836 | SnapEntry *map = &T->snapmap[snap->mapofs]; |
794 | SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1]; | 837 | #if !LJ_FR2 || defined(LUA_USE_ASSERT) |
795 | int32_t ftsz0; | 838 | SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1-LJ_FR2]; |
839 | #endif | ||
840 | #if !LJ_FR2 | ||
841 | ptrdiff_t ftsz0; | ||
842 | #endif | ||
796 | TValue *frame; | 843 | TValue *frame; |
797 | BloomFilter rfilt = snap_renamefilter(T, snapno); | 844 | BloomFilter rfilt = snap_renamefilter(T, snapno); |
798 | const BCIns *pc = snap_pc(map[nent]); | 845 | const BCIns *pc = snap_pc(&map[nent]); |
799 | lua_State *L = J->L; | 846 | lua_State *L = J->L; |
800 | 847 | ||
801 | /* Set interpreter PC to the next PC to get correct error messages. */ | 848 | /* Set interpreter PC to the next PC to get correct error messages. */ |
@@ -808,8 +855,10 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) | |||
808 | } | 855 | } |
809 | 856 | ||
810 | /* Fill stack slots with data from the registers and spill slots. */ | 857 | /* Fill stack slots with data from the registers and spill slots. */ |
811 | frame = L->base-1; | 858 | frame = L->base-1-LJ_FR2; |
859 | #if !LJ_FR2 | ||
812 | ftsz0 = frame_ftsz(frame); /* Preserve link to previous frame in slot #0. */ | 860 | ftsz0 = frame_ftsz(frame); /* Preserve link to previous frame in slot #0. */ |
861 | #endif | ||
813 | for (n = 0; n < nent; n++) { | 862 | for (n = 0; n < nent; n++) { |
814 | SnapEntry sn = map[n]; | 863 | SnapEntry sn = map[n]; |
815 | if (!(sn & SNAP_NORESTORE)) { | 864 | if (!(sn & SNAP_NORESTORE)) { |
@@ -828,17 +877,22 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) | |||
828 | continue; | 877 | continue; |
829 | } | 878 | } |
830 | snap_restoreval(J, T, ex, snapno, rfilt, ref, o); | 879 | snap_restoreval(J, T, ex, snapno, rfilt, ref, o); |
831 | if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && tvisint(o)) { | 880 | if (LJ_SOFTFP32 && (sn & SNAP_SOFTFPNUM) && tvisint(o)) { |
832 | TValue tmp; | 881 | TValue tmp; |
833 | snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp); | 882 | snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp); |
834 | o->u32.hi = tmp.u32.lo; | 883 | o->u32.hi = tmp.u32.lo; |
884 | #if !LJ_FR2 | ||
835 | } else if ((sn & (SNAP_CONT|SNAP_FRAME))) { | 885 | } else if ((sn & (SNAP_CONT|SNAP_FRAME))) { |
836 | /* Overwrite tag with frame link. */ | 886 | /* Overwrite tag with frame link. */ |
837 | o->fr.tp.ftsz = snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0; | 887 | setframe_ftsz(o, snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0); |
838 | L->base = o+1; | 888 | L->base = o+1; |
889 | #endif | ||
839 | } | 890 | } |
840 | } | 891 | } |
841 | } | 892 | } |
893 | #if LJ_FR2 | ||
894 | L->base += (map[nent+LJ_BE] & 0xff); | ||
895 | #endif | ||
842 | lua_assert(map + nent == flinks); | 896 | lua_assert(map + nent == flinks); |
843 | 897 | ||
844 | /* Compute current stack top. */ | 898 | /* Compute current stack top. */ |
diff --git a/src/lj_state.c b/src/lj_state.c index ab064266..dc82e260 100644 --- a/src/lj_state.c +++ b/src/lj_state.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include "lj_obj.h" | 12 | #include "lj_obj.h" |
13 | #include "lj_gc.h" | 13 | #include "lj_gc.h" |
14 | #include "lj_err.h" | 14 | #include "lj_err.h" |
15 | #include "lj_buf.h" | ||
15 | #include "lj_str.h" | 16 | #include "lj_str.h" |
16 | #include "lj_tab.h" | 17 | #include "lj_tab.h" |
17 | #include "lj_func.h" | 18 | #include "lj_func.h" |
@@ -26,6 +27,7 @@ | |||
26 | #include "lj_vm.h" | 27 | #include "lj_vm.h" |
27 | #include "lj_lex.h" | 28 | #include "lj_lex.h" |
28 | #include "lj_alloc.h" | 29 | #include "lj_alloc.h" |
30 | #include "luajit.h" | ||
29 | 31 | ||
30 | /* -- Stack handling ------------------------------------------------------ */ | 32 | /* -- Stack handling ------------------------------------------------------ */ |
31 | 33 | ||
@@ -47,6 +49,7 @@ | |||
47 | ** one extra slot if mobj is not a function. Only lj_meta_tset needs 5 | 49 | ** one extra slot if mobj is not a function. Only lj_meta_tset needs 5 |
48 | ** slots above top, but then mobj is always a function. So we can get by | 50 | ** slots above top, but then mobj is always a function. So we can get by |
49 | ** with 5 extra slots. | 51 | ** with 5 extra slots. |
52 | ** LJ_FR2: We need 2 more slots for the frame PC and the continuation PC. | ||
50 | */ | 53 | */ |
51 | 54 | ||
52 | /* Resize stack slots and adjust pointers in state. */ | 55 | /* Resize stack slots and adjust pointers in state. */ |
@@ -59,7 +62,7 @@ static void resizestack(lua_State *L, MSize n) | |||
59 | GCobj *up; | 62 | GCobj *up; |
60 | lua_assert((MSize)(tvref(L->maxstack)-oldst)==L->stacksize-LJ_STACK_EXTRA-1); | 63 | lua_assert((MSize)(tvref(L->maxstack)-oldst)==L->stacksize-LJ_STACK_EXTRA-1); |
61 | st = (TValue *)lj_mem_realloc(L, tvref(L->stack), | 64 | st = (TValue *)lj_mem_realloc(L, tvref(L->stack), |
62 | (MSize)(L->stacksize*sizeof(TValue)), | 65 | (MSize)(oldsize*sizeof(TValue)), |
63 | (MSize)(realsize*sizeof(TValue))); | 66 | (MSize)(realsize*sizeof(TValue))); |
64 | setmref(L->stack, st); | 67 | setmref(L->stack, st); |
65 | delta = (char *)st - (char *)oldst; | 68 | delta = (char *)st - (char *)oldst; |
@@ -67,12 +70,12 @@ static void resizestack(lua_State *L, MSize n) | |||
67 | while (oldsize < realsize) /* Clear new slots. */ | 70 | while (oldsize < realsize) /* Clear new slots. */ |
68 | setnilV(st + oldsize++); | 71 | setnilV(st + oldsize++); |
69 | L->stacksize = realsize; | 72 | L->stacksize = realsize; |
73 | if ((size_t)(mref(G(L)->jit_base, char) - (char *)oldst) < oldsize) | ||
74 | setmref(G(L)->jit_base, mref(G(L)->jit_base, char) + delta); | ||
70 | L->base = (TValue *)((char *)L->base + delta); | 75 | L->base = (TValue *)((char *)L->base + delta); |
71 | L->top = (TValue *)((char *)L->top + delta); | 76 | L->top = (TValue *)((char *)L->top + delta); |
72 | for (up = gcref(L->openupval); up != NULL; up = gcnext(up)) | 77 | for (up = gcref(L->openupval); up != NULL; up = gcnext(up)) |
73 | setmref(gco2uv(up)->v, (TValue *)((char *)uvval(gco2uv(up)) + delta)); | 78 | setmref(gco2uv(up)->v, (TValue *)((char *)uvval(gco2uv(up)) + delta)); |
74 | if (obj2gco(L) == gcref(G(L)->jit_L)) | ||
75 | setmref(G(L)->jit_base, mref(G(L)->jit_base, char) + delta); | ||
76 | } | 79 | } |
77 | 80 | ||
78 | /* Relimit stack after error, in case the limit was overdrawn. */ | 81 | /* Relimit stack after error, in case the limit was overdrawn. */ |
@@ -89,7 +92,8 @@ void lj_state_shrinkstack(lua_State *L, MSize used) | |||
89 | return; /* Avoid stack shrinking while handling stack overflow. */ | 92 | return; /* Avoid stack shrinking while handling stack overflow. */ |
90 | if (4*used < L->stacksize && | 93 | if (4*used < L->stacksize && |
91 | 2*(LJ_STACK_START+LJ_STACK_EXTRA) < L->stacksize && | 94 | 2*(LJ_STACK_START+LJ_STACK_EXTRA) < L->stacksize && |
92 | obj2gco(L) != gcref(G(L)->jit_L)) /* Don't shrink stack of live trace. */ | 95 | /* Don't shrink stack of live trace. */ |
96 | (tvref(G(L)->jit_base) == NULL || obj2gco(L) != gcref(G(L)->cur_L))) | ||
93 | resizestack(L, L->stacksize >> 1); | 97 | resizestack(L, L->stacksize >> 1); |
94 | } | 98 | } |
95 | 99 | ||
@@ -125,8 +129,9 @@ static void stack_init(lua_State *L1, lua_State *L) | |||
125 | L1->stacksize = LJ_STACK_START + LJ_STACK_EXTRA; | 129 | L1->stacksize = LJ_STACK_START + LJ_STACK_EXTRA; |
126 | stend = st + L1->stacksize; | 130 | stend = st + L1->stacksize; |
127 | setmref(L1->maxstack, stend - LJ_STACK_EXTRA - 1); | 131 | setmref(L1->maxstack, stend - LJ_STACK_EXTRA - 1); |
128 | L1->base = L1->top = st+1; | 132 | setthreadV(L1, st++, L1); /* Needed for curr_funcisL() on empty stack. */ |
129 | setthreadV(L1, st, L1); /* Needed for curr_funcisL() on empty stack. */ | 133 | if (LJ_FR2) setnilV(st++); |
134 | L1->base = L1->top = st; | ||
130 | while (st < stend) /* Clear new slots. */ | 135 | while (st < stend) /* Clear new slots. */ |
131 | setnilV(st++); | 136 | setnilV(st++); |
132 | } | 137 | } |
@@ -164,7 +169,7 @@ static void close_state(lua_State *L) | |||
164 | lj_ctype_freestate(g); | 169 | lj_ctype_freestate(g); |
165 | #endif | 170 | #endif |
166 | lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef); | 171 | lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef); |
167 | lj_str_freebuf(g, &g->tmpbuf); | 172 | lj_buf_free(g, &g->tmpbuf); |
168 | lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue); | 173 | lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue); |
169 | lua_assert(g->gc.total == sizeof(GG_State)); | 174 | lua_assert(g->gc.total == sizeof(GG_State)); |
170 | #ifndef LUAJIT_USE_SYSMALLOC | 175 | #ifndef LUAJIT_USE_SYSMALLOC |
@@ -175,7 +180,7 @@ static void close_state(lua_State *L) | |||
175 | g->allocf(g->allocd, G2GG(g), sizeof(GG_State), 0); | 180 | g->allocf(g->allocd, G2GG(g), sizeof(GG_State), 0); |
176 | } | 181 | } |
177 | 182 | ||
178 | #if LJ_64 && !(defined(LUAJIT_USE_VALGRIND) && defined(LUAJIT_USE_SYSMALLOC)) | 183 | #if LJ_64 && !LJ_GC64 && !(defined(LUAJIT_USE_VALGRIND) && defined(LUAJIT_USE_SYSMALLOC)) |
179 | lua_State *lj_state_newstate(lua_Alloc f, void *ud) | 184 | lua_State *lj_state_newstate(lua_Alloc f, void *ud) |
180 | #else | 185 | #else |
181 | LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) | 186 | LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) |
@@ -184,7 +189,7 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) | |||
184 | GG_State *GG = (GG_State *)f(ud, NULL, 0, sizeof(GG_State)); | 189 | GG_State *GG = (GG_State *)f(ud, NULL, 0, sizeof(GG_State)); |
185 | lua_State *L = &GG->L; | 190 | lua_State *L = &GG->L; |
186 | global_State *g = &GG->g; | 191 | global_State *g = &GG->g; |
187 | if (GG == NULL || !checkptr32(GG)) return NULL; | 192 | if (GG == NULL || !checkptrGC(GG)) return NULL; |
188 | memset(GG, 0, sizeof(GG_State)); | 193 | memset(GG, 0, sizeof(GG_State)); |
189 | L->gct = ~LJ_TTHREAD; | 194 | L->gct = ~LJ_TTHREAD; |
190 | L->marked = LJ_GC_WHITE0 | LJ_GC_FIXED | LJ_GC_SFIXED; /* Prevent free. */ | 195 | L->marked = LJ_GC_WHITE0 | LJ_GC_FIXED | LJ_GC_SFIXED; /* Prevent free. */ |
@@ -202,8 +207,10 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) | |||
202 | setnilV(registry(L)); | 207 | setnilV(registry(L)); |
203 | setnilV(&g->nilnode.val); | 208 | setnilV(&g->nilnode.val); |
204 | setnilV(&g->nilnode.key); | 209 | setnilV(&g->nilnode.key); |
210 | #if !LJ_GC64 | ||
205 | setmref(g->nilnode.freetop, &g->nilnode); | 211 | setmref(g->nilnode.freetop, &g->nilnode); |
206 | lj_str_initbuf(&g->tmpbuf); | 212 | #endif |
213 | lj_buf_init(NULL, &g->tmpbuf); | ||
207 | g->gc.state = GCSpause; | 214 | g->gc.state = GCSpause; |
208 | setgcref(g->gc.root, obj2gco(L)); | 215 | setgcref(g->gc.root, obj2gco(L)); |
209 | setmref(g->gc.sweep, &g->gc.root); | 216 | setmref(g->gc.sweep, &g->gc.root); |
@@ -217,7 +224,7 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) | |||
217 | close_state(L); | 224 | close_state(L); |
218 | return NULL; | 225 | return NULL; |
219 | } | 226 | } |
220 | L->status = 0; | 227 | L->status = LUA_OK; |
221 | return L; | 228 | return L; |
222 | } | 229 | } |
223 | 230 | ||
@@ -236,6 +243,10 @@ LUA_API void lua_close(lua_State *L) | |||
236 | global_State *g = G(L); | 243 | global_State *g = G(L); |
237 | int i; | 244 | int i; |
238 | L = mainthread(g); /* Only the main thread can be closed. */ | 245 | L = mainthread(g); /* Only the main thread can be closed. */ |
246 | #if LJ_HASPROFILE | ||
247 | luaJIT_profile_stop(L); | ||
248 | #endif | ||
249 | setgcrefnull(g->cur_L); | ||
239 | lj_func_closeuv(L, tvref(L->stack)); | 250 | lj_func_closeuv(L, tvref(L->stack)); |
240 | lj_gc_separateudata(g, 1); /* Separate udata which have GC metamethods. */ | 251 | lj_gc_separateudata(g, 1); /* Separate udata which have GC metamethods. */ |
241 | #if LJ_HASJIT | 252 | #if LJ_HASJIT |
@@ -245,10 +256,10 @@ LUA_API void lua_close(lua_State *L) | |||
245 | #endif | 256 | #endif |
246 | for (i = 0;;) { | 257 | for (i = 0;;) { |
247 | hook_enter(g); | 258 | hook_enter(g); |
248 | L->status = 0; | 259 | L->status = LUA_OK; |
260 | L->base = L->top = tvref(L->stack) + 1 + LJ_FR2; | ||
249 | L->cframe = NULL; | 261 | L->cframe = NULL; |
250 | L->base = L->top = tvref(L->stack) + 1; | 262 | if (lj_vm_cpcall(L, NULL, NULL, cpfinalize) == LUA_OK) { |
251 | if (lj_vm_cpcall(L, NULL, NULL, cpfinalize) == 0) { | ||
252 | if (++i >= 10) break; | 263 | if (++i >= 10) break; |
253 | lj_gc_separateudata(g, 1); /* Separate udata again. */ | 264 | lj_gc_separateudata(g, 1); /* Separate udata again. */ |
254 | if (gcref(g->gc.mmudata) == NULL) /* Until nothing is left to do. */ | 265 | if (gcref(g->gc.mmudata) == NULL) /* Until nothing is left to do. */ |
@@ -263,7 +274,7 @@ lua_State *lj_state_new(lua_State *L) | |||
263 | lua_State *L1 = lj_mem_newobj(L, lua_State); | 274 | lua_State *L1 = lj_mem_newobj(L, lua_State); |
264 | L1->gct = ~LJ_TTHREAD; | 275 | L1->gct = ~LJ_TTHREAD; |
265 | L1->dummy_ffid = FF_C; | 276 | L1->dummy_ffid = FF_C; |
266 | L1->status = 0; | 277 | L1->status = LUA_OK; |
267 | L1->stacksize = 0; | 278 | L1->stacksize = 0; |
268 | setmref(L1->stack, NULL); | 279 | setmref(L1->stack, NULL); |
269 | L1->cframe = NULL; | 280 | L1->cframe = NULL; |
@@ -279,6 +290,8 @@ lua_State *lj_state_new(lua_State *L) | |||
279 | void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L) | 290 | void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L) |
280 | { | 291 | { |
281 | lua_assert(L != mainthread(g)); | 292 | lua_assert(L != mainthread(g)); |
293 | if (obj2gco(L) == gcref(g->cur_L)) | ||
294 | setgcrefnull(g->cur_L); | ||
282 | lj_func_closeuv(L, tvref(L->stack)); | 295 | lj_func_closeuv(L, tvref(L->stack)); |
283 | lua_assert(gcref(L->openupval) == NULL); | 296 | lua_assert(gcref(L->openupval) == NULL); |
284 | lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue); | 297 | lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue); |
diff --git a/src/lj_state.h b/src/lj_state.h index 18afe55c..9a8c7d93 100644 --- a/src/lj_state.h +++ b/src/lj_state.h | |||
@@ -28,7 +28,7 @@ static LJ_AINLINE void lj_state_checkstack(lua_State *L, MSize need) | |||
28 | 28 | ||
29 | LJ_FUNC lua_State *lj_state_new(lua_State *L); | 29 | LJ_FUNC lua_State *lj_state_new(lua_State *L); |
30 | LJ_FUNC void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L); | 30 | LJ_FUNC void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L); |
31 | #if LJ_64 | 31 | #if LJ_64 && !LJ_GC64 && !(defined(LUAJIT_USE_VALGRIND) && defined(LUAJIT_USE_SYSMALLOC)) |
32 | LJ_FUNC lua_State *lj_state_newstate(lua_Alloc f, void *ud); | 32 | LJ_FUNC lua_State *lj_state_newstate(lua_Alloc f, void *ud); |
33 | #endif | 33 | #endif |
34 | 34 | ||
diff --git a/src/lj_str.c b/src/lj_str.c index 279c5cc3..ec74afa5 100644 --- a/src/lj_str.c +++ b/src/lj_str.c | |||
@@ -1,13 +1,8 @@ | |||
1 | /* | 1 | /* |
2 | ** String handling. | 2 | ** String handling. |
3 | ** Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h | 3 | ** Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h |
4 | ** | ||
5 | ** Portions taken verbatim or adapted from the Lua interpreter. | ||
6 | ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h | ||
7 | */ | 4 | */ |
8 | 5 | ||
9 | #include <stdio.h> | ||
10 | |||
11 | #define lj_str_c | 6 | #define lj_str_c |
12 | #define LUA_CORE | 7 | #define LUA_CORE |
13 | 8 | ||
@@ -15,10 +10,9 @@ | |||
15 | #include "lj_gc.h" | 10 | #include "lj_gc.h" |
16 | #include "lj_err.h" | 11 | #include "lj_err.h" |
17 | #include "lj_str.h" | 12 | #include "lj_str.h" |
18 | #include "lj_state.h" | ||
19 | #include "lj_char.h" | 13 | #include "lj_char.h" |
20 | 14 | ||
21 | /* -- String interning ---------------------------------------------------- */ | 15 | /* -- String helpers ------------------------------------------------------ */ |
22 | 16 | ||
23 | /* Ordered compare of strings. Assumes string data is 4-byte aligned. */ | 17 | /* Ordered compare of strings. Assumes string data is 4-byte aligned. */ |
24 | int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b) | 18 | int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b) |
@@ -64,6 +58,40 @@ static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len) | |||
64 | return 0; | 58 | return 0; |
65 | } | 59 | } |
66 | 60 | ||
61 | /* Find fixed string p inside string s. */ | ||
62 | const char *lj_str_find(const char *s, const char *p, MSize slen, MSize plen) | ||
63 | { | ||
64 | if (plen <= slen) { | ||
65 | if (plen == 0) { | ||
66 | return s; | ||
67 | } else { | ||
68 | int c = *(const uint8_t *)p++; | ||
69 | plen--; slen -= plen; | ||
70 | while (slen) { | ||
71 | const char *q = (const char *)memchr(s, c, slen); | ||
72 | if (!q) break; | ||
73 | if (memcmp(q+1, p, plen) == 0) return q; | ||
74 | q++; slen -= (MSize)(q-s); s = q; | ||
75 | } | ||
76 | } | ||
77 | } | ||
78 | return NULL; | ||
79 | } | ||
80 | |||
81 | /* Check whether a string has a pattern matching character. */ | ||
82 | int lj_str_haspattern(GCstr *s) | ||
83 | { | ||
84 | const char *p = strdata(s), *q = p + s->len; | ||
85 | while (p < q) { | ||
86 | int c = *(const uint8_t *)p++; | ||
87 | if (lj_char_ispunct(c) && strchr("^$*+?.([%-", c)) | ||
88 | return 1; /* Found a pattern matching char. */ | ||
89 | } | ||
90 | return 0; /* No pattern matching chars found. */ | ||
91 | } | ||
92 | |||
93 | /* -- String interning ---------------------------------------------------- */ | ||
94 | |||
67 | /* Resize the string hash table (grow and shrink). */ | 95 | /* Resize the string hash table (grow and shrink). */ |
68 | void lj_str_resize(lua_State *L, MSize newmask) | 96 | void lj_str_resize(lua_State *L, MSize newmask) |
69 | { | 97 | { |
@@ -167,173 +195,3 @@ void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s) | |||
167 | lj_mem_free(g, s, sizestring(s)); | 195 | lj_mem_free(g, s, sizestring(s)); |
168 | } | 196 | } |
169 | 197 | ||
170 | /* -- Type conversions ---------------------------------------------------- */ | ||
171 | |||
172 | /* Print number to buffer. Canonicalizes non-finite values. */ | ||
173 | size_t LJ_FASTCALL lj_str_bufnum(char *s, cTValue *o) | ||
174 | { | ||
175 | if (LJ_LIKELY((o->u32.hi << 1) < 0xffe00000)) { /* Finite? */ | ||
176 | lua_Number n = o->n; | ||
177 | #if __BIONIC__ | ||
178 | if (tvismzero(o)) { s[0] = '-'; s[1] = '0'; return 2; } | ||
179 | #endif | ||
180 | return (size_t)lua_number2str(s, n); | ||
181 | } else if (((o->u32.hi & 0x000fffff) | o->u32.lo) != 0) { | ||
182 | s[0] = 'n'; s[1] = 'a'; s[2] = 'n'; return 3; | ||
183 | } else if ((o->u32.hi & 0x80000000) == 0) { | ||
184 | s[0] = 'i'; s[1] = 'n'; s[2] = 'f'; return 3; | ||
185 | } else { | ||
186 | s[0] = '-'; s[1] = 'i'; s[2] = 'n'; s[3] = 'f'; return 4; | ||
187 | } | ||
188 | } | ||
189 | |||
190 | /* Print integer to buffer. Returns pointer to start. */ | ||
191 | char * LJ_FASTCALL lj_str_bufint(char *p, int32_t k) | ||
192 | { | ||
193 | uint32_t u = (uint32_t)(k < 0 ? -k : k); | ||
194 | p += 1+10; | ||
195 | do { *--p = (char)('0' + u % 10); } while (u /= 10); | ||
196 | if (k < 0) *--p = '-'; | ||
197 | return p; | ||
198 | } | ||
199 | |||
200 | /* Convert number to string. */ | ||
201 | GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np) | ||
202 | { | ||
203 | char buf[LJ_STR_NUMBUF]; | ||
204 | size_t len = lj_str_bufnum(buf, (TValue *)np); | ||
205 | return lj_str_new(L, buf, len); | ||
206 | } | ||
207 | |||
208 | /* Convert integer to string. */ | ||
209 | GCstr * LJ_FASTCALL lj_str_fromint(lua_State *L, int32_t k) | ||
210 | { | ||
211 | char s[1+10]; | ||
212 | char *p = lj_str_bufint(s, k); | ||
213 | return lj_str_new(L, p, (size_t)(s+sizeof(s)-p)); | ||
214 | } | ||
215 | |||
216 | GCstr * LJ_FASTCALL lj_str_fromnumber(lua_State *L, cTValue *o) | ||
217 | { | ||
218 | return tvisint(o) ? lj_str_fromint(L, intV(o)) : lj_str_fromnum(L, &o->n); | ||
219 | } | ||
220 | |||
221 | /* -- String formatting --------------------------------------------------- */ | ||
222 | |||
223 | static void addstr(lua_State *L, SBuf *sb, const char *str, MSize len) | ||
224 | { | ||
225 | char *p; | ||
226 | MSize i; | ||
227 | if (sb->n + len > sb->sz) { | ||
228 | MSize sz = sb->sz * 2; | ||
229 | while (sb->n + len > sz) sz = sz * 2; | ||
230 | lj_str_resizebuf(L, sb, sz); | ||
231 | } | ||
232 | p = sb->buf + sb->n; | ||
233 | sb->n += len; | ||
234 | for (i = 0; i < len; i++) p[i] = str[i]; | ||
235 | } | ||
236 | |||
237 | static void addchar(lua_State *L, SBuf *sb, int c) | ||
238 | { | ||
239 | if (sb->n + 1 > sb->sz) { | ||
240 | MSize sz = sb->sz * 2; | ||
241 | lj_str_resizebuf(L, sb, sz); | ||
242 | } | ||
243 | sb->buf[sb->n++] = (char)c; | ||
244 | } | ||
245 | |||
246 | /* Push formatted message as a string object to Lua stack. va_list variant. */ | ||
247 | const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp) | ||
248 | { | ||
249 | SBuf *sb = &G(L)->tmpbuf; | ||
250 | lj_str_needbuf(L, sb, (MSize)strlen(fmt)); | ||
251 | lj_str_resetbuf(sb); | ||
252 | for (;;) { | ||
253 | const char *e = strchr(fmt, '%'); | ||
254 | if (e == NULL) break; | ||
255 | addstr(L, sb, fmt, (MSize)(e-fmt)); | ||
256 | /* This function only handles %s, %c, %d, %f and %p formats. */ | ||
257 | switch (e[1]) { | ||
258 | case 's': { | ||
259 | const char *s = va_arg(argp, char *); | ||
260 | if (s == NULL) s = "(null)"; | ||
261 | addstr(L, sb, s, (MSize)strlen(s)); | ||
262 | break; | ||
263 | } | ||
264 | case 'c': | ||
265 | addchar(L, sb, va_arg(argp, int)); | ||
266 | break; | ||
267 | case 'd': { | ||
268 | char buf[LJ_STR_INTBUF]; | ||
269 | char *p = lj_str_bufint(buf, va_arg(argp, int32_t)); | ||
270 | addstr(L, sb, p, (MSize)(buf+LJ_STR_INTBUF-p)); | ||
271 | break; | ||
272 | } | ||
273 | case 'f': { | ||
274 | char buf[LJ_STR_NUMBUF]; | ||
275 | TValue tv; | ||
276 | MSize len; | ||
277 | tv.n = (lua_Number)(va_arg(argp, LUAI_UACNUMBER)); | ||
278 | len = (MSize)lj_str_bufnum(buf, &tv); | ||
279 | addstr(L, sb, buf, len); | ||
280 | break; | ||
281 | } | ||
282 | case 'p': { | ||
283 | #define FMTP_CHARS (2*sizeof(ptrdiff_t)) | ||
284 | char buf[2+FMTP_CHARS]; | ||
285 | ptrdiff_t p = (ptrdiff_t)(va_arg(argp, void *)); | ||
286 | ptrdiff_t i, lasti = 2+FMTP_CHARS; | ||
287 | if (p == 0) { | ||
288 | addstr(L, sb, "NULL", 4); | ||
289 | break; | ||
290 | } | ||
291 | #if LJ_64 | ||
292 | /* Shorten output for 64 bit pointers. */ | ||
293 | lasti = 2+2*4+((p >> 32) ? 2+2*(lj_fls((uint32_t)(p >> 32))>>3) : 0); | ||
294 | #endif | ||
295 | buf[0] = '0'; | ||
296 | buf[1] = 'x'; | ||
297 | for (i = lasti-1; i >= 2; i--, p >>= 4) | ||
298 | buf[i] = "0123456789abcdef"[(p & 15)]; | ||
299 | addstr(L, sb, buf, (MSize)lasti); | ||
300 | break; | ||
301 | } | ||
302 | case '%': | ||
303 | addchar(L, sb, '%'); | ||
304 | break; | ||
305 | default: | ||
306 | addchar(L, sb, '%'); | ||
307 | addchar(L, sb, e[1]); | ||
308 | break; | ||
309 | } | ||
310 | fmt = e+2; | ||
311 | } | ||
312 | addstr(L, sb, fmt, (MSize)strlen(fmt)); | ||
313 | setstrV(L, L->top, lj_str_new(L, sb->buf, sb->n)); | ||
314 | incr_top(L); | ||
315 | return strVdata(L->top - 1); | ||
316 | } | ||
317 | |||
318 | /* Push formatted message as a string object to Lua stack. Vararg variant. */ | ||
319 | const char *lj_str_pushf(lua_State *L, const char *fmt, ...) | ||
320 | { | ||
321 | const char *msg; | ||
322 | va_list argp; | ||
323 | va_start(argp, fmt); | ||
324 | msg = lj_str_pushvf(L, fmt, argp); | ||
325 | va_end(argp); | ||
326 | return msg; | ||
327 | } | ||
328 | |||
329 | /* -- Buffer handling ----------------------------------------------------- */ | ||
330 | |||
331 | char *lj_str_needbuf(lua_State *L, SBuf *sb, MSize sz) | ||
332 | { | ||
333 | if (sz > sb->sz) { | ||
334 | if (sz < LJ_MIN_SBUF) sz = LJ_MIN_SBUF; | ||
335 | lj_str_resizebuf(L, sb, sz); | ||
336 | } | ||
337 | return sb->buf; | ||
338 | } | ||
339 | |||
diff --git a/src/lj_str.h b/src/lj_str.h index e7687cb1..2e9bfc1d 100644 --- a/src/lj_str.h +++ b/src/lj_str.h | |||
@@ -10,8 +10,13 @@ | |||
10 | 10 | ||
11 | #include "lj_obj.h" | 11 | #include "lj_obj.h" |
12 | 12 | ||
13 | /* String interning. */ | 13 | /* String helpers. */ |
14 | LJ_FUNC int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b); | 14 | LJ_FUNC int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b); |
15 | LJ_FUNC const char *lj_str_find(const char *s, const char *f, | ||
16 | MSize slen, MSize flen); | ||
17 | LJ_FUNC int lj_str_haspattern(GCstr *s); | ||
18 | |||
19 | /* String interning. */ | ||
15 | LJ_FUNC void lj_str_resize(lua_State *L, MSize newmask); | 20 | LJ_FUNC void lj_str_resize(lua_State *L, MSize newmask); |
16 | LJ_FUNCA GCstr *lj_str_new(lua_State *L, const char *str, size_t len); | 21 | LJ_FUNCA GCstr *lj_str_new(lua_State *L, const char *str, size_t len); |
17 | LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s); | 22 | LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s); |
@@ -19,32 +24,4 @@ LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s); | |||
19 | #define lj_str_newz(L, s) (lj_str_new(L, s, strlen(s))) | 24 | #define lj_str_newz(L, s) (lj_str_new(L, s, strlen(s))) |
20 | #define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1)) | 25 | #define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1)) |
21 | 26 | ||
22 | /* Type conversions. */ | ||
23 | LJ_FUNC size_t LJ_FASTCALL lj_str_bufnum(char *s, cTValue *o); | ||
24 | LJ_FUNC char * LJ_FASTCALL lj_str_bufint(char *p, int32_t k); | ||
25 | LJ_FUNCA GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np); | ||
26 | LJ_FUNC GCstr * LJ_FASTCALL lj_str_fromint(lua_State *L, int32_t k); | ||
27 | LJ_FUNCA GCstr * LJ_FASTCALL lj_str_fromnumber(lua_State *L, cTValue *o); | ||
28 | |||
29 | #define LJ_STR_INTBUF (1+10) | ||
30 | #define LJ_STR_NUMBUF LUAI_MAXNUMBER2STR | ||
31 | |||
32 | /* String formatting. */ | ||
33 | LJ_FUNC const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp); | ||
34 | LJ_FUNC const char *lj_str_pushf(lua_State *L, const char *fmt, ...) | ||
35 | #if defined(__GNUC__) | ||
36 | __attribute__ ((format (printf, 2, 3))) | ||
37 | #endif | ||
38 | ; | ||
39 | |||
40 | /* Resizable string buffers. Struct definition in lj_obj.h. */ | ||
41 | LJ_FUNC char *lj_str_needbuf(lua_State *L, SBuf *sb, MSize sz); | ||
42 | |||
43 | #define lj_str_initbuf(sb) ((sb)->buf = NULL, (sb)->sz = 0) | ||
44 | #define lj_str_resetbuf(sb) ((sb)->n = 0) | ||
45 | #define lj_str_resizebuf(L, sb, size) \ | ||
46 | ((sb)->buf = (char *)lj_mem_realloc(L, (sb)->buf, (sb)->sz, (size)), \ | ||
47 | (sb)->sz = (size)) | ||
48 | #define lj_str_freebuf(g, sb) lj_mem_free(g, (void *)(sb)->buf, (sb)->sz) | ||
49 | |||
50 | #endif | 27 | #endif |
diff --git a/src/lj_strfmt.c b/src/lj_strfmt.c new file mode 100644 index 00000000..8f968d32 --- /dev/null +++ b/src/lj_strfmt.c | |||
@@ -0,0 +1,472 @@ | |||
1 | /* | ||
2 | ** String formatting. | ||
3 | ** Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #include <stdio.h> | ||
7 | |||
8 | #define lj_strfmt_c | ||
9 | #define LUA_CORE | ||
10 | |||
11 | #include "lj_obj.h" | ||
12 | #include "lj_buf.h" | ||
13 | #include "lj_str.h" | ||
14 | #include "lj_state.h" | ||
15 | #include "lj_char.h" | ||
16 | #include "lj_strfmt.h" | ||
17 | |||
18 | /* -- Format parser ------------------------------------------------------- */ | ||
19 | |||
20 | static const uint8_t strfmt_map[('x'-'A')+1] = { /* Indexed by (conversion char - 'A'); a 0 entry means "not a valid conversion". */ | ||
21 | STRFMT_A,0,0,0,STRFMT_E,STRFMT_F,STRFMT_G,0,0,0,0,0,0, /* 'A'..'M' */ | ||
22 | 0,0,0,0,0,0,0,0,0,0,STRFMT_X,0,0, /* 'N'..'Z' */ | ||
23 | 0,0,0,0,0,0, /* The six characters between 'Z' and 'a'. */ | ||
24 | STRFMT_A,0,STRFMT_C,STRFMT_D,STRFMT_E,STRFMT_F,STRFMT_G,0,STRFMT_I,0,0,0,0, /* 'a'..'m' */ | ||
25 | 0,STRFMT_O,STRFMT_P,STRFMT_Q,0,STRFMT_S,0,STRFMT_U,0,0,STRFMT_X /* 'n'..'x' */ | ||
26 | }; | ||
27 | |||
28 | SFormat LJ_FASTCALL lj_strfmt_parse(FormatState *fs) | ||
29 | { | ||
30 | const uint8_t *p = fs->p, *e = fs->e; | ||
31 | fs->str = (const char *)p; | ||
32 | for (; p < e; p++) { | ||
33 | if (*p == '%') { /* Escape char? */ | ||
34 | if (p[1] == '%') { /* '%%'? */ | ||
35 | fs->p = ++p+1; | ||
36 | goto retlit; | ||
37 | } else { | ||
38 | SFormat sf = 0; | ||
39 | uint32_t c; | ||
40 | if (p != (const uint8_t *)fs->str) | ||
41 | break; | ||
42 | for (p++; (uint32_t)*p - ' ' <= (uint32_t)('0' - ' '); p++) { | ||
43 | /* Parse flags. */ | ||
44 | if (*p == '-') sf |= STRFMT_F_LEFT; | ||
45 | else if (*p == '+') sf |= STRFMT_F_PLUS; | ||
46 | else if (*p == '0') sf |= STRFMT_F_ZERO; | ||
47 | else if (*p == ' ') sf |= STRFMT_F_SPACE; | ||
48 | else if (*p == '#') sf |= STRFMT_F_ALT; | ||
49 | else break; | ||
50 | } | ||
51 | if ((uint32_t)*p - '0' < 10) { /* Parse width. */ | ||
52 | uint32_t width = (uint32_t)*p++ - '0'; | ||
53 | if ((uint32_t)*p - '0' < 10) | ||
54 | width = (uint32_t)*p++ - '0' + width*10; | ||
55 | sf |= (width << STRFMT_SH_WIDTH); | ||
56 | } | ||
57 | if (*p == '.') { /* Parse precision. */ | ||
58 | uint32_t prec = 0; | ||
59 | p++; | ||
60 | if ((uint32_t)*p - '0' < 10) { | ||
61 | prec = (uint32_t)*p++ - '0'; | ||
62 | if ((uint32_t)*p - '0' < 10) | ||
63 | prec = (uint32_t)*p++ - '0' + prec*10; | ||
64 | } | ||
65 | sf |= ((prec+1) << STRFMT_SH_PREC); | ||
66 | } | ||
67 | /* Parse conversion. */ | ||
68 | c = (uint32_t)*p - 'A'; | ||
69 | if (LJ_LIKELY(c <= (uint32_t)('x' - 'A'))) { | ||
70 | uint32_t sx = strfmt_map[c]; | ||
71 | if (sx) { | ||
72 | fs->p = p+1; | ||
73 | return (sf | sx | ((c & 0x20) ? 0 : STRFMT_F_UPPER)); | ||
74 | } | ||
75 | } | ||
76 | /* Return error location. */ | ||
77 | if (*p >= 32) p++; | ||
78 | fs->len = (MSize)(p - (const uint8_t *)fs->str); | ||
79 | fs->p = fs->e; | ||
80 | return STRFMT_ERR; | ||
81 | } | ||
82 | } | ||
83 | } | ||
84 | fs->p = p; | ||
85 | retlit: | ||
86 | fs->len = (MSize)(p - (const uint8_t *)fs->str); | ||
87 | return fs->len ? STRFMT_LIT : STRFMT_EOF; | ||
88 | } | ||
89 | |||
90 | /* -- Raw conversions ----------------------------------------------------- */ | ||
91 | |||
92 | #define WINT_R(x, sh, sc) \ | ||
93 | { uint32_t d = (x*(((1<<sh)+sc-1)/sc))>>sh; x -= d*sc; *p++ = (char)('0'+d); } | ||
94 | |||
95 | /* Write integer to buffer. */ | ||
96 | char * LJ_FASTCALL lj_strfmt_wint(char *p, int32_t k) | ||
97 | { | ||
98 | uint32_t u = (uint32_t)k; | ||
99 | if (k < 0) { u = (uint32_t)-k; *p++ = '-'; } | ||
100 | if (u < 10000) { | ||
101 | if (u < 10) goto dig1; | ||
102 | if (u < 100) goto dig2; | ||
103 | if (u < 1000) goto dig3; | ||
104 | } else { | ||
105 | uint32_t v = u / 10000; u -= v * 10000; | ||
106 | if (v < 10000) { | ||
107 | if (v < 10) goto dig5; | ||
108 | if (v < 100) goto dig6; | ||
109 | if (v < 1000) goto dig7; | ||
110 | } else { | ||
111 | uint32_t w = v / 10000; v -= w * 10000; | ||
112 | if (w >= 10) WINT_R(w, 10, 10) | ||
113 | *p++ = (char)('0'+w); | ||
114 | } | ||
115 | WINT_R(v, 23, 1000) | ||
116 | dig7: WINT_R(v, 12, 100) | ||
117 | dig6: WINT_R(v, 10, 10) | ||
118 | dig5: *p++ = (char)('0'+v); | ||
119 | } | ||
120 | WINT_R(u, 23, 1000) | ||
121 | dig3: WINT_R(u, 12, 100) | ||
122 | dig2: WINT_R(u, 10, 10) | ||
123 | dig1: *p++ = (char)('0'+u); | ||
124 | return p; | ||
125 | } | ||
126 | #undef WINT_R | ||
127 | |||
128 | /* Write pointer to buffer. */ | ||
129 | char * LJ_FASTCALL lj_strfmt_wptr(char *p, const void *v) | ||
130 | { | ||
131 | ptrdiff_t x = (ptrdiff_t)v; | ||
132 | MSize i, n = STRFMT_MAXBUF_PTR; | ||
133 | if (x == 0) { | ||
134 | *p++ = 'N'; *p++ = 'U'; *p++ = 'L'; *p++ = 'L'; | ||
135 | return p; | ||
136 | } | ||
137 | #if LJ_64 | ||
138 | /* Shorten output for 64 bit pointers. */ | ||
139 | n = 2+2*4+((x >> 32) ? 2+2*(lj_fls((uint32_t)(x >> 32))>>3) : 0); | ||
140 | #endif | ||
141 | p[0] = '0'; | ||
142 | p[1] = 'x'; | ||
143 | for (i = n-1; i >= 2; i--, x >>= 4) | ||
144 | p[i] = "0123456789abcdef"[(x & 15)]; | ||
145 | return p+n; | ||
146 | } | ||
147 | |||
148 | /* Write ULEB128 to buffer. */ | ||
149 | char * LJ_FASTCALL lj_strfmt_wuleb128(char *p, uint32_t v) | ||
150 | { | ||
151 | for (; v >= 0x80; v >>= 7) | ||
152 | *p++ = (char)((v & 0x7f) | 0x80); | ||
153 | *p++ = (char)v; | ||
154 | return p; | ||
155 | } | ||
156 | |||
157 | /* Return string or write number to tmp buffer and return pointer to start. */ | ||
158 | const char *lj_strfmt_wstrnum(lua_State *L, cTValue *o, MSize *lenp) | ||
159 | { | ||
160 | SBuf *sb; | ||
161 | if (tvisstr(o)) { | ||
162 | *lenp = strV(o)->len; | ||
163 | return strVdata(o); | ||
164 | } else if (tvisint(o)) { | ||
165 | sb = lj_strfmt_putint(lj_buf_tmp_(L), intV(o)); | ||
166 | } else if (tvisnum(o)) { | ||
167 | sb = lj_strfmt_putfnum(lj_buf_tmp_(L), STRFMT_G14, o->n); | ||
168 | } else { | ||
169 | return NULL; | ||
170 | } | ||
171 | *lenp = sbuflen(sb); | ||
172 | return sbufB(sb); | ||
173 | } | ||
174 | |||
175 | /* -- Unformatted conversions to buffer ----------------------------------- */ | ||
176 | |||
177 | /* Add integer to buffer. */ | ||
178 | SBuf * LJ_FASTCALL lj_strfmt_putint(SBuf *sb, int32_t k) | ||
179 | { | ||
180 | setsbufP(sb, lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT), k)); | ||
181 | return sb; | ||
182 | } | ||
183 | |||
#if LJ_HASJIT
/* Add number to buffer, using the STRFMT_G14 format. Returns sb. */
SBuf * LJ_FASTCALL lj_strfmt_putnum(SBuf *sb, cTValue *o)
{
  lua_Number n = o->n;
  return lj_strfmt_putfnum(sb, STRFMT_G14, n);
}
#endif
191 | |||
192 | SBuf * LJ_FASTCALL lj_strfmt_putptr(SBuf *sb, const void *v) | ||
193 | { | ||
194 | setsbufP(sb, lj_strfmt_wptr(lj_buf_more(sb, STRFMT_MAXBUF_PTR), v)); | ||
195 | return sb; | ||
196 | } | ||
197 | |||
198 | /* Add quoted string to buffer. */ | ||
199 | SBuf * LJ_FASTCALL lj_strfmt_putquoted(SBuf *sb, GCstr *str) | ||
200 | { | ||
201 | const char *s = strdata(str); | ||
202 | MSize len = str->len; | ||
203 | lj_buf_putb(sb, '"'); | ||
204 | while (len--) { | ||
205 | uint32_t c = (uint32_t)(uint8_t)*s++; | ||
206 | char *p = lj_buf_more(sb, 4); | ||
207 | if (c == '"' || c == '\\' || c == '\n') { | ||
208 | *p++ = '\\'; | ||
209 | } else if (lj_char_iscntrl(c)) { /* This can only be 0-31 or 127. */ | ||
210 | uint32_t d; | ||
211 | *p++ = '\\'; | ||
212 | if (c >= 100 || lj_char_isdigit((uint8_t)*s)) { | ||
213 | *p++ = (char)('0'+(c >= 100)); if (c >= 100) c -= 100; | ||
214 | goto tens; | ||
215 | } else if (c >= 10) { | ||
216 | tens: | ||
217 | d = (c * 205) >> 11; c -= d * 10; *p++ = (char)('0'+d); | ||
218 | } | ||
219 | c += '0'; | ||
220 | } | ||
221 | *p++ = (char)c; | ||
222 | setsbufP(sb, p); | ||
223 | } | ||
224 | lj_buf_putb(sb, '"'); | ||
225 | return sb; | ||
226 | } | ||
227 | |||
228 | /* -- Formatted conversions to buffer ------------------------------------- */ | ||
229 | |||
230 | /* Add formatted char to buffer. */ | ||
231 | SBuf *lj_strfmt_putfchar(SBuf *sb, SFormat sf, int32_t c) | ||
232 | { | ||
233 | MSize width = STRFMT_WIDTH(sf); | ||
234 | char *p = lj_buf_more(sb, width > 1 ? width : 1); | ||
235 | if ((sf & STRFMT_F_LEFT)) *p++ = (char)c; | ||
236 | while (width-- > 1) *p++ = ' '; | ||
237 | if (!(sf & STRFMT_F_LEFT)) *p++ = (char)c; | ||
238 | setsbufP(sb, p); | ||
239 | return sb; | ||
240 | } | ||
241 | |||
242 | /* Add formatted string to buffer. */ | ||
243 | SBuf *lj_strfmt_putfstr(SBuf *sb, SFormat sf, GCstr *str) | ||
244 | { | ||
245 | MSize len = str->len <= STRFMT_PREC(sf) ? str->len : STRFMT_PREC(sf); | ||
246 | MSize width = STRFMT_WIDTH(sf); | ||
247 | char *p = lj_buf_more(sb, width > len ? width : len); | ||
248 | if ((sf & STRFMT_F_LEFT)) p = lj_buf_wmem(p, strdata(str), len); | ||
249 | while (width-- > len) *p++ = ' '; | ||
250 | if (!(sf & STRFMT_F_LEFT)) p = lj_buf_wmem(p, strdata(str), len); | ||
251 | setsbufP(sb, p); | ||
252 | return sb; | ||
253 | } | ||
254 | |||
255 | /* Add formatted signed/unsigned integer to buffer. */ | ||
256 | SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k) | ||
257 | { | ||
258 | char buf[STRFMT_MAXBUF_XINT], *q = buf + sizeof(buf), *p; | ||
259 | #ifdef LUA_USE_ASSERT | ||
260 | char *ps; | ||
261 | #endif | ||
262 | MSize prefix = 0, len, prec, pprec, width, need; | ||
263 | |||
264 | /* Figure out signed prefixes. */ | ||
265 | if (STRFMT_TYPE(sf) == STRFMT_INT) { | ||
266 | if ((int64_t)k < 0) { | ||
267 | k = (uint64_t)-(int64_t)k; | ||
268 | prefix = 256 + '-'; | ||
269 | } else if ((sf & STRFMT_F_PLUS)) { | ||
270 | prefix = 256 + '+'; | ||
271 | } else if ((sf & STRFMT_F_SPACE)) { | ||
272 | prefix = 256 + ' '; | ||
273 | } | ||
274 | } | ||
275 | |||
276 | /* Convert number and store to fixed-size buffer in reverse order. */ | ||
277 | prec = STRFMT_PREC(sf); | ||
278 | if ((int32_t)prec >= 0) sf &= ~STRFMT_F_ZERO; | ||
279 | if (k == 0) { /* Special-case zero argument. */ | ||
280 | if (prec != 0 || | ||
281 | (sf & (STRFMT_T_OCT|STRFMT_F_ALT)) == (STRFMT_T_OCT|STRFMT_F_ALT)) | ||
282 | *--q = '0'; | ||
283 | } else if (!(sf & (STRFMT_T_HEX|STRFMT_T_OCT))) { /* Decimal. */ | ||
284 | uint32_t k2; | ||
285 | while ((k >> 32)) { *--q = (char)('0' + k % 10); k /= 10; } | ||
286 | k2 = (uint32_t)k; | ||
287 | do { *--q = (char)('0' + k2 % 10); k2 /= 10; } while (k2); | ||
288 | } else if ((sf & STRFMT_T_HEX)) { /* Hex. */ | ||
289 | const char *hexdig = (sf & STRFMT_F_UPPER) ? "0123456789ABCDEF" : | ||
290 | "0123456789abcdef"; | ||
291 | do { *--q = hexdig[(k & 15)]; k >>= 4; } while (k); | ||
292 | if ((sf & STRFMT_F_ALT)) prefix = 512 + ((sf & STRFMT_F_UPPER) ? 'X' : 'x'); | ||
293 | } else { /* Octal. */ | ||
294 | do { *--q = (char)('0' + (uint32_t)(k & 7)); k >>= 3; } while (k); | ||
295 | if ((sf & STRFMT_F_ALT)) *--q = '0'; | ||
296 | } | ||
297 | |||
298 | /* Calculate sizes. */ | ||
299 | len = (MSize)(buf + sizeof(buf) - q); | ||
300 | if ((int32_t)len >= (int32_t)prec) prec = len; | ||
301 | width = STRFMT_WIDTH(sf); | ||
302 | pprec = prec + (prefix >> 8); | ||
303 | need = width > pprec ? width : pprec; | ||
304 | p = lj_buf_more(sb, need); | ||
305 | #ifdef LUA_USE_ASSERT | ||
306 | ps = p; | ||
307 | #endif | ||
308 | |||
309 | /* Format number with leading/trailing whitespace and zeros. */ | ||
310 | if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == 0) | ||
311 | while (width-- > pprec) *p++ = ' '; | ||
312 | if (prefix) { | ||
313 | if ((char)prefix >= 'X') *p++ = '0'; | ||
314 | *p++ = (char)prefix; | ||
315 | } | ||
316 | if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == STRFMT_F_ZERO) | ||
317 | while (width-- > pprec) *p++ = '0'; | ||
318 | while (prec-- > len) *p++ = '0'; | ||
319 | while (q < buf + sizeof(buf)) *p++ = *q++; /* Add number itself. */ | ||
320 | if ((sf & STRFMT_F_LEFT)) | ||
321 | while (width-- > pprec) *p++ = ' '; | ||
322 | |||
323 | lua_assert(need == (MSize)(p - ps)); | ||
324 | setsbufP(sb, p); | ||
325 | return sb; | ||
326 | } | ||
327 | |||
328 | /* Add number formatted as signed integer to buffer. */ | ||
329 | SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n) | ||
330 | { | ||
331 | int64_t k = (int64_t)n; | ||
332 | if (checki32(k) && sf == STRFMT_INT) | ||
333 | return lj_strfmt_putint(sb, (int32_t)k); /* Shortcut for plain %d. */ | ||
334 | else | ||
335 | return lj_strfmt_putfxint(sb, sf, (uint64_t)k); | ||
336 | } | ||
337 | |||
338 | /* Add number formatted as unsigned integer to buffer. */ | ||
339 | SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n) | ||
340 | { | ||
341 | int64_t k; | ||
342 | if (n >= 9223372036854775808.0) | ||
343 | k = (int64_t)(n - 18446744073709551616.0); | ||
344 | else | ||
345 | k = (int64_t)n; | ||
346 | return lj_strfmt_putfxint(sb, sf, (uint64_t)k); | ||
347 | } | ||
348 | |||
349 | /* -- Conversions to strings ---------------------------------------------- */ | ||
350 | |||
351 | /* Convert integer to string. */ | ||
352 | GCstr * LJ_FASTCALL lj_strfmt_int(lua_State *L, int32_t k) | ||
353 | { | ||
354 | char buf[STRFMT_MAXBUF_INT]; | ||
355 | MSize len = (MSize)(lj_strfmt_wint(buf, k) - buf); | ||
356 | return lj_str_new(L, buf, len); | ||
357 | } | ||
358 | |||
359 | /* Convert integer or number to string. */ | ||
360 | GCstr * LJ_FASTCALL lj_strfmt_number(lua_State *L, cTValue *o) | ||
361 | { | ||
362 | return tvisint(o) ? lj_strfmt_int(L, intV(o)) : lj_strfmt_num(L, o); | ||
363 | } | ||
364 | |||
#if LJ_HASJIT
/* Convert char value to string. Returns a string object of length one. */
GCstr * LJ_FASTCALL lj_strfmt_char(lua_State *L, int c)
{
  char ch = (char)c;
  return lj_str_new(L, &ch, 1);
}
#endif
374 | |||
375 | /* Raw conversion of object to string. */ | ||
376 | GCstr * LJ_FASTCALL lj_strfmt_obj(lua_State *L, cTValue *o) | ||
377 | { | ||
378 | if (tvisstr(o)) { | ||
379 | return strV(o); | ||
380 | } else if (tvisnumber(o)) { | ||
381 | return lj_strfmt_number(L, o); | ||
382 | } else if (tvisnil(o)) { | ||
383 | return lj_str_newlit(L, "nil"); | ||
384 | } else if (tvisfalse(o)) { | ||
385 | return lj_str_newlit(L, "false"); | ||
386 | } else if (tvistrue(o)) { | ||
387 | return lj_str_newlit(L, "true"); | ||
388 | } else { | ||
389 | char buf[8+2+2+16], *p = buf; | ||
390 | p = lj_buf_wmem(p, lj_typename(o), (MSize)strlen(lj_typename(o))); | ||
391 | *p++ = ':'; *p++ = ' '; | ||
392 | if (tvisfunc(o) && isffunc(funcV(o))) { | ||
393 | p = lj_buf_wmem(p, "builtin#", 8); | ||
394 | p = lj_strfmt_wint(p, funcV(o)->c.ffid); | ||
395 | } else { | ||
396 | p = lj_strfmt_wptr(p, lj_obj_ptr(o)); | ||
397 | } | ||
398 | return lj_str_new(L, buf, (size_t)(p - buf)); | ||
399 | } | ||
400 | } | ||
401 | |||
402 | /* -- Internal string formatting ------------------------------------------ */ | ||
403 | |||
404 | /* | ||
405 | ** These functions are only used for lua_pushfstring(), lua_pushvfstring() | ||
406 | ** and for internal string formatting (e.g. error messages). Caveat: unlike | ||
407 | ** string.format(), only a limited subset of formats and flags are supported! | ||
408 | ** | ||
409 | ** LuaJIT has support for a couple more formats than Lua 5.1/5.2: | ||
410 | ** - %d %u %o %x with full formatting, 32 bit integers only. | ||
411 | ** - %f and other FP formats are really %.14g. | ||
412 | ** - %s %c %p without formatting. | ||
413 | */ | ||
414 | |||
415 | /* Push formatted message as a string object to Lua stack. va_list variant. */ | ||
416 | const char *lj_strfmt_pushvf(lua_State *L, const char *fmt, va_list argp) | ||
417 | { | ||
418 | SBuf *sb = lj_buf_tmp_(L); | ||
419 | FormatState fs; | ||
420 | SFormat sf; | ||
421 | GCstr *str; | ||
422 | lj_strfmt_init(&fs, fmt, (MSize)strlen(fmt)); | ||
423 | while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) { | ||
424 | switch (STRFMT_TYPE(sf)) { | ||
425 | case STRFMT_LIT: | ||
426 | lj_buf_putmem(sb, fs.str, fs.len); | ||
427 | break; | ||
428 | case STRFMT_INT: | ||
429 | lj_strfmt_putfxint(sb, sf, va_arg(argp, int32_t)); | ||
430 | break; | ||
431 | case STRFMT_UINT: | ||
432 | lj_strfmt_putfxint(sb, sf, va_arg(argp, uint32_t)); | ||
433 | break; | ||
434 | case STRFMT_NUM: | ||
435 | lj_strfmt_putfnum(sb, STRFMT_G14, va_arg(argp, lua_Number)); | ||
436 | break; | ||
437 | case STRFMT_STR: { | ||
438 | const char *s = va_arg(argp, char *); | ||
439 | if (s == NULL) s = "(null)"; | ||
440 | lj_buf_putmem(sb, s, (MSize)strlen(s)); | ||
441 | break; | ||
442 | } | ||
443 | case STRFMT_CHAR: | ||
444 | lj_buf_putb(sb, va_arg(argp, int)); | ||
445 | break; | ||
446 | case STRFMT_PTR: | ||
447 | lj_strfmt_putptr(sb, va_arg(argp, void *)); | ||
448 | break; | ||
449 | case STRFMT_ERR: | ||
450 | default: | ||
451 | lj_buf_putb(sb, '?'); | ||
452 | lua_assert(0); | ||
453 | break; | ||
454 | } | ||
455 | } | ||
456 | str = lj_buf_str(L, sb); | ||
457 | setstrV(L, L->top, str); | ||
458 | incr_top(L); | ||
459 | return strdata(str); | ||
460 | } | ||
461 | |||
462 | /* Push formatted message as a string object to Lua stack. Vararg variant. */ | ||
463 | const char *lj_strfmt_pushf(lua_State *L, const char *fmt, ...) | ||
464 | { | ||
465 | const char *msg; | ||
466 | va_list argp; | ||
467 | va_start(argp, fmt); | ||
468 | msg = lj_strfmt_pushvf(L, fmt, argp); | ||
469 | va_end(argp); | ||
470 | return msg; | ||
471 | } | ||
472 | |||
diff --git a/src/lj_strfmt.h b/src/lj_strfmt.h new file mode 100644 index 00000000..339f8e15 --- /dev/null +++ b/src/lj_strfmt.h | |||
@@ -0,0 +1,125 @@ | |||
1 | /* | ||
2 | ** String formatting. | ||
3 | ** Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #ifndef _LJ_STRFMT_H | ||
7 | #define _LJ_STRFMT_H | ||
8 | |||
9 | #include "lj_obj.h" | ||
10 | |||
11 | typedef uint32_t SFormat; /* Format indicator. */ | ||
12 | |||
13 | /* Format parser state. Set up by lj_strfmt_init() and advanced by each call to lj_strfmt_parse(). */ | ||
14 | typedef struct FormatState { | ||
15 | const uint8_t *p; /* Current format string pointer. Moved past each parsed element; set to e after a parse error. */ | ||
16 | const uint8_t *e; /* End of format string. Expected to point to a NUL (asserted in lj_strfmt_init()). */ | ||
17 | const char *str; /* Returned literal string. Always the position where the last parse call started. */ | ||
18 | MSize len; /* Size of literal string. For STRFMT_ERR: length of the text up to the error location. */ | ||
19 | } FormatState; | ||
20 | |||
21 | /* Format types (max. 16). The type lives in the low 4 bits of an SFormat, see STRFMT_TYPE(). */ | ||
22 | typedef enum FormatType { | ||
23 | STRFMT_EOF, STRFMT_ERR, STRFMT_LIT, /* Parser results: end of format, invalid conversion, literal text. */ | ||
24 | STRFMT_INT, STRFMT_UINT, STRFMT_NUM, STRFMT_STR, STRFMT_CHAR, STRFMT_PTR /* Conversion types. */ | ||
25 | } FormatType; | ||
26 | |||
27 | /* Format subtypes (bits are reused). */ | ||
28 | #define STRFMT_T_HEX 0x0010 /* STRFMT_UINT */ | ||
29 | #define STRFMT_T_OCT 0x0020 /* STRFMT_UINT */ | ||
30 | #define STRFMT_T_FP_A 0x0000 /* STRFMT_NUM */ | ||
31 | #define STRFMT_T_FP_E 0x0010 /* STRFMT_NUM */ | ||
32 | #define STRFMT_T_FP_F 0x0020 /* STRFMT_NUM */ | ||
33 | #define STRFMT_T_FP_G 0x0030 /* STRFMT_NUM */ | ||
34 | #define STRFMT_T_QUOTED 0x0010 /* STRFMT_STR */ | ||
35 | |||
36 | /* Format flags. */ | ||
37 | #define STRFMT_F_LEFT 0x0100 | ||
38 | #define STRFMT_F_PLUS 0x0200 | ||
39 | #define STRFMT_F_ZERO 0x0400 | ||
40 | #define STRFMT_F_SPACE 0x0800 | ||
41 | #define STRFMT_F_ALT 0x1000 | ||
42 | #define STRFMT_F_UPPER 0x2000 | ||
43 | |||
44 | /* Format indicator fields. */ | ||
45 | #define STRFMT_SH_WIDTH 16 | ||
46 | #define STRFMT_SH_PREC 24 | ||
47 | |||
48 | #define STRFMT_TYPE(sf) ((FormatType)((sf) & 15)) /* Format type from the low 4 bits. */ | ||
49 | #define STRFMT_WIDTH(sf) (((sf) >> STRFMT_SH_WIDTH) & 255u) /* Field width, 0 if none was given. */ | ||
50 | #define STRFMT_PREC(sf) ((((sf) >> STRFMT_SH_PREC) & 255u) - 1u) /* Precision is stored biased by +1, so this yields (unsigned)-1 if none was given. */ | ||
51 | #define STRFMT_FP(sf) (((sf) >> 4) & 3) /* Two-bit subtype field, compared against the STRFMT_T_FP_* subtypes for STRFMT_NUM. */ | ||
52 | |||
53 | /* Formats for conversion characters. */ | ||
54 | #define STRFMT_A (STRFMT_NUM|STRFMT_T_FP_A) | ||
55 | #define STRFMT_C (STRFMT_CHAR) | ||
56 | #define STRFMT_D (STRFMT_INT) | ||
57 | #define STRFMT_E (STRFMT_NUM|STRFMT_T_FP_E) | ||
58 | #define STRFMT_F (STRFMT_NUM|STRFMT_T_FP_F) | ||
59 | #define STRFMT_G (STRFMT_NUM|STRFMT_T_FP_G) | ||
60 | #define STRFMT_I STRFMT_D | ||
61 | #define STRFMT_O (STRFMT_UINT|STRFMT_T_OCT) | ||
62 | #define STRFMT_P (STRFMT_PTR) | ||
63 | #define STRFMT_Q (STRFMT_STR|STRFMT_T_QUOTED) | ||
64 | #define STRFMT_S (STRFMT_STR) | ||
65 | #define STRFMT_U (STRFMT_UINT) | ||
66 | #define STRFMT_X (STRFMT_UINT|STRFMT_T_HEX) | ||
67 | #define STRFMT_G14 (STRFMT_G | ((14+1) << STRFMT_SH_PREC)) | ||
68 | |||
69 | /* Maximum buffer sizes for conversions. */ | ||
70 | #define STRFMT_MAXBUF_XINT (1+22) /* '0' prefix + uint64_t in octal. */ | ||
71 | #define STRFMT_MAXBUF_INT (1+10) /* Sign + int32_t in decimal. */ | ||
72 | #define STRFMT_MAXBUF_NUM 32 /* Must correspond with STRFMT_G14. */ | ||
73 | #define STRFMT_MAXBUF_PTR (2+2*sizeof(ptrdiff_t)) /* "0x" + hex ptr. */ | ||
74 | |||
75 | /* Format parser. */ | ||
76 | LJ_FUNC SFormat LJ_FASTCALL lj_strfmt_parse(FormatState *fs); | ||
77 | |||
78 | static LJ_AINLINE void lj_strfmt_init(FormatState *fs, const char *p, MSize len) | ||
79 | { | ||
80 | fs->p = (const uint8_t *)p; | ||
81 | fs->e = (const uint8_t *)p + len; | ||
82 | lua_assert(*fs->e == 0); /* Must be NUL-terminated (may have NULs inside). */ | ||
83 | } | ||
84 | |||
85 | /* Raw conversions. */ | ||
86 | LJ_FUNC char * LJ_FASTCALL lj_strfmt_wint(char *p, int32_t k); | ||
87 | LJ_FUNC char * LJ_FASTCALL lj_strfmt_wptr(char *p, const void *v); | ||
88 | LJ_FUNC char * LJ_FASTCALL lj_strfmt_wuleb128(char *p, uint32_t v); | ||
89 | LJ_FUNC const char *lj_strfmt_wstrnum(lua_State *L, cTValue *o, MSize *lenp); | ||
90 | |||
91 | /* Unformatted conversions to buffer. */ | ||
92 | LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putint(SBuf *sb, int32_t k); | ||
93 | #if LJ_HASJIT | ||
94 | LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putnum(SBuf *sb, cTValue *o); | ||
95 | #endif | ||
96 | LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putptr(SBuf *sb, const void *v); | ||
97 | LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putquoted(SBuf *sb, GCstr *str); | ||
98 | |||
99 | /* Formatted conversions to buffer. */ | ||
100 | LJ_FUNC SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k); | ||
101 | LJ_FUNC SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n); | ||
102 | LJ_FUNC SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n); | ||
103 | LJ_FUNC SBuf *lj_strfmt_putfnum(SBuf *sb, SFormat, lua_Number n); | ||
104 | LJ_FUNC SBuf *lj_strfmt_putfchar(SBuf *sb, SFormat, int32_t c); | ||
105 | LJ_FUNC SBuf *lj_strfmt_putfstr(SBuf *sb, SFormat, GCstr *str); | ||
106 | |||
107 | /* Conversions to strings. */ | ||
108 | LJ_FUNC GCstr * LJ_FASTCALL lj_strfmt_int(lua_State *L, int32_t k); | ||
109 | LJ_FUNCA GCstr * LJ_FASTCALL lj_strfmt_num(lua_State *L, cTValue *o); | ||
110 | LJ_FUNCA GCstr * LJ_FASTCALL lj_strfmt_number(lua_State *L, cTValue *o); | ||
111 | #if LJ_HASJIT | ||
112 | LJ_FUNC GCstr * LJ_FASTCALL lj_strfmt_char(lua_State *L, int c); | ||
113 | #endif | ||
114 | LJ_FUNC GCstr * LJ_FASTCALL lj_strfmt_obj(lua_State *L, cTValue *o); | ||
115 | |||
116 | /* Internal string formatting. */ | ||
117 | LJ_FUNC const char *lj_strfmt_pushvf(lua_State *L, const char *fmt, | ||
118 | va_list argp); | ||
119 | LJ_FUNC const char *lj_strfmt_pushf(lua_State *L, const char *fmt, ...) | ||
120 | #ifdef __GNUC__ | ||
121 | __attribute__ ((format (printf, 2, 3))) | ||
122 | #endif | ||
123 | ; | ||
124 | |||
125 | #endif | ||
diff --git a/src/lj_strfmt_num.c b/src/lj_strfmt_num.c new file mode 100644 index 00000000..36b11dc0 --- /dev/null +++ b/src/lj_strfmt_num.c | |||
@@ -0,0 +1,592 @@ | |||
1 | /* | ||
2 | ** String formatting for floating-point numbers. | ||
3 | ** Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h | ||
4 | ** Contributed by Peter Cawley. | ||
5 | */ | ||
6 | |||
7 | #include <stdio.h> | ||
8 | |||
9 | #define lj_strfmt_num_c | ||
10 | #define LUA_CORE | ||
11 | |||
12 | #include "lj_obj.h" | ||
13 | #include "lj_buf.h" | ||
14 | #include "lj_str.h" | ||
15 | #include "lj_strfmt.h" | ||
16 | |||
17 | /* -- Precomputed tables -------------------------------------------------- */ | ||
18 | |||
19 | /* Rescale factors to push the exponent of a number towards zero. */ | ||
20 | #define RESCALE_EXPONENTS(P, N) \ | ||
21 | P(308), P(289), P(270), P(250), P(231), P(212), P(193), P(173), P(154), \ | ||
22 | P(135), P(115), P(96), P(77), P(58), P(38), P(0), P(0), P(0), N(39), N(58), \ | ||
23 | N(77), N(96), N(116), N(135), N(154), N(174), N(193), N(212), N(231), \ | ||
24 | N(251), N(270), N(289) | ||
25 | |||
26 | #define ONE_E_P(X) 1e+0 ## X | ||
27 | #define ONE_E_N(X) 1e-0 ## X | ||
28 | static const int16_t rescale_e[] = { RESCALE_EXPONENTS(-, +) }; | ||
29 | static const double rescale_n[] = { RESCALE_EXPONENTS(ONE_E_P, ONE_E_N) }; | ||
30 | #undef ONE_E_N | ||
31 | #undef ONE_E_P | ||
32 | |||
33 | /* | ||
34 | ** For p in range -70 through 57, this table encodes pairs (m, e) such that | ||
35 | ** 4*2^p <= (uint8_t)m*10^e, and is the smallest value for which this holds. | ||
36 | */ | ||
37 | static const int8_t four_ulp_m_e[] = { | ||
38 | 34, -21, 68, -21, 14, -20, 28, -20, 55, -20, 2, -19, 3, -19, 5, -19, 9, -19, | ||
39 | -82, -18, 35, -18, 7, -17, -117, -17, 28, -17, 56, -17, 112, -16, -33, -16, | ||
40 | 45, -16, 89, -16, -78, -15, 36, -15, 72, -15, -113, -14, 29, -14, 57, -14, | ||
41 | 114, -13, -28, -13, 46, -13, 91, -12, -74, -12, 37, -12, 73, -12, 15, -11, 3, | ||
42 | -11, 59, -11, 2, -10, 3, -10, 5, -10, 1, -9, -69, -9, 38, -9, 75, -9, 15, -7, | ||
43 | 3, -7, 6, -7, 12, -6, -17, -7, 48, -7, 96, -7, -65, -6, 39, -6, 77, -6, -103, | ||
44 | -5, 31, -5, 62, -5, 123, -4, -11, -4, 49, -4, 98, -4, -60, -3, 4, -2, 79, -3, | ||
45 | 16, -2, 32, -2, 63, -2, 2, -1, 25, 0, 5, 1, 1, 2, 2, 2, 4, 2, 8, 2, 16, 2, | ||
46 | 32, 2, 64, 2, -128, 2, 26, 2, 52, 2, 103, 3, -51, 3, 41, 4, 82, 4, -92, 4, | ||
47 | 33, 4, 66, 4, -124, 5, 27, 5, 53, 5, 105, 6, 21, 6, 42, 6, 84, 6, 17, 7, 34, | ||
48 | 7, 68, 7, 2, 8, 3, 8, 6, 8, 108, 9, -41, 9, 43, 10, 86, 9, -84, 10, 35, 10, | ||
49 | 69, 10, -118, 11, 28, 11, 55, 12, 11, 13, 22, 13, 44, 13, 88, 13, -80, 13, | ||
50 | 36, 13, 71, 13, -115, 14, 29, 14, 57, 14, 113, 15, -30, 15, 46, 15, 91, 15, | ||
51 | 19, 16, 37, 16, 73, 16, 2, 17, 3, 17, 6, 17 | ||
52 | }; | ||
53 | |||
54 | /* min(2^32-1, 10^e-1) for e in range 0 through 10 */ | ||
55 | static uint32_t ndigits_dec_threshold[] = { | ||
56 | 0, 9U, 99U, 999U, 9999U, 99999U, 999999U, | ||
57 | 9999999U, 99999999U, 999999999U, 0xffffffffU | ||
58 | }; | ||
59 | |||
60 | /* -- Helper functions ---------------------------------------------------- */ | ||
61 | |||
62 | /* Compute the number of digits in the decimal representation of x. */ | ||
63 | static MSize ndigits_dec(uint32_t x) | ||
64 | { | ||
65 | MSize t = ((lj_fls(x | 1) * 77) >> 8) + 1; /* 2^8/77 is roughly log2(10) */ | ||
66 | return t + (x > ndigits_dec_threshold[t]); | ||
67 | } | ||
68 | |||
/*
** Emit the leading decimal digit of x and reduce x by it: d = x/sc is
** computed with a multiply and shift, x becomes x - d*sc and the digit
** is stored through the local pointer p.
*/
#define WINT_R(x, sh, sc) \
  { uint32_t d = (x*(((1<<sh)+sc-1)/sc))>>sh; x -= d*sc; *p++ = (char)('0'+d); }

/*
** Write 9-digit unsigned integer to buffer. Always writes exactly nine
** characters (with leading zeros) and returns the position after them.
*/
static char *lj_strfmt_wuint9(char *p, uint32_t u)
{
  /* Split into the top digit and two groups of four digits. */
  uint32_t mid = u / 10000;
  uint32_t lo = u - mid * 10000;
  uint32_t hi = mid / 10000;
  mid -= hi * 10000;
  *p++ = (char)('0'+hi);
  WINT_R(mid, 23, 1000)
  WINT_R(mid, 12, 100)
  WINT_R(mid, 10, 10)
  *p++ = (char)('0'+mid);
  WINT_R(lo, 23, 1000)
  WINT_R(lo, 12, 100)
  WINT_R(lo, 10, 10)
  *p++ = (char)('0'+lo);
  return p;
}
#undef WINT_R
91 | |||
92 | /* -- Extended precision arithmetic --------------------------------------- */ | ||
93 | |||
94 | /* | ||
95 | ** The "nd" format is a fixed-precision decimal representation for numbers. It | ||
96 | ** consists of up to 64 uint32_t values, with each uint32_t storing a value | ||
97 | ** in the range [0, 1e9). A number in "nd" format consists of three variables: | ||
98 | ** | ||
99 | ** uint32_t nd[64]; | ||
100 | ** uint32_t ndlo; | ||
101 | ** uint32_t ndhi; | ||
102 | ** | ||
103 | ** The integral part of the number is stored in nd[0 ... ndhi], the value of | ||
104 | ** which is sum{i in [0, ndhi] | nd[i] * 10^(9*i)}. If the fractional part of | ||
105 | ** the number is zero, ndlo is zero. Otherwise, the fractional part is stored | ||
106 | ** in nd[ndlo ... 63], the value of which is taken to be | ||
107 | ** sum{i in [ndlo, 63] | nd[i] * 10^(9*(i-64))}. | ||
108 | ** | ||
109 | ** If the array part had 128 elements rather than 64, then every double would | ||
110 | ** have an exact representation in "nd" format. With 64 elements, all integral | ||
111 | ** doubles have an exact representation, and all non-integral doubles have | ||
112 | ** enough digits to make both %.99e and %.99f do the right thing. | ||
113 | */ | ||
114 | |||
115 | #if LJ_64 | ||
116 | #define ND_MUL2K_MAX_SHIFT 29 | ||
117 | #define ND_MUL2K_DIV1E9(val) ((uint32_t)((val) / 1000000000)) | ||
118 | #else | ||
119 | #define ND_MUL2K_MAX_SHIFT 11 | ||
120 | #define ND_MUL2K_DIV1E9(val) ((uint32_t)((val) >> 9) / 1953125) | ||
121 | #endif | ||
122 | |||
123 | /* Multiply nd by 2^k and add carry_in (ndlo is assumed to be zero).
**
** nd[0..ndhi] is updated in place, one base-10^9 limb at a time, and the
** new index of the most significant limb is returned. The shift is applied
** in steps of ND_MUL2K_MAX_SHIFT bits, followed by one step for the rest.
** carry_in is OR'ed into the low bits of the first shifted limb.
** NOTE(review): this acts as an addition only if carry_in fits into the
** bits freed by the shift -- confirm against the callers.
*/ | ||
124 | static uint32_t nd_mul2k(uint32_t* nd, uint32_t ndhi, uint32_t k, | ||
125 | uint32_t carry_in, SFormat sf) | ||
126 | { | ||
127 | uint32_t i, ndlo = 0, start = 1; | ||
128 | /* Performance hacks. For large shifts and any format other than the 'f' subtype, 'start' is derived from ndhi and the precision. It controls when the loop below begins to skip low limbs -- presumably those that can no longer influence the requested digits (TODO confirm). */ | ||
129 | if (k > ND_MUL2K_MAX_SHIFT*2 && STRFMT_FP(sf) != STRFMT_FP(STRFMT_T_FP_F)) { | ||
130 | start = ndhi - (STRFMT_PREC(sf) + 17) / 8; | ||
131 | } | ||
132 | /* Real logic. */ | ||
133 | while (k >= ND_MUL2K_MAX_SHIFT) { | ||
134 | for (i = ndlo; i <= ndhi; i++) { | ||
135 | uint64_t val = ((uint64_t)nd[i] << ND_MUL2K_MAX_SHIFT) | carry_in; | ||
136 | carry_in = ND_MUL2K_DIV1E9(val); /* Quotient by 10^9 carries into the next limb. */ | ||
137 | nd[i] = (uint32_t)val - carry_in * 1000000000; /* Remainder stays in this limb. */ | ||
138 | } | ||
139 | if (carry_in) { | ||
140 | nd[++ndhi] = carry_in; carry_in = 0; /* The number grew by one limb. */ | ||
141 | if (start++ == ndlo) ++ndlo; /* Once 'start' has been reached, drop one low limb from further passes per added high limb. */ | ||
142 | } | ||
143 | k -= ND_MUL2K_MAX_SHIFT; | ||
144 | } | ||
145 | if (k) { /* Remaining shift of less than ND_MUL2K_MAX_SHIFT bits. */ | ||
146 | for (i = ndlo; i <= ndhi; i++) { | ||
147 | uint64_t val = ((uint64_t)nd[i] << k) | carry_in; | ||
148 | carry_in = ND_MUL2K_DIV1E9(val); | ||
149 | nd[i] = (uint32_t)val - carry_in * 1000000000; | ||
150 | } | ||
151 | if (carry_in) nd[++ndhi] = carry_in; | ||
152 | } | ||
153 | return ndhi; | ||
154 | } | ||
155 | |||
156 | /* Divide nd by 2^k (ndlo is assumed to be zero).
**
** nd is updated in place and the new index of the lowest used limb (ndlo)
** is returned; 0 means that no fractional limbs were created. Limb indexes
** wrap around modulo 64, so fractional limbs are added at 63, 62, ...
** The division is done in steps of 9 bits, followed by one step for the
** rest. For large k the number of fractional limbs is limited based on the
** format and precision in sf; carries below that limit are discarded.
*/ | ||
157 | static uint32_t nd_div2k(uint32_t* nd, uint32_t ndhi, uint32_t k, SFormat sf) | ||
158 | { | ||
159 | uint32_t ndlo = 0, stop1 = ~0, stop2 = ~0; /* stop1/stop2: values of ndlo at which no further low limbs are added (~0 never matches). */ | ||
160 | /* Performance hacks. */ | ||
161 | if (!ndhi) { /* Single limb: zero stays zero, and low zero bits can be shifted out without creating a fraction. */ | ||
162 | if (!nd[0]) { | ||
163 | return 0; | ||
164 | } else { | ||
165 | uint32_t s = lj_ffs(nd[0]); /* Presumably the index of the lowest set bit -- lj_ffs is defined elsewhere. */ | ||
166 | if (s >= k) { nd[0] >>= k; return 0; } | ||
167 | nd[0] >>= s; k -= s; | ||
168 | } | ||
169 | } | ||
170 | if (k > 18) { | ||
171 | if (STRFMT_FP(sf) == STRFMT_FP(STRFMT_T_FP_F)) { | ||
172 | stop1 = 63 - (int32_t)STRFMT_PREC(sf) / 9; /* 'f' subtype: limit derived from the precision at 9 digits per limb. */ | ||
173 | } else { | ||
174 | int32_t floorlog2 = ndhi * 29 + lj_fls(nd[ndhi]) - k; /* Estimate of log2 of the result, counting 29 bits per limb. */ | ||
175 | int32_t floorlog10 = (int32_t)(floorlog2 * 0.30102999566398114); /* The constant is log10(2). */ | ||
176 | stop1 = 62 + (floorlog10 - (int32_t)STRFMT_PREC(sf)) / 9; | ||
177 | stop2 = 61 + ndhi - (int32_t)STRFMT_PREC(sf) / 8; | ||
178 | } | ||
179 | } | ||
180 | /* Real logic. */ | ||
181 | while (k >= 9) { /* Divide by 2^9 per pass, from the highest limb downwards. */ | ||
182 | uint32_t i = ndhi, carry = 0; | ||
183 | for (;;) { | ||
184 | uint32_t val = nd[i]; | ||
185 | nd[i] = (val >> 9) + carry; | ||
186 | carry = (val & 0x1ff) * 1953125; /* 1953125 == 10^9 / 2^9: the remainder scaled for the next lower limb. */ | ||
187 | if (i == ndlo) break; | ||
188 | i = (i - 1) & 0x3f; | ||
189 | } | ||
190 | if (ndlo != stop1 && ndlo != stop2) { | ||
191 | if (carry) { ndlo = (ndlo - 1) & 0x3f; nd[ndlo] = carry; } /* Extend downwards with a new low limb. */ | ||
192 | if (!nd[ndhi]) { ndhi = (ndhi - 1) & 0x3f; stop2--; } /* Drop a top limb that became zero. */ | ||
193 | } else if (!nd[ndhi]) { /* At a stop limit: the carry is discarded. */ | ||
194 | if (ndhi != ndlo) { ndhi = (ndhi - 1) & 0x3f; stop2--; } | ||
195 | else return ndlo; /* The only remaining limb is zero. */ | ||
196 | } | ||
197 | k -= 9; | ||
198 | } | ||
199 | if (k) { /* Remaining shift of less than 9 bits. */ | ||
200 | uint32_t mask = (1U << k) - 1, mul = 1000000000 >> k, i = ndhi, carry = 0; /* mul == 10^9 / 2^k, exact for k < 9. */ | ||
201 | for (;;) { | ||
202 | uint32_t val = nd[i]; | ||
203 | nd[i] = (val >> k) + carry; | ||
204 | carry = (val & mask) * mul; | ||
205 | if (i == ndlo) break; | ||
206 | i = (i - 1) & 0x3f; | ||
207 | } | ||
208 | if (carry) { ndlo = (ndlo - 1) & 0x3f; nd[ndlo] = carry; } | ||
209 | } | ||
210 | return ndlo; | ||
211 | } | ||
212 | |||
213 | /* Add m*10^e to nd (assumes ndlo <= e/9 <= ndhi and 0 <= m <= 9). */ | ||
214 | static uint32_t nd_add_m10e(uint32_t* nd, uint32_t ndhi, uint8_t m, int32_t e) | ||
215 | { | ||
216 | uint32_t i, carry; | ||
217 | if (e >= 0) { | ||
218 | i = (uint32_t)e/9; | ||
219 | carry = m * (ndigits_dec_threshold[e - (int32_t)i*9] + 1); | ||
220 | } else { | ||
221 | int32_t f = (e-8)/9; | ||
222 | i = (uint32_t)(64 + f); | ||
223 | carry = m * (ndigits_dec_threshold[e - f*9] + 1); | ||
224 | } | ||
225 | for (;;) { | ||
226 | uint32_t val = nd[i] + carry; | ||
227 | if (LJ_UNLIKELY(val >= 1000000000)) { | ||
228 | val -= 1000000000; | ||
229 | nd[i] = val; | ||
230 | if (LJ_UNLIKELY(i == ndhi)) { | ||
231 | ndhi = (ndhi + 1) & 0x3f; | ||
232 | nd[ndhi] = 1; | ||
233 | break; | ||
234 | } | ||
235 | carry = 1; | ||
236 | i = (i + 1) & 0x3f; | ||
237 | } else { | ||
238 | nd[i] = val; | ||
239 | break; | ||
240 | } | ||
241 | } | ||
242 | return ndhi; | ||
243 | } | ||
244 | |||
245 | /* Test whether two "nd" values are equal in their most significant digits. */ | ||
246 | static int nd_similar(uint32_t* nd, uint32_t ndhi, uint32_t* ref, MSize hilen, | ||
247 | MSize prec) | ||
248 | { | ||
249 | char nd9[9], ref9[9]; | ||
250 | if (hilen <= prec) { | ||
251 | if (LJ_UNLIKELY(nd[ndhi] != *ref)) return 0; | ||
252 | prec -= hilen; ref--; ndhi = (ndhi - 1) & 0x3f; | ||
253 | if (prec >= 9) { | ||
254 | if (LJ_UNLIKELY(nd[ndhi] != *ref)) return 0; | ||
255 | prec -= 9; ref--; ndhi = (ndhi - 1) & 0x3f; | ||
256 | } | ||
257 | } else { | ||
258 | prec -= hilen - 9; | ||
259 | } | ||
260 | lua_assert(prec < 9); | ||
261 | lj_strfmt_wuint9(nd9, nd[ndhi]); | ||
262 | lj_strfmt_wuint9(ref9, *ref); | ||
263 | return !memcmp(nd9, ref9, prec) && (nd9[prec] < '5') == (ref9[prec] < '5'); | ||
264 | } | ||
265 | |||
266 | /* -- Formatted conversions to buffer ------------------------------------- */ | ||
267 | |||
268 | /* Write formatted floating-point number to either sb or p. */ | ||
269 | static char *lj_strfmt_wfnum(SBuf *sb, SFormat sf, lua_Number n, char *p) | ||
270 | { | ||
271 | MSize width = STRFMT_WIDTH(sf), prec = STRFMT_PREC(sf), len; | ||
272 | TValue t; | ||
273 | t.n = n; | ||
274 | if (LJ_UNLIKELY((t.u32.hi << 1) >= 0xffe00000)) { | ||
275 | /* Handle non-finite values uniformly for %a, %e, %f, %g. */ | ||
276 | int prefix = 0, ch = (sf & STRFMT_F_UPPER) ? 0x202020 : 0; | ||
277 | if (((t.u32.hi & 0x000fffff) | t.u32.lo) != 0) { | ||
278 | ch ^= ('n' << 16) | ('a' << 8) | 'n'; | ||
279 | if ((sf & STRFMT_F_SPACE)) prefix = ' '; | ||
280 | } else { | ||
281 | ch ^= ('i' << 16) | ('n' << 8) | 'f'; | ||
282 | if ((t.u32.hi & 0x80000000)) prefix = '-'; | ||
283 | else if ((sf & STRFMT_F_PLUS)) prefix = '+'; | ||
284 | else if ((sf & STRFMT_F_SPACE)) prefix = ' '; | ||
285 | } | ||
286 | len = 3 + (prefix != 0); | ||
287 | if (!p) p = lj_buf_more(sb, width > len ? width : len); | ||
288 | if (!(sf & STRFMT_F_LEFT)) while (width-- > len) *p++ = ' '; | ||
289 | if (prefix) *p++ = prefix; | ||
290 | *p++ = (char)(ch >> 16); *p++ = (char)(ch >> 8); *p++ = (char)ch; | ||
291 | } else if (STRFMT_FP(sf) == STRFMT_FP(STRFMT_T_FP_A)) { | ||
292 | /* %a */ | ||
293 | const char *hexdig = (sf & STRFMT_F_UPPER) ? "0123456789ABCDEFPX" | ||
294 | : "0123456789abcdefpx"; | ||
295 | int32_t e = (t.u32.hi >> 20) & 0x7ff; | ||
296 | char prefix = 0, eprefix = '+'; | ||
297 | if (t.u32.hi & 0x80000000) prefix = '-'; | ||
298 | else if ((sf & STRFMT_F_PLUS)) prefix = '+'; | ||
299 | else if ((sf & STRFMT_F_SPACE)) prefix = ' '; | ||
300 | t.u32.hi &= 0xfffff; | ||
301 | if (e) { | ||
302 | t.u32.hi |= 0x100000; | ||
303 | e -= 1023; | ||
304 | } else if (t.u32.lo | t.u32.hi) { | ||
305 | /* Non-zero denormal - normalise it. */ | ||
306 | uint32_t shift = t.u32.hi ? 20-lj_fls(t.u32.hi) : 52-lj_fls(t.u32.lo); | ||
307 | e = -1022 - shift; | ||
308 | t.u64 <<= shift; | ||
309 | } | ||
310 | /* abs(n) == t.u64 * 2^(e - 52) */ | ||
311 | /* If n != 0, bit 52 of t.u64 is set, and is the highest set bit. */ | ||
312 | if ((int32_t)prec < 0) { | ||
313 | /* Default precision: use smallest precision giving exact result. */ | ||
314 | prec = t.u32.lo ? 13-lj_ffs(t.u32.lo)/4 : 5-lj_ffs(t.u32.hi|0x100000)/4; | ||
315 | } else if (prec < 13) { | ||
316 | /* Precision is sufficiently low as to maybe require rounding. */ | ||
317 | t.u64 += (((uint64_t)1) << (51 - prec*4)); | ||
318 | } | ||
319 | if (e < 0) { | ||
320 | eprefix = '-'; | ||
321 | e = -e; | ||
322 | } | ||
323 | len = 5 + ndigits_dec((uint32_t)e) + prec + (prefix != 0) | ||
324 | + ((prec | (sf & STRFMT_F_ALT)) != 0); | ||
325 | if (!p) p = lj_buf_more(sb, width > len ? width : len); | ||
326 | if (!(sf & (STRFMT_F_LEFT | STRFMT_F_ZERO))) { | ||
327 | while (width-- > len) *p++ = ' '; | ||
328 | } | ||
329 | if (prefix) *p++ = prefix; | ||
330 | *p++ = '0'; | ||
331 | *p++ = hexdig[17]; /* x or X */ | ||
332 | if ((sf & (STRFMT_F_LEFT | STRFMT_F_ZERO)) == STRFMT_F_ZERO) { | ||
333 | while (width-- > len) *p++ = '0'; | ||
334 | } | ||
335 | *p++ = '0' + (t.u32.hi >> 20); /* Usually '1', sometimes '0' or '2'. */ | ||
336 | if ((prec | (sf & STRFMT_F_ALT))) { | ||
337 | /* Emit fractional part. */ | ||
338 | char *q = p + 1 + prec; | ||
339 | *p = '.'; | ||
340 | if (prec < 13) t.u64 >>= (52 - prec*4); | ||
341 | else while (prec > 13) p[prec--] = '0'; | ||
342 | while (prec) { p[prec--] = hexdig[t.u64 & 15]; t.u64 >>= 4; } | ||
343 | p = q; | ||
344 | } | ||
345 | *p++ = hexdig[16]; /* p or P */ | ||
346 | *p++ = eprefix; /* + or - */ | ||
347 | p = lj_strfmt_wint(p, e); | ||
348 | } else { | ||
349 | /* %e or %f or %g - begin by converting n to "nd" format. */ | ||
350 | uint32_t nd[64]; | ||
351 | uint32_t ndhi = 0, ndlo, i; | ||
352 | int32_t e = (t.u32.hi >> 20) & 0x7ff, ndebias = 0; | ||
353 | char prefix = 0, *q; | ||
354 | if (t.u32.hi & 0x80000000) prefix = '-'; | ||
355 | else if ((sf & STRFMT_F_PLUS)) prefix = '+'; | ||
356 | else if ((sf & STRFMT_F_SPACE)) prefix = ' '; | ||
357 | prec += ((int32_t)prec >> 31) & 7; /* Default precision is 6. */ | ||
358 | if (STRFMT_FP(sf) == STRFMT_FP(STRFMT_T_FP_G)) { | ||
359 | /* %g - decrement precision if non-zero (to make it like %e). */ | ||
360 | prec--; | ||
361 | prec ^= (uint32_t)((int32_t)prec >> 31); | ||
362 | } | ||
363 | if ((sf & STRFMT_T_FP_E) && prec < 14 && n != 0) { | ||
364 | /* Precision is sufficiently low that rescaling will probably work. */ | ||
365 | if ((ndebias = rescale_e[e >> 6])) { | ||
366 | t.n = n * rescale_n[e >> 6]; | ||
367 | if (LJ_UNLIKELY(!e)) t.n *= 1e10, ndebias -= 10; | ||
368 | t.u64 -= 2; /* Convert 2ulp below (later we convert 2ulp above). */ | ||
369 | nd[0] = 0x100000 | (t.u32.hi & 0xfffff); | ||
370 | e = ((t.u32.hi >> 20) & 0x7ff) - 1075 - (ND_MUL2K_MAX_SHIFT < 29); | ||
371 | goto load_t_lo; rescale_failed: | ||
372 | t.n = n; | ||
373 | e = (t.u32.hi >> 20) & 0x7ff; | ||
374 | ndebias = ndhi = 0; | ||
375 | } | ||
376 | } | ||
377 | nd[0] = t.u32.hi & 0xfffff; | ||
378 | if (e == 0) e++; else nd[0] |= 0x100000; | ||
379 | e -= 1043; | ||
380 | if (t.u32.lo) { | ||
381 | e -= 32 + (ND_MUL2K_MAX_SHIFT < 29); load_t_lo: | ||
382 | #if ND_MUL2K_MAX_SHIFT >= 29 | ||
383 | nd[0] = (nd[0] << 3) | (t.u32.lo >> 29); | ||
384 | ndhi = nd_mul2k(nd, ndhi, 29, t.u32.lo & 0x1fffffff, sf); | ||
385 | #elif ND_MUL2K_MAX_SHIFT >= 11 | ||
386 | ndhi = nd_mul2k(nd, ndhi, 11, t.u32.lo >> 21, sf); | ||
387 | ndhi = nd_mul2k(nd, ndhi, 11, (t.u32.lo >> 10) & 0x7ff, sf); | ||
388 | ndhi = nd_mul2k(nd, ndhi, 11, (t.u32.lo << 1) & 0x7ff, sf); | ||
389 | #else | ||
390 | #error "ND_MUL2K_MAX_SHIFT too small" | ||
391 | #endif | ||
392 | } | ||
393 | if (e >= 0) { | ||
394 | ndhi = nd_mul2k(nd, ndhi, (uint32_t)e, 0, sf); | ||
395 | ndlo = 0; | ||
396 | } else { | ||
397 | ndlo = nd_div2k(nd, ndhi, (uint32_t)-e, sf); | ||
398 | if (ndhi && !nd[ndhi]) ndhi--; | ||
399 | } | ||
400 | /* abs(n) == nd * 10^ndebias (for slightly loose interpretation of ==) */ | ||
401 | if ((sf & STRFMT_T_FP_E)) { | ||
402 | /* %e or %g - assume %e and start by calculating nd's exponent (nde). */ | ||
403 | char eprefix = '+'; | ||
404 | int32_t nde = -1; | ||
405 | MSize hilen; | ||
406 | if (ndlo && !nd[ndhi]) { | ||
407 | ndhi = 64; do {} while (!nd[--ndhi]); | ||
408 | nde -= 64 * 9; | ||
409 | } | ||
410 | hilen = ndigits_dec(nd[ndhi]); | ||
411 | nde += ndhi * 9 + hilen; | ||
412 | if (ndebias) { | ||
413 | /* | ||
414 | ** Rescaling was performed, but this introduced some error, and might | ||
415 | ** have pushed us across a rounding boundary. We check whether this | ||
416 | ** error affected the result by introducing even more error (2ulp in | ||
417 | ** either direction), and seeing whether a rounding boundary was | ||
418 | ** crossed. Having already converted the -2ulp case, we save off its | ||
419 | ** most significant digits, convert the +2ulp case, and compare them. | ||
420 | */ | ||
421 | int32_t eidx = e + 70 + (ND_MUL2K_MAX_SHIFT < 29) | ||
422 | + (t.u32.lo >= 0xfffffffe && !(~t.u32.hi << 12)); | ||
423 | const int8_t *m_e = four_ulp_m_e + eidx * 2; | ||
424 | lua_assert(0 <= eidx && eidx < 128); | ||
425 | nd[33] = nd[ndhi]; | ||
426 | nd[32] = nd[(ndhi - 1) & 0x3f]; | ||
427 | nd[31] = nd[(ndhi - 2) & 0x3f]; | ||
428 | nd_add_m10e(nd, ndhi, (uint8_t)*m_e, m_e[1]); | ||
429 | if (LJ_UNLIKELY(!nd_similar(nd, ndhi, nd + 33, hilen, prec + 1))) { | ||
430 | goto rescale_failed; | ||
431 | } | ||
432 | } | ||
433 | if ((int32_t)(prec - nde) < (0x3f & -(int32_t)ndlo) * 9) { | ||
434 | /* Precision is sufficiently low as to maybe require rounding. */ | ||
435 | ndhi = nd_add_m10e(nd, ndhi, 5, nde - prec - 1); | ||
436 | nde += (hilen != ndigits_dec(nd[ndhi])); | ||
437 | } | ||
438 | nde += ndebias; | ||
439 | if ((sf & STRFMT_T_FP_F)) { | ||
440 | /* %g */ | ||
441 | if ((int32_t)prec >= nde && nde >= -4) { | ||
442 | if (nde < 0) ndhi = 0; | ||
443 | prec -= nde; | ||
444 | goto g_format_like_f; | ||
445 | } else if (!(sf & STRFMT_F_ALT) && prec && width > 5) { | ||
446 | /* Decrease precision in order to strip trailing zeroes. */ | ||
447 | char tail[9]; | ||
448 | uint32_t maxprec = hilen - 1 + ((ndhi - ndlo) & 0x3f) * 9; | ||
449 | if (prec >= maxprec) prec = maxprec; | ||
450 | else ndlo = (ndhi - (((int32_t)(prec - hilen) + 9) / 9)) & 0x3f; | ||
451 | i = prec - hilen - (((ndhi - ndlo) & 0x3f) * 9) + 10; | ||
452 | lj_strfmt_wuint9(tail, nd[ndlo]); | ||
453 | while (prec && tail[--i] == '0') { | ||
454 | prec--; | ||
455 | if (!i) { | ||
456 | if (ndlo == ndhi) { prec = 0; break; } | ||
457 | lj_strfmt_wuint9(tail, nd[++ndlo]); | ||
458 | i = 9; | ||
459 | } | ||
460 | } | ||
461 | } | ||
462 | } | ||
463 | if (nde < 0) { | ||
464 | /* Make nde non-negative. */ | ||
465 | eprefix = '-'; | ||
466 | nde = -nde; | ||
467 | } | ||
468 | len = 3 + prec + (prefix != 0) + ndigits_dec((uint32_t)nde) + (nde < 10) | ||
469 | + ((prec | (sf & STRFMT_F_ALT)) != 0); | ||
470 | if (!p) p = lj_buf_more(sb, (width > len ? width : len) + 5); | ||
471 | if (!(sf & (STRFMT_F_LEFT | STRFMT_F_ZERO))) { | ||
472 | while (width-- > len) *p++ = ' '; | ||
473 | } | ||
474 | if (prefix) *p++ = prefix; | ||
475 | if ((sf & (STRFMT_F_LEFT | STRFMT_F_ZERO)) == STRFMT_F_ZERO) { | ||
476 | while (width-- > len) *p++ = '0'; | ||
477 | } | ||
478 | q = lj_strfmt_wint(p + 1, nd[ndhi]); | ||
479 | p[0] = p[1]; /* Put leading digit in the correct place. */ | ||
480 | if ((prec | (sf & STRFMT_F_ALT))) { | ||
481 | /* Emit fractional part. */ | ||
482 | p[1] = '.'; p += 2; | ||
483 | prec -= (MSize)(q - p); p = q; /* Account for digits already emitted. */ | ||
484 | /* Then emit chunks of 9 digits (this may emit 8 digits too many). */ | ||
485 | for (i = ndhi; (int32_t)prec > 0 && i != ndlo; prec -= 9) { | ||
486 | i = (i - 1) & 0x3f; | ||
487 | p = lj_strfmt_wuint9(p, nd[i]); | ||
488 | } | ||
489 | if ((sf & STRFMT_T_FP_F) && !(sf & STRFMT_F_ALT)) { | ||
490 | /* %g (and not %#g) - strip trailing zeroes. */ | ||
491 | p += (int32_t)prec & ((int32_t)prec >> 31); | ||
492 | while (p[-1] == '0') p--; | ||
493 | if (p[-1] == '.') p--; | ||
494 | } else { | ||
495 | /* %e (or %#g) - emit trailing zeroes. */ | ||
496 | while ((int32_t)prec > 0) { *p++ = '0'; prec--; } | ||
497 | p += (int32_t)prec; | ||
498 | } | ||
499 | } else { | ||
500 | p++; | ||
501 | } | ||
502 | *p++ = (sf & STRFMT_F_UPPER) ? 'E' : 'e'; | ||
503 | *p++ = eprefix; /* + or - */ | ||
504 | if (nde < 10) *p++ = '0'; /* Always at least two digits of exponent. */ | ||
505 | p = lj_strfmt_wint(p, nde); | ||
506 | } else { | ||
507 | /* %f (or, shortly, %g in %f style) */ | ||
508 | if (prec < (MSize)(0x3f & -(int32_t)ndlo) * 9) { | ||
509 | /* Precision is sufficiently low as to maybe require rounding. */ | ||
510 | ndhi = nd_add_m10e(nd, ndhi, 5, 0 - prec - 1); | ||
511 | } | ||
512 | g_format_like_f: | ||
513 | if ((sf & STRFMT_T_FP_E) && !(sf & STRFMT_F_ALT) && prec && width) { | ||
514 | /* Decrease precision in order to strip trailing zeroes. */ | ||
515 | if (ndlo) { | ||
516 | /* nd has a fractional part; we need to look at its digits. */ | ||
517 | char tail[9]; | ||
518 | uint32_t maxprec = (64 - ndlo) * 9; | ||
519 | if (prec >= maxprec) prec = maxprec; | ||
520 | else ndlo = 64 - (prec + 8) / 9; | ||
521 | i = prec - ((63 - ndlo) * 9); | ||
522 | lj_strfmt_wuint9(tail, nd[ndlo]); | ||
523 | while (prec && tail[--i] == '0') { | ||
524 | prec--; | ||
525 | if (!i) { | ||
526 | if (ndlo == 63) { prec = 0; break; } | ||
527 | lj_strfmt_wuint9(tail, nd[++ndlo]); | ||
528 | i = 9; | ||
529 | } | ||
530 | } | ||
531 | } else { | ||
532 | /* nd has no fractional part, so precision goes straight to zero. */ | ||
533 | prec = 0; | ||
534 | } | ||
535 | } | ||
536 | len = ndhi * 9 + ndigits_dec(nd[ndhi]) + prec + (prefix != 0) | ||
537 | + ((prec | (sf & STRFMT_F_ALT)) != 0); | ||
538 | if (!p) p = lj_buf_more(sb, (width > len ? width : len) + 8); | ||
539 | if (!(sf & (STRFMT_F_LEFT | STRFMT_F_ZERO))) { | ||
540 | while (width-- > len) *p++ = ' '; | ||
541 | } | ||
542 | if (prefix) *p++ = prefix; | ||
543 | if ((sf & (STRFMT_F_LEFT | STRFMT_F_ZERO)) == STRFMT_F_ZERO) { | ||
544 | while (width-- > len) *p++ = '0'; | ||
545 | } | ||
546 | /* Emit integer part. */ | ||
547 | p = lj_strfmt_wint(p, nd[ndhi]); | ||
548 | i = ndhi; | ||
549 | while (i) p = lj_strfmt_wuint9(p, nd[--i]); | ||
550 | if ((prec | (sf & STRFMT_F_ALT))) { | ||
551 | /* Emit fractional part. */ | ||
552 | *p++ = '.'; | ||
553 | /* Emit chunks of 9 digits (this may emit 8 digits too many). */ | ||
554 | while ((int32_t)prec > 0 && i != ndlo) { | ||
555 | i = (i - 1) & 0x3f; | ||
556 | p = lj_strfmt_wuint9(p, nd[i]); | ||
557 | prec -= 9; | ||
558 | } | ||
559 | if ((sf & STRFMT_T_FP_E) && !(sf & STRFMT_F_ALT)) { | ||
560 | /* %g (and not %#g) - strip trailing zeroes. */ | ||
561 | p += (int32_t)prec & ((int32_t)prec >> 31); | ||
562 | while (p[-1] == '0') p--; | ||
563 | if (p[-1] == '.') p--; | ||
564 | } else { | ||
565 | /* %f (or %#g) - emit trailing zeroes. */ | ||
566 | while ((int32_t)prec > 0) { *p++ = '0'; prec--; } | ||
567 | p += (int32_t)prec; | ||
568 | } | ||
569 | } | ||
570 | } | ||
571 | } | ||
572 | if ((sf & STRFMT_F_LEFT)) while (width-- > len) *p++ = ' '; | ||
573 | return p; | ||
574 | } | ||
575 | |||
576 | /* Add formatted floating-point number to buffer. */ | ||
577 | SBuf *lj_strfmt_putfnum(SBuf *sb, SFormat sf, lua_Number n) | ||
578 | { | ||
579 | setsbufP(sb, lj_strfmt_wfnum(sb, sf, n, NULL)); | ||
580 | return sb; | ||
581 | } | ||
582 | |||
583 | /* -- Conversions to strings ---------------------------------------------- */ | ||
584 | |||
585 | /* Convert number to string. */ | ||
586 | GCstr * LJ_FASTCALL lj_strfmt_num(lua_State *L, cTValue *o) | ||
587 | { | ||
588 | char buf[STRFMT_MAXBUF_NUM]; | ||
589 | MSize len = (MSize)(lj_strfmt_wfnum(NULL, STRFMT_G14, o->n, buf) - buf); | ||
590 | return lj_str_new(L, buf, len); | ||
591 | } | ||
592 | |||
diff --git a/src/lj_strscan.c b/src/lj_strscan.c index 8614facd..948c84a7 100644 --- a/src/lj_strscan.c +++ b/src/lj_strscan.c | |||
@@ -140,7 +140,7 @@ static StrScanFmt strscan_hex(const uint8_t *p, TValue *o, | |||
140 | break; | 140 | break; |
141 | } | 141 | } |
142 | 142 | ||
143 | /* Reduce range then convert to double. */ | 143 | /* Reduce range, then convert to double. */ |
144 | if ((x & U64x(c0000000,0000000))) { x = (x >> 2) | (x & 3); ex2 += 2; } | 144 | if ((x & U64x(c0000000,0000000))) { x = (x >> 2) | (x & 3); ex2 += 2; } |
145 | strscan_double(x, o, ex2, neg); | 145 | strscan_double(x, o, ex2, neg); |
146 | return fmt; | 146 | return fmt; |
@@ -326,6 +326,49 @@ static StrScanFmt strscan_dec(const uint8_t *p, TValue *o, | |||
326 | return fmt; | 326 | return fmt; |
327 | } | 327 | } |
328 | 328 | ||
329 | /* Parse binary number. */ | ||
330 | static StrScanFmt strscan_bin(const uint8_t *p, TValue *o, | ||
331 | StrScanFmt fmt, uint32_t opt, | ||
332 | int32_t ex2, int32_t neg, uint32_t dig) | ||
333 | { | ||
334 | uint64_t x = 0; | ||
335 | uint32_t i; | ||
336 | |||
337 | if (ex2 || dig > 64) return STRSCAN_ERROR; | ||
338 | |||
339 | /* Scan binary digits. */ | ||
340 | for (i = dig; i; i--, p++) { | ||
341 | if ((*p & ~1) != '0') return STRSCAN_ERROR; | ||
342 | x = (x << 1) | (*p & 1); | ||
343 | } | ||
344 | |||
345 | /* Format-specific handling. */ | ||
346 | switch (fmt) { | ||
347 | case STRSCAN_INT: | ||
348 | if (!(opt & STRSCAN_OPT_TONUM) && x < 0x80000000u+neg) { | ||
349 | o->i = neg ? -(int32_t)x : (int32_t)x; | ||
350 | return STRSCAN_INT; /* Fast path for 32 bit integers. */ | ||
351 | } | ||
352 | if (!(opt & STRSCAN_OPT_C)) { fmt = STRSCAN_NUM; break; } | ||
353 | /* fallthrough */ | ||
354 | case STRSCAN_U32: | ||
355 | if (dig > 32) return STRSCAN_ERROR; | ||
356 | o->i = neg ? -(int32_t)x : (int32_t)x; | ||
357 | return STRSCAN_U32; | ||
358 | case STRSCAN_I64: | ||
359 | case STRSCAN_U64: | ||
360 | o->u64 = neg ? (uint64_t)-(int64_t)x : x; | ||
361 | return fmt; | ||
362 | default: | ||
363 | break; | ||
364 | } | ||
365 | |||
366 | /* Reduce range, then convert to double. */ | ||
367 | if ((x & U64x(c0000000,0000000))) { x = (x >> 2) | (x & 3); ex2 += 2; } | ||
368 | strscan_double(x, o, ex2, neg); | ||
369 | return fmt; | ||
370 | } | ||
371 | |||
329 | /* Scan string containing a number. Returns format. Returns value in o. */ | 372 | /* Scan string containing a number. Returns format. Returns value in o. */ |
330 | StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt) | 373 | StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt) |
331 | { | 374 | { |
@@ -364,8 +407,12 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt) | |||
364 | 407 | ||
365 | /* Determine base and skip leading zeros. */ | 408 | /* Determine base and skip leading zeros. */ |
366 | if (LJ_UNLIKELY(*p <= '0')) { | 409 | if (LJ_UNLIKELY(*p <= '0')) { |
367 | if (*p == '0' && casecmp(p[1], 'x')) | 410 | if (*p == '0') { |
368 | base = 16, cmask = LJ_CHAR_XDIGIT, p += 2; | 411 | if (casecmp(p[1], 'x')) |
412 | base = 16, cmask = LJ_CHAR_XDIGIT, p += 2; | ||
413 | else if (casecmp(p[1], 'b')) | ||
414 | base = 2, cmask = LJ_CHAR_DIGIT, p += 2; | ||
415 | } | ||
369 | for ( ; ; p++) { | 416 | for ( ; ; p++) { |
370 | if (*p == '0') { | 417 | if (*p == '0') { |
371 | hasdig = 1; | 418 | hasdig = 1; |
@@ -403,7 +450,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt) | |||
403 | } | 450 | } |
404 | 451 | ||
405 | /* Parse exponent. */ | 452 | /* Parse exponent. */ |
406 | if (casecmp(*p, (uint32_t)(base == 16 ? 'p' : 'e'))) { | 453 | if (base >= 10 && casecmp(*p, (uint32_t)(base == 16 ? 'p' : 'e'))) { |
407 | uint32_t xx; | 454 | uint32_t xx; |
408 | int negx = 0; | 455 | int negx = 0; |
409 | fmt = STRSCAN_NUM; p++; | 456 | fmt = STRSCAN_NUM; p++; |
@@ -459,6 +506,8 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt) | |||
459 | return strscan_oct(sp, o, fmt, neg, dig); | 506 | return strscan_oct(sp, o, fmt, neg, dig); |
460 | if (base == 16) | 507 | if (base == 16) |
461 | fmt = strscan_hex(sp, o, fmt, opt, ex, neg, dig); | 508 | fmt = strscan_hex(sp, o, fmt, opt, ex, neg, dig); |
509 | else if (base == 2) | ||
510 | fmt = strscan_bin(sp, o, fmt, opt, ex, neg, dig); | ||
462 | else | 511 | else |
463 | fmt = strscan_dec(sp, o, fmt, opt, ex, neg, dig); | 512 | fmt = strscan_dec(sp, o, fmt, opt, ex, neg, dig); |
464 | 513 | ||
diff --git a/src/lj_tab.c b/src/lj_tab.c index a45ddaca..dcd24d31 100644 --- a/src/lj_tab.c +++ b/src/lj_tab.c | |||
@@ -28,8 +28,12 @@ static LJ_AINLINE Node *hashmask(const GCtab *t, uint32_t hash) | |||
28 | 28 | ||
29 | #define hashlohi(t, lo, hi) hashmask((t), hashrot((lo), (hi))) | 29 | #define hashlohi(t, lo, hi) hashmask((t), hashrot((lo), (hi))) |
30 | #define hashnum(t, o) hashlohi((t), (o)->u32.lo, ((o)->u32.hi << 1)) | 30 | #define hashnum(t, o) hashlohi((t), (o)->u32.lo, ((o)->u32.hi << 1)) |
31 | #define hashptr(t, p) hashlohi((t), u32ptr(p), u32ptr(p) + HASH_BIAS) | 31 | #if LJ_GC64 |
32 | #define hashgcref(t, r) \ | ||
33 | hashlohi((t), (uint32_t)gcrefu(r), (uint32_t)(gcrefu(r) >> 32)) | ||
34 | #else | ||
32 | #define hashgcref(t, r) hashlohi((t), gcrefu(r), gcrefu(r) + HASH_BIAS) | 35 | #define hashgcref(t, r) hashlohi((t), gcrefu(r), gcrefu(r) + HASH_BIAS) |
36 | #endif | ||
33 | 37 | ||
34 | /* Hash an arbitrary key and return its anchor position in the hash table. */ | 38 | /* Hash an arbitrary key and return its anchor position in the hash table. */ |
35 | static Node *hashkey(const GCtab *t, cTValue *key) | 39 | static Node *hashkey(const GCtab *t, cTValue *key) |
@@ -58,8 +62,8 @@ static LJ_AINLINE void newhpart(lua_State *L, GCtab *t, uint32_t hbits) | |||
58 | lj_err_msg(L, LJ_ERR_TABOV); | 62 | lj_err_msg(L, LJ_ERR_TABOV); |
59 | hsize = 1u << hbits; | 63 | hsize = 1u << hbits; |
60 | node = lj_mem_newvec(L, hsize, Node); | 64 | node = lj_mem_newvec(L, hsize, Node); |
61 | setmref(node->freetop, &node[hsize]); | ||
62 | setmref(t->node, node); | 65 | setmref(t->node, node); |
66 | setfreetop(t, node, &node[hsize]); | ||
63 | t->hmask = hsize-1; | 67 | t->hmask = hsize-1; |
64 | } | 68 | } |
65 | 69 | ||
@@ -98,6 +102,7 @@ static GCtab *newtab(lua_State *L, uint32_t asize, uint32_t hbits) | |||
98 | GCtab *t; | 102 | GCtab *t; |
99 | /* First try to colocate the array part. */ | 103 | /* First try to colocate the array part. */ |
100 | if (LJ_MAX_COLOSIZE != 0 && asize > 0 && asize <= LJ_MAX_COLOSIZE) { | 104 | if (LJ_MAX_COLOSIZE != 0 && asize > 0 && asize <= LJ_MAX_COLOSIZE) { |
105 | Node *nilnode; | ||
101 | lua_assert((sizeof(GCtab) & 7) == 0); | 106 | lua_assert((sizeof(GCtab) & 7) == 0); |
102 | t = (GCtab *)lj_mem_newgco(L, sizetabcolo(asize)); | 107 | t = (GCtab *)lj_mem_newgco(L, sizetabcolo(asize)); |
103 | t->gct = ~LJ_TTAB; | 108 | t->gct = ~LJ_TTAB; |
@@ -107,8 +112,13 @@ static GCtab *newtab(lua_State *L, uint32_t asize, uint32_t hbits) | |||
107 | setgcrefnull(t->metatable); | 112 | setgcrefnull(t->metatable); |
108 | t->asize = asize; | 113 | t->asize = asize; |
109 | t->hmask = 0; | 114 | t->hmask = 0; |
110 | setmref(t->node, &G(L)->nilnode); | 115 | nilnode = &G(L)->nilnode; |
116 | setmref(t->node, nilnode); | ||
117 | #if LJ_GC64 | ||
118 | setmref(t->freetop, nilnode); | ||
119 | #endif | ||
111 | } else { /* Otherwise separately allocate the array part. */ | 120 | } else { /* Otherwise separately allocate the array part. */ |
121 | Node *nilnode; | ||
112 | t = lj_mem_newobj(L, GCtab); | 122 | t = lj_mem_newobj(L, GCtab); |
113 | t->gct = ~LJ_TTAB; | 123 | t->gct = ~LJ_TTAB; |
114 | t->nomm = (uint8_t)~0; | 124 | t->nomm = (uint8_t)~0; |
@@ -117,7 +127,11 @@ static GCtab *newtab(lua_State *L, uint32_t asize, uint32_t hbits) | |||
117 | setgcrefnull(t->metatable); | 127 | setgcrefnull(t->metatable); |
118 | t->asize = 0; /* In case the array allocation fails. */ | 128 | t->asize = 0; /* In case the array allocation fails. */ |
119 | t->hmask = 0; | 129 | t->hmask = 0; |
120 | setmref(t->node, &G(L)->nilnode); | 130 | nilnode = &G(L)->nilnode; |
131 | setmref(t->node, nilnode); | ||
132 | #if LJ_GC64 | ||
133 | setmref(t->freetop, nilnode); | ||
134 | #endif | ||
121 | if (asize > 0) { | 135 | if (asize > 0) { |
122 | if (asize > LJ_MAX_ASIZE) | 136 | if (asize > LJ_MAX_ASIZE) |
123 | lj_err_msg(L, LJ_ERR_TABOV); | 137 | lj_err_msg(L, LJ_ERR_TABOV); |
@@ -149,6 +163,12 @@ GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits) | |||
149 | return t; | 163 | return t; |
150 | } | 164 | } |
151 | 165 | ||
166 | /* The API of this function conforms to lua_createtable(). */ | ||
167 | GCtab *lj_tab_new_ah(lua_State *L, int32_t a, int32_t h) | ||
168 | { | ||
169 | return lj_tab_new(L, (uint32_t)(a > 0 ? a+1 : 0), hsize2hbits(h)); | ||
170 | } | ||
171 | |||
152 | #if LJ_HASJIT | 172 | #if LJ_HASJIT |
153 | GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize) | 173 | GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize) |
154 | { | 174 | { |
@@ -185,7 +205,7 @@ GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt) | |||
185 | Node *node = noderef(t->node); | 205 | Node *node = noderef(t->node); |
186 | Node *knode = noderef(kt->node); | 206 | Node *knode = noderef(kt->node); |
187 | ptrdiff_t d = (char *)node - (char *)knode; | 207 | ptrdiff_t d = (char *)node - (char *)knode; |
188 | setmref(node->freetop, (Node *)((char *)noderef(knode->freetop) + d)); | 208 | setfreetop(t, node, (Node *)((char *)getfreetop(kt, knode) + d)); |
189 | for (i = 0; i <= hmask; i++) { | 209 | for (i = 0; i <= hmask; i++) { |
190 | Node *kn = &knode[i]; | 210 | Node *kn = &knode[i]; |
191 | Node *n = &node[i]; | 211 | Node *n = &node[i]; |
@@ -198,6 +218,17 @@ GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt) | |||
198 | return t; | 218 | return t; |
199 | } | 219 | } |
200 | 220 | ||
221 | /* Clear a table. */ | ||
222 | void LJ_FASTCALL lj_tab_clear(GCtab *t) | ||
223 | { | ||
224 | clearapart(t); | ||
225 | if (t->hmask > 0) { | ||
226 | Node *node = noderef(t->node); | ||
227 | setfreetop(t, node, &node[t->hmask+1]); | ||
228 | clearhpart(t); | ||
229 | } | ||
230 | } | ||
231 | |||
201 | /* Free a table. */ | 232 | /* Free a table. */ |
202 | void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t) | 233 | void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t) |
203 | { | 234 | { |
@@ -214,7 +245,7 @@ void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t) | |||
214 | /* -- Table resizing ------------------------------------------------------ */ | 245 | /* -- Table resizing ------------------------------------------------------ */ |
215 | 246 | ||
216 | /* Resize a table to fit the new array/hash part sizes. */ | 247 | /* Resize a table to fit the new array/hash part sizes. */ |
217 | static void resizetab(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits) | 248 | void lj_tab_resize(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits) |
218 | { | 249 | { |
219 | Node *oldnode = noderef(t->node); | 250 | Node *oldnode = noderef(t->node); |
220 | uint32_t oldasize = t->asize; | 251 | uint32_t oldasize = t->asize; |
@@ -247,6 +278,9 @@ static void resizetab(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits) | |||
247 | } else { | 278 | } else { |
248 | global_State *g = G(L); | 279 | global_State *g = G(L); |
249 | setmref(t->node, &g->nilnode); | 280 | setmref(t->node, &g->nilnode); |
281 | #if LJ_GC64 | ||
282 | setmref(t->freetop, &g->nilnode); | ||
283 | #endif | ||
250 | t->hmask = 0; | 284 | t->hmask = 0; |
251 | } | 285 | } |
252 | if (asize < oldasize) { /* Array part shrinks? */ | 286 | if (asize < oldasize) { /* Array part shrinks? */ |
@@ -348,7 +382,7 @@ static void rehashtab(lua_State *L, GCtab *t, cTValue *ek) | |||
348 | asize += countint(ek, bins); | 382 | asize += countint(ek, bins); |
349 | na = bestasize(bins, &asize); | 383 | na = bestasize(bins, &asize); |
350 | total -= na; | 384 | total -= na; |
351 | resizetab(L, t, asize, hsize2hbits(total)); | 385 | lj_tab_resize(L, t, asize, hsize2hbits(total)); |
352 | } | 386 | } |
353 | 387 | ||
354 | #if LJ_HASFFI | 388 | #if LJ_HASFFI |
@@ -360,7 +394,7 @@ void lj_tab_rehash(lua_State *L, GCtab *t) | |||
360 | 394 | ||
361 | void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize) | 395 | void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize) |
362 | { | 396 | { |
363 | resizetab(L, t, nasize+1, t->hmask > 0 ? lj_fls(t->hmask)+1 : 0); | 397 | lj_tab_resize(L, t, nasize+1, t->hmask > 0 ? lj_fls(t->hmask)+1 : 0); |
364 | } | 398 | } |
365 | 399 | ||
366 | /* -- Table getters ------------------------------------------------------- */ | 400 | /* -- Table getters ------------------------------------------------------- */ |
@@ -428,7 +462,7 @@ TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key) | |||
428 | Node *n = hashkey(t, key); | 462 | Node *n = hashkey(t, key); |
429 | if (!tvisnil(&n->val) || t->hmask == 0) { | 463 | if (!tvisnil(&n->val) || t->hmask == 0) { |
430 | Node *nodebase = noderef(t->node); | 464 | Node *nodebase = noderef(t->node); |
431 | Node *collide, *freenode = noderef(nodebase->freetop); | 465 | Node *collide, *freenode = getfreetop(t, nodebase); |
432 | lua_assert(freenode >= nodebase && freenode <= nodebase+t->hmask+1); | 466 | lua_assert(freenode >= nodebase && freenode <= nodebase+t->hmask+1); |
433 | do { | 467 | do { |
434 | if (freenode == nodebase) { /* No free node found? */ | 468 | if (freenode == nodebase) { /* No free node found? */ |
@@ -436,7 +470,7 @@ TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key) | |||
436 | return lj_tab_set(L, t, key); /* Retry key insertion. */ | 470 | return lj_tab_set(L, t, key); /* Retry key insertion. */ |
437 | } | 471 | } |
438 | } while (!tvisnil(&(--freenode)->key)); | 472 | } while (!tvisnil(&(--freenode)->key)); |
439 | setmref(nodebase->freetop, freenode); | 473 | setfreetop(t, nodebase, freenode); |
440 | lua_assert(freenode != &G(L)->nilnode); | 474 | lua_assert(freenode != &G(L)->nilnode); |
441 | collide = hashkey(t, &n->key); | 475 | collide = hashkey(t, &n->key); |
442 | if (collide != n) { /* Colliding node not the main node? */ | 476 | if (collide != n) { /* Colliding node not the main node? */ |
diff --git a/src/lj_tab.h b/src/lj_tab.h index dc3c8dc1..597c94b2 100644 --- a/src/lj_tab.h +++ b/src/lj_tab.h | |||
@@ -34,14 +34,17 @@ static LJ_AINLINE uint32_t hashrot(uint32_t lo, uint32_t hi) | |||
34 | #define hsize2hbits(s) ((s) ? ((s)==1 ? 1 : 1+lj_fls((uint32_t)((s)-1))) : 0) | 34 | #define hsize2hbits(s) ((s) ? ((s)==1 ? 1 : 1+lj_fls((uint32_t)((s)-1))) : 0) |
35 | 35 | ||
36 | LJ_FUNCA GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits); | 36 | LJ_FUNCA GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits); |
37 | LJ_FUNC GCtab *lj_tab_new_ah(lua_State *L, int32_t a, int32_t h); | ||
37 | #if LJ_HASJIT | 38 | #if LJ_HASJIT |
38 | LJ_FUNC GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize); | 39 | LJ_FUNC GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize); |
39 | #endif | 40 | #endif |
40 | LJ_FUNCA GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt); | 41 | LJ_FUNCA GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt); |
42 | LJ_FUNC void LJ_FASTCALL lj_tab_clear(GCtab *t); | ||
41 | LJ_FUNC void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t); | 43 | LJ_FUNC void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t); |
42 | #if LJ_HASFFI | 44 | #if LJ_HASFFI |
43 | LJ_FUNC void lj_tab_rehash(lua_State *L, GCtab *t); | 45 | LJ_FUNC void lj_tab_rehash(lua_State *L, GCtab *t); |
44 | #endif | 46 | #endif |
47 | LJ_FUNC void lj_tab_resize(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits); | ||
45 | LJ_FUNCA void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize); | 48 | LJ_FUNCA void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize); |
46 | 49 | ||
47 | /* Caveat: all getters except lj_tab_get() can return NULL! */ | 50 | /* Caveat: all getters except lj_tab_get() can return NULL! */ |
@@ -53,7 +56,7 @@ LJ_FUNCA cTValue *lj_tab_get(lua_State *L, GCtab *t, cTValue *key); | |||
53 | /* Caveat: all setters require a write barrier for the stored value. */ | 56 | /* Caveat: all setters require a write barrier for the stored value. */ |
54 | 57 | ||
55 | LJ_FUNCA TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key); | 58 | LJ_FUNCA TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key); |
56 | LJ_FUNC TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key); | 59 | LJ_FUNCA TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key); |
57 | LJ_FUNC TValue *lj_tab_setstr(lua_State *L, GCtab *t, GCstr *key); | 60 | LJ_FUNC TValue *lj_tab_setstr(lua_State *L, GCtab *t, GCstr *key); |
58 | LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key); | 61 | LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key); |
59 | 62 | ||
diff --git a/src/lj_target.h b/src/lj_target.h index a8182596..47c960bc 100644 --- a/src/lj_target.h +++ b/src/lj_target.h | |||
@@ -55,7 +55,7 @@ typedef uint32_t RegSP; | |||
55 | /* Bitset for registers. 32 registers suffice for most architectures. | 55 | /* Bitset for registers. 32 registers suffice for most architectures. |
56 | ** Note that one set holds bits for both GPRs and FPRs. | 56 | ** Note that one set holds bits for both GPRs and FPRs. |
57 | */ | 57 | */ |
58 | #if LJ_TARGET_PPC || LJ_TARGET_MIPS | 58 | #if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 |
59 | typedef uint64_t RegSet; | 59 | typedef uint64_t RegSet; |
60 | #else | 60 | #else |
61 | typedef uint32_t RegSet; | 61 | typedef uint32_t RegSet; |
@@ -69,7 +69,7 @@ typedef uint32_t RegSet; | |||
69 | #define rset_set(rs, r) (rs |= RID2RSET(r)) | 69 | #define rset_set(rs, r) (rs |= RID2RSET(r)) |
70 | #define rset_clear(rs, r) (rs &= ~RID2RSET(r)) | 70 | #define rset_clear(rs, r) (rs &= ~RID2RSET(r)) |
71 | #define rset_exclude(rs, r) (rs & ~RID2RSET(r)) | 71 | #define rset_exclude(rs, r) (rs & ~RID2RSET(r)) |
72 | #if LJ_TARGET_PPC || LJ_TARGET_MIPS | 72 | #if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 |
73 | #define rset_picktop(rs) ((Reg)(__builtin_clzll(rs)^63)) | 73 | #define rset_picktop(rs) ((Reg)(__builtin_clzll(rs)^63)) |
74 | #define rset_pickbot(rs) ((Reg)__builtin_ctzll(rs)) | 74 | #define rset_pickbot(rs) ((Reg)__builtin_ctzll(rs)) |
75 | #else | 75 | #else |
@@ -138,6 +138,8 @@ typedef uint32_t RegCost; | |||
138 | #include "lj_target_x86.h" | 138 | #include "lj_target_x86.h" |
139 | #elif LJ_TARGET_ARM | 139 | #elif LJ_TARGET_ARM |
140 | #include "lj_target_arm.h" | 140 | #include "lj_target_arm.h" |
141 | #elif LJ_TARGET_ARM64 | ||
142 | #include "lj_target_arm64.h" | ||
141 | #elif LJ_TARGET_PPC | 143 | #elif LJ_TARGET_PPC |
142 | #include "lj_target_ppc.h" | 144 | #include "lj_target_ppc.h" |
143 | #elif LJ_TARGET_MIPS | 145 | #elif LJ_TARGET_MIPS |
diff --git a/src/lj_target_arm.h b/src/lj_target_arm.h index 4d292dc9..48e50fe9 100644 --- a/src/lj_target_arm.h +++ b/src/lj_target_arm.h | |||
@@ -243,10 +243,6 @@ typedef enum ARMIns { | |||
243 | ARMI_VCVT_S32_F64 = 0xeebd0bc0, | 243 | ARMI_VCVT_S32_F64 = 0xeebd0bc0, |
244 | ARMI_VCVT_U32_F32 = 0xeebc0ac0, | 244 | ARMI_VCVT_U32_F32 = 0xeebc0ac0, |
245 | ARMI_VCVT_U32_F64 = 0xeebc0bc0, | 245 | ARMI_VCVT_U32_F64 = 0xeebc0bc0, |
246 | ARMI_VCVTR_S32_F32 = 0xeebd0a40, | ||
247 | ARMI_VCVTR_S32_F64 = 0xeebd0b40, | ||
248 | ARMI_VCVTR_U32_F32 = 0xeebc0a40, | ||
249 | ARMI_VCVTR_U32_F64 = 0xeebc0b40, | ||
250 | ARMI_VCVT_F32_S32 = 0xeeb80ac0, | 246 | ARMI_VCVT_F32_S32 = 0xeeb80ac0, |
251 | ARMI_VCVT_F64_S32 = 0xeeb80bc0, | 247 | ARMI_VCVT_F64_S32 = 0xeeb80bc0, |
252 | ARMI_VCVT_F32_U32 = 0xeeb80a40, | 248 | ARMI_VCVT_F32_U32 = 0xeeb80a40, |
diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h new file mode 100644 index 00000000..d729e178 --- /dev/null +++ b/src/lj_target_arm64.h | |||
@@ -0,0 +1,332 @@ | |||
1 | /* | ||
2 | ** Definitions for ARM64 CPUs. | ||
3 | ** Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #ifndef _LJ_TARGET_ARM64_H | ||
7 | #define _LJ_TARGET_ARM64_H | ||
8 | |||
9 | /* -- Registers IDs ------------------------------------------------------- */ | ||
10 | |||
11 | #define GPRDEF(_) \ | ||
12 | _(X0) _(X1) _(X2) _(X3) _(X4) _(X5) _(X6) _(X7) \ | ||
13 | _(X8) _(X9) _(X10) _(X11) _(X12) _(X13) _(X14) _(X15) \ | ||
14 | _(X16) _(X17) _(X18) _(X19) _(X20) _(X21) _(X22) _(X23) \ | ||
15 | _(X24) _(X25) _(X26) _(X27) _(X28) _(FP) _(LR) _(SP) | ||
16 | #define FPRDEF(_) \ | ||
17 | _(D0) _(D1) _(D2) _(D3) _(D4) _(D5) _(D6) _(D7) \ | ||
18 | _(D8) _(D9) _(D10) _(D11) _(D12) _(D13) _(D14) _(D15) \ | ||
19 | _(D16) _(D17) _(D18) _(D19) _(D20) _(D21) _(D22) _(D23) \ | ||
20 | _(D24) _(D25) _(D26) _(D27) _(D28) _(D29) _(D30) _(D31) | ||
21 | #define VRIDDEF(_) | ||
22 | |||
23 | #define RIDENUM(name) RID_##name, | ||
24 | |||
25 | enum { | ||
26 | GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */ | ||
27 | FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */ | ||
28 | RID_MAX, | ||
29 | RID_TMP = RID_LR, | ||
30 | RID_ZERO = RID_SP, | ||
31 | |||
32 | /* Calling conventions. */ | ||
33 | RID_RET = RID_X0, | ||
34 | RID_FPRET = RID_D0, | ||
35 | |||
36 | /* These definitions must match with the *.dasc file(s): */ | ||
37 | RID_BASE = RID_X19, /* Interpreter BASE. */ | ||
38 | RID_LPC = RID_X21, /* Interpreter PC. */ | ||
39 | RID_GL = RID_X22, /* Interpreter GL. */ | ||
40 | RID_LREG = RID_X23, /* Interpreter L. */ | ||
41 | |||
42 | /* Register ranges [min, max) and number of registers. */ | ||
43 | RID_MIN_GPR = RID_X0, | ||
44 | RID_MAX_GPR = RID_SP+1, | ||
45 | RID_MIN_FPR = RID_MAX_GPR, | ||
46 | RID_MAX_FPR = RID_D31+1, | ||
47 | RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, | ||
48 | RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR | ||
49 | }; | ||
50 | |||
51 | #define RID_NUM_KREF RID_NUM_GPR | ||
52 | #define RID_MIN_KREF RID_X0 | ||
53 | |||
54 | /* -- Register sets ------------------------------------------------------- */ | ||
55 | |||
56 | /* Make use of all registers, except for x18, fp, lr and sp. */ | ||
57 | #define RSET_FIXED \ | ||
58 | (RID2RSET(RID_X18)|RID2RSET(RID_FP)|RID2RSET(RID_LR)|RID2RSET(RID_SP)|\ | ||
59 | RID2RSET(RID_GL)) | ||
60 | #define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED) | ||
61 | #define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR) | ||
62 | #define RSET_ALL (RSET_GPR|RSET_FPR) | ||
63 | #define RSET_INIT RSET_ALL | ||
64 | |||
65 | /* lr is an implicit scratch register. */ | ||
66 | #define RSET_SCRATCH_GPR (RSET_RANGE(RID_X0, RID_X17+1)) | ||
67 | #define RSET_SCRATCH_FPR \ | ||
68 | (RSET_RANGE(RID_D0, RID_D7+1)|RSET_RANGE(RID_D16, RID_D31+1)) | ||
69 | #define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR) | ||
70 | #define REGARG_FIRSTGPR RID_X0 | ||
71 | #define REGARG_LASTGPR RID_X7 | ||
72 | #define REGARG_NUMGPR 8 | ||
73 | #define REGARG_FIRSTFPR RID_D0 | ||
74 | #define REGARG_LASTFPR RID_D7 | ||
75 | #define REGARG_NUMFPR 8 | ||
76 | |||
77 | /* -- Spill slots --------------------------------------------------------- */ | ||
78 | |||
79 | /* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. | ||
80 | ** | ||
81 | ** SPS_FIXED: Available fixed spill slots in interpreter frame. | ||
82 | ** This definition must match with the vm_arm64.dasc file. | ||
83 | ** Pre-allocate some slots to avoid sp adjust in every root trace. | ||
84 | ** | ||
85 | ** SPS_FIRST: First spill slot for general use. Reserve min. two 32 bit slots. | ||
86 | */ | ||
87 | #define SPS_FIXED 4 | ||
88 | #define SPS_FIRST 2 | ||
89 | |||
90 | #define SPOFS_TMP 0 | ||
91 | |||
92 | #define sps_scale(slot) (4 * (int32_t)(slot)) | ||
93 | #define sps_align(slot) (((slot) - SPS_FIXED + 3) & ~3) | ||
94 | |||
95 | /* -- Exit state ---------------------------------------------------------- */ | ||
96 | |||
97 | /* This definition must match with the *.dasc file(s). */ | ||
98 | typedef struct { | ||
99 | lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ | ||
100 | intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ | ||
101 | int32_t spill[256]; /* Spill slots. */ | ||
102 | } ExitState; | ||
103 | |||
104 | /* Highest exit + 1 indicates stack check. */ | ||
105 | #define EXITSTATE_CHECKEXIT 1 | ||
106 | |||
107 | /* Return the address of a per-trace exit stub. */ | ||
108 | static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno) | ||
109 | { | ||
110 | while (*p == (LJ_LE ? 0xd503201f : 0x1f2003d5)) p++; /* Skip A64I_NOP. */ | ||
111 | return p + 3 + exitno; | ||
112 | } | ||
113 | /* Avoid dependence on lj_jit.h if only including lj_target.h. */ | ||
114 | #define exitstub_trace_addr(T, exitno) \ | ||
115 | exitstub_trace_addr_((MCode *)((char *)(T)->mcode + (T)->szmcode), (exitno)) | ||
116 | |||
117 | /* -- Instructions -------------------------------------------------------- */ | ||
118 | |||
119 | /* ARM64 instructions are always little-endian. Swap for ARM64BE. */ | ||
120 | #if LJ_BE | ||
121 | #define A64I_LE(x) (lj_bswap(x)) | ||
122 | #else | ||
123 | #define A64I_LE(x) (x) | ||
124 | #endif | ||
125 | |||
126 | /* Instruction fields. */ | ||
127 | #define A64F_D(r) (r) | ||
128 | #define A64F_N(r) ((r) << 5) | ||
129 | #define A64F_A(r) ((r) << 10) | ||
130 | #define A64F_M(r) ((r) << 16) | ||
131 | #define A64F_IMMS(x) ((x) << 10) | ||
132 | #define A64F_IMMR(x) ((x) << 16) | ||
133 | #define A64F_U16(x) ((x) << 5) | ||
134 | #define A64F_U12(x) ((x) << 10) | ||
135 | #define A64F_S26(x) (((uint32_t)(x) & 0x03ffffffu)) | ||
136 | #define A64F_S19(x) (((uint32_t)(x) & 0x7ffffu) << 5) | ||
137 | #define A64F_S14(x) (((uint32_t)(x) & 0x3fffu) << 5) | ||
138 | #define A64F_S9(x) ((x) << 12) | ||
139 | #define A64F_BIT(x) ((x) << 19) | ||
140 | #define A64F_SH(sh, x) (((sh) << 22) | ((x) << 10)) | ||
141 | #define A64F_EX(ex) (A64I_EX | ((ex) << 13)) | ||
142 | #define A64F_EXSH(ex,x) (A64I_EX | ((ex) << 13) | ((x) << 10)) | ||
143 | #define A64F_FP8(x) ((x) << 13) | ||
144 | #define A64F_CC(cc) ((cc) << 12) | ||
145 | #define A64F_LSL16(x) (((x) / 16) << 21) | ||
146 | #define A64F_BSH(sh) ((sh) << 10) | ||
147 | |||
148 | /* Check for valid field range. */ | ||
149 | #define A64F_S_OK(x, b) ((((x) + (1 << (b-1))) >> (b)) == 0) | ||
150 | |||
151 | typedef enum A64Ins { | ||
152 | A64I_S = 0x20000000, | ||
153 | A64I_X = 0x80000000, | ||
154 | A64I_EX = 0x00200000, | ||
155 | A64I_ON = 0x00200000, | ||
156 | A64I_K12 = 0x1a000000, | ||
157 | A64I_K13 = 0x18000000, | ||
158 | A64I_LS_U = 0x01000000, | ||
159 | A64I_LS_S = 0x00800000, | ||
160 | A64I_LS_R = 0x01200800, | ||
161 | A64I_LS_SH = 0x00001000, | ||
162 | A64I_LS_UXTWx = 0x00004000, | ||
163 | A64I_LS_SXTWx = 0x0000c000, | ||
164 | A64I_LS_SXTXx = 0x0000e000, | ||
165 | A64I_LS_LSLx = 0x00006000, | ||
166 | |||
167 | A64I_ADDw = 0x0b000000, | ||
168 | A64I_ADDx = 0x8b000000, | ||
169 | A64I_ADDSw = 0x2b000000, | ||
170 | A64I_ADDSx = 0xab000000, | ||
171 | A64I_NEGw = 0x4b0003e0, | ||
172 | A64I_NEGx = 0xcb0003e0, | ||
173 | A64I_SUBw = 0x4b000000, | ||
174 | A64I_SUBx = 0xcb000000, | ||
175 | A64I_SUBSw = 0x6b000000, | ||
176 | A64I_SUBSx = 0xeb000000, | ||
177 | |||
178 | A64I_MULw = 0x1b007c00, | ||
179 | A64I_MULx = 0x9b007c00, | ||
180 | A64I_SMULL = 0x9b207c00, | ||
181 | |||
182 | A64I_ANDw = 0x0a000000, | ||
183 | A64I_ANDx = 0x8a000000, | ||
184 | A64I_ANDSw = 0x6a000000, | ||
185 | A64I_ANDSx = 0xea000000, | ||
186 | A64I_EORw = 0x4a000000, | ||
187 | A64I_EORx = 0xca000000, | ||
188 | A64I_ORRw = 0x2a000000, | ||
189 | A64I_ORRx = 0xaa000000, | ||
190 | A64I_TSTw = 0x6a00001f, | ||
191 | A64I_TSTx = 0xea00001f, | ||
192 | |||
193 | A64I_CMPw = 0x6b00001f, | ||
194 | A64I_CMPx = 0xeb00001f, | ||
195 | A64I_CMNw = 0x2b00001f, | ||
196 | A64I_CMNx = 0xab00001f, | ||
197 | A64I_CCMPw = 0x7a400000, | ||
198 | A64I_CCMPx = 0xfa400000, | ||
199 | A64I_CSELw = 0x1a800000, | ||
200 | A64I_CSELx = 0x9a800000, | ||
201 | |||
202 | A64I_ASRw = 0x13007c00, | ||
203 | A64I_ASRx = 0x9340fc00, | ||
204 | A64I_LSLx = 0xd3400000, | ||
205 | A64I_LSRx = 0xd340fc00, | ||
206 | A64I_SHRw = 0x1ac02000, | ||
207 | A64I_SHRx = 0x9ac02000, /* lsl/lsr/asr/ror x0, x0, x0 */ | ||
208 | A64I_REVw = 0x5ac00800, | ||
209 | A64I_REVx = 0xdac00c00, | ||
210 | |||
211 | A64I_EXTRw = 0x13800000, | ||
212 | A64I_EXTRx = 0x93c00000, | ||
213 | A64I_SBFMw = 0x13000000, | ||
214 | A64I_SBFMx = 0x93400000, | ||
215 | A64I_SXTBw = 0x13001c00, | ||
216 | A64I_SXTHw = 0x13003c00, | ||
217 | A64I_SXTW = 0x93407c00, | ||
218 | A64I_UBFMw = 0x53000000, | ||
219 | A64I_UBFMx = 0xd3400000, | ||
220 | A64I_UXTBw = 0x53001c00, | ||
221 | A64I_UXTHw = 0x53003c00, | ||
222 | |||
223 | A64I_MOVw = 0x2a0003e0, | ||
224 | A64I_MOVx = 0xaa0003e0, | ||
225 | A64I_MVNw = 0x2a2003e0, | ||
226 | A64I_MVNx = 0xaa2003e0, | ||
227 | A64I_MOVKw = 0x72800000, | ||
228 | A64I_MOVKx = 0xf2800000, | ||
229 | A64I_MOVZw = 0x52800000, | ||
230 | A64I_MOVZx = 0xd2800000, | ||
231 | A64I_MOVNw = 0x12800000, | ||
232 | A64I_MOVNx = 0x92800000, | ||
233 | |||
234 | A64I_LDRB = 0x39400000, | ||
235 | A64I_LDRH = 0x79400000, | ||
236 | A64I_LDRw = 0xb9400000, | ||
237 | A64I_LDRx = 0xf9400000, | ||
238 | A64I_LDRLw = 0x18000000, | ||
239 | A64I_LDRLx = 0x58000000, | ||
240 | A64I_STRB = 0x39000000, | ||
241 | A64I_STRH = 0x79000000, | ||
242 | A64I_STRw = 0xb9000000, | ||
243 | A64I_STRx = 0xf9000000, | ||
244 | A64I_STPw = 0x29000000, | ||
245 | A64I_STPx = 0xa9000000, | ||
246 | A64I_LDPw = 0x29400000, | ||
247 | A64I_LDPx = 0xa9400000, | ||
248 | |||
249 | A64I_B = 0x14000000, | ||
250 | A64I_BCC = 0x54000000, | ||
251 | A64I_BL = 0x94000000, | ||
252 | A64I_BR = 0xd61f0000, | ||
253 | A64I_BLR = 0xd63f0000, | ||
254 | A64I_TBZ = 0x36000000, | ||
255 | A64I_TBNZ = 0x37000000, | ||
256 | A64I_CBZ = 0x34000000, | ||
257 | A64I_CBNZ = 0x35000000, | ||
258 | |||
259 | A64I_NOP = 0xd503201f, | ||
260 | |||
261 | /* FP */ | ||
262 | A64I_FADDd = 0x1e602800, | ||
263 | A64I_FSUBd = 0x1e603800, | ||
264 | A64I_FMADDd = 0x1f400000, | ||
265 | A64I_FMSUBd = 0x1f408000, | ||
266 | A64I_FNMADDd = 0x1f600000, | ||
267 | A64I_FNMSUBd = 0x1f608000, | ||
268 | A64I_FMULd = 0x1e600800, | ||
269 | A64I_FDIVd = 0x1e601800, | ||
270 | A64I_FNEGd = 0x1e614000, | ||
271 | A64I_FABS = 0x1e60c000, | ||
272 | A64I_FSQRTd = 0x1e61c000, | ||
273 | A64I_LDRs = 0xbd400000, | ||
274 | A64I_LDRd = 0xfd400000, | ||
275 | A64I_STRs = 0xbd000000, | ||
276 | A64I_STRd = 0xfd000000, | ||
277 | A64I_LDPs = 0x2d400000, | ||
278 | A64I_LDPd = 0x6d400000, | ||
279 | A64I_STPs = 0x2d000000, | ||
280 | A64I_STPd = 0x6d000000, | ||
281 | A64I_FCMPd = 0x1e602000, | ||
282 | A64I_FCMPZd = 0x1e602008, | ||
283 | A64I_FCSELd = 0x1e600c00, | ||
284 | A64I_FRINTMd = 0x1e654000, | ||
285 | A64I_FRINTPd = 0x1e64c000, | ||
286 | A64I_FRINTZd = 0x1e65c000, | ||
287 | |||
288 | A64I_FCVT_F32_F64 = 0x1e624000, | ||
289 | A64I_FCVT_F64_F32 = 0x1e22c000, | ||
290 | A64I_FCVT_F32_S32 = 0x1e220000, | ||
291 | A64I_FCVT_F64_S32 = 0x1e620000, | ||
292 | A64I_FCVT_F32_U32 = 0x1e230000, | ||
293 | A64I_FCVT_F64_U32 = 0x1e630000, | ||
294 | A64I_FCVT_F32_S64 = 0x9e220000, | ||
295 | A64I_FCVT_F64_S64 = 0x9e620000, | ||
296 | A64I_FCVT_F32_U64 = 0x9e230000, | ||
297 | A64I_FCVT_F64_U64 = 0x9e630000, | ||
298 | A64I_FCVT_S32_F64 = 0x1e780000, | ||
299 | A64I_FCVT_S32_F32 = 0x1e380000, | ||
300 | A64I_FCVT_U32_F64 = 0x1e790000, | ||
301 | A64I_FCVT_U32_F32 = 0x1e390000, | ||
302 | A64I_FCVT_S64_F64 = 0x9e780000, | ||
303 | A64I_FCVT_S64_F32 = 0x9e380000, | ||
304 | A64I_FCVT_U64_F64 = 0x9e790000, | ||
305 | A64I_FCVT_U64_F32 = 0x9e390000, | ||
306 | |||
307 | A64I_FMOV_S = 0x1e204000, | ||
308 | A64I_FMOV_D = 0x1e604000, | ||
309 | A64I_FMOV_R_S = 0x1e260000, | ||
310 | A64I_FMOV_S_R = 0x1e270000, | ||
311 | A64I_FMOV_R_D = 0x9e660000, | ||
312 | A64I_FMOV_D_R = 0x9e670000, | ||
313 | A64I_FMOV_DI = 0x1e601000, | ||
314 | } A64Ins; | ||
315 | |||
316 | typedef enum A64Shift { | ||
317 | A64SH_LSL, A64SH_LSR, A64SH_ASR, A64SH_ROR | ||
318 | } A64Shift; | ||
319 | |||
320 | typedef enum A64Extend { | ||
321 | A64EX_UXTB, A64EX_UXTH, A64EX_UXTW, A64EX_UXTX, | ||
322 | A64EX_SXTB, A64EX_SXTH, A64EX_SXTW, A64EX_SXTX, | ||
323 | } A64Extend; | ||
324 | |||
325 | /* ARM condition codes. */ | ||
326 | typedef enum A64CC { | ||
327 | CC_EQ, CC_NE, CC_CS, CC_CC, CC_MI, CC_PL, CC_VS, CC_VC, | ||
328 | CC_HI, CC_LS, CC_GE, CC_LT, CC_GT, CC_LE, CC_AL, | ||
329 | CC_HS = CC_CS, CC_LO = CC_CC | ||
330 | } A64CC; | ||
331 | |||
332 | #endif | ||
diff --git a/src/lj_target_mips.h b/src/lj_target_mips.h index 4bbdc743..6e436967 100644 --- a/src/lj_target_mips.h +++ b/src/lj_target_mips.h | |||
@@ -13,11 +13,15 @@ | |||
13 | _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _(R15) \ | 13 | _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _(R15) \ |
14 | _(R16) _(R17) _(R18) _(R19) _(R20) _(R21) _(R22) _(R23) \ | 14 | _(R16) _(R17) _(R18) _(R19) _(R20) _(R21) _(R22) _(R23) \ |
15 | _(R24) _(R25) _(SYS1) _(SYS2) _(R28) _(SP) _(R30) _(RA) | 15 | _(R24) _(R25) _(SYS1) _(SYS2) _(R28) _(SP) _(R30) _(RA) |
16 | #if LJ_SOFTFP | ||
17 | #define FPRDEF(_) | ||
18 | #else | ||
16 | #define FPRDEF(_) \ | 19 | #define FPRDEF(_) \ |
17 | _(F0) _(F1) _(F2) _(F3) _(F4) _(F5) _(F6) _(F7) \ | 20 | _(F0) _(F1) _(F2) _(F3) _(F4) _(F5) _(F6) _(F7) \ |
18 | _(F8) _(F9) _(F10) _(F11) _(F12) _(F13) _(F14) _(F15) \ | 21 | _(F8) _(F9) _(F10) _(F11) _(F12) _(F13) _(F14) _(F15) \ |
19 | _(F16) _(F17) _(F18) _(F19) _(F20) _(F21) _(F22) _(F23) \ | 22 | _(F16) _(F17) _(F18) _(F19) _(F20) _(F21) _(F22) _(F23) \ |
20 | _(F24) _(F25) _(F26) _(F27) _(F28) _(F29) _(F30) _(F31) | 23 | _(F24) _(F25) _(F26) _(F27) _(F28) _(F29) _(F30) _(F31) |
24 | #endif | ||
21 | #define VRIDDEF(_) | 25 | #define VRIDDEF(_) |
22 | 26 | ||
23 | #define RIDENUM(name) RID_##name, | 27 | #define RIDENUM(name) RID_##name, |
@@ -39,7 +43,11 @@ enum { | |||
39 | RID_RETHI = RID_R2, | 43 | RID_RETHI = RID_R2, |
40 | RID_RETLO = RID_R3, | 44 | RID_RETLO = RID_R3, |
41 | #endif | 45 | #endif |
46 | #if LJ_SOFTFP | ||
47 | RID_FPRET = RID_R2, | ||
48 | #else | ||
42 | RID_FPRET = RID_F0, | 49 | RID_FPRET = RID_F0, |
50 | #endif | ||
43 | RID_CFUNCADDR = RID_R25, | 51 | RID_CFUNCADDR = RID_R25, |
44 | 52 | ||
45 | /* These definitions must match with the *.dasc file(s): */ | 53 | /* These definitions must match with the *.dasc file(s): */ |
@@ -52,8 +60,12 @@ enum { | |||
52 | /* Register ranges [min, max) and number of registers. */ | 60 | /* Register ranges [min, max) and number of registers. */ |
53 | RID_MIN_GPR = RID_R0, | 61 | RID_MIN_GPR = RID_R0, |
54 | RID_MAX_GPR = RID_RA+1, | 62 | RID_MAX_GPR = RID_RA+1, |
55 | RID_MIN_FPR = RID_F0, | 63 | RID_MIN_FPR = RID_MAX_GPR, |
64 | #if LJ_SOFTFP | ||
65 | RID_MAX_FPR = RID_MIN_FPR, | ||
66 | #else | ||
56 | RID_MAX_FPR = RID_F31+1, | 67 | RID_MAX_FPR = RID_F31+1, |
68 | #endif | ||
57 | RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, | 69 | RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, |
58 | RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR /* Only even regs are used. */ | 70 | RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR /* Only even regs are used. */ |
59 | }; | 71 | }; |
@@ -68,28 +80,60 @@ enum { | |||
68 | (RID2RSET(RID_ZERO)|RID2RSET(RID_TMP)|RID2RSET(RID_SP)|\ | 80 | (RID2RSET(RID_ZERO)|RID2RSET(RID_TMP)|RID2RSET(RID_SP)|\ |
69 | RID2RSET(RID_SYS1)|RID2RSET(RID_SYS2)|RID2RSET(RID_JGL)|RID2RSET(RID_GP)) | 81 | RID2RSET(RID_SYS1)|RID2RSET(RID_SYS2)|RID2RSET(RID_JGL)|RID2RSET(RID_GP)) |
70 | #define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED) | 82 | #define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED) |
83 | #if LJ_SOFTFP | ||
84 | #define RSET_FPR 0 | ||
85 | #else | ||
86 | #if LJ_32 | ||
71 | #define RSET_FPR \ | 87 | #define RSET_FPR \ |
72 | (RID2RSET(RID_F0)|RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(RID_F6)|\ | 88 | (RID2RSET(RID_F0)|RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(RID_F6)|\ |
73 | RID2RSET(RID_F8)|RID2RSET(RID_F10)|RID2RSET(RID_F12)|RID2RSET(RID_F14)|\ | 89 | RID2RSET(RID_F8)|RID2RSET(RID_F10)|RID2RSET(RID_F12)|RID2RSET(RID_F14)|\ |
74 | RID2RSET(RID_F16)|RID2RSET(RID_F18)|RID2RSET(RID_F20)|RID2RSET(RID_F22)|\ | 90 | RID2RSET(RID_F16)|RID2RSET(RID_F18)|RID2RSET(RID_F20)|RID2RSET(RID_F22)|\ |
75 | RID2RSET(RID_F24)|RID2RSET(RID_F26)|RID2RSET(RID_F28)|RID2RSET(RID_F30)) | 91 | RID2RSET(RID_F24)|RID2RSET(RID_F26)|RID2RSET(RID_F28)|RID2RSET(RID_F30)) |
76 | #define RSET_ALL (RSET_GPR|RSET_FPR) | 92 | #else |
77 | #define RSET_INIT RSET_ALL | 93 | #define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR) |
94 | #endif | ||
95 | #endif | ||
96 | #define RSET_ALL (RSET_GPR|RSET_FPR) | ||
97 | #define RSET_INIT RSET_ALL | ||
78 | 98 | ||
79 | #define RSET_SCRATCH_GPR \ | 99 | #define RSET_SCRATCH_GPR \ |
80 | (RSET_RANGE(RID_R1, RID_R15+1)|\ | 100 | (RSET_RANGE(RID_R1, RID_R15+1)|\ |
81 | RID2RSET(RID_R24)|RID2RSET(RID_R25)) | 101 | RID2RSET(RID_R24)|RID2RSET(RID_R25)) |
102 | #if LJ_SOFTFP | ||
103 | #define RSET_SCRATCH_FPR 0 | ||
104 | #else | ||
105 | #if LJ_32 | ||
82 | #define RSET_SCRATCH_FPR \ | 106 | #define RSET_SCRATCH_FPR \ |
83 | (RID2RSET(RID_F0)|RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(RID_F6)|\ | 107 | (RID2RSET(RID_F0)|RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(RID_F6)|\ |
84 | RID2RSET(RID_F8)|RID2RSET(RID_F10)|RID2RSET(RID_F12)|RID2RSET(RID_F14)|\ | 108 | RID2RSET(RID_F8)|RID2RSET(RID_F10)|RID2RSET(RID_F12)|RID2RSET(RID_F14)|\ |
85 | RID2RSET(RID_F16)|RID2RSET(RID_F18)) | 109 | RID2RSET(RID_F16)|RID2RSET(RID_F18)) |
110 | #else | ||
111 | #define RSET_SCRATCH_FPR RSET_RANGE(RID_F0, RID_F24) | ||
112 | #endif | ||
113 | #endif | ||
86 | #define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR) | 114 | #define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR) |
87 | #define REGARG_FIRSTGPR RID_R4 | 115 | #define REGARG_FIRSTGPR RID_R4 |
116 | #if LJ_32 | ||
88 | #define REGARG_LASTGPR RID_R7 | 117 | #define REGARG_LASTGPR RID_R7 |
89 | #define REGARG_NUMGPR 4 | 118 | #define REGARG_NUMGPR 4 |
119 | #else | ||
120 | #define REGARG_LASTGPR RID_R11 | ||
121 | #define REGARG_NUMGPR 8 | ||
122 | #endif | ||
123 | #if LJ_ABI_SOFTFP | ||
124 | #define REGARG_FIRSTFPR 0 | ||
125 | #define REGARG_LASTFPR 0 | ||
126 | #define REGARG_NUMFPR 0 | ||
127 | #else | ||
90 | #define REGARG_FIRSTFPR RID_F12 | 128 | #define REGARG_FIRSTFPR RID_F12 |
129 | #if LJ_32 | ||
91 | #define REGARG_LASTFPR RID_F14 | 130 | #define REGARG_LASTFPR RID_F14 |
92 | #define REGARG_NUMFPR 2 | 131 | #define REGARG_NUMFPR 2 |
132 | #else | ||
133 | #define REGARG_LASTFPR RID_F19 | ||
134 | #define REGARG_NUMFPR 8 | ||
135 | #endif | ||
136 | #endif | ||
93 | 137 | ||
94 | /* -- Spill slots --------------------------------------------------------- */ | 138 | /* -- Spill slots --------------------------------------------------------- */ |
95 | 139 | ||
@@ -100,7 +144,11 @@ enum { | |||
100 | ** | 144 | ** |
101 | ** SPS_FIRST: First spill slot for general use. | 145 | ** SPS_FIRST: First spill slot for general use. |
102 | */ | 146 | */ |
147 | #if LJ_32 | ||
103 | #define SPS_FIXED 5 | 148 | #define SPS_FIXED 5 |
149 | #else | ||
150 | #define SPS_FIXED 4 | ||
151 | #endif | ||
104 | #define SPS_FIRST 4 | 152 | #define SPS_FIRST 4 |
105 | 153 | ||
106 | #define SPOFS_TMP 0 | 154 | #define SPOFS_TMP 0 |
@@ -112,8 +160,10 @@ enum { | |||
112 | 160 | ||
113 | /* This definition must match with the *.dasc file(s). */ | 161 | /* This definition must match with the *.dasc file(s). */ |
114 | typedef struct { | 162 | typedef struct { |
163 | #if !LJ_SOFTFP | ||
115 | lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ | 164 | lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ |
116 | int32_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ | 165 | #endif |
166 | intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ | ||
117 | int32_t spill[256]; /* Spill slots. */ | 167 | int32_t spill[256]; /* Spill slots. */ |
118 | } ExitState; | 168 | } ExitState; |
119 | 169 | ||
@@ -142,52 +192,85 @@ static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p) | |||
142 | #define MIPSF_F(r) ((r) << 6) | 192 | #define MIPSF_F(r) ((r) << 6) |
143 | #define MIPSF_A(n) ((n) << 6) | 193 | #define MIPSF_A(n) ((n) << 6) |
144 | #define MIPSF_M(n) ((n) << 11) | 194 | #define MIPSF_M(n) ((n) << 11) |
195 | #define MIPSF_L(n) ((n) << 6) | ||
145 | 196 | ||
146 | typedef enum MIPSIns { | 197 | typedef enum MIPSIns { |
198 | MIPSI_D = 0x38, | ||
199 | MIPSI_DV = 0x10, | ||
200 | MIPSI_D32 = 0x3c, | ||
147 | /* Integer instructions. */ | 201 | /* Integer instructions. */ |
148 | MIPSI_MOVE = 0x00000021, | 202 | MIPSI_MOVE = 0x00000025, |
149 | MIPSI_NOP = 0x00000000, | 203 | MIPSI_NOP = 0x00000000, |
150 | 204 | ||
151 | MIPSI_LI = 0x24000000, | 205 | MIPSI_LI = 0x24000000, |
152 | MIPSI_LU = 0x34000000, | 206 | MIPSI_LU = 0x34000000, |
153 | MIPSI_LUI = 0x3c000000, | 207 | MIPSI_LUI = 0x3c000000, |
154 | 208 | ||
155 | MIPSI_ADDIU = 0x24000000, | 209 | MIPSI_AND = 0x00000024, |
156 | MIPSI_ANDI = 0x30000000, | 210 | MIPSI_ANDI = 0x30000000, |
211 | MIPSI_OR = 0x00000025, | ||
157 | MIPSI_ORI = 0x34000000, | 212 | MIPSI_ORI = 0x34000000, |
213 | MIPSI_XOR = 0x00000026, | ||
158 | MIPSI_XORI = 0x38000000, | 214 | MIPSI_XORI = 0x38000000, |
215 | MIPSI_NOR = 0x00000027, | ||
216 | |||
217 | MIPSI_SLT = 0x0000002a, | ||
218 | MIPSI_SLTU = 0x0000002b, | ||
159 | MIPSI_SLTI = 0x28000000, | 219 | MIPSI_SLTI = 0x28000000, |
160 | MIPSI_SLTIU = 0x2c000000, | 220 | MIPSI_SLTIU = 0x2c000000, |
161 | 221 | ||
162 | MIPSI_ADDU = 0x00000021, | 222 | MIPSI_ADDU = 0x00000021, |
223 | MIPSI_ADDIU = 0x24000000, | ||
224 | MIPSI_SUB = 0x00000022, | ||
163 | MIPSI_SUBU = 0x00000023, | 225 | MIPSI_SUBU = 0x00000023, |
226 | |||
227 | #if !LJ_TARGET_MIPSR6 | ||
164 | MIPSI_MUL = 0x70000002, | 228 | MIPSI_MUL = 0x70000002, |
165 | MIPSI_AND = 0x00000024, | 229 | MIPSI_DIV = 0x0000001a, |
166 | MIPSI_OR = 0x00000025, | 230 | MIPSI_DIVU = 0x0000001b, |
167 | MIPSI_XOR = 0x00000026, | 231 | |
168 | MIPSI_NOR = 0x00000027, | ||
169 | MIPSI_SLT = 0x0000002a, | ||
170 | MIPSI_SLTU = 0x0000002b, | ||
171 | MIPSI_MOVZ = 0x0000000a, | 232 | MIPSI_MOVZ = 0x0000000a, |
172 | MIPSI_MOVN = 0x0000000b, | 233 | MIPSI_MOVN = 0x0000000b, |
234 | MIPSI_MFHI = 0x00000010, | ||
235 | MIPSI_MFLO = 0x00000012, | ||
236 | MIPSI_MULT = 0x00000018, | ||
237 | #else | ||
238 | MIPSI_MUL = 0x00000098, | ||
239 | MIPSI_MUH = 0x000000d8, | ||
240 | MIPSI_DIV = 0x0000009a, | ||
241 | MIPSI_DIVU = 0x0000009b, | ||
242 | |||
243 | MIPSI_SELEQZ = 0x00000035, | ||
244 | MIPSI_SELNEZ = 0x00000037, | ||
245 | #endif | ||
173 | 246 | ||
174 | MIPSI_SLL = 0x00000000, | 247 | MIPSI_SLL = 0x00000000, |
175 | MIPSI_SRL = 0x00000002, | 248 | MIPSI_SRL = 0x00000002, |
176 | MIPSI_SRA = 0x00000003, | 249 | MIPSI_SRA = 0x00000003, |
177 | MIPSI_ROTR = 0x00200002, /* MIPS32R2 */ | 250 | MIPSI_ROTR = 0x00200002, /* MIPSXXR2 */ |
251 | MIPSI_DROTR = 0x0020003a, | ||
252 | MIPSI_DROTR32 = 0x0020003e, | ||
178 | MIPSI_SLLV = 0x00000004, | 253 | MIPSI_SLLV = 0x00000004, |
179 | MIPSI_SRLV = 0x00000006, | 254 | MIPSI_SRLV = 0x00000006, |
180 | MIPSI_SRAV = 0x00000007, | 255 | MIPSI_SRAV = 0x00000007, |
181 | MIPSI_ROTRV = 0x00000046, /* MIPS32R2 */ | 256 | MIPSI_ROTRV = 0x00000046, /* MIPSXXR2 */ |
257 | MIPSI_DROTRV = 0x00000056, | ||
182 | 258 | ||
183 | MIPSI_SEB = 0x7c000420, /* MIPS32R2 */ | 259 | MIPSI_SEB = 0x7c000420, /* MIPSXXR2 */ |
184 | MIPSI_SEH = 0x7c000620, /* MIPS32R2 */ | 260 | MIPSI_SEH = 0x7c000620, /* MIPSXXR2 */ |
185 | MIPSI_WSBH = 0x7c0000a0, /* MIPS32R2 */ | 261 | MIPSI_WSBH = 0x7c0000a0, /* MIPSXXR2 */ |
262 | MIPSI_DSBH = 0x7c0000a4, | ||
186 | 263 | ||
187 | MIPSI_B = 0x10000000, | 264 | MIPSI_B = 0x10000000, |
188 | MIPSI_J = 0x08000000, | 265 | MIPSI_J = 0x08000000, |
189 | MIPSI_JAL = 0x0c000000, | 266 | MIPSI_JAL = 0x0c000000, |
267 | #if !LJ_TARGET_MIPSR6 | ||
268 | MIPSI_JALX = 0x74000000, | ||
190 | MIPSI_JR = 0x00000008, | 269 | MIPSI_JR = 0x00000008, |
270 | #else | ||
271 | MIPSI_JR = 0x00000009, | ||
272 | MIPSI_BALC = 0xe8000000, | ||
273 | #endif | ||
191 | MIPSI_JALR = 0x0000f809, | 274 | MIPSI_JALR = 0x0000f809, |
192 | 275 | ||
193 | MIPSI_BEQ = 0x10000000, | 276 | MIPSI_BEQ = 0x10000000, |
@@ -199,7 +282,9 @@ typedef enum MIPSIns { | |||
199 | 282 | ||
200 | /* Load/store instructions. */ | 283 | /* Load/store instructions. */ |
201 | MIPSI_LW = 0x8c000000, | 284 | MIPSI_LW = 0x8c000000, |
285 | MIPSI_LD = 0xdc000000, | ||
202 | MIPSI_SW = 0xac000000, | 286 | MIPSI_SW = 0xac000000, |
287 | MIPSI_SD = 0xfc000000, | ||
203 | MIPSI_LB = 0x80000000, | 288 | MIPSI_LB = 0x80000000, |
204 | MIPSI_SB = 0xa0000000, | 289 | MIPSI_SB = 0xa0000000, |
205 | MIPSI_LH = 0x84000000, | 290 | MIPSI_LH = 0x84000000, |
@@ -211,11 +296,69 @@ typedef enum MIPSIns { | |||
211 | MIPSI_LDC1 = 0xd4000000, | 296 | MIPSI_LDC1 = 0xd4000000, |
212 | MIPSI_SDC1 = 0xf4000000, | 297 | MIPSI_SDC1 = 0xf4000000, |
213 | 298 | ||
299 | /* MIPS64 instructions. */ | ||
300 | MIPSI_DADD = 0x0000002c, | ||
301 | MIPSI_DADDU = 0x0000002d, | ||
302 | MIPSI_DADDIU = 0x64000000, | ||
303 | MIPSI_DSUB = 0x0000002e, | ||
304 | MIPSI_DSUBU = 0x0000002f, | ||
305 | #if !LJ_TARGET_MIPSR6 | ||
306 | MIPSI_DDIV = 0x0000001e, | ||
307 | MIPSI_DDIVU = 0x0000001f, | ||
308 | MIPSI_DMULT = 0x0000001c, | ||
309 | MIPSI_DMULTU = 0x0000001d, | ||
310 | #else | ||
311 | MIPSI_DDIV = 0x0000009e, | ||
312 | MIPSI_DMOD = 0x000000de, | ||
313 | MIPSI_DDIVU = 0x0000009f, | ||
314 | MIPSI_DMODU = 0x000000df, | ||
315 | MIPSI_DMUL = 0x0000009c, | ||
316 | MIPSI_DMUH = 0x000000dc, | ||
317 | #endif | ||
318 | |||
319 | MIPSI_DSLL = 0x00000038, | ||
320 | MIPSI_DSRL = 0x0000003a, | ||
321 | MIPSI_DSLLV = 0x00000014, | ||
322 | MIPSI_DSRLV = 0x00000016, | ||
323 | MIPSI_DSRA = 0x0000003b, | ||
324 | MIPSI_DSRAV = 0x00000017, | ||
325 | MIPSI_DSRA32 = 0x0000003f, | ||
326 | MIPSI_DSLL32 = 0x0000003c, | ||
327 | MIPSI_DSRL32 = 0x0000003e, | ||
328 | MIPSI_DSHD = 0x7c000164, | ||
329 | |||
330 | MIPSI_AADDU = LJ_32 ? MIPSI_ADDU : MIPSI_DADDU, | ||
331 | MIPSI_AADDIU = LJ_32 ? MIPSI_ADDIU : MIPSI_DADDIU, | ||
332 | MIPSI_ASUBU = LJ_32 ? MIPSI_SUBU : MIPSI_DSUBU, | ||
333 | MIPSI_AL = LJ_32 ? MIPSI_LW : MIPSI_LD, | ||
334 | MIPSI_AS = LJ_32 ? MIPSI_SW : MIPSI_SD, | ||
335 | #if LJ_TARGET_MIPSR6 | ||
336 | MIPSI_LSA = 0x00000005, | ||
337 | MIPSI_DLSA = 0x00000015, | ||
338 | MIPSI_ALSA = LJ_32 ? MIPSI_LSA : MIPSI_DLSA, | ||
339 | #endif | ||
340 | |||
341 | /* Extract/insert instructions. */ | ||
342 | MIPSI_DEXTM = 0x7c000001, | ||
343 | MIPSI_DEXTU = 0x7c000002, | ||
344 | MIPSI_DEXT = 0x7c000003, | ||
345 | MIPSI_DINSM = 0x7c000005, | ||
346 | MIPSI_DINSU = 0x7c000006, | ||
347 | MIPSI_DINS = 0x7c000007, | ||
348 | |||
349 | MIPSI_FLOOR_D = 0x4620000b, | ||
350 | |||
214 | /* FP instructions. */ | 351 | /* FP instructions. */ |
215 | MIPSI_MOV_S = 0x46000006, | 352 | MIPSI_MOV_S = 0x46000006, |
216 | MIPSI_MOV_D = 0x46200006, | 353 | MIPSI_MOV_D = 0x46200006, |
354 | #if !LJ_TARGET_MIPSR6 | ||
217 | MIPSI_MOVT_D = 0x46210011, | 355 | MIPSI_MOVT_D = 0x46210011, |
218 | MIPSI_MOVF_D = 0x46200011, | 356 | MIPSI_MOVF_D = 0x46200011, |
357 | #else | ||
358 | MIPSI_MIN_D = 0x4620001C, | ||
359 | MIPSI_MAX_D = 0x4620001E, | ||
360 | MIPSI_SEL_D = 0x46200010, | ||
361 | #endif | ||
219 | 362 | ||
220 | MIPSI_ABS_D = 0x46200005, | 363 | MIPSI_ABS_D = 0x46200005, |
221 | MIPSI_NEG_D = 0x46200007, | 364 | MIPSI_NEG_D = 0x46200007, |
@@ -235,23 +378,37 @@ typedef enum MIPSIns { | |||
235 | MIPSI_CVT_W_D = 0x46200024, | 378 | MIPSI_CVT_W_D = 0x46200024, |
236 | MIPSI_CVT_S_W = 0x46800020, | 379 | MIPSI_CVT_S_W = 0x46800020, |
237 | MIPSI_CVT_D_W = 0x46800021, | 380 | MIPSI_CVT_D_W = 0x46800021, |
381 | MIPSI_CVT_S_L = 0x46a00020, | ||
382 | MIPSI_CVT_D_L = 0x46a00021, | ||
238 | 383 | ||
239 | MIPSI_TRUNC_W_S = 0x4600000d, | 384 | MIPSI_TRUNC_W_S = 0x4600000d, |
240 | MIPSI_TRUNC_W_D = 0x4620000d, | 385 | MIPSI_TRUNC_W_D = 0x4620000d, |
386 | MIPSI_TRUNC_L_S = 0x46000009, | ||
387 | MIPSI_TRUNC_L_D = 0x46200009, | ||
241 | MIPSI_FLOOR_W_S = 0x4600000f, | 388 | MIPSI_FLOOR_W_S = 0x4600000f, |
242 | MIPSI_FLOOR_W_D = 0x4620000f, | 389 | MIPSI_FLOOR_W_D = 0x4620000f, |
243 | 390 | ||
244 | MIPSI_MFC1 = 0x44000000, | 391 | MIPSI_MFC1 = 0x44000000, |
245 | MIPSI_MTC1 = 0x44800000, | 392 | MIPSI_MTC1 = 0x44800000, |
393 | MIPSI_DMTC1 = 0x44a00000, | ||
394 | MIPSI_DMFC1 = 0x44200000, | ||
246 | 395 | ||
396 | #if !LJ_TARGET_MIPSR6 | ||
247 | MIPSI_BC1F = 0x45000000, | 397 | MIPSI_BC1F = 0x45000000, |
248 | MIPSI_BC1T = 0x45010000, | 398 | MIPSI_BC1T = 0x45010000, |
249 | |||
250 | MIPSI_C_EQ_D = 0x46200032, | 399 | MIPSI_C_EQ_D = 0x46200032, |
400 | MIPSI_C_OLT_S = 0x46000034, | ||
251 | MIPSI_C_OLT_D = 0x46200034, | 401 | MIPSI_C_OLT_D = 0x46200034, |
252 | MIPSI_C_ULT_D = 0x46200035, | 402 | MIPSI_C_ULT_D = 0x46200035, |
253 | MIPSI_C_OLE_D = 0x46200036, | 403 | MIPSI_C_OLE_D = 0x46200036, |
254 | MIPSI_C_ULE_D = 0x46200037, | 404 | MIPSI_C_ULE_D = 0x46200037, |
405 | #else | ||
406 | MIPSI_BC1EQZ = 0x45200000, | ||
407 | MIPSI_BC1NEZ = 0x45a00000, | ||
408 | MIPSI_CMP_EQ_D = 0x46a00002, | ||
409 | MIPSI_CMP_LT_S = 0x46800004, | ||
410 | MIPSI_CMP_LT_D = 0x46a00004, | ||
411 | #endif | ||
255 | 412 | ||
256 | } MIPSIns; | 413 | } MIPSIns; |
257 | 414 | ||
diff --git a/src/lj_target_ppc.h b/src/lj_target_ppc.h index 580995d5..c7d4c229 100644 --- a/src/lj_target_ppc.h +++ b/src/lj_target_ppc.h | |||
@@ -104,7 +104,7 @@ enum { | |||
104 | /* This definition must match with the *.dasc file(s). */ | 104 | /* This definition must match with the *.dasc file(s). */ |
105 | typedef struct { | 105 | typedef struct { |
106 | lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ | 106 | lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ |
107 | int32_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ | 107 | intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ |
108 | int32_t spill[256]; /* Spill slots. */ | 108 | int32_t spill[256]; /* Spill slots. */ |
109 | } ExitState; | 109 | } ExitState; |
110 | 110 | ||
diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h index 8a96cbf2..71c930fe 100644 --- a/src/lj_target_x86.h +++ b/src/lj_target_x86.h | |||
@@ -22,7 +22,7 @@ | |||
22 | _(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7) | 22 | _(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7) |
23 | #endif | 23 | #endif |
24 | #define VRIDDEF(_) \ | 24 | #define VRIDDEF(_) \ |
25 | _(MRM) | 25 | _(MRM) _(RIP) |
26 | 26 | ||
27 | #define RIDENUM(name) RID_##name, | 27 | #define RIDENUM(name) RID_##name, |
28 | 28 | ||
@@ -31,8 +31,10 @@ enum { | |||
31 | FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */ | 31 | FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */ |
32 | RID_MAX, | 32 | RID_MAX, |
33 | RID_MRM = RID_MAX, /* Pseudo-id for ModRM operand. */ | 33 | RID_MRM = RID_MAX, /* Pseudo-id for ModRM operand. */ |
34 | RID_RIP = RID_MAX+5, /* Pseudo-id for RIP (x64 only), rm bits = 5. */ | ||
34 | 35 | ||
35 | /* Calling conventions. */ | 36 | /* Calling conventions. */ |
37 | RID_SP = RID_ESP, | ||
36 | RID_RET = RID_EAX, | 38 | RID_RET = RID_EAX, |
37 | #if LJ_64 | 39 | #if LJ_64 |
38 | RID_FPRET = RID_XMM0, | 40 | RID_FPRET = RID_XMM0, |
@@ -62,8 +64,10 @@ enum { | |||
62 | 64 | ||
63 | /* -- Register sets ------------------------------------------------------- */ | 65 | /* -- Register sets ------------------------------------------------------- */ |
64 | 66 | ||
65 | /* Make use of all registers, except the stack pointer. */ | 67 | /* Make use of all registers, except the stack pointer (and maybe DISPATCH). */ |
66 | #define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR)-RID2RSET(RID_ESP)) | 68 | #define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) \ |
69 | - RID2RSET(RID_ESP) \ | ||
70 | - LJ_GC64*RID2RSET(RID_DISPATCH)) | ||
67 | #define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)) | 71 | #define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)) |
68 | #define RSET_ALL (RSET_GPR|RSET_FPR) | 72 | #define RSET_ALL (RSET_GPR|RSET_FPR) |
69 | #define RSET_INIT RSET_ALL | 73 | #define RSET_INIT RSET_ALL |
@@ -131,7 +135,11 @@ enum { | |||
131 | #define SPS_FIXED (4*2) | 135 | #define SPS_FIXED (4*2) |
132 | #define SPS_FIRST (4*2) /* Don't use callee register save area. */ | 136 | #define SPS_FIRST (4*2) /* Don't use callee register save area. */ |
133 | #else | 137 | #else |
138 | #if LJ_GC64 | ||
139 | #define SPS_FIXED 2 | ||
140 | #else | ||
134 | #define SPS_FIXED 4 | 141 | #define SPS_FIXED 4 |
142 | #endif | ||
135 | #define SPS_FIRST 2 | 143 | #define SPS_FIRST 2 |
136 | #endif | 144 | #endif |
137 | #else | 145 | #else |
@@ -184,12 +192,18 @@ typedef struct { | |||
184 | #define XO_f20f(o) ((uint32_t)(0x0ff2fc + (0x##o<<24))) | 192 | #define XO_f20f(o) ((uint32_t)(0x0ff2fc + (0x##o<<24))) |
185 | #define XO_f30f(o) ((uint32_t)(0x0ff3fc + (0x##o<<24))) | 193 | #define XO_f30f(o) ((uint32_t)(0x0ff3fc + (0x##o<<24))) |
186 | 194 | ||
195 | #define XV_660f38(o) ((uint32_t)(0x79e2c4 + (0x##o<<24))) | ||
196 | #define XV_f20f38(o) ((uint32_t)(0x7be2c4 + (0x##o<<24))) | ||
197 | #define XV_f20f3a(o) ((uint32_t)(0x7be3c4 + (0x##o<<24))) | ||
198 | #define XV_f30f38(o) ((uint32_t)(0x7ae2c4 + (0x##o<<24))) | ||
199 | |||
187 | /* This list of x86 opcodes is not intended to be complete. Opcodes are only | 200 | /* This list of x86 opcodes is not intended to be complete. Opcodes are only |
188 | ** included when needed. Take a look at DynASM or jit.dis_x86 to see the | 201 | ** included when needed. Take a look at DynASM or jit.dis_x86 to see the |
189 | ** whole mess. | 202 | ** whole mess. |
190 | */ | 203 | */ |
191 | typedef enum { | 204 | typedef enum { |
192 | /* Fixed length opcodes. XI_* prefix. */ | 205 | /* Fixed length opcodes. XI_* prefix. */ |
206 | XI_O16 = 0x66, | ||
193 | XI_NOP = 0x90, | 207 | XI_NOP = 0x90, |
194 | XI_XCHGa = 0x90, | 208 | XI_XCHGa = 0x90, |
195 | XI_CALL = 0xe8, | 209 | XI_CALL = 0xe8, |
@@ -207,6 +221,7 @@ typedef enum { | |||
207 | XI_PUSHi8 = 0x6a, | 221 | XI_PUSHi8 = 0x6a, |
208 | XI_TESTb = 0x84, | 222 | XI_TESTb = 0x84, |
209 | XI_TEST = 0x85, | 223 | XI_TEST = 0x85, |
224 | XI_INT3 = 0xcc, | ||
210 | XI_MOVmi = 0xc7, | 225 | XI_MOVmi = 0xc7, |
211 | XI_GROUP5 = 0xff, | 226 | XI_GROUP5 = 0xff, |
212 | 227 | ||
@@ -226,7 +241,14 @@ typedef enum { | |||
226 | XI_FSCALE = 0xfdd9, | 241 | XI_FSCALE = 0xfdd9, |
227 | XI_FYL2X = 0xf1d9, | 242 | XI_FYL2X = 0xf1d9, |
228 | 243 | ||
244 | /* VEX-encoded instructions. XV_* prefix. */ | ||
245 | XV_RORX = XV_f20f3a(f0), | ||
246 | XV_SARX = XV_f30f38(f7), | ||
247 | XV_SHLX = XV_660f38(f7), | ||
248 | XV_SHRX = XV_f20f38(f7), | ||
249 | |||
229 | /* Variable-length opcodes. XO_* prefix. */ | 250 | /* Variable-length opcodes. XO_* prefix. */ |
251 | XO_OR = XO_(0b), | ||
230 | XO_MOV = XO_(8b), | 252 | XO_MOV = XO_(8b), |
231 | XO_MOVto = XO_(89), | 253 | XO_MOVto = XO_(89), |
232 | XO_MOVtow = XO_66(89), | 254 | XO_MOVtow = XO_66(89), |
@@ -277,10 +299,8 @@ typedef enum { | |||
277 | XO_ROUNDSD = 0x0b3a0ffc, /* Really 66 0f 3a 0b. See asm_fpmath. */ | 299 | XO_ROUNDSD = 0x0b3a0ffc, /* Really 66 0f 3a 0b. See asm_fpmath. */ |
278 | XO_UCOMISD = XO_660f(2e), | 300 | XO_UCOMISD = XO_660f(2e), |
279 | XO_CVTSI2SD = XO_f20f(2a), | 301 | XO_CVTSI2SD = XO_f20f(2a), |
280 | XO_CVTSD2SI = XO_f20f(2d), | ||
281 | XO_CVTTSD2SI= XO_f20f(2c), | 302 | XO_CVTTSD2SI= XO_f20f(2c), |
282 | XO_CVTSI2SS = XO_f30f(2a), | 303 | XO_CVTSI2SS = XO_f30f(2a), |
283 | XO_CVTSS2SI = XO_f30f(2d), | ||
284 | XO_CVTTSS2SI= XO_f30f(2c), | 304 | XO_CVTTSS2SI= XO_f30f(2c), |
285 | XO_CVTSS2SD = XO_f30f(5a), | 305 | XO_CVTSS2SD = XO_f30f(5a), |
286 | XO_CVTSD2SS = XO_f20f(5a), | 306 | XO_CVTSD2SS = XO_f20f(5a), |
diff --git a/src/lj_trace.c b/src/lj_trace.c index 311baa73..a43c8c4e 100644 --- a/src/lj_trace.c +++ b/src/lj_trace.c | |||
@@ -117,15 +117,26 @@ static void perftools_addtrace(GCtrace *T) | |||
117 | } | 117 | } |
118 | #endif | 118 | #endif |
119 | 119 | ||
120 | /* Allocate space for copy of trace. */ | 120 | /* Allocate space for copy of T. */ |
121 | static GCtrace *trace_save_alloc(jit_State *J) | 121 | GCtrace * LJ_FASTCALL lj_trace_alloc(lua_State *L, GCtrace *T) |
122 | { | 122 | { |
123 | size_t sztr = ((sizeof(GCtrace)+7)&~7); | 123 | size_t sztr = ((sizeof(GCtrace)+7)&~7); |
124 | size_t szins = (J->cur.nins-J->cur.nk)*sizeof(IRIns); | 124 | size_t szins = (T->nins-T->nk)*sizeof(IRIns); |
125 | size_t sz = sztr + szins + | 125 | size_t sz = sztr + szins + |
126 | J->cur.nsnap*sizeof(SnapShot) + | 126 | T->nsnap*sizeof(SnapShot) + |
127 | J->cur.nsnapmap*sizeof(SnapEntry); | 127 | T->nsnapmap*sizeof(SnapEntry); |
128 | return lj_mem_newt(J->L, (MSize)sz, GCtrace); | 128 | GCtrace *T2 = lj_mem_newt(L, (MSize)sz, GCtrace); |
129 | char *p = (char *)T2 + sztr; | ||
130 | T2->gct = ~LJ_TTRACE; | ||
131 | T2->marked = 0; | ||
132 | T2->traceno = 0; | ||
133 | T2->ir = (IRIns *)p - T->nk; | ||
134 | T2->nins = T->nins; | ||
135 | T2->nk = T->nk; | ||
136 | T2->nsnap = T->nsnap; | ||
137 | T2->nsnapmap = T->nsnapmap; | ||
138 | memcpy(p, T->ir + T->nk, szins); | ||
139 | return T2; | ||
129 | } | 140 | } |
130 | 141 | ||
131 | /* Save current trace by copying and compacting it. */ | 142 | /* Save current trace by copying and compacting it. */ |
@@ -139,12 +150,12 @@ static void trace_save(jit_State *J, GCtrace *T) | |||
139 | setgcrefp(J2G(J)->gc.root, T); | 150 | setgcrefp(J2G(J)->gc.root, T); |
140 | newwhite(J2G(J), T); | 151 | newwhite(J2G(J), T); |
141 | T->gct = ~LJ_TTRACE; | 152 | T->gct = ~LJ_TTRACE; |
142 | T->ir = (IRIns *)p - J->cur.nk; | 153 | T->ir = (IRIns *)p - J->cur.nk; /* The IR has already been copied above. */ |
143 | memcpy(p, J->cur.ir+J->cur.nk, szins); | ||
144 | p += szins; | 154 | p += szins; |
145 | TRACE_APPENDVEC(snap, nsnap, SnapShot) | 155 | TRACE_APPENDVEC(snap, nsnap, SnapShot) |
146 | TRACE_APPENDVEC(snapmap, nsnapmap, SnapEntry) | 156 | TRACE_APPENDVEC(snapmap, nsnapmap, SnapEntry) |
147 | J->cur.traceno = 0; | 157 | J->cur.traceno = 0; |
158 | J->curfinal = NULL; | ||
148 | setgcrefp(J->trace[T->traceno], T); | 159 | setgcrefp(J->trace[T->traceno], T); |
149 | lj_gc_barriertrace(J2G(J), T->traceno); | 160 | lj_gc_barriertrace(J2G(J), T->traceno); |
150 | lj_gdbjit_addtrace(J, T); | 161 | lj_gdbjit_addtrace(J, T); |
@@ -274,7 +285,7 @@ int lj_trace_flushall(lua_State *L) | |||
274 | if (T->root == 0) | 285 | if (T->root == 0) |
275 | trace_flushroot(J, T); | 286 | trace_flushroot(J, T); |
276 | lj_gdbjit_deltrace(J, T); | 287 | lj_gdbjit_deltrace(J, T); |
277 | T->traceno = 0; | 288 | T->traceno = T->link = 0; /* Blacklist the link for cont_stitch. */ |
278 | setgcrefnull(J->trace[i]); | 289 | setgcrefnull(J->trace[i]); |
279 | } | 290 | } |
280 | } | 291 | } |
@@ -296,13 +307,42 @@ void lj_trace_initstate(global_State *g) | |||
296 | { | 307 | { |
297 | jit_State *J = G2J(g); | 308 | jit_State *J = G2J(g); |
298 | TValue *tv; | 309 | TValue *tv; |
299 | /* Initialize SIMD constants. */ | 310 | |
311 | /* Initialize aligned SIMD constants. */ | ||
300 | tv = LJ_KSIMD(J, LJ_KSIMD_ABS); | 312 | tv = LJ_KSIMD(J, LJ_KSIMD_ABS); |
301 | tv[0].u64 = U64x(7fffffff,ffffffff); | 313 | tv[0].u64 = U64x(7fffffff,ffffffff); |
302 | tv[1].u64 = U64x(7fffffff,ffffffff); | 314 | tv[1].u64 = U64x(7fffffff,ffffffff); |
303 | tv = LJ_KSIMD(J, LJ_KSIMD_NEG); | 315 | tv = LJ_KSIMD(J, LJ_KSIMD_NEG); |
304 | tv[0].u64 = U64x(80000000,00000000); | 316 | tv[0].u64 = U64x(80000000,00000000); |
305 | tv[1].u64 = U64x(80000000,00000000); | 317 | tv[1].u64 = U64x(80000000,00000000); |
318 | |||
319 | /* Initialize 32/64 bit constants. */ | ||
320 | #if LJ_TARGET_X86ORX64 | ||
321 | J->k64[LJ_K64_TOBIT].u64 = U64x(43380000,00000000); | ||
322 | #if LJ_32 | ||
323 | J->k64[LJ_K64_M2P64_31].u64 = U64x(c1e00000,00000000); | ||
324 | #endif | ||
325 | J->k64[LJ_K64_2P64].u64 = U64x(43f00000,00000000); | ||
326 | J->k32[LJ_K32_M2P64_31] = LJ_64 ? 0xdf800000 : 0xcf000000; | ||
327 | #endif | ||
328 | #if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS64 | ||
329 | J->k64[LJ_K64_M2P64].u64 = U64x(c3f00000,00000000); | ||
330 | #endif | ||
331 | #if LJ_TARGET_PPC | ||
332 | J->k32[LJ_K32_2P52_2P31] = 0x59800004; | ||
333 | J->k32[LJ_K32_2P52] = 0x59800000; | ||
334 | #endif | ||
335 | #if LJ_TARGET_PPC || LJ_TARGET_MIPS | ||
336 | J->k32[LJ_K32_2P31] = 0x4f000000; | ||
337 | #endif | ||
338 | #if LJ_TARGET_MIPS | ||
339 | J->k64[LJ_K64_2P31].u64 = U64x(41e00000,00000000); | ||
340 | #if LJ_64 | ||
341 | J->k64[LJ_K64_2P63].u64 = U64x(43e00000,00000000); | ||
342 | J->k32[LJ_K32_2P63] = 0x5f000000; | ||
343 | J->k32[LJ_K32_M2P64] = 0xdf800000; | ||
344 | #endif | ||
345 | #endif | ||
306 | } | 346 | } |
307 | 347 | ||
308 | /* Free everything associated with the JIT compiler state. */ | 348 | /* Free everything associated with the JIT compiler state. */ |
@@ -317,7 +357,6 @@ void lj_trace_freestate(global_State *g) | |||
317 | } | 357 | } |
318 | #endif | 358 | #endif |
319 | lj_mcode_free(J); | 359 | lj_mcode_free(J); |
320 | lj_ir_k64_freeall(J); | ||
321 | lj_mem_freevec(g, J->snapmapbuf, J->sizesnapmap, SnapEntry); | 360 | lj_mem_freevec(g, J->snapmapbuf, J->sizesnapmap, SnapEntry); |
322 | lj_mem_freevec(g, J->snapbuf, J->sizesnap, SnapShot); | 361 | lj_mem_freevec(g, J->snapbuf, J->sizesnap, SnapShot); |
323 | lj_mem_freevec(g, J->irbuf + J->irbotlim, J->irtoplim - J->irbotlim, IRIns); | 362 | lj_mem_freevec(g, J->irbuf + J->irbotlim, J->irtoplim - J->irbotlim, IRIns); |
@@ -367,7 +406,7 @@ static void trace_start(jit_State *J) | |||
367 | TraceNo traceno; | 406 | TraceNo traceno; |
368 | 407 | ||
369 | if ((J->pt->flags & PROTO_NOJIT)) { /* JIT disabled for this proto? */ | 408 | if ((J->pt->flags & PROTO_NOJIT)) { /* JIT disabled for this proto? */ |
370 | if (J->parent == 0) { | 409 | if (J->parent == 0 && J->exitno == 0) { |
371 | /* Lazy bytecode patching to disable hotcount events. */ | 410 | /* Lazy bytecode patching to disable hotcount events. */ |
372 | lua_assert(bc_op(*J->pc) == BC_FORL || bc_op(*J->pc) == BC_ITERL || | 411 | lua_assert(bc_op(*J->pc) == BC_FORL || bc_op(*J->pc) == BC_ITERL || |
373 | bc_op(*J->pc) == BC_LOOP || bc_op(*J->pc) == BC_FUNCF); | 412 | bc_op(*J->pc) == BC_LOOP || bc_op(*J->pc) == BC_FUNCF); |
@@ -401,6 +440,8 @@ static void trace_start(jit_State *J) | |||
401 | J->guardemit.irt = 0; | 440 | J->guardemit.irt = 0; |
402 | J->postproc = LJ_POST_NONE; | 441 | J->postproc = LJ_POST_NONE; |
403 | lj_resetsplit(J); | 442 | lj_resetsplit(J); |
443 | J->retryrec = 0; | ||
444 | J->ktrace = 0; | ||
404 | setgcref(J->cur.startpt, obj2gco(J->pt)); | 445 | setgcref(J->cur.startpt, obj2gco(J->pt)); |
405 | 446 | ||
406 | L = J->L; | 447 | L = J->L; |
@@ -412,6 +453,12 @@ static void trace_start(jit_State *J) | |||
412 | if (J->parent) { | 453 | if (J->parent) { |
413 | setintV(L->top++, J->parent); | 454 | setintV(L->top++, J->parent); |
414 | setintV(L->top++, J->exitno); | 455 | setintV(L->top++, J->exitno); |
456 | } else { | ||
457 | BCOp op = bc_op(*J->pc); | ||
458 | if (op == BC_CALLM || op == BC_CALL || op == BC_ITERC) { | ||
459 | setintV(L->top++, J->exitno); /* Parent of stitched trace. */ | ||
460 | setintV(L->top++, -1); | ||
461 | } | ||
415 | } | 462 | } |
416 | ); | 463 | ); |
417 | lj_record_setup(J); | 464 | lj_record_setup(J); |
@@ -424,7 +471,7 @@ static void trace_stop(jit_State *J) | |||
424 | BCOp op = bc_op(J->cur.startins); | 471 | BCOp op = bc_op(J->cur.startins); |
425 | GCproto *pt = &gcref(J->cur.startpt)->pt; | 472 | GCproto *pt = &gcref(J->cur.startpt)->pt; |
426 | TraceNo traceno = J->cur.traceno; | 473 | TraceNo traceno = J->cur.traceno; |
427 | GCtrace *T = trace_save_alloc(J); /* Do this first. May throw OOM. */ | 474 | GCtrace *T = J->curfinal; |
428 | lua_State *L; | 475 | lua_State *L; |
429 | 476 | ||
430 | switch (op) { | 477 | switch (op) { |
@@ -461,6 +508,12 @@ static void trace_stop(jit_State *J) | |||
461 | root->nextside = (TraceNo1)traceno; | 508 | root->nextside = (TraceNo1)traceno; |
462 | } | 509 | } |
463 | break; | 510 | break; |
511 | case BC_CALLM: | ||
512 | case BC_CALL: | ||
513 | case BC_ITERC: | ||
514 | /* Trace stitching: patch link of previous trace. */ | ||
515 | traceref(J, J->exitno)->link = traceno; | ||
516 | break; | ||
464 | default: | 517 | default: |
465 | lua_assert(0); | 518 | lua_assert(0); |
466 | break; | 519 | break; |
@@ -475,6 +528,7 @@ static void trace_stop(jit_State *J) | |||
475 | lj_vmevent_send(L, TRACE, | 528 | lj_vmevent_send(L, TRACE, |
476 | setstrV(L, L->top++, lj_str_newlit(L, "stop")); | 529 | setstrV(L, L->top++, lj_str_newlit(L, "stop")); |
477 | setintV(L->top++, traceno); | 530 | setintV(L->top++, traceno); |
531 | setfuncV(L, L->top++, J->fn); | ||
478 | ); | 532 | ); |
479 | } | 533 | } |
480 | 534 | ||
@@ -502,6 +556,10 @@ static int trace_abort(jit_State *J) | |||
502 | 556 | ||
503 | J->postproc = LJ_POST_NONE; | 557 | J->postproc = LJ_POST_NONE; |
504 | lj_mcode_abort(J); | 558 | lj_mcode_abort(J); |
559 | if (J->curfinal) { | ||
560 | lj_trace_free(J2G(J), J->curfinal); | ||
561 | J->curfinal = NULL; | ||
562 | } | ||
505 | if (tvisnumber(L->top-1)) | 563 | if (tvisnumber(L->top-1)) |
506 | e = (TraceError)numberVint(L->top-1); | 564 | e = (TraceError)numberVint(L->top-1); |
507 | if (e == LJ_TRERR_MCODELM) { | 565 | if (e == LJ_TRERR_MCODELM) { |
@@ -510,8 +568,17 @@ static int trace_abort(jit_State *J) | |||
510 | return 1; /* Retry ASM with new MCode area. */ | 568 | return 1; /* Retry ASM with new MCode area. */ |
511 | } | 569 | } |
512 | /* Penalize or blacklist starting bytecode instruction. */ | 570 | /* Penalize or blacklist starting bytecode instruction. */ |
513 | if (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))) | 571 | if (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))) { |
514 | penalty_pc(J, &gcref(J->cur.startpt)->pt, mref(J->cur.startpc, BCIns), e); | 572 | if (J->exitno == 0) { |
573 | BCIns *startpc = mref(J->cur.startpc, BCIns); | ||
574 | if (e == LJ_TRERR_RETRY) | ||
575 | hotcount_set(J2GG(J), startpc+1, 1); /* Immediate retry. */ | ||
576 | else | ||
577 | penalty_pc(J, &gcref(J->cur.startpt)->pt, startpc, e); | ||
578 | } else { | ||
579 | traceref(J, J->exitno)->link = J->exitno; /* Self-link is blacklisted. */ | ||
580 | } | ||
581 | } | ||
515 | 582 | ||
516 | /* Is there anything to abort? */ | 583 | /* Is there anything to abort? */ |
517 | traceno = J->cur.traceno; | 584 | traceno = J->cur.traceno; |
@@ -680,6 +747,7 @@ static void trace_hotside(jit_State *J, const BCIns *pc) | |||
680 | { | 747 | { |
681 | SnapShot *snap = &traceref(J, J->parent)->snap[J->exitno]; | 748 | SnapShot *snap = &traceref(J, J->parent)->snap[J->exitno]; |
682 | if (!(J2G(J)->hookmask & (HOOK_GC|HOOK_VMEVENT)) && | 749 | if (!(J2G(J)->hookmask & (HOOK_GC|HOOK_VMEVENT)) && |
750 | isluafunc(curr_func(J->L)) && | ||
683 | snap->count != SNAPCOUNT_DONE && | 751 | snap->count != SNAPCOUNT_DONE && |
684 | ++snap->count >= J->param[JIT_P_hotexit]) { | 752 | ++snap->count >= J->param[JIT_P_hotexit]) { |
685 | lua_assert(J->state == LJ_TRACE_IDLE); | 753 | lua_assert(J->state == LJ_TRACE_IDLE); |
@@ -689,6 +757,20 @@ static void trace_hotside(jit_State *J, const BCIns *pc) | |||
689 | } | 757 | } |
690 | } | 758 | } |
691 | 759 | ||
760 | /* Stitch a new trace to the previous trace. */ | ||
761 | void LJ_FASTCALL lj_trace_stitch(jit_State *J, const BCIns *pc) | ||
762 | { | ||
763 | /* Only start a new trace if not recording or inside __gc call or vmevent. */ | ||
764 | if (J->state == LJ_TRACE_IDLE && | ||
765 | !(J2G(J)->hookmask & (HOOK_GC|HOOK_VMEVENT))) { | ||
766 | J->parent = 0; /* Have to treat it like a root trace. */ | ||
767 | /* J->exitno is set to the invoking trace. */ | ||
768 | J->state = LJ_TRACE_START; | ||
769 | lj_trace_ins(J, pc); | ||
770 | } | ||
771 | } | ||
772 | |||
773 | |||
692 | /* Tiny struct to pass data to protected call. */ | 774 | /* Tiny struct to pass data to protected call. */ |
693 | typedef struct ExitDataCP { | 775 | typedef struct ExitDataCP { |
694 | jit_State *J; | 776 | jit_State *J; |
@@ -775,17 +857,20 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr) | |||
775 | if (errcode) | 857 | if (errcode) |
776 | return -errcode; /* Return negated error code. */ | 858 | return -errcode; /* Return negated error code. */ |
777 | 859 | ||
778 | lj_vmevent_send(L, TEXIT, | 860 | if (!(LJ_HASPROFILE && (G(L)->hookmask & HOOK_PROFILE))) |
779 | lj_state_checkstack(L, 4+RID_NUM_GPR+RID_NUM_FPR+LUA_MINSTACK); | 861 | lj_vmevent_send(L, TEXIT, |
780 | setintV(L->top++, J->parent); | 862 | lj_state_checkstack(L, 4+RID_NUM_GPR+RID_NUM_FPR+LUA_MINSTACK); |
781 | setintV(L->top++, J->exitno); | 863 | setintV(L->top++, J->parent); |
782 | trace_exit_regs(L, ex); | 864 | setintV(L->top++, J->exitno); |
783 | ); | 865 | trace_exit_regs(L, ex); |
866 | ); | ||
784 | 867 | ||
785 | pc = exd.pc; | 868 | pc = exd.pc; |
786 | cf = cframe_raw(L->cframe); | 869 | cf = cframe_raw(L->cframe); |
787 | setcframe_pc(cf, pc); | 870 | setcframe_pc(cf, pc); |
788 | if (G(L)->gc.state == GCSatomic || G(L)->gc.state == GCSfinalize) { | 871 | if (LJ_HASPROFILE && (G(L)->hookmask & HOOK_PROFILE)) { |
872 | /* Just exit to interpreter. */ | ||
873 | } else if (G(L)->gc.state == GCSatomic || G(L)->gc.state == GCSfinalize) { | ||
789 | if (!(G(L)->hookmask & HOOK_GC)) | 874 | if (!(G(L)->hookmask & HOOK_GC)) |
790 | lj_gc_step(L); /* Exited because of GC: drive GC forward. */ | 875 | lj_gc_step(L); /* Exited because of GC: drive GC forward. */ |
791 | } else { | 876 | } else { |
@@ -809,7 +894,7 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr) | |||
809 | ERRNO_RESTORE | 894 | ERRNO_RESTORE |
810 | switch (bc_op(*pc)) { | 895 | switch (bc_op(*pc)) { |
811 | case BC_CALLM: case BC_CALLMT: | 896 | case BC_CALLM: case BC_CALLMT: |
812 | return (int)((BCReg)(L->top - L->base) - bc_a(*pc) - bc_c(*pc)); | 897 | return (int)((BCReg)(L->top - L->base) - bc_a(*pc) - bc_c(*pc) - LJ_FR2); |
813 | case BC_RETM: | 898 | case BC_RETM: |
814 | return (int)((BCReg)(L->top - L->base) + 1 - bc_a(*pc) - bc_d(*pc)); | 899 | return (int)((BCReg)(L->top - L->base) + 1 - bc_a(*pc) - bc_d(*pc)); |
815 | case BC_TSETM: | 900 | case BC_TSETM: |
diff --git a/src/lj_trace.h b/src/lj_trace.h index 460f10a1..93d7aea1 100644 --- a/src/lj_trace.h +++ b/src/lj_trace.h | |||
@@ -23,6 +23,7 @@ LJ_FUNC_NORET void lj_trace_err(jit_State *J, TraceError e); | |||
23 | LJ_FUNC_NORET void lj_trace_err_info(jit_State *J, TraceError e); | 23 | LJ_FUNC_NORET void lj_trace_err_info(jit_State *J, TraceError e); |
24 | 24 | ||
25 | /* Trace management. */ | 25 | /* Trace management. */ |
26 | LJ_FUNC GCtrace * LJ_FASTCALL lj_trace_alloc(lua_State *L, GCtrace *T); | ||
26 | LJ_FUNC void LJ_FASTCALL lj_trace_free(global_State *g, GCtrace *T); | 27 | LJ_FUNC void LJ_FASTCALL lj_trace_free(global_State *g, GCtrace *T); |
27 | LJ_FUNC void lj_trace_reenableproto(GCproto *pt); | 28 | LJ_FUNC void lj_trace_reenableproto(GCproto *pt); |
28 | LJ_FUNC void lj_trace_flushproto(global_State *g, GCproto *pt); | 29 | LJ_FUNC void lj_trace_flushproto(global_State *g, GCproto *pt); |
@@ -34,6 +35,7 @@ LJ_FUNC void lj_trace_freestate(global_State *g); | |||
34 | /* Event handling. */ | 35 | /* Event handling. */ |
35 | LJ_FUNC void lj_trace_ins(jit_State *J, const BCIns *pc); | 36 | LJ_FUNC void lj_trace_ins(jit_State *J, const BCIns *pc); |
36 | LJ_FUNCA void LJ_FASTCALL lj_trace_hot(jit_State *J, const BCIns *pc); | 37 | LJ_FUNCA void LJ_FASTCALL lj_trace_hot(jit_State *J, const BCIns *pc); |
38 | LJ_FUNCA void LJ_FASTCALL lj_trace_stitch(jit_State *J, const BCIns *pc); | ||
37 | LJ_FUNCA int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr); | 39 | LJ_FUNCA int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr); |
38 | 40 | ||
39 | /* Signal asynchronous abort of trace or end of trace. */ | 41 | /* Signal asynchronous abort of trace or end of trace. */ |
diff --git a/src/lj_traceerr.h b/src/lj_traceerr.h index ecba11a6..0156a664 100644 --- a/src/lj_traceerr.h +++ b/src/lj_traceerr.h | |||
@@ -7,10 +7,12 @@ | |||
7 | 7 | ||
8 | /* Recording. */ | 8 | /* Recording. */ |
9 | TREDEF(RECERR, "error thrown or hook called during recording") | 9 | TREDEF(RECERR, "error thrown or hook called during recording") |
10 | TREDEF(TRACEUV, "trace too short") | ||
10 | TREDEF(TRACEOV, "trace too long") | 11 | TREDEF(TRACEOV, "trace too long") |
11 | TREDEF(STACKOV, "trace too deep") | 12 | TREDEF(STACKOV, "trace too deep") |
12 | TREDEF(SNAPOV, "too many snapshots") | 13 | TREDEF(SNAPOV, "too many snapshots") |
13 | TREDEF(BLACKL, "blacklisted") | 14 | TREDEF(BLACKL, "blacklisted") |
15 | TREDEF(RETRY, "retry recording") | ||
14 | TREDEF(NYIBC, "NYI: bytecode %d") | 16 | TREDEF(NYIBC, "NYI: bytecode %d") |
15 | 17 | ||
16 | /* Recording loop ops. */ | 18 | /* Recording loop ops. */ |
@@ -23,8 +25,6 @@ TREDEF(BADTYPE, "bad argument type") | |||
23 | TREDEF(CJITOFF, "JIT compilation disabled for function") | 25 | TREDEF(CJITOFF, "JIT compilation disabled for function") |
24 | TREDEF(CUNROLL, "call unroll limit reached") | 26 | TREDEF(CUNROLL, "call unroll limit reached") |
25 | TREDEF(DOWNREC, "down-recursion, restarting") | 27 | TREDEF(DOWNREC, "down-recursion, restarting") |
26 | TREDEF(NYICF, "NYI: C function %s") | ||
27 | TREDEF(NYIFF, "NYI: FastFunc %s") | ||
28 | TREDEF(NYIFFU, "NYI: unsupported variant of FastFunc %s") | 28 | TREDEF(NYIFFU, "NYI: unsupported variant of FastFunc %s") |
29 | TREDEF(NYIRETL, "NYI: return to lower frame") | 29 | TREDEF(NYIRETL, "NYI: return to lower frame") |
30 | 30 | ||
diff --git a/src/lj_vm.h b/src/lj_vm.h index 5b10adf3..5a7bc392 100644 --- a/src/lj_vm.h +++ b/src/lj_vm.h | |||
@@ -17,6 +17,10 @@ LJ_ASMF int lj_vm_cpcall(lua_State *L, lua_CFunction func, void *ud, | |||
17 | LJ_ASMF int lj_vm_resume(lua_State *L, TValue *base, int nres1, ptrdiff_t ef); | 17 | LJ_ASMF int lj_vm_resume(lua_State *L, TValue *base, int nres1, ptrdiff_t ef); |
18 | LJ_ASMF_NORET void LJ_FASTCALL lj_vm_unwind_c(void *cframe, int errcode); | 18 | LJ_ASMF_NORET void LJ_FASTCALL lj_vm_unwind_c(void *cframe, int errcode); |
19 | LJ_ASMF_NORET void LJ_FASTCALL lj_vm_unwind_ff(void *cframe); | 19 | LJ_ASMF_NORET void LJ_FASTCALL lj_vm_unwind_ff(void *cframe); |
20 | #if LJ_ABI_WIN && LJ_TARGET_X86 | ||
21 | LJ_ASMF_NORET void LJ_FASTCALL lj_vm_rtlunwind(void *cframe, void *excptrec, | ||
22 | void *unwinder, int errcode); | ||
23 | #endif | ||
20 | LJ_ASMF void lj_vm_unwind_c_eh(void); | 24 | LJ_ASMF void lj_vm_unwind_c_eh(void); |
21 | LJ_ASMF void lj_vm_unwind_ff_eh(void); | 25 | LJ_ASMF void lj_vm_unwind_ff_eh(void); |
22 | #if LJ_TARGET_X86ORX64 | 26 | #if LJ_TARGET_X86ORX64 |
@@ -43,13 +47,14 @@ LJ_ASMF void lj_vm_record(void); | |||
43 | LJ_ASMF void lj_vm_inshook(void); | 47 | LJ_ASMF void lj_vm_inshook(void); |
44 | LJ_ASMF void lj_vm_rethook(void); | 48 | LJ_ASMF void lj_vm_rethook(void); |
45 | LJ_ASMF void lj_vm_callhook(void); | 49 | LJ_ASMF void lj_vm_callhook(void); |
50 | LJ_ASMF void lj_vm_profhook(void); | ||
46 | 51 | ||
47 | /* Trace exit handling. */ | 52 | /* Trace exit handling. */ |
48 | LJ_ASMF void lj_vm_exit_handler(void); | 53 | LJ_ASMF void lj_vm_exit_handler(void); |
49 | LJ_ASMF void lj_vm_exit_interp(void); | 54 | LJ_ASMF void lj_vm_exit_interp(void); |
50 | 55 | ||
51 | /* Internal math helper functions. */ | 56 | /* Internal math helper functions. */ |
52 | #if LJ_TARGET_X86ORX64 || LJ_TARGET_PPC | 57 | #if LJ_TARGET_PPC || LJ_TARGET_ARM64 || (LJ_TARGET_MIPS && LJ_ABI_SOFTFP) |
53 | #define lj_vm_floor floor | 58 | #define lj_vm_floor floor |
54 | #define lj_vm_ceil ceil | 59 | #define lj_vm_ceil ceil |
55 | #else | 60 | #else |
@@ -60,23 +65,26 @@ LJ_ASMF double lj_vm_floor_sf(double); | |||
60 | LJ_ASMF double lj_vm_ceil_sf(double); | 65 | LJ_ASMF double lj_vm_ceil_sf(double); |
61 | #endif | 66 | #endif |
62 | #endif | 67 | #endif |
63 | #if defined(LUAJIT_NO_LOG2) || LJ_TARGET_X86ORX64 | 68 | #ifdef LUAJIT_NO_LOG2 |
64 | LJ_ASMF double lj_vm_log2(double); | 69 | LJ_ASMF double lj_vm_log2(double); |
65 | #else | 70 | #else |
66 | #define lj_vm_log2 log2 | 71 | #define lj_vm_log2 log2 |
67 | #endif | 72 | #endif |
73 | #if !(defined(_LJ_DISPATCH_H) && LJ_TARGET_MIPS) | ||
74 | LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t, int32_t); | ||
75 | #endif | ||
68 | 76 | ||
69 | #if LJ_HASJIT | 77 | #if LJ_HASJIT |
70 | #if LJ_TARGET_X86ORX64 | 78 | #if LJ_TARGET_X86ORX64 |
71 | LJ_ASMF void lj_vm_floor_sse(void); | 79 | LJ_ASMF void lj_vm_floor_sse(void); |
72 | LJ_ASMF void lj_vm_ceil_sse(void); | 80 | LJ_ASMF void lj_vm_ceil_sse(void); |
73 | LJ_ASMF void lj_vm_trunc_sse(void); | 81 | LJ_ASMF void lj_vm_trunc_sse(void); |
74 | LJ_ASMF void lj_vm_exp_x87(void); | ||
75 | LJ_ASMF void lj_vm_exp2_x87(void); | ||
76 | LJ_ASMF void lj_vm_pow_sse(void); | ||
77 | LJ_ASMF void lj_vm_powi_sse(void); | 82 | LJ_ASMF void lj_vm_powi_sse(void); |
83 | #define lj_vm_powi NULL | ||
78 | #else | 84 | #else |
79 | #if LJ_TARGET_PPC | 85 | LJ_ASMF double lj_vm_powi(double, int32_t); |
86 | #endif | ||
87 | #if LJ_TARGET_PPC || LJ_TARGET_ARM64 | ||
80 | #define lj_vm_trunc trunc | 88 | #define lj_vm_trunc trunc |
81 | #else | 89 | #else |
82 | LJ_ASMF double lj_vm_trunc(double); | 90 | LJ_ASMF double lj_vm_trunc(double); |
@@ -84,14 +92,11 @@ LJ_ASMF double lj_vm_trunc(double); | |||
84 | LJ_ASMF double lj_vm_trunc_sf(double); | 92 | LJ_ASMF double lj_vm_trunc_sf(double); |
85 | #endif | 93 | #endif |
86 | #endif | 94 | #endif |
87 | LJ_ASMF double lj_vm_powi(double, int32_t); | ||
88 | #ifdef LUAJIT_NO_EXP2 | 95 | #ifdef LUAJIT_NO_EXP2 |
89 | LJ_ASMF double lj_vm_exp2(double); | 96 | LJ_ASMF double lj_vm_exp2(double); |
90 | #else | 97 | #else |
91 | #define lj_vm_exp2 exp2 | 98 | #define lj_vm_exp2 exp2 |
92 | #endif | 99 | #endif |
93 | #endif | ||
94 | LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t, int32_t); | ||
95 | #if LJ_HASFFI | 100 | #if LJ_HASFFI |
96 | LJ_ASMF int lj_vm_errno(void); | 101 | LJ_ASMF int lj_vm_errno(void); |
97 | #endif | 102 | #endif |
@@ -104,8 +109,7 @@ LJ_ASMF void lj_cont_nop(void); /* Do nothing, just continue execution. */ | |||
104 | LJ_ASMF void lj_cont_condt(void); /* Branch if result is true. */ | 109 | LJ_ASMF void lj_cont_condt(void); /* Branch if result is true. */ |
105 | LJ_ASMF void lj_cont_condf(void); /* Branch if result is false. */ | 110 | LJ_ASMF void lj_cont_condf(void); /* Branch if result is false. */ |
106 | LJ_ASMF void lj_cont_hook(void); /* Continue from hook yield. */ | 111 | LJ_ASMF void lj_cont_hook(void); /* Continue from hook yield. */ |
107 | 112 | LJ_ASMF void lj_cont_stitch(void); /* Trace stitching. */ | |
108 | enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */ | ||
109 | 113 | ||
110 | /* Start of the ASM code. */ | 114 | /* Start of the ASM code. */ |
111 | LJ_ASMF char lj_vm_asm_begin[]; | 115 | LJ_ASMF char lj_vm_asm_begin[]; |
diff --git a/src/lj_vmevent.c b/src/lj_vmevent.c index 1d496748..8b442a44 100644 --- a/src/lj_vmevent.c +++ b/src/lj_vmevent.c | |||
@@ -27,6 +27,7 @@ ptrdiff_t lj_vmevent_prepare(lua_State *L, VMEvent ev) | |||
27 | if (tv && tvisfunc(tv)) { | 27 | if (tv && tvisfunc(tv)) { |
28 | lj_state_checkstack(L, LUA_MINSTACK); | 28 | lj_state_checkstack(L, LUA_MINSTACK); |
29 | setfuncV(L, L->top++, funcV(tv)); | 29 | setfuncV(L, L->top++, funcV(tv)); |
30 | if (LJ_FR2) setnilV(L->top++); | ||
30 | return savestack(L, L->top); | 31 | return savestack(L, L->top); |
31 | } | 32 | } |
32 | } | 33 | } |
diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c index 50a2cbba..2a41bcaa 100644 --- a/src/lj_vmmath.c +++ b/src/lj_vmmath.c | |||
@@ -13,16 +13,29 @@ | |||
13 | #include "lj_ir.h" | 13 | #include "lj_ir.h" |
14 | #include "lj_vm.h" | 14 | #include "lj_vm.h" |
15 | 15 | ||
16 | /* -- Helper functions for generated machine code ------------------------- */ | 16 | /* -- Wrapper functions --------------------------------------------------- */ |
17 | 17 | ||
18 | #if LJ_TARGET_X86ORX64 | 18 | #if LJ_TARGET_X86 && __ELF__ && __PIC__ |
19 | /* Wrapper functions to avoid linker issues on OSX. */ | 19 | /* Wrapper functions to deal with the ELF/x86 PIC disaster. */ |
20 | LJ_FUNCA double lj_vm_sinh(double x) { return sinh(x); } | 20 | LJ_FUNCA double lj_wrap_log(double x) { return log(x); } |
21 | LJ_FUNCA double lj_vm_cosh(double x) { return cosh(x); } | 21 | LJ_FUNCA double lj_wrap_log10(double x) { return log10(x); } |
22 | LJ_FUNCA double lj_vm_tanh(double x) { return tanh(x); } | 22 | LJ_FUNCA double lj_wrap_exp(double x) { return exp(x); } |
23 | LJ_FUNCA double lj_wrap_sin(double x) { return sin(x); } | ||
24 | LJ_FUNCA double lj_wrap_cos(double x) { return cos(x); } | ||
25 | LJ_FUNCA double lj_wrap_tan(double x) { return tan(x); } | ||
26 | LJ_FUNCA double lj_wrap_asin(double x) { return asin(x); } | ||
27 | LJ_FUNCA double lj_wrap_acos(double x) { return acos(x); } | ||
28 | LJ_FUNCA double lj_wrap_atan(double x) { return atan(x); } | ||
29 | LJ_FUNCA double lj_wrap_sinh(double x) { return sinh(x); } | ||
30 | LJ_FUNCA double lj_wrap_cosh(double x) { return cosh(x); } | ||
31 | LJ_FUNCA double lj_wrap_tanh(double x) { return tanh(x); } | ||
32 | LJ_FUNCA double lj_wrap_atan2(double x, double y) { return atan2(x, y); } | ||
33 | LJ_FUNCA double lj_wrap_pow(double x, double y) { return pow(x, y); } | ||
34 | LJ_FUNCA double lj_wrap_fmod(double x, double y) { return fmod(x, y); } | ||
23 | #endif | 35 | #endif |
24 | 36 | ||
25 | #if !LJ_TARGET_X86ORX64 | 37 | /* -- Helper functions for generated machine code ------------------------- */ |
38 | |||
26 | double lj_vm_foldarith(double x, double y, int op) | 39 | double lj_vm_foldarith(double x, double y, int op) |
27 | { | 40 | { |
28 | switch (op) { | 41 | switch (op) { |
@@ -43,6 +56,19 @@ double lj_vm_foldarith(double x, double y, int op) | |||
43 | default: return x; | 56 | default: return x; |
44 | } | 57 | } |
45 | } | 58 | } |
59 | |||
60 | #if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS | ||
61 | int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b) | ||
62 | { | ||
63 | uint32_t y, ua, ub; | ||
64 | lua_assert(b != 0); /* This must be checked before using this function. */ | ||
65 | ua = a < 0 ? (uint32_t)-a : (uint32_t)a; | ||
66 | ub = b < 0 ? (uint32_t)-b : (uint32_t)b; | ||
67 | y = ua % ub; | ||
68 | if (y != 0 && (a^b) < 0) y = y - ub; | ||
69 | if (((int32_t)y^b) < 0) y = (uint32_t)-(int32_t)y; | ||
70 | return (int32_t)y; | ||
71 | } | ||
46 | #endif | 72 | #endif |
47 | 73 | ||
48 | #if LJ_HASJIT | 74 | #if LJ_HASJIT |
@@ -61,20 +87,6 @@ double lj_vm_exp2(double a) | |||
61 | } | 87 | } |
62 | #endif | 88 | #endif |
63 | 89 | ||
64 | #if !(LJ_TARGET_ARM || LJ_TARGET_PPC) | ||
65 | int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b) | ||
66 | { | ||
67 | uint32_t y, ua, ub; | ||
68 | lua_assert(b != 0); /* This must be checked before using this function. */ | ||
69 | ua = a < 0 ? (uint32_t)-a : (uint32_t)a; | ||
70 | ub = b < 0 ? (uint32_t)-b : (uint32_t)b; | ||
71 | y = ua % ub; | ||
72 | if (y != 0 && (a^b) < 0) y = y - ub; | ||
73 | if (((int32_t)y^b) < 0) y = (uint32_t)-(int32_t)y; | ||
74 | return (int32_t)y; | ||
75 | } | ||
76 | #endif | ||
77 | |||
78 | #if !LJ_TARGET_X86ORX64 | 90 | #if !LJ_TARGET_X86ORX64 |
79 | /* Unsigned x^k. */ | 91 | /* Unsigned x^k. */ |
80 | static double lj_vm_powui(double x, uint32_t k) | 92 | static double lj_vm_powui(double x, uint32_t k) |
@@ -107,6 +119,7 @@ double lj_vm_powi(double x, int32_t k) | |||
107 | else | 119 | else |
108 | return 1.0 / lj_vm_powui(x, (uint32_t)-k); | 120 | return 1.0 / lj_vm_powui(x, (uint32_t)-k); |
109 | } | 121 | } |
122 | #endif | ||
110 | 123 | ||
111 | /* Computes fpm(x) for extended math functions. */ | 124 | /* Computes fpm(x) for extended math functions. */ |
112 | double lj_vm_foldfpm(double x, int fpm) | 125 | double lj_vm_foldfpm(double x, int fpm) |
@@ -128,7 +141,6 @@ double lj_vm_foldfpm(double x, int fpm) | |||
128 | } | 141 | } |
129 | return 0; | 142 | return 0; |
130 | } | 143 | } |
131 | #endif | ||
132 | 144 | ||
133 | #if LJ_HASFFI | 145 | #if LJ_HASFFI |
134 | int lj_vm_errno(void) | 146 | int lj_vm_errno(void) |
diff --git a/src/ljamalg.c b/src/ljamalg.c index 21b46314..39542981 100644 --- a/src/ljamalg.c +++ b/src/ljamalg.c | |||
@@ -33,6 +33,7 @@ | |||
33 | #include "lj_char.c" | 33 | #include "lj_char.c" |
34 | #include "lj_bc.c" | 34 | #include "lj_bc.c" |
35 | #include "lj_obj.c" | 35 | #include "lj_obj.c" |
36 | #include "lj_buf.c" | ||
36 | #include "lj_str.c" | 37 | #include "lj_str.c" |
37 | #include "lj_tab.c" | 38 | #include "lj_tab.c" |
38 | #include "lj_func.c" | 39 | #include "lj_func.c" |
@@ -44,7 +45,10 @@ | |||
44 | #include "lj_vmevent.c" | 45 | #include "lj_vmevent.c" |
45 | #include "lj_vmmath.c" | 46 | #include "lj_vmmath.c" |
46 | #include "lj_strscan.c" | 47 | #include "lj_strscan.c" |
48 | #include "lj_strfmt.c" | ||
49 | #include "lj_strfmt_num.c" | ||
47 | #include "lj_api.c" | 50 | #include "lj_api.c" |
51 | #include "lj_profile.c" | ||
48 | #include "lj_lex.c" | 52 | #include "lj_lex.c" |
49 | #include "lj_parse.c" | 53 | #include "lj_parse.c" |
50 | #include "lj_bcread.c" | 54 | #include "lj_bcread.c" |
@@ -39,7 +39,8 @@ | |||
39 | #define lua_upvalueindex(i) (LUA_GLOBALSINDEX-(i)) | 39 | #define lua_upvalueindex(i) (LUA_GLOBALSINDEX-(i)) |
40 | 40 | ||
41 | 41 | ||
42 | /* thread status; 0 is OK */ | 42 | /* thread status */ |
43 | #define LUA_OK 0 | ||
43 | #define LUA_YIELD 1 | 44 | #define LUA_YIELD 1 |
44 | #define LUA_ERRRUN 2 | 45 | #define LUA_ERRRUN 2 |
45 | #define LUA_ERRSYNTAX 3 | 46 | #define LUA_ERRSYNTAX 3 |
@@ -226,6 +227,7 @@ LUA_API int (lua_status) (lua_State *L); | |||
226 | #define LUA_GCSTEP 5 | 227 | #define LUA_GCSTEP 5 |
227 | #define LUA_GCSETPAUSE 6 | 228 | #define LUA_GCSETPAUSE 6 |
228 | #define LUA_GCSETSTEPMUL 7 | 229 | #define LUA_GCSETSTEPMUL 7 |
230 | #define LUA_GCISRUNNING 9 | ||
229 | 231 | ||
230 | LUA_API int (lua_gc) (lua_State *L, int what, int data); | 232 | LUA_API int (lua_gc) (lua_State *L, int what, int data); |
231 | 233 | ||
@@ -346,6 +348,13 @@ LUA_API void *lua_upvalueid (lua_State *L, int idx, int n); | |||
346 | LUA_API void lua_upvaluejoin (lua_State *L, int idx1, int n1, int idx2, int n2); | 348 | LUA_API void lua_upvaluejoin (lua_State *L, int idx1, int n1, int idx2, int n2); |
347 | LUA_API int lua_loadx (lua_State *L, lua_Reader reader, void *dt, | 349 | LUA_API int lua_loadx (lua_State *L, lua_Reader reader, void *dt, |
348 | const char *chunkname, const char *mode); | 350 | const char *chunkname, const char *mode); |
351 | LUA_API const lua_Number *lua_version (lua_State *L); | ||
352 | LUA_API void lua_copy (lua_State *L, int fromidx, int toidx); | ||
353 | LUA_API lua_Number lua_tonumberx (lua_State *L, int idx, int *isnum); | ||
354 | LUA_API lua_Integer lua_tointegerx (lua_State *L, int idx, int *isnum); | ||
355 | |||
356 | /* From Lua 5.3. */ | ||
357 | LUA_API int lua_isyieldable (lua_State *L); | ||
349 | 358 | ||
350 | 359 | ||
351 | struct lua_Debug { | 360 | struct lua_Debug { |
diff --git a/src/luaconf.h b/src/luaconf.h index 20feaca8..d422827a 100644 --- a/src/luaconf.h +++ b/src/luaconf.h | |||
@@ -37,7 +37,7 @@ | |||
37 | #endif | 37 | #endif |
38 | #define LUA_LROOT "/usr/local" | 38 | #define LUA_LROOT "/usr/local" |
39 | #define LUA_LUADIR "/lua/5.1/" | 39 | #define LUA_LUADIR "/lua/5.1/" |
40 | #define LUA_LJDIR "/luajit-2.0.5/" | 40 | #define LUA_LJDIR "/luajit-2.1.0-beta3/" |
41 | 41 | ||
42 | #ifdef LUA_ROOT | 42 | #ifdef LUA_ROOT |
43 | #define LUA_JROOT LUA_ROOT | 43 | #define LUA_JROOT LUA_ROOT |
@@ -79,7 +79,7 @@ | |||
79 | #define LUA_IGMARK "-" | 79 | #define LUA_IGMARK "-" |
80 | #define LUA_PATH_CONFIG \ | 80 | #define LUA_PATH_CONFIG \ |
81 | LUA_DIRSEP "\n" LUA_PATHSEP "\n" LUA_PATH_MARK "\n" \ | 81 | LUA_DIRSEP "\n" LUA_PATHSEP "\n" LUA_PATH_MARK "\n" \ |
82 | LUA_EXECDIR "\n" LUA_IGMARK | 82 | LUA_EXECDIR "\n" LUA_IGMARK "\n" |
83 | 83 | ||
84 | /* Quoting in error messages. */ | 84 | /* Quoting in error messages. */ |
85 | #define LUA_QL(x) "'" x "'" | 85 | #define LUA_QL(x) "'" x "'" |
@@ -92,10 +92,6 @@ | |||
92 | #define LUAI_GCMUL 200 /* Run GC at 200% of allocation speed. */ | 92 | #define LUAI_GCMUL 200 /* Run GC at 200% of allocation speed. */ |
93 | #define LUA_MAXCAPTURES 32 /* Max. pattern captures. */ | 93 | #define LUA_MAXCAPTURES 32 /* Max. pattern captures. */ |
94 | 94 | ||
95 | /* Compatibility with older library function names. */ | ||
96 | #define LUA_COMPAT_MOD /* OLD: math.mod, NEW: math.fmod */ | ||
97 | #define LUA_COMPAT_GFIND /* OLD: string.gfind, NEW: string.gmatch */ | ||
98 | |||
99 | /* Configuration for the frontend (the luajit executable). */ | 95 | /* Configuration for the frontend (the luajit executable). */ |
100 | #if defined(luajit_c) | 96 | #if defined(luajit_c) |
101 | #define LUA_PROGNAME "luajit" /* Fallback frontend name. */ | 97 | #define LUA_PROGNAME "luajit" /* Fallback frontend name. */ |
diff --git a/src/luajit.c b/src/luajit.c index 3901762d..53902480 100644 --- a/src/luajit.c +++ b/src/luajit.c | |||
@@ -61,8 +61,9 @@ static void laction(int i) | |||
61 | 61 | ||
62 | static void print_usage(void) | 62 | static void print_usage(void) |
63 | { | 63 | { |
64 | fprintf(stderr, | 64 | fputs("usage: ", stderr); |
65 | "usage: %s [options]... [script [args]...].\n" | 65 | fputs(progname, stderr); |
66 | fputs(" [options]... [script [args]...].\n" | ||
66 | "Available options are:\n" | 67 | "Available options are:\n" |
67 | " -e chunk Execute string " LUA_QL("chunk") ".\n" | 68 | " -e chunk Execute string " LUA_QL("chunk") ".\n" |
68 | " -l name Require library " LUA_QL("name") ".\n" | 69 | " -l name Require library " LUA_QL("name") ".\n" |
@@ -73,16 +74,14 @@ static void print_usage(void) | |||
73 | " -v Show version information.\n" | 74 | " -v Show version information.\n" |
74 | " -E Ignore environment variables.\n" | 75 | " -E Ignore environment variables.\n" |
75 | " -- Stop handling options.\n" | 76 | " -- Stop handling options.\n" |
76 | " - Execute stdin and stop handling options.\n" | 77 | " - Execute stdin and stop handling options.\n", stderr); |
77 | , | ||
78 | progname); | ||
79 | fflush(stderr); | 78 | fflush(stderr); |
80 | } | 79 | } |
81 | 80 | ||
82 | static void l_message(const char *pname, const char *msg) | 81 | static void l_message(const char *pname, const char *msg) |
83 | { | 82 | { |
84 | if (pname) fprintf(stderr, "%s: ", pname); | 83 | if (pname) { fputs(pname, stderr); fputc(':', stderr); fputc(' ', stderr); } |
85 | fprintf(stderr, "%s\n", msg); | 84 | fputs(msg, stderr); fputc('\n', stderr); |
86 | fflush(stderr); | 85 | fflush(stderr); |
87 | } | 86 | } |
88 | 87 | ||
@@ -125,7 +124,7 @@ static int docall(lua_State *L, int narg, int clear) | |||
125 | #endif | 124 | #endif |
126 | lua_remove(L, base); /* remove traceback function */ | 125 | lua_remove(L, base); /* remove traceback function */ |
127 | /* force a complete garbage collection in case of errors */ | 126 | /* force a complete garbage collection in case of errors */ |
128 | if (status != 0) lua_gc(L, LUA_GCCOLLECT, 0); | 127 | if (status != LUA_OK) lua_gc(L, LUA_GCCOLLECT, 0); |
129 | return status; | 128 | return status; |
130 | } | 129 | } |
131 | 130 | ||
@@ -154,22 +153,15 @@ static void print_jit_status(lua_State *L) | |||
154 | lua_settop(L, 0); /* clear stack */ | 153 | lua_settop(L, 0); /* clear stack */ |
155 | } | 154 | } |
156 | 155 | ||
157 | static int getargs(lua_State *L, char **argv, int n) | 156 | static void createargtable(lua_State *L, char **argv, int argc, int argf) |
158 | { | 157 | { |
159 | int narg; | ||
160 | int i; | 158 | int i; |
161 | int argc = 0; | 159 | lua_createtable(L, argc - argf, argf); |
162 | while (argv[argc]) argc++; /* count total number of arguments */ | ||
163 | narg = argc - (n + 1); /* number of arguments to the script */ | ||
164 | luaL_checkstack(L, narg + 3, "too many arguments to script"); | ||
165 | for (i = n+1; i < argc; i++) | ||
166 | lua_pushstring(L, argv[i]); | ||
167 | lua_createtable(L, narg, n + 1); | ||
168 | for (i = 0; i < argc; i++) { | 160 | for (i = 0; i < argc; i++) { |
169 | lua_pushstring(L, argv[i]); | 161 | lua_pushstring(L, argv[i]); |
170 | lua_rawseti(L, -2, i - n); | 162 | lua_rawseti(L, -2, i - argf); |
171 | } | 163 | } |
172 | return narg; | 164 | lua_setglobal(L, "arg"); |
173 | } | 165 | } |
174 | 166 | ||
175 | static int dofile(lua_State *L, const char *name) | 167 | static int dofile(lua_State *L, const char *name) |
@@ -258,9 +250,9 @@ static void dotty(lua_State *L) | |||
258 | const char *oldprogname = progname; | 250 | const char *oldprogname = progname; |
259 | progname = NULL; | 251 | progname = NULL; |
260 | while ((status = loadline(L)) != -1) { | 252 | while ((status = loadline(L)) != -1) { |
261 | if (status == 0) status = docall(L, 0, 0); | 253 | if (status == LUA_OK) status = docall(L, 0, 0); |
262 | report(L, status); | 254 | report(L, status); |
263 | if (status == 0 && lua_gettop(L) > 0) { /* any result to print? */ | 255 | if (status == LUA_OK && lua_gettop(L) > 0) { /* any result to print? */ |
264 | lua_getglobal(L, "print"); | 256 | lua_getglobal(L, "print"); |
265 | lua_insert(L, 1); | 257 | lua_insert(L, 1); |
266 | if (lua_pcall(L, lua_gettop(L)-1, 0, 0) != 0) | 258 | if (lua_pcall(L, lua_gettop(L)-1, 0, 0) != 0) |
@@ -275,21 +267,30 @@ static void dotty(lua_State *L) | |||
275 | progname = oldprogname; | 267 | progname = oldprogname; |
276 | } | 268 | } |
277 | 269 | ||
278 | static int handle_script(lua_State *L, char **argv, int n) | 270 | static int handle_script(lua_State *L, char **argx) |
279 | { | 271 | { |
280 | int status; | 272 | int status; |
281 | const char *fname; | 273 | const char *fname = argx[0]; |
282 | int narg = getargs(L, argv, n); /* collect arguments */ | 274 | if (strcmp(fname, "-") == 0 && strcmp(argx[-1], "--") != 0) |
283 | lua_setglobal(L, "arg"); | ||
284 | fname = argv[n]; | ||
285 | if (strcmp(fname, "-") == 0 && strcmp(argv[n-1], "--") != 0) | ||
286 | fname = NULL; /* stdin */ | 275 | fname = NULL; /* stdin */ |
287 | status = luaL_loadfile(L, fname); | 276 | status = luaL_loadfile(L, fname); |
288 | lua_insert(L, -(narg+1)); | 277 | if (status == LUA_OK) { |
289 | if (status == 0) | 278 | /* Fetch args from arg table. LUA_INIT or -e might have changed them. */ |
279 | int narg = 0; | ||
280 | lua_getglobal(L, "arg"); | ||
281 | if (lua_istable(L, -1)) { | ||
282 | do { | ||
283 | narg++; | ||
284 | lua_rawgeti(L, -narg, narg); | ||
285 | } while (!lua_isnil(L, -1)); | ||
286 | lua_pop(L, 1); | ||
287 | lua_remove(L, -narg); | ||
288 | narg--; | ||
289 | } else { | ||
290 | lua_pop(L, 1); | ||
291 | } | ||
290 | status = docall(L, narg, 0); | 292 | status = docall(L, narg, 0); |
291 | else | 293 | } |
292 | lua_pop(L, narg); | ||
293 | return report(L, status); | 294 | return report(L, status); |
294 | } | 295 | } |
295 | 296 | ||
@@ -386,7 +387,8 @@ static int dobytecode(lua_State *L, char **argv) | |||
386 | } | 387 | } |
387 | for (argv++; *argv != NULL; narg++, argv++) | 388 | for (argv++; *argv != NULL; narg++, argv++) |
388 | lua_pushstring(L, *argv); | 389 | lua_pushstring(L, *argv); |
389 | return report(L, lua_pcall(L, narg, 0, 0)); | 390 | report(L, lua_pcall(L, narg, 0, 0)); |
391 | return -1; | ||
390 | } | 392 | } |
391 | 393 | ||
392 | /* check that argument has no extra characters at the end */ | 394 | /* check that argument has no extra characters at the end */ |
@@ -407,7 +409,7 @@ static int collectargs(char **argv, int *flags) | |||
407 | switch (argv[i][1]) { /* Check option. */ | 409 | switch (argv[i][1]) { /* Check option. */ |
408 | case '-': | 410 | case '-': |
409 | notail(argv[i]); | 411 | notail(argv[i]); |
410 | return (argv[i+1] != NULL ? i+1 : 0); | 412 | return i+1; |
411 | case '\0': | 413 | case '\0': |
412 | return i; | 414 | return i; |
413 | case 'i': | 415 | case 'i': |
@@ -433,23 +435,23 @@ static int collectargs(char **argv, int *flags) | |||
433 | case 'b': /* LuaJIT extension */ | 435 | case 'b': /* LuaJIT extension */ |
434 | if (*flags) return -1; | 436 | if (*flags) return -1; |
435 | *flags |= FLAGS_EXEC; | 437 | *flags |= FLAGS_EXEC; |
436 | return 0; | 438 | return i+1; |
437 | case 'E': | 439 | case 'E': |
438 | *flags |= FLAGS_NOENV; | 440 | *flags |= FLAGS_NOENV; |
439 | break; | 441 | break; |
440 | default: return -1; /* invalid option */ | 442 | default: return -1; /* invalid option */ |
441 | } | 443 | } |
442 | } | 444 | } |
443 | return 0; | 445 | return i; |
444 | } | 446 | } |
445 | 447 | ||
446 | static int runargs(lua_State *L, char **argv, int n) | 448 | static int runargs(lua_State *L, char **argv, int argn) |
447 | { | 449 | { |
448 | int i; | 450 | int i; |
449 | for (i = 1; i < n; i++) { | 451 | for (i = 1; i < argn; i++) { |
450 | if (argv[i] == NULL) continue; | 452 | if (argv[i] == NULL) continue; |
451 | lua_assert(argv[i][0] == '-'); | 453 | lua_assert(argv[i][0] == '-'); |
452 | switch (argv[i][1]) { /* option */ | 454 | switch (argv[i][1]) { |
453 | case 'e': { | 455 | case 'e': { |
454 | const char *chunk = argv[i] + 2; | 456 | const char *chunk = argv[i] + 2; |
455 | if (*chunk == '\0') chunk = argv[++i]; | 457 | if (*chunk == '\0') chunk = argv[++i]; |
@@ -463,10 +465,10 @@ static int runargs(lua_State *L, char **argv, int n) | |||
463 | if (*filename == '\0') filename = argv[++i]; | 465 | if (*filename == '\0') filename = argv[++i]; |
464 | lua_assert(filename != NULL); | 466 | lua_assert(filename != NULL); |
465 | if (dolibrary(L, filename)) | 467 | if (dolibrary(L, filename)) |
466 | return 1; /* stop if file fails */ | 468 | return 1; |
467 | break; | 469 | break; |
468 | } | 470 | } |
469 | case 'j': { /* LuaJIT extension */ | 471 | case 'j': { /* LuaJIT extension. */ |
470 | const char *cmd = argv[i] + 2; | 472 | const char *cmd = argv[i] + 2; |
471 | if (*cmd == '\0') cmd = argv[++i]; | 473 | if (*cmd == '\0') cmd = argv[++i]; |
472 | lua_assert(cmd != NULL); | 474 | lua_assert(cmd != NULL); |
@@ -474,16 +476,16 @@ static int runargs(lua_State *L, char **argv, int n) | |||
474 | return 1; | 476 | return 1; |
475 | break; | 477 | break; |
476 | } | 478 | } |
477 | case 'O': /* LuaJIT extension */ | 479 | case 'O': /* LuaJIT extension. */ |
478 | if (dojitopt(L, argv[i] + 2)) | 480 | if (dojitopt(L, argv[i] + 2)) |
479 | return 1; | 481 | return 1; |
480 | break; | 482 | break; |
481 | case 'b': /* LuaJIT extension */ | 483 | case 'b': /* LuaJIT extension. */ |
482 | return dobytecode(L, argv+i); | 484 | return dobytecode(L, argv+i); |
483 | default: break; | 485 | default: break; |
484 | } | 486 | } |
485 | } | 487 | } |
486 | return 0; | 488 | return LUA_OK; |
487 | } | 489 | } |
488 | 490 | ||
489 | static int handle_luainit(lua_State *L) | 491 | static int handle_luainit(lua_State *L) |
@@ -494,7 +496,7 @@ static int handle_luainit(lua_State *L) | |||
494 | const char *init = getenv(LUA_INIT); | 496 | const char *init = getenv(LUA_INIT); |
495 | #endif | 497 | #endif |
496 | if (init == NULL) | 498 | if (init == NULL) |
497 | return 0; /* status OK */ | 499 | return LUA_OK; |
498 | else if (init[0] == '@') | 500 | else if (init[0] == '@') |
499 | return dofile(L, init+1); | 501 | return dofile(L, init+1); |
500 | else | 502 | else |
@@ -511,45 +513,57 @@ static int pmain(lua_State *L) | |||
511 | { | 513 | { |
512 | struct Smain *s = &smain; | 514 | struct Smain *s = &smain; |
513 | char **argv = s->argv; | 515 | char **argv = s->argv; |
514 | int script; | 516 | int argn; |
515 | int flags = 0; | 517 | int flags = 0; |
516 | globalL = L; | 518 | globalL = L; |
517 | if (argv[0] && argv[0][0]) progname = argv[0]; | 519 | if (argv[0] && argv[0][0]) progname = argv[0]; |
518 | LUAJIT_VERSION_SYM(); /* linker-enforced version check */ | 520 | |
519 | script = collectargs(argv, &flags); | 521 | LUAJIT_VERSION_SYM(); /* Linker-enforced version check. */ |
520 | if (script < 0) { /* invalid args? */ | 522 | |
523 | argn = collectargs(argv, &flags); | ||
524 | if (argn < 0) { /* Invalid args? */ | ||
521 | print_usage(); | 525 | print_usage(); |
522 | s->status = 1; | 526 | s->status = 1; |
523 | return 0; | 527 | return 0; |
524 | } | 528 | } |
529 | |||
525 | if ((flags & FLAGS_NOENV)) { | 530 | if ((flags & FLAGS_NOENV)) { |
526 | lua_pushboolean(L, 1); | 531 | lua_pushboolean(L, 1); |
527 | lua_setfield(L, LUA_REGISTRYINDEX, "LUA_NOENV"); | 532 | lua_setfield(L, LUA_REGISTRYINDEX, "LUA_NOENV"); |
528 | } | 533 | } |
529 | lua_gc(L, LUA_GCSTOP, 0); /* stop collector during initialization */ | 534 | |
530 | luaL_openlibs(L); /* open libraries */ | 535 | /* Stop collector during library initialization. */ |
536 | lua_gc(L, LUA_GCSTOP, 0); | ||
537 | luaL_openlibs(L); | ||
531 | lua_gc(L, LUA_GCRESTART, -1); | 538 | lua_gc(L, LUA_GCRESTART, -1); |
539 | |||
540 | createargtable(L, argv, s->argc, argn); | ||
541 | |||
532 | if (!(flags & FLAGS_NOENV)) { | 542 | if (!(flags & FLAGS_NOENV)) { |
533 | s->status = handle_luainit(L); | 543 | s->status = handle_luainit(L); |
534 | if (s->status != 0) return 0; | 544 | if (s->status != LUA_OK) return 0; |
535 | } | 545 | } |
546 | |||
536 | if ((flags & FLAGS_VERSION)) print_version(); | 547 | if ((flags & FLAGS_VERSION)) print_version(); |
537 | s->status = runargs(L, argv, (script > 0) ? script : s->argc); | 548 | |
538 | if (s->status != 0) return 0; | 549 | s->status = runargs(L, argv, argn); |
539 | if (script) { | 550 | if (s->status != LUA_OK) return 0; |
540 | s->status = handle_script(L, argv, script); | 551 | |
541 | if (s->status != 0) return 0; | 552 | if (s->argc > argn) { |
553 | s->status = handle_script(L, argv + argn); | ||
554 | if (s->status != LUA_OK) return 0; | ||
542 | } | 555 | } |
556 | |||
543 | if ((flags & FLAGS_INTERACTIVE)) { | 557 | if ((flags & FLAGS_INTERACTIVE)) { |
544 | print_jit_status(L); | 558 | print_jit_status(L); |
545 | dotty(L); | 559 | dotty(L); |
546 | } else if (script == 0 && !(flags & (FLAGS_EXEC|FLAGS_VERSION))) { | 560 | } else if (s->argc == argn && !(flags & (FLAGS_EXEC|FLAGS_VERSION))) { |
547 | if (lua_stdin_is_tty()) { | 561 | if (lua_stdin_is_tty()) { |
548 | print_version(); | 562 | print_version(); |
549 | print_jit_status(L); | 563 | print_jit_status(L); |
550 | dotty(L); | 564 | dotty(L); |
551 | } else { | 565 | } else { |
552 | dofile(L, NULL); /* executes stdin as a file */ | 566 | dofile(L, NULL); /* Executes stdin as a file. */ |
553 | } | 567 | } |
554 | } | 568 | } |
555 | return 0; | 569 | return 0; |
@@ -558,7 +572,7 @@ static int pmain(lua_State *L) | |||
558 | int main(int argc, char **argv) | 572 | int main(int argc, char **argv) |
559 | { | 573 | { |
560 | int status; | 574 | int status; |
561 | lua_State *L = lua_open(); /* create state */ | 575 | lua_State *L = lua_open(); |
562 | if (L == NULL) { | 576 | if (L == NULL) { |
563 | l_message(argv[0], "cannot create state: not enough memory"); | 577 | l_message(argv[0], "cannot create state: not enough memory"); |
564 | return EXIT_FAILURE; | 578 | return EXIT_FAILURE; |
@@ -568,6 +582,6 @@ int main(int argc, char **argv) | |||
568 | status = lua_cpcall(L, pmain, NULL); | 582 | status = lua_cpcall(L, pmain, NULL); |
569 | report(L, status); | 583 | report(L, status); |
570 | lua_close(L); | 584 | lua_close(L); |
571 | return (status || smain.status) ? EXIT_FAILURE : EXIT_SUCCESS; | 585 | return (status || smain.status > 0) ? EXIT_FAILURE : EXIT_SUCCESS; |
572 | } | 586 | } |
573 | 587 | ||
diff --git a/src/luajit.h b/src/luajit.h index 5f5b3887..600031a1 100644 --- a/src/luajit.h +++ b/src/luajit.h | |||
@@ -30,9 +30,9 @@ | |||
30 | 30 | ||
31 | #include "lua.h" | 31 | #include "lua.h" |
32 | 32 | ||
33 | #define LUAJIT_VERSION "LuaJIT 2.0.5" | 33 | #define LUAJIT_VERSION "LuaJIT 2.1.0-beta3" |
34 | #define LUAJIT_VERSION_NUM 20005 /* Version 2.0.5 = 02.00.05. */ | 34 | #define LUAJIT_VERSION_NUM 20100 /* Version 2.1.0 = 02.01.00. */ |
35 | #define LUAJIT_VERSION_SYM luaJIT_version_2_0_5 | 35 | #define LUAJIT_VERSION_SYM luaJIT_version_2_1_0_beta3 |
36 | #define LUAJIT_COPYRIGHT "Copyright (C) 2005-2020 Mike Pall" | 36 | #define LUAJIT_COPYRIGHT "Copyright (C) 2005-2020 Mike Pall" |
37 | #define LUAJIT_URL "http://luajit.org/" | 37 | #define LUAJIT_URL "http://luajit.org/" |
38 | 38 | ||
@@ -64,6 +64,15 @@ enum { | |||
64 | /* Control the JIT engine. */ | 64 | /* Control the JIT engine. */ |
65 | LUA_API int luaJIT_setmode(lua_State *L, int idx, int mode); | 65 | LUA_API int luaJIT_setmode(lua_State *L, int idx, int mode); |
66 | 66 | ||
67 | /* Low-overhead profiling API. */ | ||
68 | typedef void (*luaJIT_profile_callback)(void *data, lua_State *L, | ||
69 | int samples, int vmstate); | ||
70 | LUA_API void luaJIT_profile_start(lua_State *L, const char *mode, | ||
71 | luaJIT_profile_callback cb, void *data); | ||
72 | LUA_API void luaJIT_profile_stop(lua_State *L); | ||
73 | LUA_API const char *luaJIT_profile_dumpstack(lua_State *L, const char *fmt, | ||
74 | int depth, size_t *len); | ||
75 | |||
67 | /* Enforce (dynamic) linker error for version mismatches. Call from main. */ | 76 | /* Enforce (dynamic) linker error for version mismatches. Call from main. */ |
68 | LUA_API void LUAJIT_VERSION_SYM(void); | 77 | LUA_API void LUAJIT_VERSION_SYM(void); |
69 | 78 | ||
diff --git a/src/msvcbuild.bat b/src/msvcbuild.bat index 499b5f12..ae035dc6 100644 --- a/src/msvcbuild.bat +++ b/src/msvcbuild.bat | |||
@@ -5,6 +5,7 @@ | |||
5 | @rem Then cd to this directory and run this script. Use the following | 5 | @rem Then cd to this directory and run this script. Use the following |
6 | @rem options (in order), if needed. The default is a dynamic release build. | 6 | @rem options (in order), if needed. The default is a dynamic release build. |
7 | @rem | 7 | @rem |
8 | @rem nogc64 disable LJ_GC64 mode for x64 | ||
8 | @rem debug emit debug symbols | 9 | @rem debug emit debug symbols |
9 | @rem amalg amalgamated build | 10 | @rem amalg amalgamated build |
10 | @rem static static linkage | 11 | @rem static static linkage |
@@ -20,6 +21,7 @@ | |||
20 | @set LJLIB=lib /nologo /nodefaultlib | 21 | @set LJLIB=lib /nologo /nodefaultlib |
21 | @set DASMDIR=..\dynasm | 22 | @set DASMDIR=..\dynasm |
22 | @set DASM=%DASMDIR%\dynasm.lua | 23 | @set DASM=%DASMDIR%\dynasm.lua |
24 | @set DASC=vm_x64.dasc | ||
23 | @set LJDLLNAME=lua51.dll | 25 | @set LJDLLNAME=lua51.dll |
24 | @set LJLIBNAME=lua51.lib | 26 | @set LJLIBNAME=lua51.lib |
25 | @set BUILDTYPE=release | 27 | @set BUILDTYPE=release |
@@ -36,10 +38,17 @@ if exist minilua.exe.manifest^ | |||
36 | @set LJARCH=x64 | 38 | @set LJARCH=x64 |
37 | @minilua | 39 | @minilua |
38 | @if errorlevel 8 goto :X64 | 40 | @if errorlevel 8 goto :X64 |
41 | @set DASC=vm_x86.dasc | ||
39 | @set DASMFLAGS=-D WIN -D JIT -D FFI | 42 | @set DASMFLAGS=-D WIN -D JIT -D FFI |
40 | @set LJARCH=x86 | 43 | @set LJARCH=x86 |
44 | @set LJCOMPILE=%LJCOMPILE% /arch:SSE2 | ||
41 | :X64 | 45 | :X64 |
42 | minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_x86.dasc | 46 | @if "%1" neq "nogc64" goto :GC64 |
47 | @shift | ||
48 | @set DASC=vm_x86.dasc | ||
49 | @set LJCOMPILE=%LJCOMPILE% /DLUAJIT_DISABLE_GC64 | ||
50 | :GC64 | ||
51 | minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC% | ||
43 | @if errorlevel 1 goto :BAD | 52 | @if errorlevel 1 goto :BAD |
44 | 53 | ||
45 | %LJCOMPILE% /I "." /I %DASMDIR% host\buildvm*.c | 54 | %LJCOMPILE% /I "." /I %DASMDIR% host\buildvm*.c |
@@ -68,6 +77,7 @@ buildvm -m folddef -o lj_folddef.h lj_opt_fold.c | |||
68 | @shift | 77 | @shift |
69 | @set BUILDTYPE=debug | 78 | @set BUILDTYPE=debug |
70 | @set LJCOMPILE=%LJCOMPILE% /Zi %DEBUGCFLAGS% | 79 | @set LJCOMPILE=%LJCOMPILE% /Zi %DEBUGCFLAGS% |
80 | @set LJLINK=%LJLINK% /opt:ref /opt:icf /incremental:no | ||
71 | :NODEBUG | 81 | :NODEBUG |
72 | @set LJLINK=%LJLINK% /%BUILDTYPE% | 82 | @set LJLINK=%LJLINK% /%BUILDTYPE% |
73 | @if "%1"=="amalg" goto :AMALGDLL | 83 | @if "%1"=="amalg" goto :AMALGDLL |
diff --git a/src/ps4build.bat b/src/ps4build.bat index 337a44fa..e4a7defe 100644 --- a/src/ps4build.bat +++ b/src/ps4build.bat | |||
@@ -2,7 +2,19 @@ | |||
2 | @rem Donated to the public domain. | 2 | @rem Donated to the public domain. |
3 | @rem | 3 | @rem |
4 | @rem Open a "Visual Studio .NET Command Prompt" (64 bit host compiler) | 4 | @rem Open a "Visual Studio .NET Command Prompt" (64 bit host compiler) |
5 | @rem or "VS2015 x64 Native Tools Command Prompt". | ||
6 | @rem | ||
5 | @rem Then cd to this directory and run this script. | 7 | @rem Then cd to this directory and run this script. |
8 | @rem | ||
9 | @rem Recommended invocation: | ||
10 | @rem | ||
11 | @rem ps4build release build, amalgamated, 64-bit GC | ||
12 | @rem ps4build debug debug build, amalgamated, 64-bit GC | ||
13 | @rem | ||
14 | @rem Additional command-line options (not generally recommended): | ||
15 | @rem | ||
16 | @rem gc32 (before debug) 32-bit GC | ||
17 | @rem noamalg (after debug) non-amalgamated build | ||
6 | 18 | ||
7 | @if not defined INCLUDE goto :FAIL | 19 | @if not defined INCLUDE goto :FAIL |
8 | @if not defined SCE_ORBIS_SDK_DIR goto :FAIL | 20 | @if not defined SCE_ORBIS_SDK_DIR goto :FAIL |
@@ -15,6 +27,14 @@ | |||
15 | @set DASMDIR=..\dynasm | 27 | @set DASMDIR=..\dynasm |
16 | @set DASM=%DASMDIR%\dynasm.lua | 28 | @set DASM=%DASMDIR%\dynasm.lua |
17 | @set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c | 29 | @set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c |
30 | @set GC64=-DLUAJIT_ENABLE_GC64 | ||
31 | @set DASC=vm_x64.dasc | ||
32 | |||
33 | @if "%1" neq "gc32" goto :NOGC32 | ||
34 | @shift | ||
35 | @set GC64= | ||
36 | @set DASC=vm_x86.dasc | ||
37 | :NOGC32 | ||
18 | 38 | ||
19 | %LJCOMPILE% host\minilua.c | 39 | %LJCOMPILE% host\minilua.c |
20 | @if errorlevel 1 goto :BAD | 40 | @if errorlevel 1 goto :BAD |
@@ -28,10 +48,10 @@ if exist minilua.exe.manifest^ | |||
28 | @if not errorlevel 8 goto :FAIL | 48 | @if not errorlevel 8 goto :FAIL |
29 | 49 | ||
30 | @set DASMFLAGS=-D P64 -D NO_UNWIND | 50 | @set DASMFLAGS=-D P64 -D NO_UNWIND |
31 | minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_x86.dasc | 51 | minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC% |
32 | @if errorlevel 1 goto :BAD | 52 | @if errorlevel 1 goto :BAD |
33 | 53 | ||
34 | %LJCOMPILE% /I "." /I %DASMDIR% -DLUAJIT_TARGET=LUAJIT_ARCH_X64 -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLUAJIT_NO_UNWIND host\buildvm*.c | 54 | %LJCOMPILE% /I "." /I %DASMDIR% %GC64% -DLUAJIT_TARGET=LUAJIT_ARCH_X64 -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLUAJIT_NO_UNWIND host\buildvm*.c |
35 | @if errorlevel 1 goto :BAD | 55 | @if errorlevel 1 goto :BAD |
36 | %LJLINK% /out:buildvm.exe buildvm*.obj | 56 | %LJLINK% /out:buildvm.exe buildvm*.obj |
37 | @if errorlevel 1 goto :BAD | 57 | @if errorlevel 1 goto :BAD |
@@ -54,7 +74,7 @@ buildvm -m folddef -o lj_folddef.h lj_opt_fold.c | |||
54 | @if errorlevel 1 goto :BAD | 74 | @if errorlevel 1 goto :BAD |
55 | 75 | ||
56 | @rem ---- Cross compiler ---- | 76 | @rem ---- Cross compiler ---- |
57 | @set LJCOMPILE="%SCE_ORBIS_SDK_DIR%\host_tools\bin\orbis-clang" -c -Wall -DLUAJIT_DISABLE_FFI | 77 | @set LJCOMPILE="%SCE_ORBIS_SDK_DIR%\host_tools\bin\orbis-clang" -c -Wall -DLUAJIT_DISABLE_FFI %GC64% |
58 | @set LJLIB="%SCE_ORBIS_SDK_DIR%\host_tools\bin\orbis-ar" rcus | 78 | @set LJLIB="%SCE_ORBIS_SDK_DIR%\host_tools\bin\orbis-ar" rcus |
59 | @set INCLUDE="" | 79 | @set INCLUDE="" |
60 | 80 | ||
@@ -63,14 +83,14 @@ orbis-as -o lj_vm.o lj_vm.s | |||
63 | @if "%1" neq "debug" goto :NODEBUG | 83 | @if "%1" neq "debug" goto :NODEBUG |
64 | @shift | 84 | @shift |
65 | @set LJCOMPILE=%LJCOMPILE% -g -O0 | 85 | @set LJCOMPILE=%LJCOMPILE% -g -O0 |
66 | @set TARGETLIB=libluajitD.a | 86 | @set TARGETLIB=libluajitD_ps4.a |
67 | goto :BUILD | 87 | goto :BUILD |
68 | :NODEBUG | 88 | :NODEBUG |
69 | @set LJCOMPILE=%LJCOMPILE% -O2 | 89 | @set LJCOMPILE=%LJCOMPILE% -O2 |
70 | @set TARGETLIB=libluajit.a | 90 | @set TARGETLIB=libluajit_ps4.a |
71 | :BUILD | 91 | :BUILD |
72 | del %TARGETLIB% | 92 | del %TARGETLIB% |
73 | @if "%1"=="amalg" goto :AMALG | 93 | @if "%1" neq "noamalg" goto :AMALG |
74 | for %%f in (lj_*.c lib_*.c) do ( | 94 | for %%f in (lj_*.c lib_*.c) do ( |
75 | %LJCOMPILE% %%f | 95 | %LJCOMPILE% %%f |
76 | @if errorlevel 1 goto :BAD | 96 | @if errorlevel 1 goto :BAD |
diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc index c5e0498e..edefac32 100644 --- a/src/vm_arm.dasc +++ b/src/vm_arm.dasc | |||
@@ -99,6 +99,7 @@ | |||
99 | |.type NODE, Node | 99 | |.type NODE, Node |
100 | |.type NARGS8, int | 100 | |.type NARGS8, int |
101 | |.type TRACE, GCtrace | 101 | |.type TRACE, GCtrace |
102 | |.type SBUF, SBuf | ||
102 | | | 103 | | |
103 | |//----------------------------------------------------------------------- | 104 | |//----------------------------------------------------------------------- |
104 | | | 105 | | |
@@ -372,6 +373,17 @@ static void build_subroutines(BuildCtx *ctx) | |||
372 | | st_vmstate CARG2 | 373 | | st_vmstate CARG2 |
373 | | b ->vm_returnc | 374 | | b ->vm_returnc |
374 | | | 375 | | |
376 | |->vm_unwind_ext: // Complete external unwind. | ||
377 | #if !LJ_NO_UNWIND | ||
378 | | push {r0, r1, r2, lr} | ||
379 | | bl extern _Unwind_Complete | ||
380 | | ldr r0, [sp] | ||
381 | | bl extern _Unwind_DeleteException | ||
382 | | pop {r0, r1, r2, lr} | ||
383 | | mov r0, r1 | ||
384 | | bx r2 | ||
385 | #endif | ||
386 | | | ||
375 | |//----------------------------------------------------------------------- | 387 | |//----------------------------------------------------------------------- |
376 | |//-- Grow stack for calls ----------------------------------------------- | 388 | |//-- Grow stack for calls ----------------------------------------------- |
377 | |//----------------------------------------------------------------------- | 389 | |//----------------------------------------------------------------------- |
@@ -418,13 +430,14 @@ static void build_subroutines(BuildCtx *ctx) | |||
418 | | add CARG2, sp, #CFRAME_RESUME | 430 | | add CARG2, sp, #CFRAME_RESUME |
419 | | ldrb CARG1, L->status | 431 | | ldrb CARG1, L->status |
420 | | str CARG3, SAVE_ERRF | 432 | | str CARG3, SAVE_ERRF |
421 | | str CARG2, L->cframe | 433 | | str L, SAVE_PC // Any value outside of bytecode is ok. |
422 | | str CARG3, SAVE_CFRAME | 434 | | str CARG3, SAVE_CFRAME |
423 | | cmp CARG1, #0 | 435 | | cmp CARG1, #0 |
424 | | str L, SAVE_PC // Any value outside of bytecode is ok. | 436 | | str CARG2, L->cframe |
425 | | beq >3 | 437 | | beq >3 |
426 | | | 438 | | |
427 | | // Resume after yield (like a return). | 439 | | // Resume after yield (like a return). |
440 | | str L, [DISPATCH, #DISPATCH_GL(cur_L)] | ||
428 | | mov RA, BASE | 441 | | mov RA, BASE |
429 | | ldr BASE, L->base | 442 | | ldr BASE, L->base |
430 | | ldr CARG1, L->top | 443 | | ldr CARG1, L->top |
@@ -458,14 +471,15 @@ static void build_subroutines(BuildCtx *ctx) | |||
458 | | str CARG3, SAVE_NRES | 471 | | str CARG3, SAVE_NRES |
459 | | mov L, CARG1 | 472 | | mov L, CARG1 |
460 | | str CARG1, SAVE_L | 473 | | str CARG1, SAVE_L |
461 | | mov BASE, CARG2 | ||
462 | | str sp, L->cframe // Add our C frame to cframe chain. | ||
463 | | ldr DISPATCH, L->glref // Setup pointer to dispatch table. | 474 | | ldr DISPATCH, L->glref // Setup pointer to dispatch table. |
475 | | mov BASE, CARG2 | ||
464 | | str CARG1, SAVE_PC // Any value outside of bytecode is ok. | 476 | | str CARG1, SAVE_PC // Any value outside of bytecode is ok. |
465 | | str RC, SAVE_CFRAME | 477 | | str RC, SAVE_CFRAME |
466 | | add DISPATCH, DISPATCH, #GG_G2DISP | 478 | | add DISPATCH, DISPATCH, #GG_G2DISP |
479 | | str sp, L->cframe // Add our C frame to cframe chain. | ||
467 | | | 480 | | |
468 | |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). | 481 | |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). |
482 | | str L, [DISPATCH, #DISPATCH_GL(cur_L)] | ||
469 | | ldr RB, L->base // RB = old base (for vmeta_call). | 483 | | ldr RB, L->base // RB = old base (for vmeta_call). |
470 | | ldr CARG1, L->top | 484 | | ldr CARG1, L->top |
471 | | mov MASKR8, #255 | 485 | | mov MASKR8, #255 |
@@ -491,20 +505,21 @@ static void build_subroutines(BuildCtx *ctx) | |||
491 | | mov L, CARG1 | 505 | | mov L, CARG1 |
492 | | ldr RA, L:CARG1->stack | 506 | | ldr RA, L:CARG1->stack |
493 | | str CARG1, SAVE_L | 507 | | str CARG1, SAVE_L |
508 | | ldr DISPATCH, L->glref // Setup pointer to dispatch table. | ||
494 | | ldr RB, L->top | 509 | | ldr RB, L->top |
495 | | str CARG1, SAVE_PC // Any value outside of bytecode is ok. | 510 | | str CARG1, SAVE_PC // Any value outside of bytecode is ok. |
496 | | ldr RC, L->cframe | 511 | | ldr RC, L->cframe |
512 | | add DISPATCH, DISPATCH, #GG_G2DISP | ||
497 | | sub RA, RA, RB // Compute -savestack(L, L->top). | 513 | | sub RA, RA, RB // Compute -savestack(L, L->top). |
498 | | str sp, L->cframe // Add our C frame to cframe chain. | ||
499 | | mov RB, #0 | 514 | | mov RB, #0 |
500 | | str RA, SAVE_NRES // Neg. delta means cframe w/o frame. | 515 | | str RA, SAVE_NRES // Neg. delta means cframe w/o frame. |
501 | | str RB, SAVE_ERRF // No error function. | 516 | | str RB, SAVE_ERRF // No error function. |
502 | | str RC, SAVE_CFRAME | 517 | | str RC, SAVE_CFRAME |
518 | | str sp, L->cframe // Add our C frame to cframe chain. | ||
519 | | str L, [DISPATCH, #DISPATCH_GL(cur_L)] | ||
503 | | blx CARG4 // (lua_State *L, lua_CFunction func, void *ud) | 520 | | blx CARG4 // (lua_State *L, lua_CFunction func, void *ud) |
504 | | ldr DISPATCH, L->glref // Setup pointer to dispatch table. | ||
505 | | movs BASE, CRET1 | 521 | | movs BASE, CRET1 |
506 | | mov PC, #FRAME_CP | 522 | | mov PC, #FRAME_CP |
507 | | add DISPATCH, DISPATCH, #GG_G2DISP | ||
508 | | bne <3 // Else continue with the call. | 523 | | bne <3 // Else continue with the call. |
509 | | b ->vm_leave_cp // No base? Just remove C frame. | 524 | | b ->vm_leave_cp // No base? Just remove C frame. |
510 | | | 525 | | |
@@ -614,6 +629,16 @@ static void build_subroutines(BuildCtx *ctx) | |||
614 | | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. | 629 | | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. |
615 | | b ->vm_call_dispatch_f | 630 | | b ->vm_call_dispatch_f |
616 | | | 631 | | |
632 | |->vmeta_tgetr: | ||
633 | | .IOS mov RC, BASE | ||
634 | | bl extern lj_tab_getinth // (GCtab *t, int32_t key) | ||
635 | | // Returns cTValue * or NULL. | ||
636 | | .IOS mov BASE, RC | ||
637 | | cmp CRET1, #0 | ||
638 | | ldrdne CARG12, [CRET1] | ||
639 | | mvneq CARG2, #~LJ_TNIL | ||
640 | | b ->BC_TGETR_Z | ||
641 | | | ||
617 | |//----------------------------------------------------------------------- | 642 | |//----------------------------------------------------------------------- |
618 | | | 643 | | |
619 | |->vmeta_tsets1: | 644 | |->vmeta_tsets1: |
@@ -671,6 +696,15 @@ static void build_subroutines(BuildCtx *ctx) | |||
671 | | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. | 696 | | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. |
672 | | b ->vm_call_dispatch_f | 697 | | b ->vm_call_dispatch_f |
673 | | | 698 | | |
699 | |->vmeta_tsetr: | ||
700 | | str BASE, L->base | ||
701 | | .IOS mov RC, BASE | ||
702 | | str PC, SAVE_PC | ||
703 | | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) | ||
704 | | // Returns TValue *. | ||
705 | | .IOS mov BASE, RC | ||
706 | | b ->BC_TSETR_Z | ||
707 | | | ||
674 | |//-- Comparison metamethods --------------------------------------------- | 708 | |//-- Comparison metamethods --------------------------------------------- |
675 | | | 709 | | |
676 | |->vmeta_comp: | 710 | |->vmeta_comp: |
@@ -735,6 +769,17 @@ static void build_subroutines(BuildCtx *ctx) | |||
735 | | b <3 | 769 | | b <3 |
736 | |.endif | 770 | |.endif |
737 | | | 771 | | |
772 | |->vmeta_istype: | ||
773 | | sub PC, PC, #4 | ||
774 | | str BASE, L->base | ||
775 | | mov CARG1, L | ||
776 | | lsr CARG2, RA, #3 | ||
777 | | mov CARG3, RC | ||
778 | | str PC, SAVE_PC | ||
779 | | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) | ||
780 | | .IOS ldr BASE, L->base | ||
781 | | b ->cont_nop | ||
782 | | | ||
738 | |//-- Arithmetic metamethods --------------------------------------------- | 783 | |//-- Arithmetic metamethods --------------------------------------------- |
739 | | | 784 | | |
740 | |->vmeta_arith_vn: | 785 | |->vmeta_arith_vn: |
@@ -1052,7 +1097,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
1052 | | ffgccheck | 1097 | | ffgccheck |
1053 | | mov CARG1, L | 1098 | | mov CARG1, L |
1054 | | mov CARG2, BASE | 1099 | | mov CARG2, BASE |
1055 | | bl extern lj_str_fromnumber // (lua_State *L, cTValue *o) | 1100 | | bl extern lj_strfmt_number // (lua_State *L, cTValue *o) |
1056 | | // Returns GCstr *. | 1101 | | // Returns GCstr *. |
1057 | | ldr BASE, L->base | 1102 | | ldr BASE, L->base |
1058 | | mvn CARG2, #~LJ_TSTR | 1103 | | mvn CARG2, #~LJ_TSTR |
@@ -1230,9 +1275,10 @@ static void build_subroutines(BuildCtx *ctx) | |||
1230 | | ldr CARG3, L:RA->base | 1275 | | ldr CARG3, L:RA->base |
1231 | | mv_vmstate CARG2, INTERP | 1276 | | mv_vmstate CARG2, INTERP |
1232 | | ldr CARG4, L:RA->top | 1277 | | ldr CARG4, L:RA->top |
1233 | | st_vmstate CARG2 | ||
1234 | | cmp CRET1, #LUA_YIELD | 1278 | | cmp CRET1, #LUA_YIELD |
1235 | | ldr BASE, L->base | 1279 | | ldr BASE, L->base |
1280 | | str L, [DISPATCH, #DISPATCH_GL(cur_L)] | ||
1281 | | st_vmstate CARG2 | ||
1236 | | bhi >8 | 1282 | | bhi >8 |
1237 | | subs RC, CARG4, CARG3 | 1283 | | subs RC, CARG4, CARG3 |
1238 | | ldr CARG1, L->maxstack | 1284 | | ldr CARG1, L->maxstack |
@@ -1500,19 +1546,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
1500 | | math_extern2 atan2 | 1546 | | math_extern2 atan2 |
1501 | | math_extern2 fmod | 1547 | | math_extern2 fmod |
1502 | | | 1548 | | |
1503 | |->ff_math_deg: | ||
1504 | |.if FPU | ||
1505 | | .ffunc_d math_rad | ||
1506 | | vldr d1, CFUNC:CARG3->upvalue[0] | ||
1507 | | vmul.f64 d0, d0, d1 | ||
1508 | | b ->fff_resd | ||
1509 | |.else | ||
1510 | | .ffunc_n math_rad | ||
1511 | | ldrd CARG34, CFUNC:CARG3->upvalue[0] | ||
1512 | | bl extern __aeabi_dmul | ||
1513 | | b ->fff_restv | ||
1514 | |.endif | ||
1515 | | | ||
1516 | |.if HFABI | 1549 | |.if HFABI |
1517 | | .ffunc math_ldexp | 1550 | | .ffunc math_ldexp |
1518 | | ldr CARG4, [BASE, #4] | 1551 | | ldr CARG4, [BASE, #4] |
@@ -1687,12 +1720,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
1687 | | | 1720 | | |
1688 | |//-- String library ----------------------------------------------------- | 1721 | |//-- String library ----------------------------------------------------- |
1689 | | | 1722 | | |
1690 | |.ffunc_1 string_len | ||
1691 | | checkstr CARG2, ->fff_fallback | ||
1692 | | ldr CARG1, STR:CARG1->len | ||
1693 | | mvn CARG2, #~LJ_TISNUM | ||
1694 | | b ->fff_restv | ||
1695 | | | ||
1696 | |.ffunc string_byte // Only handle the 1-arg case here. | 1723 | |.ffunc string_byte // Only handle the 1-arg case here. |
1697 | | ldrd CARG12, [BASE] | 1724 | | ldrd CARG12, [BASE] |
1698 | | ldr PC, [BASE, FRAME_PC] | 1725 | | ldr PC, [BASE, FRAME_PC] |
@@ -1725,6 +1752,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
1725 | | mov CARG1, L | 1752 | | mov CARG1, L |
1726 | | str PC, SAVE_PC | 1753 | | str PC, SAVE_PC |
1727 | | bl extern lj_str_new // (lua_State *L, char *str, size_t l) | 1754 | | bl extern lj_str_new // (lua_State *L, char *str, size_t l) |
1755 | |->fff_resstr: | ||
1728 | | // Returns GCstr *. | 1756 | | // Returns GCstr *. |
1729 | | ldr BASE, L->base | 1757 | | ldr BASE, L->base |
1730 | | mvn CARG2, #~LJ_TSTR | 1758 | | mvn CARG2, #~LJ_TSTR |
@@ -1768,91 +1796,28 @@ static void build_subroutines(BuildCtx *ctx) | |||
1768 | | mvn CARG2, #~LJ_TSTR | 1796 | | mvn CARG2, #~LJ_TSTR |
1769 | | b ->fff_restv | 1797 | | b ->fff_restv |
1770 | | | 1798 | | |
1771 | |.ffunc string_rep // Only handle the 1-char case inline. | 1799 | |.macro ffstring_op, name |
1772 | | ffgccheck | 1800 | | .ffunc string_ .. name |
1773 | | ldrd CARG12, [BASE] | ||
1774 | | ldrd CARG34, [BASE, #8] | ||
1775 | | cmp NARGS8:RC, #16 | ||
1776 | | bne ->fff_fallback // Exactly 2 arguments | ||
1777 | | checktp CARG2, LJ_TSTR | ||
1778 | | checktpeq CARG4, LJ_TISNUM | ||
1779 | | bne ->fff_fallback | ||
1780 | | subs CARG4, CARG3, #1 | ||
1781 | | ldr CARG2, STR:CARG1->len | ||
1782 | | blt ->fff_emptystr // Count <= 0? | ||
1783 | | cmp CARG2, #1 | ||
1784 | | blo ->fff_emptystr // Zero-length string? | ||
1785 | | bne ->fff_fallback // Fallback for > 1-char strings. | ||
1786 | | ldr RB, [DISPATCH, #DISPATCH_GL(tmpbuf.sz)] | ||
1787 | | ldr CARG2, [DISPATCH, #DISPATCH_GL(tmpbuf.buf)] | ||
1788 | | ldr CARG1, STR:CARG1[1] | ||
1789 | | cmp RB, CARG3 | ||
1790 | | blo ->fff_fallback | ||
1791 | |1: // Fill buffer with char. | ||
1792 | | strb CARG1, [CARG2, CARG4] | ||
1793 | | subs CARG4, CARG4, #1 | ||
1794 | | bge <1 | ||
1795 | | b ->fff_newstr | ||
1796 | | | ||
1797 | |.ffunc string_reverse | ||
1798 | | ffgccheck | 1801 | | ffgccheck |
1799 | | ldrd CARG12, [BASE] | 1802 | | ldr CARG3, [BASE, #4] |
1800 | | cmp NARGS8:RC, #8 | 1803 | | cmp NARGS8:RC, #8 |
1804 | | ldr STR:CARG2, [BASE] | ||
1801 | | blo ->fff_fallback | 1805 | | blo ->fff_fallback |
1802 | | checkstr CARG2, ->fff_fallback | 1806 | | sub SBUF:CARG1, DISPATCH, #-DISPATCH_GL(tmpbuf) |
1803 | | ldr CARG3, STR:CARG1->len | 1807 | | checkstr CARG3, ->fff_fallback |
1804 | | ldr RB, [DISPATCH, #DISPATCH_GL(tmpbuf.sz)] | 1808 | | ldr CARG4, SBUF:CARG1->b |
1805 | | ldr CARG2, [DISPATCH, #DISPATCH_GL(tmpbuf.buf)] | 1809 | | str BASE, L->base |
1806 | | mov CARG4, CARG3 | 1810 | | str PC, SAVE_PC |
1807 | | add CARG1, STR:CARG1, #sizeof(GCstr) | 1811 | | str L, SBUF:CARG1->L |
1808 | | cmp RB, CARG3 | 1812 | | str CARG4, SBUF:CARG1->p |
1809 | | blo ->fff_fallback | 1813 | | bl extern lj_buf_putstr_ .. name |
1810 | |1: // Reverse string copy. | 1814 | | bl extern lj_buf_tostr |
1811 | | ldrb RB, [CARG1], #1 | 1815 | | b ->fff_resstr |
1812 | | subs CARG4, CARG4, #1 | ||
1813 | | blt ->fff_newstr | ||
1814 | | strb RB, [CARG2, CARG4] | ||
1815 | | b <1 | ||
1816 | | | ||
1817 | |.macro ffstring_case, name, lo | ||
1818 | | .ffunc name | ||
1819 | | ffgccheck | ||
1820 | | ldrd CARG12, [BASE] | ||
1821 | | cmp NARGS8:RC, #8 | ||
1822 | | blo ->fff_fallback | ||
1823 | | checkstr CARG2, ->fff_fallback | ||
1824 | | ldr CARG3, STR:CARG1->len | ||
1825 | | ldr RB, [DISPATCH, #DISPATCH_GL(tmpbuf.sz)] | ||
1826 | | ldr CARG2, [DISPATCH, #DISPATCH_GL(tmpbuf.buf)] | ||
1827 | | mov CARG4, #0 | ||
1828 | | add CARG1, STR:CARG1, #sizeof(GCstr) | ||
1829 | | cmp RB, CARG3 | ||
1830 | | blo ->fff_fallback | ||
1831 | |1: // ASCII case conversion. | ||
1832 | | ldrb RB, [CARG1, CARG4] | ||
1833 | | cmp CARG4, CARG3 | ||
1834 | | bhs ->fff_newstr | ||
1835 | | sub RC, RB, #lo | ||
1836 | | cmp RC, #26 | ||
1837 | | eorlo RB, RB, #0x20 | ||
1838 | | strb RB, [CARG2, CARG4] | ||
1839 | | add CARG4, CARG4, #1 | ||
1840 | | b <1 | ||
1841 | |.endmacro | 1816 | |.endmacro |
1842 | | | 1817 | | |
1843 | |ffstring_case string_lower, 65 | 1818 | |ffstring_op reverse |
1844 | |ffstring_case string_upper, 97 | 1819 | |ffstring_op lower |
1845 | | | 1820 | |ffstring_op upper |
1846 | |//-- Table library ------------------------------------------------------ | ||
1847 | | | ||
1848 | |.ffunc_1 table_getn | ||
1849 | | checktab CARG2, ->fff_fallback | ||
1850 | | .IOS mov RA, BASE | ||
1851 | | bl extern lj_tab_len // (GCtab *t) | ||
1852 | | // Returns uint32_t (but less than 2^31). | ||
1853 | | .IOS mov BASE, RA | ||
1854 | | mvn CARG2, #~LJ_TISNUM | ||
1855 | | b ->fff_restv | ||
1856 | | | 1821 | | |
1857 | |//-- Bit library -------------------------------------------------------- | 1822 | |//-- Bit library -------------------------------------------------------- |
1858 | | | 1823 | | |
@@ -2127,6 +2092,66 @@ static void build_subroutines(BuildCtx *ctx) | |||
2127 | | ldr INS, [PC, #-4] | 2092 | | ldr INS, [PC, #-4] |
2128 | | bx CRET1 | 2093 | | bx CRET1 |
2129 | | | 2094 | | |
2095 | |->cont_stitch: // Trace stitching. | ||
2096 | |.if JIT | ||
2097 | | // RA = resultptr, CARG4 = meta base | ||
2098 | | ldr RB, SAVE_MULTRES | ||
2099 | | ldr INS, [PC, #-4] | ||
2100 | | ldr TRACE:CARG3, [CARG4, #-24] // Save previous trace. | ||
2101 | | subs RB, RB, #8 | ||
2102 | | decode_RA8 RC, INS // Call base. | ||
2103 | | beq >2 | ||
2104 | |1: // Move results down. | ||
2105 | | ldrd CARG12, [RA] | ||
2106 | | add RA, RA, #8 | ||
2107 | | subs RB, RB, #8 | ||
2108 | | strd CARG12, [BASE, RC] | ||
2109 | | add RC, RC, #8 | ||
2110 | | bne <1 | ||
2111 | |2: | ||
2112 | | decode_RA8 RA, INS | ||
2113 | | decode_RB8 RB, INS | ||
2114 | | add RA, RA, RB | ||
2115 | |3: | ||
2116 | | cmp RA, RC | ||
2117 | | mvn CARG2, #~LJ_TNIL | ||
2118 | | bhi >9 // More results wanted? | ||
2119 | | | ||
2120 | | ldrh RA, TRACE:CARG3->traceno | ||
2121 | | ldrh RC, TRACE:CARG3->link | ||
2122 | | cmp RC, RA | ||
2123 | | beq ->cont_nop // Blacklisted. | ||
2124 | | cmp RC, #0 | ||
2125 | | bne =>BC_JLOOP // Jump to stitched trace. | ||
2126 | | | ||
2127 | | // Stitch a new trace to the previous trace. | ||
2128 | | str RA, [DISPATCH, #DISPATCH_J(exitno)] | ||
2129 | | str L, [DISPATCH, #DISPATCH_J(L)] | ||
2130 | | str BASE, L->base | ||
2131 | | sub CARG1, DISPATCH, #-GG_DISP2J | ||
2132 | | mov CARG2, PC | ||
2133 | | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) | ||
2134 | | ldr BASE, L->base | ||
2135 | | b ->cont_nop | ||
2136 | | | ||
2137 | |9: // Fill up results with nil. | ||
2138 | | strd CARG12, [BASE, RC] | ||
2139 | | add RC, RC, #8 | ||
2140 | | b <3 | ||
2141 | |.endif | ||
2142 | | | ||
2143 | |->vm_profhook: // Dispatch target for profiler hook. | ||
2144 | #if LJ_HASPROFILE | ||
2145 | | mov CARG1, L | ||
2146 | | str BASE, L->base | ||
2147 | | mov CARG2, PC | ||
2148 | | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc) | ||
2149 | | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. | ||
2150 | | ldr BASE, L->base | ||
2151 | | sub PC, PC, #4 | ||
2152 | | b ->cont_nop | ||
2153 | #endif | ||
2154 | | | ||
2130 | |//----------------------------------------------------------------------- | 2155 | |//----------------------------------------------------------------------- |
2131 | |//-- Trace exit handler ------------------------------------------------- | 2156 | |//-- Trace exit handler ------------------------------------------------- |
2132 | |//----------------------------------------------------------------------- | 2157 | |//----------------------------------------------------------------------- |
@@ -2151,14 +2176,14 @@ static void build_subroutines(BuildCtx *ctx) | |||
2151 | | add CARG1, CARG1, CARG2, asr #6 | 2176 | | add CARG1, CARG1, CARG2, asr #6 |
2152 | | ldr CARG2, [lr, #4] // Load exit stub group offset. | 2177 | | ldr CARG2, [lr, #4] // Load exit stub group offset. |
2153 | | sub CARG1, CARG1, lr | 2178 | | sub CARG1, CARG1, lr |
2154 | | ldr L, [DISPATCH, #DISPATCH_GL(jit_L)] | 2179 | | ldr L, [DISPATCH, #DISPATCH_GL(cur_L)] |
2155 | | add CARG1, CARG2, CARG1, lsr #2 // Compute exit number. | 2180 | | add CARG1, CARG2, CARG1, lsr #2 // Compute exit number. |
2156 | | ldr BASE, [DISPATCH, #DISPATCH_GL(jit_base)] | 2181 | | ldr BASE, [DISPATCH, #DISPATCH_GL(jit_base)] |
2157 | | str CARG1, [DISPATCH, #DISPATCH_J(exitno)] | 2182 | | str CARG1, [DISPATCH, #DISPATCH_J(exitno)] |
2158 | | mov CARG4, #0 | 2183 | | mov CARG4, #0 |
2159 | | str L, [DISPATCH, #DISPATCH_J(L)] | ||
2160 | | str BASE, L->base | 2184 | | str BASE, L->base |
2161 | | str CARG4, [DISPATCH, #DISPATCH_GL(jit_L)] | 2185 | | str L, [DISPATCH, #DISPATCH_J(L)] |
2186 | | str CARG4, [DISPATCH, #DISPATCH_GL(jit_base)] | ||
2162 | | sub CARG1, DISPATCH, #-GG_DISP2J | 2187 | | sub CARG1, DISPATCH, #-GG_DISP2J |
2163 | | mov CARG2, sp | 2188 | | mov CARG2, sp |
2164 | | bl extern lj_trace_exit // (jit_State *J, ExitState *ex) | 2189 | | bl extern lj_trace_exit // (jit_State *J, ExitState *ex) |
@@ -2177,13 +2202,14 @@ static void build_subroutines(BuildCtx *ctx) | |||
2177 | | ldr L, SAVE_L | 2202 | | ldr L, SAVE_L |
2178 | |1: | 2203 | |1: |
2179 | | cmp CARG1, #0 | 2204 | | cmp CARG1, #0 |
2180 | | blt >3 // Check for error from exit. | 2205 | | blt >9 // Check for error from exit. |
2181 | | lsl RC, CARG1, #3 | 2206 | | lsl RC, CARG1, #3 |
2182 | | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] | 2207 | | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] |
2183 | | str RC, SAVE_MULTRES | 2208 | | str RC, SAVE_MULTRES |
2184 | | mov CARG3, #0 | 2209 | | mov CARG3, #0 |
2210 | | str BASE, L->base | ||
2185 | | ldr CARG2, LFUNC:CARG2->field_pc | 2211 | | ldr CARG2, LFUNC:CARG2->field_pc |
2186 | | str CARG3, [DISPATCH, #DISPATCH_GL(jit_L)] | 2212 | | str CARG3, [DISPATCH, #DISPATCH_GL(jit_base)] |
2187 | | mv_vmstate CARG4, INTERP | 2213 | | mv_vmstate CARG4, INTERP |
2188 | | ldr KBASE, [CARG2, #PC2PROTO(k)] | 2214 | | ldr KBASE, [CARG2, #PC2PROTO(k)] |
2189 | | // Modified copy of ins_next which handles function header dispatch, too. | 2215 | | // Modified copy of ins_next which handles function header dispatch, too. |
@@ -2192,15 +2218,32 @@ static void build_subroutines(BuildCtx *ctx) | |||
2192 | | ldr INS, [PC], #4 | 2218 | | ldr INS, [PC], #4 |
2193 | | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8. | 2219 | | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8. |
2194 | | st_vmstate CARG4 | 2220 | | st_vmstate CARG4 |
2221 | | cmp OP, #BC_FUNCC+2 // Fast function? | ||
2222 | | bhs >4 | ||
2223 | |2: | ||
2195 | | cmp OP, #BC_FUNCF // Function header? | 2224 | | cmp OP, #BC_FUNCF // Function header? |
2196 | | ldr OP, [DISPATCH, OP, lsl #2] | 2225 | | ldr OP, [DISPATCH, OP, lsl #2] |
2197 | | decode_RA8 RA, INS | 2226 | | decode_RA8 RA, INS |
2198 | | lsrlo RC, INS, #16 // No: Decode operands A*8 and D. | 2227 | | lsrlo RC, INS, #16 // No: Decode operands A*8 and D. |
2199 | | subhs RC, RC, #8 | 2228 | | subhs RC, RC, #8 |
2200 | | addhs RA, RA, BASE // Yes: RA = BASE+framesize*8, RC = nargs*8 | 2229 | | addhs RA, RA, BASE // Yes: RA = BASE+framesize*8, RC = nargs*8 |
2230 | | ldrhs CARG3, [BASE, FRAME_FUNC] | ||
2201 | | bx OP | 2231 | | bx OP |
2202 | | | 2232 | | |
2203 | |3: // Rethrow error from the right C frame. | 2233 | |4: // Check frame below fast function. |
2234 | | ldr CARG1, [BASE, FRAME_PC] | ||
2235 | | ands CARG2, CARG1, #FRAME_TYPE | ||
2236 | | bne <2 // Trace stitching continuation? | ||
2237 | | // Otherwise set KBASE for Lua function below fast function. | ||
2238 | | ldr CARG3, [CARG1, #-4] | ||
2239 | | decode_RA8 CARG1, CARG3 | ||
2240 | | sub CARG2, BASE, CARG1 | ||
2241 | | ldr LFUNC:CARG3, [CARG2, #-16] | ||
2242 | | ldr CARG3, LFUNC:CARG3->field_pc | ||
2243 | | ldr KBASE, [CARG3, #PC2PROTO(k)] | ||
2244 | | b <2 | ||
2245 | | | ||
2246 | |9: // Rethrow error from the right C frame. | ||
2204 | | rsb CARG2, CARG1, #0 | 2247 | | rsb CARG2, CARG1, #0 |
2205 | | mov CARG1, L | 2248 | | mov CARG1, L |
2206 | | bl extern lj_err_throw // (lua_State *L, int errcode) | 2249 | | bl extern lj_err_throw // (lua_State *L, int errcode) |
@@ -2833,6 +2876,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2833 | | ins_next | 2876 | | ins_next |
2834 | break; | 2877 | break; |
2835 | 2878 | ||
2879 | case BC_ISTYPE: | ||
2880 | | // RA = src*8, RC = -type | ||
2881 | | ldrd CARG12, [BASE, RA] | ||
2882 | | ins_next1 | ||
2883 | | cmn CARG2, RC | ||
2884 | | ins_next2 | ||
2885 | | bne ->vmeta_istype | ||
2886 | | ins_next3 | ||
2887 | break; | ||
2888 | case BC_ISNUM: | ||
2889 | | // RA = src*8, RC = -(TISNUM-1) | ||
2890 | | ldrd CARG12, [BASE, RA] | ||
2891 | | ins_next1 | ||
2892 | | checktp CARG2, LJ_TISNUM | ||
2893 | | ins_next2 | ||
2894 | | bhs ->vmeta_istype | ||
2895 | | ins_next3 | ||
2896 | break; | ||
2897 | |||
2836 | /* -- Unary ops --------------------------------------------------------- */ | 2898 | /* -- Unary ops --------------------------------------------------------- */ |
2837 | 2899 | ||
2838 | case BC_MOV: | 2900 | case BC_MOV: |
@@ -3503,6 +3565,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3503 | | bne <1 // 'no __index' flag set: done. | 3565 | | bne <1 // 'no __index' flag set: done. |
3504 | | b ->vmeta_tgetb | 3566 | | b ->vmeta_tgetb |
3505 | break; | 3567 | break; |
3568 | case BC_TGETR: | ||
3569 | | decode_RB8 RB, INS | ||
3570 | | decode_RC8 RC, INS | ||
3571 | | // RA = dst*8, RB = table*8, RC = key*8 | ||
3572 | | ldr TAB:CARG1, [BASE, RB] | ||
3573 | | ldr CARG2, [BASE, RC] | ||
3574 | | ldr CARG4, TAB:CARG1->array | ||
3575 | | ldr CARG3, TAB:CARG1->asize | ||
3576 | | add CARG4, CARG4, CARG2, lsl #3 | ||
3577 | | cmp CARG2, CARG3 // In array part? | ||
3578 | | bhs ->vmeta_tgetr | ||
3579 | | ldrd CARG12, [CARG4] | ||
3580 | |->BC_TGETR_Z: | ||
3581 | | ins_next1 | ||
3582 | | ins_next2 | ||
3583 | | strd CARG12, [BASE, RA] | ||
3584 | | ins_next3 | ||
3585 | break; | ||
3506 | 3586 | ||
3507 | case BC_TSETV: | 3587 | case BC_TSETV: |
3508 | | decode_RB8 RB, INS | 3588 | | decode_RB8 RB, INS |
@@ -3673,6 +3753,32 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3673 | | barrierback TAB:CARG1, INS, CARG3 | 3753 | | barrierback TAB:CARG1, INS, CARG3 |
3674 | | b <2 | 3754 | | b <2 |
3675 | break; | 3755 | break; |
3756 | case BC_TSETR: | ||
3757 | | decode_RB8 RB, INS | ||
3758 | | decode_RC8 RC, INS | ||
3759 | | // RA = src*8, RB = table*8, RC = key*8 | ||
3760 | | ldr TAB:CARG2, [BASE, RB] | ||
3761 | | ldr CARG3, [BASE, RC] | ||
3762 | | ldrb INS, TAB:CARG2->marked | ||
3763 | | ldr CARG1, TAB:CARG2->array | ||
3764 | | ldr CARG4, TAB:CARG2->asize | ||
3765 | | tst INS, #LJ_GC_BLACK // isblack(table) | ||
3766 | | add CARG1, CARG1, CARG3, lsl #3 | ||
3767 | | bne >7 | ||
3768 | |2: | ||
3769 | | cmp CARG3, CARG4 // In array part? | ||
3770 | | bhs ->vmeta_tsetr | ||
3771 | |->BC_TSETR_Z: | ||
3772 | | ldrd CARG34, [BASE, RA] | ||
3773 | | ins_next1 | ||
3774 | | ins_next2 | ||
3775 | | strd CARG34, [CARG1] | ||
3776 | | ins_next3 | ||
3777 | | | ||
3778 | |7: // Possible table write barrier for the value. Skip valiswhite check. | ||
3779 | | barrierback TAB:CARG2, INS, RB | ||
3780 | | b <2 | ||
3781 | break; | ||
3676 | 3782 | ||
3677 | case BC_TSETM: | 3783 | case BC_TSETM: |
3678 | | // RA = base*8 (table at base-1), RC = num_const (start index) | 3784 | | // RA = base*8 (table at base-1), RC = num_const (start index) |
@@ -4270,7 +4376,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4270 | | st_vmstate CARG2 | 4376 | | st_vmstate CARG2 |
4271 | | ldr RA, TRACE:RC->mcode | 4377 | | ldr RA, TRACE:RC->mcode |
4272 | | str BASE, [DISPATCH, #DISPATCH_GL(jit_base)] | 4378 | | str BASE, [DISPATCH, #DISPATCH_GL(jit_base)] |
4273 | | str L, [DISPATCH, #DISPATCH_GL(jit_L)] | 4379 | | str L, [DISPATCH, #DISPATCH_GL(tmpbuf.L)] |
4274 | | bx RA | 4380 | | bx RA |
4275 | |.endif | 4381 | |.endif |
4276 | break; | 4382 | break; |
@@ -4388,6 +4494,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4388 | | ldr BASE, L->base | 4494 | | ldr BASE, L->base |
4389 | | mv_vmstate CARG3, INTERP | 4495 | | mv_vmstate CARG3, INTERP |
4390 | | ldr CRET2, L->top | 4496 | | ldr CRET2, L->top |
4497 | | str L, [DISPATCH, #DISPATCH_GL(cur_L)] | ||
4391 | | lsl RC, CRET1, #3 | 4498 | | lsl RC, CRET1, #3 |
4392 | | st_vmstate CARG3 | 4499 | | st_vmstate CARG3 |
4393 | | ldr PC, [BASE, FRAME_PC] | 4500 | | ldr PC, [BASE, FRAME_PC] |
diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc new file mode 100644 index 00000000..edceb549 --- /dev/null +++ b/src/vm_arm64.dasc | |||
@@ -0,0 +1,3988 @@ | |||
1 | |// Low-level VM code for ARM64 CPUs. | ||
2 | |// Bytecode interpreter, fast functions and helper functions. | ||
3 | |// Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h | ||
4 | | | ||
5 | |.arch arm64 | ||
6 | |.section code_op, code_sub | ||
7 | | | ||
8 | |.actionlist build_actionlist | ||
9 | |.globals GLOB_ | ||
10 | |.globalnames globnames | ||
11 | |.externnames extnames | ||
12 | | | ||
13 | |// Note: The ragged indentation of the instructions is intentional. | ||
14 | |// The starting columns indicate data dependencies. | ||
15 | | | ||
16 | |//----------------------------------------------------------------------- | ||
17 | | | ||
18 | |// ARM64 registers and the AAPCS64 ABI 1.0 at a glance: | ||
19 | |// | ||
20 | |// x0-x17 temp, x19-x28 callee-saved, x29 fp, x30 lr | ||
21 | |// x18 is reserved on most platforms. Don't use it, save it or restore it. | ||
22 | |// x31 doesn't exist. Register number 31 either means xzr/wzr (zero) or sp, | ||
23 | |// depending on the instruction. | ||
24 | |// v0-v7 temp, v8-v15 callee-saved (only d8-d15 preserved), v16-v31 temp | ||
25 | |// | ||
26 | |// x0-x7/v0-v7 hold parameters and results. | ||
27 | | | ||
28 | |// Fixed register assignments for the interpreter. | ||
29 | | | ||
30 | |// The following must be C callee-save. | ||
31 | |.define BASE, x19 // Base of current Lua stack frame. | ||
32 | |.define KBASE, x20 // Constants of current Lua function. | ||
33 | |.define PC, x21 // Next PC. | ||
34 | |.define GLREG, x22 // Global state. | ||
35 | |.define LREG, x23 // Register holding lua_State (also in SAVE_L). | ||
36 | |.define TISNUM, x24 // Constant LJ_TISNUM << 47. | ||
37 | |.define TISNUMhi, x25 // Constant LJ_TISNUM << 15. | ||
38 | |.define TISNIL, x26 // Constant -1LL. | ||
39 | |.define fp, x29 // Yes, we have to maintain a frame pointer. | ||
40 | | | ||
41 | |.define ST_INTERP, w26 // Constant -1. | ||
42 | | | ||
43 | |// The following temporaries are not saved across C calls, except for RA/RC. | ||
44 | |.define RA, x27 | ||
45 | |.define RC, x28 | ||
46 | |.define RB, x17 | ||
47 | |.define RAw, w27 | ||
48 | |.define RCw, w28 | ||
49 | |.define RBw, w17 | ||
50 | |.define INS, x16 | ||
51 | |.define INSw, w16 | ||
52 | |.define ITYPE, x15 | ||
53 | |.define TMP0, x8 | ||
54 | |.define TMP1, x9 | ||
55 | |.define TMP2, x10 | ||
56 | |.define TMP3, x11 | ||
57 | |.define TMP0w, w8 | ||
58 | |.define TMP1w, w9 | ||
59 | |.define TMP2w, w10 | ||
60 | |.define TMP3w, w11 | ||
61 | | | ||
62 | |// Calling conventions. Also used as temporaries. | ||
63 | |.define CARG1, x0 | ||
64 | |.define CARG2, x1 | ||
65 | |.define CARG3, x2 | ||
66 | |.define CARG4, x3 | ||
67 | |.define CARG5, x4 | ||
68 | |.define CARG1w, w0 | ||
69 | |.define CARG2w, w1 | ||
70 | |.define CARG3w, w2 | ||
71 | |.define CARG4w, w3 | ||
72 | |.define CARG5w, w4 | ||
73 | | | ||
74 | |.define FARG1, d0 | ||
75 | |.define FARG2, d1 | ||
76 | | | ||
77 | |.define CRET1, x0 | ||
78 | |.define CRET1w, w0 | ||
79 | | | ||
80 | |// Stack layout while in interpreter. Must match with lj_frame.h. | ||
81 | | | ||
82 | |.define CFRAME_SPACE, 208 | ||
83 | |//----- 16 byte aligned, <-- sp entering interpreter | ||
84 | |// Unused [sp, #204] // 32 bit values | ||
85 | |.define SAVE_NRES, [sp, #200] | ||
86 | |.define SAVE_ERRF, [sp, #196] | ||
87 | |.define SAVE_MULTRES, [sp, #192] | ||
88 | |.define TMPD, [sp, #184] // 64 bit values | ||
89 | |.define SAVE_L, [sp, #176] | ||
90 | |.define SAVE_PC, [sp, #168] | ||
91 | |.define SAVE_CFRAME, [sp, #160] | ||
92 | |.define SAVE_FPR_, 96 // 96+8*8: 64 bit FPR saves | ||
93 | |.define SAVE_GPR_, 16 // 16+10*8: 64 bit GPR saves | ||
94 | |.define SAVE_LR, [sp, #8] | ||
95 | |.define SAVE_FP, [sp] | ||
96 | |//----- 16 byte aligned, <-- sp while in interpreter. | ||
97 | | | ||
98 | |.define TMPDofs, #184 | ||
99 | | | ||
100 | |.macro save_, gpr1, gpr2, fpr1, fpr2 | ||
101 | | stp d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(fpr1-8)*8] | ||
102 | | stp x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(gpr1-19)*8] | ||
103 | |.endmacro | ||
104 | |.macro rest_, gpr1, gpr2, fpr1, fpr2 | ||
105 | | ldp d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(fpr1-8)*8] | ||
106 | | ldp x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(gpr1-19)*8] | ||
107 | |.endmacro | ||
108 | | | ||
109 | |.macro saveregs | ||
110 | | stp fp, lr, [sp, #-CFRAME_SPACE]! | ||
111 | | add fp, sp, #0 | ||
112 | | stp x19, x20, [sp, # SAVE_GPR_] | ||
113 | | save_ 21, 22, 8, 9 | ||
114 | | save_ 23, 24, 10, 11 | ||
115 | | save_ 25, 26, 12, 13 | ||
116 | | save_ 27, 28, 14, 15 | ||
117 | |.endmacro | ||
118 | |.macro restoreregs | ||
119 | | ldp x19, x20, [sp, # SAVE_GPR_] | ||
120 | | rest_ 21, 22, 8, 9 | ||
121 | | rest_ 23, 24, 10, 11 | ||
122 | | rest_ 25, 26, 12, 13 | ||
123 | | rest_ 27, 28, 14, 15 | ||
124 | | ldp fp, lr, [sp], # CFRAME_SPACE | ||
125 | |.endmacro | ||
126 | | | ||
127 | |// Type definitions. Some of these are only used for documentation. | ||
128 | |.type L, lua_State, LREG | ||
129 | |.type GL, global_State, GLREG | ||
130 | |.type TVALUE, TValue | ||
131 | |.type GCOBJ, GCobj | ||
132 | |.type STR, GCstr | ||
133 | |.type TAB, GCtab | ||
134 | |.type LFUNC, GCfuncL | ||
135 | |.type CFUNC, GCfuncC | ||
136 | |.type PROTO, GCproto | ||
137 | |.type UPVAL, GCupval | ||
138 | |.type NODE, Node | ||
139 | |.type NARGS8, int | ||
140 | |.type TRACE, GCtrace | ||
141 | |.type SBUF, SBuf | ||
142 | | | ||
143 | |//----------------------------------------------------------------------- | ||
144 | | | ||
145 | |// Trap for not-yet-implemented parts. | ||
146 | |.macro NYI; brk; .endmacro | ||
147 | | | ||
148 | |//----------------------------------------------------------------------- | ||
149 | | | ||
150 | |// Access to frame relative to BASE. | ||
151 | |.define FRAME_FUNC, #-16 | ||
152 | |.define FRAME_PC, #-8 | ||
153 | | | ||
154 | |// Endian-specific defines. | ||
155 | |.if ENDIAN_LE | ||
156 | |.define LO, 0 | ||
157 | |.define OFS_RD, 2 | ||
158 | |.define OFS_RB, 3 | ||
159 | |.define OFS_RA, 1 | ||
160 | |.define OFS_OP, 0 | ||
161 | |.else | ||
162 | |.define LO, 4 | ||
163 | |.define OFS_RD, 0 | ||
164 | |.define OFS_RB, 0 | ||
165 | |.define OFS_RA, 2 | ||
166 | |.define OFS_OP, 3 | ||
167 | |.endif | ||
168 | | | ||
169 | |.macro decode_RA, dst, ins; ubfx dst, ins, #8, #8; .endmacro | ||
170 | |.macro decode_RB, dst, ins; ubfx dst, ins, #24, #8; .endmacro | ||
171 | |.macro decode_RC, dst, ins; ubfx dst, ins, #16, #8; .endmacro | ||
172 | |.macro decode_RD, dst, ins; ubfx dst, ins, #16, #16; .endmacro | ||
173 | |.macro decode_RC8RD, dst, src; ubfiz dst, src, #3, #8; .endmacro | ||
174 | | | ||
175 | |// Instruction decode+dispatch. | ||
176 | |.macro ins_NEXT | ||
177 | | ldr INSw, [PC], #4 | ||
178 | | add TMP1, GL, INS, uxtb #3 | ||
179 | | decode_RA RA, INS | ||
180 | | ldr TMP0, [TMP1, #GG_G2DISP] | ||
181 | | decode_RD RC, INS | ||
182 | | br TMP0 | ||
183 | |.endmacro | ||
184 | | | ||
185 | |// Instruction footer. | ||
186 | |.if 1 | ||
187 | | // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. | ||
188 | | .define ins_next, ins_NEXT | ||
189 | | .define ins_next_, ins_NEXT | ||
190 | |.else | ||
191 | | // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. | ||
192 | | // Affects only certain kinds of benchmarks (and only with -j off). | ||
193 | | .macro ins_next | ||
194 | | b ->ins_next | ||
195 | | .endmacro | ||
196 | | .macro ins_next_ | ||
197 | | ->ins_next: | ||
198 | | ins_NEXT | ||
199 | | .endmacro | ||
200 | |.endif | ||
201 | | | ||
202 | |// Call decode and dispatch. | ||
203 | |.macro ins_callt | ||
204 | | // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC | ||
205 | | ldr PC, LFUNC:CARG3->pc | ||
206 | | ldr INSw, [PC], #4 | ||
207 | | add TMP1, GL, INS, uxtb #3 | ||
208 | | decode_RA RA, INS | ||
209 | | ldr TMP0, [TMP1, #GG_G2DISP] | ||
210 | | add RA, BASE, RA, lsl #3 | ||
211 | | br TMP0 | ||
212 | |.endmacro | ||
213 | | | ||
214 | |.macro ins_call | ||
215 | | // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, PC = caller PC | ||
216 | | str PC, [BASE, FRAME_PC] | ||
217 | | ins_callt | ||
218 | |.endmacro | ||
219 | | | ||
220 | |//----------------------------------------------------------------------- | ||
221 | | | ||
222 | |// Macros to check the TValue type and extract the GCobj. Branch on failure. | ||
223 | |.macro checktp, reg, tp, target | ||
224 | | asr ITYPE, reg, #47 | ||
225 | | cmn ITYPE, #-tp | ||
226 | | and reg, reg, #LJ_GCVMASK | ||
227 | | bne target | ||
228 | |.endmacro | ||
229 | |.macro checktp, dst, reg, tp, target | ||
230 | | asr ITYPE, reg, #47 | ||
231 | | cmn ITYPE, #-tp | ||
232 | | and dst, reg, #LJ_GCVMASK | ||
233 | | bne target | ||
234 | |.endmacro | ||
235 | |.macro checkstr, reg, target; checktp reg, LJ_TSTR, target; .endmacro | ||
236 | |.macro checktab, reg, target; checktp reg, LJ_TTAB, target; .endmacro | ||
237 | |.macro checkfunc, reg, target; checktp reg, LJ_TFUNC, target; .endmacro | ||
238 | |.macro checkint, reg, target | ||
239 | | cmp TISNUMhi, reg, lsr #32 | ||
240 | | bne target | ||
241 | |.endmacro | ||
242 | |.macro checknum, reg, target | ||
243 | | cmp TISNUMhi, reg, lsr #32 | ||
244 | | bls target | ||
245 | |.endmacro | ||
246 | |.macro checknumber, reg, target | ||
247 | | cmp TISNUMhi, reg, lsr #32 | ||
248 | | blo target | ||
249 | |.endmacro | ||
250 | | | ||
251 | |.macro mov_false, reg; movn reg, #0x8000, lsl #32; .endmacro | ||
252 | |.macro mov_true, reg; movn reg, #0x0001, lsl #48; .endmacro | ||
253 | | | ||
254 | #define GL_J(field) (GG_G2J + (int)offsetof(jit_State, field)) | ||
255 | | | ||
256 | #define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) | ||
257 | | | ||
258 | |.macro hotcheck, delta | ||
259 | | lsr CARG1, PC, #1 | ||
260 | | and CARG1, CARG1, #126 | ||
261 | | add CARG1, CARG1, #GG_G2DISP+GG_DISP2HOT | ||
262 | | ldrh CARG2w, [GL, CARG1] | ||
263 | | subs CARG2, CARG2, #delta | ||
264 | | strh CARG2w, [GL, CARG1] | ||
265 | |.endmacro | ||
266 | | | ||
267 | |.macro hotloop | ||
268 | | hotcheck HOTCOUNT_LOOP | ||
269 | | blo ->vm_hotloop | ||
270 | |.endmacro | ||
271 | | | ||
272 | |.macro hotcall | ||
273 | | hotcheck HOTCOUNT_CALL | ||
274 | | blo ->vm_hotcall | ||
275 | |.endmacro | ||
276 | | | ||
277 | |// Set current VM state. | ||
278 | |.macro mv_vmstate, reg, st; movn reg, #LJ_VMST_..st; .endmacro | ||
279 | |.macro st_vmstate, reg; str reg, GL->vmstate; .endmacro | ||
280 | | | ||
281 | |// Move table write barrier back. Overwrites mark and tmp. | ||
282 | |.macro barrierback, tab, mark, tmp | ||
283 | | ldr tmp, GL->gc.grayagain | ||
284 | | and mark, mark, #~LJ_GC_BLACK // black2gray(tab) | ||
285 | | str tab, GL->gc.grayagain | ||
286 | | strb mark, tab->marked | ||
287 | | str tmp, tab->gclist | ||
288 | |.endmacro | ||
289 | | | ||
290 | |//----------------------------------------------------------------------- | ||
291 | |||
292 | #if !LJ_DUALNUM | ||
293 | #error "Only dual-number mode supported for ARM64 target" | ||
294 | #endif | ||
295 | |||
296 | /* Generate subroutines used by opcodes and other parts of the VM. */ | ||
297 | /* The .code_sub section should be last to help static branch prediction. */ | ||
298 | static void build_subroutines(BuildCtx *ctx) | ||
299 | { | ||
300 | |.code_sub | ||
301 | | | ||
302 | |//----------------------------------------------------------------------- | ||
303 | |//-- Return handling ---------------------------------------------------- | ||
304 | |//----------------------------------------------------------------------- | ||
305 | | | ||
306 | |->vm_returnp: | ||
307 | | // See vm_return. Also: RB = previous base. | ||
308 | | tbz PC, #2, ->cont_dispatch // (PC & FRAME_P) == 0? | ||
309 | | | ||
310 | | // Return from pcall or xpcall fast func. | ||
311 | | ldr PC, [RB, FRAME_PC] // Fetch PC of previous frame. | ||
312 | | mov_true TMP0 | ||
313 | | mov BASE, RB | ||
314 | | // Prepending may overwrite the pcall frame, so do it at the end. | ||
315 | | str TMP0, [RA, #-8]! // Prepend true to results. | ||
316 | | | ||
317 | |->vm_returnc: | ||
318 | | adds RC, RC, #8 // RC = (nresults+1)*8. | ||
319 | | mov CRET1, #LUA_YIELD | ||
320 | | beq ->vm_unwind_c_eh | ||
321 | | str RCw, SAVE_MULTRES | ||
322 | | ands CARG1, PC, #FRAME_TYPE | ||
323 | | beq ->BC_RET_Z // Handle regular return to Lua. | ||
324 | | | ||
325 | |->vm_return: | ||
326 | | // BASE = base, RA = resultptr, RC/MULTRES = (nresults+1)*8, PC = return | ||
327 | | // CARG1 = PC & FRAME_TYPE | ||
328 | | and RB, PC, #~FRAME_TYPEP | ||
329 | | cmp CARG1, #FRAME_C | ||
330 | | sub RB, BASE, RB // RB = previous base. | ||
331 | | bne ->vm_returnp | ||
332 | | | ||
333 | | str RB, L->base | ||
334 | | ldrsw CARG2, SAVE_NRES // CARG2 = nresults+1. | ||
335 | | mv_vmstate TMP0w, C | ||
336 | | sub BASE, BASE, #16 | ||
337 | | subs TMP2, RC, #8 | ||
338 | | st_vmstate TMP0w | ||
339 | | beq >2 | ||
340 | |1: | ||
341 | | subs TMP2, TMP2, #8 | ||
342 | | ldr TMP0, [RA], #8 | ||
343 | | str TMP0, [BASE], #8 | ||
344 | | bne <1 | ||
345 | |2: | ||
346 | | cmp RC, CARG2, lsl #3 // More/fewer results wanted? | ||
347 | | bne >6 | ||
348 | |3: | ||
349 | | str BASE, L->top // Store new top. | ||
350 | | | ||
351 | |->vm_leave_cp: | ||
352 | | ldr RC, SAVE_CFRAME // Restore previous C frame. | ||
353 | | mov CRET1, #0 // Ok return status for vm_pcall. | ||
354 | | str RC, L->cframe | ||
355 | | | ||
356 | |->vm_leave_unw: | ||
357 | | restoreregs | ||
358 | | ret | ||
359 | | | ||
360 | |6: | ||
361 | | bgt >7 // Fewer results wanted? | ||
362 | | // More results wanted. Check stack size and fill up results with nil. | ||
363 | | ldr CARG3, L->maxstack | ||
364 | | cmp BASE, CARG3 | ||
365 | | bhs >8 | ||
366 | | str TISNIL, [BASE], #8 | ||
367 | | add RC, RC, #8 | ||
368 | | b <2 | ||
369 | | | ||
370 | |7: // Fewer results wanted. | ||
371 | | cbz CARG2, <3 // LUA_MULTRET+1 case? | ||
372 | | sub CARG1, RC, CARG2, lsl #3 | ||
373 | | sub BASE, BASE, CARG1 // Shrink top. | ||
374 | | b <3 | ||
375 | | | ||
376 | |8: // Corner case: need to grow stack for filling up results. | ||
377 | | // This can happen if: | ||
378 | | // - A C function grows the stack (a lot). | ||
379 | | // - The GC shrinks the stack in between. | ||
380 | | // - A return back from a lua_call() with (high) nresults adjustment. | ||
381 | | str BASE, L->top // Save current top held in BASE (yes). | ||
382 | | mov CARG1, L | ||
383 | | bl extern lj_state_growstack // (lua_State *L, int n) | ||
384 | | ldr BASE, L->top // Need the (realloced) L->top in BASE. | ||
385 | | ldrsw CARG2, SAVE_NRES | ||
386 | | b <2 | ||
387 | | | ||
388 | |->vm_unwind_c: // Unwind C stack, return from vm_pcall. | ||
389 | | // (void *cframe, int errcode) | ||
390 | | mov sp, CARG1 | ||
391 | | mov CRET1, CARG2 | ||
392 | |->vm_unwind_c_eh: // Landing pad for external unwinder. | ||
393 | | ldr L, SAVE_L | ||
394 | | mv_vmstate TMP0w, C | ||
395 | | ldr GL, L->glref | ||
396 | | st_vmstate TMP0w | ||
397 | | b ->vm_leave_unw | ||
398 | | | ||
399 | |->vm_unwind_ff: // Unwind C stack, return from ff pcall. | ||
400 | | // (void *cframe) | ||
401 | | and sp, CARG1, #CFRAME_RAWMASK | ||
402 | |->vm_unwind_ff_eh: // Landing pad for external unwinder. | ||
403 | | ldr L, SAVE_L | ||
404 | | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 | ||
405 | | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 | ||
406 | | movn TISNIL, #0 | ||
407 | | mov RC, #16 // 2 results: false + error message. | ||
408 | | ldr BASE, L->base | ||
409 | | ldr GL, L->glref // Setup pointer to global state. | ||
410 | | mov_false TMP0 | ||
411 | | sub RA, BASE, #8 // Results start at BASE-8. | ||
412 | | ldr PC, [BASE, FRAME_PC] // Fetch PC of previous frame. | ||
413 | | str TMP0, [BASE, #-8] // Prepend false to error message. | ||
414 | | st_vmstate ST_INTERP | ||
415 | | b ->vm_returnc | ||
416 | | | ||
417 | |//----------------------------------------------------------------------- | ||
418 | |//-- Grow stack for calls ----------------------------------------------- | ||
419 | |//----------------------------------------------------------------------- | ||
420 | | | ||
421 | |->vm_growstack_c: // Grow stack for C function. | ||
422 | | // CARG1 = L. Passes n = LUA_MINSTACK to lj_state_growstack. | ||
423 | | mov CARG2, #LUA_MINSTACK | ||
424 | | b >2 | ||
425 | | | ||
426 | |->vm_growstack_l: // Grow stack for Lua function. | ||
427 | | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC | ||
428 | | add RC, BASE, RC | ||
429 | | sub RA, RA, BASE | ||
430 | | mov CARG1, L | ||
431 | | stp BASE, RC, L->base | ||
432 | | add PC, PC, #4 // Must point after first instruction. | ||
433 | | lsr CARG2, RA, #3 | ||
434 | |2: | ||
435 | | // L->base = new base, L->top = top | ||
436 | | str PC, SAVE_PC | ||
437 | | bl extern lj_state_growstack // (lua_State *L, int n) | ||
438 | | ldp BASE, RC, L->base | ||
439 | | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] | ||
440 | | sub NARGS8:RC, RC, BASE | ||
441 | | and LFUNC:CARG3, CARG3, #LJ_GCVMASK | ||
442 | | // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC | ||
443 | | ins_callt // Just retry the call. | ||
444 | | | ||
445 | |//----------------------------------------------------------------------- | ||
446 | |//-- Entry points into the assembler VM --------------------------------- | ||
447 | |//----------------------------------------------------------------------- | ||
448 | | | ||
449 | |->vm_resume: // Setup C frame and resume thread. | ||
450 | | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) | ||
451 | | saveregs | ||
452 | | mov L, CARG1 | ||
453 | | ldr GL, L->glref // Setup pointer to global state. | ||
454 | | mov BASE, CARG2 | ||
455 | | str L, SAVE_L | ||
456 | | mov PC, #FRAME_CP | ||
457 | | str wzr, SAVE_NRES | ||
458 | | add TMP0, sp, #CFRAME_RESUME | ||
459 | | ldrb TMP1w, L->status | ||
460 | | str wzr, SAVE_ERRF | ||
461 | | str L, SAVE_PC // Any value outside of bytecode is ok. | ||
462 | | str xzr, SAVE_CFRAME | ||
463 | | str TMP0, L->cframe | ||
464 | | cbz TMP1w, >3 | ||
465 | | | ||
466 | | // Resume after yield (like a return). | ||
467 | | str L, GL->cur_L | ||
468 | | mov RA, BASE | ||
469 | | ldp BASE, CARG1, L->base | ||
470 | | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 | ||
471 | | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 | ||
472 | | ldr PC, [BASE, FRAME_PC] | ||
473 | | strb wzr, L->status | ||
474 | | movn TISNIL, #0 | ||
475 | | sub RC, CARG1, BASE | ||
476 | | ands CARG1, PC, #FRAME_TYPE | ||
477 | | add RC, RC, #8 | ||
478 | | st_vmstate ST_INTERP | ||
479 | | str RCw, SAVE_MULTRES | ||
480 | | beq ->BC_RET_Z | ||
481 | | b ->vm_return | ||
482 | | | ||
483 | |->vm_pcall: // Setup protected C frame and enter VM. | ||
484 | | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) | ||
485 | | saveregs | ||
486 | | mov PC, #FRAME_CP | ||
487 | | str CARG4w, SAVE_ERRF | ||
488 | | b >1 | ||
489 | | | ||
490 | |->vm_call: // Setup C frame and enter VM. | ||
491 | | // (lua_State *L, TValue *base, int nres1) | ||
492 | | saveregs | ||
493 | | mov PC, #FRAME_C | ||
494 | | | ||
495 | |1: // Entry point for vm_pcall above (PC = ftype). | ||
496 | | ldr RC, L:CARG1->cframe | ||
497 | | str CARG3w, SAVE_NRES | ||
498 | | mov L, CARG1 | ||
499 | | str CARG1, SAVE_L | ||
500 | | ldr GL, L->glref // Setup pointer to global state. | ||
501 | | mov BASE, CARG2 | ||
502 | | str CARG1, SAVE_PC // Any value outside of bytecode is ok. | ||
503 | | str RC, SAVE_CFRAME | ||
504 | | str fp, L->cframe // Add our C frame to cframe chain. | ||
505 | | | ||
506 | |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). | ||
507 | | str L, GL->cur_L | ||
508 | | ldp RB, CARG1, L->base // RB = old base (for vmeta_call). | ||
509 | | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 | ||
510 | | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 | ||
511 | | add PC, PC, BASE | ||
512 | | movn TISNIL, #0 | ||
513 | | sub PC, PC, RB // PC = frame delta + frame type | ||
514 | | sub NARGS8:RC, CARG1, BASE | ||
515 | | st_vmstate ST_INTERP | ||
516 | | | ||
517 | |->vm_call_dispatch: | ||
518 | | // RB = old base, BASE = new base, RC = nargs*8, PC = caller PC | ||
519 | | ldr CARG3, [BASE, FRAME_FUNC] | ||
520 | | checkfunc CARG3, ->vmeta_call | ||
521 | | | ||
522 | |->vm_call_dispatch_f: | ||
523 | | ins_call | ||
524 | | // BASE = new base, CARG3 = func, RC = nargs*8, PC = caller PC | ||
525 | | | ||
526 | |->vm_cpcall: // Setup protected C frame, call C. | ||
527 | | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) | ||
528 | | saveregs | ||
529 | | mov L, CARG1 | ||
530 | | ldr RA, L:CARG1->stack | ||
531 | | str CARG1, SAVE_L | ||
532 | | ldr GL, L->glref // Setup pointer to global state. | ||
533 | | ldr RB, L->top | ||
534 | | str CARG1, SAVE_PC // Any value outside of bytecode is ok. | ||
535 | | ldr RC, L->cframe | ||
536 | | sub RA, RA, RB // Compute -savestack(L, L->top). | ||
537 | | str RAw, SAVE_NRES // Neg. delta means cframe w/o frame. | ||
538 | | str wzr, SAVE_ERRF // No error function. | ||
539 | | str RC, SAVE_CFRAME | ||
540 | | str fp, L->cframe // Add our C frame to cframe chain. | ||
541 | | str L, GL->cur_L | ||
542 | | blr CARG4 // (lua_State *L, lua_CFunction func, void *ud) | ||
543 | | mov BASE, CRET1 | ||
544 | | mov PC, #FRAME_CP | ||
545 | | cbnz BASE, <3 // Else continue with the call. | ||
546 | | b ->vm_leave_cp // No base? Just remove C frame. | ||
547 | | | ||
548 | |//----------------------------------------------------------------------- | ||
549 | |//-- Metamethod handling ------------------------------------------------ | ||
550 | |//----------------------------------------------------------------------- | ||
551 | | | ||
552 | |//-- Continuation dispatch ---------------------------------------------- | ||
553 | | | ||
554 | |->cont_dispatch: | ||
555 | | // BASE = meta base, RB = caller base, RA = resultptr, RC = (nresults+1)*8 | ||
556 | | ldr LFUNC:CARG3, [RB, FRAME_FUNC] | ||
557 | | ldr CARG1, [BASE, #-32] // Get continuation. | ||
558 | | mov CARG4, BASE | ||
559 | | mov BASE, RB // Restore caller BASE. | ||
560 | | and LFUNC:CARG3, CARG3, #LJ_GCVMASK | ||
561 | |.if FFI | ||
562 | | cmp CARG1, #1 | ||
563 | |.endif | ||
564 | | ldr PC, [CARG4, #-24] // Restore PC from [cont|PC]. | ||
565 | | ldr CARG3, LFUNC:CARG3->pc | ||
566 | | add TMP0, RA, RC | ||
567 | | str TISNIL, [TMP0, #-8] // Ensure one valid arg. | ||
568 | |.if FFI | ||
569 | | bls >1 | ||
570 | |.endif | ||
571 | | ldr KBASE, [CARG3, #PC2PROTO(k)] | ||
572 | | // BASE = base, RA = resultptr, CARG4 = meta base | ||
573 | | br CARG1 | ||
574 | | | ||
575 | |.if FFI | ||
576 | |1: | ||
577 | | beq ->cont_ffi_callback // cont = 1: return from FFI callback. | ||
578 | | // cont = 0: tailcall from C function. | ||
579 | | sub CARG4, CARG4, #32 | ||
580 | | sub RC, CARG4, BASE | ||
581 | | b ->vm_call_tail | ||
582 | |.endif | ||
583 | | | ||
584 | |->cont_cat: // RA = resultptr, CARG4 = meta base | ||
585 | | ldr INSw, [PC, #-4] | ||
586 | | sub CARG2, CARG4, #32 | ||
587 | | ldr TMP0, [RA] | ||
588 | | str BASE, L->base | ||
589 | | decode_RB RB, INS | ||
590 | | decode_RA RA, INS | ||
591 | | add TMP1, BASE, RB, lsl #3 | ||
592 | | subs TMP1, CARG2, TMP1 | ||
593 | | beq >1 | ||
594 | | str TMP0, [CARG2] | ||
595 | | lsr CARG3, TMP1, #3 | ||
596 | | b ->BC_CAT_Z | ||
597 | | | ||
598 | |1: | ||
599 | | str TMP0, [BASE, RA, lsl #3] | ||
600 | | b ->cont_nop | ||
601 | | | ||
602 | |//-- Table indexing metamethods ----------------------------------------- | ||
603 | | | ||
604 | |->vmeta_tgets1: | ||
605 | | movn CARG4, #~LJ_TSTR | ||
606 | | add CARG2, BASE, RB, lsl #3 | ||
607 | | add CARG4, STR:RC, CARG4, lsl #47 | ||
608 | | b >2 | ||
609 | | | ||
610 | |->vmeta_tgets: | ||
611 | | movk CARG2, #(LJ_TTAB>>1)&0xffff, lsl #48 | ||
612 | | str CARG2, GL->tmptv | ||
613 | | add CARG2, GL, #offsetof(global_State, tmptv) | ||
614 | |2: | ||
615 | | add CARG3, sp, TMPDofs | ||
616 | | str CARG4, TMPD | ||
617 | | b >1 | ||
618 | | | ||
619 | |->vmeta_tgetb: // RB = table, RC = index | ||
620 | | add RC, RC, TISNUM | ||
621 | | add CARG2, BASE, RB, lsl #3 | ||
622 | | add CARG3, sp, TMPDofs | ||
623 | | str RC, TMPD | ||
624 | | b >1 | ||
625 | | | ||
626 | |->vmeta_tgetv: // RB = table, RC = key | ||
627 | | add CARG2, BASE, RB, lsl #3 | ||
628 | | add CARG3, BASE, RC, lsl #3 | ||
629 | |1: | ||
630 | | str BASE, L->base | ||
631 | | mov CARG1, L | ||
632 | | str PC, SAVE_PC | ||
633 | | bl extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) | ||
634 | | // Returns TValue * (finished) or NULL (metamethod). | ||
635 | | cbz CRET1, >3 | ||
636 | | ldr TMP0, [CRET1] | ||
637 | | str TMP0, [BASE, RA, lsl #3] | ||
638 | | ins_next | ||
639 | | | ||
640 | |3: // Call __index metamethod. | ||
641 | | // BASE = base, L->top = new base, stack = cont/func/t/k | ||
642 | | sub TMP1, BASE, #FRAME_CONT | ||
643 | | ldr BASE, L->top | ||
644 | | mov NARGS8:RC, #16 // 2 args for func(t, k). | ||
645 | | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. | ||
646 | | str PC, [BASE, #-24] // [cont|PC] | ||
647 | | sub PC, BASE, TMP1 | ||
648 | | and LFUNC:CARG3, CARG3, #LJ_GCVMASK | ||
649 | | b ->vm_call_dispatch_f | ||
650 | | | ||
651 | |->vmeta_tgetr: | ||
652 | | sxtw CARG2, TMP1w | ||
653 | | bl extern lj_tab_getinth // (GCtab *t, int32_t key) | ||
654 | | // Returns cTValue * or NULL. | ||
655 | | mov TMP0, TISNIL | ||
656 | | cbz CRET1, ->BC_TGETR_Z | ||
657 | | ldr TMP0, [CRET1] | ||
658 | | b ->BC_TGETR_Z | ||
659 | | | ||
660 | |//----------------------------------------------------------------------- | ||
661 | | | ||
662 | |->vmeta_tsets1: | ||
663 | | movn CARG4, #~LJ_TSTR | ||
664 | | add CARG2, BASE, RB, lsl #3 | ||
665 | | add CARG4, STR:RC, CARG4, lsl #47 | ||
666 | | b >2 | ||
667 | | | ||
668 | |->vmeta_tsets: | ||
669 | | movk CARG2, #(LJ_TTAB>>1)&0xffff, lsl #48 | ||
670 | | str CARG2, GL->tmptv | ||
671 | | add CARG2, GL, #offsetof(global_State, tmptv) | ||
672 | |2: | ||
673 | | add CARG3, sp, TMPDofs | ||
674 | | str CARG4, TMPD | ||
675 | | b >1 | ||
676 | | | ||
677 | |->vmeta_tsetb: // RB = table, RC = index | ||
678 | | add RC, RC, TISNUM | ||
679 | | add CARG2, BASE, RB, lsl #3 | ||
680 | | add CARG3, sp, TMPDofs | ||
681 | | str RC, TMPD | ||
682 | | b >1 | ||
683 | | | ||
684 | |->vmeta_tsetv: | ||
685 | | add CARG2, BASE, RB, lsl #3 | ||
686 | | add CARG3, BASE, RC, lsl #3 | ||
687 | |1: | ||
688 | | str BASE, L->base | ||
689 | | mov CARG1, L | ||
690 | | str PC, SAVE_PC | ||
691 | | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) | ||
692 | | // Returns TValue * (finished) or NULL (metamethod). | ||
693 | | ldr TMP0, [BASE, RA, lsl #3] | ||
694 | | cbz CRET1, >3 | ||
695 | | // NOBARRIER: lj_meta_tset ensures the table is not black. | ||
696 | | str TMP0, [CRET1] | ||
697 | | ins_next | ||
698 | | | ||
699 | |3: // Call __newindex metamethod. | ||
700 | | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) | ||
701 | | sub TMP1, BASE, #FRAME_CONT | ||
702 | | ldr BASE, L->top | ||
703 | | mov NARGS8:RC, #24 // 3 args for func(t, k, v). | ||
704 | | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. | ||
705 | | str TMP0, [BASE, #16] // Copy value to third argument. | ||
706 | | str PC, [BASE, #-24] // [cont|PC] | ||
707 | | sub PC, BASE, TMP1 | ||
708 | | and LFUNC:CARG3, CARG3, #LJ_GCVMASK | ||
709 | | b ->vm_call_dispatch_f | ||
710 | | | ||
711 | |->vmeta_tsetr: | ||
712 | | sxtw CARG3, TMP1w | ||
713 | | str BASE, L->base | ||
714 | | str PC, SAVE_PC | ||
715 | | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) | ||
716 | | // Returns TValue *. | ||
717 | | b ->BC_TSETR_Z | ||
718 | | | ||
719 | |//-- Comparison metamethods --------------------------------------------- | ||
720 | | | ||
721 | |->vmeta_comp: | ||
722 | | add CARG2, BASE, RA, lsl #3 | ||
723 | | sub PC, PC, #4 | ||
724 | | add CARG3, BASE, RC, lsl #3 | ||
725 | | str BASE, L->base | ||
726 | | mov CARG1, L | ||
727 | | str PC, SAVE_PC | ||
728 | | uxtb CARG4w, INSw | ||
729 | | bl extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) | ||
730 | | // Returns 0/1 or TValue * (metamethod). | ||
731 | |3: | ||
732 | | cmp CRET1, #1 | ||
733 | | bhi ->vmeta_binop | ||
734 | |4: | ||
735 | | ldrh RBw, [PC, # OFS_RD] | ||
736 | | add PC, PC, #4 | ||
737 | | add RB, PC, RB, lsl #2 | ||
738 | | sub RB, RB, #0x20000 | ||
739 | | csel PC, PC, RB, lo | ||
740 | |->cont_nop: | ||
741 | | ins_next | ||
742 | | | ||
743 | |->cont_ra: // RA = resultptr | ||
744 | | ldr INSw, [PC, #-4] | ||
745 | | ldr TMP0, [RA] | ||
746 | | decode_RA TMP1, INS | ||
747 | | str TMP0, [BASE, TMP1, lsl #3] | ||
748 | | b ->cont_nop | ||
749 | | | ||
750 | |->cont_condt: // RA = resultptr | ||
751 | | ldr TMP0, [RA] | ||
752 | | mov_true TMP1 | ||
753 | | cmp TMP1, TMP0 // Branch if result is true. | ||
754 | | b <4 | ||
755 | | | ||
756 | |->cont_condf: // RA = resultptr | ||
757 | | ldr TMP0, [RA] | ||
758 | | mov_false TMP1 | ||
759 | | cmp TMP0, TMP1 // Branch if result is false. | ||
760 | | b <4 | ||
761 | | | ||
762 | |->vmeta_equal: | ||
763 | | // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV. | ||
764 | | and TAB:CARG3, CARG3, #LJ_GCVMASK | ||
765 | | sub PC, PC, #4 | ||
766 | | str BASE, L->base | ||
767 | | mov CARG1, L | ||
768 | | str PC, SAVE_PC | ||
769 | | bl extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) | ||
770 | | // Returns 0/1 or TValue * (metamethod). | ||
771 | | b <3 | ||
772 | | | ||
773 | |->vmeta_equal_cd: | ||
774 | |.if FFI | ||
775 | | sub PC, PC, #4 | ||
776 | | str BASE, L->base | ||
777 | | mov CARG1, L | ||
778 | | mov CARG2, INS | ||
779 | | str PC, SAVE_PC | ||
780 | | bl extern lj_meta_equal_cd // (lua_State *L, BCIns op) | ||
781 | | // Returns 0/1 or TValue * (metamethod). | ||
782 | | b <3 | ||
783 | |.endif | ||
784 | | | ||
785 | |->vmeta_istype: | ||
786 | | sub PC, PC, #4 | ||
787 | | str BASE, L->base | ||
788 | | mov CARG1, L | ||
789 | | mov CARG2, RA | ||
790 | | mov CARG3, RC | ||
791 | | str PC, SAVE_PC | ||
792 | | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) | ||
793 | | b ->cont_nop | ||
794 | | | ||
795 | |//-- Arithmetic metamethods --------------------------------------------- | ||
796 | | | ||
797 | |->vmeta_arith_vn: | ||
798 | | add CARG3, BASE, RB, lsl #3 | ||
799 | | add CARG4, KBASE, RC, lsl #3 | ||
800 | | b >1 | ||
801 | | | ||
802 | |->vmeta_arith_nv: | ||
803 | | add CARG4, BASE, RB, lsl #3 | ||
804 | | add CARG3, KBASE, RC, lsl #3 | ||
805 | | b >1 | ||
806 | | | ||
807 | |->vmeta_unm: | ||
808 | | add CARG3, BASE, RC, lsl #3 | ||
809 | | mov CARG4, CARG3 | ||
810 | | b >1 | ||
811 | | | ||
812 | |->vmeta_arith_vv: | ||
813 | | add CARG3, BASE, RB, lsl #3 | ||
814 | | add CARG4, BASE, RC, lsl #3 | ||
815 | |1: | ||
816 | | uxtb CARG5w, INSw | ||
817 | | add CARG2, BASE, RA, lsl #3 | ||
818 | | str BASE, L->base | ||
819 | | mov CARG1, L | ||
820 | | str PC, SAVE_PC | ||
821 | | bl extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) | ||
822 | | // Returns NULL (finished) or TValue * (metamethod). | ||
823 | | cbz CRET1, ->cont_nop | ||
824 | | | ||
825 | | // Call metamethod for binary op. | ||
826 | |->vmeta_binop: | ||
827 | | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2 | ||
828 | | sub TMP1, CRET1, BASE | ||
829 | | str PC, [CRET1, #-24] // [cont|PC] | ||
830 | | add PC, TMP1, #FRAME_CONT | ||
831 | | mov BASE, CRET1 | ||
832 | | mov NARGS8:RC, #16 // 2 args for func(o1, o2). | ||
833 | | b ->vm_call_dispatch | ||
834 | | | ||
835 | |->vmeta_len: // Calls lj_meta_len(L, &BASE[RC]). | ||
836 | | add CARG2, BASE, RC, lsl #3 | ||
837 | #if LJ_52 | ||
838 | | mov TAB:RC, TAB:CARG1 // Save table (ignored for other types). | ||
839 | #endif | ||
840 | | str BASE, L->base | ||
841 | | mov CARG1, L | ||
842 | | str PC, SAVE_PC | ||
843 | | bl extern lj_meta_len // (lua_State *L, TValue *o) | ||
844 | | // Returns NULL (retry) or TValue * (metamethod base). | ||
845 | #if LJ_52 | ||
846 | | cbnz CRET1, ->vmeta_binop // Binop call for compatibility. | ||
847 | | mov TAB:CARG1, TAB:RC // NULL result: restore the saved table before retrying. | ||
848 | | b ->BC_LEN_Z | ||
849 | #else | ||
850 | | b ->vmeta_binop // Binop call for compatibility. | ||
851 | #endif | ||
852 | | | ||
853 | |//-- Call metamethod ---------------------------------------------------- | ||
854 | | | ||
855 | |->vmeta_call: // Resolve and call __call metamethod. | ||
856 | | // RB = old base, BASE = new base, RC = nargs*8 | ||
857 | | mov CARG1, L | ||
858 | | str RB, L->base // This is the callers base! | ||
859 | | sub CARG2, BASE, #16 // func = BASE-16. | ||
860 | | str PC, SAVE_PC | ||
861 | | add CARG3, BASE, NARGS8:RC // top = BASE + nargs*8. | ||
862 | | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) | ||
863 | | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. | ||
864 | | add NARGS8:RC, NARGS8:RC, #8 // Got one more argument now. | ||
865 | | and LFUNC:CARG3, CARG3, #LJ_GCVMASK // Keep only the LJ_GCVMASK bits of the loaded slot. | ||
866 | | ins_call | ||
867 | | | ||
868 | |->vmeta_callt: // Resolve __call for BC_CALLT. | ||
869 | | // BASE = old base, RA = new base, RC = nargs*8 | ||
870 | | mov CARG1, L | ||
871 | | str BASE, L->base | ||
872 | | sub CARG2, RA, #16 // func = RA-16. | ||
873 | | str PC, SAVE_PC | ||
874 | | add CARG3, RA, NARGS8:RC // top = RA + nargs*8. | ||
875 | | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) | ||
876 | | ldr TMP1, [RA, FRAME_FUNC] // Guaranteed to be a function here. | ||
877 | | ldr PC, [BASE, FRAME_PC] // Reload PC from the old base's frame slot. | ||
878 | | add NARGS8:RC, NARGS8:RC, #8 // Got one more argument now. | ||
879 | | and LFUNC:CARG3, TMP1, #LJ_GCVMASK // Keep only the LJ_GCVMASK bits of the loaded slot. | ||
880 | | b ->BC_CALLT2_Z | ||
881 | | | ||
882 | |//-- Argument coercion for 'for' statement ------------------------------ | ||
883 | | | ||
884 | |->vmeta_for: // Calls lj_meta_for(L, RA), then re-decodes and re-dispatches the instruction at PC-4. | ||
885 | | mov CARG1, L | ||
886 | | str BASE, L->base | ||
887 | | mov CARG2, RA | ||
888 | | str PC, SAVE_PC | ||
889 | | bl extern lj_meta_for // (lua_State *L, TValue *base) | ||
890 | | ldr INSw, [PC, #-4] // Reload the instruction word at PC-4. | ||
891 | |.if JIT | ||
892 | | uxtb TMP0w, INSw // TMP0 = low byte of the instruction word. | ||
893 | |.endif | ||
894 | | decode_RA RA, INS | ||
895 | | decode_RD RC, INS | ||
896 | |.if JIT | ||
897 | | cmp TMP0, #BC_JFORI | ||
898 | | beq =>BC_JFORI // JIT builds only: go to BC_JFORI when the low byte matches. | ||
899 | |.endif | ||
900 | | b =>BC_FORI | ||
901 | | | ||
902 | |//----------------------------------------------------------------------- | ||
903 | |//-- Fast functions ----------------------------------------------------- | ||
904 | |//----------------------------------------------------------------------- | ||
905 | | | ||
906 | |.macro .ffunc, name | ||
907 | |->ff_ .. name: // Emits only the global label ff_<name>; no argument checks. | ||
908 | |.endmacro | ||
909 | | | ||
910 | |.macro .ffunc_1, name | ||
911 | |->ff_ .. name: // Label ff_<name>, then require at least 1 argument. | ||
912 | | ldr CARG1, [BASE] // CARG1 = first argument slot. | ||
913 | | cmp NARGS8:RC, #8 | ||
914 | | blo ->fff_fallback // nargs*8 < 8: fall back. | ||
915 | |.endmacro | ||
916 | | | ||
917 | |.macro .ffunc_2, name | ||
918 | |->ff_ .. name: // Label ff_<name>, then require at least 2 arguments. | ||
919 | | ldp CARG1, CARG2, [BASE] // CARG1/CARG2 = first two argument slots. | ||
920 | | cmp NARGS8:RC, #16 | ||
921 | | blo ->fff_fallback // nargs*8 < 16: fall back. | ||
922 | |.endmacro | ||
923 | | | ||
924 | |.macro .ffunc_n, name | ||
925 | | .ffunc name | ||
926 | | ldr CARG1, [BASE] | ||
927 | | cmp NARGS8:RC, #8 | ||
928 | | ldr FARG1, [BASE] // Same slot again, loaded into FARG1 as well. | ||
929 | | blo ->fff_fallback // Fewer than 1 argument. | ||
930 | | checknum CARG1, ->fff_fallback // Type check is done on the copy in CARG1. | ||
931 | |.endmacro | ||
932 | | | ||
933 | |.macro .ffunc_nn, name | ||
934 | | .ffunc name | ||
935 | | ldp CARG1, CARG2, [BASE] | ||
936 | | cmp NARGS8:RC, #16 | ||
937 | | ldp FARG1, FARG2, [BASE] // Same two slots again, loaded into FARG1/FARG2 as well. | ||
938 | | blo ->fff_fallback // Fewer than 2 arguments. | ||
939 | | checknum CARG1, ->fff_fallback // Type checks are done on the copies in CARG1/CARG2. | ||
940 | | checknum CARG2, ->fff_fallback | ||
941 | |.endmacro | ||
942 | | | ||
943 | |// Inlined GC threshold check. Caveat: uses CARG1 and CARG2. | ||
944 | |.macro ffgccheck | ||
945 | | ldp CARG1, CARG2, GL->gc.total // Assumes threshold follows total. | ||
946 | | cmp CARG1, CARG2 | ||
947 | | blt >1 // First loaded value < second (signed): skip the GC step. | ||
948 | | bl ->fff_gcstep | ||
949 | |1: | ||
950 | |.endmacro | ||
951 | | | ||
952 | |//-- Base library: checks ----------------------------------------------- | ||
953 | | | ||
954 | |.ffunc_1 assert | ||
955 | | ldr PC, [BASE, FRAME_PC] | ||
956 | | mov_false TMP1 | ||
957 | | cmp CARG1, TMP1 | ||
958 | | bhs ->fff_fallback // Argument >= TMP1 (unsigned): take the fallback. | ||
959 | | str CARG1, [BASE, #-16] // First result goes to BASE-16. | ||
960 | | sub RB, BASE, #8 | ||
961 | | subs RA, NARGS8:RC, #8 // RA = byte size of the remaining arguments. | ||
962 | | add RC, NARGS8:RC, #8 // Compute (nresults+1)*8. | ||
963 | | cbz RA, ->fff_res // Done if exactly 1 argument. | ||
964 | |1: // Move each remaining argument down by 16 bytes. | ||
965 | | ldr CARG1, [RB, #16] | ||
966 | | sub RA, RA, #8 | ||
967 | | str CARG1, [RB], #8 // Post-increment RB to the next destination slot. | ||
968 | | cbnz RA, <1 | ||
969 | | b ->fff_res | ||
970 | | | ||
971 | |.ffunc_1 type | ||
972 | | mov TMP0, #~LJ_TISNUM | ||
973 | | asr ITYPE, CARG1, #47 // ITYPE = argument shifted right arithmetically by 47 bits. | ||
974 | | cmn ITYPE, #~LJ_TISNUM | ||
975 | | csinv TMP1, TMP0, ITYPE, lo // TMP1 = TMP0 if lo, else ~ITYPE. | ||
976 | | add TMP1, TMP1, #offsetof(GCfuncC, upvalue)/8 // Index in 8-byte units from the start of the GCfuncC. | ||
977 | | ldr CARG1, [CFUNC:CARG3, TMP1, lsl #3] // Result is loaded from the function's upvalue array. | ||
978 | | b ->fff_restv | ||
979 | | | ||
980 | |//-- Base library: getters and setters --------------------------------- | ||
981 | | | ||
982 | |.ffunc_1 getmetatable | ||
983 | | asr ITYPE, CARG1, #47 | ||
984 | | cmn ITYPE, #-LJ_TTAB | ||
985 | | ccmn ITYPE, #-LJ_TUDATA, #4, ne // eq afterwards if ITYPE matches LJ_TTAB or LJ_TUDATA. | ||
986 | | and TAB:CARG1, CARG1, #LJ_GCVMASK | ||
987 | | bne >6 // Any other type: use the per-type lookup at label 6. | ||
988 | |1: // Field metatable must be at same offset for GCtab and GCudata! | ||
989 | | ldr TAB:RB, TAB:CARG1->metatable | ||
990 | |2: | ||
991 | | mov CARG1, TISNIL // Default result if there is no metatable. | ||
992 | | ldr STR:RC, GL->gcroot[GCROOT_MMNAME+MM_metatable] | ||
993 | | cbz TAB:RB, ->fff_restv | ||
994 | | ldr TMP1w, TAB:RB->hmask | ||
995 | | ldr TMP2w, STR:RC->hash | ||
996 | | ldr NODE:CARG3, TAB:RB->node | ||
997 | | and TMP1w, TMP1w, TMP2w // idx = str->hash & tab->hmask | ||
998 | | add TMP1, TMP1, TMP1, lsl #1 // TMP1 = idx*3. | ||
999 | | movn CARG4, #~LJ_TSTR | ||
1000 | | add NODE:CARG3, NODE:CARG3, TMP1, lsl #3 // node = tab->node + idx*3*8 | ||
1001 | | add CARG4, STR:RC, CARG4, lsl #47 // Tagged key to look for. | ||
1002 | |3: // Rearranged logic, because we expect _not_ to find the key. | ||
1003 | | ldp CARG1, TMP0, NODE:CARG3->val // CARG1 = node value, TMP0 = the field after it (compared as the key). | ||
1004 | | ldr NODE:CARG3, NODE:CARG3->next | ||
1005 | | cmp TMP0, CARG4 | ||
1006 | | beq >5 | ||
1007 | | cbnz NODE:CARG3, <3 // Follow the chain until next is NULL. | ||
1008 | |4: | ||
1009 | | mov CARG1, RB // Use metatable as default result. | ||
1010 | | movk CARG1, #(LJ_TTAB>>1)&0xffff, lsl #48 // Overwrite the top 16 bits with the table tag bits. | ||
1011 | | b ->fff_restv | ||
1012 | |5: | ||
1013 | | cmp TMP0, TISNIL | ||
1014 | | bne ->fff_restv // Not equal: return the value loaded in CARG1. | ||
1015 | | b <4 | ||
1016 | | | ||
1017 | |6: | ||
1018 | | movn TMP0, #~LJ_TISNUM | ||
1019 | | cmp ITYPE, TMP0 | ||
1020 | | csel ITYPE, ITYPE, TMP0, hs // Clamp ITYPE to at least TMP0 (unsigned compare). | ||
1021 | | sub TMP1, GL, ITYPE, lsl #3 | ||
1022 | | ldr TAB:RB, [TMP1, #offsetof(global_State, gcroot[GCROOT_BASEMT])-8] // Load from the gcroot[GCROOT_BASEMT] area, offset by the clamped ITYPE. | ||
1023 | | b <2 | ||
1024 | | | ||
1025 | |.ffunc_2 setmetatable | ||
1026 | | // Fast path: no mt for table yet and not clearing the mt. | ||
1027 | | checktp TMP1, CARG1, LJ_TTAB, ->fff_fallback | ||
1028 | | ldr TAB:TMP0, TAB:TMP1->metatable | ||
1029 | | asr ITYPE, CARG2, #47 | ||
1030 | | ldrb TMP2w, TAB:TMP1->marked | ||
1031 | | cmn ITYPE, #-LJ_TTAB | ||
1032 | | and TAB:CARG2, CARG2, #LJ_GCVMASK | ||
1033 | | ccmp TAB:TMP0, #0, #0, eq // eq only if arg 2 is a table and the current metatable is NULL. | ||
1034 | | bne ->fff_fallback | ||
1035 | | str TAB:CARG2, TAB:TMP1->metatable | ||
1036 | | tbz TMP2w, #2, ->fff_restv // isblack(table) | ||
1037 | | barrierback TAB:TMP1, TMP2w, TMP0 | ||
1038 | | b ->fff_restv // CARG1 still holds the first argument, which becomes the result. | ||
1039 | | | ||
1040 | |.ffunc rawget | ||
1041 | | ldr CARG2, [BASE] | ||
1042 | | cmp NARGS8:RC, #16 | ||
1043 | | blo ->fff_fallback // Fewer than 2 arguments. | ||
1044 | | checktab CARG2, ->fff_fallback | ||
1045 | | mov CARG1, L | ||
1046 | | add CARG3, BASE, #8 // key = address of the second argument slot. | ||
1047 | | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) | ||
1048 | | // Returns cTValue *. | ||
1049 | | ldr CARG1, [CRET1] // Load the value the returned pointer refers to. | ||
1050 | | b ->fff_restv | ||
1051 | | | ||
1052 | |//-- Base library: conversions ------------------------------------------ | ||
1053 | | | ||
1054 | |.ffunc tonumber | ||
1055 | | // Only handles the number case inline (without a base argument). | ||
1056 | | ldr CARG1, [BASE] | ||
1057 | | cmp NARGS8:RC, #8 | ||
1058 | | bne ->fff_fallback // Anything other than exactly 1 argument falls back. | ||
1059 | | checknumber CARG1, ->fff_fallback | ||
1060 | | b ->fff_restv // Return the argument unchanged. | ||
1061 | | | ||
1062 | |.ffunc_1 tostring | ||
1063 | | // Only handles the string or number case inline. | ||
1064 | | asr ITYPE, CARG1, #47 | ||
1065 | | cmn ITYPE, #-LJ_TSTR | ||
1066 | | // A __tostring method in the string base metatable is ignored. | ||
1067 | | beq ->fff_restv // String argument: return it unchanged. | ||
1068 | | // Handle numbers inline, unless a number base metatable is present. | ||
1069 | | ldr TMP1, GL->gcroot[GCROOT_BASEMT_NUM] | ||
1070 | | str BASE, L->base | ||
1071 | | cmn ITYPE, #-LJ_TISNUM | ||
1072 | | ccmp TMP1, #0, #0, ls // If ls: compare the base metatable with NULL; else flags = ne. | ||
1073 | | str PC, SAVE_PC // Redundant (but a defined value). | ||
1074 | | bne ->fff_fallback | ||
1075 | | ffgccheck | ||
1076 | | mov CARG1, L | ||
1077 | | mov CARG2, BASE // o = address of the first argument slot. | ||
1078 | | bl extern lj_strfmt_number // (lua_State *L, cTValue *o) | ||
1079 | | // Returns GCstr *. | ||
1080 | | movn TMP1, #~LJ_TSTR | ||
1081 | | ldr BASE, L->base | ||
1082 | | add CARG1, CARG1, TMP1, lsl #47 // Add the LJ_TSTR tag shifted into bits 47 and up. | ||
1083 | | b ->fff_restv | ||
1084 | | | ||
1085 | |//-- Base library: iterators ------------------------------------------- | ||
1086 | | | ||
1087 | |.ffunc_1 next | ||
1088 | | checktp CARG2, CARG1, LJ_TTAB, ->fff_fallback | ||
1089 | | str TISNIL, [BASE, NARGS8:RC] // Set missing 2nd arg to nil. | ||
1090 | | ldr PC, [BASE, FRAME_PC] | ||
1091 | | stp BASE, BASE, L->base // Add frame since C call can throw. | ||
1092 | | mov CARG1, L | ||
1093 | | add CARG3, BASE, #8 // key = address of the second argument slot. | ||
1094 | | str PC, SAVE_PC | ||
1095 | | bl extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) | ||
1096 | | // Returns 0 at end of traversal. | ||
1097 | | str TISNIL, [BASE, #-16] // Pre-store nil as the single result. | ||
1098 | | cbz CRET1, ->fff_res1 // End of traversal: return nil. | ||
1099 | | ldp CARG1, CARG2, [BASE, #8] // Copy key and value to results. | ||
1100 | | mov RC, #(2+1)*8 | ||
1101 | | stp CARG1, CARG2, [BASE, #-16] | ||
1102 | | b ->fff_res | ||
1103 | | | ||
1104 | |.ffunc_1 pairs | ||
1105 | | checktp TMP1, CARG1, LJ_TTAB, ->fff_fallback | ||
1106 | #if LJ_52 | ||
1107 | | ldr TAB:CARG2, TAB:TMP1->metatable | ||
1108 | #endif | ||
1109 | | ldr CFUNC:CARG4, CFUNC:CARG3->upvalue[0] // First result comes from upvalue[0] of this function. | ||
1110 | | ldr PC, [BASE, FRAME_PC] | ||
1111 | #if LJ_52 | ||
1112 | | cbnz TAB:CARG2, ->fff_fallback // LJ_52 only: any metatable on the table forces the fallback. | ||
1113 | #endif | ||
1114 | | mov RC, #(3+1)*8 // Three results. | ||
1115 | | stp CARG1, TISNIL, [BASE, #-8] // Results 2 and 3: the table argument and nil. | ||
1116 | | str CFUNC:CARG4, [BASE, #-16] | ||
1117 | | b ->fff_res | ||
1118 | | | ||
1119 | |.ffunc_2 ipairs_aux | ||
1120 | | checktab CARG1, ->fff_fallback | ||
1121 | | checkint CARG2, ->fff_fallback | ||
1122 | | ldr TMP1w, TAB:CARG1->asize | ||
1123 | | ldr CARG3, TAB:CARG1->array | ||
1124 | | ldr TMP0w, TAB:CARG1->hmask | ||
1125 | | add CARG2w, CARG2w, #1 // Next index = i+1 (32 bit). | ||
1126 | | cmp CARG2w, TMP1w | ||
1127 | | ldr PC, [BASE, FRAME_PC] | ||
1128 | | add TMP2, CARG2, TISNUM // Re-tag the new index by adding TISNUM. | ||
1129 | | mov RC, #(0+1)*8 // Default: zero results. | ||
1130 | | str TMP2, [BASE, #-16] // Index goes to the first result slot. | ||
1131 | | bhs >2 // Not in array part? | ||
1132 | | ldr TMP0, [CARG3, CARG2, lsl #3] | ||
1133 | |1: | ||
1134 | | mov TMP1, #(2+1)*8 | ||
1135 | | cmp TMP0, TISNIL | ||
1136 | | str TMP0, [BASE, #-8] // Value goes to the second result slot. | ||
1137 | | csel RC, RC, TMP1, eq // Keep zero results if the value is nil, else two results. | ||
1138 | | b ->fff_res | ||
1139 | |2: // Check for empty hash part first. Otherwise call C function. | ||
1140 | | cbz TMP0w, ->fff_res | ||
1141 | | bl extern lj_tab_getinth // (GCtab *t, int32_t key) | ||
1142 | | // Returns cTValue * or NULL. | ||
1143 | | cbz CRET1, ->fff_res | ||
1144 | | ldr TMP0, [CRET1] | ||
1145 | | b <1 | ||
1146 | | | ||
1147 | |.ffunc_1 ipairs | ||
1148 | | checktp TMP1, CARG1, LJ_TTAB, ->fff_fallback | ||
1149 | #if LJ_52 | ||
1150 | | ldr TAB:CARG2, TAB:TMP1->metatable | ||
1151 | #endif | ||
1152 | | ldr CFUNC:CARG4, CFUNC:CARG3->upvalue[0] // First result comes from upvalue[0] of this function. | ||
1153 | | ldr PC, [BASE, FRAME_PC] | ||
1154 | #if LJ_52 | ||
1155 | | cbnz TAB:CARG2, ->fff_fallback // LJ_52 only: any metatable on the table forces the fallback. | ||
1156 | #endif | ||
1157 | | mov RC, #(3+1)*8 // Three results. | ||
1158 | | stp CARG1, TISNUM, [BASE, #-8] // Results 2 and 3: the table argument and the bare TISNUM value. | ||
1159 | | str CFUNC:CARG4, [BASE, #-16] | ||
1160 | | b ->fff_res | ||
1161 | | | ||
1162 | |//-- Base library: catch errors ---------------------------------------- | ||
1163 | | | ||
1164 | |.ffunc pcall // Builds a FRAME_PCALL frame 16 bytes above BASE and dispatches the call. | ||
1165 | | ldrb TMP0w, GL->hookmask | ||
1166 | | cmp NARGS8:RC, #8 // Compare only: RC must reach fff_fallback unmodified. | ||
1167 | | blo ->fff_fallback // No arguments at all; fallback derives L->top from BASE+RC. | ||
 | | sub NARGS8:RC, NARGS8:RC, #8 // Drop the first argument from the count; flags from cmp stay live. | ||
1168 | | mov RB, BASE // RB = old base. | ||
1169 | | add BASE, BASE, #16 | ||
1170 | | ubfx TMP0w, TMP0w, #HOOK_ACTIVE_SHIFT, #1 // TMP0 = the single hookmask bit at HOOK_ACTIVE_SHIFT. | ||
1171 | | add PC, TMP0, #16+FRAME_PCALL // Frame link: delta 16 + FRAME_PCALL + that bit. | ||
1172 | | beq ->vm_call_dispatch // Exactly 1 argument (from the cmp above): nothing to move. | ||
1173 | |1: // Also entered from ff_xpcall via its backward branch to label 1. | ||
1174 | | add TMP2, BASE, NARGS8:RC | ||
1175 | |2: // Move the remaining arguments up by 8 bytes, highest slot first. | ||
1176 | | ldr TMP0, [TMP2, #-16] | ||
1177 | | str TMP0, [TMP2, #-8]! // Pre-decrement TMP2, then store. | ||
1178 | | cmp TMP2, BASE | ||
1179 | | bne <2 | ||
1180 | | b ->vm_call_dispatch | ||
1181 | | | ||
1182 | |.ffunc xpcall | ||
1183 | | ldp CARG1, CARG2, [BASE] // CARG1 = first argument, CARG2 = second argument. | ||
1184 | | ldrb TMP0w, GL->hookmask | ||
1185 | | subs NARGS8:TMP1, NARGS8:RC, #16 // Result goes to TMP1, so RC is still intact on the fallback path. | ||
1186 | | blo ->fff_fallback // Fewer than 2 arguments. | ||
1187 | | mov RB, BASE | ||
1188 | | asr ITYPE, CARG2, #47 | ||
1189 | | ubfx TMP0w, TMP0w, #HOOK_ACTIVE_SHIFT, #1 // TMP0 = the single hookmask bit at HOOK_ACTIVE_SHIFT. | ||
1190 | | cmn ITYPE, #-LJ_TFUNC | ||
1191 | | add PC, TMP0, #24+FRAME_PCALL // Frame link: delta 24 + FRAME_PCALL + that bit. | ||
1192 | | bne ->fff_fallback // Traceback must be a function. | ||
1193 | | mov NARGS8:RC, NARGS8:TMP1 // Commit the reduced argument count only after all checks passed. | ||
1194 | | add BASE, BASE, #24 | ||
1195 | | stp CARG2, CARG1, [RB] // Swap function and traceback. | ||
1196 | | cbz NARGS8:RC, ->vm_call_dispatch | ||
1197 | | b <1 // Reuses the argument-moving loop at the nearest preceding label 1 (inside ff_pcall). | ||
1198 | | | ||
1199 | |//-- Coroutine library -------------------------------------------------- | ||
1200 | | | ||
1201 | |.macro coroutine_resume_wrap, resume // resume=1 emits ff_coroutine_resume, resume=0 emits ff_coroutine_wrap_aux. | ||
1202 | |.if resume | ||
1203 | |.ffunc_1 coroutine_resume | ||
1204 | | checktp CARG1, LJ_TTHREAD, ->fff_fallback | ||
1205 | |.else | ||
1206 | |.ffunc coroutine_wrap_aux | ||
1207 | | ldr L:CARG1, CFUNC:CARG3->upvalue[0].gcr // The coroutine comes from upvalue[0] of this function. | ||
1208 | | and L:CARG1, CARG1, #LJ_GCVMASK | ||
1209 | |.endif | ||
1210 | | ldr PC, [BASE, FRAME_PC] | ||
1211 | | str BASE, L->base | ||
1212 | | ldp RB, CARG2, L:CARG1->base // RB = co->base, CARG2 = the field after it (written as co->top below). | ||
1213 | | ldrb TMP1w, L:CARG1->status | ||
1214 | | add TMP0, CARG2, TMP1 | ||
1215 | | str PC, SAVE_PC | ||
1216 | | cmp TMP0, RB | ||
1217 | | beq ->fff_fallback // top + status == base: fall back. | ||
1218 | | cmp TMP1, #LUA_YIELD | ||
1219 | | add TMP0, CARG2, #8 | ||
1220 | | csel CARG2, CARG2, TMP0, hs // Keep top if status >= LUA_YIELD, else use top+8. | ||
1221 | | ldr CARG4, L:CARG1->maxstack | ||
1222 | | add CARG3, CARG2, NARGS8:RC | ||
1223 | | ldr RB, L:CARG1->cframe | ||
1224 | | ccmp CARG3, CARG4, #2, ls // If status <= LUA_YIELD: compare new top with maxstack; else force hi. | ||
1225 | | ccmp RB, #0, #2, ls // If still ls: compare cframe with 0; else force hi. | ||
1226 | | bhi ->fff_fallback // Status too high, stack too small, or cframe non-zero. | ||
1227 | |.if resume | ||
1228 | | sub CARG3, CARG3, #8 // Keep resumed thread in stack for GC. | ||
1229 | | add BASE, BASE, #8 | ||
1230 | | sub NARGS8:RC, NARGS8:RC, #8 | ||
1231 | |.endif | ||
1232 | | str CARG3, L:CARG1->top | ||
1233 | | str BASE, L->top | ||
1234 | | cbz NARGS8:RC, >3 | ||
1235 | |2: // Move args to coroutine. | ||
1236 | | ldr TMP0, [BASE, RB] // RB is 0 on entry (cframe passed the zero check above) and serves as byte offset. | ||
1237 | | cmp RB, NARGS8:RC | ||
1238 | | str TMP0, [CARG2, RB] | ||
1239 | | add RB, RB, #8 | ||
1240 | | bne <2 | ||
1241 | |3: | ||
1242 | | mov CARG3, #0 | ||
1243 | | mov L:RA, L:CARG1 // Keep the coroutine pointer in RA for use after the call. | ||
1244 | | mov CARG4, #0 | ||
1245 | | bl ->vm_resume // (lua_State *L, TValue *base, 0, 0) | ||
1246 | | // Returns thread status. | ||
1247 | |4: | ||
1248 | | ldp CARG3, CARG4, L:RA->base // CARG3 = co->base, CARG4 = the field after it. | ||
1249 | | cmp CRET1, #LUA_YIELD | ||
1250 | | ldr BASE, L->base | ||
1251 | | str L, GL->cur_L | ||
1252 | | st_vmstate ST_INTERP | ||
1253 | | bhi >8 // Status > LUA_YIELD: error path. | ||
1254 | | sub RC, CARG4, CARG3 // RC = byte size of the results. | ||
1255 | | ldr CARG1, L->maxstack | ||
1256 | | add CARG2, BASE, RC | ||
1257 | | cbz RC, >6 // No results? | ||
1258 | | cmp CARG2, CARG1 | ||
1259 | | mov RB, #0 | ||
1260 | | bhi >9 // Need to grow stack? | ||
1261 | | | ||
1262 | | sub CARG4, RC, #8 // Offset of the last result slot. | ||
1263 | | str CARG3, L:RA->top // Clear coroutine stack. | ||
1264 | |5: // Move results from coroutine. | ||
1265 | | ldr TMP0, [CARG3, RB] | ||
1266 | | cmp RB, CARG4 | ||
1267 | | str TMP0, [BASE, RB] | ||
1268 | | add RB, RB, #8 | ||
1269 | | bne <5 | ||
1270 | |6: | ||
1271 | |.if resume | ||
1272 | | mov_true TMP1 | ||
1273 | | add RC, RC, #16 // Results plus the prepended boolean: (nresults+1+1)*8. | ||
1274 | |7: | ||
1275 | | str TMP1, [BASE, #-8] // Prepend true/false to results. | ||
1276 | | sub RA, BASE, #8 | ||
1277 | |.else | ||
1278 | | mov RA, BASE | ||
1279 | | add RC, RC, #8 | ||
1280 | |.endif | ||
1281 | | ands CARG1, PC, #FRAME_TYPE | ||
1282 | | str PC, SAVE_PC | ||
1283 | | str RCw, SAVE_MULTRES | ||
1284 | | beq ->BC_RET_Z // Frame type bits all zero: return through BC_RET_Z. | ||
1285 | | b ->vm_return | ||
1286 | | | ||
1287 | |8: // Coroutine returned with error (at co->top-1). | ||
1288 | |.if resume | ||
1289 | | ldr TMP0, [CARG4, #-8]! // Pre-decrement CARG4, then load the error value. | ||
1290 | | mov_false TMP1 | ||
1291 | | mov RC, #(2+1)*8 | ||
1292 | | str CARG4, L:RA->top // Remove error from coroutine stack. | ||
1293 | | str TMP0, [BASE] // Copy error message. | ||
1294 | | b <7 | ||
1295 | |.else | ||
1296 | | mov CARG1, L | ||
1297 | | mov CARG2, L:RA | ||
1298 | | bl extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co) | ||
1299 | | // Never returns. | ||
1300 | |.endif | ||
1301 | | | ||
1302 | |9: // Handle stack expansion on return from yield. | ||
1303 | | mov CARG1, L | ||
1304 | | lsr CARG2, RC, #3 // n = result bytes / 8. | ||
1305 | | bl extern lj_state_growstack // (lua_State *L, int n) | ||
1306 | | mov CRET1, #0 // Status 0, so the retry at label 4 takes the non-error path. | ||
1307 | | b <4 | ||
1308 | |.endmacro | ||
1309 | | | ||
1310 | | coroutine_resume_wrap 1 // coroutine.resume | ||
1311 | | coroutine_resume_wrap 0 // coroutine.wrap | ||
1312 | | | ||
1313 | |.ffunc coroutine_yield | ||
1314 | | ldr TMP0, L->cframe | ||
1315 | | add TMP1, BASE, NARGS8:RC // TMP1 = BASE + nargs*8. | ||
1316 | | mov CRET1, #LUA_YIELD | ||
1317 | | stp BASE, TMP1, L->base // Store BASE and TMP1 into L->base and the field after it. | ||
1318 | | tbz TMP0, #0, ->fff_fallback // Bit 0 of cframe clear: take the fallback. | ||
1319 | | str xzr, L->cframe // Zero L->cframe. | ||
1320 | | strb CRET1w, L->status // L->status = LUA_YIELD. | ||
1321 | | b ->vm_leave_unw | ||
1322 | | | ||
1323 | |//-- Math library ------------------------------------------------------- | ||
1324 | | | ||
1325 | |.macro math_round, func, round // Emits ff_math_<func>, using instruction <round> for the non-integer case. | ||
1326 | | .ffunc math_ .. func | ||
1327 | | ldr CARG1, [BASE] | ||
1328 | | cmp NARGS8:RC, #8 | ||
1329 | | ldr d0, [BASE] // Same slot, also loaded into d0. | ||
1330 | | blo ->fff_fallback // Fewer than 1 argument. | ||
1331 | | cmp TISNUMhi, CARG1, lsr #32 // Compare TISNUMhi against the upper 32 bits of the argument. | ||
1332 | | beq ->fff_restv // Equal: return the argument unchanged. | ||
1333 | | blo ->fff_fallback | ||
1334 | | round d0, d0 | ||
1335 | | b ->fff_resn | ||
1336 | |.endmacro | ||
1337 | | | ||
1338 | | math_round floor, frintm | ||
1339 | | math_round ceil, frintp | ||
1340 | | | ||
1341 | |.ffunc_1 math_abs | ||
1342 | | checknumber CARG1, ->fff_fallback | ||
1343 | | and CARG1, CARG1, #U64x(7fffffff,ffffffff) // Clear bit 63. | ||
1344 | | bne ->fff_restv // Uses flags left by checknumber above ('and' does not set flags). | ||
1345 | | eor CARG2w, CARG1w, CARG1w, asr #31 // 32-bit absolute value: (x ^ sign) - sign. | ||
1346 | | movz CARG3, #0x41e0, lsl #48 // 2^31. | ||
1347 | | subs CARG1w, CARG2w, CARG1w, asr #31 | ||
1348 | | add CARG1, CARG1, TISNUM // Re-tag the 32-bit result by adding TISNUM. | ||
1349 | | csel CARG1, CARG1, CARG3, pl // Negative 32-bit result: return the 2^31 constant instead. | ||
1350 | | // Fallthrough. | ||
1351 | | | ||
1352 | |->fff_restv: | ||
1353 | | // CARG1 = TValue result. | ||
1354 | | ldr PC, [BASE, FRAME_PC] | ||
1355 | | str CARG1, [BASE, #-16] // Single result goes to BASE-16. | ||
1356 | |->fff_res1: | ||
1357 | | // PC = return. | ||
1358 | | mov RC, #(1+1)*8 // One result. | ||
1359 | |->fff_res: | ||
1360 | | // RC = (nresults+1)*8, PC = return. | ||
1361 | | ands CARG1, PC, #FRAME_TYPE | ||
1362 | | str RCw, SAVE_MULTRES | ||
1363 | | sub RA, BASE, #16 // RA = address of the first result. | ||
1364 | | bne ->vm_return // Non-zero frame type bits: leave through vm_return. | ||
1365 | | ldr INSw, [PC, #-4] // Load the instruction word at PC-4. | ||
1366 | | decode_RB RB, INS | ||
1367 | |5: | ||
1368 | | cmp RC, RB, lsl #3 // More results expected? | ||
1369 | | blo >6 | ||
1370 | | decode_RA TMP1, INS | ||
1371 | | // Adjust BASE. KBASE is assumed to be set for the calling frame. | ||
1372 | | sub BASE, RA, TMP1, lsl #3 | ||
1373 | | ins_next | ||
1374 | | | ||
1375 | |6: // Fill up results with nil. | ||
1376 | | add TMP1, RA, RC | ||
1377 | | add RC, RC, #8 | ||
1378 | | str TISNIL, [TMP1, #-8] // Store nil at RA + old RC - 8. | ||
1379 | | b <5 | ||
1380 | | | ||
1381 | |.macro math_extern, func // Emits ff_math_<func>: one number argument, calls extern <func>. | ||
1382 | | .ffunc_n math_ .. func | ||
1383 | | bl extern func | ||
1384 | | b ->fff_resn // fff_resn stores d0 as the single result. | ||
1385 | |.endmacro | ||
1386 | | | ||
1387 | |.macro math_extern2, func // Emits ff_math_<func>: two number arguments, calls extern <func>. | ||
1388 | | .ffunc_nn math_ .. func | ||
1389 | | bl extern func | ||
1390 | | b ->fff_resn // fff_resn stores d0 as the single result. | ||
1391 | |.endmacro | ||
1392 | | | ||
1393 | |.ffunc_n math_sqrt | ||
1394 | | fsqrt d0, d0 | ||
1395 | |->fff_resn: // Shared exit: return the number in d0 as the single result. | ||
1396 | | ldr PC, [BASE, FRAME_PC] | ||
1397 | | str d0, [BASE, #-16] | ||
1398 | | b ->fff_res1 | ||
1399 | | | ||
1400 | |.ffunc math_log | ||
1401 | | ldr CARG1, [BASE] | ||
1402 | | cmp NARGS8:RC, #8 | ||
1403 | | ldr FARG1, [BASE] // Same slot, also loaded into FARG1. | ||
1404 | | bne ->fff_fallback // Need exactly 1 argument. | ||
1405 | | checknum CARG1, ->fff_fallback | ||
1406 | | bl extern log | ||
1407 | | b ->fff_resn | ||
1408 | | | ||
1409 | | math_extern log10 // One-argument functions, each forwarded to the extern of the same name. | ||
1410 | | math_extern exp | ||
1411 | | math_extern sin | ||
1412 | | math_extern cos | ||
1413 | | math_extern tan | ||
1414 | | math_extern asin | ||
1415 | | math_extern acos | ||
1416 | | math_extern atan | ||
1417 | | math_extern sinh | ||
1418 | | math_extern cosh | ||
1419 | | math_extern tanh | ||
1420 | | math_extern2 pow // Two-argument functions. | ||
1421 | | math_extern2 atan2 | ||
1422 | | math_extern2 fmod | ||
1423 | | | ||
1424 | |.ffunc_2 math_ldexp | ||
1425 | | ldr FARG1, [BASE] // x = first argument, loaded into FARG1. | ||
1426 | | checknum CARG1, ->fff_fallback | ||
1427 | | checkint CARG2, ->fff_fallback | ||
1428 | | sxtw CARG1, CARG2w // exp = low 32 bits of the second argument, sign-extended. | ||
1429 | | bl extern ldexp // (double x, int exp) | ||
1430 | | b ->fff_resn | ||
1431 | | | ||
1432 | |.ffunc_n math_frexp | ||
1433 | | add CARG1, sp, TMPDofs // Pointer argument: sp + TMPDofs. | ||
1434 | | bl extern frexp | ||
1435 | | ldr CARG2w, TMPD // 32-bit load from TMPD after the call. | ||
1436 | | ldr PC, [BASE, FRAME_PC] | ||
1437 | | str d0, [BASE, #-16] // First result: d0 from frexp. | ||
1438 | | mov RC, #(2+1)*8 // Two results. | ||
1439 | | add CARG2, CARG2, TISNUM // Tag the loaded 32-bit value by adding TISNUM. | ||
1440 | | str CARG2, [BASE, #-8] | ||
1441 | | b ->fff_res | ||
1442 | | | ||
1443 | |.ffunc_n math_modf | ||
1444 | | sub CARG1, BASE, #16 // Pointer argument: the first result slot (BASE-16). | ||
1445 | | ldr PC, [BASE, FRAME_PC] | ||
1446 | | bl extern modf | ||
1447 | | mov RC, #(2+1)*8 // Two results. | ||
1448 | | str d0, [BASE, #-8] // Second result: d0 from modf. | ||
1449 | | b ->fff_res | ||
1450 | | | ||
1451 | |.macro math_minmax, name, cond, fcond // cond: integer csel condition, fcond: FP fcsel condition. | ||
1452 | | .ffunc_1 name | ||
1453 | | add RB, BASE, RC // RB = end of the arguments. | ||
1454 | | add RA, BASE, #8 // RA = address of the second argument. | ||
1455 | | checkint CARG1, >4 | ||
1456 | |1: // Handle integers. | ||
1457 | | ldr CARG2, [RA] | ||
1458 | | cmp RA, RB | ||
1459 | | bhs ->fff_restv // All arguments consumed: return the integer in CARG1. | ||
1460 | | checkint CARG2, >3 | ||
1461 | | cmp CARG1w, CARG2w | ||
1462 | | add RA, RA, #8 | ||
1463 | | csel CARG1, CARG2, CARG1, cond // Take the new argument when cond holds. | ||
1464 | | b <1 | ||
1465 | |3: // Convert intermediate result to number and continue below. | ||
1466 | | scvtf d0, CARG1w | ||
1467 | | blo ->fff_fallback // Uses flags left by the failed checkint above. | ||
1468 | | ldr d1, [RA] | ||
1469 | | b >6 | ||
1470 | | | ||
1471 | |4: | ||
1472 | | ldr d0, [BASE] | ||
1473 | | blo ->fff_fallback // Uses flags left by the failed checkint above. | ||
1474 | |5: // Handle numbers. | ||
1475 | | ldr CARG2, [RA] | ||
1476 | | ldr d1, [RA] | ||
1477 | | cmp RA, RB | ||
1478 | | bhs ->fff_resn // All arguments consumed: return the number in d0. | ||
1479 | | checknum CARG2, >7 | ||
1480 | |6: | ||
1481 | | fcmp d0, d1 // Unordered (NaN operand) sets NZCV = 0011. | ||
1482 | | add RA, RA, #8 | ||
1483 | | fcsel d0, d1, d0, fcond // Take the new argument d1 when fcond holds. | ||
1484 | | b <5 | ||
1485 | |7: // Convert integer to number and continue above. | ||
1486 | | scvtf d1, CARG2w | ||
1487 | | blo ->fff_fallback // Uses flags left by the failed checknum above. | ||
1488 | | b <6 | ||
1489 | |.endmacro | ||
1490 | | | ||
1491 | | math_minmax math_min, gt, pl // pl: d0 >= d1 or unordered, so a NaN comparison takes d1. | ||
1492 | | math_minmax math_max, lt, le // le: d0 <= d1 or unordered, so a NaN comparison takes d1, same as min. | ||
1493 | | | ||
1494 | |//-- String library ----------------------------------------------------- | ||
1495 | | | ||
1496 | |.ffunc string_byte // Only handle the 1-arg case here. | ||
1497 | | ldp PC, CARG1, [BASE, FRAME_PC] // One paired load fetches the frame PC and the slot after it into CARG1. | ||
1498 | | cmp NARGS8:RC, #8 | ||
1499 | | asr ITYPE, CARG1, #47 | ||
1500 | | ccmn ITYPE, #-LJ_TSTR, #0, eq // eq only if exactly 1 argument and ITYPE matches LJ_TSTR. | ||
1501 | | and STR:CARG1, CARG1, #LJ_GCVMASK | ||
1502 | | bne ->fff_fallback | ||
1503 | | ldrb TMP0w, STR:CARG1[1] // Access is always ok (NUL at end). | ||
1504 | | ldr CARG3w, STR:CARG1->len | ||
1505 | | add TMP0, TMP0, TISNUM // Tag the byte value by adding TISNUM. | ||
1506 | | str TMP0, [BASE, #-16] | ||
1507 | | mov RC, #(0+1)*8 // Zero results, used when len == 0. | ||
1508 | | cbz CARG3, ->fff_res | ||
1509 | | b ->fff_res1 // Otherwise one result. | ||
1510 | | | ||
1511 | |.ffunc string_char // Only handle the 1-arg case here. | ||
1512 | | ffgccheck | ||
1513 | | ldp PC, CARG1, [BASE, FRAME_PC] // One paired load fetches the frame PC and the slot after it into CARG1. | ||
1514 | | cmp CARG1w, #255 | ||
1515 | | ccmp NARGS8:RC, #8, #0, ls // Need exactly 1 argument. | ||
1516 | | bne ->fff_fallback // Low 32 bits > 255 (unsigned) or wrong argument count. | ||
1517 | | checkint CARG1, ->fff_fallback | ||
1518 | | mov CARG3, #1 // len = 1. | ||
1519 | | // Point to the char inside the integer in the stack slot. | ||
1520 | |.if ENDIAN_LE | ||
1521 | | mov CARG2, BASE | ||
1522 | |.else | ||
1523 | | add CARG2, BASE, #7 | ||
1524 | |.endif | ||
1525 | |->fff_newstr: | ||
1526 | | // CARG2 = str, CARG3 = len. | ||
1527 | | str BASE, L->base | ||
1528 | | mov CARG1, L | ||
1529 | | str PC, SAVE_PC | ||
1530 | | bl extern lj_str_new // (lua_State *L, char *str, size_t l) | ||
1531 | |->fff_resstr: | ||
1532 | | // Returns GCstr *. | ||
1533 | | ldr BASE, L->base | ||
1534 | | movn TMP1, #~LJ_TSTR | ||
1535 | | add CARG1, CARG1, TMP1, lsl #47 // Add the LJ_TSTR tag shifted into bits 47 and up. | ||
1536 | | b ->fff_restv | ||
1537 | | | ||
1538 | |.ffunc string_sub | ||
1539 | | ffgccheck | ||
1540 | | ldr CARG1, [BASE] | ||
1541 | | ldr CARG3, [BASE, #16] // Third slot is loaded before the count check; only used with > 2 args. | ||
1542 | | cmp NARGS8:RC, #16 | ||
1543 | | movn RB, #0 // Default end = -1. | ||
1544 | | beq >1 // Exactly 2 arguments: keep the default end. | ||
1545 | | blo ->fff_fallback // Fewer than 2 arguments. | ||
1546 | | checkint CARG3, ->fff_fallback | ||
1547 | | sxtw RB, CARG3w | ||
1548 | |1: | ||
1549 | | ldr CARG2, [BASE, #8] | ||
1550 | | checkstr CARG1, ->fff_fallback | ||
1551 | | ldr TMP1w, STR:CARG1->len | ||
1552 | | checkint CARG2, ->fff_fallback | ||
1553 | | sxtw CARG2, CARG2w | ||
1554 | | // CARG1 = str, TMP1 = str->len, CARG2 = start, RB = end | ||
1555 | | add TMP2, RB, TMP1 | ||
1556 | | cmp RB, #0 | ||
1557 | | add TMP0, CARG2, TMP1 | ||
1558 | | csinc RB, RB, TMP2, ge // if (end < 0) end += len+1 | ||
1559 | | cmp CARG2, #0 | ||
1560 | | csinc CARG2, CARG2, TMP0, ge // if (start < 0) start += len+1 | ||
1561 | | cmp RB, #0 | ||
1562 | | csel RB, RB, xzr, ge // if (end < 0) end = 0 | ||
1563 | | cmp CARG2, #1 | ||
1564 | | csinc CARG2, CARG2, xzr, ge // if (start < 1) start = 1 | ||
1565 | | cmp RB, TMP1 | ||
1566 | | csel RB, RB, TMP1, le // if (end > len) end = len | ||
1567 | | add CARG1, STR:CARG1, #sizeof(GCstr)-1 // One byte before the data after the GCstr header, so 1-based start indexes directly. | ||
1568 | | subs CARG3, RB, CARG2 // len = end - start | ||
1569 | | add CARG2, CARG1, CARG2 // Pointer to the first character of the substring. | ||
1570 | | add CARG3, CARG3, #1 // len += 1 | ||
1571 | | bge ->fff_newstr // end >= start (flags from subs): create the substring. | ||
1572 | | add STR:CARG1, GL, #offsetof(global_State, strempty) // Otherwise return the strempty object inside global_State. | ||
1573 | | movn TMP1, #~LJ_TSTR | ||
1574 | | add CARG1, CARG1, TMP1, lsl #47 // Add the LJ_TSTR tag shifted into bits 47 and up. | ||
1575 | | b ->fff_restv | ||
1576 | | | ||
1577 | |.macro ffstring_op, name // Emits ff_string_<name>, implemented by lj_buf_putstr_<name> on the global tmpbuf. | ||
1578 | | .ffunc string_ .. name | ||
1579 | | ffgccheck | ||
1580 | | ldr CARG2, [BASE] | ||
1581 | | cmp NARGS8:RC, #8 | ||
1582 | | asr ITYPE, CARG2, #47 | ||
1583 | | ccmn ITYPE, #-LJ_TSTR, #0, hs // If at least 1 argument: test ITYPE against LJ_TSTR; else force ne. | ||
1584 | | and STR:CARG2, CARG2, #LJ_GCVMASK | ||
1585 | | bne ->fff_fallback | ||
1586 | | ldr TMP0, GL->tmpbuf.b | ||
1587 | | add SBUF:CARG1, GL, #offsetof(global_State, tmpbuf) // First C argument: address of the tmpbuf inside global_State. | ||
1588 | | str BASE, L->base | ||
1589 | | str PC, SAVE_PC | ||
1590 | | str L, GL->tmpbuf.L | ||
1591 | | str TMP0, GL->tmpbuf.p // Set tmpbuf.p to tmpbuf.b. | ||
1592 | | bl extern lj_buf_putstr_ .. name | ||
1593 | | bl extern lj_buf_tostr // Called with whatever the previous call left in CARG1. | ||
1594 | | b ->fff_resstr | ||
1595 | |.endmacro | ||
1596 | | | ||
1597 | |ffstring_op reverse | ||
1598 | |ffstring_op lower | ||
1599 | |ffstring_op upper | ||
1600 | | | ||
1601 | |//-- Bit library -------------------------------------------------------- | ||
1602 | | | ||
1603 | |// FP number to bit conversion for soft-float. Clobbers CARG1-CARG3 | ||
1604 | |->vm_tobit_fb: // Reached as the failure target of checkint; returns via lr, which the caller preloads. | ||
1605 | | bls ->fff_fallback // Uses flags left by the caller's checkint. | ||
1606 | | add CARG2, CARG1, CARG1 // CARG2 = CARG1 << 1 (drops bit 63). | ||
1607 | | mov CARG3, #1076 | ||
1608 | | sub CARG3, CARG3, CARG2, lsr #53 // Shift count = 1076 - (top 11 bits of CARG2). | ||
1609 | | cmp CARG3, #53 | ||
1610 | | bhi >1 // Shift count outside 0..53 (unsigned): result is 0. | ||
1611 | | and CARG2, CARG2, #U64x(001fffff,ffffffff) // Keep the low 53 bits. | ||
1612 | | orr CARG2, CARG2, #U64x(00200000,00000000) // Set bit 53. | ||
1613 | | cmp CARG1, #0 | ||
1614 | | lsr CARG2, CARG2, CARG3 | ||
1615 | | cneg CARG1w, CARG2w, mi // Negate the 32-bit result if CARG1 was negative. | ||
1616 | | br lr | ||
1617 | |1: | ||
1618 | | mov CARG1w, #0 | ||
1619 | | br lr | ||
1620 | | | ||
1621 | |.macro .ffunc_bit, name // Emits ff_bit_<name> and leaves the first argument as a 32-bit value in CARG1w. | ||
1622 | | .ffunc_1 bit_..name | ||
1623 | | adr lr, >1 // Return address for vm_tobit_fb, which ends in 'br lr'. | ||
1624 | | checkint CARG1, ->vm_tobit_fb | ||
1625 | |1: | ||
1626 | |.endmacro | ||
1627 | | | ||
1628 | |.macro .ffunc_bit_op, name, ins // Folds all arguments into TMP0w with the 32-bit instruction <ins>. | ||
1629 | | .ffunc_bit name | ||
1630 | | mov RA, #8 // Byte offset of the next argument. | ||
1631 | | mov TMP0w, CARG1w // Accumulator starts as the first argument. | ||
1632 | | adr lr, >2 // Return address for vm_tobit_fb inside the loop. | ||
1633 | |1: | ||
1634 | | ldr CARG1, [BASE, RA] | ||
1635 | | cmp RA, NARGS8:RC | ||
1636 | | add RA, RA, #8 | ||
1637 | | bge >9 // All arguments consumed; label 9 is defined after the last expansion, in ff_bit_tobit. | ||
1638 | | checkint CARG1, ->vm_tobit_fb | ||
1639 | |2: | ||
1640 | | ins TMP0w, TMP0w, CARG1w | ||
1641 | | b <1 | ||
1642 | |.endmacro | ||
1643 | | | ||
1644 | |.ffunc_bit_op band, and | ||
1645 | |.ffunc_bit_op bor, orr | ||
1646 | |.ffunc_bit_op bxor, eor | ||
1647 | | | ||
1648 | |.ffunc_bit tobit | ||
1649 | | mov TMP0w, CARG1w // Writing the 32-bit register zero-extends into TMP0. | ||
1650 | |9: // Label reused by .ffunc_bit_op users. | ||
1651 | | add CARG1, TMP0, TISNUM // Tag the 32-bit result by adding TISNUM. | ||
1652 | | b ->fff_restv | ||
1653 | | | ||
1654 | |.ffunc_bit bswap | ||
1655 | | rev TMP0w, CARG1w // Reverse the byte order of the 32-bit value. | ||
1656 | | add CARG1, TMP0, TISNUM // Tag the 32-bit result by adding TISNUM. | ||
1657 | | b ->fff_restv | ||
1658 | | | ||
1659 | |.ffunc_bit bnot | ||
1660 | | mvn TMP0w, CARG1w // Bitwise NOT of the 32-bit value. | ||
1661 | | add CARG1, TMP0, TISNUM // Tag the 32-bit result by adding TISNUM. | ||
1662 | | b ->fff_restv | ||
1663 | | | ||
1664 | |.macro .ffunc_bit_sh, name, ins, shmod // shmod != 0 negates the shift count before applying <ins>. | ||
1665 | | .ffunc bit_..name | ||
1666 | | ldp TMP0, CARG1, [BASE] // TMP0 = first argument, CARG1 = second argument (shift count). | ||
1667 | | cmp NARGS8:RC, #16 | ||
1668 | | blo ->fff_fallback // Fewer than 2 arguments. | ||
1669 | | adr lr, >1 // Return address for vm_tobit_fb. | ||
1670 | | checkint CARG1, ->vm_tobit_fb | ||
1671 | |1: | ||
1672 | |.if shmod == 0 | ||
1673 | | mov TMP1, CARG1 | ||
1674 | |.else | ||
1675 | | neg TMP1, CARG1 | ||
1676 | |.endif | ||
1677 | | mov CARG1, TMP0 // Now convert the first argument the same way. | ||
1678 | | adr lr, >2 | ||
1679 | | checkint CARG1, ->vm_tobit_fb | ||
1680 | |2: | ||
1681 | | ins TMP0w, CARG1w, TMP1w // 32-bit shift/rotate of the value by the count in TMP1w. | ||
1682 | | add CARG1, TMP0, TISNUM // Tag the 32-bit result by adding TISNUM. | ||
1683 | | b ->fff_restv | ||
1684 | |.endmacro | ||
1685 | | | ||
1686 | |.ffunc_bit_sh lshift, lsl, 0 | ||
1687 | |.ffunc_bit_sh rshift, lsr, 0 | ||
1688 | |.ffunc_bit_sh arshift, asr, 0 | ||
1689 | |.ffunc_bit_sh rol, ror, 1 // rol is done as ror by the negated count. | ||
1690 | |.ffunc_bit_sh ror, ror, 0 | ||
1691 | | | ||
1692 | |//----------------------------------------------------------------------- | ||
1693 | | | ||
1694 | |->fff_fallback: // Call fast function fallback handler. | ||
1695 | | // BASE = new base, RC = nargs*8 | ||
1696 | | ldp CFUNC:CARG3, PC, [BASE, FRAME_FUNC] // Fallback may overwrite PC. | ||
1697 | | ldr TMP2, L->maxstack | ||
1698 | | add TMP1, BASE, NARGS8:RC // TMP1 = BASE + nargs*8, so RC must still be the original argument count here. | ||
1699 | | stp BASE, TMP1, L->base // Store BASE and TMP1 into L->base and the field after it. | ||
1700 | | and CFUNC:CARG3, CARG3, #LJ_GCVMASK | ||
1701 | | add TMP1, TMP1, #8*LUA_MINSTACK | ||
1702 | | ldr CARG3, CFUNC:CARG3->f // CARG3 = the function's C entry point (field f). | ||
1703 | | str PC, SAVE_PC // Redundant (but a defined value). | ||
1704 | | cmp TMP1, TMP2 | ||
1705 | | mov CARG1, L | ||
1706 | | bhi >5 // Need to grow stack. | ||
1707 | | blr CARG3 // (lua_State *L) | ||
1708 | | // Either throws an error, or recovers and returns -1, 0 or nresults+1. | ||
1709 | | ldr BASE, L->base | ||
1710 | | cmp CRET1w, #0 | ||
1711 | | lsl RC, CRET1, #3 | ||
1712 | | sub RA, BASE, #16 | ||
1713 | | bgt ->fff_res // Returned nresults+1? | ||
1714 | |1: // Returned 0 or -1: retry fast path. | ||
1715 | | ldr CARG1, L->top | ||
1716 | | ldr CFUNC:CARG3, [BASE, FRAME_FUNC] | ||
1717 | | sub NARGS8:RC, CARG1, BASE // Recompute nargs*8 from L->top - BASE. | ||
1718 | | bne ->vm_call_tail // Returned -1? | ||
1719 | | and CFUNC:CARG3, CARG3, #LJ_GCVMASK | ||
1720 | | ins_callt // Returned 0: retry fast path. | ||
1721 | | | ||
1722 | |// Reconstruct previous base for vmeta_call during tailcall. | ||
1723 | |->vm_call_tail: | ||
1724 | | ands TMP0, PC, #FRAME_TYPE | ||
1725 | | and TMP1, PC, #~FRAME_TYPEP // Default delta: PC with the FRAME_TYPEP bits cleared. | ||
1726 | | bne >3 // Non-zero frame type bits: use that delta. | ||
1727 | | ldrb RAw, [PC, #-4+OFS_RA] // Otherwise read the RA byte of the instruction at PC-4. | ||
1728 | | lsl RA, RA, #3 | ||
1729 | | add TMP1, RA, #16 // delta = RA*8 + 16. | ||
1730 | |3: | ||
1731 | | sub RB, BASE, TMP1 // RB = previous base. | ||
1732 | | b ->vm_call_dispatch // Resolve again for tailcall. | ||
1733 | | | ||
1734 | |5: // Grow stack for fallback handler. | ||
1735 | | mov CARG2, #LUA_MINSTACK | ||
1736 | | bl extern lj_state_growstack // (lua_State *L, int n) | ||
1737 | | ldr BASE, L->base | ||
1738 | | cmp CARG1, CARG1 // Set zero-flag to force retry. | ||
1739 | | b <1 | ||
1740 | | | ||
1741 | |->fff_gcstep: // Call GC step function. | ||
1742 | | // BASE = new base, RC = nargs*8 | ||
1743 | | add CARG2, BASE, NARGS8:RC // Calculate L->top. | ||
1744 | | mov RA, lr | ||
1745 | | stp BASE, CARG2, L->base | ||
1746 | | str PC, SAVE_PC // Redundant (but a defined value). | ||
1747 | | mov CARG1, L | ||
1748 | | bl extern lj_gc_step // (lua_State *L) | ||
1749 | | ldp BASE, CARG2, L->base | ||
1750 | | ldr CFUNC:CARG3, [BASE, FRAME_FUNC] | ||
1751 | | mov lr, RA // Help return address predictor. | ||
1752 | | sub NARGS8:RC, CARG2, BASE // Calculate nargs*8. | ||
1753 | | and CFUNC:CARG3, CARG3, #LJ_GCVMASK | ||
1754 | | ret | ||
1755 | | | ||
1756 | |//----------------------------------------------------------------------- | ||
1757 | |//-- Special dispatch targets ------------------------------------------- | ||
1758 | |//----------------------------------------------------------------------- | ||
1759 | | | ||
1760 | |->vm_record: // Dispatch target for recording phase. | ||
1761 | |.if JIT | ||
1762 | | ldrb CARG1w, GL->hookmask | ||
1763 | | tst CARG1, #HOOK_VMEVENT // No recording while in vmevent. | ||
1764 | | bne >5 | ||
1765 | | // Decrement the hookcount for consistency, but always do the call. | ||
1766 | | ldr CARG2w, GL->hookcount | ||
1767 | | tst CARG1, #HOOK_ACTIVE | ||
1768 | | bne >1 | ||
1769 | | sub CARG2w, CARG2w, #1 | ||
1770 | | tst CARG1, #LUA_MASKLINE|LUA_MASKCOUNT | ||
1771 | | beq >1 | ||
1772 | | str CARG2w, GL->hookcount | ||
1773 | | b >1 | ||
1774 | |.endif | ||
1775 | | | ||
1776 | |->vm_rethook: // Dispatch target for return hooks. | ||
1777 | | ldrb TMP2w, GL->hookmask | ||
1778 | | tbz TMP2w, #HOOK_ACTIVE_SHIFT, >1 // Hook already active? | ||
1779 | |5: // Re-dispatch to static ins. | ||
1780 | | ldr TMP0, [TMP1, #GG_G2DISP+GG_DISP2STATIC] | ||
1781 | | br TMP0 | ||
1782 | | | ||
1783 | |->vm_inshook: // Dispatch target for instr/line hooks. | ||
1784 | | ldrb TMP2w, GL->hookmask | ||
1785 | | ldr TMP3w, GL->hookcount | ||
1786 | | tbnz TMP2w, #HOOK_ACTIVE_SHIFT, <5 // Hook already active? | ||
1787 | | tst TMP2w, #LUA_MASKLINE|LUA_MASKCOUNT | ||
1788 | | beq <5 | ||
1789 | | sub TMP3w, TMP3w, #1 | ||
1790 | | str TMP3w, GL->hookcount | ||
1791 | | cbz TMP3w, >1 | ||
1792 | | tbz TMP2w, #LUA_HOOKLINE, <5 | ||
1793 | |1: | ||
1794 | | mov CARG1, L | ||
1795 | | str BASE, L->base | ||
1796 | | mov CARG2, PC | ||
1797 | | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. | ||
1798 | | bl extern lj_dispatch_ins // (lua_State *L, const BCIns *pc) | ||
1799 | |3: | ||
1800 | | ldr BASE, L->base | ||
1801 | |4: // Re-dispatch to static ins. | ||
1802 | | ldr INSw, [PC, #-4] | ||
1803 | | add TMP1, GL, INS, uxtb #3 | ||
1804 | | decode_RA RA, INS | ||
1805 | | ldr TMP0, [TMP1, #GG_G2DISP+GG_DISP2STATIC] | ||
1806 | | decode_RD RC, INS | ||
1807 | | br TMP0 | ||
1808 | | | ||
1809 | |->cont_hook: // Continue from hook yield. | ||
1810 | | ldr CARG1, [CARG4, #-40] | ||
1811 | | add PC, PC, #4 | ||
1812 | | str CARG1w, SAVE_MULTRES // Restore MULTRES for *M ins. | ||
1813 | | b <4 | ||
1814 | | | ||
1815 | |->vm_hotloop: // Hot loop counter underflow. | ||
1816 | |.if JIT | ||
1817 | | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Same as curr_topL(L). | ||
1818 | | add CARG1, GL, #GG_G2DISP+GG_DISP2J | ||
1819 | | and LFUNC:CARG3, CARG3, #LJ_GCVMASK | ||
1820 | | str PC, SAVE_PC | ||
1821 | | ldr CARG3, LFUNC:CARG3->pc | ||
1822 | | mov CARG2, PC | ||
1823 | | str L, [GL, #GL_J(L)] | ||
1824 | | ldrb CARG3w, [CARG3, #PC2PROTO(framesize)] | ||
1825 | | str BASE, L->base | ||
1826 | | add CARG3, BASE, CARG3, lsl #3 | ||
1827 | | str CARG3, L->top | ||
1828 | | bl extern lj_trace_hot // (jit_State *J, const BCIns *pc) | ||
1829 | | b <3 | ||
1830 | |.endif | ||
1831 | | | ||
1832 | |->vm_callhook: // Dispatch target for call hooks. | ||
1833 | | mov CARG2, PC | ||
1834 | |.if JIT | ||
1835 | | b >1 | ||
1836 | |.endif | ||
1837 | | | ||
1838 | |->vm_hotcall: // Hot call counter underflow. | ||
1839 | |.if JIT | ||
1840 | | orr CARG2, PC, #1 | ||
1841 | |1: | ||
1842 | |.endif | ||
1843 | | add TMP1, BASE, NARGS8:RC | ||
1844 | | str PC, SAVE_PC | ||
1845 | | mov CARG1, L | ||
1846 | | sub RA, RA, BASE | ||
1847 | | stp BASE, TMP1, L->base | ||
1848 | | bl extern lj_dispatch_call // (lua_State *L, const BCIns *pc) | ||
1849 | | // Returns ASMFunction. | ||
1850 | | ldp BASE, TMP1, L->base | ||
1851 | | str xzr, SAVE_PC // Invalidate for subsequent line hook. | ||
1852 | | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] | ||
1853 | | add RA, BASE, RA | ||
1854 | | sub NARGS8:RC, TMP1, BASE | ||
1855 | | ldr INSw, [PC, #-4] | ||
1856 | | and LFUNC:CARG3, CARG3, #LJ_GCVMASK | ||
1857 | | br CRET1 | ||
1858 | | | ||
1859 | |->cont_stitch: // Trace stitching. | ||
1860 | |.if JIT | ||
1861 | | // RA = resultptr, CARG4 = meta base | ||
1862 | | ldr RBw, SAVE_MULTRES | ||
1863 | | ldr INSw, [PC, #-4] | ||
1864 | | ldr TRACE:CARG3, [CARG4, #-40] // Save previous trace. | ||
1865 | | subs RB, RB, #8 | ||
1866 | | decode_RA RC, INS // Call base. | ||
1867 | | and CARG3, CARG3, #LJ_GCVMASK | ||
1868 | | beq >2 | ||
1869 | |1: // Move results down. | ||
1870 | | ldr CARG1, [RA] | ||
1871 | | add RA, RA, #8 | ||
1872 | | subs RB, RB, #8 | ||
1873 | | str CARG1, [BASE, RC, lsl #3] | ||
1874 | | add RC, RC, #1 | ||
1875 | | bne <1 | ||
1876 | |2: | ||
1877 | | decode_RA RA, INS | ||
1878 | | decode_RB RB, INS | ||
1879 | | add RA, RA, RB | ||
1880 | |3: | ||
1881 | | cmp RA, RC | ||
1882 | | bhi >9 // More results wanted? | ||
1883 | | | ||
1884 | | ldrh RAw, TRACE:CARG3->traceno | ||
1885 | | ldrh RCw, TRACE:CARG3->link | ||
1886 | | cmp RCw, RAw | ||
1887 | | beq ->cont_nop // Blacklisted. | ||
1888 | | cmp RCw, #0 | ||
1889 | | bne =>BC_JLOOP // Jump to stitched trace. | ||
1890 | | | ||
1891 | | // Stitch a new trace to the previous trace. | ||
1892 | | mov CARG1, #GL_J(exitno) | ||
1893 | | str RAw, [GL, CARG1] | ||
1894 | | mov CARG1, #GL_J(L) | ||
1895 | | str L, [GL, CARG1] | ||
1896 | | str BASE, L->base | ||
1897 | | add CARG1, GL, #GG_G2J | ||
1898 | | mov CARG2, PC | ||
1899 | | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) | ||
1900 | | ldr BASE, L->base | ||
1901 | | b ->cont_nop | ||
1902 | | | ||
1903 | |9: // Fill up results with nil. | ||
1904 | | str TISNIL, [BASE, RC, lsl #3] | ||
1905 | | add RC, RC, #1 | ||
1906 | | b <3 | ||
1907 | |.endif | ||
1908 | | | ||
1909 | |->vm_profhook: // Dispatch target for profiler hook. | ||
1910 | #if LJ_HASPROFILE | ||
1911 | | mov CARG1, L | ||
1912 | | str BASE, L->base | ||
1913 | | mov CARG2, PC | ||
1914 | | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc) | ||
1915 | | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. | ||
1916 | | ldr BASE, L->base | ||
1917 | | sub PC, PC, #4 | ||
1918 | | b ->cont_nop | ||
1919 | #endif | ||
1920 | | | ||
1921 | |//----------------------------------------------------------------------- | ||
1922 | |//-- Trace exit handler ------------------------------------------------- | ||
1923 | |//----------------------------------------------------------------------- | ||
1924 | | | ||
1925 | |.macro savex_, a, b | ||
1926 | | stp d..a, d..b, [sp, #a*8] | ||
1927 | | stp x..a, x..b, [sp, #32*8+a*8] | ||
1928 | |.endmacro | ||
1929 | | | ||
1930 | |->vm_exit_handler: | ||
1931 | |.if JIT | ||
1932 | | sub sp, sp, #(64*8) | ||
1933 | | savex_, 0, 1 | ||
1934 | | savex_, 2, 3 | ||
1935 | | savex_, 4, 5 | ||
1936 | | savex_, 6, 7 | ||
1937 | | savex_, 8, 9 | ||
1938 | | savex_, 10, 11 | ||
1939 | | savex_, 12, 13 | ||
1940 | | savex_, 14, 15 | ||
1941 | | savex_, 16, 17 | ||
1942 | | savex_, 18, 19 | ||
1943 | | savex_, 20, 21 | ||
1944 | | savex_, 22, 23 | ||
1945 | | savex_, 24, 25 | ||
1946 | | savex_, 26, 27 | ||
1947 | | savex_, 28, 29 | ||
1948 | | stp d30, d31, [sp, #30*8] | ||
1949 | | ldr CARG1, [sp, #64*8] // Load original value of lr. | ||
1950 | | add CARG3, sp, #64*8 // Recompute original value of sp. | ||
1951 | | mv_vmstate CARG4w, EXIT | ||
1952 | | stp xzr, CARG3, [sp, #62*8] // Store 0/sp in RID_LR/RID_SP. | ||
1953 | | sub CARG1, CARG1, lr | ||
1954 | | ldr L, GL->cur_L | ||
1955 | | lsr CARG1, CARG1, #2 | ||
1956 | | ldr BASE, GL->jit_base | ||
1957 | | sub CARG1, CARG1, #2 | ||
1958 | | ldr CARG2w, [lr] // Load trace number. | ||
1959 | | st_vmstate CARG4w | ||
1960 | |.if ENDIAN_BE | ||
1961 | | rev32 CARG2, CARG2 | ||
1962 | |.endif | ||
1963 | | str BASE, L->base | ||
1964 | | ubfx CARG2w, CARG2w, #5, #16 | ||
1965 | | str CARG1w, [GL, #GL_J(exitno)] | ||
1966 | | str CARG2w, [GL, #GL_J(parent)] | ||
1967 | | str L, [GL, #GL_J(L)] | ||
1968 | | str xzr, GL->jit_base | ||
1969 | | add CARG1, GL, #GG_G2J | ||
1970 | | mov CARG2, sp | ||
1971 | | bl extern lj_trace_exit // (jit_State *J, ExitState *ex) | ||
1972 | | // Returns MULTRES (unscaled) or negated error code. | ||
1973 | | ldr CARG2, L->cframe | ||
1974 | | ldr BASE, L->base | ||
1975 | | and sp, CARG2, #CFRAME_RAWMASK | ||
1976 | | ldr PC, SAVE_PC // Get SAVE_PC. | ||
1977 | | str L, SAVE_L // Set SAVE_L (on-trace resume/yield). | ||
1978 | | b >1 | ||
1979 | |.endif | ||
1980 | | | ||
1981 | |->vm_exit_interp: | ||
1982 | | // CARG1 = MULTRES or negated error code, BASE, PC and GL set. | ||
1983 | |.if JIT | ||
1984 | | ldr L, SAVE_L | ||
1985 | |1: | ||
1986 | | cmp CARG1w, #0 | ||
1987 | | blt >9 // Check for error from exit. | ||
1988 | | lsl RC, CARG1, #3 | ||
1989 | | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] | ||
1990 | | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 | ||
1991 | | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 | ||
1992 | | movn TISNIL, #0 | ||
1993 | | and LFUNC:CARG2, CARG2, #LJ_GCVMASK | ||
1994 | | str RCw, SAVE_MULTRES | ||
1995 | | str BASE, L->base | ||
1996 | | ldr CARG2, LFUNC:CARG2->pc | ||
1997 | | str xzr, GL->jit_base | ||
1998 | | mv_vmstate CARG4w, INTERP | ||
1999 | | ldr KBASE, [CARG2, #PC2PROTO(k)] | ||
2000 | | // Modified copy of ins_next which handles function header dispatch, too. | ||
2001 | | ldrb RBw, [PC, # OFS_OP] | ||
2002 | | ldr INSw, [PC], #4 | ||
2003 | | st_vmstate CARG4w | ||
2004 | | cmp RBw, #BC_FUNCC+2 // Fast function? | ||
2005 | | add TMP1, GL, INS, uxtb #3 | ||
2006 | | bhs >4 | ||
2007 | |2: | ||
2008 | | cmp RBw, #BC_FUNCF // Function header? | ||
2009 | | add TMP0, GL, RB, uxtb #3 | ||
2010 | | ldr RB, [TMP0, #GG_G2DISP] | ||
2011 | | decode_RA RA, INS | ||
2012 | | lsr TMP0, INS, #16 | ||
2013 | | csel RC, TMP0, RC, lo | ||
2014 | | blo >5 | ||
2015 | | ldr CARG3, [BASE, FRAME_FUNC] | ||
2016 | | sub RC, RC, #8 | ||
2017 | | add RA, BASE, RA, lsl #3 // Yes: RA = BASE+framesize*8, RC = nargs*8 | ||
2018 | | and LFUNC:CARG3, CARG3, #LJ_GCVMASK | ||
2019 | |5: | ||
2020 | | br RB | ||
2021 | | | ||
2022 | |4: // Check frame below fast function. | ||
2023 | | ldr CARG1, [BASE, FRAME_PC] | ||
2024 | | ands CARG2, CARG1, #FRAME_TYPE | ||
2025 | | bne <2 // Trace stitching continuation? | ||
2026 | | // Otherwise set KBASE for Lua function below fast function. | ||
2027 | | ldr CARG3w, [CARG1, #-4] | ||
2028 | | decode_RA CARG1, CARG3 | ||
2029 | | sub CARG2, BASE, CARG1, lsl #3 | ||
2030 | | ldr LFUNC:CARG3, [CARG2, #-32] | ||
2031 | | and LFUNC:CARG3, CARG3, #LJ_GCVMASK | ||
2032 | | ldr CARG3, LFUNC:CARG3->pc | ||
2033 | | ldr KBASE, [CARG3, #PC2PROTO(k)] | ||
2034 | | b <2 | ||
2035 | | | ||
2036 | |9: // Rethrow error from the right C frame. | ||
2037 | | neg CARG2, CARG1 | ||
2038 | | mov CARG1, L | ||
2039 | | bl extern lj_err_throw // (lua_State *L, int errcode) | ||
2040 | |.endif | ||
2041 | | | ||
2042 | |//----------------------------------------------------------------------- | ||
2043 | |//-- Math helper functions ---------------------------------------------- | ||
2044 | |//----------------------------------------------------------------------- | ||
2045 | | | ||
2046 | | // int lj_vm_modi(int dividend, int divisor); Inputs in CARG1w/CARG2w, result in CARG1w. | ||
2047 | |->vm_modi: | ||
2048 | | eor CARG4w, CARG1w, CARG2w // Sign bit of (dividend ^ divisor). | ||
2049 | | cmp CARG4w, #0 // N flag set iff the operand signs differ. | ||
2050 | | eor CARG3w, CARG1w, CARG1w, asr #31 // abs(dividend), step 1: x ^ (x >> 31). | ||
2051 | | eor CARG4w, CARG2w, CARG2w, asr #31 // abs(divisor), step 1. | ||
2052 | | sub CARG3w, CARG3w, CARG1w, asr #31 // abs(dividend), step 2: subtract (x >> 31). | ||
2053 | | sub CARG4w, CARG4w, CARG2w, asr #31 // abs(divisor), step 2. | ||
2054 | | udiv CARG1w, CARG3w, CARG4w // Unsigned quotient of the absolute values. | ||
2055 | | msub CARG1w, CARG1w, CARG4w, CARG3w // Remainder = abs(dividend) - quotient*abs(divisor). | ||
2056 | | ccmp CARG1w, #0, #4, mi // Signs differ: compare remainder with 0. Otherwise force Z=1. | ||
2057 | | sub CARG3w, CARG1w, CARG4w // Candidate result: remainder - abs(divisor). | ||
2058 | | csel CARG1w, CARG1w, CARG3w, eq // Keep the remainder if Z is set, else take the candidate. | ||
2059 | | eor CARG3w, CARG1w, CARG2w // Sign bit of (result ^ divisor). | ||
2060 | | cmp CARG3w, #0 | ||
2061 | | cneg CARG1w, CARG1w, mi // Negate if the result sign differs from the divisor sign. | ||
2062 | | ret | ||
2063 | | | ||
2064 | |//----------------------------------------------------------------------- | ||
2065 | |//-- Miscellaneous functions -------------------------------------------- | ||
2066 | |//----------------------------------------------------------------------- | ||
2067 | | | ||
2068 | |//----------------------------------------------------------------------- | ||
2069 | |//-- FFI helper functions ----------------------------------------------- | ||
2070 | |//----------------------------------------------------------------------- | ||
2071 | | | ||
2072 | |// Handler for callback functions. | ||
2073 | |// Registers are saved below (saveregs). Callback slot number in w9, g in x10. | ||
2074 | |->vm_ffi_callback: | ||
2075 | |.if FFI | ||
2076 | |.type CTSTATE, CTState, PC | ||
2077 | | saveregs | ||
2078 | | ldr CTSTATE, GL:x10->ctype_state | ||
2079 | | mov GL, x10 | ||
2080 | | add x10, sp, # CFRAME_SPACE | ||
2081 | | str w9, CTSTATE->cb.slot | ||
2082 | | stp x0, x1, CTSTATE->cb.gpr[0] | ||
2083 | | stp d0, d1, CTSTATE->cb.fpr[0] | ||
2084 | | stp x2, x3, CTSTATE->cb.gpr[2] | ||
2085 | | stp d2, d3, CTSTATE->cb.fpr[2] | ||
2086 | | stp x4, x5, CTSTATE->cb.gpr[4] | ||
2087 | | stp d4, d5, CTSTATE->cb.fpr[4] | ||
2088 | | stp x6, x7, CTSTATE->cb.gpr[6] | ||
2089 | | stp d6, d7, CTSTATE->cb.fpr[6] | ||
2090 | | str x10, CTSTATE->cb.stack | ||
2091 | | mov CARG1, CTSTATE | ||
2092 | | str CTSTATE, SAVE_PC // Any value outside of bytecode is ok. | ||
2093 | | mov CARG2, sp | ||
2094 | | bl extern lj_ccallback_enter // (CTState *cts, void *cf) | ||
2095 | | // Returns lua_State *. | ||
2096 | | ldp BASE, RC, L:CRET1->base | ||
2097 | | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 | ||
2098 | | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 | ||
2099 | | movn TISNIL, #0 | ||
2100 | | mov L, CRET1 | ||
2101 | | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] | ||
2102 | | sub RC, RC, BASE | ||
2103 | | st_vmstate ST_INTERP | ||
2104 | | and LFUNC:CARG3, CARG3, #LJ_GCVMASK | ||
2105 | | ins_callt | ||
2106 | |.endif | ||
2107 | | | ||
2108 | |->cont_ffi_callback: // Return from FFI callback. | ||
2109 | |.if FFI | ||
2110 | | ldr CTSTATE, GL->ctype_state | ||
2111 | | stp BASE, CARG4, L->base | ||
2112 | | str L, CTSTATE->L | ||
2113 | | mov CARG1, CTSTATE | ||
2114 | | mov CARG2, RA | ||
2115 | | bl extern lj_ccallback_leave // (CTState *cts, TValue *o) | ||
2116 | | ldp x0, x1, CTSTATE->cb.gpr[0] | ||
2117 | | ldp d0, d1, CTSTATE->cb.fpr[0] | ||
2118 | | b ->vm_leave_unw | ||
2119 | |.endif | ||
2120 | | | ||
2121 | |->vm_ffi_call: // Call C function via FFI. | ||
2122 | | // Caveat: needs special frame unwinding, see below. | ||
2123 | |.if FFI | ||
2124 | | .type CCSTATE, CCallState, x19 | ||
2125 | | stp fp, lr, [sp, #-32]! // Push a 32 byte frame holding fp and lr. | ||
2126 | | add fp, sp, #0 // fp = sp of this frame. | ||
2127 | | str CCSTATE, [sp, #16] // Save the previous x19 before reusing it as CCSTATE. | ||
2128 | | mov CCSTATE, x0 // The CCallState pointer arrives in x0. | ||
2129 | | ldr TMP0w, CCSTATE:x0->spadj | ||
2130 | | ldrb TMP1w, CCSTATE->nsp | ||
2131 | | add TMP2, CCSTATE, #offsetof(CCallState, stack) // TMP2 = source of the stack slots. | ||
2132 | | subs TMP1, TMP1, #1 // TMP1 = nsp-1: index of the last slot, negative if nsp is 0. | ||
2133 | | ldr TMP3, CCSTATE->func | ||
2134 | | sub sp, fp, TMP0 // Lower sp by spadj below the frame. | ||
2135 | | bmi >2 // No stack slots to copy. | ||
2136 | |1: // Copy stack slots | ||
2137 | | ldr TMP0, [TMP2, TMP1, lsl #3] | ||
2138 | | str TMP0, [sp, TMP1, lsl #3] | ||
2139 | | subs TMP1, TMP1, #1 | ||
2140 | | bpl <1 // Loop from index nsp-1 down to 0. | ||
2141 | |2: | ||
2142 | | ldp x0, x1, CCSTATE->gpr[0] // Load x0-x7 and d0-d7 from the gpr[]/fpr[] arrays. | ||
2143 | | ldp d0, d1, CCSTATE->fpr[0] | ||
2144 | | ldp x2, x3, CCSTATE->gpr[2] | ||
2145 | | ldp d2, d3, CCSTATE->fpr[2] | ||
2146 | | ldp x4, x5, CCSTATE->gpr[4] | ||
2147 | | ldp d4, d5, CCSTATE->fpr[4] | ||
2148 | | ldp x6, x7, CCSTATE->gpr[6] | ||
2149 | | ldp d6, d7, CCSTATE->fpr[6] | ||
2150 | | ldr x8, CCSTATE->retp // x8 receives the retp field. | ||
2151 | | blr TMP3 // Call CCSTATE->func. | ||
2152 | | mov sp, fp // Drop the outgoing stack area. | ||
2153 | | stp x0, x1, CCSTATE->gpr[0] // Store x0/x1 and d0-d3 back into the call state. | ||
2154 | | stp d0, d1, CCSTATE->fpr[0] | ||
2155 | | stp d2, d3, CCSTATE->fpr[2] | ||
2156 | | ldr CCSTATE, [sp, #16] // Restore the saved x19. | ||
2157 | | ldp fp, lr, [sp], #32 // Pop the frame. | ||
2158 | | ret | ||
2159 | |.endif | ||
2160 | |// Note: vm_ffi_call must be the last function in this object file! | ||
2161 | | | ||
2162 | |//----------------------------------------------------------------------- | ||
2163 | } | ||
2164 | |||
2165 | /* Generate the code for a single instruction. */ | ||
2166 | static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ||
2167 | { | ||
2168 | int vk = 0; | ||
2169 | |=>defop: | ||
2170 | |||
2171 | switch (op) { | ||
2172 | |||
2173 | /* -- Comparison ops ---------------------------------------------------- */ | ||
2174 | |||
2175 | /* Remember: all ops branch for a true comparison, fall through otherwise. */ | ||
2176 | |||
2177 | case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: | ||
2178 | | // RA = src1, RC = src2, JMP with RC = target | ||
2179 | | ldr CARG1, [BASE, RA, lsl #3] | ||
2180 | | ldrh RBw, [PC, # OFS_RD] | ||
2181 | | ldr CARG2, [BASE, RC, lsl #3] | ||
2182 | | add PC, PC, #4 | ||
2183 | | add RB, PC, RB, lsl #2 | ||
2184 | | sub RB, RB, #0x20000 | ||
2185 | | checkint CARG1, >3 | ||
2186 | | checkint CARG2, >4 | ||
2187 | | cmp CARG1w, CARG2w | ||
2188 | if (op == BC_ISLT) { | ||
2189 | | csel PC, RB, PC, lt | ||
2190 | } else if (op == BC_ISGE) { | ||
2191 | | csel PC, RB, PC, ge | ||
2192 | } else if (op == BC_ISLE) { | ||
2193 | | csel PC, RB, PC, le | ||
2194 | } else { | ||
2195 | | csel PC, RB, PC, gt | ||
2196 | } | ||
2197 | |1: | ||
2198 | | ins_next | ||
2199 | | | ||
2200 | |3: // RA not int. | ||
2201 | | ldr FARG1, [BASE, RA, lsl #3] | ||
2202 | | blo ->vmeta_comp | ||
2203 | | ldr FARG2, [BASE, RC, lsl #3] | ||
2204 | | cmp TISNUMhi, CARG2, lsr #32 | ||
2205 | | bhi >5 | ||
2206 | | bne ->vmeta_comp | ||
2207 | | // RA number, RC int. | ||
2208 | | scvtf FARG2, CARG2w | ||
2209 | | b >5 | ||
2210 | | | ||
2211 | |4: // RA int, RC not int | ||
2212 | | ldr FARG2, [BASE, RC, lsl #3] | ||
2213 | | blo ->vmeta_comp | ||
2214 | | // RA int, RC number. | ||
2215 | | scvtf FARG1, CARG1w | ||
2216 | | | ||
2217 | |5: // RA number, RC number | ||
2218 | | fcmp FARG1, FARG2 | ||
2219 | | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. | ||
2220 | if (op == BC_ISLT) { | ||
2221 | | csel PC, RB, PC, lo | ||
2222 | } else if (op == BC_ISGE) { | ||
2223 | | csel PC, RB, PC, hs | ||
2224 | } else if (op == BC_ISLE) { | ||
2225 | | csel PC, RB, PC, ls | ||
2226 | } else { | ||
2227 | | csel PC, RB, PC, hi | ||
2228 | } | ||
2229 | | b <1 | ||
2230 | break; | ||
2231 | |||
2232 | case BC_ISEQV: case BC_ISNEV: | ||
2233 | vk = op == BC_ISEQV; | ||
2234 | | // RA = src1, RC = src2, JMP with RC = target | ||
2235 | | ldr CARG1, [BASE, RA, lsl #3] | ||
2236 | | add RC, BASE, RC, lsl #3 | ||
2237 | | ldrh RBw, [PC, # OFS_RD] | ||
2238 | | ldr CARG3, [RC] | ||
2239 | | add PC, PC, #4 | ||
2240 | | add RB, PC, RB, lsl #2 | ||
2241 | | sub RB, RB, #0x20000 | ||
2242 | | asr ITYPE, CARG3, #47 | ||
2243 | | cmn ITYPE, #-LJ_TISNUM | ||
2244 | if (vk) { | ||
2245 | | bls ->BC_ISEQN_Z | ||
2246 | } else { | ||
2247 | | bls ->BC_ISNEN_Z | ||
2248 | } | ||
2249 | | // RC is not a number. | ||
2250 | | asr TMP0, CARG1, #47 | ||
2251 | |.if FFI | ||
2252 | | // Check if RC or RA is a cdata. | ||
2253 | | cmn ITYPE, #-LJ_TCDATA | ||
2254 | | ccmn TMP0, #-LJ_TCDATA, #4, ne | ||
2255 | | beq ->vmeta_equal_cd | ||
2256 | |.endif | ||
2257 | | cmp CARG1, CARG3 | ||
2258 | | bne >2 | ||
2259 | | // Tag and value are equal. | ||
2260 | if (vk) { | ||
2261 | |->BC_ISEQV_Z: | ||
2262 | | mov PC, RB // Perform branch. | ||
2263 | } | ||
2264 | |1: | ||
2265 | | ins_next | ||
2266 | | | ||
2267 | |2: // Check if the tags are the same and it's a table or userdata. | ||
2268 | | cmp ITYPE, TMP0 | ||
2269 | | ccmn ITYPE, #-LJ_TISTABUD, #2, eq | ||
2270 | if (vk) { | ||
2271 | | bhi <1 | ||
2272 | } else { | ||
2273 | | bhi ->BC_ISEQV_Z // Reuse code from opposite instruction. | ||
2274 | } | ||
2275 | | // Different tables or userdatas. Need to check __eq metamethod. | ||
2276 | | // Field metatable must be at same offset for GCtab and GCudata! | ||
2277 | | and TAB:CARG2, CARG1, #LJ_GCVMASK | ||
2278 | | ldr TAB:TMP2, TAB:CARG2->metatable | ||
2279 | if (vk) { | ||
2280 | | cbz TAB:TMP2, <1 // No metatable? | ||
2281 | | ldrb TMP1w, TAB:TMP2->nomm | ||
2282 | | mov CARG4, #0 // ne = 0 | ||
2283 | | tbnz TMP1w, #MM_eq, <1 // 'no __eq' flag set: done. | ||
2284 | } else { | ||
2285 | | cbz TAB:TMP2, ->BC_ISEQV_Z // No metatable? | ||
2286 | | ldrb TMP1w, TAB:TMP2->nomm | ||
2287 | | mov CARG4, #1 // ne = 1. | ||
2288 | | tbnz TMP1w, #MM_eq, ->BC_ISEQV_Z // 'no __eq' flag set: done. | ||
2289 | } | ||
2290 | | b ->vmeta_equal | ||
2291 | break; | ||
2292 | |||
2293 | case BC_ISEQS: case BC_ISNES: | ||
2294 | vk = op == BC_ISEQS; | ||
2295 | | // RA = src, RC = str_const (~), JMP with RC = target | ||
2296 | | ldr CARG1, [BASE, RA, lsl #3] | ||
2297 | | mvn RC, RC | ||
2298 | | ldrh RBw, [PC, # OFS_RD] | ||
2299 | | ldr CARG2, [KBASE, RC, lsl #3] | ||
2300 | | add PC, PC, #4 | ||
2301 | | movn TMP0, #~LJ_TSTR | ||
2302 | |.if FFI | ||
2303 | | asr ITYPE, CARG1, #47 | ||
2304 | |.endif | ||
2305 | | add RB, PC, RB, lsl #2 | ||
2306 | | add CARG2, CARG2, TMP0, lsl #47 | ||
2307 | | sub RB, RB, #0x20000 | ||
2308 | |.if FFI | ||
2309 | | cmn ITYPE, #-LJ_TCDATA | ||
2310 | | beq ->vmeta_equal_cd | ||
2311 | |.endif | ||
2312 | | cmp CARG1, CARG2 | ||
2313 | if (vk) { | ||
2314 | | csel PC, RB, PC, eq | ||
2315 | } else { | ||
2316 | | csel PC, RB, PC, ne | ||
2317 | } | ||
2318 | | ins_next | ||
2319 | break; | ||
2320 | |||
2321 | case BC_ISEQN: case BC_ISNEN: | ||
2322 | vk = op == BC_ISEQN; | ||
2323 | | // RA = src, RC = num_const (~), JMP with RC = target | ||
2324 | | ldr CARG1, [BASE, RA, lsl #3] | ||
2325 | | add RC, KBASE, RC, lsl #3 | ||
2326 | | ldrh RBw, [PC, # OFS_RD] | ||
2327 | | ldr CARG3, [RC] | ||
2328 | | add PC, PC, #4 | ||
2329 | | add RB, PC, RB, lsl #2 | ||
2330 | | sub RB, RB, #0x20000 | ||
2331 | if (vk) { | ||
2332 | |->BC_ISEQN_Z: | ||
2333 | } else { | ||
2334 | |->BC_ISNEN_Z: | ||
2335 | } | ||
2336 | | checkint CARG1, >4 | ||
2337 | | checkint CARG3, >6 | ||
2338 | | cmp CARG1w, CARG3w | ||
2339 | |1: | ||
2340 | if (vk) { | ||
2341 | | csel PC, RB, PC, eq | ||
2342 | |2: | ||
2343 | } else { | ||
2344 | |2: | ||
2345 | | csel PC, RB, PC, ne | ||
2346 | } | ||
2347 | |3: | ||
2348 | | ins_next | ||
2349 | | | ||
2350 | |4: // RA not int. | ||
2351 | |.if FFI | ||
2352 | | blo >7 | ||
2353 | |.else | ||
2354 | | blo <2 | ||
2355 | |.endif | ||
2356 | | ldr FARG1, [BASE, RA, lsl #3] | ||
2357 | | ldr FARG2, [RC] | ||
2358 | | cmp TISNUMhi, CARG3, lsr #32 | ||
2359 | | bne >5 | ||
2360 | | // RA number, RC int. | ||
2361 | | scvtf FARG2, CARG3w | ||
2362 | |5: | ||
2363 | | // RA number, RC number. | ||
2364 | | fcmp FARG1, FARG2 | ||
2365 | | b <1 | ||
2366 | | | ||
2367 | |6: // RA int, RC number | ||
2368 | | ldr FARG2, [RC] | ||
2369 | | scvtf FARG1, CARG1w | ||
2370 | | fcmp FARG1, FARG2 | ||
2371 | | b <1 | ||
2372 | | | ||
2373 | |.if FFI | ||
2374 | |7: | ||
2375 | | asr ITYPE, CARG1, #47 | ||
2376 | | cmn ITYPE, #-LJ_TCDATA | ||
2377 | | bne <2 | ||
2378 | | b ->vmeta_equal_cd | ||
2379 | |.endif | ||
2380 | break; | ||
2381 | |||
2382 | case BC_ISEQP: case BC_ISNEP: | ||
2383 | vk = op == BC_ISEQP; | ||
2384 | | // RA = src, RC = primitive_type (~), JMP with RC = target | ||
2385 | | ldr TMP0, [BASE, RA, lsl #3] | ||
2386 | | ldrh RBw, [PC, # OFS_RD] | ||
2387 | | add PC, PC, #4 | ||
2388 | | add RC, RC, #1 | ||
2389 | | add RB, PC, RB, lsl #2 | ||
2390 | |.if FFI | ||
2391 | | asr ITYPE, TMP0, #47 | ||
2392 | | cmn ITYPE, #-LJ_TCDATA | ||
2393 | | beq ->vmeta_equal_cd | ||
2394 | | cmn RC, ITYPE | ||
2395 | |.else | ||
2396 | | cmn RC, TMP0, asr #47 | ||
2397 | |.endif | ||
2398 | | sub RB, RB, #0x20000 | ||
2399 | if (vk) { | ||
2400 | | csel PC, RB, PC, eq | ||
2401 | } else { | ||
2402 | | csel PC, RB, PC, ne | ||
2403 | } | ||
2404 | | ins_next | ||
2405 | break; | ||
2406 | |||
2407 | /* -- Unary test and copy ops ------------------------------------------- */ | ||
2408 | |||
2409 | case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: | ||
2410 | | // RA = dst or unused, RC = src, JMP with RC = target | ||
2411 | | ldrh RBw, [PC, # OFS_RD] | ||
2412 | | ldr TMP0, [BASE, RC, lsl #3] | ||
2413 | | add PC, PC, #4 | ||
2414 | | mov_false TMP1 | ||
2415 | | add RB, PC, RB, lsl #2 | ||
2416 | | cmp TMP0, TMP1 | ||
2417 | | sub RB, RB, #0x20000 | ||
2418 | if (op == BC_ISTC || op == BC_IST) { | ||
2419 | if (op == BC_ISTC) { | ||
2420 | | csel RA, RA, RC, lo | ||
2421 | } | ||
2422 | | csel PC, RB, PC, lo | ||
2423 | } else { | ||
2424 | if (op == BC_ISFC) { | ||
2425 | | csel RA, RA, RC, hs | ||
2426 | } | ||
2427 | | csel PC, RB, PC, hs | ||
2428 | } | ||
2429 | if (op == BC_ISTC || op == BC_ISFC) { | ||
2430 | | str TMP0, [BASE, RA, lsl #3] | ||
2431 | } | ||
2432 | | ins_next | ||
2433 | break; | ||
2434 | |||
2435 | case BC_ISTYPE: | ||
2436 | | // RA = src, RC = -type | ||
2437 | | ldr TMP0, [BASE, RA, lsl #3] // Load the 8 byte slot RA. | ||
2438 | | cmn RC, TMP0, asr #47 // Z set iff RC + (slot >> 47) == 0. | ||
2439 | | bne ->vmeta_istype // Mismatch: branch to ->vmeta_istype. | ||
2440 | | ins_next | ||
2441 | break; | ||
2442 | case BC_ISNUM: | ||
2443 | | // RA = src, RC = -(TISNUM-1) | ||
2444 | | ldr TMP0, [BASE, RA, lsl #3] // RA is an unscaled slot index: scale by 8, same as BC_ISTYPE. | ||
2445 | | checknum TMP0, ->vmeta_istype // Branches to ->vmeta_istype when the check fails. | ||
2446 | | ins_next | ||
2447 | break; | ||
2448 | |||
2449 | /* -- Unary ops --------------------------------------------------------- */ | ||
2450 | |||
2451 | case BC_MOV: | ||
2452 | | // RA = dst, RC = src | ||
2453 | | ldr TMP0, [BASE, RC, lsl #3] | ||
2454 | | str TMP0, [BASE, RA, lsl #3] | ||
2455 | | ins_next | ||
2456 | break; | ||
2457 | case BC_NOT: | ||
2458 | | // RA = dst, RC = src | ||
2459 | | ldr TMP0, [BASE, RC, lsl #3] | ||
2460 | | mov_false TMP1 // TMP1 = false constant. | ||
2461 | | mov_true TMP2 // TMP2 = true constant. | ||
2462 | | cmp TMP0, TMP1 | ||
2463 | | csel TMP0, TMP1, TMP2, lo // src unsigned-below false: result false. Otherwise result true. | ||
2464 | | str TMP0, [BASE, RA, lsl #3] | ||
2465 | | ins_next | ||
2466 | break; | ||
2467 | case BC_UNM: | ||
2468 | | // RA = dst, RC = src | ||
2469 | | ldr TMP0, [BASE, RC, lsl #3] | ||
2470 | | asr ITYPE, TMP0, #47 | ||
2471 | | cmn ITYPE, #-LJ_TISNUM | ||
2472 | | bhi ->vmeta_unm | ||
2473 | | eor TMP0, TMP0, #U64x(80000000,00000000) | ||
2474 | | bne >5 | ||
2475 | | negs TMP0w, TMP0w | ||
2476 | | movz CARG3, #0x41e0, lsl #48 // 2^31. | ||
2477 | | add TMP0, TMP0, TISNUM | ||
2478 | | csel TMP0, TMP0, CARG3, vc | ||
2479 | |5: | ||
2480 | | str TMP0, [BASE, RA, lsl #3] | ||
2481 | | ins_next | ||
2482 | break; | ||
2483 | case BC_LEN: | ||
2484 | | // RA = dst, RC = src | ||
2485 | | ldr CARG1, [BASE, RC, lsl #3] | ||
2486 | | asr ITYPE, CARG1, #47 | ||
2487 | | cmn ITYPE, #-LJ_TSTR | ||
2488 | | and CARG1, CARG1, #LJ_GCVMASK | ||
2489 | | bne >2 | ||
2490 | | ldr CARG1w, STR:CARG1->len | ||
2491 | |1: | ||
2492 | | add CARG1, CARG1, TISNUM | ||
2493 | | str CARG1, [BASE, RA, lsl #3] | ||
2494 | | ins_next | ||
2495 | | | ||
2496 | |2: | ||
2497 | | cmn ITYPE, #-LJ_TTAB | ||
2498 | | bne ->vmeta_len | ||
2499 | #if LJ_52 | ||
2500 | | ldr TAB:CARG2, TAB:CARG1->metatable | ||
2501 | | cbnz TAB:CARG2, >9 | ||
2502 | |3: | ||
2503 | #endif | ||
2504 | |->BC_LEN_Z: | ||
2505 | | bl extern lj_tab_len // (GCtab *t) | ||
2506 | | // Returns uint32_t (but less than 2^31). | ||
2507 | | b <1 | ||
2508 | | | ||
2509 | #if LJ_52 | ||
2510 | |9: | ||
2511 | | ldrb TMP1w, TAB:CARG2->nomm | ||
2512 | | tbnz TMP1w, #MM_len, <3 // 'no __len' flag set: done. | ||
2513 | | b ->vmeta_len | ||
2514 | #endif | ||
2515 | break; | ||
2516 | |||
2517 | /* -- Binary ops -------------------------------------------------------- */ | ||
2518 | |||
2519 | |.macro ins_arithcheck_int, target | ||
2520 | | checkint CARG1, target | ||
2521 | | checkint CARG2, target | ||
2522 | |.endmacro | ||
2523 | | | ||
2524 | |.macro ins_arithcheck_num, target | ||
2525 | | checknum CARG1, target | ||
2526 | | checknum CARG2, target | ||
2527 | |.endmacro | ||
2528 | | | ||
2529 | |.macro ins_arithcheck_nzdiv, target | ||
2530 | | cbz CARG2w, target | ||
2531 | |.endmacro | ||
2532 | | | ||
2533 | |.macro ins_arithhead | ||
2534 | ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); | ||
2535 | ||if (vk == 1) { | ||
2536 | | and RC, RC, #255 | ||
2537 | | decode_RB RB, INS | ||
2538 | ||} else { | ||
2539 | | decode_RB RB, INS | ||
2540 | | and RC, RC, #255 | ||
2541 | ||} | ||
2542 | |.endmacro | ||
2543 | | | ||
2544 | |.macro ins_arithload, reg1, reg2 | ||
2545 | | // RA = dst, RB = src1, RC = src2 | num_const | ||
2546 | ||switch (vk) { | ||
2547 | ||case 0: | ||
2548 | | ldr reg1, [BASE, RB, lsl #3] | ||
2549 | | ldr reg2, [KBASE, RC, lsl #3] | ||
2550 | || break; | ||
2551 | ||case 1: | ||
2552 | | ldr reg1, [KBASE, RC, lsl #3] | ||
2553 | | ldr reg2, [BASE, RB, lsl #3] | ||
2554 | || break; | ||
2555 | ||default: | ||
2556 | | ldr reg1, [BASE, RB, lsl #3] | ||
2557 | | ldr reg2, [BASE, RC, lsl #3] | ||
2558 | || break; | ||
2559 | ||} | ||
2560 | |.endmacro | ||
2561 | | | ||
2562 | |.macro ins_arithfallback, ins | ||
2563 | ||switch (vk) { | ||
2564 | ||case 0: | ||
2565 | | ins ->vmeta_arith_vn | ||
2566 | || break; | ||
2567 | ||case 1: | ||
2568 | | ins ->vmeta_arith_nv | ||
2569 | || break; | ||
2570 | ||default: | ||
2571 | | ins ->vmeta_arith_vv | ||
2572 | || break; | ||
2573 | ||} | ||
2574 | |.endmacro | ||
2575 | | | ||
2576 | |.macro ins_arithmod, res, reg1, reg2 // res = reg1 - floor(reg1/reg2)*reg2. Clobbers d2. | ||
2577 | | fdiv d2, reg1, reg2 | ||
2578 | | frintm d2, d2 // Round the quotient towards minus infinity. | ||
2579 | | fmsub res, d2, reg2, reg1 // res = reg1 - d2*reg2. | ||
2580 | |.endmacro | ||
2581 | | | ||
2582 | |.macro ins_arithdn, intins, fpins | ||
2583 | | ins_arithhead | ||
2584 | | ins_arithload CARG1, CARG2 | ||
2585 | | ins_arithcheck_int >5 | ||
2586 | |.if "intins" == "smull" | ||
2587 | | smull CARG1, CARG1w, CARG2w | ||
2588 | | cmp CARG1, CARG1, sxtw | ||
2589 | | mov CARG1w, CARG1w | ||
2590 | | ins_arithfallback bne | ||
2591 | |.elif "intins" == "ins_arithmodi" | ||
2592 | | ins_arithfallback ins_arithcheck_nzdiv | ||
2593 | | bl ->vm_modi | ||
2594 | |.else | ||
2595 | | intins CARG1w, CARG1w, CARG2w | ||
2596 | | ins_arithfallback bvs | ||
2597 | |.endif | ||
2598 | | add CARG1, CARG1, TISNUM | ||
2599 | | str CARG1, [BASE, RA, lsl #3] | ||
2600 | |4: | ||
2601 | | ins_next | ||
2602 | | | ||
2603 | |5: // FP variant. | ||
2604 | | ins_arithload FARG1, FARG2 | ||
2605 | | ins_arithfallback ins_arithcheck_num | ||
2606 | | fpins FARG1, FARG1, FARG2 | ||
2607 | | str FARG1, [BASE, RA, lsl #3] | ||
2608 | | b <4 | ||
2609 | |.endmacro | ||
2610 | | | ||
2611 | |.macro ins_arithfp, fpins | ||
2612 | | ins_arithhead | ||
2613 | | ins_arithload CARG1, CARG2 | ||
2614 | | ins_arithload FARG1, FARG2 | ||
2615 | | ins_arithfallback ins_arithcheck_num | ||
2616 | |.if "fpins" == "fpow" | ||
2617 | | bl extern pow | ||
2618 | |.else | ||
2619 | | fpins FARG1, FARG1, FARG2 | ||
2620 | |.endif | ||
2621 | | str FARG1, [BASE, RA, lsl #3] | ||
2622 | | ins_next | ||
2623 | |.endmacro | ||
2624 | |||
2625 | case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: /* Addition: adds on the integer path, fadd on the FP path. */ | ||
2626 | | ins_arithdn adds, fadd | ||
2627 | break; | ||
2628 | case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: /* Subtraction: subs on the integer path, fsub on the FP path. */ | ||
2629 | | ins_arithdn subs, fsub | ||
2630 | break; | ||
2631 | case BC_MULVN: case BC_MULNV: case BC_MULVV: /* Multiplication: smull with a 32 bit range check, fmul on the FP path. */ | ||
2632 | | ins_arithdn smull, fmul | ||
2633 | break; | ||
2634 | case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: /* Division: FP only (fdiv). */ | ||
2635 | | ins_arithfp fdiv | ||
2636 | break; | ||
2637 | case BC_MODVN: case BC_MODNV: case BC_MODVV: /* Modulo: vm_modi on the integer path, ins_arithmod on the FP path. */ | ||
2638 | | ins_arithdn ins_arithmodi, ins_arithmod | ||
2639 | break; | ||
2640 | case BC_POW: /* Power: FP only, via the external C function pow. */ | ||
2641 | | // NYI: (partial) integer arithmetic. | ||
2642 | | ins_arithfp fpow | ||
2643 | break; | ||
2644 | |||
2645 | case BC_CAT: /* Concatenate slots RB..RC through lj_meta_cat, then copy slot RB to slot RA. */ | ||
2646 | | decode_RB RB, INS | ||
2647 | | and RC, RC, #255 | ||
2648 | | // RA = dst, RB = src_start, RC = src_end | ||
2649 | | str BASE, L->base | ||
2650 | | sub CARG3, RC, RB | ||
2651 | | add CARG2, BASE, RC, lsl #3 | ||
2652 | |->BC_CAT_Z: | ||
2653 | | // RA = dst, CARG2 = top-1, CARG3 = left | ||
2654 | | mov CARG1, L | ||
2655 | | str PC, SAVE_PC | ||
2656 | | bl extern lj_meta_cat // (lua_State *L, TValue *top, int left) | ||
2657 | | // Returns NULL (finished) or TValue * (metamethod). A non-NULL result continues at vmeta_binop. | ||
2658 | | ldrb RBw, [PC, #-4+OFS_RB] // Re-read the RB operand byte from the instruction word at PC-4. | ||
2659 | | ldr BASE, L->base | ||
2660 | | cbnz CRET1, ->vmeta_binop | ||
2661 | | ldr TMP0, [BASE, RB, lsl #3] | ||
2662 | | str TMP0, [BASE, RA, lsl #3] // Copy result to RA. | ||
2663 | | ins_next | ||
2664 | break; | ||
2665 | |||
2666 | /* -- Constant ops ------------------------------------------------------ */ | ||
2667 | |||
2668 | case BC_KSTR: /* Store a tagged string constant from KBASE into slot RA. */ | ||
2669 | | // RA = dst, RC = str_const (~): the operand is complemented, so it is inverted before indexing KBASE. | ||
2670 | | mvn RC, RC | ||
2671 | | ldr TMP0, [KBASE, RC, lsl #3] | ||
2672 | | movn TMP1, #~LJ_TSTR // TMP1 = ~(~LJ_TSTR) = LJ_TSTR as a 64 bit value. | ||
2673 | | add TMP0, TMP0, TMP1, lsl #47 // Add the type tag shifted left by 47 to the loaded constant. | ||
2674 | | str TMP0, [BASE, RA, lsl #3] | ||
2675 | | ins_next | ||
2676 | break; | ||
2677 | case BC_KCDATA: /* Store a tagged cdata constant into slot RA; the body is only assembled when FFI is set. */ | ||
2678 | |.if FFI | ||
2679 | | // RA = dst, RC = cdata_const (~): the operand is complemented, so it is inverted before indexing KBASE. | ||
2680 | | mvn RC, RC | ||
2681 | | ldr TMP0, [KBASE, RC, lsl #3] | ||
2682 | | movn TMP1, #~LJ_TCDATA // TMP1 = ~(~LJ_TCDATA) = LJ_TCDATA as a 64 bit value. | ||
2683 | | add TMP0, TMP0, TMP1, lsl #47 // Add the type tag shifted left by 47 to the loaded constant. | ||
2684 | | str TMP0, [BASE, RA, lsl #3] | ||
2685 | | ins_next | ||
2686 | |.endif | ||
2687 | break; | ||
2688 | case BC_KSHORT: /* Store a 16 bit signed literal, with TISNUM added, into slot RA. */ | ||
2689 | | // RA = dst, RC = int16_literal | ||
2690 | | sxth RCw, RCw // Sign-extend the low 16 bits to 32 bits. | ||
2691 | | add TMP0, RC, TISNUM | ||
2692 | | str TMP0, [BASE, RA, lsl #3] | ||
2693 | | ins_next | ||
2694 | break; | ||
2695 | case BC_KNUM: /* Copy the 8 byte constant KBASE[RC] to slot RA unchanged. */ | ||
2696 | | // RA = dst, RC = num_const | ||
2697 | | ldr TMP0, [KBASE, RC, lsl #3] | ||
2698 | | str TMP0, [BASE, RA, lsl #3] | ||
2699 | | ins_next | ||
2700 | break; | ||
2701 | case BC_KPRI: /* Store the value ~(RC << 47) into slot RA. */ | ||
2702 | | // RA = dst, RC = primitive_type (~) | ||
2703 | | mvn TMP0, RC, lsl #47 | ||
2704 | | str TMP0, [BASE, RA, lsl #3] | ||
2705 | | ins_next | ||
2706 | break; | ||
2707 | case BC_KNIL: /* Store TISNIL into slots RA through RC (inclusive). */ | ||
2708 | | // RA = base, RC = end. The loop shape always writes at least two slots, so it relies on end > base. | ||
2709 | | add RA, BASE, RA, lsl #3 | ||
2710 | | add RC, BASE, RC, lsl #3 | ||
2711 | | str TISNIL, [RA], #8 | ||
2712 | |1: | ||
2713 | | cmp RA, RC | ||
2714 | | str TISNIL, [RA], #8 | ||
2715 | | blt <1 | ||
2716 | | ins_next_ | ||
2717 | break; | ||
2718 | |||
2719 | /* -- Upvalue and function ops ------------------------------------------ */ | ||
2720 | |||
2721 | case BC_UGET: /* Load the value of upvalue RC of the current function into slot RA. */ | ||
2722 | | // RA = dst, RC = uvnum | ||
2723 | | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] | ||
2724 | | add RC, RC, #offsetof(GCfuncL, uvptr)/8 | ||
2725 | | and LFUNC:CARG2, CARG2, #LJ_GCVMASK | ||
2726 | | ldr UPVAL:CARG2, [LFUNC:CARG2, RC, lsl #3] | ||
2727 | | ldr CARG2, UPVAL:CARG2->v | ||
2728 | | ldr TMP0, [CARG2] | ||
2729 | | str TMP0, [BASE, RA, lsl #3] | ||
2730 | | ins_next | ||
2731 | break; | ||
2732 | case BC_USETV: /* Store slot RC into upvalue RA, then run the GC write barrier check. */ | ||
2733 | | // RA = uvnum, RC = src | ||
2734 | | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] | ||
2735 | | add RA, RA, #offsetof(GCfuncL, uvptr)/8 | ||
2736 | | and LFUNC:CARG2, CARG2, #LJ_GCVMASK | ||
2737 | | ldr UPVAL:CARG1, [LFUNC:CARG2, RA, lsl #3] | ||
2738 | | ldr CARG3, [BASE, RC, lsl #3] | ||
2739 | | ldr CARG2, UPVAL:CARG1->v | ||
2740 | | ldrb TMP2w, UPVAL:CARG1->marked | ||
2741 | | ldrb TMP0w, UPVAL:CARG1->closed | ||
2742 | | asr ITYPE, CARG3, #47 | ||
2743 | | str CARG3, [CARG2] | ||
2744 | | add ITYPE, ITYPE, #-LJ_TISGCV | ||
2745 | | tst TMP2w, #LJ_GC_BLACK // isblack(uv) | ||
2746 | | ccmp TMP0w, #0, #4, ne // && uv->closed | ||
2747 | | ccmn ITYPE, #-(LJ_TNUMX - LJ_TISGCV), #0, ne // && tvisgcv(v) | ||
2748 | | bhi >2 | ||
2749 | |1: | ||
2750 | | ins_next | ||
2751 | | | ||
2752 | |2: // Check if new value is white. | ||
2753 | | and GCOBJ:CARG3, CARG3, #LJ_GCVMASK | ||
2754 | | ldrb TMP1w, GCOBJ:CARG3->gch.marked | ||
2755 | | tst TMP1w, #LJ_GC_WHITES // iswhite(v) | ||
2756 | | beq <1 | ||
2757 | | // Crossed a write barrier. Move the barrier forward. | ||
2758 | | mov CARG1, GL | ||
2759 | | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv) | ||
2760 | | b <1 | ||
2761 | break; | ||
2762 | case BC_USETS: /* Store a tagged string constant into upvalue RA, then run the GC write barrier check. */ | ||
2763 | | // RA = uvnum, RC = str_const (~) | ||
2764 | | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] | ||
2765 | | add RA, RA, #offsetof(GCfuncL, uvptr)/8 | ||
2766 | | mvn RC, RC | ||
2767 | | and LFUNC:CARG2, CARG2, #LJ_GCVMASK | ||
2768 | | ldr UPVAL:CARG1, [LFUNC:CARG2, RA, lsl #3] | ||
2769 | | ldr STR:CARG3, [KBASE, RC, lsl #3] | ||
2770 | | movn TMP0, #~LJ_TSTR | ||
2771 | | ldr CARG2, UPVAL:CARG1->v | ||
2772 | | ldrb TMP2w, UPVAL:CARG1->marked | ||
2773 | | add TMP0, STR:CARG3, TMP0, lsl #47 | ||
2774 | | ldrb TMP1w, STR:CARG3->marked | ||
2775 | | str TMP0, [CARG2] | ||
2776 | | tbnz TMP2w, #2, >2 // isblack(uv) | ||
2777 | |1: | ||
2778 | | ins_next | ||
2779 | | | ||
2780 | |2: // Check if string is white and ensure upvalue is closed. | ||
2781 | | ldrb TMP0w, UPVAL:CARG1->closed | ||
2782 | | tst TMP1w, #LJ_GC_WHITES // iswhite(str) | ||
2783 | | ccmp TMP0w, #0, #4, ne // && uv->closed; otherwise flags are forced to "equal" (skip). | ||
2784 | | beq <1 | ||
2785 | | // Crossed a write barrier. Move the barrier forward. | ||
2786 | | mov CARG1, GL | ||
2787 | | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv) | ||
2788 | | b <1 | ||
2789 | break; | ||
2790 | case BC_USETN: /* Copy the 8 byte constant KBASE[RC] into upvalue RA; no write barrier code is emitted here. */ | ||
2791 | | // RA = uvnum, RC = num_const | ||
2792 | | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] | ||
2793 | | add RA, RA, #offsetof(GCfuncL, uvptr)/8 | ||
2794 | | and LFUNC:CARG2, CARG2, #LJ_GCVMASK | ||
2795 | | ldr UPVAL:CARG2, [LFUNC:CARG2, RA, lsl #3] | ||
2796 | | ldr TMP0, [KBASE, RC, lsl #3] | ||
2797 | | ldr CARG2, UPVAL:CARG2->v | ||
2798 | | str TMP0, [CARG2] | ||
2799 | | ins_next | ||
2800 | break; | ||
2801 | case BC_USETP: /* Store the value ~(RC << 47) into upvalue RA; no write barrier code is emitted here. */ | ||
2802 | | // RA = uvnum, RC = primitive_type (~) | ||
2803 | | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] | ||
2804 | | add RA, RA, #offsetof(GCfuncL, uvptr)/8 | ||
2805 | | and LFUNC:CARG2, CARG2, #LJ_GCVMASK | ||
2806 | | ldr UPVAL:CARG2, [LFUNC:CARG2, RA, lsl #3] | ||
2807 | | mvn TMP0, RC, lsl #47 | ||
2808 | | ldr CARG2, UPVAL:CARG2->v | ||
2809 | | str TMP0, [CARG2] | ||
2810 | | ins_next | ||
2811 | break; | ||
2812 | |||
2813 | case BC_UCLO: /* If L->openupval is non-NULL, call lj_func_closeuv with level = &BASE[RA]; then jump to the target. */ | ||
2814 | | // RA = level, RC = target | ||
2815 | | ldr CARG3, L->openupval | ||
2816 | | add RC, PC, RC, lsl #2 | ||
2817 | | str BASE, L->base | ||
2818 | | sub PC, RC, #0x20000 // New PC = PC + RC*4 - 0x20000, i.e. the operand is biased by 0x8000 instructions. | ||
2819 | | cbz CARG3, >1 | ||
2820 | | mov CARG1, L | ||
2821 | | add CARG2, BASE, RA, lsl #3 | ||
2822 | | bl extern lj_func_closeuv // (lua_State *L, TValue *level) | ||
2823 | | ldr BASE, L->base | ||
2824 | |1: | ||
2825 | | ins_next | ||
2826 | break; | ||
2827 | |||
2828 | case BC_FNEW: /* Create a closure with lj_func_newL_gc and store it, tagged LJ_TFUNC, into slot RA. */ | ||
2829 | | // RA = dst, RC = proto_const (~) (holding function prototype) | ||
2830 | | mvn RC, RC | ||
2831 | | str BASE, L->base | ||
2832 | | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] | ||
2833 | | str PC, SAVE_PC | ||
2834 | | ldr CARG2, [KBASE, RC, lsl #3] | ||
2835 | | mov CARG1, L | ||
2836 | | and LFUNC:CARG3, CARG3, #LJ_GCVMASK | ||
2837 | | // (lua_State *L, GCproto *pt, GCfuncL *parent) | ||
2838 | | bl extern lj_func_newL_gc | ||
2839 | | // Returns GCfuncL *. | ||
2840 | | ldr BASE, L->base | ||
2841 | | movn TMP0, #~LJ_TFUNC | ||
2842 | | add CRET1, CRET1, TMP0, lsl #47 | ||
2843 | | str CRET1, [BASE, RA, lsl #3] | ||
2844 | | ins_next | ||
2845 | break; | ||
2846 | |||
2847 | /* -- Table ops --------------------------------------------------------- */ | ||
2848 | |||
2849 | case BC_TNEW: /* Create a new table (lj_tab_new) ... */ | ||
2850 | case BC_TDUP: /* ... or duplicate a template table (lj_tab_dup); store the tagged result into slot RA. */ | ||
2851 | | // RA = dst, RC = (hbits|asize) | tab_const (~) | ||
2852 | | ldp CARG3, CARG4, GL->gc.total // Assumes threshold follows total. | ||
2853 | | str BASE, L->base | ||
2854 | | str PC, SAVE_PC | ||
2855 | | mov CARG1, L | ||
2856 | | cmp CARG3, CARG4 | ||
2857 | | bhs >5 | ||
2858 | |1: | ||
2859 | if (op == BC_TNEW) { | ||
2860 | | and CARG2, RC, #0x7ff | ||
2861 | | lsr CARG3, RC, #11 | ||
2862 | | cmp CARG2, #0x7ff | ||
2863 | | mov TMP0, #0x801 | ||
2864 | | csel CARG2, CARG2, TMP0, ne | ||
2865 | | bl extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits) | ||
2866 | | // Returns GCtab *. The asize field value 0x7ff is replaced by 0x801 above. | ||
2867 | } else { | ||
2868 | | mvn RC, RC | ||
2869 | | ldr CARG2, [KBASE, RC, lsl #3] | ||
2870 | | bl extern lj_tab_dup // (lua_State *L, Table *kt) | ||
2871 | | // Returns GCtab *. | ||
2872 | } | ||
2873 | | ldr BASE, L->base | ||
2874 | | movk CRET1, #(LJ_TTAB>>1)&0xffff, lsl #48 | ||
2875 | | str CRET1, [BASE, RA, lsl #3] | ||
2876 | | ins_next | ||
2877 | | | ||
2878 | |5: | ||
2879 | | bl extern lj_gc_step_fixtop // (lua_State *L) | ||
2880 | | mov CARG1, L | ||
2881 | | b <1 | ||
2882 | break; | ||
2883 | |||
2884 | case BC_GGET: /* Global get: look up a string key in the env table of the current function (continues at BC_TGETS_Z). */ | ||
2885 | | // RA = dst, RC = str_const (~) | ||
2886 | case BC_GSET: /* Global set: store under a string key in the env table of the current function (continues at BC_TSETS_Z). */ | ||
2887 | | // RA = src, RC = str_const (~) | ||
2888 | | ldr LFUNC:CARG1, [BASE, FRAME_FUNC] | ||
2889 | | mvn RC, RC | ||
2890 | | and LFUNC:CARG1, CARG1, #LJ_GCVMASK | ||
2891 | | ldr TAB:CARG2, LFUNC:CARG1->env | ||
2892 | | ldr STR:RC, [KBASE, RC, lsl #3] | ||
2893 | if (op == BC_GGET) { | ||
2894 | | b ->BC_TGETS_Z | ||
2895 | } else { | ||
2896 | | b ->BC_TSETS_Z | ||
2897 | } | ||
2898 | break; | ||
2899 | |||
2900 | case BC_TGETV: /* Table get with a key from a slot: array part for integer keys, BC_TGETS_Z for string keys, else vmeta_tgetv. */ | ||
2901 | | decode_RB RB, INS | ||
2902 | | and RC, RC, #255 | ||
2903 | | // RA = dst, RB = table, RC = key | ||
2904 | | ldr CARG2, [BASE, RB, lsl #3] | ||
2905 | | ldr TMP1, [BASE, RC, lsl #3] | ||
2906 | | checktab CARG2, ->vmeta_tgetv | ||
2907 | | checkint TMP1, >9 // Integer key? | ||
2908 | | ldr CARG3, TAB:CARG2->array | ||
2909 | | ldr CARG1w, TAB:CARG2->asize | ||
2910 | | add CARG3, CARG3, TMP1, uxtw #3 | ||
2911 | | cmp TMP1w, CARG1w // In array part? | ||
2912 | | bhs ->vmeta_tgetv | ||
2913 | | ldr TMP0, [CARG3] | ||
2914 | | cmp TMP0, TISNIL | ||
2915 | | beq >5 | ||
2916 | |1: | ||
2917 | | str TMP0, [BASE, RA, lsl #3] | ||
2918 | | ins_next | ||
2919 | | | ||
2920 | |5: // Check for __index if table value is nil. | ||
2921 | | ldr TAB:CARG1, TAB:CARG2->metatable | ||
2922 | | cbz TAB:CARG1, <1 // No metatable: done. | ||
2923 | | ldrb TMP1w, TAB:CARG1->nomm | ||
2924 | | tbnz TMP1w, #MM_index, <1 // 'no __index' flag set: done. | ||
2925 | | b ->vmeta_tgetv | ||
2926 | | | ||
2927 | |9: // Not an integer key. | ||
2928 | | asr ITYPE, TMP1, #47 | ||
2929 | | cmn ITYPE, #-LJ_TSTR // String key? | ||
2930 | | bne ->vmeta_tgetv | ||
2931 | | and STR:RC, TMP1, #LJ_GCVMASK | ||
2932 | | b ->BC_TGETS_Z | ||
2933 | break; | ||
2934 | case BC_TGETS: /* Table get with a string constant key: walk the hash chain selected by the string hash. */ | ||
2935 | | decode_RB RB, INS | ||
2936 | | and RC, RC, #255 | ||
2937 | | // RA = dst, RB = table, RC = str_const (~) | ||
2938 | | ldr CARG2, [BASE, RB, lsl #3] | ||
2939 | | mvn RC, RC | ||
2940 | | ldr STR:RC, [KBASE, RC, lsl #3] | ||
2941 | | checktab CARG2, ->vmeta_tgets1 | ||
2942 | |->BC_TGETS_Z: | ||
2943 | | // TAB:CARG2 = GCtab *, STR:RC = GCstr *, RA = dst | ||
2944 | | ldr TMP1w, TAB:CARG2->hmask | ||
2945 | | ldr TMP2w, STR:RC->hash | ||
2946 | | ldr NODE:CARG3, TAB:CARG2->node | ||
2947 | | and TMP1w, TMP1w, TMP2w // idx = str->hash & tab->hmask | ||
2948 | | add TMP1, TMP1, TMP1, lsl #1 | ||
2949 | | movn CARG4, #~LJ_TSTR | ||
2950 | | add NODE:CARG3, NODE:CARG3, TMP1, lsl #3 // node = tab->node + idx*3*8 | ||
2951 | | add CARG4, STR:RC, CARG4, lsl #47 // Tagged key to look for. | ||
2952 | |1: | ||
2953 | | ldp TMP0, CARG1, NODE:CARG3->val | ||
2954 | | ldr NODE:CARG3, NODE:CARG3->next | ||
2955 | | cmp CARG1, CARG4 | ||
2956 | | bne >4 | ||
2957 | | cmp TMP0, TISNIL | ||
2958 | | beq >5 | ||
2959 | |3: | ||
2960 | | str TMP0, [BASE, RA, lsl #3] | ||
2961 | | ins_next | ||
2962 | | | ||
2963 | |4: // Follow hash chain. | ||
2964 | | cbnz NODE:CARG3, <1 | ||
2965 | | // End of hash chain: key not found, nil result. | ||
2966 | | mov TMP0, TISNIL | ||
2967 | | | ||
2968 | |5: // Check for __index if table value is nil. | ||
2969 | | ldr TAB:CARG1, TAB:CARG2->metatable | ||
2970 | | cbz TAB:CARG1, <3 // No metatable: done. | ||
2971 | | ldrb TMP1w, TAB:CARG1->nomm | ||
2972 | | tbnz TMP1w, #MM_index, <3 // 'no __index' flag set: done. | ||
2973 | | b ->vmeta_tgets | ||
2974 | break; | ||
2975 | case BC_TGETB: /* Table get with an 8 bit literal index: array part only, else vmeta_tgetb. */ | ||
2976 | | decode_RB RB, INS | ||
2977 | | and RC, RC, #255 | ||
2978 | | // RA = dst, RB = table, RC = index | ||
2979 | | ldr CARG2, [BASE, RB, lsl #3] | ||
2980 | | checktab CARG2, ->vmeta_tgetb | ||
2981 | | ldr CARG3, TAB:CARG2->array | ||
2982 | | ldr CARG1w, TAB:CARG2->asize | ||
2983 | | add CARG3, CARG3, RC, lsl #3 | ||
2984 | | cmp RCw, CARG1w // In array part? | ||
2985 | | bhs ->vmeta_tgetb | ||
2986 | | ldr TMP0, [CARG3] | ||
2987 | | cmp TMP0, TISNIL | ||
2988 | | beq >5 | ||
2989 | |1: | ||
2990 | | str TMP0, [BASE, RA, lsl #3] | ||
2991 | | ins_next | ||
2992 | | | ||
2993 | |5: // Check for __index if table value is nil. | ||
2994 | | ldr TAB:CARG1, TAB:CARG2->metatable | ||
2995 | | cbz TAB:CARG1, <1 // No metatable: done. | ||
2996 | | ldrb TMP1w, TAB:CARG1->nomm | ||
2997 | | tbnz TMP1w, #MM_index, <1 // 'no __index' flag set: done. | ||
2998 | | b ->vmeta_tgetb | ||
2999 | break; | ||
3000 | case BC_TGETR: /* Raw array get: no table type check, no key type check and no __index check are emitted here. */ | ||
3001 | | decode_RB RB, INS | ||
3002 | | and RC, RC, #255 | ||
3003 | | // RA = dst, RB = table, RC = key (only the low 32 bits of the key slot are used as the index) | ||
3004 | | ldr CARG1, [BASE, RB, lsl #3] | ||
3005 | | ldr TMP1, [BASE, RC, lsl #3] | ||
3006 | | and TAB:CARG1, CARG1, #LJ_GCVMASK | ||
3007 | | ldr CARG3, TAB:CARG1->array | ||
3008 | | ldr TMP2w, TAB:CARG1->asize | ||
3009 | | add CARG3, CARG3, TMP1w, uxtw #3 | ||
3010 | | cmp TMP1w, TMP2w // In array part? | ||
3011 | | bhs ->vmeta_tgetr | ||
3012 | | ldr TMP0, [CARG3] | ||
3013 | |->BC_TGETR_Z: | ||
3014 | | str TMP0, [BASE, RA, lsl #3] | ||
3015 | | ins_next | ||
3016 | break; | ||
3017 | |||
3018 | case BC_TSETV: /* Table set with a key from a slot: array part for integer keys, BC_TSETS_Z for string keys, else vmeta_tsetv. */ | ||
3019 | | decode_RB RB, INS | ||
3020 | | and RC, RC, #255 | ||
3021 | | // RA = src, RB = table, RC = key | ||
3022 | | ldr CARG2, [BASE, RB, lsl #3] | ||
3023 | | ldr TMP1, [BASE, RC, lsl #3] | ||
3024 | | checktab CARG2, ->vmeta_tsetv | ||
3025 | | checkint TMP1, >9 // Integer key? | ||
3026 | | ldr CARG3, TAB:CARG2->array | ||
3027 | | ldr CARG1w, TAB:CARG2->asize | ||
3028 | | add CARG3, CARG3, TMP1, uxtw #3 | ||
3029 | | cmp TMP1w, CARG1w // In array part? | ||
3030 | | bhs ->vmeta_tsetv | ||
3031 | | ldr TMP1, [CARG3] | ||
3032 | | ldr TMP0, [BASE, RA, lsl #3] | ||
3033 | | ldrb TMP2w, TAB:CARG2->marked | ||
3034 | | cmp TMP1, TISNIL // Previous value is nil? | ||
3035 | | beq >5 | ||
3036 | |1: | ||
3037 | | str TMP0, [CARG3] | ||
3038 | | tbnz TMP2w, #2, >7 // isblack(table) | ||
3039 | |2: | ||
3040 | | ins_next | ||
3041 | | | ||
3042 | |5: // Check for __newindex if previous value is nil. | ||
3043 | | ldr TAB:CARG1, TAB:CARG2->metatable | ||
3044 | | cbz TAB:CARG1, <1 // No metatable: done. | ||
3045 | | ldrb TMP1w, TAB:CARG1->nomm | ||
3046 | | tbnz TMP1w, #MM_newindex, <1 // 'no __newindex' flag set: done. | ||
3047 | | b ->vmeta_tsetv | ||
3048 | | | ||
3049 | |7: // Possible table write barrier for the value. Skip valiswhite check. | ||
3050 | | barrierback TAB:CARG2, TMP2w, TMP1 | ||
3051 | | b <2 | ||
3052 | | | ||
3053 | |9: // Not an integer key. | ||
3054 | | asr ITYPE, TMP1, #47 | ||
3055 | | cmn ITYPE, #-LJ_TSTR // String key? | ||
3056 | | bne ->vmeta_tsetv | ||
3057 | | and STR:RC, TMP1, #LJ_GCVMASK | ||
3058 | | b ->BC_TSETS_Z | ||
3059 | break; | ||
3060 | case BC_TSETS: /* Table set with a string constant key: update an existing hash node or add a new key via lj_tab_newkey. */ | ||
3061 | | decode_RB RB, INS | ||
3062 | | and RC, RC, #255 | ||
3063 | | // RA = src, RB = table, RC = str_const (~) | ||
3064 | | ldr CARG2, [BASE, RB, lsl #3] | ||
3065 | | mvn RC, RC | ||
3066 | | ldr STR:RC, [KBASE, RC, lsl #3] | ||
3067 | | checktab CARG2, ->vmeta_tsets1 | ||
3068 | |->BC_TSETS_Z: | ||
3069 | | // TAB:CARG2 = GCtab *, STR:RC = GCstr *, RA = src | ||
3070 | | ldr TMP1w, TAB:CARG2->hmask | ||
3071 | | ldr TMP2w, STR:RC->hash | ||
3072 | | ldr NODE:CARG3, TAB:CARG2->node | ||
3073 | | and TMP1w, TMP1w, TMP2w // idx = str->hash & tab->hmask | ||
3074 | | add TMP1, TMP1, TMP1, lsl #1 | ||
3075 | | movn CARG4, #~LJ_TSTR | ||
3076 | | add NODE:CARG3, NODE:CARG3, TMP1, lsl #3 // node = tab->node + idx*3*8 | ||
3077 | | add CARG4, STR:RC, CARG4, lsl #47 // Tagged key to look for. | ||
3078 | | strb wzr, TAB:CARG2->nomm // Clear metamethod cache. | ||
3079 | |1: | ||
3080 | | ldp TMP1, CARG1, NODE:CARG3->val | ||
3081 | | ldr NODE:TMP3, NODE:CARG3->next | ||
3082 | | ldrb TMP2w, TAB:CARG2->marked | ||
3083 | | cmp CARG1, CARG4 | ||
3084 | | bne >5 | ||
3085 | | ldr TMP0, [BASE, RA, lsl #3] | ||
3086 | | cmp TMP1, TISNIL // Previous value is nil? | ||
3087 | | beq >4 | ||
3088 | |2: | ||
3089 | | str TMP0, NODE:CARG3->val | ||
3090 | | tbnz TMP2w, #2, >7 // isblack(table) | ||
3091 | |3: | ||
3092 | | ins_next | ||
3093 | | | ||
3094 | |4: // Check for __newindex if previous value is nil. | ||
3095 | | ldr TAB:CARG1, TAB:CARG2->metatable | ||
3096 | | cbz TAB:CARG1, <2 // No metatable: done. | ||
3097 | | ldrb TMP1w, TAB:CARG1->nomm | ||
3098 | | tbnz TMP1w, #MM_newindex, <2 // 'no __newindex' flag set: done. | ||
3099 | | b ->vmeta_tsets | ||
3100 | | | ||
3101 | |5: // Follow hash chain. | ||
3102 | | mov NODE:CARG3, NODE:TMP3 | ||
3103 | | cbnz NODE:TMP3, <1 | ||
3104 | | // End of hash chain: key not found, add a new one. | ||
3105 | | | ||
3106 | | // But check for __newindex first. | ||
3107 | | ldr TAB:CARG1, TAB:CARG2->metatable | ||
3108 | | cbz TAB:CARG1, >6 // No metatable: continue. | ||
3109 | | ldrb TMP1w, TAB:CARG1->nomm | ||
3110 | | // 'no __newindex' flag NOT set: check. | ||
3111 | | tbz TMP1w, #MM_newindex, ->vmeta_tsets | ||
3112 | |6: | ||
3113 | | movn TMP1, #~LJ_TSTR | ||
3114 | | str PC, SAVE_PC | ||
3115 | | add TMP0, STR:RC, TMP1, lsl #47 | ||
3116 | | str BASE, L->base | ||
3117 | | mov CARG1, L | ||
3118 | | str TMP0, TMPD // Tagged key is passed by address through the TMPD stack slot. | ||
3119 | | add CARG3, sp, TMPDofs | ||
3120 | | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) | ||
3121 | | // Returns TValue *. | ||
3122 | | ldr BASE, L->base | ||
3123 | | ldr TMP0, [BASE, RA, lsl #3] | ||
3124 | | str TMP0, [CRET1] | ||
3125 | | b <3 // No 2nd write barrier needed. | ||
3126 | | | ||
3127 | |7: // Possible table write barrier for the value. Skip valiswhite check. | ||
3128 | | barrierback TAB:CARG2, TMP2w, TMP1 | ||
3129 | | b <3 | ||
3130 | break; | ||
3131 | case BC_TSETB: /* Table set with an 8 bit literal index: array part only, else vmeta_tsetb. */ | ||
3132 | | decode_RB RB, INS | ||
3133 | | and RC, RC, #255 | ||
3134 | | // RA = src, RB = table, RC = index | ||
3135 | | ldr CARG2, [BASE, RB, lsl #3] | ||
3136 | | checktab CARG2, ->vmeta_tsetb | ||
3137 | | ldr CARG3, TAB:CARG2->array | ||
3138 | | ldr CARG1w, TAB:CARG2->asize | ||
3139 | | add CARG3, CARG3, RC, lsl #3 | ||
3140 | | cmp RCw, CARG1w // In array part? | ||
3141 | | bhs ->vmeta_tsetb | ||
3142 | | ldr TMP1, [CARG3] | ||
3143 | | ldr TMP0, [BASE, RA, lsl #3] | ||
3144 | | ldrb TMP2w, TAB:CARG2->marked | ||
3145 | | cmp TMP1, TISNIL // Previous value is nil? | ||
3146 | | beq >5 | ||
3147 | |1: | ||
3148 | | str TMP0, [CARG3] | ||
3149 | | tbnz TMP2w, #2, >7 // isblack(table) | ||
3150 | |2: | ||
3151 | | ins_next | ||
3152 | | | ||
3153 | |5: // Check for __newindex if previous value is nil. | ||
3154 | | ldr TAB:CARG1, TAB:CARG2->metatable | ||
3155 | | cbz TAB:CARG1, <1 // No metatable: done. | ||
3156 | | ldrb TMP1w, TAB:CARG1->nomm | ||
3157 | | tbnz TMP1w, #MM_newindex, <1 // 'no __newindex' flag set: done. | ||
3158 | | b ->vmeta_tsetb | ||
3159 | | | ||
3160 | |7: // Possible table write barrier for the value. Skip valiswhite check. | ||
3161 | | barrierback TAB:CARG2, TMP2w, TMP1 | ||
3162 | | b <2 | ||
3163 | break; | ||
3164 | case BC_TSETR: /* Raw array set: no table type check, no key type check and no __newindex check are emitted here. */ | ||
3165 | | decode_RB RB, INS | ||
3166 | | and RC, RC, #255 | ||
3167 | | // RA = src, RB = table, RC = key (only the low 32 bits of the key slot are used as the index) | ||
3168 | | ldr CARG2, [BASE, RB, lsl #3] | ||
3169 | | ldr TMP1, [BASE, RC, lsl #3] | ||
3170 | | and TAB:CARG2, CARG2, #LJ_GCVMASK | ||
3171 | | ldr CARG1, TAB:CARG2->array | ||
3172 | | ldrb TMP2w, TAB:CARG2->marked | ||
3173 | | ldr CARG4w, TAB:CARG2->asize | ||
3174 | | add CARG1, CARG1, TMP1, uxtw #3 | ||
3175 | | tbnz TMP2w, #2, >7 // isblack(table) | ||
3176 | |2: | ||
3177 | | cmp TMP1w, CARG4w // In array part? | ||
3178 | | bhs ->vmeta_tsetr | ||
3179 | |->BC_TSETR_Z: | ||
3180 | | ldr TMP0, [BASE, RA, lsl #3] | ||
3181 | | str TMP0, [CARG1] | ||
3182 | | ins_next | ||
3183 | | | ||
3184 | |7: // Possible table write barrier for the value. Skip valiswhite check. Here the barrier runs before the store. | ||
3185 | | barrierback TAB:CARG2, TMP2w, TMP0 | ||
3186 | | b <2 | ||
3187 | break; | ||
3188 | |||
3189 | case BC_TSETM: /* Copy the MULTRES result slots starting at RA into the table array part, starting at the constant index. */ | ||
3190 | | // RA = base (table at base-1), RC = num_const (start index) | ||
3191 | | add RA, BASE, RA, lsl #3 | ||
3192 | |1: | ||
3193 | | ldr RBw, SAVE_MULTRES | ||
3194 | | ldr TAB:CARG2, [RA, #-8] // Guaranteed to be a table. | ||
3195 | | ldr TMP1, [KBASE, RC, lsl #3] // Integer constant is in lo-word. | ||
3196 | | sub RB, RB, #8 | ||
3197 | | cbz RB, >4 // Nothing to copy? | ||
3198 | | and TAB:CARG2, CARG2, #LJ_GCVMASK | ||
3199 | | ldr CARG1w, TAB:CARG2->asize | ||
3200 | | add CARG3w, TMP1w, RBw, lsr #3 // Start index plus number of slots to copy. | ||
3201 | | ldr CARG4, TAB:CARG2->array | ||
3202 | | cmp CARG3, CARG1 | ||
3203 | | add RB, RA, RB | ||
3204 | | bhi >5 | ||
3205 | | add TMP1, CARG4, TMP1w, uxtw #3 | ||
3206 | | ldrb TMP2w, TAB:CARG2->marked | ||
3207 | |3: // Copy result slots to table. | ||
3208 | | ldr TMP0, [RA], #8 | ||
3209 | | str TMP0, [TMP1], #8 | ||
3210 | | cmp RA, RB | ||
3211 | | blo <3 | ||
3212 | | tbnz TMP2w, #2, >7 // isblack(table) | ||
3213 | |4: | ||
3214 | | ins_next | ||
3215 | | | ||
3216 | |5: // Need to resize array part. CARG2 = table and CARG3 = required size are already set up; the handler then restarts at 1. | ||
3217 | | str BASE, L->base | ||
3218 | | mov CARG1, L | ||
3219 | | str PC, SAVE_PC | ||
3220 | | bl extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) | ||
3221 | | // Must not reallocate the stack. | ||
3222 | | b <1 | ||
3223 | | | ||
3224 | |7: // Possible table write barrier for any value. Skip valiswhite check. | ||
3225 | | barrierback TAB:CARG2, TMP2w, TMP1 | ||
3226 | | b <4 | ||
3227 | break; | ||
3228 | |||
3229 | /* -- Calls and vararg handling ----------------------------------------- */ | ||
3230 | |||
3231 | case BC_CALLM: /* Call with extra arguments from the previous multi-result: adds SAVE_MULTRES to the decoded count, then joins BC_CALL_Z. */ | ||
3232 | | // RA = base, (RB = nresults+1,) RC = extra_nargs | ||
3233 | | ldr TMP0w, SAVE_MULTRES | ||
3234 | | decode_RC8RD NARGS8:RC, RC | ||
3235 | | add NARGS8:RC, NARGS8:RC, TMP0 | ||
3236 | | b ->BC_CALL_Z | ||
3237 | break; | ||
3238 | case BC_CALL: /* Call the function in slot RA; a failed checkfunc goes to vmeta_call. */ | ||
3239 | | decode_RC8RD NARGS8:RC, RC | ||
3240 | | // RA = base, (RB = nresults+1,) RC = (nargs+1)*8 | ||
3241 | |->BC_CALL_Z: | ||
3242 | | mov RB, BASE // Save old BASE for vmeta_call. | ||
3243 | | add BASE, BASE, RA, lsl #3 | ||
3244 | | ldr CARG3, [BASE] | ||
3245 | | sub NARGS8:RC, NARGS8:RC, #8 | ||
3246 | | add BASE, BASE, #16 // New BASE is two slots above the slot holding the callee. | ||
3247 | | checkfunc CARG3, ->vmeta_call | ||
3248 | | ins_call | ||
3249 | break; | ||
3250 | |||
3251 | case BC_CALLMT: /* Tail call with extra arguments from the previous multi-result: NARGS8 = SAVE_MULTRES + RC*8, then joins BC_CALLT1_Z. */ | ||
3252 | | // RA = base, (RB = 0,) RC = extra_nargs | ||
3253 | | ldr TMP0w, SAVE_MULTRES | ||
3254 | | add NARGS8:RC, TMP0, RC, lsl #3 | ||
3255 | | b ->BC_CALLT1_Z | ||
3256 | break; | ||
3257 | case BC_CALLT: /* Tail call: copy the callee and its arguments down over the current frame, then dispatch with ins_callt. */ | ||
3258 | | lsl NARGS8:RC, RC, #3 | ||
3259 | | // RA = base, (RB = 0,) RC = (nargs+1)*8 | ||
3260 | |->BC_CALLT1_Z: | ||
3261 | | add RA, BASE, RA, lsl #3 | ||
3262 | | ldr TMP1, [RA] | ||
3263 | | sub NARGS8:RC, NARGS8:RC, #8 | ||
3264 | | add RA, RA, #16 | ||
3265 | | checktp CARG3, TMP1, LJ_TFUNC, ->vmeta_callt | ||
3266 | | ldr PC, [BASE, FRAME_PC] | ||
3267 | |->BC_CALLT2_Z: | ||
3268 | | mov RB, #0 | ||
3269 | | ldrb TMP2w, LFUNC:CARG3->ffid | ||
3270 | | tst PC, #FRAME_TYPE | ||
3271 | | bne >7 | ||
3272 | |1: | ||
3273 | | str TMP1, [BASE, FRAME_FUNC] // Copy function down, but keep PC. | ||
3274 | | cbz NARGS8:RC, >3 | ||
3275 | |2: // Copy arguments down; RB is the running byte offset. | ||
3276 | | ldr TMP0, [RA, RB] | ||
3277 | | add TMP1, RB, #8 | ||
3278 | | cmp TMP1, NARGS8:RC | ||
3279 | | str TMP0, [BASE, RB] | ||
3280 | | mov RB, TMP1 | ||
3281 | | bne <2 | ||
3282 | |3: | ||
3283 | | cmp TMP2, #1 // (> FF_C) Calling a fast function? | ||
3284 | | bhi >5 | ||
3285 | |4: | ||
3286 | | ins_callt | ||
3287 | | | ||
3288 | |5: // Tailcall to a fast function with a Lua frame below. | ||
3289 | | ldrb RAw, [PC, #-4+OFS_RA] | ||
3290 | | sub CARG1, BASE, RA, lsl #3 | ||
3291 | | ldr LFUNC:CARG1, [CARG1, #-32] | ||
3292 | | and LFUNC:CARG1, CARG1, #LJ_GCVMASK | ||
3293 | | ldr CARG1, LFUNC:CARG1->pc | ||
3294 | | ldr KBASE, [CARG1, #PC2PROTO(k)] // Reload KBASE from the prototype of the function below. | ||
3295 | | b <4 | ||
3296 | | | ||
3297 | |7: // Tailcall from a vararg function. | ||
3298 | | eor PC, PC, #FRAME_VARG | ||
3299 | | tst PC, #FRAME_TYPEP // Vararg frame below? | ||
3300 | | csel TMP2, RB, TMP2, ne // Clear ffid if no Lua function below. | ||
3301 | | bne <1 | ||
3302 | | sub BASE, BASE, PC | ||
3303 | | ldr PC, [BASE, FRAME_PC] | ||
3304 | | tst PC, #FRAME_TYPE | ||
3305 | | csel TMP2, RB, TMP2, ne // Clear ffid if no Lua function below. | ||
3306 | | b <1 | ||
3307 | break; | ||
3308 | |||
3309 | case BC_ITERC: /* Generic iterator call: copy callable, state and control var from RA-3..RA-1 up to RA, RA+2, RA+3, then call with 2 args. */ | ||
3310 | | // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) | ||
3311 | | add RA, BASE, RA, lsl #3 | ||
3312 | | ldr CARG3, [RA, #-24] | ||
3313 | | mov RB, BASE // Save old BASE for vmeta_call. | ||
3314 | | ldp CARG1, CARG2, [RA, #-16] | ||
3315 | | add BASE, RA, #16 | ||
3316 | | mov NARGS8:RC, #16 // Iterators get 2 arguments. | ||
3317 | | str CARG3, [RA] // Copy callable. | ||
3318 | | stp CARG1, CARG2, [RA, #16] // Copy state and control var. | ||
3319 | | checkfunc CARG3, ->vmeta_call | ||
3320 | | ins_call | ||
3321 | break; | ||
3322 | |||
3323 | case BC_ITERN: /* Inlined table traversal: array part first, then hash part; stores key and value at RA and RA+1. */ | ||
3324 | | // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) | ||
3325 | |.if JIT | ||
3326 | | // NYI: add hotloop, record BC_ITERN. | ||
3327 | |.endif | ||
3328 | | add RA, BASE, RA, lsl #3 | ||
3329 | | ldr TAB:RB, [RA, #-16] | ||
3330 | | ldrh TMP3w, [PC, # OFS_RD] | ||
3331 | | ldr CARG1w, [RA, #-8+LO] // Get index from control var. | ||
3332 | | add PC, PC, #4 | ||
3333 | | add TMP3, PC, TMP3, lsl #2 | ||
3334 | | and TAB:RB, RB, #LJ_GCVMASK | ||
3335 | | sub TMP3, TMP3, #0x20000 // TMP3 = jump target computed from the RD operand of the instruction at PC. | ||
3336 | | ldr TMP1w, TAB:RB->asize | ||
3337 | | ldr CARG2, TAB:RB->array | ||
3338 | |1: // Traverse array part. | ||
3339 | | subs RC, CARG1, TMP1 | ||
3340 | | add CARG3, CARG2, CARG1, lsl #3 | ||
3341 | | bhs >5 // Index points after array part? | ||
3342 | | ldr TMP0, [CARG3] | ||
3343 | | cmp TMP0, TISNIL | ||
3344 | | cinc CARG1, CARG1, eq // Skip holes in array part. | ||
3345 | | beq <1 | ||
3346 | | add CARG1, CARG1, TISNUM | ||
3347 | | stp CARG1, TMP0, [RA] | ||
3348 | | add CARG1, CARG1, #1 | ||
3349 | |3: | ||
3350 | | str CARG1w, [RA, #-8+LO] // Update control var. | ||
3351 | | mov PC, TMP3 | ||
3352 | |4: | ||
3353 | | ins_next | ||
3354 | | | ||
3355 | |5: // Traverse hash part. RC = index - asize, i.e. the hash node index. | ||
3356 | | ldr TMP2w, TAB:RB->hmask | ||
3357 | | ldr NODE:RB, TAB:RB->node | ||
3358 | |6: | ||
3359 | | add CARG1, RC, RC, lsl #1 | ||
3360 | | cmp RC, TMP2 // End of iteration? Branch to ITERN+1. | ||
3361 | | add NODE:CARG3, NODE:RB, CARG1, lsl #3 // node = tab->node + idx*3*8 | ||
3362 | | bhi <4 | ||
3363 | | ldp TMP0, CARG1, NODE:CARG3->val | ||
3364 | | cmp TMP0, TISNIL | ||
3365 | | add RC, RC, #1 | ||
3366 | | beq <6 // Skip holes in hash part. | ||
3367 | | stp CARG1, TMP0, [RA] | ||
3368 | | add CARG1, RC, TMP1 // Next control index = next node index + asize. | ||
3369 | | b <3 | ||
3370 | break; | ||
3371 | |||
3372 | case BC_ISNEXT: /* Verify that the iterator triple matches (FF_next_N function, table, nil); otherwise patch the bytecode to JMP/ITERC. */ | ||
3373 | | // RA = base, RC = target (points to ITERN) | ||
3374 | | add RA, BASE, RA, lsl #3 | ||
3375 | | ldr CFUNC:CARG1, [RA, #-24] | ||
3376 | | add RC, PC, RC, lsl #2 | ||
3377 | | ldp TAB:CARG3, CARG4, [RA, #-16] | ||
3378 | | sub RC, RC, #0x20000 | ||
3379 | | checkfunc CFUNC:CARG1, >5 | ||
3380 | | asr TMP0, TAB:CARG3, #47 | ||
3381 | | ldrb TMP1w, CFUNC:CARG1->ffid | ||
3382 | | cmn TMP0, #-LJ_TTAB | ||
3383 | | ccmp CARG4, TISNIL, #0, eq | ||
3384 | | ccmp TMP1w, #FF_next_N, #0, eq | ||
3385 | | bne >5 | ||
3386 | | mov TMP0w, #0xfffe7fff | ||
3387 | | lsl TMP0, TMP0, #32 | ||
3388 | | str TMP0, [RA, #-8] // Initialize control var: high word 0xfffe7fff, low word 0. | ||
3389 | |1: | ||
3390 | | mov PC, RC | ||
3391 | | ins_next | ||
3392 | | | ||
3393 | |5: // Despecialize bytecode if any of the checks fail: this op becomes BC_JMP, the target op becomes BC_ITERC. | ||
3394 | | mov TMP0, #BC_JMP | ||
3395 | | mov TMP1, #BC_ITERC | ||
3396 | | strb TMP0w, [PC, #-4+OFS_OP] | ||
3397 | | strb TMP1w, [RC, # OFS_OP] | ||
3398 | | b <1 | ||
3399 | break; | ||
3400 | |||
3401 | case BC_VARG: /* Copy vararg slots to RA...: a fixed count padded with nil if RB != 0, else all of them (sets MULTRES). */ | ||
3402 | | decode_RB RB, INS | ||
3403 | | and RC, RC, #255 | ||
3404 | | // RA = base, RB = (nresults+1), RC = numparams | ||
3405 | | ldr TMP1, [BASE, FRAME_PC] | ||
3406 | | add RC, BASE, RC, lsl #3 | ||
3407 | | add RA, BASE, RA, lsl #3 | ||
3408 | | add RC, RC, #FRAME_VARG | ||
3409 | | add TMP2, RA, RB, lsl #3 | ||
3410 | | sub RC, RC, TMP1 // RC = vbase | ||
3411 | | // Note: RC may now be even _above_ BASE if nargs was < numparams. | ||
3412 | | sub TMP3, BASE, #16 // TMP3 = vtop | ||
3413 | | cbz RB, >5 | ||
3414 | | sub TMP2, TMP2, #16 | ||
3415 | |1: // Copy vararg slots to destination slots. Slots at or above vtop are replaced by nil. | ||
3416 | | cmp RC, TMP3 | ||
3417 | | ldr TMP0, [RC], #8 | ||
3418 | | csel TMP0, TMP0, TISNIL, lo | ||
3419 | | cmp RA, TMP2 | ||
3420 | | str TMP0, [RA], #8 | ||
3421 | | blo <1 | ||
3422 | |2: | ||
3423 | | ins_next | ||
3424 | | | ||
3425 | |5: // Copy all varargs. | ||
3426 | | ldr TMP0, L->maxstack | ||
3427 | | subs TMP2, TMP3, RC | ||
3428 | | csel RB, xzr, TMP2, le // MULTRES = (max(vtop-vbase,0)+1)*8 | ||
3429 | | add RB, RB, #8 | ||
3430 | | add TMP1, RA, TMP2 | ||
3431 | | str RBw, SAVE_MULTRES | ||
3432 | | ble <2 // Nothing to copy. | ||
3433 | | cmp TMP1, TMP0 | ||
3434 | | bhi >7 | ||
3435 | |6: | ||
3436 | | ldr TMP0, [RC], #8 | ||
3437 | | str TMP0, [RA], #8 | ||
3438 | | cmp RC, TMP3 | ||
3439 | | blo <6 | ||
3440 | | b <2 | ||
3441 | | | ||
3442 | |7: // Grow stack for varargs. | ||
3443 | | lsr CARG2, TMP2, #3 | ||
3444 | | stp BASE, RA, L->base | ||
3445 | | mov CARG1, L | ||
3446 | | sub RC, RC, BASE // Need delta, because BASE may change. | ||
3447 | | str PC, SAVE_PC | ||
3448 | | bl extern lj_state_growstack // (lua_State *L, int n) | ||
3449 | | ldp BASE, RA, L->base | ||
3450 | | add RC, BASE, RC | ||
3451 | | sub TMP3, BASE, #16 | ||
3452 | | b <6 | ||
3453 | break; | ||
3454 | |||
3455 | /* -- Returns ----------------------------------------------------------- */ | ||
3456 | |||
3457 | case BC_RETM: /* Return with extra results from the previous multi-result: RC = SAVE_MULTRES + RC*8, then joins BC_RETM_Z. */ | ||
3458 | | // RA = results, RC = extra results | ||
3459 | | ldr TMP0w, SAVE_MULTRES | ||
3460 | | ldr PC, [BASE, FRAME_PC] | ||
3461 | | add RA, BASE, RA, lsl #3 | ||
3462 | | add RC, TMP0, RC, lsl #3 | ||
3463 | | b ->BC_RETM_Z | ||
3464 | break; | ||
3465 | |||
3466 | case BC_RET: /* General return: copy the results down to the caller frame, pad with nil as needed, restore BASE and KBASE. */ | ||
3467 | | // RA = results, RC = nresults+1 | ||
3468 | | ldr PC, [BASE, FRAME_PC] | ||
3469 | | lsl RC, RC, #3 | ||
3470 | | add RA, BASE, RA, lsl #3 | ||
3471 | |->BC_RETM_Z: | ||
3472 | | str RCw, SAVE_MULTRES | ||
3473 | |1: | ||
3474 | | ands CARG1, PC, #FRAME_TYPE | ||
3475 | | eor CARG2, PC, #FRAME_VARG | ||
3476 | | bne ->BC_RETV2_Z | ||
3477 | | | ||
3478 | |->BC_RET_Z: | ||
3479 | | // BASE = base, RA = resultptr, RC = (nresults+1)*8, PC = return | ||
3480 | | ldr INSw, [PC, #-4] | ||
3481 | | subs TMP1, RC, #8 | ||
3482 | | sub CARG3, BASE, #16 | ||
3483 | | beq >3 | ||
3484 | |2: // Copy results down, starting at BASE-16. | ||
3485 | | ldr TMP0, [RA], #8 | ||
3486 | | add BASE, BASE, #8 | ||
3487 | | sub TMP1, TMP1, #8 | ||
3488 | | str TMP0, [BASE, #-24] | ||
3489 | | cbnz TMP1, <2 | ||
3490 | |3: | ||
3491 | | decode_RA RA, INS | ||
3492 | | sub CARG4, CARG3, RA, lsl #3 | ||
3493 | | decode_RB RB, INS | ||
3494 | | ldr LFUNC:CARG1, [CARG4, FRAME_FUNC] | ||
3495 | |5: | ||
3496 | | cmp RC, RB, lsl #3 // More results expected? | ||
3497 | | blo >6 | ||
3498 | | and LFUNC:CARG1, CARG1, #LJ_GCVMASK | ||
3499 | | mov BASE, CARG4 | ||
3500 | | ldr CARG2, LFUNC:CARG1->pc | ||
3501 | | ldr KBASE, [CARG2, #PC2PROTO(k)] | ||
3502 | | ins_next | ||
3503 | | | ||
3504 | |6: // Fill up results with nil. | ||
3505 | | add BASE, BASE, #8 | ||
3506 | | add RC, RC, #8 | ||
3507 | | str TISNIL, [BASE, #-24] | ||
3508 | | b <5 | ||
3509 | | | ||
3510 | |->BC_RETV1_Z: // Non-standard return case. | ||
3511 | | add RA, BASE, RA, lsl #3 | ||
3512 | |->BC_RETV2_Z: | ||
3513 | | tst CARG2, #FRAME_TYPEP | ||
3514 | | bne ->vm_return | ||
3515 | | // Return from vararg function: relocate BASE down. | ||
3516 | | sub BASE, BASE, CARG2 | ||
3517 | | ldr PC, [BASE, FRAME_PC] | ||
3518 | | b <1 | ||
3519 | break; | ||
3520 | |||
3521 | case BC_RET0: case BC_RET1: | ||
3522 | | // RA = results, RC = nresults+1 | ||
3523 | | ldr PC, [BASE, FRAME_PC] | ||
3524 | | lsl RC, RC, #3 | ||
3525 | | str RCw, SAVE_MULTRES | ||
3526 | | ands CARG1, PC, #FRAME_TYPE | ||
3527 | | eor CARG2, PC, #FRAME_VARG | ||
3528 | | bne ->BC_RETV1_Z | ||
3529 | | ldr INSw, [PC, #-4] | ||
3530 | if (op == BC_RET1) { | ||
3531 | | ldr TMP0, [BASE, RA, lsl #3] | ||
3532 | } | ||
3533 | | sub CARG4, BASE, #16 | ||
3534 | | decode_RA RA, INS | ||
3535 | | sub BASE, CARG4, RA, lsl #3 | ||
3536 | if (op == BC_RET1) { | ||
3537 | | str TMP0, [CARG4], #8 | ||
3538 | } | ||
3539 | | decode_RB RB, INS | ||
3540 | | ldr LFUNC:CARG1, [BASE, FRAME_FUNC] | ||
3541 | |5: | ||
3542 | | cmp RC, RB, lsl #3 | ||
3543 | | blo >6 | ||
3544 | | and LFUNC:CARG1, CARG1, #LJ_GCVMASK | ||
3545 | | ldr CARG2, LFUNC:CARG1->pc | ||
3546 | | ldr KBASE, [CARG2, #PC2PROTO(k)] | ||
3547 | | ins_next | ||
3548 | | | ||
3549 | |6: // Fill up results with nil. | ||
3550 | | add RC, RC, #8 | ||
3551 | | str TISNIL, [CARG4], #8 | ||
3552 | | b <5 | ||
3553 | break; | ||
3554 | |||
3555 | /* -- Loops and branches ------------------------------------------------ */ | ||
3556 | |||
3557 | |.define FOR_IDX, [RA]; .define FOR_TIDX, [RA, #4] | ||
3558 | |.define FOR_STOP, [RA, #8]; .define FOR_TSTOP, [RA, #12] | ||
3559 | |.define FOR_STEP, [RA, #16]; .define FOR_TSTEP, [RA, #20] | ||
3560 | |.define FOR_EXT, [RA, #24]; .define FOR_TEXT, [RA, #28] | ||
3561 | |||
3562 | case BC_FORL: | ||
3563 | |.if JIT | ||
3564 | | hotloop | ||
3565 | |.endif | ||
3566 | | // Fall through. Assumes BC_IFORL follows. | ||
3567 | break; | ||
3568 | |||
3569 | case BC_JFORI: | ||
3570 | case BC_JFORL: | ||
3571 | #if !LJ_HASJIT | ||
3572 | break; | ||
3573 | #endif | ||
3574 | case BC_FORI: | ||
3575 | case BC_IFORL: | ||
3576 | | // RA = base, RC = target (after end of loop or start of loop) | ||
3577 | vk = (op == BC_IFORL || op == BC_JFORL); | ||
3578 | | add RA, BASE, RA, lsl #3 | ||
3579 | | ldp CARG1, CARG2, FOR_IDX // CARG1 = IDX, CARG2 = STOP | ||
3580 | | ldr CARG3, FOR_STEP // CARG3 = STEP | ||
3581 | if (op != BC_JFORL) { | ||
3582 | | add RC, PC, RC, lsl #2 | ||
3583 | | sub RC, RC, #0x20000 | ||
3584 | } | ||
3585 | | checkint CARG1, >5 | ||
3586 | if (!vk) { | ||
3587 | | checkint CARG2, ->vmeta_for | ||
3588 | | checkint CARG3, ->vmeta_for | ||
3589 | | tbnz CARG3w, #31, >4 | ||
3590 | | cmp CARG1w, CARG2w | ||
3591 | } else { | ||
3592 | | adds CARG1w, CARG1w, CARG3w | ||
3593 | | bvs >2 | ||
3594 | | add TMP0, CARG1, TISNUM | ||
3595 | | tbnz CARG3w, #31, >4 | ||
3596 | | cmp CARG1w, CARG2w | ||
3597 | } | ||
3598 | |1: | ||
3599 | if (op == BC_FORI) { | ||
3600 | | csel PC, RC, PC, gt | ||
3601 | } else if (op == BC_JFORI) { | ||
3602 | | mov PC, RC | ||
3603 | | ldrh RCw, [RC, #-4+OFS_RD] | ||
3604 | } else if (op == BC_IFORL) { | ||
3605 | | csel PC, RC, PC, le | ||
3606 | } | ||
3607 | if (vk) { | ||
3608 | | str TMP0, FOR_IDX | ||
3609 | | str TMP0, FOR_EXT | ||
3610 | } else { | ||
3611 | | str CARG1, FOR_EXT | ||
3612 | } | ||
3613 | if (op == BC_JFORI || op == BC_JFORL) { | ||
3614 | | ble =>BC_JLOOP | ||
3615 | } | ||
3616 | |2: | ||
3617 | | ins_next | ||
3618 | | | ||
3619 | |4: // Invert check for negative step. | ||
3620 | | cmp CARG2w, CARG1w | ||
3621 | | b <1 | ||
3622 | | | ||
3623 | |5: // FP loop. | ||
3624 | | ldp d0, d1, FOR_IDX | ||
3625 | | blo ->vmeta_for | ||
3626 | if (!vk) { | ||
3627 | | checknum CARG2, ->vmeta_for | ||
3628 | | checknum CARG3, ->vmeta_for | ||
3629 | | str d0, FOR_EXT | ||
3630 | } else { | ||
3631 | | ldr d2, FOR_STEP | ||
3632 | | fadd d0, d0, d2 | ||
3633 | } | ||
3634 | | tbnz CARG3, #63, >7 | ||
3635 | | fcmp d0, d1 | ||
3636 | |6: | ||
3637 | if (vk) { | ||
3638 | | str d0, FOR_IDX | ||
3639 | | str d0, FOR_EXT | ||
3640 | } | ||
3641 | if (op == BC_FORI) { | ||
3642 | | csel PC, RC, PC, hi | ||
3643 | } else if (op == BC_JFORI) { | ||
3644 | | ldrh RCw, [RC, #-4+OFS_RD] | ||
3645 | | bls =>BC_JLOOP | ||
3646 | } else if (op == BC_IFORL) { | ||
3647 | | csel PC, RC, PC, ls | ||
3648 | } else { | ||
3649 | | bls =>BC_JLOOP | ||
3650 | } | ||
3651 | | b <2 | ||
3652 | | | ||
3653 | |7: // Invert check for negative step. | ||
3654 | | fcmp d1, d0 | ||
3655 | | b <6 | ||
3656 | break; | ||
3657 | |||
3658 | case BC_ITERL: | ||
3659 | |.if JIT | ||
3660 | | hotloop | ||
3661 | |.endif | ||
3662 | | // Fall through. Assumes BC_IITERL follows. | ||
3663 | break; | ||
3664 | |||
3665 | case BC_JITERL: | ||
3666 | #if !LJ_HASJIT | ||
3667 | break; | ||
3668 | #endif | ||
3669 | case BC_IITERL: | ||
3670 | | // RA = base, RC = target | ||
3671 | | ldr CARG1, [BASE, RA, lsl #3] | ||
3672 | | add TMP1, BASE, RA, lsl #3 | ||
3673 | | cmp CARG1, TISNIL | ||
3674 | | beq >1 // Stop if iterator returned nil. | ||
3675 | if (op == BC_JITERL) { | ||
3676 | | str CARG1, [TMP1, #-8] | ||
3677 | | b =>BC_JLOOP | ||
3678 | } else { | ||
3679 | | add TMP0, PC, RC, lsl #2 // Otherwise save control var + branch. | ||
3680 | | sub PC, TMP0, #0x20000 | ||
3681 | | str CARG1, [TMP1, #-8] | ||
3682 | } | ||
3683 | |1: | ||
3684 | | ins_next | ||
3685 | break; | ||
3686 | |||
3687 | case BC_LOOP: | ||
3688 | | // RA = base, RC = target (loop extent) | ||
3689 | | // Note: RA/RC is only used by trace recorder to determine scope/extent | ||
3690 | | // This opcode does NOT jump, its only purpose is to detect a hot loop. | ||
3691 | |.if JIT | ||
3692 | | hotloop | ||
3693 | |.endif | ||
3694 | | // Fall through. Assumes BC_ILOOP follows. | ||
3695 | break; | ||
3696 | |||
3697 | case BC_ILOOP: | ||
3698 | | // RA = base, RC = target (loop extent) | ||
3699 | | ins_next | ||
3700 | break; | ||
3701 | |||
3702 | case BC_JLOOP: | ||
3703 | |.if JIT | ||
3704 | | // RA = base (ignored), RC = traceno | ||
3705 | | ldr CARG1, [GL, #GL_J(trace)] | ||
3706 | | mov CARG2w, #0 // Traces on ARM64 don't store the trace #, so use 0. | ||
3707 | | ldr TRACE:RC, [CARG1, RC, lsl #3] | ||
3708 | | st_vmstate CARG2w | ||
3709 | | ldr RA, TRACE:RC->mcode | ||
3710 | | str BASE, GL->jit_base | ||
3711 | | str L, GL->tmpbuf.L | ||
3712 | | sub sp, sp, #16 // See SPS_FIXED. Avoids sp adjust in every root trace. | ||
3713 | | br RA | ||
3714 | |.endif | ||
3715 | break; | ||
3716 | |||
3717 | case BC_JMP: | ||
3718 | | // RA = base (only used by trace recorder), RC = target | ||
3719 | | add RC, PC, RC, lsl #2 | ||
3720 | | sub PC, RC, #0x20000 | ||
3721 | | ins_next | ||
3722 | break; | ||
3723 | |||
3724 | /* -- Function headers -------------------------------------------------- */ | ||
3725 | |||
3726 | case BC_FUNCF: | ||
3727 | |.if JIT | ||
3728 | | hotcall | ||
3729 | |.endif | ||
3730 | case BC_FUNCV: /* NYI: compiled vararg functions. */ | ||
3731 | | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow. | ||
3732 | break; | ||
3733 | |||
3734 | case BC_JFUNCF: | ||
3735 | #if !LJ_HASJIT | ||
3736 | break; | ||
3737 | #endif | ||
3738 | case BC_IFUNCF: | ||
3739 | | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8 | ||
3740 | | ldr CARG1, L->maxstack | ||
3741 | | ldrb TMP1w, [PC, #-4+PC2PROTO(numparams)] | ||
3742 | | ldr KBASE, [PC, #-4+PC2PROTO(k)] | ||
3743 | | cmp RA, CARG1 | ||
3744 | | bhi ->vm_growstack_l | ||
3745 | |2: | ||
3746 | | cmp NARGS8:RC, TMP1, lsl #3 // Check for missing parameters. | ||
3747 | | blo >3 | ||
3748 | if (op == BC_JFUNCF) { | ||
3749 | | decode_RD RC, INS | ||
3750 | | b =>BC_JLOOP | ||
3751 | } else { | ||
3752 | | ins_next | ||
3753 | } | ||
3754 | | | ||
3755 | |3: // Clear missing parameters. | ||
3756 | | str TISNIL, [BASE, NARGS8:RC] | ||
3757 | | add NARGS8:RC, NARGS8:RC, #8 | ||
3758 | | b <2 | ||
3759 | break; | ||
3760 | |||
3761 | case BC_JFUNCV: | ||
3762 | #if !LJ_HASJIT | ||
3763 | break; | ||
3764 | #endif | ||
3765 | | NYI // NYI: compiled vararg functions | ||
3766 | break; /* NYI: compiled vararg functions. */ | ||
3767 | |||
3768 | case BC_IFUNCV: | ||
3769 | | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8 | ||
3770 | | ldr CARG1, L->maxstack | ||
3771 | | movn TMP0, #~LJ_TFUNC | ||
3772 | | add TMP2, BASE, RC | ||
3773 | | add LFUNC:CARG3, CARG3, TMP0, lsl #47 | ||
3774 | | add RA, RA, RC | ||
3775 | | add TMP0, RC, #16+FRAME_VARG | ||
3776 | | str LFUNC:CARG3, [TMP2], #8 // Store (tagged) copy of LFUNC. | ||
3777 | | ldr KBASE, [PC, #-4+PC2PROTO(k)] | ||
3778 | | cmp RA, CARG1 | ||
3779 | | str TMP0, [TMP2], #8 // Store delta + FRAME_VARG. | ||
3780 | | bhs ->vm_growstack_l | ||
3781 | | sub RC, TMP2, #16 | ||
3782 | | ldrb TMP1w, [PC, #-4+PC2PROTO(numparams)] | ||
3783 | | mov RA, BASE | ||
3784 | | mov BASE, TMP2 | ||
3785 | | cbz TMP1, >2 | ||
3786 | |1: | ||
3787 | | cmp RA, RC // Less args than parameters? | ||
3788 | | bhs >3 | ||
3789 | | ldr TMP0, [RA] | ||
3790 | | sub TMP1, TMP1, #1 | ||
3791 | | str TISNIL, [RA], #8 // Clear old fixarg slot (help the GC). | ||
3792 | | str TMP0, [TMP2], #8 | ||
3793 | | cbnz TMP1, <1 | ||
3794 | |2: | ||
3795 | | ins_next | ||
3796 | | | ||
3797 | |3: | ||
3798 | | sub TMP1, TMP1, #1 | ||
3799 | | str TISNIL, [TMP2], #8 | ||
3800 | | cbz TMP1, <2 | ||
3801 | | b <3 | ||
3802 | break; | ||
3803 | |||
3804 | case BC_FUNCC: | ||
3805 | case BC_FUNCCW: | ||
3806 | | // BASE = new base, RA = BASE+framesize*8, CARG3 = CFUNC, RC = nargs*8 | ||
3807 | if (op == BC_FUNCC) { | ||
3808 | | ldr CARG4, CFUNC:CARG3->f | ||
3809 | } else { | ||
3810 | | ldr CARG4, GL->wrapf | ||
3811 | } | ||
3812 | | add CARG2, RA, NARGS8:RC | ||
3813 | | ldr CARG1, L->maxstack | ||
3814 | | add RC, BASE, NARGS8:RC | ||
3815 | | cmp CARG2, CARG1 | ||
3816 | | stp BASE, RC, L->base | ||
3817 | if (op == BC_FUNCCW) { | ||
3818 | | ldr CARG2, CFUNC:CARG3->f | ||
3819 | } | ||
3820 | | mv_vmstate TMP0w, C | ||
3821 | | mov CARG1, L | ||
3822 | | bhi ->vm_growstack_c // Need to grow stack. | ||
3823 | | st_vmstate TMP0w | ||
3824 | | blr CARG4 // (lua_State *L [, lua_CFunction f]) | ||
3825 | | // Returns nresults. | ||
3826 | | ldp BASE, TMP1, L->base | ||
3827 | | str L, GL->cur_L | ||
3828 | | sbfiz RC, CRET1, #3, #32 | ||
3829 | | st_vmstate ST_INTERP | ||
3830 | | ldr PC, [BASE, FRAME_PC] | ||
3831 | | sub RA, TMP1, RC // RA = L->top - nresults*8 | ||
3832 | | b ->vm_returnc | ||
3833 | break; | ||
3834 | |||
3835 | /* ---------------------------------------------------------------------- */ | ||
3836 | |||
3837 | default: | ||
3838 | fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); | ||
3839 | exit(2); | ||
3840 | break; | ||
3841 | } | ||
3842 | } | ||
3843 | |||
3844 | static int build_backend(BuildCtx *ctx) /* Emit the subroutines, then the code for every bytecode op; returns BC__MAX. */ | ||
3845 | { | ||
3846 | int op; | ||
3847 | |||
3848 | dasm_growpc(Dst, BC__MAX); /* Size the DynASM pc-label table to BC__MAX entries (presumably one per op, cf. =>BC_JLOOP). */ | ||
3849 | |||
3850 | build_subroutines(ctx); /* Subroutines are emitted before the per-opcode code. */ | ||
3851 | |||
3852 | |.code_op | ||
3853 | for (op = 0; op < BC__MAX; op++) /* Visit opcodes in numeric order, 0 .. BC__MAX-1. */ | ||
3854 | build_ins(ctx, (BCOp)op, op); /* op is passed twice: as the BCOp and as the plain int third argument. */ | ||
3855 | |||
3856 | return BC__MAX; /* The return value is the constant BC__MAX, independent of ctx. */ | ||
3857 | } | ||
3858 | |||
3859 | /* Emit pseudo frame-info for all assembler functions. Only BUILD_elfasm mode writes anything: a .debug_frame section followed by an .eh_frame section, each holding one FDE that starts at .Lbegin and, if LJ_HASFFI, one for lj_vm_ffi_call. All output goes to ctx->fp as assembler text. */ | ||
3860 | static void emit_asm_debug(BuildCtx *ctx) | ||
3861 | { | ||
3862 | int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); /* Byte offset of vm_ffi_call from the start of the code. */ | ||
3863 | int i, cf = CFRAME_SIZE >> 3; /* cf = CFRAME_SIZE in 8-byte units (the CIEs below use .sleb128 -8). */ | ||
3864 | switch (ctx->mode) { | ||
3865 | case BUILD_elfasm: | ||
3866 | fprintf(ctx->fp, "\t.section .debug_frame,\"\",%%progbits\n"); | ||
3867 | fprintf(ctx->fp, /* CIE .Lframe0, referenced by both .debug_frame FDEs below. */ | ||
3868 | ".Lframe0:\n" | ||
3869 | "\t.long .LECIE0-.LSCIE0\n" | ||
3870 | ".LSCIE0:\n" | ||
3871 | "\t.long 0xffffffff\n" | ||
3872 | "\t.byte 0x1\n" | ||
3873 | "\t.string \"\"\n" | ||
3874 | "\t.uleb128 0x1\n" | ||
3875 | "\t.sleb128 -8\n" | ||
3876 | "\t.byte 30\n" /* Return address is in lr. */ | ||
3877 | "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */ | ||
3878 | "\t.align 3\n" | ||
3879 | ".LECIE0:\n\n"); | ||
3880 | fprintf(ctx->fp, /* FDE0: starts at .Lbegin, length fcofs bytes (everything before vm_ffi_call). */ | ||
3881 | ".LSFDE0:\n" | ||
3882 | "\t.long .LEFDE0-.LASFDE0\n" | ||
3883 | ".LASFDE0:\n" | ||
3884 | "\t.long .Lframe0\n" | ||
3885 | "\t.quad .Lbegin\n" | ||
3886 | "\t.quad %d\n" | ||
3887 | "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ | ||
3888 | "\t.byte 0x9d\n\t.uleb128 %d\n" /* offset fp */ | ||
3889 | "\t.byte 0x9e\n\t.uleb128 %d\n", /* offset lr */ | ||
3890 | fcofs, CFRAME_SIZE, cf, cf-1); | ||
3891 | for (i = 19; i <= 28; i++) /* offset x19-x28 */ | ||
3892 | fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, cf-i+17); | ||
3893 | for (i = 8; i <= 15; i++) /* offset d8-d15 */ | ||
3894 | fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n", | ||
3895 | 64+i, cf-i-4); | ||
3896 | fprintf(ctx->fp, | ||
3897 | "\t.align 3\n" | ||
3898 | ".LEFDE0:\n\n"); | ||
3899 | #if LJ_HASFFI | ||
3900 | fprintf(ctx->fp, /* FDE1: starts at lj_vm_ffi_call, length codesz - fcofs bytes; fixed offsets, not derived from cf. */ | ||
3901 | ".LSFDE1:\n" | ||
3902 | "\t.long .LEFDE1-.LASFDE1\n" | ||
3903 | ".LASFDE1:\n" | ||
3904 | "\t.long .Lframe0\n" | ||
3905 | "\t.quad lj_vm_ffi_call\n" | ||
3906 | "\t.quad %d\n" | ||
3907 | "\t.byte 0xe\n\t.uleb128 32\n" /* def_cfa_offset */ | ||
3908 | "\t.byte 0x9d\n\t.uleb128 4\n" /* offset fp */ | ||
3909 | "\t.byte 0x9e\n\t.uleb128 3\n" /* offset lr */ | ||
3910 | "\t.byte 0x93\n\t.uleb128 2\n" /* offset x19 */ | ||
3911 | "\t.align 3\n" | ||
3912 | ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); | ||
3913 | #endif | ||
3914 | fprintf(ctx->fp, "\t.section .eh_frame,\"a\",%%progbits\n"); | ||
3915 | fprintf(ctx->fp, /* CIE .Lframe1: "zPR" augmentation, carries a pc-relative reference to lj_err_unwind_dwarf. */ | ||
3916 | ".Lframe1:\n" | ||
3917 | "\t.long .LECIE1-.LSCIE1\n" | ||
3918 | ".LSCIE1:\n" | ||
3919 | "\t.long 0\n" | ||
3920 | "\t.byte 0x1\n" | ||
3921 | "\t.string \"zPR\"\n" | ||
3922 | "\t.uleb128 0x1\n" | ||
3923 | "\t.sleb128 -8\n" | ||
3924 | "\t.byte 30\n" /* Return address is in lr. */ | ||
3925 | "\t.uleb128 6\n" /* augmentation length */ | ||
3926 | "\t.byte 0x1b\n" /* pcrel|sdata4 */ | ||
3927 | "\t.long lj_err_unwind_dwarf-.\n" | ||
3928 | "\t.byte 0x1b\n" /* pcrel|sdata4 */ | ||
3929 | "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */ | ||
3930 | "\t.align 3\n" | ||
3931 | ".LECIE1:\n\n"); | ||
3932 | fprintf(ctx->fp, /* FDE2: .eh_frame counterpart of FDE0; same arguments, but 32-bit pc-relative start and length. */ | ||
3933 | ".LSFDE2:\n" | ||
3934 | "\t.long .LEFDE2-.LASFDE2\n" | ||
3935 | ".LASFDE2:\n" | ||
3936 | "\t.long .LASFDE2-.Lframe1\n" | ||
3937 | "\t.long .Lbegin-.\n" | ||
3938 | "\t.long %d\n" | ||
3939 | "\t.uleb128 0\n" /* augmentation length */ | ||
3940 | "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ | ||
3941 | "\t.byte 0x9d\n\t.uleb128 %d\n" /* offset fp */ | ||
3942 | "\t.byte 0x9e\n\t.uleb128 %d\n", /* offset lr */ | ||
3943 | fcofs, CFRAME_SIZE, cf, cf-1); | ||
3944 | for (i = 19; i <= 28; i++) /* offset x19-x28 */ | ||
3945 | fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, cf-i+17); | ||
3946 | for (i = 8; i <= 15; i++) /* offset d8-d15 */ | ||
3947 | fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n", | ||
3948 | 64+i, cf-i-4); | ||
3949 | fprintf(ctx->fp, | ||
3950 | "\t.align 3\n" | ||
3951 | ".LEFDE2:\n\n"); | ||
3952 | #if LJ_HASFFI | ||
3953 | fprintf(ctx->fp, /* CIE .Lframe2: "zR" augmentation only, i.e. without the lj_err_unwind_dwarf reference; used by FDE3. */ | ||
3954 | ".Lframe2:\n" | ||
3955 | "\t.long .LECIE2-.LSCIE2\n" | ||
3956 | ".LSCIE2:\n" | ||
3957 | "\t.long 0\n" | ||
3958 | "\t.byte 0x1\n" | ||
3959 | "\t.string \"zR\"\n" | ||
3960 | "\t.uleb128 0x1\n" | ||
3961 | "\t.sleb128 -8\n" | ||
3962 | "\t.byte 30\n" /* Return address is in lr. */ | ||
3963 | "\t.uleb128 1\n" /* augmentation length */ | ||
3964 | "\t.byte 0x1b\n" /* pcrel|sdata4 */ | ||
3965 | "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */ | ||
3966 | "\t.align 3\n" | ||
3967 | ".LECIE2:\n\n"); | ||
3968 | fprintf(ctx->fp, /* FDE3: .eh_frame counterpart of FDE1 for lj_vm_ffi_call, length codesz - fcofs bytes. */ | ||
3969 | ".LSFDE3:\n" | ||
3970 | "\t.long .LEFDE3-.LASFDE3\n" | ||
3971 | ".LASFDE3:\n" | ||
3972 | "\t.long .LASFDE3-.Lframe2\n" | ||
3973 | "\t.long lj_vm_ffi_call-.\n" | ||
3974 | "\t.long %d\n" | ||
3975 | "\t.uleb128 0\n" /* augmentation length */ | ||
3976 | "\t.byte 0xe\n\t.uleb128 32\n" /* def_cfa_offset */ | ||
3977 | "\t.byte 0x9d\n\t.uleb128 4\n" /* offset fp */ | ||
3978 | "\t.byte 0x9e\n\t.uleb128 3\n" /* offset lr */ | ||
3979 | "\t.byte 0x93\n\t.uleb128 2\n" /* offset x19 */ | ||
3980 | "\t.align 3\n" | ||
3981 | ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); | ||
3982 | #endif | ||
3983 | break; | ||
3984 | default: /* Every other build mode emits no frame info from this function. */ | ||
3985 | break; | ||
3986 | } | ||
3987 | } | ||
3988 | |||
diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc index e6b53e0d..37506139 100644 --- a/src/vm_mips.dasc +++ b/src/vm_mips.dasc | |||
@@ -1,6 +1,9 @@ | |||
1 | |// Low-level VM code for MIPS CPUs. | 1 | |// Low-level VM code for MIPS CPUs. |
2 | |// Bytecode interpreter, fast functions and helper functions. | 2 | |// Bytecode interpreter, fast functions and helper functions. |
3 | |// Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h | 3 | |// Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h |
4 | |// | ||
5 | |// MIPS soft-float support contributed by Djordje Kovacevic and | ||
6 | |// Stefan Pejic from RT-RK.com, sponsored by Cisco Systems, Inc. | ||
4 | | | 7 | | |
5 | |.arch mips | 8 | |.arch mips |
6 | |.section code_op, code_sub | 9 | |.section code_op, code_sub |
@@ -18,6 +21,12 @@ | |||
18 | |// Fixed register assignments for the interpreter. | 21 | |// Fixed register assignments for the interpreter. |
19 | |// Don't use: r0 = 0, r26/r27 = reserved, r28 = gp, r29 = sp, r31 = ra | 22 | |// Don't use: r0 = 0, r26/r27 = reserved, r28 = gp, r29 = sp, r31 = ra |
20 | | | 23 | | |
24 | |.macro .FPU, a, b | ||
25 | |.if FPU | ||
26 | | a, b | ||
27 | |.endif | ||
28 | |.endmacro | ||
29 | | | ||
21 | |// The following must be C callee-save (but BASE is often refetched). | 30 | |// The following must be C callee-save (but BASE is often refetched). |
22 | |.define BASE, r16 // Base of current Lua stack frame. | 31 | |.define BASE, r16 // Base of current Lua stack frame. |
23 | |.define KBASE, r17 // Constants of current Lua function. | 32 | |.define KBASE, r17 // Constants of current Lua function. |
@@ -25,13 +34,15 @@ | |||
25 | |.define DISPATCH, r19 // Opcode dispatch table. | 34 | |.define DISPATCH, r19 // Opcode dispatch table. |
26 | |.define LREG, r20 // Register holding lua_State (also in SAVE_L). | 35 | |.define LREG, r20 // Register holding lua_State (also in SAVE_L). |
27 | |.define MULTRES, r21 // Size of multi-result: (nresults+1)*8. | 36 | |.define MULTRES, r21 // Size of multi-result: (nresults+1)*8. |
28 | |// NYI: r22 currently unused. | ||
29 | | | 37 | | |
30 | |.define JGL, r30 // On-trace: global_State + 32768. | 38 | |.define JGL, r30 // On-trace: global_State + 32768. |
31 | | | 39 | | |
32 | |// Constants for type-comparisons, stores and conversions. C callee-save. | 40 | |// Constants for type-comparisons, stores and conversions. C callee-save. |
41 | |.define TISNUM, r22 | ||
33 | |.define TISNIL, r30 | 42 | |.define TISNIL, r30 |
43 | |.if FPU | ||
34 | |.define TOBIT, f30 // 2^52 + 2^51. | 44 | |.define TOBIT, f30 // 2^52 + 2^51. |
45 | |.endif | ||
35 | | | 46 | | |
36 | |// The following temporaries are not saved across C calls, except for RA. | 47 | |// The following temporaries are not saved across C calls, except for RA. |
37 | |.define RA, r23 // Callee-save. | 48 | |.define RA, r23 // Callee-save. |
@@ -46,7 +57,7 @@ | |||
46 | |.define TMP2, r14 | 57 | |.define TMP2, r14 |
47 | |.define TMP3, r15 | 58 | |.define TMP3, r15 |
48 | | | 59 | | |
49 | |// Calling conventions. | 60 | |// MIPS o32 calling convention. |
50 | |.define CFUNCADDR, r25 | 61 | |.define CFUNCADDR, r25 |
51 | |.define CARG1, r4 | 62 | |.define CARG1, r4 |
52 | |.define CARG2, r5 | 63 | |.define CARG2, r5 |
@@ -56,13 +67,33 @@ | |||
56 | |.define CRET1, r2 | 67 | |.define CRET1, r2 |
57 | |.define CRET2, r3 | 68 | |.define CRET2, r3 |
58 | | | 69 | | |
70 | |.if ENDIAN_LE | ||
71 | |.define SFRETLO, CRET1 | ||
72 | |.define SFRETHI, CRET2 | ||
73 | |.define SFARG1LO, CARG1 | ||
74 | |.define SFARG1HI, CARG2 | ||
75 | |.define SFARG2LO, CARG3 | ||
76 | |.define SFARG2HI, CARG4 | ||
77 | |.else | ||
78 | |.define SFRETLO, CRET2 | ||
79 | |.define SFRETHI, CRET1 | ||
80 | |.define SFARG1LO, CARG2 | ||
81 | |.define SFARG1HI, CARG1 | ||
82 | |.define SFARG2LO, CARG4 | ||
83 | |.define SFARG2HI, CARG3 | ||
84 | |.endif | ||
85 | | | ||
86 | |.if FPU | ||
59 | |.define FARG1, f12 | 87 | |.define FARG1, f12 |
60 | |.define FARG2, f14 | 88 | |.define FARG2, f14 |
61 | | | 89 | | |
62 | |.define FRET1, f0 | 90 | |.define FRET1, f0 |
63 | |.define FRET2, f2 | 91 | |.define FRET2, f2 |
92 | |.endif | ||
64 | | | 93 | | |
65 | |// Stack layout while in interpreter. Must match with lj_frame.h. | 94 | |// Stack layout while in interpreter. Must match with lj_frame.h. |
95 | |.if FPU // MIPS32 hard-float. | ||
96 | | | ||
66 | |.define CFRAME_SPACE, 112 // Delta for sp. | 97 | |.define CFRAME_SPACE, 112 // Delta for sp. |
67 | | | 98 | | |
68 | |.define SAVE_ERRF, 124(sp) // 32 bit C frame info. | 99 | |.define SAVE_ERRF, 124(sp) // 32 bit C frame info. |
@@ -72,6 +103,20 @@ | |||
72 | |//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by interpreter. | 103 | |//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by interpreter. |
73 | |.define SAVE_GPR_, 72 // .. 72+10*4: 32 bit GPR saves. | 104 | |.define SAVE_GPR_, 72 // .. 72+10*4: 32 bit GPR saves. |
74 | |.define SAVE_FPR_, 24 // .. 24+6*8: 64 bit FPR saves. | 105 | |.define SAVE_FPR_, 24 // .. 24+6*8: 64 bit FPR saves. |
106 | | | ||
107 | |.else // MIPS32 soft-float | ||
108 | | | ||
109 | |.define CFRAME_SPACE, 64 // Delta for sp. | ||
110 | | | ||
111 | |.define SAVE_ERRF, 76(sp) // 32 bit C frame info. | ||
112 | |.define SAVE_NRES, 72(sp) | ||
113 | |.define SAVE_CFRAME, 68(sp) | ||
114 | |.define SAVE_L, 64(sp) | ||
115 | |//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by interpreter. | ||
116 | |.define SAVE_GPR_, 24 // .. 24+10*4: 32 bit GPR saves. | ||
117 | | | ||
118 | |.endif | ||
119 | | | ||
75 | |.define SAVE_PC, 20(sp) | 120 | |.define SAVE_PC, 20(sp) |
76 | |.define ARG5, 16(sp) | 121 | |.define ARG5, 16(sp) |
77 | |.define CSAVE_4, 12(sp) | 122 | |.define CSAVE_4, 12(sp) |
@@ -83,43 +128,45 @@ | |||
83 | |.define ARG5_OFS, 16 | 128 | |.define ARG5_OFS, 16 |
84 | |.define SAVE_MULTRES, ARG5 | 129 | |.define SAVE_MULTRES, ARG5 |
85 | | | 130 | | |
131 | |//----------------------------------------------------------------------- | ||
132 | | | ||
86 | |.macro saveregs | 133 | |.macro saveregs |
87 | | addiu sp, sp, -CFRAME_SPACE | 134 | | addiu sp, sp, -CFRAME_SPACE |
88 | | sw ra, SAVE_GPR_+9*4(sp) | 135 | | sw ra, SAVE_GPR_+9*4(sp) |
89 | | sw r30, SAVE_GPR_+8*4(sp) | 136 | | sw r30, SAVE_GPR_+8*4(sp) |
90 | | sdc1 f30, SAVE_FPR_+5*8(sp) | 137 | | .FPU sdc1 f30, SAVE_FPR_+5*8(sp) |
91 | | sw r23, SAVE_GPR_+7*4(sp) | 138 | | sw r23, SAVE_GPR_+7*4(sp) |
92 | | sw r22, SAVE_GPR_+6*4(sp) | 139 | | sw r22, SAVE_GPR_+6*4(sp) |
93 | | sdc1 f28, SAVE_FPR_+4*8(sp) | 140 | | .FPU sdc1 f28, SAVE_FPR_+4*8(sp) |
94 | | sw r21, SAVE_GPR_+5*4(sp) | 141 | | sw r21, SAVE_GPR_+5*4(sp) |
95 | | sw r20, SAVE_GPR_+4*4(sp) | 142 | | sw r20, SAVE_GPR_+4*4(sp) |
96 | | sdc1 f26, SAVE_FPR_+3*8(sp) | 143 | | .FPU sdc1 f26, SAVE_FPR_+3*8(sp) |
97 | | sw r19, SAVE_GPR_+3*4(sp) | 144 | | sw r19, SAVE_GPR_+3*4(sp) |
98 | | sw r18, SAVE_GPR_+2*4(sp) | 145 | | sw r18, SAVE_GPR_+2*4(sp) |
99 | | sdc1 f24, SAVE_FPR_+2*8(sp) | 146 | | .FPU sdc1 f24, SAVE_FPR_+2*8(sp) |
100 | | sw r17, SAVE_GPR_+1*4(sp) | 147 | | sw r17, SAVE_GPR_+1*4(sp) |
101 | | sw r16, SAVE_GPR_+0*4(sp) | 148 | | sw r16, SAVE_GPR_+0*4(sp) |
102 | | sdc1 f22, SAVE_FPR_+1*8(sp) | 149 | | .FPU sdc1 f22, SAVE_FPR_+1*8(sp) |
103 | | sdc1 f20, SAVE_FPR_+0*8(sp) | 150 | | .FPU sdc1 f20, SAVE_FPR_+0*8(sp) |
104 | |.endmacro | 151 | |.endmacro |
105 | | | 152 | | |
106 | |.macro restoreregs_ret | 153 | |.macro restoreregs_ret |
107 | | lw ra, SAVE_GPR_+9*4(sp) | 154 | | lw ra, SAVE_GPR_+9*4(sp) |
108 | | lw r30, SAVE_GPR_+8*4(sp) | 155 | | lw r30, SAVE_GPR_+8*4(sp) |
109 | | ldc1 f30, SAVE_FPR_+5*8(sp) | 156 | | .FPU ldc1 f30, SAVE_FPR_+5*8(sp) |
110 | | lw r23, SAVE_GPR_+7*4(sp) | 157 | | lw r23, SAVE_GPR_+7*4(sp) |
111 | | lw r22, SAVE_GPR_+6*4(sp) | 158 | | lw r22, SAVE_GPR_+6*4(sp) |
112 | | ldc1 f28, SAVE_FPR_+4*8(sp) | 159 | | .FPU ldc1 f28, SAVE_FPR_+4*8(sp) |
113 | | lw r21, SAVE_GPR_+5*4(sp) | 160 | | lw r21, SAVE_GPR_+5*4(sp) |
114 | | lw r20, SAVE_GPR_+4*4(sp) | 161 | | lw r20, SAVE_GPR_+4*4(sp) |
115 | | ldc1 f26, SAVE_FPR_+3*8(sp) | 162 | | .FPU ldc1 f26, SAVE_FPR_+3*8(sp) |
116 | | lw r19, SAVE_GPR_+3*4(sp) | 163 | | lw r19, SAVE_GPR_+3*4(sp) |
117 | | lw r18, SAVE_GPR_+2*4(sp) | 164 | | lw r18, SAVE_GPR_+2*4(sp) |
118 | | ldc1 f24, SAVE_FPR_+2*8(sp) | 165 | | .FPU ldc1 f24, SAVE_FPR_+2*8(sp) |
119 | | lw r17, SAVE_GPR_+1*4(sp) | 166 | | lw r17, SAVE_GPR_+1*4(sp) |
120 | | lw r16, SAVE_GPR_+0*4(sp) | 167 | | lw r16, SAVE_GPR_+0*4(sp) |
121 | | ldc1 f22, SAVE_FPR_+1*8(sp) | 168 | | .FPU ldc1 f22, SAVE_FPR_+1*8(sp) |
122 | | ldc1 f20, SAVE_FPR_+0*8(sp) | 169 | | .FPU ldc1 f20, SAVE_FPR_+0*8(sp) |
123 | | jr ra | 170 | | jr ra |
124 | | addiu sp, sp, CFRAME_SPACE | 171 | | addiu sp, sp, CFRAME_SPACE |
125 | |.endmacro | 172 | |.endmacro |
@@ -138,6 +185,7 @@ | |||
138 | |.type NODE, Node | 185 | |.type NODE, Node |
139 | |.type NARGS8, int | 186 | |.type NARGS8, int |
140 | |.type TRACE, GCtrace | 187 | |.type TRACE, GCtrace |
188 | |.type SBUF, SBuf | ||
141 | | | 189 | | |
142 | |//----------------------------------------------------------------------- | 190 | |//----------------------------------------------------------------------- |
143 | | | 191 | | |
@@ -152,13 +200,23 @@ | |||
152 | |//----------------------------------------------------------------------- | 200 | |//----------------------------------------------------------------------- |
153 | | | 201 | | |
154 | |// Endian-specific defines. | 202 | |// Endian-specific defines. |
155 | |.define FRAME_PC, LJ_ENDIAN_SELECT(-4,-8) | 203 | |.if ENDIAN_LE |
156 | |.define FRAME_FUNC, LJ_ENDIAN_SELECT(-8,-4) | 204 | |.define FRAME_PC, -4 |
157 | |.define HI, LJ_ENDIAN_SELECT(4,0) | 205 | |.define FRAME_FUNC, -8 |
158 | |.define LO, LJ_ENDIAN_SELECT(0,4) | 206 | |.define HI, 4 |
159 | |.define OFS_RD, LJ_ENDIAN_SELECT(2,0) | 207 | |.define LO, 0 |
160 | |.define OFS_RA, LJ_ENDIAN_SELECT(1,2) | 208 | |.define OFS_RD, 2 |
161 | |.define OFS_OP, LJ_ENDIAN_SELECT(0,3) | 209 | |.define OFS_RA, 1 |
210 | |.define OFS_OP, 0 | ||
211 | |.else | ||
212 | |.define FRAME_PC, -8 | ||
213 | |.define FRAME_FUNC, -4 | ||
214 | |.define HI, 0 | ||
215 | |.define LO, 4 | ||
216 | |.define OFS_RD, 0 | ||
217 | |.define OFS_RA, 2 | ||
218 | |.define OFS_OP, 3 | ||
219 | |.endif | ||
162 | | | 220 | | |
163 | |// Instruction decode. | 221 | |// Instruction decode. |
164 | |.macro decode_OP1, dst, ins; andi dst, ins, 0xff; .endmacro | 222 | |.macro decode_OP1, dst, ins; andi dst, ins, 0xff; .endmacro |
@@ -353,9 +411,11 @@ static void build_subroutines(BuildCtx *ctx) | |||
353 | |. sll TMP2, TMP2, 3 | 411 | |. sll TMP2, TMP2, 3 |
354 | |1: | 412 | |1: |
355 | | addiu TMP1, TMP1, -8 | 413 | | addiu TMP1, TMP1, -8 |
356 | | ldc1 f0, 0(RA) | 414 | | lw SFRETHI, HI(RA) |
415 | | lw SFRETLO, LO(RA) | ||
357 | | addiu RA, RA, 8 | 416 | | addiu RA, RA, 8 |
358 | | sdc1 f0, 0(BASE) | 417 | | sw SFRETHI, HI(BASE) |
418 | | sw SFRETLO, LO(BASE) | ||
359 | | bnez TMP1, <1 | 419 | | bnez TMP1, <1 |
360 | |. addiu BASE, BASE, 8 | 420 | |. addiu BASE, BASE, 8 |
361 | | | 421 | | |
@@ -424,15 +484,16 @@ static void build_subroutines(BuildCtx *ctx) | |||
424 | | and sp, CARG1, AT | 484 | | and sp, CARG1, AT |
425 | |->vm_unwind_ff_eh: // Landing pad for external unwinder. | 485 | |->vm_unwind_ff_eh: // Landing pad for external unwinder. |
426 | | lw L, SAVE_L | 486 | | lw L, SAVE_L |
427 | | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | 487 | | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). |
488 | | li TISNUM, LJ_TISNUM // Setup type comparison constants. | ||
428 | | li TISNIL, LJ_TNIL | 489 | | li TISNIL, LJ_TNIL |
429 | | lw BASE, L->base | 490 | | lw BASE, L->base |
430 | | lw DISPATCH, L->glref // Setup pointer to dispatch table. | 491 | | lw DISPATCH, L->glref // Setup pointer to dispatch table. |
431 | | mtc1 TMP3, TOBIT | 492 | | .FPU mtc1 TMP3, TOBIT |
432 | | li TMP1, LJ_TFALSE | 493 | | li TMP1, LJ_TFALSE |
433 | | li_vmstate INTERP | 494 | | li_vmstate INTERP |
434 | | lw PC, FRAME_PC(BASE) // Fetch PC of previous frame. | 495 | | lw PC, FRAME_PC(BASE) // Fetch PC of previous frame. |
435 | | cvt.d.s TOBIT, TOBIT | 496 | | .FPU cvt.d.s TOBIT, TOBIT |
436 | | addiu RA, BASE, -8 // Results start at BASE-8. | 497 | | addiu RA, BASE, -8 // Results start at BASE-8. |
437 | | addiu DISPATCH, DISPATCH, GG_G2DISP | 498 | | addiu DISPATCH, DISPATCH, GG_G2DISP |
438 | | sw TMP1, HI(RA) // Prepend false to error message. | 499 | | sw TMP1, HI(RA) // Prepend false to error message. |
@@ -486,21 +547,23 @@ static void build_subroutines(BuildCtx *ctx) | |||
486 | | addiu DISPATCH, DISPATCH, GG_G2DISP | 547 | | addiu DISPATCH, DISPATCH, GG_G2DISP |
487 | | sw r0, SAVE_NRES | 548 | | sw r0, SAVE_NRES |
488 | | sw r0, SAVE_ERRF | 549 | | sw r0, SAVE_ERRF |
489 | | sw TMP0, L->cframe | 550 | | sw CARG1, SAVE_PC // Any value outside of bytecode is ok. |
490 | | sw r0, SAVE_CFRAME | 551 | | sw r0, SAVE_CFRAME |
491 | | beqz TMP1, >3 | 552 | | beqz TMP1, >3 |
492 | |. sw CARG1, SAVE_PC // Any value outside of bytecode is ok. | 553 | |. sw TMP0, L->cframe |
493 | | | 554 | | |
494 | | // Resume after yield (like a return). | 555 | | // Resume after yield (like a return). |
556 | | sw L, DISPATCH_GL(cur_L)(DISPATCH) | ||
495 | | move RA, BASE | 557 | | move RA, BASE |
496 | | lw BASE, L->base | 558 | | lw BASE, L->base |
559 | | li TISNUM, LJ_TISNUM // Setup type comparison constants. | ||
497 | | lw TMP1, L->top | 560 | | lw TMP1, L->top |
498 | | lw PC, FRAME_PC(BASE) | 561 | | lw PC, FRAME_PC(BASE) |
499 | | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | 562 | | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). |
500 | | subu RD, TMP1, BASE | 563 | | subu RD, TMP1, BASE |
501 | | mtc1 TMP3, TOBIT | 564 | | .FPU mtc1 TMP3, TOBIT |
502 | | sb r0, L->status | 565 | | sb r0, L->status |
503 | | cvt.d.s TOBIT, TOBIT | 566 | | .FPU cvt.d.s TOBIT, TOBIT |
504 | | li_vmstate INTERP | 567 | | li_vmstate INTERP |
505 | | addiu RD, RD, 8 | 568 | | addiu RD, RD, 8 |
506 | | st_vmstate | 569 | | st_vmstate |
@@ -525,25 +588,27 @@ static void build_subroutines(BuildCtx *ctx) | |||
525 | | | 588 | | |
526 | |1: // Entry point for vm_pcall above (PC = ftype). | 589 | |1: // Entry point for vm_pcall above (PC = ftype). |
527 | | lw TMP1, L:CARG1->cframe | 590 | | lw TMP1, L:CARG1->cframe |
528 | | sw CARG3, SAVE_NRES | ||
529 | | move L, CARG1 | 591 | | move L, CARG1 |
530 | | sw CARG1, SAVE_L | 592 | | sw CARG3, SAVE_NRES |
531 | | move BASE, CARG2 | ||
532 | | sw sp, L->cframe // Add our C frame to cframe chain. | ||
533 | | lw DISPATCH, L->glref // Setup pointer to dispatch table. | 593 | | lw DISPATCH, L->glref // Setup pointer to dispatch table. |
594 | | sw CARG1, SAVE_L | ||
595 | | move BASE, CARG2 | ||
596 | | addiu DISPATCH, DISPATCH, GG_G2DISP | ||
534 | | sw CARG1, SAVE_PC // Any value outside of bytecode is ok. | 597 | | sw CARG1, SAVE_PC // Any value outside of bytecode is ok. |
535 | | sw TMP1, SAVE_CFRAME | 598 | | sw TMP1, SAVE_CFRAME |
536 | | addiu DISPATCH, DISPATCH, GG_G2DISP | 599 | | sw sp, L->cframe // Add our C frame to cframe chain. |
537 | | | 600 | | |
538 | |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). | 601 | |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). |
602 | | sw L, DISPATCH_GL(cur_L)(DISPATCH) | ||
539 | | lw TMP2, L->base // TMP2 = old base (used in vmeta_call). | 603 | | lw TMP2, L->base // TMP2 = old base (used in vmeta_call). |
540 | | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | 604 | | li TISNUM, LJ_TISNUM // Setup type comparison constants. |
605 | | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | ||
541 | | lw TMP1, L->top | 606 | | lw TMP1, L->top |
542 | | mtc1 TMP3, TOBIT | 607 | | .FPU mtc1 TMP3, TOBIT |
543 | | addu PC, PC, BASE | 608 | | addu PC, PC, BASE |
544 | | subu NARGS8:RC, TMP1, BASE | 609 | | subu NARGS8:RC, TMP1, BASE |
545 | | subu PC, PC, TMP2 // PC = frame delta + frame type | 610 | | subu PC, PC, TMP2 // PC = frame delta + frame type |
546 | | cvt.d.s TOBIT, TOBIT | 611 | | .FPU cvt.d.s TOBIT, TOBIT |
547 | | li_vmstate INTERP | 612 | | li_vmstate INTERP |
548 | | li TISNIL, LJ_TNIL | 613 | | li TISNIL, LJ_TNIL |
549 | | st_vmstate | 614 | | st_vmstate |
@@ -566,20 +631,21 @@ static void build_subroutines(BuildCtx *ctx) | |||
566 | | lw TMP0, L:CARG1->stack | 631 | | lw TMP0, L:CARG1->stack |
567 | | sw CARG1, SAVE_L | 632 | | sw CARG1, SAVE_L |
568 | | lw TMP1, L->top | 633 | | lw TMP1, L->top |
634 | | lw DISPATCH, L->glref // Setup pointer to dispatch table. | ||
569 | | sw CARG1, SAVE_PC // Any value outside of bytecode is ok. | 635 | | sw CARG1, SAVE_PC // Any value outside of bytecode is ok. |
570 | | subu TMP0, TMP0, TMP1 // Compute -savestack(L, L->top). | 636 | | subu TMP0, TMP0, TMP1 // Compute -savestack(L, L->top). |
571 | | lw TMP1, L->cframe | 637 | | lw TMP1, L->cframe |
572 | | sw sp, L->cframe // Add our C frame to cframe chain. | 638 | | addiu DISPATCH, DISPATCH, GG_G2DISP |
573 | | sw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame. | 639 | | sw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame. |
574 | | sw r0, SAVE_ERRF // No error function. | 640 | | sw r0, SAVE_ERRF // No error function. |
575 | | move CFUNCADDR, CARG4 | 641 | | sw TMP1, SAVE_CFRAME |
642 | | sw sp, L->cframe // Add our C frame to cframe chain. | ||
643 | | sw L, DISPATCH_GL(cur_L)(DISPATCH) | ||
576 | | jalr CARG4 // (lua_State *L, lua_CFunction func, void *ud) | 644 | | jalr CARG4 // (lua_State *L, lua_CFunction func, void *ud) |
577 | |. sw TMP1, SAVE_CFRAME | 645 | |. move CFUNCADDR, CARG4 |
578 | | move BASE, CRET1 | 646 | | move BASE, CRET1 |
579 | | lw DISPATCH, L->glref // Setup pointer to dispatch table. | ||
580 | | li PC, FRAME_CP | ||
581 | | bnez CRET1, <3 // Else continue with the call. | 647 | | bnez CRET1, <3 // Else continue with the call. |
582 | |. addiu DISPATCH, DISPATCH, GG_G2DISP | 648 | |. li PC, FRAME_CP |
583 | | b ->vm_leave_cp // No base? Just remove C frame. | 649 | | b ->vm_leave_cp // No base? Just remove C frame. |
584 | |. nop | 650 | |. nop |
585 | | | 651 | | |
@@ -624,7 +690,8 @@ static void build_subroutines(BuildCtx *ctx) | |||
624 | |->cont_cat: // RA = resultptr, RB = meta base | 690 | |->cont_cat: // RA = resultptr, RB = meta base |
625 | | lw INS, -4(PC) | 691 | | lw INS, -4(PC) |
626 | | addiu CARG2, RB, -16 | 692 | | addiu CARG2, RB, -16 |
627 | | ldc1 f0, 0(RA) | 693 | | lw SFRETHI, HI(RA) |
694 | | lw SFRETLO, LO(RA) | ||
628 | | decode_RB8a MULTRES, INS | 695 | | decode_RB8a MULTRES, INS |
629 | | decode_RA8a RA, INS | 696 | | decode_RA8a RA, INS |
630 | | decode_RB8b MULTRES | 697 | | decode_RB8b MULTRES |
@@ -632,11 +699,13 @@ static void build_subroutines(BuildCtx *ctx) | |||
632 | | addu TMP1, BASE, MULTRES | 699 | | addu TMP1, BASE, MULTRES |
633 | | sw BASE, L->base | 700 | | sw BASE, L->base |
634 | | subu CARG3, CARG2, TMP1 | 701 | | subu CARG3, CARG2, TMP1 |
702 | | sw SFRETHI, HI(CARG2) | ||
635 | | bne TMP1, CARG2, ->BC_CAT_Z | 703 | | bne TMP1, CARG2, ->BC_CAT_Z |
636 | |. sdc1 f0, 0(CARG2) | 704 | |. sw SFRETLO, LO(CARG2) |
637 | | addu RA, BASE, RA | 705 | | addu RA, BASE, RA |
706 | | sw SFRETHI, HI(RA) | ||
638 | | b ->cont_nop | 707 | | b ->cont_nop |
639 | |. sdc1 f0, 0(RA) | 708 | |. sw SFRETLO, LO(RA) |
640 | | | 709 | | |
641 | |//-- Table indexing metamethods ----------------------------------------- | 710 | |//-- Table indexing metamethods ----------------------------------------- |
642 | | | 711 | | |
@@ -659,10 +728,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
659 | |. sw TMP1, HI(CARG3) | 728 | |. sw TMP1, HI(CARG3) |
660 | | | 729 | | |
661 | |->vmeta_tgetb: // TMP0 = index | 730 | |->vmeta_tgetb: // TMP0 = index |
662 | | mtc1 TMP0, f0 | ||
663 | | cvt.d.w f0, f0 | ||
664 | | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) | 731 | | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) |
665 | | sdc1 f0, 0(CARG3) | 732 | | sw TMP0, LO(CARG3) |
733 | | sw TISNUM, HI(CARG3) | ||
666 | | | 734 | | |
667 | |->vmeta_tgetv: | 735 | |->vmeta_tgetv: |
668 | |1: | 736 | |1: |
@@ -674,9 +742,11 @@ static void build_subroutines(BuildCtx *ctx) | |||
674 | | // Returns TValue * (finished) or NULL (metamethod). | 742 | | // Returns TValue * (finished) or NULL (metamethod). |
675 | | beqz CRET1, >3 | 743 | | beqz CRET1, >3 |
676 | |. addiu TMP1, BASE, -FRAME_CONT | 744 | |. addiu TMP1, BASE, -FRAME_CONT |
677 | | ldc1 f0, 0(CRET1) | 745 | | lw SFARG1HI, HI(CRET1) |
746 | | lw SFARG2HI, LO(CRET1) | ||
678 | | ins_next1 | 747 | | ins_next1 |
679 | | sdc1 f0, 0(RA) | 748 | | sw SFARG1HI, HI(RA) |
749 | | sw SFARG2HI, LO(RA) | ||
680 | | ins_next2 | 750 | | ins_next2 |
681 | | | 751 | | |
682 | |3: // Call __index metamethod. | 752 | |3: // Call __index metamethod. |
@@ -688,6 +758,17 @@ static void build_subroutines(BuildCtx *ctx) | |||
688 | | b ->vm_call_dispatch_f | 758 | | b ->vm_call_dispatch_f |
689 | |. li NARGS8:RC, 16 // 2 args for func(t, k). | 759 | |. li NARGS8:RC, 16 // 2 args for func(t, k). |
690 | | | 760 | | |
761 | |->vmeta_tgetr: | ||
762 | | load_got lj_tab_getinth | ||
763 | | call_intern lj_tab_getinth // (GCtab *t, int32_t key) | ||
764 | |. nop | ||
765 | | // Returns cTValue * or NULL. | ||
766 | | beqz CRET1, ->BC_TGETR_Z | ||
767 | |. move SFARG2HI, TISNIL | ||
768 | | lw SFARG2HI, HI(CRET1) | ||
769 | | b ->BC_TGETR_Z | ||
770 | |. lw SFARG2LO, LO(CRET1) | ||
771 | | | ||
691 | |//----------------------------------------------------------------------- | 772 | |//----------------------------------------------------------------------- |
692 | | | 773 | | |
693 | |->vmeta_tsets1: | 774 | |->vmeta_tsets1: |
@@ -709,10 +790,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
709 | |. sw TMP1, HI(CARG3) | 790 | |. sw TMP1, HI(CARG3) |
710 | | | 791 | | |
711 | |->vmeta_tsetb: // TMP0 = index | 792 | |->vmeta_tsetb: // TMP0 = index |
712 | | mtc1 TMP0, f0 | ||
713 | | cvt.d.w f0, f0 | ||
714 | | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) | 793 | | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) |
715 | | sdc1 f0, 0(CARG3) | 794 | | sw TMP0, LO(CARG3) |
795 | | sw TISNUM, HI(CARG3) | ||
716 | | | 796 | | |
717 | |->vmeta_tsetv: | 797 | |->vmeta_tsetv: |
718 | |1: | 798 | |1: |
@@ -722,11 +802,13 @@ static void build_subroutines(BuildCtx *ctx) | |||
722 | | call_intern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) | 802 | | call_intern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) |
723 | |. move CARG1, L | 803 | |. move CARG1, L |
724 | | // Returns TValue * (finished) or NULL (metamethod). | 804 | | // Returns TValue * (finished) or NULL (metamethod). |
805 | | lw SFARG1HI, HI(RA) | ||
725 | | beqz CRET1, >3 | 806 | | beqz CRET1, >3 |
726 | |. ldc1 f0, 0(RA) | 807 | |. lw SFARG1LO, LO(RA) |
727 | | // NOBARRIER: lj_meta_tset ensures the table is not black. | 808 | | // NOBARRIER: lj_meta_tset ensures the table is not black. |
728 | | ins_next1 | 809 | | ins_next1 |
729 | | sdc1 f0, 0(CRET1) | 810 | | sw SFARG1HI, HI(CRET1) |
811 | | sw SFARG1LO, LO(CRET1) | ||
730 | | ins_next2 | 812 | | ins_next2 |
731 | | | 813 | | |
732 | |3: // Call __newindex metamethod. | 814 | |3: // Call __newindex metamethod. |
@@ -736,14 +818,27 @@ static void build_subroutines(BuildCtx *ctx) | |||
736 | | sw PC, -16+HI(BASE) // [cont|PC] | 818 | | sw PC, -16+HI(BASE) // [cont|PC] |
737 | | subu PC, BASE, TMP1 | 819 | | subu PC, BASE, TMP1 |
738 | | lw LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. | 820 | | lw LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. |
739 | | sdc1 f0, 16(BASE) // Copy value to third argument. | 821 | | sw SFARG1HI, 16+HI(BASE) // Copy value to third argument. |
822 | | sw SFARG1LO, 16+LO(BASE) | ||
740 | | b ->vm_call_dispatch_f | 823 | | b ->vm_call_dispatch_f |
741 | |. li NARGS8:RC, 24 // 3 args for func(t, k, v) | 824 | |. li NARGS8:RC, 24 // 3 args for func(t, k, v) |
742 | | | 825 | | |
826 | |->vmeta_tsetr: | ||
827 | | load_got lj_tab_setinth | ||
828 | | sw BASE, L->base | ||
829 | | sw PC, SAVE_PC | ||
830 | | call_intern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) | ||
831 | |. move CARG1, L | ||
832 | | // Returns TValue *. | ||
833 | | b ->BC_TSETR_Z | ||
834 | |. nop | ||
835 | | | ||
743 | |//-- Comparison metamethods --------------------------------------------- | 836 | |//-- Comparison metamethods --------------------------------------------- |
744 | | | 837 | | |
745 | |->vmeta_comp: | 838 | |->vmeta_comp: |
746 | | // CARG2, CARG3 are already set by BC_ISLT/BC_ISGE/BC_ISLE/BC_ISGT. | 839 | | // RA/RD point to o1/o2. |
840 | | move CARG2, RA | ||
841 | | move CARG3, RD | ||
747 | | load_got lj_meta_comp | 842 | | load_got lj_meta_comp |
748 | | addiu PC, PC, -4 | 843 | | addiu PC, PC, -4 |
749 | | sw BASE, L->base | 844 | | sw BASE, L->base |
@@ -769,11 +864,13 @@ static void build_subroutines(BuildCtx *ctx) | |||
769 | | | 864 | | |
770 | |->cont_ra: // RA = resultptr | 865 | |->cont_ra: // RA = resultptr |
771 | | lbu TMP1, -4+OFS_RA(PC) | 866 | | lbu TMP1, -4+OFS_RA(PC) |
772 | | ldc1 f0, 0(RA) | 867 | | lw SFRETHI, HI(RA) |
868 | | lw SFRETLO, LO(RA) | ||
773 | | sll TMP1, TMP1, 3 | 869 | | sll TMP1, TMP1, 3 |
774 | | addu TMP1, BASE, TMP1 | 870 | | addu TMP1, BASE, TMP1 |
871 | | sw SFRETHI, HI(TMP1) | ||
775 | | b ->cont_nop | 872 | | b ->cont_nop |
776 | |. sdc1 f0, 0(TMP1) | 873 | |. sw SFRETLO, LO(TMP1) |
777 | | | 874 | | |
778 | |->cont_condt: // RA = resultptr | 875 | |->cont_condt: // RA = resultptr |
779 | | lw TMP0, HI(RA) | 876 | | lw TMP0, HI(RA) |
@@ -788,8 +885,11 @@ static void build_subroutines(BuildCtx *ctx) | |||
788 | |. addiu TMP2, AT, -1 // Branch if result is false. | 885 | |. addiu TMP2, AT, -1 // Branch if result is false. |
789 | | | 886 | | |
790 | |->vmeta_equal: | 887 | |->vmeta_equal: |
791 | | // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV. | 888 | | // SFARG1LO/SFARG2LO point to o1/o2. TMP0 is set to 0/1. |
792 | | load_got lj_meta_equal | 889 | | load_got lj_meta_equal |
890 | | move CARG2, SFARG1LO | ||
891 | | move CARG3, SFARG2LO | ||
892 | | move CARG4, TMP0 | ||
793 | | addiu PC, PC, -4 | 893 | | addiu PC, PC, -4 |
794 | | sw BASE, L->base | 894 | | sw BASE, L->base |
795 | | sw PC, SAVE_PC | 895 | | sw PC, SAVE_PC |
@@ -813,17 +913,31 @@ static void build_subroutines(BuildCtx *ctx) | |||
813 | |. nop | 913 | |. nop |
814 | |.endif | 914 | |.endif |
815 | | | 915 | | |
916 | |->vmeta_istype: | ||
917 | | load_got lj_meta_istype | ||
918 | | addiu PC, PC, -4 | ||
919 | | sw BASE, L->base | ||
920 | | srl CARG2, RA, 3 | ||
921 | | srl CARG3, RD, 3 | ||
922 | | sw PC, SAVE_PC | ||
923 | | call_intern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) | ||
924 | |. move CARG1, L | ||
925 | | b ->cont_nop | ||
926 | |. nop | ||
927 | | | ||
816 | |//-- Arithmetic metamethods --------------------------------------------- | 928 | |//-- Arithmetic metamethods --------------------------------------------- |
817 | | | 929 | | |
818 | |->vmeta_unm: | 930 | |->vmeta_unm: |
819 | | move CARG4, CARG3 | 931 | | move RC, RB |
820 | | | 932 | | |
821 | |->vmeta_arith: | 933 | |->vmeta_arith: |
822 | | load_got lj_meta_arith | 934 | | load_got lj_meta_arith |
823 | | decode_OP1 TMP0, INS | 935 | | decode_OP1 TMP0, INS |
824 | | sw BASE, L->base | 936 | | sw BASE, L->base |
825 | | sw PC, SAVE_PC | ||
826 | | move CARG2, RA | 937 | | move CARG2, RA |
938 | | sw PC, SAVE_PC | ||
939 | | move CARG3, RB | ||
940 | | move CARG4, RC | ||
827 | | sw TMP0, ARG5 | 941 | | sw TMP0, ARG5 |
828 | | call_intern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) | 942 | | call_intern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) |
829 | |. move CARG1, L | 943 | |. move CARG1, L |
@@ -931,40 +1045,52 @@ static void build_subroutines(BuildCtx *ctx) | |||
931 | | | 1045 | | |
932 | |.macro .ffunc_1, name | 1046 | |.macro .ffunc_1, name |
933 | |->ff_ .. name: | 1047 | |->ff_ .. name: |
1048 | | lw SFARG1HI, HI(BASE) | ||
934 | | beqz NARGS8:RC, ->fff_fallback | 1049 | | beqz NARGS8:RC, ->fff_fallback |
935 | |. lw CARG3, HI(BASE) | 1050 | |. lw SFARG1LO, LO(BASE) |
936 | | lw CARG1, LO(BASE) | ||
937 | |.endmacro | 1051 | |.endmacro |
938 | | | 1052 | | |
939 | |.macro .ffunc_2, name | 1053 | |.macro .ffunc_2, name |
940 | |->ff_ .. name: | 1054 | |->ff_ .. name: |
941 | | sltiu AT, NARGS8:RC, 16 | 1055 | | sltiu AT, NARGS8:RC, 16 |
942 | | lw CARG3, HI(BASE) | 1056 | | lw SFARG1HI, HI(BASE) |
943 | | bnez AT, ->fff_fallback | 1057 | | bnez AT, ->fff_fallback |
944 | |. lw CARG4, 8+HI(BASE) | 1058 | |. lw SFARG2HI, 8+HI(BASE) |
945 | | lw CARG1, LO(BASE) | 1059 | | lw SFARG1LO, LO(BASE) |
946 | | lw CARG2, 8+LO(BASE) | 1060 | | lw SFARG2LO, 8+LO(BASE) |
947 | |.endmacro | 1061 | |.endmacro |
948 | | | 1062 | | |
949 | |.macro .ffunc_n, name // Caveat: has delay slot! | 1063 | |.macro .ffunc_n, name // Caveat: has delay slot! |
950 | |->ff_ .. name: | 1064 | |->ff_ .. name: |
951 | | lw CARG3, HI(BASE) | 1065 | | lw SFARG1HI, HI(BASE) |
1066 | |.if FPU | ||
1067 | | ldc1 FARG1, 0(BASE) | ||
1068 | |.else | ||
1069 | | lw SFARG1LO, LO(BASE) | ||
1070 | |.endif | ||
952 | | beqz NARGS8:RC, ->fff_fallback | 1071 | | beqz NARGS8:RC, ->fff_fallback |
953 | |. ldc1 FARG1, 0(BASE) | 1072 | |. sltiu AT, SFARG1HI, LJ_TISNUM |
954 | | sltiu AT, CARG3, LJ_TISNUM | ||
955 | | beqz AT, ->fff_fallback | 1073 | | beqz AT, ->fff_fallback |
956 | |.endmacro | 1074 | |.endmacro |
957 | | | 1075 | | |
958 | |.macro .ffunc_nn, name // Caveat: has delay slot! | 1076 | |.macro .ffunc_nn, name // Caveat: has delay slot! |
959 | |->ff_ .. name: | 1077 | |->ff_ .. name: |
960 | | sltiu AT, NARGS8:RC, 16 | 1078 | | sltiu AT, NARGS8:RC, 16 |
961 | | lw CARG3, HI(BASE) | 1079 | | lw SFARG1HI, HI(BASE) |
962 | | bnez AT, ->fff_fallback | 1080 | | bnez AT, ->fff_fallback |
963 | |. lw CARG4, 8+HI(BASE) | 1081 | |. lw SFARG2HI, 8+HI(BASE) |
964 | | ldc1 FARG1, 0(BASE) | 1082 | | sltiu TMP0, SFARG1HI, LJ_TISNUM |
965 | | ldc1 FARG2, 8(BASE) | 1083 | |.if FPU |
966 | | sltiu TMP0, CARG3, LJ_TISNUM | 1084 | | ldc1 FARG1, 0(BASE) |
967 | | sltiu TMP1, CARG4, LJ_TISNUM | 1085 | |.else |
1086 | | lw SFARG1LO, LO(BASE) | ||
1087 | |.endif | ||
1088 | | sltiu TMP1, SFARG2HI, LJ_TISNUM | ||
1089 | |.if FPU | ||
1090 | | ldc1 FARG2, 8(BASE) | ||
1091 | |.else | ||
1092 | | lw SFARG2LO, 8+LO(BASE) | ||
1093 | |.endif | ||
968 | | and TMP0, TMP0, TMP1 | 1094 | | and TMP0, TMP0, TMP1 |
969 | | beqz TMP0, ->fff_fallback | 1095 | | beqz TMP0, ->fff_fallback |
970 | |.endmacro | 1096 | |.endmacro |
@@ -980,52 +1106,54 @@ static void build_subroutines(BuildCtx *ctx) | |||
980 | |//-- Base library: checks ----------------------------------------------- | 1106 | |//-- Base library: checks ----------------------------------------------- |
981 | | | 1107 | | |
982 | |.ffunc_1 assert | 1108 | |.ffunc_1 assert |
983 | | sltiu AT, CARG3, LJ_TISTRUECOND | 1109 | | sltiu AT, SFARG1HI, LJ_TISTRUECOND |
984 | | beqz AT, ->fff_fallback | 1110 | | beqz AT, ->fff_fallback |
985 | |. addiu RA, BASE, -8 | 1111 | |. addiu RA, BASE, -8 |
986 | | lw PC, FRAME_PC(BASE) | 1112 | | lw PC, FRAME_PC(BASE) |
987 | | addiu RD, NARGS8:RC, 8 // Compute (nresults+1)*8. | 1113 | | addiu RD, NARGS8:RC, 8 // Compute (nresults+1)*8. |
988 | | addu TMP2, RA, NARGS8:RC | 1114 | | addu TMP2, RA, NARGS8:RC |
989 | | sw CARG3, HI(RA) | 1115 | | sw SFARG1HI, HI(RA) |
990 | | addiu TMP1, BASE, 8 | 1116 | | addiu TMP1, BASE, 8 |
991 | | beq BASE, TMP2, ->fff_res // Done if exactly 1 argument. | 1117 | | beq BASE, TMP2, ->fff_res // Done if exactly 1 argument. |
992 | |. sw CARG1, LO(RA) | 1118 | |. sw SFARG1LO, LO(RA) |
993 | |1: | 1119 | |1: |
994 | | ldc1 f0, 0(TMP1) | 1120 | | lw SFRETHI, HI(TMP1) |
995 | | sdc1 f0, -8(TMP1) | 1121 | | lw SFRETLO, LO(TMP1) |
1122 | | sw SFRETHI, -8+HI(TMP1) | ||
1123 | | sw SFRETLO, -8+LO(TMP1) | ||
996 | | bne TMP1, TMP2, <1 | 1124 | | bne TMP1, TMP2, <1 |
997 | |. addiu TMP1, TMP1, 8 | 1125 | |. addiu TMP1, TMP1, 8 |
998 | | b ->fff_res | 1126 | | b ->fff_res |
999 | |. nop | 1127 | |. nop |
1000 | | | 1128 | | |
1001 | |.ffunc type | 1129 | |.ffunc type |
1002 | | lw CARG3, HI(BASE) | 1130 | | lw SFARG1HI, HI(BASE) |
1003 | | li TMP1, LJ_TISNUM | ||
1004 | | beqz NARGS8:RC, ->fff_fallback | 1131 | | beqz NARGS8:RC, ->fff_fallback |
1005 | |. sltiu TMP0, CARG3, LJ_TISNUM | 1132 | |. sltiu TMP0, SFARG1HI, LJ_TISNUM |
1006 | | movz TMP1, CARG3, TMP0 | 1133 | | movn SFARG1HI, TISNUM, TMP0 |
1007 | | not TMP1, TMP1 | 1134 | | not TMP1, SFARG1HI |
1008 | | sll TMP1, TMP1, 3 | 1135 | | sll TMP1, TMP1, 3 |
1009 | | addu TMP1, CFUNC:RB, TMP1 | 1136 | | addu TMP1, CFUNC:RB, TMP1 |
1010 | | b ->fff_resn | 1137 | | lw SFARG1HI, CFUNC:TMP1->upvalue[0].u32.hi |
1011 | |. ldc1 FRET1, CFUNC:TMP1->upvalue | 1138 | | b ->fff_restv |
1139 | |. lw SFARG1LO, CFUNC:TMP1->upvalue[0].u32.lo | ||
1012 | | | 1140 | | |
1013 | |//-- Base library: getters and setters --------------------------------- | 1141 | |//-- Base library: getters and setters --------------------------------- |
1014 | | | 1142 | | |
1015 | |.ffunc_1 getmetatable | 1143 | |.ffunc_1 getmetatable |
1016 | | li AT, LJ_TTAB | 1144 | | li AT, LJ_TTAB |
1017 | | bne CARG3, AT, >6 | 1145 | | bne SFARG1HI, AT, >6 |
1018 | |. li AT, LJ_TUDATA | 1146 | |. li AT, LJ_TUDATA |
1019 | |1: // Field metatable must be at same offset for GCtab and GCudata! | 1147 | |1: // Field metatable must be at same offset for GCtab and GCudata! |
1020 | | lw TAB:CARG1, TAB:CARG1->metatable | 1148 | | lw TAB:SFARG1LO, TAB:SFARG1LO->metatable |
1021 | |2: | 1149 | |2: |
1022 | | lw STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH) | 1150 | | lw STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH) |
1023 | | beqz TAB:CARG1, ->fff_restv | 1151 | | beqz TAB:SFARG1LO, ->fff_restv |
1024 | |. li CARG3, LJ_TNIL | 1152 | |. li SFARG1HI, LJ_TNIL |
1025 | | lw TMP0, TAB:CARG1->hmask | 1153 | | lw TMP0, TAB:SFARG1LO->hmask |
1026 | | li CARG3, LJ_TTAB // Use metatable as default result. | 1154 | | li SFARG1HI, LJ_TTAB // Use metatable as default result. |
1027 | | lw TMP1, STR:RC->hash | 1155 | | lw TMP1, STR:RC->hash |
1028 | | lw NODE:TMP2, TAB:CARG1->node | 1156 | | lw NODE:TMP2, TAB:SFARG1LO->node |
1029 | | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask | 1157 | | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask |
1030 | | sll TMP0, TMP1, 5 | 1158 | | sll TMP0, TMP1, 5 |
1031 | | sll TMP1, TMP1, 3 | 1159 | | sll TMP1, TMP1, 3 |
@@ -1037,7 +1165,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
1037 | | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2) | 1165 | | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2) |
1038 | | lw NODE:TMP3, NODE:TMP2->next | 1166 | | lw NODE:TMP3, NODE:TMP2->next |
1039 | | bne CARG4, AT, >4 | 1167 | | bne CARG4, AT, >4 |
1040 | |. lw CARG2, offsetof(Node, val)+HI(NODE:TMP2) | 1168 | |. lw CARG3, offsetof(Node, val)+HI(NODE:TMP2) |
1041 | | beq TMP0, STR:RC, >5 | 1169 | | beq TMP0, STR:RC, >5 |
1042 | |. lw TMP1, offsetof(Node, val)+LO(NODE:TMP2) | 1170 | |. lw TMP1, offsetof(Node, val)+LO(NODE:TMP2) |
1043 | |4: | 1171 | |4: |
@@ -1046,36 +1174,35 @@ static void build_subroutines(BuildCtx *ctx) | |||
1046 | | b <3 | 1174 | | b <3 |
1047 | |. nop | 1175 | |. nop |
1048 | |5: | 1176 | |5: |
1049 | | beq CARG2, TISNIL, ->fff_restv // Ditto for nil value. | 1177 | | beq CARG3, TISNIL, ->fff_restv // Ditto for nil value. |
1050 | |. nop | 1178 | |. nop |
1051 | | move CARG3, CARG2 // Return value of mt.__metatable. | 1179 | | move SFARG1HI, CARG3 // Return value of mt.__metatable. |
1052 | | b ->fff_restv | 1180 | | b ->fff_restv |
1053 | |. move CARG1, TMP1 | 1181 | |. move SFARG1LO, TMP1 |
1054 | | | 1182 | | |
1055 | |6: | 1183 | |6: |
1056 | | beq CARG3, AT, <1 | 1184 | | beq SFARG1HI, AT, <1 |
1057 | |. sltiu TMP0, CARG3, LJ_TISNUM | 1185 | |. sltu AT, TISNUM, SFARG1HI |
1058 | | li TMP1, LJ_TISNUM | 1186 | | movz SFARG1HI, TISNUM, AT |
1059 | | movz TMP1, CARG3, TMP0 | 1187 | | not TMP1, SFARG1HI |
1060 | | not TMP1, TMP1 | ||
1061 | | sll TMP1, TMP1, 2 | 1188 | | sll TMP1, TMP1, 2 |
1062 | | addu TMP1, DISPATCH, TMP1 | 1189 | | addu TMP1, DISPATCH, TMP1 |
1063 | | b <2 | 1190 | | b <2 |
1064 | |. lw TAB:CARG1, DISPATCH_GL(gcroot[GCROOT_BASEMT])(TMP1) | 1191 | |. lw TAB:SFARG1LO, DISPATCH_GL(gcroot[GCROOT_BASEMT])(TMP1) |
1065 | | | 1192 | | |
1066 | |.ffunc_2 setmetatable | 1193 | |.ffunc_2 setmetatable |
1067 | | // Fast path: no mt for table yet and not clearing the mt. | 1194 | | // Fast path: no mt for table yet and not clearing the mt. |
1068 | | li AT, LJ_TTAB | 1195 | | li AT, LJ_TTAB |
1069 | | bne CARG3, AT, ->fff_fallback | 1196 | | bne SFARG1HI, AT, ->fff_fallback |
1070 | |. addiu CARG4, CARG4, -LJ_TTAB | 1197 | |. addiu SFARG2HI, SFARG2HI, -LJ_TTAB |
1071 | | lw TAB:TMP1, TAB:CARG1->metatable | 1198 | | lw TAB:TMP1, TAB:SFARG1LO->metatable |
1072 | | lbu TMP3, TAB:CARG1->marked | 1199 | | lbu TMP3, TAB:SFARG1LO->marked |
1073 | | or AT, CARG4, TAB:TMP1 | 1200 | | or AT, SFARG2HI, TAB:TMP1 |
1074 | | bnez AT, ->fff_fallback | 1201 | | bnez AT, ->fff_fallback |
1075 | |. andi AT, TMP3, LJ_GC_BLACK // isblack(table) | 1202 | |. andi AT, TMP3, LJ_GC_BLACK // isblack(table) |
1076 | | beqz AT, ->fff_restv | 1203 | | beqz AT, ->fff_restv |
1077 | |. sw TAB:CARG2, TAB:CARG1->metatable | 1204 | |. sw TAB:SFARG2LO, TAB:SFARG1LO->metatable |
1078 | | barrierback TAB:CARG1, TMP3, TMP0, ->fff_restv | 1205 | | barrierback TAB:SFARG1LO, TMP3, TMP0, ->fff_restv |
1079 | | | 1206 | | |
1080 | |.ffunc rawget | 1207 | |.ffunc rawget |
1081 | | lw CARG4, HI(BASE) | 1208 | | lw CARG4, HI(BASE) |
@@ -1089,44 +1216,44 @@ static void build_subroutines(BuildCtx *ctx) | |||
1089 | | call_intern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) | 1216 | | call_intern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) |
1090 | |. move CARG1, L | 1217 | |. move CARG1, L |
1091 | | // Returns cTValue *. | 1218 | | // Returns cTValue *. |
1092 | | b ->fff_resn | 1219 | | lw SFARG1HI, HI(CRET1) |
1093 | |. ldc1 FRET1, 0(CRET1) | 1220 | | b ->fff_restv |
1221 | |. lw SFARG1LO, LO(CRET1) | ||
1094 | | | 1222 | | |
1095 | |//-- Base library: conversions ------------------------------------------ | 1223 | |//-- Base library: conversions ------------------------------------------ |
1096 | | | 1224 | | |
1097 | |.ffunc tonumber | 1225 | |.ffunc tonumber |
1098 | | // Only handles the number case inline (without a base argument). | 1226 | | // Only handles the number case inline (without a base argument). |
1099 | | lw CARG1, HI(BASE) | 1227 | | lw CARG1, HI(BASE) |
1100 | | xori AT, NARGS8:RC, 8 | 1228 | | xori AT, NARGS8:RC, 8 // Exactly one number argument. |
1101 | | sltiu CARG1, CARG1, LJ_TISNUM | 1229 | | sltu TMP0, TISNUM, CARG1 |
1102 | | movn CARG1, r0, AT | 1230 | | or AT, AT, TMP0 |
1103 | | beqz CARG1, ->fff_fallback // Exactly one number argument. | 1231 | | bnez AT, ->fff_fallback |
1104 | |. ldc1 FRET1, 0(BASE) | 1232 | |. lw SFARG1HI, HI(BASE) |
1105 | | b ->fff_resn | 1233 | | b ->fff_restv |
1106 | |. nop | 1234 | |. lw SFARG1LO, LO(BASE) |
1107 | | | 1235 | | |
1108 | |.ffunc_1 tostring | 1236 | |.ffunc_1 tostring |
1109 | | // Only handles the string or number case inline. | 1237 | | // Only handles the string or number case inline. |
1110 | | li AT, LJ_TSTR | 1238 | | li AT, LJ_TSTR |
1111 | | // A __tostring method in the string base metatable is ignored. | 1239 | | // A __tostring method in the string base metatable is ignored. |
1112 | | beq CARG3, AT, ->fff_restv // String key? | 1240 | | beq SFARG1HI, AT, ->fff_restv // String key? |
1113 | | // Handle numbers inline, unless a number base metatable is present. | 1241 | | // Handle numbers inline, unless a number base metatable is present. |
1114 | |. lw TMP1, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH) | 1242 | |. lw TMP1, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH) |
1115 | | sltiu TMP0, CARG3, LJ_TISNUM | 1243 | | sltu TMP0, TISNUM, SFARG1HI |
1116 | | sltiu TMP1, TMP1, 1 | 1244 | | or TMP0, TMP0, TMP1 |
1117 | | and TMP0, TMP0, TMP1 | 1245 | | bnez TMP0, ->fff_fallback |
1118 | | beqz TMP0, ->fff_fallback | ||
1119 | |. sw BASE, L->base // Add frame since C call can throw. | 1246 | |. sw BASE, L->base // Add frame since C call can throw. |
1120 | | ffgccheck | 1247 | | ffgccheck |
1121 | |. sw PC, SAVE_PC // Redundant (but a defined value). | 1248 | |. sw PC, SAVE_PC // Redundant (but a defined value). |
1122 | | load_got lj_str_fromnum | 1249 | | load_got lj_strfmt_number |
1123 | | move CARG1, L | 1250 | | move CARG1, L |
1124 | | call_intern lj_str_fromnum // (lua_State *L, lua_Number *np) | 1251 | | call_intern lj_strfmt_number // (lua_State *L, cTValue *o) |
1125 | |. move CARG2, BASE | 1252 | |. move CARG2, BASE |
1126 | | // Returns GCstr *. | 1253 | | // Returns GCstr *. |
1127 | | li CARG3, LJ_TSTR | 1254 | | li SFARG1HI, LJ_TSTR |
1128 | | b ->fff_restv | 1255 | | b ->fff_restv |
1129 | |. move CARG1, CRET1 | 1256 | |. move SFARG1LO, CRET1 |
1130 | | | 1257 | | |
1131 | |//-- Base library: iterators ------------------------------------------- | 1258 | |//-- Base library: iterators ------------------------------------------- |
1132 | | | 1259 | | |
@@ -1148,31 +1275,38 @@ static void build_subroutines(BuildCtx *ctx) | |||
1148 | |. move CARG1, L | 1275 | |. move CARG1, L |
1149 | | // Returns 0 at end of traversal. | 1276 | | // Returns 0 at end of traversal. |
1150 | | beqz CRET1, ->fff_restv // End of traversal: return nil. | 1277 | | beqz CRET1, ->fff_restv // End of traversal: return nil. |
1151 | |. li CARG3, LJ_TNIL | 1278 | |. li SFARG1HI, LJ_TNIL |
1152 | | ldc1 f0, 8(BASE) // Copy key and value to results. | 1279 | | lw TMP0, 8+HI(BASE) |
1280 | | lw TMP1, 8+LO(BASE) | ||
1153 | | addiu RA, BASE, -8 | 1281 | | addiu RA, BASE, -8 |
1154 | | ldc1 f2, 16(BASE) | 1282 | | lw TMP2, 16+HI(BASE) |
1155 | | li RD, (2+1)*8 | 1283 | | lw TMP3, 16+LO(BASE) |
1156 | | sdc1 f0, 0(RA) | 1284 | | sw TMP0, HI(RA) |
1285 | | sw TMP1, LO(RA) | ||
1286 | | sw TMP2, 8+HI(RA) | ||
1287 | | sw TMP3, 8+LO(RA) | ||
1157 | | b ->fff_res | 1288 | | b ->fff_res |
1158 | |. sdc1 f2, 8(RA) | 1289 | |. li RD, (2+1)*8 |
1159 | | | 1290 | | |
1160 | |.ffunc_1 pairs | 1291 | |.ffunc_1 pairs |
1161 | | li AT, LJ_TTAB | 1292 | | li AT, LJ_TTAB |
1162 | | bne CARG3, AT, ->fff_fallback | 1293 | | bne SFARG1HI, AT, ->fff_fallback |
1163 | |. lw PC, FRAME_PC(BASE) | 1294 | |. lw PC, FRAME_PC(BASE) |
1164 | #if LJ_52 | 1295 | #if LJ_52 |
1165 | | lw TAB:TMP2, TAB:CARG1->metatable | 1296 | | lw TAB:TMP2, TAB:SFARG1LO->metatable |
1166 | | ldc1 f0, CFUNC:RB->upvalue[0] | 1297 | | lw TMP0, CFUNC:RB->upvalue[0].u32.hi |
1298 | | lw TMP1, CFUNC:RB->upvalue[0].u32.lo | ||
1167 | | bnez TAB:TMP2, ->fff_fallback | 1299 | | bnez TAB:TMP2, ->fff_fallback |
1168 | #else | 1300 | #else |
1169 | | ldc1 f0, CFUNC:RB->upvalue[0] | 1301 | | lw TMP0, CFUNC:RB->upvalue[0].u32.hi |
1302 | | lw TMP1, CFUNC:RB->upvalue[0].u32.lo | ||
1170 | #endif | 1303 | #endif |
1171 | |. addiu RA, BASE, -8 | 1304 | |. addiu RA, BASE, -8 |
1172 | | sw TISNIL, 8+HI(BASE) | 1305 | | sw TISNIL, 8+HI(BASE) |
1173 | | li RD, (3+1)*8 | 1306 | | sw TMP0, HI(RA) |
1307 | | sw TMP1, LO(RA) | ||
1174 | | b ->fff_res | 1308 | | b ->fff_res |
1175 | |. sdc1 f0, 0(RA) | 1309 | |. li RD, (3+1)*8 |
1176 | | | 1310 | | |
1177 | |.ffunc ipairs_aux | 1311 | |.ffunc ipairs_aux |
1178 | | sltiu AT, NARGS8:RC, 16 | 1312 | | sltiu AT, NARGS8:RC, 16 |
@@ -1180,35 +1314,32 @@ static void build_subroutines(BuildCtx *ctx) | |||
1180 | | lw TAB:CARG1, LO(BASE) | 1314 | | lw TAB:CARG1, LO(BASE) |
1181 | | lw CARG4, 8+HI(BASE) | 1315 | | lw CARG4, 8+HI(BASE) |
1182 | | bnez AT, ->fff_fallback | 1316 | | bnez AT, ->fff_fallback |
1183 | |. ldc1 FARG2, 8(BASE) | 1317 | |. addiu CARG3, CARG3, -LJ_TTAB |
1184 | | addiu CARG3, CARG3, -LJ_TTAB | 1318 | | xor CARG4, CARG4, TISNUM |
1185 | | sltiu AT, CARG4, LJ_TISNUM | 1319 | | and AT, CARG3, CARG4 |
1186 | | li TMP0, 1 | 1320 | | bnez AT, ->fff_fallback |
1187 | | movn AT, r0, CARG3 | ||
1188 | | mtc1 TMP0, FARG1 | ||
1189 | | beqz AT, ->fff_fallback | ||
1190 | |. lw PC, FRAME_PC(BASE) | 1321 | |. lw PC, FRAME_PC(BASE) |
1191 | | cvt.w.d FRET1, FARG2 | 1322 | | lw TMP2, 8+LO(BASE) |
1192 | | cvt.d.w FARG1, FARG1 | ||
1193 | | lw TMP0, TAB:CARG1->asize | 1323 | | lw TMP0, TAB:CARG1->asize |
1194 | | lw TMP1, TAB:CARG1->array | 1324 | | lw TMP1, TAB:CARG1->array |
1195 | | mfc1 TMP2, FRET1 | ||
1196 | | addiu RA, BASE, -8 | ||
1197 | | add.d FARG2, FARG2, FARG1 | ||
1198 | | addiu TMP2, TMP2, 1 | 1325 | | addiu TMP2, TMP2, 1 |
1326 | | sw TISNUM, -8+HI(BASE) | ||
1199 | | sltu AT, TMP2, TMP0 | 1327 | | sltu AT, TMP2, TMP0 |
1328 | | sw TMP2, -8+LO(BASE) | ||
1329 | | beqz AT, >2 // Not in array part? | ||
1330 | |. addiu RA, BASE, -8 | ||
1200 | | sll TMP3, TMP2, 3 | 1331 | | sll TMP3, TMP2, 3 |
1201 | | addu TMP3, TMP1, TMP3 | 1332 | | addu TMP3, TMP1, TMP3 |
1202 | | beqz AT, >2 // Not in array part? | 1333 | | lw TMP1, HI(TMP3) |
1203 | |. sdc1 FARG2, 0(RA) | 1334 | | lw TMP2, LO(TMP3) |
1204 | | lw TMP2, HI(TMP3) | ||
1205 | | ldc1 f0, 0(TMP3) | ||
1206 | |1: | 1335 | |1: |
1207 | | beq TMP2, TISNIL, ->fff_res // End of iteration, return 0 results. | 1336 | | beq TMP1, TISNIL, ->fff_res // End of iteration, return 0 results. |
1208 | |. li RD, (0+1)*8 | 1337 | |. li RD, (0+1)*8 |
1209 | | li RD, (2+1)*8 | 1338 | | sw TMP1, 8+HI(RA) |
1339 | | sw TMP2, 8+LO(RA) | ||
1210 | | b ->fff_res | 1340 | | b ->fff_res |
1211 | |. sdc1 f0, 8(RA) | 1341 | |. li RD, (2+1)*8 |
1342 | | | ||
1212 | |2: // Check for empty hash part first. Otherwise call C function. | 1343 | |2: // Check for empty hash part first. Otherwise call C function. |
1213 | | lw TMP0, TAB:CARG1->hmask | 1344 | | lw TMP0, TAB:CARG1->hmask |
1214 | | load_got lj_tab_getinth | 1345 | | load_got lj_tab_getinth |
@@ -1219,27 +1350,30 @@ static void build_subroutines(BuildCtx *ctx) | |||
1219 | | // Returns cTValue * or NULL. | 1350 | | // Returns cTValue * or NULL. |
1220 | | beqz CRET1, ->fff_res | 1351 | | beqz CRET1, ->fff_res |
1221 | |. li RD, (0+1)*8 | 1352 | |. li RD, (0+1)*8 |
1222 | | lw TMP2, HI(CRET1) | 1353 | | lw TMP1, HI(CRET1) |
1223 | | b <1 | 1354 | | b <1 |
1224 | |. ldc1 f0, 0(CRET1) | 1355 | |. lw TMP2, LO(CRET1) |
1225 | | | 1356 | | |
1226 | |.ffunc_1 ipairs | 1357 | |.ffunc_1 ipairs |
1227 | | li AT, LJ_TTAB | 1358 | | li AT, LJ_TTAB |
1228 | | bne CARG3, AT, ->fff_fallback | 1359 | | bne SFARG1HI, AT, ->fff_fallback |
1229 | |. lw PC, FRAME_PC(BASE) | 1360 | |. lw PC, FRAME_PC(BASE) |
1230 | #if LJ_52 | 1361 | #if LJ_52 |
1231 | | lw TAB:TMP2, TAB:CARG1->metatable | 1362 | | lw TAB:TMP2, TAB:SFARG1LO->metatable |
1232 | | ldc1 f0, CFUNC:RB->upvalue[0] | 1363 | | lw TMP0, CFUNC:RB->upvalue[0].u32.hi |
1364 | | lw TMP1, CFUNC:RB->upvalue[0].u32.lo | ||
1233 | | bnez TAB:TMP2, ->fff_fallback | 1365 | | bnez TAB:TMP2, ->fff_fallback |
1234 | #else | 1366 | #else |
1235 | | ldc1 f0, CFUNC:RB->upvalue[0] | 1367 | | lw TMP0, CFUNC:RB->upvalue[0].u32.hi |
1368 | | lw TMP1, CFUNC:RB->upvalue[0].u32.lo | ||
1236 | #endif | 1369 | #endif |
1237 | |. addiu RA, BASE, -8 | 1370 | |. addiu RA, BASE, -8 |
1238 | | sw r0, 8+HI(BASE) | 1371 | | sw TISNUM, 8+HI(BASE) |
1239 | | sw r0, 8+LO(BASE) | 1372 | | sw r0, 8+LO(BASE) |
1240 | | li RD, (3+1)*8 | 1373 | | sw TMP0, HI(RA) |
1374 | | sw TMP1, LO(RA) | ||
1241 | | b ->fff_res | 1375 | | b ->fff_res |
1242 | |. sdc1 f0, 0(RA) | 1376 | |. li RD, (3+1)*8 |
1243 | | | 1377 | | |
1244 | |//-- Base library: catch errors ---------------------------------------- | 1378 | |//-- Base library: catch errors ---------------------------------------- |
1245 | | | 1379 | | |
@@ -1259,8 +1393,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
1259 | | sltiu AT, NARGS8:RC, 16 | 1393 | | sltiu AT, NARGS8:RC, 16 |
1260 | | lw CARG4, 8+HI(BASE) | 1394 | | lw CARG4, 8+HI(BASE) |
1261 | | bnez AT, ->fff_fallback | 1395 | | bnez AT, ->fff_fallback |
1262 | |. ldc1 FARG2, 8(BASE) | 1396 | |. lw CARG3, 8+LO(BASE) |
1263 | | ldc1 FARG1, 0(BASE) | 1397 | | lw CARG1, LO(BASE) |
1398 | | lw CARG2, HI(BASE) | ||
1264 | | lbu TMP1, DISPATCH_GL(hookmask)(DISPATCH) | 1399 | | lbu TMP1, DISPATCH_GL(hookmask)(DISPATCH) |
1265 | | li AT, LJ_TFUNC | 1400 | | li AT, LJ_TFUNC |
1266 | | move TMP2, BASE | 1401 | | move TMP2, BASE |
@@ -1268,9 +1403,11 @@ static void build_subroutines(BuildCtx *ctx) | |||
1268 | | addiu BASE, BASE, 16 | 1403 | | addiu BASE, BASE, 16 |
1269 | | // Remember active hook before pcall. | 1404 | | // Remember active hook before pcall. |
1270 | | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT | 1405 | | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT |
1271 | | sdc1 FARG2, 0(TMP2) // Swap function and traceback. | 1406 | | sw CARG3, LO(TMP2) // Swap function and traceback. |
1407 | | sw CARG4, HI(TMP2) | ||
1272 | | andi TMP3, TMP3, 1 | 1408 | | andi TMP3, TMP3, 1 |
1273 | | sdc1 FARG1, 8(TMP2) | 1409 | | sw CARG1, 8+LO(TMP2) |
1410 | | sw CARG2, 8+HI(TMP2) | ||
1274 | | addiu PC, TMP3, 16+FRAME_PCALL | 1411 | | addiu PC, TMP3, 16+FRAME_PCALL |
1275 | | b ->vm_call_dispatch | 1412 | | b ->vm_call_dispatch |
1276 | |. addiu NARGS8:RC, NARGS8:RC, -16 | 1413 | |. addiu NARGS8:RC, NARGS8:RC, -16 |
@@ -1279,7 +1416,10 @@ static void build_subroutines(BuildCtx *ctx) | |||
1279 | | | 1416 | | |
1280 | |.macro coroutine_resume_wrap, resume | 1417 | |.macro coroutine_resume_wrap, resume |
1281 | |.if resume | 1418 | |.if resume |
1282 | |.ffunc_1 coroutine_resume | 1419 | |.ffunc coroutine_resume |
1420 | | lw CARG3, HI(BASE) | ||
1421 | | beqz NARGS8:RC, ->fff_fallback | ||
1422 | |. lw CARG1, LO(BASE) | ||
1283 | | li AT, LJ_TTHREAD | 1423 | | li AT, LJ_TTHREAD |
1284 | | bne CARG3, AT, ->fff_fallback | 1424 | | bne CARG3, AT, ->fff_fallback |
1285 | |.else | 1425 | |.else |
@@ -1314,11 +1454,13 @@ static void build_subroutines(BuildCtx *ctx) | |||
1314 | | move CARG3, CARG2 | 1454 | | move CARG3, CARG2 |
1315 | | sw BASE, L->top | 1455 | | sw BASE, L->top |
1316 | |2: // Move args to coroutine. | 1456 | |2: // Move args to coroutine. |
1317 | | ldc1 f0, 0(BASE) | 1457 | | lw SFRETHI, HI(BASE) |
1458 | | lw SFRETLO, LO(BASE) | ||
1318 | | sltu AT, BASE, TMP1 | 1459 | | sltu AT, BASE, TMP1 |
1319 | | beqz AT, >3 | 1460 | | beqz AT, >3 |
1320 | |. addiu BASE, BASE, 8 | 1461 | |. addiu BASE, BASE, 8 |
1321 | | sdc1 f0, 0(CARG3) | 1462 | | sw SFRETHI, HI(CARG3) |
1463 | | sw SFRETLO, LO(CARG3) | ||
1322 | | b <2 | 1464 | | b <2 |
1323 | |. addiu CARG3, CARG3, 8 | 1465 | |. addiu CARG3, CARG3, 8 |
1324 | |3: | 1466 | |3: |
@@ -1331,6 +1473,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
1331 | | lw TMP3, L:RA->top | 1473 | | lw TMP3, L:RA->top |
1332 | | li_vmstate INTERP | 1474 | | li_vmstate INTERP |
1333 | | lw BASE, L->base | 1475 | | lw BASE, L->base |
1476 | | sw L, DISPATCH_GL(cur_L)(DISPATCH) | ||
1334 | | st_vmstate | 1477 | | st_vmstate |
1335 | | beqz AT, >8 | 1478 | | beqz AT, >8 |
1336 | |. subu RD, TMP3, TMP2 | 1479 | |. subu RD, TMP3, TMP2 |
@@ -1343,10 +1486,12 @@ static void build_subroutines(BuildCtx *ctx) | |||
1343 | | sw TMP2, L:RA->top // Clear coroutine stack. | 1486 | | sw TMP2, L:RA->top // Clear coroutine stack. |
1344 | | move TMP1, BASE | 1487 | | move TMP1, BASE |
1345 | |5: // Move results from coroutine. | 1488 | |5: // Move results from coroutine. |
1346 | | ldc1 f0, 0(TMP2) | 1489 | | lw SFRETHI, HI(TMP2) |
1490 | | lw SFRETLO, LO(TMP2) | ||
1347 | | addiu TMP2, TMP2, 8 | 1491 | | addiu TMP2, TMP2, 8 |
1348 | | sltu AT, TMP2, TMP3 | 1492 | | sltu AT, TMP2, TMP3 |
1349 | | sdc1 f0, 0(TMP1) | 1493 | | sw SFRETHI, HI(TMP1) |
1494 | | sw SFRETLO, LO(TMP1) | ||
1350 | | bnez AT, <5 | 1495 | | bnez AT, <5 |
1351 | |. addiu TMP1, TMP1, 8 | 1496 | |. addiu TMP1, TMP1, 8 |
1352 | |6: | 1497 | |6: |
@@ -1371,12 +1516,14 @@ static void build_subroutines(BuildCtx *ctx) | |||
1371 | |.if resume | 1516 | |.if resume |
1372 | | addiu TMP3, TMP3, -8 | 1517 | | addiu TMP3, TMP3, -8 |
1373 | | li TMP1, LJ_TFALSE | 1518 | | li TMP1, LJ_TFALSE |
1374 | | ldc1 f0, 0(TMP3) | 1519 | | lw SFRETHI, HI(TMP3) |
1520 | | lw SFRETLO, LO(TMP3) | ||
1375 | | sw TMP3, L:RA->top // Remove error from coroutine stack. | 1521 | | sw TMP3, L:RA->top // Remove error from coroutine stack. |
1376 | | li RD, (2+1)*8 | 1522 | | li RD, (2+1)*8 |
1377 | | sw TMP1, -8+HI(BASE) // Prepend false to results. | 1523 | | sw TMP1, -8+HI(BASE) // Prepend false to results. |
1378 | | addiu RA, BASE, -8 | 1524 | | addiu RA, BASE, -8 |
1379 | | sdc1 f0, 0(BASE) // Copy error message. | 1525 | | sw SFRETHI, HI(BASE) // Copy error message. |
1526 | | sw SFRETLO, LO(BASE) | ||
1380 | | b <7 | 1527 | | b <7 |
1381 | |. andi TMP0, PC, FRAME_TYPE | 1528 | |. andi TMP0, PC, FRAME_TYPE |
1382 | |.else | 1529 | |.else |
@@ -1412,20 +1559,29 @@ static void build_subroutines(BuildCtx *ctx) | |||
1412 | | | 1559 | | |
1413 | |//-- Math library ------------------------------------------------------- | 1560 | |//-- Math library ------------------------------------------------------- |
1414 | | | 1561 | | |
1415 | |.ffunc_n math_abs | 1562 | |.ffunc_1 math_abs |
1416 | |. abs.d FRET1, FARG1 | 1563 | | bne SFARG1HI, TISNUM, >1 |
1417 | |->fff_resn: | 1564 | |. sra TMP0, SFARG1LO, 31 |
1418 | | lw PC, FRAME_PC(BASE) | 1565 | | xor TMP1, SFARG1LO, TMP0 |
1419 | | addiu RA, BASE, -8 | 1566 | | subu SFARG1LO, TMP1, TMP0 |
1420 | | b ->fff_res1 | 1567 | | bgez SFARG1LO, ->fff_restv |
1421 | |. sdc1 FRET1, -8(BASE) | 1568 | |. nop |
1569 | | lui SFARG1HI, 0x41e0 // 2^31 as a double. | ||
1570 | | b ->fff_restv | ||
1571 | |. li SFARG1LO, 0 | ||
1572 | |1: | ||
1573 | | sltiu AT, SFARG1HI, LJ_TISNUM | ||
1574 | | beqz AT, ->fff_fallback | ||
1575 | |. sll SFARG1HI, SFARG1HI, 1 | ||
1576 | | srl SFARG1HI, SFARG1HI, 1 | ||
1577 | |// fallthrough | ||
1422 | | | 1578 | | |
1423 | |->fff_restv: | 1579 | |->fff_restv: |
1424 | | // CARG3/CARG1 = TValue result. | 1580 | | // SFARG1LO/SFARG1HI = TValue result. |
1425 | | lw PC, FRAME_PC(BASE) | 1581 | | lw PC, FRAME_PC(BASE) |
1426 | | sw CARG3, -8+HI(BASE) | 1582 | | sw SFARG1HI, -8+HI(BASE) |
1427 | | addiu RA, BASE, -8 | 1583 | | addiu RA, BASE, -8 |
1428 | | sw CARG1, -8+LO(BASE) | 1584 | | sw SFARG1LO, -8+LO(BASE) |
1429 | |->fff_res1: | 1585 | |->fff_res1: |
1430 | | // RA = results, PC = return. | 1586 | | // RA = results, PC = return. |
1431 | | li RD, (1+1)*8 | 1587 | | li RD, (1+1)*8 |
@@ -1454,15 +1610,19 @@ static void build_subroutines(BuildCtx *ctx) | |||
1454 | |. sw TISNIL, -8+HI(TMP1) | 1610 | |. sw TISNIL, -8+HI(TMP1) |
1455 | | | 1611 | | |
1456 | |.macro math_extern, func | 1612 | |.macro math_extern, func |
1457 | |->ff_math_ .. func: | 1613 | | .ffunc math_ .. func |
1458 | | lw CARG3, HI(BASE) | 1614 | | lw SFARG1HI, HI(BASE) |
1459 | | beqz NARGS8:RC, ->fff_fallback | 1615 | | beqz NARGS8:RC, ->fff_fallback |
1460 | |. load_got func | 1616 | |. load_got func |
1461 | | sltiu AT, CARG3, LJ_TISNUM | 1617 | | sltiu AT, SFARG1HI, LJ_TISNUM |
1462 | | beqz AT, ->fff_fallback | 1618 | | beqz AT, ->fff_fallback |
1463 | |. nop | 1619 | |.if FPU |
1464 | | call_extern | ||
1465 | |. ldc1 FARG1, 0(BASE) | 1620 | |. ldc1 FARG1, 0(BASE) |
1621 | |.else | ||
1622 | |. lw SFARG1LO, LO(BASE) | ||
1623 | |.endif | ||
1624 | | call_extern | ||
1625 | |. nop | ||
1466 | | b ->fff_resn | 1626 | | b ->fff_resn |
1467 | |. nop | 1627 | |. nop |
1468 | |.endmacro | 1628 | |.endmacro |
@@ -1476,10 +1636,22 @@ static void build_subroutines(BuildCtx *ctx) | |||
1476 | |. nop | 1636 | |. nop |
1477 | |.endmacro | 1637 | |.endmacro |
1478 | | | 1638 | | |
1639 | |// TODO: Return integer type if result is integer (own sf implementation). | ||
1479 | |.macro math_round, func | 1640 | |.macro math_round, func |
1480 | | .ffunc_n math_ .. func | 1641 | |->ff_math_ .. func: |
1481 | |. nop | 1642 | | lw SFARG1HI, HI(BASE) |
1643 | | beqz NARGS8:RC, ->fff_fallback | ||
1644 | |. lw SFARG1LO, LO(BASE) | ||
1645 | | beq SFARG1HI, TISNUM, ->fff_restv | ||
1646 | |. sltu AT, SFARG1HI, TISNUM | ||
1647 | | beqz AT, ->fff_fallback | ||
1648 | |.if FPU | ||
1649 | |. ldc1 FARG1, 0(BASE) | ||
1482 | | bal ->vm_ .. func | 1650 | | bal ->vm_ .. func |
1651 | |.else | ||
1652 | |. load_got func | ||
1653 | | call_extern | ||
1654 | |.endif | ||
1483 | |. nop | 1655 | |. nop |
1484 | | b ->fff_resn | 1656 | | b ->fff_resn |
1485 | |. nop | 1657 | |. nop |
@@ -1489,15 +1661,19 @@ static void build_subroutines(BuildCtx *ctx) | |||
1489 | | math_round ceil | 1661 | | math_round ceil |
1490 | | | 1662 | | |
1491 | |.ffunc math_log | 1663 | |.ffunc math_log |
1492 | | lw CARG3, HI(BASE) | ||
1493 | | li AT, 8 | 1664 | | li AT, 8 |
1494 | | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument. | 1665 | | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument. |
1495 | |. load_got log | 1666 | |. lw SFARG1HI, HI(BASE) |
1496 | | sltiu AT, CARG3, LJ_TISNUM | 1667 | | sltiu AT, SFARG1HI, LJ_TISNUM |
1497 | | beqz AT, ->fff_fallback | 1668 | | beqz AT, ->fff_fallback |
1498 | |. nop | 1669 | |. load_got log |
1670 | |.if FPU | ||
1499 | | call_extern | 1671 | | call_extern |
1500 | |. ldc1 FARG1, 0(BASE) | 1672 | |. ldc1 FARG1, 0(BASE) |
1673 | |.else | ||
1674 | | call_extern | ||
1675 | |. lw SFARG1LO, LO(BASE) | ||
1676 | |.endif | ||
1501 | | b ->fff_resn | 1677 | | b ->fff_resn |
1502 | |. nop | 1678 | |. nop |
1503 | | | 1679 | | |
@@ -1516,23 +1692,43 @@ static void build_subroutines(BuildCtx *ctx) | |||
1516 | | math_extern2 atan2 | 1692 | | math_extern2 atan2 |
1517 | | math_extern2 fmod | 1693 | | math_extern2 fmod |
1518 | | | 1694 | | |
1695 | |.if FPU | ||
1519 | |.ffunc_n math_sqrt | 1696 | |.ffunc_n math_sqrt |
1520 | |. sqrt.d FRET1, FARG1 | 1697 | |. sqrt.d FRET1, FARG1 |
1521 | | b ->fff_resn | 1698 | |// fallthrough to ->fff_resn |
1522 | |. nop | 1699 | |.else |
1700 | | math_extern sqrt | ||
1701 | |.endif | ||
1702 | | | ||
1703 | |->fff_resn: | ||
1704 | | lw PC, FRAME_PC(BASE) | ||
1705 | | addiu RA, BASE, -8 | ||
1706 | |.if FPU | ||
1707 | | b ->fff_res1 | ||
1708 | |. sdc1 FRET1, -8(BASE) | ||
1709 | |.else | ||
1710 | | sw SFRETHI, -8+HI(BASE) | ||
1711 | | b ->fff_res1 | ||
1712 | |. sw SFRETLO, -8+LO(BASE) | ||
1713 | |.endif | ||
1523 | | | 1714 | | |
1524 | |->ff_math_deg: | ||
1525 | |.ffunc_n math_rad | ||
1526 | |. ldc1 FARG2, CFUNC:RB->upvalue[0] | ||
1527 | | b ->fff_resn | ||
1528 | |. mul.d FRET1, FARG1, FARG2 | ||
1529 | | | 1715 | | |
1530 | |.ffunc_nn math_ldexp | 1716 | |.ffunc math_ldexp |
1531 | | cvt.w.d FARG2, FARG2 | 1717 | | sltiu AT, NARGS8:RC, 16 |
1718 | | lw SFARG1HI, HI(BASE) | ||
1719 | | bnez AT, ->fff_fallback | ||
1720 | |. lw CARG4, 8+HI(BASE) | ||
1721 | | bne CARG4, TISNUM, ->fff_fallback | ||
1532 | | load_got ldexp | 1722 | | load_got ldexp |
1533 | | mfc1 CARG3, FARG2 | 1723 | |. sltu AT, SFARG1HI, TISNUM |
1724 | | beqz AT, ->fff_fallback | ||
1725 | |.if FPU | ||
1726 | |. ldc1 FARG1, 0(BASE) | ||
1727 | |.else | ||
1728 | |. lw SFARG1LO, LO(BASE) | ||
1729 | |.endif | ||
1534 | | call_extern | 1730 | | call_extern |
1535 | |. nop | 1731 | |. lw CARG3, 8+LO(BASE) |
1536 | | b ->fff_resn | 1732 | | b ->fff_resn |
1537 | |. nop | 1733 | |. nop |
1538 | | | 1734 | | |
@@ -1543,10 +1739,17 @@ static void build_subroutines(BuildCtx *ctx) | |||
1543 | |. addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) | 1739 | |. addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) |
1544 | | lw TMP1, DISPATCH_GL(tmptv)(DISPATCH) | 1740 | | lw TMP1, DISPATCH_GL(tmptv)(DISPATCH) |
1545 | | addiu RA, BASE, -8 | 1741 | | addiu RA, BASE, -8 |
1742 | |.if FPU | ||
1546 | | mtc1 TMP1, FARG2 | 1743 | | mtc1 TMP1, FARG2 |
1547 | | sdc1 FRET1, 0(RA) | 1744 | | sdc1 FRET1, 0(RA) |
1548 | | cvt.d.w FARG2, FARG2 | 1745 | | cvt.d.w FARG2, FARG2 |
1549 | | sdc1 FARG2, 8(RA) | 1746 | | sdc1 FARG2, 8(RA) |
1747 | |.else | ||
1748 | | sw SFRETLO, LO(RA) | ||
1749 | | sw SFRETHI, HI(RA) | ||
1750 | | sw TMP1, 8+LO(RA) | ||
1751 | | sw TISNUM, 8+HI(RA) | ||
1752 | |.endif | ||
1550 | | b ->fff_res | 1753 | | b ->fff_res |
1551 | |. li RD, (2+1)*8 | 1754 | |. li RD, (2+1)*8 |
1552 | | | 1755 | | |
@@ -1556,49 +1759,101 @@ static void build_subroutines(BuildCtx *ctx) | |||
1556 | | call_extern | 1759 | | call_extern |
1557 | |. addiu CARG3, BASE, -8 | 1760 | |. addiu CARG3, BASE, -8 |
1558 | | addiu RA, BASE, -8 | 1761 | | addiu RA, BASE, -8 |
1762 | |.if FPU | ||
1559 | | sdc1 FRET1, 0(BASE) | 1763 | | sdc1 FRET1, 0(BASE) |
1764 | |.else | ||
1765 | | sw SFRETLO, LO(BASE) | ||
1766 | | sw SFRETHI, HI(BASE) | ||
1767 | |.endif | ||
1560 | | b ->fff_res | 1768 | | b ->fff_res |
1561 | |. li RD, (2+1)*8 | 1769 | |. li RD, (2+1)*8 |
1562 | | | 1770 | | |
1563 | |.macro math_minmax, name, ismax | 1771 | |.macro math_minmax, name, intins, fpins |
1564 | |->ff_ .. name: | 1772 | | .ffunc_1 name |
1565 | | lw CARG3, HI(BASE) | 1773 | | addu TMP3, BASE, NARGS8:RC |
1566 | | beqz NARGS8:RC, ->fff_fallback | 1774 | | bne SFARG1HI, TISNUM, >5 |
1567 | |. ldc1 FRET1, 0(BASE) | 1775 | |. addiu TMP2, BASE, 8 |
1568 | | sltiu AT, CARG3, LJ_TISNUM | 1776 | |1: // Handle integers. |
1777 | |. lw SFARG2HI, HI(TMP2) | ||
1778 | | beq TMP2, TMP3, ->fff_restv | ||
1779 | |. lw SFARG2LO, LO(TMP2) | ||
1780 | | bne SFARG2HI, TISNUM, >3 | ||
1781 | |. slt AT, SFARG1LO, SFARG2LO | ||
1782 | | intins SFARG1LO, SFARG2LO, AT | ||
1783 | | b <1 | ||
1784 | |. addiu TMP2, TMP2, 8 | ||
1785 | | | ||
1786 | |3: // Convert intermediate result to number and continue with number loop. | ||
1787 | | sltiu AT, SFARG2HI, LJ_TISNUM | ||
1569 | | beqz AT, ->fff_fallback | 1788 | | beqz AT, ->fff_fallback |
1570 | |. addu TMP2, BASE, NARGS8:RC | 1789 | |.if FPU |
1571 | | addiu TMP1, BASE, 8 | 1790 | |. mtc1 SFARG1LO, FRET1 |
1572 | | beq TMP1, TMP2, ->fff_resn | 1791 | | cvt.d.w FRET1, FRET1 |
1573 | |1: | 1792 | | b >7 |
1574 | |. lw CARG3, HI(TMP1) | 1793 | |. ldc1 FARG1, 0(TMP2) |
1575 | | ldc1 FARG1, 0(TMP1) | 1794 | |.else |
1576 | | addiu TMP1, TMP1, 8 | 1795 | |. nop |
1577 | | sltiu AT, CARG3, LJ_TISNUM | 1796 | | bal ->vm_sfi2d_1 |
1797 | |. nop | ||
1798 | | b >7 | ||
1799 | |. nop | ||
1800 | |.endif | ||
1801 | | | ||
1802 | |5: | ||
1803 | |. sltiu AT, SFARG1HI, LJ_TISNUM | ||
1578 | | beqz AT, ->fff_fallback | 1804 | | beqz AT, ->fff_fallback |
1579 | |.if ismax | 1805 | |.if FPU |
1580 | |. c.olt.d FARG1, FRET1 | 1806 | |. ldc1 FRET1, 0(BASE) |
1807 | |.endif | ||
1808 | | | ||
1809 | |6: // Handle numbers. | ||
1810 | |. lw SFARG2HI, HI(TMP2) | ||
1811 | |.if FPU | ||
1812 | | beq TMP2, TMP3, ->fff_resn | ||
1581 | |.else | 1813 | |.else |
1582 | |. c.olt.d FRET1, FARG1 | 1814 | | beq TMP2, TMP3, ->fff_restv |
1583 | |.endif | 1815 | |.endif |
1584 | | bne TMP1, TMP2, <1 | 1816 | |. sltiu AT, SFARG2HI, LJ_TISNUM |
1585 | |. movf.d FRET1, FARG1 | 1817 | | beqz AT, >8 |
1586 | | b ->fff_resn | 1818 | |.if FPU |
1819 | |. ldc1 FARG1, 0(TMP2) | ||
1820 | |.else | ||
1821 | |. lw SFARG2LO, LO(TMP2) | ||
1822 | |.endif | ||
1823 | |7: | ||
1824 | |.if FPU | ||
1825 | | c.olt.d FRET1, FARG1 | ||
1826 | | fpins FRET1, FARG1 | ||
1827 | |.else | ||
1828 | | bal ->vm_sfcmpolt | ||
1587 | |. nop | 1829 | |. nop |
1830 | | intins SFARG1LO, SFARG2LO, CRET1 | ||
1831 | | intins SFARG1HI, SFARG2HI, CRET1 | ||
1832 | |.endif | ||
1833 | | b <6 | ||
1834 | |. addiu TMP2, TMP2, 8 | ||
1835 | | | ||
1836 | |8: // Convert integer to number and continue with number loop. | ||
1837 | | bne SFARG2HI, TISNUM, ->fff_fallback | ||
1838 | |.if FPU | ||
1839 | |. lwc1 FARG1, LO(TMP2) | ||
1840 | | b <7 | ||
1841 | |. cvt.d.w FARG1, FARG1 | ||
1842 | |.else | ||
1843 | |. nop | ||
1844 | | bal ->vm_sfi2d_2 | ||
1845 | |. nop | ||
1846 | | b <7 | ||
1847 | |. nop | ||
1848 | |.endif | ||
1849 | | | ||
1588 | |.endmacro | 1850 | |.endmacro |
1589 | | | 1851 | | |
1590 | | math_minmax math_min, 0 | 1852 | | math_minmax math_min, movz, movf.d |
1591 | | math_minmax math_max, 1 | 1853 | | math_minmax math_max, movn, movt.d |
1592 | | | 1854 | | |
1593 | |//-- String library ----------------------------------------------------- | 1855 | |//-- String library ----------------------------------------------------- |
1594 | | | 1856 | | |
1595 | |.ffunc_1 string_len | ||
1596 | | li AT, LJ_TSTR | ||
1597 | | bne CARG3, AT, ->fff_fallback | ||
1598 | |. nop | ||
1599 | | b ->fff_resi | ||
1600 | |. lw CRET1, STR:CARG1->len | ||
1601 | | | ||
1602 | |.ffunc string_byte // Only handle the 1-arg case here. | 1857 | |.ffunc string_byte // Only handle the 1-arg case here. |
1603 | | lw CARG3, HI(BASE) | 1858 | | lw CARG3, HI(BASE) |
1604 | | lw STR:CARG1, LO(BASE) | 1859 | | lw STR:CARG1, LO(BASE) |
@@ -1608,33 +1863,31 @@ static void build_subroutines(BuildCtx *ctx) | |||
1608 | | bnez AT, ->fff_fallback // Need exactly 1 string argument. | 1863 | | bnez AT, ->fff_fallback // Need exactly 1 string argument. |
1609 | |. nop | 1864 | |. nop |
1610 | | lw TMP0, STR:CARG1->len | 1865 | | lw TMP0, STR:CARG1->len |
1611 | | lbu TMP1, STR:CARG1[1] // Access is always ok (NUL at end). | ||
1612 | | addiu RA, BASE, -8 | 1866 | | addiu RA, BASE, -8 |
1867 | | lw PC, FRAME_PC(BASE) | ||
1613 | | sltu RD, r0, TMP0 | 1868 | | sltu RD, r0, TMP0 |
1614 | | mtc1 TMP1, f0 | 1869 | | lbu TMP1, STR:CARG1[1] // Access is always ok (NUL at end). |
1615 | | addiu RD, RD, 1 | 1870 | | addiu RD, RD, 1 |
1616 | | cvt.d.w f0, f0 | ||
1617 | | lw PC, FRAME_PC(BASE) | ||
1618 | | sll RD, RD, 3 // RD = ((str->len != 0)+1)*8 | 1871 | | sll RD, RD, 3 // RD = ((str->len != 0)+1)*8 |
1872 | | sw TISNUM, HI(RA) | ||
1619 | | b ->fff_res | 1873 | | b ->fff_res |
1620 | |. sdc1 f0, 0(RA) | 1874 | |. sw TMP1, LO(RA) |
1621 | | | 1875 | | |
1622 | |.ffunc string_char // Only handle the 1-arg case here. | 1876 | |.ffunc string_char // Only handle the 1-arg case here. |
1623 | | ffgccheck | 1877 | | ffgccheck |
1624 | |. nop | 1878 | |. nop |
1625 | | lw CARG3, HI(BASE) | 1879 | | lw CARG3, HI(BASE) |
1626 | | ldc1 FARG1, 0(BASE) | 1880 | | lw CARG1, LO(BASE) |
1627 | | li AT, 8 | 1881 | | li TMP1, 255 |
1628 | | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument. | 1882 | | xori AT, NARGS8:RC, 8 // Exactly 1 argument. |
1629 | |. sltiu AT, CARG3, LJ_TISNUM | 1883 | | xor TMP0, CARG3, TISNUM // Integer. |
1630 | | beqz AT, ->fff_fallback | 1884 | | sltu TMP1, TMP1, CARG1 // !(255 < n). |
1885 | | or AT, AT, TMP0 | ||
1886 | | or AT, AT, TMP1 | ||
1887 | | bnez AT, ->fff_fallback | ||
1631 | |. li CARG3, 1 | 1888 | |. li CARG3, 1 |
1632 | | cvt.w.d FARG1, FARG1 | ||
1633 | | addiu CARG2, sp, ARG5_OFS | 1889 | | addiu CARG2, sp, ARG5_OFS |
1634 | | sltiu AT, TMP0, 256 | 1890 | | sb CARG1, ARG5 |
1635 | | mfc1 TMP0, FARG1 | ||
1636 | | beqz AT, ->fff_fallback | ||
1637 | |. sw TMP0, ARG5 | ||
1638 | |->fff_newstr: | 1891 | |->fff_newstr: |
1639 | | load_got lj_str_new | 1892 | | load_got lj_str_new |
1640 | | sw BASE, L->base | 1893 | | sw BASE, L->base |
@@ -1643,35 +1896,30 @@ static void build_subroutines(BuildCtx *ctx) | |||
1643 | |. move CARG1, L | 1896 | |. move CARG1, L |
1644 | | // Returns GCstr *. | 1897 | | // Returns GCstr *. |
1645 | | lw BASE, L->base | 1898 | | lw BASE, L->base |
1646 | | move CARG1, CRET1 | 1899 | |->fff_resstr: |
1900 | | move SFARG1LO, CRET1 | ||
1647 | | b ->fff_restv | 1901 | | b ->fff_restv |
1648 | |. li CARG3, LJ_TSTR | 1902 | |. li SFARG1HI, LJ_TSTR |
1649 | | | 1903 | | |
1650 | |.ffunc string_sub | 1904 | |.ffunc string_sub |
1651 | | ffgccheck | 1905 | | ffgccheck |
1652 | |. nop | 1906 | |. nop |
1653 | | addiu AT, NARGS8:RC, -16 | 1907 | | addiu AT, NARGS8:RC, -16 |
1654 | | lw CARG3, 16+HI(BASE) | 1908 | | lw CARG3, 16+HI(BASE) |
1655 | | ldc1 f0, 16(BASE) | ||
1656 | | lw TMP0, HI(BASE) | 1909 | | lw TMP0, HI(BASE) |
1657 | | lw STR:CARG1, LO(BASE) | 1910 | | lw STR:CARG1, LO(BASE) |
1658 | | bltz AT, ->fff_fallback | 1911 | | bltz AT, ->fff_fallback |
1659 | | lw CARG2, 8+HI(BASE) | 1912 | |. lw CARG2, 8+HI(BASE) |
1660 | | ldc1 f2, 8(BASE) | ||
1661 | | beqz AT, >1 | 1913 | | beqz AT, >1 |
1662 | |. li CARG4, -1 | 1914 | |. li CARG4, -1 |
1663 | | cvt.w.d f0, f0 | 1915 | | bne CARG3, TISNUM, ->fff_fallback |
1664 | | sltiu AT, CARG3, LJ_TISNUM | 1916 | |. lw CARG4, 16+LO(BASE) |
1665 | | beqz AT, ->fff_fallback | ||
1666 | |. mfc1 CARG4, f0 | ||
1667 | |1: | 1917 | |1: |
1668 | | sltiu AT, CARG2, LJ_TISNUM | 1918 | | bne CARG2, TISNUM, ->fff_fallback |
1669 | | beqz AT, ->fff_fallback | ||
1670 | |. li AT, LJ_TSTR | 1919 | |. li AT, LJ_TSTR |
1671 | | cvt.w.d f2, f2 | ||
1672 | | bne TMP0, AT, ->fff_fallback | 1920 | | bne TMP0, AT, ->fff_fallback |
1673 | |. lw CARG2, STR:CARG1->len | 1921 | |. lw CARG3, 8+LO(BASE) |
1674 | | mfc1 CARG3, f2 | 1922 | | lw CARG2, STR:CARG1->len |
1675 | | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end | 1923 | | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end |
1676 | | slt AT, CARG4, r0 | 1924 | | slt AT, CARG4, r0 |
1677 | | addiu TMP0, CARG2, 1 | 1925 | | addiu TMP0, CARG2, 1 |
@@ -1693,139 +1941,130 @@ static void build_subroutines(BuildCtx *ctx) | |||
1693 | | bgez CARG3, ->fff_newstr | 1941 | | bgez CARG3, ->fff_newstr |
1694 | |. addiu CARG3, CARG3, 1 // len++ | 1942 | |. addiu CARG3, CARG3, 1 // len++ |
1695 | |->fff_emptystr: // Return empty string. | 1943 | |->fff_emptystr: // Return empty string. |
1696 | | addiu STR:CARG1, DISPATCH, DISPATCH_GL(strempty) | 1944 | | addiu STR:SFARG1LO, DISPATCH, DISPATCH_GL(strempty) |
1697 | | b ->fff_restv | 1945 | | b ->fff_restv |
1698 | |. li CARG3, LJ_TSTR | 1946 | |. li SFARG1HI, LJ_TSTR |
1699 | | | ||
1700 | |.ffunc string_rep // Only handle the 1-char case inline. | ||
1701 | | ffgccheck | ||
1702 | |. nop | ||
1703 | | lw TMP0, HI(BASE) | ||
1704 | | addiu AT, NARGS8:RC, -16 // Exactly 2 arguments. | ||
1705 | | lw CARG4, 8+HI(BASE) | ||
1706 | | lw STR:CARG1, LO(BASE) | ||
1707 | | addiu TMP0, TMP0, -LJ_TSTR | ||
1708 | | ldc1 f0, 8(BASE) | ||
1709 | | or AT, AT, TMP0 | ||
1710 | | bnez AT, ->fff_fallback | ||
1711 | |. sltiu AT, CARG4, LJ_TISNUM | ||
1712 | | cvt.w.d f0, f0 | ||
1713 | | beqz AT, ->fff_fallback | ||
1714 | |. lw TMP0, STR:CARG1->len | ||
1715 | | mfc1 CARG3, f0 | ||
1716 | | lw TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) | ||
1717 | | li AT, 1 | ||
1718 | | blez CARG3, ->fff_emptystr // Count <= 0? | ||
1719 | |. sltu AT, AT, TMP0 | ||
1720 | | beqz TMP0, ->fff_emptystr // Zero length string? | ||
1721 | |. sltu TMP0, TMP1, CARG3 | ||
1722 | | or AT, AT, TMP0 | ||
1723 | | lw CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) | ||
1724 | | bnez AT, ->fff_fallback // Fallback for > 1-char strings. | ||
1725 | |. lbu TMP0, STR:CARG1[1] | ||
1726 | | addu TMP2, CARG2, CARG3 | ||
1727 | |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?). | ||
1728 | | addiu TMP2, TMP2, -1 | ||
1729 | | sltu AT, CARG2, TMP2 | ||
1730 | | bnez AT, <1 | ||
1731 | |. sb TMP0, 0(TMP2) | ||
1732 | | b ->fff_newstr | ||
1733 | |. nop | ||
1734 | | | ||
1735 | |.ffunc string_reverse | ||
1736 | | ffgccheck | ||
1737 | |. nop | ||
1738 | | lw CARG3, HI(BASE) | ||
1739 | | lw STR:CARG1, LO(BASE) | ||
1740 | | beqz NARGS8:RC, ->fff_fallback | ||
1741 | |. li AT, LJ_TSTR | ||
1742 | | bne CARG3, AT, ->fff_fallback | ||
1743 | |. lw TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) | ||
1744 | | lw CARG3, STR:CARG1->len | ||
1745 | | addiu CARG1, STR:CARG1, #STR | ||
1746 | | lw CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) | ||
1747 | | sltu AT, TMP1, CARG3 | ||
1748 | | bnez AT, ->fff_fallback | ||
1749 | |. addu TMP3, CARG1, CARG3 | ||
1750 | | addu CARG4, CARG2, CARG3 | ||
1751 | |1: // Reverse string copy. | ||
1752 | | lbu TMP1, 0(CARG1) | ||
1753 | | sltu AT, CARG1, TMP3 | ||
1754 | | beqz AT, ->fff_newstr | ||
1755 | |. addiu CARG1, CARG1, 1 | ||
1756 | | addiu CARG4, CARG4, -1 | ||
1757 | | b <1 | ||
1758 | | sb TMP1, 0(CARG4) | ||
1759 | | | 1947 | | |
1760 | |.macro ffstring_case, name, lo | 1948 | |.macro ffstring_op, name |
1761 | | .ffunc name | 1949 | | .ffunc string_ .. name |
1762 | | ffgccheck | 1950 | | ffgccheck |
1763 | |. nop | 1951 | |. nop |
1764 | | lw CARG3, HI(BASE) | 1952 | | lw CARG3, HI(BASE) |
1765 | | lw STR:CARG1, LO(BASE) | 1953 | | lw STR:CARG2, LO(BASE) |
1766 | | beqz NARGS8:RC, ->fff_fallback | 1954 | | beqz NARGS8:RC, ->fff_fallback |
1767 | |. li AT, LJ_TSTR | 1955 | |. li AT, LJ_TSTR |
1768 | | bne CARG3, AT, ->fff_fallback | 1956 | | bne CARG3, AT, ->fff_fallback |
1769 | |. lw TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) | 1957 | |. addiu SBUF:CARG1, DISPATCH, DISPATCH_GL(tmpbuf) |
1770 | | lw CARG3, STR:CARG1->len | 1958 | | load_got lj_buf_putstr_ .. name |
1771 | | addiu CARG1, STR:CARG1, #STR | 1959 | | lw TMP0, SBUF:CARG1->b |
1772 | | lw CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) | 1960 | | sw L, SBUF:CARG1->L |
1773 | | sltu AT, TMP1, CARG3 | 1961 | | sw BASE, L->base |
1774 | | bnez AT, ->fff_fallback | 1962 | | sw TMP0, SBUF:CARG1->p |
1775 | |. addu TMP3, CARG1, CARG3 | 1963 | | call_intern extern lj_buf_putstr_ .. name |
1776 | | move CARG4, CARG2 | 1964 | |. sw PC, SAVE_PC |
1777 | |1: // ASCII case conversion. | 1965 | | load_got lj_buf_tostr |
1778 | | lbu TMP1, 0(CARG1) | 1966 | | call_intern lj_buf_tostr |
1779 | | sltu AT, CARG1, TMP3 | 1967 | |. move SBUF:CARG1, SBUF:CRET1 |
1780 | | beqz AT, ->fff_newstr | 1968 | | b ->fff_resstr |
1781 | |. addiu TMP0, TMP1, -lo | 1969 | |. lw BASE, L->base |
1782 | | xori TMP2, TMP1, 0x20 | ||
1783 | | sltiu AT, TMP0, 26 | ||
1784 | | movn TMP1, TMP2, AT | ||
1785 | | addiu CARG1, CARG1, 1 | ||
1786 | | sb TMP1, 0(CARG4) | ||
1787 | | b <1 | ||
1788 | |. addiu CARG4, CARG4, 1 | ||
1789 | |.endmacro | 1970 | |.endmacro |
1790 | | | 1971 | | |
1791 | |ffstring_case string_lower, 65 | 1972 | |ffstring_op reverse |
1792 | |ffstring_case string_upper, 97 | 1973 | |ffstring_op lower |
1974 | |ffstring_op upper | ||
1793 | | | 1975 | | |
1794 | |//-- Table library ------------------------------------------------------ | 1976 | |//-- Bit library -------------------------------------------------------- |
1795 | | | 1977 | | |
1796 | |.ffunc_1 table_getn | 1978 | |->vm_tobit_fb: |
1797 | | li AT, LJ_TTAB | 1979 | | beqz TMP1, ->fff_fallback |
1798 | | bne CARG3, AT, ->fff_fallback | 1980 | |.if FPU |
1799 | |. load_got lj_tab_len | 1981 | |. ldc1 FARG1, 0(BASE) |
1800 | | call_intern lj_tab_len // (GCtab *t) | 1982 | | add.d FARG1, FARG1, TOBIT |
1801 | |. nop | 1983 | | jr ra |
1802 | | // Returns uint32_t (but less than 2^31). | 1984 | |. mfc1 CRET1, FARG1 |
1803 | | b ->fff_resi | 1985 | |.else |
1986 | |// FP number to bit conversion for soft-float. | ||
1987 | |->vm_tobit: | ||
1988 | | sll TMP0, SFARG1HI, 1 | ||
1989 | | lui AT, 0x0020 | ||
1990 | | addu TMP0, TMP0, AT | ||
1991 | | slt AT, TMP0, r0 | ||
1992 | | movz SFARG1LO, r0, AT | ||
1993 | | beqz AT, >2 | ||
1994 | |. li TMP1, 0x3e0 | ||
1995 | | not TMP1, TMP1 | ||
1996 | | sra TMP0, TMP0, 21 | ||
1997 | | subu TMP0, TMP1, TMP0 | ||
1998 | | slt AT, TMP0, r0 | ||
1999 | | bnez AT, >1 | ||
2000 | |. sll TMP1, SFARG1HI, 11 | ||
2001 | | lui AT, 0x8000 | ||
2002 | | or TMP1, TMP1, AT | ||
2003 | | srl AT, SFARG1LO, 21 | ||
2004 | | or TMP1, TMP1, AT | ||
2005 | | slt AT, SFARG1HI, r0 | ||
2006 | | beqz AT, >2 | ||
2007 | |. srlv SFARG1LO, TMP1, TMP0 | ||
2008 | | subu SFARG1LO, r0, SFARG1LO | ||
2009 | |2: | ||
2010 | | jr ra | ||
2011 | |. move CRET1, SFARG1LO | ||
2012 | |1: | ||
2013 | | addiu TMP0, TMP0, 21 | ||
2014 | | srlv TMP1, SFARG1LO, TMP0 | ||
2015 | | li AT, 20 | ||
2016 | | subu TMP0, AT, TMP0 | ||
2017 | | sll SFARG1LO, SFARG1HI, 12 | ||
2018 | | sllv AT, SFARG1LO, TMP0 | ||
2019 | | or SFARG1LO, TMP1, AT | ||
2020 | | slt AT, SFARG1HI, r0 | ||
2021 | | beqz AT, <2 | ||
1804 | |. nop | 2022 | |. nop |
1805 | | | 2023 | | jr ra |
1806 | |//-- Bit library -------------------------------------------------------- | 2024 | |. subu CRET1, r0, SFARG1LO |
2025 | |.endif | ||
1807 | | | 2026 | | |
1808 | |.macro .ffunc_bit, name | 2027 | |.macro .ffunc_bit, name |
1809 | | .ffunc_n bit_..name | 2028 | | .ffunc_1 bit_..name |
1810 | |. add.d FARG1, FARG1, TOBIT | 2029 | | beq SFARG1HI, TISNUM, >6 |
1811 | | mfc1 CRET1, FARG1 | 2030 | |. move CRET1, SFARG1LO |
2031 | | bal ->vm_tobit_fb | ||
2032 | |. sltu TMP1, SFARG1HI, TISNUM | ||
2033 | |6: | ||
1812 | |.endmacro | 2034 | |.endmacro |
1813 | | | 2035 | | |
1814 | |.macro .ffunc_bit_op, name, ins | 2036 | |.macro .ffunc_bit_op, name, ins |
1815 | | .ffunc_bit name | 2037 | | .ffunc_bit name |
1816 | | addiu TMP1, BASE, 8 | 2038 | | addiu TMP2, BASE, 8 |
1817 | | addu TMP2, BASE, NARGS8:RC | 2039 | | addu TMP3, BASE, NARGS8:RC |
1818 | |1: | 2040 | |1: |
1819 | | lw CARG4, HI(TMP1) | 2041 | | lw SFARG1HI, HI(TMP2) |
1820 | | beq TMP1, TMP2, ->fff_resi | 2042 | | beq TMP2, TMP3, ->fff_resi |
1821 | |. ldc1 FARG1, 0(TMP1) | 2043 | |. lw SFARG1LO, LO(TMP2) |
1822 | | sltiu AT, CARG4, LJ_TISNUM | 2044 | |.if FPU |
1823 | | beqz AT, ->fff_fallback | 2045 | | bne SFARG1HI, TISNUM, >2 |
1824 | | add.d FARG1, FARG1, TOBIT | 2046 | |. addiu TMP2, TMP2, 8 |
1825 | | mfc1 CARG2, FARG1 | ||
1826 | | ins CRET1, CRET1, CARG2 | ||
1827 | | b <1 | 2047 | | b <1 |
1828 | |. addiu TMP1, TMP1, 8 | 2048 | |. ins CRET1, CRET1, SFARG1LO |
2049 | |2: | ||
2050 | | ldc1 FARG1, -8(TMP2) | ||
2051 | | sltu TMP1, SFARG1HI, TISNUM | ||
2052 | | beqz TMP1, ->fff_fallback | ||
2053 | |. add.d FARG1, FARG1, TOBIT | ||
2054 | | mfc1 SFARG1LO, FARG1 | ||
2055 | | b <1 | ||
2056 | |. ins CRET1, CRET1, SFARG1LO | ||
2057 | |.else | ||
2058 | | beq SFARG1HI, TISNUM, >2 | ||
2059 | |. move CRET2, CRET1 | ||
2060 | | bal ->vm_tobit_fb | ||
2061 | |. sltu TMP1, SFARG1HI, TISNUM | ||
2062 | | move SFARG1LO, CRET2 | ||
2063 | |2: | ||
2064 | | ins CRET1, CRET1, SFARG1LO | ||
2065 | | b <1 | ||
2066 | |. addiu TMP2, TMP2, 8 | ||
2067 | |.endif | ||
1829 | |.endmacro | 2068 | |.endmacro |
1830 | | | 2069 | | |
1831 | |.ffunc_bit_op band, and | 2070 | |.ffunc_bit_op band, and |
@@ -1849,24 +2088,28 @@ static void build_subroutines(BuildCtx *ctx) | |||
1849 | |. not CRET1, CRET1 | 2088 | |. not CRET1, CRET1 |
1850 | | | 2089 | | |
1851 | |.macro .ffunc_bit_sh, name, ins, shmod | 2090 | |.macro .ffunc_bit_sh, name, ins, shmod |
1852 | | .ffunc_nn bit_..name | 2091 | | .ffunc_2 bit_..name |
1853 | |. add.d FARG1, FARG1, TOBIT | 2092 | | beq SFARG1HI, TISNUM, >1 |
1854 | | add.d FARG2, FARG2, TOBIT | 2093 | |. nop |
1855 | | mfc1 CARG1, FARG1 | 2094 | | bal ->vm_tobit_fb |
1856 | | mfc1 CARG2, FARG2 | 2095 | |. sltu TMP1, SFARG1HI, TISNUM |
2096 | | move SFARG1LO, CRET1 | ||
2097 | |1: | ||
2098 | | bne SFARG2HI, TISNUM, ->fff_fallback | ||
2099 | |. nop | ||
1857 | |.if shmod == 1 | 2100 | |.if shmod == 1 |
1858 | | li AT, 32 | 2101 | | li AT, 32 |
1859 | | subu TMP0, AT, CARG2 | 2102 | | subu TMP0, AT, SFARG2LO |
1860 | | sllv CARG2, CARG1, CARG2 | 2103 | | sllv SFARG2LO, SFARG1LO, SFARG2LO |
1861 | | srlv CARG1, CARG1, TMP0 | 2104 | | srlv SFARG1LO, SFARG1LO, TMP0 |
1862 | |.elif shmod == 2 | 2105 | |.elif shmod == 2 |
1863 | | li AT, 32 | 2106 | | li AT, 32 |
1864 | | subu TMP0, AT, CARG2 | 2107 | | subu TMP0, AT, SFARG2LO |
1865 | | srlv CARG2, CARG1, CARG2 | 2108 | | srlv SFARG2LO, SFARG1LO, SFARG2LO |
1866 | | sllv CARG1, CARG1, TMP0 | 2109 | | sllv SFARG1LO, SFARG1LO, TMP0 |
1867 | |.endif | 2110 | |.endif |
1868 | | b ->fff_resi | 2111 | | b ->fff_resi |
1869 | |. ins CRET1, CARG1, CARG2 | 2112 | |. ins CRET1, SFARG1LO, SFARG2LO |
1870 | |.endmacro | 2113 | |.endmacro |
1871 | | | 2114 | | |
1872 | |.ffunc_bit_sh lshift, sllv, 0 | 2115 | |.ffunc_bit_sh lshift, sllv, 0 |
@@ -1878,9 +2121,11 @@ static void build_subroutines(BuildCtx *ctx) | |||
1878 | | | 2121 | | |
1879 | |.ffunc_bit tobit | 2122 | |.ffunc_bit tobit |
1880 | |->fff_resi: | 2123 | |->fff_resi: |
1881 | | mtc1 CRET1, FRET1 | 2124 | | lw PC, FRAME_PC(BASE) |
1882 | | b ->fff_resn | 2125 | | addiu RA, BASE, -8 |
1883 | |. cvt.d.w FRET1, FRET1 | 2126 | | sw TISNUM, -8+HI(BASE) |
2127 | | b ->fff_res1 | ||
2128 | |. sw CRET1, -8+LO(BASE) | ||
1884 | | | 2129 | | |
1885 | |//----------------------------------------------------------------------- | 2130 | |//----------------------------------------------------------------------- |
1886 | | | 2131 | | |
@@ -2067,19 +2312,96 @@ static void build_subroutines(BuildCtx *ctx) | |||
2067 | | jr CRET1 | 2312 | | jr CRET1 |
2068 | |. lw INS, -4(PC) | 2313 | |. lw INS, -4(PC) |
2069 | | | 2314 | | |
2315 | |->cont_stitch: // Trace stitching. | ||
2316 | |.if JIT | ||
2317 | | // RA = resultptr, RB = meta base | ||
2318 | | lw INS, -4(PC) | ||
2319 | | lw TMP2, -24+LO(RB) // Save previous trace. | ||
2320 | | decode_RA8a RC, INS | ||
2321 | | addiu AT, MULTRES, -8 | ||
2322 | | decode_RA8b RC | ||
2323 | | beqz AT, >2 | ||
2324 | |. addu RC, BASE, RC // Call base. | ||
2325 | |1: // Move results down. | ||
2326 | | lw SFRETHI, HI(RA) | ||
2327 | | lw SFRETLO, LO(RA) | ||
2328 | | addiu AT, AT, -8 | ||
2329 | | addiu RA, RA, 8 | ||
2330 | | sw SFRETHI, HI(RC) | ||
2331 | | sw SFRETLO, LO(RC) | ||
2332 | | bnez AT, <1 | ||
2333 | |. addiu RC, RC, 8 | ||
2334 | |2: | ||
2335 | | decode_RA8a RA, INS | ||
2336 | | decode_RB8a RB, INS | ||
2337 | | decode_RA8b RA | ||
2338 | | decode_RB8b RB | ||
2339 | | addu RA, RA, RB | ||
2340 | | addu RA, BASE, RA | ||
2341 | |3: | ||
2342 | | sltu AT, RC, RA | ||
2343 | | bnez AT, >9 // More results wanted? | ||
2344 | |. nop | ||
2345 | | | ||
2346 | | lhu TMP3, TRACE:TMP2->traceno | ||
2347 | | lhu RD, TRACE:TMP2->link | ||
2348 | | beq RD, TMP3, ->cont_nop // Blacklisted. | ||
2349 | |. load_got lj_dispatch_stitch | ||
2350 | | bnez RD, =>BC_JLOOP // Jump to stitched trace. | ||
2351 | |. sll RD, RD, 3 | ||
2352 | | | ||
2353 | | // Stitch a new trace to the previous trace. | ||
2354 | | sw TMP3, DISPATCH_J(exitno)(DISPATCH) | ||
2355 | | sw L, DISPATCH_J(L)(DISPATCH) | ||
2356 | | sw BASE, L->base | ||
2357 | | addiu CARG1, DISPATCH, GG_DISP2J | ||
2358 | | call_intern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) | ||
2359 | |. move CARG2, PC | ||
2360 | | b ->cont_nop | ||
2361 | |. lw BASE, L->base | ||
2362 | | | ||
2363 | |9: | ||
2364 | | sw TISNIL, HI(RC) | ||
2365 | | b <3 | ||
2366 | |. addiu RC, RC, 8 | ||
2367 | |.endif | ||
2368 | | | ||
2369 | |->vm_profhook: // Dispatch target for profiler hook. | ||
2370 | #if LJ_HASPROFILE | ||
2371 | | load_got lj_dispatch_profile | ||
2372 | | sw MULTRES, SAVE_MULTRES | ||
2373 | | move CARG2, PC | ||
2374 | | sw BASE, L->base | ||
2375 | | call_intern lj_dispatch_profile // (lua_State *L, const BCIns *pc) | ||
2376 | |. move CARG1, L | ||
2377 | | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. | ||
2378 | | addiu PC, PC, -4 | ||
2379 | | b ->cont_nop | ||
2380 | |. lw BASE, L->base | ||
2381 | #endif | ||
2382 | | | ||
2070 | |//----------------------------------------------------------------------- | 2383 | |//----------------------------------------------------------------------- |
2071 | |//-- Trace exit handler ------------------------------------------------- | 2384 | |//-- Trace exit handler ------------------------------------------------- |
2072 | |//----------------------------------------------------------------------- | 2385 | |//----------------------------------------------------------------------- |
2073 | | | 2386 | | |
2074 | |.macro savex_, a, b | 2387 | |.macro savex_, a, b |
2388 | |.if FPU | ||
2075 | | sdc1 f..a, 16+a*8(sp) | 2389 | | sdc1 f..a, 16+a*8(sp) |
2076 | | sw r..a, 16+32*8+a*4(sp) | 2390 | | sw r..a, 16+32*8+a*4(sp) |
2077 | | sw r..b, 16+32*8+b*4(sp) | 2391 | | sw r..b, 16+32*8+b*4(sp) |
2392 | |.else | ||
2393 | | sw r..a, 16+a*4(sp) | ||
2394 | | sw r..b, 16+b*4(sp) | ||
2395 | |.endif | ||
2078 | |.endmacro | 2396 | |.endmacro |
2079 | | | 2397 | | |
2080 | |->vm_exit_handler: | 2398 | |->vm_exit_handler: |
2081 | |.if JIT | 2399 | |.if JIT |
2400 | |.if FPU | ||
2082 | | addiu sp, sp, -(16+32*8+32*4) | 2401 | | addiu sp, sp, -(16+32*8+32*4) |
2402 | |.else | ||
2403 | | addiu sp, sp, -(16+32*4) | ||
2404 | |.endif | ||
2083 | | savex_ 0, 1 | 2405 | | savex_ 0, 1 |
2084 | | savex_ 2, 3 | 2406 | | savex_ 2, 3 |
2085 | | savex_ 4, 5 | 2407 | | savex_ 4, 5 |
@@ -2094,25 +2416,34 @@ static void build_subroutines(BuildCtx *ctx) | |||
2094 | | savex_ 22, 23 | 2416 | | savex_ 22, 23 |
2095 | | savex_ 24, 25 | 2417 | | savex_ 24, 25 |
2096 | | savex_ 26, 27 | 2418 | | savex_ 26, 27 |
2419 | |.if FPU | ||
2097 | | sdc1 f28, 16+28*8(sp) | 2420 | | sdc1 f28, 16+28*8(sp) |
2098 | | sw r28, 16+32*8+28*4(sp) | ||
2099 | | sdc1 f30, 16+30*8(sp) | 2421 | | sdc1 f30, 16+30*8(sp) |
2422 | | sw r28, 16+32*8+28*4(sp) | ||
2100 | | sw r30, 16+32*8+30*4(sp) | 2423 | | sw r30, 16+32*8+30*4(sp) |
2101 | | sw r0, 16+32*8+31*4(sp) // Clear RID_TMP. | 2424 | | sw r0, 16+32*8+31*4(sp) // Clear RID_TMP. |
2425 | | addiu TMP2, sp, 16+32*8+32*4 // Recompute original value of sp. | ||
2426 | | sw TMP2, 16+32*8+29*4(sp) // Store sp in RID_SP | ||
2427 | |.else | ||
2428 | | sw r28, 16+28*4(sp) | ||
2429 | | sw r30, 16+30*4(sp) | ||
2430 | | sw r0, 16+31*4(sp) // Clear RID_TMP. | ||
2431 | | addiu TMP2, sp, 16+32*4 // Recompute original value of sp. | ||
2432 | | sw TMP2, 16+29*4(sp) // Store sp in RID_SP | ||
2433 | |.endif | ||
2102 | | li_vmstate EXIT | 2434 | | li_vmstate EXIT |
2103 | | addiu TMP2, sp, 16+32*8+32*4 // Recompute original value of sp. | ||
2104 | | addiu DISPATCH, JGL, -GG_DISP2G-32768 | 2435 | | addiu DISPATCH, JGL, -GG_DISP2G-32768 |
2105 | | lw TMP1, 0(TMP2) // Load exit number. | 2436 | | lw TMP1, 0(TMP2) // Load exit number. |
2106 | | st_vmstate | 2437 | | st_vmstate |
2107 | | sw TMP2, 16+32*8+29*4(sp) // Store sp in RID_SP. | 2438 | | lw L, DISPATCH_GL(cur_L)(DISPATCH) |
2108 | | lw L, DISPATCH_GL(jit_L)(DISPATCH) | 2439 | | lw BASE, DISPATCH_GL(jit_base)(DISPATCH) |
2109 | | lw BASE, DISPATCH_GL(jit_base)(DISPATCH) | ||
2110 | | load_got lj_trace_exit | 2440 | | load_got lj_trace_exit |
2111 | | sw L, DISPATCH_J(L)(DISPATCH) | 2441 | | sw L, DISPATCH_J(L)(DISPATCH) |
2112 | | sw ra, DISPATCH_J(parent)(DISPATCH) // Store trace number. | 2442 | | sw ra, DISPATCH_J(parent)(DISPATCH) // Store trace number. |
2443 | | sw BASE, L->base | ||
2113 | | sw TMP1, DISPATCH_J(exitno)(DISPATCH) // Store exit number. | 2444 | | sw TMP1, DISPATCH_J(exitno)(DISPATCH) // Store exit number. |
2114 | | addiu CARG1, DISPATCH, GG_DISP2J | 2445 | | addiu CARG1, DISPATCH, GG_DISP2J |
2115 | | sw BASE, L->base | 2446 | | sw r0, DISPATCH_GL(jit_base)(DISPATCH) |
2116 | | call_intern lj_trace_exit // (jit_State *J, ExitState *ex) | 2447 | | call_intern lj_trace_exit // (jit_State *J, ExitState *ex) |
2117 | |. addiu CARG2, sp, 16 | 2448 | |. addiu CARG2, sp, 16 |
2118 | | // Returns MULTRES (unscaled) or negated error code. | 2449 | | // Returns MULTRES (unscaled) or negated error code. |
@@ -2128,19 +2459,21 @@ static void build_subroutines(BuildCtx *ctx) | |||
2128 | |.if JIT | 2459 | |.if JIT |
2129 | | // CRET1 = MULTRES or negated error code, BASE, PC and JGL set. | 2460 | | // CRET1 = MULTRES or negated error code, BASE, PC and JGL set. |
2130 | | lw L, SAVE_L | 2461 | | lw L, SAVE_L |
2131 | | addiu DISPATCH, JGL, -GG_DISP2G-32768 | 2462 | | addiu DISPATCH, JGL, -GG_DISP2G-32768 |
2463 | | sw BASE, L->base | ||
2132 | |1: | 2464 | |1: |
2133 | | bltz CRET1, >3 // Check for error from exit. | 2465 | | bltz CRET1, >9 // Check for error from exit. |
2134 | |. lw LFUNC:TMP1, FRAME_FUNC(BASE) | 2466 | |. lw LFUNC:RB, FRAME_FUNC(BASE) |
2135 | | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | 2467 | | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). |
2136 | | sll MULTRES, CRET1, 3 | 2468 | | sll MULTRES, CRET1, 3 |
2137 | | li TISNIL, LJ_TNIL | 2469 | | li TISNIL, LJ_TNIL |
2470 | | li TISNUM, LJ_TISNUM // Setup type comparison constants. | ||
2138 | | sw MULTRES, SAVE_MULTRES | 2471 | | sw MULTRES, SAVE_MULTRES |
2139 | | mtc1 TMP3, TOBIT | 2472 | | .FPU mtc1 TMP3, TOBIT |
2140 | | lw TMP1, LFUNC:TMP1->pc | 2473 | | lw TMP1, LFUNC:RB->pc |
2141 | | sw r0, DISPATCH_GL(jit_L)(DISPATCH) | 2474 | | sw r0, DISPATCH_GL(jit_base)(DISPATCH) |
2142 | | lw KBASE, PC2PROTO(k)(TMP1) | 2475 | | lw KBASE, PC2PROTO(k)(TMP1) |
2143 | | cvt.d.s TOBIT, TOBIT | 2476 | | .FPU cvt.d.s TOBIT, TOBIT |
2144 | | // Modified copy of ins_next which handles function header dispatch, too. | 2477 | | // Modified copy of ins_next which handles function header dispatch, too. |
2145 | | lw INS, 0(PC) | 2478 | | lw INS, 0(PC) |
2146 | | addiu PC, PC, 4 | 2479 | | addiu PC, PC, 4 |
@@ -2148,7 +2481,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
2148 | | sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH) | 2481 | | sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH) |
2149 | | decode_OP4a TMP1, INS | 2482 | | decode_OP4a TMP1, INS |
2150 | | decode_OP4b TMP1 | 2483 | | decode_OP4b TMP1 |
2151 | | sltiu TMP2, TMP1, BC_FUNCF*4 // Function header? | 2484 | | sltiu TMP2, TMP1, BC_FUNCF*4 |
2152 | | addu TMP0, DISPATCH, TMP1 | 2485 | | addu TMP0, DISPATCH, TMP1 |
2153 | | decode_RD8a RD, INS | 2486 | | decode_RD8a RD, INS |
2154 | | lw AT, 0(TMP0) | 2487 | | lw AT, 0(TMP0) |
@@ -2158,11 +2491,27 @@ static void build_subroutines(BuildCtx *ctx) | |||
2158 | | jr AT | 2491 | | jr AT |
2159 | |. decode_RD8b RD | 2492 | |. decode_RD8b RD |
2160 | |2: | 2493 | |2: |
2494 | | sltiu TMP2, TMP1, (BC_FUNCC+2)*4 // Fast function? | ||
2495 | | bnez TMP2, >3 | ||
2496 | |. lw TMP1, FRAME_PC(BASE) | ||
2497 | | // Check frame below fast function. | ||
2498 | | andi TMP0, TMP1, FRAME_TYPE | ||
2499 | | bnez TMP0, >3 // Trace stitching continuation? | ||
2500 | |. nop | ||
2501 | | // Otherwise set KBASE for Lua function below fast function. | ||
2502 | | lw TMP2, -4(TMP1) | ||
2503 | | decode_RA8a TMP0, TMP2 | ||
2504 | | decode_RA8b TMP0 | ||
2505 | | subu TMP1, BASE, TMP0 | ||
2506 | | lw LFUNC:TMP2, -8+FRAME_FUNC(TMP1) | ||
2507 | | lw TMP1, LFUNC:TMP2->pc | ||
2508 | | lw KBASE, PC2PROTO(k)(TMP1) | ||
2509 | |3: | ||
2161 | | addiu RC, MULTRES, -8 | 2510 | | addiu RC, MULTRES, -8 |
2162 | | jr AT | 2511 | | jr AT |
2163 | |. addu RA, RA, BASE | 2512 | |. addu RA, RA, BASE |
2164 | | | 2513 | | |
2165 | |3: // Rethrow error from the right C frame. | 2514 | |9: // Rethrow error from the right C frame. |
2166 | | load_got lj_err_throw | 2515 | | load_got lj_err_throw |
2167 | | negu CARG2, CRET1 | 2516 | | negu CARG2, CRET1 |
2168 | | call_intern lj_err_throw // (lua_State *L, int errcode) | 2517 | | call_intern lj_err_throw // (lua_State *L, int errcode) |
@@ -2173,8 +2522,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
2173 | |//-- Math helper functions ---------------------------------------------- | 2522 | |//-- Math helper functions ---------------------------------------------- |
2174 | |//----------------------------------------------------------------------- | 2523 | |//----------------------------------------------------------------------- |
2175 | | | 2524 | | |
2525 | |// Hard-float round to integer. | ||
2176 | |// Modifies AT, TMP0, FRET1, FRET2, f4. Keeps all others incl. FARG1. | 2526 | |// Modifies AT, TMP0, FRET1, FRET2, f4. Keeps all others incl. FARG1. |
2177 | |.macro vm_round, func | 2527 | |.macro vm_round_hf, func |
2178 | | lui TMP0, 0x4330 // Hiword of 2^52 (double). | 2528 | | lui TMP0, 0x4330 // Hiword of 2^52 (double). |
2179 | | mtc1 r0, f4 | 2529 | | mtc1 r0, f4 |
2180 | | mtc1 TMP0, f5 | 2530 | | mtc1 TMP0, f5 |
@@ -2216,6 +2566,12 @@ static void build_subroutines(BuildCtx *ctx) | |||
2216 | |. mov.d FRET1, FARG1 | 2566 | |. mov.d FRET1, FARG1 |
2217 | |.endmacro | 2567 | |.endmacro |
2218 | | | 2568 | | |
2569 | |.macro vm_round, func | ||
2570 | |.if FPU | ||
2571 | | vm_round_hf, func | ||
2572 | |.endif | ||
2573 | |.endmacro | ||
2574 | | | ||
2219 | |->vm_floor: | 2575 | |->vm_floor: |
2220 | | vm_round floor | 2576 | | vm_round floor |
2221 | |->vm_ceil: | 2577 | |->vm_ceil: |
@@ -2225,6 +2581,178 @@ static void build_subroutines(BuildCtx *ctx) | |||
2225 | | vm_round trunc | 2581 | | vm_round trunc |
2226 | |.endif | 2582 | |.endif |
2227 | | | 2583 | | |
2584 | |// Soft-float integer to number conversion. | ||
2585 | |.macro sfi2d, AHI, ALO | ||
2586 | |.if not FPU | ||
2587 | | beqz ALO, >9 // Handle zero first. | ||
2588 | |. sra TMP0, ALO, 31 | ||
2589 | | xor TMP1, ALO, TMP0 | ||
2590 | | subu TMP1, TMP1, TMP0 // Absolute value in TMP1. | ||
2591 | | clz AHI, TMP1 | ||
2592 | | andi TMP0, TMP0, 0x800 // Mask sign bit. | ||
2593 | | li AT, 0x3ff+31-1 | ||
2594 | | sllv TMP1, TMP1, AHI // Align mantissa left with leading 1. | ||
2595 | | subu AHI, AT, AHI // Exponent - 1 in AHI. | ||
2596 | | sll ALO, TMP1, 21 | ||
2597 | | or AHI, AHI, TMP0 // Sign | Exponent. | ||
2598 | | srl TMP1, TMP1, 11 | ||
2599 | | sll AHI, AHI, 20 // Align left. | ||
2600 | | jr ra | ||
2601 | |. addu AHI, AHI, TMP1 // Add mantissa, increment exponent. | ||
2602 | |9: | ||
2603 | | jr ra | ||
2604 | |. li AHI, 0 | ||
2605 | |.endif | ||
2606 | |.endmacro | ||
2607 | | | ||
2608 | |// Input SFARG1LO. Output: SFARG1*. Temporaries: AT, TMP0, TMP1. | ||
2609 | |->vm_sfi2d_1: | ||
2610 | | sfi2d SFARG1HI, SFARG1LO | ||
2611 | | | ||
2612 | |// Input SFARG2LO. Output: SFARG2*. Temporaries: AT, TMP0, TMP1. | ||
2613 | |->vm_sfi2d_2: | ||
2614 | | sfi2d SFARG2HI, SFARG2LO | ||
2615 | | | ||
2616 | |// Soft-float comparison. Equivalent to c.eq.d. | ||
2617 | |// Input: SFARG*. Output: CRET1. Temporaries: AT, TMP0, TMP1. | ||
2618 | |->vm_sfcmpeq: | ||
2619 | |.if not FPU | ||
2620 | | sll AT, SFARG1HI, 1 | ||
2621 | | sll TMP0, SFARG2HI, 1 | ||
2622 | | or CRET1, SFARG1LO, SFARG2LO | ||
2623 | | or TMP1, AT, TMP0 | ||
2624 | | or TMP1, TMP1, CRET1 | ||
2625 | | beqz TMP1, >8 // Both args +-0: return 1. | ||
2626 | |. sltu CRET1, r0, SFARG1LO | ||
2627 | | lui TMP1, 0xffe0 | ||
2628 | | addu AT, AT, CRET1 | ||
2629 | | sltu CRET1, r0, SFARG2LO | ||
2630 | | sltu AT, TMP1, AT | ||
2631 | | addu TMP0, TMP0, CRET1 | ||
2632 | | sltu TMP0, TMP1, TMP0 | ||
2633 | | or TMP1, AT, TMP0 | ||
2634 | | bnez TMP1, >9 // Either arg is NaN: return 0; | ||
2635 | |. xor TMP0, SFARG1HI, SFARG2HI | ||
2636 | | xor TMP1, SFARG1LO, SFARG2LO | ||
2637 | | or AT, TMP0, TMP1 | ||
2638 | | jr ra | ||
2639 | |. sltiu CRET1, AT, 1 // Same values: return 1. | ||
2640 | |8: | ||
2641 | | jr ra | ||
2642 | |. li CRET1, 1 | ||
2643 | |9: | ||
2644 | | jr ra | ||
2645 | |. li CRET1, 0 | ||
2646 | |.endif | ||
2647 | | | ||
2648 | |// Soft-float comparison. Equivalent to c.ult.d and c.olt.d. | ||
2649 | |// Input: SFARG*. Output: CRET1. Temporaries: AT, TMP0, TMP1, CRET2. | ||
2650 | |->vm_sfcmpult: | ||
2651 | |.if not FPU | ||
2652 | | b >1 | ||
2653 | |. li CRET2, 1 | ||
2654 | |.endif | ||
2655 | | | ||
2656 | |->vm_sfcmpolt: | ||
2657 | |.if not FPU | ||
2658 | | li CRET2, 0 | ||
2659 | |1: | ||
2660 | | sll AT, SFARG1HI, 1 | ||
2661 | | sll TMP0, SFARG2HI, 1 | ||
2662 | | or CRET1, SFARG1LO, SFARG2LO | ||
2663 | | or TMP1, AT, TMP0 | ||
2664 | | or TMP1, TMP1, CRET1 | ||
2665 | | beqz TMP1, >8 // Both args +-0: return 0. | ||
2666 | |. sltu CRET1, r0, SFARG1LO | ||
2667 | | lui TMP1, 0xffe0 | ||
2668 | | addu AT, AT, CRET1 | ||
2669 | | sltu CRET1, r0, SFARG2LO | ||
2670 | | sltu AT, TMP1, AT | ||
2671 | | addu TMP0, TMP0, CRET1 | ||
2672 | | sltu TMP0, TMP1, TMP0 | ||
2673 | | or TMP1, AT, TMP0 | ||
2674 | | bnez TMP1, >9 // Either arg is NaN: return 0 or 1; | ||
2675 | |. and AT, SFARG1HI, SFARG2HI | ||
2676 | | bltz AT, >5 // Both args negative? | ||
2677 | |. nop | ||
2678 | | beq SFARG1HI, SFARG2HI, >8 | ||
2679 | |. sltu CRET1, SFARG1LO, SFARG2LO | ||
2680 | | jr ra | ||
2681 | |. slt CRET1, SFARG1HI, SFARG2HI | ||
2682 | |5: // Swap conditions if both operands are negative. | ||
2683 | | beq SFARG1HI, SFARG2HI, >8 | ||
2684 | |. sltu CRET1, SFARG2LO, SFARG1LO | ||
2685 | | jr ra | ||
2686 | |. slt CRET1, SFARG2HI, SFARG1HI | ||
2687 | |8: | ||
2688 | | jr ra | ||
2689 | |. nop | ||
2690 | |9: | ||
2691 | | jr ra | ||
2692 | |. move CRET1, CRET2 | ||
2693 | |.endif | ||
2694 | | | ||
2695 | |// Soft-float comparison. Equivalent to c.ole.d a, b or c.ole.d b, a. | ||
2696 | |// Input: SFARG*, TMP3. Output: CRET1. Temporaries: AT, TMP0, TMP1. | ||
2697 | |->vm_sfcmpolex: | ||
2698 | |.if not FPU | ||
2699 | | sll AT, SFARG1HI, 1 | ||
2700 | | sll TMP0, SFARG2HI, 1 | ||
2701 | | or CRET1, SFARG1LO, SFARG2LO | ||
2702 | | or TMP1, AT, TMP0 | ||
2703 | | or TMP1, TMP1, CRET1 | ||
2704 | | beqz TMP1, >8 // Both args +-0: return 1. | ||
2705 | |. sltu CRET1, r0, SFARG1LO | ||
2706 | | lui TMP1, 0xffe0 | ||
2707 | | addu AT, AT, CRET1 | ||
2708 | | sltu CRET1, r0, SFARG2LO | ||
2709 | | sltu AT, TMP1, AT | ||
2710 | | addu TMP0, TMP0, CRET1 | ||
2711 | | sltu TMP0, TMP1, TMP0 | ||
2712 | | or TMP1, AT, TMP0 | ||
2713 | | bnez TMP1, >9 // Either arg is NaN: return 0; | ||
2714 | |. and AT, SFARG1HI, SFARG2HI | ||
2715 | | xor AT, AT, TMP3 | ||
2716 | | bltz AT, >5 // Both args negative? | ||
2717 | |. nop | ||
2718 | | beq SFARG1HI, SFARG2HI, >6 | ||
2719 | |. sltu CRET1, SFARG2LO, SFARG1LO | ||
2720 | | jr ra | ||
2721 | |. slt CRET1, SFARG2HI, SFARG1HI | ||
2722 | |5: // Swap conditions if both operands are negative. | ||
2723 | | beq SFARG1HI, SFARG2HI, >6 | ||
2724 | |. sltu CRET1, SFARG1LO, SFARG2LO | ||
2725 | | slt CRET1, SFARG1HI, SFARG2HI | ||
2726 | |6: | ||
2727 | | jr ra | ||
2728 | |. nop | ||
2729 | |8: | ||
2730 | | jr ra | ||
2731 | |. li CRET1, 1 | ||
2732 | |9: | ||
2733 | | jr ra | ||
2734 | |. li CRET1, 0 | ||
2735 | |.endif | ||
2736 | | | ||
2737 | |.macro sfmin_max, name, intins | ||
2738 | |->vm_sf .. name: | ||
2739 | |.if JIT and not FPU | ||
2740 | | move TMP2, ra | ||
2741 | | bal ->vm_sfcmpolt | ||
2742 | |. nop | ||
2743 | | move TMP0, CRET1 | ||
2744 | | move SFRETHI, SFARG1HI | ||
2745 | | move SFRETLO, SFARG1LO | ||
2746 | | move ra, TMP2 | ||
2747 | | intins SFRETHI, SFARG2HI, TMP0 | ||
2748 | | jr ra | ||
2749 | |. intins SFRETLO, SFARG2LO, TMP0 | ||
2750 | |.endif | ||
2751 | |.endmacro | ||
2752 | | | ||
2753 | | sfmin_max min, movz | ||
2754 | | sfmin_max max, movn | ||
2755 | | | ||
2228 | |//----------------------------------------------------------------------- | 2756 | |//----------------------------------------------------------------------- |
2229 | |//-- Miscellaneous functions -------------------------------------------- | 2757 | |//-- Miscellaneous functions -------------------------------------------- |
2230 | |//----------------------------------------------------------------------- | 2758 | |//----------------------------------------------------------------------- |
@@ -2244,10 +2772,10 @@ static void build_subroutines(BuildCtx *ctx) | |||
2244 | | sw r1, CTSTATE->cb.slot | 2772 | | sw r1, CTSTATE->cb.slot |
2245 | | sw CARG1, CTSTATE->cb.gpr[0] | 2773 | | sw CARG1, CTSTATE->cb.gpr[0] |
2246 | | sw CARG2, CTSTATE->cb.gpr[1] | 2774 | | sw CARG2, CTSTATE->cb.gpr[1] |
2247 | | sdc1 FARG1, CTSTATE->cb.fpr[0] | 2775 | | .FPU sdc1 FARG1, CTSTATE->cb.fpr[0] |
2248 | | sw CARG3, CTSTATE->cb.gpr[2] | 2776 | | sw CARG3, CTSTATE->cb.gpr[2] |
2249 | | sw CARG4, CTSTATE->cb.gpr[3] | 2777 | | sw CARG4, CTSTATE->cb.gpr[3] |
2250 | | sdc1 FARG2, CTSTATE->cb.fpr[1] | 2778 | | .FPU sdc1 FARG2, CTSTATE->cb.fpr[1] |
2251 | | addiu TMP0, sp, CFRAME_SPACE+16 | 2779 | | addiu TMP0, sp, CFRAME_SPACE+16 |
2252 | | sw TMP0, CTSTATE->cb.stack | 2780 | | sw TMP0, CTSTATE->cb.stack |
2253 | | sw r0, SAVE_PC // Any value outside of bytecode is ok. | 2781 | | sw r0, SAVE_PC // Any value outside of bytecode is ok. |
@@ -2257,15 +2785,16 @@ static void build_subroutines(BuildCtx *ctx) | |||
2257 | | // Returns lua_State *. | 2785 | | // Returns lua_State *. |
2258 | | lw BASE, L:CRET1->base | 2786 | | lw BASE, L:CRET1->base |
2259 | | lw RC, L:CRET1->top | 2787 | | lw RC, L:CRET1->top |
2788 | | li TISNUM, LJ_TISNUM // Setup type comparison constants. | ||
2260 | | move L, CRET1 | 2789 | | move L, CRET1 |
2261 | | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | 2790 | | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). |
2262 | | lw LFUNC:RB, FRAME_FUNC(BASE) | 2791 | | lw LFUNC:RB, FRAME_FUNC(BASE) |
2263 | | mtc1 TMP3, TOBIT | 2792 | | .FPU mtc1 TMP3, TOBIT |
2264 | | li_vmstate INTERP | 2793 | | li_vmstate INTERP |
2265 | | li TISNIL, LJ_TNIL | 2794 | | li TISNIL, LJ_TNIL |
2266 | | subu RC, RC, BASE | 2795 | | subu RC, RC, BASE |
2267 | | st_vmstate | 2796 | | st_vmstate |
2268 | | cvt.d.s TOBIT, TOBIT | 2797 | | .FPU cvt.d.s TOBIT, TOBIT |
2269 | | ins_callt | 2798 | | ins_callt |
2270 | |.endif | 2799 | |.endif |
2271 | | | 2800 | | |
@@ -2279,11 +2808,11 @@ static void build_subroutines(BuildCtx *ctx) | |||
2279 | | move CARG2, RA | 2808 | | move CARG2, RA |
2280 | | call_intern lj_ccallback_leave // (CTState *cts, TValue *o) | 2809 | | call_intern lj_ccallback_leave // (CTState *cts, TValue *o) |
2281 | |. move CARG1, CTSTATE | 2810 | |. move CARG1, CTSTATE |
2811 | | .FPU ldc1 FRET1, CTSTATE->cb.fpr[0] | ||
2282 | | lw CRET1, CTSTATE->cb.gpr[0] | 2812 | | lw CRET1, CTSTATE->cb.gpr[0] |
2283 | | ldc1 FRET1, CTSTATE->cb.fpr[0] | 2813 | | .FPU ldc1 FRET2, CTSTATE->cb.fpr[1] |
2284 | | lw CRET2, CTSTATE->cb.gpr[1] | ||
2285 | | b ->vm_leave_unw | 2814 | | b ->vm_leave_unw |
2286 | |. ldc1 FRET2, CTSTATE->cb.fpr[1] | 2815 | |. lw CRET2, CTSTATE->cb.gpr[1] |
2287 | |.endif | 2816 | |.endif |
2288 | | | 2817 | | |
2289 | |->vm_ffi_call: // Call C function via FFI. | 2818 | |->vm_ffi_call: // Call C function via FFI. |
@@ -2315,8 +2844,8 @@ static void build_subroutines(BuildCtx *ctx) | |||
2315 | | lw CARG2, CCSTATE->gpr[1] | 2844 | | lw CARG2, CCSTATE->gpr[1] |
2316 | | lw CARG3, CCSTATE->gpr[2] | 2845 | | lw CARG3, CCSTATE->gpr[2] |
2317 | | lw CARG4, CCSTATE->gpr[3] | 2846 | | lw CARG4, CCSTATE->gpr[3] |
2318 | | ldc1 FARG1, CCSTATE->fpr[0] | 2847 | | .FPU ldc1 FARG1, CCSTATE->fpr[0] |
2319 | | ldc1 FARG2, CCSTATE->fpr[1] | 2848 | | .FPU ldc1 FARG2, CCSTATE->fpr[1] |
2320 | | jalr CFUNCADDR | 2849 | | jalr CFUNCADDR |
2321 | |. lw CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1. | 2850 | |. lw CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1. |
2322 | | lw CCSTATE:TMP1, -12(r16) | 2851 | | lw CCSTATE:TMP1, -12(r16) |
@@ -2324,8 +2853,13 @@ static void build_subroutines(BuildCtx *ctx) | |||
2324 | | lw ra, -4(r16) | 2853 | | lw ra, -4(r16) |
2325 | | sw CRET1, CCSTATE:TMP1->gpr[0] | 2854 | | sw CRET1, CCSTATE:TMP1->gpr[0] |
2326 | | sw CRET2, CCSTATE:TMP1->gpr[1] | 2855 | | sw CRET2, CCSTATE:TMP1->gpr[1] |
2856 | |.if FPU | ||
2327 | | sdc1 FRET1, CCSTATE:TMP1->fpr[0] | 2857 | | sdc1 FRET1, CCSTATE:TMP1->fpr[0] |
2328 | | sdc1 FRET2, CCSTATE:TMP1->fpr[1] | 2858 | | sdc1 FRET2, CCSTATE:TMP1->fpr[1] |
2859 | |.else | ||
2860 | | sw CARG1, CCSTATE:TMP1->gpr[2] // Soft-float: complex double .im part. | ||
2861 | | sw CARG2, CCSTATE:TMP1->gpr[3] | ||
2862 | |.endif | ||
2329 | | move sp, r16 | 2863 | | move sp, r16 |
2330 | | jr ra | 2864 | | jr ra |
2331 | |. move r16, TMP2 | 2865 | |. move r16, TMP2 |
@@ -2349,82 +2883,143 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2349 | 2883 | ||
2350 | case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: | 2884 | case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: |
2351 | | // RA = src1*8, RD = src2*8, JMP with RD = target | 2885 | | // RA = src1*8, RD = src2*8, JMP with RD = target |
2352 | | addu CARG2, BASE, RA | 2886 | |.macro bc_comp, FRA, FRD, RAHI, RALO, RDHI, RDLO, movop, fmovop, fcomp, sfcomp |
2353 | | addu CARG3, BASE, RD | 2887 | | addu RA, BASE, RA |
2354 | | lw TMP0, HI(CARG2) | 2888 | | addu RD, BASE, RD |
2355 | | lw TMP1, HI(CARG3) | 2889 | | lw RAHI, HI(RA) |
2356 | | ldc1 f0, 0(CARG2) | 2890 | | lw RDHI, HI(RD) |
2357 | | ldc1 f2, 0(CARG3) | ||
2358 | | sltiu TMP0, TMP0, LJ_TISNUM | ||
2359 | | sltiu TMP1, TMP1, LJ_TISNUM | ||
2360 | | lhu TMP2, OFS_RD(PC) | 2891 | | lhu TMP2, OFS_RD(PC) |
2361 | | and TMP0, TMP0, TMP1 | ||
2362 | | addiu PC, PC, 4 | 2892 | | addiu PC, PC, 4 |
2363 | | beqz TMP0, ->vmeta_comp | 2893 | | bne RAHI, TISNUM, >2 |
2364 | |. lui TMP1, (-(BCBIAS_J*4 >> 16) & 65535) | 2894 | |. lw RALO, LO(RA) |
2365 | | decode_RD4b TMP2 | 2895 | | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) |
2366 | | addu TMP2, TMP2, TMP1 | 2896 | | lw RDLO, LO(RD) |
2367 | if (op == BC_ISLT || op == BC_ISGE) { | 2897 | | bne RDHI, TISNUM, >5 |
2368 | | c.olt.d f0, f2 | 2898 | |. decode_RD4b TMP2 |
2369 | } else { | 2899 | | slt AT, SFARG1LO, SFARG2LO |
2370 | | c.ole.d f0, f2 | 2900 | | addu TMP2, TMP2, TMP3 |
2371 | } | 2901 | | movop TMP2, r0, AT |
2372 | if (op == BC_ISLT || op == BC_ISLE) { | ||
2373 | | movf TMP2, r0 | ||
2374 | } else { | ||
2375 | | movt TMP2, r0 | ||
2376 | } | ||
2377 | | addu PC, PC, TMP2 | ||
2378 | |1: | 2902 | |1: |
2903 | | addu PC, PC, TMP2 | ||
2379 | | ins_next | 2904 | | ins_next |
2905 | | | ||
2906 | |2: // RA is not an integer. | ||
2907 | | sltiu AT, RAHI, LJ_TISNUM | ||
2908 | | beqz AT, ->vmeta_comp | ||
2909 | |. lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) | ||
2910 | | sltiu AT, RDHI, LJ_TISNUM | ||
2911 | |.if FPU | ||
2912 | | ldc1 FRA, 0(RA) | ||
2913 | | ldc1 FRD, 0(RD) | ||
2914 | |.else | ||
2915 | | lw RDLO, LO(RD) | ||
2916 | |.endif | ||
2917 | | beqz AT, >4 | ||
2918 | |. decode_RD4b TMP2 | ||
2919 | |3: // RA and RD are both numbers. | ||
2920 | |.if FPU | ||
2921 | | fcomp f20, f22 | ||
2922 | | addu TMP2, TMP2, TMP3 | ||
2923 | | b <1 | ||
2924 | |. fmovop TMP2, r0 | ||
2925 | |.else | ||
2926 | | bal sfcomp | ||
2927 | |. addu TMP2, TMP2, TMP3 | ||
2928 | | b <1 | ||
2929 | |. movop TMP2, r0, CRET1 | ||
2930 | |.endif | ||
2931 | | | ||
2932 | |4: // RA is a number, RD is not a number. | ||
2933 | | bne RDHI, TISNUM, ->vmeta_comp | ||
2934 | | // RA is a number, RD is an integer. Convert RD to a number. | ||
2935 | |.if FPU | ||
2936 | |. lwc1 FRD, LO(RD) | ||
2937 | | b <3 | ||
2938 | |. cvt.d.w FRD, FRD | ||
2939 | |.else | ||
2940 | |. nop | ||
2941 | |.if "RDHI" == "SFARG1HI" | ||
2942 | | bal ->vm_sfi2d_1 | ||
2943 | |.else | ||
2944 | | bal ->vm_sfi2d_2 | ||
2945 | |.endif | ||
2946 | |. nop | ||
2947 | | b <3 | ||
2948 | |. nop | ||
2949 | |.endif | ||
2950 | | | ||
2951 | |5: // RA is an integer, RD is not an integer | ||
2952 | | sltiu AT, RDHI, LJ_TISNUM | ||
2953 | | beqz AT, ->vmeta_comp | ||
2954 | | // RA is an integer, RD is a number. Convert RA to a number. | ||
2955 | |.if FPU | ||
2956 | |. mtc1 RALO, FRA | ||
2957 | | ldc1 FRD, 0(RD) | ||
2958 | | b <3 | ||
2959 | | cvt.d.w FRA, FRA | ||
2960 | |.else | ||
2961 | |. nop | ||
2962 | |.if "RAHI" == "SFARG1HI" | ||
2963 | | bal ->vm_sfi2d_1 | ||
2964 | |.else | ||
2965 | | bal ->vm_sfi2d_2 | ||
2966 | |.endif | ||
2967 | |. nop | ||
2968 | | b <3 | ||
2969 | |. nop | ||
2970 | |.endif | ||
2971 | |.endmacro | ||
2972 | | | ||
2973 | if (op == BC_ISLT) { | ||
2974 | | bc_comp f20, f22, SFARG1HI, SFARG1LO, SFARG2HI, SFARG2LO, movz, movf, c.olt.d, ->vm_sfcmpolt | ||
2975 | } else if (op == BC_ISGE) { | ||
2976 | | bc_comp f20, f22, SFARG1HI, SFARG1LO, SFARG2HI, SFARG2LO, movn, movt, c.olt.d, ->vm_sfcmpolt | ||
2977 | } else if (op == BC_ISLE) { | ||
2978 | | bc_comp f22, f20, SFARG2HI, SFARG2LO, SFARG1HI, SFARG1LO, movn, movt, c.ult.d, ->vm_sfcmpult | ||
2979 | } else { | ||
2980 | | bc_comp f22, f20, SFARG2HI, SFARG2LO, SFARG1HI, SFARG1LO, movz, movf, c.ult.d, ->vm_sfcmpult | ||
2981 | } | ||
2380 | break; | 2982 | break; |
2381 | 2983 | ||
2382 | case BC_ISEQV: case BC_ISNEV: | 2984 | case BC_ISEQV: case BC_ISNEV: |
2383 | vk = op == BC_ISEQV; | 2985 | vk = op == BC_ISEQV; |
2384 | | // RA = src1*8, RD = src2*8, JMP with RD = target | 2986 | | // RA = src1*8, RD = src2*8, JMP with RD = target |
2385 | | addu RA, BASE, RA | 2987 | | addu RA, BASE, RA |
2386 | | addiu PC, PC, 4 | 2988 | | addiu PC, PC, 4 |
2387 | | lw TMP0, HI(RA) | ||
2388 | | ldc1 f0, 0(RA) | ||
2389 | | addu RD, BASE, RD | 2989 | | addu RD, BASE, RD |
2990 | | lw SFARG1HI, HI(RA) | ||
2390 | | lhu TMP2, -4+OFS_RD(PC) | 2991 | | lhu TMP2, -4+OFS_RD(PC) |
2391 | | lw TMP1, HI(RD) | 2992 | | lw SFARG2HI, HI(RD) |
2392 | | ldc1 f2, 0(RD) | ||
2393 | | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) | 2993 | | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) |
2394 | | sltiu AT, TMP0, LJ_TISNUM | 2994 | | sltu AT, TISNUM, SFARG1HI |
2395 | | sltiu CARG1, TMP1, LJ_TISNUM | 2995 | | sltu TMP0, TISNUM, SFARG2HI |
2396 | | decode_RD4b TMP2 | 2996 | | or AT, AT, TMP0 |
2397 | | and AT, AT, CARG1 | ||
2398 | | beqz AT, >5 | ||
2399 | |. addu TMP2, TMP2, TMP3 | ||
2400 | | c.eq.d f0, f2 | ||
2401 | if (vk) { | 2997 | if (vk) { |
2402 | | movf TMP2, r0 | 2998 | | beqz AT, ->BC_ISEQN_Z |
2403 | } else { | 2999 | } else { |
2404 | | movt TMP2, r0 | 3000 | | beqz AT, ->BC_ISNEN_Z |
2405 | } | 3001 | } |
2406 | |1: | 3002 | |. decode_RD4b TMP2 |
2407 | | addu PC, PC, TMP2 | 3003 | | // Either or both types are not numbers. |
2408 | | ins_next | 3004 | | lw SFARG1LO, LO(RA) |
2409 | |5: // Either or both types are not numbers. | 3005 | | lw SFARG2LO, LO(RD) |
2410 | | lw CARG2, LO(RA) | 3006 | | addu TMP2, TMP2, TMP3 |
2411 | | lw CARG3, LO(RD) | ||
2412 | |.if FFI | 3007 | |.if FFI |
2413 | | li TMP3, LJ_TCDATA | 3008 | | li TMP3, LJ_TCDATA |
2414 | | beq TMP0, TMP3, ->vmeta_equal_cd | 3009 | | beq SFARG1HI, TMP3, ->vmeta_equal_cd |
2415 | |.endif | 3010 | |.endif |
2416 | |. sltiu AT, TMP0, LJ_TISPRI // Not a primitive? | 3011 | |. sltiu AT, SFARG1HI, LJ_TISPRI // Not a primitive? |
2417 | |.if FFI | 3012 | |.if FFI |
2418 | | beq TMP1, TMP3, ->vmeta_equal_cd | 3013 | | beq SFARG2HI, TMP3, ->vmeta_equal_cd |
2419 | |.endif | 3014 | |.endif |
2420 | |. xor TMP3, CARG2, CARG3 // Same tv? | 3015 | |. xor TMP3, SFARG1LO, SFARG2LO // Same tv? |
2421 | | xor TMP1, TMP1, TMP0 // Same type? | 3016 | | xor SFARG2HI, SFARG2HI, SFARG1HI // Same type? |
2422 | | sltiu CARG1, TMP0, LJ_TISTABUD+1 // Table or userdata? | 3017 | | sltiu TMP0, SFARG1HI, LJ_TISTABUD+1 // Table or userdata? |
2423 | | movz TMP3, r0, AT // Ignore tv if primitive. | 3018 | | movz TMP3, r0, AT // Ignore tv if primitive. |
2424 | | movn CARG1, r0, TMP1 // Tab/ud and same type? | 3019 | | movn TMP0, r0, SFARG2HI // Tab/ud and same type? |
2425 | | or AT, TMP1, TMP3 // Same type && (pri||same tv). | 3020 | | or AT, SFARG2HI, TMP3 // Same type && (pri||same tv). |
2426 | | movz CARG1, r0, AT | 3021 | | movz TMP0, r0, AT |
2427 | | beqz CARG1, <1 // Done if not tab/ud or not same type or same tv. | 3022 | | beqz TMP0, >1 // Done if not tab/ud or not same type or same tv. |
2428 | if (vk) { | 3023 | if (vk) { |
2429 | |. movn TMP2, r0, AT | 3024 | |. movn TMP2, r0, AT |
2430 | } else { | 3025 | } else { |
@@ -2432,15 +3027,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2432 | } | 3027 | } |
2433 | | // Different tables or userdatas. Need to check __eq metamethod. | 3028 | | // Different tables or userdatas. Need to check __eq metamethod. |
2434 | | // Field metatable must be at same offset for GCtab and GCudata! | 3029 | | // Field metatable must be at same offset for GCtab and GCudata! |
2435 | | lw TAB:TMP1, TAB:CARG2->metatable | 3030 | | lw TAB:TMP1, TAB:SFARG1LO->metatable |
2436 | | beqz TAB:TMP1, <1 // No metatable? | 3031 | | beqz TAB:TMP1, >1 // No metatable? |
2437 | |. nop | 3032 | |. nop |
2438 | | lbu TMP1, TAB:TMP1->nomm | 3033 | | lbu TMP1, TAB:TMP1->nomm |
2439 | | andi TMP1, TMP1, 1<<MM_eq | 3034 | | andi TMP1, TMP1, 1<<MM_eq |
2440 | | bnez TMP1, <1 // Or 'no __eq' flag set? | 3035 | | bnez TMP1, >1 // Or 'no __eq' flag set? |
2441 | |. nop | 3036 | |. nop |
2442 | | b ->vmeta_equal // Handle __eq metamethod. | 3037 | | b ->vmeta_equal // Handle __eq metamethod. |
2443 | |. li CARG4, 1-vk // ne = 0 or 1. | 3038 | |. li TMP0, 1-vk // ne = 0 or 1. |
3039 | |1: | ||
3040 | | addu PC, PC, TMP2 | ||
3041 | | ins_next | ||
2444 | break; | 3042 | break; |
2445 | 3043 | ||
2446 | case BC_ISEQS: case BC_ISNES: | 3044 | case BC_ISEQS: case BC_ISNES: |
@@ -2477,38 +3075,124 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2477 | vk = op == BC_ISEQN; | 3075 | vk = op == BC_ISEQN; |
2478 | | // RA = src*8, RD = num_const*8, JMP with RD = target | 3076 | | // RA = src*8, RD = num_const*8, JMP with RD = target |
2479 | | addu RA, BASE, RA | 3077 | | addu RA, BASE, RA |
2480 | | addiu PC, PC, 4 | 3078 | | addu RD, KBASE, RD |
2481 | | lw TMP0, HI(RA) | 3079 | | lw SFARG1HI, HI(RA) |
2482 | | ldc1 f0, 0(RA) | 3080 | | lw SFARG2HI, HI(RD) |
2483 | | addu RD, KBASE, RD | 3081 | | lhu TMP2, OFS_RD(PC) |
2484 | | lhu TMP2, -4+OFS_RD(PC) | 3082 | | addiu PC, PC, 4 |
2485 | | ldc1 f2, 0(RD) | ||
2486 | | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) | 3083 | | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) |
2487 | | sltiu AT, TMP0, LJ_TISNUM | ||
2488 | | decode_RD4b TMP2 | 3084 | | decode_RD4b TMP2 |
2489 | |.if FFI | ||
2490 | | beqz AT, >5 | ||
2491 | |.else | ||
2492 | | beqz AT, >1 | ||
2493 | |.endif | ||
2494 | |. addu TMP2, TMP2, TMP3 | ||
2495 | | c.eq.d f0, f2 | ||
2496 | if (vk) { | 3085 | if (vk) { |
2497 | | movf TMP2, r0 | 3086 | |->BC_ISEQN_Z: |
2498 | | addu PC, PC, TMP2 | 3087 | } else { |
3088 | |->BC_ISNEN_Z: | ||
3089 | } | ||
3090 | | bne SFARG1HI, TISNUM, >3 | ||
3091 | |. lw SFARG1LO, LO(RA) | ||
3092 | | lw SFARG2LO, LO(RD) | ||
3093 | | addu TMP2, TMP2, TMP3 | ||
3094 | | bne SFARG2HI, TISNUM, >6 | ||
3095 | |. xor AT, SFARG1LO, SFARG2LO | ||
3096 | if (vk) { | ||
3097 | | movn TMP2, r0, AT | ||
2499 | |1: | 3098 | |1: |
3099 | | addu PC, PC, TMP2 | ||
3100 | |2: | ||
2500 | } else { | 3101 | } else { |
2501 | | movt TMP2, r0 | 3102 | | movz TMP2, r0, AT |
2502 | |1: | 3103 | |1: |
3104 | |2: | ||
2503 | | addu PC, PC, TMP2 | 3105 | | addu PC, PC, TMP2 |
2504 | } | 3106 | } |
2505 | | ins_next | 3107 | | ins_next |
3108 | | | ||
3109 | |3: // RA is not an integer. | ||
3110 | | sltiu AT, SFARG1HI, LJ_TISNUM | ||
2506 | |.if FFI | 3111 | |.if FFI |
2507 | |5: | 3112 | | beqz AT, >8 |
2508 | | li AT, LJ_TCDATA | 3113 | |.else |
2509 | | beq TMP0, AT, ->vmeta_equal_cd | 3114 | | beqz AT, <2 |
3115 | |.endif | ||
3116 | |. addu TMP2, TMP2, TMP3 | ||
3117 | | sltiu AT, SFARG2HI, LJ_TISNUM | ||
3118 | |.if FPU | ||
3119 | | ldc1 f20, 0(RA) | ||
3120 | | ldc1 f22, 0(RD) | ||
3121 | |.endif | ||
3122 | | beqz AT, >5 | ||
3123 | |. lw SFARG2LO, LO(RD) | ||
3124 | |4: // RA and RD are both numbers. | ||
3125 | |.if FPU | ||
3126 | | c.eq.d f20, f22 | ||
3127 | | b <1 | ||
3128 | if (vk) { | ||
3129 | |. movf TMP2, r0 | ||
3130 | } else { | ||
3131 | |. movt TMP2, r0 | ||
3132 | } | ||
3133 | |.else | ||
3134 | | bal ->vm_sfcmpeq | ||
2510 | |. nop | 3135 | |. nop |
2511 | | b <1 | 3136 | | b <1 |
3137 | if (vk) { | ||
3138 | |. movz TMP2, r0, CRET1 | ||
3139 | } else { | ||
3140 | |. movn TMP2, r0, CRET1 | ||
3141 | } | ||
3142 | |.endif | ||
3143 | | | ||
3144 | |5: // RA is a number, RD is not a number. | ||
3145 | |.if FFI | ||
3146 | | bne SFARG2HI, TISNUM, >9 | ||
3147 | |.else | ||
3148 | | bne SFARG2HI, TISNUM, <2 | ||
3149 | |.endif | ||
3150 | | // RA is a number, RD is an integer. Convert RD to a number. | ||
3151 | |.if FPU | ||
3152 | |. lwc1 f22, LO(RD) | ||
3153 | | b <4 | ||
3154 | |. cvt.d.w f22, f22 | ||
3155 | |.else | ||
3156 | |. nop | ||
3157 | | bal ->vm_sfi2d_2 | ||
3158 | |. nop | ||
3159 | | b <4 | ||
3160 | |. nop | ||
3161 | |.endif | ||
3162 | | | ||
3163 | |6: // RA is an integer, RD is not an integer | ||
3164 | | sltiu AT, SFARG2HI, LJ_TISNUM | ||
3165 | |.if FFI | ||
3166 | | beqz AT, >9 | ||
3167 | |.else | ||
3168 | | beqz AT, <2 | ||
3169 | |.endif | ||
3170 | | // RA is an integer, RD is a number. Convert RA to a number. | ||
3171 | |.if FPU | ||
3172 | |. mtc1 SFARG1LO, f20 | ||
3173 | | ldc1 f22, 0(RD) | ||
3174 | | b <4 | ||
3175 | | cvt.d.w f20, f20 | ||
3176 | |.else | ||
3177 | |. nop | ||
3178 | | bal ->vm_sfi2d_1 | ||
3179 | |. nop | ||
3180 | | b <4 | ||
3181 | |. nop | ||
3182 | |.endif | ||
3183 | | | ||
3184 | |.if FFI | ||
3185 | |8: | ||
3186 | | li AT, LJ_TCDATA | ||
3187 | | bne SFARG1HI, AT, <2 | ||
3188 | |. nop | ||
3189 | | b ->vmeta_equal_cd | ||
3190 | |. nop | ||
3191 | |9: | ||
3192 | | li AT, LJ_TCDATA | ||
3193 | | bne SFARG2HI, AT, <2 | ||
3194 | |. nop | ||
3195 | | b ->vmeta_equal_cd | ||
2512 | |. nop | 3196 | |. nop |
2513 | |.endif | 3197 | |.endif |
2514 | break; | 3198 | break; |
@@ -2560,7 +3244,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2560 | | addu PC, PC, TMP2 | 3244 | | addu PC, PC, TMP2 |
2561 | } else { | 3245 | } else { |
2562 | | sltiu TMP0, TMP0, LJ_TISTRUECOND | 3246 | | sltiu TMP0, TMP0, LJ_TISTRUECOND |
2563 | | ldc1 f0, 0(RD) | 3247 | | lw SFRETHI, HI(RD) |
3248 | | lw SFRETLO, LO(RD) | ||
2564 | if (op == BC_ISTC) { | 3249 | if (op == BC_ISTC) { |
2565 | | beqz TMP0, >1 | 3250 | | beqz TMP0, >1 |
2566 | } else { | 3251 | } else { |
@@ -2570,22 +3255,45 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2570 | | decode_RD4b TMP2 | 3255 | | decode_RD4b TMP2 |
2571 | | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) | 3256 | | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) |
2572 | | addu TMP2, TMP2, TMP3 | 3257 | | addu TMP2, TMP2, TMP3 |
2573 | | sdc1 f0, 0(RA) | 3258 | | sw SFRETHI, HI(RA) |
3259 | | sw SFRETLO, LO(RA) | ||
2574 | | addu PC, PC, TMP2 | 3260 | | addu PC, PC, TMP2 |
2575 | |1: | 3261 | |1: |
2576 | } | 3262 | } |
2577 | | ins_next | 3263 | | ins_next |
2578 | break; | 3264 | break; |
2579 | 3265 | ||
3266 | case BC_ISTYPE: | ||
3267 | | // RA = src*8, RD = -type*8 | ||
3268 | | addu TMP2, BASE, RA | ||
3269 | | srl TMP1, RD, 3 | ||
3270 | | lw TMP0, HI(TMP2) | ||
3271 | | ins_next1 | ||
3272 | | addu AT, TMP0, TMP1 | ||
3273 | | bnez AT, ->vmeta_istype | ||
3274 | |. ins_next2 | ||
3275 | break; | ||
3276 | case BC_ISNUM: | ||
3277 | | // RA = src*8, RD = -(TISNUM-1)*8 | ||
3278 | | addu TMP2, BASE, RA | ||
3279 | | lw TMP0, HI(TMP2) | ||
3280 | | ins_next1 | ||
3281 | | sltiu AT, TMP0, LJ_TISNUM | ||
3282 | | beqz AT, ->vmeta_istype | ||
3283 | |. ins_next2 | ||
3284 | break; | ||
3285 | |||
2580 | /* -- Unary ops --------------------------------------------------------- */ | 3286 | /* -- Unary ops --------------------------------------------------------- */ |
2581 | 3287 | ||
2582 | case BC_MOV: | 3288 | case BC_MOV: |
2583 | | // RA = dst*8, RD = src*8 | 3289 | | // RA = dst*8, RD = src*8 |
2584 | | addu RD, BASE, RD | 3290 | | addu RD, BASE, RD |
2585 | | addu RA, BASE, RA | 3291 | | addu RA, BASE, RA |
2586 | | ldc1 f0, 0(RD) | 3292 | | lw SFRETHI, HI(RD) |
3293 | | lw SFRETLO, LO(RD) | ||
2587 | | ins_next1 | 3294 | | ins_next1 |
2588 | | sdc1 f0, 0(RA) | 3295 | | sw SFRETHI, HI(RA) |
3296 | | sw SFRETLO, LO(RA) | ||
2589 | | ins_next2 | 3297 | | ins_next2 |
2590 | break; | 3298 | break; |
2591 | case BC_NOT: | 3299 | case BC_NOT: |
@@ -2602,16 +3310,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2602 | break; | 3310 | break; |
2603 | case BC_UNM: | 3311 | case BC_UNM: |
2604 | | // RA = dst*8, RD = src*8 | 3312 | | // RA = dst*8, RD = src*8 |
2605 | | addu CARG3, BASE, RD | 3313 | | addu RB, BASE, RD |
3314 | | lw SFARG1HI, HI(RB) | ||
2606 | | addu RA, BASE, RA | 3315 | | addu RA, BASE, RA |
2607 | | lw TMP0, HI(CARG3) | 3316 | | bne SFARG1HI, TISNUM, >2 |
2608 | | ldc1 f0, 0(CARG3) | 3317 | |. lw SFARG1LO, LO(RB) |
2609 | | sltiu AT, TMP0, LJ_TISNUM | 3318 | | lui TMP1, 0x8000 |
2610 | | beqz AT, ->vmeta_unm | 3319 | | beq SFARG1LO, TMP1, ->vmeta_unm // Meta handler deals with -2^31. |
2611 | |. neg.d f0, f0 | 3320 | |. negu SFARG1LO, SFARG1LO |
3321 | |1: | ||
2612 | | ins_next1 | 3322 | | ins_next1 |
2613 | | sdc1 f0, 0(RA) | 3323 | | sw SFARG1HI, HI(RA) |
3324 | | sw SFARG1LO, LO(RA) | ||
2614 | | ins_next2 | 3325 | | ins_next2 |
3326 | |2: | ||
3327 | | sltiu AT, SFARG1HI, LJ_TISNUM | ||
3328 | | beqz AT, ->vmeta_unm | ||
3329 | |. lui TMP1, 0x8000 | ||
3330 | | b <1 | ||
3331 | |. xor SFARG1HI, SFARG1HI, TMP1 | ||
2615 | break; | 3332 | break; |
2616 | case BC_LEN: | 3333 | case BC_LEN: |
2617 | | // RA = dst*8, RD = src*8 | 3334 | | // RA = dst*8, RD = src*8 |
@@ -2622,12 +3339,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2622 | | li AT, LJ_TSTR | 3339 | | li AT, LJ_TSTR |
2623 | | bne TMP0, AT, >2 | 3340 | | bne TMP0, AT, >2 |
2624 | |. li AT, LJ_TTAB | 3341 | |. li AT, LJ_TTAB |
2625 | | lw CRET1, STR:CARG1->len | 3342 | | lw CRET1, STR:CARG1->len |
2626 | |1: | 3343 | |1: |
2627 | | mtc1 CRET1, f0 | ||
2628 | | cvt.d.w f0, f0 | ||
2629 | | ins_next1 | 3344 | | ins_next1 |
2630 | | sdc1 f0, 0(RA) | 3345 | | sw TISNUM, HI(RA) |
3346 | | sw CRET1, LO(RA) | ||
2631 | | ins_next2 | 3347 | | ins_next2 |
2632 | |2: | 3348 | |2: |
2633 | | bne TMP0, AT, ->vmeta_len | 3349 | | bne TMP0, AT, ->vmeta_len |
@@ -2658,104 +3374,232 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2658 | 3374 | ||
2659 | /* -- Binary ops -------------------------------------------------------- */ | 3375 | /* -- Binary ops -------------------------------------------------------- */ |
2660 | 3376 | ||
2661 | |.macro ins_arithpre | 3377 | |.macro fpmod, a, b, c |
3378 | | bal ->vm_floor // floor(b/c) | ||
3379 | |. div.d FARG1, b, c | ||
3380 | | mul.d a, FRET1, c | ||
3381 | | sub.d a, b, a // b - floor(b/c)*c | ||
3382 | |.endmacro | ||
3383 | |||
3384 | |.macro sfpmod | ||
3385 | | addiu sp, sp, -16 | ||
3386 | | | ||
3387 | | load_got __divdf3 | ||
3388 | | sw SFARG1HI, HI(sp) | ||
3389 | | sw SFARG1LO, LO(sp) | ||
3390 | | sw SFARG2HI, 8+HI(sp) | ||
3391 | | call_extern | ||
3392 | |. sw SFARG2LO, 8+LO(sp) | ||
3393 | | | ||
3394 | | load_got floor | ||
3395 | | move SFARG1HI, SFRETHI | ||
3396 | | call_extern | ||
3397 | |. move SFARG1LO, SFRETLO | ||
3398 | | | ||
3399 | | load_got __muldf3 | ||
3400 | | move SFARG1HI, SFRETHI | ||
3401 | | move SFARG1LO, SFRETLO | ||
3402 | | lw SFARG2HI, 8+HI(sp) | ||
3403 | | call_extern | ||
3404 | |. lw SFARG2LO, 8+LO(sp) | ||
3405 | | | ||
3406 | | load_got __subdf3 | ||
3407 | | lw SFARG1HI, HI(sp) | ||
3408 | | lw SFARG1LO, LO(sp) | ||
3409 | | move SFARG2HI, SFRETHI | ||
3410 | | call_extern | ||
3411 | |. move SFARG2LO, SFRETLO | ||
3412 | | | ||
3413 | | addiu sp, sp, 16 | ||
3414 | |.endmacro | ||
3415 | |||
3416 | |.macro ins_arithpre, label | ||
2662 | ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); | 3417 | ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); |
2663 | | decode_RB8a RB, INS | ||
2664 | | decode_RB8b RB | ||
2665 | | decode_RDtoRC8 RC, RD | ||
2666 | | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 | 3418 | | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 |
2667 | ||switch (vk) { | 3419 | ||switch (vk) { |
2668 | ||case 0: | 3420 | ||case 0: |
2669 | | addu CARG3, BASE, RB | 3421 | | decode_RB8a RB, INS |
2670 | | addu CARG4, KBASE, RC | 3422 | | decode_RB8b RB |
2671 | | lw TMP1, HI(CARG3) | 3423 | | decode_RDtoRC8 RC, RD |
2672 | | ldc1 f20, 0(CARG3) | 3424 | | // RA = dst*8, RB = src1*8, RC = num_const*8 |
2673 | | ldc1 f22, 0(CARG4) | 3425 | | addu RB, BASE, RB |
2674 | | sltiu AT, TMP1, LJ_TISNUM | 3426 | |.if "label" ~= "none" |
3427 | | b label | ||
3428 | |.endif | ||
3429 | |. addu RC, KBASE, RC | ||
2675 | || break; | 3430 | || break; |
2676 | ||case 1: | 3431 | ||case 1: |
2677 | | addu CARG4, BASE, RB | 3432 | | decode_RB8a RC, INS |
2678 | | addu CARG3, KBASE, RC | 3433 | | decode_RB8b RC |
2679 | | lw TMP1, HI(CARG4) | 3434 | | decode_RDtoRC8 RB, RD |
2680 | | ldc1 f22, 0(CARG4) | 3435 | | // RA = dst*8, RB = num_const*8, RC = src1*8 |
2681 | | ldc1 f20, 0(CARG3) | 3436 | | addu RC, BASE, RC |
2682 | | sltiu AT, TMP1, LJ_TISNUM | 3437 | |.if "label" ~= "none" |
3438 | | b label | ||
3439 | |.endif | ||
3440 | |. addu RB, KBASE, RB | ||
2683 | || break; | 3441 | || break; |
2684 | ||default: | 3442 | ||default: |
2685 | | addu CARG3, BASE, RB | 3443 | | decode_RB8a RB, INS |
2686 | | addu CARG4, BASE, RC | 3444 | | decode_RB8b RB |
2687 | | lw TMP1, HI(CARG3) | 3445 | | decode_RDtoRC8 RC, RD |
2688 | | lw TMP2, HI(CARG4) | 3446 | | // RA = dst*8, RB = src1*8, RC = src2*8 |
2689 | | ldc1 f20, 0(CARG3) | 3447 | | addu RB, BASE, RB |
2690 | | ldc1 f22, 0(CARG4) | 3448 | |.if "label" ~= "none" |
2691 | | sltiu AT, TMP1, LJ_TISNUM | 3449 | | b label |
2692 | | sltiu TMP0, TMP2, LJ_TISNUM | 3450 | |.endif |
2693 | | and AT, AT, TMP0 | 3451 | |. addu RC, BASE, RC |
2694 | || break; | 3452 | || break; |
2695 | ||} | 3453 | ||} |
2696 | | beqz AT, ->vmeta_arith | ||
2697 | |. addu RA, BASE, RA | ||
2698 | |.endmacro | 3454 | |.endmacro |
2699 | | | 3455 | | |
2700 | |.macro fpmod, a, b, c | 3456 | |.macro ins_arith, intins, fpins, fpcall, label |
2701 | |->BC_MODVN_Z: | 3457 | | ins_arithpre none |
2702 | | bal ->vm_floor // floor(b/c) | ||
2703 | |. div.d FARG1, b, c | ||
2704 | | mul.d a, FRET1, c | ||
2705 | | sub.d a, b, a // b - floor(b/c)*c | ||
2706 | |.endmacro | ||
2707 | | | 3458 | | |
2708 | |.macro ins_arith, ins | 3459 | |.if "label" ~= "none" |
2709 | | ins_arithpre | 3460 | |label: |
2710 | |.if "ins" == "fpmod_" | 3461 | |.endif |
2711 | | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. | 3462 | | |
2712 | |. nop | 3463 | | lw SFARG1HI, HI(RB) |
3464 | | lw SFARG2HI, HI(RC) | ||
3465 | | | ||
3466 | |.if "intins" ~= "div" | ||
3467 | | | ||
3468 | | // Check for two integers. | ||
3469 | | lw SFARG1LO, LO(RB) | ||
3470 | | bne SFARG1HI, TISNUM, >5 | ||
3471 | |. lw SFARG2LO, LO(RC) | ||
3472 | | bne SFARG2HI, TISNUM, >5 | ||
3473 | | | ||
3474 | |.if "intins" == "addu" | ||
3475 | |. intins CRET1, SFARG1LO, SFARG2LO | ||
3476 | | xor TMP1, CRET1, SFARG1LO // ((y^a) & (y^b)) < 0: overflow. | ||
3477 | | xor TMP2, CRET1, SFARG2LO | ||
3478 | | and TMP1, TMP1, TMP2 | ||
3479 | | bltz TMP1, ->vmeta_arith | ||
3480 | |. addu RA, BASE, RA | ||
3481 | |.elif "intins" == "subu" | ||
3482 | |. intins CRET1, SFARG1LO, SFARG2LO | ||
3483 | | xor TMP1, CRET1, SFARG1LO // ((y^a) & (a^b)) < 0: overflow. | ||
3484 | | xor TMP2, SFARG1LO, SFARG2LO | ||
3485 | | and TMP1, TMP1, TMP2 | ||
3486 | | bltz TMP1, ->vmeta_arith | ||
3487 | |. addu RA, BASE, RA | ||
3488 | |.elif "intins" == "mult" | ||
3489 | |. intins SFARG1LO, SFARG2LO | ||
3490 | | mflo CRET1 | ||
3491 | | mfhi TMP2 | ||
3492 | | sra TMP1, CRET1, 31 | ||
3493 | | bne TMP1, TMP2, ->vmeta_arith | ||
3494 | |. addu RA, BASE, RA | ||
2713 | |.else | 3495 | |.else |
2714 | | ins f0, f20, f22 | 3496 | |. load_got lj_vm_modi |
3497 | | beqz SFARG2LO, ->vmeta_arith | ||
3498 | |. addu RA, BASE, RA | ||
3499 | |.if ENDIAN_BE | ||
3500 | | move CARG1, SFARG1LO | ||
3501 | |.endif | ||
3502 | | call_extern | ||
3503 | |. move CARG2, SFARG2LO | ||
3504 | |.endif | ||
3505 | | | ||
3506 | | ins_next1 | ||
3507 | | sw TISNUM, HI(RA) | ||
3508 | | sw CRET1, LO(RA) | ||
3509 | |3: | ||
3510 | | ins_next2 | ||
3511 | | | ||
3512 | |.elif not FPU | ||
3513 | | | ||
3514 | | lw SFARG1LO, LO(RB) | ||
3515 | | lw SFARG2LO, LO(RC) | ||
3516 | | | ||
3517 | |.endif | ||
3518 | | | ||
3519 | |5: // Check for two numbers. | ||
3520 | | .FPU ldc1 f20, 0(RB) | ||
3521 | | sltiu AT, SFARG1HI, LJ_TISNUM | ||
3522 | | sltiu TMP0, SFARG2HI, LJ_TISNUM | ||
3523 | | .FPU ldc1 f22, 0(RC) | ||
3524 | | and AT, AT, TMP0 | ||
3525 | | beqz AT, ->vmeta_arith | ||
3526 | |. addu RA, BASE, RA | ||
3527 | | | ||
3528 | |.if FPU | ||
3529 | | fpins FRET1, f20, f22 | ||
3530 | |.elif "fpcall" == "sfpmod" | ||
3531 | | sfpmod | ||
3532 | |.else | ||
3533 | | load_got fpcall | ||
3534 | | call_extern | ||
3535 | |. nop | ||
3536 | |.endif | ||
3537 | | | ||
2715 | | ins_next1 | 3538 | | ins_next1 |
2716 | | sdc1 f0, 0(RA) | 3539 | |.if not FPU |
3540 | | sw SFRETHI, HI(RA) | ||
3541 | |.endif | ||
3542 | |.if "intins" ~= "div" | ||
3543 | | b <3 | ||
3544 | |.endif | ||
3545 | |.if FPU | ||
3546 | |. sdc1 FRET1, 0(RA) | ||
3547 | |.else | ||
3548 | |. sw SFRETLO, LO(RA) | ||
3549 | |.endif | ||
3550 | |.if "intins" == "div" | ||
2717 | | ins_next2 | 3551 | | ins_next2 |
2718 | |.endif | 3552 | |.endif |
3553 | | | ||
2719 | |.endmacro | 3554 | |.endmacro |
2720 | 3555 | ||
2721 | case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: | 3556 | case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: |
2722 | | ins_arith add.d | 3557 | | ins_arith addu, add.d, __adddf3, none |
2723 | break; | 3558 | break; |
2724 | case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: | 3559 | case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: |
2725 | | ins_arith sub.d | 3560 | | ins_arith subu, sub.d, __subdf3, none |
2726 | break; | 3561 | break; |
2727 | case BC_MULVN: case BC_MULNV: case BC_MULVV: | 3562 | case BC_MULVN: case BC_MULNV: case BC_MULVV: |
2728 | | ins_arith mul.d | 3563 | | ins_arith mult, mul.d, __muldf3, none |
3564 | break; | ||
3565 | case BC_DIVVN: | ||
3566 | | ins_arith div, div.d, __divdf3, ->BC_DIVVN_Z | ||
2729 | break; | 3567 | break; |
2730 | case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: | 3568 | case BC_DIVNV: case BC_DIVVV: |
2731 | | ins_arith div.d | 3569 | | ins_arithpre ->BC_DIVVN_Z |
2732 | break; | 3570 | break; |
2733 | case BC_MODVN: | 3571 | case BC_MODVN: |
2734 | | ins_arith fpmod | 3572 | | ins_arith modi, fpmod, sfpmod, ->BC_MODVN_Z |
2735 | break; | 3573 | break; |
2736 | case BC_MODNV: case BC_MODVV: | 3574 | case BC_MODNV: case BC_MODVV: |
2737 | | ins_arith fpmod_ | 3575 | | ins_arithpre ->BC_MODVN_Z |
2738 | break; | 3576 | break; |
2739 | case BC_POW: | 3577 | case BC_POW: |
2740 | | decode_RB8a RB, INS | 3578 | | ins_arithpre none |
2741 | | decode_RB8b RB | 3579 | | lw SFARG1HI, HI(RB) |
2742 | | decode_RDtoRC8 RC, RD | 3580 | | lw SFARG2HI, HI(RC) |
2743 | | addu CARG3, BASE, RB | 3581 | | sltiu AT, SFARG1HI, LJ_TISNUM |
2744 | | addu CARG4, BASE, RC | 3582 | | sltiu TMP0, SFARG2HI, LJ_TISNUM |
2745 | | lw TMP1, HI(CARG3) | ||
2746 | | lw TMP2, HI(CARG4) | ||
2747 | | ldc1 FARG1, 0(CARG3) | ||
2748 | | ldc1 FARG2, 0(CARG4) | ||
2749 | | sltiu AT, TMP1, LJ_TISNUM | ||
2750 | | sltiu TMP0, TMP2, LJ_TISNUM | ||
2751 | | and AT, AT, TMP0 | 3583 | | and AT, AT, TMP0 |
2752 | | load_got pow | 3584 | | load_got pow |
2753 | | beqz AT, ->vmeta_arith | 3585 | | beqz AT, ->vmeta_arith |
2754 | |. addu RA, BASE, RA | 3586 | |. addu RA, BASE, RA |
3587 | |.if FPU | ||
3588 | | ldc1 FARG1, 0(RB) | ||
3589 | | ldc1 FARG2, 0(RC) | ||
3590 | |.else | ||
3591 | | lw SFARG1LO, LO(RB) | ||
3592 | | lw SFARG2LO, LO(RC) | ||
3593 | |.endif | ||
2755 | | call_extern | 3594 | | call_extern |
2756 | |. nop | 3595 | |. nop |
2757 | | ins_next1 | 3596 | | ins_next1 |
3597 | |.if FPU | ||
2758 | | sdc1 FRET1, 0(RA) | 3598 | | sdc1 FRET1, 0(RA) |
3599 | |.else | ||
3600 | | sw SFRETHI, HI(RA) | ||
3601 | | sw SFRETLO, LO(RA) | ||
3602 | |.endif | ||
2759 | | ins_next2 | 3603 | | ins_next2 |
2760 | break; | 3604 | break; |
2761 | 3605 | ||
@@ -2778,10 +3622,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2778 | | bnez CRET1, ->vmeta_binop | 3622 | | bnez CRET1, ->vmeta_binop |
2779 | |. lw BASE, L->base | 3623 | |. lw BASE, L->base |
2780 | | addu RB, BASE, MULTRES | 3624 | | addu RB, BASE, MULTRES |
2781 | | ldc1 f0, 0(RB) | 3625 | | lw SFRETHI, HI(RB) |
3626 | | lw SFRETLO, LO(RB) | ||
2782 | | addu RA, BASE, RA | 3627 | | addu RA, BASE, RA |
2783 | | ins_next1 | 3628 | | ins_next1 |
2784 | | sdc1 f0, 0(RA) // Copy result from RB to RA. | 3629 | | sw SFRETHI, HI(RA) |
3630 | | sw SFRETLO, LO(RA) | ||
2785 | | ins_next2 | 3631 | | ins_next2 |
2786 | break; | 3632 | break; |
2787 | 3633 | ||
@@ -2816,20 +3662,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2816 | case BC_KSHORT: | 3662 | case BC_KSHORT: |
2817 | | // RA = dst*8, RD = int16_literal*8 | 3663 | | // RA = dst*8, RD = int16_literal*8 |
2818 | | sra RD, INS, 16 | 3664 | | sra RD, INS, 16 |
2819 | | mtc1 RD, f0 | ||
2820 | | addu RA, BASE, RA | 3665 | | addu RA, BASE, RA |
2821 | | cvt.d.w f0, f0 | ||
2822 | | ins_next1 | 3666 | | ins_next1 |
2823 | | sdc1 f0, 0(RA) | 3667 | | sw TISNUM, HI(RA) |
3668 | | sw RD, LO(RA) | ||
2824 | | ins_next2 | 3669 | | ins_next2 |
2825 | break; | 3670 | break; |
2826 | case BC_KNUM: | 3671 | case BC_KNUM: |
2827 | | // RA = dst*8, RD = num_const*8 | 3672 | | // RA = dst*8, RD = num_const*8 |
2828 | | addu RD, KBASE, RD | 3673 | | addu RD, KBASE, RD |
2829 | | addu RA, BASE, RA | 3674 | | addu RA, BASE, RA |
2830 | | ldc1 f0, 0(RD) | 3675 | | lw SFRETHI, HI(RD) |
3676 | | lw SFRETLO, LO(RD) | ||
2831 | | ins_next1 | 3677 | | ins_next1 |
2832 | | sdc1 f0, 0(RA) | 3678 | | sw SFRETHI, HI(RA) |
3679 | | sw SFRETLO, LO(RA) | ||
2833 | | ins_next2 | 3680 | | ins_next2 |
2834 | break; | 3681 | break; |
2835 | case BC_KPRI: | 3682 | case BC_KPRI: |
@@ -2865,9 +3712,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2865 | | lw UPVAL:RB, LFUNC:RD->uvptr | 3712 | | lw UPVAL:RB, LFUNC:RD->uvptr |
2866 | | ins_next1 | 3713 | | ins_next1 |
2867 | | lw TMP1, UPVAL:RB->v | 3714 | | lw TMP1, UPVAL:RB->v |
2868 | | ldc1 f0, 0(TMP1) | 3715 | | lw SFRETHI, HI(TMP1) |
3716 | | lw SFRETLO, LO(TMP1) | ||
2869 | | addu RA, BASE, RA | 3717 | | addu RA, BASE, RA |
2870 | | sdc1 f0, 0(RA) | 3718 | | sw SFRETHI, HI(RA) |
3719 | | sw SFRETLO, LO(RA) | ||
2871 | | ins_next2 | 3720 | | ins_next2 |
2872 | break; | 3721 | break; |
2873 | case BC_USETV: | 3722 | case BC_USETV: |
@@ -2876,26 +3725,27 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2876 | | srl RA, RA, 1 | 3725 | | srl RA, RA, 1 |
2877 | | addu RD, BASE, RD | 3726 | | addu RD, BASE, RD |
2878 | | addu RA, RA, LFUNC:RB | 3727 | | addu RA, RA, LFUNC:RB |
2879 | | ldc1 f0, 0(RD) | ||
2880 | | lw UPVAL:RB, LFUNC:RA->uvptr | 3728 | | lw UPVAL:RB, LFUNC:RA->uvptr |
3729 | | lw SFRETHI, HI(RD) | ||
3730 | | lw SFRETLO, LO(RD) | ||
2881 | | lbu TMP3, UPVAL:RB->marked | 3731 | | lbu TMP3, UPVAL:RB->marked |
2882 | | lw CARG2, UPVAL:RB->v | 3732 | | lw CARG2, UPVAL:RB->v |
2883 | | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv) | 3733 | | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv) |
2884 | | lbu TMP0, UPVAL:RB->closed | 3734 | | lbu TMP0, UPVAL:RB->closed |
2885 | | lw TMP2, HI(RD) | 3735 | | sw SFRETHI, HI(CARG2) |
2886 | | sdc1 f0, 0(CARG2) | 3736 | | sw SFRETLO, LO(CARG2) |
2887 | | li AT, LJ_GC_BLACK|1 | 3737 | | li AT, LJ_GC_BLACK|1 |
2888 | | or TMP3, TMP3, TMP0 | 3738 | | or TMP3, TMP3, TMP0 |
2889 | | beq TMP3, AT, >2 // Upvalue is closed and black? | 3739 | | beq TMP3, AT, >2 // Upvalue is closed and black? |
2890 | |. addiu TMP2, TMP2, -(LJ_TNUMX+1) | 3740 | |. addiu TMP2, SFRETHI, -(LJ_TNUMX+1) |
2891 | |1: | 3741 | |1: |
2892 | | ins_next | 3742 | | ins_next |
2893 | | | 3743 | | |
2894 | |2: // Check if new value is collectable. | 3744 | |2: // Check if new value is collectable. |
2895 | | sltiu AT, TMP2, LJ_TISGCV - (LJ_TNUMX+1) | 3745 | | sltiu AT, TMP2, LJ_TISGCV - (LJ_TNUMX+1) |
2896 | | beqz AT, <1 // tvisgcv(v) | 3746 | | beqz AT, <1 // tvisgcv(v) |
2897 | |. lw TMP1, LO(RD) | 3747 | |. nop |
2898 | | lbu TMP3, GCOBJ:TMP1->gch.marked | 3748 | | lbu TMP3, GCOBJ:SFRETLO->gch.marked |
2899 | | andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v) | 3749 | | andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v) |
2900 | | beqz TMP3, <1 | 3750 | | beqz TMP3, <1 |
2901 | |. load_got lj_gc_barrieruv | 3751 | |. load_got lj_gc_barrieruv |
@@ -2943,11 +3793,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2943 | | srl RA, RA, 1 | 3793 | | srl RA, RA, 1 |
2944 | | addu RD, KBASE, RD | 3794 | | addu RD, KBASE, RD |
2945 | | addu RA, RA, LFUNC:RB | 3795 | | addu RA, RA, LFUNC:RB |
2946 | | ldc1 f0, 0(RD) | 3796 | | lw UPVAL:RB, LFUNC:RA->uvptr |
2947 | | lw UPVAL:RB, LFUNC:RA->uvptr | 3797 | | lw SFRETHI, HI(RD) |
3798 | | lw SFRETLO, LO(RD) | ||
3799 | | lw TMP1, UPVAL:RB->v | ||
2948 | | ins_next1 | 3800 | | ins_next1 |
2949 | | lw TMP1, UPVAL:RB->v | 3801 | | sw SFRETHI, HI(TMP1) |
2950 | | sdc1 f0, 0(TMP1) | 3802 | | sw SFRETLO, LO(TMP1) |
2951 | | ins_next2 | 3803 | | ins_next2 |
2952 | break; | 3804 | break; |
2953 | case BC_USETP: | 3805 | case BC_USETP: |
@@ -2957,10 +3809,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2957 | | srl TMP0, RD, 3 | 3809 | | srl TMP0, RD, 3 |
2958 | | addu RA, RA, LFUNC:RB | 3810 | | addu RA, RA, LFUNC:RB |
2959 | | not TMP0, TMP0 | 3811 | | not TMP0, TMP0 |
2960 | | lw UPVAL:RB, LFUNC:RA->uvptr | 3812 | | lw UPVAL:RB, LFUNC:RA->uvptr |
2961 | | ins_next1 | 3813 | | ins_next1 |
2962 | | lw TMP1, UPVAL:RB->v | 3814 | | lw TMP1, UPVAL:RB->v |
2963 | | sw TMP0, HI(TMP1) | 3815 | | sw TMP0, HI(TMP1) |
2964 | | ins_next2 | 3816 | | ins_next2 |
2965 | break; | 3817 | break; |
2966 | 3818 | ||
@@ -2996,8 +3848,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2996 | | li TMP0, LJ_TFUNC | 3848 | | li TMP0, LJ_TFUNC |
2997 | | ins_next1 | 3849 | | ins_next1 |
2998 | | addu RA, BASE, RA | 3850 | | addu RA, BASE, RA |
2999 | | sw TMP0, HI(RA) | ||
3000 | | sw LFUNC:CRET1, LO(RA) | 3851 | | sw LFUNC:CRET1, LO(RA) |
3852 | | sw TMP0, HI(RA) | ||
3001 | | ins_next2 | 3853 | | ins_next2 |
3002 | break; | 3854 | break; |
3003 | 3855 | ||
@@ -3078,31 +3930,23 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3078 | | lw TMP2, HI(CARG3) | 3930 | | lw TMP2, HI(CARG3) |
3079 | | lw TAB:RB, LO(CARG2) | 3931 | | lw TAB:RB, LO(CARG2) |
3080 | | li AT, LJ_TTAB | 3932 | | li AT, LJ_TTAB |
3081 | | ldc1 f0, 0(CARG3) | ||
3082 | | bne TMP1, AT, ->vmeta_tgetv | 3933 | | bne TMP1, AT, ->vmeta_tgetv |
3083 | |. addu RA, BASE, RA | 3934 | |. addu RA, BASE, RA |
3084 | | sltiu AT, TMP2, LJ_TISNUM | 3935 | | bne TMP2, TISNUM, >5 |
3085 | | beqz AT, >5 | 3936 | |. lw RC, LO(CARG3) |
3086 | |. li AT, LJ_TSTR | 3937 | | lw TMP0, TAB:RB->asize |
3087 | | | ||
3088 | | // Convert number key to integer, check for integerness and range. | ||
3089 | | cvt.w.d f2, f0 | ||
3090 | | lw TMP0, TAB:RB->asize | ||
3091 | | mfc1 TMP2, f2 | ||
3092 | | cvt.d.w f4, f2 | ||
3093 | | lw TMP1, TAB:RB->array | 3938 | | lw TMP1, TAB:RB->array |
3094 | | c.eq.d f0, f4 | 3939 | | sltu AT, RC, TMP0 |
3095 | | sltu AT, TMP2, TMP0 | 3940 | | sll TMP2, RC, 3 |
3096 | | movf AT, r0 | ||
3097 | | sll TMP2, TMP2, 3 | ||
3098 | | beqz AT, ->vmeta_tgetv // Integer key and in array part? | 3941 | | beqz AT, ->vmeta_tgetv // Integer key and in array part? |
3099 | |. addu TMP2, TMP1, TMP2 | 3942 | |. addu TMP2, TMP1, TMP2 |
3100 | | lw TMP0, HI(TMP2) | 3943 | | lw SFRETHI, HI(TMP2) |
3101 | | beq TMP0, TISNIL, >2 | 3944 | | beq SFRETHI, TISNIL, >2 |
3102 | |. ldc1 f0, 0(TMP2) | 3945 | |. lw SFRETLO, LO(TMP2) |
3103 | |1: | 3946 | |1: |
3104 | | ins_next1 | 3947 | | ins_next1 |
3105 | | sdc1 f0, 0(RA) | 3948 | | sw SFRETHI, HI(RA) |
3949 | | sw SFRETLO, LO(RA) | ||
3106 | | ins_next2 | 3950 | | ins_next2 |
3107 | | | 3951 | | |
3108 | |2: // Check for __index if table value is nil. | 3952 | |2: // Check for __index if table value is nil. |
@@ -3117,8 +3961,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3117 | |. nop | 3961 | |. nop |
3118 | | | 3962 | | |
3119 | |5: | 3963 | |5: |
3964 | | li AT, LJ_TSTR | ||
3120 | | bne TMP2, AT, ->vmeta_tgetv | 3965 | | bne TMP2, AT, ->vmeta_tgetv |
3121 | |. lw STR:RC, LO(CARG3) | 3966 | |. nop |
3122 | | b ->BC_TGETS_Z // String key? | 3967 | | b ->BC_TGETS_Z // String key? |
3123 | |. nop | 3968 | |. nop |
3124 | break; | 3969 | break; |
@@ -3150,18 +3995,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3150 | | lw CARG1, offsetof(Node, key)+HI(NODE:TMP2) | 3995 | | lw CARG1, offsetof(Node, key)+HI(NODE:TMP2) |
3151 | | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2) | 3996 | | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2) |
3152 | | lw NODE:TMP1, NODE:TMP2->next | 3997 | | lw NODE:TMP1, NODE:TMP2->next |
3153 | | lw CARG2, offsetof(Node, val)+HI(NODE:TMP2) | 3998 | | lw SFRETHI, offsetof(Node, val)+HI(NODE:TMP2) |
3154 | | addiu CARG1, CARG1, -LJ_TSTR | 3999 | | addiu CARG1, CARG1, -LJ_TSTR |
3155 | | xor TMP0, TMP0, STR:RC | 4000 | | xor TMP0, TMP0, STR:RC |
3156 | | or AT, CARG1, TMP0 | 4001 | | or AT, CARG1, TMP0 |
3157 | | bnez AT, >4 | 4002 | | bnez AT, >4 |
3158 | |. lw TAB:TMP3, TAB:RB->metatable | 4003 | |. lw TAB:TMP3, TAB:RB->metatable |
3159 | | beq CARG2, TISNIL, >5 // Key found, but nil value? | 4004 | | beq SFRETHI, TISNIL, >5 // Key found, but nil value? |
3160 | |. lw CARG1, offsetof(Node, val)+LO(NODE:TMP2) | 4005 | |. lw SFRETLO, offsetof(Node, val)+LO(NODE:TMP2) |
3161 | |3: | 4006 | |3: |
3162 | | ins_next1 | 4007 | | ins_next1 |
3163 | | sw CARG2, HI(RA) | 4008 | | sw SFRETHI, HI(RA) |
3164 | | sw CARG1, LO(RA) | 4009 | | sw SFRETLO, LO(RA) |
3165 | | ins_next2 | 4010 | | ins_next2 |
3166 | | | 4011 | | |
3167 | |4: // Follow hash chain. | 4012 | |4: // Follow hash chain. |
@@ -3171,7 +4016,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3171 | | | 4016 | | |
3172 | |5: // Check for __index if table value is nil. | 4017 | |5: // Check for __index if table value is nil. |
3173 | | beqz TAB:TMP3, <3 // No metatable: done. | 4018 | | beqz TAB:TMP3, <3 // No metatable: done. |
3174 | |. li CARG2, LJ_TNIL | 4019 | |. li SFRETHI, LJ_TNIL |
3175 | | lbu TMP0, TAB:TMP3->nomm | 4020 | | lbu TMP0, TAB:TMP3->nomm |
3176 | | andi TMP0, TMP0, 1<<MM_index | 4021 | | andi TMP0, TMP0, 1<<MM_index |
3177 | | bnez TMP0, <3 // 'no __index' flag set: done. | 4022 | | bnez TMP0, <3 // 'no __index' flag set: done. |
@@ -3196,12 +4041,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3196 | | sltu AT, TMP0, TMP1 | 4041 | | sltu AT, TMP0, TMP1 |
3197 | | beqz AT, ->vmeta_tgetb | 4042 | | beqz AT, ->vmeta_tgetb |
3198 | |. addu RC, TMP2, RC | 4043 | |. addu RC, TMP2, RC |
3199 | | lw TMP1, HI(RC) | 4044 | | lw SFRETHI, HI(RC) |
3200 | | beq TMP1, TISNIL, >5 | 4045 | | beq SFRETHI, TISNIL, >5 |
3201 | |. ldc1 f0, 0(RC) | 4046 | |. lw SFRETLO, LO(RC) |
3202 | |1: | 4047 | |1: |
3203 | | ins_next1 | 4048 | | ins_next1 |
3204 | | sdc1 f0, 0(RA) | 4049 | | sw SFRETHI, HI(RA) |
4050 | | sw SFRETLO, LO(RA) | ||
3205 | | ins_next2 | 4051 | | ins_next2 |
3206 | | | 4052 | | |
3207 | |5: // Check for __index if table value is nil. | 4053 | |5: // Check for __index if table value is nil. |
@@ -3212,9 +4058,33 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3212 | | andi TMP1, TMP1, 1<<MM_index | 4058 | | andi TMP1, TMP1, 1<<MM_index |
3213 | | bnez TMP1, <1 // 'no __index' flag set: done. | 4059 | | bnez TMP1, <1 // 'no __index' flag set: done. |
3214 | |. nop | 4060 | |. nop |
3215 | | b ->vmeta_tgetb // Caveat: preserve TMP0! | 4061 | | b ->vmeta_tgetb // Caveat: preserve TMP0 and CARG2! |
3216 | |. nop | 4062 | |. nop |
3217 | break; | 4063 | break; |
4064 | case BC_TGETR: | ||
4065 | | // RA = dst*8, RB = table*8, RC = key*8 | ||
4066 | | decode_RB8a RB, INS | ||
4067 | | decode_RB8b RB | ||
4068 | | decode_RDtoRC8 RC, RD | ||
4069 | | addu RB, BASE, RB | ||
4070 | | addu RC, BASE, RC | ||
4071 | | lw TAB:CARG1, LO(RB) | ||
4072 | | lw CARG2, LO(RC) | ||
4073 | | addu RA, BASE, RA | ||
4074 | | lw TMP0, TAB:CARG1->asize | ||
4075 | | lw TMP1, TAB:CARG1->array | ||
4076 | | sltu AT, CARG2, TMP0 | ||
4077 | | sll TMP2, CARG2, 3 | ||
4078 | | beqz AT, ->vmeta_tgetr // In array part? | ||
4079 | |. addu CRET1, TMP1, TMP2 | ||
4080 | | lw SFARG2HI, HI(CRET1) | ||
4081 | | lw SFARG2LO, LO(CRET1) | ||
4082 | |->BC_TGETR_Z: | ||
4083 | | ins_next1 | ||
4084 | | sw SFARG2HI, HI(RA) | ||
4085 | | sw SFARG2LO, LO(RA) | ||
4086 | | ins_next2 | ||
4087 | break; | ||
3218 | 4088 | ||
3219 | case BC_TSETV: | 4089 | case BC_TSETV: |
3220 | | // RA = src*8, RB = table*8, RC = key*8 | 4090 | | // RA = src*8, RB = table*8, RC = key*8 |
@@ -3227,33 +4097,26 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3227 | | lw TMP2, HI(CARG3) | 4097 | | lw TMP2, HI(CARG3) |
3228 | | lw TAB:RB, LO(CARG2) | 4098 | | lw TAB:RB, LO(CARG2) |
3229 | | li AT, LJ_TTAB | 4099 | | li AT, LJ_TTAB |
3230 | | ldc1 f0, 0(CARG3) | ||
3231 | | bne TMP1, AT, ->vmeta_tsetv | 4100 | | bne TMP1, AT, ->vmeta_tsetv |
3232 | |. addu RA, BASE, RA | 4101 | |. addu RA, BASE, RA |
3233 | | sltiu AT, TMP2, LJ_TISNUM | 4102 | | bne TMP2, TISNUM, >5 |
3234 | | beqz AT, >5 | 4103 | |. lw RC, LO(CARG3) |
3235 | |. li AT, LJ_TSTR | 4104 | | lw TMP0, TAB:RB->asize |
3236 | | | ||
3237 | | // Convert number key to integer, check for integerness and range. | ||
3238 | | cvt.w.d f2, f0 | ||
3239 | | lw TMP0, TAB:RB->asize | ||
3240 | | mfc1 TMP2, f2 | ||
3241 | | cvt.d.w f4, f2 | ||
3242 | | lw TMP1, TAB:RB->array | 4105 | | lw TMP1, TAB:RB->array |
3243 | | c.eq.d f0, f4 | 4106 | | sltu AT, RC, TMP0 |
3244 | | sltu AT, TMP2, TMP0 | 4107 | | sll TMP2, RC, 3 |
3245 | | movf AT, r0 | ||
3246 | | sll TMP2, TMP2, 3 | ||
3247 | | beqz AT, ->vmeta_tsetv // Integer key and in array part? | 4108 | | beqz AT, ->vmeta_tsetv // Integer key and in array part? |
3248 | |. addu TMP1, TMP1, TMP2 | 4109 | |. addu TMP1, TMP1, TMP2 |
3249 | | lbu TMP3, TAB:RB->marked | ||
3250 | | lw TMP0, HI(TMP1) | 4110 | | lw TMP0, HI(TMP1) |
4111 | | lbu TMP3, TAB:RB->marked | ||
4112 | | lw SFRETHI, HI(RA) | ||
3251 | | beq TMP0, TISNIL, >3 | 4113 | | beq TMP0, TISNIL, >3 |
3252 | |. ldc1 f0, 0(RA) | 4114 | |. lw SFRETLO, LO(RA) |
3253 | |1: | 4115 | |1: |
3254 | | andi AT, TMP3, LJ_GC_BLACK // isblack(table) | 4116 | | andi AT, TMP3, LJ_GC_BLACK // isblack(table) |
3255 | | bnez AT, >7 | 4117 | | sw SFRETHI, HI(TMP1) |
3256 | |. sdc1 f0, 0(TMP1) | 4118 | | bnez AT, >7 |
4119 | |. sw SFRETLO, LO(TMP1) | ||
3257 | |2: | 4120 | |2: |
3258 | | ins_next | 4121 | | ins_next |
3259 | | | 4122 | | |
@@ -3269,8 +4132,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3269 | |. nop | 4132 | |. nop |
3270 | | | 4133 | | |
3271 | |5: | 4134 | |5: |
4135 | | li AT, LJ_TSTR | ||
3272 | | bne TMP2, AT, ->vmeta_tsetv | 4136 | | bne TMP2, AT, ->vmeta_tsetv |
3273 | |. lw STR:RC, LO(CARG3) | 4137 | |. nop |
3274 | | b ->BC_TSETS_Z // String key? | 4138 | | b ->BC_TSETS_Z // String key? |
3275 | |. nop | 4139 | |. nop |
3276 | | | 4140 | | |
@@ -3302,7 +4166,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3302 | | sll TMP1, TMP1, 3 | 4166 | | sll TMP1, TMP1, 3 |
3303 | | subu TMP1, TMP0, TMP1 | 4167 | | subu TMP1, TMP0, TMP1 |
3304 | | addu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) | 4168 | | addu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) |
4169 | |.if FPU | ||
3305 | | ldc1 f20, 0(RA) | 4170 | | ldc1 f20, 0(RA) |
4171 | |.else | ||
4172 | | lw SFRETHI, HI(RA) | ||
4173 | | lw SFRETLO, LO(RA) | ||
4174 | |.endif | ||
3306 | |1: | 4175 | |1: |
3307 | | lw CARG1, offsetof(Node, key)+HI(NODE:TMP2) | 4176 | | lw CARG1, offsetof(Node, key)+HI(NODE:TMP2) |
3308 | | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2) | 4177 | | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2) |
@@ -3316,8 +4185,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3316 | |. lw TAB:TMP0, TAB:RB->metatable | 4185 | |. lw TAB:TMP0, TAB:RB->metatable |
3317 | |2: | 4186 | |2: |
3318 | | andi AT, TMP3, LJ_GC_BLACK // isblack(table) | 4187 | | andi AT, TMP3, LJ_GC_BLACK // isblack(table) |
4188 | |.if FPU | ||
3319 | | bnez AT, >7 | 4189 | | bnez AT, >7 |
3320 | |. sdc1 f20, NODE:TMP2->val | 4190 | |. sdc1 f20, NODE:TMP2->val |
4191 | |.else | ||
4192 | | sw SFRETHI, NODE:TMP2->val.u32.hi | ||
4193 | | bnez AT, >7 | ||
4194 | |. sw SFRETLO, NODE:TMP2->val.u32.lo | ||
4195 | |.endif | ||
3321 | |3: | 4196 | |3: |
3322 | | ins_next | 4197 | | ins_next |
3323 | | | 4198 | | |
@@ -3355,8 +4230,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3355 | |. move CARG1, L | 4230 | |. move CARG1, L |
3356 | | // Returns TValue *. | 4231 | | // Returns TValue *. |
3357 | | lw BASE, L->base | 4232 | | lw BASE, L->base |
4233 | |.if FPU | ||
3358 | | b <3 // No 2nd write barrier needed. | 4234 | | b <3 // No 2nd write barrier needed. |
3359 | |. sdc1 f20, 0(CRET1) | 4235 | |. sdc1 f20, 0(CRET1) |
4236 | |.else | ||
4237 | | lw SFARG1HI, HI(RA) | ||
4238 | | lw SFARG1LO, LO(RA) | ||
4239 | | sw SFARG1HI, HI(CRET1) | ||
4240 | | b <3 // No 2nd write barrier needed. | ||
4241 | |. sw SFARG1LO, LO(CRET1) | ||
4242 | |.endif | ||
3360 | | | 4243 | | |
3361 | |7: // Possible table write barrier for the value. Skip valiswhite check. | 4244 | |7: // Possible table write barrier for the value. Skip valiswhite check. |
3362 | | barrierback TAB:RB, TMP3, TMP0, <3 | 4245 | | barrierback TAB:RB, TMP3, TMP0, <3 |
@@ -3381,11 +4264,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3381 | | lw TMP1, HI(RC) | 4264 | | lw TMP1, HI(RC) |
3382 | | lbu TMP3, TAB:RB->marked | 4265 | | lbu TMP3, TAB:RB->marked |
3383 | | beq TMP1, TISNIL, >5 | 4266 | | beq TMP1, TISNIL, >5 |
3384 | |. ldc1 f0, 0(RA) | ||
3385 | |1: | 4267 | |1: |
4268 | |. lw SFRETHI, HI(RA) | ||
4269 | | lw SFRETLO, LO(RA) | ||
3386 | | andi AT, TMP3, LJ_GC_BLACK // isblack(table) | 4270 | | andi AT, TMP3, LJ_GC_BLACK // isblack(table) |
4271 | | sw SFRETHI, HI(RC) | ||
3387 | | bnez AT, >7 | 4272 | | bnez AT, >7 |
3388 | |. sdc1 f0, 0(RC) | 4273 | |. sw SFRETLO, LO(RC) |
3389 | |2: | 4274 | |2: |
3390 | | ins_next | 4275 | | ins_next |
3391 | | | 4276 | | |
@@ -3397,12 +4282,43 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3397 | | andi TMP1, TMP1, 1<<MM_newindex | 4282 | | andi TMP1, TMP1, 1<<MM_newindex |
3398 | | bnez TMP1, <1 // 'no __newindex' flag set: done. | 4283 | | bnez TMP1, <1 // 'no __newindex' flag set: done. |
3399 | |. nop | 4284 | |. nop |
3400 | | b ->vmeta_tsetb // Caveat: preserve TMP0! | 4285 | | b ->vmeta_tsetb // Caveat: preserve TMP0 and CARG2! |
3401 | |. nop | 4286 | |. nop |
3402 | | | 4287 | | |
3403 | |7: // Possible table write barrier for the value. Skip valiswhite check. | 4288 | |7: // Possible table write barrier for the value. Skip valiswhite check. |
3404 | | barrierback TAB:RB, TMP3, TMP0, <2 | 4289 | | barrierback TAB:RB, TMP3, TMP0, <2 |
3405 | break; | 4290 | break; |
4291 | case BC_TSETR: | ||
4292 | | // RA = dst*8, RB = table*8, RC = key*8 | ||
4293 | | decode_RB8a RB, INS | ||
4294 | | decode_RB8b RB | ||
4295 | | decode_RDtoRC8 RC, RD | ||
4296 | | addu CARG1, BASE, RB | ||
4297 | | addu CARG3, BASE, RC | ||
4298 | | lw TAB:CARG2, LO(CARG1) | ||
4299 | | lw CARG3, LO(CARG3) | ||
4300 | | lbu TMP3, TAB:CARG2->marked | ||
4301 | | lw TMP0, TAB:CARG2->asize | ||
4302 | | lw TMP1, TAB:CARG2->array | ||
4303 | | andi AT, TMP3, LJ_GC_BLACK // isblack(table) | ||
4304 | | bnez AT, >7 | ||
4305 | |. addu RA, BASE, RA | ||
4306 | |2: | ||
4307 | | sltu AT, CARG3, TMP0 | ||
4308 | | sll TMP2, CARG3, 3 | ||
4309 | | beqz AT, ->vmeta_tsetr // In array part? | ||
4310 | |. addu CRET1, TMP1, TMP2 | ||
4311 | |->BC_TSETR_Z: | ||
4312 | | lw SFARG1HI, HI(RA) | ||
4313 | | lw SFARG1LO, LO(RA) | ||
4314 | | ins_next1 | ||
4315 | | sw SFARG1HI, HI(CRET1) | ||
4316 | | sw SFARG1LO, LO(CRET1) | ||
4317 | | ins_next2 | ||
4318 | | | ||
4319 | |7: // Possible table write barrier for the value. Skip valiswhite check. | ||
4320 | | barrierback TAB:CARG2, TMP3, CRET1, <2 | ||
4321 | break; | ||
3406 | 4322 | ||
3407 | case BC_TSETM: | 4323 | case BC_TSETM: |
3408 | | // RA = base*8 (table at base-1), RD = num_const*8 (start index) | 4324 | | // RA = base*8 (table at base-1), RD = num_const*8 (start index) |
@@ -3425,10 +4341,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3425 | | addu TMP1, TMP1, CARG1 | 4341 | | addu TMP1, TMP1, CARG1 |
3426 | | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table) | 4342 | | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table) |
3427 | |3: // Copy result slots to table. | 4343 | |3: // Copy result slots to table. |
3428 | | ldc1 f0, 0(RA) | 4344 | | lw SFRETHI, HI(RA) |
4345 | | lw SFRETLO, LO(RA) | ||
3429 | | addiu RA, RA, 8 | 4346 | | addiu RA, RA, 8 |
3430 | | sltu AT, RA, TMP2 | 4347 | | sltu AT, RA, TMP2 |
3431 | | sdc1 f0, 0(TMP1) | 4348 | | sw SFRETHI, HI(TMP1) |
4349 | | sw SFRETLO, LO(TMP1) | ||
3432 | | bnez AT, <3 | 4350 | | bnez AT, <3 |
3433 | |. addiu TMP1, TMP1, 8 | 4351 | |. addiu TMP1, TMP1, 8 |
3434 | | bnez TMP0, >7 | 4352 | | bnez TMP0, >7 |
@@ -3503,10 +4421,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3503 | | beqz NARGS8:RC, >3 | 4421 | | beqz NARGS8:RC, >3 |
3504 | |. move TMP3, NARGS8:RC | 4422 | |. move TMP3, NARGS8:RC |
3505 | |2: | 4423 | |2: |
3506 | | ldc1 f0, 0(RA) | 4424 | | lw SFRETHI, HI(RA) |
4425 | | lw SFRETLO, LO(RA) | ||
3507 | | addiu RA, RA, 8 | 4426 | | addiu RA, RA, 8 |
3508 | | addiu TMP3, TMP3, -8 | 4427 | | addiu TMP3, TMP3, -8 |
3509 | | sdc1 f0, 0(TMP2) | 4428 | | sw SFRETHI, HI(TMP2) |
4429 | | sw SFRETLO, LO(TMP2) | ||
3510 | | bnez TMP3, <2 | 4430 | | bnez TMP3, <2 |
3511 | |. addiu TMP2, TMP2, 8 | 4431 | |. addiu TMP2, TMP2, 8 |
3512 | |3: | 4432 | |3: |
@@ -3543,12 +4463,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3543 | | li AT, LJ_TFUNC | 4463 | | li AT, LJ_TFUNC |
3544 | | lw TMP1, -24+HI(BASE) | 4464 | | lw TMP1, -24+HI(BASE) |
3545 | | lw LFUNC:RB, -24+LO(BASE) | 4465 | | lw LFUNC:RB, -24+LO(BASE) |
3546 | | ldc1 f2, -8(BASE) | 4466 | | lw SFARG1HI, -16+HI(BASE) |
3547 | | ldc1 f0, -16(BASE) | 4467 | | lw SFARG1LO, -16+LO(BASE) |
4468 | | lw SFARG2HI, -8+HI(BASE) | ||
4469 | | lw SFARG2LO, -8+LO(BASE) | ||
3548 | | sw TMP1, HI(BASE) // Copy callable. | 4470 | | sw TMP1, HI(BASE) // Copy callable. |
3549 | | sw LFUNC:RB, LO(BASE) | 4471 | | sw LFUNC:RB, LO(BASE) |
3550 | | sdc1 f2, 16(BASE) // Copy control var. | 4472 | | sw SFARG1HI, 8+HI(BASE) // Copy state. |
3551 | | sdc1 f0, 8(BASE) // Copy state. | 4473 | | sw SFARG1LO, 8+LO(BASE) |
4474 | | sw SFARG2HI, 16+HI(BASE) // Copy control var. | ||
4475 | | sw SFARG2LO, 16+LO(BASE) | ||
3552 | | addiu BASE, BASE, 8 | 4476 | | addiu BASE, BASE, 8 |
3553 | | bne TMP1, AT, ->vmeta_call | 4477 | | bne TMP1, AT, ->vmeta_call |
3554 | |. li NARGS8:RC, 16 // Iterators get 2 arguments. | 4478 | |. li NARGS8:RC, 16 // Iterators get 2 arguments. |
@@ -3571,20 +4495,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3571 | | beqz AT, >5 // Index points after array part? | 4495 | | beqz AT, >5 // Index points after array part? |
3572 | |. sll TMP3, RC, 3 | 4496 | |. sll TMP3, RC, 3 |
3573 | | addu TMP3, TMP1, TMP3 | 4497 | | addu TMP3, TMP1, TMP3 |
3574 | | lw TMP2, HI(TMP3) | 4498 | | lw SFARG1HI, HI(TMP3) |
3575 | | ldc1 f0, 0(TMP3) | 4499 | | lw SFARG1LO, LO(TMP3) |
3576 | | mtc1 RC, f2 | ||
3577 | | lhu RD, -4+OFS_RD(PC) | 4500 | | lhu RD, -4+OFS_RD(PC) |
3578 | | beq TMP2, TISNIL, <1 // Skip holes in array part. | 4501 | | sw TISNUM, HI(RA) |
4502 | | sw RC, LO(RA) | ||
4503 | | beq SFARG1HI, TISNIL, <1 // Skip holes in array part. | ||
3579 | |. addiu RC, RC, 1 | 4504 | |. addiu RC, RC, 1 |
3580 | | cvt.d.w f2, f2 | 4505 | | sw SFARG1HI, 8+HI(RA) |
4506 | | sw SFARG1LO, 8+LO(RA) | ||
3581 | | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) | 4507 | | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) |
3582 | | sdc1 f0, 8(RA) | ||
3583 | | decode_RD4b RD | 4508 | | decode_RD4b RD |
3584 | | addu RD, RD, TMP3 | 4509 | | addu RD, RD, TMP3 |
3585 | | sw RC, -8+LO(RA) // Update control var. | 4510 | | sw RC, -8+LO(RA) // Update control var. |
3586 | | addu PC, PC, RD | 4511 | | addu PC, PC, RD |
3587 | | sdc1 f2, 0(RA) | ||
3588 | |3: | 4512 | |3: |
3589 | | ins_next | 4513 | | ins_next |
3590 | | | 4514 | | |
@@ -3599,18 +4523,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3599 | | sll RB, RC, 3 | 4523 | | sll RB, RC, 3 |
3600 | | subu TMP3, TMP3, RB | 4524 | | subu TMP3, TMP3, RB |
3601 | | addu NODE:TMP3, TMP3, TMP2 | 4525 | | addu NODE:TMP3, TMP3, TMP2 |
3602 | | lw RB, HI(NODE:TMP3) | 4526 | | lw SFARG1HI, NODE:TMP3->val.u32.hi |
3603 | | ldc1 f0, 0(NODE:TMP3) | 4527 | | lw SFARG1LO, NODE:TMP3->val.u32.lo |
3604 | | lhu RD, -4+OFS_RD(PC) | 4528 | | lhu RD, -4+OFS_RD(PC) |
3605 | | beq RB, TISNIL, <6 // Skip holes in hash part. | 4529 | | beq SFARG1HI, TISNIL, <6 // Skip holes in hash part. |
3606 | |. addiu RC, RC, 1 | 4530 | |. addiu RC, RC, 1 |
3607 | | ldc1 f2, NODE:TMP3->key | 4531 | | lw SFARG2HI, NODE:TMP3->key.u32.hi |
4532 | | lw SFARG2LO, NODE:TMP3->key.u32.lo | ||
3608 | | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) | 4533 | | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) |
3609 | | sdc1 f0, 8(RA) | 4534 | | sw SFARG1HI, 8+HI(RA) |
4535 | | sw SFARG1LO, 8+LO(RA) | ||
3610 | | addu RC, RC, TMP0 | 4536 | | addu RC, RC, TMP0 |
3611 | | decode_RD4b RD | 4537 | | decode_RD4b RD |
3612 | | addu RD, RD, TMP3 | 4538 | | addu RD, RD, TMP3 |
3613 | | sdc1 f2, 0(RA) | 4539 | | sw SFARG2HI, HI(RA) |
4540 | | sw SFARG2LO, LO(RA) | ||
3614 | | addu PC, PC, RD | 4541 | | addu PC, PC, RD |
3615 | | b <3 | 4542 | | b <3 |
3616 | |. sw RC, -8+LO(RA) // Update control var. | 4543 | |. sw RC, -8+LO(RA) // Update control var. |
@@ -3690,9 +4617,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3690 | | bnez AT, >7 | 4617 | | bnez AT, >7 |
3691 | |. addiu MULTRES, TMP1, 8 | 4618 | |. addiu MULTRES, TMP1, 8 |
3692 | |6: | 4619 | |6: |
3693 | | ldc1 f0, 0(RC) | 4620 | | lw SFRETHI, HI(RC) |
4621 | | lw SFRETLO, LO(RC) | ||
3694 | | addiu RC, RC, 8 | 4622 | | addiu RC, RC, 8 |
3695 | | sdc1 f0, 0(RA) | 4623 | | sw SFRETHI, HI(RA) |
4624 | | sw SFRETLO, LO(RA) | ||
3696 | | sltu AT, RC, TMP3 | 4625 | | sltu AT, RC, TMP3 |
3697 | | bnez AT, <6 // More vararg slots? | 4626 | | bnez AT, <6 // More vararg slots? |
3698 | |. addiu RA, RA, 8 | 4627 | |. addiu RA, RA, 8 |
@@ -3748,10 +4677,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3748 | | beqz RC, >3 | 4677 | | beqz RC, >3 |
3749 | |. subu BASE, TMP2, TMP0 | 4678 | |. subu BASE, TMP2, TMP0 |
3750 | |2: | 4679 | |2: |
3751 | | ldc1 f0, 0(RA) | 4680 | | lw SFRETHI, HI(RA) |
4681 | | lw SFRETLO, LO(RA) | ||
3752 | | addiu RA, RA, 8 | 4682 | | addiu RA, RA, 8 |
3753 | | addiu RC, RC, -8 | 4683 | | addiu RC, RC, -8 |
3754 | | sdc1 f0, 0(TMP2) | 4684 | | sw SFRETHI, HI(TMP2) |
4685 | | sw SFRETLO, LO(TMP2) | ||
3755 | | bnez RC, <2 | 4686 | | bnez RC, <2 |
3756 | |. addiu TMP2, TMP2, 8 | 4687 | |. addiu TMP2, TMP2, 8 |
3757 | |3: | 4688 | |3: |
@@ -3792,14 +4723,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3792 | | lw INS, -4(PC) | 4723 | | lw INS, -4(PC) |
3793 | | addiu TMP2, BASE, -8 | 4724 | | addiu TMP2, BASE, -8 |
3794 | if (op == BC_RET1) { | 4725 | if (op == BC_RET1) { |
3795 | | ldc1 f0, 0(RA) | 4726 | | lw SFRETHI, HI(RA) |
4727 | | lw SFRETLO, LO(RA) | ||
3796 | } | 4728 | } |
3797 | | decode_RB8a RB, INS | 4729 | | decode_RB8a RB, INS |
3798 | | decode_RA8a RA, INS | 4730 | | decode_RA8a RA, INS |
3799 | | decode_RB8b RB | 4731 | | decode_RB8b RB |
3800 | | decode_RA8b RA | 4732 | | decode_RA8b RA |
3801 | if (op == BC_RET1) { | 4733 | if (op == BC_RET1) { |
3802 | | sdc1 f0, 0(TMP2) | 4734 | | sw SFRETHI, HI(TMP2) |
4735 | | sw SFRETLO, LO(TMP2) | ||
3803 | } | 4736 | } |
3804 | | subu BASE, TMP2, RA | 4737 | | subu BASE, TMP2, RA |
3805 | |5: | 4738 | |5: |
@@ -3841,69 +4774,147 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3841 | | // RA = base*8, RD = target (after end of loop or start of loop) | 4774 | | // RA = base*8, RD = target (after end of loop or start of loop) |
3842 | vk = (op == BC_IFORL || op == BC_JFORL); | 4775 | vk = (op == BC_IFORL || op == BC_JFORL); |
3843 | | addu RA, BASE, RA | 4776 | | addu RA, BASE, RA |
3844 | if (vk) { | 4777 | | lw SFARG1HI, FORL_IDX*8+HI(RA) |
3845 | | ldc1 f0, FORL_IDX*8(RA) | 4778 | | lw SFARG1LO, FORL_IDX*8+LO(RA) |
3846 | | ldc1 f4, FORL_STEP*8(RA) | ||
3847 | | ldc1 f2, FORL_STOP*8(RA) | ||
3848 | | lw TMP3, FORL_STEP*8+HI(RA) | ||
3849 | | add.d f0, f0, f4 | ||
3850 | | sdc1 f0, FORL_IDX*8(RA) | ||
3851 | } else { | ||
3852 | | lw TMP1, FORL_IDX*8+HI(RA) | ||
3853 | | lw TMP3, FORL_STEP*8+HI(RA) | ||
3854 | | lw TMP2, FORL_STOP*8+HI(RA) | ||
3855 | | sltiu TMP1, TMP1, LJ_TISNUM | ||
3856 | | sltiu TMP0, TMP3, LJ_TISNUM | ||
3857 | | sltiu TMP2, TMP2, LJ_TISNUM | ||
3858 | | and TMP1, TMP1, TMP0 | ||
3859 | | and TMP1, TMP1, TMP2 | ||
3860 | | ldc1 f0, FORL_IDX*8(RA) | ||
3861 | | beqz TMP1, ->vmeta_for | ||
3862 | |. ldc1 f2, FORL_STOP*8(RA) | ||
3863 | } | ||
3864 | if (op != BC_JFORL) { | 4779 | if (op != BC_JFORL) { |
3865 | | srl RD, RD, 1 | 4780 | | srl RD, RD, 1 |
3866 | | lui TMP0, (-(BCBIAS_J*4 >> 16) & 65535) | 4781 | | lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535) |
4782 | | addu TMP2, RD, TMP2 | ||
4783 | } | ||
4784 | if (!vk) { | ||
4785 | | lw SFARG2HI, FORL_STOP*8+HI(RA) | ||
4786 | | lw SFARG2LO, FORL_STOP*8+LO(RA) | ||
4787 | | bne SFARG1HI, TISNUM, >5 | ||
4788 | |. lw SFRETHI, FORL_STEP*8+HI(RA) | ||
4789 | | xor AT, SFARG2HI, TISNUM | ||
4790 | | lw SFRETLO, FORL_STEP*8+LO(RA) | ||
4791 | | xor TMP0, SFRETHI, TISNUM | ||
4792 | | or AT, AT, TMP0 | ||
4793 | | bnez AT, ->vmeta_for | ||
4794 | |. slt AT, SFRETLO, r0 | ||
4795 | | slt CRET1, SFARG2LO, SFARG1LO | ||
4796 | | slt TMP1, SFARG1LO, SFARG2LO | ||
4797 | | movn CRET1, TMP1, AT | ||
4798 | } else { | ||
4799 | | bne SFARG1HI, TISNUM, >5 | ||
4800 | |. lw SFARG2LO, FORL_STEP*8+LO(RA) | ||
4801 | | lw SFRETLO, FORL_STOP*8+LO(RA) | ||
4802 | | move TMP3, SFARG1LO | ||
4803 | | addu SFARG1LO, SFARG1LO, SFARG2LO | ||
4804 | | xor TMP0, SFARG1LO, TMP3 | ||
4805 | | xor TMP1, SFARG1LO, SFARG2LO | ||
4806 | | and TMP0, TMP0, TMP1 | ||
4807 | | slt TMP1, SFARG1LO, SFRETLO | ||
4808 | | slt CRET1, SFRETLO, SFARG1LO | ||
4809 | | slt AT, SFARG2LO, r0 | ||
4810 | | slt TMP0, TMP0, r0 // ((y^a) & (y^b)) < 0: overflow. | ||
4811 | | movn CRET1, TMP1, AT | ||
4812 | | or CRET1, CRET1, TMP0 | ||
4813 | } | ||
4814 | |1: | ||
4815 | if (op == BC_FORI) { | ||
4816 | | movz TMP2, r0, CRET1 | ||
4817 | | addu PC, PC, TMP2 | ||
4818 | } else if (op == BC_JFORI) { | ||
4819 | | addu PC, PC, TMP2 | ||
4820 | | lhu RD, -4+OFS_RD(PC) | ||
4821 | } else if (op == BC_IFORL) { | ||
4822 | | movn TMP2, r0, CRET1 | ||
4823 | | addu PC, PC, TMP2 | ||
3867 | } | 4824 | } |
3868 | | c.le.d 0, f0, f2 | 4825 | if (vk) { |
3869 | | c.le.d 1, f2, f0 | 4826 | | sw SFARG1HI, FORL_IDX*8+HI(RA) |
3870 | | sdc1 f0, FORL_EXT*8(RA) | 4827 | | sw SFARG1LO, FORL_IDX*8+LO(RA) |
4828 | } | ||
4829 | | ins_next1 | ||
4830 | | sw SFARG1HI, FORL_EXT*8+HI(RA) | ||
4831 | | sw SFARG1LO, FORL_EXT*8+LO(RA) | ||
4832 | |2: | ||
3871 | if (op == BC_JFORI) { | 4833 | if (op == BC_JFORI) { |
3872 | | li TMP1, 1 | 4834 | | beqz CRET1, =>BC_JLOOP |
3873 | | li TMP2, 1 | ||
3874 | | addu TMP0, RD, TMP0 | ||
3875 | | slt TMP3, TMP3, r0 | ||
3876 | | movf TMP1, r0, 0 | ||
3877 | | addu PC, PC, TMP0 | ||
3878 | | movf TMP2, r0, 1 | ||
3879 | | lhu RD, -4+OFS_RD(PC) | ||
3880 | | movn TMP1, TMP2, TMP3 | ||
3881 | | bnez TMP1, =>BC_JLOOP | ||
3882 | |. decode_RD8b RD | 4835 | |. decode_RD8b RD |
3883 | } else if (op == BC_JFORL) { | 4836 | } else if (op == BC_JFORL) { |
3884 | | li TMP1, 1 | 4837 | | beqz CRET1, =>BC_JLOOP |
3885 | | li TMP2, 1 | 4838 | } |
3886 | | slt TMP3, TMP3, r0 | 4839 | | ins_next2 |
3887 | | movf TMP1, r0, 0 | 4840 | | |
3888 | | movf TMP2, r0, 1 | 4841 | |5: // FP loop. |
3889 | | movn TMP1, TMP2, TMP3 | 4842 | |.if FPU |
3890 | | bnez TMP1, =>BC_JLOOP | 4843 | if (!vk) { |
4844 | | ldc1 f0, FORL_IDX*8(RA) | ||
4845 | | ldc1 f2, FORL_STOP*8(RA) | ||
4846 | | sltiu TMP0, SFARG1HI, LJ_TISNUM | ||
4847 | | sltiu TMP1, SFARG2HI, LJ_TISNUM | ||
4848 | | sltiu AT, SFRETHI, LJ_TISNUM | ||
4849 | | and TMP0, TMP0, TMP1 | ||
4850 | | and AT, AT, TMP0 | ||
4851 | | beqz AT, ->vmeta_for | ||
4852 | |. slt TMP3, SFRETHI, r0 | ||
4853 | | c.ole.d 0, f0, f2 | ||
4854 | | c.ole.d 1, f2, f0 | ||
4855 | | li CRET1, 1 | ||
4856 | | movt CRET1, r0, 0 | ||
4857 | | movt AT, r0, 1 | ||
4858 | | b <1 | ||
4859 | |. movn CRET1, AT, TMP3 | ||
4860 | } else { | ||
4861 | | ldc1 f0, FORL_IDX*8(RA) | ||
4862 | | ldc1 f4, FORL_STEP*8(RA) | ||
4863 | | ldc1 f2, FORL_STOP*8(RA) | ||
4864 | | lw SFARG2HI, FORL_STEP*8+HI(RA) | ||
4865 | | add.d f0, f0, f4 | ||
4866 | | c.ole.d 0, f0, f2 | ||
4867 | | c.ole.d 1, f2, f0 | ||
4868 | | slt TMP3, SFARG2HI, r0 | ||
4869 | | li CRET1, 1 | ||
4870 | | li AT, 1 | ||
4871 | | movt CRET1, r0, 0 | ||
4872 | | movt AT, r0, 1 | ||
4873 | | movn CRET1, AT, TMP3 | ||
4874 | if (op == BC_IFORL) { | ||
4875 | | movn TMP2, r0, CRET1 | ||
4876 | | addu PC, PC, TMP2 | ||
4877 | } | ||
4878 | | sdc1 f0, FORL_IDX*8(RA) | ||
4879 | | ins_next1 | ||
4880 | | b <2 | ||
4881 | |. sdc1 f0, FORL_EXT*8(RA) | ||
4882 | } | ||
4883 | |.else | ||
4884 | if (!vk) { | ||
4885 | | sltiu TMP0, SFARG1HI, LJ_TISNUM | ||
4886 | | sltiu TMP1, SFARG2HI, LJ_TISNUM | ||
4887 | | sltiu AT, SFRETHI, LJ_TISNUM | ||
4888 | | and TMP0, TMP0, TMP1 | ||
4889 | | and AT, AT, TMP0 | ||
4890 | | beqz AT, ->vmeta_for | ||
4891 | |. nop | ||
4892 | | bal ->vm_sfcmpolex | ||
4893 | |. move TMP3, SFRETHI | ||
4894 | | b <1 | ||
3891 | |. nop | 4895 | |. nop |
3892 | } else { | 4896 | } else { |
3893 | | addu TMP1, RD, TMP0 | 4897 | | lw SFARG2HI, FORL_STEP*8+HI(RA) |
3894 | | slt TMP3, TMP3, r0 | 4898 | | load_got __adddf3 |
3895 | | move TMP2, TMP1 | 4899 | | call_extern |
3896 | if (op == BC_FORI) { | 4900 | |. sw TMP2, ARG5 |
3897 | | movt TMP1, r0, 0 | 4901 | | lw SFARG2HI, FORL_STOP*8+HI(RA) |
3898 | | movt TMP2, r0, 1 | 4902 | | lw SFARG2LO, FORL_STOP*8+LO(RA) |
4903 | | move SFARG1HI, SFRETHI | ||
4904 | | move SFARG1LO, SFRETLO | ||
4905 | | bal ->vm_sfcmpolex | ||
4906 | |. lw TMP3, FORL_STEP*8+HI(RA) | ||
4907 | if ( op == BC_JFORL ) { | ||
4908 | | lhu RD, -4+OFS_RD(PC) | ||
4909 | | lw TMP2, ARG5 | ||
4910 | | b <1 | ||
4911 | |. decode_RD8b RD | ||
3899 | } else { | 4912 | } else { |
3900 | | movf TMP1, r0, 0 | 4913 | | b <1 |
3901 | | movf TMP2, r0, 1 | 4914 | |. lw TMP2, ARG5 |
3902 | } | 4915 | } |
3903 | | movn TMP1, TMP2, TMP3 | ||
3904 | | addu PC, PC, TMP1 | ||
3905 | } | 4916 | } |
3906 | | ins_next | 4917 | |.endif |
3907 | break; | 4918 | break; |
3908 | 4919 | ||
3909 | case BC_ITERL: | 4920 | case BC_ITERL: |
@@ -3962,8 +4973,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3962 | | sw AT, DISPATCH_GL(vmstate)(DISPATCH) | 4973 | | sw AT, DISPATCH_GL(vmstate)(DISPATCH) |
3963 | | lw TRACE:TMP2, 0(TMP1) | 4974 | | lw TRACE:TMP2, 0(TMP1) |
3964 | | sw BASE, DISPATCH_GL(jit_base)(DISPATCH) | 4975 | | sw BASE, DISPATCH_GL(jit_base)(DISPATCH) |
3965 | | sw L, DISPATCH_GL(jit_L)(DISPATCH) | ||
3966 | | lw TMP2, TRACE:TMP2->mcode | 4976 | | lw TMP2, TRACE:TMP2->mcode |
4977 | | sw L, DISPATCH_GL(tmpbuf.L)(DISPATCH) | ||
3967 | | jr TMP2 | 4978 | | jr TMP2 |
3968 | |. addiu JGL, DISPATCH, GG_DISP2G+32768 | 4979 | |. addiu JGL, DISPATCH, GG_DISP2G+32768 |
3969 | |.endif | 4980 | |.endif |
@@ -4089,6 +5100,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4089 | | li_vmstate INTERP | 5100 | | li_vmstate INTERP |
4090 | | lw PC, FRAME_PC(BASE) // Fetch PC of caller. | 5101 | | lw PC, FRAME_PC(BASE) // Fetch PC of caller. |
4091 | | subu RA, TMP1, RD // RA = L->top - nresults*8 | 5102 | | subu RA, TMP1, RD // RA = L->top - nresults*8 |
5103 | | sw L, DISPATCH_GL(cur_L)(DISPATCH) | ||
4092 | | b ->vm_returnc | 5104 | | b ->vm_returnc |
4093 | |. st_vmstate | 5105 | |. st_vmstate |
4094 | break; | 5106 | break; |
@@ -4151,8 +5163,10 @@ static void emit_asm_debug(BuildCtx *ctx) | |||
4151 | fcofs, CFRAME_SIZE); | 5163 | fcofs, CFRAME_SIZE); |
4152 | for (i = 23; i >= 16; i--) | 5164 | for (i = 23; i >= 16; i--) |
4153 | fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i); | 5165 | fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i); |
5166 | #if !LJ_SOFTFP | ||
4154 | for (i = 30; i >= 20; i -= 2) | 5167 | for (i = 30; i >= 20; i -= 2) |
4155 | fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i); | 5168 | fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i); |
5169 | #endif | ||
4156 | fprintf(ctx->fp, | 5170 | fprintf(ctx->fp, |
4157 | "\t.align 2\n" | 5171 | "\t.align 2\n" |
4158 | ".LEFDE0:\n\n"); | 5172 | ".LEFDE0:\n\n"); |
@@ -4204,8 +5218,10 @@ static void emit_asm_debug(BuildCtx *ctx) | |||
4204 | fcofs, CFRAME_SIZE); | 5218 | fcofs, CFRAME_SIZE); |
4205 | for (i = 23; i >= 16; i--) | 5219 | for (i = 23; i >= 16; i--) |
4206 | fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i); | 5220 | fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i); |
5221 | #if !LJ_SOFTFP | ||
4207 | for (i = 30; i >= 20; i -= 2) | 5222 | for (i = 30; i >= 20; i -= 2) |
4208 | fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i); | 5223 | fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i); |
5224 | #endif | ||
4209 | fprintf(ctx->fp, | 5225 | fprintf(ctx->fp, |
4210 | "\t.align 2\n" | 5226 | "\t.align 2\n" |
4211 | ".LEFDE2:\n\n"); | 5227 | ".LEFDE2:\n\n"); |
diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc new file mode 100644 index 00000000..4ae19b7d --- /dev/null +++ b/src/vm_mips64.dasc | |||
@@ -0,0 +1,5424 @@ | |||
1 | |// Low-level VM code for MIPS64 CPUs. | ||
2 | |// Bytecode interpreter, fast functions and helper functions. | ||
3 | |// Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h | ||
4 | |// | ||
5 | |// Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. | ||
6 | |// Sponsored by Cisco Systems, Inc. | ||
7 | | | ||
8 | |.arch mips64 | ||
9 | |.section code_op, code_sub | ||
10 | | | ||
11 | |.actionlist build_actionlist | ||
12 | |.globals GLOB_ | ||
13 | |.globalnames globnames | ||
14 | |.externnames extnames | ||
15 | | | ||
16 | |// Note: The ragged indentation of the instructions is intentional. | ||
17 | |// The starting columns indicate data dependencies. | ||
18 | | | ||
19 | |//----------------------------------------------------------------------- | ||
20 | | | ||
21 | |// Fixed register assignments for the interpreter. | ||
22 | |// Don't use: r0 = 0, r26/r27 = reserved, r28 = gp, r29 = sp, r31 = ra | ||
23 | | | ||
24 | |.macro .FPU, a, b | ||
25 | |.if FPU | ||
26 | | a, b | ||
27 | |.endif | ||
28 | |.endmacro | ||
29 | | | ||
30 | |// The following must be C callee-save (but BASE is often refetched). | ||
31 | |.define BASE, r16 // Base of current Lua stack frame. | ||
32 | |.define KBASE, r17 // Constants of current Lua function. | ||
33 | |.define PC, r18 // Next PC. | ||
34 | |.define DISPATCH, r19 // Opcode dispatch table. | ||
35 | |.define LREG, r20 // Register holding lua_State (also in SAVE_L). | ||
36 | |.define MULTRES, r21 // Size of multi-result: (nresults+1)*8. | ||
37 | | | ||
38 | |.define JGL, r30 // On-trace: global_State + 32768. | ||
39 | | | ||
40 | |// Constants for type-comparisons, stores and conversions. C callee-save. | ||
41 | |.define TISNIL, r30 | ||
42 | |.define TISNUM, r22 | ||
43 | |.if FPU | ||
44 | |.define TOBIT, f30 // 2^52 + 2^51. | ||
45 | |.endif | ||
46 | | | ||
47 | |// The following temporaries are not saved across C calls, except for RA. | ||
48 | |.define RA, r23 // Callee-save. | ||
49 | |.define RB, r8 | ||
50 | |.define RC, r9 | ||
51 | |.define RD, r10 | ||
52 | |.define INS, r11 | ||
53 | | | ||
54 | |.define AT, r1 // Assembler temporary. | ||
55 | |.define TMP0, r12 | ||
56 | |.define TMP1, r13 | ||
57 | |.define TMP2, r14 | ||
58 | |.define TMP3, r15 | ||
59 | | | ||
60 | |// MIPS n64 calling convention. | ||
61 | |.define CFUNCADDR, r25 | ||
62 | |.define CARG1, r4 | ||
63 | |.define CARG2, r5 | ||
64 | |.define CARG3, r6 | ||
65 | |.define CARG4, r7 | ||
66 | |.define CARG5, r8 | ||
67 | |.define CARG6, r9 | ||
68 | |.define CARG7, r10 | ||
69 | |.define CARG8, r11 | ||
70 | | | ||
71 | |.define CRET1, r2 | ||
72 | |.define CRET2, r3 | ||
73 | | | ||
74 | |.if FPU | ||
75 | |.define FARG1, f12 | ||
76 | |.define FARG2, f13 | ||
77 | |.define FARG3, f14 | ||
78 | |.define FARG4, f15 | ||
79 | |.define FARG5, f16 | ||
80 | |.define FARG6, f17 | ||
81 | |.define FARG7, f18 | ||
82 | |.define FARG8, f19 | ||
83 | | | ||
84 | |.define FRET1, f0 | ||
85 | |.define FRET2, f2 | ||
86 | | | ||
87 | |.define FTMP0, f20 | ||
88 | |.define FTMP1, f21 | ||
89 | |.define FTMP2, f22 | ||
90 | |.endif | ||
91 | | | ||
92 | |// Stack layout while in interpreter. Must match with lj_frame.h. | ||
93 | |.if FPU // MIPS64 hard-float. | ||
94 | | | ||
95 | |.define CFRAME_SPACE, 192 // Delta for sp. | ||
96 | | | ||
97 | |//----- 16 byte aligned, <-- sp entering interpreter | ||
98 | |.define SAVE_ERRF, 188(sp) // 32 bit values. | ||
99 | |.define SAVE_NRES, 184(sp) | ||
100 | |.define SAVE_CFRAME, 176(sp) // 64 bit values. | ||
101 | |.define SAVE_L, 168(sp) | ||
102 | |.define SAVE_PC, 160(sp) | ||
103 | |//----- 16 byte aligned | ||
104 | |.define SAVE_GPR_, 80 // .. 80+10*8: 64 bit GPR saves. | ||
105 | |.define SAVE_FPR_, 16 // .. 16+8*8: 64 bit FPR saves. | ||
106 | | | ||
107 | |.else // MIPS64 soft-float | ||
108 | | | ||
109 | |.define CFRAME_SPACE, 128 // Delta for sp. | ||
110 | | | ||
111 | |//----- 16 byte aligned, <-- sp entering interpreter | ||
112 | |.define SAVE_ERRF, 124(sp) // 32 bit values. | ||
113 | |.define SAVE_NRES, 120(sp) | ||
114 | |.define SAVE_CFRAME, 112(sp) // 64 bit values. | ||
115 | |.define SAVE_L, 104(sp) | ||
116 | |.define SAVE_PC, 96(sp) | ||
117 | |//----- 16 byte aligned | ||
118 | |.define SAVE_GPR_, 16 // .. 16+10*8: 64 bit GPR saves. | ||
119 | | | ||
120 | |.endif | ||
121 | | | ||
122 | |.define TMPX, 8(sp) // Unused by interpreter, temp for JIT code. | ||
123 | |.define TMPD, 0(sp) | ||
124 | |//----- 16 byte aligned | ||
125 | | | ||
126 | |.define TMPD_OFS, 0 | ||
127 | | | ||
128 | |.define SAVE_MULTRES, TMPD | ||
129 | | | ||
130 | |//----------------------------------------------------------------------- | ||
131 | | | ||
132 | |.macro saveregs | ||
133 | | daddiu sp, sp, -CFRAME_SPACE | ||
134 | | sd ra, SAVE_GPR_+9*8(sp) | ||
135 | | sd r30, SAVE_GPR_+8*8(sp) | ||
136 | | .FPU sdc1 f31, SAVE_FPR_+7*8(sp) | ||
137 | | sd r23, SAVE_GPR_+7*8(sp) | ||
138 | | .FPU sdc1 f30, SAVE_FPR_+6*8(sp) | ||
139 | | sd r22, SAVE_GPR_+6*8(sp) | ||
140 | | .FPU sdc1 f29, SAVE_FPR_+5*8(sp) | ||
141 | | sd r21, SAVE_GPR_+5*8(sp) | ||
142 | | .FPU sdc1 f28, SAVE_FPR_+4*8(sp) | ||
143 | | sd r20, SAVE_GPR_+4*8(sp) | ||
144 | | .FPU sdc1 f27, SAVE_FPR_+3*8(sp) | ||
145 | | sd r19, SAVE_GPR_+3*8(sp) | ||
146 | | .FPU sdc1 f26, SAVE_FPR_+2*8(sp) | ||
147 | | sd r18, SAVE_GPR_+2*8(sp) | ||
148 | | .FPU sdc1 f25, SAVE_FPR_+1*8(sp) | ||
149 | | sd r17, SAVE_GPR_+1*8(sp) | ||
150 | | .FPU sdc1 f24, SAVE_FPR_+0*8(sp) | ||
151 | | sd r16, SAVE_GPR_+0*8(sp) | ||
152 | |.endmacro | ||
153 | | | ||
154 | |.macro restoreregs_ret | ||
155 | | ld ra, SAVE_GPR_+9*8(sp) | ||
156 | | ld r30, SAVE_GPR_+8*8(sp) | ||
157 | | ld r23, SAVE_GPR_+7*8(sp) | ||
158 | | .FPU ldc1 f31, SAVE_FPR_+7*8(sp) | ||
159 | | ld r22, SAVE_GPR_+6*8(sp) | ||
160 | | .FPU ldc1 f30, SAVE_FPR_+6*8(sp) | ||
161 | | ld r21, SAVE_GPR_+5*8(sp) | ||
162 | | .FPU ldc1 f29, SAVE_FPR_+5*8(sp) | ||
163 | | ld r20, SAVE_GPR_+4*8(sp) | ||
164 | | .FPU ldc1 f28, SAVE_FPR_+4*8(sp) | ||
165 | | ld r19, SAVE_GPR_+3*8(sp) | ||
166 | | .FPU ldc1 f27, SAVE_FPR_+3*8(sp) | ||
167 | | ld r18, SAVE_GPR_+2*8(sp) | ||
168 | | .FPU ldc1 f26, SAVE_FPR_+2*8(sp) | ||
169 | | ld r17, SAVE_GPR_+1*8(sp) | ||
170 | | .FPU ldc1 f25, SAVE_FPR_+1*8(sp) | ||
171 | | ld r16, SAVE_GPR_+0*8(sp) | ||
172 | | .FPU ldc1 f24, SAVE_FPR_+0*8(sp) | ||
173 | | jr ra | ||
174 | | daddiu sp, sp, CFRAME_SPACE | ||
175 | |.endmacro | ||
176 | | | ||
177 | |// Type definitions. Some of these are only used for documentation. | ||
178 | |.type L, lua_State, LREG | ||
179 | |.type GL, global_State | ||
180 | |.type TVALUE, TValue | ||
181 | |.type GCOBJ, GCobj | ||
182 | |.type STR, GCstr | ||
183 | |.type TAB, GCtab | ||
184 | |.type LFUNC, GCfuncL | ||
185 | |.type CFUNC, GCfuncC | ||
186 | |.type PROTO, GCproto | ||
187 | |.type UPVAL, GCupval | ||
188 | |.type NODE, Node | ||
189 | |.type NARGS8, int | ||
190 | |.type TRACE, GCtrace | ||
191 | |.type SBUF, SBuf | ||
192 | | | ||
193 | |//----------------------------------------------------------------------- | ||
194 | | | ||
195 | |// Trap for not-yet-implemented parts. | ||
196 | |.macro NYI; .long 0xf0f0f0f0; .endmacro | ||
197 | | | ||
198 | |// Macros to mark delay slots. A leading '.' emits its operands unchanged as one instruction; it only documents that the instruction sits in a branch delay slot. | ||
199 | |.macro ., a; a; .endmacro | ||
200 | |.macro ., a,b; a,b; .endmacro | ||
201 | |.macro ., a,b,c; a,b,c; .endmacro | ||
202 | |.macro ., a,b,c,d; a,b,c,d; .endmacro | ||
203 | | | ||
204 | |.define FRAME_PC, -8 | ||
205 | |.define FRAME_FUNC, -16 | ||
206 | | | ||
207 | |//----------------------------------------------------------------------- | ||
208 | | | ||
209 | |// Endian-specific defines. | ||
210 | |.if ENDIAN_LE | ||
211 | |.define HI, 4 | ||
212 | |.define LO, 0 | ||
213 | |.define OFS_RD, 2 | ||
214 | |.define OFS_RA, 1 | ||
215 | |.define OFS_OP, 0 | ||
216 | |.else | ||
217 | |.define HI, 0 | ||
218 | |.define LO, 4 | ||
219 | |.define OFS_RD, 0 | ||
220 | |.define OFS_RA, 2 | ||
221 | |.define OFS_OP, 3 | ||
222 | |.endif | ||
223 | | | ||
224 | |// Instruction decode. Each *8a/*8b pair, applied in that order, extracts one instruction field scaled by 8; decode_OP1 yields the plain opcode byte. | ||
225 | |.macro decode_OP1, dst, ins; andi dst, ins, 0xff; .endmacro | ||
226 | |.macro decode_OP8a, dst, ins; andi dst, ins, 0xff; .endmacro | ||
227 | |.macro decode_OP8b, dst; sll dst, dst, 3; .endmacro | ||
228 | |.macro decode_RC8a, dst, ins; srl dst, ins, 13; .endmacro | ||
229 | |.macro decode_RC8b, dst; andi dst, dst, 0x7f8; .endmacro | ||
230 | |.macro decode_RD4b, dst; sll dst, dst, 2; .endmacro | ||
231 | |.macro decode_RA8a, dst, ins; srl dst, ins, 5; .endmacro | ||
232 | |.macro decode_RA8b, dst; andi dst, dst, 0x7f8; .endmacro | ||
233 | |.macro decode_RB8a, dst, ins; srl dst, ins, 21; .endmacro | ||
234 | |.macro decode_RB8b, dst; andi dst, dst, 0x7f8; .endmacro | ||
235 | |.macro decode_RD8a, dst, ins; srl dst, ins, 16; .endmacro | ||
236 | |.macro decode_RD8b, dst; sll dst, dst, 3; .endmacro | ||
237 | |.macro decode_RDtoRC8, dst, src; andi dst, src, 0x7f8; .endmacro | ||
238 | | | ||
239 | |// Instruction fetch: load the 32 bit instruction at PC into INS, then advance PC past it. | ||
240 | |.macro ins_NEXT1 | ||
241 | | lw INS, 0(PC) | ||
242 | | daddiu PC, PC, 4 | ||
243 | |.endmacro | ||
244 | |// Instruction decode+dispatch: jump through the DISPATCH entry at opcode*8, leaving RD*8 in RD and RA*8 in RA (RA is finished in the jr delay slot). Clobbers TMP0, TMP1 and AT. | ||
245 | |.macro ins_NEXT2 | ||
246 | | decode_OP8a TMP1, INS | ||
247 | | decode_OP8b TMP1 | ||
248 | | daddu TMP0, DISPATCH, TMP1 | ||
249 | | decode_RD8a RD, INS | ||
250 | | ld AT, 0(TMP0) | ||
251 | | decode_RA8a RA, INS | ||
252 | | decode_RD8b RD | ||
253 | | jr AT | ||
254 | | decode_RA8b RA | ||
255 | |.endmacro | ||
256 | |.macro ins_NEXT | ||
257 | | ins_NEXT1 | ||
258 | | ins_NEXT2 | ||
259 | |.endmacro | ||
260 | | | ||
261 | |// Instruction footer: ins_next and friends fetch and dispatch the next instruction. | ||
262 | |.if 1 | ||
263 | | // Replicated dispatch. Fewer unpredictable branches, but higher I-Cache use. | ||
264 | | .define ins_next, ins_NEXT | ||
265 | | .define ins_next_, ins_NEXT | ||
266 | | .define ins_next1, ins_NEXT1 | ||
267 | | .define ins_next2, ins_NEXT2 | ||
268 | |.else | ||
269 | | // Common dispatch (disabled by the '.if 1' above). Lower I-Cache use, only one (very) unpredictable branch. | ||
270 | | // Affects only certain kinds of benchmarks (and only with -j off). | ||
271 | | .macro ins_next | ||
272 | | b ->ins_next | ||
273 | | .endmacro | ||
274 | | .macro ins_next1 | ||
275 | | .endmacro | ||
276 | | .macro ins_next2 | ||
277 | | b ->ins_next | ||
278 | | .endmacro | ||
279 | | .macro ins_next_ | ||
280 | | ->ins_next: | ||
281 | | ins_NEXT | ||
282 | | .endmacro | ||
283 | |.endif | ||
284 | | | ||
285 | |// Call decode and dispatch: fetch the first instruction at RB->pc and jump to its handler with RA = BASE + RA*8 (the add sits in the jr delay slot). | ||
286 | |.macro ins_callt | ||
287 | | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC | ||
288 | | ld PC, LFUNC:RB->pc | ||
289 | | lw INS, 0(PC) | ||
290 | | daddiu PC, PC, 4 | ||
291 | | decode_OP8a TMP1, INS | ||
292 | | decode_RA8a RA, INS | ||
293 | | decode_OP8b TMP1 | ||
294 | | decode_RA8b RA | ||
295 | | daddu TMP0, DISPATCH, TMP1 | ||
296 | | ld TMP0, 0(TMP0) | ||
297 | | jr TMP0 | ||
298 | | daddu RA, RA, BASE | ||
299 | |.endmacro | ||
300 | | | ||
301 | |.macro ins_call | ||
302 | | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, PC = caller PC (stored to FRAME_PC(BASE) before dispatching) | ||
303 | | sd PC, FRAME_PC(BASE) | ||
304 | | ins_callt | ||
305 | |.endmacro | ||
306 | | | ||
307 | |//----------------------------------------------------------------------- | ||
308 | | | ||
309 | |.macro branch_RD | ||
310 | | srl TMP0, RD, 1 | ||
311 | | lui AT, (-(BCBIAS_J*4 >> 16) & 65535) | ||
312 | | addu TMP0, TMP0, AT | ||
313 | | daddu PC, PC, TMP0 | ||
314 | |.endmacro | ||
315 | | | ||
316 | |// Assumes DISPATCH is relative to GL. | ||
317 | #define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) | ||
318 | #define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) | ||
319 | #define GG_DISP2GOT (GG_OFS(got) - GG_OFS(dispatch)) | ||
320 | #define DISPATCH_GOT(name) (GG_DISP2GOT + sizeof(void*)*LJ_GOT_##name) | ||
321 | | | ||
322 | #define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) | ||
323 | | | ||
324 | |.macro load_got, func | ||
325 | | ld CFUNCADDR, DISPATCH_GOT(func)(DISPATCH) | ||
326 | |.endmacro | ||
327 | |// A direct bal (the commented-out variant below) would be much faster than jalr via CFUNCADDR. Sadly, there's no easy way to force the required code layout. Hence call_intern ignores func and relies on a preceding load_got. | ||
328 | |// .macro call_intern, func; bal extern func; .endmacro | ||
329 | |.macro call_intern, func; jalr CFUNCADDR; .endmacro | ||
330 | |.macro call_extern; jalr CFUNCADDR; .endmacro | ||
331 | |.macro jmp_extern; jr CFUNCADDR; .endmacro | ||
332 | | | ||
333 | |.macro hotcheck, delta, target | ||
334 | | dsrl TMP1, PC, 1 | ||
335 | | andi TMP1, TMP1, 126 | ||
336 | | daddu TMP1, TMP1, DISPATCH | ||
337 | | lhu TMP2, GG_DISP2HOT(TMP1) | ||
338 | | addiu TMP2, TMP2, -delta | ||
339 | | bltz TMP2, target | ||
340 | |. sh TMP2, GG_DISP2HOT(TMP1) | ||
341 | |.endmacro | ||
342 | | | ||
343 | |.macro hotloop | ||
344 | | hotcheck HOTCOUNT_LOOP, ->vm_hotloop | ||
345 | |.endmacro | ||
346 | | | ||
347 | |.macro hotcall | ||
348 | | hotcheck HOTCOUNT_CALL, ->vm_hotcall | ||
349 | |.endmacro | ||
350 | | | ||
351 | |// Set current VM state. Uses TMP0. | ||
352 | |.macro li_vmstate, st; li TMP0, ~LJ_VMST_..st; .endmacro | ||
353 | |.macro st_vmstate; sw TMP0, DISPATCH_GL(vmstate)(DISPATCH); .endmacro | ||
354 | | | ||
355 | |// Move table write barrier back: clear the black bit in mark, store it to tab->marked, push tab onto the gc.grayagain list and branch to target. Overwrites mark and tmp. Presumably mark must hold tab->marked on entry -- confirm at the call sites. | ||
356 | |.macro barrierback, tab, mark, tmp, target | ||
357 | | ld tmp, DISPATCH_GL(gc.grayagain)(DISPATCH) | ||
358 | | andi mark, mark, ~LJ_GC_BLACK & 255 // black2gray(tab) | ||
359 | | sd tab, DISPATCH_GL(gc.grayagain)(DISPATCH) | ||
360 | | sb mark, tab->marked | ||
361 | | b target | ||
362 | |. sd tmp, tab->gclist | ||
363 | |.endmacro | ||
364 | | | ||
365 | |// Clear type tag. Isolate lowest 14+32+1=47 bits of reg. | ||
366 | |.macro cleartp, reg; dextm reg, reg, 0, 14; .endmacro | ||
367 | |.macro cleartp, dst, reg; dextm dst, reg, 0, 14; .endmacro | ||
368 | | | ||
369 | |// Set type tag: Merge 17 type bits into bits [15+32=47, 31+32+1=64) of dst. | ||
370 | |.macro settp, dst, tp; dinsu dst, tp, 15, 31; .endmacro | ||
371 | | | ||
372 | |// Extract (negative) type tag. | ||
373 | |.macro gettp, dst, src; dsra dst, src, 47; .endmacro | ||
374 | | | ||
375 | |// Macros to check the TValue type and extract the GCobj. Branch on failure. For checktp and its wrappers the type tag is cleared in the branch delay slot, i.e. on both paths. All of these clobber AT. | ||
376 | |.macro checktp, reg, tp, target | ||
377 | | gettp AT, reg | ||
378 | | daddiu AT, AT, tp | ||
379 | | bnez AT, target | ||
380 | |. cleartp reg | ||
381 | |.endmacro | ||
382 | |.macro checktp, dst, reg, tp, target | ||
383 | | gettp AT, reg | ||
384 | | daddiu AT, AT, tp | ||
385 | | bnez AT, target | ||
386 | |. cleartp dst, reg | ||
387 | |.endmacro | ||
388 | |.macro checkstr, reg, target; checktp reg, -LJ_TSTR, target; .endmacro | ||
389 | |.macro checktab, reg, target; checktp reg, -LJ_TTAB, target; .endmacro | ||
390 | |.macro checkfunc, reg, target; checktp reg, -LJ_TFUNC, target; .endmacro | ||
391 | |.macro checkint, reg, target // Caveat: ends with the branch, so the next instruction emitted by the caller becomes its delay slot! | ||
392 | | gettp AT, reg | ||
393 | | bne AT, TISNUM, target | ||
394 | |.endmacro | ||
395 | |.macro checknum, reg, target // Caveat: ends with the branch, so the next instruction emitted by the caller becomes its delay slot! | ||
396 | | gettp AT, reg | ||
397 | | sltiu AT, AT, LJ_TISNUM | ||
398 | | beqz AT, target | ||
399 | |.endmacro | ||
400 | | | ||
401 | |.macro mov_false, reg | ||
402 | | lu reg, 0x8000 | ||
403 | | dsll reg, reg, 32 | ||
404 | | not reg, reg | ||
405 | |.endmacro | ||
406 | |.macro mov_true, reg | ||
407 | | li reg, 0x0001 | ||
408 | | dsll reg, reg, 48 | ||
409 | | not reg, reg | ||
410 | |.endmacro | ||
411 | | | ||
412 | |//----------------------------------------------------------------------- | ||
413 | |||
414 | /* Generate subroutines used by opcodes and other parts of the VM. */ | ||
415 | /* The .code_sub section should be last to help static branch prediction. */ | ||
416 | static void build_subroutines(BuildCtx *ctx) | ||
417 | { | ||
418 | |.code_sub | ||
419 | | | ||
420 | |//----------------------------------------------------------------------- | ||
421 | |//-- Return handling ---------------------------------------------------- | ||
422 | |//----------------------------------------------------------------------- | ||
423 | | | ||
424 | |->vm_returnp: | ||
425 | | // See vm_return. Also: TMP2 = previous base. | ||
426 | | andi AT, PC, FRAME_P | ||
427 | | beqz AT, ->cont_dispatch | ||
428 | | | ||
429 | | // Return from pcall or xpcall fast func. | ||
430 | |. mov_true TMP1 | ||
431 | | ld PC, FRAME_PC(TMP2) // Fetch PC of previous frame. | ||
432 | | move BASE, TMP2 // Restore caller base. | ||
433 | | // Prepending may overwrite the pcall frame, so do it at the end. | ||
434 | | sd TMP1, -8(RA) // Prepend true to results. | ||
435 | | daddiu RA, RA, -8 | ||
436 | | | ||
437 | |->vm_returnc: | ||
438 | | addiu RD, RD, 8 // RD = (nresults+1)*8. | ||
439 | | andi TMP0, PC, FRAME_TYPE | ||
440 | | beqz RD, ->vm_unwind_c_eh | ||
441 | |. li CRET1, LUA_YIELD | ||
442 | | beqz TMP0, ->BC_RET_Z // Handle regular return to Lua. | ||
443 | |. move MULTRES, RD | ||
444 | | | ||
445 | |->vm_return: | ||
446 | | // BASE = base, RA = resultptr, RD/MULTRES = (nresults+1)*8, PC = return | ||
447 | | // TMP0 = PC & FRAME_TYPE | ||
448 | | li TMP2, -8 | ||
449 | | xori AT, TMP0, FRAME_C | ||
450 | | and TMP2, PC, TMP2 | ||
451 | | bnez AT, ->vm_returnp | ||
452 | | dsubu TMP2, BASE, TMP2 // Delay slot of the bnez (no '.' marker). TMP2 = previous base, also consumed by vm_returnp. | ||
453 | | | ||
454 | | addiu TMP1, RD, -8 | ||
455 | | sd TMP2, L->base | ||
456 | | li_vmstate C | ||
457 | | lw TMP2, SAVE_NRES | ||
458 | | daddiu BASE, BASE, -16 | ||
459 | | st_vmstate | ||
460 | | beqz TMP1, >2 | ||
461 | |. sll TMP2, TMP2, 3 | ||
462 | |1: | ||
463 | | addiu TMP1, TMP1, -8 | ||
464 | | ld CRET1, 0(RA) | ||
465 | | daddiu RA, RA, 8 | ||
466 | | sd CRET1, 0(BASE) | ||
467 | | bnez TMP1, <1 | ||
468 | |. daddiu BASE, BASE, 8 | ||
469 | | | ||
470 | |2: | ||
471 | | bne TMP2, RD, >6 | ||
472 | |3: | ||
473 | |. sd BASE, L->top // Store new top. Delay slot of the bne above and also the landing point of 'b <3'. | ||
474 | | | ||
475 | |->vm_leave_cp: | ||
476 | | ld TMP0, SAVE_CFRAME // Restore previous C frame. | ||
477 | | move CRET1, r0 // Ok return status for vm_pcall. | ||
478 | | sd TMP0, L->cframe | ||
479 | | | ||
480 | |->vm_leave_unw: | ||
481 | | restoreregs_ret | ||
482 | | | ||
483 | |6: | ||
484 | | ld TMP1, L->maxstack | ||
485 | | slt AT, TMP2, RD | ||
486 | | bnez AT, >7 // Fewer results wanted? | ||
487 | | // More results wanted. Check stack size and fill up results with nil. | ||
488 | |. slt AT, BASE, TMP1 | ||
489 | | beqz AT, >8 | ||
490 | |. nop | ||
491 | | sd TISNIL, 0(BASE) | ||
492 | | addiu RD, RD, 8 | ||
493 | | b <2 | ||
494 | |. daddiu BASE, BASE, 8 | ||
495 | | | ||
496 | |7: // Fewer results wanted. | ||
497 | | subu TMP0, RD, TMP2 | ||
498 | | dsubu TMP0, BASE, TMP0 // Either keep top or shrink it. | ||
499 | |.if MIPSR6 | ||
500 | | selnez TMP0, TMP0, TMP2 // LUA_MULTRET+1 case? | ||
501 | | seleqz BASE, BASE, TMP2 | ||
502 | | b <3 | ||
503 | |. or BASE, BASE, TMP0 | ||
504 | |.else | ||
505 | | b <3 | ||
506 | |. movn BASE, TMP0, TMP2 // LUA_MULTRET+1 case? | ||
507 | |.endif | ||
508 | | | ||
509 | |8: // Corner case: need to grow stack for filling up results. | ||
510 | | // This can happen if: | ||
511 | | // - A C function grows the stack (a lot). | ||
512 | | // - The GC shrinks the stack in between. | ||
513 | | // - A return back from a lua_call() with (high) nresults adjustment. | ||
514 | | load_got lj_state_growstack | ||
515 | | move MULTRES, RD | ||
516 | | srl CARG2, TMP2, 3 | ||
517 | | call_intern lj_state_growstack // (lua_State *L, int n) | ||
518 | |. move CARG1, L | ||
519 | | lw TMP2, SAVE_NRES | ||
520 | | ld BASE, L->top // Need the (realloced) L->top in BASE. | ||
521 | | move RD, MULTRES | ||
522 | | b <2 | ||
523 | |. sll TMP2, TMP2, 3 | ||
524 | | | ||
525 | |->vm_unwind_c: // Unwind C stack, return from vm_pcall. | ||
526 | | // (void *cframe, int errcode) | ||
527 | | move sp, CARG1 | ||
528 | | move CRET1, CARG2 | ||
529 | |->vm_unwind_c_eh: // Landing pad for external unwinder. | ||
530 | | ld L, SAVE_L | ||
531 | | li TMP0, ~LJ_VMST_C | ||
532 | | ld GL:TMP1, L->glref | ||
533 | | b ->vm_leave_unw | ||
534 | |. sw TMP0, GL:TMP1->vmstate | ||
535 | | | ||
536 | |->vm_unwind_ff: // Unwind C stack, return from ff pcall. | ||
537 | | // (void *cframe) | ||
538 | | li AT, -4 | ||
539 | | and sp, CARG1, AT | ||
540 | |->vm_unwind_ff_eh: // Landing pad for external unwinder. | ||
541 | | ld L, SAVE_L | ||
542 | | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | ||
543 | | li TISNIL, LJ_TNIL | ||
544 | | li TISNUM, LJ_TISNUM | ||
545 | | ld BASE, L->base | ||
546 | | ld DISPATCH, L->glref // Setup pointer to dispatch table. | ||
547 | | .FPU mtc1 TMP3, TOBIT | ||
548 | | mov_false TMP1 | ||
549 | | li_vmstate INTERP | ||
550 | | ld PC, FRAME_PC(BASE) // Fetch PC of previous frame. | ||
551 | | .FPU cvt.d.s TOBIT, TOBIT | ||
552 | | daddiu RA, BASE, -8 // Results start at BASE-8. | ||
553 | | daddiu DISPATCH, DISPATCH, GG_G2DISP | ||
554 | | sd TMP1, 0(RA) // Prepend false to error message. | ||
555 | | st_vmstate | ||
556 | | b ->vm_returnc | ||
557 | |. li RD, 16 // 2 results: false + error message. | ||
558 | | | ||
559 | |//----------------------------------------------------------------------- | ||
560 | |//-- Grow stack for calls ----------------------------------------------- | ||
561 | |//----------------------------------------------------------------------- | ||
562 | | | ||
563 | |->vm_growstack_c: // Grow stack for C function. | ||
564 | | b >2 | ||
565 | |. li CARG2, LUA_MINSTACK | ||
566 | | | ||
567 | |->vm_growstack_l: // Grow stack for Lua function. | ||
568 | | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC | ||
569 | | daddu RC, BASE, RC | ||
570 | | dsubu RA, RA, BASE | ||
571 | | sd BASE, L->base | ||
572 | | daddiu PC, PC, 4 // Must point after first instruction. | ||
573 | | sd RC, L->top | ||
574 | | srl CARG2, RA, 3 | ||
575 | |2: | ||
576 | | // L->base = new base, L->top = top | ||
577 | | load_got lj_state_growstack | ||
578 | | sd PC, SAVE_PC | ||
579 | | call_intern lj_state_growstack // (lua_State *L, int n) | ||
580 | |. move CARG1, L | ||
581 | | ld BASE, L->base | ||
582 | | ld RC, L->top | ||
583 | | ld LFUNC:RB, FRAME_FUNC(BASE) | ||
584 | | dsubu RC, RC, BASE | ||
585 | | cleartp LFUNC:RB | ||
586 | | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC | ||
587 | | ins_callt // Just retry the call. | ||
588 | | | ||
589 | |//----------------------------------------------------------------------- | ||
590 | |//-- Entry points into the assembler VM --------------------------------- | ||
591 | |//----------------------------------------------------------------------- | ||
592 | | | ||
593 | |->vm_resume: // Setup C frame and resume thread. | ||
594 | | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) | ||
595 | | saveregs | ||
596 | | move L, CARG1 | ||
597 | | ld DISPATCH, L->glref // Setup pointer to dispatch table. | ||
598 | | move BASE, CARG2 | ||
599 | | lbu TMP1, L->status | ||
600 | | sd L, SAVE_L | ||
601 | | li PC, FRAME_CP | ||
602 | | daddiu TMP0, sp, CFRAME_RESUME | ||
603 | | daddiu DISPATCH, DISPATCH, GG_G2DISP | ||
604 | | sw r0, SAVE_NRES | ||
605 | | sw r0, SAVE_ERRF | ||
606 | | sd CARG1, SAVE_PC // Any value outside of bytecode is ok. | ||
607 | | sd r0, SAVE_CFRAME | ||
608 | | beqz TMP1, >3 | ||
609 | |. sd TMP0, L->cframe | ||
610 | | | ||
611 | | // Resume after yield (like a return). | ||
612 | | sd L, DISPATCH_GL(cur_L)(DISPATCH) | ||
613 | | move RA, BASE | ||
614 | | ld BASE, L->base | ||
615 | | ld TMP1, L->top | ||
616 | | ld PC, FRAME_PC(BASE) | ||
617 | | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | ||
618 | | dsubu RD, TMP1, BASE | ||
619 | | .FPU mtc1 TMP3, TOBIT | ||
620 | | sb r0, L->status | ||
621 | | .FPU cvt.d.s TOBIT, TOBIT | ||
622 | | li_vmstate INTERP | ||
623 | | daddiu RD, RD, 8 | ||
624 | | st_vmstate | ||
625 | | move MULTRES, RD | ||
626 | | andi TMP0, PC, FRAME_TYPE | ||
627 | | li TISNIL, LJ_TNIL | ||
628 | | beqz TMP0, ->BC_RET_Z | ||
629 | |. li TISNUM, LJ_TISNUM | ||
630 | | b ->vm_return | ||
631 | |. nop | ||
632 | | | ||
633 | |->vm_pcall: // Setup protected C frame and enter VM. | ||
634 | | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) | ||
635 | | saveregs | ||
636 | | sw CARG4, SAVE_ERRF | ||
637 | | b >1 | ||
638 | |. li PC, FRAME_CP | ||
639 | | | ||
640 | |->vm_call: // Setup C frame and enter VM. | ||
641 | | // (lua_State *L, TValue *base, int nres1) | ||
642 | | saveregs | ||
643 | | li PC, FRAME_C | ||
644 | | | ||
645 | |1: // Entry point for vm_pcall above (PC = ftype). | ||
646 | | ld TMP1, L:CARG1->cframe | ||
647 | | move L, CARG1 | ||
648 | | sw CARG3, SAVE_NRES | ||
649 | | ld DISPATCH, L->glref // Setup pointer to dispatch table. | ||
650 | | sd CARG1, SAVE_L | ||
651 | | move BASE, CARG2 | ||
652 | | daddiu DISPATCH, DISPATCH, GG_G2DISP | ||
653 | | sd CARG1, SAVE_PC // Any value outside of bytecode is ok. | ||
654 | | sd TMP1, SAVE_CFRAME | ||
655 | | sd sp, L->cframe // Add our C frame to cframe chain. | ||
656 | | | ||
657 | |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). | ||
658 | | sd L, DISPATCH_GL(cur_L)(DISPATCH) | ||
659 | | ld TMP2, L->base // TMP2 = old base (used in vmeta_call). | ||
660 | | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | ||
661 | | ld TMP1, L->top | ||
662 | | .FPU mtc1 TMP3, TOBIT | ||
663 | | daddu PC, PC, BASE | ||
664 | | dsubu NARGS8:RC, TMP1, BASE | ||
665 | | li TISNUM, LJ_TISNUM | ||
666 | | dsubu PC, PC, TMP2 // PC = frame delta + frame type | ||
667 | | .FPU cvt.d.s TOBIT, TOBIT | ||
668 | | li_vmstate INTERP | ||
669 | | li TISNIL, LJ_TNIL | ||
670 | | st_vmstate | ||
671 | | | ||
672 | |->vm_call_dispatch: | ||
673 | | // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC | ||
674 | | ld LFUNC:RB, FRAME_FUNC(BASE) | ||
675 | | checkfunc LFUNC:RB, ->vmeta_call | ||
676 | | | ||
677 | |->vm_call_dispatch_f: | ||
678 | | ins_call | ||
679 | | // BASE = new base, RB = func, RC = nargs*8, PC = caller PC | ||
680 | | | ||
681 | |->vm_cpcall: // Setup protected C frame, call C. | ||
682 | | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) | ||
683 | | saveregs | ||
684 | | move L, CARG1 | ||
685 | | ld TMP0, L:CARG1->stack | ||
686 | | sd CARG1, SAVE_L | ||
687 | | ld TMP1, L->top | ||
688 | | ld DISPATCH, L->glref // Setup pointer to dispatch table. | ||
689 | | sd CARG1, SAVE_PC // Any value outside of bytecode is ok. | ||
690 | | dsubu TMP0, TMP0, TMP1 // Compute -savestack(L, L->top). | ||
691 | | ld TMP1, L->cframe | ||
692 | | daddiu DISPATCH, DISPATCH, GG_G2DISP | ||
693 | | sw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame. | ||
694 | | sw r0, SAVE_ERRF // No error function. | ||
695 | | sd TMP1, SAVE_CFRAME | ||
696 | | sd sp, L->cframe // Add our C frame to cframe chain. | ||
697 | | sd L, DISPATCH_GL(cur_L)(DISPATCH) | ||
698 | | jalr CARG4 // (lua_State *L, lua_CFunction func, void *ud) | ||
699 | |. move CFUNCADDR, CARG4 | ||
700 | | move BASE, CRET1 | ||
701 | | bnez CRET1, <3 // Else continue with the call. | ||
702 | |. li PC, FRAME_CP | ||
703 | | b ->vm_leave_cp // No base? Just remove C frame. | ||
704 | |. nop | ||
705 | | | ||
706 | |//----------------------------------------------------------------------- | ||
707 | |//-- Metamethod handling ------------------------------------------------ | ||
708 | |//----------------------------------------------------------------------- | ||
709 | | | ||
710 | |// The lj_meta_* functions (except for lj_meta_cat) don't reallocate the | ||
711 | |// stack, so BASE doesn't need to be reloaded across these calls. | ||
712 | | | ||
713 | |//-- Continuation dispatch ---------------------------------------------- | ||
714 | | | ||
715 | |->cont_dispatch: | ||
716 | | // BASE = meta base, RA = resultptr, RD = (nresults+1)*8 | ||
717 | | ld TMP0, -32(BASE) // Continuation. | ||
718 | | move RB, BASE | ||
719 | | move BASE, TMP2 // Restore caller BASE. | ||
720 | | ld LFUNC:TMP1, FRAME_FUNC(TMP2) | ||
721 | |.if FFI | ||
722 | | sltiu AT, TMP0, 2 | ||
723 | |.endif | ||
724 | | ld PC, -24(RB) // Restore PC from [cont|PC]. | ||
725 | | cleartp LFUNC:TMP1 | ||
726 | | daddu TMP2, RA, RD | ||
727 | | ld TMP1, LFUNC:TMP1->pc | ||
728 | |.if FFI | ||
729 | | bnez AT, >1 | ||
730 | |.endif | ||
731 | |. sd TISNIL, -8(TMP2) // Ensure one valid arg. | ||
732 | | // BASE = base, RA = resultptr, RB = meta base | ||
733 | | jr TMP0 // Jump to continuation. | ||
734 | |. ld KBASE, PC2PROTO(k)(TMP1) | ||
735 | | | ||
736 | |.if FFI | ||
737 | |1: | ||
738 | | bnez TMP0, ->cont_ffi_callback // cont = 1: return from FFI callback. | ||
739 | | // cont = 0: tailcall from C function. | ||
740 | |. daddiu TMP1, RB, -32 | ||
741 | | b ->vm_call_tail | ||
742 | |. dsubu RC, TMP1, BASE | ||
743 | |.endif | ||
744 | | | ||
745 | |->cont_cat: // RA = resultptr, RB = meta base | ||
746 | | lw INS, -4(PC) | ||
747 | | daddiu CARG2, RB, -32 | ||
748 | | ld CRET1, 0(RA) | ||
749 | | decode_RB8a MULTRES, INS | ||
750 | | decode_RA8a RA, INS | ||
751 | | decode_RB8b MULTRES | ||
752 | | decode_RA8b RA | ||
753 | | daddu TMP1, BASE, MULTRES | ||
754 | | sd BASE, L->base | ||
755 | | dsubu CARG3, CARG2, TMP1 | ||
756 | | bne TMP1, CARG2, ->BC_CAT_Z | ||
757 | |. sd CRET1, 0(CARG2) | ||
758 | | daddu RA, BASE, RA | ||
759 | | b ->cont_nop | ||
760 | |. sd CRET1, 0(RA) | ||
761 | | | ||
762 | |//-- Table indexing metamethods ----------------------------------------- | ||
763 | | | ||
764 | |->vmeta_tgets1: | ||
765 | | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv) | ||
766 | | li TMP0, LJ_TSTR | ||
767 | | settp STR:RC, TMP0 | ||
768 | | b >1 | ||
769 | |. sd STR:RC, 0(CARG3) | ||
770 | | | ||
771 | |->vmeta_tgets: | ||
772 | | daddiu CARG2, DISPATCH, DISPATCH_GL(tmptv) | ||
773 | | li TMP0, LJ_TTAB | ||
774 | | li TMP1, LJ_TSTR | ||
775 | | settp TAB:RB, TMP0 | ||
776 | | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv2) | ||
777 | | sd TAB:RB, 0(CARG2) | ||
778 | | settp STR:RC, TMP1 | ||
779 | | b >1 | ||
780 | |. sd STR:RC, 0(CARG3) | ||
781 | | | ||
782 | |->vmeta_tgetb: // TMP0 = index | ||
783 | | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv) | ||
784 | | settp TMP0, TISNUM | ||
785 | | sd TMP0, 0(CARG3) | ||
786 | | | ||
787 | |->vmeta_tgetv: | ||
788 | |1: | ||
789 | | load_got lj_meta_tget | ||
790 | | sd BASE, L->base | ||
791 | | sd PC, SAVE_PC | ||
792 | | call_intern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) | ||
793 | |. move CARG1, L | ||
794 | | // Returns TValue * (finished) or NULL (metamethod). | ||
795 | | beqz CRET1, >3 | ||
796 | |. daddiu TMP1, BASE, -FRAME_CONT | ||
797 | | ld CARG1, 0(CRET1) | ||
798 | | ins_next1 | ||
799 | | sd CARG1, 0(RA) | ||
800 | | ins_next2 | ||
801 | | | ||
802 | |3: // Call __index metamethod. | ||
803 | | // BASE = base, L->top = new base, stack = cont/func/t/k | ||
804 | | ld BASE, L->top | ||
805 | | sd PC, -24(BASE) // [cont|PC] | ||
806 | | dsubu PC, BASE, TMP1 | ||
807 | | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. | ||
808 | | cleartp LFUNC:RB | ||
809 | | b ->vm_call_dispatch_f | ||
810 | |. li NARGS8:RC, 16 // 2 args for func(t, k). | ||
811 | | | ||
812 | |->vmeta_tgetr: | ||
813 | | load_got lj_tab_getinth | ||
814 | | call_intern lj_tab_getinth // (GCtab *t, int32_t key) | ||
815 | |. nop | ||
816 | | // Returns cTValue * or NULL. | ||
817 | | beqz CRET1, ->BC_TGETR_Z | ||
818 | |. move CARG2, TISNIL | ||
819 | | b ->BC_TGETR_Z | ||
820 | |. ld CARG2, 0(CRET1) | ||
821 | | | ||
822 | |//----------------------------------------------------------------------- | ||
823 | | | ||
824 | |->vmeta_tsets1: | ||
825 | | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv) | ||
826 | | li TMP0, LJ_TSTR | ||
827 | | settp STR:RC, TMP0 | ||
828 | | b >1 | ||
829 | |. sd STR:RC, 0(CARG3) | ||
830 | | | ||
831 | |->vmeta_tsets: | ||
832 | | daddiu CARG2, DISPATCH, DISPATCH_GL(tmptv) | ||
833 | | li TMP0, LJ_TTAB | ||
834 | | li TMP1, LJ_TSTR | ||
835 | | settp TAB:RB, TMP0 | ||
836 | | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv2) | ||
837 | | sd TAB:RB, 0(CARG2) | ||
838 | | settp STR:RC, TMP1 | ||
839 | | b >1 | ||
840 | |. sd STR:RC, 0(CARG3) | ||
841 | | | ||
842 | |->vmeta_tsetb: // TMP0 = index | ||
843 | | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv) | ||
844 | | settp TMP0, TISNUM | ||
845 | | sd TMP0, 0(CARG3) | ||
846 | | | ||
847 | |->vmeta_tsetv: | ||
848 | |1: | ||
849 | | load_got lj_meta_tset | ||
850 | | sd BASE, L->base | ||
851 | | sd PC, SAVE_PC | ||
852 | | call_intern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) | ||
853 | |. move CARG1, L | ||
854 | | // Returns TValue * (finished) or NULL (metamethod). | ||
855 | | beqz CRET1, >3 | ||
856 | |. ld CARG1, 0(RA) | ||
857 | | // NOBARRIER: lj_meta_tset ensures the table is not black. | ||
858 | | ins_next1 | ||
859 | | sd CARG1, 0(CRET1) | ||
860 | | ins_next2 | ||
861 | | | ||
862 | |3: // Call __newindex metamethod. | ||
863 | | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) | ||
864 | | daddiu TMP1, BASE, -FRAME_CONT | ||
865 | | ld BASE, L->top | ||
866 | | sd PC, -24(BASE) // [cont|PC] | ||
867 | | dsubu PC, BASE, TMP1 | ||
868 | | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. | ||
869 | | cleartp LFUNC:RB | ||
870 | | sd CARG1, 16(BASE) // Copy value to third argument. | ||
871 | | b ->vm_call_dispatch_f | ||
872 | |. li NARGS8:RC, 24 // 3 args for func(t, k, v) | ||
873 | | | ||
874 | |->vmeta_tsetr: | ||
875 | | load_got lj_tab_setinth | ||
876 | | sd BASE, L->base | ||
877 | | sd PC, SAVE_PC | ||
878 | | call_intern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) | ||
879 | |. move CARG1, L | ||
880 | | // Returns TValue *. | ||
881 | | b ->BC_TSETR_Z | ||
882 | |. nop | ||
883 | | | ||
884 | |//-- Comparison metamethods --------------------------------------------- | ||
885 | | | ||
886 | |->vmeta_comp: | ||
887 | | // RA/RD point to o1/o2. | ||
888 | | move CARG2, RA | ||
889 | | move CARG3, RD | ||
890 | | load_got lj_meta_comp | ||
891 | | daddiu PC, PC, -4 | ||
892 | | sd BASE, L->base | ||
893 | | sd PC, SAVE_PC | ||
894 | | decode_OP1 CARG4, INS | ||
895 | | call_intern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) | ||
896 | |. move CARG1, L | ||
897 | | // Returns 0/1 or TValue * (metamethod). | ||
898 | |3: | ||
899 | | sltiu AT, CRET1, 2 | ||
900 | | beqz AT, ->vmeta_binop | ||
901 | | negu TMP2, CRET1 | ||
902 | |4: | ||
903 | | lhu RD, OFS_RD(PC) | ||
904 | | daddiu PC, PC, 4 | ||
905 | | lui TMP1, (-(BCBIAS_J*4 >> 16) & 65535) | ||
906 | | sll RD, RD, 2 | ||
907 | | addu RD, RD, TMP1 | ||
908 | | and RD, RD, TMP2 | ||
909 | | daddu PC, PC, RD | ||
910 | |->cont_nop: | ||
911 | | ins_next | ||
912 | | | ||
913 | |->cont_ra: // RA = resultptr | ||
914 | | lbu TMP1, -4+OFS_RA(PC) | ||
915 | | ld CRET1, 0(RA) | ||
916 | | sll TMP1, TMP1, 3 | ||
917 | | daddu TMP1, BASE, TMP1 | ||
918 | | b ->cont_nop | ||
919 | |. sd CRET1, 0(TMP1) | ||
920 | | | ||
921 | |->cont_condt: // RA = resultptr | ||
922 | | ld TMP0, 0(RA) | ||
923 | | gettp TMP0, TMP0 | ||
924 | | sltiu AT, TMP0, LJ_TISTRUECOND | ||
925 | | b <4 | ||
926 | |. negu TMP2, AT // Branch if result is true. | ||
927 | | | ||
928 | |->cont_condf: // RA = resultptr | ||
929 | | ld TMP0, 0(RA) | ||
930 | | gettp TMP0, TMP0 | ||
931 | | sltiu AT, TMP0, LJ_TISTRUECOND | ||
932 | | b <4 | ||
933 | |. addiu TMP2, AT, -1 // Branch if result is false. | ||
934 | | | ||
935 | |->vmeta_equal: | ||
936 | | // CARG1/CARG2 point to o1/o2. TMP0 is set to 0/1. | ||
937 | | load_got lj_meta_equal | ||
938 | | cleartp LFUNC:CARG3, CARG2 | ||
939 | | cleartp LFUNC:CARG2, CARG1 | ||
940 | | move CARG4, TMP0 | ||
941 | | daddiu PC, PC, -4 | ||
942 | | sd BASE, L->base | ||
943 | | sd PC, SAVE_PC | ||
944 | | call_intern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) | ||
945 | |. move CARG1, L | ||
946 | | // Returns 0/1 or TValue * (metamethod). | ||
947 | | b <3 | ||
948 | |. nop | ||
949 | | | ||
950 | |->vmeta_equal_cd: | ||
951 | |.if FFI | ||
952 | | load_got lj_meta_equal_cd | ||
953 | | move CARG2, INS | ||
954 | | daddiu PC, PC, -4 | ||
955 | | sd BASE, L->base | ||
956 | | sd PC, SAVE_PC | ||
957 | | call_intern lj_meta_equal_cd // (lua_State *L, BCIns op) | ||
958 | |. move CARG1, L | ||
959 | | // Returns 0/1 or TValue * (metamethod). | ||
960 | | b <3 | ||
961 | |. nop | ||
962 | |.endif | ||
963 | | | ||
964 | |->vmeta_istype: | ||
965 | | load_got lj_meta_istype | ||
966 | | daddiu PC, PC, -4 | ||
967 | | sd BASE, L->base | ||
968 | | srl CARG2, RA, 3 | ||
969 | | srl CARG3, RD, 3 | ||
970 | | sd PC, SAVE_PC | ||
971 | | call_intern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) | ||
972 | |. move CARG1, L | ||
973 | | b ->cont_nop | ||
974 | |. nop | ||
975 | | | ||
976 | |//-- Arithmetic metamethods --------------------------------------------- | ||
977 | | | ||
// Arithmetic metamethod fallback chain (three labels, each falling through
// into the next):
//   ->vmeta_unm   copies RB into RC, then falls into ->vmeta_arith.
//   ->vmeta_arith calls lj_meta_arith(L, ra, rb, rc, op); a NULL result goes
//                 to ->cont_nop, otherwise execution falls into ->vmeta_binop.
//   ->vmeta_binop sets up a continuation frame and calls the metamethod.
978 | |->vmeta_unm: | |
979 | | move RC, RB | |
980 | | | |
981 | |->vmeta_arith: | |
982 | | load_got lj_meta_arith | |
983 | | sd BASE, L->base | |
984 | | move CARG2, RA | |
985 | | sd PC, SAVE_PC | |
986 | | move CARG3, RB | |
987 | | move CARG4, RC | |
988 | | decode_OP1 CARG5, INS // CARG5 == RB. | |
989 | | call_intern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) | |
990 | |. move CARG1, L | |
991 | | // Returns NULL (finished) or TValue * (metamethod). | |
992 | | beqz CRET1, ->cont_nop | |
993 | |. nop | |
994 | | | |
995 | | // Call metamethod for binary op. | |
996 | |->vmeta_binop: | |
997 | | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2 | |
// TMP1 = distance from old to new base; PC becomes that delta + FRAME_CONT.
998 | | dsubu TMP1, CRET1, BASE | |
999 | | sd PC, -24(CRET1) // [cont|PC] | |
1000 | | move TMP2, BASE | |
1001 | | daddiu PC, TMP1, FRAME_CONT | |
1002 | | move BASE, CRET1 | |
1003 | | b ->vm_call_dispatch | |
1004 | |. li NARGS8:RC, 16 // 2 args for func(o1, o2). | |
1005 | | | ||
// Length metamethod fallback: calls lj_meta_len(L, o).
// With LJ_52, CARG1 is kept in MULTRES across the call; a NULL result retries
// at ->BC_LEN_Z with CARG1 restored, a non-NULL result goes to ->vmeta_binop.
// Without LJ_52 the result is always handed to ->vmeta_binop.
1006 | |->vmeta_len: | |
1007 | | // CARG2 already set by BC_LEN. | |
1008 | #if LJ_52 | |
1009 | | move MULTRES, CARG1 | |
1010 | #endif | |
1011 | | load_got lj_meta_len | |
1012 | | sd BASE, L->base | |
1013 | | sd PC, SAVE_PC | |
1014 | | call_intern lj_meta_len // (lua_State *L, TValue *o) | |
1015 | |. move CARG1, L | |
1016 | | // Returns NULL (retry) or TValue * (metamethod base). | |
1017 | #if LJ_52 | |
1018 | | bnez CRET1, ->vmeta_binop // Binop call for compatibility. | |
1019 | |. nop | |
1020 | | b ->BC_LEN_Z | |
1021 | |. move CARG1, MULTRES | |
1022 | #else | |
1023 | | b ->vmeta_binop // Binop call for compatibility. | |
1024 | |. nop | |
1025 | #endif | |
1026 | | | ||
1027 | |//-- Call metamethod ---------------------------------------------------- | ||
1028 | | | ||
// Calls lj_meta_call(L, func, top) with func = BASE-16 and top = BASE+RC.
// RC is kept in MULTRES across the C call; afterwards the argument size is
// bumped by 8 (one extra argument) and the function found at FRAME_FUNC(BASE)
// is invoked via ins_call.
1029 | |->vmeta_call: // Resolve and call __call metamethod. | |
1030 | | // TMP2 = old base, BASE = new base, RC = nargs*8 | |
1031 | | load_got lj_meta_call | |
1032 | | sd TMP2, L->base // This is the callers base! | |
1033 | | daddiu CARG2, BASE, -16 | |
1034 | | sd PC, SAVE_PC | |
1035 | | daddu CARG3, BASE, RC | |
1036 | | move MULTRES, NARGS8:RC | |
1037 | | call_intern lj_meta_call // (lua_State *L, TValue *func, TValue *top) | |
1038 | |. move CARG1, L | |
1039 | | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. | |
1040 | | daddiu NARGS8:RC, MULTRES, 8 // Got one more argument now. | |
1041 | | cleartp LFUNC:RB | |
1042 | | ins_call | |
1043 | | | ||
// Tail-call variant: same lj_meta_call(L, func, top) call, but func/top are
// computed from RA (new base) while L->base is set from BASE (old base).
// Continues at ->BC_CALLT_Z with FRAME_PC(BASE) in TMP1 and the function
// pointer (tag cleared in the delay slot) in CARG3.
1044 | |->vmeta_callt: // Resolve __call for BC_CALLT. | |
1045 | | // BASE = old base, RA = new base, RC = nargs*8 | |
1046 | | load_got lj_meta_call | |
1047 | | sd BASE, L->base | |
1048 | | daddiu CARG2, RA, -16 | |
1049 | | sd PC, SAVE_PC | |
1050 | | daddu CARG3, RA, RC | |
1051 | | move MULTRES, NARGS8:RC | |
1052 | | call_intern lj_meta_call // (lua_State *L, TValue *func, TValue *top) | |
1053 | |. move CARG1, L | |
1054 | | ld RB, FRAME_FUNC(RA) // Guaranteed to be a function here. | |
1055 | | ld TMP1, FRAME_PC(BASE) | |
1056 | | daddiu NARGS8:RC, MULTRES, 8 // Got one more argument now. | |
1057 | | b ->BC_CALLT_Z | |
1058 | |. cleartp LFUNC:CARG3, RB | |
1059 | | | ||
1060 | |//-- Argument coercion for 'for' statement ------------------------------ | ||
1061 | | | ||
// 'for' argument coercion: calls lj_meta_for(L, base) with base = RA.
// INS is kept in MULTRES across the call; RA and RD are then re-decoded from
// it. With JIT enabled the opcode is compared against BC_JFORI to choose
// between =>BC_JFORI and =>BC_FORI; otherwise control always goes to =>BC_FORI.
1062 | |->vmeta_for: | |
1063 | | load_got lj_meta_for | |
1064 | | sd BASE, L->base | |
1065 | | move CARG2, RA | |
1066 | | sd PC, SAVE_PC | |
1067 | | move MULTRES, INS | |
1068 | | call_intern lj_meta_for // (lua_State *L, TValue *base) | |
1069 | |. move CARG1, L | |
1070 | |.if JIT | |
1071 | | decode_OP1 TMP0, MULTRES | |
1072 | | li AT, BC_JFORI | |
1073 | |.endif | |
1074 | | decode_RA8a RA, MULTRES | |
1075 | | decode_RD8a RD, MULTRES | |
1076 | | decode_RA8b RA | |
1077 | |.if JIT | |
1078 | | beq TMP0, AT, =>BC_JFORI | |
// decode_RD8b is placed in the delay slot, so it runs on both outcomes.
1079 | |. decode_RD8b RD | |
1080 | | b =>BC_FORI | |
1081 | |. nop | |
1082 | |.else | |
1083 | | b =>BC_FORI | |
1084 | |. decode_RD8b RD | |
1085 | |.endif | |
1086 | | | ||
1087 | |//----------------------------------------------------------------------- | ||
1088 | |//-- Fast functions ----------------------------------------------------- | ||
1089 | |//----------------------------------------------------------------------- | ||
1090 | | | ||
// Fast-function prologue macros. Each one emits the global label ->ff_<name>
// followed by an argument-count / argument-type check that branches to
// ->fff_fallback when the check fails.
//   .ffunc     label only, no checks.
//   .ffunc_1   needs RC != 0; loads arg 1 into CARG1 (in the delay slot).
//   .ffunc_2   needs RC >= 16; loads args 1/2 into CARG1/CARG2.
//   .ffunc_n   needs RC != 0 and a number in arg 1 (checknum).
//   .ffunc_nn  needs RC >= 16 and both type tags below LJ_TISNUM.
// With an FPU, .ffunc_n/.ffunc_nn also load the arguments into FARG1/FARG2.
1091 | |.macro .ffunc, name | |
1092 | |->ff_ .. name: | |
1093 | |.endmacro | |
1094 | | | |
1095 | |.macro .ffunc_1, name | |
1096 | |->ff_ .. name: | |
1097 | | beqz NARGS8:RC, ->fff_fallback | |
1098 | |. ld CARG1, 0(BASE) | |
1099 | |.endmacro | |
1100 | | | |
1101 | |.macro .ffunc_2, name | |
1102 | |->ff_ .. name: | |
1103 | | sltiu AT, NARGS8:RC, 16 | |
1104 | | ld CARG1, 0(BASE) | |
1105 | | bnez AT, ->fff_fallback | |
1106 | |. ld CARG2, 8(BASE) | |
1107 | |.endmacro | |
1108 | | | |
1109 | |.macro .ffunc_n, name // Caveat: has delay slot! | |
1110 | |->ff_ .. name: | |
1111 | | ld CARG1, 0(BASE) | |
1112 | | beqz NARGS8:RC, ->fff_fallback | |
1113 | | // Either ldc1 or the 1st instruction of checknum is in the delay slot. | |
1114 | | .FPU ldc1 FARG1, 0(BASE) | |
1115 | | checknum CARG1, ->fff_fallback | |
1116 | |.endmacro | |
1117 | | | |
1118 | |.macro .ffunc_nn, name // Caveat: has delay slot! | |
1119 | |->ff_ .. name: | |
1120 | | ld CARG1, 0(BASE) | |
1121 | | sltiu AT, NARGS8:RC, 16 | |
1122 | | ld CARG2, 8(BASE) | |
1123 | | bnez AT, ->fff_fallback | |
1124 | |. gettp TMP0, CARG1 | |
1125 | | gettp TMP1, CARG2 | |
1126 | | sltiu TMP0, TMP0, LJ_TISNUM | |
1127 | | sltiu TMP1, TMP1, LJ_TISNUM | |
1128 | | .FPU ldc1 FARG1, 0(BASE) | |
1129 | | and TMP0, TMP0, TMP1 | |
1130 | | .FPU ldc1 FARG2, 8(BASE) | |
// The macro ends on a branch: the caller's next instruction fills its delay slot.
1131 | | beqz TMP0, ->fff_fallback | |
1132 | |.endmacro | |
1133 | | | ||
1134 | |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1 and has delay slot! | |
1135 | |// MIPSR6: no delay slot, but a forbidden slot. | |
// Computes AT = gc.total - gc.threshold and branch-and-links to ->fff_gcstep
// when AT >= 0. The macro ends on that branch, so the instruction following
// each expansion is the one occupying the delay (or forbidden) slot.
1136 | |.macro ffgccheck | |
1137 | | ld TMP0, DISPATCH_GL(gc.total)(DISPATCH) | |
1138 | | ld TMP1, DISPATCH_GL(gc.threshold)(DISPATCH) | |
1139 | | dsubu AT, TMP0, TMP1 | |
1140 | |.if MIPSR6 | |
1141 | | bgezalc AT, ->fff_gcstep | |
1142 | |.else | |
1143 | | bgezal AT, ->fff_gcstep | |
1144 | |.endif | |
1145 | |.endmacro | |
1146 | | | ||
1147 | |//-- Base library: checks ----------------------------------------------- | ||
// Fast path for assert: when the first argument's tag is below
// LJ_TISTRUECOND, all arguments are returned as results; otherwise fallback.
// Results start at RA = BASE-16, so every argument is moved down 16 bytes.
1148 | |.ffunc_1 assert | |
1149 | | gettp AT, CARG1 | |
1150 | | sltiu AT, AT, LJ_TISTRUECOND | |
1151 | | beqz AT, ->fff_fallback | |
1152 | |. daddiu RA, BASE, -16 | |
1153 | | ld PC, FRAME_PC(BASE) | |
1154 | | addiu RD, NARGS8:RC, 8 // Compute (nresults+1)*8. | |
// TMP2 = RA + RD marks where the copy loop stops; TMP1 walks the arguments.
1155 | | daddu TMP2, RA, RD | |
1156 | | daddiu TMP1, BASE, 8 | |
1157 | | beq BASE, TMP2, ->fff_res // Done if exactly 1 argument. | |
1158 | |. sd CARG1, 0(RA) | |
1159 | |1: | |
1160 | | ld CRET1, 0(TMP1) | |
1161 | | sd CRET1, -16(TMP1) | |
// The compare uses TMP1 before the delay-slot increment below.
1162 | | bne TMP1, TMP2, <1 | |
1163 | |. daddiu TMP1, TMP1, 8 | |
1164 | | b ->fff_res | |
1165 | |. nop | |
1166 | | | ||
// Fast path for type: turns the argument's type tag into an index into the
// upvalue array of the current C function (CFUNC:RB) and returns that upvalue.
// The index is ~tag, except that any tag not (unsigned) above TISNUM uses
// ~LJ_TISNUM. Presumably the upvalues hold the type-name strings -- confirm
// against the library setup code, which is outside this chunk.
1167 | |.ffunc_1 type | |
1168 | | gettp TMP0, CARG1 | |
// TMP1 = 1 if TISNUM < tag (unsigned), i.e. the tag is not in the number range.
1169 | | sltu TMP1, TISNUM, TMP0 | |
1170 | | not TMP2, TMP0 | |
1171 | | li TMP3, ~LJ_TISNUM | |
// Select: TMP2 = (TMP1 != 0) ? ~tag : ~LJ_TISNUM (R6 has no movz, so it uses
// selnez/seleqz/or instead).
1172 | |.if MIPSR6 | |
1173 | | selnez TMP2, TMP2, TMP1 | |
1174 | | seleqz TMP3, TMP3, TMP1 | |
1175 | | or TMP2, TMP2, TMP3 | |
1176 | |.else | |
1177 | | movz TMP2, TMP3, TMP1 | |
1178 | |.endif | |
1179 | | dsll TMP2, TMP2, 3 | |
1180 | | daddu TMP2, CFUNC:RB, TMP2 | |
1181 | | b ->fff_restv | |
1182 | |. ld CARG1, CFUNC:TMP2->upvalue | |
1183 | | | ||
1184 | |//-- Base library: getters and setters --------------------------------- | ||
1185 | | | ||
// Fast path for getmetatable.
//   - Tables and userdata (label 1): load the object's own ->metatable.
//   - Any other type (label 6): load a per-type base metatable from the
//     gcroot array, collapsing all tags below LJ_TISNUM onto TISNUM first.
//   - Label 2 onwards: no metatable returns nil; otherwise the metatable's
//     hash chain is searched for the MM_metatable key string. A non-nil value
//     under that key is returned; if the key is missing or its value is nil,
//     the metatable itself is returned (tagged LJ_TTAB).
1186 | |.ffunc_1 getmetatable | |
1187 | | gettp TMP2, CARG1 | |
1188 | | daddiu TMP0, TMP2, -LJ_TTAB | |
1189 | | daddiu TMP1, TMP2, -LJ_TUDATA | |
// After the select, TMP0 == 0 iff the tag equals LJ_TTAB or LJ_TUDATA.
1190 | |.if MIPSR6 | |
1191 | | selnez TMP0, TMP1, TMP0 | |
1192 | |.else | |
1193 | | movn TMP0, TMP1, TMP0 | |
1194 | |.endif | |
1195 | | bnez TMP0, >6 | |
1196 | |. cleartp TAB:CARG1 | |
1197 | |1: // Field metatable must be at same offset for GCtab and GCudata! | |
1198 | | ld TAB:RB, TAB:CARG1->metatable | |
1199 | |2: | |
1200 | | ld STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH) | |
1201 | | beqz TAB:RB, ->fff_restv | |
1202 | |. li CARG1, LJ_TNIL | |
1203 | | lw TMP0, TAB:RB->hmask | |
1204 | | lw TMP1, STR:RC->hash | |
1205 | | ld NODE:TMP2, TAB:RB->node | |
1206 | | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask | |
1207 | | dsll TMP0, TMP1, 5 | |
1208 | | dsll TMP1, TMP1, 3 | |
1209 | | dsubu TMP1, TMP0, TMP1 | |
1210 | | daddu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) | |
1211 | | li CARG4, LJ_TSTR | |
1212 | | settp STR:RC, CARG4 // Tagged key to look for. | |
1213 | |3: // Rearranged logic, because we expect _not_ to find the key. | |
1214 | | ld TMP0, NODE:TMP2->key | |
1215 | | ld CARG1, NODE:TMP2->val | |
1216 | | ld NODE:TMP2, NODE:TMP2->next | |
1217 | | beq RC, TMP0, >5 | |
// AT = LJ_TTAB is set in the delay slot, so it is valid on both paths that
// later reach label 4.
1218 | |. li AT, LJ_TTAB | |
1219 | | bnez NODE:TMP2, <3 | |
1220 | |. nop | |
1221 | |4: | |
1222 | | move CARG1, RB | |
1223 | | b ->fff_restv // Not found, keep default result. | |
1224 | |. settp CARG1, AT | |
1225 | |5: | |
1226 | | bne CARG1, TISNIL, ->fff_restv | |
1227 | |. nop | |
1228 | | b <4 // Ditto for nil value. | |
1229 | |. nop | |
1230 | | | |
1231 | |6: | |
// TMP2 still holds the type tag here; tags below LJ_TISNUM are replaced by
// TISNUM before the tag is scaled by 8 and subtracted from DISPATCH to index
// the base-metatable slots.
1232 | | sltiu AT, TMP2, LJ_TISNUM | |
1233 | |.if MIPSR6 | |
1234 | | selnez TMP0, TISNUM, AT | |
1235 | | seleqz AT, TMP2, AT | |
1236 | | or TMP2, TMP0, AT | |
1237 | |.else | |
1238 | | movn TMP2, TISNUM, AT | |
1239 | |.endif | |
1240 | | dsll TMP2, TMP2, 3 | |
1241 | | dsubu TMP0, DISPATCH, TMP2 | |
1242 | | b <2 | |
1243 | |. ld TAB:RB, DISPATCH_GL(gcroot[GCROOT_BASEMT])-8(TMP0) | |
1244 | | | ||
// Fast path for setmetatable: taken only when arg 1 is a table without a
// metatable and arg 2 is a table. Everything else goes to ->fff_fallback.
1245 | |.ffunc_2 setmetatable | |
1246 | | // Fast path: no mt for table yet and not clearing the mt. | |
1247 | | checktp TMP1, CARG1, -LJ_TTAB, ->fff_fallback | |
1248 | | gettp TMP3, CARG2 | |
1249 | | ld TAB:TMP0, TAB:TMP1->metatable | |
1250 | | lbu TMP2, TAB:TMP1->marked | |
// AT = (tag(arg2) - LJ_TTAB) | existing metatable: non-zero means fallback.
1251 | | daddiu AT, TMP3, -LJ_TTAB | |
1252 | | cleartp TAB:CARG2 | |
1253 | | or AT, AT, TAB:TMP0 | |
1254 | | bnez AT, ->fff_fallback | |
1255 | |. andi AT, TMP2, LJ_GC_BLACK // isblack(table) | |
// The store of the new metatable happens in the delay slot, i.e. on both the
// "not black" return path and the barrier path.
1256 | | beqz AT, ->fff_restv | |
1257 | |. sd TAB:CARG2, TAB:TMP1->metatable | |
1258 | | barrierback TAB:TMP1, TMP2, TMP0, ->fff_restv | |
1259 | | | ||
// Fast path for rawget: requires at least 2 arguments and a table in arg 1,
// then calls lj_tab_get(L, t, key) with key = BASE+8 and returns the TValue
// the returned pointer refers to.
1260 | |.ffunc rawget | |
1261 | | ld CARG2, 0(BASE) | |
1262 | | sltiu AT, NARGS8:RC, 16 | |
1263 | | load_got lj_tab_get | |
1264 | | gettp TMP0, CARG2 | |
1265 | | cleartp CARG2 | |
1266 | | daddiu TMP0, TMP0, -LJ_TTAB | |
// AT combines both checks: too few arguments OR arg 1 not a table.
1267 | | or AT, AT, TMP0 | |
1268 | | bnez AT, ->fff_fallback | |
1269 | |. daddiu CARG3, BASE, 8 | |
1270 | | call_intern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) | |
1271 | |. move CARG1, L | |
1272 | | b ->fff_restv | |
1273 | |. ld CARG1, 0(CRET1) | |
1274 | | | ||
1275 | |//-- Base library: conversions ------------------------------------------ | ||
1276 | | | ||
// Fast path for tonumber: returns the argument unchanged when there is exactly
// one argument and its tag is not (unsigned) above TISNUM; otherwise fallback.
1277 | |.ffunc tonumber | |
1278 | | // Only handles the number case inline (without a base argument). | |
1279 | | ld CARG1, 0(BASE) | |
1280 | | xori AT, NARGS8:RC, 8 // Exactly one number argument. | |
1281 | | gettp TMP1, CARG1 | |
1282 | | sltu TMP0, TISNUM, TMP1 | |
1283 | | or AT, AT, TMP0 | |
1284 | | bnez AT, ->fff_fallback | |
1285 | |. nop | |
1286 | | b ->fff_restv | |
1287 | |. nop | |
1288 | | | ||
// Fast path for tostring: strings are returned as-is; numbers are converted
// via lj_strfmt_number(L, o) unless GCROOT_BASEMT_NUM is non-NULL. Anything
// else goes to ->fff_fallback.
1289 | |.ffunc_1 tostring | |
1290 | | // Only handles the string or number case inline. | |
1291 | | gettp TMP0, CARG1 | |
1292 | | daddiu AT, TMP0, -LJ_TSTR | |
1293 | | // A __tostring method in the string base metatable is ignored. | |
1294 | | beqz AT, ->fff_restv // String key? | |
1295 | | // Handle numbers inline, unless a number base metatable is present. | |
1296 | |. ld TMP1, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH) | |
// TMP0 = (tag above TISNUM) | (number base metatable pointer): non-zero => fallback.
1297 | | sltu TMP0, TISNUM, TMP0 | |
1298 | | or TMP0, TMP0, TMP1 | |
1299 | | bnez TMP0, ->fff_fallback | |
1300 | |. sd BASE, L->base // Add frame since C call can throw. | |
// ffgccheck ends on a branch: pre-R6 the SAVE_PC store fills its delay slot,
// on R6 it is issued before the check instead.
1301 | |.if MIPSR6 | |
1302 | | sd PC, SAVE_PC // Redundant (but a defined value). | |
1303 | | ffgccheck | |
1304 | |.else | |
1305 | | ffgccheck | |
1306 | |. sd PC, SAVE_PC // Redundant (but a defined value). | |
1307 | |.endif | |
1308 | | load_got lj_strfmt_number | |
1309 | | move CARG1, L | |
1310 | | call_intern lj_strfmt_number // (lua_State *L, cTValue *o) | |
1311 | |. move CARG2, BASE | |
1312 | | // Returns GCstr *. | |
1313 | | li AT, LJ_TSTR | |
1314 | | settp CRET1, AT | |
1315 | | b ->fff_restv | |
1316 | |. move CARG1, CRET1 | |
1317 | | | ||
1318 | |//-- Base library: iterators ------------------------------------------- | ||
1319 | | | ||
// Fast path for next: arg 1 must be a table. Calls lj_tab_next(L, t, key)
// with key = BASE+8. A zero result returns nil; otherwise the two slots at
// 8(BASE) and 16(BASE) are copied to RA (= BASE-16) and returned as 2 results.
1320 | |.ffunc_1 next | |
1321 | | checktp CARG2, CARG1, -LJ_TTAB, ->fff_fallback | |
1322 | | daddu TMP2, BASE, NARGS8:RC | |
1323 | | sd TISNIL, 0(TMP2) // Set missing 2nd arg to nil. | |
1324 | | ld PC, FRAME_PC(BASE) | |
1325 | | load_got lj_tab_next | |
1326 | | sd BASE, L->base // Add frame since C call can throw. | |
1327 | | sd BASE, L->top // Dummy frame length is ok. | |
1328 | | daddiu CARG3, BASE, 8 | |
1329 | | sd PC, SAVE_PC | |
1330 | | call_intern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) | |
1331 | |. move CARG1, L | |
1332 | | // Returns 0 at end of traversal. | |
1333 | | beqz CRET1, ->fff_restv // End of traversal: return nil. | |
1334 | |. move CARG1, TISNIL | |
1335 | | ld TMP0, 8(BASE) | |
1336 | | daddiu RA, BASE, -16 | |
1337 | | ld TMP2, 16(BASE) | |
1338 | | sd TMP0, 0(RA) | |
1339 | | sd TMP2, 8(RA) | |
1340 | | b ->fff_res | |
1341 | |. li RD, (2+1)*8 | |
1342 | | | ||
// Fast path for pairs: arg 1 must be a table (and, with LJ_52, must have no
// metatable). Returns three results starting at RA = BASE-16:
// upvalue[0] of the current function, the original argument, and nil.
1343 | |.ffunc_1 pairs | |
1344 | | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback | |
1345 | | ld PC, FRAME_PC(BASE) | |
1346 | #if LJ_52 | |
1347 | | ld TAB:TMP2, TAB:TMP1->metatable | |
1348 | | ld TMP0, CFUNC:RB->upvalue[0] | |
1349 | | bnez TAB:TMP2, ->fff_fallback | |
1350 | #else | |
1351 | | ld TMP0, CFUNC:RB->upvalue[0] | |
1352 | #endif | |
// Only in the LJ_52 build does this line follow a branch (and so fill its
// delay slot); in the other build it follows a plain load.
1353 | |. daddiu RA, BASE, -16 | |
1354 | | sd TISNIL, 0(BASE) | |
1355 | | sd CARG1, -8(BASE) | |
1356 | | sd TMP0, 0(RA) | |
1357 | | b ->fff_res | |
1358 | |. li RD, (3+1)*8 | |
1359 | | | ||
// Iterator step for ipairs: takes (table, integer index). Computes index+1,
// stores it (tagged with TISNUM) as the first result and looks the element up:
//   - index+1 < asize: load directly from the array part;
//   - otherwise: return 0 results if hmask is 0, else call
//     lj_tab_getinth(t, key) and use the slot it returns (NULL => 0 results).
// A nil element ends the iteration with 0 results; otherwise 2 results.
1360 | |.ffunc_2 ipairs_aux | |
1361 | | checktab CARG1, ->fff_fallback | |
1362 | | checkint CARG2, ->fff_fallback | |
1363 | |. lw TMP0, TAB:CARG1->asize | |
1364 | | ld TMP1, TAB:CARG1->array | |
1365 | | ld PC, FRAME_PC(BASE) | |
1366 | | sextw TMP2, CARG2 | |
1367 | | addiu TMP2, TMP2, 1 | |
1368 | | sltu AT, TMP2, TMP0 | |
1369 | | daddiu RA, BASE, -16 | |
1370 | | zextw TMP0, TMP2 | |
1371 | | settp TMP0, TISNUM | |
1372 | | beqz AT, >2 // Not in array part? | |
1373 | |. sd TMP0, 0(RA) | |
1374 | | dsll TMP3, TMP2, 3 | |
1375 | | daddu TMP3, TMP1, TMP3 | |
1376 | | ld TMP1, 0(TMP3) | |
1377 | |1: | |
1378 | | beq TMP1, TISNIL, ->fff_res // End of iteration, return 0 results. | |
1379 | |. li RD, (0+1)*8 | |
// -8(BASE) is the second result slot (RA+8).
1380 | | sd TMP1, -8(BASE) | |
1381 | | b ->fff_res | |
1382 | |. li RD, (2+1)*8 | |
1383 | |2: // Check for empty hash part first. Otherwise call C function. | |
1384 | | lw TMP0, TAB:CARG1->hmask | |
1385 | | load_got lj_tab_getinth | |
1386 | | beqz TMP0, ->fff_res | |
1387 | |. li RD, (0+1)*8 | |
1388 | | call_intern lj_tab_getinth // (GCtab *t, int32_t key) | |
1389 | |. move CARG2, TMP2 | |
1390 | | // Returns cTValue * or NULL. | |
1391 | | beqz CRET1, ->fff_res | |
1392 | |. li RD, (0+1)*8 | |
1393 | | b <1 | |
1394 | |. ld TMP1, 0(CRET1) | |
1395 | | | ||
// Fast path for ipairs: arg 1 must be a table (and, with LJ_52, must have no
// metatable). Returns three results starting at RA = BASE-16:
// upvalue[0] of the current function, the original argument, and the value
// TISNUM << 47 (presumably the tagged integer 0 -- confirm against the
// TValue layout, which is outside this chunk).
1396 | |.ffunc_1 ipairs | |
1397 | | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback | |
1398 | | ld PC, FRAME_PC(BASE) | |
1399 | #if LJ_52 | |
1400 | | ld TAB:TMP2, TAB:TMP1->metatable | |
1401 | | ld CFUNC:TMP0, CFUNC:RB->upvalue[0] | |
1402 | | bnez TAB:TMP2, ->fff_fallback | |
1403 | #else | |
1404 | | ld TMP0, CFUNC:RB->upvalue[0] | |
1405 | #endif | |
1406 | | daddiu RA, BASE, -16 | |
1407 | | dsll AT, TISNUM, 47 | |
1408 | | sd CARG1, -8(BASE) | |
1409 | | sd AT, 0(BASE) | |
1410 | | sd CFUNC:TMP0, 0(RA) | |
1411 | | b ->fff_res | |
1412 | |. li RD, (3+1)*8 | |
1413 | | | ||
1414 | |//-- Base library: catch errors ---------------------------------------- | ||
1415 | | | ||
// Fast path for pcall: needs at least one argument. Builds a protected-call
// frame 16 bytes above the old base, with PC = active-hook bit + 16 +
// FRAME_PCALL, shifts the remaining arguments up by one slot and dispatches
// through ->vm_call_dispatch.
// NOTE(review): RC is decremented before the fallback branch is taken, so
// ->fff_fallback (outside this chunk) is entered with RC = -8 when pcall is
// called without arguments -- confirm the fallback handler tolerates that.
1416 | |.ffunc pcall | |
1417 | | daddiu NARGS8:RC, NARGS8:RC, -8 | |
1418 | | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) | |
1419 | | bltz NARGS8:RC, ->fff_fallback | |
1420 | |. move TMP2, BASE | |
1421 | | daddiu BASE, BASE, 16 | |
1422 | | // Remember active hook before pcall. | |
1423 | | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT | |
1424 | | andi TMP3, TMP3, 1 | |
1425 | | daddiu PC, TMP3, 16+FRAME_PCALL | |
1426 | | beqz NARGS8:RC, ->vm_call_dispatch | |
// Local label 1 sits on the delay-slot instruction itself: the xpcall code
// below branches here ('b <1') to reuse the argument-shifting loop.
1427 | |1: | |
1428 | |. daddu TMP0, BASE, NARGS8:RC | |
1429 | |2: | |
1430 | | ld TMP1, -16(TMP0) | |
1431 | | sd TMP1, -8(TMP0) | |
1432 | | daddiu TMP0, TMP0, -8 | |
1433 | | bne TMP0, BASE, <2 | |
1434 | |. nop | |
1435 | | b ->vm_call_dispatch | |
1436 | |. nop | |
1437 | | | ||
// Fast path for xpcall: needs at least two arguments, and the second one
// (the traceback handler) must be a function; otherwise ->fff_fallback.
// Swaps function and handler in the first two slots, builds a protected-call
// frame 24 bytes above the old base with PC = active-hook bit + 24 +
// FRAME_PCALL, and either dispatches directly (no extra arguments) or reuses
// the argument-shifting loop at local label 1 of the preceding pcall code.
// The argument count is computed into TMP0 first, so RC is still unmodified
// on both fallback branches.
1438 | |.ffunc xpcall | |
1439 | | daddiu NARGS8:TMP0, NARGS8:RC, -16 | |
1440 | | ld CARG1, 0(BASE) | |
1441 | | ld CARG2, 8(BASE) | |
1442 | | bltz NARGS8:TMP0, ->fff_fallback | |
// hookmask is loaded into TMP1 (in the delay slot, so on both paths).
1443 | |. lbu TMP1, DISPATCH_GL(hookmask)(DISPATCH) | |
1444 | | gettp AT, CARG2 | |
1445 | | daddiu AT, AT, -LJ_TFUNC | |
1446 | | bnez AT, ->fff_fallback // Traceback must be a function. | |
1447 | |. move TMP2, BASE | |
1448 | | move NARGS8:RC, NARGS8:TMP0 | |
1449 | | daddiu BASE, BASE, 24 | |
1450 | | // Remember active hook before pcall. | |
// Fix: extract the bit from TMP1 (the loaded hookmask). The shift previously
// read TMP3, which this function never writes.
1451 | | srl TMP3, TMP1, HOOK_ACTIVE_SHIFT | |
1452 | | sd CARG2, 0(TMP2) // Swap function and traceback. | |
1453 | | andi TMP3, TMP3, 1 | |
1454 | | sd CARG1, 8(TMP2) | |
1455 | | beqz NARGS8:RC, ->vm_call_dispatch | |
1456 | |. daddiu PC, TMP3, 24+FRAME_PCALL | |
1457 | | b <1 | |
1458 | |. nop | |
1459 | | | ||
1460 | |//-- Coroutine library -------------------------------------------------- | ||
1461 | | | ||
// Shared body for coroutine.resume (resume = 1) and the coroutine.wrap helper
// (resume = 0); both expansions follow the macro definition.
// Outline:
//   - fetch the coroutine state into CARG1 (from arg 1 or from upvalue[0]);
//   - reject it via ->fff_fallback if its status is above LUA_YIELD, it has a
//     cframe, it has nothing to run (base == top with status 0), or the
//     arguments would exceed its maxstack;
//   - copy the arguments to the coroutine stack and enter ->vm_resume;
//   - label 4: on a status <= LUA_YIELD copy the results back (growing the
//     caller stack at label 9 if needed); on an error status go to label 8.
1462 | |.macro coroutine_resume_wrap, resume | |
1463 | |.if resume | |
1464 | |.ffunc_1 coroutine_resume | |
1465 | | checktp CARG1, CARG1, -LJ_TTHREAD, ->fff_fallback | |
1466 | |.else | |
1467 | |.ffunc coroutine_wrap_aux | |
1468 | | ld L:CARG1, CFUNC:RB->upvalue[0].gcr | |
1469 | | cleartp L:CARG1 | |
1470 | |.endif | |
1471 | | lbu TMP0, L:CARG1->status | |
1472 | | ld TMP1, L:CARG1->cframe | |
1473 | | ld CARG2, L:CARG1->top | |
1474 | | ld TMP2, L:CARG1->base | |
1475 | | addiu AT, TMP0, -LUA_YIELD | |
1476 | | daddu CARG3, CARG2, TMP0 | |
1477 | | daddiu TMP3, CARG2, 8 | |
// Select: CARG2 = (status != LUA_YIELD) ? top+8 : top.
1478 | |.if MIPSR6 | |
1479 | | seleqz CARG2, CARG2, AT | |
1480 | | selnez TMP3, TMP3, AT | |
1481 | | bgtz AT, ->fff_fallback // st > LUA_YIELD? | |
1482 | |. or CARG2, TMP3, CARG2 | |
1483 | |.else | |
1484 | | bgtz AT, ->fff_fallback // st > LUA_YIELD? | |
1485 | |. movn CARG2, TMP3, AT | |
1486 | |.endif | |
1487 | | xor TMP2, TMP2, CARG3 | |
1488 | | bnez TMP1, ->fff_fallback // cframe != 0? | |
1489 | |. or AT, TMP2, TMP0 | |
1490 | | ld TMP0, L:CARG1->maxstack | |
1491 | | beqz AT, ->fff_fallback // base == top && st == 0? | |
1492 | |. ld PC, FRAME_PC(BASE) | |
1493 | | daddu TMP2, CARG2, NARGS8:RC | |
1494 | | sltu AT, TMP0, TMP2 | |
1495 | | bnez AT, ->fff_fallback // Stack overflow? | |
1496 | |. sd PC, SAVE_PC | |
1497 | | sd BASE, L->base | |
1498 | |1: | |
1499 | |.if resume | |
1500 | | daddiu BASE, BASE, 8 // Keep resumed thread in stack for GC. | |
1501 | | daddiu NARGS8:RC, NARGS8:RC, -8 | |
1502 | | daddiu TMP2, TMP2, -8 | |
1503 | |.endif | |
1504 | | sd TMP2, L:CARG1->top | |
1505 | | daddu TMP1, BASE, NARGS8:RC | |
1506 | | move CARG3, CARG2 | |
1507 | | sd BASE, L->top | |
1508 | |2: // Move args to coroutine. | |
// The load happens before the bounds test, so the slot at the end pointer is
// read once but never stored.
1509 | | ld CRET1, 0(BASE) | |
1510 | | sltu AT, BASE, TMP1 | |
1511 | | beqz AT, >3 | |
1512 | |. daddiu BASE, BASE, 8 | |
1513 | | sd CRET1, 0(CARG3) | |
1514 | | b <2 | |
1515 | |. daddiu CARG3, CARG3, 8 | |
1516 | |3: | |
1517 | | bal ->vm_resume // (lua_State *L, TValue *base, 0, 0) | |
// The coroutine state is kept in RA for use after the call.
1518 | |. move L:RA, L:CARG1 | |
1519 | | // Returns thread status. | |
1520 | |4: | |
1521 | | ld TMP2, L:RA->base | |
1522 | | sltiu AT, CRET1, LUA_YIELD+1 | |
1523 | | ld TMP3, L:RA->top | |
1524 | | li_vmstate INTERP | |
1525 | | ld BASE, L->base | |
1526 | | sd L, DISPATCH_GL(cur_L)(DISPATCH) | |
1527 | | st_vmstate | |
1528 | | beqz AT, >8 | |
// RD = size in bytes of the coroutine's results (top - base).
1529 | |. dsubu RD, TMP3, TMP2 | |
1530 | | ld TMP0, L->maxstack | |
1531 | | beqz RD, >6 // No results? | |
1532 | |. daddu TMP1, BASE, RD | |
1533 | | sltu AT, TMP0, TMP1 | |
1534 | | bnez AT, >9 // Need to grow stack? | |
1535 | |. daddu TMP3, TMP2, RD | |
1536 | | sd TMP2, L:RA->top // Clear coroutine stack. | |
1537 | | move TMP1, BASE | |
1538 | |5: // Move results from coroutine. | |
1539 | | ld CRET1, 0(TMP2) | |
1540 | | daddiu TMP2, TMP2, 8 | |
1541 | | sltu AT, TMP2, TMP3 | |
1542 | | sd CRET1, 0(TMP1) | |
1543 | | bnez AT, <5 | |
1544 | |. daddiu TMP1, TMP1, 8 | |
1545 | |6: | |
1546 | | andi TMP0, PC, FRAME_TYPE | |
1547 | |.if resume | |
1548 | | mov_true TMP1 | |
1549 | | daddiu RA, BASE, -8 | |
1550 | | sd TMP1, -8(BASE) // Prepend true to results. | |
1551 | | daddiu RD, RD, 16 | |
1552 | |.else | |
1553 | | move RA, BASE | |
1554 | | daddiu RD, RD, 8 | |
1555 | |.endif | |
1556 | |7: | |
// TMP0 = PC & FRAME_TYPE selects the return path: 0 => ->BC_RET_Z, else ->vm_return.
1557 | | sd PC, SAVE_PC | |
1558 | | beqz TMP0, ->BC_RET_Z | |
1559 | |. move MULTRES, RD | |
1560 | | b ->vm_return | |
1561 | |. nop | |
1562 | | | |
1563 | |8: // Coroutine returned with error (at co->top-1). | |
1564 | |.if resume | |
1565 | | daddiu TMP3, TMP3, -8 | |
1566 | | mov_false TMP1 | |
1567 | | ld CRET1, 0(TMP3) | |
1568 | | sd TMP3, L:RA->top // Remove error from coroutine stack. | |
1569 | | li RD, (2+1)*8 | |
1570 | | sd TMP1, -8(BASE) // Prepend false to results. | |
1571 | | daddiu RA, BASE, -8 | |
1572 | | sd CRET1, 0(BASE) // Copy error message. | |
1573 | | b <7 | |
1574 | |. andi TMP0, PC, FRAME_TYPE | |
1575 | |.else | |
// No branch follows this call, so lj_ffh_coroutine_wrap_err is presumably not
// expected to return -- confirm against its C definition.
1576 | | load_got lj_ffh_coroutine_wrap_err | |
1577 | | move CARG2, L:RA | |
1578 | | call_intern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co) | |
1579 | |. move CARG1, L | |
1580 | |.endif | |
1581 | | | |
1582 | |9: // Handle stack expansion on return from yield. | |
1583 | | load_got lj_state_growstack | |
1584 | | srl CARG2, RD, 3 | |
1585 | | call_intern lj_state_growstack // (lua_State *L, int n) | |
1586 | |. move CARG1, L | |
// Re-enter label 4 with CRET1 = 0 so the status test there passes again.
1587 | | b <4 | |
1588 | |. li CRET1, 0 | |
1589 | |.endmacro | |
1590 | | | |
1591 | | coroutine_resume_wrap 1 // coroutine.resume | |
1592 | | coroutine_resume_wrap 0 // coroutine.wrap | |
1593 | | | ||
// Fast path for coroutine.yield: syncs L->base and L->top (= BASE + RC), then
// checks the CFRAME_RESUME bit of L->cframe. If the bit is clear, falls back;
// otherwise clears L->cframe, stores LUA_YIELD into L->status (delay slot)
// and leaves through ->vm_leave_unw with CRET1 = LUA_YIELD.
1594 | |.ffunc coroutine_yield | |
1595 | | ld TMP0, L->cframe | |
1596 | | daddu TMP1, BASE, NARGS8:RC | |
1597 | | sd BASE, L->base | |
1598 | | andi TMP0, TMP0, CFRAME_RESUME | |
1599 | | sd TMP1, L->top | |
1600 | | beqz TMP0, ->fff_fallback | |
1601 | |. li CRET1, LUA_YIELD | |
1602 | | sd r0, L->cframe | |
1603 | | b ->vm_leave_unw | |
1604 | |. sb CRET1, L->status | |
1605 | | | ||
1606 | |//-- Math library ------------------------------------------------------- | ||
1607 | | | ||
// Fast path for math.abs.
//   - Tag == LJ_TISNUM (integer): branch-free 32-bit abs via sign mask. If
//     bit 31 of the result is still set, the result is replaced by the double
//     2^31 (0x41e0 << 48).
//   - Tag below LJ_TISNUM (label 1): handled by the dextm in the delay slot,
//     which presumably clears the sign bit -- confirm the operand encoding
//     against the DynASM MIPS backend.
//   - Anything else: ->fff_fallback.
// Falls through into ->fff_restv, which follows this block.
1608 | |.ffunc_1 math_abs | |
1609 | | gettp CARG2, CARG1 | |
1610 | | daddiu AT, CARG2, -LJ_TISNUM | |
1611 | | bnez AT, >1 | |
1612 | |. sextw TMP1, CARG1 | |
1613 | | sra TMP0, TMP1, 31 // Extract sign. | |
// abs(x) = (x ^ sign) - sign, with sign being 0 or -1.
1614 | | xor TMP1, TMP1, TMP0 | |
1615 | | dsubu CARG1, TMP1, TMP0 | |
// Shifting left by 32 moves bit 31 into the sign position for the bgez test.
1616 | | dsll TMP3, CARG1, 32 | |
1617 | | bgez TMP3, ->fff_restv | |
1618 | |. settp CARG1, TISNUM | |
1619 | | li CARG1, 0x41e0 // 2^31 as a double. | |
1620 | | b ->fff_restv | |
1621 | |. dsll CARG1, CARG1, 48 | |
1622 | |1: | |
1623 | | sltiu AT, CARG2, LJ_TISNUM | |
1624 | | beqz AT, ->fff_fallback | |
1625 | |. dextm CARG1, CARG1, 0, 30 | |
1626 | |// fallthrough | |
1627 | | | ||
// Common return paths for fast functions (three nested entry points):
//   ->fff_restv  stores CARG1 as the single result at BASE-16 and loads PC.
//   ->fff_res1   sets RD for exactly one result.
//   ->fff_res    returns RD/8 - 1 results located at RA.
// If PC has FRAME_TYPE bits set, control goes to ->vm_return. Otherwise the
// calling instruction at -4(PC) is decoded, missing results are filled with
// nil until RD reaches the decoded RB field, and the next instruction runs.
1628 | |->fff_restv: | |
1629 | | // CARG1 = TValue result. | |
1630 | | ld PC, FRAME_PC(BASE) | |
1631 | | daddiu RA, BASE, -16 | |
1632 | | sd CARG1, -16(BASE) | |
1633 | |->fff_res1: | |
1634 | | // RA = results, PC = return. | |
1635 | | li RD, (1+1)*8 | |
1636 | |->fff_res: | |
1637 | | // RA = results, RD = (nresults+1)*8, PC = return. | |
1638 | | andi TMP0, PC, FRAME_TYPE | |
1639 | | bnez TMP0, ->vm_return | |
1640 | |. move MULTRES, RD | |
1641 | | lw INS, -4(PC) | |
1642 | | decode_RB8a RB, INS | |
1643 | | decode_RB8b RB | |
1644 | |5: | |
1645 | | sltu AT, RD, RB | |
1646 | | bnez AT, >6 // More results expected? | |
1647 | |. decode_RA8a TMP0, INS | |
1648 | | decode_RA8b TMP0 | |
1649 | | ins_next1 | |
1650 | | // Adjust BASE. KBASE is assumed to be set for the calling frame. | |
1651 | | dsubu BASE, RA, TMP0 | |
1652 | | ins_next2 | |
1653 | | | |
1654 | |6: // Fill up results with nil. | |
// RD is advanced first; the nil is stored at the old RA+RD position through
// the -8 offset in the delay slot.
1655 | | daddu TMP1, RA, RD | |
1656 | | daddiu RD, RD, 8 | |
1657 | | b <5 | |
1658 | |. sd TISNIL, -8(TMP1) | |
1659 | | | ||
// Generators for math fast functions that forward to an external function
// named <func>: math_extern takes one number argument (.ffunc_n), math_extern2
// two (.ffunc_nn). Both call through call_extern and return via ->fff_resn.
// Both prologue macros end on a branch, so the load_got line is the one that
// lands in that branch's delay slot.
1660 | |.macro math_extern, func | |
1661 | | .ffunc_n math_ .. func | |
1662 | | load_got func | |
1663 | | call_extern | |
1664 | |. nop | |
1665 | | b ->fff_resn | |
1666 | |. nop | |
1667 | |.endmacro | |
1668 | | | |
1669 | |.macro math_extern2, func | |
1670 | | .ffunc_nn math_ .. func | |
1671 | |. load_got func | |
1672 | | call_extern | |
1673 | |. nop | |
1674 | | b ->fff_resn | |
1675 | |. nop | |
1676 | |.endmacro | |
1677 | | | ||
1678 | |// TODO: Return integer type if result is integer (own sf implementation). | |
// Generator for math.floor / math.ceil (expanded directly below):
//   - no argument: ->fff_fallback;
//   - tag == TISNUM (integer): returned unchanged via ->fff_restv;
//   - tag not below TISNUM: ->fff_fallback;
//   - otherwise: with an FPU, branch-and-link to ->vm_<func> with the argument
//     in FARG1; without one, call the external <func>. Returns via ->fff_resn.
1679 | |.macro math_round, func | |
1680 | |->ff_math_ .. func: | |
1681 | | ld CARG1, 0(BASE) | |
1682 | | beqz NARGS8:RC, ->fff_fallback | |
1683 | |. gettp TMP0, CARG1 | |
1684 | | beq TMP0, TISNUM, ->fff_restv | |
1685 | |. sltu AT, TMP0, TISNUM | |
1686 | | beqz AT, ->fff_fallback | |
1687 | |.if FPU | |
1688 | |. ldc1 FARG1, 0(BASE) | |
1689 | | bal ->vm_ .. func | |
1690 | |. nop | |
1691 | |.else | |
1692 | |. load_got func | |
1693 | | call_extern | |
1694 | |. nop | |
1695 | |.endif | |
1696 | | b ->fff_resn | |
1697 | |. nop | |
1698 | |.endmacro | |
1699 | | | |
1700 | | math_round floor | |
1701 | | math_round ceil | |
1702 | | | ||
// Fast path for math.log: only the exact one-argument form with a number
// argument is handled inline (calls the external log and returns via
// ->fff_resn); everything else goes to ->fff_fallback.
1703 | |.ffunc math_log | |
1704 | | li AT, 8 | |
1705 | | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument. | |
1706 | |. ld CARG1, 0(BASE) | |
1707 | | checknum CARG1, ->fff_fallback | |
1708 | |. load_got log | |
// With an FPU the argument is loaded into FARG1 in the call's delay slot.
1709 | |.if FPU | |
1710 | | call_extern | |
1711 | |. ldc1 FARG1, 0(BASE) | |
1712 | |.else | |
1713 | | call_extern | |
1714 | |. nop | |
1715 | |.endif | |
1716 | | b ->fff_resn | |
1717 | |. nop | |
1718 | | | ||
// Expansions of the math_extern / math_extern2 macros: each line emits one
// ->ff_math_<name> fast function forwarding to the external function <name>.
1719 | | math_extern log10 | |
1720 | | math_extern exp | |
1721 | | math_extern sin | |
1722 | | math_extern cos | |
1723 | | math_extern tan | |
1724 | | math_extern asin | |
1725 | | math_extern acos | |
1726 | | math_extern atan | |
1727 | | math_extern sinh | |
1728 | | math_extern cosh | |
1729 | | math_extern tanh | |
// Two-argument variants.
1730 | | math_extern2 pow | |
1731 | | math_extern2 atan2 | |
1732 | | math_extern2 fmod | |
1733 | | | ||
// math.sqrt and the shared number-result return path.
// With an FPU, math_sqrt is a single sqrt.d placed in the delay slot left open
// by .ffunc_n and falls straight through into ->fff_resn; without an FPU it is
// generated by math_extern.
// ->fff_resn stores the result (FRET1 with an FPU, CRET1 otherwise) at
// RA = BASE-16 in the delay slot of the branch to ->fff_res1.
1734 | |.if FPU | |
1735 | |.ffunc_n math_sqrt | |
1736 | |. sqrt.d FRET1, FARG1 | |
1737 | |// fallthrough to ->fff_resn | |
1738 | |.else | |
1739 | | math_extern sqrt | |
1740 | |.endif | |
1741 | | | |
1742 | |->fff_resn: | |
1743 | | ld PC, FRAME_PC(BASE) | |
1744 | | daddiu RA, BASE, -16 | |
1745 | | b ->fff_res1 | |
1746 | |.if FPU | |
1747 | |. sdc1 FRET1, 0(RA) | |
1748 | |.else | |
1749 | |. sd CRET1, 0(RA) | |
1750 | |.endif | |
1751 | | | ||
1752 | | | ||
// Fast path for math.ldexp: arg 1 must pass checknum and arg 2 checkint;
// calls the external ldexp and returns via ->fff_resn. With an FPU, arg 1 is
// loaded into FARG1; the 32-bit integer exponent is loaded into CARG2 in the
// call's delay slot.
1753 | |.ffunc_2 math_ldexp | |
1754 | | checknum CARG1, ->fff_fallback | |
1755 | | checkint CARG2, ->fff_fallback | |
1756 | |. load_got ldexp | |
1757 | | .FPU ldc1 FARG1, 0(BASE) | |
1758 | | call_extern | |
1759 | |. lw CARG2, 8+LO(BASE) | |
1760 | | b ->fff_resn | |
1761 | |. nop | |
1762 | | | ||
// Fast path for math.frexp: calls the external frexp with CARG2 pointing at
// the global tmptv slot (used as the int output), then returns two results at
// RA = BASE-16: the function result and the 32-bit value read back from tmptv
// (converted to a double with an FPU, tagged with TISNUM otherwise).
1763 | |.ffunc_n math_frexp | |
1764 | | load_got frexp | |
1765 | | ld PC, FRAME_PC(BASE) | |
1766 | | call_extern | |
1767 | |. daddiu CARG2, DISPATCH, DISPATCH_GL(tmptv) | |
1768 | | lw TMP1, DISPATCH_GL(tmptv)(DISPATCH) | |
1769 | | daddiu RA, BASE, -16 | |
1770 | |.if FPU | |
1771 | | mtc1 TMP1, FARG2 | |
1772 | | sdc1 FRET1, 0(RA) | |
1773 | | cvt.d.w FARG2, FARG2 | |
1774 | | sdc1 FARG2, 8(RA) | |
1775 | |.else | |
1776 | | sd CRET1, 0(RA) | |
1777 | | zextw TMP1, TMP1 | |
1778 | | settp TMP1, TISNUM | |
1779 | | sd TMP1, 8(RA) | |
1780 | |.endif | |
1781 | | b ->fff_res | |
1782 | |. li RD, (2+1)*8 | |
1783 | | | ||
// Fast path for math.modf: calls the external modf with CARG2 = BASE-16, so
// the value written through that pointer lands directly in the first result
// slot; the function's return value is stored at -8(BASE) as the second.
1784 | |.ffunc_n math_modf | |
1785 | | load_got modf | |
1786 | | ld PC, FRAME_PC(BASE) | |
1787 | | call_extern | |
1788 | |. daddiu CARG2, BASE, -16 | |
1789 | | daddiu RA, BASE, -16 | |
1790 | |.if FPU | |
1791 | | sdc1 FRET1, -8(BASE) | |
1792 | |.else | |
1793 | | sd CRET1, -8(BASE) | |
1794 | |.endif | |
1795 | | b ->fff_res | |
1796 | |. li RD, (2+1)*8 | |
1797 | | | ||
// Generator for math.min / math.max (expanded at the bottom of this block).
// Parameters: intins/intinsc are the integer select mnemonics and fpins the
// floating-point one; on pre-R6 intinsc is passed as '_' and never used.
// Structure:
//   1: integer loop -- runs while all arguments so far are integers;
//   3: first non-integer seen: convert the running integer result to a
//      number and continue in the number loop at 7;
//   5/6: number loop entry when arg 1 is not an integer;
//   7: compare/select step of the number loop;
//   8: an integer argument inside the number loop is converted to a number.
// TMP2 walks the arguments, TMP3 = BASE + RC is the end pointer.
1798 | |.macro math_minmax, name, intins, intinsc, fpins | |
1799 | | .ffunc_1 name | |
1800 | | daddu TMP3, BASE, NARGS8:RC | |
1801 | | checkint CARG1, >5 | |
1802 | |. daddiu TMP2, BASE, 8 | |
1803 | |1: // Handle integers. | |
1804 | | beq TMP2, TMP3, ->fff_restv | |
1805 | |. ld CARG2, 0(TMP2) | |
1806 | | checkint CARG2, >3 | |
1807 | |. sextw CARG1, CARG1 | |
1808 | | lw CARG2, LO(TMP2) | |
// AT = (running result < next argument), signed compare.
1809 | |. slt AT, CARG1, CARG2 | |
1810 | |.if MIPSR6 | |
1811 | | intins TMP1, CARG2, AT | |
1812 | | intinsc CARG1, CARG1, AT | |
1813 | | or CARG1, CARG1, TMP1 | |
1814 | |.else | |
1815 | | intins CARG1, CARG2, AT | |
1816 | |.endif | |
1817 | | daddiu TMP2, TMP2, 8 | |
1818 | | zextw CARG1, CARG1 | |
1819 | | b <1 | |
1820 | |. settp CARG1, TISNUM | |
1821 | | | |
1822 | |3: // Convert intermediate result to number and continue with number loop. | |
1823 | | checknum CARG2, ->fff_fallback | |
1824 | |.if FPU | |
1825 | |. mtc1 CARG1, FRET1 | |
1826 | | cvt.d.w FRET1, FRET1 | |
1827 | | b >7 | |
1828 | |. ldc1 FARG1, 0(TMP2) | |
1829 | |.else | |
1830 | |. nop | |
1831 | | bal ->vm_sfi2d_1 | |
1832 | |. nop | |
1833 | | b >7 | |
1834 | |. nop | |
1835 | |.endif | |
1836 | | | |
1837 | |5: | |
1838 | | .FPU ldc1 FRET1, 0(BASE) | |
1839 | | checknum CARG1, ->fff_fallback | |
1840 | |6: // Handle numbers. | |
1841 | |. ld CARG2, 0(TMP2) | |
1842 | | beq TMP2, TMP3, ->fff_resn | |
1843 | |.if FPU | |
1844 | | ldc1 FARG1, 0(TMP2) | |
1845 | |.else | |
1846 | | move CRET1, CARG1 | |
1847 | |.endif | |
1848 | | checknum CARG2, >8 | |
1849 | |. nop | |
1850 | |7: | |
// FPU: R6 uses a single min.d/max.d; pre-R6 uses an ordered less-than compare
// followed by a conditional FP move. Soft-float calls ->vm_sfcmpolt and then
// selects with the integer select mnemonics.
1851 | |.if FPU | |
1852 | |.if MIPSR6 | |
1853 | | fpins FRET1, FRET1, FARG1 | |
1854 | |.else | |
1855 | | c.olt.d FRET1, FARG1 | |
1856 | | fpins FRET1, FARG1 | |
1857 | |.endif | |
1858 | |.else | |
1859 | | bal ->vm_sfcmpolt | |
1860 | |. nop | |
1861 | |.if MIPSR6 | |
1862 | | intins AT, CARG2, CRET1 | |
1863 | | intinsc CARG1, CARG1, CRET1 | |
1864 | | or CARG1, CARG1, AT | |
1865 | |.else | |
1866 | | intins CARG1, CARG2, CRET1 | |
1867 | |.endif | |
1868 | |.endif | |
1869 | | b <6 | |
1870 | |. daddiu TMP2, TMP2, 8 | |
1871 | | | |
1872 | |8: // Convert integer to number and continue with number loop. | |
1873 | | checkint CARG2, ->fff_fallback | |
1874 | |.if FPU | |
1875 | |. lwc1 FARG1, LO(TMP2) | |
1876 | | b <7 | |
1877 | |. cvt.d.w FARG1, FARG1 | |
1878 | |.else | |
1879 | |. lw CARG2, LO(TMP2) | |
1880 | | bal ->vm_sfi2d_2 | |
1881 | |. nop | |
1882 | | b <7 | |
1883 | |. nop | |
1884 | |.endif | |
1885 | | | |
1886 | |.endmacro | |
1887 | | | |
1888 | |.if MIPSR6 | |
1889 | | math_minmax math_min, seleqz, selnez, min.d | |
1890 | | math_minmax math_max, selnez, seleqz, max.d | |
1891 | |.else | |
1892 | | math_minmax math_min, movz, _, movf.d | |
1893 | | math_minmax math_max, movn, _, movt.d | |
1894 | |.endif | |
1895 | | | ||
1896 | |//-- String library ----------------------------------------------------- | ||
1897 | | | ||
// Fast path for string.byte with exactly one string argument: returns the
// first byte (tagged with TISNUM) when the string is non-empty, and zero
// results when it is empty. Everything else goes to ->fff_fallback.
1898 | |.ffunc string_byte // Only handle the 1-arg case here. | |
1899 | | ld CARG1, 0(BASE) | |
1900 | | gettp TMP0, CARG1 | |
1901 | | xori AT, NARGS8:RC, 8 | |
1902 | | daddiu TMP0, TMP0, -LJ_TSTR | |
1903 | | or AT, AT, TMP0 | |
1904 | | bnez AT, ->fff_fallback // Need exactly 1 string argument. | |
1905 | |. cleartp STR:CARG1 | |
1906 | | lw TMP0, STR:CARG1->len | |
1907 | | daddiu RA, BASE, -16 | |
1908 | | ld PC, FRAME_PC(BASE) | |
// RD = (len != 0); turned into (nresults+1)*8 two lines further down.
1909 | | sltu RD, r0, TMP0 | |
1910 | | lbu TMP1, STR:CARG1[1] // Access is always ok (NUL at end). | |
1911 | | addiu RD, RD, 1 | |
1912 | | sll RD, RD, 3 // RD = ((str->len != 0)+1)*8 | |
1913 | | settp TMP1, TISNUM | |
1914 | | b ->fff_res | |
1915 | |. sd TMP1, 0(RA) | |
1916 | | | ||
1917 | |.ffunc string_char // Only handle the 1-arg case here. | ||
1918 | | ffgccheck | ||
1919 | |.if not MIPSR6 | ||
1920 | |. nop | ||
1921 | |.endif | ||
1922 | | ld CARG1, 0(BASE) | ||
1923 | | gettp TMP0, CARG1 | ||
1924 | | xori AT, NARGS8:RC, 8 // Exactly 1 argument. | ||
1925 | | daddiu TMP0, TMP0, -LJ_TISNUM // Integer. | ||
1926 | | li TMP1, 255 | ||
1927 | | sextw CARG1, CARG1 | ||
1928 | | or AT, AT, TMP0 | ||
1929 | | sltu TMP1, TMP1, CARG1 // !(255 < n). | ||
1930 | | or AT, AT, TMP1 // AT != 0 if any of the three checks failed. | ||
1931 | | bnez AT, ->fff_fallback | ||
1932 | |. li CARG3, 1 // Delay slot: string length 1. | ||
1933 | | daddiu CARG2, sp, TMPD_OFS | ||
1934 | | sb CARG1, TMPD // Store the byte in TMPD (presumably the slot CARG2 points at -- confirm). | ||
1935 | |->fff_newstr: // Shared tail: CARG2 = str, CARG3 = length. | ||
1936 | | load_got lj_str_new | ||
1937 | | sd BASE, L->base | ||
1938 | | sd PC, SAVE_PC | ||
1939 | | call_intern lj_str_new // (lua_State *L, char *str, size_t l) | ||
1940 | |. move CARG1, L | ||
1941 | | // Returns GCstr *. | ||
1942 | | ld BASE, L->base | ||
1943 | |->fff_resstr: // Shared tail: tag CRET1 as a string and return it. | ||
1944 | | li AT, LJ_TSTR | ||
1945 | | settp CRET1, AT | ||
1946 | | b ->fff_restv | ||
1947 | |. move CARG1, CRET1 | ||
1948 | | | ||
1949 | |.ffunc string_sub | ||
1950 | | ffgccheck | ||
1951 | |.if not MIPSR6 | ||
1952 | |. nop | ||
1953 | |.endif | ||
1954 | | addiu AT, NARGS8:RC, -16 // AT < 0: fewer than 2 args; AT == 0: exactly 2. | ||
1955 | | ld TMP0, 0(BASE) | ||
1956 | | bltz AT, ->fff_fallback | ||
1957 | |. gettp TMP3, TMP0 | ||
1958 | | cleartp STR:CARG1, TMP0 | ||
1959 | | ld CARG2, 8(BASE) | ||
1960 | | beqz AT, >1 | ||
1961 | |. li CARG4, -1 // Delay slot: default end = -1. | ||
1962 | | ld CARG3, 16(BASE) // Third argument, only read when more than 2 args. | ||
1963 | | checkint CARG3, ->fff_fallback | ||
1964 | |. sextw CARG4, CARG3 | ||
1965 | |1: | ||
1966 | | checkint CARG2, ->fff_fallback | ||
1967 | |. li AT, LJ_TSTR | ||
1968 | | bne TMP3, AT, ->fff_fallback // First argument must be a string. | ||
1969 | |. sextw CARG3, CARG2 | ||
1970 | | lw CARG2, STR:CARG1->len | ||
1971 | | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end | ||
1972 | | slt AT, CARG4, r0 | ||
1973 | | addiu TMP0, CARG2, 1 // TMP0 = len+1. | ||
1974 | | addu TMP1, CARG4, TMP0 | ||
1975 | | slt TMP3, CARG3, r0 | ||
1976 | |.if MIPSR6 | ||
1977 | | seleqz CARG4, CARG4, AT | ||
1978 | | selnez TMP1, TMP1, AT | ||
1979 | | or CARG4, TMP1, CARG4 // if (end < 0) end += len+1 | ||
1980 | |.else | ||
1981 | | movn CARG4, TMP1, AT // if (end < 0) end += len+1 | ||
1982 | |.endif | ||
1983 | | addu TMP1, CARG3, TMP0 | ||
1984 | |.if MIPSR6 | ||
1985 | | selnez TMP1, TMP1, TMP3 | ||
1986 | | seleqz CARG3, CARG3, TMP3 | ||
1987 | | or CARG3, TMP1, CARG3 // if (start < 0) start += len+1 | ||
1988 | | li TMP2, 1 | ||
1989 | | slt AT, CARG4, r0 | ||
1990 | | slt TMP3, r0, CARG3 | ||
1991 | | seleqz CARG4, CARG4, AT // if (end < 0) end = 0 | ||
1992 | | selnez CARG3, CARG3, TMP3 | ||
1993 | | seleqz TMP2, TMP2, TMP3 | ||
1994 | | or CARG3, TMP2, CARG3 // if (start < 1) start = 1 | ||
1995 | | slt AT, CARG2, CARG4 | ||
1996 | | seleqz CARG4, CARG4, AT | ||
1997 | | selnez CARG2, CARG2, AT | ||
1998 | | or CARG4, CARG2, CARG4 // if (end > len) end = len | ||
1999 | |.else | ||
2000 | | movn CARG3, TMP1, TMP3 // if (start < 0) start += len+1 | ||
2001 | | li TMP2, 1 | ||
2002 | | slt AT, CARG4, r0 | ||
2003 | | slt TMP3, r0, CARG3 | ||
2004 | | movn CARG4, r0, AT // if (end < 0) end = 0 | ||
2005 | | movz CARG3, TMP2, TMP3 // if (start < 1) start = 1 | ||
2006 | | slt AT, CARG2, CARG4 | ||
2007 | | movn CARG4, CARG2, AT // if (end > len) end = len | ||
2008 | |.endif | ||
2009 | | daddu CARG2, STR:CARG1, CARG3 | ||
2010 | | subu CARG3, CARG4, CARG3 // len = end - start | ||
2011 | | daddiu CARG2, CARG2, sizeof(GCstr)-1 // CARG2 = str + start + sizeof(GCstr) - 1. | ||
2012 | | bgez CARG3, ->fff_newstr // end >= start: create the substring. | ||
2013 | |. addiu CARG3, CARG3, 1 // len++ | ||
2014 | |->fff_emptystr: // Return empty string. | ||
2015 | | li AT, LJ_TSTR | ||
2016 | | daddiu STR:CARG1, DISPATCH, DISPATCH_GL(strempty) | ||
2017 | | b ->fff_restv | ||
2018 | |. settp CARG1, AT | ||
2019 | | | ||
2020 | |.macro ffstring_op, name | ||
2021 | | .ffunc string_ .. name | ||
2022 | | ffgccheck | ||
2023 | |. nop | ||
2024 | | beqz NARGS8:RC, ->fff_fallback // Need at least 1 argument. | ||
2025 | |. ld CARG2, 0(BASE) | ||
2026 | | checkstr STR:CARG2, ->fff_fallback | ||
2027 | | daddiu SBUF:CARG1, DISPATCH, DISPATCH_GL(tmpbuf) // CARG1 = address of the global tmpbuf. | ||
2028 | | load_got lj_buf_putstr_ .. name | ||
2029 | | ld TMP0, SBUF:CARG1->b | ||
2030 | | sd L, SBUF:CARG1->L | ||
2031 | | sd BASE, L->base | ||
2032 | | sd TMP0, SBUF:CARG1->p // Set p = b before the call. | ||
2033 | | call_intern extern lj_buf_putstr_ .. name | ||
2034 | |. sd PC, SAVE_PC | ||
2035 | | load_got lj_buf_tostr | ||
2036 | | call_intern lj_buf_tostr | ||
2037 | |. move SBUF:CARG1, SBUF:CRET1 // Delay slot: pass the returned SBUF on. | ||
2038 | | b ->fff_resstr | ||
2039 | |. ld BASE, L->base | ||
2040 | |.endmacro | ||
2041 | | | ||
2042 | |ffstring_op reverse | ||
2043 | |ffstring_op lower | ||
2044 | |ffstring_op upper | ||
2045 | | | ||
2046 | |//-- Bit library -------------------------------------------------------- | ||
2047 | | | ||
2048 | |->vm_tobit_fb: | ||
2049 | | beqz TMP1, ->fff_fallback // TMP1 == 0: fall back (callers set TMP1 in the bal delay slot). | ||
2050 | |.if FPU | ||
2051 | |. ldc1 FARG1, 0(BASE) | ||
2052 | | add.d FARG1, FARG1, TOBIT // Add the TOBIT bias. | ||
2053 | | mfc1 CRET1, FARG1 // Take the low word of the sum. | ||
2054 | | jr ra | ||
2055 | |. zextw CRET1, CRET1 | ||
2056 | |.else | ||
2057 | |// FP number to bit conversion for soft-float. | ||
2058 | |->vm_tobit: | ||
2059 | | dsll TMP0, CARG1, 1 // Shift out the sign bit. | ||
2060 | | li CARG3, 1076 | ||
2061 | | dsrl AT, TMP0, 53 // AT = biased exponent. | ||
2062 | | dsubu CARG3, CARG3, AT // CARG3 = right-shift count. | ||
2063 | | sltiu AT, CARG3, 54 | ||
2064 | | beqz AT, >1 // Shift count not in 0..53: return 0. | ||
2065 | |. dextm TMP0, TMP0, 0, 20 | ||
2066 | | dinsu TMP0, AT, 21, 21 | ||
2067 | | slt AT, CARG1, r0 // AT = 1 for negative input. | ||
2068 | | dsrlv CRET1, TMP0, CARG3 | ||
2069 | | dsubu TMP0, r0, CRET1 // TMP0 = -CRET1. | ||
2070 | |.if MIPSR6 | ||
2071 | | selnez TMP0, TMP0, AT | ||
2072 | | seleqz CRET1, CRET1, AT | ||
2073 | | or CRET1, CRET1, TMP0 | ||
2074 | |.else | ||
2075 | | movn CRET1, TMP0, AT // Negate the result for negative input. | ||
2076 | |.endif | ||
2077 | | jr ra | ||
2078 | |. zextw CRET1, CRET1 | ||
2079 | |1: | ||
2080 | | jr ra | ||
2081 | |. move CRET1, r0 | ||
2082 | | | ||
2083 | |// FP number to int conversion with a check for soft-float. | ||
2084 | |// Modifies CARG1, CRET1, CRET2, TMP0, AT. | ||
2085 | |->vm_tointg: // In: CARG1 = double bits. Out: CRET1 = integer, CRET2 != 0 if a check failed. | ||
2086 | |.if JIT | ||
2087 | | dsll CRET2, CARG1, 1 // Shift out the sign bit. | ||
2088 | | beqz CRET2, >2 // +-0: return 0 with CRET2 = 0. | ||
2089 | |. li TMP0, 1076 | ||
2090 | | dsrl AT, CRET2, 53 // AT = biased exponent. | ||
2091 | | dsubu TMP0, TMP0, AT // TMP0 = right-shift count. | ||
2092 | | sltiu AT, TMP0, 54 | ||
2093 | | beqz AT, >1 // Shift count not in 0..53: fail. | ||
2094 | |. dextm CRET2, CRET2, 0, 20 | ||
2095 | | dinsu CRET2, AT, 21, 21 | ||
2096 | | slt AT, CARG1, r0 // AT = 1 for negative input. | ||
2097 | | dsrlv CRET1, CRET2, TMP0 | ||
2098 | | dsubu CARG1, r0, CRET1 // CARG1 = -CRET1. | ||
2099 | |.if MIPSR6 | ||
2100 | | seleqz CRET1, CRET1, AT | ||
2101 | | selnez CARG1, CARG1, AT | ||
2102 | | or CRET1, CRET1, CARG1 | ||
2103 | |.else | ||
2104 | | movn CRET1, CARG1, AT // Negate the result for negative input. | ||
2105 | |.endif | ||
2106 | | li CARG1, 64 | ||
2107 | | subu TMP0, CARG1, TMP0 // TMP0 = 64 - shift count. | ||
2108 | | dsllv CRET2, CRET2, TMP0 // Integer check. | ||
2109 | | sextw AT, CRET1 | ||
2110 | | xor AT, CRET1, AT // Range check. | ||
2111 | |// Merge the checks: CRET2 = CRET2 != 0 ? CRET2 : AT. The return must follow the merge. | ||
2112 | |.if MIPSR6 | ||
2113 | | seleqz AT, AT, CRET2 | ||
2114 | | selnez CRET2, CRET2, CRET2 | ||
2115 | | jr ra | ||
2116 | |. or CRET2, AT, CRET2 | ||
2117 | |.else | ||
2118 | | jr ra | ||
2119 | |. movz CRET2, AT, CRET2 | ||
2120 | |.endif | ||
2121 | |1: | ||
2122 | | jr ra | ||
2123 | |. li CRET2, 1 // Signal failure. | ||
2124 | |2: | ||
2125 | | jr ra | ||
2126 | |. move CRET1, r0 | ||
2127 | |.endif | ||
2128 | |.endif | ||
2129 | | | ||
2130 | |.macro .ffunc_bit, name | ||
2131 | | .ffunc_1 bit_..name | ||
2132 | | gettp TMP0, CARG1 | ||
2133 | | beq TMP0, TISNUM, >6 // Integer argument: skip the conversion call. | ||
2134 | |. zextw CRET1, CARG1 | ||
2135 | | bal ->vm_tobit_fb | ||
2136 | |. sltiu TMP1, TMP0, LJ_TISNUM // Delay slot: TMP1 = (tag < LJ_TISNUM); vm_tobit_fb falls back when 0. | ||
2137 | |6: | ||
2138 | |.endmacro | ||
2139 | | | ||
2140 | |.macro .ffunc_bit_op, name, bins | ||
2141 | | .ffunc_bit name | ||
2142 | | daddiu TMP2, BASE, 8 // TMP2 = address of the 2nd argument. | ||
2143 | | daddu TMP3, BASE, NARGS8:RC // TMP3 = end of the arguments. | ||
2144 | |1: | ||
2145 | | beq TMP2, TMP3, ->fff_resi // All arguments consumed: return CRET1. | ||
2146 | |. ld CARG1, 0(TMP2) | ||
2147 | | gettp TMP0, CARG1 | ||
2148 | |.if FPU | ||
2149 | | bne TMP0, TISNUM, >2 | ||
2150 | |. daddiu TMP2, TMP2, 8 // Delay slot: advance to the next argument. | ||
2151 | | zextw CARG1, CARG1 | ||
2152 | | b <1 | ||
2153 | |. bins CRET1, CRET1, CARG1 // Delay slot: accumulate into CRET1. | ||
2154 | |2: | ||
2155 | | ldc1 FARG1, -8(TMP2) // Reload the argument (TMP2 was already advanced). | ||
2156 | | sltiu AT, TMP0, LJ_TISNUM | ||
2157 | | beqz AT, ->fff_fallback | ||
2158 | |. add.d FARG1, FARG1, TOBIT | ||
2159 | | mfc1 CARG1, FARG1 | ||
2160 | | zextw CARG1, CARG1 | ||
2161 | | b <1 | ||
2162 | |. bins CRET1, CRET1, CARG1 | ||
2163 | |.else | ||
2164 | | beq TMP0, TISNUM, >2 | ||
2165 | |. move CRET2, CRET1 | ||
2166 | | bal ->vm_tobit_fb | ||
2167 | |. sltiu TMP1, TMP0, LJ_TISNUM | ||
2168 | | move CARG1, CRET2 | ||
2169 | |2: | ||
2170 | | zextw CARG1, CARG1 | ||
2171 | | bins CRET1, CRET1, CARG1 | ||
2172 | | b <1 | ||
2173 | |. daddiu TMP2, TMP2, 8 // Delay slot: advance to the next argument. | ||
2174 | |.endif | ||
2175 | |.endmacro | ||
2176 | | | ||
2177 | |.ffunc_bit_op band, and | ||
2178 | |.ffunc_bit_op bor, or | ||
2179 | |.ffunc_bit_op bxor, xor | ||
2180 | | | ||
2181 | |.ffunc_bit bswap | ||
2182 | | dsrl TMP0, CRET1, 8 | ||
2183 | | dsrl TMP1, CRET1, 24 | ||
2184 | | andi TMP2, TMP0, 0xff00 | ||
2185 | | dins TMP1, CRET1, 24, 31 | ||
2186 | | dins TMP2, TMP0, 16, 23 | ||
2187 | | b ->fff_resi | ||
2188 | |. or CRET1, TMP1, TMP2 // Delay slot: combine the two halves into the result. | ||
2189 | | | ||
2190 | |.ffunc_bit bnot | ||
2191 | | not CRET1, CRET1 | ||
2192 | | b ->fff_resi | ||
2193 | |. zextw CRET1, CRET1 // Delay slot: zero-extend the inverted word. | ||
2194 | | | ||
2195 | |.macro .ffunc_bit_sh, name, shins, shmod | ||
2196 | | .ffunc_2 bit_..name | ||
2197 | | gettp TMP0, CARG1 | ||
2198 | | beq TMP0, TISNUM, >1 // First argument already an integer. | ||
2199 | |. nop | ||
2200 | | bal ->vm_tobit_fb | ||
2201 | |. sltiu TMP1, TMP0, LJ_TISNUM | ||
2202 | | move CARG1, CRET1 | ||
2203 | |1: | ||
2204 | | gettp TMP0, CARG2 | ||
2205 | | bne TMP0, TISNUM, ->fff_fallback // Shift count must be an integer. | ||
2206 | |. zextw CARG2, CARG2 | ||
2207 | | sextw CARG1, CARG1 | ||
2208 | |.if shmod == 1 | ||
2209 | | negu CARG2, CARG2 // shmod == 1: negate the count before applying shins. | ||
2210 | |.endif | ||
2211 | | shins CRET1, CARG1, CARG2 | ||
2212 | | b ->fff_resi | ||
2213 | |. zextw CRET1, CRET1 | ||
2214 | |.endmacro | ||
2215 | | | ||
2216 | |.ffunc_bit_sh lshift, sllv, 0 | ||
2217 | |.ffunc_bit_sh rshift, srlv, 0 | ||
2218 | |.ffunc_bit_sh arshift, srav, 0 | ||
2219 | |.ffunc_bit_sh rol, rotrv, 1 // Rotate left = rotate right by the negated count. | ||
2220 | |.ffunc_bit_sh ror, rotrv, 0 | ||
2221 | | | ||
2222 | |.ffunc_bit tobit | ||
2223 | |->fff_resi: // Shared tail: return CRET1 as an integer result. | ||
2224 | | ld PC, FRAME_PC(BASE) | ||
2225 | | daddiu RA, BASE, -16 | ||
2226 | | settp CRET1, TISNUM // Tag the value as an integer. | ||
2227 | | b ->fff_res1 | ||
2228 | |. sd CRET1, -16(BASE) // Delay slot: store the result at BASE-16. | ||
2229 | | | ||
2230 | |//----------------------------------------------------------------------- | ||
2231 | |->fff_fallback: // Call fast function fallback handler. | ||
2232 | | // BASE = new base, RB = CFUNC, RC = nargs*8 | ||
2233 | | ld TMP3, CFUNC:RB->f | ||
2234 | | daddu TMP1, BASE, NARGS8:RC // TMP1 = new top. | ||
2235 | | ld PC, FRAME_PC(BASE) // Fallback may overwrite PC. | ||
2236 | | daddiu TMP0, TMP1, 8*LUA_MINSTACK | ||
2237 | | ld TMP2, L->maxstack | ||
2238 | | sd PC, SAVE_PC // Redundant (but a defined value). | ||
2239 | | sltu AT, TMP2, TMP0 // AT = 1 if top + LUA_MINSTACK slots exceeds maxstack. | ||
2240 | | sd BASE, L->base | ||
2241 | | sd TMP1, L->top | ||
2242 | | bnez AT, >5 // Need to grow stack. | ||
2243 | |. move CFUNCADDR, TMP3 | ||
2244 | | jalr TMP3 // (lua_State *L) | ||
2245 | |. move CARG1, L | ||
2246 | | // Either throws an error, or recovers and returns -1, 0 or nresults+1. | ||
2247 | | ld BASE, L->base | ||
2248 | | sll RD, CRET1, 3 // RD = return value * 8. | ||
2249 | | bgtz CRET1, ->fff_res // Returned nresults+1? | ||
2250 | |. daddiu RA, BASE, -16 | ||
2251 | |1: // Returned 0 or -1: retry fast path. | ||
2252 | | ld LFUNC:RB, FRAME_FUNC(BASE) | ||
2253 | | ld TMP0, L->top | ||
2254 | | cleartp LFUNC:RB | ||
2255 | | bnez CRET1, ->vm_call_tail // Returned -1? | ||
2256 | |. dsubu NARGS8:RC, TMP0, BASE // Delay slot: recompute nargs*8 from L->top. | ||
2257 | | ins_callt // Returned 0: retry fast path. | ||
2258 | | | ||
2259 | |// Reconstruct previous base for vmeta_call during tailcall. | ||
2260 | |->vm_call_tail: | ||
2261 | | andi TMP0, PC, FRAME_TYPE | ||
2262 | | li AT, -4 | ||
2263 | | bnez TMP0, >3 // Frame type bits set: use PC with the low 2 bits cleared as the delta. | ||
2264 | |. and TMP1, PC, AT | ||
2265 | | lbu TMP1, OFS_RA(PC) | ||
2266 | | sll TMP1, TMP1, 3 | ||
2267 | | addiu TMP1, TMP1, 16 // TMP1 = RA*8 + 16. | ||
2268 | |3: | ||
2269 | | b ->vm_call_dispatch // Resolve again for tailcall. | ||
2270 | |. dsubu TMP2, BASE, TMP1 | ||
2271 | | | ||
2272 | |5: // Grow stack for fallback handler. | ||
2273 | | load_got lj_state_growstack | ||
2274 | | li CARG2, LUA_MINSTACK | ||
2275 | | call_intern lj_state_growstack // (lua_State *L, int n) | ||
2276 | |. move CARG1, L | ||
2277 | | ld BASE, L->base | ||
2278 | | b <1 | ||
2279 | |. li CRET1, 0 // Force retry. | ||
2280 | | | ||
2281 | |->fff_gcstep: // Call GC step function. | ||
2282 | | // BASE = new base, RC = nargs*8 | ||
2283 | | move MULTRES, ra // Keep the return address in MULTRES across the call. | ||
2284 | | load_got lj_gc_step | ||
2285 | | sd BASE, L->base | ||
2286 | | daddu TMP0, BASE, NARGS8:RC | ||
2287 | | sd PC, SAVE_PC // Redundant (but a defined value). | ||
2288 | | sd TMP0, L->top | ||
2289 | | call_intern lj_gc_step // (lua_State *L) | ||
2290 | |. move CARG1, L | ||
2291 | | ld BASE, L->base | ||
2292 | | move ra, MULTRES // Restore the return address. | ||
2293 | | ld TMP0, L->top | ||
2294 | | ld CFUNC:RB, FRAME_FUNC(BASE) | ||
2295 | | cleartp CFUNC:RB | ||
2296 | | jr ra | ||
2297 | |. dsubu NARGS8:RC, TMP0, BASE // Delay slot: recompute nargs*8 from L->top. | ||
2298 | | | ||
2299 | |//----------------------------------------------------------------------- | ||
2300 | |//-- Special dispatch targets ------------------------------------------- | ||
2301 | |//----------------------------------------------------------------------- | ||
2302 | | | ||
2303 | |->vm_record: // Dispatch target for recording phase. | ||
2304 | |.if JIT | ||
2305 | | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) | ||
2306 | | andi AT, TMP3, HOOK_VMEVENT // No recording while in vmevent. | ||
2307 | | bnez AT, >5 | ||
2308 | | // Decrement the hookcount for consistency, but always do the call. | ||
2309 | |. lw TMP2, DISPATCH_GL(hookcount)(DISPATCH) | ||
2310 | | andi AT, TMP3, HOOK_ACTIVE | ||
2311 | | bnez AT, >1 // Hook active: skip storing the count. | ||
2312 | |. addiu TMP2, TMP2, -1 | ||
2313 | | andi AT, TMP3, LUA_MASKLINE|LUA_MASKCOUNT | ||
2314 | | beqz AT, >1 // No line/count hook: skip storing the count. | ||
2315 | |. nop | ||
2316 | | b >1 | ||
2317 | |. sw TMP2, DISPATCH_GL(hookcount)(DISPATCH) // Delay slot: store the decremented count. | ||
2318 | |.endif | ||
2319 | | | ||
2320 | |->vm_rethook: // Dispatch target for return hooks. | ||
2321 | | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) | ||
2322 | | andi AT, TMP3, HOOK_ACTIVE // Hook already active? | ||
2323 | | beqz AT, >1 | ||
2324 | |5: // Re-dispatch to static ins. | ||
2325 | |. ld AT, GG_DISP2STATIC(TMP0) // Assumes TMP0 holds DISPATCH+OP*8. | ||
2326 | | jr AT | ||
2327 | |. nop | ||
2328 | | | ||
2329 | |->vm_inshook: // Dispatch target for instr/line hooks. | ||
2330 | | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) | ||
2331 | | lw TMP2, DISPATCH_GL(hookcount)(DISPATCH) | ||
2332 | | andi AT, TMP3, HOOK_ACTIVE // Hook already active? | ||
2333 | | bnez AT, <5 | ||
2334 | |. andi AT, TMP3, LUA_MASKLINE|LUA_MASKCOUNT | ||
2335 | | beqz AT, <5 // Neither line nor count hook set: re-dispatch. | ||
2336 | |. addiu TMP2, TMP2, -1 | ||
2337 | | beqz TMP2, >1 // Count reached zero: call the hook dispatcher. | ||
2338 | |. sw TMP2, DISPATCH_GL(hookcount)(DISPATCH) | ||
2339 | | andi AT, TMP3, LUA_MASKLINE | ||
2340 | | beqz AT, <5 // No line hook: re-dispatch. | ||
2341 | |1: | ||
2342 | |. load_got lj_dispatch_ins | ||
2343 | | sw MULTRES, SAVE_MULTRES | ||
2344 | | move CARG2, PC | ||
2345 | | sd BASE, L->base | ||
2346 | | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. | ||
2347 | | call_intern lj_dispatch_ins // (lua_State *L, const BCIns *pc) | ||
2348 | |. move CARG1, L | ||
2349 | |3: | ||
2350 | | ld BASE, L->base | ||
2351 | |4: // Re-dispatch to static ins. | ||
2352 | | lw INS, -4(PC) // Reload the current instruction word. | ||
2353 | | decode_OP8a TMP1, INS | ||
2354 | | decode_OP8b TMP1 | ||
2355 | | daddu TMP0, DISPATCH, TMP1 | ||
2356 | | decode_RD8a RD, INS | ||
2357 | | ld AT, GG_DISP2STATIC(TMP0) // Static handler for this opcode. | ||
2358 | | decode_RA8a RA, INS | ||
2359 | | decode_RD8b RD | ||
2360 | | jr AT | ||
2361 | | decode_RA8b RA | ||
2362 | | | ||
2363 | |->cont_hook: // Continue from hook yield. | ||
2364 | | daddiu PC, PC, 4 // Advance PC by one instruction; the re-dispatch at 4: reads -4(PC). | ||
2365 | | b <4 | ||
2366 | |. lw MULTRES, -24+LO(RB) // Restore MULTRES for *M ins. | ||
2367 | | | ||
2368 | |->vm_hotloop: // Hot loop counter underflow. | ||
2369 | |.if JIT | ||
2370 | | ld LFUNC:TMP1, FRAME_FUNC(BASE) | ||
2371 | | daddiu CARG1, DISPATCH, GG_DISP2J // CARG1 = jit_State *J. | ||
2372 | | cleartp LFUNC:TMP1 | ||
2373 | | sd PC, SAVE_PC | ||
2374 | | ld TMP1, LFUNC:TMP1->pc | ||
2375 | | move CARG2, PC | ||
2376 | | sd L, DISPATCH_J(L)(DISPATCH) | ||
2377 | | lbu TMP1, PC2PROTO(framesize)(TMP1) | ||
2378 | | load_got lj_trace_hot | ||
2379 | | sd BASE, L->base | ||
2380 | | dsll TMP1, TMP1, 3 | ||
2381 | | daddu TMP1, BASE, TMP1 // TMP1 = BASE + framesize*8. | ||
2382 | | call_intern lj_trace_hot // (jit_State *J, const BCIns *pc) | ||
2383 | |. sd TMP1, L->top // Delay slot: set L->top to the end of the frame. | ||
2384 | | b <3 | ||
2385 | |. nop | ||
2386 | |.endif | ||
2387 | | | ||
2388 | | | ||
2389 | |->vm_callhook: // Dispatch target for call hooks. | ||
2390 | |.if JIT | ||
2391 | | b >1 // Skip the hot-call entry, which sets the low bit of CARG2. | ||
2392 | |.endif | ||
2393 | |. move CARG2, PC | ||
2394 | | | ||
2395 | |->vm_hotcall: // Hot call counter underflow. | ||
2396 | |.if JIT | ||
2397 | | ori CARG2, PC, 1 // Pass PC with the low bit set. | ||
2398 | |1: | ||
2399 | |.endif | ||
2400 | | load_got lj_dispatch_call | ||
2401 | | daddu TMP0, BASE, RC | ||
2402 | | sd PC, SAVE_PC | ||
2403 | | sd BASE, L->base | ||
2404 | | dsubu RA, RA, BASE // Keep RA as an offset from BASE across the call. | ||
2405 | | sd TMP0, L->top | ||
2406 | | call_intern lj_dispatch_call // (lua_State *L, const BCIns *pc) | ||
2407 | |. move CARG1, L | ||
2408 | | // Returns ASMFunction. | ||
2409 | | ld BASE, L->base | ||
2410 | | ld TMP0, L->top | ||
2411 | | sd r0, SAVE_PC // Invalidate for subsequent line hook. | ||
2412 | | dsubu NARGS8:RC, TMP0, BASE | ||
2413 | | daddu RA, BASE, RA // Rebase RA on the reloaded BASE. | ||
2414 | | ld LFUNC:RB, FRAME_FUNC(BASE) | ||
2415 | | cleartp LFUNC:RB | ||
2416 | | jr CRET1 // Jump to the returned ASMFunction. | ||
2417 | |. lw INS, -4(PC) | ||
2418 | | | ||
2419 | |->cont_stitch: // Trace stitching. | ||
2420 | |.if JIT | ||
2421 | | // RA = resultptr, RB = meta base | ||
2422 | | lw INS, -4(PC) | ||
2423 | | ld TRACE:TMP2, -40(RB) // Save previous trace. | ||
2424 | | decode_RA8a RC, INS | ||
2425 | | daddiu AT, MULTRES, -8 // AT = MULTRES - 8; zero means nothing to move. | ||
2426 | | cleartp TRACE:TMP2 | ||
2427 | | decode_RA8b RC | ||
2428 | | beqz AT, >2 | ||
2429 | |. daddu RC, BASE, RC // Call base. | ||
2430 | |1: // Move results down. | ||
2431 | | ld CARG1, 0(RA) | ||
2432 | | daddiu AT, AT, -8 | ||
2433 | | daddiu RA, RA, 8 | ||
2434 | | sd CARG1, 0(RC) | ||
2435 | | bnez AT, <1 | ||
2436 | |. daddiu RC, RC, 8 | ||
2437 | |2: | ||
2438 | | decode_RA8a RA, INS | ||
2439 | | decode_RB8a RB, INS | ||
2440 | | decode_RA8b RA | ||
2441 | | decode_RB8b RB | ||
2442 | | daddu RA, RA, RB | ||
2443 | | daddu RA, BASE, RA // RA = BASE + decoded RA + decoded RB: end of the wanted results. | ||
2444 | |3: | ||
2445 | | sltu AT, RC, RA | ||
2446 | | bnez AT, >9 // More results wanted? | ||
2447 | |. nop | ||
2448 | | | ||
2449 | | lhu TMP3, TRACE:TMP2->traceno | ||
2450 | | lhu RD, TRACE:TMP2->link | ||
2451 | | beq RD, TMP3, ->cont_nop // Blacklisted. | ||
2452 | |. load_got lj_dispatch_stitch | ||
2453 | | bnez RD, =>BC_JLOOP // Jump to stitched trace. | ||
2454 | |. sll RD, RD, 3 | ||
2455 | | | ||
2456 | | // Stitch a new trace to the previous trace. | ||
2457 | | sw TMP3, DISPATCH_J(exitno)(DISPATCH) | ||
2458 | | sd L, DISPATCH_J(L)(DISPATCH) | ||
2459 | | sd BASE, L->base | ||
2460 | | daddiu CARG1, DISPATCH, GG_DISP2J | ||
2461 | | call_intern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) | ||
2462 | |. move CARG2, PC | ||
2463 | | b ->cont_nop | ||
2464 | |. ld BASE, L->base | ||
2465 | | | ||
2466 | |9: | ||
2467 | | sd TISNIL, 0(RC) // Fill a missing result slot with nil. | ||
2468 | | b <3 | ||
2469 | |. daddiu RC, RC, 8 | ||
2470 | |.endif | ||
2471 | | | ||
2472 | |->vm_profhook: // Dispatch target for profiler hook. | ||
2473 | #if LJ_HASPROFILE | ||
2474 | | load_got lj_dispatch_profile | ||
2475 | | sw MULTRES, SAVE_MULTRES // Save MULTRES before the call. | ||
2476 | | move CARG2, PC | ||
2477 | | sd BASE, L->base | ||
2478 | | call_intern lj_dispatch_profile // (lua_State *L, const BCIns *pc) | ||
2479 | |. move CARG1, L | ||
2480 | | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. | ||
2481 | | daddiu PC, PC, -4 // Step PC back by one instruction. | ||
2482 | | b ->cont_nop | ||
2483 | |. ld BASE, L->base | ||
2484 | #endif | ||
2485 | | | ||
2486 | |//----------------------------------------------------------------------- | ||
2487 | |//-- Trace exit handler ------------------------------------------------- | ||
2488 | |//----------------------------------------------------------------------- | ||
2489 | | | ||
2490 | |.macro savex_, a, b | ||
2491 | |.if FPU | ||
2492 | | sdc1 f..a, a*8(sp) // FPRs go in the first 32*8 bytes. | ||
2493 | | sdc1 f..b, b*8(sp) | ||
2494 | | sd r..a, 32*8+a*8(sp) // GPRs follow at offset 32*8. | ||
2495 | | sd r..b, 32*8+b*8(sp) | ||
2496 | |.else | ||
2497 | | sd r..a, a*8(sp) // No FPU: GPRs only, starting at offset 0. | ||
2498 | | sd r..b, b*8(sp) | ||
2499 | |.endif | ||
2500 | |.endmacro | ||
2501 | | | ||
2502 | |->vm_exit_handler: | ||
2503 | |.if JIT | ||
2504 | |.if FPU | ||
2505 | | daddiu sp, sp, -(32*8+32*8) // Room for 32 FPRs + 32 GPRs. | ||
2506 | |.else | ||
2507 | | daddiu sp, sp, -(32*8) // Room for 32 GPRs. | ||
2508 | |.endif | ||
2509 | | savex_ 0, 1 | ||
2510 | | savex_ 2, 3 | ||
2511 | | savex_ 4, 5 | ||
2512 | | savex_ 6, 7 | ||
2513 | | savex_ 8, 9 | ||
2514 | | savex_ 10, 11 | ||
2515 | | savex_ 12, 13 | ||
2516 | | savex_ 14, 15 | ||
2517 | | savex_ 16, 17 | ||
2518 | | savex_ 18, 19 | ||
2519 | | savex_ 20, 21 | ||
2520 | | savex_ 22, 23 | ||
2521 | | savex_ 24, 25 | ||
2522 | | savex_ 26, 27 | ||
2523 | | savex_ 28, 30 // Slots 29 and 31 are filled in separately below. | ||
2524 | |.if FPU | ||
2525 | | sdc1 f29, 29*8(sp) | ||
2526 | | sdc1 f31, 31*8(sp) | ||
2527 | | sd r0, 32*8+31*8(sp) // Clear RID_TMP. | ||
2528 | | daddiu TMP2, sp, 32*8+32*8 // Recompute original value of sp. | ||
2529 | | sd TMP2, 32*8+29*8(sp) // Store sp in RID_SP | ||
2530 | |.else | ||
2531 | | sd r0, 31*8(sp) // Clear RID_TMP. | ||
2532 | | daddiu TMP2, sp, 32*8 // Recompute original value of sp. | ||
2533 | | sd TMP2, 29*8(sp) // Store sp in RID_SP | ||
2534 | |.endif | ||
2535 | | li_vmstate EXIT | ||
2536 | | daddiu DISPATCH, JGL, -GG_DISP2G-32768 // Recompute DISPATCH from JGL. | ||
2537 | | lw TMP1, 0(TMP2) // Load exit number. | ||
2538 | | st_vmstate | ||
2539 | | ld L, DISPATCH_GL(cur_L)(DISPATCH) | ||
2540 | | ld BASE, DISPATCH_GL(jit_base)(DISPATCH) | ||
2541 | | load_got lj_trace_exit | ||
2542 | | sd L, DISPATCH_J(L)(DISPATCH) | ||
2543 | | sw ra, DISPATCH_J(parent)(DISPATCH) // Store trace number. | ||
2544 | | sd BASE, L->base | ||
2545 | | sw TMP1, DISPATCH_J(exitno)(DISPATCH) // Store exit number. | ||
2546 | | daddiu CARG1, DISPATCH, GG_DISP2J | ||
2547 | | sd r0, DISPATCH_GL(jit_base)(DISPATCH) // Clear jit_base. | ||
2548 | | call_intern lj_trace_exit // (jit_State *J, ExitState *ex) | ||
2549 | |. move CARG2, sp // Delay slot: ex = the register save area just built. | ||
2550 | | // Returns MULTRES (unscaled) or negated error code. | ||
2551 | | ld TMP1, L->cframe | ||
2552 | | li AT, -4 | ||
2553 | | ld BASE, L->base | ||
2554 | | and sp, TMP1, AT // sp = L->cframe with the low 2 bits cleared. | ||
2555 | | ld PC, SAVE_PC // Get SAVE_PC. | ||
2556 | | b >1 | ||
2557 | |. sd L, SAVE_L // Set SAVE_L (on-trace resume/yield). | ||
2558 | |.endif | ||
2559 | |->vm_exit_interp: | ||
2560 | |.if JIT | ||
2561 | | // CRET1 = MULTRES or negated error code, BASE, PC and JGL set. | ||
2562 | | ld L, SAVE_L | ||
2563 | | daddiu DISPATCH, JGL, -GG_DISP2G-32768 | ||
2564 | | sd BASE, L->base | ||
2565 | |1: | ||
2566 | | bltz CRET1, >9 // Check for error from exit. | ||
2567 | |. ld LFUNC:RB, FRAME_FUNC(BASE) | ||
2568 | | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | ||
2569 | | dsll MULTRES, CRET1, 3 // Scale MULTRES by 8. | ||
2570 | | cleartp LFUNC:RB | ||
2571 | | sw MULTRES, SAVE_MULTRES | ||
2572 | | li TISNIL, LJ_TNIL | ||
2573 | | li TISNUM, LJ_TISNUM // Setup type comparison constants. | ||
2574 | | .FPU mtc1 TMP3, TOBIT | ||
2575 | | ld TMP1, LFUNC:RB->pc | ||
2576 | | sd r0, DISPATCH_GL(jit_base)(DISPATCH) | ||
2577 | | ld KBASE, PC2PROTO(k)(TMP1) | ||
2578 | | .FPU cvt.d.s TOBIT, TOBIT // Widen the float constant to double. | ||
2579 | | // Modified copy of ins_next which handles function header dispatch, too. | ||
2580 | | lw INS, 0(PC) | ||
2581 | | daddiu PC, PC, 4 | ||
2582 | | // Assumes TISNIL == ~LJ_VMST_INTERP == -1 | ||
2583 | | sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH) | ||
2584 | | decode_OP8a TMP1, INS | ||
2585 | | decode_OP8b TMP1 | ||
2586 | | sltiu TMP2, TMP1, BC_FUNCF*8 // TMP2 = 1 for opcodes below BC_FUNCF. | ||
2587 | | daddu TMP0, DISPATCH, TMP1 | ||
2588 | | decode_RD8a RD, INS | ||
2589 | | ld AT, 0(TMP0) | ||
2590 | | decode_RA8a RA, INS | ||
2591 | | beqz TMP2, >2 // Function header opcode: take the path at 2:. | ||
2592 | |. decode_RA8b RA | ||
2593 | | jr AT | ||
2594 | |. decode_RD8b RD | ||
2595 | |2: | ||
2596 | | sltiu TMP2, TMP1, (BC_FUNCC+2)*8 // Fast function? | ||
2597 | | bnez TMP2, >3 | ||
2598 | |. ld TMP1, FRAME_PC(BASE) | ||
2599 | | // Check frame below fast function. | ||
2600 | | andi TMP0, TMP1, FRAME_TYPE | ||
2601 | | bnez TMP0, >3 // Trace stitching continuation? | ||
2602 | |. nop | ||
2603 | | // Otherwise set KBASE for Lua function below fast function. | ||
2604 | | lw TMP2, -4(TMP1) | ||
2605 | | decode_RA8a TMP0, TMP2 | ||
2606 | | decode_RA8b TMP0 | ||
2607 | | dsubu TMP1, BASE, TMP0 | ||
2608 | | ld LFUNC:TMP2, -32(TMP1) | ||
2609 | | cleartp LFUNC:TMP2 | ||
2610 | | ld TMP1, LFUNC:TMP2->pc | ||
2611 | | ld KBASE, PC2PROTO(k)(TMP1) | ||
2612 | |3: | ||
2613 | | daddiu RC, MULTRES, -8 | ||
2614 | | jr AT | ||
2615 | |. daddu RA, RA, BASE | ||
2616 | | | ||
2617 | |9: // Rethrow error from the right C frame. | ||
2618 | | load_got lj_err_throw | ||
2619 | | negu CARG2, CRET1 // Error code = -CRET1. | ||
2620 | | call_intern lj_err_throw // (lua_State *L, int errcode) | ||
2621 | |. move CARG1, L | ||
2622 | |.endif | ||
2623 | | | ||
2624 | |//----------------------------------------------------------------------- | ||
2625 | |//-- Math helper functions ---------------------------------------------- | ||
2626 | |//----------------------------------------------------------------------- | ||
2627 | | | ||
2628 | |// Hard-float round to integer. | ||
2629 | |// Modifies AT, TMP0, FRET1, FRET2, f4. Keeps all others incl. FARG1. | ||
2630 | |// MIPSR6: Modifies FTMP1, too. | ||
2631 | |.macro vm_round_hf, func | ||
2632 | | lui TMP0, 0x4330 // Hiword of 2^52 (double). | ||
2633 | | dsll TMP0, TMP0, 32 | ||
2634 | | dmtc1 TMP0, f4 // f4 = 2^52. | ||
2635 | | abs.d FRET2, FARG1 // |x| | ||
2636 | | dmfc1 AT, FARG1 // AT = raw bits of x (sign is tested below). | ||
2637 | |.if MIPSR6 | ||
2638 | | cmp.lt.d FTMP1, FRET2, f4 | ||
2639 | | add.d FRET1, FRET2, f4 // (|x| + 2^52) - 2^52 | ||
2640 | | bc1eqz FTMP1, >1 // Truncate only if |x| < 2^52. | ||
2641 | |.else | ||
2642 | | c.olt.d 0, FRET2, f4 | ||
2643 | | add.d FRET1, FRET2, f4 // (|x| + 2^52) - 2^52 | ||
2644 | | bc1f 0, >1 // Truncate only if |x| < 2^52. | ||
2645 | |.endif | ||
2646 | |. sub.d FRET1, FRET1, f4 | ||
2647 | | slt AT, AT, r0 // AT = 1 if the sign bit of x is set. | ||
2648 | |.if "func" == "ceil" | ||
2649 | | lui TMP0, 0xbff0 // Hiword of -1 (double). Preserves -0. | ||
2650 | |.else | ||
2651 | | lui TMP0, 0x3ff0 // Hiword of +1 (double). | ||
2652 | |.endif | ||
2653 | |.if "func" == "trunc" | ||
2654 | | dsll TMP0, TMP0, 32 | ||
2655 | | dmtc1 TMP0, f4 // f4 = +1. | ||
2656 | |.if MIPSR6 | ||
2657 | | cmp.lt.d FTMP1, FRET2, FRET1 // |x| < result? | ||
2658 | | sub.d FRET2, FRET1, f4 | ||
2659 | | sel.d FTMP1, FRET1, FRET2 // If yes, subtract +1. | ||
2660 | | dmtc1 AT, FRET1 | ||
2661 | | neg.d FRET2, FTMP1 | ||
2662 | | jr ra | ||
2663 | |. sel.d FRET1, FTMP1, FRET2 // Merge sign bit back in. | ||
2664 | |.else | ||
2665 | | c.olt.d 0, FRET2, FRET1 // |x| < result? | ||
2666 | | sub.d FRET2, FRET1, f4 | ||
2667 | | movt.d FRET1, FRET2, 0 // If yes, subtract +1. | ||
2668 | | neg.d FRET2, FRET1 | ||
2669 | | jr ra | ||
2670 | |. movn.d FRET1, FRET2, AT // Merge sign bit back in. | ||
2671 | |.endif | ||
2672 | |.else | ||
2673 | | neg.d FRET2, FRET1 | ||
2674 | | dsll TMP0, TMP0, 32 | ||
2675 | | dmtc1 TMP0, f4 // f4 = -1 for ceil, +1 otherwise. | ||
2676 | |.if MIPSR6 | ||
2677 | | dmtc1 AT, FTMP1 | ||
2678 | | sel.d FTMP1, FRET1, FRET2 | ||
2679 | |.if "func" == "ceil" | ||
2680 | | cmp.lt.d FRET1, FTMP1, FARG1 // x > result? | ||
2681 | |.else | ||
2682 | | cmp.lt.d FRET1, FARG1, FTMP1 // x < result? | ||
2683 | |.endif | ||
2684 | | sub.d FRET2, FTMP1, f4 // If yes, subtract +-1. | ||
2685 | | jr ra | ||
2686 | |. sel.d FRET1, FTMP1, FRET2 | ||
2687 | |.else | ||
2688 | | movn.d FRET1, FRET2, AT // Merge sign bit back in. | ||
2689 | |.if "func" == "ceil" | ||
2690 | | c.olt.d 0, FRET1, FARG1 // x > result? | ||
2691 | |.else | ||
2692 | | c.olt.d 0, FARG1, FRET1 // x < result? | ||
2693 | |.endif | ||
2694 | | sub.d FRET2, FRET1, f4 // If yes, subtract +-1. | ||
2695 | | jr ra | ||
2696 | |. movt.d FRET1, FRET2, 0 | ||
2697 | |.endif | ||
2698 | |.endif | ||
2699 | |1: | ||
2700 | | jr ra | ||
2701 | |. mov.d FRET1, FARG1 // Not |x| < 2^52: return x unchanged. | ||
2702 | |.endmacro | ||
2703 | | | ||
2704 | |.macro vm_round, func | ||
2705 | |.if FPU | ||
2706 | | vm_round_hf, func // Only a hard-float body exists; without FPU the macro expands to nothing. | ||
2707 | |.endif | ||
2708 | |.endmacro | ||
2709 | | | ||
2710 | |->vm_floor: | ||
2711 | | vm_round floor | ||
2712 | |->vm_ceil: | ||
2713 | | vm_round ceil | ||
2714 | |->vm_trunc: | ||
2715 | |.if JIT | ||
2716 | | vm_round trunc // The trunc body is only emitted in JIT builds. | ||
2717 | |.endif | ||
2718 | | | ||
2719 | |// Soft-float integer to number conversion. | ||
2720 | |.macro sfi2d, ARG | ||
2721 | |.if not FPU | ||
2722 | | beqz ARG, >9 // Handle zero first. | ||
2723 | |. sra TMP0, ARG, 31 // Delay slot: TMP0 = sign mask (0 or -1). | ||
2724 | | xor TMP1, ARG, TMP0 | ||
2725 | | dsubu TMP1, TMP1, TMP0 // Absolute value in TMP1. | ||
2726 | | dclz ARG, TMP1 // Leading zero count of the absolute value. | ||
2727 | | addiu ARG, ARG, -11 | ||
2728 | | li AT, 0x3ff+63-11-1 | ||
2729 | | dsllv TMP1, TMP1, ARG // Align mantissa left with leading 1. | ||
2730 | | subu ARG, AT, ARG // Exponent - 1. | ||
2731 | | ins ARG, TMP0, 11, 11 // Sign | Exponent. | ||
2732 | | dsll ARG, ARG, 52 // Align left. | ||
2733 | | jr ra | ||
2734 | |. daddu ARG, ARG, TMP1 // Add mantissa, increment exponent. | ||
2735 | |9: | ||
2736 | | jr ra // Zero input: ARG is returned unchanged. | ||
2737 | |. nop | ||
2738 | |.endif | ||
2739 | |.endmacro | ||
2740 | | | ||
2741 | |// Input CARG1. Output: CARG1. Temporaries: AT, TMP0, TMP1. | ||
2742 | |->vm_sfi2d_1: | ||
2743 | | sfi2d CARG1 | ||
2744 | | | ||
2745 | |// Input CARG2. Output: CARG2. Temporaries: AT, TMP0, TMP1. | ||
2746 | |->vm_sfi2d_2: | ||
2747 | | sfi2d CARG2 | ||
2748 | | | ||
2749 | |// Soft-float comparison. Equivalent to c.eq.d. | ||
2750 | |// Input: CARG*. Output: CRET1. Temporaries: AT, TMP0, TMP1. | ||
2751 | |->vm_sfcmpeq: | ||
2752 | |.if not FPU | ||
2753 | | dsll AT, CARG1, 1 // Shift out the sign bits of both operands. | ||
2754 | | dsll TMP0, CARG2, 1 | ||
2755 | | or TMP1, AT, TMP0 | ||
2756 | | beqz TMP1, >8 // Both args +-0: return 1. | ||
2757 | |. lui TMP1, 0xffe0 | ||
2758 | | dsll TMP1, TMP1, 32 // TMP1 = 0xffe0 << 48: the Inf bit pattern shifted left by 1. | ||
2759 | | sltu AT, TMP1, AT // AT = 1 if arg 1 is above that threshold. | ||
2760 | | sltu TMP0, TMP1, TMP0 // TMP0 = 1 if arg 2 is above that threshold. | ||
2761 | | or TMP1, AT, TMP0 | ||
2762 | | bnez TMP1, >9 // Either arg is NaN: return 0; | ||
2763 | |. xor AT, CARG1, CARG2 | ||
2764 | | jr ra | ||
2765 | |. sltiu CRET1, AT, 1 // Same values: return 1. | ||
2766 | |8: | ||
2767 | | jr ra | ||
2768 | |. li CRET1, 1 | ||
2769 | |9: | ||
2770 | | jr ra | ||
2771 | |. li CRET1, 0 | ||
2772 | |.endif | ||
2773 | | | ||
2774 | |// Soft-float comparison. Equivalent to c.ult.d and c.olt.d. | ||
2775 | |// Input: CARG1, CARG2. Output: CRET1. Temporaries: AT, TMP0, TMP1, CRET2. | ||
2776 | |->vm_sfcmpult: | ||
2777 | |.if not FPU | ||
2778 | | b >1 | ||
2779 | |. li CRET2, 1 // Delay slot: result to return when either arg is NaN. | ||
2780 | |.endif | ||
2781 | | | ||
2782 | |->vm_sfcmpolt: | ||
2783 | |.if not FPU | ||
2784 | | li CRET2, 0 // Result to return when either arg is NaN. | ||
2785 | |1: | ||
2786 | | dsll AT, CARG1, 1 // Shift out the sign bits of both operands. | ||
2787 | | dsll TMP0, CARG2, 1 | ||
2788 | | or TMP1, AT, TMP0 | ||
2789 | | beqz TMP1, >8 // Both args +-0: return 0. | ||
2790 | |. lui TMP1, 0xffe0 | ||
2791 | | dsll TMP1, TMP1, 32 // TMP1 = 0xffe0 << 48: the Inf bit pattern shifted left by 1. | ||
2792 | | sltu AT, TMP1, AT | ||
2793 | | sltu TMP0, TMP1, TMP0 | ||
2794 | | or TMP1, AT, TMP0 | ||
2795 | | bnez TMP1, >9 // Either arg is NaN: return 0 or 1; | ||
2796 | |. and AT, CARG1, CARG2 // Delay slot: sign bit of AT is set iff both sign bits are set. | ||
2797 | | bltz AT, >5 // Both args negative? | ||
2798 | |. nop | ||
2799 | | jr ra | ||
2800 | |. slt CRET1, CARG1, CARG2 | ||
2801 | |5: // Swap conditions if both operands are negative. | ||
2802 | | jr ra | ||
2803 | |. slt CRET1, CARG2, CARG1 | ||
2804 | |8: | ||
2805 | | jr ra | ||
2806 | |. li CRET1, 0 | ||
2807 | |9: | ||
2808 | | jr ra | ||
2809 | |. move CRET1, CRET2 // Return the NaN result chosen at entry. | ||
2810 | |.endif | ||
2811 | | | ||
2812 | |// Soft-float comparison. Equivalent to c.ole.d a, b or c.ole.d b, a. | ||
2813 | |// Input: CARG1, CARG2, TMP3 (its sign bit is XORed into the both-negative test and so flips which slt form is used). Output: CRET1. Temporaries: AT, TMP0, TMP1. | ||
2814 | |->vm_sfcmpolex: | ||
2815 | |.if not FPU | ||
2816 | | dsll AT, CARG1, 1 | ||
2817 | | dsll TMP0, CARG2, 1 | ||
2818 | | or TMP1, AT, TMP0 | ||
2819 | | beqz TMP1, >8 // Both args +-0: return 1. | ||
2820 | |. lui TMP1, 0xffe0 | ||
2821 | | dsll TMP1, TMP1, 32 | ||
2822 | | sltu AT, TMP1, AT | ||
2823 | | sltu TMP0, TMP1, TMP0 | ||
2824 | | or TMP1, AT, TMP0 | ||
2825 | | bnez TMP1, >9 // Either arg is NaN: return 0. | ||
2826 | |. and AT, CARG1, CARG2 | ||
2827 | | xor AT, AT, TMP3 | ||
2828 | | bltz AT, >5 // Both args negative (after the TMP3 sign flip)? | ||
2829 | |. nop | ||
2830 | | jr ra | ||
2831 | |. slt CRET1, CARG2, CARG1 | ||
2832 | |5: // Swap the operands of the signed integer compare. | ||
2833 | | jr ra | ||
2834 | |. slt CRET1, CARG1, CARG2 | ||
2835 | |8: | ||
2836 | | jr ra | ||
2837 | |. li CRET1, 1 | ||
2838 | |9: | ||
2839 | | jr ra | ||
2840 | |. li CRET1, 0 | ||
2841 | |.endif | ||
2842 | | | ||
2843 | |.macro sfmin_max, name, intins, intinsc // Defines ->vm_sf<name> (body only for JIT builds without FPU): calls vm_sfcmpolt on CARG1/CARG2 with ra saved in TMP2, then uses intins/intinsc to select CARG1 or CARG2 into CRET1. | ||
2844 | |->vm_sf .. name: | ||
2845 | |.if JIT and not FPU | ||
2846 | | move TMP2, ra | ||
2847 | | bal ->vm_sfcmpolt | ||
2848 | |. nop | ||
2849 | | move ra, TMP2 | ||
2850 | | move TMP0, CRET1 | ||
2851 | | move CRET1, CARG1 | ||
2852 | |.if MIPSR6 | ||
2853 | | intins CRET1, CRET1, TMP0 | ||
2854 | | intinsc TMP0, CARG2, TMP0 | ||
2855 | | jr ra | ||
2856 | |. or CRET1, CRET1, TMP0 | ||
2857 | |.else | ||
2858 | | jr ra | ||
2859 | |. intins CRET1, CARG2, TMP0 | ||
2860 | |.endif | ||
2861 | |.endif | ||
2862 | |.endmacro | ||
2863 | | | ||
2864 | |.if MIPSR6 | ||
2865 | | sfmin_max min, selnez, seleqz | ||
2866 | | sfmin_max max, seleqz, selnez | ||
2867 | |.else | ||
2868 | | sfmin_max min, movz, _ | ||
2869 | | sfmin_max max, movn, _ | ||
2870 | |.endif | ||
2871 | | | ||
2872 | |//----------------------------------------------------------------------- | ||
2873 | |//-- Miscellaneous functions -------------------------------------------- | ||
2874 | |//----------------------------------------------------------------------- | ||
2875 | | | ||
2876 | |//----------------------------------------------------------------------- | ||
2877 | |//-- FFI helper functions ----------------------------------------------- | ||
2878 | |//----------------------------------------------------------------------- | ||
2879 | | | ||
2880 | |// Handler for FFI callback functions. Callback slot number in r1, g in r2. Saves the incoming argument registers into CTState->cb, calls lj_ccallback_enter and then dispatches to the Lua function with ins_callt. | ||
2881 | |->vm_ffi_callback: | ||
2882 | |.if FFI | ||
2883 | |.type CTSTATE, CTState, PC | ||
2884 | | saveregs | ||
2885 | | ld CTSTATE, GL:r2->ctype_state | ||
2886 | | daddiu DISPATCH, r2, GG_G2DISP | ||
2887 | | load_got lj_ccallback_enter | ||
2888 | | sw r1, CTSTATE->cb.slot | ||
2889 | | sd CARG1, CTSTATE->cb.gpr[0] | ||
2890 | | .FPU sdc1 FARG1, CTSTATE->cb.fpr[0] | ||
2891 | | sd CARG2, CTSTATE->cb.gpr[1] | ||
2892 | | .FPU sdc1 FARG2, CTSTATE->cb.fpr[1] | ||
2893 | | sd CARG3, CTSTATE->cb.gpr[2] | ||
2894 | | .FPU sdc1 FARG3, CTSTATE->cb.fpr[2] | ||
2895 | | sd CARG4, CTSTATE->cb.gpr[3] | ||
2896 | | .FPU sdc1 FARG4, CTSTATE->cb.fpr[3] | ||
2897 | | sd CARG5, CTSTATE->cb.gpr[4] | ||
2898 | | .FPU sdc1 FARG5, CTSTATE->cb.fpr[4] | ||
2899 | | sd CARG6, CTSTATE->cb.gpr[5] | ||
2900 | | .FPU sdc1 FARG6, CTSTATE->cb.fpr[5] | ||
2901 | | sd CARG7, CTSTATE->cb.gpr[6] | ||
2902 | | .FPU sdc1 FARG7, CTSTATE->cb.fpr[6] | ||
2903 | | sd CARG8, CTSTATE->cb.gpr[7] | ||
2904 | | .FPU sdc1 FARG8, CTSTATE->cb.fpr[7] | ||
2905 | | daddiu TMP0, sp, CFRAME_SPACE | ||
2906 | | sd TMP0, CTSTATE->cb.stack | ||
2907 | | sd r0, SAVE_PC // Any value outside of bytecode is ok. | ||
2908 | | move CARG2, sp | ||
2909 | | call_intern lj_ccallback_enter // (CTState *cts, void *cf) | ||
2910 | |. move CARG1, CTSTATE | ||
2911 | | // Returns lua_State *. BASE, RC (top) and L are reloaded from it below. | ||
2912 | | ld BASE, L:CRET1->base | ||
2913 | | ld RC, L:CRET1->top | ||
2914 | | move L, CRET1 | ||
2915 | | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | ||
2916 | | ld LFUNC:RB, FRAME_FUNC(BASE) | ||
2917 | | .FPU mtc1 TMP3, TOBIT | ||
2918 | | li TISNIL, LJ_TNIL | ||
2919 | | li TISNUM, LJ_TISNUM | ||
2920 | | li_vmstate INTERP | ||
2921 | | subu RC, RC, BASE | ||
2922 | | cleartp LFUNC:RB | ||
2923 | | st_vmstate | ||
2924 | | .FPU cvt.d.s TOBIT, TOBIT | ||
2925 | | ins_callt | ||
2926 | |.endif | ||
2927 | | | ||
2928 | |->cont_ffi_callback: // Return from FFI callback: store BASE/RB to L->base/L->top, call lj_ccallback_leave, reload the return registers from CTState->cb and leave via vm_leave_unw. | ||
2929 | |.if FFI | ||
2930 | | load_got lj_ccallback_leave | ||
2931 | | ld CTSTATE, DISPATCH_GL(ctype_state)(DISPATCH) | ||
2932 | | sd BASE, L->base | ||
2933 | | sd RB, L->top | ||
2934 | | sd L, CTSTATE->L | ||
2935 | | move CARG2, RA | ||
2936 | | call_intern lj_ccallback_leave // (CTState *cts, TValue *o) | ||
2937 | |. move CARG1, CTSTATE | ||
2938 | | .FPU ldc1 FRET1, CTSTATE->cb.fpr[0] | ||
2939 | | ld CRET1, CTSTATE->cb.gpr[0] | ||
2940 | | .FPU ldc1 FRET2, CTSTATE->cb.fpr[1] | ||
2941 | | b ->vm_leave_unw | ||
2942 | |. ld CRET2, CTSTATE->cb.gpr[1] | ||
2943 | |.endif | ||
2944 | | | ||
2945 | |->vm_ffi_call: // Call C function via FFI. CARG1 = CCallState *: copies nsp 8-byte stack slots from CCSTATE->stack to sp, loads the argument registers from gpr[], calls CCSTATE->func and stores the results back into the CCallState. | ||
2946 | | // Caveat: needs special frame unwinding, see below. ra, r16 and CCSTATE are saved just below the old sp, which is kept in r16 across the call. | ||
2947 | |.if FFI | ||
2948 | | .type CCSTATE, CCallState, CARG1 | ||
2949 | | lw TMP1, CCSTATE->spadj | ||
2950 | | lbu CARG2, CCSTATE->nsp | ||
2951 | | move TMP2, sp | ||
2952 | | dsubu sp, sp, TMP1 | ||
2953 | | sd ra, -8(TMP2) | ||
2954 | | sll CARG2, CARG2, 3 | ||
2955 | | sd r16, -16(TMP2) | ||
2956 | | sd CCSTATE, -24(TMP2) | ||
2957 | | move r16, TMP2 | ||
2958 | | daddiu TMP1, CCSTATE, offsetof(CCallState, stack) | ||
2959 | | move TMP2, sp | ||
2960 | | beqz CARG2, >2 | ||
2961 | |. daddu TMP3, TMP1, CARG2 | ||
2962 | |1: | ||
2963 | | ld TMP0, 0(TMP1) | ||
2964 | | daddiu TMP1, TMP1, 8 | ||
2965 | | sltu AT, TMP1, TMP3 | ||
2966 | | sd TMP0, 0(TMP2) | ||
2967 | | bnez AT, <1 | ||
2968 | |. daddiu TMP2, TMP2, 8 | ||
2969 | |2: | ||
2970 | | ld CFUNCADDR, CCSTATE->func | ||
2971 | | .FPU ldc1 FARG1, CCSTATE->gpr[0] | ||
2972 | | ld CARG2, CCSTATE->gpr[1] | ||
2973 | | .FPU ldc1 FARG2, CCSTATE->gpr[1] | ||
2974 | | ld CARG3, CCSTATE->gpr[2] | ||
2975 | | .FPU ldc1 FARG3, CCSTATE->gpr[2] | ||
2976 | | ld CARG4, CCSTATE->gpr[3] | ||
2977 | | .FPU ldc1 FARG4, CCSTATE->gpr[3] | ||
2978 | | ld CARG5, CCSTATE->gpr[4] | ||
2979 | | .FPU ldc1 FARG5, CCSTATE->gpr[4] | ||
2980 | | ld CARG6, CCSTATE->gpr[5] | ||
2981 | | .FPU ldc1 FARG6, CCSTATE->gpr[5] | ||
2982 | | ld CARG7, CCSTATE->gpr[6] | ||
2983 | | .FPU ldc1 FARG7, CCSTATE->gpr[6] | ||
2984 | | ld CARG8, CCSTATE->gpr[7] | ||
2985 | | .FPU ldc1 FARG8, CCSTATE->gpr[7] | ||
2986 | | jalr CFUNCADDR | ||
2987 | |. ld CARG1, CCSTATE->gpr[0] // Do this last (in the delay slot), since CCSTATE is CARG1. | ||
2988 | | ld CCSTATE:TMP1, -24(r16) | ||
2989 | | ld TMP2, -16(r16) | ||
2990 | | ld ra, -8(r16) | ||
2991 | | sd CRET1, CCSTATE:TMP1->gpr[0] | ||
2992 | | sd CRET2, CCSTATE:TMP1->gpr[1] | ||
2993 | |.if FPU | ||
2994 | | sdc1 FRET1, CCSTATE:TMP1->fpr[0] | ||
2995 | | sdc1 FRET2, CCSTATE:TMP1->fpr[1] | ||
2996 | |.else | ||
2997 | | sd CARG1, CCSTATE:TMP1->gpr[2] // 2nd FP struct field for soft-float. | ||
2998 | |.endif | ||
2999 | | move sp, r16 | ||
3000 | | jr ra | ||
3001 | |. move r16, TMP2 | ||
3002 | |.endif | ||
3003 | |// Note: vm_ffi_call must be the last function in this object file! | ||
3004 | | | ||
3005 | |//----------------------------------------------------------------------- | ||
3006 | } | ||
3007 | |||
3008 | /* Generate the code for a single instruction. */ | ||
3009 | static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ||
3010 | { | ||
3011 | int vk = 0; | ||
3012 | |=>defop: | ||
3013 | |||
3014 | switch (op) { | ||
3015 | |||
3016 | /* -- Comparison ops ---------------------------------------------------- */ | ||
3017 | |||
3018 | /* Remember: all ops branch for a true comparison, fall through otherwise. */ | ||
3019 | |||
3020 | case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: | ||
3021 | | // Ordered comparison of two slots; the bc_comp arguments chosen per opcode below select operand order and branch sense. RA = src1*8, RD = src2*8, JMP with RD = target | ||
3022 | |.macro bc_comp, FRA, FRD, ARGRA, ARGRD, movop, fmovop, fcomp, sfcomp | ||
3023 | | daddu RA, BASE, RA | ||
3024 | | daddu RD, BASE, RD | ||
3025 | | ld ARGRA, 0(RA) | ||
3026 | | ld ARGRD, 0(RD) | ||
3027 | | lhu TMP2, OFS_RD(PC) | ||
3028 | | gettp CARG3, ARGRA | ||
3029 | | gettp CARG4, ARGRD | ||
3030 | | bne CARG3, TISNUM, >2 | ||
3031 | |. daddiu PC, PC, 4 | ||
3032 | | bne CARG4, TISNUM, >5 | ||
3033 | |. decode_RD4b TMP2 | ||
3034 | | sextw ARGRA, ARGRA | ||
3035 | | sextw ARGRD, ARGRD | ||
3036 | | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) | ||
3037 | | slt AT, CARG1, CARG2 | ||
3038 | | addu TMP2, TMP2, TMP3 | ||
3039 | |.if MIPSR6 | ||
3040 | | movop TMP2, TMP2, AT | ||
3041 | |.else | ||
3042 | | movop TMP2, r0, AT | ||
3043 | |.endif | ||
3044 | |1: | ||
3045 | | daddu PC, PC, TMP2 | ||
3046 | | ins_next | ||
3047 | | | ||
3048 | |2: // RA is not an integer: either a number or a case for vmeta_comp. | ||
3049 | | sltiu AT, CARG3, LJ_TISNUM | ||
3050 | | beqz AT, ->vmeta_comp | ||
3051 | |. lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) | ||
3052 | | sltiu AT, CARG4, LJ_TISNUM | ||
3053 | | beqz AT, >4 | ||
3054 | |. decode_RD4b TMP2 | ||
3055 | |.if FPU | ||
3056 | | ldc1 FRA, 0(RA) | ||
3057 | | ldc1 FRD, 0(RD) | ||
3058 | |.endif | ||
3059 | |3: // RA and RD are both numbers: FPU compare, or a call to the sfcomp soft-float helper. | ||
3060 | |.if FPU | ||
3061 | |.if MIPSR6 | ||
3062 | | fcomp FTMP0, FTMP0, FTMP2 | ||
3063 | | addu TMP2, TMP2, TMP3 | ||
3064 | | mfc1 TMP3, FTMP0 | ||
3065 | | b <1 | ||
3066 | |. fmovop TMP2, TMP2, TMP3 | ||
3067 | |.else | ||
3068 | | fcomp FTMP0, FTMP2 | ||
3069 | | addu TMP2, TMP2, TMP3 | ||
3070 | | b <1 | ||
3071 | |. fmovop TMP2, r0 | ||
3072 | |.endif | ||
3073 | |.else | ||
3074 | | bal sfcomp | ||
3075 | |. addu TMP2, TMP2, TMP3 | ||
3076 | | b <1 | ||
3077 | |.if MIPSR6 | ||
3078 | |. movop TMP2, TMP2, CRET1 | ||
3079 | |.else | ||
3080 | |. movop TMP2, r0, CRET1 | ||
3081 | |.endif | ||
3082 | |.endif | ||
3083 | | | ||
3084 | |4: // RA is a number, RD is not a number. | ||
3085 | | bne CARG4, TISNUM, ->vmeta_comp | ||
3086 | | // RA is a number, RD is an integer. Convert RD to a number. | ||
3087 | |.if FPU | ||
3088 | |. lwc1 FRD, LO(RD) | ||
3089 | | ldc1 FRA, 0(RA) | ||
3090 | | b <3 | ||
3091 | |. cvt.d.w FRD, FRD | ||
3092 | |.else | ||
3093 | |.if "ARGRD" == "CARG1" | ||
3094 | |. sextw CARG1, CARG1 | ||
3095 | | bal ->vm_sfi2d_1 | ||
3096 | |. nop | ||
3097 | |.else | ||
3098 | |. sextw CARG2, CARG2 | ||
3099 | | bal ->vm_sfi2d_2 | ||
3100 | |. nop | ||
3101 | |.endif | ||
3102 | | b <3 | ||
3103 | |. nop | ||
3104 | |.endif | ||
3105 | | | ||
3106 | |5: // RA is an integer, RD is not an integer. | ||
3107 | | sltiu AT, CARG4, LJ_TISNUM | ||
3108 | | beqz AT, ->vmeta_comp | ||
3109 | |. lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) | ||
3110 | | // RA is an integer, RD is a number. Convert RA to a number. | ||
3111 | |.if FPU | ||
3112 | | lwc1 FRA, LO(RA) | ||
3113 | | ldc1 FRD, 0(RD) | ||
3114 | | b <3 | ||
3115 | | cvt.d.w FRA, FRA | ||
3116 | |.else | ||
3117 | |.if "ARGRA" == "CARG1" | ||
3118 | | bal ->vm_sfi2d_1 | ||
3119 | |. sextw CARG1, CARG1 | ||
3120 | |.else | ||
3121 | | bal ->vm_sfi2d_2 | ||
3122 | |. sextw CARG2, CARG2 | ||
3123 | |.endif | ||
3124 | | b <3 | ||
3125 | |. nop | ||
3126 | |.endif | ||
3127 | |.endmacro | ||
3128 | | | ||
3129 | |.if MIPSR6 | ||
3130 | if (op == BC_ISLT) { | ||
3131 | | bc_comp FTMP0, FTMP2, CARG1, CARG2, selnez, selnez, cmp.lt.d, ->vm_sfcmpolt | ||
3132 | } else if (op == BC_ISGE) { | ||
3133 | | bc_comp FTMP0, FTMP2, CARG1, CARG2, seleqz, seleqz, cmp.lt.d, ->vm_sfcmpolt | ||
3134 | } else if (op == BC_ISLE) { | ||
3135 | | bc_comp FTMP2, FTMP0, CARG2, CARG1, seleqz, seleqz, cmp.ult.d, ->vm_sfcmpult | ||
3136 | } else { | ||
3137 | | bc_comp FTMP2, FTMP0, CARG2, CARG1, selnez, selnez, cmp.ult.d, ->vm_sfcmpult | ||
3138 | } | ||
3139 | |.else | ||
3140 | if (op == BC_ISLT) { | ||
3141 | | bc_comp FTMP0, FTMP2, CARG1, CARG2, movz, movf, c.olt.d, ->vm_sfcmpolt | ||
3142 | } else if (op == BC_ISGE) { | ||
3143 | | bc_comp FTMP0, FTMP2, CARG1, CARG2, movn, movt, c.olt.d, ->vm_sfcmpolt | ||
3144 | } else if (op == BC_ISLE) { | ||
3145 | | bc_comp FTMP2, FTMP0, CARG2, CARG1, movn, movt, c.ult.d, ->vm_sfcmpult | ||
3146 | } else { | ||
3147 | | bc_comp FTMP2, FTMP0, CARG2, CARG1, movz, movf, c.ult.d, ->vm_sfcmpult | ||
3148 | } | ||
3149 | |.endif | ||
3150 | break; | ||
3151 | |||
3152 | case BC_ISEQV: case BC_ISNEV: | ||
3153 | vk = op == BC_ISEQV; | ||
3154 | | // Generic (in)equality test of two slots; vk is 1 for ISEQV and 0 for ISNEV. RA = src1*8, RD = src2*8, JMP with RD = target | ||
3155 | | daddu RA, BASE, RA | ||
3156 | | daddiu PC, PC, 4 | ||
3157 | | daddu RD, BASE, RD | ||
3158 | | ld CARG1, 0(RA) | ||
3159 | | lhu TMP2, -4+OFS_RD(PC) | ||
3160 | | ld CARG2, 0(RD) | ||
3161 | | gettp CARG3, CARG1 | ||
3162 | | gettp CARG4, CARG2 | ||
3163 | | sltu AT, TISNUM, CARG3 | ||
3164 | | sltu TMP1, TISNUM, CARG4 | ||
3165 | | or AT, AT, TMP1 | ||
3166 | if (vk) { | ||
3167 | | beqz AT, ->BC_ISEQN_Z | ||
3168 | } else { | ||
3169 | | beqz AT, ->BC_ISNEN_Z | ||
3170 | } | ||
3171 | | // Either or both types are not numbers (the all-number case was sent to BC_ISEQN_Z/BC_ISNEN_Z above). | ||
3172 | | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) | ||
3173 | |.if FFI | ||
3174 | |. li AT, LJ_TCDATA | ||
3175 | | beq CARG3, AT, ->vmeta_equal_cd | ||
3176 | |.endif | ||
3177 | | decode_RD4b TMP2 | ||
3178 | |.if FFI | ||
3179 | | beq CARG4, AT, ->vmeta_equal_cd | ||
3180 | |. nop | ||
3181 | |.endif | ||
3182 | | bne CARG1, CARG2, >2 | ||
3183 | |. addu TMP2, TMP2, TMP3 | ||
3184 | | // Tag and value are equal. | ||
3185 | if (vk) { | ||
3186 | |->BC_ISEQV_Z: | ||
3187 | | daddu PC, PC, TMP2 | ||
3188 | } | ||
3189 | |1: | ||
3190 | | ins_next | ||
3191 | | | ||
3192 | |2: // Check if the tags are the same and it's a table or userdata. | ||
3193 | | xor AT, CARG3, CARG4 // Same type? | ||
3194 | | sltiu TMP0, CARG3, LJ_TISTABUD+1 // Table or userdata? | ||
3195 | |.if MIPSR6 | ||
3196 | | seleqz TMP0, TMP0, AT | ||
3197 | |.else | ||
3198 | | movn TMP0, r0, AT | ||
3199 | |.endif | ||
3200 | if (vk) { | ||
3201 | | beqz TMP0, <1 | ||
3202 | } else { | ||
3203 | | beqz TMP0, ->BC_ISEQV_Z // Reuse code from opposite instruction. | ||
3204 | } | ||
3205 | | // Different tables or userdatas. Need to check __eq metamethod. | ||
3206 | | // Field metatable must be at same offset for GCtab and GCudata! | ||
3207 | |. cleartp TAB:TMP1, CARG1 | ||
3208 | | ld TAB:TMP3, TAB:TMP1->metatable | ||
3209 | if (vk) { | ||
3210 | | beqz TAB:TMP3, <1 // No metatable? | ||
3211 | |. nop | ||
3212 | | lbu TMP3, TAB:TMP3->nomm | ||
3213 | | andi TMP3, TMP3, 1<<MM_eq | ||
3214 | | bnez TMP3, >1 // Or 'no __eq' flag set? | ||
3215 | } else { | ||
3216 | | beqz TAB:TMP3,->BC_ISEQV_Z // No metatable? | ||
3217 | |. nop | ||
3218 | | lbu TMP3, TAB:TMP3->nomm | ||
3219 | | andi TMP3, TMP3, 1<<MM_eq | ||
3220 | | bnez TMP3, ->BC_ISEQV_Z // Or 'no __eq' flag set? | ||
3221 | } | ||
3222 | |. nop | ||
3223 | | b ->vmeta_equal // Handle __eq metamethod. | ||
3224 | |. li TMP0, 1-vk // ne = 0 or 1. | ||
3225 | break; | ||
3226 | |||
3227 | case BC_ISEQS: case BC_ISNES: | ||
3228 | vk = op == BC_ISEQS; | ||
3229 | | // Compare a slot against a string constant by XORing the tagged values; vk is 1 for ISEQS and 0 for ISNES. RA = src*8, RD = str_const*8 (~), JMP with RD = target | ||
3230 | | daddu RA, BASE, RA | ||
3231 | | daddiu PC, PC, 4 | ||
3232 | | ld CARG1, 0(RA) | ||
3233 | | dsubu RD, KBASE, RD | ||
3234 | | lhu TMP2, -4+OFS_RD(PC) | ||
3235 | | ld CARG2, -8(RD) // KBASE-8-str_const*8 | ||
3236 | |.if FFI | ||
3237 | | gettp TMP0, CARG1 | ||
3238 | | li AT, LJ_TCDATA | ||
3239 | |.endif | ||
3240 | | li TMP1, LJ_TSTR | ||
3241 | | decode_RD4b TMP2 | ||
3242 | |.if FFI | ||
3243 | | beq TMP0, AT, ->vmeta_equal_cd | ||
3244 | |.endif | ||
3245 | |. settp CARG2, TMP1 | ||
3246 | | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) | ||
3247 | | xor TMP1, CARG1, CARG2 | ||
3248 | | addu TMP2, TMP2, TMP3 | ||
3249 | |.if MIPSR6 | ||
3250 | if (vk) { | ||
3251 | | seleqz TMP2, TMP2, TMP1 | ||
3252 | } else { | ||
3253 | | selnez TMP2, TMP2, TMP1 | ||
3254 | } | ||
3255 | |.else | ||
3256 | if (vk) { | ||
3257 | | movn TMP2, r0, TMP1 | ||
3258 | } else { | ||
3259 | | movz TMP2, r0, TMP1 | ||
3260 | } | ||
3261 | |.endif | ||
3262 | | daddu PC, PC, TMP2 | ||
3263 | | ins_next | ||
3264 | break; | ||
3265 | |||
3266 | case BC_ISEQN: case BC_ISNEN: | ||
3267 | vk = op == BC_ISEQN; | ||
3268 | | // Compare a slot against a number constant; vk is 1 for ISEQN and 0 for ISNEN. The BC_ISEQN_Z/BC_ISNEN_Z labels are also branch targets of BC_ISEQV/BC_ISNEV. RA = src*8, RD = num_const*8, JMP with RD = target | ||
3269 | | daddu RA, BASE, RA | ||
3270 | | daddu RD, KBASE, RD | ||
3271 | | ld CARG1, 0(RA) | ||
3272 | | ld CARG2, 0(RD) | ||
3273 | | lhu TMP2, OFS_RD(PC) | ||
3274 | | gettp CARG3, CARG1 | ||
3275 | | gettp CARG4, CARG2 | ||
3276 | | daddiu PC, PC, 4 | ||
3277 | | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) | ||
3278 | if (vk) { | ||
3279 | |->BC_ISEQN_Z: | ||
3280 | } else { | ||
3281 | |->BC_ISNEN_Z: | ||
3282 | } | ||
3283 | | bne CARG3, TISNUM, >3 | ||
3284 | |. decode_RD4b TMP2 | ||
3285 | | bne CARG4, TISNUM, >6 | ||
3286 | |. addu TMP2, TMP2, TMP3 | ||
3287 | | xor AT, CARG1, CARG2 | ||
3288 | |.if MIPSR6 | ||
3289 | if (vk) { | ||
3290 | | seleqz TMP2, TMP2, AT | ||
3291 | |1: | ||
3292 | | daddu PC, PC, TMP2 | ||
3293 | |2: | ||
3294 | } else { | ||
3295 | | selnez TMP2, TMP2, AT | ||
3296 | |1: | ||
3297 | |2: | ||
3298 | | daddu PC, PC, TMP2 | ||
3299 | } | ||
3300 | |.else | ||
3301 | if (vk) { | ||
3302 | | movn TMP2, r0, AT | ||
3303 | |1: | ||
3304 | | daddu PC, PC, TMP2 | ||
3305 | |2: | ||
3306 | } else { | ||
3307 | | movz TMP2, r0, AT | ||
3308 | |1: | ||
3309 | |2: | ||
3310 | | daddu PC, PC, TMP2 | ||
3311 | } | ||
3312 | |.endif | ||
3313 | | ins_next | ||
3314 | | | ||
3315 | |3: // RA is not an integer: a number, or (with FFI) possibly cdata, otherwise a type mismatch handled at label 2. | ||
3316 | | sltu AT, CARG3, TISNUM | ||
3317 | |.if FFI | ||
3318 | | beqz AT, >8 | ||
3319 | |.else | ||
3320 | | beqz AT, <2 | ||
3321 | |.endif | ||
3322 | |. addu TMP2, TMP2, TMP3 | ||
3323 | | sltu AT, CARG4, TISNUM | ||
3324 | |.if FPU | ||
3325 | | ldc1 FTMP0, 0(RA) | ||
3326 | | ldc1 FTMP2, 0(RD) | ||
3327 | |.endif | ||
3328 | | beqz AT, >5 | ||
3329 | |. nop | ||
3330 | |4: // RA and RD are both numbers: FPU compare, or a call to vm_sfcmpeq for soft-float. | ||
3331 | |.if FPU | ||
3332 | |.if MIPSR6 | ||
3333 | | cmp.eq.d FTMP0, FTMP0, FTMP2 | ||
3334 | | dmfc1 TMP1, FTMP0 | ||
3335 | | b <1 | ||
3336 | if (vk) { | ||
3337 | |. selnez TMP2, TMP2, TMP1 | ||
3338 | } else { | ||
3339 | |. seleqz TMP2, TMP2, TMP1 | ||
3340 | } | ||
3341 | |.else | ||
3342 | | c.eq.d FTMP0, FTMP2 | ||
3343 | | b <1 | ||
3344 | if (vk) { | ||
3345 | |. movf TMP2, r0 | ||
3346 | } else { | ||
3347 | |. movt TMP2, r0 | ||
3348 | } | ||
3349 | |.endif | ||
3350 | |.else | ||
3351 | | bal ->vm_sfcmpeq | ||
3352 | |. nop | ||
3353 | | b <1 | ||
3354 | |.if MIPSR6 | ||
3355 | if (vk) { | ||
3356 | |. selnez TMP2, TMP2, CRET1 | ||
3357 | } else { | ||
3358 | |. seleqz TMP2, TMP2, CRET1 | ||
3359 | } | ||
3360 | |.else | ||
3361 | if (vk) { | ||
3362 | |. movz TMP2, r0, CRET1 | ||
3363 | } else { | ||
3364 | |. movn TMP2, r0, CRET1 | ||
3365 | } | ||
3366 | |.endif | ||
3367 | |.endif | ||
3368 | | | ||
3369 | |5: // RA is a number, RD is not a number. | ||
3370 | |.if FFI | ||
3371 | | bne CARG4, TISNUM, >9 | ||
3372 | |.else | ||
3373 | | bne CARG4, TISNUM, <2 | ||
3374 | |.endif | ||
3375 | | // RA is a number, RD is an integer. Convert RD to a number. | ||
3376 | |.if FPU | ||
3377 | |. lwc1 FTMP2, LO(RD) | ||
3378 | | b <4 | ||
3379 | |. cvt.d.w FTMP2, FTMP2 | ||
3380 | |.else | ||
3381 | |. sextw CARG2, CARG2 | ||
3382 | | bal ->vm_sfi2d_2 | ||
3383 | |. nop | ||
3384 | | b <4 | ||
3385 | |. nop | ||
3386 | |.endif | ||
3387 | | | ||
3388 | |6: // RA is an integer, RD is not an integer. | ||
3389 | | sltu AT, CARG4, TISNUM | ||
3390 | |.if FFI | ||
3391 | | beqz AT, >9 | ||
3392 | |.else | ||
3393 | | beqz AT, <2 | ||
3394 | |.endif | ||
3395 | | // RA is an integer, RD is a number. Convert RA to a number. | ||
3396 | |.if FPU | ||
3397 | |. lwc1 FTMP0, LO(RA) | ||
3398 | | ldc1 FTMP2, 0(RD) | ||
3399 | | b <4 | ||
3400 | | cvt.d.w FTMP0, FTMP0 | ||
3401 | |.else | ||
3402 | |. sextw CARG1, CARG1 | ||
3403 | | bal ->vm_sfi2d_1 | ||
3404 | |. nop | ||
3405 | | b <4 | ||
3406 | |. nop | ||
3407 | |.endif | ||
3408 | | | ||
3409 | |.if FFI | ||
3410 | |8: | ||
3411 | | li AT, LJ_TCDATA | ||
3412 | | bne CARG3, AT, <2 | ||
3413 | |. nop | ||
3414 | | b ->vmeta_equal_cd | ||
3415 | |. nop | ||
3416 | |9: | ||
3417 | | li AT, LJ_TCDATA | ||
3418 | | bne CARG4, AT, <2 | ||
3419 | |. nop | ||
3420 | | b ->vmeta_equal_cd | ||
3421 | |. nop | ||
3422 | |.endif | ||
3423 | break; | ||
3424 | |||
3425 | case BC_ISEQP: case BC_ISNEP: | ||
3426 | vk = op == BC_ISEQP; | ||
3427 | | // Compare the type tag of a slot against a primitive type (tag XOR ~(RD>>3)); vk is 1 for ISEQP and 0 for ISNEP. RA = src*8, RD = primitive_type*8 (~), JMP with RD = target | ||
3428 | | daddu RA, BASE, RA | ||
3429 | | srl TMP1, RD, 3 | ||
3430 | | ld TMP0, 0(RA) | ||
3431 | | lhu TMP2, OFS_RD(PC) | ||
3432 | | not TMP1, TMP1 | ||
3433 | | gettp TMP0, TMP0 | ||
3434 | | daddiu PC, PC, 4 | ||
3435 | |.if FFI | ||
3436 | | li AT, LJ_TCDATA | ||
3437 | | beq TMP0, AT, ->vmeta_equal_cd | ||
3438 | |.endif | ||
3439 | |. xor TMP0, TMP0, TMP1 | ||
3440 | | decode_RD4b TMP2 | ||
3441 | | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) | ||
3442 | | addu TMP2, TMP2, TMP3 | ||
3443 | |.if MIPSR6 | ||
3444 | if (vk) { | ||
3445 | | seleqz TMP2, TMP2, TMP0 | ||
3446 | } else { | ||
3447 | | selnez TMP2, TMP2, TMP0 | ||
3448 | } | ||
3449 | |.else | ||
3450 | if (vk) { | ||
3451 | | movn TMP2, r0, TMP0 | ||
3452 | } else { | ||
3453 | | movz TMP2, r0, TMP0 | ||
3454 | } | ||
3455 | |.endif | ||
3456 | | daddu PC, PC, TMP2 | ||
3457 | | ins_next | ||
3458 | break; | ||
3459 | |||
3460 | /* -- Unary test and copy ops ------------------------------------------- */ | ||
3461 | |||
3462 | case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: | ||
3463 | | // Truthiness test of slot RD (tag below LJ_TISTRUECOND); ISTC/ISFC additionally copy the slot to RA when the jump is taken. RA = dst*8 or unused, RD = src*8, JMP with RD = target | ||
3464 | | daddu RD, BASE, RD | ||
3465 | | lhu TMP2, OFS_RD(PC) | ||
3466 | | ld TMP0, 0(RD) | ||
3467 | | daddiu PC, PC, 4 | ||
3468 | | gettp TMP0, TMP0 | ||
3469 | | sltiu TMP0, TMP0, LJ_TISTRUECOND | ||
3470 | if (op == BC_IST || op == BC_ISF) { | ||
3471 | | decode_RD4b TMP2 | ||
3472 | | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) | ||
3473 | | addu TMP2, TMP2, TMP3 | ||
3474 | |.if MIPSR6 | ||
3475 | if (op == BC_IST) { | ||
3476 | | selnez TMP2, TMP2, TMP0; | ||
3477 | } else { | ||
3478 | | seleqz TMP2, TMP2, TMP0; | ||
3479 | } | ||
3480 | |.else | ||
3481 | if (op == BC_IST) { | ||
3482 | | movz TMP2, r0, TMP0 | ||
3483 | } else { | ||
3484 | | movn TMP2, r0, TMP0 | ||
3485 | } | ||
3486 | |.endif | ||
3487 | | daddu PC, PC, TMP2 | ||
3488 | } else { | ||
3489 | | ld CRET1, 0(RD) | ||
3490 | if (op == BC_ISTC) { | ||
3491 | | beqz TMP0, >1 | ||
3492 | } else { | ||
3493 | | bnez TMP0, >1 | ||
3494 | } | ||
3495 | |. daddu RA, BASE, RA | ||
3496 | | decode_RD4b TMP2 | ||
3497 | | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) | ||
3498 | | addu TMP2, TMP2, TMP3 | ||
3499 | | sd CRET1, 0(RA) | ||
3500 | | daddu PC, PC, TMP2 | ||
3501 | |1: | ||
3502 | } | ||
3503 | | ins_next | ||
3504 | break; | ||
3505 | |||
3506 | case BC_ISTYPE: | ||
3507 | | // Type assertion: branches to vmeta_istype unless the tag of slot RA plus RD>>3 is zero. RA = src*8, RD = -type*8 | ||
3508 | | daddu TMP2, BASE, RA | ||
3509 | | srl TMP1, RD, 3 | ||
3510 | | ld TMP0, 0(TMP2) | ||
3511 | | ins_next1 | ||
3512 | | gettp TMP0, TMP0 | ||
3513 | | daddu AT, TMP0, TMP1 | ||
3514 | | bnez AT, ->vmeta_istype | ||
3515 | |. ins_next2 | ||
3516 | break; | ||
3517 | case BC_ISNUM: | ||
3518 | | // Number type assertion on slot RA via the checknum macro, with ->vmeta_istype as its branch target. RA = src*8, RD = -(TISNUM-1)*8 | ||
3519 | | daddu TMP2, BASE, RA | ||
3520 | | ld TMP0, 0(TMP2) | ||
3521 | | ins_next1 | ||
3522 | | checknum TMP0, ->vmeta_istype | ||
3523 | |. ins_next2 | ||
3524 | break; | ||
3525 | |||
3526 | /* -- Unary ops --------------------------------------------------------- */ | ||
3527 | |||
3528 | case BC_MOV: | ||
3529 | | // Copy the 64-bit value of slot RD into slot RA. RA = dst*8, RD = src*8 | ||
3530 | | daddu RD, BASE, RD | ||
3531 | | daddu RA, BASE, RA | ||
3532 | | ld CRET1, 0(RD) | ||
3533 | | ins_next1 | ||
3534 | | sd CRET1, 0(RA) | ||
3535 | | ins_next2 | ||
3536 | break; | ||
3537 | case BC_NOT: | ||
3538 | | // Logical not: compares the tag of slot RD against LJ_TTRUE and stores ~((flag+1) << 47) as the result into slot RA. RA = dst*8, RD = src*8 | ||
3539 | | daddu RD, BASE, RD | ||
3540 | | daddu RA, BASE, RA | ||
3541 | | ld TMP0, 0(RD) | ||
3542 | | li AT, LJ_TTRUE | ||
3543 | | gettp TMP0, TMP0 | ||
3544 | | sltu TMP0, AT, TMP0 | ||
3545 | | addiu TMP0, TMP0, 1 | ||
3546 | | dsll TMP0, TMP0, 47 | ||
3547 | | not TMP0, TMP0 | ||
3548 | | ins_next1 | ||
3549 | | sd TMP0, 0(RA) | ||
3550 | | ins_next2 | ||
3551 | break; | ||
3552 | case BC_UNM: | ||
3553 | | // Unary minus: integers are negated and re-tagged, numbers get their sign bit flipped (xor with 0x8000 << 48), everything else goes to vmeta_unm. RA = dst*8, RD = src*8 | ||
3554 | | daddu RB, BASE, RD | ||
3555 | | ld CARG1, 0(RB) | ||
3556 | | daddu RA, BASE, RA | ||
3557 | | gettp CARG3, CARG1 | ||
3558 | | bne CARG3, TISNUM, >2 | ||
3559 | |. lui TMP1, 0x8000 | ||
3560 | | sextw CARG1, CARG1 | ||
3561 | | beq CARG1, TMP1, ->vmeta_unm // Meta handler deals with -2^31. | ||
3562 | |. negu CARG1, CARG1 | ||
3563 | | zextw CARG1, CARG1 | ||
3564 | | settp CARG1, TISNUM | ||
3565 | |1: | ||
3566 | | ins_next1 | ||
3567 | | sd CARG1, 0(RA) | ||
3568 | | ins_next2 | ||
3569 | |2: | ||
3570 | | sltiu AT, CARG3, LJ_TISNUM | ||
3571 | | beqz AT, ->vmeta_unm | ||
3572 | |. dsll TMP1, TMP1, 32 | ||
3573 | | b <1 | ||
3574 | |. xor CARG1, CARG1, TMP1 | ||
3575 | break; | ||
3576 | case BC_LEN: | ||
3577 | | // Length operator: strings use STR->len, tables call lj_tab_len, all other types go to vmeta_len. RA = dst*8, RD = src*8 | ||
3578 | | daddu CARG2, BASE, RD | ||
3579 | | daddu RA, BASE, RA | ||
3580 | | ld TMP0, 0(CARG2) | ||
3581 | | gettp TMP1, TMP0 | ||
3582 | | daddiu AT, TMP1, -LJ_TSTR | ||
3583 | | bnez AT, >2 | ||
3584 | |. cleartp STR:CARG1, TMP0 | ||
3585 | | lw CRET1, STR:CARG1->len | ||
3586 | |1: | ||
3587 | | settp CRET1, TISNUM | ||
3588 | | ins_next1 | ||
3589 | | sd CRET1, 0(RA) | ||
3590 | | ins_next2 | ||
3591 | |2: | ||
3592 | | daddiu AT, TMP1, -LJ_TTAB | ||
3593 | | bnez AT, ->vmeta_len | ||
3594 | |. nop | ||
3595 | #if LJ_52 | ||
3596 | | ld TAB:TMP2, TAB:CARG1->metatable | ||
3597 | | bnez TAB:TMP2, >9 | ||
3598 | |. nop | ||
3599 | |3: | ||
3600 | #endif | ||
3601 | |->BC_LEN_Z: | ||
3602 | | load_got lj_tab_len | ||
3603 | | call_intern lj_tab_len // (GCtab *t) | ||
3604 | |. nop | ||
3605 | | // Returns uint32_t (but less than 2^31), which is tagged as an integer at label 1. | ||
3606 | | b <1 | ||
3607 | |. nop | ||
3608 | #if LJ_52 | ||
3609 | |9: | ||
3610 | | lbu TMP0, TAB:TMP2->nomm | ||
3611 | | andi TMP0, TMP0, 1<<MM_len | ||
3612 | | bnez TMP0, <3 // 'no __len' flag set: done. | ||
3613 | |. nop | ||
3614 | | b ->vmeta_len | ||
3615 | |. nop | ||
3616 | #endif | ||
3617 | break; | ||
3618 | |||
3619 | /* -- Binary ops -------------------------------------------------------- */ | ||
3620 | |||
3621 | |.macro fpmod, a, b, c | ||
3622 | | bal ->vm_floor // FRET1 = floor(b/c); the division below sits in the delay slot of this call. | ||
3623 | |. div.d FARG1, b, c | ||
3624 | | mul.d a, FRET1, c | ||
3625 | | sub.d a, b, a // a = b - floor(b/c)*c | ||
3626 | |.endmacro | ||
3627 | |||
3628 | |.macro sfpmod // Soft-float modulo: CARG1 - floor(CARG1/CARG2)*CARG2 via __divdf3, floor, __muldf3 and __subdf3; both operands are kept on the stack across the calls. | ||
3629 | | daddiu sp, sp, -16 | ||
3630 | | | ||
3631 | | load_got __divdf3 | ||
3632 | | sd CARG1, 0(sp) | ||
3633 | | call_extern | ||
3634 | |. sd CARG2, 8(sp) | ||
3635 | | | ||
3636 | | load_got floor | ||
3637 | | call_extern | ||
3638 | |. move CARG1, CRET1 | ||
3639 | | | ||
3640 | | load_got __muldf3 | ||
3641 | | move CARG1, CRET1 | ||
3642 | | call_extern | ||
3643 | |. ld CARG2, 8(sp) | ||
3644 | | | ||
3645 | | load_got __subdf3 | ||
3646 | | ld CARG1, 0(sp) | ||
3647 | | call_extern | ||
3648 | |. move CARG2, CRET1 | ||
3649 | | | ||
3650 | | daddiu sp, sp, 16 | ||
3651 | |.endmacro | ||
3652 | |||
3653 | |.macro ins_arithpre, label | ||
3654 | ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); | ||
3655 | | // Operand decode for arithmetic ops; vk selects the VN (0), NV (1) or VV (default) form, and a label other than none is branched to afterwards. RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 | ||
3656 | ||switch (vk) { | ||
3657 | ||case 0: | ||
3658 | | decode_RB8a RB, INS | ||
3659 | | decode_RB8b RB | ||
3660 | | decode_RDtoRC8 RC, RD | ||
3661 | | // RA = dst*8, RB = src1*8, RC = num_const*8 (RC is made relative to KBASE) | ||
3662 | | daddu RB, BASE, RB | ||
3663 | |.if "label" ~= "none" | ||
3664 | | b label | ||
3665 | |.endif | ||
3666 | |. daddu RC, KBASE, RC | ||
3667 | || break; | ||
3668 | ||case 1: | ||
3669 | | decode_RB8a RC, INS | ||
3670 | | decode_RB8b RC | ||
3671 | | decode_RDtoRC8 RB, RD | ||
3672 | | // RA = dst*8, RB = num_const*8, RC = src1*8 (RB is made relative to KBASE) | ||
3673 | | daddu RC, BASE, RC | ||
3674 | |.if "label" ~= "none" | ||
3675 | | b label | ||
3676 | |.endif | ||
3677 | |. daddu RB, KBASE, RB | ||
3678 | || break; | ||
3679 | ||default: | ||
3680 | | decode_RB8a RB, INS | ||
3681 | | decode_RB8b RB | ||
3682 | | decode_RDtoRC8 RC, RD | ||
3683 | | // RA = dst*8, RB = src1*8, RC = src2*8 (both relative to BASE) | ||
3684 | | daddu RB, BASE, RB | ||
3685 | |.if "label" ~= "none" | ||
3686 | | b label | ||
3687 | |.endif | ||
3688 | |. daddu RC, BASE, RC | ||
3689 | || break; | ||
3690 | ||} | ||
3691 | |.endmacro | ||
3692 | | | ||
3693 | |.macro ins_arith, intins, fpins, fpcall, label | ||
3694 | | ins_arithpre none | ||
3695 | | | ||
3696 | |.if "label" ~= "none" | ||
3697 | |label: | ||
3698 | |.endif | ||
3699 | | | ||
3700 | |// The operand loads and type tags below are also used at label 5. | ||
3701 | | ld CARG1, 0(RB) | ||
3702 | | ld CARG2, 0(RC) | ||
3703 | | gettp TMP0, CARG1 | ||
3704 | | gettp TMP1, CARG2 | ||
3705 | | | ||
3706 | |.if "intins" ~= "div" | ||
3707 | | | ||
3708 | | // Check for two integers; anything else continues at label 5. | ||
3709 | | sextw CARG3, CARG1 | ||
3710 | | bne TMP0, TISNUM, >5 | ||
3711 | |. sextw CARG4, CARG2 | ||
3712 | | bne TMP1, TISNUM, >5 | ||
3713 | | | ||
3714 | |.if "intins" == "addu" | ||
3715 | |. intins CRET1, CARG3, CARG4 | ||
3716 | | xor TMP1, CRET1, CARG3 // ((y^a) & (y^b)) < 0: overflow. | ||
3717 | | xor TMP2, CRET1, CARG4 | ||
3718 | | and TMP1, TMP1, TMP2 | ||
3719 | | bltz TMP1, ->vmeta_arith | ||
3720 | |. daddu RA, BASE, RA | ||
3721 | |.elif "intins" == "subu" | ||
3722 | |. intins CRET1, CARG3, CARG4 | ||
3723 | | xor TMP1, CRET1, CARG3 // ((y^a) & (a^b)) < 0: overflow. | ||
3724 | | xor TMP2, CARG3, CARG4 | ||
3725 | | and TMP1, TMP1, TMP2 | ||
3726 | | bltz TMP1, ->vmeta_arith | ||
3727 | |. daddu RA, BASE, RA | ||
3728 | |.elif "intins" == "mult" | ||
3729 | |.if MIPSR6 | ||
3730 | |. nop | ||
3731 | | mul CRET1, CARG3, CARG4 | ||
3732 | | muh TMP2, CARG3, CARG4 | ||
3733 | |.else | ||
3734 | |. intins CARG3, CARG4 | ||
3735 | | mflo CRET1 | ||
3736 | | mfhi TMP2 | ||
3737 | |.endif | ||
3738 | | sra TMP1, CRET1, 31 | ||
3739 | | bne TMP1, TMP2, ->vmeta_arith | ||
3740 | |. daddu RA, BASE, RA | ||
3741 | |.else | ||
3742 | |. load_got lj_vm_modi | ||
3743 | | beqz CARG4, ->vmeta_arith | ||
3744 | |. daddu RA, BASE, RA | ||
3745 | | move CARG1, CARG3 | ||
3746 | | call_extern | ||
3747 | |. move CARG2, CARG4 | ||
3748 | |.endif | ||
3749 | | | ||
3750 | | zextw CRET1, CRET1 | ||
3751 | | settp CRET1, TISNUM | ||
3752 | | ins_next1 | ||
3753 | | sd CRET1, 0(RA) | ||
3754 | |3: | ||
3755 | | ins_next2 | ||
3756 | | | ||
3757 | |.endif | ||
3758 | | | ||
3759 | |5: // Check for two numbers; otherwise hand over to vmeta_arith. | ||
3760 | | .FPU ldc1 FTMP0, 0(RB) | ||
3761 | | sltu AT, TMP0, TISNUM | ||
3762 | | sltu TMP0, TMP1, TISNUM | ||
3763 | | .FPU ldc1 FTMP2, 0(RC) | ||
3764 | | and AT, AT, TMP0 | ||
3765 | | beqz AT, ->vmeta_arith | ||
3766 | |. daddu RA, BASE, RA | ||
3767 | | | ||
3768 | |.if FPU | ||
3769 | | fpins FRET1, FTMP0, FTMP2 | ||
3770 | |.elif "fpcall" == "sfpmod" | ||
3771 | | sfpmod | ||
3772 | |.else | ||
3773 | | load_got fpcall | ||
3774 | | call_extern | ||
3775 | |. nop | ||
3776 | |.endif | ||
3777 | | | ||
3778 | | ins_next1 | ||
3779 | |.if "intins" ~= "div" | ||
3780 | | b <3 | ||
3781 | |.endif | ||
3782 | |.if FPU | ||
3783 | |. sdc1 FRET1, 0(RA) | ||
3784 | |.else | ||
3785 | |. sd CRET1, 0(RA) | ||
3786 | |.endif | ||
3787 | |.if "intins" == "div" | ||
3788 | | ins_next2 | ||
3789 | |.endif | ||
3790 | | | ||
3791 | |.endmacro | ||
3792 | |||
3793 | case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: | ||
3794 | | ins_arith addu, add.d, __adddf3, none | ||
3795 | break; | ||
3796 | case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: | ||
3797 | | ins_arith subu, sub.d, __subdf3, none | ||
3798 | break; | ||
3799 | case BC_MULVN: case BC_MULNV: case BC_MULVV: | ||
3800 | | ins_arith mult, mul.d, __muldf3, none | ||
3801 | break; | ||
3802 | case BC_DIVVN: | ||
3803 | | ins_arith div, div.d, __divdf3, ->BC_DIVVN_Z | ||
3804 | break; | ||
3805 | case BC_DIVNV: case BC_DIVVV: | ||
3806 | | ins_arithpre ->BC_DIVVN_Z | ||
3807 | break; | ||
3808 | case BC_MODVN: | ||
3809 | | ins_arith modi, fpmod, sfpmod, ->BC_MODVN_Z | ||
3810 | break; | ||
3811 | case BC_MODNV: case BC_MODVV: | ||
3812 | | ins_arithpre ->BC_MODVN_Z | ||
3813 | break; | ||
3814 | case BC_POW: | ||
3815 | | ins_arithpre none | ||
3816 | | ld CARG1, 0(RB) | ||
3817 | | ld CARG2, 0(RC) | ||
3818 | | gettp TMP0, CARG1 | ||
3819 | | gettp TMP1, CARG2 | ||
3820 | | sltiu TMP0, TMP0, LJ_TISNUM | ||
3821 | | sltiu TMP1, TMP1, LJ_TISNUM | ||
3822 | | and AT, TMP0, TMP1 | ||
3823 | | load_got pow | ||
3824 | | beqz AT, ->vmeta_arith | ||
3825 | |. daddu RA, BASE, RA | ||
3826 | |.if FPU | ||
3827 | | ldc1 FARG1, 0(RB) | ||
3828 | | ldc1 FARG2, 0(RC) | ||
3829 | |.endif | ||
3830 | | call_extern | ||
3831 | |. nop | ||
3832 | | ins_next1 | ||
3833 | |.if FPU | ||
3834 | | sdc1 FRET1, 0(RA) | ||
3835 | |.else | ||
3836 | | sd CRET1, 0(RA) | ||
3837 | |.endif | ||
3838 | | ins_next2 | ||
3839 | break; | ||
3840 | |||
3841 | case BC_CAT: | ||
3842 | | // RA = dst*8, RB = src_start*8, RC = src_end*8 | ||
3843 | | decode_RB8a RB, INS | ||
3844 | | decode_RB8b RB | ||
3845 | | decode_RDtoRC8 RC, RD | ||
3846 | | dsubu CARG3, RC, RB // (src_end - src_start)*8 | ||
3847 | | sd BASE, L->base | ||
3848 | | daddu CARG2, BASE, RC // Address of the src_end slot. | ||
3849 | | move MULTRES, RB // Keep src_start*8 for use after the call. | ||
3850 | |->BC_CAT_Z: | ||
3851 | | load_got lj_meta_cat | ||
3852 | | srl CARG3, CARG3, 3 // Byte distance / 8 = number of slots. | ||
3853 | | sd PC, SAVE_PC | ||
3854 | | call_intern lj_meta_cat // (lua_State *L, TValue *top, int left) | ||
3855 | |. move CARG1, L | ||
3856 | | // Returns NULL (finished) or TValue * (metamethod). | ||
3857 | | bnez CRET1, ->vmeta_binop | ||
3858 | |. ld BASE, L->base // Delay slot: BASE is reloaded on both paths. | ||
3859 | | daddu RB, BASE, MULTRES // Address of the src_start slot. | ||
3860 | | ld CRET1, 0(RB) | ||
3861 | | daddu RA, BASE, RA | ||
3862 | | ins_next1 | ||
3863 | | sd CRET1, 0(RA) // Copy the src_start slot to dst. | ||
3864 | | ins_next2 | ||
3865 | break; | ||
3866 | |||
3867 | /* -- Constant ops ------------------------------------------------------ */ | ||
3868 | |||
3869 | case BC_KSTR: | ||
3870 | | // RA = dst*8, RD = str_const*8 (~) | ||
3871 | | dsubu TMP1, KBASE, RD | ||
3872 | | ins_next1 | ||
3873 | | li TMP2, LJ_TSTR | ||
3874 | | ld TMP0, -8(TMP1) // KBASE-8-str_const*8 | ||
3875 | | daddu RA, BASE, RA | ||
3876 | | settp TMP0, TMP2 | ||
3877 | | sd TMP0, 0(RA) | ||
3878 | | ins_next2 | ||
3879 | break; | ||
3880 | case BC_KCDATA: | ||
3881 | |.if FFI | ||
3882 | | // RA = dst*8, RD = cdata_const*8 (~) | ||
3883 | | dsubu TMP1, KBASE, RD | ||
3884 | | ins_next1 | ||
3885 | | ld TMP0, -8(TMP1) // KBASE-8-cdata_const*8 | ||
3886 | | li TMP2, LJ_TCDATA | ||
3887 | | daddu RA, BASE, RA | ||
3888 | | settp TMP0, TMP2 | ||
3889 | | sd TMP0, 0(RA) | ||
3890 | | ins_next2 | ||
3891 | |.endif | ||
3892 | break; | ||
3893 | case BC_KSHORT: | ||
3894 | | // RA = dst*8, RD = int16_literal*8 | ||
3895 | | sra RD, INS, 16 // Arithmetic shift: signed upper 16 bits of INS. | ||
3896 | | daddu RA, BASE, RA | ||
3897 | | zextw RD, RD // NOTE(review): presumably zero-extends the low word; see macro. | ||
3898 | | ins_next1 | ||
3899 | | settp RD, TISNUM // Apply the TISNUM type tag. | ||
3900 | | sd RD, 0(RA) | ||
3901 | | ins_next2 | ||
3902 | break; | ||
3903 | case BC_KNUM: | ||
3904 | | // RA = dst*8, RD = num_const*8 | ||
3905 | | daddu RD, KBASE, RD | ||
3906 | | daddu RA, BASE, RA | ||
3907 | | ld CRET1, 0(RD) | ||
3908 | | ins_next1 | ||
3909 | | sd CRET1, 0(RA) | ||
3910 | | ins_next2 | ||
3911 | break; | ||
3912 | case BC_KPRI: | ||
3913 | | // RA = dst*8, RD = primitive_type*8 (~) | ||
3914 | | daddu RA, BASE, RA | ||
3915 | | dsll TMP0, RD, 44 // (primitive_type*8) << 44 == primitive_type << 47 | ||
3916 | | not TMP0, TMP0 // Complement gives the value that is stored. | ||
3917 | | ins_next1 | ||
3918 | | sd TMP0, 0(RA) | ||
3919 | | ins_next2 | ||
3920 | break; | ||
3921 | case BC_KNIL: | ||
3922 | | // RA = base*8, RD = end*8 | ||
3923 | | daddu RA, BASE, RA | ||
3924 | | sd TISNIL, 0(RA) // Store nil to the first slot. | ||
3925 | | daddiu RA, RA, 8 | ||
3926 | | daddu RD, BASE, RD // Address of the end slot. | ||
3927 | |1: | ||
3928 | | sd TISNIL, 0(RA) | ||
3929 | | slt AT, RA, RD | ||
3930 | | bnez AT, <1 // Loop while the slot just written is below end. | ||
3931 | |. daddiu RA, RA, 8 // Delay slot: advance to the next slot. | ||
3932 | | ins_next_ | ||
3933 | break; | ||
3934 | |||
3935 | /* -- Upvalue and function ops ------------------------------------------ */ | ||
3936 | |||
3937 | case BC_UGET: | ||
3938 | | // RA = dst*8, RD = uvnum*8 | ||
3939 | | ld LFUNC:RB, FRAME_FUNC(BASE) | ||
3940 | | daddu RA, BASE, RA | ||
3941 | | cleartp LFUNC:RB | ||
3942 | | daddu RD, RD, LFUNC:RB | ||
3943 | | ld UPVAL:RB, LFUNC:RD->uvptr | ||
3944 | | ins_next1 | ||
3945 | | ld TMP1, UPVAL:RB->v | ||
3946 | | ld CRET1, 0(TMP1) | ||
3947 | | sd CRET1, 0(RA) | ||
3948 | | ins_next2 | ||
3949 | break; | ||
3950 | case BC_USETV: | ||
3951 | | // RA = uvnum*8, RD = src*8 | ||
3952 | | ld LFUNC:RB, FRAME_FUNC(BASE) | ||
3953 | | daddu RD, BASE, RD | ||
3954 | | cleartp LFUNC:RB | ||
3955 | | daddu RA, RA, LFUNC:RB | ||
3956 | | ld UPVAL:RB, LFUNC:RA->uvptr | ||
3957 | | ld CRET1, 0(RD) | ||
3958 | | lbu TMP3, UPVAL:RB->marked | ||
3959 | | ld CARG2, UPVAL:RB->v | ||
3960 | | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv) | ||
3961 | | lbu TMP0, UPVAL:RB->closed | ||
3962 | | gettp TMP2, CRET1 | ||
3963 | | sd CRET1, 0(CARG2) | ||
3964 | | li AT, LJ_GC_BLACK|1 | ||
3965 | | or TMP3, TMP3, TMP0 | ||
3966 | | beq TMP3, AT, >2 // Upvalue is closed and black? | ||
3967 | |. daddiu TMP2, TMP2, -(LJ_TNUMX+1) | ||
3968 | |1: | ||
3969 | | ins_next | ||
3970 | | | ||
3971 | |2: // Check if new value is collectable. | ||
3972 | | sltiu AT, TMP2, LJ_TISGCV - (LJ_TNUMX+1) | ||
3973 | | beqz AT, <1 // tvisgcv(v) | ||
3974 | |. cleartp GCOBJ:CRET1, CRET1 | ||
3975 | | lbu TMP3, GCOBJ:CRET1->gch.marked | ||
3976 | | andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v) | ||
3977 | | beqz TMP3, <1 | ||
3978 | |. load_got lj_gc_barrieruv | ||
3979 | | // Crossed a write barrier. Move the barrier forward. | ||
3980 | | call_intern lj_gc_barrieruv // (global_State *g, TValue *tv) | ||
3981 | |. daddiu CARG1, DISPATCH, GG_DISP2G | ||
3982 | | b <1 | ||
3983 | |. nop | ||
3984 | break; | ||
3985 | case BC_USETS: | ||
3986 | | // RA = uvnum*8, RD = str_const*8 (~) | ||
3987 | | ld LFUNC:RB, FRAME_FUNC(BASE) | ||
3988 | | dsubu TMP1, KBASE, RD | ||
3989 | | cleartp LFUNC:RB | ||
3990 | | daddu RA, RA, LFUNC:RB | ||
3991 | | ld UPVAL:RB, LFUNC:RA->uvptr | ||
3992 | | ld STR:TMP1, -8(TMP1) // KBASE-8-str_const*8 | ||
3993 | | lbu TMP2, UPVAL:RB->marked | ||
3994 | | ld CARG2, UPVAL:RB->v | ||
3995 | | lbu TMP3, STR:TMP1->marked | ||
3996 | | andi AT, TMP2, LJ_GC_BLACK // isblack(uv) | ||
3997 | | lbu TMP2, UPVAL:RB->closed | ||
3998 | | li TMP0, LJ_TSTR | ||
3999 | | settp TMP1, TMP0 | ||
4000 | | bnez AT, >2 | ||
4001 | |. sd TMP1, 0(CARG2) | ||
4002 | |1: | ||
4003 | | ins_next | ||
4004 | | | ||
4005 | |2: // Check if string is white and ensure upvalue is closed. | ||
4006 | | beqz TMP2, <1 | ||
4007 | |. andi AT, TMP3, LJ_GC_WHITES // iswhite(str) | ||
4008 | | beqz AT, <1 | ||
4009 | |. load_got lj_gc_barrieruv | ||
4010 | | // Crossed a write barrier. Move the barrier forward. | ||
4011 | | call_intern lj_gc_barrieruv // (global_State *g, TValue *tv) | ||
4012 | |. daddiu CARG1, DISPATCH, GG_DISP2G | ||
4013 | | b <1 | ||
4014 | |. nop | ||
4015 | break; | ||
4016 | case BC_USETN: | ||
4017 | | // RA = uvnum*8, RD = num_const*8 | ||
4018 | | ld LFUNC:RB, FRAME_FUNC(BASE) | ||
4019 | | daddu RD, KBASE, RD | ||
4020 | | cleartp LFUNC:RB | ||
4021 | | daddu RA, RA, LFUNC:RB | ||
4022 | | ld UPVAL:RB, LFUNC:RA->uvptr | ||
4023 | | ld CRET1, 0(RD) | ||
4024 | | ld TMP1, UPVAL:RB->v | ||
4025 | | ins_next1 | ||
4026 | | sd CRET1, 0(TMP1) | ||
4027 | | ins_next2 | ||
4028 | break; | ||
4029 | case BC_USETP: | ||
4030 | | // RA = uvnum*8, RD = primitive_type*8 (~) | ||
4031 | | ld LFUNC:RB, FRAME_FUNC(BASE) | ||
4032 | | dsll TMP0, RD, 44 // (primitive_type*8) << 44 == primitive_type << 47 | ||
4033 | | cleartp LFUNC:RB | ||
4034 | | daddu RA, RA, LFUNC:RB // Function pointer + uvnum*8. | ||
4035 | | not TMP0, TMP0 // Complement gives the value that is stored. | ||
4036 | | ld UPVAL:RB, LFUNC:RA->uvptr | ||
4037 | | ins_next1 | ||
4038 | | ld TMP1, UPVAL:RB->v | ||
4039 | | sd TMP0, 0(TMP1) // Store through the upvalue's value pointer. | ||
4040 | | ins_next2 | ||
4041 | break; | ||
4042 | |||
4043 | case BC_UCLO: | ||
4044 | | // RA = level*8, RD = target | ||
4045 | | ld TMP2, L->openupval | ||
4046 | | branch_RD // Do this first since RD is not saved. | ||
4047 | | load_got lj_func_closeuv | ||
4048 | | sd BASE, L->base | ||
4049 | | beqz TMP2, >1 | ||
4050 | |. move CARG1, L | ||
4051 | | call_intern lj_func_closeuv // (lua_State *L, TValue *level) | ||
4052 | |. daddu CARG2, BASE, RA | ||
4053 | | ld BASE, L->base | ||
4054 | |1: | ||
4055 | | ins_next | ||
4056 | break; | ||
4057 | |||
4058 | case BC_FNEW: | ||
4059 | | // RA = dst*8, RD = proto_const*8 (~) (holding function prototype) | ||
4060 | | load_got lj_func_newL_gc | ||
4061 | | dsubu TMP1, KBASE, RD | ||
4062 | | ld CARG3, FRAME_FUNC(BASE) | ||
4063 | | ld CARG2, -8(TMP1) // KBASE-8-proto_const*8 | ||
4064 | | sd BASE, L->base | ||
4065 | | sd PC, SAVE_PC | ||
4066 | | cleartp CARG3 | ||
4067 | | // (lua_State *L, GCproto *pt, GCfuncL *parent) | ||
4068 | | call_intern lj_func_newL_gc | ||
4069 | |. move CARG1, L | ||
4070 | | // Returns GCfuncL *. | ||
4071 | | li TMP0, LJ_TFUNC | ||
4072 | | ld BASE, L->base | ||
4073 | | ins_next1 | ||
4074 | | settp CRET1, TMP0 // Tag the returned pointer with LJ_TFUNC. | ||
4075 | | daddu RA, BASE, RA | ||
4076 | | sd CRET1, 0(RA) | ||
4077 | | ins_next2 | ||
4078 | break; | ||
4079 | |||
4080 | /* -- Table ops --------------------------------------------------------- */ | ||
4081 | |||
4082 | case BC_TNEW: | ||
4083 | case BC_TDUP: | ||
4084 | | // RA = dst*8, RD = (hbits|asize)*8 | tab_const*8 (~) | ||
4085 | | ld TMP0, DISPATCH_GL(gc.total)(DISPATCH) | ||
4086 | | ld TMP1, DISPATCH_GL(gc.threshold)(DISPATCH) | ||
4087 | | sd BASE, L->base | ||
4088 | | sd PC, SAVE_PC | ||
4089 | | sltu AT, TMP0, TMP1 | ||
4090 | | beqz AT, >5 // gc.total >= gc.threshold: take the GC step path. | ||
4091 | |1: | ||
4092 | if (op == BC_TNEW) { | ||
4093 | | load_got lj_tab_new | ||
4094 | | srl CARG2, RD, 3 | ||
4095 | | andi CARG2, CARG2, 0x7ff // asize = low 11 bits of RD/8. | ||
4096 | | li TMP0, 0x801 // Replacement value: asize 0x7ff is turned into 0x801. | ||
4097 | | addiu AT, CARG2, -0x7ff // AT == 0 iff asize == 0x7ff. | ||
4098 | | srl CARG3, RD, 14 // hbits = RD/8 >> 11. | ||
4099 | |.if MIPSR6 | ||
4100 | | seleqz TMP0, TMP0, AT | ||
4101 | | selnez CARG2, CARG2, AT | ||
4102 | | or CARG2, CARG2, TMP0 | ||
4103 | |.else | ||
4104 | | movz CARG2, TMP0, AT | ||
4105 | |.endif | ||
4106 | | // (lua_State *L, int32_t asize, uint32_t hbits) | ||
4107 | | call_intern lj_tab_new | ||
4108 | |. move CARG1, L | ||
4109 | | // Returns Table *. | ||
4110 | } else { | ||
4111 | | load_got lj_tab_dup | ||
4112 | | dsubu TMP1, KBASE, RD | ||
4113 | | move CARG1, L | ||
4114 | | call_intern lj_tab_dup // (lua_State *L, Table *kt) | ||
4115 | |. ld CARG2, -8(TMP1) // KBASE-8-tab_const*8 | ||
4116 | | // Returns Table *. | ||
4117 | } | ||
4118 | | li TMP0, LJ_TTAB | ||
4119 | | ld BASE, L->base | ||
4120 | | ins_next1 | ||
4121 | | daddu RA, BASE, RA | ||
4122 | | settp CRET1, TMP0 // Tag the returned pointer with LJ_TTAB. | ||
4123 | | sd CRET1, 0(RA) | ||
4124 | | ins_next2 | ||
4125 | |5: // Run a GC step, then retry the allocation at 1. | ||
4126 | | load_got lj_gc_step_fixtop | ||
4127 | | move MULTRES, RD // Save RD across the call. | ||
4128 | | call_intern lj_gc_step_fixtop // (lua_State *L) | ||
4129 | |. move CARG1, L | ||
4130 | | b <1 | ||
4131 | |. move RD, MULTRES // Delay slot: restore RD. | ||
4132 | break; | ||
4133 | |||
4134 | case BC_GGET: | ||
4135 | | // RA = dst*8, RD = str_const*8 (~) | ||
4136 | case BC_GSET: | ||
4137 | | // RA = src*8, RD = str_const*8 (~) | ||
4138 | | ld LFUNC:TMP2, FRAME_FUNC(BASE) | ||
4139 | | dsubu TMP1, KBASE, RD | ||
4140 | | ld STR:RC, -8(TMP1) // KBASE-8-str_const*8 | ||
4141 | | cleartp LFUNC:TMP2 | ||
4142 | | ld TAB:RB, LFUNC:TMP2->env // Table operand is the function's env. | ||
4143 | if (op == BC_GGET) { | ||
4144 | | b ->BC_TGETS_Z | ||
4145 | } else { | ||
4146 | | b ->BC_TSETS_Z | ||
4147 | } | ||
4148 | |. daddu RA, BASE, RA // Delay slot shared by whichever branch was emitted. | ||
4149 | break; | ||
4150 | |||
4151 | case BC_TGETV: | ||
4152 | | // RA = dst*8, RB = table*8, RC = key*8 | ||
4153 | | decode_RB8a RB, INS | ||
4154 | | decode_RB8b RB | ||
4155 | | decode_RDtoRC8 RC, RD | ||
4156 | | daddu CARG2, BASE, RB | ||
4157 | | daddu CARG3, BASE, RC | ||
4158 | | ld TAB:RB, 0(CARG2) | ||
4159 | | ld TMP2, 0(CARG3) | ||
4160 | | daddu RA, BASE, RA | ||
4161 | | checktab TAB:RB, ->vmeta_tgetv | ||
4162 | | gettp TMP3, TMP2 | ||
4163 | | bne TMP3, TISNUM, >5 // Integer key? | ||
4164 | |. lw TMP0, TAB:RB->asize | ||
4165 | | sextw TMP2, TMP2 | ||
4166 | | ld TMP1, TAB:RB->array | ||
4167 | | sltu AT, TMP2, TMP0 | ||
4168 | | sll TMP2, TMP2, 3 | ||
4169 | | beqz AT, ->vmeta_tgetv // Integer key and in array part? | ||
4170 | |. daddu TMP2, TMP1, TMP2 | ||
4171 | | ld AT, 0(TMP2) | ||
4172 | | beq AT, TISNIL, >2 | ||
4173 | |. ld CRET1, 0(TMP2) | ||
4174 | |1: | ||
4175 | | ins_next1 | ||
4176 | | sd CRET1, 0(RA) | ||
4177 | | ins_next2 | ||
4178 | | | ||
4179 | |2: // Check for __index if table value is nil. | ||
4180 | | ld TAB:TMP2, TAB:RB->metatable | ||
4181 | | beqz TAB:TMP2, <1 // No metatable: done. | ||
4182 | |. nop | ||
4183 | | lbu TMP0, TAB:TMP2->nomm | ||
4184 | | andi TMP0, TMP0, 1<<MM_index | ||
4185 | | bnez TMP0, <1 // 'no __index' flag set: done. | ||
4186 | |. nop | ||
4187 | | b ->vmeta_tgetv | ||
4188 | |. nop | ||
4189 | | | ||
4190 | |5: | ||
4191 | | li AT, LJ_TSTR | ||
4192 | | bne TMP3, AT, ->vmeta_tgetv | ||
4193 | |. cleartp RC, TMP2 | ||
4194 | | b ->BC_TGETS_Z // String key? | ||
4195 | |. nop | ||
4196 | break; | ||
4197 | case BC_TGETS: | ||
4198 | | // RA = dst*8, RB = table*8, RC = str_const*8 (~) | ||
4199 | | decode_RB8a RB, INS | ||
4200 | | decode_RB8b RB | ||
4201 | | decode_RC8a RC, INS | ||
4202 | | daddu CARG2, BASE, RB | ||
4203 | | decode_RC8b RC | ||
4204 | | ld TAB:RB, 0(CARG2) | ||
4205 | | dsubu CARG3, KBASE, RC | ||
4206 | | daddu RA, BASE, RA | ||
4207 | | ld STR:RC, -8(CARG3) // KBASE-8-str_const*8 | ||
4208 | | checktab TAB:RB, ->vmeta_tgets1 | ||
4209 | |->BC_TGETS_Z: | ||
4210 | | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 | ||
4211 | | lw TMP0, TAB:RB->hmask | ||
4212 | | lw TMP1, STR:RC->hash | ||
4213 | | ld NODE:TMP2, TAB:RB->node | ||
4214 | | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask | ||
4215 | | sll TMP0, TMP1, 5 | ||
4216 | | sll TMP1, TMP1, 3 | ||
4217 | | subu TMP1, TMP0, TMP1 | ||
4218 | | li TMP3, LJ_TSTR | ||
4219 | | daddu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) | ||
4220 | | settp STR:RC, TMP3 // Tagged key to look for. | ||
4221 | |1: | ||
4222 | | ld CARG1, NODE:TMP2->key | ||
4223 | | ld CRET1, NODE:TMP2->val | ||
4224 | | ld NODE:TMP1, NODE:TMP2->next | ||
4225 | | bne CARG1, RC, >4 | ||
4226 | |. ld TAB:TMP3, TAB:RB->metatable | ||
4227 | | beq CRET1, TISNIL, >5 // Key found, but nil value? | ||
4228 | |. nop | ||
4229 | |3: | ||
4230 | | ins_next1 | ||
4231 | | sd CRET1, 0(RA) | ||
4232 | | ins_next2 | ||
4233 | | | ||
4234 | |4: // Follow hash chain. | ||
4235 | | bnez NODE:TMP1, <1 | ||
4236 | |. move NODE:TMP2, NODE:TMP1 | ||
4237 | | // End of hash chain: key not found, nil result. | ||
4238 | | | ||
4239 | |5: // Check for __index if table value is nil. | ||
4240 | | beqz TAB:TMP3, <3 // No metatable: done. | ||
4241 | |. move CRET1, TISNIL | ||
4242 | | lbu TMP0, TAB:TMP3->nomm | ||
4243 | | andi TMP0, TMP0, 1<<MM_index | ||
4244 | | bnez TMP0, <3 // 'no __index' flag set: done. | ||
4245 | |. nop | ||
4246 | | b ->vmeta_tgets | ||
4247 | |. nop | ||
4248 | break; | ||
4249 | case BC_TGETB: | ||
4250 | | // RA = dst*8, RB = table*8, RC = index*8 | ||
4251 | | decode_RB8a RB, INS | ||
4252 | | decode_RB8b RB | ||
4253 | | daddu CARG2, BASE, RB | ||
4254 | | decode_RDtoRC8 RC, RD | ||
4255 | | ld TAB:RB, 0(CARG2) | ||
4256 | | daddu RA, BASE, RA | ||
4257 | | srl TMP0, RC, 3 | ||
4258 | | checktab TAB:RB, ->vmeta_tgetb | ||
4259 | | lw TMP1, TAB:RB->asize | ||
4260 | | ld TMP2, TAB:RB->array | ||
4261 | | sltu AT, TMP0, TMP1 | ||
4262 | | beqz AT, ->vmeta_tgetb | ||
4263 | |. daddu RC, TMP2, RC | ||
4264 | | ld AT, 0(RC) | ||
4265 | | beq AT, TISNIL, >5 | ||
4266 | |. ld CRET1, 0(RC) | ||
4267 | |1: | ||
4268 | | ins_next1 | ||
4269 | | sd CRET1, 0(RA) | ||
4270 | | ins_next2 | ||
4271 | | | ||
4272 | |5: // Check for __index if table value is nil. | ||
4273 | | ld TAB:TMP2, TAB:RB->metatable | ||
4274 | | beqz TAB:TMP2, <1 // No metatable: done. | ||
4275 | |. nop | ||
4276 | | lbu TMP1, TAB:TMP2->nomm | ||
4277 | | andi TMP1, TMP1, 1<<MM_index | ||
4278 | | bnez TMP1, <1 // 'no __index' flag set: done. | ||
4279 | |. nop | ||
4280 | | b ->vmeta_tgetb // Caveat: preserve TMP0 and CARG2! | ||
4281 | |. nop | ||
4282 | break; | ||
4283 | case BC_TGETR: | ||
4284 | | // RA = dst*8, RB = table*8, RC = key*8 | ||
4285 | | decode_RB8a RB, INS | ||
4286 | | decode_RB8b RB | ||
4287 | | decode_RDtoRC8 RC, RD | ||
4288 | | daddu RB, BASE, RB | ||
4289 | | daddu RC, BASE, RC | ||
4290 | | ld TAB:CARG1, 0(RB) | ||
4291 | | lw CARG2, LO(RC) | ||
4292 | | daddu RA, BASE, RA | ||
4293 | | cleartp TAB:CARG1 | ||
4294 | | lw TMP0, TAB:CARG1->asize | ||
4295 | | ld TMP1, TAB:CARG1->array | ||
4296 | | sltu AT, CARG2, TMP0 | ||
4297 | | sll TMP2, CARG2, 3 | ||
4298 | | beqz AT, ->vmeta_tgetr // In array part? | ||
4299 | |. daddu CRET1, TMP1, TMP2 | ||
4300 | | ld CARG2, 0(CRET1) | ||
4301 | |->BC_TGETR_Z: | ||
4302 | | ins_next1 | ||
4303 | | sd CARG2, 0(RA) | ||
4304 | | ins_next2 | ||
4305 | break; | ||
4306 | |||
4307 | case BC_TSETV: | ||
4308 | | // RA = src*8, RB = table*8, RC = key*8 | ||
4309 | | decode_RB8a RB, INS | ||
4310 | | decode_RB8b RB | ||
4311 | | decode_RDtoRC8 RC, RD | ||
4312 | | daddu CARG2, BASE, RB | ||
4313 | | daddu CARG3, BASE, RC | ||
4314 | | ld RB, 0(CARG2) | ||
4315 | | ld TMP2, 0(CARG3) | ||
4316 | | daddu RA, BASE, RA | ||
4317 | | checktab RB, ->vmeta_tsetv | ||
4318 | | checkint TMP2, >5 | ||
4319 | |. sextw RC, TMP2 | ||
4320 | | lw TMP0, TAB:RB->asize | ||
4321 | | ld TMP1, TAB:RB->array | ||
4322 | | sltu AT, RC, TMP0 | ||
4323 | | sll TMP2, RC, 3 | ||
4324 | | beqz AT, ->vmeta_tsetv // Integer key and in array part? | ||
4325 | |. daddu TMP1, TMP1, TMP2 | ||
4326 | | ld TMP0, 0(TMP1) | ||
4327 | | lbu TMP3, TAB:RB->marked | ||
4328 | | beq TMP0, TISNIL, >3 | ||
4329 | |. ld CRET1, 0(RA) | ||
4330 | |1: | ||
4331 | | andi AT, TMP3, LJ_GC_BLACK // isblack(table) | ||
4332 | | bnez AT, >7 | ||
4333 | |. sd CRET1, 0(TMP1) | ||
4334 | |2: | ||
4335 | | ins_next | ||
4336 | | | ||
4337 | |3: // Check for __newindex if previous value is nil. | ||
4338 | | ld TAB:TMP2, TAB:RB->metatable | ||
4339 | | beqz TAB:TMP2, <1 // No metatable: done. | ||
4340 | |. nop | ||
4341 | | lbu TMP2, TAB:TMP2->nomm | ||
4342 | | andi TMP2, TMP2, 1<<MM_newindex | ||
4343 | | bnez TMP2, <1 // 'no __newindex' flag set: done. | ||
4344 | |. nop | ||
4345 | | b ->vmeta_tsetv | ||
4346 | |. nop | ||
4347 | | | ||
4348 | |5: | ||
4349 | | gettp AT, TMP2 | ||
4350 | | daddiu AT, AT, -LJ_TSTR | ||
4351 | | bnez AT, ->vmeta_tsetv | ||
4352 | |. nop | ||
4353 | | b ->BC_TSETS_Z // String key? | ||
4354 | |. cleartp STR:RC, TMP2 | ||
4355 | | | ||
4356 | |7: // Possible table write barrier for the value. Skip valiswhite check. | ||
4357 | | barrierback TAB:RB, TMP3, TMP0, <2 | ||
4358 | break; | ||
4359 | case BC_TSETS: | ||
4360 | | // RA = src*8, RB = table*8, RC = str_const*8 (~) | ||
4361 | | decode_RB8a RB, INS | ||
4362 | | decode_RB8b RB | ||
4363 | | daddu CARG2, BASE, RB | ||
4364 | | decode_RC8a RC, INS | ||
4365 | | ld TAB:RB, 0(CARG2) | ||
4366 | | decode_RC8b RC | ||
4367 | | dsubu CARG3, KBASE, RC | ||
4368 | | ld RC, -8(CARG3) // KBASE-8-str_const*8 | ||
4369 | | daddu RA, BASE, RA | ||
4370 | | cleartp STR:RC | ||
4371 | | checktab TAB:RB, ->vmeta_tsets1 | ||
4372 | |->BC_TSETS_Z: | ||
4373 | | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+src*8 | ||
4374 | | lw TMP0, TAB:RB->hmask | ||
4375 | | lw TMP1, STR:RC->hash | ||
4376 | | ld NODE:TMP2, TAB:RB->node | ||
4377 | | sb r0, TAB:RB->nomm // Clear metamethod cache. | ||
4378 | | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask | ||
4379 | | sll TMP0, TMP1, 5 | ||
4380 | | sll TMP1, TMP1, 3 | ||
4381 | | subu TMP1, TMP0, TMP1 | ||
4382 | | li TMP3, LJ_TSTR | ||
4383 | | daddu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) | ||
4384 | | settp STR:RC, TMP3 // Tagged key to look for. | ||
4385 | |.if FPU | ||
4386 | | ldc1 FTMP0, 0(RA) | ||
4387 | |.else | ||
4388 | | ld CRET1, 0(RA) | ||
4389 | |.endif | ||
4390 | |1: | ||
4391 | | ld TMP0, NODE:TMP2->key | ||
4392 | | ld CARG2, NODE:TMP2->val | ||
4393 | | ld NODE:TMP1, NODE:TMP2->next | ||
4394 | | bne TMP0, RC, >5 | ||
4395 | |. lbu TMP3, TAB:RB->marked | ||
4396 | | beq CARG2, TISNIL, >4 // Key found, but nil value? | ||
4397 | |. ld TAB:TMP0, TAB:RB->metatable | ||
4398 | |2: | ||
4399 | | andi AT, TMP3, LJ_GC_BLACK // isblack(table) | ||
4400 | | bnez AT, >7 | ||
4401 | |.if FPU | ||
4402 | |. sdc1 FTMP0, NODE:TMP2->val | ||
4403 | |.else | ||
4404 | |. sd CRET1, NODE:TMP2->val | ||
4405 | |.endif | ||
4406 | |3: | ||
4407 | | ins_next | ||
4408 | | | ||
4409 | |4: // Check for __newindex if previous value is nil. | ||
4410 | | beqz TAB:TMP0, <2 // No metatable: done. | ||
4411 | |. nop | ||
4412 | | lbu TMP0, TAB:TMP0->nomm | ||
4413 | | andi TMP0, TMP0, 1<<MM_newindex | ||
4414 | | bnez TMP0, <2 // 'no __newindex' flag set: done. | ||
4415 | |. nop | ||
4416 | | b ->vmeta_tsets | ||
4417 | |. nop | ||
4418 | | | ||
4419 | |5: // Follow hash chain. | ||
4420 | | bnez NODE:TMP1, <1 | ||
4421 | |. move NODE:TMP2, NODE:TMP1 | ||
4422 | | // End of hash chain: key not found, add a new one | ||
4423 | | | ||
4424 | | // But check for __newindex first. | ||
4425 | | ld TAB:TMP2, TAB:RB->metatable | ||
4426 | | beqz TAB:TMP2, >6 // No metatable: continue. | ||
4427 | |. daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv) | ||
4428 | | lbu TMP0, TAB:TMP2->nomm | ||
4429 | | andi TMP0, TMP0, 1<<MM_newindex | ||
4430 | | beqz TMP0, ->vmeta_tsets // 'no __newindex' flag NOT set: check. | ||
4431 | |6: | ||
4432 | | load_got lj_tab_newkey | ||
4433 | | sd RC, 0(CARG3) | ||
4434 | | sd BASE, L->base | ||
4435 | | move CARG2, TAB:RB | ||
4436 | | sd PC, SAVE_PC | ||
4437 | | call_intern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) | ||
4438 | |. move CARG1, L | ||
4439 | | // Returns TValue *. | ||
4440 | | ld BASE, L->base | ||
4441 | |.if FPU | ||
4442 | | b <3 // No 2nd write barrier needed. | ||
4443 | |. sdc1 FTMP0, 0(CRET1) | ||
4444 | |.else | ||
4445 | | ld CARG1, 0(RA) | ||
4446 | | b <3 // No 2nd write barrier needed. | ||
4447 | |. sd CARG1, 0(CRET1) | ||
4448 | |.endif | ||
4449 | | | ||
4450 | |7: // Possible table write barrier for the value. Skip valiswhite check. | ||
4451 | | barrierback TAB:RB, TMP3, TMP0, <3 | ||
4452 | break; | ||
4453 | case BC_TSETB: | ||
4454 | | // RA = src*8, RB = table*8, RC = index*8 | ||
4455 | | decode_RB8a RB, INS | ||
4456 | | decode_RB8b RB | ||
4457 | | daddu CARG2, BASE, RB | ||
4458 | | decode_RDtoRC8 RC, RD | ||
4459 | | ld TAB:RB, 0(CARG2) | ||
4460 | | daddu RA, BASE, RA | ||
4461 | | srl TMP0, RC, 3 | ||
4462 | | checktab RB, ->vmeta_tsetb | ||
4463 | | lw TMP1, TAB:RB->asize | ||
4464 | | ld TMP2, TAB:RB->array | ||
4465 | | sltu AT, TMP0, TMP1 | ||
4466 | | beqz AT, ->vmeta_tsetb | ||
4467 | |. daddu RC, TMP2, RC | ||
4468 | | ld TMP1, 0(RC) | ||
4469 | | lbu TMP3, TAB:RB->marked | ||
4470 | | beq TMP1, TISNIL, >5 | ||
4471 | |1: | ||
4472 | |. ld CRET1, 0(RA) | ||
4473 | | andi AT, TMP3, LJ_GC_BLACK // isblack(table) | ||
4474 | | bnez AT, >7 | ||
4475 | |. sd CRET1, 0(RC) | ||
4476 | |2: | ||
4477 | | ins_next | ||
4478 | | | ||
4479 | |5: // Check for __newindex if previous value is nil. | ||
4480 | | ld TAB:TMP2, TAB:RB->metatable | ||
4481 | | beqz TAB:TMP2, <1 // No metatable: done. | ||
4482 | |. nop | ||
4483 | | lbu TMP1, TAB:TMP2->nomm | ||
4484 | | andi TMP1, TMP1, 1<<MM_newindex | ||
4485 | | bnez TMP1, <1 // 'no __newindex' flag set: done. | ||
4486 | |. nop | ||
4487 | | b ->vmeta_tsetb // Caveat: preserve TMP0 and CARG2! | ||
4488 | |. nop | ||
4489 | | | ||
4490 | |7: // Possible table write barrier for the value. Skip valiswhite check. | ||
4491 | | barrierback TAB:RB, TMP3, TMP0, <2 | ||
4492 | break; | ||
4493 | case BC_TSETR: | ||
4494 | | // RA = src*8, RB = table*8, RC = key*8 | ||
4495 | | decode_RB8a RB, INS | ||
4496 | | decode_RB8b RB | ||
4497 | | decode_RDtoRC8 RC, RD | ||
4498 | | daddu CARG1, BASE, RB | ||
4499 | | daddu CARG3, BASE, RC | ||
4500 | | ld TAB:CARG2, 0(CARG1) | ||
4501 | | lw CARG3, LO(CARG3) // Only the low word of the key slot is read. | ||
4502 | | cleartp TAB:CARG2 | ||
4503 | | lbu TMP3, TAB:CARG2->marked | ||
4504 | | lw TMP0, TAB:CARG2->asize | ||
4505 | | ld TMP1, TAB:CARG2->array | ||
4506 | | andi AT, TMP3, LJ_GC_BLACK // isblack(table) | ||
4507 | | bnez AT, >7 | ||
4508 | |. daddu RA, BASE, RA | ||
4509 | |2: | ||
4510 | | sltu AT, CARG3, TMP0 | ||
4511 | | sll TMP2, CARG3, 3 | ||
4512 | | beqz AT, ->vmeta_tsetr // In array part? | ||
4513 | |. daddu CRET1, TMP1, TMP2 // Delay slot: address of array[key]. | ||
4514 | |->BC_TSETR_Z: | ||
4515 | | ld CARG1, 0(RA) // Load the source value. | ||
4516 | | ins_next1 | ||
4517 | | sd CARG1, 0(CRET1) | ||
4518 | | ins_next2 | ||
4519 | | | ||
4520 | |7: // Possible table write barrier for the value. Skip valiswhite check. | ||
4521 | | barrierback TAB:CARG2, TMP3, CRET1, <2 | ||
4522 | break; | ||
4523 | |||
4524 | case BC_TSETM: | ||
4525 | | // RA = base*8 (table at base-1), RD = num_const*8 (start index) | ||
4526 | | daddu RA, BASE, RA | ||
4527 | |1: | ||
4528 | | daddu TMP3, KBASE, RD | ||
4529 | | ld TAB:CARG2, -8(RA) // Guaranteed to be a table. | ||
4530 | | addiu TMP0, MULTRES, -8 | ||
4531 | | lw TMP3, LO(TMP3) // Integer constant is in lo-word. | ||
4532 | | beqz TMP0, >4 // Nothing to copy? | ||
4533 | |. srl CARG3, TMP0, 3 | ||
4534 | | cleartp CARG2 | ||
4535 | | addu CARG3, CARG3, TMP3 | ||
4536 | | lw TMP2, TAB:CARG2->asize | ||
4537 | | sll TMP1, TMP3, 3 | ||
4538 | | lbu TMP3, TAB:CARG2->marked | ||
4539 | | ld CARG1, TAB:CARG2->array | ||
4540 | | sltu AT, TMP2, CARG3 | ||
4541 | | bnez AT, >5 | ||
4542 | |. daddu TMP2, RA, TMP0 | ||
4543 | | daddu TMP1, TMP1, CARG1 | ||
4544 | | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table) | ||
4545 | |3: // Copy result slots to table. | ||
4546 | | ld CRET1, 0(RA) | ||
4547 | | daddiu RA, RA, 8 | ||
4548 | | sltu AT, RA, TMP2 | ||
4549 | | sd CRET1, 0(TMP1) | ||
4550 | | bnez AT, <3 | ||
4551 | |. daddiu TMP1, TMP1, 8 | ||
4552 | | bnez TMP0, >7 | ||
4553 | |. nop | ||
4554 | |4: | ||
4555 | | ins_next | ||
4556 | | | ||
4557 | |5: // Need to resize array part. | ||
4558 | | load_got lj_tab_reasize | ||
4559 | | sd BASE, L->base | ||
4560 | | sd PC, SAVE_PC | ||
4561 | | move BASE, RD | ||
4562 | | call_intern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) | ||
4563 | |. move CARG1, L | ||
4564 | | // Must not reallocate the stack. | ||
4565 | | move RD, BASE | ||
4566 | | b <1 | ||
4567 | |. ld BASE, L->base // Reload BASE for lack of a saved register. | ||
4568 | | | ||
4569 | |7: // Possible table write barrier for any value. Skip valiswhite check. | ||
4570 | | barrierback TAB:CARG2, TMP3, TMP0, <4 | ||
4571 | break; | ||
4572 | |||
4573 | /* -- Calls and vararg handling ----------------------------------------- */ | ||
4574 | |||
4575 | case BC_CALLM: | ||
4576 | | // RA = base*8, (RB = (nresults+1)*8,) RC = extra_nargs*8 | ||
4577 | | decode_RDtoRC8 NARGS8:RC, RD | ||
4578 | | b ->BC_CALL_Z | ||
4579 | |. addu NARGS8:RC, NARGS8:RC, MULTRES | ||
4580 | break; | ||
4581 | case BC_CALL: | ||
4582 | | // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8 | ||
4583 | | decode_RDtoRC8 NARGS8:RC, RD | ||
4584 | |->BC_CALL_Z: | ||
4585 | | move TMP2, BASE | ||
4586 | | daddu BASE, BASE, RA | ||
4587 | | ld LFUNC:RB, 0(BASE) | ||
4588 | | daddiu BASE, BASE, 16 | ||
4589 | | addiu NARGS8:RC, NARGS8:RC, -8 | ||
4590 | | checkfunc RB, ->vmeta_call | ||
4591 | | ins_call | ||
4592 | break; | ||
4593 | |||
4594 | case BC_CALLMT: | ||
4595 | | // RA = base*8, (RB = 0,) RC = extra_nargs*8 | ||
4596 | | addu NARGS8:RD, NARGS8:RD, MULTRES // BC_CALLT gets RC from RD. | ||
4597 | | // Fall through. Assumes BC_CALLT follows. | ||
4598 | break; | ||
4599 | case BC_CALLT: | ||
4600 | | // RA = base*8, (RB = 0,) RC = (nargs+1)*8 | ||
4601 | | daddu RA, BASE, RA | ||
4602 | | ld RB, 0(RA) | ||
4603 | | move NARGS8:RC, RD | ||
4604 | | ld TMP1, FRAME_PC(BASE) | ||
4605 | | daddiu RA, RA, 16 | ||
4606 | | addiu NARGS8:RC, NARGS8:RC, -8 | ||
4607 | | checktp CARG3, RB, -LJ_TFUNC, ->vmeta_callt | ||
4608 | |->BC_CALLT_Z: | ||
4609 | | andi TMP0, TMP1, FRAME_TYPE // Caveat: preserve TMP0 until the 'or'. | ||
4610 | | lbu TMP3, LFUNC:CARG3->ffid | ||
4611 | | bnez TMP0, >7 | ||
4612 | |. xori TMP2, TMP1, FRAME_VARG | ||
4613 | |1: | ||
4614 | | sd RB, FRAME_FUNC(BASE) // Copy function down, but keep PC. | ||
4615 | | sltiu AT, TMP3, 2 // (> FF_C) Calling a fast function? | ||
4616 | | move TMP2, BASE | ||
4617 | | move RB, CARG3 | ||
4618 | | beqz NARGS8:RC, >3 | ||
4619 | |. move TMP3, NARGS8:RC | ||
4620 | |2: | ||
4621 | | ld CRET1, 0(RA) | ||
4622 | | daddiu RA, RA, 8 | ||
4623 | | addiu TMP3, TMP3, -8 | ||
4624 | | sd CRET1, 0(TMP2) | ||
4625 | | bnez TMP3, <2 | ||
4626 | |. daddiu TMP2, TMP2, 8 | ||
4627 | |3: | ||
4628 | | or TMP0, TMP0, AT | ||
4629 | | beqz TMP0, >5 | ||
4630 | |. nop | ||
4631 | |4: | ||
4632 | | ins_callt | ||
4633 | | | ||
4634 | |5: // Tailcall to a fast function with a Lua frame below. | ||
4635 | | lw INS, -4(TMP1) | ||
4636 | | decode_RA8a RA, INS | ||
4637 | | decode_RA8b RA | ||
4638 | | dsubu TMP1, BASE, RA | ||
4639 | | ld TMP1, -32(TMP1) | ||
4640 | | cleartp LFUNC:TMP1 | ||
4641 | | ld TMP1, LFUNC:TMP1->pc | ||
4642 | | b <4 | ||
4643 | |. ld KBASE, PC2PROTO(k)(TMP1) // Need to prepare KBASE. | ||
4644 | | | ||
4645 | |7: // Tailcall from a vararg function. | ||
4646 | | andi AT, TMP2, FRAME_TYPEP | ||
4647 | | bnez AT, <1 // Vararg frame below? | ||
4648 | |. dsubu TMP2, BASE, TMP2 // Relocate BASE down. | ||
4649 | | move BASE, TMP2 | ||
4650 | | ld TMP1, FRAME_PC(TMP2) | ||
4651 | | b <1 | ||
4652 | |. andi TMP0, TMP1, FRAME_TYPE | ||
4653 | break; | ||
4654 | |||
4655 | case BC_ITERC: | ||
4656 | | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8)) | ||
4657 | | move TMP2, BASE // Save old BASE for vmeta_call. | ||
4658 | | daddu BASE, BASE, RA | ||
4659 | | ld RB, -24(BASE) | ||
4660 | | ld CARG1, -16(BASE) | ||
4661 | | ld CARG2, -8(BASE) | ||
4662 | | li NARGS8:RC, 16 // Iterators get 2 arguments. | ||
4663 | | sd RB, 0(BASE) // Copy callable. | ||
4664 | | sd CARG1, 16(BASE) // Copy state. | ||
4665 | | sd CARG2, 24(BASE) // Copy control var. | ||
4666 | | daddiu BASE, BASE, 16 | ||
4667 | | checkfunc RB, ->vmeta_call | ||
4668 | | ins_call | ||
4669 | break; | ||
4670 | |||
4671 | case BC_ITERN: | ||
4672 | | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) | ||
4673 | |.if JIT | ||
4674 | | // NYI: add hotloop, record BC_ITERN. | ||
4675 | |.endif | ||
4676 | | daddu RA, BASE, RA | ||
4677 | | ld TAB:RB, -16(RA) | ||
4678 | | lw RC, -8+LO(RA) // Get index from control var. | ||
4679 | | cleartp TAB:RB | ||
4680 | | daddiu PC, PC, 4 | ||
4681 | | lw TMP0, TAB:RB->asize | ||
4682 | | ld TMP1, TAB:RB->array | ||
4683 | | dsll CARG3, TISNUM, 47 | ||
4684 | |1: // Traverse array part. | ||
4685 | | sltu AT, RC, TMP0 | ||
4686 | | beqz AT, >5 // Index points after array part? | ||
4687 | |. sll TMP3, RC, 3 | ||
4688 | | daddu TMP3, TMP1, TMP3 | ||
4689 | | ld CARG1, 0(TMP3) | ||
4690 | | lhu RD, -4+OFS_RD(PC) | ||
4691 | | or TMP2, RC, CARG3 | ||
4692 | | beq CARG1, TISNIL, <1 // Skip holes in array part. | ||
4693 | |. addiu RC, RC, 1 | ||
4694 | | sd TMP2, 0(RA) | ||
4695 | | sd CARG1, 8(RA) | ||
4696 | | or TMP0, RC, CARG3 | ||
4697 | | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) | ||
4698 | | decode_RD4b RD | ||
4699 | | daddu RD, RD, TMP3 | ||
4700 | | sw TMP0, -8+LO(RA) // Update control var. | ||
4701 | | daddu PC, PC, RD | ||
4702 | |3: | ||
4703 | | ins_next | ||
4704 | | | ||
4705 | |5: // Traverse hash part. | ||
4706 | | lw TMP1, TAB:RB->hmask | ||
4707 | | subu RC, RC, TMP0 | ||
4708 | | ld TMP2, TAB:RB->node | ||
4709 | |6: | ||
4710 | | sltu AT, TMP1, RC // End of iteration? Branch to ITERL+1. | ||
4711 | | bnez AT, <3 | ||
4712 | |. sll TMP3, RC, 5 | ||
4713 | | sll RB, RC, 3 | ||
4714 | | subu TMP3, TMP3, RB | ||
4715 | | daddu NODE:TMP3, TMP3, TMP2 | ||
4716 | | ld CARG1, 0(NODE:TMP3) | ||
4717 | | lhu RD, -4+OFS_RD(PC) | ||
4718 | | beq CARG1, TISNIL, <6 // Skip holes in hash part. | ||
4719 | |. addiu RC, RC, 1 | ||
4720 | | ld CARG2, NODE:TMP3->key | ||
4721 | | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) | ||
4722 | | sd CARG1, 8(RA) | ||
4723 | | addu RC, RC, TMP0 | ||
4724 | | decode_RD4b RD | ||
4725 | | addu RD, RD, TMP3 | ||
4726 | | sd CARG2, 0(RA) | ||
4727 | | daddu PC, PC, RD | ||
4728 | | b <3 | ||
4729 | |. sw RC, -8+LO(RA) // Update control var. | ||
4730 | break; | ||
4731 | |||
4732 | case BC_ISNEXT: | ||
4733 | | // RA = base*8, RD = target (points to ITERN) | ||
4734 | | daddu RA, BASE, RA | ||
4735 | | srl TMP0, RD, 1 | ||
4736 | | ld CFUNC:CARG1, -24(RA) | ||
4737 | | daddu TMP0, PC, TMP0 | ||
4738 | | ld CARG2, -16(RA) | ||
4739 | | ld CARG3, -8(RA) | ||
4740 | | lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535) | ||
4741 | | checkfunc CFUNC:CARG1, >5 | ||
4742 | | gettp CARG2, CARG2 | ||
4743 | | daddiu CARG2, CARG2, -LJ_TTAB | ||
4744 | | lbu TMP1, CFUNC:CARG1->ffid | ||
4745 | | daddiu CARG3, CARG3, -LJ_TNIL | ||
4746 | | or AT, CARG2, CARG3 | ||
4747 | | daddiu TMP1, TMP1, -FF_next_N | ||
4748 | | or AT, AT, TMP1 | ||
4749 | | bnez AT, >5 | ||
4750 | |. lui TMP1, 0xfffe | ||
4751 | | daddu PC, TMP0, TMP2 | ||
4752 | | ori TMP1, TMP1, 0x7fff | ||
4753 | | dsll TMP1, TMP1, 32 | ||
4754 | | sd TMP1, -8(RA) | ||
4755 | |1: | ||
4756 | | ins_next | ||
4757 | |5: // Despecialize bytecode if any of the checks fail. | ||
4758 | | li TMP3, BC_JMP | ||
4759 | | li TMP1, BC_ITERC | ||
4760 | | sb TMP3, -4+OFS_OP(PC) | ||
4761 | | daddu PC, TMP0, TMP2 | ||
4762 | | b <1 | ||
4763 | |. sb TMP1, OFS_OP(PC) | ||
4764 | break; | ||
4765 | |||
4766 | case BC_VARG: | ||
4767 | | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8 | ||
4768 | | ld TMP0, FRAME_PC(BASE) | ||
4769 | | decode_RDtoRC8 RC, RD | ||
4770 | | decode_RB8a RB, INS | ||
4771 | | daddu RC, BASE, RC | ||
4772 | | decode_RB8b RB | ||
4773 | | daddu RA, BASE, RA | ||
4774 | | daddiu RC, RC, FRAME_VARG | ||
4775 | | daddu TMP2, RA, RB | ||
4776 | | daddiu TMP3, BASE, -16 // TMP3 = vtop | ||
4777 | | dsubu RC, RC, TMP0 // RC = vbase | ||
4778 | | // Note: RC may now be even _above_ BASE if nargs was < numparams. | ||
4779 | | beqz RB, >5 // Copy all varargs? | ||
4780 | |. dsubu TMP1, TMP3, RC | ||
4781 | | daddiu TMP2, TMP2, -16 | ||
4782 | |1: // Copy vararg slots to destination slots. | ||
4783 | | ld CARG1, 0(RC) | ||
4784 | | sltu AT, RC, TMP3 | ||
4785 | | daddiu RC, RC, 8 | ||
4786 | |.if MIPSR6 | ||
4787 | | selnez CARG1, CARG1, AT | ||
4788 | | seleqz AT, TISNIL, AT | ||
4789 | | or CARG1, CARG1, AT | ||
4790 | |.else | ||
4791 | | movz CARG1, TISNIL, AT | ||
4792 | |.endif | ||
4793 | | sd CARG1, 0(RA) | ||
4794 | | sltu AT, RA, TMP2 | ||
4795 | | bnez AT, <1 | ||
4796 | |. daddiu RA, RA, 8 | ||
4797 | |3: | ||
4798 | | ins_next | ||
4799 | | | ||
4800 | |5: // Copy all varargs. | ||
4801 | | ld TMP0, L->maxstack | ||
4802 | | blez TMP1, <3 // No vararg slots? | ||
4803 | |. li MULTRES, 8 // MULTRES = (0+1)*8 | ||
4804 | | daddu TMP2, RA, TMP1 | ||
4805 | | sltu AT, TMP0, TMP2 | ||
4806 | | bnez AT, >7 | ||
4807 | |. daddiu MULTRES, TMP1, 8 | ||
4808 | |6: | ||
4809 | | ld CRET1, 0(RC) | ||
4810 | | daddiu RC, RC, 8 | ||
4811 | | sd CRET1, 0(RA) | ||
4812 | | sltu AT, RC, TMP3 | ||
4813 | | bnez AT, <6 // More vararg slots? | ||
4814 | |. daddiu RA, RA, 8 | ||
4815 | | b <3 | ||
4816 | |. nop | ||
4817 | | | ||
4818 | |7: // Grow stack for varargs. | ||
4819 | | load_got lj_state_growstack | ||
4820 | | sd RA, L->top | ||
4821 | | dsubu RA, RA, BASE | ||
4822 | | sd BASE, L->base | ||
4823 | | dsubu BASE, RC, BASE // Need delta, because BASE may change. | ||
4824 | | sd PC, SAVE_PC | ||
4825 | | srl CARG2, TMP1, 3 | ||
4826 | | call_intern lj_state_growstack // (lua_State *L, int n) | ||
4827 | |. move CARG1, L | ||
4828 | | move RC, BASE | ||
4829 | | ld BASE, L->base | ||
4830 | | daddu RA, BASE, RA | ||
4831 | | daddu RC, BASE, RC | ||
4832 | | b <6 | ||
4833 | |. daddiu TMP3, BASE, -16 | ||
4834 | break; | ||
4835 | |||
4836 | /* -- Returns ----------------------------------------------------------- */ | ||
4837 | |||
4838 | case BC_RETM: | ||
4839 | | // RA = results*8, RD = extra_nresults*8 | ||
4840 | | addu RD, RD, MULTRES // MULTRES >= 8, so RD >= 8. | ||
4841 | | // Fall through. Assumes BC_RET follows. | ||
4842 | break; | ||
4843 | |||
4844 | case BC_RET: | ||
4845 | | // RA = results*8, RD = (nresults+1)*8 | ||
4846 | | ld PC, FRAME_PC(BASE) | ||
4847 | | daddu RA, BASE, RA | ||
4848 | | move MULTRES, RD | ||
4849 | |1: | ||
4850 | | andi TMP0, PC, FRAME_TYPE | ||
4851 | | bnez TMP0, ->BC_RETV_Z | ||
4852 | |. xori TMP1, PC, FRAME_VARG | ||
4853 | | | ||
4854 | |->BC_RET_Z: | ||
4855 | | // BASE = base, RA = resultptr, RD = (nresults+1)*8, PC = return | ||
4856 | | lw INS, -4(PC) | ||
4857 | | daddiu TMP2, BASE, -16 | ||
4858 | | daddiu RC, RD, -8 | ||
4859 | | decode_RA8a TMP0, INS | ||
4860 | | decode_RB8a RB, INS | ||
4861 | | decode_RA8b TMP0 | ||
4862 | | decode_RB8b RB | ||
4863 | | daddu TMP3, TMP2, RB | ||
4864 | | beqz RC, >3 | ||
4865 | |. dsubu BASE, TMP2, TMP0 | ||
4866 | |2: | ||
4867 | | ld CRET1, 0(RA) | ||
4868 | | daddiu RA, RA, 8 | ||
4869 | | daddiu RC, RC, -8 | ||
4870 | | sd CRET1, 0(TMP2) | ||
4871 | | bnez RC, <2 | ||
4872 | |. daddiu TMP2, TMP2, 8 | ||
4873 | |3: | ||
4874 | | daddiu TMP3, TMP3, -8 | ||
4875 | |5: | ||
4876 | | sltu AT, TMP2, TMP3 | ||
4877 | | bnez AT, >6 | ||
4878 | |. ld LFUNC:TMP1, FRAME_FUNC(BASE) | ||
4879 | | ins_next1 | ||
4880 | | cleartp LFUNC:TMP1 | ||
4881 | | ld TMP1, LFUNC:TMP1->pc | ||
4882 | | ld KBASE, PC2PROTO(k)(TMP1) | ||
4883 | | ins_next2 | ||
4884 | | | ||
4885 | |6: // Fill up results with nil. | ||
4886 | | sd TISNIL, 0(TMP2) | ||
4887 | | b <5 | ||
4888 | |. daddiu TMP2, TMP2, 8 | ||
4889 | | | ||
4890 | |->BC_RETV_Z: // Non-standard return case. | ||
4891 | | andi TMP2, TMP1, FRAME_TYPEP | ||
4892 | | bnez TMP2, ->vm_return | ||
4893 | |. nop | ||
4894 | | // Return from vararg function: relocate BASE down. | ||
4895 | | dsubu BASE, BASE, TMP1 | ||
4896 | | b <1 | ||
4897 | |. ld PC, FRAME_PC(BASE) | ||
4898 | break; | ||
4899 | |||
4900 | case BC_RET0: case BC_RET1: | ||
4901 | | // RA = results*8, RD = (nresults+1)*8 | ||
4902 | | ld PC, FRAME_PC(BASE) | ||
4903 | | daddu RA, BASE, RA | ||
4904 | | move MULTRES, RD | ||
4905 | | andi TMP0, PC, FRAME_TYPE | ||
4906 | | bnez TMP0, ->BC_RETV_Z | ||
4907 | |. xori TMP1, PC, FRAME_VARG | ||
4908 | | lw INS, -4(PC) | ||
4909 | | daddiu TMP2, BASE, -16 | ||
4910 | if (op == BC_RET1) { | ||
4911 | | ld CRET1, 0(RA) | ||
4912 | } | ||
4913 | | decode_RB8a RB, INS | ||
4914 | | decode_RA8a RA, INS | ||
4915 | | decode_RB8b RB | ||
4916 | | decode_RA8b RA | ||
4917 | | dsubu BASE, TMP2, RA | ||
4918 | if (op == BC_RET1) { | ||
4919 | | sd CRET1, 0(TMP2) | ||
4920 | } | ||
4921 | |5: | ||
4922 | | sltu AT, RD, RB | ||
4923 | | bnez AT, >6 | ||
4924 | |. ld TMP1, FRAME_FUNC(BASE) | ||
4925 | | ins_next1 | ||
4926 | | cleartp LFUNC:TMP1 | ||
4927 | | ld TMP1, LFUNC:TMP1->pc | ||
4928 | | ld KBASE, PC2PROTO(k)(TMP1) | ||
4929 | | ins_next2 | ||
4930 | | | ||
4931 | |6: // Fill up results with nil. | ||
4932 | | daddiu TMP2, TMP2, 8 | ||
4933 | | daddiu RD, RD, 8 | ||
4934 | | b <5 | ||
4935 | if (op == BC_RET1) { | ||
4936 | |. sd TISNIL, 0(TMP2) | ||
4937 | } else { | ||
4938 | |. sd TISNIL, -8(TMP2) | ||
4939 | } | ||
4940 | break; | ||
4941 | |||
4942 | /* -- Loops and branches ------------------------------------------------ */ | ||
4943 | |||
4944 | case BC_FORL: | ||
4945 | |.if JIT | ||
4946 | | hotloop | ||
4947 | |.endif | ||
4948 | | // Fall through. Assumes BC_IFORL follows. | ||
4949 | break; | ||
4950 | |||
4951 | case BC_JFORI: | ||
4952 | case BC_JFORL: | ||
4953 | #if !LJ_HASJIT | ||
4954 | break; | ||
4955 | #endif | ||
4956 | case BC_FORI: | ||
4957 | case BC_IFORL: | ||
4958 | | // RA = base*8, RD = target (after end of loop or start of loop) | ||
4959 | vk = (op == BC_IFORL || op == BC_JFORL); | ||
4960 | | daddu RA, BASE, RA | ||
4961 | | ld CARG1, FORL_IDX*8(RA) // IDX CARG1 - CARG3 type | ||
4962 | | gettp CARG3, CARG1 | ||
4963 | if (op != BC_JFORL) { | ||
4964 | | srl RD, RD, 1 | ||
4965 | | lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535) | ||
4966 | | daddu TMP2, RD, TMP2 | ||
4967 | } | ||
4968 | if (!vk) { | ||
4969 | | ld CARG2, FORL_STOP*8(RA) // STOP CARG2 - CARG4 type | ||
4970 | | ld CRET1, FORL_STEP*8(RA) // STEP CRET1 - CRET2 type | ||
4971 | | gettp CARG4, CARG2 | ||
4972 | | bne CARG3, TISNUM, >5 | ||
4973 | |. gettp CRET2, CRET1 | ||
4974 | | bne CARG4, TISNUM, ->vmeta_for | ||
4975 | |. sextw CARG3, CARG1 | ||
4976 | | bne CRET2, TISNUM, ->vmeta_for | ||
4977 | |. sextw CARG2, CARG2 | ||
4978 | | dext AT, CRET1, 31, 0 | ||
4979 | | slt CRET1, CARG2, CARG3 | ||
4980 | | slt TMP1, CARG3, CARG2 | ||
4981 | |.if MIPSR6 | ||
4982 | | selnez TMP1, TMP1, AT | ||
4983 | | seleqz CRET1, CRET1, AT | ||
4984 | | or CRET1, CRET1, TMP1 | ||
4985 | |.else | ||
4986 | | movn CRET1, TMP1, AT | ||
4987 | |.endif | ||
4988 | } else { | ||
4989 | | bne CARG3, TISNUM, >5 | ||
4990 | |. ld CARG2, FORL_STEP*8(RA) // STEP CARG2 - CARG4 type | ||
4991 | | ld CRET1, FORL_STOP*8(RA) // STOP CRET1 - CRET2 type | ||
4992 | | sextw TMP3, CARG1 | ||
4993 | | sextw CARG2, CARG2 | ||
4994 | | sextw CRET1, CRET1 | ||
4995 | | addu CARG1, TMP3, CARG2 | ||
4996 | | xor TMP0, CARG1, TMP3 | ||
4997 | | xor TMP1, CARG1, CARG2 | ||
4998 | | and TMP0, TMP0, TMP1 | ||
4999 | | slt TMP1, CARG1, CRET1 | ||
5000 | | slt CRET1, CRET1, CARG1 | ||
5001 | | slt AT, CARG2, r0 | ||
5002 | | slt TMP0, TMP0, r0 // ((y^a) & (y^b)) < 0: overflow. | ||
5003 | |.if MIPSR6 | ||
5004 | | selnez TMP1, TMP1, AT | ||
5005 | | seleqz CRET1, CRET1, AT | ||
5006 | | or CRET1, CRET1, TMP1 | ||
5007 | |.else | ||
5008 | | movn CRET1, TMP1, AT | ||
5009 | |.endif | ||
5010 | | or CRET1, CRET1, TMP0 | ||
5011 | | zextw CARG1, CARG1 | ||
5012 | | settp CARG1, TISNUM | ||
5013 | } | ||
5014 | |1: | ||
5015 | if (op == BC_FORI) { | ||
5016 | |.if MIPSR6 | ||
5017 | | selnez TMP2, TMP2, CRET1 | ||
5018 | |.else | ||
5019 | | movz TMP2, r0, CRET1 | ||
5020 | |.endif | ||
5021 | | daddu PC, PC, TMP2 | ||
5022 | } else if (op == BC_JFORI) { | ||
5023 | | daddu PC, PC, TMP2 | ||
5024 | | lhu RD, -4+OFS_RD(PC) | ||
5025 | } else if (op == BC_IFORL) { | ||
5026 | |.if MIPSR6 | ||
5027 | | seleqz TMP2, TMP2, CRET1 | ||
5028 | |.else | ||
5029 | | movn TMP2, r0, CRET1 | ||
5030 | |.endif | ||
5031 | | daddu PC, PC, TMP2 | ||
5032 | } | ||
5033 | if (vk) { | ||
5034 | | sd CARG1, FORL_IDX*8(RA) | ||
5035 | } | ||
5036 | | ins_next1 | ||
5037 | | sd CARG1, FORL_EXT*8(RA) | ||
5038 | |2: | ||
5039 | if (op == BC_JFORI) { | ||
5040 | | beqz CRET1, =>BC_JLOOP | ||
5041 | |. decode_RD8b RD | ||
5042 | } else if (op == BC_JFORL) { | ||
5043 | | beqz CRET1, =>BC_JLOOP | ||
5044 | } | ||
5045 | | ins_next2 | ||
5046 | | | ||
5047 | |5: // FP loop. | ||
5048 | |.if FPU | ||
5049 | if (!vk) { | ||
5050 | | ldc1 f0, FORL_IDX*8(RA) | ||
5051 | | ldc1 f2, FORL_STOP*8(RA) | ||
5052 | | sltiu TMP0, CARG3, LJ_TISNUM | ||
5053 | | sltiu TMP1, CARG4, LJ_TISNUM | ||
5054 | | sltiu AT, CRET2, LJ_TISNUM | ||
5055 | | ld TMP3, FORL_STEP*8(RA) | ||
5056 | | and TMP0, TMP0, TMP1 | ||
5057 | | and AT, AT, TMP0 | ||
5058 | | beqz AT, ->vmeta_for | ||
5059 | |. slt TMP3, TMP3, r0 | ||
5060 | |.if MIPSR6 | ||
5061 | | dmtc1 TMP3, FTMP2 | ||
5062 | | cmp.lt.d FTMP0, f0, f2 | ||
5063 | | cmp.lt.d FTMP1, f2, f0 | ||
5064 | | sel.d FTMP2, FTMP1, FTMP0 | ||
5065 | | b <1 | ||
5066 | |. dmfc1 CRET1, FTMP2 | ||
5067 | |.else | ||
5068 | | c.ole.d 0, f0, f2 | ||
5069 | | c.ole.d 1, f2, f0 | ||
5070 | | li CRET1, 1 | ||
5071 | | movt CRET1, r0, 0 | ||
5072 | | movt AT, r0, 1 | ||
5073 | | b <1 | ||
5074 | |. movn CRET1, AT, TMP3 | ||
5075 | |.endif | ||
5076 | } else { | ||
5077 | | ldc1 f0, FORL_IDX*8(RA) | ||
5078 | | ldc1 f4, FORL_STEP*8(RA) | ||
5079 | | ldc1 f2, FORL_STOP*8(RA) | ||
5080 | | ld TMP3, FORL_STEP*8(RA) | ||
5081 | | add.d f0, f0, f4 | ||
5082 | |.if MIPSR6 | ||
5083 | | slt TMP3, TMP3, r0 | ||
5084 | | dmtc1 TMP3, FTMP2 | ||
5085 | | cmp.lt.d FTMP0, f0, f2 | ||
5086 | | cmp.lt.d FTMP1, f2, f0 | ||
5087 | | sel.d FTMP2, FTMP1, FTMP0 | ||
5088 | | dmfc1 CRET1, FTMP2 | ||
5089 | if (op == BC_IFORL) { | ||
5090 | | seleqz TMP2, TMP2, CRET1 | ||
5091 | | daddu PC, PC, TMP2 | ||
5092 | } | ||
5093 | |.else | ||
5094 | | c.ole.d 0, f0, f2 | ||
5095 | | c.ole.d 1, f2, f0 | ||
5096 | | slt TMP3, TMP3, r0 | ||
5097 | | li CRET1, 1 | ||
5098 | | li AT, 1 | ||
5099 | | movt CRET1, r0, 0 | ||
5100 | | movt AT, r0, 1 | ||
5101 | | movn CRET1, AT, TMP3 | ||
5102 | if (op == BC_IFORL) { | ||
5103 | | movn TMP2, r0, CRET1 | ||
5104 | | daddu PC, PC, TMP2 | ||
5105 | } | ||
5106 | |.endif | ||
5107 | | sdc1 f0, FORL_IDX*8(RA) | ||
5108 | | ins_next1 | ||
5109 | | b <2 | ||
5110 | |. sdc1 f0, FORL_EXT*8(RA) | ||
5111 | } | ||
5112 | |.else | ||
5113 | if (!vk) { | ||
5114 | | sltiu TMP0, CARG3, LJ_TISNUM | ||
5115 | | sltiu TMP1, CARG4, LJ_TISNUM | ||
5116 | | sltiu AT, CRET2, LJ_TISNUM | ||
5117 | | and TMP0, TMP0, TMP1 | ||
5118 | | and AT, AT, TMP0 | ||
5119 | | beqz AT, ->vmeta_for | ||
5120 | |. nop | ||
5121 | | bal ->vm_sfcmpolex | ||
5122 | |. lw TMP3, FORL_STEP*8+HI(RA) | ||
5123 | | b <1 | ||
5124 | |. nop | ||
5125 | } else { | ||
5126 | | load_got __adddf3 | ||
5127 | | call_extern | ||
5128 | |. sw TMP2, TMPD | ||
5129 | | ld CARG2, FORL_STOP*8(RA) | ||
5130 | | move CARG1, CRET1 | ||
5131 | if ( op == BC_JFORL ) { | ||
5132 | | lhu RD, -4+OFS_RD(PC) | ||
5133 | | decode_RD8b RD | ||
5134 | } | ||
5135 | | bal ->vm_sfcmpolex | ||
5136 | |. lw TMP3, FORL_STEP*8+HI(RA) | ||
5137 | | b <1 | ||
5138 | |. lw TMP2, TMPD | ||
5139 | } | ||
5140 | |.endif | ||
5141 | break; | ||
5142 | |||
5143 | case BC_ITERL: | ||
5144 | |.if JIT | ||
5145 | | hotloop | ||
5146 | |.endif | ||
5147 | | // Fall through. Assumes BC_IITERL follows. | ||
5148 | break; | ||
5149 | |||
5150 | case BC_JITERL: | ||
5151 | #if !LJ_HASJIT | ||
5152 | break; | ||
5153 | #endif | ||
5154 | case BC_IITERL: | ||
5155 | | // RA = base*8, RD = target | ||
5156 | | daddu RA, BASE, RA | ||
5157 | | ld TMP1, 0(RA) | ||
5158 | | beq TMP1, TISNIL, >1 // Stop if iterator returned nil. | ||
5159 | |. nop | ||
5160 | if (op == BC_JITERL) { | ||
5161 | | b =>BC_JLOOP | ||
5162 | |. sd TMP1, -8(RA) | ||
5163 | } else { | ||
5164 | | branch_RD // Otherwise save control var + branch. | ||
5165 | | sd TMP1, -8(RA) | ||
5166 | } | ||
5167 | |1: | ||
5168 | | ins_next | ||
5169 | break; | ||
5170 | |||
5171 | case BC_LOOP: | ||
5172 | | // RA = base*8, RD = target (loop extent) | ||
5173 | | // Note: RA/RD is only used by trace recorder to determine scope/extent | ||
5174 | | // This opcode does NOT jump, its only purpose is to detect a hot loop. | ||
5175 | |.if JIT | ||
5176 | | hotloop | ||
5177 | |.endif | ||
5178 | | // Fall through. Assumes BC_ILOOP follows. | ||
5179 | break; | ||
5180 | |||
5181 | case BC_ILOOP: | ||
5182 | | // RA = base*8, RD = target (loop extent) | ||
5183 | | ins_next | ||
5184 | break; | ||
5185 | |||
5186 | case BC_JLOOP: | ||
5187 | |.if JIT | ||
5188 | | // RA = base*8 (ignored), RD = traceno*8 | ||
5189 | | ld TMP1, DISPATCH_J(trace)(DISPATCH) | ||
5190 | | li AT, 0 | ||
5191 | | daddu TMP1, TMP1, RD | ||
5192 | | // Traces on MIPS don't store the trace number, so use 0. | ||
5193 | | sd AT, DISPATCH_GL(vmstate)(DISPATCH) | ||
5194 | | ld TRACE:TMP2, 0(TMP1) | ||
5195 | | sd BASE, DISPATCH_GL(jit_base)(DISPATCH) | ||
5196 | | ld TMP2, TRACE:TMP2->mcode | ||
5197 | | sd L, DISPATCH_GL(tmpbuf.L)(DISPATCH) | ||
5198 | | jr TMP2 | ||
5199 | |. daddiu JGL, DISPATCH, GG_DISP2G+32768 | ||
5200 | |.endif | ||
5201 | break; | ||
5202 | |||
5203 | case BC_JMP: | ||
5204 | | // RA = base*8 (only used by trace recorder), RD = target | ||
5205 | | branch_RD | ||
5206 | | ins_next | ||
5207 | break; | ||
5208 | |||
5209 | /* -- Function headers -------------------------------------------------- */ | ||
5210 | |||
5211 | case BC_FUNCF: | ||
5212 | |.if JIT | ||
5213 | | hotcall | ||
5214 | |.endif | ||
5215 | case BC_FUNCV: /* NYI: compiled vararg functions. */ | ||
5216 | | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow. | ||
5217 | break; | ||
5218 | |||
5219 | case BC_JFUNCF: | ||
5220 | #if !LJ_HASJIT | ||
5221 | break; | ||
5222 | #endif | ||
5223 | case BC_IFUNCF: | ||
5224 | | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 | ||
5225 | | ld TMP2, L->maxstack | ||
5226 | | lbu TMP1, -4+PC2PROTO(numparams)(PC) | ||
5227 | | ld KBASE, -4+PC2PROTO(k)(PC) | ||
5228 | | sltu AT, TMP2, RA | ||
5229 | | bnez AT, ->vm_growstack_l | ||
5230 | |. sll TMP1, TMP1, 3 | ||
5231 | if (op != BC_JFUNCF) { | ||
5232 | | ins_next1 | ||
5233 | } | ||
5234 | |2: | ||
5235 | | sltu AT, NARGS8:RC, TMP1 // Check for missing parameters. | ||
5236 | | bnez AT, >3 | ||
5237 | |. daddu AT, BASE, NARGS8:RC | ||
5238 | if (op == BC_JFUNCF) { | ||
5239 | | decode_RD8a RD, INS | ||
5240 | | b =>BC_JLOOP | ||
5241 | |. decode_RD8b RD | ||
5242 | } else { | ||
5243 | | ins_next2 | ||
5244 | } | ||
5245 | | | ||
5246 | |3: // Clear missing parameters. | ||
5247 | | sd TISNIL, 0(AT) | ||
5248 | | b <2 | ||
5249 | |. addiu NARGS8:RC, NARGS8:RC, 8 | ||
5250 | break; | ||
5251 | |||
5252 | case BC_JFUNCV: | ||
5253 | #if !LJ_HASJIT | ||
5254 | break; | ||
5255 | #endif | ||
5256 | | NYI // NYI: compiled vararg functions | ||
5257 | break; /* NYI: compiled vararg functions. */ | ||
5258 | |||
5259 | case BC_IFUNCV: | ||
5260 | | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 | ||
5261 | | li TMP0, LJ_TFUNC | ||
5262 | | daddu TMP1, BASE, RC | ||
5263 | | ld TMP2, L->maxstack | ||
5264 | | settp LFUNC:RB, TMP0 | ||
5265 | | daddu TMP0, RA, RC | ||
5266 | | sd LFUNC:RB, 0(TMP1) // Store (tagged) copy of LFUNC. | ||
5267 | | daddiu TMP3, RC, 16+FRAME_VARG | ||
5268 | | sltu AT, TMP0, TMP2 | ||
5269 | | ld KBASE, -4+PC2PROTO(k)(PC) | ||
5270 | | beqz AT, ->vm_growstack_l | ||
5271 | |. sd TMP3, 8(TMP1) // Store delta + FRAME_VARG. | ||
5272 | | lbu TMP2, -4+PC2PROTO(numparams)(PC) | ||
5273 | | move RA, BASE | ||
5274 | | move RC, TMP1 | ||
5275 | | ins_next1 | ||
5276 | | beqz TMP2, >3 | ||
5277 | |. daddiu BASE, TMP1, 16 | ||
5278 | |1: | ||
5279 | | ld TMP0, 0(RA) | ||
5280 | | sltu AT, RA, RC // Less args than parameters? | ||
5281 | | move CARG1, TMP0 | ||
5282 | |.if MIPSR6 | ||
5283 | | selnez TMP0, TMP0, AT | ||
5284 | | seleqz TMP3, TISNIL, AT | ||
5285 | | or TMP0, TMP0, TMP3 | ||
5286 | | seleqz TMP3, CARG1, AT | ||
5287 | | selnez CARG1, TISNIL, AT | ||
5288 | | or CARG1, CARG1, TMP3 | ||
5289 | |.else | ||
5290 | | movz TMP0, TISNIL, AT // Clear missing parameters. | ||
5291 | | movn CARG1, TISNIL, AT // Clear old fixarg slot (help the GC). | ||
5292 | |.endif | ||
5293 | | addiu TMP2, TMP2, -1 | ||
5294 | | sd TMP0, 16(TMP1) | ||
5295 | | daddiu TMP1, TMP1, 8 | ||
5296 | | sd CARG1, 0(RA) | ||
5297 | | bnez TMP2, <1 | ||
5298 | |. daddiu RA, RA, 8 | ||
5299 | |3: | ||
5300 | | ins_next2 | ||
5301 | break; | ||
5302 | |||
5303 | case BC_FUNCC: | ||
5304 | case BC_FUNCCW: | ||
5305 | | // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8 | ||
5306 | if (op == BC_FUNCC) { | ||
5307 | | ld CFUNCADDR, CFUNC:RB->f | ||
5308 | } else { | ||
5309 | | ld CFUNCADDR, DISPATCH_GL(wrapf)(DISPATCH) | ||
5310 | } | ||
5311 | | daddu TMP1, RA, NARGS8:RC | ||
5312 | | ld TMP2, L->maxstack | ||
5313 | | daddu RC, BASE, NARGS8:RC | ||
5314 | | sd BASE, L->base | ||
5315 | | sltu AT, TMP2, TMP1 | ||
5316 | | sd RC, L->top | ||
5317 | | li_vmstate C | ||
5318 | if (op == BC_FUNCCW) { | ||
5319 | | ld CARG2, CFUNC:RB->f | ||
5320 | } | ||
5321 | | bnez AT, ->vm_growstack_c // Need to grow stack. | ||
5322 | |. move CARG1, L | ||
5323 | | jalr CFUNCADDR // (lua_State *L [, lua_CFunction f]) | ||
5324 | |. st_vmstate | ||
5325 | | // Returns nresults. | ||
5326 | | ld BASE, L->base | ||
5327 | | sll RD, CRET1, 3 | ||
5328 | | ld TMP1, L->top | ||
5329 | | li_vmstate INTERP | ||
5330 | | ld PC, FRAME_PC(BASE) // Fetch PC of caller. | ||
5331 | | dsubu RA, TMP1, RD // RA = L->top - nresults*8 | ||
5332 | | sd L, DISPATCH_GL(cur_L)(DISPATCH) | ||
5333 | | b ->vm_returnc | ||
5334 | |. st_vmstate | ||
5335 | break; | ||
5336 | |||
5337 | /* ---------------------------------------------------------------------- */ | ||
5338 | |||
5339 | default: | ||
5340 | fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); | ||
5341 | exit(2); | ||
5342 | break; | ||
5343 | } | ||
5344 | } | ||
5345 | |||
5346 | static int build_backend(BuildCtx *ctx) /* Build the whole backend: helper subroutines first, then one code block per bytecode opcode. Always returns BC__MAX. */ | ||
5347 | { | ||
5348 | int op; | ||
5349 | |||
5350 | dasm_growpc(Dst, BC__MAX); /* Make room for BC__MAX pc labels -- presumably one per opcode, as used by the =>BC_JLOOP branch targets in build_ins. */ | ||
5351 | |||
5352 | build_subroutines(ctx); /* Must come before the per-opcode code emitted below. */ | ||
5353 | |||
5354 | |.code_op | ||
5355 | for (op = 0; op < BC__MAX; op++) /* Visit every opcode in numeric order. */ | ||
5356 | build_ins(ctx, (BCOp)op, op); /* op is passed twice: once as the BCOp, once as the plain integer. */ | ||
5357 | |||
5358 | return BC__MAX; | ||
5359 | } | ||
5360 | |||
5361 | /* Emit pseudo frame-info for all assembler functions. */ | ||
5362 | static void emit_asm_debug(BuildCtx *ctx) /* Writes a hand-built .debug_frame section (one CIE, up to two FDEs) to ctx->fp as assembler text. */ | ||
5363 | { | ||
5364 | int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); /* Byte offset of the vm_ffi_call global from the start of the generated code. */ | ||
5365 | int i; | ||
5366 | switch (ctx->mode) { | ||
5367 | case BUILD_elfasm: /* Only this build mode emits frame info; every other mode hits the empty default below. */ | ||
5368 | fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n"); | ||
5369 | fprintf(ctx->fp, /* CIE, labelled .Lframe0; both FDEs below refer back to it. */ | ||
5370 | ".Lframe0:\n" | ||
5371 | "\t.4byte .LECIE0-.LSCIE0\n" | ||
5372 | ".LSCIE0:\n" | ||
5373 | "\t.4byte 0xffffffff\n" | ||
5374 | "\t.byte 0x1\n" | ||
5375 | "\t.string \"\"\n" | ||
5376 | "\t.uleb128 0x1\n" | ||
5377 | "\t.sleb128 -4\n" /* Data alignment factor: the register offsets emitted below are scaled by -4. */ | ||
5378 | "\t.byte 31\n" /* Return address column: register 31. */ | ||
5379 | "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 0\n" /* DW_CFA_def_cfa: register 29, offset 0. */ | ||
5380 | "\t.align 2\n" | ||
5381 | ".LECIE0:\n\n"); | ||
5382 | fprintf(ctx->fp, /* FDE 0: covers fcofs bytes starting at .Lbegin, i.e. everything before vm_ffi_call. */ | ||
5383 | ".LSFDE0:\n" | ||
5384 | "\t.4byte .LEFDE0-.LASFDE0\n" | ||
5385 | ".LASFDE0:\n" | ||
5386 | "\t.4byte .Lframe0\n" | ||
5387 | "\t.8byte .Lbegin\n" | ||
5388 | "\t.8byte %d\n" | ||
5389 | "\t.byte 0xe\n\t.uleb128 %d\n" /* DW_CFA_def_cfa_offset CFRAME_SIZE. */ | ||
5390 | "\t.byte 0x9f\n\t.sleb128 2*5\n" /* DW_CFA_offset (0x80|reg) for register 31. */ | ||
5391 | "\t.byte 0x9e\n\t.sleb128 2*6\n", /* DW_CFA_offset for register 30. */ | ||
5392 | fcofs, CFRAME_SIZE); | ||
5393 | for (i = 23; i >= 16; i--) /* DW_CFA_offset records for registers 23 down to 16. */ | ||
5394 | fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2*(30-i)); | ||
5395 | #if !LJ_SOFTFP | ||
5396 | for (i = 31; i >= 24; i--) /* Same for DWARF registers 63 down to 56 -- presumably FPRs f31..f24; confirm against the MIPS DWARF register numbering. */ | ||
5397 | fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 2*(46-i)); | ||
5398 | #endif | ||
5399 | fprintf(ctx->fp, | ||
5400 | "\t.align 2\n" | ||
5401 | ".LEFDE0:\n\n"); | ||
5402 | #if LJ_HASFFI | ||
5403 | fprintf(ctx->fp, /* FDE 1: covers lj_vm_ffi_call through the end of the code (codesz - fcofs bytes). */ | ||
5404 | ".LSFDE1:\n" | ||
5405 | "\t.4byte .LEFDE1-.LASFDE1\n" | ||
5406 | ".LASFDE1:\n" | ||
5407 | "\t.4byte .Lframe0\n" | ||
5408 | "\t.4byte lj_vm_ffi_call\n" /* NOTE(review): FDE 0 emits its start/length fields as .8byte but this FDE uses .4byte -- looks inconsistent for a 64 bit target; confirm. */ | ||
5409 | "\t.4byte %d\n" | ||
5410 | "\t.byte 0x9f\n\t.uleb128 2*1\n" /* DW_CFA_offset for register 31. */ | ||
5411 | "\t.byte 0x90\n\t.uleb128 2*2\n" /* DW_CFA_offset for register 16. */ | ||
5412 | "\t.byte 0xd\n\t.uleb128 0x10\n" /* DW_CFA_def_cfa_register: register 16. */ | ||
5413 | "\t.align 2\n" | ||
5414 | ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); | ||
5415 | #endif | ||
5416 | #if !LJ_NO_UNWIND | ||
5417 | /* NYI */ | ||
5418 | #endif | ||
5419 | break; | ||
5420 | default: | ||
5421 | break; | ||
5422 | } | ||
5423 | } | ||
5424 | |||
diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc index 6b973d4e..4299e266 100644 --- a/src/vm_ppc.dasc +++ b/src/vm_ppc.dasc | |||
@@ -1,4 +1,4 @@ | |||
1 | |// Low-level VM code for PowerPC CPUs. | 1 | |// Low-level VM code for PowerPC 32 bit or 32on64 bit mode. |
2 | |// Bytecode interpreter, fast functions and helper functions. | 2 | |// Bytecode interpreter, fast functions and helper functions. |
3 | |// Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h | 3 | |// Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h |
4 | | | 4 | | |
@@ -18,7 +18,6 @@ | |||
18 | |// DynASM defines used by the PPC port: | 18 | |// DynASM defines used by the PPC port: |
19 | |// | 19 | |// |
20 | |// P64 64 bit pointers (only for GPR64 testing). | 20 | |// P64 64 bit pointers (only for GPR64 testing). |
21 | |// Note: a full PPC64 _LP64 port is not planned. | ||
22 | |// GPR64 64 bit registers (but possibly 32 bit pointers, e.g. PS3). | 21 | |// GPR64 64 bit registers (but possibly 32 bit pointers, e.g. PS3). |
23 | |// Affects reg saves, stack layout, carry/overflow/dot flags etc. | 22 | |// Affects reg saves, stack layout, carry/overflow/dot flags etc. |
24 | |// FRAME32 Use 32 bit frame layout, even with GPR64 (Xbox 360). | 23 | |// FRAME32 Use 32 bit frame layout, even with GPR64 (Xbox 360). |
@@ -103,6 +102,18 @@ | |||
103 | |// Fixed register assignments for the interpreter. | 102 | |// Fixed register assignments for the interpreter. |
104 | |// Don't use: r1 = sp, r2 and r13 = reserved (TOC, TLS or SDATA) | 103 | |// Don't use: r1 = sp, r2 and r13 = reserved (TOC, TLS or SDATA) |
105 | | | 104 | | |
105 | |.macro .FPU, a, b | ||
106 | |.if FPU | ||
107 | | a, b | ||
108 | |.endif | ||
109 | |.endmacro | ||
110 | | | ||
111 | |.macro .FPU, a, b, c | ||
112 | |.if FPU | ||
113 | | a, b, c | ||
114 | |.endif | ||
115 | |.endmacro | ||
116 | | | ||
106 | |// The following must be C callee-save (but BASE is often refetched). | 117 | |// The following must be C callee-save (but BASE is often refetched). |
107 | |.define BASE, r14 // Base of current Lua stack frame. | 118 | |.define BASE, r14 // Base of current Lua stack frame. |
108 | |.define KBASE, r15 // Constants of current Lua function. | 119 | |.define KBASE, r15 // Constants of current Lua function. |
@@ -116,8 +127,10 @@ | |||
116 | |.define TISNUM, r22 | 127 | |.define TISNUM, r22 |
117 | |.define TISNIL, r23 | 128 | |.define TISNIL, r23 |
118 | |.define ZERO, r24 | 129 | |.define ZERO, r24 |
130 | |.if FPU | ||
119 | |.define TOBIT, f30 // 2^52 + 2^51. | 131 | |.define TOBIT, f30 // 2^52 + 2^51. |
120 | |.define TONUM, f31 // 2^52 + 2^51 + 2^31. | 132 | |.define TONUM, f31 // 2^52 + 2^51 + 2^31. |
133 | |.endif | ||
121 | | | 134 | | |
122 | |// The following temporaries are not saved across C calls, except for RA. | 135 | |// The following temporaries are not saved across C calls, except for RA. |
123 | |.define RA, r20 // Callee-save. | 136 | |.define RA, r20 // Callee-save. |
@@ -133,6 +146,7 @@ | |||
133 | | | 146 | | |
134 | |// Saved temporaries. | 147 | |// Saved temporaries. |
135 | |.define SAVE0, r21 | 148 | |.define SAVE0, r21 |
149 | |.define SAVE1, r25 | ||
136 | | | 150 | | |
137 | |// Calling conventions. | 151 | |// Calling conventions. |
138 | |.define CARG1, r3 | 152 | |.define CARG1, r3 |
@@ -141,8 +155,10 @@ | |||
141 | |.define CARG4, r6 // Overlaps TMP3. | 155 | |.define CARG4, r6 // Overlaps TMP3. |
142 | |.define CARG5, r7 // Overlaps INS. | 156 | |.define CARG5, r7 // Overlaps INS. |
143 | | | 157 | | |
158 | |.if FPU | ||
144 | |.define FARG1, f1 | 159 | |.define FARG1, f1 |
145 | |.define FARG2, f2 | 160 | |.define FARG2, f2 |
161 | |.endif | ||
146 | | | 162 | | |
147 | |.define CRET1, r3 | 163 | |.define CRET1, r3 |
148 | |.define CRET2, r4 | 164 | |.define CRET2, r4 |
@@ -213,10 +229,16 @@ | |||
213 | |.endif | 229 | |.endif |
214 | |.else | 230 | |.else |
215 | | | 231 | | |
232 | |.if FPU | ||
216 | |.define SAVE_LR, 276(sp) | 233 | |.define SAVE_LR, 276(sp) |
217 | |.define CFRAME_SPACE, 272 // Delta for sp. | 234 | |.define CFRAME_SPACE, 272 // Delta for sp. |
218 | |// Back chain for sp: 272(sp) <-- sp entering interpreter | 235 | |// Back chain for sp: 272(sp) <-- sp entering interpreter |
219 | |.define SAVE_FPR_, 128 // .. 128+18*8: 64 bit FPR saves. | 236 | |.define SAVE_FPR_, 128 // .. 128+18*8: 64 bit FPR saves. |
237 | |.else | ||
238 | |.define SAVE_LR, 132(sp) | ||
239 | |.define CFRAME_SPACE, 128 // Delta for sp. | ||
240 | |// Back chain for sp: 128(sp) <-- sp entering interpreter | ||
241 | |.endif | ||
220 | |.define SAVE_GPR_, 56 // .. 56+18*4: 32 bit GPR saves. | 242 | |.define SAVE_GPR_, 56 // .. 56+18*4: 32 bit GPR saves. |
221 | |.define SAVE_CR, 52(sp) // 32 bit CR save. | 243 | |.define SAVE_CR, 52(sp) // 32 bit CR save. |
222 | |.define SAVE_ERRF, 48(sp) // 32 bit C frame info. | 244 | |.define SAVE_ERRF, 48(sp) // 32 bit C frame info. |
@@ -226,16 +248,25 @@ | |||
226 | |.define SAVE_PC, 32(sp) | 248 | |.define SAVE_PC, 32(sp) |
227 | |.define SAVE_MULTRES, 28(sp) | 249 | |.define SAVE_MULTRES, 28(sp) |
228 | |.define UNUSED1, 24(sp) | 250 | |.define UNUSED1, 24(sp) |
251 | |.if FPU | ||
229 | |.define TMPD_LO, 20(sp) | 252 | |.define TMPD_LO, 20(sp) |
230 | |.define TMPD_HI, 16(sp) | 253 | |.define TMPD_HI, 16(sp) |
231 | |.define TONUM_LO, 12(sp) | 254 | |.define TONUM_LO, 12(sp) |
232 | |.define TONUM_HI, 8(sp) | 255 | |.define TONUM_HI, 8(sp) |
256 | |.else | ||
257 | |.define SFSAVE_4, 20(sp) | ||
258 | |.define SFSAVE_3, 16(sp) | ||
259 | |.define SFSAVE_2, 12(sp) | ||
260 | |.define SFSAVE_1, 8(sp) | ||
261 | |.endif | ||
233 | |// Next frame lr: 4(sp) | 262 | |// Next frame lr: 4(sp) |
234 | |// Back chain for sp: 0(sp) <-- sp while in interpreter | 263 | |// Back chain for sp: 0(sp) <-- sp while in interpreter |
235 | | | 264 | | |
265 | |.if FPU | ||
236 | |.define TMPD_BLO, 23(sp) | 266 | |.define TMPD_BLO, 23(sp) |
237 | |.define TMPD, TMPD_HI | 267 | |.define TMPD, TMPD_HI |
238 | |.define TONUM_D, TONUM_HI | 268 | |.define TONUM_D, TONUM_HI |
269 | |.endif | ||
239 | | | 270 | | |
240 | |.endif | 271 | |.endif |
241 | | | 272 | | |
@@ -245,7 +276,7 @@ | |||
245 | |.else | 276 | |.else |
246 | | stw r..reg, SAVE_GPR_+(reg-14)*4(sp) | 277 | | stw r..reg, SAVE_GPR_+(reg-14)*4(sp) |
247 | |.endif | 278 | |.endif |
248 | | stfd f..reg, SAVE_FPR_+(reg-14)*8(sp) | 279 | | .FPU stfd f..reg, SAVE_FPR_+(reg-14)*8(sp) |
249 | |.endmacro | 280 | |.endmacro |
250 | |.macro rest_, reg | 281 | |.macro rest_, reg |
251 | |.if GPR64 | 282 | |.if GPR64 |
@@ -253,7 +284,7 @@ | |||
253 | |.else | 284 | |.else |
254 | | lwz r..reg, SAVE_GPR_+(reg-14)*4(sp) | 285 | | lwz r..reg, SAVE_GPR_+(reg-14)*4(sp) |
255 | |.endif | 286 | |.endif |
256 | | lfd f..reg, SAVE_FPR_+(reg-14)*8(sp) | 287 | | .FPU lfd f..reg, SAVE_FPR_+(reg-14)*8(sp) |
257 | |.endmacro | 288 | |.endmacro |
258 | | | 289 | | |
259 | |.macro saveregs | 290 | |.macro saveregs |
@@ -316,19 +347,14 @@ | |||
316 | |.type NODE, Node | 347 | |.type NODE, Node |
317 | |.type NARGS8, int | 348 | |.type NARGS8, int |
318 | |.type TRACE, GCtrace | 349 | |.type TRACE, GCtrace |
350 | |.type SBUF, SBuf | ||
319 | | | 351 | | |
320 | |//----------------------------------------------------------------------- | 352 | |//----------------------------------------------------------------------- |
321 | | | 353 | | |
322 | |// These basic macros should really be part of DynASM. | ||
323 | |.macro srwi, rx, ry, n; rlwinm rx, ry, 32-n, n, 31; .endmacro | ||
324 | |.macro slwi, rx, ry, n; rlwinm rx, ry, n, 0, 31-n; .endmacro | ||
325 | |.macro rotlwi, rx, ry, n; rlwinm rx, ry, n, 0, 31; .endmacro | ||
326 | |.macro rotlw, rx, ry, rn; rlwnm rx, ry, rn, 0, 31; .endmacro | ||
327 | |.macro subi, rx, ry, i; addi rx, ry, -i; .endmacro | ||
328 | | | ||
329 | |// Trap for not-yet-implemented parts. | 354 | |// Trap for not-yet-implemented parts. |
330 | |.macro NYI; tw 4, sp, sp; .endmacro | 355 | |.macro NYI; tw 4, sp, sp; .endmacro |
331 | | | 356 | | |
357 | |.if FPU | ||
332 | |// int/FP conversions. | 358 | |// int/FP conversions. |
333 | |.macro tonum_i, freg, reg | 359 | |.macro tonum_i, freg, reg |
334 | | xoris reg, reg, 0x8000 | 360 | | xoris reg, reg, 0x8000 |
@@ -352,6 +378,7 @@ | |||
352 | |.macro toint, reg, freg | 378 | |.macro toint, reg, freg |
353 | | toint reg, freg, freg | 379 | | toint reg, freg, freg |
354 | |.endmacro | 380 | |.endmacro |
381 | |.endif | ||
355 | | | 382 | | |
356 | |//----------------------------------------------------------------------- | 383 | |//----------------------------------------------------------------------- |
357 | | | 384 | | |
@@ -539,9 +566,19 @@ static void build_subroutines(BuildCtx *ctx) | |||
539 | | beq >2 | 566 | | beq >2 |
540 | |1: | 567 | |1: |
541 | | addic. TMP1, TMP1, -8 | 568 | | addic. TMP1, TMP1, -8 |
569 | |.if FPU | ||
542 | | lfd f0, 0(RA) | 570 | | lfd f0, 0(RA) |
571 | |.else | ||
572 | | lwz CARG1, 0(RA) | ||
573 | | lwz CARG2, 4(RA) | ||
574 | |.endif | ||
543 | | addi RA, RA, 8 | 575 | | addi RA, RA, 8 |
576 | |.if FPU | ||
544 | | stfd f0, 0(BASE) | 577 | | stfd f0, 0(BASE) |
578 | |.else | ||
579 | | stw CARG1, 0(BASE) | ||
580 | | stw CARG2, 4(BASE) | ||
581 | |.endif | ||
545 | | addi BASE, BASE, 8 | 582 | | addi BASE, BASE, 8 |
546 | | bney <1 | 583 | | bney <1 |
547 | | | 584 | | |
@@ -619,23 +656,23 @@ static void build_subroutines(BuildCtx *ctx) | |||
619 | | .toc ld TOCREG, SAVE_TOC | 656 | | .toc ld TOCREG, SAVE_TOC |
620 | | li TISNUM, LJ_TISNUM // Setup type comparison constants. | 657 | | li TISNUM, LJ_TISNUM // Setup type comparison constants. |
621 | | lp BASE, L->base | 658 | | lp BASE, L->base |
622 | | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | 659 | | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). |
623 | | lwz DISPATCH, L->glref // Setup pointer to dispatch table. | 660 | | lwz DISPATCH, L->glref // Setup pointer to dispatch table. |
624 | | li ZERO, 0 | 661 | | li ZERO, 0 |
625 | | stw TMP3, TMPD | 662 | | .FPU stw TMP3, TMPD |
626 | | li TMP1, LJ_TFALSE | 663 | | li TMP1, LJ_TFALSE |
627 | | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). | 664 | | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). |
628 | | li TISNIL, LJ_TNIL | 665 | | li TISNIL, LJ_TNIL |
629 | | li_vmstate INTERP | 666 | | li_vmstate INTERP |
630 | | lfs TOBIT, TMPD | 667 | | .FPU lfs TOBIT, TMPD |
631 | | lwz PC, FRAME_PC(BASE) // Fetch PC of previous frame. | 668 | | lwz PC, FRAME_PC(BASE) // Fetch PC of previous frame. |
632 | | la RA, -8(BASE) // Results start at BASE-8. | 669 | | la RA, -8(BASE) // Results start at BASE-8. |
633 | | stw TMP3, TMPD | 670 | | .FPU stw TMP3, TMPD |
634 | | addi DISPATCH, DISPATCH, GG_G2DISP | 671 | | addi DISPATCH, DISPATCH, GG_G2DISP |
635 | | stw TMP1, 0(RA) // Prepend false to error message. | 672 | | stw TMP1, 0(RA) // Prepend false to error message. |
636 | | li RD, 16 // 2 results: false + error message. | 673 | | li RD, 16 // 2 results: false + error message. |
637 | | st_vmstate | 674 | | st_vmstate |
638 | | lfs TONUM, TMPD | 675 | | .FPU lfs TONUM, TMPD |
639 | | b ->vm_returnc | 676 | | b ->vm_returnc |
640 | | | 677 | | |
641 | |//----------------------------------------------------------------------- | 678 | |//----------------------------------------------------------------------- |
@@ -684,33 +721,34 @@ static void build_subroutines(BuildCtx *ctx) | |||
684 | | stw CARG3, SAVE_NRES | 721 | | stw CARG3, SAVE_NRES |
685 | | cmplwi TMP1, 0 | 722 | | cmplwi TMP1, 0 |
686 | | stw CARG3, SAVE_ERRF | 723 | | stw CARG3, SAVE_ERRF |
687 | | stp TMP0, L->cframe | ||
688 | | stp CARG3, SAVE_CFRAME | 724 | | stp CARG3, SAVE_CFRAME |
689 | | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. | 725 | | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. |
726 | | stp TMP0, L->cframe | ||
690 | | beq >3 | 727 | | beq >3 |
691 | | | 728 | | |
692 | | // Resume after yield (like a return). | 729 | | // Resume after yield (like a return). |
730 | | stw L, DISPATCH_GL(cur_L)(DISPATCH) | ||
693 | | mr RA, BASE | 731 | | mr RA, BASE |
694 | | lp BASE, L->base | 732 | | lp BASE, L->base |
695 | | li TISNUM, LJ_TISNUM // Setup type comparison constants. | 733 | | li TISNUM, LJ_TISNUM // Setup type comparison constants. |
696 | | lp TMP1, L->top | 734 | | lp TMP1, L->top |
697 | | lwz PC, FRAME_PC(BASE) | 735 | | lwz PC, FRAME_PC(BASE) |
698 | | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | 736 | | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). |
699 | | stb CARG3, L->status | 737 | | stb CARG3, L->status |
700 | | stw TMP3, TMPD | 738 | | .FPU stw TMP3, TMPD |
701 | | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). | 739 | | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). |
702 | | lfs TOBIT, TMPD | 740 | | .FPU lfs TOBIT, TMPD |
703 | | sub RD, TMP1, BASE | 741 | | sub RD, TMP1, BASE |
704 | | stw TMP3, TMPD | 742 | | .FPU stw TMP3, TMPD |
705 | | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) | 743 | | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) |
706 | | addi RD, RD, 8 | 744 | | addi RD, RD, 8 |
707 | | stw TMP0, TONUM_HI | 745 | | .FPU stw TMP0, TONUM_HI |
708 | | li_vmstate INTERP | 746 | | li_vmstate INTERP |
709 | | li ZERO, 0 | 747 | | li ZERO, 0 |
710 | | st_vmstate | 748 | | st_vmstate |
711 | | andix. TMP0, PC, FRAME_TYPE | 749 | | andix. TMP0, PC, FRAME_TYPE |
712 | | mr MULTRES, RD | 750 | | mr MULTRES, RD |
713 | | lfs TONUM, TMPD | 751 | | .FPU lfs TONUM, TMPD |
714 | | li TISNIL, LJ_TNIL | 752 | | li TISNIL, LJ_TNIL |
715 | | beq ->BC_RET_Z | 753 | | beq ->BC_RET_Z |
716 | | b ->vm_return | 754 | | b ->vm_return |
@@ -729,33 +767,34 @@ static void build_subroutines(BuildCtx *ctx) | |||
729 | | | 767 | | |
730 | |1: // Entry point for vm_pcall above (PC = ftype). | 768 | |1: // Entry point for vm_pcall above (PC = ftype). |
731 | | lp TMP1, L:CARG1->cframe | 769 | | lp TMP1, L:CARG1->cframe |
732 | | stw CARG3, SAVE_NRES | ||
733 | | mr L, CARG1 | 770 | | mr L, CARG1 |
734 | | stw CARG1, SAVE_L | 771 | | stw CARG3, SAVE_NRES |
735 | | mr BASE, CARG2 | ||
736 | | stp sp, L->cframe // Add our C frame to cframe chain. | ||
737 | | lwz DISPATCH, L->glref // Setup pointer to dispatch table. | 772 | | lwz DISPATCH, L->glref // Setup pointer to dispatch table. |
773 | | stw CARG1, SAVE_L | ||
774 | | mr BASE, CARG2 | ||
775 | | addi DISPATCH, DISPATCH, GG_G2DISP | ||
738 | | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. | 776 | | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. |
739 | | stp TMP1, SAVE_CFRAME | 777 | | stp TMP1, SAVE_CFRAME |
740 | | addi DISPATCH, DISPATCH, GG_G2DISP | 778 | | stp sp, L->cframe // Add our C frame to cframe chain. |
741 | | | 779 | | |
742 | |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). | 780 | |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). |
781 | | stw L, DISPATCH_GL(cur_L)(DISPATCH) | ||
743 | | lp TMP2, L->base // TMP2 = old base (used in vmeta_call). | 782 | | lp TMP2, L->base // TMP2 = old base (used in vmeta_call). |
744 | | li TISNUM, LJ_TISNUM // Setup type comparison constants. | 783 | | li TISNUM, LJ_TISNUM // Setup type comparison constants. |
745 | | lp TMP1, L->top | 784 | | lp TMP1, L->top |
746 | | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | 785 | | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). |
747 | | add PC, PC, BASE | 786 | | add PC, PC, BASE |
748 | | stw TMP3, TMPD | 787 | | .FPU stw TMP3, TMPD |
749 | | li ZERO, 0 | 788 | | li ZERO, 0 |
750 | | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). | 789 | | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). |
751 | | lfs TOBIT, TMPD | 790 | | .FPU lfs TOBIT, TMPD |
752 | | sub PC, PC, TMP2 // PC = frame delta + frame type | 791 | | sub PC, PC, TMP2 // PC = frame delta + frame type |
753 | | stw TMP3, TMPD | 792 | | .FPU stw TMP3, TMPD |
754 | | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) | 793 | | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) |
755 | | sub NARGS8:RC, TMP1, BASE | 794 | | sub NARGS8:RC, TMP1, BASE |
756 | | stw TMP0, TONUM_HI | 795 | | .FPU stw TMP0, TONUM_HI |
757 | | li_vmstate INTERP | 796 | | li_vmstate INTERP |
758 | | lfs TONUM, TMPD | 797 | | .FPU lfs TONUM, TMPD |
759 | | li TISNIL, LJ_TNIL | 798 | | li TISNIL, LJ_TNIL |
760 | | st_vmstate | 799 | | st_vmstate |
761 | | | 800 | | |
@@ -776,15 +815,18 @@ static void build_subroutines(BuildCtx *ctx) | |||
776 | | lwz TMP0, L:CARG1->stack | 815 | | lwz TMP0, L:CARG1->stack |
777 | | stw CARG1, SAVE_L | 816 | | stw CARG1, SAVE_L |
778 | | lp TMP1, L->top | 817 | | lp TMP1, L->top |
818 | | lwz DISPATCH, L->glref // Setup pointer to dispatch table. | ||
779 | | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. | 819 | | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. |
780 | | sub TMP0, TMP0, TMP1 // Compute -savestack(L, L->top). | 820 | | sub TMP0, TMP0, TMP1 // Compute -savestack(L, L->top). |
781 | | lp TMP1, L->cframe | 821 | | lp TMP1, L->cframe |
782 | | stp sp, L->cframe // Add our C frame to cframe chain. | 822 | | addi DISPATCH, DISPATCH, GG_G2DISP |
783 | | .toc lp CARG4, 0(CARG4) | 823 | | .toc lp CARG4, 0(CARG4) |
784 | | li TMP2, 0 | 824 | | li TMP2, 0 |
785 | | stw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame. | 825 | | stw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame. |
786 | | stw TMP2, SAVE_ERRF // No error function. | 826 | | stw TMP2, SAVE_ERRF // No error function. |
787 | | stp TMP1, SAVE_CFRAME | 827 | | stp TMP1, SAVE_CFRAME |
828 | | stp sp, L->cframe // Add our C frame to cframe chain. | ||
829 | | stw L, DISPATCH_GL(cur_L)(DISPATCH) | ||
788 | | mtctr CARG4 | 830 | | mtctr CARG4 |
789 | | bctrl // (lua_State *L, lua_CFunction func, void *ud) | 831 | | bctrl // (lua_State *L, lua_CFunction func, void *ud) |
790 | |.if PPE | 832 | |.if PPE |
@@ -793,9 +835,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
793 | |.else | 835 | |.else |
794 | | mr. BASE, CRET1 | 836 | | mr. BASE, CRET1 |
795 | |.endif | 837 | |.endif |
796 | | lwz DISPATCH, L->glref // Setup pointer to dispatch table. | 838 | | li PC, FRAME_CP |
797 | | li PC, FRAME_CP | ||
798 | | addi DISPATCH, DISPATCH, GG_G2DISP | ||
799 | | bne <3 // Else continue with the call. | 839 | | bne <3 // Else continue with the call. |
800 | | b ->vm_leave_cp // No base? Just remove C frame. | 840 | | b ->vm_leave_cp // No base? Just remove C frame. |
801 | | | 841 | | |
@@ -842,15 +882,30 @@ static void build_subroutines(BuildCtx *ctx) | |||
842 | | lwz INS, -4(PC) | 882 | | lwz INS, -4(PC) |
843 | | subi CARG2, RB, 16 | 883 | | subi CARG2, RB, 16 |
844 | | decode_RB8 SAVE0, INS | 884 | | decode_RB8 SAVE0, INS |
885 | |.if FPU | ||
845 | | lfd f0, 0(RA) | 886 | | lfd f0, 0(RA) |
887 | |.else | ||
888 | | lwz TMP2, 0(RA) | ||
889 | | lwz TMP3, 4(RA) | ||
890 | |.endif | ||
846 | | add TMP1, BASE, SAVE0 | 891 | | add TMP1, BASE, SAVE0 |
847 | | stp BASE, L->base | 892 | | stp BASE, L->base |
848 | | cmplw TMP1, CARG2 | 893 | | cmplw TMP1, CARG2 |
849 | | sub CARG3, CARG2, TMP1 | 894 | | sub CARG3, CARG2, TMP1 |
850 | | decode_RA8 RA, INS | 895 | | decode_RA8 RA, INS |
896 | |.if FPU | ||
851 | | stfd f0, 0(CARG2) | 897 | | stfd f0, 0(CARG2) |
898 | |.else | ||
899 | | stw TMP2, 0(CARG2) | ||
900 | | stw TMP3, 4(CARG2) | ||
901 | |.endif | ||
852 | | bney ->BC_CAT_Z | 902 | | bney ->BC_CAT_Z |
903 | |.if FPU | ||
853 | | stfdx f0, BASE, RA | 904 | | stfdx f0, BASE, RA |
905 | |.else | ||
906 | | stwux TMP2, RA, BASE | ||
907 | | stw TMP3, 4(RA) | ||
908 | |.endif | ||
854 | | b ->cont_nop | 909 | | b ->cont_nop |
855 | | | 910 | | |
856 | |//-- Table indexing metamethods ----------------------------------------- | 911 | |//-- Table indexing metamethods ----------------------------------------- |
@@ -903,9 +958,19 @@ static void build_subroutines(BuildCtx *ctx) | |||
903 | | // Returns TValue * (finished) or NULL (metamethod). | 958 | | // Returns TValue * (finished) or NULL (metamethod). |
904 | | cmplwi CRET1, 0 | 959 | | cmplwi CRET1, 0 |
905 | | beq >3 | 960 | | beq >3 |
961 | |.if FPU | ||
906 | | lfd f0, 0(CRET1) | 962 | | lfd f0, 0(CRET1) |
963 | |.else | ||
964 | | lwz TMP0, 0(CRET1) | ||
965 | | lwz TMP1, 4(CRET1) | ||
966 | |.endif | ||
907 | | ins_next1 | 967 | | ins_next1 |
968 | |.if FPU | ||
908 | | stfdx f0, BASE, RA | 969 | | stfdx f0, BASE, RA |
970 | |.else | ||
971 | | stwux TMP0, RA, BASE | ||
972 | | stw TMP1, 4(RA) | ||
973 | |.endif | ||
909 | | ins_next2 | 974 | | ins_next2 |
910 | | | 975 | | |
911 | |3: // Call __index metamethod. | 976 | |3: // Call __index metamethod. |
@@ -918,6 +983,22 @@ static void build_subroutines(BuildCtx *ctx) | |||
918 | | li NARGS8:RC, 16 // 2 args for func(t, k). | 983 | | li NARGS8:RC, 16 // 2 args for func(t, k). |
919 | | b ->vm_call_dispatch_f | 984 | | b ->vm_call_dispatch_f |
920 | | | 985 | | |
986 | |->vmeta_tgetr: | ||
987 | | bl extern lj_tab_getinth // (GCtab *t, int32_t key) | ||
988 | | // Returns cTValue * or NULL. | ||
989 | | cmplwi CRET1, 0 | ||
990 | | beq >1 | ||
991 | |.if FPU | ||
992 | | lfd f14, 0(CRET1) | ||
993 | |.else | ||
994 | | lwz SAVE0, 0(CRET1) | ||
995 | | lwz SAVE1, 4(CRET1) | ||
996 | |.endif | ||
997 | | b ->BC_TGETR_Z | ||
998 | |1: | ||
999 | | stwx TISNIL, BASE, RA | ||
1000 | | b ->cont_nop | ||
1001 | | | ||
921 | |//----------------------------------------------------------------------- | 1002 | |//----------------------------------------------------------------------- |
922 | | | 1003 | | |
923 | |->vmeta_tsets1: | 1004 | |->vmeta_tsets1: |
@@ -967,11 +1048,21 @@ static void build_subroutines(BuildCtx *ctx) | |||
967 | | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) | 1048 | | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) |
968 | | // Returns TValue * (finished) or NULL (metamethod). | 1049 | | // Returns TValue * (finished) or NULL (metamethod). |
969 | | cmplwi CRET1, 0 | 1050 | | cmplwi CRET1, 0 |
1051 | |.if FPU | ||
970 | | lfdx f0, BASE, RA | 1052 | | lfdx f0, BASE, RA |
1053 | |.else | ||
1054 | | lwzux TMP2, RA, BASE | ||
1055 | | lwz TMP3, 4(RA) | ||
1056 | |.endif | ||
971 | | beq >3 | 1057 | | beq >3 |
972 | | // NOBARRIER: lj_meta_tset ensures the table is not black. | 1058 | | // NOBARRIER: lj_meta_tset ensures the table is not black. |
973 | | ins_next1 | 1059 | | ins_next1 |
1060 | |.if FPU | ||
974 | | stfd f0, 0(CRET1) | 1061 | | stfd f0, 0(CRET1) |
1062 | |.else | ||
1063 | | stw TMP2, 0(CRET1) | ||
1064 | | stw TMP3, 4(CRET1) | ||
1065 | |.endif | ||
975 | | ins_next2 | 1066 | | ins_next2 |
976 | | | 1067 | | |
977 | |3: // Call __newindex metamethod. | 1068 | |3: // Call __newindex metamethod. |
@@ -982,9 +1073,27 @@ static void build_subroutines(BuildCtx *ctx) | |||
982 | | add PC, TMP1, BASE | 1073 | | add PC, TMP1, BASE |
983 | | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. | 1074 | | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. |
984 | | li NARGS8:RC, 24 // 3 args for func(t, k, v) | 1075 | | li NARGS8:RC, 24 // 3 args for func(t, k, v) |
1076 | |.if FPU | ||
985 | | stfd f0, 16(BASE) // Copy value to third argument. | 1077 | | stfd f0, 16(BASE) // Copy value to third argument. |
1078 | |.else | ||
1079 | | stw TMP2, 16(BASE) | ||
1080 | | stw TMP3, 20(BASE) | ||
1081 | |.endif | ||
986 | | b ->vm_call_dispatch_f | 1082 | | b ->vm_call_dispatch_f |
987 | | | 1083 | | |
1084 | |->vmeta_tsetr: | ||
1085 | | stp BASE, L->base | ||
1086 | | stw PC, SAVE_PC | ||
1087 | | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) | ||
1088 | | // Returns TValue *. | ||
1089 | |.if FPU | ||
1090 | | stfd f14, 0(CRET1) | ||
1091 | |.else | ||
1092 | | stw SAVE0, 0(CRET1) | ||
1093 | | stw SAVE1, 4(CRET1) | ||
1094 | |.endif | ||
1095 | | b ->cont_nop | ||
1096 | | | ||
988 | |//-- Comparison metamethods --------------------------------------------- | 1097 | |//-- Comparison metamethods --------------------------------------------- |
989 | | | 1098 | | |
990 | |->vmeta_comp: | 1099 | |->vmeta_comp: |
@@ -1021,9 +1130,19 @@ static void build_subroutines(BuildCtx *ctx) | |||
1021 | | | 1130 | | |
1022 | |->cont_ra: // RA = resultptr | 1131 | |->cont_ra: // RA = resultptr |
1023 | | lwz INS, -4(PC) | 1132 | | lwz INS, -4(PC) |
1133 | |.if FPU | ||
1024 | | lfd f0, 0(RA) | 1134 | | lfd f0, 0(RA) |
1135 | |.else | ||
1136 | | lwz CARG1, 0(RA) | ||
1137 | | lwz CARG2, 4(RA) | ||
1138 | |.endif | ||
1025 | | decode_RA8 TMP1, INS | 1139 | | decode_RA8 TMP1, INS |
1140 | |.if FPU | ||
1026 | | stfdx f0, BASE, TMP1 | 1141 | | stfdx f0, BASE, TMP1 |
1142 | |.else | ||
1143 | | stwux CARG1, TMP1, BASE | ||
1144 | | stw CARG2, 4(TMP1) | ||
1145 | |.endif | ||
1027 | | b ->cont_nop | 1146 | | b ->cont_nop |
1028 | | | 1147 | | |
1029 | |->cont_condt: // RA = resultptr | 1148 | |->cont_condt: // RA = resultptr |
@@ -1063,6 +1182,16 @@ static void build_subroutines(BuildCtx *ctx) | |||
1063 | | b <3 | 1182 | | b <3 |
1064 | |.endif | 1183 | |.endif |
1065 | | | 1184 | | |
1185 | |->vmeta_istype: | ||
1186 | | subi PC, PC, 4 | ||
1187 | | stp BASE, L->base | ||
1188 | | srwi CARG2, RA, 3 | ||
1189 | | mr CARG1, L | ||
1190 | | srwi CARG3, RD, 3 | ||
1191 | | stw PC, SAVE_PC | ||
1192 | | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) | ||
1193 | | b ->cont_nop | ||
1194 | | | ||
1066 | |//-- Arithmetic metamethods --------------------------------------------- | 1195 | |//-- Arithmetic metamethods --------------------------------------------- |
1067 | | | 1196 | | |
1068 | |->vmeta_arith_nv: | 1197 | |->vmeta_arith_nv: |
@@ -1219,22 +1348,32 @@ static void build_subroutines(BuildCtx *ctx) | |||
1219 | |.macro .ffunc_n, name | 1348 | |.macro .ffunc_n, name |
1220 | |->ff_ .. name: | 1349 | |->ff_ .. name: |
1221 | | cmplwi NARGS8:RC, 8 | 1350 | | cmplwi NARGS8:RC, 8 |
1222 | | lwz CARG3, 0(BASE) | 1351 | | lwz CARG1, 0(BASE) |
1352 | |.if FPU | ||
1223 | | lfd FARG1, 0(BASE) | 1353 | | lfd FARG1, 0(BASE) |
1354 | |.else | ||
1355 | | lwz CARG2, 4(BASE) | ||
1356 | |.endif | ||
1224 | | blt ->fff_fallback | 1357 | | blt ->fff_fallback |
1225 | | checknum CARG3; bge ->fff_fallback | 1358 | | checknum CARG1; bge ->fff_fallback |
1226 | |.endmacro | 1359 | |.endmacro |
1227 | | | 1360 | | |
1228 | |.macro .ffunc_nn, name | 1361 | |.macro .ffunc_nn, name |
1229 | |->ff_ .. name: | 1362 | |->ff_ .. name: |
1230 | | cmplwi NARGS8:RC, 16 | 1363 | | cmplwi NARGS8:RC, 16 |
1231 | | lwz CARG3, 0(BASE) | 1364 | | lwz CARG1, 0(BASE) |
1365 | |.if FPU | ||
1232 | | lfd FARG1, 0(BASE) | 1366 | | lfd FARG1, 0(BASE) |
1233 | | lwz CARG4, 8(BASE) | 1367 | | lwz CARG3, 8(BASE) |
1234 | | lfd FARG2, 8(BASE) | 1368 | | lfd FARG2, 8(BASE) |
1369 | |.else | ||
1370 | | lwz CARG2, 4(BASE) | ||
1371 | | lwz CARG3, 8(BASE) | ||
1372 | | lwz CARG4, 12(BASE) | ||
1373 | |.endif | ||
1235 | | blt ->fff_fallback | 1374 | | blt ->fff_fallback |
1375 | | checknum CARG1; bge ->fff_fallback | ||
1236 | | checknum CARG3; bge ->fff_fallback | 1376 | | checknum CARG3; bge ->fff_fallback |
1237 | | checknum CARG4; bge ->fff_fallback | ||
1238 | |.endmacro | 1377 | |.endmacro |
1239 | | | 1378 | | |
1240 | |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1. | 1379 | |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1. |
@@ -1255,14 +1394,21 @@ static void build_subroutines(BuildCtx *ctx) | |||
1255 | | bge cr1, ->fff_fallback | 1394 | | bge cr1, ->fff_fallback |
1256 | | stw CARG3, 0(RA) | 1395 | | stw CARG3, 0(RA) |
1257 | | addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8. | 1396 | | addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8. |
1397 | | addi TMP1, BASE, 8 | ||
1398 | | add TMP2, RA, NARGS8:RC | ||
1258 | | stw CARG1, 4(RA) | 1399 | | stw CARG1, 4(RA) |
1259 | | beq ->fff_res // Done if exactly 1 argument. | 1400 | | beq ->fff_res // Done if exactly 1 argument. |
1260 | | li TMP1, 8 | ||
1261 | | subi RC, RC, 8 | ||
1262 | |1: | 1401 | |1: |
1263 | | cmplw TMP1, RC | 1402 | | cmplw TMP1, TMP2 |
1264 | | lfdx f0, BASE, TMP1 | 1403 | |.if FPU |
1265 | | stfdx f0, RA, TMP1 | 1404 | | lfd f0, 0(TMP1) |
1405 | | stfd f0, 0(TMP1) | ||
1406 | |.else | ||
1407 | | lwz CARG1, 0(TMP1) | ||
1408 | | lwz CARG2, 4(TMP1) | ||
1409 | | stw CARG1, -8(TMP1) | ||
1410 | | stw CARG2, -4(TMP1) | ||
1411 | |.endif | ||
1266 | | addi TMP1, TMP1, 8 | 1412 | | addi TMP1, TMP1, 8 |
1267 | | bney <1 | 1413 | | bney <1 |
1268 | | b ->fff_res | 1414 | | b ->fff_res |
@@ -1277,8 +1423,14 @@ static void build_subroutines(BuildCtx *ctx) | |||
1277 | | orc TMP1, TMP2, TMP0 | 1423 | | orc TMP1, TMP2, TMP0 |
1278 | | addi TMP1, TMP1, ~LJ_TISNUM+1 | 1424 | | addi TMP1, TMP1, ~LJ_TISNUM+1 |
1279 | | slwi TMP1, TMP1, 3 | 1425 | | slwi TMP1, TMP1, 3 |
1426 | |.if FPU | ||
1280 | | la TMP2, CFUNC:RB->upvalue | 1427 | | la TMP2, CFUNC:RB->upvalue |
1281 | | lfdx FARG1, TMP2, TMP1 | 1428 | | lfdx FARG1, TMP2, TMP1 |
1429 | |.else | ||
1430 | | add TMP1, CFUNC:RB, TMP1 | ||
1431 | | lwz CARG1, CFUNC:TMP1->upvalue[0].u32.hi | ||
1432 | | lwz CARG2, CFUNC:TMP1->upvalue[0].u32.lo | ||
1433 | |.endif | ||
1282 | | b ->fff_resn | 1434 | | b ->fff_resn |
1283 | | | 1435 | | |
1284 | |//-- Base library: getters and setters --------------------------------- | 1436 | |//-- Base library: getters and setters --------------------------------- |
@@ -1356,7 +1508,12 @@ static void build_subroutines(BuildCtx *ctx) | |||
1356 | | mr CARG1, L | 1508 | | mr CARG1, L |
1357 | | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) | 1509 | | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) |
1358 | | // Returns cTValue *. | 1510 | | // Returns cTValue *. |
1511 | |.if FPU | ||
1359 | | lfd FARG1, 0(CRET1) | 1512 | | lfd FARG1, 0(CRET1) |
1513 | |.else | ||
1514 | | lwz CARG2, 4(CRET1) | ||
1515 | | lwz CARG1, 0(CRET1) // Caveat: CARG1 == CRET1. | ||
1516 | |.endif | ||
1360 | | b ->fff_resn | 1517 | | b ->fff_resn |
1361 | | | 1518 | | |
1362 | |//-- Base library: conversions ------------------------------------------ | 1519 | |//-- Base library: conversions ------------------------------------------ |
@@ -1365,7 +1522,11 @@ static void build_subroutines(BuildCtx *ctx) | |||
1365 | | // Only handles the number case inline (without a base argument). | 1522 | | // Only handles the number case inline (without a base argument). |
1366 | | cmplwi NARGS8:RC, 8 | 1523 | | cmplwi NARGS8:RC, 8 |
1367 | | lwz CARG1, 0(BASE) | 1524 | | lwz CARG1, 0(BASE) |
1525 | |.if FPU | ||
1368 | | lfd FARG1, 0(BASE) | 1526 | | lfd FARG1, 0(BASE) |
1527 | |.else | ||
1528 | | lwz CARG2, 4(BASE) | ||
1529 | |.endif | ||
1369 | | bne ->fff_fallback // Exactly one argument. | 1530 | | bne ->fff_fallback // Exactly one argument. |
1370 | | checknum CARG1; bgt ->fff_fallback | 1531 | | checknum CARG1; bgt ->fff_fallback |
1371 | | b ->fff_resn | 1532 | | b ->fff_resn |
@@ -1387,9 +1548,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
1387 | | mr CARG1, L | 1548 | | mr CARG1, L |
1388 | | mr CARG2, BASE | 1549 | | mr CARG2, BASE |
1389 | |.if DUALNUM | 1550 | |.if DUALNUM |
1390 | | bl extern lj_str_fromnumber // (lua_State *L, cTValue *o) | 1551 | | bl extern lj_strfmt_number // (lua_State *L, cTValue *o) |
1391 | |.else | 1552 | |.else |
1392 | | bl extern lj_str_fromnum // (lua_State *L, lua_Number *np) | 1553 | | bl extern lj_strfmt_num // (lua_State *L, lua_Number *np) |
1393 | |.endif | 1554 | |.endif |
1394 | | // Returns GCstr *. | 1555 | | // Returns GCstr *. |
1395 | | li CARG3, LJ_TSTR | 1556 | | li CARG3, LJ_TSTR |
@@ -1416,12 +1577,23 @@ static void build_subroutines(BuildCtx *ctx) | |||
1416 | | cmplwi CRET1, 0 | 1577 | | cmplwi CRET1, 0 |
1417 | | li CARG3, LJ_TNIL | 1578 | | li CARG3, LJ_TNIL |
1418 | | beq ->fff_restv // End of traversal: return nil. | 1579 | | beq ->fff_restv // End of traversal: return nil. |
1419 | | lfd f0, 8(BASE) // Copy key and value to results. | ||
1420 | | la RA, -8(BASE) | 1580 | | la RA, -8(BASE) |
1581 | |.if FPU | ||
1582 | | lfd f0, 8(BASE) // Copy key and value to results. | ||
1421 | | lfd f1, 16(BASE) | 1583 | | lfd f1, 16(BASE) |
1422 | | stfd f0, 0(RA) | 1584 | | stfd f0, 0(RA) |
1423 | | li RD, (2+1)*8 | ||
1424 | | stfd f1, 8(RA) | 1585 | | stfd f1, 8(RA) |
1586 | |.else | ||
1587 | | lwz CARG1, 8(BASE) | ||
1588 | | lwz CARG2, 12(BASE) | ||
1589 | | lwz CARG3, 16(BASE) | ||
1590 | | lwz CARG4, 20(BASE) | ||
1591 | | stw CARG1, 0(RA) | ||
1592 | | stw CARG2, 4(RA) | ||
1593 | | stw CARG3, 8(RA) | ||
1594 | | stw CARG4, 12(RA) | ||
1595 | |.endif | ||
1596 | | li RD, (2+1)*8 | ||
1425 | | b ->fff_res | 1597 | | b ->fff_res |
1426 | | | 1598 | | |
1427 | |.ffunc_1 pairs | 1599 | |.ffunc_1 pairs |
@@ -1430,17 +1602,32 @@ static void build_subroutines(BuildCtx *ctx) | |||
1430 | | bne ->fff_fallback | 1602 | | bne ->fff_fallback |
1431 | #if LJ_52 | 1603 | #if LJ_52 |
1432 | | lwz TAB:TMP2, TAB:CARG1->metatable | 1604 | | lwz TAB:TMP2, TAB:CARG1->metatable |
1605 | |.if FPU | ||
1433 | | lfd f0, CFUNC:RB->upvalue[0] | 1606 | | lfd f0, CFUNC:RB->upvalue[0] |
1607 | |.else | ||
1608 | | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi | ||
1609 | | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo | ||
1610 | |.endif | ||
1434 | | cmplwi TAB:TMP2, 0 | 1611 | | cmplwi TAB:TMP2, 0 |
1435 | | la RA, -8(BASE) | 1612 | | la RA, -8(BASE) |
1436 | | bne ->fff_fallback | 1613 | | bne ->fff_fallback |
1437 | #else | 1614 | #else |
1615 | |.if FPU | ||
1438 | | lfd f0, CFUNC:RB->upvalue[0] | 1616 | | lfd f0, CFUNC:RB->upvalue[0] |
1617 | |.else | ||
1618 | | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi | ||
1619 | | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo | ||
1620 | |.endif | ||
1439 | | la RA, -8(BASE) | 1621 | | la RA, -8(BASE) |
1440 | #endif | 1622 | #endif |
1441 | | stw TISNIL, 8(BASE) | 1623 | | stw TISNIL, 8(BASE) |
1442 | | li RD, (3+1)*8 | 1624 | | li RD, (3+1)*8 |
1625 | |.if FPU | ||
1443 | | stfd f0, 0(RA) | 1626 | | stfd f0, 0(RA) |
1627 | |.else | ||
1628 | | stw TMP0, 0(RA) | ||
1629 | | stw TMP1, 4(RA) | ||
1630 | |.endif | ||
1444 | | b ->fff_res | 1631 | | b ->fff_res |
1445 | | | 1632 | | |
1446 | |.ffunc ipairs_aux | 1633 | |.ffunc ipairs_aux |
@@ -1486,14 +1673,24 @@ static void build_subroutines(BuildCtx *ctx) | |||
1486 | | stfd FARG2, 0(RA) | 1673 | | stfd FARG2, 0(RA) |
1487 | |.endif | 1674 | |.endif |
1488 | | ble >2 // Not in array part? | 1675 | | ble >2 // Not in array part? |
1676 | |.if FPU | ||
1489 | | lwzx TMP2, TMP1, TMP3 | 1677 | | lwzx TMP2, TMP1, TMP3 |
1490 | | lfdx f0, TMP1, TMP3 | 1678 | | lfdx f0, TMP1, TMP3 |
1679 | |.else | ||
1680 | | lwzux TMP2, TMP1, TMP3 | ||
1681 | | lwz TMP3, 4(TMP1) | ||
1682 | |.endif | ||
1491 | |1: | 1683 | |1: |
1492 | | checknil TMP2 | 1684 | | checknil TMP2 |
1493 | | li RD, (0+1)*8 | 1685 | | li RD, (0+1)*8 |
1494 | | beq ->fff_res // End of iteration, return 0 results. | 1686 | | beq ->fff_res // End of iteration, return 0 results. |
1495 | | li RD, (2+1)*8 | 1687 | | li RD, (2+1)*8 |
1688 | |.if FPU | ||
1496 | | stfd f0, 8(RA) | 1689 | | stfd f0, 8(RA) |
1690 | |.else | ||
1691 | | stw TMP2, 8(RA) | ||
1692 | | stw TMP3, 12(RA) | ||
1693 | |.endif | ||
1497 | | b ->fff_res | 1694 | | b ->fff_res |
1498 | |2: // Check for empty hash part first. Otherwise call C function. | 1695 | |2: // Check for empty hash part first. Otherwise call C function. |
1499 | | lwz TMP0, TAB:CARG1->hmask | 1696 | | lwz TMP0, TAB:CARG1->hmask |
@@ -1507,7 +1704,11 @@ static void build_subroutines(BuildCtx *ctx) | |||
1507 | | li RD, (0+1)*8 | 1704 | | li RD, (0+1)*8 |
1508 | | beq ->fff_res | 1705 | | beq ->fff_res |
1509 | | lwz TMP2, 0(CRET1) | 1706 | | lwz TMP2, 0(CRET1) |
1707 | |.if FPU | ||
1510 | | lfd f0, 0(CRET1) | 1708 | | lfd f0, 0(CRET1) |
1709 | |.else | ||
1710 | | lwz TMP3, 4(CRET1) | ||
1711 | |.endif | ||
1511 | | b <1 | 1712 | | b <1 |
1512 | | | 1713 | | |
1513 | |.ffunc_1 ipairs | 1714 | |.ffunc_1 ipairs |
@@ -1516,12 +1717,22 @@ static void build_subroutines(BuildCtx *ctx) | |||
1516 | | bne ->fff_fallback | 1717 | | bne ->fff_fallback |
1517 | #if LJ_52 | 1718 | #if LJ_52 |
1518 | | lwz TAB:TMP2, TAB:CARG1->metatable | 1719 | | lwz TAB:TMP2, TAB:CARG1->metatable |
1720 | |.if FPU | ||
1519 | | lfd f0, CFUNC:RB->upvalue[0] | 1721 | | lfd f0, CFUNC:RB->upvalue[0] |
1722 | |.else | ||
1723 | | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi | ||
1724 | | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo | ||
1725 | |.endif | ||
1520 | | cmplwi TAB:TMP2, 0 | 1726 | | cmplwi TAB:TMP2, 0 |
1521 | | la RA, -8(BASE) | 1727 | | la RA, -8(BASE) |
1522 | | bne ->fff_fallback | 1728 | | bne ->fff_fallback |
1523 | #else | 1729 | #else |
1730 | |.if FPU | ||
1524 | | lfd f0, CFUNC:RB->upvalue[0] | 1731 | | lfd f0, CFUNC:RB->upvalue[0] |
1732 | |.else | ||
1733 | | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi | ||
1734 | | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo | ||
1735 | |.endif | ||
1525 | | la RA, -8(BASE) | 1736 | | la RA, -8(BASE) |
1526 | #endif | 1737 | #endif |
1527 | |.if DUALNUM | 1738 | |.if DUALNUM |
@@ -1531,7 +1742,12 @@ static void build_subroutines(BuildCtx *ctx) | |||
1531 | |.endif | 1742 | |.endif |
1532 | | stw ZERO, 12(BASE) | 1743 | | stw ZERO, 12(BASE) |
1533 | | li RD, (3+1)*8 | 1744 | | li RD, (3+1)*8 |
1745 | |.if FPU | ||
1534 | | stfd f0, 0(RA) | 1746 | | stfd f0, 0(RA) |
1747 | |.else | ||
1748 | | stw TMP0, 0(RA) | ||
1749 | | stw TMP1, 4(RA) | ||
1750 | |.endif | ||
1535 | | b ->fff_res | 1751 | | b ->fff_res |
1536 | | | 1752 | | |
1537 | |//-- Base library: catch errors ---------------------------------------- | 1753 | |//-- Base library: catch errors ---------------------------------------- |
@@ -1550,19 +1766,32 @@ static void build_subroutines(BuildCtx *ctx) | |||
1550 | | | 1766 | | |
1551 | |.ffunc xpcall | 1767 | |.ffunc xpcall |
1552 | | cmplwi NARGS8:RC, 16 | 1768 | | cmplwi NARGS8:RC, 16 |
1553 | | lwz CARG4, 8(BASE) | 1769 | | lwz CARG3, 8(BASE) |
1770 | |.if FPU | ||
1554 | | lfd FARG2, 8(BASE) | 1771 | | lfd FARG2, 8(BASE) |
1555 | | lfd FARG1, 0(BASE) | 1772 | | lfd FARG1, 0(BASE) |
1773 | |.else | ||
1774 | | lwz CARG1, 0(BASE) | ||
1775 | | lwz CARG2, 4(BASE) | ||
1776 | | lwz CARG4, 12(BASE) | ||
1777 | |.endif | ||
1556 | | blt ->fff_fallback | 1778 | | blt ->fff_fallback |
1557 | | lbz TMP1, DISPATCH_GL(hookmask)(DISPATCH) | 1779 | | lbz TMP1, DISPATCH_GL(hookmask)(DISPATCH) |
1558 | | mr TMP2, BASE | 1780 | | mr TMP2, BASE |
1559 | | checkfunc CARG4; bne ->fff_fallback // Traceback must be a function. | 1781 | | checkfunc CARG3; bne ->fff_fallback // Traceback must be a function. |
1560 | | la BASE, 16(BASE) | 1782 | | la BASE, 16(BASE) |
1561 | | // Remember active hook before pcall. | 1783 | | // Remember active hook before pcall. |
1562 | | rlwinm TMP1, TMP1, 32-HOOK_ACTIVE_SHIFT, 31, 31 | 1784 | | rlwinm TMP1, TMP1, 32-HOOK_ACTIVE_SHIFT, 31, 31 |
1785 | |.if FPU | ||
1563 | | stfd FARG2, 0(TMP2) // Swap function and traceback. | 1786 | | stfd FARG2, 0(TMP2) // Swap function and traceback. |
1564 | | subi NARGS8:RC, NARGS8:RC, 16 | ||
1565 | | stfd FARG1, 8(TMP2) | 1787 | | stfd FARG1, 8(TMP2) |
1788 | |.else | ||
1789 | | stw CARG3, 0(TMP2) | ||
1790 | | stw CARG4, 4(TMP2) | ||
1791 | | stw CARG1, 8(TMP2) | ||
1792 | | stw CARG2, 12(TMP2) | ||
1793 | |.endif | ||
1794 | | subi NARGS8:RC, NARGS8:RC, 16 | ||
1566 | | addi PC, TMP1, 16+FRAME_PCALL | 1795 | | addi PC, TMP1, 16+FRAME_PCALL |
1567 | | b ->vm_call_dispatch | 1796 | | b ->vm_call_dispatch |
1568 | | | 1797 | | |
@@ -1605,9 +1834,21 @@ static void build_subroutines(BuildCtx *ctx) | |||
1605 | | stp BASE, L->top | 1834 | | stp BASE, L->top |
1606 | |2: // Move args to coroutine. | 1835 | |2: // Move args to coroutine. |
1607 | | cmpw TMP1, NARGS8:RC | 1836 | | cmpw TMP1, NARGS8:RC |
1837 | |.if FPU | ||
1608 | | lfdx f0, BASE, TMP1 | 1838 | | lfdx f0, BASE, TMP1 |
1839 | |.else | ||
1840 | | add CARG3, BASE, TMP1 | ||
1841 | | lwz TMP2, 0(CARG3) | ||
1842 | | lwz TMP3, 4(CARG3) | ||
1843 | |.endif | ||
1609 | | beq >3 | 1844 | | beq >3 |
1845 | |.if FPU | ||
1610 | | stfdx f0, CARG2, TMP1 | 1846 | | stfdx f0, CARG2, TMP1 |
1847 | |.else | ||
1848 | | add CARG3, CARG2, TMP1 | ||
1849 | | stw TMP2, 0(CARG3) | ||
1850 | | stw TMP3, 4(CARG3) | ||
1851 | |.endif | ||
1611 | | addi TMP1, TMP1, 8 | 1852 | | addi TMP1, TMP1, 8 |
1612 | | b <2 | 1853 | | b <2 |
1613 | |3: | 1854 | |3: |
@@ -1622,6 +1863,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
1622 | | lp TMP3, L:SAVE0->top | 1863 | | lp TMP3, L:SAVE0->top |
1623 | | li_vmstate INTERP | 1864 | | li_vmstate INTERP |
1624 | | lp BASE, L->base | 1865 | | lp BASE, L->base |
1866 | | stw L, DISPATCH_GL(cur_L)(DISPATCH) | ||
1625 | | st_vmstate | 1867 | | st_vmstate |
1626 | | bgt >8 | 1868 | | bgt >8 |
1627 | | sub RD, TMP3, TMP2 | 1869 | | sub RD, TMP3, TMP2 |
@@ -1637,8 +1879,17 @@ static void build_subroutines(BuildCtx *ctx) | |||
1637 | | stp TMP2, L:SAVE0->top // Clear coroutine stack. | 1879 | | stp TMP2, L:SAVE0->top // Clear coroutine stack. |
1638 | |5: // Move results from coroutine. | 1880 | |5: // Move results from coroutine. |
1639 | | cmplw TMP1, TMP3 | 1881 | | cmplw TMP1, TMP3 |
1882 | |.if FPU | ||
1640 | | lfdx f0, TMP2, TMP1 | 1883 | | lfdx f0, TMP2, TMP1 |
1641 | | stfdx f0, BASE, TMP1 | 1884 | | stfdx f0, BASE, TMP1 |
1885 | |.else | ||
1886 | | add CARG3, TMP2, TMP1 | ||
1887 | | lwz CARG1, 0(CARG3) | ||
1888 | | lwz CARG2, 4(CARG3) | ||
1889 | | add CARG3, BASE, TMP1 | ||
1890 | | stw CARG1, 0(CARG3) | ||
1891 | | stw CARG2, 4(CARG3) | ||
1892 | |.endif | ||
1642 | | addi TMP1, TMP1, 8 | 1893 | | addi TMP1, TMP1, 8 |
1643 | | bne <5 | 1894 | | bne <5 |
1644 | |6: | 1895 | |6: |
@@ -1663,12 +1914,22 @@ static void build_subroutines(BuildCtx *ctx) | |||
1663 | | andix. TMP0, PC, FRAME_TYPE | 1914 | | andix. TMP0, PC, FRAME_TYPE |
1664 | | la TMP3, -8(TMP3) | 1915 | | la TMP3, -8(TMP3) |
1665 | | li TMP1, LJ_TFALSE | 1916 | | li TMP1, LJ_TFALSE |
1917 | |.if FPU | ||
1666 | | lfd f0, 0(TMP3) | 1918 | | lfd f0, 0(TMP3) |
1919 | |.else | ||
1920 | | lwz CARG1, 0(TMP3) | ||
1921 | | lwz CARG2, 4(TMP3) | ||
1922 | |.endif | ||
1667 | | stp TMP3, L:SAVE0->top // Remove error from coroutine stack. | 1923 | | stp TMP3, L:SAVE0->top // Remove error from coroutine stack. |
1668 | | li RD, (2+1)*8 | 1924 | | li RD, (2+1)*8 |
1669 | | stw TMP1, -8(BASE) // Prepend false to results. | 1925 | | stw TMP1, -8(BASE) // Prepend false to results. |
1670 | | la RA, -8(BASE) | 1926 | | la RA, -8(BASE) |
1927 | |.if FPU | ||
1671 | | stfd f0, 0(BASE) // Copy error message. | 1928 | | stfd f0, 0(BASE) // Copy error message. |
1929 | |.else | ||
1930 | | stw CARG1, 0(BASE) // Copy error message. | ||
1931 | | stw CARG2, 4(BASE) | ||
1932 | |.endif | ||
1672 | | b <7 | 1933 | | b <7 |
1673 | |.else | 1934 | |.else |
1674 | | mr CARG1, L | 1935 | | mr CARG1, L |
@@ -1847,7 +2108,12 @@ static void build_subroutines(BuildCtx *ctx) | |||
1847 | | lus CARG1, 0x8000 // -(2^31). | 2108 | | lus CARG1, 0x8000 // -(2^31). |
1848 | | beqy ->fff_resi | 2109 | | beqy ->fff_resi |
1849 | |5: | 2110 | |5: |
2111 | |.if FPU | ||
1850 | | lfd FARG1, 0(BASE) | 2112 | | lfd FARG1, 0(BASE) |
2113 | |.else | ||
2114 | | lwz CARG1, 0(BASE) | ||
2115 | | lwz CARG2, 4(BASE) | ||
2116 | |.endif | ||
1851 | | blex func | 2117 | | blex func |
1852 | | b ->fff_resn | 2118 | | b ->fff_resn |
1853 | |.endmacro | 2119 | |.endmacro |
@@ -1871,10 +2137,14 @@ static void build_subroutines(BuildCtx *ctx) | |||
1871 | | | 2137 | | |
1872 | |.ffunc math_log | 2138 | |.ffunc math_log |
1873 | | cmplwi NARGS8:RC, 8 | 2139 | | cmplwi NARGS8:RC, 8 |
1874 | | lwz CARG3, 0(BASE) | 2140 | | lwz CARG1, 0(BASE) |
1875 | | lfd FARG1, 0(BASE) | ||
1876 | | bne ->fff_fallback // Need exactly 1 argument. | 2141 | | bne ->fff_fallback // Need exactly 1 argument. |
1877 | | checknum CARG3; bge ->fff_fallback | 2142 | | checknum CARG1; bge ->fff_fallback |
2143 | |.if FPU | ||
2144 | | lfd FARG1, 0(BASE) | ||
2145 | |.else | ||
2146 | | lwz CARG2, 4(BASE) | ||
2147 | |.endif | ||
1878 | | blex log | 2148 | | blex log |
1879 | | b ->fff_resn | 2149 | | b ->fff_resn |
1880 | | | 2150 | | |
@@ -1893,26 +2163,27 @@ static void build_subroutines(BuildCtx *ctx) | |||
1893 | | math_extern2 atan2 | 2163 | | math_extern2 atan2 |
1894 | | math_extern2 fmod | 2164 | | math_extern2 fmod |
1895 | | | 2165 | | |
1896 | |->ff_math_deg: | ||
1897 | |.ffunc_n math_rad | ||
1898 | | lfd FARG2, CFUNC:RB->upvalue[0] | ||
1899 | | fmul FARG1, FARG1, FARG2 | ||
1900 | | b ->fff_resn | ||
1901 | | | ||
1902 | |.if DUALNUM | 2166 | |.if DUALNUM |
1903 | |.ffunc math_ldexp | 2167 | |.ffunc math_ldexp |
1904 | | cmplwi NARGS8:RC, 16 | 2168 | | cmplwi NARGS8:RC, 16 |
1905 | | lwz CARG3, 0(BASE) | 2169 | | lwz TMP0, 0(BASE) |
2170 | |.if FPU | ||
1906 | | lfd FARG1, 0(BASE) | 2171 | | lfd FARG1, 0(BASE) |
1907 | | lwz CARG4, 8(BASE) | 2172 | |.else |
2173 | | lwz CARG1, 0(BASE) | ||
2174 | | lwz CARG2, 4(BASE) | ||
2175 | |.endif | ||
2176 | | lwz TMP1, 8(BASE) | ||
1908 | |.if GPR64 | 2177 | |.if GPR64 |
1909 | | lwz CARG2, 12(BASE) | 2178 | | lwz CARG2, 12(BASE) |
1910 | |.else | 2179 | |.elif FPU |
1911 | | lwz CARG1, 12(BASE) | 2180 | | lwz CARG1, 12(BASE) |
2181 | |.else | ||
2182 | | lwz CARG3, 12(BASE) | ||
1912 | |.endif | 2183 | |.endif |
1913 | | blt ->fff_fallback | 2184 | | blt ->fff_fallback |
1914 | | checknum CARG3; bge ->fff_fallback | 2185 | | checknum TMP0; bge ->fff_fallback |
1915 | | checknum CARG4; bne ->fff_fallback | 2186 | | checknum TMP1; bne ->fff_fallback |
1916 | |.else | 2187 | |.else |
1917 | |.ffunc_nn math_ldexp | 2188 | |.ffunc_nn math_ldexp |
1918 | |.if GPR64 | 2189 | |.if GPR64 |
@@ -1927,8 +2198,10 @@ static void build_subroutines(BuildCtx *ctx) | |||
1927 | |.ffunc_n math_frexp | 2198 | |.ffunc_n math_frexp |
1928 | |.if GPR64 | 2199 | |.if GPR64 |
1929 | | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) | 2200 | | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) |
1930 | |.else | 2201 | |.elif FPU |
1931 | | la CARG1, DISPATCH_GL(tmptv)(DISPATCH) | 2202 | | la CARG1, DISPATCH_GL(tmptv)(DISPATCH) |
2203 | |.else | ||
2204 | | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) | ||
1932 | |.endif | 2205 | |.endif |
1933 | | lwz PC, FRAME_PC(BASE) | 2206 | | lwz PC, FRAME_PC(BASE) |
1934 | | blex frexp | 2207 | | blex frexp |
@@ -1937,7 +2210,12 @@ static void build_subroutines(BuildCtx *ctx) | |||
1937 | |.if not DUALNUM | 2210 | |.if not DUALNUM |
1938 | | tonum_i FARG2, TMP1 | 2211 | | tonum_i FARG2, TMP1 |
1939 | |.endif | 2212 | |.endif |
2213 | |.if FPU | ||
1940 | | stfd FARG1, 0(RA) | 2214 | | stfd FARG1, 0(RA) |
2215 | |.else | ||
2216 | | stw CRET1, 0(RA) | ||
2217 | | stw CRET2, 4(RA) | ||
2218 | |.endif | ||
1941 | | li RD, (2+1)*8 | 2219 | | li RD, (2+1)*8 |
1942 | |.if DUALNUM | 2220 | |.if DUALNUM |
1943 | | stw TISNUM, 8(RA) | 2221 | | stw TISNUM, 8(RA) |
@@ -1950,13 +2228,20 @@ static void build_subroutines(BuildCtx *ctx) | |||
1950 | |.ffunc_n math_modf | 2228 | |.ffunc_n math_modf |
1951 | |.if GPR64 | 2229 | |.if GPR64 |
1952 | | la CARG2, -8(BASE) | 2230 | | la CARG2, -8(BASE) |
1953 | |.else | 2231 | |.elif FPU |
1954 | | la CARG1, -8(BASE) | 2232 | | la CARG1, -8(BASE) |
2233 | |.else | ||
2234 | | la CARG3, -8(BASE) | ||
1955 | |.endif | 2235 | |.endif |
1956 | | lwz PC, FRAME_PC(BASE) | 2236 | | lwz PC, FRAME_PC(BASE) |
1957 | | blex modf | 2237 | | blex modf |
1958 | | la RA, -8(BASE) | 2238 | | la RA, -8(BASE) |
2239 | |.if FPU | ||
1959 | | stfd FARG1, 0(BASE) | 2240 | | stfd FARG1, 0(BASE) |
2241 | |.else | ||
2242 | | stw CRET1, 0(BASE) | ||
2243 | | stw CRET2, 4(BASE) | ||
2244 | |.endif | ||
1960 | | li RD, (2+1)*8 | 2245 | | li RD, (2+1)*8 |
1961 | | b ->fff_res | 2246 | | b ->fff_res |
1962 | | | 2247 | | |
@@ -1964,13 +2249,13 @@ static void build_subroutines(BuildCtx *ctx) | |||
1964 | |.if DUALNUM | 2249 | |.if DUALNUM |
1965 | | .ffunc_1 name | 2250 | | .ffunc_1 name |
1966 | | checknum CARG3 | 2251 | | checknum CARG3 |
1967 | | addi TMP1, BASE, 8 | 2252 | | addi SAVE0, BASE, 8 |
1968 | | add TMP2, BASE, NARGS8:RC | 2253 | | add SAVE1, BASE, NARGS8:RC |
1969 | | bne >4 | 2254 | | bne >4 |
1970 | |1: // Handle integers. | 2255 | |1: // Handle integers. |
1971 | | lwz CARG4, 0(TMP1) | 2256 | | lwz CARG4, 0(SAVE0) |
1972 | | cmplw cr1, TMP1, TMP2 | 2257 | | cmplw cr1, SAVE0, SAVE1 |
1973 | | lwz CARG2, 4(TMP1) | 2258 | | lwz CARG2, 4(SAVE0) |
1974 | | bge cr1, ->fff_resi | 2259 | | bge cr1, ->fff_resi |
1975 | | checknum CARG4 | 2260 | | checknum CARG4 |
1976 | | xoris TMP0, CARG1, 0x8000 | 2261 | | xoris TMP0, CARG1, 0x8000 |
@@ -1987,36 +2272,76 @@ static void build_subroutines(BuildCtx *ctx) | |||
1987 | |.if GPR64 | 2272 | |.if GPR64 |
1988 | | rldicl CARG1, CARG1, 0, 32 | 2273 | | rldicl CARG1, CARG1, 0, 32 |
1989 | |.endif | 2274 | |.endif |
1990 | | addi TMP1, TMP1, 8 | 2275 | | addi SAVE0, SAVE0, 8 |
1991 | | b <1 | 2276 | | b <1 |
1992 | |3: | 2277 | |3: |
1993 | | bge ->fff_fallback | 2278 | | bge ->fff_fallback |
1994 | | // Convert intermediate result to number and continue below. | 2279 | | // Convert intermediate result to number and continue below. |
2280 | |.if FPU | ||
1995 | | tonum_i FARG1, CARG1 | 2281 | | tonum_i FARG1, CARG1 |
1996 | | lfd FARG2, 0(TMP1) | 2282 | | lfd FARG2, 0(SAVE0) |
2283 | |.else | ||
2284 | | mr CARG2, CARG1 | ||
2285 | | bl ->vm_sfi2d_1 | ||
2286 | | lwz CARG3, 0(SAVE0) | ||
2287 | | lwz CARG4, 4(SAVE0) | ||
2288 | |.endif | ||
1997 | | b >6 | 2289 | | b >6 |
1998 | |4: | 2290 | |4: |
2291 | |.if FPU | ||
1999 | | lfd FARG1, 0(BASE) | 2292 | | lfd FARG1, 0(BASE) |
2293 | |.else | ||
2294 | | lwz CARG1, 0(BASE) | ||
2295 | | lwz CARG2, 4(BASE) | ||
2296 | |.endif | ||
2000 | | bge ->fff_fallback | 2297 | | bge ->fff_fallback |
2001 | |5: // Handle numbers. | 2298 | |5: // Handle numbers. |
2002 | | lwz CARG4, 0(TMP1) | 2299 | | lwz CARG3, 0(SAVE0) |
2003 | | cmplw cr1, TMP1, TMP2 | 2300 | | cmplw cr1, SAVE0, SAVE1 |
2004 | | lfd FARG2, 0(TMP1) | 2301 | |.if FPU |
2302 | | lfd FARG2, 0(SAVE0) | ||
2303 | |.else | ||
2304 | | lwz CARG4, 4(SAVE0) | ||
2305 | |.endif | ||
2005 | | bge cr1, ->fff_resn | 2306 | | bge cr1, ->fff_resn |
2006 | | checknum CARG4; bge >7 | 2307 | | checknum CARG3; bge >7 |
2007 | |6: | 2308 | |6: |
2309 | | addi SAVE0, SAVE0, 8 | ||
2310 | |.if FPU | ||
2008 | | fsub f0, FARG1, FARG2 | 2311 | | fsub f0, FARG1, FARG2 |
2009 | | addi TMP1, TMP1, 8 | ||
2010 | |.if ismax | 2312 | |.if ismax |
2011 | | fsel FARG1, f0, FARG1, FARG2 | 2313 | | fsel FARG1, f0, FARG1, FARG2 |
2012 | |.else | 2314 | |.else |
2013 | | fsel FARG1, f0, FARG2, FARG1 | 2315 | | fsel FARG1, f0, FARG2, FARG1 |
2014 | |.endif | 2316 | |.endif |
2317 | |.else | ||
2318 | | stw CARG1, SFSAVE_1 | ||
2319 | | stw CARG2, SFSAVE_2 | ||
2320 | | stw CARG3, SFSAVE_3 | ||
2321 | | stw CARG4, SFSAVE_4 | ||
2322 | | blex __ledf2 | ||
2323 | | cmpwi CRET1, 0 | ||
2324 | |.if ismax | ||
2325 | | blt >8 | ||
2326 | |.else | ||
2327 | | bge >8 | ||
2328 | |.endif | ||
2329 | | lwz CARG1, SFSAVE_1 | ||
2330 | | lwz CARG2, SFSAVE_2 | ||
2331 | | b <5 | ||
2332 | |8: | ||
2333 | | lwz CARG1, SFSAVE_3 | ||
2334 | | lwz CARG2, SFSAVE_4 | ||
2335 | |.endif | ||
2015 | | b <5 | 2336 | | b <5 |
2016 | |7: // Convert integer to number and continue above. | 2337 | |7: // Convert integer to number and continue above. |
2017 | | lwz CARG2, 4(TMP1) | 2338 | | lwz CARG3, 4(SAVE0) |
2018 | | bne ->fff_fallback | 2339 | | bne ->fff_fallback |
2019 | | tonum_i FARG2, CARG2 | 2340 | |.if FPU |
2341 | | tonum_i FARG2, CARG3 | ||
2342 | |.else | ||
2343 | | bl ->vm_sfi2d_2 | ||
2344 | |.endif | ||
2020 | | b <6 | 2345 | | b <6 |
2021 | |.else | 2346 | |.else |
2022 | | .ffunc_n name | 2347 | | .ffunc_n name |
@@ -2044,11 +2369,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
2044 | | | 2369 | | |
2045 | |//-- String library ----------------------------------------------------- | 2370 | |//-- String library ----------------------------------------------------- |
2046 | | | 2371 | | |
2047 | |.ffunc_1 string_len | ||
2048 | | checkstr CARG3; bne ->fff_fallback | ||
2049 | | lwz CRET1, STR:CARG1->len | ||
2050 | | b ->fff_resi | ||
2051 | | | ||
2052 | |.ffunc string_byte // Only handle the 1-arg case here. | 2372 | |.ffunc string_byte // Only handle the 1-arg case here. |
2053 | | cmplwi NARGS8:RC, 8 | 2373 | | cmplwi NARGS8:RC, 8 |
2054 | | lwz CARG3, 0(BASE) | 2374 | | lwz CARG3, 0(BASE) |
@@ -2103,6 +2423,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
2103 | | stp BASE, L->base | 2423 | | stp BASE, L->base |
2104 | | stw PC, SAVE_PC | 2424 | | stw PC, SAVE_PC |
2105 | | bl extern lj_str_new // (lua_State *L, char *str, size_t l) | 2425 | | bl extern lj_str_new // (lua_State *L, char *str, size_t l) |
2426 | |->fff_resstr: | ||
2106 | | // Returns GCstr *. | 2427 | | // Returns GCstr *. |
2107 | | lp BASE, L->base | 2428 | | lp BASE, L->base |
2108 | | li CARG3, LJ_TSTR | 2429 | | li CARG3, LJ_TSTR |
@@ -2180,114 +2501,29 @@ static void build_subroutines(BuildCtx *ctx) | |||
2180 | | addi TMP1, TMP1, 1 // start = 1 + (start ? start+len : 0) | 2501 | | addi TMP1, TMP1, 1 // start = 1 + (start ? start+len : 0) |
2181 | | b <3 | 2502 | | b <3 |
2182 | | | 2503 | | |
2183 | |.ffunc string_rep // Only handle the 1-char case inline. | 2504 | |.macro ffstring_op, name |
2184 | | ffgccheck | 2505 | | .ffunc string_ .. name |
2185 | | cmplwi NARGS8:RC, 16 | ||
2186 | | lwz TMP0, 0(BASE) | ||
2187 | | lwz STR:CARG1, 4(BASE) | ||
2188 | | lwz CARG4, 8(BASE) | ||
2189 | |.if DUALNUM | ||
2190 | | lwz CARG3, 12(BASE) | ||
2191 | |.else | ||
2192 | | lfd FARG2, 8(BASE) | ||
2193 | |.endif | ||
2194 | | bne ->fff_fallback // Exactly 2 arguments. | ||
2195 | | checkstr TMP0; bne ->fff_fallback | ||
2196 | |.if DUALNUM | ||
2197 | | checknum CARG4; bne ->fff_fallback | ||
2198 | |.else | ||
2199 | | checknum CARG4; bge ->fff_fallback | ||
2200 | | toint CARG3, FARG2 | ||
2201 | |.endif | ||
2202 | | lwz TMP0, STR:CARG1->len | ||
2203 | | cmpwi CARG3, 0 | ||
2204 | | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) | ||
2205 | | ble >2 // Count <= 0? (or non-int) | ||
2206 | | cmplwi TMP0, 1 | ||
2207 | | subi TMP2, CARG3, 1 | ||
2208 | | blt >2 // Zero length string? | ||
2209 | | cmplw cr1, TMP1, CARG3 | ||
2210 | | bne ->fff_fallback // Fallback for > 1-char strings. | ||
2211 | | lbz TMP0, STR:CARG1[1] | ||
2212 | | lp CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) | ||
2213 | | blt cr1, ->fff_fallback | ||
2214 | |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?). | ||
2215 | | cmplwi TMP2, 0 | ||
2216 | | stbx TMP0, CARG2, TMP2 | ||
2217 | | subi TMP2, TMP2, 1 | ||
2218 | | bne <1 | ||
2219 | | b ->fff_newstr | ||
2220 | |2: // Return empty string. | ||
2221 | | la STR:CARG1, DISPATCH_GL(strempty)(DISPATCH) | ||
2222 | | li CARG3, LJ_TSTR | ||
2223 | | b ->fff_restv | ||
2224 | | | ||
2225 | |.ffunc string_reverse | ||
2226 | | ffgccheck | ||
2227 | | cmplwi NARGS8:RC, 8 | ||
2228 | | lwz CARG3, 0(BASE) | ||
2229 | | lwz STR:CARG1, 4(BASE) | ||
2230 | | blt ->fff_fallback | ||
2231 | | checkstr CARG3 | ||
2232 | | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) | ||
2233 | | bne ->fff_fallback | ||
2234 | | lwz CARG3, STR:CARG1->len | ||
2235 | | la CARG1, #STR(STR:CARG1) | ||
2236 | | lp CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) | ||
2237 | | li TMP2, 0 | ||
2238 | | cmplw TMP1, CARG3 | ||
2239 | | subi TMP3, CARG3, 1 | ||
2240 | | blt ->fff_fallback | ||
2241 | |1: // Reverse string copy. | ||
2242 | | cmpwi TMP3, 0 | ||
2243 | | lbzx TMP1, CARG1, TMP2 | ||
2244 | | blty ->fff_newstr | ||
2245 | | stbx TMP1, CARG2, TMP3 | ||
2246 | | subi TMP3, TMP3, 1 | ||
2247 | | addi TMP2, TMP2, 1 | ||
2248 | | b <1 | ||
2249 | | | ||
2250 | |.macro ffstring_case, name, lo | ||
2251 | | .ffunc name | ||
2252 | | ffgccheck | 2506 | | ffgccheck |
2253 | | cmplwi NARGS8:RC, 8 | 2507 | | cmplwi NARGS8:RC, 8 |
2254 | | lwz CARG3, 0(BASE) | 2508 | | lwz CARG3, 0(BASE) |
2255 | | lwz STR:CARG1, 4(BASE) | 2509 | | lwz STR:CARG2, 4(BASE) |
2256 | | blt ->fff_fallback | 2510 | | blt ->fff_fallback |
2257 | | checkstr CARG3 | 2511 | | checkstr CARG3 |
2258 | | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) | 2512 | | la SBUF:CARG1, DISPATCH_GL(tmpbuf)(DISPATCH) |
2259 | | bne ->fff_fallback | 2513 | | bne ->fff_fallback |
2260 | | lwz CARG3, STR:CARG1->len | 2514 | | lwz TMP0, SBUF:CARG1->b |
2261 | | la CARG1, #STR(STR:CARG1) | 2515 | | stw L, SBUF:CARG1->L |
2262 | | lp CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) | 2516 | | stp BASE, L->base |
2263 | | cmplw TMP1, CARG3 | 2517 | | stw PC, SAVE_PC |
2264 | | li TMP2, 0 | 2518 | | stw TMP0, SBUF:CARG1->p |
2265 | | blt ->fff_fallback | 2519 | | bl extern lj_buf_putstr_ .. name |
2266 | |1: // ASCII case conversion. | 2520 | | bl extern lj_buf_tostr |
2267 | | cmplw TMP2, CARG3 | 2521 | | b ->fff_resstr |
2268 | | lbzx TMP1, CARG1, TMP2 | ||
2269 | | bgey ->fff_newstr | ||
2270 | | subi TMP0, TMP1, lo | ||
2271 | | xori TMP3, TMP1, 0x20 | ||
2272 | | addic TMP0, TMP0, -26 | ||
2273 | | subfe TMP3, TMP3, TMP3 | ||
2274 | | rlwinm TMP3, TMP3, 0, 26, 26 // x &= 0x20. | ||
2275 | | xor TMP1, TMP1, TMP3 | ||
2276 | | stbx TMP1, CARG2, TMP2 | ||
2277 | | addi TMP2, TMP2, 1 | ||
2278 | | b <1 | ||
2279 | |.endmacro | 2522 | |.endmacro |
2280 | | | 2523 | | |
2281 | |ffstring_case string_lower, 65 | 2524 | |ffstring_op reverse |
2282 | |ffstring_case string_upper, 97 | 2525 | |ffstring_op lower |
2283 | | | 2526 | |ffstring_op upper |
2284 | |//-- Table library ------------------------------------------------------ | ||
2285 | | | ||
2286 | |.ffunc_1 table_getn | ||
2287 | | checktab CARG3; bne ->fff_fallback | ||
2288 | | bl extern lj_tab_len // (GCtab *t) | ||
2289 | | // Returns uint32_t (but less than 2^31). | ||
2290 | | b ->fff_resi | ||
2291 | | | 2527 | | |
2292 | |//-- Bit library -------------------------------------------------------- | 2528 | |//-- Bit library -------------------------------------------------------- |
2293 | | | 2529 | | |
@@ -2305,28 +2541,37 @@ static void build_subroutines(BuildCtx *ctx) | |||
2305 | | | 2541 | | |
2306 | |.macro .ffunc_bit_op, name, ins | 2542 | |.macro .ffunc_bit_op, name, ins |
2307 | | .ffunc_bit name | 2543 | | .ffunc_bit name |
2308 | | addi TMP1, BASE, 8 | 2544 | | addi SAVE0, BASE, 8 |
2309 | | add TMP2, BASE, NARGS8:RC | 2545 | | add SAVE1, BASE, NARGS8:RC |
2310 | |1: | 2546 | |1: |
2311 | | lwz CARG4, 0(TMP1) | 2547 | | lwz CARG4, 0(SAVE0) |
2312 | | cmplw cr1, TMP1, TMP2 | 2548 | | cmplw cr1, SAVE0, SAVE1 |
2313 | |.if DUALNUM | 2549 | |.if DUALNUM |
2314 | | lwz CARG2, 4(TMP1) | 2550 | | lwz CARG2, 4(SAVE0) |
2315 | |.else | 2551 | |.else |
2316 | | lfd FARG1, 0(TMP1) | 2552 | | lfd FARG1, 0(SAVE0) |
2317 | |.endif | 2553 | |.endif |
2318 | | bgey cr1, ->fff_resi | 2554 | | bgey cr1, ->fff_resi |
2319 | | checknum CARG4 | 2555 | | checknum CARG4 |
2320 | |.if DUALNUM | 2556 | |.if DUALNUM |
2557 | |.if FPU | ||
2321 | | bnel ->fff_bitop_fb | 2558 | | bnel ->fff_bitop_fb |
2322 | |.else | 2559 | |.else |
2560 | | beq >3 | ||
2561 | | stw CARG1, SFSAVE_1 | ||
2562 | | bl ->fff_bitop_fb | ||
2563 | | mr CARG2, CARG1 | ||
2564 | | lwz CARG1, SFSAVE_1 | ||
2565 | |3: | ||
2566 | |.endif | ||
2567 | |.else | ||
2323 | | fadd FARG1, FARG1, TOBIT | 2568 | | fadd FARG1, FARG1, TOBIT |
2324 | | bge ->fff_fallback | 2569 | | bge ->fff_fallback |
2325 | | stfd FARG1, TMPD | 2570 | | stfd FARG1, TMPD |
2326 | | lwz CARG2, TMPD_LO | 2571 | | lwz CARG2, TMPD_LO |
2327 | |.endif | 2572 | |.endif |
2328 | | ins CARG1, CARG1, CARG2 | 2573 | | ins CARG1, CARG1, CARG2 |
2329 | | addi TMP1, TMP1, 8 | 2574 | | addi SAVE0, SAVE0, 8 |
2330 | | b <1 | 2575 | | b <1 |
2331 | |.endmacro | 2576 | |.endmacro |
2332 | | | 2577 | | |
@@ -2348,7 +2593,14 @@ static void build_subroutines(BuildCtx *ctx) | |||
2348 | |.macro .ffunc_bit_sh, name, ins, shmod | 2593 | |.macro .ffunc_bit_sh, name, ins, shmod |
2349 | |.if DUALNUM | 2594 | |.if DUALNUM |
2350 | | .ffunc_2 bit_..name | 2595 | | .ffunc_2 bit_..name |
2596 | |.if FPU | ||
2351 | | checknum CARG3; bnel ->fff_tobit_fb | 2597 | | checknum CARG3; bnel ->fff_tobit_fb |
2598 | |.else | ||
2599 | | checknum CARG3; beq >1 | ||
2600 | | bl ->fff_tobit_fb | ||
2601 | | lwz CARG2, 12(BASE) // Conversion polluted CARG2. | ||
2602 | |1: | ||
2603 | |.endif | ||
2352 | | // Note: no inline conversion from number for 2nd argument! | 2604 | | // Note: no inline conversion from number for 2nd argument! |
2353 | | checknum CARG4; bne ->fff_fallback | 2605 | | checknum CARG4; bne ->fff_fallback |
2354 | |.else | 2606 | |.else |
@@ -2385,27 +2637,77 @@ static void build_subroutines(BuildCtx *ctx) | |||
2385 | |->fff_resn: | 2637 | |->fff_resn: |
2386 | | lwz PC, FRAME_PC(BASE) | 2638 | | lwz PC, FRAME_PC(BASE) |
2387 | | la RA, -8(BASE) | 2639 | | la RA, -8(BASE) |
2640 | |.if FPU | ||
2388 | | stfd FARG1, -8(BASE) | 2641 | | stfd FARG1, -8(BASE) |
2642 | |.else | ||
2643 | | stw CARG1, -8(BASE) | ||
2644 | | stw CARG2, -4(BASE) | ||
2645 | |.endif | ||
2389 | | b ->fff_res1 | 2646 | | b ->fff_res1 |
2390 | | | 2647 | | |
2391 | |// Fallback FP number to bit conversion. | 2648 | |// Fallback FP number to bit conversion. |
2392 | |->fff_tobit_fb: | 2649 | |->fff_tobit_fb: |
2393 | |.if DUALNUM | 2650 | |.if DUALNUM |
2651 | |.if FPU | ||
2394 | | lfd FARG1, 0(BASE) | 2652 | | lfd FARG1, 0(BASE) |
2395 | | bgt ->fff_fallback | 2653 | | bgt ->fff_fallback |
2396 | | fadd FARG1, FARG1, TOBIT | 2654 | | fadd FARG1, FARG1, TOBIT |
2397 | | stfd FARG1, TMPD | 2655 | | stfd FARG1, TMPD |
2398 | | lwz CARG1, TMPD_LO | 2656 | | lwz CARG1, TMPD_LO |
2399 | | blr | 2657 | | blr |
2658 | |.else | ||
2659 | | bgt ->fff_fallback | ||
2660 | | mr CARG2, CARG1 | ||
2661 | | mr CARG1, CARG3 | ||
2662 | |// Modifies: CARG1, CARG2, TMP0, TMP1, TMP2. | ||
2663 | |->vm_tobit: | ||
2664 | | slwi TMP2, CARG1, 1 | ||
2665 | | addis TMP2, TMP2, 0x0020 | ||
2666 | | cmpwi TMP2, 0 | ||
2667 | | bge >2 | ||
2668 | | li TMP1, 0x3e0 | ||
2669 | | srawi TMP2, TMP2, 21 | ||
2670 | | not TMP1, TMP1 | ||
2671 | | sub. TMP2, TMP1, TMP2 | ||
2672 | | cmpwi cr7, CARG1, 0 | ||
2673 | | blt >1 | ||
2674 | | slwi TMP1, CARG1, 11 | ||
2675 | | srwi TMP0, CARG2, 21 | ||
2676 | | oris TMP1, TMP1, 0x8000 | ||
2677 | | or TMP1, TMP1, TMP0 | ||
2678 | | srw CARG1, TMP1, TMP2 | ||
2679 | | bclr 4, 28 // Return if cr7[lt] == 0, no hint. | ||
2680 | | neg CARG1, CARG1 | ||
2681 | | blr | ||
2682 | |1: | ||
2683 | | addi TMP2, TMP2, 21 | ||
2684 | | srw TMP1, CARG2, TMP2 | ||
2685 | | slwi CARG2, CARG1, 12 | ||
2686 | | subfic TMP2, TMP2, 20 | ||
2687 | | slw TMP0, CARG2, TMP2 | ||
2688 | | or CARG1, TMP1, TMP0 | ||
2689 | | bclr 4, 28 // Return if cr7[lt] == 0, no hint. | ||
2690 | | neg CARG1, CARG1 | ||
2691 | | blr | ||
2692 | |2: | ||
2693 | | li CARG1, 0 | ||
2694 | | blr | ||
2695 | |.endif | ||
2400 | |.endif | 2696 | |.endif |
2401 | |->fff_bitop_fb: | 2697 | |->fff_bitop_fb: |
2402 | |.if DUALNUM | 2698 | |.if DUALNUM |
2403 | | lfd FARG1, 0(TMP1) | 2699 | |.if FPU |
2700 | | lfd FARG1, 0(SAVE0) | ||
2404 | | bgt ->fff_fallback | 2701 | | bgt ->fff_fallback |
2405 | | fadd FARG1, FARG1, TOBIT | 2702 | | fadd FARG1, FARG1, TOBIT |
2406 | | stfd FARG1, TMPD | 2703 | | stfd FARG1, TMPD |
2407 | | lwz CARG2, TMPD_LO | 2704 | | lwz CARG2, TMPD_LO |
2408 | | blr | 2705 | | blr |
2706 | |.else | ||
2707 | | bgt ->fff_fallback | ||
2708 | | mr CARG1, CARG4 | ||
2709 | | b ->vm_tobit | ||
2710 | |.endif | ||
2409 | |.endif | 2711 | |.endif |
2410 | | | 2712 | | |
2411 | |//----------------------------------------------------------------------- | 2713 | |//----------------------------------------------------------------------- |
@@ -2589,15 +2891,88 @@ static void build_subroutines(BuildCtx *ctx) | |||
2589 | | mtctr CRET1 | 2891 | | mtctr CRET1 |
2590 | | bctr | 2892 | | bctr |
2591 | | | 2893 | | |
2894 | |->cont_stitch: // Trace stitching. | ||
2895 | |.if JIT | ||
2896 | | // RA = resultptr, RB = meta base | ||
2897 | | lwz INS, -4(PC) | ||
2898 | | lwz TRACE:TMP2, -20(RB) // Save previous trace. | ||
2899 | | addic. TMP1, MULTRES, -8 | ||
2900 | | decode_RA8 RC, INS // Call base. | ||
2901 | | beq >2 | ||
2902 | |1: // Move results down. | ||
2903 | |.if FPU | ||
2904 | | lfd f0, 0(RA) | ||
2905 | |.else | ||
2906 | | lwz CARG1, 0(RA) | ||
2907 | | lwz CARG2, 4(RA) | ||
2908 | |.endif | ||
2909 | | addic. TMP1, TMP1, -8 | ||
2910 | | addi RA, RA, 8 | ||
2911 | |.if FPU | ||
2912 | | stfdx f0, BASE, RC | ||
2913 | |.else | ||
2914 | | add CARG3, BASE, RC | ||
2915 | | stw CARG1, 0(CARG3) | ||
2916 | | stw CARG2, 4(CARG3) | ||
2917 | |.endif | ||
2918 | | addi RC, RC, 8 | ||
2919 | | bne <1 | ||
2920 | |2: | ||
2921 | | decode_RA8 RA, INS | ||
2922 | | decode_RB8 RB, INS | ||
2923 | | add RA, RA, RB | ||
2924 | |3: | ||
2925 | | cmplw RA, RC | ||
2926 | | bgt >9 // More results wanted? | ||
2927 | | | ||
2928 | | lhz TMP3, TRACE:TMP2->traceno | ||
2929 | | lhz RD, TRACE:TMP2->link | ||
2930 | | cmpw RD, TMP3 | ||
2931 | | cmpwi cr1, RD, 0 | ||
2932 | | beq ->cont_nop // Blacklisted. | ||
2933 | | slwi RD, RD, 3 | ||
2934 | | bne cr1, =>BC_JLOOP // Jump to stitched trace. | ||
2935 | | | ||
2936 | | // Stitch a new trace to the previous trace. | ||
2937 | | stw TMP3, DISPATCH_J(exitno)(DISPATCH) | ||
2938 | | stp L, DISPATCH_J(L)(DISPATCH) | ||
2939 | | stp BASE, L->base | ||
2940 | | addi CARG1, DISPATCH, GG_DISP2J | ||
2941 | | mr CARG2, PC | ||
2942 | | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) | ||
2943 | | lp BASE, L->base | ||
2944 | | b ->cont_nop | ||
2945 | | | ||
2946 | |9: | ||
2947 | | stwx TISNIL, BASE, RC | ||
2948 | | addi RC, RC, 8 | ||
2949 | | b <3 | ||
2950 | |.endif | ||
2951 | | | ||
2952 | |->vm_profhook: // Dispatch target for profiler hook. | ||
2953 | #if LJ_HASPROFILE | ||
2954 | | mr CARG1, L | ||
2955 | | stw MULTRES, SAVE_MULTRES | ||
2956 | | mr CARG2, PC | ||
2957 | | stp BASE, L->base | ||
2958 | | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc) | ||
2959 | | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. | ||
2960 | | lp BASE, L->base | ||
2961 | | subi PC, PC, 4 | ||
2962 | | b ->cont_nop | ||
2963 | #endif | ||
2964 | | | ||
2592 | |//----------------------------------------------------------------------- | 2965 | |//----------------------------------------------------------------------- |
2593 | |//-- Trace exit handler ------------------------------------------------- | 2966 | |//-- Trace exit handler ------------------------------------------------- |
2594 | |//----------------------------------------------------------------------- | 2967 | |//----------------------------------------------------------------------- |
2595 | | | 2968 | | |
2596 | |.macro savex_, a, b, c, d | 2969 | |.macro savex_, a, b, c, d |
2970 | |.if FPU | ||
2597 | | stfd f..a, 16+a*8(sp) | 2971 | | stfd f..a, 16+a*8(sp) |
2598 | | stfd f..b, 16+b*8(sp) | 2972 | | stfd f..b, 16+b*8(sp) |
2599 | | stfd f..c, 16+c*8(sp) | 2973 | | stfd f..c, 16+c*8(sp) |
2600 | | stfd f..d, 16+d*8(sp) | 2974 | | stfd f..d, 16+d*8(sp) |
2975 | |.endif | ||
2601 | |.endmacro | 2976 | |.endmacro |
2602 | | | 2977 | | |
2603 | |->vm_exit_handler: | 2978 | |->vm_exit_handler: |
@@ -2623,16 +2998,16 @@ static void build_subroutines(BuildCtx *ctx) | |||
2623 | | savex_ 20,21,22,23 | 2998 | | savex_ 20,21,22,23 |
2624 | | lhz CARG4, 2(CARG3) // Load trace number. | 2999 | | lhz CARG4, 2(CARG3) // Load trace number. |
2625 | | savex_ 24,25,26,27 | 3000 | | savex_ 24,25,26,27 |
2626 | | lwz L, DISPATCH_GL(jit_L)(DISPATCH) | 3001 | | lwz L, DISPATCH_GL(cur_L)(DISPATCH) |
2627 | | savex_ 28,29,30,31 | 3002 | | savex_ 28,29,30,31 |
2628 | | sub CARG3, TMP0, CARG3 // Compute exit number. | 3003 | | sub CARG3, TMP0, CARG3 // Compute exit number. |
2629 | | lp BASE, DISPATCH_GL(jit_base)(DISPATCH) | 3004 | | lp BASE, DISPATCH_GL(jit_base)(DISPATCH) |
2630 | | srwi CARG3, CARG3, 2 | 3005 | | srwi CARG3, CARG3, 2 |
2631 | | stw L, DISPATCH_J(L)(DISPATCH) | 3006 | | stp L, DISPATCH_J(L)(DISPATCH) |
2632 | | subi CARG3, CARG3, 2 | 3007 | | subi CARG3, CARG3, 2 |
2633 | | stw TMP1, DISPATCH_GL(jit_L)(DISPATCH) | ||
2634 | | stw CARG4, DISPATCH_J(parent)(DISPATCH) | ||
2635 | | stp BASE, L->base | 3008 | | stp BASE, L->base |
3009 | | stw CARG4, DISPATCH_J(parent)(DISPATCH) | ||
3010 | | stw TMP1, DISPATCH_GL(jit_base)(DISPATCH) | ||
2636 | | addi CARG1, DISPATCH, GG_DISP2J | 3011 | | addi CARG1, DISPATCH, GG_DISP2J |
2637 | | stw CARG3, DISPATCH_J(exitno)(DISPATCH) | 3012 | | stw CARG3, DISPATCH_J(exitno)(DISPATCH) |
2638 | | addi CARG2, sp, 16 | 3013 | | addi CARG2, sp, 16 |
@@ -2656,28 +3031,29 @@ static void build_subroutines(BuildCtx *ctx) | |||
2656 | | // CARG1 = MULTRES or negated error code, BASE, PC and JGL set. | 3031 | | // CARG1 = MULTRES or negated error code, BASE, PC and JGL set. |
2657 | | lwz L, SAVE_L | 3032 | | lwz L, SAVE_L |
2658 | | addi DISPATCH, JGL, -GG_DISP2G-32768 | 3033 | | addi DISPATCH, JGL, -GG_DISP2G-32768 |
3034 | | stp BASE, L->base | ||
2659 | |1: | 3035 | |1: |
2660 | | cmpwi CARG1, 0 | 3036 | | cmpwi CARG1, 0 |
2661 | | blt >3 // Check for error from exit. | 3037 | | blt >9 // Check for error from exit. |
2662 | | lwz LFUNC:TMP1, FRAME_FUNC(BASE) | 3038 | | lwz LFUNC:RB, FRAME_FUNC(BASE) |
2663 | | slwi MULTRES, CARG1, 3 | 3039 | | slwi MULTRES, CARG1, 3 |
2664 | | li TMP2, 0 | 3040 | | li TMP2, 0 |
2665 | | stw MULTRES, SAVE_MULTRES | 3041 | | stw MULTRES, SAVE_MULTRES |
2666 | | lwz TMP1, LFUNC:TMP1->pc | 3042 | | lwz TMP1, LFUNC:RB->pc |
2667 | | stw TMP2, DISPATCH_GL(jit_L)(DISPATCH) | 3043 | | stw TMP2, DISPATCH_GL(jit_base)(DISPATCH) |
2668 | | lwz KBASE, PC2PROTO(k)(TMP1) | 3044 | | lwz KBASE, PC2PROTO(k)(TMP1) |
2669 | | // Setup type comparison constants. | 3045 | | // Setup type comparison constants. |
2670 | | li TISNUM, LJ_TISNUM | 3046 | | li TISNUM, LJ_TISNUM |
2671 | | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | 3047 | | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). |
2672 | | stw TMP3, TMPD | 3048 | | .FPU stw TMP3, TMPD |
2673 | | li ZERO, 0 | 3049 | | li ZERO, 0 |
2674 | | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). | 3050 | | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). |
2675 | | lfs TOBIT, TMPD | 3051 | | .FPU lfs TOBIT, TMPD |
2676 | | stw TMP3, TMPD | 3052 | | .FPU stw TMP3, TMPD |
2677 | | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) | 3053 | | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) |
2678 | | li TISNIL, LJ_TNIL | 3054 | | li TISNIL, LJ_TNIL |
2679 | | stw TMP0, TONUM_HI | 3055 | | .FPU stw TMP0, TONUM_HI |
2680 | | lfs TONUM, TMPD | 3056 | | .FPU lfs TONUM, TMPD |
2681 | | // Modified copy of ins_next which handles function header dispatch, too. | 3057 | | // Modified copy of ins_next which handles function header dispatch, too. |
2682 | | lwz INS, 0(PC) | 3058 | | lwz INS, 0(PC) |
2683 | | addi PC, PC, 4 | 3059 | | addi PC, PC, 4 |
@@ -2694,11 +3070,25 @@ static void build_subroutines(BuildCtx *ctx) | |||
2694 | | decode_RC8 RC, INS | 3070 | | decode_RC8 RC, INS |
2695 | | bctr | 3071 | | bctr |
2696 | |2: | 3072 | |2: |
3073 | | cmplwi TMP1, (BC_FUNCC+2)*4 // Fast function? | ||
3074 | | blt >3 | ||
3075 | | // Check frame below fast function. | ||
3076 | | lwz TMP1, FRAME_PC(BASE) | ||
3077 | | andix. TMP0, TMP1, FRAME_TYPE | ||
3078 | | bney >3 // Trace stitching continuation? | ||
3079 | | // Otherwise set KBASE for Lua function below fast function. | ||
3080 | | lwz TMP2, -4(TMP1) | ||
3081 | | decode_RA8 TMP0, TMP2 | ||
3082 | | sub TMP1, BASE, TMP0 | ||
3083 | | lwz LFUNC:TMP2, -12(TMP1) | ||
3084 | | lwz TMP1, LFUNC:TMP2->pc | ||
3085 | | lwz KBASE, PC2PROTO(k)(TMP1) | ||
3086 | |3: | ||
2697 | | subi RC, MULTRES, 8 | 3087 | | subi RC, MULTRES, 8 |
2698 | | add RA, RA, BASE | 3088 | | add RA, RA, BASE |
2699 | | bctr | 3089 | | bctr |
2700 | | | 3090 | | |
2701 | |3: // Rethrow error from the right C frame. | 3091 | |9: // Rethrow error from the right C frame. |
2702 | | neg CARG2, CARG1 | 3092 | | neg CARG2, CARG1 |
2703 | | mr CARG1, L | 3093 | | mr CARG1, L |
2704 | | bl extern lj_err_throw // (lua_State *L, int errcode) | 3094 | | bl extern lj_err_throw // (lua_State *L, int errcode) |
@@ -2708,7 +3098,35 @@ static void build_subroutines(BuildCtx *ctx) | |||
2708 | |//-- Math helper functions ---------------------------------------------- | 3098 | |//-- Math helper functions ---------------------------------------------- |
2709 | |//----------------------------------------------------------------------- | 3099 | |//----------------------------------------------------------------------- |
2710 | | | 3100 | | |
2711 | |// NYI: Use internal implementations of floor, ceil, trunc. | 3101 | |// NYI: Use internal implementations of floor, ceil, trunc, sfcmp. |
3102 | | | ||
3103 | |.macro sfi2d, AHI, ALO | ||
3104 | |.if not FPU | ||
3105 | | mr. AHI, ALO | ||
3106 | | bclr 12, 2 // Handle zero first. | ||
3107 | | srawi TMP0, ALO, 31 | ||
3108 | | xor TMP1, ALO, TMP0 | ||
3109 | | sub TMP1, TMP1, TMP0 // Absolute value in TMP1. | ||
3110 | | cntlzw AHI, TMP1 | ||
3111 | | andix. TMP0, TMP0, 0x800 // Mask sign bit. | ||
3112 | | slw TMP1, TMP1, AHI // Align mantissa left with leading 1. | ||
3113 | | subfic AHI, AHI, 0x3ff+31-1 // Exponent -1 in AHI. | ||
3114 | | slwi ALO, TMP1, 21 | ||
3115 | | or AHI, AHI, TMP0 // Sign | Exponent. | ||
3116 | | srwi TMP1, TMP1, 11 | ||
3117 | | slwi AHI, AHI, 20 // Align left. | ||
3118 | | add AHI, AHI, TMP1 // Add mantissa, increment exponent. | ||
3119 | | blr | ||
3120 | |.endif | ||
3121 | |.endmacro | ||
3122 | | | ||
3123 | |// Input: CARG2. Output: CARG1, CARG2. Temporaries: TMP0, TMP1. | ||
3124 | |->vm_sfi2d_1: | ||
3125 | | sfi2d CARG1, CARG2 | ||
3126 | | | ||
3127 | |// Input: CARG4. Output: CARG3, CARG4. Temporaries: TMP0, TMP1. | ||
3128 | |->vm_sfi2d_2: | ||
3129 | | sfi2d CARG3, CARG4 | ||
2712 | | | 3130 | | |
2713 | |->vm_modi: | 3131 | |->vm_modi: |
2714 | | divwo. TMP0, CARG1, CARG2 | 3132 | | divwo. TMP0, CARG1, CARG2 |
@@ -2776,21 +3194,21 @@ static void build_subroutines(BuildCtx *ctx) | |||
2776 | | addi DISPATCH, r12, GG_G2DISP | 3194 | | addi DISPATCH, r12, GG_G2DISP |
2777 | | stw r11, CTSTATE->cb.slot | 3195 | | stw r11, CTSTATE->cb.slot |
2778 | | stw r3, CTSTATE->cb.gpr[0] | 3196 | | stw r3, CTSTATE->cb.gpr[0] |
2779 | | stfd f1, CTSTATE->cb.fpr[0] | 3197 | | .FPU stfd f1, CTSTATE->cb.fpr[0] |
2780 | | stw r4, CTSTATE->cb.gpr[1] | 3198 | | stw r4, CTSTATE->cb.gpr[1] |
2781 | | stfd f2, CTSTATE->cb.fpr[1] | 3199 | | .FPU stfd f2, CTSTATE->cb.fpr[1] |
2782 | | stw r5, CTSTATE->cb.gpr[2] | 3200 | | stw r5, CTSTATE->cb.gpr[2] |
2783 | | stfd f3, CTSTATE->cb.fpr[2] | 3201 | | .FPU stfd f3, CTSTATE->cb.fpr[2] |
2784 | | stw r6, CTSTATE->cb.gpr[3] | 3202 | | stw r6, CTSTATE->cb.gpr[3] |
2785 | | stfd f4, CTSTATE->cb.fpr[3] | 3203 | | .FPU stfd f4, CTSTATE->cb.fpr[3] |
2786 | | stw r7, CTSTATE->cb.gpr[4] | 3204 | | stw r7, CTSTATE->cb.gpr[4] |
2787 | | stfd f5, CTSTATE->cb.fpr[4] | 3205 | | .FPU stfd f5, CTSTATE->cb.fpr[4] |
2788 | | stw r8, CTSTATE->cb.gpr[5] | 3206 | | stw r8, CTSTATE->cb.gpr[5] |
2789 | | stfd f6, CTSTATE->cb.fpr[5] | 3207 | | .FPU stfd f6, CTSTATE->cb.fpr[5] |
2790 | | stw r9, CTSTATE->cb.gpr[6] | 3208 | | stw r9, CTSTATE->cb.gpr[6] |
2791 | | stfd f7, CTSTATE->cb.fpr[6] | 3209 | | .FPU stfd f7, CTSTATE->cb.fpr[6] |
2792 | | stw r10, CTSTATE->cb.gpr[7] | 3210 | | stw r10, CTSTATE->cb.gpr[7] |
2793 | | stfd f8, CTSTATE->cb.fpr[7] | 3211 | | .FPU stfd f8, CTSTATE->cb.fpr[7] |
2794 | | addi TMP0, sp, CFRAME_SPACE+8 | 3212 | | addi TMP0, sp, CFRAME_SPACE+8 |
2795 | | stw TMP0, CTSTATE->cb.stack | 3213 | | stw TMP0, CTSTATE->cb.stack |
2796 | | mr CARG1, CTSTATE | 3214 | | mr CARG1, CTSTATE |
@@ -2801,21 +3219,21 @@ static void build_subroutines(BuildCtx *ctx) | |||
2801 | | lp BASE, L:CRET1->base | 3219 | | lp BASE, L:CRET1->base |
2802 | | li TISNUM, LJ_TISNUM // Setup type comparison constants. | 3220 | | li TISNUM, LJ_TISNUM // Setup type comparison constants. |
2803 | | lp RC, L:CRET1->top | 3221 | | lp RC, L:CRET1->top |
2804 | | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | 3222 | | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). |
2805 | | li ZERO, 0 | 3223 | | li ZERO, 0 |
2806 | | mr L, CRET1 | 3224 | | mr L, CRET1 |
2807 | | stw TMP3, TMPD | 3225 | | .FPU stw TMP3, TMPD |
2808 | | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) | 3226 | | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) |
2809 | | lwz LFUNC:RB, FRAME_FUNC(BASE) | 3227 | | lwz LFUNC:RB, FRAME_FUNC(BASE) |
2810 | | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). | 3228 | | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). |
2811 | | stw TMP0, TONUM_HI | 3229 | | .FPU stw TMP0, TONUM_HI |
2812 | | li TISNIL, LJ_TNIL | 3230 | | li TISNIL, LJ_TNIL |
2813 | | li_vmstate INTERP | 3231 | | li_vmstate INTERP |
2814 | | lfs TOBIT, TMPD | 3232 | | .FPU lfs TOBIT, TMPD |
2815 | | stw TMP3, TMPD | 3233 | | .FPU stw TMP3, TMPD |
2816 | | sub RC, RC, BASE | 3234 | | sub RC, RC, BASE |
2817 | | st_vmstate | 3235 | | st_vmstate |
2818 | | lfs TONUM, TMPD | 3236 | | .FPU lfs TONUM, TMPD |
2819 | | ins_callt | 3237 | | ins_callt |
2820 | |.endif | 3238 | |.endif |
2821 | | | 3239 | | |
@@ -2829,7 +3247,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
2829 | | mr CARG2, RA | 3247 | | mr CARG2, RA |
2830 | | bl extern lj_ccallback_leave // (CTState *cts, TValue *o) | 3248 | | bl extern lj_ccallback_leave // (CTState *cts, TValue *o) |
2831 | | lwz CRET1, CTSTATE->cb.gpr[0] | 3249 | | lwz CRET1, CTSTATE->cb.gpr[0] |
2832 | | lfd FARG1, CTSTATE->cb.fpr[0] | 3250 | | .FPU lfd FARG1, CTSTATE->cb.fpr[0] |
2833 | | lwz CRET2, CTSTATE->cb.gpr[1] | 3251 | | lwz CRET2, CTSTATE->cb.gpr[1] |
2834 | | b ->vm_leave_unw | 3252 | | b ->vm_leave_unw |
2835 | |.endif | 3253 | |.endif |
@@ -2863,14 +3281,14 @@ static void build_subroutines(BuildCtx *ctx) | |||
2863 | | bge <1 | 3281 | | bge <1 |
2864 | |2: | 3282 | |2: |
2865 | | bney cr1, >3 | 3283 | | bney cr1, >3 |
2866 | | lfd f1, CCSTATE->fpr[0] | 3284 | | .FPU lfd f1, CCSTATE->fpr[0] |
2867 | | lfd f2, CCSTATE->fpr[1] | 3285 | | .FPU lfd f2, CCSTATE->fpr[1] |
2868 | | lfd f3, CCSTATE->fpr[2] | 3286 | | .FPU lfd f3, CCSTATE->fpr[2] |
2869 | | lfd f4, CCSTATE->fpr[3] | 3287 | | .FPU lfd f4, CCSTATE->fpr[3] |
2870 | | lfd f5, CCSTATE->fpr[4] | 3288 | | .FPU lfd f5, CCSTATE->fpr[4] |
2871 | | lfd f6, CCSTATE->fpr[5] | 3289 | | .FPU lfd f6, CCSTATE->fpr[5] |
2872 | | lfd f7, CCSTATE->fpr[6] | 3290 | | .FPU lfd f7, CCSTATE->fpr[6] |
2873 | | lfd f8, CCSTATE->fpr[7] | 3291 | | .FPU lfd f8, CCSTATE->fpr[7] |
2874 | |3: | 3292 | |3: |
2875 | | lp TMP0, CCSTATE->func | 3293 | | lp TMP0, CCSTATE->func |
2876 | | lwz CARG2, CCSTATE->gpr[1] | 3294 | | lwz CARG2, CCSTATE->gpr[1] |
@@ -2887,7 +3305,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
2887 | | lwz TMP2, -4(r14) | 3305 | | lwz TMP2, -4(r14) |
2888 | | lwz TMP0, 4(r14) | 3306 | | lwz TMP0, 4(r14) |
2889 | | stw CARG1, CCSTATE:TMP1->gpr[0] | 3307 | | stw CARG1, CCSTATE:TMP1->gpr[0] |
2890 | | stfd FARG1, CCSTATE:TMP1->fpr[0] | 3308 | | .FPU stfd FARG1, CCSTATE:TMP1->fpr[0] |
2891 | | stw CARG2, CCSTATE:TMP1->gpr[1] | 3309 | | stw CARG2, CCSTATE:TMP1->gpr[1] |
2892 | | mtlr TMP0 | 3310 | | mtlr TMP0 |
2893 | | stw CARG3, CCSTATE:TMP1->gpr[2] | 3311 | | stw CARG3, CCSTATE:TMP1->gpr[2] |
@@ -2916,19 +3334,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2916 | case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: | 3334 | case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: |
2917 | | // RA = src1*8, RD = src2*8, JMP with RD = target | 3335 | | // RA = src1*8, RD = src2*8, JMP with RD = target |
2918 | |.if DUALNUM | 3336 | |.if DUALNUM |
2919 | | lwzux TMP0, RA, BASE | 3337 | | lwzux CARG1, RA, BASE |
2920 | | addi PC, PC, 4 | 3338 | | addi PC, PC, 4 |
2921 | | lwz CARG2, 4(RA) | 3339 | | lwz CARG2, 4(RA) |
2922 | | lwzux TMP1, RD, BASE | 3340 | | lwzux CARG3, RD, BASE |
2923 | | lwz TMP2, -4(PC) | 3341 | | lwz TMP2, -4(PC) |
2924 | | checknum cr0, TMP0 | 3342 | | checknum cr0, CARG1 |
2925 | | lwz CARG3, 4(RD) | 3343 | | lwz CARG4, 4(RD) |
2926 | | decode_RD4 TMP2, TMP2 | 3344 | | decode_RD4 TMP2, TMP2 |
2927 | | checknum cr1, TMP1 | 3345 | | checknum cr1, CARG3 |
2928 | | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) | 3346 | | addis SAVE0, TMP2, -(BCBIAS_J*4 >> 16) |
2929 | | bne cr0, >7 | 3347 | | bne cr0, >7 |
2930 | | bne cr1, >8 | 3348 | | bne cr1, >8 |
2931 | | cmpw CARG2, CARG3 | 3349 | | cmpw CARG2, CARG4 |
2932 | if (op == BC_ISLT) { | 3350 | if (op == BC_ISLT) { |
2933 | | bge >2 | 3351 | | bge >2 |
2934 | } else if (op == BC_ISGE) { | 3352 | } else if (op == BC_ISGE) { |
@@ -2939,28 +3357,41 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2939 | | ble >2 | 3357 | | ble >2 |
2940 | } | 3358 | } |
2941 | |1: | 3359 | |1: |
2942 | | add PC, PC, TMP2 | 3360 | | add PC, PC, SAVE0 |
2943 | |2: | 3361 | |2: |
2944 | | ins_next | 3362 | | ins_next |
2945 | | | 3363 | | |
2946 | |7: // RA is not an integer. | 3364 | |7: // RA is not an integer. |
2947 | | bgt cr0, ->vmeta_comp | 3365 | | bgt cr0, ->vmeta_comp |
2948 | | // RA is a number. | 3366 | | // RA is a number. |
2949 | | lfd f0, 0(RA) | 3367 | | .FPU lfd f0, 0(RA) |
2950 | | bgt cr1, ->vmeta_comp | 3368 | | bgt cr1, ->vmeta_comp |
2951 | | blt cr1, >4 | 3369 | | blt cr1, >4 |
2952 | | // RA is a number, RD is an integer. | 3370 | | // RA is a number, RD is an integer. |
2953 | | tonum_i f1, CARG3 | 3371 | |.if FPU |
3372 | | tonum_i f1, CARG4 | ||
3373 | |.else | ||
3374 | | bl ->vm_sfi2d_2 | ||
3375 | |.endif | ||
2954 | | b >5 | 3376 | | b >5 |
2955 | | | 3377 | | |
2956 | |8: // RA is an integer, RD is not an integer. | 3378 | |8: // RA is an integer, RD is not an integer. |
2957 | | bgt cr1, ->vmeta_comp | 3379 | | bgt cr1, ->vmeta_comp |
2958 | | // RA is an integer, RD is a number. | 3380 | | // RA is an integer, RD is a number. |
3381 | |.if FPU | ||
2959 | | tonum_i f0, CARG2 | 3382 | | tonum_i f0, CARG2 |
3383 | |.else | ||
3384 | | bl ->vm_sfi2d_1 | ||
3385 | |.endif | ||
2960 | |4: | 3386 | |4: |
2961 | | lfd f1, 0(RD) | 3387 | | .FPU lfd f1, 0(RD) |
2962 | |5: | 3388 | |5: |
3389 | |.if FPU | ||
2963 | | fcmpu cr0, f0, f1 | 3390 | | fcmpu cr0, f0, f1 |
3391 | |.else | ||
3392 | | blex __ledf2 | ||
3393 | | cmpwi CRET1, 0 | ||
3394 | |.endif | ||
2964 | if (op == BC_ISLT) { | 3395 | if (op == BC_ISLT) { |
2965 | | bge <2 | 3396 | | bge <2 |
2966 | } else if (op == BC_ISGE) { | 3397 | } else if (op == BC_ISGE) { |
@@ -3008,42 +3439,42 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3008 | vk = op == BC_ISEQV; | 3439 | vk = op == BC_ISEQV; |
3009 | | // RA = src1*8, RD = src2*8, JMP with RD = target | 3440 | | // RA = src1*8, RD = src2*8, JMP with RD = target |
3010 | |.if DUALNUM | 3441 | |.if DUALNUM |
3011 | | lwzux TMP0, RA, BASE | 3442 | | lwzux CARG1, RA, BASE |
3012 | | addi PC, PC, 4 | 3443 | | addi PC, PC, 4 |
3013 | | lwz CARG2, 4(RA) | 3444 | | lwz CARG2, 4(RA) |
3014 | | lwzux TMP1, RD, BASE | 3445 | | lwzux CARG3, RD, BASE |
3015 | | checknum cr0, TMP0 | 3446 | | checknum cr0, CARG1 |
3016 | | lwz TMP2, -4(PC) | 3447 | | lwz SAVE0, -4(PC) |
3017 | | checknum cr1, TMP1 | 3448 | | checknum cr1, CARG3 |
3018 | | decode_RD4 TMP2, TMP2 | 3449 | | decode_RD4 SAVE0, SAVE0 |
3019 | | lwz CARG3, 4(RD) | 3450 | | lwz CARG4, 4(RD) |
3020 | | cror 4*cr7+gt, 4*cr0+gt, 4*cr1+gt | 3451 | | cror 4*cr7+gt, 4*cr0+gt, 4*cr1+gt |
3021 | | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) | 3452 | | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16) |
3022 | if (vk) { | 3453 | if (vk) { |
3023 | | ble cr7, ->BC_ISEQN_Z | 3454 | | ble cr7, ->BC_ISEQN_Z |
3024 | } else { | 3455 | } else { |
3025 | | ble cr7, ->BC_ISNEN_Z | 3456 | | ble cr7, ->BC_ISNEN_Z |
3026 | } | 3457 | } |
3027 | |.else | 3458 | |.else |
3028 | | lwzux TMP0, RA, BASE | 3459 | | lwzux CARG1, RA, BASE |
3029 | | lwz TMP2, 0(PC) | 3460 | | lwz SAVE0, 0(PC) |
3030 | | lfd f0, 0(RA) | 3461 | | lfd f0, 0(RA) |
3031 | | addi PC, PC, 4 | 3462 | | addi PC, PC, 4 |
3032 | | lwzux TMP1, RD, BASE | 3463 | | lwzux CARG3, RD, BASE |
3033 | | checknum cr0, TMP0 | 3464 | | checknum cr0, CARG1 |
3034 | | decode_RD4 TMP2, TMP2 | 3465 | | decode_RD4 SAVE0, SAVE0 |
3035 | | lfd f1, 0(RD) | 3466 | | lfd f1, 0(RD) |
3036 | | checknum cr1, TMP1 | 3467 | | checknum cr1, CARG3 |
3037 | | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) | 3468 | | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16) |
3038 | | bge cr0, >5 | 3469 | | bge cr0, >5 |
3039 | | bge cr1, >5 | 3470 | | bge cr1, >5 |
3040 | | fcmpu cr0, f0, f1 | 3471 | | fcmpu cr0, f0, f1 |
3041 | if (vk) { | 3472 | if (vk) { |
3042 | | bne >1 | 3473 | | bne >1 |
3043 | | add PC, PC, TMP2 | 3474 | | add PC, PC, SAVE0 |
3044 | } else { | 3475 | } else { |
3045 | | beq >1 | 3476 | | beq >1 |
3046 | | add PC, PC, TMP2 | 3477 | | add PC, PC, SAVE0 |
3047 | } | 3478 | } |
3048 | |1: | 3479 | |1: |
3049 | | ins_next | 3480 | | ins_next |
@@ -3051,36 +3482,36 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3051 | |5: // Either or both types are not numbers. | 3482 | |5: // Either or both types are not numbers. |
3052 | |.if not DUALNUM | 3483 | |.if not DUALNUM |
3053 | | lwz CARG2, 4(RA) | 3484 | | lwz CARG2, 4(RA) |
3054 | | lwz CARG3, 4(RD) | 3485 | | lwz CARG4, 4(RD) |
3055 | |.endif | 3486 | |.endif |
3056 | |.if FFI | 3487 | |.if FFI |
3057 | | cmpwi cr7, TMP0, LJ_TCDATA | 3488 | | cmpwi cr7, CARG1, LJ_TCDATA |
3058 | | cmpwi cr5, TMP1, LJ_TCDATA | 3489 | | cmpwi cr5, CARG3, LJ_TCDATA |
3059 | |.endif | 3490 | |.endif |
3060 | | not TMP3, TMP0 | 3491 | | not TMP2, CARG1 |
3061 | | cmplw TMP0, TMP1 | 3492 | | cmplw CARG1, CARG3 |
3062 | | cmplwi cr1, TMP3, ~LJ_TISPRI // Primitive? | 3493 | | cmplwi cr1, TMP2, ~LJ_TISPRI // Primitive? |
3063 | |.if FFI | 3494 | |.if FFI |
3064 | | cror 4*cr7+eq, 4*cr7+eq, 4*cr5+eq | 3495 | | cror 4*cr7+eq, 4*cr7+eq, 4*cr5+eq |
3065 | |.endif | 3496 | |.endif |
3066 | | cmplwi cr6, TMP3, ~LJ_TISTABUD // Table or userdata? | 3497 | | cmplwi cr6, TMP2, ~LJ_TISTABUD // Table or userdata? |
3067 | |.if FFI | 3498 | |.if FFI |
3068 | | beq cr7, ->vmeta_equal_cd | 3499 | | beq cr7, ->vmeta_equal_cd |
3069 | |.endif | 3500 | |.endif |
3070 | | cmplw cr5, CARG2, CARG3 | 3501 | | cmplw cr5, CARG2, CARG4 |
3071 | | crandc 4*cr0+gt, 4*cr0+eq, 4*cr1+gt // 2: Same type and primitive. | 3502 | | crandc 4*cr0+gt, 4*cr0+eq, 4*cr1+gt // 2: Same type and primitive. |
3072 | | crorc 4*cr0+lt, 4*cr5+eq, 4*cr0+eq // 1: Same tv or different type. | 3503 | | crorc 4*cr0+lt, 4*cr5+eq, 4*cr0+eq // 1: Same tv or different type. |
3073 | | crand 4*cr0+eq, 4*cr0+eq, 4*cr5+eq // 0: Same type and same tv. | 3504 | | crand 4*cr0+eq, 4*cr0+eq, 4*cr5+eq // 0: Same type and same tv. |
3074 | | mr SAVE0, PC | 3505 | | mr SAVE1, PC |
3075 | | cror 4*cr0+eq, 4*cr0+eq, 4*cr0+gt // 0 or 2. | 3506 | | cror 4*cr0+eq, 4*cr0+eq, 4*cr0+gt // 0 or 2. |
3076 | | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+gt // 1 or 2. | 3507 | | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+gt // 1 or 2. |
3077 | if (vk) { | 3508 | if (vk) { |
3078 | | bne cr0, >6 | 3509 | | bne cr0, >6 |
3079 | | add PC, PC, TMP2 | 3510 | | add PC, PC, SAVE0 |
3080 | |6: | 3511 | |6: |
3081 | } else { | 3512 | } else { |
3082 | | beq cr0, >6 | 3513 | | beq cr0, >6 |
3083 | | add PC, PC, TMP2 | 3514 | | add PC, PC, SAVE0 |
3084 | |6: | 3515 | |6: |
3085 | } | 3516 | } |
3086 | |.if DUALNUM | 3517 | |.if DUALNUM |
@@ -3095,6 +3526,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3095 | | | 3526 | | |
3096 | | // Different tables or userdatas. Need to check __eq metamethod. | 3527 | | // Different tables or userdatas. Need to check __eq metamethod. |
3097 | | // Field metatable must be at same offset for GCtab and GCudata! | 3528 | | // Field metatable must be at same offset for GCtab and GCudata! |
3529 | | mr CARG3, CARG4 | ||
3098 | | lwz TAB:TMP2, TAB:CARG2->metatable | 3530 | | lwz TAB:TMP2, TAB:CARG2->metatable |
3099 | | li CARG4, 1-vk // ne = 0 or 1. | 3531 | | li CARG4, 1-vk // ne = 0 or 1. |
3100 | | cmplwi TAB:TMP2, 0 | 3532 | | cmplwi TAB:TMP2, 0 |
@@ -3102,7 +3534,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3102 | | lbz TMP2, TAB:TMP2->nomm | 3534 | | lbz TMP2, TAB:TMP2->nomm |
3103 | | andix. TMP2, TMP2, 1<<MM_eq | 3535 | | andix. TMP2, TMP2, 1<<MM_eq |
3104 | | bne <1 // Or 'no __eq' flag set? | 3536 | | bne <1 // Or 'no __eq' flag set? |
3105 | | mr PC, SAVE0 // Restore old PC. | 3537 | | mr PC, SAVE1 // Restore old PC. |
3106 | | b ->vmeta_equal // Handle __eq metamethod. | 3538 | | b ->vmeta_equal // Handle __eq metamethod. |
3107 | break; | 3539 | break; |
3108 | 3540 | ||
@@ -3143,16 +3575,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3143 | vk = op == BC_ISEQN; | 3575 | vk = op == BC_ISEQN; |
3144 | | // RA = src*8, RD = num_const*8, JMP with RD = target | 3576 | | // RA = src*8, RD = num_const*8, JMP with RD = target |
3145 | |.if DUALNUM | 3577 | |.if DUALNUM |
3146 | | lwzux TMP0, RA, BASE | 3578 | | lwzux CARG1, RA, BASE |
3147 | | addi PC, PC, 4 | 3579 | | addi PC, PC, 4 |
3148 | | lwz CARG2, 4(RA) | 3580 | | lwz CARG2, 4(RA) |
3149 | | lwzux TMP1, RD, KBASE | 3581 | | lwzux CARG3, RD, KBASE |
3150 | | checknum cr0, TMP0 | 3582 | | checknum cr0, CARG1 |
3151 | | lwz TMP2, -4(PC) | 3583 | | lwz SAVE0, -4(PC) |
3152 | | checknum cr1, TMP1 | 3584 | | checknum cr1, CARG3 |
3153 | | decode_RD4 TMP2, TMP2 | 3585 | | decode_RD4 SAVE0, SAVE0 |
3154 | | lwz CARG3, 4(RD) | 3586 | | lwz CARG4, 4(RD) |
3155 | | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) | 3587 | | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16) |
3156 | if (vk) { | 3588 | if (vk) { |
3157 | |->BC_ISEQN_Z: | 3589 | |->BC_ISEQN_Z: |
3158 | } else { | 3590 | } else { |
@@ -3160,7 +3592,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3160 | } | 3592 | } |
3161 | | bne cr0, >7 | 3593 | | bne cr0, >7 |
3162 | | bne cr1, >8 | 3594 | | bne cr1, >8 |
3163 | | cmpw CARG2, CARG3 | 3595 | | cmpw CARG2, CARG4 |
3164 | |4: | 3596 | |4: |
3165 | |.else | 3597 | |.else |
3166 | if (vk) { | 3598 | if (vk) { |
@@ -3168,20 +3600,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3168 | } else { | 3600 | } else { |
3169 | |->BC_ISNEN_Z: // Dummy label. | 3601 | |->BC_ISNEN_Z: // Dummy label. |
3170 | } | 3602 | } |
3171 | | lwzx TMP0, BASE, RA | 3603 | | lwzx CARG1, BASE, RA |
3172 | | addi PC, PC, 4 | 3604 | | addi PC, PC, 4 |
3173 | | lfdx f0, BASE, RA | 3605 | | lfdx f0, BASE, RA |
3174 | | lwz TMP2, -4(PC) | 3606 | | lwz SAVE0, -4(PC) |
3175 | | lfdx f1, KBASE, RD | 3607 | | lfdx f1, KBASE, RD |
3176 | | decode_RD4 TMP2, TMP2 | 3608 | | decode_RD4 SAVE0, SAVE0 |
3177 | | checknum TMP0 | 3609 | | checknum CARG1 |
3178 | | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) | 3610 | | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16) |
3179 | | bge >3 | 3611 | | bge >3 |
3180 | | fcmpu cr0, f0, f1 | 3612 | | fcmpu cr0, f0, f1 |
3181 | |.endif | 3613 | |.endif |
3182 | if (vk) { | 3614 | if (vk) { |
3183 | | bne >1 | 3615 | | bne >1 |
3184 | | add PC, PC, TMP2 | 3616 | | add PC, PC, SAVE0 |
3185 | |1: | 3617 | |1: |
3186 | |.if not FFI | 3618 | |.if not FFI |
3187 | |3: | 3619 | |3: |
@@ -3192,13 +3624,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3192 | |.if not FFI | 3624 | |.if not FFI |
3193 | |3: | 3625 | |3: |
3194 | |.endif | 3626 | |.endif |
3195 | | add PC, PC, TMP2 | 3627 | | add PC, PC, SAVE0 |
3196 | |2: | 3628 | |2: |
3197 | } | 3629 | } |
3198 | | ins_next | 3630 | | ins_next |
3199 | |.if FFI | 3631 | |.if FFI |
3200 | |3: | 3632 | |3: |
3201 | | cmpwi TMP0, LJ_TCDATA | 3633 | | cmpwi CARG1, LJ_TCDATA |
3202 | | beq ->vmeta_equal_cd | 3634 | | beq ->vmeta_equal_cd |
3203 | | b <1 | 3635 | | b <1 |
3204 | |.endif | 3636 | |.endif |
@@ -3206,18 +3638,31 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3206 | |7: // RA is not an integer. | 3638 | |7: // RA is not an integer. |
3207 | | bge cr0, <3 | 3639 | | bge cr0, <3 |
3208 | | // RA is a number. | 3640 | | // RA is a number. |
3209 | | lfd f0, 0(RA) | 3641 | | .FPU lfd f0, 0(RA) |
3210 | | blt cr1, >1 | 3642 | | blt cr1, >1 |
3211 | | // RA is a number, RD is an integer. | 3643 | | // RA is a number, RD is an integer. |
3212 | | tonum_i f1, CARG3 | 3644 | |.if FPU |
3645 | | tonum_i f1, CARG4 | ||
3646 | |.else | ||
3647 | | bl ->vm_sfi2d_2 | ||
3648 | |.endif | ||
3213 | | b >2 | 3649 | | b >2 |
3214 | | | 3650 | | |
3215 | |8: // RA is an integer, RD is a number. | 3651 | |8: // RA is an integer, RD is a number. |
3652 | |.if FPU | ||
3216 | | tonum_i f0, CARG2 | 3653 | | tonum_i f0, CARG2 |
3654 | |.else | ||
3655 | | bl ->vm_sfi2d_1 | ||
3656 | |.endif | ||
3217 | |1: | 3657 | |1: |
3218 | | lfd f1, 0(RD) | 3658 | | .FPU lfd f1, 0(RD) |
3219 | |2: | 3659 | |2: |
3660 | |.if FPU | ||
3220 | | fcmpu cr0, f0, f1 | 3661 | | fcmpu cr0, f0, f1 |
3662 | |.else | ||
3663 | | blex __ledf2 | ||
3664 | | cmpwi CRET1, 0 | ||
3665 | |.endif | ||
3221 | | b <4 | 3666 | | b <4 |
3222 | |.endif | 3667 | |.endif |
3223 | break; | 3668 | break; |
@@ -3272,7 +3717,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3272 | | add PC, PC, TMP2 | 3717 | | add PC, PC, TMP2 |
3273 | } else { | 3718 | } else { |
3274 | | li TMP1, LJ_TFALSE | 3719 | | li TMP1, LJ_TFALSE |
3720 | |.if FPU | ||
3275 | | lfdx f0, BASE, RD | 3721 | | lfdx f0, BASE, RD |
3722 | |.else | ||
3723 | | lwzux CARG1, RD, BASE | ||
3724 | | lwz CARG2, 4(RD) | ||
3725 | |.endif | ||
3276 | | cmplw TMP0, TMP1 | 3726 | | cmplw TMP0, TMP1 |
3277 | if (op == BC_ISTC) { | 3727 | if (op == BC_ISTC) { |
3278 | | bge >1 | 3728 | | bge >1 |
@@ -3281,20 +3731,55 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3281 | } | 3731 | } |
3282 | | addis PC, PC, -(BCBIAS_J*4 >> 16) | 3732 | | addis PC, PC, -(BCBIAS_J*4 >> 16) |
3283 | | decode_RD4 TMP2, INS | 3733 | | decode_RD4 TMP2, INS |
3734 | |.if FPU | ||
3284 | | stfdx f0, BASE, RA | 3735 | | stfdx f0, BASE, RA |
3736 | |.else | ||
3737 | | stwux CARG1, RA, BASE | ||
3738 | | stw CARG2, 4(RA) | ||
3739 | |.endif | ||
3285 | | add PC, PC, TMP2 | 3740 | | add PC, PC, TMP2 |
3286 | |1: | 3741 | |1: |
3287 | } | 3742 | } |
3288 | | ins_next | 3743 | | ins_next |
3289 | break; | 3744 | break; |
3290 | 3745 | ||
3746 | case BC_ISTYPE: | ||
3747 | | // RA = src*8, RD = -type*8 | ||
3748 | | lwzx TMP0, BASE, RA | ||
3749 | | srwi TMP1, RD, 3 | ||
3750 | | ins_next1 | ||
3751 | |.if not PPE and not GPR64 | ||
3752 | | add. TMP0, TMP0, TMP1 | ||
3753 | |.else | ||
3754 | | neg TMP1, TMP1 | ||
3755 | | cmpw TMP0, TMP1 | ||
3756 | |.endif | ||
3757 | | bne ->vmeta_istype | ||
3758 | | ins_next2 | ||
3759 | break; | ||
3760 | case BC_ISNUM: | ||
3761 | | // RA = src*8, RD = -(TISNUM-1)*8 | ||
3762 | | lwzx TMP0, BASE, RA | ||
3763 | | ins_next1 | ||
3764 | | checknum TMP0 | ||
3765 | | bge ->vmeta_istype | ||
3766 | | ins_next2 | ||
3767 | break; | ||
3768 | |||
3291 | /* -- Unary ops --------------------------------------------------------- */ | 3769 | /* -- Unary ops --------------------------------------------------------- */ |
3292 | 3770 | ||
3293 | case BC_MOV: | 3771 | case BC_MOV: |
3294 | | // RA = dst*8, RD = src*8 | 3772 | | // RA = dst*8, RD = src*8 |
3295 | | ins_next1 | 3773 | | ins_next1 |
3774 | |.if FPU | ||
3296 | | lfdx f0, BASE, RD | 3775 | | lfdx f0, BASE, RD |
3297 | | stfdx f0, BASE, RA | 3776 | | stfdx f0, BASE, RA |
3777 | |.else | ||
3778 | | lwzux TMP0, RD, BASE | ||
3779 | | lwz TMP1, 4(RD) | ||
3780 | | stwux TMP0, RA, BASE | ||
3781 | | stw TMP1, 4(RA) | ||
3782 | |.endif | ||
3298 | | ins_next2 | 3783 | | ins_next2 |
3299 | break; | 3784 | break; |
3300 | case BC_NOT: | 3785 | case BC_NOT: |
@@ -3396,44 +3881,65 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3396 | ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); | 3881 | ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); |
3397 | ||switch (vk) { | 3882 | ||switch (vk) { |
3398 | ||case 0: | 3883 | ||case 0: |
3399 | | lwzx TMP1, BASE, RB | 3884 | | lwzx CARG1, BASE, RB |
3400 | | .if DUALNUM | 3885 | | .if DUALNUM |
3401 | | lwzx TMP2, KBASE, RC | 3886 | | lwzx CARG3, KBASE, RC |
3402 | | .endif | 3887 | | .endif |
3888 | | .if FPU | ||
3403 | | lfdx f14, BASE, RB | 3889 | | lfdx f14, BASE, RB |
3404 | | lfdx f15, KBASE, RC | 3890 | | lfdx f15, KBASE, RC |
3891 | | .else | ||
3892 | | add TMP1, BASE, RB | ||
3893 | | add TMP2, KBASE, RC | ||
3894 | | lwz CARG2, 4(TMP1) | ||
3895 | | lwz CARG4, 4(TMP2) | ||
3896 | | .endif | ||
3405 | | .if DUALNUM | 3897 | | .if DUALNUM |
3406 | | checknum cr0, TMP1 | 3898 | | checknum cr0, CARG1 |
3407 | | checknum cr1, TMP2 | 3899 | | checknum cr1, CARG3 |
3408 | | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt | 3900 | | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt |
3409 | | bge ->vmeta_arith_vn | 3901 | | bge ->vmeta_arith_vn |
3410 | | .else | 3902 | | .else |
3411 | | checknum TMP1; bge ->vmeta_arith_vn | 3903 | | checknum CARG1; bge ->vmeta_arith_vn |
3412 | | .endif | 3904 | | .endif |
3413 | || break; | 3905 | || break; |
3414 | ||case 1: | 3906 | ||case 1: |
3415 | | lwzx TMP1, BASE, RB | 3907 | | lwzx CARG1, BASE, RB |
3416 | | .if DUALNUM | 3908 | | .if DUALNUM |
3417 | | lwzx TMP2, KBASE, RC | 3909 | | lwzx CARG3, KBASE, RC |
3418 | | .endif | 3910 | | .endif |
3911 | | .if FPU | ||
3419 | | lfdx f15, BASE, RB | 3912 | | lfdx f15, BASE, RB |
3420 | | lfdx f14, KBASE, RC | 3913 | | lfdx f14, KBASE, RC |
3914 | | .else | ||
3915 | | add TMP1, BASE, RB | ||
3916 | | add TMP2, KBASE, RC | ||
3917 | | lwz CARG2, 4(TMP1) | ||
3918 | | lwz CARG4, 4(TMP2) | ||
3919 | | .endif | ||
3421 | | .if DUALNUM | 3920 | | .if DUALNUM |
3422 | | checknum cr0, TMP1 | 3921 | | checknum cr0, CARG1 |
3423 | | checknum cr1, TMP2 | 3922 | | checknum cr1, CARG3 |
3424 | | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt | 3923 | | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt |
3425 | | bge ->vmeta_arith_nv | 3924 | | bge ->vmeta_arith_nv |
3426 | | .else | 3925 | | .else |
3427 | | checknum TMP1; bge ->vmeta_arith_nv | 3926 | | checknum CARG1; bge ->vmeta_arith_nv |
3428 | | .endif | 3927 | | .endif |
3429 | || break; | 3928 | || break; |
3430 | ||default: | 3929 | ||default: |
3431 | | lwzx TMP1, BASE, RB | 3930 | | lwzx CARG1, BASE, RB |
3432 | | lwzx TMP2, BASE, RC | 3931 | | lwzx CARG3, BASE, RC |
3932 | | .if FPU | ||
3433 | | lfdx f14, BASE, RB | 3933 | | lfdx f14, BASE, RB |
3434 | | lfdx f15, BASE, RC | 3934 | | lfdx f15, BASE, RC |
3435 | | checknum cr0, TMP1 | 3935 | | .else |
3436 | | checknum cr1, TMP2 | 3936 | | add TMP1, BASE, RB |
3937 | | add TMP2, BASE, RC | ||
3938 | | lwz CARG2, 4(TMP1) | ||
3939 | | lwz CARG4, 4(TMP2) | ||
3940 | | .endif | ||
3941 | | checknum cr0, CARG1 | ||
3942 | | checknum cr1, CARG3 | ||
3437 | | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt | 3943 | | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt |
3438 | | bge ->vmeta_arith_vv | 3944 | | bge ->vmeta_arith_vv |
3439 | || break; | 3945 | || break; |
@@ -3467,48 +3973,78 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3467 | | fsub a, b, a // b - floor(b/c)*c | 3973 | | fsub a, b, a // b - floor(b/c)*c |
3468 | |.endmacro | 3974 | |.endmacro |
3469 | | | 3975 | | |
3976 | |.macro sfpmod | ||
3977 | |->BC_MODVN_Z: | ||
3978 | | stw CARG1, SFSAVE_1 | ||
3979 | | stw CARG2, SFSAVE_2 | ||
3980 | | mr SAVE0, CARG3 | ||
3981 | | mr SAVE1, CARG4 | ||
3982 | | blex __divdf3 | ||
3983 | | blex floor | ||
3984 | | mr CARG3, SAVE0 | ||
3985 | | mr CARG4, SAVE1 | ||
3986 | | blex __muldf3 | ||
3987 | | mr CARG3, CRET1 | ||
3988 | | mr CARG4, CRET2 | ||
3989 | | lwz CARG1, SFSAVE_1 | ||
3990 | | lwz CARG2, SFSAVE_2 | ||
3991 | | blex __subdf3 | ||
3992 | |.endmacro | ||
3993 | | | ||
3470 | |.macro ins_arithfp, fpins | 3994 | |.macro ins_arithfp, fpins |
3471 | | ins_arithpre | 3995 | | ins_arithpre |
3472 | |.if "fpins" == "fpmod_" | 3996 | |.if "fpins" == "fpmod_" |
3473 | | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. | 3997 | | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. |
3474 | |.else | 3998 | |.elif FPU |
3475 | | fpins f0, f14, f15 | 3999 | | fpins f0, f14, f15 |
3476 | | ins_next1 | 4000 | | ins_next1 |
3477 | | stfdx f0, BASE, RA | 4001 | | stfdx f0, BASE, RA |
3478 | | ins_next2 | 4002 | | ins_next2 |
4003 | |.else | ||
4004 | | blex __divdf3 // Only soft-float div uses this macro. | ||
4005 | | ins_next1 | ||
4006 | | stwux CRET1, RA, BASE | ||
4007 | | stw CRET2, 4(RA) | ||
4008 | | ins_next2 | ||
3479 | |.endif | 4009 | |.endif |
3480 | |.endmacro | 4010 | |.endmacro |
3481 | | | 4011 | | |
3482 | |.macro ins_arithdn, intins, fpins | 4012 | |.macro ins_arithdn, intins, fpins, fpcall |
3483 | | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 | 4013 | | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 |
3484 | ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); | 4014 | ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); |
3485 | ||switch (vk) { | 4015 | ||switch (vk) { |
3486 | ||case 0: | 4016 | ||case 0: |
3487 | | lwzux TMP1, RB, BASE | 4017 | | lwzux CARG1, RB, BASE |
3488 | | lwzux TMP2, RC, KBASE | 4018 | | lwzux CARG3, RC, KBASE |
3489 | | lwz CARG1, 4(RB) | 4019 | | lwz CARG2, 4(RB) |
3490 | | checknum cr0, TMP1 | 4020 | | checknum cr0, CARG1 |
3491 | | lwz CARG2, 4(RC) | 4021 | | lwz CARG4, 4(RC) |
4022 | | checknum cr1, CARG3 | ||
3492 | || break; | 4023 | || break; |
3493 | ||case 1: | 4024 | ||case 1: |
3494 | | lwzux TMP1, RB, BASE | 4025 | | lwzux CARG3, RB, BASE |
3495 | | lwzux TMP2, RC, KBASE | 4026 | | lwzux CARG1, RC, KBASE |
3496 | | lwz CARG2, 4(RB) | 4027 | | lwz CARG4, 4(RB) |
3497 | | checknum cr0, TMP1 | 4028 | | checknum cr0, CARG3 |
3498 | | lwz CARG1, 4(RC) | 4029 | | lwz CARG2, 4(RC) |
4030 | | checknum cr1, CARG1 | ||
3499 | || break; | 4031 | || break; |
3500 | ||default: | 4032 | ||default: |
3501 | | lwzux TMP1, RB, BASE | 4033 | | lwzux CARG1, RB, BASE |
3502 | | lwzux TMP2, RC, BASE | 4034 | | lwzux CARG3, RC, BASE |
3503 | | lwz CARG1, 4(RB) | 4035 | | lwz CARG2, 4(RB) |
3504 | | checknum cr0, TMP1 | 4036 | | checknum cr0, CARG1 |
3505 | | lwz CARG2, 4(RC) | 4037 | | lwz CARG4, 4(RC) |
4038 | | checknum cr1, CARG3 | ||
3506 | || break; | 4039 | || break; |
3507 | ||} | 4040 | ||} |
3508 | | checknum cr1, TMP2 | ||
3509 | | bne >5 | 4041 | | bne >5 |
3510 | | bne cr1, >5 | 4042 | | bne cr1, >5 |
3511 | | intins CARG1, CARG1, CARG2 | 4043 | |.if "intins" == "intmod" |
4044 | | mr CARG1, CARG2 | ||
4045 | | mr CARG2, CARG4 | ||
4046 | |.endif | ||
4047 | | intins CARG1, CARG2, CARG4 | ||
3512 | | bso >4 | 4048 | | bso >4 |
3513 | |1: | 4049 | |1: |
3514 | | ins_next1 | 4050 | | ins_next1 |
@@ -3520,29 +4056,40 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3520 | | checkov TMP0, <1 // Ignore unrelated overflow. | 4056 | | checkov TMP0, <1 // Ignore unrelated overflow. |
3521 | | ins_arithfallback b | 4057 | | ins_arithfallback b |
3522 | |5: // FP variant. | 4058 | |5: // FP variant. |
4059 | |.if FPU | ||
3523 | ||if (vk == 1) { | 4060 | ||if (vk == 1) { |
3524 | | lfd f15, 0(RB) | 4061 | | lfd f15, 0(RB) |
3525 | | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt | ||
3526 | | lfd f14, 0(RC) | 4062 | | lfd f14, 0(RC) |
3527 | ||} else { | 4063 | ||} else { |
3528 | | lfd f14, 0(RB) | 4064 | | lfd f14, 0(RB) |
3529 | | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt | ||
3530 | | lfd f15, 0(RC) | 4065 | | lfd f15, 0(RC) |
3531 | ||} | 4066 | ||} |
4067 | |.endif | ||
4068 | | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt | ||
3532 | | ins_arithfallback bge | 4069 | | ins_arithfallback bge |
3533 | |.if "fpins" == "fpmod_" | 4070 | |.if "fpins" == "fpmod_" |
3534 | | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. | 4071 | | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. |
3535 | |.else | 4072 | |.else |
4073 | |.if FPU | ||
3536 | | fpins f0, f14, f15 | 4074 | | fpins f0, f14, f15 |
3537 | | ins_next1 | ||
3538 | | stfdx f0, BASE, RA | 4075 | | stfdx f0, BASE, RA |
4076 | |.else | ||
4077 | |.if "fpcall" == "sfpmod" | ||
4078 | | sfpmod | ||
4079 | |.else | ||
4080 | | blex fpcall | ||
4081 | |.endif | ||
4082 | | stwux CRET1, RA, BASE | ||
4083 | | stw CRET2, 4(RA) | ||
4084 | |.endif | ||
4085 | | ins_next1 | ||
3539 | | b <2 | 4086 | | b <2 |
3540 | |.endif | 4087 | |.endif |
3541 | |.endmacro | 4088 | |.endmacro |
3542 | | | 4089 | | |
3543 | |.macro ins_arith, intins, fpins | 4090 | |.macro ins_arith, intins, fpins, fpcall |
3544 | |.if DUALNUM | 4091 | |.if DUALNUM |
3545 | | ins_arithdn intins, fpins | 4092 | | ins_arithdn intins, fpins, fpcall |
3546 | |.else | 4093 | |.else |
3547 | | ins_arithfp fpins | 4094 | | ins_arithfp fpins |
3548 | |.endif | 4095 | |.endif |
@@ -3557,9 +4104,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3557 | | addo. TMP0, TMP0, TMP3 | 4104 | | addo. TMP0, TMP0, TMP3 |
3558 | | add y, a, b | 4105 | | add y, a, b |
3559 | |.endmacro | 4106 | |.endmacro |
3560 | | ins_arith addo32., fadd | 4107 | | ins_arith addo32., fadd, __adddf3 |
3561 | |.else | 4108 | |.else |
3562 | | ins_arith addo., fadd | 4109 | | ins_arith addo., fadd, __adddf3 |
3563 | |.endif | 4110 | |.endif |
3564 | break; | 4111 | break; |
3565 | case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: | 4112 | case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: |
@@ -3571,36 +4118,48 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3571 | | subo. TMP0, TMP0, TMP3 | 4118 | | subo. TMP0, TMP0, TMP3 |
3572 | | sub y, a, b | 4119 | | sub y, a, b |
3573 | |.endmacro | 4120 | |.endmacro |
3574 | | ins_arith subo32., fsub | 4121 | | ins_arith subo32., fsub, __subdf3 |
3575 | |.else | 4122 | |.else |
3576 | | ins_arith subo., fsub | 4123 | | ins_arith subo., fsub, __subdf3 |
3577 | |.endif | 4124 | |.endif |
3578 | break; | 4125 | break; |
3579 | case BC_MULVN: case BC_MULNV: case BC_MULVV: | 4126 | case BC_MULVN: case BC_MULNV: case BC_MULVV: |
3580 | | ins_arith mullwo., fmul | 4127 | | ins_arith mullwo., fmul, __muldf3 |
3581 | break; | 4128 | break; |
3582 | case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: | 4129 | case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: |
3583 | | ins_arithfp fdiv | 4130 | | ins_arithfp fdiv |
3584 | break; | 4131 | break; |
3585 | case BC_MODVN: | 4132 | case BC_MODVN: |
3586 | | ins_arith intmod, fpmod | 4133 | | ins_arith intmod, fpmod, sfpmod |
3587 | break; | 4134 | break; |
3588 | case BC_MODNV: case BC_MODVV: | 4135 | case BC_MODNV: case BC_MODVV: |
3589 | | ins_arith intmod, fpmod_ | 4136 | | ins_arith intmod, fpmod_, sfpmod |
3590 | break; | 4137 | break; |
3591 | case BC_POW: | 4138 | case BC_POW: |
3592 | | // NYI: (partial) integer arithmetic. | 4139 | | // NYI: (partial) integer arithmetic. |
3593 | | lwzx TMP1, BASE, RB | 4140 | | lwzx CARG1, BASE, RB |
4141 | | lwzx CARG3, BASE, RC | ||
4142 | |.if FPU | ||
3594 | | lfdx FARG1, BASE, RB | 4143 | | lfdx FARG1, BASE, RB |
3595 | | lwzx TMP2, BASE, RC | ||
3596 | | lfdx FARG2, BASE, RC | 4144 | | lfdx FARG2, BASE, RC |
3597 | | checknum cr0, TMP1 | 4145 | |.else |
3598 | | checknum cr1, TMP2 | 4146 | | add TMP1, BASE, RB |
4147 | | add TMP2, BASE, RC | ||
4148 | | lwz CARG2, 4(TMP1) | ||
4149 | | lwz CARG4, 4(TMP2) | ||
4150 | |.endif | ||
4151 | | checknum cr0, CARG1 | ||
4152 | | checknum cr1, CARG3 | ||
3599 | | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt | 4153 | | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt |
3600 | | bge ->vmeta_arith_vv | 4154 | | bge ->vmeta_arith_vv |
3601 | | blex pow | 4155 | | blex pow |
3602 | | ins_next1 | 4156 | | ins_next1 |
4157 | |.if FPU | ||
3603 | | stfdx FARG1, BASE, RA | 4158 | | stfdx FARG1, BASE, RA |
4159 | |.else | ||
4160 | | stwux CARG1, RA, BASE | ||
4161 | | stw CARG2, 4(RA) | ||
4162 | |.endif | ||
3604 | | ins_next2 | 4163 | | ins_next2 |
3605 | break; | 4164 | break; |
3606 | 4165 | ||
@@ -3620,8 +4179,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3620 | | lp BASE, L->base | 4179 | | lp BASE, L->base |
3621 | | bne ->vmeta_binop | 4180 | | bne ->vmeta_binop |
3622 | | ins_next1 | 4181 | | ins_next1 |
4182 | |.if FPU | ||
3623 | | lfdx f0, BASE, SAVE0 // Copy result from RB to RA. | 4183 | | lfdx f0, BASE, SAVE0 // Copy result from RB to RA. |
3624 | | stfdx f0, BASE, RA | 4184 | | stfdx f0, BASE, RA |
4185 | |.else | ||
4186 | | lwzux TMP0, SAVE0, BASE | ||
4187 | | lwz TMP1, 4(SAVE0) | ||
4188 | | stwux TMP0, RA, BASE | ||
4189 | | stw TMP1, 4(RA) | ||
4190 | |.endif | ||
3625 | | ins_next2 | 4191 | | ins_next2 |
3626 | break; | 4192 | break; |
3627 | 4193 | ||
@@ -3684,8 +4250,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3684 | case BC_KNUM: | 4250 | case BC_KNUM: |
3685 | | // RA = dst*8, RD = num_const*8 | 4251 | | // RA = dst*8, RD = num_const*8 |
3686 | | ins_next1 | 4252 | | ins_next1 |
4253 | |.if FPU | ||
3687 | | lfdx f0, KBASE, RD | 4254 | | lfdx f0, KBASE, RD |
3688 | | stfdx f0, BASE, RA | 4255 | | stfdx f0, BASE, RA |
4256 | |.else | ||
4257 | | lwzux TMP0, RD, KBASE | ||
4258 | | lwz TMP1, 4(RD) | ||
4259 | | stwux TMP0, RA, BASE | ||
4260 | | stw TMP1, 4(RA) | ||
4261 | |.endif | ||
3689 | | ins_next2 | 4262 | | ins_next2 |
3690 | break; | 4263 | break; |
3691 | case BC_KPRI: | 4264 | case BC_KPRI: |
@@ -3718,8 +4291,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3718 | | lwzx UPVAL:RB, LFUNC:RB, RD | 4291 | | lwzx UPVAL:RB, LFUNC:RB, RD |
3719 | | ins_next1 | 4292 | | ins_next1 |
3720 | | lwz TMP1, UPVAL:RB->v | 4293 | | lwz TMP1, UPVAL:RB->v |
4294 | |.if FPU | ||
3721 | | lfd f0, 0(TMP1) | 4295 | | lfd f0, 0(TMP1) |
3722 | | stfdx f0, BASE, RA | 4296 | | stfdx f0, BASE, RA |
4297 | |.else | ||
4298 | | lwz TMP2, 0(TMP1) | ||
4299 | | lwz TMP3, 4(TMP1) | ||
4300 | | stwux TMP2, RA, BASE | ||
4301 | | stw TMP3, 4(RA) | ||
4302 | |.endif | ||
3723 | | ins_next2 | 4303 | | ins_next2 |
3724 | break; | 4304 | break; |
3725 | case BC_USETV: | 4305 | case BC_USETV: |
@@ -3727,14 +4307,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3727 | | lwz LFUNC:RB, FRAME_FUNC(BASE) | 4307 | | lwz LFUNC:RB, FRAME_FUNC(BASE) |
3728 | | srwi RA, RA, 1 | 4308 | | srwi RA, RA, 1 |
3729 | | addi RA, RA, offsetof(GCfuncL, uvptr) | 4309 | | addi RA, RA, offsetof(GCfuncL, uvptr) |
4310 | |.if FPU | ||
3730 | | lfdux f0, RD, BASE | 4311 | | lfdux f0, RD, BASE |
4312 | |.else | ||
4313 | | lwzux CARG1, RD, BASE | ||
4314 | | lwz CARG3, 4(RD) | ||
4315 | |.endif | ||
3731 | | lwzx UPVAL:RB, LFUNC:RB, RA | 4316 | | lwzx UPVAL:RB, LFUNC:RB, RA |
3732 | | lbz TMP3, UPVAL:RB->marked | 4317 | | lbz TMP3, UPVAL:RB->marked |
3733 | | lwz CARG2, UPVAL:RB->v | 4318 | | lwz CARG2, UPVAL:RB->v |
3734 | | andix. TMP3, TMP3, LJ_GC_BLACK // isblack(uv) | 4319 | | andix. TMP3, TMP3, LJ_GC_BLACK // isblack(uv) |
3735 | | lbz TMP0, UPVAL:RB->closed | 4320 | | lbz TMP0, UPVAL:RB->closed |
3736 | | lwz TMP2, 0(RD) | 4321 | | lwz TMP2, 0(RD) |
4322 | |.if FPU | ||
3737 | | stfd f0, 0(CARG2) | 4323 | | stfd f0, 0(CARG2) |
4324 | |.else | ||
4325 | | stw CARG1, 0(CARG2) | ||
4326 | | stw CARG3, 4(CARG2) | ||
4327 | |.endif | ||
3738 | | cmplwi cr1, TMP0, 0 | 4328 | | cmplwi cr1, TMP0, 0 |
3739 | | lwz TMP1, 4(RD) | 4329 | | lwz TMP1, 4(RD) |
3740 | | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq | 4330 | | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq |
@@ -3790,11 +4380,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3790 | | lwz LFUNC:RB, FRAME_FUNC(BASE) | 4380 | | lwz LFUNC:RB, FRAME_FUNC(BASE) |
3791 | | srwi RA, RA, 1 | 4381 | | srwi RA, RA, 1 |
3792 | | addi RA, RA, offsetof(GCfuncL, uvptr) | 4382 | | addi RA, RA, offsetof(GCfuncL, uvptr) |
4383 | |.if FPU | ||
3793 | | lfdx f0, KBASE, RD | 4384 | | lfdx f0, KBASE, RD |
4385 | |.else | ||
4386 | | lwzux TMP2, RD, KBASE | ||
4387 | | lwz TMP3, 4(RD) | ||
4388 | |.endif | ||
3794 | | lwzx UPVAL:RB, LFUNC:RB, RA | 4389 | | lwzx UPVAL:RB, LFUNC:RB, RA |
3795 | | ins_next1 | 4390 | | ins_next1 |
3796 | | lwz TMP1, UPVAL:RB->v | 4391 | | lwz TMP1, UPVAL:RB->v |
4392 | |.if FPU | ||
3797 | | stfd f0, 0(TMP1) | 4393 | | stfd f0, 0(TMP1) |
4394 | |.else | ||
4395 | | stw TMP2, 0(TMP1) | ||
4396 | | stw TMP3, 4(TMP1) | ||
4397 | |.endif | ||
3798 | | ins_next2 | 4398 | | ins_next2 |
3799 | break; | 4399 | break; |
3800 | case BC_USETP: | 4400 | case BC_USETP: |
@@ -3942,11 +4542,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3942 | |.endif | 4542 | |.endif |
3943 | | ble ->vmeta_tgetv // Integer key and in array part? | 4543 | | ble ->vmeta_tgetv // Integer key and in array part? |
3944 | | lwzx TMP0, TMP1, TMP2 | 4544 | | lwzx TMP0, TMP1, TMP2 |
4545 | |.if FPU | ||
3945 | | lfdx f14, TMP1, TMP2 | 4546 | | lfdx f14, TMP1, TMP2 |
4547 | |.else | ||
4548 | | lwzux SAVE0, TMP1, TMP2 | ||
4549 | | lwz SAVE1, 4(TMP1) | ||
4550 | |.endif | ||
3946 | | checknil TMP0; beq >2 | 4551 | | checknil TMP0; beq >2 |
3947 | |1: | 4552 | |1: |
3948 | | ins_next1 | 4553 | | ins_next1 |
4554 | |.if FPU | ||
3949 | | stfdx f14, BASE, RA | 4555 | | stfdx f14, BASE, RA |
4556 | |.else | ||
4557 | | stwux SAVE0, RA, BASE | ||
4558 | | stw SAVE1, 4(RA) | ||
4559 | |.endif | ||
3950 | | ins_next2 | 4560 | | ins_next2 |
3951 | | | 4561 | | |
3952 | |2: // Check for __index if table value is nil. | 4562 | |2: // Check for __index if table value is nil. |
@@ -4022,12 +4632,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4022 | | lwz TMP1, TAB:RB->asize | 4632 | | lwz TMP1, TAB:RB->asize |
4023 | | lwz TMP2, TAB:RB->array | 4633 | | lwz TMP2, TAB:RB->array |
4024 | | cmplw TMP0, TMP1; bge ->vmeta_tgetb | 4634 | | cmplw TMP0, TMP1; bge ->vmeta_tgetb |
4635 | |.if FPU | ||
4025 | | lwzx TMP1, TMP2, RC | 4636 | | lwzx TMP1, TMP2, RC |
4026 | | lfdx f0, TMP2, RC | 4637 | | lfdx f0, TMP2, RC |
4638 | |.else | ||
4639 | | lwzux TMP1, TMP2, RC | ||
4640 | | lwz TMP3, 4(TMP2) | ||
4641 | |.endif | ||
4027 | | checknil TMP1; beq >5 | 4642 | | checknil TMP1; beq >5 |
4028 | |1: | 4643 | |1: |
4029 | | ins_next1 | 4644 | | ins_next1 |
4645 | |.if FPU | ||
4030 | | stfdx f0, BASE, RA | 4646 | | stfdx f0, BASE, RA |
4647 | |.else | ||
4648 | | stwux TMP1, RA, BASE | ||
4649 | | stw TMP3, 4(RA) | ||
4650 | |.endif | ||
4031 | | ins_next2 | 4651 | | ins_next2 |
4032 | | | 4652 | | |
4033 | |5: // Check for __index if table value is nil. | 4653 | |5: // Check for __index if table value is nil. |
@@ -4039,6 +4659,40 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4039 | | bne <1 // 'no __index' flag set: done. | 4659 | | bne <1 // 'no __index' flag set: done. |
4040 | | b ->vmeta_tgetb // Caveat: preserve TMP0! | 4660 | | b ->vmeta_tgetb // Caveat: preserve TMP0! |
4041 | break; | 4661 | break; |
4662 | case BC_TGETR: | ||
4663 | | // RA = dst*8, RB = table*8, RC = key*8 | ||
4664 | | add RB, BASE, RB | ||
4665 | | lwz TAB:CARG1, 4(RB) | ||
4666 | |.if DUALNUM | ||
4667 | | add RC, BASE, RC | ||
4668 | | lwz TMP0, TAB:CARG1->asize | ||
4669 | | lwz CARG2, 4(RC) | ||
4670 | | lwz TMP1, TAB:CARG1->array | ||
4671 | |.else | ||
4672 | | lfdx f0, BASE, RC | ||
4673 | | lwz TMP0, TAB:CARG1->asize | ||
4674 | | toint CARG2, f0 | ||
4675 | | lwz TMP1, TAB:CARG1->array | ||
4676 | |.endif | ||
4677 | | cmplw TMP0, CARG2 | ||
4678 | | slwi TMP2, CARG2, 3 | ||
4679 | | ble ->vmeta_tgetr // In array part? | ||
4680 | |.if FPU | ||
4681 | | lfdx f14, TMP1, TMP2 | ||
4682 | |.else | ||
4683 | | lwzux SAVE0, TMP2, TMP1 | ||
4684 | | lwz SAVE1, 4(TMP2) | ||
4685 | |.endif | ||
4686 | |->BC_TGETR_Z: | ||
4687 | | ins_next1 | ||
4688 | |.if FPU | ||
4689 | | stfdx f14, BASE, RA | ||
4690 | |.else | ||
4691 | | stwux SAVE0, RA, BASE | ||
4692 | | stw SAVE1, 4(RA) | ||
4693 | |.endif | ||
4694 | | ins_next2 | ||
4695 | break; | ||
4042 | 4696 | ||
4043 | case BC_TSETV: | 4697 | case BC_TSETV: |
4044 | | // RA = src*8, RB = table*8, RC = key*8 | 4698 | | // RA = src*8, RB = table*8, RC = key*8 |
@@ -4077,11 +4731,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4077 | | ble ->vmeta_tsetv // Integer key and in array part? | 4731 | | ble ->vmeta_tsetv // Integer key and in array part? |
4078 | | lwzx TMP2, TMP1, TMP0 | 4732 | | lwzx TMP2, TMP1, TMP0 |
4079 | | lbz TMP3, TAB:RB->marked | 4733 | | lbz TMP3, TAB:RB->marked |
4734 | |.if FPU | ||
4080 | | lfdx f14, BASE, RA | 4735 | | lfdx f14, BASE, RA |
4736 | |.else | ||
4737 | | add SAVE1, BASE, RA | ||
4738 | | lwz SAVE0, 0(SAVE1) | ||
4739 | | lwz SAVE1, 4(SAVE1) | ||
4740 | |.endif | ||
4081 | | checknil TMP2; beq >3 | 4741 | | checknil TMP2; beq >3 |
4082 | |1: | 4742 | |1: |
4083 | | andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table) | 4743 | | andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table) |
4744 | |.if FPU | ||
4084 | | stfdx f14, TMP1, TMP0 | 4745 | | stfdx f14, TMP1, TMP0 |
4746 | |.else | ||
4747 | | stwux SAVE0, TMP1, TMP0 | ||
4748 | | stw SAVE1, 4(TMP1) | ||
4749 | |.endif | ||
4085 | | bne >7 | 4750 | | bne >7 |
4086 | |2: | 4751 | |2: |
4087 | | ins_next | 4752 | | ins_next |
@@ -4122,7 +4787,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4122 | | lwz NODE:TMP2, TAB:RB->node | 4787 | | lwz NODE:TMP2, TAB:RB->node |
4123 | | stb ZERO, TAB:RB->nomm // Clear metamethod cache. | 4788 | | stb ZERO, TAB:RB->nomm // Clear metamethod cache. |
4124 | | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask | 4789 | | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask |
4790 | |.if FPU | ||
4125 | | lfdx f14, BASE, RA | 4791 | | lfdx f14, BASE, RA |
4792 | |.else | ||
4793 | | add CARG2, BASE, RA | ||
4794 | | lwz SAVE0, 0(CARG2) | ||
4795 | | lwz SAVE1, 4(CARG2) | ||
4796 | |.endif | ||
4126 | | slwi TMP0, TMP1, 5 | 4797 | | slwi TMP0, TMP1, 5 |
4127 | | slwi TMP1, TMP1, 3 | 4798 | | slwi TMP1, TMP1, 3 |
4128 | | sub TMP1, TMP0, TMP1 | 4799 | | sub TMP1, TMP0, TMP1 |
@@ -4138,7 +4809,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4138 | | checknil CARG2; beq >4 // Key found, but nil value? | 4809 | | checknil CARG2; beq >4 // Key found, but nil value? |
4139 | |2: | 4810 | |2: |
4140 | | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) | 4811 | | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) |
4812 | |.if FPU | ||
4141 | | stfd f14, NODE:TMP2->val | 4813 | | stfd f14, NODE:TMP2->val |
4814 | |.else | ||
4815 | | stw SAVE0, NODE:TMP2->val.u32.hi | ||
4816 | | stw SAVE1, NODE:TMP2->val.u32.lo | ||
4817 | |.endif | ||
4142 | | bne >7 | 4818 | | bne >7 |
4143 | |3: | 4819 | |3: |
4144 | | ins_next | 4820 | | ins_next |
@@ -4177,7 +4853,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4177 | | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) | 4853 | | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) |
4178 | | // Returns TValue *. | 4854 | | // Returns TValue *. |
4179 | | lp BASE, L->base | 4855 | | lp BASE, L->base |
4856 | |.if FPU | ||
4180 | | stfd f14, 0(CRET1) | 4857 | | stfd f14, 0(CRET1) |
4858 | |.else | ||
4859 | | stw SAVE0, 0(CRET1) | ||
4860 | | stw SAVE1, 4(CRET1) | ||
4861 | |.endif | ||
4181 | | b <3 // No 2nd write barrier needed. | 4862 | | b <3 // No 2nd write barrier needed. |
4182 | | | 4863 | | |
4183 | |7: // Possible table write barrier for the value. Skip valiswhite check. | 4864 | |7: // Possible table write barrier for the value. Skip valiswhite check. |
@@ -4194,13 +4875,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4194 | | lwz TMP2, TAB:RB->array | 4875 | | lwz TMP2, TAB:RB->array |
4195 | | lbz TMP3, TAB:RB->marked | 4876 | | lbz TMP3, TAB:RB->marked |
4196 | | cmplw TMP0, TMP1 | 4877 | | cmplw TMP0, TMP1 |
4878 | |.if FPU | ||
4197 | | lfdx f14, BASE, RA | 4879 | | lfdx f14, BASE, RA |
4880 | |.else | ||
4881 | | add CARG2, BASE, RA | ||
4882 | | lwz SAVE0, 0(CARG2) | ||
4883 | | lwz SAVE1, 4(CARG2) | ||
4884 | |.endif | ||
4198 | | bge ->vmeta_tsetb | 4885 | | bge ->vmeta_tsetb |
4199 | | lwzx TMP1, TMP2, RC | 4886 | | lwzx TMP1, TMP2, RC |
4200 | | checknil TMP1; beq >5 | 4887 | | checknil TMP1; beq >5 |
4201 | |1: | 4888 | |1: |
4202 | | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) | 4889 | | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) |
4890 | |.if FPU | ||
4203 | | stfdx f14, TMP2, RC | 4891 | | stfdx f14, TMP2, RC |
4892 | |.else | ||
4893 | | stwux SAVE0, RC, TMP2 | ||
4894 | | stw SAVE1, 4(RC) | ||
4895 | |.endif | ||
4204 | | bne >7 | 4896 | | bne >7 |
4205 | |2: | 4897 | |2: |
4206 | | ins_next | 4898 | | ins_next |
@@ -4218,6 +4910,49 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4218 | | barrierback TAB:RB, TMP3, TMP0 | 4910 | | barrierback TAB:RB, TMP3, TMP0 |
4219 | | b <2 | 4911 | | b <2 |
4220 | break; | 4912 | break; |
4913 | case BC_TSETR: | ||
4914 | | // RA = dst*8, RB = table*8, RC = key*8 | ||
4915 | | add RB, BASE, RB | ||
4916 | | lwz TAB:CARG2, 4(RB) | ||
4917 | |.if DUALNUM | ||
4918 | | add RC, BASE, RC | ||
4919 | | lbz TMP3, TAB:CARG2->marked | ||
4920 | | lwz TMP0, TAB:CARG2->asize | ||
4921 | | lwz CARG3, 4(RC) | ||
4922 | | lwz TMP1, TAB:CARG2->array | ||
4923 | |.else | ||
4924 | | lfdx f0, BASE, RC | ||
4925 | | lbz TMP3, TAB:CARG2->marked | ||
4926 | | lwz TMP0, TAB:CARG2->asize | ||
4927 | | toint CARG3, f0 | ||
4928 | | lwz TMP1, TAB:CARG2->array | ||
4929 | |.endif | ||
4930 | | andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table) | ||
4931 | | bne >7 | ||
4932 | |2: | ||
4933 | | cmplw TMP0, CARG3 | ||
4934 | | slwi TMP2, CARG3, 3 | ||
4935 | |.if FPU | ||
4936 | | lfdx f14, BASE, RA | ||
4937 | |.else | ||
4938 | | lwzux SAVE0, RA, BASE | ||
4939 | | lwz SAVE1, 4(RA) | ||
4940 | |.endif | ||
4941 | | ble ->vmeta_tsetr // In array part? | ||
4942 | | ins_next1 | ||
4943 | |.if FPU | ||
4944 | | stfdx f14, TMP1, TMP2 | ||
4945 | |.else | ||
4946 | | stwux SAVE0, TMP1, TMP2 | ||
4947 | | stw SAVE1, 4(TMP1) | ||
4948 | |.endif | ||
4949 | | ins_next2 | ||
4950 | | | ||
4951 | |7: // Possible table write barrier for the value. Skip valiswhite check. | ||
4952 | | barrierback TAB:CARG2, TMP3, TMP2 | ||
4953 | | b <2 | ||
4954 | break; | ||
4955 | |||
4221 | 4956 | ||
4222 | case BC_TSETM: | 4957 | case BC_TSETM: |
4223 | | // RA = base*8 (table at base-1), RD = num_const*8 (start index) | 4958 | | // RA = base*8 (table at base-1), RD = num_const*8 (start index) |
@@ -4240,10 +4975,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4240 | | add TMP1, TMP1, TMP0 | 4975 | | add TMP1, TMP1, TMP0 |
4241 | | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) | 4976 | | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) |
4242 | |3: // Copy result slots to table. | 4977 | |3: // Copy result slots to table. |
4978 | |.if FPU | ||
4243 | | lfd f0, 0(RA) | 4979 | | lfd f0, 0(RA) |
4980 | |.else | ||
4981 | | lwz SAVE0, 0(RA) | ||
4982 | | lwz SAVE1, 4(RA) | ||
4983 | |.endif | ||
4244 | | addi RA, RA, 8 | 4984 | | addi RA, RA, 8 |
4245 | | cmpw cr1, RA, TMP2 | 4985 | | cmpw cr1, RA, TMP2 |
4986 | |.if FPU | ||
4246 | | stfd f0, 0(TMP1) | 4987 | | stfd f0, 0(TMP1) |
4988 | |.else | ||
4989 | | stw SAVE0, 0(TMP1) | ||
4990 | | stw SAVE1, 4(TMP1) | ||
4991 | |.endif | ||
4247 | | addi TMP1, TMP1, 8 | 4992 | | addi TMP1, TMP1, 8 |
4248 | | blt cr1, <3 | 4993 | | blt cr1, <3 |
4249 | | bne >7 | 4994 | | bne >7 |
@@ -4310,9 +5055,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4310 | | beq cr1, >3 | 5055 | | beq cr1, >3 |
4311 | |2: | 5056 | |2: |
4312 | | addi TMP3, TMP2, 8 | 5057 | | addi TMP3, TMP2, 8 |
5058 | |.if FPU | ||
4313 | | lfdx f0, RA, TMP2 | 5059 | | lfdx f0, RA, TMP2 |
5060 | |.else | ||
5061 | | add CARG3, RA, TMP2 | ||
5062 | | lwz CARG1, 0(CARG3) | ||
5063 | | lwz CARG2, 4(CARG3) | ||
5064 | |.endif | ||
4314 | | cmplw cr1, TMP3, NARGS8:RC | 5065 | | cmplw cr1, TMP3, NARGS8:RC |
5066 | |.if FPU | ||
4315 | | stfdx f0, BASE, TMP2 | 5067 | | stfdx f0, BASE, TMP2 |
5068 | |.else | ||
5069 | | stwux CARG1, TMP2, BASE | ||
5070 | | stw CARG2, 4(TMP2) | ||
5071 | |.endif | ||
4316 | | mr TMP2, TMP3 | 5072 | | mr TMP2, TMP3 |
4317 | | bne cr1, <2 | 5073 | | bne cr1, <2 |
4318 | |3: | 5074 | |3: |
@@ -4345,14 +5101,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4345 | | add BASE, BASE, RA | 5101 | | add BASE, BASE, RA |
4346 | | lwz TMP1, -24(BASE) | 5102 | | lwz TMP1, -24(BASE) |
4347 | | lwz LFUNC:RB, -20(BASE) | 5103 | | lwz LFUNC:RB, -20(BASE) |
5104 | |.if FPU | ||
4348 | | lfd f1, -8(BASE) | 5105 | | lfd f1, -8(BASE) |
4349 | | lfd f0, -16(BASE) | 5106 | | lfd f0, -16(BASE) |
5107 | |.else | ||
5108 | | lwz CARG1, -8(BASE) | ||
5109 | | lwz CARG2, -4(BASE) | ||
5110 | | lwz CARG3, -16(BASE) | ||
5111 | | lwz CARG4, -12(BASE) | ||
5112 | |.endif | ||
4350 | | stw TMP1, 0(BASE) // Copy callable. | 5113 | | stw TMP1, 0(BASE) // Copy callable. |
4351 | | stw LFUNC:RB, 4(BASE) | 5114 | | stw LFUNC:RB, 4(BASE) |
4352 | | checkfunc TMP1 | 5115 | | checkfunc TMP1 |
4353 | | stfd f1, 16(BASE) // Copy control var. | ||
4354 | | li NARGS8:RC, 16 // Iterators get 2 arguments. | 5116 | | li NARGS8:RC, 16 // Iterators get 2 arguments. |
5117 | |.if FPU | ||
5118 | | stfd f1, 16(BASE) // Copy control var. | ||
4355 | | stfdu f0, 8(BASE) // Copy state. | 5119 | | stfdu f0, 8(BASE) // Copy state. |
5120 | |.else | ||
5121 | | stw CARG1, 16(BASE) // Copy control var. | ||
5122 | | stw CARG2, 20(BASE) | ||
5123 | | stwu CARG3, 8(BASE) // Copy state. | ||
5124 | | stw CARG4, 4(BASE) | ||
5125 | |.endif | ||
4356 | | bne ->vmeta_call | 5126 | | bne ->vmeta_call |
4357 | | ins_call | 5127 | | ins_call |
4358 | break; | 5128 | break; |
@@ -4373,7 +5143,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4373 | | slwi TMP3, RC, 3 | 5143 | | slwi TMP3, RC, 3 |
4374 | | bge >5 // Index points after array part? | 5144 | | bge >5 // Index points after array part? |
4375 | | lwzx TMP2, TMP1, TMP3 | 5145 | | lwzx TMP2, TMP1, TMP3 |
5146 | |.if FPU | ||
4376 | | lfdx f0, TMP1, TMP3 | 5147 | | lfdx f0, TMP1, TMP3 |
5148 | |.else | ||
5149 | | lwzux CARG1, TMP3, TMP1 | ||
5150 | | lwz CARG2, 4(TMP3) | ||
5151 | |.endif | ||
4377 | | checknil TMP2 | 5152 | | checknil TMP2 |
4378 | | lwz INS, -4(PC) | 5153 | | lwz INS, -4(PC) |
4379 | | beq >4 | 5154 | | beq >4 |
@@ -4385,7 +5160,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4385 | |.endif | 5160 | |.endif |
4386 | | addi RC, RC, 1 | 5161 | | addi RC, RC, 1 |
4387 | | addis TMP3, PC, -(BCBIAS_J*4 >> 16) | 5162 | | addis TMP3, PC, -(BCBIAS_J*4 >> 16) |
5163 | |.if FPU | ||
4388 | | stfd f0, 8(RA) | 5164 | | stfd f0, 8(RA) |
5165 | |.else | ||
5166 | | stw CARG1, 8(RA) | ||
5167 | | stw CARG2, 12(RA) | ||
5168 | |.endif | ||
4389 | | decode_RD4 TMP1, INS | 5169 | | decode_RD4 TMP1, INS |
4390 | | stw RC, -4(RA) // Update control var. | 5170 | | stw RC, -4(RA) // Update control var. |
4391 | | add PC, TMP1, TMP3 | 5171 | | add PC, TMP1, TMP3 |
@@ -4410,17 +5190,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4410 | | slwi RB, RC, 3 | 5190 | | slwi RB, RC, 3 |
4411 | | sub TMP3, TMP3, RB | 5191 | | sub TMP3, TMP3, RB |
4412 | | lwzx RB, TMP2, TMP3 | 5192 | | lwzx RB, TMP2, TMP3 |
5193 | |.if FPU | ||
4413 | | lfdx f0, TMP2, TMP3 | 5194 | | lfdx f0, TMP2, TMP3 |
5195 | |.else | ||
5196 | | add CARG3, TMP2, TMP3 | ||
5197 | | lwz CARG1, 0(CARG3) | ||
5198 | | lwz CARG2, 4(CARG3) | ||
5199 | |.endif | ||
4414 | | add NODE:TMP3, TMP2, TMP3 | 5200 | | add NODE:TMP3, TMP2, TMP3 |
4415 | | checknil RB | 5201 | | checknil RB |
4416 | | lwz INS, -4(PC) | 5202 | | lwz INS, -4(PC) |
4417 | | beq >7 | 5203 | | beq >7 |
5204 | |.if FPU | ||
4418 | | lfd f1, NODE:TMP3->key | 5205 | | lfd f1, NODE:TMP3->key |
5206 | |.else | ||
5207 | | lwz CARG3, NODE:TMP3->key.u32.hi | ||
5208 | | lwz CARG4, NODE:TMP3->key.u32.lo | ||
5209 | |.endif | ||
4419 | | addis TMP2, PC, -(BCBIAS_J*4 >> 16) | 5210 | | addis TMP2, PC, -(BCBIAS_J*4 >> 16) |
5211 | |.if FPU | ||
4420 | | stfd f0, 8(RA) | 5212 | | stfd f0, 8(RA) |
5213 | |.else | ||
5214 | | stw CARG1, 8(RA) | ||
5215 | | stw CARG2, 12(RA) | ||
5216 | |.endif | ||
4421 | | add RC, RC, TMP0 | 5217 | | add RC, RC, TMP0 |
4422 | | decode_RD4 TMP1, INS | 5218 | | decode_RD4 TMP1, INS |
5219 | |.if FPU | ||
4423 | | stfd f1, 0(RA) | 5220 | | stfd f1, 0(RA) |
5221 | |.else | ||
5222 | | stw CARG3, 0(RA) | ||
5223 | | stw CARG4, 4(RA) | ||
5224 | |.endif | ||
4424 | | addi RC, RC, 1 | 5225 | | addi RC, RC, 1 |
4425 | | add PC, TMP1, TMP2 | 5226 | | add PC, TMP1, TMP2 |
4426 | | stw RC, -4(RA) // Update control var. | 5227 | | stw RC, -4(RA) // Update control var. |
@@ -4486,9 +5287,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4486 | | subi TMP2, TMP2, 16 | 5287 | | subi TMP2, TMP2, 16 |
4487 | | ble >2 // No vararg slots? | 5288 | | ble >2 // No vararg slots? |
4488 | |1: // Copy vararg slots to destination slots. | 5289 | |1: // Copy vararg slots to destination slots. |
5290 | |.if FPU | ||
4489 | | lfd f0, 0(RC) | 5291 | | lfd f0, 0(RC) |
5292 | |.else | ||
5293 | | lwz CARG1, 0(RC) | ||
5294 | | lwz CARG2, 4(RC) | ||
5295 | |.endif | ||
4490 | | addi RC, RC, 8 | 5296 | | addi RC, RC, 8 |
5297 | |.if FPU | ||
4491 | | stfd f0, 0(RA) | 5298 | | stfd f0, 0(RA) |
5299 | |.else | ||
5300 | | stw CARG1, 0(RA) | ||
5301 | | stw CARG2, 4(RA) | ||
5302 | |.endif | ||
4492 | | cmplw RA, TMP2 | 5303 | | cmplw RA, TMP2 |
4493 | | cmplw cr1, RC, TMP3 | 5304 | | cmplw cr1, RC, TMP3 |
4494 | | bge >3 // All destination slots filled? | 5305 | | bge >3 // All destination slots filled? |
@@ -4511,9 +5322,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4511 | | addi MULTRES, TMP1, 8 | 5322 | | addi MULTRES, TMP1, 8 |
4512 | | bgt >7 | 5323 | | bgt >7 |
4513 | |6: | 5324 | |6: |
5325 | |.if FPU | ||
4514 | | lfd f0, 0(RC) | 5326 | | lfd f0, 0(RC) |
5327 | |.else | ||
5328 | | lwz CARG1, 0(RC) | ||
5329 | | lwz CARG2, 4(RC) | ||
5330 | |.endif | ||
4515 | | addi RC, RC, 8 | 5331 | | addi RC, RC, 8 |
5332 | |.if FPU | ||
4516 | | stfd f0, 0(RA) | 5333 | | stfd f0, 0(RA) |
5334 | |.else | ||
5335 | | stw CARG1, 0(RA) | ||
5336 | | stw CARG2, 4(RA) | ||
5337 | |.endif | ||
4517 | | cmplw RC, TMP3 | 5338 | | cmplw RC, TMP3 |
4518 | | addi RA, RA, 8 | 5339 | | addi RA, RA, 8 |
4519 | | blt <6 // More vararg slots? | 5340 | | blt <6 // More vararg slots? |
@@ -4564,14 +5385,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4564 | | li TMP1, 0 | 5385 | | li TMP1, 0 |
4565 | |2: | 5386 | |2: |
4566 | | addi TMP3, TMP1, 8 | 5387 | | addi TMP3, TMP1, 8 |
5388 | |.if FPU | ||
4567 | | lfdx f0, RA, TMP1 | 5389 | | lfdx f0, RA, TMP1 |
5390 | |.else | ||
5391 | | add CARG3, RA, TMP1 | ||
5392 | | lwz CARG1, 0(CARG3) | ||
5393 | | lwz CARG2, 4(CARG3) | ||
5394 | |.endif | ||
4568 | | cmpw TMP3, RC | 5395 | | cmpw TMP3, RC |
5396 | |.if FPU | ||
4569 | | stfdx f0, TMP2, TMP1 | 5397 | | stfdx f0, TMP2, TMP1 |
5398 | |.else | ||
5399 | | add CARG3, TMP2, TMP1 | ||
5400 | | stw CARG1, 0(CARG3) | ||
5401 | | stw CARG2, 4(CARG3) | ||
5402 | |.endif | ||
4570 | | beq >3 | 5403 | | beq >3 |
4571 | | addi TMP1, TMP3, 8 | 5404 | | addi TMP1, TMP3, 8 |
5405 | |.if FPU | ||
4572 | | lfdx f1, RA, TMP3 | 5406 | | lfdx f1, RA, TMP3 |
5407 | |.else | ||
5408 | | add CARG3, RA, TMP3 | ||
5409 | | lwz CARG1, 0(CARG3) | ||
5410 | | lwz CARG2, 4(CARG3) | ||
5411 | |.endif | ||
4573 | | cmpw TMP1, RC | 5412 | | cmpw TMP1, RC |
5413 | |.if FPU | ||
4574 | | stfdx f1, TMP2, TMP3 | 5414 | | stfdx f1, TMP2, TMP3 |
5415 | |.else | ||
5416 | | add CARG3, TMP2, TMP3 | ||
5417 | | stw CARG1, 0(CARG3) | ||
5418 | | stw CARG2, 4(CARG3) | ||
5419 | |.endif | ||
4575 | | bne <2 | 5420 | | bne <2 |
4576 | |3: | 5421 | |3: |
4577 | |5: | 5422 | |5: |
@@ -4613,8 +5458,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4613 | | subi TMP2, BASE, 8 | 5458 | | subi TMP2, BASE, 8 |
4614 | | decode_RB8 RB, INS | 5459 | | decode_RB8 RB, INS |
4615 | if (op == BC_RET1) { | 5460 | if (op == BC_RET1) { |
5461 | |.if FPU | ||
4616 | | lfd f0, 0(RA) | 5462 | | lfd f0, 0(RA) |
4617 | | stfd f0, 0(TMP2) | 5463 | | stfd f0, 0(TMP2) |
5464 | |.else | ||
5465 | | lwz CARG1, 0(RA) | ||
5466 | | lwz CARG2, 4(RA) | ||
5467 | | stw CARG1, 0(TMP2) | ||
5468 | | stw CARG2, 4(TMP2) | ||
5469 | |.endif | ||
4618 | } | 5470 | } |
4619 | |5: | 5471 | |5: |
4620 | | cmplw RB, RD | 5472 | | cmplw RB, RD |
@@ -4675,11 +5527,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4675 | |4: | 5527 | |4: |
4676 | | stw CARG1, FORL_IDX*8+4(RA) | 5528 | | stw CARG1, FORL_IDX*8+4(RA) |
4677 | } else { | 5529 | } else { |
4678 | | lwz TMP3, FORL_STEP*8(RA) | 5530 | | lwz SAVE0, FORL_STEP*8(RA) |
4679 | | lwz CARG3, FORL_STEP*8+4(RA) | 5531 | | lwz CARG3, FORL_STEP*8+4(RA) |
4680 | | lwz TMP2, FORL_STOP*8(RA) | 5532 | | lwz TMP2, FORL_STOP*8(RA) |
4681 | | lwz CARG2, FORL_STOP*8+4(RA) | 5533 | | lwz CARG2, FORL_STOP*8+4(RA) |
4682 | | cmplw cr7, TMP3, TISNUM | 5534 | | cmplw cr7, SAVE0, TISNUM |
4683 | | cmplw cr1, TMP2, TISNUM | 5535 | | cmplw cr1, TMP2, TISNUM |
4684 | | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq | 5536 | | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq |
4685 | | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq | 5537 | | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq |
@@ -4722,41 +5574,80 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4722 | if (vk) { | 5574 | if (vk) { |
4723 | |.if DUALNUM | 5575 | |.if DUALNUM |
4724 | |9: // FP loop. | 5576 | |9: // FP loop. |
5577 | |.if FPU | ||
4725 | | lfd f1, FORL_IDX*8(RA) | 5578 | | lfd f1, FORL_IDX*8(RA) |
4726 | |.else | 5579 | |.else |
5580 | | lwz CARG1, FORL_IDX*8(RA) | ||
5581 | | lwz CARG2, FORL_IDX*8+4(RA) | ||
5582 | |.endif | ||
5583 | |.else | ||
4727 | | lfdux f1, RA, BASE | 5584 | | lfdux f1, RA, BASE |
4728 | |.endif | 5585 | |.endif |
5586 | |.if FPU | ||
4729 | | lfd f3, FORL_STEP*8(RA) | 5587 | | lfd f3, FORL_STEP*8(RA) |
4730 | | lfd f2, FORL_STOP*8(RA) | 5588 | | lfd f2, FORL_STOP*8(RA) |
4731 | | lwz TMP3, FORL_STEP*8(RA) | ||
4732 | | fadd f1, f1, f3 | 5589 | | fadd f1, f1, f3 |
4733 | | stfd f1, FORL_IDX*8(RA) | 5590 | | stfd f1, FORL_IDX*8(RA) |
5591 | |.else | ||
5592 | | lwz CARG3, FORL_STEP*8(RA) | ||
5593 | | lwz CARG4, FORL_STEP*8+4(RA) | ||
5594 | | mr SAVE1, RD | ||
5595 | | blex __adddf3 | ||
5596 | | mr RD, SAVE1 | ||
5597 | | stw CRET1, FORL_IDX*8(RA) | ||
5598 | | stw CRET2, FORL_IDX*8+4(RA) | ||
5599 | | lwz CARG3, FORL_STOP*8(RA) | ||
5600 | | lwz CARG4, FORL_STOP*8+4(RA) | ||
5601 | |.endif | ||
5602 | | lwz SAVE0, FORL_STEP*8(RA) | ||
4734 | } else { | 5603 | } else { |
4735 | |.if DUALNUM | 5604 | |.if DUALNUM |
4736 | |9: // FP loop. | 5605 | |9: // FP loop. |
4737 | |.else | 5606 | |.else |
4738 | | lwzux TMP1, RA, BASE | 5607 | | lwzux TMP1, RA, BASE |
4739 | | lwz TMP3, FORL_STEP*8(RA) | 5608 | | lwz SAVE0, FORL_STEP*8(RA) |
4740 | | lwz TMP2, FORL_STOP*8(RA) | 5609 | | lwz TMP2, FORL_STOP*8(RA) |
4741 | | cmplw cr0, TMP1, TISNUM | 5610 | | cmplw cr0, TMP1, TISNUM |
4742 | | cmplw cr7, TMP3, TISNUM | 5611 | | cmplw cr7, SAVE0, TISNUM |
4743 | | cmplw cr1, TMP2, TISNUM | 5612 | | cmplw cr1, TMP2, TISNUM |
4744 | |.endif | 5613 | |.endif |
5614 | |.if FPU | ||
4745 | | lfd f1, FORL_IDX*8(RA) | 5615 | | lfd f1, FORL_IDX*8(RA) |
5616 | |.else | ||
5617 | | lwz CARG1, FORL_IDX*8(RA) | ||
5618 | | lwz CARG2, FORL_IDX*8+4(RA) | ||
5619 | |.endif | ||
4746 | | crand 4*cr0+lt, 4*cr0+lt, 4*cr7+lt | 5620 | | crand 4*cr0+lt, 4*cr0+lt, 4*cr7+lt |
4747 | | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt | 5621 | | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt |
5622 | |.if FPU | ||
4748 | | lfd f2, FORL_STOP*8(RA) | 5623 | | lfd f2, FORL_STOP*8(RA) |
5624 | |.else | ||
5625 | | lwz CARG3, FORL_STOP*8(RA) | ||
5626 | | lwz CARG4, FORL_STOP*8+4(RA) | ||
5627 | |.endif | ||
4749 | | bge ->vmeta_for | 5628 | | bge ->vmeta_for |
4750 | } | 5629 | } |
4751 | | cmpwi cr6, TMP3, 0 | 5630 | | cmpwi cr6, SAVE0, 0 |
4752 | if (op != BC_JFORL) { | 5631 | if (op != BC_JFORL) { |
4753 | | srwi RD, RD, 1 | 5632 | | srwi RD, RD, 1 |
4754 | } | 5633 | } |
5634 | |.if FPU | ||
4755 | | stfd f1, FORL_EXT*8(RA) | 5635 | | stfd f1, FORL_EXT*8(RA) |
5636 | |.else | ||
5637 | | stw CARG1, FORL_EXT*8(RA) | ||
5638 | | stw CARG2, FORL_EXT*8+4(RA) | ||
5639 | |.endif | ||
4756 | if (op != BC_JFORL) { | 5640 | if (op != BC_JFORL) { |
4757 | | add RD, PC, RD | 5641 | | add RD, PC, RD |
4758 | } | 5642 | } |
5643 | |.if FPU | ||
4759 | | fcmpu cr0, f1, f2 | 5644 | | fcmpu cr0, f1, f2 |
5645 | |.else | ||
5646 | | mr SAVE1, RD | ||
5647 | | blex __ledf2 | ||
5648 | | cmpwi CRET1, 0 | ||
5649 | | mr RD, SAVE1 | ||
5650 | |.endif | ||
4760 | if (op == BC_JFORI) { | 5651 | if (op == BC_JFORI) { |
4761 | | addis PC, RD, -(BCBIAS_J*4 >> 16) | 5652 | | addis PC, RD, -(BCBIAS_J*4 >> 16) |
4762 | } | 5653 | } |
@@ -4859,8 +5750,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4859 | | lp TMP2, TRACE:TMP2->mcode | 5750 | | lp TMP2, TRACE:TMP2->mcode |
4860 | | stw BASE, DISPATCH_GL(jit_base)(DISPATCH) | 5751 | | stw BASE, DISPATCH_GL(jit_base)(DISPATCH) |
4861 | | mtctr TMP2 | 5752 | | mtctr TMP2 |
4862 | | stw L, DISPATCH_GL(jit_L)(DISPATCH) | ||
4863 | | addi JGL, DISPATCH, GG_DISP2G+32768 | 5753 | | addi JGL, DISPATCH, GG_DISP2G+32768 |
5754 | | stw L, DISPATCH_GL(tmpbuf.L)(DISPATCH) | ||
4864 | | bctr | 5755 | | bctr |
4865 | |.endif | 5756 | |.endif |
4866 | break; | 5757 | break; |
@@ -4995,6 +5886,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4995 | | lp TMP1, L->top | 5886 | | lp TMP1, L->top |
4996 | | li_vmstate INTERP | 5887 | | li_vmstate INTERP |
4997 | | lwz PC, FRAME_PC(BASE) // Fetch PC of caller. | 5888 | | lwz PC, FRAME_PC(BASE) // Fetch PC of caller. |
5889 | | stw L, DISPATCH_GL(cur_L)(DISPATCH) | ||
4998 | | sub RA, TMP1, RD // RA = L->top - nresults*8 | 5890 | | sub RA, TMP1, RD // RA = L->top - nresults*8 |
4999 | | st_vmstate | 5891 | | st_vmstate |
5000 | | b ->vm_returnc | 5892 | | b ->vm_returnc |
diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc new file mode 100644 index 00000000..a5749b17 --- /dev/null +++ b/src/vm_x64.dasc | |||
@@ -0,0 +1,4909 @@ | |||
1 | |// Low-level VM code for x64 CPUs in LJ_GC64 mode. | ||
2 | |// Bytecode interpreter, fast functions and helper functions. | ||
3 | |// Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h | ||
4 | | | ||
5 | |.arch x64 | ||
6 | |.section code_op, code_sub | ||
7 | | | ||
8 | |.actionlist build_actionlist | ||
9 | |.globals GLOB_ | ||
10 | |.globalnames globnames | ||
11 | |.externnames extnames | ||
12 | | | ||
13 | |//----------------------------------------------------------------------- | ||
14 | | | ||
15 | |.if WIN | ||
16 | |.define X64WIN, 1 // Windows/x64 calling conventions. | ||
17 | |.endif | ||
18 | | | ||
19 | |// Fixed register assignments for the interpreter. | ||
20 | |// This is very fragile and has many dependencies. Caveat emptor. | ||
21 | |.define BASE, rdx // Not C callee-save, refetched anyway. | ||
22 | |.if X64WIN | ||
23 | |.define KBASE, rdi // Must be C callee-save. | ||
24 | |.define PC, rsi // Must be C callee-save. | ||
25 | |.define DISPATCH, rbx // Must be C callee-save. | ||
26 | |.define KBASEd, edi | ||
27 | |.define PCd, esi | ||
28 | |.define DISPATCHd, ebx | ||
29 | |.else | ||
30 | |.define KBASE, r15 // Must be C callee-save. | ||
31 | |.define PC, rbx // Must be C callee-save. | ||
32 | |.define DISPATCH, r14 // Must be C callee-save. | ||
33 | |.define KBASEd, r15d | ||
34 | |.define PCd, ebx | ||
35 | |.define DISPATCHd, r14d | ||
36 | |.endif | ||
37 | | | ||
38 | |.define RA, rcx | ||
39 | |.define RAd, ecx | ||
40 | |.define RAH, ch | ||
41 | |.define RAL, cl | ||
42 | |.define RB, rbp // Must be rbp (C callee-save). | ||
43 | |.define RBd, ebp | ||
44 | |.define RC, rax // Must be rax. | ||
45 | |.define RCd, eax | ||
46 | |.define RCW, ax | ||
47 | |.define RCH, ah | ||
48 | |.define RCL, al | ||
49 | |.define OP, RBd | ||
50 | |.define RD, RC | ||
51 | |.define RDd, RCd | ||
52 | |.define RDW, RCW | ||
53 | |.define RDL, RCL | ||
54 | |.define TMPR, r10 | ||
55 | |.define TMPRd, r10d | ||
56 | |.define ITYPE, r11 | ||
57 | |.define ITYPEd, r11d | ||
58 | | | ||
59 | |.if X64WIN | ||
60 | |.define CARG1, rcx // x64/WIN64 C call arguments. | ||
61 | |.define CARG2, rdx | ||
62 | |.define CARG3, r8 | ||
63 | |.define CARG4, r9 | ||
64 | |.define CARG1d, ecx | ||
65 | |.define CARG2d, edx | ||
66 | |.define CARG3d, r8d | ||
67 | |.define CARG4d, r9d | ||
68 | |.else | ||
69 | |.define CARG1, rdi // x64/POSIX C call arguments. | ||
70 | |.define CARG2, rsi | ||
71 | |.define CARG3, rdx | ||
72 | |.define CARG4, rcx | ||
73 | |.define CARG5, r8 | ||
74 | |.define CARG6, r9 | ||
75 | |.define CARG1d, edi | ||
76 | |.define CARG2d, esi | ||
77 | |.define CARG3d, edx | ||
78 | |.define CARG4d, ecx | ||
79 | |.define CARG5d, r8d | ||
80 | |.define CARG6d, r9d | ||
81 | |.endif | ||
82 | | | ||
83 | |// Type definitions. Some of these are only used for documentation. | ||
84 | |.type L, lua_State | ||
85 | |.type GL, global_State | ||
86 | |.type TVALUE, TValue | ||
87 | |.type GCOBJ, GCobj | ||
88 | |.type STR, GCstr | ||
89 | |.type TAB, GCtab | ||
90 | |.type LFUNC, GCfuncL | ||
91 | |.type CFUNC, GCfuncC | ||
92 | |.type PROTO, GCproto | ||
93 | |.type UPVAL, GCupval | ||
94 | |.type NODE, Node | ||
95 | |.type NARGS, int | ||
96 | |.type TRACE, GCtrace | ||
97 | |.type SBUF, SBuf | ||
98 | | | ||
99 | |// Stack layout while in interpreter. Must match with lj_frame.h. | ||
100 | |//----------------------------------------------------------------------- | ||
101 | |.if X64WIN // x64/Windows stack layout | ||
102 | | | ||
103 | |.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--). | ||
104 | |.macro saveregs_ | ||
105 | | push rdi; push rsi; push rbx | ||
106 | | sub rsp, CFRAME_SPACE | ||
107 | |.endmacro | ||
108 | |.macro saveregs | ||
109 | | push rbp; saveregs_ | ||
110 | |.endmacro | ||
111 | |.macro restoreregs | ||
112 | | add rsp, CFRAME_SPACE | ||
113 | | pop rbx; pop rsi; pop rdi; pop rbp | ||
114 | |.endmacro | ||
115 | | | ||
116 | |.define SAVE_CFRAME, aword [rsp+aword*13] | ||
117 | |.define SAVE_PC, aword [rsp+aword*12] | ||
118 | |.define SAVE_L, aword [rsp+aword*11] | ||
119 | |.define SAVE_ERRF, dword [rsp+dword*21] | ||
120 | |.define SAVE_NRES, dword [rsp+dword*20] | ||
121 | |//----- 16 byte aligned, ^^^ 32 byte register save area, owned by interpreter | ||
122 | |.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter. | ||
123 | |.define SAVE_R4, aword [rsp+aword*8] | ||
124 | |.define SAVE_R3, aword [rsp+aword*7] | ||
125 | |.define SAVE_R2, aword [rsp+aword*6] | ||
126 | |.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves. | ||
127 | |.define ARG5, aword [rsp+aword*4] | ||
128 | |.define CSAVE_4, aword [rsp+aword*3] | ||
129 | |.define CSAVE_3, aword [rsp+aword*2] | ||
130 | |.define CSAVE_2, aword [rsp+aword*1] | ||
131 | |.define CSAVE_1, aword [rsp] //<-- rsp while in interpreter. | ||
132 | |//----- 16 byte aligned, ^^^ 32 byte register save area, owned by callee | ||
133 | | | ||
134 | |.define ARG5d, dword [rsp+dword*8] | ||
135 | |.define TMP1, ARG5 // TMP1 overlaps ARG5 | ||
136 | |.define TMP1d, ARG5d | ||
137 | |.define TMP1hi, dword [rsp+dword*9] | ||
138 | |.define MULTRES, TMP1d // MULTRES overlaps TMP1d. | ||
139 | | | ||
140 | |//----------------------------------------------------------------------- | ||
141 | |.else // x64/POSIX stack layout | ||
142 | | | ||
143 | |.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--). | ||
144 | |.macro saveregs_ | ||
145 | | push rbx; push r15; push r14 | ||
146 | |.if NO_UNWIND | ||
147 | | push r13; push r12 | ||
148 | |.endif | ||
149 | | sub rsp, CFRAME_SPACE | ||
150 | |.endmacro | ||
151 | |.macro saveregs | ||
152 | | push rbp; saveregs_ | ||
153 | |.endmacro | ||
154 | |.macro restoreregs | ||
155 | | add rsp, CFRAME_SPACE | ||
156 | |.if NO_UNWIND | ||
157 | | pop r12; pop r13 | ||
158 | |.endif | ||
159 | | pop r14; pop r15; pop rbx; pop rbp | ||
160 | |.endmacro | ||
161 | | | ||
162 | |//----- 16 byte aligned, | ||
163 | |.if NO_UNWIND | ||
164 | |.define SAVE_RET, aword [rsp+aword*11] //<-- rsp entering interpreter. | ||
165 | |.define SAVE_R4, aword [rsp+aword*10] | ||
166 | |.define SAVE_R3, aword [rsp+aword*9] | ||
167 | |.define SAVE_R2, aword [rsp+aword*8] | ||
168 | |.define SAVE_R1, aword [rsp+aword*7] | ||
169 | |.define SAVE_RU2, aword [rsp+aword*6] | ||
170 | |.define SAVE_RU1, aword [rsp+aword*5] //<-- rsp after register saves. | ||
171 | |.else | ||
172 | |.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter. | ||
173 | |.define SAVE_R4, aword [rsp+aword*8] | ||
174 | |.define SAVE_R3, aword [rsp+aword*7] | ||
175 | |.define SAVE_R2, aword [rsp+aword*6] | ||
176 | |.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves. | ||
177 | |.endif | ||
178 | |.define SAVE_CFRAME, aword [rsp+aword*4] | ||
179 | |.define SAVE_PC, aword [rsp+aword*3] | ||
180 | |.define SAVE_L, aword [rsp+aword*2] | ||
181 | |.define SAVE_ERRF, dword [rsp+dword*3] | ||
182 | |.define SAVE_NRES, dword [rsp+dword*2] | ||
183 | |.define TMP1, aword [rsp] //<-- rsp while in interpreter. | ||
184 | |//----- 16 byte aligned | ||
185 | | | ||
186 | |.define TMP1d, dword [rsp] | ||
187 | |.define TMP1hi, dword [rsp+dword*1] | ||
188 | |.define MULTRES, TMP1d // MULTRES overlaps TMP1d. | ||
189 | | | ||
190 | |.endif | ||
191 | | | ||
192 | |//----------------------------------------------------------------------- | ||
193 | | | ||
194 | |// Instruction headers. | ||
195 | |.macro ins_A; .endmacro | ||
196 | |.macro ins_AD; .endmacro | ||
197 | |.macro ins_AJ; .endmacro | ||
198 | |.macro ins_ABC; movzx RBd, RCH; movzx RCd, RCL; .endmacro | ||
199 | |.macro ins_AB_; movzx RBd, RCH; .endmacro | ||
200 | |.macro ins_A_C; movzx RCd, RCL; .endmacro | ||
201 | |.macro ins_AND; not RD; .endmacro | ||
202 | | | ||
203 | |// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster). | ||
204 | |.macro ins_NEXT | ||
205 | | mov RCd, [PC] | ||
206 | | movzx RAd, RCH | ||
207 | | movzx OP, RCL | ||
208 | | add PC, 4 | ||
209 | | shr RCd, 16 | ||
210 | | jmp aword [DISPATCH+OP*8] | ||
211 | |.endmacro | ||
212 | | | ||
213 | |// Instruction footer. | ||
214 | |.if 1 | ||
215 | | // Replicated dispatch. Fewer unpredictable branches, but higher I-Cache use. | ||
216 | | .define ins_next, ins_NEXT | ||
217 | | .define ins_next_, ins_NEXT | ||
218 | |.else | ||
219 | | // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. | ||
220 | | // Affects only certain kinds of benchmarks (and only with -j off). | ||
221 | | // Around 10%-30% slower on Core2, a lot slower on P4. | ||
222 | | .macro ins_next | ||
223 | | jmp ->ins_next | ||
224 | | .endmacro | ||
225 | | .macro ins_next_ | ||
226 | | ->ins_next: | ||
227 | | ins_NEXT | ||
228 | | .endmacro | ||
229 | |.endif | ||
230 | | | ||
231 | |// Call decode and dispatch. | ||
232 | |.macro ins_callt | ||
233 | | // BASE = new base, RB = LFUNC, RD = nargs+1, [BASE-8] = PC | ||
234 | | mov PC, LFUNC:RB->pc | ||
235 | | mov RAd, [PC] | ||
236 | | movzx OP, RAL | ||
237 | | movzx RAd, RAH | ||
238 | | add PC, 4 | ||
239 | | jmp aword [DISPATCH+OP*8] | ||
240 | |.endmacro | ||
241 | | | ||
242 | |.macro ins_call | ||
243 | | // BASE = new base, RB = LFUNC, RD = nargs+1 | ||
244 | | mov [BASE-8], PC | ||
245 | | ins_callt | ||
246 | |.endmacro | ||
247 | | | ||
248 | |//----------------------------------------------------------------------- | ||
249 | | | ||
250 | |// Macros to clear or set tags. The tag sits in the top 17 bits (bit 47 and up): cleartp shifts it out, settp ORs in tp<<47. The 2-operand settp and 1-operand setint use ITYPE as scratch and overwrite it. | ||
251 | |.macro cleartp, reg; shl reg, 17; shr reg, 17; .endmacro | ||
252 | |.macro settp, reg, tp | ||
253 | | mov64 ITYPE, ((uint64_t)tp<<47) | ||
254 | | or reg, ITYPE | ||
255 | |.endmacro | ||
256 | |.macro settp, dst, reg, tp | ||
257 | | mov64 dst, ((uint64_t)tp<<47) | ||
258 | | or dst, reg | ||
259 | |.endmacro | ||
260 | |.macro setint, reg | ||
261 | | settp reg, LJ_TISNUM | ||
262 | |.endmacro | ||
263 | |.macro setint, dst, reg | ||
264 | | settp dst, reg, LJ_TISNUM | ||
265 | |.endmacro | ||
266 | | | ||
267 | |// Macros to test operand types. Each copies the operand into ITYPE and arithmetic-shifts it right by 47, so ITYPE is overwritten; checktp additionally clears the tag bits of reg itself. checktp_nc and checktptp expand to the same instructions. | ||
268 | |.macro checktp_nc, reg, tp, target | ||
269 | | mov ITYPE, reg | ||
270 | | sar ITYPE, 47 | ||
271 | | cmp ITYPEd, tp | ||
272 | | jne target | ||
273 | |.endmacro | ||
274 | |.macro checktp, reg, tp, target | ||
275 | | mov ITYPE, reg | ||
276 | | cleartp reg | ||
277 | | sar ITYPE, 47 | ||
278 | | cmp ITYPEd, tp | ||
279 | | jne target | ||
280 | |.endmacro | ||
281 | |.macro checktptp, src, tp, target | ||
282 | | mov ITYPE, src | ||
283 | | sar ITYPE, 47 | ||
284 | | cmp ITYPEd, tp | ||
285 | | jne target | ||
286 | |.endmacro | ||
287 | |.macro checkstr, reg, target; checktp reg, LJ_TSTR, target; .endmacro | ||
288 | |.macro checktab, reg, target; checktp reg, LJ_TTAB, target; .endmacro | ||
289 | |.macro checkfunc, reg, target; checktp reg, LJ_TFUNC, target; .endmacro | ||
290 | | | ||
291 | |.macro checknumx, reg, target, jump | ||
292 | | mov ITYPE, reg | ||
293 | | sar ITYPE, 47 | ||
294 | | cmp ITYPEd, LJ_TISNUM | ||
295 | | jump target | ||
296 | |.endmacro | ||
297 | |.macro checkint, reg, target; checknumx reg, target, jne; .endmacro | ||
298 | |.macro checkinttp, src, target; checknumx src, target, jne; .endmacro | ||
299 | |.macro checknum, reg, target; checknumx reg, target, jae; .endmacro | ||
300 | |.macro checknumtp, src, target; checknumx src, target, jae; .endmacro | ||
301 | |.macro checknumber, src, target; checknumx src, target, ja; .endmacro | ||
302 | | | ||
303 | |.macro mov_false, reg; mov64 reg, (int64_t)~((uint64_t)1<<47); .endmacro | ||
304 | |.macro mov_true, reg; mov64 reg, (int64_t)~((uint64_t)2<<47); .endmacro | ||
305 | | | ||
306 | |// These operands must be used with movzx. | ||
307 | |.define PC_OP, byte [PC-4] | ||
308 | |.define PC_RA, byte [PC-3] | ||
309 | |.define PC_RB, byte [PC-1] | ||
310 | |.define PC_RC, byte [PC-2] | ||
311 | |.define PC_RD, word [PC-2] | ||
312 | | | ||
313 | |.macro branchPC, reg | ||
314 | | lea PC, [PC+reg*4-BCBIAS_J*4] | ||
315 | |.endmacro | ||
316 | | | ||
317 | |// Assumes DISPATCH is relative to GL. | ||
318 | #define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) | ||
319 | #define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) | ||
320 | | | ||
321 | #define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) | ||
322 | | | ||
323 | |// Decrement hashed hotcount and trigger trace recorder if zero. | ||
324 | |.macro hotloop, reg | ||
325 | | mov reg, PCd | ||
326 | | shr reg, 1 | ||
327 | | and reg, HOTCOUNT_PCMASK | ||
328 | | sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_LOOP | ||
329 | | jb ->vm_hotloop | ||
330 | |.endmacro | ||
331 | | | ||
332 | |.macro hotcall, reg | ||
333 | | mov reg, PCd | ||
334 | | shr reg, 1 | ||
335 | | and reg, HOTCOUNT_PCMASK | ||
336 | | sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_CALL | ||
337 | | jb ->vm_hotcall | ||
338 | |.endmacro | ||
339 | | | ||
340 | |// Set current VM state. | ||
341 | |.macro set_vmstate, st | ||
342 | | mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st | ||
343 | |.endmacro | ||
344 | | | ||
345 | |.macro fpop1; fstp st1; .endmacro | ||
346 | | | ||
347 | |// Synthesize SSE FP constants. | ||
348 | |.macro sseconst_abs, reg, tmp // Synthesize abs mask. | ||
349 | | mov64 tmp, U64x(7fffffff,ffffffff); movd reg, tmp | ||
350 | |.endmacro | ||
351 | | | ||
352 | |.macro sseconst_hi, reg, tmp, val // Synthesize hi-32 bit const. | ||
353 | | mov64 tmp, U64x(val,00000000); movd reg, tmp | ||
354 | |.endmacro | ||
355 | | | ||
356 | |.macro sseconst_sign, reg, tmp // Synthesize sign mask. | ||
357 | | sseconst_hi reg, tmp, 80000000 | ||
358 | |.endmacro | ||
359 | |.macro sseconst_1, reg, tmp // Synthesize 1.0. | ||
360 | | sseconst_hi reg, tmp, 3ff00000 | ||
361 | |.endmacro | ||
362 | |.macro sseconst_m1, reg, tmp // Synthesize -1.0. | ||
363 | | sseconst_hi reg, tmp, bff00000 | ||
364 | |.endmacro | ||
365 | |.macro sseconst_2p52, reg, tmp // Synthesize 2^52. | ||
366 | | sseconst_hi reg, tmp, 43300000 | ||
367 | |.endmacro | ||
368 | |.macro sseconst_tobit, reg, tmp // Synthesize 2^52 + 2^51. | ||
369 | | sseconst_hi reg, tmp, 43380000 | ||
370 | |.endmacro | ||
371 | | | ||
372 | |// Move table write barrier back. Overwrites reg. | ||
373 | |.macro barrierback, tab, reg | ||
374 | | and byte tab->marked, (uint8_t)~LJ_GC_BLACK // black2gray(tab) | ||
375 | | mov reg, [DISPATCH+DISPATCH_GL(gc.grayagain)] | ||
376 | | mov [DISPATCH+DISPATCH_GL(gc.grayagain)], tab | ||
377 | | mov tab->gclist, reg | ||
378 | |.endmacro | ||
379 | | | ||
380 | |//----------------------------------------------------------------------- | ||
381 | |||
382 | /* Generate subroutines used by opcodes and other parts of the VM. */ | ||
383 | /* The .code_sub section should be last to help static branch prediction. */ | ||
384 | static void build_subroutines(BuildCtx *ctx) | ||
385 | { | ||
386 | |.code_sub | ||
387 | | | ||
388 | |//----------------------------------------------------------------------- | ||
389 | |//-- Return handling ---------------------------------------------------- | ||
390 | |//----------------------------------------------------------------------- | ||
391 | | | ||
392 | |->vm_returnp: | ||
393 | | test PCd, FRAME_P | ||
394 | | jz ->cont_dispatch | ||
395 | | | ||
396 | | // Return from pcall or xpcall fast func. | ||
397 | | and PC, -8 | ||
398 | | sub BASE, PC // Restore caller base. | ||
399 | | lea RA, [RA+PC-8] // Rebase RA and prepend one result. | ||
400 | | mov PC, [BASE-8] // Fetch PC of previous frame. | ||
401 | | // Prepending may overwrite the pcall frame, so do it at the end. | ||
402 | | mov_true ITYPE | ||
403 | | mov aword [BASE+RA], ITYPE // Prepend true to results. | ||
404 | | | ||
405 | |->vm_returnc: | ||
406 | | add RDd, 1 // RD = nresults+1 | ||
407 | | jz ->vm_unwind_yield | ||
408 | | mov MULTRES, RDd | ||
409 | | test PC, FRAME_TYPE | ||
410 | | jz ->BC_RET_Z // Handle regular return to Lua. | ||
411 | | | ||
412 | |->vm_return: | ||
413 | | // BASE = base, RA = resultofs, RD = nresults+1 (= MULTRES), PC = return | ||
414 | | xor PC, FRAME_C | ||
415 | | test PCd, FRAME_TYPE | ||
416 | | jnz ->vm_returnp | ||
417 | | | ||
418 | | // Return to C. | ||
419 | | set_vmstate C | ||
420 | | and PC, -8 | ||
421 | | sub PC, BASE | ||
422 | | neg PC // Previous base = BASE - delta. | ||
423 | | | ||
424 | | sub RDd, 1 | ||
425 | | jz >2 | ||
426 | |1: // Move results down. | ||
427 | | mov RB, [BASE+RA] | ||
428 | | mov [BASE-16], RB | ||
429 | | add BASE, 8 | ||
430 | | sub RDd, 1 | ||
431 | | jnz <1 | ||
432 | |2: | ||
433 | | mov L:RB, SAVE_L | ||
434 | | mov L:RB->base, PC | ||
435 | |3: | ||
436 | | mov RDd, MULTRES | ||
437 | | mov RAd, SAVE_NRES // RA = wanted nresults+1 | ||
438 | |4: | ||
439 | | cmp RAd, RDd | ||
440 | | jne >6 // More/less results wanted? | ||
441 | |5: | ||
442 | | sub BASE, 16 | ||
443 | | mov L:RB->top, BASE | ||
444 | | | ||
445 | |->vm_leave_cp: | ||
446 | | mov RA, SAVE_CFRAME // Restore previous C frame. | ||
447 | | mov L:RB->cframe, RA | ||
448 | | xor eax, eax // Ok return status for vm_pcall. | ||
449 | | | ||
450 | |->vm_leave_unw: | ||
451 | | restoreregs | ||
452 | | ret | ||
453 | | | ||
454 | |6: | ||
455 | | jb >7 // Less results wanted? | ||
456 | | // More results wanted. Check stack size and fill up results with nil. | ||
457 | | cmp BASE, L:RB->maxstack | ||
458 | | ja >8 | ||
459 | | mov aword [BASE-16], LJ_TNIL | ||
460 | | add BASE, 8 | ||
461 | | add RDd, 1 | ||
462 | | jmp <4 | ||
463 | | | ||
464 | |7: // Less results wanted. | ||
465 | | test RAd, RAd | ||
466 | | jz <5 // But check for LUA_MULTRET+1. | ||
467 | | sub RA, RD // Negative result! | ||
468 | | lea BASE, [BASE+RA*8] // Correct top. | ||
469 | | jmp <5 | ||
470 | | | ||
471 | |8: // Corner case: need to grow stack for filling up results. | ||
472 | | // This can happen if: | ||
473 | | // - A C function grows the stack (a lot). | ||
474 | | // - The GC shrinks the stack in between. | ||
475 | | // - A return back from a lua_call() with (high) nresults adjustment. | ||
476 | | mov L:RB->top, BASE // Save current top held in BASE (yes). | ||
477 | | mov MULTRES, RDd // Need to fill only remainder with nil. | ||
478 | | mov CARG2d, RAd | ||
479 | | mov CARG1, L:RB | ||
480 | | call extern lj_state_growstack // (lua_State *L, int n) | ||
481 | | mov BASE, L:RB->top // Need the (realloced) L->top in BASE. | ||
482 | | jmp <3 | ||
483 | | | ||
484 | |->vm_unwind_yield: | ||
485 | | mov al, LUA_YIELD | ||
486 | | jmp ->vm_unwind_c_eh | ||
487 | | | ||
488 | |->vm_unwind_c: // Unwind C stack, return from vm_pcall. | ||
489 | | // (void *cframe, int errcode) | ||
490 | | mov eax, CARG2d // Error return status for vm_pcall. | ||
491 | | mov rsp, CARG1 | ||
492 | |->vm_unwind_c_eh: // Landing pad for external unwinder. | ||
493 | | mov L:RB, SAVE_L | ||
494 | | mov GL:RB, L:RB->glref | ||
495 | | mov dword GL:RB->vmstate, ~LJ_VMST_C | ||
496 | | jmp ->vm_leave_unw | ||
497 | | | ||
498 | |->vm_unwind_rethrow: // Restore saved registers and tail-jump to lj_err_throw(SAVE_L, eax). | ||
499 | |.if not X64WIN // With X64WIN nothing is assembled here, so this label coincides with vm_unwind_ff. | ||
500 | | mov CARG1, SAVE_L | ||
501 | | mov CARG2d, eax | ||
502 | | restoreregs | ||
503 | | jmp extern lj_err_throw // (lua_State *L, int errcode) | ||
504 | |.endif | ||
505 | | | ||
506 | |->vm_unwind_ff: // Unwind C stack, return from ff pcall. | ||
507 | | // (void *cframe) | ||
508 | | and CARG1, CFRAME_RAWMASK | ||
509 | | mov rsp, CARG1 | ||
510 | |->vm_unwind_ff_eh: // Landing pad for external unwinder. | ||
511 | | mov L:RB, SAVE_L | ||
512 | | mov RDd, 1+1 // Really 1+2 results, incr. later. | ||
513 | | mov BASE, L:RB->base | ||
514 | | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. | ||
515 | | add DISPATCH, GG_G2DISP | ||
516 | | mov PC, [BASE-8] // Fetch PC of previous frame. | ||
517 | | mov_false RA | ||
518 | | mov RB, [BASE] | ||
519 | | mov [BASE-16], RA // Prepend false to error message. | ||
520 | | mov [BASE-8], RB | ||
521 | | mov RA, -16 // Results start at BASE+RA = BASE-16. | ||
522 | | set_vmstate INTERP | ||
523 | | jmp ->vm_returnc // Increments RD/MULTRES and returns. | ||
524 | | | ||
525 | |//----------------------------------------------------------------------- | ||
526 | |//-- Grow stack for calls ----------------------------------------------- | ||
527 | |//----------------------------------------------------------------------- | ||
528 | | | ||
529 | |->vm_growstack_c: // Grow stack for C function. | ||
530 | | mov CARG2d, LUA_MINSTACK | ||
531 | | jmp >2 | ||
532 | | | ||
533 | |->vm_growstack_v: // Grow stack for vararg Lua function. | ||
534 | | sub RD, 16 // LJ_FR2 | ||
535 | | jmp >1 | ||
536 | | | ||
537 | |->vm_growstack_f: // Grow stack for fixarg Lua function. | ||
538 | | // BASE = new base, RD = nargs+1, RB = L, PC = first PC | ||
539 | | lea RD, [BASE+NARGS:RD*8-8] | ||
540 | |1: | ||
541 | | movzx RAd, byte [PC-4+PC2PROTO(framesize)] | ||
542 | | add PC, 4 // Must point after first instruction. | ||
543 | | mov L:RB->base, BASE | ||
544 | | mov L:RB->top, RD | ||
545 | | mov SAVE_PC, PC | ||
546 | | mov CARG2, RA | ||
547 | |2: | ||
548 | | // RB = L, L->base = new base, L->top = top | ||
549 | | mov CARG1, L:RB | ||
550 | | call extern lj_state_growstack // (lua_State *L, int n) | ||
551 | | mov BASE, L:RB->base | ||
552 | | mov RD, L:RB->top | ||
553 | | mov LFUNC:RB, [BASE-16] | ||
554 | | cleartp LFUNC:RB | ||
555 | | sub RD, BASE | ||
556 | | shr RDd, 3 | ||
557 | | add NARGS:RDd, 1 | ||
558 | | // BASE = new base, RB = LFUNC, RD = nargs+1 | ||
559 | | ins_callt // Just retry the call. | ||
560 | | | ||
561 | |//----------------------------------------------------------------------- | ||
562 | |//-- Entry points into the assembler VM --------------------------------- | ||
563 | |//----------------------------------------------------------------------- | ||
564 | | | ||
565 | |->vm_resume: // Setup C frame and resume thread. | ||
566 | | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) | ||
567 | | saveregs | ||
568 | | mov L:RB, CARG1 // Caveat: CARG1 may be RA. | ||
569 | | mov SAVE_L, CARG1 | ||
570 | | mov RA, CARG2 | ||
571 | | mov PCd, FRAME_CP | ||
572 | | xor RDd, RDd | ||
573 | | lea KBASE, [esp+CFRAME_RESUME] // NOTE(review): base is spelled esp while the other stack accesses use rsp -- confirm the assembler encodes a 64-bit base here. | ||
574 | | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. | ||
575 | | add DISPATCH, GG_G2DISP | ||
576 | | mov SAVE_PC, RD // Any value outside of bytecode is ok. | ||
577 | | mov SAVE_CFRAME, RD | ||
578 | | mov SAVE_NRES, RDd | ||
579 | | mov SAVE_ERRF, RDd | ||
580 | | mov L:RB->cframe, KBASE | ||
581 | | cmp byte L:RB->status, RDL | ||
582 | | je >2 // Initial resume (like a call). | ||
583 | | | ||
584 | | // Resume after yield (like a return). | ||
585 | | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB | ||
586 | | set_vmstate INTERP | ||
587 | | mov byte L:RB->status, RDL | ||
588 | | mov BASE, L:RB->base | ||
589 | | mov RD, L:RB->top | ||
590 | | sub RD, RA | ||
591 | | shr RDd, 3 | ||
592 | | add RDd, 1 // RD = nresults+1 | ||
593 | | sub RA, BASE // RA = resultofs | ||
594 | | mov PC, [BASE-8] | ||
595 | | mov MULTRES, RDd | ||
596 | | test PCd, FRAME_TYPE | ||
597 | | jz ->BC_RET_Z | ||
598 | | jmp ->vm_return | ||
599 | | | ||
600 | |->vm_pcall: // Setup protected C frame and enter VM. | ||
601 | | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) | ||
602 | | saveregs | ||
603 | | mov PCd, FRAME_CP | ||
604 | | mov SAVE_ERRF, CARG4d | ||
605 | | jmp >1 | ||
606 | | | ||
607 | |->vm_call: // Setup C frame and enter VM. | ||
608 | | // (lua_State *L, TValue *base, int nres1) | ||
609 | | saveregs | ||
610 | | mov PCd, FRAME_C | ||
611 | | | ||
612 | |1: // Entry point for vm_pcall above (PC = ftype). | ||
613 | | mov SAVE_NRES, CARG3d | ||
614 | | mov L:RB, CARG1 // Caveat: CARG1 may be RA. | ||
615 | | mov SAVE_L, CARG1 | ||
616 | | mov RA, CARG2 | ||
617 | | | ||
618 | | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. | ||
619 | | mov KBASE, L:RB->cframe // Add our C frame to cframe chain. | ||
620 | | mov SAVE_CFRAME, KBASE | ||
621 | | mov SAVE_PC, L:RB // Any value outside of bytecode is ok. | ||
622 | | add DISPATCH, GG_G2DISP | ||
623 | | mov L:RB->cframe, rsp | ||
624 | | | ||
625 | |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype). | ||
626 | | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB | ||
627 | | set_vmstate INTERP | ||
628 | | mov BASE, L:RB->base // BASE = old base (used in vmeta_call). | ||
629 | | add PC, RA | ||
630 | | sub PC, BASE // PC = frame delta + frame type | ||
631 | | | ||
632 | | mov RD, L:RB->top | ||
633 | | sub RD, RA | ||
634 | | shr NARGS:RDd, 3 | ||
635 | | add NARGS:RDd, 1 // RD = nargs+1 | ||
636 | | | ||
637 | |->vm_call_dispatch: | ||
638 | | mov LFUNC:RB, [RA-16] | ||
639 | | checkfunc LFUNC:RB, ->vmeta_call // Ensure KBASE defined and != BASE. | ||
640 | | | ||
641 | |->vm_call_dispatch_f: | ||
642 | | mov BASE, RA | ||
643 | | ins_call | ||
644 | | // BASE = new base, RB = func, RD = nargs+1, PC = caller PC | ||
645 | | | ||
646 | |->vm_cpcall: // Setup protected C frame, call C. | ||
647 | | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) | ||
648 | | saveregs | ||
649 | | mov L:RB, CARG1 // Caveat: CARG1 may be RA. | ||
650 | | mov SAVE_L, CARG1 | ||
651 | | mov SAVE_PC, L:RB // Any value outside of bytecode is ok. | ||
652 | | | ||
653 | | mov KBASE, L:RB->stack // Compute -savestack(L, L->top). | ||
654 | | sub KBASE, L:RB->top | ||
655 | | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. | ||
656 | | mov SAVE_ERRF, 0 // No error function. | ||
657 | | mov SAVE_NRES, KBASEd // Neg. delta means cframe w/o frame. | ||
658 | | add DISPATCH, GG_G2DISP | ||
659 | | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe). | ||
660 | | | ||
661 | | mov KBASE, L:RB->cframe // Add our C frame to cframe chain. | ||
662 | | mov SAVE_CFRAME, KBASE | ||
663 | | mov L:RB->cframe, rsp | ||
664 | | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB | ||
665 | | | ||
666 | | call CARG4 // (lua_State *L, lua_CFunction func, void *ud) | ||
667 | | // TValue * (new base) or NULL returned in rax (RC). | ||
668 | | test RC, RC | ||
669 | | jz ->vm_leave_cp // No base? Just remove C frame. | ||
670 | | mov RA, RC | ||
671 | | mov PCd, FRAME_CP | ||
672 | | jmp <2 // Else continue with the call. | ||
673 | | | ||
674 | |//----------------------------------------------------------------------- | ||
675 | |//-- Metamethod handling ------------------------------------------------ | ||
676 | |//----------------------------------------------------------------------- | ||
677 | | | ||
678 | |//-- Continuation dispatch ---------------------------------------------- | ||
679 | | | ||
680 | |->cont_dispatch: // After a metamethod call returns: restore caller state, then jump to the saved continuation. | ||
681 | | // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES) | ||
682 | | add RA, BASE // RA = address of the first result. | ||
683 | | and PC, -8 // Clear the low 3 bits of PC; the remainder is the frame delta used below. | ||
684 | | mov RB, BASE // RB = meta base, needed for the [RB-24]/[RB-32] loads. | ||
685 | | sub BASE, PC // Restore caller BASE. | ||
686 | | mov aword [RA+RD*8-8], LJ_TNIL // Ensure one valid arg. | ||
687 | | mov RC, RA // ... in [RC] | ||
688 | | mov PC, [RB-24] // Restore PC from [cont|PC]. | ||
689 | | mov RA, qword [RB-32] // May be negative on WIN64 with debug. RA = continuation (or 0/1 marker with FFI). | ||
690 | |.if FFI | ||
691 | | cmp RA, 1 // Values 0 and 1 are markers, not jump targets. | ||
692 | | jbe >1 | ||
693 | |.endif | ||
694 | | mov LFUNC:KBASE, [BASE-16] // Reload KBASE from the function at the restored BASE ... | ||
695 | | cleartp LFUNC:KBASE | ||
696 | | mov KBASE, LFUNC:KBASE->pc | ||
697 | | mov KBASE, [KBASE+PC2PROTO(k)] // ... via its pc field and the PC2PROTO(k) offset. | ||
698 | | // BASE = base, RC = result, RB = meta base | ||
699 | | jmp RA // Jump to continuation. | ||
700 | | | ||
701 | |.if FFI | ||
702 | |1: // Flags are still those of the "cmp RA, 1" above. | ||
703 | | je ->cont_ffi_callback // cont = 1: return from FFI callback. | ||
704 | | // cont = 0: Tail call from C function. | ||
705 | | sub RB, BASE // RB = meta base - base, in bytes. | ||
706 | | shr RBd, 3 // Bytes -> 8-byte slots. | ||
707 | | lea RDd, [RBd-3] // RD = slot distance minus 3, passed on to vm_call_tail. | ||
708 | | jmp ->vm_call_tail | ||
709 | |.endif | ||
710 | | | ||
711 | |->cont_cat: // BASE = base, RC = result, RB = mbase. Continuation after a __concat metamethod call. | ||
712 | | movzx RAd, PC_RB // RA = operand B of the current instruction. | ||
713 | | sub RB, 32 // RB = mbase - 32 bytes. | ||
714 | | lea RA, [BASE+RA*8] // RA = &base[B]. | ||
715 | | sub RA, RB // RA = &base[B] - (mbase-32). | ||
716 | | je ->cont_ra // Equal: store the result via cont_ra. | ||
717 | | neg RA // Otherwise RA = positive byte distance ... | ||
718 | | shr RAd, 3 // ... converted to a count of 8-byte slots. | ||
719 | |.if X64WIN | ||
720 | | mov CARG3d, RAd // Same setup as the .else branch, in a different order (presumably because of register aliasing). | ||
721 | | mov L:CARG1, SAVE_L | ||
722 | | mov L:CARG1->base, BASE | ||
723 | | mov RC, [RC] // Copy the result TValue ... | ||
724 | | mov [RB], RC // ... to the slot at RB. | ||
725 | | mov CARG2, RB | ||
726 | |.else | ||
727 | | mov L:CARG1, SAVE_L | ||
728 | | mov L:CARG1->base, BASE | ||
729 | | mov CARG3d, RAd | ||
730 | | mov RA, [RC] // Copy the result TValue ... | ||
731 | | mov [RB], RA // ... to the slot at RB. | ||
732 | | mov CARG2, RB | ||
733 | |.endif | ||
734 | | jmp ->BC_CAT_Z // Continue there with CARG1 = L, CARG2 = RB, CARG3 = slot count. | ||
735 | | | ||
736 | |//-- Table indexing metamethods ----------------------------------------- | ||
737 | | | ||
738 | |->vmeta_tgets: // Index fallback with a string key; merges into the common path at labels 1/2. | ||
739 | | settp STR:RC, LJ_TSTR // STR:RC = GCstr * | ||
740 | | mov TMP1, STR:RC // Store the tagged key in TMP1 ... | ||
741 | | lea RC, TMP1 // ... and pass its address as TValue *k. | ||
742 | | cmp PC_OP, BC_GGET // Global get? Otherwise the table is reloaded from operand B at label 1. | ||
743 | | jne >1 | ||
744 | | settp TAB:RA, TAB:RB, LJ_TTAB // TAB:RB = GCtab * | ||
745 | | lea RB, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv. | ||
746 | | mov [RB], TAB:RA | ||
747 | | jmp >2 // RB and RC are both TValue pointers already. | ||
748 | | | ||
749 | |->vmeta_tgetb: // Index fallback with the key given as a literal in operand C. | ||
750 | | movzx RCd, PC_RC // RC = operand C. | ||
751 | |.if DUALNUM | ||
752 | | setint RC | ||
753 | | mov TMP1, RC // Key stored in TMP1 as a tagged integer. | ||
754 | |.else | ||
755 | | cvtsi2sd xmm0, RCd | ||
756 | | movsd TMP1, xmm0 // Key stored in TMP1 as a double. | ||
757 | |.endif | ||
758 | | lea RC, TMP1 // TValue *k = &TMP1. | ||
759 | | jmp >1 | ||
760 | | | ||
761 | |->vmeta_tgetv: // Index fallback with the key in a stack slot. | ||
762 | | movzx RCd, PC_RC // Reload TValue *k from RC. | ||
763 | | lea RC, [BASE+RC*8] | ||
764 | |1: | ||
765 | | movzx RBd, PC_RB // Reload TValue *t from RB. | ||
766 | | lea RB, [BASE+RB*8] | ||
767 | |2: // Common path: RB = TValue *t, RC = TValue *k. | ||
768 | | mov L:CARG1, SAVE_L | ||
769 | | mov L:CARG1->base, BASE // Caveat: CARG2/CARG3 may be BASE. | ||
770 | | mov CARG2, RB | ||
771 | | mov CARG3, RC | ||
772 | | mov L:RB, L:CARG1 // Keep L in RB; it is read again after the call. | ||
773 | | mov SAVE_PC, PC | ||
774 | | call extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) | ||
775 | | // TValue * (finished) or NULL (metamethod) returned in eax (RC). | ||
776 | | mov BASE, L:RB->base // Reload BASE from L after the C call. | ||
777 | | test RC, RC | ||
778 | | jz >3 // NULL: a metamethod has to be called. | ||
779 | |->cont_ra: // BASE = base, RC = result | ||
780 | | movzx RAd, PC_RA | ||
781 | | mov RB, [RC] // Copy the result TValue ... | ||
782 | | mov [BASE+RA*8], RB // ... into base[A]. | ||
783 | | ins_next | ||
784 | | | ||
785 | |3: // Call __index metamethod. | ||
786 | | // BASE = base, L->top = new base, stack = cont/func/t/k | ||
787 | | mov RA, L:RB->top // RA = new base. | ||
788 | | mov [RA-24], PC // [cont|PC] | ||
789 | | lea PC, [RA+FRAME_CONT] // PC = (new base - BASE) + FRAME_CONT ... | ||
790 | | sub PC, BASE // ... i.e. the frame delta combined with the continuation frame type. | ||
791 | | mov LFUNC:RB, [RA-16] // Guaranteed to be a function here. | ||
792 | | mov NARGS:RDd, 2+1 // 2 args for func(t, k). | ||
793 | | cleartp LFUNC:RB | ||
794 | | jmp ->vm_call_dispatch_f | ||
795 | | | ||
796 | |->vmeta_tgetr: // Integer-key lookup through lj_tab_getinth; TAB:RB = table, RC = key. | ||
797 | | mov CARG1, TAB:RB | ||
798 | | mov RB, BASE // Save BASE. | ||
799 | | mov CARG2d, RCd // Caveat: CARG2 == BASE | ||
800 | | call extern lj_tab_getinth // (GCtab *t, int32_t key) | ||
801 | | // cTValue * or NULL returned in eax (RC). | ||
802 | | movzx RAd, PC_RA // RA = operand A, expected by both continuation targets below. | ||
803 | | mov BASE, RB // Restore BASE. | ||
804 | | test RC, RC | ||
805 | | jnz ->BC_TGETR_Z // Slot found: RC points to it. | ||
806 | | mov ITYPE, LJ_TNIL // No slot: the result is nil. | ||
807 | | jmp ->BC_TGETR2_Z | ||
808 | | | ||
809 | |//----------------------------------------------------------------------- | ||
810 | | | ||
811 | |->vmeta_tsets: // Store fallback with a string key; merges into the common path at labels 1/2. | ||
812 | | settp STR:RC, LJ_TSTR // STR:RC = GCstr * | ||
813 | | mov TMP1, STR:RC // Store the tagged key in TMP1 ... | ||
814 | | lea RC, TMP1 // ... and pass its address as TValue *k. | ||
815 | | cmp PC_OP, BC_GSET // Global set? Otherwise the table is reloaded from operand B at label 1. | ||
816 | | jne >1 | ||
817 | | settp TAB:RA, TAB:RB, LJ_TTAB // TAB:RB = GCtab * | ||
818 | | lea RB, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv. | ||
819 | | mov [RB], TAB:RA | ||
820 | | jmp >2 // RB and RC are both TValue pointers already. | ||
821 | | | ||
822 | |->vmeta_tsetb: // Store fallback with the key given as a literal in operand C. | ||
823 | | movzx RCd, PC_RC // RC = operand C. | ||
824 | |.if DUALNUM | ||
825 | | setint RC | ||
826 | | mov TMP1, RC // Key stored in TMP1 as a tagged integer. | ||
827 | |.else | ||
828 | | cvtsi2sd xmm0, RCd | ||
829 | | movsd TMP1, xmm0 // Key stored in TMP1 as a double. | ||
830 | |.endif | ||
831 | | lea RC, TMP1 // TValue *k = &TMP1. | ||
832 | | jmp >1 | ||
833 | | | ||
834 | |->vmeta_tsetv: // Store fallback with the key in a stack slot. | ||
835 | | movzx RCd, PC_RC // Reload TValue *k from RC. | ||
836 | | lea RC, [BASE+RC*8] | ||
837 | |1: | ||
838 | | movzx RBd, PC_RB // Reload TValue *t from RB. | ||
839 | | lea RB, [BASE+RB*8] | ||
840 | |2: // Common path: RB = TValue *t, RC = TValue *k. | ||
841 | | mov L:CARG1, SAVE_L | ||
842 | | mov L:CARG1->base, BASE // Caveat: CARG2/CARG3 may be BASE. | ||
843 | | mov CARG2, RB | ||
844 | | mov CARG3, RC | ||
845 | | mov L:RB, L:CARG1 // Keep L in RB; it is read again after the call. | ||
846 | | mov SAVE_PC, PC | ||
847 | | call extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) | ||
848 | | // TValue * (finished) or NULL (metamethod) returned in eax (RC). | ||
849 | | mov BASE, L:RB->base // Reload BASE from L after the C call. | ||
850 | | test RC, RC | ||
851 | | jz >3 // NULL: a metamethod has to be called. | ||
852 | | // NOBARRIER: lj_meta_tset ensures the table is not black. | ||
853 | | movzx RAd, PC_RA | ||
854 | | mov RB, [BASE+RA*8] // Load the value from base[A] ... | ||
855 | | mov [RC], RB // ... and store it into the returned slot. | ||
856 | |->cont_nop: // BASE = base, (RC = result) | ||
857 | | ins_next | ||
858 | | | ||
859 | |3: // Call __newindex metamethod. | ||
860 | | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) | ||
861 | | mov RA, L:RB->top // RA = new base. | ||
862 | | mov [RA-24], PC // [cont|PC] | ||
863 | | movzx RCd, PC_RA | ||
864 | | // Copy value to third argument. | ||
865 | | mov RB, [BASE+RC*8] | ||
866 | | mov [RA+16], RB | ||
867 | | lea PC, [RA+FRAME_CONT] // PC = (new base - BASE) + FRAME_CONT ... | ||
868 | | sub PC, BASE // ... i.e. the frame delta combined with the continuation frame type. | ||
869 | | mov LFUNC:RB, [RA-16] // Guaranteed to be a function here. | ||
870 | | mov NARGS:RDd, 3+1 // 3 args for func(t, k, v). | ||
871 | | cleartp LFUNC:RB | ||
872 | | jmp ->vm_call_dispatch_f | ||
873 | | | ||
874 | |->vmeta_tsetr: // Integer-key store through lj_tab_setinth; TAB:RB = table, RC = key. | ||
875 | |.if X64WIN | ||
876 | | mov L:CARG1, SAVE_L | ||
877 | | mov CARG3d, RCd | ||
878 | | mov L:CARG1->base, BASE | ||
879 | | xchg CARG2, TAB:RB // Caveat: CARG2 == BASE. The swap passes the table and leaves BASE in RB. | ||
880 | |.else | ||
881 | | mov L:CARG1, SAVE_L | ||
882 | | mov CARG2, TAB:RB | ||
883 | | mov L:CARG1->base, BASE | ||
884 | | mov RB, BASE // Save BASE. | ||
885 | | mov CARG3d, RCd // Caveat: CARG3 == BASE. | ||
886 | |.endif | ||
887 | | mov SAVE_PC, PC | ||
888 | | call extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) | ||
889 | | // TValue * returned in eax (RC). | ||
890 | | movzx RAd, PC_RA // RA = operand A for the continuation. | ||
891 | | mov BASE, RB // Restore BASE. | ||
892 | | jmp ->BC_TSETR_Z | ||
893 | | | ||
894 | |//-- Comparison metamethods --------------------------------------------- | ||
895 | | | ||
896 | |->vmeta_comp: // Comparison fallback: calls lj_meta_comp on base[A] and base[D]. | ||
897 | | movzx RDd, PC_RD | ||
898 | | movzx RAd, PC_RA | ||
899 | | mov L:RB, SAVE_L | ||
900 | | mov L:RB->base, BASE // Caveat: CARG2/CARG3 == BASE. | ||
901 | |.if X64WIN | ||
902 | | lea CARG3, [BASE+RD*8] | ||
903 | | lea CARG2, [BASE+RA*8] | ||
904 | |.else | ||
905 | | lea CARG2, [BASE+RA*8] | ||
906 | | lea CARG3, [BASE+RD*8] | ||
907 | |.endif | ||
908 | | mov CARG1, L:RB // Caveat: CARG1/CARG4 == RA. | ||
909 | | movzx CARG4d, PC_OP // Pass the opcode as the 'op' argument. | ||
910 | | mov SAVE_PC, PC | ||
911 | | call extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) | ||
912 | | // 0/1 or TValue * (metamethod) returned in eax (RC). | ||
913 | |3: // Shared result handling; vmeta_equal and vmeta_equal_cd jump here too. | ||
914 | | mov BASE, L:RB->base | ||
915 | | cmp RC, 1 | ||
916 | | ja ->vmeta_binop // Above 1: RC is a TValue *, call the metamethod. | ||
917 | |4: // Entered with flags set by a preceding cmp (here or in cont_condf). | ||
918 | | lea PC, [PC+4] // Advance PC by 4 bytes; lea leaves the flags untouched. | ||
919 | | jb >6 // Below: do not branch. | ||
920 | |5: | ||
921 | | movzx RDd, PC_RD // Load the jump operand ... | ||
922 | | branchPC RD // ... and apply it to PC. | ||
923 | |6: | ||
924 | | ins_next | ||
925 | | | ||
926 | |->cont_condt: // BASE = base, RC = result | ||
927 | | add PC, 4 // Advance PC by 4 bytes. | ||
928 | | mov ITYPE, [RC] | ||
929 | | sar ITYPE, 47 // Shift the tag bits down to get the type. | ||
930 | | cmp ITYPEd, LJ_TISTRUECOND // Branch if result is true. | ||
931 | | jb <5 | ||
932 | | jmp <6 | ||
933 | | | ||
934 | |->cont_condf: // BASE = base, RC = result | ||
935 | | mov ITYPE, [RC] | ||
936 | | sar ITYPE, 47 // Shift the tag bits down to get the type. | ||
937 | | cmp ITYPEd, LJ_TISTRUECOND // Branch if result is false. | ||
938 | | jmp <4 // The jb at label 4 consumes the flags of this cmp. | ||
939 | | | ||
940 | |->vmeta_equal: // Equality fallback: calls lj_meta_equal with RA, RD as the objects and RB as 'ne'. | ||
941 | | cleartp TAB:RD // Strip the type tag from RD. | ||
942 | | sub PC, 4 // Step PC back by 4 bytes; label 4 advances it again. | ||
943 | |.if X64WIN | ||
944 | | mov CARG3, RD | ||
945 | | mov CARG4d, RBd | ||
946 | | mov L:RB, SAVE_L | ||
947 | | mov L:RB->base, BASE // Caveat: CARG2 == BASE. | ||
948 | | mov CARG2, RA | ||
949 | | mov CARG1, L:RB // Caveat: CARG1 == RA. | ||
950 | |.else | ||
951 | | mov CARG2, RA | ||
952 | | mov CARG4d, RBd // Caveat: CARG4 == RA. | ||
953 | | mov L:RB, SAVE_L | ||
954 | | mov L:RB->base, BASE // Caveat: CARG3 == BASE. | ||
955 | | mov CARG3, RD | ||
956 | | mov CARG1, L:RB | ||
957 | |.endif | ||
958 | | mov SAVE_PC, PC | ||
959 | | call extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) | ||
960 | | // 0/1 or TValue * (metamethod) returned in eax (RC). | ||
961 | | jmp <3 | ||
962 | | | ||
963 | |->vmeta_equal_cd: // Equality fallback via lj_meta_equal_cd; the body exists only when FFI is enabled. | ||
964 | |.if FFI | ||
965 | | sub PC, 4 // Step PC back by 4 bytes; label 4 advances it again. | ||
966 | | mov L:RB, SAVE_L | ||
967 | | mov L:RB->base, BASE | ||
968 | | mov CARG1, L:RB | ||
969 | | mov CARG2d, dword [PC-4] // Pass the 32-bit instruction word located just before PC. | ||
970 | | mov SAVE_PC, PC | ||
971 | | call extern lj_meta_equal_cd // (lua_State *L, BCIns ins) | ||
972 | | // 0/1 or TValue * (metamethod) returned in eax (RC). | ||
973 | | jmp <3 | ||
974 | |.endif | ||
975 | | | ||
976 | |->vmeta_istype: // Type-check fallback: calls lj_meta_istype with RA and RD. | ||
977 | | mov L:RB, SAVE_L | ||
978 | | mov L:RB->base, BASE // Caveat: CARG2/CARG3 may be BASE. | ||
979 | | mov CARG2d, RAd | ||
980 | | mov CARG3d, RDd | ||
981 | | mov L:CARG1, L:RB | ||
982 | | mov SAVE_PC, PC | ||
983 | | call extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) | ||
984 | | mov BASE, L:RB->base // Reload BASE from L after the C call. | ||
985 | | jmp <6 // Continue with the next instruction. | ||
986 | | | ||
987 | |//-- Arithmetic metamethods --------------------------------------------- | ||
988 | | | ||
989 | |->vmeta_arith_vno: // Variant of vmeta_arith_vn that first reloads operands B and C (DUALNUM only). | ||
990 | |.if DUALNUM | ||
991 | | movzx RBd, PC_RB | ||
992 | | movzx RCd, PC_RC | ||
993 | |.endif | ||
994 | |->vmeta_arith_vn: // Second operand is addressed relative to KBASE, first is a stack slot. | ||
995 | | lea RC, [KBASE+RC*8] | ||
996 | | jmp >1 | ||
997 | | | ||
998 | |->vmeta_arith_nvo: // Variant of vmeta_arith_nv that first reloads operands B and C (DUALNUM only). | ||
999 | |.if DUALNUM | ||
1000 | | movzx RBd, PC_RB | ||
1001 | | movzx RCd, PC_RC | ||
1002 | |.endif | ||
1003 | |->vmeta_arith_nv: // Swapped order: rb = KBASE-relative operand, rc = stack slot. | ||
1004 | | lea TMPR, [KBASE+RC*8] | ||
1005 | | lea RC, [BASE+RB*8] | ||
1006 | | mov RB, TMPR | ||
1007 | | jmp >2 // RB is already a pointer; skip label 1. | ||
1008 | | | ||
1009 | |->vmeta_unm: // Unary minus: both operand pointers refer to base[D]. | ||
1010 | | lea RC, [BASE+RD*8] | ||
1011 | | mov RB, RC | ||
1012 | | jmp >2 | ||
1013 | | | ||
1014 | |->vmeta_arith_vvo: // Variant of vmeta_arith_vv that first reloads operands B and C (DUALNUM only). | ||
1015 | |.if DUALNUM | ||
1016 | | movzx RBd, PC_RB | ||
1017 | | movzx RCd, PC_RC | ||
1018 | |.endif | ||
1019 | |->vmeta_arith_vv: // Both operands are stack slots. | ||
1020 | | lea RC, [BASE+RC*8] | ||
1021 | |1: | ||
1022 | | lea RB, [BASE+RB*8] | ||
1023 | |2: // Common path: RB = TValue *rb, RC = TValue *rc, RA = index of the destination slot. | ||
1024 | | lea RA, [BASE+RA*8] | ||
1025 | |.if X64WIN | ||
1026 | | mov CARG3, RB | ||
1027 | | mov CARG4, RC | ||
1028 | | movzx RCd, PC_OP | ||
1029 | | mov ARG5d, RCd // The opcode goes into the fifth argument location. | ||
1030 | | mov L:RB, SAVE_L | ||
1031 | | mov L:RB->base, BASE // Caveat: CARG2 == BASE. | ||
1032 | | mov CARG2, RA | ||
1033 | | mov CARG1, L:RB // Caveat: CARG1 == RA. | ||
1034 | |.else | ||
1035 | | movzx CARG5d, PC_OP | ||
1036 | | mov CARG2, RA | ||
1037 | | mov CARG4, RC // Caveat: CARG4 == RA. | ||
1038 | | mov L:CARG1, SAVE_L | ||
1039 | | mov L:CARG1->base, BASE // Caveat: CARG3 == BASE. | ||
1040 | | mov CARG3, RB | ||
1041 | | mov L:RB, L:CARG1 // Keep L in RB; it is read again after the call. | ||
1042 | |.endif | ||
1043 | | mov SAVE_PC, PC | ||
1044 | | call extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) | ||
1045 | | // NULL (finished) or TValue * (metamethod) returned in eax (RC). | ||
1046 | | mov BASE, L:RB->base | ||
1047 | | test RC, RC | ||
1048 | | jz ->cont_nop // NULL: nothing more to do, continue with the next instruction. | ||
1049 | | | ||
1050 | | // Call metamethod for binary op. | ||
1051 | |->vmeta_binop: | ||
1052 | | // BASE = base, RC = new base, stack = cont/func/o1/o2 | ||
1053 | | mov RA, RC // RA = new base. | ||
1054 | | sub RC, BASE // RC = new base - BASE (frame delta). | ||
1055 | | mov [RA-24], PC // [cont|PC] | ||
1056 | | lea PC, [RC+FRAME_CONT] // PC = frame delta combined with the continuation frame type. | ||
1057 | | mov NARGS:RDd, 2+1 // 2 args for func(o1, o2). | ||
1058 | | jmp ->vm_call_dispatch | ||
1059 | | | ||
1060 | |->vmeta_len: // Length fallback: calls lj_meta_len on base[D]. | ||
1061 | | movzx RDd, PC_RD | ||
1062 | | mov L:RB, SAVE_L | ||
1063 | | mov L:RB->base, BASE | ||
1064 | | lea CARG2, [BASE+RD*8] // Caveat: CARG2 == BASE | ||
1065 | | mov L:CARG1, L:RB | ||
1066 | | mov SAVE_PC, PC | ||
1067 | | call extern lj_meta_len // (lua_State *L, TValue *o) | ||
1068 | | // NULL (retry) or TValue * (metamethod) returned in eax (RC). | ||
1069 | | mov BASE, L:RB->base | ||
1070 | #if LJ_52 | ||
1071 | | test RC, RC | ||
1072 | | jne ->vmeta_binop // Binop call for compatibility. | ||
1073 | | movzx RDd, PC_RD // NULL: reload the operand ... | ||
1074 | | mov TAB:CARG1, [BASE+RD*8] // ... fetch the table from base[D] ... | ||
1075 | | cleartp TAB:CARG1 | ||
1076 | | jmp ->BC_LEN_Z // ... and retry in BC_LEN_Z with TAB:CARG1 set. | ||
1077 | #else | ||
1078 | | jmp ->vmeta_binop // Binop call for compatibility. | ||
1079 | #endif | ||
1080 | | | ||
1081 | |//-- Call metamethod ---------------------------------------------------- | ||
1082 | | | ||
1083 | |->vmeta_call_ra: // Entry with RA as a slot index: convert it to the new base address first. | ||
1084 | | lea RA, [BASE+RA*8+16] | ||
1085 | |->vmeta_call: // Resolve and call __call metamethod. | ||
1086 | | // BASE = old base, RA = new base, RC = nargs+1, PC = return | ||
1087 | | mov TMP1d, NARGS:RDd // Save RA, RC for us. | ||
1088 | | mov RB, RA // New base kept in RB; it is read back after the call. | ||
1089 | |.if X64WIN | ||
1090 | | mov L:TMPR, SAVE_L | ||
1091 | | mov L:TMPR->base, BASE // Caveat: CARG2 is BASE. | ||
1092 | | lea CARG2, [RA-16] // func = slot 16 bytes below the new base. | ||
1093 | | lea CARG3, [RA+NARGS:RD*8-8] // top = first slot after the arguments. | ||
1094 | | mov CARG1, L:TMPR // Caveat: CARG1 is RA. | ||
1095 | |.else | ||
1096 | | mov L:CARG1, SAVE_L | ||
1097 | | mov L:CARG1->base, BASE // Caveat: CARG3 is BASE. | ||
1098 | | lea CARG2, [RA-16] // func = slot 16 bytes below the new base. | ||
1099 | | lea CARG3, [RA+NARGS:RD*8-8] // top = first slot after the arguments. | ||
1100 | |.endif | ||
1101 | | mov SAVE_PC, PC | ||
1102 | | call extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) | ||
1103 | | mov RA, RB // Restore the new base. | ||
1104 | | mov L:RB, SAVE_L | ||
1105 | | mov BASE, L:RB->base | ||
1106 | | mov NARGS:RDd, TMP1d // Restore the saved nargs+1 ... | ||
1107 | | mov LFUNC:RB, [RA-16] // Reload the function slot after the call. | ||
1108 | | add NARGS:RDd, 1 // ... and count one more argument (presumably the original object inserted by lj_meta_call -- confirm). | ||
1109 | | // This is fragile. L->base must not move, KBASE must always be defined. | ||
1110 | | cmp KBASE, BASE // Continue with CALLT if flag set. | ||
1111 | | je ->BC_CALLT_Z | ||
1112 | | cleartp LFUNC:RB | ||
1113 | | mov BASE, RA | ||
1114 | | ins_call // Otherwise call resolved metamethod. | ||
1115 | | | ||
1116 | |//-- Argument coercion for 'for' statement ------------------------------ | ||
1117 | | | ||
1118 | |->vmeta_for: // Calls lj_meta_for on the loop slots at RA, then re-dispatches the same instruction. | ||
1119 | | mov L:RB, SAVE_L | ||
1120 | | mov L:RB->base, BASE | ||
1121 | | mov CARG2, RA // Caveat: CARG2 == BASE | ||
1122 | | mov L:CARG1, L:RB // Caveat: CARG1 == RA | ||
1123 | | mov SAVE_PC, PC | ||
1124 | | call extern lj_meta_for // (lua_State *L, TValue *base) | ||
1125 | | mov BASE, L:RB->base | ||
1126 | | mov RCd, [PC-4] // Re-fetch the 32-bit instruction word just before PC. | ||
1127 | | movzx RAd, RCH // RA = second byte of the instruction. | ||
1128 | | movzx OP, RCL // OP = lowest byte of the instruction (the opcode). | ||
1129 | | shr RCd, 16 // RC = upper 16 bits of the instruction. | ||
1130 | | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Retry FORI or JFORI. | ||
1131 | | | ||
1132 | |//----------------------------------------------------------------------- | ||
1133 | |//-- Fast functions ----------------------------------------------------- | ||
1134 | |//----------------------------------------------------------------------- | ||
1135 | | | ||
1136 | |.macro .ffunc, name // Define the entry label ff_<name> without any argument check. | ||
1137 | |->ff_ .. name: | ||
1138 | |.endmacro | ||
1139 | | | ||
1140 | |.macro .ffunc_1, name // Entry label ff_<name>; fewer than 1 argument goes to fff_fallback. | ||
1141 | |->ff_ .. name: | ||
1142 | | cmp NARGS:RDd, 1+1; jb ->fff_fallback // NARGS:RD holds nargs+1. Flags stay set for the code that follows. | ||
1143 | |.endmacro | ||
1144 | | | ||
1145 | |.macro .ffunc_2, name // Entry label ff_<name>; fewer than 2 arguments goes to fff_fallback. | ||
1146 | |->ff_ .. name: | ||
1147 | | cmp NARGS:RDd, 2+1; jb ->fff_fallback | ||
1148 | |.endmacro | ||
1149 | | | ||
1150 | |.macro .ffunc_n, name, op // Like .ffunc_1, plus: first argument must be a number; load it into xmm0 with 'op'. | ||
1151 | | .ffunc_1 name | ||
1152 | | checknumtp [BASE], ->fff_fallback | ||
1153 | | op xmm0, qword [BASE] | ||
1154 | |.endmacro | ||
1155 | | | ||
1156 | |.macro .ffunc_n, name // One-parameter form: uses movsd as the load instruction. | ||
1157 | | .ffunc_n name, movsd | ||
1158 | |.endmacro | ||
1159 | | | ||
1160 | |.macro .ffunc_nn, name // Like .ffunc_2, plus: both arguments must be numbers; load them into xmm0/xmm1. | ||
1161 | | .ffunc_2 name | ||
1162 | | checknumtp [BASE], ->fff_fallback | ||
1163 | | checknumtp [BASE+8], ->fff_fallback | ||
1164 | | movsd xmm0, qword [BASE] | ||
1165 | | movsd xmm1, qword [BASE+8] | ||
1166 | |.endmacro | ||
1167 | | | ||
1168 | |// Inlined GC threshold check. Caveat: uses label 1. | ||
1169 | |.macro ffgccheck | ||
1170 | | mov RB, [DISPATCH+DISPATCH_GL(gc.total)] // RB is clobbered by this macro. | ||
1171 | | cmp RB, [DISPATCH+DISPATCH_GL(gc.threshold)] | ||
1172 | | jb >1 // gc.total below gc.threshold: skip the GC step. | ||
1173 | | call ->fff_gcstep | ||
1174 | |1: | ||
1175 | |.endmacro | ||
1176 | | | ||
1177 | |//-- Base library: checks ----------------------------------------------- | ||
1178 | | | ||
1179 | |.ffunc_1 assert // Fast path: first argument is a true condition; all arguments are returned. | ||
1180 | | mov ITYPE, [BASE] | ||
1181 | | mov RB, ITYPE // Note: RB is loaded again from [BASE] below before it is used. | ||
1182 | | sar ITYPE, 47 // Shift the tag bits down to get the type. | ||
1183 | | cmp ITYPEd, LJ_TISTRUECOND; jae ->fff_fallback // Not a true condition: take the fallback. | ||
1184 | | mov PC, [BASE-8] | ||
1185 | | mov MULTRES, RDd // Save nargs+1; it becomes nresults+1 at label 2. | ||
1186 | | mov RB, [BASE] // Copy the first argument ... | ||
1187 | | mov [BASE-16], RB // ... to the first result slot. | ||
1188 | | sub RDd, 2 // RD = number of remaining arguments. | ||
1189 | | jz >2 // None left to copy. | ||
1190 | | mov RA, BASE | ||
1191 | |1: // Move each remaining argument down by two slots (16 bytes). | ||
1192 | | add RA, 8 | ||
1193 | | mov RB, [RA] | ||
1194 | | mov [RA-16], RB | ||
1195 | | sub RDd, 1 | ||
1196 | | jnz <1 | ||
1197 | |2: | ||
1198 | | mov RDd, MULTRES // Restore the saved count for fff_res_. | ||
1199 | | jmp ->fff_res_ | ||
1200 | | | ||
1201 | |.ffunc_1 type // Returns a string taken from this C function's upvalues, indexed by the argument's type. | ||
1202 | | mov RC, [BASE] | ||
1203 | | sar RC, 47 // Shift the tag bits down to get the type. | ||
1204 | | mov RBd, LJ_TISNUM | ||
1205 | | cmp RCd, RBd | ||
1206 | | cmovb RCd, RBd // Tags below LJ_TISNUM (unsigned compare) are replaced by LJ_TISNUM. | ||
1207 | | not RCd // Bitwise complement of the tag gives the upvalue index. | ||
1208 | |2: | ||
1209 | | mov CFUNC:RB, [BASE-16] // The called function lives in the slot 16 bytes below BASE. | ||
1210 | | cleartp CFUNC:RB | ||
1211 | | mov STR:RC, [CFUNC:RB+RC*8+((char *)(&((GCfuncC *)0)->upvalue))] // Load upvalue[RC] of the C function. | ||
1212 | | mov PC, [BASE-8] | ||
1213 | | settp STR:RC, LJ_TSTR // Tag it as a string ... | ||
1214 | | mov [BASE-16], STR:RC // ... and store it as the single result. | ||
1215 | | jmp ->fff_res1 | ||
1216 | | | ||
1217 | |//-- Base library: getters and setters --------------------------------- | ||
1218 | | | ||
1219 | |.ffunc_1 getmetatable // Returns mt.__metatable if present and non-nil, else the metatable itself, else nil. | ||
1220 | | mov TAB:RB, [BASE] | ||
1221 | | mov PC, [BASE-8] | ||
1222 | | checktab TAB:RB, >6 // Not a table: handle userdata and base metatables at label 6. | ||
1223 | |1: // Field metatable must be at same offset for GCtab and GCudata! | ||
1224 | | mov TAB:RB, TAB:RB->metatable | ||
1225 | |2: // TAB:RB = metatable or NULL. | ||
1226 | | test TAB:RB, TAB:RB | ||
1227 | | mov aword [BASE-16], LJ_TNIL // Default result is nil; mov leaves the flags of the test intact. | ||
1228 | | jz ->fff_res1 // No metatable: return nil. | ||
1229 | | settp TAB:RC, TAB:RB, LJ_TTAB | ||
1230 | | mov [BASE-16], TAB:RC // Store metatable as default result. | ||
1231 | | mov STR:RC, [DISPATCH+DISPATCH_GL(gcroot)+8*(GCROOT_MMNAME+MM_metatable)] // Key string taken from the GC roots. | ||
1232 | | mov RAd, TAB:RB->hmask | ||
1233 | | and RAd, STR:RC->hash // Hash slot index = hmask & hash. | ||
1234 | | settp STR:RC, LJ_TSTR // Tag the key so it can be compared against node keys. | ||
1235 | | imul RAd, #NODE // Scale the index by the node size ... | ||
1236 | | add NODE:RA, TAB:RB->node // ... and add the node array base. | ||
1237 | |3: // Rearranged logic, because we expect _not_ to find the key. | ||
1238 | | cmp NODE:RA->key, STR:RC | ||
1239 | | je >5 | ||
1240 | |4: | ||
1241 | | mov NODE:RA, NODE:RA->next // Follow the collision chain. | ||
1242 | | test NODE:RA, NODE:RA | ||
1243 | | jnz <3 | ||
1244 | | jmp ->fff_res1 // Not found, keep default result. | ||
1245 | |5: | ||
1246 | | mov RB, NODE:RA->val | ||
1247 | | cmp RB, LJ_TNIL; je ->fff_res1 // Ditto for nil value. | ||
1248 | | mov [BASE-16], RB // Return value of mt.__metatable. | ||
1249 | | jmp ->fff_res1 | ||
1250 | | | ||
1251 | |6: // Argument is not a table; ITYPE holds its type here. | ||
1252 | | cmp ITYPEd, LJ_TUDATA; je <1 // Userdata: read its metatable field like a table's. | ||
1253 | | cmp ITYPEd, LJ_TISNUM; ja >7 | ||
1254 | | mov ITYPEd, LJ_TISNUM // Tags not above LJ_TISNUM (unsigned) are treated as LJ_TISNUM. | ||
1255 | |7: | ||
1256 | | not ITYPEd // Bitwise complement of the tag gives the base metatable index. | ||
1257 | | mov TAB:RB, [DISPATCH+ITYPE*8+DISPATCH_GL(gcroot[GCROOT_BASEMT])] | ||
1258 | | jmp <2 | ||
1259 | | | ||
1260 | |.ffunc_2 setmetatable // Fast path only: table without a metatable gets a table as its metatable. | ||
1261 | | mov TAB:RB, [BASE] | ||
1262 | | mov TAB:TMPR, TAB:RB // Keep a second copy of the first argument for the result store below. | ||
1263 | | checktab TAB:RB, ->fff_fallback | ||
1264 | | // Fast path: no mt for table yet and not clearing the mt. | ||
1265 | | cmp aword TAB:RB->metatable, 0; jne ->fff_fallback | ||
1266 | | mov TAB:RA, [BASE+8] | ||
1267 | | checktab TAB:RA, ->fff_fallback // Second argument must be a table as well. | ||
1268 | | mov TAB:RB->metatable, TAB:RA | ||
1269 | | mov PC, [BASE-8] | ||
1270 | | mov [BASE-16], TAB:TMPR // Return original table. | ||
1271 | | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) | ||
1272 | | jz >1 | ||
1273 | | // Possible write barrier. Table is black, but skip iswhite(mt) check. | ||
1274 | | barrierback TAB:RB, RC | ||
1275 | |1: | ||
1276 | | jmp ->fff_res1 | ||
1277 | | | ||
1278 | |.ffunc_2 rawget // rawget(t, k): calls lj_tab_get and returns the slot it points to. | ||
1279 | |.if X64WIN | ||
1280 | | mov TAB:RA, [BASE] | ||
1281 | | checktab TAB:RA, ->fff_fallback | ||
1282 | | mov RB, BASE // Save BASE. | ||
1283 | | lea CARG3, [BASE+8] // key = second argument. | ||
1284 | | mov CARG2, TAB:RA // Caveat: CARG2 == BASE. | ||
1285 | | mov CARG1, SAVE_L | ||
1286 | |.else | ||
1287 | | mov TAB:CARG2, [BASE] | ||
1288 | | checktab TAB:CARG2, ->fff_fallback | ||
1289 | | mov RB, BASE // Save BASE. | ||
1290 | | lea CARG3, [BASE+8] // Caveat: CARG3 == BASE. | ||
1291 | | mov CARG1, SAVE_L | ||
1292 | |.endif | ||
1293 | | call extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) | ||
1294 | | // cTValue * returned in eax (RD). | ||
1295 | | mov BASE, RB // Restore BASE. | ||
1296 | | // Copy table slot. | ||
1297 | | mov RB, [RD] | ||
1298 | | mov PC, [BASE-8] | ||
1299 | | mov [BASE-16], RB | ||
1300 | | jmp ->fff_res1 | ||
1301 | | | ||
1302 | |//-- Base library: conversions ------------------------------------------ | ||
1303 | | | ||
1304 | |.ffunc tonumber // Fast path: a single number argument is returned unchanged. | ||
1305 | | // Only handles the number case inline (without a base argument). | ||
1306 | | cmp NARGS:RDd, 1+1; jne ->fff_fallback // Exactly one argument. | ||
1307 | | mov RB, [BASE] | ||
1308 | | checknumber RB, ->fff_fallback // Anything but a number takes the fallback. | ||
1309 | | mov PC, [BASE-8] | ||
1310 | | mov [BASE-16], RB // The result is the argument itself. | ||
1311 | | jmp ->fff_res1 | ||
1312 | | | ||
1313 | |.ffunc_1 tostring // Fast path: strings are returned as-is, numbers are formatted by a C helper. | ||
1314 | | // Only handles the string or number case inline. | ||
1315 | | mov PC, [BASE-8] | ||
1316 | | mov STR:RB, [BASE] | ||
1317 | | checktp_nc STR:RB, LJ_TSTR, >3 // Not a string: try the number case at label 3. | ||
1318 | | // A __tostring method in the string base metatable is ignored. | ||
1319 | |2: | ||
1320 | | mov [BASE-16], STR:RB // Store the (tagged) string as the single result. | ||
1321 | | jmp ->fff_res1 | ||
1322 | |3: // Handle numbers inline, unless a number base metatable is present. | ||
1323 | | cmp ITYPEd, LJ_TISNUM; ja ->fff_fallback_1 // Neither string nor number. | ||
1324 | | cmp aword [DISPATCH+DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])], 0 | ||
1325 | | jne ->fff_fallback | ||
1326 | | ffgccheck // Caveat: uses label 1. | ||
1327 | | mov L:RB, SAVE_L | ||
1328 | | mov L:RB->base, BASE // Add frame since C call can throw. | ||
1329 | | mov SAVE_PC, PC // Redundant (but a defined value). | ||
1330 | |.if not X64WIN | ||
1331 | | mov CARG2, BASE // Otherwise: CARG2 == BASE | ||
1332 | |.endif | ||
1333 | | mov L:CARG1, L:RB | ||
1334 | |.if DUALNUM | ||
1335 | | call extern lj_strfmt_number // (lua_State *L, cTValue *o) | ||
1336 | |.else | ||
1337 | | call extern lj_strfmt_num // (lua_State *L, lua_Number *np) | ||
1338 | |.endif | ||
1339 | | // GCstr returned in eax (RD). | ||
1340 | | mov BASE, L:RB->base | ||
1341 | | settp STR:RB, RD, LJ_TSTR // Tag the returned GCstr as a string in RB. | ||
1342 | | jmp <2 // Store it via the string path. | ||
1343 | | | ||
1344 | |//-- Base library: iterators ------------------------------------------- | ||
1345 | | | ||
1346 | |.ffunc_1 next // next(t [, k]): calls lj_tab_next; returns key and value, or nil at the end. | ||
1347 | | je >2 // Missing 2nd arg? (Flags come from the nargs compare inside .ffunc_1.) | ||
1348 | |1: | ||
1349 | |.if X64WIN | ||
1350 | | mov RA, [BASE] | ||
1351 | | checktab RA, ->fff_fallback | ||
1352 | |.else | ||
1353 | | mov CARG2, [BASE] | ||
1354 | | checktab CARG2, ->fff_fallback | ||
1355 | |.endif | ||
1356 | | mov L:RB, SAVE_L | ||
1357 | | mov L:RB->base, BASE // Add frame since C call can throw. | ||
1358 | | mov L:RB->top, BASE // Dummy frame length is ok. | ||
1359 | | mov PC, [BASE-8] | ||
1360 | |.if X64WIN | ||
1361 | | lea CARG3, [BASE+8] // key = second argument slot. | ||
1362 | | mov CARG2, RA // Caveat: CARG2 == BASE. | ||
1363 | | mov CARG1, L:RB | ||
1364 | |.else | ||
1365 | | lea CARG3, [BASE+8] // Caveat: CARG3 == BASE. | ||
1366 | | mov CARG1, L:RB | ||
1367 | |.endif | ||
1368 | | mov SAVE_PC, PC // Needed for ITERN fallback. | ||
1369 | | call extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) | ||
1370 | | // Flag returned in eax (RD). | ||
1371 | | mov BASE, L:RB->base | ||
1372 | | test RDd, RDd; jz >3 // End of traversal? | ||
1373 | | // Copy key and value to results. | ||
1374 | | mov RB, [BASE+8] | ||
1375 | | mov RD, [BASE+16] | ||
1376 | | mov [BASE-16], RB | ||
1377 | | mov [BASE-8], RD | ||
1378 | |->fff_res2: // Shared exit for two results; ipairs_aux jumps here too. | ||
1379 | | mov RDd, 1+2 // nresults+1 for two results. | ||
1380 | | jmp ->fff_res | ||
1381 | |2: // Set missing 2nd arg to nil. | ||
1382 | | mov aword [BASE+8], LJ_TNIL | ||
1383 | | jmp <1 | ||
1384 | |3: // End of traversal: return nil. | ||
1385 | | mov aword [BASE-16], LJ_TNIL | ||
1386 | | jmp ->fff_res1 | ||
1387 | | | ||
1388 | |.ffunc_1 pairs // pairs(t): returns upvalue[0] of this function, t, and nil. | ||
1389 | | mov TAB:RB, [BASE] | ||
1390 | | mov TMPR, TAB:RB // Keep a second copy of the argument for the result store below. | ||
1391 | | checktab TAB:RB, ->fff_fallback | ||
1392 | #if LJ_52 | ||
1393 | | cmp aword TAB:RB->metatable, 0; jne ->fff_fallback // Tables with a metatable take the fallback. | ||
1394 | #endif | ||
1395 | | mov CFUNC:RD, [BASE-16] // The called function lives in the slot 16 bytes below BASE. | ||
1396 | | cleartp CFUNC:RD | ||
1397 | | mov CFUNC:RD, CFUNC:RD->upvalue[0] // First result comes from upvalue[0] ... | ||
1398 | | settp CFUNC:RD, LJ_TFUNC // ... tagged as a function. | ||
1399 | | mov PC, [BASE-8] | ||
1400 | | mov [BASE-16], CFUNC:RD // Result 1: function. | ||
1401 | | mov [BASE-8], TMPR // Result 2: the table argument. | ||
1402 | | mov aword [BASE], LJ_TNIL // Result 3: nil. | ||
1403 | | mov RDd, 1+3 // nresults+1 for three results. | ||
1404 | | jmp ->fff_res | ||
1405 | | | ||
1406 | |.ffunc_2 ipairs_aux // ipairs_aux(t, i): returns i+1 and t[i+1], or nothing if that slot is nil or missing. | ||
1407 | | mov TAB:RB, [BASE] | ||
1408 | | checktab TAB:RB, ->fff_fallback | ||
1409 | |.if DUALNUM | ||
1410 | | mov RA, [BASE+8] | ||
1411 | | checkint RA, ->fff_fallback | ||
1412 | |.else | ||
1413 | | checknumtp [BASE+8], ->fff_fallback | ||
1414 | | movsd xmm0, qword [BASE+8] | ||
1415 | |.endif | ||
1416 | | mov PC, [BASE-8] | ||
1417 | |.if DUALNUM | ||
1418 | | add RAd, 1 // Next index. | ||
1419 | | setint ITYPE, RA | ||
1420 | | mov [BASE-16], ITYPE // Result 1: the incremented index, as a tagged integer. | ||
1421 | |.else | ||
1422 | | sseconst_1 xmm1, TMPR | ||
1423 | | addsd xmm0, xmm1 // Next index. | ||
1424 | | cvttsd2si RAd, xmm0 // Integer copy of the index for the array lookup. | ||
1425 | | movsd qword [BASE-16], xmm0 // Result 1: the incremented index, as a double. | ||
1426 | |.endif | ||
1427 | | cmp RAd, TAB:RB->asize; jae >2 // Not in array part? | ||
1428 | | mov RD, TAB:RB->array | ||
1429 | | lea RD, [RD+RA*8] // RD = address of array slot RA. | ||
1430 | |1: // RD = pointer to the candidate slot. | ||
1431 | | cmp aword [RD], LJ_TNIL; je ->fff_res0 // nil slot: return no results. | ||
1432 | | // Copy array slot. | ||
1433 | | mov RB, [RD] | ||
1434 | | mov [BASE-8], RB // Result 2: the slot value. | ||
1435 | | jmp ->fff_res2 | ||
1436 | |2: // Check for empty hash part first. Otherwise call C function. | ||
1437 | | cmp dword TAB:RB->hmask, 0; je ->fff_res0 | ||
1438 | |.if X64WIN | ||
1439 | | mov TMPR, BASE // Go through TMPR so BASE ends up in RB after the argument registers are set. | ||
1440 | | mov CARG2d, RAd | ||
1441 | | mov CARG1, TAB:RB | ||
1442 | | mov RB, TMPR // Save BASE in RB. | ||
1443 | |.else | ||
1444 | | mov CARG1, TAB:RB | ||
1445 | | mov RB, BASE // Save BASE. | ||
1446 | | mov CARG2d, RAd // Caveat: CARG2 == BASE | ||
1447 | |.endif | ||
1448 | | call extern lj_tab_getinth // (GCtab *t, int32_t key) | ||
1449 | | // cTValue * or NULL returned in eax (RD). | ||
1450 | | mov BASE, RB // Restore BASE. | ||
1451 | | test RD, RD | ||
1452 | | jnz <1 // Slot found: check and copy it at label 1. | ||
1453 | |->fff_res0: // Shared exit for zero results. | ||
1454 | | mov RDd, 1+0 // nresults+1 for no results. | ||
1455 | | jmp ->fff_res | ||
1456 | | | ||
1457 | |.ffunc_1 ipairs // ipairs(t): returns upvalue[0] of this function, t, and an initial index of 0. | ||
1458 | | mov TAB:RB, [BASE] | ||
1459 | | mov TMPR, TAB:RB // Keep a second copy of the argument for the result store below. | ||
1460 | | checktab TAB:RB, ->fff_fallback | ||
1461 | #if LJ_52 | ||
1462 | | cmp aword TAB:RB->metatable, 0; jne ->fff_fallback // Tables with a metatable take the fallback. | ||
1463 | #endif | ||
1464 | | mov CFUNC:RD, [BASE-16] // The called function lives in the slot 16 bytes below BASE. | ||
1465 | | cleartp CFUNC:RD | ||
1466 | | mov CFUNC:RD, CFUNC:RD->upvalue[0] // First result comes from upvalue[0] ... | ||
1467 | | settp CFUNC:RD, LJ_TFUNC // ... tagged as a function. | ||
1468 | | mov PC, [BASE-8] | ||
1469 | | mov [BASE-16], CFUNC:RD // Result 1: function. | ||
1470 | | mov [BASE-8], TMPR // Result 2: the table argument. | ||
1471 | |.if DUALNUM | ||
1472 | | mov64 RD, ((uint64_t)LJ_TISNUM<<47) // Tag LJ_TISNUM in the upper bits, payload 0. | ||
1473 | | mov [BASE], RD // Result 3: integer 0. | ||
1474 | |.else | ||
1475 | | mov qword [BASE], 0 // Result 3: all-zero bits, i.e. the double 0.0. | ||
1476 | |.endif | ||
1477 | | mov RDd, 1+3 // nresults+1 for three results. | ||
1478 | | jmp ->fff_res | ||
1479 | | | ||
1480 | |//-- Base library: catch errors ---------------------------------------- | ||
1481 | | | ||
1482 | |.ffunc_1 pcall // pcall(f, ...): shift f and its arguments up one slot, then call f in a FRAME_PCALL frame. | ||
 | | mov L:RB, SAVE_L | ||
 | | lea RA, [BASE+NARGS:RD*8] // The shift loop below stores one slot above the current top ... | ||
 | | cmp RA, L:RB->maxstack; ja ->fff_fallback // ... so take the fallback (which can grow the stack) if that does not fit. | ||
1483 | | lea RA, [BASE+16] // RA = new base for the protected call. | ||
1484 | | sub NARGS:RDd, 1 // The called function is no longer counted as an argument. | ||
1485 | | mov PCd, 16+FRAME_PCALL // Frame delta of 16 bytes combined with the pcall frame type. | ||
1486 | |1: // Shared with xpcall: RA = new base, RD = nargs+1, PC = delta + frame type. | ||
1487 | | movzx RBd, byte [DISPATCH+DISPATCH_GL(hookmask)] | ||
1488 | | shr RB, HOOK_ACTIVE_SHIFT | ||
1489 | | and RB, 1 // RB = 1 if the hook-active bit is set, else 0. | ||
1490 | | add PC, RB // Remember active hook before pcall. | ||
1491 | | // Note: this does a (harmless) copy of the function to the PC slot, too. | ||
1492 | | mov KBASE, RD // KBASE counts down over the slots to move. | ||
1493 | |2: // Move slots up by 8 bytes, starting from the highest one. | ||
1494 | | mov RB, [RA+KBASE*8-24] | ||
1495 | | mov [RA+KBASE*8-16], RB | ||
1496 | | sub KBASE, 1 | ||
1497 | | ja <2 | ||
1498 | | jmp ->vm_call_dispatch | ||
1499 | | | ||
1500 | |.ffunc_2 xpcall // xpcall(f, handler, ...): swap f and handler, then share the pcall path at label 1. | ||
 | | mov L:RB, SAVE_L | ||
 | | lea RA, [BASE+NARGS:RD*8] // Same stack check as in pcall: the shared shift loop stores ... | ||
 | | cmp RA, L:RB->maxstack; ja ->fff_fallback // ... one slot above the current top. | ||
1501 | | mov LFUNC:RA, [BASE+8] | ||
1502 | | checktp_nc LFUNC:RA, LJ_TFUNC, ->fff_fallback // The second argument must be a function. | ||
1503 | | mov LFUNC:RB, [BASE] // Swap function and traceback. | ||
1504 | | mov [BASE], LFUNC:RA | ||
1505 | | mov [BASE+8], LFUNC:RB | ||
1506 | | lea RA, [BASE+24] // RA = new base for the protected call. | ||
1507 | | sub NARGS:RDd, 2 // Neither f nor the handler is counted as an argument. | ||
1508 | | mov PCd, 24+FRAME_PCALL // Frame delta of 24 bytes combined with the pcall frame type. | ||
1509 | | jmp <1 | ||
1510 | | | ||
1511 | |//-- Coroutine library -------------------------------------------------- | ||
1512 | | | ||
1513 | |.macro coroutine_resume_wrap, resume | ||
1514 | |.if resume | ||
1515 | |.ffunc_1 coroutine_resume | ||
1516 | | mov L:RB, [BASE] | ||
1517 | | cleartp L:RB | ||
1518 | |.else | ||
1519 | |.ffunc coroutine_wrap_aux | ||
1520 | | mov CFUNC:RB, [BASE-16] | ||
1521 | | cleartp CFUNC:RB | ||
1522 | | mov L:RB, CFUNC:RB->upvalue[0].gcr | ||
1523 | | cleartp L:RB | ||
1524 | |.endif | ||
1525 | | mov PC, [BASE-8] | ||
1526 | | mov SAVE_PC, PC | ||
1527 | | mov TMP1, L:RB | ||
1528 | |.if resume | ||
1529 | | checktptp [BASE], LJ_TTHREAD, ->fff_fallback | ||
1530 | |.endif | ||
1531 | | cmp aword L:RB->cframe, 0; jne ->fff_fallback | ||
1532 | | cmp byte L:RB->status, LUA_YIELD; ja ->fff_fallback | ||
1533 | | mov RA, L:RB->top | ||
1534 | | je >1 // Status != LUA_YIELD (i.e. 0)? | ||
1535 | | cmp RA, L:RB->base // Check for presence of initial func. | ||
1536 | | je ->fff_fallback | ||
1537 | | mov PC, [RA-8] // Move initial function up. | ||
1538 | | mov [RA], PC | ||
1539 | | add RA, 8 | ||
1540 | |1: | ||
1541 | |.if resume | ||
1542 | | lea PC, [RA+NARGS:RD*8-16] // Check stack space (-1-thread). | ||
1543 | |.else | ||
1544 | | lea PC, [RA+NARGS:RD*8-8] // Check stack space (-1). | ||
1545 | |.endif | ||
1546 | | cmp PC, L:RB->maxstack; ja ->fff_fallback | ||
1547 | | mov L:RB->top, PC | ||
1548 | | | ||
1549 | | mov L:RB, SAVE_L | ||
1550 | | mov L:RB->base, BASE | ||
1551 | |.if resume | ||
1552 | | add BASE, 8 // Keep resumed thread in stack for GC. | ||
1553 | |.endif | ||
1554 | | mov L:RB->top, BASE | ||
1555 | |.if resume | ||
1556 | | lea RB, [BASE+NARGS:RD*8-24] // RB = end of source for stack move. | ||
1557 | |.else | ||
1558 | | lea RB, [BASE+NARGS:RD*8-16] // RB = end of source for stack move. | ||
1559 | |.endif | ||
1560 | | sub RB, PC // Relative to PC. | ||
1561 | | | ||
1562 | | cmp PC, RA | ||
1563 | | je >3 | ||
1564 | |2: // Move args to coroutine. | ||
1565 | | mov RC, [PC+RB] | ||
1566 | | mov [PC-8], RC | ||
1567 | | sub PC, 8 | ||
1568 | | cmp PC, RA | ||
1569 | | jne <2 | ||
1570 | |3: | ||
1571 | | mov CARG2, RA | ||
1572 | | mov CARG1, TMP1 | ||
1573 | | call ->vm_resume // (lua_State *L, TValue *base, 0, 0) | ||
1574 | | | ||
1575 | | mov L:RB, SAVE_L | ||
1576 | | mov L:PC, TMP1 | ||
1577 | | mov BASE, L:RB->base | ||
1578 | | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB | ||
1579 | | set_vmstate INTERP | ||
1580 | | | ||
1581 | | cmp eax, LUA_YIELD | ||
1582 | | ja >8 | ||
1583 | |4: | ||
1584 | | mov RA, L:PC->base | ||
1585 | | mov KBASE, L:PC->top | ||
1586 | | mov L:PC->top, RA // Clear coroutine stack. | ||
1587 | | mov PC, KBASE | ||
1588 | | sub PC, RA | ||
1589 | | je >6 // No results? | ||
1590 | | lea RD, [BASE+PC] | ||
1591 | | shr PCd, 3 | ||
1592 | | cmp RD, L:RB->maxstack | ||
1593 | | ja >9 // Need to grow stack? | ||
1594 | | | ||
1595 | | mov RB, BASE | ||
1596 | | sub RB, RA | ||
1597 | |5: // Move results from coroutine. | ||
1598 | | mov RD, [RA] | ||
1599 | | mov [RA+RB], RD | ||
1600 | | add RA, 8 | ||
1601 | | cmp RA, KBASE | ||
1602 | | jne <5 | ||
1603 | |6: | ||
1604 | |.if resume | ||
1605 | | lea RDd, [PCd+2] // nresults+1 = 1 + true + results. | ||
1606 | | mov_true ITYPE // Prepend true to results. | ||
1607 | | mov [BASE-8], ITYPE | ||
1608 | |.else | ||
1609 | | lea RDd, [PCd+1] // nresults+1 = 1 + results. | ||
1610 | |.endif | ||
1611 | |7: | ||
1612 | | mov PC, SAVE_PC | ||
1613 | | mov MULTRES, RDd | ||
1614 | |.if resume | ||
1615 | | mov RA, -8 | ||
1616 | |.else | ||
1617 | | xor RAd, RAd | ||
1618 | |.endif | ||
1619 | | test PCd, FRAME_TYPE | ||
1620 | | jz ->BC_RET_Z | ||
1621 | | jmp ->vm_return | ||
1622 | | | ||
1623 | |8: // Coroutine returned with error (at co->top-1). | ||
1624 | |.if resume | ||
1625 | | mov_false ITYPE // Prepend false to results. | ||
1626 | | mov [BASE-8], ITYPE | ||
1627 | | mov RA, L:PC->top | ||
1628 | | sub RA, 8 | ||
1629 | | mov L:PC->top, RA // Clear error from coroutine stack. | ||
1630 | | // Copy error message. | ||
1631 | | mov RD, [RA] | ||
1632 | | mov [BASE], RD | ||
1633 | | mov RDd, 1+2 // nresults+1 = 1 + false + error. | ||
1634 | | jmp <7 | ||
1635 | |.else | ||
1636 | | mov CARG2, L:PC | ||
1637 | | mov CARG1, L:RB | ||
1638 | | call extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co) | ||
1639 | | // Error function does not return. | ||
1640 | |.endif | ||
1641 | | | ||
1642 | |9: // Handle stack expansion on return from yield. | ||
1643 | | mov L:RA, TMP1 | ||
1644 | | mov L:RA->top, KBASE // Undo coroutine stack clearing. | ||
1645 | | mov CARG2, PC | ||
1646 | | mov CARG1, L:RB | ||
1647 | | call extern lj_state_growstack // (lua_State *L, int n) | ||
1648 | | mov L:PC, TMP1 | ||
1649 | | mov BASE, L:RB->base | ||
1650 | | jmp <4 // Retry the stack move. | ||
1651 | |.endmacro | ||
1652 | | | ||
1653 | | coroutine_resume_wrap 1 // Instantiate the macro with resume=1: coroutine.resume. | ||
1654 | | coroutine_resume_wrap 0 // Instantiate the macro with resume=0: coroutine.wrap. | ||
1655 | | | ||
1656 | |.ffunc coroutine_yield // Fast path for coroutine.yield; BASE = args, RD = nargs+1. | ||
1657 | | mov L:RB, SAVE_L | ||
1658 | | test aword L:RB->cframe, CFRAME_RESUME | ||
1659 | | jz ->fff_fallback // CFRAME_RESUME not set in L->cframe: use the fallback handler. | ||
1660 | | mov L:RB->base, BASE | ||
1661 | | lea RD, [BASE+NARGS:RD*8-8] // RD = BASE + nargs*8, i.e. just past the last argument. | ||
1662 | | mov L:RB->top, RD | ||
1663 | | xor RDd, RDd // RD = 0 (32 bit xor zero-extends to 64 bit). | ||
1664 | | mov aword L:RB->cframe, RD // Clear L->cframe. | ||
1665 | | mov al, LUA_YIELD // Low byte of the already zeroed RD (eax) = LUA_YIELD. | ||
1666 | | mov byte L:RB->status, al | ||
1667 | | jmp ->vm_leave_unw | ||
1668 | | | ||
1669 | |//-- Math library ------------------------------------------------------- | ||
1670 | | | ||
1671 | | .ffunc_1 math_abs // Fast function math.abs; the result is stored to [BASE-16]. | ||
1672 | | mov RB, [BASE] // RB = raw 64 bit slot of the first argument. | ||
1673 | |.if DUALNUM | ||
1674 | | checkint RB, >3 // Not an integer: continue at 3. | ||
1675 | | cmp RBd, 0; jns ->fff_resi // Non-negative integer: return it unchanged. | ||
1676 | | neg RBd; js >2 // Still negative after negation: overflow, handled at 2. | ||
1677 | |->fff_resbit: | ||
1678 | |->fff_resi: | ||
1679 | | setint RB | ||
1680 | |->fff_resRB: // Shared exit: return the value in RB as the single result. | ||
1681 | | mov PC, [BASE-8] | ||
1682 | | mov [BASE-16], RB | ||
1683 | | jmp ->fff_res1 | ||
1684 | |2: | ||
1685 | | mov64 RB, U64x(41e00000,00000000) // 2^31 as a double bit pattern. | ||
1686 | | jmp ->fff_resRB | ||
1687 | |3: | ||
1688 | | ja ->fff_fallback // NOTE(review): relies on flags left by checkint -- confirm. | ||
1689 | |.else | ||
1690 | | checknum RB, ->fff_fallback | ||
1691 | |.endif | ||
1692 | | shl RB, 1 // Shift the sign bit (bit 63) out ... | ||
1693 | | shr RB, 1 // ... and shift a zero back in, leaving |x|. | ||
1694 | | mov PC, [BASE-8] | ||
1695 | | mov [BASE-16], RB | ||
1696 | | jmp ->fff_res1 | ||
1697 | | | ||
1698 | |.ffunc_n math_sqrt, sqrtsd | ||
1699 | |->fff_resxmm0: // Shared exit: return the double in xmm0 as the single result. | ||
1700 | | mov PC, [BASE-8] | ||
1701 | | movsd qword [BASE-16], xmm0 | ||
1702 | | // fallthrough | ||
1703 | | | ||
1704 | |->fff_res1: // Shared exit: exactly one result, already stored at [BASE-16]. | ||
1705 | | mov RDd, 1+1 // RD = nresults+1 = 1 + 1 result. | ||
1706 | |->fff_res: // Shared exit: RD = nresults+1, PC already loaded from [BASE-8]. | ||
1707 | | mov MULTRES, RDd | ||
1708 | |->fff_res_: | ||
1709 | | test PCd, FRAME_TYPE | ||
1710 | | jnz >7 // Frame type bits set: use the generic return path at 7. | ||
1711 | |5: | ||
1712 | | cmp PC_RB, RDL // More results expected? | ||
1713 | | ja >6 | ||
1714 | | // Adjust BASE. KBASE is assumed to be set for the calling frame. | ||
1715 | | movzx RAd, PC_RA | ||
1716 | | neg RA | ||
1717 | | lea BASE, [BASE+RA*8-16] // base = base - (RA+2)*8 | ||
1718 | | ins_next | ||
1719 | | | ||
1720 | |6: // Fill up results with nil. | ||
1721 | | mov aword [BASE+RD*8-24], LJ_TNIL | ||
1722 | | add RD, 1 // One more (nil) result; re-check at 5. | ||
1723 | | jmp <5 | ||
1724 | | | ||
1725 | |7: // Non-standard return case. | ||
1726 | | mov RA, -16 // Results start at BASE+RA = BASE-16. | ||
1727 | | jmp ->vm_return | ||
1728 | | | ||
1729 | |.macro math_round, func | ||
1730 | | .ffunc math_ .. func // Defines the fast function math_<func> (floor/ceil below). | ||
1731 | |.if DUALNUM | ||
1732 | | mov RB, [BASE] | ||
1733 | | checknumx RB, ->fff_resRB, je // NOTE(review): presumably returns an integer argument unchanged -- confirm. | ||
1734 | | ja ->fff_fallback | ||
1735 | |.else | ||
1736 | | checknumtp [BASE], ->fff_fallback | ||
1737 | |.endif | ||
1738 | | movsd xmm0, qword [BASE] | ||
1739 | | call ->vm_ .. func .. _sse // Round xmm0 in place via vm_<func>_sse. | ||
1740 | |.if DUALNUM | ||
1741 | | cvttsd2si RBd, xmm0 // Try to represent the rounded result as an int32. | ||
1742 | | cmp RBd, 0x80000000 // 0x80000000 is what cvttsd2si yields for out-of-range/NaN. | ||
1743 | | jne ->fff_resi // Anything else converted fine: return as integer. | ||
1744 | | cvtsi2sd xmm1, RBd // Could still be a genuine -2^31: compare against it. | ||
1745 | | ucomisd xmm0, xmm1 | ||
1746 | | jp ->fff_resxmm0 // Unordered (NaN): return as number. | ||
1747 | | je ->fff_resi // Exactly equal: valid integer result. | ||
1748 | |.endif | ||
1749 | | jmp ->fff_resxmm0 | ||
1750 | |.endmacro | ||
1751 | | | ||
1752 | | math_round floor | ||
1753 | | math_round ceil | ||
1754 | | | ||
1755 | |.ffunc math_log // Fast path for math.log with a single number argument. | ||
1756 | | cmp NARGS:RDd, 1+1; jne ->fff_fallback // Exactly one argument. | ||
1757 | | checknumtp [BASE], ->fff_fallback | ||
1758 | | movsd xmm0, qword [BASE] | ||
1759 | | mov RB, BASE // Keep BASE in RB across the C call. | ||
1760 | | call extern log | ||
1761 | | mov BASE, RB // Restore BASE. | ||
1762 | | jmp ->fff_resxmm0 | ||
1763 | | | ||
1764 | |.macro math_extern, func | ||
1765 | | .ffunc_n math_ .. func // Fast function math_<func> calling the external function <func>. | ||
1766 | | mov RB, BASE // Keep BASE in RB across the C call. | ||
1767 | | call extern func | ||
1768 | | mov BASE, RB // Restore BASE. | ||
1769 | | jmp ->fff_resxmm0 // Return the value left in xmm0. | ||
1770 | |.endmacro | ||
1771 | | | ||
1772 | |.macro math_extern2, func | ||
1773 | | .ffunc_nn math_ .. func // Same as math_extern, but set up via .ffunc_nn. | ||
1774 | | mov RB, BASE // Keep BASE in RB across the C call. | ||
1775 | | call extern func | ||
1776 | | mov BASE, RB // Restore BASE. | ||
1777 | | jmp ->fff_resxmm0 // Return the value left in xmm0. | ||
1778 | |.endmacro | ||
1779 | | | ||
1780 | | math_extern log10 | ||
1781 | | math_extern exp | ||
1782 | | math_extern sin | ||
1783 | | math_extern cos | ||
1784 | | math_extern tan | ||
1785 | | math_extern asin | ||
1786 | | math_extern acos | ||
1787 | | math_extern atan | ||
1788 | | math_extern sinh | ||
1789 | | math_extern cosh | ||
1790 | | math_extern tanh | ||
1791 | | math_extern2 pow | ||
1792 | | math_extern2 atan2 | ||
1793 | | math_extern2 fmod | ||
1794 | | | ||
1795 | |.ffunc_2 math_ldexp // Fast function math.ldexp, computed on the x87 FPU. | ||
1796 | | checknumtp [BASE], ->fff_fallback | ||
1797 | | checknumtp [BASE+8], ->fff_fallback | ||
1798 | | fld qword [BASE+8] // st0 = 2nd argument (exponent). | ||
1799 | | fld qword [BASE] // st0 = 1st argument, st1 = exponent. | ||
1800 | | fscale // st0 = st0 * 2^trunc(st1). | ||
1801 | | fpop1 // NOTE(review): presumably drops st1 (the exponent) -- confirm macro. | ||
1802 | | mov PC, [BASE-8] | ||
1803 | | fstp qword [BASE-16] // Store the result and pop the x87 stack. | ||
1804 | | jmp ->fff_res1 | ||
1805 | | | ||
1806 | |.ffunc_n math_frexp // Fast function math.frexp: returns two results. | ||
1807 | | mov RB, BASE // Keep BASE in RB across the C call. | ||
1808 | |.if X64WIN | ||
1809 | | lea CARG2, TMP1 // Caveat: CARG2 == BASE | ||
1810 | |.else | ||
1811 | | lea CARG1, TMP1 // Pointer argument for frexp's output: &TMP1. | ||
1812 | |.endif | ||
1813 | | call extern frexp | ||
1814 | | mov BASE, RB // Restore BASE. | ||
1815 | | mov RBd, TMP1d // RB = value stored to TMP1 by frexp (the exponent). | ||
1816 | | mov PC, [BASE-8] // Load PC before [BASE-8] is overwritten below. | ||
1817 | | movsd qword [BASE-16], xmm0 // 1st result: value returned in xmm0. | ||
1818 | |.if DUALNUM | ||
1819 | | setint RB | ||
1820 | | mov [BASE-8], RB // 2nd result: exponent as integer. | ||
1821 | |.else | ||
1822 | | cvtsi2sd xmm1, RBd | ||
1823 | | movsd qword [BASE-8], xmm1 // 2nd result: exponent as number. | ||
1824 | |.endif | ||
1825 | | mov RDd, 1+2 // nresults+1 = 1 + 2 results. | ||
1826 | | jmp ->fff_res | ||
1827 | | | ||
1828 | |.ffunc_n math_modf // Fast function math.modf: returns two results. | ||
1829 | | mov RB, BASE // Keep BASE in RB across the C call. | ||
1830 | |.if X64WIN | ||
1831 | | lea CARG2, [BASE-16] // Caveat: CARG2 == BASE | ||
1832 | |.else | ||
1833 | | lea CARG1, [BASE-16] // modf writes its pointer output directly to the 1st result slot. | ||
1834 | |.endif | ||
1835 | | call extern modf | ||
1836 | | mov BASE, RB // Restore BASE. | ||
1837 | | mov PC, [BASE-8] // Load PC before [BASE-8] is overwritten below. | ||
1838 | | movsd qword [BASE-8], xmm0 // 2nd result: value returned in xmm0. | ||
1839 | | mov RDd, 1+2 // nresults+1 = 1 + 2 results. | ||
1840 | | jmp ->fff_res | ||
1841 | | | ||
1842 | |.macro math_minmax, name, cmovop, sseop | ||
1843 | | .ffunc name // Folds all arguments with cmovop (integers) or sseop (numbers). | ||
1844 | | mov RAd, 2 // RA = 1-based index of the next argument to fold in. | ||
1845 | |.if DUALNUM | ||
1846 | | mov RB, [BASE] | ||
1847 | | checkint RB, >4 | ||
1848 | |1: // Handle integers. | ||
1849 | | cmp RAd, RDd; jae ->fff_resRB // All arguments consumed: return RB. | ||
1850 | | mov TMPR, [BASE+RA*8-8] | ||
1851 | | checkint TMPR, >3 | ||
1852 | | cmp RBd, TMPRd | ||
1853 | | cmovop RB, TMPR // Replace the running result if the new argument wins. | ||
1854 | | add RAd, 1 | ||
1855 | | jmp <1 | ||
1856 | |3: | ||
1857 | | ja ->fff_fallback // NOTE(review): relies on flags left by checkint -- confirm. | ||
1858 | | // Convert intermediate result to number and continue below. | ||
1859 | | cvtsi2sd xmm0, RBd | ||
1860 | | jmp >6 // Load the current (number) argument at 6. | ||
1861 | |4: | ||
1862 | | ja ->fff_fallback // NOTE(review): relies on flags left by checkint -- confirm. | ||
1863 | |.else | ||
1864 | | checknumtp [BASE], ->fff_fallback | ||
1865 | |.endif | ||
1866 | | | ||
1867 | | movsd xmm0, qword [BASE] // xmm0 = running result as a number. | ||
1868 | |5: // Handle numbers or integers. | ||
1869 | | cmp RAd, RDd; jae ->fff_resxmm0 // All arguments consumed: return xmm0. | ||
1870 | |.if DUALNUM | ||
1871 | | mov RB, [BASE+RA*8-8] | ||
1872 | | checknumx RB, >6, jb // NOTE(review): presumably branches to 6 for a number -- confirm. | ||
1873 | | ja ->fff_fallback | ||
1874 | | cvtsi2sd xmm1, RBd // Integer argument: convert to number. | ||
1875 | | jmp >7 | ||
1876 | |.else | ||
1877 | | checknumtp [BASE+RA*8-8], ->fff_fallback | ||
1878 | |.endif | ||
1879 | |6: | ||
1880 | | movsd xmm1, qword [BASE+RA*8-8] | ||
1881 | |7: | ||
1882 | | sseop xmm0, xmm1 | ||
1883 | | add RAd, 1 | ||
1884 | | jmp <5 | ||
1885 | |.endmacro | ||
1886 | | | ||
1887 | | math_minmax math_min, cmovg, minsd | ||
1888 | | math_minmax math_max, cmovl, maxsd | ||
1889 | | | ||
1890 | |//-- String library ----------------------------------------------------- | ||
1891 | | | ||
1892 | |.ffunc string_byte // Only handle the 1-arg case here. | ||
1893 | | cmp NARGS:RDd, 1+1; jne ->fff_fallback // Exactly one argument. | ||
1894 | | mov STR:RB, [BASE] | ||
1895 | | checkstr STR:RB, ->fff_fallback | ||
1896 | | mov PC, [BASE-8] | ||
1897 | | cmp dword STR:RB->len, 1 | ||
1898 | | jb ->fff_res0 // Return no results for empty string. | ||
1899 | | movzx RBd, byte STR:RB[1] // NOTE(review): presumably the first data byte after the string header -- confirm. | ||
1900 | |.if DUALNUM | ||
1901 | | jmp ->fff_resi | ||
1902 | |.else | ||
1903 | | cvtsi2sd xmm0, RBd; jmp ->fff_resxmm0 | ||
1904 | |.endif | ||
1905 | | | ||
1906 | |.ffunc string_char // Only handle the 1-arg case here. | ||
1907 | | ffgccheck | ||
1908 | | cmp NARGS:RDd, 1+1; jne ->fff_fallback // *Exactly* 1 arg. | ||
1909 | |.if DUALNUM | ||
1910 | | mov RB, [BASE] | ||
1911 | | checkint RB, ->fff_fallback | ||
1912 | |.else | ||
1913 | | checknumtp [BASE], ->fff_fallback | ||
1914 | | cvttsd2si RBd, qword [BASE] | ||
1915 | |.endif | ||
1916 | | cmp RBd, 255; ja ->fff_fallback // Unsigned compare: also rejects negative values. | ||
1917 | | mov TMP1d, RBd // Store the character code in a stack slot. | ||
1918 | | mov TMPRd, 1 // String length = 1. | ||
1919 | | lea RD, TMP1 // Points to stack. Little-endian. | ||
1920 | |->fff_newstr: // Shared: create a string from RD = data pointer, TMPR = length. | ||
1921 | | mov L:RB, SAVE_L | ||
1922 | | mov L:RB->base, BASE | ||
1923 | | mov CARG3d, TMPRd // Zero-extended to size_t. | ||
1924 | | mov CARG2, RD | ||
1925 | | mov CARG1, L:RB | ||
1926 | | mov SAVE_PC, PC | ||
1927 | | call extern lj_str_new // (lua_State *L, char *str, size_t l) | ||
1928 | |->fff_resstr: // Shared exit: return the string object in RD. | ||
1929 | | // GCstr * returned in eax (RD). | ||
1930 | | mov BASE, L:RB->base | ||
1931 | | mov PC, [BASE-8] | ||
1932 | | settp STR:RD, LJ_TSTR | ||
1933 | | mov [BASE-16], STR:RD | ||
1934 | | jmp ->fff_res1 | ||
1935 | | | ||
1936 | |.ffunc string_sub // Fast function string.sub(str, start [, end]). | ||
1937 | | ffgccheck | ||
1938 | | mov TMPRd, -1 // Default end = -1. | ||
1939 | | cmp NARGS:RDd, 1+2; jb ->fff_fallback // Fewer than 2 arguments: fallback. | ||
1940 | | jna >1 // Exactly 2 arguments: keep the default end. | ||
1941 | |.if DUALNUM | ||
1942 | | mov TMPR, [BASE+16] | ||
1943 | | checkint TMPR, ->fff_fallback | ||
1944 | |.else | ||
1945 | | checknumtp [BASE+16], ->fff_fallback | ||
1946 | | cvttsd2si TMPRd, qword [BASE+16] | ||
1947 | |.endif | ||
1948 | |1: | ||
1949 | | mov STR:RB, [BASE] | ||
1950 | | checkstr STR:RB, ->fff_fallback | ||
1951 | |.if DUALNUM | ||
1952 | | mov ITYPE, [BASE+8] | ||
1953 | | mov RAd, ITYPEd // Must clear hiword for lea below. | ||
1954 | | sar ITYPE, 47 | ||
1955 | | cmp ITYPEd, LJ_TISNUM | ||
1956 | | jne ->fff_fallback | ||
1957 | |.else | ||
1958 | | checknumtp [BASE+8], ->fff_fallback | ||
1959 | | cvttsd2si RAd, qword [BASE+8] | ||
1960 | |.endif | ||
1961 | | mov RCd, STR:RB->len // RC = string length. | ||
1962 | | cmp RCd, TMPRd // len < end? (unsigned compare) | ||
1963 | | jb >5 | ||
1964 | |2: | ||
1965 | | test RAd, RAd // start <= 0? | ||
1966 | | jle >7 | ||
1967 | |3: | ||
1968 | | sub TMPRd, RAd // start > end? | ||
1969 | | jl ->fff_emptystr | ||
1970 | | lea RD, [STR:RB+RAd+#STR-1] // RD = pointer to the first character of the result. | ||
1971 | | add TMPRd, 1 // TMPR = end - start + 1 = result length. | ||
1972 | |4: | ||
1973 | | jmp ->fff_newstr | ||
1974 | | | ||
1975 | |5: // Negative end or overflow. | ||
1976 | | jl >6 // Flags still from cmp above: len < end as signed, too. | ||
1977 | | lea TMPRd, [TMPRd+RCd+1] // end = end+(len+1) | ||
1978 | | jmp <2 | ||
1979 | |6: // Overflow. | ||
1980 | | mov TMPRd, RCd // end = len | ||
1981 | | jmp <2 | ||
1982 | | | ||
1983 | |7: // Negative start or underflow. | ||
1984 | | je >8 // Flags still from test above: start == 0. | ||
1985 | | add RAd, RCd // start = start+(len+1) | ||
1986 | | add RAd, 1 | ||
1987 | | jg <3 // start > 0? | ||
1988 | |8: // Underflow. | ||
1989 | | mov RAd, 1 // start = 1 | ||
1990 | | jmp <3 | ||
1991 | | | ||
1992 | |->fff_emptystr: // Range underflow. | ||
1993 | | xor TMPRd, TMPRd // Zero length. Any ptr in RD is ok. | ||
1994 | | jmp <4 | ||
1995 | | | ||
1996 | |.macro ffstring_op, name | ||
1997 | | .ffunc_1 string_ .. name // Fast function string_<name> built on lj_buf_putstr_<name>. | ||
1998 | | ffgccheck | ||
1999 | |.if X64WIN | ||
2000 | | mov STR:TMPR, [BASE] // Use TMPR here, since CARG2 aliases BASE (see caveat below). | ||
2001 | | checkstr STR:TMPR, ->fff_fallback | ||
2002 | |.else | ||
2003 | | mov STR:CARG2, [BASE] | ||
2004 | | checkstr STR:CARG2, ->fff_fallback | ||
2005 | |.endif | ||
2006 | | mov L:RB, SAVE_L | ||
2007 | | lea SBUF:CARG1, [DISPATCH+DISPATCH_GL(tmpbuf)] // CARG1 = address of the global tmpbuf. | ||
2008 | | mov L:RB->base, BASE | ||
2009 | |.if X64WIN | ||
2010 | | mov STR:CARG2, STR:TMPR // Caveat: CARG2 == BASE | ||
2011 | |.endif | ||
2012 | | mov RC, SBUF:CARG1->b | ||
2013 | | mov SBUF:CARG1->L, L:RB | ||
2014 | | mov SBUF:CARG1->p, RC // Set buf->p = buf->b; presumably resets the buffer -- confirm. | ||
2015 | | mov SAVE_PC, PC | ||
2016 | | call extern lj_buf_putstr_ .. name | ||
2017 | | mov CARG1, rax // Pass the first call's return value on to lj_buf_tostr. | ||
2018 | | call extern lj_buf_tostr | ||
2019 | | jmp ->fff_resstr | ||
2020 | |.endmacro | ||
2021 | | | ||
2022 | |ffstring_op reverse | ||
2023 | |ffstring_op lower | ||
2024 | |ffstring_op upper | ||
2025 | | | ||
2026 | |//-- Bit library -------------------------------------------------------- | ||
2027 | | | ||
2028 | |.macro .ffunc_bit, name, kind, fdef | ||
2029 | | fdef name // Prologue macro to use (e.g. .ffunc_1 or .ffunc_2). | ||
2030 | |.if kind == 2 | ||
2031 | | sseconst_tobit xmm1, RB // kind 2: load the tobit constant up front; users reuse xmm1. | ||
2032 | |.endif | ||
2033 | |.if DUALNUM | ||
2034 | | mov RB, [BASE] | ||
2035 | | checkint RB, >1 | ||
2036 | |.if kind > 0 | ||
2037 | | jmp >2 // Integer argument: RBd is ready, skip the conversion. | ||
2038 | |.else | ||
2039 | | jmp ->fff_resbit // kind 0: an integer argument is already the result. | ||
2040 | |.endif | ||
2041 | |1: | ||
2042 | | ja ->fff_fallback // NOTE(review): relies on flags left by checkint -- confirm. | ||
2043 | | movd xmm0, RB // Move the raw 64 bit number to xmm0. | ||
2044 | |.else | ||
2045 | | checknumtp [BASE], ->fff_fallback | ||
2046 | | movsd xmm0, qword [BASE] | ||
2047 | |.endif | ||
2048 | |.if kind < 2 | ||
2049 | | sseconst_tobit xmm1, RB | ||
2050 | |.endif | ||
2051 | | addsd xmm0, xmm1 // Add the tobit constant; presumably a bias trick -- confirm. | ||
2052 | | movd RBd, xmm0 // RBd = low 32 bits of the sum. | ||
2053 | |2: | ||
2054 | |.endmacro | ||
2055 | | | ||
2056 | |.macro .ffunc_bit, name, kind | ||
2057 | | .ffunc_bit name, kind, .ffunc_1 // Two-parameter form: defaults fdef to .ffunc_1. | ||
2058 | |.endmacro | ||
2059 | | | ||
2060 | |.ffunc_bit bit_tobit, 0 | ||
2061 | | jmp ->fff_resbit | ||
2062 | | | ||
2063 | |.macro .ffunc_bit_op, name, ins | ||
2064 | | .ffunc_bit name, 2 // RBd = first argument as int32; xmm1 = tobit constant. | ||
2065 | | mov TMPRd, NARGS:RDd // Save for fallback. | ||
2066 | | lea RD, [BASE+NARGS:RD*8-16] // RD = address of the last argument. | ||
2067 | |1: | ||
2068 | | cmp RD, BASE | ||
2069 | | jbe ->fff_resbit // Back at the first argument: done, result in RBd. | ||
2070 | |.if DUALNUM | ||
2071 | | mov RA, [RD] | ||
2072 | | checkint RA, >2 | ||
2073 | | ins RBd, RAd | ||
2074 | | sub RD, 8 // Walk the arguments from last to first. | ||
2075 | | jmp <1 | ||
2076 | |2: | ||
2077 | | ja ->fff_fallback_bit_op // NOTE(review): relies on flags left by checkint -- confirm. | ||
2078 | | movd xmm0, RA | ||
2079 | |.else | ||
2080 | | checknumtp [RD], ->fff_fallback_bit_op | ||
2081 | | movsd xmm0, qword [RD] | ||
2082 | |.endif | ||
2083 | | addsd xmm0, xmm1 // Same number-to-int32 conversion as in .ffunc_bit. | ||
2084 | | movd RAd, xmm0 | ||
2085 | | ins RBd, RAd | ||
2086 | | sub RD, 8 // Walk the arguments from last to first. | ||
2087 | | jmp <1 | ||
2088 | |.endmacro | ||
2089 | | | ||
2090 | |.ffunc_bit_op bit_band, and | ||
2091 | |.ffunc_bit_op bit_bor, or | ||
2092 | |.ffunc_bit_op bit_bxor, xor | ||
2093 | | | ||
2094 | |.ffunc_bit bit_bswap, 1 | ||
2095 | | bswap RBd // Reverse the byte order of the 32 bit value. | ||
2096 | | jmp ->fff_resbit | ||
2097 | | | ||
2098 | |.ffunc_bit bit_bnot, 1 | ||
2099 | | not RBd // Bitwise complement of the 32 bit value. | ||
2100 | |.if DUALNUM | ||
2101 | | jmp ->fff_resbit | ||
2102 | |.else | ||
2103 | |->fff_resbit: // Non-DUALNUM: return the int32 in RBd converted to a number. | ||
2104 | | cvtsi2sd xmm0, RBd | ||
2105 | | jmp ->fff_resxmm0 | ||
2106 | |.endif | ||
2107 | | | ||
2108 | |->fff_fallback_bit_op: | ||
2109 | | mov NARGS:RDd, TMPRd // Restore nargs+1 (RD was reused as a pointer) for the fallback. | ||
2110 | | jmp ->fff_fallback | ||
2111 | | | ||
2112 | |.macro .ffunc_bit_sh, name, ins | ||
2113 | |.if DUALNUM | ||
2114 | | .ffunc_bit name, 1, .ffunc_2 | ||
2115 | | // Note: no inline conversion from number for 2nd argument! | ||
2116 | | mov RA, [BASE+8] | ||
2117 | | checkint RA, ->fff_fallback | ||
2118 | |.else | ||
2119 | | .ffunc_nn name | ||
2120 | | sseconst_tobit xmm2, RB | ||
2121 | | addsd xmm0, xmm2 // Convert both arguments with the tobit constant. | ||
2122 | | addsd xmm1, xmm2 | ||
2123 | | movd RBd, xmm0 // RBd = value to shift/rotate. | ||
2124 | | movd RAd, xmm1 // RAd = shift count. | ||
2125 | |.endif | ||
2126 | | ins RBd, cl // Assumes RA is ecx. | ||
2127 | | jmp ->fff_resbit | ||
2128 | |.endmacro | ||
2129 | | | ||
2130 | |.ffunc_bit_sh bit_lshift, shl | ||
2131 | |.ffunc_bit_sh bit_rshift, shr | ||
2132 | |.ffunc_bit_sh bit_arshift, sar | ||
2133 | |.ffunc_bit_sh bit_rol, rol | ||
2134 | |.ffunc_bit_sh bit_ror, ror | ||
2135 | | | ||
2136 | |//----------------------------------------------------------------------- | ||
2137 | | | ||
2138 | |->fff_fallback_2: | ||
2139 | | mov NARGS:RDd, 1+2 // Other args are ignored, anyway. | ||
2140 | | jmp ->fff_fallback | ||
2141 | |->fff_fallback_1: | ||
2142 | | mov NARGS:RDd, 1+1 // Other args are ignored, anyway. | ||
2143 | |->fff_fallback: // Call fast function fallback handler. | ||
2144 | | // BASE = new base, RD = nargs+1 | ||
2145 | | mov L:RB, SAVE_L | ||
2146 | | mov PC, [BASE-8] // Fallback may overwrite PC. | ||
2147 | | mov SAVE_PC, PC // Redundant (but a defined value). | ||
2148 | | mov L:RB->base, BASE | ||
2149 | | lea RD, [BASE+NARGS:RD*8-8] // RD = just past the last argument. | ||
2150 | | lea RA, [RD+8*LUA_MINSTACK] // Ensure enough space for handler. | ||
2151 | | mov L:RB->top, RD | ||
2152 | | mov CFUNC:RD, [BASE-16] // Function object of the current frame. | ||
2153 | | cleartp CFUNC:RD | ||
2154 | | cmp RA, L:RB->maxstack | ||
2155 | | ja >5 // Need to grow stack. | ||
2156 | | mov CARG1, L:RB | ||
2157 | | call aword CFUNC:RD->f // (lua_State *L) | ||
2158 | | mov BASE, L:RB->base | ||
2159 | | // Either throws an error, or recovers and returns -1, 0 or nresults+1. | ||
2160 | | test RDd, RDd; jg ->fff_res // Returned nresults+1? | ||
2161 | |1: | ||
2162 | | mov RA, L:RB->top | ||
2163 | | sub RA, BASE | ||
2164 | | shr RAd, 3 // RA = number of stack slots between base and top. | ||
2165 | | test RDd, RDd // Set flags for the jne below; lea/mov leave them intact. | ||
2166 | | lea NARGS:RDd, [RAd+1] // RD = nargs+1. | ||
2167 | | mov LFUNC:RB, [BASE-16] | ||
2168 | | jne ->vm_call_tail // Returned -1? | ||
2169 | | cleartp LFUNC:RB | ||
2170 | | ins_callt // Returned 0: retry fast path. | ||
2171 | | | ||
2172 | |// Reconstruct previous base for vmeta_call during tailcall. | ||
2173 | |->vm_call_tail: | ||
2174 | | mov RA, BASE | ||
2175 | | test PCd, FRAME_TYPE | ||
2176 | | jnz >3 // Frame type bits set: PC holds a frame delta, see 3. | ||
2177 | | movzx RBd, PC_RA | ||
2178 | | neg RB | ||
2179 | | lea BASE, [BASE+RB*8-16] // base = base - (RB+2)*8 | ||
2180 | | jmp ->vm_call_dispatch // Resolve again for tailcall. | ||
2181 | |3: | ||
2182 | | mov RB, PC | ||
2183 | | and RB, -8 // Clear the low 3 bits; the rest is subtracted from BASE. | ||
2184 | | sub BASE, RB | ||
2185 | | jmp ->vm_call_dispatch // Resolve again for tailcall. | ||
2186 | | | ||
2187 | |5: // Grow stack for fallback handler. | ||
2188 | | mov CARG2d, LUA_MINSTACK | ||
2189 | | mov CARG1, L:RB | ||
2190 | | call extern lj_state_growstack // (lua_State *L, int n) | ||
2191 | | mov BASE, L:RB->base // Reload BASE from L->base after the call. | ||
2192 | | xor RDd, RDd // Simulate a return 0. | ||
2193 | | jmp <1 // Dumb retry (goes through ff first). | ||
2194 | | | ||
2195 | |->fff_gcstep: // Call GC step function. | ||
2196 | | // BASE = new base, RD = nargs+1 | ||
2197 | | pop RB // Must keep stack at same level. | ||
2198 | | mov TMP1, RB // Save return address. | ||
2199 | | mov L:RB, SAVE_L | ||
2200 | | mov SAVE_PC, PC // Redundant (but a defined value). | ||
2201 | | mov L:RB->base, BASE | ||
2202 | | lea RD, [BASE+NARGS:RD*8-8] // RD = just past the last argument. | ||
2203 | | mov CARG1, L:RB | ||
2204 | | mov L:RB->top, RD | ||
2205 | | call extern lj_gc_step // (lua_State *L) | ||
2206 | | mov BASE, L:RB->base // Reload BASE from L->base after the call. | ||
2207 | | mov RD, L:RB->top | ||
2208 | | sub RD, BASE | ||
2209 | | shr RDd, 3 | ||
2210 | | add NARGS:RDd, 1 // Recompute RD = nargs+1 from L->top - L->base. | ||
2211 | | mov RB, TMP1 | ||
2212 | | push RB // Restore return address. | ||
2213 | | ret | ||
2214 | | | ||
2215 | |//----------------------------------------------------------------------- | ||
2216 | |//-- Special dispatch targets ------------------------------------------- | ||
2217 | |//----------------------------------------------------------------------- | ||
2218 | | | ||
2219 | |->vm_record: // Dispatch target for recording phase. | ||
2220 | |.if JIT | ||
2221 | | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)] | ||
2222 | | test RDL, HOOK_VMEVENT // No recording while in vmevent. | ||
2223 | | jnz >5 // Skip the call and just re-dispatch. | ||
2224 | | // Decrement the hookcount for consistency, but always do the call. | ||
2225 | | test RDL, HOOK_ACTIVE | ||
2226 | | jnz >1 | ||
2227 | | test RDL, LUA_MASKLINE|LUA_MASKCOUNT | ||
2228 | | jz >1 | ||
2229 | | dec dword [DISPATCH+DISPATCH_GL(hookcount)] | ||
2230 | | jmp >1 | ||
2231 | |.endif | ||
2232 | | | ||
2233 | |->vm_rethook: // Dispatch target for return hooks. | ||
2234 | | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)] | ||
2235 | | test RDL, HOOK_ACTIVE // Hook already active? | ||
2236 | | jnz >5 | ||
2237 | | jmp >1 | ||
2238 | | | ||
2239 | |->vm_inshook: // Dispatch target for instr/line hooks. | ||
2240 | | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)] | ||
2241 | | test RDL, HOOK_ACTIVE // Hook already active? | ||
2242 | | jnz >5 | ||
2243 | | | ||
2244 | | test RDL, LUA_MASKLINE|LUA_MASKCOUNT | ||
2245 | | jz >5 // Neither line nor count hook set: just re-dispatch. | ||
2246 | | dec dword [DISPATCH+DISPATCH_GL(hookcount)] | ||
2247 | | jz >1 // Count reached zero: call the dispatcher. | ||
2248 | | test RDL, LUA_MASKLINE | ||
2249 | | jz >5 // No line hook either: just re-dispatch. | ||
2250 | |1: | ||
2251 | | mov L:RB, SAVE_L | ||
2252 | | mov L:RB->base, BASE | ||
2253 | | mov CARG2, PC // Caveat: CARG2 == BASE | ||
2254 | | mov CARG1, L:RB | ||
2255 | | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. | ||
2256 | | call extern lj_dispatch_ins // (lua_State *L, const BCIns *pc) | ||
2257 | |3: | ||
2258 | | mov BASE, L:RB->base // Reload BASE from L->base after the call. | ||
2259 | |4: | ||
2260 | | movzx RAd, PC_RA // Re-decode operand A of the current instruction. | ||
2261 | |5: | ||
2262 | | movzx OP, PC_OP | ||
2263 | | movzx RDd, PC_RD | ||
2264 | | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Re-dispatch to static ins. | ||
2265 | | | ||
2266 | |->cont_hook: // Continue from hook yield. | ||
2267 | | add PC, 4 // Advance PC by 4 bytes (one bytecode instruction). | ||
2268 | | mov RA, [RB-40] // NOTE(review): presumably the MULTRES value saved in the continuation frame -- confirm. | ||
2269 | | mov MULTRES, RAd // Restore MULTRES for *M ins. | ||
2270 | | jmp <4 // Re-decode and re-dispatch in the hook code above. | ||
2271 | | | ||
2272 | |->vm_hotloop: // Hot loop counter underflow. | ||
2273 | |.if JIT | ||
2274 | | mov LFUNC:RB, [BASE-16] // Same as curr_topL(L). | ||
2275 | | cleartp LFUNC:RB | ||
2276 | | mov RB, LFUNC:RB->pc | ||
2277 | | movzx RDd, byte [RB+PC2PROTO(framesize)] // RD = framesize of the current prototype. | ||
2278 | | lea RD, [BASE+RD*8] // RD = BASE + framesize slots. | ||
2279 | | mov L:RB, SAVE_L | ||
2280 | | mov L:RB->base, BASE | ||
2281 | | mov L:RB->top, RD | ||
2282 | | mov CARG2, PC | ||
2283 | | lea CARG1, [DISPATCH+GG_DISP2J] // CARG1 = jit_State *J. | ||
2284 | | mov aword [DISPATCH+DISPATCH_J(L)], L:RB // J->L = L. | ||
2285 | | mov SAVE_PC, PC | ||
2286 | | call extern lj_trace_hot // (jit_State *J, const BCIns *pc) | ||
2287 | | jmp <3 // Reload BASE and re-dispatch in the hook code above. | ||
2288 | |.endif | ||
2289 | | | ||
2290 | |->vm_callhook: // Dispatch target for call hooks. | ||
2291 | | mov SAVE_PC, PC | ||
2292 | |.if JIT | ||
2293 | | jmp >1 // Skip the hot call marker; share the code from 1 on. | ||
2294 | |.endif | ||
2295 | | | ||
2296 | |->vm_hotcall: // Hot call counter underflow. | ||
2297 | |.if JIT | ||
2298 | | mov SAVE_PC, PC | ||
2299 | | or PC, 1 // Marker for hot call. | ||
2300 | |1: | ||
2301 | |.endif | ||
2302 | | lea RD, [BASE+NARGS:RD*8-8] // RD = just past the last argument. | ||
2303 | | mov L:RB, SAVE_L | ||
2304 | | mov L:RB->base, BASE | ||
2305 | | mov L:RB->top, RD | ||
2306 | | mov CARG2, PC | ||
2307 | | mov CARG1, L:RB | ||
2308 | | call extern lj_dispatch_call // (lua_State *L, const BCIns *pc) | ||
2309 | | // ASMFunction returned in eax/rax (RD). | ||
2310 | | mov SAVE_PC, 0 // Invalidate for subsequent line hook. | ||
2311 | |.if JIT | ||
2312 | | and PC, -2 // Remove the hot call marker bit again. | ||
2313 | |.endif | ||
2314 | | mov BASE, L:RB->base | ||
2315 | | mov RA, RD // Stash the returned target; RD is reused below. | ||
2316 | | mov RD, L:RB->top | ||
2317 | | sub RD, BASE | ||
2318 | | mov RB, RA // RB = jump target. | ||
2319 | | movzx RAd, PC_RA | ||
2320 | | shr RDd, 3 | ||
2321 | | add NARGS:RDd, 1 // RD = nargs+1, recomputed from L->top - L->base. | ||
2322 | | jmp RB | ||
2323 | | | ||
2324 | |->cont_stitch: // Trace stitching. | ||
2325 | |.if JIT | ||
2326 | | // BASE = base, RC = result, RB = mbase | ||
2327 | | mov TRACE:ITYPE, [RB-40] // Save previous trace. | ||
2328 | | cleartp TRACE:ITYPE | ||
2329 | | mov TMPRd, MULTRES | ||
2330 | | movzx RAd, PC_RA | ||
2331 | | lea RA, [BASE+RA*8] // Call base. | ||
2332 | | sub TMPRd, 1 // TMPR = MULTRES-1 = number of results to move. | ||
2333 | | jz >2 // None: skip the copy loop. | ||
2334 | |1: // Move results down. | ||
2335 | | mov RB, [RC] | ||
2336 | | mov [RA], RB | ||
2337 | | add RC, 8 | ||
2338 | | add RA, 8 | ||
2339 | | sub TMPRd, 1 | ||
2340 | | jnz <1 | ||
2341 | |2: | ||
2342 | | movzx RCd, PC_RA | ||
2343 | | movzx RBd, PC_RB | ||
2344 | | add RC, RB | ||
2345 | | lea RC, [BASE+RC*8-8] // RC = BASE + (RA+RB-1)*8: end of the wanted results. | ||
2346 | |3: | ||
2347 | | cmp RC, RA | ||
2348 | | ja >9 // More results wanted? | ||
2349 | | | ||
2350 | | test TRACE:ITYPE, TRACE:ITYPE | ||
2351 | | jz ->cont_nop // No previous trace: nothing to stitch. | ||
2352 | | movzx RBd, word TRACE:ITYPE->traceno | ||
2353 | | movzx RDd, word TRACE:ITYPE->link | ||
2354 | | cmp RDd, RBd | ||
2355 | | je ->cont_nop // Blacklisted. | ||
2356 | | test RDd, RDd | ||
2357 | | jne =>BC_JLOOP // Jump to stitched trace. | ||
2358 | | | ||
2359 | | // Stitch a new trace to the previous trace. | ||
2360 | | mov [DISPATCH+DISPATCH_J(exitno)], RB // J->exitno = trace number of the previous trace. | ||
2361 | | mov L:RB, SAVE_L | ||
2362 | | mov L:RB->base, BASE | ||
2363 | | mov CARG2, PC | ||
2364 | | lea CARG1, [DISPATCH+GG_DISP2J] // CARG1 = jit_State *J. | ||
2365 | | mov aword [DISPATCH+DISPATCH_J(L)], L:RB // J->L = L. | ||
2366 | | call extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) | ||
2367 | | mov BASE, L:RB->base // Reload BASE from L->base after the call. | ||
2368 | | jmp ->cont_nop | ||
2369 | | | ||
2370 | |9: // Fill up results with nil. | ||
2371 | | mov aword [RA], LJ_TNIL | ||
2372 | | add RA, 8 | ||
2373 | | jmp <3 | ||
2374 | |.endif | ||
2375 | | | ||
2376 | |->vm_profhook: // Dispatch target for profiler hook. | ||
2377 | #if LJ_HASPROFILE | ||
2378 | | mov L:RB, SAVE_L | ||
2379 | | mov L:RB->base, BASE | ||
2380 | | mov CARG2, PC // Caveat: CARG2 == BASE | ||
2381 | | mov CARG1, L:RB | ||
2382 | | call extern lj_dispatch_profile // (lua_State *L, const BCIns *pc) | ||
2383 | | mov BASE, L:RB->base // Reload BASE from L->base after the call. | ||
2384 | | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. | ||
2385 | | sub PC, 4 // Step PC back by 4 bytes (one bytecode instruction). | ||
2386 | | jmp ->cont_nop | ||
2387 | #endif | ||
2388 | | | ||
2389 | |//----------------------------------------------------------------------- | ||
2390 | |//-- Trace exit handler ------------------------------------------------- | ||
2391 | |//----------------------------------------------------------------------- | ||
2392 | | | ||
2393 | |// Called from an exit stub with the exit number on the stack. | ||
2394 | |// The 16 bit exit number is stored with two (sign-extended) push imm8. | ||
2395 | |->vm_exit_handler: | ||
2396 | |.if JIT | ||
2397 | | push r13; push r12 | ||
2398 | | push r11; push r10; push r9; push r8 | ||
2399 | | push rdi; push rsi; push rbp; lea rbp, [rsp+88]; push rbp | ||
2400 | | push rbx; push rdx; push rcx; push rax | ||
2401 | | movzx RCd, byte [rbp-8] // Reconstruct exit number. | ||
2402 | | mov RCH, byte [rbp-16] // Second byte of the exit number. | ||
2403 | | mov [rbp-8], r15; mov [rbp-16], r14 // Reuse the two exit number slots to save r15/r14. | ||
2404 | | // DISPATCH is preserved on-trace in LJ_GC64 mode. | ||
2405 | | mov RAd, [DISPATCH+DISPATCH_GL(vmstate)] // Get trace number. | ||
2406 | | set_vmstate EXIT | ||
2407 | | mov [DISPATCH+DISPATCH_J(exitno)], RCd | ||
2408 | | mov [DISPATCH+DISPATCH_J(parent)], RAd | ||
2409 | |.if X64WIN | ||
2410 | | sub rsp, 16*8+4*8 // Room for SSE regs + save area. | ||
2411 | |.else | ||
2412 | | sub rsp, 16*8 // Room for SSE regs. | ||
2413 | |.endif | ||
2414 | | add rbp, -128 // Move rbp down by 128 bytes, so the stores below use short negative offsets. | ||
2415 | | movsd qword [rbp-8], xmm15; movsd qword [rbp-16], xmm14 | ||
2416 | | movsd qword [rbp-24], xmm13; movsd qword [rbp-32], xmm12 | ||
2417 | | movsd qword [rbp-40], xmm11; movsd qword [rbp-48], xmm10 | ||
2418 | | movsd qword [rbp-56], xmm9; movsd qword [rbp-64], xmm8 | ||
2419 | | movsd qword [rbp-72], xmm7; movsd qword [rbp-80], xmm6 | ||
2420 | | movsd qword [rbp-88], xmm5; movsd qword [rbp-96], xmm4 | ||
2421 | | movsd qword [rbp-104], xmm3; movsd qword [rbp-112], xmm2 | ||
2422 | | movsd qword [rbp-120], xmm1; movsd qword [rbp-128], xmm0 | ||
2423 | | // Caveat: RB is rbp. | ||
2424 | | mov L:RB, [DISPATCH+DISPATCH_GL(cur_L)] | ||
2425 | | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)] | ||
2426 | | mov aword [DISPATCH+DISPATCH_J(L)], L:RB // J->L = L. | ||
2427 | | mov L:RB->base, BASE | ||
2428 | |.if X64WIN | ||
2429 | | lea CARG2, [rsp+4*8] // Saved state starts above the 4*8 byte save area. | ||
2430 | |.else | ||
2431 | | mov CARG2, rsp // CARG2 = pointer to the saved state on the stack. | ||
2432 | |.endif | ||
2433 | | lea CARG1, [DISPATCH+GG_DISP2J] // CARG1 = jit_State *J. | ||
2434 | | mov qword [DISPATCH+DISPATCH_GL(jit_base)], 0 // Clear g->jit_base. | ||
2435 | | call extern lj_trace_exit // (jit_State *J, ExitState *ex) | ||
2436 | | // MULTRES or negated error code returned in eax (RD). | ||
2437 | | mov RA, L:RB->cframe | ||
2438 | | and RA, CFRAME_RAWMASK | ||
2439 | | mov [RA+CFRAME_OFS_L], L:RB // Set SAVE_L (on-trace resume/yield). | ||
2440 | | mov BASE, L:RB->base | ||
2441 | | mov PC, [RA+CFRAME_OFS_PC] // Get SAVE_PC. | ||
2442 | | jmp >1 // Enter vm_exit_interp below at 1, with RA = C frame. | ||
2443 | |.endif | ||
2444 | |->vm_exit_interp: | ||
2445 | | // RD = MULTRES or negated error code, BASE, PC and DISPATCH set. | ||
2446 | |.if JIT | ||
2447 | | // Restore additional callee-save registers only used in compiled code. | ||
2448 | |.if X64WIN | ||
2449 | | lea RA, [rsp+10*16+4*8] | ||
2450 | |1: // Entry from vm_exit_handler with RA already set. | ||
2451 | | movdqa xmm15, [RA-10*16] | ||
2452 | | movdqa xmm14, [RA-9*16] | ||
2453 | | movdqa xmm13, [RA-8*16] | ||
2454 | | movdqa xmm12, [RA-7*16] | ||
2455 | | movdqa xmm11, [RA-6*16] | ||
2456 | | movdqa xmm10, [RA-5*16] | ||
2457 | | movdqa xmm9, [RA-4*16] | ||
2458 | | movdqa xmm8, [RA-3*16] | ||
2459 | | movdqa xmm7, [RA-2*16] | ||
2460 | | mov rsp, RA // Reposition stack to C frame. | ||
2461 | | movdqa xmm6, [RA-1*16] | ||
2462 | | mov r15, CSAVE_1 | ||
2463 | | mov r14, CSAVE_2 | ||
2464 | | mov r13, CSAVE_3 | ||
2465 | | mov r12, CSAVE_4 | ||
2466 | |.else | ||
2467 | | lea RA, [rsp+16] | ||
2468 | |1: // Entry from vm_exit_handler with RA already set. | ||
2469 | | mov r13, [RA-8] | ||
2470 | | mov r12, [RA] | ||
2471 | | mov rsp, RA // Reposition stack to C frame. | ||
2472 | |.endif | ||
2473 | | test RDd, RDd; js >9 // Check for error from exit. | ||
2474 | | mov L:RB, SAVE_L | ||
2475 | | mov MULTRES, RDd | ||
2476 | | mov LFUNC:KBASE, [BASE-16] // Function object of the current frame. | ||
2477 | | cleartp LFUNC:KBASE | ||
2478 | | mov KBASE, LFUNC:KBASE->pc | ||
2479 | | mov KBASE, [KBASE+PC2PROTO(k)] // KBASE = constants of the current prototype. | ||
2480 | | mov L:RB->base, BASE | ||
2481 | | mov qword [DISPATCH+DISPATCH_GL(jit_base)], 0 // Clear g->jit_base. | ||
2482 | | set_vmstate INTERP | ||
2483 | | // Modified copy of ins_next which handles function header dispatch, too. | ||
2484 | | mov RCd, [PC] // Fetch the 32 bit instruction word. | ||
2485 | | movzx RAd, RCH // Operand A = second byte. | ||
2486 | | movzx OP, RCL // Opcode = lowest byte. | ||
2487 | | add PC, 4 | ||
2488 | | shr RCd, 16 // RC = upper 16 bits (operand D). | ||
2489 | | cmp OP, BC_FUNCF // Function header? | ||
2490 | | jb >3 | ||
2491 | | cmp OP, BC_FUNCC+2 // Fast function? | ||
2492 | | jae >4 | ||
2493 | |2: | ||
2494 | | mov RCd, MULTRES // RC/RD holds nres+1. | ||
2495 | |3: | ||
2496 | | jmp aword [DISPATCH+OP*8] | ||
2497 | | | ||
2498 | |4: // Check frame below fast function. | ||
2499 | | mov RC, [BASE-8] | ||
2500 | | test RCd, FRAME_TYPE | ||
2501 | | jnz <2 // Trace stitching continuation? | ||
2502 | | // Otherwise set KBASE for Lua function below fast function. | ||
2503 | | movzx RCd, byte [RC-3] // Operand A of the instruction before the saved PC. | ||
2504 | | neg RC | ||
2505 | | mov LFUNC:KBASE, [BASE+RC*8-32] | ||
2506 | | cleartp LFUNC:KBASE | ||
2507 | | mov KBASE, LFUNC:KBASE->pc | ||
2508 | | mov KBASE, [KBASE+PC2PROTO(k)] | ||
2509 | | jmp <2 | ||
2510 | | | ||
2511 | |9: // Rethrow error from the right C frame. | ||
2512 | | neg RD // Turn the negated error code back into a positive one. | ||
2513 | | mov CARG1, L:RB | ||
2514 | | mov CARG2, RD | ||
2515 | | call extern lj_err_throw // (lua_State *L, int errcode) | ||
2516 | |.endif | ||
2517 | | | ||
2518 | |//----------------------------------------------------------------------- | ||
2519 | |//-- Math helper functions ---------------------------------------------- | ||
2520 | |//----------------------------------------------------------------------- | ||
2521 | | | ||
2522 | |// FP value rounding. Called by math.floor/math.ceil fast functions | ||
2523 | |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified. | ||
2524 | |.macro vm_round, name, mode, cond | ||
2525 | |->name: | ||
2526 | |->name .. _sse: | ||
2527 | | sseconst_abs xmm2, RD | ||
2528 | | sseconst_2p52 xmm3, RD | ||
2529 | | movaps xmm1, xmm0 | ||
2530 | | andpd xmm1, xmm2 // |x| | ||
2531 | | ucomisd xmm3, xmm1 // No truncation if 2^52 <= |x|. | ||
2532 | | jbe >1 | ||
2533 | | andnpd xmm2, xmm0 // Isolate sign bit. | ||
2534 | |.if mode == 2 // trunc(x)? | ||
2535 | | movaps xmm0, xmm1 | ||
2536 | | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52 | ||
2537 | | subsd xmm1, xmm3 | ||
2538 | | sseconst_1 xmm3, RD | ||
2539 | | cmpsd xmm0, xmm1, 1 // |x| < result? | ||
2540 | | andpd xmm0, xmm3 | ||
2541 | | subsd xmm1, xmm0 // If yes, subtract 1. | ||
2542 | | orpd xmm1, xmm2 // Merge sign bit back in. | ||
2543 | |.else | ||
2544 | | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52 | ||
2545 | | subsd xmm1, xmm3 | ||
2546 | | orpd xmm1, xmm2 // Merge sign bit back in. | ||
2547 | | .if mode == 1 // ceil(x)? | ||
2548 | | sseconst_m1 xmm2, RD // Must subtract -1 to preserve -0. | ||
2549 | | cmpsd xmm0, xmm1, 6 // x > result? | ||
2550 | | .else // floor(x)? | ||
2551 | | sseconst_1 xmm2, RD | ||
2552 | | cmpsd xmm0, xmm1, 1 // x < result? | ||
2553 | | .endif | ||
2554 | | andpd xmm0, xmm2 | ||
2555 | | subsd xmm1, xmm0 // If yes, subtract +-1. | ||
2556 | |.endif | ||
2557 | | movaps xmm0, xmm1 | ||
2558 | |1: | ||
2559 | | ret | ||
2560 | |.endmacro | ||
2561 | | | ||
2562 | | vm_round vm_floor, 0, 1 | ||
2563 | | vm_round vm_ceil, 1, JIT | ||
2564 | | vm_round vm_trunc, 2, JIT | ||
2565 | | | ||
2566 | |// FP modulo x%y. Called by BC_MOD* and vm_arith. | ||
2567 | |->vm_mod: | ||
2568 | |// Args in xmm0/xmm1, return value in xmm0. | ||
2569 | |// Caveat: xmm0-xmm5 and RC (eax) modified! | ||
2570 | | movaps xmm5, xmm0 // Keep a copy of x for the final subtraction. | ||
2571 | | divsd xmm0, xmm1 // x/y | ||
2572 | | sseconst_abs xmm2, RD | ||
2573 | | sseconst_2p52 xmm3, RD | ||
2574 | | movaps xmm4, xmm0 | ||
2575 | | andpd xmm4, xmm2 // |x/y| | ||
2576 | | ucomisd xmm3, xmm4 // No truncation if 2^52 <= |x/y|. | ||
2577 | | jbe >1 // Also taken when the compare is unordered. | ||
2578 | | andnpd xmm2, xmm0 // Isolate sign bit. | ||
2579 | | addsd xmm4, xmm3 // (|x/y| + 2^52) - 2^52 | ||
2580 | | subsd xmm4, xmm3 | ||
2581 | | orpd xmm4, xmm2 // Merge sign bit back in. | ||
2582 | | sseconst_1 xmm2, RD | ||
2583 | | cmpsd xmm0, xmm4, 1 // x/y < result? | ||
2584 | | andpd xmm0, xmm2 // Compare mask selects the constant or zero. | ||
2585 | | subsd xmm4, xmm0 // If yes, subtract 1.0. | ||
2586 | | movaps xmm0, xmm5 // Reload x. | ||
2587 | | mulsd xmm1, xmm4 // y * floor(x/y) | ||
2588 | | subsd xmm0, xmm1 // x - y*floor(x/y) | ||
2589 | | ret | ||
2590 | |1: // Quotient used as is, without the rounding steps above. | ||
2591 | | mulsd xmm1, xmm0 // y * (x/y) | ||
2592 | | movaps xmm0, xmm5 // Reload x. | ||
2593 | | subsd xmm0, xmm1 // x - y*(x/y) | ||
2594 | | ret | ||
2595 | | | ||
2596 | |// Integer power x^i by repeated squaring. Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified. | ||
2597 | |->vm_powi_sse: | ||
2598 | | cmp eax, 1; jle >6 // i<=1? | ||
2599 | | // Now 1 < (unsigned)i <= 0x80000000. | ||
2600 | |1: // Handle leading zeros. | ||
2601 | | test eax, 1; jnz >2 // Lowest bit set? Stop skipping. | ||
2602 | | mulsd xmm0, xmm0 // Square x once per skipped zero bit. | ||
2603 | | shr eax, 1 | ||
2604 | | jmp <1 | ||
2605 | |2: | ||
2606 | | shr eax, 1; jz >5 // Drop the set bit. No bits left? Return xmm0. | ||
2607 | | movaps xmm1, xmm0 // xmm1 accumulates the product of selected squares. | ||
2608 | |3: // Handle trailing bits. | ||
2609 | | mulsd xmm0, xmm0 // Square for the next bit. | ||
2610 | | shr eax, 1; jz >4 // Last set bit shifted out? Finish at 4. | ||
2611 | | jnc <3 // Shifted-out bit was 0: keep squaring. | ||
2612 | | mulsd xmm1, xmm0 // Shifted-out bit was 1: fold this square in. | ||
2613 | | jmp <3 | ||
2614 | |4: | ||
2615 | | mulsd xmm0, xmm1 // Result = final square * accumulated product. | ||
2616 | |5: | ||
2617 | | ret | ||
2618 | |6: // Signed i <= 1. Flags are still those of the cmp at entry. | ||
2619 | | je <5 // x^1 ==> x | ||
2620 | | jb >7 // x^0 ==> 1 | ||
2621 | | neg eax // Remaining case is i < 0: use -i as the exponent. | ||
2622 | | call <1 // Run the loop above as a subroutine; it returns via ret at 5. | ||
2623 | | sseconst_1 xmm1, RD | ||
2624 | | divsd xmm1, xmm0 // Constant from sseconst_1 divided by x^(-i). | ||
2625 | | movaps xmm0, xmm1 | ||
2626 | | ret | ||
2627 | |7: | ||
2628 | | sseconst_1 xmm0, RD | ||
2629 | | ret | ||
2630 | | | ||
2631 | |//----------------------------------------------------------------------- | ||
2632 | |//-- Miscellaneous functions -------------------------------------------- | ||
2633 | |//----------------------------------------------------------------------- | ||
2634 | | | ||
2635 | |// int lj_vm_cpuid(uint32_t f, uint32_t res[4]) | ||
2636 | |->vm_cpuid: | ||
2637 | | mov eax, CARG1d // eax = f, the input to cpuid. | ||
2638 | | .if X64WIN; push rsi; mov rsi, CARG2; .endif // X64WIN only: save rsi, then point it at res. | ||
2639 | | push rbx // Saved here, restored after the stores below. | ||
2640 | | xor ecx, ecx // ecx = 0 on entry to cpuid. | ||
2641 | | cpuid | ||
2642 | | mov [rsi], eax // res[0] = eax | ||
2643 | | mov [rsi+4], ebx // res[1] = ebx | ||
2644 | | mov [rsi+8], ecx // res[2] = ecx | ||
2645 | | mov [rsi+12], edx // res[3] = edx | ||
2646 | | pop rbx | ||
2647 | | .if X64WIN; pop rsi; .endif | ||
2648 | | ret // eax is untouched since cpuid. | ||
2649 | | | ||
2650 | |//----------------------------------------------------------------------- | ||
2651 | |//-- Assertions --------------------------------------------------------- | ||
2652 | |//----------------------------------------------------------------------- | ||
2653 | | | ||
2654 | |->assert_bad_for_arg_type: | ||
2655 | #ifdef LUA_USE_ASSERT | ||
2656 | | int3 | ||
2657 | #endif | ||
2658 | | int3 | ||
2659 | | | ||
2660 | |//----------------------------------------------------------------------- | ||
2661 | |//-- FFI helper functions ----------------------------------------------- | ||
2662 | |//----------------------------------------------------------------------- | ||
2663 | | | ||
2664 | |// Handler for callback functions. Callback slot number in ah/al. | ||
2665 | |->vm_ffi_callback: | ||
2666 | |.if FFI | ||
2667 | |.type CTSTATE, CTState, PC | ||
2668 | | saveregs_ // ebp/rbp already saved. ebp now holds global_State *. | ||
2669 | | lea DISPATCH, [ebp+GG_G2DISP] | ||
2670 | | mov CTSTATE, GL:ebp->ctype_state | ||
2671 | | movzx eax, ax | ||
2672 | | mov CTSTATE->cb.slot, eax | ||
2673 | | mov CTSTATE->cb.gpr[0], CARG1 | ||
2674 | | mov CTSTATE->cb.gpr[1], CARG2 | ||
2675 | | mov CTSTATE->cb.gpr[2], CARG3 | ||
2676 | | mov CTSTATE->cb.gpr[3], CARG4 | ||
2677 | | movsd qword CTSTATE->cb.fpr[0], xmm0 | ||
2678 | | movsd qword CTSTATE->cb.fpr[1], xmm1 | ||
2679 | | movsd qword CTSTATE->cb.fpr[2], xmm2 | ||
2680 | | movsd qword CTSTATE->cb.fpr[3], xmm3 | ||
2681 | |.if X64WIN | ||
2682 | | lea rax, [rsp+CFRAME_SIZE+4*8] | ||
2683 | |.else | ||
2684 | | lea rax, [rsp+CFRAME_SIZE] | ||
2685 | | mov CTSTATE->cb.gpr[4], CARG5 | ||
2686 | | mov CTSTATE->cb.gpr[5], CARG6 | ||
2687 | | movsd qword CTSTATE->cb.fpr[4], xmm4 | ||
2688 | | movsd qword CTSTATE->cb.fpr[5], xmm5 | ||
2689 | | movsd qword CTSTATE->cb.fpr[6], xmm6 | ||
2690 | | movsd qword CTSTATE->cb.fpr[7], xmm7 | ||
2691 | |.endif | ||
2692 | | mov CTSTATE->cb.stack, rax | ||
2693 | | mov CARG2, rsp | ||
2694 | | mov SAVE_PC, CTSTATE // Any value outside of bytecode is ok. | ||
2695 | | mov CARG1, CTSTATE | ||
2696 | | call extern lj_ccallback_enter // (CTState *cts, void *cf) | ||
2697 | | // lua_State * returned in eax (RD). | ||
2698 | | set_vmstate INTERP | ||
2699 | | mov BASE, L:RD->base | ||
2700 | | mov RD, L:RD->top | ||
2701 | | sub RD, BASE | ||
2702 | | mov LFUNC:RB, [BASE-16] | ||
2703 | | cleartp LFUNC:RB | ||
2704 | | shr RD, 3 | ||
2705 | | add RD, 1 | ||
2706 | | ins_callt | ||
2707 | |.endif | ||
2708 | | | ||
2709 | |->cont_ffi_callback: // Return from FFI callback. | ||
2710 | |.if FFI | ||
2711 | | mov L:RA, SAVE_L | ||
2712 | | mov CTSTATE, [DISPATCH+DISPATCH_GL(ctype_state)] | ||
2713 | | mov aword CTSTATE->L, L:RA | ||
2714 | | mov L:RA->base, BASE | ||
2715 | | mov L:RA->top, RB | ||
2716 | | mov CARG1, CTSTATE | ||
2717 | | mov CARG2, RC | ||
2718 | | call extern lj_ccallback_leave // (CTState *cts, TValue *o) | ||
2719 | | mov rax, CTSTATE->cb.gpr[0] | ||
2720 | | movsd xmm0, qword CTSTATE->cb.fpr[0] | ||
2721 | | jmp ->vm_leave_unw | ||
2722 | |.endif | ||
2723 | | | ||
2724 | |->vm_ffi_call: // Call C function via FFI. | ||
2725 | | // Caveat: needs special frame unwinding, see below. | ||
2726 | |.if FFI | ||
2727 | | .type CCSTATE, CCallState, rbx | ||
2728 | | push rbp; mov rbp, rsp; push rbx; mov CCSTATE, CARG1 | ||
2729 | | | ||
2730 | | // Readjust stack. | ||
2731 | | mov eax, CCSTATE->spadj | ||
2732 | | sub rsp, rax | ||
2733 | | | ||
2734 | | // Copy stack slots. | ||
2735 | | movzx ecx, byte CCSTATE->nsp | ||
2736 | | sub ecx, 1 | ||
2737 | | js >2 | ||
2738 | |1: | ||
2739 | | mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)] | ||
2740 | | mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax | ||
2741 | | sub ecx, 1 | ||
2742 | | jns <1 | ||
2743 | |2: | ||
2744 | | | ||
2745 | | movzx eax, byte CCSTATE->nfpr | ||
2746 | | mov CARG1, CCSTATE->gpr[0] | ||
2747 | | mov CARG2, CCSTATE->gpr[1] | ||
2748 | | mov CARG3, CCSTATE->gpr[2] | ||
2749 | | mov CARG4, CCSTATE->gpr[3] | ||
2750 | |.if not X64WIN | ||
2751 | | mov CARG5, CCSTATE->gpr[4] | ||
2752 | | mov CARG6, CCSTATE->gpr[5] | ||
2753 | |.endif | ||
2754 | | test eax, eax; jz >5 | ||
2755 | | movaps xmm0, CCSTATE->fpr[0] | ||
2756 | | movaps xmm1, CCSTATE->fpr[1] | ||
2757 | | movaps xmm2, CCSTATE->fpr[2] | ||
2758 | | movaps xmm3, CCSTATE->fpr[3] | ||
2759 | |.if not X64WIN | ||
2760 | | cmp eax, 4; jbe >5 | ||
2761 | | movaps xmm4, CCSTATE->fpr[4] | ||
2762 | | movaps xmm5, CCSTATE->fpr[5] | ||
2763 | | movaps xmm6, CCSTATE->fpr[6] | ||
2764 | | movaps xmm7, CCSTATE->fpr[7] | ||
2765 | |.endif | ||
2766 | |5: | ||
2767 | | | ||
2768 | | call aword CCSTATE->func | ||
2769 | | | ||
2770 | | mov CCSTATE->gpr[0], rax | ||
2771 | | movaps CCSTATE->fpr[0], xmm0 | ||
2772 | |.if not X64WIN | ||
2773 | | mov CCSTATE->gpr[1], rdx | ||
2774 | | movaps CCSTATE->fpr[1], xmm1 | ||
2775 | |.endif | ||
2776 | | | ||
2777 | | mov rbx, [rbp-8]; leave; ret | ||
2778 | |.endif | ||
2779 | |// Note: vm_ffi_call must be the last function in this object file! | ||
2780 | | | ||
2781 | |//----------------------------------------------------------------------- | ||
2782 | } | ||
2783 | |||
2784 | /* Generate the code for a single instruction. */ | ||
2785 | static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ||
2786 | { | ||
2787 | int vk = 0; | ||
2788 | |// Note: aligning all instructions does not pay off. | ||
2789 | |=>defop: | ||
2790 | |||
2791 | switch (op) { | ||
2792 | |||
2793 | /* -- Comparison ops ---------------------------------------------------- */ | ||
2794 | |||
2795 | /* Remember: all ops branch for a true comparison, fall through otherwise. */ | ||
2796 | |||
2797 | |.macro jmp_comp, lt, ge, le, gt, target | ||
2798 | ||switch (op) { | ||
2799 | ||case BC_ISLT: | ||
2800 | | lt target | ||
2801 | ||break; | ||
2802 | ||case BC_ISGE: | ||
2803 | | ge target | ||
2804 | ||break; | ||
2805 | ||case BC_ISLE: | ||
2806 | | le target | ||
2807 | ||break; | ||
2808 | ||case BC_ISGT: | ||
2809 | | gt target | ||
2810 | ||break; | ||
2811 | ||default: break; /* Shut up GCC. */ | ||
2812 | ||} | ||
2813 | |.endmacro | ||
2814 | |||
2815 | case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: | ||
2816 | | // RA = src1, RD = src2, JMP with RD = target | ||
2817 | | ins_AD | ||
2818 | | mov ITYPE, [BASE+RA*8] | ||
2819 | | mov RB, [BASE+RD*8] | ||
2820 | | mov RA, ITYPE | ||
2821 | | mov RD, RB | ||
2822 | | sar ITYPE, 47 | ||
2823 | | sar RB, 47 | ||
2824 | |.if DUALNUM | ||
2825 | | cmp ITYPEd, LJ_TISNUM; jne >7 | ||
2826 | | cmp RBd, LJ_TISNUM; jne >8 | ||
2827 | | add PC, 4 | ||
2828 | | cmp RAd, RDd | ||
2829 | | jmp_comp jge, jl, jg, jle, >9 | ||
2830 | |6: | ||
2831 | | movzx RDd, PC_RD | ||
2832 | | branchPC RD | ||
2833 | |9: | ||
2834 | | ins_next | ||
2835 | | | ||
2836 | |7: // RA is not an integer. | ||
2837 | | ja ->vmeta_comp | ||
2838 | | // RA is a number. | ||
2839 | | cmp RBd, LJ_TISNUM; jb >1; jne ->vmeta_comp | ||
2840 | | // RA is a number, RD is an integer. | ||
2841 | | cvtsi2sd xmm0, RDd | ||
2842 | | jmp >2 | ||
2843 | | | ||
2844 | |8: // RA is an integer, RD is not an integer. | ||
2845 | | ja ->vmeta_comp | ||
2846 | | // RA is an integer, RD is a number. | ||
2847 | | cvtsi2sd xmm1, RAd | ||
2848 | | movd xmm0, RD | ||
2849 | | jmp >3 | ||
2850 | |.else | ||
2851 | | cmp ITYPEd, LJ_TISNUM; jae ->vmeta_comp | ||
2852 | | cmp RBd, LJ_TISNUM; jae ->vmeta_comp | ||
2853 | |.endif | ||
2854 | |1: | ||
2855 | | movd xmm0, RD | ||
2856 | |2: | ||
2857 | | movd xmm1, RA | ||
2858 | |3: | ||
2859 | | add PC, 4 | ||
2860 | | ucomisd xmm0, xmm1 | ||
2861 | | // Unordered: all of ZF CF PF set, ordered: PF clear. | ||
2862 | | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. | ||
2863 | |.if DUALNUM | ||
2864 | | jmp_comp jbe, ja, jb, jae, <9 | ||
2865 | | jmp <6 | ||
2866 | |.else | ||
2867 | | jmp_comp jbe, ja, jb, jae, >1 | ||
2868 | | movzx RDd, PC_RD | ||
2869 | | branchPC RD | ||
2870 | |1: | ||
2871 | | ins_next | ||
2872 | |.endif | ||
2873 | break; | ||
2874 | |||
2875 | case BC_ISEQV: case BC_ISNEV: | ||
2876 | vk = op == BC_ISEQV; | ||
2877 | | ins_AD // RA = src1, RD = src2, JMP with RD = target | ||
2878 | | mov RB, [BASE+RD*8] | ||
2879 | | mov ITYPE, [BASE+RA*8] | ||
2880 | | add PC, 4 | ||
2881 | | mov RD, RB | ||
2882 | | mov RA, ITYPE | ||
2883 | | sar RB, 47 | ||
2884 | | sar ITYPE, 47 | ||
2885 | |.if DUALNUM | ||
2886 | | cmp RBd, LJ_TISNUM; jne >7 | ||
2887 | | cmp ITYPEd, LJ_TISNUM; jne >8 | ||
2888 | | cmp RDd, RAd | ||
2889 | if (vk) { | ||
2890 | | jne >9 | ||
2891 | } else { | ||
2892 | | je >9 | ||
2893 | } | ||
2894 | | movzx RDd, PC_RD | ||
2895 | | branchPC RD | ||
2896 | |9: | ||
2897 | | ins_next | ||
2898 | | | ||
2899 | |7: // RD is not an integer. | ||
2900 | | ja >5 | ||
2901 | | // RD is a number. | ||
2902 | | movd xmm1, RD | ||
2903 | | cmp ITYPEd, LJ_TISNUM; jb >1; jne >5 | ||
2904 | | // RD is a number, RA is an integer. | ||
2905 | | cvtsi2sd xmm0, RAd | ||
2906 | | jmp >2 | ||
2907 | | | ||
2908 | |8: // RD is an integer, RA is not an integer. | ||
2909 | | ja >5 | ||
2910 | | // RD is an integer, RA is a number. | ||
2911 | | cvtsi2sd xmm1, RDd | ||
2912 | | jmp >1 | ||
2913 | | | ||
2914 | |.else | ||
2915 | | cmp RBd, LJ_TISNUM; jae >5 | ||
2916 | | cmp ITYPEd, LJ_TISNUM; jae >5 | ||
2917 | | movd xmm1, RD | ||
2918 | |.endif | ||
2919 | |1: | ||
2920 | | movd xmm0, RA | ||
2921 | |2: | ||
2922 | | ucomisd xmm0, xmm1 | ||
2923 | |4: | ||
2924 | iseqne_fp: | ||
2925 | if (vk) { | ||
2926 | | jp >2 // Unordered means not equal. | ||
2927 | | jne >2 | ||
2928 | } else { | ||
2929 | | jp >2 // Unordered means not equal. | ||
2930 | | je >1 | ||
2931 | } | ||
2932 | iseqne_end: | ||
2933 | if (vk) { | ||
2934 | |1: // EQ: Branch to the target. | ||
2935 | | movzx RDd, PC_RD | ||
2936 | | branchPC RD | ||
2937 | |2: // NE: Fallthrough to next instruction. | ||
2938 | |.if not FFI | ||
2939 | |3: | ||
2940 | |.endif | ||
2941 | } else { | ||
2942 | |.if not FFI | ||
2943 | |3: | ||
2944 | |.endif | ||
2945 | |2: // NE: Branch to the target. | ||
2946 | | movzx RDd, PC_RD | ||
2947 | | branchPC RD | ||
2948 | |1: // EQ: Fallthrough to next instruction. | ||
2949 | } | ||
2950 | if (LJ_DUALNUM && (op == BC_ISEQV || op == BC_ISNEV || | ||
2951 | op == BC_ISEQN || op == BC_ISNEN)) { | ||
2952 | | jmp <9 | ||
2953 | } else { | ||
2954 | | ins_next | ||
2955 | } | ||
2956 | | | ||
2957 | if (op == BC_ISEQV || op == BC_ISNEV) { | ||
2958 | |5: // Either or both types are not numbers. | ||
2959 | |.if FFI | ||
2960 | | cmp RBd, LJ_TCDATA; je ->vmeta_equal_cd | ||
2961 | | cmp ITYPEd, LJ_TCDATA; je ->vmeta_equal_cd | ||
2962 | |.endif | ||
2963 | | cmp RA, RD | ||
2964 | | je <1 // Same GCobjs or pvalues? | ||
2965 | | cmp RBd, ITYPEd | ||
2966 | | jne <2 // Not the same type? | ||
2967 | | cmp RBd, LJ_TISTABUD | ||
2968 | | ja <2 // Different objects and not table/ud? | ||
2969 | | | ||
2970 | | // Different tables or userdatas. Need to check __eq metamethod. | ||
2971 | | // Field metatable must be at same offset for GCtab and GCudata! | ||
2972 | | cleartp TAB:RA | ||
2973 | | mov TAB:RB, TAB:RA->metatable | ||
2974 | | test TAB:RB, TAB:RB | ||
2975 | | jz <2 // No metatable? | ||
2976 | | test byte TAB:RB->nomm, 1<<MM_eq | ||
2977 | | jnz <2 // Or 'no __eq' flag set? | ||
2978 | if (vk) { | ||
2979 | | xor RBd, RBd // ne = 0 | ||
2980 | } else { | ||
2981 | | mov RBd, 1 // ne = 1 | ||
2982 | } | ||
2983 | | jmp ->vmeta_equal // Handle __eq metamethod. | ||
2984 | } else { | ||
2985 | |.if FFI | ||
2986 | |3: | ||
2987 | | cmp ITYPEd, LJ_TCDATA | ||
2988 | if (LJ_DUALNUM && vk) { | ||
2989 | | jne <9 | ||
2990 | } else { | ||
2991 | | jne <2 | ||
2992 | } | ||
2993 | | jmp ->vmeta_equal_cd | ||
2994 | |.endif | ||
2995 | } | ||
2996 | break; | ||
2997 | case BC_ISEQS: case BC_ISNES: | ||
2998 | vk = op == BC_ISEQS; | ||
2999 | | ins_AND // RA = src, RD = str const, JMP with RD = target | ||
3000 | | mov RB, [BASE+RA*8] | ||
3001 | | add PC, 4 | ||
3002 | | checkstr RB, >3 | ||
3003 | | cmp RB, [KBASE+RD*8] | ||
3004 | iseqne_test: | ||
3005 | if (vk) { | ||
3006 | | jne >2 | ||
3007 | } else { | ||
3008 | | je >1 | ||
3009 | } | ||
3010 | goto iseqne_end; | ||
3011 | case BC_ISEQN: case BC_ISNEN: | ||
3012 | vk = op == BC_ISEQN; | ||
3013 | | ins_AD // RA = src, RD = num const, JMP with RD = target | ||
3014 | | mov RB, [BASE+RA*8] | ||
3015 | | add PC, 4 | ||
3016 | |.if DUALNUM | ||
3017 | | checkint RB, >7 | ||
3018 | | mov RD, [KBASE+RD*8] | ||
3019 | | checkint RD, >8 | ||
3020 | | cmp RBd, RDd | ||
3021 | if (vk) { | ||
3022 | | jne >9 | ||
3023 | } else { | ||
3024 | | je >9 | ||
3025 | } | ||
3026 | | movzx RDd, PC_RD | ||
3027 | | branchPC RD | ||
3028 | |9: | ||
3029 | | ins_next | ||
3030 | | | ||
3031 | |7: // RA is not an integer. | ||
3032 | | ja >3 | ||
3033 | | // RA is a number. | ||
3034 | | mov RD, [KBASE+RD*8] | ||
3035 | | checkint RD, >1 | ||
3036 | | // RA is a number, RD is an integer. | ||
3037 | | cvtsi2sd xmm0, RDd | ||
3038 | | jmp >2 | ||
3039 | | | ||
3040 | |8: // RA is an integer, RD is a number. | ||
3041 | | cvtsi2sd xmm0, RBd | ||
3042 | | movd xmm1, RD | ||
3043 | | ucomisd xmm0, xmm1 | ||
3044 | | jmp >4 | ||
3045 | |1: | ||
3046 | | movd xmm0, RD | ||
3047 | |.else | ||
3048 | | checknum RB, >3 | ||
3049 | |1: | ||
3050 | | movsd xmm0, qword [KBASE+RD*8] | ||
3051 | |.endif | ||
3052 | |2: | ||
3053 | | ucomisd xmm0, qword [BASE+RA*8] | ||
3054 | |4: | ||
3055 | goto iseqne_fp; | ||
3056 | case BC_ISEQP: case BC_ISNEP: | ||
3057 | vk = op == BC_ISEQP; | ||
3058 | | ins_AND // RA = src, RD = primitive type (~), JMP with RD = target | ||
3059 | | mov RB, [BASE+RA*8] | ||
3060 | | sar RB, 47 | ||
3061 | | add PC, 4 | ||
3062 | | cmp RBd, RDd | ||
3063 | if (!LJ_HASFFI) goto iseqne_test; | ||
3064 | if (vk) { | ||
3065 | | jne >3 | ||
3066 | | movzx RDd, PC_RD | ||
3067 | | branchPC RD | ||
3068 | |2: | ||
3069 | | ins_next | ||
3070 | |3: | ||
3071 | | cmp RBd, LJ_TCDATA; jne <2 | ||
3072 | | jmp ->vmeta_equal_cd | ||
3073 | } else { | ||
3074 | | je >2 | ||
3075 | | cmp RBd, LJ_TCDATA; je ->vmeta_equal_cd | ||
3076 | | movzx RDd, PC_RD | ||
3077 | | branchPC RD | ||
3078 | |2: | ||
3079 | | ins_next | ||
3080 | } | ||
3081 | break; | ||
3082 | |||
3083 | /* -- Unary test and copy ops ------------------------------------------- */ | ||
3084 | |||
3085 | case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: | ||
3086 | | ins_AD // RA = dst or unused, RD = src, JMP with RD = target | ||
3087 | | mov ITYPE, [BASE+RD*8] | ||
3088 | | add PC, 4 | ||
3089 | if (op == BC_ISTC || op == BC_ISFC) { | ||
3090 | | mov RB, ITYPE | ||
3091 | } | ||
3092 | | sar ITYPE, 47 | ||
3093 | | cmp ITYPEd, LJ_TISTRUECOND | ||
3094 | if (op == BC_IST || op == BC_ISTC) { | ||
3095 | | jae >1 | ||
3096 | } else { | ||
3097 | | jb >1 | ||
3098 | } | ||
3099 | if (op == BC_ISTC || op == BC_ISFC) { | ||
3100 | | mov [BASE+RA*8], RB | ||
3101 | } | ||
3102 | | movzx RDd, PC_RD | ||
3103 | | branchPC RD | ||
3104 | |1: // Fallthrough to the next instruction. | ||
3105 | | ins_next | ||
3106 | break; | ||
3107 | |||
3108 | case BC_ISTYPE: | ||
3109 | | ins_AD // RA = src, RD = -type | ||
3110 | | mov RB, [BASE+RA*8] | ||
3111 | | sar RB, 47 | ||
3112 | | add RBd, RDd | ||
3113 | | jne ->vmeta_istype | ||
3114 | | ins_next | ||
3115 | break; | ||
3116 | case BC_ISNUM: | ||
3117 | | ins_AD // RA = src, RD = -(TISNUM-1) | ||
3118 | | checknumtp [BASE+RA*8], ->vmeta_istype | ||
3119 | | ins_next | ||
3120 | break; | ||
3121 | |||
3122 | /* -- Unary ops --------------------------------------------------------- */ | ||
3123 | |||
3124 | case BC_MOV: | ||
3125 | | ins_AD // RA = dst, RD = src | ||
3126 | | mov RB, [BASE+RD*8] | ||
3127 | | mov [BASE+RA*8], RB | ||
3128 | | ins_next_ | ||
3129 | break; | ||
3130 | case BC_NOT: | ||
3131 | | ins_AD // RA = dst, RD = src | ||
3132 | | mov RB, [BASE+RD*8] | ||
3133 | | sar RB, 47 | ||
3134 | | mov RCd, 2 | ||
3135 | | cmp RB, LJ_TISTRUECOND | ||
3136 | | sbb RCd, 0 | ||
3137 | | shl RC, 47 | ||
3138 | | not RC | ||
3139 | | mov [BASE+RA*8], RC | ||
3140 | | ins_next | ||
3141 | break; | ||
3142 | case BC_UNM: | ||
3143 | | ins_AD // RA = dst, RD = src | ||
3144 | | mov RB, [BASE+RD*8] | ||
3145 | |.if DUALNUM | ||
3146 | | checkint RB, >5 | ||
3147 | | neg RBd | ||
3148 | | jo >4 | ||
3149 | | setint RB | ||
3150 | |9: | ||
3151 | | mov [BASE+RA*8], RB | ||
3152 | | ins_next | ||
3153 | |4: | ||
3154 | | mov64 RB, U64x(41e00000,00000000) // 2^31. | ||
3155 | | jmp <9 | ||
3156 | |5: | ||
3157 | | ja ->vmeta_unm | ||
3158 | |.else | ||
3159 | | checknum RB, ->vmeta_unm | ||
3160 | |.endif | ||
3161 | | mov64 RD, U64x(80000000,00000000) | ||
3162 | | xor RB, RD | ||
3163 | |.if DUALNUM | ||
3164 | | jmp <9 | ||
3165 | |.else | ||
3166 | | mov [BASE+RA*8], RB | ||
3167 | | ins_next | ||
3168 | |.endif | ||
3169 | break; | ||
3170 | case BC_LEN: | ||
3171 | | ins_AD // RA = dst, RD = src | ||
3172 | | mov RD, [BASE+RD*8] | ||
3173 | | checkstr RD, >2 | ||
3174 | |.if DUALNUM | ||
3175 | | mov RDd, dword STR:RD->len | ||
3176 | |1: | ||
3177 | | setint RD | ||
3178 | | mov [BASE+RA*8], RD | ||
3179 | |.else | ||
3180 | | xorps xmm0, xmm0 | ||
3181 | | cvtsi2sd xmm0, dword STR:RD->len | ||
3182 | |1: | ||
3183 | | movsd qword [BASE+RA*8], xmm0 | ||
3184 | |.endif | ||
3185 | | ins_next | ||
3186 | |2: | ||
3187 | | cmp ITYPEd, LJ_TTAB; jne ->vmeta_len | ||
3188 | | mov TAB:CARG1, TAB:RD | ||
3189 | #if LJ_52 | ||
3190 | | mov TAB:RB, TAB:RD->metatable | ||
3191 | | cmp TAB:RB, 0 | ||
3192 | | jnz >9 | ||
3193 | |3: | ||
3194 | #endif | ||
3195 | |->BC_LEN_Z: | ||
3196 | | mov RB, BASE // Save BASE. | ||
3197 | | call extern lj_tab_len // (GCtab *t) | ||
3198 | | // Length of table returned in eax (RD). | ||
3199 | |.if DUALNUM | ||
3200 | | // Nothing to do. | ||
3201 | |.else | ||
3202 | | cvtsi2sd xmm0, RDd | ||
3203 | |.endif | ||
3204 | | mov BASE, RB // Restore BASE. | ||
3205 | | movzx RAd, PC_RA | ||
3206 | | jmp <1 | ||
3207 | #if LJ_52 | ||
3208 | |9: // Check for __len. | ||
3209 | | test byte TAB:RB->nomm, 1<<MM_len | ||
3210 | | jnz <3 | ||
3211 | | jmp ->vmeta_len // 'no __len' flag NOT set: check. | ||
3212 | #endif | ||
3213 | break; | ||
3214 | |||
3215 | /* -- Binary ops -------------------------------------------------------- */ | ||
3216 | |||
3217 | |.macro ins_arithpre, sseins, ssereg | ||
3218 | | ins_ABC | ||
3219 | ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); | ||
3220 | ||switch (vk) { | ||
3221 | ||case 0: | ||
3222 | | checknumtp [BASE+RB*8], ->vmeta_arith_vn | ||
3223 | | .if DUALNUM | ||
3224 | | checknumtp [KBASE+RC*8], ->vmeta_arith_vn | ||
3225 | | .endif | ||
3226 | | movsd xmm0, qword [BASE+RB*8] | ||
3227 | | sseins ssereg, qword [KBASE+RC*8] | ||
3228 | || break; | ||
3229 | ||case 1: | ||
3230 | | checknumtp [BASE+RB*8], ->vmeta_arith_nv | ||
3231 | | .if DUALNUM | ||
3232 | | checknumtp [KBASE+RC*8], ->vmeta_arith_nv | ||
3233 | | .endif | ||
3234 | | movsd xmm0, qword [KBASE+RC*8] | ||
3235 | | sseins ssereg, qword [BASE+RB*8] | ||
3236 | || break; | ||
3237 | ||default: | ||
3238 | | checknumtp [BASE+RB*8], ->vmeta_arith_vv | ||
3239 | | checknumtp [BASE+RC*8], ->vmeta_arith_vv | ||
3240 | | movsd xmm0, qword [BASE+RB*8] | ||
3241 | | sseins ssereg, qword [BASE+RC*8] | ||
3242 | || break; | ||
3243 | ||} | ||
3244 | |.endmacro | ||
3245 | | | ||
3246 | |.macro ins_arithdn, intins | ||
3247 | | ins_ABC | ||
3248 | ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); | ||
3249 | ||switch (vk) { | ||
3250 | ||case 0: | ||
3251 | | mov RB, [BASE+RB*8] | ||
3252 | | mov RC, [KBASE+RC*8] | ||
3253 | | checkint RB, ->vmeta_arith_vno | ||
3254 | | checkint RC, ->vmeta_arith_vno | ||
3255 | | intins RBd, RCd; jo ->vmeta_arith_vno | ||
3256 | || break; | ||
3257 | ||case 1: | ||
3258 | | mov RB, [BASE+RB*8] | ||
3259 | | mov RC, [KBASE+RC*8] | ||
3260 | | checkint RB, ->vmeta_arith_nvo | ||
3261 | | checkint RC, ->vmeta_arith_nvo | ||
3262 | | intins RCd, RBd; jo ->vmeta_arith_nvo | ||
3263 | || break; | ||
3264 | ||default: | ||
3265 | | mov RB, [BASE+RB*8] | ||
3266 | | mov RC, [BASE+RC*8] | ||
3267 | | checkint RB, ->vmeta_arith_vvo | ||
3268 | | checkint RC, ->vmeta_arith_vvo | ||
3269 | | intins RBd, RCd; jo ->vmeta_arith_vvo | ||
3270 | || break; | ||
3271 | ||} | ||
3272 | ||if (vk == 1) { | ||
3273 | | setint RC | ||
3274 | | mov [BASE+RA*8], RC | ||
3275 | ||} else { | ||
3276 | | setint RB | ||
3277 | | mov [BASE+RA*8], RB | ||
3278 | ||} | ||
3279 | | ins_next | ||
3280 | |.endmacro | ||
3281 | | | ||
3282 | |.macro ins_arithpost | ||
3283 | | movsd qword [BASE+RA*8], xmm0 | ||
3284 | |.endmacro | ||
3285 | | | ||
3286 | |.macro ins_arith, sseins | ||
3287 | | ins_arithpre sseins, xmm0 | ||
3288 | | ins_arithpost | ||
3289 | | ins_next | ||
3290 | |.endmacro | ||
3291 | | | ||
3292 | |.macro ins_arith, intins, sseins | ||
3293 | |.if DUALNUM | ||
3294 | | ins_arithdn intins | ||
3295 | |.else | ||
3296 | | ins_arith, sseins | ||
3297 | |.endif | ||
3298 | |.endmacro | ||
3299 | |||
3300 | | // RA = dst, RB = src1 or num const, RC = src2 or num const | ||
3301 | case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: | ||
3302 | | ins_arith add, addsd | ||
3303 | break; | ||
3304 | case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: | ||
3305 | | ins_arith sub, subsd | ||
3306 | break; | ||
3307 | case BC_MULVN: case BC_MULNV: case BC_MULVV: | ||
3308 | | ins_arith imul, mulsd | ||
3309 | break; | ||
3310 | case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: | ||
3311 | | ins_arith divsd | ||
3312 | break; | ||
3313 | case BC_MODVN: | ||
3314 | | ins_arithpre movsd, xmm1 | ||
3315 | |->BC_MODVN_Z: | ||
3316 | | call ->vm_mod | ||
3317 | | ins_arithpost | ||
3318 | | ins_next | ||
3319 | break; | ||
3320 | case BC_MODNV: case BC_MODVV: | ||
3321 | | ins_arithpre movsd, xmm1 | ||
3322 | | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. | ||
3323 | break; | ||
3324 | case BC_POW: | ||
3325 | | ins_arithpre movsd, xmm1 | ||
3326 | | mov RB, BASE | ||
3327 | | call extern pow | ||
3328 | | movzx RAd, PC_RA | ||
3329 | | mov BASE, RB | ||
3330 | | ins_arithpost | ||
3331 | | ins_next | ||
3332 | break; | ||
3333 | |||
3334 | case BC_CAT: | ||
3335 | | ins_ABC // RA = dst, RB = src_start, RC = src_end | ||
3336 | | mov L:CARG1, SAVE_L | ||
3337 | | mov L:CARG1->base, BASE | ||
3338 | | lea CARG2, [BASE+RC*8] | ||
3339 | | mov CARG3d, RCd | ||
3340 | | sub CARG3d, RBd | ||
3341 | |->BC_CAT_Z: | ||
3342 | | mov L:RB, L:CARG1 | ||
3343 | | mov SAVE_PC, PC | ||
3344 | | call extern lj_meta_cat // (lua_State *L, TValue *top, int left) | ||
3345 | | // NULL (finished) or TValue * (metamethod) returned in eax (RC). | ||
3346 | | mov BASE, L:RB->base | ||
3347 | | test RC, RC | ||
3348 | | jnz ->vmeta_binop | ||
3349 | | movzx RBd, PC_RB // Copy result to Stk[RA] from Stk[RB]. | ||
3350 | | movzx RAd, PC_RA | ||
3351 | | mov RC, [BASE+RB*8] | ||
3352 | | mov [BASE+RA*8], RC | ||
3353 | | ins_next | ||
3354 | break; | ||
3355 | |||
3356 | /* -- Constant ops ------------------------------------------------------ */ | ||
3357 | |||
3358 | case BC_KSTR: | ||
3359 | | ins_AND // RA = dst, RD = str const (~) | ||
3360 | | mov RD, [KBASE+RD*8] | ||
3361 | | settp RD, LJ_TSTR | ||
3362 | | mov [BASE+RA*8], RD | ||
3363 | | ins_next | ||
3364 | break; | ||
3365 | case BC_KCDATA: | ||
3366 | |.if FFI | ||
3367 | | ins_AND // RA = dst, RD = cdata const (~) | ||
3368 | | mov RD, [KBASE+RD*8] | ||
3369 | | settp RD, LJ_TCDATA | ||
3370 | | mov [BASE+RA*8], RD | ||
3371 | | ins_next | ||
3372 | |.endif | ||
3373 | break; | ||
3374 | case BC_KSHORT: | ||
3375 | | ins_AD // RA = dst, RD = signed int16 literal | ||
3376 | |.if DUALNUM | ||
3377 | | movsx RDd, RDW | ||
3378 | | setint RD | ||
3379 | | mov [BASE+RA*8], RD | ||
3380 | |.else | ||
3381 | | movsx RDd, RDW // Sign-extend literal. | ||
3382 | | cvtsi2sd xmm0, RDd | ||
3383 | | movsd qword [BASE+RA*8], xmm0 | ||
3384 | |.endif | ||
3385 | | ins_next | ||
3386 | break; | ||
3387 | case BC_KNUM: | ||
3388 | | ins_AD // RA = dst, RD = num const | ||
3389 | | movsd xmm0, qword [KBASE+RD*8] | ||
3390 | | movsd qword [BASE+RA*8], xmm0 | ||
3391 | | ins_next | ||
3392 | break; | ||
3393 | case BC_KPRI: | ||
3394 | | ins_AD // RA = dst, RD = primitive type (~) | ||
3395 | | shl RD, 47 | ||
3396 | | not RD | ||
3397 | | mov [BASE+RA*8], RD | ||
3398 | | ins_next | ||
3399 | break; | ||
3400 | case BC_KNIL: | ||
3401 | | ins_AD // RA = dst_start, RD = dst_end | ||
3402 | | lea RA, [BASE+RA*8+8] // RA = address of slot dst_start+1. | ||
3403 | | lea RD, [BASE+RD*8] // RD = address of slot dst_end. | ||
3404 | | mov RB, LJ_TNIL | ||
3405 | | mov [RA-8], RB // Sets minimum 2 slots. | ||
3406 | |1: // Store RB into each slot from dst_start+1 up to and including dst_end. | ||
3407 | | mov [RA], RB | ||
3408 | | add RA, 8 | ||
3409 | | cmp RA, RD | ||
3410 | | jbe <1 // Unsigned compare: loop while RA <= RD. | ||
3411 | | ins_next | ||
3412 | break; | ||
3413 | |||
3414 | /* -- Upvalue and function ops ------------------------------------------ */ | ||
3415 | |||
3416 | case BC_UGET: | ||
3417 | | ins_AD // RA = dst, RD = upvalue # | ||
3418 | | mov LFUNC:RB, [BASE-16] | ||
3419 | | cleartp LFUNC:RB | ||
3420 | | mov UPVAL:RB, [LFUNC:RB+RD*8+offsetof(GCfuncL, uvptr)] | ||
3421 | | mov RB, UPVAL:RB->v | ||
3422 | | mov RD, [RB] | ||
3423 | | mov [BASE+RA*8], RD | ||
3424 | | ins_next | ||
3425 | break; | ||
3426 | case BC_USETV: /* Upvalue RA = BASE[RD]; calls lj_gc_barrieruv for a closed black upvalue receiving a white GC value. */ | ||
3427 | #define TV2MARKOFS \ | ||
3428 | ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)) /* Byte distance from uv->tv to uv->marked. */ | ||
3429 | | ins_AD // RA = upvalue #, RD = src | ||
3430 | | mov LFUNC:RB, [BASE-16] // Load the (tagged) function object of this frame. | ||
3431 | | cleartp LFUNC:RB | ||
3432 | | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)] // RB = func->uvptr[RA]. | ||
3433 | | cmp byte UPVAL:RB->closed, 0 // Flags are consumed by the jz below; the movs in between leave them intact. | ||
3434 | | mov RB, UPVAL:RB->v // RB = address of the upvalue's value. | ||
3435 | | mov RA, [BASE+RD*8] | ||
3436 | | mov [RB], RA // Store the new value. | ||
3437 | | jz >1 // closed == 0: skip the barrier check. | ||
3438 | | // Check barrier for closed upvalue. | ||
3439 | | test byte [RB+TV2MARKOFS], LJ_GC_BLACK // isblack(uv) | ||
3440 | | jnz >2 | ||
3441 | |1: | ||
3442 | | ins_next | ||
3443 | | | ||
3444 | |2: // Upvalue is black. Check if new value is collectable and white. | ||
3445 | | mov RD, RA | ||
3446 | | sar RD, 47 // Keep only the bits above bit 46 (the type tag) of the stored value. | ||
3447 | | sub RDd, LJ_TISGCV | ||
3448 | | cmp RDd, LJ_TNUMX - LJ_TISGCV // tvisgcv(v) | ||
3449 | | jbe <1 // Not a collectable value: done. | ||
3450 | | cleartp GCOBJ:RA | ||
3451 | | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v) | ||
3452 | | jz <1 // Value is not white: done. | ||
3453 | | // Crossed a write barrier. Move the barrier forward. | ||
3454 | |.if not X64WIN | ||
3455 | | mov CARG2, RB // CARG2 = tv (address of the upvalue's value). | ||
3456 | | mov RB, BASE // Save BASE. | ||
3457 | |.else | ||
3458 | | xchg CARG2, RB // Save BASE (CARG2 == BASE). | ||
3459 | |.endif | ||
3460 | | lea GL:CARG1, [DISPATCH+GG_DISP2G] | ||
3461 | | call extern lj_gc_barrieruv // (global_State *g, TValue *tv) | ||
3462 | | mov BASE, RB // Restore BASE. | ||
3463 | | jmp <1 | ||
3464 | break; | ||
3465 | #undef TV2MARKOFS | ||
3466 | case BC_USETS: /* Upvalue RA = string constant RD; calls lj_gc_barrieruv for a closed black upvalue receiving a white string. */ | ||
3467 | | ins_AND // RA = upvalue #, RD = str const (~) | ||
3468 | | mov LFUNC:RB, [BASE-16] // Load the (tagged) function object of this frame. | ||
3469 | | cleartp LFUNC:RB | ||
3470 | | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)] // RB = func->uvptr[RA]. | ||
3471 | | mov STR:RA, [KBASE+RD*8] // RA = string object from the constant table. | ||
3472 | | mov RD, UPVAL:RB->v // RD = address of the upvalue's value. | ||
3473 | | settp STR:ITYPE, STR:RA, LJ_TSTR // Presumably: ITYPE = RA tagged as LJ_TSTR (RA itself stays untagged). | ||
3474 | | mov [RD], STR:ITYPE | ||
3475 | | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv) | ||
3476 | | jnz >2 | ||
3477 | |1: | ||
3478 | | ins_next | ||
3479 | | | ||
3480 | |2: // Check if string is white and ensure upvalue is closed. | ||
3481 | | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str) | ||
3482 | | jz <1 // String is not white: done. | ||
3483 | | cmp byte UPVAL:RB->closed, 0 | ||
3484 | | jz <1 // closed == 0: no barrier. | ||
3485 | | // Crossed a write barrier. Move the barrier forward. | ||
3486 | | mov RB, BASE // Save BASE (CARG2 == BASE). | ||
3487 | | mov CARG2, RD // CARG2 = tv (address of the upvalue's value). | ||
3488 | | lea GL:CARG1, [DISPATCH+GG_DISP2G] | ||
3489 | | call extern lj_gc_barrieruv // (global_State *g, TValue *tv) | ||
3490 | | mov BASE, RB // Restore BASE. | ||
3491 | | jmp <1 | ||
3492 | break; | ||
3493 | case BC_USETN: /* Upvalue RA = number constant RD. No GC barrier code is emitted here. */ | ||
3494 | | ins_AD // RA = upvalue #, RD = num const | ||
3495 | | mov LFUNC:RB, [BASE-16] // Load the (tagged) function object of this frame. | ||
3496 | | cleartp LFUNC:RB | ||
3497 | | movsd xmm0, qword [KBASE+RD*8] // Load the constant from the constant table. | ||
3498 | | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)] // RB = func->uvptr[RA]. | ||
3499 | | mov RA, UPVAL:RB->v // RA = address of the upvalue's value. | ||
3500 | | movsd qword [RA], xmm0 | ||
3501 | | ins_next | ||
3502 | break; | ||
3503 | case BC_USETP: /* Upvalue RA = primitive value built from type operand RD. No GC barrier code is emitted here. */ | ||
3504 | | ins_AD // RA = upvalue #, RD = primitive type (~) | ||
3505 | | mov LFUNC:RB, [BASE-16] // Load the (tagged) function object of this frame. | ||
3506 | | cleartp LFUNC:RB | ||
3507 | | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)] // RB = func->uvptr[RA]. | ||
3508 | | shl RD, 47 // Move the operand up to bit 47 ... | ||
3509 | | not RD // ... and complement all 64 bits to form the stored value. | ||
3510 | | mov RA, UPVAL:RB->v // RA = address of the upvalue's value. | ||
3511 | | mov [RA], RD | ||
3512 | | ins_next | ||
3513 | break; | ||
3514 | case BC_UCLO: /* Branch to RD; if L->openupval is non-zero, first call lj_func_closeuv(L, &BASE[RA]). */ | ||
3515 | | ins_AD // RA = level, RD = target | ||
3516 | | branchPC RD // Do this first to free RD. | ||
3517 | | mov L:RB, SAVE_L | ||
3518 | | cmp aword L:RB->openupval, 0 | ||
3519 | | je >1 // openupval == 0: skip the call. | ||
3520 | | mov L:RB->base, BASE // Save BASE in L->base across the call. | ||
3521 | | lea CARG2, [BASE+RA*8] // Caveat: CARG2 == BASE | ||
3522 | | mov L:CARG1, L:RB // Caveat: CARG1 == RA | ||
3523 | | call extern lj_func_closeuv // (lua_State *L, TValue *level) | ||
3524 | | mov BASE, L:RB->base // Reload BASE from L->base. | ||
3525 | |1: | ||
3526 | | ins_next | ||
3527 | break; | ||
3528 | |||
3529 | case BC_FNEW: /* BASE[RA] = function object returned by lj_func_newL_gc for prototype constant RD. */ | ||
3530 | | ins_AND // RA = dst, RD = proto const (~) (holding function prototype) | ||
3531 | | mov L:RB, SAVE_L | ||
3532 | | mov L:RB->base, BASE // Caveat: CARG2/CARG3 may be BASE. | ||
3533 | | mov CARG3, [BASE-16] // parent = function object of this frame (still tagged). | ||
3534 | | cleartp CARG3 | ||
3535 | | mov CARG2, [KBASE+RD*8] // Fetch GCproto *. | ||
3536 | | mov CARG1, L:RB | ||
3537 | | mov SAVE_PC, PC | ||
3538 | | // (lua_State *L, GCproto *pt, GCfuncL *parent) | ||
3539 | | call extern lj_func_newL_gc | ||
3540 | | // GCfuncL * returned in eax (RC). | ||
3541 | | mov BASE, L:RB->base // Reload BASE from L->base after the call. | ||
3542 | | movzx RAd, PC_RA // Re-fetch the destination operand RA. | ||
3543 | | settp LFUNC:RC, LJ_TFUNC // Presumably tags the returned pointer as LJ_TFUNC. | ||
3544 | | mov [BASE+RA*8], LFUNC:RC | ||
3545 | | ins_next | ||
3546 | break; | ||
3547 | |||
3548 | /* -- Table ops --------------------------------------------------------- */ | ||
3549 | |||
3550 | case BC_TNEW: /* BASE[RA] = table from lj_tab_new(L, asize, hbits); runs a GC step first if gc.total >= gc.threshold. */ | ||
3551 | | ins_AD // RA = dst, RD = hbits|asize | ||
3552 | | mov L:RB, SAVE_L | ||
3553 | | mov L:RB->base, BASE // Save BASE in L->base; it is reloaded after the call. | ||
3554 | | mov RA, [DISPATCH+DISPATCH_GL(gc.total)] // RA is used as scratch here and re-fetched below. | ||
3555 | | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)] | ||
3556 | | mov SAVE_PC, PC | ||
3557 | | jae >5 // gc.total >= gc.threshold: GC step at label 5. | ||
3558 | |1: | ||
3559 | | mov CARG3d, RDd | ||
3560 | | and RDd, 0x7ff // RD = asize (low 11 bits of the operand). | ||
3561 | | shr CARG3d, 11 // CARG3 = hbits (operand bits above the low 11). | ||
3562 | | cmp RDd, 0x7ff | ||
3563 | | je >3 | ||
3564 | |2: | ||
3565 | | mov L:CARG1, L:RB | ||
3566 | | mov CARG2d, RDd | ||
3567 | | call extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits) | ||
3568 | | // Table * returned in eax (RC). | ||
3569 | | mov BASE, L:RB->base // Reload BASE from L->base after the call. | ||
3570 | | movzx RAd, PC_RA // Re-fetch the destination operand RA. | ||
3571 | | settp TAB:RC, LJ_TTAB // Presumably tags the returned pointer as LJ_TTAB. | ||
3572 | | mov [BASE+RA*8], TAB:RC | ||
3573 | | ins_next | ||
3574 | |3: // Turn 0x7ff into 0x801. | ||
3575 | | mov RDd, 0x801 | ||
3576 | | jmp <2 | ||
3577 | |5: | ||
3578 | | mov L:CARG1, L:RB | ||
3579 | | call extern lj_gc_step_fixtop // (lua_State *L) | ||
3580 | | movzx RDd, PC_RD // Re-fetch operand RD after the call. | ||
3581 | | jmp <1 | ||
3582 | break; | ||
3583 | case BC_TDUP: /* BASE[RA] = table from lj_tab_dup(L, template constant RD); runs a GC step first if gc.total >= gc.threshold. */ | ||
3584 | | ins_AND // RA = dst, RD = table const (~) (holding template table) | ||
3585 | | mov L:RB, SAVE_L | ||
3586 | | mov RA, [DISPATCH+DISPATCH_GL(gc.total)] // RA is used as scratch here and re-fetched below. | ||
3587 | | mov SAVE_PC, PC | ||
3588 | | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)] | ||
3589 | | mov L:RB->base, BASE // mov does not alter the flags set by the cmp above. | ||
3590 | | jae >3 // gc.total >= gc.threshold: GC step at label 3. | ||
3591 | |2: | ||
3592 | | mov TAB:CARG2, [KBASE+RD*8] // Caveat: CARG2 == BASE | ||
3593 | | mov L:CARG1, L:RB // Caveat: CARG1 == RA | ||
3594 | | call extern lj_tab_dup // (lua_State *L, Table *kt) | ||
3595 | | // Table * returned in eax (RC). | ||
3596 | | mov BASE, L:RB->base // Reload BASE from L->base after the call. | ||
3597 | | movzx RAd, PC_RA // Re-fetch the destination operand RA. | ||
3598 | | settp TAB:RC, LJ_TTAB // Presumably tags the returned pointer as LJ_TTAB. | ||
3599 | | mov [BASE+RA*8], TAB:RC | ||
3600 | | ins_next | ||
3601 | |3: | ||
3602 | | mov L:CARG1, L:RB | ||
3603 | | call extern lj_gc_step_fixtop // (lua_State *L) | ||
3604 | | movzx RDd, PC_RD // Need to reload RD. | ||
3605 | | not RD // Re-apply the complement (~); presumably ins_AND did this on entry -- confirm. | ||
3606 | | jmp <2 | ||
3607 | break; | ||
3608 | |||
3609 | case BC_GGET: /* Look up string constant RD in the function's env table; shares the lookup code at BC_TGETS_Z. */ | ||
3610 | | ins_AND // RA = dst, RD = str const (~) | ||
3611 | | mov LFUNC:RB, [BASE-16] // Load the (tagged) function object of this frame. | ||
3612 | | cleartp LFUNC:RB | ||
3613 | | mov TAB:RB, LFUNC:RB->env // RB = func->env, used as the table. | ||
3614 | | mov STR:RC, [KBASE+RD*8] // RC = key string from the constant table. | ||
3615 | | jmp ->BC_TGETS_Z // Expects RB = GCtab *, RC = GCstr *. | ||
3616 | break; | ||
3617 | case BC_GSET: /* Store BASE[RA] under string constant RD in the function's env table; shares the store code at BC_TSETS_Z. */ | ||
3618 | | ins_AND // RA = src, RD = str const (~) | ||
3619 | | mov LFUNC:RB, [BASE-16] // Load the (tagged) function object of this frame. | ||
3620 | | cleartp LFUNC:RB | ||
3621 | | mov TAB:RB, LFUNC:RB->env // RB = func->env, used as the table. | ||
3622 | | mov STR:RC, [KBASE+RD*8] // RC = key string from the constant table. | ||
3623 | | jmp ->BC_TSETS_Z // Expects RB = GCtab *, RC = GCstr *. | ||
3624 | break; | ||
3625 | |||
3626 | case BC_TGETV: /* BASE[RA] = BASE[RB][BASE[RC]]: inline array-part and string-key paths, otherwise vmeta_tgetv. */ | ||
3627 | | ins_ABC // RA = dst, RB = table, RC = key | ||
3628 | | mov TAB:RB, [BASE+RB*8] | ||
3629 | | mov RC, [BASE+RC*8] | ||
3630 | | checktab TAB:RB, ->vmeta_tgetv // Presumably: not a table => fallback. | ||
3631 | | | ||
3632 | | // Integer key? | ||
3633 | |.if DUALNUM | ||
3634 | | checkint RC, >5 | ||
3635 | |.else | ||
3636 | | // Convert number to int and back and compare. | ||
3637 | | checknum RC, >5 | ||
3638 | | movd xmm0, RC | ||
3639 | | cvttsd2si RCd, xmm0 | ||
3640 | | cvtsi2sd xmm1, RCd | ||
3641 | | ucomisd xmm0, xmm1 | ||
3642 | | jne ->vmeta_tgetv // Generic numeric key? Use fallback. | ||
3643 | |.endif | ||
3644 | | cmp RCd, TAB:RB->asize // Takes care of unordered, too. | ||
3645 | | jae ->vmeta_tgetv // Not in array part? Use fallback. | ||
3646 | | shl RCd, 3 // Scale the index by the 8-byte slot size. | ||
3647 | | add RC, TAB:RB->array | ||
3648 | | // Get array slot. | ||
3649 | | mov ITYPE, [RC] | ||
3650 | | cmp ITYPE, LJ_TNIL // Avoid overwriting RB in fastpath. | ||
3651 | | je >2 | ||
3652 | |1: | ||
3653 | | mov [BASE+RA*8], ITYPE | ||
3654 | | ins_next | ||
3655 | | | ||
3656 | |2: // Check for __index if table value is nil. | ||
3657 | | mov TAB:TMPR, TAB:RB->metatable | ||
3658 | | test TAB:TMPR, TAB:TMPR | ||
3659 | | jz <1 // No metatable: store the nil. | ||
3660 | | test byte TAB:TMPR->nomm, 1<<MM_index | ||
3661 | | jz ->vmeta_tgetv // 'no __index' flag NOT set: check. | ||
3662 | | jmp <1 | ||
3663 | | | ||
3664 | |5: // String key? | ||
3665 | | cmp ITYPEd, LJ_TSTR; jne ->vmeta_tgetv // ITYPE: presumably the key's type tag left by checkint/checknum -- confirm. | ||
3666 | | cleartp STR:RC | ||
3667 | | jmp ->BC_TGETS_Z // Expects RB = GCtab *, RC = GCstr *. | ||
3668 | break; | ||
3669 | case BC_TGETS: /* BASE[RA] = BASE[RB][string constant RC]: walks the hash chain inline, vmeta_tgets for __index. */ | ||
3670 | | ins_ABC // RA = dst, RB = table, RC = str const (~) | ||
3671 | | mov TAB:RB, [BASE+RB*8] | ||
3672 | | not RC // Complement the operand to get the constant-table index. | ||
3673 | | mov STR:RC, [KBASE+RC*8] | ||
3674 | | checktab TAB:RB, ->vmeta_tgets // Presumably: not a table => fallback. | ||
3675 | |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr * | ||
3676 | | mov TMPRd, TAB:RB->hmask | ||
3677 | | and TMPRd, STR:RC->hash // Node index = hmask & string hash. | ||
3678 | | imul TMPRd, #NODE // Scale the index by the node size. | ||
3679 | | add NODE:TMPR, TAB:RB->node // TMPR = first node of the chain. | ||
3680 | | settp ITYPE, STR:RC, LJ_TSTR // Presumably: ITYPE = RC tagged as LJ_TSTR, for comparing against node keys. | ||
3681 | |1: | ||
3682 | | cmp NODE:TMPR->key, ITYPE | ||
3683 | | jne >4 | ||
3684 | | // Get node value. | ||
3685 | | mov ITYPE, NODE:TMPR->val | ||
3686 | | cmp ITYPE, LJ_TNIL | ||
3687 | | je >5 // Key found, but nil value? | ||
3688 | |2: | ||
3689 | | mov [BASE+RA*8], ITYPE | ||
3690 | | ins_next | ||
3691 | | | ||
3692 | |4: // Follow hash chain. | ||
3693 | | mov NODE:TMPR, NODE:TMPR->next | ||
3694 | | test NODE:TMPR, NODE:TMPR | ||
3695 | | jnz <1 | ||
3696 | | // End of hash chain: key not found, nil result. | ||
3697 | | mov ITYPE, LJ_TNIL | ||
3698 | | | ||
3699 | |5: // Check for __index if table value is nil. | ||
3700 | | mov TAB:TMPR, TAB:RB->metatable | ||
3701 | | test TAB:TMPR, TAB:TMPR | ||
3702 | | jz <2 // No metatable: done. | ||
3703 | | test byte TAB:TMPR->nomm, 1<<MM_index | ||
3704 | | jnz <2 // 'no __index' flag set: done. | ||
3705 | | jmp ->vmeta_tgets // Caveat: preserve STR:RC. | ||
3706 | break; | ||
3707 | case BC_TGETB: /* BASE[RA] = BASE[RB][byte literal RC]: inline array-part path, otherwise vmeta_tgetb. */ | ||
3708 | | ins_ABC // RA = dst, RB = table, RC = byte literal | ||
3709 | | mov TAB:RB, [BASE+RB*8] | ||
3710 | | checktab TAB:RB, ->vmeta_tgetb // Presumably: not a table => fallback. | ||
3711 | | cmp RCd, TAB:RB->asize | ||
3712 | | jae ->vmeta_tgetb // Index not below asize: fallback. | ||
3713 | | shl RCd, 3 // Scale the index by the 8-byte slot size. | ||
3714 | | add RC, TAB:RB->array | ||
3715 | | // Get array slot. | ||
3716 | | mov ITYPE, [RC] | ||
3717 | | cmp ITYPE, LJ_TNIL | ||
3718 | | je >2 | ||
3719 | |1: | ||
3720 | | mov [BASE+RA*8], ITYPE | ||
3721 | | ins_next | ||
3722 | | | ||
3723 | |2: // Check for __index if table value is nil. | ||
3724 | | mov TAB:TMPR, TAB:RB->metatable | ||
3725 | | test TAB:TMPR, TAB:TMPR | ||
3726 | | jz <1 // No metatable: store the nil. | ||
3727 | | test byte TAB:TMPR->nomm, 1<<MM_index | ||
3728 | | jz ->vmeta_tgetb // 'no __index' flag NOT set: check. | ||
3729 | | jmp <1 | ||
3730 | break; | ||
3731 | case BC_TGETR: /* BASE[RA] = array slot of BASE[RB] at index BASE[RC]; emits no operand type checks and no metatable lookup. */ | ||
3732 | | ins_ABC // RA = dst, RB = table, RC = key | ||
3733 | | mov TAB:RB, [BASE+RB*8] | ||
3734 | | cleartp TAB:RB // Only the tag is stripped; the type is not checked here. | ||
3735 | |.if DUALNUM | ||
3736 | | mov RCd, dword [BASE+RC*8] // Take the low 32 bits of the key slot as the index. | ||
3737 | |.else | ||
3738 | | cvttsd2si RCd, qword [BASE+RC*8] // Truncate the key, read as a double, to an int32 index. | ||
3739 | |.endif | ||
3740 | | cmp RCd, TAB:RB->asize | ||
3741 | | jae ->vmeta_tgetr // Not in array part? Use fallback. | ||
3742 | | shl RCd, 3 // Scale the index by the 8-byte slot size. | ||
3743 | | add RC, TAB:RB->array | ||
3744 | | // Get array slot. | ||
3745 | |->BC_TGETR_Z: // Global label; presumably re-entered with RC = slot address (jump sites not visible here). | ||
3746 | | mov ITYPE, [RC] | ||
3747 | |->BC_TGETR2_Z: // Global label; presumably re-entered with ITYPE = result value (jump sites not visible here). | ||
3748 | | mov [BASE+RA*8], ITYPE | ||
3749 | | ins_next | ||
3750 | break; | ||
3751 | |||
3752 | case BC_TSETV: /* BASE[RB][BASE[RC]] = BASE[RA]: inline array-part and string-key paths, otherwise vmeta_tsetv. */ | ||
3753 | | ins_ABC // RA = src, RB = table, RC = key | ||
3754 | | mov TAB:RB, [BASE+RB*8] | ||
3755 | | mov RC, [BASE+RC*8] | ||
3756 | | checktab TAB:RB, ->vmeta_tsetv // Presumably: not a table => fallback. | ||
3757 | | | ||
3758 | | // Integer key? | ||
3759 | |.if DUALNUM | ||
3760 | | checkint RC, >5 | ||
3761 | |.else | ||
3762 | | // Convert number to int and back and compare. | ||
3763 | | checknum RC, >5 | ||
3764 | | movd xmm0, RC | ||
3765 | | cvttsd2si RCd, xmm0 | ||
3766 | | cvtsi2sd xmm1, RCd | ||
3767 | | ucomisd xmm0, xmm1 | ||
3768 | | jne ->vmeta_tsetv // Generic numeric key? Use fallback. | ||
3769 | |.endif | ||
3770 | | cmp RCd, TAB:RB->asize // Takes care of unordered, too. | ||
3771 | | jae ->vmeta_tsetv // Index not below asize: fallback. | ||
3772 | | shl RCd, 3 // Scale the index by the 8-byte slot size. | ||
3773 | | add RC, TAB:RB->array // RC = address of the array slot. | ||
3774 | | cmp aword [RC], LJ_TNIL | ||
3775 | | je >3 // Previous value is nil? | ||
3776 | |1: | ||
3777 | | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) | ||
3778 | | jnz >7 | ||
3779 | |2: // Set array slot. | ||
3780 | | mov RB, [BASE+RA*8] | ||
3781 | | mov [RC], RB | ||
3782 | | ins_next | ||
3783 | | | ||
3784 | |3: // Check for __newindex if previous value is nil. | ||
3785 | | mov TAB:TMPR, TAB:RB->metatable | ||
3786 | | test TAB:TMPR, TAB:TMPR | ||
3787 | | jz <1 // No metatable: plain store. | ||
3788 | | test byte TAB:TMPR->nomm, 1<<MM_newindex | ||
3789 | | jz ->vmeta_tsetv // 'no __newindex' flag NOT set: check. | ||
3790 | | jmp <1 | ||
3791 | | | ||
3792 | |5: // String key? | ||
3793 | | cmp ITYPEd, LJ_TSTR; jne ->vmeta_tsetv // ITYPE: presumably the key's type tag left by checkint/checknum -- confirm. | ||
3794 | | cleartp STR:RC | ||
3795 | | jmp ->BC_TSETS_Z // Expects RB = GCtab *, RC = GCstr *. | ||
3796 | | | ||
3797 | |7: // Possible table write barrier for the value. Skip valiswhite check. | ||
3798 | | barrierback TAB:RB, TMPR // TMPR is passed as the macro's second operand (presumably scratch). | ||
3799 | | jmp <2 | ||
3800 | break; | ||
3801 | case BC_TSETS: /* BASE[RB][string constant RC] = BASE[RA]: walks the hash chain inline, lj_tab_newkey for missing keys, vmeta_tsets for __newindex. */ | ||
3802 | | ins_ABC // RA = src, RB = table, RC = str const (~) | ||
3803 | | mov TAB:RB, [BASE+RB*8] | ||
3804 | | not RC // Complement the operand to get the constant-table index. | ||
3805 | | mov STR:RC, [KBASE+RC*8] | ||
3806 | | checktab TAB:RB, ->vmeta_tsets // Presumably: not a table => fallback. | ||
3807 | |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr * | ||
3808 | | mov TMPRd, TAB:RB->hmask | ||
3809 | | and TMPRd, STR:RC->hash // Node index = hmask & string hash. | ||
3810 | | imul TMPRd, #NODE // Scale the index by the node size. | ||
3811 | | mov byte TAB:RB->nomm, 0 // Clear metamethod cache. | ||
3812 | | add NODE:TMPR, TAB:RB->node // TMPR = first node of the chain. | ||
3813 | | settp ITYPE, STR:RC, LJ_TSTR // Presumably: ITYPE = RC tagged as LJ_TSTR, for comparing against node keys. | ||
3814 | |1: | ||
3815 | | cmp NODE:TMPR->key, ITYPE | ||
3816 | | jne >5 | ||
3817 | | // Ok, key found. Assumes: offsetof(Node, val) == 0 | ||
3818 | | cmp aword [TMPR], LJ_TNIL | ||
3819 | | je >4 // Previous value is nil? | ||
3820 | |2: | ||
3821 | | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) | ||
3822 | | jnz >7 | ||
3823 | |3: // Set node value. | ||
3824 | | mov ITYPE, [BASE+RA*8] | ||
3825 | | mov [TMPR], ITYPE | ||
3826 | | ins_next | ||
3827 | | | ||
3828 | |4: // Check for __newindex if previous value is nil. | ||
3829 | | mov TAB:ITYPE, TAB:RB->metatable // ITYPE is reused as scratch; label 3 reloads it before use. | ||
3830 | | test TAB:ITYPE, TAB:ITYPE | ||
3831 | | jz <2 // No metatable: plain store. | ||
3832 | | test byte TAB:ITYPE->nomm, 1<<MM_newindex | ||
3833 | | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check. | ||
3834 | | jmp <2 | ||
3835 | | | ||
3836 | |5: // Follow hash chain. | ||
3837 | | mov NODE:TMPR, NODE:TMPR->next | ||
3838 | | test NODE:TMPR, NODE:TMPR | ||
3839 | | jnz <1 | ||
3840 | | // End of hash chain: key not found, add a new one. | ||
3841 | | | ||
3842 | | // But check for __newindex first. | ||
3843 | | mov TAB:TMPR, TAB:RB->metatable | ||
3844 | | test TAB:TMPR, TAB:TMPR | ||
3845 | | jz >6 // No metatable: continue. | ||
3846 | | test byte TAB:TMPR->nomm, 1<<MM_newindex | ||
3847 | | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check. | ||
3848 | |6: | ||
3849 | | mov TMP1, ITYPE // Spill the tagged key so its address can be passed as k. | ||
3850 | | mov L:CARG1, SAVE_L | ||
3851 | | mov L:CARG1->base, BASE // Save BASE in L->base across the call. | ||
3852 | | lea CARG3, TMP1 // k = address of the spilled key. | ||
3853 | | mov CARG2, TAB:RB | ||
3854 | | mov SAVE_PC, PC | ||
3855 | | call extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) | ||
3856 | | // Handles write barrier for the new key. TValue * returned in eax (RC). | ||
3857 | | mov L:CARG1, SAVE_L | ||
3858 | | mov BASE, L:CARG1->base // Reload BASE from L->base. | ||
3859 | | mov TMPR, rax // TMPR = returned slot, as label 3 stores through [TMPR]. | ||
3860 | | movzx RAd, PC_RA // Re-fetch the source operand RA. | ||
3861 | | jmp <2 // Must check write barrier for value. | ||
3862 | | | ||
3863 | |7: // Possible table write barrier for the value. Skip valiswhite check. | ||
3864 | | barrierback TAB:RB, ITYPE // ITYPE is passed as the second operand; TMPR still holds the slot address. | ||
3865 | | jmp <3 | ||
3866 | break; | ||
3867 | case BC_TSETB: /* BASE[RB][byte literal RC] = BASE[RA]: inline array-part path, otherwise vmeta_tsetb. */ | ||
3868 | | ins_ABC // RA = src, RB = table, RC = byte literal | ||
3869 | | mov TAB:RB, [BASE+RB*8] | ||
3870 | | checktab TAB:RB, ->vmeta_tsetb // Presumably: not a table => fallback. | ||
3871 | | cmp RCd, TAB:RB->asize | ||
3872 | | jae ->vmeta_tsetb // Index not below asize: fallback. | ||
3873 | | shl RCd, 3 // Scale the index by the 8-byte slot size. | ||
3874 | | add RC, TAB:RB->array // RC = address of the array slot. | ||
3875 | | cmp aword [RC], LJ_TNIL | ||
3876 | | je >3 // Previous value is nil? | ||
3877 | |1: | ||
3878 | | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) | ||
3879 | | jnz >7 | ||
3880 | |2: // Set array slot. | ||
3881 | | mov ITYPE, [BASE+RA*8] | ||
3882 | | mov [RC], ITYPE | ||
3883 | | ins_next | ||
3884 | | | ||
3885 | |3: // Check for __newindex if previous value is nil. | ||
3886 | | mov TAB:TMPR, TAB:RB->metatable | ||
3887 | | test TAB:TMPR, TAB:TMPR | ||
3888 | | jz <1 // No metatable: plain store. | ||
3889 | | test byte TAB:TMPR->nomm, 1<<MM_newindex | ||
3890 | | jz ->vmeta_tsetb // 'no __newindex' flag NOT set: check. | ||
3891 | | jmp <1 | ||
3892 | | | ||
3893 | |7: // Possible table write barrier for the value. Skip valiswhite check. | ||
3894 | | barrierback TAB:RB, TMPR // TMPR is passed as the macro's second operand (presumably scratch). | ||
3895 | | jmp <2 | ||
3896 | break; | ||
3897 | case BC_TSETR: /* Array slot of BASE[RB] at index BASE[RC] = BASE[RA]; emits no operand type checks and no metatable lookup. */ | ||
3898 | | ins_ABC // RA = src, RB = table, RC = key | ||
3899 | | mov TAB:RB, [BASE+RB*8] | ||
3900 | | cleartp TAB:RB // Only the tag is stripped; the type is not checked here. | ||
3901 | |.if DUALNUM | ||
3902 | | mov RC, [BASE+RC*8] // Full 64-bit load; only RCd (low 32 bits) is used as the index below. | ||
3903 | |.else | ||
3904 | | cvttsd2si RCd, qword [BASE+RC*8] // Truncate the key, read as a double, to an int32 index. | ||
3905 | |.endif | ||
3906 | | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) | ||
3907 | | jnz >7 | ||
3908 | |2: | ||
3909 | | cmp RCd, TAB:RB->asize | ||
3910 | | jae ->vmeta_tsetr // Index not below asize: fallback. | ||
3911 | | shl RCd, 3 // Scale the index by the 8-byte slot size; the 32-bit write zero-extends into RC. | ||
3912 | | add RC, TAB:RB->array | ||
3913 | | // Set array slot. | ||
3914 | |->BC_TSETR_Z: // Global label; presumably re-entered with RC = slot address (jump sites not visible here). | ||
3915 | | mov ITYPE, [BASE+RA*8] | ||
3916 | | mov [RC], ITYPE | ||
3917 | | ins_next | ||
3918 | | | ||
3919 | |7: // Possible table write barrier for the value. Skip valiswhite check. | ||
3920 | | barrierback TAB:RB, TMPR // TMPR is passed as the macro's second operand (presumably scratch). | ||
3921 | | jmp <2 | ||
3922 | break; | ||
3923 | |||
3924 | case BC_TSETM: /* Copy MULTRES-1 slots starting at BASE[RA] into the array part of the table at BASE[RA-1], from the index in constant RD. */ | ||
3925 | | ins_AD // RA = base (table at base-1), RD = num const (start index) | ||
3926 | |1: | ||
3927 | | mov TMPRd, dword [KBASE+RD*8] // Integer constant is in lo-word. | ||
3928 | | lea RA, [BASE+RA*8] // RA = address of the first source slot. | ||
3929 | | mov TAB:RB, [RA-8] // Guaranteed to be a table. | ||
3930 | | cleartp TAB:RB | ||
3931 | | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) | ||
3932 | | jnz >7 | ||
3933 | |2: | ||
3934 | | mov RDd, MULTRES | ||
3935 | | sub RDd, 1 // RD = number of slots to copy. | ||
3936 | | jz >4 // Nothing to copy? | ||
3937 | | add RDd, TMPRd // Compute needed size. | ||
3938 | | cmp RDd, TAB:RB->asize | ||
3939 | | ja >5 // Doesn't fit into array part? | ||
3940 | | sub RDd, TMPRd // Back to the plain slot count. | ||
3941 | | shl TMPRd, 3 // Scale the start index by the 8-byte slot size. | ||
3942 | | add TMPR, TAB:RB->array // TMPR = address of the first destination slot. | ||
3943 | |3: // Copy result slots to table. | ||
3944 | | mov RB, [RA] | ||
3945 | | add RA, 8 | ||
3946 | | mov [TMPR], RB | ||
3947 | | add TMPR, 8 | ||
3948 | | sub RDd, 1 | ||
3949 | | jnz <3 | ||
3950 | |4: | ||
3951 | | ins_next | ||
3952 | | | ||
3953 | |5: // Need to resize array part. | ||
3954 | | mov L:CARG1, SAVE_L | ||
3955 | | mov L:CARG1->base, BASE // Caveat: CARG2/CARG3 may be BASE. | ||
3956 | | mov CARG2, TAB:RB | ||
3957 | | mov CARG3d, RDd // nasize = needed size computed above. | ||
3958 | | mov L:RB, L:CARG1 // Keep L in RB across the call. | ||
3959 | | mov SAVE_PC, PC | ||
3960 | | call extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) | ||
3961 | | mov BASE, L:RB->base // Reload BASE from L->base. | ||
3962 | | movzx RAd, PC_RA // Restore RA. | ||
3963 | | movzx RDd, PC_RD // Restore RD. | ||
3964 | | jmp <1 // Retry. | ||
3965 | | | ||
3966 | |7: // Possible table write barrier for any value. Skip valiswhite check. | ||
3967 | | barrierback TAB:RB, RD // RD is passed as the second operand; label 2 reloads it from MULTRES. | ||
3968 | | jmp <2 | ||
3969 | break; | ||
3970 | |||
3971 | /* -- Calls and vararg handling ----------------------------------------- */ | ||
3972 | |||
3973 | case BC_CALL: case BC_CALLM: /* Call the function in BASE[RA]; the CALLM variant first adds MULTRES to the argument count. */ | ||
3974 | | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs | ||
3975 | if (op == BC_CALLM) { | ||
3976 | | add NARGS:RDd, MULTRES | ||
3977 | } | ||
3978 | | mov LFUNC:RB, [BASE+RA*8] // Load the callee. | ||
3979 | | checkfunc LFUNC:RB, ->vmeta_call_ra // Presumably: callee is not a function => vmeta_call_ra. | ||
3980 | | lea BASE, [BASE+RA*8+16] // New BASE is two slots above the callee slot. | ||
3981 | | ins_call | ||
3982 | break; | ||
3983 | |||
3984 | case BC_CALLMT: /* Adds MULTRES to the argument count; the emitted code then runs on into BC_CALLT. */ | ||
3985 | | ins_AD // RA = base, RD = extra_nargs | ||
3986 | | add NARGS:RDd, MULTRES | ||
3987 | | // Fall through. Assumes BC_CALLT follows and ins_AD is a no-op. | ||
3988 | break; | ||
3989 | case BC_CALLT: /* Tail call: copy the callee to BASE-16 and its arguments down to BASE, then dispatch via ins_callt. */ | ||
3990 | | ins_AD // RA = base, RD = nargs+1 | ||
3991 | | lea RA, [BASE+RA*8+16] // RA = address two slots above the callee slot (first argument). | ||
3992 | | mov KBASE, BASE // Use KBASE for move + vmeta_call hint. | ||
3993 | | mov LFUNC:RB, [RA-16] // Load the callee (tagged). | ||
3994 | | checktp_nc LFUNC:RB, LJ_TFUNC, ->vmeta_call | ||
3995 | |->BC_CALLT_Z: | ||
3996 | | mov PC, [BASE-8] // Load the word stored at BASE-8 of the current frame. | ||
3997 | | test PCd, FRAME_TYPE | ||
3998 | | jnz >7 // FRAME_TYPE bits set: handled at label 7. | ||
3999 | |1: | ||
4000 | | mov [BASE-16], LFUNC:RB // Copy func+tag down, reloaded below. | ||
4001 | | mov MULTRES, NARGS:RDd // Save nargs+1; RD becomes the copy counter. | ||
4002 | | sub NARGS:RDd, 1 | ||
4003 | | jz >3 // No arguments to move. | ||
4004 | |2: // Move args down. | ||
4005 | | mov RB, [RA] | ||
4006 | | add RA, 8 | ||
4007 | | mov [KBASE], RB | ||
4008 | | add KBASE, 8 | ||
4009 | | sub NARGS:RDd, 1 | ||
4010 | | jnz <2 | ||
4011 | | | ||
4012 | | mov LFUNC:RB, [BASE-16] // Reload the callee; RB was used as copy scratch. | ||
4013 | |3: | ||
4014 | | cleartp LFUNC:RB | ||
4015 | | mov NARGS:RDd, MULTRES // Restore nargs+1. | ||
4016 | | cmp byte LFUNC:RB->ffid, 1 // (> FF_C) Calling a fast function? | ||
4017 | | ja >5 | ||
4018 | |4: | ||
4019 | | ins_callt | ||
4020 | | | ||
4021 | |5: // Tailcall to a fast function. | ||
4022 | | test PCd, FRAME_TYPE // Lua frame below? | ||
4023 | | jnz <4 // FRAME_TYPE bits set: skip the KBASE setup. | ||
4024 | | movzx RAd, PC_RA // Operand A read relative to the PC loaded from BASE-8. | ||
4025 | | neg RA | ||
4026 | | mov LFUNC:KBASE, [BASE+RA*8-32] // Need to prepare KBASE. | ||
4027 | | cleartp LFUNC:KBASE | ||
4028 | | mov KBASE, LFUNC:KBASE->pc | ||
4029 | | mov KBASE, [KBASE+PC2PROTO(k)] // KBASE = the k field reached from that function's pc. | ||
4030 | | jmp <4 | ||
4031 | | | ||
4032 | |7: // Tailcall from a vararg function. | ||
4033 | | sub PC, FRAME_VARG | ||
4034 | | test PCd, FRAME_TYPEP | ||
4035 | | jnz >8 // Vararg frame below? | ||
4036 | | sub BASE, PC // Need to relocate BASE/KBASE down. | ||
4037 | | mov KBASE, BASE | ||
4038 | | mov PC, [BASE-8] // Reload the word at BASE-8 of the relocated frame. | ||
4039 | | jmp <1 | ||
4040 | |8: | ||
4041 | | add PCd, FRAME_VARG // Undo the subtraction above. | ||
4042 | | jmp <1 | ||
4043 | break; | ||
4044 | |||
4045 | case BC_ITERC: /* Copy callable, state and control var up to a new frame at BASE[RA+2] and call it with 2 arguments. */ | ||
4046 | | ins_A // RA = base, (RB = nresults+1,) RC = nargs+1 (2+1) | ||
4047 | | lea RA, [BASE+RA*8+16] // fb = base+2 | ||
4048 | | mov RB, [RA-32] // Copy state. fb[0] = fb[-4]. | ||
4049 | | mov RC, [RA-24] // Copy control var. fb[1] = fb[-3]. | ||
4050 | | mov [RA], RB | ||
4051 | | mov [RA+8], RC | ||
4052 | | mov LFUNC:RB, [RA-40] // Copy callable. fb[-2] = fb[-5] | ||
4053 | | mov [RA-16], LFUNC:RB | ||
4054 | | mov NARGS:RDd, 2+1 // Handle like a regular 2-arg call. | ||
4055 | | checkfunc LFUNC:RB, ->vmeta_call // Presumably: callable is not a function => vmeta_call. | ||
4056 | | mov BASE, RA // New BASE = fb. | ||
4057 | | ins_call | ||
4058 | break; | ||
4059 | |||
4060 | case BC_ITERN: /* Table traversal step: write the next non-nil array or hash entry to BASE[RA] (key) and BASE[RA+1] (value). */ | ||
4061 | | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) | ||
4062 | |.if JIT | ||
4063 | | // NYI: add hotloop, record BC_ITERN. | ||
4064 | |.endif | ||
4065 | | mov TAB:RB, [BASE+RA*8-16] // Table being traversed. | ||
4066 | | cleartp TAB:RB | ||
4067 | | mov RCd, [BASE+RA*8-8] // Get index from control var. | ||
4068 | | mov TMPRd, TAB:RB->asize | ||
4069 | | add PC, 4 // Advance PC by 4 bytes. | ||
4070 | | mov ITYPE, TAB:RB->array // ITYPE = base address of the array part. | ||
4071 | |1: // Traverse array part. | ||
4072 | | cmp RCd, TMPRd; jae >5 // Index points after array part? | ||
4073 | | cmp aword [ITYPE+RC*8], LJ_TNIL; je >4 | ||
4074 | |.if not DUALNUM | ||
4075 | | cvtsi2sd xmm0, RCd // Index as a double, stored as the key below. | ||
4076 | |.endif | ||
4077 | | // Copy array slot to returned value. | ||
4078 | | mov RB, [ITYPE+RC*8] | ||
4079 | | mov [BASE+RA*8+8], RB | ||
4080 | | // Return array index as a numeric key. | ||
4081 | |.if DUALNUM | ||
4082 | | setint ITYPE, RC // Overwrites ITYPE; the array base is not read again on this path. | ||
4083 | | mov [BASE+RA*8], ITYPE | ||
4084 | |.else | ||
4085 | | movsd qword [BASE+RA*8], xmm0 | ||
4086 | |.endif | ||
4087 | | add RCd, 1 | ||
4088 | | mov [BASE+RA*8-8], RCd // Update control var. | ||
4089 | |2: | ||
4090 | | movzx RDd, PC_RD // Get target from ITERL. | ||
4091 | | branchPC RD | ||
4092 | |3: | ||
4093 | | ins_next | ||
4094 | | | ||
4095 | |4: // Skip holes in array part. | ||
4096 | | add RCd, 1 | ||
4097 | | jmp <1 | ||
4098 | | | ||
4099 | |5: // Traverse hash part. | ||
4100 | | sub RCd, TMPRd // RC = index into the hash part (control index minus asize). | ||
4101 | |6: | ||
4102 | | cmp RCd, TAB:RB->hmask; ja <3 // End of iteration? Branch to ITERL+1. | ||
4103 | | imul ITYPEd, RCd, #NODE // Scale the index by the node size. | ||
4104 | | add NODE:ITYPE, TAB:RB->node | ||
4105 | | cmp aword NODE:ITYPE->val, LJ_TNIL; je >7 | ||
4106 | | lea TMPRd, [RCd+TMPRd+1] // Next control value = hash index + asize + 1. | ||
4107 | | // Copy key and value from hash slot. | ||
4108 | | mov RB, NODE:ITYPE->key | ||
4109 | | mov RC, NODE:ITYPE->val | ||
4110 | | mov [BASE+RA*8], RB | ||
4111 | | mov [BASE+RA*8+8], RC | ||
4112 | | mov [BASE+RA*8-8], TMPRd // Update control var. | ||
4113 | | jmp <2 | ||
4114 | | | ||
4115 | |7: // Skip holes in hash part. | ||
4116 | | add RCd, 1 | ||
4117 | | jmp <6 | ||
4118 | break; | ||
4119 | |||
4120 | case BC_ISNEXT: /* Check the iterator slots and branch to RD; if a check fails, patch this op to BC_JMP and the target to BC_ITERC. */ | ||
4121 | | ins_AD // RA = base, RD = target (points to ITERN) | ||
4122 | | mov CFUNC:RB, [BASE+RA*8-24] // Slot at RA-3. | ||
4123 | | checkfunc CFUNC:RB, >5 | ||
4124 | | checktptp [BASE+RA*8-16], LJ_TTAB, >5 // Slot at RA-2 is checked against LJ_TTAB. | ||
4125 | | cmp aword [BASE+RA*8-8], LJ_TNIL; jne >5 // Slot at RA-1 must be nil. | ||
4126 | | cmp byte CFUNC:RB->ffid, FF_next_N; jne >5 // Function's ffid must be FF_next_N. | ||
4127 | | branchPC RD | ||
4128 | | mov64 TMPR, U64x(fffe7fff, 00000000) // Constant whose low 32 bits (the index BC_ITERN reads) are 0. | ||
4129 | | mov [BASE+RA*8-8], TMPR // Initialize control var. | ||
4130 | |1: | ||
4131 | | ins_next | ||
4132 | |5: // Despecialize bytecode if any of the checks fail. | ||
4133 | | mov PC_OP, BC_JMP // Store BC_JMP via PC_OP (presumably the opcode byte of this instruction -- confirm). | ||
4134 | | branchPC RD | ||
4135 | | mov byte [PC], BC_ITERC // Write BC_ITERC to the byte PC now points at (the branch target). | ||
4136 | | jmp <1 | ||
4137 | break; | ||
4138 | |||
4139 | case BC_VARG: /* Copy vararg slots to BASE[RA]..: either RB-1 slots padded with nil, or all of them (RB == 0), growing the stack if needed. */ | ||
4140 | | ins_ABC // RA = base, RB = nresults+1, RC = numparams | ||
4141 | | lea TMPR, [BASE+RC*8+(16+FRAME_VARG)] | ||
4142 | | lea RA, [BASE+RA*8] // RA = address of the first destination slot. | ||
4143 | | sub TMPR, [BASE-8] // Subtract the word stored at BASE-8; vararg slots are then read from [TMPR-16] upward. | ||
4144 | | // Note: TMPR may now be even _above_ BASE if nargs was < numparams. | ||
4145 | | test RB, RB | ||
4146 | | jz >5 // Copy all varargs? | ||
4147 | | lea RB, [RA+RB*8-8] // RB = end address of the destination range (RA + (nresults)*8). | ||
4148 | | cmp TMPR, BASE // No vararg slots? | ||
4149 | | jnb >2 | ||
4150 | |1: // Copy vararg slots to destination slots. | ||
4151 | | mov RC, [TMPR-16] | ||
4152 | | add TMPR, 8 | ||
4153 | | mov [RA], RC | ||
4154 | | add RA, 8 | ||
4155 | | cmp RA, RB // All destination slots filled? | ||
4156 | | jnb >3 | ||
4157 | | cmp TMPR, BASE // No more vararg slots? | ||
4158 | | jb <1 | ||
4159 | |2: // Fill up remainder with nil. | ||
4160 | | mov aword [RA], LJ_TNIL | ||
4161 | | add RA, 8 | ||
4162 | | cmp RA, RB | ||
4163 | | jb <2 | ||
4164 | |3: | ||
4165 | | ins_next | ||
4166 | | | ||
4167 | |5: // Copy all varargs. | ||
4168 | | mov MULTRES, 1 // MULTRES = 0+1 | ||
4169 | | mov RC, BASE | ||
4170 | | sub RC, TMPR // RC = BASE - TMPR, shifted right by 3 below to give the slot count. | ||
4171 | | jbe <3 // No vararg slots? | ||
4172 | | mov RBd, RCd | ||
4173 | | shr RBd, 3 // Bytes -> slots. | ||
4174 | | add RBd, 1 | ||
4175 | | mov MULTRES, RBd // MULTRES = #varargs+1 | ||
4176 | | mov L:RB, SAVE_L | ||
4177 | | add RC, RA // RC = end address of the destination range. | ||
4178 | | cmp RC, L:RB->maxstack | ||
4179 | | ja >7 // Need to grow stack? | ||
4180 | |6: // Copy all vararg slots. | ||
4181 | | mov RC, [TMPR-16] | ||
4182 | | add TMPR, 8 | ||
4183 | | mov [RA], RC | ||
4184 | | add RA, 8 | ||
4185 | | cmp TMPR, BASE // No more vararg slots? | ||
4186 | | jb <6 | ||
4187 | | jmp <3 | ||
4188 | | | ||
4189 | |7: // Grow stack for varargs. | ||
4190 | | mov L:RB->base, BASE | ||
4191 | | mov L:RB->top, RA // Save the destination address in L->top; reloaded after the call. | ||
4192 | | mov SAVE_PC, PC | ||
4193 | | sub TMPR, BASE // Need delta, because BASE may change. | ||
4194 | | mov TMP1hi, TMPRd // Spill the 32-bit delta across the call. | ||
4195 | | mov CARG2d, MULTRES | ||
4196 | | sub CARG2d, 1 // n = #varargs. | ||
4197 | | mov CARG1, L:RB | ||
4198 | | call extern lj_state_growstack // (lua_State *L, int n) | ||
4199 | | mov BASE, L:RB->base // Reload BASE from L->base. | ||
4200 | | movsxd TMPR, TMP1hi // Reload the delta, sign-extended to 64 bits. | ||
4201 | | mov RA, L:RB->top // Reload the destination address from L->top. | ||
4202 | | add TMPR, BASE // Rebuild TMPR relative to the new BASE. | ||
4203 | | jmp <6 | ||
4204 | break; | ||
4205 | |||
4206 | /* -- Returns ----------------------------------------------------------- */ | ||
4207 | |||
4208 | case BC_RETM: /* Adds MULTRES to the result count; the emitted code then runs on into BC_RET. */ | ||
4209 | | ins_AD // RA = results, RD = extra_nresults | ||
4210 | | add RDd, MULTRES // MULTRES >=1, so RD >=1. | ||
4211 | | // Fall through. Assumes BC_RET follows and ins_AD is a no-op. | ||
4212 | break; | ||
4213 | |||
4214 | case BC_RET: case BC_RET0: case BC_RET1: /* Return RD-1 results starting at BASE[RA]: move them to BASE-16.., nil-fill up to the count in PC_RB, restore BASE/KBASE. */ | ||
4215 | | ins_AD // RA = results, RD = nresults+1 | ||
4216 | if (op != BC_RET0) { | ||
4217 | | shl RAd, 3 // RA = byte offset of the first result from BASE. | ||
4218 | } | ||
4219 | |1: | ||
4220 | | mov PC, [BASE-8] // Load the word stored at BASE-8 of this frame. | ||
4221 | | mov MULTRES, RDd // Save nresults+1. | ||
4222 | | test PCd, FRAME_TYPE // Check frame type marker. | ||
4223 | | jnz >7 // Not returning to a fixarg Lua func? | ||
4224 | switch (op) { | ||
4225 | case BC_RET: | ||
4226 | |->BC_RET_Z: | ||
4227 | | mov KBASE, BASE // Use KBASE for result move. | ||
4228 | | sub RDd, 1 | ||
4229 | | jz >3 // No results to move. | ||
4230 | |2: // Move results down. | ||
4231 | | mov RB, [KBASE+RA] | ||
4232 | | mov [KBASE-16], RB | ||
4233 | | add KBASE, 8 | ||
4234 | | sub RDd, 1 | ||
4235 | | jnz <2 | ||
4236 | |3: | ||
4237 | | mov RDd, MULTRES // Note: MULTRES may be >255. | ||
4238 | | movzx RBd, PC_RB // So cannot compare with RDL! | ||
4239 | |5: | ||
4240 | | cmp RBd, RDd // More results expected? | ||
4241 | | ja >6 | ||
4242 | break; | ||
4243 | case BC_RET1: | ||
4244 | | mov RB, [BASE+RA] // Copy the single result down to BASE-16. | ||
4245 | | mov [BASE-16], RB | ||
4246 | /* fallthrough */ | ||
4247 | case BC_RET0: | ||
4248 | |5: | ||
4249 | | cmp PC_RB, RDL // More results expected? | ||
4250 | | ja >6 | ||
4251 | default: | ||
4252 | break; | ||
4253 | } | ||
4254 | | movzx RAd, PC_RA // Operand A read relative to the PC loaded from BASE-8. | ||
4255 | | neg RA | ||
4256 | | lea BASE, [BASE+RA*8-16] // base = base - (RA+2)*8 | ||
4257 | | mov LFUNC:KBASE, [BASE-16] // Function object of the frame being returned to. | ||
4258 | | cleartp LFUNC:KBASE | ||
4259 | | mov KBASE, LFUNC:KBASE->pc | ||
4260 | | mov KBASE, [KBASE+PC2PROTO(k)] // KBASE = the k field reached from that function's pc. | ||
4261 | | ins_next | ||
4262 | | | ||
4263 | |6: // Fill up results with nil. | ||
4264 | if (op == BC_RET) { | ||
4265 | | mov aword [KBASE-16], LJ_TNIL // Note: relies on shifted base. | ||
4266 | | add KBASE, 8 | ||
4267 | } else { | ||
4268 | | mov aword [BASE+RD*8-24], LJ_TNIL // Slot index is derived from the running result count in RD. | ||
4269 | } | ||
4270 | | add RD, 1 | ||
4271 | | jmp <5 | ||
4272 | | | ||
4273 | |7: // Non-standard return case. | ||
4274 | | lea RB, [PC-FRAME_VARG] | ||
4275 | | test RBd, FRAME_TYPEP | ||
4276 | | jnz ->vm_return // Remaining type bits set: hand over to vm_return. | ||
4277 | | // Return from vararg function: relocate BASE down and RA up. | ||
4278 | | sub BASE, RB | ||
4279 | if (op != BC_RET0) { | ||
4280 | | add RA, RB // Keep BASE+RA pointing at the same results after moving BASE. | ||
4281 | } | ||
4282 | | jmp <1 | ||
4283 | break; | ||
4284 | |||
4285 | /* -- Loops and branches ------------------------------------------------ */ | ||
4286 | |||
4287 | |.define FOR_IDX, [RA] // Slot 0 at RA: the loop index. | ||
4288 | |.define FOR_STOP, [RA+8] // Slot 1: the value the index is compared against. | ||
4289 | |.define FOR_STEP, [RA+16] // Slot 2: the value added to the index on each iteration. | ||
4290 | |.define FOR_EXT, [RA+24] // Slot 3: receives a copy of the index. | ||
4291 | |||
4292 | case BC_FORL: /* Emits only the hotloop macro (JIT builds); the loop logic itself is emitted by the BC_IFORL case. */ | ||
4293 | |.if JIT | ||
4294 | | hotloop RBd | ||
4295 | |.endif | ||
4296 | | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op. | ||
4297 | break; | ||
4298 | |||
4299 | case BC_JFORI: | ||
4300 | case BC_JFORL: | ||
4301 | #if !LJ_HASJIT | ||
4302 | break; | ||
4303 | #endif | ||
4304 | case BC_FORI: | ||
4305 | case BC_IFORL: | ||
4306 | vk = (op == BC_IFORL || op == BC_JFORL); | ||
4307 | | ins_AJ // RA = base, RD = target (after end of loop or start of loop) | ||
4308 | | lea RA, [BASE+RA*8] | ||
4309 | if (LJ_DUALNUM) { | ||
4310 | | mov RB, FOR_IDX | ||
4311 | | checkint RB, >9 | ||
4312 | | mov TMPR, FOR_STOP | ||
4313 | if (!vk) { | ||
4314 | | checkint TMPR, ->vmeta_for | ||
4315 | | mov ITYPE, FOR_STEP | ||
4316 | | test ITYPEd, ITYPEd; js >5 | ||
4317 | | sar ITYPE, 47; | ||
4318 | | cmp ITYPEd, LJ_TISNUM; jne ->vmeta_for | ||
4319 | } else { | ||
4320 | #ifdef LUA_USE_ASSERT | ||
4321 | | checkinttp FOR_STOP, ->assert_bad_for_arg_type | ||
4322 | | checkinttp FOR_STEP, ->assert_bad_for_arg_type | ||
4323 | #endif | ||
4324 | | mov ITYPE, FOR_STEP | ||
4325 | | test ITYPEd, ITYPEd; js >5 | ||
4326 | | add RBd, ITYPEd; jo >1 | ||
4327 | | setint RB | ||
4328 | | mov FOR_IDX, RB | ||
4329 | } | ||
4330 | | cmp RBd, TMPRd | ||
4331 | | mov FOR_EXT, RB | ||
4332 | if (op == BC_FORI) { | ||
4333 | | jle >7 | ||
4334 | |1: | ||
4335 | |6: | ||
4336 | | branchPC RD | ||
4337 | } else if (op == BC_JFORI) { | ||
4338 | | branchPC RD | ||
4339 | | movzx RDd, PC_RD | ||
4340 | | jle =>BC_JLOOP | ||
4341 | |1: | ||
4342 | |6: | ||
4343 | } else if (op == BC_IFORL) { | ||
4344 | | jg >7 | ||
4345 | |6: | ||
4346 | | branchPC RD | ||
4347 | |1: | ||
4348 | } else { | ||
4349 | | jle =>BC_JLOOP | ||
4350 | |1: | ||
4351 | |6: | ||
4352 | } | ||
4353 | |7: | ||
4354 | | ins_next | ||
4355 | | | ||
4356 | |5: // Invert check for negative step. | ||
4357 | if (!vk) { | ||
4358 | | sar ITYPE, 47; | ||
4359 | | cmp ITYPEd, LJ_TISNUM; jne ->vmeta_for | ||
4360 | } else { | ||
4361 | | add RBd, ITYPEd; jo <1 | ||
4362 | | setint RB | ||
4363 | | mov FOR_IDX, RB | ||
4364 | } | ||
4365 | | cmp RBd, TMPRd | ||
4366 | | mov FOR_EXT, RB | ||
4367 | if (op == BC_FORI) { | ||
4368 | | jge <7 | ||
4369 | } else if (op == BC_JFORI) { | ||
4370 | | branchPC RD | ||
4371 | | movzx RDd, PC_RD | ||
4372 | | jge =>BC_JLOOP | ||
4373 | } else if (op == BC_IFORL) { | ||
4374 | | jl <7 | ||
4375 | } else { | ||
4376 | | jge =>BC_JLOOP | ||
4377 | } | ||
4378 | | jmp <6 | ||
4379 | |9: // Fallback to FP variant. | ||
4380 | if (!vk) { | ||
4381 | | jae ->vmeta_for | ||
4382 | } | ||
4383 | } else if (!vk) { | ||
4384 | | checknumtp FOR_IDX, ->vmeta_for | ||
4385 | } | ||
4386 | if (!vk) { | ||
4387 | | checknumtp FOR_STOP, ->vmeta_for | ||
4388 | } else { | ||
4389 | #ifdef LUA_USE_ASSERT | ||
4390 | | checknumtp FOR_STOP, ->assert_bad_for_arg_type | ||
4391 | | checknumtp FOR_STEP, ->assert_bad_for_arg_type | ||
4392 | #endif | ||
4393 | } | ||
4394 | | mov RB, FOR_STEP | ||
4395 | if (!vk) { | ||
4396 | | checknum RB, ->vmeta_for | ||
4397 | } | ||
4398 | | movsd xmm0, qword FOR_IDX | ||
4399 | | movsd xmm1, qword FOR_STOP | ||
4400 | if (vk) { | ||
4401 | | addsd xmm0, qword FOR_STEP | ||
4402 | | movsd qword FOR_IDX, xmm0 | ||
4403 | | test RB, RB; js >3 | ||
4404 | } else { | ||
4405 | | jl >3 | ||
4406 | } | ||
4407 | | ucomisd xmm1, xmm0 | ||
4408 | |1: | ||
4409 | | movsd qword FOR_EXT, xmm0 | ||
4410 | if (op == BC_FORI) { | ||
4411 | |.if DUALNUM | ||
4412 | | jnb <7 | ||
4413 | |.else | ||
4414 | | jnb >2 | ||
4415 | | branchPC RD | ||
4416 | |.endif | ||
4417 | } else if (op == BC_JFORI) { | ||
4418 | | branchPC RD | ||
4419 | | movzx RDd, PC_RD | ||
4420 | | jnb =>BC_JLOOP | ||
4421 | } else if (op == BC_IFORL) { | ||
4422 | |.if DUALNUM | ||
4423 | | jb <7 | ||
4424 | |.else | ||
4425 | | jb >2 | ||
4426 | | branchPC RD | ||
4427 | |.endif | ||
4428 | } else { | ||
4429 | | jnb =>BC_JLOOP | ||
4430 | } | ||
4431 | |.if DUALNUM | ||
4432 | | jmp <6 | ||
4433 | |.else | ||
4434 | |2: | ||
4435 | | ins_next | ||
4436 | |.endif | ||
4437 | | | ||
4438 | |3: // Invert comparison if step is negative. | ||
4439 | | ucomisd xmm0, xmm1 | ||
4440 | | jmp <1 | ||
4441 | break; | ||
4442 | |||
4443 | case BC_ITERL: | ||
4444 | |.if JIT | ||
4445 | | hotloop RBd | ||
4446 | |.endif | ||
4447 | | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op. | ||
4448 | break; | ||
4449 | |||
4450 | case BC_JITERL: | ||
4451 | #if !LJ_HASJIT | ||
4452 | break; | ||
4453 | #endif | ||
4454 | case BC_IITERL: | ||
4455 | | ins_AJ // RA = base, RD = target | ||
4456 | | lea RA, [BASE+RA*8] | ||
4457 | | mov RB, [RA] | ||
4458 | | cmp RB, LJ_TNIL; je >1 // Stop if iterator returned nil. | ||
4459 | if (op == BC_JITERL) { | ||
4460 | | mov [RA-8], RB | ||
4461 | | jmp =>BC_JLOOP | ||
4462 | } else { | ||
4463 | | branchPC RD // Otherwise save control var + branch. | ||
4464 | | mov [RA-8], RB | ||
4465 | } | ||
4466 | |1: | ||
4467 | | ins_next | ||
4468 | break; | ||
4469 | |||
4470 | case BC_LOOP: | ||
4471 | | ins_A // RA = base, RD = target (loop extent) | ||
4472 | | // Note: RA/RD is only used by trace recorder to determine scope/extent | ||
4473 | | // This opcode does NOT jump, its only purpose is to detect a hot loop. | ||
4474 | |.if JIT | ||
4475 | | hotloop RBd | ||
4476 | |.endif | ||
4477 | | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op. | ||
4478 | break; | ||
4479 | |||
4480 | case BC_ILOOP: | ||
4481 | | ins_A // RA = base, RD = target (loop extent) | ||
4482 | | ins_next | ||
4483 | break; | ||
4484 | |||
4485 | case BC_JLOOP: | ||
4486 | |.if JIT | ||
4487 | | ins_AD // RA = base (ignored), RD = traceno | ||
4488 | | mov RA, [DISPATCH+DISPATCH_J(trace)] | ||
4489 | | mov TRACE:RD, [RA+RD*8] | ||
4490 | | mov RD, TRACE:RD->mcode | ||
4491 | | mov L:RB, SAVE_L | ||
4492 | | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE | ||
4493 | | mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB | ||
4494 | | // Save additional callee-save registers only used in compiled code. | ||
4495 | |.if X64WIN | ||
4496 | | mov CSAVE_4, r12 | ||
4497 | | mov CSAVE_3, r13 | ||
4498 | | mov CSAVE_2, r14 | ||
4499 | | mov CSAVE_1, r15 | ||
4500 | | mov RA, rsp | ||
4501 | | sub rsp, 10*16+4*8 | ||
4502 | | movdqa [RA-1*16], xmm6 | ||
4503 | | movdqa [RA-2*16], xmm7 | ||
4504 | | movdqa [RA-3*16], xmm8 | ||
4505 | | movdqa [RA-4*16], xmm9 | ||
4506 | | movdqa [RA-5*16], xmm10 | ||
4507 | | movdqa [RA-6*16], xmm11 | ||
4508 | | movdqa [RA-7*16], xmm12 | ||
4509 | | movdqa [RA-8*16], xmm13 | ||
4510 | | movdqa [RA-9*16], xmm14 | ||
4511 | | movdqa [RA-10*16], xmm15 | ||
4512 | |.else | ||
4513 | | sub rsp, 16 | ||
4514 | | mov [rsp+16], r12 | ||
4515 | | mov [rsp+8], r13 | ||
4516 | |.endif | ||
4517 | | jmp RD | ||
4518 | |.endif | ||
4519 | break; | ||
4520 | |||
4521 | case BC_JMP: | ||
4522 | | ins_AJ // RA = unused, RD = target | ||
4523 | | branchPC RD | ||
4524 | | ins_next | ||
4525 | break; | ||
4526 | |||
4527 | /* -- Function headers -------------------------------------------------- */ | ||
4528 | |||
4529 | /* | ||
4530 | ** Reminder: A function may be called with func/args above L->maxstack, | ||
4531 | ** i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot, | ||
4532 | ** too. This means all FUNC* ops (including fast functions) must check | ||
4533 | ** for stack overflow _before_ adding more slots! | ||
4534 | */ | ||
4535 | |||
4536 | case BC_FUNCF: | ||
4537 | |.if JIT | ||
4538 | | hotcall RBd | ||
4539 | |.endif | ||
4540 | case BC_FUNCV: /* NYI: compiled vararg functions. */ | ||
4541 | | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op. | ||
4542 | break; | ||
4543 | |||
4544 | case BC_JFUNCF: | ||
4545 | #if !LJ_HASJIT | ||
4546 | break; | ||
4547 | #endif | ||
4548 | case BC_IFUNCF: | ||
4549 | | ins_AD // BASE = new base, RA = framesize, RD = nargs+1 | ||
4550 | | mov KBASE, [PC-4+PC2PROTO(k)] | ||
4551 | | mov L:RB, SAVE_L | ||
4552 | | lea RA, [BASE+RA*8] // Top of frame. | ||
4553 | | cmp RA, L:RB->maxstack | ||
4554 | | ja ->vm_growstack_f | ||
4555 | | movzx RAd, byte [PC-4+PC2PROTO(numparams)] | ||
4556 | | cmp NARGS:RDd, RAd // Check for missing parameters. | ||
4557 | | jbe >3 | ||
4558 | |2: | ||
4559 | if (op == BC_JFUNCF) { | ||
4560 | | movzx RDd, PC_RD | ||
4561 | | jmp =>BC_JLOOP | ||
4562 | } else { | ||
4563 | | ins_next | ||
4564 | } | ||
4565 | | | ||
4566 | |3: // Clear missing parameters. | ||
4567 | | mov aword [BASE+NARGS:RD*8-8], LJ_TNIL | ||
4568 | | add NARGS:RDd, 1 | ||
4569 | | cmp NARGS:RDd, RAd | ||
4570 | | jbe <3 | ||
4571 | | jmp <2 | ||
4572 | break; | ||
4573 | |||
4574 | case BC_JFUNCV: | ||
4575 | #if !LJ_HASJIT | ||
4576 | break; | ||
4577 | #endif | ||
4578 | | int3 // NYI: compiled vararg functions | ||
4579 | break; /* NYI: compiled vararg functions. */ | ||
4580 | |||
4581 | case BC_IFUNCV: | ||
4582 | | ins_AD // BASE = new base, RA = framesize, RD = nargs+1 | ||
4583 | | lea RBd, [NARGS:RD*8+FRAME_VARG+8] | ||
4584 | | lea RD, [BASE+NARGS:RD*8+8] | ||
4585 | | mov LFUNC:KBASE, [BASE-16] | ||
4586 | | mov [RD-8], RB // Store delta + FRAME_VARG. | ||
4587 | | mov [RD-16], LFUNC:KBASE // Store copy of LFUNC. | ||
4588 | | mov L:RB, SAVE_L | ||
4589 | | lea RA, [RD+RA*8] | ||
4590 | | cmp RA, L:RB->maxstack | ||
4591 | | ja ->vm_growstack_v // Need to grow stack. | ||
4592 | | mov RA, BASE | ||
4593 | | mov BASE, RD | ||
4594 | | movzx RBd, byte [PC-4+PC2PROTO(numparams)] | ||
4595 | | test RBd, RBd | ||
4596 | | jz >2 | ||
4597 | | add RA, 8 | ||
4598 | |1: // Copy fixarg slots up to new frame. | ||
4599 | | add RA, 8 | ||
4600 | | cmp RA, BASE | ||
4601 | | jnb >3 // Less args than parameters? | ||
4602 | | mov KBASE, [RA-16] | ||
4603 | | mov [RD], KBASE | ||
4604 | | add RD, 8 | ||
4605 | | mov aword [RA-16], LJ_TNIL // Clear old fixarg slot (help the GC). | ||
4606 | | sub RBd, 1 | ||
4607 | | jnz <1 | ||
4608 | |2: | ||
4609 | if (op == BC_JFUNCV) { | ||
4610 | | movzx RDd, PC_RD | ||
4611 | | jmp =>BC_JLOOP | ||
4612 | } else { | ||
4613 | | mov KBASE, [PC-4+PC2PROTO(k)] | ||
4614 | | ins_next | ||
4615 | } | ||
4616 | | | ||
4617 | |3: // Clear missing parameters. | ||
4618 | | mov aword [RD], LJ_TNIL | ||
4619 | | add RD, 8 | ||
4620 | | sub RBd, 1 | ||
4621 | | jnz <3 | ||
4622 | | jmp <2 | ||
4623 | break; | ||
4624 | |||
4625 | case BC_FUNCC: | ||
4626 | case BC_FUNCCW: | ||
4627 | | ins_AD // BASE = new base, RA = ins RA|RD (unused), RD = nargs+1 | ||
4628 | | mov CFUNC:RB, [BASE-16] | ||
4629 | | cleartp CFUNC:RB | ||
4630 | | mov KBASE, CFUNC:RB->f | ||
4631 | | mov L:RB, SAVE_L | ||
4632 | | lea RD, [BASE+NARGS:RD*8-8] | ||
4633 | | mov L:RB->base, BASE | ||
4634 | | lea RA, [RD+8*LUA_MINSTACK] | ||
4635 | | cmp RA, L:RB->maxstack | ||
4636 | | mov L:RB->top, RD | ||
4637 | if (op == BC_FUNCC) { | ||
4638 | | mov CARG1, L:RB // Caveat: CARG1 may be RA. | ||
4639 | } else { | ||
4640 | | mov CARG2, KBASE | ||
4641 | | mov CARG1, L:RB // Caveat: CARG1 may be RA. | ||
4642 | } | ||
4643 | | ja ->vm_growstack_c // Need to grow stack. | ||
4644 | | set_vmstate C | ||
4645 | if (op == BC_FUNCC) { | ||
4646 | | call KBASE // (lua_State *L) | ||
4647 | } else { | ||
4648 | | // (lua_State *L, lua_CFunction f) | ||
4649 | | call aword [DISPATCH+DISPATCH_GL(wrapf)] | ||
4650 | } | ||
4651 | | // nresults returned in eax (RD). | ||
4652 | | mov BASE, L:RB->base | ||
4653 | | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB | ||
4654 | | set_vmstate INTERP | ||
4655 | | lea RA, [BASE+RD*8] | ||
4656 | | neg RA | ||
4657 | | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8 | ||
4658 | | mov PC, [BASE-8] // Fetch PC of caller. | ||
4659 | | jmp ->vm_returnc | ||
4660 | break; | ||
4661 | |||
4662 | /* ---------------------------------------------------------------------- */ | ||
4663 | |||
4664 | default: | ||
4665 | fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); | ||
4666 | exit(2); | ||
4667 | break; | ||
4668 | } | ||
4669 | } | ||
4670 | |||
4671 | static int build_backend(BuildCtx *ctx) /* Emit the whole backend: shared subroutines first, then the code for every bytecode opcode. Returns BC__MAX. */ | ||
4672 | { | ||
4673 | int op; | ||
4674 | dasm_growpc(Dst, BC__MAX); /* Make room for BC__MAX DynASM PC labels (the opcode bodies reference them as =>BC_xxx). */ | ||
4675 | build_subroutines(ctx); | ||
4676 | |.code_op | ||
4677 | for (op = 0; op < BC__MAX; op++) /* Visit every opcode in numeric order. */ | ||
4678 | build_ins(ctx, (BCOp)op, op); /* The opcode number is passed twice; NOTE(review): the last argument is presumably the PC label to define -- confirm in build_ins. */ | ||
4679 | return BC__MAX; | ||
4680 | } | ||
4681 | |||
4682 | /* Emit pseudo frame-info (CIE/FDE unwind records) for all assembler functions: .debug_frame plus, unless LJ_NO_UNWIND, .eh_frame for BUILD_elfasm; __eh_frame for BUILD_machasm. Other build modes emit nothing. */ | ||
4683 | static void emit_asm_debug(BuildCtx *ctx) | ||
4684 | { | ||
4685 | int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); /* Byte offset of lj_vm_ffi_call from the start of the generated code. */ | ||
4686 | switch (ctx->mode) { | ||
4687 | case BUILD_elfasm: | ||
4688 | fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n"); | ||
4689 | fprintf(ctx->fp, /* CIE (.Lframe0) referenced by the .debug_frame FDEs below. */ | ||
4690 | ".Lframe0:\n" | ||
4691 | "\t.long .LECIE0-.LSCIE0\n" | ||
4692 | ".LSCIE0:\n" | ||
4693 | "\t.long 0xffffffff\n" | ||
4694 | "\t.byte 0x1\n" | ||
4695 | "\t.string \"\"\n" | ||
4696 | "\t.uleb128 0x1\n" | ||
4697 | "\t.sleb128 -8\n" | ||
4698 | "\t.byte 0x10\n" | ||
4699 | "\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n" | ||
4700 | "\t.byte 0x80+0x10\n\t.uleb128 0x1\n" | ||
4701 | "\t.align 8\n" | ||
4702 | ".LECIE0:\n\n"); | ||
4703 | fprintf(ctx->fp, /* FDE for the fcofs bytes starting at .Lbegin, i.e. everything before lj_vm_ffi_call. */ | ||
4704 | ".LSFDE0:\n" | ||
4705 | "\t.long .LEFDE0-.LASFDE0\n" | ||
4706 | ".LASFDE0:\n" | ||
4707 | "\t.long .Lframe0\n" | ||
4708 | "\t.quad .Lbegin\n" | ||
4709 | "\t.quad %d\n" | ||
4710 | "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ | ||
4711 | "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */ | ||
4712 | "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */ | ||
4713 | "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */ | ||
4714 | "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */ | ||
4715 | #if LJ_NO_UNWIND | ||
4716 | "\t.byte 0x8d\n\t.uleb128 0x6\n" /* offset r13 */ | ||
4717 | "\t.byte 0x8c\n\t.uleb128 0x7\n" /* offset r12 */ | ||
4718 | #endif | ||
4719 | "\t.align 8\n" | ||
4720 | ".LEFDE0:\n\n", fcofs, CFRAME_SIZE); | ||
4721 | #if LJ_HASFFI | ||
4722 | fprintf(ctx->fp, /* FDE for lj_vm_ffi_call: the remaining codesz - fcofs bytes. */ | ||
4723 | ".LSFDE1:\n" | ||
4724 | "\t.long .LEFDE1-.LASFDE1\n" | ||
4725 | ".LASFDE1:\n" | ||
4726 | "\t.long .Lframe0\n" | ||
4727 | "\t.quad lj_vm_ffi_call\n" | ||
4728 | "\t.quad %d\n" | ||
4729 | "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */ | ||
4730 | "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */ | ||
4731 | "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */ | ||
4732 | "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */ | ||
4733 | "\t.align 8\n" | ||
4734 | ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); | ||
4735 | #endif | ||
4736 | #if !LJ_NO_UNWIND | ||
4737 | #if (defined(__sun__) && defined(__svr4__)) | ||
4738 | fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n"); | ||
4739 | #else | ||
4740 | fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n"); | ||
4741 | #endif | ||
4742 | fprintf(ctx->fp, /* .eh_frame CIE (.Lframe1) with augmentation "zPR"; its augmentation data references lj_err_unwind_dwarf. */ | ||
4743 | ".Lframe1:\n" | ||
4744 | "\t.long .LECIE1-.LSCIE1\n" | ||
4745 | ".LSCIE1:\n" | ||
4746 | "\t.long 0\n" | ||
4747 | "\t.byte 0x1\n" | ||
4748 | "\t.string \"zPR\"\n" | ||
4749 | "\t.uleb128 0x1\n" | ||
4750 | "\t.sleb128 -8\n" | ||
4751 | "\t.byte 0x10\n" | ||
4752 | "\t.uleb128 6\n" /* augmentation length */ | ||
4753 | "\t.byte 0x1b\n" /* pcrel|sdata4 */ | ||
4754 | "\t.long lj_err_unwind_dwarf-.\n" | ||
4755 | "\t.byte 0x1b\n" /* pcrel|sdata4 */ | ||
4756 | "\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n" | ||
4757 | "\t.byte 0x80+0x10\n\t.uleb128 0x1\n" | ||
4758 | "\t.align 8\n" | ||
4759 | ".LECIE1:\n\n"); | ||
4760 | fprintf(ctx->fp, /* .eh_frame FDE for the fcofs bytes starting at .Lbegin (PC-relative start address). */ | ||
4761 | ".LSFDE2:\n" | ||
4762 | "\t.long .LEFDE2-.LASFDE2\n" | ||
4763 | ".LASFDE2:\n" | ||
4764 | "\t.long .LASFDE2-.Lframe1\n" | ||
4765 | "\t.long .Lbegin-.\n" | ||
4766 | "\t.long %d\n" | ||
4767 | "\t.uleb128 0\n" /* augmentation length */ | ||
4768 | "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ | ||
4769 | "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */ | ||
4770 | "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */ | ||
4771 | "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */ | ||
4772 | "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */ | ||
4773 | "\t.align 8\n" | ||
4774 | ".LEFDE2:\n\n", fcofs, CFRAME_SIZE); | ||
4775 | #if LJ_HASFFI | ||
4776 | fprintf(ctx->fp, /* Separate CIE (.Lframe2) for lj_vm_ffi_call: augmentation "zR", no lj_err_unwind_dwarf reference. */ | ||
4777 | ".Lframe2:\n" | ||
4778 | "\t.long .LECIE2-.LSCIE2\n" | ||
4779 | ".LSCIE2:\n" | ||
4780 | "\t.long 0\n" | ||
4781 | "\t.byte 0x1\n" | ||
4782 | "\t.string \"zR\"\n" | ||
4783 | "\t.uleb128 0x1\n" | ||
4784 | "\t.sleb128 -8\n" | ||
4785 | "\t.byte 0x10\n" | ||
4786 | "\t.uleb128 1\n" /* augmentation length */ | ||
4787 | "\t.byte 0x1b\n" /* pcrel|sdata4 */ | ||
4788 | "\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n" | ||
4789 | "\t.byte 0x80+0x10\n\t.uleb128 0x1\n" | ||
4790 | "\t.align 8\n" | ||
4791 | ".LECIE2:\n\n"); | ||
4792 | fprintf(ctx->fp, /* .eh_frame FDE for lj_vm_ffi_call: codesz - fcofs bytes. */ | ||
4793 | ".LSFDE3:\n" | ||
4794 | "\t.long .LEFDE3-.LASFDE3\n" | ||
4795 | ".LASFDE3:\n" | ||
4796 | "\t.long .LASFDE3-.Lframe2\n" | ||
4797 | "\t.long lj_vm_ffi_call-.\n" | ||
4798 | "\t.long %d\n" | ||
4799 | "\t.uleb128 0\n" /* augmentation length */ | ||
4800 | "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */ | ||
4801 | "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */ | ||
4802 | "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */ | ||
4803 | "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */ | ||
4804 | "\t.align 8\n" | ||
4805 | ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); | ||
4806 | #endif | ||
4807 | #endif | ||
4808 | break; | ||
4809 | #if !LJ_NO_UNWIND | ||
4810 | /* Mental note: never let Apple design an assembler. | ||
4811 | ** Or a linker. Or a plastic case. But I digress. | ||
4812 | */ | ||
4813 | case BUILD_machasm: { | ||
4814 | #if LJ_HASFFI | ||
4815 | int fcsize = 0; /* Size of _lj_vm_ffi_call; stays 0 if the symbol loop below never sees it. */ | ||
4816 | #endif | ||
4817 | int i; | ||
4818 | fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n"); | ||
4819 | fprintf(ctx->fp, /* CIE (EH_frame1) shared by the per-symbol FDEs emitted in the loop below. */ | ||
4820 | "EH_frame1:\n" | ||
4821 | "\t.set L$set$x,LECIEX-LSCIEX\n" | ||
4822 | "\t.long L$set$x\n" | ||
4823 | "LSCIEX:\n" | ||
4824 | "\t.long 0\n" | ||
4825 | "\t.byte 0x1\n" | ||
4826 | "\t.ascii \"zPR\\0\"\n" | ||
4827 | "\t.byte 0x1\n" | ||
4828 | "\t.byte 128-8\n" | ||
4829 | "\t.byte 0x10\n" | ||
4830 | "\t.byte 6\n" /* augmentation length */ | ||
4831 | "\t.byte 0x9b\n" /* indirect|pcrel|sdata4 */ | ||
4832 | "\t.long _lj_err_unwind_dwarf+4@GOTPCREL\n" | ||
4833 | "\t.byte 0x1b\n" /* pcrel|sdata4 */ | ||
4834 | "\t.byte 0xc\n\t.byte 0x7\n\t.byte 8\n" | ||
4835 | "\t.byte 0x80+0x10\n\t.byte 0x1\n" | ||
4836 | "\t.align 3\n" | ||
4837 | "LECIEX:\n\n"); | ||
4838 | for (i = 0; i < ctx->nsym; i++) { /* One FDE per symbol with a non-zero size. */ | ||
4839 | const char *name = ctx->sym[i].name; | ||
4840 | int32_t size = ctx->sym[i+1].ofs - ctx->sym[i].ofs; /* Distance to the next symbol's offset; reads entry i+1, so NOTE(review): assumes ctx->sym has an entry past nsym-1 -- confirm. */ | ||
4841 | if (size == 0) continue; | ||
4842 | #if LJ_HASFFI | ||
4843 | if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; } /* Handled separately after the loop with its own CIE/FDE. */ | ||
4844 | #endif | ||
4845 | fprintf(ctx->fp, | ||
4846 | "%s.eh:\n" | ||
4847 | "LSFDE%d:\n" | ||
4848 | "\t.set L$set$%d,LEFDE%d-LASFDE%d\n" | ||
4849 | "\t.long L$set$%d\n" | ||
4850 | "LASFDE%d:\n" | ||
4851 | "\t.long LASFDE%d-EH_frame1\n" | ||
4852 | "\t.long %s-.\n" | ||
4853 | "\t.long %d\n" | ||
4854 | "\t.byte 0\n" /* augmentation length */ | ||
4855 | "\t.byte 0xe\n\t.byte %d\n" /* def_cfa_offset */ | ||
4856 | "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */ | ||
4857 | "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */ | ||
4858 | "\t.byte 0x8f\n\t.byte 0x4\n" /* offset r15 */ | ||
4859 | "\t.byte 0x8e\n\t.byte 0x5\n" /* offset r14 */ | ||
4860 | "\t.align 3\n" | ||
4861 | "LEFDE%d:\n\n", | ||
4862 | name, i, i, i, i, i, i, i, name, size, CFRAME_SIZE, i); /* The symbol index i makes every per-symbol label unique. */ | ||
4863 | } | ||
4864 | #if LJ_HASFFI | ||
4865 | if (fcsize) { /* Only emitted when _lj_vm_ffi_call was found with a non-zero size. */ | ||
4866 | fprintf(ctx->fp, | ||
4867 | "EH_frame2:\n" | ||
4868 | "\t.set L$set$y,LECIEY-LSCIEY\n" | ||
4869 | "\t.long L$set$y\n" | ||
4870 | "LSCIEY:\n" | ||
4871 | "\t.long 0\n" | ||
4872 | "\t.byte 0x1\n" | ||
4873 | "\t.ascii \"zR\\0\"\n" | ||
4874 | "\t.byte 0x1\n" | ||
4875 | "\t.byte 128-8\n" | ||
4876 | "\t.byte 0x10\n" | ||
4877 | "\t.byte 1\n" /* augmentation length */ | ||
4878 | "\t.byte 0x1b\n" /* pcrel|sdata4 */ | ||
4879 | "\t.byte 0xc\n\t.byte 0x7\n\t.byte 8\n" | ||
4880 | "\t.byte 0x80+0x10\n\t.byte 0x1\n" | ||
4881 | "\t.align 3\n" | ||
4882 | "LECIEY:\n\n"); | ||
4883 | fprintf(ctx->fp, | ||
4884 | "_lj_vm_ffi_call.eh:\n" | ||
4885 | "LSFDEY:\n" | ||
4886 | "\t.set L$set$yy,LEFDEY-LASFDEY\n" | ||
4887 | "\t.long L$set$yy\n" | ||
4888 | "LASFDEY:\n" | ||
4889 | "\t.long LASFDEY-EH_frame2\n" | ||
4890 | "\t.long _lj_vm_ffi_call-.\n" | ||
4891 | "\t.long %d\n" | ||
4892 | "\t.byte 0\n" /* augmentation length */ | ||
4893 | "\t.byte 0xe\n\t.byte 16\n" /* def_cfa_offset */ | ||
4894 | "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */ | ||
4895 | "\t.byte 0xd\n\t.byte 0x6\n" /* def_cfa_register rbp */ | ||
4896 | "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */ | ||
4897 | "\t.align 3\n" | ||
4898 | "LEFDEY:\n\n", fcsize); | ||
4899 | } | ||
4900 | #endif | ||
4901 | fprintf(ctx->fp, ".subsections_via_symbols\n"); | ||
4902 | } | ||
4903 | break; | ||
4904 | #endif | ||
4905 | default: /* Difficult for other modes. */ | ||
4906 | break; | ||
4907 | } | ||
4908 | } | ||
4909 | |||
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc index 2ccc671f..1965b06b 100644 --- a/src/vm_x86.dasc +++ b/src/vm_x86.dasc | |||
@@ -18,7 +18,6 @@ | |||
18 | | | 18 | | |
19 | |.if P64 | 19 | |.if P64 |
20 | |.define X64, 1 | 20 | |.define X64, 1 |
21 | |.define SSE, 1 | ||
22 | |.if WIN | 21 | |.if WIN |
23 | |.define X64WIN, 1 | 22 | |.define X64WIN, 1 |
24 | |.endif | 23 | |.endif |
@@ -116,24 +115,74 @@ | |||
116 | |.type NODE, Node | 115 | |.type NODE, Node |
117 | |.type NARGS, int | 116 | |.type NARGS, int |
118 | |.type TRACE, GCtrace | 117 | |.type TRACE, GCtrace |
118 | |.type SBUF, SBuf | ||
119 | | | 119 | | |
120 | |// Stack layout while in interpreter. Must match with lj_frame.h. | 120 | |// Stack layout while in interpreter. Must match with lj_frame.h. |
121 | |//----------------------------------------------------------------------- | 121 | |//----------------------------------------------------------------------- |
122 | |.if not X64 // x86 stack layout. | 122 | |.if not X64 // x86 stack layout. |
123 | | | 123 | | |
124 | |.define CFRAME_SPACE, aword*7 // Delta for esp (see <--). | 124 | |.if WIN |
125 | | | ||
126 | |.define CFRAME_SPACE, aword*9 // Delta for esp (see <--). | ||
125 | |.macro saveregs_ | 127 | |.macro saveregs_ |
126 | | push edi; push esi; push ebx | 128 | | push edi; push esi; push ebx |
129 | | push extern lj_err_unwind_win | ||
130 | | fs; push dword [0] | ||
131 | | fs; mov [0], esp | ||
127 | | sub esp, CFRAME_SPACE | 132 | | sub esp, CFRAME_SPACE |
128 | |.endmacro | 133 | |.endmacro |
129 | |.macro saveregs | 134 | |.macro restoreregs |
130 | | push ebp; saveregs_ | 135 | | add esp, CFRAME_SPACE |
136 | | fs; pop dword [0] | ||
137 | | pop edi // Short for esp += 4. | ||
138 | | pop ebx; pop esi; pop edi; pop ebp | ||
139 | |.endmacro | ||
140 | | | ||
141 | |.else | ||
142 | | | ||
143 | |.define CFRAME_SPACE, aword*7 // Delta for esp (see <--). | ||
144 | |.macro saveregs_ | ||
145 | | push edi; push esi; push ebx | ||
146 | | sub esp, CFRAME_SPACE | ||
131 | |.endmacro | 147 | |.endmacro |
132 | |.macro restoreregs | 148 | |.macro restoreregs |
133 | | add esp, CFRAME_SPACE | 149 | | add esp, CFRAME_SPACE |
134 | | pop ebx; pop esi; pop edi; pop ebp | 150 | | pop ebx; pop esi; pop edi; pop ebp |
135 | |.endmacro | 151 | |.endmacro |
136 | | | 152 | | |
153 | |.endif | ||
154 | | | ||
155 | |.macro saveregs | ||
156 | | push ebp; saveregs_ | ||
157 | |.endmacro | ||
158 | | | ||
159 | |.if WIN | ||
160 | |.define SAVE_ERRF, aword [esp+aword*19] // vm_pcall/vm_cpcall only. | ||
161 | |.define SAVE_NRES, aword [esp+aword*18] | ||
162 | |.define SAVE_CFRAME, aword [esp+aword*17] | ||
163 | |.define SAVE_L, aword [esp+aword*16] | ||
164 | |//----- 16 byte aligned, ^^^ arguments from C caller | ||
165 | |.define SAVE_RET, aword [esp+aword*15] //<-- esp entering interpreter. | ||
166 | |.define SAVE_R4, aword [esp+aword*14] | ||
167 | |.define SAVE_R3, aword [esp+aword*13] | ||
168 | |.define SAVE_R2, aword [esp+aword*12] | ||
169 | |//----- 16 byte aligned | ||
170 | |.define SAVE_R1, aword [esp+aword*11] | ||
171 | |.define SEH_FUNC, aword [esp+aword*10] | ||
172 | |.define SEH_NEXT, aword [esp+aword*9] //<-- esp after register saves. | ||
173 | |.define UNUSED2, aword [esp+aword*8] | ||
174 | |//----- 16 byte aligned | ||
175 | |.define UNUSED1, aword [esp+aword*7] | ||
176 | |.define SAVE_PC, aword [esp+aword*6] | ||
177 | |.define TMP2, aword [esp+aword*5] | ||
178 | |.define TMP1, aword [esp+aword*4] | ||
179 | |//----- 16 byte aligned | ||
180 | |.define ARG4, aword [esp+aword*3] | ||
181 | |.define ARG3, aword [esp+aword*2] | ||
182 | |.define ARG2, aword [esp+aword*1] | ||
183 | |.define ARG1, aword [esp] //<-- esp while in interpreter. | ||
184 | |//----- 16 byte aligned, ^^^ arguments for C callee | ||
185 | |.else | ||
137 | |.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only. | 186 | |.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only. |
138 | |.define SAVE_NRES, aword [esp+aword*14] | 187 | |.define SAVE_NRES, aword [esp+aword*14] |
139 | |.define SAVE_CFRAME, aword [esp+aword*13] | 188 | |.define SAVE_CFRAME, aword [esp+aword*13] |
@@ -154,6 +203,7 @@ | |||
154 | |.define ARG2, aword [esp+aword*1] | 203 | |.define ARG2, aword [esp+aword*1] |
155 | |.define ARG1, aword [esp] //<-- esp while in interpreter. | 204 | |.define ARG1, aword [esp] //<-- esp while in interpreter. |
156 | |//----- 16 byte aligned, ^^^ arguments for C callee | 205 | |//----- 16 byte aligned, ^^^ arguments for C callee |
206 | |.endif | ||
157 | | | 207 | | |
158 | |// FPARGx overlaps ARGx and ARG(x+1) on x86. | 208 | |// FPARGx overlaps ARGx and ARG(x+1) on x86. |
159 | |.define FPARG3, qword [esp+qword*1] | 209 | |.define FPARG3, qword [esp+qword*1] |
@@ -389,7 +439,6 @@ | |||
389 | | fpop | 439 | | fpop |
390 | |.endmacro | 440 | |.endmacro |
391 | | | 441 | | |
392 | |.macro fdup; fld st0; .endmacro | ||
393 | |.macro fpop1; fstp st1; .endmacro | 442 | |.macro fpop1; fstp st1; .endmacro |
394 | | | 443 | | |
395 | |// Synthesize SSE FP constants. | 444 | |// Synthesize SSE FP constants. |
@@ -555,6 +604,10 @@ static void build_subroutines(BuildCtx *ctx) | |||
555 | |.else | 604 | |.else |
556 | | mov eax, FCARG2 // Error return status for vm_pcall. | 605 | | mov eax, FCARG2 // Error return status for vm_pcall. |
557 | | mov esp, FCARG1 | 606 | | mov esp, FCARG1 |
607 | |.if WIN | ||
608 | | lea FCARG1, SEH_NEXT | ||
609 | | fs; mov [0], FCARG1 | ||
610 | |.endif | ||
558 | |.endif | 611 | |.endif |
559 | |->vm_unwind_c_eh: // Landing pad for external unwinder. | 612 | |->vm_unwind_c_eh: // Landing pad for external unwinder. |
560 | | mov L:RB, SAVE_L | 613 | | mov L:RB, SAVE_L |
@@ -578,6 +631,10 @@ static void build_subroutines(BuildCtx *ctx) | |||
578 | |.else | 631 | |.else |
579 | | and FCARG1, CFRAME_RAWMASK | 632 | | and FCARG1, CFRAME_RAWMASK |
580 | | mov esp, FCARG1 | 633 | | mov esp, FCARG1 |
634 | |.if WIN | ||
635 | | lea FCARG1, SEH_NEXT | ||
636 | | fs; mov [0], FCARG1 | ||
637 | |.endif | ||
581 | |.endif | 638 | |.endif |
582 | |->vm_unwind_ff_eh: // Landing pad for external unwinder. | 639 | |->vm_unwind_ff_eh: // Landing pad for external unwinder. |
583 | | mov L:RB, SAVE_L | 640 | | mov L:RB, SAVE_L |
@@ -591,6 +648,19 @@ static void build_subroutines(BuildCtx *ctx) | |||
591 | | set_vmstate INTERP | 648 | | set_vmstate INTERP |
592 | | jmp ->vm_returnc // Increments RD/MULTRES and returns. | 649 | | jmp ->vm_returnc // Increments RD/MULTRES and returns. |
593 | | | 650 | | |
651 | |.if WIN and not X64 | ||
652 | |->vm_rtlunwind@16: // Thin layer around RtlUnwind. | ||
653 | | // (void *cframe, void *excptrec, void *unwinder, int errcode) | ||
654 | | mov [esp], FCARG1 // Return value for RtlUnwind. | ||
655 | | push FCARG2 // Exception record for RtlUnwind. | ||
656 | | push 0 // Ignored by RtlUnwind. | ||
657 | | push dword [FCARG1+CFRAME_OFS_SEH] | ||
658 | | call extern RtlUnwind@16 // Violates ABI (clobbers too much). | ||
659 | | mov FCARG1, eax | ||
660 | | mov FCARG2, [esp+4] // errcode (for vm_unwind_c). | ||
661 | | ret // Jump to unwinder. | ||
662 | |.endif | ||
663 | | | ||
594 | |//----------------------------------------------------------------------- | 664 | |//----------------------------------------------------------------------- |
595 | |//-- Grow stack for calls ----------------------------------------------- | 665 | |//-- Grow stack for calls ----------------------------------------------- |
596 | |//----------------------------------------------------------------------- | 666 | |//----------------------------------------------------------------------- |
@@ -646,17 +716,18 @@ static void build_subroutines(BuildCtx *ctx) | |||
646 | | lea KBASEa, [esp+CFRAME_RESUME] | 716 | | lea KBASEa, [esp+CFRAME_RESUME] |
647 | | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. | 717 | | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. |
648 | | add DISPATCH, GG_G2DISP | 718 | | add DISPATCH, GG_G2DISP |
649 | | mov L:RB->cframe, KBASEa | ||
650 | | mov SAVE_PC, RD // Any value outside of bytecode is ok. | 719 | | mov SAVE_PC, RD // Any value outside of bytecode is ok. |
651 | | mov SAVE_CFRAME, RDa | 720 | | mov SAVE_CFRAME, RDa |
652 | |.if X64 | 721 | |.if X64 |
653 | | mov SAVE_NRES, RD | 722 | | mov SAVE_NRES, RD |
654 | | mov SAVE_ERRF, RD | 723 | | mov SAVE_ERRF, RD |
655 | |.endif | 724 | |.endif |
725 | | mov L:RB->cframe, KBASEa | ||
656 | | cmp byte L:RB->status, RDL | 726 | | cmp byte L:RB->status, RDL |
657 | | je >3 // Initial resume (like a call). | 727 | | je >2 // Initial resume (like a call). |
658 | | | 728 | | |
659 | | // Resume after yield (like a return). | 729 | | // Resume after yield (like a return). |
730 | | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB | ||
660 | | set_vmstate INTERP | 731 | | set_vmstate INTERP |
661 | | mov byte L:RB->status, RDL | 732 | | mov byte L:RB->status, RDL |
662 | | mov BASE, L:RB->base | 733 | | mov BASE, L:RB->base |
@@ -696,20 +767,19 @@ static void build_subroutines(BuildCtx *ctx) | |||
696 | | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME! | 767 | | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME! |
697 | |.endif | 768 | |.endif |
698 | | | 769 | | |
770 | | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. | ||
699 | | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain. | 771 | | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain. |
700 | | mov SAVE_CFRAME, KBASEa | 772 | | mov SAVE_CFRAME, KBASEa |
701 | | mov SAVE_PC, L:RB // Any value outside of bytecode is ok. | 773 | | mov SAVE_PC, L:RB // Any value outside of bytecode is ok. |
774 | | add DISPATCH, GG_G2DISP | ||
702 | |.if X64 | 775 | |.if X64 |
703 | | mov L:RB->cframe, rsp | 776 | | mov L:RB->cframe, rsp |
704 | |.else | 777 | |.else |
705 | | mov L:RB->cframe, esp | 778 | | mov L:RB->cframe, esp |
706 | |.endif | 779 | |.endif |
707 | | | 780 | | |
708 | |2: // Entry point for vm_cpcall below (RA = base, RB = L, PC = ftype). | 781 | |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype). |
709 | | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. | 782 | | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB |
710 | | add DISPATCH, GG_G2DISP | ||
711 | | | ||
712 | |3: // Entry point for vm_resume above (RA = base, RB = L, PC = ftype). | ||
713 | | set_vmstate INTERP | 783 | | set_vmstate INTERP |
714 | | mov BASE, L:RB->base // BASE = old base (used in vmeta_call). | 784 | | mov BASE, L:RB->base // BASE = old base (used in vmeta_call). |
715 | | add PC, RA | 785 | | add PC, RA |
@@ -747,14 +817,17 @@ static void build_subroutines(BuildCtx *ctx) | |||
747 | | | 817 | | |
748 | | mov KBASE, L:RB->stack // Compute -savestack(L, L->top). | 818 | | mov KBASE, L:RB->stack // Compute -savestack(L, L->top). |
749 | | sub KBASE, L:RB->top | 819 | | sub KBASE, L:RB->top |
820 | | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. | ||
750 | | mov SAVE_ERRF, 0 // No error function. | 821 | | mov SAVE_ERRF, 0 // No error function. |
751 | | mov SAVE_NRES, KBASE // Neg. delta means cframe w/o frame. | 822 | | mov SAVE_NRES, KBASE // Neg. delta means cframe w/o frame. |
823 | | add DISPATCH, GG_G2DISP | ||
752 | | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe). | 824 | | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe). |
753 | | | 825 | | |
754 | |.if X64 | 826 | |.if X64 |
755 | | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain. | 827 | | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain. |
756 | | mov SAVE_CFRAME, KBASEa | 828 | | mov SAVE_CFRAME, KBASEa |
757 | | mov L:RB->cframe, rsp | 829 | | mov L:RB->cframe, rsp |
830 | | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB | ||
758 | | | 831 | | |
759 | | call CARG4 // (lua_State *L, lua_CFunction func, void *ud) | 832 | | call CARG4 // (lua_State *L, lua_CFunction func, void *ud) |
760 | |.else | 833 | |.else |
@@ -765,6 +838,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
765 | | mov KBASE, L:RB->cframe // Add our C frame to cframe chain. | 838 | | mov KBASE, L:RB->cframe // Add our C frame to cframe chain. |
766 | | mov SAVE_CFRAME, KBASE | 839 | | mov SAVE_CFRAME, KBASE |
767 | | mov L:RB->cframe, esp | 840 | | mov L:RB->cframe, esp |
841 | | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB | ||
768 | | | 842 | | |
769 | | call BASE // (lua_State *L, lua_CFunction func, void *ud) | 843 | | call BASE // (lua_State *L, lua_CFunction func, void *ud) |
770 | |.endif | 844 | |.endif |
@@ -872,13 +946,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
872 | |.if DUALNUM | 946 | |.if DUALNUM |
873 | | mov TMP2, LJ_TISNUM | 947 | | mov TMP2, LJ_TISNUM |
874 | | mov TMP1, RC | 948 | | mov TMP1, RC |
875 | |.elif SSE | 949 | |.else |
876 | | cvtsi2sd xmm0, RC | 950 | | cvtsi2sd xmm0, RC |
877 | | movsd TMPQ, xmm0 | 951 | | movsd TMPQ, xmm0 |
878 | |.else | ||
879 | | mov ARG4, RC | ||
880 | | fild ARG4 | ||
881 | | fstp TMPQ | ||
882 | |.endif | 952 | |.endif |
883 | | lea RCa, TMPQ // Store temp. TValue in TMPQ. | 953 | | lea RCa, TMPQ // Store temp. TValue in TMPQ. |
884 | | jmp >1 | 954 | | jmp >1 |
@@ -932,6 +1002,19 @@ static void build_subroutines(BuildCtx *ctx) | |||
932 | | mov NARGS:RD, 2+1 // 2 args for func(t, k). | 1002 | | mov NARGS:RD, 2+1 // 2 args for func(t, k). |
933 | | jmp ->vm_call_dispatch_f | 1003 | | jmp ->vm_call_dispatch_f |
934 | | | 1004 | | |
1005 | |->vmeta_tgetr: | ||
1006 | | mov FCARG1, TAB:RB | ||
1007 | | mov RB, BASE // Save BASE. | ||
1008 | | mov FCARG2, RC // Caveat: FCARG2 == BASE | ||
1009 | | call extern lj_tab_getinth@8 // (GCtab *t, int32_t key) | ||
1010 | | // cTValue * or NULL returned in eax (RC). | ||
1011 | | movzx RA, PC_RA | ||
1012 | | mov BASE, RB // Restore BASE. | ||
1013 | | test RC, RC | ||
1014 | | jnz ->BC_TGETR_Z | ||
1015 | | mov dword [BASE+RA*8+4], LJ_TNIL | ||
1016 | | jmp ->BC_TGETR2_Z | ||
1017 | | | ||
935 | |//----------------------------------------------------------------------- | 1018 | |//----------------------------------------------------------------------- |
936 | | | 1019 | | |
937 | |->vmeta_tsets: | 1020 | |->vmeta_tsets: |
@@ -951,13 +1034,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
951 | |.if DUALNUM | 1034 | |.if DUALNUM |
952 | | mov TMP2, LJ_TISNUM | 1035 | | mov TMP2, LJ_TISNUM |
953 | | mov TMP1, RC | 1036 | | mov TMP1, RC |
954 | |.elif SSE | 1037 | |.else |
955 | | cvtsi2sd xmm0, RC | 1038 | | cvtsi2sd xmm0, RC |
956 | | movsd TMPQ, xmm0 | 1039 | | movsd TMPQ, xmm0 |
957 | |.else | ||
958 | | mov ARG4, RC | ||
959 | | fild ARG4 | ||
960 | | fstp TMPQ | ||
961 | |.endif | 1040 | |.endif |
962 | | lea RCa, TMPQ // Store temp. TValue in TMPQ. | 1041 | | lea RCa, TMPQ // Store temp. TValue in TMPQ. |
963 | | jmp >1 | 1042 | | jmp >1 |
@@ -1023,6 +1102,33 @@ static void build_subroutines(BuildCtx *ctx) | |||
1023 | | mov NARGS:RD, 3+1 // 3 args for func(t, k, v). | 1102 | | mov NARGS:RD, 3+1 // 3 args for func(t, k, v). |
1024 | | jmp ->vm_call_dispatch_f | 1103 | | jmp ->vm_call_dispatch_f |
1025 | | | 1104 | | |
1105 | |->vmeta_tsetr: | ||
1106 | |.if X64WIN | ||
1107 | | mov L:CARG1d, SAVE_L | ||
1108 | | mov CARG3d, RC | ||
1109 | | mov L:CARG1d->base, BASE | ||
1110 | | xchg CARG2d, TAB:RB // Caveat: CARG2d == BASE. | ||
1111 | |.elif X64 | ||
1112 | | mov L:CARG1d, SAVE_L | ||
1113 | | mov CARG2d, TAB:RB | ||
1114 | | mov L:CARG1d->base, BASE | ||
1115 | | mov RB, BASE // Save BASE. | ||
1116 | | mov CARG3d, RC // Caveat: CARG3d == BASE. | ||
1117 | |.else | ||
1118 | | mov L:RA, SAVE_L | ||
1119 | | mov ARG2, TAB:RB | ||
1120 | | mov RB, BASE // Save BASE. | ||
1121 | | mov ARG3, RC | ||
1122 | | mov ARG1, L:RA | ||
1123 | | mov L:RA->base, BASE | ||
1124 | |.endif | ||
1125 | | mov SAVE_PC, PC | ||
1126 | | call extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) | ||
1127 | | // TValue * returned in eax (RC). | ||
1128 | | movzx RA, PC_RA | ||
1129 | | mov BASE, RB // Restore BASE. | ||
1130 | | jmp ->BC_TSETR_Z | ||
1131 | | | ||
1026 | |//-- Comparison metamethods --------------------------------------------- | 1132 | |//-- Comparison metamethods --------------------------------------------- |
1027 | | | 1133 | | |
1028 | |->vmeta_comp: | 1134 | |->vmeta_comp: |
@@ -1117,6 +1223,26 @@ static void build_subroutines(BuildCtx *ctx) | |||
1117 | | jmp <3 | 1223 | | jmp <3 |
1118 | |.endif | 1224 | |.endif |
1119 | | | 1225 | | |
1226 | |->vmeta_istype: | ||
1227 | |.if X64 | ||
1228 | | mov L:RB, SAVE_L | ||
1229 | | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE. | ||
1230 | | mov CARG2d, RA | ||
1231 | | movzx CARG3d, PC_RD | ||
1232 | | mov L:CARG1d, L:RB | ||
1233 | |.else | ||
1234 | | movzx RD, PC_RD | ||
1235 | | mov ARG2, RA | ||
1236 | | mov L:RB, SAVE_L | ||
1237 | | mov ARG3, RD | ||
1238 | | mov ARG1, L:RB | ||
1239 | | mov L:RB->base, BASE | ||
1240 | |.endif | ||
1241 | | mov SAVE_PC, PC | ||
1242 | | call extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) | ||
1243 | | mov BASE, L:RB->base | ||
1244 | | jmp <6 | ||
1245 | | | ||
1120 | |//-- Arithmetic metamethods --------------------------------------------- | 1246 | |//-- Arithmetic metamethods --------------------------------------------- |
1121 | | | 1247 | | |
1122 | |->vmeta_arith_vno: | 1248 | |->vmeta_arith_vno: |
@@ -1289,19 +1415,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
1289 | | cmp NARGS:RD, 2+1; jb ->fff_fallback | 1415 | | cmp NARGS:RD, 2+1; jb ->fff_fallback |
1290 | |.endmacro | 1416 | |.endmacro |
1291 | | | 1417 | | |
1292 | |.macro .ffunc_n, name | ||
1293 | | .ffunc_1 name | ||
1294 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | ||
1295 | | fld qword [BASE] | ||
1296 | |.endmacro | ||
1297 | | | ||
1298 | |.macro .ffunc_n, name, op | ||
1299 | | .ffunc_1 name | ||
1300 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | ||
1301 | | op | ||
1302 | | fld qword [BASE] | ||
1303 | |.endmacro | ||
1304 | | | ||
1305 | |.macro .ffunc_nsse, name, op | 1418 | |.macro .ffunc_nsse, name, op |
1306 | | .ffunc_1 name | 1419 | | .ffunc_1 name |
1307 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | 1420 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback |
@@ -1312,14 +1425,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
1312 | | .ffunc_nsse name, movsd | 1425 | | .ffunc_nsse name, movsd |
1313 | |.endmacro | 1426 | |.endmacro |
1314 | | | 1427 | | |
1315 | |.macro .ffunc_nn, name | ||
1316 | | .ffunc_2 name | ||
1317 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | ||
1318 | | cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback | ||
1319 | | fld qword [BASE] | ||
1320 | | fld qword [BASE+8] | ||
1321 | |.endmacro | ||
1322 | | | ||
1323 | |.macro .ffunc_nnsse, name | 1428 | |.macro .ffunc_nnsse, name |
1324 | | .ffunc_2 name | 1429 | | .ffunc_2 name |
1325 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | 1430 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback |
@@ -1525,11 +1630,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
1525 | |.else | 1630 | |.else |
1526 | | jae ->fff_fallback | 1631 | | jae ->fff_fallback |
1527 | |.endif | 1632 | |.endif |
1528 | |.if SSE | ||
1529 | | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0 | 1633 | | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0 |
1530 | |.else | ||
1531 | | fld qword [BASE]; jmp ->fff_resn | ||
1532 | |.endif | ||
1533 | | | 1634 | | |
1534 | |.ffunc_1 tostring | 1635 | |.ffunc_1 tostring |
1535 | | // Only handles the string or number case inline. | 1636 | | // Only handles the string or number case inline. |
@@ -1554,9 +1655,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
1554 | |.endif | 1655 | |.endif |
1555 | | mov L:FCARG1, L:RB | 1656 | | mov L:FCARG1, L:RB |
1556 | |.if DUALNUM | 1657 | |.if DUALNUM |
1557 | | call extern lj_str_fromnumber@8 // (lua_State *L, cTValue *o) | 1658 | | call extern lj_strfmt_number@8 // (lua_State *L, cTValue *o) |
1558 | |.else | 1659 | |.else |
1559 | | call extern lj_str_fromnum@8 // (lua_State *L, lua_Number *np) | 1660 | | call extern lj_strfmt_num@8 // (lua_State *L, lua_Number *np) |
1560 | |.endif | 1661 | |.endif |
1561 | | // GCstr returned in eax (RD). | 1662 | | // GCstr returned in eax (RD). |
1562 | | mov BASE, L:RB->base | 1663 | | mov BASE, L:RB->base |
@@ -1647,19 +1748,12 @@ static void build_subroutines(BuildCtx *ctx) | |||
1647 | | add RD, 1 | 1748 | | add RD, 1 |
1648 | | mov dword [BASE-4], LJ_TISNUM | 1749 | | mov dword [BASE-4], LJ_TISNUM |
1649 | | mov dword [BASE-8], RD | 1750 | | mov dword [BASE-8], RD |
1650 | |.elif SSE | 1751 | |.else |
1651 | | movsd xmm0, qword [BASE+8] | 1752 | | movsd xmm0, qword [BASE+8] |
1652 | | sseconst_1 xmm1, RBa | 1753 | | sseconst_1 xmm1, RBa |
1653 | | addsd xmm0, xmm1 | 1754 | | addsd xmm0, xmm1 |
1654 | | cvtsd2si RD, xmm0 | 1755 | | cvttsd2si RD, xmm0 |
1655 | | movsd qword [BASE-8], xmm0 | 1756 | | movsd qword [BASE-8], xmm0 |
1656 | |.else | ||
1657 | | fld qword [BASE+8] | ||
1658 | | fld1 | ||
1659 | | faddp st1 | ||
1660 | | fist ARG1 | ||
1661 | | fstp qword [BASE-8] | ||
1662 | | mov RD, ARG1 | ||
1663 | |.endif | 1757 | |.endif |
1664 | | mov TAB:RB, [BASE] | 1758 | | mov TAB:RB, [BASE] |
1665 | | cmp RD, TAB:RB->asize; jae >2 // Not in array part? | 1759 | | cmp RD, TAB:RB->asize; jae >2 // Not in array part? |
@@ -1706,12 +1800,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
1706 | |.if DUALNUM | 1800 | |.if DUALNUM |
1707 | | mov dword [BASE+12], LJ_TISNUM | 1801 | | mov dword [BASE+12], LJ_TISNUM |
1708 | | mov dword [BASE+8], 0 | 1802 | | mov dword [BASE+8], 0 |
1709 | |.elif SSE | 1803 | |.else |
1710 | | xorps xmm0, xmm0 | 1804 | | xorps xmm0, xmm0 |
1711 | | movsd qword [BASE+8], xmm0 | 1805 | | movsd qword [BASE+8], xmm0 |
1712 | |.else | ||
1713 | | fldz | ||
1714 | | fstp qword [BASE+8] | ||
1715 | |.endif | 1806 | |.endif |
1716 | | mov RD, 1+3 | 1807 | | mov RD, 1+3 |
1717 | | jmp ->fff_res | 1808 | | jmp ->fff_res |
@@ -1818,7 +1909,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
1818 | | mov ARG3, RA | 1909 | | mov ARG3, RA |
1819 | |.endif | 1910 | |.endif |
1820 | | call ->vm_resume // (lua_State *L, TValue *base, 0, 0) | 1911 | | call ->vm_resume // (lua_State *L, TValue *base, 0, 0) |
1821 | | set_vmstate INTERP | ||
1822 | | | 1912 | | |
1823 | | mov L:RB, SAVE_L | 1913 | | mov L:RB, SAVE_L |
1824 | |.if X64 | 1914 | |.if X64 |
@@ -1827,6 +1917,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
1827 | | mov L:PC, ARG1 // The callee doesn't modify SAVE_L. | 1917 | | mov L:PC, ARG1 // The callee doesn't modify SAVE_L. |
1828 | |.endif | 1918 | |.endif |
1829 | | mov BASE, L:RB->base | 1919 | | mov BASE, L:RB->base |
1920 | | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB | ||
1921 | | set_vmstate INTERP | ||
1922 | | | ||
1830 | | cmp eax, LUA_YIELD | 1923 | | cmp eax, LUA_YIELD |
1831 | | ja >8 | 1924 | | ja >8 |
1832 | |4: | 1925 | |4: |
@@ -1941,12 +2034,10 @@ static void build_subroutines(BuildCtx *ctx) | |||
1941 | |->fff_resi: // Dummy. | 2034 | |->fff_resi: // Dummy. |
1942 | |.endif | 2035 | |.endif |
1943 | | | 2036 | | |
1944 | |.if SSE | ||
1945 | |->fff_resn: | 2037 | |->fff_resn: |
1946 | | mov PC, [BASE-4] | 2038 | | mov PC, [BASE-4] |
1947 | | fstp qword [BASE-8] | 2039 | | fstp qword [BASE-8] |
1948 | | jmp ->fff_res1 | 2040 | | jmp ->fff_res1 |
1949 | |.endif | ||
1950 | | | 2041 | | |
1951 | | .ffunc_1 math_abs | 2042 | | .ffunc_1 math_abs |
1952 | |.if DUALNUM | 2043 | |.if DUALNUM |
@@ -1970,8 +2061,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
1970 | |.else | 2061 | |.else |
1971 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | 2062 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback |
1972 | |.endif | 2063 | |.endif |
1973 | | | ||
1974 | |.if SSE | ||
1975 | | movsd xmm0, qword [BASE] | 2064 | | movsd xmm0, qword [BASE] |
1976 | | sseconst_abs xmm1, RDa | 2065 | | sseconst_abs xmm1, RDa |
1977 | | andps xmm0, xmm1 | 2066 | | andps xmm0, xmm1 |
@@ -1979,15 +2068,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
1979 | | mov PC, [BASE-4] | 2068 | | mov PC, [BASE-4] |
1980 | | movsd qword [BASE-8], xmm0 | 2069 | | movsd qword [BASE-8], xmm0 |
1981 | | // fallthrough | 2070 | | // fallthrough |
1982 | |.else | ||
1983 | | fld qword [BASE] | ||
1984 | | fabs | ||
1985 | | // fallthrough | ||
1986 | |->fff_resxmm0: // Dummy. | ||
1987 | |->fff_resn: | ||
1988 | | mov PC, [BASE-4] | ||
1989 | | fstp qword [BASE-8] | ||
1990 | |.endif | ||
1991 | | | 2071 | | |
1992 | |->fff_res1: | 2072 | |->fff_res1: |
1993 | | mov RD, 1+1 | 2073 | | mov RD, 1+1 |
@@ -2014,6 +2094,12 @@ static void build_subroutines(BuildCtx *ctx) | |||
2014 | | mov RAa, -8 // Results start at BASE+RA = BASE-8. | 2094 | | mov RAa, -8 // Results start at BASE+RA = BASE-8. |
2015 | | jmp ->vm_return | 2095 | | jmp ->vm_return |
2016 | | | 2096 | | |
2097 | |.if X64 | ||
2098 | |.define fff_resfp, fff_resxmm0 | ||
2099 | |.else | ||
2100 | |.define fff_resfp, fff_resn | ||
2101 | |.endif | ||
2102 | | | ||
2017 | |.macro math_round, func | 2103 | |.macro math_round, func |
2018 | | .ffunc math_ .. func | 2104 | | .ffunc math_ .. func |
2019 | |.if DUALNUM | 2105 | |.if DUALNUM |
@@ -2024,107 +2110,75 @@ static void build_subroutines(BuildCtx *ctx) | |||
2024 | |.else | 2110 | |.else |
2025 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | 2111 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback |
2026 | |.endif | 2112 | |.endif |
2027 | |.if SSE | ||
2028 | | movsd xmm0, qword [BASE] | 2113 | | movsd xmm0, qword [BASE] |
2029 | | call ->vm_ .. func | 2114 | | call ->vm_ .. func .. _sse |
2030 | | .if DUALNUM | 2115 | |.if DUALNUM |
2031 | | cvtsd2si RB, xmm0 | 2116 | | cvttsd2si RB, xmm0 |
2032 | | cmp RB, 0x80000000 | 2117 | | cmp RB, 0x80000000 |
2033 | | jne ->fff_resi | 2118 | | jne ->fff_resi |
2034 | | cvtsi2sd xmm1, RB | 2119 | | cvtsi2sd xmm1, RB |
2035 | | ucomisd xmm0, xmm1 | 2120 | | ucomisd xmm0, xmm1 |
2036 | | jp ->fff_resxmm0 | 2121 | | jp ->fff_resxmm0 |
2037 | | je ->fff_resi | 2122 | | je ->fff_resi |
2038 | | .endif | ||
2039 | | jmp ->fff_resxmm0 | ||
2040 | |.else | ||
2041 | | fld qword [BASE] | ||
2042 | | call ->vm_ .. func | ||
2043 | | .if DUALNUM | ||
2044 | | fist ARG1 | ||
2045 | | mov RB, ARG1 | ||
2046 | | cmp RB, 0x80000000; jne >2 | ||
2047 | | fdup | ||
2048 | | fild ARG1 | ||
2049 | | fcomparepp | ||
2050 | | jp ->fff_resn | ||
2051 | | jne ->fff_resn | ||
2052 | |2: | ||
2053 | | fpop | ||
2054 | | jmp ->fff_resi | ||
2055 | | .else | ||
2056 | | jmp ->fff_resn | ||
2057 | | .endif | ||
2058 | |.endif | 2123 | |.endif |
2124 | | jmp ->fff_resxmm0 | ||
2059 | |.endmacro | 2125 | |.endmacro |
2060 | | | 2126 | | |
2061 | | math_round floor | 2127 | | math_round floor |
2062 | | math_round ceil | 2128 | | math_round ceil |
2063 | | | 2129 | | |
2064 | |.if SSE | ||
2065 | |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0 | 2130 | |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0 |
2066 | |.else | ||
2067 | |.ffunc_n math_sqrt; fsqrt; jmp ->fff_resn | ||
2068 | |.endif | ||
2069 | | | 2131 | | |
2070 | |.ffunc math_log | 2132 | |.ffunc math_log |
2071 | | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. | 2133 | | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. |
2072 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | 2134 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback |
2073 | | fldln2; fld qword [BASE]; fyl2x; jmp ->fff_resn | 2135 | | movsd xmm0, qword [BASE] |
2074 | | | 2136 | |.if not X64 |
2075 | |.ffunc_n math_log10, fldlg2; fyl2x; jmp ->fff_resn | 2137 | | movsd FPARG1, xmm0 |
2076 | |.ffunc_n math_exp; call ->vm_exp_x87; jmp ->fff_resn | 2138 | |.endif |
2077 | | | 2139 | | mov RB, BASE |
2078 | |.ffunc_n math_sin; fsin; jmp ->fff_resn | 2140 | | call extern log |
2079 | |.ffunc_n math_cos; fcos; jmp ->fff_resn | 2141 | | mov BASE, RB |
2080 | |.ffunc_n math_tan; fptan; fpop; jmp ->fff_resn | 2142 | | jmp ->fff_resfp |
2081 | | | ||
2082 | |.ffunc_n math_asin | ||
2083 | | fdup; fmul st0; fld1; fsubrp st1; fsqrt; fpatan | ||
2084 | | jmp ->fff_resn | ||
2085 | |.ffunc_n math_acos | ||
2086 | | fdup; fmul st0; fld1; fsubrp st1; fsqrt; fxch; fpatan | ||
2087 | | jmp ->fff_resn | ||
2088 | |.ffunc_n math_atan; fld1; fpatan; jmp ->fff_resn | ||
2089 | | | 2143 | | |
2090 | |.macro math_extern, func | 2144 | |.macro math_extern, func |
2091 | |.if SSE | ||
2092 | | .ffunc_nsse math_ .. func | 2145 | | .ffunc_nsse math_ .. func |
2093 | | .if not X64 | 2146 | |.if not X64 |
2094 | | movsd FPARG1, xmm0 | 2147 | | movsd FPARG1, xmm0 |
2095 | | .endif | ||
2096 | |.else | ||
2097 | | .ffunc_n math_ .. func | ||
2098 | | fstp FPARG1 | ||
2099 | |.endif | 2148 | |.endif |
2100 | | mov RB, BASE | 2149 | | mov RB, BASE |
2101 | | call extern lj_vm_ .. func | 2150 | | call extern func |
2102 | | mov BASE, RB | 2151 | | mov BASE, RB |
2103 | | .if X64 | 2152 | | jmp ->fff_resfp |
2104 | | jmp ->fff_resxmm0 | 2153 | |.endmacro |
2105 | | .else | 2154 | | |
2106 | | jmp ->fff_resn | 2155 | |.macro math_extern2, func |
2107 | | .endif | 2156 | | .ffunc_nnsse math_ .. func |
2157 | |.if not X64 | ||
2158 | | movsd FPARG1, xmm0 | ||
2159 | | movsd FPARG3, xmm1 | ||
2160 | |.endif | ||
2161 | | mov RB, BASE | ||
2162 | | call extern func | ||
2163 | | mov BASE, RB | ||
2164 | | jmp ->fff_resfp | ||
2108 | |.endmacro | 2165 | |.endmacro |
2109 | | | 2166 | | |
2167 | | math_extern log10 | ||
2168 | | math_extern exp | ||
2169 | | math_extern sin | ||
2170 | | math_extern cos | ||
2171 | | math_extern tan | ||
2172 | | math_extern asin | ||
2173 | | math_extern acos | ||
2174 | | math_extern atan | ||
2110 | | math_extern sinh | 2175 | | math_extern sinh |
2111 | | math_extern cosh | 2176 | | math_extern cosh |
2112 | | math_extern tanh | 2177 | | math_extern tanh |
2178 | | math_extern2 pow | ||
2179 | | math_extern2 atan2 | ||
2180 | | math_extern2 fmod | ||
2113 | | | 2181 | | |
2114 | |->ff_math_deg: | ||
2115 | |.if SSE | ||
2116 | |.ffunc_nsse math_rad | ||
2117 | | mov CFUNC:RB, [BASE-8] | ||
2118 | | mulsd xmm0, qword CFUNC:RB->upvalue[0] | ||
2119 | | jmp ->fff_resxmm0 | ||
2120 | |.else | ||
2121 | |.ffunc_n math_rad | ||
2122 | | mov CFUNC:RB, [BASE-8] | ||
2123 | | fmul qword CFUNC:RB->upvalue[0] | ||
2124 | | jmp ->fff_resn | ||
2125 | |.endif | ||
2126 | | | ||
2127 | |.ffunc_nn math_atan2; fpatan; jmp ->fff_resn | ||
2128 | |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn | 2182 | |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn |
2129 | | | 2183 | | |
2130 | |.ffunc_1 math_frexp | 2184 | |.ffunc_1 math_frexp |
@@ -2139,65 +2193,34 @@ static void build_subroutines(BuildCtx *ctx) | |||
2139 | | cmp RB, 0x00200000; jb >4 | 2193 | | cmp RB, 0x00200000; jb >4 |
2140 | |1: | 2194 | |1: |
2141 | | shr RB, 21; sub RB, RC // Extract and unbias exponent. | 2195 | | shr RB, 21; sub RB, RC // Extract and unbias exponent. |
2142 | |.if SSE | ||
2143 | | cvtsi2sd xmm0, RB | 2196 | | cvtsi2sd xmm0, RB |
2144 | |.else | ||
2145 | | mov TMP1, RB; fild TMP1 | ||
2146 | |.endif | ||
2147 | | mov RB, [BASE-4] | 2197 | | mov RB, [BASE-4] |
2148 | | and RB, 0x800fffff // Mask off exponent. | 2198 | | and RB, 0x800fffff // Mask off exponent. |
2149 | | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0. | 2199 | | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0. |
2150 | | mov [BASE-4], RB | 2200 | | mov [BASE-4], RB |
2151 | |2: | 2201 | |2: |
2152 | |.if SSE | ||
2153 | | movsd qword [BASE], xmm0 | 2202 | | movsd qword [BASE], xmm0 |
2154 | |.else | ||
2155 | | fstp qword [BASE] | ||
2156 | |.endif | ||
2157 | | mov RD, 1+2 | 2203 | | mov RD, 1+2 |
2158 | | jmp ->fff_res | 2204 | | jmp ->fff_res |
2159 | |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0. | 2205 | |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0. |
2160 | |.if SSE | ||
2161 | | xorps xmm0, xmm0; jmp <2 | 2206 | | xorps xmm0, xmm0; jmp <2 |
2162 | |.else | ||
2163 | | fldz; jmp <2 | ||
2164 | |.endif | ||
2165 | |4: // Handle denormals by multiplying with 2^54 and adjusting the bias. | 2207 | |4: // Handle denormals by multiplying with 2^54 and adjusting the bias. |
2166 | |.if SSE | ||
2167 | | movsd xmm0, qword [BASE] | 2208 | | movsd xmm0, qword [BASE] |
2168 | | sseconst_hi xmm1, RBa, 43500000 // 2^54. | 2209 | | sseconst_hi xmm1, RBa, 43500000 // 2^54. |
2169 | | mulsd xmm0, xmm1 | 2210 | | mulsd xmm0, xmm1 |
2170 | | movsd qword [BASE-8], xmm0 | 2211 | | movsd qword [BASE-8], xmm0 |
2171 | |.else | ||
2172 | | fld qword [BASE] | ||
2173 | | mov TMP1, 0x5a800000; fmul TMP1 // x = x*2^54 | ||
2174 | | fstp qword [BASE-8] | ||
2175 | |.endif | ||
2176 | | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1 | 2212 | | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1 |
2177 | | | 2213 | | |
2178 | |.if SSE | ||
2179 | |.ffunc_nsse math_modf | 2214 | |.ffunc_nsse math_modf |
2180 | |.else | ||
2181 | |.ffunc_n math_modf | ||
2182 | |.endif | ||
2183 | | mov RB, [BASE+4] | 2215 | | mov RB, [BASE+4] |
2184 | | mov PC, [BASE-4] | 2216 | | mov PC, [BASE-4] |
2185 | | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf? | 2217 | | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf? |
2186 | |.if SSE | ||
2187 | | movaps xmm4, xmm0 | 2218 | | movaps xmm4, xmm0 |
2188 | | call ->vm_trunc | 2219 | | call ->vm_trunc_sse |
2189 | | subsd xmm4, xmm0 | 2220 | | subsd xmm4, xmm0 |
2190 | |1: | 2221 | |1: |
2191 | | movsd qword [BASE-8], xmm0 | 2222 | | movsd qword [BASE-8], xmm0 |
2192 | | movsd qword [BASE], xmm4 | 2223 | | movsd qword [BASE], xmm4 |
2193 | |.else | ||
2194 | | fdup | ||
2195 | | call ->vm_trunc | ||
2196 | | fsub st1, st0 | ||
2197 | |1: | ||
2198 | | fstp qword [BASE-8] | ||
2199 | | fstp qword [BASE] | ||
2200 | |.endif | ||
2201 | | mov RC, [BASE-4]; mov RB, [BASE+4] | 2224 | | mov RC, [BASE-4]; mov RB, [BASE+4] |
2202 | | xor RC, RB; js >3 // Need to adjust sign? | 2225 | | xor RC, RB; js >3 // Need to adjust sign? |
2203 | |2: | 2226 | |2: |
@@ -2207,24 +2230,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
2207 | | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction. | 2230 | | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction. |
2208 | | jmp <2 | 2231 | | jmp <2 |
2209 | |4: | 2232 | |4: |
2210 | |.if SSE | ||
2211 | | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0. | 2233 | | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0. |
2212 | |.else | ||
2213 | | fldz; fxch; jmp <1 // Return +-Inf and +-0. | ||
2214 | |.endif | ||
2215 | | | ||
2216 | |.ffunc_nnr math_fmod | ||
2217 | |1: ; fprem; fnstsw ax; and ax, 0x400; jnz <1 | ||
2218 | | fpop1 | ||
2219 | | jmp ->fff_resn | ||
2220 | | | 2234 | | |
2221 | |.if SSE | 2235 | |.macro math_minmax, name, cmovop, sseop |
2222 | |.ffunc_nnsse math_pow; call ->vm_pow; jmp ->fff_resxmm0 | ||
2223 | |.else | ||
2224 | |.ffunc_nn math_pow; call ->vm_pow; jmp ->fff_resn | ||
2225 | |.endif | ||
2226 | | | ||
2227 | |.macro math_minmax, name, cmovop, fcmovop, sseop | ||
2228 | | .ffunc name | 2236 | | .ffunc name |
2229 | | mov RA, 2 | 2237 | | mov RA, 2 |
2230 | | cmp dword [BASE+4], LJ_TISNUM | 2238 | | cmp dword [BASE+4], LJ_TISNUM |
@@ -2241,12 +2249,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
2241 | |3: | 2249 | |3: |
2242 | | ja ->fff_fallback | 2250 | | ja ->fff_fallback |
2243 | | // Convert intermediate result to number and continue below. | 2251 | | // Convert intermediate result to number and continue below. |
2244 | |.if SSE | ||
2245 | | cvtsi2sd xmm0, RB | 2252 | | cvtsi2sd xmm0, RB |
2246 | |.else | ||
2247 | | mov TMP1, RB | ||
2248 | | fild TMP1 | ||
2249 | |.endif | ||
2250 | | jmp >6 | 2253 | | jmp >6 |
2251 | |4: | 2254 | |4: |
2252 | | ja ->fff_fallback | 2255 | | ja ->fff_fallback |
@@ -2254,7 +2257,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
2254 | | jae ->fff_fallback | 2257 | | jae ->fff_fallback |
2255 | |.endif | 2258 | |.endif |
2256 | | | 2259 | | |
2257 | |.if SSE | ||
2258 | | movsd xmm0, qword [BASE] | 2260 | | movsd xmm0, qword [BASE] |
2259 | |5: // Handle numbers or integers. | 2261 | |5: // Handle numbers or integers. |
2260 | | cmp RA, RD; jae ->fff_resxmm0 | 2262 | | cmp RA, RD; jae ->fff_resxmm0 |
@@ -2273,48 +2275,13 @@ static void build_subroutines(BuildCtx *ctx) | |||
2273 | | sseop xmm0, xmm1 | 2275 | | sseop xmm0, xmm1 |
2274 | | add RA, 1 | 2276 | | add RA, 1 |
2275 | | jmp <5 | 2277 | | jmp <5 |
2276 | |.else | ||
2277 | | fld qword [BASE] | ||
2278 | |5: // Handle numbers or integers. | ||
2279 | | cmp RA, RD; jae ->fff_resn | ||
2280 | | cmp dword [BASE+RA*8-4], LJ_TISNUM | ||
2281 | |.if DUALNUM | ||
2282 | | jb >6 | ||
2283 | | ja >9 | ||
2284 | | fild dword [BASE+RA*8-8] | ||
2285 | | jmp >7 | ||
2286 | |.else | ||
2287 | | jae >9 | ||
2288 | |.endif | ||
2289 | |6: | ||
2290 | | fld qword [BASE+RA*8-8] | ||
2291 | |7: | ||
2292 | | fucomi st1; fcmovop st1; fpop1 | ||
2293 | | add RA, 1 | ||
2294 | | jmp <5 | ||
2295 | |.endif | ||
2296 | |.endmacro | 2278 | |.endmacro |
2297 | | | 2279 | | |
2298 | | math_minmax math_min, cmovg, fcmovnbe, minsd | 2280 | | math_minmax math_min, cmovg, minsd |
2299 | | math_minmax math_max, cmovl, fcmovbe, maxsd | 2281 | | math_minmax math_max, cmovl, maxsd |
2300 | |.if not SSE | ||
2301 | |9: | ||
2302 | | fpop; jmp ->fff_fallback | ||
2303 | |.endif | ||
2304 | | | 2282 | | |
2305 | |//-- String library ----------------------------------------------------- | 2283 | |//-- String library ----------------------------------------------------- |
2306 | | | 2284 | | |
2307 | |.ffunc_1 string_len | ||
2308 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback | ||
2309 | | mov STR:RB, [BASE] | ||
2310 | |.if DUALNUM | ||
2311 | | mov RB, dword STR:RB->len; jmp ->fff_resi | ||
2312 | |.elif SSE | ||
2313 | | cvtsi2sd xmm0, dword STR:RB->len; jmp ->fff_resxmm0 | ||
2314 | |.else | ||
2315 | | fild dword STR:RB->len; jmp ->fff_resn | ||
2316 | |.endif | ||
2317 | | | ||
2318 | |.ffunc string_byte // Only handle the 1-arg case here. | 2285 | |.ffunc string_byte // Only handle the 1-arg case here. |
2319 | | cmp NARGS:RD, 1+1; jne ->fff_fallback | 2286 | | cmp NARGS:RD, 1+1; jne ->fff_fallback |
2320 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback | 2287 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback |
@@ -2325,10 +2292,8 @@ static void build_subroutines(BuildCtx *ctx) | |||
2325 | | movzx RB, byte STR:RB[1] | 2292 | | movzx RB, byte STR:RB[1] |
2326 | |.if DUALNUM | 2293 | |.if DUALNUM |
2327 | | jmp ->fff_resi | 2294 | | jmp ->fff_resi |
2328 | |.elif SSE | ||
2329 | | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0 | ||
2330 | |.else | 2295 | |.else |
2331 | | mov TMP1, RB; fild TMP1; jmp ->fff_resn | 2296 | | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0 |
2332 | |.endif | 2297 | |.endif |
2333 | | | 2298 | | |
2334 | |.ffunc string_char // Only handle the 1-arg case here. | 2299 | |.ffunc string_char // Only handle the 1-arg case here. |
@@ -2340,16 +2305,11 @@ static void build_subroutines(BuildCtx *ctx) | |||
2340 | | mov RB, dword [BASE] | 2305 | | mov RB, dword [BASE] |
2341 | | cmp RB, 255; ja ->fff_fallback | 2306 | | cmp RB, 255; ja ->fff_fallback |
2342 | | mov TMP2, RB | 2307 | | mov TMP2, RB |
2343 | |.elif SSE | 2308 | |.else |
2344 | | jae ->fff_fallback | 2309 | | jae ->fff_fallback |
2345 | | cvttsd2si RB, qword [BASE] | 2310 | | cvttsd2si RB, qword [BASE] |
2346 | | cmp RB, 255; ja ->fff_fallback | 2311 | | cmp RB, 255; ja ->fff_fallback |
2347 | | mov TMP2, RB | 2312 | | mov TMP2, RB |
2348 | |.else | ||
2349 | | jae ->fff_fallback | ||
2350 | | fld qword [BASE] | ||
2351 | | fistp TMP2 | ||
2352 | | cmp TMP2, 255; ja ->fff_fallback | ||
2353 | |.endif | 2313 | |.endif |
2354 | |.if X64 | 2314 | |.if X64 |
2355 | | mov TMP3, 1 | 2315 | | mov TMP3, 1 |
@@ -2370,6 +2330,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
2370 | |.endif | 2330 | |.endif |
2371 | | mov SAVE_PC, PC | 2331 | | mov SAVE_PC, PC |
2372 | | call extern lj_str_new // (lua_State *L, char *str, size_t l) | 2332 | | call extern lj_str_new // (lua_State *L, char *str, size_t l) |
2333 | |->fff_resstr: | ||
2373 | | // GCstr * returned in eax (RD). | 2334 | | // GCstr * returned in eax (RD). |
2374 | | mov BASE, L:RB->base | 2335 | | mov BASE, L:RB->base |
2375 | | mov PC, [BASE-4] | 2336 | | mov PC, [BASE-4] |
@@ -2387,14 +2348,10 @@ static void build_subroutines(BuildCtx *ctx) | |||
2387 | | jne ->fff_fallback | 2348 | | jne ->fff_fallback |
2388 | | mov RB, dword [BASE+16] | 2349 | | mov RB, dword [BASE+16] |
2389 | | mov TMP2, RB | 2350 | | mov TMP2, RB |
2390 | |.elif SSE | 2351 | |.else |
2391 | | jae ->fff_fallback | 2352 | | jae ->fff_fallback |
2392 | | cvttsd2si RB, qword [BASE+16] | 2353 | | cvttsd2si RB, qword [BASE+16] |
2393 | | mov TMP2, RB | 2354 | | mov TMP2, RB |
2394 | |.else | ||
2395 | | jae ->fff_fallback | ||
2396 | | fld qword [BASE+16] | ||
2397 | | fistp TMP2 | ||
2398 | |.endif | 2355 | |.endif |
2399 | |1: | 2356 | |1: |
2400 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback | 2357 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback |
@@ -2409,12 +2366,8 @@ static void build_subroutines(BuildCtx *ctx) | |||
2409 | | mov RB, STR:RB->len | 2366 | | mov RB, STR:RB->len |
2410 | |.if DUALNUM | 2367 | |.if DUALNUM |
2411 | | mov RA, dword [BASE+8] | 2368 | | mov RA, dword [BASE+8] |
2412 | |.elif SSE | ||
2413 | | cvttsd2si RA, qword [BASE+8] | ||
2414 | |.else | 2369 | |.else |
2415 | | fld qword [BASE+8] | 2370 | | cvttsd2si RA, qword [BASE+8] |
2416 | | fistp ARG3 | ||
2417 | | mov RA, ARG3 | ||
2418 | |.endif | 2371 | |.endif |
2419 | | mov RC, TMP2 | 2372 | | mov RC, TMP2 |
2420 | | cmp RB, RC // len < end? (unsigned compare) | 2373 | | cmp RB, RC // len < end? (unsigned compare) |
@@ -2458,136 +2411,34 @@ static void build_subroutines(BuildCtx *ctx) | |||
2458 | | xor RC, RC // Zero length. Any ptr in RB is ok. | 2411 | | xor RC, RC // Zero length. Any ptr in RB is ok. |
2459 | | jmp <4 | 2412 | | jmp <4 |
2460 | | | 2413 | | |
2461 | |.ffunc string_rep // Only handle the 1-char case inline. | 2414 | |.macro ffstring_op, name |
2462 | | ffgccheck | 2415 | | .ffunc_1 string_ .. name |
2463 | | cmp NARGS:RD, 2+1; jne ->fff_fallback // Exactly 2 arguments. | ||
2464 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback | ||
2465 | | cmp dword [BASE+12], LJ_TISNUM | ||
2466 | | mov STR:RB, [BASE] | ||
2467 | |.if DUALNUM | ||
2468 | | jne ->fff_fallback | ||
2469 | | mov RC, dword [BASE+8] | ||
2470 | |.elif SSE | ||
2471 | | jae ->fff_fallback | ||
2472 | | cvttsd2si RC, qword [BASE+8] | ||
2473 | |.else | ||
2474 | | jae ->fff_fallback | ||
2475 | | fld qword [BASE+8] | ||
2476 | | fistp TMP2 | ||
2477 | | mov RC, TMP2 | ||
2478 | |.endif | ||
2479 | | test RC, RC | ||
2480 | | jle ->fff_emptystr // Count <= 0? (or non-int) | ||
2481 | | cmp dword STR:RB->len, 1 | ||
2482 | | jb ->fff_emptystr // Zero length string? | ||
2483 | | jne ->fff_fallback_2 // Fallback for > 1-char strings. | ||
2484 | | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_2 | ||
2485 | | movzx RA, byte STR:RB[1] | ||
2486 | | mov RB, [DISPATCH+DISPATCH_GL(tmpbuf.buf)] | ||
2487 | |.if X64 | ||
2488 | | mov TMP3, RC | ||
2489 | |.else | ||
2490 | | mov ARG3, RC | ||
2491 | |.endif | ||
2492 | |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?). | ||
2493 | | mov [RB], RAL | ||
2494 | | add RB, 1 | ||
2495 | | sub RC, 1 | ||
2496 | | jnz <1 | ||
2497 | | mov RD, [DISPATCH+DISPATCH_GL(tmpbuf.buf)] | ||
2498 | | jmp ->fff_newstr | ||
2499 | | | ||
2500 | |.ffunc_1 string_reverse | ||
2501 | | ffgccheck | 2416 | | ffgccheck |
2502 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback | 2417 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback |
2503 | | mov STR:RB, [BASE] | 2418 | | mov L:RB, SAVE_L |
2504 | | mov RC, STR:RB->len | 2419 | | lea SBUF:FCARG1, [DISPATCH+DISPATCH_GL(tmpbuf)] |
2505 | | test RC, RC | 2420 | | mov L:RB->base, BASE |
2506 | | jz ->fff_emptystr // Zero length string? | 2421 | | mov STR:FCARG2, [BASE] // Caveat: FCARG2 == BASE |
2507 | | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1 | 2422 | | mov RC, SBUF:FCARG1->b |
2508 | | add RB, #STR | 2423 | | mov SBUF:FCARG1->L, L:RB |
2509 | | mov TMP2, PC // Need another temp register. | 2424 | | mov SBUF:FCARG1->p, RC |
2510 | |.if X64 | 2425 | | mov SAVE_PC, PC |
2511 | | mov TMP3, RC | 2426 | | call extern lj_buf_putstr_ .. name .. @8 |
2512 | |.else | 2427 | | mov FCARG1, eax |
2513 | | mov ARG3, RC | 2428 | | call extern lj_buf_tostr@4 |
2514 | |.endif | 2429 | | jmp ->fff_resstr |
2515 | | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)] | ||
2516 | |1: | ||
2517 | | movzx RA, byte [RB] | ||
2518 | | add RB, 1 | ||
2519 | | sub RC, 1 | ||
2520 | | mov [PC+RC], RAL | ||
2521 | | jnz <1 | ||
2522 | | mov RD, PC | ||
2523 | | mov PC, TMP2 | ||
2524 | | jmp ->fff_newstr | ||
2525 | | | ||
2526 | |.macro ffstring_case, name, lo, hi | ||
2527 | | .ffunc_1 name | ||
2528 | | ffgccheck | ||
2529 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback | ||
2530 | | mov STR:RB, [BASE] | ||
2531 | | mov RC, STR:RB->len | ||
2532 | | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1 | ||
2533 | | add RB, #STR | ||
2534 | | mov TMP2, PC // Need another temp register. | ||
2535 | |.if X64 | ||
2536 | | mov TMP3, RC | ||
2537 | |.else | ||
2538 | | mov ARG3, RC | ||
2539 | |.endif | ||
2540 | | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)] | ||
2541 | | jmp >3 | ||
2542 | |1: // ASCII case conversion. Yes, this is suboptimal code (do you care?). | ||
2543 | | movzx RA, byte [RB+RC] | ||
2544 | | cmp RA, lo | ||
2545 | | jb >2 | ||
2546 | | cmp RA, hi | ||
2547 | | ja >2 | ||
2548 | | xor RA, 0x20 | ||
2549 | |2: | ||
2550 | | mov [PC+RC], RAL | ||
2551 | |3: | ||
2552 | | sub RC, 1 | ||
2553 | | jns <1 | ||
2554 | | mov RD, PC | ||
2555 | | mov PC, TMP2 | ||
2556 | | jmp ->fff_newstr | ||
2557 | |.endmacro | 2430 | |.endmacro |
2558 | | | 2431 | | |
2559 | |ffstring_case string_lower, 0x41, 0x5a | 2432 | |ffstring_op reverse |
2560 | |ffstring_case string_upper, 0x61, 0x7a | 2433 | |ffstring_op lower |
2561 | | | 2434 | |ffstring_op upper |
2562 | |//-- Table library ------------------------------------------------------ | ||
2563 | | | ||
2564 | |.ffunc_1 table_getn | ||
2565 | | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback | ||
2566 | | mov RB, BASE // Save BASE. | ||
2567 | | mov TAB:FCARG1, [BASE] | ||
2568 | | call extern lj_tab_len@4 // LJ_FASTCALL (GCtab *t) | ||
2569 | | // Length of table returned in eax (RD). | ||
2570 | | mov BASE, RB // Restore BASE. | ||
2571 | |.if DUALNUM | ||
2572 | | mov RB, RD; jmp ->fff_resi | ||
2573 | |.elif SSE | ||
2574 | | cvtsi2sd xmm0, RD; jmp ->fff_resxmm0 | ||
2575 | |.else | ||
2576 | | mov ARG1, RD; fild ARG1; jmp ->fff_resn | ||
2577 | |.endif | ||
2578 | | | 2435 | | |
2579 | |//-- Bit library -------------------------------------------------------- | 2436 | |//-- Bit library -------------------------------------------------------- |
2580 | | | 2437 | | |
2581 | |.define TOBIT_BIAS, 0x59c00000 // 2^52 + 2^51 (float, not double!). | ||
2582 | | | ||
2583 | |.macro .ffunc_bit, name, kind, fdef | 2438 | |.macro .ffunc_bit, name, kind, fdef |
2584 | | fdef name | 2439 | | fdef name |
2585 | |.if kind == 2 | 2440 | |.if kind == 2 |
2586 | |.if SSE | ||
2587 | | sseconst_tobit xmm1, RBa | 2441 | | sseconst_tobit xmm1, RBa |
2588 | |.else | ||
2589 | | mov TMP1, TOBIT_BIAS | ||
2590 | |.endif | ||
2591 | |.endif | 2442 | |.endif |
2592 | | cmp dword [BASE+4], LJ_TISNUM | 2443 | | cmp dword [BASE+4], LJ_TISNUM |
2593 | |.if DUALNUM | 2444 | |.if DUALNUM |
@@ -2603,24 +2454,12 @@ static void build_subroutines(BuildCtx *ctx) | |||
2603 | |.else | 2454 | |.else |
2604 | | jae ->fff_fallback | 2455 | | jae ->fff_fallback |
2605 | |.endif | 2456 | |.endif |
2606 | |.if SSE | ||
2607 | | movsd xmm0, qword [BASE] | 2457 | | movsd xmm0, qword [BASE] |
2608 | |.if kind < 2 | 2458 | |.if kind < 2 |
2609 | | sseconst_tobit xmm1, RBa | 2459 | | sseconst_tobit xmm1, RBa |
2610 | |.endif | 2460 | |.endif |
2611 | | addsd xmm0, xmm1 | 2461 | | addsd xmm0, xmm1 |
2612 | | movd RB, xmm0 | 2462 | | movd RB, xmm0 |
2613 | |.else | ||
2614 | | fld qword [BASE] | ||
2615 | |.if kind < 2 | ||
2616 | | mov TMP1, TOBIT_BIAS | ||
2617 | |.endif | ||
2618 | | fadd TMP1 | ||
2619 | | fstp FPARG1 | ||
2620 | |.if kind > 0 | ||
2621 | | mov RB, ARG1 | ||
2622 | |.endif | ||
2623 | |.endif | ||
2624 | |2: | 2463 | |2: |
2625 | |.endmacro | 2464 | |.endmacro |
2626 | | | 2465 | | |
@@ -2629,15 +2468,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
2629 | |.endmacro | 2468 | |.endmacro |
2630 | | | 2469 | | |
2631 | |.ffunc_bit bit_tobit, 0 | 2470 | |.ffunc_bit bit_tobit, 0 |
2632 | |.if DUALNUM or SSE | ||
2633 | |.if not SSE | ||
2634 | | mov RB, ARG1 | ||
2635 | |.endif | ||
2636 | | jmp ->fff_resbit | 2471 | | jmp ->fff_resbit |
2637 | |.else | ||
2638 | | fild ARG1 | ||
2639 | | jmp ->fff_resn | ||
2640 | |.endif | ||
2641 | | | 2472 | | |
2642 | |.macro .ffunc_bit_op, name, ins | 2473 | |.macro .ffunc_bit_op, name, ins |
2643 | | .ffunc_bit name, 2 | 2474 | | .ffunc_bit name, 2 |
@@ -2657,17 +2488,10 @@ static void build_subroutines(BuildCtx *ctx) | |||
2657 | |.else | 2488 | |.else |
2658 | | jae ->fff_fallback_bit_op | 2489 | | jae ->fff_fallback_bit_op |
2659 | |.endif | 2490 | |.endif |
2660 | |.if SSE | ||
2661 | | movsd xmm0, qword [RD] | 2491 | | movsd xmm0, qword [RD] |
2662 | | addsd xmm0, xmm1 | 2492 | | addsd xmm0, xmm1 |
2663 | | movd RA, xmm0 | 2493 | | movd RA, xmm0 |
2664 | | ins RB, RA | 2494 | | ins RB, RA |
2665 | |.else | ||
2666 | | fld qword [RD] | ||
2667 | | fadd TMP1 | ||
2668 | | fstp FPARG1 | ||
2669 | | ins RB, ARG1 | ||
2670 | |.endif | ||
2671 | | sub RD, 8 | 2495 | | sub RD, 8 |
2672 | | jmp <1 | 2496 | | jmp <1 |
2673 | |.endmacro | 2497 | |.endmacro |
@@ -2684,15 +2508,10 @@ static void build_subroutines(BuildCtx *ctx) | |||
2684 | | not RB | 2508 | | not RB |
2685 | |.if DUALNUM | 2509 | |.if DUALNUM |
2686 | | jmp ->fff_resbit | 2510 | | jmp ->fff_resbit |
2687 | |.elif SSE | 2511 | |.else |
2688 | |->fff_resbit: | 2512 | |->fff_resbit: |
2689 | | cvtsi2sd xmm0, RB | 2513 | | cvtsi2sd xmm0, RB |
2690 | | jmp ->fff_resxmm0 | 2514 | | jmp ->fff_resxmm0 |
2691 | |.else | ||
2692 | |->fff_resbit: | ||
2693 | | mov ARG1, RB | ||
2694 | | fild ARG1 | ||
2695 | | jmp ->fff_resn | ||
2696 | |.endif | 2515 | |.endif |
2697 | | | 2516 | | |
2698 | |->fff_fallback_bit_op: | 2517 | |->fff_fallback_bit_op: |
@@ -2705,22 +2524,13 @@ static void build_subroutines(BuildCtx *ctx) | |||
2705 | | // Note: no inline conversion from number for 2nd argument! | 2524 | | // Note: no inline conversion from number for 2nd argument! |
2706 | | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback | 2525 | | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback |
2707 | | mov RA, dword [BASE+8] | 2526 | | mov RA, dword [BASE+8] |
2708 | |.elif SSE | 2527 | |.else |
2709 | | .ffunc_nnsse name | 2528 | | .ffunc_nnsse name |
2710 | | sseconst_tobit xmm2, RBa | 2529 | | sseconst_tobit xmm2, RBa |
2711 | | addsd xmm0, xmm2 | 2530 | | addsd xmm0, xmm2 |
2712 | | addsd xmm1, xmm2 | 2531 | | addsd xmm1, xmm2 |
2713 | | movd RB, xmm0 | 2532 | | movd RB, xmm0 |
2714 | | movd RA, xmm1 | 2533 | | movd RA, xmm1 |
2715 | |.else | ||
2716 | | .ffunc_nn name | ||
2717 | | mov TMP1, TOBIT_BIAS | ||
2718 | | fadd TMP1 | ||
2719 | | fstp FPARG3 | ||
2720 | | fadd TMP1 | ||
2721 | | fstp FPARG1 | ||
2722 | | mov RA, ARG3 | ||
2723 | | mov RB, ARG1 | ||
2724 | |.endif | 2534 | |.endif |
2725 | | ins RB, cl // Assumes RA is ecx. | 2535 | | ins RB, cl // Assumes RA is ecx. |
2726 | | jmp ->fff_resbit | 2536 | | jmp ->fff_resbit |
@@ -2854,7 +2664,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
2854 | | mov FCARG2, PC // Caveat: FCARG2 == BASE | 2664 | | mov FCARG2, PC // Caveat: FCARG2 == BASE |
2855 | | mov FCARG1, L:RB | 2665 | | mov FCARG1, L:RB |
2856 | | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. | 2666 | | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. |
2857 | | call extern lj_dispatch_ins@8 // (lua_State *L, BCIns *pc) | 2667 | | call extern lj_dispatch_ins@8 // (lua_State *L, const BCIns *pc) |
2858 | |3: | 2668 | |3: |
2859 | | mov BASE, L:RB->base | 2669 | | mov BASE, L:RB->base |
2860 | |4: | 2670 | |4: |
@@ -2925,6 +2735,79 @@ static void build_subroutines(BuildCtx *ctx) | |||
2925 | | add NARGS:RD, 1 | 2735 | | add NARGS:RD, 1 |
2926 | | jmp RBa | 2736 | | jmp RBa |
2927 | | | 2737 | | |
2738 | |->cont_stitch: // Trace stitching. | ||
2739 | |.if JIT | ||
2740 | | // BASE = base, RC = result, RB = mbase | ||
2741 | | mov TRACE:RA, [RB-24] // Save previous trace. | ||
2742 | | mov TMP1, TRACE:RA | ||
2743 | | mov TMP3, DISPATCH // Need one more register. | ||
2744 | | mov DISPATCH, MULTRES | ||
2745 | | movzx RA, PC_RA | ||
2746 | | lea RA, [BASE+RA*8] // Call base. | ||
2747 | | sub DISPATCH, 1 | ||
2748 | | jz >2 | ||
2749 | |1: // Move results down. | ||
2750 | |.if X64 | ||
2751 | | mov RBa, [RC] | ||
2752 | | mov [RA], RBa | ||
2753 | |.else | ||
2754 | | mov RB, [RC] | ||
2755 | | mov [RA], RB | ||
2756 | | mov RB, [RC+4] | ||
2757 | | mov [RA+4], RB | ||
2758 | |.endif | ||
2759 | | add RC, 8 | ||
2760 | | add RA, 8 | ||
2761 | | sub DISPATCH, 1 | ||
2762 | | jnz <1 | ||
2763 | |2: | ||
2764 | | movzx RC, PC_RA | ||
2765 | | movzx RB, PC_RB | ||
2766 | | add RC, RB | ||
2767 | | lea RC, [BASE+RC*8-8] | ||
2768 | |3: | ||
2769 | | cmp RC, RA | ||
2770 | | ja >9 // More results wanted? | ||
2771 | | | ||
2772 | | mov DISPATCH, TMP3 | ||
2773 | | mov TRACE:RD, TMP1 // Get previous trace. | ||
2774 | | movzx RB, word TRACE:RD->traceno | ||
2775 | | movzx RD, word TRACE:RD->link | ||
2776 | | cmp RD, RB | ||
2777 | | je ->cont_nop // Blacklisted. | ||
2778 | | test RD, RD | ||
2779 | | jne =>BC_JLOOP // Jump to stitched trace. | ||
2780 | | | ||
2781 | | // Stitch a new trace to the previous trace. | ||
2782 | | mov [DISPATCH+DISPATCH_J(exitno)], RB | ||
2783 | | mov L:RB, SAVE_L | ||
2784 | | mov L:RB->base, BASE | ||
2785 | | mov FCARG2, PC | ||
2786 | | lea FCARG1, [DISPATCH+GG_DISP2J] | ||
2787 | | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa | ||
2788 | | call extern lj_dispatch_stitch@8 // (jit_State *J, const BCIns *pc) | ||
2789 | | mov BASE, L:RB->base | ||
2790 | | jmp ->cont_nop | ||
2791 | | | ||
2792 | |9: // Fill up results with nil. | ||
2793 | | mov dword [RA+4], LJ_TNIL | ||
2794 | | add RA, 8 | ||
2795 | | jmp <3 | ||
2796 | |.endif | ||
2797 | | | ||
2798 | |->vm_profhook: // Dispatch target for profiler hook. | ||
2799 | #if LJ_HASPROFILE | ||
2800 | | mov L:RB, SAVE_L | ||
2801 | | mov L:RB->base, BASE | ||
2802 | | mov FCARG2, PC // Caveat: FCARG2 == BASE | ||
2803 | | mov FCARG1, L:RB | ||
2804 | | call extern lj_dispatch_profile@8 // (lua_State *L, const BCIns *pc) | ||
2805 | | mov BASE, L:RB->base | ||
2806 | | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. | ||
2807 | | sub PC, 4 | ||
2808 | | jmp ->cont_nop | ||
2809 | #endif | ||
2810 | | | ||
2928 | |//----------------------------------------------------------------------- | 2811 | |//----------------------------------------------------------------------- |
2929 | |//-- Trace exit handler ------------------------------------------------- | 2812 | |//-- Trace exit handler ------------------------------------------------- |
2930 | |//----------------------------------------------------------------------- | 2813 | |//----------------------------------------------------------------------- |
@@ -2977,10 +2860,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
2977 | | movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0 | 2860 | | movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0 |
2978 | |.endif | 2861 | |.endif |
2979 | | // Caveat: RB is ebp. | 2862 | | // Caveat: RB is ebp. |
2980 | | mov L:RB, [DISPATCH+DISPATCH_GL(jit_L)] | 2863 | | mov L:RB, [DISPATCH+DISPATCH_GL(cur_L)] |
2981 | | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)] | 2864 | | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)] |
2982 | | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa | 2865 | | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa |
2983 | | mov dword [DISPATCH+DISPATCH_GL(jit_L)], 0 | ||
2984 | | mov L:RB->base, BASE | 2866 | | mov L:RB->base, BASE |
2985 | |.if X64WIN | 2867 | |.if X64WIN |
2986 | | lea CARG2, [rsp+4*8] | 2868 | | lea CARG2, [rsp+4*8] |
@@ -2990,6 +2872,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
2990 | | lea FCARG2, [esp+16] | 2872 | | lea FCARG2, [esp+16] |
2991 | |.endif | 2873 | |.endif |
2992 | | lea FCARG1, [DISPATCH+GG_DISP2J] | 2874 | | lea FCARG1, [DISPATCH+GG_DISP2J] |
2875 | | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0 | ||
2993 | | call extern lj_trace_exit@8 // (jit_State *J, ExitState *ex) | 2876 | | call extern lj_trace_exit@8 // (jit_State *J, ExitState *ex) |
2994 | | // MULTRES or negated error code returned in eax (RD). | 2877 | | // MULTRES or negated error code returned in eax (RD). |
2995 | | mov RAa, L:RB->cframe | 2878 | | mov RAa, L:RB->cframe |
@@ -3036,12 +2919,14 @@ static void build_subroutines(BuildCtx *ctx) | |||
3036 | | mov r13, TMPa | 2919 | | mov r13, TMPa |
3037 | | mov r12, TMPQ | 2920 | | mov r12, TMPQ |
3038 | |.endif | 2921 | |.endif |
3039 | | test RD, RD; js >3 // Check for error from exit. | 2922 | | test RD, RD; js >9 // Check for error from exit. |
2923 | | mov L:RB, SAVE_L | ||
3040 | | mov MULTRES, RD | 2924 | | mov MULTRES, RD |
3041 | | mov LFUNC:KBASE, [BASE-8] | 2925 | | mov LFUNC:KBASE, [BASE-8] |
3042 | | mov KBASE, LFUNC:KBASE->pc | 2926 | | mov KBASE, LFUNC:KBASE->pc |
3043 | | mov KBASE, [KBASE+PC2PROTO(k)] | 2927 | | mov KBASE, [KBASE+PC2PROTO(k)] |
3044 | | mov dword [DISPATCH+DISPATCH_GL(jit_L)], 0 | 2928 | | mov L:RB->base, BASE |
2929 | | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0 | ||
3045 | | set_vmstate INTERP | 2930 | | set_vmstate INTERP |
3046 | | // Modified copy of ins_next which handles function header dispatch, too. | 2931 | | // Modified copy of ins_next which handles function header dispatch, too. |
3047 | | mov RC, [PC] | 2932 | | mov RC, [PC] |
@@ -3050,16 +2935,31 @@ static void build_subroutines(BuildCtx *ctx) | |||
3050 | | add PC, 4 | 2935 | | add PC, 4 |
3051 | | shr RC, 16 | 2936 | | shr RC, 16 |
3052 | | cmp OP, BC_FUNCF // Function header? | 2937 | | cmp OP, BC_FUNCF // Function header? |
3053 | | jb >2 | 2938 | | jb >3 |
3054 | | mov RC, MULTRES // RC/RD holds nres+1. | 2939 | | cmp OP, BC_FUNCC+2 // Fast function? |
2940 | | jae >4 | ||
3055 | |2: | 2941 | |2: |
2942 | | mov RC, MULTRES // RC/RD holds nres+1. | ||
2943 | |3: | ||
3056 | |.if X64 | 2944 | |.if X64 |
3057 | | jmp aword [DISPATCH+OP*8] | 2945 | | jmp aword [DISPATCH+OP*8] |
3058 | |.else | 2946 | |.else |
3059 | | jmp aword [DISPATCH+OP*4] | 2947 | | jmp aword [DISPATCH+OP*4] |
3060 | |.endif | 2948 | |.endif |
3061 | | | 2949 | | |
3062 | |3: // Rethrow error from the right C frame. | 2950 | |4: // Check frame below fast function. |
2951 | | mov RC, [BASE-4] | ||
2952 | | test RC, FRAME_TYPE | ||
2953 | | jnz <2 // Trace stitching continuation? | ||
2954 | | // Otherwise set KBASE for Lua function below fast function. | ||
2955 | | movzx RC, byte [RC-3] | ||
2956 | | not RCa | ||
2957 | | mov LFUNC:KBASE, [BASE+RC*8-8] | ||
2958 | | mov KBASE, LFUNC:KBASE->pc | ||
2959 | | mov KBASE, [KBASE+PC2PROTO(k)] | ||
2960 | | jmp <2 | ||
2961 | | | ||
2962 | |9: // Rethrow error from the right C frame. | ||
3063 | | neg RD | 2963 | | neg RD |
3064 | | mov FCARG1, L:RB | 2964 | | mov FCARG1, L:RB |
3065 | | mov FCARG2, RD | 2965 | | mov FCARG2, RD |
@@ -3071,27 +2971,18 @@ static void build_subroutines(BuildCtx *ctx) | |||
3071 | |//----------------------------------------------------------------------- | 2971 | |//----------------------------------------------------------------------- |
3072 | | | 2972 | | |
3073 | |// FP value rounding. Called by math.floor/math.ceil fast functions | 2973 | |// FP value rounding. Called by math.floor/math.ceil fast functions |
3074 | |// and from JIT code. | 2974 | |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified. |
3075 | | | 2975 | |.macro vm_round, name, mode, cond |
3076 | |// x87 variant: Arg/ret on x87 stack. No int/xmm registers modified. | 2976 | |->name: |
3077 | |.macro vm_round_x87, mode1, mode2 | 2977 | |.if not X64 and cond |
3078 | | fnstcw word [esp+4] // Caveat: overwrites ARG1 and ARG2. | 2978 | | movsd xmm0, qword [esp+4] |
3079 | | mov [esp+8], eax | 2979 | | call ->name .. _sse |
3080 | | mov ax, mode1 | 2980 | | movsd qword [esp+4], xmm0 // Overwrite callee-owned arg. |
3081 | | or ax, [esp+4] | 2981 | | fld qword [esp+4] |
3082 | |.if mode2 ~= 0xffff | ||
3083 | | and ax, mode2 | ||
3084 | |.endif | ||
3085 | | mov [esp+6], ax | ||
3086 | | fldcw word [esp+6] | ||
3087 | | frndint | ||
3088 | | fldcw word [esp+4] | ||
3089 | | mov eax, [esp+8] | ||
3090 | | ret | 2982 | | ret |
3091 | |.endmacro | 2983 | |.endif |
3092 | | | 2984 | | |
3093 | |// SSE variant: arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified. | 2985 | |->name .. _sse: |
3094 | |.macro vm_round_sse, mode | ||
3095 | | sseconst_abs xmm2, RDa | 2986 | | sseconst_abs xmm2, RDa |
3096 | | sseconst_2p52 xmm3, RDa | 2987 | | sseconst_2p52 xmm3, RDa |
3097 | | movaps xmm1, xmm0 | 2988 | | movaps xmm1, xmm0 |
@@ -3127,22 +3018,12 @@ static void build_subroutines(BuildCtx *ctx) | |||
3127 | | ret | 3018 | | ret |
3128 | |.endmacro | 3019 | |.endmacro |
3129 | | | 3020 | | |
3130 | |.macro vm_round, name, ssemode, mode1, mode2 | 3021 | | vm_round vm_floor, 0, 1 |
3131 | |->name: | 3022 | | vm_round vm_ceil, 1, JIT |
3132 | |.if not SSE | 3023 | | vm_round vm_trunc, 2, JIT |
3133 | | vm_round_x87 mode1, mode2 | ||
3134 | |.endif | ||
3135 | |->name .. _sse: | ||
3136 | | vm_round_sse ssemode | ||
3137 | |.endmacro | ||
3138 | | | ||
3139 | | vm_round vm_floor, 0, 0x0400, 0xf7ff | ||
3140 | | vm_round vm_ceil, 1, 0x0800, 0xfbff | ||
3141 | | vm_round vm_trunc, 2, 0x0c00, 0xffff | ||
3142 | | | 3024 | | |
3143 | |// FP modulo x%y. Called by BC_MOD* and vm_arith. | 3025 | |// FP modulo x%y. Called by BC_MOD* and vm_arith. |
3144 | |->vm_mod: | 3026 | |->vm_mod: |
3145 | |.if SSE | ||
3146 | |// Args in xmm0/xmm1, return value in xmm0. | 3027 | |// Args in xmm0/xmm1, return value in xmm0. |
3147 | |// Caveat: xmm0-xmm5 and RC (eax) modified! | 3028 | |// Caveat: xmm0-xmm5 and RC (eax) modified! |
3148 | | movaps xmm5, xmm0 | 3029 | | movaps xmm5, xmm0 |
@@ -3170,172 +3051,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
3170 | | movaps xmm0, xmm5 | 3051 | | movaps xmm0, xmm5 |
3171 | | subsd xmm0, xmm1 | 3052 | | subsd xmm0, xmm1 |
3172 | | ret | 3053 | | ret |
3173 | |.else | ||
3174 | |// Args/ret on x87 stack (y on top). No xmm registers modified. | ||
3175 | |// Caveat: needs 3 slots on x87 stack! RC (eax) modified! | ||
3176 | | fld st1 | ||
3177 | | fdiv st1 | ||
3178 | | fnstcw word [esp+4] | ||
3179 | | mov ax, 0x0400 | ||
3180 | | or ax, [esp+4] | ||
3181 | | and ax, 0xf7ff | ||
3182 | | mov [esp+6], ax | ||
3183 | | fldcw word [esp+6] | ||
3184 | | frndint | ||
3185 | | fldcw word [esp+4] | ||
3186 | | fmulp st1 | ||
3187 | | fsubp st1 | ||
3188 | | ret | ||
3189 | |.endif | ||
3190 | | | ||
3191 | |// FP log2(x). Called by math.log(x, base). | ||
3192 | |->vm_log2: | ||
3193 | |.if X64WIN | ||
3194 | | movsd qword [rsp+8], xmm0 // Use scratch area. | ||
3195 | | fld1 | ||
3196 | | fld qword [rsp+8] | ||
3197 | | fyl2x | ||
3198 | | fstp qword [rsp+8] | ||
3199 | | movsd xmm0, qword [rsp+8] | ||
3200 | |.elif X64 | ||
3201 | | movsd qword [rsp-8], xmm0 // Use red zone. | ||
3202 | | fld1 | ||
3203 | | fld qword [rsp-8] | ||
3204 | | fyl2x | ||
3205 | | fstp qword [rsp-8] | ||
3206 | | movsd xmm0, qword [rsp-8] | ||
3207 | |.else | ||
3208 | | fld1 | ||
3209 | | fld qword [esp+4] | ||
3210 | | fyl2x | ||
3211 | |.endif | ||
3212 | | ret | ||
3213 | | | ||
3214 | |// FP exponentiation e^x and 2^x. Called by math.exp fast function and | ||
3215 | |// from JIT code. Arg/ret on x87 stack. No int/xmm regs modified. | ||
3216 | |// Caveat: needs 3 slots on x87 stack! | ||
3217 | |->vm_exp_x87: | ||
3218 | | fldl2e; fmulp st1 // e^x ==> 2^(x*log2(e)) | ||
3219 | |->vm_exp2_x87: | ||
3220 | | .if X64WIN | ||
3221 | | .define expscratch, dword [rsp+8] // Use scratch area. | ||
3222 | | .elif X64 | ||
3223 | | .define expscratch, dword [rsp-8] // Use red zone. | ||
3224 | | .else | ||
3225 | | .define expscratch, dword [esp+4] // Needs 4 byte scratch area. | ||
3226 | | .endif | ||
3227 | | fst expscratch // Caveat: overwrites ARG1. | ||
3228 | | cmp expscratch, 0x7f800000; je >1 // Special case: e^+Inf = +Inf | ||
3229 | | cmp expscratch, 0xff800000; je >2 // Special case: e^-Inf = 0 | ||
3230 | |->vm_exp2raw: // Entry point for vm_pow. Without +-Inf check. | ||
3231 | | fdup; frndint; fsub st1, st0; fxch // Split into frac/int part. | ||
3232 | | f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int | ||
3233 | |1: | ||
3234 | | ret | ||
3235 | |2: | ||
3236 | | fpop; fldz; ret | ||
3237 | | | ||
3238 | |// Generic power function x^y. Called by BC_POW, math.pow fast function, | ||
3239 | |// and vm_arith. | ||
3240 | |// Args/ret on x87 stack (y on top). RC (eax) modified. | ||
3241 | |// Caveat: needs 3 slots on x87 stack! | ||
3242 | |->vm_pow: | ||
3243 | |.if not SSE | ||
3244 | | fist dword [esp+4] // Store/reload int before comparison. | ||
3245 | | fild dword [esp+4] // Integral exponent used in vm_powi. | ||
3246 | | fucomip st1 | ||
3247 | | jnz >8 // Branch for FP exponents. | ||
3248 | | jp >9 // Branch for NaN exponent. | ||
3249 | | fpop // Pop y and fallthrough to vm_powi. | ||
3250 | | | ||
3251 | |// FP/int power function x^i. Arg1/ret on x87 stack. | ||
3252 | |// Arg2 (int) on C stack. RC (eax) modified. | ||
3253 | |// Caveat: needs 2 slots on x87 stack! | ||
3254 | | mov eax, [esp+4] | ||
3255 | | cmp eax, 1; jle >6 // i<=1? | ||
3256 | | // Now 1 < (unsigned)i <= 0x80000000. | ||
3257 | |1: // Handle leading zeros. | ||
3258 | | test eax, 1; jnz >2 | ||
3259 | | fmul st0 | ||
3260 | | shr eax, 1 | ||
3261 | | jmp <1 | ||
3262 | |2: | ||
3263 | | shr eax, 1; jz >5 | ||
3264 | | fdup | ||
3265 | |3: // Handle trailing bits. | ||
3266 | | fmul st0 | ||
3267 | | shr eax, 1; jz >4 | ||
3268 | | jnc <3 | ||
3269 | | fmul st1, st0 | ||
3270 | | jmp <3 | ||
3271 | |4: | ||
3272 | | fmulp st1 | ||
3273 | |5: | ||
3274 | | ret | ||
3275 | |6: | ||
3276 | | je <5 // x^1 ==> x | ||
3277 | | jb >7 | ||
3278 | | fld1; fdivrp st1 | ||
3279 | | neg eax | ||
3280 | | cmp eax, 1; je <5 // x^-1 ==> 1/x | ||
3281 | | jmp <1 // x^-i ==> (1/x)^i | ||
3282 | |7: | ||
3283 | | fpop; fld1 // x^0 ==> 1 | ||
3284 | | ret | ||
3285 | | | ||
3286 | |8: // FP/FP power function x^y. | ||
3287 | | fst dword [esp+4] | ||
3288 | | fxch | ||
3289 | | fst dword [esp+8] | ||
3290 | | mov eax, [esp+4]; shl eax, 1 | ||
3291 | | cmp eax, 0xff000000; je >2 // x^+-Inf? | ||
3292 | | mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y? | ||
3293 | | cmp eax, 0xff000000; je >4 // +-Inf^y? | ||
3294 | | fyl2x | ||
3295 | | jmp ->vm_exp2raw | ||
3296 | | | ||
3297 | |9: // Handle x^NaN. | ||
3298 | | fld1 | ||
3299 | | fucomip st2 | ||
3300 | | je >1 // 1^NaN ==> 1 | ||
3301 | | fxch // x^NaN ==> NaN | ||
3302 | |1: | ||
3303 | | fpop | ||
3304 | | ret | ||
3305 | | | ||
3306 | |2: // Handle x^+-Inf. | ||
3307 | | fabs | ||
3308 | | fld1 | ||
3309 | | fucomip st1 | ||
3310 | | je >3 // +-1^+-Inf ==> 1 | ||
3311 | | fpop; fabs; fldz; mov eax, 0; setc al | ||
3312 | | ror eax, 1; xor eax, [esp+4]; jns >3 // |x|<>1, x^+-Inf ==> +Inf/0 | ||
3313 | | fxch | ||
3314 | |3: | ||
3315 | | fpop1; fabs | ||
3316 | | ret | ||
3317 | | | ||
3318 | |4: // Handle +-0^y or +-Inf^y. | ||
3319 | | cmp dword [esp+4], 0; jge <3 // y >= 0, x^y ==> |x| | ||
3320 | | fpop; fpop | ||
3321 | | test eax, eax; jz >5 // y < 0, +-0^y ==> +Inf | ||
3322 | | fldz // y < 0, +-Inf^y ==> 0 | ||
3323 | | ret | ||
3324 | |5: | ||
3325 | | mov dword [esp+4], 0x7f800000 // Return +Inf. | ||
3326 | | fld dword [esp+4] | ||
3327 | | ret | ||
3328 | |.endif | ||
3329 | | | ||
3330 | |// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified. | ||
3331 | |// Needs 16 byte scratch area for x86. Also called from JIT code. | ||
3332 | |->vm_pow_sse: | ||
3333 | | cvtsd2si eax, xmm1 | ||
3334 | | cvtsi2sd xmm2, eax | ||
3335 | | ucomisd xmm1, xmm2 | ||
3336 | | jnz >8 // Branch for FP exponents. | ||
3337 | | jp >9 // Branch for NaN exponent. | ||
3338 | | // Fallthrough to vm_powi_sse. | ||
3339 | | | 3054 | | |
3340 | |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified. | 3055 | |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified. |
3341 | |->vm_powi_sse: | 3056 | |->vm_powi_sse: |
@@ -3372,287 +3087,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
3372 | | sseconst_1 xmm0, RDa | 3087 | | sseconst_1 xmm0, RDa |
3373 | | ret | 3088 | | ret |
3374 | | | 3089 | | |
3375 | |8: // FP/FP power function x^y. | ||
3376 | |.if X64 | ||
3377 | | movd rax, xmm1; shl rax, 1 | ||
3378 | | rol rax, 12; cmp rax, 0xffe; je >2 // x^+-Inf? | ||
3379 | | movd rax, xmm0; shl rax, 1; je >4 // +-0^y? | ||
3380 | | rol rax, 12; cmp rax, 0xffe; je >5 // +-Inf^y? | ||
3381 | | .if X64WIN | ||
3382 | | movsd qword [rsp+16], xmm1 // Use scratch area. | ||
3383 | | movsd qword [rsp+8], xmm0 | ||
3384 | | fld qword [rsp+16] | ||
3385 | | fld qword [rsp+8] | ||
3386 | | .else | ||
3387 | | movsd qword [rsp-16], xmm1 // Use red zone. | ||
3388 | | movsd qword [rsp-8], xmm0 | ||
3389 | | fld qword [rsp-16] | ||
3390 | | fld qword [rsp-8] | ||
3391 | | .endif | ||
3392 | |.else | ||
3393 | | movsd qword [esp+12], xmm1 // Needs 16 byte scratch area. | ||
3394 | | movsd qword [esp+4], xmm0 | ||
3395 | | cmp dword [esp+12], 0; jne >1 | ||
3396 | | mov eax, [esp+16]; shl eax, 1 | ||
3397 | | cmp eax, 0xffe00000; je >2 // x^+-Inf? | ||
3398 | |1: | ||
3399 | | cmp dword [esp+4], 0; jne >1 | ||
3400 | | mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y? | ||
3401 | | cmp eax, 0xffe00000; je >5 // +-Inf^y? | ||
3402 | |1: | ||
3403 | | fld qword [esp+12] | ||
3404 | | fld qword [esp+4] | ||
3405 | |.endif | ||
3406 | | fyl2x // y*log2(x) | ||
3407 | | fdup; frndint; fsub st1, st0; fxch // Split into frac/int part. | ||
3408 | | f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int | ||
3409 | |.if X64WIN | ||
3410 | | fstp qword [rsp+8] // Use scratch area. | ||
3411 | | movsd xmm0, qword [rsp+8] | ||
3412 | |.elif X64 | ||
3413 | | fstp qword [rsp-8] // Use red zone. | ||
3414 | | movsd xmm0, qword [rsp-8] | ||
3415 | |.else | ||
3416 | | fstp qword [esp+4] // Needs 8 byte scratch area. | ||
3417 | | movsd xmm0, qword [esp+4] | ||
3418 | |.endif | ||
3419 | | ret | ||
3420 | | | ||
3421 | |9: // Handle x^NaN. | ||
3422 | | sseconst_1 xmm2, RDa | ||
3423 | | ucomisd xmm0, xmm2; je >1 // 1^NaN ==> 1 | ||
3424 | | movaps xmm0, xmm1 // x^NaN ==> NaN | ||
3425 | |1: | ||
3426 | | ret | ||
3427 | | | ||
3428 | |2: // Handle x^+-Inf. | ||
3429 | | sseconst_abs xmm2, RDa | ||
3430 | | andpd xmm0, xmm2 // |x| | ||
3431 | | sseconst_1 xmm2, RDa | ||
3432 | | ucomisd xmm0, xmm2; je <1 // +-1^+-Inf ==> 1 | ||
3433 | | movmskpd eax, xmm1 | ||
3434 | | xorps xmm0, xmm0 | ||
3435 | | mov ah, al; setc al; xor al, ah; jne <1 // |x|<>1, x^+-Inf ==> +Inf/0 | ||
3436 | |3: | ||
3437 | | sseconst_hi xmm0, RDa, 7ff00000 // +Inf | ||
3438 | | ret | ||
3439 | | | ||
3440 | |4: // Handle +-0^y. | ||
3441 | | movmskpd eax, xmm1; test eax, eax; jnz <3 // y < 0, +-0^y ==> +Inf | ||
3442 | | xorps xmm0, xmm0 // y >= 0, +-0^y ==> 0 | ||
3443 | | ret | ||
3444 | | | ||
3445 | |5: // Handle +-Inf^y. | ||
3446 | | movmskpd eax, xmm1; test eax, eax; jz <3 // y >= 0, +-Inf^y ==> +Inf | ||
3447 | | xorps xmm0, xmm0 // y < 0, +-Inf^y ==> 0 | ||
3448 | | ret | ||
3449 | | | ||
3450 | |// Callable from C: double lj_vm_foldfpm(double x, int fpm) | ||
3451 | |// Computes fpm(x) for extended math functions. ORDER FPM. | ||
3452 | |->vm_foldfpm: | ||
3453 | |.if JIT | ||
3454 | |.if X64 | ||
3455 | | .if X64WIN | ||
3456 | | .define fpmop, CARG2d | ||
3457 | | .else | ||
3458 | | .define fpmop, CARG1d | ||
3459 | | .endif | ||
3460 | | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil | ||
3461 | | cmp fpmop, 3; jb ->vm_trunc; ja >2 | ||
3462 | | sqrtsd xmm0, xmm0; ret | ||
3463 | |2: | ||
3464 | | .if X64WIN | ||
3465 | | movsd qword [rsp+8], xmm0 // Use scratch area. | ||
3466 | | fld qword [rsp+8] | ||
3467 | | .else | ||
3468 | | movsd qword [rsp-8], xmm0 // Use red zone. | ||
3469 | | fld qword [rsp-8] | ||
3470 | | .endif | ||
3471 | | cmp fpmop, 5; ja >2 | ||
3472 | | .if X64WIN; pop rax; .endif | ||
3473 | | je >1 | ||
3474 | | call ->vm_exp_x87 | ||
3475 | | .if X64WIN; push rax; .endif | ||
3476 | | jmp >7 | ||
3477 | |1: | ||
3478 | | call ->vm_exp2_x87 | ||
3479 | | .if X64WIN; push rax; .endif | ||
3480 | | jmp >7 | ||
3481 | |2: ; cmp fpmop, 7; je >1; ja >2 | ||
3482 | | fldln2; fxch; fyl2x; jmp >7 | ||
3483 | |1: ; fld1; fxch; fyl2x; jmp >7 | ||
3484 | |2: ; cmp fpmop, 9; je >1; ja >2 | ||
3485 | | fldlg2; fxch; fyl2x; jmp >7 | ||
3486 | |1: ; fsin; jmp >7 | ||
3487 | |2: ; cmp fpmop, 11; je >1; ja >9 | ||
3488 | | fcos; jmp >7 | ||
3489 | |1: ; fptan; fpop | ||
3490 | |7: | ||
3491 | | .if X64WIN | ||
3492 | | fstp qword [rsp+8] // Use scratch area. | ||
3493 | | movsd xmm0, qword [rsp+8] | ||
3494 | | .else | ||
3495 | | fstp qword [rsp-8] // Use red zone. | ||
3496 | | movsd xmm0, qword [rsp-8] | ||
3497 | | .endif | ||
3498 | | ret | ||
3499 | |.else // x86 calling convention. | ||
3500 | | .define fpmop, eax | ||
3501 | |.if SSE | ||
3502 | | mov fpmop, [esp+12] | ||
3503 | | movsd xmm0, qword [esp+4] | ||
3504 | | cmp fpmop, 1; je >1; ja >2 | ||
3505 | | call ->vm_floor; jmp >7 | ||
3506 | |1: ; call ->vm_ceil; jmp >7 | ||
3507 | |2: ; cmp fpmop, 3; je >1; ja >2 | ||
3508 | | call ->vm_trunc; jmp >7 | ||
3509 | |1: | ||
3510 | | sqrtsd xmm0, xmm0 | ||
3511 | |7: | ||
3512 | | movsd qword [esp+4], xmm0 // Overwrite callee-owned args. | ||
3513 | | fld qword [esp+4] | ||
3514 | | ret | ||
3515 | |2: ; fld qword [esp+4] | ||
3516 | | cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87 | ||
3517 | |2: ; cmp fpmop, 7; je >1; ja >2 | ||
3518 | | fldln2; fxch; fyl2x; ret | ||
3519 | |1: ; fld1; fxch; fyl2x; ret | ||
3520 | |2: ; cmp fpmop, 9; je >1; ja >2 | ||
3521 | | fldlg2; fxch; fyl2x; ret | ||
3522 | |1: ; fsin; ret | ||
3523 | |2: ; cmp fpmop, 11; je >1; ja >9 | ||
3524 | | fcos; ret | ||
3525 | |1: ; fptan; fpop; ret | ||
3526 | |.else | ||
3527 | | mov fpmop, [esp+12] | ||
3528 | | fld qword [esp+4] | ||
3529 | | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil | ||
3530 | | cmp fpmop, 3; jb ->vm_trunc; ja >2 | ||
3531 | | fsqrt; ret | ||
3532 | |2: ; cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87 | ||
3533 | | cmp fpmop, 7; je >1; ja >2 | ||
3534 | | fldln2; fxch; fyl2x; ret | ||
3535 | |1: ; fld1; fxch; fyl2x; ret | ||
3536 | |2: ; cmp fpmop, 9; je >1; ja >2 | ||
3537 | | fldlg2; fxch; fyl2x; ret | ||
3538 | |1: ; fsin; ret | ||
3539 | |2: ; cmp fpmop, 11; je >1; ja >9 | ||
3540 | | fcos; ret | ||
3541 | |1: ; fptan; fpop; ret | ||
3542 | |.endif | ||
3543 | |.endif | ||
3544 | |9: ; int3 // Bad fpm. | ||
3545 | |.endif | ||
3546 | | | ||
3547 | |// Callable from C: double lj_vm_foldarith(double x, double y, int op) | ||
3548 | |// Compute x op y for basic arithmetic operators (+ - * / % ^ and unary -) | ||
3549 | |// and basic math functions. ORDER ARITH | ||
3550 | |->vm_foldarith: | ||
3551 | |.if X64 | ||
3552 | | | ||
3553 | | .if X64WIN | ||
3554 | | .define foldop, CARG3d | ||
3555 | | .else | ||
3556 | | .define foldop, CARG1d | ||
3557 | | .endif | ||
3558 | | cmp foldop, 1; je >1; ja >2 | ||
3559 | | addsd xmm0, xmm1; ret | ||
3560 | |1: ; subsd xmm0, xmm1; ret | ||
3561 | |2: ; cmp foldop, 3; je >1; ja >2 | ||
3562 | | mulsd xmm0, xmm1; ret | ||
3563 | |1: ; divsd xmm0, xmm1; ret | ||
3564 | |2: ; cmp foldop, 5; jb ->vm_mod; je ->vm_pow | ||
3565 | | cmp foldop, 7; je >1; ja >2 | ||
3566 | | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; ret | ||
3567 | |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; ret | ||
3568 | |2: ; cmp foldop, 9; ja >2 | ||
3569 | |.if X64WIN | ||
3570 | | movsd qword [rsp+8], xmm0 // Use scratch area. | ||
3571 | | movsd qword [rsp+16], xmm1 | ||
3572 | | fld qword [rsp+8] | ||
3573 | | fld qword [rsp+16] | ||
3574 | |.else | ||
3575 | | movsd qword [rsp-8], xmm0 // Use red zone. | ||
3576 | | movsd qword [rsp-16], xmm1 | ||
3577 | | fld qword [rsp-8] | ||
3578 | | fld qword [rsp-16] | ||
3579 | |.endif | ||
3580 | | je >1 | ||
3581 | | fpatan | ||
3582 | |7: | ||
3583 | |.if X64WIN | ||
3584 | | fstp qword [rsp+8] // Use scratch area. | ||
3585 | | movsd xmm0, qword [rsp+8] | ||
3586 | |.else | ||
3587 | | fstp qword [rsp-8] // Use red zone. | ||
3588 | | movsd xmm0, qword [rsp-8] | ||
3589 | |.endif | ||
3590 | | ret | ||
3591 | |1: ; fxch; fscale; fpop1; jmp <7 | ||
3592 | |2: ; cmp foldop, 11; je >1; ja >9 | ||
3593 | | minsd xmm0, xmm1; ret | ||
3594 | |1: ; maxsd xmm0, xmm1; ret | ||
3595 | |9: ; int3 // Bad op. | ||
3596 | | | ||
3597 | |.elif SSE // x86 calling convention with SSE ops. | ||
3598 | | | ||
3599 | | .define foldop, eax | ||
3600 | | mov foldop, [esp+20] | ||
3601 | | movsd xmm0, qword [esp+4] | ||
3602 | | movsd xmm1, qword [esp+12] | ||
3603 | | cmp foldop, 1; je >1; ja >2 | ||
3604 | | addsd xmm0, xmm1 | ||
3605 | |7: | ||
3606 | | movsd qword [esp+4], xmm0 // Overwrite callee-owned args. | ||
3607 | | fld qword [esp+4] | ||
3608 | | ret | ||
3609 | |1: ; subsd xmm0, xmm1; jmp <7 | ||
3610 | |2: ; cmp foldop, 3; je >1; ja >2 | ||
3611 | | mulsd xmm0, xmm1; jmp <7 | ||
3612 | |1: ; divsd xmm0, xmm1; jmp <7 | ||
3613 | |2: ; cmp foldop, 5 | ||
3614 | | je >1; ja >2 | ||
3615 | | call ->vm_mod; jmp <7 | ||
3616 | |1: ; pop edx; call ->vm_pow; push edx; jmp <7 // Writes to scratch area. | ||
3617 | |2: ; cmp foldop, 7; je >1; ja >2 | ||
3618 | | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; jmp <7 | ||
3619 | |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; jmp <7 | ||
3620 | |2: ; cmp foldop, 9; ja >2 | ||
3621 | | fld qword [esp+4] // Reload from stack | ||
3622 | | fld qword [esp+12] | ||
3623 | | je >1 | ||
3624 | | fpatan; ret | ||
3625 | |1: ; fxch; fscale; fpop1; ret | ||
3626 | |2: ; cmp foldop, 11; je >1; ja >9 | ||
3627 | | minsd xmm0, xmm1; jmp <7 | ||
3628 | |1: ; maxsd xmm0, xmm1; jmp <7 | ||
3629 | |9: ; int3 // Bad op. | ||
3630 | | | ||
3631 | |.else // x86 calling convention with x87 ops. | ||
3632 | | | ||
3633 | | mov eax, [esp+20] | ||
3634 | | fld qword [esp+4] | ||
3635 | | fld qword [esp+12] | ||
3636 | | cmp eax, 1; je >1; ja >2 | ||
3637 | | faddp st1; ret | ||
3638 | |1: ; fsubp st1; ret | ||
3639 | |2: ; cmp eax, 3; je >1; ja >2 | ||
3640 | | fmulp st1; ret | ||
3641 | |1: ; fdivp st1; ret | ||
3642 | |2: ; cmp eax, 5; jb ->vm_mod; je ->vm_pow | ||
3643 | | cmp eax, 7; je >1; ja >2 | ||
3644 | | fpop; fchs; ret | ||
3645 | |1: ; fpop; fabs; ret | ||
3646 | |2: ; cmp eax, 9; je >1; ja >2 | ||
3647 | | fpatan; ret | ||
3648 | |1: ; fxch; fscale; fpop1; ret | ||
3649 | |2: ; cmp eax, 11; je >1; ja >9 | ||
3650 | | fucomi st1; fcmovnbe st1; fpop1; ret | ||
3651 | |1: ; fucomi st1; fcmovbe st1; fpop1; ret | ||
3652 | |9: ; int3 // Bad op. | ||
3653 | | | ||
3654 | |.endif | ||
3655 | | | ||
3656 | |//----------------------------------------------------------------------- | 3090 | |//----------------------------------------------------------------------- |
3657 | |//-- Miscellaneous functions -------------------------------------------- | 3091 | |//-- Miscellaneous functions -------------------------------------------- |
3658 | |//----------------------------------------------------------------------- | 3092 | |//----------------------------------------------------------------------- |
@@ -3663,6 +3097,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
3663 | | mov eax, CARG1d | 3097 | | mov eax, CARG1d |
3664 | | .if X64WIN; push rsi; mov rsi, CARG2; .endif | 3098 | | .if X64WIN; push rsi; mov rsi, CARG2; .endif |
3665 | | push rbx | 3099 | | push rbx |
3100 | | xor ecx, ecx | ||
3666 | | cpuid | 3101 | | cpuid |
3667 | | mov [rsi], eax | 3102 | | mov [rsi], eax |
3668 | | mov [rsi+4], ebx | 3103 | | mov [rsi+4], ebx |
@@ -3686,6 +3121,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
3686 | | mov eax, [esp+4] // Argument 1 is function number. | 3121 | | mov eax, [esp+4] // Argument 1 is function number. |
3687 | | push edi | 3122 | | push edi |
3688 | | push ebx | 3123 | | push ebx |
3124 | | xor ecx, ecx | ||
3689 | | cpuid | 3125 | | cpuid |
3690 | | mov edi, [esp+16] // Argument 2 is result area. | 3126 | | mov edi, [esp+16] // Argument 2 is result area. |
3691 | | mov [edi], eax | 3127 | | mov [edi], eax |
@@ -3963,19 +3399,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3963 | | // RA is a number. | 3399 | | // RA is a number. |
3964 | | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp | 3400 | | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp |
3965 | | // RA is a number, RD is an integer. | 3401 | | // RA is a number, RD is an integer. |
3966 | |.if SSE | ||
3967 | | cvtsi2sd xmm0, dword [BASE+RD*8] | 3402 | | cvtsi2sd xmm0, dword [BASE+RD*8] |
3968 | | jmp >2 | 3403 | | jmp >2 |
3969 | |.else | ||
3970 | | fld qword [BASE+RA*8] | ||
3971 | | fild dword [BASE+RD*8] | ||
3972 | | jmp >3 | ||
3973 | |.endif | ||
3974 | | | 3404 | | |
3975 | |8: // RA is an integer, RD is not an integer. | 3405 | |8: // RA is an integer, RD is not an integer. |
3976 | | ja ->vmeta_comp | 3406 | | ja ->vmeta_comp |
3977 | | // RA is an integer, RD is a number. | 3407 | | // RA is an integer, RD is a number. |
3978 | |.if SSE | ||
3979 | | cvtsi2sd xmm1, dword [BASE+RA*8] | 3408 | | cvtsi2sd xmm1, dword [BASE+RA*8] |
3980 | | movsd xmm0, qword [BASE+RD*8] | 3409 | | movsd xmm0, qword [BASE+RD*8] |
3981 | | add PC, 4 | 3410 | | add PC, 4 |
@@ -3983,29 +3412,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3983 | | jmp_comp jbe, ja, jb, jae, <9 | 3412 | | jmp_comp jbe, ja, jb, jae, <9 |
3984 | | jmp <6 | 3413 | | jmp <6 |
3985 | |.else | 3414 | |.else |
3986 | | fild dword [BASE+RA*8] | ||
3987 | | jmp >2 | ||
3988 | |.endif | ||
3989 | |.else | ||
3990 | | checknum RA, ->vmeta_comp | 3415 | | checknum RA, ->vmeta_comp |
3991 | | checknum RD, ->vmeta_comp | 3416 | | checknum RD, ->vmeta_comp |
3992 | |.endif | 3417 | |.endif |
3993 | |.if SSE | ||
3994 | |1: | 3418 | |1: |
3995 | | movsd xmm0, qword [BASE+RD*8] | 3419 | | movsd xmm0, qword [BASE+RD*8] |
3996 | |2: | 3420 | |2: |
3997 | | add PC, 4 | 3421 | | add PC, 4 |
3998 | | ucomisd xmm0, qword [BASE+RA*8] | 3422 | | ucomisd xmm0, qword [BASE+RA*8] |
3999 | |3: | 3423 | |3: |
4000 | |.else | ||
4001 | |1: | ||
4002 | | fld qword [BASE+RA*8] // Reverse order, i.e like cmp D, A. | ||
4003 | |2: | ||
4004 | | fld qword [BASE+RD*8] | ||
4005 | |3: | ||
4006 | | add PC, 4 | ||
4007 | | fcomparepp | ||
4008 | |.endif | ||
4009 | | // Unordered: all of ZF CF PF set, ordered: PF clear. | 3424 | | // Unordered: all of ZF CF PF set, ordered: PF clear. |
4010 | | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. | 3425 | | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. |
4011 | |.if DUALNUM | 3426 | |.if DUALNUM |
@@ -4045,43 +3460,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4045 | | // RD is a number. | 3460 | | // RD is a number. |
4046 | | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5 | 3461 | | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5 |
4047 | | // RD is a number, RA is an integer. | 3462 | | // RD is a number, RA is an integer. |
4048 | |.if SSE | ||
4049 | | cvtsi2sd xmm0, dword [BASE+RA*8] | 3463 | | cvtsi2sd xmm0, dword [BASE+RA*8] |
4050 | |.else | ||
4051 | | fild dword [BASE+RA*8] | ||
4052 | |.endif | ||
4053 | | jmp >2 | 3464 | | jmp >2 |
4054 | | | 3465 | | |
4055 | |8: // RD is an integer, RA is not an integer. | 3466 | |8: // RD is an integer, RA is not an integer. |
4056 | | ja >5 | 3467 | | ja >5 |
4057 | | // RD is an integer, RA is a number. | 3468 | | // RD is an integer, RA is a number. |
4058 | |.if SSE | ||
4059 | | cvtsi2sd xmm0, dword [BASE+RD*8] | 3469 | | cvtsi2sd xmm0, dword [BASE+RD*8] |
4060 | | ucomisd xmm0, qword [BASE+RA*8] | 3470 | | ucomisd xmm0, qword [BASE+RA*8] |
4061 | |.else | ||
4062 | | fild dword [BASE+RD*8] | ||
4063 | | fld qword [BASE+RA*8] | ||
4064 | |.endif | ||
4065 | | jmp >4 | 3471 | | jmp >4 |
4066 | | | 3472 | | |
4067 | |.else | 3473 | |.else |
4068 | | cmp RB, LJ_TISNUM; jae >5 | 3474 | | cmp RB, LJ_TISNUM; jae >5 |
4069 | | checknum RA, >5 | 3475 | | checknum RA, >5 |
4070 | |.endif | 3476 | |.endif |
4071 | |.if SSE | ||
4072 | |1: | 3477 | |1: |
4073 | | movsd xmm0, qword [BASE+RA*8] | 3478 | | movsd xmm0, qword [BASE+RA*8] |
4074 | |2: | 3479 | |2: |
4075 | | ucomisd xmm0, qword [BASE+RD*8] | 3480 | | ucomisd xmm0, qword [BASE+RD*8] |
4076 | |4: | 3481 | |4: |
4077 | |.else | ||
4078 | |1: | ||
4079 | | fld qword [BASE+RA*8] | ||
4080 | |2: | ||
4081 | | fld qword [BASE+RD*8] | ||
4082 | |4: | ||
4083 | | fcomparepp | ||
4084 | |.endif | ||
4085 | iseqne_fp: | 3482 | iseqne_fp: |
4086 | if (vk) { | 3483 | if (vk) { |
4087 | | jp >2 // Unordered means not equal. | 3484 | | jp >2 // Unordered means not equal. |
@@ -4204,39 +3601,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4204 | | // RA is a number. | 3601 | | // RA is a number. |
4205 | | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1 | 3602 | | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1 |
4206 | | // RA is a number, RD is an integer. | 3603 | | // RA is a number, RD is an integer. |
4207 | |.if SSE | ||
4208 | | cvtsi2sd xmm0, dword [KBASE+RD*8] | 3604 | | cvtsi2sd xmm0, dword [KBASE+RD*8] |
4209 | |.else | ||
4210 | | fild dword [KBASE+RD*8] | ||
4211 | |.endif | ||
4212 | | jmp >2 | 3605 | | jmp >2 |
4213 | | | 3606 | | |
4214 | |8: // RA is an integer, RD is a number. | 3607 | |8: // RA is an integer, RD is a number. |
4215 | |.if SSE | ||
4216 | | cvtsi2sd xmm0, dword [BASE+RA*8] | 3608 | | cvtsi2sd xmm0, dword [BASE+RA*8] |
4217 | | ucomisd xmm0, qword [KBASE+RD*8] | 3609 | | ucomisd xmm0, qword [KBASE+RD*8] |
4218 | |.else | ||
4219 | | fild dword [BASE+RA*8] | ||
4220 | | fld qword [KBASE+RD*8] | ||
4221 | |.endif | ||
4222 | | jmp >4 | 3610 | | jmp >4 |
4223 | |.else | 3611 | |.else |
4224 | | cmp RB, LJ_TISNUM; jae >3 | 3612 | | cmp RB, LJ_TISNUM; jae >3 |
4225 | |.endif | 3613 | |.endif |
4226 | |.if SSE | ||
4227 | |1: | 3614 | |1: |
4228 | | movsd xmm0, qword [KBASE+RD*8] | 3615 | | movsd xmm0, qword [KBASE+RD*8] |
4229 | |2: | 3616 | |2: |
4230 | | ucomisd xmm0, qword [BASE+RA*8] | 3617 | | ucomisd xmm0, qword [BASE+RA*8] |
4231 | |4: | 3618 | |4: |
4232 | |.else | ||
4233 | |1: | ||
4234 | | fld qword [KBASE+RD*8] | ||
4235 | |2: | ||
4236 | | fld qword [BASE+RA*8] | ||
4237 | |4: | ||
4238 | | fcomparepp | ||
4239 | |.endif | ||
4240 | goto iseqne_fp; | 3619 | goto iseqne_fp; |
4241 | case BC_ISEQP: case BC_ISNEP: | 3620 | case BC_ISEQP: case BC_ISNEP: |
4242 | vk = op == BC_ISEQP; | 3621 | vk = op == BC_ISEQP; |
@@ -4287,6 +3666,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4287 | | ins_next | 3666 | | ins_next |
4288 | break; | 3667 | break; |
4289 | 3668 | ||
3669 | case BC_ISTYPE: | ||
3670 | | ins_AD // RA = src, RD = -type | ||
3671 | | add RD, [BASE+RA*8+4] | ||
3672 | | jne ->vmeta_istype | ||
3673 | | ins_next | ||
3674 | break; | ||
3675 | case BC_ISNUM: | ||
3676 | | ins_AD // RA = src, RD = -(TISNUM-1) | ||
3677 | | checknum RA, ->vmeta_istype | ||
3678 | | ins_next | ||
3679 | break; | ||
3680 | |||
4290 | /* -- Unary ops --------------------------------------------------------- */ | 3681 | /* -- Unary ops --------------------------------------------------------- */ |
4291 | 3682 | ||
4292 | case BC_MOV: | 3683 | case BC_MOV: |
@@ -4330,16 +3721,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4330 | |.else | 3721 | |.else |
4331 | | checknum RD, ->vmeta_unm | 3722 | | checknum RD, ->vmeta_unm |
4332 | |.endif | 3723 | |.endif |
4333 | |.if SSE | ||
4334 | | movsd xmm0, qword [BASE+RD*8] | 3724 | | movsd xmm0, qword [BASE+RD*8] |
4335 | | sseconst_sign xmm1, RDa | 3725 | | sseconst_sign xmm1, RDa |
4336 | | xorps xmm0, xmm1 | 3726 | | xorps xmm0, xmm1 |
4337 | | movsd qword [BASE+RA*8], xmm0 | 3727 | | movsd qword [BASE+RA*8], xmm0 |
4338 | |.else | ||
4339 | | fld qword [BASE+RD*8] | ||
4340 | | fchs | ||
4341 | | fstp qword [BASE+RA*8] | ||
4342 | |.endif | ||
4343 | |.if DUALNUM | 3728 | |.if DUALNUM |
4344 | | jmp <9 | 3729 | | jmp <9 |
4345 | |.else | 3730 | |.else |
@@ -4355,15 +3740,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4355 | |1: | 3740 | |1: |
4356 | | mov dword [BASE+RA*8+4], LJ_TISNUM | 3741 | | mov dword [BASE+RA*8+4], LJ_TISNUM |
4357 | | mov dword [BASE+RA*8], RD | 3742 | | mov dword [BASE+RA*8], RD |
4358 | |.elif SSE | 3743 | |.else |
4359 | | xorps xmm0, xmm0 | 3744 | | xorps xmm0, xmm0 |
4360 | | cvtsi2sd xmm0, dword STR:RD->len | 3745 | | cvtsi2sd xmm0, dword STR:RD->len |
4361 | |1: | 3746 | |1: |
4362 | | movsd qword [BASE+RA*8], xmm0 | 3747 | | movsd qword [BASE+RA*8], xmm0 |
4363 | |.else | ||
4364 | | fild dword STR:RD->len | ||
4365 | |1: | ||
4366 | | fstp qword [BASE+RA*8] | ||
4367 | |.endif | 3748 | |.endif |
4368 | | ins_next | 3749 | | ins_next |
4369 | |2: | 3750 | |2: |
@@ -4381,11 +3762,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4381 | | // Length of table returned in eax (RD). | 3762 | | // Length of table returned in eax (RD). |
4382 | |.if DUALNUM | 3763 | |.if DUALNUM |
4383 | | // Nothing to do. | 3764 | | // Nothing to do. |
4384 | |.elif SSE | ||
4385 | | cvtsi2sd xmm0, RD | ||
4386 | |.else | 3765 | |.else |
4387 | | mov ARG1, RD | 3766 | | cvtsi2sd xmm0, RD |
4388 | | fild ARG1 | ||
4389 | |.endif | 3767 | |.endif |
4390 | | mov BASE, RB // Restore BASE. | 3768 | | mov BASE, RB // Restore BASE. |
4391 | | movzx RA, PC_RA | 3769 | | movzx RA, PC_RA |
@@ -4400,7 +3778,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4400 | 3778 | ||
4401 | /* -- Binary ops -------------------------------------------------------- */ | 3779 | /* -- Binary ops -------------------------------------------------------- */ |
4402 | 3780 | ||
4403 | |.macro ins_arithpre, x87ins, sseins, ssereg | 3781 | |.macro ins_arithpre, sseins, ssereg |
4404 | | ins_ABC | 3782 | | ins_ABC |
4405 | ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); | 3783 | ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); |
4406 | ||switch (vk) { | 3784 | ||switch (vk) { |
@@ -4409,37 +3787,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4409 | | .if DUALNUM | 3787 | | .if DUALNUM |
4410 | | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn | 3788 | | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn |
4411 | | .endif | 3789 | | .endif |
4412 | | .if SSE | 3790 | | movsd xmm0, qword [BASE+RB*8] |
4413 | | movsd xmm0, qword [BASE+RB*8] | 3791 | | sseins ssereg, qword [KBASE+RC*8] |
4414 | | sseins ssereg, qword [KBASE+RC*8] | ||
4415 | | .else | ||
4416 | | fld qword [BASE+RB*8] | ||
4417 | | x87ins qword [KBASE+RC*8] | ||
4418 | | .endif | ||
4419 | || break; | 3792 | || break; |
4420 | ||case 1: | 3793 | ||case 1: |
4421 | | checknum RB, ->vmeta_arith_nv | 3794 | | checknum RB, ->vmeta_arith_nv |
4422 | | .if DUALNUM | 3795 | | .if DUALNUM |
4423 | | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv | 3796 | | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv |
4424 | | .endif | 3797 | | .endif |
4425 | | .if SSE | 3798 | | movsd xmm0, qword [KBASE+RC*8] |
4426 | | movsd xmm0, qword [KBASE+RC*8] | 3799 | | sseins ssereg, qword [BASE+RB*8] |
4427 | | sseins ssereg, qword [BASE+RB*8] | ||
4428 | | .else | ||
4429 | | fld qword [KBASE+RC*8] | ||
4430 | | x87ins qword [BASE+RB*8] | ||
4431 | | .endif | ||
4432 | || break; | 3800 | || break; |
4433 | ||default: | 3801 | ||default: |
4434 | | checknum RB, ->vmeta_arith_vv | 3802 | | checknum RB, ->vmeta_arith_vv |
4435 | | checknum RC, ->vmeta_arith_vv | 3803 | | checknum RC, ->vmeta_arith_vv |
4436 | | .if SSE | 3804 | | movsd xmm0, qword [BASE+RB*8] |
4437 | | movsd xmm0, qword [BASE+RB*8] | 3805 | | sseins ssereg, qword [BASE+RC*8] |
4438 | | sseins ssereg, qword [BASE+RC*8] | ||
4439 | | .else | ||
4440 | | fld qword [BASE+RB*8] | ||
4441 | | x87ins qword [BASE+RC*8] | ||
4442 | | .endif | ||
4443 | || break; | 3806 | || break; |
4444 | ||} | 3807 | ||} |
4445 | |.endmacro | 3808 | |.endmacro |
@@ -4477,55 +3840,62 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4477 | |.endmacro | 3840 | |.endmacro |
4478 | | | 3841 | | |
4479 | |.macro ins_arithpost | 3842 | |.macro ins_arithpost |
4480 | |.if SSE | ||
4481 | | movsd qword [BASE+RA*8], xmm0 | 3843 | | movsd qword [BASE+RA*8], xmm0 |
4482 | |.else | ||
4483 | | fstp qword [BASE+RA*8] | ||
4484 | |.endif | ||
4485 | |.endmacro | 3844 | |.endmacro |
4486 | | | 3845 | | |
4487 | |.macro ins_arith, x87ins, sseins | 3846 | |.macro ins_arith, sseins |
4488 | | ins_arithpre x87ins, sseins, xmm0 | 3847 | | ins_arithpre sseins, xmm0 |
4489 | | ins_arithpost | 3848 | | ins_arithpost |
4490 | | ins_next | 3849 | | ins_next |
4491 | |.endmacro | 3850 | |.endmacro |
4492 | | | 3851 | | |
4493 | |.macro ins_arith, intins, x87ins, sseins | 3852 | |.macro ins_arith, intins, sseins |
4494 | |.if DUALNUM | 3853 | |.if DUALNUM |
4495 | | ins_arithdn intins | 3854 | | ins_arithdn intins |
4496 | |.else | 3855 | |.else |
4497 | | ins_arith, x87ins, sseins | 3856 | | ins_arith, sseins |
4498 | |.endif | 3857 | |.endif |
4499 | |.endmacro | 3858 | |.endmacro |
4500 | 3859 | ||
4501 | | // RA = dst, RB = src1 or num const, RC = src2 or num const | 3860 | | // RA = dst, RB = src1 or num const, RC = src2 or num const |
4502 | case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: | 3861 | case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: |
4503 | | ins_arith add, fadd, addsd | 3862 | | ins_arith add, addsd |
4504 | break; | 3863 | break; |
4505 | case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: | 3864 | case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: |
4506 | | ins_arith sub, fsub, subsd | 3865 | | ins_arith sub, subsd |
4507 | break; | 3866 | break; |
4508 | case BC_MULVN: case BC_MULNV: case BC_MULVV: | 3867 | case BC_MULVN: case BC_MULNV: case BC_MULVV: |
4509 | | ins_arith imul, fmul, mulsd | 3868 | | ins_arith imul, mulsd |
4510 | break; | 3869 | break; |
4511 | case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: | 3870 | case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: |
4512 | | ins_arith fdiv, divsd | 3871 | | ins_arith divsd |
4513 | break; | 3872 | break; |
4514 | case BC_MODVN: | 3873 | case BC_MODVN: |
4515 | | ins_arithpre fld, movsd, xmm1 | 3874 | | ins_arithpre movsd, xmm1 |
4516 | |->BC_MODVN_Z: | 3875 | |->BC_MODVN_Z: |
4517 | | call ->vm_mod | 3876 | | call ->vm_mod |
4518 | | ins_arithpost | 3877 | | ins_arithpost |
4519 | | ins_next | 3878 | | ins_next |
4520 | break; | 3879 | break; |
4521 | case BC_MODNV: case BC_MODVV: | 3880 | case BC_MODNV: case BC_MODVV: |
4522 | | ins_arithpre fld, movsd, xmm1 | 3881 | | ins_arithpre movsd, xmm1 |
4523 | | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. | 3882 | | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. |
4524 | break; | 3883 | break; |
4525 | case BC_POW: | 3884 | case BC_POW: |
4526 | | ins_arithpre fld, movsd, xmm1 | 3885 | | ins_arithpre movsd, xmm1 |
4527 | | call ->vm_pow | 3886 | | mov RB, BASE |
3887 | |.if not X64 | ||
3888 | | movsd FPARG1, xmm0 | ||
3889 | | movsd FPARG3, xmm1 | ||
3890 | |.endif | ||
3891 | | call extern pow | ||
3892 | | movzx RA, PC_RA | ||
3893 | | mov BASE, RB | ||
3894 | |.if X64 | ||
4528 | | ins_arithpost | 3895 | | ins_arithpost |
3896 | |.else | ||
3897 | | fstp qword [BASE+RA*8] | ||
3898 | |.endif | ||
4529 | | ins_next | 3899 | | ins_next |
4530 | break; | 3900 | break; |
4531 | 3901 | ||
@@ -4593,25 +3963,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4593 | | movsx RD, RDW | 3963 | | movsx RD, RDW |
4594 | | mov dword [BASE+RA*8+4], LJ_TISNUM | 3964 | | mov dword [BASE+RA*8+4], LJ_TISNUM |
4595 | | mov dword [BASE+RA*8], RD | 3965 | | mov dword [BASE+RA*8], RD |
4596 | |.elif SSE | 3966 | |.else |
4597 | | movsx RD, RDW // Sign-extend literal. | 3967 | | movsx RD, RDW // Sign-extend literal. |
4598 | | cvtsi2sd xmm0, RD | 3968 | | cvtsi2sd xmm0, RD |
4599 | | movsd qword [BASE+RA*8], xmm0 | 3969 | | movsd qword [BASE+RA*8], xmm0 |
4600 | |.else | ||
4601 | | fild PC_RD // Refetch signed RD from instruction. | ||
4602 | | fstp qword [BASE+RA*8] | ||
4603 | |.endif | 3970 | |.endif |
4604 | | ins_next | 3971 | | ins_next |
4605 | break; | 3972 | break; |
4606 | case BC_KNUM: | 3973 | case BC_KNUM: |
4607 | | ins_AD // RA = dst, RD = num const | 3974 | | ins_AD // RA = dst, RD = num const |
4608 | |.if SSE | ||
4609 | | movsd xmm0, qword [KBASE+RD*8] | 3975 | | movsd xmm0, qword [KBASE+RD*8] |
4610 | | movsd qword [BASE+RA*8], xmm0 | 3976 | | movsd qword [BASE+RA*8], xmm0 |
4611 | |.else | ||
4612 | | fld qword [KBASE+RD*8] | ||
4613 | | fstp qword [BASE+RA*8] | ||
4614 | |.endif | ||
4615 | | ins_next | 3977 | | ins_next |
4616 | break; | 3978 | break; |
4617 | case BC_KPRI: | 3979 | case BC_KPRI: |
@@ -4718,18 +4080,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4718 | case BC_USETN: | 4080 | case BC_USETN: |
4719 | | ins_AD // RA = upvalue #, RD = num const | 4081 | | ins_AD // RA = upvalue #, RD = num const |
4720 | | mov LFUNC:RB, [BASE-8] | 4082 | | mov LFUNC:RB, [BASE-8] |
4721 | |.if SSE | ||
4722 | | movsd xmm0, qword [KBASE+RD*8] | 4083 | | movsd xmm0, qword [KBASE+RD*8] |
4723 | |.else | ||
4724 | | fld qword [KBASE+RD*8] | ||
4725 | |.endif | ||
4726 | | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] | 4084 | | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] |
4727 | | mov RA, UPVAL:RB->v | 4085 | | mov RA, UPVAL:RB->v |
4728 | |.if SSE | ||
4729 | | movsd qword [RA], xmm0 | 4086 | | movsd qword [RA], xmm0 |
4730 | |.else | ||
4731 | | fstp qword [RA] | ||
4732 | |.endif | ||
4733 | | ins_next | 4087 | | ins_next |
4734 | break; | 4088 | break; |
4735 | case BC_USETP: | 4089 | case BC_USETP: |
@@ -4883,18 +4237,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4883 | |.else | 4237 | |.else |
4884 | | // Convert number to int and back and compare. | 4238 | | // Convert number to int and back and compare. |
4885 | | checknum RC, >5 | 4239 | | checknum RC, >5 |
4886 | |.if SSE | ||
4887 | | movsd xmm0, qword [BASE+RC*8] | 4240 | | movsd xmm0, qword [BASE+RC*8] |
4888 | | cvtsd2si RC, xmm0 | 4241 | | cvttsd2si RC, xmm0 |
4889 | | cvtsi2sd xmm1, RC | 4242 | | cvtsi2sd xmm1, RC |
4890 | | ucomisd xmm0, xmm1 | 4243 | | ucomisd xmm0, xmm1 |
4891 | |.else | ||
4892 | | fld qword [BASE+RC*8] | ||
4893 | | fist ARG1 | ||
4894 | | fild ARG1 | ||
4895 | | fcomparepp | ||
4896 | | mov RC, ARG1 | ||
4897 | |.endif | ||
4898 | | jne ->vmeta_tgetv // Generic numeric key? Use fallback. | 4244 | | jne ->vmeta_tgetv // Generic numeric key? Use fallback. |
4899 | |.endif | 4245 | |.endif |
4900 | | cmp RC, TAB:RB->asize // Takes care of unordered, too. | 4246 | | cmp RC, TAB:RB->asize // Takes care of unordered, too. |
@@ -5018,6 +4364,32 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
5018 | | mov dword [BASE+RA*8+4], LJ_TNIL | 4364 | | mov dword [BASE+RA*8+4], LJ_TNIL |
5019 | | jmp <1 | 4365 | | jmp <1 |
5020 | break; | 4366 | break; |
4367 | case BC_TGETR: | ||
4368 | | ins_ABC // RA = dst, RB = table, RC = key | ||
4369 | | mov TAB:RB, [BASE+RB*8] | ||
4370 | |.if DUALNUM | ||
4371 | | mov RC, dword [BASE+RC*8] | ||
4372 | |.else | ||
4373 | | cvttsd2si RC, qword [BASE+RC*8] | ||
4374 | |.endif | ||
4375 | | cmp RC, TAB:RB->asize | ||
4376 | | jae ->vmeta_tgetr // Not in array part? Use fallback. | ||
4377 | | shl RC, 3 | ||
4378 | | add RC, TAB:RB->array | ||
4379 | | // Get array slot. | ||
4380 | |->BC_TGETR_Z: | ||
4381 | |.if X64 | ||
4382 | | mov RBa, [RC] | ||
4383 | | mov [BASE+RA*8], RBa | ||
4384 | |.else | ||
4385 | | mov RB, [RC] | ||
4386 | | mov RC, [RC+4] | ||
4387 | | mov [BASE+RA*8], RB | ||
4388 | | mov [BASE+RA*8+4], RC | ||
4389 | |.endif | ||
4390 | |->BC_TGETR2_Z: | ||
4391 | | ins_next | ||
4392 | break; | ||
5021 | 4393 | ||
5022 | case BC_TSETV: | 4394 | case BC_TSETV: |
5023 | | ins_ABC // RA = src, RB = table, RC = key | 4395 | | ins_ABC // RA = src, RB = table, RC = key |
@@ -5031,18 +4403,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
5031 | |.else | 4403 | |.else |
5032 | | // Convert number to int and back and compare. | 4404 | | // Convert number to int and back and compare. |
5033 | | checknum RC, >5 | 4405 | | checknum RC, >5 |
5034 | |.if SSE | ||
5035 | | movsd xmm0, qword [BASE+RC*8] | 4406 | | movsd xmm0, qword [BASE+RC*8] |
5036 | | cvtsd2si RC, xmm0 | 4407 | | cvttsd2si RC, xmm0 |
5037 | | cvtsi2sd xmm1, RC | 4408 | | cvtsi2sd xmm1, RC |
5038 | | ucomisd xmm0, xmm1 | 4409 | | ucomisd xmm0, xmm1 |
5039 | |.else | ||
5040 | | fld qword [BASE+RC*8] | ||
5041 | | fist ARG1 | ||
5042 | | fild ARG1 | ||
5043 | | fcomparepp | ||
5044 | | mov RC, ARG1 | ||
5045 | |.endif | ||
5046 | | jne ->vmeta_tsetv // Generic numeric key? Use fallback. | 4410 | | jne ->vmeta_tsetv // Generic numeric key? Use fallback. |
5047 | |.endif | 4411 | |.endif |
5048 | | cmp RC, TAB:RB->asize // Takes care of unordered, too. | 4412 | | cmp RC, TAB:RB->asize // Takes care of unordered, too. |
@@ -5212,6 +4576,39 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
5212 | | movzx RA, PC_RA // Restore RA. | 4576 | | movzx RA, PC_RA // Restore RA. |
5213 | | jmp <2 | 4577 | | jmp <2 |
5214 | break; | 4578 | break; |
4579 | case BC_TSETR: | ||
4580 | | ins_ABC // RA = src, RB = table, RC = key | ||
4581 | | mov TAB:RB, [BASE+RB*8] | ||
4582 | |.if DUALNUM | ||
4583 | | mov RC, dword [BASE+RC*8] | ||
4584 | |.else | ||
4585 | | cvttsd2si RC, qword [BASE+RC*8] | ||
4586 | |.endif | ||
4587 | | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) | ||
4588 | | jnz >7 | ||
4589 | |2: | ||
4590 | | cmp RC, TAB:RB->asize | ||
4591 | | jae ->vmeta_tsetr | ||
4592 | | shl RC, 3 | ||
4593 | | add RC, TAB:RB->array | ||
4594 | | // Set array slot. | ||
4595 | |->BC_TSETR_Z: | ||
4596 | |.if X64 | ||
4597 | | mov RBa, [BASE+RA*8] | ||
4598 | | mov [RC], RBa | ||
4599 | |.else | ||
4600 | | mov RB, [BASE+RA*8+4] | ||
4601 | | mov RA, [BASE+RA*8] | ||
4602 | | mov [RC+4], RB | ||
4603 | | mov [RC], RA | ||
4604 | |.endif | ||
4605 | | ins_next | ||
4606 | | | ||
4607 | |7: // Possible table write barrier for the value. Skip valiswhite check. | ||
4608 | | barrierback TAB:RB, RA | ||
4609 | | movzx RA, PC_RA // Restore RA. | ||
4610 | | jmp <2 | ||
4611 | break; | ||
5215 | 4612 | ||
5216 | case BC_TSETM: | 4613 | case BC_TSETM: |
5217 | | ins_AD // RA = base (table at base-1), RD = num const (start index) | 4614 | | ins_AD // RA = base (table at base-1), RD = num const (start index) |
@@ -5405,10 +4802,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
5405 | |.if DUALNUM | 4802 | |.if DUALNUM |
5406 | | mov dword [BASE+RA*8+4], LJ_TISNUM | 4803 | | mov dword [BASE+RA*8+4], LJ_TISNUM |
5407 | | mov dword [BASE+RA*8], RC | 4804 | | mov dword [BASE+RA*8], RC |
5408 | |.elif SSE | ||
5409 | | cvtsi2sd xmm0, RC | ||
5410 | |.else | 4805 | |.else |
5411 | | fild dword [BASE+RA*8-8] | 4806 | | cvtsi2sd xmm0, RC |
5412 | |.endif | 4807 | |.endif |
5413 | | // Copy array slot to returned value. | 4808 | | // Copy array slot to returned value. |
5414 | |.if X64 | 4809 | |.if X64 |
@@ -5424,10 +4819,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
5424 | | // Return array index as a numeric key. | 4819 | | // Return array index as a numeric key. |
5425 | |.if DUALNUM | 4820 | |.if DUALNUM |
5426 | | // See above. | 4821 | | // See above. |
5427 | |.elif SSE | ||
5428 | | movsd qword [BASE+RA*8], xmm0 | ||
5429 | |.else | 4822 | |.else |
5430 | | fstp qword [BASE+RA*8] | 4823 | | movsd qword [BASE+RA*8], xmm0 |
5431 | |.endif | 4824 | |.endif |
5432 | | mov [BASE+RA*8-8], RC // Update control var. | 4825 | | mov [BASE+RA*8-8], RC // Update control var. |
5433 | |2: | 4826 | |2: |
@@ -5440,9 +4833,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
5440 | | | 4833 | | |
5441 | |4: // Skip holes in array part. | 4834 | |4: // Skip holes in array part. |
5442 | | add RC, 1 | 4835 | | add RC, 1 |
5443 | |.if not (DUALNUM or SSE) | ||
5444 | | mov [BASE+RA*8-8], RC | ||
5445 | |.endif | ||
5446 | | jmp <1 | 4836 | | jmp <1 |
5447 | | | 4837 | | |
5448 | |5: // Traverse hash part. | 4838 | |5: // Traverse hash part. |
@@ -5776,7 +5166,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
5776 | if (!vk) { | 5166 | if (!vk) { |
5777 | | cmp RB, LJ_TISNUM; jae ->vmeta_for | 5167 | | cmp RB, LJ_TISNUM; jae ->vmeta_for |
5778 | } | 5168 | } |
5779 | |.if SSE | ||
5780 | | movsd xmm0, qword FOR_IDX | 5169 | | movsd xmm0, qword FOR_IDX |
5781 | | movsd xmm1, qword FOR_STOP | 5170 | | movsd xmm1, qword FOR_STOP |
5782 | if (vk) { | 5171 | if (vk) { |
@@ -5789,22 +5178,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
5789 | | ucomisd xmm1, xmm0 | 5178 | | ucomisd xmm1, xmm0 |
5790 | |1: | 5179 | |1: |
5791 | | movsd qword FOR_EXT, xmm0 | 5180 | | movsd qword FOR_EXT, xmm0 |
5792 | |.else | ||
5793 | | fld qword FOR_STOP | ||
5794 | | fld qword FOR_IDX | ||
5795 | if (vk) { | ||
5796 | | fadd qword FOR_STEP // nidx = idx + step | ||
5797 | | fst qword FOR_IDX | ||
5798 | | fst qword FOR_EXT | ||
5799 | | test RB, RB; js >1 | ||
5800 | } else { | ||
5801 | | fst qword FOR_EXT | ||
5802 | | jl >1 | ||
5803 | } | ||
5804 | | fxch // Swap lim/(n)idx if step non-negative. | ||
5805 | |1: | ||
5806 | | fcomparepp | ||
5807 | |.endif | ||
5808 | if (op == BC_FORI) { | 5181 | if (op == BC_FORI) { |
5809 | |.if DUALNUM | 5182 | |.if DUALNUM |
5810 | | jnb <7 | 5183 | | jnb <7 |
@@ -5832,11 +5205,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
5832 | |2: | 5205 | |2: |
5833 | | ins_next | 5206 | | ins_next |
5834 | |.endif | 5207 | |.endif |
5835 | |.if SSE | 5208 | | |
5836 | |3: // Invert comparison if step is negative. | 5209 | |3: // Invert comparison if step is negative. |
5837 | | ucomisd xmm0, xmm1 | 5210 | | ucomisd xmm0, xmm1 |
5838 | | jmp <1 | 5211 | | jmp <1 |
5839 | |.endif | ||
5840 | break; | 5212 | break; |
5841 | 5213 | ||
5842 | case BC_ITERL: | 5214 | case BC_ITERL: |
@@ -5874,7 +5246,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
5874 | | ins_A // RA = base, RD = target (loop extent) | 5246 | | ins_A // RA = base, RD = target (loop extent) |
5875 | | // Note: RA/RD is only used by trace recorder to determine scope/extent | 5247 | | // Note: RA/RD is only used by trace recorder to determine scope/extent |
5876 | | // This opcode does NOT jump, its only purpose is to detect a hot loop. | 5248 | | // This opcode does NOT jump, its only purpose is to detect a hot loop. |
5877 | |.if JIT | 5249 | |.if JIT |
5878 | | hotloop RB | 5250 | | hotloop RB |
5879 | |.endif | 5251 | |.endif |
5880 | | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op. | 5252 | | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op. |
@@ -5893,7 +5265,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
5893 | | mov RDa, TRACE:RD->mcode | 5265 | | mov RDa, TRACE:RD->mcode |
5894 | | mov L:RB, SAVE_L | 5266 | | mov L:RB, SAVE_L |
5895 | | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE | 5267 | | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE |
5896 | | mov [DISPATCH+DISPATCH_GL(jit_L)], L:RB | 5268 | | mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB |
5897 | | // Save additional callee-save registers only used in compiled code. | 5269 | | // Save additional callee-save registers only used in compiled code. |
5898 | |.if X64WIN | 5270 | |.if X64WIN |
5899 | | mov TMPQ, r12 | 5271 | | mov TMPQ, r12 |
@@ -6060,9 +5432,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
6060 | | // (lua_State *L, lua_CFunction f) | 5432 | | // (lua_State *L, lua_CFunction f) |
6061 | | call aword [DISPATCH+DISPATCH_GL(wrapf)] | 5433 | | call aword [DISPATCH+DISPATCH_GL(wrapf)] |
6062 | } | 5434 | } |
6063 | | set_vmstate INTERP | ||
6064 | | // nresults returned in eax (RD). | 5435 | | // nresults returned in eax (RD). |
6065 | | mov BASE, L:RB->base | 5436 | | mov BASE, L:RB->base |
5437 | | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB | ||
5438 | | set_vmstate INTERP | ||
6066 | | lea RA, [BASE+RD*8] | 5439 | | lea RA, [BASE+RD*8] |
6067 | | neg RA | 5440 | | neg RA |
6068 | | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8 | 5441 | | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8 |
@@ -6382,15 +5755,21 @@ static void emit_asm_debug(BuildCtx *ctx) | |||
6382 | "LEFDEY:\n\n", fcsize); | 5755 | "LEFDEY:\n\n", fcsize); |
6383 | } | 5756 | } |
6384 | #endif | 5757 | #endif |
6385 | #if LJ_64 | 5758 | #if !LJ_64 |
6386 | fprintf(ctx->fp, "\t.subsections_via_symbols\n"); | ||
6387 | #else | ||
6388 | fprintf(ctx->fp, | 5759 | fprintf(ctx->fp, |
6389 | "\t.non_lazy_symbol_pointer\n" | 5760 | "\t.non_lazy_symbol_pointer\n" |
6390 | "L_lj_err_unwind_dwarf$non_lazy_ptr:\n" | 5761 | "L_lj_err_unwind_dwarf$non_lazy_ptr:\n" |
6391 | ".indirect_symbol _lj_err_unwind_dwarf\n" | 5762 | ".indirect_symbol _lj_err_unwind_dwarf\n" |
6392 | ".long 0\n"); | 5763 | ".long 0\n\n"); |
5764 | fprintf(ctx->fp, "\t.section __IMPORT,__jump_table,symbol_stubs,pure_instructions+self_modifying_code,5\n"); | ||
5765 | { | ||
5766 | const char *const *xn; | ||
5767 | for (xn = ctx->extnames; *xn; xn++) | ||
5768 | if (strncmp(*xn, LABEL_PREFIX, sizeof(LABEL_PREFIX)-1)) | ||
5769 | fprintf(ctx->fp, "L_%s$stub:\n\t.indirect_symbol _%s\n\t.ascii \"\\364\\364\\364\\364\\364\"\n", *xn, *xn); | ||
5770 | } | ||
6393 | #endif | 5771 | #endif |
5772 | fprintf(ctx->fp, ".subsections_via_symbols\n"); | ||
6394 | } | 5773 | } |
6395 | break; | 5774 | break; |
6396 | #endif | 5775 | #endif |
diff --git a/src/xb1build.bat b/src/xb1build.bat new file mode 100644 index 00000000..847e84a5 --- /dev/null +++ b/src/xb1build.bat | |||
@@ -0,0 +1,101 @@ | |||
1 | @rem Script to build LuaJIT with the Xbox One SDK. | ||
2 | @rem Donated to the public domain. | ||
3 | @rem | ||
4 | @rem Open a "Visual Studio .NET Command Prompt" (64 bit host compiler) | ||
5 | @rem Then cd to this directory and run this script. | ||
6 | |||
7 | @if not defined INCLUDE goto :FAIL | ||
8 | @if not defined DurangoXDK goto :FAIL | ||
9 | |||
10 | @setlocal | ||
11 | @echo ---- Host compiler ---- | ||
12 | @set LJCOMPILE=cl /nologo /c /MD /O2 /W3 /D_CRT_SECURE_NO_DEPRECATE /DLUAJIT_ENABLE_GC64 | ||
13 | @set LJLINK=link /nologo | ||
14 | @set LJMT=mt /nologo | ||
15 | @set DASMDIR=..\dynasm | ||
16 | @set DASM=%DASMDIR%\dynasm.lua | ||
17 | @set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c | ||
18 | |||
19 | %LJCOMPILE% host\minilua.c | ||
20 | @if errorlevel 1 goto :BAD | ||
21 | %LJLINK% /out:minilua.exe minilua.obj | ||
22 | @if errorlevel 1 goto :BAD | ||
23 | if exist minilua.exe.manifest^ | ||
24 | %LJMT% -manifest minilua.exe.manifest -outputresource:minilua.exe | ||
25 | |||
26 | @rem Error out for 32 bit host compiler | ||
27 | @minilua | ||
28 | @if not errorlevel 8 goto :FAIL | ||
29 | |||
30 | @set DASMFLAGS=-D WIN -D FFI -D P64 | ||
31 | minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_x64.dasc | ||
32 | @if errorlevel 1 goto :BAD | ||
33 | |||
34 | %LJCOMPILE% /I "." /I %DASMDIR% /D_DURANGO host\buildvm*.c | ||
35 | @if errorlevel 1 goto :BAD | ||
36 | %LJLINK% /out:buildvm.exe buildvm*.obj | ||
37 | @if errorlevel 1 goto :BAD | ||
38 | if exist buildvm.exe.manifest^ | ||
39 | %LJMT% -manifest buildvm.exe.manifest -outputresource:buildvm.exe | ||
40 | |||
41 | buildvm -m peobj -o lj_vm.obj | ||
42 | @if errorlevel 1 goto :BAD | ||
43 | buildvm -m bcdef -o lj_bcdef.h %ALL_LIB% | ||
44 | @if errorlevel 1 goto :BAD | ||
45 | buildvm -m ffdef -o lj_ffdef.h %ALL_LIB% | ||
46 | @if errorlevel 1 goto :BAD | ||
47 | buildvm -m libdef -o lj_libdef.h %ALL_LIB% | ||
48 | @if errorlevel 1 goto :BAD | ||
49 | buildvm -m recdef -o lj_recdef.h %ALL_LIB% | ||
50 | @if errorlevel 1 goto :BAD | ||
51 | buildvm -m vmdef -o jit\vmdef.lua %ALL_LIB% | ||
52 | @if errorlevel 1 goto :BAD | ||
53 | buildvm -m folddef -o lj_folddef.h lj_opt_fold.c | ||
54 | @if errorlevel 1 goto :BAD | ||
55 | |||
56 | @echo ---- Cross compiler ---- | ||
57 | |||
58 | @set CWD=%cd% | ||
59 | @call "%DurangoXDK%\xdk\DurangoVars.cmd" XDK | ||
60 | @cd /D "%CWD%" | ||
61 | @shift | ||
62 | |||
63 | @set LJCOMPILE="cl" /nologo /c /W3 /GF /Gm- /GR- /GS- /Gy /openmp- /D_CRT_SECURE_NO_DEPRECATE /D_LIB /D_UNICODE /D_DURANGO | ||
64 | @set LJLIB="lib" /nologo | ||
65 | |||
66 | @if "%1"=="debug" ( | ||
67 | @shift | ||
68 | @set LJCOMPILE=%LJCOMPILE% /Zi /MDd /Od | ||
69 | @set LJLINK=%LJLINK% /debug | ||
70 | ) else ( | ||
71 | @set LJCOMPILE=%LJCOMPILE% /MD /O2 /DNDEBUG | ||
72 | ) | ||
73 | |||
74 | @if "%1"=="amalg" goto :AMALG | ||
75 | %LJCOMPILE% /DLUA_BUILD_AS_DLL lj_*.c lib_*.c | ||
76 | @if errorlevel 1 goto :BAD | ||
77 | %LJLIB% /OUT:luajit.lib lj_*.obj lib_*.obj | ||
78 | @if errorlevel 1 goto :BAD | ||
79 | @goto :NOAMALG | ||
80 | :AMALG | ||
81 | %LJCOMPILE% /DLUA_BUILD_AS_DLL ljamalg.c | ||
82 | @if errorlevel 1 goto :BAD | ||
83 | %LJLIB% /OUT:luajit.lib ljamalg.obj lj_vm.obj | ||
84 | @if errorlevel 1 goto :BAD | ||
85 | :NOAMALG | ||
86 | |||
87 | @del *.obj *.manifest minilua.exe buildvm.exe | ||
88 | @echo. | ||
89 | @echo === Successfully built LuaJIT for Xbox One === | ||
90 | |||
91 | @goto :END | ||
92 | :BAD | ||
93 | @echo. | ||
94 | @echo ******************************************************* | ||
95 | @echo *** Build FAILED -- Please check the error messages *** | ||
96 | @echo ******************************************************* | ||
97 | @goto :END | ||
98 | :FAIL | ||
99 | @echo To run this script you must open a "Visual Studio .NET Command Prompt" | ||
100 | @echo (64 bit host compiler). The Xbox One SDK must be installed, too. | ||
101 | :END | ||