aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--Makefile19
-rw-r--r--README4
-rw-r--r--etc/luajit.pc6
-rw-r--r--src/Makefile17
-rw-r--r--src/Makefile.dep22
-rw-r--r--src/host/buildvm_lib.c59
-rw-r--r--src/host/buildvm_libbc.h23
-rw-r--r--src/host/genlibbc.lua197
-rw-r--r--src/jit/bc.lua2
-rw-r--r--src/jit/bcsave.lua2
-rw-r--r--src/jit/dump.lua5
-rw-r--r--src/jit/v.lua2
-rw-r--r--src/lib_jit.c24
-rw-r--r--src/lib_math.c7
-rw-r--r--src/lib_table.c68
-rw-r--r--src/lj_arch.h1
-rw-r--r--src/lj_asm.c2
-rw-r--r--src/lj_asm_mips.h2
-rw-r--r--src/lj_asm_x86.h30
-rw-r--r--src/lj_bc.h4
-rw-r--r--src/lj_bcdump.h3
-rw-r--r--src/lj_bcread.c34
-rw-r--r--src/lj_crecord.c11
-rw-r--r--src/lj_debug.c17
-rw-r--r--src/lj_debug.h2
-rw-r--r--src/lj_dispatch.h12
-rw-r--r--src/lj_emit_x86.h10
-rw-r--r--src/lj_err.c2
-rw-r--r--src/lj_ffrecord.c16
-rw-r--r--src/lj_ir.h1
-rw-r--r--src/lj_jit.h17
-rw-r--r--src/lj_lib.c27
-rw-r--r--src/lj_lib.h4
-rw-r--r--src/lj_meta.c13
-rw-r--r--src/lj_meta.h1
-rw-r--r--src/lj_obj.h4
-rw-r--r--src/lj_opt_fold.c27
-rw-r--r--src/lj_opt_narrow.c3
-rw-r--r--src/lj_record.c16
-rw-r--r--src/lj_tab.h2
-rw-r--r--src/lj_target_arm.h4
-rw-r--r--src/lj_target_x86.h2
-rw-r--r--src/lj_vm.h4
-rw-r--r--src/luaconf.h4
-rw-r--r--src/luajit.h6
-rw-r--r--src/msvcbuild.bat1
-rw-r--r--src/vm_arm.dasc117
-rw-r--r--src/vm_mips.dasc138
-rw-r--r--src/vm_ppc.dasc123
-rw-r--r--src/vm_ppcspe.dasc6
-rw-r--r--src/vm_x86.dasc845
51 files changed, 1034 insertions, 934 deletions
diff --git a/Makefile b/Makefile
index 7976b925..40c22434 100644
--- a/Makefile
+++ b/Makefile
@@ -14,9 +14,10 @@
14############################################################################## 14##############################################################################
15 15
16MAJVER= 2 16MAJVER= 2
17MINVER= 0 17MINVER= 1
18RELVER= 1 18RELVER= 0
19VERSION= $(MAJVER).$(MINVER).$(RELVER) 19PREREL= -alpha
20VERSION= $(MAJVER).$(MINVER).$(RELVER)$(PREREL)
20ABIVER= 5.1 21ABIVER= 5.1
21 22
22############################################################################## 23##############################################################################
@@ -107,7 +108,7 @@ install: $(INSTALL_DEP)
107 $(MKDIR) $(INSTALL_DIRS) 108 $(MKDIR) $(INSTALL_DIRS)
108 cd src && $(INSTALL_X) $(FILE_T) $(INSTALL_T) 109 cd src && $(INSTALL_X) $(FILE_T) $(INSTALL_T)
109 cd src && test -f $(FILE_A) && $(INSTALL_F) $(FILE_A) $(INSTALL_STATIC) || : 110 cd src && test -f $(FILE_A) && $(INSTALL_F) $(FILE_A) $(INSTALL_STATIC) || :
110 $(RM) $(INSTALL_TSYM) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) 111 $(RM) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2)
111 cd src && test -f $(FILE_SO) && \ 112 cd src && test -f $(FILE_SO) && \
112 $(INSTALL_X) $(FILE_SO) $(INSTALL_DYN) && \ 113 $(INSTALL_X) $(FILE_SO) $(INSTALL_DYN) && \
113 $(LDCONFIG) $(INSTALL_LIB) && \ 114 $(LDCONFIG) $(INSTALL_LIB) && \
@@ -119,12 +120,18 @@ install: $(INSTALL_DEP)
119 $(RM) $(FILE_PC).tmp 120 $(RM) $(FILE_PC).tmp
120 cd src && $(INSTALL_F) $(FILES_INC) $(INSTALL_INC) 121 cd src && $(INSTALL_F) $(FILES_INC) $(INSTALL_INC)
121 cd src/jit && $(INSTALL_F) $(FILES_JITLIB) $(INSTALL_JITLIB) 122 cd src/jit && $(INSTALL_F) $(FILES_JITLIB) $(INSTALL_JITLIB)
122 $(SYMLINK) $(INSTALL_TNAME) $(INSTALL_TSYM)
123 @echo "==== Successfully installed LuaJIT $(VERSION) to $(PREFIX) ====" 123 @echo "==== Successfully installed LuaJIT $(VERSION) to $(PREFIX) ===="
124 @echo ""
125 @echo "Note: the development releases deliberately do NOT install a symlink for luajit"
126 @echo "You can do this now by running this command (with sudo):"
127 @echo ""
128 @echo " $(SYMLINK) $(INSTALL_TNAME) $(INSTALL_TSYM)"
129 @echo ""
130
124 131
125uninstall: 132uninstall:
126 @echo "==== Uninstalling LuaJIT $(VERSION) from $(PREFIX) ====" 133 @echo "==== Uninstalling LuaJIT $(VERSION) from $(PREFIX) ===="
127 $(UNINSTALL) $(INSTALL_TSYM) $(INSTALL_T) $(INSTALL_STATIC) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) $(INSTALL_MAN)/$(FILE_MAN) $(INSTALL_PC) 134 $(UNINSTALL) $(INSTALL_T) $(INSTALL_STATIC) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) $(INSTALL_MAN)/$(FILE_MAN) $(INSTALL_PC)
128 for file in $(FILES_JITLIB); do \ 135 for file in $(FILES_JITLIB); do \
129 $(UNINSTALL) $(INSTALL_JITLIB)/$$file; \ 136 $(UNINSTALL) $(INSTALL_JITLIB)/$$file; \
130 done 137 done
diff --git a/README b/README
index e68604b7..f9ba16b9 100644
--- a/README
+++ b/README
@@ -1,5 +1,5 @@
1README for LuaJIT 2.0.1 1README for LuaJIT 2.1.0-alpha
2----------------------- 2-----------------------------
3 3
4LuaJIT is a Just-In-Time (JIT) compiler for the Lua programming language. 4LuaJIT is a Just-In-Time (JIT) compiler for the Lua programming language.
5 5
diff --git a/etc/luajit.pc b/etc/luajit.pc
index 2ce6e94c..d3c7d9a2 100644
--- a/etc/luajit.pc
+++ b/etc/luajit.pc
@@ -1,8 +1,8 @@
1# Package information for LuaJIT to be used by pkg-config. 1# Package information for LuaJIT to be used by pkg-config.
2majver=2 2majver=2
3minver=0 3minver=1
4relver=1 4relver=0
5version=${majver}.${minver}.${relver} 5version=${majver}.${minver}.${relver}-alpha
6abiver=5.1 6abiver=5.1
7 7
8prefix=/usr/local 8prefix=/usr/local
diff --git a/src/Makefile b/src/Makefile
index 278324a1..95671792 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -42,13 +42,10 @@ CCOPT= -O2 -fomit-frame-pointer
42# 42#
43# Target-specific compiler options: 43# Target-specific compiler options:
44# 44#
45# x86 only: it's recommended to compile at least for i686. Better yet,
46# compile for an architecture that has SSE2, too (-msse -msse2).
47#
48# x86/x64 only: For GCC 4.2 or higher and if you don't intend to distribute 45# x86/x64 only: For GCC 4.2 or higher and if you don't intend to distribute
49# the binaries to a different machine you could also use: -march=native 46# the binaries to a different machine you could also use: -march=native
50# 47#
51CCOPT_x86= -march=i686 48CCOPT_x86= -march=i686 -msse -msse2 -mfpmath=sse
52CCOPT_x64= 49CCOPT_x64=
53CCOPT_arm= 50CCOPT_arm=
54CCOPT_ppc= 51CCOPT_ppc=
@@ -394,11 +391,6 @@ DASM_AFLAGS+= -D VER=$(subst LJ_ARCH_VERSION_,,$(filter LJ_ARCH_VERSION_%,$(subs
394ifeq (Windows,$(TARGET_SYS)) 391ifeq (Windows,$(TARGET_SYS))
395 DASM_AFLAGS+= -D WIN 392 DASM_AFLAGS+= -D WIN
396endif 393endif
397ifeq (x86,$(TARGET_LJARCH))
398 ifneq (,$(findstring __SSE2__ 1,$(TARGET_TESTARCH)))
399 DASM_AFLAGS+= -D SSE
400 endif
401else
402ifeq (x64,$(TARGET_LJARCH)) 394ifeq (x64,$(TARGET_LJARCH))
403 DASM_ARCH= x86 395 DASM_ARCH= x86
404else 396else
@@ -423,7 +415,6 @@ ifeq (ppc,$(TARGET_LJARCH))
423endif 415endif
424endif 416endif
425endif 417endif
426endif
427 418
428DASM_FLAGS= $(DASM_XFLAGS) $(DASM_AFLAGS) 419DASM_FLAGS= $(DASM_XFLAGS) $(DASM_AFLAGS)
429DASM_DASC= vm_$(DASM_ARCH).dasc 420DASM_DASC= vm_$(DASM_ARCH).dasc
@@ -567,6 +558,10 @@ amalg:
567clean: 558clean:
568 $(HOST_RM) $(ALL_RM) 559 $(HOST_RM) $(ALL_RM)
569 560
561libbc:
562 ./$(LUAJIT_T) host/genlibbc.lua -o host/buildvm_libbc.h $(LJLIB_C)
563 $(MAKE) all
564
570depend: 565depend:
571 @for file in $(ALL_HDRGEN); do \ 566 @for file in $(ALL_HDRGEN); do \
572 test -f $$file || touch $$file; \ 567 test -f $$file || touch $$file; \
@@ -581,7 +576,7 @@ depend:
581 test -s $$file || $(HOST_RM) $$file; \ 576 test -s $$file || $(HOST_RM) $$file; \
582 done 577 done
583 578
584.PHONY: default all amalg clean depend 579.PHONY: default all amalg clean libbc depend
585 580
586############################################################################## 581##############################################################################
587# Rules for generated files. 582# Rules for generated files.
diff --git a/src/Makefile.dep b/src/Makefile.dep
index 5d91723a..10118c5e 100644
--- a/src/Makefile.dep
+++ b/src/Makefile.dep
@@ -124,7 +124,8 @@ lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
124 lj_state.h lj_lex.h lj_parse.h lj_char.h lj_strscan.h 124 lj_state.h lj_lex.h lj_parse.h lj_char.h lj_strscan.h
125lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \ 125lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \
126 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_bc.h \ 126 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_bc.h \
127 lj_dispatch.h lj_jit.h lj_ir.h lj_vm.h lj_strscan.h lj_lib.h 127 lj_dispatch.h lj_jit.h lj_ir.h lj_vm.h lj_strscan.h lj_lex.h lj_bcdump.h \
128 lj_lib.h
128lj_load.o: lj_load.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \ 129lj_load.o: lj_load.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \
129 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_func.h lj_frame.h \ 130 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_func.h lj_frame.h \
130 lj_bc.h lj_vm.h lj_lex.h lj_bcdump.h lj_parse.h 131 lj_bc.h lj_vm.h lj_lex.h lj_bcdump.h lj_parse.h
@@ -133,7 +134,7 @@ lj_mcode.o: lj_mcode.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
133 lj_traceerr.h lj_vm.h 134 lj_traceerr.h lj_vm.h
134lj_meta.o: lj_meta.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 135lj_meta.o: lj_meta.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
135 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \ 136 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \
136 lj_vm.h lj_strscan.h 137 lj_vm.h lj_strscan.h lj_lib.h
137lj_obj.o: lj_obj.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h 138lj_obj.o: lj_obj.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h
138lj_opt_dce.o: lj_opt_dce.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 139lj_opt_dce.o: lj_opt_dce.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
139 lj_ir.h lj_jit.h lj_iropt.h 140 lj_ir.h lj_jit.h lj_iropt.h
@@ -194,13 +195,13 @@ ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \
194 lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_err.c \ 195 lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_err.c \
195 lj_debug.h lj_ff.h lj_ffdef.h lj_char.c lj_char.h lj_bc.c lj_bcdef.h \ 196 lj_debug.h lj_ff.h lj_ffdef.h lj_char.c lj_char.h lj_bc.c lj_bcdef.h \
196 lj_obj.c lj_str.c lj_tab.c lj_func.c lj_udata.c lj_meta.c lj_strscan.h \ 197 lj_obj.c lj_str.c lj_tab.c lj_func.c lj_udata.c lj_meta.c lj_strscan.h \
197 lj_debug.c lj_state.c lj_lex.h lj_alloc.h lj_dispatch.c lj_ccallback.h \ 198 lj_lib.h lj_debug.c lj_state.c lj_lex.h lj_alloc.h lj_dispatch.c \
198 luajit.h lj_vmevent.c lj_vmevent.h lj_vmmath.c lj_strscan.c lj_api.c \ 199 lj_ccallback.h luajit.h lj_vmevent.c lj_vmevent.h lj_vmmath.c \
199 lj_lex.c lualib.h lj_parse.h lj_parse.c lj_bcread.c lj_bcdump.h \ 200 lj_strscan.c lj_api.c lj_lex.c lualib.h lj_parse.h lj_parse.c \
200 lj_bcwrite.c lj_load.c lj_ctype.c lj_cdata.c lj_cconv.h lj_cconv.c \ 201 lj_bcread.c lj_bcdump.h lj_bcwrite.c lj_load.c lj_ctype.c lj_cdata.c \
201 lj_ccall.c lj_ccall.h lj_ccallback.c lj_target.h lj_target_*.h \ 202 lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h lj_ccallback.c lj_target.h \
202 lj_mcode.h lj_carith.c lj_carith.h lj_clib.c lj_clib.h lj_cparse.c \ 203 lj_target_*.h lj_mcode.h lj_carith.c lj_carith.h lj_clib.c lj_clib.h \
203 lj_cparse.h lj_lib.c lj_lib.h lj_ir.c lj_ircall.h lj_iropt.h \ 204 lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c lj_ircall.h lj_iropt.h \
204 lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c lj_opt_dce.c \ 205 lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c lj_opt_dce.c \
205 lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c lj_mcode.c \ 206 lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c lj_mcode.c \
206 lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \ 207 lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \
@@ -220,7 +221,8 @@ host/buildvm_asm.o: host/buildvm_asm.c host/buildvm.h lj_def.h lua.h luaconf.h \
220host/buildvm_fold.o: host/buildvm_fold.c host/buildvm.h lj_def.h lua.h \ 221host/buildvm_fold.o: host/buildvm_fold.c host/buildvm.h lj_def.h lua.h \
221 luaconf.h lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_ir.h lj_obj.h 222 luaconf.h lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_ir.h lj_obj.h
222host/buildvm_lib.o: host/buildvm_lib.c host/buildvm.h lj_def.h lua.h luaconf.h \ 223host/buildvm_lib.o: host/buildvm_lib.c host/buildvm.h lj_def.h lua.h luaconf.h \
223 lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_lib.h lj_obj.h 224 lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_bc.h lj_lib.h lj_obj.h \
225 host/buildvm_libbc.h
224host/buildvm_peobj.o: host/buildvm_peobj.c host/buildvm.h lj_def.h lua.h \ 226host/buildvm_peobj.o: host/buildvm_peobj.c host/buildvm.h lj_def.h lua.h \
225 luaconf.h lj_arch.h lj_bc.h lj_def.h lj_arch.h 227 luaconf.h lj_arch.h lj_bc.h lj_def.h lj_arch.h
226host/minilua.o: host/minilua.c 228host/minilua.o: host/minilua.c
diff --git a/src/host/buildvm_lib.c b/src/host/buildvm_lib.c
index 40141dfb..dcd3ca41 100644
--- a/src/host/buildvm_lib.c
+++ b/src/host/buildvm_lib.c
@@ -5,7 +5,9 @@
5 5
6#include "buildvm.h" 6#include "buildvm.h"
7#include "lj_obj.h" 7#include "lj_obj.h"
8#include "lj_bc.h"
8#include "lj_lib.h" 9#include "lj_lib.h"
10#include "buildvm_libbc.h"
9 11
10/* Context for library definitions. */ 12/* Context for library definitions. */
11static uint8_t obuf[8192]; 13static uint8_t obuf[8192];
@@ -151,6 +153,62 @@ static void libdef_func(BuildCtx *ctx, char *p, int arg)
151 regfunc = REGFUNC_OK; 153 regfunc = REGFUNC_OK;
152} 154}
153 155
/*
** Decode one ULEB128-encoded unsigned 32 bit value starting at p.
** The decoded value is stored in *vv. Returns a pointer to the first byte
** after the encoded value.
*/
static uint8_t *libdef_uleb128(uint8_t *p, uint32_t *vv)
{
  uint32_t v = *p++;
  if (v >= 0x80) {
    int sh = 0;
    uint8_t b;
    v &= 0x7f;
    do {
      b = *p++;
      sh += 7;
      /*
      ** Shift as uint32_t: the payload promoted to (signed) int overflows
      ** when the 5th byte is shifted by 28. Bits that do not fit into
      ** 32 bits (overlong encodings) are dropped instead of shifting by
      ** 32 or more.
      */
      if (sh < 32)
	v |= (uint32_t)(b & 0x7f) << sh;
    } while (b >= 0x80);
  }
  *vv = v;
  return p;
}
166
167static void libdef_fixupbc(uint8_t *p)
168{
169 uint32_t i, sizebc;
170 p += 4;
171 p = libdef_uleb128(p, &sizebc);
172 p = libdef_uleb128(p, &sizebc);
173 p = libdef_uleb128(p, &sizebc);
174 for (i = 0; i < sizebc; i++, p += 4) {
175 uint8_t op = p[libbc_endian ? 3 : 0];
176 uint8_t ra = p[libbc_endian ? 2 : 1];
177 uint8_t rc = p[libbc_endian ? 1 : 2];
178 uint8_t rb = p[libbc_endian ? 0 : 3];
179 if (!LJ_DUALNUM && op == BC_ISTYPE && rc == ~LJ_TNUMX+1) {
180 op = BC_ISNUM; rc++;
181 }
182 p[LJ_ENDIAN_SELECT(0, 3)] = op;
183 p[LJ_ENDIAN_SELECT(1, 2)] = ra;
184 p[LJ_ENDIAN_SELECT(2, 1)] = rc;
185 p[LJ_ENDIAN_SELECT(3, 0)] = rb;
186 }
187}
188
189static void libdef_lua(BuildCtx *ctx, char *p, int arg)
190{
191 UNUSED(arg);
192 if (ctx->mode == BUILD_libdef) {
193 int i;
194 for (i = 0; libbc_map[i].name != NULL; i++) {
195 if (!strcmp(libbc_map[i].name, p)) {
196 int ofs = libbc_map[i].ofs;
197 int len = libbc_map[i+1].ofs - ofs;
198 obuf[2]++; /* Bump hash table size. */
199 *optr++ = LIBINIT_LUA;
200 libdef_name(p, 0);
201 memcpy(optr, libbc_code + ofs, len);
202 libdef_fixupbc(optr);
203 optr += len;
204 return;
205 }
206 }
207 fprintf(stderr, "Error: missing libbc definition for %s\n", p);
208 exit(1);
209 }
210}
211
154static uint32_t find_rec(char *name) 212static uint32_t find_rec(char *name)
155{ 213{
156 char *p = (char *)obuf; 214 char *p = (char *)obuf;
@@ -277,6 +335,7 @@ static const LibDefHandler libdef_handlers[] = {
277 { "CF(", ")", libdef_func, LIBINIT_CF }, 335 { "CF(", ")", libdef_func, LIBINIT_CF },
278 { "ASM(", ")", libdef_func, LIBINIT_ASM }, 336 { "ASM(", ")", libdef_func, LIBINIT_ASM },
279 { "ASM_(", ")", libdef_func, LIBINIT_ASM_ }, 337 { "ASM_(", ")", libdef_func, LIBINIT_ASM_ },
338 { "LUA(", ")", libdef_lua, 0 },
280 { "REC(", ")", libdef_rec, 0 }, 339 { "REC(", ")", libdef_rec, 0 },
281 { "PUSH(", ")", libdef_push, 0 }, 340 { "PUSH(", ")", libdef_push, 0 },
282 { "SET(", ")", libdef_set, 0 }, 341 { "SET(", ")", libdef_set, 0 },
diff --git a/src/host/buildvm_libbc.h b/src/host/buildvm_libbc.h
new file mode 100644
index 00000000..a71aa630
--- /dev/null
+++ b/src/host/buildvm_libbc.h
@@ -0,0 +1,23 @@
1/* This is a generated file. DO NOT EDIT! */
2
3static const int libbc_endian = 0;
4
5static const uint8_t libbc_code[] = {
60,1,2,0,0,1,2,24,1,0,0,76,1,2,0,241,135,158,166,3,220,203,178,130,4,0,1,2,0,
70,1,2,24,1,0,0,76,1,2,0,243,244,148,165,20,198,190,199,252,3,0,2,9,0,0,0,15,
816,0,12,0,16,1,9,0,41,2,1,0,21,3,0,0,41,4,1,0,77,2,8,128,18,6,1,0,18,7,5,0,
959,8,5,0,66,6,3,2,10,6,0,0,88,7,1,128,76,6,2,0,79,2,248,127,75,0,1,0,0,2,10,
100,0,0,16,16,0,12,0,16,1,9,0,43,2,0,0,18,3,0,0,41,4,0,0,88,5,7,128,18,7,1,0,
1118,8,5,0,18,9,6,0,66,7,3,2,10,7,0,0,88,8,1,128,76,7,2,0,70,5,3,3,82,5,247,127,
1275,0,1,0,0,1,2,0,0,0,3,16,0,12,0,21,1,0,0,76,1,2,0,0
13};
14
15static const struct { const char *name; int ofs; } libbc_map[] = {
16{"math_deg",0},
17{"math_rad",25},
18{"table_foreachi",50},
19{"table_foreach",117},
20{"table_getn",188},
21{NULL,207}
22};
23
diff --git a/src/host/genlibbc.lua b/src/host/genlibbc.lua
new file mode 100644
index 00000000..72c55d73
--- /dev/null
+++ b/src/host/genlibbc.lua
@@ -0,0 +1,197 @@
1----------------------------------------------------------------------------
2-- Lua script to dump the bytecode of the library functions written in Lua.
3-- The resulting 'buildvm_libbc.h' is used for the build process of LuaJIT.
4----------------------------------------------------------------------------
5-- Copyright (C) 2005-2013 Mike Pall. All rights reserved.
6-- Released under the MIT license. See Copyright Notice in luajit.h
7----------------------------------------------------------------------------
8
9local ffi = require("ffi")
10local bit = require("bit")
11local vmdef = require("jit.vmdef")
12local bcnames = vmdef.bcnames
13
14local format = string.format
15
16local isbe = (string.byte(string.dump(function() end), 5) % 2 == 1)
17
-- Print the command line synopsis to stderr and exit with status 1.
-- The program name is taken from arg[0] when available.
local function usage(arg)
  local progname = arg and arg[0] or "genlibbc"
  io.stderr:write("Usage: ", progname, " [-o buildvm_libbc.h] lib_*.c\n")
  os.exit(1)
end
23
-- Parse the command line. Returns the output file name, or "-" when no
-- "-o file" option was given. A leading "-o file" pair is removed from arg,
-- so that only the input file names remain.
local function parse_arg(arg)
  if not arg or not arg[1] then
    usage(arg)
  end
  if arg[1] ~= "-o" then
    return "-"
  end
  local outfile = arg[2]
  if not outfile then usage(arg) end
  -- Drop both "-o" and the file name.
  table.remove(arg, 1)
  table.remove(arg, 1)
  return outfile
end
37
-- Read all files listed in names and return their contents concatenated
-- in list order. Raises an error if a file cannot be opened.
local function read_files(names)
  local contents = ""
  for _, path in ipairs(names) do
    local handle = assert(io.open(path))
    local text = handle:read("*a")
    handle:close()
    contents = contents .. text
  end
  return contents
end
47
-- Turn the pseudo-Lua source of a library function into loadable Lua code.
-- Every CHECK_<type>(var) is replaced by an assignment "var=<marker>" with
-- a unique negative marker number, and the requested check is recorded
-- under that marker in the fixup table. Every PAIRS(var) is replaced by
-- "nil, var, 0" and recorded as fixup.PAIRS.
-- Returns the code prefixed with "return " and the fixup table.
local function transform_lua(code)
  local fixup = {}
  local marker = -30000
  local function on_check(tp, var)
    marker = marker + 1
    fixup[marker] = { "CHECK", tp }
    return format("%s=%d", var, marker)
  end
  local function on_pairs(var)
    fixup.PAIRS = true
    return format("nil, %s, 0", var)
  end
  local checked = string.gsub(code, "CHECK_(%w*)%((.-)%)", on_check)
  local rewritten = string.gsub(checked, "PAIRS%((.-)%)", on_pairs)
  return "return "..rewritten, fixup
end
62
-- Decode one ULEB128-encoded unsigned integer starting at byte pointer p.
-- Returns the pointer advanced past the encoded value, followed by the
-- decoded value.
local function read_uleb128(p)
  local v = p[0]; p = p + 1
  if v >= 128 then
    -- The first byte holds the low 7 bits; strip its continuation flag.
    local sh = 7; v = v - 128
    repeat
      local r = p[0]
      -- Each following byte carries 7 payload bits. Bit 7 is only the
      -- "more bytes follow" flag, so the payload mask is 127 (not 128).
      v = v + bit.lshift(bit.band(r, 127), sh)
      sh = sh + 7
      p = p + 1
    until r < 128
  end
  return p, v
end
76
-- ORDER LJ_T
-- Operand values stored into the rewritten type check instructions,
-- indexed by the type name used in CHECK_<type>().
local name2itype = {
  str = 5, func = 9, tab = 12, int = 14, num = 15
}

-- Map from bytecode name to opcode number. bcnames is a string of names
-- padded with spaces to 6 characters each; the padding is removed here.
local BC = {}
for i=0,#bcnames/6-1 do
  local padded = string.sub(bcnames, i*6+1, i*6+6)
  local opname = string.gsub(padded, " ", "")
  BC[opname] = i
end

-- Byte positions of the instruction fields within a 4 byte instruction,
-- depending on the byte order of the dump.
local xop, xra, xrc, xrb
if isbe then
  xop, xra, xrc, xrb = 3, 2, 1, 0
else
  xop, xra, xrc, xrb = 0, 1, 2, 3
end
88
-- Patch the bytecode of a dumped function according to the fixup table
-- produced by transform_lua and return the patched function data.
-- The dump is copied into a byte buffer. The first 5 bytes are skipped,
-- then a ULEB128 length n is read; the returned string is the n bytes
-- following that length field.
-- NOTE(review): the skipped bytes are presumably the dump header -- confirm
-- against the bytecode dump format.
local function fixup_dump(dump, fixup)
  local buf = ffi.new("uint8_t[?]", #dump+1, dump)
  local len, ninsns
  local cur = buf + 5
  cur, len = read_uleb128(cur)
  local proto = cur
  -- Skip 4 bytes and two ULEB128 fields; the third one is the number of
  -- instructions that follow.
  cur = read_uleb128(cur + 4)
  cur = read_uleb128(cur)
  cur, ninsns = read_uleb128(cur)
  -- Registers that were the target of a CHECK_tab() marker.
  local rawtab = {}
  for _ = 1, ninsns do
    local op = cur[xop]
    if op == BC.KSHORT then
      -- Sign-extend the 16 bit operand and look for a marker number.
      local rd = cur[xrc] + 256*cur[xrb]
      rd = bit.arshift(bit.lshift(rd, 16), 16)
      local f = fixup[rd]
      if f then
        if f[1] ~= "CHECK" then
          error("unhandled fixup type: "..f[1])
        end
        local tp = f[2]
        if tp == "tab" then rawtab[cur[xra]] = true end
        -- Replace the marker load with the type check instruction.
        cur[xop] = tp == "num" and BC.ISNUM or BC.ISTYPE
        cur[xrb] = 0
        cur[xrc] = name2itype[tp]
      end
    elseif op == BC.TGETV and rawtab[cur[xrb]] then
      cur[xop] = BC.TGETR
    elseif op == BC.TSETV and rawtab[cur[xrb]] then
      cur[xop] = BC.TSETR
    elseif op == BC.ITERC and fixup.PAIRS then
      cur[xop] = BC.ITERN
    end
    cur = cur + 4
  end
  return ffi.string(proto, len)
end
134
-- Find all LJLIB_LUA(name) /* code */ definitions in src and compile them.
-- Returns a table that holds the patched bytecode of every function under
-- its name, plus the names themselves in order of appearance in the
-- array part.
local function find_defs(src)
  local defs = {}
  local pattern = "LJLIB_LUA%(([^)]*)%)%s*/%*(.-)%*/"
  for name, code in string.gmatch(src, pattern) do
    local env = {}
    local tcode, fixup = transform_lua(code)
    -- The transformed code is a chunk that returns the function itself.
    local chunk = assert(load(tcode, "", nil, env))
    local func = chunk()
    local bc = string.dump(func, true)
    defs[name] = fixup_dump(bc, fixup)
    defs[#defs+1] = name
  end
  return defs
end
146
-- Generate the text of the C header from the definitions returned by
-- find_defs: the libbc_endian flag, the libbc_code byte array holding the
-- bytecode of all functions back to back, and the libbc_map table with the
-- start offset of every function plus a NULL entry with the total size.
local function gen_header(defs)
  local out = {}
  local function put(x) out[#out+1] = x end

  put("/* This is a generated file. DO NOT EDIT! */\n\n")
  put("static const int libbc_endian = ")
  put(isbe and 1 or 0)
  put(";\n\n")

  local code = ""
  for _, name in ipairs(defs) do
    code = code .. defs[name]
  end

  put("static const uint8_t libbc_code[] = {\n")
  local col = 0
  for pos = 1, #code do
    local byte = string.byte(code, pos)
    put(byte); put(",")
    -- Track the width of the current line: digits plus the comma.
    if byte < 10 then
      col = col + 2
    elseif byte < 100 then
      col = col + 3
    else
      col = col + 4
    end
    if col >= 75 then
      col = 0
      put("\n")
    end
  end
  put("0\n};\n\n")

  put("static const struct { const char *name; int ofs; } libbc_map[] = {\n")
  local ofs = 0
  for _, name in ipairs(defs) do
    put('{"'); put(name); put('",'); put(ofs); put('},\n')
    ofs = ofs + #defs[name]
  end
  put("{NULL,"); put(ofs); put("}\n};\n\n")

  return table.concat(out)
end
174
-- Write data to the file called name, or to stdout if name is "-".
-- An existing file with identical contents is left untouched.
local function write_file(name, data)
  if name == "-" then
    assert(io.write(data))
    assert(io.flush())
    return
  end
  local existing = io.open(name)
  if existing then
    local old = existing:read("*a")
    existing:close()
    -- Nothing to do if the file is already up to date.
    if old == data then return end
  end
  local out = assert(io.open(name, "w"))
  assert(out:write(data))
  assert(out:close())
end
191
-- Main script: parse_arg must run first, because it strips the "-o file"
-- option from arg before the remaining entries are read as input files.
local outfile = parse_arg(arg)
local header = gen_header(find_defs(read_files(arg)))
write_file(outfile, header)
197
diff --git a/src/jit/bc.lua b/src/jit/bc.lua
index 5c00ebe3..dd1c1f3b 100644
--- a/src/jit/bc.lua
+++ b/src/jit/bc.lua
@@ -41,7 +41,7 @@
41 41
42-- Cache some library functions and objects. 42-- Cache some library functions and objects.
43local jit = require("jit") 43local jit = require("jit")
44assert(jit.version_num == 20001, "LuaJIT core/library version mismatch") 44assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
45local jutil = require("jit.util") 45local jutil = require("jit.util")
46local vmdef = require("jit.vmdef") 46local vmdef = require("jit.vmdef")
47local bit = require("bit") 47local bit = require("bit")
diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua
index 25bd6042..a54094dd 100644
--- a/src/jit/bcsave.lua
+++ b/src/jit/bcsave.lua
@@ -11,7 +11,7 @@
11------------------------------------------------------------------------------ 11------------------------------------------------------------------------------
12 12
13local jit = require("jit") 13local jit = require("jit")
14assert(jit.version_num == 20001, "LuaJIT core/library version mismatch") 14assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
15local bit = require("bit") 15local bit = require("bit")
16 16
17-- Symbol name prefix for LuaJIT bytecode. 17-- Symbol name prefix for LuaJIT bytecode.
diff --git a/src/jit/dump.lua b/src/jit/dump.lua
index 70a59280..7f930f51 100644
--- a/src/jit/dump.lua
+++ b/src/jit/dump.lua
@@ -54,7 +54,7 @@
54 54
55-- Cache some library functions and objects. 55-- Cache some library functions and objects.
56local jit = require("jit") 56local jit = require("jit")
57assert(jit.version_num == 20001, "LuaJIT core/library version mismatch") 57assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
58local jutil = require("jit.util") 58local jutil = require("jit.util")
59local vmdef = require("jit.vmdef") 59local vmdef = require("jit.vmdef")
60local funcinfo, funcbc = jutil.funcinfo, jutil.funcbc 60local funcinfo, funcbc = jutil.funcinfo, jutil.funcbc
@@ -269,8 +269,7 @@ local litname = {
269 ["CONV "] = setmetatable({}, { __index = function(t, mode) 269 ["CONV "] = setmetatable({}, { __index = function(t, mode)
270 local s = irtype[band(mode, 31)] 270 local s = irtype[band(mode, 31)]
271 s = irtype[band(shr(mode, 5), 31)].."."..s 271 s = irtype[band(shr(mode, 5), 31)].."."..s
272 if band(mode, 0x400) ~= 0 then s = s.." trunc" 272 if band(mode, 0x800) ~= 0 then s = s.." sext" end
273 elseif band(mode, 0x800) ~= 0 then s = s.." sext" end
274 local c = shr(mode, 14) 273 local c = shr(mode, 14)
275 if c == 2 then s = s.." index" elseif c == 3 then s = s.." check" end 274 if c == 2 then s = s.." index" elseif c == 3 then s = s.." check" end
276 t[mode] = s 275 t[mode] = s
diff --git a/src/jit/v.lua b/src/jit/v.lua
index f4a9b054..88c358b5 100644
--- a/src/jit/v.lua
+++ b/src/jit/v.lua
@@ -59,7 +59,7 @@
59 59
60-- Cache some library functions and objects. 60-- Cache some library functions and objects.
61local jit = require("jit") 61local jit = require("jit")
62assert(jit.version_num == 20001, "LuaJIT core/library version mismatch") 62assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
63local jutil = require("jit.util") 63local jutil = require("jit.util")
64local vmdef = require("jit.vmdef") 64local vmdef = require("jit.vmdef")
65local funcinfo, traceinfo = jutil.funcinfo, jutil.traceinfo 65local funcinfo, traceinfo = jutil.funcinfo, jutil.traceinfo
diff --git a/src/lib_jit.c b/src/lib_jit.c
index 82e68258..125b48ce 100644
--- a/src/lib_jit.c
+++ b/src/lib_jit.c
@@ -538,23 +538,17 @@ static uint32_t jit_cpudetect(lua_State *L)
538 uint32_t features[4]; 538 uint32_t features[4];
539 if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) { 539 if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) {
540#if !LJ_HASJIT 540#if !LJ_HASJIT
541#define JIT_F_CMOV 1
542#define JIT_F_SSE2 2 541#define JIT_F_SSE2 2
543#endif 542#endif
544 flags |= ((features[3] >> 15)&1) * JIT_F_CMOV;
545 flags |= ((features[3] >> 26)&1) * JIT_F_SSE2; 543 flags |= ((features[3] >> 26)&1) * JIT_F_SSE2;
546#if LJ_HASJIT 544#if LJ_HASJIT
547 flags |= ((features[2] >> 0)&1) * JIT_F_SSE3; 545 flags |= ((features[2] >> 0)&1) * JIT_F_SSE3;
548 flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1; 546 flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1;
549 if (vendor[2] == 0x6c65746e) { /* Intel. */ 547 if (vendor[2] == 0x6c65746e) { /* Intel. */
550 if ((features[0] & 0x0ff00f00) == 0x00000f00) /* P4. */ 548 if ((features[0] & 0x0fff0ff0) == 0x000106c0) /* Atom. */
551 flags |= JIT_F_P4; /* Currently unused. */
552 else if ((features[0] & 0x0fff0ff0) == 0x000106c0) /* Atom. */
553 flags |= JIT_F_LEA_AGU; 549 flags |= JIT_F_LEA_AGU;
554 } else if (vendor[2] == 0x444d4163) { /* AMD. */ 550 } else if (vendor[2] == 0x444d4163) { /* AMD. */
555 uint32_t fam = (features[0] & 0x0ff00f00); 551 uint32_t fam = (features[0] & 0x0ff00f00);
556 if (fam == 0x00000f00) /* K8. */
557 flags |= JIT_F_SPLIT_XMM;
558 if (fam >= 0x00000f00) /* K8, K10. */ 552 if (fam >= 0x00000f00) /* K8, K10. */
559 flags |= JIT_F_PREFER_IMUL; 553 flags |= JIT_F_PREFER_IMUL;
560 } 554 }
@@ -562,14 +556,8 @@ static uint32_t jit_cpudetect(lua_State *L)
562 } 556 }
563 /* Check for required instruction set support on x86 (unnecessary on x64). */ 557 /* Check for required instruction set support on x86 (unnecessary on x64). */
564#if LJ_TARGET_X86 558#if LJ_TARGET_X86
565#if !defined(LUAJIT_CPU_NOCMOV)
566 if (!(flags & JIT_F_CMOV))
567 luaL_error(L, "CPU not supported");
568#endif
569#if defined(LUAJIT_CPU_SSE2)
570 if (!(flags & JIT_F_SSE2)) 559 if (!(flags & JIT_F_SSE2))
571 luaL_error(L, "CPU does not support SSE2 (recompile without -DLUAJIT_CPU_SSE2)"); 560 luaL_error(L, "CPU with SSE2 required");
572#endif
573#endif 561#endif
574#elif LJ_TARGET_ARM 562#elif LJ_TARGET_ARM
575#if LJ_HASJIT 563#if LJ_HASJIT
@@ -631,11 +619,7 @@ static void jit_init(lua_State *L)
631 uint32_t flags = jit_cpudetect(L); 619 uint32_t flags = jit_cpudetect(L);
632#if LJ_HASJIT 620#if LJ_HASJIT
633 jit_State *J = L2J(L); 621 jit_State *J = L2J(L);
634#if LJ_TARGET_X86 622 J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT;
635 /* Silently turn off the JIT compiler on CPUs without SSE2. */
636 if ((flags & JIT_F_SSE2))
637#endif
638 J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT;
639 memcpy(J->param, jit_param_default, sizeof(J->param)); 623 memcpy(J->param, jit_param_default, sizeof(J->param));
640 lj_dispatch_update(G(L)); 624 lj_dispatch_update(G(L));
641#else 625#else
@@ -645,6 +629,7 @@ static void jit_init(lua_State *L)
645 629
646LUALIB_API int luaopen_jit(lua_State *L) 630LUALIB_API int luaopen_jit(lua_State *L)
647{ 631{
632 jit_init(L);
648 lua_pushliteral(L, LJ_OS_NAME); 633 lua_pushliteral(L, LJ_OS_NAME);
649 lua_pushliteral(L, LJ_ARCH_NAME); 634 lua_pushliteral(L, LJ_ARCH_NAME);
650 lua_pushinteger(L, LUAJIT_VERSION_NUM); 635 lua_pushinteger(L, LUAJIT_VERSION_NUM);
@@ -657,7 +642,6 @@ LUALIB_API int luaopen_jit(lua_State *L)
657 LJ_LIB_REG(L, "jit.opt", jit_opt); 642 LJ_LIB_REG(L, "jit.opt", jit_opt);
658#endif 643#endif
659 L->top -= 2; 644 L->top -= 2;
660 jit_init(L);
661 return 1; 645 return 1;
662} 646}
663 647
diff --git a/src/lib_math.c b/src/lib_math.c
index b23d9a2d..e474f980 100644
--- a/src/lib_math.c
+++ b/src/lib_math.c
@@ -63,11 +63,8 @@ LJLIB_ASM(math_log) LJLIB_REC(math_log)
63 return FFH_RETRY; 63 return FFH_RETRY;
64} 64}
65 65
66LJLIB_PUSH(57.29577951308232) 66LJLIB_LUA(math_deg) /* function(x) return x * 57.29577951308232 end */
67LJLIB_ASM_(math_deg) LJLIB_REC(math_degrad) 67LJLIB_LUA(math_rad) /* function(x) return x * 0.017453292519943295 end */
68
69LJLIB_PUSH(0.017453292519943295)
70LJLIB_ASM_(math_rad) LJLIB_REC(math_degrad)
71 68
72LJLIB_ASM(math_atan2) LJLIB_REC(.) 69LJLIB_ASM(math_atan2) LJLIB_REC(.)
73{ 70{
diff --git a/src/lib_table.c b/src/lib_table.c
index 8d53a6cd..13aff24e 100644
--- a/src/lib_table.c
+++ b/src/lib_table.c
@@ -23,50 +23,34 @@
23 23
24#define LJLIB_MODULE_table 24#define LJLIB_MODULE_table
25 25
26LJLIB_CF(table_foreachi) 26LJLIB_LUA(table_foreachi) /*
27{ 27 function(t, f)
28 GCtab *t = lj_lib_checktab(L, 1); 28 CHECK_tab(t)
29 GCfunc *func = lj_lib_checkfunc(L, 2); 29 CHECK_func(f)
30 MSize i, n = lj_tab_len(t); 30 for i=1,#t do
31 for (i = 1; i <= n; i++) { 31 local r = f(i, t[i])
32 cTValue *val; 32 if r ~= nil then return r end
33 setfuncV(L, L->top, func); 33 end
34 setintV(L->top+1, i); 34 end
35 val = lj_tab_getint(t, (int32_t)i); 35*/
36 if (val) { copyTV(L, L->top+2, val); } else { setnilV(L->top+2); }
37 L->top += 3;
38 lua_call(L, 2, 1);
39 if (!tvisnil(L->top-1))
40 return 1;
41 L->top--;
42 }
43 return 0;
44}
45 36
46LJLIB_CF(table_foreach) 37LJLIB_LUA(table_foreach) /*
47{ 38 function(t, f)
48 GCtab *t = lj_lib_checktab(L, 1); 39 CHECK_tab(t)
49 GCfunc *func = lj_lib_checkfunc(L, 2); 40 CHECK_func(f)
50 L->top = L->base+3; 41 for k, v in PAIRS(t) do
51 setnilV(L->top-1); 42 local r = f(k, v)
52 while (lj_tab_next(L, t, L->top-1)) { 43 if r ~= nil then return r end
53 copyTV(L, L->top+2, L->top); 44 end
54 copyTV(L, L->top+1, L->top-1); 45 end
55 setfuncV(L, L->top, func); 46*/
56 L->top += 3;
57 lua_call(L, 2, 1);
58 if (!tvisnil(L->top-1))
59 return 1;
60 L->top--;
61 }
62 return 0;
63}
64 47
65LJLIB_ASM(table_getn) LJLIB_REC(.) 48LJLIB_LUA(table_getn) /*
66{ 49 function(t)
67 lj_lib_checktab(L, 1); 50 CHECK_tab(t)
68 return FFH_UNREACHABLE; 51 return #t
69} 52 end
53*/
70 54
71LJLIB_CF(table_maxn) 55LJLIB_CF(table_maxn)
72{ 56{
diff --git a/src/lj_arch.h b/src/lj_arch.h
index 9ea10d0f..c5f2fb3d 100644
--- a/src/lj_arch.h
+++ b/src/lj_arch.h
@@ -227,6 +227,7 @@
227 227
228#elif LUAJIT_TARGET == LUAJIT_ARCH_PPCSPE 228#elif LUAJIT_TARGET == LUAJIT_ARCH_PPCSPE
229 229
230#error "The PPC/e500 port is broken and will be abandoned with LuaJIT 2.1"
230#define LJ_ARCH_NAME "ppcspe" 231#define LJ_ARCH_NAME "ppcspe"
231#define LJ_ARCH_BITS 32 232#define LJ_ARCH_BITS 32
232#define LJ_ARCH_ENDIAN LUAJIT_BE 233#define LJ_ARCH_ENDIAN LUAJIT_BE
diff --git a/src/lj_asm.c b/src/lj_asm.c
index c7365404..a01b4e52 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -1730,7 +1730,7 @@ static void asm_setup_regsp(ASMState *as)
1730 break; 1730 break;
1731 case IR_FPMATH: 1731 case IR_FPMATH:
1732#if LJ_TARGET_X86ORX64 1732#if LJ_TARGET_X86ORX64
1733 if (ir->op2 == IRFPM_EXP2) { /* May be joined to lj_vm_pow_sse. */ 1733 if (ir->op2 == IRFPM_EXP2) { /* May be joined to pow. */
1734 ir->prev = REGSP_HINT(RID_XMM0); 1734 ir->prev = REGSP_HINT(RID_XMM0);
1735#if !LJ_64 1735#if !LJ_64
1736 if (as->evenspill < 4) /* Leave room for 16 byte scratch area. */ 1736 if (as->evenspill < 4) /* Leave room for 16 byte scratch area. */
diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h
index cd283b88..dcc74ce9 100644
--- a/src/lj_asm_mips.h
+++ b/src/lj_asm_mips.h
@@ -1000,7 +1000,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
1000 if (irt_isint(t)) { 1000 if (irt_isint(t)) {
1001 Reg tmp = ra_scratch(as, RSET_FPR); 1001 Reg tmp = ra_scratch(as, RSET_FPR);
1002 emit_tg(as, MIPSI_MFC1, dest, tmp); 1002 emit_tg(as, MIPSI_MFC1, dest, tmp);
1003 emit_fg(as, MIPSI_CVT_W_D, tmp, tmp); 1003 emit_fg(as, MIPSI_TRUNC_W_D, tmp, tmp);
1004 dest = tmp; 1004 dest = tmp;
1005 t.irt = IRT_NUM; /* Check for original type. */ 1005 t.irt = IRT_NUM; /* Check for original type. */
1006 } else { 1006 } else {
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
index e9c53a09..9dba6b70 100644
--- a/src/lj_asm_x86.h
+++ b/src/lj_asm_x86.h
@@ -551,7 +551,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
551 if (ra_hasreg(dest)) { 551 if (ra_hasreg(dest)) {
552 ra_free(as, dest); 552 ra_free(as, dest);
553 ra_modified(as, dest); 553 ra_modified(as, dest);
554 emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, 554 emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS,
555 dest, RID_ESP, ofs); 555 dest, RID_ESP, ofs);
556 } 556 }
557 if ((ci->flags & CCI_CASTU64)) { 557 if ((ci->flags & CCI_CASTU64)) {
@@ -662,8 +662,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
662 asm_guardcc(as, CC_NE); 662 asm_guardcc(as, CC_NE);
663 emit_rr(as, XO_UCOMISD, left, tmp); 663 emit_rr(as, XO_UCOMISD, left, tmp);
664 emit_rr(as, XO_CVTSI2SD, tmp, dest); 664 emit_rr(as, XO_CVTSI2SD, tmp, dest);
665 if (!(as->flags & JIT_F_SPLIT_XMM)) 665 emit_rr(as, XO_XORPS, tmp, tmp); /* Avoid partial register stall. */
666 emit_rr(as, XO_XORPS, tmp, tmp); /* Avoid partial register stall. */
667 emit_rr(as, XO_CVTTSD2SI, dest, left); 666 emit_rr(as, XO_CVTTSD2SI, dest, left);
668 /* Can't fuse since left is needed twice. */ 667 /* Can't fuse since left is needed twice. */
669} 668}
@@ -719,8 +718,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
719 emit_mrm(as, irt_isnum(ir->t) ? XO_CVTSI2SD : XO_CVTSI2SS, 718 emit_mrm(as, irt_isnum(ir->t) ? XO_CVTSI2SD : XO_CVTSI2SS,
720 dest|((LJ_64 && (st64 || st == IRT_U32)) ? REX_64 : 0), left); 719 dest|((LJ_64 && (st64 || st == IRT_U32)) ? REX_64 : 0), left);
721 } 720 }
722 if (!(as->flags & JIT_F_SPLIT_XMM)) 721 emit_rr(as, XO_XORPS, dest, dest); /* Avoid partial register stall. */
723 emit_rr(as, XO_XORPS, dest, dest); /* Avoid partial register stall. */
724 } else if (stfp) { /* FP to integer conversion. */ 722 } else if (stfp) { /* FP to integer conversion. */
725 if (irt_isguard(ir->t)) { 723 if (irt_isguard(ir->t)) {
726 /* Checked conversions are only supported from number to int. */ 724 /* Checked conversions are only supported from number to int. */
@@ -728,9 +726,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
728 asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); 726 asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
729 } else { 727 } else {
730 Reg dest = ra_dest(as, ir, RSET_GPR); 728 Reg dest = ra_dest(as, ir, RSET_GPR);
731 x86Op op = st == IRT_NUM ? 729 x86Op op = st == IRT_NUM ? XO_CVTTSD2SI : XO_CVTTSS2SI;
732 ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSD2SI : XO_CVTSD2SI) :
733 ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSS2SI : XO_CVTSS2SI);
734 if (LJ_64 ? irt_isu64(ir->t) : irt_isu32(ir->t)) { 730 if (LJ_64 ? irt_isu64(ir->t) : irt_isu32(ir->t)) {
735 /* LJ_64: For inputs >= 2^63 add -2^64, convert again. */ 731 /* LJ_64: For inputs >= 2^63 add -2^64, convert again. */
736 /* LJ_32: For inputs >= 2^31 add -2^31, convert again and add 2^31. */ 732 /* LJ_32: For inputs >= 2^31 add -2^31, convert again and add 2^31. */
@@ -824,8 +820,7 @@ static void asm_conv_fp_int64(ASMState *as, IRIns *ir)
824 if (ra_hasreg(dest)) { 820 if (ra_hasreg(dest)) {
825 ra_free(as, dest); 821 ra_free(as, dest);
826 ra_modified(as, dest); 822 ra_modified(as, dest);
827 emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, 823 emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, dest, RID_ESP, ofs);
828 dest, RID_ESP, ofs);
829 } 824 }
830 emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd, 825 emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd,
831 irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs); 826 irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs);
@@ -853,7 +848,6 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
853 Reg lo, hi; 848 Reg lo, hi;
854 lua_assert(st == IRT_NUM || st == IRT_FLOAT); 849 lua_assert(st == IRT_NUM || st == IRT_FLOAT);
855 lua_assert(dt == IRT_I64 || dt == IRT_U64); 850 lua_assert(dt == IRT_I64 || dt == IRT_U64);
856 lua_assert(((ir-1)->op2 & IRCONV_TRUNC));
857 hi = ra_dest(as, ir, RSET_GPR); 851 hi = ra_dest(as, ir, RSET_GPR);
858 lo = ra_dest(as, ir-1, rset_exclude(RSET_GPR, hi)); 852 lo = ra_dest(as, ir-1, rset_exclude(RSET_GPR, hi));
859 if (ra_used(ir-1)) emit_rmro(as, XO_MOV, lo, RID_ESP, 0); 853 if (ra_used(ir-1)) emit_rmro(as, XO_MOV, lo, RID_ESP, 0);
@@ -1262,7 +1256,7 @@ static void asm_fxload(ASMState *as, IRIns *ir)
1262 case IRT_U8: xo = XO_MOVZXb; break; 1256 case IRT_U8: xo = XO_MOVZXb; break;
1263 case IRT_I16: xo = XO_MOVSXw; break; 1257 case IRT_I16: xo = XO_MOVSXw; break;
1264 case IRT_U16: xo = XO_MOVZXw; break; 1258 case IRT_U16: xo = XO_MOVZXw; break;
1265 case IRT_NUM: xo = XMM_MOVRM(as); break; 1259 case IRT_NUM: xo = XO_MOVSD; break;
1266 case IRT_FLOAT: xo = XO_MOVSS; break; 1260 case IRT_FLOAT: xo = XO_MOVSS; break;
1267 default: 1261 default:
1268 if (LJ_64 && irt_is64(ir->t)) 1262 if (LJ_64 && irt_is64(ir->t))
@@ -1376,7 +1370,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
1376 RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR; 1370 RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
1377 Reg dest = ra_dest(as, ir, allow); 1371 Reg dest = ra_dest(as, ir, allow);
1378 asm_fuseahuref(as, ir->op1, RSET_GPR); 1372 asm_fuseahuref(as, ir->op1, RSET_GPR);
1379 emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XMM_MOVRM(as), dest, RID_MRM); 1373 emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XO_MOVSD, dest, RID_MRM);
1380 } else { 1374 } else {
1381 asm_fuseahuref(as, ir->op1, RSET_GPR); 1375 asm_fuseahuref(as, ir->op1, RSET_GPR);
1382 } 1376 }
@@ -1442,7 +1436,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
1442 Reg left = ra_scratch(as, RSET_FPR); 1436 Reg left = ra_scratch(as, RSET_FPR);
1443 asm_tointg(as, ir, left); /* Frees dest reg. Do this before base alloc. */ 1437 asm_tointg(as, ir, left); /* Frees dest reg. Do this before base alloc. */
1444 base = ra_alloc1(as, REF_BASE, RSET_GPR); 1438 base = ra_alloc1(as, REF_BASE, RSET_GPR);
1445 emit_rmro(as, XMM_MOVRM(as), left, base, ofs); 1439 emit_rmro(as, XO_MOVSD, left, base, ofs);
1446 t.irt = IRT_NUM; /* Continue with a regular number type check. */ 1440 t.irt = IRT_NUM; /* Continue with a regular number type check. */
1447#if LJ_64 1441#if LJ_64
1448 } else if (irt_islightud(t)) { 1442 } else if (irt_islightud(t)) {
@@ -1460,11 +1454,9 @@ static void asm_sload(ASMState *as, IRIns *ir)
1460 lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); 1454 lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
1461 if ((ir->op2 & IRSLOAD_CONVERT)) { 1455 if ((ir->op2 & IRSLOAD_CONVERT)) {
1462 t.irt = irt_isint(t) ? IRT_NUM : IRT_INT; /* Check for original type. */ 1456 t.irt = irt_isint(t) ? IRT_NUM : IRT_INT; /* Check for original type. */
1463 emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTSD2SI, dest, base, ofs); 1457 emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTTSD2SI, dest, base, ofs);
1464 } else if (irt_isnum(t)) {
1465 emit_rmro(as, XMM_MOVRM(as), dest, base, ofs);
1466 } else { 1458 } else {
1467 emit_rmro(as, XO_MOV, dest, base, ofs); 1459 emit_rmro(as, irt_isnum(t) ? XO_MOVSD : XO_MOV, dest, base, ofs);
1468 } 1460 }
1469 } else { 1461 } else {
1470 if (!(ir->op2 & IRSLOAD_TYPECHECK)) 1462 if (!(ir->op2 & IRSLOAD_TYPECHECK))
@@ -1696,7 +1688,7 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
1696 if (ra_hasreg(dest)) { 1688 if (ra_hasreg(dest)) {
1697 ra_free(as, dest); 1689 ra_free(as, dest);
1698 ra_modified(as, dest); 1690 ra_modified(as, dest);
1699 emit_rmro(as, XMM_MOVRM(as), dest, RID_ESP, ofs); 1691 emit_rmro(as, XO_MOVSD, dest, RID_ESP, ofs);
1700 } 1692 }
1701 emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs); 1693 emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs);
1702 switch (fpm) { /* st0 = lj_vm_*(st0) */ 1694 switch (fpm) { /* st0 = lj_vm_*(st0) */
diff --git a/src/lj_bc.h b/src/lj_bc.h
index 56e71dd9..ac9cc5e1 100644
--- a/src/lj_bc.h
+++ b/src/lj_bc.h
@@ -89,6 +89,8 @@
89 _(ISFC, dst, ___, var, ___) \ 89 _(ISFC, dst, ___, var, ___) \
90 _(IST, ___, ___, var, ___) \ 90 _(IST, ___, ___, var, ___) \
91 _(ISF, ___, ___, var, ___) \ 91 _(ISF, ___, ___, var, ___) \
92 _(ISTYPE, var, ___, lit, ___) \
93 _(ISNUM, var, ___, lit, ___) \
92 \ 94 \
93 /* Unary ops. */ \ 95 /* Unary ops. */ \
94 _(MOV, dst, ___, var, ___) \ 96 _(MOV, dst, ___, var, ___) \
@@ -143,10 +145,12 @@
143 _(TGETV, dst, var, var, index) \ 145 _(TGETV, dst, var, var, index) \
144 _(TGETS, dst, var, str, index) \ 146 _(TGETS, dst, var, str, index) \
145 _(TGETB, dst, var, lit, index) \ 147 _(TGETB, dst, var, lit, index) \
148 _(TGETR, dst, var, var, index) \
146 _(TSETV, var, var, var, newindex) \ 149 _(TSETV, var, var, var, newindex) \
147 _(TSETS, var, var, str, newindex) \ 150 _(TSETS, var, var, str, newindex) \
148 _(TSETB, var, var, lit, newindex) \ 151 _(TSETB, var, var, lit, newindex) \
149 _(TSETM, base, ___, num, newindex) \ 152 _(TSETM, base, ___, num, newindex) \
153 _(TSETR, var, var, var, newindex) \
150 \ 154 \
151 /* Calls and vararg handling. T = tail call. */ \ 155 /* Calls and vararg handling. T = tail call. */ \
152 _(CALLM, base, lit, lit, call) \ 156 _(CALLM, base, lit, lit, call) \
diff --git a/src/lj_bcdump.h b/src/lj_bcdump.h
index e660156d..22a8b823 100644
--- a/src/lj_bcdump.h
+++ b/src/lj_bcdump.h
@@ -36,7 +36,7 @@
36/* If you perform *any* kind of private modifications to the bytecode itself 36/* If you perform *any* kind of private modifications to the bytecode itself
37** or to the dump format, you *must* set BCDUMP_VERSION to 0x80 or higher. 37** or to the dump format, you *must* set BCDUMP_VERSION to 0x80 or higher.
38*/ 38*/
39#define BCDUMP_VERSION 1 39#define BCDUMP_VERSION 2
40 40
41/* Compatibility flags. */ 41/* Compatibility flags. */
42#define BCDUMP_F_BE 0x01 42#define BCDUMP_F_BE 0x01
@@ -61,6 +61,7 @@ enum {
61 61
62LJ_FUNC int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, 62LJ_FUNC int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer,
63 void *data, int strip); 63 void *data, int strip);
64LJ_FUNC GCproto *lj_bcread_proto(LexState *ls);
64LJ_FUNC GCproto *lj_bcread(LexState *ls); 65LJ_FUNC GCproto *lj_bcread(LexState *ls);
65 66
66#endif 67#endif
diff --git a/src/lj_bcread.c b/src/lj_bcread.c
index 2b5ba855..7a8c08f5 100644
--- a/src/lj_bcread.c
+++ b/src/lj_bcread.c
@@ -326,25 +326,13 @@ static void bcread_uv(LexState *ls, GCproto *pt, MSize sizeuv)
326} 326}
327 327
328/* Read a prototype. */ 328/* Read a prototype. */
329static GCproto *bcread_proto(LexState *ls) 329GCproto *lj_bcread_proto(LexState *ls)
330{ 330{
331 GCproto *pt; 331 GCproto *pt;
332 MSize framesize, numparams, flags, sizeuv, sizekgc, sizekn, sizebc, sizept; 332 MSize framesize, numparams, flags, sizeuv, sizekgc, sizekn, sizebc, sizept;
333 MSize ofsk, ofsuv, ofsdbg; 333 MSize ofsk, ofsuv, ofsdbg;
334 MSize sizedbg = 0; 334 MSize sizedbg = 0;
335 BCLine firstline = 0, numline = 0; 335 BCLine firstline = 0, numline = 0;
336 MSize len, startn;
337
338 /* Read length. */
339 if (ls->n > 0 && ls->p[0] == 0) { /* Shortcut EOF. */
340 ls->n--; ls->p++;
341 return NULL;
342 }
343 bcread_want(ls, 5);
344 len = bcread_uleb128(ls);
345 if (!len) return NULL; /* EOF */
346 bcread_need(ls, len);
347 startn = ls->n;
348 336
349 /* Read prototype header. */ 337 /* Read prototype header. */
350 flags = bcread_byte(ls); 338 flags = bcread_byte(ls);
@@ -413,9 +401,6 @@ static GCproto *bcread_proto(LexState *ls)
413 setmref(pt->uvinfo, NULL); 401 setmref(pt->uvinfo, NULL);
414 setmref(pt->varinfo, NULL); 402 setmref(pt->varinfo, NULL);
415 } 403 }
416
417 if (len != startn - ls->n)
418 bcread_error(ls, LJ_ERR_BCBAD);
419 return pt; 404 return pt;
420} 405}
421 406
@@ -462,8 +447,21 @@ GCproto *lj_bcread(LexState *ls)
462 if (!bcread_header(ls)) 447 if (!bcread_header(ls))
463 bcread_error(ls, LJ_ERR_BCFMT); 448 bcread_error(ls, LJ_ERR_BCFMT);
464 for (;;) { /* Process all prototypes in the bytecode dump. */ 449 for (;;) { /* Process all prototypes in the bytecode dump. */
465 GCproto *pt = bcread_proto(ls); 450 GCproto *pt;
466 if (!pt) break; 451 MSize len, startn;
452 /* Read length. */
453 if (ls->n > 0 && ls->p[0] == 0) { /* Shortcut EOF. */
454 ls->n--; ls->p++;
455 break;
456 }
457 bcread_want(ls, 5);
458 len = bcread_uleb128(ls);
459 if (!len) break; /* EOF */
460 bcread_need(ls, len);
461 startn = ls->n;
462 pt = lj_bcread_proto(ls);
463 if (len != startn - ls->n)
464 bcread_error(ls, LJ_ERR_BCBAD);
467 setprotoV(L, L->top, pt); 465 setprotoV(L, L->top, pt);
468 incr_top(L); 466 incr_top(L);
469 } 467 }
diff --git a/src/lj_crecord.c b/src/lj_crecord.c
index b60eb7b3..a5d896eb 100644
--- a/src/lj_crecord.c
+++ b/src/lj_crecord.c
@@ -446,7 +446,7 @@ static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp,
446 /* fallthrough */ 446 /* fallthrough */
447 case CCX(I, F): 447 case CCX(I, F):
448 if (dt == IRT_CDATA || st == IRT_CDATA) goto err_nyi; 448 if (dt == IRT_CDATA || st == IRT_CDATA) goto err_nyi;
449 sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, IRCONV_TRUNC|IRCONV_ANY); 449 sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, IRCONV_ANY);
450 goto xstore; 450 goto xstore;
451 case CCX(I, P): 451 case CCX(I, P):
452 case CCX(I, A): 452 case CCX(I, A):
@@ -522,7 +522,7 @@ static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp,
522 if (st == IRT_CDATA) goto err_nyi; 522 if (st == IRT_CDATA) goto err_nyi;
523 /* The signed conversion is cheaper. x64 really has 47 bit pointers. */ 523 /* The signed conversion is cheaper. x64 really has 47 bit pointers. */
524 sp = emitconv(sp, (LJ_64 && dsize == 8) ? IRT_I64 : IRT_U32, 524 sp = emitconv(sp, (LJ_64 && dsize == 8) ? IRT_I64 : IRT_U32,
525 st, IRCONV_TRUNC|IRCONV_ANY); 525 st, IRCONV_ANY);
526 goto xstore; 526 goto xstore;
527 527
528 /* Destination is an array. */ 528 /* Destination is an array. */
@@ -1229,7 +1229,7 @@ static TRef crec_arith_int64(jit_State *J, TRef *sp, CType **s, MMS mm)
1229 for (i = 0; i < 2; i++) { 1229 for (i = 0; i < 2; i++) {
1230 IRType st = tref_type(sp[i]); 1230 IRType st = tref_type(sp[i]);
1231 if (st == IRT_NUM || st == IRT_FLOAT) 1231 if (st == IRT_NUM || st == IRT_FLOAT)
1232 sp[i] = emitconv(sp[i], dt, st, IRCONV_TRUNC|IRCONV_ANY); 1232 sp[i] = emitconv(sp[i], dt, st, IRCONV_ANY);
1233 else if (!(st == IRT_I64 || st == IRT_U64)) 1233 else if (!(st == IRT_I64 || st == IRT_U64))
1234 sp[i] = emitconv(sp[i], dt, IRT_INT, 1234 sp[i] = emitconv(sp[i], dt, IRT_INT,
1235 (s[i]->info & CTF_UNSIGNED) ? 0 : IRCONV_SEXT); 1235 (s[i]->info & CTF_UNSIGNED) ? 0 : IRCONV_SEXT);
@@ -1297,15 +1297,14 @@ static TRef crec_arith_ptr(jit_State *J, TRef *sp, CType **s, MMS mm)
1297 CTypeID id; 1297 CTypeID id;
1298#if LJ_64 1298#if LJ_64
1299 if (t == IRT_NUM || t == IRT_FLOAT) 1299 if (t == IRT_NUM || t == IRT_FLOAT)
1300 tr = emitconv(tr, IRT_INTP, t, IRCONV_TRUNC|IRCONV_ANY); 1300 tr = emitconv(tr, IRT_INTP, t, IRCONV_ANY);
1301 else if (!(t == IRT_I64 || t == IRT_U64)) 1301 else if (!(t == IRT_I64 || t == IRT_U64))
1302 tr = emitconv(tr, IRT_INTP, IRT_INT, 1302 tr = emitconv(tr, IRT_INTP, IRT_INT,
1303 ((t - IRT_I8) & 1) ? 0 : IRCONV_SEXT); 1303 ((t - IRT_I8) & 1) ? 0 : IRCONV_SEXT);
1304#else 1304#else
1305 if (!tref_typerange(sp[1], IRT_I8, IRT_U32)) { 1305 if (!tref_typerange(sp[1], IRT_I8, IRT_U32)) {
1306 tr = emitconv(tr, IRT_INTP, t, 1306 tr = emitconv(tr, IRT_INTP, t,
1307 (t == IRT_NUM || t == IRT_FLOAT) ? 1307 (t == IRT_NUM || t == IRT_FLOAT) ? IRCONV_ANY : 0);
1308 IRCONV_TRUNC|IRCONV_ANY : 0);
1309 } 1308 }
1310#endif 1309#endif
1311 tr = emitir(IRT(IR_MUL, IRT_INTP), tr, lj_ir_kintp(J, sz)); 1310 tr = emitir(IRT(IR_MUL, IRT_INTP), tr, lj_ir_kintp(J, sz));
diff --git a/src/lj_debug.c b/src/lj_debug.c
index be7fb2b1..ec56b7d2 100644
--- a/src/lj_debug.c
+++ b/src/lj_debug.c
@@ -321,7 +321,7 @@ const char *lj_debug_funcname(lua_State *L, TValue *frame, const char **name)
321/* -- Source code locations ----------------------------------------------- */ 321/* -- Source code locations ----------------------------------------------- */
322 322
323/* Generate shortened source name. */ 323/* Generate shortened source name. */
324void lj_debug_shortname(char *out, GCstr *str) 324void lj_debug_shortname(char *out, GCstr *str, BCLine line)
325{ 325{
326 const char *src = strdata(str); 326 const char *src = strdata(str);
327 if (*src == '=') { 327 if (*src == '=') {
@@ -335,11 +335,11 @@ void lj_debug_shortname(char *out, GCstr *str)
335 *out++ = '.'; *out++ = '.'; *out++ = '.'; 335 *out++ = '.'; *out++ = '.'; *out++ = '.';
336 } 336 }
337 strcpy(out, src); 337 strcpy(out, src);
338 } else { /* Output [string "string"]. */ 338 } else { /* Output [string "string"] or [builtin:name]. */
339 size_t len; /* Length, up to first control char. */ 339 size_t len; /* Length, up to first control char. */
340 for (len = 0; len < LUA_IDSIZE-12; len++) 340 for (len = 0; len < LUA_IDSIZE-12; len++)
341 if (((const unsigned char *)src)[len] < ' ') break; 341 if (((const unsigned char *)src)[len] < ' ') break;
342 strcpy(out, "[string \""); out += 9; 342 strcpy(out, line == ~(BCLine)0 ? "[builtin:" : "[string \""); out += 9;
343 if (src[len] != '\0') { /* Must truncate? */ 343 if (src[len] != '\0') { /* Must truncate? */
344 if (len > LUA_IDSIZE-15) len = LUA_IDSIZE-15; 344 if (len > LUA_IDSIZE-15) len = LUA_IDSIZE-15;
345 strncpy(out, src, len); out += len; 345 strncpy(out, src, len); out += len;
@@ -347,7 +347,7 @@ void lj_debug_shortname(char *out, GCstr *str)
347 } else { 347 } else {
348 strcpy(out, src); out += len; 348 strcpy(out, src); out += len;
349 } 349 }
350 strcpy(out, "\"]"); 350 strcpy(out, line == ~(BCLine)0 ? "]" : "\"]");
351 } 351 }
352} 352}
353 353
@@ -360,8 +360,9 @@ void lj_debug_addloc(lua_State *L, const char *msg,
360 if (isluafunc(fn)) { 360 if (isluafunc(fn)) {
361 BCLine line = debug_frameline(L, fn, nextframe); 361 BCLine line = debug_frameline(L, fn, nextframe);
362 if (line >= 0) { 362 if (line >= 0) {
363 GCproto *pt = funcproto(fn);
363 char buf[LUA_IDSIZE]; 364 char buf[LUA_IDSIZE];
364 lj_debug_shortname(buf, proto_chunkname(funcproto(fn))); 365 lj_debug_shortname(buf, proto_chunkname(pt), pt->firstline);
365 lj_str_pushf(L, "%s:%d: %s", buf, line, msg); 366 lj_str_pushf(L, "%s:%d: %s", buf, line, msg);
366 return; 367 return;
367 } 368 }
@@ -377,7 +378,9 @@ void lj_debug_pushloc(lua_State *L, GCproto *pt, BCPos pc)
377 const char *s = strdata(name); 378 const char *s = strdata(name);
378 MSize i, len = name->len; 379 MSize i, len = name->len;
379 BCLine line = lj_debug_line(pt, pc); 380 BCLine line = lj_debug_line(pt, pc);
380 if (*s == '@') { 381 if (pt->firstline == ~(BCLine)0) {
382 lj_str_pushf(L, "builtin:%s", s);
383 } else if (*s == '@') {
381 s++; len--; 384 s++; len--;
382 for (i = len; i > 0; i--) 385 for (i = len; i > 0; i--)
383 if (s[i] == '/' || s[i] == '\\') { 386 if (s[i] == '/' || s[i] == '\\') {
@@ -453,7 +456,7 @@ int lj_debug_getinfo(lua_State *L, const char *what, lj_Debug *ar, int ext)
453 BCLine firstline = pt->firstline; 456 BCLine firstline = pt->firstline;
454 GCstr *name = proto_chunkname(pt); 457 GCstr *name = proto_chunkname(pt);
455 ar->source = strdata(name); 458 ar->source = strdata(name);
456 lj_debug_shortname(ar->short_src, name); 459 lj_debug_shortname(ar->short_src, name, pt->firstline);
457 ar->linedefined = (int)firstline; 460 ar->linedefined = (int)firstline;
458 ar->lastlinedefined = (int)(firstline + pt->numline); 461 ar->lastlinedefined = (int)(firstline + pt->numline);
459 ar->what = firstline ? "Lua" : "main"; 462 ar->what = firstline ? "Lua" : "main";
diff --git a/src/lj_debug.h b/src/lj_debug.h
index 7cf57de7..4144b47e 100644
--- a/src/lj_debug.h
+++ b/src/lj_debug.h
@@ -34,7 +34,7 @@ LJ_FUNC const char *lj_debug_slotname(GCproto *pt, const BCIns *pc,
34 BCReg slot, const char **name); 34 BCReg slot, const char **name);
35LJ_FUNC const char *lj_debug_funcname(lua_State *L, TValue *frame, 35LJ_FUNC const char *lj_debug_funcname(lua_State *L, TValue *frame,
36 const char **name); 36 const char **name);
37LJ_FUNC void lj_debug_shortname(char *out, GCstr *str); 37LJ_FUNC void lj_debug_shortname(char *out, GCstr *str, BCLine line);
38LJ_FUNC void lj_debug_addloc(lua_State *L, const char *msg, 38LJ_FUNC void lj_debug_addloc(lua_State *L, const char *msg,
39 cTValue *frame, cTValue *nextframe); 39 cTValue *frame, cTValue *nextframe);
40LJ_FUNC void lj_debug_pushloc(lua_State *L, GCproto *pt, BCPos pc); 40LJ_FUNC void lj_debug_pushloc(lua_State *L, GCproto *pt, BCPos pc);
diff --git a/src/lj_dispatch.h b/src/lj_dispatch.h
index a56b6260..a03804af 100644
--- a/src/lj_dispatch.h
+++ b/src/lj_dispatch.h
@@ -33,11 +33,11 @@
33 _(lj_ffh_coroutine_wrap_err) _(lj_func_closeuv) _(lj_func_newL_gc) \ 33 _(lj_ffh_coroutine_wrap_err) _(lj_func_closeuv) _(lj_func_newL_gc) \
34 _(lj_gc_barrieruv) _(lj_gc_step) _(lj_gc_step_fixtop) _(lj_meta_arith) \ 34 _(lj_gc_barrieruv) _(lj_gc_step) _(lj_gc_step_fixtop) _(lj_meta_arith) \
35 _(lj_meta_call) _(lj_meta_cat) _(lj_meta_comp) _(lj_meta_equal) \ 35 _(lj_meta_call) _(lj_meta_cat) _(lj_meta_comp) _(lj_meta_equal) \
36 _(lj_meta_for) _(lj_meta_len) _(lj_meta_tget) _(lj_meta_tset) \ 36 _(lj_meta_for) _(lj_meta_istype) _(lj_meta_len) _(lj_meta_tget) \
37 _(lj_state_growstack) _(lj_str_fromnum) _(lj_str_fromnumber) _(lj_str_new) \ 37 _(lj_meta_tset) _(lj_state_growstack) _(lj_str_fromnum) _(lj_str_fromnumber) \
38 _(lj_tab_dup) _(lj_tab_get) _(lj_tab_getinth) _(lj_tab_len) _(lj_tab_new) \ 38 _(lj_str_new) _(lj_tab_dup) _(lj_tab_get) _(lj_tab_getinth) _(lj_tab_len) \
39 _(lj_tab_newkey) _(lj_tab_next) _(lj_tab_reasize) \ 39 _(lj_tab_new) _(lj_tab_newkey) _(lj_tab_next) _(lj_tab_reasize) \
40 JITGOTDEF(_) FFIGOTDEF(_) 40 _(lj_tab_setinth) JITGOTDEF(_) FFIGOTDEF(_)
41 41
42enum { 42enum {
43#define GOTENUM(name) LJ_GOT_##name, 43#define GOTENUM(name) LJ_GOT_##name,
@@ -60,7 +60,7 @@ typedef uint16_t HotCount;
60#define HOTCOUNT_CALL 1 60#define HOTCOUNT_CALL 1
61 61
62/* This solves a circular dependency problem -- bump as needed. Sigh. */ 62/* This solves a circular dependency problem -- bump as needed. Sigh. */
63#define GG_NUM_ASMFF 62 63#define GG_NUM_ASMFF 59
64 64
65#define GG_LEN_DDISP (BC__MAX + GG_NUM_ASMFF) 65#define GG_LEN_DDISP (BC__MAX + GG_NUM_ASMFF)
66#define GG_LEN_SDISP BC_FUNCF 66#define GG_LEN_SDISP BC_FUNCF
diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h
index bd184a30..2454c899 100644
--- a/src/lj_emit_x86.h
+++ b/src/lj_emit_x86.h
@@ -241,10 +241,6 @@ static void emit_gmrmi(ASMState *as, x86Group xg, Reg rb, int32_t i)
241 241
242/* -- Emit loads/stores --------------------------------------------------- */ 242/* -- Emit loads/stores --------------------------------------------------- */
243 243
244/* Instruction selection for XMM moves. */
245#define XMM_MOVRR(as) ((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVSD : XO_MOVAPS)
246#define XMM_MOVRM(as) ((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVLPD : XO_MOVSD)
247
248/* mov [base+ofs], i */ 244/* mov [base+ofs], i */
249static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i) 245static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i)
250{ 246{
@@ -314,7 +310,7 @@ static void emit_loadn(ASMState *as, Reg r, cTValue *tv)
314 if (tvispzero(tv)) /* Use xor only for +0. */ 310 if (tvispzero(tv)) /* Use xor only for +0. */
315 emit_rr(as, XO_XORPS, r, r); 311 emit_rr(as, XO_XORPS, r, r);
316 else 312 else
317 emit_rma(as, XMM_MOVRM(as), r, &tv->n); 313 emit_rma(as, XO_MOVSD, r, &tv->n);
318} 314}
319 315
320/* -- Emit control-flow instructions -------------------------------------- */ 316/* -- Emit control-flow instructions -------------------------------------- */
@@ -427,7 +423,7 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
427 if (dst < RID_MAX_GPR) 423 if (dst < RID_MAX_GPR)
428 emit_rr(as, XO_MOV, REX_64IR(ir, dst), src); 424 emit_rr(as, XO_MOV, REX_64IR(ir, dst), src);
429 else 425 else
430 emit_rr(as, XMM_MOVRR(as), dst, src); 426 emit_rr(as, XO_MOVAPS, dst, src);
431} 427}
432 428
433/* Generic load of register from stack slot. */ 429/* Generic load of register from stack slot. */
@@ -436,7 +432,7 @@ static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs)
436 if (r < RID_MAX_GPR) 432 if (r < RID_MAX_GPR)
437 emit_rmro(as, XO_MOV, REX_64IR(ir, r), RID_ESP, ofs); 433 emit_rmro(as, XO_MOV, REX_64IR(ir, r), RID_ESP, ofs);
438 else 434 else
439 emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, r, RID_ESP, ofs); 435 emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, r, RID_ESP, ofs);
440} 436}
441 437
442/* Generic store of register to stack slot. */ 438/* Generic store of register to stack slot. */
diff --git a/src/lj_err.c b/src/lj_err.c
index 4a33a233..e0fb7167 100644
--- a/src/lj_err.c
+++ b/src/lj_err.c
@@ -587,7 +587,7 @@ LJ_NOINLINE void lj_err_lex(lua_State *L, GCstr *src, const char *tok,
587{ 587{
588 char buff[LUA_IDSIZE]; 588 char buff[LUA_IDSIZE];
589 const char *msg; 589 const char *msg;
590 lj_debug_shortname(buff, src); 590 lj_debug_shortname(buff, src, line);
591 msg = lj_str_pushvf(L, err2msg(em), argp); 591 msg = lj_str_pushvf(L, err2msg(em), argp);
592 msg = lj_str_pushf(L, "%s:%d: %s", buff, line, msg); 592 msg = lj_str_pushf(L, "%s:%d: %s", buff, line, msg);
593 if (tok) 593 if (tok)
diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c
index 4aa4f064..51981477 100644
--- a/src/lj_ffrecord.c
+++ b/src/lj_ffrecord.c
@@ -528,14 +528,6 @@ static void LJ_FASTCALL recff_math_modf(jit_State *J, RecordFFData *rd)
528 rd->nres = 2; 528 rd->nres = 2;
529} 529}
530 530
531static void LJ_FASTCALL recff_math_degrad(jit_State *J, RecordFFData *rd)
532{
533 TRef tr = lj_ir_tonum(J, J->base[0]);
534 TRef trm = lj_ir_knum(J, numV(&J->fn->c.upvalue[0]));
535 J->base[0] = emitir(IRTN(IR_MUL), tr, trm);
536 UNUSED(rd);
537}
538
539static void LJ_FASTCALL recff_math_pow(jit_State *J, RecordFFData *rd) 531static void LJ_FASTCALL recff_math_pow(jit_State *J, RecordFFData *rd)
540{ 532{
541 TRef tr = lj_ir_tonum(J, J->base[0]); 533 TRef tr = lj_ir_tonum(J, J->base[0]);
@@ -737,14 +729,6 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd)
737 729
738/* -- Table library fast functions ---------------------------------------- */ 730/* -- Table library fast functions ---------------------------------------- */
739 731
740static void LJ_FASTCALL recff_table_getn(jit_State *J, RecordFFData *rd)
741{
742 if (tref_istab(J->base[0]))
743 J->base[0] = lj_ir_call(J, IRCALL_lj_tab_len, J->base[0]);
744 /* else: Interpreter will throw. */
745 UNUSED(rd);
746}
747
748static void LJ_FASTCALL recff_table_remove(jit_State *J, RecordFFData *rd) 732static void LJ_FASTCALL recff_table_remove(jit_State *J, RecordFFData *rd)
749{ 733{
750 TRef tab = J->base[0]; 734 TRef tab = J->base[0];
diff --git a/src/lj_ir.h b/src/lj_ir.h
index a9824325..9d2521c9 100644
--- a/src/lj_ir.h
+++ b/src/lj_ir.h
@@ -227,7 +227,6 @@ IRFLDEF(FLENUM)
227#define IRCONV_DSH 5 227#define IRCONV_DSH 5
228#define IRCONV_NUM_INT ((IRT_NUM<<IRCONV_DSH)|IRT_INT) 228#define IRCONV_NUM_INT ((IRT_NUM<<IRCONV_DSH)|IRT_INT)
229#define IRCONV_INT_NUM ((IRT_INT<<IRCONV_DSH)|IRT_NUM) 229#define IRCONV_INT_NUM ((IRT_INT<<IRCONV_DSH)|IRT_NUM)
230#define IRCONV_TRUNC 0x0400 /* Truncate number to integer. */
231#define IRCONV_SEXT 0x0800 /* Sign-extend integer to integer. */ 230#define IRCONV_SEXT 0x0800 /* Sign-extend integer to integer. */
232#define IRCONV_MODEMASK 0x0fff 231#define IRCONV_MODEMASK 0x0fff
233#define IRCONV_CONVMASK 0xf000 232#define IRCONV_CONVMASK 0xf000
diff --git a/src/lj_jit.h b/src/lj_jit.h
index c0b1c41e..2683b462 100644
--- a/src/lj_jit.h
+++ b/src/lj_jit.h
@@ -14,18 +14,15 @@
14 14
15/* CPU-specific JIT engine flags. */ 15/* CPU-specific JIT engine flags. */
16#if LJ_TARGET_X86ORX64 16#if LJ_TARGET_X86ORX64
17#define JIT_F_CMOV 0x00000010 17#define JIT_F_SSE2 0x00000010
18#define JIT_F_SSE2 0x00000020 18#define JIT_F_SSE3 0x00000020
19#define JIT_F_SSE3 0x00000040 19#define JIT_F_SSE4_1 0x00000040
20#define JIT_F_SSE4_1 0x00000080 20#define JIT_F_PREFER_IMUL 0x00000080
21#define JIT_F_P4 0x00000100 21#define JIT_F_LEA_AGU 0x00000100
22#define JIT_F_PREFER_IMUL 0x00000200
23#define JIT_F_SPLIT_XMM 0x00000400
24#define JIT_F_LEA_AGU 0x00000800
25 22
26/* Names for the CPU-specific flags. Must match the order above. */ 23/* Names for the CPU-specific flags. Must match the order above. */
27#define JIT_F_CPU_FIRST JIT_F_CMOV 24#define JIT_F_CPU_FIRST JIT_F_SSE2
28#define JIT_F_CPUSTRING "\4CMOV\4SSE2\4SSE3\6SSE4.1\2P4\3AMD\2K8\4ATOM" 25#define JIT_F_CPUSTRING "\4SSE2\4SSE3\6SSE4.1\3AMD\4ATOM"
29#elif LJ_TARGET_ARM 26#elif LJ_TARGET_ARM
30#define JIT_F_ARMV6_ 0x00000010 27#define JIT_F_ARMV6_ 0x00000010
31#define JIT_F_ARMV6T2_ 0x00000020 28#define JIT_F_ARMV6T2_ 0x00000020
diff --git a/src/lj_lib.c b/src/lj_lib.c
index 331eaa6a..be3ee004 100644
--- a/src/lj_lib.c
+++ b/src/lj_lib.c
@@ -18,6 +18,8 @@
18#include "lj_dispatch.h" 18#include "lj_dispatch.h"
19#include "lj_vm.h" 19#include "lj_vm.h"
20#include "lj_strscan.h" 20#include "lj_strscan.h"
21#include "lj_lex.h"
22#include "lj_bcdump.h"
21#include "lj_lib.h" 23#include "lj_lib.h"
22 24
23/* -- Library initialization ---------------------------------------------- */ 25/* -- Library initialization ---------------------------------------------- */
@@ -43,6 +45,28 @@ static GCtab *lib_create_table(lua_State *L, const char *libname, int hsize)
43 return tabV(L->top-1); 45 return tabV(L->top-1);
44} 46}
45 47
48static const uint8_t *lib_read_lfunc(lua_State *L, const uint8_t *p, GCtab *tab)
49{
50 int len = *p++;
51 GCstr *name = lj_str_new(L, (const char *)p, len);
52 LexState ls;
53 GCproto *pt;
54 GCfunc *fn;
55 memset(&ls, 0, sizeof(ls));
56 ls.L = L;
57 ls.p = (const char *)(p+len);
58 ls.n = ~(MSize)0;
59 ls.current = -1;
60 ls.level = (BCDUMP_F_STRIP|(LJ_BE*BCDUMP_F_BE));
61 ls.chunkname = name;
62 pt = lj_bcread_proto(&ls);
63 pt->firstline = ~(BCLine)0;
64 fn = lj_func_newL_empty(L, pt, tabref(L->env));
65 /* NOBARRIER: See below for common barrier. */
66 setfuncV(L, lj_tab_setstr(L, tab, name), fn);
67 return (const uint8_t *)ls.p;
68}
69
46void lj_lib_register(lua_State *L, const char *libname, 70void lj_lib_register(lua_State *L, const char *libname,
47 const uint8_t *p, const lua_CFunction *cf) 71 const uint8_t *p, const lua_CFunction *cf)
48{ 72{
@@ -87,6 +111,9 @@ void lj_lib_register(lua_State *L, const char *libname,
87 ofn = fn; 111 ofn = fn;
88 } else { 112 } else {
89 switch (tag | len) { 113 switch (tag | len) {
114 case LIBINIT_LUA:
115 p = lib_read_lfunc(L, p, tab);
116 break;
90 case LIBINIT_SET: 117 case LIBINIT_SET:
91 L->top -= 2; 118 L->top -= 2;
92 if (tvisstr(L->top+1) && strV(L->top+1)->len == 0) 119 if (tvisstr(L->top+1) && strV(L->top+1)->len == 0)
diff --git a/src/lj_lib.h b/src/lj_lib.h
index 2fe6d2a8..05f90de5 100644
--- a/src/lj_lib.h
+++ b/src/lj_lib.h
@@ -77,6 +77,7 @@ static LJ_AINLINE void lj_lib_pushcc(lua_State *L, lua_CFunction f,
77#define LJLIB_CF(name) static int lj_cf_##name(lua_State *L) 77#define LJLIB_CF(name) static int lj_cf_##name(lua_State *L)
78#define LJLIB_ASM(name) static int lj_ffh_##name(lua_State *L) 78#define LJLIB_ASM(name) static int lj_ffh_##name(lua_State *L)
79#define LJLIB_ASM_(name) 79#define LJLIB_ASM_(name)
80#define LJLIB_LUA(name)
80#define LJLIB_SET(name) 81#define LJLIB_SET(name)
81#define LJLIB_PUSH(arg) 82#define LJLIB_PUSH(arg)
82#define LJLIB_REC(handler) 83#define LJLIB_REC(handler)
@@ -96,7 +97,8 @@ LJ_FUNC void lj_lib_register(lua_State *L, const char *libname,
96#define LIBINIT_ASM 0x40 97#define LIBINIT_ASM 0x40
97#define LIBINIT_ASM_ 0x80 98#define LIBINIT_ASM_ 0x80
98#define LIBINIT_STRING 0xc0 99#define LIBINIT_STRING 0xc0
99#define LIBINIT_MAXSTR 0x39 100#define LIBINIT_MAXSTR 0x38
101#define LIBINIT_LUA 0xf9
100#define LIBINIT_SET 0xfa 102#define LIBINIT_SET 0xfa
101#define LIBINIT_NUMBER 0xfb 103#define LIBINIT_NUMBER 0xfb
102#define LIBINIT_COPY 0xfc 104#define LIBINIT_COPY 0xfc
diff --git a/src/lj_meta.c b/src/lj_meta.c
index 441d571a..e11f1b75 100644
--- a/src/lj_meta.c
+++ b/src/lj_meta.c
@@ -19,6 +19,7 @@
19#include "lj_bc.h" 19#include "lj_bc.h"
20#include "lj_vm.h" 20#include "lj_vm.h"
21#include "lj_strscan.h" 21#include "lj_strscan.h"
22#include "lj_lib.h"
22 23
23/* -- Metamethod handling ------------------------------------------------- */ 24/* -- Metamethod handling ------------------------------------------------- */
24 25
@@ -423,6 +424,18 @@ TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op)
423 } 424 }
424} 425}
425 426
427/* Helper for ISTYPE and ISNUM. Implicit coercion or error. */
428void lj_meta_istype(lua_State *L, BCReg ra, BCReg tp)
429{
430 L->top = curr_topL(L);
431 ra++; tp--;
432 lua_assert(LJ_DUALNUM || tp != ~LJ_TNUMX); /* ISTYPE -> ISNUM broken. */
433 if (LJ_DUALNUM && tp == ~LJ_TNUMX) lj_lib_checkint(L, ra);
434 else if (tp == ~LJ_TNUMX+1) lj_lib_checknum(L, ra);
435 else if (tp == ~LJ_TSTR) lj_lib_checkstr(L, ra);
436 else lj_err_argtype(L, ra, lj_obj_itypename[tp]);
437}
438
426/* Helper for calls. __call metamethod. */ 439/* Helper for calls. __call metamethod. */
427void lj_meta_call(lua_State *L, TValue *func, TValue *top) 440void lj_meta_call(lua_State *L, TValue *func, TValue *top)
428{ 441{
diff --git a/src/lj_meta.h b/src/lj_meta.h
index 6af5e514..970398ec 100644
--- a/src/lj_meta.h
+++ b/src/lj_meta.h
@@ -31,6 +31,7 @@ LJ_FUNCA TValue * LJ_FASTCALL lj_meta_len(lua_State *L, cTValue *o);
31LJ_FUNCA TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne); 31LJ_FUNCA TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne);
32LJ_FUNCA TValue * LJ_FASTCALL lj_meta_equal_cd(lua_State *L, BCIns ins); 32LJ_FUNCA TValue * LJ_FASTCALL lj_meta_equal_cd(lua_State *L, BCIns ins);
33LJ_FUNCA TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op); 33LJ_FUNCA TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op);
34LJ_FUNCA void lj_meta_istype(lua_State *L, BCReg ra, BCReg tp);
34LJ_FUNCA void lj_meta_call(lua_State *L, TValue *func, TValue *top); 35LJ_FUNCA void lj_meta_call(lua_State *L, TValue *func, TValue *top);
35LJ_FUNCA void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *o); 36LJ_FUNCA void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *o);
36 37
diff --git a/src/lj_obj.h b/src/lj_obj.h
index b967819d..6f367ea2 100644
--- a/src/lj_obj.h
+++ b/src/lj_obj.h
@@ -810,11 +810,7 @@ static LJ_AINLINE int32_t lj_num2bit(lua_Number n)
810#endif 810#endif
811} 811}
812 812
813#if LJ_TARGET_X86 && !defined(__SSE2__)
814#define lj_num2int(n) lj_num2bit((n))
815#else
816#define lj_num2int(n) ((int32_t)(n)) 813#define lj_num2int(n) ((int32_t)(n))
817#endif
818 814
819static LJ_AINLINE uint64_t lj_num2u64(lua_Number n) 815static LJ_AINLINE uint64_t lj_num2u64(lua_Number n)
820{ 816{
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
index be50bf97..e67f3ee6 100644
--- a/src/lj_opt_fold.c
+++ b/src/lj_opt_fold.c
@@ -647,27 +647,22 @@ LJFOLD(CONV KNUM IRCONV_INT_NUM)
647LJFOLDF(kfold_conv_knum_int_num) 647LJFOLDF(kfold_conv_knum_int_num)
648{ 648{
649 lua_Number n = knumleft; 649 lua_Number n = knumleft;
650 if (!(fins->op2 & IRCONV_TRUNC)) { 650 int32_t k = lj_num2int(n);
651 int32_t k = lj_num2int(n); 651 if (irt_isguard(fins->t) && n != (lua_Number)k) {
652 if (irt_isguard(fins->t) && n != (lua_Number)k) { 652 /* We're about to create a guard which always fails, like CONV +1.5.
653 /* We're about to create a guard which always fails, like CONV +1.5. 653 ** Some pathological loops cause this during LICM, e.g.:
654 ** Some pathological loops cause this during LICM, e.g.: 654 ** local x,k,t = 0,1.5,{1,[1.5]=2}
655 ** local x,k,t = 0,1.5,{1,[1.5]=2} 655 ** for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end
656 ** for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end 656 ** assert(x == 300)
657 ** assert(x == 300) 657 */
658 */ 658 return FAILFOLD;
659 return FAILFOLD;
660 }
661 return INTFOLD(k);
662 } else {
663 return INTFOLD((int32_t)n);
664 } 659 }
660 return INTFOLD(k);
665} 661}
666 662
667LJFOLD(CONV KNUM IRCONV_U32_NUM) 663LJFOLD(CONV KNUM IRCONV_U32_NUM)
668LJFOLDF(kfold_conv_knum_u32_num) 664LJFOLDF(kfold_conv_knum_u32_num)
669{ 665{
670 lua_assert((fins->op2 & IRCONV_TRUNC));
671#ifdef _MSC_VER 666#ifdef _MSC_VER
672 { /* Workaround for MSVC bug. */ 667 { /* Workaround for MSVC bug. */
673 volatile uint32_t u = (uint32_t)knumleft; 668 volatile uint32_t u = (uint32_t)knumleft;
@@ -681,14 +676,12 @@ LJFOLDF(kfold_conv_knum_u32_num)
681LJFOLD(CONV KNUM IRCONV_I64_NUM) 676LJFOLD(CONV KNUM IRCONV_I64_NUM)
682LJFOLDF(kfold_conv_knum_i64_num) 677LJFOLDF(kfold_conv_knum_i64_num)
683{ 678{
684 lua_assert((fins->op2 & IRCONV_TRUNC));
685 return INT64FOLD((uint64_t)(int64_t)knumleft); 679 return INT64FOLD((uint64_t)(int64_t)knumleft);
686} 680}
687 681
688LJFOLD(CONV KNUM IRCONV_U64_NUM) 682LJFOLD(CONV KNUM IRCONV_U64_NUM)
689LJFOLDF(kfold_conv_knum_u64_num) 683LJFOLDF(kfold_conv_knum_u64_num)
690{ 684{
691 lua_assert((fins->op2 & IRCONV_TRUNC));
692 return INT64FOLD(lj_num2u64(knumleft)); 685 return INT64FOLD(lj_num2u64(knumleft));
693} 686}
694 687
diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c
index caf2a8df..5d0ea9cb 100644
--- a/src/lj_opt_narrow.c
+++ b/src/lj_opt_narrow.c
@@ -496,8 +496,7 @@ TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef tr)
496{ 496{
497 lua_assert(tref_isnumber(tr)); 497 lua_assert(tref_isnumber(tr));
498 if (tref_isnum(tr)) 498 if (tref_isnum(tr))
499 return emitir(IRT(IR_CONV, IRT_INTP), tr, 499 return emitir(IRT(IR_CONV, IRT_INTP), tr, (IRT_INTP<<5)|IRT_NUM|IRCONV_ANY);
500 (IRT_INTP<<5)|IRT_NUM|IRCONV_TRUNC|IRCONV_ANY);
501 /* Undefined overflow semantics allow stripping of ADDOV, SUBOV and MULOV. */ 500 /* Undefined overflow semantics allow stripping of ADDOV, SUBOV and MULOV. */
502 return narrow_stripov(J, tr, IR_MULOV, 501 return narrow_stripov(J, tr, IR_MULOV,
503 LJ_64 ? ((IRT_INTP<<5)|IRT_INT|IRCONV_SEXT) : 502 LJ_64 ? ((IRT_INTP<<5)|IRT_INT|IRCONV_SEXT) :
diff --git a/src/lj_record.c b/src/lj_record.c
index 7336e0ac..003910a9 100644
--- a/src/lj_record.c
+++ b/src/lj_record.c
@@ -1826,6 +1826,18 @@ void lj_record_ins(jit_State *J)
1826 J->maxslot = bc_a(pc[1]); /* Shrink used slots. */ 1826 J->maxslot = bc_a(pc[1]); /* Shrink used slots. */
1827 break; 1827 break;
1828 1828
1829 case BC_ISTYPE: case BC_ISNUM:
1830 /* These coercions need to correspond with lj_meta_istype(). */
1831 if (LJ_DUALNUM && rc == ~LJ_TNUMX+1)
1832 ra = lj_opt_narrow_toint(J, ra);
1833 else if (rc == ~LJ_TNUMX+2)
1834 ra = lj_ir_tonum(J, ra);
1835 else if (rc == ~LJ_TSTR+1)
1836 ra = lj_ir_tostr(J, ra);
1837 /* else: type specialization suffices. */
1838 J->base[bc_a(ins)] = ra;
1839 break;
1840
1829 /* -- Unary ops --------------------------------------------------------- */ 1841 /* -- Unary ops --------------------------------------------------------- */
1830 1842
1831 case BC_NOT: 1843 case BC_NOT:
@@ -1937,6 +1949,10 @@ void lj_record_ins(jit_State *J)
1937 ix.idxchain = LJ_MAX_IDXCHAIN; 1949 ix.idxchain = LJ_MAX_IDXCHAIN;
1938 rc = lj_record_idx(J, &ix); 1950 rc = lj_record_idx(J, &ix);
1939 break; 1951 break;
1952 case BC_TGETR: case BC_TSETR:
1953 ix.idxchain = 0;
1954 rc = lj_record_idx(J, &ix);
1955 break;
1940 1956
1941 case BC_TNEW: 1957 case BC_TNEW:
1942 rc = rec_tnew(J, rc); 1958 rc = rec_tnew(J, rc);
diff --git a/src/lj_tab.h b/src/lj_tab.h
index 2787caa0..d361137c 100644
--- a/src/lj_tab.h
+++ b/src/lj_tab.h
@@ -50,7 +50,7 @@ LJ_FUNCA cTValue *lj_tab_get(lua_State *L, GCtab *t, cTValue *key);
50/* Caveat: all setters require a write barrier for the stored value. */ 50/* Caveat: all setters require a write barrier for the stored value. */
51 51
52LJ_FUNCA TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key); 52LJ_FUNCA TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key);
53LJ_FUNC TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key); 53LJ_FUNCA TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key);
54LJ_FUNC TValue *lj_tab_setstr(lua_State *L, GCtab *t, GCstr *key); 54LJ_FUNC TValue *lj_tab_setstr(lua_State *L, GCtab *t, GCstr *key);
55LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key); 55LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key);
56 56
diff --git a/src/lj_target_arm.h b/src/lj_target_arm.h
index bec55772..f1aedff0 100644
--- a/src/lj_target_arm.h
+++ b/src/lj_target_arm.h
@@ -243,10 +243,6 @@ typedef enum ARMIns {
243 ARMI_VCVT_S32_F64 = 0xeebd0bc0, 243 ARMI_VCVT_S32_F64 = 0xeebd0bc0,
244 ARMI_VCVT_U32_F32 = 0xeebc0ac0, 244 ARMI_VCVT_U32_F32 = 0xeebc0ac0,
245 ARMI_VCVT_U32_F64 = 0xeebc0bc0, 245 ARMI_VCVT_U32_F64 = 0xeebc0bc0,
246 ARMI_VCVTR_S32_F32 = 0xeebd0a40,
247 ARMI_VCVTR_S32_F64 = 0xeebd0b40,
248 ARMI_VCVTR_U32_F32 = 0xeebc0a40,
249 ARMI_VCVTR_U32_F64 = 0xeebc0b40,
250 ARMI_VCVT_F32_S32 = 0xeeb80ac0, 246 ARMI_VCVT_F32_S32 = 0xeeb80ac0,
251 ARMI_VCVT_F64_S32 = 0xeeb80bc0, 247 ARMI_VCVT_F64_S32 = 0xeeb80bc0,
252 ARMI_VCVT_F32_U32 = 0xeeb80a40, 248 ARMI_VCVT_F32_U32 = 0xeeb80a40,
diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h
index 84b0871d..450df77f 100644
--- a/src/lj_target_x86.h
+++ b/src/lj_target_x86.h
@@ -277,10 +277,8 @@ typedef enum {
277 XO_ROUNDSD = 0x0b3a0ffc, /* Really 66 0f 3a 0b. See asm_fpmath. */ 277 XO_ROUNDSD = 0x0b3a0ffc, /* Really 66 0f 3a 0b. See asm_fpmath. */
278 XO_UCOMISD = XO_660f(2e), 278 XO_UCOMISD = XO_660f(2e),
279 XO_CVTSI2SD = XO_f20f(2a), 279 XO_CVTSI2SD = XO_f20f(2a),
280 XO_CVTSD2SI = XO_f20f(2d),
281 XO_CVTTSD2SI= XO_f20f(2c), 280 XO_CVTTSD2SI= XO_f20f(2c),
282 XO_CVTSI2SS = XO_f30f(2a), 281 XO_CVTSI2SS = XO_f30f(2a),
283 XO_CVTSS2SI = XO_f30f(2d),
284 XO_CVTTSS2SI= XO_f30f(2c), 282 XO_CVTTSS2SI= XO_f30f(2c),
285 XO_CVTSS2SD = XO_f30f(5a), 283 XO_CVTSS2SD = XO_f30f(5a),
286 XO_CVTSD2SS = XO_f20f(5a), 284 XO_CVTSD2SS = XO_f20f(5a),
diff --git a/src/lj_vm.h b/src/lj_vm.h
index c5d05de4..948d63c2 100644
--- a/src/lj_vm.h
+++ b/src/lj_vm.h
@@ -49,12 +49,14 @@ LJ_ASMF void lj_vm_exit_handler(void);
49LJ_ASMF void lj_vm_exit_interp(void); 49LJ_ASMF void lj_vm_exit_interp(void);
50 50
51/* Internal math helper functions. */ 51/* Internal math helper functions. */
52#if LJ_TARGET_X86ORX64 || LJ_TARGET_PPC 52#if LJ_TARGET_PPC
53#define lj_vm_floor floor 53#define lj_vm_floor floor
54#define lj_vm_ceil ceil 54#define lj_vm_ceil ceil
55#else 55#else
56LJ_ASMF double lj_vm_floor(double); 56LJ_ASMF double lj_vm_floor(double);
57#if !LJ_TARGET_X86ORX64
57LJ_ASMF double lj_vm_ceil(double); 58LJ_ASMF double lj_vm_ceil(double);
59#endif
58#if LJ_TARGET_ARM 60#if LJ_TARGET_ARM
59LJ_ASMF double lj_vm_floor_sf(double); 61LJ_ASMF double lj_vm_floor_sf(double);
60LJ_ASMF double lj_vm_ceil_sf(double); 62LJ_ASMF double lj_vm_ceil_sf(double);
diff --git a/src/luaconf.h b/src/luaconf.h
index 8e3a7aaa..d283233d 100644
--- a/src/luaconf.h
+++ b/src/luaconf.h
@@ -30,12 +30,12 @@
30#define LUA_LDIR LUA_ROOT "share/lua/5.1/" 30#define LUA_LDIR LUA_ROOT "share/lua/5.1/"
31#define LUA_CDIR LUA_ROOT "lib/lua/5.1/" 31#define LUA_CDIR LUA_ROOT "lib/lua/5.1/"
32#ifdef LUA_XROOT 32#ifdef LUA_XROOT
33#define LUA_JDIR LUA_XROOT "share/luajit-2.0.1/" 33#define LUA_JDIR LUA_XROOT "share/luajit-2.1.0-alpha/"
34#define LUA_XPATH \ 34#define LUA_XPATH \
35 ";" LUA_XROOT "share/lua/5.1/?.lua;" LUA_XROOT "share/lua/5.1/?/init.lua" 35 ";" LUA_XROOT "share/lua/5.1/?.lua;" LUA_XROOT "share/lua/5.1/?/init.lua"
36#define LUA_XCPATH LUA_XROOT "lib/lua/5.1/?.so;" 36#define LUA_XCPATH LUA_XROOT "lib/lua/5.1/?.so;"
37#else 37#else
38#define LUA_JDIR LUA_ROOT "share/luajit-2.0.1/" 38#define LUA_JDIR LUA_ROOT "share/luajit-2.1.0-alpha/"
39#define LUA_XPATH 39#define LUA_XPATH
40#define LUA_XCPATH 40#define LUA_XCPATH
41#endif 41#endif
diff --git a/src/luajit.h b/src/luajit.h
index ed39d014..a4c939bf 100644
--- a/src/luajit.h
+++ b/src/luajit.h
@@ -30,9 +30,9 @@
30 30
31#include "lua.h" 31#include "lua.h"
32 32
33#define LUAJIT_VERSION "LuaJIT 2.0.1" 33#define LUAJIT_VERSION "LuaJIT 2.1.0-alpha"
34#define LUAJIT_VERSION_NUM 20001 /* Version 2.0.1 = 02.00.01. */ 34#define LUAJIT_VERSION_NUM 20100 /* Version 2.1.0 = 02.01.00. */
35#define LUAJIT_VERSION_SYM luaJIT_version_2_0_1 35#define LUAJIT_VERSION_SYM luaJIT_version_2_1_0_alpha
36#define LUAJIT_COPYRIGHT "Copyright (C) 2005-2013 Mike Pall" 36#define LUAJIT_COPYRIGHT "Copyright (C) 2005-2013 Mike Pall"
37#define LUAJIT_URL "http://luajit.org/" 37#define LUAJIT_URL "http://luajit.org/"
38 38
diff --git a/src/msvcbuild.bat b/src/msvcbuild.bat
index 745c93ff..1d5bd55a 100644
--- a/src/msvcbuild.bat
+++ b/src/msvcbuild.bat
@@ -35,6 +35,7 @@ if exist minilua.exe.manifest^
35@if errorlevel 8 goto :X64 35@if errorlevel 8 goto :X64
36@set DASMFLAGS=-D WIN -D JIT -D FFI 36@set DASMFLAGS=-D WIN -D JIT -D FFI
37@set LJARCH=x86 37@set LJARCH=x86
38@set LJCOMPILE=%LJCOMPILE% /arch:SSE2
38:X64 39:X64
39minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_x86.dasc 40minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_x86.dasc
40@if errorlevel 1 goto :BAD 41@if errorlevel 1 goto :BAD
diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc
index 114416a4..6928e03b 100644
--- a/src/vm_arm.dasc
+++ b/src/vm_arm.dasc
@@ -615,6 +615,16 @@ static void build_subroutines(BuildCtx *ctx)
615 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. 615 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here.
616 | b ->vm_call_dispatch_f 616 | b ->vm_call_dispatch_f
617 | 617 |
618 |->vmeta_tgetr:
619 | .IOS mov RC, BASE
620 | bl extern lj_tab_getinth // (GCtab *t, int32_t key)
621 | // Returns cTValue * or NULL.
622 | .IOS mov BASE, RC
623 | cmp CRET1, #0
624 | ldrdne CARG12, [CRET1]
625 | mvneq CARG2, #~LJ_TNIL
626 | b ->BC_TGETR_Z
627 |
618 |//----------------------------------------------------------------------- 628 |//-----------------------------------------------------------------------
619 | 629 |
620 |->vmeta_tsets1: 630 |->vmeta_tsets1:
@@ -672,6 +682,15 @@ static void build_subroutines(BuildCtx *ctx)
672 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. 682 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here.
673 | b ->vm_call_dispatch_f 683 | b ->vm_call_dispatch_f
674 | 684 |
685 |->vmeta_tsetr:
686 | str BASE, L->base
687 | .IOS mov RC, BASE
688 | str PC, SAVE_PC
689 | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
690 | // Returns TValue *.
691 | .IOS mov BASE, RC
692 | b ->BC_TSETR_Z
693 |
675 |//-- Comparison metamethods --------------------------------------------- 694 |//-- Comparison metamethods ---------------------------------------------
676 | 695 |
677 |->vmeta_comp: 696 |->vmeta_comp:
@@ -736,6 +755,17 @@ static void build_subroutines(BuildCtx *ctx)
736 | b <3 755 | b <3
737 |.endif 756 |.endif
738 | 757 |
758 |->vmeta_istype:
759 | sub PC, PC, #4
760 | str BASE, L->base
761 | mov CARG1, L
762 | lsr CARG2, RA, #3
763 | mov CARG3, RC
764 | str PC, SAVE_PC
765 | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
766 | .IOS ldr BASE, L->base
767 | b ->cont_nop
768 |
739 |//-- Arithmetic metamethods --------------------------------------------- 769 |//-- Arithmetic metamethods ---------------------------------------------
740 | 770 |
741 |->vmeta_arith_vn: 771 |->vmeta_arith_vn:
@@ -1501,19 +1531,6 @@ static void build_subroutines(BuildCtx *ctx)
1501 | math_extern2 atan2 1531 | math_extern2 atan2
1502 | math_extern2 fmod 1532 | math_extern2 fmod
1503 | 1533 |
1504 |->ff_math_deg:
1505 |.if FPU
1506 | .ffunc_d math_rad
1507 | vldr d1, CFUNC:CARG3->upvalue[0]
1508 | vmul.f64 d0, d0, d1
1509 | b ->fff_resd
1510 |.else
1511 | .ffunc_n math_rad
1512 | ldrd CARG34, CFUNC:CARG3->upvalue[0]
1513 | bl extern __aeabi_dmul
1514 | b ->fff_restv
1515 |.endif
1516 |
1517 |.if HFABI 1534 |.if HFABI
1518 | .ffunc math_ldexp 1535 | .ffunc math_ldexp
1519 | ldr CARG4, [BASE, #4] 1536 | ldr CARG4, [BASE, #4]
@@ -1844,17 +1861,6 @@ static void build_subroutines(BuildCtx *ctx)
1844 |ffstring_case string_lower, 65 1861 |ffstring_case string_lower, 65
1845 |ffstring_case string_upper, 97 1862 |ffstring_case string_upper, 97
1846 | 1863 |
1847 |//-- Table library ------------------------------------------------------
1848 |
1849 |.ffunc_1 table_getn
1850 | checktab CARG2, ->fff_fallback
1851 | .IOS mov RA, BASE
1852 | bl extern lj_tab_len // (GCtab *t)
1853 | // Returns uint32_t (but less than 2^31).
1854 | .IOS mov BASE, RA
1855 | mvn CARG2, #~LJ_TISNUM
1856 | b ->fff_restv
1857 |
1858 |//-- Bit library -------------------------------------------------------- 1864 |//-- Bit library --------------------------------------------------------
1859 | 1865 |
1860 |// FP number to bit conversion for soft-float. Clobbers r0-r3. 1866 |// FP number to bit conversion for soft-float. Clobbers r0-r3.
@@ -2834,6 +2840,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2834 | ins_next 2840 | ins_next
2835 break; 2841 break;
2836 2842
2843 case BC_ISTYPE:
2844 | // RA = src*8, RC = -type
2845 | ldrd CARG12, [BASE, RA]
2846 | ins_next1
2847 | cmn CARG2, RC
2848 | ins_next2
2849 | bne ->vmeta_istype
2850 | ins_next3
2851 break;
2852 case BC_ISNUM:
2853 | // RA = src*8, RC = -(TISNUM-1)
2854 | ldrd CARG12, [BASE, RA]
2855 | ins_next1
2856 | checktp CARG2, LJ_TISNUM
2857 | ins_next2
2858 | bhs ->vmeta_istype
2859 | ins_next3
2860 break;
2861
2837 /* -- Unary ops --------------------------------------------------------- */ 2862 /* -- Unary ops --------------------------------------------------------- */
2838 2863
2839 case BC_MOV: 2864 case BC_MOV:
@@ -3504,6 +3529,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3504 | bne <1 // 'no __index' flag set: done. 3529 | bne <1 // 'no __index' flag set: done.
3505 | b ->vmeta_tgetb 3530 | b ->vmeta_tgetb
3506 break; 3531 break;
3532 case BC_TGETR:
3533 | decode_RB8 RB, INS
3534 | decode_RC8 RC, INS
3535 | // RA = dst*8, RB = table*8, RC = key*8
3536 | ldr TAB:CARG1, [BASE, RB]
3537 | ldr CARG2, [BASE, RC]
3538 | ldr CARG4, TAB:CARG1->array
3539 | ldr CARG3, TAB:CARG1->asize
3540 | add CARG4, CARG4, CARG2, lsl #3
3541 | cmp CARG2, CARG3 // In array part?
3542 | bhs ->vmeta_tgetr
3543 | ldrd CARG12, [CARG4]
3544 |->BC_TGETR_Z:
3545 | ins_next1
3546 | ins_next2
3547 | strd CARG12, [BASE, RA]
3548 | ins_next3
3549 break;
3507 3550
3508 case BC_TSETV: 3551 case BC_TSETV:
3509 | decode_RB8 RB, INS 3552 | decode_RB8 RB, INS
@@ -3674,6 +3717,32 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3674 | barrierback TAB:CARG1, INS, CARG3 3717 | barrierback TAB:CARG1, INS, CARG3
3675 | b <2 3718 | b <2
3676 break; 3719 break;
3720 case BC_TSETR:
3721 | decode_RB8 RB, INS
3722 | decode_RC8 RC, INS
3723 | // RA = dst*8, RB = table*8, RC = key*8
3724 | ldr TAB:CARG2, [BASE, RB]
3725 | ldr CARG3, [BASE, RC]
3726 | ldrb INS, TAB:CARG2->marked
3727 | ldr CARG1, TAB:CARG2->array
3728 | ldr CARG4, TAB:CARG2->asize
3729 | tst INS, #LJ_GC_BLACK // isblack(table)
3730 | add CARG1, CARG1, CARG3, lsl #3
3731 | bne >7
3732 |2:
3733 | cmp CARG3, CARG4 // In array part?
3734 | bhs ->vmeta_tsetr
3735 |->BC_TSETR_Z:
3736 | ldrd CARG34, [BASE, RA]
3737 | ins_next1
3738 | ins_next2
3739 | strd CARG34, [CARG1]
3740 | ins_next3
3741 |
3742 |7: // Possible table write barrier for the value. Skip valiswhite check.
3743 | barrierback TAB:CARG2, INS, RB
3744 | b <2
3745 break;
3677 3746
3678 case BC_TSETM: 3747 case BC_TSETM:
3679 | // RA = base*8 (table at base-1), RC = num_const (start index) 3748 | // RA = base*8 (table at base-1), RC = num_const (start index)
diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc
index a81dbeeb..5808e182 100644
--- a/src/vm_mips.dasc
+++ b/src/vm_mips.dasc
@@ -688,6 +688,16 @@ static void build_subroutines(BuildCtx *ctx)
688 | b ->vm_call_dispatch_f 688 | b ->vm_call_dispatch_f
689 |. li NARGS8:RC, 16 // 2 args for func(t, k). 689 |. li NARGS8:RC, 16 // 2 args for func(t, k).
690 | 690 |
691 |->vmeta_tgetr:
692 | load_got lj_tab_getinth
693 | call_intern lj_tab_getinth // (GCtab *t, int32_t key)
694 |. nop
695 | // Returns cTValue * or NULL.
696 | beqz CRET1, >1
697 |. nop
698 | b ->BC_TGETR_Z
699 |. ldc1 f0, 0(CRET1)
700 |
691 |//----------------------------------------------------------------------- 701 |//-----------------------------------------------------------------------
692 | 702 |
693 |->vmeta_tsets1: 703 |->vmeta_tsets1:
@@ -740,6 +750,16 @@ static void build_subroutines(BuildCtx *ctx)
740 | b ->vm_call_dispatch_f 750 | b ->vm_call_dispatch_f
741 |. li NARGS8:RC, 24 // 3 args for func(t, k, v) 751 |. li NARGS8:RC, 24 // 3 args for func(t, k, v)
742 | 752 |
753 |->vmeta_tsetr:
754 | load_got lj_tab_setinth
755 | sw BASE, L->base
756 | sw PC, SAVE_PC
757 | call_intern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
758 |. move CARG1, L
759 | // Returns TValue *.
760 | b ->BC_TSETR_Z
761 |. nop
762 |
743 |//-- Comparison metamethods --------------------------------------------- 763 |//-- Comparison metamethods ---------------------------------------------
744 | 764 |
745 |->vmeta_comp: 765 |->vmeta_comp:
@@ -813,6 +833,18 @@ static void build_subroutines(BuildCtx *ctx)
813 |. nop 833 |. nop
814 |.endif 834 |.endif
815 | 835 |
836 |->vmeta_istype:
837 | load_got lj_meta_istype
838 | addiu PC, PC, -4
839 | sw BASE, L->base
840 | srl CARG2, RA, 3
841 | srl CARG3, RD, 3
842 | sw PC, SAVE_PC
843 | call_intern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
844 |. move CARG1, L
845 | b ->cont_nop
846 |. nop
847 |
816 |//-- Arithmetic metamethods --------------------------------------------- 848 |//-- Arithmetic metamethods ---------------------------------------------
817 | 849 |
818 |->vmeta_unm: 850 |->vmeta_unm:
@@ -1188,7 +1220,7 @@ static void build_subroutines(BuildCtx *ctx)
1188 | mtc1 TMP0, FARG1 1220 | mtc1 TMP0, FARG1
1189 | beqz AT, ->fff_fallback 1221 | beqz AT, ->fff_fallback
1190 |. lw PC, FRAME_PC(BASE) 1222 |. lw PC, FRAME_PC(BASE)
1191 | cvt.w.d FRET1, FARG2 1223 | trunc.w.d FRET1, FARG2
1192 | cvt.d.w FARG1, FARG1 1224 | cvt.d.w FARG1, FARG1
1193 | lw TMP0, TAB:CARG1->asize 1225 | lw TMP0, TAB:CARG1->asize
1194 | lw TMP1, TAB:CARG1->array 1226 | lw TMP1, TAB:CARG1->array
@@ -1521,14 +1553,8 @@ static void build_subroutines(BuildCtx *ctx)
1521 | b ->fff_resn 1553 | b ->fff_resn
1522 |. nop 1554 |. nop
1523 | 1555 |
1524 |->ff_math_deg:
1525 |.ffunc_n math_rad
1526 |. ldc1 FARG2, CFUNC:RB->upvalue[0]
1527 | b ->fff_resn
1528 |. mul.d FRET1, FARG1, FARG2
1529 |
1530 |.ffunc_nn math_ldexp 1556 |.ffunc_nn math_ldexp
1531 | cvt.w.d FARG2, FARG2 1557 | trunc.w.d FARG2, FARG2
1532 | load_got ldexp 1558 | load_got ldexp
1533 | mfc1 CARG3, FARG2 1559 | mfc1 CARG3, FARG2
1534 | call_extern 1560 | call_extern
@@ -1628,7 +1654,7 @@ static void build_subroutines(BuildCtx *ctx)
1628 |. sltiu AT, CARG3, LJ_TISNUM 1654 |. sltiu AT, CARG3, LJ_TISNUM
1629 | beqz AT, ->fff_fallback 1655 | beqz AT, ->fff_fallback
1630 |. li CARG3, 1 1656 |. li CARG3, 1
1631 | cvt.w.d FARG1, FARG1 1657 | trunc.w.d FARG1, FARG1
1632 | addiu CARG2, sp, ARG5_OFS 1658 | addiu CARG2, sp, ARG5_OFS
1633 | sltiu AT, TMP0, 256 1659 | sltiu AT, TMP0, 256
1634 | mfc1 TMP0, FARG1 1660 | mfc1 TMP0, FARG1
@@ -1658,7 +1684,7 @@ static void build_subroutines(BuildCtx *ctx)
1658 | ldc1 f2, 8(BASE) 1684 | ldc1 f2, 8(BASE)
1659 | beqz AT, >1 1685 | beqz AT, >1
1660 |. li CARG4, -1 1686 |. li CARG4, -1
1661 | cvt.w.d f0, f0 1687 | trunc.w.d f0, f0
1662 | sltiu AT, CARG3, LJ_TISNUM 1688 | sltiu AT, CARG3, LJ_TISNUM
1663 | beqz AT, ->fff_fallback 1689 | beqz AT, ->fff_fallback
1664 |. mfc1 CARG4, f0 1690 |. mfc1 CARG4, f0
@@ -1666,7 +1692,7 @@ static void build_subroutines(BuildCtx *ctx)
1666 | sltiu AT, CARG2, LJ_TISNUM 1692 | sltiu AT, CARG2, LJ_TISNUM
1667 | beqz AT, ->fff_fallback 1693 | beqz AT, ->fff_fallback
1668 |. li AT, LJ_TSTR 1694 |. li AT, LJ_TSTR
1669 | cvt.w.d f2, f2 1695 | trunc.w.d f2, f2
1670 | bne TMP0, AT, ->fff_fallback 1696 | bne TMP0, AT, ->fff_fallback
1671 |. lw CARG2, STR:CARG1->len 1697 |. lw CARG2, STR:CARG1->len
1672 | mfc1 CARG3, f2 1698 | mfc1 CARG3, f2
@@ -1706,7 +1732,7 @@ static void build_subroutines(BuildCtx *ctx)
1706 | or AT, AT, TMP0 1732 | or AT, AT, TMP0
1707 | bnez AT, ->fff_fallback 1733 | bnez AT, ->fff_fallback
1708 |. sltiu AT, CARG4, LJ_TISNUM 1734 |. sltiu AT, CARG4, LJ_TISNUM
1709 | cvt.w.d f0, f0 1735 | trunc.w.d f0, f0
1710 | beqz AT, ->fff_fallback 1736 | beqz AT, ->fff_fallback
1711 |. lw TMP0, STR:CARG1->len 1737 |. lw TMP0, STR:CARG1->len
1712 | mfc1 CARG3, f0 1738 | mfc1 CARG3, f0
@@ -1786,18 +1812,6 @@ static void build_subroutines(BuildCtx *ctx)
1786 |ffstring_case string_lower, 65 1812 |ffstring_case string_lower, 65
1787 |ffstring_case string_upper, 97 1813 |ffstring_case string_upper, 97
1788 | 1814 |
1789 |//-- Table library ------------------------------------------------------
1790 |
1791 |.ffunc_1 table_getn
1792 | li AT, LJ_TTAB
1793 | bne CARG3, AT, ->fff_fallback
1794 |. load_got lj_tab_len
1795 | call_intern lj_tab_len // (GCtab *t)
1796 |. nop
1797 | // Returns uint32_t (but less than 2^31).
1798 | b ->fff_resi
1799 |. nop
1800 |
1801 |//-- Bit library -------------------------------------------------------- 1815 |//-- Bit library --------------------------------------------------------
1802 | 1816 |
1803 |.macro .ffunc_bit, name 1817 |.macro .ffunc_bit, name
@@ -2572,6 +2586,26 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2572 | ins_next 2586 | ins_next
2573 break; 2587 break;
2574 2588
2589 case BC_ISTYPE:
2590 | // RA = src*8, RD = -type*8
2591 | addu TMP2, BASE, RA
2592 | srl TMP1, RD, 3
2593 | lw TMP0, HI(TMP2)
2594 | ins_next1
2595 | addu AT, TMP0, TMP1
2596 | bnez AT, ->vmeta_istype
2597 |. ins_next2
2598 break;
2599 case BC_ISNUM:
2600 | // RA = src*8, RD = -(TISNUM-1)*8
2601 | addu TMP2, BASE, RA
2602 | lw TMP0, HI(TMP2)
2603 | ins_next1
2604 | sltiu AT, TMP0, LJ_TISNUM
2605 | beqz AT, ->vmeta_istype
2606 |. ins_next2
2607 break;
2608
2575 /* -- Unary ops --------------------------------------------------------- */ 2609 /* -- Unary ops --------------------------------------------------------- */
2576 2610
2577 case BC_MOV: 2611 case BC_MOV:
@@ -3210,6 +3244,30 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3210 | b ->vmeta_tgetb // Caveat: preserve TMP0! 3244 | b ->vmeta_tgetb // Caveat: preserve TMP0!
3211 |. nop 3245 |. nop
3212 break; 3246 break;
3247 case BC_TGETR:
3248 | // RA = dst*8, RB = table*8, RC = key*8
3249 | decode_RB8a RB, INS
3250 | decode_RB8b RB
3251 | decode_RDtoRC8 RC, RD
3252 | addu CARG2, BASE, RB
3253 | addu CARG3, BASE, RC
3254 | lw TAB:CARG1, LO(CARG2)
3255 | ldc1 f0, 0(CARG3)
3256 | trunc.w.d f2, f0
3257 | lw TMP0, TAB:CARG1->asize
3258 | mfc1 CARG2, f2
3259 | lw TMP1, TAB:CARG1->array
3260 | sltu AT, CARG2, TMP0
3261 | sll TMP2, CARG2, 3
3262 | beqz AT, ->vmeta_tgetr // In array part?
3263 |. addu TMP2, TMP1, TMP2
3264 | ldc1 f0, 0(TMP2)
3265 |->BC_TGETR_Z:
3266 | addu RA, BASE, RA
3267 | ins_next1
3268 | sdc1 f0, 0(RA)
3269 | ins_next2
3270 break;
3213 3271
3214 case BC_TSETV: 3272 case BC_TSETV:
3215 | // RA = src*8, RB = table*8, RC = key*8 3273 | // RA = src*8, RB = table*8, RC = key*8
@@ -3398,6 +3456,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3398 |7: // Possible table write barrier for the value. Skip valiswhite check. 3456 |7: // Possible table write barrier for the value. Skip valiswhite check.
3399 | barrierback TAB:RB, TMP3, TMP0, <2 3457 | barrierback TAB:RB, TMP3, TMP0, <2
3400 break; 3458 break;
3459 case BC_TSETR:
3460 | // RA = dst*8, RB = table*8, RC = key*8
3461 | decode_RB8a RB, INS
3462 | decode_RB8b RB
3463 | decode_RDtoRC8 RC, RD
3464 | addu CARG1, BASE, RB
3465 | addu CARG3, BASE, RC
3466 | lw TAB:CARG2, LO(CARG1)
3467 | ldc1 f0, 0(CARG3)
3468 | trunc.w.d f2, f0
3469 | lbu TMP3, TAB:CARG2->marked
3470 | lw TMP0, TAB:CARG2->asize
3471 | mfc1 CARG3, f2
3472 | lw TMP1, TAB:CARG2->array
3473 | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
3474 | bnez AT, >7
3475 |. addu RA, BASE, RA
3476 |2:
3477 | sltu AT, CARG3, TMP0
3478 | sll TMP2, CARG3, 3
3479 | beqz AT, ->vmeta_tsetr // In array part?
3480 |. ldc1 f20, 0(RA)
3481 | addu CRET1, TMP1, TMP2
3482 |->BC_TSETR_Z:
3483 | ins_next1
3484 | sdc1 f20, 0(CRET1)
3485 | ins_next2
3486 |
3487 |7: // Possible table write barrier for the value. Skip valiswhite check.
3488 | barrierback TAB:RB, TMP3, TMP0, <2
3489 break;
3490
3401 3491
3402 case BC_TSETM: 3492 case BC_TSETM:
3403 | // RA = base*8 (table at base-1), RD = num_const*8 (start index) 3493 | // RA = base*8 (table at base-1), RD = num_const*8 (start index)
diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc
index 685ea518..bff50c59 100644
--- a/src/vm_ppc.dasc
+++ b/src/vm_ppc.dasc
@@ -895,6 +895,17 @@ static void build_subroutines(BuildCtx *ctx)
895 | li NARGS8:RC, 16 // 2 args for func(t, k). 895 | li NARGS8:RC, 16 // 2 args for func(t, k).
896 | b ->vm_call_dispatch_f 896 | b ->vm_call_dispatch_f
897 | 897 |
898 |->vmeta_tgetr:
899 | bl extern lj_tab_getinth // (GCtab *t, int32_t key)
900 | // Returns cTValue * or NULL.
901 | cmplwi CRET1, 0
902 | beq >1
903 | lfd f14, 0(CRET1)
904 | b ->BC_TGETR_Z
905 |1:
906 | stwx TISNIL, BASE, RA
907 | b ->cont_nop
908 |
898 |//----------------------------------------------------------------------- 909 |//-----------------------------------------------------------------------
899 | 910 |
900 |->vmeta_tsets1: 911 |->vmeta_tsets1:
@@ -962,6 +973,14 @@ static void build_subroutines(BuildCtx *ctx)
962 | stfd f0, 16(BASE) // Copy value to third argument. 973 | stfd f0, 16(BASE) // Copy value to third argument.
963 | b ->vm_call_dispatch_f 974 | b ->vm_call_dispatch_f
964 | 975 |
976 |->vmeta_tsetr:
977 | stp BASE, L->base
978 | stw PC, SAVE_PC
979 | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
980 | // Returns TValue *.
981 | stfd f14, 0(CRET1)
982 | b ->cont_nop
983 |
965 |//-- Comparison metamethods --------------------------------------------- 984 |//-- Comparison metamethods ---------------------------------------------
966 | 985 |
967 |->vmeta_comp: 986 |->vmeta_comp:
@@ -1040,6 +1059,16 @@ static void build_subroutines(BuildCtx *ctx)
1040 | b <3 1059 | b <3
1041 |.endif 1060 |.endif
1042 | 1061 |
1062 |->vmeta_istype:
1063 | subi PC, PC, 4
1064 | stp BASE, L->base
1065 | srwi CARG2, RA, 3
1066 | mr CARG1, L
1067 | srwi CARG3, RD, 3
1068 | stw PC, SAVE_PC
1069 | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
1070 | b ->cont_nop
1071 |
1043 |//-- Arithmetic metamethods --------------------------------------------- 1072 |//-- Arithmetic metamethods ---------------------------------------------
1044 | 1073 |
1045 |->vmeta_arith_nv: 1074 |->vmeta_arith_nv:
@@ -1870,12 +1899,6 @@ static void build_subroutines(BuildCtx *ctx)
1870 | math_extern2 atan2 1899 | math_extern2 atan2
1871 | math_extern2 fmod 1900 | math_extern2 fmod
1872 | 1901 |
1873 |->ff_math_deg:
1874 |.ffunc_n math_rad
1875 | lfd FARG2, CFUNC:RB->upvalue[0]
1876 | fmul FARG1, FARG1, FARG2
1877 | b ->fff_resn
1878 |
1879 |.if DUALNUM 1902 |.if DUALNUM
1880 |.ffunc math_ldexp 1903 |.ffunc math_ldexp
1881 | cmplwi NARGS8:RC, 16 1904 | cmplwi NARGS8:RC, 16
@@ -2258,14 +2281,6 @@ static void build_subroutines(BuildCtx *ctx)
2258 |ffstring_case string_lower, 65 2281 |ffstring_case string_lower, 65
2259 |ffstring_case string_upper, 97 2282 |ffstring_case string_upper, 97
2260 | 2283 |
2261 |//-- Table library ------------------------------------------------------
2262 |
2263 |.ffunc_1 table_getn
2264 | checktab CARG3; bne ->fff_fallback
2265 | bl extern lj_tab_len // (GCtab *t)
2266 | // Returns uint32_t (but less than 2^31).
2267 | b ->fff_resi
2268 |
2269 |//-- Bit library -------------------------------------------------------- 2284 |//-- Bit library --------------------------------------------------------
2270 | 2285 |
2271 |.macro .ffunc_bit, name 2286 |.macro .ffunc_bit, name
@@ -3265,6 +3280,29 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3265 | ins_next 3280 | ins_next
3266 break; 3281 break;
3267 3282
3283 case BC_ISTYPE:
3284 | // RA = src*8, RD = -type*8
3285 | lwzx TMP0, BASE, RA // Load the word at BASE+RA; it is compared against the type below.
3286 | srwi TMP1, RD, 3 // TMP1 = RD >> 3 (logical shift).
3287 | ins_next1
3288 |.if not PPE and not GPR64
3289 | add. TMP0, TMP0, TMP1 // Record form: the bne below tests this result.
3290 |.else
3291 | neg TMP1, TMP1 // Negate in place; both operands of neg are given explicitly.
3292 | cmpw TMP0, TMP1
3293 |.endif
3294 | bne ->vmeta_istype // Mismatch: take the slow path.
3295 | ins_next2
3296 break;
3297 case BC_ISNUM:
3298 | // RA = src*8, RD = -(TISNUM-1)*8
3299 | lwzx TMP0, BASE, RA // Load the word at BASE+RA.
3300 | ins_next1
3301 | checknum TMP0 // Sets the condition tested by the bge below (macro defined outside this hunk).
3302 | bge ->vmeta_istype // Check failed: take the slow path. RD is not read inline here.
3303 | ins_next2
3304 break;
3305
3268 /* -- Unary ops --------------------------------------------------------- */ 3306 /* -- Unary ops --------------------------------------------------------- */
3269 3307
3270 case BC_MOV: 3308 case BC_MOV:
@@ -4016,6 +4054,30 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4016 | bne <1 // 'no __index' flag set: done. 4054 | bne <1 // 'no __index' flag set: done.
4017 | b ->vmeta_tgetb // Caveat: preserve TMP0! 4055 | b ->vmeta_tgetb // Caveat: preserve TMP0!
4018 break; 4056 break;
4057 case BC_TGETR:
4058 | // RA = dst*8, RB = table*8, RC = key*8
4059 | add RB, BASE, RB // RB = address of the table operand's slot.
4060 | lwz TAB:CARG1, 4(RB) // Table pointer from the word at offset 4; CARG1 is also the argument for the slow-path call.
4061 |.if DUALNUM
4062 | add RC, BASE, RC
4063 | lwz TMP0, TAB:CARG1->asize
4064 | lwz CARG2, 4(RC) // Key read directly as a 32-bit word; no type check is done here.
4065 | lwz TMP1, TAB:CARG1->array
4066 |.else
4067 | lfdx f0, BASE, RC
4068 | lwz TMP0, TAB:CARG1->asize
4069 | toint CARG2, f0 // Key loaded as FP: convert to integer (toint macro defined outside this hunk).
4070 | lwz TMP1, TAB:CARG1->array
4071 |.endif
4072 | cmplw TMP0, CARG2 // Unsigned compare of asize with key.
4073 | slwi TMP2, CARG2, 3 // Byte offset of the element: key*8.
4074 | ble ->vmeta_tgetr // In array part?
4075 | lfdx f14, TMP1, TMP2 // Load the 8-byte element at array + key*8.
4076 |->BC_TGETR_Z: // Also entered from vmeta_tgetr with the value already in f14.
4077 | ins_next1
4078 | stfdx f14, BASE, RA // Store the result to the dst slot.
4079 | ins_next2
4080 break;
4019 4081
4020 case BC_TSETV: 4082 case BC_TSETV:
4021 | // RA = src*8, RB = table*8, RC = key*8 4083 | // RA = src*8, RB = table*8, RC = key*8
@@ -4195,6 +4257,39 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4195 | barrierback TAB:RB, TMP3, TMP0 4257 | barrierback TAB:RB, TMP3, TMP0
4196 | b <2 4258 | b <2
4197 break; 4259 break;
4260 case BC_TSETR:
4261 | // RA = src*8, RB = table*8, RC = key*8
4262 | add RB, BASE, RB // RB = address of the table operand's slot (not the table itself).
4263 | lwz TAB:CARG2, 4(RB) // Table pointer; CARG2 is also the argument for the slow-path call.
4264 |.if DUALNUM
4265 | add RC, BASE, RC
4266 | lbz TMP3, TAB:CARG2->marked // Mark byte must come from the table in CARG2; RB only holds the slot address.
4267 | lwz TMP0, TAB:CARG2->asize
4268 | lwz CARG3, 4(RC)
4269 | lwz TMP1, TAB:CARG2->array
4270 |.else
4271 | lfdx f0, BASE, RC
4272 | lbz TMP3, TAB:CARG2->marked // Same as the DUALNUM branch: read through the table pointer.
4273 | lwz TMP0, TAB:CARG2->asize
4274 | toint CARG3, f0
4275 | lwz TMP1, TAB:CARG2->array
4276 |.endif
4277 | andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table)
4278 | bne >7
4279 |2:
4280 | cmplw TMP0, CARG3 // Unsigned compare of asize with key.
4281 | slwi TMP2, CARG3, 3 // Byte offset of the element: key*8.
4282 | lfdx f14, BASE, RA // Value to store, read from the src slot.
4283 | ble ->vmeta_tsetr // In array part?
4284 | ins_next1
4285 | stfdx f14, TMP1, TMP2 // Store the value at array + key*8.
4286 | ins_next2
4287 |
4288 |7: // Possible table write barrier for the value. Skip valiswhite check.
4289 | barrierback TAB:CARG2, TMP3, TMP2
4290 | b <2
4291 break;
4292
4198 4293
4199 case BC_TSETM: 4294 case BC_TSETM:
4200 | // RA = base*8 (table at base-1), RD = num_const*8 (start index) 4295 | // RA = base*8 (table at base-1), RD = num_const*8 (start index)
diff --git a/src/vm_ppcspe.dasc b/src/vm_ppcspe.dasc
index 4fabc02f..b443f1b3 100644
--- a/src/vm_ppcspe.dasc
+++ b/src/vm_ppcspe.dasc
@@ -1456,12 +1456,6 @@ static void build_subroutines(BuildCtx *ctx)
1456 | math_extern2 atan2 1456 | math_extern2 atan2
1457 | math_extern2 fmod 1457 | math_extern2 fmod
1458 | 1458 |
1459 |->ff_math_deg:
1460 |.ffunc_n math_rad
1461 | evldd CARG2, CFUNC:RB->upvalue[0]
1462 | efdmul CRET1, CARG1, CARG2
1463 | b ->fff_restv
1464 |
1465 |.ffunc math_ldexp 1459 |.ffunc math_ldexp
1466 | cmplwi NARGS8:RC, 16 1460 | cmplwi NARGS8:RC, 16
1467 | evldd CARG2, 0(BASE) 1461 | evldd CARG2, 0(BASE)
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
index b4674e2b..0a53ffde 100644
--- a/src/vm_x86.dasc
+++ b/src/vm_x86.dasc
@@ -18,7 +18,6 @@
18| 18|
19|.if P64 19|.if P64
20|.define X64, 1 20|.define X64, 1
21|.define SSE, 1
22|.if WIN 21|.if WIN
23|.define X64WIN, 1 22|.define X64WIN, 1
24|.endif 23|.endif
@@ -856,13 +855,9 @@ static void build_subroutines(BuildCtx *ctx)
856 |.if DUALNUM 855 |.if DUALNUM
857 | mov TMP2, LJ_TISNUM 856 | mov TMP2, LJ_TISNUM
858 | mov TMP1, RC 857 | mov TMP1, RC
859 |.elif SSE 858 |.else
860 | cvtsi2sd xmm0, RC 859 | cvtsi2sd xmm0, RC
861 | movsd TMPQ, xmm0 860 | movsd TMPQ, xmm0
862 |.else
863 | mov ARG4, RC
864 | fild ARG4
865 | fstp TMPQ
866 |.endif 861 |.endif
867 | lea RCa, TMPQ // Store temp. TValue in TMPQ. 862 | lea RCa, TMPQ // Store temp. TValue in TMPQ.
868 | jmp >1 863 | jmp >1
@@ -916,6 +911,19 @@ static void build_subroutines(BuildCtx *ctx)
916 | mov NARGS:RD, 2+1 // 2 args for func(t, k). 911 | mov NARGS:RD, 2+1 // 2 args for func(t, k).
917 | jmp ->vm_call_dispatch_f 912 | jmp ->vm_call_dispatch_f
918 | 913 |
914 |->vmeta_tgetr: // Calls lj_tab_getinth with the table from RB and the key from RC.
915 | mov FCARG1, TAB:RB
916 | mov RB, BASE // Save BASE.
917 | mov FCARG2, RC // Caveat: FCARG2 == BASE
918 | call extern lj_tab_getinth@8 // (GCtab *t, int32_t key)
919 | // cTValue * or NULL returned in eax (RC).
920 | movzx RA, PC_RA // Reload the RA operand (PC_RA is defined outside this hunk).
921 | mov BASE, RB // Restore BASE.
922 | test RC, RC // NULL result?
923 | jnz ->BC_TGETR_Z // Non-NULL: continue at BC_TGETR_Z with the pointer in RC.
924 | mov dword [BASE+RA*8+4], LJ_TNIL // NULL: write LJ_TNIL into the dword at offset 4 of slot RA.
925 | jmp ->BC_TGETR2_Z
926 |
919 |//----------------------------------------------------------------------- 927 |//-----------------------------------------------------------------------
920 | 928 |
921 |->vmeta_tsets: 929 |->vmeta_tsets:
@@ -935,13 +943,9 @@ static void build_subroutines(BuildCtx *ctx)
935 |.if DUALNUM 943 |.if DUALNUM
936 | mov TMP2, LJ_TISNUM 944 | mov TMP2, LJ_TISNUM
937 | mov TMP1, RC 945 | mov TMP1, RC
938 |.elif SSE 946 |.else
939 | cvtsi2sd xmm0, RC 947 | cvtsi2sd xmm0, RC
940 | movsd TMPQ, xmm0 948 | movsd TMPQ, xmm0
941 |.else
942 | mov ARG4, RC
943 | fild ARG4
944 | fstp TMPQ
945 |.endif 949 |.endif
946 | lea RCa, TMPQ // Store temp. TValue in TMPQ. 950 | lea RCa, TMPQ // Store temp. TValue in TMPQ.
947 | jmp >1 951 | jmp >1
@@ -1007,6 +1011,33 @@ static void build_subroutines(BuildCtx *ctx)
1007 | mov NARGS:RD, 3+1 // 3 args for func(t, k, v). 1011 | mov NARGS:RD, 3+1 // 3 args for func(t, k, v).
1008 | jmp ->vm_call_dispatch_f 1012 | jmp ->vm_call_dispatch_f
1009 | 1013 |
1014 |->vmeta_tsetr: // Calls lj_tab_setinth(L, t, key) with the table from RB and the key from RC, then jumps to BC_TSETR_Z.
1015 |.if X64WIN
1016 | mov L:CARG1d, SAVE_L
1017 | mov CARG3d, RC
1018 | mov L:CARG1d->base, BASE
1019 | xchg CARG2d, TAB:RB // Caveat: CARG2d == BASE. The swap passes the table in CARG2d and leaves BASE saved in RB.
1020 |.elif X64
1021 | mov L:CARG1d, SAVE_L
1022 | mov CARG2d, TAB:RB
1023 | mov L:CARG1d->base, BASE
1024 | mov RB, BASE // Save BASE.
1025 | mov CARG3d, RC // Caveat: CARG3d == BASE.
1026 |.else
1027 | mov L:RA, SAVE_L
1028 | mov ARG2, TAB:RB
1029 | mov RB, BASE // Save BASE.
1030 | mov ARG3, RC
1031 | mov ARG1, L:RA
1032 | mov L:RA->base, BASE
1033 |.endif
1034 | mov SAVE_PC, PC
1035 | call extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
1036 | // TValue * returned in eax (RC).
1037 | movzx RA, PC_RA // Reload the RA operand (PC_RA is defined outside this hunk).
1038 | mov BASE, RB // Restore BASE.
1039 | jmp ->BC_TSETR_Z
1040 |
1010 |//-- Comparison metamethods --------------------------------------------- 1041 |//-- Comparison metamethods ---------------------------------------------
1011 | 1042 |
1012 |->vmeta_comp: 1043 |->vmeta_comp:
@@ -1101,6 +1132,26 @@ static void build_subroutines(BuildCtx *ctx)
1101 | jmp <3 1132 | jmp <3
1102 |.endif 1133 |.endif
1103 | 1134 |
1135 |->vmeta_istype: // Calls lj_meta_istype(L, ra, tp) with ra from RA and tp from PC_RD.
1136 |.if X64
1137 | mov L:CARG1d, SAVE_L
1138 | mov L:CARG1d->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
1139 | mov CARG2d, RA
1140 | movzx CARG3d, PC_RD
1141 | mov L:RB, L:CARG1d // Keep L in RB so BASE can be reloaded after the call.
1142 |.else
1143 | movzx RD, PC_RD
1144 | mov ARG2, RA
1145 | mov L:RB, SAVE_L
1146 | mov ARG3, RD
1147 | mov ARG1, L:RB
1148 | mov L:RB->base, BASE
1149 |.endif
1150 | mov SAVE_PC, PC
1151 | call extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
1152 | mov BASE, L:RB->base // Reload BASE from L->base.
1153 | jmp <6 // Local label 6 is defined before this hunk.
1154 |
1104 |//-- Arithmetic metamethods --------------------------------------------- 1155 |//-- Arithmetic metamethods ---------------------------------------------
1105 | 1156 |
1106 |->vmeta_arith_vno: 1157 |->vmeta_arith_vno:
@@ -1509,11 +1560,7 @@ static void build_subroutines(BuildCtx *ctx)
1509 |.else 1560 |.else
1510 | jae ->fff_fallback 1561 | jae ->fff_fallback
1511 |.endif 1562 |.endif
1512 |.if SSE
1513 | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0 1563 | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0
1514 |.else
1515 | fld qword [BASE]; jmp ->fff_resn
1516 |.endif
1517 | 1564 |
1518 |.ffunc_1 tostring 1565 |.ffunc_1 tostring
1519 | // Only handles the string or number case inline. 1566 | // Only handles the string or number case inline.
@@ -1631,19 +1678,12 @@ static void build_subroutines(BuildCtx *ctx)
1631 | add RD, 1 1678 | add RD, 1
1632 | mov dword [BASE-4], LJ_TISNUM 1679 | mov dword [BASE-4], LJ_TISNUM
1633 | mov dword [BASE-8], RD 1680 | mov dword [BASE-8], RD
1634 |.elif SSE 1681 |.else
1635 | movsd xmm0, qword [BASE+8] 1682 | movsd xmm0, qword [BASE+8]
1636 | sseconst_1 xmm1, RBa 1683 | sseconst_1 xmm1, RBa
1637 | addsd xmm0, xmm1 1684 | addsd xmm0, xmm1
1638 | cvtsd2si RD, xmm0 1685 | cvttsd2si RD, xmm0
1639 | movsd qword [BASE-8], xmm0 1686 | movsd qword [BASE-8], xmm0
1640 |.else
1641 | fld qword [BASE+8]
1642 | fld1
1643 | faddp st1
1644 | fist ARG1
1645 | fstp qword [BASE-8]
1646 | mov RD, ARG1
1647 |.endif 1687 |.endif
1648 | mov TAB:RB, [BASE] 1688 | mov TAB:RB, [BASE]
1649 | cmp RD, TAB:RB->asize; jae >2 // Not in array part? 1689 | cmp RD, TAB:RB->asize; jae >2 // Not in array part?
@@ -1690,12 +1730,9 @@ static void build_subroutines(BuildCtx *ctx)
1690 |.if DUALNUM 1730 |.if DUALNUM
1691 | mov dword [BASE+12], LJ_TISNUM 1731 | mov dword [BASE+12], LJ_TISNUM
1692 | mov dword [BASE+8], 0 1732 | mov dword [BASE+8], 0
1693 |.elif SSE 1733 |.else
1694 | xorps xmm0, xmm0 1734 | xorps xmm0, xmm0
1695 | movsd qword [BASE+8], xmm0 1735 | movsd qword [BASE+8], xmm0
1696 |.else
1697 | fldz
1698 | fstp qword [BASE+8]
1699 |.endif 1736 |.endif
1700 | mov RD, 1+3 1737 | mov RD, 1+3
1701 | jmp ->fff_res 1738 | jmp ->fff_res
@@ -1925,12 +1962,10 @@ static void build_subroutines(BuildCtx *ctx)
1925 |->fff_resi: // Dummy. 1962 |->fff_resi: // Dummy.
1926 |.endif 1963 |.endif
1927 | 1964 |
1928 |.if SSE
1929 |->fff_resn: 1965 |->fff_resn:
1930 | mov PC, [BASE-4] 1966 | mov PC, [BASE-4]
1931 | fstp qword [BASE-8] 1967 | fstp qword [BASE-8]
1932 | jmp ->fff_res1 1968 | jmp ->fff_res1
1933 |.endif
1934 | 1969 |
1935 | .ffunc_1 math_abs 1970 | .ffunc_1 math_abs
1936 |.if DUALNUM 1971 |.if DUALNUM
@@ -1954,8 +1989,6 @@ static void build_subroutines(BuildCtx *ctx)
1954 |.else 1989 |.else
1955 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 1990 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1956 |.endif 1991 |.endif
1957 |
1958 |.if SSE
1959 | movsd xmm0, qword [BASE] 1992 | movsd xmm0, qword [BASE]
1960 | sseconst_abs xmm1, RDa 1993 | sseconst_abs xmm1, RDa
1961 | andps xmm0, xmm1 1994 | andps xmm0, xmm1
@@ -1963,15 +1996,6 @@ static void build_subroutines(BuildCtx *ctx)
1963 | mov PC, [BASE-4] 1996 | mov PC, [BASE-4]
1964 | movsd qword [BASE-8], xmm0 1997 | movsd qword [BASE-8], xmm0
1965 | // fallthrough 1998 | // fallthrough
1966 |.else
1967 | fld qword [BASE]
1968 | fabs
1969 | // fallthrough
1970 |->fff_resxmm0: // Dummy.
1971 |->fff_resn:
1972 | mov PC, [BASE-4]
1973 | fstp qword [BASE-8]
1974 |.endif
1975 | 1999 |
1976 |->fff_res1: 2000 |->fff_res1:
1977 | mov RD, 1+1 2001 | mov RD, 1+1
@@ -2008,48 +2032,24 @@ static void build_subroutines(BuildCtx *ctx)
2008 |.else 2032 |.else
2009 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 2033 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
2010 |.endif 2034 |.endif
2011 |.if SSE
2012 | movsd xmm0, qword [BASE] 2035 | movsd xmm0, qword [BASE]
2013 | call ->vm_ .. func 2036 | call ->vm_ .. func .. _sse
2014 | .if DUALNUM 2037 |.if DUALNUM
2015 | cvtsd2si RB, xmm0 2038 | cvttsd2si RB, xmm0
2016 | cmp RB, 0x80000000 2039 | cmp RB, 0x80000000
2017 | jne ->fff_resi 2040 | jne ->fff_resi
2018 | cvtsi2sd xmm1, RB 2041 | cvtsi2sd xmm1, RB
2019 | ucomisd xmm0, xmm1 2042 | ucomisd xmm0, xmm1
2020 | jp ->fff_resxmm0 2043 | jp ->fff_resxmm0
2021 | je ->fff_resi 2044 | je ->fff_resi
2022 | .endif
2023 | jmp ->fff_resxmm0
2024 |.else
2025 | fld qword [BASE]
2026 | call ->vm_ .. func
2027 | .if DUALNUM
2028 | fist ARG1
2029 | mov RB, ARG1
2030 | cmp RB, 0x80000000; jne >2
2031 | fdup
2032 | fild ARG1
2033 | fcomparepp
2034 | jp ->fff_resn
2035 | jne ->fff_resn
2036 |2:
2037 | fpop
2038 | jmp ->fff_resi
2039 | .else
2040 | jmp ->fff_resn
2041 | .endif
2042 |.endif 2045 |.endif
2046 | jmp ->fff_resxmm0
2043 |.endmacro 2047 |.endmacro
2044 | 2048 |
2045 | math_round floor 2049 | math_round floor
2046 | math_round ceil 2050 | math_round ceil
2047 | 2051 |
2048 |.if SSE
2049 |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0 2052 |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0
2050 |.else
2051 |.ffunc_n math_sqrt; fsqrt; jmp ->fff_resn
2052 |.endif
2053 | 2053 |
2054 |.ffunc math_log 2054 |.ffunc math_log
2055 | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. 2055 | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument.
@@ -2072,42 +2072,24 @@ static void build_subroutines(BuildCtx *ctx)
2072 |.ffunc_n math_atan; fld1; fpatan; jmp ->fff_resn 2072 |.ffunc_n math_atan; fld1; fpatan; jmp ->fff_resn
2073 | 2073 |
2074 |.macro math_extern, func 2074 |.macro math_extern, func
2075 |.if SSE
2076 | .ffunc_nsse math_ .. func 2075 | .ffunc_nsse math_ .. func
2077 | .if not X64 2076 |.if not X64
2078 | movsd FPARG1, xmm0 2077 | movsd FPARG1, xmm0
2079 | .endif
2080 |.else
2081 | .ffunc_n math_ .. func
2082 | fstp FPARG1
2083 |.endif 2078 |.endif
2084 | mov RB, BASE 2079 | mov RB, BASE
2085 | call extern lj_vm_ .. func 2080 | call extern lj_vm_ .. func
2086 | mov BASE, RB 2081 | mov BASE, RB
2087 | .if X64 2082 |.if X64
2088 | jmp ->fff_resxmm0 2083 | jmp ->fff_resxmm0
2089 | .else 2084 |.else
2090 | jmp ->fff_resn 2085 | jmp ->fff_resn
2091 | .endif 2086 |.endif
2092 |.endmacro 2087 |.endmacro
2093 | 2088 |
2094 | math_extern sinh 2089 | math_extern sinh
2095 | math_extern cosh 2090 | math_extern cosh
2096 | math_extern tanh 2091 | math_extern tanh
2097 | 2092 |
2098 |->ff_math_deg:
2099 |.if SSE
2100 |.ffunc_nsse math_rad
2101 | mov CFUNC:RB, [BASE-8]
2102 | mulsd xmm0, qword CFUNC:RB->upvalue[0]
2103 | jmp ->fff_resxmm0
2104 |.else
2105 |.ffunc_n math_rad
2106 | mov CFUNC:RB, [BASE-8]
2107 | fmul qword CFUNC:RB->upvalue[0]
2108 | jmp ->fff_resn
2109 |.endif
2110 |
2111 |.ffunc_nn math_atan2; fpatan; jmp ->fff_resn 2093 |.ffunc_nn math_atan2; fpatan; jmp ->fff_resn
2112 |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn 2094 |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn
2113 | 2095 |
@@ -2123,65 +2105,34 @@ static void build_subroutines(BuildCtx *ctx)
2123 | cmp RB, 0x00200000; jb >4 2105 | cmp RB, 0x00200000; jb >4
2124 |1: 2106 |1:
2125 | shr RB, 21; sub RB, RC // Extract and unbias exponent. 2107 | shr RB, 21; sub RB, RC // Extract and unbias exponent.
2126 |.if SSE
2127 | cvtsi2sd xmm0, RB 2108 | cvtsi2sd xmm0, RB
2128 |.else
2129 | mov TMP1, RB; fild TMP1
2130 |.endif
2131 | mov RB, [BASE-4] 2109 | mov RB, [BASE-4]
2132 | and RB, 0x800fffff // Mask off exponent. 2110 | and RB, 0x800fffff // Mask off exponent.
2133 | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0. 2111 | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0.
2134 | mov [BASE-4], RB 2112 | mov [BASE-4], RB
2135 |2: 2113 |2:
2136 |.if SSE
2137 | movsd qword [BASE], xmm0 2114 | movsd qword [BASE], xmm0
2138 |.else
2139 | fstp qword [BASE]
2140 |.endif
2141 | mov RD, 1+2 2115 | mov RD, 1+2
2142 | jmp ->fff_res 2116 | jmp ->fff_res
2143 |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0. 2117 |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0.
2144 |.if SSE
2145 | xorps xmm0, xmm0; jmp <2 2118 | xorps xmm0, xmm0; jmp <2
2146 |.else
2147 | fldz; jmp <2
2148 |.endif
2149 |4: // Handle denormals by multiplying with 2^54 and adjusting the bias. 2119 |4: // Handle denormals by multiplying with 2^54 and adjusting the bias.
2150 |.if SSE
2151 | movsd xmm0, qword [BASE] 2120 | movsd xmm0, qword [BASE]
2152 | sseconst_hi xmm1, RBa, 43500000 // 2^54. 2121 | sseconst_hi xmm1, RBa, 43500000 // 2^54.
2153 | mulsd xmm0, xmm1 2122 | mulsd xmm0, xmm1
2154 | movsd qword [BASE-8], xmm0 2123 | movsd qword [BASE-8], xmm0
2155 |.else
2156 | fld qword [BASE]
2157 | mov TMP1, 0x5a800000; fmul TMP1 // x = x*2^54
2158 | fstp qword [BASE-8]
2159 |.endif
2160 | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1 2124 | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1
2161 | 2125 |
2162 |.if SSE
2163 |.ffunc_nsse math_modf 2126 |.ffunc_nsse math_modf
2164 |.else
2165 |.ffunc_n math_modf
2166 |.endif
2167 | mov RB, [BASE+4] 2127 | mov RB, [BASE+4]
2168 | mov PC, [BASE-4] 2128 | mov PC, [BASE-4]
2169 | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf? 2129 | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf?
2170 |.if SSE
2171 | movaps xmm4, xmm0 2130 | movaps xmm4, xmm0
2172 | call ->vm_trunc 2131 | call ->vm_trunc_sse
2173 | subsd xmm4, xmm0 2132 | subsd xmm4, xmm0
2174 |1: 2133 |1:
2175 | movsd qword [BASE-8], xmm0 2134 | movsd qword [BASE-8], xmm0
2176 | movsd qword [BASE], xmm4 2135 | movsd qword [BASE], xmm4
2177 |.else
2178 | fdup
2179 | call ->vm_trunc
2180 | fsub st1, st0
2181 |1:
2182 | fstp qword [BASE-8]
2183 | fstp qword [BASE]
2184 |.endif
2185 | mov RC, [BASE-4]; mov RB, [BASE+4] 2136 | mov RC, [BASE-4]; mov RB, [BASE+4]
2186 | xor RC, RB; js >3 // Need to adjust sign? 2137 | xor RC, RB; js >3 // Need to adjust sign?
2187 |2: 2138 |2:
@@ -2191,24 +2142,16 @@ static void build_subroutines(BuildCtx *ctx)
2191 | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction. 2142 | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction.
2192 | jmp <2 2143 | jmp <2
2193 |4: 2144 |4:
2194 |.if SSE
2195 | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0. 2145 | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0.
2196 |.else
2197 | fldz; fxch; jmp <1 // Return +-Inf and +-0.
2198 |.endif
2199 | 2146 |
2200 |.ffunc_nnr math_fmod 2147 |.ffunc_nnr math_fmod
2201 |1: ; fprem; fnstsw ax; sahf; jp <1 2148 |1: ; fprem; fnstsw ax; sahf; jp <1
2202 | fpop1 2149 | fpop1
2203 | jmp ->fff_resn 2150 | jmp ->fff_resn
2204 | 2151 |
2205 |.if SSE 2152 |.ffunc_nnsse math_pow; call ->vm_pow_sse; jmp ->fff_resxmm0
2206 |.ffunc_nnsse math_pow; call ->vm_pow; jmp ->fff_resxmm0
2207 |.else
2208 |.ffunc_nn math_pow; call ->vm_pow; jmp ->fff_resn
2209 |.endif
2210 | 2153 |
2211 |.macro math_minmax, name, cmovop, fcmovop, sseop 2154 |.macro math_minmax, name, cmovop, sseop
2212 | .ffunc name 2155 | .ffunc name
2213 | mov RA, 2 2156 | mov RA, 2
2214 | cmp dword [BASE+4], LJ_TISNUM 2157 | cmp dword [BASE+4], LJ_TISNUM
@@ -2225,12 +2168,7 @@ static void build_subroutines(BuildCtx *ctx)
2225 |3: 2168 |3:
2226 | ja ->fff_fallback 2169 | ja ->fff_fallback
2227 | // Convert intermediate result to number and continue below. 2170 | // Convert intermediate result to number and continue below.
2228 |.if SSE
2229 | cvtsi2sd xmm0, RB 2171 | cvtsi2sd xmm0, RB
2230 |.else
2231 | mov TMP1, RB
2232 | fild TMP1
2233 |.endif
2234 | jmp >6 2172 | jmp >6
2235 |4: 2173 |4:
2236 | ja ->fff_fallback 2174 | ja ->fff_fallback
@@ -2238,7 +2176,6 @@ static void build_subroutines(BuildCtx *ctx)
2238 | jae ->fff_fallback 2176 | jae ->fff_fallback
2239 |.endif 2177 |.endif
2240 | 2178 |
2241 |.if SSE
2242 | movsd xmm0, qword [BASE] 2179 | movsd xmm0, qword [BASE]
2243 |5: // Handle numbers or integers. 2180 |5: // Handle numbers or integers.
2244 | cmp RA, RD; jae ->fff_resxmm0 2181 | cmp RA, RD; jae ->fff_resxmm0
@@ -2257,34 +2194,10 @@ static void build_subroutines(BuildCtx *ctx)
2257 | sseop xmm0, xmm1 2194 | sseop xmm0, xmm1
2258 | add RA, 1 2195 | add RA, 1
2259 | jmp <5 2196 | jmp <5
2260 |.else
2261 | fld qword [BASE]
2262 |5: // Handle numbers or integers.
2263 | cmp RA, RD; jae ->fff_resn
2264 | cmp dword [BASE+RA*8-4], LJ_TISNUM
2265 |.if DUALNUM
2266 | jb >6
2267 | ja >9
2268 | fild dword [BASE+RA*8-8]
2269 | jmp >7
2270 |.else
2271 | jae >9
2272 |.endif
2273 |6:
2274 | fld qword [BASE+RA*8-8]
2275 |7:
2276 | fucomi st1; fcmovop st1; fpop1
2277 | add RA, 1
2278 | jmp <5
2279 |.endif
2280 |.endmacro 2197 |.endmacro
2281 | 2198 |
2282 | math_minmax math_min, cmovg, fcmovnbe, minsd 2199 | math_minmax math_min, cmovg, minsd
2283 | math_minmax math_max, cmovl, fcmovbe, maxsd 2200 | math_minmax math_max, cmovl, maxsd
2284 |.if not SSE
2285 |9:
2286 | fpop; jmp ->fff_fallback
2287 |.endif
2288 | 2201 |
2289 |//-- String library ----------------------------------------------------- 2202 |//-- String library -----------------------------------------------------
2290 | 2203 |
@@ -2293,10 +2206,8 @@ static void build_subroutines(BuildCtx *ctx)
2293 | mov STR:RB, [BASE] 2206 | mov STR:RB, [BASE]
2294 |.if DUALNUM 2207 |.if DUALNUM
2295 | mov RB, dword STR:RB->len; jmp ->fff_resi 2208 | mov RB, dword STR:RB->len; jmp ->fff_resi
2296 |.elif SSE
2297 | cvtsi2sd xmm0, dword STR:RB->len; jmp ->fff_resxmm0
2298 |.else 2209 |.else
2299 | fild dword STR:RB->len; jmp ->fff_resn 2210 | cvtsi2sd xmm0, dword STR:RB->len; jmp ->fff_resxmm0
2300 |.endif 2211 |.endif
2301 | 2212 |
2302 |.ffunc string_byte // Only handle the 1-arg case here. 2213 |.ffunc string_byte // Only handle the 1-arg case here.
@@ -2309,10 +2220,8 @@ static void build_subroutines(BuildCtx *ctx)
2309 | movzx RB, byte STR:RB[1] 2220 | movzx RB, byte STR:RB[1]
2310 |.if DUALNUM 2221 |.if DUALNUM
2311 | jmp ->fff_resi 2222 | jmp ->fff_resi
2312 |.elif SSE
2313 | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0
2314 |.else 2223 |.else
2315 | mov TMP1, RB; fild TMP1; jmp ->fff_resn 2224 | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0
2316 |.endif 2225 |.endif
2317 | 2226 |
2318 |.ffunc string_char // Only handle the 1-arg case here. 2227 |.ffunc string_char // Only handle the 1-arg case here.
@@ -2324,16 +2233,11 @@ static void build_subroutines(BuildCtx *ctx)
2324 | mov RB, dword [BASE] 2233 | mov RB, dword [BASE]
2325 | cmp RB, 255; ja ->fff_fallback 2234 | cmp RB, 255; ja ->fff_fallback
2326 | mov TMP2, RB 2235 | mov TMP2, RB
2327 |.elif SSE 2236 |.else
2328 | jae ->fff_fallback 2237 | jae ->fff_fallback
2329 | cvttsd2si RB, qword [BASE] 2238 | cvttsd2si RB, qword [BASE]
2330 | cmp RB, 255; ja ->fff_fallback 2239 | cmp RB, 255; ja ->fff_fallback
2331 | mov TMP2, RB 2240 | mov TMP2, RB
2332 |.else
2333 | jae ->fff_fallback
2334 | fld qword [BASE]
2335 | fistp TMP2
2336 | cmp TMP2, 255; ja ->fff_fallback
2337 |.endif 2241 |.endif
2338 |.if X64 2242 |.if X64
2339 | mov TMP3, 1 2243 | mov TMP3, 1
@@ -2371,14 +2275,10 @@ static void build_subroutines(BuildCtx *ctx)
2371 | jne ->fff_fallback 2275 | jne ->fff_fallback
2372 | mov RB, dword [BASE+16] 2276 | mov RB, dword [BASE+16]
2373 | mov TMP2, RB 2277 | mov TMP2, RB
2374 |.elif SSE 2278 |.else
2375 | jae ->fff_fallback 2279 | jae ->fff_fallback
2376 | cvttsd2si RB, qword [BASE+16] 2280 | cvttsd2si RB, qword [BASE+16]
2377 | mov TMP2, RB 2281 | mov TMP2, RB
2378 |.else
2379 | jae ->fff_fallback
2380 | fld qword [BASE+16]
2381 | fistp TMP2
2382 |.endif 2282 |.endif
2383 |1: 2283 |1:
2384 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback 2284 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
@@ -2393,12 +2293,8 @@ static void build_subroutines(BuildCtx *ctx)
2393 | mov RB, STR:RB->len 2293 | mov RB, STR:RB->len
2394 |.if DUALNUM 2294 |.if DUALNUM
2395 | mov RA, dword [BASE+8] 2295 | mov RA, dword [BASE+8]
2396 |.elif SSE
2397 | cvttsd2si RA, qword [BASE+8]
2398 |.else 2296 |.else
2399 | fld qword [BASE+8] 2297 | cvttsd2si RA, qword [BASE+8]
2400 | fistp ARG3
2401 | mov RA, ARG3
2402 |.endif 2298 |.endif
2403 | mov RC, TMP2 2299 | mov RC, TMP2
2404 | cmp RB, RC // len < end? (unsigned compare) 2300 | cmp RB, RC // len < end? (unsigned compare)
@@ -2451,14 +2347,9 @@ static void build_subroutines(BuildCtx *ctx)
2451 |.if DUALNUM 2347 |.if DUALNUM
2452 | jne ->fff_fallback 2348 | jne ->fff_fallback
2453 | mov RC, dword [BASE+8] 2349 | mov RC, dword [BASE+8]
2454 |.elif SSE
2455 | jae ->fff_fallback
2456 | cvttsd2si RC, qword [BASE+8]
2457 |.else 2350 |.else
2458 | jae ->fff_fallback 2351 | jae ->fff_fallback
2459 | fld qword [BASE+8] 2352 | cvttsd2si RC, qword [BASE+8]
2460 | fistp TMP2
2461 | mov RC, TMP2
2462 |.endif 2353 |.endif
2463 | test RC, RC 2354 | test RC, RC
2464 | jle ->fff_emptystr // Count <= 0? (or non-int) 2355 | jle ->fff_emptystr // Count <= 0? (or non-int)
@@ -2543,23 +2434,6 @@ static void build_subroutines(BuildCtx *ctx)
2543 |ffstring_case string_lower, 0x41, 0x5a 2434 |ffstring_case string_lower, 0x41, 0x5a
2544 |ffstring_case string_upper, 0x61, 0x7a 2435 |ffstring_case string_upper, 0x61, 0x7a
2545 | 2436 |
2546 |//-- Table library ------------------------------------------------------
2547 |
2548 |.ffunc_1 table_getn
2549 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
2550 | mov RB, BASE // Save BASE.
2551 | mov TAB:FCARG1, [BASE]
2552 | call extern lj_tab_len@4 // LJ_FASTCALL (GCtab *t)
2553 | // Length of table returned in eax (RD).
2554 | mov BASE, RB // Restore BASE.
2555 |.if DUALNUM
2556 | mov RB, RD; jmp ->fff_resi
2557 |.elif SSE
2558 | cvtsi2sd xmm0, RD; jmp ->fff_resxmm0
2559 |.else
2560 | mov ARG1, RD; fild ARG1; jmp ->fff_resn
2561 |.endif
2562 |
2563 |//-- Bit library -------------------------------------------------------- 2437 |//-- Bit library --------------------------------------------------------
2564 | 2438 |
2565 |.define TOBIT_BIAS, 0x59c00000 // 2^52 + 2^51 (float, not double!). 2439 |.define TOBIT_BIAS, 0x59c00000 // 2^52 + 2^51 (float, not double!).
@@ -2567,11 +2441,7 @@ static void build_subroutines(BuildCtx *ctx)
2567 |.macro .ffunc_bit, name, kind 2441 |.macro .ffunc_bit, name, kind
2568 | .ffunc_1 name 2442 | .ffunc_1 name
2569 |.if kind == 2 2443 |.if kind == 2
2570 |.if SSE
2571 | sseconst_tobit xmm1, RBa 2444 | sseconst_tobit xmm1, RBa
2572 |.else
2573 | mov TMP1, TOBIT_BIAS
2574 |.endif
2575 |.endif 2445 |.endif
2576 | cmp dword [BASE+4], LJ_TISNUM 2446 | cmp dword [BASE+4], LJ_TISNUM
2577 |.if DUALNUM 2447 |.if DUALNUM
@@ -2587,37 +2457,17 @@ static void build_subroutines(BuildCtx *ctx)
2587 |.else 2457 |.else
2588 | jae ->fff_fallback 2458 | jae ->fff_fallback
2589 |.endif 2459 |.endif
2590 |.if SSE
2591 | movsd xmm0, qword [BASE] 2460 | movsd xmm0, qword [BASE]
2592 |.if kind < 2 2461 |.if kind < 2
2593 | sseconst_tobit xmm1, RBa 2462 | sseconst_tobit xmm1, RBa
2594 |.endif 2463 |.endif
2595 | addsd xmm0, xmm1 2464 | addsd xmm0, xmm1
2596 | movd RB, xmm0 2465 | movd RB, xmm0
2597 |.else
2598 | fld qword [BASE]
2599 |.if kind < 2
2600 | mov TMP1, TOBIT_BIAS
2601 |.endif
2602 | fadd TMP1
2603 | fstp FPARG1
2604 |.if kind > 0
2605 | mov RB, ARG1
2606 |.endif
2607 |.endif
2608 |2: 2466 |2:
2609 |.endmacro 2467 |.endmacro
2610 | 2468 |
2611 |.ffunc_bit bit_tobit, 0 2469 |.ffunc_bit bit_tobit, 0
2612 |.if DUALNUM or SSE
2613 |.if not SSE
2614 | mov RB, ARG1
2615 |.endif
2616 | jmp ->fff_resbit 2470 | jmp ->fff_resbit
2617 |.else
2618 | fild ARG1
2619 | jmp ->fff_resn
2620 |.endif
2621 | 2471 |
2622 |.macro .ffunc_bit_op, name, ins 2472 |.macro .ffunc_bit_op, name, ins
2623 | .ffunc_bit name, 2 2473 | .ffunc_bit name, 2
@@ -2637,17 +2487,10 @@ static void build_subroutines(BuildCtx *ctx)
2637 |.else 2487 |.else
2638 | jae ->fff_fallback_bit_op 2488 | jae ->fff_fallback_bit_op
2639 |.endif 2489 |.endif
2640 |.if SSE
2641 | movsd xmm0, qword [RD] 2490 | movsd xmm0, qword [RD]
2642 | addsd xmm0, xmm1 2491 | addsd xmm0, xmm1
2643 | movd RA, xmm0 2492 | movd RA, xmm0
2644 | ins RB, RA 2493 | ins RB, RA
2645 |.else
2646 | fld qword [RD]
2647 | fadd TMP1
2648 | fstp FPARG1
2649 | ins RB, ARG1
2650 |.endif
2651 | sub RD, 8 2494 | sub RD, 8
2652 | jmp <1 2495 | jmp <1
2653 |.endmacro 2496 |.endmacro
@@ -2664,15 +2507,10 @@ static void build_subroutines(BuildCtx *ctx)
2664 | not RB 2507 | not RB
2665 |.if DUALNUM 2508 |.if DUALNUM
2666 | jmp ->fff_resbit 2509 | jmp ->fff_resbit
2667 |.elif SSE 2510 |.else
2668 |->fff_resbit: 2511 |->fff_resbit:
2669 | cvtsi2sd xmm0, RB 2512 | cvtsi2sd xmm0, RB
2670 | jmp ->fff_resxmm0 2513 | jmp ->fff_resxmm0
2671 |.else
2672 |->fff_resbit:
2673 | mov ARG1, RB
2674 | fild ARG1
2675 | jmp ->fff_resn
2676 |.endif 2514 |.endif
2677 | 2515 |
2678 |->fff_fallback_bit_op: 2516 |->fff_fallback_bit_op:
@@ -2685,22 +2523,13 @@ static void build_subroutines(BuildCtx *ctx)
2685 | // Note: no inline conversion from number for 2nd argument! 2523 | // Note: no inline conversion from number for 2nd argument!
2686 | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback 2524 | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback
2687 | mov RA, dword [BASE+8] 2525 | mov RA, dword [BASE+8]
2688 |.elif SSE 2526 |.else
2689 | .ffunc_nnsse name 2527 | .ffunc_nnsse name
2690 | sseconst_tobit xmm2, RBa 2528 | sseconst_tobit xmm2, RBa
2691 | addsd xmm0, xmm2 2529 | addsd xmm0, xmm2
2692 | addsd xmm1, xmm2 2530 | addsd xmm1, xmm2
2693 | movd RB, xmm0 2531 | movd RB, xmm0
2694 | movd RA, xmm1 2532 | movd RA, xmm1
2695 |.else
2696 | .ffunc_nn name
2697 | mov TMP1, TOBIT_BIAS
2698 | fadd TMP1
2699 | fstp FPARG3
2700 | fadd TMP1
2701 | fstp FPARG1
2702 | mov RA, ARG3
2703 | mov RB, ARG1
2704 |.endif 2533 |.endif
2705 | ins RB, cl // Assumes RA is ecx. 2534 | ins RB, cl // Assumes RA is ecx.
2706 | jmp ->fff_resbit 2535 | jmp ->fff_resbit
@@ -3051,27 +2880,9 @@ static void build_subroutines(BuildCtx *ctx)
3051 |//----------------------------------------------------------------------- 2880 |//-----------------------------------------------------------------------
3052 | 2881 |
3053 |// FP value rounding. Called by math.floor/math.ceil fast functions 2882 |// FP value rounding. Called by math.floor/math.ceil fast functions
3054 |// and from JIT code. 2883 |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
3055 | 2884 |.macro vm_round, name, mode
3056 |// x87 variant: Arg/ret on x87 stack. No int/xmm registers modified. 2885 |->name .. _sse:
3057 |.macro vm_round_x87, mode1, mode2
3058 | fnstcw word [esp+4] // Caveat: overwrites ARG1 and ARG2.
3059 | mov [esp+8], eax
3060 | mov ax, mode1
3061 | or ax, [esp+4]
3062 |.if mode2 ~= 0xffff
3063 | and ax, mode2
3064 |.endif
3065 | mov [esp+6], ax
3066 | fldcw word [esp+6]
3067 | frndint
3068 | fldcw word [esp+4]
3069 | mov eax, [esp+8]
3070 | ret
3071 |.endmacro
3072 |
3073 |// SSE variant: arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
3074 |.macro vm_round_sse, mode
3075 | sseconst_abs xmm2, RDa 2886 | sseconst_abs xmm2, RDa
3076 | sseconst_2p52 xmm3, RDa 2887 | sseconst_2p52 xmm3, RDa
3077 | movaps xmm1, xmm0 2888 | movaps xmm1, xmm0
@@ -3107,22 +2918,21 @@ static void build_subroutines(BuildCtx *ctx)
3107 | ret 2918 | ret
3108 |.endmacro 2919 |.endmacro
3109 | 2920 |
3110 |.macro vm_round, name, ssemode, mode1, mode2 2921 |->vm_floor:
3111 |->name: 2922 |.if not X64
3112 |.if not SSE 2923 | movsd xmm0, qword [esp+4]
3113 | vm_round_x87 mode1, mode2 2924 | call ->vm_floor_sse
2925 | movsd qword [esp+4], xmm0 // Overwrite callee-owned arg.
2926 | fld qword [esp+4]
2927 | ret
3114 |.endif 2928 |.endif
3115 |->name .. _sse:
3116 | vm_round_sse ssemode
3117 |.endmacro
3118 | 2929 |
3119 | vm_round vm_floor, 0, 0x0400, 0xf7ff 2930 | vm_round vm_floor, 0
3120 | vm_round vm_ceil, 1, 0x0800, 0xfbff 2931 | vm_round vm_ceil, 1
3121 | vm_round vm_trunc, 2, 0x0c00, 0xffff 2932 | vm_round vm_trunc, 2
3122 | 2933 |
3123 |// FP modulo x%y. Called by BC_MOD* and vm_arith. 2934 |// FP modulo x%y. Called by BC_MOD* and vm_arith.
3124 |->vm_mod: 2935 |->vm_mod:
3125 |.if SSE
3126 |// Args in xmm0/xmm1, return value in xmm0. 2936 |// Args in xmm0/xmm1, return value in xmm0.
3127 |// Caveat: xmm0-xmm5 and RC (eax) modified! 2937 |// Caveat: xmm0-xmm5 and RC (eax) modified!
3128 | movaps xmm5, xmm0 2938 | movaps xmm5, xmm0
@@ -3150,23 +2960,6 @@ static void build_subroutines(BuildCtx *ctx)
3150 | movaps xmm0, xmm5 2960 | movaps xmm0, xmm5
3151 | subsd xmm0, xmm1 2961 | subsd xmm0, xmm1
3152 | ret 2962 | ret
3153 |.else
3154 |// Args/ret on x87 stack (y on top). No xmm registers modified.
3155 |// Caveat: needs 3 slots on x87 stack! RC (eax) modified!
3156 | fld st1
3157 | fdiv st1
3158 | fnstcw word [esp+4]
3159 | mov ax, 0x0400
3160 | or ax, [esp+4]
3161 | and ax, 0xf7ff
3162 | mov [esp+6], ax
3163 | fldcw word [esp+6]
3164 | frndint
3165 | fldcw word [esp+4]
3166 | fmulp st1
3167 | fsubp st1
3168 | ret
3169 |.endif
3170 | 2963 |
3171 |// FP log2(x). Called by math.log(x, base). 2964 |// FP log2(x). Called by math.log(x, base).
3172 |->vm_log2: 2965 |->vm_log2:
@@ -3217,105 +3010,15 @@ static void build_subroutines(BuildCtx *ctx)
3217 | 3010 |
3218 |// Generic power function x^y. Called by BC_POW, math.pow fast function, 3011 |// Generic power function x^y. Called by BC_POW, math.pow fast function,
3219 |// and vm_arith. 3012 |// and vm_arith.
3220 |// Args/ret on x87 stack (y on top). RC (eax) modified.
3221 |// Caveat: needs 3 slots on x87 stack!
3222 |->vm_pow:
3223 |.if not SSE
3224 | fist dword [esp+4] // Store/reload int before comparison.
3225 | fild dword [esp+4] // Integral exponent used in vm_powi.
3226 | fucomip st1
3227 | jnz >8 // Branch for FP exponents.
3228 | jp >9 // Branch for NaN exponent.
3229 | fpop // Pop y and fallthrough to vm_powi.
3230 |
3231 |// FP/int power function x^i. Arg1/ret on x87 stack.
3232 |// Arg2 (int) on C stack. RC (eax) modified.
3233 |// Caveat: needs 2 slots on x87 stack!
3234 | mov eax, [esp+4]
3235 | cmp eax, 1; jle >6 // i<=1?
3236 | // Now 1 < (unsigned)i <= 0x80000000.
3237 |1: // Handle leading zeros.
3238 | test eax, 1; jnz >2
3239 | fmul st0
3240 | shr eax, 1
3241 | jmp <1
3242 |2:
3243 | shr eax, 1; jz >5
3244 | fdup
3245 |3: // Handle trailing bits.
3246 | fmul st0
3247 | shr eax, 1; jz >4
3248 | jnc <3
3249 | fmul st1, st0
3250 | jmp <3
3251 |4:
3252 | fmulp st1
3253 |5:
3254 | ret
3255 |6:
3256 | je <5 // x^1 ==> x
3257 | jb >7
3258 | fld1; fdivrp st1
3259 | neg eax
3260 | cmp eax, 1; je <5 // x^-1 ==> 1/x
3261 | jmp <1 // x^-i ==> (1/x)^i
3262 |7:
3263 | fpop; fld1 // x^0 ==> 1
3264 | ret
3265 |
3266 |8: // FP/FP power function x^y.
3267 | fst dword [esp+4]
3268 | fxch
3269 | fst dword [esp+8]
3270 | mov eax, [esp+4]; shl eax, 1
3271 | cmp eax, 0xff000000; je >2 // x^+-Inf?
3272 | mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y?
3273 | cmp eax, 0xff000000; je >4 // +-Inf^y?
3274 | fyl2x
3275 | jmp ->vm_exp2raw
3276 |
3277 |9: // Handle x^NaN.
3278 | fld1
3279 | fucomip st2
3280 | je >1 // 1^NaN ==> 1
3281 | fxch // x^NaN ==> NaN
3282 |1:
3283 | fpop
3284 | ret
3285 |
3286 |2: // Handle x^+-Inf.
3287 | fabs
3288 | fld1
3289 | fucomip st1
3290 | je >3 // +-1^+-Inf ==> 1
3291 | fpop; fabs; fldz; mov eax, 0; setc al
3292 | ror eax, 1; xor eax, [esp+4]; jns >3 // |x|<>1, x^+-Inf ==> +Inf/0
3293 | fxch
3294 |3:
3295 | fpop1; fabs
3296 | ret
3297 |
3298 |4: // Handle +-0^y or +-Inf^y.
3299 | cmp dword [esp+4], 0; jge <3 // y >= 0, x^y ==> |x|
3300 | fpop; fpop
3301 | test eax, eax; jz >5 // y < 0, +-0^y ==> +Inf
3302 | fldz // y < 0, +-Inf^y ==> 0
3303 | ret
3304 |5:
3305 | mov dword [esp+4], 0x7f800000 // Return +Inf.
3306 | fld dword [esp+4]
3307 | ret
3308 |.endif
3309 |
3310 |// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified. 3013 |// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified.
3311 |// Needs 16 byte scratch area for x86. Also called from JIT code. 3014 |// Needs 16 byte scratch area for x86. Also called from JIT code.
3312 |->vm_pow_sse: 3015 |->vm_pow_sse:
3313 | cvtsd2si eax, xmm1 3016 | cvttsd2si eax, xmm1
3314 | cvtsi2sd xmm2, eax 3017 | cvtsi2sd xmm2, eax
3315 | ucomisd xmm1, xmm2 3018 | ucomisd xmm1, xmm2
3316 | jnz >8 // Branch for FP exponents. 3019 | jnz >8 // Branch for FP exponents.
3317 | jp >9 // Branch for NaN exponent. 3020 | jp >9 // Branch for NaN exponent.
3318 | // Fallthrough to vm_powi_sse. 3021 | // Fallthrough.
3319 | 3022 |
3320 |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified. 3023 |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
3321 |->vm_powi_sse: 3024 |->vm_powi_sse:
@@ -3437,8 +3140,8 @@ static void build_subroutines(BuildCtx *ctx)
3437 | .else 3140 | .else
3438 | .define fpmop, CARG1d 3141 | .define fpmop, CARG1d
3439 | .endif 3142 | .endif
3440 | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil 3143 | cmp fpmop, 1; jb ->vm_floor_sse; je ->vm_ceil_sse
3441 | cmp fpmop, 3; jb ->vm_trunc; ja >2 3144 | cmp fpmop, 3; jb ->vm_trunc_sse; ja >2
3442 | sqrtsd xmm0, xmm0; ret 3145 | sqrtsd xmm0, xmm0; ret
3443 |2: 3146 |2:
3444 | .if X64WIN 3147 | .if X64WIN
@@ -3478,14 +3181,13 @@ static void build_subroutines(BuildCtx *ctx)
3478 | ret 3181 | ret
3479 |.else // x86 calling convention. 3182 |.else // x86 calling convention.
3480 | .define fpmop, eax 3183 | .define fpmop, eax
3481 |.if SSE
3482 | mov fpmop, [esp+12] 3184 | mov fpmop, [esp+12]
3483 | movsd xmm0, qword [esp+4] 3185 | movsd xmm0, qword [esp+4]
3484 | cmp fpmop, 1; je >1; ja >2 3186 | cmp fpmop, 1; je >1; ja >2
3485 | call ->vm_floor; jmp >7 3187 | call ->vm_floor_sse; jmp >7
3486 |1: ; call ->vm_ceil; jmp >7 3188 |1: ; call ->vm_ceil_sse; jmp >7
3487 |2: ; cmp fpmop, 3; je >1; ja >2 3189 |2: ; cmp fpmop, 3; je >1; ja >2
3488 | call ->vm_trunc; jmp >7 3190 | call ->vm_trunc_sse; jmp >7
3489 |1: 3191 |1:
3490 | sqrtsd xmm0, xmm0 3192 | sqrtsd xmm0, xmm0
3491 |7: 3193 |7:
@@ -3503,23 +3205,6 @@ static void build_subroutines(BuildCtx *ctx)
3503 |2: ; cmp fpmop, 11; je >1; ja >9 3205 |2: ; cmp fpmop, 11; je >1; ja >9
3504 | fcos; ret 3206 | fcos; ret
3505 |1: ; fptan; fpop; ret 3207 |1: ; fptan; fpop; ret
3506 |.else
3507 | mov fpmop, [esp+12]
3508 | fld qword [esp+4]
3509 | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil
3510 | cmp fpmop, 3; jb ->vm_trunc; ja >2
3511 | fsqrt; ret
3512 |2: ; cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87
3513 | cmp fpmop, 7; je >1; ja >2
3514 | fldln2; fxch; fyl2x; ret
3515 |1: ; fld1; fxch; fyl2x; ret
3516 |2: ; cmp fpmop, 9; je >1; ja >2
3517 | fldlg2; fxch; fyl2x; ret
3518 |1: ; fsin; ret
3519 |2: ; cmp fpmop, 11; je >1; ja >9
3520 | fcos; ret
3521 |1: ; fptan; fpop; ret
3522 |.endif
3523 |.endif 3208 |.endif
3524 |9: ; int3 // Bad fpm. 3209 |9: ; int3 // Bad fpm.
3525 |.endif 3210 |.endif
@@ -3541,7 +3226,7 @@ static void build_subroutines(BuildCtx *ctx)
3541 |2: ; cmp foldop, 3; je >1; ja >2 3226 |2: ; cmp foldop, 3; je >1; ja >2
3542 | mulsd xmm0, xmm1; ret 3227 | mulsd xmm0, xmm1; ret
3543 |1: ; divsd xmm0, xmm1; ret 3228 |1: ; divsd xmm0, xmm1; ret
3544 |2: ; cmp foldop, 5; jb ->vm_mod; je ->vm_pow 3229 |2: ; cmp foldop, 5; jb ->vm_mod; je ->vm_pow_sse
3545 | cmp foldop, 7; je >1; ja >2 3230 | cmp foldop, 7; je >1; ja >2
3546 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; ret 3231 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; ret
3547 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; ret 3232 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; ret
@@ -3574,7 +3259,7 @@ static void build_subroutines(BuildCtx *ctx)
3574 |1: ; maxsd xmm0, xmm1; ret 3259 |1: ; maxsd xmm0, xmm1; ret
3575 |9: ; int3 // Bad op. 3260 |9: ; int3 // Bad op.
3576 | 3261 |
3577 |.elif SSE // x86 calling convention with SSE ops. 3262 |.else // x86 calling convention.
3578 | 3263 |
3579 | .define foldop, eax 3264 | .define foldop, eax
3580 | mov foldop, [esp+20] 3265 | mov foldop, [esp+20]
@@ -3593,7 +3278,7 @@ static void build_subroutines(BuildCtx *ctx)
3593 |2: ; cmp foldop, 5 3278 |2: ; cmp foldop, 5
3594 | je >1; ja >2 3279 | je >1; ja >2
3595 | call ->vm_mod; jmp <7 3280 | call ->vm_mod; jmp <7
3596 |1: ; pop edx; call ->vm_pow; push edx; jmp <7 // Writes to scratch area. 3281 |1: ; pop edx; call ->vm_pow_sse; push edx; jmp <7 // Writes to scratch area.
3597 |2: ; cmp foldop, 7; je >1; ja >2 3282 |2: ; cmp foldop, 7; je >1; ja >2
3598 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; jmp <7 3283 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; jmp <7
3599 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; jmp <7 3284 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; jmp <7
@@ -3608,29 +3293,6 @@ static void build_subroutines(BuildCtx *ctx)
3608 |1: ; maxsd xmm0, xmm1; jmp <7 3293 |1: ; maxsd xmm0, xmm1; jmp <7
3609 |9: ; int3 // Bad op. 3294 |9: ; int3 // Bad op.
3610 | 3295 |
3611 |.else // x86 calling convention with x87 ops.
3612 |
3613 | mov eax, [esp+20]
3614 | fld qword [esp+4]
3615 | fld qword [esp+12]
3616 | cmp eax, 1; je >1; ja >2
3617 | faddp st1; ret
3618 |1: ; fsubp st1; ret
3619 |2: ; cmp eax, 3; je >1; ja >2
3620 | fmulp st1; ret
3621 |1: ; fdivp st1; ret
3622 |2: ; cmp eax, 5; jb ->vm_mod; je ->vm_pow
3623 | cmp eax, 7; je >1; ja >2
3624 | fpop; fchs; ret
3625 |1: ; fpop; fabs; ret
3626 |2: ; cmp eax, 9; je >1; ja >2
3627 | fpatan; ret
3628 |1: ; fxch; fscale; fpop1; ret
3629 |2: ; cmp eax, 11; je >1; ja >9
3630 | fucomi st1; fcmovnbe st1; fpop1; ret
3631 |1: ; fucomi st1; fcmovbe st1; fpop1; ret
3632 |9: ; int3 // Bad op.
3633 |
3634 |.endif 3296 |.endif
3635 | 3297 |
3636 |//----------------------------------------------------------------------- 3298 |//-----------------------------------------------------------------------
@@ -3943,19 +3605,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3943 | // RA is a number. 3605 | // RA is a number.
3944 | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp 3606 | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp
3945 | // RA is a number, RD is an integer. 3607 | // RA is a number, RD is an integer.
3946 |.if SSE
3947 | cvtsi2sd xmm0, dword [BASE+RD*8] 3608 | cvtsi2sd xmm0, dword [BASE+RD*8]
3948 | jmp >2 3609 | jmp >2
3949 |.else
3950 | fld qword [BASE+RA*8]
3951 | fild dword [BASE+RD*8]
3952 | jmp >3
3953 |.endif
3954 | 3610 |
3955 |8: // RA is an integer, RD is not an integer. 3611 |8: // RA is an integer, RD is not an integer.
3956 | ja ->vmeta_comp 3612 | ja ->vmeta_comp
3957 | // RA is an integer, RD is a number. 3613 | // RA is an integer, RD is a number.
3958 |.if SSE
3959 | cvtsi2sd xmm1, dword [BASE+RA*8] 3614 | cvtsi2sd xmm1, dword [BASE+RA*8]
3960 | movsd xmm0, qword [BASE+RD*8] 3615 | movsd xmm0, qword [BASE+RD*8]
3961 | add PC, 4 3616 | add PC, 4
@@ -3963,29 +3618,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3963 | jmp_comp jbe, ja, jb, jae, <9 3618 | jmp_comp jbe, ja, jb, jae, <9
3964 | jmp <6 3619 | jmp <6
3965 |.else 3620 |.else
3966 | fild dword [BASE+RA*8]
3967 | jmp >2
3968 |.endif
3969 |.else
3970 | checknum RA, ->vmeta_comp 3621 | checknum RA, ->vmeta_comp
3971 | checknum RD, ->vmeta_comp 3622 | checknum RD, ->vmeta_comp
3972 |.endif 3623 |.endif
3973 |.if SSE
3974 |1: 3624 |1:
3975 | movsd xmm0, qword [BASE+RD*8] 3625 | movsd xmm0, qword [BASE+RD*8]
3976 |2: 3626 |2:
3977 | add PC, 4 3627 | add PC, 4
3978 | ucomisd xmm0, qword [BASE+RA*8] 3628 | ucomisd xmm0, qword [BASE+RA*8]
3979 |3: 3629 |3:
3980 |.else
3981 |1:
3982 | fld qword [BASE+RA*8] // Reverse order, i.e like cmp D, A.
3983 |2:
3984 | fld qword [BASE+RD*8]
3985 |3:
3986 | add PC, 4
3987 | fcomparepp
3988 |.endif
3989 | // Unordered: all of ZF CF PF set, ordered: PF clear. 3630 | // Unordered: all of ZF CF PF set, ordered: PF clear.
3990 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. 3631 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
3991 |.if DUALNUM 3632 |.if DUALNUM
@@ -4025,43 +3666,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4025 | // RD is a number. 3666 | // RD is a number.
4026 | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5 3667 | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5
4027 | // RD is a number, RA is an integer. 3668 | // RD is a number, RA is an integer.
4028 |.if SSE
4029 | cvtsi2sd xmm0, dword [BASE+RA*8] 3669 | cvtsi2sd xmm0, dword [BASE+RA*8]
4030 |.else
4031 | fild dword [BASE+RA*8]
4032 |.endif
4033 | jmp >2 3670 | jmp >2
4034 | 3671 |
4035 |8: // RD is an integer, RA is not an integer. 3672 |8: // RD is an integer, RA is not an integer.
4036 | ja >5 3673 | ja >5
4037 | // RD is an integer, RA is a number. 3674 | // RD is an integer, RA is a number.
4038 |.if SSE
4039 | cvtsi2sd xmm0, dword [BASE+RD*8] 3675 | cvtsi2sd xmm0, dword [BASE+RD*8]
4040 | ucomisd xmm0, qword [BASE+RA*8] 3676 | ucomisd xmm0, qword [BASE+RA*8]
4041 |.else
4042 | fild dword [BASE+RD*8]
4043 | fld qword [BASE+RA*8]
4044 |.endif
4045 | jmp >4 3677 | jmp >4
4046 | 3678 |
4047 |.else 3679 |.else
4048 | cmp RB, LJ_TISNUM; jae >5 3680 | cmp RB, LJ_TISNUM; jae >5
4049 | checknum RA, >5 3681 | checknum RA, >5
4050 |.endif 3682 |.endif
4051 |.if SSE
4052 |1: 3683 |1:
4053 | movsd xmm0, qword [BASE+RA*8] 3684 | movsd xmm0, qword [BASE+RA*8]
4054 |2: 3685 |2:
4055 | ucomisd xmm0, qword [BASE+RD*8] 3686 | ucomisd xmm0, qword [BASE+RD*8]
4056 |4: 3687 |4:
4057 |.else
4058 |1:
4059 | fld qword [BASE+RA*8]
4060 |2:
4061 | fld qword [BASE+RD*8]
4062 |4:
4063 | fcomparepp
4064 |.endif
4065 iseqne_fp: 3688 iseqne_fp:
4066 if (vk) { 3689 if (vk) {
4067 | jp >2 // Unordered means not equal. 3690 | jp >2 // Unordered means not equal.
@@ -4184,39 +3807,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4184 | // RA is a number. 3807 | // RA is a number.
4185 | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1 3808 | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1
4186 | // RA is a number, RD is an integer. 3809 | // RA is a number, RD is an integer.
4187 |.if SSE
4188 | cvtsi2sd xmm0, dword [KBASE+RD*8] 3810 | cvtsi2sd xmm0, dword [KBASE+RD*8]
4189 |.else
4190 | fild dword [KBASE+RD*8]
4191 |.endif
4192 | jmp >2 3811 | jmp >2
4193 | 3812 |
4194 |8: // RA is an integer, RD is a number. 3813 |8: // RA is an integer, RD is a number.
4195 |.if SSE
4196 | cvtsi2sd xmm0, dword [BASE+RA*8] 3814 | cvtsi2sd xmm0, dword [BASE+RA*8]
4197 | ucomisd xmm0, qword [KBASE+RD*8] 3815 | ucomisd xmm0, qword [KBASE+RD*8]
4198 |.else
4199 | fild dword [BASE+RA*8]
4200 | fld qword [KBASE+RD*8]
4201 |.endif
4202 | jmp >4 3816 | jmp >4
4203 |.else 3817 |.else
4204 | cmp RB, LJ_TISNUM; jae >3 3818 | cmp RB, LJ_TISNUM; jae >3
4205 |.endif 3819 |.endif
4206 |.if SSE
4207 |1: 3820 |1:
4208 | movsd xmm0, qword [KBASE+RD*8] 3821 | movsd xmm0, qword [KBASE+RD*8]
4209 |2: 3822 |2:
4210 | ucomisd xmm0, qword [BASE+RA*8] 3823 | ucomisd xmm0, qword [BASE+RA*8]
4211 |4: 3824 |4:
4212 |.else
4213 |1:
4214 | fld qword [KBASE+RD*8]
4215 |2:
4216 | fld qword [BASE+RA*8]
4217 |4:
4218 | fcomparepp
4219 |.endif
4220 goto iseqne_fp; 3825 goto iseqne_fp;
4221 case BC_ISEQP: case BC_ISNEP: 3826 case BC_ISEQP: case BC_ISNEP:
4222 vk = op == BC_ISEQP; 3827 vk = op == BC_ISEQP;
@@ -4267,6 +3872,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4267 | ins_next 3872 | ins_next
4268 break; 3873 break;
4269 3874
3875 case BC_ISTYPE:
3876 | ins_AD // RA = src, RD = -type
3877 | add RD, [BASE+RA*8+4]
3878 | jne ->vmeta_istype
3879 | ins_next
3880 break;
3881 case BC_ISNUM:
3882 | ins_AD // RA = src, RD = -(TISNUM-1)
3883 | checknum RA, ->vmeta_istype
3884 | ins_next
3885 break;
3886
4270 /* -- Unary ops --------------------------------------------------------- */ 3887 /* -- Unary ops --------------------------------------------------------- */
4271 3888
4272 case BC_MOV: 3889 case BC_MOV:
@@ -4310,16 +3927,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4310 |.else 3927 |.else
4311 | checknum RD, ->vmeta_unm 3928 | checknum RD, ->vmeta_unm
4312 |.endif 3929 |.endif
4313 |.if SSE
4314 | movsd xmm0, qword [BASE+RD*8] 3930 | movsd xmm0, qword [BASE+RD*8]
4315 | sseconst_sign xmm1, RDa 3931 | sseconst_sign xmm1, RDa
4316 | xorps xmm0, xmm1 3932 | xorps xmm0, xmm1
4317 | movsd qword [BASE+RA*8], xmm0 3933 | movsd qword [BASE+RA*8], xmm0
4318 |.else
4319 | fld qword [BASE+RD*8]
4320 | fchs
4321 | fstp qword [BASE+RA*8]
4322 |.endif
4323 |.if DUALNUM 3934 |.if DUALNUM
4324 | jmp <9 3935 | jmp <9
4325 |.else 3936 |.else
@@ -4335,15 +3946,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4335 |1: 3946 |1:
4336 | mov dword [BASE+RA*8+4], LJ_TISNUM 3947 | mov dword [BASE+RA*8+4], LJ_TISNUM
4337 | mov dword [BASE+RA*8], RD 3948 | mov dword [BASE+RA*8], RD
4338 |.elif SSE 3949 |.else
4339 | xorps xmm0, xmm0 3950 | xorps xmm0, xmm0
4340 | cvtsi2sd xmm0, dword STR:RD->len 3951 | cvtsi2sd xmm0, dword STR:RD->len
4341 |1: 3952 |1:
4342 | movsd qword [BASE+RA*8], xmm0 3953 | movsd qword [BASE+RA*8], xmm0
4343 |.else
4344 | fild dword STR:RD->len
4345 |1:
4346 | fstp qword [BASE+RA*8]
4347 |.endif 3954 |.endif
4348 | ins_next 3955 | ins_next
4349 |2: 3956 |2:
@@ -4361,11 +3968,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4361 | // Length of table returned in eax (RD). 3968 | // Length of table returned in eax (RD).
4362 |.if DUALNUM 3969 |.if DUALNUM
4363 | // Nothing to do. 3970 | // Nothing to do.
4364 |.elif SSE
4365 | cvtsi2sd xmm0, RD
4366 |.else 3971 |.else
4367 | mov ARG1, RD 3972 | cvtsi2sd xmm0, RD
4368 | fild ARG1
4369 |.endif 3973 |.endif
4370 | mov BASE, RB // Restore BASE. 3974 | mov BASE, RB // Restore BASE.
4371 | movzx RA, PC_RA 3975 | movzx RA, PC_RA
@@ -4380,7 +3984,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4380 3984
4381 /* -- Binary ops -------------------------------------------------------- */ 3985 /* -- Binary ops -------------------------------------------------------- */
4382 3986
4383 |.macro ins_arithpre, x87ins, sseins, ssereg 3987 |.macro ins_arithpre, sseins, ssereg
4384 | ins_ABC 3988 | ins_ABC
4385 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); 3989 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
4386 ||switch (vk) { 3990 ||switch (vk) {
@@ -4389,37 +3993,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4389 | .if DUALNUM 3993 | .if DUALNUM
4390 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn 3994 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn
4391 | .endif 3995 | .endif
4392 | .if SSE 3996 | movsd xmm0, qword [BASE+RB*8]
4393 | movsd xmm0, qword [BASE+RB*8] 3997 | sseins ssereg, qword [KBASE+RC*8]
4394 | sseins ssereg, qword [KBASE+RC*8]
4395 | .else
4396 | fld qword [BASE+RB*8]
4397 | x87ins qword [KBASE+RC*8]
4398 | .endif
4399 || break; 3998 || break;
4400 ||case 1: 3999 ||case 1:
4401 | checknum RB, ->vmeta_arith_nv 4000 | checknum RB, ->vmeta_arith_nv
4402 | .if DUALNUM 4001 | .if DUALNUM
4403 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv 4002 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv
4404 | .endif 4003 | .endif
4405 | .if SSE 4004 | movsd xmm0, qword [KBASE+RC*8]
4406 | movsd xmm0, qword [KBASE+RC*8] 4005 | sseins ssereg, qword [BASE+RB*8]
4407 | sseins ssereg, qword [BASE+RB*8]
4408 | .else
4409 | fld qword [KBASE+RC*8]
4410 | x87ins qword [BASE+RB*8]
4411 | .endif
4412 || break; 4006 || break;
4413 ||default: 4007 ||default:
4414 | checknum RB, ->vmeta_arith_vv 4008 | checknum RB, ->vmeta_arith_vv
4415 | checknum RC, ->vmeta_arith_vv 4009 | checknum RC, ->vmeta_arith_vv
4416 | .if SSE 4010 | movsd xmm0, qword [BASE+RB*8]
4417 | movsd xmm0, qword [BASE+RB*8] 4011 | sseins ssereg, qword [BASE+RC*8]
4418 | sseins ssereg, qword [BASE+RC*8]
4419 | .else
4420 | fld qword [BASE+RB*8]
4421 | x87ins qword [BASE+RC*8]
4422 | .endif
4423 || break; 4012 || break;
4424 ||} 4013 ||}
4425 |.endmacro 4014 |.endmacro
@@ -4457,54 +4046,50 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4457 |.endmacro 4046 |.endmacro
4458 | 4047 |
4459 |.macro ins_arithpost 4048 |.macro ins_arithpost
4460 |.if SSE
4461 | movsd qword [BASE+RA*8], xmm0 4049 | movsd qword [BASE+RA*8], xmm0
4462 |.else
4463 | fstp qword [BASE+RA*8]
4464 |.endif
4465 |.endmacro 4050 |.endmacro
4466 | 4051 |
4467 |.macro ins_arith, x87ins, sseins 4052 |.macro ins_arith, sseins
4468 | ins_arithpre x87ins, sseins, xmm0 4053 | ins_arithpre sseins, xmm0
4469 | ins_arithpost 4054 | ins_arithpost
4470 | ins_next 4055 | ins_next
4471 |.endmacro 4056 |.endmacro
4472 | 4057 |
4473 |.macro ins_arith, intins, x87ins, sseins 4058 |.macro ins_arith, intins, sseins
4474 |.if DUALNUM 4059 |.if DUALNUM
4475 | ins_arithdn intins 4060 | ins_arithdn intins
4476 |.else 4061 |.else
4477 | ins_arith, x87ins, sseins 4062 | ins_arith, sseins
4478 |.endif 4063 |.endif
4479 |.endmacro 4064 |.endmacro
4480 4065
4481 | // RA = dst, RB = src1 or num const, RC = src2 or num const 4066 | // RA = dst, RB = src1 or num const, RC = src2 or num const
4482 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: 4067 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
4483 | ins_arith add, fadd, addsd 4068 | ins_arith add, addsd
4484 break; 4069 break;
4485 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: 4070 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
4486 | ins_arith sub, fsub, subsd 4071 | ins_arith sub, subsd
4487 break; 4072 break;
4488 case BC_MULVN: case BC_MULNV: case BC_MULVV: 4073 case BC_MULVN: case BC_MULNV: case BC_MULVV:
4489 | ins_arith imul, fmul, mulsd 4074 | ins_arith imul, mulsd
4490 break; 4075 break;
4491 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: 4076 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
4492 | ins_arith fdiv, divsd 4077 | ins_arith divsd
4493 break; 4078 break;
4494 case BC_MODVN: 4079 case BC_MODVN:
4495 | ins_arithpre fld, movsd, xmm1 4080 | ins_arithpre movsd, xmm1
4496 |->BC_MODVN_Z: 4081 |->BC_MODVN_Z:
4497 | call ->vm_mod 4082 | call ->vm_mod
4498 | ins_arithpost 4083 | ins_arithpost
4499 | ins_next 4084 | ins_next
4500 break; 4085 break;
4501 case BC_MODNV: case BC_MODVV: 4086 case BC_MODNV: case BC_MODVV:
4502 | ins_arithpre fld, movsd, xmm1 4087 | ins_arithpre movsd, xmm1
4503 | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. 4088 | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
4504 break; 4089 break;
4505 case BC_POW: 4090 case BC_POW:
4506 | ins_arithpre fld, movsd, xmm1 4091 | ins_arithpre movsd, xmm1
4507 | call ->vm_pow 4092 | call ->vm_pow_sse
4508 | ins_arithpost 4093 | ins_arithpost
4509 | ins_next 4094 | ins_next
4510 break; 4095 break;
@@ -4573,25 +4158,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4573 | movsx RD, RDW 4158 | movsx RD, RDW
4574 | mov dword [BASE+RA*8+4], LJ_TISNUM 4159 | mov dword [BASE+RA*8+4], LJ_TISNUM
4575 | mov dword [BASE+RA*8], RD 4160 | mov dword [BASE+RA*8], RD
4576 |.elif SSE 4161 |.else
4577 | movsx RD, RDW // Sign-extend literal. 4162 | movsx RD, RDW // Sign-extend literal.
4578 | cvtsi2sd xmm0, RD 4163 | cvtsi2sd xmm0, RD
4579 | movsd qword [BASE+RA*8], xmm0 4164 | movsd qword [BASE+RA*8], xmm0
4580 |.else
4581 | fild PC_RD // Refetch signed RD from instruction.
4582 | fstp qword [BASE+RA*8]
4583 |.endif 4165 |.endif
4584 | ins_next 4166 | ins_next
4585 break; 4167 break;
4586 case BC_KNUM: 4168 case BC_KNUM:
4587 | ins_AD // RA = dst, RD = num const 4169 | ins_AD // RA = dst, RD = num const
4588 |.if SSE
4589 | movsd xmm0, qword [KBASE+RD*8] 4170 | movsd xmm0, qword [KBASE+RD*8]
4590 | movsd qword [BASE+RA*8], xmm0 4171 | movsd qword [BASE+RA*8], xmm0
4591 |.else
4592 | fld qword [KBASE+RD*8]
4593 | fstp qword [BASE+RA*8]
4594 |.endif
4595 | ins_next 4172 | ins_next
4596 break; 4173 break;
4597 case BC_KPRI: 4174 case BC_KPRI:
@@ -4698,18 +4275,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4698 case BC_USETN: 4275 case BC_USETN:
4699 | ins_AD // RA = upvalue #, RD = num const 4276 | ins_AD // RA = upvalue #, RD = num const
4700 | mov LFUNC:RB, [BASE-8] 4277 | mov LFUNC:RB, [BASE-8]
4701 |.if SSE
4702 | movsd xmm0, qword [KBASE+RD*8] 4278 | movsd xmm0, qword [KBASE+RD*8]
4703 |.else
4704 | fld qword [KBASE+RD*8]
4705 |.endif
4706 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] 4279 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
4707 | mov RA, UPVAL:RB->v 4280 | mov RA, UPVAL:RB->v
4708 |.if SSE
4709 | movsd qword [RA], xmm0 4281 | movsd qword [RA], xmm0
4710 |.else
4711 | fstp qword [RA]
4712 |.endif
4713 | ins_next 4282 | ins_next
4714 break; 4283 break;
4715 case BC_USETP: 4284 case BC_USETP:
@@ -4863,18 +4432,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4863 |.else 4432 |.else
4864 | // Convert number to int and back and compare. 4433 | // Convert number to int and back and compare.
4865 | checknum RC, >5 4434 | checknum RC, >5
4866 |.if SSE
4867 | movsd xmm0, qword [BASE+RC*8] 4435 | movsd xmm0, qword [BASE+RC*8]
4868 | cvtsd2si RC, xmm0 4436 | cvttsd2si RC, xmm0
4869 | cvtsi2sd xmm1, RC 4437 | cvtsi2sd xmm1, RC
4870 | ucomisd xmm0, xmm1 4438 | ucomisd xmm0, xmm1
4871 |.else
4872 | fld qword [BASE+RC*8]
4873 | fist ARG1
4874 | fild ARG1
4875 | fcomparepp
4876 | mov RC, ARG1
4877 |.endif
4878 | jne ->vmeta_tgetv // Generic numeric key? Use fallback. 4439 | jne ->vmeta_tgetv // Generic numeric key? Use fallback.
4879 |.endif 4440 |.endif
4880 | cmp RC, TAB:RB->asize // Takes care of unordered, too. 4441 | cmp RC, TAB:RB->asize // Takes care of unordered, too.
@@ -4998,6 +4559,32 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4998 | mov dword [BASE+RA*8+4], LJ_TNIL 4559 | mov dword [BASE+RA*8+4], LJ_TNIL
4999 | jmp <1 4560 | jmp <1
5000 break; 4561 break;
4562 case BC_TGETR:
4563 | ins_ABC // RA = dst, RB = table, RC = key
4564 | mov TAB:RB, [BASE+RB*8]
4565 |.if DUALNUM
4566 | mov RC, dword [BASE+RC*8]
4567 |.else
4568 | cvttsd2si RC, qword [BASE+RC*8]
4569 |.endif
4570 | cmp RC, TAB:RB->asize
4571 | jae ->vmeta_tgetr // Not in array part? Use fallback.
4572 | shl RC, 3
4573 | add RC, TAB:RB->array
4574 | // Get array slot.
4575 |->BC_TGETR_Z:
4576 |.if X64
4577 | mov RBa, [RC]
4578 | mov [BASE+RA*8], RBa
4579 |.else
4580 | mov RB, [RC]
4581 | mov RC, [RC+4]
4582 | mov [BASE+RA*8], RB
4583 | mov [BASE+RA*8+4], RC
4584 |.endif
4585 |->BC_TGETR2_Z:
4586 | ins_next
4587 break;
5001 4588
5002 case BC_TSETV: 4589 case BC_TSETV:
5003 | ins_ABC // RA = src, RB = table, RC = key 4590 | ins_ABC // RA = src, RB = table, RC = key
@@ -5011,18 +4598,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5011 |.else 4598 |.else
5012 | // Convert number to int and back and compare. 4599 | // Convert number to int and back and compare.
5013 | checknum RC, >5 4600 | checknum RC, >5
5014 |.if SSE
5015 | movsd xmm0, qword [BASE+RC*8] 4601 | movsd xmm0, qword [BASE+RC*8]
5016 | cvtsd2si RC, xmm0 4602 | cvttsd2si RC, xmm0
5017 | cvtsi2sd xmm1, RC 4603 | cvtsi2sd xmm1, RC
5018 | ucomisd xmm0, xmm1 4604 | ucomisd xmm0, xmm1
5019 |.else
5020 | fld qword [BASE+RC*8]
5021 | fist ARG1
5022 | fild ARG1
5023 | fcomparepp
5024 | mov RC, ARG1
5025 |.endif
5026 | jne ->vmeta_tsetv // Generic numeric key? Use fallback. 4605 | jne ->vmeta_tsetv // Generic numeric key? Use fallback.
5027 |.endif 4606 |.endif
5028 | cmp RC, TAB:RB->asize // Takes care of unordered, too. 4607 | cmp RC, TAB:RB->asize // Takes care of unordered, too.
@@ -5192,6 +4771,39 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5192 | movzx RA, PC_RA // Restore RA. 4771 | movzx RA, PC_RA // Restore RA.
5193 | jmp <2 4772 | jmp <2
5194 break; 4773 break;
4774 case BC_TSETR:
4775 | ins_ABC // RA = src, RB = table, RC = key
4776 | mov TAB:RB, [BASE+RB*8]
4777 |.if DUALNUM
4778 | mov RC, dword [BASE+RC*8]
4779 |.else
4780 | cvttsd2si RC, qword [BASE+RC*8]
4781 |.endif
4782 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
4783 | jnz >7
4784 |2:
4785 | cmp RC, TAB:RB->asize
4786 | jae ->vmeta_tsetr
4787 | shl RC, 3
4788 | add RC, TAB:RB->array
4789 | // Set array slot.
4790 |->BC_TSETR_Z:
4791 |.if X64
4792 | mov RBa, [BASE+RA*8]
4793 | mov [RC], RBa
4794 |.else
4795 | mov RB, [BASE+RA*8+4]
4796 | mov RA, [BASE+RA*8]
4797 | mov [RC+4], RB
4798 | mov [RC], RA
4799 |.endif
4800 | ins_next
4801 |
4802 |7: // Possible table write barrier for the value. Skip valiswhite check.
4803 | barrierback TAB:RB, RA
4804 | movzx RA, PC_RA // Restore RA.
4805 | jmp <2
4806 break;
5195 4807
5196 case BC_TSETM: 4808 case BC_TSETM:
5197 | ins_AD // RA = base (table at base-1), RD = num const (start index) 4809 | ins_AD // RA = base (table at base-1), RD = num const (start index)
@@ -5386,10 +4998,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5386 |.if DUALNUM 4998 |.if DUALNUM
5387 | mov dword [BASE+RA*8+4], LJ_TISNUM 4999 | mov dword [BASE+RA*8+4], LJ_TISNUM
5388 | mov dword [BASE+RA*8], RC 5000 | mov dword [BASE+RA*8], RC
5389 |.elif SSE
5390 | cvtsi2sd xmm0, RC
5391 |.else 5001 |.else
5392 | fild dword [BASE+RA*8-8] 5002 | cvtsi2sd xmm0, RC
5393 |.endif 5003 |.endif
5394 | // Copy array slot to returned value. 5004 | // Copy array slot to returned value.
5395 |.if X64 5005 |.if X64
@@ -5405,10 +5015,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5405 | // Return array index as a numeric key. 5015 | // Return array index as a numeric key.
5406 |.if DUALNUM 5016 |.if DUALNUM
5407 | // See above. 5017 | // See above.
5408 |.elif SSE
5409 | movsd qword [BASE+RA*8], xmm0
5410 |.else 5018 |.else
5411 | fstp qword [BASE+RA*8] 5019 | movsd qword [BASE+RA*8], xmm0
5412 |.endif 5020 |.endif
5413 | mov [BASE+RA*8-8], RC // Update control var. 5021 | mov [BASE+RA*8-8], RC // Update control var.
5414 |2: 5022 |2:
@@ -5421,9 +5029,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5421 | 5029 |
5422 |4: // Skip holes in array part. 5030 |4: // Skip holes in array part.
5423 | add RC, 1 5031 | add RC, 1
5424 |.if not (DUALNUM or SSE)
5425 | mov [BASE+RA*8-8], RC
5426 |.endif
5427 | jmp <1 5032 | jmp <1
5428 | 5033 |
5429 |5: // Traverse hash part. 5034 |5: // Traverse hash part.
@@ -5757,7 +5362,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5757 if (!vk) { 5362 if (!vk) {
5758 | cmp RB, LJ_TISNUM; jae ->vmeta_for 5363 | cmp RB, LJ_TISNUM; jae ->vmeta_for
5759 } 5364 }
5760 |.if SSE
5761 | movsd xmm0, qword FOR_IDX 5365 | movsd xmm0, qword FOR_IDX
5762 | movsd xmm1, qword FOR_STOP 5366 | movsd xmm1, qword FOR_STOP
5763 if (vk) { 5367 if (vk) {
@@ -5770,22 +5374,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5770 | ucomisd xmm1, xmm0 5374 | ucomisd xmm1, xmm0
5771 |1: 5375 |1:
5772 | movsd qword FOR_EXT, xmm0 5376 | movsd qword FOR_EXT, xmm0
5773 |.else
5774 | fld qword FOR_STOP
5775 | fld qword FOR_IDX
5776 if (vk) {
5777 | fadd qword FOR_STEP // nidx = idx + step
5778 | fst qword FOR_IDX
5779 | fst qword FOR_EXT
5780 | test RB, RB; js >1
5781 } else {
5782 | fst qword FOR_EXT
5783 | jl >1
5784 }
5785 | fxch // Swap lim/(n)idx if step non-negative.
5786 |1:
5787 | fcomparepp
5788 |.endif
5789 if (op == BC_FORI) { 5377 if (op == BC_FORI) {
5790 |.if DUALNUM 5378 |.if DUALNUM
5791 | jnb <7 5379 | jnb <7
@@ -5813,11 +5401,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5813 |2: 5401 |2:
5814 | ins_next 5402 | ins_next
5815 |.endif 5403 |.endif
5816 |.if SSE 5404 |
5817 |3: // Invert comparison if step is negative. 5405 |3: // Invert comparison if step is negative.
5818 | ucomisd xmm0, xmm1 5406 | ucomisd xmm0, xmm1
5819 | jmp <1 5407 | jmp <1
5820 |.endif
5821 break; 5408 break;
5822 5409
5823 case BC_ITERL: 5410 case BC_ITERL: