aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/.gitignore2
-rw-r--r--src/Makefile59
-rw-r--r--src/Makefile.dep200
-rw-r--r--src/host/buildvm.c24
-rw-r--r--src/host/buildvm.h1
-rw-r--r--src/host/buildvm_asm.c50
-rw-r--r--src/host/buildvm_lib.c61
-rw-r--r--src/host/buildvm_libbc.h45
-rw-r--r--src/host/genlibbc.lua197
-rw-r--r--src/jit/bc.lua19
-rw-r--r--src/jit/bcsave.lua14
-rw-r--r--src/jit/dis_arm.lua18
-rw-r--r--src/jit/dis_mips.lua30
-rw-r--r--src/jit/dis_mipsel.lua15
-rw-r--r--src/jit/dis_ppc.lua18
-rw-r--r--src/jit/dis_x64.lua15
-rw-r--r--src/jit/dis_x86.lua38
-rw-r--r--src/jit/dump.lua29
-rw-r--r--src/jit/p.lua310
-rw-r--r--src/jit/v.lua15
-rw-r--r--src/jit/zone.lua45
-rw-r--r--src/lib_base.c79
-rw-r--r--src/lib_bit.c134
-rw-r--r--src/lib_debug.c2
-rw-r--r--src/lib_ffi.c45
-rw-r--r--src/lib_io.c27
-rw-r--r--src/lib_jit.c154
-rw-r--r--src/lib_math.c11
-rw-r--r--src/lib_os.c37
-rw-r--r--src/lib_package.c4
-rw-r--r--src/lib_string.c448
-rw-r--r--src/lib_table.c167
-rw-r--r--src/lj_alloc.c12
-rw-r--r--src/lj_api.c132
-rw-r--r--src/lj_arch.h147
-rw-r--r--src/lj_asm.c492
-rw-r--r--src/lj_asm_arm.h442
-rw-r--r--src/lj_asm_mips.h364
-rw-r--r--src/lj_asm_ppc.h374
-rw-r--r--src/lj_asm_x86.h576
-rw-r--r--src/lj_bc.h4
-rw-r--r--src/lj_bcdump.h6
-rw-r--r--src/lj_bcread.c143
-rw-r--r--src/lj_bcwrite.c229
-rw-r--r--src/lj_buf.c234
-rw-r--r--src/lj_buf.h105
-rw-r--r--src/lj_carith.c76
-rw-r--r--src/lj_carith.h10
-rw-r--r--src/lj_ccall.c158
-rw-r--r--src/lj_ccall.h27
-rw-r--r--src/lj_ccallback.c103
-rw-r--r--src/lj_cdata.c22
-rw-r--r--src/lj_cdata.h5
-rw-r--r--src/lj_clib.c13
-rw-r--r--src/lj_cparse.c52
-rw-r--r--src/lj_crecord.c239
-rw-r--r--src/lj_crecord.h7
-rw-r--r--src/lj_ctype.c15
-rw-r--r--src/lj_ctype.h2
-rw-r--r--src/lj_debug.c186
-rw-r--r--src/lj_debug.h8
-rw-r--r--src/lj_def.h22
-rw-r--r--src/lj_dispatch.c81
-rw-r--r--src/lj_dispatch.h23
-rw-r--r--src/lj_emit_arm.h16
-rw-r--r--src/lj_emit_mips.h16
-rw-r--r--src/lj_emit_ppc.h16
-rw-r--r--src/lj_emit_x86.h24
-rw-r--r--src/lj_err.c66
-rw-r--r--src/lj_errmsg.h5
-rw-r--r--src/lj_ffrecord.c557
-rw-r--r--src/lj_frame.h100
-rw-r--r--src/lj_gc.c66
-rw-r--r--src/lj_gc.h10
-rw-r--r--src/lj_gdbjit.c14
-rw-r--r--src/lj_ir.c10
-rw-r--r--src/lj_ir.h34
-rw-r--r--src/lj_ircall.h138
-rw-r--r--src/lj_jit.h30
-rw-r--r--src/lj_lex.c344
-rw-r--r--src/lj_lex.h17
-rw-r--r--src/lj_lib.c75
-rw-r--r--src/lj_lib.h29
-rw-r--r--src/lj_load.c4
-rw-r--r--src/lj_meta.c125
-rw-r--r--src/lj_meta.h1
-rw-r--r--src/lj_obj.c17
-rw-r--r--src/lj_obj.h182
-rw-r--r--src/lj_opt_fold.c286
-rw-r--r--src/lj_opt_loop.c29
-rw-r--r--src/lj_opt_mem.c23
-rw-r--r--src/lj_opt_narrow.c3
-rw-r--r--src/lj_opt_split.c131
-rw-r--r--src/lj_parse.c189
-rw-r--r--src/lj_profile.c368
-rw-r--r--src/lj_profile.h21
-rw-r--r--src/lj_record.c290
-rw-r--r--src/lj_record.h1
-rw-r--r--src/lj_snap.c22
-rw-r--r--src/lj_state.c33
-rw-r--r--src/lj_str.c212
-rw-r--r--src/lj_str.h35
-rw-r--r--src/lj_strfmt.c554
-rw-r--r--src/lj_strfmt.h125
-rw-r--r--src/lj_tab.c47
-rw-r--r--src/lj_tab.h4
-rw-r--r--src/lj_target.h2
-rw-r--r--src/lj_target_arm.h4
-rw-r--r--src/lj_target_arm64.h97
-rw-r--r--src/lj_target_mips.h3
-rw-r--r--src/lj_target_ppc.h2
-rw-r--r--src/lj_target_x86.h3
-rw-r--r--src/lj_trace.c51
-rw-r--r--src/lj_trace.h1
-rw-r--r--src/lj_traceerr.h3
-rw-r--r--src/lj_vm.h18
-rw-r--r--src/lj_vmevent.c1
-rw-r--r--src/lj_vmmath.c32
-rw-r--r--src/ljamalg.c3
-rw-r--r--src/luaconf.h2
-rw-r--r--src/luajit.c13
-rw-r--r--src/luajit.h15
-rw-r--r--src/msvcbuild.bat1
-rw-r--r--src/vm_arm.dasc331
-rw-r--r--src/vm_arm64.dasc3763
-rw-r--r--src/vm_mips.dasc390
-rw-r--r--src/vm_ppc.dasc369
-rw-r--r--src/vm_ppcspe.dasc3691
-rw-r--r--src/vm_x86.dasc1485
129 files changed, 12279 insertions, 8892 deletions
diff --git a/src/.gitignore b/src/.gitignore
index fc94e82c..1a30573c 100644
--- a/src/.gitignore
+++ b/src/.gitignore
@@ -4,4 +4,4 @@ lj_ffdef.h
4lj_libdef.h 4lj_libdef.h
5lj_recdef.h 5lj_recdef.h
6lj_folddef.h 6lj_folddef.h
7lj_vm.s 7lj_vm.[sS]
diff --git a/src/Makefile b/src/Makefile
index 33b0a43b..d7539fd5 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -11,8 +11,8 @@
11############################################################################## 11##############################################################################
12 12
13MAJVER= 2 13MAJVER= 2
14MINVER= 0 14MINVER= 1
15RELVER= 3 15RELVER= 0
16ABIVER= 5.1 16ABIVER= 5.1
17NODOTABIVER= 51 17NODOTABIVER= 51
18 18
@@ -42,17 +42,14 @@ CCOPT= -O2 -fomit-frame-pointer
42# 42#
43# Target-specific compiler options: 43# Target-specific compiler options:
44# 44#
45# x86 only: it's recommended to compile at least for i686. Better yet,
46# compile for an architecture that has SSE2, too (-msse -msse2).
47#
48# x86/x64 only: For GCC 4.2 or higher and if you don't intend to distribute 45# x86/x64 only: For GCC 4.2 or higher and if you don't intend to distribute
49# the binaries to a different machine you could also use: -march=native 46# the binaries to a different machine you could also use: -march=native
50# 47#
51CCOPT_x86= -march=i686 48CCOPT_x86= -march=i686 -msse -msse2 -mfpmath=sse
52CCOPT_x64= 49CCOPT_x64=
53CCOPT_arm= 50CCOPT_arm=
51CCOPT_arm64=
54CCOPT_ppc= 52CCOPT_ppc=
55CCOPT_ppcspe=
56CCOPT_mips= 53CCOPT_mips=
57# 54#
58CCDEBUG= 55CCDEBUG=
@@ -165,7 +162,8 @@ XCFLAGS=
165# make HOST_CC="gcc -m32" CROSS=i586-mingw32msvc- TARGET_SYS=Windows 162# make HOST_CC="gcc -m32" CROSS=i586-mingw32msvc- TARGET_SYS=Windows
166# make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu- 163# make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu-
167 164
168CCOPTIONS= $(CCDEBUG) $(CCOPT) $(CCWARN) $(XCFLAGS) $(CFLAGS) 165ASOPTIONS= $(CCOPT) $(CCWARN) $(XCFLAGS) $(CFLAGS)
166CCOPTIONS= $(CCDEBUG) $(ASOPTIONS)
169LDOPTIONS= $(CCDEBUG) $(LDFLAGS) 167LDOPTIONS= $(CCDEBUG) $(LDFLAGS)
170 168
171HOST_CC= $(CC) 169HOST_CC= $(CC)
@@ -204,6 +202,7 @@ TARGET_XLDFLAGS=
204TARGET_XLIBS= -lm 202TARGET_XLIBS= -lm
205TARGET_TCFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS) 203TARGET_TCFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS)
206TARGET_ACFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS) 204TARGET_ACFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS)
205TARGET_ASFLAGS= $(ASOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS)
207TARGET_ALDFLAGS= $(LDOPTIONS) $(TARGET_XLDFLAGS) $(TARGET_FLAGS) $(TARGET_LDFLAGS) 206TARGET_ALDFLAGS= $(LDOPTIONS) $(TARGET_XLDFLAGS) $(TARGET_FLAGS) $(TARGET_LDFLAGS)
208TARGET_ASHLDFLAGS= $(LDOPTIONS) $(TARGET_XSHLDFLAGS) $(TARGET_FLAGS) $(TARGET_SHLDFLAGS) 207TARGET_ASHLDFLAGS= $(LDOPTIONS) $(TARGET_XSHLDFLAGS) $(TARGET_FLAGS) $(TARGET_SHLDFLAGS)
209TARGET_ALIBS= $(TARGET_XLIBS) $(LIBS) $(TARGET_LIBS) 208TARGET_ALIBS= $(TARGET_XLIBS) $(LIBS) $(TARGET_LIBS)
@@ -218,12 +217,12 @@ else
218ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH))) 217ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH)))
219 TARGET_LJARCH= arm 218 TARGET_LJARCH= arm
220else 219else
220ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH)))
221 TARGET_LJARCH= arm64
222else
221ifneq (,$(findstring LJ_TARGET_PPC ,$(TARGET_TESTARCH))) 223ifneq (,$(findstring LJ_TARGET_PPC ,$(TARGET_TESTARCH)))
222 TARGET_LJARCH= ppc 224 TARGET_LJARCH= ppc
223else 225else
224ifneq (,$(findstring LJ_TARGET_PPCSPE ,$(TARGET_TESTARCH)))
225 TARGET_LJARCH= ppcspe
226else
227ifneq (,$(findstring LJ_TARGET_MIPS ,$(TARGET_TESTARCH))) 226ifneq (,$(findstring LJ_TARGET_MIPS ,$(TARGET_TESTARCH)))
228 ifneq (,$(findstring MIPSEL ,$(TARGET_TESTARCH))) 227 ifneq (,$(findstring MIPSEL ,$(TARGET_TESTARCH)))
229 TARGET_ARCH= -D__MIPSEL__=1 228 TARGET_ARCH= -D__MIPSEL__=1
@@ -242,6 +241,7 @@ ifneq (,$(findstring LJ_TARGET_PS3 1,$(TARGET_TESTARCH)))
242 TARGET_SYS= PS3 241 TARGET_SYS= PS3
243 TARGET_ARCH+= -D__CELLOS_LV2__ 242 TARGET_ARCH+= -D__CELLOS_LV2__
244 TARGET_XCFLAGS+= -DLUAJIT_USE_SYSMALLOC 243 TARGET_XCFLAGS+= -DLUAJIT_USE_SYSMALLOC
244 TARGET_XLIBS+= -lpthread
245endif 245endif
246ifneq (,$(findstring LJ_NO_UNWIND 1,$(TARGET_TESTARCH))) 246ifneq (,$(findstring LJ_NO_UNWIND 1,$(TARGET_TESTARCH)))
247 TARGET_ARCH+= -DLUAJIT_NO_UNWIND 247 TARGET_ARCH+= -DLUAJIT_NO_UNWIND
@@ -313,6 +313,9 @@ ifeq (iOS,$(TARGET_SYS))
313 TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC 313 TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC
314 TARGET_DYNXLDOPTS= 314 TARGET_DYNXLDOPTS=
315 TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER) 315 TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER)
316 ifeq (arm64,$(TARGET_LJARCH))
317 TARGET_XCFLAGS+= -fno-omit-frame-pointer
318 endif
316else 319else
317 ifneq (SunOS,$(TARGET_SYS)) 320 ifneq (SunOS,$(TARGET_SYS))
318 ifneq (PS3,$(TARGET_SYS)) 321 ifneq (PS3,$(TARGET_SYS))
@@ -401,13 +404,10 @@ DASM_AFLAGS+= -D VER=$(subst LJ_ARCH_VERSION_,,$(filter LJ_ARCH_VERSION_%,$(subs
401ifeq (Windows,$(TARGET_SYS)) 404ifeq (Windows,$(TARGET_SYS))
402 DASM_AFLAGS+= -D WIN 405 DASM_AFLAGS+= -D WIN
403endif 406endif
404ifeq (x86,$(TARGET_LJARCH))
405 ifneq (,$(findstring __SSE2__ 1,$(TARGET_TESTARCH)))
406 DASM_AFLAGS+= -D SSE
407 endif
408else
409ifeq (x64,$(TARGET_LJARCH)) 407ifeq (x64,$(TARGET_LJARCH))
410 DASM_ARCH= x86 408 ifeq (,$(findstring LJ_FR2 1,$(TARGET_TESTARCH)))
409 DASM_ARCH= x86
410 endif
411else 411else
412ifeq (arm,$(TARGET_LJARCH)) 412ifeq (arm,$(TARGET_LJARCH))
413 ifeq (iOS,$(TARGET_SYS)) 413 ifeq (iOS,$(TARGET_SYS))
@@ -421,13 +421,15 @@ ifeq (ppc,$(TARGET_LJARCH))
421 ifneq (,$(findstring LJ_ARCH_ROUND 1,$(TARGET_TESTARCH))) 421 ifneq (,$(findstring LJ_ARCH_ROUND 1,$(TARGET_TESTARCH)))
422 DASM_AFLAGS+= -D ROUND 422 DASM_AFLAGS+= -D ROUND
423 endif 423 endif
424 ifneq (,$(findstring LJ_ARCH_PPC64 1,$(TARGET_TESTARCH))) 424 ifneq (,$(findstring LJ_ARCH_PPC32ON64 1,$(TARGET_TESTARCH)))
425 DASM_AFLAGS+= -D GPR64 425 DASM_AFLAGS+= -D GPR64
426 endif 426 endif
427 ifeq (PS3,$(TARGET_SYS)) 427 ifeq (PS3,$(TARGET_SYS))
428 DASM_AFLAGS+= -D PPE -D TOC 428 DASM_AFLAGS+= -D PPE -D TOC
429 endif 429 endif
430endif 430 ifneq (,$(findstring LJ_ARCH_PPC64 ,$(TARGET_TESTARCH)))
431 DASM_ARCH= ppc64
432 endif
431endif 433endif
432endif 434endif
433endif 435endif
@@ -443,7 +445,7 @@ BUILDVM_X= $(BUILDVM_T)
443HOST_O= $(MINILUA_O) $(BUILDVM_O) 445HOST_O= $(MINILUA_O) $(BUILDVM_O)
444HOST_T= $(MINILUA_T) $(BUILDVM_T) 446HOST_T= $(MINILUA_T) $(BUILDVM_T)
445 447
446LJVM_S= lj_vm.s 448LJVM_S= lj_vm.S
447LJVM_O= lj_vm.o 449LJVM_O= lj_vm.o
448LJVM_BOUT= $(LJVM_S) 450LJVM_BOUT= $(LJVM_S)
449LJVM_MODE= elfasm 451LJVM_MODE= elfasm
@@ -452,10 +454,11 @@ LJLIB_O= lib_base.o lib_math.o lib_bit.o lib_string.o lib_table.o \
452 lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o 454 lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o
453LJLIB_C= $(LJLIB_O:.o=.c) 455LJLIB_C= $(LJLIB_O:.o=.c)
454 456
455LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o \ 457LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \
456 lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \ 458 lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \
457 lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_strscan.o \ 459 lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_strscan.o \
458 lj_api.o lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \ 460 lj_strfmt.o lj_api.o lj_profile.o \
461 lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \
459 lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \ 462 lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \
460 lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \ 463 lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \
461 lj_mcode.o lj_snap.o lj_record.o lj_crecord.o lj_ffrecord.o \ 464 lj_mcode.o lj_snap.o lj_record.o lj_crecord.o lj_ffrecord.o \
@@ -575,6 +578,10 @@ amalg:
575clean: 578clean:
576 $(HOST_RM) $(ALL_RM) 579 $(HOST_RM) $(ALL_RM)
577 580
581libbc:
582 ./$(LUAJIT_T) host/genlibbc.lua -o host/buildvm_libbc.h $(LJLIB_C)
583 $(MAKE) all
584
578depend: 585depend:
579 @for file in $(ALL_HDRGEN); do \ 586 @for file in $(ALL_HDRGEN); do \
580 test -f $$file || touch $$file; \ 587 test -f $$file || touch $$file; \
@@ -589,7 +596,7 @@ depend:
589 test -s $$file || $(HOST_RM) $$file; \ 596 test -s $$file || $(HOST_RM) $$file; \
590 done 597 done
591 598
592.PHONY: default all amalg clean depend 599.PHONY: default all amalg clean libbc depend
593 600
594############################################################################## 601##############################################################################
595# Rules for generated files. 602# Rules for generated files.
@@ -646,10 +653,10 @@ lj_folddef.h: $(BUILDVM_T) lj_opt_fold.c
646 $(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) -c -o $(@:.o=_dyn.o) $< 653 $(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) -c -o $(@:.o=_dyn.o) $<
647 $(Q)$(TARGET_CC) $(TARGET_ACFLAGS) -c -o $@ $< 654 $(Q)$(TARGET_CC) $(TARGET_ACFLAGS) -c -o $@ $<
648 655
649%.o: %.s 656%.o: %.S
650 $(E) "ASM $@" 657 $(E) "ASM $@"
651 $(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) -c -o $(@:.o=_dyn.o) $< 658 $(Q)$(TARGET_DYNCC) $(TARGET_ASFLAGS) -c -o $(@:.o=_dyn.o) $<
652 $(Q)$(TARGET_CC) $(TARGET_ACFLAGS) -c -o $@ $< 659 $(Q)$(TARGET_CC) $(TARGET_ASFLAGS) -c -o $@ $<
653 660
654$(LUAJIT_O): 661$(LUAJIT_O):
655 $(E) "CC $@" 662 $(E) "CC $@"
diff --git a/src/Makefile.dep b/src/Makefile.dep
index 9e14d617..9aefb236 100644
--- a/src/Makefile.dep
+++ b/src/Makefile.dep
@@ -5,43 +5,47 @@ lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
5 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h \ 5 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h \
6 lj_tab.h lj_meta.h lj_state.h lj_ctype.h lj_cconv.h lj_bc.h lj_ff.h \ 6 lj_tab.h lj_meta.h lj_state.h lj_ctype.h lj_cconv.h lj_bc.h lj_ff.h \
7 lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h lj_strscan.h \ 7 lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h lj_strscan.h \
8 lj_lib.h lj_libdef.h 8 lj_strfmt.h lj_lib.h lj_libdef.h
9lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ 9lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
10 lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_lib.h lj_libdef.h 10 lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_strscan.h \
11 lj_strfmt.h lj_ctype.h lj_cdata.h lj_cconv.h lj_carith.h lj_ff.h \
12 lj_ffdef.h lj_lib.h lj_libdef.h
11lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ 13lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
12 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_lib.h \ 14 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_lib.h \
13 lj_libdef.h 15 lj_libdef.h
14lib_ffi.o: lib_ffi.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ 16lib_ffi.o: lib_ffi.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
15 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h \ 17 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h \
16 lj_ctype.h lj_cparse.h lj_cdata.h lj_cconv.h lj_carith.h lj_ccall.h \ 18 lj_ctype.h lj_cparse.h lj_cdata.h lj_cconv.h lj_carith.h lj_ccall.h \
17 lj_ccallback.h lj_clib.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h 19 lj_ccallback.h lj_clib.h lj_strfmt.h lj_ff.h lj_ffdef.h lj_lib.h \
20 lj_libdef.h
18lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h 21lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h
19lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ 22lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
20 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_state.h lj_ff.h \ 23 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_state.h \
21 lj_ffdef.h lj_lib.h lj_libdef.h 24 lj_strfmt.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h
22lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h \ 25lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
23 lj_obj.h lj_def.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h \ 26 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h \
24 lj_bc.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_target.h \ 27 lj_state.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \
25 lj_target_*.h lj_dispatch.h lj_vm.h lj_vmevent.h lj_lib.h luajit.h \ 28 lj_target.h lj_target_*.h lj_trace.h lj_dispatch.h lj_traceerr.h \
26 lj_libdef.h 29 lj_vm.h lj_vmevent.h lj_lib.h luajit.h lj_libdef.h
27lib_math.o: lib_math.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ 30lib_math.o: lib_math.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
28 lj_def.h lj_arch.h lj_lib.h lj_vm.h lj_libdef.h 31 lj_def.h lj_arch.h lj_lib.h lj_vm.h lj_libdef.h
29lib_os.o: lib_os.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ 32lib_os.o: lib_os.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
30 lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lj_libdef.h 33 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_lib.h \
34 lj_libdef.h
31lib_package.o: lib_package.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ 35lib_package.o: lib_package.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
32 lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h 36 lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h
33lib_string.o: lib_string.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ 37lib_string.o: lib_string.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
34 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h \ 38 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \
35 lj_meta.h lj_state.h lj_ff.h lj_ffdef.h lj_bcdump.h lj_lex.h lj_char.h \ 39 lj_tab.h lj_meta.h lj_state.h lj_ff.h lj_ffdef.h lj_bcdump.h lj_lex.h \
36 lj_lib.h lj_libdef.h 40 lj_char.h lj_strfmt.h lj_lib.h lj_libdef.h
37lib_table.o: lib_table.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ 41lib_table.o: lib_table.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
38 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_lib.h \ 42 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \
39 lj_libdef.h 43 lj_tab.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h
40lj_alloc.o: lj_alloc.c lj_def.h lua.h luaconf.h lj_arch.h lj_alloc.h 44lj_alloc.o: lj_alloc.c lj_def.h lua.h luaconf.h lj_arch.h lj_alloc.h
41lj_api.o: lj_api.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 45lj_api.o: lj_api.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
42 lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h lj_udata.h \ 46 lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h lj_udata.h \
43 lj_meta.h lj_state.h lj_bc.h lj_frame.h lj_trace.h lj_jit.h lj_ir.h \ 47 lj_meta.h lj_state.h lj_bc.h lj_frame.h lj_trace.h lj_jit.h lj_ir.h \
44 lj_dispatch.h lj_traceerr.h lj_vm.h lj_strscan.h 48 lj_dispatch.h lj_traceerr.h lj_vm.h lj_strscan.h lj_strfmt.h
45lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 49lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
46 lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h \ 50 lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h \
47 lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h lj_traceerr.h \ 51 lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h lj_traceerr.h \
@@ -50,17 +54,20 @@ lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
50lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h \ 54lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h \
51 lj_bcdef.h 55 lj_bcdef.h
52lj_bcread.o: lj_bcread.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 56lj_bcread.o: lj_bcread.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
53 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_bc.h lj_ctype.h \ 57 lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_bc.h \
54 lj_cdata.h lualib.h lj_lex.h lj_bcdump.h lj_state.h 58 lj_ctype.h lj_cdata.h lualib.h lj_lex.h lj_bcdump.h lj_state.h \
59 lj_strfmt.h
55lj_bcwrite.o: lj_bcwrite.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 60lj_bcwrite.o: lj_bcwrite.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
56 lj_gc.h lj_str.h lj_bc.h lj_ctype.h lj_dispatch.h lj_jit.h lj_ir.h \ 61 lj_gc.h lj_buf.h lj_str.h lj_bc.h lj_ctype.h lj_dispatch.h lj_jit.h \
57 lj_bcdump.h lj_lex.h lj_err.h lj_errmsg.h lj_vm.h 62 lj_ir.h lj_strfmt.h lj_bcdump.h lj_lex.h lj_err.h lj_errmsg.h lj_vm.h
63lj_buf.o: lj_buf.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
64 lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_strfmt.h
58lj_carith.o: lj_carith.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 65lj_carith.o: lj_carith.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
59 lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_meta.h lj_ctype.h lj_cconv.h \ 66 lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_meta.h lj_ir.h lj_ctype.h \
60 lj_cdata.h lj_carith.h 67 lj_cconv.h lj_cdata.h lj_carith.h lj_strscan.h
61lj_ccall.o: lj_ccall.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 68lj_ccall.o: lj_ccall.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
62 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cconv.h \ 69 lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_cconv.h lj_cdata.h \
63 lj_cdata.h lj_ccall.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \ 70 lj_ccall.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \
64 lj_traceerr.h 71 lj_traceerr.h
65lj_ccallback.o: lj_ccallback.c lj_obj.h lua.h luaconf.h lj_def.h \ 72lj_ccallback.o: lj_ccallback.c lj_obj.h lua.h luaconf.h lj_def.h \
66 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_state.h lj_frame.h \ 73 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_state.h lj_frame.h \
@@ -71,107 +78,116 @@ lj_cconv.o: lj_cconv.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
71 lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_gc.h lj_cdata.h lj_cconv.h \ 78 lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_gc.h lj_cdata.h lj_cconv.h \
72 lj_ccallback.h 79 lj_ccallback.h
73lj_cdata.o: lj_cdata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 80lj_cdata.o: lj_cdata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
74 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cconv.h \ 81 lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_cconv.h lj_cdata.h
75 lj_cdata.h
76lj_char.o: lj_char.c lj_char.h lj_def.h lua.h luaconf.h 82lj_char.o: lj_char.c lj_char.h lj_def.h lua.h luaconf.h
77lj_clib.o: lj_clib.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 83lj_clib.o: lj_clib.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
78 lj_err.h lj_errmsg.h lj_tab.h lj_str.h lj_udata.h lj_ctype.h lj_cconv.h \ 84 lj_err.h lj_errmsg.h lj_tab.h lj_str.h lj_udata.h lj_ctype.h lj_cconv.h \
79 lj_cdata.h lj_clib.h 85 lj_cdata.h lj_clib.h lj_strfmt.h
80lj_cparse.o: lj_cparse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 86lj_cparse.o: lj_cparse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
81 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_ctype.h lj_cparse.h lj_frame.h \ 87 lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_ctype.h lj_cparse.h \
82 lj_bc.h lj_vm.h lj_char.h lj_strscan.h 88 lj_frame.h lj_bc.h lj_vm.h lj_char.h lj_strscan.h lj_strfmt.h
83lj_crecord.o: lj_crecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 89lj_crecord.o: lj_crecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
84 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h \ 90 lj_err.h lj_errmsg.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_gc.h \
85 lj_gc.h lj_cdata.h lj_cparse.h lj_cconv.h lj_clib.h lj_ccall.h lj_ff.h \ 91 lj_cdata.h lj_cparse.h lj_cconv.h lj_carith.h lj_clib.h lj_ccall.h \
86 lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ 92 lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
87 lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h \ 93 lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h \
88 lj_crecord.h 94 lj_crecord.h lj_strfmt.h
89lj_ctype.o: lj_ctype.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 95lj_ctype.o: lj_ctype.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
90 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_ccallback.h 96 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_strfmt.h lj_ctype.h \
97 lj_ccallback.h
91lj_debug.o: lj_debug.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 98lj_debug.o: lj_debug.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
92 lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_state.h lj_frame.h \ 99 lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_gc.h lj_str.h lj_tab.h \
93 lj_bc.h lj_vm.h lj_jit.h lj_ir.h 100 lj_state.h lj_frame.h lj_bc.h lj_strfmt.h lj_jit.h lj_ir.h
94lj_dispatch.o: lj_dispatch.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 101lj_dispatch.o: lj_dispatch.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
95 lj_err.h lj_errmsg.h lj_func.h lj_str.h lj_tab.h lj_meta.h lj_debug.h \ 102 lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_func.h lj_tab.h \
96 lj_state.h lj_frame.h lj_bc.h lj_ff.h lj_ffdef.h lj_jit.h lj_ir.h \ 103 lj_meta.h lj_debug.h lj_state.h lj_frame.h lj_bc.h lj_ff.h lj_ffdef.h \
97 lj_ccallback.h lj_ctype.h lj_gc.h lj_trace.h lj_dispatch.h lj_traceerr.h \ 104 lj_strfmt.h lj_jit.h lj_ir.h lj_ccallback.h lj_ctype.h lj_trace.h \
98 lj_vm.h luajit.h 105 lj_dispatch.h lj_traceerr.h lj_profile.h lj_vm.h luajit.h
99lj_err.o: lj_err.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_err.h \ 106lj_err.o: lj_err.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_err.h \
100 lj_errmsg.h lj_debug.h lj_str.h lj_func.h lj_state.h lj_frame.h lj_bc.h \ 107 lj_errmsg.h lj_debug.h lj_str.h lj_func.h lj_state.h lj_frame.h lj_bc.h \
101 lj_ff.h lj_ffdef.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \ 108 lj_ff.h lj_ffdef.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \
102 lj_traceerr.h lj_vm.h 109 lj_traceerr.h lj_vm.h lj_strfmt.h
103lj_ffrecord.o: lj_ffrecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 110lj_ffrecord.o: lj_ffrecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
104 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ff.h \ 111 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ff.h \
105 lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ 112 lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
106 lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_crecord.h \ 113 lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_crecord.h \
107 lj_vm.h lj_strscan.h lj_recdef.h 114 lj_vm.h lj_strscan.h lj_strfmt.h lj_recdef.h
108lj_func.o: lj_func.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 115lj_func.o: lj_func.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
109 lj_func.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \ 116 lj_func.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \
110 lj_traceerr.h lj_vm.h 117 lj_traceerr.h lj_vm.h
111lj_gc.o: lj_gc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 118lj_gc.o: lj_gc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
112 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_udata.h lj_meta.h \ 119 lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_func.h lj_udata.h \
113 lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h lj_trace.h lj_jit.h \ 120 lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h lj_trace.h \
114 lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h 121 lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h
115lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 122lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
116 lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_frame.h lj_bc.h lj_jit.h \ 123 lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_frame.h lj_bc.h lj_buf.h \
117 lj_ir.h lj_dispatch.h 124 lj_str.h lj_strfmt.h lj_jit.h lj_ir.h lj_dispatch.h
118lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 125lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
119 lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ 126 lj_buf.h lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \
120 lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h lj_carith.h \ 127 lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h \
121 lj_vm.h lj_strscan.h lj_lib.h 128 lj_carith.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lib.h
122lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 129lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
123 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h lualib.h \ 130 lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h \
124 lj_state.h lj_lex.h lj_parse.h lj_char.h lj_strscan.h 131 lualib.h lj_state.h lj_lex.h lj_parse.h lj_char.h lj_strscan.h \
132 lj_strfmt.h
125lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \ 133lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \
126 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_bc.h \ 134 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_bc.h \
127 lj_dispatch.h lj_jit.h lj_ir.h lj_vm.h lj_strscan.h lj_lib.h 135 lj_dispatch.h lj_jit.h lj_ir.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lex.h \
136 lj_bcdump.h lj_lib.h
128lj_load.o: lj_load.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \ 137lj_load.o: lj_load.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \
129 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_func.h lj_frame.h \ 138 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_func.h \
130 lj_bc.h lj_vm.h lj_lex.h lj_bcdump.h lj_parse.h 139 lj_frame.h lj_bc.h lj_vm.h lj_lex.h lj_bcdump.h lj_parse.h
131lj_mcode.o: lj_mcode.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 140lj_mcode.o: lj_mcode.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
132 lj_gc.h lj_err.h lj_errmsg.h lj_jit.h lj_ir.h lj_mcode.h lj_trace.h \ 141 lj_gc.h lj_err.h lj_errmsg.h lj_jit.h lj_ir.h lj_mcode.h lj_trace.h \
133 lj_dispatch.h lj_bc.h lj_traceerr.h lj_vm.h 142 lj_dispatch.h lj_bc.h lj_traceerr.h lj_vm.h
134lj_meta.o: lj_meta.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 143lj_meta.o: lj_meta.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
135 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \ 144 lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_meta.h lj_frame.h \
136 lj_vm.h lj_strscan.h 145 lj_bc.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lib.h
137lj_obj.o: lj_obj.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h 146lj_obj.o: lj_obj.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h
138lj_opt_dce.o: lj_opt_dce.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 147lj_opt_dce.o: lj_opt_dce.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
139 lj_ir.h lj_jit.h lj_iropt.h 148 lj_ir.h lj_jit.h lj_iropt.h
140lj_opt_fold.o: lj_opt_fold.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 149lj_opt_fold.o: lj_opt_fold.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
141 lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \ 150 lj_buf.h lj_gc.h lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h \
142 lj_bc.h lj_traceerr.h lj_ctype.h lj_gc.h lj_carith.h lj_vm.h \ 151 lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h \
143 lj_strscan.h lj_folddef.h 152 lj_carith.h lj_vm.h lj_strscan.h lj_strfmt.h lj_folddef.h
144lj_opt_loop.o: lj_opt_loop.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 153lj_opt_loop.o: lj_opt_loop.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
145 lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \ 154 lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_ir.h lj_jit.h \
146 lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h lj_vm.h 155 lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h \
156 lj_vm.h
147lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 157lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
148 lj_tab.h lj_ir.h lj_jit.h lj_iropt.h 158 lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_ircall.h
149lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \ 159lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \
150 lj_arch.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \ 160 lj_arch.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \
151 lj_traceerr.h lj_vm.h lj_strscan.h 161 lj_traceerr.h lj_vm.h lj_strscan.h
152lj_opt_sink.o: lj_opt_sink.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 162lj_opt_sink.o: lj_opt_sink.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
153 lj_ir.h lj_jit.h lj_iropt.h lj_target.h lj_target_*.h 163 lj_ir.h lj_jit.h lj_iropt.h lj_target.h lj_target_*.h
154lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \ 164lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \
155 lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_ircall.h \ 165 lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_ir.h \
156 lj_iropt.h lj_vm.h 166 lj_jit.h lj_ircall.h lj_iropt.h lj_vm.h
157lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 167lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
158 lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h \ 168 lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_str.h lj_tab.h \
159 lj_state.h lj_bc.h lj_ctype.h lj_lex.h lj_parse.h lj_vm.h lj_vmevent.h 169 lj_func.h lj_state.h lj_bc.h lj_ctype.h lj_strfmt.h lj_lex.h lj_parse.h \
170 lj_vm.h lj_vmevent.h
171lj_profile.o: lj_profile.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
172 lj_buf.h lj_gc.h lj_str.h lj_frame.h lj_bc.h lj_debug.h lj_dispatch.h \
173 lj_jit.h lj_ir.h lj_trace.h lj_traceerr.h lj_profile.h luajit.h
160lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 174lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
161 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \ 175 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \
162 lj_ctype.h lj_gc.h lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h \ 176 lj_ctype.h lj_gc.h lj_ff.h lj_ffdef.h lj_debug.h lj_ir.h lj_jit.h \
163 lj_iropt.h lj_trace.h lj_dispatch.h lj_traceerr.h lj_record.h \ 177 lj_ircall.h lj_iropt.h lj_trace.h lj_dispatch.h lj_traceerr.h \
164 lj_ffrecord.h lj_snap.h lj_vm.h 178 lj_record.h lj_ffrecord.h lj_snap.h lj_vm.h
165lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 179lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
166 lj_tab.h lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h \ 180 lj_tab.h lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h \
167 lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h \ 181 lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h \
168 lj_target_*.h lj_ctype.h lj_cdata.h 182 lj_target_*.h lj_ctype.h lj_cdata.h
169lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 183lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
170 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_meta.h \ 184 lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_func.h \
171 lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_trace.h lj_jit.h lj_ir.h \ 185 lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_trace.h lj_jit.h \
172 lj_dispatch.h lj_traceerr.h lj_vm.h lj_lex.h lj_alloc.h 186 lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_lex.h lj_alloc.h luajit.h
173lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 187lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
174 lj_err.h lj_errmsg.h lj_str.h lj_state.h lj_char.h 188 lj_err.h lj_errmsg.h lj_str.h lj_char.h
189lj_strfmt.o: lj_strfmt.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
190 lj_buf.h lj_gc.h lj_str.h lj_state.h lj_char.h lj_strfmt.h
175lj_strscan.o: lj_strscan.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 191lj_strscan.o: lj_strscan.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
176 lj_char.h lj_strscan.h 192 lj_char.h lj_strscan.h
177lj_tab.o: lj_tab.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 193lj_tab.o: lj_tab.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
@@ -189,21 +205,22 @@ lj_vmevent.o: lj_vmevent.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
189lj_vmmath.o: lj_vmmath.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 205lj_vmmath.o: lj_vmmath.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
190 lj_ir.h lj_vm.h 206 lj_ir.h lj_vm.h
191ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \ 207ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \
192 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h \ 208 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h \
193 lj_udata.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h \ 209 lj_func.h lj_udata.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h \
194 lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_err.c \ 210 lj_cdata.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h \
195 lj_debug.h lj_ff.h lj_ffdef.h lj_char.c lj_char.h lj_bc.c lj_bcdef.h \ 211 lj_vm.h lj_err.c lj_debug.h lj_ff.h lj_ffdef.h lj_strfmt.h lj_char.c \
196 lj_obj.c lj_str.c lj_tab.c lj_func.c lj_udata.c lj_meta.c lj_strscan.h \ 212 lj_char.h lj_bc.c lj_bcdef.h lj_obj.c lj_buf.c lj_str.c lj_tab.c \
197 lj_debug.c lj_state.c lj_lex.h lj_alloc.h lj_dispatch.c lj_ccallback.h \ 213 lj_func.c lj_udata.c lj_meta.c lj_strscan.h lj_lib.h lj_debug.c \
198 luajit.h lj_vmevent.c lj_vmevent.h lj_vmmath.c lj_strscan.c lj_api.c \ 214 lj_state.c lj_lex.h lj_alloc.h luajit.h lj_dispatch.c lj_ccallback.h \
199 lj_lex.c lualib.h lj_parse.h lj_parse.c lj_bcread.c lj_bcdump.h \ 215 lj_profile.h lj_vmevent.c lj_vmevent.h lj_vmmath.c lj_strscan.c \
200 lj_bcwrite.c lj_load.c lj_ctype.c lj_cdata.c lj_cconv.h lj_cconv.c \ 216 lj_strfmt.c lj_api.c lj_profile.c lj_lex.c lualib.h lj_parse.h \
201 lj_ccall.c lj_ccall.h lj_ccallback.c lj_target.h lj_target_*.h \ 217 lj_parse.c lj_bcread.c lj_bcdump.h lj_bcwrite.c lj_load.c lj_ctype.c \
202 lj_mcode.h lj_carith.c lj_carith.h lj_clib.c lj_clib.h lj_cparse.c \ 218 lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h lj_ccallback.c \
203 lj_cparse.h lj_lib.c lj_lib.h lj_ir.c lj_ircall.h lj_iropt.h \ 219 lj_target.h lj_target_*.h lj_mcode.h lj_carith.c lj_carith.h lj_clib.c \
204 lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c lj_opt_dce.c \ 220 lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c lj_ircall.h \
205 lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c lj_mcode.c \ 221 lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c \
206 lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \ 222 lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c \
223 lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \
207 lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h \ 224 lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h \
208 lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c \ 225 lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c \
209 lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c \ 226 lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c \
@@ -220,7 +237,8 @@ host/buildvm_asm.o: host/buildvm_asm.c host/buildvm.h lj_def.h lua.h luaconf.h \
220host/buildvm_fold.o: host/buildvm_fold.c host/buildvm.h lj_def.h lua.h \ 237host/buildvm_fold.o: host/buildvm_fold.c host/buildvm.h lj_def.h lua.h \
221 luaconf.h lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_ir.h lj_obj.h 238 luaconf.h lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_ir.h lj_obj.h
222host/buildvm_lib.o: host/buildvm_lib.c host/buildvm.h lj_def.h lua.h luaconf.h \ 239host/buildvm_lib.o: host/buildvm_lib.c host/buildvm.h lj_def.h lua.h luaconf.h \
223 lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_lib.h lj_obj.h 240 lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_bc.h lj_lib.h lj_obj.h \
241 host/buildvm_libbc.h
224host/buildvm_peobj.o: host/buildvm_peobj.c host/buildvm.h lj_def.h lua.h \ 242host/buildvm_peobj.o: host/buildvm_peobj.c host/buildvm.h lj_def.h lua.h \
225 luaconf.h lj_arch.h lj_bc.h lj_def.h lj_arch.h 243 luaconf.h lj_arch.h lj_bc.h lj_def.h lj_arch.h
226host/minilua.o: host/minilua.c 244host/minilua.o: host/minilua.c
diff --git a/src/host/buildvm.c b/src/host/buildvm.c
index 07122a64..324dd263 100644
--- a/src/host/buildvm.c
+++ b/src/host/buildvm.c
@@ -59,10 +59,10 @@ static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type);
59#include "../dynasm/dasm_x86.h" 59#include "../dynasm/dasm_x86.h"
60#elif LJ_TARGET_ARM 60#elif LJ_TARGET_ARM
61#include "../dynasm/dasm_arm.h" 61#include "../dynasm/dasm_arm.h"
62#elif LJ_TARGET_ARM64
63#include "../dynasm/dasm_arm64.h"
62#elif LJ_TARGET_PPC 64#elif LJ_TARGET_PPC
63#include "../dynasm/dasm_ppc.h" 65#include "../dynasm/dasm_ppc.h"
64#elif LJ_TARGET_PPCSPE
65#include "../dynasm/dasm_ppc.h"
66#elif LJ_TARGET_MIPS 66#elif LJ_TARGET_MIPS
67#include "../dynasm/dasm_mips.h" 67#include "../dynasm/dasm_mips.h"
68#else 68#else
@@ -113,8 +113,8 @@ static const char *sym_decorate(BuildCtx *ctx,
113 name[0] = '@'; 113 name[0] = '@';
114 else 114 else
115 *p = '\0'; 115 *p = '\0';
116#elif (LJ_TARGET_PPC || LJ_TARGET_PPCSPE) && !LJ_TARGET_CONSOLE 116#elif LJ_TARGET_PPC && !LJ_TARGET_CONSOLE
117 /* Keep @plt. */ 117 /* Keep @plt etc. */
118#else 118#else
119 *p = '\0'; 119 *p = '\0';
120#endif 120#endif
@@ -179,6 +179,7 @@ static int build_code(BuildCtx *ctx)
179 ctx->nreloc = 0; 179 ctx->nreloc = 0;
180 180
181 ctx->globnames = globnames; 181 ctx->globnames = globnames;
182 ctx->extnames = extnames;
182 ctx->relocsym = (const char **)malloc(NRELOCSYM*sizeof(const char *)); 183 ctx->relocsym = (const char **)malloc(NRELOCSYM*sizeof(const char *));
183 ctx->nrelocsym = 0; 184 ctx->nrelocsym = 0;
184 for (i = 0; i < (int)NRELOCSYM; i++) relocmap[i] = -1; 185 for (i = 0; i < (int)NRELOCSYM; i++) relocmap[i] = -1;
@@ -320,20 +321,20 @@ static void emit_vmdef(BuildCtx *ctx)
320 char buf[80]; 321 char buf[80];
321 int i; 322 int i;
322 fprintf(ctx->fp, "-- This is a generated file. DO NOT EDIT!\n\n"); 323 fprintf(ctx->fp, "-- This is a generated file. DO NOT EDIT!\n\n");
323 fprintf(ctx->fp, "module(...)\n\n"); 324 fprintf(ctx->fp, "return {\n\n");
324 325
325 fprintf(ctx->fp, "bcnames = \""); 326 fprintf(ctx->fp, "bcnames = \"");
326 for (i = 0; bc_names[i]; i++) fprintf(ctx->fp, "%-6s", bc_names[i]); 327 for (i = 0; bc_names[i]; i++) fprintf(ctx->fp, "%-6s", bc_names[i]);
327 fprintf(ctx->fp, "\"\n\n"); 328 fprintf(ctx->fp, "\",\n\n");
328 329
329 fprintf(ctx->fp, "irnames = \""); 330 fprintf(ctx->fp, "irnames = \"");
330 for (i = 0; ir_names[i]; i++) fprintf(ctx->fp, "%-6s", ir_names[i]); 331 for (i = 0; ir_names[i]; i++) fprintf(ctx->fp, "%-6s", ir_names[i]);
331 fprintf(ctx->fp, "\"\n\n"); 332 fprintf(ctx->fp, "\",\n\n");
332 333
333 fprintf(ctx->fp, "irfpm = { [0]="); 334 fprintf(ctx->fp, "irfpm = { [0]=");
334 for (i = 0; irfpm_names[i]; i++) 335 for (i = 0; irfpm_names[i]; i++)
335 fprintf(ctx->fp, "\"%s\", ", lower(buf, irfpm_names[i])); 336 fprintf(ctx->fp, "\"%s\", ", lower(buf, irfpm_names[i]));
336 fprintf(ctx->fp, "}\n\n"); 337 fprintf(ctx->fp, "},\n\n");
337 338
338 fprintf(ctx->fp, "irfield = { [0]="); 339 fprintf(ctx->fp, "irfield = { [0]=");
339 for (i = 0; irfield_names[i]; i++) { 340 for (i = 0; irfield_names[i]; i++) {
@@ -343,17 +344,17 @@ static void emit_vmdef(BuildCtx *ctx)
343 if (p) *p = '.'; 344 if (p) *p = '.';
344 fprintf(ctx->fp, "\"%s\", ", buf); 345 fprintf(ctx->fp, "\"%s\", ", buf);
345 } 346 }
346 fprintf(ctx->fp, "}\n\n"); 347 fprintf(ctx->fp, "},\n\n");
347 348
348 fprintf(ctx->fp, "ircall = {\n[0]="); 349 fprintf(ctx->fp, "ircall = {\n[0]=");
349 for (i = 0; ircall_names[i]; i++) 350 for (i = 0; ircall_names[i]; i++)
350 fprintf(ctx->fp, "\"%s\",\n", ircall_names[i]); 351 fprintf(ctx->fp, "\"%s\",\n", ircall_names[i]);
351 fprintf(ctx->fp, "}\n\n"); 352 fprintf(ctx->fp, "},\n\n");
352 353
353 fprintf(ctx->fp, "traceerr = {\n[0]="); 354 fprintf(ctx->fp, "traceerr = {\n[0]=");
354 for (i = 0; trace_errors[i]; i++) 355 for (i = 0; trace_errors[i]; i++)
355 fprintf(ctx->fp, "\"%s\",\n", trace_errors[i]); 356 fprintf(ctx->fp, "\"%s\",\n", trace_errors[i]);
356 fprintf(ctx->fp, "}\n\n"); 357 fprintf(ctx->fp, "},\n\n");
357} 358}
358 359
359/* -- Argument parsing ---------------------------------------------------- */ 360/* -- Argument parsing ---------------------------------------------------- */
@@ -490,6 +491,7 @@ int main(int argc, char **argv)
490 case BUILD_vmdef: 491 case BUILD_vmdef:
491 emit_vmdef(ctx); 492 emit_vmdef(ctx);
492 emit_lib(ctx); 493 emit_lib(ctx);
494 fprintf(ctx->fp, "}\n\n");
493 break; 495 break;
494 case BUILD_ffdef: 496 case BUILD_ffdef:
495 case BUILD_libdef: 497 case BUILD_libdef:
diff --git a/src/host/buildvm.h b/src/host/buildvm.h
index b2621850..55885553 100644
--- a/src/host/buildvm.h
+++ b/src/host/buildvm.h
@@ -82,6 +82,7 @@ typedef struct BuildCtx {
82 const char *beginsym; 82 const char *beginsym;
83 /* Strings generated by DynASM. */ 83 /* Strings generated by DynASM. */
84 const char *const *globnames; 84 const char *const *globnames;
85 const char *const *extnames;
85 const char *dasm_ident; 86 const char *dasm_ident;
86 const char *dasm_arch; 87 const char *dasm_arch;
87 /* Relocations. */ 88 /* Relocations. */
diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c
index 2c9a2d48..9b7ae53a 100644
--- a/src/host/buildvm_asm.c
+++ b/src/host/buildvm_asm.c
@@ -51,8 +51,8 @@ static const char *const jccnames[] = {
51 "js", "jns", "jpe", "jpo", "jl", "jge", "jle", "jg" 51 "js", "jns", "jpe", "jpo", "jl", "jge", "jle", "jg"
52}; 52};
53 53
54/* Emit relocation for the incredibly stupid OSX assembler. */ 54/* Emit x86/x64 text relocations. */
55static void emit_asm_reloc_mach(BuildCtx *ctx, uint8_t *cp, int n, 55static void emit_asm_reloc_text(BuildCtx *ctx, uint8_t *cp, int n,
56 const char *sym) 56 const char *sym)
57{ 57{
58 const char *opname = NULL; 58 const char *opname = NULL;
@@ -71,6 +71,20 @@ err:
71 exit(1); 71 exit(1);
72 } 72 }
73 emit_asm_bytes(ctx, cp, n); 73 emit_asm_bytes(ctx, cp, n);
74 if (strncmp(sym+(*sym == '_'), LABEL_PREFIX, sizeof(LABEL_PREFIX)-1)) {
75 /* Various fixups for external symbols outside of our binary. */
76 if (ctx->mode == BUILD_elfasm) {
77 if (LJ_32)
78 fprintf(ctx->fp, "#if __PIC__\n\t%s lj_wrap_%s\n#else\n", opname, sym);
79 fprintf(ctx->fp, "\t%s %s@PLT\n", opname, sym);
80 if (LJ_32)
81 fprintf(ctx->fp, "#endif\n");
82 return;
83 } else if (LJ_32 && ctx->mode == BUILD_machasm) {
84 fprintf(ctx->fp, "\t%s L%s$stub\n", opname, sym);
85 return;
86 }
87 }
74 fprintf(ctx->fp, "\t%s %s\n", opname, sym); 88 fprintf(ctx->fp, "\t%s %s\n", opname, sym);
75} 89}
76#else 90#else
@@ -107,7 +121,16 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n,
107 ins, sym); 121 ins, sym);
108 exit(1); 122 exit(1);
109 } 123 }
110#elif LJ_TARGET_PPC || LJ_TARGET_PPCSPE 124#elif LJ_TARGET_ARM64
125 if ((ins >> 26) == 0x25u) {
126 fprintf(ctx->fp, "\tbl %s\n", sym);
127 } else {
128 fprintf(stderr,
129 "Error: unsupported opcode %08x for %s symbol relocation.\n",
130 ins, sym);
131 exit(1);
132 }
133#elif LJ_TARGET_PPC
111#if LJ_TARGET_PS3 134#if LJ_TARGET_PS3
112#define TOCPREFIX "." 135#define TOCPREFIX "."
113#else 136#else
@@ -117,6 +140,14 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n,
117 fprintf(ctx->fp, "\t%s %d, %d, " TOCPREFIX "%s\n", 140 fprintf(ctx->fp, "\t%s %d, %d, " TOCPREFIX "%s\n",
118 (ins & 1) ? "bcl" : "bc", (ins >> 21) & 31, (ins >> 16) & 31, sym); 141 (ins & 1) ? "bcl" : "bc", (ins >> 21) & 31, (ins >> 16) & 31, sym);
119 } else if ((ins >> 26) == 18) { 142 } else if ((ins >> 26) == 18) {
143#if LJ_ARCH_PPC64
144 const char *suffix = strchr(sym, '@');
145 if (suffix && suffix[1] == 'h') {
146 fprintf(ctx->fp, "\taddis 11, 2, %s\n", sym);
147 } else if (suffix && suffix[1] == 'l') {
148 fprintf(ctx->fp, "\tld 12, %s\n", sym);
149 } else
150#endif
120 fprintf(ctx->fp, "\t%s " TOCPREFIX "%s\n", (ins & 1) ? "bl" : "b", sym); 151 fprintf(ctx->fp, "\t%s " TOCPREFIX "%s\n", (ins & 1) ? "bl" : "b", sym);
121 } else { 152 } else {
122 fprintf(stderr, 153 fprintf(stderr,
@@ -214,6 +245,9 @@ void emit_asm(BuildCtx *ctx)
214 int i, rel; 245 int i, rel;
215 246
216 fprintf(ctx->fp, "\t.file \"buildvm_%s.dasc\"\n", ctx->dasm_arch); 247 fprintf(ctx->fp, "\t.file \"buildvm_%s.dasc\"\n", ctx->dasm_arch);
248#if LJ_ARCH_PPC64
249 fprintf(ctx->fp, "\t.abiversion 2\n");
250#endif
217 fprintf(ctx->fp, "\t.text\n"); 251 fprintf(ctx->fp, "\t.text\n");
218 emit_asm_align(ctx, 4); 252 emit_asm_align(ctx, 4);
219 253
@@ -254,8 +288,9 @@ void emit_asm(BuildCtx *ctx)
254 BuildReloc *r = &ctx->reloc[rel]; 288 BuildReloc *r = &ctx->reloc[rel];
255 int n = r->ofs - ofs; 289 int n = r->ofs - ofs;
256#if LJ_TARGET_X86ORX64 290#if LJ_TARGET_X86ORX64
257 if (ctx->mode == BUILD_machasm && r->type != 0) { 291 if (r->type != 0 &&
258 emit_asm_reloc_mach(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]); 292 (ctx->mode == BUILD_elfasm || ctx->mode == BUILD_machasm)) {
293 emit_asm_reloc_text(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]);
259 } else { 294 } else {
260 emit_asm_bytes(ctx, ctx->code+ofs, n); 295 emit_asm_bytes(ctx, ctx->code+ofs, n);
261 emit_asm_reloc(ctx, r->type, ctx->relocsym[r->sym]); 296 emit_asm_reloc(ctx, r->type, ctx->relocsym[r->sym]);
@@ -289,10 +324,7 @@ void emit_asm(BuildCtx *ctx)
289#if !(LJ_TARGET_PS3 || LJ_TARGET_PSVITA) 324#if !(LJ_TARGET_PS3 || LJ_TARGET_PSVITA)
290 fprintf(ctx->fp, "\t.section .note.GNU-stack,\"\"," ELFASM_PX "progbits\n"); 325 fprintf(ctx->fp, "\t.section .note.GNU-stack,\"\"," ELFASM_PX "progbits\n");
291#endif 326#endif
292#if LJ_TARGET_PPCSPE 327#if LJ_TARGET_PPC && !LJ_TARGET_PS3
293 /* Soft-float ABI + SPE. */
294 fprintf(ctx->fp, "\t.gnu_attribute 4, 2\n\t.gnu_attribute 8, 3\n");
295#elif LJ_TARGET_PPC && !LJ_TARGET_PS3
296 /* Hard-float ABI. */ 328 /* Hard-float ABI. */
297 fprintf(ctx->fp, "\t.gnu_attribute 4, 1\n"); 329 fprintf(ctx->fp, "\t.gnu_attribute 4, 1\n");
298#endif 330#endif
diff --git a/src/host/buildvm_lib.c b/src/host/buildvm_lib.c
index c37301d4..e928673d 100644
--- a/src/host/buildvm_lib.c
+++ b/src/host/buildvm_lib.c
@@ -5,7 +5,9 @@
5 5
6#include "buildvm.h" 6#include "buildvm.h"
7#include "lj_obj.h" 7#include "lj_obj.h"
8#include "lj_bc.h"
8#include "lj_lib.h" 9#include "lj_lib.h"
10#include "buildvm_libbc.h"
9 11
10/* Context for library definitions. */ 12/* Context for library definitions. */
11static uint8_t obuf[8192]; 13static uint8_t obuf[8192];
@@ -151,6 +153,62 @@ static void libdef_func(BuildCtx *ctx, char *p, int arg)
151 regfunc = REGFUNC_OK; 153 regfunc = REGFUNC_OK;
152} 154}
153 155
156static uint8_t *libdef_uleb128(uint8_t *p, uint32_t *vv)
157{
158 uint32_t v = *p++;
159 if (v >= 0x80) {
160 int sh = 0; v &= 0x7f;
161 do { v |= ((*p & 0x7f) << (sh += 7)); } while (*p++ >= 0x80);
162 }
163 *vv = v;
164 return p;
165}
166
167static void libdef_fixupbc(uint8_t *p)
168{
169 uint32_t i, sizebc;
170 p += 4;
171 p = libdef_uleb128(p, &sizebc);
172 p = libdef_uleb128(p, &sizebc);
173 p = libdef_uleb128(p, &sizebc);
174 for (i = 0; i < sizebc; i++, p += 4) {
175 uint8_t op = p[libbc_endian ? 3 : 0];
176 uint8_t ra = p[libbc_endian ? 2 : 1];
177 uint8_t rc = p[libbc_endian ? 1 : 2];
178 uint8_t rb = p[libbc_endian ? 0 : 3];
179 if (!LJ_DUALNUM && op == BC_ISTYPE && rc == ~LJ_TNUMX+1) {
180 op = BC_ISNUM; rc++;
181 }
182 p[LJ_ENDIAN_SELECT(0, 3)] = op;
183 p[LJ_ENDIAN_SELECT(1, 2)] = ra;
184 p[LJ_ENDIAN_SELECT(2, 1)] = rc;
185 p[LJ_ENDIAN_SELECT(3, 0)] = rb;
186 }
187}
188
189static void libdef_lua(BuildCtx *ctx, char *p, int arg)
190{
191 UNUSED(arg);
192 if (ctx->mode == BUILD_libdef) {
193 int i;
194 for (i = 0; libbc_map[i].name != NULL; i++) {
195 if (!strcmp(libbc_map[i].name, p)) {
196 int ofs = libbc_map[i].ofs;
197 int len = libbc_map[i+1].ofs - ofs;
198 obuf[2]++; /* Bump hash table size. */
199 *optr++ = LIBINIT_LUA;
200 libdef_name(p, 0);
201 memcpy(optr, libbc_code + ofs, len);
202 libdef_fixupbc(optr);
203 optr += len;
204 return;
205 }
206 }
207 fprintf(stderr, "Error: missing libbc definition for %s\n", p);
208 exit(1);
209 }
210}
211
154static uint32_t find_rec(char *name) 212static uint32_t find_rec(char *name)
155{ 213{
156 char *p = (char *)obuf; 214 char *p = (char *)obuf;
@@ -277,6 +335,7 @@ static const LibDefHandler libdef_handlers[] = {
277 { "CF(", ")", libdef_func, LIBINIT_CF }, 335 { "CF(", ")", libdef_func, LIBINIT_CF },
278 { "ASM(", ")", libdef_func, LIBINIT_ASM }, 336 { "ASM(", ")", libdef_func, LIBINIT_ASM },
279 { "ASM_(", ")", libdef_func, LIBINIT_ASM_ }, 337 { "ASM_(", ")", libdef_func, LIBINIT_ASM_ },
338 { "LUA(", ")", libdef_lua, 0 },
280 { "REC(", ")", libdef_rec, 0 }, 339 { "REC(", ")", libdef_rec, 0 },
281 { "PUSH(", ")", libdef_push, 0 }, 340 { "PUSH(", ")", libdef_push, 0 },
282 { "SET(", ")", libdef_set, 0 }, 341 { "SET(", ")", libdef_set, 0 },
@@ -373,7 +432,7 @@ void emit_lib(BuildCtx *ctx)
373 "#ifndef FF_NUM_ASMFUNC\n#define FF_NUM_ASMFUNC %d\n#endif\n\n", 432 "#ifndef FF_NUM_ASMFUNC\n#define FF_NUM_ASMFUNC %d\n#endif\n\n",
374 ffasmfunc); 433 ffasmfunc);
375 } else if (ctx->mode == BUILD_vmdef) { 434 } else if (ctx->mode == BUILD_vmdef) {
376 fprintf(ctx->fp, "}\n\n"); 435 fprintf(ctx->fp, "},\n\n");
377 } else if (ctx->mode == BUILD_bcdef) { 436 } else if (ctx->mode == BUILD_bcdef) {
378 int i; 437 int i;
379 fprintf(ctx->fp, "\n};\n\n"); 438 fprintf(ctx->fp, "\n};\n\n");
diff --git a/src/host/buildvm_libbc.h b/src/host/buildvm_libbc.h
new file mode 100644
index 00000000..45f8f8cb
--- /dev/null
+++ b/src/host/buildvm_libbc.h
@@ -0,0 +1,45 @@
1/* This is a generated file. DO NOT EDIT! */
2
3static const int libbc_endian = 0;
4
5static const uint8_t libbc_code[] = {
6#if LJ_FR2
70,1,2,0,0,1,2,24,1,0,0,76,1,2,0,241,135,158,166,3,220,203,178,130,4,0,1,2,0,
80,1,2,24,1,0,0,76,1,2,0,243,244,148,165,20,198,190,199,252,3,0,1,2,0,0,0,3,
916,0,5,0,21,1,0,0,76,1,2,0,0,2,10,0,0,0,15,16,0,12,0,16,1,9,0,41,2,1,0,21,3,
100,0,41,4,1,0,77,2,8,128,18,6,1,0,18,8,5,0,59,9,5,0,66,6,3,2,10,6,0,0,88,7,1,
11128,76,6,2,0,79,2,248,127,75,0,1,0,0,2,11,0,0,0,16,16,0,12,0,16,1,9,0,43,2,
120,0,18,3,0,0,41,4,0,0,88,5,7,128,18,7,1,0,18,9,5,0,18,10,6,0,66,7,3,2,10,7,
130,0,88,8,1,128,76,7,2,0,70,5,3,3,82,5,247,127,75,0,1,0,0,1,2,0,0,0,3,16,0,12,
140,21,1,0,0,76,1,2,0,0,2,10,0,0,2,30,16,0,12,0,21,2,0,0,11,1,0,0,88,3,7,128,
158,2,0,0,88,3,23,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2,0,88,3,18,128,16,1,14,
160,41,3,1,0,3,3,1,0,88,3,14,128,3,1,2,0,88,3,12,128,59,3,1,0,22,4,1,1,18,5,2,
170,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,252,127,43,4,0,0,64,4,
182,0,76,3,2,0,75,0,1,0,0,2,0
19#else
200,1,2,0,0,1,2,24,1,0,0,76,1,2,0,241,135,158,166,3,220,203,178,130,4,0,1,2,0,
210,1,2,24,1,0,0,76,1,2,0,243,244,148,165,20,198,190,199,252,3,0,1,2,0,0,0,3,
2216,0,5,0,21,1,0,0,76,1,2,0,0,2,9,0,0,0,15,16,0,12,0,16,1,9,0,41,2,1,0,21,3,
230,0,41,4,1,0,77,2,8,128,18,6,1,0,18,7,5,0,59,8,5,0,66,6,3,2,10,6,0,0,88,7,1,
24128,76,6,2,0,79,2,248,127,75,0,1,0,0,2,10,0,0,0,16,16,0,12,0,16,1,9,0,43,2,
250,0,18,3,0,0,41,4,0,0,88,5,7,128,18,7,1,0,18,8,5,0,18,9,6,0,66,7,3,2,10,7,0,
260,88,8,1,128,76,7,2,0,70,5,3,3,82,5,247,127,75,0,1,0,0,1,2,0,0,0,3,16,0,12,
270,21,1,0,0,76,1,2,0,0,2,10,0,0,2,30,16,0,12,0,21,2,0,0,11,1,0,0,88,3,7,128,
288,2,0,0,88,3,23,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2,0,88,3,18,128,16,1,14,
290,41,3,1,0,3,3,1,0,88,3,14,128,3,1,2,0,88,3,12,128,59,3,1,0,22,4,1,1,18,5,2,
300,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,252,127,43,4,0,0,64,4,
312,0,76,3,2,0,75,0,1,0,0,2,0
32#endif
33};
34
35static const struct { const char *name; int ofs; } libbc_map[] = {
36{"math_deg",0},
37{"math_rad",25},
38{"string_len",50},
39{"table_foreachi",69},
40{"table_foreach",136},
41{"table_getn",207},
42{"table_remove",226},
43{NULL,355}
44};
45
diff --git a/src/host/genlibbc.lua b/src/host/genlibbc.lua
new file mode 100644
index 00000000..4398d8e7
--- /dev/null
+++ b/src/host/genlibbc.lua
@@ -0,0 +1,197 @@
1----------------------------------------------------------------------------
2-- Lua script to dump the bytecode of the library functions written in Lua.
3-- The resulting 'buildvm_libbc.h' is used for the build process of LuaJIT.
4----------------------------------------------------------------------------
5-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
6-- Released under the MIT license. See Copyright Notice in luajit.h
7----------------------------------------------------------------------------
8
9local ffi = require("ffi")
10local bit = require("bit")
11local vmdef = require("jit.vmdef")
12local bcnames = vmdef.bcnames
13
14local format = string.format
15
16local isbe = (string.byte(string.dump(function() end), 5) % 2 == 1)
17
18local function usage(arg)
19 io.stderr:write("Usage: ", arg and arg[0] or "genlibbc",
20 " [-o buildvm_libbc.h] lib_*.c\n")
21 os.exit(1)
22end
23
24local function parse_arg(arg)
25 local outfile = "-"
26 if not (arg and arg[1]) then
27 usage(arg)
28 end
29 if arg[1] == "-o" then
30 outfile = arg[2]
31 if not outfile then usage(arg) end
32 table.remove(arg, 1)
33 table.remove(arg, 1)
34 end
35 return outfile
36end
37
38local function read_files(names)
39 local src = ""
40 for _,name in ipairs(names) do
41 local fp = assert(io.open(name))
42 src = src .. fp:read("*a")
43 fp:close()
44 end
45 return src
46end
47
48local function transform_lua(code)
49 local fixup = {}
50 local n = -30000
51 code = string.gsub(code, "CHECK_(%w*)%((.-)%)", function(tp, var)
52 n = n + 1
53 fixup[n] = { "CHECK", tp }
54 return format("%s=%d", var, n)
55 end)
56 code = string.gsub(code, "PAIRS%((.-)%)", function(var)
57 fixup.PAIRS = true
58 return format("nil, %s, 0", var)
59 end)
60 return "return "..code, fixup
61end
62
63local function read_uleb128(p)
64 local v = p[0]; p = p + 1
65 if v >= 128 then
66 local sh = 7; v = v - 128
67 repeat
68 local r = p[0]
69 v = v + bit.lshift(bit.band(r, 127), sh)
70 sh = sh + 7
71 p = p + 1
72 until r < 128
73 end
74 return p, v
75end
76
77-- ORDER LJ_T
78local name2itype = {
79 str = 5, func = 9, tab = 12, int = 14, num = 15
80}
81
82local BC = {}
83for i=0,#bcnames/6-1 do
84 BC[string.gsub(string.sub(bcnames, i*6+1, i*6+6), " ", "")] = i
85end
86local xop, xra = isbe and 3 or 0, isbe and 2 or 1
87local xrc, xrb = isbe and 1 or 2, isbe and 0 or 3
88
89local function fixup_dump(dump, fixup)
90 local buf = ffi.new("uint8_t[?]", #dump+1, dump)
91 local p = buf+5
92 local n, sizebc
93 p, n = read_uleb128(p)
94 local start = p
95 p = p + 4
96 p = read_uleb128(p)
97 p = read_uleb128(p)
98 p, sizebc = read_uleb128(p)
99 local rawtab = {}
100 for i=0,sizebc-1 do
101 local op = p[xop]
102 if op == BC.KSHORT then
103 local rd = p[xrc] + 256*p[xrb]
104 rd = bit.arshift(bit.lshift(rd, 16), 16)
105 local f = fixup[rd]
106 if f then
107 if f[1] == "CHECK" then
108 local tp = f[2]
109 if tp == "tab" then rawtab[p[xra]] = true end
110 p[xop] = tp == "num" and BC.ISNUM or BC.ISTYPE
111 p[xrb] = 0
112 p[xrc] = name2itype[tp]
113 else
114 error("unhandled fixup type: "..f[1])
115 end
116 end
117 elseif op == BC.TGETV then
118 if rawtab[p[xrb]] then
119 p[xop] = BC.TGETR
120 end
121 elseif op == BC.TSETV then
122 if rawtab[p[xrb]] then
123 p[xop] = BC.TSETR
124 end
125 elseif op == BC.ITERC then
126 if fixup.PAIRS then
127 p[xop] = BC.ITERN
128 end
129 end
130 p = p + 4
131 end
132 return ffi.string(start, n)
133end
134
135local function find_defs(src)
136 local defs = {}
137 for name, code in string.gmatch(src, "LJLIB_LUA%(([^)]*)%)%s*/%*(.-)%*/") do
138 local env = {}
139 local tcode, fixup = transform_lua(code)
140 local func = assert(load(tcode, "", nil, env))()
141 defs[name] = fixup_dump(string.dump(func, true), fixup)
142 defs[#defs+1] = name
143 end
144 return defs
145end
146
147local function gen_header(defs)
148 local t = {}
149 local function w(x) t[#t+1] = x end
150 w("/* This is a generated file. DO NOT EDIT! */\n\n")
151 w("static const int libbc_endian = ") w(isbe and 1 or 0) w(";\n\n")
152 local s = ""
153 for _,name in ipairs(defs) do
154 s = s .. defs[name]
155 end
156 w("static const uint8_t libbc_code[] = {\n")
157 local n = 0
158 for i=1,#s do
159 local x = string.byte(s, i)
160 w(x); w(",")
161 n = n + (x < 10 and 2 or (x < 100 and 3 or 4))
162 if n >= 75 then n = 0; w("\n") end
163 end
164 w("0\n};\n\n")
165 w("static const struct { const char *name; int ofs; } libbc_map[] = {\n")
166 local m = 0
167 for _,name in ipairs(defs) do
168 w('{"'); w(name); w('",'); w(m) w('},\n')
169 m = m + #defs[name]
170 end
171 w("{NULL,"); w(m); w("}\n};\n\n")
172 return table.concat(t)
173end
174
175local function write_file(name, data)
176 if name == "-" then
177 assert(io.write(data))
178 assert(io.flush())
179 else
180 local fp = io.open(name)
181 if fp then
182 local old = fp:read("*a")
183 fp:close()
184 if data == old then return end
185 end
186 fp = assert(io.open(name, "w"))
187 assert(fp:write(data))
188 assert(fp:close())
189 end
190end
191
192local outfile = parse_arg(arg)
193local src = read_files(arg)
194local defs = find_defs(src)
195local hdr = gen_header(defs)
196write_file(outfile, hdr)
197
diff --git a/src/jit/bc.lua b/src/jit/bc.lua
index a179d50e..320039ff 100644
--- a/src/jit/bc.lua
+++ b/src/jit/bc.lua
@@ -41,7 +41,7 @@
41 41
42-- Cache some library functions and objects. 42-- Cache some library functions and objects.
43local jit = require("jit") 43local jit = require("jit")
44assert(jit.version_num == 20003, "LuaJIT core/library version mismatch") 44assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
45local jutil = require("jit.util") 45local jutil = require("jit.util")
46local vmdef = require("jit.vmdef") 46local vmdef = require("jit.vmdef")
47local bit = require("bit") 47local bit = require("bit")
@@ -179,13 +179,12 @@ local function bcliston(outfile)
179end 179end
180 180
181-- Public module functions. 181-- Public module functions.
182module(...) 182return {
183 183 line = bcline,
184line = bcline 184 dump = bcdump,
185dump = bcdump 185 targets = bctargets,
186targets = bctargets 186 on = bcliston,
187 187 off = bclistoff,
188on = bcliston 188 start = bcliston -- For -j command line option.
189off = bclistoff 189}
190start = bcliston -- For -j command line option.
191 190
diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua
index 2ba234d0..ef5a1aa9 100644
--- a/src/jit/bcsave.lua
+++ b/src/jit/bcsave.lua
@@ -11,7 +11,7 @@
11------------------------------------------------------------------------------ 11------------------------------------------------------------------------------
12 12
13local jit = require("jit") 13local jit = require("jit")
14assert(jit.version_num == 20003, "LuaJIT core/library version mismatch") 14assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
15local bit = require("bit") 15local bit = require("bit")
16 16
17-- Symbol name prefix for LuaJIT bytecode. 17-- Symbol name prefix for LuaJIT bytecode.
@@ -63,7 +63,7 @@ local map_type = {
63} 63}
64 64
65local map_arch = { 65local map_arch = {
66 x86 = true, x64 = true, arm = true, ppc = true, ppcspe = true, 66 x86 = true, x64 = true, arm = true, ppc = true,
67 mips = true, mipsel = true, 67 mips = true, mipsel = true,
68} 68}
69 69
@@ -202,7 +202,7 @@ typedef struct {
202 local is64, isbe = false, false 202 local is64, isbe = false, false
203 if ctx.arch == "x64" then 203 if ctx.arch == "x64" then
204 is64 = true 204 is64 = true
205 elseif ctx.arch == "ppc" or ctx.arch == "ppcspe" or ctx.arch == "mips" then 205 elseif ctx.arch == "ppc" or ctx.arch == "mips" then
206 isbe = true 206 isbe = true
207 end 207 end
208 208
@@ -237,7 +237,7 @@ typedef struct {
237 hdr.eendian = isbe and 2 or 1 237 hdr.eendian = isbe and 2 or 1
238 hdr.eversion = 1 238 hdr.eversion = 1
239 hdr.type = f16(1) 239 hdr.type = f16(1)
240 hdr.machine = f16(({ x86=3, x64=62, arm=40, ppc=20, ppcspe=20, mips=8, mipsel=8 })[ctx.arch]) 240 hdr.machine = f16(({ x86=3, x64=62, arm=40, ppc=20, mips=8, mipsel=8 })[ctx.arch])
241 if ctx.arch == "mips" or ctx.arch == "mipsel" then 241 if ctx.arch == "mips" or ctx.arch == "mipsel" then
242 hdr.flags = 0x50001006 242 hdr.flags = 0x50001006
243 end 243 end
@@ -653,7 +653,7 @@ end
653------------------------------------------------------------------------------ 653------------------------------------------------------------------------------
654 654
655-- Public module functions. 655-- Public module functions.
656module(...) 656return {
657 657 start = docmd -- Process -b command line option.
658start = docmd -- Process -b command line option. 658}
659 659
diff --git a/src/jit/dis_arm.lua b/src/jit/dis_arm.lua
index 59be715a..dfcbeeec 100644
--- a/src/jit/dis_arm.lua
+++ b/src/jit/dis_arm.lua
@@ -658,7 +658,7 @@ local function disass_block(ctx, ofs, len)
658end 658end
659 659
660-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). 660-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
661local function create_(code, addr, out) 661local function create(code, addr, out)
662 local ctx = {} 662 local ctx = {}
663 ctx.code = code 663 ctx.code = code
664 ctx.addr = addr or 0 664 ctx.addr = addr or 0
@@ -670,20 +670,20 @@ local function create_(code, addr, out)
670end 670end
671 671
672-- Simple API: disassemble code (a string) at address and output via out. 672-- Simple API: disassemble code (a string) at address and output via out.
673local function disass_(code, addr, out) 673local function disass(code, addr, out)
674 create_(code, addr, out):disass() 674 create(code, addr, out):disass()
675end 675end
676 676
677-- Return register name for RID. 677-- Return register name for RID.
678local function regname_(r) 678local function regname(r)
679 if r < 16 then return map_gpr[r] end 679 if r < 16 then return map_gpr[r] end
680 return "d"..(r-16) 680 return "d"..(r-16)
681end 681end
682 682
683-- Public module functions. 683-- Public module functions.
684module(...) 684return {
685 685 create = create,
686create = create_ 686 disass = disass,
687disass = disass_ 687 regname = regname
688regname = regname_ 688}
689 689
diff --git a/src/jit/dis_mips.lua b/src/jit/dis_mips.lua
index acdd2be1..9466f45c 100644
--- a/src/jit/dis_mips.lua
+++ b/src/jit/dis_mips.lua
@@ -384,7 +384,7 @@ local function disass_block(ctx, ofs, len)
384end 384end
385 385
386-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). 386-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
387local function create_(code, addr, out) 387local function create(code, addr, out)
388 local ctx = {} 388 local ctx = {}
389 ctx.code = code 389 ctx.code = code
390 ctx.addr = addr or 0 390 ctx.addr = addr or 0
@@ -396,33 +396,33 @@ local function create_(code, addr, out)
396 return ctx 396 return ctx
397end 397end
398 398
399local function create_el_(code, addr, out) 399local function create_el(code, addr, out)
400 local ctx = create_(code, addr, out) 400 local ctx = create(code, addr, out)
401 ctx.get = get_le 401 ctx.get = get_le
402 return ctx 402 return ctx
403end 403end
404 404
405-- Simple API: disassemble code (a string) at address and output via out. 405-- Simple API: disassemble code (a string) at address and output via out.
406local function disass_(code, addr, out) 406local function disass(code, addr, out)
407 create_(code, addr, out):disass() 407 create(code, addr, out):disass()
408end 408end
409 409
410local function disass_el_(code, addr, out) 410local function disass_el(code, addr, out)
411 create_el_(code, addr, out):disass() 411 create_el(code, addr, out):disass()
412end 412end
413 413
414-- Return register name for RID. 414-- Return register name for RID.
415local function regname_(r) 415local function regname(r)
416 if r < 32 then return map_gpr[r] end 416 if r < 32 then return map_gpr[r] end
417 return "f"..(r-32) 417 return "f"..(r-32)
418end 418end
419 419
420-- Public module functions. 420-- Public module functions.
421module(...) 421return {
422 422 create = create,
423create = create_ 423 create_el = create_el,
424create_el = create_el_ 424 disass = disass,
425disass = disass_ 425 disass_el = disass_el,
426disass_el = disass_el_ 426 regname = regname
427regname = regname_ 427}
428 428
diff --git a/src/jit/dis_mipsel.lua b/src/jit/dis_mipsel.lua
index dd9d26ae..f06ffe85 100644
--- a/src/jit/dis_mipsel.lua
+++ b/src/jit/dis_mipsel.lua
@@ -8,13 +8,10 @@
8-- MIPS disassembler module. All the interesting stuff is there. 8-- MIPS disassembler module. All the interesting stuff is there.
9------------------------------------------------------------------------------ 9------------------------------------------------------------------------------
10 10
11local require = require 11local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips")
12 12return {
13module(...) 13 create = dis_mips.create_el,
14 14 disass = dis_mips.disass_el,
15local dis_mips = require(_PACKAGE.."dis_mips") 15 regname = dis_mips.regname
16 16}
17create = dis_mips.create_el
18disass = dis_mips.disass_el
19regname = dis_mips.regname
20 17
diff --git a/src/jit/dis_ppc.lua b/src/jit/dis_ppc.lua
index d05c4311..e077d7ac 100644
--- a/src/jit/dis_ppc.lua
+++ b/src/jit/dis_ppc.lua
@@ -560,7 +560,7 @@ local function disass_block(ctx, ofs, len)
560end 560end
561 561
562-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). 562-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
563local function create_(code, addr, out) 563local function create(code, addr, out)
564 local ctx = {} 564 local ctx = {}
565 ctx.code = code 565 ctx.code = code
566 ctx.addr = addr or 0 566 ctx.addr = addr or 0
@@ -572,20 +572,20 @@ local function create_(code, addr, out)
572end 572end
573 573
574-- Simple API: disassemble code (a string) at address and output via out. 574-- Simple API: disassemble code (a string) at address and output via out.
575local function disass_(code, addr, out) 575local function disass(code, addr, out)
576 create_(code, addr, out):disass() 576 create(code, addr, out):disass()
577end 577end
578 578
579-- Return register name for RID. 579-- Return register name for RID.
580local function regname_(r) 580local function regname(r)
581 if r < 32 then return map_gpr[r] end 581 if r < 32 then return map_gpr[r] end
582 return "f"..(r-32) 582 return "f"..(r-32)
583end 583end
584 584
585-- Public module functions. 585-- Public module functions.
586module(...) 586return {
587 587 create = create,
588create = create_ 588 disass = disass,
589disass = disass_ 589 regname = regname
590regname = regname_ 590}
591 591
diff --git a/src/jit/dis_x64.lua b/src/jit/dis_x64.lua
index a80981bd..15d55243 100644
--- a/src/jit/dis_x64.lua
+++ b/src/jit/dis_x64.lua
@@ -8,13 +8,10 @@
8-- x86/x64 disassembler module. All the interesting stuff is there. 8-- x86/x64 disassembler module. All the interesting stuff is there.
9------------------------------------------------------------------------------ 9------------------------------------------------------------------------------
10 10
11local require = require 11local dis_x86 = require((string.match(..., ".*%.") or "").."dis_x86")
12 12return {
13module(...) 13 create = dis_x86.create64,
14 14 disass = dis_x86.disass64,
15local dis_x86 = require(_PACKAGE.."dis_x86") 15 regname = dis_x86.regname64
16 16}
17create = dis_x86.create64
18disass = dis_x86.disass64
19regname = dis_x86.regname64
20 17
diff --git a/src/jit/dis_x86.lua b/src/jit/dis_x86.lua
index 078d6094..6bc38066 100644
--- a/src/jit/dis_x86.lua
+++ b/src/jit/dis_x86.lua
@@ -28,6 +28,8 @@ local type = type
28local sub, byte, format = string.sub, string.byte, string.format 28local sub, byte, format = string.sub, string.byte, string.format
29local match, gmatch, gsub = string.match, string.gmatch, string.gsub 29local match, gmatch, gsub = string.match, string.gmatch, string.gsub
30local lower, rep = string.lower, string.rep 30local lower, rep = string.lower, string.rep
31local bit = require("bit")
32local tohex = bit.tohex
31 33
32-- Map for 1st opcode byte in 32 bit mode. Ugly? Well ... read on. 34-- Map for 1st opcode byte in 32 bit mode. Ugly? Well ... read on.
33local map_opc1_32 = { 35local map_opc1_32 = {
@@ -532,7 +534,7 @@ local function putpat(ctx, name, pat)
532 local lo = imm % 0x1000000 534 local lo = imm % 0x1000000
533 x = format("0x%02x%06x", (imm-lo) / 0x1000000, lo) 535 x = format("0x%02x%06x", (imm-lo) / 0x1000000, lo)
534 else 536 else
535 x = format("0x%08x", imm) 537 x = "0x"..tohex(imm)
536 end 538 end
537 elseif p == "R" then 539 elseif p == "R" then
538 local r = byte(code, pos-1, pos-1)%8 540 local r = byte(code, pos-1, pos-1)%8
@@ -782,7 +784,7 @@ local function disass_block(ctx, ofs, len)
782end 784end
783 785
784-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). 786-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
785local function create_(code, addr, out) 787local function create(code, addr, out)
786 local ctx = {} 788 local ctx = {}
787 ctx.code = code 789 ctx.code = code
788 ctx.addr = (addr or 0) - 1 790 ctx.addr = (addr or 0) - 1
@@ -796,8 +798,8 @@ local function create_(code, addr, out)
796 return ctx 798 return ctx
797end 799end
798 800
799local function create64_(code, addr, out) 801local function create64(code, addr, out)
800 local ctx = create_(code, addr, out) 802 local ctx = create(code, addr, out)
801 ctx.x64 = true 803 ctx.x64 = true
802 ctx.map1 = map_opc1_64 804 ctx.map1 = map_opc1_64
803 ctx.aregs = map_regs.Q 805 ctx.aregs = map_regs.Q
@@ -805,32 +807,32 @@ local function create64_(code, addr, out)
805end 807end
806 808
807-- Simple API: disassemble code (a string) at address and output via out. 809-- Simple API: disassemble code (a string) at address and output via out.
808local function disass_(code, addr, out) 810local function disass(code, addr, out)
809 create_(code, addr, out):disass() 811 create(code, addr, out):disass()
810end 812end
811 813
812local function disass64_(code, addr, out) 814local function disass64(code, addr, out)
813 create64_(code, addr, out):disass() 815 create64(code, addr, out):disass()
814end 816end
815 817
816-- Return register name for RID. 818-- Return register name for RID.
817local function regname_(r) 819local function regname(r)
818 if r < 8 then return map_regs.D[r+1] end 820 if r < 8 then return map_regs.D[r+1] end
819 return map_regs.X[r-7] 821 return map_regs.X[r-7]
820end 822end
821 823
822local function regname64_(r) 824local function regname64(r)
823 if r < 16 then return map_regs.Q[r+1] end 825 if r < 16 then return map_regs.Q[r+1] end
824 return map_regs.X[r-15] 826 return map_regs.X[r-15]
825end 827end
826 828
827-- Public module functions. 829-- Public module functions.
828module(...) 830return {
829 831 create = create,
830create = create_ 832 create64 = create64,
831create64 = create64_ 833 disass = disass,
832disass = disass_ 834 disass64 = disass64,
833disass64 = disass64_ 835 regname = regname,
834regname = regname_ 836 regname64 = regname64
835regname64 = regname64_ 837}
836 838
diff --git a/src/jit/dump.lua b/src/jit/dump.lua
index 589543f1..5f858492 100644
--- a/src/jit/dump.lua
+++ b/src/jit/dump.lua
@@ -55,7 +55,7 @@
55 55
56-- Cache some library functions and objects. 56-- Cache some library functions and objects.
57local jit = require("jit") 57local jit = require("jit")
58assert(jit.version_num == 20003, "LuaJIT core/library version mismatch") 58assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
59local jutil = require("jit.util") 59local jutil = require("jit.util")
60local vmdef = require("jit.vmdef") 60local vmdef = require("jit.vmdef")
61local funcinfo, funcbc = jutil.funcinfo, jutil.funcbc 61local funcinfo, funcbc = jutil.funcinfo, jutil.funcbc
@@ -63,7 +63,7 @@ local traceinfo, traceir, tracek = jutil.traceinfo, jutil.traceir, jutil.tracek
63local tracemc, tracesnap = jutil.tracemc, jutil.tracesnap 63local tracemc, tracesnap = jutil.tracemc, jutil.tracesnap
64local traceexitstub, ircalladdr = jutil.traceexitstub, jutil.ircalladdr 64local traceexitstub, ircalladdr = jutil.traceexitstub, jutil.ircalladdr
65local bit = require("bit") 65local bit = require("bit")
66local band, shl, shr = bit.band, bit.lshift, bit.rshift 66local band, shl, shr, tohex = bit.band, bit.lshift, bit.rshift, bit.tohex
67local sub, gsub, format = string.sub, string.gsub, string.format 67local sub, gsub, format = string.sub, string.gsub, string.format
68local byte, char, rep = string.byte, string.char, string.rep 68local byte, char, rep = string.byte, string.char, string.rep
69local type, tostring = type, tostring 69local type, tostring = type, tostring
@@ -91,6 +91,7 @@ local function fillsymtab_tr(tr, nexit)
91 end 91 end
92 for i=0,nexit-1 do 92 for i=0,nexit-1 do
93 local addr = traceexitstub(tr, i) 93 local addr = traceexitstub(tr, i)
94 if addr < 0 then addr = addr + 2^32 end
94 t[addr] = tostring(i) 95 t[addr] = tostring(i)
95 end 96 end
96 local addr = traceexitstub(tr, nexit) 97 local addr = traceexitstub(tr, nexit)
@@ -104,7 +105,10 @@ local function fillsymtab(tr, nexit)
104 local ircall = vmdef.ircall 105 local ircall = vmdef.ircall
105 for i=0,#ircall do 106 for i=0,#ircall do
106 local addr = ircalladdr(i) 107 local addr = ircalladdr(i)
107 if addr ~= 0 then t[addr] = ircall[i] end 108 if addr ~= 0 then
109 if addr < 0 then addr = addr + 2^32 end
110 t[addr] = ircall[i]
111 end
108 end 112 end
109 end 113 end
110 if nexitsym == 1000000 then -- Per-trace exit stubs. 114 if nexitsym == 1000000 then -- Per-trace exit stubs.
@@ -118,6 +122,7 @@ local function fillsymtab(tr, nexit)
118 nexit = 1000000 122 nexit = 1000000
119 break 123 break
120 end 124 end
125 if addr < 0 then addr = addr + 2^32 end
121 t[addr] = tostring(i) 126 t[addr] = tostring(i)
122 end 127 end
123 nexitsym = nexit 128 nexitsym = nexit
@@ -136,6 +141,7 @@ local function dump_mcode(tr)
136 local mcode, addr, loop = tracemc(tr) 141 local mcode, addr, loop = tracemc(tr)
137 if not mcode then return end 142 if not mcode then return end
138 if not disass then disass = require("jit.dis_"..jit.arch) end 143 if not disass then disass = require("jit.dis_"..jit.arch) end
144 if addr < 0 then addr = addr + 2^32 end
139 out:write("---- TRACE ", tr, " mcode ", #mcode, "\n") 145 out:write("---- TRACE ", tr, " mcode ", #mcode, "\n")
140 local ctx = disass.create(mcode, addr, dumpwrite) 146 local ctx = disass.create(mcode, addr, dumpwrite)
141 ctx.hexdump = 0 147 ctx.hexdump = 0
@@ -270,8 +276,7 @@ local litname = {
270 ["CONV "] = setmetatable({}, { __index = function(t, mode) 276 ["CONV "] = setmetatable({}, { __index = function(t, mode)
271 local s = irtype[band(mode, 31)] 277 local s = irtype[band(mode, 31)]
272 s = irtype[band(shr(mode, 5), 31)].."."..s 278 s = irtype[band(shr(mode, 5), 31)].."."..s
273 if band(mode, 0x400) ~= 0 then s = s.." trunc" 279 if band(mode, 0x800) ~= 0 then s = s.." sext" end
274 elseif band(mode, 0x800) ~= 0 then s = s.." sext" end
275 local c = shr(mode, 14) 280 local c = shr(mode, 14)
276 if c == 2 then s = s.." index" elseif c == 3 then s = s.." check" end 281 if c == 2 then s = s.." index" elseif c == 3 then s = s.." check" end
277 t[mode] = s 282 t[mode] = s
@@ -280,6 +285,8 @@ local litname = {
280 ["FLOAD "] = vmdef.irfield, 285 ["FLOAD "] = vmdef.irfield,
281 ["FREF "] = vmdef.irfield, 286 ["FREF "] = vmdef.irfield,
282 ["FPMATH"] = vmdef.irfpm, 287 ["FPMATH"] = vmdef.irfpm,
288 ["BUFHDR"] = { [0] = "RESET", "APPEND" },
289 ["TOSTR "] = { [0] = "INT", "NUM", "CHAR" },
283} 290}
284 291
285local function ctlsub(c) 292local function ctlsub(c)
@@ -607,7 +614,7 @@ local function dump_texit(tr, ex, ngpr, nfpr, ...)
607 end 614 end
608 else 615 else
609 for i=1,ngpr do 616 for i=1,ngpr do
610 out:write(format(" %08x", regs[i])) 617 out:write(" ", tohex(regs[i]))
611 if i % 8 == 0 then out:write("\n") end 618 if i % 8 == 0 then out:write("\n") end
612 end 619 end
613 end 620 end
@@ -691,9 +698,9 @@ local function dumpon(opt, outfile)
691end 698end
692 699
693-- Public module functions. 700-- Public module functions.
694module(...) 701return {
695 702 on = dumpon,
696on = dumpon 703 off = dumpoff,
697off = dumpoff 704 start = dumpon -- For -j command line option.
698start = dumpon -- For -j command line option. 705}
699 706
diff --git a/src/jit/p.lua b/src/jit/p.lua
new file mode 100644
index 00000000..97d4ccdf
--- /dev/null
+++ b/src/jit/p.lua
@@ -0,0 +1,310 @@
1----------------------------------------------------------------------------
2-- LuaJIT profiler.
3--
4-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
5-- Released under the MIT license. See Copyright Notice in luajit.h
6----------------------------------------------------------------------------
7--
8-- This module is a simple command line interface to the built-in
9-- low-overhead profiler of LuaJIT.
10--
11-- The lower-level API of the profiler is accessible via the "jit.profile"
12-- module or the luaJIT_profile_* C API.
13--
14-- Example usage:
15--
16-- luajit -jp myapp.lua
17-- luajit -jp=s myapp.lua
18-- luajit -jp=-s myapp.lua
19-- luajit -jp=vl myapp.lua
20-- luajit -jp=G,profile.txt myapp.lua
21--
22-- The following dump features are available:
23--
24-- f Stack dump: function name, otherwise module:line. Default mode.
25-- F Stack dump: ditto, but always prepend module.
26-- l Stack dump: module:line.
27-- <number> stack dump depth (callee < caller). Default: 1.
28-- -<number> Inverse stack dump depth (caller > callee).
29-- s Split stack dump after first stack level. Implies abs(depth) >= 2.
30-- p Show full path for module names.
31-- v Show VM states. Can be combined with stack dumps, e.g. vf or fv.
32-- z Show zones. Can be combined with stack dumps, e.g. zf or fz.
33-- r Show raw sample counts. Default: show percentages.
34-- a Annotate excerpts from source code files.
35-- A Annotate complete source code files.
36-- G Produce raw output suitable for graphical tools (e.g. flame graphs).
37-- m<number> Minimum sample percentage to be shown. Default: 3.
38-- i<number> Sampling interval in milliseconds. Default: 10.
39--
40----------------------------------------------------------------------------
41
42-- Cache some library functions and objects.
43local jit = require("jit")
44assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
45local profile = require("jit.profile")
46local vmdef = require("jit.vmdef")
47local math = math
48local pairs, ipairs, tonumber, floor = pairs, ipairs, tonumber, math.floor
49local sort, format = table.sort, string.format
50local stdout = io.stdout
51local zone -- Load jit.zone module on demand.
52
53-- Output file handle.
54local out
55
56------------------------------------------------------------------------------
57
58local prof_ud
59local prof_states, prof_split, prof_min, prof_raw, prof_fmt, prof_depth
60local prof_ann, prof_count1, prof_count2, prof_samples
61
62local map_vmmode = {
63 N = "Compiled",
64 I = "Interpreted",
65 C = "C code",
66 G = "Garbage Collector",
67 J = "JIT Compiler",
68}
69
-- Profiler callback. Accumulates one batch of samples into the count tables.
--   th       Thread passed through to profile.dumpstack().
--   samples  Number of samples represented by this callback.
--   vmmode   VM state letter; translated via map_vmmode when known.
local function prof_cb(th, samples, vmmode)
  prof_samples = prof_samples + samples

  -- State key: either the VM state name or the current zone.
  local state_key
  if prof_states == "v" then
    state_key = map_vmmode[vmmode] or vmmode
  elseif prof_states then
    state_key = zone:get() or "(none)"
  end

  -- Stack key(s): only collected when a stack dump format is set.
  local stack_key, stack_key2
  if prof_fmt then
    local dump = profile.dumpstack(th, prof_fmt, prof_depth)
    -- Substitute "[builtin#N]" placeholders with names from vmdef.ffnames.
    stack_key = dump:gsub("%[builtin#(%d+)%]", function(id)
      return vmdef.ffnames[tonumber(id)]
    end)
    if prof_split == 2 then
      -- Split the dump after the first stack level.
      local head, tail = stack_key:match("(.-) [<>] (.*)")
      if tail then stack_key, stack_key2 = head, tail end
    elseif prof_split == 3 then
      stack_key2 = profile.dumpstack(th, "l", 1)
    end
  end

  -- Decide which key is the primary and which the secondary level.
  local primary, secondary
  if prof_split == 1 then
    if state_key then primary, secondary = state_key, stack_key end
  elseif stack_key then
    primary, secondary = stack_key, stack_key2 or state_key
  end
  if not primary then return end

  -- Coalesce samples in one or two levels.
  prof_count1[primary] = (prof_count1[primary] or 0) + samples
  if secondary then
    local sub = prof_count2[primary]
    if not sub then
      sub = {}
      prof_count2[primary] = sub
    end
    sub[secondary] = (sub[secondary] or 0) + samples
  end
end
117
118------------------------------------------------------------------------------
119
-- Show top N list.
--   count1   Table mapping keys to sample counts for this level.
--   count2   Optional table mapping keys to second-level count tables.
--   samples  Sample total that percentages at this level are relative to.
--   indent   Prefix written before every line of this level.
local function prof_top(count1, count2, samples, indent)
  -- Gather the keys and order them by descending sample count.
  local keys = {}
  for key in pairs(count1) do keys[#keys+1] = key end
  sort(keys, function(a, b) return count1[a] > count1[b] end)

  for _, key in ipairs(keys) do
    local cnt = count1[key]
    local pct = floor(cnt*100/samples + 0.5)
    -- Keys are sorted, so everything after the first small entry is small too.
    if pct < prof_min then break end

    local text
    if prof_raw == "r" then
      text = format("%s%5d %s\n", indent, cnt, key)
    elseif prof_raw then
      text = format("%s %d\n", key, cnt)
    else
      text = format("%s%2d%% %s\n", indent, pct, key)
    end
    out:write(text)

    -- Recurse into the second level, relative to this key's own count.
    local sub = count2 and count2[key]
    if sub then
      local subindent
      if prof_split == 3 or prof_split == 1 then
        subindent = " -- "
      elseif prof_depth < 0 then
        subindent = " -> "
      else
        subindent = " <- "
      end
      prof_top(sub, nil, cnt, subindent)
    end
  end
end
149
-- Annotate source code.
--   count1   Table mapping "file:line" keys to sample counts.
--   samples  Total number of samples, used to compute percentages.
-- Writes an annotated listing to the output handle for each file that has at
-- least one line reaching prof_min percent. prof_ann selects the mode:
-- 0 annotates complete files, any other value is the number of context lines
-- shown around each annotated line.
local function prof_annotate(count1, samples)
  -- Array part: file names. Hash part: file name -> { line -> value }.
  local files = {}
  local ms = 0  -- Largest sample count seen; sizes the raw-count column.
  for k, v in pairs(count1) do
    local pct = floor(v*100/samples + 0.5)
    ms = math.max(ms, v)
    if pct >= prof_min then
      local file, line = k:match("^(.*):(%d+)$")
      -- A key without a ":line" suffix must not index the table with nil.
      if not file then file = k; line = 0 end
      local fl = files[file]
      if not fl then fl = {}; files[file] = fl; files[#files+1] = file end
      line = tonumber(line)
      fl[line] = prof_raw and v or pct
    end
  end
  sort(files)
  -- Lines without a value are padded to the width of the value column.
  local fmtv, fmtn = " %3d%% | %s\n", (" "):rep(5).." | %s\n"
  if prof_raw then
    local n = math.max(5, math.ceil(math.log10(ms)))
    fmtv = "%"..n.."d | %s\n"
    fmtn = (" "):rep(n).." | %s\n"
  end
  local ann = prof_ann
  for _, file in ipairs(files) do
    local f0 = file:byte()
    if f0 == 40 or f0 == 91 then  -- Name starts with "(" or "[".
      out:write(format("\n====== %s ======\n[Cannot annotate non-file]\n", file))
      break
    end
    local fp, err = io.open(file)
    if not fp then
      out:write(format("====== ERROR: %s: %s\n", file, err))
      break
    end
    out:write(format("\n====== %s ======\n", file))
    local fl = files[file]
    -- In excerpt mode 'show' is false or the number of the last annotated
    -- line that keeps the current excerpt open.
    local n, show = 1, false
    if ann ~= 0 then
      for i=1,ann do
        -- Store the line number, not true: 'show' is used in arithmetic below.
        if fl[i] then show = i; out:write("@@ 1 @@\n"); break end
      end
    end
    for line in fp:lines() do
      if line:byte() == 27 then  -- First byte is ESC.
        out:write("[Cannot annotate bytecode file]\n")
        break
      end
      local v = fl[n]
      if ann ~= 0 then
        local v2 = fl[n+ann]
        if show then
          if v2 then show = n+ann elseif v then show = n
          elseif show+ann < n then show = false end
        elseif v2 then
          show = n+ann
          out:write(format("@@ %d @@\n", n))
        end
        if not show then goto next end
      end
      if v then
        out:write(format(fmtv, v, line))
      else
        out:write(format(fmtn, line))
      end
      ::next::
      n = n + 1
    end
    fp:close()
  end
end
220
221------------------------------------------------------------------------------
222
-- Finish profiling and dump result.
-- Does nothing unless a profiling run is active (prof_ud is set). Otherwise
-- it stops the profiler, writes the report (or a notice when no samples were
-- collected) and resets the run state, so that a second call is a no-op.
-- This function is both exported as stop() and installed as the __gc
-- handler of prof_ud.
local function prof_finish()
  if not prof_ud then return end
  profile.stop()
  local samples = prof_samples
  if samples == 0 then
    if prof_raw ~= true then out:write("[No samples collected]\n") end
  elseif prof_ann then
    prof_annotate(prof_count1, samples)
  else
    prof_top(prof_count1, prof_count2, samples, "")
  end
  -- Reset the state on every path, including the zero-sample case.
  prof_count1 = nil
  prof_count2 = nil
  prof_ud = nil
  -- Release an output file opened by start(); stdout is left open.
  if out ~= stdout then out:close() end
end
242
-- Start profiling.
--   mode  Option string made of the feature letters and numbers described in
--         the header of this file.
-- Parses the options into the prof_* state variables, resets the counters,
-- starts the low-level profiler and creates the prof_ud proxy whose __gc
-- handler is prof_finish.
local function prof_start(mode)
  -- Numeric options are stripped first. The order matters: the plain depth
  -- pattern would otherwise consume the digits belonging to "i" and "m".
  local interval = ""
  mode = mode:gsub("i%d*", function(opt) interval = opt; return "" end)
  prof_min = 3
  mode = mode:gsub("m(%d+)", function(num) prof_min = tonumber(num); return "" end)
  prof_depth = 1
  mode = mode:gsub("%-?%d+", function(num) prof_depth = tonumber(num); return "" end)

  -- Every remaining character is a single-letter flag.
  local flag = {}
  for ch in mode:gmatch(".") do flag[ch] = ch end

  prof_states = flag.z or flag.v
  if prof_states == "z" then zone = require("jit.zone") end
  local scope = flag.l or flag.f or flag.F or (prof_states and "" or "f")
  local pathflag = flag.p or ""
  prof_raw = flag.r

  -- Select how samples are split into first and second level keys.
  if flag.s then
    prof_split = 2
    if prof_depth == -1 or flag["-"] then
      prof_depth = -2
    elseif prof_depth == 1 then
      prof_depth = 2
    end
  elseif mode:find("[fF].*l") then
    scope = "l"
    prof_split = 3
  else
    prof_split = (scope == "" or mode:find("[zv].*[lfF]")) and 1 or 0
  end

  -- Select the stack dump format.
  prof_ann = flag.A and 0 or (flag.a and 3)
  if prof_ann then
    scope = "l"
    prof_fmt = "pl"
    prof_split = 0
    prof_depth = 1
  elseif flag.G and scope ~= "" then
    prof_fmt = pathflag..scope.."Z;"
    prof_depth = -100
    prof_raw = true
    prof_min = 0
  elseif scope == "" then
    prof_fmt = false
  else
    local sc = prof_split == 3 and flag.f or flag.F or scope
    local sep
    if prof_depth >= 0 then sep = "Z < " else sep = "Z > " end
    prof_fmt = pathflag..sc..sep
  end

  -- Fresh counters, then start sampling.
  prof_count1 = {}
  prof_count2 = {}
  prof_samples = 0
  profile.start(scope:lower()..interval, prof_cb)
  prof_ud = newproxy(true)
  getmetatable(prof_ud).__gc = prof_finish
end
292
293------------------------------------------------------------------------------
294
-- Select the output destination and start the profiler.
--   mode     Profiler option string; "f" is used when omitted.
--   outfile  Output file name; falls back to the LUAJIT_PROFILEFILE
--            environment variable. "-" or no name at all selects stdout.
local function start(mode, outfile)
  outfile = outfile or os.getenv("LUAJIT_PROFILEFILE")
  if outfile and outfile ~= "-" then
    out = assert(io.open(outfile, "w"))
  else
    out = stdout
  end
  prof_start(mode or "f")
end
304
305-- Public module functions.
306return {
307 start = start, -- For -j command line option.
308 stop = prof_finish
309}
310
diff --git a/src/jit/v.lua b/src/jit/v.lua
index c622443d..157c34bc 100644
--- a/src/jit/v.lua
+++ b/src/jit/v.lua
@@ -59,7 +59,7 @@
59 59
60-- Cache some library functions and objects. 60-- Cache some library functions and objects.
61local jit = require("jit") 61local jit = require("jit")
62assert(jit.version_num == 20003, "LuaJIT core/library version mismatch") 62assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
63local jutil = require("jit.util") 63local jutil = require("jit.util")
64local vmdef = require("jit.vmdef") 64local vmdef = require("jit.vmdef")
65local funcinfo, traceinfo = jutil.funcinfo, jutil.traceinfo 65local funcinfo, traceinfo = jutil.funcinfo, jutil.traceinfo
@@ -116,6 +116,9 @@ local function dump_trace(what, tr, func, pc, otr, oex)
116 if ltype == "interpreter" then 116 if ltype == "interpreter" then
117 out:write(format("[TRACE %3s %s%s -- fallback to interpreter]\n", 117 out:write(format("[TRACE %3s %s%s -- fallback to interpreter]\n",
118 tr, startex, startloc)) 118 tr, startex, startloc))
119 elseif ltype == "stitch" then
120 out:write(format("[TRACE %3s %s%s %s %s]\n",
121 tr, startex, startloc, ltype, fmtfunc(func, pc)))
119 elseif link == tr or link == 0 then 122 elseif link == tr or link == 0 then
120 out:write(format("[TRACE %3s %s%s %s]\n", 123 out:write(format("[TRACE %3s %s%s %s]\n",
121 tr, startex, startloc, ltype)) 124 tr, startex, startloc, ltype))
@@ -159,9 +162,9 @@ local function dumpon(outfile)
159end 162end
160 163
161-- Public module functions. 164-- Public module functions.
162module(...) 165return {
163 166 on = dumpon,
164on = dumpon 167 off = dumpoff,
165off = dumpoff 168 start = dumpon -- For -j command line option.
166start = dumpon -- For -j command line option. 169}
167 170
diff --git a/src/jit/zone.lua b/src/jit/zone.lua
new file mode 100644
index 00000000..69f0f169
--- /dev/null
+++ b/src/jit/zone.lua
@@ -0,0 +1,45 @@
1----------------------------------------------------------------------------
2-- LuaJIT profiler zones.
3--
4-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
5-- Released under the MIT license. See Copyright Notice in luajit.h
6----------------------------------------------------------------------------
7--
8-- This module implements a simple hierarchical zone model.
9--
10-- Example usage:
11--
12-- local zone = require("jit.zone")
13-- zone("AI")
14-- ...
15-- zone("A*")
16-- ...
17-- print(zone:get()) --> "A*"
18-- ...
19-- zone()
20-- ...
21-- print(zone:get()) --> "AI"
22-- ...
23-- zone()
24--
25----------------------------------------------------------------------------
26
27local remove = table.remove
28
-- The zone object. Its array part is the stack of active zone names.
local zones = {}

-- Drop all active zones.
function zones.flush(t)
  for i=#t,1,-1 do t[i] = nil end
end

-- Return the innermost active zone, or nil if no zone is active.
function zones.get(t)
  return t[#t]
end

return setmetatable(zones, {
  -- zone(name) enters a zone. zone() leaves the innermost zone and returns
  -- its name; it raises an error if no zone is active.
  __call = function(t, zone)
    if not zone then
      return (assert(remove(t), "empty zone stack"))
    end
    t[#t+1] = zone
  end
})
45
diff --git a/src/lib_base.c b/src/lib_base.c
index 17b9525d..35ccdbc7 100644
--- a/src/lib_base.c
+++ b/src/lib_base.c
@@ -32,6 +32,7 @@
32#include "lj_dispatch.h" 32#include "lj_dispatch.h"
33#include "lj_char.h" 33#include "lj_char.h"
34#include "lj_strscan.h" 34#include "lj_strscan.h"
35#include "lj_strfmt.h"
35#include "lj_lib.h" 36#include "lj_lib.h"
36 37
37/* -- Base library: checks ------------------------------------------------ */ 38/* -- Base library: checks ------------------------------------------------ */
@@ -86,10 +87,11 @@ static int ffh_pairs(lua_State *L, MMS mm)
86 cTValue *mo = lj_meta_lookup(L, o, mm); 87 cTValue *mo = lj_meta_lookup(L, o, mm);
87 if ((LJ_52 || tviscdata(o)) && !tvisnil(mo)) { 88 if ((LJ_52 || tviscdata(o)) && !tvisnil(mo)) {
88 L->top = o+1; /* Only keep one argument. */ 89 L->top = o+1; /* Only keep one argument. */
89 copyTV(L, L->base-1, mo); /* Replace callable. */ 90 copyTV(L, L->base-1-LJ_FR2, mo); /* Replace callable. */
90 return FFH_TAILCALL; 91 return FFH_TAILCALL;
91 } else { 92 } else {
92 if (!tvistab(o)) lj_err_argt(L, 1, LUA_TTABLE); 93 if (!tvistab(o)) lj_err_argt(L, 1, LUA_TTABLE);
94 if (LJ_FR2) { copyTV(L, o-1, o); o--; }
93 setfuncV(L, o-1, funcV(lj_lib_upvalue(L, 1))); 95 setfuncV(L, o-1, funcV(lj_lib_upvalue(L, 1)));
94 if (mm == MM_pairs) setnilV(o+1); else setintV(o+1, 0); 96 if (mm == MM_pairs) setnilV(o+1); else setintV(o+1, 0);
95 return FFH_RES(3); 97 return FFH_RES(3);
@@ -100,7 +102,7 @@ static int ffh_pairs(lua_State *L, MMS mm)
100#endif 102#endif
101 103
102LJLIB_PUSH(lastcl) 104LJLIB_PUSH(lastcl)
103LJLIB_ASM(pairs) 105LJLIB_ASM(pairs) LJLIB_REC(xpairs 0)
104{ 106{
105 return ffh_pairs(L, MM_pairs); 107 return ffh_pairs(L, MM_pairs);
106} 108}
@@ -113,7 +115,7 @@ LJLIB_NOREGUV LJLIB_ASM(ipairs_aux) LJLIB_REC(.)
113} 115}
114 116
115LJLIB_PUSH(lastcl) 117LJLIB_PUSH(lastcl)
116LJLIB_ASM(ipairs) LJLIB_REC(.) 118LJLIB_ASM(ipairs) LJLIB_REC(xpairs 1)
117{ 119{
118 return ffh_pairs(L, MM_ipairs); 120 return ffh_pairs(L, MM_ipairs);
119} 121}
@@ -131,11 +133,11 @@ LJLIB_ASM(setmetatable) LJLIB_REC(.)
131 lj_err_caller(L, LJ_ERR_PROTMT); 133 lj_err_caller(L, LJ_ERR_PROTMT);
132 setgcref(t->metatable, obj2gco(mt)); 134 setgcref(t->metatable, obj2gco(mt));
133 if (mt) { lj_gc_objbarriert(L, t, mt); } 135 if (mt) { lj_gc_objbarriert(L, t, mt); }
134 settabV(L, L->base-1, t); 136 settabV(L, L->base-1-LJ_FR2, t);
135 return FFH_RES(1); 137 return FFH_RES(1);
136} 138}
137 139
138LJLIB_CF(getfenv) 140LJLIB_CF(getfenv) LJLIB_REC(.)
139{ 141{
140 GCfunc *fn; 142 GCfunc *fn;
141 cTValue *o = L->base; 143 cTValue *o = L->base;
@@ -144,6 +146,7 @@ LJLIB_CF(getfenv)
144 o = lj_debug_frame(L, level, &level); 146 o = lj_debug_frame(L, level, &level);
145 if (o == NULL) 147 if (o == NULL)
146 lj_err_arg(L, 1, LJ_ERR_INVLVL); 148 lj_err_arg(L, 1, LJ_ERR_INVLVL);
149 if (LJ_FR2) o--;
147 } 150 }
148 fn = &gcval(o)->fn; 151 fn = &gcval(o)->fn;
149 settabV(L, L->top++, isluafunc(fn) ? tabref(fn->l.env) : tabref(L->env)); 152 settabV(L, L->top++, isluafunc(fn) ? tabref(fn->l.env) : tabref(L->env));
@@ -165,6 +168,7 @@ LJLIB_CF(setfenv)
165 o = lj_debug_frame(L, level, &level); 168 o = lj_debug_frame(L, level, &level);
166 if (o == NULL) 169 if (o == NULL)
167 lj_err_arg(L, 1, LJ_ERR_INVLVL); 170 lj_err_arg(L, 1, LJ_ERR_INVLVL);
171 if (LJ_FR2) o--;
168 } 172 }
169 fn = &gcval(o)->fn; 173 fn = &gcval(o)->fn;
170 if (!isluafunc(fn)) 174 if (!isluafunc(fn))
@@ -257,7 +261,7 @@ LJLIB_ASM(tonumber) LJLIB_REC(.)
257 if (base == 10) { 261 if (base == 10) {
258 TValue *o = lj_lib_checkany(L, 1); 262 TValue *o = lj_lib_checkany(L, 1);
259 if (lj_strscan_numberobj(o)) { 263 if (lj_strscan_numberobj(o)) {
260 copyTV(L, L->base-1, o); 264 copyTV(L, L->base-1-LJ_FR2, o);
261 return FFH_RES(1); 265 return FFH_RES(1);
262 } 266 }
263#if LJ_HASFFI 267#if LJ_HASFFI
@@ -270,11 +274,11 @@ LJLIB_ASM(tonumber) LJLIB_REC(.)
270 ct->size <= 4 && !(ct->size == 4 && (ct->info & CTF_UNSIGNED))) { 274 ct->size <= 4 && !(ct->size == 4 && (ct->info & CTF_UNSIGNED))) {
271 int32_t i; 275 int32_t i;
272 lj_cconv_ct_tv(cts, ctype_get(cts, CTID_INT32), (uint8_t *)&i, o, 0); 276 lj_cconv_ct_tv(cts, ctype_get(cts, CTID_INT32), (uint8_t *)&i, o, 0);
273 setintV(L->base-1, i); 277 setintV(L->base-1-LJ_FR2, i);
274 return FFH_RES(1); 278 return FFH_RES(1);
275 } 279 }
276 lj_cconv_ct_tv(cts, ctype_get(cts, CTID_DOUBLE), 280 lj_cconv_ct_tv(cts, ctype_get(cts, CTID_DOUBLE),
277 (uint8_t *)&(L->base-1)->n, o, 0); 281 (uint8_t *)&(L->base-1-LJ_FR2)->n, o, 0);
278 return FFH_RES(1); 282 return FFH_RES(1);
279 } 283 }
280 } 284 }
@@ -290,45 +294,29 @@ LJLIB_ASM(tonumber) LJLIB_REC(.)
290 while (lj_char_isspace((unsigned char)(*ep))) ep++; 294 while (lj_char_isspace((unsigned char)(*ep))) ep++;
291 if (*ep == '\0') { 295 if (*ep == '\0') {
292 if (LJ_DUALNUM && LJ_LIKELY(ul < 0x80000000u)) 296 if (LJ_DUALNUM && LJ_LIKELY(ul < 0x80000000u))
293 setintV(L->base-1, (int32_t)ul); 297 setintV(L->base-1-LJ_FR2, (int32_t)ul);
294 else 298 else
295 setnumV(L->base-1, (lua_Number)ul); 299 setnumV(L->base-1-LJ_FR2, (lua_Number)ul);
296 return FFH_RES(1); 300 return FFH_RES(1);
297 } 301 }
298 } 302 }
299 } 303 }
300 setnilV(L->base-1); 304 setnilV(L->base-1-LJ_FR2);
301 return FFH_RES(1); 305 return FFH_RES(1);
302} 306}
303 307
304LJLIB_PUSH("nil")
305LJLIB_PUSH("false")
306LJLIB_PUSH("true")
307LJLIB_ASM(tostring) LJLIB_REC(.) 308LJLIB_ASM(tostring) LJLIB_REC(.)
308{ 309{
309 TValue *o = lj_lib_checkany(L, 1); 310 TValue *o = lj_lib_checkany(L, 1);
310 cTValue *mo; 311 cTValue *mo;
311 L->top = o+1; /* Only keep one argument. */ 312 L->top = o+1; /* Only keep one argument. */
312 if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) { 313 if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
313 copyTV(L, L->base-1, mo); /* Replace callable. */ 314 copyTV(L, L->base-1-LJ_FR2, mo); /* Replace callable. */
314 return FFH_TAILCALL; 315 return FFH_TAILCALL;
315 } else {
316 GCstr *s;
317 if (tvisnumber(o)) {
318 s = lj_str_fromnumber(L, o);
319 } else if (tvispri(o)) {
320 s = strV(lj_lib_upvalue(L, -(int32_t)itype(o)));
321 } else {
322 if (tvisfunc(o) && isffunc(funcV(o)))
323 lua_pushfstring(L, "function: builtin#%d", funcV(o)->c.ffid);
324 else
325 lua_pushfstring(L, "%s: %p", lj_typename(o), lua_topointer(L, 1));
326 /* Note: lua_pushfstring calls the GC which may invalidate o. */
327 s = strV(L->top-1);
328 }
329 setstrV(L, L->base-1, s);
330 return FFH_RES(1);
331 } 316 }
317 lj_gc_check(L);
318 setstrV(L, L->base-1-LJ_FR2, lj_strfmt_obj(L, L->base));
319 return FFH_RES(1);
332} 320}
333 321
334/* -- Base library: throw and catch errors -------------------------------- */ 322/* -- Base library: throw and catch errors -------------------------------- */
@@ -506,21 +494,13 @@ LJLIB_CF(print)
506 } 494 }
507 shortcut = (tvisfunc(tv) && funcV(tv)->c.ffid == FF_tostring); 495 shortcut = (tvisfunc(tv) && funcV(tv)->c.ffid == FF_tostring);
508 for (i = 0; i < nargs; i++) { 496 for (i = 0; i < nargs; i++) {
497 cTValue *o = &L->base[i];
498 char buf[STRFMT_MAXBUF_NUM];
509 const char *str; 499 const char *str;
510 size_t size; 500 size_t size;
511 cTValue *o = &L->base[i]; 501 MSize len;
512 if (shortcut && tvisstr(o)) { 502 if (shortcut && (str = lj_strfmt_wstrnum(buf, o, &len)) != NULL) {
513 str = strVdata(o); 503 size = len;
514 size = strV(o)->len;
515 } else if (shortcut && tvisint(o)) {
516 char buf[LJ_STR_INTBUF];
517 char *p = lj_str_bufint(buf, intV(o));
518 size = (size_t)(buf+LJ_STR_INTBUF-p);
519 str = p;
520 } else if (shortcut && tvisnum(o)) {
521 char buf[LJ_STR_NUMBUF];
522 size = lj_str_bufnum(buf, o);
523 str = buf;
524 } else { 504 } else {
525 copyTV(L, L->top+1, o); 505 copyTV(L, L->top+1, o);
526 copyTV(L, L->top, L->top-1); 506 copyTV(L, L->top, L->top-1);
@@ -558,7 +538,7 @@ LJLIB_CF(coroutine_status)
558 if (co == L) s = "running"; 538 if (co == L) s = "running";
559 else if (co->status == LUA_YIELD) s = "suspended"; 539 else if (co->status == LUA_YIELD) s = "suspended";
560 else if (co->status != 0) s = "dead"; 540 else if (co->status != 0) s = "dead";
561 else if (co->base > tvref(co->stack)+1) s = "normal"; 541 else if (co->base > tvref(co->stack)+1+LJ_FR2) s = "normal";
562 else if (co->top == co->base) s = "dead"; 542 else if (co->top == co->base) s = "dead";
563 else s = "suspended"; 543 else s = "suspended";
564 lua_pushstring(L, s); 544 lua_pushstring(L, s);
@@ -600,8 +580,8 @@ static int ffh_resume(lua_State *L, lua_State *co, int wrap)
600 (co->status == 0 && co->top == co->base)) { 580 (co->status == 0 && co->top == co->base)) {
601 ErrMsg em = co->cframe ? LJ_ERR_CORUN : LJ_ERR_CODEAD; 581 ErrMsg em = co->cframe ? LJ_ERR_CORUN : LJ_ERR_CODEAD;
602 if (wrap) lj_err_caller(L, em); 582 if (wrap) lj_err_caller(L, em);
603 setboolV(L->base-1, 0); 583 setboolV(L->base-1-LJ_FR2, 0);
604 setstrV(L, L->base, lj_err_str(L, em)); 584 setstrV(L, L->base-LJ_FR2, lj_err_str(L, em));
605 return FFH_RES(2); 585 return FFH_RES(2);
606 } 586 }
607 lj_state_growstack(co, (MSize)(L->top - L->base)); 587 lj_state_growstack(co, (MSize)(L->top - L->base));
@@ -642,9 +622,10 @@ static void setpc_wrap_aux(lua_State *L, GCfunc *fn);
642 622
643LJLIB_CF(coroutine_wrap) 623LJLIB_CF(coroutine_wrap)
644{ 624{
625 GCfunc *fn;
645 lj_cf_coroutine_create(L); 626 lj_cf_coroutine_create(L);
646 lj_lib_pushcc(L, lj_ffh_coroutine_wrap_aux, FF_coroutine_wrap_aux, 1); 627 fn = lj_lib_pushcc(L, lj_ffh_coroutine_wrap_aux, FF_coroutine_wrap_aux, 1);
647 setpc_wrap_aux(L, funcV(L->top-1)); 628 setpc_wrap_aux(L, fn);
648 return 1; 629 return 1;
649} 630}
650 631
diff --git a/src/lib_bit.c b/src/lib_bit.c
index 583e04b0..55cb2a84 100644
--- a/src/lib_bit.c
+++ b/src/lib_bit.c
@@ -12,26 +12,99 @@
12 12
13#include "lj_obj.h" 13#include "lj_obj.h"
14#include "lj_err.h" 14#include "lj_err.h"
15#include "lj_str.h" 15#include "lj_buf.h"
16#include "lj_strscan.h"
17#include "lj_strfmt.h"
18#if LJ_HASFFI
19#include "lj_ctype.h"
20#include "lj_cdata.h"
21#include "lj_cconv.h"
22#include "lj_carith.h"
23#endif
24#include "lj_ff.h"
16#include "lj_lib.h" 25#include "lj_lib.h"
17 26
18/* ------------------------------------------------------------------------ */ 27/* ------------------------------------------------------------------------ */
19 28
20#define LJLIB_MODULE_bit 29#define LJLIB_MODULE_bit
21 30
22LJLIB_ASM(bit_tobit) LJLIB_REC(bit_unary IR_TOBIT) 31#if LJ_HASFFI
32static int bit_result64(lua_State *L, CTypeID id, uint64_t x)
23{ 33{
34 GCcdata *cd = lj_cdata_new_(L, id, 8);
35 *(uint64_t *)cdataptr(cd) = x;
36 setcdataV(L, L->base-1-LJ_FR2, cd);
37 return FFH_RES(1);
38}
39#else
40static int32_t bit_checkbit(lua_State *L, int narg)
41{
42 TValue *o = L->base + narg-1;
43 if (!(o < L->top && lj_strscan_numberobj(o)))
44 lj_err_argt(L, narg, LUA_TNUMBER);
45 if (LJ_LIKELY(tvisint(o))) {
46 return intV(o);
47 } else {
48 int32_t i = lj_num2bit(numV(o));
49 if (LJ_DUALNUM) setintV(o, i);
50 return i;
51 }
52}
53#endif
54
55LJLIB_ASM(bit_tobit) LJLIB_REC(bit_tobit)
56{
57#if LJ_HASFFI
58 CTypeID id = 0;
59 setintV(L->base-1-LJ_FR2, (int32_t)lj_carith_check64(L, 1, &id));
60 return FFH_RES(1);
61#else
62 lj_lib_checknumber(L, 1);
63 return FFH_RETRY;
64#endif
65}
66
67LJLIB_ASM(bit_bnot) LJLIB_REC(bit_unary IR_BNOT)
68{
69#if LJ_HASFFI
70 CTypeID id = 0;
71 uint64_t x = lj_carith_check64(L, 1, &id);
72 return id ? bit_result64(L, id, ~x) : FFH_RETRY;
73#else
24 lj_lib_checknumber(L, 1); 74 lj_lib_checknumber(L, 1);
25 return FFH_RETRY; 75 return FFH_RETRY;
76#endif
77}
78
79LJLIB_ASM(bit_bswap) LJLIB_REC(bit_unary IR_BSWAP)
80{
81#if LJ_HASFFI
82 CTypeID id = 0;
83 uint64_t x = lj_carith_check64(L, 1, &id);
84 return id ? bit_result64(L, id, lj_bswap64(x)) : FFH_RETRY;
85#else
86 lj_lib_checknumber(L, 1);
87 return FFH_RETRY;
88#endif
26} 89}
27LJLIB_ASM_(bit_bnot) LJLIB_REC(bit_unary IR_BNOT)
28LJLIB_ASM_(bit_bswap) LJLIB_REC(bit_unary IR_BSWAP)
29 90
30LJLIB_ASM(bit_lshift) LJLIB_REC(bit_shift IR_BSHL) 91LJLIB_ASM(bit_lshift) LJLIB_REC(bit_shift IR_BSHL)
31{ 92{
93#if LJ_HASFFI
94 CTypeID id = 0, id2 = 0;
95 uint64_t x = lj_carith_check64(L, 1, &id);
96 int32_t sh = (int32_t)lj_carith_check64(L, 2, &id2);
97 if (id) {
98 x = lj_carith_shift64(x, sh, curr_func(L)->c.ffid - (int)FF_bit_lshift);
99 return bit_result64(L, id, x);
100 }
101 if (id2) setintV(L->base+1, sh);
102 return FFH_RETRY;
103#else
32 lj_lib_checknumber(L, 1); 104 lj_lib_checknumber(L, 1);
33 lj_lib_checkbit(L, 2); 105 bit_checkbit(L, 2);
34 return FFH_RETRY; 106 return FFH_RETRY;
107#endif
35} 108}
36LJLIB_ASM_(bit_rshift) LJLIB_REC(bit_shift IR_BSHR) 109LJLIB_ASM_(bit_rshift) LJLIB_REC(bit_shift IR_BSHR)
37LJLIB_ASM_(bit_arshift) LJLIB_REC(bit_shift IR_BSAR) 110LJLIB_ASM_(bit_arshift) LJLIB_REC(bit_shift IR_BSAR)
@@ -40,25 +113,58 @@ LJLIB_ASM_(bit_ror) LJLIB_REC(bit_shift IR_BROR)
40 113
41LJLIB_ASM(bit_band) LJLIB_REC(bit_nary IR_BAND) 114LJLIB_ASM(bit_band) LJLIB_REC(bit_nary IR_BAND)
42{ 115{
116#if LJ_HASFFI
117 CTypeID id = 0;
118 TValue *o = L->base, *top = L->top;
119 int i = 0;
120 do { lj_carith_check64(L, ++i, &id); } while (++o < top);
121 if (id) {
122 CTState *cts = ctype_cts(L);
123 CType *ct = ctype_get(cts, id);
124 int op = curr_func(L)->c.ffid - (int)FF_bit_bor;
125 uint64_t x, y = op >= 0 ? 0 : ~(uint64_t)0;
126 o = L->base;
127 do {
128 lj_cconv_ct_tv(cts, ct, (uint8_t *)&x, o, 0);
129 if (op < 0) y &= x; else if (op == 0) y |= x; else y ^= x;
130 } while (++o < top);
131 return bit_result64(L, id, y);
132 }
133 return FFH_RETRY;
134#else
43 int i = 0; 135 int i = 0;
44 do { lj_lib_checknumber(L, ++i); } while (L->base+i < L->top); 136 do { lj_lib_checknumber(L, ++i); } while (L->base+i < L->top);
45 return FFH_RETRY; 137 return FFH_RETRY;
138#endif
46} 139}
47LJLIB_ASM_(bit_bor) LJLIB_REC(bit_nary IR_BOR) 140LJLIB_ASM_(bit_bor) LJLIB_REC(bit_nary IR_BOR)
48LJLIB_ASM_(bit_bxor) LJLIB_REC(bit_nary IR_BXOR) 141LJLIB_ASM_(bit_bxor) LJLIB_REC(bit_nary IR_BXOR)
49 142
50/* ------------------------------------------------------------------------ */ 143/* ------------------------------------------------------------------------ */
51 144
52LJLIB_CF(bit_tohex) 145LJLIB_CF(bit_tohex) LJLIB_REC(.)
53{ 146{
54 uint32_t b = (uint32_t)lj_lib_checkbit(L, 1); 147#if LJ_HASFFI
55 int32_t i, n = L->base+1 >= L->top ? 8 : lj_lib_checkbit(L, 2); 148 CTypeID id = 0, id2 = 0;
56 const char *hexdigits = "0123456789abcdef"; 149 uint64_t b = lj_carith_check64(L, 1, &id);
57 char buf[8]; 150 int32_t n = L->base+1>=L->top ? (id ? 16 : 8) :
58 if (n < 0) { n = -n; hexdigits = "0123456789ABCDEF"; } 151 (int32_t)lj_carith_check64(L, 2, &id2);
59 if (n > 8) n = 8; 152#else
60 for (i = n; --i >= 0; ) { buf[i] = hexdigits[b & 15]; b >>= 4; } 153 uint32_t b = (uint32_t)bit_checkbit(L, 1);
61 lua_pushlstring(L, buf, (size_t)n); 154 int32_t n = L->base+1>=L->top ? 8 : bit_checkbit(L, 2);
155#endif
156 SBuf *sb = lj_buf_tmp_(L);
157 SFormat sf = (STRFMT_UINT|STRFMT_T_HEX);
158 if (n < 0) { n = -n; sf |= STRFMT_F_UPPER; }
159 sf |= ((SFormat)((n+1)&255) << STRFMT_SH_PREC);
160#if LJ_HASFFI
161 if (n < 16) b &= ((uint64_t)1 << 4*n)-1;
162#else
163 if (n < 8) b &= (1u << 4*n)-1;
164#endif
165 sb = lj_strfmt_putfxint(sb, sf, b);
166 setstrV(L, L->top-1, lj_buf_str(L, sb));
167 lj_gc_check(L);
62 return 1; 168 return 1;
63} 169}
64 170
diff --git a/src/lib_debug.c b/src/lib_debug.c
index e87c35cf..b610fb4d 100644
--- a/src/lib_debug.c
+++ b/src/lib_debug.c
@@ -29,7 +29,7 @@ LJLIB_CF(debug_getregistry)
29 return 1; 29 return 1;
30} 30}
31 31
32LJLIB_CF(debug_getmetatable) 32LJLIB_CF(debug_getmetatable) LJLIB_REC(.)
33{ 33{
34 lj_lib_checkany(L, 1); 34 lj_lib_checkany(L, 1);
35 if (!lua_getmetatable(L, 1)) { 35 if (!lua_getmetatable(L, 1)) {
diff --git a/src/lib_ffi.c b/src/lib_ffi.c
index f6df39d6..b2b2d37f 100644
--- a/src/lib_ffi.c
+++ b/src/lib_ffi.c
@@ -29,6 +29,7 @@
29#include "lj_ccall.h" 29#include "lj_ccall.h"
30#include "lj_ccallback.h" 30#include "lj_ccallback.h"
31#include "lj_clib.h" 31#include "lj_clib.h"
32#include "lj_strfmt.h"
32#include "lj_ff.h" 33#include "lj_ff.h"
33#include "lj_lib.h" 34#include "lj_lib.h"
34 35
@@ -137,7 +138,7 @@ static int ffi_index_meta(lua_State *L, CTState *cts, CType *ct, MMS mm)
137 } 138 }
138 } 139 }
139 copyTV(L, base, L->top); 140 copyTV(L, base, L->top);
140 tv = L->top-1; 141 tv = L->top-1-LJ_FR2;
141 } 142 }
142 return lj_meta_tailcall(L, tv); 143 return lj_meta_tailcall(L, tv);
143} 144}
@@ -318,7 +319,7 @@ LJLIB_CF(ffi_meta___tostring)
318 } 319 }
319 } 320 }
320 } 321 }
321 lj_str_pushf(L, msg, strdata(lj_ctype_repr(L, id, NULL)), p); 322 lj_strfmt_pushf(L, msg, strdata(lj_ctype_repr(L, id, NULL)), p);
322checkgc: 323checkgc:
323 lj_gc_check(L); 324 lj_gc_check(L);
324 return 1; 325 return 1;
@@ -507,7 +508,7 @@ LJLIB_CF(ffi_new) LJLIB_REC(.)
507 if (!(info & CTF_VLA) && ctype_align(info) <= CT_MEMALIGN) 508 if (!(info & CTF_VLA) && ctype_align(info) <= CT_MEMALIGN)
508 cd = lj_cdata_new(cts, id, sz); 509 cd = lj_cdata_new(cts, id, sz);
509 else 510 else
510 cd = lj_cdata_newv(cts, id, sz, ctype_align(info)); 511 cd = lj_cdata_newv(L, id, sz, ctype_align(info));
511 setcdataV(L, o-1, cd); /* Anchor the uninitialized cdata. */ 512 setcdataV(L, o-1, cd); /* Anchor the uninitialized cdata. */
512 lj_cconv_ct_init(cts, ct, sz, cdataptr(cd), 513 lj_cconv_ct_init(cts, ct, sz, cdataptr(cd),
513 o, (MSize)(L->top - o)); /* Initialize cdata. */ 514 o, (MSize)(L->top - o)); /* Initialize cdata. */
@@ -558,6 +559,31 @@ LJLIB_CF(ffi_typeof) LJLIB_REC(.)
558 return 1; 559 return 1;
559} 560}
560 561
562/* Internal and unsupported API. */
563LJLIB_CF(ffi_typeinfo)
564{
565 CTState *cts = ctype_cts(L);
566 CTypeID id = (CTypeID)ffi_checkint(L, 1);
567 if (id > 0 && id < cts->top) {
568 CType *ct = ctype_get(cts, id);
569 GCtab *t;
570 lua_createtable(L, 0, 4); /* Increment hash size if fields are added. */
571 t = tabV(L->top-1);
572 setintV(lj_tab_setstr(L, t, lj_str_newlit(L, "info")), (int32_t)ct->info);
573 if (ct->size != CTSIZE_INVALID)
574 setintV(lj_tab_setstr(L, t, lj_str_newlit(L, "size")), (int32_t)ct->size);
575 if (ct->sib)
576 setintV(lj_tab_setstr(L, t, lj_str_newlit(L, "sib")), (int32_t)ct->sib);
577 if (gcref(ct->name)) {
578 GCstr *s = gco2str(gcref(ct->name));
579 setstrV(L, lj_tab_setstr(L, t, lj_str_newlit(L, "name")), s);
580 }
581 lj_gc_check(L);
582 return 1;
583 }
584 return 0;
585}
586
561LJLIB_CF(ffi_istype) LJLIB_REC(.) 587LJLIB_CF(ffi_istype) LJLIB_REC(.)
562{ 588{
563 CTState *cts = ctype_cts(L); 589 CTState *cts = ctype_cts(L);
@@ -725,6 +751,9 @@ LJLIB_CF(ffi_abi) LJLIB_REC(.)
725 case H_(4ab624a8,4ab624a8): b = 1; break; /* win */ 751 case H_(4ab624a8,4ab624a8): b = 1; break; /* win */
726#endif 752#endif
727 case H_(3af93066,1f001464): b = 1; break; /* le/be */ 753 case H_(3af93066,1f001464): b = 1; break; /* le/be */
754#if LJ_GC64
755 case H_(9e89d2c9,13c83c92): b = 1; break; /* gc64 */
756#endif
728 default: 757 default:
729 break; 758 break;
730 } 759 }
@@ -768,19 +797,11 @@ LJLIB_CF(ffi_gc) LJLIB_REC(.)
768 GCcdata *cd = ffi_checkcdata(L, 1); 797 GCcdata *cd = ffi_checkcdata(L, 1);
769 TValue *fin = lj_lib_checkany(L, 2); 798 TValue *fin = lj_lib_checkany(L, 2);
770 CTState *cts = ctype_cts(L); 799 CTState *cts = ctype_cts(L);
771 GCtab *t = cts->finalizer;
772 CType *ct = ctype_raw(cts, cd->ctypeid); 800 CType *ct = ctype_raw(cts, cd->ctypeid);
773 if (!(ctype_isptr(ct->info) || ctype_isstruct(ct->info) || 801 if (!(ctype_isptr(ct->info) || ctype_isstruct(ct->info) ||
774 ctype_isrefarray(ct->info))) 802 ctype_isrefarray(ct->info)))
775 lj_err_arg(L, 1, LJ_ERR_FFI_INVTYPE); 803 lj_err_arg(L, 1, LJ_ERR_FFI_INVTYPE);
776 if (gcref(t->metatable)) { /* Update finalizer table, if still enabled. */ 804 lj_cdata_setfin(L, cd, gcval(fin), itype(fin));
777 copyTV(L, lj_tab_set(L, t, L->base), fin);
778 lj_gc_anybarriert(L, t);
779 if (!tvisnil(fin))
780 cd->marked |= LJ_GC_CDATA_FIN;
781 else
782 cd->marked &= ~LJ_GC_CDATA_FIN;
783 }
784 L->top = L->base+1; /* Pass through the cdata object. */ 805 L->top = L->base+1; /* Pass through the cdata object. */
785 return 1; 806 return 1;
786} 807}
diff --git a/src/lib_io.c b/src/lib_io.c
index 037aa28e..468d3275 100644
--- a/src/lib_io.c
+++ b/src/lib_io.c
@@ -19,8 +19,10 @@
19#include "lj_obj.h" 19#include "lj_obj.h"
20#include "lj_gc.h" 20#include "lj_gc.h"
21#include "lj_err.h" 21#include "lj_err.h"
22#include "lj_buf.h"
22#include "lj_str.h" 23#include "lj_str.h"
23#include "lj_state.h" 24#include "lj_state.h"
25#include "lj_strfmt.h"
24#include "lj_ff.h" 26#include "lj_ff.h"
25#include "lj_lib.h" 27#include "lj_lib.h"
26 28
@@ -84,7 +86,7 @@ static IOFileUD *io_file_open(lua_State *L, const char *mode)
84 IOFileUD *iof = io_file_new(L); 86 IOFileUD *iof = io_file_new(L);
85 iof->fp = fopen(fname, mode); 87 iof->fp = fopen(fname, mode);
86 if (iof->fp == NULL) 88 if (iof->fp == NULL)
87 luaL_argerror(L, 1, lj_str_pushf(L, "%s: %s", fname, strerror(errno))); 89 luaL_argerror(L, 1, lj_strfmt_pushf(L, "%s: %s", fname, strerror(errno)));
88 return iof; 90 return iof;
89} 91}
90 92
@@ -145,7 +147,7 @@ static int io_file_readline(lua_State *L, FILE *fp, MSize chop)
145 MSize m = LUAL_BUFFERSIZE, n = 0, ok = 0; 147 MSize m = LUAL_BUFFERSIZE, n = 0, ok = 0;
146 char *buf; 148 char *buf;
147 for (;;) { 149 for (;;) {
148 buf = lj_str_needbuf(L, &G(L)->tmpbuf, m); 150 buf = lj_buf_tmp(L, m);
149 if (fgets(buf+n, m-n, fp) == NULL) break; 151 if (fgets(buf+n, m-n, fp) == NULL) break;
150 n += (MSize)strlen(buf+n); 152 n += (MSize)strlen(buf+n);
151 ok |= n; 153 ok |= n;
@@ -161,7 +163,7 @@ static void io_file_readall(lua_State *L, FILE *fp)
161{ 163{
162 MSize m, n; 164 MSize m, n;
163 for (m = LUAL_BUFFERSIZE, n = 0; ; m += m) { 165 for (m = LUAL_BUFFERSIZE, n = 0; ; m += m) {
164 char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, m); 166 char *buf = lj_buf_tmp(L, m);
165 n += (MSize)fread(buf+n, 1, m-n, fp); 167 n += (MSize)fread(buf+n, 1, m-n, fp);
166 if (n != m) { 168 if (n != m) {
167 setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n)); 169 setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n));
@@ -174,7 +176,7 @@ static void io_file_readall(lua_State *L, FILE *fp)
174static int io_file_readlen(lua_State *L, FILE *fp, MSize m) 176static int io_file_readlen(lua_State *L, FILE *fp, MSize m)
175{ 177{
176 if (m) { 178 if (m) {
177 char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, m); 179 char *buf = lj_buf_tmp(L, m);
178 MSize n = (MSize)fread(buf, 1, m, fp); 180 MSize n = (MSize)fread(buf, 1, m, fp);
179 setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n)); 181 setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n));
180 lj_gc_check(L); 182 lj_gc_check(L);
@@ -230,19 +232,12 @@ static int io_file_write(lua_State *L, FILE *fp, int start)
230 cTValue *tv; 232 cTValue *tv;
231 int status = 1; 233 int status = 1;
232 for (tv = L->base+start; tv < L->top; tv++) { 234 for (tv = L->base+start; tv < L->top; tv++) {
233 if (tvisstr(tv)) { 235 char buf[STRFMT_MAXBUF_NUM];
234 MSize len = strV(tv)->len; 236 MSize len;
235 status = status && (fwrite(strVdata(tv), 1, len, fp) == len); 237 const char *p = lj_strfmt_wstrnum(buf, tv, &len);
236 } else if (tvisint(tv)) { 238 if (!p)
237 char buf[LJ_STR_INTBUF];
238 char *p = lj_str_bufint(buf, intV(tv));
239 size_t len = (size_t)(buf+LJ_STR_INTBUF-p);
240 status = status && (fwrite(p, 1, len, fp) == len);
241 } else if (tvisnum(tv)) {
242 status = status && (fprintf(fp, LUA_NUMBER_FMT, numV(tv)) > 0);
243 } else {
244 lj_err_argt(L, (int)(tv - L->base) + 1, LUA_TSTRING); 239 lj_err_argt(L, (int)(tv - L->base) + 1, LUA_TSTRING);
245 } 240 status = status && (fwrite(p, 1, len, fp) == len);
246 } 241 }
247 if (LJ_52 && status) { 242 if (LJ_52 && status) {
248 L->top = L->base+1; 243 L->top = L->base+1;
diff --git a/src/lib_jit.c b/src/lib_jit.c
index 96525faf..178ef249 100644
--- a/src/lib_jit.c
+++ b/src/lib_jit.c
@@ -10,13 +10,17 @@
10#include "lauxlib.h" 10#include "lauxlib.h"
11#include "lualib.h" 11#include "lualib.h"
12 12
13#include "lj_arch.h"
14#include "lj_obj.h" 13#include "lj_obj.h"
14#include "lj_gc.h"
15#include "lj_err.h" 15#include "lj_err.h"
16#include "lj_debug.h" 16#include "lj_debug.h"
17#include "lj_str.h" 17#include "lj_str.h"
18#include "lj_tab.h" 18#include "lj_tab.h"
19#include "lj_state.h"
19#include "lj_bc.h" 20#include "lj_bc.h"
21#if LJ_HASFFI
22#include "lj_ctype.h"
23#endif
20#if LJ_HASJIT 24#if LJ_HASJIT
21#include "lj_ir.h" 25#include "lj_ir.h"
22#include "lj_jit.h" 26#include "lj_jit.h"
@@ -24,6 +28,7 @@
24#include "lj_iropt.h" 28#include "lj_iropt.h"
25#include "lj_target.h" 29#include "lj_target.h"
26#endif 30#endif
31#include "lj_trace.h"
27#include "lj_dispatch.h" 32#include "lj_dispatch.h"
28#include "lj_vm.h" 33#include "lj_vm.h"
29#include "lj_vmevent.h" 34#include "lj_vmevent.h"
@@ -279,7 +284,7 @@ static GCtrace *jit_checktrace(lua_State *L)
279/* Names of link types. ORDER LJ_TRLINK */ 284/* Names of link types. ORDER LJ_TRLINK */
280static const char *const jit_trlinkname[] = { 285static const char *const jit_trlinkname[] = {
281 "none", "root", "loop", "tail-recursion", "up-recursion", "down-recursion", 286 "none", "root", "loop", "tail-recursion", "up-recursion", "down-recursion",
282 "interpreter", "return" 287 "interpreter", "return", "stitch"
283}; 288};
284 289
285/* local info = jit.util.traceinfo(tr) */ 290/* local info = jit.util.traceinfo(tr) */
@@ -332,6 +337,13 @@ LJLIB_CF(jit_util_tracek)
332 slot = ir->op2; 337 slot = ir->op2;
333 ir = &T->ir[ir->op1]; 338 ir = &T->ir[ir->op1];
334 } 339 }
340#if LJ_HASFFI
341 if (ir->o == IR_KINT64 && !ctype_ctsG(G(L))) {
342 ptrdiff_t oldtop = savestack(L, L->top);
343 luaopen_ffi(L); /* Load FFI library on-demand. */
344 L->top = restorestack(L, oldtop);
345 }
346#endif
335 lj_ir_kvalue(L, L->top-2, ir); 347 lj_ir_kvalue(L, L->top-2, ir);
336 setintV(L->top-1, (int32_t)irt_type(ir->t)); 348 setintV(L->top-1, (int32_t)irt_type(ir->t));
337 if (slot == -1) 349 if (slot == -1)
@@ -416,6 +428,12 @@ LJLIB_CF(jit_util_ircalladdr)
416 428
417#include "lj_libdef.h" 429#include "lj_libdef.h"
418 430
431static int luaopen_jit_util(lua_State *L)
432{
433 LJ_LIB_REG(L, NULL, jit_util);
434 return 1;
435}
436
419/* -- jit.opt module ------------------------------------------------------ */ 437/* -- jit.opt module ------------------------------------------------------ */
420 438
421#if LJ_HASJIT 439#if LJ_HASJIT
@@ -513,6 +531,104 @@ LJLIB_CF(jit_opt_start)
513 531
514#endif 532#endif
515 533
534/* -- jit.profile module -------------------------------------------------- */
535
536#if LJ_HASPROFILE
537
538#define LJLIB_MODULE_jit_profile
539
540/* Not loaded by default, use: local profile = require("jit.profile") */
541
542static const char KEY_PROFILE_THREAD = 't';
543static const char KEY_PROFILE_FUNC = 'f';
544
545static void jit_profile_callback(lua_State *L2, lua_State *L, int samples,
546 int vmstate)
547{
548 TValue key;
549 cTValue *tv;
550 setlightudV(&key, (void *)&KEY_PROFILE_FUNC);
551 tv = lj_tab_get(L, tabV(registry(L)), &key);
552 if (tvisfunc(tv)) {
553 char vmst = (char)vmstate;
554 int status;
555 setfuncV(L2, L2->top++, funcV(tv));
556 setthreadV(L2, L2->top++, L);
557 setintV(L2->top++, samples);
558 setstrV(L2, L2->top++, lj_str_new(L2, &vmst, 1));
559 status = lua_pcall(L2, 3, 0, 0); /* callback(thread, samples, vmstate) */
560 if (status) {
561 if (G(L2)->panic) G(L2)->panic(L2);
562 exit(EXIT_FAILURE);
563 }
564 lj_trace_abort(G(L2));
565 }
566}
567
568/* profile.start(mode, cb) */
569LJLIB_CF(jit_profile_start)
570{
571 GCtab *registry = tabV(registry(L));
572 GCstr *mode = lj_lib_optstr(L, 1);
573 GCfunc *func = lj_lib_checkfunc(L, 2);
574 lua_State *L2 = lua_newthread(L); /* Thread that runs profiler callback. */
575 TValue key;
576 /* Anchor thread and function in registry. */
577 setlightudV(&key, (void *)&KEY_PROFILE_THREAD);
578 setthreadV(L, lj_tab_set(L, registry, &key), L2);
579 setlightudV(&key, (void *)&KEY_PROFILE_FUNC);
580 setfuncV(L, lj_tab_set(L, registry, &key), func);
581 lj_gc_anybarriert(L, registry);
582 luaJIT_profile_start(L, mode ? strdata(mode) : "",
583 (luaJIT_profile_callback)jit_profile_callback, L2);
584 return 0;
585}
586
587/* profile.stop() */
588LJLIB_CF(jit_profile_stop)
589{
590 GCtab *registry;
591 TValue key;
592 luaJIT_profile_stop(L);
593 registry = tabV(registry(L));
594 setlightudV(&key, (void *)&KEY_PROFILE_THREAD);
595 setnilV(lj_tab_set(L, registry, &key));
596 setlightudV(&key, (void *)&KEY_PROFILE_FUNC);
597 setnilV(lj_tab_set(L, registry, &key));
598 lj_gc_anybarriert(L, registry);
599 return 0;
600}
601
602/* dump = profile.dumpstack([thread,] fmt, depth) */
603LJLIB_CF(jit_profile_dumpstack)
604{
605 lua_State *L2 = L;
606 int arg = 0;
607 size_t len;
608 int depth;
609 GCstr *fmt;
610 const char *p;
611 if (L->top > L->base && tvisthread(L->base)) {
612 L2 = threadV(L->base);
613 arg = 1;
614 }
615 fmt = lj_lib_checkstr(L, arg+1);
616 depth = lj_lib_checkint(L, arg+2);
617 p = luaJIT_profile_dumpstack(L2, strdata(fmt), depth, &len);
618 lua_pushlstring(L, p, len);
619 return 1;
620}
621
622#include "lj_libdef.h"
623
624static int luaopen_jit_profile(lua_State *L)
625{
626 LJ_LIB_REG(L, NULL, jit_profile);
627 return 1;
628}
629
630#endif
631
516/* -- JIT compiler initialization ----------------------------------------- */ 632/* -- JIT compiler initialization ----------------------------------------- */
517 633
518#if LJ_HASJIT 634#if LJ_HASJIT
@@ -538,23 +654,17 @@ static uint32_t jit_cpudetect(lua_State *L)
538 uint32_t features[4]; 654 uint32_t features[4];
539 if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) { 655 if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) {
540#if !LJ_HASJIT 656#if !LJ_HASJIT
541#define JIT_F_CMOV 1
542#define JIT_F_SSE2 2 657#define JIT_F_SSE2 2
543#endif 658#endif
544 flags |= ((features[3] >> 15)&1) * JIT_F_CMOV;
545 flags |= ((features[3] >> 26)&1) * JIT_F_SSE2; 659 flags |= ((features[3] >> 26)&1) * JIT_F_SSE2;
546#if LJ_HASJIT 660#if LJ_HASJIT
547 flags |= ((features[2] >> 0)&1) * JIT_F_SSE3; 661 flags |= ((features[2] >> 0)&1) * JIT_F_SSE3;
548 flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1; 662 flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1;
549 if (vendor[2] == 0x6c65746e) { /* Intel. */ 663 if (vendor[2] == 0x6c65746e) { /* Intel. */
550 if ((features[0] & 0x0ff00f00) == 0x00000f00) /* P4. */ 664 if ((features[0] & 0x0fff0ff0) == 0x000106c0) /* Atom. */
551 flags |= JIT_F_P4; /* Currently unused. */
552 else if ((features[0] & 0x0fff0ff0) == 0x000106c0) /* Atom. */
553 flags |= JIT_F_LEA_AGU; 665 flags |= JIT_F_LEA_AGU;
554 } else if (vendor[2] == 0x444d4163) { /* AMD. */ 666 } else if (vendor[2] == 0x444d4163) { /* AMD. */
555 uint32_t fam = (features[0] & 0x0ff00f00); 667 uint32_t fam = (features[0] & 0x0ff00f00);
556 if (fam == 0x00000f00) /* K8. */
557 flags |= JIT_F_SPLIT_XMM;
558 if (fam >= 0x00000f00) /* K8, K10. */ 668 if (fam >= 0x00000f00) /* K8, K10. */
559 flags |= JIT_F_PREFER_IMUL; 669 flags |= JIT_F_PREFER_IMUL;
560 } 670 }
@@ -562,14 +672,8 @@ static uint32_t jit_cpudetect(lua_State *L)
562 } 672 }
563 /* Check for required instruction set support on x86 (unnecessary on x64). */ 673 /* Check for required instruction set support on x86 (unnecessary on x64). */
564#if LJ_TARGET_X86 674#if LJ_TARGET_X86
565#if !defined(LUAJIT_CPU_NOCMOV)
566 if (!(flags & JIT_F_CMOV))
567 luaL_error(L, "CPU not supported");
568#endif
569#if defined(LUAJIT_CPU_SSE2)
570 if (!(flags & JIT_F_SSE2)) 675 if (!(flags & JIT_F_SSE2))
571 luaL_error(L, "CPU does not support SSE2 (recompile without -DLUAJIT_CPU_SSE2)"); 676 luaL_error(L, "CPU with SSE2 required");
572#endif
573#endif 677#endif
574#elif LJ_TARGET_ARM 678#elif LJ_TARGET_ARM
575#if LJ_HASJIT 679#if LJ_HASJIT
@@ -591,6 +695,8 @@ static uint32_t jit_cpudetect(lua_State *L)
591 ver >= 60 ? JIT_F_ARMV6_ : 0; 695 ver >= 60 ? JIT_F_ARMV6_ : 0;
592 flags |= LJ_ARCH_HASFPU == 0 ? 0 : ver >= 70 ? JIT_F_VFPV3 : JIT_F_VFPV2; 696 flags |= LJ_ARCH_HASFPU == 0 ? 0 : ver >= 70 ? JIT_F_VFPV3 : JIT_F_VFPV2;
593#endif 697#endif
698#elif LJ_TARGET_ARM64
699 /* No optional CPU features to detect (for now). */
594#elif LJ_TARGET_PPC 700#elif LJ_TARGET_PPC
595#if LJ_HASJIT 701#if LJ_HASJIT
596#if LJ_ARCH_SQRT 702#if LJ_ARCH_SQRT
@@ -600,8 +706,6 @@ static uint32_t jit_cpudetect(lua_State *L)
600 flags |= JIT_F_ROUND; 706 flags |= JIT_F_ROUND;
601#endif 707#endif
602#endif 708#endif
603#elif LJ_TARGET_PPCSPE
604 /* Nothing to do. */
605#elif LJ_TARGET_MIPS 709#elif LJ_TARGET_MIPS
606#if LJ_HASJIT 710#if LJ_HASJIT
607 /* Compile-time MIPS CPU detection. */ 711 /* Compile-time MIPS CPU detection. */
@@ -631,11 +735,7 @@ static void jit_init(lua_State *L)
631 uint32_t flags = jit_cpudetect(L); 735 uint32_t flags = jit_cpudetect(L);
632#if LJ_HASJIT 736#if LJ_HASJIT
633 jit_State *J = L2J(L); 737 jit_State *J = L2J(L);
634#if LJ_TARGET_X86 738 J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT;
635 /* Silently turn off the JIT compiler on CPUs without SSE2. */
636 if ((flags & JIT_F_SSE2))
637#endif
638 J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT;
639 memcpy(J->param, jit_param_default, sizeof(J->param)); 739 memcpy(J->param, jit_param_default, sizeof(J->param));
640 lj_dispatch_update(G(L)); 740 lj_dispatch_update(G(L));
641#else 741#else
@@ -645,19 +745,23 @@ static void jit_init(lua_State *L)
645 745
646LUALIB_API int luaopen_jit(lua_State *L) 746LUALIB_API int luaopen_jit(lua_State *L)
647{ 747{
748 jit_init(L);
648 lua_pushliteral(L, LJ_OS_NAME); 749 lua_pushliteral(L, LJ_OS_NAME);
649 lua_pushliteral(L, LJ_ARCH_NAME); 750 lua_pushliteral(L, LJ_ARCH_NAME);
650 lua_pushinteger(L, LUAJIT_VERSION_NUM); 751 lua_pushinteger(L, LUAJIT_VERSION_NUM);
651 lua_pushliteral(L, LUAJIT_VERSION); 752 lua_pushliteral(L, LUAJIT_VERSION);
652 LJ_LIB_REG(L, LUA_JITLIBNAME, jit); 753 LJ_LIB_REG(L, LUA_JITLIBNAME, jit);
754#if LJ_HASPROFILE
755 lj_lib_prereg(L, LUA_JITLIBNAME ".profile", luaopen_jit_profile,
756 tabref(L->env));
757#endif
653#ifndef LUAJIT_DISABLE_JITUTIL 758#ifndef LUAJIT_DISABLE_JITUTIL
654 LJ_LIB_REG(L, "jit.util", jit_util); 759 lj_lib_prereg(L, LUA_JITLIBNAME ".util", luaopen_jit_util, tabref(L->env));
655#endif 760#endif
656#if LJ_HASJIT 761#if LJ_HASJIT
657 LJ_LIB_REG(L, "jit.opt", jit_opt); 762 LJ_LIB_REG(L, "jit.opt", jit_opt);
658#endif 763#endif
659 L->top -= 2; 764 L->top -= 2;
660 jit_init(L);
661 return 1; 765 return 1;
662} 766}
663 767
diff --git a/src/lib_math.c b/src/lib_math.c
index 40f29142..78838fcd 100644
--- a/src/lib_math.c
+++ b/src/lib_math.c
@@ -47,12 +47,6 @@ LJLIB_ASM_(math_tanh) LJLIB_REC(math_htrig IRCALL_tanh)
47LJLIB_ASM_(math_frexp) 47LJLIB_ASM_(math_frexp)
48LJLIB_ASM_(math_modf) LJLIB_REC(.) 48LJLIB_ASM_(math_modf) LJLIB_REC(.)
49 49
50LJLIB_PUSH(57.29577951308232)
51LJLIB_ASM_(math_deg) LJLIB_REC(math_degrad)
52
53LJLIB_PUSH(0.017453292519943295)
54LJLIB_ASM_(math_rad) LJLIB_REC(math_degrad)
55
56LJLIB_ASM(math_log) LJLIB_REC(math_log) 50LJLIB_ASM(math_log) LJLIB_REC(math_log)
57{ 51{
58 double x = lj_lib_checknum(L, 1); 52 double x = lj_lib_checknum(L, 1);
@@ -63,12 +57,15 @@ LJLIB_ASM(math_log) LJLIB_REC(math_log)
63#else 57#else
64 x = lj_vm_log2(x); y = 1.0 / lj_vm_log2(y); 58 x = lj_vm_log2(x); y = 1.0 / lj_vm_log2(y);
65#endif 59#endif
66 setnumV(L->base-1, x*y); /* Do NOT join the expression to x / y. */ 60 setnumV(L->base-1-LJ_FR2, x*y); /* Do NOT join the expression to x / y. */
67 return FFH_RES(1); 61 return FFH_RES(1);
68 } 62 }
69 return FFH_RETRY; 63 return FFH_RETRY;
70} 64}
71 65
66LJLIB_LUA(math_deg) /* function(x) return x * 57.29577951308232 end */
67LJLIB_LUA(math_rad) /* function(x) return x * 0.017453292519943295 end */
68
72LJLIB_ASM(math_atan2) LJLIB_REC(.) 69LJLIB_ASM(math_atan2) LJLIB_REC(.)
73{ 70{
74 lj_lib_checknum(L, 1); 71 lj_lib_checknum(L, 1);
diff --git a/src/lib_os.c b/src/lib_os.c
index bb5a141e..7b5873a5 100644
--- a/src/lib_os.c
+++ b/src/lib_os.c
@@ -17,7 +17,10 @@
17#include "lualib.h" 17#include "lualib.h"
18 18
19#include "lj_obj.h" 19#include "lj_obj.h"
20#include "lj_gc.h"
20#include "lj_err.h" 21#include "lj_err.h"
22#include "lj_buf.h"
23#include "lj_str.h"
21#include "lj_lib.h" 24#include "lj_lib.h"
22 25
23#if LJ_TARGET_POSIX 26#if LJ_TARGET_POSIX
@@ -188,7 +191,7 @@ LJLIB_CF(os_date)
188#endif 191#endif
189 } 192 }
190 if (stm == NULL) { /* Invalid date? */ 193 if (stm == NULL) { /* Invalid date? */
191 setnilV(L->top-1); 194 setnilV(L->top++);
192 } else if (strcmp(s, "*t") == 0) { 195 } else if (strcmp(s, "*t") == 0) {
193 lua_createtable(L, 0, 9); /* 9 = number of fields */ 196 lua_createtable(L, 0, 9); /* 9 = number of fields */
194 setfield(L, "sec", stm->tm_sec); 197 setfield(L, "sec", stm->tm_sec);
@@ -200,23 +203,25 @@ LJLIB_CF(os_date)
200 setfield(L, "wday", stm->tm_wday+1); 203 setfield(L, "wday", stm->tm_wday+1);
201 setfield(L, "yday", stm->tm_yday+1); 204 setfield(L, "yday", stm->tm_yday+1);
202 setboolfield(L, "isdst", stm->tm_isdst); 205 setboolfield(L, "isdst", stm->tm_isdst);
203 } else { 206 } else if (*s) {
204 char cc[3]; 207 SBuf *sb = &G(L)->tmpbuf;
205 luaL_Buffer b; 208 MSize sz = 0;
206 cc[0] = '%'; cc[2] = '\0'; 209 const char *q;
207 luaL_buffinit(L, &b); 210 for (q = s; *q; q++)
208 for (; *s; s++) { 211 sz += (*q == '%') ? 30 : 1; /* Overflow doesn't matter. */
209 if (*s != '%' || *(s + 1) == '\0') { /* No conversion specifier? */ 212 setsbufL(sb, L);
210 luaL_addchar(&b, *s); 213 for (;;) {
211 } else { 214 char *buf = lj_buf_need(sb, sz);
212 size_t reslen; 215 size_t len = strftime(buf, sbufsz(sb), s, stm);
213 char buff[200]; /* Should be big enough for any conversion result. */ 216 if (len) {
214 cc[1] = *(++s); 217 setstrV(L, L->top++, lj_str_new(L, buf, len));
215 reslen = strftime(buff, sizeof(buff), cc, stm); 218 lj_gc_check(L);
216 luaL_addlstring(&b, buff, reslen); 219 break;
217 } 220 }
221 sz += (sz|1);
218 } 222 }
219 luaL_pushresult(&b); 223 } else {
224 setstrV(L, L->top++, &G(L)->strempty);
220 } 225 }
221 return 1; 226 return 1;
222} 227}
diff --git a/src/lib_package.c b/src/lib_package.c
index ac38c815..6b6eb8f6 100644
--- a/src/lib_package.c
+++ b/src/lib_package.c
@@ -226,7 +226,7 @@ static int ll_loadfunc(lua_State *L, const char *path, const char *name, int r)
226 const char *bcdata = ll_bcsym(*reg, mksymname(L, name, SYMPREFIX_BC)); 226 const char *bcdata = ll_bcsym(*reg, mksymname(L, name, SYMPREFIX_BC));
227 lua_pop(L, 1); 227 lua_pop(L, 1);
228 if (bcdata) { 228 if (bcdata) {
229 if (luaL_loadbuffer(L, bcdata, ~(size_t)0, name) != 0) 229 if (luaL_loadbuffer(L, bcdata, LJ_MAX_BUF, name) != 0)
230 return PACKAGE_ERR_LOAD; 230 return PACKAGE_ERR_LOAD;
231 return 0; 231 return 0;
232 } 232 }
@@ -383,7 +383,7 @@ static int lj_cf_package_loader_preload(lua_State *L)
383 if (lua_isnil(L, -1)) { /* Not found? */ 383 if (lua_isnil(L, -1)) { /* Not found? */
384 const char *bcname = mksymname(L, name, SYMPREFIX_BC); 384 const char *bcname = mksymname(L, name, SYMPREFIX_BC);
385 const char *bcdata = ll_bcsym(NULL, bcname); 385 const char *bcdata = ll_bcsym(NULL, bcname);
386 if (bcdata == NULL || luaL_loadbuffer(L, bcdata, ~(size_t)0, name) != 0) 386 if (bcdata == NULL || luaL_loadbuffer(L, bcdata, LJ_MAX_BUF, name) != 0)
387 lua_pushfstring(L, "\n\tno field package.preload['%s']", name); 387 lua_pushfstring(L, "\n\tno field package.preload['%s']", name);
388 } 388 }
389 return 1; 389 return 1;
diff --git a/src/lib_string.c b/src/lib_string.c
index c6168edb..a6d9986a 100644
--- a/src/lib_string.c
+++ b/src/lib_string.c
@@ -6,8 +6,6 @@
6** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h 6** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
7*/ 7*/
8 8
9#include <stdio.h>
10
11#define lib_string_c 9#define lib_string_c
12#define LUA_LIB 10#define LUA_LIB
13 11
@@ -18,6 +16,7 @@
18#include "lj_obj.h" 16#include "lj_obj.h"
19#include "lj_gc.h" 17#include "lj_gc.h"
20#include "lj_err.h" 18#include "lj_err.h"
19#include "lj_buf.h"
21#include "lj_str.h" 20#include "lj_str.h"
22#include "lj_tab.h" 21#include "lj_tab.h"
23#include "lj_meta.h" 22#include "lj_meta.h"
@@ -25,17 +24,19 @@
25#include "lj_ff.h" 24#include "lj_ff.h"
26#include "lj_bcdump.h" 25#include "lj_bcdump.h"
27#include "lj_char.h" 26#include "lj_char.h"
27#include "lj_strfmt.h"
28#include "lj_lib.h" 28#include "lj_lib.h"
29 29
30/* ------------------------------------------------------------------------ */ 30/* ------------------------------------------------------------------------ */
31 31
32#define LJLIB_MODULE_string 32#define LJLIB_MODULE_string
33 33
34LJLIB_ASM(string_len) LJLIB_REC(.) 34LJLIB_LUA(string_len) /*
35{ 35 function(s)
36 lj_lib_checkstr(L, 1); 36 CHECK_str(s)
37 return FFH_RETRY; 37 return #s
38} 38 end
39*/
39 40
40LJLIB_ASM(string_byte) LJLIB_REC(string_range 0) 41LJLIB_ASM(string_byte) LJLIB_REC(string_range 0)
41{ 42{
@@ -57,21 +58,21 @@ LJLIB_ASM(string_byte) LJLIB_REC(string_range 0)
57 lj_state_checkstack(L, (MSize)n); 58 lj_state_checkstack(L, (MSize)n);
58 p = (const unsigned char *)strdata(s) + start; 59 p = (const unsigned char *)strdata(s) + start;
59 for (i = 0; i < n; i++) 60 for (i = 0; i < n; i++)
60 setintV(L->base + i-1, p[i]); 61 setintV(L->base + i-1-LJ_FR2, p[i]);
61 return FFH_RES(n); 62 return FFH_RES(n);
62} 63}
63 64
64LJLIB_ASM(string_char) 65LJLIB_ASM(string_char) LJLIB_REC(.)
65{ 66{
66 int i, nargs = (int)(L->top - L->base); 67 int i, nargs = (int)(L->top - L->base);
67 char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, (MSize)nargs); 68 char *buf = lj_buf_tmp(L, (MSize)nargs);
68 for (i = 1; i <= nargs; i++) { 69 for (i = 1; i <= nargs; i++) {
69 int32_t k = lj_lib_checkint(L, i); 70 int32_t k = lj_lib_checkint(L, i);
70 if (!checku8(k)) 71 if (!checku8(k))
71 lj_err_arg(L, i, LJ_ERR_BADVAL); 72 lj_err_arg(L, i, LJ_ERR_BADVAL);
72 buf[i-1] = (char)k; 73 buf[i-1] = (char)k;
73 } 74 }
74 setstrV(L, L->base-1, lj_str_new(L, buf, (size_t)nargs)); 75 setstrV(L, L->base-1-LJ_FR2, lj_str_new(L, buf, (size_t)nargs));
75 return FFH_RES(1); 76 return FFH_RES(1);
76} 77}
77 78
@@ -83,68 +84,38 @@ LJLIB_ASM(string_sub) LJLIB_REC(string_range 1)
83 return FFH_RETRY; 84 return FFH_RETRY;
84} 85}
85 86
86LJLIB_ASM(string_rep) 87LJLIB_CF(string_rep) LJLIB_REC(.)
87{ 88{
88 GCstr *s = lj_lib_checkstr(L, 1); 89 GCstr *s = lj_lib_checkstr(L, 1);
89 int32_t k = lj_lib_checkint(L, 2); 90 int32_t rep = lj_lib_checkint(L, 2);
90 GCstr *sep = lj_lib_optstr(L, 3); 91 GCstr *sep = lj_lib_optstr(L, 3);
91 int32_t len = (int32_t)s->len; 92 SBuf *sb = lj_buf_tmp_(L);
92 global_State *g = G(L); 93 if (sep && rep > 1) {
93 int64_t tlen; 94 GCstr *s2 = lj_buf_cat2str(L, sep, s);
94 const char *src; 95 lj_buf_reset(sb);
95 char *buf; 96 lj_buf_putstr(sb, s);
96 if (k <= 0) { 97 s = s2;
97 empty: 98 rep--;
98 setstrV(L, L->base-1, &g->strempty);
99 return FFH_RES(1);
100 } 99 }
101 if (sep) { 100 sb = lj_buf_putstr_rep(sb, s, rep);
102 tlen = (int64_t)len + sep->len; 101 setstrV(L, L->top-1, lj_buf_str(L, sb));
103 if (tlen > LJ_MAX_STR) 102 lj_gc_check(L);
104 lj_err_caller(L, LJ_ERR_STROV); 103 return 1;
105 tlen *= k;
106 if (tlen > LJ_MAX_STR)
107 lj_err_caller(L, LJ_ERR_STROV);
108 } else {
109 tlen = (int64_t)k * len;
110 if (tlen > LJ_MAX_STR)
111 lj_err_caller(L, LJ_ERR_STROV);
112 }
113 if (tlen == 0) goto empty;
114 buf = lj_str_needbuf(L, &g->tmpbuf, (MSize)tlen);
115 src = strdata(s);
116 if (sep) {
117 tlen -= sep->len; /* Ignore trailing separator. */
118 if (k > 1) { /* Paste one string and one separator. */
119 int32_t i;
120 i = 0; while (i < len) *buf++ = src[i++];
121 src = strdata(sep); len = sep->len;
122 i = 0; while (i < len) *buf++ = src[i++];
123 src = g->tmpbuf.buf; len += s->len; k--; /* Now copy that k-1 times. */
124 }
125 }
126 do {
127 int32_t i = 0;
128 do { *buf++ = src[i++]; } while (i < len);
129 } while (--k > 0);
130 setstrV(L, L->base-1, lj_str_new(L, g->tmpbuf.buf, (size_t)tlen));
131 return FFH_RES(1);
132} 104}
133 105
134LJLIB_ASM(string_reverse) 106LJLIB_ASM(string_reverse) LJLIB_REC(string_op IRCALL_lj_buf_putstr_reverse)
135{ 107{
136 GCstr *s = lj_lib_checkstr(L, 1); 108 lj_lib_checkstr(L, 1);
137 lj_str_needbuf(L, &G(L)->tmpbuf, s->len);
138 return FFH_RETRY; 109 return FFH_RETRY;
139} 110}
140LJLIB_ASM_(string_lower) 111LJLIB_ASM_(string_lower) LJLIB_REC(string_op IRCALL_lj_buf_putstr_lower)
141LJLIB_ASM_(string_upper) 112LJLIB_ASM_(string_upper) LJLIB_REC(string_op IRCALL_lj_buf_putstr_upper)
142 113
143/* ------------------------------------------------------------------------ */ 114/* ------------------------------------------------------------------------ */
144 115
145static int writer_buf(lua_State *L, const void *p, size_t size, void *b) 116static int writer_buf(lua_State *L, const void *p, size_t size, void *sb)
146{ 117{
147 luaL_addlstring((luaL_Buffer *)b, (const char *)p, size); 118 lj_buf_putmem((SBuf *)sb, p, (MSize)size);
148 UNUSED(L); 119 UNUSED(L);
149 return 0; 120 return 0;
150} 121}
@@ -153,12 +124,12 @@ LJLIB_CF(string_dump)
153{ 124{
154 GCfunc *fn = lj_lib_checkfunc(L, 1); 125 GCfunc *fn = lj_lib_checkfunc(L, 1);
155 int strip = L->base+1 < L->top && tvistruecond(L->base+1); 126 int strip = L->base+1 < L->top && tvistruecond(L->base+1);
156 luaL_Buffer b; 127 SBuf *sb = lj_buf_tmp_(L); /* Assumes lj_bcwrite() doesn't use tmpbuf. */
157 L->top = L->base+1; 128 L->top = L->base+1;
158 luaL_buffinit(L, &b); 129 if (!isluafunc(fn) || lj_bcwrite(L, funcproto(fn), writer_buf, sb, strip))
159 if (!isluafunc(fn) || lj_bcwrite(L, funcproto(fn), writer_buf, &b, strip))
160 lj_err_caller(L, LJ_ERR_STRDUMP); 130 lj_err_caller(L, LJ_ERR_STRDUMP);
161 luaL_pushresult(&b); 131 setstrV(L, L->top-1, lj_buf_str(L, sb));
132 lj_gc_check(L);
162 return 1; 133 return 1;
163} 134}
164 135
@@ -183,7 +154,6 @@ typedef struct MatchState {
183} MatchState; 154} MatchState;
184 155
185#define L_ESC '%' 156#define L_ESC '%'
186#define SPECIALS "^$*+?.([%-"
187 157
188static int check_capture(MatchState *ms, int l) 158static int check_capture(MatchState *ms, int l)
189{ 159{
@@ -450,30 +420,6 @@ static const char *match(MatchState *ms, const char *s, const char *p)
450 return s; 420 return s;
451} 421}
452 422
453static const char *lmemfind(const char *s1, size_t l1,
454 const char *s2, size_t l2)
455{
456 if (l2 == 0) {
457 return s1; /* empty strings are everywhere */
458 } else if (l2 > l1) {
459 return NULL; /* avoids a negative `l1' */
460 } else {
461 const char *init; /* to search for a `*s2' inside `s1' */
462 l2--; /* 1st char will be checked by `memchr' */
463 l1 = l1-l2; /* `s2' cannot be found after that */
464 while (l1 > 0 && (init = (const char *)memchr(s1, *s2, l1)) != NULL) {
465 init++; /* 1st char is already checked */
466 if (memcmp(init, s2+1, l2) == 0) {
467 return init-1;
468 } else { /* correct `l1' and `s1' to try again */
469 l1 -= (size_t)(init-s1);
470 s1 = init;
471 }
472 }
473 return NULL; /* not found */
474 }
475}
476
477static void push_onecapture(MatchState *ms, int i, const char *s, const char *e) 423static void push_onecapture(MatchState *ms, int i, const char *s, const char *e)
478{ 424{
479 if (i >= ms->level) { 425 if (i >= ms->level) {
@@ -501,64 +447,60 @@ static int push_captures(MatchState *ms, const char *s, const char *e)
501 return nlevels; /* number of strings pushed */ 447 return nlevels; /* number of strings pushed */
502} 448}
503 449
504static ptrdiff_t posrelat(ptrdiff_t pos, size_t len)
505{
506 /* relative string position: negative means back from end */
507 if (pos < 0) pos += (ptrdiff_t)len + 1;
508 return (pos >= 0) ? pos : 0;
509}
510
511static int str_find_aux(lua_State *L, int find) 450static int str_find_aux(lua_State *L, int find)
512{ 451{
513 size_t l1, l2; 452 GCstr *s = lj_lib_checkstr(L, 1);
514 const char *s = luaL_checklstring(L, 1, &l1); 453 GCstr *p = lj_lib_checkstr(L, 2);
515 const char *p = luaL_checklstring(L, 2, &l2); 454 int32_t start = lj_lib_optint(L, 3, 1);
516 ptrdiff_t init = posrelat(luaL_optinteger(L, 3, 1), l1) - 1; 455 MSize st;
517 if (init < 0) { 456 if (start < 0) start += (int32_t)s->len; else start--;
518 init = 0; 457 if (start < 0) start = 0;
519 } else if ((size_t)(init) > l1) { 458 st = (MSize)start;
459 if (st > s->len) {
520#if LJ_52 460#if LJ_52
521 setnilV(L->top-1); 461 setnilV(L->top-1);
522 return 1; 462 return 1;
523#else 463#else
524 init = (ptrdiff_t)l1; 464 st = s->len;
525#endif 465#endif
526 } 466 }
527 if (find && (lua_toboolean(L, 4) || /* explicit request? */ 467 if (find && ((L->base+3 < L->top && tvistruecond(L->base+3)) ||
528 strpbrk(p, SPECIALS) == NULL)) { /* or no special characters? */ 468 !lj_str_haspattern(p))) { /* Search for fixed string. */
529 /* do a plain search */ 469 const char *q = lj_str_find(strdata(s)+st, strdata(p), s->len-st, p->len);
530 const char *s2 = lmemfind(s+init, l1-(size_t)init, p, l2); 470 if (q) {
531 if (s2) { 471 setintV(L->top-2, (int32_t)(q-strdata(s)) + 1);
532 lua_pushinteger(L, s2-s+1); 472 setintV(L->top-1, (int32_t)(q-strdata(s)) + (int32_t)p->len);
533 lua_pushinteger(L, s2-s+(ptrdiff_t)l2);
534 return 2; 473 return 2;
535 } 474 }
536 } else { 475 } else { /* Search for pattern. */
537 MatchState ms; 476 MatchState ms;
538 int anchor = (*p == '^') ? (p++, 1) : 0; 477 const char *pstr = strdata(p);
539 const char *s1=s+init; 478 const char *sstr = strdata(s) + st;
479 int anchor = 0;
480 if (*pstr == '^') { pstr++; anchor = 1; }
540 ms.L = L; 481 ms.L = L;
541 ms.src_init = s; 482 ms.src_init = strdata(s);
542 ms.src_end = s+l1; 483 ms.src_end = strdata(s) + s->len;
543 do { 484 do { /* Loop through string and try to match the pattern. */
544 const char *res; 485 const char *q;
545 ms.level = ms.depth = 0; 486 ms.level = ms.depth = 0;
546 if ((res=match(&ms, s1, p)) != NULL) { 487 q = match(&ms, sstr, pstr);
488 if (q) {
547 if (find) { 489 if (find) {
548 lua_pushinteger(L, s1-s+1); /* start */ 490 setintV(L->top++, (int32_t)(sstr-(strdata(s)-1)));
549 lua_pushinteger(L, res-s); /* end */ 491 setintV(L->top++, (int32_t)(q-strdata(s)));
550 return push_captures(&ms, NULL, 0) + 2; 492 return push_captures(&ms, NULL, NULL) + 2;
551 } else { 493 } else {
552 return push_captures(&ms, s1, res); 494 return push_captures(&ms, sstr, q);
553 } 495 }
554 } 496 }
555 } while (s1++ < ms.src_end && !anchor); 497 } while (sstr++ < ms.src_end && !anchor);
556 } 498 }
557 lua_pushnil(L); /* not found */ 499 setnilV(L->top-1); /* Not found. */
558 return 1; 500 return 1;
559} 501}
560 502
561LJLIB_CF(string_find) 503LJLIB_CF(string_find) LJLIB_REC(.)
562{ 504{
563 return str_find_aux(L, 1); 505 return str_find_aux(L, 1);
564} 506}
@@ -698,221 +640,91 @@ LJLIB_CF(string_gsub)
698 640
699/* ------------------------------------------------------------------------ */ 641/* ------------------------------------------------------------------------ */
700 642
701/* maximum size of each formatted item (> len(format('%99.99f', -1e308))) */ 643/* Emulate tostring() inline. */
702#define MAX_FMTITEM 512 644static GCstr *string_fmt_tostring(lua_State *L, int arg, int retry)
703/* valid flags in a format specification */
704#define FMT_FLAGS "-+ #0"
705/*
706** maximum size of each format specification (such as '%-099.99d')
707** (+10 accounts for %99.99x plus margin of error)
708*/
709#define MAX_FMTSPEC (sizeof(FMT_FLAGS) + sizeof(LUA_INTFRMLEN) + 10)
710
711static void addquoted(lua_State *L, luaL_Buffer *b, int arg)
712{
713 GCstr *str = lj_lib_checkstr(L, arg);
714 int32_t len = (int32_t)str->len;
715 const char *s = strdata(str);
716 luaL_addchar(b, '"');
717 while (len--) {
718 uint32_t c = uchar(*s);
719 if (c == '"' || c == '\\' || c == '\n') {
720 luaL_addchar(b, '\\');
721 } else if (lj_char_iscntrl(c)) { /* This can only be 0-31 or 127. */
722 uint32_t d;
723 luaL_addchar(b, '\\');
724 if (c >= 100 || lj_char_isdigit(uchar(s[1]))) {
725 luaL_addchar(b, '0'+(c >= 100)); if (c >= 100) c -= 100;
726 goto tens;
727 } else if (c >= 10) {
728 tens:
729 d = (c * 205) >> 11; c -= d * 10; luaL_addchar(b, '0'+d);
730 }
731 c += '0';
732 }
733 luaL_addchar(b, c);
734 s++;
735 }
736 luaL_addchar(b, '"');
737}
738
739static const char *scanformat(lua_State *L, const char *strfrmt, char *form)
740{
741 const char *p = strfrmt;
742 while (*p != '\0' && strchr(FMT_FLAGS, *p) != NULL) p++; /* skip flags */
743 if ((size_t)(p - strfrmt) >= sizeof(FMT_FLAGS))
744 lj_err_caller(L, LJ_ERR_STRFMTR);
745 if (lj_char_isdigit(uchar(*p))) p++; /* skip width */
746 if (lj_char_isdigit(uchar(*p))) p++; /* (2 digits at most) */
747 if (*p == '.') {
748 p++;
749 if (lj_char_isdigit(uchar(*p))) p++; /* skip precision */
750 if (lj_char_isdigit(uchar(*p))) p++; /* (2 digits at most) */
751 }
752 if (lj_char_isdigit(uchar(*p)))
753 lj_err_caller(L, LJ_ERR_STRFMTW);
754 *(form++) = '%';
755 strncpy(form, strfrmt, (size_t)(p - strfrmt + 1));
756 form += p - strfrmt + 1;
757 *form = '\0';
758 return p;
759}
760
761static void addintlen(char *form)
762{
763 size_t l = strlen(form);
764 char spec = form[l - 1];
765 strcpy(form + l - 1, LUA_INTFRMLEN);
766 form[l + sizeof(LUA_INTFRMLEN) - 2] = spec;
767 form[l + sizeof(LUA_INTFRMLEN) - 1] = '\0';
768}
769
770static unsigned LUA_INTFRM_T num2intfrm(lua_State *L, int arg)
771{
772 if (sizeof(LUA_INTFRM_T) == 4) {
773 return (LUA_INTFRM_T)lj_lib_checkbit(L, arg);
774 } else {
775 cTValue *o;
776 lj_lib_checknumber(L, arg);
777 o = L->base+arg-1;
778 if (tvisint(o))
779 return (LUA_INTFRM_T)intV(o);
780 else
781 return (LUA_INTFRM_T)numV(o);
782 }
783}
784
785static unsigned LUA_INTFRM_T num2uintfrm(lua_State *L, int arg)
786{
787 if (sizeof(LUA_INTFRM_T) == 4) {
788 return (unsigned LUA_INTFRM_T)lj_lib_checkbit(L, arg);
789 } else {
790 cTValue *o;
791 lj_lib_checknumber(L, arg);
792 o = L->base+arg-1;
793 if (tvisint(o))
794 return (unsigned LUA_INTFRM_T)intV(o);
795 else if ((int32_t)o->u32.hi < 0)
796 return (unsigned LUA_INTFRM_T)(LUA_INTFRM_T)numV(o);
797 else
798 return (unsigned LUA_INTFRM_T)numV(o);
799 }
800}
801
802static GCstr *meta_tostring(lua_State *L, int arg)
803{ 645{
804 TValue *o = L->base+arg-1; 646 TValue *o = L->base+arg-1;
805 cTValue *mo; 647 cTValue *mo;
806 lua_assert(o < L->top); /* Caller already checks for existence. */ 648 lua_assert(o < L->top); /* Caller already checks for existence. */
807 if (LJ_LIKELY(tvisstr(o))) 649 if (LJ_LIKELY(tvisstr(o)))
808 return strV(o); 650 return strV(o);
809 if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) { 651 if (retry != 2 && !tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
810 copyTV(L, L->top++, mo); 652 copyTV(L, L->top++, mo);
811 copyTV(L, L->top++, o); 653 copyTV(L, L->top++, o);
812 lua_call(L, 1, 1); 654 lua_call(L, 1, 1);
813 L->top--; 655 copyTV(L, L->base+arg-1, --L->top);
814 if (tvisstr(L->top)) 656 return NULL; /* Buffer may be overwritten, retry. */
815 return strV(L->top);
816 o = L->base+arg-1;
817 copyTV(L, o, L->top);
818 }
819 if (tvisnumber(o)) {
820 return lj_str_fromnumber(L, o);
821 } else if (tvisnil(o)) {
822 return lj_str_newlit(L, "nil");
823 } else if (tvisfalse(o)) {
824 return lj_str_newlit(L, "false");
825 } else if (tvistrue(o)) {
826 return lj_str_newlit(L, "true");
827 } else {
828 if (tvisfunc(o) && isffunc(funcV(o)))
829 lj_str_pushf(L, "function: builtin#%d", funcV(o)->c.ffid);
830 else
831 lj_str_pushf(L, "%s: %p", lj_typename(o), lua_topointer(L, arg));
832 L->top--;
833 return strV(L->top);
834 } 657 }
835} 658 return lj_strfmt_obj(L, o);
836 659}
837LJLIB_CF(string_format) 660
838{ 661LJLIB_CF(string_format) LJLIB_REC(.)
839 int arg = 1, top = (int)(L->top - L->base); 662{
840 GCstr *fmt = lj_lib_checkstr(L, arg); 663 int arg, top = (int)(L->top - L->base);
841 const char *strfrmt = strdata(fmt); 664 GCstr *fmt;
842 const char *strfrmt_end = strfrmt + fmt->len; 665 SBuf *sb;
843 luaL_Buffer b; 666 FormatState fs;
844 luaL_buffinit(L, &b); 667 SFormat sf;
845 while (strfrmt < strfrmt_end) { 668 int retry = 0;
846 if (*strfrmt != L_ESC) { 669again:
847 luaL_addchar(&b, *strfrmt++); 670 arg = 1;
848 } else if (*++strfrmt == L_ESC) { 671 sb = lj_buf_tmp_(L);
849 luaL_addchar(&b, *strfrmt++); /* %% */ 672 fmt = lj_lib_checkstr(L, arg);
850 } else { /* format item */ 673 lj_strfmt_init(&fs, strdata(fmt), fmt->len);
851 char form[MAX_FMTSPEC]; /* to store the format (`%...') */ 674 while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) {
852 char buff[MAX_FMTITEM]; /* to store the formatted item */ 675 if (sf == STRFMT_LIT) {
676 lj_buf_putmem(sb, fs.str, fs.len);
677 } else if (sf == STRFMT_ERR) {
678 lj_err_callerv(L, LJ_ERR_STRFMT, strdata(lj_str_new(L, fs.str, fs.len)));
679 } else {
853 if (++arg > top) 680 if (++arg > top)
854 luaL_argerror(L, arg, lj_obj_typename[0]); 681 luaL_argerror(L, arg, lj_obj_typename[0]);
855 strfrmt = scanformat(L, strfrmt, form); 682 switch (STRFMT_TYPE(sf)) {
856 switch (*strfrmt++) { 683 case STRFMT_INT:
857 case 'c': 684 if (tvisint(L->base+arg-1)) {
858 sprintf(buff, form, lj_lib_checkint(L, arg)); 685 int32_t k = intV(L->base+arg-1);
686 if (sf == STRFMT_INT)
687 lj_strfmt_putint(sb, k); /* Shortcut for plain %d. */
688 else
689 lj_strfmt_putfxint(sb, sf, k);
690 } else {
691 lj_strfmt_putfnum_int(sb, sf, lj_lib_checknum(L, arg));
692 }
859 break; 693 break;
860 case 'd': case 'i': 694 case STRFMT_UINT:
861 addintlen(form); 695 if (tvisint(L->base+arg-1))
862 sprintf(buff, form, num2intfrm(L, arg)); 696 lj_strfmt_putfxint(sb, sf, intV(L->base+arg-1));
697 else
698 lj_strfmt_putfnum_uint(sb, sf, lj_lib_checknum(L, arg));
863 break; 699 break;
864 case 'o': case 'u': case 'x': case 'X': 700 case STRFMT_NUM:
865 addintlen(form); 701 lj_strfmt_putfnum(sb, sf, lj_lib_checknum(L, arg));
866 sprintf(buff, form, num2uintfrm(L, arg));
867 break; 702 break;
868 case 'e': case 'E': case 'f': case 'g': case 'G': case 'a': case 'A': { 703 case STRFMT_STR: {
869 TValue tv; 704 GCstr *str = string_fmt_tostring(L, arg, retry);
870 tv.n = lj_lib_checknum(L, arg); 705 if (str == NULL)
871 if (LJ_UNLIKELY((tv.u32.hi << 1) >= 0xffe00000)) { 706 retry = 1;
872 /* Canonicalize output of non-finite values. */ 707 else if ((sf & STRFMT_T_QUOTED))
873 char *p, nbuf[LJ_STR_NUMBUF]; 708 lj_strfmt_putquoted(sb, str); /* No formatting. */
874 size_t len = lj_str_bufnum(nbuf, &tv); 709 else
875 if (strfrmt[-1] < 'a') { 710 lj_strfmt_putfstr(sb, sf, str);
876 nbuf[len-3] = nbuf[len-3] - 0x20;
877 nbuf[len-2] = nbuf[len-2] - 0x20;
878 nbuf[len-1] = nbuf[len-1] - 0x20;
879 }
880 nbuf[len] = '\0';
881 for (p = form; *p < 'A' && *p != '.'; p++) ;
882 *p++ = 's'; *p = '\0';
883 sprintf(buff, form, nbuf);
884 break;
885 }
886 sprintf(buff, form, (double)tv.n);
887 break; 711 break;
888 } 712 }
889 case 'q': 713 case STRFMT_CHAR:
890 addquoted(L, &b, arg); 714 lj_strfmt_putfchar(sb, sf, lj_lib_checkint(L, arg));
891 continue; 715 break;
892 case 'p': 716 case STRFMT_PTR: /* No formatting. */
893 lj_str_pushf(L, "%p", lua_topointer(L, arg)); 717 lj_strfmt_putptr(sb, lj_obj_ptr(L->base+arg-1));
894 luaL_addvalue(&b);
895 continue;
896 case 's': {
897 GCstr *str = meta_tostring(L, arg);
898 if (!strchr(form, '.') && str->len >= 100) {
899 /* no precision and string is too long to be formatted;
900 keep original string */
901 setstrV(L, L->top++, str);
902 luaL_addvalue(&b);
903 continue;
904 }
905 sprintf(buff, form, strdata(str));
906 break; 718 break;
907 }
908 default: 719 default:
909 lj_err_callerv(L, LJ_ERR_STRFMTO, *(strfrmt -1)); 720 lua_assert(0);
910 break; 721 break;
911 } 722 }
912 luaL_addlstring(&b, buff, strlen(buff));
913 } 723 }
914 } 724 }
915 luaL_pushresult(&b); 725 if (retry++ == 1) goto again;
726 setstrV(L, L->top-1, lj_buf_str(L, sb));
727 lj_gc_check(L);
916 return 1; 728 return 1;
917} 729}
918 730
diff --git a/src/lib_table.c b/src/lib_table.c
index fbfe8638..56612aba 100644
--- a/src/lib_table.c
+++ b/src/lib_table.c
@@ -16,57 +16,43 @@
16#include "lj_obj.h" 16#include "lj_obj.h"
17#include "lj_gc.h" 17#include "lj_gc.h"
18#include "lj_err.h" 18#include "lj_err.h"
19#include "lj_buf.h"
19#include "lj_tab.h" 20#include "lj_tab.h"
21#include "lj_ff.h"
20#include "lj_lib.h" 22#include "lj_lib.h"
21 23
22/* ------------------------------------------------------------------------ */ 24/* ------------------------------------------------------------------------ */
23 25
24#define LJLIB_MODULE_table 26#define LJLIB_MODULE_table
25 27
26LJLIB_CF(table_foreachi) 28LJLIB_LUA(table_foreachi) /*
27{ 29 function(t, f)
28 GCtab *t = lj_lib_checktab(L, 1); 30 CHECK_tab(t)
29 GCfunc *func = lj_lib_checkfunc(L, 2); 31 CHECK_func(f)
30 MSize i, n = lj_tab_len(t); 32 for i=1,#t do
31 for (i = 1; i <= n; i++) { 33 local r = f(i, t[i])
32 cTValue *val; 34 if r ~= nil then return r end
33 setfuncV(L, L->top, func); 35 end
34 setintV(L->top+1, i); 36 end
35 val = lj_tab_getint(t, (int32_t)i); 37*/
36 if (val) { copyTV(L, L->top+2, val); } else { setnilV(L->top+2); }
37 L->top += 3;
38 lua_call(L, 2, 1);
39 if (!tvisnil(L->top-1))
40 return 1;
41 L->top--;
42 }
43 return 0;
44}
45 38
46LJLIB_CF(table_foreach) 39LJLIB_LUA(table_foreach) /*
47{ 40 function(t, f)
48 GCtab *t = lj_lib_checktab(L, 1); 41 CHECK_tab(t)
49 GCfunc *func = lj_lib_checkfunc(L, 2); 42 CHECK_func(f)
50 L->top = L->base+3; 43 for k, v in PAIRS(t) do
51 setnilV(L->top-1); 44 local r = f(k, v)
52 while (lj_tab_next(L, t, L->top-1)) { 45 if r ~= nil then return r end
53 copyTV(L, L->top+2, L->top); 46 end
54 copyTV(L, L->top+1, L->top-1); 47 end
55 setfuncV(L, L->top, func); 48*/
56 L->top += 3;
57 lua_call(L, 2, 1);
58 if (!tvisnil(L->top-1))
59 return 1;
60 L->top--;
61 }
62 return 0;
63}
64 49
65LJLIB_ASM(table_getn) LJLIB_REC(.) 50LJLIB_LUA(table_getn) /*
66{ 51 function(t)
67 lj_lib_checktab(L, 1); 52 CHECK_tab(t)
68 return FFH_UNREACHABLE; 53 return #t
69} 54 end
55*/
70 56
71LJLIB_CF(table_maxn) 57LJLIB_CF(table_maxn)
72{ 58{
@@ -119,52 +105,47 @@ LJLIB_CF(table_insert) LJLIB_REC(.)
119 return 0; 105 return 0;
120} 106}
121 107
122LJLIB_CF(table_remove) LJLIB_REC(.) 108LJLIB_LUA(table_remove) /*
123{ 109 function(t, pos)
124 GCtab *t = lj_lib_checktab(L, 1); 110 CHECK_tab(t)
125 int32_t e = (int32_t)lj_tab_len(t); 111 local len = #t
126 int32_t pos = lj_lib_optint(L, 2, e); 112 if pos == nil then
127 if (!(1 <= pos && pos <= e)) /* Nothing to remove? */ 113 if len ~= 0 then
128 return 0; 114 local old = t[len]
129 lua_rawgeti(L, 1, pos); /* Get previous value. */ 115 t[len] = nil
130 /* NOBARRIER: This just moves existing elements around. */ 116 return old
131 for (; pos < e; pos++) { 117 end
132 cTValue *src = lj_tab_getint(t, pos+1); 118 else
133 TValue *dst = lj_tab_setint(L, t, pos); 119 CHECK_int(pos)
134 if (src) { 120 if pos >= 1 and pos <= len then
135 copyTV(L, dst, src); 121 local old = t[pos]
136 } else { 122 for i=pos+1,len do
137 setnilV(dst); 123 t[i-1] = t[i]
138 } 124 end
139 } 125 t[len] = nil
140 setnilV(lj_tab_setint(L, t, e)); /* Remove (last) value. */ 126 return old
141 return 1; /* Return previous value. */ 127 end
142} 128 end
129 end
130*/
143 131
144LJLIB_CF(table_concat) 132LJLIB_CF(table_concat) LJLIB_REC(.)
145{ 133{
146 luaL_Buffer b;
147 GCtab *t = lj_lib_checktab(L, 1); 134 GCtab *t = lj_lib_checktab(L, 1);
148 GCstr *sep = lj_lib_optstr(L, 2); 135 GCstr *sep = lj_lib_optstr(L, 2);
149 MSize seplen = sep ? sep->len : 0;
150 int32_t i = lj_lib_optint(L, 3, 1); 136 int32_t i = lj_lib_optint(L, 3, 1);
151 int32_t e = (L->base+3 < L->top && !tvisnil(L->base+3)) ? 137 int32_t e = (L->base+3 < L->top && !tvisnil(L->base+3)) ?
152 lj_lib_checkint(L, 4) : (int32_t)lj_tab_len(t); 138 lj_lib_checkint(L, 4) : (int32_t)lj_tab_len(t);
153 luaL_buffinit(L, &b); 139 SBuf *sb = lj_buf_tmp_(L);
154 if (i <= e) { 140 SBuf *sbx = lj_buf_puttab(sb, t, sep, i, e);
155 for (;;) { 141 if (LJ_UNLIKELY(!sbx)) { /* Error: bad element type. */
156 cTValue *o; 142 int32_t idx = (int32_t)(intptr_t)sbufP(sb);
157 lua_rawgeti(L, 1, i); 143 cTValue *o = lj_tab_getint(t, idx);
158 o = L->top-1; 144 lj_err_callerv(L, LJ_ERR_TABCAT,
159 if (!(tvisstr(o) || tvisnumber(o))) 145 lj_obj_itypename[o ? itypemap(o) : ~LJ_TNIL], idx);
160 lj_err_callerv(L, LJ_ERR_TABCAT, lj_typename(o), i);
161 luaL_addvalue(&b);
162 if (i++ == e) break;
163 if (seplen)
164 luaL_addlstring(&b, strdata(sep), seplen);
165 }
166 } 146 }
167 luaL_pushresult(&b); 147 setstrV(L, L->top-1, lj_buf_str(L, sbx));
148 lj_gc_check(L);
168 return 1; 149 return 1;
169} 150}
170 151
@@ -284,6 +265,30 @@ LJLIB_CF(table_pack)
284} 265}
285#endif 266#endif
286 267
268LJLIB_NOREG LJLIB_CF(table_new) LJLIB_REC(.)
269{
270 int32_t a = lj_lib_checkint(L, 1);
271 int32_t h = lj_lib_checkint(L, 2);
272 lua_createtable(L, a, h);
273 return 1;
274}
275
276LJLIB_NOREG LJLIB_CF(table_clear) LJLIB_REC(.)
277{
278 lj_tab_clear(lj_lib_checktab(L, 1));
279 return 0;
280}
281
282static int luaopen_table_new(lua_State *L)
283{
284 return lj_lib_postreg(L, lj_cf_table_new, FF_table_new, "new");
285}
286
287static int luaopen_table_clear(lua_State *L)
288{
289 return lj_lib_postreg(L, lj_cf_table_clear, FF_table_clear, "clear");
290}
291
287/* ------------------------------------------------------------------------ */ 292/* ------------------------------------------------------------------------ */
288 293
289#include "lj_libdef.h" 294#include "lj_libdef.h"
@@ -295,6 +300,8 @@ LUALIB_API int luaopen_table(lua_State *L)
295 lua_getglobal(L, "unpack"); 300 lua_getglobal(L, "unpack");
296 lua_setfield(L, -2, "unpack"); 301 lua_setfield(L, -2, "unpack");
297#endif 302#endif
303 lj_lib_prereg(L, LUA_TABLIBNAME ".new", luaopen_table_new, tabV(L->top-1));
304 lj_lib_prereg(L, LUA_TABLIBNAME ".clear", luaopen_table_clear, tabV(L->top-1));
298 return 1; 305 return 1;
299} 306}
300 307
diff --git a/src/lj_alloc.c b/src/lj_alloc.c
index 7c7ec678..0aad826d 100644
--- a/src/lj_alloc.c
+++ b/src/lj_alloc.c
@@ -77,7 +77,7 @@
77#define WIN32_LEAN_AND_MEAN 77#define WIN32_LEAN_AND_MEAN
78#include <windows.h> 78#include <windows.h>
79 79
80#if LJ_64 80#if LJ_64 && !LJ_GC64
81 81
82/* Undocumented, but hey, that's what we all love so much about Windows. */ 82/* Undocumented, but hey, that's what we all love so much about Windows. */
83typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG zbits, 83typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG zbits,
@@ -174,8 +174,10 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
174#endif 174#endif
175#define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS) 175#define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS)
176 176
177#if LJ_64 177#if LJ_64 && !LJ_GC64
178/* 64 bit mode needs special support for allocating memory in the lower 2GB. */ 178/* 64 bit mode with 32 bit pointers needs special support for allocating
179** memory in the lower 2GB.
180*/
179 181
180#if defined(MAP_32BIT) 182#if defined(MAP_32BIT)
181 183
@@ -258,7 +260,7 @@ static LJ_AINLINE void *CALL_MMAP(size_t size)
258 260
259#else 261#else
260 262
261/* 32 bit mode is easy. */ 263/* 32 bit mode and GC64 mode is easy. */
262static LJ_AINLINE void *CALL_MMAP(size_t size) 264static LJ_AINLINE void *CALL_MMAP(size_t size)
263{ 265{
264 int olderr = errno; 266 int olderr = errno;
@@ -294,7 +296,7 @@ static LJ_AINLINE void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz,
294#define CALL_MREMAP(addr, osz, nsz, mv) CALL_MREMAP_((addr), (osz), (nsz), (mv)) 296#define CALL_MREMAP(addr, osz, nsz, mv) CALL_MREMAP_((addr), (osz), (nsz), (mv))
295#define CALL_MREMAP_NOMOVE 0 297#define CALL_MREMAP_NOMOVE 0
296#define CALL_MREMAP_MAYMOVE 1 298#define CALL_MREMAP_MAYMOVE 1
297#if LJ_64 299#if LJ_64 && !LJ_GC64
298#define CALL_MREMAP_MV CALL_MREMAP_NOMOVE 300#define CALL_MREMAP_MV CALL_MREMAP_NOMOVE
299#else 301#else
300#define CALL_MREMAP_MV CALL_MREMAP_MAYMOVE 302#define CALL_MREMAP_MV CALL_MREMAP_MAYMOVE
diff --git a/src/lj_api.c b/src/lj_api.c
index 3bedb39f..1f09284f 100644
--- a/src/lj_api.c
+++ b/src/lj_api.c
@@ -24,6 +24,7 @@
24#include "lj_trace.h" 24#include "lj_trace.h"
25#include "lj_vm.h" 25#include "lj_vm.h"
26#include "lj_strscan.h" 26#include "lj_strscan.h"
27#include "lj_strfmt.h"
27 28
28/* -- Common helper functions --------------------------------------------- */ 29/* -- Common helper functions --------------------------------------------- */
29 30
@@ -188,7 +189,7 @@ LUA_API int lua_type(lua_State *L, int idx)
188 cTValue *o = index2adr(L, idx); 189 cTValue *o = index2adr(L, idx);
189 if (tvisnumber(o)) { 190 if (tvisnumber(o)) {
190 return LUA_TNUMBER; 191 return LUA_TNUMBER;
191#if LJ_64 192#if LJ_64 && !LJ_GC64
192 } else if (tvislightud(o)) { 193 } else if (tvislightud(o)) {
193 return LUA_TLIGHTUSERDATA; 194 return LUA_TLIGHTUSERDATA;
194#endif 195#endif
@@ -268,7 +269,7 @@ LUA_API int lua_equal(lua_State *L, int idx1, int idx2)
268 return 0; 269 return 0;
269 } else if (tvispri(o1)) { 270 } else if (tvispri(o1)) {
270 return o1 != niltv(L) && o2 != niltv(L); 271 return o1 != niltv(L) && o2 != niltv(L);
271#if LJ_64 272#if LJ_64 && !LJ_GC64
272 } else if (tvislightud(o1)) { 273 } else if (tvislightud(o1)) {
273 return o1->u64 == o2->u64; 274 return o1->u64 == o2->u64;
274#endif 275#endif
@@ -283,8 +284,8 @@ LUA_API int lua_equal(lua_State *L, int idx1, int idx2)
283 } else { 284 } else {
284 L->top = base+2; 285 L->top = base+2;
285 lj_vm_call(L, base, 1+1); 286 lj_vm_call(L, base, 1+1);
286 L->top -= 2; 287 L->top -= 2+LJ_FR2;
287 return tvistruecond(L->top+1); 288 return tvistruecond(L->top+1+LJ_FR2);
288 } 289 }
289 } 290 }
290} 291}
@@ -306,8 +307,8 @@ LUA_API int lua_lessthan(lua_State *L, int idx1, int idx2)
306 } else { 307 } else {
307 L->top = base+2; 308 L->top = base+2;
308 lj_vm_call(L, base, 1+1); 309 lj_vm_call(L, base, 1+1);
309 L->top -= 2; 310 L->top -= 2+LJ_FR2;
310 return tvistruecond(L->top+1); 311 return tvistruecond(L->top+1+LJ_FR2);
311 } 312 }
312 } 313 }
313} 314}
@@ -434,7 +435,7 @@ LUA_API const char *lua_tolstring(lua_State *L, int idx, size_t *len)
434 } else if (tvisnumber(o)) { 435 } else if (tvisnumber(o)) {
435 lj_gc_check(L); 436 lj_gc_check(L);
436 o = index2adr(L, idx); /* GC may move the stack. */ 437 o = index2adr(L, idx); /* GC may move the stack. */
437 s = lj_str_fromnumber(L, o); 438 s = lj_strfmt_number(L, o);
438 setstrV(L, o, s); 439 setstrV(L, o, s);
439 } else { 440 } else {
440 if (len != NULL) *len = 0; 441 if (len != NULL) *len = 0;
@@ -453,7 +454,7 @@ LUALIB_API const char *luaL_checklstring(lua_State *L, int idx, size_t *len)
453 } else if (tvisnumber(o)) { 454 } else if (tvisnumber(o)) {
454 lj_gc_check(L); 455 lj_gc_check(L);
455 o = index2adr(L, idx); /* GC may move the stack. */ 456 o = index2adr(L, idx); /* GC may move the stack. */
456 s = lj_str_fromnumber(L, o); 457 s = lj_strfmt_number(L, o);
457 setstrV(L, o, s); 458 setstrV(L, o, s);
458 } else { 459 } else {
459 lj_err_argt(L, idx, LUA_TSTRING); 460 lj_err_argt(L, idx, LUA_TSTRING);
@@ -475,7 +476,7 @@ LUALIB_API const char *luaL_optlstring(lua_State *L, int idx,
475 } else if (tvisnumber(o)) { 476 } else if (tvisnumber(o)) {
476 lj_gc_check(L); 477 lj_gc_check(L);
477 o = index2adr(L, idx); /* GC may move the stack. */ 478 o = index2adr(L, idx); /* GC may move the stack. */
478 s = lj_str_fromnumber(L, o); 479 s = lj_strfmt_number(L, o);
479 setstrV(L, o, s); 480 setstrV(L, o, s);
480 } else { 481 } else {
481 lj_err_argt(L, idx, LUA_TSTRING); 482 lj_err_argt(L, idx, LUA_TSTRING);
@@ -507,7 +508,7 @@ LUA_API size_t lua_objlen(lua_State *L, int idx)
507 } else if (tvisudata(o)) { 508 } else if (tvisudata(o)) {
508 return udataV(o)->len; 509 return udataV(o)->len;
509 } else if (tvisnumber(o)) { 510 } else if (tvisnumber(o)) {
510 GCstr *s = lj_str_fromnumber(L, o); 511 GCstr *s = lj_strfmt_number(L, o);
511 setstrV(L, o, s); 512 setstrV(L, o, s);
512 return s->len; 513 return s->len;
513 } else { 514 } else {
@@ -545,17 +546,7 @@ LUA_API lua_State *lua_tothread(lua_State *L, int idx)
545 546
546LUA_API const void *lua_topointer(lua_State *L, int idx) 547LUA_API const void *lua_topointer(lua_State *L, int idx)
547{ 548{
548 cTValue *o = index2adr(L, idx); 549 return lj_obj_ptr(index2adr(L, idx));
549 if (tvisudata(o))
550 return uddata(udataV(o));
551 else if (tvislightud(o))
552 return lightudV(o);
553 else if (tviscdata(o))
554 return cdataptr(cdataV(o));
555 else if (tvisgcv(o))
556 return gcV(o);
557 else
558 return NULL;
559} 550}
560 551
561/* -- Stack setters (object creation) ------------------------------------- */ 552/* -- Stack setters (object creation) ------------------------------------- */
@@ -606,7 +597,7 @@ LUA_API const char *lua_pushvfstring(lua_State *L, const char *fmt,
606 va_list argp) 597 va_list argp)
607{ 598{
608 lj_gc_check(L); 599 lj_gc_check(L);
609 return lj_str_pushvf(L, fmt, argp); 600 return lj_strfmt_pushvf(L, fmt, argp);
610} 601}
611 602
612LUA_API const char *lua_pushfstring(lua_State *L, const char *fmt, ...) 603LUA_API const char *lua_pushfstring(lua_State *L, const char *fmt, ...)
@@ -615,7 +606,7 @@ LUA_API const char *lua_pushfstring(lua_State *L, const char *fmt, ...)
615 va_list argp; 606 va_list argp;
616 lj_gc_check(L); 607 lj_gc_check(L);
617 va_start(argp, fmt); 608 va_start(argp, fmt);
618 ret = lj_str_pushvf(L, fmt, argp); 609 ret = lj_strfmt_pushvf(L, fmt, argp);
619 va_end(argp); 610 va_end(argp);
620 return ret; 611 return ret;
621} 612}
@@ -649,10 +640,8 @@ LUA_API void lua_pushlightuserdata(lua_State *L, void *p)
649 640
650LUA_API void lua_createtable(lua_State *L, int narray, int nrec) 641LUA_API void lua_createtable(lua_State *L, int narray, int nrec)
651{ 642{
652 GCtab *t;
653 lj_gc_check(L); 643 lj_gc_check(L);
654 t = lj_tab_new(L, (uint32_t)(narray > 0 ? narray+1 : 0), hsize2hbits(nrec)); 644 settabV(L, L->top, lj_tab_new_ah(L, narray, nrec));
655 settabV(L, L->top, t);
656 incr_top(L); 645 incr_top(L);
657} 646}
658 647
@@ -715,8 +704,8 @@ LUA_API void lua_concat(lua_State *L, int n)
715 n -= (int)(L->top - top); 704 n -= (int)(L->top - top);
716 L->top = top+2; 705 L->top = top+2;
717 lj_vm_call(L, top, 1+1); 706 lj_vm_call(L, top, 1+1);
718 L->top--; 707 L->top -= 1+LJ_FR2;
719 copyTV(L, L->top-1, L->top); 708 copyTV(L, L->top-1, L->top+LJ_FR2);
720 } while (--n > 0); 709 } while (--n > 0);
721 } else if (n == 0) { /* Push empty string. */ 710 } else if (n == 0) { /* Push empty string. */
722 setstrV(L, L->top, &G(L)->strempty); 711 setstrV(L, L->top, &G(L)->strempty);
@@ -735,8 +724,8 @@ LUA_API void lua_gettable(lua_State *L, int idx)
735 if (v == NULL) { 724 if (v == NULL) {
736 L->top += 2; 725 L->top += 2;
737 lj_vm_call(L, L->top-2, 1+1); 726 lj_vm_call(L, L->top-2, 1+1);
738 L->top -= 2; 727 L->top -= 2+LJ_FR2;
739 v = L->top+1; 728 v = L->top+1+LJ_FR2;
740 } 729 }
741 copyTV(L, L->top-1, v); 730 copyTV(L, L->top-1, v);
742} 731}
@@ -751,8 +740,8 @@ LUA_API void lua_getfield(lua_State *L, int idx, const char *k)
751 if (v == NULL) { 740 if (v == NULL) {
752 L->top += 2; 741 L->top += 2;
753 lj_vm_call(L, L->top-2, 1+1); 742 lj_vm_call(L, L->top-2, 1+1);
754 L->top -= 2; 743 L->top -= 2+LJ_FR2;
755 v = L->top+1; 744 v = L->top+1+LJ_FR2;
756 } 745 }
757 copyTV(L, L->top, v); 746 copyTV(L, L->top, v);
758 incr_top(L); 747 incr_top(L);
@@ -893,13 +882,14 @@ LUA_API void lua_settable(lua_State *L, int idx)
893 o = lj_meta_tset(L, t, L->top-2); 882 o = lj_meta_tset(L, t, L->top-2);
894 if (o) { 883 if (o) {
895 /* NOBARRIER: lj_meta_tset ensures the table is not black. */ 884 /* NOBARRIER: lj_meta_tset ensures the table is not black. */
896 copyTV(L, o, L->top-1);
897 L->top -= 2; 885 L->top -= 2;
886 copyTV(L, o, L->top+1);
898 } else { 887 } else {
899 L->top += 3; 888 TValue *base = L->top;
900 copyTV(L, L->top-1, L->top-6); 889 copyTV(L, base+2, base-3-2*LJ_FR2);
901 lj_vm_call(L, L->top-3, 0+1); 890 L->top = base+3;
902 L->top -= 3; 891 lj_vm_call(L, base, 0+1);
892 L->top -= 3+LJ_FR2;
903 } 893 }
904} 894}
905 895
@@ -913,14 +903,14 @@ LUA_API void lua_setfield(lua_State *L, int idx, const char *k)
913 setstrV(L, &key, lj_str_newz(L, k)); 903 setstrV(L, &key, lj_str_newz(L, k));
914 o = lj_meta_tset(L, t, &key); 904 o = lj_meta_tset(L, t, &key);
915 if (o) { 905 if (o) {
916 L->top--;
917 /* NOBARRIER: lj_meta_tset ensures the table is not black. */ 906 /* NOBARRIER: lj_meta_tset ensures the table is not black. */
918 copyTV(L, o, L->top); 907 copyTV(L, o, --L->top);
919 } else { 908 } else {
920 L->top += 3; 909 TValue *base = L->top;
921 copyTV(L, L->top-1, L->top-6); 910 copyTV(L, base+2, base-3-2*LJ_FR2);
922 lj_vm_call(L, L->top-3, 0+1); 911 L->top = base+3;
923 L->top -= 2; 912 lj_vm_call(L, base, 0+1);
913 L->top -= 2+LJ_FR2;
924 } 914 }
925} 915}
926 916
@@ -1027,11 +1017,24 @@ LUA_API const char *lua_setupvalue(lua_State *L, int idx, int n)
1027 1017
1028/* -- Calls --------------------------------------------------------------- */ 1018/* -- Calls --------------------------------------------------------------- */
1029 1019
1020#if LJ_FR2
1021static TValue *api_call_base(lua_State *L, int nargs)
1022{
1023 TValue *o = L->top, *base = o - nargs;
1024 L->top = o+1;
1025 for (; o > base; o--) copyTV(L, o, o-1);
1026 setnilV(o);
1027 return o+1;
1028}
1029#else
1030#define api_call_base(L, nargs) (L->top - (nargs))
1031#endif
1032
1030LUA_API void lua_call(lua_State *L, int nargs, int nresults) 1033LUA_API void lua_call(lua_State *L, int nargs, int nresults)
1031{ 1034{
1032 api_check(L, L->status == 0 || L->status == LUA_ERRERR); 1035 api_check(L, L->status == 0 || L->status == LUA_ERRERR);
1033 api_checknelems(L, nargs+1); 1036 api_checknelems(L, nargs+1);
1034 lj_vm_call(L, L->top - nargs, nresults+1); 1037 lj_vm_call(L, api_call_base(L, nargs), nresults+1);
1035} 1038}
1036 1039
1037LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc) 1040LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc)
@@ -1049,7 +1052,7 @@ LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc)
1049 api_checkvalidindex(L, o); 1052 api_checkvalidindex(L, o);
1050 ef = savestack(L, o); 1053 ef = savestack(L, o);
1051 } 1054 }
1052 status = lj_vm_pcall(L, L->top - nargs, nresults+1, ef); 1055 status = lj_vm_pcall(L, api_call_base(L, nargs), nresults+1, ef);
1053 if (status) hook_restore(g, oldh); 1056 if (status) hook_restore(g, oldh);
1054 return status; 1057 return status;
1055} 1058}
@@ -1057,12 +1060,14 @@ LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc)
1057static TValue *cpcall(lua_State *L, lua_CFunction func, void *ud) 1060static TValue *cpcall(lua_State *L, lua_CFunction func, void *ud)
1058{ 1061{
1059 GCfunc *fn = lj_func_newC(L, 0, getcurrenv(L)); 1062 GCfunc *fn = lj_func_newC(L, 0, getcurrenv(L));
1063 TValue *top = L->top;
1060 fn->c.f = func; 1064 fn->c.f = func;
1061 setfuncV(L, L->top, fn); 1065 setfuncV(L, top++, fn);
1062 setlightudV(L->top+1, checklightudptr(L, ud)); 1066 if (LJ_FR2) setnilV(top++);
1067 setlightudV(top++, checklightudptr(L, ud));
1063 cframe_nres(L->cframe) = 1+0; /* Zero results. */ 1068 cframe_nres(L->cframe) = 1+0; /* Zero results. */
1064 L->top += 2; 1069 L->top = top;
1065 return L->top-1; /* Now call the newly allocated C function. */ 1070 return top-1; /* Now call the newly allocated C function. */
1066} 1071}
1067 1072
1068LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud) 1073LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud)
@@ -1079,10 +1084,11 @@ LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud)
1079LUALIB_API int luaL_callmeta(lua_State *L, int idx, const char *field) 1084LUALIB_API int luaL_callmeta(lua_State *L, int idx, const char *field)
1080{ 1085{
1081 if (luaL_getmetafield(L, idx, field)) { 1086 if (luaL_getmetafield(L, idx, field)) {
1082 TValue *base = L->top--; 1087 TValue *top = L->top--;
1083 copyTV(L, base, index2adr(L, idx)); 1088 if (LJ_FR2) setnilV(top++);
1084 L->top = base+1; 1089 copyTV(L, top++, index2adr(L, idx));
1085 lj_vm_call(L, base, 1+1); 1090 L->top = top;
1091 lj_vm_call(L, top-1, 1+1);
1086 return 1; 1092 return 1;
1087 } 1093 }
1088 return 0; 1094 return 0;
@@ -1109,12 +1115,14 @@ LUA_API int lua_yield(lua_State *L, int nresults)
1109 } else { /* Yield from hook: add a pseudo-frame. */ 1115 } else { /* Yield from hook: add a pseudo-frame. */
1110 TValue *top = L->top; 1116 TValue *top = L->top;
1111 hook_leave(g); 1117 hook_leave(g);
1112 top->u64 = cframe_multres(cf); 1118 (top++)->u64 = cframe_multres(cf);
1113 setcont(top+1, lj_cont_hook); 1119 setcont(top, lj_cont_hook);
1114 setframe_pc(top+1, cframe_pc(cf)-1); 1120 if (LJ_FR2) top++;
1115 setframe_gc(top+2, obj2gco(L)); 1121 setframe_pc(top, cframe_pc(cf)-1);
1116 setframe_ftsz(top+2, (int)((char *)(top+3)-(char *)L->base)+FRAME_CONT); 1122 if (LJ_FR2) top++;
1117 L->top = L->base = top+3; 1123 setframe_gc(top, obj2gco(L), LJ_TTHREAD);
1124 setframe_ftsz(top, ((char *)(top+1)-(char *)L->base)+FRAME_CONT);
1125 L->top = L->base = top+1;
1118#if LJ_TARGET_X64 1126#if LJ_TARGET_X64
1119 lj_err_throw(L, LUA_YIELD); 1127 lj_err_throw(L, LUA_YIELD);
1120#else 1128#else
@@ -1131,7 +1139,9 @@ LUA_API int lua_yield(lua_State *L, int nresults)
1131LUA_API int lua_resume(lua_State *L, int nargs) 1139LUA_API int lua_resume(lua_State *L, int nargs)
1132{ 1140{
1133 if (L->cframe == NULL && L->status <= LUA_YIELD) 1141 if (L->cframe == NULL && L->status <= LUA_YIELD)
1134 return lj_vm_resume(L, L->top - nargs, 0, 0); 1142 return lj_vm_resume(L,
1143 L->status == 0 ? api_call_base(L, nargs) : L->top - nargs,
1144 0, 0);
1135 L->top = L->base; 1145 L->top = L->base;
1136 setstrV(L, L->top, lj_err_str(L, LJ_ERR_COSUSP)); 1146 setstrV(L, L->top, lj_err_str(L, LJ_ERR_COSUSP));
1137 incr_top(L); 1147 incr_top(L);
@@ -1161,7 +1171,7 @@ LUA_API int lua_gc(lua_State *L, int what, int data)
1161 res = (int)(g->gc.total & 0x3ff); 1171 res = (int)(g->gc.total & 0x3ff);
1162 break; 1172 break;
1163 case LUA_GCSTEP: { 1173 case LUA_GCSTEP: {
1164 MSize a = (MSize)data << 10; 1174 GCSize a = (GCSize)data << 10;
1165 g->gc.threshold = (a <= g->gc.total) ? (g->gc.total - a) : 0; 1175 g->gc.threshold = (a <= g->gc.total) ? (g->gc.total - a) : 0;
1166 while (g->gc.total >= g->gc.threshold) 1176 while (g->gc.total >= g->gc.threshold)
1167 if (lj_gc_step(L) > 0) { 1177 if (lj_gc_step(L) > 0) {
diff --git a/src/lj_arch.h b/src/lj_arch.h
index d3a9d57d..61c7e19f 100644
--- a/src/lj_arch.h
+++ b/src/lj_arch.h
@@ -19,10 +19,10 @@
19#define LUAJIT_ARCH_x64 2 19#define LUAJIT_ARCH_x64 2
20#define LUAJIT_ARCH_ARM 3 20#define LUAJIT_ARCH_ARM 3
21#define LUAJIT_ARCH_arm 3 21#define LUAJIT_ARCH_arm 3
22#define LUAJIT_ARCH_PPC 4 22#define LUAJIT_ARCH_ARM64 4
23#define LUAJIT_ARCH_ppc 4 23#define LUAJIT_ARCH_arm64 4
24#define LUAJIT_ARCH_PPCSPE 5 24#define LUAJIT_ARCH_PPC 5
25#define LUAJIT_ARCH_ppcspe 5 25#define LUAJIT_ARCH_ppc 5
26#define LUAJIT_ARCH_MIPS 6 26#define LUAJIT_ARCH_MIPS 6
27#define LUAJIT_ARCH_mips 6 27#define LUAJIT_ARCH_mips 6
28 28
@@ -43,12 +43,10 @@
43#define LUAJIT_TARGET LUAJIT_ARCH_X64 43#define LUAJIT_TARGET LUAJIT_ARCH_X64
44#elif defined(__arm__) || defined(__arm) || defined(__ARM__) || defined(__ARM) 44#elif defined(__arm__) || defined(__arm) || defined(__ARM__) || defined(__ARM)
45#define LUAJIT_TARGET LUAJIT_ARCH_ARM 45#define LUAJIT_TARGET LUAJIT_ARCH_ARM
46#elif defined(__aarch64__)
47#define LUAJIT_TARGET LUAJIT_ARCH_ARM64
46#elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC) 48#elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC)
47#ifdef __NO_FPRS__
48#define LUAJIT_TARGET LUAJIT_ARCH_PPCSPE
49#else
50#define LUAJIT_TARGET LUAJIT_ARCH_PPC 49#define LUAJIT_TARGET LUAJIT_ARCH_PPC
51#endif
52#elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS) 50#elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS)
53#define LUAJIT_TARGET LUAJIT_ARCH_MIPS 51#define LUAJIT_TARGET LUAJIT_ARCH_MIPS
54#else 52#else
@@ -96,7 +94,7 @@
96#define LJ_TARGET_WINDOWS (LUAJIT_OS == LUAJIT_OS_WINDOWS) 94#define LJ_TARGET_WINDOWS (LUAJIT_OS == LUAJIT_OS_WINDOWS)
97#define LJ_TARGET_LINUX (LUAJIT_OS == LUAJIT_OS_LINUX) 95#define LJ_TARGET_LINUX (LUAJIT_OS == LUAJIT_OS_LINUX)
98#define LJ_TARGET_OSX (LUAJIT_OS == LUAJIT_OS_OSX) 96#define LJ_TARGET_OSX (LUAJIT_OS == LUAJIT_OS_OSX)
99#define LJ_TARGET_IOS (LJ_TARGET_OSX && LUAJIT_TARGET == LUAJIT_ARCH_ARM) 97#define LJ_TARGET_IOS (LJ_TARGET_OSX && (LUAJIT_TARGET == LUAJIT_ARCH_ARM || LUAJIT_TARGET == LUAJIT_ARCH_ARM64))
100#define LJ_TARGET_POSIX (LUAJIT_OS > LUAJIT_OS_WINDOWS) 98#define LJ_TARGET_POSIX (LUAJIT_OS > LUAJIT_OS_WINDOWS)
101#define LJ_TARGET_DLOPEN LJ_TARGET_POSIX 99#define LJ_TARGET_DLOPEN LJ_TARGET_POSIX
102 100
@@ -193,15 +191,43 @@
193#define LJ_ARCH_VERSION 50 191#define LJ_ARCH_VERSION 50
194#endif 192#endif
195 193
194#elif LUAJIT_TARGET == LUAJIT_ARCH_ARM64
195
196#define LJ_ARCH_NAME "arm64"
197#define LJ_ARCH_BITS 64
198#define LJ_ARCH_ENDIAN LUAJIT_LE
199#define LJ_TARGET_ARM64 1
200#define LJ_TARGET_EHRETREG 0
201#define LJ_TARGET_JUMPRANGE 27 /* +-2^27 = +-128MB */
202#define LJ_TARGET_MASKSHIFT 1
203#define LJ_TARGET_MASKROT 1
204#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
205#define LJ_TARGET_GC64 1
206#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
207#define LJ_ARCH_NOJIT 1 /* NYI */
208
209#define LJ_ARCH_VERSION 80
210
196#elif LUAJIT_TARGET == LUAJIT_ARCH_PPC 211#elif LUAJIT_TARGET == LUAJIT_ARCH_PPC
197 212
198#define LJ_ARCH_NAME "ppc" 213#if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
214#define LJ_ARCH_ENDIAN LUAJIT_LE
215#else
216#define LJ_ARCH_ENDIAN LUAJIT_BE
217#endif
218
199#if _LP64 219#if _LP64
200#define LJ_ARCH_BITS 64 220#define LJ_ARCH_BITS 64
221#if LJ_ARCH_ENDIAN == LUAJIT_LE
222#define LJ_ARCH_NAME "ppc64le"
223#else
224#define LJ_ARCH_NAME "ppc64"
225#endif
201#else 226#else
202#define LJ_ARCH_BITS 32 227#define LJ_ARCH_BITS 32
228#define LJ_ARCH_NAME "ppc"
203#endif 229#endif
204#define LJ_ARCH_ENDIAN LUAJIT_BE 230
205#define LJ_TARGET_PPC 1 231#define LJ_TARGET_PPC 1
206#define LJ_TARGET_EHRETREG 3 232#define LJ_TARGET_EHRETREG 3
207#define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */ 233#define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */
@@ -210,6 +236,15 @@
210#define LJ_TARGET_UNIFYROT 1 /* Want only IR_BROL. */ 236#define LJ_TARGET_UNIFYROT 1 /* Want only IR_BROL. */
211#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE 237#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE
212 238
239#if LJ_TARGET_CONSOLE
240#define LJ_ARCH_PPC32ON64 1
241#define LJ_ARCH_NOFFI 1
242#elif LJ_ARCH_BITS == 64
243#define LJ_ARCH_PPC64 1
244#define LJ_TARGET_GC64 1
245#define LJ_ARCH_NOJIT 1 /* NYI */
246#endif
247
213#if _ARCH_PWR7 248#if _ARCH_PWR7
214#define LJ_ARCH_VERSION 70 249#define LJ_ARCH_VERSION 70
215#elif _ARCH_PWR6 250#elif _ARCH_PWR6
@@ -223,10 +258,6 @@
223#else 258#else
224#define LJ_ARCH_VERSION 0 259#define LJ_ARCH_VERSION 0
225#endif 260#endif
226#if __PPC64__ || __powerpc64__ || LJ_TARGET_CONSOLE
227#define LJ_ARCH_PPC64 1
228#define LJ_ARCH_NOFFI 1
229#endif
230#if _ARCH_PPCSQ 261#if _ARCH_PPCSQ
231#define LJ_ARCH_SQRT 1 262#define LJ_ARCH_SQRT 1
232#endif 263#endif
@@ -240,25 +271,6 @@
240#define LJ_ARCH_XENON 1 271#define LJ_ARCH_XENON 1
241#endif 272#endif
242 273
243#elif LUAJIT_TARGET == LUAJIT_ARCH_PPCSPE
244
245#define LJ_ARCH_NAME "ppcspe"
246#define LJ_ARCH_BITS 32
247#define LJ_ARCH_ENDIAN LUAJIT_BE
248#ifndef LJ_ABI_SOFTFP
249#define LJ_ABI_SOFTFP 1
250#endif
251#define LJ_ABI_EABI 1
252#define LJ_TARGET_PPCSPE 1
253#define LJ_TARGET_EHRETREG 3
254#define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */
255#define LJ_TARGET_MASKSHIFT 0
256#define LJ_TARGET_MASKROT 1
257#define LJ_TARGET_UNIFYROT 1 /* Want only IR_BROL. */
258#define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE
259#define LJ_ARCH_NOFFI 1 /* NYI: comparisons, calls. */
260#define LJ_ARCH_NOJIT 1
261
262#elif LUAJIT_TARGET == LUAJIT_ARCH_MIPS 274#elif LUAJIT_TARGET == LUAJIT_ARCH_MIPS
263 275
264#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) 276#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL)
@@ -305,6 +317,16 @@
305#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2) 317#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2)
306#error "Need at least GCC 4.2 or newer" 318#error "Need at least GCC 4.2 or newer"
307#endif 319#endif
320#elif LJ_TARGET_ARM64
321#if __clang__
322#if (__clang_major__ < 3) || ((__clang_major__ == 3) && __clang_minor__ < 5)
323#error "Need at least Clang 3.5 or newer"
324#endif
325#else
326#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 8)
327#error "Need at least GCC 4.8 or newer"
328#endif
329#endif
308#elif !LJ_TARGET_PS3 330#elif !LJ_TARGET_PS3
309#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 3) 331#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 3)
310#error "Need at least GCC 4.3 or newer" 332#error "Need at least GCC 4.3 or newer"
@@ -328,15 +350,25 @@
328#if !(__ARM_EABI__ || LJ_TARGET_IOS) 350#if !(__ARM_EABI__ || LJ_TARGET_IOS)
329#error "Only ARM EABI or iOS 3.0+ ABI is supported" 351#error "Only ARM EABI or iOS 3.0+ ABI is supported"
330#endif 352#endif
331#elif LJ_TARGET_PPC || LJ_TARGET_PPCSPE 353#elif LJ_TARGET_ARM64
354#if defined(__AARCH64EB__)
355#error "No support for big-endian ARM64"
356#endif
357#if defined(_ILP32)
358#error "No support for ILP32 model on ARM64"
359#endif
360#elif LJ_TARGET_PPC
332#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE) 361#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
333#error "No support for PowerPC CPUs without double-precision FPU" 362#error "No support for PowerPC CPUs without double-precision FPU"
334#endif 363#endif
335#if defined(_LITTLE_ENDIAN) 364#if !LJ_ARCH_PPC64 && LJ_ARCH_ENDIAN == LUAJIT_LE
336#error "No support for little-endian PowerPC" 365#error "No support for little-endian PPC32"
337#endif 366#endif
338#if defined(_LP64) 367#if LJ_ARCH_PPC64
339#error "No support for PowerPC 64 bit mode" 368#error "No support for PowerPC 64 bit mode (yet)"
369#endif
370#ifdef __NO_FPRS__
371#error "No support for PPC/e500 anymore (use LuaJIT 2.0)"
340#endif 372#endif
341#elif LJ_TARGET_MIPS 373#elif LJ_TARGET_MIPS
342#if defined(__mips_soft_float) 374#if defined(__mips_soft_float)
@@ -369,8 +401,22 @@
369#endif 401#endif
370#endif 402#endif
371 403
404/* 64 bit GC references. */
405#if LJ_TARGET_GC64
406#define LJ_GC64 1
407#else
408#define LJ_GC64 0
409#endif
410
411/* 2-slot frame info. */
412#if LJ_GC64
413#define LJ_FR2 1
414#else
415#define LJ_FR2 0
416#endif
417
372/* Disable or enable the JIT compiler. */ 418/* Disable or enable the JIT compiler. */
373#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT) 419#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT) || LJ_FR2 || LJ_GC64
374#define LJ_HASJIT 0 420#define LJ_HASJIT 0
375#else 421#else
376#define LJ_HASJIT 1 422#define LJ_HASJIT 1
@@ -383,6 +429,21 @@
383#define LJ_HASFFI 1 429#define LJ_HASFFI 1
384#endif 430#endif
385 431
432#if defined(LUAJIT_DISABLE_PROFILE)
433#define LJ_HASPROFILE 0
434#elif LJ_TARGET_POSIX
435#define LJ_HASPROFILE 1
436#define LJ_PROFILE_SIGPROF 1
437#elif LJ_TARGET_PS3
438#define LJ_HASPROFILE 1
439#define LJ_PROFILE_PTHREAD 1
440#elif LJ_TARGET_WINDOWS || LJ_TARGET_XBOX360
441#define LJ_HASPROFILE 1
442#define LJ_PROFILE_WTHREAD 1
443#else
444#define LJ_HASPROFILE 0
445#endif
446
386#ifndef LJ_ARCH_HASFPU 447#ifndef LJ_ARCH_HASFPU
387#define LJ_ARCH_HASFPU 1 448#define LJ_ARCH_HASFPU 1
388#endif 449#endif
@@ -415,11 +476,11 @@
415#define LJ_TARGET_UNALIGNED 0 476#define LJ_TARGET_UNALIGNED 0
416#endif 477#endif
417 478
418/* Various workarounds for embedded operating systems. */ 479/* Various workarounds for embedded operating systems or weak C runtimes. */
419#if (defined(__ANDROID__) && !defined(LJ_TARGET_X86ORX64)) || defined(__symbian__) || LJ_TARGET_XBOX360 480#if (defined(__ANDROID__) && !defined(LJ_TARGET_X86ORX64)) || defined(__symbian__) || LJ_TARGET_XBOX360 || LJ_TARGET_WINDOWS
420#define LUAJIT_NO_LOG2 481#define LUAJIT_NO_LOG2
421#endif 482#endif
422#if defined(__symbian__) 483#if defined(__symbian__) || LJ_TARGET_WINDOWS
423#define LUAJIT_NO_EXP2 484#define LUAJIT_NO_EXP2
424#endif 485#endif
425 486
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 804b4dc8..9db950a2 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -179,6 +179,12 @@ IRFLDEF(FLOFS)
179#error "Missing instruction emitter for target CPU" 179#error "Missing instruction emitter for target CPU"
180#endif 180#endif
181 181
182/* Generic load/store of register from/to stack slot. */
183#define emit_spload(as, ir, r, ofs) \
184 emit_loadofs(as, ir, (r), RID_SP, (ofs))
185#define emit_spstore(as, ir, r, ofs) \
186 emit_storeofs(as, ir, (r), RID_SP, (ofs))
187
182/* -- Register allocator debugging ---------------------------------------- */ 188/* -- Register allocator debugging ---------------------------------------- */
183 189
184/* #define LUAJIT_DEBUG_RA */ 190/* #define LUAJIT_DEBUG_RA */
@@ -336,7 +342,7 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
336 emit_getgl(as, r, jit_base); 342 emit_getgl(as, r, jit_base);
337 } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) { 343 } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) {
338 lua_assert(irt_isnil(ir->t)); /* REF_NIL stores ASMREF_L register. */ 344 lua_assert(irt_isnil(ir->t)); /* REF_NIL stores ASMREF_L register. */
339 emit_getgl(as, r, jit_L); 345 emit_getgl(as, r, cur_L);
340#if LJ_64 346#if LJ_64
341 } else if (ir->o == IR_KINT64) { 347 } else if (ir->o == IR_KINT64) {
342 emit_loadu64(as, r, ir_kint64(ir)->u64); 348 emit_loadu64(as, r, ir_kint64(ir)->u64);
@@ -694,7 +700,7 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref)
694 emit_loadu64(as, dest, ir_kint64(ir)->u64); 700 emit_loadu64(as, dest, ir_kint64(ir)->u64);
695 return; 701 return;
696#endif 702#endif
697 } else { 703 } else if (ir->o != IR_KPRI) {
698 lua_assert(ir->o == IR_KINT || ir->o == IR_KGC || 704 lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
699 ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL); 705 ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL);
700 emit_loadi(as, dest, ir->i); 706 emit_loadi(as, dest, ir->i);
@@ -944,44 +950,6 @@ static void asm_snap_prep(ASMState *as)
944 950
945/* -- Miscellaneous helpers ----------------------------------------------- */ 951/* -- Miscellaneous helpers ----------------------------------------------- */
946 952
947/* Collect arguments from CALL* and CARG instructions. */
948static void asm_collectargs(ASMState *as, IRIns *ir,
949 const CCallInfo *ci, IRRef *args)
950{
951 uint32_t n = CCI_NARGS(ci);
952 lua_assert(n <= CCI_NARGS_MAX*2); /* Account for split args. */
953 if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
954 while (n-- > 1) {
955 ir = IR(ir->op1);
956 lua_assert(ir->o == IR_CARG);
957 args[n] = ir->op2 == REF_NIL ? 0 : ir->op2;
958 }
959 args[0] = ir->op1 == REF_NIL ? 0 : ir->op1;
960 lua_assert(IR(ir->op1)->o != IR_CARG);
961}
962
963/* Reconstruct CCallInfo flags for CALLX*. */
964static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
965{
966 uint32_t nargs = 0;
967 if (ir->op1 != REF_NIL) { /* Count number of arguments first. */
968 IRIns *ira = IR(ir->op1);
969 nargs++;
970 while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); }
971 }
972#if LJ_HASFFI
973 if (IR(ir->op2)->o == IR_CARG) { /* Copy calling convention info. */
974 CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i;
975 CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id);
976 nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0);
977#if LJ_TARGET_X86
978 nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT);
979#endif
980 }
981#endif
982 return (nargs | (ir->t.irt << CCI_OTSHIFT));
983}
984
985/* Calculate stack adjustment. */ 953/* Calculate stack adjustment. */
986static int32_t asm_stack_adjust(ASMState *as) 954static int32_t asm_stack_adjust(ASMState *as)
987{ 955{
@@ -1066,6 +1034,259 @@ static void asm_gcstep(ASMState *as, IRIns *ir)
1066 as->gcsteps = 0x80000000; /* Prevent implicit GC check further up. */ 1034 as->gcsteps = 0x80000000; /* Prevent implicit GC check further up. */
1067} 1035}
1068 1036
1037/* -- Buffer operations --------------------------------------------------- */
1038
1039static void asm_tvptr(ASMState *as, Reg dest, IRRef ref);
1040
1041static void asm_bufhdr(ASMState *as, IRIns *ir)
1042{
1043 Reg sb = ra_dest(as, ir, RSET_GPR);
1044 if ((ir->op2 & IRBUFHDR_APPEND)) {
1045 /* Rematerialize const buffer pointer instead of likely spill. */
1046 IRIns *irp = IR(ir->op1);
1047 if (!(ra_hasreg(irp->r) || irp == ir-1 ||
1048 (irp == ir-2 && !ra_used(ir-1)))) {
1049 while (!(irp->o == IR_BUFHDR && !(irp->op2 & IRBUFHDR_APPEND)))
1050 irp = IR(irp->op1);
1051 if (irref_isk(irp->op1)) {
1052 ra_weak(as, ra_allocref(as, ir->op1, RSET_GPR));
1053 ir = irp;
1054 }
1055 }
1056 } else {
1057 Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
1058 /* Passing ir isn't strictly correct, but it's an IRT_P32, too. */
1059 emit_storeofs(as, ir, tmp, sb, offsetof(SBuf, p));
1060 emit_loadofs(as, ir, tmp, sb, offsetof(SBuf, b));
1061 }
1062#if LJ_TARGET_X86ORX64
1063 ra_left(as, sb, ir->op1);
1064#else
1065 ra_leftov(as, sb, ir->op1);
1066#endif
1067}
1068
1069static void asm_bufput(ASMState *as, IRIns *ir)
1070{
1071 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_putstr];
1072 IRRef args[3];
1073 IRIns *irs;
1074 int kchar = -1;
1075 args[0] = ir->op1; /* SBuf * */
1076 args[1] = ir->op2; /* GCstr * */
1077 irs = IR(ir->op2);
1078 lua_assert(irt_isstr(irs->t));
1079 if (irs->o == IR_KGC) {
1080 GCstr *s = ir_kstr(irs);
1081 if (s->len == 1) { /* Optimize put of single-char string constant. */
1082 kchar = strdata(s)[0];
1083 args[1] = ASMREF_TMP1; /* int, truncated to char */
1084 ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
1085 }
1086 } else if (mayfuse(as, ir->op2) && ra_noreg(irs->r)) {
1087 if (irs->o == IR_TOSTR) { /* Fuse number to string conversions. */
1088 if (irs->op2 == IRTOSTR_NUM) {
1089 args[1] = ASMREF_TMP1; /* TValue * */
1090 ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putnum];
1091 } else {
1092 lua_assert(irt_isinteger(IR(irs->op1)->t));
1093 args[1] = irs->op1; /* int */
1094 if (irs->op2 == IRTOSTR_INT)
1095 ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putint];
1096 else
1097 ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
1098 }
1099 } else if (irs->o == IR_SNEW) { /* Fuse string allocation. */
1100 args[1] = irs->op1; /* const void * */
1101 args[2] = irs->op2; /* MSize */
1102 ci = &lj_ir_callinfo[IRCALL_lj_buf_putmem];
1103 }
1104 }
1105 asm_setupresult(as, ir, ci); /* SBuf * */
1106 asm_gencall(as, ci, args);
1107 if (args[1] == ASMREF_TMP1) {
1108 Reg tmp = ra_releasetmp(as, ASMREF_TMP1);
1109 if (kchar == -1)
1110 asm_tvptr(as, tmp, irs->op1);
1111 else
1112 ra_allockreg(as, kchar, tmp);
1113 }
1114}
1115
1116static void asm_bufstr(ASMState *as, IRIns *ir)
1117{
1118 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_tostr];
1119 IRRef args[1];
1120 args[0] = ir->op1; /* SBuf *sb */
1121 as->gcsteps++;
1122 asm_setupresult(as, ir, ci); /* GCstr * */
1123 asm_gencall(as, ci, args);
1124}
1125
1126/* -- Type conversions ---------------------------------------------------- */
1127
1128static void asm_tostr(ASMState *as, IRIns *ir)
1129{
1130 const CCallInfo *ci;
1131 IRRef args[2];
1132 args[0] = ASMREF_L;
1133 as->gcsteps++;
1134 if (ir->op2 == IRTOSTR_NUM) {
1135 args[1] = ASMREF_TMP1; /* cTValue * */
1136 ci = &lj_ir_callinfo[IRCALL_lj_strfmt_num];
1137 } else {
1138 args[1] = ir->op1; /* int32_t k */
1139 if (ir->op2 == IRTOSTR_INT)
1140 ci = &lj_ir_callinfo[IRCALL_lj_strfmt_int];
1141 else
1142 ci = &lj_ir_callinfo[IRCALL_lj_strfmt_char];
1143 }
1144 asm_setupresult(as, ir, ci); /* GCstr * */
1145 asm_gencall(as, ci, args);
1146 if (ir->op2 == IRTOSTR_NUM)
1147 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
1148}
1149
1150#if LJ_32 && LJ_HASFFI && !LJ_SOFTFP && !LJ_TARGET_X86
1151static void asm_conv64(ASMState *as, IRIns *ir)
1152{
1153 IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
1154 IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
1155 IRCallID id;
1156 IRRef args[2];
1157 lua_assert((ir-1)->o == IR_CONV && ir->o == IR_HIOP);
1158 args[LJ_BE] = (ir-1)->op1;
1159 args[LJ_LE] = ir->op1;
1160 if (st == IRT_NUM || st == IRT_FLOAT) {
1161 id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
1162 ir--;
1163 } else {
1164 id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
1165 }
1166 {
1167#if LJ_TARGET_ARM && !LJ_ABI_SOFTFP
1168 CCallInfo cim = lj_ir_callinfo[id], *ci = &cim;
1169 cim.flags |= CCI_VARARG; /* These calls don't use the hard-float ABI! */
1170#else
1171 const CCallInfo *ci = &lj_ir_callinfo[id];
1172#endif
1173 asm_setupresult(as, ir, ci);
1174 asm_gencall(as, ci, args);
1175 }
1176}
1177#endif
1178
1179/* -- Memory references --------------------------------------------------- */
1180
1181static void asm_newref(ASMState *as, IRIns *ir)
1182{
1183 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
1184 IRRef args[3];
1185 if (ir->r == RID_SINK)
1186 return;
1187 args[0] = ASMREF_L; /* lua_State *L */
1188 args[1] = ir->op1; /* GCtab *t */
1189 args[2] = ASMREF_TMP1; /* cTValue *key */
1190 asm_setupresult(as, ir, ci); /* TValue * */
1191 asm_gencall(as, ci, args);
1192 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
1193}
1194
1195static void asm_lref(ASMState *as, IRIns *ir)
1196{
1197 Reg r = ra_dest(as, ir, RSET_GPR);
1198#if LJ_TARGET_X86ORX64
1199 ra_left(as, r, ASMREF_L);
1200#else
1201 ra_leftov(as, r, ASMREF_L);
1202#endif
1203}
1204
1205/* -- Calls --------------------------------------------------------------- */
1206
1207/* Collect arguments from CALL* and CARG instructions. */
1208static void asm_collectargs(ASMState *as, IRIns *ir,
1209 const CCallInfo *ci, IRRef *args)
1210{
1211 uint32_t n = CCI_XNARGS(ci);
1212 lua_assert(n <= CCI_NARGS_MAX*2); /* Account for split args. */
1213 if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
1214 while (n-- > 1) {
1215 ir = IR(ir->op1);
1216 lua_assert(ir->o == IR_CARG);
1217 args[n] = ir->op2 == REF_NIL ? 0 : ir->op2;
1218 }
1219 args[0] = ir->op1 == REF_NIL ? 0 : ir->op1;
1220 lua_assert(IR(ir->op1)->o != IR_CARG);
1221}
1222
1223/* Reconstruct CCallInfo flags for CALLX*. */
1224static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
1225{
1226 uint32_t nargs = 0;
1227 if (ir->op1 != REF_NIL) { /* Count number of arguments first. */
1228 IRIns *ira = IR(ir->op1);
1229 nargs++;
1230 while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); }
1231 }
1232#if LJ_HASFFI
1233 if (IR(ir->op2)->o == IR_CARG) { /* Copy calling convention info. */
1234 CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i;
1235 CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id);
1236 nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0);
1237#if LJ_TARGET_X86
1238 nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT);
1239#endif
1240 }
1241#endif
1242 return (nargs | (ir->t.irt << CCI_OTSHIFT));
1243}
1244
1245static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
1246{
1247 const CCallInfo *ci = &lj_ir_callinfo[id];
1248 IRRef args[2];
1249 args[0] = ir->op1;
1250 args[1] = ir->op2;
1251 asm_setupresult(as, ir, ci);
1252 asm_gencall(as, ci, args);
1253}
1254
1255static void asm_call(ASMState *as, IRIns *ir)
1256{
1257 IRRef args[CCI_NARGS_MAX];
1258 const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
1259 asm_collectargs(as, ir, ci, args);
1260 asm_setupresult(as, ir, ci);
1261 asm_gencall(as, ci, args);
1262}
1263
1264#if !LJ_SOFTFP
1265static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref)
1266{
1267 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
1268 IRRef args[2];
1269 args[0] = lref;
1270 args[1] = rref;
1271 asm_setupresult(as, ir, ci);
1272 asm_gencall(as, ci, args);
1273}
1274
1275static int asm_fpjoin_pow(ASMState *as, IRIns *ir)
1276{
1277 IRIns *irp = IR(ir->op1);
1278 if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
1279 IRIns *irpp = IR(irp->op1);
1280 if (irpp == ir-2 && irpp->o == IR_FPMATH &&
1281 irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
1282 asm_fppow(as, ir, irpp->op1, irp->op2);
1283 return 1;
1284 }
1285 }
1286 return 0;
1287}
1288#endif
1289
1069/* -- PHI and loop handling ----------------------------------------------- */ 1290/* -- PHI and loop handling ----------------------------------------------- */
1070 1291
1071/* Break a PHI cycle by renaming to a free register (evict if needed). */ 1292/* Break a PHI cycle by renaming to a free register (evict if needed). */
@@ -1339,6 +1560,129 @@ static void asm_loop(ASMState *as)
1339#error "Missing assembler for target CPU" 1560#error "Missing assembler for target CPU"
1340#endif 1561#endif
1341 1562
1563/* -- Instruction dispatch ------------------------------------------------ */
1564
1565/* Assemble a single instruction. */
1566static void asm_ir(ASMState *as, IRIns *ir)
1567{
1568 switch ((IROp)ir->o) {
1569 /* Miscellaneous ops. */
1570 case IR_LOOP: asm_loop(as); break;
1571 case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
1572 case IR_USE:
1573 ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
1574 case IR_PHI: asm_phi(as, ir); break;
1575 case IR_HIOP: asm_hiop(as, ir); break;
1576 case IR_GCSTEP: asm_gcstep(as, ir); break;
1577 case IR_PROF: asm_prof(as, ir); break;
1578
1579 /* Guarded assertions. */
1580 case IR_LT: case IR_GE: case IR_LE: case IR_GT:
1581 case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
1582 case IR_ABC:
1583 asm_comp(as, ir);
1584 break;
1585 case IR_EQ: case IR_NE:
1586 if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
1587 as->curins--;
1588 asm_href(as, ir-1, (IROp)ir->o);
1589 } else {
1590 asm_equal(as, ir);
1591 }
1592 break;
1593
1594 case IR_RETF: asm_retf(as, ir); break;
1595
1596 /* Bit ops. */
1597 case IR_BNOT: asm_bnot(as, ir); break;
1598 case IR_BSWAP: asm_bswap(as, ir); break;
1599 case IR_BAND: asm_band(as, ir); break;
1600 case IR_BOR: asm_bor(as, ir); break;
1601 case IR_BXOR: asm_bxor(as, ir); break;
1602 case IR_BSHL: asm_bshl(as, ir); break;
1603 case IR_BSHR: asm_bshr(as, ir); break;
1604 case IR_BSAR: asm_bsar(as, ir); break;
1605 case IR_BROL: asm_brol(as, ir); break;
1606 case IR_BROR: asm_bror(as, ir); break;
1607
1608 /* Arithmetic ops. */
1609 case IR_ADD: asm_add(as, ir); break;
1610 case IR_SUB: asm_sub(as, ir); break;
1611 case IR_MUL: asm_mul(as, ir); break;
1612 case IR_DIV: asm_div(as, ir); break;
1613 case IR_MOD: asm_mod(as, ir); break;
1614 case IR_POW: asm_pow(as, ir); break;
1615 case IR_NEG: asm_neg(as, ir); break;
1616 case IR_ABS: asm_abs(as, ir); break;
1617 case IR_ATAN2: asm_atan2(as, ir); break;
1618 case IR_LDEXP: asm_ldexp(as, ir); break;
1619 case IR_MIN: asm_min(as, ir); break;
1620 case IR_MAX: asm_max(as, ir); break;
1621 case IR_FPMATH: asm_fpmath(as, ir); break;
1622
1623 /* Overflow-checking arithmetic ops. */
1624 case IR_ADDOV: asm_addov(as, ir); break;
1625 case IR_SUBOV: asm_subov(as, ir); break;
1626 case IR_MULOV: asm_mulov(as, ir); break;
1627
1628 /* Memory references. */
1629 case IR_AREF: asm_aref(as, ir); break;
1630 case IR_HREF: asm_href(as, ir, 0); break;
1631 case IR_HREFK: asm_hrefk(as, ir); break;
1632 case IR_NEWREF: asm_newref(as, ir); break;
1633 case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
1634 case IR_FREF: asm_fref(as, ir); break;
1635 case IR_STRREF: asm_strref(as, ir); break;
1636 case IR_LREF: asm_lref(as, ir); break;
1637
1638 /* Loads and stores. */
1639 case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
1640 asm_ahuvload(as, ir);
1641 break;
1642 case IR_FLOAD: asm_fload(as, ir); break;
1643 case IR_XLOAD: asm_xload(as, ir); break;
1644 case IR_SLOAD: asm_sload(as, ir); break;
1645
1646 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
1647 case IR_FSTORE: asm_fstore(as, ir); break;
1648 case IR_XSTORE: asm_xstore(as, ir); break;
1649
1650 /* Allocations. */
1651 case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
1652 case IR_TNEW: asm_tnew(as, ir); break;
1653 case IR_TDUP: asm_tdup(as, ir); break;
1654 case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
1655
1656 /* Buffer operations. */
1657 case IR_BUFHDR: asm_bufhdr(as, ir); break;
1658 case IR_BUFPUT: asm_bufput(as, ir); break;
1659 case IR_BUFSTR: asm_bufstr(as, ir); break;
1660
1661 /* Write barriers. */
1662 case IR_TBAR: asm_tbar(as, ir); break;
1663 case IR_OBAR: asm_obar(as, ir); break;
1664
1665 /* Type conversions. */
1666 case IR_TOBIT: asm_tobit(as, ir); break;
1667 case IR_CONV: asm_conv(as, ir); break;
1668 case IR_TOSTR: asm_tostr(as, ir); break;
1669 case IR_STRTO: asm_strto(as, ir); break;
1670
1671 /* Calls. */
1672 case IR_CALLA:
1673 as->gcsteps++;
1674 /* fallthrough */
1675 case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
1676 case IR_CALLXS: asm_callx(as, ir); break;
1677 case IR_CARG: break;
1678
1679 default:
1680 setintV(&as->J->errinfo, ir->o);
1681 lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
1682 break;
1683 }
1684}
1685
1342/* -- Head of trace ------------------------------------------------------- */ 1686/* -- Head of trace ------------------------------------------------------- */
1343 1687
1344/* Head of a root trace. */ 1688/* Head of a root trace. */
@@ -1569,7 +1913,7 @@ static void asm_tail_link(ASMState *as)
1569 mres = (int32_t)(snap->nslots - baseslot); 1913 mres = (int32_t)(snap->nslots - baseslot);
1570 switch (bc_op(*pc)) { 1914 switch (bc_op(*pc)) {
1571 case BC_CALLM: case BC_CALLMT: 1915 case BC_CALLM: case BC_CALLMT:
1572 mres -= (int32_t)(1 + bc_a(*pc) + bc_c(*pc)); break; 1916 mres -= (int32_t)(1 + LJ_FR2 + bc_a(*pc) + bc_c(*pc)); break;
1573 case BC_RETM: mres -= (int32_t)(bc_a(*pc) + bc_d(*pc)); break; 1917 case BC_RETM: mres -= (int32_t)(bc_a(*pc) + bc_d(*pc)); break;
1574 case BC_TSETM: mres -= (int32_t)bc_a(*pc); break; 1918 case BC_TSETM: mres -= (int32_t)bc_a(*pc); break;
1575 default: if (bc_op(*pc) < BC_FUNCF) mres = 0; break; 1919 default: if (bc_op(*pc) < BC_FUNCF) mres = 0; break;
@@ -1677,7 +2021,7 @@ static void asm_setup_regsp(ASMState *as)
1677 as->modset |= RSET_SCRATCH; 2021 as->modset |= RSET_SCRATCH;
1678 continue; 2022 continue;
1679 } 2023 }
1680 case IR_CALLN: case IR_CALLL: case IR_CALLS: { 2024 case IR_CALLN: case IR_CALLA: case IR_CALLL: case IR_CALLS: {
1681 const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; 2025 const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
1682 ir->prev = asm_setup_call_slots(as, ir, ci); 2026 ir->prev = asm_setup_call_slots(as, ir, ci);
1683 if (inloop) 2027 if (inloop)
@@ -1722,10 +2066,20 @@ static void asm_setup_regsp(ASMState *as)
1722 /* fallthrough */ 2066 /* fallthrough */
1723#endif 2067#endif
1724 /* C calls evict all scratch regs and return results in RID_RET. */ 2068 /* C calls evict all scratch regs and return results in RID_RET. */
1725 case IR_SNEW: case IR_XSNEW: case IR_NEWREF: 2069 case IR_SNEW: case IR_XSNEW: case IR_NEWREF: case IR_BUFPUT:
1726 if (REGARG_NUMGPR < 3 && as->evenspill < 3) 2070 if (REGARG_NUMGPR < 3 && as->evenspill < 3)
1727 as->evenspill = 3; /* lj_str_new and lj_tab_newkey need 3 args. */ 2071 as->evenspill = 3; /* lj_str_new and lj_tab_newkey need 3 args. */
1728 case IR_TNEW: case IR_TDUP: case IR_CNEW: case IR_CNEWI: case IR_TOSTR: 2072#if LJ_TARGET_X86 && LJ_HASFFI
2073 if (0) {
2074 case IR_CNEW:
2075 if (ir->op2 != REF_NIL && as->evenspill < 4)
2076 as->evenspill = 4; /* lj_cdata_newv needs 4 args. */
2077 }
2078#else
2079 case IR_CNEW:
2080#endif
2081 case IR_TNEW: case IR_TDUP: case IR_CNEWI: case IR_TOSTR:
2082 case IR_BUFSTR:
1729 ir->prev = REGSP_HINT(RID_RET); 2083 ir->prev = REGSP_HINT(RID_RET);
1730 if (inloop) 2084 if (inloop)
1731 as->modset = RSET_SCRATCH; 2085 as->modset = RSET_SCRATCH;
@@ -1734,21 +2088,26 @@ static void asm_setup_regsp(ASMState *as)
1734 if (inloop) 2088 if (inloop)
1735 as->modset = RSET_SCRATCH; 2089 as->modset = RSET_SCRATCH;
1736 break; 2090 break;
1737#if !LJ_TARGET_X86ORX64 && !LJ_SOFTFP 2091#if !LJ_SOFTFP
1738 case IR_ATAN2: case IR_LDEXP: 2092 case IR_ATAN2:
2093#if LJ_TARGET_X86
2094 if (as->evenspill < 4) /* Leave room to call atan2(). */
2095 as->evenspill = 4;
2096#endif
2097#if !LJ_TARGET_X86ORX64
2098 case IR_LDEXP:
2099#endif
1739#endif 2100#endif
1740 case IR_POW: 2101 case IR_POW:
1741 if (!LJ_SOFTFP && irt_isnum(ir->t)) { 2102 if (!LJ_SOFTFP && irt_isnum(ir->t)) {
1742#if LJ_TARGET_X86ORX64
1743 ir->prev = REGSP_HINT(RID_XMM0);
1744 if (inloop) 2103 if (inloop)
1745 as->modset |= RSET_RANGE(RID_XMM0, RID_XMM1+1)|RID2RSET(RID_EAX); 2104 as->modset |= RSET_SCRATCH;
2105#if LJ_TARGET_X86
2106 break;
1746#else 2107#else
1747 ir->prev = REGSP_HINT(RID_FPRET); 2108 ir->prev = REGSP_HINT(RID_FPRET);
1748 if (inloop)
1749 as->modset |= RSET_SCRATCH;
1750#endif
1751 continue; 2109 continue;
2110#endif
1752 } 2111 }
1753 /* fallthrough for integer POW */ 2112 /* fallthrough for integer POW */
1754 case IR_DIV: case IR_MOD: 2113 case IR_DIV: case IR_MOD:
@@ -1761,26 +2120,25 @@ static void asm_setup_regsp(ASMState *as)
1761 break; 2120 break;
1762 case IR_FPMATH: 2121 case IR_FPMATH:
1763#if LJ_TARGET_X86ORX64 2122#if LJ_TARGET_X86ORX64
1764 if (ir->op2 == IRFPM_EXP2) { /* May be joined to lj_vm_pow_sse. */ 2123 if (ir->op2 <= IRFPM_TRUNC) {
1765 ir->prev = REGSP_HINT(RID_XMM0); 2124 if (!(as->flags & JIT_F_SSE4_1)) {
1766#if !LJ_64 2125 ir->prev = REGSP_HINT(RID_XMM0);
1767 if (as->evenspill < 4) /* Leave room for 16 byte scratch area. */ 2126 if (inloop)
2127 as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX);
2128 continue;
2129 }
2130 break;
2131 } else if (ir->op2 == IRFPM_EXP2 && !LJ_64) {
2132 if (as->evenspill < 4) /* Leave room to call pow(). */
1768 as->evenspill = 4; 2133 as->evenspill = 4;
1769#endif
1770 if (inloop)
1771 as->modset |= RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX);
1772 continue;
1773 } else if (ir->op2 <= IRFPM_TRUNC && !(as->flags & JIT_F_SSE4_1)) {
1774 ir->prev = REGSP_HINT(RID_XMM0);
1775 if (inloop)
1776 as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX);
1777 continue;
1778 } 2134 }
2135#endif
2136 if (inloop)
2137 as->modset |= RSET_SCRATCH;
2138#if LJ_TARGET_X86
1779 break; 2139 break;
1780#else 2140#else
1781 ir->prev = REGSP_HINT(RID_FPRET); 2141 ir->prev = REGSP_HINT(RID_FPRET);
1782 if (inloop)
1783 as->modset |= RSET_SCRATCH;
1784 continue; 2142 continue;
1785#endif 2143#endif
1786#if LJ_TARGET_X86ORX64 2144#if LJ_TARGET_X86ORX64
diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h
index 9e4cf436..81843caf 100644
--- a/src/lj_asm_arm.h
+++ b/src/lj_asm_arm.h
@@ -338,7 +338,7 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, ARMIns ai, ARMIns air)
338/* Generate a call to a C function. */ 338/* Generate a call to a C function. */
339static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) 339static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
340{ 340{
341 uint32_t n, nargs = CCI_NARGS(ci); 341 uint32_t n, nargs = CCI_XNARGS(ci);
342 int32_t ofs = 0; 342 int32_t ofs = 0;
343#if LJ_SOFTFP 343#if LJ_SOFTFP
344 Reg gpr = REGARG_FIRSTGPR; 344 Reg gpr = REGARG_FIRSTGPR;
@@ -453,15 +453,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
453 UNUSED(ci); 453 UNUSED(ci);
454} 454}
455 455
456static void asm_call(ASMState *as, IRIns *ir)
457{
458 IRRef args[CCI_NARGS_MAX];
459 const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
460 asm_collectargs(as, ir, ci, args);
461 asm_setupresult(as, ir, ci);
462 asm_gencall(as, ci, args);
463}
464
465static void asm_callx(ASMState *as, IRIns *ir) 456static void asm_callx(ASMState *as, IRIns *ir)
466{ 457{
467 IRRef args[CCI_NARGS_MAX*2]; 458 IRRef args[CCI_NARGS_MAX*2];
@@ -490,7 +481,7 @@ static void asm_retf(ASMState *as, IRIns *ir)
490{ 481{
491 Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); 482 Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
492 void *pc = ir_kptr(IR(ir->op2)); 483 void *pc = ir_kptr(IR(ir->op2));
493 int32_t delta = 1+bc_a(*((const BCIns *)pc - 1)); 484 int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
494 as->topslot -= (BCReg)delta; 485 as->topslot -= (BCReg)delta;
495 if ((int32_t)as->topslot < 0) as->topslot = 0; 486 if ((int32_t)as->topslot < 0) as->topslot = 0;
496 irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ 487 irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */
@@ -529,6 +520,8 @@ static void asm_tobit(ASMState *as, IRIns *ir)
529 emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15)); 520 emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15));
530 emit_dnm(as, ARMI_VADD_D, (tmp & 15), (left & 15), (right & 15)); 521 emit_dnm(as, ARMI_VADD_D, (tmp & 15), (left & 15), (right & 15));
531} 522}
523#else
524#define asm_tobit(as, ir) lua_assert(0)
532#endif 525#endif
533 526
534static void asm_conv(ASMState *as, IRIns *ir) 527static void asm_conv(ASMState *as, IRIns *ir)
@@ -601,31 +594,6 @@ static void asm_conv(ASMState *as, IRIns *ir)
601 } 594 }
602} 595}
603 596
604#if !LJ_SOFTFP && LJ_HASFFI
605static void asm_conv64(ASMState *as, IRIns *ir)
606{
607 IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
608 IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
609 IRCallID id;
610 CCallInfo ci;
611 IRRef args[2];
612 args[0] = (ir-1)->op1;
613 args[1] = ir->op1;
614 if (st == IRT_NUM || st == IRT_FLOAT) {
615 id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
616 ir--;
617 } else {
618 id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
619 }
620 ci = lj_ir_callinfo[id];
621#if !LJ_ABI_SOFTFP
622 ci.flags |= CCI_VARARG; /* These calls don't use the hard-float ABI! */
623#endif
624 asm_setupresult(as, ir, &ci);
625 asm_gencall(as, &ci, args);
626}
627#endif
628
629static void asm_strto(ASMState *as, IRIns *ir) 597static void asm_strto(ASMState *as, IRIns *ir)
630{ 598{
631 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; 599 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
@@ -689,6 +657,8 @@ static void asm_strto(ASMState *as, IRIns *ir)
689 emit_opk(as, ARMI_ADD, tmp, RID_SP, ofs, RSET_GPR); 657 emit_opk(as, ARMI_ADD, tmp, RID_SP, ofs, RSET_GPR);
690} 658}
691 659
660/* -- Memory references --------------------------------------------------- */
661
692/* Get pointer to TValue. */ 662/* Get pointer to TValue. */
693static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) 663static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
694{ 664{
@@ -714,7 +684,7 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
714 Reg src = ra_alloc1(as, ref, allow); 684 Reg src = ra_alloc1(as, ref, allow);
715 emit_lso(as, ARMI_STR, src, RID_SP, 0); 685 emit_lso(as, ARMI_STR, src, RID_SP, 0);
716 } 686 }
717 if ((ir+1)->o == IR_HIOP) 687 if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
718 type = ra_alloc1(as, ref+1, allow); 688 type = ra_alloc1(as, ref+1, allow);
719 else 689 else
720 type = ra_allock(as, irt_toitype(ir->t), allow); 690 type = ra_allock(as, irt_toitype(ir->t), allow);
@@ -722,27 +692,6 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
722 } 692 }
723} 693}
724 694
725static void asm_tostr(ASMState *as, IRIns *ir)
726{
727 IRRef args[2];
728 args[0] = ASMREF_L;
729 as->gcsteps++;
730 if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) {
731 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
732 args[1] = ASMREF_TMP1; /* const lua_Number * */
733 asm_setupresult(as, ir, ci); /* GCstr * */
734 asm_gencall(as, ci, args);
735 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
736 } else {
737 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
738 args[1] = ir->op1; /* int32_t k */
739 asm_setupresult(as, ir, ci); /* GCstr * */
740 asm_gencall(as, ci, args);
741 }
742}
743
744/* -- Memory references --------------------------------------------------- */
745
746static void asm_aref(ASMState *as, IRIns *ir) 695static void asm_aref(ASMState *as, IRIns *ir)
747{ 696{
748 Reg dest = ra_dest(as, ir, RSET_GPR); 697 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -960,20 +909,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
960 emit_opk(as, ARMI_ADD, dest, node, ofs, RSET_GPR); 909 emit_opk(as, ARMI_ADD, dest, node, ofs, RSET_GPR);
961} 910}
962 911
963static void asm_newref(ASMState *as, IRIns *ir)
964{
965 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
966 IRRef args[3];
967 if (ir->r == RID_SINK)
968 return;
969 args[0] = ASMREF_L; /* lua_State *L */
970 args[1] = ir->op1; /* GCtab *t */
971 args[2] = ASMREF_TMP1; /* cTValue *key */
972 asm_setupresult(as, ir, ci); /* TValue * */
973 asm_gencall(as, ci, args);
974 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
975}
976
977static void asm_uref(ASMState *as, IRIns *ir) 912static void asm_uref(ASMState *as, IRIns *ir)
978{ 913{
979 /* NYI: Check that UREFO is still open and not aliasing a slot. */ 914 /* NYI: Check that UREFO is still open and not aliasing a slot. */
@@ -1106,7 +1041,7 @@ static void asm_xload(ASMState *as, IRIns *ir)
1106 asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); 1041 asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
1107} 1042}
1108 1043
1109static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) 1044static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
1110{ 1045{
1111 if (ir->r != RID_SINK) { 1046 if (ir->r != RID_SINK) {
1112 Reg src = ra_alloc1(as, ir->op2, 1047 Reg src = ra_alloc1(as, ir->op2,
@@ -1116,6 +1051,8 @@ static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
1116 } 1051 }
1117} 1052}
1118 1053
1054#define asm_xstore(as, ir) asm_xstore_(as, ir, 0)
1055
1119static void asm_ahuvload(ASMState *as, IRIns *ir) 1056static void asm_ahuvload(ASMState *as, IRIns *ir)
1120{ 1057{
1121 int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP); 1058 int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);
@@ -1273,19 +1210,16 @@ dotypecheck:
1273static void asm_cnew(ASMState *as, IRIns *ir) 1210static void asm_cnew(ASMState *as, IRIns *ir)
1274{ 1211{
1275 CTState *cts = ctype_ctsG(J2G(as->J)); 1212 CTState *cts = ctype_ctsG(J2G(as->J));
1276 CTypeID ctypeid = (CTypeID)IR(ir->op1)->i; 1213 CTypeID id = (CTypeID)IR(ir->op1)->i;
1277 CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ? 1214 CTSize sz;
1278 lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i; 1215 CTInfo info = lj_ctype_info(cts, id, &sz);
1279 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; 1216 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
1280 IRRef args[2]; 1217 IRRef args[4];
1281 RegSet allow = (RSET_GPR & ~RSET_SCRATCH); 1218 RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
1282 RegSet drop = RSET_SCRATCH; 1219 RegSet drop = RSET_SCRATCH;
1283 lua_assert(sz != CTSIZE_INVALID); 1220 lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
1284 1221
1285 args[0] = ASMREF_L; /* lua_State *L */
1286 args[1] = ASMREF_TMP1; /* MSize size */
1287 as->gcsteps++; 1222 as->gcsteps++;
1288
1289 if (ra_hasreg(ir->r)) 1223 if (ra_hasreg(ir->r))
1290 rset_clear(drop, ir->r); /* Dest reg handled below. */ 1224 rset_clear(drop, ir->r); /* Dest reg handled below. */
1291 ra_evictset(as, drop); 1225 ra_evictset(as, drop);
@@ -1307,16 +1241,28 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1307 if (ofs == sizeof(GCcdata)) break; 1241 if (ofs == sizeof(GCcdata)) break;
1308 ofs -= 4; ir--; 1242 ofs -= 4; ir--;
1309 } 1243 }
1244 } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */
1245 ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
1246 args[0] = ASMREF_L; /* lua_State *L */
1247 args[1] = ir->op1; /* CTypeID id */
1248 args[2] = ir->op2; /* CTSize sz */
1249 args[3] = ASMREF_TMP1; /* CTSize align */
1250 asm_gencall(as, ci, args);
1251 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
1252 return;
1310 } 1253 }
1254
1311 /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ 1255 /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
1312 { 1256 {
1313 uint32_t k = emit_isk12(ARMI_MOV, ctypeid); 1257 uint32_t k = emit_isk12(ARMI_MOV, id);
1314 Reg r = k ? RID_R1 : ra_allock(as, ctypeid, allow); 1258 Reg r = k ? RID_R1 : ra_allock(as, id, allow);
1315 emit_lso(as, ARMI_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct)); 1259 emit_lso(as, ARMI_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct));
1316 emit_lsox(as, ARMI_STRH, r, RID_RET, offsetof(GCcdata, ctypeid)); 1260 emit_lsox(as, ARMI_STRH, r, RID_RET, offsetof(GCcdata, ctypeid));
1317 emit_d(as, ARMI_MOV|ARMI_K12|~LJ_TCDATA, RID_TMP); 1261 emit_d(as, ARMI_MOV|ARMI_K12|~LJ_TCDATA, RID_TMP);
1318 if (k) emit_d(as, ARMI_MOV^k, RID_R1); 1262 if (k) emit_d(as, ARMI_MOV^k, RID_R1);
1319 } 1263 }
1264 args[0] = ASMREF_L; /* lua_State *L */
1265 args[1] = ASMREF_TMP1; /* MSize size */
1320 asm_gencall(as, ci, args); 1266 asm_gencall(as, ci, args);
1321 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), 1267 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
1322 ra_releasetmp(as, ASMREF_TMP1)); 1268 ra_releasetmp(as, ASMREF_TMP1));
@@ -1393,24 +1339,41 @@ static void asm_fpunary(ASMState *as, IRIns *ir, ARMIns ai)
1393 emit_dm(as, ai, (dest & 15), (left & 15)); 1339 emit_dm(as, ai, (dest & 15), (left & 15));
1394} 1340}
1395 1341
1396static int asm_fpjoin_pow(ASMState *as, IRIns *ir) 1342static void asm_callround(ASMState *as, IRIns *ir, int id)
1397{ 1343{
1398 IRIns *irp = IR(ir->op1); 1344 /* The modified regs must match with the *.dasc implementation. */
1399 if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) { 1345 RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)|
1400 IRIns *irpp = IR(irp->op1); 1346 RID2RSET(RID_R3)|RID2RSET(RID_R12);
1401 if (irpp == ir-2 && irpp->o == IR_FPMATH && 1347 RegSet of;
1402 irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) { 1348 Reg dest, src;
1403 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow]; 1349 ra_evictset(as, drop);
1404 IRRef args[2]; 1350 dest = ra_dest(as, ir, RSET_FPR);
1405 args[0] = irpp->op1; 1351 emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15));
1406 args[1] = irp->op2; 1352 emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf :
1407 asm_setupresult(as, ir, ci); 1353 id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf :
1408 asm_gencall(as, ci, args); 1354 (void *)lj_vm_trunc_sf);
1409 return 1; 1355 /* Workaround to protect argument GPRs from being used for remat. */
1410 } 1356 of = as->freeset;
1411 } 1357 as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1);
1412 return 0; 1358 as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L);
1359 src = ra_alloc1(as, ir->op1, RSET_FPR); /* May alloc GPR to remat FPR. */
1360 as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1));
1361 emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15));
1362}
1363
1364static void asm_fpmath(ASMState *as, IRIns *ir)
1365{
1366 if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
1367 return;
1368 if (ir->op2 <= IRFPM_TRUNC)
1369 asm_callround(as, ir, ir->op2);
1370 else if (ir->op2 == IRFPM_SQRT)
1371 asm_fpunary(as, ir, ARMI_VSQRT_D);
1372 else
1373 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
1413} 1374}
1375#else
1376#define asm_fpmath(as, ir) lua_assert(0)
1414#endif 1377#endif
1415 1378
1416static int asm_swapops(ASMState *as, IRRef lref, IRRef rref) 1379static int asm_swapops(ASMState *as, IRRef lref, IRRef rref)
@@ -1460,32 +1423,6 @@ static void asm_intop_s(ASMState *as, IRIns *ir, ARMIns ai)
1460 asm_intop(as, ir, ai); 1423 asm_intop(as, ir, ai);
1461} 1424}
1462 1425
1463static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai)
1464{
1465 if (as->flagmcp == as->mcp) { /* Try to drop cmp r, #0. */
1466 uint32_t cc = (as->mcp[1] >> 28);
1467 as->flagmcp = NULL;
1468 if (cc <= CC_NE) {
1469 as->mcp++;
1470 ai |= ARMI_S;
1471 } else if (cc == CC_GE) {
1472 *++as->mcp ^= ((CC_GE^CC_PL) << 28);
1473 ai |= ARMI_S;
1474 } else if (cc == CC_LT) {
1475 *++as->mcp ^= ((CC_LT^CC_MI) << 28);
1476 ai |= ARMI_S;
1477 } /* else: other conds don't work with bit ops. */
1478 }
1479 if (ir->op2 == 0) {
1480 Reg dest = ra_dest(as, ir, RSET_GPR);
1481 uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
1482 emit_d(as, ai^m, dest);
1483 } else {
1484 /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */
1485 asm_intop(as, ir, ai);
1486 }
1487}
1488
1489static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai) 1426static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai)
1490{ 1427{
1491 Reg dest = ra_dest(as, ir, RSET_GPR); 1428 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1551,6 +1488,26 @@ static void asm_mul(ASMState *as, IRIns *ir)
1551 asm_intmul(as, ir); 1488 asm_intmul(as, ir);
1552} 1489}
1553 1490
1491#define asm_addov(as, ir) asm_add(as, ir)
1492#define asm_subov(as, ir) asm_sub(as, ir)
1493#define asm_mulov(as, ir) asm_mul(as, ir)
1494
1495#if LJ_SOFTFP
1496#define asm_div(as, ir) lua_assert(0)
1497#define asm_pow(as, ir) lua_assert(0)
1498#define asm_abs(as, ir) lua_assert(0)
1499#define asm_atan2(as, ir) lua_assert(0)
1500#define asm_ldexp(as, ir) lua_assert(0)
1501#else
1502#define asm_div(as, ir) asm_fparith(as, ir, ARMI_VDIV_D)
1503#define asm_pow(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi)
1504#define asm_abs(as, ir) asm_fpunary(as, ir, ARMI_VABS_D)
1505#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2)
1506#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp)
1507#endif
1508
1509#define asm_mod(as, ir) asm_callid(as, ir, IRCALL_lj_vm_modi)
1510
1554static void asm_neg(ASMState *as, IRIns *ir) 1511static void asm_neg(ASMState *as, IRIns *ir)
1555{ 1512{
1556#if !LJ_SOFTFP 1513#if !LJ_SOFTFP
@@ -1562,41 +1519,35 @@ static void asm_neg(ASMState *as, IRIns *ir)
1562 asm_intneg(as, ir, ARMI_RSB); 1519 asm_intneg(as, ir, ARMI_RSB);
1563} 1520}
1564 1521
1565static void asm_callid(ASMState *as, IRIns *ir, IRCallID id) 1522static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai)
1566{ 1523{
1567 const CCallInfo *ci = &lj_ir_callinfo[id]; 1524 if (as->flagmcp == as->mcp) { /* Try to drop cmp r, #0. */
1568 IRRef args[2]; 1525 uint32_t cc = (as->mcp[1] >> 28);
1569 args[0] = ir->op1; 1526 as->flagmcp = NULL;
1570 args[1] = ir->op2; 1527 if (cc <= CC_NE) {
1571 asm_setupresult(as, ir, ci); 1528 as->mcp++;
1572 asm_gencall(as, ci, args); 1529 ai |= ARMI_S;
1530 } else if (cc == CC_GE) {
1531 *++as->mcp ^= ((CC_GE^CC_PL) << 28);
1532 ai |= ARMI_S;
1533 } else if (cc == CC_LT) {
1534 *++as->mcp ^= ((CC_LT^CC_MI) << 28);
1535 ai |= ARMI_S;
1536 } /* else: other conds don't work with bit ops. */
1537 }
1538 if (ir->op2 == 0) {
1539 Reg dest = ra_dest(as, ir, RSET_GPR);
1540 uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
1541 emit_d(as, ai^m, dest);
1542 } else {
1543 /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */
1544 asm_intop(as, ir, ai);
1545 }
1573} 1546}
1574 1547
1575#if !LJ_SOFTFP 1548#define asm_bnot(as, ir) asm_bitop(as, ir, ARMI_MVN)
1576static void asm_callround(ASMState *as, IRIns *ir, int id)
1577{
1578 /* The modified regs must match with the *.dasc implementation. */
1579 RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)|
1580 RID2RSET(RID_R3)|RID2RSET(RID_R12);
1581 RegSet of;
1582 Reg dest, src;
1583 ra_evictset(as, drop);
1584 dest = ra_dest(as, ir, RSET_FPR);
1585 emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15));
1586 emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf :
1587 id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf :
1588 (void *)lj_vm_trunc_sf);
1589 /* Workaround to protect argument GPRs from being used for remat. */
1590 of = as->freeset;
1591 as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1);
1592 as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L);
1593 src = ra_alloc1(as, ir->op1, RSET_FPR); /* May alloc GPR to remat FPR. */
1594 as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1));
1595 emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15));
1596}
1597#endif
1598 1549
1599static void asm_bitswap(ASMState *as, IRIns *ir) 1550static void asm_bswap(ASMState *as, IRIns *ir)
1600{ 1551{
1601 Reg dest = ra_dest(as, ir, RSET_GPR); 1552 Reg dest = ra_dest(as, ir, RSET_GPR);
1602 Reg left = ra_alloc1(as, ir->op1, RSET_GPR); 1553 Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
@@ -1613,6 +1564,10 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
1613 } 1564 }
1614} 1565}
1615 1566
1567#define asm_band(as, ir) asm_bitop(as, ir, ARMI_AND)
1568#define asm_bor(as, ir) asm_bitop(as, ir, ARMI_ORR)
1569#define asm_bxor(as, ir) asm_bitop(as, ir, ARMI_EOR)
1570
1616static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh) 1571static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh)
1617{ 1572{
1618 if (irref_isk(ir->op2)) { /* Constant shifts. */ 1573 if (irref_isk(ir->op2)) { /* Constant shifts. */
@@ -1630,6 +1585,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh)
1630 } 1585 }
1631} 1586}
1632 1587
1588#define asm_bshl(as, ir) asm_bitshift(as, ir, ARMSH_LSL)
1589#define asm_bshr(as, ir) asm_bitshift(as, ir, ARMSH_LSR)
1590#define asm_bsar(as, ir) asm_bitshift(as, ir, ARMSH_ASR)
1591#define asm_bror(as, ir) asm_bitshift(as, ir, ARMSH_ROR)
1592#define asm_brol(as, ir) lua_assert(0)
1593
1633static void asm_intmin_max(ASMState *as, IRIns *ir, int cc) 1594static void asm_intmin_max(ASMState *as, IRIns *ir, int cc)
1634{ 1595{
1635 uint32_t kcmp = 0, kmov = 0; 1596 uint32_t kcmp = 0, kmov = 0;
@@ -1703,6 +1664,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int cc, int fcc)
1703 asm_intmin_max(as, ir, cc); 1664 asm_intmin_max(as, ir, cc);
1704} 1665}
1705 1666
1667#define asm_min(as, ir) asm_min_max(as, ir, CC_GT, CC_HI)
1668#define asm_max(as, ir) asm_min_max(as, ir, CC_LT, CC_LO)
1669
1706/* -- Comparisons --------------------------------------------------------- */ 1670/* -- Comparisons --------------------------------------------------------- */
1707 1671
1708/* Map of comparisons to flags. ORDER IR. */ 1672/* Map of comparisons to flags. ORDER IR. */
@@ -1818,6 +1782,18 @@ notst:
1818 as->flagmcp = as->mcp; /* Allow elimination of the compare. */ 1782 as->flagmcp = as->mcp; /* Allow elimination of the compare. */
1819} 1783}
1820 1784
1785static void asm_comp(ASMState *as, IRIns *ir)
1786{
1787#if !LJ_SOFTFP
1788 if (irt_isnum(ir->t))
1789 asm_fpcomp(as, ir);
1790 else
1791#endif
1792 asm_intcomp(as, ir);
1793}
1794
1795#define asm_equal(as, ir) asm_comp(as, ir)
1796
1821#if LJ_HASFFI 1797#if LJ_HASFFI
1822/* 64 bit integer comparisons. */ 1798/* 64 bit integer comparisons. */
1823static void asm_int64comp(ASMState *as, IRIns *ir) 1799static void asm_int64comp(ASMState *as, IRIns *ir)
@@ -1892,7 +1868,7 @@ static void asm_hiop(ASMState *as, IRIns *ir)
1892#endif 1868#endif
1893 } else if ((ir-1)->o == IR_XSTORE) { 1869 } else if ((ir-1)->o == IR_XSTORE) {
1894 if ((ir-1)->r != RID_SINK) 1870 if ((ir-1)->r != RID_SINK)
1895 asm_xstore(as, ir, 4); 1871 asm_xstore_(as, ir, 4);
1896 return; 1872 return;
1897 } 1873 }
1898 if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ 1874 if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
@@ -1940,6 +1916,16 @@ static void asm_hiop(ASMState *as, IRIns *ir)
1940#endif 1916#endif
1941} 1917}
1942 1918
1919/* -- Profiling ----------------------------------------------------------- */
1920
1921static void asm_prof(ASMState *as, IRIns *ir)
1922{
1923 UNUSED(ir);
1924 asm_guardcc(as, CC_NE);
1925 emit_n(as, ARMI_TST|ARMI_K12|HOOK_PROFILE, RID_TMP);
1926 emit_lsptr(as, ARMI_LDRB, RID_TMP, (void *)&J2G(as->J)->hookmask);
1927}
1928
1943/* -- Stack handling ------------------------------------------------------ */ 1929/* -- Stack handling ------------------------------------------------------ */
1944 1930
1945/* Check Lua stack size for overflow. Use exit handler as fallback. */ 1931/* Check Lua stack size for overflow. Use exit handler as fallback. */
@@ -1969,7 +1955,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
1969 emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, 1955 emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP,
1970 (int32_t)offsetof(lua_State, maxstack)); 1956 (int32_t)offsetof(lua_State, maxstack));
1971 if (irp) { /* Must not spill arbitrary registers in head of side trace. */ 1957 if (irp) { /* Must not spill arbitrary registers in head of side trace. */
1972 int32_t i = i32ptr(&J2G(as->J)->jit_L); 1958 int32_t i = i32ptr(&J2G(as->J)->cur_L);
1973 if (ra_hasspill(irp->s)) 1959 if (ra_hasspill(irp->s))
1974 emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s)); 1960 emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s));
1975 emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095)); 1961 emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095));
@@ -1977,7 +1963,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
1977 emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0); /* Save temp. register. */ 1963 emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0); /* Save temp. register. */
1978 emit_loadi(as, RID_TMP, (i & ~4095)); 1964 emit_loadi(as, RID_TMP, (i & ~4095));
1979 } else { 1965 } else {
1980 emit_getgl(as, RID_TMP, jit_L); 1966 emit_getgl(as, RID_TMP, cur_L);
1981 } 1967 }
1982} 1968}
1983 1969
@@ -2086,13 +2072,13 @@ static void asm_loop_fixup(ASMState *as)
2086 2072
2087/* -- Head of trace ------------------------------------------------------- */ 2073/* -- Head of trace ------------------------------------------------------- */
2088 2074
2089/* Reload L register from g->jit_L. */ 2075/* Reload L register from g->cur_L. */
2090static void asm_head_lreg(ASMState *as) 2076static void asm_head_lreg(ASMState *as)
2091{ 2077{
2092 IRIns *ir = IR(ASMREF_L); 2078 IRIns *ir = IR(ASMREF_L);
2093 if (ra_used(ir)) { 2079 if (ra_used(ir)) {
2094 Reg r = ra_dest(as, ir, RSET_GPR); 2080 Reg r = ra_dest(as, ir, RSET_GPR);
2095 emit_getgl(as, r, jit_L); 2081 emit_getgl(as, r, cur_L);
2096 ra_evictk(as); 2082 ra_evictk(as);
2097 } 2083 }
2098} 2084}
@@ -2163,143 +2149,13 @@ static void asm_tail_prep(ASMState *as)
2163 *p = 0; /* Prevent load/store merging. */ 2149 *p = 0; /* Prevent load/store merging. */
2164} 2150}
2165 2151
2166/* -- Instruction dispatch ------------------------------------------------ */
2167
2168/* Assemble a single instruction. */
2169static void asm_ir(ASMState *as, IRIns *ir)
2170{
2171 switch ((IROp)ir->o) {
2172 /* Miscellaneous ops. */
2173 case IR_LOOP: asm_loop(as); break;
2174 case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
2175 case IR_USE:
2176 ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
2177 case IR_PHI: asm_phi(as, ir); break;
2178 case IR_HIOP: asm_hiop(as, ir); break;
2179 case IR_GCSTEP: asm_gcstep(as, ir); break;
2180
2181 /* Guarded assertions. */
2182 case IR_EQ: case IR_NE:
2183 if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
2184 as->curins--;
2185 asm_href(as, ir-1, (IROp)ir->o);
2186 break;
2187 }
2188 /* fallthrough */
2189 case IR_LT: case IR_GE: case IR_LE: case IR_GT:
2190 case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
2191 case IR_ABC:
2192#if !LJ_SOFTFP
2193 if (irt_isnum(ir->t)) { asm_fpcomp(as, ir); break; }
2194#endif
2195 asm_intcomp(as, ir);
2196 break;
2197
2198 case IR_RETF: asm_retf(as, ir); break;
2199
2200 /* Bit ops. */
2201 case IR_BNOT: asm_bitop(as, ir, ARMI_MVN); break;
2202 case IR_BSWAP: asm_bitswap(as, ir); break;
2203
2204 case IR_BAND: asm_bitop(as, ir, ARMI_AND); break;
2205 case IR_BOR: asm_bitop(as, ir, ARMI_ORR); break;
2206 case IR_BXOR: asm_bitop(as, ir, ARMI_EOR); break;
2207
2208 case IR_BSHL: asm_bitshift(as, ir, ARMSH_LSL); break;
2209 case IR_BSHR: asm_bitshift(as, ir, ARMSH_LSR); break;
2210 case IR_BSAR: asm_bitshift(as, ir, ARMSH_ASR); break;
2211 case IR_BROR: asm_bitshift(as, ir, ARMSH_ROR); break;
2212 case IR_BROL: lua_assert(0); break;
2213
2214 /* Arithmetic ops. */
2215 case IR_ADD: case IR_ADDOV: asm_add(as, ir); break;
2216 case IR_SUB: case IR_SUBOV: asm_sub(as, ir); break;
2217 case IR_MUL: case IR_MULOV: asm_mul(as, ir); break;
2218 case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;
2219 case IR_NEG: asm_neg(as, ir); break;
2220
2221#if LJ_SOFTFP
2222 case IR_DIV: case IR_POW: case IR_ABS:
2223 case IR_ATAN2: case IR_LDEXP: case IR_FPMATH: case IR_TOBIT:
2224 lua_assert(0); /* Unused for LJ_SOFTFP. */
2225 break;
2226#else
2227 case IR_DIV: asm_fparith(as, ir, ARMI_VDIV_D); break;
2228 case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;
2229 case IR_ABS: asm_fpunary(as, ir, ARMI_VABS_D); break;
2230 case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;
2231 case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;
2232 case IR_FPMATH:
2233 if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
2234 break;
2235 if (ir->op2 <= IRFPM_TRUNC)
2236 asm_callround(as, ir, ir->op2);
2237 else if (ir->op2 == IRFPM_SQRT)
2238 asm_fpunary(as, ir, ARMI_VSQRT_D);
2239 else
2240 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
2241 break;
2242 case IR_TOBIT: asm_tobit(as, ir); break;
2243#endif
2244
2245 case IR_MIN: asm_min_max(as, ir, CC_GT, CC_HI); break;
2246 case IR_MAX: asm_min_max(as, ir, CC_LT, CC_LO); break;
2247
2248 /* Memory references. */
2249 case IR_AREF: asm_aref(as, ir); break;
2250 case IR_HREF: asm_href(as, ir, 0); break;
2251 case IR_HREFK: asm_hrefk(as, ir); break;
2252 case IR_NEWREF: asm_newref(as, ir); break;
2253 case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
2254 case IR_FREF: asm_fref(as, ir); break;
2255 case IR_STRREF: asm_strref(as, ir); break;
2256
2257 /* Loads and stores. */
2258 case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
2259 asm_ahuvload(as, ir);
2260 break;
2261 case IR_FLOAD: asm_fload(as, ir); break;
2262 case IR_XLOAD: asm_xload(as, ir); break;
2263 case IR_SLOAD: asm_sload(as, ir); break;
2264
2265 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
2266 case IR_FSTORE: asm_fstore(as, ir); break;
2267 case IR_XSTORE: asm_xstore(as, ir, 0); break;
2268
2269 /* Allocations. */
2270 case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
2271 case IR_TNEW: asm_tnew(as, ir); break;
2272 case IR_TDUP: asm_tdup(as, ir); break;
2273 case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
2274
2275 /* Write barriers. */
2276 case IR_TBAR: asm_tbar(as, ir); break;
2277 case IR_OBAR: asm_obar(as, ir); break;
2278
2279 /* Type conversions. */
2280 case IR_CONV: asm_conv(as, ir); break;
2281 case IR_TOSTR: asm_tostr(as, ir); break;
2282 case IR_STRTO: asm_strto(as, ir); break;
2283
2284 /* Calls. */
2285 case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
2286 case IR_CALLXS: asm_callx(as, ir); break;
2287 case IR_CARG: break;
2288
2289 default:
2290 setintV(&as->J->errinfo, ir->o);
2291 lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
2292 break;
2293 }
2294}
2295
2296/* -- Trace setup --------------------------------------------------------- */ 2152/* -- Trace setup --------------------------------------------------------- */
2297 2153
2298/* Ensure there are enough stack slots for call arguments. */ 2154/* Ensure there are enough stack slots for call arguments. */
2299static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) 2155static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
2300{ 2156{
2301 IRRef args[CCI_NARGS_MAX*2]; 2157 IRRef args[CCI_NARGS_MAX*2];
2302 uint32_t i, nargs = (int)CCI_NARGS(ci); 2158 uint32_t i, nargs = CCI_XNARGS(ci);
2303 int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR, fprodd = 0; 2159 int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR, fprodd = 0;
2304 asm_collectargs(as, ir, ci, args); 2160 asm_collectargs(as, ir, ci, args);
2305 for (i = 0; i < nargs; i++) { 2161 for (i = 0; i < nargs; i++) {
diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h
index 78bd26d5..adea0e32 100644
--- a/src/lj_asm_mips.h
+++ b/src/lj_asm_mips.h
@@ -226,7 +226,7 @@ static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref,
226/* Generate a call to a C function. */ 226/* Generate a call to a C function. */
227static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) 227static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
228{ 228{
229 uint32_t n, nargs = CCI_NARGS(ci); 229 uint32_t n, nargs = CCI_XNARGS(ci);
230 int32_t ofs = 16; 230 int32_t ofs = 16;
231 Reg gpr, fpr = REGARG_FIRSTFPR; 231 Reg gpr, fpr = REGARG_FIRSTFPR;
232 if ((void *)ci->func) 232 if ((void *)ci->func)
@@ -326,15 +326,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
326 } 326 }
327} 327}
328 328
329static void asm_call(ASMState *as, IRIns *ir)
330{
331 IRRef args[CCI_NARGS_MAX];
332 const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
333 asm_collectargs(as, ir, ci, args);
334 asm_setupresult(as, ir, ci);
335 asm_gencall(as, ci, args);
336}
337
338static void asm_callx(ASMState *as, IRIns *ir) 329static void asm_callx(ASMState *as, IRIns *ir)
339{ 330{
340 IRRef args[CCI_NARGS_MAX*2]; 331 IRRef args[CCI_NARGS_MAX*2];
@@ -362,16 +353,6 @@ static void asm_callx(ASMState *as, IRIns *ir)
362 asm_gencall(as, &ci, args); 353 asm_gencall(as, &ci, args);
363} 354}
364 355
365static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
366{
367 const CCallInfo *ci = &lj_ir_callinfo[id];
368 IRRef args[2];
369 args[0] = ir->op1;
370 args[1] = ir->op2;
371 asm_setupresult(as, ir, ci);
372 asm_gencall(as, ci, args);
373}
374
375static void asm_callround(ASMState *as, IRIns *ir, IRCallID id) 356static void asm_callround(ASMState *as, IRIns *ir, IRCallID id)
376{ 357{
377 /* The modified regs must match with the *.dasc implementation. */ 358 /* The modified regs must match with the *.dasc implementation. */
@@ -391,7 +372,7 @@ static void asm_retf(ASMState *as, IRIns *ir)
391{ 372{
392 Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); 373 Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
393 void *pc = ir_kptr(IR(ir->op2)); 374 void *pc = ir_kptr(IR(ir->op2));
394 int32_t delta = 1+bc_a(*((const BCIns *)pc - 1)); 375 int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
395 as->topslot -= (BCReg)delta; 376 as->topslot -= (BCReg)delta;
396 if ((int32_t)as->topslot < 0) as->topslot = 0; 377 if ((int32_t)as->topslot < 0) as->topslot = 0;
397 irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ 378 irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */
@@ -520,28 +501,6 @@ static void asm_conv(ASMState *as, IRIns *ir)
520 } 501 }
521} 502}
522 503
523#if LJ_HASFFI
524static void asm_conv64(ASMState *as, IRIns *ir)
525{
526 IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
527 IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
528 IRCallID id;
529 const CCallInfo *ci;
530 IRRef args[2];
531 args[LJ_BE?0:1] = ir->op1;
532 args[LJ_BE?1:0] = (ir-1)->op1;
533 if (st == IRT_NUM || st == IRT_FLOAT) {
534 id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
535 ir--;
536 } else {
537 id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
538 }
539 ci = &lj_ir_callinfo[id];
540 asm_setupresult(as, ir, ci);
541 asm_gencall(as, ci, args);
542}
543#endif
544
545static void asm_strto(ASMState *as, IRIns *ir) 504static void asm_strto(ASMState *as, IRIns *ir)
546{ 505{
547 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; 506 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
@@ -558,6 +517,8 @@ static void asm_strto(ASMState *as, IRIns *ir)
558 RID_SP, sps_scale(ir->s)); 517 RID_SP, sps_scale(ir->s));
559} 518}
560 519
520/* -- Memory references --------------------------------------------------- */
521
561/* Get pointer to TValue. */ 522/* Get pointer to TValue. */
562static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) 523static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
563{ 524{
@@ -581,27 +542,6 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
581 } 542 }
582} 543}
583 544
584static void asm_tostr(ASMState *as, IRIns *ir)
585{
586 IRRef args[2];
587 args[0] = ASMREF_L;
588 as->gcsteps++;
589 if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) {
590 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
591 args[1] = ASMREF_TMP1; /* const lua_Number * */
592 asm_setupresult(as, ir, ci); /* GCstr * */
593 asm_gencall(as, ci, args);
594 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
595 } else {
596 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
597 args[1] = ir->op1; /* int32_t k */
598 asm_setupresult(as, ir, ci); /* GCstr * */
599 asm_gencall(as, ci, args);
600 }
601}
602
603/* -- Memory references --------------------------------------------------- */
604
605static void asm_aref(ASMState *as, IRIns *ir) 545static void asm_aref(ASMState *as, IRIns *ir)
606{ 546{
607 Reg dest = ra_dest(as, ir, RSET_GPR); 547 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -631,7 +571,7 @@ static void asm_aref(ASMState *as, IRIns *ir)
631** } while ((n = nextnode(n))); 571** } while ((n = nextnode(n)));
632** return niltv(L); 572** return niltv(L);
633*/ 573*/
634static void asm_href(ASMState *as, IRIns *ir) 574static void asm_href(ASMState *as, IRIns *ir, IROp merge)
635{ 575{
636 RegSet allow = RSET_GPR; 576 RegSet allow = RSET_GPR;
637 int destused = ra_used(ir); 577 int destused = ra_used(ir);
@@ -657,37 +597,42 @@ static void asm_href(ASMState *as, IRIns *ir)
657 tmp2 = ra_scratch(as, allow); 597 tmp2 = ra_scratch(as, allow);
658 rset_clear(allow, tmp2); 598 rset_clear(allow, tmp2);
659 599
660 /* Key not found in chain: load niltv. */ 600 /* Key not found in chain: jump to exit (if merged) or load niltv. */
661 l_end = emit_label(as); 601 l_end = emit_label(as);
662 if (destused) 602 as->invmcp = NULL;
603 if (merge == IR_NE)
604 asm_guard(as, MIPSI_B, RID_ZERO, RID_ZERO);
605 else if (destused)
663 emit_loada(as, dest, niltvg(J2G(as->J))); 606 emit_loada(as, dest, niltvg(J2G(as->J)));
664 else
665 *--as->mcp = MIPSI_NOP;
666 /* Follow hash chain until the end. */ 607 /* Follow hash chain until the end. */
667 emit_move(as, dest, tmp1); 608 emit_move(as, dest, tmp2);
668 l_loop = --as->mcp; 609 l_loop = --as->mcp;
669 emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, next)); 610 emit_tsi(as, MIPSI_LW, tmp2, dest, (int32_t)offsetof(Node, next));
670 l_next = emit_label(as); 611 l_next = emit_label(as);
671 612
672 /* Type and value comparison. */ 613 /* Type and value comparison. */
614 if (merge == IR_EQ) { /* Must match asm_guard(). */
615 emit_ti(as, MIPSI_LI, RID_TMP, as->snapno);
616 l_end = asm_exitstub_addr(as);
617 }
673 if (irt_isnum(kt)) { 618 if (irt_isnum(kt)) {
674 emit_branch(as, MIPSI_BC1T, 0, 0, l_end); 619 emit_branch(as, MIPSI_BC1T, 0, 0, l_end);
675 emit_fgh(as, MIPSI_C_EQ_D, 0, tmpnum, key); 620 emit_fgh(as, MIPSI_C_EQ_D, 0, tmpnum, key);
676 emit_tg(as, MIPSI_MFC1, tmp1, key+1); 621 *--as->mcp = MIPSI_NOP; /* Avoid NaN comparison overhead. */
677 emit_branch(as, MIPSI_BEQ, tmp1, RID_ZERO, l_next); 622 emit_branch(as, MIPSI_BEQ, tmp2, RID_ZERO, l_next);
678 emit_tsi(as, MIPSI_SLTIU, tmp1, tmp1, (int32_t)LJ_TISNUM); 623 emit_tsi(as, MIPSI_SLTIU, tmp2, tmp2, (int32_t)LJ_TISNUM);
679 emit_hsi(as, MIPSI_LDC1, tmpnum, dest, (int32_t)offsetof(Node, key.n)); 624 emit_hsi(as, MIPSI_LDC1, tmpnum, dest, (int32_t)offsetof(Node, key.n));
680 } else { 625 } else {
681 if (irt_ispri(kt)) { 626 if (irt_ispri(kt)) {
682 emit_branch(as, MIPSI_BEQ, tmp1, type, l_end); 627 emit_branch(as, MIPSI_BEQ, tmp2, type, l_end);
683 } else { 628 } else {
684 emit_branch(as, MIPSI_BEQ, tmp2, key, l_end); 629 emit_branch(as, MIPSI_BEQ, tmp1, key, l_end);
685 emit_tsi(as, MIPSI_LW, tmp2, dest, (int32_t)offsetof(Node, key.gcr)); 630 emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, key.gcr));
686 emit_branch(as, MIPSI_BNE, tmp1, type, l_next); 631 emit_branch(as, MIPSI_BNE, tmp2, type, l_next);
687 } 632 }
688 } 633 }
689 emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, key.it)); 634 emit_tsi(as, MIPSI_LW, tmp2, dest, (int32_t)offsetof(Node, key.it));
690 *l_loop = MIPSI_BNE | MIPSF_S(tmp1) | ((as->mcp-l_loop-1) & 0xffffu); 635 *l_loop = MIPSI_BNE | MIPSF_S(tmp2) | ((as->mcp-l_loop-1) & 0xffffu);
691 636
692 /* Load main position relative to tab->node into dest. */ 637 /* Load main position relative to tab->node into dest. */
693 khash = irref_isk(refkey) ? ir_khash(irkey) : 1; 638 khash = irref_isk(refkey) ? ir_khash(irkey) : 1;
@@ -777,20 +722,6 @@ nolo:
777 emit_tsi(as, MIPSI_ADDU, dest, node, ra_allock(as, ofs, allow)); 722 emit_tsi(as, MIPSI_ADDU, dest, node, ra_allock(as, ofs, allow));
778} 723}
779 724
780static void asm_newref(ASMState *as, IRIns *ir)
781{
782 if (ir->r != RID_SINK) {
783 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
784 IRRef args[3];
785 args[0] = ASMREF_L; /* lua_State *L */
786 args[1] = ir->op1; /* GCtab *t */
787 args[2] = ASMREF_TMP1; /* cTValue *key */
788 asm_setupresult(as, ir, ci); /* TValue * */
789 asm_gencall(as, ci, args);
790 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
791 }
792}
793
794static void asm_uref(ASMState *as, IRIns *ir) 725static void asm_uref(ASMState *as, IRIns *ir)
795{ 726{
796 /* NYI: Check that UREFO is still open and not aliasing a slot. */ 727 /* NYI: Check that UREFO is still open and not aliasing a slot. */
@@ -919,7 +850,7 @@ static void asm_xload(ASMState *as, IRIns *ir)
919 asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); 850 asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
920} 851}
921 852
922static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) 853static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
923{ 854{
924 if (ir->r != RID_SINK) { 855 if (ir->r != RID_SINK) {
925 Reg src = ra_alloc1z(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); 856 Reg src = ra_alloc1z(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
@@ -928,6 +859,8 @@ static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
928 } 859 }
929} 860}
930 861
862#define asm_xstore(as, ir) asm_xstore_(as, ir, 0)
863
931static void asm_ahuvload(ASMState *as, IRIns *ir) 864static void asm_ahuvload(ASMState *as, IRIns *ir)
932{ 865{
933 IRType1 t = ir->t; 866 IRType1 t = ir->t;
@@ -1003,7 +936,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
1003 if (irt_isint(t)) { 936 if (irt_isint(t)) {
1004 Reg tmp = ra_scratch(as, RSET_FPR); 937 Reg tmp = ra_scratch(as, RSET_FPR);
1005 emit_tg(as, MIPSI_MFC1, dest, tmp); 938 emit_tg(as, MIPSI_MFC1, dest, tmp);
1006 emit_fg(as, MIPSI_CVT_W_D, tmp, tmp); 939 emit_fg(as, MIPSI_TRUNC_W_D, tmp, tmp);
1007 dest = tmp; 940 dest = tmp;
1008 t.irt = IRT_NUM; /* Check for original type. */ 941 t.irt = IRT_NUM; /* Check for original type. */
1009 } else { 942 } else {
@@ -1043,19 +976,15 @@ dotypecheck:
1043static void asm_cnew(ASMState *as, IRIns *ir) 976static void asm_cnew(ASMState *as, IRIns *ir)
1044{ 977{
1045 CTState *cts = ctype_ctsG(J2G(as->J)); 978 CTState *cts = ctype_ctsG(J2G(as->J));
1046 CTypeID ctypeid = (CTypeID)IR(ir->op1)->i; 979 CTypeID id = (CTypeID)IR(ir->op1)->i;
1047 CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ? 980 CTSize sz;
1048 lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i; 981 CTInfo info = lj_ctype_info(cts, id, &sz);
1049 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; 982 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
1050 IRRef args[2]; 983 IRRef args[4];
1051 RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
1052 RegSet drop = RSET_SCRATCH; 984 RegSet drop = RSET_SCRATCH;
1053 lua_assert(sz != CTSIZE_INVALID); 985 lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
1054 986
1055 args[0] = ASMREF_L; /* lua_State *L */
1056 args[1] = ASMREF_TMP1; /* MSize size */
1057 as->gcsteps++; 987 as->gcsteps++;
1058
1059 if (ra_hasreg(ir->r)) 988 if (ra_hasreg(ir->r))
1060 rset_clear(drop, ir->r); /* Dest reg handled below. */ 989 rset_clear(drop, ir->r); /* Dest reg handled below. */
1061 ra_evictset(as, drop); 990 ra_evictset(as, drop);
@@ -1064,6 +993,7 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1064 993
1065 /* Initialize immutable cdata object. */ 994 /* Initialize immutable cdata object. */
1066 if (ir->o == IR_CNEWI) { 995 if (ir->o == IR_CNEWI) {
996 RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
1067 int32_t ofs = sizeof(GCcdata); 997 int32_t ofs = sizeof(GCcdata);
1068 lua_assert(sz == 4 || sz == 8); 998 lua_assert(sz == 4 || sz == 8);
1069 if (sz == 8) { 999 if (sz == 8) {
@@ -1078,12 +1008,24 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1078 if (ofs == sizeof(GCcdata)) break; 1008 if (ofs == sizeof(GCcdata)) break;
1079 ofs -= 4; if (LJ_BE) ir++; else ir--; 1009 ofs -= 4; if (LJ_BE) ir++; else ir--;
1080 } 1010 }
1011 } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */
1012 ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
1013 args[0] = ASMREF_L; /* lua_State *L */
1014 args[1] = ir->op1; /* CTypeID id */
1015 args[2] = ir->op2; /* CTSize sz */
1016 args[3] = ASMREF_TMP1; /* CTSize align */
1017 asm_gencall(as, ci, args);
1018 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
1019 return;
1081 } 1020 }
1021
1082 /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ 1022 /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
1083 emit_tsi(as, MIPSI_SB, RID_RET+1, RID_RET, offsetof(GCcdata, gct)); 1023 emit_tsi(as, MIPSI_SB, RID_RET+1, RID_RET, offsetof(GCcdata, gct));
1084 emit_tsi(as, MIPSI_SH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid)); 1024 emit_tsi(as, MIPSI_SH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid));
1085 emit_ti(as, MIPSI_LI, RID_RET+1, ~LJ_TCDATA); 1025 emit_ti(as, MIPSI_LI, RID_RET+1, ~LJ_TCDATA);
1086 emit_ti(as, MIPSI_LI, RID_TMP, ctypeid); /* Lower 16 bit used. Sign-ext ok. */ 1026 emit_ti(as, MIPSI_LI, RID_TMP, id); /* Lower 16 bit used. Sign-ext ok. */
1027 args[0] = ASMREF_L; /* lua_State *L */
1028 args[1] = ASMREF_TMP1; /* MSize size */
1087 asm_gencall(as, ci, args); 1029 asm_gencall(as, ci, args);
1088 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), 1030 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
1089 ra_releasetmp(as, ASMREF_TMP1)); 1031 ra_releasetmp(as, ASMREF_TMP1));
@@ -1153,23 +1095,16 @@ static void asm_fpunary(ASMState *as, IRIns *ir, MIPSIns mi)
1153 emit_fg(as, mi, dest, left); 1095 emit_fg(as, mi, dest, left);
1154} 1096}
1155 1097
1156static int asm_fpjoin_pow(ASMState *as, IRIns *ir) 1098static void asm_fpmath(ASMState *as, IRIns *ir)
1157{ 1099{
1158 IRIns *irp = IR(ir->op1); 1100 if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
1159 if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) { 1101 return;
1160 IRIns *irpp = IR(irp->op1); 1102 if (ir->op2 <= IRFPM_TRUNC)
1161 if (irpp == ir-2 && irpp->o == IR_FPMATH && 1103 asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2);
1162 irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) { 1104 else if (ir->op2 == IRFPM_SQRT)
1163 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow]; 1105 asm_fpunary(as, ir, MIPSI_SQRT_D);
1164 IRRef args[2]; 1106 else
1165 args[0] = irpp->op1; 1107 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
1166 args[1] = irp->op2;
1167 asm_setupresult(as, ir, ci);
1168 asm_gencall(as, ci, args);
1169 return 1;
1170 }
1171 }
1172 return 0;
1173} 1108}
1174 1109
1175static void asm_add(ASMState *as, IRIns *ir) 1110static void asm_add(ASMState *as, IRIns *ir)
@@ -1215,6 +1150,10 @@ static void asm_mul(ASMState *as, IRIns *ir)
1215 } 1150 }
1216} 1151}
1217 1152
1153#define asm_div(as, ir) asm_fparith(as, ir, MIPSI_DIV_D)
1154#define asm_mod(as, ir) asm_callid(as, ir, IRCALL_lj_vm_modi)
1155#define asm_pow(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi)
1156
1218static void asm_neg(ASMState *as, IRIns *ir) 1157static void asm_neg(ASMState *as, IRIns *ir)
1219{ 1158{
1220 if (irt_isnum(ir->t)) { 1159 if (irt_isnum(ir->t)) {
@@ -1226,6 +1165,10 @@ static void asm_neg(ASMState *as, IRIns *ir)
1226 } 1165 }
1227} 1166}
1228 1167
1168#define asm_abs(as, ir) asm_fpunary(as, ir, MIPSI_ABS_D)
1169#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2)
1170#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp)
1171
1229static void asm_arithov(ASMState *as, IRIns *ir) 1172static void asm_arithov(ASMState *as, IRIns *ir)
1230{ 1173{
1231 Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR); 1174 Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR);
@@ -1259,13 +1202,21 @@ static void asm_arithov(ASMState *as, IRIns *ir)
1259 emit_move(as, RID_TMP, dest == left ? left : right); 1202 emit_move(as, RID_TMP, dest == left ? left : right);
1260} 1203}
1261 1204
1205#define asm_addov(as, ir) asm_arithov(as, ir)
1206#define asm_subov(as, ir) asm_arithov(as, ir)
1207
1262static void asm_mulov(ASMState *as, IRIns *ir) 1208static void asm_mulov(ASMState *as, IRIns *ir)
1263{ 1209{
1264#if LJ_DUALNUM 1210 Reg dest = ra_dest(as, ir, RSET_GPR);
1265#error "NYI: MULOV" 1211 Reg tmp, right, left = ra_alloc2(as, ir, RSET_GPR);
1266#else 1212 right = (left >> 8); left &= 255;
1267 UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused in single-number mode. */ 1213 tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR, left),
1268#endif 1214 right), dest));
1215 asm_guard(as, MIPSI_BNE, RID_TMP, tmp);
1216 emit_dta(as, MIPSI_SRA, RID_TMP, dest, 31);
1217 emit_dst(as, MIPSI_MFHI, tmp, 0, 0);
1218 emit_dst(as, MIPSI_MFLO, dest, 0, 0);
1219 emit_dst(as, MIPSI_MULT, 0, left, right);
1269} 1220}
1270 1221
1271#if LJ_HASFFI 1222#if LJ_HASFFI
@@ -1352,7 +1303,7 @@ static void asm_neg64(ASMState *as, IRIns *ir)
1352} 1303}
1353#endif 1304#endif
1354 1305
1355static void asm_bitnot(ASMState *as, IRIns *ir) 1306static void asm_bnot(ASMState *as, IRIns *ir)
1356{ 1307{
1357 Reg left, right, dest = ra_dest(as, ir, RSET_GPR); 1308 Reg left, right, dest = ra_dest(as, ir, RSET_GPR);
1358 IRIns *irl = IR(ir->op1); 1309 IRIns *irl = IR(ir->op1);
@@ -1366,7 +1317,7 @@ static void asm_bitnot(ASMState *as, IRIns *ir)
1366 emit_dst(as, MIPSI_NOR, dest, left, right); 1317 emit_dst(as, MIPSI_NOR, dest, left, right);
1367} 1318}
1368 1319
1369static void asm_bitswap(ASMState *as, IRIns *ir) 1320static void asm_bswap(ASMState *as, IRIns *ir)
1370{ 1321{
1371 Reg dest = ra_dest(as, ir, RSET_GPR); 1322 Reg dest = ra_dest(as, ir, RSET_GPR);
1372 Reg left = ra_alloc1(as, ir->op1, RSET_GPR); 1323 Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
@@ -1402,6 +1353,10 @@ static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
1402 emit_dst(as, mi, dest, left, right); 1353 emit_dst(as, mi, dest, left, right);
1403} 1354}
1404 1355
1356#define asm_band(as, ir) asm_bitop(as, ir, MIPSI_AND, MIPSI_ANDI)
1357#define asm_bor(as, ir) asm_bitop(as, ir, MIPSI_OR, MIPSI_ORI)
1358#define asm_bxor(as, ir) asm_bitop(as, ir, MIPSI_XOR, MIPSI_XORI)
1359
1405static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik) 1360static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
1406{ 1361{
1407 Reg dest = ra_dest(as, ir, RSET_GPR); 1362 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1415,7 +1370,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
1415 } 1370 }
1416} 1371}
1417 1372
1418static void asm_bitror(ASMState *as, IRIns *ir) 1373#define asm_bshl(as, ir) asm_bitshift(as, ir, MIPSI_SLLV, MIPSI_SLL)
1374#define asm_bshr(as, ir) asm_bitshift(as, ir, MIPSI_SRLV, MIPSI_SRL)
1375#define asm_bsar(as, ir) asm_bitshift(as, ir, MIPSI_SRAV, MIPSI_SRA)
1376#define asm_brol(as, ir) lua_assert(0)
1377
1378static void asm_bror(ASMState *as, IRIns *ir)
1419{ 1379{
1420 if ((as->flags & JIT_F_MIPS32R2)) { 1380 if ((as->flags & JIT_F_MIPS32R2)) {
1421 asm_bitshift(as, ir, MIPSI_ROTRV, MIPSI_ROTR); 1381 asm_bitshift(as, ir, MIPSI_ROTRV, MIPSI_ROTR);
@@ -1464,6 +1424,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
1464 } 1424 }
1465} 1425}
1466 1426
1427#define asm_min(as, ir) asm_min_max(as, ir, 0)
1428#define asm_max(as, ir) asm_min_max(as, ir, 1)
1429
1467/* -- Comparisons --------------------------------------------------------- */ 1430/* -- Comparisons --------------------------------------------------------- */
1468 1431
1469static void asm_comp(ASMState *as, IRIns *ir) 1432static void asm_comp(ASMState *as, IRIns *ir)
@@ -1501,7 +1464,7 @@ static void asm_comp(ASMState *as, IRIns *ir)
1501 } 1464 }
1502} 1465}
1503 1466
1504static void asm_compeq(ASMState *as, IRIns *ir) 1467static void asm_equal(ASMState *as, IRIns *ir)
1505{ 1468{
1506 Reg right, left = ra_alloc2(as, ir, irt_isnum(ir->t) ? RSET_FPR : RSET_GPR); 1469 Reg right, left = ra_alloc2(as, ir, irt_isnum(ir->t) ? RSET_FPR : RSET_GPR);
1507 right = (left >> 8); left &= 255; 1470 right = (left >> 8); left &= 255;
@@ -1575,8 +1538,8 @@ static void asm_hiop(ASMState *as, IRIns *ir)
1575 } else if ((ir-1)->o == IR_XSTORE) { 1538 } else if ((ir-1)->o == IR_XSTORE) {
1576 as->curins--; /* Handle both stores here. */ 1539 as->curins--; /* Handle both stores here. */
1577 if ((ir-1)->r != RID_SINK) { 1540 if ((ir-1)->r != RID_SINK) {
1578 asm_xstore(as, ir, LJ_LE ? 4 : 0); 1541 asm_xstore_(as, ir, LJ_LE ? 4 : 0);
1579 asm_xstore(as, ir-1, LJ_LE ? 0 : 4); 1542 asm_xstore_(as, ir-1, LJ_LE ? 0 : 4);
1580 } 1543 }
1581 return; 1544 return;
1582 } 1545 }
@@ -1600,6 +1563,17 @@ static void asm_hiop(ASMState *as, IRIns *ir)
1600#endif 1563#endif
1601} 1564}
1602 1565
1566/* -- Profiling ----------------------------------------------------------- */
1567
1568static void asm_prof(ASMState *as, IRIns *ir)
1569{
1570 UNUSED(ir);
1571 asm_guard(as, MIPSI_BNE, RID_TMP, RID_ZERO);
1572 emit_tsi(as, MIPSI_ANDI, RID_TMP, RID_TMP, HOOK_PROFILE);
1573 emit_lsglptr(as, MIPSI_LBU, RID_TMP,
1574 (int32_t)offsetof(global_State, hookmask));
1575}
1576
1603/* -- Stack handling ------------------------------------------------------ */ 1577/* -- Stack handling ------------------------------------------------------ */
1604 1578
1605/* Check Lua stack size for overflow. Use exit handler as fallback. */ 1579/* Check Lua stack size for overflow. Use exit handler as fallback. */
@@ -1624,7 +1598,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
1624 emit_tsi(as, MIPSI_LW, tmp, tmp, offsetof(lua_State, maxstack)); 1598 emit_tsi(as, MIPSI_LW, tmp, tmp, offsetof(lua_State, maxstack));
1625 if (pbase == RID_TMP) 1599 if (pbase == RID_TMP)
1626 emit_getgl(as, RID_TMP, jit_base); 1600 emit_getgl(as, RID_TMP, jit_base);
1627 emit_getgl(as, tmp, jit_L); 1601 emit_getgl(as, tmp, cur_L);
1628 if (allow == RSET_EMPTY) /* Spill temp. register. */ 1602 if (allow == RSET_EMPTY) /* Spill temp. register. */
1629 emit_tsi(as, MIPSI_SW, tmp, RID_SP, 0); 1603 emit_tsi(as, MIPSI_SW, tmp, RID_SP, 0);
1630} 1604}
@@ -1772,131 +1746,13 @@ static void asm_tail_prep(ASMState *as)
1772 as->invmcp = as->loopref ? as->mcp : NULL; 1746 as->invmcp = as->loopref ? as->mcp : NULL;
1773} 1747}
1774 1748
1775/* -- Instruction dispatch ------------------------------------------------ */
1776
1777/* Assemble a single instruction. */
1778static void asm_ir(ASMState *as, IRIns *ir)
1779{
1780 switch ((IROp)ir->o) {
1781 /* Miscellaneous ops. */
1782 case IR_LOOP: asm_loop(as); break;
1783 case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
1784 case IR_USE:
1785 ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
1786 case IR_PHI: asm_phi(as, ir); break;
1787 case IR_HIOP: asm_hiop(as, ir); break;
1788 case IR_GCSTEP: asm_gcstep(as, ir); break;
1789
1790 /* Guarded assertions. */
1791 case IR_EQ: case IR_NE: asm_compeq(as, ir); break;
1792 case IR_LT: case IR_GE: case IR_LE: case IR_GT:
1793 case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
1794 case IR_ABC:
1795 asm_comp(as, ir);
1796 break;
1797
1798 case IR_RETF: asm_retf(as, ir); break;
1799
1800 /* Bit ops. */
1801 case IR_BNOT: asm_bitnot(as, ir); break;
1802 case IR_BSWAP: asm_bitswap(as, ir); break;
1803
1804 case IR_BAND: asm_bitop(as, ir, MIPSI_AND, MIPSI_ANDI); break;
1805 case IR_BOR: asm_bitop(as, ir, MIPSI_OR, MIPSI_ORI); break;
1806 case IR_BXOR: asm_bitop(as, ir, MIPSI_XOR, MIPSI_XORI); break;
1807
1808 case IR_BSHL: asm_bitshift(as, ir, MIPSI_SLLV, MIPSI_SLL); break;
1809 case IR_BSHR: asm_bitshift(as, ir, MIPSI_SRLV, MIPSI_SRL); break;
1810 case IR_BSAR: asm_bitshift(as, ir, MIPSI_SRAV, MIPSI_SRA); break;
1811 case IR_BROL: lua_assert(0); break;
1812 case IR_BROR: asm_bitror(as, ir); break;
1813
1814 /* Arithmetic ops. */
1815 case IR_ADD: asm_add(as, ir); break;
1816 case IR_SUB: asm_sub(as, ir); break;
1817 case IR_MUL: asm_mul(as, ir); break;
1818 case IR_DIV: asm_fparith(as, ir, MIPSI_DIV_D); break;
1819 case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;
1820 case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;
1821 case IR_NEG: asm_neg(as, ir); break;
1822
1823 case IR_ABS: asm_fpunary(as, ir, MIPSI_ABS_D); break;
1824 case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;
1825 case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;
1826 case IR_MIN: asm_min_max(as, ir, 0); break;
1827 case IR_MAX: asm_min_max(as, ir, 1); break;
1828 case IR_FPMATH:
1829 if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
1830 break;
1831 if (ir->op2 <= IRFPM_TRUNC)
1832 asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2);
1833 else if (ir->op2 == IRFPM_SQRT)
1834 asm_fpunary(as, ir, MIPSI_SQRT_D);
1835 else
1836 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
1837 break;
1838
1839 /* Overflow-checking arithmetic ops. */
1840 case IR_ADDOV: asm_arithov(as, ir); break;
1841 case IR_SUBOV: asm_arithov(as, ir); break;
1842 case IR_MULOV: asm_mulov(as, ir); break;
1843
1844 /* Memory references. */
1845 case IR_AREF: asm_aref(as, ir); break;
1846 case IR_HREF: asm_href(as, ir); break;
1847 case IR_HREFK: asm_hrefk(as, ir); break;
1848 case IR_NEWREF: asm_newref(as, ir); break;
1849 case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
1850 case IR_FREF: asm_fref(as, ir); break;
1851 case IR_STRREF: asm_strref(as, ir); break;
1852
1853 /* Loads and stores. */
1854 case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
1855 asm_ahuvload(as, ir);
1856 break;
1857 case IR_FLOAD: asm_fload(as, ir); break;
1858 case IR_XLOAD: asm_xload(as, ir); break;
1859 case IR_SLOAD: asm_sload(as, ir); break;
1860
1861 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
1862 case IR_FSTORE: asm_fstore(as, ir); break;
1863 case IR_XSTORE: asm_xstore(as, ir, 0); break;
1864
1865 /* Allocations. */
1866 case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
1867 case IR_TNEW: asm_tnew(as, ir); break;
1868 case IR_TDUP: asm_tdup(as, ir); break;
1869 case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
1870
1871 /* Write barriers. */
1872 case IR_TBAR: asm_tbar(as, ir); break;
1873 case IR_OBAR: asm_obar(as, ir); break;
1874
1875 /* Type conversions. */
1876 case IR_CONV: asm_conv(as, ir); break;
1877 case IR_TOBIT: asm_tobit(as, ir); break;
1878 case IR_TOSTR: asm_tostr(as, ir); break;
1879 case IR_STRTO: asm_strto(as, ir); break;
1880
1881 /* Calls. */
1882 case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
1883 case IR_CALLXS: asm_callx(as, ir); break;
1884 case IR_CARG: break;
1885
1886 default:
1887 setintV(&as->J->errinfo, ir->o);
1888 lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
1889 break;
1890 }
1891}
1892
1893/* -- Trace setup --------------------------------------------------------- */ 1749/* -- Trace setup --------------------------------------------------------- */
1894 1750
1895/* Ensure there are enough stack slots for call arguments. */ 1751/* Ensure there are enough stack slots for call arguments. */
1896static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) 1752static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
1897{ 1753{
1898 IRRef args[CCI_NARGS_MAX*2]; 1754 IRRef args[CCI_NARGS_MAX*2];
1899 uint32_t i, nargs = (int)CCI_NARGS(ci); 1755 uint32_t i, nargs = CCI_XNARGS(ci);
1900 int nslots = 4, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; 1756 int nslots = 4, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
1901 asm_collectargs(as, ir, ci, args); 1757 asm_collectargs(as, ir, ci, args);
1902 for (i = 0; i < nargs; i++) { 1758 for (i = 0; i < nargs; i++) {
diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h
index 2c5d74a8..7deeb66e 100644
--- a/src/lj_asm_ppc.h
+++ b/src/lj_asm_ppc.h
@@ -251,7 +251,7 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir)
251/* Generate a call to a C function. */ 251/* Generate a call to a C function. */
252static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) 252static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
253{ 253{
254 uint32_t n, nargs = CCI_NARGS(ci); 254 uint32_t n, nargs = CCI_XNARGS(ci);
255 int32_t ofs = 8; 255 int32_t ofs = 8;
256 Reg gpr = REGARG_FIRSTGPR, fpr = REGARG_FIRSTFPR; 256 Reg gpr = REGARG_FIRSTGPR, fpr = REGARG_FIRSTFPR;
257 if ((void *)ci->func) 257 if ((void *)ci->func)
@@ -323,23 +323,16 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
323 } else { 323 } else {
324 ra_destreg(as, ir, RID_FPRET); 324 ra_destreg(as, ir, RID_FPRET);
325 } 325 }
326#if LJ_32
326 } else if (hiop) { 327 } else if (hiop) {
327 ra_destpair(as, ir); 328 ra_destpair(as, ir);
329#endif
328 } else { 330 } else {
329 ra_destreg(as, ir, RID_RET); 331 ra_destreg(as, ir, RID_RET);
330 } 332 }
331 } 333 }
332} 334}
333 335
334static void asm_call(ASMState *as, IRIns *ir)
335{
336 IRRef args[CCI_NARGS_MAX];
337 const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
338 asm_collectargs(as, ir, ci, args);
339 asm_setupresult(as, ir, ci);
340 asm_gencall(as, ci, args);
341}
342
343static void asm_callx(ASMState *as, IRIns *ir) 336static void asm_callx(ASMState *as, IRIns *ir)
344{ 337{
345 IRRef args[CCI_NARGS_MAX*2]; 338 IRRef args[CCI_NARGS_MAX*2];
@@ -352,7 +345,7 @@ static void asm_callx(ASMState *as, IRIns *ir)
352 func = ir->op2; irf = IR(func); 345 func = ir->op2; irf = IR(func);
353 if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } 346 if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
354 if (irref_isk(func)) { /* Call to constant address. */ 347 if (irref_isk(func)) { /* Call to constant address. */
355 ci.func = (ASMFunction)(void *)(irf->i); 348 ci.func = (ASMFunction)(void *)(intptr_t)(irf->i);
356 } else { /* Need a non-argument register for indirect calls. */ 349 } else { /* Need a non-argument register for indirect calls. */
357 RegSet allow = RSET_GPR & ~RSET_RANGE(RID_R0, REGARG_LASTGPR+1); 350 RegSet allow = RSET_GPR & ~RSET_RANGE(RID_R0, REGARG_LASTGPR+1);
358 Reg freg = ra_alloc1(as, func, allow); 351 Reg freg = ra_alloc1(as, func, allow);
@@ -363,16 +356,6 @@ static void asm_callx(ASMState *as, IRIns *ir)
363 asm_gencall(as, &ci, args); 356 asm_gencall(as, &ci, args);
364} 357}
365 358
366static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
367{
368 const CCallInfo *ci = &lj_ir_callinfo[id];
369 IRRef args[2];
370 args[0] = ir->op1;
371 args[1] = ir->op2;
372 asm_setupresult(as, ir, ci);
373 asm_gencall(as, ci, args);
374}
375
376/* -- Returns ------------------------------------------------------------- */ 359/* -- Returns ------------------------------------------------------------- */
377 360
378/* Return to lower frame. Guard that it goes to the right spot. */ 361/* Return to lower frame. Guard that it goes to the right spot. */
@@ -380,7 +363,7 @@ static void asm_retf(ASMState *as, IRIns *ir)
380{ 363{
381 Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); 364 Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
382 void *pc = ir_kptr(IR(ir->op2)); 365 void *pc = ir_kptr(IR(ir->op2));
383 int32_t delta = 1+bc_a(*((const BCIns *)pc - 1)); 366 int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
384 as->topslot -= (BCReg)delta; 367 as->topslot -= (BCReg)delta;
385 if ((int32_t)as->topslot < 0) as->topslot = 0; 368 if ((int32_t)as->topslot < 0) as->topslot = 0;
386 irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ 369 irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */
@@ -513,28 +496,6 @@ static void asm_conv(ASMState *as, IRIns *ir)
513 } 496 }
514} 497}
515 498
516#if LJ_HASFFI
517static void asm_conv64(ASMState *as, IRIns *ir)
518{
519 IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
520 IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
521 IRCallID id;
522 const CCallInfo *ci;
523 IRRef args[2];
524 args[0] = ir->op1;
525 args[1] = (ir-1)->op1;
526 if (st == IRT_NUM || st == IRT_FLOAT) {
527 id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
528 ir--;
529 } else {
530 id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
531 }
532 ci = &lj_ir_callinfo[id];
533 asm_setupresult(as, ir, ci);
534 asm_gencall(as, ci, args);
535}
536#endif
537
538static void asm_strto(ASMState *as, IRIns *ir) 499static void asm_strto(ASMState *as, IRIns *ir)
539{ 500{
540 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; 501 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
@@ -553,6 +514,8 @@ static void asm_strto(ASMState *as, IRIns *ir)
553 emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs); 514 emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs);
554} 515}
555 516
517/* -- Memory references --------------------------------------------------- */
518
556/* Get pointer to TValue. */ 519/* Get pointer to TValue. */
557static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) 520static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
558{ 521{
@@ -566,7 +529,7 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
566 /* Otherwise use g->tmptv to hold the TValue. */ 529 /* Otherwise use g->tmptv to hold the TValue. */
567 RegSet allow = rset_exclude(RSET_GPR, dest); 530 RegSet allow = rset_exclude(RSET_GPR, dest);
568 Reg type; 531 Reg type;
569 emit_tai(as, PPCI_ADDI, dest, RID_JGL, offsetof(global_State, tmptv)-32768); 532 emit_tai(as, PPCI_ADDI, dest, RID_JGL, (int32_t)offsetof(global_State, tmptv)-32768);
570 if (!irt_ispri(ir->t)) { 533 if (!irt_ispri(ir->t)) {
571 Reg src = ra_alloc1(as, ref, allow); 534 Reg src = ra_alloc1(as, ref, allow);
572 emit_setgl(as, src, tmptv.gcr); 535 emit_setgl(as, src, tmptv.gcr);
@@ -576,27 +539,6 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
576 } 539 }
577} 540}
578 541
579static void asm_tostr(ASMState *as, IRIns *ir)
580{
581 IRRef args[2];
582 args[0] = ASMREF_L;
583 as->gcsteps++;
584 if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) {
585 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
586 args[1] = ASMREF_TMP1; /* const lua_Number * */
587 asm_setupresult(as, ir, ci); /* GCstr * */
588 asm_gencall(as, ci, args);
589 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
590 } else {
591 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
592 args[1] = ir->op1; /* int32_t k */
593 asm_setupresult(as, ir, ci); /* GCstr * */
594 asm_gencall(as, ci, args);
595 }
596}
597
598/* -- Memory references --------------------------------------------------- */
599
600static void asm_aref(ASMState *as, IRIns *ir) 542static void asm_aref(ASMState *as, IRIns *ir)
601{ 543{
602 Reg dest = ra_dest(as, ir, RSET_GPR); 544 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -773,20 +715,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
773 } 715 }
774} 716}
775 717
776static void asm_newref(ASMState *as, IRIns *ir)
777{
778 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
779 IRRef args[3];
780 if (ir->r == RID_SINK)
781 return;
782 args[0] = ASMREF_L; /* lua_State *L */
783 args[1] = ir->op1; /* GCtab *t */
784 args[2] = ASMREF_TMP1; /* cTValue *key */
785 asm_setupresult(as, ir, ci); /* TValue * */
786 asm_gencall(as, ci, args);
787 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
788}
789
790static void asm_uref(ASMState *as, IRIns *ir) 718static void asm_uref(ASMState *as, IRIns *ir)
791{ 719{
792 /* NYI: Check that UREFO is still open and not aliasing a slot. */ 720 /* NYI: Check that UREFO is still open and not aliasing a slot. */
@@ -917,7 +845,7 @@ static void asm_xload(ASMState *as, IRIns *ir)
917 asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); 845 asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
918} 846}
919 847
920static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) 848static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
921{ 849{
922 IRIns *irb; 850 IRIns *irb;
923 if (ir->r == RID_SINK) 851 if (ir->r == RID_SINK)
@@ -934,6 +862,8 @@ static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
934 } 862 }
935} 863}
936 864
865#define asm_xstore(as, ir) asm_xstore_(as, ir, 0)
866
937static void asm_ahuvload(ASMState *as, IRIns *ir) 867static void asm_ahuvload(ASMState *as, IRIns *ir)
938{ 868{
939 IRType1 t = ir->t; 869 IRType1 t = ir->t;
@@ -1084,19 +1014,15 @@ dotypecheck:
1084static void asm_cnew(ASMState *as, IRIns *ir) 1014static void asm_cnew(ASMState *as, IRIns *ir)
1085{ 1015{
1086 CTState *cts = ctype_ctsG(J2G(as->J)); 1016 CTState *cts = ctype_ctsG(J2G(as->J));
1087 CTypeID ctypeid = (CTypeID)IR(ir->op1)->i; 1017 CTypeID id = (CTypeID)IR(ir->op1)->i;
1088 CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ? 1018 CTSize sz;
1089 lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i; 1019 CTInfo info = lj_ctype_info(cts, id, &sz);
1090 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; 1020 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
1091 IRRef args[2]; 1021 IRRef args[4];
1092 RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
1093 RegSet drop = RSET_SCRATCH; 1022 RegSet drop = RSET_SCRATCH;
1094 lua_assert(sz != CTSIZE_INVALID); 1023 lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
1095 1024
1096 args[0] = ASMREF_L; /* lua_State *L */
1097 args[1] = ASMREF_TMP1; /* MSize size */
1098 as->gcsteps++; 1025 as->gcsteps++;
1099
1100 if (ra_hasreg(ir->r)) 1026 if (ra_hasreg(ir->r))
1101 rset_clear(drop, ir->r); /* Dest reg handled below. */ 1027 rset_clear(drop, ir->r); /* Dest reg handled below. */
1102 ra_evictset(as, drop); 1028 ra_evictset(as, drop);
@@ -1105,6 +1031,7 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1105 1031
1106 /* Initialize immutable cdata object. */ 1032 /* Initialize immutable cdata object. */
1107 if (ir->o == IR_CNEWI) { 1033 if (ir->o == IR_CNEWI) {
1034 RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
1108 int32_t ofs = sizeof(GCcdata); 1035 int32_t ofs = sizeof(GCcdata);
1109 lua_assert(sz == 4 || sz == 8); 1036 lua_assert(sz == 4 || sz == 8);
1110 if (sz == 8) { 1037 if (sz == 8) {
@@ -1118,12 +1045,24 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1118 if (ofs == sizeof(GCcdata)) break; 1045 if (ofs == sizeof(GCcdata)) break;
1119 ofs -= 4; ir++; 1046 ofs -= 4; ir++;
1120 } 1047 }
1048 } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */
1049 ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
1050 args[0] = ASMREF_L; /* lua_State *L */
1051 args[1] = ir->op1; /* CTypeID id */
1052 args[2] = ir->op2; /* CTSize sz */
1053 args[3] = ASMREF_TMP1; /* CTSize align */
1054 asm_gencall(as, ci, args);
1055 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
1056 return;
1121 } 1057 }
1058
1122 /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ 1059 /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
1123 emit_tai(as, PPCI_STB, RID_RET+1, RID_RET, offsetof(GCcdata, gct)); 1060 emit_tai(as, PPCI_STB, RID_RET+1, RID_RET, offsetof(GCcdata, gct));
1124 emit_tai(as, PPCI_STH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid)); 1061 emit_tai(as, PPCI_STH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid));
1125 emit_ti(as, PPCI_LI, RID_RET+1, ~LJ_TCDATA); 1062 emit_ti(as, PPCI_LI, RID_RET+1, ~LJ_TCDATA);
1126 emit_ti(as, PPCI_LI, RID_TMP, ctypeid); /* Lower 16 bit used. Sign-ext ok. */ 1063 emit_ti(as, PPCI_LI, RID_TMP, id); /* Lower 16 bit used. Sign-ext ok. */
1064 args[0] = ASMREF_L; /* lua_State *L */
1065 args[1] = ASMREF_TMP1; /* MSize size */
1127 asm_gencall(as, ci, args); 1066 asm_gencall(as, ci, args);
1128 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), 1067 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
1129 ra_releasetmp(as, ASMREF_TMP1)); 1068 ra_releasetmp(as, ASMREF_TMP1));
@@ -1197,23 +1136,14 @@ static void asm_fpunary(ASMState *as, IRIns *ir, PPCIns pi)
1197 emit_fb(as, pi, dest, left); 1136 emit_fb(as, pi, dest, left);
1198} 1137}
1199 1138
1200static int asm_fpjoin_pow(ASMState *as, IRIns *ir) 1139static void asm_fpmath(ASMState *as, IRIns *ir)
1201{ 1140{
1202 IRIns *irp = IR(ir->op1); 1141 if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
1203 if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) { 1142 return;
1204 IRIns *irpp = IR(irp->op1); 1143 if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT))
1205 if (irpp == ir-2 && irpp->o == IR_FPMATH && 1144 asm_fpunary(as, ir, PPCI_FSQRT);
1206 irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) { 1145 else
1207 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow]; 1146 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
1208 IRRef args[2];
1209 args[0] = irpp->op1;
1210 args[1] = irp->op2;
1211 asm_setupresult(as, ir, ci);
1212 asm_gencall(as, ci, args);
1213 return 1;
1214 }
1215 }
1216 return 0;
1217} 1147}
1218 1148
1219static void asm_add(ASMState *as, IRIns *ir) 1149static void asm_add(ASMState *as, IRIns *ir)
@@ -1313,6 +1243,10 @@ static void asm_mul(ASMState *as, IRIns *ir)
1313 } 1243 }
1314} 1244}
1315 1245
1246#define asm_div(as, ir) asm_fparith(as, ir, PPCI_FDIV)
1247#define asm_mod(as, ir) asm_callid(as, ir, IRCALL_lj_vm_modi)
1248#define asm_pow(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi)
1249
1316static void asm_neg(ASMState *as, IRIns *ir) 1250static void asm_neg(ASMState *as, IRIns *ir)
1317{ 1251{
1318 if (irt_isnum(ir->t)) { 1252 if (irt_isnum(ir->t)) {
@@ -1331,6 +1265,10 @@ static void asm_neg(ASMState *as, IRIns *ir)
1331 } 1265 }
1332} 1266}
1333 1267
1268#define asm_abs(as, ir) asm_fpunary(as, ir, PPCI_FABS)
1269#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2)
1270#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp)
1271
1334static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi) 1272static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi)
1335{ 1273{
1336 Reg dest, left, right; 1274 Reg dest, left, right;
@@ -1346,6 +1284,10 @@ static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi)
1346 emit_tab(as, pi|PPCF_DOT, dest, left, right); 1284 emit_tab(as, pi|PPCF_DOT, dest, left, right);
1347} 1285}
1348 1286
1287#define asm_addov(as, ir) asm_arithov(as, ir, PPCI_ADDO)
1288#define asm_subov(as, ir) asm_arithov(as, ir, PPCI_SUBFO)
1289#define asm_mulov(as, ir) asm_arithov(as, ir, PPCI_MULLWO)
1290
1349#if LJ_HASFFI 1291#if LJ_HASFFI
1350static void asm_add64(ASMState *as, IRIns *ir) 1292static void asm_add64(ASMState *as, IRIns *ir)
1351{ 1293{
@@ -1425,7 +1367,7 @@ static void asm_neg64(ASMState *as, IRIns *ir)
1425} 1367}
1426#endif 1368#endif
1427 1369
1428static void asm_bitnot(ASMState *as, IRIns *ir) 1370static void asm_bnot(ASMState *as, IRIns *ir)
1429{ 1371{
1430 Reg dest, left, right; 1372 Reg dest, left, right;
1431 PPCIns pi = PPCI_NOR; 1373 PPCIns pi = PPCI_NOR;
@@ -1452,7 +1394,7 @@ nofuse:
1452 emit_asb(as, pi, dest, left, right); 1394 emit_asb(as, pi, dest, left, right);
1453} 1395}
1454 1396
1455static void asm_bitswap(ASMState *as, IRIns *ir) 1397static void asm_bswap(ASMState *as, IRIns *ir)
1456{ 1398{
1457 Reg dest = ra_dest(as, ir, RSET_GPR); 1399 Reg dest = ra_dest(as, ir, RSET_GPR);
1458 IRIns *irx; 1400 IRIns *irx;
@@ -1473,32 +1415,6 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
1473 } 1415 }
1474} 1416}
1475 1417
1476static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
1477{
1478 Reg dest = ra_dest(as, ir, RSET_GPR);
1479 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
1480 if (irref_isk(ir->op2)) {
1481 int32_t k = IR(ir->op2)->i;
1482 Reg tmp = left;
1483 if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) {
1484 if (!checku16(k)) {
1485 emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16));
1486 if ((k & 0xffff) == 0) return;
1487 }
1488 emit_asi(as, pik, dest, left, k);
1489 return;
1490 }
1491 }
1492 /* May fail due to spills/restores above, but simplifies the logic. */
1493 if (as->flagmcp == as->mcp) {
1494 as->flagmcp = NULL;
1495 as->mcp++;
1496 pi |= PPCF_DOT;
1497 }
1498 right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
1499 emit_asb(as, pi, dest, left, right);
1500}
1501
1502/* Fuse BAND with contiguous bitmask and a shift to rlwinm. */ 1418/* Fuse BAND with contiguous bitmask and a shift to rlwinm. */
1503static void asm_fuseandsh(ASMState *as, PPCIns pi, int32_t mask, IRRef ref) 1419static void asm_fuseandsh(ASMState *as, PPCIns pi, int32_t mask, IRRef ref)
1504{ 1420{
@@ -1529,7 +1445,7 @@ nofuse:
1529 *--as->mcp = pi | PPCF_T(left); 1445 *--as->mcp = pi | PPCF_T(left);
1530} 1446}
1531 1447
1532static void asm_bitand(ASMState *as, IRIns *ir) 1448static void asm_band(ASMState *as, IRIns *ir)
1533{ 1449{
1534 Reg dest, left, right; 1450 Reg dest, left, right;
1535 IRRef lref = ir->op1; 1451 IRRef lref = ir->op1;
@@ -1584,6 +1500,35 @@ static void asm_bitand(ASMState *as, IRIns *ir)
1584 emit_asb(as, PPCI_AND ^ dot, dest, left, right); 1500 emit_asb(as, PPCI_AND ^ dot, dest, left, right);
1585} 1501}
1586 1502
1503static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
1504{
1505 Reg dest = ra_dest(as, ir, RSET_GPR);
1506 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
1507 if (irref_isk(ir->op2)) {
1508 int32_t k = IR(ir->op2)->i;
1509 Reg tmp = left;
1510 if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) {
1511 if (!checku16(k)) {
1512 emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16));
1513 if ((k & 0xffff) == 0) return;
1514 }
1515 emit_asi(as, pik, dest, left, k);
1516 return;
1517 }
1518 }
1519 /* May fail due to spills/restores above, but simplifies the logic. */
1520 if (as->flagmcp == as->mcp) {
1521 as->flagmcp = NULL;
1522 as->mcp++;
1523 pi |= PPCF_DOT;
1524 }
1525 right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
1526 emit_asb(as, pi, dest, left, right);
1527}
1528
1529#define asm_bor(as, ir) asm_bitop(as, ir, PPCI_OR, PPCI_ORI)
1530#define asm_bxor(as, ir) asm_bitop(as, ir, PPCI_XOR, PPCI_XORI)
1531
1587static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) 1532static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
1588{ 1533{
1589 Reg dest, left; 1534 Reg dest, left;
@@ -1609,6 +1554,14 @@ static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
1609 } 1554 }
1610} 1555}
1611 1556
1557#define asm_bshl(as, ir) asm_bitshift(as, ir, PPCI_SLW, 0)
1558#define asm_bshr(as, ir) asm_bitshift(as, ir, PPCI_SRW, 1)
1559#define asm_bsar(as, ir) asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI)
1560#define asm_brol(as, ir) \
1561 asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31), \
1562 PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31))
1563#define asm_bror(as, ir) lua_assert(0)
1564
1612static void asm_min_max(ASMState *as, IRIns *ir, int ismax) 1565static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
1613{ 1566{
1614 if (irt_isnum(ir->t)) { 1567 if (irt_isnum(ir->t)) {
@@ -1639,6 +1592,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
1639 } 1592 }
1640} 1593}
1641 1594
1595#define asm_min(as, ir) asm_min_max(as, ir, 0)
1596#define asm_max(as, ir) asm_min_max(as, ir, 1)
1597
1642/* -- Comparisons --------------------------------------------------------- */ 1598/* -- Comparisons --------------------------------------------------------- */
1643 1599
1644#define CC_UNSIGNED 0x08 /* Unsigned integer comparison. */ 1600#define CC_UNSIGNED 0x08 /* Unsigned integer comparison. */
@@ -1715,6 +1671,8 @@ static void asm_comp(ASMState *as, IRIns *ir)
1715 } 1671 }
1716} 1672}
1717 1673
1674#define asm_equal(as, ir) asm_comp(as, ir)
1675
1718#if LJ_HASFFI 1676#if LJ_HASFFI
1719/* 64 bit integer comparisons. */ 1677/* 64 bit integer comparisons. */
1720static void asm_comp64(ASMState *as, IRIns *ir) 1678static void asm_comp64(ASMState *as, IRIns *ir)
@@ -1760,8 +1718,8 @@ static void asm_hiop(ASMState *as, IRIns *ir)
1760 } else if ((ir-1)->o == IR_XSTORE) { 1718 } else if ((ir-1)->o == IR_XSTORE) {
1761 as->curins--; /* Handle both stores here. */ 1719 as->curins--; /* Handle both stores here. */
1762 if ((ir-1)->r != RID_SINK) { 1720 if ((ir-1)->r != RID_SINK) {
1763 asm_xstore(as, ir, 0); 1721 asm_xstore_(as, ir, 0);
1764 asm_xstore(as, ir-1, 4); 1722 asm_xstore_(as, ir-1, 4);
1765 } 1723 }
1766 return; 1724 return;
1767 } 1725 }
@@ -1785,6 +1743,17 @@ static void asm_hiop(ASMState *as, IRIns *ir)
1785#endif 1743#endif
1786} 1744}
1787 1745
1746/* -- Profiling ----------------------------------------------------------- */
1747
1748static void asm_prof(ASMState *as, IRIns *ir)
1749{
1750 UNUSED(ir);
1751 asm_guardcc(as, CC_NE);
1752 emit_asi(as, PPCI_ANDIDOT, RID_TMP, RID_TMP, HOOK_PROFILE);
1753 emit_lsglptr(as, PPCI_LBZ, RID_TMP,
1754 (int32_t)offsetof(global_State, hookmask));
1755}
1756
1788/* -- Stack handling ------------------------------------------------------ */ 1757/* -- Stack handling ------------------------------------------------------ */
1789 1758
1790/* Check Lua stack size for overflow. Use exit handler as fallback. */ 1759/* Check Lua stack size for overflow. Use exit handler as fallback. */
@@ -1806,7 +1775,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
1806 emit_tai(as, PPCI_LWZ, tmp, tmp, offsetof(lua_State, maxstack)); 1775 emit_tai(as, PPCI_LWZ, tmp, tmp, offsetof(lua_State, maxstack));
1807 if (pbase == RID_TMP) 1776 if (pbase == RID_TMP)
1808 emit_getgl(as, RID_TMP, jit_base); 1777 emit_getgl(as, RID_TMP, jit_base);
1809 emit_getgl(as, tmp, jit_L); 1778 emit_getgl(as, tmp, cur_L);
1810 if (allow == RSET_EMPTY) /* Spill temp. register. */ 1779 if (allow == RSET_EMPTY) /* Spill temp. register. */
1811 emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPW); 1780 emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPW);
1812} 1781}
@@ -1967,136 +1936,13 @@ static void asm_tail_prep(ASMState *as)
1967 } 1936 }
1968} 1937}
1969 1938
1970/* -- Instruction dispatch ------------------------------------------------ */
1971
1972/* Assemble a single instruction. */
1973static void asm_ir(ASMState *as, IRIns *ir)
1974{
1975 switch ((IROp)ir->o) {
1976 /* Miscellaneous ops. */
1977 case IR_LOOP: asm_loop(as); break;
1978 case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
1979 case IR_USE:
1980 ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
1981 case IR_PHI: asm_phi(as, ir); break;
1982 case IR_HIOP: asm_hiop(as, ir); break;
1983 case IR_GCSTEP: asm_gcstep(as, ir); break;
1984
1985 /* Guarded assertions. */
1986 case IR_EQ: case IR_NE:
1987 if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
1988 as->curins--;
1989 asm_href(as, ir-1, (IROp)ir->o);
1990 break;
1991 }
1992 /* fallthrough */
1993 case IR_LT: case IR_GE: case IR_LE: case IR_GT:
1994 case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
1995 case IR_ABC:
1996 asm_comp(as, ir);
1997 break;
1998
1999 case IR_RETF: asm_retf(as, ir); break;
2000
2001 /* Bit ops. */
2002 case IR_BNOT: asm_bitnot(as, ir); break;
2003 case IR_BSWAP: asm_bitswap(as, ir); break;
2004
2005 case IR_BAND: asm_bitand(as, ir); break;
2006 case IR_BOR: asm_bitop(as, ir, PPCI_OR, PPCI_ORI); break;
2007 case IR_BXOR: asm_bitop(as, ir, PPCI_XOR, PPCI_XORI); break;
2008
2009 case IR_BSHL: asm_bitshift(as, ir, PPCI_SLW, 0); break;
2010 case IR_BSHR: asm_bitshift(as, ir, PPCI_SRW, 1); break;
2011 case IR_BSAR: asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI); break;
2012 case IR_BROL: asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31),
2013 PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31)); break;
2014 case IR_BROR: lua_assert(0); break;
2015
2016 /* Arithmetic ops. */
2017 case IR_ADD: asm_add(as, ir); break;
2018 case IR_SUB: asm_sub(as, ir); break;
2019 case IR_MUL: asm_mul(as, ir); break;
2020 case IR_DIV: asm_fparith(as, ir, PPCI_FDIV); break;
2021 case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;
2022 case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;
2023 case IR_NEG: asm_neg(as, ir); break;
2024
2025 case IR_ABS: asm_fpunary(as, ir, PPCI_FABS); break;
2026 case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;
2027 case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;
2028 case IR_MIN: asm_min_max(as, ir, 0); break;
2029 case IR_MAX: asm_min_max(as, ir, 1); break;
2030 case IR_FPMATH:
2031 if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
2032 break;
2033 if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT))
2034 asm_fpunary(as, ir, PPCI_FSQRT);
2035 else
2036 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
2037 break;
2038
2039 /* Overflow-checking arithmetic ops. */
2040 case IR_ADDOV: asm_arithov(as, ir, PPCI_ADDO); break;
2041 case IR_SUBOV: asm_arithov(as, ir, PPCI_SUBFO); break;
2042 case IR_MULOV: asm_arithov(as, ir, PPCI_MULLWO); break;
2043
2044 /* Memory references. */
2045 case IR_AREF: asm_aref(as, ir); break;
2046 case IR_HREF: asm_href(as, ir, 0); break;
2047 case IR_HREFK: asm_hrefk(as, ir); break;
2048 case IR_NEWREF: asm_newref(as, ir); break;
2049 case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
2050 case IR_FREF: asm_fref(as, ir); break;
2051 case IR_STRREF: asm_strref(as, ir); break;
2052
2053 /* Loads and stores. */
2054 case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
2055 asm_ahuvload(as, ir);
2056 break;
2057 case IR_FLOAD: asm_fload(as, ir); break;
2058 case IR_XLOAD: asm_xload(as, ir); break;
2059 case IR_SLOAD: asm_sload(as, ir); break;
2060
2061 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
2062 case IR_FSTORE: asm_fstore(as, ir); break;
2063 case IR_XSTORE: asm_xstore(as, ir, 0); break;
2064
2065 /* Allocations. */
2066 case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
2067 case IR_TNEW: asm_tnew(as, ir); break;
2068 case IR_TDUP: asm_tdup(as, ir); break;
2069 case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
2070
2071 /* Write barriers. */
2072 case IR_TBAR: asm_tbar(as, ir); break;
2073 case IR_OBAR: asm_obar(as, ir); break;
2074
2075 /* Type conversions. */
2076 case IR_CONV: asm_conv(as, ir); break;
2077 case IR_TOBIT: asm_tobit(as, ir); break;
2078 case IR_TOSTR: asm_tostr(as, ir); break;
2079 case IR_STRTO: asm_strto(as, ir); break;
2080
2081 /* Calls. */
2082 case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
2083 case IR_CALLXS: asm_callx(as, ir); break;
2084 case IR_CARG: break;
2085
2086 default:
2087 setintV(&as->J->errinfo, ir->o);
2088 lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
2089 break;
2090 }
2091}
2092
2093/* -- Trace setup --------------------------------------------------------- */ 1939/* -- Trace setup --------------------------------------------------------- */
2094 1940
2095/* Ensure there are enough stack slots for call arguments. */ 1941/* Ensure there are enough stack slots for call arguments. */
2096static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) 1942static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
2097{ 1943{
2098 IRRef args[CCI_NARGS_MAX*2]; 1944 IRRef args[CCI_NARGS_MAX*2];
2099 uint32_t i, nargs = (int)CCI_NARGS(ci); 1945 uint32_t i, nargs = CCI_XNARGS(ci);
2100 int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; 1946 int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
2101 asm_collectargs(as, ir, ci, args); 1947 asm_collectargs(as, ir, ci, args);
2102 for (i = 0; i < nargs; i++) 1948 for (i = 0; i < nargs; i++)
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
index 0b6b2d4a..941d0919 100644
--- a/src/lj_asm_x86.h
+++ b/src/lj_asm_x86.h
@@ -392,7 +392,7 @@ static Reg asm_fuseloadm(ASMState *as, IRRef ref, RegSet allow, int is64)
392/* Count the required number of stack slots for a call. */ 392/* Count the required number of stack slots for a call. */
393static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args) 393static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args)
394{ 394{
395 uint32_t i, nargs = CCI_NARGS(ci); 395 uint32_t i, nargs = CCI_XNARGS(ci);
396 int nslots = 0; 396 int nslots = 0;
397#if LJ_64 397#if LJ_64
398 if (LJ_ABI_WIN) { 398 if (LJ_ABI_WIN) {
@@ -425,7 +425,7 @@ static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args)
425/* Generate a call to a C function. */ 425/* Generate a call to a C function. */
426static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) 426static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
427{ 427{
428 uint32_t n, nargs = CCI_NARGS(ci); 428 uint32_t n, nargs = CCI_XNARGS(ci);
429 int32_t ofs = STACKARG_OFS; 429 int32_t ofs = STACKARG_OFS;
430#if LJ_64 430#if LJ_64
431 uint32_t gprs = REGARG_GPRS; 431 uint32_t gprs = REGARG_GPRS;
@@ -560,7 +560,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
560 if (ra_hasreg(dest)) { 560 if (ra_hasreg(dest)) {
561 ra_free(as, dest); 561 ra_free(as, dest);
562 ra_modified(as, dest); 562 ra_modified(as, dest);
563 emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, 563 emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS,
564 dest, RID_ESP, ofs); 564 dest, RID_ESP, ofs);
565 } 565 }
566 if ((ci->flags & CCI_CASTU64)) { 566 if ((ci->flags & CCI_CASTU64)) {
@@ -584,15 +584,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
584 } 584 }
585} 585}
586 586
587static void asm_call(ASMState *as, IRIns *ir)
588{
589 IRRef args[CCI_NARGS_MAX];
590 const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
591 asm_collectargs(as, ir, ci, args);
592 asm_setupresult(as, ir, ci);
593 asm_gencall(as, ci, args);
594}
595
596/* Return a constant function pointer or NULL for indirect calls. */ 587/* Return a constant function pointer or NULL for indirect calls. */
597static void *asm_callx_func(ASMState *as, IRIns *irf, IRRef func) 588static void *asm_callx_func(ASMState *as, IRIns *irf, IRRef func)
598{ 589{
@@ -652,7 +643,7 @@ static void asm_retf(ASMState *as, IRIns *ir)
652{ 643{
653 Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); 644 Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
654 void *pc = ir_kptr(IR(ir->op2)); 645 void *pc = ir_kptr(IR(ir->op2));
655 int32_t delta = 1+bc_a(*((const BCIns *)pc - 1)); 646 int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
656 as->topslot -= (BCReg)delta; 647 as->topslot -= (BCReg)delta;
657 if ((int32_t)as->topslot < 0) as->topslot = 0; 648 if ((int32_t)as->topslot < 0) as->topslot = 0;
658 irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ 649 irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */
@@ -672,8 +663,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
672 asm_guardcc(as, CC_NE); 663 asm_guardcc(as, CC_NE);
673 emit_rr(as, XO_UCOMISD, left, tmp); 664 emit_rr(as, XO_UCOMISD, left, tmp);
674 emit_rr(as, XO_CVTSI2SD, tmp, dest); 665 emit_rr(as, XO_CVTSI2SD, tmp, dest);
675 if (!(as->flags & JIT_F_SPLIT_XMM)) 666 emit_rr(as, XO_XORPS, tmp, tmp); /* Avoid partial register stall. */
676 emit_rr(as, XO_XORPS, tmp, tmp); /* Avoid partial register stall. */
677 emit_rr(as, XO_CVTTSD2SI, dest, left); 667 emit_rr(as, XO_CVTTSD2SI, dest, left);
678 /* Can't fuse since left is needed twice. */ 668 /* Can't fuse since left is needed twice. */
679} 669}
@@ -729,8 +719,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
729 emit_mrm(as, irt_isnum(ir->t) ? XO_CVTSI2SD : XO_CVTSI2SS, 719 emit_mrm(as, irt_isnum(ir->t) ? XO_CVTSI2SD : XO_CVTSI2SS,
730 dest|((LJ_64 && (st64 || st == IRT_U32)) ? REX_64 : 0), left); 720 dest|((LJ_64 && (st64 || st == IRT_U32)) ? REX_64 : 0), left);
731 } 721 }
732 if (!(as->flags & JIT_F_SPLIT_XMM)) 722 emit_rr(as, XO_XORPS, dest, dest); /* Avoid partial register stall. */
733 emit_rr(as, XO_XORPS, dest, dest); /* Avoid partial register stall. */
734 } else if (stfp) { /* FP to integer conversion. */ 723 } else if (stfp) { /* FP to integer conversion. */
735 if (irt_isguard(ir->t)) { 724 if (irt_isguard(ir->t)) {
736 /* Checked conversions are only supported from number to int. */ 725 /* Checked conversions are only supported from number to int. */
@@ -738,9 +727,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
738 asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); 727 asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
739 } else { 728 } else {
740 Reg dest = ra_dest(as, ir, RSET_GPR); 729 Reg dest = ra_dest(as, ir, RSET_GPR);
741 x86Op op = st == IRT_NUM ? 730 x86Op op = st == IRT_NUM ? XO_CVTTSD2SI : XO_CVTTSS2SI;
742 ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSD2SI : XO_CVTSD2SI) :
743 ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSS2SI : XO_CVTSS2SI);
744 if (LJ_64 ? irt_isu64(ir->t) : irt_isu32(ir->t)) { 731 if (LJ_64 ? irt_isu64(ir->t) : irt_isu32(ir->t)) {
745 /* LJ_64: For inputs >= 2^63 add -2^64, convert again. */ 732 /* LJ_64: For inputs >= 2^63 add -2^64, convert again. */
746 /* LJ_32: For inputs >= 2^31 add -2^31, convert again and add 2^31. */ 733 /* LJ_32: For inputs >= 2^31 add -2^31, convert again and add 2^31. */
@@ -834,8 +821,7 @@ static void asm_conv_fp_int64(ASMState *as, IRIns *ir)
834 if (ra_hasreg(dest)) { 821 if (ra_hasreg(dest)) {
835 ra_free(as, dest); 822 ra_free(as, dest);
836 ra_modified(as, dest); 823 ra_modified(as, dest);
837 emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, 824 emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, dest, RID_ESP, ofs);
838 dest, RID_ESP, ofs);
839 } 825 }
840 emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd, 826 emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd,
841 irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs); 827 irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs);
@@ -863,7 +849,6 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
863 Reg lo, hi; 849 Reg lo, hi;
864 lua_assert(st == IRT_NUM || st == IRT_FLOAT); 850 lua_assert(st == IRT_NUM || st == IRT_FLOAT);
865 lua_assert(dt == IRT_I64 || dt == IRT_U64); 851 lua_assert(dt == IRT_I64 || dt == IRT_U64);
866 lua_assert(((ir-1)->op2 & IRCONV_TRUNC));
867 hi = ra_dest(as, ir, RSET_GPR); 852 hi = ra_dest(as, ir, RSET_GPR);
868 lo = ra_dest(as, ir-1, rset_exclude(RSET_GPR, hi)); 853 lo = ra_dest(as, ir-1, rset_exclude(RSET_GPR, hi));
869 if (ra_used(ir-1)) emit_rmro(as, XO_MOV, lo, RID_ESP, 0); 854 if (ra_used(ir-1)) emit_rmro(as, XO_MOV, lo, RID_ESP, 0);
@@ -906,6 +891,14 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
906 st == IRT_NUM ? XOg_FLDq: XOg_FLDd, 891 st == IRT_NUM ? XOg_FLDq: XOg_FLDd,
907 asm_fuseload(as, ir->op1, RSET_EMPTY)); 892 asm_fuseload(as, ir->op1, RSET_EMPTY));
908} 893}
894
895static void asm_conv64(ASMState *as, IRIns *ir)
896{
897 if (irt_isfp(ir->t))
898 asm_conv_fp_int64(as, ir);
899 else
900 asm_conv_int64_fp(as, ir);
901}
909#endif 902#endif
910 903
911static void asm_strto(ASMState *as, IRIns *ir) 904static void asm_strto(ASMState *as, IRIns *ir)
@@ -927,29 +920,32 @@ static void asm_strto(ASMState *as, IRIns *ir)
927 RID_ESP, sps_scale(ir->s)); 920 RID_ESP, sps_scale(ir->s));
928} 921}
929 922
930static void asm_tostr(ASMState *as, IRIns *ir) 923/* -- Memory references --------------------------------------------------- */
924
925/* Get pointer to TValue. */
926static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
931{ 927{
932 IRIns *irl = IR(ir->op1); 928 IRIns *ir = IR(ref);
933 IRRef args[2]; 929 if (irt_isnum(ir->t)) {
934 args[0] = ASMREF_L; 930 /* For numbers use the constant itself or a spill slot as a TValue. */
935 as->gcsteps++; 931 if (irref_isk(ref))
936 if (irt_isnum(irl->t)) { 932 emit_loada(as, dest, ir_knum(ir));
937 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum]; 933 else
938 args[1] = ASMREF_TMP1; /* const lua_Number * */ 934 emit_rmro(as, XO_LEA, dest|REX_64, RID_ESP, ra_spill(as, ir));
939 asm_setupresult(as, ir, ci); /* GCstr * */
940 asm_gencall(as, ci, args);
941 emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1)|REX_64,
942 RID_ESP, ra_spill(as, irl));
943 } else { 935 } else {
944 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint]; 936 /* Otherwise use g->tmptv to hold the TValue. */
945 args[1] = ir->op1; /* int32_t k */ 937 if (!irref_isk(ref)) {
946 asm_setupresult(as, ir, ci); /* GCstr * */ 938 Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest));
947 asm_gencall(as, ci, args); 939 emit_movtomro(as, REX_64IR(ir, src), dest, 0);
940 } else if (!irt_ispri(ir->t)) {
941 emit_movmroi(as, dest, 0, ir->i);
942 }
943 if (!(LJ_64 && irt_islightud(ir->t)))
944 emit_movmroi(as, dest, 4, irt_toitype(ir->t));
945 emit_loada(as, dest, &J2G(as->J)->tmptv);
948 } 946 }
949} 947}
950 948
951/* -- Memory references --------------------------------------------------- */
952
953static void asm_aref(ASMState *as, IRIns *ir) 949static void asm_aref(ASMState *as, IRIns *ir)
954{ 950{
955 Reg dest = ra_dest(as, ir, RSET_GPR); 951 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -960,23 +956,6 @@ static void asm_aref(ASMState *as, IRIns *ir)
960 emit_rr(as, XO_MOV, dest, as->mrm.base); 956 emit_rr(as, XO_MOV, dest, as->mrm.base);
961} 957}
962 958
963/* Merge NE(HREF, niltv) check. */
964static MCode *merge_href_niltv(ASMState *as, IRIns *ir)
965{
966 /* Assumes nothing else generates NE of HREF. */
967 if ((ir[1].o == IR_NE || ir[1].o == IR_EQ) && ir[1].op1 == as->curins &&
968 ra_hasreg(ir->r)) {
969 MCode *p = as->mcp;
970 p += (LJ_64 && *p != XI_ARITHi) ? 7+6 : 6+6;
971 /* Ensure no loop branch inversion happened. */
972 if (p[-6] == 0x0f && p[-5] == XI_JCCn+(CC_NE^(ir[1].o & 1))) {
973 as->mcp = p; /* Kill cmp reg, imm32 + jz exit. */
974 return p + *(int32_t *)(p-4); /* Return exit address. */
975 }
976 }
977 return NULL;
978}
979
980/* Inlined hash lookup. Specialized for key type and for const keys. 959/* Inlined hash lookup. Specialized for key type and for const keys.
981** The equivalent C code is: 960** The equivalent C code is:
982** Node *n = hashkey(t, key); 961** Node *n = hashkey(t, key);
@@ -985,10 +964,10 @@ static MCode *merge_href_niltv(ASMState *as, IRIns *ir)
985** } while ((n = nextnode(n))); 964** } while ((n = nextnode(n)));
986** return niltv(L); 965** return niltv(L);
987*/ 966*/
988static void asm_href(ASMState *as, IRIns *ir) 967static void asm_href(ASMState *as, IRIns *ir, IROp merge)
989{ 968{
990 MCode *nilexit = merge_href_niltv(as, ir); /* Do this before any restores. */
991 RegSet allow = RSET_GPR; 969 RegSet allow = RSET_GPR;
970 int destused = ra_used(ir);
992 Reg dest = ra_dest(as, ir, allow); 971 Reg dest = ra_dest(as, ir, allow);
993 Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); 972 Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
994 Reg key = RID_NONE, tmp = RID_NONE; 973 Reg key = RID_NONE, tmp = RID_NONE;
@@ -1005,14 +984,12 @@ static void asm_href(ASMState *as, IRIns *ir)
1005 tmp = ra_scratch(as, rset_exclude(allow, key)); 984 tmp = ra_scratch(as, rset_exclude(allow, key));
1006 } 985 }
1007 986
1008 /* Key not found in chain: jump to exit (if merged with NE) or load niltv. */ 987 /* Key not found in chain: jump to exit (if merged) or load niltv. */
1009 l_end = emit_label(as); 988 l_end = emit_label(as);
1010 if (nilexit && ir[1].o == IR_NE) { 989 if (merge == IR_NE)
1011 emit_jcc(as, CC_E, nilexit); /* XI_JMP is not found by lj_asm_patchexit. */ 990 asm_guardcc(as, CC_E); /* XI_JMP is not found by lj_asm_patchexit. */
1012 nilexit = NULL; 991 else if (destused)
1013 } else {
1014 emit_loada(as, dest, niltvg(J2G(as->J))); 992 emit_loada(as, dest, niltvg(J2G(as->J)));
1015 }
1016 993
1017 /* Follow hash chain until the end. */ 994 /* Follow hash chain until the end. */
1018 l_loop = emit_sjcc_label(as, CC_NZ); 995 l_loop = emit_sjcc_label(as, CC_NZ);
@@ -1021,8 +998,8 @@ static void asm_href(ASMState *as, IRIns *ir)
1021 l_next = emit_label(as); 998 l_next = emit_label(as);
1022 999
1023 /* Type and value comparison. */ 1000 /* Type and value comparison. */
1024 if (nilexit) 1001 if (merge == IR_EQ)
1025 emit_jcc(as, CC_E, nilexit); 1002 asm_guardcc(as, CC_E);
1026 else 1003 else
1027 emit_sjcc(as, CC_E, l_end); 1004 emit_sjcc(as, CC_E, l_end);
1028 if (irt_isnum(kt)) { 1005 if (irt_isnum(kt)) {
@@ -1178,41 +1155,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
1178#endif 1155#endif
1179} 1156}
1180 1157
1181static void asm_newref(ASMState *as, IRIns *ir)
1182{
1183 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
1184 IRRef args[3];
1185 IRIns *irkey;
1186 Reg tmp;
1187 if (ir->r == RID_SINK)
1188 return;
1189 args[0] = ASMREF_L; /* lua_State *L */
1190 args[1] = ir->op1; /* GCtab *t */
1191 args[2] = ASMREF_TMP1; /* cTValue *key */
1192 asm_setupresult(as, ir, ci); /* TValue * */
1193 asm_gencall(as, ci, args);
1194 tmp = ra_releasetmp(as, ASMREF_TMP1);
1195 irkey = IR(ir->op2);
1196 if (irt_isnum(irkey->t)) {
1197 /* For numbers use the constant itself or a spill slot as a TValue. */
1198 if (irref_isk(ir->op2))
1199 emit_loada(as, tmp, ir_knum(irkey));
1200 else
1201 emit_rmro(as, XO_LEA, tmp|REX_64, RID_ESP, ra_spill(as, irkey));
1202 } else {
1203 /* Otherwise use g->tmptv to hold the TValue. */
1204 if (!irref_isk(ir->op2)) {
1205 Reg src = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, tmp));
1206 emit_movtomro(as, REX_64IR(irkey, src), tmp, 0);
1207 } else if (!irt_ispri(irkey->t)) {
1208 emit_movmroi(as, tmp, 0, irkey->i);
1209 }
1210 if (!(LJ_64 && irt_islightud(irkey->t)))
1211 emit_movmroi(as, tmp, 4, irt_toitype(irkey->t));
1212 emit_loada(as, tmp, &J2G(as->J)->tmptv);
1213 }
1214}
1215
1216static void asm_uref(ASMState *as, IRIns *ir) 1158static void asm_uref(ASMState *as, IRIns *ir)
1217{ 1159{
1218 /* NYI: Check that UREFO is still open and not aliasing a slot. */ 1160 /* NYI: Check that UREFO is still open and not aliasing a slot. */
@@ -1272,7 +1214,7 @@ static void asm_fxload(ASMState *as, IRIns *ir)
1272 case IRT_U8: xo = XO_MOVZXb; break; 1214 case IRT_U8: xo = XO_MOVZXb; break;
1273 case IRT_I16: xo = XO_MOVSXw; break; 1215 case IRT_I16: xo = XO_MOVSXw; break;
1274 case IRT_U16: xo = XO_MOVZXw; break; 1216 case IRT_U16: xo = XO_MOVZXw; break;
1275 case IRT_NUM: xo = XMM_MOVRM(as); break; 1217 case IRT_NUM: xo = XO_MOVSD; break;
1276 case IRT_FLOAT: xo = XO_MOVSS; break; 1218 case IRT_FLOAT: xo = XO_MOVSS; break;
1277 default: 1219 default:
1278 if (LJ_64 && irt_is64(ir->t)) 1220 if (LJ_64 && irt_is64(ir->t))
@@ -1285,6 +1227,9 @@ static void asm_fxload(ASMState *as, IRIns *ir)
1285 emit_mrm(as, xo, dest, RID_MRM); 1227 emit_mrm(as, xo, dest, RID_MRM);
1286} 1228}
1287 1229
1230#define asm_fload(as, ir) asm_fxload(as, ir)
1231#define asm_xload(as, ir) asm_fxload(as, ir)
1232
1288static void asm_fxstore(ASMState *as, IRIns *ir) 1233static void asm_fxstore(ASMState *as, IRIns *ir)
1289{ 1234{
1290 RegSet allow = RSET_GPR; 1235 RegSet allow = RSET_GPR;
@@ -1348,6 +1293,9 @@ static void asm_fxstore(ASMState *as, IRIns *ir)
1348 } 1293 }
1349} 1294}
1350 1295
1296#define asm_fstore(as, ir) asm_fxstore(as, ir)
1297#define asm_xstore(as, ir) asm_fxstore(as, ir)
1298
1351#if LJ_64 1299#if LJ_64
1352static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck) 1300static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck)
1353{ 1301{
@@ -1386,7 +1334,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
1386 RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR; 1334 RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
1387 Reg dest = ra_dest(as, ir, allow); 1335 Reg dest = ra_dest(as, ir, allow);
1388 asm_fuseahuref(as, ir->op1, RSET_GPR); 1336 asm_fuseahuref(as, ir->op1, RSET_GPR);
1389 emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XMM_MOVRM(as), dest, RID_MRM); 1337 emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XO_MOVSD, dest, RID_MRM);
1390 } else { 1338 } else {
1391 asm_fuseahuref(as, ir->op1, RSET_GPR); 1339 asm_fuseahuref(as, ir->op1, RSET_GPR);
1392 } 1340 }
@@ -1452,7 +1400,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
1452 Reg left = ra_scratch(as, RSET_FPR); 1400 Reg left = ra_scratch(as, RSET_FPR);
1453 asm_tointg(as, ir, left); /* Frees dest reg. Do this before base alloc. */ 1401 asm_tointg(as, ir, left); /* Frees dest reg. Do this before base alloc. */
1454 base = ra_alloc1(as, REF_BASE, RSET_GPR); 1402 base = ra_alloc1(as, REF_BASE, RSET_GPR);
1455 emit_rmro(as, XMM_MOVRM(as), left, base, ofs); 1403 emit_rmro(as, XO_MOVSD, left, base, ofs);
1456 t.irt = IRT_NUM; /* Continue with a regular number type check. */ 1404 t.irt = IRT_NUM; /* Continue with a regular number type check. */
1457#if LJ_64 1405#if LJ_64
1458 } else if (irt_islightud(t)) { 1406 } else if (irt_islightud(t)) {
@@ -1470,11 +1418,9 @@ static void asm_sload(ASMState *as, IRIns *ir)
1470 lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); 1418 lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
1471 if ((ir->op2 & IRSLOAD_CONVERT)) { 1419 if ((ir->op2 & IRSLOAD_CONVERT)) {
1472 t.irt = irt_isint(t) ? IRT_NUM : IRT_INT; /* Check for original type. */ 1420 t.irt = irt_isint(t) ? IRT_NUM : IRT_INT; /* Check for original type. */
1473 emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTSD2SI, dest, base, ofs); 1421 emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTTSD2SI, dest, base, ofs);
1474 } else if (irt_isnum(t)) {
1475 emit_rmro(as, XMM_MOVRM(as), dest, base, ofs);
1476 } else { 1422 } else {
1477 emit_rmro(as, XO_MOV, dest, base, ofs); 1423 emit_rmro(as, irt_isnum(t) ? XO_MOVSD : XO_MOV, dest, base, ofs);
1478 } 1424 }
1479 } else { 1425 } else {
1480 if (!(ir->op2 & IRSLOAD_TYPECHECK)) 1426 if (!(ir->op2 & IRSLOAD_TYPECHECK))
@@ -1501,15 +1447,13 @@ static void asm_sload(ASMState *as, IRIns *ir)
1501static void asm_cnew(ASMState *as, IRIns *ir) 1447static void asm_cnew(ASMState *as, IRIns *ir)
1502{ 1448{
1503 CTState *cts = ctype_ctsG(J2G(as->J)); 1449 CTState *cts = ctype_ctsG(J2G(as->J));
1504 CTypeID ctypeid = (CTypeID)IR(ir->op1)->i; 1450 CTypeID id = (CTypeID)IR(ir->op1)->i;
1505 CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ? 1451 CTSize sz;
1506 lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i; 1452 CTInfo info = lj_ctype_info(cts, id, &sz);
1507 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; 1453 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
1508 IRRef args[2]; 1454 IRRef args[4];
1509 lua_assert(sz != CTSIZE_INVALID); 1455 lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
1510 1456
1511 args[0] = ASMREF_L; /* lua_State *L */
1512 args[1] = ASMREF_TMP1; /* MSize size */
1513 as->gcsteps++; 1457 as->gcsteps++;
1514 asm_setupresult(as, ir, ci); /* GCcdata * */ 1458 asm_setupresult(as, ir, ci); /* GCcdata * */
1515 1459
@@ -1552,15 +1496,26 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1552 } while (1); 1496 } while (1);
1553#endif 1497#endif
1554 lua_assert(sz == 4 || sz == 8); 1498 lua_assert(sz == 4 || sz == 8);
1499 } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */
1500 ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
1501 args[0] = ASMREF_L; /* lua_State *L */
1502 args[1] = ir->op1; /* CTypeID id */
1503 args[2] = ir->op2; /* CTSize sz */
1504 args[3] = ASMREF_TMP1; /* CTSize align */
1505 asm_gencall(as, ci, args);
1506 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
1507 return;
1555 } 1508 }
1556 1509
1557 /* Combine initialization of marked, gct and ctypeid. */ 1510 /* Combine initialization of marked, gct and ctypeid. */
1558 emit_movtomro(as, RID_ECX, RID_RET, offsetof(GCcdata, marked)); 1511 emit_movtomro(as, RID_ECX, RID_RET, offsetof(GCcdata, marked));
1559 emit_gri(as, XG_ARITHi(XOg_OR), RID_ECX, 1512 emit_gri(as, XG_ARITHi(XOg_OR), RID_ECX,
1560 (int32_t)((~LJ_TCDATA<<8)+(ctypeid<<16))); 1513 (int32_t)((~LJ_TCDATA<<8)+(id<<16)));
1561 emit_gri(as, XG_ARITHi(XOg_AND), RID_ECX, LJ_GC_WHITES); 1514 emit_gri(as, XG_ARITHi(XOg_AND), RID_ECX, LJ_GC_WHITES);
1562 emit_opgl(as, XO_MOVZXb, RID_ECX, gc.currentwhite); 1515 emit_opgl(as, XO_MOVZXb, RID_ECX, gc.currentwhite);
1563 1516
1517 args[0] = ASMREF_L; /* lua_State *L */
1518 args[1] = ASMREF_TMP1; /* MSize size */
1564 asm_gencall(as, ci, args); 1519 asm_gencall(as, ci, args);
1565 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)(sz+sizeof(GCcdata))); 1520 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)(sz+sizeof(GCcdata)));
1566} 1521}
@@ -1638,36 +1593,9 @@ static void asm_x87load(ASMState *as, IRRef ref)
1638 } 1593 }
1639} 1594}
1640 1595
1641/* Try to rejoin pow from EXP2, MUL and LOG2 (if still unsplit). */
1642static int fpmjoin_pow(ASMState *as, IRIns *ir)
1643{
1644 IRIns *irp = IR(ir->op1);
1645 if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
1646 IRIns *irpp = IR(irp->op1);
1647 if (irpp == ir-2 && irpp->o == IR_FPMATH &&
1648 irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
1649 /* The modified regs must match with the *.dasc implementation. */
1650 RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX);
1651 IRIns *irx;
1652 if (ra_hasreg(ir->r))
1653 rset_clear(drop, ir->r); /* Dest reg handled below. */
1654 ra_evictset(as, drop);
1655 ra_destreg(as, ir, RID_XMM0);
1656 emit_call(as, lj_vm_pow_sse);
1657 irx = IR(irpp->op1);
1658 if (ra_noreg(irx->r) && ra_gethint(irx->r) == RID_XMM1)
1659 irx->r = RID_INIT; /* Avoid allocating xmm1 for x. */
1660 ra_left(as, RID_XMM0, irpp->op1);
1661 ra_left(as, RID_XMM1, irp->op2);
1662 return 1;
1663 }
1664 }
1665 return 0;
1666}
1667
1668static void asm_fpmath(ASMState *as, IRIns *ir) 1596static void asm_fpmath(ASMState *as, IRIns *ir)
1669{ 1597{
1670 IRFPMathOp fpm = ir->o == IR_FPMATH ? (IRFPMathOp)ir->op2 : IRFPM_OTHER; 1598 IRFPMathOp fpm = (IRFPMathOp)ir->op2;
1671 if (fpm == IRFPM_SQRT) { 1599 if (fpm == IRFPM_SQRT) {
1672 Reg dest = ra_dest(as, ir, RSET_FPR); 1600 Reg dest = ra_dest(as, ir, RSET_FPR);
1673 Reg left = asm_fuseload(as, ir->op1, RSET_FPR); 1601 Reg left = asm_fuseload(as, ir->op1, RSET_FPR);
@@ -1698,51 +1626,29 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
1698 fpm == IRFPM_CEIL ? lj_vm_ceil_sse : lj_vm_trunc_sse); 1626 fpm == IRFPM_CEIL ? lj_vm_ceil_sse : lj_vm_trunc_sse);
1699 ra_left(as, RID_XMM0, ir->op1); 1627 ra_left(as, RID_XMM0, ir->op1);
1700 } 1628 }
1701 } else if (fpm == IRFPM_EXP2 && fpmjoin_pow(as, ir)) { 1629 } else if (fpm == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) {
1702 /* Rejoined to pow(). */ 1630 /* Rejoined to pow(). */
1703 } else { /* Handle x87 ops. */ 1631 } else {
1704 int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */ 1632 asm_callid(as, ir, IRCALL_lj_vm_floor + fpm);
1705 Reg dest = ir->r; 1633 }
1706 if (ra_hasreg(dest)) { 1634}
1707 ra_free(as, dest); 1635
1708 ra_modified(as, dest); 1636#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2)
1709 emit_rmro(as, XMM_MOVRM(as), dest, RID_ESP, ofs); 1637
1710 } 1638static void asm_ldexp(ASMState *as, IRIns *ir)
1711 emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs); 1639{
1712 switch (fpm) { /* st0 = lj_vm_*(st0) */ 1640 int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */
1713 case IRFPM_EXP: emit_call(as, lj_vm_exp_x87); break; 1641 Reg dest = ir->r;
1714 case IRFPM_EXP2: emit_call(as, lj_vm_exp2_x87); break; 1642 if (ra_hasreg(dest)) {
1715 case IRFPM_SIN: emit_x87op(as, XI_FSIN); break; 1643 ra_free(as, dest);
1716 case IRFPM_COS: emit_x87op(as, XI_FCOS); break; 1644 ra_modified(as, dest);
1717 case IRFPM_TAN: emit_x87op(as, XI_FPOP); emit_x87op(as, XI_FPTAN); break; 1645 emit_rmro(as, XO_MOVSD, dest, RID_ESP, ofs);
1718 case IRFPM_LOG: case IRFPM_LOG2: case IRFPM_LOG10:
1719 /* Note: the use of fyl2xp1 would be pointless here. When computing
1720 ** log(1.0+eps) the precision is already lost after 1.0 is added.
1721 ** Subtracting 1.0 won't recover it. OTOH math.log1p would make sense.
1722 */
1723 emit_x87op(as, XI_FYL2X); break;
1724 case IRFPM_OTHER:
1725 switch (ir->o) {
1726 case IR_ATAN2:
1727 emit_x87op(as, XI_FPATAN); asm_x87load(as, ir->op2); break;
1728 case IR_LDEXP:
1729 emit_x87op(as, XI_FPOP1); emit_x87op(as, XI_FSCALE); break;
1730 default: lua_assert(0); break;
1731 }
1732 break;
1733 default: lua_assert(0); break;
1734 }
1735 asm_x87load(as, ir->op1);
1736 switch (fpm) {
1737 case IRFPM_LOG: emit_x87op(as, XI_FLDLN2); break;
1738 case IRFPM_LOG2: emit_x87op(as, XI_FLD1); break;
1739 case IRFPM_LOG10: emit_x87op(as, XI_FLDLG2); break;
1740 case IRFPM_OTHER:
1741 if (ir->o == IR_LDEXP) asm_x87load(as, ir->op2);
1742 break;
1743 default: break;
1744 }
1745 } 1646 }
1647 emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs);
1648 emit_x87op(as, XI_FPOP1);
1649 emit_x87op(as, XI_FSCALE);
1650 asm_x87load(as, ir->op1);
1651 asm_x87load(as, ir->op2);
1746} 1652}
1747 1653
1748static void asm_fppowi(ASMState *as, IRIns *ir) 1654static void asm_fppowi(ASMState *as, IRIns *ir)
@@ -1758,26 +1664,15 @@ static void asm_fppowi(ASMState *as, IRIns *ir)
1758 ra_left(as, RID_EAX, ir->op2); 1664 ra_left(as, RID_EAX, ir->op2);
1759} 1665}
1760 1666
1761#if LJ_64 && LJ_HASFFI 1667static void asm_pow(ASMState *as, IRIns *ir)
1762static void asm_arith64(ASMState *as, IRIns *ir, IRCallID id)
1763{ 1668{
1764 const CCallInfo *ci = &lj_ir_callinfo[id]; 1669#if LJ_64 && LJ_HASFFI
1765 IRRef args[2]; 1670 if (!irt_isnum(ir->t))
1766 args[0] = ir->op1; 1671 asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
1767 args[1] = ir->op2; 1672 IRCALL_lj_carith_powu64);
1768 asm_setupresult(as, ir, ci); 1673 else
1769 asm_gencall(as, ci, args);
1770}
1771#endif 1674#endif
1772 1675 asm_fppowi(as, ir);
1773static void asm_intmod(ASMState *as, IRIns *ir)
1774{
1775 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_vm_modi];
1776 IRRef args[2];
1777 args[0] = ir->op1;
1778 args[1] = ir->op2;
1779 asm_setupresult(as, ir, ci);
1780 asm_gencall(as, ci, args);
1781} 1676}
1782 1677
1783static int asm_swapops(ASMState *as, IRIns *ir) 1678static int asm_swapops(ASMState *as, IRIns *ir)
@@ -1960,6 +1855,44 @@ static void asm_add(ASMState *as, IRIns *ir)
1960 asm_intarith(as, ir, XOg_ADD); 1855 asm_intarith(as, ir, XOg_ADD);
1961} 1856}
1962 1857
1858static void asm_sub(ASMState *as, IRIns *ir)
1859{
1860 if (irt_isnum(ir->t))
1861 asm_fparith(as, ir, XO_SUBSD);
1862 else /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */
1863 asm_intarith(as, ir, XOg_SUB);
1864}
1865
1866static void asm_mul(ASMState *as, IRIns *ir)
1867{
1868 if (irt_isnum(ir->t))
1869 asm_fparith(as, ir, XO_MULSD);
1870 else
1871 asm_intarith(as, ir, XOg_X_IMUL);
1872}
1873
1874static void asm_div(ASMState *as, IRIns *ir)
1875{
1876#if LJ_64 && LJ_HASFFI
1877 if (!irt_isnum(ir->t))
1878 asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
1879 IRCALL_lj_carith_divu64);
1880 else
1881#endif
1882 asm_fparith(as, ir, XO_DIVSD);
1883}
1884
1885static void asm_mod(ASMState *as, IRIns *ir)
1886{
1887#if LJ_64 && LJ_HASFFI
1888 if (!irt_isint(ir->t))
1889 asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
1890 IRCALL_lj_carith_modu64);
1891 else
1892#endif
1893 asm_callid(as, ir, IRCALL_lj_vm_modi);
1894}
1895
1963static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg) 1896static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg)
1964{ 1897{
1965 Reg dest = ra_dest(as, ir, RSET_GPR); 1898 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1967,7 +1900,17 @@ static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg)
1967 ra_left(as, dest, ir->op1); 1900 ra_left(as, dest, ir->op1);
1968} 1901}
1969 1902
1970static void asm_min_max(ASMState *as, IRIns *ir, int cc) 1903static void asm_neg(ASMState *as, IRIns *ir)
1904{
1905 if (irt_isnum(ir->t))
1906 asm_fparith(as, ir, XO_XORPS);
1907 else
1908 asm_neg_not(as, ir, XOg_NEG);
1909}
1910
1911#define asm_abs(as, ir) asm_fparith(as, ir, XO_ANDPS)
1912
1913static void asm_intmin_max(ASMState *as, IRIns *ir, int cc)
1971{ 1914{
1972 Reg right, dest = ra_dest(as, ir, RSET_GPR); 1915 Reg right, dest = ra_dest(as, ir, RSET_GPR);
1973 IRRef lref = ir->op1, rref = ir->op2; 1916 IRRef lref = ir->op1, rref = ir->op2;
@@ -1978,7 +1921,30 @@ static void asm_min_max(ASMState *as, IRIns *ir, int cc)
1978 ra_left(as, dest, lref); 1921 ra_left(as, dest, lref);
1979} 1922}
1980 1923
1981static void asm_bitswap(ASMState *as, IRIns *ir) 1924static void asm_min(ASMState *as, IRIns *ir)
1925{
1926 if (irt_isnum(ir->t))
1927 asm_fparith(as, ir, XO_MINSD);
1928 else
1929 asm_intmin_max(as, ir, CC_G);
1930}
1931
1932static void asm_max(ASMState *as, IRIns *ir)
1933{
1934 if (irt_isnum(ir->t))
1935 asm_fparith(as, ir, XO_MAXSD);
1936 else
1937 asm_intmin_max(as, ir, CC_L);
1938}
1939
1940/* Note: don't use LEA for overflow-checking arithmetic! */
1941#define asm_addov(as, ir) asm_intarith(as, ir, XOg_ADD)
1942#define asm_subov(as, ir) asm_intarith(as, ir, XOg_SUB)
1943#define asm_mulov(as, ir) asm_intarith(as, ir, XOg_X_IMUL)
1944
1945#define asm_bnot(as, ir) asm_neg_not(as, ir, XOg_NOT)
1946
1947static void asm_bswap(ASMState *as, IRIns *ir)
1982{ 1948{
1983 Reg dest = ra_dest(as, ir, RSET_GPR); 1949 Reg dest = ra_dest(as, ir, RSET_GPR);
1984 as->mcp = emit_op(XO_BSWAP + ((dest&7) << 24), 1950 as->mcp = emit_op(XO_BSWAP + ((dest&7) << 24),
@@ -1986,6 +1952,10 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
1986 ra_left(as, dest, ir->op1); 1952 ra_left(as, dest, ir->op1);
1987} 1953}
1988 1954
1955#define asm_band(as, ir) asm_intarith(as, ir, XOg_AND)
1956#define asm_bor(as, ir) asm_intarith(as, ir, XOg_OR)
1957#define asm_bxor(as, ir) asm_intarith(as, ir, XOg_XOR)
1958
1989static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs) 1959static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
1990{ 1960{
1991 IRRef rref = ir->op2; 1961 IRRef rref = ir->op2;
@@ -2025,6 +1995,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
2025 */ 1995 */
2026} 1996}
2027 1997
1998#define asm_bshl(as, ir) asm_bitshift(as, ir, XOg_SHL)
1999#define asm_bshr(as, ir) asm_bitshift(as, ir, XOg_SHR)
2000#define asm_bsar(as, ir) asm_bitshift(as, ir, XOg_SAR)
2001#define asm_brol(as, ir) asm_bitshift(as, ir, XOg_ROL)
2002#define asm_bror(as, ir) asm_bitshift(as, ir, XOg_ROR)
2003
2028/* -- Comparisons --------------------------------------------------------- */ 2004/* -- Comparisons --------------------------------------------------------- */
2029 2005
2030/* Virtual flags for unordered FP comparisons. */ 2006/* Virtual flags for unordered FP comparisons. */
@@ -2051,8 +2027,9 @@ static const uint16_t asm_compmap[IR_ABC+1] = {
2051}; 2027};
2052 2028
2053/* FP and integer comparisons. */ 2029/* FP and integer comparisons. */
2054static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc) 2030static void asm_comp(ASMState *as, IRIns *ir)
2055{ 2031{
2032 uint32_t cc = asm_compmap[ir->o];
2056 if (irt_isnum(ir->t)) { 2033 if (irt_isnum(ir->t)) {
2057 IRRef lref = ir->op1; 2034 IRRef lref = ir->op1;
2058 IRRef rref = ir->op2; 2035 IRRef rref = ir->op2;
@@ -2207,6 +2184,8 @@ static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc)
2207 } 2184 }
2208} 2185}
2209 2186
2187#define asm_equal(as, ir) asm_comp(as, ir)
2188
2210#if LJ_32 && LJ_HASFFI 2189#if LJ_32 && LJ_HASFFI
2211/* 64 bit integer comparisons in 32 bit mode. */ 2190/* 64 bit integer comparisons in 32 bit mode. */
2212static void asm_comp_int64(ASMState *as, IRIns *ir) 2191static void asm_comp_int64(ASMState *as, IRIns *ir)
@@ -2289,13 +2268,9 @@ static void asm_hiop(ASMState *as, IRIns *ir)
2289 int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ 2268 int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */
2290 if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; 2269 if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
2291 if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ 2270 if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */
2292 if (usehi || uselo) {
2293 if (irt_isfp(ir->t))
2294 asm_conv_fp_int64(as, ir);
2295 else
2296 asm_conv_int64_fp(as, ir);
2297 }
2298 as->curins--; /* Always skip the CONV. */ 2271 as->curins--; /* Always skip the CONV. */
2272 if (usehi || uselo)
2273 asm_conv64(as, ir);
2299 return; 2274 return;
2300 } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */ 2275 } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */
2301 asm_comp_int64(as, ir); 2276 asm_comp_int64(as, ir);
@@ -2344,6 +2319,16 @@ static void asm_hiop(ASMState *as, IRIns *ir)
2344#endif 2319#endif
2345} 2320}
2346 2321
2322/* -- Profiling ----------------------------------------------------------- */
2323
2324static void asm_prof(ASMState *as, IRIns *ir)
2325{
2326 UNUSED(ir);
2327 asm_guardcc(as, CC_NE);
2328 emit_i8(as, HOOK_PROFILE);
2329 emit_rma(as, XO_GROUP3b, XOg_TEST, &J2G(as->J)->hookmask);
2330}
2331
2347/* -- Stack handling ------------------------------------------------------ */ 2332/* -- Stack handling ------------------------------------------------------ */
2348 2333
2349/* Check Lua stack size for overflow. Use exit handler as fallback. */ 2334/* Check Lua stack size for overflow. Use exit handler as fallback. */
@@ -2365,7 +2350,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
2365 emit_rmro(as, XO_ARITH(XOg_SUB), r, RID_NONE, 2350 emit_rmro(as, XO_ARITH(XOg_SUB), r, RID_NONE,
2366 ptr2addr(&J2G(as->J)->jit_base)); 2351 ptr2addr(&J2G(as->J)->jit_base));
2367 emit_rmro(as, XO_MOV, r, r, offsetof(lua_State, maxstack)); 2352 emit_rmro(as, XO_MOV, r, r, offsetof(lua_State, maxstack));
2368 emit_getgl(as, r, jit_L); 2353 emit_getgl(as, r, cur_L);
2369 if (allow == RSET_EMPTY) /* Spill temp. register. */ 2354 if (allow == RSET_EMPTY) /* Spill temp. register. */
2370 emit_rmro(as, XO_MOVto, r|REX_64, RID_ESP, 0); 2355 emit_rmro(as, XO_MOVto, r|REX_64, RID_ESP, 0);
2371} 2356}
@@ -2593,163 +2578,6 @@ static void asm_tail_prep(ASMState *as)
2593 } 2578 }
2594} 2579}
2595 2580
2596/* -- Instruction dispatch ------------------------------------------------ */
2597
2598/* Assemble a single instruction. */
2599static void asm_ir(ASMState *as, IRIns *ir)
2600{
2601 switch ((IROp)ir->o) {
2602 /* Miscellaneous ops. */
2603 case IR_LOOP: asm_loop(as); break;
2604 case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
2605 case IR_USE:
2606 ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
2607 case IR_PHI: asm_phi(as, ir); break;
2608 case IR_HIOP: asm_hiop(as, ir); break;
2609 case IR_GCSTEP: asm_gcstep(as, ir); break;
2610
2611 /* Guarded assertions. */
2612 case IR_LT: case IR_GE: case IR_LE: case IR_GT:
2613 case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
2614 case IR_EQ: case IR_NE: case IR_ABC:
2615 asm_comp(as, ir, asm_compmap[ir->o]);
2616 break;
2617
2618 case IR_RETF: asm_retf(as, ir); break;
2619
2620 /* Bit ops. */
2621 case IR_BNOT: asm_neg_not(as, ir, XOg_NOT); break;
2622 case IR_BSWAP: asm_bitswap(as, ir); break;
2623
2624 case IR_BAND: asm_intarith(as, ir, XOg_AND); break;
2625 case IR_BOR: asm_intarith(as, ir, XOg_OR); break;
2626 case IR_BXOR: asm_intarith(as, ir, XOg_XOR); break;
2627
2628 case IR_BSHL: asm_bitshift(as, ir, XOg_SHL); break;
2629 case IR_BSHR: asm_bitshift(as, ir, XOg_SHR); break;
2630 case IR_BSAR: asm_bitshift(as, ir, XOg_SAR); break;
2631 case IR_BROL: asm_bitshift(as, ir, XOg_ROL); break;
2632 case IR_BROR: asm_bitshift(as, ir, XOg_ROR); break;
2633
2634 /* Arithmetic ops. */
2635 case IR_ADD: asm_add(as, ir); break;
2636 case IR_SUB:
2637 if (irt_isnum(ir->t))
2638 asm_fparith(as, ir, XO_SUBSD);
2639 else /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */
2640 asm_intarith(as, ir, XOg_SUB);
2641 break;
2642 case IR_MUL:
2643 if (irt_isnum(ir->t))
2644 asm_fparith(as, ir, XO_MULSD);
2645 else
2646 asm_intarith(as, ir, XOg_X_IMUL);
2647 break;
2648 case IR_DIV:
2649#if LJ_64 && LJ_HASFFI
2650 if (!irt_isnum(ir->t))
2651 asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
2652 IRCALL_lj_carith_divu64);
2653 else
2654#endif
2655 asm_fparith(as, ir, XO_DIVSD);
2656 break;
2657 case IR_MOD:
2658#if LJ_64 && LJ_HASFFI
2659 if (!irt_isint(ir->t))
2660 asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
2661 IRCALL_lj_carith_modu64);
2662 else
2663#endif
2664 asm_intmod(as, ir);
2665 break;
2666
2667 case IR_NEG:
2668 if (irt_isnum(ir->t))
2669 asm_fparith(as, ir, XO_XORPS);
2670 else
2671 asm_neg_not(as, ir, XOg_NEG);
2672 break;
2673 case IR_ABS: asm_fparith(as, ir, XO_ANDPS); break;
2674
2675 case IR_MIN:
2676 if (irt_isnum(ir->t))
2677 asm_fparith(as, ir, XO_MINSD);
2678 else
2679 asm_min_max(as, ir, CC_G);
2680 break;
2681 case IR_MAX:
2682 if (irt_isnum(ir->t))
2683 asm_fparith(as, ir, XO_MAXSD);
2684 else
2685 asm_min_max(as, ir, CC_L);
2686 break;
2687
2688 case IR_FPMATH: case IR_ATAN2: case IR_LDEXP:
2689 asm_fpmath(as, ir);
2690 break;
2691 case IR_POW:
2692#if LJ_64 && LJ_HASFFI
2693 if (!irt_isnum(ir->t))
2694 asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
2695 IRCALL_lj_carith_powu64);
2696 else
2697#endif
2698 asm_fppowi(as, ir);
2699 break;
2700
2701 /* Overflow-checking arithmetic ops. Note: don't use LEA here! */
2702 case IR_ADDOV: asm_intarith(as, ir, XOg_ADD); break;
2703 case IR_SUBOV: asm_intarith(as, ir, XOg_SUB); break;
2704 case IR_MULOV: asm_intarith(as, ir, XOg_X_IMUL); break;
2705
2706 /* Memory references. */
2707 case IR_AREF: asm_aref(as, ir); break;
2708 case IR_HREF: asm_href(as, ir); break;
2709 case IR_HREFK: asm_hrefk(as, ir); break;
2710 case IR_NEWREF: asm_newref(as, ir); break;
2711 case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
2712 case IR_FREF: asm_fref(as, ir); break;
2713 case IR_STRREF: asm_strref(as, ir); break;
2714
2715 /* Loads and stores. */
2716 case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
2717 asm_ahuvload(as, ir);
2718 break;
2719 case IR_FLOAD: case IR_XLOAD: asm_fxload(as, ir); break;
2720 case IR_SLOAD: asm_sload(as, ir); break;
2721
2722 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
2723 case IR_FSTORE: case IR_XSTORE: asm_fxstore(as, ir); break;
2724
2725 /* Allocations. */
2726 case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
2727 case IR_TNEW: asm_tnew(as, ir); break;
2728 case IR_TDUP: asm_tdup(as, ir); break;
2729 case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
2730
2731 /* Write barriers. */
2732 case IR_TBAR: asm_tbar(as, ir); break;
2733 case IR_OBAR: asm_obar(as, ir); break;
2734
2735 /* Type conversions. */
2736 case IR_TOBIT: asm_tobit(as, ir); break;
2737 case IR_CONV: asm_conv(as, ir); break;
2738 case IR_TOSTR: asm_tostr(as, ir); break;
2739 case IR_STRTO: asm_strto(as, ir); break;
2740
2741 /* Calls. */
2742 case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
2743 case IR_CALLXS: asm_callx(as, ir); break;
2744 case IR_CARG: break;
2745
2746 default:
2747 setintV(&as->J->errinfo, ir->o);
2748 lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
2749 break;
2750 }
2751}
2752
2753/* -- Trace setup --------------------------------------------------------- */ 2581/* -- Trace setup --------------------------------------------------------- */
2754 2582
2755/* Ensure there are enough stack slots for call arguments. */ 2583/* Ensure there are enough stack slots for call arguments. */
diff --git a/src/lj_bc.h b/src/lj_bc.h
index 7436fabf..64c1bcda 100644
--- a/src/lj_bc.h
+++ b/src/lj_bc.h
@@ -89,6 +89,8 @@
89 _(ISFC, dst, ___, var, ___) \ 89 _(ISFC, dst, ___, var, ___) \
90 _(IST, ___, ___, var, ___) \ 90 _(IST, ___, ___, var, ___) \
91 _(ISF, ___, ___, var, ___) \ 91 _(ISF, ___, ___, var, ___) \
92 _(ISTYPE, var, ___, lit, ___) \
93 _(ISNUM, var, ___, lit, ___) \
92 \ 94 \
93 /* Unary ops. */ \ 95 /* Unary ops. */ \
94 _(MOV, dst, ___, var, ___) \ 96 _(MOV, dst, ___, var, ___) \
@@ -143,10 +145,12 @@
143 _(TGETV, dst, var, var, index) \ 145 _(TGETV, dst, var, var, index) \
144 _(TGETS, dst, var, str, index) \ 146 _(TGETS, dst, var, str, index) \
145 _(TGETB, dst, var, lit, index) \ 147 _(TGETB, dst, var, lit, index) \
148 _(TGETR, dst, var, var, index) \
146 _(TSETV, var, var, var, newindex) \ 149 _(TSETV, var, var, var, newindex) \
147 _(TSETS, var, var, str, newindex) \ 150 _(TSETS, var, var, str, newindex) \
148 _(TSETB, var, var, lit, newindex) \ 151 _(TSETB, var, var, lit, newindex) \
149 _(TSETM, base, ___, num, newindex) \ 152 _(TSETM, base, ___, num, newindex) \
153 _(TSETR, var, var, var, newindex) \
150 \ 154 \
151 /* Calls and vararg handling. T = tail call. */ \ 155 /* Calls and vararg handling. T = tail call. */ \
152 _(CALLM, base, lit, lit, call) \ 156 _(CALLM, base, lit, lit, call) \
diff --git a/src/lj_bcdump.h b/src/lj_bcdump.h
index 812d0e15..c3898314 100644
--- a/src/lj_bcdump.h
+++ b/src/lj_bcdump.h
@@ -36,14 +36,15 @@
36/* If you perform *any* kind of private modifications to the bytecode itself 36/* If you perform *any* kind of private modifications to the bytecode itself
37** or to the dump format, you *must* set BCDUMP_VERSION to 0x80 or higher. 37** or to the dump format, you *must* set BCDUMP_VERSION to 0x80 or higher.
38*/ 38*/
39#define BCDUMP_VERSION 1 39#define BCDUMP_VERSION 2
40 40
41/* Compatibility flags. */ 41/* Compatibility flags. */
42#define BCDUMP_F_BE 0x01 42#define BCDUMP_F_BE 0x01
43#define BCDUMP_F_STRIP 0x02 43#define BCDUMP_F_STRIP 0x02
44#define BCDUMP_F_FFI 0x04 44#define BCDUMP_F_FFI 0x04
45#define BCDUMP_F_FR2 0x08
45 46
46#define BCDUMP_F_KNOWN (BCDUMP_F_FFI*2-1) 47#define BCDUMP_F_KNOWN (BCDUMP_F_FR2*2-1)
47 48
48/* Type codes for the GC constants of a prototype. Plus length for strings. */ 49/* Type codes for the GC constants of a prototype. Plus length for strings. */
49enum { 50enum {
@@ -61,6 +62,7 @@ enum {
61 62
62LJ_FUNC int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, 63LJ_FUNC int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer,
63 void *data, int strip); 64 void *data, int strip);
65LJ_FUNC GCproto *lj_bcread_proto(LexState *ls);
64LJ_FUNC GCproto *lj_bcread(LexState *ls); 66LJ_FUNC GCproto *lj_bcread(LexState *ls);
65 67
66#endif 68#endif
diff --git a/src/lj_bcread.c b/src/lj_bcread.c
index 25859d2f..5e502177 100644
--- a/src/lj_bcread.c
+++ b/src/lj_bcread.c
@@ -9,6 +9,7 @@
9#include "lj_obj.h" 9#include "lj_obj.h"
10#include "lj_gc.h" 10#include "lj_gc.h"
11#include "lj_err.h" 11#include "lj_err.h"
12#include "lj_buf.h"
12#include "lj_str.h" 13#include "lj_str.h"
13#include "lj_tab.h" 14#include "lj_tab.h"
14#include "lj_bc.h" 15#include "lj_bc.h"
@@ -20,6 +21,7 @@
20#include "lj_lex.h" 21#include "lj_lex.h"
21#include "lj_bcdump.h" 22#include "lj_bcdump.h"
22#include "lj_state.h" 23#include "lj_state.h"
24#include "lj_strfmt.h"
23 25
24/* Reuse some lexer fields for our own purposes. */ 26/* Reuse some lexer fields for our own purposes. */
25#define bcread_flags(ls) ls->level 27#define bcread_flags(ls) ls->level
@@ -38,84 +40,73 @@ static LJ_NOINLINE void bcread_error(LexState *ls, ErrMsg em)
38 const char *name = ls->chunkarg; 40 const char *name = ls->chunkarg;
39 if (*name == BCDUMP_HEAD1) name = "(binary)"; 41 if (*name == BCDUMP_HEAD1) name = "(binary)";
40 else if (*name == '@' || *name == '=') name++; 42 else if (*name == '@' || *name == '=') name++;
41 lj_str_pushf(L, "%s: %s", name, err2msg(em)); 43 lj_strfmt_pushf(L, "%s: %s", name, err2msg(em));
42 lj_err_throw(L, LUA_ERRSYNTAX); 44 lj_err_throw(L, LUA_ERRSYNTAX);
43} 45}
44 46
45/* Resize input buffer. */ 47/* Refill buffer. */
46static void bcread_resize(LexState *ls, MSize len)
47{
48 if (ls->sb.sz < len) {
49 MSize sz = ls->sb.sz * 2;
50 while (len > sz) sz = sz * 2;
51 lj_str_resizebuf(ls->L, &ls->sb, sz);
52 /* Caveat: this may change ls->sb.buf which may affect ls->p. */
53 }
54}
55
56/* Refill buffer if needed. */
57static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need) 48static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need)
58{ 49{
59 lua_assert(len != 0); 50 lua_assert(len != 0);
60 if (len > LJ_MAX_MEM || ls->current < 0) 51 if (len > LJ_MAX_BUF || ls->c < 0)
61 bcread_error(ls, LJ_ERR_BCBAD); 52 bcread_error(ls, LJ_ERR_BCBAD);
62 do { 53 do {
63 const char *buf; 54 const char *buf;
64 size_t size; 55 size_t sz;
65 if (ls->n) { /* Copy remainder to buffer. */ 56 char *p = sbufB(&ls->sb);
66 if (ls->sb.n) { /* Move down in buffer. */ 57 MSize n = (MSize)(ls->pe - ls->p);
67 lua_assert(ls->p + ls->n == ls->sb.buf + ls->sb.n); 58 if (n) { /* Copy remainder to buffer. */
68 if (ls->n != ls->sb.n) 59 if (sbuflen(&ls->sb)) { /* Move down in buffer. */
69 memmove(ls->sb.buf, ls->p, ls->n); 60 lua_assert(ls->pe == sbufP(&ls->sb));
61 if (ls->p != p) memmove(p, ls->p, n);
70 } else { /* Copy from buffer provided by reader. */ 62 } else { /* Copy from buffer provided by reader. */
71 bcread_resize(ls, len); 63 p = lj_buf_need(&ls->sb, len);
72 memcpy(ls->sb.buf, ls->p, ls->n); 64 memcpy(p, ls->p, n);
73 } 65 }
74 ls->p = ls->sb.buf; 66 ls->p = p;
67 ls->pe = p + n;
75 } 68 }
76 ls->sb.n = ls->n; 69 setsbufP(&ls->sb, p + n);
77 buf = ls->rfunc(ls->L, ls->rdata, &size); /* Get more data from reader. */ 70 buf = ls->rfunc(ls->L, ls->rdata, &sz); /* Get more data from reader. */
78 if (buf == NULL || size == 0) { /* EOF? */ 71 if (buf == NULL || sz == 0) { /* EOF? */
79 if (need) bcread_error(ls, LJ_ERR_BCBAD); 72 if (need) bcread_error(ls, LJ_ERR_BCBAD);
80 ls->current = -1; /* Only bad if we get called again. */ 73 ls->c = -1; /* Only bad if we get called again. */
81 break; 74 break;
82 } 75 }
83 if (ls->sb.n) { /* Append to buffer. */ 76 if (n) { /* Append to buffer. */
84 MSize n = ls->sb.n + (MSize)size; 77 n += (MSize)sz;
85 bcread_resize(ls, n < len ? len : n); 78 p = lj_buf_need(&ls->sb, n < len ? len : n);
86 memcpy(ls->sb.buf + ls->sb.n, buf, size); 79 memcpy(sbufP(&ls->sb), buf, sz);
87 ls->n = ls->sb.n = n; 80 setsbufP(&ls->sb, p + n);
88 ls->p = ls->sb.buf; 81 ls->p = p;
82 ls->pe = p + n;
89 } else { /* Return buffer provided by reader. */ 83 } else { /* Return buffer provided by reader. */
90 ls->n = (MSize)size;
91 ls->p = buf; 84 ls->p = buf;
85 ls->pe = buf + sz;
92 } 86 }
93 } while (ls->n < len); 87 } while (ls->p + len > ls->pe);
94} 88}
95 89
96/* Need a certain number of bytes. */ 90/* Need a certain number of bytes. */
97static LJ_AINLINE void bcread_need(LexState *ls, MSize len) 91static LJ_AINLINE void bcread_need(LexState *ls, MSize len)
98{ 92{
99 if (LJ_UNLIKELY(ls->n < len)) 93 if (LJ_UNLIKELY(ls->p + len > ls->pe))
100 bcread_fill(ls, len, 1); 94 bcread_fill(ls, len, 1);
101} 95}
102 96
103/* Want to read up to a certain number of bytes, but may need less. */ 97/* Want to read up to a certain number of bytes, but may need less. */
104static LJ_AINLINE void bcread_want(LexState *ls, MSize len) 98static LJ_AINLINE void bcread_want(LexState *ls, MSize len)
105{ 99{
106 if (LJ_UNLIKELY(ls->n < len)) 100 if (LJ_UNLIKELY(ls->p + len > ls->pe))
107 bcread_fill(ls, len, 0); 101 bcread_fill(ls, len, 0);
108} 102}
109 103
110#define bcread_dec(ls) check_exp(ls->n > 0, ls->n--)
111#define bcread_consume(ls, len) check_exp(ls->n >= (len), ls->n -= (len))
112
113/* Return memory block from buffer. */ 104/* Return memory block from buffer. */
114static uint8_t *bcread_mem(LexState *ls, MSize len) 105static LJ_AINLINE uint8_t *bcread_mem(LexState *ls, MSize len)
115{ 106{
116 uint8_t *p = (uint8_t *)ls->p; 107 uint8_t *p = (uint8_t *)ls->p;
117 bcread_consume(ls, len); 108 ls->p += len;
118 ls->p = (char *)p + len; 109 lua_assert(ls->p <= ls->pe);
119 return p; 110 return p;
120} 111}
121 112
@@ -128,25 +119,15 @@ static void bcread_block(LexState *ls, void *q, MSize len)
128/* Read byte from buffer. */ 119/* Read byte from buffer. */
129static LJ_AINLINE uint32_t bcread_byte(LexState *ls) 120static LJ_AINLINE uint32_t bcread_byte(LexState *ls)
130{ 121{
131 bcread_dec(ls); 122 lua_assert(ls->p < ls->pe);
132 return (uint32_t)(uint8_t)*ls->p++; 123 return (uint32_t)(uint8_t)*ls->p++;
133} 124}
134 125
135/* Read ULEB128 value from buffer. */ 126/* Read ULEB128 value from buffer. */
136static uint32_t bcread_uleb128(LexState *ls) 127static LJ_AINLINE uint32_t bcread_uleb128(LexState *ls)
137{ 128{
138 const uint8_t *p = (const uint8_t *)ls->p; 129 uint32_t v = lj_buf_ruleb128(&ls->p);
139 uint32_t v = *p++; 130 lua_assert(ls->p <= ls->pe);
140 if (LJ_UNLIKELY(v >= 0x80)) {
141 int sh = 0;
142 v &= 0x7f;
143 do {
144 v |= ((*p & 0x7f) << (sh += 7));
145 bcread_dec(ls);
146 } while (*p++ >= 0x80);
147 }
148 bcread_dec(ls);
149 ls->p = (char *)p;
150 return v; 131 return v;
151} 132}
152 133
@@ -160,11 +141,10 @@ static uint32_t bcread_uleb128_33(LexState *ls)
160 v &= 0x3f; 141 v &= 0x3f;
161 do { 142 do {
162 v |= ((*p & 0x7f) << (sh += 7)); 143 v |= ((*p & 0x7f) << (sh += 7));
163 bcread_dec(ls);
164 } while (*p++ >= 0x80); 144 } while (*p++ >= 0x80);
165 } 145 }
166 bcread_dec(ls);
167 ls->p = (char *)p; 146 ls->p = (char *)p;
147 lua_assert(ls->p <= ls->pe);
168 return v; 148 return v;
169} 149}
170 150
@@ -212,7 +192,7 @@ static void bcread_ktabk(LexState *ls, TValue *o)
212 o->u32.hi = bcread_uleb128(ls); 192 o->u32.hi = bcread_uleb128(ls);
213 } else { 193 } else {
214 lua_assert(tp <= BCDUMP_KTAB_TRUE); 194 lua_assert(tp <= BCDUMP_KTAB_TRUE);
215 setitype(o, ~tp); 195 setpriV(o, ~tp);
216 } 196 }
217} 197}
218 198
@@ -326,25 +306,13 @@ static void bcread_uv(LexState *ls, GCproto *pt, MSize sizeuv)
326} 306}
327 307
328/* Read a prototype. */ 308/* Read a prototype. */
329static GCproto *bcread_proto(LexState *ls) 309GCproto *lj_bcread_proto(LexState *ls)
330{ 310{
331 GCproto *pt; 311 GCproto *pt;
332 MSize framesize, numparams, flags, sizeuv, sizekgc, sizekn, sizebc, sizept; 312 MSize framesize, numparams, flags, sizeuv, sizekgc, sizekn, sizebc, sizept;
333 MSize ofsk, ofsuv, ofsdbg; 313 MSize ofsk, ofsuv, ofsdbg;
334 MSize sizedbg = 0; 314 MSize sizedbg = 0;
335 BCLine firstline = 0, numline = 0; 315 BCLine firstline = 0, numline = 0;
336 MSize len, startn;
337
338 /* Read length. */
339 if (ls->n > 0 && ls->p[0] == 0) { /* Shortcut EOF. */
340 ls->n--; ls->p++;
341 return NULL;
342 }
343 bcread_want(ls, 5);
344 len = bcread_uleb128(ls);
345 if (!len) return NULL; /* EOF */
346 bcread_need(ls, len);
347 startn = ls->n;
348 316
349 /* Read prototype header. */ 317 /* Read prototype header. */
350 flags = bcread_byte(ls); 318 flags = bcread_byte(ls);
@@ -413,9 +381,6 @@ static GCproto *bcread_proto(LexState *ls)
413 setmref(pt->uvinfo, NULL); 381 setmref(pt->uvinfo, NULL);
414 setmref(pt->varinfo, NULL); 382 setmref(pt->varinfo, NULL);
415 } 383 }
416
417 if (len != startn - ls->n)
418 bcread_error(ls, LJ_ERR_BCBAD);
419 return pt; 384 return pt;
420} 385}
421 386
@@ -429,6 +394,7 @@ static int bcread_header(LexState *ls)
429 bcread_byte(ls) != BCDUMP_VERSION) return 0; 394 bcread_byte(ls) != BCDUMP_VERSION) return 0;
430 bcread_flags(ls) = flags = bcread_uleb128(ls); 395 bcread_flags(ls) = flags = bcread_uleb128(ls);
431 if ((flags & ~(BCDUMP_F_KNOWN)) != 0) return 0; 396 if ((flags & ~(BCDUMP_F_KNOWN)) != 0) return 0;
397 if ((flags & BCDUMP_F_FR2) != LJ_FR2*BCDUMP_F_FR2) return 0;
432 if ((flags & BCDUMP_F_FFI)) { 398 if ((flags & BCDUMP_F_FFI)) {
433#if LJ_HASFFI 399#if LJ_HASFFI
434 lua_State *L = ls->L; 400 lua_State *L = ls->L;
@@ -455,19 +421,34 @@ static int bcread_header(LexState *ls)
455GCproto *lj_bcread(LexState *ls) 421GCproto *lj_bcread(LexState *ls)
456{ 422{
457 lua_State *L = ls->L; 423 lua_State *L = ls->L;
458 lua_assert(ls->current == BCDUMP_HEAD1); 424 lua_assert(ls->c == BCDUMP_HEAD1);
459 bcread_savetop(L, ls, L->top); 425 bcread_savetop(L, ls, L->top);
460 lj_str_resetbuf(&ls->sb); 426 lj_buf_reset(&ls->sb);
461 /* Check for a valid bytecode dump header. */ 427 /* Check for a valid bytecode dump header. */
462 if (!bcread_header(ls)) 428 if (!bcread_header(ls))
463 bcread_error(ls, LJ_ERR_BCFMT); 429 bcread_error(ls, LJ_ERR_BCFMT);
464 for (;;) { /* Process all prototypes in the bytecode dump. */ 430 for (;;) { /* Process all prototypes in the bytecode dump. */
465 GCproto *pt = bcread_proto(ls); 431 GCproto *pt;
466 if (!pt) break; 432 MSize len;
433 const char *startp;
434 /* Read length. */
435 if (ls->p < ls->pe && ls->p[0] == 0) { /* Shortcut EOF. */
436 ls->p++;
437 break;
438 }
439 bcread_want(ls, 5);
440 len = bcread_uleb128(ls);
441 if (!len) break; /* EOF */
442 bcread_need(ls, len);
443 startp = ls->p;
444 pt = lj_bcread_proto(ls);
445 if (ls->p != startp + len)
446 bcread_error(ls, LJ_ERR_BCBAD);
467 setprotoV(L, L->top, pt); 447 setprotoV(L, L->top, pt);
468 incr_top(L); 448 incr_top(L);
469 } 449 }
470 if ((int32_t)ls->n > 0 || L->top-1 != bcread_oldtop(L, ls)) 450 if ((int32_t)(2*(uint32_t)(ls->pe - ls->p)) > 0 ||
451 L->top-1 != bcread_oldtop(L, ls))
471 bcread_error(ls, LJ_ERR_BCBAD); 452 bcread_error(ls, LJ_ERR_BCBAD);
472 /* Pop off last prototype. */ 453 /* Pop off last prototype. */
473 L->top--; 454 L->top--;
diff --git a/src/lj_bcwrite.c b/src/lj_bcwrite.c
index ff97450b..b2c09738 100644
--- a/src/lj_bcwrite.c
+++ b/src/lj_bcwrite.c
@@ -8,7 +8,7 @@
8 8
9#include "lj_obj.h" 9#include "lj_obj.h"
10#include "lj_gc.h" 10#include "lj_gc.h"
11#include "lj_str.h" 11#include "lj_buf.h"
12#include "lj_bc.h" 12#include "lj_bc.h"
13#if LJ_HASFFI 13#if LJ_HASFFI
14#include "lj_ctype.h" 14#include "lj_ctype.h"
@@ -17,13 +17,13 @@
17#include "lj_dispatch.h" 17#include "lj_dispatch.h"
18#include "lj_jit.h" 18#include "lj_jit.h"
19#endif 19#endif
20#include "lj_strfmt.h"
20#include "lj_bcdump.h" 21#include "lj_bcdump.h"
21#include "lj_vm.h" 22#include "lj_vm.h"
22 23
23/* Context for bytecode writer. */ 24/* Context for bytecode writer. */
24typedef struct BCWriteCtx { 25typedef struct BCWriteCtx {
25 SBuf sb; /* Output buffer. */ 26 SBuf sb; /* Output buffer. */
26 lua_State *L; /* Lua state. */
27 GCproto *pt; /* Root prototype. */ 27 GCproto *pt; /* Root prototype. */
28 lua_Writer wfunc; /* Writer callback. */ 28 lua_Writer wfunc; /* Writer callback. */
29 void *wdata; /* Writer callback data. */ 29 void *wdata; /* Writer callback data. */
@@ -31,85 +31,44 @@ typedef struct BCWriteCtx {
31 int status; /* Status from writer callback. */ 31 int status; /* Status from writer callback. */
32} BCWriteCtx; 32} BCWriteCtx;
33 33
34/* -- Output buffer handling ---------------------------------------------- */
35
36/* Resize buffer if needed. */
37static LJ_NOINLINE void bcwrite_resize(BCWriteCtx *ctx, MSize len)
38{
39 MSize sz = ctx->sb.sz * 2;
40 while (ctx->sb.n + len > sz) sz = sz * 2;
41 lj_str_resizebuf(ctx->L, &ctx->sb, sz);
42}
43
44/* Need a certain amount of buffer space. */
45static LJ_AINLINE void bcwrite_need(BCWriteCtx *ctx, MSize len)
46{
47 if (LJ_UNLIKELY(ctx->sb.n + len > ctx->sb.sz))
48 bcwrite_resize(ctx, len);
49}
50
51/* Add memory block to buffer. */
52static void bcwrite_block(BCWriteCtx *ctx, const void *p, MSize len)
53{
54 uint8_t *q = (uint8_t *)(ctx->sb.buf + ctx->sb.n);
55 MSize i;
56 ctx->sb.n += len;
57 for (i = 0; i < len; i++) q[i] = ((uint8_t *)p)[i];
58}
59
60/* Add byte to buffer. */
61static LJ_AINLINE void bcwrite_byte(BCWriteCtx *ctx, uint8_t b)
62{
63 ctx->sb.buf[ctx->sb.n++] = b;
64}
65
66/* Add ULEB128 value to buffer. */
67static void bcwrite_uleb128(BCWriteCtx *ctx, uint32_t v)
68{
69 MSize n = ctx->sb.n;
70 uint8_t *p = (uint8_t *)ctx->sb.buf;
71 for (; v >= 0x80; v >>= 7)
72 p[n++] = (uint8_t)((v & 0x7f) | 0x80);
73 p[n++] = (uint8_t)v;
74 ctx->sb.n = n;
75}
76
77/* -- Bytecode writer ----------------------------------------------------- */ 34/* -- Bytecode writer ----------------------------------------------------- */
78 35
79/* Write a single constant key/value of a template table. */ 36/* Write a single constant key/value of a template table. */
80static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow) 37static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow)
81{ 38{
82 bcwrite_need(ctx, 1+10); 39 char *p = lj_buf_more(&ctx->sb, 1+10);
83 if (tvisstr(o)) { 40 if (tvisstr(o)) {
84 const GCstr *str = strV(o); 41 const GCstr *str = strV(o);
85 MSize len = str->len; 42 MSize len = str->len;
86 bcwrite_need(ctx, 5+len); 43 p = lj_buf_more(&ctx->sb, 5+len);
87 bcwrite_uleb128(ctx, BCDUMP_KTAB_STR+len); 44 p = lj_strfmt_wuleb128(p, BCDUMP_KTAB_STR+len);
88 bcwrite_block(ctx, strdata(str), len); 45 p = lj_buf_wmem(p, strdata(str), len);
89 } else if (tvisint(o)) { 46 } else if (tvisint(o)) {
90 bcwrite_byte(ctx, BCDUMP_KTAB_INT); 47 *p++ = BCDUMP_KTAB_INT;
91 bcwrite_uleb128(ctx, intV(o)); 48 p = lj_strfmt_wuleb128(p, intV(o));
92 } else if (tvisnum(o)) { 49 } else if (tvisnum(o)) {
93 if (!LJ_DUALNUM && narrow) { /* Narrow number constants to integers. */ 50 if (!LJ_DUALNUM && narrow) { /* Narrow number constants to integers. */
94 lua_Number num = numV(o); 51 lua_Number num = numV(o);
95 int32_t k = lj_num2int(num); 52 int32_t k = lj_num2int(num);
96 if (num == (lua_Number)k) { /* -0 is never a constant. */ 53 if (num == (lua_Number)k) { /* -0 is never a constant. */
97 bcwrite_byte(ctx, BCDUMP_KTAB_INT); 54 *p++ = BCDUMP_KTAB_INT;
98 bcwrite_uleb128(ctx, k); 55 p = lj_strfmt_wuleb128(p, k);
56 setsbufP(&ctx->sb, p);
99 return; 57 return;
100 } 58 }
101 } 59 }
102 bcwrite_byte(ctx, BCDUMP_KTAB_NUM); 60 *p++ = BCDUMP_KTAB_NUM;
103 bcwrite_uleb128(ctx, o->u32.lo); 61 p = lj_strfmt_wuleb128(p, o->u32.lo);
104 bcwrite_uleb128(ctx, o->u32.hi); 62 p = lj_strfmt_wuleb128(p, o->u32.hi);
105 } else { 63 } else {
106 lua_assert(tvispri(o)); 64 lua_assert(tvispri(o));
107 bcwrite_byte(ctx, BCDUMP_KTAB_NIL+~itype(o)); 65 *p++ = BCDUMP_KTAB_NIL+~itype(o);
108 } 66 }
67 setsbufP(&ctx->sb, p);
109} 68}
110 69
111/* Write a template table. */ 70/* Write a template table. */
112static void bcwrite_ktab(BCWriteCtx *ctx, const GCtab *t) 71static void bcwrite_ktab(BCWriteCtx *ctx, char *p, const GCtab *t)
113{ 72{
114 MSize narray = 0, nhash = 0; 73 MSize narray = 0, nhash = 0;
115 if (t->asize > 0) { /* Determine max. length of array part. */ 74 if (t->asize > 0) { /* Determine max. length of array part. */
@@ -127,8 +86,9 @@ static void bcwrite_ktab(BCWriteCtx *ctx, const GCtab *t)
127 nhash += !tvisnil(&node[i].val); 86 nhash += !tvisnil(&node[i].val);
128 } 87 }
129 /* Write number of array slots and hash slots. */ 88 /* Write number of array slots and hash slots. */
130 bcwrite_uleb128(ctx, narray); 89 p = lj_strfmt_wuleb128(p, narray);
131 bcwrite_uleb128(ctx, nhash); 90 p = lj_strfmt_wuleb128(p, nhash);
91 setsbufP(&ctx->sb, p);
132 if (narray) { /* Write array entries (may contain nil). */ 92 if (narray) { /* Write array entries (may contain nil). */
133 MSize i; 93 MSize i;
134 TValue *o = tvref(t->array); 94 TValue *o = tvref(t->array);
@@ -155,6 +115,7 @@ static void bcwrite_kgc(BCWriteCtx *ctx, GCproto *pt)
155 for (i = 0; i < sizekgc; i++, kr++) { 115 for (i = 0; i < sizekgc; i++, kr++) {
156 GCobj *o = gcref(*kr); 116 GCobj *o = gcref(*kr);
157 MSize tp, need = 1; 117 MSize tp, need = 1;
118 char *p;
158 /* Determine constant type and needed size. */ 119 /* Determine constant type and needed size. */
159 if (o->gch.gct == ~LJ_TSTR) { 120 if (o->gch.gct == ~LJ_TSTR) {
160 tp = BCDUMP_KGC_STR + gco2str(o)->len; 121 tp = BCDUMP_KGC_STR + gco2str(o)->len;
@@ -181,24 +142,26 @@ static void bcwrite_kgc(BCWriteCtx *ctx, GCproto *pt)
181 need = 1+2*5; 142 need = 1+2*5;
182 } 143 }
183 /* Write constant type. */ 144 /* Write constant type. */
184 bcwrite_need(ctx, need); 145 p = lj_buf_more(&ctx->sb, need);
185 bcwrite_uleb128(ctx, tp); 146 p = lj_strfmt_wuleb128(p, tp);
186 /* Write constant data (if any). */ 147 /* Write constant data (if any). */
187 if (tp >= BCDUMP_KGC_STR) { 148 if (tp >= BCDUMP_KGC_STR) {
188 bcwrite_block(ctx, strdata(gco2str(o)), gco2str(o)->len); 149 p = lj_buf_wmem(p, strdata(gco2str(o)), gco2str(o)->len);
189 } else if (tp == BCDUMP_KGC_TAB) { 150 } else if (tp == BCDUMP_KGC_TAB) {
190 bcwrite_ktab(ctx, gco2tab(o)); 151 bcwrite_ktab(ctx, p, gco2tab(o));
152 continue;
191#if LJ_HASFFI 153#if LJ_HASFFI
192 } else if (tp != BCDUMP_KGC_CHILD) { 154 } else if (tp != BCDUMP_KGC_CHILD) {
193 cTValue *p = (TValue *)cdataptr(gco2cd(o)); 155 cTValue *q = (TValue *)cdataptr(gco2cd(o));
194 bcwrite_uleb128(ctx, p[0].u32.lo); 156 p = lj_strfmt_wuleb128(p, q[0].u32.lo);
195 bcwrite_uleb128(ctx, p[0].u32.hi); 157 p = lj_strfmt_wuleb128(p, q[0].u32.hi);
196 if (tp == BCDUMP_KGC_COMPLEX) { 158 if (tp == BCDUMP_KGC_COMPLEX) {
197 bcwrite_uleb128(ctx, p[1].u32.lo); 159 p = lj_strfmt_wuleb128(p, q[1].u32.lo);
198 bcwrite_uleb128(ctx, p[1].u32.hi); 160 p = lj_strfmt_wuleb128(p, q[1].u32.hi);
199 } 161 }
200#endif 162#endif
201 } 163 }
164 setsbufP(&ctx->sb, p);
202 } 165 }
203} 166}
204 167
@@ -207,7 +170,7 @@ static void bcwrite_knum(BCWriteCtx *ctx, GCproto *pt)
207{ 170{
208 MSize i, sizekn = pt->sizekn; 171 MSize i, sizekn = pt->sizekn;
209 cTValue *o = mref(pt->k, TValue); 172 cTValue *o = mref(pt->k, TValue);
210 bcwrite_need(ctx, 10*sizekn); 173 char *p = lj_buf_more(&ctx->sb, 10*sizekn);
211 for (i = 0; i < sizekn; i++, o++) { 174 for (i = 0; i < sizekn; i++, o++) {
212 int32_t k; 175 int32_t k;
213 if (tvisint(o)) { 176 if (tvisint(o)) {
@@ -220,58 +183,58 @@ static void bcwrite_knum(BCWriteCtx *ctx, GCproto *pt)
220 k = lj_num2int(num); 183 k = lj_num2int(num);
221 if (num == (lua_Number)k) { /* -0 is never a constant. */ 184 if (num == (lua_Number)k) { /* -0 is never a constant. */
222 save_int: 185 save_int:
223 bcwrite_uleb128(ctx, 2*(uint32_t)k | ((uint32_t)k & 0x80000000u)); 186 p = lj_strfmt_wuleb128(p, 2*(uint32_t)k | ((uint32_t)k&0x80000000u));
224 if (k < 0) { 187 if (k < 0)
225 char *p = &ctx->sb.buf[ctx->sb.n-1]; 188 p[-1] = (p[-1] & 7) | ((k>>27) & 0x18);
226 *p = (*p & 7) | ((k>>27) & 0x18);
227 }
228 continue; 189 continue;
229 } 190 }
230 } 191 }
231 bcwrite_uleb128(ctx, 1+(2*o->u32.lo | (o->u32.lo & 0x80000000u))); 192 p = lj_strfmt_wuleb128(p, 1+(2*o->u32.lo | (o->u32.lo & 0x80000000u)));
232 if (o->u32.lo >= 0x80000000u) { 193 if (o->u32.lo >= 0x80000000u)
233 char *p = &ctx->sb.buf[ctx->sb.n-1]; 194 p[-1] = (p[-1] & 7) | ((o->u32.lo>>27) & 0x18);
234 *p = (*p & 7) | ((o->u32.lo>>27) & 0x18); 195 p = lj_strfmt_wuleb128(p, o->u32.hi);
235 }
236 bcwrite_uleb128(ctx, o->u32.hi);
237 } 196 }
238 } 197 }
198 setsbufP(&ctx->sb, p);
239} 199}
240 200
241/* Write bytecode instructions. */ 201/* Write bytecode instructions. */
242static void bcwrite_bytecode(BCWriteCtx *ctx, GCproto *pt) 202static char *bcwrite_bytecode(BCWriteCtx *ctx, char *p, GCproto *pt)
243{ 203{
244 MSize nbc = pt->sizebc-1; /* Omit the [JI]FUNC* header. */ 204 MSize nbc = pt->sizebc-1; /* Omit the [JI]FUNC* header. */
245#if LJ_HASJIT 205#if LJ_HASJIT
246 uint8_t *p = (uint8_t *)&ctx->sb.buf[ctx->sb.n]; 206 uint8_t *q = (uint8_t *)p;
247#endif 207#endif
248 bcwrite_block(ctx, proto_bc(pt)+1, nbc*(MSize)sizeof(BCIns)); 208 p = lj_buf_wmem(p, proto_bc(pt)+1, nbc*(MSize)sizeof(BCIns));
209 UNUSED(ctx);
249#if LJ_HASJIT 210#if LJ_HASJIT
250 /* Unpatch modified bytecode containing ILOOP/JLOOP etc. */ 211 /* Unpatch modified bytecode containing ILOOP/JLOOP etc. */
251 if ((pt->flags & PROTO_ILOOP) || pt->trace) { 212 if ((pt->flags & PROTO_ILOOP) || pt->trace) {
252 jit_State *J = L2J(ctx->L); 213 jit_State *J = L2J(sbufL(&ctx->sb));
253 MSize i; 214 MSize i;
254 for (i = 0; i < nbc; i++, p += sizeof(BCIns)) { 215 for (i = 0; i < nbc; i++, q += sizeof(BCIns)) {
255 BCOp op = (BCOp)p[LJ_ENDIAN_SELECT(0, 3)]; 216 BCOp op = (BCOp)q[LJ_ENDIAN_SELECT(0, 3)];
256 if (op == BC_IFORL || op == BC_IITERL || op == BC_ILOOP || 217 if (op == BC_IFORL || op == BC_IITERL || op == BC_ILOOP ||
257 op == BC_JFORI) { 218 op == BC_JFORI) {
258 p[LJ_ENDIAN_SELECT(0, 3)] = (uint8_t)(op-BC_IFORL+BC_FORL); 219 q[LJ_ENDIAN_SELECT(0, 3)] = (uint8_t)(op-BC_IFORL+BC_FORL);
259 } else if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) { 220 } else if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) {
260 BCReg rd = p[LJ_ENDIAN_SELECT(2, 1)] + (p[LJ_ENDIAN_SELECT(3, 0)] << 8); 221 BCReg rd = q[LJ_ENDIAN_SELECT(2, 1)] + (q[LJ_ENDIAN_SELECT(3, 0)] << 8);
261 BCIns ins = traceref(J, rd)->startins; 222 BCIns ins = traceref(J, rd)->startins;
262 p[LJ_ENDIAN_SELECT(0, 3)] = (uint8_t)(op-BC_JFORL+BC_FORL); 223 q[LJ_ENDIAN_SELECT(0, 3)] = (uint8_t)(op-BC_JFORL+BC_FORL);
263 p[LJ_ENDIAN_SELECT(2, 1)] = bc_c(ins); 224 q[LJ_ENDIAN_SELECT(2, 1)] = bc_c(ins);
264 p[LJ_ENDIAN_SELECT(3, 0)] = bc_b(ins); 225 q[LJ_ENDIAN_SELECT(3, 0)] = bc_b(ins);
265 } 226 }
266 } 227 }
267 } 228 }
268#endif 229#endif
230 return p;
269} 231}
270 232
271/* Write prototype. */ 233/* Write prototype. */
272static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt) 234static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt)
273{ 235{
274 MSize sizedbg = 0; 236 MSize sizedbg = 0;
237 char *p;
275 238
276 /* Recursively write children of prototype. */ 239 /* Recursively write children of prototype. */
277 if ((pt->flags & PROTO_CHILD)) { 240 if ((pt->flags & PROTO_CHILD)) {
@@ -285,31 +248,32 @@ static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt)
285 } 248 }
286 249
287 /* Start writing the prototype info to a buffer. */ 250 /* Start writing the prototype info to a buffer. */
288 lj_str_resetbuf(&ctx->sb); 251 p = lj_buf_need(&ctx->sb,
289 ctx->sb.n = 5; /* Leave room for final size. */ 252 5+4+6*5+(pt->sizebc-1)*(MSize)sizeof(BCIns)+pt->sizeuv*2);
290 bcwrite_need(ctx, 4+6*5+(pt->sizebc-1)*(MSize)sizeof(BCIns)+pt->sizeuv*2); 253 p += 5; /* Leave room for final size. */
291 254
292 /* Write prototype header. */ 255 /* Write prototype header. */
293 bcwrite_byte(ctx, (pt->flags & (PROTO_CHILD|PROTO_VARARG|PROTO_FFI))); 256 *p++ = (pt->flags & (PROTO_CHILD|PROTO_VARARG|PROTO_FFI));
294 bcwrite_byte(ctx, pt->numparams); 257 *p++ = pt->numparams;
295 bcwrite_byte(ctx, pt->framesize); 258 *p++ = pt->framesize;
296 bcwrite_byte(ctx, pt->sizeuv); 259 *p++ = pt->sizeuv;
297 bcwrite_uleb128(ctx, pt->sizekgc); 260 p = lj_strfmt_wuleb128(p, pt->sizekgc);
298 bcwrite_uleb128(ctx, pt->sizekn); 261 p = lj_strfmt_wuleb128(p, pt->sizekn);
299 bcwrite_uleb128(ctx, pt->sizebc-1); 262 p = lj_strfmt_wuleb128(p, pt->sizebc-1);
300 if (!ctx->strip) { 263 if (!ctx->strip) {
301 if (proto_lineinfo(pt)) 264 if (proto_lineinfo(pt))
302 sizedbg = pt->sizept - (MSize)((char *)proto_lineinfo(pt) - (char *)pt); 265 sizedbg = pt->sizept - (MSize)((char *)proto_lineinfo(pt) - (char *)pt);
303 bcwrite_uleb128(ctx, sizedbg); 266 p = lj_strfmt_wuleb128(p, sizedbg);
304 if (sizedbg) { 267 if (sizedbg) {
305 bcwrite_uleb128(ctx, pt->firstline); 268 p = lj_strfmt_wuleb128(p, pt->firstline);
306 bcwrite_uleb128(ctx, pt->numline); 269 p = lj_strfmt_wuleb128(p, pt->numline);
307 } 270 }
308 } 271 }
309 272
310 /* Write bytecode instructions and upvalue refs. */ 273 /* Write bytecode instructions and upvalue refs. */
311 bcwrite_bytecode(ctx, pt); 274 p = bcwrite_bytecode(ctx, p, pt);
312 bcwrite_block(ctx, proto_uv(pt), pt->sizeuv*2); 275 p = lj_buf_wmem(p, proto_uv(pt), pt->sizeuv*2);
276 setsbufP(&ctx->sb, p);
313 277
314 /* Write constants. */ 278 /* Write constants. */
315 bcwrite_kgc(ctx, pt); 279 bcwrite_kgc(ctx, pt);
@@ -317,18 +281,19 @@ static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt)
317 281
318 /* Write debug info, if not stripped. */ 282 /* Write debug info, if not stripped. */
319 if (sizedbg) { 283 if (sizedbg) {
320 bcwrite_need(ctx, sizedbg); 284 p = lj_buf_more(&ctx->sb, sizedbg);
321 bcwrite_block(ctx, proto_lineinfo(pt), sizedbg); 285 p = lj_buf_wmem(p, proto_lineinfo(pt), sizedbg);
286 setsbufP(&ctx->sb, p);
322 } 287 }
323 288
324 /* Pass buffer to writer function. */ 289 /* Pass buffer to writer function. */
325 if (ctx->status == 0) { 290 if (ctx->status == 0) {
326 MSize n = ctx->sb.n - 5; 291 MSize n = sbuflen(&ctx->sb) - 5;
327 MSize nn = (lj_fls(n)+8)*9 >> 6; 292 MSize nn = (lj_fls(n)+8)*9 >> 6;
328 ctx->sb.n = 5 - nn; 293 char *q = sbufB(&ctx->sb) + (5 - nn);
329 bcwrite_uleb128(ctx, n); /* Fill in final size. */ 294 p = lj_strfmt_wuleb128(q, n); /* Fill in final size. */
330 lua_assert(ctx->sb.n == 5); 295 lua_assert(p == sbufB(&ctx->sb) + 5);
331 ctx->status = ctx->wfunc(ctx->L, ctx->sb.buf+5-nn, nn+n, ctx->wdata); 296 ctx->status = ctx->wfunc(sbufL(&ctx->sb), q, nn+n, ctx->wdata);
332 } 297 }
333} 298}
334 299
@@ -338,20 +303,21 @@ static void bcwrite_header(BCWriteCtx *ctx)
338 GCstr *chunkname = proto_chunkname(ctx->pt); 303 GCstr *chunkname = proto_chunkname(ctx->pt);
339 const char *name = strdata(chunkname); 304 const char *name = strdata(chunkname);
340 MSize len = chunkname->len; 305 MSize len = chunkname->len;
341 lj_str_resetbuf(&ctx->sb); 306 char *p = lj_buf_need(&ctx->sb, 5+5+len);
342 bcwrite_need(ctx, 5+5+len); 307 *p++ = BCDUMP_HEAD1;
343 bcwrite_byte(ctx, BCDUMP_HEAD1); 308 *p++ = BCDUMP_HEAD2;
344 bcwrite_byte(ctx, BCDUMP_HEAD2); 309 *p++ = BCDUMP_HEAD3;
345 bcwrite_byte(ctx, BCDUMP_HEAD3); 310 *p++ = BCDUMP_VERSION;
346 bcwrite_byte(ctx, BCDUMP_VERSION); 311 *p++ = (ctx->strip ? BCDUMP_F_STRIP : 0) +
347 bcwrite_byte(ctx, (ctx->strip ? BCDUMP_F_STRIP : 0) + 312 LJ_BE*BCDUMP_F_BE +
348 (LJ_BE ? BCDUMP_F_BE : 0) + 313 ((ctx->pt->flags & PROTO_FFI) ? BCDUMP_F_FFI : 0) +
349 ((ctx->pt->flags & PROTO_FFI) ? BCDUMP_F_FFI : 0)); 314 LJ_FR2*BCDUMP_F_FR2;
350 if (!ctx->strip) { 315 if (!ctx->strip) {
351 bcwrite_uleb128(ctx, len); 316 p = lj_strfmt_wuleb128(p, len);
352 bcwrite_block(ctx, name, len); 317 p = lj_buf_wmem(p, name, len);
353 } 318 }
354 ctx->status = ctx->wfunc(ctx->L, ctx->sb.buf, ctx->sb.n, ctx->wdata); 319 ctx->status = ctx->wfunc(sbufL(&ctx->sb), sbufB(&ctx->sb),
320 (MSize)(p - sbufB(&ctx->sb)), ctx->wdata);
355} 321}
356 322
357/* Write footer of bytecode dump. */ 323/* Write footer of bytecode dump. */
@@ -359,7 +325,7 @@ static void bcwrite_footer(BCWriteCtx *ctx)
359{ 325{
360 if (ctx->status == 0) { 326 if (ctx->status == 0) {
361 uint8_t zero = 0; 327 uint8_t zero = 0;
362 ctx->status = ctx->wfunc(ctx->L, &zero, 1, ctx->wdata); 328 ctx->status = ctx->wfunc(sbufL(&ctx->sb), &zero, 1, ctx->wdata);
363 } 329 }
364} 330}
365 331
@@ -367,8 +333,8 @@ static void bcwrite_footer(BCWriteCtx *ctx)
367static TValue *cpwriter(lua_State *L, lua_CFunction dummy, void *ud) 333static TValue *cpwriter(lua_State *L, lua_CFunction dummy, void *ud)
368{ 334{
369 BCWriteCtx *ctx = (BCWriteCtx *)ud; 335 BCWriteCtx *ctx = (BCWriteCtx *)ud;
370 UNUSED(dummy); 336 UNUSED(L); UNUSED(dummy);
371 lj_str_resizebuf(L, &ctx->sb, 1024); /* Avoids resize for most prototypes. */ 337 lj_buf_need(&ctx->sb, 1024); /* Avoids resize for most prototypes. */
372 bcwrite_header(ctx); 338 bcwrite_header(ctx);
373 bcwrite_proto(ctx, ctx->pt); 339 bcwrite_proto(ctx, ctx->pt);
374 bcwrite_footer(ctx); 340 bcwrite_footer(ctx);
@@ -381,16 +347,15 @@ int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, void *data,
381{ 347{
382 BCWriteCtx ctx; 348 BCWriteCtx ctx;
383 int status; 349 int status;
384 ctx.L = L;
385 ctx.pt = pt; 350 ctx.pt = pt;
386 ctx.wfunc = writer; 351 ctx.wfunc = writer;
387 ctx.wdata = data; 352 ctx.wdata = data;
388 ctx.strip = strip; 353 ctx.strip = strip;
389 ctx.status = 0; 354 ctx.status = 0;
390 lj_str_initbuf(&ctx.sb); 355 lj_buf_init(L, &ctx.sb);
391 status = lj_vm_cpcall(L, NULL, &ctx, cpwriter); 356 status = lj_vm_cpcall(L, NULL, &ctx, cpwriter);
392 if (status == 0) status = ctx.status; 357 if (status == 0) status = ctx.status;
393 lj_str_freebuf(G(ctx.L), &ctx.sb); 358 lj_buf_free(G(sbufL(&ctx.sb)), &ctx.sb);
394 return status; 359 return status;
395} 360}
396 361
diff --git a/src/lj_buf.c b/src/lj_buf.c
new file mode 100644
index 00000000..023bb9aa
--- /dev/null
+++ b/src/lj_buf.c
@@ -0,0 +1,234 @@
1/*
2** Buffer handling.
3** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#define lj_buf_c
7#define LUA_CORE
8
9#include "lj_obj.h"
10#include "lj_gc.h"
11#include "lj_err.h"
12#include "lj_buf.h"
13#include "lj_str.h"
14#include "lj_tab.h"
15#include "lj_strfmt.h"
16
17/* -- Buffer management --------------------------------------------------- */
18
/* Grow the buffer so that its capacity is at least sz bytes. The new size
** starts at the current capacity (but no less than LJ_MIN_SBUF) and is
** doubled until it fits. The number of bytes already written is preserved.
** NOTE(review): the doubling loop has no overflow guard of its own; the two
** callers in this file reject requests above LJ_MAX_BUF before calling.
*/
19static void buf_grow(SBuf *sb, MSize sz)
20{
21 MSize osz = sbufsz(sb), len = sbuflen(sb), nsz = osz;
22 char *b;
23 if (nsz < LJ_MIN_SBUF) nsz = LJ_MIN_SBUF;
24 while (nsz < sz) nsz += nsz;  /* Double until the request fits. */
25 b = (char *)lj_mem_realloc(sbufL(sb), sbufB(sb), osz, nsz);
/* Re-derive all three pointers from the pointer returned by the realloc. */
26 setmref(sb->b, b);
27 setmref(sb->p, b + len);
28 setmref(sb->e, b + nsz);
29}
30
/* Out-of-line slow path of lj_buf_need(): called when the requested total
** capacity sz exceeds the current capacity. Throws a memory error if sz is
** larger than LJ_MAX_BUF. Returns the start of the buffer.
*/
31LJ_NOINLINE char *LJ_FASTCALL lj_buf_need2(SBuf *sb, MSize sz)
32{
33 lua_assert(sz > sbufsz(sb));
34 if (LJ_UNLIKELY(sz > LJ_MAX_BUF))
35 lj_err_mem(sbufL(sb));
36 buf_grow(sb, sz);
37 return sbufB(sb);
38}
39
/* Out-of-line slow path of lj_buf_more(): called when fewer than sz bytes
** are free behind the write position. Throws a memory error if either sz or
** the resulting total length exceeds LJ_MAX_BUF. Returns the current write
** position, which now has at least sz bytes of room behind it.
*/
40LJ_NOINLINE char *LJ_FASTCALL lj_buf_more2(SBuf *sb, MSize sz)
41{
42 MSize len = sbuflen(sb);
43 lua_assert(sz > sbufleft(sb));
/* sz is tested on its own first; presumably so that a wrapped len + sz */
/* cannot slip past the limit check -- TODO confirm. */
44 if (LJ_UNLIKELY(sz > LJ_MAX_BUF || len + sz > LJ_MAX_BUF))
45 lj_err_mem(sbufL(sb));
46 buf_grow(sb, len + sz);
47 return sbufP(sb);
48}
49
/* Halve the capacity of the buffer if it is larger than 2*LJ_MIN_SBUF.
** The write offset is kept. Does nothing for smaller buffers.
** NOTE(review): there is no check here that the bytes already written fit
** into the halved size; presumably callers only shrink (mostly) empty
** buffers -- verify against the call sites.
*/
50void LJ_FASTCALL lj_buf_shrink(lua_State *L, SBuf *sb)
51{
52 char *b = sbufB(sb);
53 MSize osz = (MSize)(sbufE(sb) - b);
54 if (osz > 2*LJ_MIN_SBUF) {
55 MSize n = (MSize)(sbufP(sb) - b);  /* Remember the write offset. */
56 b = lj_mem_realloc(L, b, osz, (osz >> 1));
57 setmref(sb->b, b);
58 setmref(sb->p, b + n);
59 setmref(sb->e, b + (osz >> 1));
60 }
61}
62
/* Return the start of the global temporary buffer (G(L)->tmpbuf), grown to a
** capacity of at least sz bytes. The buffer is bound to L. The write
** position of the buffer is not reset or advanced by this function.
*/
63char * LJ_FASTCALL lj_buf_tmp(lua_State *L, MSize sz)
64{
65 SBuf *sb = &G(L)->tmpbuf;
66 setsbufL(sb, L);
67 return lj_buf_need(sb, sz);
68}
69
70/* -- Low-level buffer put operations ------------------------------------- */
71
/* Append len bytes starting at q to the buffer, growing it if needed.
** Returns sb.
*/
72SBuf *lj_buf_putmem(SBuf *sb, const void *q, MSize len)
73{
74 char *p = lj_buf_more(sb, len);
75 p = lj_buf_wmem(p, q, len);
76 setsbufP(sb, p);  /* Commit the new write position. */
77 return sb;
78}
79
/* Append the single character c to the buffer, growing it if needed.
** Returns sb. Only compiled when LJ_HASJIT is set.
*/
80#if LJ_HASJIT
81SBuf * LJ_FASTCALL lj_buf_putchar(SBuf *sb, int c)
82{
83 char *p = lj_buf_more(sb, 1);
84 *p++ = (char)c;
85 setsbufP(sb, p);
86 return sb;
87}
88#endif
89
/* Append the contents of string s to the buffer, growing it if needed.
** Returns sb.
*/
90SBuf * LJ_FASTCALL lj_buf_putstr(SBuf *sb, GCstr *s)
91{
92 MSize len = s->len;
93 char *p = lj_buf_more(sb, len);
94 p = lj_buf_wmem(p, strdata(s), len);
95 setsbufP(sb, p);
96 return sb;
97}
98
99/* -- High-level buffer put operations ------------------------------------ */
100
/* Append the bytes of string s in reverse order to the buffer.
** Returns sb.
*/
101SBuf * LJ_FASTCALL lj_buf_putstr_reverse(SBuf *sb, GCstr *s)
102{
103 MSize len = s->len;
104 char *p = lj_buf_more(sb, len), *e = p+len;
105 const char *q = strdata(s)+len-1;  /* Start at the last byte of s. */
106 while (p < e)
107 *p++ = *q--;
108 setsbufP(sb, p);
109 return sb;
110}
111
/* Append a lower-cased copy of string s to the buffer. Only the bytes
** 'A'..'Z' are changed; all other bytes are copied as-is. Returns sb.
*/
112SBuf * LJ_FASTCALL lj_buf_putstr_lower(SBuf *sb, GCstr *s)
113{
114 MSize len = s->len;
115 char *p = lj_buf_more(sb, len), *e = p+len;
116 const char *q = strdata(s);
117 for (; p < e; p++, q++) {
118 uint32_t c = *(unsigned char *)q;
/* Both variants add 0x20 to upper-case letters. The PPC variant does it */
/* without a branch: the comparison yields 0 or 1, shifted left by 5. */
119#if LJ_TARGET_PPC
120 *p = c + ((c >= 'A' && c <= 'Z') << 5);
121#else
122 if (c >= 'A' && c <= 'Z') c += 0x20;
123 *p = c;
124#endif
125 }
126 setsbufP(sb, p);
127 return sb;
128}
129
/* Append an upper-cased copy of string s to the buffer. Only the bytes
** 'a'..'z' are changed; all other bytes are copied as-is. Returns sb.
*/
130SBuf * LJ_FASTCALL lj_buf_putstr_upper(SBuf *sb, GCstr *s)
131{
132 MSize len = s->len;
133 char *p = lj_buf_more(sb, len), *e = p+len;
134 const char *q = strdata(s);
135 for (; p < e; p++, q++) {
136 uint32_t c = *(unsigned char *)q;
/* Both variants subtract 0x20 from lower-case letters. The PPC variant */
/* does it without a branch: the comparison yields 0 or 1, shifted by 5. */
137#if LJ_TARGET_PPC
138 *p = c - ((c >= 'a' && c <= 'z') << 5);
139#else
140 if (c >= 'a' && c <= 'z') c -= 0x20;
141 *p = c;
142#endif
143 }
144 setsbufP(sb, p);
145 return sb;
146}
147
/* Append rep copies of string s to the buffer. Nothing is appended if rep is
** not positive or s is empty. Throws a memory error if the total length
** exceeds LJ_MAX_STR. Returns sb.
*/
148SBuf *lj_buf_putstr_rep(SBuf *sb, GCstr *s, int32_t rep)
149{
150 MSize len = s->len;
151 if (rep > 0 && len) {
152 uint64_t tlen = (uint64_t)rep * len;  /* 64 bit product of two 32 bit values. */
153 char *p;
154 if (LJ_UNLIKELY(tlen > LJ_MAX_STR))
155 lj_err_mem(sbufL(sb));
156 p = lj_buf_more(sb, (MSize)tlen);  /* Reserve the whole output once. */
157 if (len == 1) { /* Optimize a common case. */
158 uint32_t c = strdata(s)[0];
159 do { *p++ = c; } while (--rep > 0);
160 } else {
161 const char *e = strdata(s) + len;
162 do {
163 const char *q = strdata(s);
164 do { *p++ = *q++; } while (q < e);
165 } while (--rep > 0);
166 }
167 setsbufP(sb, p);
168 }
169 return sb;
170}
171
/* Append the elements t[i] .. t[e] to the buffer, separated by sep (sep may
** be NULL for no separator). Every element must be a string or a number.
** Returns sb on success; if i > e nothing is appended.
** If an element is missing or of any other type, NULL is returned and the
** buffer's write pointer is overwritten with the failing index. After such
** a failure the write pointer no longer points into the buffer.
*/
172SBuf *lj_buf_puttab(SBuf *sb, GCtab *t, GCstr *sep, int32_t i, int32_t e)
173{
174 MSize seplen = sep ? sep->len : 0;
175 if (i <= e) {
176 for (;;) {
177 cTValue *o = lj_tab_getint(t, i);
178 char *p;
/* Each branch below reserves room for the element plus the separator, */
/* so the separator can be written afterwards without another check. */
179 if (!o) {
180 badtype: /* Error: bad element type. */
181 setsbufP(sb, (void *)(intptr_t)i); /* Store failing index. */
182 return NULL;
183 } else if (tvisstr(o)) {
184 MSize len = strV(o)->len;
185 p = lj_buf_wmem(lj_buf_more(sb, len + seplen), strVdata(o), len);
186 } else if (tvisint(o)) {
187 p = lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT+seplen), intV(o));
188 } else if (tvisnum(o)) {
189 p = lj_strfmt_wnum(lj_buf_more(sb, STRFMT_MAXBUF_NUM+seplen), o);
190 } else {
191 goto badtype;
192 }
193 if (i++ == e) {  /* Last element: no trailing separator. */
194 setsbufP(sb, p);
195 break;
196 }
197 if (seplen) p = lj_buf_wmem(p, strdata(sep), seplen);
198 setsbufP(sb, p);  /* Commit before the next lj_buf_more() call. */
199 }
200 }
201 return sb;
202}
203
204/* -- Miscellaneous buffer operations ------------------------------------- */
205
/* Create a string object from the bytes written to the buffer so far, using
** the lua_State the buffer is bound to. The buffer itself is not modified.
*/
206GCstr * LJ_FASTCALL lj_buf_tostr(SBuf *sb)
207{
208 return lj_str_new(sbufL(sb), sbufB(sb), sbuflen(sb));
209}
210
/* Returns a new string object holding s1 followed by s2. The bytes are
** assembled in the global temporary buffer obtained from lj_buf_tmp().
** NOTE(review): len1 + len2 is computed in MSize without an overflow check
** here; presumably string length limits make this safe -- verify.
*/
211/* Concatenate two strings. */
212GCstr *lj_buf_cat2str(lua_State *L, GCstr *s1, GCstr *s2)
213{
214 MSize len1 = s1->len, len2 = s2->len;
215 char *buf = lj_buf_tmp(L, len1 + len2);
216 memcpy(buf, strdata(s1), len1);
217 memcpy(buf+len1, strdata(s2), len2);
218 return lj_str_new(L, buf, len1 + len2);
219}
220
/* Decodes one unsigned LEB128 value starting at *pp, advances *pp past the
** bytes consumed and returns the value as a uint32_t.
** NOTE(review): no end-of-input or maximum-length check is performed here;
** the input is trusted to be a well-formed encoding that fits in 32 bits.
*/
221/* Read ULEB128 from buffer. */
222uint32_t LJ_FASTCALL lj_buf_ruleb128(const char **pp)
223{
224 const uint8_t *p = (const uint8_t *)*pp;
225 uint32_t v = *p++;
226 if (LJ_UNLIKELY(v >= 0x80)) {  /* Continuation bit set: multi-byte value. */
227 int sh = 0;
228 v &= 0x7f;
/* Each following byte contributes 7 more bits; stop after a byte < 0x80. */
229 do { v |= ((*p & 0x7f) << (sh += 7)); } while (*p++ >= 0x80);
230 }
231 *pp = (const char *)p;
232 return v;
233}
234
diff --git a/src/lj_buf.h b/src/lj_buf.h
new file mode 100644
index 00000000..1cf1780b
--- /dev/null
+++ b/src/lj_buf.h
@@ -0,0 +1,105 @@
1/*
2** Buffer handling.
3** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_BUF_H
7#define _LJ_BUF_H
8
9#include "lj_obj.h"
10#include "lj_gc.h"
11#include "lj_str.h"
12
13/* Resizable string buffers. Struct definition in lj_obj.h. */
14#define sbufB(sb) (mref((sb)->b, char))  /* Start of buffer storage. */
15#define sbufP(sb) (mref((sb)->p, char))  /* Current write position. */
16#define sbufE(sb) (mref((sb)->e, char))  /* End of buffer storage. */
17#define sbufL(sb) (mref((sb)->L, lua_State))  /* lua_State bound to the buffer. */
18#define sbufsz(sb) ((MSize)(sbufE((sb)) - sbufB((sb))))  /* Total capacity. */
19#define sbuflen(sb) ((MSize)(sbufP((sb)) - sbufB((sb))))  /* Bytes written. */
20#define sbufleft(sb) ((MSize)(sbufE((sb)) - sbufP((sb))))  /* Bytes still free. */
21#define setsbufP(sb, q) (setmref((sb)->p, (q)))  /* Set write position. */
22#define setsbufL(sb, l) (setmref((sb)->L, (l)))  /* Bind buffer to a lua_State. */
23
24/* Buffer management */
25LJ_FUNC char *LJ_FASTCALL lj_buf_need2(SBuf *sb, MSize sz);
26LJ_FUNC char *LJ_FASTCALL lj_buf_more2(SBuf *sb, MSize sz);
27LJ_FUNC void LJ_FASTCALL lj_buf_shrink(lua_State *L, SBuf *sb);
28LJ_FUNC char * LJ_FASTCALL lj_buf_tmp(lua_State *L, MSize sz);
29
/* Initialize a buffer: bind it to L and set all three pointers to NULL.
** No memory is allocated here.
*/
30static LJ_AINLINE void lj_buf_init(lua_State *L, SBuf *sb)
31{
32 setsbufL(sb, L);
33 setmref(sb->p, NULL); setmref(sb->e, NULL); setmref(sb->b, NULL);
34}
35
/* Discard the buffer contents by moving the write position back to the
** start. The storage itself (start and end pointers) is left untouched.
*/
36static LJ_AINLINE void lj_buf_reset(SBuf *sb)
37{
38 setmrefr(sb->p, sb->b);
39}
40
/* Return the global temporary buffer (G(L)->tmpbuf), bound to L and with its
** write position reset to the start. No capacity is reserved here.
*/
41static LJ_AINLINE SBuf *lj_buf_tmp_(lua_State *L)
42{
43 SBuf *sb = &G(L)->tmpbuf;
44 setsbufL(sb, L);
45 lj_buf_reset(sb);
46 return sb;
47}
48
/* Release the buffer storage via lj_mem_free(). The pointers inside sb are
** not cleared by this function.
*/
49static LJ_AINLINE void lj_buf_free(global_State *g, SBuf *sb)
50{
51 lj_mem_free(g, sbufB(sb), sbufsz(sb));
52}
53
/* Ensure the buffer has a total capacity of at least sz bytes and return its
** start. Only the out-of-line lj_buf_need2() path grows the buffer.
*/
54static LJ_AINLINE char *lj_buf_need(SBuf *sb, MSize sz)
55{
56 if (LJ_UNLIKELY(sz > sbufsz(sb)))
57 return lj_buf_need2(sb, sz);
58 return sbufB(sb);
59}
60
/* Ensure at least sz bytes are free behind the write position and return the
** write position. The write position itself is not advanced; the caller
** stores the new position with setsbufP() after writing.
*/
61static LJ_AINLINE char *lj_buf_more(SBuf *sb, MSize sz)
62{
63 if (LJ_UNLIKELY(sz > sbufleft(sb)))
64 return lj_buf_more2(sb, sz);
65 return sbufP(sb);
66}
67
68/* Low-level buffer put operations */
69LJ_FUNC SBuf *lj_buf_putmem(SBuf *sb, const void *q, MSize len);
70#if LJ_HASJIT
71LJ_FUNC SBuf * LJ_FASTCALL lj_buf_putchar(SBuf *sb, int c);
72#endif
73LJ_FUNC SBuf * LJ_FASTCALL lj_buf_putstr(SBuf *sb, GCstr *s);
74
/* Copy len bytes from q to p and return the pointer just past the copied
** bytes. No bounds check is done here; the destination must already have
** room for len bytes.
*/
75static LJ_AINLINE char *lj_buf_wmem(char *p, const void *q, MSize len)
76{
77 return (char *)memcpy(p, q, len) + len;
78}
79
/* Append the single byte c to the buffer, growing it if needed. */
80static LJ_AINLINE void lj_buf_putb(SBuf *sb, int c)
81{
82 char *p = lj_buf_more(sb, 1);
83 *p++ = (char)c;
84 setsbufP(sb, p);
85}
86
87/* High-level buffer put operations */
88LJ_FUNCA SBuf * LJ_FASTCALL lj_buf_putstr_reverse(SBuf *sb, GCstr *s);
89LJ_FUNCA SBuf * LJ_FASTCALL lj_buf_putstr_lower(SBuf *sb, GCstr *s);
90LJ_FUNCA SBuf * LJ_FASTCALL lj_buf_putstr_upper(SBuf *sb, GCstr *s);
91LJ_FUNC SBuf *lj_buf_putstr_rep(SBuf *sb, GCstr *s, int32_t rep);
92LJ_FUNC SBuf *lj_buf_puttab(SBuf *sb, GCtab *t, GCstr *sep,
93 int32_t i, int32_t e);
94
95/* Miscellaneous buffer operations */
96LJ_FUNCA GCstr * LJ_FASTCALL lj_buf_tostr(SBuf *sb);
97LJ_FUNC GCstr *lj_buf_cat2str(lua_State *L, GCstr *s1, GCstr *s2);
98LJ_FUNC uint32_t LJ_FASTCALL lj_buf_ruleb128(const char **pp);
99
/* Create a string object from the bytes written to the buffer so far, using
** the explicitly passed L rather than the lua_State stored in the buffer.
*/
100static LJ_AINLINE GCstr *lj_buf_str(lua_State *L, SBuf *sb)
101{
102 return lj_str_new(L, sbufB(sb), sbuflen(sb));
103}
104
105#endif
diff --git a/src/lj_carith.c b/src/lj_carith.c
index 2a358a9b..9032ea32 100644
--- a/src/lj_carith.c
+++ b/src/lj_carith.c
@@ -11,10 +11,12 @@
11#include "lj_err.h" 11#include "lj_err.h"
12#include "lj_tab.h" 12#include "lj_tab.h"
13#include "lj_meta.h" 13#include "lj_meta.h"
14#include "lj_ir.h"
14#include "lj_ctype.h" 15#include "lj_ctype.h"
15#include "lj_cconv.h" 16#include "lj_cconv.h"
16#include "lj_cdata.h" 17#include "lj_cdata.h"
17#include "lj_carith.h" 18#include "lj_carith.h"
19#include "lj_strscan.h"
18 20
19/* -- C data arithmetic --------------------------------------------------- */ 21/* -- C data arithmetic --------------------------------------------------- */
20 22
@@ -272,6 +274,80 @@ int lj_carith_op(lua_State *L, MMS mm)
272 return lj_carith_meta(L, cts, &ca, mm); 274 return lj_carith_meta(L, cts, &ca, mm);
273} 275}
274 276
277/* -- 64 bit bit operations helpers --------------------------------------- */
278
/* B64DEF(name) expands to the function header of lj_carith_<name>(x, sh).
** With LJ_64 the helpers are static inline; otherwise they are non-static,
** non-inlined functions. Every helper masks the shift count with 63, so the
** effective shift/rotate amount is always in the range 0..63.
*/
279#if LJ_64
280#define B64DEF(name) \
281 static LJ_AINLINE uint64_t lj_carith_##name(uint64_t x, int32_t sh)
282#else
283/* Not inlined on 32 bit archs, since some of these are quite lengthy. */
284#define B64DEF(name) \
285 uint64_t LJ_NOINLINE lj_carith_##name(uint64_t x, int32_t sh)
286#endif
287
288B64DEF(shl64) { return x << (sh&63); }  /* Shift left. */
289B64DEF(shr64) { return x >> (sh&63); }  /* Unsigned (logical) shift right. */
290B64DEF(sar64) { return (uint64_t)((int64_t)x >> (sh&63)); }  /* Shift right on the signed value. */
291B64DEF(rol64) { return lj_rol(x, (sh&63)); }  /* Rotate left. */
292B64DEF(ror64) { return lj_ror(x, (sh&63)); }  /* Rotate right. */
293
294#undef B64DEF
295
/* Apply a 64 bit shift or rotate to x by sh and return the result.
** op selects the operation and is the IR opcode relative to IR_BSHL
** (i.e. IR_BSHL-IR_BSHL, IR_BSHR-IR_BSHL, ...). Any other value trips an
** assertion; if assertions are compiled out, x is returned unchanged.
*/
296uint64_t lj_carith_shift64(uint64_t x, int32_t sh, int op)
297{
298 switch (op) {
299 case IR_BSHL-IR_BSHL: x = lj_carith_shl64(x, sh); break;
300 case IR_BSHR-IR_BSHL: x = lj_carith_shr64(x, sh); break;
301 case IR_BSAR-IR_BSHL: x = lj_carith_sar64(x, sh); break;
302 case IR_BROL-IR_BSHL: x = lj_carith_rol64(x, sh); break;
303 case IR_BROR-IR_BSHL: x = lj_carith_ror64(x, sh); break;
304 default: lua_assert(0); break;
305 }
306 return x;
307}
308
/* Check argument narg (1-based) and return it as a 64 bit value.
**
** - Missing argument, or a value that is neither a number, a cdata nor a
**   string accepted by lj_strscan_number(): raises an argument type error
**   for LUA_TNUMBER.
** - cdata: the value is converted with lj_cconv_ct_ct() to the C type *id
**   and returned. *id is an in/out parameter: it is forced to CTID_UINT64 if
**   the cdata is an unsigned 8 byte integer, and set to CTID_INT64 if it was
**   still zero.
** - Number (or numeric string): the value is reduced to 32 bits and returned
**   zero-extended; *id is left untouched.
*/
309/* Equivalent to lj_lib_checkbit(), but handles cdata. */
310uint64_t lj_carith_check64(lua_State *L, int narg, CTypeID *id)
311{
312 TValue *o = L->base + narg-1;
313 if (o >= L->top) {  /* Argument slot is beyond the top of the stack. */
314 err:
315 lj_err_argt(L, narg, LUA_TNUMBER);
316 } else if (LJ_LIKELY(tvisnumber(o))) {
317 /* Handled below. */
318 } else if (tviscdata(o)) {
319 CTState *cts = ctype_cts(L);
320 uint8_t *sp = (uint8_t *)cdataptr(cdataV(o));
321 CTypeID sid = cdataV(o)->ctypeid;
322 CType *s = ctype_get(cts, sid);
323 uint64_t x;
324 if (ctype_isref(s->info)) {  /* Reference: follow it to the referenced data. */
325 sp = *(void **)sp;
326 sid = ctype_cid(s->info);
327 }
328 s = ctype_raw(cts, sid);
329 if (ctype_isenum(s->info)) s = ctype_child(cts, s);  /* Enum: use its child type. */
330 if ((s->info & (CTMASK_NUM|CTF_BOOL|CTF_FP|CTF_UNSIGNED)) ==
331 CTINFO(CT_NUM, CTF_UNSIGNED) && s->size == 8)
332 *id = CTID_UINT64; /* Use uint64_t, since it has the highest rank. */
333 else if (!*id)
334 *id = CTID_INT64; /* Use int64_t, unless already set. */
335 lj_cconv_ct_ct(cts, ctype_get(cts, *id), s,
336 (uint8_t *)&x, sp, CCF_ARG(narg));
337 return x;
/* NOTE(review): lj_strscan_number() is passed o as its output, so it */
/* presumably converts the string to a number in place -- confirm. */
338 } else if (!(tvisstr(o) && lj_strscan_number(strV(o), o))) {
339 goto err;
340 }
341 if (LJ_LIKELY(tvisint(o))) {
342 return (uint32_t)intV(o);
343 } else {
344 int32_t i = lj_num2bit(numV(o));
345 if (LJ_DUALNUM) setintV(o, i);  /* Store the integer result back into the slot. */
346 return (uint32_t)i;
347 }
348}
349
350
275/* -- 64 bit integer arithmetic helpers ----------------------------------- */ 351/* -- 64 bit integer arithmetic helpers ----------------------------------- */
276 352
277#if LJ_32 && LJ_HASJIT 353#if LJ_32 && LJ_HASJIT
diff --git a/src/lj_carith.h b/src/lj_carith.h
index 8c4bdbbe..da8320f3 100644
--- a/src/lj_carith.h
+++ b/src/lj_carith.h
@@ -12,6 +12,16 @@
12 12
13LJ_FUNC int lj_carith_op(lua_State *L, MMS mm); 13LJ_FUNC int lj_carith_op(lua_State *L, MMS mm);
14 14
15#if LJ_32
16LJ_FUNC uint64_t lj_carith_shl64(uint64_t x, int32_t sh);
17LJ_FUNC uint64_t lj_carith_shr64(uint64_t x, int32_t sh);
18LJ_FUNC uint64_t lj_carith_sar64(uint64_t x, int32_t sh);
19LJ_FUNC uint64_t lj_carith_rol64(uint64_t x, int32_t sh);
20LJ_FUNC uint64_t lj_carith_ror64(uint64_t x, int32_t sh);
21#endif
22LJ_FUNC uint64_t lj_carith_shift64(uint64_t x, int32_t sh, int op);
23LJ_FUNC uint64_t lj_carith_check64(lua_State *L, int narg, CTypeID *id);
24
15#if LJ_32 && LJ_HASJIT 25#if LJ_32 && LJ_HASJIT
16LJ_FUNC int64_t lj_carith_mul64(int64_t x, int64_t k); 26LJ_FUNC int64_t lj_carith_mul64(int64_t x, int64_t k);
17#endif 27#endif
diff --git a/src/lj_ccall.c b/src/lj_ccall.c
index 998417c1..5ab5b60d 100644
--- a/src/lj_ccall.c
+++ b/src/lj_ccall.c
@@ -9,7 +9,6 @@
9 9
10#include "lj_gc.h" 10#include "lj_gc.h"
11#include "lj_err.h" 11#include "lj_err.h"
12#include "lj_str.h"
13#include "lj_tab.h" 12#include "lj_tab.h"
14#include "lj_ctype.h" 13#include "lj_ctype.h"
15#include "lj_cconv.h" 14#include "lj_cconv.h"
@@ -291,6 +290,75 @@
291#define CCALL_HANDLE_RET \ 290#define CCALL_HANDLE_RET \
292 if ((ct->info & CTF_VARARG)) sp = (uint8_t *)&cc->gpr[0]; 291 if ((ct->info & CTF_VARARG)) sp = (uint8_t *)&cc->gpr[0];
293 292
293#elif LJ_TARGET_ARM64
294/* -- ARM64 calling conventions ------------------------------------------- */
295
296#define CCALL_HANDLE_STRUCTRET \
297 cc->retref = !ccall_classify_struct(cts, ctr); \
298 if (cc->retref) cc->retp = dp;
299
300#define CCALL_HANDLE_STRUCTRET2 \
301 unsigned int cl = ccall_classify_struct(cts, ctr); \
302 if ((cl & 4)) { /* Combine float HFA from separate registers. */ \
303 CTSize i = (cl >> 8) - 1; \
304 do { ((uint32_t *)dp)[i] = cc->fpr[i].u32; } while (i--); \
305 } else { \
306 if (cl > 1) sp = (uint8_t *)&cc->fpr[0]; \
307 memcpy(dp, sp, ctr->size); \
308 }
309
310#define CCALL_HANDLE_COMPLEXRET \
311 /* Complex values are returned in one or two FPRs. */ \
312 cc->retref = 0;
313
314#define CCALL_HANDLE_COMPLEXRET2 \
315 if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \
316 ((float *)dp)[0] = cc->fpr[0].f; \
317 ((float *)dp)[1] = cc->fpr[1].f; \
318 } else { /* Copy complex double from FPRs. */ \
319 ((double *)dp)[0] = cc->fpr[0].d; \
320 ((double *)dp)[1] = cc->fpr[1].d; \
321 }
322
323#define CCALL_HANDLE_STRUCTARG \
324 unsigned int cl = ccall_classify_struct(cts, d); \
325 if (cl == 0) { /* Pass struct by reference. */ \
326 rp = cdataptr(lj_cdata_new(cts, did, sz)); \
327 sz = CTSIZE_PTR; \
328 } else if (cl > 1) { /* Pass struct in FPRs or on stack. */ \
329 isfp = (cl & 4) ? 2 : 1; \
330 } /* else: Pass struct in GPRs or on stack. */
331
332#define CCALL_HANDLE_COMPLEXARG \
333 /* Pass complex by value in separate (!) FPRs or on stack. */ \
334 isfp = ctr->size == 2*sizeof(float) ? 2 : 1;
335
336#define CCALL_HANDLE_REGARG \
337 if (LJ_TARGET_IOS && isva) { \
338 /* IOS: All variadic arguments are on the stack. */ \
339 } else if (isfp) { /* Try to pass argument in FPRs. */ \
340 int n2 = ctype_isvector(d->info) ? 1 : n*isfp; \
341 if (nfpr + n2 <= CCALL_NARG_FPR) { \
342 dp = &cc->fpr[nfpr]; \
343 nfpr += n2; \
344 goto done; \
345 } else { \
346 nfpr = CCALL_NARG_FPR; /* Prevent reordering. */ \
347 if (LJ_TARGET_IOS && d->size < 8) goto err_nyi; \
348 } \
349 } else { /* Try to pass argument in GPRs. */ \
350 if (!LJ_TARGET_IOS && (d->info & CTF_ALIGN) > CTALIGN_PTR) \
351 ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \
352 if (ngpr + n <= maxgpr) { \
353 dp = &cc->gpr[ngpr]; \
354 ngpr += n; \
355 goto done; \
356 } else { \
357 ngpr = maxgpr; /* Prevent reordering. */ \
358 if (LJ_TARGET_IOS && d->size < 8) goto err_nyi; \
359 } \
360 }
361
294#elif LJ_TARGET_PPC 362#elif LJ_TARGET_PPC
295/* -- PPC calling conventions --------------------------------------------- */ 363/* -- PPC calling conventions --------------------------------------------- */
296 364
@@ -339,42 +407,6 @@
339 if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ 407 if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
340 ctr = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ 408 ctr = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */
341 409
342#elif LJ_TARGET_PPCSPE
343/* -- PPC/SPE calling conventions ----------------------------------------- */
344
345#define CCALL_HANDLE_STRUCTRET \
346 cc->retref = 1; /* Return all structs by reference. */ \
347 cc->gpr[ngpr++] = (GPRArg)dp;
348
349#define CCALL_HANDLE_COMPLEXRET \
350 /* Complex values are returned in 2 or 4 GPRs. */ \
351 cc->retref = 0;
352
353#define CCALL_HANDLE_COMPLEXRET2 \
354 memcpy(dp, sp, ctr->size); /* Copy complex from GPRs. */
355
356#define CCALL_HANDLE_STRUCTARG \
357 rp = cdataptr(lj_cdata_new(cts, did, sz)); \
358 sz = CTSIZE_PTR; /* Pass all structs by reference. */
359
360#define CCALL_HANDLE_COMPLEXARG \
361 /* Pass complex by value in 2 or 4 GPRs. */
362
363/* PPC/SPE has a softfp ABI. */
364#define CCALL_HANDLE_REGARG \
365 if (n > 1) { /* Doesn't fit in a single GPR? */ \
366 lua_assert(n == 2 || n == 4); /* int64_t, double or complex (float). */ \
367 if (n == 2) \
368 ngpr = (ngpr + 1u) & ~1u; /* Only align 64 bit value to regpair. */ \
369 else if (ngpr + n > maxgpr) \
370 ngpr = maxgpr; /* Prevent reordering. */ \
371 } \
372 if (ngpr + n <= maxgpr) { \
373 dp = &cc->gpr[ngpr]; \
374 ngpr += n; \
375 goto done; \
376 }
377
378#elif LJ_TARGET_MIPS 410#elif LJ_TARGET_MIPS
379/* -- MIPS calling conventions -------------------------------------------- */ 411/* -- MIPS calling conventions -------------------------------------------- */
380 412
@@ -621,6 +653,52 @@ noth: /* Not a homogeneous float/double aggregate. */
621 653
622#endif 654#endif
623 655
656/* -- ARM64 ABI struct classification ------------------------------------- */
657
658#if LJ_TARGET_ARM64
659
660/* Classify a struct based on its fields. */
661static unsigned int ccall_classify_struct(CTState *cts, CType *ct)
662{
663 CTSize sz = ct->size;
664 unsigned int r = 0, n = 0, isu = (ct->info & CTF_UNION);
665 while (ct->sib) {
666 CType *sct;
667 ct = ctype_get(cts, ct->sib);
668 if (ctype_isfield(ct->info)) {
669 sct = ctype_rawchild(cts, ct);
670 if (ctype_isfp(sct->info)) {
671 r |= sct->size;
672 if (!isu) n++; else if (n == 0) n = 1;
673 } else if (ctype_iscomplex(sct->info)) {
674 r |= (sct->size >> 1);
675 if (!isu) n += 2; else if (n < 2) n = 2;
676 } else if (ctype_isstruct(sct->info)) {
677 goto substruct;
678 } else {
679 goto noth;
680 }
681 } else if (ctype_isbitfield(ct->info)) {
682 goto noth;
683 } else if (ctype_isxattrib(ct->info, CTA_SUBTYPE)) {
684 sct = ctype_rawchild(cts, ct);
685 substruct:
686 if (sct->size > 0) {
687 unsigned int s = ccall_classify_struct(cts, sct);
688 if (s <= 1) goto noth;
689 r |= (s & 255);
690 if (!isu) n += (s >> 8); else if (n < (s >>8)) n = (s >> 8);
691 }
692 }
693 }
694 if ((r == 4 || r == 8) && n <= 4)
695 return r + (n << 8);
696noth: /* Not a homogeneous float/double aggregate. */
697 return (sz <= 16); /* Return structs of size <= 16 in GPRs. */
698}
699
700#endif
701
624/* -- Common C call handling ---------------------------------------------- */ 702/* -- Common C call handling ---------------------------------------------- */
625 703
626/* Infer the destination CTypeID for a vararg argument. */ 704/* Infer the destination CTypeID for a vararg argument. */
@@ -803,6 +881,12 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
803 cc->fpr[nfpr-1].d[0] = cc->fpr[nfpr-2].d[1]; /* Split complex double. */ 881 cc->fpr[nfpr-1].d[0] = cc->fpr[nfpr-2].d[1]; /* Split complex double. */
804 cc->fpr[nfpr-2].d[1] = 0; 882 cc->fpr[nfpr-2].d[1] = 0;
805 } 883 }
884#elif LJ_TARGET_ARM64
885 if (isfp == 2 && (uint8_t *)dp < (uint8_t *)cc->stack) {
886 /* Split float HFA or complex float into separate registers. */
887 CTSize i = (sz >> 2) - 1;
888 do { ((uint64_t *)dp)[i] = ((uint32_t *)dp)[i]; } while (i--);
889 }
806#else 890#else
807 UNUSED(isfp); 891 UNUSED(isfp);
808#endif 892#endif
diff --git a/src/lj_ccall.h b/src/lj_ccall.h
index f5530109..91983fee 100644
--- a/src/lj_ccall.h
+++ b/src/lj_ccall.h
@@ -68,27 +68,32 @@ typedef union FPRArg {
68 float f[2]; 68 float f[2];
69} FPRArg; 69} FPRArg;
70 70
71#elif LJ_TARGET_PPC 71#elif LJ_TARGET_ARM64
72 72
73#define CCALL_NARG_GPR 8 73#define CCALL_NARG_GPR 8
74#define CCALL_NRET_GPR 2
74#define CCALL_NARG_FPR 8 75#define CCALL_NARG_FPR 8
75#define CCALL_NRET_GPR 4 /* For complex double. */ 76#define CCALL_NRET_FPR 4
76#define CCALL_NRET_FPR 1
77#define CCALL_SPS_EXTRA 4
78#define CCALL_SPS_FREE 0 77#define CCALL_SPS_FREE 0
79 78
80typedef intptr_t GPRArg; 79typedef intptr_t GPRArg;
81typedef double FPRArg; 80typedef union FPRArg {
81 double d;
82 float f;
83 uint32_t u32;
84} FPRArg;
82 85
83#elif LJ_TARGET_PPCSPE 86#elif LJ_TARGET_PPC
84 87
85#define CCALL_NARG_GPR 8 88#define CCALL_NARG_GPR 8
86#define CCALL_NARG_FPR 0 89#define CCALL_NARG_FPR 8
87#define CCALL_NRET_GPR 4 /* For softfp complex double. */ 90#define CCALL_NRET_GPR 4 /* For complex double. */
88#define CCALL_NRET_FPR 0 91#define CCALL_NRET_FPR 1
89#define CCALL_SPS_FREE 0 /* NYI */ 92#define CCALL_SPS_EXTRA 4
93#define CCALL_SPS_FREE 0
90 94
91typedef intptr_t GPRArg; 95typedef intptr_t GPRArg;
96typedef double FPRArg;
92 97
93#elif LJ_TARGET_MIPS 98#elif LJ_TARGET_MIPS
94 99
@@ -145,6 +150,8 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState {
145 uint8_t nfpr; /* Number of arguments in FPRs. */ 150 uint8_t nfpr; /* Number of arguments in FPRs. */
146#elif LJ_TARGET_X86 151#elif LJ_TARGET_X86
147 uint8_t resx87; /* Result on x87 stack: 1:float, 2:double. */ 152 uint8_t resx87; /* Result on x87 stack: 1:float, 2:double. */
153#elif LJ_TARGET_ARM64
154 void *retp; /* Aggregate return pointer in x8. */
148#elif LJ_TARGET_PPC 155#elif LJ_TARGET_PPC
149 uint8_t nfpr; /* Number of arguments in FPRs. */ 156 uint8_t nfpr; /* Number of arguments in FPRs. */
150#endif 157#endif
diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c
index b210641f..66a09440 100644
--- a/src/lj_ccallback.c
+++ b/src/lj_ccallback.c
@@ -27,7 +27,7 @@
27 27
28#if LJ_OS_NOJIT 28#if LJ_OS_NOJIT
29 29
30/* Disabled callback support. */ 30/* Callbacks disabled. */
31#define CALLBACK_SLOT2OFS(slot) (0*(slot)) 31#define CALLBACK_SLOT2OFS(slot) (0*(slot))
32#define CALLBACK_OFS2SLOT(ofs) (0*(ofs)) 32#define CALLBACK_OFS2SLOT(ofs) (0*(ofs))
33#define CALLBACK_MAX_SLOT 0 33#define CALLBACK_MAX_SLOT 0
@@ -54,23 +54,18 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs)
54#elif LJ_TARGET_ARM 54#elif LJ_TARGET_ARM
55 55
56#define CALLBACK_MCODE_HEAD 32 56#define CALLBACK_MCODE_HEAD 32
57#define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + 8*(slot)) 57
58#define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/8) 58#elif LJ_TARGET_ARM64
59#define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE)) 59
60#define CALLBACK_MCODE_HEAD 32
60 61
61#elif LJ_TARGET_PPC 62#elif LJ_TARGET_PPC
62 63
63#define CALLBACK_MCODE_HEAD 24 64#define CALLBACK_MCODE_HEAD 24
64#define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + 8*(slot))
65#define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/8)
66#define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE))
67 65
68#elif LJ_TARGET_MIPS 66#elif LJ_TARGET_MIPS
69 67
70#define CALLBACK_MCODE_HEAD 24 68#define CALLBACK_MCODE_HEAD 24
71#define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + 8*(slot))
72#define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/8)
73#define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE))
74 69
75#else 70#else
76 71
@@ -81,6 +76,12 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs)
81 76
82#endif 77#endif
83 78
79#ifndef CALLBACK_SLOT2OFS
80#define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + 8*(slot))
81#define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/8)
82#define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE))
83#endif
84
84/* Convert callback slot number to callback function pointer. */ 85/* Convert callback slot number to callback function pointer. */
85static void *callback_slot2ptr(CTState *cts, MSize slot) 86static void *callback_slot2ptr(CTState *cts, MSize slot)
86{ 87{
@@ -157,6 +158,26 @@ static void callback_mcode_init(global_State *g, uint32_t *page)
157 } 158 }
158 lua_assert(p - page <= CALLBACK_MCODE_SIZE); 159 lua_assert(p - page <= CALLBACK_MCODE_SIZE);
159} 160}
161#elif LJ_TARGET_ARM64
162static void callback_mcode_init(global_State *g, uint32_t *page)
163{
164 uint32_t *p = page;
165 void *target = (void *)lj_vm_ffi_callback;
166 MSize slot;
167 *p++ = A64I_LDRLx | A64F_D(RID_X11) | A64F_S19(4);
168 *p++ = A64I_LDRLx | A64F_D(RID_X10) | A64F_S19(5);
169 *p++ = A64I_BR | A64F_N(RID_X11);
170 *p++ = A64I_NOP;
171 ((void **)p)[0] = target;
172 ((void **)p)[1] = g;
173 p += 4;
174 for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
175 *p++ = A64I_MOVZw | A64F_D(RID_X9) | A64F_U16(slot);
176 *p = A64I_B | A64F_S26((page-p) & 0x03ffffffu);
177 p++;
178 }
179 lua_assert(p - page <= CALLBACK_MCODE_SIZE);
180}
160#elif LJ_TARGET_PPC 181#elif LJ_TARGET_PPC
161static void callback_mcode_init(global_State *g, uint32_t *page) 182static void callback_mcode_init(global_State *g, uint32_t *page)
162{ 183{
@@ -351,6 +372,29 @@ void lj_ccallback_mcode_free(CTState *cts)
351 goto done; \ 372 goto done; \
352 } CALLBACK_HANDLE_REGARG_FP2 373 } CALLBACK_HANDLE_REGARG_FP2
353 374
375#elif LJ_TARGET_ARM64
376
377#define CALLBACK_HANDLE_REGARG \
378 if (isfp) { \
379 if (nfpr + n <= CCALL_NARG_FPR) { \
380 sp = &cts->cb.fpr[nfpr]; \
381 nfpr += n; \
382 goto done; \
383 } else { \
384 nfpr = CCALL_NARG_FPR; /* Prevent reordering. */ \
385 } \
386 } else { \
387 if (!LJ_TARGET_IOS && n > 1) \
388 ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \
389 if (ngpr + n <= maxgpr) { \
390 sp = &cts->cb.gpr[ngpr]; \
391 ngpr += n; \
392 goto done; \
393 } else { \
394 ngpr = CCALL_NARG_GPR; /* Prevent reordering. */ \
395 } \
396 }
397
354#elif LJ_TARGET_PPC 398#elif LJ_TARGET_PPC
355 399
356#define CALLBACK_HANDLE_REGARG \ 400#define CALLBACK_HANDLE_REGARG \
@@ -411,6 +455,7 @@ static void callback_conv_args(CTState *cts, lua_State *L)
411 int gcsteps = 0; 455 int gcsteps = 0;
412 CType *ct; 456 CType *ct;
413 GCfunc *fn; 457 GCfunc *fn;
458 int fntp;
414 MSize ngpr = 0, nsp = 0, maxgpr = CCALL_NARG_GPR; 459 MSize ngpr = 0, nsp = 0, maxgpr = CCALL_NARG_GPR;
415#if CCALL_NARG_FPR 460#if CCALL_NARG_FPR
416 MSize nfpr = 0; 461 MSize nfpr = 0;
@@ -421,18 +466,27 @@ static void callback_conv_args(CTState *cts, lua_State *L)
421 466
422 if (slot < cts->cb.sizeid && (id = cts->cb.cbid[slot]) != 0) { 467 if (slot < cts->cb.sizeid && (id = cts->cb.cbid[slot]) != 0) {
423 ct = ctype_get(cts, id); 468 ct = ctype_get(cts, id);
424 rid = ctype_cid(ct->info); 469 rid = ctype_cid(ct->info); /* Return type. x86: +(spadj<<16). */
425 fn = funcV(lj_tab_getint(cts->miscmap, (int32_t)slot)); 470 fn = funcV(lj_tab_getint(cts->miscmap, (int32_t)slot));
471 fntp = LJ_TFUNC;
426 } else { /* Must set up frame first, before throwing the error. */ 472 } else { /* Must set up frame first, before throwing the error. */
427 ct = NULL; 473 ct = NULL;
428 rid = 0; 474 rid = 0;
429 fn = (GCfunc *)L; 475 fn = (GCfunc *)L;
476 fntp = LJ_TTHREAD;
430 } 477 }
431 o->u32.lo = LJ_CONT_FFI_CALLBACK; /* Continuation returns from callback. */ 478 /* Continuation returns from callback. */
432 o->u32.hi = rid; /* Return type. x86: +(spadj<<16). */ 479 if (LJ_FR2) {
433 o++; 480 (o++)->u64 = LJ_CONT_FFI_CALLBACK;
434 setframe_gc(o, obj2gco(fn)); 481 (o++)->u64 = rid;
435 setframe_ftsz(o, (int)((char *)(o+1) - (char *)L->base) + FRAME_CONT); 482 o++;
483 } else {
484 o->u32.lo = LJ_CONT_FFI_CALLBACK;
485 o->u32.hi = rid;
486 o++;
487 }
488 setframe_gc(o, obj2gco(fn), fntp);
489 setframe_ftsz(o, ((char *)(o+1) - (char *)L->base) + FRAME_CONT);
436 L->top = L->base = ++o; 490 L->top = L->base = ++o;
437 if (!ct) 491 if (!ct)
438 lj_err_caller(cts->L, LJ_ERR_FFI_BADCBACK); 492 lj_err_caller(cts->L, LJ_ERR_FFI_BADCBACK);
@@ -483,9 +537,14 @@ static void callback_conv_args(CTState *cts, lua_State *L)
483 L->top = o; 537 L->top = o;
484#if LJ_TARGET_X86 538#if LJ_TARGET_X86
485 /* Store stack adjustment for returns from non-cdecl callbacks. */ 539 /* Store stack adjustment for returns from non-cdecl callbacks. */
486 if (ctype_cconv(ct->info) != CTCC_CDECL) 540 if (ctype_cconv(ct->info) != CTCC_CDECL) {
541#if LJ_FR2
542 (L->base-3)->u64 |= (nsp << (16+2));
543#else
487 (L->base-2)->u32.hi |= (nsp << (16+2)); 544 (L->base-2)->u32.hi |= (nsp << (16+2));
488#endif 545#endif
546 }
547#endif
489 while (gcsteps-- > 0) 548 while (gcsteps-- > 0)
490 lj_gc_check(L); 549 lj_gc_check(L);
491} 550}
@@ -493,7 +552,11 @@ static void callback_conv_args(CTState *cts, lua_State *L)
493/* Convert Lua object to callback result. */ 552/* Convert Lua object to callback result. */
494static void callback_conv_result(CTState *cts, lua_State *L, TValue *o) 553static void callback_conv_result(CTState *cts, lua_State *L, TValue *o)
495{ 554{
555#if LJ_FR2
556 CType *ctr = ctype_raw(cts, (uint16_t)(L->base-3)->u64);
557#else
496 CType *ctr = ctype_raw(cts, (uint16_t)(L->base-2)->u32.hi); 558 CType *ctr = ctype_raw(cts, (uint16_t)(L->base-2)->u32.hi);
559#endif
497#if LJ_TARGET_X86 560#if LJ_TARGET_X86
498 cts->cb.gpr[2] = 0; 561 cts->cb.gpr[2] = 0;
499#endif 562#endif
@@ -529,7 +592,7 @@ lua_State * LJ_FASTCALL lj_ccallback_enter(CTState *cts, void *cf)
529 lua_State *L = cts->L; 592 lua_State *L = cts->L;
530 global_State *g = cts->g; 593 global_State *g = cts->g;
531 lua_assert(L != NULL); 594 lua_assert(L != NULL);
532 if (gcref(g->jit_L)) { 595 if (tvref(g->jit_base)) {
533 setstrV(L, L->top++, lj_err_str(L, LJ_ERR_FFI_BADCBACK)); 596 setstrV(L, L->top++, lj_err_str(L, LJ_ERR_FFI_BADCBACK));
534 if (g->panic) g->panic(L); 597 if (g->panic) g->panic(L);
535 exit(EXIT_FAILURE); 598 exit(EXIT_FAILURE);
@@ -562,9 +625,9 @@ void LJ_FASTCALL lj_ccallback_leave(CTState *cts, TValue *o)
562 } 625 }
563 callback_conv_result(cts, L, o); 626 callback_conv_result(cts, L, o);
564 /* Finally drop C frame and continuation frame. */ 627 /* Finally drop C frame and continuation frame. */
565 L->cframe = cframe_prev(L->cframe); 628 L->top -= 2+2*LJ_FR2;
566 L->top -= 2;
567 L->base = obase; 629 L->base = obase;
630 L->cframe = cframe_prev(L->cframe);
568 cts->cb.slot = 0; /* Blacklist C function that called the callback. */ 631 cts->cb.slot = 0; /* Blacklist C function that called the callback. */
569} 632}
570 633
diff --git a/src/lj_cdata.c b/src/lj_cdata.c
index 39fc13a9..fccf7f14 100644
--- a/src/lj_cdata.c
+++ b/src/lj_cdata.c
@@ -9,7 +9,6 @@
9 9
10#include "lj_gc.h" 10#include "lj_gc.h"
11#include "lj_err.h" 11#include "lj_err.h"
12#include "lj_str.h"
13#include "lj_tab.h" 12#include "lj_tab.h"
14#include "lj_ctype.h" 13#include "lj_ctype.h"
15#include "lj_cconv.h" 14#include "lj_cconv.h"
@@ -27,12 +26,12 @@ GCcdata *lj_cdata_newref(CTState *cts, const void *p, CTypeID id)
27} 26}
28 27
29/* Allocate variable-sized or specially aligned C data object. */ 28/* Allocate variable-sized or specially aligned C data object. */
30GCcdata *lj_cdata_newv(CTState *cts, CTypeID id, CTSize sz, CTSize align) 29GCcdata *lj_cdata_newv(lua_State *L, CTypeID id, CTSize sz, CTSize align)
31{ 30{
32 global_State *g; 31 global_State *g;
33 MSize extra = sizeof(GCcdataVar) + sizeof(GCcdata) + 32 MSize extra = sizeof(GCcdataVar) + sizeof(GCcdata) +
34 (align > CT_MEMALIGN ? (1u<<align) - (1u<<CT_MEMALIGN) : 0); 33 (align > CT_MEMALIGN ? (1u<<align) - (1u<<CT_MEMALIGN) : 0);
35 char *p = lj_mem_newt(cts->L, extra + sz, char); 34 char *p = lj_mem_newt(L, extra + sz, char);
36 uintptr_t adata = (uintptr_t)p + sizeof(GCcdataVar) + sizeof(GCcdata); 35 uintptr_t adata = (uintptr_t)p + sizeof(GCcdataVar) + sizeof(GCcdata);
37 uintptr_t almask = (1u << align) - 1u; 36 uintptr_t almask = (1u << align) - 1u;
38 GCcdata *cd = (GCcdata *)(((adata + almask) & ~almask) - sizeof(GCcdata)); 37 GCcdata *cd = (GCcdata *)(((adata + almask) & ~almask) - sizeof(GCcdata));
@@ -40,7 +39,7 @@ GCcdata *lj_cdata_newv(CTState *cts, CTypeID id, CTSize sz, CTSize align)
40 cdatav(cd)->offset = (uint16_t)((char *)cd - p); 39 cdatav(cd)->offset = (uint16_t)((char *)cd - p);
41 cdatav(cd)->extra = extra; 40 cdatav(cd)->extra = extra;
42 cdatav(cd)->len = sz; 41 cdatav(cd)->len = sz;
43 g = cts->g; 42 g = G(L);
44 setgcrefr(cd->nextgc, g->gc.root); 43 setgcrefr(cd->nextgc, g->gc.root);
45 setgcref(g->gc.root, obj2gco(cd)); 44 setgcref(g->gc.root, obj2gco(cd));
46 newwhite(g, obj2gco(cd)); 45 newwhite(g, obj2gco(cd));
@@ -76,21 +75,20 @@ void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd)
76 } 75 }
77} 76}
78 77
79TValue * LJ_FASTCALL lj_cdata_setfin(lua_State *L, GCcdata *cd) 78void lj_cdata_setfin(lua_State *L, GCcdata *cd, GCobj *obj, uint32_t it)
80{ 79{
81 global_State *g = G(L); 80 GCtab *t = ctype_ctsG(G(L))->finalizer;
82 GCtab *t = ctype_ctsG(g)->finalizer;
83 if (gcref(t->metatable)) { 81 if (gcref(t->metatable)) {
84 /* Add cdata to finalizer table, if still enabled. */ 82 /* Add cdata to finalizer table, if still enabled. */
85 TValue *tv, tmp; 83 TValue *tv, tmp;
86 setcdataV(L, &tmp, cd); 84 setcdataV(L, &tmp, cd);
87 lj_gc_anybarriert(L, t); 85 lj_gc_anybarriert(L, t);
88 tv = lj_tab_set(L, t, &tmp); 86 tv = lj_tab_set(L, t, &tmp);
89 cd->marked |= LJ_GC_CDATA_FIN; 87 setgcV(L, tv, obj, it);
90 return tv; 88 if (!tvisnil(tv))
91 } else { 89 cd->marked |= LJ_GC_CDATA_FIN;
92 /* Otherwise return dummy TValue. */ 90 else
93 return &g->tmptv; 91 cd->marked &= ~LJ_GC_CDATA_FIN;
94 } 92 }
95} 93}
96 94
diff --git a/src/lj_cdata.h b/src/lj_cdata.h
index 3a1275e6..c8975be1 100644
--- a/src/lj_cdata.h
+++ b/src/lj_cdata.h
@@ -58,11 +58,12 @@ static LJ_AINLINE GCcdata *lj_cdata_new_(lua_State *L, CTypeID id, CTSize sz)
58} 58}
59 59
60LJ_FUNC GCcdata *lj_cdata_newref(CTState *cts, const void *pp, CTypeID id); 60LJ_FUNC GCcdata *lj_cdata_newref(CTState *cts, const void *pp, CTypeID id);
61LJ_FUNC GCcdata *lj_cdata_newv(CTState *cts, CTypeID id, CTSize sz, 61LJ_FUNC GCcdata *lj_cdata_newv(lua_State *L, CTypeID id, CTSize sz,
62 CTSize align); 62 CTSize align);
63 63
64LJ_FUNC void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd); 64LJ_FUNC void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd);
65LJ_FUNCA TValue * LJ_FASTCALL lj_cdata_setfin(lua_State *L, GCcdata *cd); 65LJ_FUNC void lj_cdata_setfin(lua_State *L, GCcdata *cd, GCobj *obj,
66 uint32_t it);
66 67
67LJ_FUNC CType *lj_cdata_index(CTState *cts, GCcdata *cd, cTValue *key, 68LJ_FUNC CType *lj_cdata_index(CTState *cts, GCcdata *cd, cTValue *key,
68 uint8_t **pp, CTInfo *qual); 69 uint8_t **pp, CTInfo *qual);
diff --git a/src/lj_clib.c b/src/lj_clib.c
index d3526090..6bdad67f 100644
--- a/src/lj_clib.c
+++ b/src/lj_clib.c
@@ -16,6 +16,7 @@
16#include "lj_cconv.h" 16#include "lj_cconv.h"
17#include "lj_cdata.h" 17#include "lj_cdata.h"
18#include "lj_clib.h" 18#include "lj_clib.h"
19#include "lj_strfmt.h"
19 20
20/* -- OS-specific functions ----------------------------------------------- */ 21/* -- OS-specific functions ----------------------------------------------- */
21 22
@@ -61,7 +62,7 @@ static const char *clib_extname(lua_State *L, const char *name)
61#endif 62#endif
62 ) { 63 ) {
63 if (!strchr(name, '.')) { 64 if (!strchr(name, '.')) {
64 name = lj_str_pushf(L, CLIB_SOEXT, name); 65 name = lj_strfmt_pushf(L, CLIB_SOEXT, name);
65 L->top--; 66 L->top--;
66#ifdef __CYGWIN__ 67#ifdef __CYGWIN__
67 } else { 68 } else {
@@ -70,7 +71,7 @@ static const char *clib_extname(lua_State *L, const char *name)
70 } 71 }
71 if (!(name[0] == CLIB_SOPREFIX[0] && name[1] == CLIB_SOPREFIX[1] && 72 if (!(name[0] == CLIB_SOPREFIX[0] && name[1] == CLIB_SOPREFIX[1] &&
72 name[2] == CLIB_SOPREFIX[2])) { 73 name[2] == CLIB_SOPREFIX[2])) {
73 name = lj_str_pushf(L, CLIB_SOPREFIX "%s", name); 74 name = lj_strfmt_pushf(L, CLIB_SOPREFIX "%s", name);
74 L->top--; 75 L->top--;
75 } 76 }
76 } 77 }
@@ -175,7 +176,7 @@ LJ_NORET LJ_NOINLINE static void clib_error(lua_State *L, const char *fmt,
175 if (!FormatMessageA(FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_FROM_SYSTEM, 176 if (!FormatMessageA(FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_FROM_SYSTEM,
176 NULL, err, 0, buf, sizeof(buf), NULL)) 177 NULL, err, 0, buf, sizeof(buf), NULL))
177 buf[0] = '\0'; 178 buf[0] = '\0';
178 lj_err_callermsg(L, lj_str_pushf(L, fmt, name, buf)); 179 lj_err_callermsg(L, lj_strfmt_pushf(L, fmt, name, buf));
179} 180}
180 181
181static int clib_needext(const char *s) 182static int clib_needext(const char *s)
@@ -190,7 +191,7 @@ static int clib_needext(const char *s)
190static const char *clib_extname(lua_State *L, const char *name) 191static const char *clib_extname(lua_State *L, const char *name)
191{ 192{
192 if (clib_needext(name)) { 193 if (clib_needext(name)) {
193 name = lj_str_pushf(L, "%s.dll", name); 194 name = lj_strfmt_pushf(L, "%s.dll", name);
194 L->top--; 195 L->top--;
195 } 196 }
196 return name; 197 return name;
@@ -263,7 +264,7 @@ static void *clib_getsym(CLibrary *cl, const char *name)
263LJ_NORET LJ_NOINLINE static void clib_error(lua_State *L, const char *fmt, 264LJ_NORET LJ_NOINLINE static void clib_error(lua_State *L, const char *fmt,
264 const char *name) 265 const char *name)
265{ 266{
266 lj_err_callermsg(L, lj_str_pushf(L, fmt, name, "no support for this OS")); 267 lj_err_callermsg(L, lj_strfmt_pushf(L, fmt, name, "no support for this OS"));
267} 268}
268 269
269static void *clib_loadlib(lua_State *L, const char *name, int global) 270static void *clib_loadlib(lua_State *L, const char *name, int global)
@@ -347,7 +348,7 @@ TValue *lj_clib_index(lua_State *L, CLibrary *cl, GCstr *name)
347 CTInfo cconv = ctype_cconv(ct->info); 348 CTInfo cconv = ctype_cconv(ct->info);
348 if (cconv == CTCC_FASTCALL || cconv == CTCC_STDCALL) { 349 if (cconv == CTCC_FASTCALL || cconv == CTCC_STDCALL) {
349 CTSize sz = clib_func_argsize(cts, ct); 350 CTSize sz = clib_func_argsize(cts, ct);
350 const char *symd = lj_str_pushf(L, 351 const char *symd = lj_strfmt_pushf(L,
351 cconv == CTCC_FASTCALL ? "@%s@%d" : "_%s@%d", 352 cconv == CTCC_FASTCALL ? "@%s@%d" : "_%s@%d",
352 sym, sz); 353 sym, sz);
353 L->top--; 354 L->top--;
diff --git a/src/lj_cparse.c b/src/lj_cparse.c
index b9df88d7..1ec32307 100644
--- a/src/lj_cparse.c
+++ b/src/lj_cparse.c
@@ -9,13 +9,14 @@
9 9
10#include "lj_gc.h" 10#include "lj_gc.h"
11#include "lj_err.h" 11#include "lj_err.h"
12#include "lj_str.h" 12#include "lj_buf.h"
13#include "lj_ctype.h" 13#include "lj_ctype.h"
14#include "lj_cparse.h" 14#include "lj_cparse.h"
15#include "lj_frame.h" 15#include "lj_frame.h"
16#include "lj_vm.h" 16#include "lj_vm.h"
17#include "lj_char.h" 17#include "lj_char.h"
18#include "lj_strscan.h" 18#include "lj_strscan.h"
19#include "lj_strfmt.h"
19 20
20/* 21/*
21** Important note: this is NOT a validating C parser! This is a minimal 22** Important note: this is NOT a validating C parser! This is a minimal
@@ -46,9 +47,9 @@ static const char *cp_tok2str(CPState *cp, CPToken tok)
46 if (tok > CTOK_OFS) 47 if (tok > CTOK_OFS)
47 return ctoknames[tok-CTOK_OFS-1]; 48 return ctoknames[tok-CTOK_OFS-1];
48 else if (!lj_char_iscntrl(tok)) 49 else if (!lj_char_iscntrl(tok))
49 return lj_str_pushf(cp->L, "%c", tok); 50 return lj_strfmt_pushf(cp->L, "%c", tok);
50 else 51 else
51 return lj_str_pushf(cp->L, "char(%d)", tok); 52 return lj_strfmt_pushf(cp->L, "char(%d)", tok);
52} 53}
53 54
54/* End-of-line? */ 55/* End-of-line? */
@@ -85,24 +86,10 @@ static LJ_NOINLINE CPChar cp_get_bs(CPState *cp)
85 return cp_get(cp); 86 return cp_get(cp);
86} 87}
87 88
88/* Grow save buffer. */
89static LJ_NOINLINE void cp_save_grow(CPState *cp, CPChar c)
90{
91 MSize newsize;
92 if (cp->sb.sz >= CPARSE_MAX_BUF/2)
93 cp_err(cp, LJ_ERR_XELEM);
94 newsize = cp->sb.sz * 2;
95 lj_str_resizebuf(cp->L, &cp->sb, newsize);
96 cp->sb.buf[cp->sb.n++] = (char)c;
97}
98
99/* Save character in buffer. */ 89/* Save character in buffer. */
100static LJ_AINLINE void cp_save(CPState *cp, CPChar c) 90static LJ_AINLINE void cp_save(CPState *cp, CPChar c)
101{ 91{
102 if (LJ_UNLIKELY(cp->sb.n + 1 > cp->sb.sz)) 92 lj_buf_putb(&cp->sb, c);
103 cp_save_grow(cp, c);
104 else
105 cp->sb.buf[cp->sb.n++] = (char)c;
106} 93}
107 94
108/* Skip line break. Handles "\n", "\r", "\r\n" or "\n\r". */ 95/* Skip line break. Handles "\n", "\r", "\r\n" or "\n\r". */
@@ -122,20 +109,20 @@ LJ_NORET static void cp_errmsg(CPState *cp, CPToken tok, ErrMsg em, ...)
122 tokstr = NULL; 109 tokstr = NULL;
123 } else if (tok == CTOK_IDENT || tok == CTOK_INTEGER || tok == CTOK_STRING || 110 } else if (tok == CTOK_IDENT || tok == CTOK_INTEGER || tok == CTOK_STRING ||
124 tok >= CTOK_FIRSTDECL) { 111 tok >= CTOK_FIRSTDECL) {
125 if (cp->sb.n == 0) cp_save(cp, '$'); 112 if (sbufP(&cp->sb) == sbufB(&cp->sb)) cp_save(cp, '$');
126 cp_save(cp, '\0'); 113 cp_save(cp, '\0');
127 tokstr = cp->sb.buf; 114 tokstr = sbufB(&cp->sb);
128 } else { 115 } else {
129 tokstr = cp_tok2str(cp, tok); 116 tokstr = cp_tok2str(cp, tok);
130 } 117 }
131 L = cp->L; 118 L = cp->L;
132 va_start(argp, em); 119 va_start(argp, em);
133 msg = lj_str_pushvf(L, err2msg(em), argp); 120 msg = lj_strfmt_pushvf(L, err2msg(em), argp);
134 va_end(argp); 121 va_end(argp);
135 if (tokstr) 122 if (tokstr)
136 msg = lj_str_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tokstr); 123 msg = lj_strfmt_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tokstr);
137 if (cp->linenumber > 1) 124 if (cp->linenumber > 1)
138 msg = lj_str_pushf(L, "%s at line %d", msg, cp->linenumber); 125 msg = lj_strfmt_pushf(L, "%s at line %d", msg, cp->linenumber);
139 lj_err_callermsg(L, msg); 126 lj_err_callermsg(L, msg);
140} 127}
141 128
@@ -164,7 +151,7 @@ static CPToken cp_number(CPState *cp)
164 TValue o; 151 TValue o;
165 do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp))); 152 do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp)));
166 cp_save(cp, '\0'); 153 cp_save(cp, '\0');
167 fmt = lj_strscan_scan((const uint8_t *)cp->sb.buf, &o, STRSCAN_OPT_C); 154 fmt = lj_strscan_scan((const uint8_t *)sbufB(&cp->sb), &o, STRSCAN_OPT_C);
168 if (fmt == STRSCAN_INT) cp->val.id = CTID_INT32; 155 if (fmt == STRSCAN_INT) cp->val.id = CTID_INT32;
169 else if (fmt == STRSCAN_U32) cp->val.id = CTID_UINT32; 156 else if (fmt == STRSCAN_U32) cp->val.id = CTID_UINT32;
170 else if (!(cp->mode & CPARSE_MODE_SKIP)) 157 else if (!(cp->mode & CPARSE_MODE_SKIP))
@@ -177,7 +164,7 @@ static CPToken cp_number(CPState *cp)
177static CPToken cp_ident(CPState *cp) 164static CPToken cp_ident(CPState *cp)
178{ 165{
179 do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp))); 166 do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp)));
180 cp->str = lj_str_new(cp->L, cp->sb.buf, cp->sb.n); 167 cp->str = lj_buf_str(cp->L, &cp->sb);
181 cp->val.id = lj_ctype_getname(cp->cts, &cp->ct, cp->str, cp->tmask); 168 cp->val.id = lj_ctype_getname(cp->cts, &cp->ct, cp->str, cp->tmask);
182 if (ctype_type(cp->ct->info) == CT_KW) 169 if (ctype_type(cp->ct->info) == CT_KW)
183 return ctype_cid(cp->ct->info); 170 return ctype_cid(cp->ct->info);
@@ -263,11 +250,11 @@ static CPToken cp_string(CPState *cp)
263 } 250 }
264 cp_get(cp); 251 cp_get(cp);
265 if (delim == '"') { 252 if (delim == '"') {
266 cp->str = lj_str_new(cp->L, cp->sb.buf, cp->sb.n); 253 cp->str = lj_buf_str(cp->L, &cp->sb);
267 return CTOK_STRING; 254 return CTOK_STRING;
268 } else { 255 } else {
269 if (cp->sb.n != 1) cp_err_token(cp, '\''); 256 if (sbuflen(&cp->sb) != 1) cp_err_token(cp, '\'');
270 cp->val.i32 = (int32_t)(char)cp->sb.buf[0]; 257 cp->val.i32 = (int32_t)(char)*sbufB(&cp->sb);
271 cp->val.id = CTID_INT32; 258 cp->val.id = CTID_INT32;
272 return CTOK_INTEGER; 259 return CTOK_INTEGER;
273 } 260 }
@@ -296,7 +283,7 @@ static void cp_comment_cpp(CPState *cp)
296/* Lexical scanner for C. Only a minimal subset is implemented. */ 283/* Lexical scanner for C. Only a minimal subset is implemented. */
297static CPToken cp_next_(CPState *cp) 284static CPToken cp_next_(CPState *cp)
298{ 285{
299 lj_str_resetbuf(&cp->sb); 286 lj_buf_reset(&cp->sb);
300 for (;;) { 287 for (;;) {
301 if (lj_char_isident(cp->c)) 288 if (lj_char_isident(cp->c))
302 return lj_char_isdigit(cp->c) ? cp_number(cp) : cp_ident(cp); 289 return lj_char_isdigit(cp->c) ? cp_number(cp) : cp_ident(cp);
@@ -380,8 +367,7 @@ static void cp_init(CPState *cp)
380 cp->depth = 0; 367 cp->depth = 0;
381 cp->curpack = 0; 368 cp->curpack = 0;
382 cp->packstack[0] = 255; 369 cp->packstack[0] = 255;
383 lj_str_initbuf(&cp->sb); 370 lj_buf_init(cp->L, &cp->sb);
384 lj_str_resizebuf(cp->L, &cp->sb, LJ_MIN_SBUF);
385 lua_assert(cp->p != NULL); 371 lua_assert(cp->p != NULL);
386 cp_get(cp); /* Read-ahead first char. */ 372 cp_get(cp); /* Read-ahead first char. */
387 cp->tok = 0; 373 cp->tok = 0;
@@ -393,7 +379,7 @@ static void cp_init(CPState *cp)
393static void cp_cleanup(CPState *cp) 379static void cp_cleanup(CPState *cp)
394{ 380{
395 global_State *g = G(cp->L); 381 global_State *g = G(cp->L);
396 lj_str_freebuf(g, &cp->sb); 382 lj_buf_free(g, &cp->sb);
397} 383}
398 384
399/* Check and consume optional token. */ 385/* Check and consume optional token. */
@@ -1012,7 +998,7 @@ static void cp_decl_asm(CPState *cp, CPDecl *decl)
1012 if (cp->tok == CTOK_STRING) { 998 if (cp->tok == CTOK_STRING) {
1013 GCstr *str = cp->str; 999 GCstr *str = cp->str;
1014 while (cp_next(cp) == CTOK_STRING) { 1000 while (cp_next(cp) == CTOK_STRING) {
1015 lj_str_pushf(cp->L, "%s%s", strdata(str), strdata(cp->str)); 1001 lj_strfmt_pushf(cp->L, "%s%s", strdata(str), strdata(cp->str));
1016 cp->L->top--; 1002 cp->L->top--;
1017 str = strV(cp->L->top); 1003 str = strV(cp->L->top);
1018 } 1004 }
diff --git a/src/lj_crecord.c b/src/lj_crecord.c
index a46665e9..e200cc99 100644
--- a/src/lj_crecord.c
+++ b/src/lj_crecord.c
@@ -11,13 +11,13 @@
11#if LJ_HASJIT && LJ_HASFFI 11#if LJ_HASJIT && LJ_HASFFI
12 12
13#include "lj_err.h" 13#include "lj_err.h"
14#include "lj_str.h"
15#include "lj_tab.h" 14#include "lj_tab.h"
16#include "lj_frame.h" 15#include "lj_frame.h"
17#include "lj_ctype.h" 16#include "lj_ctype.h"
18#include "lj_cdata.h" 17#include "lj_cdata.h"
19#include "lj_cparse.h" 18#include "lj_cparse.h"
20#include "lj_cconv.h" 19#include "lj_cconv.h"
20#include "lj_carith.h"
21#include "lj_clib.h" 21#include "lj_clib.h"
22#include "lj_ccall.h" 22#include "lj_ccall.h"
23#include "lj_ff.h" 23#include "lj_ff.h"
@@ -31,6 +31,7 @@
31#include "lj_snap.h" 31#include "lj_snap.h"
32#include "lj_crecord.h" 32#include "lj_crecord.h"
33#include "lj_dispatch.h" 33#include "lj_dispatch.h"
34#include "lj_strfmt.h"
34 35
35/* Some local macros to save typing. Undef'd at the end. */ 36/* Some local macros to save typing. Undef'd at the end. */
36#define IR(ref) (&J->cur.ir[(ref)]) 37#define IR(ref) (&J->cur.ir[(ref)])
@@ -441,7 +442,7 @@ static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp,
441 /* fallthrough */ 442 /* fallthrough */
442 case CCX(I, F): 443 case CCX(I, F):
443 if (dt == IRT_CDATA || st == IRT_CDATA) goto err_nyi; 444 if (dt == IRT_CDATA || st == IRT_CDATA) goto err_nyi;
444 sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, IRCONV_TRUNC|IRCONV_ANY); 445 sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, IRCONV_ANY);
445 goto xstore; 446 goto xstore;
446 case CCX(I, P): 447 case CCX(I, P):
447 case CCX(I, A): 448 case CCX(I, A):
@@ -521,7 +522,7 @@ static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp,
521 if (st == IRT_CDATA) goto err_nyi; 522 if (st == IRT_CDATA) goto err_nyi;
522 /* The signed conversion is cheaper. x64 really has 47 bit pointers. */ 523 /* The signed conversion is cheaper. x64 really has 47 bit pointers. */
523 sp = emitconv(sp, (LJ_64 && dsize == 8) ? IRT_I64 : IRT_U32, 524 sp = emitconv(sp, (LJ_64 && dsize == 8) ? IRT_I64 : IRT_U32,
524 st, IRCONV_TRUNC|IRCONV_ANY); 525 st, IRCONV_ANY);
525 goto xstore; 526 goto xstore;
526 527
527 /* Destination is an array. */ 528 /* Destination is an array. */
@@ -640,12 +641,23 @@ static TRef crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, cTValue *sval)
640 sp = emitir(IRT(IR_ADD, IRT_PTR), sp, lj_ir_kintp(J, sizeof(GCstr))); 641 sp = emitir(IRT(IR_ADD, IRT_PTR), sp, lj_ir_kintp(J, sizeof(GCstr)));
641 sid = CTID_A_CCHAR; 642 sid = CTID_A_CCHAR;
642 } 643 }
643 } else { /* NYI: tref_istab(sp), tref_islightud(sp). */ 644 } else if (tref_islightud(sp)) {
645#if LJ_64
646 sp = emitir(IRT(IR_BAND, IRT_P64), sp,
647 lj_ir_kint64(J, U64x(00007fff,ffffffff)));
648#endif
649 } else { /* NYI: tref_istab(sp). */
644 IRType t; 650 IRType t;
645 sid = argv2cdata(J, sp, sval)->ctypeid; 651 sid = argv2cdata(J, sp, sval)->ctypeid;
646 s = ctype_raw(cts, sid); 652 s = ctype_raw(cts, sid);
647 svisnz = cdataptr(cdataV(sval)); 653 svisnz = cdataptr(cdataV(sval));
648 t = crec_ct2irt(cts, s); 654 if (ctype_isfunc(s->info)) {
655 sid = lj_ctype_intern(cts, CTINFO(CT_PTR, CTALIGN_PTR|sid), CTSIZE_PTR);
656 s = ctype_get(cts, sid);
657 t = IRT_PTR;
658 } else {
659 t = crec_ct2irt(cts, s);
660 }
649 if (ctype_isptr(s->info)) { 661 if (ctype_isptr(s->info)) {
650 sp = emitir(IRT(IR_FLOAD, t), sp, IRFL_CDATA_PTR); 662 sp = emitir(IRT(IR_FLOAD, t), sp, IRFL_CDATA_PTR);
651 if (ctype_isref(s->info)) { 663 if (ctype_isref(s->info)) {
@@ -867,21 +879,17 @@ again:
867} 879}
868 880
869/* Record setting a finalizer. */ 881/* Record setting a finalizer. */
870static void crec_finalizer(jit_State *J, TRef trcd, cTValue *fin) 882static void crec_finalizer(jit_State *J, TRef trcd, TRef trfin, cTValue *fin)
871{ 883{
872 TRef trlo = lj_ir_call(J, IRCALL_lj_cdata_setfin, trcd); 884 if (tvisgcv(fin)) {
873 TRef trhi = emitir(IRT(IR_ADD, IRT_P32), trlo, lj_ir_kint(J, 4)); 885 if (!trfin) trfin = lj_ir_kptr(J, gcval(fin));
874 if (LJ_BE) { TRef tmp = trlo; trlo = trhi; trhi = tmp; } 886 } else if (tvisnil(fin)) {
875 if (tvisfunc(fin)) { 887 trfin = lj_ir_kptr(J, NULL);
876 emitir(IRT(IR_XSTORE, IRT_P32), trlo, lj_ir_kfunc(J, funcV(fin)));
877 emitir(IRTI(IR_XSTORE), trhi, lj_ir_kint(J, LJ_TFUNC));
878 } else if (tviscdata(fin)) {
879 emitir(IRT(IR_XSTORE, IRT_P32), trlo,
880 lj_ir_kgc(J, obj2gco(cdataV(fin)), IRT_CDATA));
881 emitir(IRTI(IR_XSTORE), trhi, lj_ir_kint(J, LJ_TCDATA));
882 } else { 888 } else {
883 lj_trace_err(J, LJ_TRERR_BADTYPE); 889 lj_trace_err(J, LJ_TRERR_BADTYPE);
884 } 890 }
891 lj_ir_call(J, IRCALL_lj_cdata_setfin, trcd,
892 trfin, lj_ir_kint(J, (int32_t)itype(fin)));
885 J->needsnap = 1; 893 J->needsnap = 1;
886} 894}
887 895
@@ -892,10 +900,8 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
892 CTSize sz; 900 CTSize sz;
893 CTInfo info = lj_ctype_info(cts, id, &sz); 901 CTInfo info = lj_ctype_info(cts, id, &sz);
894 CType *d = ctype_raw(cts, id); 902 CType *d = ctype_raw(cts, id);
895 TRef trid; 903 TRef trcd, trid = lj_ir_kint(J, id);
896 if (!sz || sz > 128 || (info & CTF_VLA) || ctype_align(info) > CT_MEMALIGN) 904 cTValue *fin;
897 lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: large/special allocations. */
898 trid = lj_ir_kint(J, id);
899 /* Use special instruction to box pointer or 32/64 bit integer. */ 905 /* Use special instruction to box pointer or 32/64 bit integer. */
900 if (ctype_isptr(info) || (ctype_isinteger(info) && (sz == 4 || sz == 8))) { 906 if (ctype_isptr(info) || (ctype_isinteger(info) && (sz == 4 || sz == 8))) {
901 TRef sp = J->base[1] ? crec_ct_tv(J, d, 0, J->base[1], &rd->argv[1]) : 907 TRef sp = J->base[1] ? crec_ct_tv(J, d, 0, J->base[1], &rd->argv[1]) :
@@ -903,11 +909,36 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
903 sz == 4 ? lj_ir_kint(J, 0) : 909 sz == 4 ? lj_ir_kint(J, 0) :
904 (lj_needsplit(J), lj_ir_kint64(J, 0)); 910 (lj_needsplit(J), lj_ir_kint64(J, 0));
905 J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), trid, sp); 911 J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), trid, sp);
912 return;
906 } else { 913 } else {
907 TRef trcd = emitir(IRTG(IR_CNEW, IRT_CDATA), trid, TREF_NIL); 914 TRef trsz = TREF_NIL;
908 cTValue *fin; 915 if ((info & CTF_VLA)) { /* Calculate VLA/VLS size at runtime. */
909 J->base[0] = trcd; 916 CTSize sz0, sz1;
910 if (J->base[1] && !J->base[2] && 917 if (!J->base[1] || J->base[2])
918 lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: init VLA/VLS. */
919 trsz = crec_ct_tv(J, ctype_get(cts, CTID_INT32), 0,
920 J->base[1], &rd->argv[1]);
921 sz0 = lj_ctype_vlsize(cts, d, 0);
922 sz1 = lj_ctype_vlsize(cts, d, 1);
923 trsz = emitir(IRTGI(IR_MULOV), trsz, lj_ir_kint(J, (int32_t)(sz1-sz0)));
924 trsz = emitir(IRTGI(IR_ADDOV), trsz, lj_ir_kint(J, (int32_t)sz0));
925 J->base[1] = 0; /* Simplify logic below. */
926 } else if (ctype_align(info) > CT_MEMALIGN) {
927 trsz = lj_ir_kint(J, sz);
928 }
929 trcd = emitir(IRTG(IR_CNEW, IRT_CDATA), trid, trsz);
930 if (sz > 128 || (info & CTF_VLA)) {
931 TRef dp;
932 CTSize align;
933 special: /* Only handle bulk zero-fill for large/VLA/VLS types. */
934 if (J->base[1])
935 lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: init large/VLA/VLS types. */
936 dp = emitir(IRT(IR_ADD, IRT_PTR), trcd, lj_ir_kintp(J, sizeof(GCcdata)));
937 if (trsz == TREF_NIL) trsz = lj_ir_kint(J, sz);
938 align = ctype_align(info);
939 if (align < CT_MEMALIGN) align = CT_MEMALIGN;
940 crec_fill(J, dp, trsz, lj_ir_kint(J, 0), (1u << align));
941 } else if (J->base[1] && !J->base[2] &&
911 !lj_cconv_multi_init(cts, d, &rd->argv[1])) { 942 !lj_cconv_multi_init(cts, d, &rd->argv[1])) {
912 goto single_init; 943 goto single_init;
913 } else if (ctype_isarray(d->info)) { 944 } else if (ctype_isarray(d->info)) {
@@ -918,8 +949,9 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
918 TValue *sval = &tv; 949 TValue *sval = &tv;
919 MSize i; 950 MSize i;
920 tv.u64 = 0; 951 tv.u64 = 0;
921 if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info))) 952 if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info)) ||
922 lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: init array of aggregates. */ 953 esize * CREC_FILL_MAXUNROLL < sz)
954 goto special;
923 for (i = 1, ofs = 0; ofs < sz; ofs += esize) { 955 for (i = 1, ofs = 0; ofs < sz; ofs += esize) {
924 TRef dp = emitir(IRT(IR_ADD, IRT_PTR), trcd, 956 TRef dp = emitir(IRT(IR_ADD, IRT_PTR), trcd,
925 lj_ir_kintp(J, ofs + sizeof(GCcdata))); 957 lj_ir_kintp(J, ofs + sizeof(GCcdata)));
@@ -976,11 +1008,12 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
976 crec_ct_tv(J, d, dp, lj_ir_kint(J, 0), &tv); 1008 crec_ct_tv(J, d, dp, lj_ir_kint(J, 0), &tv);
977 } 1009 }
978 } 1010 }
979 /* Handle __gc metamethod. */
980 fin = lj_ctype_meta(cts, id, MM_gc);
981 if (fin)
982 crec_finalizer(J, trcd, fin);
983 } 1011 }
1012 J->base[0] = trcd;
1013 /* Handle __gc metamethod. */
1014 fin = lj_ctype_meta(cts, id, MM_gc);
1015 if (fin)
1016 crec_finalizer(J, trcd, 0, fin);
984} 1017}
985 1018
986/* Record argument conversions. */ 1019/* Record argument conversions. */
@@ -1090,7 +1123,7 @@ static void crec_snap_caller(jit_State *J)
1090 ptrdiff_t delta; 1123 ptrdiff_t delta;
1091 if (!frame_islua(base-1) || J->framedepth <= 0) 1124 if (!frame_islua(base-1) || J->framedepth <= 0)
1092 lj_trace_err(J, LJ_TRERR_NYICALL); 1125 lj_trace_err(J, LJ_TRERR_NYICALL);
1093 J->pc = frame_pc(base-1); delta = 1+bc_a(J->pc[-1]); 1126 J->pc = frame_pc(base-1); delta = 1+LJ_FR2+bc_a(J->pc[-1]);
1094 L->top = base; L->base = base - delta; 1127 L->top = base; L->base = base - delta;
1095 J->base[-1] = TREF_FALSE; 1128 J->base[-1] = TREF_FALSE;
1096 J->base -= delta; J->baseslot -= (BCReg)delta; 1129 J->base -= delta; J->baseslot -= (BCReg)delta;
@@ -1233,7 +1266,7 @@ static TRef crec_arith_int64(jit_State *J, TRef *sp, CType **s, MMS mm)
1233 for (i = 0; i < 2; i++) { 1266 for (i = 0; i < 2; i++) {
1234 IRType st = tref_type(sp[i]); 1267 IRType st = tref_type(sp[i]);
1235 if (st == IRT_NUM || st == IRT_FLOAT) 1268 if (st == IRT_NUM || st == IRT_FLOAT)
1236 sp[i] = emitconv(sp[i], dt, st, IRCONV_TRUNC|IRCONV_ANY); 1269 sp[i] = emitconv(sp[i], dt, st, IRCONV_ANY);
1237 else if (!(st == IRT_I64 || st == IRT_U64)) 1270 else if (!(st == IRT_I64 || st == IRT_U64))
1238 sp[i] = emitconv(sp[i], dt, IRT_INT, 1271 sp[i] = emitconv(sp[i], dt, IRT_INT,
1239 (s[i]->info & CTF_UNSIGNED) ? 0 : IRCONV_SEXT); 1272 (s[i]->info & CTF_UNSIGNED) ? 0 : IRCONV_SEXT);
@@ -1301,15 +1334,14 @@ static TRef crec_arith_ptr(jit_State *J, TRef *sp, CType **s, MMS mm)
1301 CTypeID id; 1334 CTypeID id;
1302#if LJ_64 1335#if LJ_64
1303 if (t == IRT_NUM || t == IRT_FLOAT) 1336 if (t == IRT_NUM || t == IRT_FLOAT)
1304 tr = emitconv(tr, IRT_INTP, t, IRCONV_TRUNC|IRCONV_ANY); 1337 tr = emitconv(tr, IRT_INTP, t, IRCONV_ANY);
1305 else if (!(t == IRT_I64 || t == IRT_U64)) 1338 else if (!(t == IRT_I64 || t == IRT_U64))
1306 tr = emitconv(tr, IRT_INTP, IRT_INT, 1339 tr = emitconv(tr, IRT_INTP, IRT_INT,
1307 ((t - IRT_I8) & 1) ? 0 : IRCONV_SEXT); 1340 ((t - IRT_I8) & 1) ? 0 : IRCONV_SEXT);
1308#else 1341#else
1309 if (!tref_typerange(sp[1], IRT_I8, IRT_U32)) { 1342 if (!tref_typerange(sp[1], IRT_I8, IRT_U32)) {
1310 tr = emitconv(tr, IRT_INTP, t, 1343 tr = emitconv(tr, IRT_INTP, t,
1311 (t == IRT_NUM || t == IRT_FLOAT) ? 1344 (t == IRT_NUM || t == IRT_FLOAT) ? IRCONV_ANY : 0);
1312 IRCONV_TRUNC|IRCONV_ANY : 0);
1313 } 1345 }
1314#endif 1346#endif
1315 tr = emitir(IRT(IR_MUL, IRT_INTP), tr, lj_ir_kintp(J, sz)); 1347 tr = emitir(IRT(IR_MUL, IRT_INTP), tr, lj_ir_kintp(J, sz));
@@ -1452,8 +1484,7 @@ void LJ_FASTCALL recff_cdata_arith(jit_State *J, RecordFFData *rd)
1452 !irt_isguard(J->guardemit)) { 1484 !irt_isguard(J->guardemit)) {
1453 const BCIns *pc = frame_contpc(J->L->base-1) - 1; 1485 const BCIns *pc = frame_contpc(J->L->base-1) - 1;
1454 if (bc_op(*pc) <= BC_ISNEP) { 1486 if (bc_op(*pc) <= BC_ISNEP) {
1455 setframe_pc(&J2G(J)->tmptv, pc); 1487 J2G(J)->tmptv.u64 = (uint64_t)(uintptr_t)pc;
1456 J2G(J)->tmptv.u32.lo = ((tref_istrue(tr) ^ bc_op(*pc)) & 1);
1457 J->postproc = LJ_POST_FIXCOMP; 1488 J->postproc = LJ_POST_FIXCOMP;
1458 } 1489 }
1459 } 1490 }
@@ -1642,7 +1673,139 @@ void LJ_FASTCALL recff_ffi_xof(jit_State *J, RecordFFData *rd)
1642void LJ_FASTCALL recff_ffi_gc(jit_State *J, RecordFFData *rd) 1673void LJ_FASTCALL recff_ffi_gc(jit_State *J, RecordFFData *rd)
1643{ 1674{
1644 argv2cdata(J, J->base[0], &rd->argv[0]); 1675 argv2cdata(J, J->base[0], &rd->argv[0]);
1645 crec_finalizer(J, J->base[0], &rd->argv[1]); 1676 if (!J->base[1])
1677 lj_trace_err(J, LJ_TRERR_BADTYPE);
1678 crec_finalizer(J, J->base[0], J->base[1], &rd->argv[1]);
1679}
1680
1681/* -- 64 bit bit.* library functions -------------------------------------- */
1682
1683/* Determine bit operation type from argument type. */
1684static CTypeID crec_bit64_type(CTState *cts, cTValue *tv)
1685{
1686 if (tviscdata(tv)) {
1687 CType *ct = lj_ctype_rawref(cts, cdataV(tv)->ctypeid);
1688 if (ctype_isenum(ct->info)) ct = ctype_child(cts, ct);
1689 if ((ct->info & (CTMASK_NUM|CTF_BOOL|CTF_FP|CTF_UNSIGNED)) ==
1690 CTINFO(CT_NUM, CTF_UNSIGNED) && ct->size == 8)
1691 return CTID_UINT64; /* Use uint64_t, since it has the highest rank. */
1692 return CTID_INT64; /* Otherwise use int64_t. */
1693 }
1694 return 0; /* Use regular 32 bit ops. */
1695}
1696
1697void LJ_FASTCALL recff_bit64_tobit(jit_State *J, RecordFFData *rd)
1698{
1699 CTState *cts = ctype_ctsG(J2G(J));
1700 TRef tr = crec_ct_tv(J, ctype_get(cts, CTID_INT64), 0,
1701 J->base[0], &rd->argv[0]);
1702 if (!tref_isinteger(tr))
1703 tr = emitconv(tr, IRT_INT, tref_type(tr), 0);
1704 J->base[0] = tr;
1705}
1706
1707int LJ_FASTCALL recff_bit64_unary(jit_State *J, RecordFFData *rd)
1708{
1709 CTState *cts = ctype_ctsG(J2G(J));
1710 CTypeID id = crec_bit64_type(cts, &rd->argv[0]);
1711 if (id) {
1712 TRef tr = crec_ct_tv(J, ctype_get(cts, id), 0, J->base[0], &rd->argv[0]);
1713 tr = emitir(IRT(rd->data, id-CTID_INT64+IRT_I64), tr, 0);
1714 J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr);
1715 return 1;
1716 }
1717 return 0;
1718}
1719
1720int LJ_FASTCALL recff_bit64_nary(jit_State *J, RecordFFData *rd)
1721{
1722 CTState *cts = ctype_ctsG(J2G(J));
1723 CTypeID id = 0;
1724 MSize i;
1725 for (i = 0; J->base[i] != 0; i++) {
1726 CTypeID aid = crec_bit64_type(cts, &rd->argv[i]);
1727 if (id < aid) id = aid; /* Determine highest type rank of all arguments. */
1728 }
1729 if (id) {
1730 CType *ct = ctype_get(cts, id);
1731 uint32_t ot = IRT(rd->data, id-CTID_INT64+IRT_I64);
1732 TRef tr = crec_ct_tv(J, ct, 0, J->base[0], &rd->argv[0]);
1733 for (i = 1; J->base[i] != 0; i++) {
1734 TRef tr2 = crec_ct_tv(J, ct, 0, J->base[i], &rd->argv[i]);
1735 tr = emitir(ot, tr, tr2);
1736 }
1737 J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr);
1738 return 1;
1739 }
1740 return 0;
1741}
1742
1743int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd)
1744{
1745 CTState *cts = ctype_ctsG(J2G(J));
1746 CTypeID id;
1747 TRef tsh = 0;
1748 if (J->base[0] && tref_iscdata(J->base[1])) {
1749 tsh = crec_ct_tv(J, ctype_get(cts, CTID_INT64), 0,
1750 J->base[1], &rd->argv[1]);
1751 if (!tref_isinteger(tsh))
1752 tsh = emitconv(tsh, IRT_INT, tref_type(tsh), 0);
1753 J->base[1] = tsh;
1754 }
1755 id = crec_bit64_type(cts, &rd->argv[0]);
1756 if (id) {
1757 TRef tr = crec_ct_tv(J, ctype_get(cts, id), 0, J->base[0], &rd->argv[0]);
1758 uint32_t op = rd->data;
1759 if (!tsh) tsh = lj_opt_narrow_tobit(J, J->base[1]);
1760 if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) &&
1761 !tref_isk(tsh))
1762 tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 63));
1763#ifdef LJ_TARGET_UNIFYROT
1764 if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) {
1765 op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR;
1766 tsh = emitir(IRTI(IR_NEG), tsh, tsh);
1767 }
1768#endif
1769 tr = emitir(IRT(op, id-CTID_INT64+IRT_I64), tr, tsh);
1770 J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr);
1771 return 1;
1772 }
1773 return 0;
1774}
1775
1776TRef recff_bit64_tohex(jit_State *J, RecordFFData *rd, TRef hdr)
1777{
1778 CTState *cts = ctype_ctsG(J2G(J));
1779 CTypeID id = crec_bit64_type(cts, &rd->argv[0]);
1780 TRef tr, trsf = J->base[1];
1781 SFormat sf = (STRFMT_UINT|STRFMT_T_HEX);
1782 int32_t n;
1783 if (trsf) {
1784 CTypeID id2 = 0;
1785 n = (int32_t)lj_carith_check64(J->L, 2, &id2);
1786 if (id2)
1787 trsf = crec_ct_tv(J, ctype_get(cts, CTID_INT32), 0, trsf, &rd->argv[1]);
1788 else
1789 trsf = lj_opt_narrow_tobit(J, trsf);
1790 emitir(IRTGI(IR_EQ), trsf, lj_ir_kint(J, n)); /* Specialize to n. */
1791 } else {
1792 n = id ? 16 : 8;
1793 }
1794 if (n < 0) { n = -n; sf |= STRFMT_F_UPPER; }
1795 sf |= ((SFormat)((n+1)&255) << STRFMT_SH_PREC);
1796 if (id) {
1797 tr = crec_ct_tv(J, ctype_get(cts, id), 0, J->base[0], &rd->argv[0]);
1798 if (n < 16)
1799 tr = emitir(IRT(IR_BAND, IRT_U64), tr,
1800 lj_ir_kint64(J, ((uint64_t)1 << 4*n)-1));
1801 } else {
1802 tr = lj_opt_narrow_tobit(J, J->base[0]);
1803 if (n < 8)
1804 tr = emitir(IRTI(IR_BAND), tr, lj_ir_kint(J, (int32_t)((1u << 4*n)-1)));
1805 tr = emitconv(tr, IRT_U64, IRT_INT, 0); /* No sign-extension. */
1806 lj_needsplit(J);
1807 }
1808 return lj_ir_call(J, IRCALL_lj_strfmt_putfxint, hdr, lj_ir_kint(J, sf), tr);
1646} 1809}
1647 1810
1648/* -- Miscellaneous library functions ------------------------------------- */ 1811/* -- Miscellaneous library functions ------------------------------------- */
diff --git a/src/lj_crecord.h b/src/lj_crecord.h
index a4628cac..59f342a1 100644
--- a/src/lj_crecord.h
+++ b/src/lj_crecord.h
@@ -25,6 +25,13 @@ LJ_FUNC void LJ_FASTCALL recff_ffi_istype(jit_State *J, RecordFFData *rd);
25LJ_FUNC void LJ_FASTCALL recff_ffi_abi(jit_State *J, RecordFFData *rd); 25LJ_FUNC void LJ_FASTCALL recff_ffi_abi(jit_State *J, RecordFFData *rd);
26LJ_FUNC void LJ_FASTCALL recff_ffi_xof(jit_State *J, RecordFFData *rd); 26LJ_FUNC void LJ_FASTCALL recff_ffi_xof(jit_State *J, RecordFFData *rd);
27LJ_FUNC void LJ_FASTCALL recff_ffi_gc(jit_State *J, RecordFFData *rd); 27LJ_FUNC void LJ_FASTCALL recff_ffi_gc(jit_State *J, RecordFFData *rd);
28
29LJ_FUNC void LJ_FASTCALL recff_bit64_tobit(jit_State *J, RecordFFData *rd);
30LJ_FUNC int LJ_FASTCALL recff_bit64_unary(jit_State *J, RecordFFData *rd);
31LJ_FUNC int LJ_FASTCALL recff_bit64_nary(jit_State *J, RecordFFData *rd);
32LJ_FUNC int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd);
33LJ_FUNC TRef recff_bit64_tohex(jit_State *J, RecordFFData *rd, TRef hdr);
34
28LJ_FUNC void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd); 35LJ_FUNC void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd);
29#endif 36#endif
30 37
diff --git a/src/lj_ctype.c b/src/lj_ctype.c
index ac301749..2e23c994 100644
--- a/src/lj_ctype.c
+++ b/src/lj_ctype.c
@@ -11,6 +11,7 @@
11#include "lj_err.h" 11#include "lj_err.h"
12#include "lj_str.h" 12#include "lj_str.h"
13#include "lj_tab.h" 13#include "lj_tab.h"
14#include "lj_strfmt.h"
14#include "lj_ctype.h" 15#include "lj_ctype.h"
15#include "lj_ccallback.h" 16#include "lj_ccallback.h"
16 17
@@ -568,19 +569,19 @@ GCstr *lj_ctype_repr_int64(lua_State *L, uint64_t n, int isunsigned)
568/* Convert complex to string with 'i' or 'I' suffix. */ 569/* Convert complex to string with 'i' or 'I' suffix. */
569GCstr *lj_ctype_repr_complex(lua_State *L, void *sp, CTSize size) 570GCstr *lj_ctype_repr_complex(lua_State *L, void *sp, CTSize size)
570{ 571{
571 char buf[2*LJ_STR_NUMBUF+2+1]; 572 char buf[2*STRFMT_MAXBUF_NUM+2+1], *p = buf;
572 TValue re, im; 573 TValue re, im;
573 size_t len;
574 if (size == 2*sizeof(double)) { 574 if (size == 2*sizeof(double)) {
575 re.n = *(double *)sp; im.n = ((double *)sp)[1]; 575 re.n = *(double *)sp; im.n = ((double *)sp)[1];
576 } else { 576 } else {
577 re.n = (double)*(float *)sp; im.n = (double)((float *)sp)[1]; 577 re.n = (double)*(float *)sp; im.n = (double)((float *)sp)[1];
578 } 578 }
579 len = lj_str_bufnum(buf, &re); 579 p = lj_strfmt_wnum(p, &re);
580 if (!(im.u32.hi & 0x80000000u) || im.n != im.n) buf[len++] = '+'; 580 if (!(im.u32.hi & 0x80000000u) || im.n != im.n) *p++ = '+';
581 len += lj_str_bufnum(buf+len, &im); 581 p = lj_strfmt_wnum(p, &im);
582 buf[len] = buf[len-1] >= 'a' ? 'I' : 'i'; 582 *p = *(p-1) >= 'a' ? 'I' : 'i';
583 return lj_str_new(L, buf, len+1); 583 p++;
584 return lj_str_new(L, buf, p-buf);
584} 585}
585 586
586/* -- C type state -------------------------------------------------------- */ 587/* -- C type state -------------------------------------------------------- */
diff --git a/src/lj_ctype.h b/src/lj_ctype.h
index 3df26f09..6639547a 100644
--- a/src/lj_ctype.h
+++ b/src/lj_ctype.h
@@ -263,7 +263,7 @@ typedef struct CTState {
263/* -- Predefined types ---------------------------------------------------- */ 263/* -- Predefined types ---------------------------------------------------- */
264 264
265/* Target-dependent types. */ 265/* Target-dependent types. */
266#if LJ_TARGET_PPC || LJ_TARGET_PPCSPE 266#if LJ_TARGET_PPC
267#define CTTYDEFP(_) \ 267#define CTTYDEFP(_) \
268 _(LINT32, 4, CT_NUM, CTF_LONG|CTALIGN(2)) 268 _(LINT32, 4, CT_NUM, CTF_LONG|CTALIGN(2))
269#else 269#else
diff --git a/src/lj_debug.c b/src/lj_debug.c
index bd2fa1f5..3226d03b 100644
--- a/src/lj_debug.c
+++ b/src/lj_debug.c
@@ -9,12 +9,12 @@
9#include "lj_obj.h" 9#include "lj_obj.h"
10#include "lj_err.h" 10#include "lj_err.h"
11#include "lj_debug.h" 11#include "lj_debug.h"
12#include "lj_str.h" 12#include "lj_buf.h"
13#include "lj_tab.h" 13#include "lj_tab.h"
14#include "lj_state.h" 14#include "lj_state.h"
15#include "lj_frame.h" 15#include "lj_frame.h"
16#include "lj_bc.h" 16#include "lj_bc.h"
17#include "lj_vm.h" 17#include "lj_strfmt.h"
18#if LJ_HASJIT 18#if LJ_HASJIT
19#include "lj_jit.h" 19#include "lj_jit.h"
20#endif 20#endif
@@ -24,11 +24,11 @@
24/* Get frame corresponding to a level. */ 24/* Get frame corresponding to a level. */
25cTValue *lj_debug_frame(lua_State *L, int level, int *size) 25cTValue *lj_debug_frame(lua_State *L, int level, int *size)
26{ 26{
27 cTValue *frame, *nextframe, *bot = tvref(L->stack); 27 cTValue *frame, *nextframe, *bot = tvref(L->stack)+LJ_FR2;
28 /* Traverse frames backwards. */ 28 /* Traverse frames backwards. */
29 for (nextframe = frame = L->base-1; frame > bot; ) { 29 for (nextframe = frame = L->base-1; frame > bot; ) {
30 if (frame_gc(frame) == obj2gco(L)) 30 if (frame_gc(frame) == obj2gco(L))
31 level++; /* Skip dummy frames. See lj_meta_call(). */ 31 level++; /* Skip dummy frames. See lj_err_optype_call(). */
32 if (level-- == 0) { 32 if (level-- == 0) {
33 *size = (int)(nextframe - frame); 33 *size = (int)(nextframe - frame);
34 return frame; /* Level found. */ 34 return frame; /* Level found. */
@@ -87,8 +87,7 @@ static BCPos debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe)
87 if (frame_islua(f)) { 87 if (frame_islua(f)) {
88 f = frame_prevl(f); 88 f = frame_prevl(f);
89 } else { 89 } else {
90 if (frame_isc(f) || (LJ_HASFFI && frame_iscont(f) && 90 if (frame_isc(f) || (frame_iscont(f) && frame_iscont_fficb(f)))
91 (f-1)->u32.lo == LJ_CONT_FFI_CALLBACK))
92 cf = cframe_raw(cframe_prev(cf)); 91 cf = cframe_raw(cframe_prev(cf));
93 f = frame_prevd(f); 92 f = frame_prevd(f);
94 } 93 }
@@ -142,38 +141,25 @@ static BCLine debug_frameline(lua_State *L, GCfunc *fn, cTValue *nextframe)
142 141
143/* -- Variable names ------------------------------------------------------ */ 142/* -- Variable names ------------------------------------------------------ */
144 143
145/* Read ULEB128 value. */
146static uint32_t debug_read_uleb128(const uint8_t **pp)
147{
148 const uint8_t *p = *pp;
149 uint32_t v = *p++;
150 if (LJ_UNLIKELY(v >= 0x80)) {
151 int sh = 0;
152 v &= 0x7f;
153 do { v |= ((*p & 0x7f) << (sh += 7)); } while (*p++ >= 0x80);
154 }
155 *pp = p;
156 return v;
157}
158
159/* Get name of a local variable from slot number and PC. */ 144/* Get name of a local variable from slot number and PC. */
160static const char *debug_varname(const GCproto *pt, BCPos pc, BCReg slot) 145static const char *debug_varname(const GCproto *pt, BCPos pc, BCReg slot)
161{ 146{
162 const uint8_t *p = proto_varinfo(pt); 147 const char *p = (const char *)proto_varinfo(pt);
163 if (p) { 148 if (p) {
164 BCPos lastpc = 0; 149 BCPos lastpc = 0;
165 for (;;) { 150 for (;;) {
166 const char *name = (const char *)p; 151 const char *name = p;
167 uint32_t vn = *p++; 152 uint32_t vn = *(const uint8_t *)p;
168 BCPos startpc, endpc; 153 BCPos startpc, endpc;
169 if (vn < VARNAME__MAX) { 154 if (vn < VARNAME__MAX) {
170 if (vn == VARNAME_END) break; /* End of varinfo. */ 155 if (vn == VARNAME_END) break; /* End of varinfo. */
171 } else { 156 } else {
172 while (*p++) ; /* Skip over variable name string. */ 157 do { p++; } while (*(const uint8_t *)p); /* Skip over variable name. */
173 } 158 }
174 lastpc = startpc = lastpc + debug_read_uleb128(&p); 159 p++;
160 lastpc = startpc = lastpc + lj_buf_ruleb128(&p);
175 if (startpc > pc) break; 161 if (startpc > pc) break;
176 endpc = startpc + debug_read_uleb128(&p); 162 endpc = startpc + lj_buf_ruleb128(&p);
177 if (pc < endpc && slot-- == 0) { 163 if (pc < endpc && slot-- == 0) {
178 if (vn < VARNAME__MAX) { 164 if (vn < VARNAME__MAX) {
179#define VARNAMESTR(name, str) str "\0" 165#define VARNAMESTR(name, str) str "\0"
@@ -198,7 +184,7 @@ static TValue *debug_localname(lua_State *L, const lua_Debug *ar,
198 TValue *nextframe = size ? frame + size : NULL; 184 TValue *nextframe = size ? frame + size : NULL;
199 GCfunc *fn = frame_func(frame); 185 GCfunc *fn = frame_func(frame);
200 BCPos pc = debug_framepc(L, fn, nextframe); 186 BCPos pc = debug_framepc(L, fn, nextframe);
201 if (!nextframe) nextframe = L->top; 187 if (!nextframe) nextframe = L->top+LJ_FR2;
202 if ((int)slot1 < 0) { /* Negative slot number is for varargs. */ 188 if ((int)slot1 < 0) { /* Negative slot number is for varargs. */
203 if (pc != NO_BCPOS) { 189 if (pc != NO_BCPOS) {
204 GCproto *pt = funcproto(fn); 190 GCproto *pt = funcproto(fn);
@@ -208,7 +194,7 @@ static TValue *debug_localname(lua_State *L, const lua_Debug *ar,
208 nextframe = frame; 194 nextframe = frame;
209 frame = frame_prevd(frame); 195 frame = frame_prevd(frame);
210 } 196 }
211 if (frame + slot1 < nextframe) { 197 if (frame + slot1+LJ_FR2 < nextframe) {
212 *name = "(*vararg)"; 198 *name = "(*vararg)";
213 return frame+slot1; 199 return frame+slot1;
214 } 200 }
@@ -219,7 +205,7 @@ static TValue *debug_localname(lua_State *L, const lua_Debug *ar,
219 if (pc != NO_BCPOS && 205 if (pc != NO_BCPOS &&
220 (*name = debug_varname(funcproto(fn), pc, slot1-1)) != NULL) 206 (*name = debug_varname(funcproto(fn), pc, slot1-1)) != NULL)
221 ; 207 ;
222 else if (slot1 > 0 && frame + slot1 < nextframe) 208 else if (slot1 > 0 && frame + slot1+LJ_FR2 < nextframe)
223 *name = "(*temporary)"; 209 *name = "(*temporary)";
224 return frame+slot1; 210 return frame+slot1;
225} 211}
@@ -282,7 +268,7 @@ restart:
282 *name = strdata(gco2str(proto_kgc(pt, ~(ptrdiff_t)bc_c(ins)))); 268 *name = strdata(gco2str(proto_kgc(pt, ~(ptrdiff_t)bc_c(ins))));
283 if (ip > proto_bc(pt)) { 269 if (ip > proto_bc(pt)) {
284 BCIns insp = ip[-1]; 270 BCIns insp = ip[-1];
285 if (bc_op(insp) == BC_MOV && bc_a(insp) == ra+1 && 271 if (bc_op(insp) == BC_MOV && bc_a(insp) == ra+1+LJ_FR2 &&
286 bc_d(insp) == bc_b(ins)) 272 bc_d(insp) == bc_b(ins))
287 return "method"; 273 return "method";
288 } 274 }
@@ -299,12 +285,12 @@ restart:
299} 285}
300 286
301/* Deduce function name from caller of a frame. */ 287/* Deduce function name from caller of a frame. */
302const char *lj_debug_funcname(lua_State *L, TValue *frame, const char **name) 288const char *lj_debug_funcname(lua_State *L, cTValue *frame, const char **name)
303{ 289{
304 TValue *pframe; 290 cTValue *pframe;
305 GCfunc *fn; 291 GCfunc *fn;
306 BCPos pc; 292 BCPos pc;
307 if (frame <= tvref(L->stack)) 293 if (frame <= tvref(L->stack)+LJ_FR2)
308 return NULL; 294 return NULL;
309 if (frame_isvarg(frame)) 295 if (frame_isvarg(frame))
310 frame = frame_prevd(frame); 296 frame = frame_prevd(frame);
@@ -330,7 +316,7 @@ const char *lj_debug_funcname(lua_State *L, TValue *frame, const char **name)
330/* -- Source code locations ----------------------------------------------- */ 316/* -- Source code locations ----------------------------------------------- */
331 317
332/* Generate shortened source name. */ 318/* Generate shortened source name. */
333void lj_debug_shortname(char *out, GCstr *str) 319void lj_debug_shortname(char *out, GCstr *str, BCLine line)
334{ 320{
335 const char *src = strdata(str); 321 const char *src = strdata(str);
336 if (*src == '=') { 322 if (*src == '=') {
@@ -344,11 +330,11 @@ void lj_debug_shortname(char *out, GCstr *str)
344 *out++ = '.'; *out++ = '.'; *out++ = '.'; 330 *out++ = '.'; *out++ = '.'; *out++ = '.';
345 } 331 }
346 strcpy(out, src); 332 strcpy(out, src);
347 } else { /* Output [string "string"]. */ 333 } else { /* Output [string "string"] or [builtin:name]. */
348 size_t len; /* Length, up to first control char. */ 334 size_t len; /* Length, up to first control char. */
349 for (len = 0; len < LUA_IDSIZE-12; len++) 335 for (len = 0; len < LUA_IDSIZE-12; len++)
350 if (((const unsigned char *)src)[len] < ' ') break; 336 if (((const unsigned char *)src)[len] < ' ') break;
351 strcpy(out, "[string \""); out += 9; 337 strcpy(out, line == ~(BCLine)0 ? "[builtin:" : "[string \""); out += 9;
352 if (src[len] != '\0') { /* Must truncate? */ 338 if (src[len] != '\0') { /* Must truncate? */
353 if (len > LUA_IDSIZE-15) len = LUA_IDSIZE-15; 339 if (len > LUA_IDSIZE-15) len = LUA_IDSIZE-15;
354 strncpy(out, src, len); out += len; 340 strncpy(out, src, len); out += len;
@@ -356,7 +342,7 @@ void lj_debug_shortname(char *out, GCstr *str)
356 } else { 342 } else {
357 strcpy(out, src); out += len; 343 strcpy(out, src); out += len;
358 } 344 }
359 strcpy(out, "\"]"); 345 strcpy(out, line == ~(BCLine)0 ? "]" : "\"]");
360 } 346 }
361} 347}
362 348
@@ -369,14 +355,15 @@ void lj_debug_addloc(lua_State *L, const char *msg,
369 if (isluafunc(fn)) { 355 if (isluafunc(fn)) {
370 BCLine line = debug_frameline(L, fn, nextframe); 356 BCLine line = debug_frameline(L, fn, nextframe);
371 if (line >= 0) { 357 if (line >= 0) {
358 GCproto *pt = funcproto(fn);
372 char buf[LUA_IDSIZE]; 359 char buf[LUA_IDSIZE];
373 lj_debug_shortname(buf, proto_chunkname(funcproto(fn))); 360 lj_debug_shortname(buf, proto_chunkname(pt), pt->firstline);
374 lj_str_pushf(L, "%s:%d: %s", buf, line, msg); 361 lj_strfmt_pushf(L, "%s:%d: %s", buf, line, msg);
375 return; 362 return;
376 } 363 }
377 } 364 }
378 } 365 }
379 lj_str_pushf(L, "%s", msg); 366 lj_strfmt_pushf(L, "%s", msg);
380} 367}
381 368
382/* Push location string for a bytecode position to Lua stack. */ 369/* Push location string for a bytecode position to Lua stack. */
@@ -386,20 +373,22 @@ void lj_debug_pushloc(lua_State *L, GCproto *pt, BCPos pc)
386 const char *s = strdata(name); 373 const char *s = strdata(name);
387 MSize i, len = name->len; 374 MSize i, len = name->len;
388 BCLine line = lj_debug_line(pt, pc); 375 BCLine line = lj_debug_line(pt, pc);
389 if (*s == '@') { 376 if (pt->firstline == ~(BCLine)0) {
377 lj_strfmt_pushf(L, "builtin:%s", s);
378 } else if (*s == '@') {
390 s++; len--; 379 s++; len--;
391 for (i = len; i > 0; i--) 380 for (i = len; i > 0; i--)
392 if (s[i] == '/' || s[i] == '\\') { 381 if (s[i] == '/' || s[i] == '\\') {
393 s += i+1; 382 s += i+1;
394 break; 383 break;
395 } 384 }
396 lj_str_pushf(L, "%s:%d", s, line); 385 lj_strfmt_pushf(L, "%s:%d", s, line);
397 } else if (len > 40) { 386 } else if (len > 40) {
398 lj_str_pushf(L, "%p:%d", pt, line); 387 lj_strfmt_pushf(L, "%p:%d", pt, line);
399 } else if (*s == '=') { 388 } else if (*s == '=') {
400 lj_str_pushf(L, "%s:%d", s+1, line); 389 lj_strfmt_pushf(L, "%s:%d", s+1, line);
401 } else { 390 } else {
402 lj_str_pushf(L, "\"%s\":%d", s, line); 391 lj_strfmt_pushf(L, "\"%s\":%d", s, line);
403 } 392 }
404} 393}
405 394
@@ -462,7 +451,7 @@ int lj_debug_getinfo(lua_State *L, const char *what, lj_Debug *ar, int ext)
462 BCLine firstline = pt->firstline; 451 BCLine firstline = pt->firstline;
463 GCstr *name = proto_chunkname(pt); 452 GCstr *name = proto_chunkname(pt);
464 ar->source = strdata(name); 453 ar->source = strdata(name);
465 lj_debug_shortname(ar->short_src, name); 454 lj_debug_shortname(ar->short_src, name, pt->firstline);
466 ar->linedefined = (int)firstline; 455 ar->linedefined = (int)firstline;
467 ar->lastlinedefined = (int)(firstline + pt->numline); 456 ar->lastlinedefined = (int)(firstline + pt->numline);
468 ar->what = (firstline || !pt->numline) ? "Lua" : "main"; 457 ar->what = (firstline || !pt->numline) ? "Lua" : "main";
@@ -552,6 +541,111 @@ LUA_API int lua_getstack(lua_State *L, int level, lua_Debug *ar)
552 } 541 }
553} 542}
554 543
544#if LJ_HASPROFILE
545/* Put the chunkname into a buffer. */
546static int debug_putchunkname(SBuf *sb, GCproto *pt, int pathstrip)
547{
548 GCstr *name = proto_chunkname(pt);
549 const char *p = strdata(name);
550 if (pt->firstline == ~(BCLine)0) {
551 lj_buf_putmem(sb, "[builtin:", 9);
552 lj_buf_putstr(sb, name);
553 lj_buf_putb(sb, ']');
554 return 0;
555 }
556 if (*p == '=' || *p == '@') {
557 MSize len = name->len-1;
558 p++;
559 if (pathstrip) {
560 int i;
561 for (i = len-1; i >= 0; i--)
562 if (p[i] == '/' || p[i] == '\\') {
563 len -= i+1;
564 p = p+i+1;
565 break;
566 }
567 }
568 lj_buf_putmem(sb, p, len);
569 } else {
570 lj_buf_putmem(sb, "[string]", 8);
571 }
572 return 1;
573}
574
575/* Put a compact stack dump into a buffer. */
576void lj_debug_dumpstack(lua_State *L, SBuf *sb, const char *fmt, int depth)
577{
578 int level = 0, dir = 1, pathstrip = 1;
579 MSize lastlen = 0;
580 if (depth < 0) { level = ~depth; depth = dir = -1; } /* Reverse frames. */
581 while (level != depth) { /* Loop through all frame. */
582 int size;
583 cTValue *frame = lj_debug_frame(L, level, &size);
584 if (frame) {
585 cTValue *nextframe = size ? frame+size : NULL;
586 GCfunc *fn = frame_func(frame);
587 const uint8_t *p = (const uint8_t *)fmt;
588 int c;
589 while ((c = *p++)) {
590 switch (c) {
591 case 'p': /* Preserve full path. */
592 pathstrip = 0;
593 break;
594 case 'F': case 'f': { /* Dump function name. */
595 const char *name;
596 const char *what = lj_debug_funcname(L, frame, &name);
597 if (what) {
598 if (c == 'F' && isluafunc(fn)) { /* Dump module:name for 'F'. */
599 GCproto *pt = funcproto(fn);
600 if (pt->firstline != ~(BCLine)0) { /* Not a bytecode builtin. */
601 debug_putchunkname(sb, pt, pathstrip);
602 lj_buf_putb(sb, ':');
603 }
604 }
605 lj_buf_putmem(sb, name, (MSize)strlen(name));
606 break;
607 } /* else: can't derive a name, dump module:line. */
608 }
609 /* fallthrough */
610 case 'l': /* Dump module:line. */
611 if (isluafunc(fn)) {
612 GCproto *pt = funcproto(fn);
613 if (debug_putchunkname(sb, pt, pathstrip)) {
614 /* Regular Lua function. */
615 BCLine line = c == 'l' ? debug_frameline(L, fn, nextframe) :
616 pt->firstline;
617 lj_buf_putb(sb, ':');
618 lj_strfmt_putint(sb, line >= 0 ? line : pt->firstline);
619 }
620 } else if (isffunc(fn)) { /* Dump numbered builtins. */
621 lj_buf_putmem(sb, "[builtin#", 9);
622 lj_strfmt_putint(sb, fn->c.ffid);
623 lj_buf_putb(sb, ']');
624 } else { /* Dump C function address. */
625 lj_buf_putb(sb, '@');
626 lj_strfmt_putptr(sb, fn->c.f);
627 }
628 break;
629 case 'Z': /* Zap trailing separator. */
630 lastlen = sbuflen(sb);
631 break;
632 default:
633 lj_buf_putb(sb, c);
634 break;
635 }
636 }
637 } else if (dir == 1) {
638 break;
639 } else {
640 level -= size; /* Reverse frame order: quickly skip missing level. */
641 }
642 level += dir;
643 }
644 if (lastlen)
645 setsbufP(sb, sbufB(sb) + lastlen); /* Zap trailing separator. */
646}
647#endif
648
555/* Number of frames for the leading and trailing part of a traceback. */ 649/* Number of frames for the leading and trailing part of a traceback. */
556#define TRACEBACK_LEVELS1 12 650#define TRACEBACK_LEVELS1 12
557#define TRACEBACK_LEVELS2 10 651#define TRACEBACK_LEVELS2 10
diff --git a/src/lj_debug.h b/src/lj_debug.h
index fa8988c3..11d308a4 100644
--- a/src/lj_debug.h
+++ b/src/lj_debug.h
@@ -32,14 +32,18 @@ LJ_FUNC const char *lj_debug_uvname(GCproto *pt, uint32_t idx);
32LJ_FUNC const char *lj_debug_uvnamev(cTValue *o, uint32_t idx, TValue **tvp); 32LJ_FUNC const char *lj_debug_uvnamev(cTValue *o, uint32_t idx, TValue **tvp);
33LJ_FUNC const char *lj_debug_slotname(GCproto *pt, const BCIns *pc, 33LJ_FUNC const char *lj_debug_slotname(GCproto *pt, const BCIns *pc,
34 BCReg slot, const char **name); 34 BCReg slot, const char **name);
35LJ_FUNC const char *lj_debug_funcname(lua_State *L, TValue *frame, 35LJ_FUNC const char *lj_debug_funcname(lua_State *L, cTValue *frame,
36 const char **name); 36 const char **name);
37LJ_FUNC void lj_debug_shortname(char *out, GCstr *str); 37LJ_FUNC void lj_debug_shortname(char *out, GCstr *str, BCLine line);
38LJ_FUNC void lj_debug_addloc(lua_State *L, const char *msg, 38LJ_FUNC void lj_debug_addloc(lua_State *L, const char *msg,
39 cTValue *frame, cTValue *nextframe); 39 cTValue *frame, cTValue *nextframe);
40LJ_FUNC void lj_debug_pushloc(lua_State *L, GCproto *pt, BCPos pc); 40LJ_FUNC void lj_debug_pushloc(lua_State *L, GCproto *pt, BCPos pc);
41LJ_FUNC int lj_debug_getinfo(lua_State *L, const char *what, lj_Debug *ar, 41LJ_FUNC int lj_debug_getinfo(lua_State *L, const char *what, lj_Debug *ar,
42 int ext); 42 int ext);
43#if LJ_HASPROFILE
44LJ_FUNC void lj_debug_dumpstack(lua_State *L, SBuf *sb, const char *fmt,
45 int depth);
46#endif
43 47
44/* Fixed internal variable names. */ 48/* Fixed internal variable names. */
45#define VARNAMEDEF(_) \ 49#define VARNAMEDEF(_) \
diff --git a/src/lj_def.h b/src/lj_def.h
index e666c9e3..c8fe4aa4 100644
--- a/src/lj_def.h
+++ b/src/lj_def.h
@@ -46,10 +46,14 @@ typedef unsigned int uintptr_t;
46#include <stdlib.h> 46#include <stdlib.h>
47 47
48/* Various VM limits. */ 48/* Various VM limits. */
49#define LJ_MAX_MEM 0x7fffff00 /* Max. total memory allocation. */ 49#define LJ_MAX_MEM32 0x7fffff00 /* Max. 32 bit memory allocation. */
50#define LJ_MAX_MEM64 ((uint64_t)1<<47) /* Max. 64 bit memory allocation. */
51/* Max. total memory allocation. */
52#define LJ_MAX_MEM (LJ_GC64 ? LJ_MAX_MEM64 : LJ_MAX_MEM32)
50#define LJ_MAX_ALLOC LJ_MAX_MEM /* Max. individual allocation length. */ 53#define LJ_MAX_ALLOC LJ_MAX_MEM /* Max. individual allocation length. */
51#define LJ_MAX_STR LJ_MAX_MEM /* Max. string length. */ 54#define LJ_MAX_STR LJ_MAX_MEM32 /* Max. string length. */
52#define LJ_MAX_UDATA LJ_MAX_MEM /* Max. userdata length. */ 55#define LJ_MAX_BUF LJ_MAX_MEM32 /* Max. buffer length. */
56#define LJ_MAX_UDATA LJ_MAX_MEM32 /* Max. userdata length. */
53 57
54#define LJ_MAX_STRTAB (1<<26) /* Max. string table size. */ 58#define LJ_MAX_STRTAB (1<<26) /* Max. string table size. */
55#define LJ_MAX_HBITS 26 /* Max. hash bits. */ 59#define LJ_MAX_HBITS 26 /* Max. hash bits. */
@@ -57,7 +61,7 @@ typedef unsigned int uintptr_t;
57#define LJ_MAX_ASIZE ((1<<(LJ_MAX_ABITS-1))+1) /* Max. array part size. */ 61#define LJ_MAX_ASIZE ((1<<(LJ_MAX_ABITS-1))+1) /* Max. array part size. */
58#define LJ_MAX_COLOSIZE 16 /* Max. elems for colocated array. */ 62#define LJ_MAX_COLOSIZE 16 /* Max. elems for colocated array. */
59 63
60#define LJ_MAX_LINE LJ_MAX_MEM /* Max. source code line number. */ 64#define LJ_MAX_LINE LJ_MAX_MEM32 /* Max. source code line number. */
61#define LJ_MAX_XLEVEL 200 /* Max. syntactic nesting level. */ 65#define LJ_MAX_XLEVEL 200 /* Max. syntactic nesting level. */
62#define LJ_MAX_BCINS (1<<26) /* Max. # of bytecode instructions. */ 66#define LJ_MAX_BCINS (1<<26) /* Max. # of bytecode instructions. */
63#define LJ_MAX_SLOTS 250 /* Max. # of slots in a Lua func. */ 67#define LJ_MAX_SLOTS 250 /* Max. # of slots in a Lua func. */
@@ -65,7 +69,7 @@ typedef unsigned int uintptr_t;
65#define LJ_MAX_UPVAL 60 /* Max. # of upvalues. */ 69#define LJ_MAX_UPVAL 60 /* Max. # of upvalues. */
66 70
67#define LJ_MAX_IDXCHAIN 100 /* __index/__newindex chain limit. */ 71#define LJ_MAX_IDXCHAIN 100 /* __index/__newindex chain limit. */
68#define LJ_STACK_EXTRA 5 /* Extra stack space (metamethods). */ 72#define LJ_STACK_EXTRA (5+2*LJ_FR2) /* Extra stack space (metamethods). */
69 73
70#define LJ_NUM_CBPAGE 1 /* Number of FFI callback pages. */ 74#define LJ_NUM_CBPAGE 1 /* Number of FFI callback pages. */
71 75
@@ -99,6 +103,14 @@ typedef unsigned int uintptr_t;
99#define checki32(x) ((x) == (int32_t)(x)) 103#define checki32(x) ((x) == (int32_t)(x))
100#define checku32(x) ((x) == (uint32_t)(x)) 104#define checku32(x) ((x) == (uint32_t)(x))
101#define checkptr32(x) ((uintptr_t)(x) == (uint32_t)(uintptr_t)(x)) 105#define checkptr32(x) ((uintptr_t)(x) == (uint32_t)(uintptr_t)(x))
106#define checkptr47(x) (((uint64_t)(x) >> 47) == 0)
107#if LJ_GC64
108#define checkptrGC(x) (checkptr47((x)))
109#elif LJ_64
110#define checkptrGC(x) (checkptr32((x)))
111#else
112#define checkptrGC(x) 1
113#endif
102 114
103/* Every half-decent C compiler transforms this into a rotate instruction. */ 115/* Every half-decent C compiler transforms this into a rotate instruction. */
104#define lj_rol(x, n) (((x)<<(n)) | ((x)>>(-(int)(n)&(8*sizeof(x)-1)))) 116#define lj_rol(x, n) (((x)<<(n)) | ((x)>>(-(int)(n)&(8*sizeof(x)-1))))
diff --git a/src/lj_dispatch.c b/src/lj_dispatch.c
index 37256576..1a07371c 100644
--- a/src/lj_dispatch.c
+++ b/src/lj_dispatch.c
@@ -8,6 +8,7 @@
8 8
9#include "lj_obj.h" 9#include "lj_obj.h"
10#include "lj_err.h" 10#include "lj_err.h"
11#include "lj_buf.h"
11#include "lj_func.h" 12#include "lj_func.h"
12#include "lj_str.h" 13#include "lj_str.h"
13#include "lj_tab.h" 14#include "lj_tab.h"
@@ -17,6 +18,7 @@
17#include "lj_frame.h" 18#include "lj_frame.h"
18#include "lj_bc.h" 19#include "lj_bc.h"
19#include "lj_ff.h" 20#include "lj_ff.h"
21#include "lj_strfmt.h"
20#if LJ_HASJIT 22#if LJ_HASJIT
21#include "lj_jit.h" 23#include "lj_jit.h"
22#endif 24#endif
@@ -25,6 +27,9 @@
25#endif 27#endif
26#include "lj_trace.h" 28#include "lj_trace.h"
27#include "lj_dispatch.h" 29#include "lj_dispatch.h"
30#if LJ_HASPROFILE
31#include "lj_profile.h"
32#endif
28#include "lj_vm.h" 33#include "lj_vm.h"
29#include "luajit.h" 34#include "luajit.h"
30 35
@@ -37,6 +42,12 @@ LJ_STATIC_ASSERT(GG_NUM_ASMFF == FF_NUM_ASMFUNC);
37#include <math.h> 42#include <math.h>
38LJ_FUNCA_NORET void LJ_FASTCALL lj_ffh_coroutine_wrap_err(lua_State *L, 43LJ_FUNCA_NORET void LJ_FASTCALL lj_ffh_coroutine_wrap_err(lua_State *L,
39 lua_State *co); 44 lua_State *co);
45#if !LJ_HASJIT
46#define lj_dispatch_stitch lj_dispatch_ins
47#endif
48#if !LJ_HASPROFILE
49#define lj_dispatch_profile lj_dispatch_ins
50#endif
40 51
41#define GOTFUNC(name) (ASMFunction)name, 52#define GOTFUNC(name) (ASMFunction)name,
42static const ASMFunction dispatch_got[] = { 53static const ASMFunction dispatch_got[] = {
@@ -82,11 +93,12 @@ void lj_dispatch_init_hotcount(global_State *g)
82#endif 93#endif
83 94
84/* Internal dispatch mode bits. */ 95/* Internal dispatch mode bits. */
85#define DISPMODE_JIT 0x01 /* JIT compiler on. */ 96#define DISPMODE_CALL 0x01 /* Override call dispatch. */
86#define DISPMODE_REC 0x02 /* Recording active. */ 97#define DISPMODE_RET 0x02 /* Override return dispatch. */
87#define DISPMODE_INS 0x04 /* Override instruction dispatch. */ 98#define DISPMODE_INS 0x04 /* Override instruction dispatch. */
88#define DISPMODE_CALL 0x08 /* Override call dispatch. */ 99#define DISPMODE_JIT 0x10 /* JIT compiler on. */
89#define DISPMODE_RET 0x10 /* Override return dispatch. */ 100#define DISPMODE_REC 0x20 /* Recording active. */
101#define DISPMODE_PROF 0x40 /* Profiling active. */
90 102
91/* Update dispatch table depending on various flags. */ 103/* Update dispatch table depending on various flags. */
92void lj_dispatch_update(global_State *g) 104void lj_dispatch_update(global_State *g)
@@ -98,6 +110,9 @@ void lj_dispatch_update(global_State *g)
98 mode |= G2J(g)->state != LJ_TRACE_IDLE ? 110 mode |= G2J(g)->state != LJ_TRACE_IDLE ?
99 (DISPMODE_REC|DISPMODE_INS|DISPMODE_CALL) : 0; 111 (DISPMODE_REC|DISPMODE_INS|DISPMODE_CALL) : 0;
100#endif 112#endif
113#if LJ_HASPROFILE
114 mode |= (g->hookmask & HOOK_PROFILE) ? (DISPMODE_PROF|DISPMODE_INS) : 0;
115#endif
101 mode |= (g->hookmask & (LUA_MASKLINE|LUA_MASKCOUNT)) ? DISPMODE_INS : 0; 116 mode |= (g->hookmask & (LUA_MASKLINE|LUA_MASKCOUNT)) ? DISPMODE_INS : 0;
102 mode |= (g->hookmask & LUA_MASKCALL) ? DISPMODE_CALL : 0; 117 mode |= (g->hookmask & LUA_MASKCALL) ? DISPMODE_CALL : 0;
103 mode |= (g->hookmask & LUA_MASKRET) ? DISPMODE_RET : 0; 118 mode |= (g->hookmask & LUA_MASKRET) ? DISPMODE_RET : 0;
@@ -126,9 +141,9 @@ void lj_dispatch_update(global_State *g)
126 disp[GG_LEN_DDISP+BC_LOOP] = f_loop; 141 disp[GG_LEN_DDISP+BC_LOOP] = f_loop;
127 142
128 /* Set dynamic instruction dispatch. */ 143 /* Set dynamic instruction dispatch. */
129 if ((oldmode ^ mode) & (DISPMODE_REC|DISPMODE_INS)) { 144 if ((oldmode ^ mode) & (DISPMODE_PROF|DISPMODE_REC|DISPMODE_INS)) {
130 /* Need to update the whole table. */ 145 /* Need to update the whole table. */
131 if (!(mode & (DISPMODE_REC|DISPMODE_INS))) { /* No ins dispatch? */ 146 if (!(mode & DISPMODE_INS)) { /* No ins dispatch? */
132 /* Copy static dispatch table to dynamic dispatch table. */ 147 /* Copy static dispatch table to dynamic dispatch table. */
133 memcpy(&disp[0], &disp[GG_LEN_DDISP], GG_LEN_SDISP*sizeof(ASMFunction)); 148 memcpy(&disp[0], &disp[GG_LEN_DDISP], GG_LEN_SDISP*sizeof(ASMFunction));
134 /* Overwrite with dynamic return dispatch. */ 149 /* Overwrite with dynamic return dispatch. */
@@ -140,12 +155,13 @@ void lj_dispatch_update(global_State *g)
140 } 155 }
141 } else { 156 } else {
142 /* The recording dispatch also checks for hooks. */ 157 /* The recording dispatch also checks for hooks. */
143 ASMFunction f = (mode & DISPMODE_REC) ? lj_vm_record : lj_vm_inshook; 158 ASMFunction f = (mode & DISPMODE_PROF) ? lj_vm_profhook :
159 (mode & DISPMODE_REC) ? lj_vm_record : lj_vm_inshook;
144 uint32_t i; 160 uint32_t i;
145 for (i = 0; i < GG_LEN_SDISP; i++) 161 for (i = 0; i < GG_LEN_SDISP; i++)
146 disp[i] = f; 162 disp[i] = f;
147 } 163 }
148 } else if (!(mode & (DISPMODE_REC|DISPMODE_INS))) { 164 } else if (!(mode & DISPMODE_INS)) {
149 /* Otherwise set dynamic counting ins. */ 165 /* Otherwise set dynamic counting ins. */
150 disp[BC_FORL] = f_forl; 166 disp[BC_FORL] = f_forl;
151 disp[BC_ITERL] = f_iterl; 167 disp[BC_ITERL] = f_iterl;
@@ -352,10 +368,19 @@ static void callhook(lua_State *L, int event, BCLine line)
352 /* Top frame, nextframe = NULL. */ 368 /* Top frame, nextframe = NULL. */
353 ar.i_ci = (int)((L->base-1) - tvref(L->stack)); 369 ar.i_ci = (int)((L->base-1) - tvref(L->stack));
354 lj_state_checkstack(L, 1+LUA_MINSTACK); 370 lj_state_checkstack(L, 1+LUA_MINSTACK);
371#if LJ_HASPROFILE && !LJ_PROFILE_SIGPROF
372 lj_profile_hook_enter(g);
373#else
355 hook_enter(g); 374 hook_enter(g);
375#endif
356 hookf(L, &ar); 376 hookf(L, &ar);
357 lua_assert(hook_active(g)); 377 lua_assert(hook_active(g));
378 setgcref(g->cur_L, obj2gco(L));
379#if LJ_HASPROFILE && !LJ_PROFILE_SIGPROF
380 lj_profile_hook_leave(g);
381#else
358 hook_leave(g); 382 hook_leave(g);
383#endif
359 } 384 }
360} 385}
361 386
@@ -368,7 +393,7 @@ static BCReg cur_topslot(GCproto *pt, const BCIns *pc, uint32_t nres)
368 if (bc_op(ins) == BC_UCLO) 393 if (bc_op(ins) == BC_UCLO)
369 ins = pc[bc_j(ins)]; 394 ins = pc[bc_j(ins)];
370 switch (bc_op(ins)) { 395 switch (bc_op(ins)) {
371 case BC_CALLM: case BC_CALLMT: return bc_a(ins) + bc_c(ins) + nres-1+1; 396 case BC_CALLM: case BC_CALLMT: return bc_a(ins) + bc_c(ins) + nres-1+1+LJ_FR2;
372 case BC_RETM: return bc_a(ins) + bc_d(ins) + nres-1; 397 case BC_RETM: return bc_a(ins) + bc_d(ins) + nres-1;
373 case BC_TSETM: return bc_a(ins) + nres-1; 398 case BC_TSETM: return bc_a(ins) + nres-1;
374 default: return pt->framesize; 399 default: return pt->framesize;
@@ -492,3 +517,41 @@ out:
492 return makeasmfunc(lj_bc_ofs[op]); /* Return static dispatch target. */ 517 return makeasmfunc(lj_bc_ofs[op]); /* Return static dispatch target. */
493} 518}
494 519
520#if LJ_HASJIT
521/* Stitch a new trace. */
522void LJ_FASTCALL lj_dispatch_stitch(jit_State *J, const BCIns *pc)
523{
524 ERRNO_SAVE
525 lua_State *L = J->L;
526 void *cf = cframe_raw(L->cframe);
527 const BCIns *oldpc = cframe_pc(cf);
528 setcframe_pc(cf, pc);
529 /* Before dispatch, have to bias PC by 1. */
530 L->top = L->base + cur_topslot(curr_proto(L), pc+1, cframe_multres_n(cf));
531 lj_trace_stitch(J, pc-1); /* Point to the CALL instruction. */
532 setcframe_pc(cf, oldpc);
533 ERRNO_RESTORE
534}
535#endif
536
537#if LJ_HASPROFILE
538/* Profile dispatch. */
539void LJ_FASTCALL lj_dispatch_profile(lua_State *L, const BCIns *pc)
540{
541 ERRNO_SAVE
542 GCfunc *fn = curr_func(L);
543 GCproto *pt = funcproto(fn);
544 void *cf = cframe_raw(L->cframe);
545 const BCIns *oldpc = cframe_pc(cf);
546 global_State *g;
547 setcframe_pc(cf, pc);
548 L->top = L->base + cur_topslot(pt, pc, cframe_multres_n(cf));
549 lj_profile_interpreter(L);
550 setcframe_pc(cf, oldpc);
551 g = G(L);
552 setgcref(g->cur_L, obj2gco(L));
553 setvmstate(g, INTERP);
554 ERRNO_RESTORE
555}
556#endif
557
diff --git a/src/lj_dispatch.h b/src/lj_dispatch.h
index 778affc8..1e247e38 100644
--- a/src/lj_dispatch.h
+++ b/src/lj_dispatch.h
@@ -29,15 +29,17 @@
29 _(floor) _(ceil) _(trunc) _(log) _(log10) _(exp) _(sin) _(cos) _(tan) \ 29 _(floor) _(ceil) _(trunc) _(log) _(log10) _(exp) _(sin) _(cos) _(tan) \
30 _(asin) _(acos) _(atan) _(sinh) _(cosh) _(tanh) _(frexp) _(modf) _(atan2) \ 30 _(asin) _(acos) _(atan) _(sinh) _(cosh) _(tanh) _(frexp) _(modf) _(atan2) \
31 _(pow) _(fmod) _(ldexp) \ 31 _(pow) _(fmod) _(ldexp) \
32 _(lj_dispatch_call) _(lj_dispatch_ins) _(lj_err_throw) \ 32 _(lj_dispatch_call) _(lj_dispatch_ins) _(lj_dispatch_stitch) \
33 _(lj_dispatch_profile) _(lj_err_throw) \
33 _(lj_ffh_coroutine_wrap_err) _(lj_func_closeuv) _(lj_func_newL_gc) \ 34 _(lj_ffh_coroutine_wrap_err) _(lj_func_closeuv) _(lj_func_newL_gc) \
34 _(lj_gc_barrieruv) _(lj_gc_step) _(lj_gc_step_fixtop) _(lj_meta_arith) \ 35 _(lj_gc_barrieruv) _(lj_gc_step) _(lj_gc_step_fixtop) _(lj_meta_arith) \
35 _(lj_meta_call) _(lj_meta_cat) _(lj_meta_comp) _(lj_meta_equal) \ 36 _(lj_meta_call) _(lj_meta_cat) _(lj_meta_comp) _(lj_meta_equal) \
36 _(lj_meta_for) _(lj_meta_len) _(lj_meta_tget) _(lj_meta_tset) \ 37 _(lj_meta_for) _(lj_meta_istype) _(lj_meta_len) _(lj_meta_tget) \
37 _(lj_state_growstack) _(lj_str_fromnum) _(lj_str_fromnumber) _(lj_str_new) \ 38 _(lj_meta_tset) _(lj_state_growstack) _(lj_strfmt_num) \
38 _(lj_tab_dup) _(lj_tab_get) _(lj_tab_getinth) _(lj_tab_len) _(lj_tab_new) \ 39 _(lj_str_new) _(lj_tab_dup) _(lj_tab_get) _(lj_tab_getinth) _(lj_tab_len) \
39 _(lj_tab_newkey) _(lj_tab_next) _(lj_tab_reasize) \ 40 _(lj_tab_new) _(lj_tab_newkey) _(lj_tab_next) _(lj_tab_reasize) \
40 JITGOTDEF(_) FFIGOTDEF(_) 41 _(lj_tab_setinth) _(lj_buf_putstr_reverse) _(lj_buf_putstr_lower) \
42 _(lj_buf_putstr_upper) _(lj_buf_tostr) JITGOTDEF(_) FFIGOTDEF(_)
41 43
42enum { 44enum {
43#define GOTENUM(name) LJ_GOT_##name, 45#define GOTENUM(name) LJ_GOT_##name,
@@ -60,7 +62,7 @@ typedef uint16_t HotCount;
60#define HOTCOUNT_CALL 1 62#define HOTCOUNT_CALL 1
61 63
62/* This solves a circular dependency problem -- bump as needed. Sigh. */ 64/* This solves a circular dependency problem -- bump as needed. Sigh. */
63#define GG_NUM_ASMFF 62 65#define GG_NUM_ASMFF 57
64 66
65#define GG_LEN_DDISP (BC__MAX + GG_NUM_ASMFF) 67#define GG_LEN_DDISP (BC__MAX + GG_NUM_ASMFF)
66#define GG_LEN_SDISP BC_FUNCF 68#define GG_LEN_SDISP BC_FUNCF
@@ -109,7 +111,12 @@ LJ_FUNC void lj_dispatch_update(global_State *g);
109/* Instruction dispatch callback for hooks or when recording. */ 111/* Instruction dispatch callback for hooks or when recording. */
110LJ_FUNCA void LJ_FASTCALL lj_dispatch_ins(lua_State *L, const BCIns *pc); 112LJ_FUNCA void LJ_FASTCALL lj_dispatch_ins(lua_State *L, const BCIns *pc);
111LJ_FUNCA ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns*pc); 113LJ_FUNCA ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns*pc);
112LJ_FUNCA void LJ_FASTCALL lj_dispatch_return(lua_State *L, const BCIns *pc); 114#if LJ_HASJIT
115LJ_FUNCA void LJ_FASTCALL lj_dispatch_stitch(jit_State *J, const BCIns *pc);
116#endif
117#if LJ_HASPROFILE
118LJ_FUNCA void LJ_FASTCALL lj_dispatch_profile(lua_State *L, const BCIns *pc);
119#endif
113 120
114#if LJ_HASFFI && !defined(_BUILDVM_H) 121#if LJ_HASFFI && !defined(_BUILDVM_H)
115/* Save/restore errno and GetLastError() around hooks, exits and recording. */ 122/* Save/restore errno and GetLastError() around hooks, exits and recording. */
diff --git a/src/lj_emit_arm.h b/src/lj_emit_arm.h
index 8c5e5379..45ce519e 100644
--- a/src/lj_emit_arm.h
+++ b/src/lj_emit_arm.h
@@ -308,30 +308,30 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
308 emit_dm(as, ARMI_MOV, dst, src); 308 emit_dm(as, ARMI_MOV, dst, src);
309} 309}
310 310
311/* Generic load of register from stack slot. */ 311/* Generic load of register with base and (small) offset address. */
312static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs) 312static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
313{ 313{
314#if LJ_SOFTFP 314#if LJ_SOFTFP
315 lua_assert(!irt_isnum(ir->t)); UNUSED(ir); 315 lua_assert(!irt_isnum(ir->t)); UNUSED(ir);
316#else 316#else
317 if (r >= RID_MAX_GPR) 317 if (r >= RID_MAX_GPR)
318 emit_vlso(as, irt_isnum(ir->t) ? ARMI_VLDR_D : ARMI_VLDR_S, r, RID_SP, ofs); 318 emit_vlso(as, irt_isnum(ir->t) ? ARMI_VLDR_D : ARMI_VLDR_S, r, base, ofs);
319 else 319 else
320#endif 320#endif
321 emit_lso(as, ARMI_LDR, r, RID_SP, ofs); 321 emit_lso(as, ARMI_LDR, r, base, ofs);
322} 322}
323 323
324/* Generic store of register to stack slot. */ 324/* Generic store of register with base and (small) offset address. */
325static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs) 325static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
326{ 326{
327#if LJ_SOFTFP 327#if LJ_SOFTFP
328 lua_assert(!irt_isnum(ir->t)); UNUSED(ir); 328 lua_assert(!irt_isnum(ir->t)); UNUSED(ir);
329#else 329#else
330 if (r >= RID_MAX_GPR) 330 if (r >= RID_MAX_GPR)
331 emit_vlso(as, irt_isnum(ir->t) ? ARMI_VSTR_D : ARMI_VSTR_S, r, RID_SP, ofs); 331 emit_vlso(as, irt_isnum(ir->t) ? ARMI_VSTR_D : ARMI_VSTR_S, r, base, ofs);
332 else 332 else
333#endif 333#endif
334 emit_lso(as, ARMI_STR, r, RID_SP, ofs); 334 emit_lso(as, ARMI_STR, r, base, ofs);
335} 335}
336 336
337/* Emit an arithmetic/logic operation with a constant operand. */ 337/* Emit an arithmetic/logic operation with a constant operand. */
diff --git a/src/lj_emit_mips.h b/src/lj_emit_mips.h
index 0fc07d91..8e7ee66f 100644
--- a/src/lj_emit_mips.h
+++ b/src/lj_emit_mips.h
@@ -178,24 +178,24 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
178 emit_fg(as, irt_isnum(ir->t) ? MIPSI_MOV_D : MIPSI_MOV_S, dst, src); 178 emit_fg(as, irt_isnum(ir->t) ? MIPSI_MOV_D : MIPSI_MOV_S, dst, src);
179} 179}
180 180
181/* Generic load of register from stack slot. */ 181/* Generic load of register with base and (small) offset address. */
182static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs) 182static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
183{ 183{
184 if (r < RID_MAX_GPR) 184 if (r < RID_MAX_GPR)
185 emit_tsi(as, MIPSI_LW, r, RID_SP, ofs); 185 emit_tsi(as, MIPSI_LW, r, base, ofs);
186 else 186 else
187 emit_tsi(as, irt_isnum(ir->t) ? MIPSI_LDC1 : MIPSI_LWC1, 187 emit_tsi(as, irt_isnum(ir->t) ? MIPSI_LDC1 : MIPSI_LWC1,
188 (r & 31), RID_SP, ofs); 188 (r & 31), base, ofs);
189} 189}
190 190
191/* Generic store of register to stack slot. */ 191/* Generic store of register with base and (small) offset address. */
192static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs) 192static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
193{ 193{
194 if (r < RID_MAX_GPR) 194 if (r < RID_MAX_GPR)
195 emit_tsi(as, MIPSI_SW, r, RID_SP, ofs); 195 emit_tsi(as, MIPSI_SW, r, base, ofs);
196 else 196 else
197 emit_tsi(as, irt_isnum(ir->t) ? MIPSI_SDC1 : MIPSI_SWC1, 197 emit_tsi(as, irt_isnum(ir->t) ? MIPSI_SDC1 : MIPSI_SWC1,
198 (r&31), RID_SP, ofs); 198 (r&31), base, ofs);
199} 199}
200 200
201/* Add offset to pointer. */ 201/* Add offset to pointer. */
diff --git a/src/lj_emit_ppc.h b/src/lj_emit_ppc.h
index 14edf00f..087860ed 100644
--- a/src/lj_emit_ppc.h
+++ b/src/lj_emit_ppc.h
@@ -186,22 +186,22 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
186 emit_fb(as, PPCI_FMR, dst, src); 186 emit_fb(as, PPCI_FMR, dst, src);
187} 187}
188 188
189/* Generic load of register from stack slot. */ 189/* Generic load of register with base and (small) offset address. */
190static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs) 190static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
191{ 191{
192 if (r < RID_MAX_GPR) 192 if (r < RID_MAX_GPR)
193 emit_tai(as, PPCI_LWZ, r, RID_SP, ofs); 193 emit_tai(as, PPCI_LWZ, r, base, ofs);
194 else 194 else
195 emit_fai(as, irt_isnum(ir->t) ? PPCI_LFD : PPCI_LFS, r, RID_SP, ofs); 195 emit_fai(as, irt_isnum(ir->t) ? PPCI_LFD : PPCI_LFS, r, base, ofs);
196} 196}
197 197
198/* Generic store of register to stack slot. */ 198/* Generic store of register with base and (small) offset address. */
199static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs) 199static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
200{ 200{
201 if (r < RID_MAX_GPR) 201 if (r < RID_MAX_GPR)
202 emit_tai(as, PPCI_STW, r, RID_SP, ofs); 202 emit_tai(as, PPCI_STW, r, base, ofs);
203 else 203 else
204 emit_fai(as, irt_isnum(ir->t) ? PPCI_STFD : PPCI_STFS, r, RID_SP, ofs); 204 emit_fai(as, irt_isnum(ir->t) ? PPCI_STFD : PPCI_STFS, r, base, ofs);
205} 205}
206 206
207/* Emit a compare (for equality) with a constant operand. */ 207/* Emit a compare (for equality) with a constant operand. */
diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h
index 3a2f6510..ac42db3e 100644
--- a/src/lj_emit_x86.h
+++ b/src/lj_emit_x86.h
@@ -241,10 +241,6 @@ static void emit_gmrmi(ASMState *as, x86Group xg, Reg rb, int32_t i)
241 241
242/* -- Emit loads/stores --------------------------------------------------- */ 242/* -- Emit loads/stores --------------------------------------------------- */
243 243
244/* Instruction selection for XMM moves. */
245#define XMM_MOVRR(as) ((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVSD : XO_MOVAPS)
246#define XMM_MOVRM(as) ((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVLPD : XO_MOVSD)
247
248/* mov [base+ofs], i */ 244/* mov [base+ofs], i */
249static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i) 245static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i)
250{ 246{
@@ -314,7 +310,7 @@ static void emit_loadn(ASMState *as, Reg r, cTValue *tv)
314 if (tvispzero(tv)) /* Use xor only for +0. */ 310 if (tvispzero(tv)) /* Use xor only for +0. */
315 emit_rr(as, XO_XORPS, r, r); 311 emit_rr(as, XO_XORPS, r, r);
316 else 312 else
317 emit_rma(as, XMM_MOVRM(as), r, &tv->n); 313 emit_rma(as, XO_MOVSD, r, &tv->n);
318} 314}
319 315
320/* -- Emit control-flow instructions -------------------------------------- */ 316/* -- Emit control-flow instructions -------------------------------------- */
@@ -427,25 +423,25 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
427 if (dst < RID_MAX_GPR) 423 if (dst < RID_MAX_GPR)
428 emit_rr(as, XO_MOV, REX_64IR(ir, dst), src); 424 emit_rr(as, XO_MOV, REX_64IR(ir, dst), src);
429 else 425 else
430 emit_rr(as, XMM_MOVRR(as), dst, src); 426 emit_rr(as, XO_MOVAPS, dst, src);
431} 427}
432 428
433/* Generic load of register from stack slot. */ 429/* Generic load of register with base and (small) offset address. */
434static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs) 430static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
435{ 431{
436 if (r < RID_MAX_GPR) 432 if (r < RID_MAX_GPR)
437 emit_rmro(as, XO_MOV, REX_64IR(ir, r), RID_ESP, ofs); 433 emit_rmro(as, XO_MOV, REX_64IR(ir, r), base, ofs);
438 else 434 else
439 emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, r, RID_ESP, ofs); 435 emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, r, base, ofs);
440} 436}
441 437
442/* Generic store of register to stack slot. */ 438/* Generic store of register with base and (small) offset address. */
443static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs) 439static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
444{ 440{
445 if (r < RID_MAX_GPR) 441 if (r < RID_MAX_GPR)
446 emit_rmro(as, XO_MOVto, REX_64IR(ir, r), RID_ESP, ofs); 442 emit_rmro(as, XO_MOVto, REX_64IR(ir, r), base, ofs);
447 else 443 else
448 emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSDto : XO_MOVSSto, r, RID_ESP, ofs); 444 emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSDto : XO_MOVSSto, r, base, ofs);
449} 445}
450 446
451/* Add offset to pointer. */ 447/* Add offset to pointer. */
diff --git a/src/lj_err.c b/src/lj_err.c
index 081bfde4..4f134941 100644
--- a/src/lj_err.c
+++ b/src/lj_err.c
@@ -16,6 +16,7 @@
16#include "lj_ff.h" 16#include "lj_ff.h"
17#include "lj_trace.h" 17#include "lj_trace.h"
18#include "lj_vm.h" 18#include "lj_vm.h"
19#include "lj_strfmt.h"
19 20
20/* 21/*
21** LuaJIT can either use internal or external frame unwinding: 22** LuaJIT can either use internal or external frame unwinding:
@@ -98,14 +99,14 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
98 TValue *top = restorestack(L, -nres); 99 TValue *top = restorestack(L, -nres);
99 if (frame < top) { /* Frame reached? */ 100 if (frame < top) { /* Frame reached? */
100 if (errcode) { 101 if (errcode) {
101 L->cframe = cframe_prev(cf);
102 L->base = frame+1; 102 L->base = frame+1;
103 L->cframe = cframe_prev(cf);
103 unwindstack(L, top); 104 unwindstack(L, top);
104 } 105 }
105 return cf; 106 return cf;
106 } 107 }
107 } 108 }
108 if (frame <= tvref(L->stack)) 109 if (frame <= tvref(L->stack)+LJ_FR2)
109 break; 110 break;
110 switch (frame_typep(frame)) { 111 switch (frame_typep(frame)) {
111 case FRAME_LUA: /* Lua frame. */ 112 case FRAME_LUA: /* Lua frame. */
@@ -113,13 +114,11 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
113 frame = frame_prevl(frame); 114 frame = frame_prevl(frame);
114 break; 115 break;
115 case FRAME_C: /* C frame. */ 116 case FRAME_C: /* C frame. */
116#if LJ_HASFFI
117 unwind_c: 117 unwind_c:
118#endif
119#if LJ_UNWIND_EXT 118#if LJ_UNWIND_EXT
120 if (errcode) { 119 if (errcode) {
121 L->cframe = cframe_prev(cf);
122 L->base = frame_prevd(frame) + 1; 120 L->base = frame_prevd(frame) + 1;
121 L->cframe = cframe_prev(cf);
123 unwindstack(L, frame); 122 unwindstack(L, frame);
124 } else if (cf != stopcf) { 123 } else if (cf != stopcf) {
125 cf = cframe_prev(cf); 124 cf = cframe_prev(cf);
@@ -143,16 +142,14 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
143 return cf; 142 return cf;
144 } 143 }
145 if (errcode) { 144 if (errcode) {
146 L->cframe = cframe_prev(cf);
147 L->base = frame_prevd(frame) + 1; 145 L->base = frame_prevd(frame) + 1;
146 L->cframe = cframe_prev(cf);
148 unwindstack(L, frame); 147 unwindstack(L, frame);
149 } 148 }
150 return cf; 149 return cf;
151 case FRAME_CONT: /* Continuation frame. */ 150 case FRAME_CONT: /* Continuation frame. */
152#if LJ_HASFFI 151 if (frame_iscont_fficb(frame))
153 if ((frame-1)->u32.lo == LJ_CONT_FFI_CALLBACK)
154 goto unwind_c; 152 goto unwind_c;
155#endif
156 case FRAME_VARG: /* Vararg frame. */ 153 case FRAME_VARG: /* Vararg frame. */
157 frame = frame_prevd(frame); 154 frame = frame_prevd(frame);
158 break; 155 break;
@@ -165,8 +162,8 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
165 } 162 }
166 if (frame_typep(frame) == FRAME_PCALL) 163 if (frame_typep(frame) == FRAME_PCALL)
167 hook_leave(G(L)); 164 hook_leave(G(L));
168 L->cframe = cf;
169 L->base = frame_prevd(frame) + 1; 165 L->base = frame_prevd(frame) + 1;
166 L->cframe = cf;
170 unwindstack(L, L->base); 167 unwindstack(L, L->base);
171 } 168 }
172 return (void *)((intptr_t)cf | CFRAME_UNWIND_FF); 169 return (void *)((intptr_t)cf | CFRAME_UNWIND_FF);
@@ -174,8 +171,8 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
174 } 171 }
175 /* No C frame. */ 172 /* No C frame. */
176 if (errcode) { 173 if (errcode) {
174 L->base = tvref(L->stack)+1+LJ_FR2;
177 L->cframe = NULL; 175 L->cframe = NULL;
178 L->base = tvref(L->stack)+1;
179 unwindstack(L, L->base); 176 unwindstack(L, L->base);
180 if (G(L)->panic) 177 if (G(L)->panic)
181 G(L)->panic(L); 178 G(L)->panic(L);
@@ -452,7 +449,7 @@ LJ_NOINLINE void LJ_FASTCALL lj_err_throw(lua_State *L, int errcode)
452{ 449{
453 global_State *g = G(L); 450 global_State *g = G(L);
454 lj_trace_abort(g); 451 lj_trace_abort(g);
455 setgcrefnull(g->jit_L); 452 setmref(g->jit_base, NULL);
456 L->status = 0; 453 L->status = 0;
457#if LJ_UNWIND_EXT 454#if LJ_UNWIND_EXT
458 err_raise_ext(errcode); 455 err_raise_ext(errcode);
@@ -497,7 +494,7 @@ LJ_NOINLINE void lj_err_mem(lua_State *L)
497/* Find error function for runtime errors. Requires an extra stack traversal. */ 494/* Find error function for runtime errors. Requires an extra stack traversal. */
498static ptrdiff_t finderrfunc(lua_State *L) 495static ptrdiff_t finderrfunc(lua_State *L)
499{ 496{
500 cTValue *frame = L->base-1, *bot = tvref(L->stack); 497 cTValue *frame = L->base-1, *bot = tvref(L->stack)+LJ_FR2;
501 void *cf = L->cframe; 498 void *cf = L->cframe;
502 while (frame > bot && cf) { 499 while (frame > bot && cf) {
503 while (cframe_nres(cframe_raw(cf)) < 0) { /* cframe without frame? */ 500 while (cframe_nres(cframe_raw(cf)) < 0) { /* cframe without frame? */
@@ -521,10 +518,8 @@ static ptrdiff_t finderrfunc(lua_State *L)
521 frame = frame_prevd(frame); 518 frame = frame_prevd(frame);
522 break; 519 break;
523 case FRAME_CONT: 520 case FRAME_CONT:
524#if LJ_HASFFI 521 if (frame_iscont_fficb(frame))
525 if ((frame-1)->u32.lo == LJ_CONT_FFI_CALLBACK)
526 cf = cframe_prev(cf); 522 cf = cframe_prev(cf);
527#endif
528 frame = frame_prevd(frame); 523 frame = frame_prevd(frame);
529 break; 524 break;
530 case FRAME_CP: 525 case FRAME_CP:
@@ -535,8 +530,8 @@ static ptrdiff_t finderrfunc(lua_State *L)
535 break; 530 break;
536 case FRAME_PCALL: 531 case FRAME_PCALL:
537 case FRAME_PCALLH: 532 case FRAME_PCALLH:
538 if (frame_ftsz(frame) >= (ptrdiff_t)(2*sizeof(TValue))) /* xpcall? */ 533 if (frame_func(frame_prevd(frame))->c.ffid == FF_xpcall)
539 return savestack(L, frame-1); /* Point to xpcall's errorfunc. */ 534 return savestack(L, frame_prevd(frame)+1); /* xpcall's errorfunc. */
540 return 0; 535 return 0;
541 default: 536 default:
542 lua_assert(0); 537 lua_assert(0);
@@ -559,8 +554,9 @@ LJ_NOINLINE void lj_err_run(lua_State *L)
559 lj_err_throw(L, LUA_ERRERR); 554 lj_err_throw(L, LUA_ERRERR);
560 } 555 }
561 L->status = LUA_ERRERR; 556 L->status = LUA_ERRERR;
562 copyTV(L, top, top-1); 557 copyTV(L, top+LJ_FR2, top-1);
563 copyTV(L, top-1, errfunc); 558 copyTV(L, top-1, errfunc);
559 if (LJ_FR2) setnilV(top++);
564 L->top = top+1; 560 L->top = top+1;
565 lj_vm_call(L, top, 1+1); /* Stack: |errfunc|msg| -> |msg| */ 561 lj_vm_call(L, top, 1+1); /* Stack: |errfunc|msg| -> |msg| */
566 } 562 }
@@ -574,7 +570,7 @@ LJ_NORET LJ_NOINLINE static void err_msgv(lua_State *L, ErrMsg em, ...)
574 va_list argp; 570 va_list argp;
575 va_start(argp, em); 571 va_start(argp, em);
576 if (curr_funcisL(L)) L->top = curr_topL(L); 572 if (curr_funcisL(L)) L->top = curr_topL(L);
577 msg = lj_str_pushvf(L, err2msg(em), argp); 573 msg = lj_strfmt_pushvf(L, err2msg(em), argp);
578 va_end(argp); 574 va_end(argp);
579 lj_debug_addloc(L, msg, L->base-1, NULL); 575 lj_debug_addloc(L, msg, L->base-1, NULL);
580 lj_err_run(L); 576 lj_err_run(L);
@@ -592,11 +588,11 @@ LJ_NOINLINE void lj_err_lex(lua_State *L, GCstr *src, const char *tok,
592{ 588{
593 char buff[LUA_IDSIZE]; 589 char buff[LUA_IDSIZE];
594 const char *msg; 590 const char *msg;
595 lj_debug_shortname(buff, src); 591 lj_debug_shortname(buff, src, line);
596 msg = lj_str_pushvf(L, err2msg(em), argp); 592 msg = lj_strfmt_pushvf(L, err2msg(em), argp);
597 msg = lj_str_pushf(L, "%s:%d: %s", buff, line, msg); 593 msg = lj_strfmt_pushf(L, "%s:%d: %s", buff, line, msg);
598 if (tok) 594 if (tok)
599 lj_str_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tok); 595 lj_strfmt_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tok);
600 lj_err_throw(L, LUA_ERRSYNTAX); 596 lj_err_throw(L, LUA_ERRSYNTAX);
601} 597}
602 598
@@ -635,8 +631,9 @@ LJ_NOINLINE void lj_err_optype_call(lua_State *L, TValue *o)
635 const BCIns *pc = cframe_Lpc(L); 631 const BCIns *pc = cframe_Lpc(L);
636 if (((ptrdiff_t)pc & FRAME_TYPE) != FRAME_LUA) { 632 if (((ptrdiff_t)pc & FRAME_TYPE) != FRAME_LUA) {
637 const char *tname = lj_typename(o); 633 const char *tname = lj_typename(o);
634 if (LJ_FR2) o++;
638 setframe_pc(o, pc); 635 setframe_pc(o, pc);
639 setframe_gc(o, obj2gco(L)); 636 setframe_gc(o, obj2gco(L), LJ_TTHREAD);
640 L->top = L->base = o+1; 637 L->top = L->base = o+1;
641 err_msgv(L, LJ_ERR_BADCALL, tname); 638 err_msgv(L, LJ_ERR_BADCALL, tname);
642 } 639 }
@@ -651,13 +648,10 @@ LJ_NOINLINE void lj_err_callermsg(lua_State *L, const char *msg)
651 if (frame_islua(frame)) { 648 if (frame_islua(frame)) {
652 pframe = frame_prevl(frame); 649 pframe = frame_prevl(frame);
653 } else if (frame_iscont(frame)) { 650 } else if (frame_iscont(frame)) {
654#if LJ_HASFFI 651 if (frame_iscont_fficb(frame)) {
655 if ((frame-1)->u32.lo == LJ_CONT_FFI_CALLBACK) {
656 pframe = frame; 652 pframe = frame;
657 frame = NULL; 653 frame = NULL;
658 } else 654 } else {
659#endif
660 {
661 pframe = frame_prevd(frame); 655 pframe = frame_prevd(frame);
662#if LJ_HASFFI 656#if LJ_HASFFI
663 /* Remove frame for FFI metamethods. */ 657 /* Remove frame for FFI metamethods. */
@@ -680,7 +674,7 @@ LJ_NOINLINE void lj_err_callerv(lua_State *L, ErrMsg em, ...)
680 const char *msg; 674 const char *msg;
681 va_list argp; 675 va_list argp;
682 va_start(argp, em); 676 va_start(argp, em);
683 msg = lj_str_pushvf(L, err2msg(em), argp); 677 msg = lj_strfmt_pushvf(L, err2msg(em), argp);
684 va_end(argp); 678 va_end(argp);
685 lj_err_callermsg(L, msg); 679 lj_err_callermsg(L, msg);
686} 680}
@@ -700,9 +694,9 @@ LJ_NORET LJ_NOINLINE static void err_argmsg(lua_State *L, int narg,
700 if (narg < 0 && narg > LUA_REGISTRYINDEX) 694 if (narg < 0 && narg > LUA_REGISTRYINDEX)
701 narg = (int)(L->top - L->base) + narg + 1; 695 narg = (int)(L->top - L->base) + narg + 1;
702 if (ftype && ftype[3] == 'h' && --narg == 0) /* Check for "method". */ 696 if (ftype && ftype[3] == 'h' && --narg == 0) /* Check for "method". */
703 msg = lj_str_pushf(L, err2msg(LJ_ERR_BADSELF), fname, msg); 697 msg = lj_strfmt_pushf(L, err2msg(LJ_ERR_BADSELF), fname, msg);
704 else 698 else
705 msg = lj_str_pushf(L, err2msg(LJ_ERR_BADARG), narg, fname, msg); 699 msg = lj_strfmt_pushf(L, err2msg(LJ_ERR_BADARG), narg, fname, msg);
706 lj_err_callermsg(L, msg); 700 lj_err_callermsg(L, msg);
707} 701}
708 702
@@ -712,7 +706,7 @@ LJ_NOINLINE void lj_err_argv(lua_State *L, int narg, ErrMsg em, ...)
712 const char *msg; 706 const char *msg;
713 va_list argp; 707 va_list argp;
714 va_start(argp, em); 708 va_start(argp, em);
715 msg = lj_str_pushvf(L, err2msg(em), argp); 709 msg = lj_strfmt_pushvf(L, err2msg(em), argp);
716 va_end(argp); 710 va_end(argp);
717 err_argmsg(L, narg, msg); 711 err_argmsg(L, narg, msg);
718} 712}
@@ -742,7 +736,7 @@ LJ_NOINLINE void lj_err_argtype(lua_State *L, int narg, const char *xname)
742 TValue *o = narg < 0 ? L->top + narg : L->base + narg-1; 736 TValue *o = narg < 0 ? L->top + narg : L->base + narg-1;
743 tname = o < L->top ? lj_typename(o) : lj_obj_typename[0]; 737 tname = o < L->top ? lj_typename(o) : lj_obj_typename[0];
744 } 738 }
745 msg = lj_str_pushf(L, err2msg(LJ_ERR_BADTYPE), xname, tname); 739 msg = lj_strfmt_pushf(L, err2msg(LJ_ERR_BADTYPE), xname, tname);
746 err_argmsg(L, narg, msg); 740 err_argmsg(L, narg, msg);
747} 741}
748 742
@@ -792,7 +786,7 @@ LUALIB_API int luaL_error(lua_State *L, const char *fmt, ...)
792 const char *msg; 786 const char *msg;
793 va_list argp; 787 va_list argp;
794 va_start(argp, fmt); 788 va_start(argp, fmt);
795 msg = lj_str_pushvf(L, fmt, argp); 789 msg = lj_strfmt_pushvf(L, fmt, argp);
796 va_end(argp); 790 va_end(argp);
797 lj_err_callermsg(L, msg); 791 lj_err_callermsg(L, msg);
798 return 0; /* unreachable */ 792 return 0; /* unreachable */
diff --git a/src/lj_errmsg.h b/src/lj_errmsg.h
index 1c948760..7717665b 100644
--- a/src/lj_errmsg.h
+++ b/src/lj_errmsg.h
@@ -96,9 +96,7 @@ ERRDEF(STRPATX, "pattern too complex")
96ERRDEF(STRCAPI, "invalid capture index") 96ERRDEF(STRCAPI, "invalid capture index")
97ERRDEF(STRCAPN, "too many captures") 97ERRDEF(STRCAPN, "too many captures")
98ERRDEF(STRCAPU, "unfinished capture") 98ERRDEF(STRCAPU, "unfinished capture")
99ERRDEF(STRFMTO, "invalid option " LUA_QL("%%%c") " to " LUA_QL("format")) 99ERRDEF(STRFMT, "invalid option " LUA_QS " to " LUA_QL("format"))
100ERRDEF(STRFMTR, "invalid format (repeated flags)")
101ERRDEF(STRFMTW, "invalid format (width or precision too long)")
102ERRDEF(STRGSRV, "invalid replacement value (a %s)") 100ERRDEF(STRGSRV, "invalid replacement value (a %s)")
103ERRDEF(BADMODN, "name conflict for module " LUA_QS) 101ERRDEF(BADMODN, "name conflict for module " LUA_QS)
104#if LJ_HASJIT 102#if LJ_HASJIT
@@ -118,7 +116,6 @@ ERRDEF(JITOPT, "unknown or malformed optimization flag " LUA_QS)
118/* Lexer/parser errors. */ 116/* Lexer/parser errors. */
119ERRDEF(XMODE, "attempt to load chunk with wrong mode") 117ERRDEF(XMODE, "attempt to load chunk with wrong mode")
120ERRDEF(XNEAR, "%s near " LUA_QS) 118ERRDEF(XNEAR, "%s near " LUA_QS)
121ERRDEF(XELEM, "lexical element too long")
122ERRDEF(XLINES, "chunk has too many lines") 119ERRDEF(XLINES, "chunk has too many lines")
123ERRDEF(XLEVELS, "chunk has too many syntax levels") 120ERRDEF(XLEVELS, "chunk has too many syntax levels")
124ERRDEF(XNUMBER, "malformed number") 121ERRDEF(XNUMBER, "malformed number")
diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c
index 69f71ab2..e17f6818 100644
--- a/src/lj_ffrecord.c
+++ b/src/lj_ffrecord.c
@@ -27,6 +27,7 @@
27#include "lj_dispatch.h" 27#include "lj_dispatch.h"
28#include "lj_vm.h" 28#include "lj_vm.h"
29#include "lj_strscan.h" 29#include "lj_strscan.h"
30#include "lj_strfmt.h"
30 31
31/* Some local macros to save typing. Undef'd at the end. */ 32/* Some local macros to save typing. Undef'd at the end. */
32#define IR(ref) (&J->cur.ir[(ref)]) 33#define IR(ref) (&J->cur.ir[(ref)])
@@ -79,10 +80,7 @@ static GCstr *argv2str(jit_State *J, TValue *o)
79 GCstr *s; 80 GCstr *s;
80 if (!tvisnumber(o)) 81 if (!tvisnumber(o))
81 lj_trace_err(J, LJ_TRERR_BADTYPE); 82 lj_trace_err(J, LJ_TRERR_BADTYPE);
82 if (tvisint(o)) 83 s = lj_strfmt_number(J->L, o);
83 s = lj_str_fromint(J->L, intV(o));
84 else
85 s = lj_str_fromnum(J->L, &o->n);
86 setstrV(J->L, o, s); 84 setstrV(J->L, o, s);
87 return s; 85 return s;
88 } 86 }
@@ -98,27 +96,90 @@ static ptrdiff_t results_wanted(jit_State *J)
98 return -1; 96 return -1;
99} 97}
100 98
101/* Throw error for unsupported variant of fast function. */ 99/* Trace stitching: add continuation below frame to start a new trace. */
102LJ_NORET static void recff_nyiu(jit_State *J) 100static void recff_stitch(jit_State *J)
103{ 101{
104 setfuncV(J->L, &J->errinfo, J->fn); 102 ASMFunction cont = lj_cont_stitch;
105 lj_trace_err_info(J, LJ_TRERR_NYIFFU); 103 TraceNo traceno = J->cur.traceno;
104 lua_State *L = J->L;
105 TValue *base = L->base;
106 const BCIns *pc = frame_pc(base-1);
107 TValue *pframe = frame_prevl(base-1);
108 TRef trcont;
109
110 lua_assert(!LJ_FR2); /* TODO_FR2: handle frame shift. */
111 /* Move func + args up in Lua stack and insert continuation. */
112 memmove(&base[1], &base[-1], sizeof(TValue)*(J->maxslot+1));
113 setframe_ftsz(base+1, ((char *)(base+1) - (char *)pframe) + FRAME_CONT);
114 setcont(base, cont);
115 setframe_pc(base, pc);
116 if (LJ_DUALNUM) setintV(base-1, traceno); else base[-1].u64 = traceno;
117 L->base += 2;
118 L->top += 2;
119
120 /* Ditto for the IR. */
121 memmove(&J->base[1], &J->base[-1], sizeof(TRef)*(J->maxslot+1));
122#if LJ_64
123 trcont = lj_ir_kptr(J, (void *)((int64_t)cont-(int64_t)lj_vm_asm_begin));
124#else
125 trcont = lj_ir_kptr(J, (void *)cont);
126#endif
127 J->base[0] = trcont | TREF_CONT;
128 J->base[-1] = LJ_DUALNUM ? lj_ir_kint(J,traceno) : lj_ir_knum_u64(J,traceno);
129 J->base += 2;
130 J->baseslot += 2;
131 J->framedepth++;
132
133 lj_record_stop(J, LJ_TRLINK_STITCH, 0);
134
135 /* Undo Lua stack changes. */
136 memmove(&base[-1], &base[1], sizeof(TValue)*(J->maxslot+1));
137 setframe_pc(base-1, pc);
138 L->base -= 2;
139 L->top -= 2;
106} 140}
107 141
108/* Fallback handler for all fast functions that are not recorded (yet). */ 142/* Fallback handler for fast functions that are not recorded (yet). */
109static void LJ_FASTCALL recff_nyi(jit_State *J, RecordFFData *rd) 143static void LJ_FASTCALL recff_nyi(jit_State *J, RecordFFData *rd)
110{ 144{
111 setfuncV(J->L, &J->errinfo, J->fn); 145 if (J->cur.nins < (IRRef)J->param[JIT_P_minstitch] + REF_BASE) {
112 lj_trace_err_info(J, LJ_TRERR_NYIFF); 146 lj_trace_err_info(J, LJ_TRERR_TRACEUV);
113 UNUSED(rd); 147 } else {
148 /* Can only stitch from Lua call. */
149 if (J->framedepth && frame_islua(J->L->base-1)) {
150 BCOp op = bc_op(*frame_pc(J->L->base-1));
151 /* Stitched trace cannot start with *M op with variable # of args. */
152 if (!(op == BC_CALLM || op == BC_CALLMT ||
153 op == BC_RETM || op == BC_TSETM)) {
154 switch (J->fn->c.ffid) {
155 case FF_error:
156 case FF_debug_sethook:
157 case FF_jit_flush:
158 break; /* Don't stitch across special builtins. */
159 default:
160 recff_stitch(J); /* Use trace stitching. */
161 rd->nres = -1;
162 return;
163 }
164 }
165 }
166 /* Otherwise stop trace and return to interpreter. */
167 lj_record_stop(J, LJ_TRLINK_RETURN, 0);
168 rd->nres = -1;
169 }
114} 170}
115 171
116/* C functions can have arbitrary side-effects and are not recorded (yet). */ 172/* Fallback handler for unsupported variants of fast functions. */
117static void LJ_FASTCALL recff_c(jit_State *J, RecordFFData *rd) 173#define recff_nyiu recff_nyi
174
175/* Must stop the trace for classic C functions with arbitrary side-effects. */
176#define recff_c recff_nyi
177
178/* Emit BUFHDR for the global temporary buffer. */
179static TRef recff_bufhdr(jit_State *J)
118{ 180{
119 setfuncV(J->L, &J->errinfo, J->fn); 181 return emitir(IRT(IR_BUFHDR, IRT_P32),
120 lj_trace_err_info(J, LJ_TRERR_NYICF); 182 lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET);
121 UNUSED(rd);
122} 183}
123 184
124/* -- Base library fast functions ----------------------------------------- */ 185/* -- Base library fast functions ----------------------------------------- */
@@ -135,7 +196,7 @@ static void LJ_FASTCALL recff_type(jit_State *J, RecordFFData *rd)
135 uint32_t t; 196 uint32_t t;
136 if (tvisnumber(&rd->argv[0])) 197 if (tvisnumber(&rd->argv[0]))
137 t = ~LJ_TNUMX; 198 t = ~LJ_TNUMX;
138 else if (LJ_64 && tvislightud(&rd->argv[0])) 199 else if (LJ_64 && !LJ_GC64 && tvislightud(&rd->argv[0]))
139 t = ~LJ_TLIGHTUD; 200 t = ~LJ_TLIGHTUD;
140 else 201 else
141 t = ~itype(&rd->argv[0]); 202 t = ~itype(&rd->argv[0]);
@@ -263,7 +324,8 @@ static void LJ_FASTCALL recff_select(jit_State *J, RecordFFData *rd)
263 J->base[i] = J->base[start+i]; 324 J->base[i] = J->base[start+i];
264 } /* else: Interpreter will throw. */ 325 } /* else: Interpreter will throw. */
265 } else { 326 } else {
266 recff_nyiu(J); 327 recff_nyiu(J, rd);
328 return;
267 } 329 }
268 } /* else: Interpreter will throw. */ 330 } /* else: Interpreter will throw. */
269} 331}
@@ -274,14 +336,18 @@ static void LJ_FASTCALL recff_tonumber(jit_State *J, RecordFFData *rd)
274 TRef base = J->base[1]; 336 TRef base = J->base[1];
275 if (tr && !tref_isnil(base)) { 337 if (tr && !tref_isnil(base)) {
276 base = lj_opt_narrow_toint(J, base); 338 base = lj_opt_narrow_toint(J, base);
277 if (!tref_isk(base) || IR(tref_ref(base))->i != 10) 339 if (!tref_isk(base) || IR(tref_ref(base))->i != 10) {
278 recff_nyiu(J); 340 recff_nyiu(J, rd);
341 return;
342 }
279 } 343 }
280 if (tref_isnumber_str(tr)) { 344 if (tref_isnumber_str(tr)) {
281 if (tref_isstr(tr)) { 345 if (tref_isstr(tr)) {
282 TValue tmp; 346 TValue tmp;
283 if (!lj_strscan_num(strV(&rd->argv[0]), &tmp)) 347 if (!lj_strscan_num(strV(&rd->argv[0]), &tmp)) {
284 recff_nyiu(J); /* Would need an inverted STRTO for this case. */ 348 recff_nyiu(J, rd); /* Would need an inverted STRTO for this case. */
349 return;
350 }
285 tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0); 351 tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0);
286 } 352 }
287#if LJ_HASFFI 353#if LJ_HASFFI
@@ -336,13 +402,15 @@ static void LJ_FASTCALL recff_tostring(jit_State *J, RecordFFData *rd)
336 if (tref_isstr(tr)) { 402 if (tref_isstr(tr)) {
337 /* Ignore __tostring in the string base metatable. */ 403 /* Ignore __tostring in the string base metatable. */
338 /* Pass on result in J->base[0]. */ 404 /* Pass on result in J->base[0]. */
339 } else if (!recff_metacall(J, rd, MM_tostring)) { 405 } else if (tr && !recff_metacall(J, rd, MM_tostring)) {
340 if (tref_isnumber(tr)) { 406 if (tref_isnumber(tr)) {
341 J->base[0] = emitir(IRT(IR_TOSTR, IRT_STR), tr, 0); 407 J->base[0] = emitir(IRT(IR_TOSTR, IRT_STR), tr,
408 tref_isnum(tr) ? IRTOSTR_NUM : IRTOSTR_INT);
342 } else if (tref_ispri(tr)) { 409 } else if (tref_ispri(tr)) {
343 J->base[0] = lj_ir_kstr(J, strV(&J->fn->c.upvalue[tref_type(tr)])); 410 J->base[0] = lj_ir_kstr(J, lj_strfmt_obj(J->L, &rd->argv[0]));
344 } else { 411 } else {
345 recff_nyiu(J); 412 recff_nyiu(J, rd);
413 return;
346 } 414 }
347 } 415 }
348} 416}
@@ -364,14 +432,14 @@ static void LJ_FASTCALL recff_ipairs_aux(jit_State *J, RecordFFData *rd)
364 } /* else: Interpreter will throw. */ 432 } /* else: Interpreter will throw. */
365} 433}
366 434
367static void LJ_FASTCALL recff_ipairs(jit_State *J, RecordFFData *rd) 435static void LJ_FASTCALL recff_xpairs(jit_State *J, RecordFFData *rd)
368{ 436{
369 if (!(LJ_52 && recff_metacall(J, rd, MM_ipairs))) { 437 if (!(LJ_52 && recff_metacall(J, rd, MM_ipairs))) {
370 TRef tab = J->base[0]; 438 TRef tab = J->base[0];
371 if (tref_istab(tab)) { 439 if (tref_istab(tab)) {
372 J->base[0] = lj_ir_kfunc(J, funcV(&J->fn->c.upvalue[0])); 440 J->base[0] = lj_ir_kfunc(J, funcV(&J->fn->c.upvalue[0]));
373 J->base[1] = tab; 441 J->base[1] = tab;
374 J->base[2] = lj_ir_kint(J, 0); 442 J->base[2] = rd->data ? lj_ir_kint(J, 0) : TREF_NIL;
375 rd->nres = 3; 443 rd->nres = 3;
376 } /* else: Interpreter will throw. */ 444 } /* else: Interpreter will throw. */
377 } 445 }
@@ -399,6 +467,7 @@ static void LJ_FASTCALL recff_xpcall(jit_State *J, RecordFFData *rd)
399 TValue argv0, argv1; 467 TValue argv0, argv1;
400 TRef tmp; 468 TRef tmp;
401 int errcode; 469 int errcode;
470 lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */
402 /* Swap function and traceback. */ 471 /* Swap function and traceback. */
403 tmp = J->base[0]; J->base[0] = J->base[1]; J->base[1] = tmp; 472 tmp = J->base[0]; J->base[0] = J->base[1]; J->base[1] = tmp;
404 copyTV(J->L, &argv0, &rd->argv[0]); 473 copyTV(J->L, &argv0, &rd->argv[0]);
@@ -416,6 +485,18 @@ static void LJ_FASTCALL recff_xpcall(jit_State *J, RecordFFData *rd)
416 } /* else: Interpreter will throw. */ 485 } /* else: Interpreter will throw. */
417} 486}
418 487
488static void LJ_FASTCALL recff_getfenv(jit_State *J, RecordFFData *rd)
489{
490 TRef tr = J->base[0];
491 /* Only support getfenv(0) for now. */
492 if (tref_isint(tr) && tref_isk(tr) && IR(tref_ref(tr))->i == 0) {
493 TRef trl = emitir(IRT(IR_LREF, IRT_THREAD), 0, 0);
494 J->base[0] = emitir(IRT(IR_FLOAD, IRT_TAB), trl, IRFL_THREAD_ENV);
495 return;
496 }
497 recff_nyiu(J, rd);
498}
499
419/* -- Math library fast functions ----------------------------------------- */ 500/* -- Math library fast functions ----------------------------------------- */
420 501
421static void LJ_FASTCALL recff_math_abs(jit_State *J, RecordFFData *rd) 502static void LJ_FASTCALL recff_math_abs(jit_State *J, RecordFFData *rd)
@@ -528,14 +609,6 @@ static void LJ_FASTCALL recff_math_modf(jit_State *J, RecordFFData *rd)
528 rd->nres = 2; 609 rd->nres = 2;
529} 610}
530 611
531static void LJ_FASTCALL recff_math_degrad(jit_State *J, RecordFFData *rd)
532{
533 TRef tr = lj_ir_tonum(J, J->base[0]);
534 TRef trm = lj_ir_knum(J, numV(&J->fn->c.upvalue[0]));
535 J->base[0] = emitir(IRTN(IR_MUL), tr, trm);
536 UNUSED(rd);
537}
538
539static void LJ_FASTCALL recff_math_pow(jit_State *J, RecordFFData *rd) 612static void LJ_FASTCALL recff_math_pow(jit_State *J, RecordFFData *rd)
540{ 613{
541 TRef tr = lj_ir_tonum(J, J->base[0]); 614 TRef tr = lj_ir_tonum(J, J->base[0]);
@@ -592,48 +665,105 @@ static void LJ_FASTCALL recff_math_random(jit_State *J, RecordFFData *rd)
592 665
593/* -- Bit library fast functions ------------------------------------------ */ 666/* -- Bit library fast functions ------------------------------------------ */
594 667
595/* Record unary bit.tobit, bit.bnot, bit.bswap. */ 668/* Record bit.tobit. */
669static void LJ_FASTCALL recff_bit_tobit(jit_State *J, RecordFFData *rd)
670{
671 TRef tr = J->base[0];
672#if LJ_HASFFI
673 if (tref_iscdata(tr)) { recff_bit64_tobit(J, rd); return; }
674#endif
675 J->base[0] = lj_opt_narrow_tobit(J, tr);
676 UNUSED(rd);
677}
678
679/* Record unary bit.bnot, bit.bswap. */
596static void LJ_FASTCALL recff_bit_unary(jit_State *J, RecordFFData *rd) 680static void LJ_FASTCALL recff_bit_unary(jit_State *J, RecordFFData *rd)
597{ 681{
598 TRef tr = lj_opt_narrow_tobit(J, J->base[0]); 682#if LJ_HASFFI
599 J->base[0] = (rd->data == IR_TOBIT) ? tr : emitir(IRTI(rd->data), tr, 0); 683 if (recff_bit64_unary(J, rd))
684 return;
685#endif
686 J->base[0] = emitir(IRTI(rd->data), lj_opt_narrow_tobit(J, J->base[0]), 0);
600} 687}
601 688
602/* Record N-ary bit.band, bit.bor, bit.bxor. */ 689/* Record N-ary bit.band, bit.bor, bit.bxor. */
603static void LJ_FASTCALL recff_bit_nary(jit_State *J, RecordFFData *rd) 690static void LJ_FASTCALL recff_bit_nary(jit_State *J, RecordFFData *rd)
604{ 691{
605 TRef tr = lj_opt_narrow_tobit(J, J->base[0]); 692#if LJ_HASFFI
606 uint32_t op = rd->data; 693 if (recff_bit64_nary(J, rd))
607 BCReg i; 694 return;
608 for (i = 1; J->base[i] != 0; i++) 695#endif
609 tr = emitir(IRTI(op), tr, lj_opt_narrow_tobit(J, J->base[i])); 696 {
610 J->base[0] = tr; 697 TRef tr = lj_opt_narrow_tobit(J, J->base[0]);
698 uint32_t ot = IRTI(rd->data);
699 BCReg i;
700 for (i = 1; J->base[i] != 0; i++)
701 tr = emitir(ot, tr, lj_opt_narrow_tobit(J, J->base[i]));
702 J->base[0] = tr;
703 }
611} 704}
612 705
613/* Record bit shifts. */ 706/* Record bit shifts. */
614static void LJ_FASTCALL recff_bit_shift(jit_State *J, RecordFFData *rd) 707static void LJ_FASTCALL recff_bit_shift(jit_State *J, RecordFFData *rd)
615{ 708{
616 TRef tr = lj_opt_narrow_tobit(J, J->base[0]); 709#if LJ_HASFFI
617 TRef tsh = lj_opt_narrow_tobit(J, J->base[1]); 710 if (recff_bit64_shift(J, rd))
618 IROp op = (IROp)rd->data; 711 return;
619 if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) && 712#endif
620 !tref_isk(tsh)) 713 {
621 tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 31)); 714 TRef tr = lj_opt_narrow_tobit(J, J->base[0]);
715 TRef tsh = lj_opt_narrow_tobit(J, J->base[1]);
716 IROp op = (IROp)rd->data;
717 if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) &&
718 !tref_isk(tsh))
719 tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 31));
622#ifdef LJ_TARGET_UNIFYROT 720#ifdef LJ_TARGET_UNIFYROT
623 if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) { 721 if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) {
624 op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR; 722 op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR;
625 tsh = emitir(IRTI(IR_NEG), tsh, tsh); 723 tsh = emitir(IRTI(IR_NEG), tsh, tsh);
724 }
725#endif
726 J->base[0] = emitir(IRTI(op), tr, tsh);
626 } 727 }
728}
729
730static void LJ_FASTCALL recff_bit_tohex(jit_State *J, RecordFFData *rd)
731{
732#if LJ_HASFFI
733 TRef hdr = recff_bufhdr(J);
734 TRef tr = recff_bit64_tohex(J, rd, hdr);
735 J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
736#else
737 recff_nyiu(J, rd); /* Don't bother working around this NYI. */
627#endif 738#endif
628 J->base[0] = emitir(IRTI(op), tr, tsh);
629} 739}
630 740
631/* -- String library fast functions --------------------------------------- */ 741/* -- String library fast functions --------------------------------------- */
632 742
633static void LJ_FASTCALL recff_string_len(jit_State *J, RecordFFData *rd) 743/* Specialize to relative starting position for string. */
744static TRef recff_string_start(jit_State *J, GCstr *s, int32_t *st, TRef tr,
745 TRef trlen, TRef tr0)
634{ 746{
635 J->base[0] = emitir(IRTI(IR_FLOAD), lj_ir_tostr(J, J->base[0]), IRFL_STR_LEN); 747 int32_t start = *st;
636 UNUSED(rd); 748 if (start < 0) {
749 emitir(IRTGI(IR_LT), tr, tr0);
750 tr = emitir(IRTI(IR_ADD), trlen, tr);
751 start = start + (int32_t)s->len;
752 emitir(start < 0 ? IRTGI(IR_LT) : IRTGI(IR_GE), tr, tr0);
753 if (start < 0) {
754 tr = tr0;
755 start = 0;
756 }
757 } else if (start == 0) {
758 emitir(IRTGI(IR_EQ), tr, tr0);
759 tr = tr0;
760 } else {
761 tr = emitir(IRTI(IR_ADD), tr, lj_ir_kint(J, -1));
762 emitir(IRTGI(IR_GE), tr, tr0);
763 start--;
764 }
765 *st = start;
766 return tr;
637} 767}
638 768
639/* Handle string.byte (rd->data = 0) and string.sub (rd->data = 1). */ 769/* Handle string.byte (rd->data = 0) and string.sub (rd->data = 1). */
@@ -680,29 +810,11 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd)
680 } else if ((MSize)end <= str->len) { 810 } else if ((MSize)end <= str->len) {
681 emitir(IRTGI(IR_ULE), trend, trlen); 811 emitir(IRTGI(IR_ULE), trend, trlen);
682 } else { 812 } else {
683 emitir(IRTGI(IR_GT), trend, trlen); 813 emitir(IRTGI(IR_UGT), trend, trlen);
684 end = (int32_t)str->len; 814 end = (int32_t)str->len;
685 trend = trlen; 815 trend = trlen;
686 } 816 }
687 if (start < 0) { 817 trstart = recff_string_start(J, str, &start, trstart, trlen, tr0);
688 emitir(IRTGI(IR_LT), trstart, tr0);
689 trstart = emitir(IRTI(IR_ADD), trlen, trstart);
690 start = start+(int32_t)str->len;
691 emitir(start < 0 ? IRTGI(IR_LT) : IRTGI(IR_GE), trstart, tr0);
692 if (start < 0) {
693 trstart = tr0;
694 start = 0;
695 }
696 } else {
697 if (start == 0) {
698 emitir(IRTGI(IR_EQ), trstart, tr0);
699 trstart = tr0;
700 } else {
701 trstart = emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, -1));
702 emitir(IRTGI(IR_GE), trstart, tr0);
703 start--;
704 }
705 }
706 if (rd->data) { /* Return string.sub result. */ 818 if (rd->data) { /* Return string.sub result. */
707 if (end - start >= 0) { 819 if (end - start >= 0) {
708 /* Also handle empty range here, to avoid extra traces. */ 820 /* Also handle empty range here, to avoid extra traces. */
@@ -712,7 +824,7 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd)
712 J->base[0] = emitir(IRT(IR_SNEW, IRT_STR), trptr, trslen); 824 J->base[0] = emitir(IRT(IR_SNEW, IRT_STR), trptr, trslen);
713 } else { /* Range underflow: return empty string. */ 825 } else { /* Range underflow: return empty string. */
714 emitir(IRTGI(IR_LT), trend, trstart); 826 emitir(IRTGI(IR_LT), trend, trstart);
715 J->base[0] = lj_ir_kstr(J, lj_str_new(J->L, strdata(str), 0)); 827 J->base[0] = lj_ir_kstr(J, &J2G(J)->strempty);
716 } 828 }
717 } else { /* Return string.byte result(s). */ 829 } else { /* Return string.byte result(s). */
718 ptrdiff_t i, len = end - start; 830 ptrdiff_t i, len = end - start;
@@ -734,48 +846,200 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd)
734 } 846 }
735} 847}
736 848
737/* -- Table library fast functions ---------------------------------------- */ 849static void LJ_FASTCALL recff_string_char(jit_State *J, RecordFFData *rd)
738
739static void LJ_FASTCALL recff_table_getn(jit_State *J, RecordFFData *rd)
740{ 850{
741 if (tref_istab(J->base[0])) 851 TRef k255 = lj_ir_kint(J, 255);
742 J->base[0] = lj_ir_call(J, IRCALL_lj_tab_len, J->base[0]); 852 BCReg i;
743 /* else: Interpreter will throw. */ 853 for (i = 0; J->base[i] != 0; i++) { /* Convert char values to strings. */
854 TRef tr = lj_opt_narrow_toint(J, J->base[i]);
855 emitir(IRTGI(IR_ULE), tr, k255);
856 J->base[i] = emitir(IRT(IR_TOSTR, IRT_STR), tr, IRTOSTR_CHAR);
857 }
858 if (i > 1) { /* Concatenate the strings, if there's more than one. */
859 TRef hdr = recff_bufhdr(J), tr = hdr;
860 for (i = 0; J->base[i] != 0; i++)
861 tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, J->base[i]);
862 J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
863 }
744 UNUSED(rd); 864 UNUSED(rd);
745} 865}
746 866
747static void LJ_FASTCALL recff_table_remove(jit_State *J, RecordFFData *rd) 867static void LJ_FASTCALL recff_string_rep(jit_State *J, RecordFFData *rd)
748{ 868{
749 TRef tab = J->base[0]; 869 TRef str = lj_ir_tostr(J, J->base[0]);
750 rd->nres = 0; 870 TRef rep = lj_opt_narrow_toint(J, J->base[1]);
751 if (tref_istab(tab)) { 871 TRef hdr, tr, str2 = 0;
752 if (tref_isnil(J->base[1])) { /* Simple pop: t[#t] = nil */ 872 if (!tref_isnil(J->base[2])) {
753 TRef trlen = lj_ir_call(J, IRCALL_lj_tab_len, tab); 873 TRef sep = lj_ir_tostr(J, J->base[2]);
754 GCtab *t = tabV(&rd->argv[0]); 874 int32_t vrep = argv2int(J, &rd->argv[1]);
755 MSize len = lj_tab_len(t); 875 emitir(IRTGI(vrep > 1 ? IR_GT : IR_LE), rep, lj_ir_kint(J, 1));
756 emitir(IRTGI(len ? IR_NE : IR_EQ), trlen, lj_ir_kint(J, 0)); 876 if (vrep > 1) {
757 if (len) { 877 TRef hdr2 = recff_bufhdr(J);
758 RecordIndex ix; 878 TRef tr2 = emitir(IRT(IR_BUFPUT, IRT_P32), hdr2, sep);
759 ix.tab = tab; 879 tr2 = emitir(IRT(IR_BUFPUT, IRT_P32), tr2, str);
760 ix.key = trlen; 880 str2 = emitir(IRT(IR_BUFSTR, IRT_STR), tr2, hdr2);
761 settabV(J->L, &ix.tabv, t); 881 }
762 setintV(&ix.keyv, len); 882 }
763 ix.idxchain = 0; 883 tr = hdr = recff_bufhdr(J);
764 if (results_wanted(J) != 0) { /* Specialize load only if needed. */ 884 if (str2) {
765 ix.val = 0; 885 tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, str);
766 J->base[0] = lj_record_idx(J, &ix); /* Load previous value. */ 886 str = str2;
767 rd->nres = 1; 887 rep = emitir(IRTI(IR_ADD), rep, lj_ir_kint(J, -1));
768 /* Assumes ix.key/ix.tab is not modified for raw lj_record_idx(). */ 888 }
769 } 889 tr = lj_ir_call(J, IRCALL_lj_buf_putstr_rep, tr, str, rep);
770 ix.val = TREF_NIL; 890 J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
771 lj_record_idx(J, &ix); /* Remove value. */ 891}
892
893static void LJ_FASTCALL recff_string_op(jit_State *J, RecordFFData *rd)
894{
895 TRef str = lj_ir_tostr(J, J->base[0]);
896 TRef hdr = recff_bufhdr(J);
897 TRef tr = lj_ir_call(J, rd->data, hdr, str);
898 J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
899}
900
901static void LJ_FASTCALL recff_string_find(jit_State *J, RecordFFData *rd)
902{
903 TRef trstr = lj_ir_tostr(J, J->base[0]);
904 TRef trpat = lj_ir_tostr(J, J->base[1]);
905 TRef trlen = emitir(IRTI(IR_FLOAD), trstr, IRFL_STR_LEN);
906 TRef tr0 = lj_ir_kint(J, 0);
907 TRef trstart;
908 GCstr *str = argv2str(J, &rd->argv[0]);
909 GCstr *pat = argv2str(J, &rd->argv[1]);
910 int32_t start;
911 J->needsnap = 1;
912 if (tref_isnil(J->base[2])) {
913 trstart = lj_ir_kint(J, 1);
914 start = 1;
915 } else {
916 trstart = lj_opt_narrow_toint(J, J->base[2]);
917 start = argv2int(J, &rd->argv[2]);
918 }
919 trstart = recff_string_start(J, str, &start, trstart, trlen, tr0);
920 if ((MSize)start <= str->len) {
921 emitir(IRTGI(IR_ULE), trstart, trlen);
922 } else {
923 emitir(IRTGI(IR_UGT), trstart, trlen);
924#if LJ_52
925 J->base[0] = TREF_NIL;
926 return;
927#else
928 trstart = trlen;
929 start = str->len;
930#endif
931 }
932 /* Fixed arg or no pattern matching chars? (Specialized to pattern string.) */
933 if ((J->base[2] && tref_istruecond(J->base[3])) ||
934 (emitir(IRTG(IR_EQ, IRT_STR), trpat, lj_ir_kstr(J, pat)),
935 !lj_str_haspattern(pat))) { /* Search for fixed string. */
936 TRef trsptr = emitir(IRT(IR_STRREF, IRT_P32), trstr, trstart);
937 TRef trpptr = emitir(IRT(IR_STRREF, IRT_P32), trpat, tr0);
938 TRef trslen = emitir(IRTI(IR_SUB), trlen, trstart);
939 TRef trplen = emitir(IRTI(IR_FLOAD), trpat, IRFL_STR_LEN);
940 TRef tr = lj_ir_call(J, IRCALL_lj_str_find, trsptr, trpptr, trslen, trplen);
941 TRef trp0 = lj_ir_kkptr(J, NULL);
942 if (lj_str_find(strdata(str)+(MSize)start, strdata(pat),
943 str->len-(MSize)start, pat->len)) {
944 TRef pos;
945 emitir(IRTG(IR_NE, IRT_P32), tr, trp0);
946 pos = emitir(IRTI(IR_SUB), tr, emitir(IRT(IR_STRREF, IRT_P32), trstr, tr0));
947 J->base[0] = emitir(IRTI(IR_ADD), pos, lj_ir_kint(J, 1));
948 J->base[1] = emitir(IRTI(IR_ADD), pos, trplen);
949 rd->nres = 2;
950 } else {
951 emitir(IRTG(IR_EQ, IRT_P32), tr, trp0);
952 J->base[0] = TREF_NIL;
953 }
954 } else { /* Search for pattern. */
955 recff_nyiu(J, rd);
956 return;
957 }
958}
959
960static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd)
961{
962 TRef trfmt = lj_ir_tostr(J, J->base[0]);
963 GCstr *fmt = argv2str(J, &rd->argv[0]);
964 int arg = 1;
965 TRef hdr, tr;
966 FormatState fs;
967 SFormat sf;
968 /* Specialize to the format string. */
969 emitir(IRTG(IR_EQ, IRT_STR), trfmt, lj_ir_kstr(J, fmt));
970 tr = hdr = recff_bufhdr(J);
971 lj_strfmt_init(&fs, strdata(fmt), fmt->len);
972 while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) { /* Parse format. */
973 TRef tra = sf == STRFMT_LIT ? 0 : J->base[arg++];
974 TRef trsf = lj_ir_kint(J, (int32_t)sf);
975 IRCallID id;
976 switch (STRFMT_TYPE(sf)) {
977 case STRFMT_LIT:
978 tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr,
979 lj_ir_kstr(J, lj_str_new(J->L, fs.str, fs.len)));
980 break;
981 case STRFMT_INT:
982 id = IRCALL_lj_strfmt_putfnum_int;
983 handle_int:
984 if (!tref_isinteger(tra))
985 goto handle_num;
986 if (sf == STRFMT_INT) { /* Shortcut for plain %d. */
987 tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr,
988 emitir(IRT(IR_TOSTR, IRT_STR), tra, IRTOSTR_INT));
989 } else {
990#if LJ_HASFFI
991 tra = emitir(IRT(IR_CONV, IRT_U64), tra,
992 (IRT_INT|(IRT_U64<<5)|IRCONV_SEXT));
993 tr = lj_ir_call(J, IRCALL_lj_strfmt_putfxint, tr, trsf, tra);
994 lj_needsplit(J);
995#else
996 recff_nyiu(J, rd); /* Don't bother working around this NYI. */
997 return;
998#endif
772 } 999 }
773 } else { /* Complex case: remove in the middle. */ 1000 break;
774 recff_nyiu(J); 1001 case STRFMT_UINT:
1002 id = IRCALL_lj_strfmt_putfnum_uint;
1003 goto handle_int;
1004 case STRFMT_NUM:
1005 id = IRCALL_lj_strfmt_putfnum;
1006 handle_num:
1007 tra = lj_ir_tonum(J, tra);
1008 tr = lj_ir_call(J, id, tr, trsf, tra);
1009 if (LJ_SOFTFP) lj_needsplit(J);
1010 break;
1011 case STRFMT_STR:
1012 if (!tref_isstr(tra)) {
1013 recff_nyiu(J, rd); /* NYI: __tostring and non-string types for %s. */
1014 return;
1015 }
1016 if (sf == STRFMT_STR) /* Shortcut for plain %s. */
1017 tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, tra);
1018 else if ((sf & STRFMT_T_QUOTED))
1019 tr = lj_ir_call(J, IRCALL_lj_strfmt_putquoted, tr, tra);
1020 else
1021 tr = lj_ir_call(J, IRCALL_lj_strfmt_putfstr, tr, trsf, tra);
1022 break;
1023 case STRFMT_CHAR:
1024 tra = lj_opt_narrow_toint(J, tra);
1025 if (sf == STRFMT_CHAR) /* Shortcut for plain %c. */
1026 tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr,
1027 emitir(IRT(IR_TOSTR, IRT_STR), tra, IRTOSTR_CHAR));
1028 else
1029 tr = lj_ir_call(J, IRCALL_lj_strfmt_putfchar, tr, trsf, tra);
1030 break;
1031 case STRFMT_PTR: /* NYI */
1032 case STRFMT_ERR:
1033 default:
1034 recff_nyiu(J, rd);
1035 return;
775 } 1036 }
776 } /* else: Interpreter will throw. */ 1037 }
1038 J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
777} 1039}
778 1040
1041/* -- Table library fast functions ---------------------------------------- */
1042
779static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd) 1043static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd)
780{ 1044{
781 RecordIndex ix; 1045 RecordIndex ix;
@@ -792,11 +1056,49 @@ static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd)
792 ix.idxchain = 0; 1056 ix.idxchain = 0;
793 lj_record_idx(J, &ix); /* Set new value. */ 1057 lj_record_idx(J, &ix); /* Set new value. */
794 } else { /* Complex case: insert in the middle. */ 1058 } else { /* Complex case: insert in the middle. */
795 recff_nyiu(J); 1059 recff_nyiu(J, rd);
1060 return;
796 } 1061 }
797 } /* else: Interpreter will throw. */ 1062 } /* else: Interpreter will throw. */
798} 1063}
799 1064
1065static void LJ_FASTCALL recff_table_concat(jit_State *J, RecordFFData *rd)
1066{
1067 TRef tab = J->base[0];
1068 if (tref_istab(tab)) {
1069 TRef sep = !tref_isnil(J->base[1]) ?
1070 lj_ir_tostr(J, J->base[1]) : lj_ir_knull(J, IRT_STR);
1071 TRef tri = (J->base[1] && !tref_isnil(J->base[2])) ?
1072 lj_opt_narrow_toint(J, J->base[2]) : lj_ir_kint(J, 1);
1073 TRef tre = (J->base[1] && J->base[2] && !tref_isnil(J->base[3])) ?
1074 lj_opt_narrow_toint(J, J->base[3]) :
1075 lj_ir_call(J, IRCALL_lj_tab_len, tab);
1076 TRef hdr = recff_bufhdr(J);
1077 TRef tr = lj_ir_call(J, IRCALL_lj_buf_puttab, hdr, tab, sep, tri, tre);
1078 emitir(IRTG(IR_NE, IRT_PTR), tr, lj_ir_kptr(J, NULL));
1079 J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
1080 } /* else: Interpreter will throw. */
1081 UNUSED(rd);
1082}
1083
1084static void LJ_FASTCALL recff_table_new(jit_State *J, RecordFFData *rd)
1085{
1086 TRef tra = lj_opt_narrow_toint(J, J->base[0]);
1087 TRef trh = lj_opt_narrow_toint(J, J->base[1]);
1088 J->base[0] = lj_ir_call(J, IRCALL_lj_tab_new_ah, tra, trh);
1089 UNUSED(rd);
1090}
1091
1092static void LJ_FASTCALL recff_table_clear(jit_State *J, RecordFFData *rd)
1093{
1094 TRef tr = J->base[0];
1095 if (tref_istab(tr)) {
1096 rd->nres = 0;
1097 lj_ir_call(J, IRCALL_lj_tab_clear, tr);
1098 J->needsnap = 1;
1099 } /* else: Interpreter will throw. */
1100}
1101
800/* -- I/O library fast functions ------------------------------------------ */ 1102/* -- I/O library fast functions ------------------------------------------ */
801 1103
802/* Get FILE* for I/O function. Any I/O error aborts recording, so there's 1104/* Get FILE* for I/O function. Any I/O error aborts recording, so there's
@@ -832,7 +1134,10 @@ static void LJ_FASTCALL recff_io_write(jit_State *J, RecordFFData *rd)
832 TRef buf = emitir(IRT(IR_STRREF, IRT_P32), str, zero); 1134 TRef buf = emitir(IRT(IR_STRREF, IRT_P32), str, zero);
833 TRef len = emitir(IRTI(IR_FLOAD), str, IRFL_STR_LEN); 1135 TRef len = emitir(IRTI(IR_FLOAD), str, IRFL_STR_LEN);
834 if (tref_isk(len) && IR(tref_ref(len))->i == 1) { 1136 if (tref_isk(len) && IR(tref_ref(len))->i == 1) {
835 TRef tr = emitir(IRT(IR_XLOAD, IRT_U8), buf, IRXLOAD_READONLY); 1137 IRIns *irs = IR(tref_ref(str));
1138 TRef tr = (irs->o == IR_TOSTR && irs->op2 == IRTOSTR_CHAR) ?
1139 irs->op1 :
1140 emitir(IRT(IR_XLOAD, IRT_U8), buf, IRXLOAD_READONLY);
836 tr = lj_ir_call(J, IRCALL_fputc, tr, fp); 1141 tr = lj_ir_call(J, IRCALL_fputc, tr, fp);
837 if (results_wanted(J) != 0) /* Check result only if not ignored. */ 1142 if (results_wanted(J) != 0) /* Check result only if not ignored. */
838 emitir(IRTGI(IR_NE), tr, lj_ir_kint(J, -1)); 1143 emitir(IRTGI(IR_NE), tr, lj_ir_kint(J, -1));
@@ -854,6 +1159,28 @@ static void LJ_FASTCALL recff_io_flush(jit_State *J, RecordFFData *rd)
854 J->base[0] = TREF_TRUE; 1159 J->base[0] = TREF_TRUE;
855} 1160}
856 1161
1162/* -- Debug library fast functions ---------------------------------------- */
1163
1164static void LJ_FASTCALL recff_debug_getmetatable(jit_State *J, RecordFFData *rd)
1165{
1166 GCtab *mt;
1167 TRef mtref;
1168 TRef tr = J->base[0];
1169 if (tref_istab(tr)) {
1170 mt = tabref(tabV(&rd->argv[0])->metatable);
1171 mtref = emitir(IRT(IR_FLOAD, IRT_TAB), tr, IRFL_TAB_META);
1172 } else if (tref_isudata(tr)) {
1173 mt = tabref(udataV(&rd->argv[0])->metatable);
1174 mtref = emitir(IRT(IR_FLOAD, IRT_TAB), tr, IRFL_UDATA_META);
1175 } else {
1176 mt = tabref(basemt_obj(J2G(J), &rd->argv[0]));
1177 J->base[0] = mt ? lj_ir_ktab(J, mt) : TREF_NIL;
1178 return;
1179 }
1180 emitir(IRTG(mt ? IR_NE : IR_EQ, IRT_TAB), mtref, lj_ir_knull(J, IRT_TAB));
1181 J->base[0] = mt ? mtref : TREF_NIL;
1182}
1183
857/* -- Record calls to fast functions -------------------------------------- */ 1184/* -- Record calls to fast functions -------------------------------------- */
858 1185
859#include "lj_recdef.h" 1186#include "lj_recdef.h"
diff --git a/src/lj_frame.h b/src/lj_frame.h
index cd57be22..b9595a5a 100644
--- a/src/lj_frame.h
+++ b/src/lj_frame.h
@@ -11,7 +11,16 @@
11 11
12/* -- Lua stack frame ----------------------------------------------------- */ 12/* -- Lua stack frame ----------------------------------------------------- */
13 13
14/* Frame type markers in callee function slot (callee base-1). */ 14/* Frame type markers in LSB of PC (4-byte aligned) or delta (8-byte aligned:
15**
16** PC 00 Lua frame
17** delta 001 C frame
18** delta 010 Continuation frame
19** delta 011 Lua vararg frame
20** delta 101 cpcall() frame
21** delta 110 ff pcall() frame
22** delta 111 ff pcall() frame with active hook
23*/
15enum { 24enum {
16 FRAME_LUA, FRAME_C, FRAME_CONT, FRAME_VARG, 25 FRAME_LUA, FRAME_C, FRAME_CONT, FRAME_VARG,
17 FRAME_LUAP, FRAME_CP, FRAME_PCALL, FRAME_PCALLH 26 FRAME_LUAP, FRAME_CP, FRAME_PCALL, FRAME_PCALLH
@@ -21,9 +30,47 @@ enum {
21#define FRAME_TYPEP (FRAME_TYPE|FRAME_P) 30#define FRAME_TYPEP (FRAME_TYPE|FRAME_P)
22 31
23/* Macros to access and modify Lua frames. */ 32/* Macros to access and modify Lua frames. */
33#if LJ_FR2
34/* Two-slot frame info, required for 64 bit PC/GCRef:
35**
36** base-2 base-1 | base base+1 ...
37** [func PC/delta/ft] | [slots ...]
38** ^-- frame | ^-- base ^-- top
39**
40** Continuation frames:
41**
42** base-4 base-3 base-2 base-1 | base base+1 ...
43** [cont PC ] [func PC/delta/ft] | [slots ...]
44** ^-- frame | ^-- base ^-- top
45*/
46#define frame_gc(f) (gcval((f)-1))
47#define frame_ftsz(f) ((ptrdiff_t)(f)->ftsz)
48#define frame_pc(f) ((const BCIns *)frame_ftsz(f))
49#define setframe_gc(f, p, tp) (setgcVraw((f)-1, (p), (tp)))
50#define setframe_ftsz(f, sz) ((f)->ftsz = (sz))
51#define setframe_pc(f, pc) ((f)->ftsz = (int64_t)(intptr_t)(pc))
52#else
53/* One-slot frame info, sufficient for 32 bit PC/GCRef:
54**
55** base-1 | base base+1 ...
56** lo hi |
57** [func | PC/delta/ft] | [slots ...]
58** ^-- frame | ^-- base ^-- top
59**
60** Continuation frames:
61**
62** base-2 base-1 | base base+1 ...
63** lo hi lo hi |
64** [cont | PC] [func | PC/delta/ft] | [slots ...]
65** ^-- frame | ^-- base ^-- top
66*/
24#define frame_gc(f) (gcref((f)->fr.func)) 67#define frame_gc(f) (gcref((f)->fr.func))
25#define frame_func(f) (&frame_gc(f)->fn) 68#define frame_ftsz(f) ((ptrdiff_t)(f)->fr.tp.ftsz)
26#define frame_ftsz(f) ((f)->fr.tp.ftsz) 69#define frame_pc(f) (mref((f)->fr.tp.pcr, const BCIns))
70#define setframe_gc(f, p, tp) (setgcref((f)->fr.func, (p)), UNUSED(tp))
71#define setframe_ftsz(f, sz) ((f)->fr.tp.ftsz = (int32_t)(sz))
72#define setframe_pc(f, pc) (setmref((f)->fr.tp.pcr, (pc)))
73#endif
27 74
28#define frame_type(f) (frame_ftsz(f) & FRAME_TYPE) 75#define frame_type(f) (frame_ftsz(f) & FRAME_TYPE)
29#define frame_typep(f) (frame_ftsz(f) & FRAME_TYPEP) 76#define frame_typep(f) (frame_ftsz(f) & FRAME_TYPEP)
@@ -33,27 +80,36 @@ enum {
33#define frame_isvarg(f) (frame_typep(f) == FRAME_VARG) 80#define frame_isvarg(f) (frame_typep(f) == FRAME_VARG)
34#define frame_ispcall(f) ((frame_ftsz(f) & 6) == FRAME_PCALL) 81#define frame_ispcall(f) ((frame_ftsz(f) & 6) == FRAME_PCALL)
35 82
36#define frame_pc(f) (mref((f)->fr.tp.pcr, const BCIns)) 83#define frame_func(f) (&frame_gc(f)->fn)
84#define frame_delta(f) (frame_ftsz(f) >> 3)
85#define frame_sized(f) (frame_ftsz(f) & ~FRAME_TYPEP)
86
87enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
88
89#if LJ_FR2
90#define frame_contpc(f) (frame_pc((f)-2))
91#define frame_contv(f) (((f)-3)->u64)
92#else
37#define frame_contpc(f) (frame_pc((f)-1)) 93#define frame_contpc(f) (frame_pc((f)-1))
38#if LJ_64 94#define frame_contv(f) (((f)-1)->u32.lo)
95#endif
96#if LJ_FR2
97#define frame_contf(f) ((ASMFunction)(uintptr_t)((f)-3)->u64)
98#elif LJ_64
39#define frame_contf(f) \ 99#define frame_contf(f) \
40 ((ASMFunction)(void *)((intptr_t)lj_vm_asm_begin + \ 100 ((ASMFunction)(void *)((intptr_t)lj_vm_asm_begin + \
41 (intptr_t)(int32_t)((f)-1)->u32.lo)) 101 (intptr_t)(int32_t)((f)-1)->u32.lo))
42#else 102#else
43#define frame_contf(f) ((ASMFunction)gcrefp(((f)-1)->gcr, void)) 103#define frame_contf(f) ((ASMFunction)gcrefp(((f)-1)->gcr, void))
44#endif 104#endif
45#define frame_delta(f) (frame_ftsz(f) >> 3) 105#define frame_iscont_fficb(f) \
46#define frame_sized(f) (frame_ftsz(f) & ~FRAME_TYPEP) 106 (LJ_HASFFI && frame_contv(f) == LJ_CONT_FFI_CALLBACK)
47 107
48#define frame_prevl(f) ((f) - (1+bc_a(frame_pc(f)[-1]))) 108#define frame_prevl(f) ((f) - (1+LJ_FR2+bc_a(frame_pc(f)[-1])))
49#define frame_prevd(f) ((TValue *)((char *)(f) - frame_sized(f))) 109#define frame_prevd(f) ((TValue *)((char *)(f) - frame_sized(f)))
50#define frame_prev(f) (frame_islua(f)?frame_prevl(f):frame_prevd(f)) 110#define frame_prev(f) (frame_islua(f)?frame_prevl(f):frame_prevd(f))
51/* Note: this macro does not skip over FRAME_VARG. */ 111/* Note: this macro does not skip over FRAME_VARG. */
52 112
53#define setframe_pc(f, pc) (setmref((f)->fr.tp.pcr, (pc)))
54#define setframe_ftsz(f, sz) ((f)->fr.tp.ftsz = (sz))
55#define setframe_gc(f, p) (setgcref((f)->fr.func, (p)))
56
57/* -- C stack frame ------------------------------------------------------- */ 113/* -- C stack frame ------------------------------------------------------- */
58 114
59/* Macros to access and modify the C stack frame chain. */ 115/* Macros to access and modify the C stack frame chain. */
@@ -103,6 +159,15 @@ enum {
103#define CFRAME_SIZE 64 159#define CFRAME_SIZE 64
104#endif 160#endif
105#define CFRAME_SHIFT_MULTRES 3 161#define CFRAME_SHIFT_MULTRES 3
162#elif LJ_TARGET_ARM64
163#define CFRAME_OFS_ERRF 196
164#define CFRAME_OFS_NRES 200
165#define CFRAME_OFS_PREV 160
166#define CFRAME_OFS_L 176
167#define CFRAME_OFS_PC 168
168#define CFRAME_OFS_MULTRES 192
169#define CFRAME_SIZE 208
170#define CFRAME_SHIFT_MULTRES 3
106#elif LJ_TARGET_PPC 171#elif LJ_TARGET_PPC
107#if LJ_TARGET_XBOX360 172#if LJ_TARGET_XBOX360
108#define CFRAME_OFS_ERRF 424 173#define CFRAME_OFS_ERRF 424
@@ -113,7 +178,7 @@ enum {
113#define CFRAME_OFS_MULTRES 408 178#define CFRAME_OFS_MULTRES 408
114#define CFRAME_SIZE 384 179#define CFRAME_SIZE 384
115#define CFRAME_SHIFT_MULTRES 3 180#define CFRAME_SHIFT_MULTRES 3
116#elif LJ_ARCH_PPC64 181#elif LJ_ARCH_PPC32ON64
117#define CFRAME_OFS_ERRF 472 182#define CFRAME_OFS_ERRF 472
118#define CFRAME_OFS_NRES 468 183#define CFRAME_OFS_NRES 468
119#define CFRAME_OFS_PREV 448 184#define CFRAME_OFS_PREV 448
@@ -132,15 +197,6 @@ enum {
132#define CFRAME_SIZE 272 197#define CFRAME_SIZE 272
133#define CFRAME_SHIFT_MULTRES 3 198#define CFRAME_SHIFT_MULTRES 3
134#endif 199#endif
135#elif LJ_TARGET_PPCSPE
136#define CFRAME_OFS_ERRF 28
137#define CFRAME_OFS_NRES 24
138#define CFRAME_OFS_PREV 20
139#define CFRAME_OFS_L 16
140#define CFRAME_OFS_PC 12
141#define CFRAME_OFS_MULTRES 8
142#define CFRAME_SIZE 184
143#define CFRAME_SHIFT_MULTRES 3
144#elif LJ_TARGET_MIPS 200#elif LJ_TARGET_MIPS
145#define CFRAME_OFS_ERRF 124 201#define CFRAME_OFS_ERRF 124
146#define CFRAME_OFS_NRES 120 202#define CFRAME_OFS_NRES 120
diff --git a/src/lj_gc.c b/src/lj_gc.c
index b498abaa..99d664aa 100644
--- a/src/lj_gc.c
+++ b/src/lj_gc.c
@@ -12,6 +12,7 @@
12#include "lj_obj.h" 12#include "lj_obj.h"
13#include "lj_gc.h" 13#include "lj_gc.h"
14#include "lj_err.h" 14#include "lj_err.h"
15#include "lj_buf.h"
15#include "lj_str.h" 16#include "lj_str.h"
16#include "lj_tab.h" 17#include "lj_tab.h"
17#include "lj_func.h" 18#include "lj_func.h"
@@ -267,12 +268,12 @@ static MSize gc_traverse_frames(global_State *g, lua_State *th)
267{ 268{
268 TValue *frame, *top = th->top-1, *bot = tvref(th->stack); 269 TValue *frame, *top = th->top-1, *bot = tvref(th->stack);
269 /* Note: extra vararg frame not skipped, marks function twice (harmless). */ 270 /* Note: extra vararg frame not skipped, marks function twice (harmless). */
270 for (frame = th->base-1; frame > bot; frame = frame_prev(frame)) { 271 for (frame = th->base-1; frame > bot+LJ_FR2; frame = frame_prev(frame)) {
271 GCfunc *fn = frame_func(frame); 272 GCfunc *fn = frame_func(frame);
272 TValue *ftop = frame; 273 TValue *ftop = frame;
273 if (isluafunc(fn)) ftop += funcproto(fn)->framesize; 274 if (isluafunc(fn)) ftop += funcproto(fn)->framesize;
274 if (ftop > top) top = ftop; 275 if (ftop > top) top = ftop;
275 gc_markobj(g, fn); /* Need to mark hidden function (or L). */ 276 if (!LJ_FR2) gc_markobj(g, fn); /* Need to mark hidden function (or L). */
276 } 277 }
277 top++; /* Correct bias of -1 (frame == base-1). */ 278 top++; /* Correct bias of -1 (frame == base-1). */
278 if (top > tvref(th->maxstack)) top = tvref(th->maxstack); 279 if (top > tvref(th->maxstack)) top = tvref(th->maxstack);
@@ -283,7 +284,7 @@ static MSize gc_traverse_frames(global_State *g, lua_State *th)
283static void gc_traverse_thread(global_State *g, lua_State *th) 284static void gc_traverse_thread(global_State *g, lua_State *th)
284{ 285{
285 TValue *o, *top = th->top; 286 TValue *o, *top = th->top;
286 for (o = tvref(th->stack)+1; o < top; o++) 287 for (o = tvref(th->stack)+1+LJ_FR2; o < top; o++)
287 gc_marktv(g, o); 288 gc_marktv(g, o);
288 if (g->gc.state == GCSatomic) { 289 if (g->gc.state == GCSatomic) {
289 top = tvref(th->stack) + th->stacksize; 290 top = tvref(th->stack) + th->stacksize;
@@ -348,15 +349,6 @@ static size_t gc_propagate_gray(global_State *g)
348 349
349/* -- Sweep phase --------------------------------------------------------- */ 350/* -- Sweep phase --------------------------------------------------------- */
350 351
351/* Try to shrink some common data structures. */
352static void gc_shrink(global_State *g, lua_State *L)
353{
354 if (g->strnum <= (g->strmask >> 2) && g->strmask > LJ_MIN_STRTAB*2-1)
355 lj_str_resize(L, g->strmask >> 1); /* Shrink string table. */
356 if (g->tmpbuf.sz > LJ_MIN_SBUF*2)
357 lj_str_resizebuf(L, &g->tmpbuf, g->tmpbuf.sz >> 1); /* Shrink temp buf. */
358}
359
360/* Type of GC free functions. */ 352/* Type of GC free functions. */
361typedef void (LJ_FASTCALL *GCFreeFunc)(global_State *g, GCobj *o); 353typedef void (LJ_FASTCALL *GCFreeFunc)(global_State *g, GCobj *o);
362 354
@@ -382,7 +374,7 @@ static const GCFreeFunc gc_freefunc[] = {
382}; 374};
383 375
384/* Full sweep of a GC list. */ 376/* Full sweep of a GC list. */
385#define gc_fullsweep(g, p) gc_sweep(g, (p), LJ_MAX_MEM) 377#define gc_fullsweep(g, p) gc_sweep(g, (p), ~(uint32_t)0)
386 378
387/* Partial sweep of a GC list. */ 379/* Partial sweep of a GC list. */
388static GCRef *gc_sweep(global_State *g, GCRef *p, uint32_t lim) 380static GCRef *gc_sweep(global_State *g, GCRef *p, uint32_t lim)
@@ -460,17 +452,18 @@ static void gc_call_finalizer(global_State *g, lua_State *L,
460{ 452{
461 /* Save and restore lots of state around the __gc callback. */ 453 /* Save and restore lots of state around the __gc callback. */
462 uint8_t oldh = hook_save(g); 454 uint8_t oldh = hook_save(g);
463 MSize oldt = g->gc.threshold; 455 GCSize oldt = g->gc.threshold;
464 int errcode; 456 int errcode;
465 TValue *top; 457 TValue *top;
466 lj_trace_abort(g); 458 lj_trace_abort(g);
467 top = L->top;
468 L->top = top+2;
469 hook_entergc(g); /* Disable hooks and new traces during __gc. */ 459 hook_entergc(g); /* Disable hooks and new traces during __gc. */
470 g->gc.threshold = LJ_MAX_MEM; /* Prevent GC steps. */ 460 g->gc.threshold = LJ_MAX_MEM; /* Prevent GC steps. */
471 copyTV(L, top, mo); 461 top = L->top;
472 setgcV(L, top+1, o, ~o->gch.gct); 462 copyTV(L, top++, mo);
473 errcode = lj_vm_pcall(L, top+1, 1+0, -1); /* Stack: |mo|o| -> | */ 463 if (LJ_FR2) setnilV(top++);
464 setgcV(L, top, o, ~o->gch.gct);
465 L->top = top+1;
466 errcode = lj_vm_pcall(L, top, 1+0, -1); /* Stack: |mo|o| -> | */
474 hook_restore(g, oldh); 467 hook_restore(g, oldh);
475 g->gc.threshold = oldt; /* Restore GC threshold. */ 468 g->gc.threshold = oldt; /* Restore GC threshold. */
476 if (errcode) 469 if (errcode)
@@ -483,7 +476,7 @@ static void gc_finalize(lua_State *L)
483 global_State *g = G(L); 476 global_State *g = G(L);
484 GCobj *o = gcnext(gcref(g->gc.mmudata)); 477 GCobj *o = gcnext(gcref(g->gc.mmudata));
485 cTValue *mo; 478 cTValue *mo;
486 lua_assert(gcref(g->jit_L) == NULL); /* Must not be called on trace. */ 479 lua_assert(tvref(g->jit_base) == NULL); /* Must not be called on trace. */
487 /* Unchain from list of userdata to be finalized. */ 480 /* Unchain from list of userdata to be finalized. */
488 if (o == gcref(g->gc.mmudata)) 481 if (o == gcref(g->gc.mmudata))
489 setgcrefnull(g->gc.mmudata); 482 setgcrefnull(g->gc.mmudata);
@@ -592,11 +585,13 @@ static void atomic(global_State *g, lua_State *L)
592 /* All marking done, clear weak tables. */ 585 /* All marking done, clear weak tables. */
593 gc_clearweak(gcref(g->gc.weak)); 586 gc_clearweak(gcref(g->gc.weak));
594 587
588 lj_buf_shrink(L, &g->tmpbuf); /* Shrink temp buffer. */
589
595 /* Prepare for sweep phase. */ 590 /* Prepare for sweep phase. */
596 g->gc.currentwhite = (uint8_t)otherwhite(g); /* Flip current white. */ 591 g->gc.currentwhite = (uint8_t)otherwhite(g); /* Flip current white. */
597 g->strempty.marked = g->gc.currentwhite; 592 g->strempty.marked = g->gc.currentwhite;
598 setmref(g->gc.sweep, &g->gc.root); 593 setmref(g->gc.sweep, &g->gc.root);
599 g->gc.estimate = g->gc.total - (MSize)udsize; /* Initial estimate. */ 594 g->gc.estimate = g->gc.total - (GCSize)udsize; /* Initial estimate. */
600} 595}
601 596
602/* GC state machine. Returns a cost estimate for each step performed. */ 597/* GC state machine. Returns a cost estimate for each step performed. */
@@ -613,14 +608,14 @@ static size_t gc_onestep(lua_State *L)
613 g->gc.state = GCSatomic; /* End of mark phase. */ 608 g->gc.state = GCSatomic; /* End of mark phase. */
614 return 0; 609 return 0;
615 case GCSatomic: 610 case GCSatomic:
616 if (gcref(g->jit_L)) /* Don't run atomic phase on trace. */ 611 if (tvref(g->jit_base)) /* Don't run atomic phase on trace. */
617 return LJ_MAX_MEM; 612 return LJ_MAX_MEM;
618 atomic(g, L); 613 atomic(g, L);
619 g->gc.state = GCSsweepstring; /* Start of sweep phase. */ 614 g->gc.state = GCSsweepstring; /* Start of sweep phase. */
620 g->gc.sweepstr = 0; 615 g->gc.sweepstr = 0;
621 return 0; 616 return 0;
622 case GCSsweepstring: { 617 case GCSsweepstring: {
623 MSize old = g->gc.total; 618 GCSize old = g->gc.total;
624 gc_fullsweep(g, &g->strhash[g->gc.sweepstr++]); /* Sweep one chain. */ 619 gc_fullsweep(g, &g->strhash[g->gc.sweepstr++]); /* Sweep one chain. */
625 if (g->gc.sweepstr > g->strmask) 620 if (g->gc.sweepstr > g->strmask)
626 g->gc.state = GCSsweep; /* All string hash chains sweeped. */ 621 g->gc.state = GCSsweep; /* All string hash chains sweeped. */
@@ -629,12 +624,13 @@ static size_t gc_onestep(lua_State *L)
629 return GCSWEEPCOST; 624 return GCSWEEPCOST;
630 } 625 }
631 case GCSsweep: { 626 case GCSsweep: {
632 MSize old = g->gc.total; 627 GCSize old = g->gc.total;
633 setmref(g->gc.sweep, gc_sweep(g, mref(g->gc.sweep, GCRef), GCSWEEPMAX)); 628 setmref(g->gc.sweep, gc_sweep(g, mref(g->gc.sweep, GCRef), GCSWEEPMAX));
634 lua_assert(old >= g->gc.total); 629 lua_assert(old >= g->gc.total);
635 g->gc.estimate -= old - g->gc.total; 630 g->gc.estimate -= old - g->gc.total;
636 if (gcref(*mref(g->gc.sweep, GCRef)) == NULL) { 631 if (gcref(*mref(g->gc.sweep, GCRef)) == NULL) {
637 gc_shrink(g, L); 632 if (g->strnum <= (g->strmask >> 2) && g->strmask > LJ_MIN_STRTAB*2-1)
633 lj_str_resize(L, g->strmask >> 1); /* Shrink string table. */
638 if (gcref(g->gc.mmudata)) { /* Need any finalizations? */ 634 if (gcref(g->gc.mmudata)) { /* Need any finalizations? */
639 g->gc.state = GCSfinalize; 635 g->gc.state = GCSfinalize;
640#if LJ_HASFFI 636#if LJ_HASFFI
@@ -649,7 +645,7 @@ static size_t gc_onestep(lua_State *L)
649 } 645 }
650 case GCSfinalize: 646 case GCSfinalize:
651 if (gcref(g->gc.mmudata) != NULL) { 647 if (gcref(g->gc.mmudata) != NULL) {
652 if (gcref(g->jit_L)) /* Don't call finalizers on trace. */ 648 if (tvref(g->jit_base)) /* Don't call finalizers on trace. */
653 return LJ_MAX_MEM; 649 return LJ_MAX_MEM;
654 gc_finalize(L); /* Finalize one userdata object. */ 650 gc_finalize(L); /* Finalize one userdata object. */
655 if (g->gc.estimate > GCFINALIZECOST) 651 if (g->gc.estimate > GCFINALIZECOST)
@@ -672,7 +668,7 @@ static size_t gc_onestep(lua_State *L)
672int LJ_FASTCALL lj_gc_step(lua_State *L) 668int LJ_FASTCALL lj_gc_step(lua_State *L)
673{ 669{
674 global_State *g = G(L); 670 global_State *g = G(L);
675 MSize lim; 671 GCSize lim;
676 int32_t ostate = g->vmstate; 672 int32_t ostate = g->vmstate;
677 setvmstate(g, GC); 673 setvmstate(g, GC);
678 lim = (GCSTEPSIZE/100) * g->gc.stepmul; 674 lim = (GCSTEPSIZE/100) * g->gc.stepmul;
@@ -681,13 +677,13 @@ int LJ_FASTCALL lj_gc_step(lua_State *L)
681 if (g->gc.total > g->gc.threshold) 677 if (g->gc.total > g->gc.threshold)
682 g->gc.debt += g->gc.total - g->gc.threshold; 678 g->gc.debt += g->gc.total - g->gc.threshold;
683 do { 679 do {
684 lim -= (MSize)gc_onestep(L); 680 lim -= (GCSize)gc_onestep(L);
685 if (g->gc.state == GCSpause) { 681 if (g->gc.state == GCSpause) {
686 g->gc.threshold = (g->gc.estimate/100) * g->gc.pause; 682 g->gc.threshold = (g->gc.estimate/100) * g->gc.pause;
687 g->vmstate = ostate; 683 g->vmstate = ostate;
688 return 1; /* Finished a GC cycle. */ 684 return 1; /* Finished a GC cycle. */
689 } 685 }
690 } while ((int32_t)lim > 0); 686 } while (sizeof(lim) == 8 ? ((int64_t)lim > 0) : ((int32_t)lim > 0));
691 if (g->gc.debt < GCSTEPSIZE) { 687 if (g->gc.debt < GCSTEPSIZE) {
692 g->gc.threshold = g->gc.total + GCSTEPSIZE; 688 g->gc.threshold = g->gc.total + GCSTEPSIZE;
693 g->vmstate = ostate; 689 g->vmstate = ostate;
@@ -711,8 +707,8 @@ void LJ_FASTCALL lj_gc_step_fixtop(lua_State *L)
711/* Perform multiple GC steps. Called from JIT-compiled code. */ 707/* Perform multiple GC steps. Called from JIT-compiled code. */
712int LJ_FASTCALL lj_gc_step_jit(global_State *g, MSize steps) 708int LJ_FASTCALL lj_gc_step_jit(global_State *g, MSize steps)
713{ 709{
714 lua_State *L = gco2th(gcref(g->jit_L)); 710 lua_State *L = gco2th(gcref(g->cur_L));
715 L->base = mref(G(L)->jit_base, TValue); 711 L->base = tvref(G(L)->jit_base);
716 L->top = curr_topL(L); 712 L->top = curr_topL(L);
717 while (steps-- > 0 && lj_gc_step(L) == 0) 713 while (steps-- > 0 && lj_gc_step(L) == 0)
718 ; 714 ;
@@ -806,7 +802,7 @@ void lj_gc_barriertrace(global_State *g, uint32_t traceno)
806/* -- Allocator ----------------------------------------------------------- */ 802/* -- Allocator ----------------------------------------------------------- */
807 803
808/* Call pluggable memory allocator to allocate or resize a fragment. */ 804/* Call pluggable memory allocator to allocate or resize a fragment. */
809void *lj_mem_realloc(lua_State *L, void *p, MSize osz, MSize nsz) 805void *lj_mem_realloc(lua_State *L, void *p, GCSize osz, GCSize nsz)
810{ 806{
811 global_State *g = G(L); 807 global_State *g = G(L);
812 lua_assert((osz == 0) == (p == NULL)); 808 lua_assert((osz == 0) == (p == NULL));
@@ -814,19 +810,19 @@ void *lj_mem_realloc(lua_State *L, void *p, MSize osz, MSize nsz)
814 if (p == NULL && nsz > 0) 810 if (p == NULL && nsz > 0)
815 lj_err_mem(L); 811 lj_err_mem(L);
816 lua_assert((nsz == 0) == (p == NULL)); 812 lua_assert((nsz == 0) == (p == NULL));
817 lua_assert(checkptr32(p)); 813 lua_assert(checkptrGC(p));
818 g->gc.total = (g->gc.total - osz) + nsz; 814 g->gc.total = (g->gc.total - osz) + nsz;
819 return p; 815 return p;
820} 816}
821 817
822/* Allocate new GC object and link it to the root set. */ 818/* Allocate new GC object and link it to the root set. */
823void * LJ_FASTCALL lj_mem_newgco(lua_State *L, MSize size) 819void * LJ_FASTCALL lj_mem_newgco(lua_State *L, GCSize size)
824{ 820{
825 global_State *g = G(L); 821 global_State *g = G(L);
826 GCobj *o = (GCobj *)g->allocf(g->allocd, NULL, 0, size); 822 GCobj *o = (GCobj *)g->allocf(g->allocd, NULL, 0, size);
827 if (o == NULL) 823 if (o == NULL)
828 lj_err_mem(L); 824 lj_err_mem(L);
829 lua_assert(checkptr32(o)); 825 lua_assert(checkptrGC(o));
830 g->gc.total += size; 826 g->gc.total += size;
831 setgcrefr(o->gch.nextgc, g->gc.root); 827 setgcrefr(o->gch.nextgc, g->gc.root);
832 setgcref(g->gc.root, o); 828 setgcref(g->gc.root, o);
diff --git a/src/lj_gc.h b/src/lj_gc.h
index ba061bc5..847eb783 100644
--- a/src/lj_gc.h
+++ b/src/lj_gc.h
@@ -107,8 +107,8 @@ static LJ_AINLINE void lj_gc_barrierback(global_State *g, GCtab *t)
107 lj_gc_barrierf(G(L), obj2gco(p), obj2gco(o)); } 107 lj_gc_barrierf(G(L), obj2gco(p), obj2gco(o)); }
108 108
109/* Allocator. */ 109/* Allocator. */
110LJ_FUNC void *lj_mem_realloc(lua_State *L, void *p, MSize osz, MSize nsz); 110LJ_FUNC void *lj_mem_realloc(lua_State *L, void *p, GCSize osz, GCSize nsz);
111LJ_FUNC void * LJ_FASTCALL lj_mem_newgco(lua_State *L, MSize size); 111LJ_FUNC void * LJ_FASTCALL lj_mem_newgco(lua_State *L, GCSize size);
112LJ_FUNC void *lj_mem_grow(lua_State *L, void *p, 112LJ_FUNC void *lj_mem_grow(lua_State *L, void *p,
113 MSize *szp, MSize lim, MSize esz); 113 MSize *szp, MSize lim, MSize esz);
114 114
@@ -116,13 +116,13 @@ LJ_FUNC void *lj_mem_grow(lua_State *L, void *p,
116 116
117static LJ_AINLINE void lj_mem_free(global_State *g, void *p, size_t osize) 117static LJ_AINLINE void lj_mem_free(global_State *g, void *p, size_t osize)
118{ 118{
119 g->gc.total -= (MSize)osize; 119 g->gc.total -= (GCSize)osize;
120 g->allocf(g->allocd, p, osize, 0); 120 g->allocf(g->allocd, p, osize, 0);
121} 121}
122 122
123#define lj_mem_newvec(L, n, t) ((t *)lj_mem_new(L, (MSize)((n)*sizeof(t)))) 123#define lj_mem_newvec(L, n, t) ((t *)lj_mem_new(L, (GCSize)((n)*sizeof(t))))
124#define lj_mem_reallocvec(L, p, on, n, t) \ 124#define lj_mem_reallocvec(L, p, on, n, t) \
125 ((p) = (t *)lj_mem_realloc(L, p, (on)*sizeof(t), (MSize)((n)*sizeof(t)))) 125 ((p) = (t *)lj_mem_realloc(L, p, (on)*sizeof(t), (GCSize)((n)*sizeof(t))))
126#define lj_mem_growvec(L, p, n, m, t) \ 126#define lj_mem_growvec(L, p, n, m, t) \
127 ((p) = (t *)lj_mem_grow(L, (p), &(n), (m), (MSize)sizeof(t))) 127 ((p) = (t *)lj_mem_grow(L, (p), &(n), (m), (MSize)sizeof(t)))
128#define lj_mem_freevec(g, p, n, t) lj_mem_free(g, (p), (n)*sizeof(t)) 128#define lj_mem_freevec(g, p, n, t) lj_mem_free(g, (p), (n)*sizeof(t))
diff --git a/src/lj_gdbjit.c b/src/lj_gdbjit.c
index d3f3e611..c289cd8e 100644
--- a/src/lj_gdbjit.c
+++ b/src/lj_gdbjit.c
@@ -14,6 +14,8 @@
14#include "lj_err.h" 14#include "lj_err.h"
15#include "lj_debug.h" 15#include "lj_debug.h"
16#include "lj_frame.h" 16#include "lj_frame.h"
17#include "lj_buf.h"
18#include "lj_strfmt.h"
17#include "lj_jit.h" 19#include "lj_jit.h"
18#include "lj_dispatch.h" 20#include "lj_dispatch.h"
19 21
@@ -428,16 +430,6 @@ static void gdbjit_catnum(GDBJITctx *ctx, uint32_t n)
428 *ctx->p++ = '0' + n; 430 *ctx->p++ = '0' + n;
429} 431}
430 432
431/* Add a ULEB128 value. */
432static void gdbjit_uleb128(GDBJITctx *ctx, uint32_t v)
433{
434 uint8_t *p = ctx->p;
435 for (; v >= 0x80; v >>= 7)
436 *p++ = (uint8_t)((v & 0x7f) | 0x80);
437 *p++ = (uint8_t)v;
438 ctx->p = p;
439}
440
441/* Add a SLEB128 value. */ 433/* Add a SLEB128 value. */
442static void gdbjit_sleb128(GDBJITctx *ctx, int32_t v) 434static void gdbjit_sleb128(GDBJITctx *ctx, int32_t v)
443{ 435{
@@ -454,7 +446,7 @@ static void gdbjit_sleb128(GDBJITctx *ctx, int32_t v)
454#define DU16(x) (*(uint16_t *)p = (x), p += 2) 446#define DU16(x) (*(uint16_t *)p = (x), p += 2)
455#define DU32(x) (*(uint32_t *)p = (x), p += 4) 447#define DU32(x) (*(uint32_t *)p = (x), p += 4)
456#define DADDR(x) (*(uintptr_t *)p = (x), p += sizeof(uintptr_t)) 448#define DADDR(x) (*(uintptr_t *)p = (x), p += sizeof(uintptr_t))
457#define DUV(x) (ctx->p = p, gdbjit_uleb128(ctx, (x)), p = ctx->p) 449#define DUV(x) (p = (uint8_t *)lj_strfmt_wuleb128((char *)p, (x)))
458#define DSV(x) (ctx->p = p, gdbjit_sleb128(ctx, (x)), p = ctx->p) 450#define DSV(x) (ctx->p = p, gdbjit_sleb128(ctx, (x)), p = ctx->p)
459#define DSTR(str) (ctx->p = p, gdbjit_strz(ctx, (str)), p = ctx->p) 451#define DSTR(str) (ctx->p = p, gdbjit_strz(ctx, (str)), p = ctx->p)
460#define DALIGNNOP(s) while ((uintptr_t)p & ((s)-1)) *p++ = DW_CFA_nop 452#define DALIGNNOP(s) while ((uintptr_t)p & ((s)-1)) *p++ = DW_CFA_nop
diff --git a/src/lj_ir.c b/src/lj_ir.c
index 439f3fc3..9682e05e 100644
--- a/src/lj_ir.c
+++ b/src/lj_ir.c
@@ -15,6 +15,7 @@
15#if LJ_HASJIT 15#if LJ_HASJIT
16 16
17#include "lj_gc.h" 17#include "lj_gc.h"
18#include "lj_buf.h"
18#include "lj_str.h" 19#include "lj_str.h"
19#include "lj_tab.h" 20#include "lj_tab.h"
20#include "lj_ir.h" 21#include "lj_ir.h"
@@ -29,6 +30,7 @@
29#endif 30#endif
30#include "lj_vm.h" 31#include "lj_vm.h"
31#include "lj_strscan.h" 32#include "lj_strscan.h"
33#include "lj_strfmt.h"
32#include "lj_lib.h" 34#include "lj_lib.h"
33 35
34/* Some local macros to save typing. Undef'd at the end. */ 36/* Some local macros to save typing. Undef'd at the end. */
@@ -251,7 +253,7 @@ TRef lj_ir_k64(jit_State *J, IROp op, cTValue *tv)
251 goto found; 253 goto found;
252 ref = ir_nextk(J); 254 ref = ir_nextk(J);
253 ir = IR(ref); 255 ir = IR(ref);
254 lua_assert(checkptr32(tv)); 256 lua_assert(checkptrGC(tv));
255 setmref(ir->ptr, tv); 257 setmref(ir->ptr, tv);
256 ir->t.irt = t; 258 ir->t.irt = t;
257 ir->o = op; 259 ir->o = op;
@@ -305,6 +307,7 @@ TRef lj_ir_kgc(jit_State *J, GCobj *o, IRType t)
305{ 307{
306 IRIns *ir, *cir = J->cur.ir; 308 IRIns *ir, *cir = J->cur.ir;
307 IRRef ref; 309 IRRef ref;
310 lua_assert(!LJ_GC64); /* TODO_GC64: major changes required. */
308 lua_assert(!isdead(J2G(J), o)); 311 lua_assert(!isdead(J2G(J), o));
309 for (ref = J->chain[IR_KGC]; ref; ref = cir[ref].prev) 312 for (ref = J->chain[IR_KGC]; ref; ref = cir[ref].prev)
310 if (ir_kgc(&cir[ref]) == o) 313 if (ir_kgc(&cir[ref]) == o)
@@ -390,7 +393,7 @@ void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir)
390 UNUSED(L); 393 UNUSED(L);
391 lua_assert(ir->o != IR_KSLOT); /* Common mistake. */ 394 lua_assert(ir->o != IR_KSLOT); /* Common mistake. */
392 switch (ir->o) { 395 switch (ir->o) {
393 case IR_KPRI: setitype(tv, irt_toitype(ir->t)); break; 396 case IR_KPRI: setpriV(tv, irt_toitype(ir->t)); break;
394 case IR_KINT: setintV(tv, ir->i); break; 397 case IR_KINT: setintV(tv, ir->i); break;
395 case IR_KGC: setgcV(L, tv, ir_kgc(ir), irt_toitype(ir->t)); break; 398 case IR_KGC: setgcV(L, tv, ir_kgc(ir), irt_toitype(ir->t)); break;
396 case IR_KPTR: case IR_KKPTR: case IR_KNULL: 399 case IR_KPTR: case IR_KKPTR: case IR_KNULL:
@@ -443,7 +446,8 @@ TRef LJ_FASTCALL lj_ir_tostr(jit_State *J, TRef tr)
443 if (!tref_isstr(tr)) { 446 if (!tref_isstr(tr)) {
444 if (!tref_isnumber(tr)) 447 if (!tref_isnumber(tr))
445 lj_trace_err(J, LJ_TRERR_BADTYPE); 448 lj_trace_err(J, LJ_TRERR_BADTYPE);
446 tr = emitir(IRT(IR_TOSTR, IRT_STR), tr, 0); 449 tr = emitir(IRT(IR_TOSTR, IRT_STR), tr,
450 tref_isnum(tr) ? IRTOSTR_NUM : IRTOSTR_INT);
447 } 451 }
448 return tr; 452 return tr;
449} 453}
diff --git a/src/lj_ir.h b/src/lj_ir.h
index 8126482e..56e19774 100644
--- a/src/lj_ir.h
+++ b/src/lj_ir.h
@@ -40,6 +40,7 @@
40 _(USE, S , ref, ___) \ 40 _(USE, S , ref, ___) \
41 _(PHI, S , ref, ref) \ 41 _(PHI, S , ref, ref) \
42 _(RENAME, S , ref, lit) \ 42 _(RENAME, S , ref, lit) \
43 _(PROF, S , ___, ___) \
43 \ 44 \
44 /* Constants. */ \ 45 /* Constants. */ \
45 _(KPRI, N , ___, ___) \ 46 _(KPRI, N , ___, ___) \
@@ -96,6 +97,7 @@
96 _(UREFC, LW, ref, lit) \ 97 _(UREFC, LW, ref, lit) \
97 _(FREF, R , ref, lit) \ 98 _(FREF, R , ref, lit) \
98 _(STRREF, N , ref, ref) \ 99 _(STRREF, N , ref, ref) \
100 _(LREF, L , ___, ___) \
99 \ 101 \
100 /* Loads and Stores. These must be in the same order. */ \ 102 /* Loads and Stores. These must be in the same order. */ \
101 _(ALOAD, L , ref, ___) \ 103 _(ALOAD, L , ref, ___) \
@@ -120,6 +122,11 @@
120 _(CNEW, AW, ref, ref) \ 122 _(CNEW, AW, ref, ref) \
121 _(CNEWI, NW, ref, ref) /* CSE is ok, not marked as A. */ \ 123 _(CNEWI, NW, ref, ref) /* CSE is ok, not marked as A. */ \
122 \ 124 \
125 /* Buffer operations. */ \
126 _(BUFHDR, L , ref, lit) \
127 _(BUFPUT, L , ref, ref) \
128 _(BUFSTR, A , ref, ref) \
129 \
123 /* Barriers. */ \ 130 /* Barriers. */ \
124 _(TBAR, S , ref, ___) \ 131 _(TBAR, S , ref, ___) \
125 _(OBAR, S , ref, ref) \ 132 _(OBAR, S , ref, ref) \
@@ -128,11 +135,12 @@
128 /* Type conversions. */ \ 135 /* Type conversions. */ \
129 _(CONV, NW, ref, lit) \ 136 _(CONV, NW, ref, lit) \
130 _(TOBIT, N , ref, ref) \ 137 _(TOBIT, N , ref, ref) \
131 _(TOSTR, N , ref, ___) \ 138 _(TOSTR, N , ref, lit) \
132 _(STRTO, N , ref, ___) \ 139 _(STRTO, N , ref, ___) \
133 \ 140 \
134 /* Calls. */ \ 141 /* Calls. */ \
135 _(CALLN, N , ref, lit) \ 142 _(CALLN, N , ref, lit) \
143 _(CALLA, A , ref, lit) \
136 _(CALLL, L , ref, lit) \ 144 _(CALLL, L , ref, lit) \
137 _(CALLS, S , ref, lit) \ 145 _(CALLS, S , ref, lit) \
138 _(CALLXS, S , ref, ref) \ 146 _(CALLXS, S , ref, ref) \
@@ -186,6 +194,8 @@ IRFPMDEF(FPMENUM)
186 _(STR_LEN, offsetof(GCstr, len)) \ 194 _(STR_LEN, offsetof(GCstr, len)) \
187 _(FUNC_ENV, offsetof(GCfunc, l.env)) \ 195 _(FUNC_ENV, offsetof(GCfunc, l.env)) \
188 _(FUNC_PC, offsetof(GCfunc, l.pc)) \ 196 _(FUNC_PC, offsetof(GCfunc, l.pc)) \
197 _(FUNC_FFID, offsetof(GCfunc, l.ffid)) \
198 _(THREAD_ENV, offsetof(lua_State, env)) \
189 _(TAB_META, offsetof(GCtab, metatable)) \ 199 _(TAB_META, offsetof(GCtab, metatable)) \
190 _(TAB_ARRAY, offsetof(GCtab, array)) \ 200 _(TAB_ARRAY, offsetof(GCtab, array)) \
191 _(TAB_NODE, offsetof(GCtab, node)) \ 201 _(TAB_NODE, offsetof(GCtab, node)) \
@@ -221,13 +231,16 @@ IRFLDEF(FLENUM)
221#define IRXLOAD_VOLATILE 2 /* Load from volatile data. */ 231#define IRXLOAD_VOLATILE 2 /* Load from volatile data. */
222#define IRXLOAD_UNALIGNED 4 /* Unaligned load. */ 232#define IRXLOAD_UNALIGNED 4 /* Unaligned load. */
223 233
234/* BUFHDR mode, stored in op2. */
235#define IRBUFHDR_RESET 0 /* Reset buffer. */
236#define IRBUFHDR_APPEND 1 /* Append to buffer. */
237
224/* CONV mode, stored in op2. */ 238/* CONV mode, stored in op2. */
225#define IRCONV_SRCMASK 0x001f /* Source IRType. */ 239#define IRCONV_SRCMASK 0x001f /* Source IRType. */
226#define IRCONV_DSTMASK 0x03e0 /* Dest. IRType (also in ir->t). */ 240#define IRCONV_DSTMASK 0x03e0 /* Dest. IRType (also in ir->t). */
227#define IRCONV_DSH 5 241#define IRCONV_DSH 5
228#define IRCONV_NUM_INT ((IRT_NUM<<IRCONV_DSH)|IRT_INT) 242#define IRCONV_NUM_INT ((IRT_NUM<<IRCONV_DSH)|IRT_INT)
229#define IRCONV_INT_NUM ((IRT_INT<<IRCONV_DSH)|IRT_NUM) 243#define IRCONV_INT_NUM ((IRT_INT<<IRCONV_DSH)|IRT_NUM)
230#define IRCONV_TRUNC 0x0400 /* Truncate number to integer. */
231#define IRCONV_SEXT 0x0800 /* Sign-extend integer to integer. */ 244#define IRCONV_SEXT 0x0800 /* Sign-extend integer to integer. */
232#define IRCONV_MODEMASK 0x0fff 245#define IRCONV_MODEMASK 0x0fff
233#define IRCONV_CONVMASK 0xf000 246#define IRCONV_CONVMASK 0xf000
@@ -238,6 +251,11 @@ IRFLDEF(FLENUM)
238#define IRCONV_INDEX (2<<IRCONV_CSH) /* Check + special backprop rules. */ 251#define IRCONV_INDEX (2<<IRCONV_CSH) /* Check + special backprop rules. */
239#define IRCONV_CHECK (3<<IRCONV_CSH) /* Number checked for integerness. */ 252#define IRCONV_CHECK (3<<IRCONV_CSH) /* Number checked for integerness. */
240 253
254/* TOSTR mode, stored in op2. */
255#define IRTOSTR_INT 0 /* Convert integer to string. */
256#define IRTOSTR_NUM 1 /* Convert number to string. */
257#define IRTOSTR_CHAR 2 /* Convert char value to string. */
258
241/* -- IR operands --------------------------------------------------------- */ 259/* -- IR operands --------------------------------------------------------- */
242 260
243/* IR operand mode (2 bit). */ 261/* IR operand mode (2 bit). */
@@ -302,6 +320,7 @@ IRTDEF(IRTENUM)
302 IRT_PTR = LJ_64 ? IRT_P64 : IRT_P32, 320 IRT_PTR = LJ_64 ? IRT_P64 : IRT_P32,
303 IRT_INTP = LJ_64 ? IRT_I64 : IRT_INT, 321 IRT_INTP = LJ_64 ? IRT_I64 : IRT_INT,
304 IRT_UINTP = LJ_64 ? IRT_U64 : IRT_U32, 322 IRT_UINTP = LJ_64 ? IRT_U64 : IRT_U32,
323 /* TODO_GC64: major changes required for all uses of IRT_P32. */
305 324
306 /* Additional flags. */ 325 /* Additional flags. */
307 IRT_MARK = 0x20, /* Marker for misc. purposes. */ 326 IRT_MARK = 0x20, /* Marker for misc. purposes. */
@@ -353,7 +372,12 @@ typedef struct IRType1 { uint8_t irt; } IRType1;
353#define irt_isaddr(t) (irt_typerange((t), IRT_LIGHTUD, IRT_UDATA)) 372#define irt_isaddr(t) (irt_typerange((t), IRT_LIGHTUD, IRT_UDATA))
354#define irt_isint64(t) (irt_typerange((t), IRT_I64, IRT_U64)) 373#define irt_isint64(t) (irt_typerange((t), IRT_I64, IRT_U64))
355 374
356#if LJ_64 375#if LJ_GC64
376#define IRT_IS64 \
377 ((1u<<IRT_NUM)|(1u<<IRT_I64)|(1u<<IRT_U64)|(1u<<IRT_P64)|\
378 (1u<<IRT_LIGHTUD)|(1u<<IRT_STR)|(1u<<IRT_THREAD)|(1u<<IRT_PROTO)|\
379 (1u<<IRT_FUNC)|(1u<<IRT_CDATA)|(1u<<IRT_TAB)|(1u<<IRT_UDATA))
380#elif LJ_64
357#define IRT_IS64 \ 381#define IRT_IS64 \
358 ((1u<<IRT_NUM)|(1u<<IRT_I64)|(1u<<IRT_U64)|(1u<<IRT_P64)|(1u<<IRT_LIGHTUD)) 382 ((1u<<IRT_NUM)|(1u<<IRT_I64)|(1u<<IRT_U64)|(1u<<IRT_P64)|(1u<<IRT_LIGHTUD))
359#else 383#else
@@ -374,7 +398,7 @@ static LJ_AINLINE IRType itype2irt(const TValue *tv)
374 return IRT_INT; 398 return IRT_INT;
375 else if (tvisnum(tv)) 399 else if (tvisnum(tv))
376 return IRT_NUM; 400 return IRT_NUM;
377#if LJ_64 401#if LJ_64 && !LJ_GC64
378 else if (tvislightud(tv)) 402 else if (tvislightud(tv))
379 return IRT_LIGHTUD; 403 return IRT_LIGHTUD;
380#endif 404#endif
@@ -464,6 +488,7 @@ typedef uint32_t TRef;
464#define tref_isnil(tr) (tref_istype((tr), IRT_NIL)) 488#define tref_isnil(tr) (tref_istype((tr), IRT_NIL))
465#define tref_isfalse(tr) (tref_istype((tr), IRT_FALSE)) 489#define tref_isfalse(tr) (tref_istype((tr), IRT_FALSE))
466#define tref_istrue(tr) (tref_istype((tr), IRT_TRUE)) 490#define tref_istrue(tr) (tref_istype((tr), IRT_TRUE))
491#define tref_islightud(tr) (tref_istype((tr), IRT_LIGHTUD))
467#define tref_isstr(tr) (tref_istype((tr), IRT_STR)) 492#define tref_isstr(tr) (tref_istype((tr), IRT_STR))
468#define tref_isfunc(tr) (tref_istype((tr), IRT_FUNC)) 493#define tref_isfunc(tr) (tref_istype((tr), IRT_FUNC))
469#define tref_iscdata(tr) (tref_istype((tr), IRT_CDATA)) 494#define tref_iscdata(tr) (tref_istype((tr), IRT_CDATA))
@@ -528,6 +553,7 @@ typedef union IRIns {
528 MRef ptr; /* Pointer constant (overlaps op12). */ 553 MRef ptr; /* Pointer constant (overlaps op12). */
529} IRIns; 554} IRIns;
530 555
556/* TODO_GC64: major changes required. */
531#define ir_kgc(ir) check_exp((ir)->o == IR_KGC, gcref((ir)->gcr)) 557#define ir_kgc(ir) check_exp((ir)->o == IR_KGC, gcref((ir)->gcr))
532#define ir_kstr(ir) (gco2str(ir_kgc((ir)))) 558#define ir_kstr(ir) (gco2str(ir_kgc((ir))))
533#define ir_ktab(ir) (gco2tab(ir_kgc((ir)))) 559#define ir_ktab(ir) (gco2tab(ir_kgc((ir))))
diff --git a/src/lj_ircall.h b/src/lj_ircall.h
index 893dac2f..84e41ecf 100644
--- a/src/lj_ircall.h
+++ b/src/lj_ircall.h
@@ -16,7 +16,7 @@ typedef struct CCallInfo {
16 uint32_t flags; /* Number of arguments and flags. */ 16 uint32_t flags; /* Number of arguments and flags. */
17} CCallInfo; 17} CCallInfo;
18 18
19#define CCI_NARGS(ci) ((ci)->flags & 0xff) /* Extract # of args. */ 19#define CCI_NARGS(ci) ((ci)->flags & 0xff) /* # of args. */
20#define CCI_NARGS_MAX 32 /* Max. # of args. */ 20#define CCI_NARGS_MAX 32 /* Max. # of args. */
21 21
22#define CCI_OTSHIFT 16 22#define CCI_OTSHIFT 16
@@ -25,6 +25,7 @@ typedef struct CCallInfo {
25#define CCI_OP(ci) ((ci)->flags >> CCI_OPSHIFT) /* Get op. */ 25#define CCI_OP(ci) ((ci)->flags >> CCI_OPSHIFT) /* Get op. */
26 26
27#define CCI_CALL_N (IR_CALLN << CCI_OPSHIFT) 27#define CCI_CALL_N (IR_CALLN << CCI_OPSHIFT)
28#define CCI_CALL_A (IR_CALLA << CCI_OPSHIFT)
28#define CCI_CALL_L (IR_CALLL << CCI_OPSHIFT) 29#define CCI_CALL_L (IR_CALLL << CCI_OPSHIFT)
29#define CCI_CALL_S (IR_CALLS << CCI_OPSHIFT) 30#define CCI_CALL_S (IR_CALLS << CCI_OPSHIFT)
30#define CCI_CALL_FN (CCI_CALL_N|CCI_CC_FASTCALL) 31#define CCI_CALL_FN (CCI_CALL_N|CCI_CC_FASTCALL)
@@ -45,6 +46,17 @@ typedef struct CCallInfo {
45#define CCI_CC_FASTCALL 0x2000 /* Fastcall calling convention. */ 46#define CCI_CC_FASTCALL 0x2000 /* Fastcall calling convention. */
46#define CCI_CC_STDCALL 0x3000 /* Stdcall calling convention. */ 47#define CCI_CC_STDCALL 0x3000 /* Stdcall calling convention. */
47 48
49/* Extra args for SOFTFP, SPLIT 64 bit. */
50#define CCI_XARGS_SHIFT 14
51#define CCI_XARGS(ci) (((ci)->flags >> CCI_XARGS_SHIFT) & 3)
52#define CCI_XA (1u << CCI_XARGS_SHIFT)
53
54#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
55#define CCI_XNARGS(ci) (CCI_NARGS((ci)) + CCI_XARGS((ci)))
56#else
57#define CCI_XNARGS(ci) CCI_NARGS((ci))
58#endif
59
48/* Helpers for conditional function definitions. */ 60/* Helpers for conditional function definitions. */
49#define IRCALLCOND_ANY(x) x 61#define IRCALLCOND_ANY(x) x
50 62
@@ -93,26 +105,52 @@ typedef struct CCallInfo {
93#endif 105#endif
94 106
95#if LJ_SOFTFP 107#if LJ_SOFTFP
96#define ARG1_FP 2 /* Treat as 2 32 bit arguments. */ 108#define XA_FP CCI_XA
109#define XA2_FP (CCI_XA+CCI_XA)
97#else 110#else
98#define ARG1_FP 1 111#define XA_FP 0
112#define XA2_FP 0
99#endif 113#endif
100 114
101#if LJ_32 115#if LJ_32
102#define ARG2_64 4 /* Treat as 4 32 bit arguments. */ 116#define XA_64 CCI_XA
117#define XA2_64 (CCI_XA+CCI_XA)
103#else 118#else
104#define ARG2_64 2 119#define XA_64 0
120#define XA2_64 0
105#endif 121#endif
106 122
107/* Function definitions for CALL* instructions. */ 123/* Function definitions for CALL* instructions. */
108#define IRCALLDEF(_) \ 124#define IRCALLDEF(_) \
109 _(ANY, lj_str_cmp, 2, FN, INT, CCI_NOFPRCLOBBER) \ 125 _(ANY, lj_str_cmp, 2, FN, INT, CCI_NOFPRCLOBBER) \
126 _(ANY, lj_str_find, 4, N, P32, 0) \
110 _(ANY, lj_str_new, 3, S, STR, CCI_L) \ 127 _(ANY, lj_str_new, 3, S, STR, CCI_L) \
111 _(ANY, lj_strscan_num, 2, FN, INT, 0) \ 128 _(ANY, lj_strscan_num, 2, FN, INT, 0) \
112 _(ANY, lj_str_fromint, 2, FN, STR, CCI_L) \ 129 _(ANY, lj_strfmt_int, 2, FN, STR, CCI_L) \
113 _(ANY, lj_str_fromnum, 2, FN, STR, CCI_L) \ 130 _(ANY, lj_strfmt_num, 2, FN, STR, CCI_L) \
131 _(ANY, lj_strfmt_char, 2, FN, STR, CCI_L) \
132 _(ANY, lj_strfmt_putint, 2, FL, P32, 0) \
133 _(ANY, lj_strfmt_putnum, 2, FL, P32, 0) \
134 _(ANY, lj_strfmt_putquoted, 2, FL, P32, 0) \
135 _(ANY, lj_strfmt_putfxint, 3, L, P32, XA_64) \
136 _(ANY, lj_strfmt_putfnum_int, 3, L, P32, XA_FP) \
137 _(ANY, lj_strfmt_putfnum_uint, 3, L, P32, XA_FP) \
138 _(ANY, lj_strfmt_putfnum, 3, L, P32, XA_FP) \
139 _(ANY, lj_strfmt_putfstr, 3, L, P32, 0) \
140 _(ANY, lj_strfmt_putfchar, 3, L, P32, 0) \
141 _(ANY, lj_buf_putmem, 3, S, P32, 0) \
142 _(ANY, lj_buf_putstr, 2, FL, P32, 0) \
143 _(ANY, lj_buf_putchar, 2, FL, P32, 0) \
144 _(ANY, lj_buf_putstr_reverse, 2, FL, P32, 0) \
145 _(ANY, lj_buf_putstr_lower, 2, FL, P32, 0) \
146 _(ANY, lj_buf_putstr_upper, 2, FL, P32, 0) \
147 _(ANY, lj_buf_putstr_rep, 3, L, P32, 0) \
148 _(ANY, lj_buf_puttab, 5, L, P32, 0) \
149 _(ANY, lj_buf_tostr, 1, FL, STR, 0) \
150 _(ANY, lj_tab_new_ah, 3, A, TAB, CCI_L) \
114 _(ANY, lj_tab_new1, 2, FS, TAB, CCI_L) \ 151 _(ANY, lj_tab_new1, 2, FS, TAB, CCI_L) \
115 _(ANY, lj_tab_dup, 2, FS, TAB, CCI_L) \ 152 _(ANY, lj_tab_dup, 2, FS, TAB, CCI_L) \
153 _(ANY, lj_tab_clear, 1, FS, NIL, 0) \
116 _(ANY, lj_tab_newkey, 3, S, P32, CCI_L) \ 154 _(ANY, lj_tab_newkey, 3, S, P32, CCI_L) \
117 _(ANY, lj_tab_len, 1, FL, INT, 0) \ 155 _(ANY, lj_tab_len, 1, FL, INT, 0) \
118 _(ANY, lj_gc_step_jit, 2, FS, NIL, CCI_L) \ 156 _(ANY, lj_gc_step_jit, 2, FS, NIL, CCI_L) \
@@ -120,29 +158,29 @@ typedef struct CCallInfo {
120 _(ANY, lj_mem_newgco, 2, FS, P32, CCI_L) \ 158 _(ANY, lj_mem_newgco, 2, FS, P32, CCI_L) \
121 _(ANY, lj_math_random_step, 1, FS, NUM, CCI_CASTU64|CCI_RANDFPR)\ 159 _(ANY, lj_math_random_step, 1, FS, NUM, CCI_CASTU64|CCI_RANDFPR)\
122 _(ANY, lj_vm_modi, 2, FN, INT, 0) \ 160 _(ANY, lj_vm_modi, 2, FN, INT, 0) \
123 _(ANY, sinh, ARG1_FP, N, NUM, 0) \ 161 _(ANY, sinh, 1, N, NUM, XA_FP) \
124 _(ANY, cosh, ARG1_FP, N, NUM, 0) \ 162 _(ANY, cosh, 1, N, NUM, XA_FP) \
125 _(ANY, tanh, ARG1_FP, N, NUM, 0) \ 163 _(ANY, tanh, 1, N, NUM, XA_FP) \
126 _(ANY, fputc, 2, S, INT, 0) \ 164 _(ANY, fputc, 2, S, INT, 0) \
127 _(ANY, fwrite, 4, S, INT, 0) \ 165 _(ANY, fwrite, 4, S, INT, 0) \
128 _(ANY, fflush, 1, S, INT, 0) \ 166 _(ANY, fflush, 1, S, INT, 0) \
129 /* ORDER FPM */ \ 167 /* ORDER FPM */ \
130 _(FPMATH, lj_vm_floor, ARG1_FP, N, NUM, 0) \ 168 _(FPMATH, lj_vm_floor, 1, N, NUM, XA_FP) \
131 _(FPMATH, lj_vm_ceil, ARG1_FP, N, NUM, 0) \ 169 _(FPMATH, lj_vm_ceil, 1, N, NUM, XA_FP) \
132 _(FPMATH, lj_vm_trunc, ARG1_FP, N, NUM, 0) \ 170 _(FPMATH, lj_vm_trunc, 1, N, NUM, XA_FP) \
133 _(FPMATH, sqrt, ARG1_FP, N, NUM, 0) \ 171 _(FPMATH, sqrt, 1, N, NUM, XA_FP) \
134 _(FPMATH, exp, ARG1_FP, N, NUM, 0) \ 172 _(ANY, exp, 1, N, NUM, XA_FP) \
135 _(FPMATH, lj_vm_exp2, ARG1_FP, N, NUM, 0) \ 173 _(ANY, lj_vm_exp2, 1, N, NUM, XA_FP) \
136 _(FPMATH, log, ARG1_FP, N, NUM, 0) \ 174 _(ANY, log, 1, N, NUM, XA_FP) \
137 _(FPMATH, lj_vm_log2, ARG1_FP, N, NUM, 0) \ 175 _(ANY, lj_vm_log2, 1, N, NUM, XA_FP) \
138 _(FPMATH, log10, ARG1_FP, N, NUM, 0) \ 176 _(ANY, log10, 1, N, NUM, XA_FP) \
139 _(FPMATH, sin, ARG1_FP, N, NUM, 0) \ 177 _(ANY, sin, 1, N, NUM, XA_FP) \
140 _(FPMATH, cos, ARG1_FP, N, NUM, 0) \ 178 _(ANY, cos, 1, N, NUM, XA_FP) \
141 _(FPMATH, tan, ARG1_FP, N, NUM, 0) \ 179 _(ANY, tan, 1, N, NUM, XA_FP) \
142 _(FPMATH, lj_vm_powi, ARG1_FP+1, N, NUM, 0) \ 180 _(ANY, lj_vm_powi, 2, N, NUM, XA_FP) \
143 _(FPMATH, pow, ARG1_FP*2, N, NUM, 0) \ 181 _(ANY, pow, 2, N, NUM, XA2_FP) \
144 _(FPMATH, atan2, ARG1_FP*2, N, NUM, 0) \ 182 _(ANY, atan2, 2, N, NUM, XA2_FP) \
145 _(FPMATH, ldexp, ARG1_FP+1, N, NUM, 0) \ 183 _(ANY, ldexp, 2, N, NUM, XA_FP) \
146 _(SOFTFP, lj_vm_tobit, 2, N, INT, 0) \ 184 _(SOFTFP, lj_vm_tobit, 2, N, INT, 0) \
147 _(SOFTFP, softfp_add, 4, N, NUM, 0) \ 185 _(SOFTFP, softfp_add, 4, N, NUM, 0) \
148 _(SOFTFP, softfp_sub, 4, N, NUM, 0) \ 186 _(SOFTFP, softfp_sub, 4, N, NUM, 0) \
@@ -159,26 +197,32 @@ typedef struct CCallInfo {
159 _(SOFTFP_FFI, softfp_ui2f, 1, N, FLOAT, 0) \ 197 _(SOFTFP_FFI, softfp_ui2f, 1, N, FLOAT, 0) \
160 _(SOFTFP_FFI, softfp_f2i, 1, N, INT, 0) \ 198 _(SOFTFP_FFI, softfp_f2i, 1, N, INT, 0) \
161 _(SOFTFP_FFI, softfp_f2ui, 1, N, INT, 0) \ 199 _(SOFTFP_FFI, softfp_f2ui, 1, N, INT, 0) \
162 _(FP64_FFI, fp64_l2d, 2, N, NUM, 0) \ 200 _(FP64_FFI, fp64_l2d, 1, N, NUM, XA_64) \
163 _(FP64_FFI, fp64_ul2d, 2, N, NUM, 0) \ 201 _(FP64_FFI, fp64_ul2d, 1, N, NUM, XA_64) \
164 _(FP64_FFI, fp64_l2f, 2, N, FLOAT, 0) \ 202 _(FP64_FFI, fp64_l2f, 1, N, FLOAT, XA_64) \
165 _(FP64_FFI, fp64_ul2f, 2, N, FLOAT, 0) \ 203 _(FP64_FFI, fp64_ul2f, 1, N, FLOAT, XA_64) \
166 _(FP64_FFI, fp64_d2l, ARG1_FP, N, I64, 0) \ 204 _(FP64_FFI, fp64_d2l, 1, N, I64, XA_FP) \
167 _(FP64_FFI, fp64_d2ul, ARG1_FP, N, U64, 0) \ 205 _(FP64_FFI, fp64_d2ul, 1, N, U64, XA_FP) \
168 _(FP64_FFI, fp64_f2l, 1, N, I64, 0) \ 206 _(FP64_FFI, fp64_f2l, 1, N, I64, 0) \
169 _(FP64_FFI, fp64_f2ul, 1, N, U64, 0) \ 207 _(FP64_FFI, fp64_f2ul, 1, N, U64, 0) \
170 _(FFI, lj_carith_divi64, ARG2_64, N, I64, CCI_NOFPRCLOBBER) \ 208 _(FFI, lj_carith_divi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \
171 _(FFI, lj_carith_divu64, ARG2_64, N, U64, CCI_NOFPRCLOBBER) \ 209 _(FFI, lj_carith_divu64, 2, N, U64, XA2_64|CCI_NOFPRCLOBBER) \
172 _(FFI, lj_carith_modi64, ARG2_64, N, I64, CCI_NOFPRCLOBBER) \ 210 _(FFI, lj_carith_modi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \
173 _(FFI, lj_carith_modu64, ARG2_64, N, U64, CCI_NOFPRCLOBBER) \ 211 _(FFI, lj_carith_modu64, 2, N, U64, XA2_64|CCI_NOFPRCLOBBER) \
174 _(FFI, lj_carith_powi64, ARG2_64, N, I64, CCI_NOFPRCLOBBER) \ 212 _(FFI, lj_carith_powi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \
175 _(FFI, lj_carith_powu64, ARG2_64, N, U64, CCI_NOFPRCLOBBER) \ 213 _(FFI, lj_carith_powu64, 2, N, U64, XA2_64|CCI_NOFPRCLOBBER) \
176 _(FFI, lj_cdata_setfin, 2, FN, P32, CCI_L) \ 214 _(FFI, lj_cdata_newv, 4, S, CDATA, CCI_L) \
177 _(FFI, strlen, 1, L, INTP, 0) \ 215 _(FFI, lj_cdata_setfin, 4, S, NIL, CCI_L) \
178 _(FFI, memcpy, 3, S, PTR, 0) \ 216 _(FFI, strlen, 1, L, INTP, 0) \
179 _(FFI, memset, 3, S, PTR, 0) \ 217 _(FFI, memcpy, 3, S, PTR, 0) \
180 _(FFI, lj_vm_errno, 0, S, INT, CCI_NOFPRCLOBBER) \ 218 _(FFI, memset, 3, S, PTR, 0) \
181 _(FFI32, lj_carith_mul64, ARG2_64, N, I64, CCI_NOFPRCLOBBER) 219 _(FFI, lj_vm_errno, 0, S, INT, CCI_NOFPRCLOBBER) \
220 _(FFI32, lj_carith_mul64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \
221 _(FFI32, lj_carith_shl64, 2, N, U64, XA_64|CCI_NOFPRCLOBBER) \
222 _(FFI32, lj_carith_shr64, 2, N, U64, XA_64|CCI_NOFPRCLOBBER) \
223 _(FFI32, lj_carith_sar64, 2, N, U64, XA_64|CCI_NOFPRCLOBBER) \
224 _(FFI32, lj_carith_rol64, 2, N, U64, XA_64|CCI_NOFPRCLOBBER) \
225 _(FFI32, lj_carith_ror64, 2, N, U64, XA_64|CCI_NOFPRCLOBBER) \
182 \ 226 \
183 /* End of list. */ 227 /* End of list. */
184 228
diff --git a/src/lj_jit.h b/src/lj_jit.h
index eb765477..4b51baeb 100644
--- a/src/lj_jit.h
+++ b/src/lj_jit.h
@@ -14,18 +14,15 @@
14 14
15/* CPU-specific JIT engine flags. */ 15/* CPU-specific JIT engine flags. */
16#if LJ_TARGET_X86ORX64 16#if LJ_TARGET_X86ORX64
17#define JIT_F_CMOV 0x00000010 17#define JIT_F_SSE2 0x00000010
18#define JIT_F_SSE2 0x00000020 18#define JIT_F_SSE3 0x00000020
19#define JIT_F_SSE3 0x00000040 19#define JIT_F_SSE4_1 0x00000040
20#define JIT_F_SSE4_1 0x00000080 20#define JIT_F_PREFER_IMUL 0x00000080
21#define JIT_F_P4 0x00000100 21#define JIT_F_LEA_AGU 0x00000100
22#define JIT_F_PREFER_IMUL 0x00000200
23#define JIT_F_SPLIT_XMM 0x00000400
24#define JIT_F_LEA_AGU 0x00000800
25 22
26/* Names for the CPU-specific flags. Must match the order above. */ 23/* Names for the CPU-specific flags. Must match the order above. */
27#define JIT_F_CPU_FIRST JIT_F_CMOV 24#define JIT_F_CPU_FIRST JIT_F_SSE2
28#define JIT_F_CPUSTRING "\4CMOV\4SSE2\4SSE3\6SSE4.1\2P4\3AMD\2K8\4ATOM" 25#define JIT_F_CPUSTRING "\4SSE2\4SSE3\6SSE4.1\3AMD\4ATOM"
29#elif LJ_TARGET_ARM 26#elif LJ_TARGET_ARM
30#define JIT_F_ARMV6_ 0x00000010 27#define JIT_F_ARMV6_ 0x00000010
31#define JIT_F_ARMV6T2_ 0x00000020 28#define JIT_F_ARMV6T2_ 0x00000020
@@ -100,6 +97,7 @@
100 _(\012, maxirconst, 500) /* Max. # of IR constants of a trace. */ \ 97 _(\012, maxirconst, 500) /* Max. # of IR constants of a trace. */ \
101 _(\007, maxside, 100) /* Max. # of side traces of a root trace. */ \ 98 _(\007, maxside, 100) /* Max. # of side traces of a root trace. */ \
102 _(\007, maxsnap, 500) /* Max. # of snapshots for a trace. */ \ 99 _(\007, maxsnap, 500) /* Max. # of snapshots for a trace. */ \
100 _(\011, minstitch, 0) /* Min. # of IR ins for a stitched trace. */ \
103 \ 101 \
104 _(\007, hotloop, 56) /* # of iter. to detect a hot loop/call. */ \ 102 _(\007, hotloop, 56) /* # of iter. to detect a hot loop/call. */ \
105 _(\007, hotexit, 10) /* # of taken exits to start a side trace. */ \ 103 _(\007, hotexit, 10) /* # of taken exits to start a side trace. */ \
@@ -205,7 +203,8 @@ typedef enum {
205 LJ_TRLINK_UPREC, /* Up-recursion. */ 203 LJ_TRLINK_UPREC, /* Up-recursion. */
206 LJ_TRLINK_DOWNREC, /* Down-recursion. */ 204 LJ_TRLINK_DOWNREC, /* Down-recursion. */
207 LJ_TRLINK_INTERP, /* Fallback to interpreter. */ 205 LJ_TRLINK_INTERP, /* Fallback to interpreter. */
208 LJ_TRLINK_RETURN /* Return to interpreter. */ 206 LJ_TRLINK_RETURN, /* Return to interpreter. */
207 LJ_TRLINK_STITCH /* Trace stitching. */
209} TraceLink; 208} TraceLink;
210 209
211/* Trace object. */ 210/* Trace object. */
@@ -214,6 +213,9 @@ typedef struct GCtrace {
214 uint8_t topslot; /* Top stack slot already checked to be allocated. */ 213 uint8_t topslot; /* Top stack slot already checked to be allocated. */
215 uint8_t linktype; /* Type of link. */ 214 uint8_t linktype; /* Type of link. */
216 IRRef nins; /* Next IR instruction. Biased with REF_BIAS. */ 215 IRRef nins; /* Next IR instruction. Biased with REF_BIAS. */
216#if LJ_GC64
217 uint32_t unused_gc64;
218#endif
217 GCRef gclist; 219 GCRef gclist;
218 IRIns *ir; /* IR instructions/constants. Biased with REF_BIAS. */ 220 IRIns *ir; /* IR instructions/constants. Biased with REF_BIAS. */
219 IRRef nk; /* Lowest IR constant. Biased with REF_BIAS. */ 221 IRRef nk; /* Lowest IR constant. Biased with REF_BIAS. */
@@ -400,6 +402,12 @@ typedef struct jit_State {
400 size_t szallmcarea; /* Total size of all allocated mcode areas. */ 402 size_t szallmcarea; /* Total size of all allocated mcode areas. */
401 403
402 TValue errinfo; /* Additional info element for trace errors. */ 404 TValue errinfo; /* Additional info element for trace errors. */
405
406#if LJ_HASPROFILE
407 GCproto *prev_pt; /* Previous prototype. */
408 BCLine prev_line; /* Previous line. */
409 int prof_mode; /* Profiling mode: 0, 'f', 'l'. */
410#endif
403} 411}
404#if LJ_TARGET_ARM 412#if LJ_TARGET_ARM
405LJ_ALIGN(16) /* For DISPATCH-relative addresses in assembler part. */ 413LJ_ALIGN(16) /* For DISPATCH-relative addresses in assembler part. */
diff --git a/src/lj_lex.c b/src/lj_lex.c
index e1dc3cdf..8409cd78 100644
--- a/src/lj_lex.c
+++ b/src/lj_lex.c
@@ -12,6 +12,7 @@
12#include "lj_obj.h" 12#include "lj_obj.h"
13#include "lj_gc.h" 13#include "lj_gc.h"
14#include "lj_err.h" 14#include "lj_err.h"
15#include "lj_buf.h"
15#include "lj_str.h" 16#include "lj_str.h"
16#if LJ_HASFFI 17#if LJ_HASFFI
17#include "lj_tab.h" 18#include "lj_tab.h"
@@ -24,6 +25,7 @@
24#include "lj_parse.h" 25#include "lj_parse.h"
25#include "lj_char.h" 26#include "lj_char.h"
26#include "lj_strscan.h" 27#include "lj_strscan.h"
28#include "lj_strfmt.h"
27 29
28/* Lua lexer token names. */ 30/* Lua lexer token names. */
29static const char *const tokennames[] = { 31static const char *const tokennames[] = {
@@ -37,50 +39,48 @@ TKDEF(TKSTR1, TKSTR2)
37 39
38/* -- Buffer handling ----------------------------------------------------- */ 40/* -- Buffer handling ----------------------------------------------------- */
39 41
40#define char2int(c) ((int)(uint8_t)(c)) 42#define LEX_EOF (-1)
41#define next(ls) \ 43#define lex_iseol(ls) (ls->c == '\n' || ls->c == '\r')
42 (ls->current = (ls->n--) > 0 ? char2int(*ls->p++) : fillbuf(ls))
43#define save_and_next(ls) (save(ls, ls->current), next(ls))
44#define currIsNewline(ls) (ls->current == '\n' || ls->current == '\r')
45#define END_OF_STREAM (-1)
46 44
47static int fillbuf(LexState *ls) 45/* Get more input from reader. */
46static LJ_NOINLINE LexChar lex_more(LexState *ls)
48{ 47{
49 size_t sz; 48 size_t sz;
50 const char *buf = ls->rfunc(ls->L, ls->rdata, &sz); 49 const char *p = ls->rfunc(ls->L, ls->rdata, &sz);
51 if (buf == NULL || sz == 0) return END_OF_STREAM; 50 if (p == NULL || sz == 0) return LEX_EOF;
52 ls->n = (MSize)sz - 1; 51 ls->pe = p + sz;
53 ls->p = buf; 52 ls->p = p + 1;
54 return char2int(*(ls->p++)); 53 return (LexChar)(uint8_t)p[0];
55} 54}
56 55
57static LJ_NOINLINE void save_grow(LexState *ls, int c) 56/* Get next character. */
57static LJ_AINLINE LexChar lex_next(LexState *ls)
58{ 58{
59 MSize newsize; 59 return (ls->c = ls->p < ls->pe ? (LexChar)(uint8_t)*ls->p++ : lex_more(ls));
60 if (ls->sb.sz >= LJ_MAX_STR/2)
61 lj_lex_error(ls, 0, LJ_ERR_XELEM);
62 newsize = ls->sb.sz * 2;
63 lj_str_resizebuf(ls->L, &ls->sb, newsize);
64 ls->sb.buf[ls->sb.n++] = (char)c;
65} 60}
66 61
67static LJ_AINLINE void save(LexState *ls, int c) 62/* Save character. */
63static LJ_AINLINE void lex_save(LexState *ls, LexChar c)
68{ 64{
69 if (LJ_UNLIKELY(ls->sb.n + 1 > ls->sb.sz)) 65 lj_buf_putb(&ls->sb, c);
70 save_grow(ls, c); 66}
71 else 67
72 ls->sb.buf[ls->sb.n++] = (char)c; 68/* Save previous character and get next character. */
69static LJ_AINLINE LexChar lex_savenext(LexState *ls)
70{
71 lex_save(ls, ls->c);
72 return lex_next(ls);
73} 73}
74 74
75static void inclinenumber(LexState *ls) 75/* Skip line break. Handles "\n", "\r", "\r\n" or "\n\r". */
76static void lex_newline(LexState *ls)
76{ 77{
77 int old = ls->current; 78 LexChar old = ls->c;
78 lua_assert(currIsNewline(ls)); 79 lua_assert(lex_iseol(ls));
79 next(ls); /* skip `\n' or `\r' */ 80 lex_next(ls); /* Skip "\n" or "\r". */
80 if (currIsNewline(ls) && ls->current != old) 81 if (lex_iseol(ls) && ls->c != old) lex_next(ls); /* Skip "\n\r" or "\r\n". */
81 next(ls); /* skip `\n\r' or `\r\n' */
82 if (++ls->linenumber >= LJ_MAX_LINE) 82 if (++ls->linenumber >= LJ_MAX_LINE)
83 lj_lex_error(ls, ls->token, LJ_ERR_XLINES); 83 lj_lex_error(ls, ls->tok, LJ_ERR_XLINES);
84} 84}
85 85
86/* -- Scanner for terminals ----------------------------------------------- */ 86/* -- Scanner for terminals ----------------------------------------------- */
@@ -89,19 +89,17 @@ static void inclinenumber(LexState *ls)
89static void lex_number(LexState *ls, TValue *tv) 89static void lex_number(LexState *ls, TValue *tv)
90{ 90{
91 StrScanFmt fmt; 91 StrScanFmt fmt;
92 int c, xp = 'e'; 92 LexChar c, xp = 'e';
93 lua_assert(lj_char_isdigit(ls->current)); 93 lua_assert(lj_char_isdigit(ls->c));
94 if ((c = ls->current) == '0') { 94 if ((c = ls->c) == '0' && (lex_savenext(ls) | 0x20) == 'x')
95 save_and_next(ls); 95 xp = 'p';
96 if ((ls->current | 0x20) == 'x') xp = 'p'; 96 while (lj_char_isident(ls->c) || ls->c == '.' ||
97 } 97 ((ls->c == '-' || ls->c == '+') && (c | 0x20) == xp)) {
98 while (lj_char_isident(ls->current) || ls->current == '.' || 98 c = ls->c;
99 ((ls->current == '-' || ls->current == '+') && (c | 0x20) == xp)) { 99 lex_savenext(ls);
100 c = ls->current;
101 save_and_next(ls);
102 } 100 }
103 save(ls, '\0'); 101 lex_save(ls, '\0');
104 fmt = lj_strscan_scan((const uint8_t *)ls->sb.buf, tv, 102 fmt = lj_strscan_scan((const uint8_t *)sbufB(&ls->sb), tv,
105 (LJ_DUALNUM ? STRSCAN_OPT_TOINT : STRSCAN_OPT_TONUM) | 103 (LJ_DUALNUM ? STRSCAN_OPT_TOINT : STRSCAN_OPT_TONUM) |
106 (LJ_HASFFI ? (STRSCAN_OPT_LL|STRSCAN_OPT_IMAG) : 0)); 104 (LJ_HASFFI ? (STRSCAN_OPT_LL|STRSCAN_OPT_IMAG) : 0));
107 if (LJ_DUALNUM && fmt == STRSCAN_INT) { 105 if (LJ_DUALNUM && fmt == STRSCAN_INT) {
@@ -134,60 +132,60 @@ static void lex_number(LexState *ls, TValue *tv)
134 } 132 }
135} 133}
136 134
137static int skip_sep(LexState *ls) 135/* Skip equal signs for "[=...=[" and "]=...=]" and return their count. */
136static int lex_skipeq(LexState *ls)
138{ 137{
139 int count = 0; 138 int count = 0;
140 int s = ls->current; 139 LexChar s = ls->c;
141 lua_assert(s == '[' || s == ']'); 140 lua_assert(s == '[' || s == ']');
142 save_and_next(ls); 141 while (lex_savenext(ls) == '=')
143 while (ls->current == '=') {
144 save_and_next(ls);
145 count++; 142 count++;
146 } 143 return (ls->c == s) ? count : (-count) - 1;
147 return (ls->current == s) ? count : (-count) - 1;
148} 144}
149 145
150static void read_long_string(LexState *ls, TValue *tv, int sep) 146/* Parse a long string or long comment (tv set to NULL). */
147static void lex_longstring(LexState *ls, TValue *tv, int sep)
151{ 148{
152 save_and_next(ls); /* skip 2nd `[' */ 149 lex_savenext(ls); /* Skip second '['. */
153 if (currIsNewline(ls)) /* string starts with a newline? */ 150 if (lex_iseol(ls)) /* Skip initial newline. */
154 inclinenumber(ls); /* skip it */ 151 lex_newline(ls);
155 for (;;) { 152 for (;;) {
156 switch (ls->current) { 153 switch (ls->c) {
157 case END_OF_STREAM: 154 case LEX_EOF:
158 lj_lex_error(ls, TK_eof, tv ? LJ_ERR_XLSTR : LJ_ERR_XLCOM); 155 lj_lex_error(ls, TK_eof, tv ? LJ_ERR_XLSTR : LJ_ERR_XLCOM);
159 break; 156 break;
160 case ']': 157 case ']':
161 if (skip_sep(ls) == sep) { 158 if (lex_skipeq(ls) == sep) {
162 save_and_next(ls); /* skip 2nd `]' */ 159 lex_savenext(ls); /* Skip second ']'. */
163 goto endloop; 160 goto endloop;
164 } 161 }
165 break; 162 break;
166 case '\n': 163 case '\n':
167 case '\r': 164 case '\r':
168 save(ls, '\n'); 165 lex_save(ls, '\n');
169 inclinenumber(ls); 166 lex_newline(ls);
170 if (!tv) lj_str_resetbuf(&ls->sb); /* avoid wasting space */ 167 if (!tv) lj_buf_reset(&ls->sb); /* Don't waste space for comments. */
171 break; 168 break;
172 default: 169 default:
173 if (tv) save_and_next(ls); 170 lex_savenext(ls);
174 else next(ls);
175 break; 171 break;
176 } 172 }
177 } endloop: 173 } endloop:
178 if (tv) { 174 if (tv) {
179 GCstr *str = lj_parse_keepstr(ls, ls->sb.buf + (2 + (MSize)sep), 175 GCstr *str = lj_parse_keepstr(ls, sbufB(&ls->sb) + (2 + (MSize)sep),
180 ls->sb.n - 2*(2 + (MSize)sep)); 176 sbuflen(&ls->sb) - 2*(2 + (MSize)sep));
181 setstrV(ls->L, tv, str); 177 setstrV(ls->L, tv, str);
182 } 178 }
183} 179}
184 180
185static void read_string(LexState *ls, int delim, TValue *tv) 181/* Parse a string. */
182static void lex_string(LexState *ls, TValue *tv)
186{ 183{
187 save_and_next(ls); 184 LexChar delim = ls->c; /* Delimiter is '\'' or '"'. */
188 while (ls->current != delim) { 185 lex_savenext(ls);
189 switch (ls->current) { 186 while (ls->c != delim) {
190 case END_OF_STREAM: 187 switch (ls->c) {
188 case LEX_EOF:
191 lj_lex_error(ls, TK_eof, LJ_ERR_XSTR); 189 lj_lex_error(ls, TK_eof, LJ_ERR_XSTR);
192 continue; 190 continue;
193 case '\n': 191 case '\n':
@@ -195,7 +193,7 @@ static void read_string(LexState *ls, int delim, TValue *tv)
195 lj_lex_error(ls, TK_string, LJ_ERR_XSTR); 193 lj_lex_error(ls, TK_string, LJ_ERR_XSTR);
196 continue; 194 continue;
197 case '\\': { 195 case '\\': {
198 int c = next(ls); /* Skip the '\\'. */ 196 LexChar c = lex_next(ls); /* Skip the '\\'. */
199 switch (c) { 197 switch (c) {
200 case 'a': c = '\a'; break; 198 case 'a': c = '\a'; break;
201 case 'b': c = '\b'; break; 199 case 'b': c = '\b'; break;
@@ -205,111 +203,112 @@ static void read_string(LexState *ls, int delim, TValue *tv)
205 case 't': c = '\t'; break; 203 case 't': c = '\t'; break;
206 case 'v': c = '\v'; break; 204 case 'v': c = '\v'; break;
207 case 'x': /* Hexadecimal escape '\xXX'. */ 205 case 'x': /* Hexadecimal escape '\xXX'. */
208 c = (next(ls) & 15u) << 4; 206 c = (lex_next(ls) & 15u) << 4;
209 if (!lj_char_isdigit(ls->current)) { 207 if (!lj_char_isdigit(ls->c)) {
210 if (!lj_char_isxdigit(ls->current)) goto err_xesc; 208 if (!lj_char_isxdigit(ls->c)) goto err_xesc;
211 c += 9 << 4; 209 c += 9 << 4;
212 } 210 }
213 c += (next(ls) & 15u); 211 c += (lex_next(ls) & 15u);
214 if (!lj_char_isdigit(ls->current)) { 212 if (!lj_char_isdigit(ls->c)) {
215 if (!lj_char_isxdigit(ls->current)) goto err_xesc; 213 if (!lj_char_isxdigit(ls->c)) goto err_xesc;
216 c += 9; 214 c += 9;
217 } 215 }
218 break; 216 break;
219 case 'z': /* Skip whitespace. */ 217 case 'z': /* Skip whitespace. */
220 next(ls); 218 lex_next(ls);
221 while (lj_char_isspace(ls->current)) 219 while (lj_char_isspace(ls->c))
222 if (currIsNewline(ls)) inclinenumber(ls); else next(ls); 220 if (lex_iseol(ls)) lex_newline(ls); else lex_next(ls);
223 continue; 221 continue;
224 case '\n': case '\r': save(ls, '\n'); inclinenumber(ls); continue; 222 case '\n': case '\r': lex_save(ls, '\n'); lex_newline(ls); continue;
225 case '\\': case '\"': case '\'': break; 223 case '\\': case '\"': case '\'': break;
226 case END_OF_STREAM: continue; 224 case LEX_EOF: continue;
227 default: 225 default:
228 if (!lj_char_isdigit(c)) 226 if (!lj_char_isdigit(c))
229 goto err_xesc; 227 goto err_xesc;
230 c -= '0'; /* Decimal escape '\ddd'. */ 228 c -= '0'; /* Decimal escape '\ddd'. */
231 if (lj_char_isdigit(next(ls))) { 229 if (lj_char_isdigit(lex_next(ls))) {
232 c = c*10 + (ls->current - '0'); 230 c = c*10 + (ls->c - '0');
233 if (lj_char_isdigit(next(ls))) { 231 if (lj_char_isdigit(lex_next(ls))) {
234 c = c*10 + (ls->current - '0'); 232 c = c*10 + (ls->c - '0');
235 if (c > 255) { 233 if (c > 255) {
236 err_xesc: 234 err_xesc:
237 lj_lex_error(ls, TK_string, LJ_ERR_XESC); 235 lj_lex_error(ls, TK_string, LJ_ERR_XESC);
238 } 236 }
239 next(ls); 237 lex_next(ls);
240 } 238 }
241 } 239 }
242 save(ls, c); 240 lex_save(ls, c);
243 continue; 241 continue;
244 } 242 }
245 save(ls, c); 243 lex_save(ls, c);
246 next(ls); 244 lex_next(ls);
247 continue; 245 continue;
248 } 246 }
249 default: 247 default:
250 save_and_next(ls); 248 lex_savenext(ls);
251 break; 249 break;
252 } 250 }
253 } 251 }
254 save_and_next(ls); /* skip delimiter */ 252 lex_savenext(ls); /* Skip trailing delimiter. */
255 setstrV(ls->L, tv, lj_parse_keepstr(ls, ls->sb.buf + 1, ls->sb.n - 2)); 253 setstrV(ls->L, tv,
254 lj_parse_keepstr(ls, sbufB(&ls->sb)+1, sbuflen(&ls->sb)-2));
256} 255}
257 256
258/* -- Main lexical scanner ------------------------------------------------ */ 257/* -- Main lexical scanner ------------------------------------------------ */
259 258
260static int llex(LexState *ls, TValue *tv) 259/* Get next lexical token. */
260static LexToken lex_scan(LexState *ls, TValue *tv)
261{ 261{
262 lj_str_resetbuf(&ls->sb); 262 lj_buf_reset(&ls->sb);
263 for (;;) { 263 for (;;) {
264 if (lj_char_isident(ls->current)) { 264 if (lj_char_isident(ls->c)) {
265 GCstr *s; 265 GCstr *s;
266 if (lj_char_isdigit(ls->current)) { /* Numeric literal. */ 266 if (lj_char_isdigit(ls->c)) { /* Numeric literal. */
267 lex_number(ls, tv); 267 lex_number(ls, tv);
268 return TK_number; 268 return TK_number;
269 } 269 }
270 /* Identifier or reserved word. */ 270 /* Identifier or reserved word. */
271 do { 271 do {
272 save_and_next(ls); 272 lex_savenext(ls);
273 } while (lj_char_isident(ls->current)); 273 } while (lj_char_isident(ls->c));
274 s = lj_parse_keepstr(ls, ls->sb.buf, ls->sb.n); 274 s = lj_parse_keepstr(ls, sbufB(&ls->sb), sbuflen(&ls->sb));
275 setstrV(ls->L, tv, s); 275 setstrV(ls->L, tv, s);
276 if (s->reserved > 0) /* Reserved word? */ 276 if (s->reserved > 0) /* Reserved word? */
277 return TK_OFS + s->reserved; 277 return TK_OFS + s->reserved;
278 return TK_name; 278 return TK_name;
279 } 279 }
280 switch (ls->current) { 280 switch (ls->c) {
281 case '\n': 281 case '\n':
282 case '\r': 282 case '\r':
283 inclinenumber(ls); 283 lex_newline(ls);
284 continue; 284 continue;
285 case ' ': 285 case ' ':
286 case '\t': 286 case '\t':
287 case '\v': 287 case '\v':
288 case '\f': 288 case '\f':
289 next(ls); 289 lex_next(ls);
290 continue; 290 continue;
291 case '-': 291 case '-':
292 next(ls); 292 lex_next(ls);
293 if (ls->current != '-') return '-'; 293 if (ls->c != '-') return '-';
294 /* else is a comment */ 294 lex_next(ls);
295 next(ls); 295 if (ls->c == '[') { /* Long comment "--[=*[...]=*]". */
296 if (ls->current == '[') { 296 int sep = lex_skipeq(ls);
297 int sep = skip_sep(ls); 297 lj_buf_reset(&ls->sb); /* `lex_skipeq' may dirty the buffer */
298 lj_str_resetbuf(&ls->sb); /* `skip_sep' may dirty the buffer */
299 if (sep >= 0) { 298 if (sep >= 0) {
300 read_long_string(ls, NULL, sep); /* long comment */ 299 lex_longstring(ls, NULL, sep);
301 lj_str_resetbuf(&ls->sb); 300 lj_buf_reset(&ls->sb);
302 continue; 301 continue;
303 } 302 }
304 } 303 }
305 /* else short comment */ 304 /* Short comment "--.*\n". */
306 while (!currIsNewline(ls) && ls->current != END_OF_STREAM) 305 while (!lex_iseol(ls) && ls->c != LEX_EOF)
307 next(ls); 306 lex_next(ls);
308 continue; 307 continue;
309 case '[': { 308 case '[': {
310 int sep = skip_sep(ls); 309 int sep = lex_skipeq(ls);
311 if (sep >= 0) { 310 if (sep >= 0) {
312 read_long_string(ls, tv, sep); 311 lex_longstring(ls, tv, sep);
313 return TK_string; 312 return TK_string;
314 } else if (sep == -1) { 313 } else if (sep == -1) {
315 return '['; 314 return '[';
@@ -319,44 +318,43 @@ static int llex(LexState *ls, TValue *tv)
319 } 318 }
320 } 319 }
321 case '=': 320 case '=':
322 next(ls); 321 lex_next(ls);
323 if (ls->current != '=') return '='; else { next(ls); return TK_eq; } 322 if (ls->c != '=') return '='; else { lex_next(ls); return TK_eq; }
324 case '<': 323 case '<':
325 next(ls); 324 lex_next(ls);
326 if (ls->current != '=') return '<'; else { next(ls); return TK_le; } 325 if (ls->c != '=') return '<'; else { lex_next(ls); return TK_le; }
327 case '>': 326 case '>':
328 next(ls); 327 lex_next(ls);
329 if (ls->current != '=') return '>'; else { next(ls); return TK_ge; } 328 if (ls->c != '=') return '>'; else { lex_next(ls); return TK_ge; }
330 case '~': 329 case '~':
331 next(ls); 330 lex_next(ls);
332 if (ls->current != '=') return '~'; else { next(ls); return TK_ne; } 331 if (ls->c != '=') return '~'; else { lex_next(ls); return TK_ne; }
333 case ':': 332 case ':':
334 next(ls); 333 lex_next(ls);
335 if (ls->current != ':') return ':'; else { next(ls); return TK_label; } 334 if (ls->c != ':') return ':'; else { lex_next(ls); return TK_label; }
336 case '"': 335 case '"':
337 case '\'': 336 case '\'':
338 read_string(ls, ls->current, tv); 337 lex_string(ls, tv);
339 return TK_string; 338 return TK_string;
340 case '.': 339 case '.':
341 save_and_next(ls); 340 if (lex_savenext(ls) == '.') {
342 if (ls->current == '.') { 341 lex_next(ls);
343 next(ls); 342 if (ls->c == '.') {
344 if (ls->current == '.') { 343 lex_next(ls);
345 next(ls);
346 return TK_dots; /* ... */ 344 return TK_dots; /* ... */
347 } 345 }
348 return TK_concat; /* .. */ 346 return TK_concat; /* .. */
349 } else if (!lj_char_isdigit(ls->current)) { 347 } else if (!lj_char_isdigit(ls->c)) {
350 return '.'; 348 return '.';
351 } else { 349 } else {
352 lex_number(ls, tv); 350 lex_number(ls, tv);
353 return TK_number; 351 return TK_number;
354 } 352 }
355 case END_OF_STREAM: 353 case LEX_EOF:
356 return TK_eof; 354 return TK_eof;
357 default: { 355 default: {
358 int c = ls->current; 356 LexChar c = ls->c;
359 next(ls); 357 lex_next(ls);
360 return c; /* Single-char tokens (+ - / ...). */ 358 return c; /* Single-char tokens (+ - / ...). */
361 } 359 }
362 } 360 }
@@ -371,35 +369,32 @@ int lj_lex_setup(lua_State *L, LexState *ls)
371 int header = 0; 369 int header = 0;
372 ls->L = L; 370 ls->L = L;
373 ls->fs = NULL; 371 ls->fs = NULL;
374 ls->n = 0; 372 ls->pe = ls->p = NULL;
375 ls->p = NULL;
376 ls->vstack = NULL; 373 ls->vstack = NULL;
377 ls->sizevstack = 0; 374 ls->sizevstack = 0;
378 ls->vtop = 0; 375 ls->vtop = 0;
379 ls->bcstack = NULL; 376 ls->bcstack = NULL;
380 ls->sizebcstack = 0; 377 ls->sizebcstack = 0;
381 ls->token = 0; 378 ls->tok = 0;
382 ls->lookahead = TK_eof; /* No look-ahead token. */ 379 ls->lookahead = TK_eof; /* No look-ahead token. */
383 ls->linenumber = 1; 380 ls->linenumber = 1;
384 ls->lastline = 1; 381 ls->lastline = 1;
385 lj_str_resizebuf(ls->L, &ls->sb, LJ_MIN_SBUF); 382 lex_next(ls); /* Read-ahead first char. */
386 next(ls); /* Read-ahead first char. */ 383 if (ls->c == 0xef && ls->p + 2 <= ls->pe && (uint8_t)ls->p[0] == 0xbb &&
387 if (ls->current == 0xef && ls->n >= 2 && char2int(ls->p[0]) == 0xbb && 384 (uint8_t)ls->p[1] == 0xbf) { /* Skip UTF-8 BOM (if buffered). */
388 char2int(ls->p[1]) == 0xbf) { /* Skip UTF-8 BOM (if buffered). */
389 ls->n -= 2;
390 ls->p += 2; 385 ls->p += 2;
391 next(ls); 386 lex_next(ls);
392 header = 1; 387 header = 1;
393 } 388 }
394 if (ls->current == '#') { /* Skip POSIX #! header line. */ 389 if (ls->c == '#') { /* Skip POSIX #! header line. */
395 do { 390 do {
396 next(ls); 391 lex_next(ls);
397 if (ls->current == END_OF_STREAM) return 0; 392 if (ls->c == LEX_EOF) return 0;
398 } while (!currIsNewline(ls)); 393 } while (!lex_iseol(ls));
399 inclinenumber(ls); 394 lex_newline(ls);
400 header = 1; 395 header = 1;
401 } 396 }
402 if (ls->current == LUA_SIGNATURE[0]) { /* Bytecode dump. */ 397 if (ls->c == LUA_SIGNATURE[0]) { /* Bytecode dump. */
403 if (header) { 398 if (header) {
404 /* 399 /*
405 ** Loading bytecode with an extra header is disabled for security 400 ** Loading bytecode with an extra header is disabled for security
@@ -421,55 +416,60 @@ void lj_lex_cleanup(lua_State *L, LexState *ls)
421 global_State *g = G(L); 416 global_State *g = G(L);
422 lj_mem_freevec(g, ls->bcstack, ls->sizebcstack, BCInsLine); 417 lj_mem_freevec(g, ls->bcstack, ls->sizebcstack, BCInsLine);
423 lj_mem_freevec(g, ls->vstack, ls->sizevstack, VarInfo); 418 lj_mem_freevec(g, ls->vstack, ls->sizevstack, VarInfo);
424 lj_str_freebuf(g, &ls->sb); 419 lj_buf_free(g, &ls->sb);
425} 420}
426 421
422/* Return next lexical token. */
427void lj_lex_next(LexState *ls) 423void lj_lex_next(LexState *ls)
428{ 424{
429 ls->lastline = ls->linenumber; 425 ls->lastline = ls->linenumber;
430 if (LJ_LIKELY(ls->lookahead == TK_eof)) { /* No lookahead token? */ 426 if (LJ_LIKELY(ls->lookahead == TK_eof)) { /* No lookahead token? */
431 ls->token = llex(ls, &ls->tokenval); /* Get next token. */ 427 ls->tok = lex_scan(ls, &ls->tokval); /* Get next token. */
432 } else { /* Otherwise return lookahead token. */ 428 } else { /* Otherwise return lookahead token. */
433 ls->token = ls->lookahead; 429 ls->tok = ls->lookahead;
434 ls->lookahead = TK_eof; 430 ls->lookahead = TK_eof;
435 ls->tokenval = ls->lookaheadval; 431 ls->tokval = ls->lookaheadval;
436 } 432 }
437} 433}
438 434
435/* Look ahead for the next token. */
439LexToken lj_lex_lookahead(LexState *ls) 436LexToken lj_lex_lookahead(LexState *ls)
440{ 437{
441 lua_assert(ls->lookahead == TK_eof); 438 lua_assert(ls->lookahead == TK_eof);
442 ls->lookahead = llex(ls, &ls->lookaheadval); 439 ls->lookahead = lex_scan(ls, &ls->lookaheadval);
443 return ls->lookahead; 440 return ls->lookahead;
444} 441}
445 442
446const char *lj_lex_token2str(LexState *ls, LexToken token) 443/* Convert token to string. */
444const char *lj_lex_token2str(LexState *ls, LexToken tok)
447{ 445{
448 if (token > TK_OFS) 446 if (tok > TK_OFS)
449 return tokennames[token-TK_OFS-1]; 447 return tokennames[tok-TK_OFS-1];
450 else if (!lj_char_iscntrl(token)) 448 else if (!lj_char_iscntrl(tok))
451 return lj_str_pushf(ls->L, "%c", token); 449 return lj_strfmt_pushf(ls->L, "%c", tok);
452 else 450 else
453 return lj_str_pushf(ls->L, "char(%d)", token); 451 return lj_strfmt_pushf(ls->L, "char(%d)", tok);
454} 452}
455 453
456void lj_lex_error(LexState *ls, LexToken token, ErrMsg em, ...) 454/* Lexer error. */
455void lj_lex_error(LexState *ls, LexToken tok, ErrMsg em, ...)
457{ 456{
458 const char *tok; 457 const char *tokstr;
459 va_list argp; 458 va_list argp;
460 if (token == 0) { 459 if (tok == 0) {
461 tok = NULL; 460 tokstr = NULL;
462 } else if (token == TK_name || token == TK_string || token == TK_number) { 461 } else if (tok == TK_name || tok == TK_string || tok == TK_number) {
463 save(ls, '\0'); 462 lex_save(ls, '\0');
464 tok = ls->sb.buf; 463 tokstr = sbufB(&ls->sb);
465 } else { 464 } else {
466 tok = lj_lex_token2str(ls, token); 465 tokstr = lj_lex_token2str(ls, tok);
467 } 466 }
468 va_start(argp, em); 467 va_start(argp, em);
469 lj_err_lex(ls->L, ls->chunkname, tok, ls->linenumber, em, argp); 468 lj_err_lex(ls->L, ls->chunkname, tokstr, ls->linenumber, em, argp);
470 va_end(argp); 469 va_end(argp);
471} 470}
472 471
472/* Initialize strings for reserved words. */
473void lj_lex_init(lua_State *L) 473void lj_lex_init(lua_State *L)
474{ 474{
475 uint32_t i; 475 uint32_t i;
diff --git a/src/lj_lex.h b/src/lj_lex.h
index fe017686..acd2285d 100644
--- a/src/lj_lex.h
+++ b/src/lj_lex.h
@@ -30,7 +30,8 @@ TKDEF(TKENUM1, TKENUM2)
30 TK_RESERVED = TK_while - TK_OFS 30 TK_RESERVED = TK_while - TK_OFS
31}; 31};
32 32
33typedef int LexToken; 33typedef int LexChar; /* Lexical character. Unsigned ext. from char. */
34typedef int LexToken; /* Lexical token. */
34 35
35/* Combined bytecode ins/line. Only used during bytecode generation. */ 36/* Combined bytecode ins/line. Only used during bytecode generation. */
36typedef struct BCInsLine { 37typedef struct BCInsLine {
@@ -51,13 +52,13 @@ typedef struct VarInfo {
51typedef struct LexState { 52typedef struct LexState {
52 struct FuncState *fs; /* Current FuncState. Defined in lj_parse.c. */ 53 struct FuncState *fs; /* Current FuncState. Defined in lj_parse.c. */
53 struct lua_State *L; /* Lua state. */ 54 struct lua_State *L; /* Lua state. */
54 TValue tokenval; /* Current token value. */ 55 TValue tokval; /* Current token value. */
55 TValue lookaheadval; /* Lookahead token value. */ 56 TValue lookaheadval; /* Lookahead token value. */
56 int current; /* Current character (charint). */
57 LexToken token; /* Current token. */
58 LexToken lookahead; /* Lookahead token. */
59 MSize n; /* Bytes left in input buffer. */
60 const char *p; /* Current position in input buffer. */ 57 const char *p; /* Current position in input buffer. */
58 const char *pe; /* End of input buffer. */
59 LexChar c; /* Current character. */
60 LexToken tok; /* Current token. */
61 LexToken lookahead; /* Lookahead token. */
61 SBuf sb; /* String buffer for tokens. */ 62 SBuf sb; /* String buffer for tokens. */
62 lua_Reader rfunc; /* Reader callback. */ 63 lua_Reader rfunc; /* Reader callback. */
63 void *rdata; /* Reader callback data. */ 64 void *rdata; /* Reader callback data. */
@@ -78,8 +79,8 @@ LJ_FUNC int lj_lex_setup(lua_State *L, LexState *ls);
78LJ_FUNC void lj_lex_cleanup(lua_State *L, LexState *ls); 79LJ_FUNC void lj_lex_cleanup(lua_State *L, LexState *ls);
79LJ_FUNC void lj_lex_next(LexState *ls); 80LJ_FUNC void lj_lex_next(LexState *ls);
80LJ_FUNC LexToken lj_lex_lookahead(LexState *ls); 81LJ_FUNC LexToken lj_lex_lookahead(LexState *ls);
81LJ_FUNC const char *lj_lex_token2str(LexState *ls, LexToken token); 82LJ_FUNC const char *lj_lex_token2str(LexState *ls, LexToken tok);
82LJ_FUNC_NORET void lj_lex_error(LexState *ls, LexToken token, ErrMsg em, ...); 83LJ_FUNC_NORET void lj_lex_error(LexState *ls, LexToken tok, ErrMsg em, ...);
83LJ_FUNC void lj_lex_init(lua_State *L); 84LJ_FUNC void lj_lex_init(lua_State *L);
84 85
85#endif 86#endif
diff --git a/src/lj_lib.c b/src/lj_lib.c
index 856685ee..b16d0564 100644
--- a/src/lj_lib.c
+++ b/src/lj_lib.c
@@ -18,6 +18,9 @@
18#include "lj_dispatch.h" 18#include "lj_dispatch.h"
19#include "lj_vm.h" 19#include "lj_vm.h"
20#include "lj_strscan.h" 20#include "lj_strscan.h"
21#include "lj_strfmt.h"
22#include "lj_lex.h"
23#include "lj_bcdump.h"
21#include "lj_lib.h" 24#include "lj_lib.h"
22 25
23/* -- Library initialization ---------------------------------------------- */ 26/* -- Library initialization ---------------------------------------------- */
@@ -43,6 +46,28 @@ static GCtab *lib_create_table(lua_State *L, const char *libname, int hsize)
43 return tabV(L->top-1); 46 return tabV(L->top-1);
44} 47}
45 48
49static const uint8_t *lib_read_lfunc(lua_State *L, const uint8_t *p, GCtab *tab)
50{
51 int len = *p++;
52 GCstr *name = lj_str_new(L, (const char *)p, len);
53 LexState ls;
54 GCproto *pt;
55 GCfunc *fn;
56 memset(&ls, 0, sizeof(ls));
57 ls.L = L;
58 ls.p = (const char *)(p+len);
59 ls.pe = (const char *)~(uintptr_t)0;
60 ls.c = -1;
61 ls.level = (BCDUMP_F_STRIP|(LJ_BE*BCDUMP_F_BE));
62 ls.chunkname = name;
63 pt = lj_bcread_proto(&ls);
64 pt->firstline = ~(BCLine)0;
65 fn = lj_func_newL_empty(L, pt, tabref(L->env));
66 /* NOBARRIER: See below for common barrier. */
67 setfuncV(L, lj_tab_setstr(L, tab, name), fn);
68 return (const uint8_t *)ls.p;
69}
70
46void lj_lib_register(lua_State *L, const char *libname, 71void lj_lib_register(lua_State *L, const char *libname,
47 const uint8_t *p, const lua_CFunction *cf) 72 const uint8_t *p, const lua_CFunction *cf)
48{ 73{
@@ -87,6 +112,9 @@ void lj_lib_register(lua_State *L, const char *libname,
87 ofn = fn; 112 ofn = fn;
88 } else { 113 } else {
89 switch (tag | len) { 114 switch (tag | len) {
115 case LIBINIT_LUA:
116 p = lib_read_lfunc(L, p, tab);
117 break;
90 case LIBINIT_SET: 118 case LIBINIT_SET:
91 L->top -= 2; 119 L->top -= 2;
92 if (tvisstr(L->top+1) && strV(L->top+1)->len == 0) 120 if (tvisstr(L->top+1) && strV(L->top+1)->len == 0)
@@ -120,6 +148,37 @@ void lj_lib_register(lua_State *L, const char *libname,
120 } 148 }
121} 149}
122 150
151/* Push internal function on the stack. */
152GCfunc *lj_lib_pushcc(lua_State *L, lua_CFunction f, int id, int n)
153{
154 GCfunc *fn;
155 lua_pushcclosure(L, f, n);
156 fn = funcV(L->top-1);
157 fn->c.ffid = (uint8_t)id;
158 setmref(fn->c.pc, &G(L)->bc_cfunc_int);
159 return fn;
160}
161
162void lj_lib_prereg(lua_State *L, const char *name, lua_CFunction f, GCtab *env)
163{
164 luaL_findtable(L, LUA_REGISTRYINDEX, "_PRELOAD", 4);
165 lua_pushcfunction(L, f);
166 /* NOBARRIER: The function is new (marked white). */
167 setgcref(funcV(L->top-1)->c.env, obj2gco(env));
168 lua_setfield(L, -2, name);
169 L->top--;
170}
171
172int lj_lib_postreg(lua_State *L, lua_CFunction cf, int id, const char *name)
173{
174 GCfunc *fn = lj_lib_pushcf(L, cf, id);
175 GCtab *t = tabref(curr_func(L)->c.env); /* Reference to parent table. */
176 setfuncV(L, lj_tab_setstr(L, t, lj_str_newz(L, name)), fn);
177 lj_gc_anybarriert(L, t);
178 setfuncV(L, L->top++, fn);
179 return 1;
180}
181
123/* -- Type checks --------------------------------------------------------- */ 182/* -- Type checks --------------------------------------------------------- */
124 183
125TValue *lj_lib_checkany(lua_State *L, int narg) 184TValue *lj_lib_checkany(lua_State *L, int narg)
@@ -137,7 +196,7 @@ GCstr *lj_lib_checkstr(lua_State *L, int narg)
137 if (LJ_LIKELY(tvisstr(o))) { 196 if (LJ_LIKELY(tvisstr(o))) {
138 return strV(o); 197 return strV(o);
139 } else if (tvisnumber(o)) { 198 } else if (tvisnumber(o)) {
140 GCstr *s = lj_str_fromnumber(L, o); 199 GCstr *s = lj_strfmt_number(L, o);
141 setstrV(L, o, s); 200 setstrV(L, o, s);
142 return s; 201 return s;
143 } 202 }
@@ -196,20 +255,6 @@ int32_t lj_lib_optint(lua_State *L, int narg, int32_t def)
196 return (o < L->top && !tvisnil(o)) ? lj_lib_checkint(L, narg) : def; 255 return (o < L->top && !tvisnil(o)) ? lj_lib_checkint(L, narg) : def;
197} 256}
198 257
199int32_t lj_lib_checkbit(lua_State *L, int narg)
200{
201 TValue *o = L->base + narg-1;
202 if (!(o < L->top && lj_strscan_numberobj(o)))
203 lj_err_argt(L, narg, LUA_TNUMBER);
204 if (LJ_LIKELY(tvisint(o))) {
205 return intV(o);
206 } else {
207 int32_t i = lj_num2bit(numV(o));
208 if (LJ_DUALNUM) setintV(o, i);
209 return i;
210 }
211}
212
213GCfunc *lj_lib_checkfunc(lua_State *L, int narg) 258GCfunc *lj_lib_checkfunc(lua_State *L, int narg)
214{ 259{
215 TValue *o = L->base + narg-1; 260 TValue *o = L->base + narg-1;
diff --git a/src/lj_lib.h b/src/lj_lib.h
index 9320f34f..3fa7aa17 100644
--- a/src/lj_lib.h
+++ b/src/lj_lib.h
@@ -41,15 +41,22 @@ LJ_FUNC void lj_lib_checknumber(lua_State *L, int narg);
41LJ_FUNC lua_Number lj_lib_checknum(lua_State *L, int narg); 41LJ_FUNC lua_Number lj_lib_checknum(lua_State *L, int narg);
42LJ_FUNC int32_t lj_lib_checkint(lua_State *L, int narg); 42LJ_FUNC int32_t lj_lib_checkint(lua_State *L, int narg);
43LJ_FUNC int32_t lj_lib_optint(lua_State *L, int narg, int32_t def); 43LJ_FUNC int32_t lj_lib_optint(lua_State *L, int narg, int32_t def);
44LJ_FUNC int32_t lj_lib_checkbit(lua_State *L, int narg);
45LJ_FUNC GCfunc *lj_lib_checkfunc(lua_State *L, int narg); 44LJ_FUNC GCfunc *lj_lib_checkfunc(lua_State *L, int narg);
46LJ_FUNC GCtab *lj_lib_checktab(lua_State *L, int narg); 45LJ_FUNC GCtab *lj_lib_checktab(lua_State *L, int narg);
47LJ_FUNC GCtab *lj_lib_checktabornil(lua_State *L, int narg); 46LJ_FUNC GCtab *lj_lib_checktabornil(lua_State *L, int narg);
48LJ_FUNC int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst); 47LJ_FUNC int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst);
49 48
50/* Avoid including lj_frame.h. */ 49/* Avoid including lj_frame.h. */
50#if LJ_GC64
51#define lj_lib_upvalue(L, n) \
52 (&gcval(L->base-2)->fn.c.upvalue[(n)-1])
53#elif LJ_FR2
54#define lj_lib_upvalue(L, n) \
55 (&gcref((L->base-2)->gcr)->fn.c.upvalue[(n)-1])
56#else
51#define lj_lib_upvalue(L, n) \ 57#define lj_lib_upvalue(L, n) \
52 (&gcref((L->base-1)->fr.func)->fn.c.upvalue[(n)-1]) 58 (&gcref((L->base-1)->fr.func)->fn.c.upvalue[(n)-1])
59#endif
53 60
54#if LJ_TARGET_WINDOWS 61#if LJ_TARGET_WINDOWS
55#define lj_lib_checkfpu(L) \ 62#define lj_lib_checkfpu(L) \
@@ -60,23 +67,14 @@ LJ_FUNC int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst);
60#define lj_lib_checkfpu(L) UNUSED(L) 67#define lj_lib_checkfpu(L) UNUSED(L)
61#endif 68#endif
62 69
63/* Push internal function on the stack. */ 70LJ_FUNC GCfunc *lj_lib_pushcc(lua_State *L, lua_CFunction f, int id, int n);
64static LJ_AINLINE void lj_lib_pushcc(lua_State *L, lua_CFunction f,
65 int id, int n)
66{
67 GCfunc *fn;
68 lua_pushcclosure(L, f, n);
69 fn = funcV(L->top-1);
70 fn->c.ffid = (uint8_t)id;
71 setmref(fn->c.pc, &G(L)->bc_cfunc_int);
72}
73
74#define lj_lib_pushcf(L, fn, id) (lj_lib_pushcc(L, (fn), (id), 0)) 71#define lj_lib_pushcf(L, fn, id) (lj_lib_pushcc(L, (fn), (id), 0))
75 72
76/* Library function declarations. Scanned by buildvm. */ 73/* Library function declarations. Scanned by buildvm. */
77#define LJLIB_CF(name) static int lj_cf_##name(lua_State *L) 74#define LJLIB_CF(name) static int lj_cf_##name(lua_State *L)
78#define LJLIB_ASM(name) static int lj_ffh_##name(lua_State *L) 75#define LJLIB_ASM(name) static int lj_ffh_##name(lua_State *L)
79#define LJLIB_ASM_(name) 76#define LJLIB_ASM_(name)
77#define LJLIB_LUA(name)
80#define LJLIB_SET(name) 78#define LJLIB_SET(name)
81#define LJLIB_PUSH(arg) 79#define LJLIB_PUSH(arg)
82#define LJLIB_REC(handler) 80#define LJLIB_REC(handler)
@@ -88,6 +86,10 @@ static LJ_AINLINE void lj_lib_pushcc(lua_State *L, lua_CFunction f,
88 86
89LJ_FUNC void lj_lib_register(lua_State *L, const char *libname, 87LJ_FUNC void lj_lib_register(lua_State *L, const char *libname,
90 const uint8_t *init, const lua_CFunction *cf); 88 const uint8_t *init, const lua_CFunction *cf);
89LJ_FUNC void lj_lib_prereg(lua_State *L, const char *name, lua_CFunction f,
90 GCtab *env);
91LJ_FUNC int lj_lib_postreg(lua_State *L, lua_CFunction cf, int id,
92 const char *name);
91 93
92/* Library init data tags. */ 94/* Library init data tags. */
93#define LIBINIT_LENMASK 0x3f 95#define LIBINIT_LENMASK 0x3f
@@ -96,7 +98,8 @@ LJ_FUNC void lj_lib_register(lua_State *L, const char *libname,
96#define LIBINIT_ASM 0x40 98#define LIBINIT_ASM 0x40
97#define LIBINIT_ASM_ 0x80 99#define LIBINIT_ASM_ 0x80
98#define LIBINIT_STRING 0xc0 100#define LIBINIT_STRING 0xc0
99#define LIBINIT_MAXSTR 0x39 101#define LIBINIT_MAXSTR 0x38
102#define LIBINIT_LUA 0xf9
100#define LIBINIT_SET 0xfa 103#define LIBINIT_SET 0xfa
101#define LIBINIT_NUMBER 0xfb 104#define LIBINIT_NUMBER 0xfb
102#define LIBINIT_COPY 0xfc 105#define LIBINIT_COPY 0xfc
diff --git a/src/lj_load.c b/src/lj_load.c
index ff7b8511..95a6ab0d 100644
--- a/src/lj_load.c
+++ b/src/lj_load.c
@@ -15,7 +15,7 @@
15#include "lj_obj.h" 15#include "lj_obj.h"
16#include "lj_gc.h" 16#include "lj_gc.h"
17#include "lj_err.h" 17#include "lj_err.h"
18#include "lj_str.h" 18#include "lj_buf.h"
19#include "lj_func.h" 19#include "lj_func.h"
20#include "lj_frame.h" 20#include "lj_frame.h"
21#include "lj_vm.h" 21#include "lj_vm.h"
@@ -54,7 +54,7 @@ LUA_API int lua_loadx(lua_State *L, lua_Reader reader, void *data,
54 ls.rdata = data; 54 ls.rdata = data;
55 ls.chunkarg = chunkname ? chunkname : "?"; 55 ls.chunkarg = chunkname ? chunkname : "?";
56 ls.mode = mode; 56 ls.mode = mode;
57 lj_str_initbuf(&ls.sb); 57 lj_buf_init(L, &ls.sb);
58 status = lj_vm_cpcall(L, NULL, &ls, cpparser); 58 status = lj_vm_cpcall(L, NULL, &ls, cpparser);
59 lj_lex_cleanup(L, &ls); 59 lj_lex_cleanup(L, &ls);
60 lj_gc_check(L); 60 lj_gc_check(L);
diff --git a/src/lj_meta.c b/src/lj_meta.c
index faaaf702..104ecf07 100644
--- a/src/lj_meta.c
+++ b/src/lj_meta.c
@@ -12,6 +12,7 @@
12#include "lj_obj.h" 12#include "lj_obj.h"
13#include "lj_gc.h" 13#include "lj_gc.h"
14#include "lj_err.h" 14#include "lj_err.h"
15#include "lj_buf.h"
15#include "lj_str.h" 16#include "lj_str.h"
16#include "lj_tab.h" 17#include "lj_tab.h"
17#include "lj_meta.h" 18#include "lj_meta.h"
@@ -19,6 +20,8 @@
19#include "lj_bc.h" 20#include "lj_bc.h"
20#include "lj_vm.h" 21#include "lj_vm.h"
21#include "lj_strscan.h" 22#include "lj_strscan.h"
23#include "lj_strfmt.h"
24#include "lj_lib.h"
22 25
23/* -- Metamethod handling ------------------------------------------------- */ 26/* -- Metamethod handling ------------------------------------------------- */
24 27
@@ -77,12 +80,16 @@ int lj_meta_tailcall(lua_State *L, cTValue *tv)
77 TValue *base = L->base; 80 TValue *base = L->base;
78 TValue *top = L->top; 81 TValue *top = L->top;
79 const BCIns *pc = frame_pc(base-1); /* Preserve old PC from frame. */ 82 const BCIns *pc = frame_pc(base-1); /* Preserve old PC from frame. */
80 copyTV(L, base-1, tv); /* Replace frame with new object. */ 83 copyTV(L, base-1-LJ_FR2, tv); /* Replace frame with new object. */
81 top->u32.lo = LJ_CONT_TAILCALL; 84 if (LJ_FR2)
82 setframe_pc(top, pc); 85 (top++)->u64 = LJ_CONT_TAILCALL;
83 setframe_gc(top+1, obj2gco(L)); /* Dummy frame object. */ 86 else
84 setframe_ftsz(top+1, (int)((char *)(top+2) - (char *)base) + FRAME_CONT); 87 top->u32.lo = LJ_CONT_TAILCALL;
85 L->base = L->top = top+2; 88 setframe_pc(top++, pc);
89 if (LJ_FR2) top++;
90 setframe_gc(top, obj2gco(L), LJ_TTHREAD); /* Dummy frame object. */
91 setframe_ftsz(top, ((char *)(top+1) - (char *)base) + FRAME_CONT);
92 L->base = L->top = top+1;
86 /* 93 /*
87 ** before: [old_mo|PC] [... ...] 94 ** before: [old_mo|PC] [... ...]
88 ** ^base ^top 95 ** ^base ^top
@@ -113,11 +120,13 @@ static TValue *mmcall(lua_State *L, ASMFunction cont, cTValue *mo,
113 */ 120 */
114 TValue *top = L->top; 121 TValue *top = L->top;
115 if (curr_funcisL(L)) top = curr_topL(L); 122 if (curr_funcisL(L)) top = curr_topL(L);
116 setcont(top, cont); /* Assembler VM stores PC in upper word. */ 123 setcont(top++, cont); /* Assembler VM stores PC in upper word or FR2. */
117 copyTV(L, top+1, mo); /* Store metamethod and two arguments. */ 124 if (LJ_FR2) setnilV(top++);
118 copyTV(L, top+2, a); 125 copyTV(L, top++, mo); /* Store metamethod and two arguments. */
119 copyTV(L, top+3, b); 126 if (LJ_FR2) setnilV(top++);
120 return top+2; /* Return new base. */ 127 copyTV(L, top, a);
128 copyTV(L, top+1, b);
129 return top; /* Return new base. */
121} 130}
122 131
123/* -- C helpers for some instructions, called from assembler VM ----------- */ 132/* -- C helpers for some instructions, called from assembler VM ----------- */
@@ -225,27 +234,14 @@ TValue *lj_meta_arith(lua_State *L, TValue *ra, cTValue *rb, cTValue *rc,
225 } 234 }
226} 235}
227 236
228/* In-place coercion of a number to a string. */
229static LJ_AINLINE int tostring(lua_State *L, TValue *o)
230{
231 if (tvisstr(o)) {
232 return 1;
233 } else if (tvisnumber(o)) {
234 setstrV(L, o, lj_str_fromnumber(L, o));
235 return 1;
236 } else {
237 return 0;
238 }
239}
240
241/* Helper for CAT. Coercion, iterative concat, __concat metamethod. */ 237/* Helper for CAT. Coercion, iterative concat, __concat metamethod. */
242TValue *lj_meta_cat(lua_State *L, TValue *top, int left) 238TValue *lj_meta_cat(lua_State *L, TValue *top, int left)
243{ 239{
244 int fromc = 0; 240 int fromc = 0;
245 if (left < 0) { left = -left; fromc = 1; } 241 if (left < 0) { left = -left; fromc = 1; }
246 do { 242 do {
247 int n = 1; 243 if (!(tvisstr(top) || tvisnumber(top)) ||
248 if (!(tvisstr(top-1) || tvisnumber(top-1)) || !tostring(L, top)) { 244 !(tvisstr(top-1) || tvisnumber(top-1))) {
249 cTValue *mo = lj_meta_lookup(L, top-1, MM_concat); 245 cTValue *mo = lj_meta_lookup(L, top-1, MM_concat);
250 if (tvisnil(mo)) { 246 if (tvisnil(mo)) {
251 mo = lj_meta_lookup(L, top, MM_concat); 247 mo = lj_meta_lookup(L, top, MM_concat);
@@ -266,13 +262,12 @@ TValue *lj_meta_cat(lua_State *L, TValue *top, int left)
266 ** after mm: [...][CAT stack ...] <--push-- [result] 262 ** after mm: [...][CAT stack ...] <--push-- [result]
267 ** next step: [...][CAT stack .............] 263 ** next step: [...][CAT stack .............]
268 */ 264 */
269 copyTV(L, top+2, top); /* Careful with the order of stack copies! */ 265 copyTV(L, top+2*LJ_FR2+2, top); /* Carefully ordered stack copies! */
270 copyTV(L, top+1, top-1); 266 copyTV(L, top+2*LJ_FR2+1, top-1);
271 copyTV(L, top, mo); 267 copyTV(L, top+LJ_FR2, mo);
272 setcont(top-1, lj_cont_cat); 268 setcont(top-1, lj_cont_cat);
269 if (LJ_FR2) { setnilV(top); setnilV(top+2); top += 2; }
273 return top+1; /* Trigger metamethod call. */ 270 return top+1; /* Trigger metamethod call. */
274 } else if (strV(top)->len == 0) { /* Shortcut. */
275 (void)tostring(L, top-1);
276 } else { 271 } else {
277 /* Pick as many strings as possible from the top and concatenate them: 272 /* Pick as many strings as possible from the top and concatenate them:
278 ** 273 **
@@ -281,27 +276,28 @@ TValue *lj_meta_cat(lua_State *L, TValue *top, int left)
281 ** concat: [...][CAT stack ...] [result] 276 ** concat: [...][CAT stack ...] [result]
282 ** next step: [...][CAT stack ............] 277 ** next step: [...][CAT stack ............]
283 */ 278 */
284 MSize tlen = strV(top)->len; 279 TValue *e, *o = top;
285 char *buffer; 280 uint64_t tlen = tvisstr(o) ? strV(o)->len : STRFMT_MAXBUF_NUM;
286 int i; 281 char *p, *buf;
287 for (n = 1; n <= left && tostring(L, top-n); n++) { 282 do {
288 MSize len = strV(top-n)->len; 283 o--; tlen += tvisstr(o) ? strV(o)->len : STRFMT_MAXBUF_NUM;
289 if (len >= LJ_MAX_STR - tlen) 284 } while (--left > 0 && (tvisstr(o-1) || tvisnumber(o-1)));
290 lj_err_msg(L, LJ_ERR_STROV); 285 if (tlen >= LJ_MAX_STR) lj_err_msg(L, LJ_ERR_STROV);
291 tlen += len; 286 p = buf = lj_buf_tmp(L, (MSize)tlen);
292 } 287 for (e = top, top = o; o <= e; o++) {
293 buffer = lj_str_needbuf(L, &G(L)->tmpbuf, tlen); 288 if (tvisstr(o)) {
294 n--; 289 GCstr *s = strV(o);
295 tlen = 0; 290 MSize len = s->len;
296 for (i = n; i >= 0; i--) { 291 p = lj_buf_wmem(p, strdata(s), len);
297 MSize len = strV(top-i)->len; 292 } else if (tvisint(o)) {
298 memcpy(buffer + tlen, strVdata(top-i), len); 293 p = lj_strfmt_wint(p, intV(o));
299 tlen += len; 294 } else {
295 lua_assert(tvisnum(o));
296 p = lj_strfmt_wnum(p, o);
297 }
300 } 298 }
301 setstrV(L, top-n, lj_str_new(L, buffer, tlen)); 299 setstrV(L, top, lj_str_new(L, buf, (size_t)(p-buf)));
302 } 300 }
303 left -= n;
304 top -= n;
305 } while (left >= 1); 301 } while (left >= 1);
306 if (LJ_UNLIKELY(G(L)->gc.total >= G(L)->gc.threshold)) { 302 if (LJ_UNLIKELY(G(L)->gc.total >= G(L)->gc.threshold)) {
307 if (!fromc) L->top = curr_topL(L); 303 if (!fromc) L->top = curr_topL(L);
@@ -338,12 +334,14 @@ TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne)
338 return (TValue *)(intptr_t)ne; 334 return (TValue *)(intptr_t)ne;
339 } 335 }
340 top = curr_top(L); 336 top = curr_top(L);
341 setcont(top, ne ? lj_cont_condf : lj_cont_condt); 337 setcont(top++, ne ? lj_cont_condf : lj_cont_condt);
342 copyTV(L, top+1, mo); 338 if (LJ_FR2) setnilV(top++);
339 copyTV(L, top++, mo);
340 if (LJ_FR2) setnilV(top++);
343 it = ~(uint32_t)o1->gch.gct; 341 it = ~(uint32_t)o1->gch.gct;
344 setgcV(L, top+2, o1, it); 342 setgcV(L, top, o1, it);
345 setgcV(L, top+3, o2, it); 343 setgcV(L, top+1, o2, it);
346 return top+2; /* Trigger metamethod call. */ 344 return top; /* Trigger metamethod call. */
347 } 345 }
348 return (TValue *)(intptr_t)ne; 346 return (TValue *)(intptr_t)ne;
349} 347}
@@ -366,7 +364,7 @@ TValue * LJ_FASTCALL lj_meta_equal_cd(lua_State *L, BCIns ins)
366 o2 = &mref(curr_proto(L)->k, cTValue)[bc_d(ins)]; 364 o2 = &mref(curr_proto(L)->k, cTValue)[bc_d(ins)];
367 } else { 365 } else {
368 lua_assert(op == BC_ISEQP); 366 lua_assert(op == BC_ISEQP);
369 setitype(&tv, ~bc_d(ins)); 367 setpriV(&tv, ~bc_d(ins));
370 o2 = &tv; 368 o2 = &tv;
371 } 369 }
372 mo = lj_meta_lookup(L, o1mm, MM_eq); 370 mo = lj_meta_lookup(L, o1mm, MM_eq);
@@ -423,6 +421,18 @@ TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op)
423 } 421 }
424} 422}
425 423
424/* Helper for ISTYPE and ISNUM. Implicit coercion or error. */
425void lj_meta_istype(lua_State *L, BCReg ra, BCReg tp)
426{
427 L->top = curr_topL(L);
428 ra++; tp--;
429 lua_assert(LJ_DUALNUM || tp != ~LJ_TNUMX); /* ISTYPE -> ISNUM broken. */
430 if (LJ_DUALNUM && tp == ~LJ_TNUMX) lj_lib_checkint(L, ra);
431 else if (tp == ~LJ_TNUMX+1) lj_lib_checknum(L, ra);
432 else if (tp == ~LJ_TSTR) lj_lib_checkstr(L, ra);
433 else lj_err_argtype(L, ra, lj_obj_itypename[tp]);
434}
435
426/* Helper for calls. __call metamethod. */ 436/* Helper for calls. __call metamethod. */
427void lj_meta_call(lua_State *L, TValue *func, TValue *top) 437void lj_meta_call(lua_State *L, TValue *func, TValue *top)
428{ 438{
@@ -430,7 +440,8 @@ void lj_meta_call(lua_State *L, TValue *func, TValue *top)
430 TValue *p; 440 TValue *p;
431 if (!tvisfunc(mo)) 441 if (!tvisfunc(mo))
432 lj_err_optype_call(L, func); 442 lj_err_optype_call(L, func);
433 for (p = top; p > func; p--) copyTV(L, p, p-1); 443 for (p = top; p > func+2*LJ_FR2; p--) copyTV(L, p, p-1);
444 if (LJ_FR2) copyTV(L, func+2, func);
434 copyTV(L, func, mo); 445 copyTV(L, func, mo);
435} 446}
436 447
diff --git a/src/lj_meta.h b/src/lj_meta.h
index 2c1ad0dd..7f716333 100644
--- a/src/lj_meta.h
+++ b/src/lj_meta.h
@@ -31,6 +31,7 @@ LJ_FUNCA TValue * LJ_FASTCALL lj_meta_len(lua_State *L, cTValue *o);
31LJ_FUNCA TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne); 31LJ_FUNCA TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne);
32LJ_FUNCA TValue * LJ_FASTCALL lj_meta_equal_cd(lua_State *L, BCIns ins); 32LJ_FUNCA TValue * LJ_FASTCALL lj_meta_equal_cd(lua_State *L, BCIns ins);
33LJ_FUNCA TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op); 33LJ_FUNCA TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op);
34LJ_FUNCA void lj_meta_istype(lua_State *L, BCReg ra, BCReg tp);
34LJ_FUNCA void lj_meta_call(lua_State *L, TValue *func, TValue *top); 35LJ_FUNCA void lj_meta_call(lua_State *L, TValue *func, TValue *top);
35LJ_FUNCA void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *o); 36LJ_FUNCA void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *o);
36 37
diff --git a/src/lj_obj.c b/src/lj_obj.c
index 7fab714e..b78d2c8d 100644
--- a/src/lj_obj.c
+++ b/src/lj_obj.c
@@ -20,7 +20,7 @@ LJ_DATADEF const char *const lj_obj_itypename[] = { /* ORDER LJ_T */
20}; 20};
21 21
22/* Compare two objects without calling metamethods. */ 22/* Compare two objects without calling metamethods. */
23int lj_obj_equal(cTValue *o1, cTValue *o2) 23int LJ_FASTCALL lj_obj_equal(cTValue *o1, cTValue *o2)
24{ 24{
25 if (itype(o1) == itype(o2)) { 25 if (itype(o1) == itype(o2)) {
26 if (tvispri(o1)) 26 if (tvispri(o1))
@@ -33,3 +33,18 @@ int lj_obj_equal(cTValue *o1, cTValue *o2)
33 return numberVnum(o1) == numberVnum(o2); 33 return numberVnum(o1) == numberVnum(o2);
34} 34}
35 35
36/* Return pointer to object or its object data. */
37const void * LJ_FASTCALL lj_obj_ptr(cTValue *o)
38{
39 if (tvisudata(o))
40 return uddata(udataV(o));
41 else if (tvislightud(o))
42 return lightudV(o);
43 else if (LJ_HASFFI && tviscdata(o))
44 return cdataptr(cdataV(o));
45 else if (tvisgcv(o))
46 return gcV(o);
47 else
48 return NULL;
49}
50
diff --git a/src/lj_obj.h b/src/lj_obj.h
index 6e8381cb..74ed59bc 100644
--- a/src/lj_obj.h
+++ b/src/lj_obj.h
@@ -15,42 +15,75 @@
15 15
16/* -- Memory references (32 bit address space) ---------------------------- */ 16/* -- Memory references (32 bit address space) ---------------------------- */
17 17
18/* Memory size. */ 18/* Memory and GC object sizes. */
19typedef uint32_t MSize; 19typedef uint32_t MSize;
20#if LJ_GC64
21typedef uint64_t GCSize;
22#else
23typedef uint32_t GCSize;
24#endif
20 25
21/* Memory reference */ 26/* Memory reference */
22typedef struct MRef { 27typedef struct MRef {
28#if LJ_GC64
29 uint64_t ptr64; /* True 64 bit pointer. */
30#else
23 uint32_t ptr32; /* Pseudo 32 bit pointer. */ 31 uint32_t ptr32; /* Pseudo 32 bit pointer. */
32#endif
24} MRef; 33} MRef;
25 34
35#if LJ_GC64
36#define mref(r, t) ((t *)(void *)(r).ptr64)
37
38#define setmref(r, p) ((r).ptr64 = (uint64_t)(void *)(p))
39#define setmrefr(r, v) ((r).ptr64 = (v).ptr64)
40#else
26#define mref(r, t) ((t *)(void *)(uintptr_t)(r).ptr32) 41#define mref(r, t) ((t *)(void *)(uintptr_t)(r).ptr32)
27 42
28#define setmref(r, p) ((r).ptr32 = (uint32_t)(uintptr_t)(void *)(p)) 43#define setmref(r, p) ((r).ptr32 = (uint32_t)(uintptr_t)(void *)(p))
29#define setmrefr(r, v) ((r).ptr32 = (v).ptr32) 44#define setmrefr(r, v) ((r).ptr32 = (v).ptr32)
45#endif
30 46
31/* -- GC object references (32 bit address space) ------------------------- */ 47/* -- GC object references (32 bit address space) ------------------------- */
32 48
33/* GCobj reference */ 49/* GCobj reference */
34typedef struct GCRef { 50typedef struct GCRef {
51#if LJ_GC64
52 uint64_t gcptr64; /* True 64 bit pointer. */
53#else
35 uint32_t gcptr32; /* Pseudo 32 bit pointer. */ 54 uint32_t gcptr32; /* Pseudo 32 bit pointer. */
55#endif
36} GCRef; 56} GCRef;
37 57
38/* Common GC header for all collectable objects. */ 58/* Common GC header for all collectable objects. */
39#define GCHeader GCRef nextgc; uint8_t marked; uint8_t gct 59#define GCHeader GCRef nextgc; uint8_t marked; uint8_t gct
40/* This occupies 6 bytes, so use the next 2 bytes for non-32 bit fields. */ 60/* This occupies 6 bytes, so use the next 2 bytes for non-32 bit fields. */
41 61
62#if LJ_GC64
63#define gcref(r) ((GCobj *)(r).gcptr64)
64#define gcrefp(r, t) ((t *)(void *)(r).gcptr64)
65#define gcrefu(r) ((r).gcptr64)
66#define gcrefeq(r1, r2) ((r1).gcptr64 == (r2).gcptr64)
67
68#define setgcref(r, gc) ((r).gcptr64 = (uint64_t)&(gc)->gch)
69#define setgcreft(r, gc, it) \
70 (r).gcptr64 = (uint64_t)&(gc)->gch | (((uint64_t)(it)) << 47)
71#define setgcrefp(r, p) ((r).gcptr64 = (uint64_t)(p))
72#define setgcrefnull(r) ((r).gcptr64 = 0)
73#define setgcrefr(r, v) ((r).gcptr64 = (v).gcptr64)
74#else
42#define gcref(r) ((GCobj *)(uintptr_t)(r).gcptr32) 75#define gcref(r) ((GCobj *)(uintptr_t)(r).gcptr32)
43#define gcrefp(r, t) ((t *)(void *)(uintptr_t)(r).gcptr32) 76#define gcrefp(r, t) ((t *)(void *)(uintptr_t)(r).gcptr32)
44#define gcrefu(r) ((r).gcptr32) 77#define gcrefu(r) ((r).gcptr32)
45#define gcrefi(r) ((int32_t)(r).gcptr32)
46#define gcrefeq(r1, r2) ((r1).gcptr32 == (r2).gcptr32) 78#define gcrefeq(r1, r2) ((r1).gcptr32 == (r2).gcptr32)
47#define gcnext(gc) (gcref((gc)->gch.nextgc))
48 79
49#define setgcref(r, gc) ((r).gcptr32 = (uint32_t)(uintptr_t)&(gc)->gch) 80#define setgcref(r, gc) ((r).gcptr32 = (uint32_t)(uintptr_t)&(gc)->gch)
50#define setgcrefi(r, i) ((r).gcptr32 = (uint32_t)(i))
51#define setgcrefp(r, p) ((r).gcptr32 = (uint32_t)(uintptr_t)(p)) 81#define setgcrefp(r, p) ((r).gcptr32 = (uint32_t)(uintptr_t)(p))
52#define setgcrefnull(r) ((r).gcptr32 = 0) 82#define setgcrefnull(r) ((r).gcptr32 = 0)
53#define setgcrefr(r, v) ((r).gcptr32 = (v).gcptr32) 83#define setgcrefr(r, v) ((r).gcptr32 = (v).gcptr32)
84#endif
85
86#define gcnext(gc) (gcref((gc)->gch.nextgc))
54 87
55/* IMPORTANT NOTE: 88/* IMPORTANT NOTE:
56** 89**
@@ -119,11 +152,12 @@ typedef int32_t BCLine; /* Bytecode line number. */
119/* Internal assembler functions. Never call these directly from C. */ 152/* Internal assembler functions. Never call these directly from C. */
120typedef void (*ASMFunction)(void); 153typedef void (*ASMFunction)(void);
121 154
122/* Resizable string buffer. Need this here, details in lj_str.h. */ 155/* Resizable string buffer. Need this here, details in lj_buf.h. */
123typedef struct SBuf { 156typedef struct SBuf {
124 char *buf; /* String buffer base. */ 157 MRef p; /* String buffer pointer. */
125 MSize n; /* String buffer length. */ 158 MRef e; /* String buffer end pointer. */
126 MSize sz; /* String buffer size. */ 159 MRef b; /* String buffer base. */
160 MRef L; /* lua_State, used for buffer resizing. */
127} SBuf; 161} SBuf;
128 162
129/* -- Tags and values ----------------------------------------------------- */ 163/* -- Tags and values ----------------------------------------------------- */
@@ -131,13 +165,23 @@ typedef struct SBuf {
131/* Frame link. */ 165/* Frame link. */
132typedef union { 166typedef union {
133 int32_t ftsz; /* Frame type and size of previous frame. */ 167 int32_t ftsz; /* Frame type and size of previous frame. */
134 MRef pcr; /* Overlaps PC for Lua frames. */ 168 MRef pcr; /* Or PC for Lua frames. */
135} FrameLink; 169} FrameLink;
136 170
137/* Tagged value. */ 171/* Tagged value. */
138typedef LJ_ALIGN(8) union TValue { 172typedef LJ_ALIGN(8) union TValue {
139 uint64_t u64; /* 64 bit pattern overlaps number. */ 173 uint64_t u64; /* 64 bit pattern overlaps number. */
140 lua_Number n; /* Number object overlaps split tag/value object. */ 174 lua_Number n; /* Number object overlaps split tag/value object. */
175#if LJ_GC64
176 GCRef gcr; /* GCobj reference with tag. */
177 int64_t it64;
178 struct {
179 LJ_ENDIAN_LOHI(
180 int32_t i; /* Integer value. */
181 , uint32_t it; /* Internal object tag. Must overlap MSW of number. */
182 )
183 };
184#else
141 struct { 185 struct {
142 LJ_ENDIAN_LOHI( 186 LJ_ENDIAN_LOHI(
143 union { 187 union {
@@ -147,12 +191,17 @@ typedef LJ_ALIGN(8) union TValue {
147 , uint32_t it; /* Internal object tag. Must overlap MSW of number. */ 191 , uint32_t it; /* Internal object tag. Must overlap MSW of number. */
148 ) 192 )
149 }; 193 };
194#endif
195#if LJ_FR2
196 int64_t ftsz; /* Frame type and size of previous frame, or PC. */
197#else
150 struct { 198 struct {
151 LJ_ENDIAN_LOHI( 199 LJ_ENDIAN_LOHI(
152 GCRef func; /* Function for next frame (or dummy L). */ 200 GCRef func; /* Function for next frame (or dummy L). */
153 , FrameLink tp; /* Link to previous frame. */ 201 , FrameLink tp; /* Link to previous frame. */
154 ) 202 )
155 } fr; 203 } fr;
204#endif
156 struct { 205 struct {
157 LJ_ENDIAN_LOHI( 206 LJ_ENDIAN_LOHI(
158 uint32_t lo; /* Lower 32 bits of number. */ 207 uint32_t lo; /* Lower 32 bits of number. */
@@ -172,6 +221,8 @@ typedef const TValue cTValue;
172 221
173/* Internal object tags. 222/* Internal object tags.
174** 223**
224** Format for 32 bit GC references (!LJ_GC64):
225**
175** Internal tags overlap the MSW of a number object (must be a double). 226** Internal tags overlap the MSW of a number object (must be a double).
176** Interpreted as a double these are special NaNs. The FPU only generates 227** Interpreted as a double these are special NaNs. The FPU only generates
177** one type of NaN (0xfff8_0000_0000_0000). So MSWs > 0xfff80000 are available 228** one type of NaN (0xfff8_0000_0000_0000). So MSWs > 0xfff80000 are available
@@ -186,6 +237,18 @@ typedef const TValue cTValue;
186** int (LJ_DUALNUM)| itype | int | 237** int (LJ_DUALNUM)| itype | int |
187** number -------double------ 238** number -------double------
188** 239**
240** Format for 64 bit GC references (LJ_GC64):
241**
242** The upper 13 bits must be 1 (0xfff8...) for a special NaN. The next
243** 4 bits hold the internal tag. The lowest 47 bits either hold a pointer,
244** a zero-extended 32 bit integer or all bits set to 1 for primitive types.
245**
246** ------MSW------.------LSW------
247** primitive types |1..1|itype|1..................1|
248** GC objects/lightud |1..1|itype|-------GCRef--------|
249** int (LJ_DUALNUM) |1..1|itype|0..0|-----int-------|
250** number ------------double-------------
251**
189** ORDER LJ_T 252** ORDER LJ_T
190** Primitive types nil/false/true must be first, lightuserdata next. 253** Primitive types nil/false/true must be first, lightuserdata next.
191** GC objects are at the end, table/userdata must be lowest. 254** GC objects are at the end, table/userdata must be lowest.
@@ -208,7 +271,7 @@ typedef const TValue cTValue;
208#define LJ_TNUMX (~13u) 271#define LJ_TNUMX (~13u)
209 272
210/* Integers have itype == LJ_TISNUM doubles have itype < LJ_TISNUM */ 273/* Integers have itype == LJ_TISNUM doubles have itype < LJ_TISNUM */
211#if LJ_64 274#if LJ_64 && !LJ_GC64
212#define LJ_TISNUM 0xfffeffffu 275#define LJ_TISNUM 0xfffeffffu
213#else 276#else
214#define LJ_TISNUM LJ_TNUMX 277#define LJ_TISNUM LJ_TNUMX
@@ -218,6 +281,10 @@ typedef const TValue cTValue;
218#define LJ_TISGCV (LJ_TSTR+1) 281#define LJ_TISGCV (LJ_TSTR+1)
219#define LJ_TISTABUD LJ_TTAB 282#define LJ_TISTABUD LJ_TTAB
220 283
284#if LJ_GC64
285#define LJ_GCVMASK (((uint64_t)1 << 47) - 1)
286#endif
287
221/* -- String object ------------------------------------------------------- */ 288/* -- String object ------------------------------------------------------- */
222 289
223/* String object header. String payload follows. */ 290/* String object header. String payload follows. */
@@ -291,6 +358,9 @@ typedef struct GCproto {
291 uint8_t numparams; /* Number of parameters. */ 358 uint8_t numparams; /* Number of parameters. */
292 uint8_t framesize; /* Fixed frame size. */ 359 uint8_t framesize; /* Fixed frame size. */
293 MSize sizebc; /* Number of bytecode instructions. */ 360 MSize sizebc; /* Number of bytecode instructions. */
361#if LJ_GC64
362 uint32_t unused_gc64;
363#endif
294 GCRef gclist; 364 GCRef gclist;
295 MRef k; /* Split constant array (points to the middle). */ 365 MRef k; /* Split constant array (points to the middle). */
296 MRef uv; /* Upvalue list. local slot|0x8000 or parent uv idx. */ 366 MRef uv; /* Upvalue list. local slot|0x8000 or parent uv idx. */
@@ -402,7 +472,9 @@ typedef struct Node {
402 TValue val; /* Value object. Must be first field. */ 472 TValue val; /* Value object. Must be first field. */
403 TValue key; /* Key object. */ 473 TValue key; /* Key object. */
404 MRef next; /* Hash chain. */ 474 MRef next; /* Hash chain. */
475#if !LJ_GC64
405 MRef freetop; /* Top of free elements (stored in t->node[0]). */ 476 MRef freetop; /* Top of free elements (stored in t->node[0]). */
477#endif
406} Node; 478} Node;
407 479
408LJ_STATIC_ASSERT(offsetof(Node, val) == 0); 480LJ_STATIC_ASSERT(offsetof(Node, val) == 0);
@@ -417,12 +489,22 @@ typedef struct GCtab {
417 MRef node; /* Hash part. */ 489 MRef node; /* Hash part. */
418 uint32_t asize; /* Size of array part (keys [0, asize-1]). */ 490 uint32_t asize; /* Size of array part (keys [0, asize-1]). */
419 uint32_t hmask; /* Hash part mask (size of hash part - 1). */ 491 uint32_t hmask; /* Hash part mask (size of hash part - 1). */
492#if LJ_GC64
493 MRef freetop; /* Top of free elements. */
494#endif
420} GCtab; 495} GCtab;
421 496
422#define sizetabcolo(n) ((n)*sizeof(TValue) + sizeof(GCtab)) 497#define sizetabcolo(n) ((n)*sizeof(TValue) + sizeof(GCtab))
423#define tabref(r) (&gcref((r))->tab) 498#define tabref(r) (&gcref((r))->tab)
424#define noderef(r) (mref((r), Node)) 499#define noderef(r) (mref((r), Node))
425#define nextnode(n) (mref((n)->next, Node)) 500#define nextnode(n) (mref((n)->next, Node))
501#if LJ_GC64
502#define getfreetop(t, n) (noderef((t)->freetop))
503#define setfreetop(t, n, v) (setmref((t)->freetop, (v)))
504#else
505#define getfreetop(t, n) (noderef((n)->freetop))
506#define setfreetop(t, n, v) (setmref((n)->freetop, (v)))
507#endif
426 508
427/* -- State objects ------------------------------------------------------- */ 509/* -- State objects ------------------------------------------------------- */
428 510
@@ -489,8 +571,8 @@ typedef enum {
489#define mmname_str(g, mm) (strref((g)->gcroot[GCROOT_MMNAME+(mm)])) 571#define mmname_str(g, mm) (strref((g)->gcroot[GCROOT_MMNAME+(mm)]))
490 572
491typedef struct GCState { 573typedef struct GCState {
492 MSize total; /* Memory currently allocated. */ 574 GCSize total; /* Memory currently allocated. */
493 MSize threshold; /* Memory threshold. */ 575 GCSize threshold; /* Memory threshold. */
494 uint8_t currentwhite; /* Current white color. */ 576 uint8_t currentwhite; /* Current white color. */
495 uint8_t state; /* GC state. */ 577 uint8_t state; /* GC state. */
496 uint8_t nocdatafin; /* No cdata finalizer called. */ 578 uint8_t nocdatafin; /* No cdata finalizer called. */
@@ -502,9 +584,9 @@ typedef struct GCState {
502 GCRef grayagain; /* List of objects for atomic traversal. */ 584 GCRef grayagain; /* List of objects for atomic traversal. */
503 GCRef weak; /* List of weak tables (to be cleared). */ 585 GCRef weak; /* List of weak tables (to be cleared). */
504 GCRef mmudata; /* List of userdata (to be finalized). */ 586 GCRef mmudata; /* List of userdata (to be finalized). */
587 GCSize debt; /* Debt (how much GC is behind schedule). */
588 GCSize estimate; /* Estimate of memory actually in use. */
505 MSize stepmul; /* Incremental GC step granularity. */ 589 MSize stepmul; /* Incremental GC step granularity. */
506 MSize debt; /* Debt (how much GC is behind schedule). */
507 MSize estimate; /* Estimate of memory actually in use. */
508 MSize pause; /* Pause between successive GC cycles. */ 590 MSize pause; /* Pause between successive GC cycles. */
509} GCState; 591} GCState;
510 592
@@ -516,8 +598,8 @@ typedef struct global_State {
516 lua_Alloc allocf; /* Memory allocator. */ 598 lua_Alloc allocf; /* Memory allocator. */
517 void *allocd; /* Memory allocator data. */ 599 void *allocd; /* Memory allocator data. */
518 GCState gc; /* Garbage collector. */ 600 GCState gc; /* Garbage collector. */
519 SBuf tmpbuf; /* Temporary buffer for string concatenation. */ 601 volatile int32_t vmstate; /* VM state or current JIT code trace number. */
520 Node nilnode; /* Fallback 1-element hash part (nil key and value). */ 602 SBuf tmpbuf; /* Temporary string buffer. */
521 GCstr strempty; /* Empty string. */ 603 GCstr strempty; /* Empty string. */
522 uint8_t stremptyz; /* Zero terminator of empty string. */ 604 uint8_t stremptyz; /* Zero terminator of empty string. */
523 uint8_t hookmask; /* Hook mask. */ 605 uint8_t hookmask; /* Hook mask. */
@@ -526,17 +608,17 @@ typedef struct global_State {
526 GCRef mainthref; /* Link to main thread. */ 608 GCRef mainthref; /* Link to main thread. */
527 TValue registrytv; /* Anchor for registry. */ 609 TValue registrytv; /* Anchor for registry. */
528 TValue tmptv, tmptv2; /* Temporary TValues. */ 610 TValue tmptv, tmptv2; /* Temporary TValues. */
611 Node nilnode; /* Fallback 1-element hash part (nil key and value). */
529 GCupval uvhead; /* Head of double-linked list of all open upvalues. */ 612 GCupval uvhead; /* Head of double-linked list of all open upvalues. */
530 int32_t hookcount; /* Instruction hook countdown. */ 613 int32_t hookcount; /* Instruction hook countdown. */
531 int32_t hookcstart; /* Start count for instruction hook counter. */ 614 int32_t hookcstart; /* Start count for instruction hook counter. */
532 lua_Hook hookf; /* Hook function. */ 615 lua_Hook hookf; /* Hook function. */
533 lua_CFunction wrapf; /* Wrapper for C function calls. */ 616 lua_CFunction wrapf; /* Wrapper for C function calls. */
534 lua_CFunction panic; /* Called as a last resort for errors. */ 617 lua_CFunction panic; /* Called as a last resort for errors. */
535 volatile int32_t vmstate; /* VM state or current JIT code trace number. */
536 BCIns bc_cfunc_int; /* Bytecode for internal C function calls. */ 618 BCIns bc_cfunc_int; /* Bytecode for internal C function calls. */
537 BCIns bc_cfunc_ext; /* Bytecode for external C function calls. */ 619 BCIns bc_cfunc_ext; /* Bytecode for external C function calls. */
538 GCRef jit_L; /* Current JIT code lua_State or NULL. */ 620 GCRef cur_L; /* Currently executing lua_State. */
539 MRef jit_base; /* Current JIT code L->base. */ 621 MRef jit_base; /* Current JIT code L->base or NULL. */
540 MRef ctype_state; /* Pointer to C type state. */ 622 MRef ctype_state; /* Pointer to C type state. */
541 GCRef gcroot[GCROOT_MAX]; /* GC roots. */ 623 GCRef gcroot[GCROOT_MAX]; /* GC roots. */
542} global_State; 624} global_State;
@@ -553,6 +635,7 @@ typedef struct global_State {
553#define HOOK_ACTIVE_SHIFT 4 635#define HOOK_ACTIVE_SHIFT 4
554#define HOOK_VMEVENT 0x20 636#define HOOK_VMEVENT 0x20
555#define HOOK_GC 0x40 637#define HOOK_GC 0x40
638#define HOOK_PROFILE 0x80
556#define hook_active(g) ((g)->hookmask & HOOK_ACTIVE) 639#define hook_active(g) ((g)->hookmask & HOOK_ACTIVE)
557#define hook_enter(g) ((g)->hookmask |= HOOK_ACTIVE) 640#define hook_enter(g) ((g)->hookmask |= HOOK_ACTIVE)
558#define hook_entergc(g) ((g)->hookmask |= (HOOK_ACTIVE|HOOK_GC)) 641#define hook_entergc(g) ((g)->hookmask |= (HOOK_ACTIVE|HOOK_GC))
@@ -583,7 +666,13 @@ struct lua_State {
583#define registry(L) (&G(L)->registrytv) 666#define registry(L) (&G(L)->registrytv)
584 667
585/* Macros to access the currently executing (Lua) function. */ 668/* Macros to access the currently executing (Lua) function. */
669#if LJ_GC64
670#define curr_func(L) (&gcval(L->base-2)->fn)
671#elif LJ_FR2
672#define curr_func(L) (&gcref((L->base-2)->gcr)->fn)
673#else
586#define curr_func(L) (&gcref((L->base-1)->fr.func)->fn) 674#define curr_func(L) (&gcref((L->base-1)->fr.func)->fn)
675#endif
587#define curr_funcisL(L) (isluafunc(curr_func(L))) 676#define curr_funcisL(L) (isluafunc(curr_func(L)))
588#define curr_proto(L) (funcproto(curr_func(L))) 677#define curr_proto(L) (funcproto(curr_func(L)))
589#define curr_topL(L) (L->base + curr_proto(L)->framesize) 678#define curr_topL(L) (L->base + curr_proto(L)->framesize)
@@ -647,12 +736,17 @@ typedef union GCobj {
647#endif 736#endif
648 737
649/* Macros to test types. */ 738/* Macros to test types. */
739#if LJ_GC64
740#define itype(o) ((uint32_t)((o)->it64 >> 47))
741#define tvisnil(o) ((o)->it64 == -1)
742#else
650#define itype(o) ((o)->it) 743#define itype(o) ((o)->it)
651#define tvisnil(o) (itype(o) == LJ_TNIL) 744#define tvisnil(o) (itype(o) == LJ_TNIL)
745#endif
652#define tvisfalse(o) (itype(o) == LJ_TFALSE) 746#define tvisfalse(o) (itype(o) == LJ_TFALSE)
653#define tvistrue(o) (itype(o) == LJ_TTRUE) 747#define tvistrue(o) (itype(o) == LJ_TTRUE)
654#define tvisbool(o) (tvisfalse(o) || tvistrue(o)) 748#define tvisbool(o) (tvisfalse(o) || tvistrue(o))
655#if LJ_64 749#if LJ_64 && !LJ_GC64
656#define tvislightud(o) (((int32_t)itype(o) >> 15) == -2) 750#define tvislightud(o) (((int32_t)itype(o) >> 15) == -2)
657#else 751#else
658#define tvislightud(o) (itype(o) == LJ_TLIGHTUD) 752#define tvislightud(o) (itype(o) == LJ_TLIGHTUD)
@@ -686,7 +780,7 @@ typedef union GCobj {
686#define rawnumequal(o1, o2) ((o1)->u64 == (o2)->u64) 780#define rawnumequal(o1, o2) ((o1)->u64 == (o2)->u64)
687 781
688/* Macros to convert type ids. */ 782/* Macros to convert type ids. */
689#if LJ_64 783#if LJ_64 && !LJ_GC64
690#define itypemap(o) \ 784#define itypemap(o) \
691 (tvisnumber(o) ? ~LJ_TNUMX : tvislightud(o) ? ~LJ_TLIGHTUD : ~itype(o)) 785 (tvisnumber(o) ? ~LJ_TNUMX : tvislightud(o) ? ~LJ_TLIGHTUD : ~itype(o))
692#else 786#else
@@ -694,8 +788,12 @@ typedef union GCobj {
694#endif 788#endif
695 789
696/* Macros to get tagged values. */ 790/* Macros to get tagged values. */
791#if LJ_GC64
792#define gcval(o) ((GCobj *)(gcrefu((o)->gcr) & LJ_GCVMASK))
793#else
697#define gcval(o) (gcref((o)->gcr)) 794#define gcval(o) (gcref((o)->gcr))
698#define boolV(o) check_exp(tvisbool(o), (LJ_TFALSE - (o)->it)) 795#endif
796#define boolV(o) check_exp(tvisbool(o), (LJ_TFALSE - itype(o)))
699#if LJ_64 797#if LJ_64
700#define lightudV(o) \ 798#define lightudV(o) \
701 check_exp(tvislightud(o), (void *)((o)->u64 & U64x(00007fff,ffffffff))) 799 check_exp(tvislightud(o), (void *)((o)->u64 & U64x(00007fff,ffffffff)))
@@ -714,13 +812,23 @@ typedef union GCobj {
714#define intV(o) check_exp(tvisint(o), (int32_t)(o)->i) 812#define intV(o) check_exp(tvisint(o), (int32_t)(o)->i)
715 813
716/* Macros to set tagged values. */ 814/* Macros to set tagged values. */
815#if LJ_GC64
816#define setitype(o, i) ((o)->it = ((i) << 15))
817#define setnilV(o) ((o)->it64 = -1)
818#define setpriV(o, x) ((o)->it64 = (int64_t)~((uint64_t)~(x)<<47))
819#define setboolV(o, x) ((o)->it64 = (int64_t)~((uint64_t)((x)+1)<<47))
820#else
717#define setitype(o, i) ((o)->it = (i)) 821#define setitype(o, i) ((o)->it = (i))
718#define setnilV(o) ((o)->it = LJ_TNIL) 822#define setnilV(o) ((o)->it = LJ_TNIL)
719#define setboolV(o, x) ((o)->it = LJ_TFALSE-(uint32_t)(x)) 823#define setboolV(o, x) ((o)->it = LJ_TFALSE-(uint32_t)(x))
824#define setpriV(o, i) (setitype((o), (i)))
825#endif
720 826
721static LJ_AINLINE void setlightudV(TValue *o, void *p) 827static LJ_AINLINE void setlightudV(TValue *o, void *p)
722{ 828{
723#if LJ_64 829#if LJ_GC64
830 o->u64 = (uint64_t)p | (((uint64_t)LJ_TLIGHTUD) << 47);
831#elif LJ_64
724 o->u64 = (uint64_t)p | (((uint64_t)0xffff) << 48); 832 o->u64 = (uint64_t)p | (((uint64_t)0xffff) << 48);
725#else 833#else
726 setgcrefp(o->gcr, p); setitype(o, LJ_TLIGHTUD); 834 setgcrefp(o->gcr, p); setitype(o, LJ_TLIGHTUD);
@@ -730,10 +838,16 @@ static LJ_AINLINE void setlightudV(TValue *o, void *p)
730#if LJ_64 838#if LJ_64
731#define checklightudptr(L, p) \ 839#define checklightudptr(L, p) \
732 (((uint64_t)(p) >> 47) ? (lj_err_msg(L, LJ_ERR_BADLU), NULL) : (p)) 840 (((uint64_t)(p) >> 47) ? (lj_err_msg(L, LJ_ERR_BADLU), NULL) : (p))
841#else
842#define checklightudptr(L, p) (p)
843#endif
844
845#if LJ_FR2
846#define setcont(o, f) ((o)->u64 = (uint64_t)(uintptr_t)(void *)(f))
847#elif LJ_64
733#define setcont(o, f) \ 848#define setcont(o, f) \
734 ((o)->u64 = (uint64_t)(void *)(f) - (uint64_t)lj_vm_asm_begin) 849 ((o)->u64 = (uint64_t)(void *)(f) - (uint64_t)lj_vm_asm_begin)
735#else 850#else
736#define checklightudptr(L, p) (p)
737#define setcont(o, f) setlightudV((o), (void *)(f)) 851#define setcont(o, f) setlightudV((o), (void *)(f))
738#endif 852#endif
739 853
@@ -741,9 +855,18 @@ static LJ_AINLINE void setlightudV(TValue *o, void *p)
741 UNUSED(L), lua_assert(!tvisgcv(o) || \ 855 UNUSED(L), lua_assert(!tvisgcv(o) || \
742 ((~itype(o) == gcval(o)->gch.gct) && !isdead(G(L), gcval(o)))) 856 ((~itype(o) == gcval(o)->gch.gct) && !isdead(G(L), gcval(o))))
743 857
744static LJ_AINLINE void setgcV(lua_State *L, TValue *o, GCobj *v, uint32_t itype) 858static LJ_AINLINE void setgcVraw(TValue *o, GCobj *v, uint32_t itype)
745{ 859{
746 setgcref(o->gcr, v); setitype(o, itype); tvchecklive(L, o); 860#if LJ_GC64
861 setgcreft(o->gcr, v, itype);
862#else
863 setgcref(o->gcr, v); setitype(o, itype);
864#endif
865}
866
867static LJ_AINLINE void setgcV(lua_State *L, TValue *o, GCobj *v, uint32_t it)
868{
869 setgcVraw(o, v, it); tvchecklive(L, o);
747} 870}
748 871
749#define define_setV(name, type, tag) \ 872#define define_setV(name, type, tag) \
@@ -810,11 +933,7 @@ static LJ_AINLINE int32_t lj_num2bit(lua_Number n)
810#endif 933#endif
811} 934}
812 935
813#if LJ_TARGET_X86 && !defined(__SSE2__)
814#define lj_num2int(n) lj_num2bit((n))
815#else
816#define lj_num2int(n) ((int32_t)(n)) 936#define lj_num2int(n) ((int32_t)(n))
817#endif
818 937
819static LJ_AINLINE uint64_t lj_num2u64(lua_Number n) 938static LJ_AINLINE uint64_t lj_num2u64(lua_Number n)
820{ 939{
@@ -851,6 +970,7 @@ LJ_DATA const char *const lj_obj_itypename[~LJ_TNUMX+1];
851#define lj_typename(o) (lj_obj_itypename[itypemap(o)]) 970#define lj_typename(o) (lj_obj_itypename[itypemap(o)])
852 971
853/* Compare two objects without calling metamethods. */ 972/* Compare two objects without calling metamethods. */
854LJ_FUNC int lj_obj_equal(cTValue *o1, cTValue *o2); 973LJ_FUNC int LJ_FASTCALL lj_obj_equal(cTValue *o1, cTValue *o2);
974LJ_FUNC const void * LJ_FASTCALL lj_obj_ptr(cTValue *o);
855 975
856#endif 976#endif
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
index d00fdd56..f809a991 100644
--- a/src/lj_opt_fold.c
+++ b/src/lj_opt_fold.c
@@ -14,18 +14,21 @@
14 14
15#if LJ_HASJIT 15#if LJ_HASJIT
16 16
17#include "lj_buf.h"
17#include "lj_str.h" 18#include "lj_str.h"
18#include "lj_tab.h" 19#include "lj_tab.h"
19#include "lj_ir.h" 20#include "lj_ir.h"
20#include "lj_jit.h" 21#include "lj_jit.h"
22#include "lj_ircall.h"
21#include "lj_iropt.h" 23#include "lj_iropt.h"
22#include "lj_trace.h" 24#include "lj_trace.h"
23#if LJ_HASFFI 25#if LJ_HASFFI
24#include "lj_ctype.h" 26#include "lj_ctype.h"
25#endif
26#include "lj_carith.h" 27#include "lj_carith.h"
28#endif
27#include "lj_vm.h" 29#include "lj_vm.h"
28#include "lj_strscan.h" 30#include "lj_strscan.h"
31#include "lj_strfmt.h"
29 32
30/* Here's a short description how the FOLD engine processes instructions: 33/* Here's a short description how the FOLD engine processes instructions:
31** 34**
@@ -155,13 +158,14 @@ typedef IRRef (LJ_FASTCALL *FoldFunc)(jit_State *J);
155 158
156/* Barrier to prevent folding across a GC step. 159/* Barrier to prevent folding across a GC step.
157** GC steps can only happen at the head of a trace and at LOOP. 160** GC steps can only happen at the head of a trace and at LOOP.
158** And the GC is only driven forward if there is at least one allocation. 161** And the GC is only driven forward if there's at least one allocation.
159*/ 162*/
160#define gcstep_barrier(J, ref) \ 163#define gcstep_barrier(J, ref) \
161 ((ref) < J->chain[IR_LOOP] && \ 164 ((ref) < J->chain[IR_LOOP] && \
162 (J->chain[IR_SNEW] || J->chain[IR_XSNEW] || \ 165 (J->chain[IR_SNEW] || J->chain[IR_XSNEW] || \
163 J->chain[IR_TNEW] || J->chain[IR_TDUP] || \ 166 J->chain[IR_TNEW] || J->chain[IR_TDUP] || \
164 J->chain[IR_CNEW] || J->chain[IR_CNEWI] || J->chain[IR_TOSTR])) 167 J->chain[IR_CNEW] || J->chain[IR_CNEWI] || \
168 J->chain[IR_BUFSTR] || J->chain[IR_TOSTR] || J->chain[IR_CALLA]))
165 169
166/* -- Constant folding for FP numbers ------------------------------------- */ 170/* -- Constant folding for FP numbers ------------------------------------- */
167 171
@@ -336,11 +340,9 @@ LJFOLDF(kfold_intcomp0)
336static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op) 340static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op)
337{ 341{
338 switch (op) { 342 switch (op) {
339#if LJ_64 || LJ_HASFFI 343#if LJ_HASFFI
340 case IR_ADD: k1 += k2; break; 344 case IR_ADD: k1 += k2; break;
341 case IR_SUB: k1 -= k2; break; 345 case IR_SUB: k1 -= k2; break;
342#endif
343#if LJ_HASFFI
344 case IR_MUL: k1 *= k2; break; 346 case IR_MUL: k1 *= k2; break;
345 case IR_BAND: k1 &= k2; break; 347 case IR_BAND: k1 &= k2; break;
346 case IR_BOR: k1 |= k2; break; 348 case IR_BOR: k1 |= k2; break;
@@ -392,20 +394,10 @@ LJFOLD(BROL KINT64 KINT)
392LJFOLD(BROR KINT64 KINT) 394LJFOLD(BROR KINT64 KINT)
393LJFOLDF(kfold_int64shift) 395LJFOLDF(kfold_int64shift)
394{ 396{
395#if LJ_HASFFI || LJ_64 397#if LJ_HASFFI
396 uint64_t k = ir_k64(fleft)->u64; 398 uint64_t k = ir_k64(fleft)->u64;
397 int32_t sh = (fright->i & 63); 399 int32_t sh = (fright->i & 63);
398 switch ((IROp)fins->o) { 400 return INT64FOLD(lj_carith_shift64(k, sh, fins->o - IR_BSHL));
399 case IR_BSHL: k <<= sh; break;
400#if LJ_HASFFI
401 case IR_BSHR: k >>= sh; break;
402 case IR_BSAR: k = (uint64_t)((int64_t)k >> sh); break;
403 case IR_BROL: k = lj_rol(k, sh); break;
404 case IR_BROR: k = lj_ror(k, sh); break;
405#endif
406 default: lua_assert(0); break;
407 }
408 return INT64FOLD(k);
409#else 401#else
410 UNUSED(J); lua_assert(0); return FAILFOLD; 402 UNUSED(J); lua_assert(0); return FAILFOLD;
411#endif 403#endif
@@ -528,6 +520,180 @@ LJFOLDF(kfold_strcmp)
528 return NEXTFOLD; 520 return NEXTFOLD;
529} 521}
530 522
523/* -- Constant folding and forwarding for buffers ------------------------- */
524
525/*
526** Buffer ops perform stores, but their effect is limited to the buffer
527** itself. Also, buffer ops are chained: a use of an op implies a use of
528** all other ops up the chain. Conversely, if an op is unused, all ops
529** up the chain can go unused. This largely eliminates the need to treat
530** them as stores.
531**
532** Alas, treating them as normal (IRM_N) ops doesn't work, because they
533** cannot be CSEd in isolation. CSE for IRM_N is implicitly done in LOOP
534** or if FOLD is disabled.
535**
536** The compromise is to declare them as loads, emit them like stores and
537** CSE whole chains manually when the BUFSTR is to be emitted. Any chain
538** fragments left over from CSE are eliminated by DCE.
539*/
540
541/* BUFHDR is emitted like a store, see below. */
542
543LJFOLD(BUFPUT BUFHDR BUFSTR)
544LJFOLDF(bufput_append)
545{
546 /* New buffer, no other buffer op in between and same buffer? */
547 if ((J->flags & JIT_F_OPT_FWD) &&
548 !(fleft->op2 & IRBUFHDR_APPEND) &&
549 fleft->prev == fright->op2 &&
550 fleft->op1 == IR(fright->op2)->op1) {
551 IRRef ref = fins->op1;
552 IR(ref)->op2 = (fleft->op2 | IRBUFHDR_APPEND); /* Modify BUFHDR. */
553 IR(ref)->op1 = fright->op1;
554 return ref;
555 }
556 return EMITFOLD; /* Always emit, CSE later. */
557}
558
559LJFOLD(BUFPUT any any)
560LJFOLDF(bufput_kgc)
561{
562 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fright->o == IR_KGC) {
563 GCstr *s2 = ir_kstr(fright);
564 if (s2->len == 0) { /* Empty string? */
565 return LEFTFOLD;
566 } else {
567 if (fleft->o == IR_BUFPUT && irref_isk(fleft->op2) &&
568 !irt_isphi(fleft->t)) { /* Join two constant string puts in a row. */
569 GCstr *s1 = ir_kstr(IR(fleft->op2));
570 IRRef kref = lj_ir_kstr(J, lj_buf_cat2str(J->L, s1, s2));
571 /* lj_ir_kstr() may realloc the IR, which invalidates any IRIns *. */
572 IR(fins->op1)->op2 = kref; /* Modify previous BUFPUT. */
573 return fins->op1;
574 }
575 }
576 }
577 return EMITFOLD; /* Always emit, CSE later. */
578}
579
580LJFOLD(BUFSTR any any)
581LJFOLDF(bufstr_kfold_cse)
582{
583 lua_assert(fleft->o == IR_BUFHDR || fleft->o == IR_BUFPUT ||
584 fleft->o == IR_CALLL);
585 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
586 if (fleft->o == IR_BUFHDR) { /* No put operations? */
587 if (!(fleft->op2 & IRBUFHDR_APPEND)) /* Empty buffer? */
588 return lj_ir_kstr(J, &J2G(J)->strempty);
589 fins->op1 = fleft->op1;
590 fins->op2 = fleft->prev; /* Relies on checks in bufput_append. */
591 return CSEFOLD;
592 } else if (fleft->o == IR_BUFPUT) {
593 IRIns *irb = IR(fleft->op1);
594 if (irb->o == IR_BUFHDR && !(irb->op2 & IRBUFHDR_APPEND))
595 return fleft->op2; /* Shortcut for a single put operation. */
596 }
597 }
598 /* Try to CSE the whole chain. */
599 if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
600 IRRef ref = J->chain[IR_BUFSTR];
601 while (ref) {
602 IRIns *irs = IR(ref), *ira = fleft, *irb = IR(irs->op1);
603 while (ira->o == irb->o && ira->op2 == irb->op2) {
604 lua_assert(ira->o == IR_BUFHDR || ira->o == IR_BUFPUT ||
605 ira->o == IR_CALLL || ira->o == IR_CARG);
606 if (ira->o == IR_BUFHDR && !(ira->op2 & IRBUFHDR_APPEND))
607 return ref; /* CSE succeeded. */
608 if (ira->o == IR_CALLL && ira->op2 == IRCALL_lj_buf_puttab)
609 break;
610 ira = IR(ira->op1);
611 irb = IR(irb->op1);
612 }
613 ref = irs->prev;
614 }
615 }
616 return EMITFOLD; /* No CSE possible. */
617}
618
619LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_reverse)
620LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_upper)
621LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_lower)
622LJFOLD(CALLL CARG IRCALL_lj_strfmt_putquoted)
623LJFOLDF(bufput_kfold_op)
624{
625 if (irref_isk(fleft->op2)) {
626 const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
627 SBuf *sb = lj_buf_tmp_(J->L);
628 sb = ((SBuf * (LJ_FASTCALL *)(SBuf *, GCstr *))ci->func)(sb,
629 ir_kstr(IR(fleft->op2)));
630 fins->o = IR_BUFPUT;
631 fins->op1 = fleft->op1;
632 fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
633 return RETRYFOLD;
634 }
635 return EMITFOLD; /* Always emit, CSE later. */
636}
637
638LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_rep)
639LJFOLDF(bufput_kfold_rep)
640{
641 if (irref_isk(fleft->op2)) {
642 IRIns *irc = IR(fleft->op1);
643 if (irref_isk(irc->op2)) {
644 SBuf *sb = lj_buf_tmp_(J->L);
645 sb = lj_buf_putstr_rep(sb, ir_kstr(IR(irc->op2)), IR(fleft->op2)->i);
646 fins->o = IR_BUFPUT;
647 fins->op1 = irc->op1;
648 fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
649 return RETRYFOLD;
650 }
651 }
652 return EMITFOLD; /* Always emit, CSE later. */
653}
654
655LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfxint)
656LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum_int)
657LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum_uint)
658LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum)
659LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfstr)
660LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfchar)
661LJFOLDF(bufput_kfold_fmt)
662{
663 IRIns *irc = IR(fleft->op1);
664 lua_assert(irref_isk(irc->op2)); /* SFormat must be const. */
665 if (irref_isk(fleft->op2)) {
666 SFormat sf = (SFormat)IR(irc->op2)->i;
667 IRIns *ira = IR(fleft->op2);
668 SBuf *sb = lj_buf_tmp_(J->L);
669 switch (fins->op2) {
670 case IRCALL_lj_strfmt_putfxint:
671 sb = lj_strfmt_putfxint(sb, sf, ir_k64(ira)->u64);
672 break;
673 case IRCALL_lj_strfmt_putfstr:
674 sb = lj_strfmt_putfstr(sb, sf, ir_kstr(ira));
675 break;
676 case IRCALL_lj_strfmt_putfchar:
677 sb = lj_strfmt_putfchar(sb, sf, ira->i);
678 break;
679 case IRCALL_lj_strfmt_putfnum_int:
680 case IRCALL_lj_strfmt_putfnum_uint:
681 case IRCALL_lj_strfmt_putfnum:
682 default: {
683 const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
684 sb = ((SBuf * (*)(SBuf *, SFormat, lua_Number))ci->func)(sb, sf,
685 ir_knum(ira)->n);
686 break;
687 }
688 }
689 fins->o = IR_BUFPUT;
690 fins->op1 = irc->op1;
691 fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
692 return RETRYFOLD;
693 }
694 return EMITFOLD; /* Always emit, CSE later. */
695}
696
531/* -- Constant folding of pointer arithmetic ------------------------------ */ 697/* -- Constant folding of pointer arithmetic ------------------------------ */
532 698
533LJFOLD(ADD KGC KINT) 699LJFOLD(ADD KGC KINT)
@@ -648,27 +814,22 @@ LJFOLD(CONV KNUM IRCONV_INT_NUM)
648LJFOLDF(kfold_conv_knum_int_num) 814LJFOLDF(kfold_conv_knum_int_num)
649{ 815{
650 lua_Number n = knumleft; 816 lua_Number n = knumleft;
651 if (!(fins->op2 & IRCONV_TRUNC)) { 817 int32_t k = lj_num2int(n);
652 int32_t k = lj_num2int(n); 818 if (irt_isguard(fins->t) && n != (lua_Number)k) {
653 if (irt_isguard(fins->t) && n != (lua_Number)k) { 819 /* We're about to create a guard which always fails, like CONV +1.5.
654 /* We're about to create a guard which always fails, like CONV +1.5. 820 ** Some pathological loops cause this during LICM, e.g.:
655 ** Some pathological loops cause this during LICM, e.g.: 821 ** local x,k,t = 0,1.5,{1,[1.5]=2}
656 ** local x,k,t = 0,1.5,{1,[1.5]=2} 822 ** for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end
657 ** for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end 823 ** assert(x == 300)
658 ** assert(x == 300) 824 */
659 */ 825 return FAILFOLD;
660 return FAILFOLD;
661 }
662 return INTFOLD(k);
663 } else {
664 return INTFOLD((int32_t)n);
665 } 826 }
827 return INTFOLD(k);
666} 828}
667 829
668LJFOLD(CONV KNUM IRCONV_U32_NUM) 830LJFOLD(CONV KNUM IRCONV_U32_NUM)
669LJFOLDF(kfold_conv_knum_u32_num) 831LJFOLDF(kfold_conv_knum_u32_num)
670{ 832{
671 lua_assert((fins->op2 & IRCONV_TRUNC));
672#ifdef _MSC_VER 833#ifdef _MSC_VER
673 { /* Workaround for MSVC bug. */ 834 { /* Workaround for MSVC bug. */
674 volatile uint32_t u = (uint32_t)knumleft; 835 volatile uint32_t u = (uint32_t)knumleft;
@@ -682,27 +843,27 @@ LJFOLDF(kfold_conv_knum_u32_num)
682LJFOLD(CONV KNUM IRCONV_I64_NUM) 843LJFOLD(CONV KNUM IRCONV_I64_NUM)
683LJFOLDF(kfold_conv_knum_i64_num) 844LJFOLDF(kfold_conv_knum_i64_num)
684{ 845{
685 lua_assert((fins->op2 & IRCONV_TRUNC));
686 return INT64FOLD((uint64_t)(int64_t)knumleft); 846 return INT64FOLD((uint64_t)(int64_t)knumleft);
687} 847}
688 848
689LJFOLD(CONV KNUM IRCONV_U64_NUM) 849LJFOLD(CONV KNUM IRCONV_U64_NUM)
690LJFOLDF(kfold_conv_knum_u64_num) 850LJFOLDF(kfold_conv_knum_u64_num)
691{ 851{
692 lua_assert((fins->op2 & IRCONV_TRUNC));
693 return INT64FOLD(lj_num2u64(knumleft)); 852 return INT64FOLD(lj_num2u64(knumleft));
694} 853}
695 854
696LJFOLD(TOSTR KNUM) 855LJFOLD(TOSTR KNUM any)
697LJFOLDF(kfold_tostr_knum) 856LJFOLDF(kfold_tostr_knum)
698{ 857{
699 return lj_ir_kstr(J, lj_str_fromnum(J->L, &knumleft)); 858 return lj_ir_kstr(J, lj_strfmt_num(J->L, ir_knum(fleft)));
700} 859}
701 860
702LJFOLD(TOSTR KINT) 861LJFOLD(TOSTR KINT any)
703LJFOLDF(kfold_tostr_kint) 862LJFOLDF(kfold_tostr_kint)
704{ 863{
705 return lj_ir_kstr(J, lj_str_fromint(J->L, fleft->i)); 864 return lj_ir_kstr(J, fins->op2 == IRTOSTR_INT ?
865 lj_strfmt_int(J->L, fleft->i) :
866 lj_strfmt_char(J->L, fleft->i));
706} 867}
707 868
708LJFOLD(STRTO KGC) 869LJFOLD(STRTO KGC)
@@ -1205,7 +1366,9 @@ static TRef simplify_intmul_k(jit_State *J, int32_t k)
1205 ** But this is mainly intended for simple address arithmetic. 1366 ** But this is mainly intended for simple address arithmetic.
1206 ** Also it's easier for the backend to optimize the original multiplies. 1367 ** Also it's easier for the backend to optimize the original multiplies.
1207 */ 1368 */
1208 if (k == 1) { /* i * 1 ==> i */ 1369 if (k == 0) { /* i * 0 ==> 0 */
1370 return RIGHTFOLD;
1371 } else if (k == 1) { /* i * 1 ==> i */
1209 return LEFTFOLD; 1372 return LEFTFOLD;
1210 } else if ((k & (k-1)) == 0) { /* i * 2^k ==> i << k */ 1373 } else if ((k & (k-1)) == 0) { /* i * 2^k ==> i << k */
1211 fins->o = IR_BSHL; 1374 fins->o = IR_BSHL;
@@ -1218,9 +1381,7 @@ static TRef simplify_intmul_k(jit_State *J, int32_t k)
1218LJFOLD(MUL any KINT) 1381LJFOLD(MUL any KINT)
1219LJFOLDF(simplify_intmul_k32) 1382LJFOLDF(simplify_intmul_k32)
1220{ 1383{
1221 if (fright->i == 0) /* i * 0 ==> 0 */ 1384 if (fright->i >= 0)
1222 return INTFOLD(0);
1223 else if (fright->i > 0)
1224 return simplify_intmul_k(J, fright->i); 1385 return simplify_intmul_k(J, fright->i);
1225 return NEXTFOLD; 1386 return NEXTFOLD;
1226} 1387}
@@ -1228,14 +1389,13 @@ LJFOLDF(simplify_intmul_k32)
1228LJFOLD(MUL any KINT64) 1389LJFOLD(MUL any KINT64)
1229LJFOLDF(simplify_intmul_k64) 1390LJFOLDF(simplify_intmul_k64)
1230{ 1391{
1231 if (ir_kint64(fright)->u64 == 0) /* i * 0 ==> 0 */ 1392#if LJ_HASFFI
1232 return INT64FOLD(0); 1393 if (ir_kint64(fright)->u64 < 0x80000000u)
1233#if LJ_64
1234 /* NYI: SPLIT for BSHL and 32 bit backend support. */
1235 else if (ir_kint64(fright)->u64 < 0x80000000u)
1236 return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64); 1394 return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64);
1237#endif
1238 return NEXTFOLD; 1395 return NEXTFOLD;
1396#else
1397 UNUSED(J); lua_assert(0); return FAILFOLD;
1398#endif
1239} 1399}
1240 1400
1241LJFOLD(MOD any KINT) 1401LJFOLD(MOD any KINT)
@@ -1535,7 +1695,7 @@ LJFOLD(BOR BOR KINT64)
1535LJFOLD(BXOR BXOR KINT64) 1695LJFOLD(BXOR BXOR KINT64)
1536LJFOLDF(reassoc_intarith_k64) 1696LJFOLDF(reassoc_intarith_k64)
1537{ 1697{
1538#if LJ_HASFFI || LJ_64 1698#if LJ_HASFFI
1539 IRIns *irk = IR(fleft->op2); 1699 IRIns *irk = IR(fleft->op2);
1540 if (irk->o == IR_KINT64) { 1700 if (irk->o == IR_KINT64) {
1541 uint64_t k = kfold_int64arith(ir_k64(irk)->u64, 1701 uint64_t k = kfold_int64arith(ir_k64(irk)->u64,
@@ -1953,6 +2113,7 @@ LJFOLDF(fwd_href_tdup)
1953** an aliased table, as it may invalidate all of the pointers and fields. 2113** an aliased table, as it may invalidate all of the pointers and fields.
1954** Only HREF needs the NEWREF check -- AREF and HREFK already depend on 2114** Only HREF needs the NEWREF check -- AREF and HREFK already depend on
1955** FLOADs. And NEWREF itself is treated like a store (see below). 2115** FLOADs. And NEWREF itself is treated like a store (see below).
2116** LREF is constant (per trace) since coroutine switches are not inlined.
1956*/ 2117*/
1957LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE) 2118LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE)
1958LJFOLDF(fload_tab_tnew_asize) 2119LJFOLDF(fload_tab_tnew_asize)
@@ -2016,6 +2177,14 @@ LJFOLDF(fload_str_len_snew)
2016 return NEXTFOLD; 2177 return NEXTFOLD;
2017} 2178}
2018 2179
2180LJFOLD(FLOAD TOSTR IRFL_STR_LEN)
2181LJFOLDF(fload_str_len_tostr)
2182{
2183 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fleft->op2 == IRTOSTR_CHAR)
2184 return INTFOLD(1);
2185 return NEXTFOLD;
2186}
2187
2019/* The C type ID of cdata objects is immutable. */ 2188/* The C type ID of cdata objects is immutable. */
2020LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID) 2189LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID)
2021LJFOLDF(fload_cdata_typeid_kgc) 2190LJFOLDF(fload_cdata_typeid_kgc)
@@ -2062,6 +2231,8 @@ LJFOLDF(fload_cdata_ptr_int64_cnew)
2062} 2231}
2063 2232
2064LJFOLD(FLOAD any IRFL_STR_LEN) 2233LJFOLD(FLOAD any IRFL_STR_LEN)
2234LJFOLD(FLOAD any IRFL_FUNC_ENV)
2235LJFOLD(FLOAD any IRFL_THREAD_ENV)
2065LJFOLD(FLOAD any IRFL_CDATA_CTYPEID) 2236LJFOLD(FLOAD any IRFL_CDATA_CTYPEID)
2066LJFOLD(FLOAD any IRFL_CDATA_PTR) 2237LJFOLD(FLOAD any IRFL_CDATA_PTR)
2067LJFOLD(FLOAD any IRFL_CDATA_INT) 2238LJFOLD(FLOAD any IRFL_CDATA_INT)
@@ -2127,6 +2298,17 @@ LJFOLDF(barrier_tnew_tdup)
2127 return DROPFOLD; 2298 return DROPFOLD;
2128} 2299}
2129 2300
2301/* -- Profiling ----------------------------------------------------------- */
2302
2303LJFOLD(PROF any any)
2304LJFOLDF(prof)
2305{
2306 IRRef ref = J->chain[IR_PROF];
2307 if (ref+1 == J->cur.nins) /* Drop neighbouring IR_PROF. */
2308 return ref;
2309 return EMITFOLD;
2310}
2311
2130/* -- Stores and allocations ---------------------------------------------- */ 2312/* -- Stores and allocations ---------------------------------------------- */
2131 2313
2132/* Stores and allocations cannot be folded or passed on to CSE in general. 2314/* Stores and allocations cannot be folded or passed on to CSE in general.
@@ -2149,8 +2331,9 @@ LJFOLD(XSTORE any any)
2149LJFOLDX(lj_opt_dse_xstore) 2331LJFOLDX(lj_opt_dse_xstore)
2150 2332
2151LJFOLD(NEWREF any any) /* Treated like a store. */ 2333LJFOLD(NEWREF any any) /* Treated like a store. */
2152LJFOLD(CALLS any any) 2334LJFOLD(CALLA any any)
2153LJFOLD(CALLL any any) /* Safeguard fallback. */ 2335LJFOLD(CALLL any any) /* Safeguard fallback. */
2336LJFOLD(CALLS any any)
2154LJFOLD(CALLXS any any) 2337LJFOLD(CALLXS any any)
2155LJFOLD(XBAR) 2338LJFOLD(XBAR)
2156LJFOLD(RETF any any) /* Modifies BASE. */ 2339LJFOLD(RETF any any) /* Modifies BASE. */
@@ -2158,6 +2341,7 @@ LJFOLD(TNEW any any)
2158LJFOLD(TDUP any) 2341LJFOLD(TDUP any)
2159LJFOLD(CNEW any any) 2342LJFOLD(CNEW any any)
2160LJFOLD(XSNEW any any) 2343LJFOLD(XSNEW any any)
2344LJFOLD(BUFHDR any any)
2161LJFOLDX(lj_ir_emit) 2345LJFOLDX(lj_ir_emit)
2162 2346
2163/* ------------------------------------------------------------------------ */ 2347/* ------------------------------------------------------------------------ */
diff --git a/src/lj_opt_loop.c b/src/lj_opt_loop.c
index b7d1923e..4b4ab7dc 100644
--- a/src/lj_opt_loop.c
+++ b/src/lj_opt_loop.c
@@ -11,7 +11,7 @@
11#if LJ_HASJIT 11#if LJ_HASJIT
12 12
13#include "lj_err.h" 13#include "lj_err.h"
14#include "lj_str.h" 14#include "lj_buf.h"
15#include "lj_ir.h" 15#include "lj_ir.h"
16#include "lj_jit.h" 16#include "lj_jit.h"
17#include "lj_iropt.h" 17#include "lj_iropt.h"
@@ -254,9 +254,16 @@ static void loop_subst_snap(jit_State *J, SnapShot *osnap,
254 J->cur.nsnapmap = (uint16_t)(nmap - J->cur.snapmap); 254 J->cur.nsnapmap = (uint16_t)(nmap - J->cur.snapmap);
255} 255}
256 256
257typedef struct LoopState {
258 jit_State *J;
259 IRRef1 *subst;
260 MSize sizesubst;
261} LoopState;
262
257/* Unroll loop. */ 263/* Unroll loop. */
258static void loop_unroll(jit_State *J) 264static void loop_unroll(LoopState *lps)
259{ 265{
266 jit_State *J = lps->J;
260 IRRef1 phi[LJ_MAX_PHI]; 267 IRRef1 phi[LJ_MAX_PHI];
261 uint32_t nphi = 0; 268 uint32_t nphi = 0;
262 IRRef1 *subst; 269 IRRef1 *subst;
@@ -265,13 +272,13 @@ static void loop_unroll(jit_State *J)
265 SnapEntry *loopmap, *psentinel; 272 SnapEntry *loopmap, *psentinel;
266 IRRef ins, invar; 273 IRRef ins, invar;
267 274
268 /* Use temp buffer for substitution table. 275 /* Allocate substitution table.
269 ** Only non-constant refs in [REF_BIAS,invar) are valid indexes. 276 ** Only non-constant refs in [REF_BIAS,invar) are valid indexes.
270 ** Caveat: don't call into the VM or run the GC or the buffer may be gone.
271 */ 277 */
272 invar = J->cur.nins; 278 invar = J->cur.nins;
273 subst = (IRRef1 *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, 279 lps->sizesubst = invar - REF_BIAS;
274 (invar-REF_BIAS)*sizeof(IRRef1)) - REF_BIAS; 280 lps->subst = lj_mem_newvec(J->L, lps->sizesubst, IRRef1);
281 subst = lps->subst - REF_BIAS;
275 subst[REF_BASE] = REF_BASE; 282 subst[REF_BASE] = REF_BASE;
276 283
277 /* LOOP separates the pre-roll from the loop body. */ 284 /* LOOP separates the pre-roll from the loop body. */
@@ -396,7 +403,7 @@ static void loop_undo(jit_State *J, IRRef ins, SnapNo nsnap, MSize nsnapmap)
396static TValue *cploop_opt(lua_State *L, lua_CFunction dummy, void *ud) 403static TValue *cploop_opt(lua_State *L, lua_CFunction dummy, void *ud)
397{ 404{
398 UNUSED(L); UNUSED(dummy); 405 UNUSED(L); UNUSED(dummy);
399 loop_unroll((jit_State *)ud); 406 loop_unroll((LoopState *)ud);
400 return NULL; 407 return NULL;
401} 408}
402 409
@@ -406,7 +413,13 @@ int lj_opt_loop(jit_State *J)
406 IRRef nins = J->cur.nins; 413 IRRef nins = J->cur.nins;
407 SnapNo nsnap = J->cur.nsnap; 414 SnapNo nsnap = J->cur.nsnap;
408 MSize nsnapmap = J->cur.nsnapmap; 415 MSize nsnapmap = J->cur.nsnapmap;
409 int errcode = lj_vm_cpcall(J->L, NULL, J, cploop_opt); 416 LoopState lps;
417 int errcode;
418 lps.J = J;
419 lps.subst = NULL;
420 lps.sizesubst = 0;
421 errcode = lj_vm_cpcall(J->L, NULL, &lps, cploop_opt);
422 lj_mem_freevec(J2G(J), lps.subst, lps.sizesubst, IRRef1);
410 if (LJ_UNLIKELY(errcode)) { 423 if (LJ_UNLIKELY(errcode)) {
411 lua_State *L = J->L; 424 lua_State *L = J->L;
412 if (errcode == LUA_ERRRUN && tvisnumber(L->top-1)) { /* Trace error? */ 425 if (errcode == LUA_ERRRUN && tvisnumber(L->top-1)) { /* Trace error? */
diff --git a/src/lj_opt_mem.c b/src/lj_opt_mem.c
index a4d96fc0..e04a6228 100644
--- a/src/lj_opt_mem.c
+++ b/src/lj_opt_mem.c
@@ -17,6 +17,7 @@
17#include "lj_ir.h" 17#include "lj_ir.h"
18#include "lj_jit.h" 18#include "lj_jit.h"
19#include "lj_iropt.h" 19#include "lj_iropt.h"
20#include "lj_ircall.h"
20 21
21/* Some local macros to save typing. Undef'd at the end. */ 22/* Some local macros to save typing. Undef'd at the end. */
22#define IR(ref) (&J->cur.ir[(ref)]) 23#define IR(ref) (&J->cur.ir[(ref)])
@@ -308,7 +309,21 @@ int LJ_FASTCALL lj_opt_fwd_href_nokey(jit_State *J)
308 return 1; /* No conflict. Can fold to niltv. */ 309 return 1; /* No conflict. Can fold to niltv. */
309} 310}
310 311
311/* Check whether there's no aliasing NEWREF for the left operand. */ 312/* Check whether there's no aliasing table.clear. */
313static int fwd_aa_tab_clear(jit_State *J, IRRef lim, IRRef ta)
314{
315 IRRef ref = J->chain[IR_CALLS];
316 while (ref > lim) {
317 IRIns *calls = IR(ref);
318 if (calls->op2 == IRCALL_lj_tab_clear &&
319 (ta == calls->op1 || aa_table(J, ta, calls->op1) != ALIAS_NO))
320 return 0; /* Conflict. */
321 ref = calls->prev;
322 }
323 return 1; /* No conflict. Can safely FOLD/CSE. */
324}
325
326/* Check whether there's no aliasing NEWREF/table.clear for the left operand. */
312int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim) 327int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim)
313{ 328{
314 IRRef ta = fins->op1; 329 IRRef ta = fins->op1;
@@ -319,7 +334,7 @@ int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim)
319 return 0; /* Conflict. */ 334 return 0; /* Conflict. */
320 ref = newref->prev; 335 ref = newref->prev;
321 } 336 }
322 return 1; /* No conflict. Can safely FOLD/CSE. */ 337 return fwd_aa_tab_clear(J, lim, ta);
323} 338}
324 339
325/* ASTORE/HSTORE elimination. */ 340/* ASTORE/HSTORE elimination. */
@@ -854,6 +869,10 @@ TRef LJ_FASTCALL lj_opt_fwd_tab_len(jit_State *J)
854 ref = store->prev; 869 ref = store->prev;
855 } 870 }
856 871
872 /* Search for aliasing table.clear. */
873 if (!fwd_aa_tab_clear(J, lim, tab))
874 return lj_ir_emit(J);
875
857 /* Try to find a matching load. Below the conflicting store, if any. */ 876 /* Try to find a matching load. Below the conflicting store, if any. */
858 return lj_opt_cselim(J, lim); 877 return lj_opt_cselim(J, lim);
859} 878}
diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c
index b7bd3232..d221c30d 100644
--- a/src/lj_opt_narrow.c
+++ b/src/lj_opt_narrow.c
@@ -496,8 +496,7 @@ TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef tr)
496{ 496{
497 lua_assert(tref_isnumber(tr)); 497 lua_assert(tref_isnumber(tr));
498 if (tref_isnum(tr)) 498 if (tref_isnum(tr))
499 return emitir(IRT(IR_CONV, IRT_INTP), tr, 499 return emitir(IRT(IR_CONV, IRT_INTP), tr, (IRT_INTP<<5)|IRT_NUM|IRCONV_ANY);
500 (IRT_INTP<<5)|IRT_NUM|IRCONV_TRUNC|IRCONV_ANY);
501 /* Undefined overflow semantics allow stripping of ADDOV, SUBOV and MULOV. */ 500 /* Undefined overflow semantics allow stripping of ADDOV, SUBOV and MULOV. */
502 return narrow_stripov(J, tr, IR_MULOV, 501 return narrow_stripov(J, tr, IR_MULOV,
503 LJ_64 ? ((IRT_INTP<<5)|IRT_INT|IRCONV_SEXT) : 502 LJ_64 ? ((IRT_INTP<<5)|IRT_INT|IRCONV_SEXT) :
diff --git a/src/lj_opt_split.c b/src/lj_opt_split.c
index 1cee5093..81ded6c0 100644
--- a/src/lj_opt_split.c
+++ b/src/lj_opt_split.c
@@ -11,7 +11,7 @@
11#if LJ_HASJIT && (LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) 11#if LJ_HASJIT && (LJ_SOFTFP || (LJ_32 && LJ_HASFFI))
12 12
13#include "lj_err.h" 13#include "lj_err.h"
14#include "lj_str.h" 14#include "lj_buf.h"
15#include "lj_ir.h" 15#include "lj_ir.h"
16#include "lj_jit.h" 16#include "lj_jit.h"
17#include "lj_ircall.h" 17#include "lj_ircall.h"
@@ -139,6 +139,7 @@ static IRRef split_call_l(jit_State *J, IRRef1 *hisubst, IRIns *oir,
139 ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id); 139 ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
140 return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); 140 return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
141} 141}
142#endif
142 143
143/* Emit a CALLN with one split 64 bit argument and a 32 bit argument. */ 144/* Emit a CALLN with one split 64 bit argument and a 32 bit argument. */
144static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir, 145static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir,
@@ -155,7 +156,6 @@ static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir,
155 ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id); 156 ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
156 return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); 157 return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
157} 158}
158#endif
159 159
160/* Emit a CALLN with two split 64 bit arguments. */ 160/* Emit a CALLN with two split 64 bit arguments. */
161static IRRef split_call_ll(jit_State *J, IRRef1 *hisubst, IRIns *oir, 161static IRRef split_call_ll(jit_State *J, IRRef1 *hisubst, IRIns *oir,
@@ -195,6 +195,118 @@ static IRRef split_ptr(jit_State *J, IRIns *oir, IRRef ref)
195 return split_emit(J, IRTI(IR_ADD), nref, lj_ir_kint(J, ofs)); 195 return split_emit(J, IRTI(IR_ADD), nref, lj_ir_kint(J, ofs));
196} 196}
197 197
198#if LJ_HASFFI
199static IRRef split_bitshift(jit_State *J, IRRef1 *hisubst,
200 IRIns *oir, IRIns *nir, IRIns *ir)
201{
202 IROp op = ir->o;
203 IRRef kref = nir->op2;
204 if (irref_isk(kref)) { /* Optimize constant shifts. */
205 int32_t k = (IR(kref)->i & 63);
206 IRRef lo = nir->op1, hi = hisubst[ir->op1];
207 if (op == IR_BROL || op == IR_BROR) {
208 if (op == IR_BROR) k = (-k & 63);
209 if (k >= 32) { IRRef t = lo; lo = hi; hi = t; k -= 32; }
210 if (k == 0) {
211 passthrough:
212 J->cur.nins--;
213 ir->prev = lo;
214 return hi;
215 } else {
216 TRef k1, k2;
217 IRRef t1, t2, t3, t4;
218 J->cur.nins--;
219 k1 = lj_ir_kint(J, k);
220 k2 = lj_ir_kint(J, (-k & 31));
221 t1 = split_emit(J, IRTI(IR_BSHL), lo, k1);
222 t2 = split_emit(J, IRTI(IR_BSHL), hi, k1);
223 t3 = split_emit(J, IRTI(IR_BSHR), lo, k2);
224 t4 = split_emit(J, IRTI(IR_BSHR), hi, k2);
225 ir->prev = split_emit(J, IRTI(IR_BOR), t1, t4);
226 return split_emit(J, IRTI(IR_BOR), t2, t3);
227 }
228 } else if (k == 0) {
229 goto passthrough;
230 } else if (k < 32) {
231 if (op == IR_BSHL) {
232 IRRef t1 = split_emit(J, IRTI(IR_BSHL), hi, kref);
233 IRRef t2 = split_emit(J, IRTI(IR_BSHR), lo, lj_ir_kint(J, (-k&31)));
234 return split_emit(J, IRTI(IR_BOR), t1, t2);
235 } else {
236 IRRef t1 = ir->prev, t2;
237 lua_assert(op == IR_BSHR || op == IR_BSAR);
238 nir->o = IR_BSHR;
239 t2 = split_emit(J, IRTI(IR_BSHL), hi, lj_ir_kint(J, (-k&31)));
240 ir->prev = split_emit(J, IRTI(IR_BOR), t1, t2);
241 return split_emit(J, IRTI(op), hi, kref);
242 }
243 } else {
244 if (op == IR_BSHL) {
245 if (k == 32)
246 J->cur.nins--;
247 else
248 lo = ir->prev;
249 ir->prev = lj_ir_kint(J, 0);
250 return lo;
251 } else {
252 lua_assert(op == IR_BSHR || op == IR_BSAR);
253 if (k == 32) {
254 J->cur.nins--;
255 ir->prev = hi;
256 } else {
257 nir->op1 = hi;
258 }
259 if (op == IR_BSHR)
260 return lj_ir_kint(J, 0);
261 else
262 return split_emit(J, IRTI(IR_BSAR), hi, lj_ir_kint(J, 31));
263 }
264 }
265 }
266 return split_call_li(J, hisubst, oir, ir,
267 op - IR_BSHL + IRCALL_lj_carith_shl64);
268}
269
270static IRRef split_bitop(jit_State *J, IRRef1 *hisubst,
271 IRIns *nir, IRIns *ir)
272{
273 IROp op = ir->o;
274 IRRef hi, kref = nir->op2;
275 if (irref_isk(kref)) { /* Optimize bit operations with lo constant. */
276 int32_t k = IR(kref)->i;
277 if (k == 0 || k == -1) {
278 if (op == IR_BAND) k = ~k;
279 if (k == 0) {
280 J->cur.nins--;
281 ir->prev = nir->op1;
282 } else if (op == IR_BXOR) {
283 nir->o = IR_BNOT;
284 nir->op2 = 0;
285 } else {
286 J->cur.nins--;
287 ir->prev = kref;
288 }
289 }
290 }
291 hi = hisubst[ir->op1];
292 kref = hisubst[ir->op2];
293 if (irref_isk(kref)) { /* Optimize bit operations with hi constant. */
294 int32_t k = IR(kref)->i;
295 if (k == 0 || k == -1) {
296 if (op == IR_BAND) k = ~k;
297 if (k == 0) {
298 return hi;
299 } else if (op == IR_BXOR) {
300 return split_emit(J, IRTI(IR_BNOT), hi, 0);
301 } else {
302 return kref;
303 }
304 }
305 }
306 return split_emit(J, IRTI(op), hi, kref);
307}
308#endif
309
198/* Substitute references of a snapshot. */ 310/* Substitute references of a snapshot. */
199static void split_subst_snap(jit_State *J, SnapShot *snap, IRIns *oir) 311static void split_subst_snap(jit_State *J, SnapShot *snap, IRIns *oir)
200{ 312{
@@ -214,7 +326,7 @@ static void split_ir(jit_State *J)
214 IRRef nins = J->cur.nins, nk = J->cur.nk; 326 IRRef nins = J->cur.nins, nk = J->cur.nk;
215 MSize irlen = nins - nk; 327 MSize irlen = nins - nk;
216 MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1)); 328 MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1));
217 IRIns *oir = (IRIns *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, need); 329 IRIns *oir = (IRIns *)lj_buf_tmp(J->L, need);
218 IRRef1 *hisubst; 330 IRRef1 *hisubst;
219 IRRef ref, snref; 331 IRRef ref, snref;
220 SnapShot *snap; 332 SnapShot *snap;
@@ -438,6 +550,19 @@ static void split_ir(jit_State *J)
438 irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : 550 irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
439 IRCALL_lj_carith_powu64); 551 IRCALL_lj_carith_powu64);
440 break; 552 break;
553 case IR_BNOT:
554 hi = split_emit(J, IRTI(IR_BNOT), hiref, 0);
555 break;
556 case IR_BSWAP:
557 ir->prev = split_emit(J, IRTI(IR_BSWAP), hiref, 0);
558 hi = nref;
559 break;
560 case IR_BAND: case IR_BOR: case IR_BXOR:
561 hi = split_bitop(J, hisubst, nir, ir);
562 break;
563 case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR:
564 hi = split_bitshift(J, hisubst, oir, nir, ir);
565 break;
441 case IR_FLOAD: 566 case IR_FLOAD:
442 lua_assert(ir->op2 == IRFL_CDATA_INT64); 567 lua_assert(ir->op2 == IRFL_CDATA_INT64);
443 hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4); 568 hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4);
diff --git a/src/lj_parse.c b/src/lj_parse.c
index abfac3c0..9891897e 100644
--- a/src/lj_parse.c
+++ b/src/lj_parse.c
@@ -13,6 +13,7 @@
13#include "lj_gc.h" 13#include "lj_gc.h"
14#include "lj_err.h" 14#include "lj_err.h"
15#include "lj_debug.h" 15#include "lj_debug.h"
16#include "lj_buf.h"
16#include "lj_str.h" 17#include "lj_str.h"
17#include "lj_tab.h" 18#include "lj_tab.h"
18#include "lj_func.h" 19#include "lj_func.h"
@@ -21,6 +22,7 @@
21#if LJ_HASFFI 22#if LJ_HASFFI
22#include "lj_ctype.h" 23#include "lj_ctype.h"
23#endif 24#endif
25#include "lj_strfmt.h"
24#include "lj_lex.h" 26#include "lj_lex.h"
25#include "lj_parse.h" 27#include "lj_parse.h"
26#include "lj_vm.h" 28#include "lj_vm.h"
@@ -165,12 +167,12 @@ LJ_STATIC_ASSERT((int)BC_MODVV-(int)BC_ADDVV == (int)OPR_MOD-(int)OPR_ADD);
165 167
166LJ_NORET LJ_NOINLINE static void err_syntax(LexState *ls, ErrMsg em) 168LJ_NORET LJ_NOINLINE static void err_syntax(LexState *ls, ErrMsg em)
167{ 169{
168 lj_lex_error(ls, ls->token, em); 170 lj_lex_error(ls, ls->tok, em);
169} 171}
170 172
171LJ_NORET LJ_NOINLINE static void err_token(LexState *ls, LexToken token) 173LJ_NORET LJ_NOINLINE static void err_token(LexState *ls, LexToken tok)
172{ 174{
173 lj_lex_error(ls, ls->token, LJ_ERR_XTOKEN, lj_lex_token2str(ls, token)); 175 lj_lex_error(ls, ls->tok, LJ_ERR_XTOKEN, lj_lex_token2str(ls, tok));
174} 176}
175 177
176LJ_NORET static void err_limit(FuncState *fs, uint32_t limit, const char *what) 178LJ_NORET static void err_limit(FuncState *fs, uint32_t limit, const char *what)
@@ -660,16 +662,16 @@ static void bcemit_method(FuncState *fs, ExpDesc *e, ExpDesc *key)
660 BCReg idx, func, obj = expr_toanyreg(fs, e); 662 BCReg idx, func, obj = expr_toanyreg(fs, e);
661 expr_free(fs, e); 663 expr_free(fs, e);
662 func = fs->freereg; 664 func = fs->freereg;
663 bcemit_AD(fs, BC_MOV, func+1, obj); /* Copy object to first argument. */ 665 bcemit_AD(fs, BC_MOV, func+1+LJ_FR2, obj); /* Copy object to 1st argument. */
664 lua_assert(expr_isstrk(key)); 666 lua_assert(expr_isstrk(key));
665 idx = const_str(fs, key); 667 idx = const_str(fs, key);
666 if (idx <= BCMAX_C) { 668 if (idx <= BCMAX_C) {
667 bcreg_reserve(fs, 2); 669 bcreg_reserve(fs, 2+LJ_FR2);
668 bcemit_ABC(fs, BC_TGETS, func, obj, idx); 670 bcemit_ABC(fs, BC_TGETS, func, obj, idx);
669 } else { 671 } else {
670 bcreg_reserve(fs, 3); 672 bcreg_reserve(fs, 3+LJ_FR2);
671 bcemit_AD(fs, BC_KSTR, func+2, idx); 673 bcemit_AD(fs, BC_KSTR, func+2+LJ_FR2, idx);
672 bcemit_ABC(fs, BC_TGETV, func, obj, func+2); 674 bcemit_ABC(fs, BC_TGETV, func, obj, func+2+LJ_FR2);
673 fs->freereg--; 675 fs->freereg--;
674 } 676 }
675 e->u.s.info = func; 677 e->u.s.info = func;
@@ -983,7 +985,7 @@ static void bcemit_unop(FuncState *fs, BCOp op, ExpDesc *e)
983/* Check and consume optional token. */ 985/* Check and consume optional token. */
984static int lex_opt(LexState *ls, LexToken tok) 986static int lex_opt(LexState *ls, LexToken tok)
985{ 987{
986 if (ls->token == tok) { 988 if (ls->tok == tok) {
987 lj_lex_next(ls); 989 lj_lex_next(ls);
988 return 1; 990 return 1;
989 } 991 }
@@ -993,7 +995,7 @@ static int lex_opt(LexState *ls, LexToken tok)
993/* Check and consume token. */ 995/* Check and consume token. */
994static void lex_check(LexState *ls, LexToken tok) 996static void lex_check(LexState *ls, LexToken tok)
995{ 997{
996 if (ls->token != tok) 998 if (ls->tok != tok)
997 err_token(ls, tok); 999 err_token(ls, tok);
998 lj_lex_next(ls); 1000 lj_lex_next(ls);
999} 1001}
@@ -1007,7 +1009,7 @@ static void lex_match(LexState *ls, LexToken what, LexToken who, BCLine line)
1007 } else { 1009 } else {
1008 const char *swhat = lj_lex_token2str(ls, what); 1010 const char *swhat = lj_lex_token2str(ls, what);
1009 const char *swho = lj_lex_token2str(ls, who); 1011 const char *swho = lj_lex_token2str(ls, who);
1010 lj_lex_error(ls, ls->token, LJ_ERR_XMATCH, swhat, swho, line); 1012 lj_lex_error(ls, ls->tok, LJ_ERR_XMATCH, swhat, swho, line);
1011 } 1013 }
1012 } 1014 }
1013} 1015}
@@ -1016,9 +1018,9 @@ static void lex_match(LexState *ls, LexToken what, LexToken who, BCLine line)
1016static GCstr *lex_str(LexState *ls) 1018static GCstr *lex_str(LexState *ls)
1017{ 1019{
1018 GCstr *s; 1020 GCstr *s;
1019 if (ls->token != TK_name && (LJ_52 || ls->token != TK_goto)) 1021 if (ls->tok != TK_name && (LJ_52 || ls->tok != TK_goto))
1020 err_token(ls, TK_name); 1022 err_token(ls, TK_name);
1021 s = strV(&ls->tokenval); 1023 s = strV(&ls->tokval);
1022 lj_lex_next(ls); 1024 lj_lex_next(ls);
1023 return s; 1025 return s;
1024} 1026}
@@ -1431,78 +1433,46 @@ static void fs_fixup_line(FuncState *fs, GCproto *pt,
1431 } 1433 }
1432} 1434}
1433 1435
1434/* Resize buffer if needed. */
1435static LJ_NOINLINE void fs_buf_resize(LexState *ls, MSize len)
1436{
1437 MSize sz = ls->sb.sz * 2;
1438 while (ls->sb.n + len > sz) sz = sz * 2;
1439 lj_str_resizebuf(ls->L, &ls->sb, sz);
1440}
1441
1442static LJ_AINLINE void fs_buf_need(LexState *ls, MSize len)
1443{
1444 if (LJ_UNLIKELY(ls->sb.n + len > ls->sb.sz))
1445 fs_buf_resize(ls, len);
1446}
1447
1448/* Add string to buffer. */
1449static void fs_buf_str(LexState *ls, const char *str, MSize len)
1450{
1451 char *p = ls->sb.buf + ls->sb.n;
1452 MSize i;
1453 ls->sb.n += len;
1454 for (i = 0; i < len; i++) p[i] = str[i];
1455}
1456
1457/* Add ULEB128 value to buffer. */
1458static void fs_buf_uleb128(LexState *ls, uint32_t v)
1459{
1460 MSize n = ls->sb.n;
1461 uint8_t *p = (uint8_t *)ls->sb.buf;
1462 for (; v >= 0x80; v >>= 7)
1463 p[n++] = (uint8_t)((v & 0x7f) | 0x80);
1464 p[n++] = (uint8_t)v;
1465 ls->sb.n = n;
1466}
1467
1468/* Prepare variable info for prototype. */ 1436/* Prepare variable info for prototype. */
1469static size_t fs_prep_var(LexState *ls, FuncState *fs, size_t *ofsvar) 1437static size_t fs_prep_var(LexState *ls, FuncState *fs, size_t *ofsvar)
1470{ 1438{
1471 VarInfo *vs =ls->vstack, *ve; 1439 VarInfo *vs =ls->vstack, *ve;
1472 MSize i, n; 1440 MSize i, n;
1473 BCPos lastpc; 1441 BCPos lastpc;
1474 lj_str_resetbuf(&ls->sb); /* Copy to temp. string buffer. */ 1442 lj_buf_reset(&ls->sb); /* Copy to temp. string buffer. */
1475 /* Store upvalue names. */ 1443 /* Store upvalue names. */
1476 for (i = 0, n = fs->nuv; i < n; i++) { 1444 for (i = 0, n = fs->nuv; i < n; i++) {
1477 GCstr *s = strref(vs[fs->uvmap[i]].name); 1445 GCstr *s = strref(vs[fs->uvmap[i]].name);
1478 MSize len = s->len+1; 1446 MSize len = s->len+1;
1479 fs_buf_need(ls, len); 1447 char *p = lj_buf_more(&ls->sb, len);
1480 fs_buf_str(ls, strdata(s), len); 1448 p = lj_buf_wmem(p, strdata(s), len);
1449 setsbufP(&ls->sb, p);
1481 } 1450 }
1482 *ofsvar = ls->sb.n; 1451 *ofsvar = sbuflen(&ls->sb);
1483 lastpc = 0; 1452 lastpc = 0;
1484 /* Store local variable names and compressed ranges. */ 1453 /* Store local variable names and compressed ranges. */
1485 for (ve = vs + ls->vtop, vs += fs->vbase; vs < ve; vs++) { 1454 for (ve = vs + ls->vtop, vs += fs->vbase; vs < ve; vs++) {
1486 if (!gola_isgotolabel(vs)) { 1455 if (!gola_isgotolabel(vs)) {
1487 GCstr *s = strref(vs->name); 1456 GCstr *s = strref(vs->name);
1488 BCPos startpc; 1457 BCPos startpc;
1458 char *p;
1489 if ((uintptr_t)s < VARNAME__MAX) { 1459 if ((uintptr_t)s < VARNAME__MAX) {
1490 fs_buf_need(ls, 1 + 2*5); 1460 p = lj_buf_more(&ls->sb, 1 + 2*5);
1491 ls->sb.buf[ls->sb.n++] = (uint8_t)(uintptr_t)s; 1461 *p++ = (char)(uintptr_t)s;
1492 } else { 1462 } else {
1493 MSize len = s->len+1; 1463 MSize len = s->len+1;
1494 fs_buf_need(ls, len + 2*5); 1464 p = lj_buf_more(&ls->sb, len + 2*5);
1495 fs_buf_str(ls, strdata(s), len); 1465 p = lj_buf_wmem(p, strdata(s), len);
1496 } 1466 }
1497 startpc = vs->startpc; 1467 startpc = vs->startpc;
1498 fs_buf_uleb128(ls, startpc-lastpc); 1468 p = lj_strfmt_wuleb128(p, startpc-lastpc);
1499 fs_buf_uleb128(ls, vs->endpc-startpc); 1469 p = lj_strfmt_wuleb128(p, vs->endpc-startpc);
1470 setsbufP(&ls->sb, p);
1500 lastpc = startpc; 1471 lastpc = startpc;
1501 } 1472 }
1502 } 1473 }
1503 fs_buf_need(ls, 1); 1474 lj_buf_putb(&ls->sb, '\0'); /* Terminator for varinfo. */
1504 ls->sb.buf[ls->sb.n++] = '\0'; /* Terminator for varinfo. */ 1475 return sbuflen(&ls->sb);
1505 return ls->sb.n;
1506} 1476}
1507 1477
1508/* Fixup variable info for prototype. */ 1478/* Fixup variable info for prototype. */
@@ -1510,7 +1480,7 @@ static void fs_fixup_var(LexState *ls, GCproto *pt, uint8_t *p, size_t ofsvar)
1510{ 1480{
1511 setmref(pt->uvinfo, p); 1481 setmref(pt->uvinfo, p);
1512 setmref(pt->varinfo, (char *)p + ofsvar); 1482 setmref(pt->varinfo, (char *)p + ofsvar);
1513 memcpy(p, ls->sb.buf, ls->sb.n); /* Copy from temp. string buffer. */ 1483 memcpy(p, sbufB(&ls->sb), sbuflen(&ls->sb)); /* Copy from temp. buffer. */
1514} 1484}
1515#else 1485#else
1516 1486
@@ -1619,7 +1589,7 @@ static GCproto *fs_finish(LexState *ls, BCLine line)
1619 L->top--; /* Pop table of constants. */ 1589 L->top--; /* Pop table of constants. */
1620 ls->vtop = fs->vbase; /* Reset variable stack. */ 1590 ls->vtop = fs->vbase; /* Reset variable stack. */
1621 ls->fs = fs->prev; 1591 ls->fs = fs->prev;
1622 lua_assert(ls->fs != NULL || ls->token == TK_eof); 1592 lua_assert(ls->fs != NULL || ls->tok == TK_eof);
1623 return pt; 1593 return pt;
1624} 1594}
1625 1595
@@ -1716,10 +1686,9 @@ static void expr_bracket(LexState *ls, ExpDesc *v)
1716static void expr_kvalue(TValue *v, ExpDesc *e) 1686static void expr_kvalue(TValue *v, ExpDesc *e)
1717{ 1687{
1718 if (e->k <= VKTRUE) { 1688 if (e->k <= VKTRUE) {
1719 setitype(v, ~(uint32_t)e->k); 1689 setpriV(v, ~(uint32_t)e->k);
1720 } else if (e->k == VKSTR) { 1690 } else if (e->k == VKSTR) {
1721 setgcref(v->gcr, obj2gco(e->u.sval)); 1691 setgcVraw(v, obj2gco(e->u.sval), LJ_TSTR);
1722 setitype(v, LJ_TSTR);
1723 } else { 1692 } else {
1724 lua_assert(tvisnumber(expr_numtv(e))); 1693 lua_assert(tvisnumber(expr_numtv(e)));
1725 *v = *expr_numtv(e); 1694 *v = *expr_numtv(e);
@@ -1741,15 +1710,15 @@ static void expr_table(LexState *ls, ExpDesc *e)
1741 bcreg_reserve(fs, 1); 1710 bcreg_reserve(fs, 1);
1742 freg++; 1711 freg++;
1743 lex_check(ls, '{'); 1712 lex_check(ls, '{');
1744 while (ls->token != '}') { 1713 while (ls->tok != '}') {
1745 ExpDesc key, val; 1714 ExpDesc key, val;
1746 vcall = 0; 1715 vcall = 0;
1747 if (ls->token == '[') { 1716 if (ls->tok == '[') {
1748 expr_bracket(ls, &key); /* Already calls expr_toval. */ 1717 expr_bracket(ls, &key); /* Already calls expr_toval. */
1749 if (!expr_isk(&key)) expr_index(fs, e, &key); 1718 if (!expr_isk(&key)) expr_index(fs, e, &key);
1750 if (expr_isnumk(&key) && expr_numiszero(&key)) needarr = 1; else nhash++; 1719 if (expr_isnumk(&key) && expr_numiszero(&key)) needarr = 1; else nhash++;
1751 lex_check(ls, '='); 1720 lex_check(ls, '=');
1752 } else if ((ls->token == TK_name || (!LJ_52 && ls->token == TK_goto)) && 1721 } else if ((ls->tok == TK_name || (!LJ_52 && ls->tok == TK_goto)) &&
1753 lj_lex_lookahead(ls) == '=') { 1722 lj_lex_lookahead(ls) == '=') {
1754 expr_str(ls, &key); 1723 expr_str(ls, &key);
1755 lex_check(ls, '='); 1724 lex_check(ls, '=');
@@ -1842,11 +1811,11 @@ static BCReg parse_params(LexState *ls, int needself)
1842 lex_check(ls, '('); 1811 lex_check(ls, '(');
1843 if (needself) 1812 if (needself)
1844 var_new_lit(ls, nparams++, "self"); 1813 var_new_lit(ls, nparams++, "self");
1845 if (ls->token != ')') { 1814 if (ls->tok != ')') {
1846 do { 1815 do {
1847 if (ls->token == TK_name || (!LJ_52 && ls->token == TK_goto)) { 1816 if (ls->tok == TK_name || (!LJ_52 && ls->tok == TK_goto)) {
1848 var_new(ls, nparams++, lex_str(ls)); 1817 var_new(ls, nparams++, lex_str(ls));
1849 } else if (ls->token == TK_dots) { 1818 } else if (ls->tok == TK_dots) {
1850 lj_lex_next(ls); 1819 lj_lex_next(ls);
1851 fs->flags |= PROTO_VARARG; 1820 fs->flags |= PROTO_VARARG;
1852 break; 1821 break;
@@ -1880,7 +1849,7 @@ static void parse_body(LexState *ls, ExpDesc *e, int needself, BCLine line)
1880 fs.bclim = pfs->bclim - pfs->pc; 1849 fs.bclim = pfs->bclim - pfs->pc;
1881 bcemit_AD(&fs, BC_FUNCF, 0, 0); /* Placeholder. */ 1850 bcemit_AD(&fs, BC_FUNCF, 0, 0); /* Placeholder. */
1882 parse_chunk(ls); 1851 parse_chunk(ls);
1883 if (ls->token != TK_end) lex_match(ls, TK_end, TK_function, line); 1852 if (ls->tok != TK_end) lex_match(ls, TK_end, TK_function, line);
1884 pt = fs_finish(ls, (ls->lastline = ls->linenumber)); 1853 pt = fs_finish(ls, (ls->lastline = ls->linenumber));
1885 pfs->bcbase = ls->bcstack + oldbase; /* May have been reallocated. */ 1854 pfs->bcbase = ls->bcstack + oldbase; /* May have been reallocated. */
1886 pfs->bclim = (BCPos)(ls->sizebcstack - oldbase); 1855 pfs->bclim = (BCPos)(ls->sizebcstack - oldbase);
@@ -1919,13 +1888,13 @@ static void parse_args(LexState *ls, ExpDesc *e)
1919 BCIns ins; 1888 BCIns ins;
1920 BCReg base; 1889 BCReg base;
1921 BCLine line = ls->linenumber; 1890 BCLine line = ls->linenumber;
1922 if (ls->token == '(') { 1891 if (ls->tok == '(') {
1923#if !LJ_52 1892#if !LJ_52
1924 if (line != ls->lastline) 1893 if (line != ls->lastline)
1925 err_syntax(ls, LJ_ERR_XAMBIG); 1894 err_syntax(ls, LJ_ERR_XAMBIG);
1926#endif 1895#endif
1927 lj_lex_next(ls); 1896 lj_lex_next(ls);
1928 if (ls->token == ')') { /* f(). */ 1897 if (ls->tok == ')') { /* f(). */
1929 args.k = VVOID; 1898 args.k = VVOID;
1930 } else { 1899 } else {
1931 expr_list(ls, &args); 1900 expr_list(ls, &args);
@@ -1933,11 +1902,11 @@ static void parse_args(LexState *ls, ExpDesc *e)
1933 setbc_b(bcptr(fs, &args), 0); /* Pass on multiple results. */ 1902 setbc_b(bcptr(fs, &args), 0); /* Pass on multiple results. */
1934 } 1903 }
1935 lex_match(ls, ')', '(', line); 1904 lex_match(ls, ')', '(', line);
1936 } else if (ls->token == '{') { 1905 } else if (ls->tok == '{') {
1937 expr_table(ls, &args); 1906 expr_table(ls, &args);
1938 } else if (ls->token == TK_string) { 1907 } else if (ls->tok == TK_string) {
1939 expr_init(&args, VKSTR, 0); 1908 expr_init(&args, VKSTR, 0);
1940 args.u.sval = strV(&ls->tokenval); 1909 args.u.sval = strV(&ls->tokval);
1941 lj_lex_next(ls); 1910 lj_lex_next(ls);
1942 } else { 1911 } else {
1943 err_syntax(ls, LJ_ERR_XFUNARG); 1912 err_syntax(ls, LJ_ERR_XFUNARG);
@@ -1946,11 +1915,11 @@ static void parse_args(LexState *ls, ExpDesc *e)
1946 lua_assert(e->k == VNONRELOC); 1915 lua_assert(e->k == VNONRELOC);
1947 base = e->u.s.info; /* Base register for call. */ 1916 base = e->u.s.info; /* Base register for call. */
1948 if (args.k == VCALL) { 1917 if (args.k == VCALL) {
1949 ins = BCINS_ABC(BC_CALLM, base, 2, args.u.s.aux - base - 1); 1918 ins = BCINS_ABC(BC_CALLM, base, 2, args.u.s.aux - base - 1 - LJ_FR2);
1950 } else { 1919 } else {
1951 if (args.k != VVOID) 1920 if (args.k != VVOID)
1952 expr_tonextreg(fs, &args); 1921 expr_tonextreg(fs, &args);
1953 ins = BCINS_ABC(BC_CALL, base, 2, fs->freereg - base); 1922 ins = BCINS_ABC(BC_CALL, base, 2, fs->freereg - base - LJ_FR2);
1954 } 1923 }
1955 expr_init(e, VCALL, bcemit_INS(fs, ins)); 1924 expr_init(e, VCALL, bcemit_INS(fs, ins));
1956 e->u.s.aux = base; 1925 e->u.s.aux = base;
@@ -1963,33 +1932,34 @@ static void expr_primary(LexState *ls, ExpDesc *v)
1963{ 1932{
1964 FuncState *fs = ls->fs; 1933 FuncState *fs = ls->fs;
1965 /* Parse prefix expression. */ 1934 /* Parse prefix expression. */
1966 if (ls->token == '(') { 1935 if (ls->tok == '(') {
1967 BCLine line = ls->linenumber; 1936 BCLine line = ls->linenumber;
1968 lj_lex_next(ls); 1937 lj_lex_next(ls);
1969 expr(ls, v); 1938 expr(ls, v);
1970 lex_match(ls, ')', '(', line); 1939 lex_match(ls, ')', '(', line);
1971 expr_discharge(ls->fs, v); 1940 expr_discharge(ls->fs, v);
1972 } else if (ls->token == TK_name || (!LJ_52 && ls->token == TK_goto)) { 1941 } else if (ls->tok == TK_name || (!LJ_52 && ls->tok == TK_goto)) {
1973 var_lookup(ls, v); 1942 var_lookup(ls, v);
1974 } else { 1943 } else {
1975 err_syntax(ls, LJ_ERR_XSYMBOL); 1944 err_syntax(ls, LJ_ERR_XSYMBOL);
1976 } 1945 }
1977 for (;;) { /* Parse multiple expression suffixes. */ 1946 for (;;) { /* Parse multiple expression suffixes. */
1978 if (ls->token == '.') { 1947 if (ls->tok == '.') {
1979 expr_field(ls, v); 1948 expr_field(ls, v);
1980 } else if (ls->token == '[') { 1949 } else if (ls->tok == '[') {
1981 ExpDesc key; 1950 ExpDesc key;
1982 expr_toanyreg(fs, v); 1951 expr_toanyreg(fs, v);
1983 expr_bracket(ls, &key); 1952 expr_bracket(ls, &key);
1984 expr_index(fs, v, &key); 1953 expr_index(fs, v, &key);
1985 } else if (ls->token == ':') { 1954 } else if (ls->tok == ':') {
1986 ExpDesc key; 1955 ExpDesc key;
1987 lj_lex_next(ls); 1956 lj_lex_next(ls);
1988 expr_str(ls, &key); 1957 expr_str(ls, &key);
1989 bcemit_method(fs, v, &key); 1958 bcemit_method(fs, v, &key);
1990 parse_args(ls, v); 1959 parse_args(ls, v);
1991 } else if (ls->token == '(' || ls->token == TK_string || ls->token == '{') { 1960 } else if (ls->tok == '(' || ls->tok == TK_string || ls->tok == '{') {
1992 expr_tonextreg(fs, v); 1961 expr_tonextreg(fs, v);
1962 if (LJ_FR2) bcreg_reserve(fs, 1);
1993 parse_args(ls, v); 1963 parse_args(ls, v);
1994 } else { 1964 } else {
1995 break; 1965 break;
@@ -2000,14 +1970,14 @@ static void expr_primary(LexState *ls, ExpDesc *v)
2000/* Parse simple expression. */ 1970/* Parse simple expression. */
2001static void expr_simple(LexState *ls, ExpDesc *v) 1971static void expr_simple(LexState *ls, ExpDesc *v)
2002{ 1972{
2003 switch (ls->token) { 1973 switch (ls->tok) {
2004 case TK_number: 1974 case TK_number:
2005 expr_init(v, (LJ_HASFFI && tviscdata(&ls->tokenval)) ? VKCDATA : VKNUM, 0); 1975 expr_init(v, (LJ_HASFFI && tviscdata(&ls->tokval)) ? VKCDATA : VKNUM, 0);
2006 copyTV(ls->L, &v->u.nval, &ls->tokenval); 1976 copyTV(ls->L, &v->u.nval, &ls->tokval);
2007 break; 1977 break;
2008 case TK_string: 1978 case TK_string:
2009 expr_init(v, VKSTR, 0); 1979 expr_init(v, VKSTR, 0);
2010 v->u.sval = strV(&ls->tokenval); 1980 v->u.sval = strV(&ls->tokval);
2011 break; 1981 break;
2012 case TK_nil: 1982 case TK_nil:
2013 expr_init(v, VKNIL, 0); 1983 expr_init(v, VKNIL, 0);
@@ -2095,11 +2065,11 @@ static BinOpr expr_binop(LexState *ls, ExpDesc *v, uint32_t limit);
2095static void expr_unop(LexState *ls, ExpDesc *v) 2065static void expr_unop(LexState *ls, ExpDesc *v)
2096{ 2066{
2097 BCOp op; 2067 BCOp op;
2098 if (ls->token == TK_not) { 2068 if (ls->tok == TK_not) {
2099 op = BC_NOT; 2069 op = BC_NOT;
2100 } else if (ls->token == '-') { 2070 } else if (ls->tok == '-') {
2101 op = BC_UNM; 2071 op = BC_UNM;
2102 } else if (ls->token == '#') { 2072 } else if (ls->tok == '#') {
2103 op = BC_LEN; 2073 op = BC_LEN;
2104 } else { 2074 } else {
2105 expr_simple(ls, v); 2075 expr_simple(ls, v);
@@ -2116,7 +2086,7 @@ static BinOpr expr_binop(LexState *ls, ExpDesc *v, uint32_t limit)
2116 BinOpr op; 2086 BinOpr op;
2117 synlevel_begin(ls); 2087 synlevel_begin(ls);
2118 expr_unop(ls, v); 2088 expr_unop(ls, v);
2119 op = token2binop(ls->token); 2089 op = token2binop(ls->tok);
2120 while (op != OPR_NOBINOPR && priority[op].left > limit) { 2090 while (op != OPR_NOBINOPR && priority[op].left > limit) {
2121 ExpDesc v2; 2091 ExpDesc v2;
2122 BinOpr nextop; 2092 BinOpr nextop;
@@ -2305,9 +2275,9 @@ static void parse_func(LexState *ls, BCLine line)
2305 lj_lex_next(ls); /* Skip 'function'. */ 2275 lj_lex_next(ls); /* Skip 'function'. */
2306 /* Parse function name. */ 2276 /* Parse function name. */
2307 var_lookup(ls, &v); 2277 var_lookup(ls, &v);
2308 while (ls->token == '.') /* Multiple dot-separated fields. */ 2278 while (ls->tok == '.') /* Multiple dot-separated fields. */
2309 expr_field(ls, &v); 2279 expr_field(ls, &v);
2310 if (ls->token == ':') { /* Optional colon to signify method call. */ 2280 if (ls->tok == ':') { /* Optional colon to signify method call. */
2311 needself = 1; 2281 needself = 1;
2312 expr_field(ls, &v); 2282 expr_field(ls, &v);
2313 } 2283 }
@@ -2320,9 +2290,9 @@ static void parse_func(LexState *ls, BCLine line)
2320/* -- Control transfer statements ----------------------------------------- */ 2290/* -- Control transfer statements ----------------------------------------- */
2321 2291
2322/* Check for end of block. */ 2292/* Check for end of block. */
2323static int endofblock(LexToken token) 2293static int parse_isend(LexToken tok)
2324{ 2294{
2325 switch (token) { 2295 switch (tok) {
2326 case TK_else: case TK_elseif: case TK_end: case TK_until: case TK_eof: 2296 case TK_else: case TK_elseif: case TK_end: case TK_until: case TK_eof:
2327 return 1; 2297 return 1;
2328 default: 2298 default:
@@ -2337,7 +2307,7 @@ static void parse_return(LexState *ls)
2337 FuncState *fs = ls->fs; 2307 FuncState *fs = ls->fs;
2338 lj_lex_next(ls); /* Skip 'return'. */ 2308 lj_lex_next(ls); /* Skip 'return'. */
2339 fs->flags |= PROTO_HAS_RETURN; 2309 fs->flags |= PROTO_HAS_RETURN;
2340 if (endofblock(ls->token) || ls->token == ';') { /* Bare return. */ 2310 if (parse_isend(ls->tok) || ls->tok == ';') { /* Bare return. */
2341 ins = BCINS_AD(BC_RET0, 0, 1); 2311 ins = BCINS_AD(BC_RET0, 0, 1);
2342 } else { /* Return with one or more values. */ 2312 } else { /* Return with one or more values. */
2343 ExpDesc e; /* Receives the _last_ expression in the list. */ 2313 ExpDesc e; /* Receives the _last_ expression in the list. */
@@ -2403,18 +2373,18 @@ static void parse_label(LexState *ls)
2403 lex_check(ls, TK_label); 2373 lex_check(ls, TK_label);
2404 /* Recursively parse trailing statements: labels and ';' (Lua 5.2 only). */ 2374 /* Recursively parse trailing statements: labels and ';' (Lua 5.2 only). */
2405 for (;;) { 2375 for (;;) {
2406 if (ls->token == TK_label) { 2376 if (ls->tok == TK_label) {
2407 synlevel_begin(ls); 2377 synlevel_begin(ls);
2408 parse_label(ls); 2378 parse_label(ls);
2409 synlevel_end(ls); 2379 synlevel_end(ls);
2410 } else if (LJ_52 && ls->token == ';') { 2380 } else if (LJ_52 && ls->tok == ';') {
2411 lj_lex_next(ls); 2381 lj_lex_next(ls);
2412 } else { 2382 } else {
2413 break; 2383 break;
2414 } 2384 }
2415 } 2385 }
2416 /* Trailing label is considered to be outside of scope. */ 2386 /* Trailing label is considered to be outside of scope. */
2417 if (endofblock(ls->token) && ls->token != TK_until) 2387 if (parse_isend(ls->tok) && ls->tok != TK_until)
2418 ls->vstack[idx].slot = fs->bl->nactvar; 2388 ls->vstack[idx].slot = fs->bl->nactvar;
2419 gola_resolve(ls, fs->bl, idx); 2389 gola_resolve(ls, fs->bl, idx);
2420} 2390}
@@ -2570,7 +2540,8 @@ static void parse_for_iter(LexState *ls, GCstr *indexname)
2570 lex_check(ls, TK_in); 2540 lex_check(ls, TK_in);
2571 line = ls->linenumber; 2541 line = ls->linenumber;
2572 assign_adjust(ls, 3, expr_list(ls, &e), &e); 2542 assign_adjust(ls, 3, expr_list(ls, &e), &e);
2573 bcreg_bump(fs, 3); /* The iterator needs another 3 slots (func + 2 args). */ 2543 /* The iterator needs another 3 [4] slots (func [pc] | state ctl). */
2544 bcreg_bump(fs, 3+LJ_FR2);
2574 isnext = (nvars <= 5 && predict_next(ls, fs, exprpc)); 2545 isnext = (nvars <= 5 && predict_next(ls, fs, exprpc));
2575 var_add(ls, 3); /* Hidden control variables. */ 2546 var_add(ls, 3); /* Hidden control variables. */
2576 lex_check(ls, TK_do); 2547 lex_check(ls, TK_do);
@@ -2598,9 +2569,9 @@ static void parse_for(LexState *ls, BCLine line)
2598 fscope_begin(fs, &bl, FSCOPE_LOOP); 2569 fscope_begin(fs, &bl, FSCOPE_LOOP);
2599 lj_lex_next(ls); /* Skip 'for'. */ 2570 lj_lex_next(ls); /* Skip 'for'. */
2600 varname = lex_str(ls); /* Get first variable name. */ 2571 varname = lex_str(ls); /* Get first variable name. */
2601 if (ls->token == '=') 2572 if (ls->tok == '=')
2602 parse_for_num(ls, varname, line); 2573 parse_for_num(ls, varname, line);
2603 else if (ls->token == ',' || ls->token == TK_in) 2574 else if (ls->tok == ',' || ls->tok == TK_in)
2604 parse_for_iter(ls, varname); 2575 parse_for_iter(ls, varname);
2605 else 2576 else
2606 err_syntax(ls, LJ_ERR_XFOR); 2577 err_syntax(ls, LJ_ERR_XFOR);
@@ -2626,12 +2597,12 @@ static void parse_if(LexState *ls, BCLine line)
2626 BCPos flist; 2597 BCPos flist;
2627 BCPos escapelist = NO_JMP; 2598 BCPos escapelist = NO_JMP;
2628 flist = parse_then(ls); 2599 flist = parse_then(ls);
2629 while (ls->token == TK_elseif) { /* Parse multiple 'elseif' blocks. */ 2600 while (ls->tok == TK_elseif) { /* Parse multiple 'elseif' blocks. */
2630 jmp_append(fs, &escapelist, bcemit_jmp(fs)); 2601 jmp_append(fs, &escapelist, bcemit_jmp(fs));
2631 jmp_tohere(fs, flist); 2602 jmp_tohere(fs, flist);
2632 flist = parse_then(ls); 2603 flist = parse_then(ls);
2633 } 2604 }
2634 if (ls->token == TK_else) { /* Parse optional 'else' block. */ 2605 if (ls->tok == TK_else) { /* Parse optional 'else' block. */
2635 jmp_append(fs, &escapelist, bcemit_jmp(fs)); 2606 jmp_append(fs, &escapelist, bcemit_jmp(fs));
2636 jmp_tohere(fs, flist); 2607 jmp_tohere(fs, flist);
2637 lj_lex_next(ls); /* Skip 'else'. */ 2608 lj_lex_next(ls); /* Skip 'else'. */
@@ -2649,7 +2620,7 @@ static void parse_if(LexState *ls, BCLine line)
2649static int parse_stmt(LexState *ls) 2620static int parse_stmt(LexState *ls)
2650{ 2621{
2651 BCLine line = ls->linenumber; 2622 BCLine line = ls->linenumber;
2652 switch (ls->token) { 2623 switch (ls->tok) {
2653 case TK_if: 2624 case TK_if:
2654 parse_if(ls, line); 2625 parse_if(ls, line);
2655 break; 2626 break;
@@ -2707,7 +2678,7 @@ static void parse_chunk(LexState *ls)
2707{ 2678{
2708 int islast = 0; 2679 int islast = 0;
2709 synlevel_begin(ls); 2680 synlevel_begin(ls);
2710 while (!islast && !endofblock(ls->token)) { 2681 while (!islast && !parse_isend(ls->tok)) {
2711 islast = parse_stmt(ls); 2682 islast = parse_stmt(ls);
2712 lex_opt(ls, ';'); 2683 lex_opt(ls, ';');
2713 lua_assert(ls->fs->framesize >= ls->fs->freereg && 2684 lua_assert(ls->fs->framesize >= ls->fs->freereg &&
@@ -2742,7 +2713,7 @@ GCproto *lj_parse(LexState *ls)
2742 bcemit_AD(&fs, BC_FUNCV, 0, 0); /* Placeholder. */ 2713 bcemit_AD(&fs, BC_FUNCV, 0, 0); /* Placeholder. */
2743 lj_lex_next(ls); /* Read-ahead first token. */ 2714 lj_lex_next(ls); /* Read-ahead first token. */
2744 parse_chunk(ls); 2715 parse_chunk(ls);
2745 if (ls->token != TK_eof) 2716 if (ls->tok != TK_eof)
2746 err_token(ls, TK_eof); 2717 err_token(ls, TK_eof);
2747 pt = fs_finish(ls, ls->linenumber); 2718 pt = fs_finish(ls, ls->linenumber);
2748 L->top--; /* Drop chunkname. */ 2719 L->top--; /* Drop chunkname. */
diff --git a/src/lj_profile.c b/src/lj_profile.c
new file mode 100644
index 00000000..01367014
--- /dev/null
+++ b/src/lj_profile.c
@@ -0,0 +1,368 @@
1/*
2** Low-overhead profiling.
3** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#define lj_profile_c
7#define LUA_CORE
8
9#include "lj_obj.h"
10
11#if LJ_HASPROFILE
12
13#include "lj_buf.h"
14#include "lj_frame.h"
15#include "lj_debug.h"
16#include "lj_dispatch.h"
17#if LJ_HASJIT
18#include "lj_jit.h"
19#include "lj_trace.h"
20#endif
21#include "lj_profile.h"
22
23#include "luajit.h"
24
25#if LJ_PROFILE_SIGPROF
26
27#include <sys/time.h>
28#include <signal.h>
29#define profile_lock(ps) UNUSED(ps)
30#define profile_unlock(ps) UNUSED(ps)
31
32#elif LJ_PROFILE_PTHREAD
33
34#include <pthread.h>
35#include <time.h>
36#if LJ_TARGET_PS3
37#include <sys/timer.h>
38#endif
39#define profile_lock(ps) pthread_mutex_lock(&ps->lock)
40#define profile_unlock(ps) pthread_mutex_unlock(&ps->lock)
41
42#elif LJ_PROFILE_WTHREAD
43
44#define WIN32_LEAN_AND_MEAN
45#if LJ_TARGET_XBOX360
46#include <xtl.h>
47#include <xbox.h>
48#else
49#include <windows.h>
50#endif
51typedef unsigned int (WINAPI *WMM_TPFUNC)(unsigned int);
52#define profile_lock(ps) EnterCriticalSection(&ps->lock)
53#define profile_unlock(ps) LeaveCriticalSection(&ps->lock)
54
55#endif
56
57/* Profiler state. */
58typedef struct ProfileState {
59 global_State *g; /* VM state that started the profiler. */
60 luaJIT_profile_callback cb; /* Profiler callback. */
61 void *data; /* Profiler callback data. */
62 SBuf sb; /* String buffer for stack dumps. */
63 int interval; /* Sample interval in milliseconds. */
64 int samples; /* Number of samples for next callback. */
65 int vmstate; /* VM state when profile timer triggered. */
66#if LJ_PROFILE_SIGPROF
67 struct sigaction oldsa; /* Previous SIGPROF state. */
68#elif LJ_PROFILE_PTHREAD
69 pthread_mutex_t lock; /* g->hookmask update lock. */
70 pthread_t thread; /* Timer thread. */
71 int abort; /* Abort timer thread. */
72#elif LJ_PROFILE_WTHREAD
73#if LJ_TARGET_WINDOWS
74 HINSTANCE wmm; /* WinMM library handle. */
75 WMM_TPFUNC wmm_tbp; /* WinMM timeBeginPeriod function. */
76 WMM_TPFUNC wmm_tep; /* WinMM timeEndPeriod function. */
77#endif
78 CRITICAL_SECTION lock; /* g->hookmask update lock. */
79 HANDLE thread; /* Timer thread. */
80 int abort; /* Abort timer thread. */
81#endif
82} ProfileState;
83
84/* Sadly, we have to use a static profiler state.
85**
86** The SIGPROF variant needs a static pointer to the global state, anyway.
87** And it would be hard to extend for multiple threads. You can still use
88** multiple VMs in multiple threads, but only profile one at a time.
89*/
90static ProfileState profile_state;
91
92/* Default sample interval in milliseconds. */
93#define LJ_PROFILE_INTERVAL_DEFAULT 10
94
95/* -- Profiler/hook interaction ------------------------------------------- */
96
97#if !LJ_PROFILE_SIGPROF
98void LJ_FASTCALL lj_profile_hook_enter(global_State *g)
99{
100 ProfileState *ps = &profile_state;
101 if (ps->g) {
102 profile_lock(ps);
103 hook_enter(g);
104 profile_unlock(ps);
105 } else {
106 hook_enter(g);
107 }
108}
109
110void LJ_FASTCALL lj_profile_hook_leave(global_State *g)
111{
112 ProfileState *ps = &profile_state;
113 if (ps->g) {
114 profile_lock(ps);
115 hook_leave(g);
116 profile_unlock(ps);
117 } else {
118 hook_leave(g);
119 }
120}
121#endif
122
123/* -- Profile callbacks --------------------------------------------------- */
124
125/* Callback from profile hook (HOOK_PROFILE already cleared). */
126void LJ_FASTCALL lj_profile_interpreter(lua_State *L)
127{
128 ProfileState *ps = &profile_state;
129 global_State *g = G(L);
130 uint8_t mask;
131 profile_lock(ps);
132 mask = (g->hookmask & ~HOOK_PROFILE);
133 if (!(mask & HOOK_VMEVENT)) {
134 int samples = ps->samples;
135 ps->samples = 0;
136 g->hookmask = HOOK_VMEVENT;
137 lj_dispatch_update(g);
138 profile_unlock(ps);
139 ps->cb(ps->data, L, samples, ps->vmstate); /* Invoke user callback. */
140 profile_lock(ps);
141 mask |= (g->hookmask & HOOK_PROFILE);
142 }
143 g->hookmask = mask;
144 lj_dispatch_update(g);
145 profile_unlock(ps);
146}
147
148/* Trigger profile hook. Asynchronous call from OS-specific profile timer. */
149static void profile_trigger(ProfileState *ps)
150{
151 global_State *g = ps->g;
152 uint8_t mask;
153 profile_lock(ps);
154 ps->samples++; /* Always increment number of samples. */
155 mask = g->hookmask;
156 if (!(mask & (HOOK_PROFILE|HOOK_VMEVENT))) { /* Set profile hook. */
157 int st = g->vmstate;
158 ps->vmstate = st >= 0 ? 'N' :
159 st == ~LJ_VMST_INTERP ? 'I' :
160 st == ~LJ_VMST_C ? 'C' :
161 st == ~LJ_VMST_GC ? 'G' : 'J';
162 g->hookmask = (mask | HOOK_PROFILE);
163 lj_dispatch_update(g);
164 }
165 profile_unlock(ps);
166}
167
168/* -- OS-specific profile timer handling ---------------------------------- */
169
170#if LJ_PROFILE_SIGPROF
171
172/* SIGPROF handler. */
173static void profile_signal(int sig)
174{
175 UNUSED(sig);
176 profile_trigger(&profile_state);
177}
178
179/* Start profiling timer. */
180static void profile_timer_start(ProfileState *ps)
181{
182 int interval = ps->interval;
183 struct itimerval tm;
184 struct sigaction sa;
185 tm.it_value.tv_sec = tm.it_interval.tv_sec = interval / 1000;
186 tm.it_value.tv_usec = tm.it_interval.tv_usec = (interval % 1000) * 1000;
187 setitimer(ITIMER_PROF, &tm, NULL);
188 sa.sa_flags = SA_RESTART;
189 sa.sa_handler = profile_signal;
190 sigemptyset(&sa.sa_mask);
191 sigaction(SIGPROF, &sa, &ps->oldsa);
192}
193
194/* Stop profiling timer. */
195static void profile_timer_stop(ProfileState *ps)
196{
197 struct itimerval tm;
198 tm.it_value.tv_sec = tm.it_interval.tv_sec = 0;
199 tm.it_value.tv_usec = tm.it_interval.tv_usec = 0;
200 setitimer(ITIMER_PROF, &tm, NULL);
201 sigaction(SIGPROF, &ps->oldsa, NULL);
202}
203
204#elif LJ_PROFILE_PTHREAD
205
206/* POSIX timer thread. */
207static void *profile_thread(ProfileState *ps)
208{
209 int interval = ps->interval;
210#if !LJ_TARGET_PS3
211 struct timespec ts;
212 ts.tv_sec = interval / 1000;
213 ts.tv_nsec = (interval % 1000) * 1000000;
214#endif
215 while (1) {
216#if LJ_TARGET_PS3
217 sys_timer_usleep(interval * 1000);
218#else
219 nanosleep(&ts, NULL);
220#endif
221 if (ps->abort) break;
222 profile_trigger(ps);
223 }
224 return NULL;
225}
226
227/* Start profiling timer thread. */
228static void profile_timer_start(ProfileState *ps)
229{
230 pthread_mutex_init(&ps->lock, 0);
231 ps->abort = 0;
232 pthread_create(&ps->thread, NULL, (void *(*)(void *))profile_thread, ps);
233}
234
235/* Stop profiling timer thread. */
236static void profile_timer_stop(ProfileState *ps)
237{
238 ps->abort = 1;
239 pthread_join(ps->thread, NULL);
240 pthread_mutex_destroy(&ps->lock);
241}
242
243#elif LJ_PROFILE_WTHREAD
244
245/* Windows timer thread. */
246static DWORD WINAPI profile_thread(void *psx)
247{
248 ProfileState *ps = (ProfileState *)psx;
249 int interval = ps->interval;
250#if LJ_TARGET_WINDOWS
251 ps->wmm_tbp(interval);
252#endif
253 while (1) {
254 Sleep(interval);
255 if (ps->abort) break;
256 profile_trigger(ps);
257 }
258#if LJ_TARGET_WINDOWS
259 ps->wmm_tep(interval);
260#endif
261 return 0;
262}
263
264/* Start profiling timer thread. */
265static void profile_timer_start(ProfileState *ps)
266{
267#if LJ_TARGET_WINDOWS
268 if (!ps->wmm) { /* Load WinMM library on-demand. */
269 ps->wmm = LoadLibraryA("winmm.dll");
270 if (ps->wmm) {
271 ps->wmm_tbp = (WMM_TPFUNC)GetProcAddress(ps->wmm, "timeBeginPeriod");
272 ps->wmm_tep = (WMM_TPFUNC)GetProcAddress(ps->wmm, "timeEndPeriod");
273 if (!ps->wmm_tbp || !ps->wmm_tep) {
274 ps->wmm = NULL;
275 return;
276 }
277 }
278 }
279#endif
280 InitializeCriticalSection(&ps->lock);
281 ps->abort = 0;
282 ps->thread = CreateThread(NULL, 0, profile_thread, ps, 0, NULL);
283}
284
285/* Stop profiling timer thread. */
286static void profile_timer_stop(ProfileState *ps)
287{
288 ps->abort = 1;
289 WaitForSingleObject(ps->thread, INFINITE);
290 DeleteCriticalSection(&ps->lock);
291}
292
293#endif
294
295/* -- Public profiling API ------------------------------------------------ */
296
297/* Start profiling. */
298LUA_API void luaJIT_profile_start(lua_State *L, const char *mode,
299 luaJIT_profile_callback cb, void *data)
300{
301 ProfileState *ps = &profile_state;
302 int interval = LJ_PROFILE_INTERVAL_DEFAULT;
303 while (*mode) {
304 int m = *mode++;
305 switch (m) {
306 case 'i':
307 interval = 0;
308 while (*mode >= '0' && *mode <= '9')
309 interval = interval * 10 + (*mode++ - '0');
310 if (interval <= 0) interval = 1;
311 break;
312#if LJ_HASJIT
313 case 'l': case 'f':
314 L2J(L)->prof_mode = m;
315 lj_trace_flushall(L);
316 break;
317#endif
318 default: /* Ignore unknown mode chars. */
319 break;
320 }
321 }
322 if (ps->g) {
323 luaJIT_profile_stop(L);
324 if (ps->g) return; /* Profiler in use by another VM. */
325 }
326 ps->g = G(L);
327 ps->interval = interval;
328 ps->cb = cb;
329 ps->data = data;
330 ps->samples = 0;
331 lj_buf_init(L, &ps->sb);
332 profile_timer_start(ps);
333}
334
335/* Stop profiling. */
336LUA_API void luaJIT_profile_stop(lua_State *L)
337{
338 ProfileState *ps = &profile_state;
339 global_State *g = ps->g;
340 if (G(L) == g) { /* Only stop profiler if started by this VM. */
341 profile_timer_stop(ps);
342 g->hookmask &= ~HOOK_PROFILE;
343 lj_dispatch_update(g);
344#if LJ_HASJIT
345 G2J(g)->prof_mode = 0;
346 lj_trace_flushall(L);
347#endif
348 lj_buf_free(g, &ps->sb);
349 setmref(ps->sb.b, NULL);
350 setmref(ps->sb.e, NULL);
351 ps->g = NULL;
352 }
353}
354
355/* Return a compact stack dump. */
356LUA_API const char *luaJIT_profile_dumpstack(lua_State *L, const char *fmt,
357 int depth, size_t *len)
358{
359 ProfileState *ps = &profile_state;
360 SBuf *sb = &ps->sb;
361 setsbufL(sb, L);
362 lj_buf_reset(sb);
363 lj_debug_dumpstack(L, sb, fmt, depth);
364 *len = (size_t)sbuflen(sb);
365 return sbufB(sb);
366}
367
368#endif
diff --git a/src/lj_profile.h b/src/lj_profile.h
new file mode 100644
index 00000000..26cb9db3
--- /dev/null
+++ b/src/lj_profile.h
@@ -0,0 +1,21 @@
1/*
2** Low-overhead profiling.
3** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_PROFILE_H
7#define _LJ_PROFILE_H
8
9#include "lj_obj.h"
10
11#if LJ_HASPROFILE
12
13LJ_FUNC void LJ_FASTCALL lj_profile_interpreter(lua_State *L);
14#if !LJ_PROFILE_SIGPROF
15LJ_FUNC void LJ_FASTCALL lj_profile_hook_enter(global_State *g);
16LJ_FUNC void LJ_FASTCALL lj_profile_hook_leave(global_State *g);
17#endif
18
19#endif
20
21#endif
diff --git a/src/lj_record.c b/src/lj_record.c
index 843108c8..56038156 100644
--- a/src/lj_record.c
+++ b/src/lj_record.c
@@ -20,6 +20,9 @@
20#endif 20#endif
21#include "lj_bc.h" 21#include "lj_bc.h"
22#include "lj_ff.h" 22#include "lj_ff.h"
23#if LJ_HASPROFILE
24#include "lj_debug.h"
25#endif
23#include "lj_ir.h" 26#include "lj_ir.h"
24#include "lj_jit.h" 27#include "lj_jit.h"
25#include "lj_ircall.h" 28#include "lj_ircall.h"
@@ -230,7 +233,7 @@ static void canonicalize_slots(jit_State *J)
230} 233}
231 234
232/* Stop recording. */ 235/* Stop recording. */
233static void rec_stop(jit_State *J, TraceLink linktype, TraceNo lnk) 236void lj_record_stop(jit_State *J, TraceLink linktype, TraceNo lnk)
234{ 237{
235 lj_trace_end(J); 238 lj_trace_end(J);
236 J->cur.linktype = (uint8_t)linktype; 239 J->cur.linktype = (uint8_t)linktype;
@@ -499,8 +502,8 @@ static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl)
499static LoopEvent rec_iterl(jit_State *J, const BCIns iterins) 502static LoopEvent rec_iterl(jit_State *J, const BCIns iterins)
500{ 503{
501 BCReg ra = bc_a(iterins); 504 BCReg ra = bc_a(iterins);
502 lua_assert(J->base[ra] != 0); 505 lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */
503 if (!tref_isnil(J->base[ra])) { /* Looping back? */ 506 if (!tref_isnil(getslot(J, ra))) { /* Looping back? */
504 J->base[ra-1] = J->base[ra]; /* Copy result of ITERC to control var. */ 507 J->base[ra-1] = J->base[ra]; /* Copy result of ITERC to control var. */
505 J->maxslot = ra-1+bc_b(J->pc[-1]); 508 J->maxslot = ra-1+bc_b(J->pc[-1]);
506 J->pc += bc_j(iterins)+1; 509 J->pc += bc_j(iterins)+1;
@@ -538,12 +541,12 @@ static int innerloopleft(jit_State *J, const BCIns *pc)
538/* Handle the case when an interpreted loop op is hit. */ 541/* Handle the case when an interpreted loop op is hit. */
539static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev) 542static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev)
540{ 543{
541 if (J->parent == 0) { 544 if (J->parent == 0 && J->exitno == 0) {
542 if (pc == J->startpc && J->framedepth + J->retdepth == 0) { 545 if (pc == J->startpc && J->framedepth + J->retdepth == 0) {
543 /* Same loop? */ 546 /* Same loop? */
544 if (ev == LOOPEV_LEAVE) /* Must loop back to form a root trace. */ 547 if (ev == LOOPEV_LEAVE) /* Must loop back to form a root trace. */
545 lj_trace_err(J, LJ_TRERR_LLEAVE); 548 lj_trace_err(J, LJ_TRERR_LLEAVE);
546 rec_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Looping root trace. */ 549 lj_record_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Looping trace. */
547 } else if (ev != LOOPEV_LEAVE) { /* Entering inner loop? */ 550 } else if (ev != LOOPEV_LEAVE) { /* Entering inner loop? */
548 /* It's usually better to abort here and wait until the inner loop 551 /* It's usually better to abort here and wait until the inner loop
549 ** is traced. But if the inner loop repeatedly didn't loop back, 552 ** is traced. But if the inner loop repeatedly didn't loop back,
@@ -568,18 +571,64 @@ static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev)
568/* Handle the case when an already compiled loop op is hit. */ 571/* Handle the case when an already compiled loop op is hit. */
569static void rec_loop_jit(jit_State *J, TraceNo lnk, LoopEvent ev) 572static void rec_loop_jit(jit_State *J, TraceNo lnk, LoopEvent ev)
570{ 573{
571 if (J->parent == 0) { /* Root trace hit an inner loop. */ 574 if (J->parent == 0 && J->exitno == 0) { /* Root trace hit an inner loop. */
572 /* Better let the inner loop spawn a side trace back here. */ 575 /* Better let the inner loop spawn a side trace back here. */
573 lj_trace_err(J, LJ_TRERR_LINNER); 576 lj_trace_err(J, LJ_TRERR_LINNER);
574 } else if (ev != LOOPEV_LEAVE) { /* Side trace enters a compiled loop. */ 577 } else if (ev != LOOPEV_LEAVE) { /* Side trace enters a compiled loop. */
575 J->instunroll = 0; /* Cannot continue across a compiled loop op. */ 578 J->instunroll = 0; /* Cannot continue across a compiled loop op. */
576 if (J->pc == J->startpc && J->framedepth + J->retdepth == 0) 579 if (J->pc == J->startpc && J->framedepth + J->retdepth == 0)
577 rec_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Form an extra loop. */ 580 lj_record_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Form extra loop. */
578 else 581 else
579 rec_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the loop. */ 582 lj_record_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the loop. */
580 } /* Side trace continues across a loop that's left or not entered. */ 583 } /* Side trace continues across a loop that's left or not entered. */
581} 584}
582 585
586/* -- Record profiler hook checks ----------------------------------------- */
587
588#if LJ_HASPROFILE
589
590/* Need to insert profiler hook check? */
591static int rec_profile_need(jit_State *J, GCproto *pt, const BCIns *pc)
592{
593 GCproto *ppt;
594 lua_assert(J->prof_mode == 'f' || J->prof_mode == 'l');
595 if (!pt)
596 return 0;
597 ppt = J->prev_pt;
598 J->prev_pt = pt;
599 if (pt != ppt && ppt) {
600 J->prev_line = -1;
601 return 1;
602 }
603 if (J->prof_mode == 'l') {
604 BCLine line = lj_debug_line(pt, proto_bcpos(pt, pc));
605 BCLine pline = J->prev_line;
606 J->prev_line = line;
607 if (pline != line)
608 return 1;
609 }
610 return 0;
611}
612
613static void rec_profile_ins(jit_State *J, const BCIns *pc)
614{
615 if (J->prof_mode && rec_profile_need(J, J->pt, pc)) {
616 emitir(IRTG(IR_PROF, IRT_NIL), 0, 0);
617 lj_snap_add(J);
618 }
619}
620
621static void rec_profile_ret(jit_State *J)
622{
623 if (J->prof_mode == 'f') {
624 emitir(IRTG(IR_PROF, IRT_NIL), 0, 0);
625 J->prev_pt = NULL;
626 lj_snap_add(J);
627 }
628}
629
630#endif
631
583/* -- Record calls and returns -------------------------------------------- */ 632/* -- Record calls and returns -------------------------------------------- */
584 633
585/* Specialize to the runtime value of the called function or its prototype. */ 634/* Specialize to the runtime value of the called function or its prototype. */
@@ -595,6 +644,21 @@ static TRef rec_call_specialize(jit_State *J, GCfunc *fn, TRef tr)
595 (void)lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); /* Prevent GC of proto. */ 644 (void)lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); /* Prevent GC of proto. */
596 return tr; 645 return tr;
597 } 646 }
647 } else {
648 /* Don't specialize to non-monomorphic builtins. */
649 switch (fn->c.ffid) {
650 case FF_coroutine_wrap_aux:
651 case FF_string_gmatch_aux:
652 /* NYI: io_file_iter doesn't have an ffid, yet. */
653 { /* Specialize to the ffid. */
654 TRef trid = emitir(IRT(IR_FLOAD, IRT_U8), tr, IRFL_FUNC_FFID);
655 emitir(IRTG(IR_EQ, IRT_INT), trid, lj_ir_kint(J, fn->c.ffid));
656 }
657 return tr;
658 default:
659 /* NYI: don't specialize to non-monomorphic C functions. */
660 break;
661 }
598 } 662 }
599 /* Otherwise specialize to the function (closure) value itself. */ 663 /* Otherwise specialize to the function (closure) value itself. */
600 kfunc = lj_ir_kfunc(J, fn); 664 kfunc = lj_ir_kfunc(J, fn);
@@ -609,6 +673,7 @@ static void rec_call_setup(jit_State *J, BCReg func, ptrdiff_t nargs)
609 TValue *functv = &J->L->base[func]; 673 TValue *functv = &J->L->base[func];
610 TRef *fbase = &J->base[func]; 674 TRef *fbase = &J->base[func];
611 ptrdiff_t i; 675 ptrdiff_t i;
676 lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */
612 for (i = 0; i <= nargs; i++) 677 for (i = 0; i <= nargs; i++)
613 (void)getslot(J, func+i); /* Ensure func and all args have a reference. */ 678 (void)getslot(J, func+i); /* Ensure func and all args have a reference. */
614 if (!tref_isfunc(fbase[0])) { /* Resolve __call metamethod. */ 679 if (!tref_isfunc(fbase[0])) { /* Resolve __call metamethod. */
@@ -678,6 +743,8 @@ static int check_downrec_unroll(jit_State *J, GCproto *pt)
678 return 0; 743 return 0;
679} 744}
680 745
746static TRef rec_cat(jit_State *J, BCReg baseslot, BCReg topslot);
747
681/* Record return. */ 748/* Record return. */
682void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) 749void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
683{ 750{
@@ -700,12 +767,13 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
700 /* Return to lower frame via interpreter for unhandled cases. */ 767 /* Return to lower frame via interpreter for unhandled cases. */
701 if (J->framedepth == 0 && J->pt && bc_isret(bc_op(*J->pc)) && 768 if (J->framedepth == 0 && J->pt && bc_isret(bc_op(*J->pc)) &&
702 (!frame_islua(frame) || 769 (!frame_islua(frame) ||
703 (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))))) { 770 (J->parent == 0 && J->exitno == 0 &&
771 !bc_isret(bc_op(J->cur.startins))))) {
704 /* NYI: specialize to frame type and return directly, not via RET*. */ 772 /* NYI: specialize to frame type and return directly, not via RET*. */
705 for (i = 0; i < (ptrdiff_t)rbase; i++) 773 for (i = 0; i < (ptrdiff_t)rbase; i++)
706 J->base[i] = 0; /* Purge dead slots. */ 774 J->base[i] = 0; /* Purge dead slots. */
707 J->maxslot = rbase + (BCReg)gotresults; 775 J->maxslot = rbase + (BCReg)gotresults;
708 rec_stop(J, LJ_TRLINK_RETURN, 0); /* Return to interpreter. */ 776 lj_record_stop(J, LJ_TRLINK_RETURN, 0); /* Return to interpreter. */
709 return; 777 return;
710 } 778 }
711 if (frame_isvarg(frame)) { 779 if (frame_isvarg(frame)) {
@@ -722,14 +790,15 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
722 BCIns callins = *(frame_pc(frame)-1); 790 BCIns callins = *(frame_pc(frame)-1);
723 ptrdiff_t nresults = bc_b(callins) ? (ptrdiff_t)bc_b(callins)-1 :gotresults; 791 ptrdiff_t nresults = bc_b(callins) ? (ptrdiff_t)bc_b(callins)-1 :gotresults;
724 BCReg cbase = bc_a(callins); 792 BCReg cbase = bc_a(callins);
725 GCproto *pt = funcproto(frame_func(frame - (cbase+1))); 793 GCproto *pt = funcproto(frame_func(frame - (cbase+1-LJ_FR2)));
794 lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame teardown. */
726 if ((pt->flags & PROTO_NOJIT)) 795 if ((pt->flags & PROTO_NOJIT))
727 lj_trace_err(J, LJ_TRERR_CJITOFF); 796 lj_trace_err(J, LJ_TRERR_CJITOFF);
728 if (J->framedepth == 0 && J->pt && frame == J->L->base - 1) { 797 if (J->framedepth == 0 && J->pt && frame == J->L->base - 1) {
729 if (check_downrec_unroll(J, pt)) { 798 if (check_downrec_unroll(J, pt)) {
730 J->maxslot = (BCReg)(rbase + gotresults); 799 J->maxslot = (BCReg)(rbase + gotresults);
731 lj_snap_purge(J); 800 lj_snap_purge(J);
732 rec_stop(J, LJ_TRLINK_DOWNREC, J->cur.traceno); /* Down-recursion. */ 801 lj_record_stop(J, LJ_TRLINK_DOWNREC, J->cur.traceno); /* Down-rec. */
733 return; 802 return;
734 } 803 }
735 lj_snap_add(J); 804 lj_snap_add(J);
@@ -742,7 +811,8 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
742 lua_assert(J->baseslot > cbase+1); 811 lua_assert(J->baseslot > cbase+1);
743 J->baseslot -= cbase+1; 812 J->baseslot -= cbase+1;
744 J->base -= cbase+1; 813 J->base -= cbase+1;
745 } else if (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))) { 814 } else if (J->parent == 0 && J->exitno == 0 &&
815 !bc_isret(bc_op(J->cur.startins))) {
746 /* Return to lower frame would leave the loop in a root trace. */ 816 /* Return to lower frame would leave the loop in a root trace. */
747 lj_trace_err(J, LJ_TRERR_LLEAVE); 817 lj_trace_err(J, LJ_TRERR_LLEAVE);
748 } else if (J->needsnap) { /* Tailcalled to ff with side-effects. */ 818 } else if (J->needsnap) { /* Tailcalled to ff with side-effects. */
@@ -774,7 +844,24 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
774 } else if (cont == lj_cont_nop) { 844 } else if (cont == lj_cont_nop) {
775 /* Nothing to do here. */ 845 /* Nothing to do here. */
776 } else if (cont == lj_cont_cat) { 846 } else if (cont == lj_cont_cat) {
777 lua_assert(0); 847 BCReg bslot = bc_b(*(frame_contpc(frame)-1));
848 TRef tr = gotresults ? J->base[cbase+rbase] : TREF_NIL;
849 if (bslot != cbase-2) { /* Concatenate the remainder. */
850 TValue *b = J->L->base, save; /* Simulate lower frame and result. */
851 J->base[cbase-2] = tr;
852 copyTV(J->L, &save, b-2);
853 if (gotresults) copyTV(J->L, b-2, b+rbase); else setnilV(b-2);
854 J->L->base = b - cbase;
855 tr = rec_cat(J, bslot, cbase-2);
856 b = J->L->base + cbase; /* Undo. */
857 J->L->base = b;
858 copyTV(J->L, b-2, &save);
859 }
860 if (tr) { /* Store final result. */
861 BCReg dst = bc_a(*(frame_contpc(frame)-1));
862 J->base[dst] = tr;
863 if (dst >= J->maxslot) J->maxslot = dst+1;
864 } /* Otherwise continue with another __concat call. */
778 } else { 865 } else {
779 /* Result type already specialized. */ 866 /* Result type already specialized. */
780 lua_assert(cont == lj_cont_condf || cont == lj_cont_condt); 867 lua_assert(cont == lj_cont_condf || cont == lj_cont_condt);
@@ -790,13 +877,11 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
790/* Prepare to record call to metamethod. */ 877/* Prepare to record call to metamethod. */
791static BCReg rec_mm_prep(jit_State *J, ASMFunction cont) 878static BCReg rec_mm_prep(jit_State *J, ASMFunction cont)
792{ 879{
793 BCReg s, top = curr_proto(J->L)->framesize; 880 BCReg s, top = cont == lj_cont_cat ? J->maxslot : curr_proto(J->L)->framesize;
794 TRef trcont;
795 setcont(&J->L->base[top], cont);
796#if LJ_64 881#if LJ_64
797 trcont = lj_ir_kptr(J, (void *)((int64_t)cont - (int64_t)lj_vm_asm_begin)); 882 TRef trcont = lj_ir_kptr(J, (void *)((int64_t)cont-(int64_t)lj_vm_asm_begin));
798#else 883#else
799 trcont = lj_ir_kptr(J, (void *)cont); 884 TRef trcont = lj_ir_kptr(J, (void *)cont);
800#endif 885#endif
801 J->base[top] = trcont | TREF_CONT; 886 J->base[top] = trcont | TREF_CONT;
802 J->framedepth++; 887 J->framedepth++;
@@ -877,7 +962,7 @@ nocheck:
877static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm) 962static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm)
878{ 963{
879 /* Set up metamethod call first to save ix->tab and ix->tabv. */ 964 /* Set up metamethod call first to save ix->tab and ix->tabv. */
880 BCReg func = rec_mm_prep(J, lj_cont_ra); 965 BCReg func = rec_mm_prep(J, mm == MM_concat ? lj_cont_cat : lj_cont_ra);
881 TRef *base = J->base + func; 966 TRef *base = J->base + func;
882 TValue *basev = J->L->base + func; 967 TValue *basev = J->L->base + func;
883 base[1] = ix->tab; base[2] = ix->key; 968 base[1] = ix->tab; base[2] = ix->key;
@@ -893,6 +978,7 @@ static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm)
893 lj_trace_err(J, LJ_TRERR_NOMM); 978 lj_trace_err(J, LJ_TRERR_NOMM);
894 } 979 }
895ok: 980ok:
981 lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */
896 base[0] = ix->mobj; 982 base[0] = ix->mobj;
897 copyTV(J->L, basev+0, &ix->mobjv); 983 copyTV(J->L, basev+0, &ix->mobjv);
898 lj_record_call(J, func, 2); 984 lj_record_call(J, func, 2);
@@ -909,6 +995,7 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv)
909 BCReg func = rec_mm_prep(J, lj_cont_ra); 995 BCReg func = rec_mm_prep(J, lj_cont_ra);
910 TRef *base = J->base + func; 996 TRef *base = J->base + func;
911 TValue *basev = J->L->base + func; 997 TValue *basev = J->L->base + func;
998 lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */
912 base[0] = ix.mobj; copyTV(J->L, basev+0, &ix.mobjv); 999 base[0] = ix.mobj; copyTV(J->L, basev+0, &ix.mobjv);
913 base[1] = tr; copyTV(J->L, basev+1, tv); 1000 base[1] = tr; copyTV(J->L, basev+1, tv);
914#if LJ_52 1001#if LJ_52
@@ -931,6 +1018,7 @@ static void rec_mm_callcomp(jit_State *J, RecordIndex *ix, int op)
931 BCReg func = rec_mm_prep(J, (op&1) ? lj_cont_condf : lj_cont_condt); 1018 BCReg func = rec_mm_prep(J, (op&1) ? lj_cont_condf : lj_cont_condt);
932 TRef *base = J->base + func; 1019 TRef *base = J->base + func;
933 TValue *tv = J->L->base + func; 1020 TValue *tv = J->L->base + func;
1021 lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */
934 base[0] = ix->mobj; base[1] = ix->val; base[2] = ix->key; 1022 base[0] = ix->mobj; base[1] = ix->val; base[2] = ix->key;
935 copyTV(J->L, tv+0, &ix->mobjv); 1023 copyTV(J->L, tv+0, &ix->mobjv);
936 copyTV(J->L, tv+1, &ix->valv); 1024 copyTV(J->L, tv+1, &ix->valv);
@@ -1078,11 +1166,12 @@ static void rec_idx_abc(jit_State *J, TRef asizeref, TRef ikey, uint32_t asize)
1078} 1166}
1079 1167
1080/* Record indexed key lookup. */ 1168/* Record indexed key lookup. */
1081static TRef rec_idx_key(jit_State *J, RecordIndex *ix) 1169static TRef rec_idx_key(jit_State *J, RecordIndex *ix, IRRef *rbref)
1082{ 1170{
1083 TRef key; 1171 TRef key;
1084 GCtab *t = tabV(&ix->tabv); 1172 GCtab *t = tabV(&ix->tabv);
1085 ix->oldv = lj_tab_get(J->L, t, &ix->keyv); /* Lookup previous value. */ 1173 ix->oldv = lj_tab_get(J->L, t, &ix->keyv); /* Lookup previous value. */
1174 *rbref = 0;
1086 1175
1087 /* Integer keys are looked up in the array part first. */ 1176 /* Integer keys are looked up in the array part first. */
1088 key = ix->key; 1177 key = ix->key;
@@ -1132,8 +1221,9 @@ static TRef rec_idx_key(jit_State *J, RecordIndex *ix)
1132 MSize hslot = (MSize)((char *)ix->oldv - (char *)&noderef(t->node)[0].val); 1221 MSize hslot = (MSize)((char *)ix->oldv - (char *)&noderef(t->node)[0].val);
1133 if (t->hmask > 0 && hslot <= t->hmask*(MSize)sizeof(Node) && 1222 if (t->hmask > 0 && hslot <= t->hmask*(MSize)sizeof(Node) &&
1134 hslot <= 65535*(MSize)sizeof(Node)) { 1223 hslot <= 65535*(MSize)sizeof(Node)) {
1135 TRef node, kslot; 1224 TRef node, kslot, hm;
1136 TRef hm = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_HMASK); 1225 *rbref = J->cur.nins; /* Mark possible rollback point. */
1226 hm = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_HMASK);
1137 emitir(IRTGI(IR_EQ), hm, lj_ir_kint(J, (int32_t)t->hmask)); 1227 emitir(IRTGI(IR_EQ), hm, lj_ir_kint(J, (int32_t)t->hmask));
1138 node = emitir(IRT(IR_FLOAD, IRT_P32), ix->tab, IRFL_TAB_NODE); 1228 node = emitir(IRT(IR_FLOAD, IRT_P32), ix->tab, IRFL_TAB_NODE);
1139 kslot = lj_ir_kslot(J, key, hslot / sizeof(Node)); 1229 kslot = lj_ir_kslot(J, key, hslot / sizeof(Node));
@@ -1166,6 +1256,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
1166{ 1256{
1167 TRef xref; 1257 TRef xref;
1168 IROp xrefop, loadop; 1258 IROp xrefop, loadop;
1259 IRRef rbref;
1169 cTValue *oldv; 1260 cTValue *oldv;
1170 1261
1171 while (!tref_istab(ix->tab)) { /* Handle non-table lookup. */ 1262 while (!tref_istab(ix->tab)) { /* Handle non-table lookup. */
@@ -1178,6 +1269,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
1178 BCReg func = rec_mm_prep(J, ix->val ? lj_cont_nop : lj_cont_ra); 1269 BCReg func = rec_mm_prep(J, ix->val ? lj_cont_nop : lj_cont_ra);
1179 TRef *base = J->base + func; 1270 TRef *base = J->base + func;
1180 TValue *tv = J->L->base + func; 1271 TValue *tv = J->L->base + func;
1272 lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */
1181 base[0] = ix->mobj; base[1] = ix->tab; base[2] = ix->key; 1273 base[0] = ix->mobj; base[1] = ix->tab; base[2] = ix->key;
1182 setfuncV(J->L, tv+0, funcV(&ix->mobjv)); 1274 setfuncV(J->L, tv+0, funcV(&ix->mobjv));
1183 copyTV(J->L, tv+1, &ix->tabv); 1275 copyTV(J->L, tv+1, &ix->tabv);
@@ -1211,7 +1303,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
1211 } 1303 }
1212 1304
1213 /* Record the key lookup. */ 1305 /* Record the key lookup. */
1214 xref = rec_idx_key(J, ix); 1306 xref = rec_idx_key(J, ix, &rbref);
1215 xrefop = IR(tref_ref(xref))->o; 1307 xrefop = IR(tref_ref(xref))->o;
1216 loadop = xrefop == IR_AREF ? IR_ALOAD : IR_HLOAD; 1308 loadop = xrefop == IR_AREF ? IR_ALOAD : IR_HLOAD;
1217 /* The lj_meta_tset() inconsistency is gone, but better play safe. */ 1309 /* The lj_meta_tset() inconsistency is gone, but better play safe. */
@@ -1226,6 +1318,8 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
1226 } else { 1318 } else {
1227 res = emitir(IRTG(loadop, t), xref, 0); 1319 res = emitir(IRTG(loadop, t), xref, 0);
1228 } 1320 }
1321 if (tref_ref(res) < rbref) /* HREFK + load forwarded? */
1322 lj_ir_rollback(J, rbref); /* Rollback to eliminate hmask guard. */
1229 if (t == IRT_NIL && ix->idxchain && lj_record_mm_lookup(J, ix, MM_index)) 1323 if (t == IRT_NIL && ix->idxchain && lj_record_mm_lookup(J, ix, MM_index))
1230 goto handlemm; 1324 goto handlemm;
1231 if (irtype_ispri(t)) res = TREF_PRI(t); /* Canonicalize primitives. */ 1325 if (irtype_ispri(t)) res = TREF_PRI(t); /* Canonicalize primitives. */
@@ -1233,6 +1327,8 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
1233 } else { /* Indexed store. */ 1327 } else { /* Indexed store. */
1234 GCtab *mt = tabref(tabV(&ix->tabv)->metatable); 1328 GCtab *mt = tabref(tabV(&ix->tabv)->metatable);
1235 int keybarrier = tref_isgcv(ix->key) && !tref_isnil(ix->val); 1329 int keybarrier = tref_isgcv(ix->key) && !tref_isnil(ix->val);
1330 if (tref_ref(xref) < rbref) /* HREFK forwarded? */
1331 lj_ir_rollback(J, rbref); /* Rollback to eliminate hmask guard. */
1236 if (tvisnil(oldv)) { /* Previous value was nil? */ 1332 if (tvisnil(oldv)) { /* Previous value was nil? */
1237 /* Need to duplicate the hasmm check for the early guards. */ 1333 /* Need to duplicate the hasmm check for the early guards. */
1238 int hasmm = 0; 1334 int hasmm = 0;
@@ -1290,6 +1386,22 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
1290 } 1386 }
1291} 1387}
1292 1388
1389static void rec_tsetm(jit_State *J, BCReg ra, BCReg rn, int32_t i)
1390{
1391 RecordIndex ix;
1392 cTValue *basev = J->L->base;
1393 copyTV(J->L, &ix.tabv, &basev[ra-1]);
1394 ix.tab = getslot(J, ra-1);
1395 ix.idxchain = 0;
1396 for (; ra < rn; i++, ra++) {
1397 setintV(&ix.keyv, i);
1398 ix.key = lj_ir_kint(J, i);
1399 copyTV(J->L, &ix.valv, &basev[ra]);
1400 ix.val = getslot(J, ra);
1401 lj_record_idx(J, &ix);
1402 }
1403}
1404
1293/* -- Upvalue access ------------------------------------------------------ */ 1405/* -- Upvalue access ------------------------------------------------------ */
1294 1406
1295/* Check whether upvalue is immutable and ok to constify. */ 1407/* Check whether upvalue is immutable and ok to constify. */
@@ -1401,9 +1513,9 @@ static void check_call_unroll(jit_State *J, TraceNo lnk)
1401 if (count + J->tailcalled > J->param[JIT_P_recunroll]) { 1513 if (count + J->tailcalled > J->param[JIT_P_recunroll]) {
1402 J->pc++; 1514 J->pc++;
1403 if (J->framedepth + J->retdepth == 0) 1515 if (J->framedepth + J->retdepth == 0)
1404 rec_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Tail-recursion. */ 1516 lj_record_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Tail-rec. */
1405 else 1517 else
1406 rec_stop(J, LJ_TRLINK_UPREC, J->cur.traceno); /* Up-recursion. */ 1518 lj_record_stop(J, LJ_TRLINK_UPREC, J->cur.traceno); /* Up-recursion. */
1407 } 1519 }
1408 } else { 1520 } else {
1409 if (count > J->param[JIT_P_callunroll]) { 1521 if (count > J->param[JIT_P_callunroll]) {
@@ -1477,9 +1589,9 @@ static void rec_func_jit(jit_State *J, TraceNo lnk)
1477 } 1589 }
1478 J->instunroll = 0; /* Cannot continue across a compiled function. */ 1590 J->instunroll = 0; /* Cannot continue across a compiled function. */
1479 if (J->pc == J->startpc && J->framedepth + J->retdepth == 0) 1591 if (J->pc == J->startpc && J->framedepth + J->retdepth == 0)
1480 rec_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Extra tail-recursion. */ 1592 lj_record_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Extra tail-rec. */
1481 else 1593 else
1482 rec_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the function. */ 1594 lj_record_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the function. */
1483} 1595}
1484 1596
1485/* -- Vararg handling ----------------------------------------------------- */ 1597/* -- Vararg handling ----------------------------------------------------- */
@@ -1524,7 +1636,8 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
1524 if (nvararg >= nresults) 1636 if (nvararg >= nresults)
1525 emitir(IRTGI(IR_GE), fr, lj_ir_kint(J, frofs+8*(int32_t)nresults)); 1637 emitir(IRTGI(IR_GE), fr, lj_ir_kint(J, frofs+8*(int32_t)nresults));
1526 else 1638 else
1527 emitir(IRTGI(IR_EQ), fr, lj_ir_kint(J, frame_ftsz(J->L->base-1))); 1639 emitir(IRTGI(IR_EQ), fr,
1640 lj_ir_kint(J, (int32_t)frame_ftsz(J->L->base-1)));
1528 vbase = emitir(IRTI(IR_SUB), REF_BASE, fr); 1641 vbase = emitir(IRTI(IR_SUB), REF_BASE, fr);
1529 vbase = emitir(IRT(IR_ADD, IRT_P32), vbase, lj_ir_kint(J, frofs-8)); 1642 vbase = emitir(IRT(IR_ADD, IRT_P32), vbase, lj_ir_kint(J, frofs-8));
1530 for (i = 0; i < nload; i++) { 1643 for (i = 0; i < nload; i++) {
@@ -1603,6 +1716,54 @@ static TRef rec_tnew(jit_State *J, uint32_t ah)
1603 return emitir(IRTG(IR_TNEW, IRT_TAB), asize, hbits); 1716 return emitir(IRTG(IR_TNEW, IRT_TAB), asize, hbits);
1604} 1717}
1605 1718
1719/* -- Concatenation ------------------------------------------------------- */
1720
/*
** Record concatenation of the slots baseslot..topslot (baseslot < topslot).
**
** Starting from the top, the trailing run of string/number operands is
** concatenated directly through the temporary string buffer. If that run
** covers the whole range, the resulting string reference is returned.
** Otherwise (or if the two topmost operands are not both strings/numbers)
** a call to the __concat metamethod is recorded for the two topmost
** remaining operands and 0 is returned, i.e. there is no result yet.
*/
1721static TRef rec_cat(jit_State *J, BCReg baseslot, BCReg topslot)
1722{
1723 TRef *top = &J->base[topslot];
 /* NOTE(review): five slots are saved around the metamethod call below;
 ** presumably these are the stack slots its setup overwrites -- confirm. */
1724 TValue savetv[5];
1725 BCReg s;
1726 RecordIndex ix;
1727 lua_assert(baseslot < topslot);
1728 for (s = baseslot; s <= topslot; s++)
1729 (void)getslot(J, s); /* Ensure all arguments have a reference. */
1730 if (tref_isnumber_str(top[0]) && tref_isnumber_str(top[-1])) {
1731 TRef tr, hdr, *trp, *xbase, *base = &J->base[baseslot];
1732 /* First convert numbers to strings. */
 /* Walk downwards from the top; stop at the first operand that is
 ** neither a number nor a string. */
1733 for (trp = top; trp >= base; trp--) {
1734 if (tref_isnumber(*trp))
1735 *trp = emitir(IRT(IR_TOSTR, IRT_STR), *trp,
1736 tref_isnum(*trp) ? IRTOSTR_NUM : IRTOSTR_INT);
1737 else if (!tref_isstr(*trp))
1738 break;
1739 }
 /* xbase is the lowest slot of the directly concatenable run. */
1740 xbase = ++trp;
 /* Emit buffer header, one put per operand (bottom to top), then the
 ** final buffer-to-string conversion. */
1741 tr = hdr = emitir(IRT(IR_BUFHDR, IRT_P32),
1742 lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET);
1743 do {
1744 tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, *trp++);
1745 } while (trp <= top);
1746 tr = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
1747 J->maxslot = (BCReg)(xbase - J->base);
1748 if (xbase == base) return tr; /* Return simple concatenation result. */
1749 /* Pass partial result. */
 /* The partial string replaces the run at xbase and becomes the new top
 ** operand; topslot now names that slot and maxslot the one below it. */
1750 topslot = J->maxslot--;
1751 *xbase = tr;
1752 top = xbase;
1753 setstrV(J->L, &ix.keyv, &J2G(J)->strempty); /* Simulate string result. */
1754 } else {
1755 J->maxslot = topslot-1;
1756 copyTV(J->L, &ix.keyv, &J->L->base[topslot]);
1757 }
 /* Operands of the metamethod call: the two topmost remaining values. */
1758 copyTV(J->L, &ix.tabv, &J->L->base[topslot-1]);
1759 ix.tab = top[-1];
1760 ix.key = top[0];
1761 memcpy(savetv, &J->L->base[topslot-1], sizeof(savetv)); /* Save slots. */
1762 rec_mm_arith(J, &ix, MM_concat); /* Call __concat metamethod. */
1763 memcpy(&J->L->base[topslot-1], savetv, sizeof(savetv)); /* Restore slots. */
1764 return 0; /* No result yet. */
1765}
1766
1606/* -- Record bytecode ops ------------------------------------------------- */ 1767/* -- Record bytecode ops ------------------------------------------------- */
1607 1768
1608/* Prepare for comparison. */ 1769/* Prepare for comparison. */
@@ -1641,7 +1802,7 @@ void lj_record_ins(jit_State *J)
1641 if (LJ_UNLIKELY(J->postproc != LJ_POST_NONE)) { 1802 if (LJ_UNLIKELY(J->postproc != LJ_POST_NONE)) {
1642 switch (J->postproc) { 1803 switch (J->postproc) {
1643 case LJ_POST_FIXCOMP: /* Fixup comparison. */ 1804 case LJ_POST_FIXCOMP: /* Fixup comparison. */
1644 pc = frame_pc(&J2G(J)->tmptv); 1805 pc = (const BCIns *)(uintptr_t)J2G(J)->tmptv.u64;
1645 rec_comp_fixup(J, pc, (!tvistruecond(&J2G(J)->tmptv2) ^ (bc_op(*pc)&1))); 1806 rec_comp_fixup(J, pc, (!tvistruecond(&J2G(J)->tmptv2) ^ (bc_op(*pc)&1)));
1646 /* fallthrough */ 1807 /* fallthrough */
1647 case LJ_POST_FIXGUARD: /* Fixup and emit pending guard. */ 1808 case LJ_POST_FIXGUARD: /* Fixup and emit pending guard. */
@@ -1709,6 +1870,10 @@ void lj_record_ins(jit_State *J)
1709 rec_check_ir(J); 1870 rec_check_ir(J);
1710#endif 1871#endif
1711 1872
1873#if LJ_HASPROFILE
1874 rec_profile_ins(J, pc);
1875#endif
1876
1712 /* Keep a copy of the runtime values of var/num/str operands. */ 1877 /* Keep a copy of the runtime values of var/num/str operands. */
1713#define rav (&ix.valv) 1878#define rav (&ix.valv)
1714#define rbv (&ix.tabv) 1879#define rbv (&ix.tabv)
@@ -1735,7 +1900,7 @@ void lj_record_ins(jit_State *J)
1735 switch (bcmode_c(op)) { 1900 switch (bcmode_c(op)) {
1736 case BCMvar: 1901 case BCMvar:
1737 copyTV(J->L, rcv, &lbase[rc]); ix.key = rc = getslot(J, rc); break; 1902 copyTV(J->L, rcv, &lbase[rc]); ix.key = rc = getslot(J, rc); break;
1738 case BCMpri: setitype(rcv, ~rc); ix.key = rc = TREF_PRI(IRT_NIL+rc); break; 1903 case BCMpri: setpriV(rcv, ~rc); ix.key = rc = TREF_PRI(IRT_NIL+rc); break;
1739 case BCMnum: { cTValue *tv = proto_knumtv(J->pt, rc); 1904 case BCMnum: { cTValue *tv = proto_knumtv(J->pt, rc);
1740 copyTV(J->L, rcv, tv); ix.key = rc = tvisint(tv) ? lj_ir_kint(J, intV(tv)) : 1905 copyTV(J->L, rcv, tv); ix.key = rc = tvisint(tv) ? lj_ir_kint(J, intV(tv)) :
1741 lj_ir_knumint(J, numV(tv)); } break; 1906 lj_ir_knumint(J, numV(tv)); } break;
@@ -1830,6 +1995,18 @@ void lj_record_ins(jit_State *J)
1830 J->maxslot = bc_a(pc[1]); /* Shrink used slots. */ 1995 J->maxslot = bc_a(pc[1]); /* Shrink used slots. */
1831 break; 1996 break;
1832 1997
1998 case BC_ISTYPE: case BC_ISNUM:
1999 /* These coercions need to correspond with lj_meta_istype(). */
2000 if (LJ_DUALNUM && rc == ~LJ_TNUMX+1)
2001 ra = lj_opt_narrow_toint(J, ra);
2002 else if (rc == ~LJ_TNUMX+2)
2003 ra = lj_ir_tonum(J, ra);
2004 else if (rc == ~LJ_TSTR+1)
2005 ra = lj_ir_tostr(J, ra);
2006 /* else: type specialization suffices. */
2007 J->base[bc_a(ins)] = ra;
2008 break;
2009
1833 /* -- Unary ops --------------------------------------------------------- */ 2010 /* -- Unary ops --------------------------------------------------------- */
1834 2011
1835 case BC_NOT: 2012 case BC_NOT:
@@ -1893,6 +2070,12 @@ void lj_record_ins(jit_State *J)
1893 rc = rec_mm_arith(J, &ix, MM_pow); 2070 rc = rec_mm_arith(J, &ix, MM_pow);
1894 break; 2071 break;
1895 2072
2073 /* -- Miscellaneous ops ------------------------------------------------- */
2074
2075 case BC_CAT:
2076 rc = rec_cat(J, rb, rc);
2077 break;
2078
1896 /* -- Constant and move ops --------------------------------------------- */ 2079 /* -- Constant and move ops --------------------------------------------- */
1897 2080
1898 case BC_MOV: 2081 case BC_MOV:
@@ -1941,6 +2124,14 @@ void lj_record_ins(jit_State *J)
1941 ix.idxchain = LJ_MAX_IDXCHAIN; 2124 ix.idxchain = LJ_MAX_IDXCHAIN;
1942 rc = lj_record_idx(J, &ix); 2125 rc = lj_record_idx(J, &ix);
1943 break; 2126 break;
2127 case BC_TGETR: case BC_TSETR:
2128 ix.idxchain = 0;
2129 rc = lj_record_idx(J, &ix);
2130 break;
2131
2132 case BC_TSETM:
2133 rec_tsetm(J, ra, (BCReg)(J->L->top - J->L->base), (int32_t)rcv->u32.lo);
2134 break;
1944 2135
1945 case BC_TNEW: 2136 case BC_TNEW:
1946 rc = rec_tnew(J, rc); 2137 rc = rec_tnew(J, rc);
@@ -1953,28 +2144,28 @@ void lj_record_ins(jit_State *J)
1953 /* -- Calls and vararg handling ----------------------------------------- */ 2144 /* -- Calls and vararg handling ----------------------------------------- */
1954 2145
1955 case BC_ITERC: 2146 case BC_ITERC:
1956 J->base[ra] = getslot(J, ra-3); 2147 J->base[ra] = getslot(J, ra-3-LJ_FR2);
1957 J->base[ra+1] = getslot(J, ra-2); 2148 J->base[ra+1] = getslot(J, ra-2-LJ_FR2);
1958 J->base[ra+2] = getslot(J, ra-1); 2149 J->base[ra+2] = getslot(J, ra-1-LJ_FR2);
1959 { /* Do the actual copy now because lj_record_call needs the values. */ 2150 { /* Do the actual copy now because lj_record_call needs the values. */
1960 TValue *b = &J->L->base[ra]; 2151 TValue *b = &J->L->base[ra];
1961 copyTV(J->L, b, b-3); 2152 copyTV(J->L, b, b-3-LJ_FR2);
1962 copyTV(J->L, b+1, b-2); 2153 copyTV(J->L, b+1, b-2-LJ_FR2);
1963 copyTV(J->L, b+2, b-1); 2154 copyTV(J->L, b+2, b-1-LJ_FR2);
1964 } 2155 }
1965 lj_record_call(J, ra, (ptrdiff_t)rc-1); 2156 lj_record_call(J, ra, (ptrdiff_t)rc-1);
1966 break; 2157 break;
1967 2158
1968 /* L->top is set to L->base+ra+rc+NARGS-1+1. See lj_dispatch_ins(). */ 2159 /* L->top is set to L->base+ra+rc+NARGS-1+1. See lj_dispatch_ins(). */
1969 case BC_CALLM: 2160 case BC_CALLM:
1970 rc = (BCReg)(J->L->top - J->L->base) - ra; 2161 rc = (BCReg)(J->L->top - J->L->base) - ra - LJ_FR2;
1971 /* fallthrough */ 2162 /* fallthrough */
1972 case BC_CALL: 2163 case BC_CALL:
1973 lj_record_call(J, ra, (ptrdiff_t)rc-1); 2164 lj_record_call(J, ra, (ptrdiff_t)rc-1);
1974 break; 2165 break;
1975 2166
1976 case BC_CALLMT: 2167 case BC_CALLMT:
1977 rc = (BCReg)(J->L->top - J->L->base) - ra; 2168 rc = (BCReg)(J->L->top - J->L->base) - ra - LJ_FR2;
1978 /* fallthrough */ 2169 /* fallthrough */
1979 case BC_CALLT: 2170 case BC_CALLT:
1980 lj_record_tailcall(J, ra, (ptrdiff_t)rc-1); 2171 lj_record_tailcall(J, ra, (ptrdiff_t)rc-1);
@@ -1991,6 +2182,9 @@ void lj_record_ins(jit_State *J)
1991 rc = (BCReg)(J->L->top - J->L->base) - ra + 1; 2182 rc = (BCReg)(J->L->top - J->L->base) - ra + 1;
1992 /* fallthrough */ 2183 /* fallthrough */
1993 case BC_RET: case BC_RET0: case BC_RET1: 2184 case BC_RET: case BC_RET0: case BC_RET1:
2185#if LJ_HASPROFILE
2186 rec_profile_ret(J);
2187#endif
1994 lj_record_ret(J, ra, (ptrdiff_t)rc-1); 2188 lj_record_ret(J, ra, (ptrdiff_t)rc-1);
1995 break; 2189 break;
1996 2190
@@ -2003,7 +2197,7 @@ void lj_record_ins(jit_State *J)
2003 case BC_JFORI: 2197 case BC_JFORI:
2004 lua_assert(bc_op(pc[(ptrdiff_t)rc-BCBIAS_J]) == BC_JFORL); 2198 lua_assert(bc_op(pc[(ptrdiff_t)rc-BCBIAS_J]) == BC_JFORL);
2005 if (rec_for(J, pc, 0) != LOOPEV_LEAVE) /* Link to existing loop. */ 2199 if (rec_for(J, pc, 0) != LOOPEV_LEAVE) /* Link to existing loop. */
2006 rec_stop(J, LJ_TRLINK_ROOT, bc_d(pc[(ptrdiff_t)rc-BCBIAS_J])); 2200 lj_record_stop(J, LJ_TRLINK_ROOT, bc_d(pc[(ptrdiff_t)rc-BCBIAS_J]));
2007 /* Continue tracing if the loop is not entered. */ 2201 /* Continue tracing if the loop is not entered. */
2008 break; 2202 break;
2009 2203
@@ -2070,10 +2264,8 @@ void lj_record_ins(jit_State *J)
2070 /* fallthrough */ 2264 /* fallthrough */
2071 case BC_ITERN: 2265 case BC_ITERN:
2072 case BC_ISNEXT: 2266 case BC_ISNEXT:
2073 case BC_CAT:
2074 case BC_UCLO: 2267 case BC_UCLO:
2075 case BC_FNEW: 2268 case BC_FNEW:
2076 case BC_TSETM:
2077 setintV(&J->errinfo, (int32_t)op); 2269 setintV(&J->errinfo, (int32_t)op);
2078 lj_trace_err_info(J, LJ_TRERR_NYIBC); 2270 lj_trace_err_info(J, LJ_TRERR_NYIBC);
2079 break; 2271 break;
@@ -2139,6 +2331,12 @@ static const BCIns *rec_setup_root(jit_State *J)
2139 J->maxslot = J->pt->numparams; 2331 J->maxslot = J->pt->numparams;
2140 pc++; 2332 pc++;
2141 break; 2333 break;
2334 case BC_CALLM:
2335 case BC_CALL:
2336 case BC_ITERC:
2337 /* No bytecode range check for stitched traces. */
2338 pc++;
2339 break;
2142 default: 2340 default:
2143 lua_assert(0); 2341 lua_assert(0);
2144 break; 2342 break;
@@ -2207,7 +2405,7 @@ void lj_record_setup(jit_State *J)
2207 if (traceref(J, J->cur.root)->nchild >= J->param[JIT_P_maxside] || 2405 if (traceref(J, J->cur.root)->nchild >= J->param[JIT_P_maxside] ||
2208 T->snap[J->exitno].count >= J->param[JIT_P_hotexit] + 2406 T->snap[J->exitno].count >= J->param[JIT_P_hotexit] +
2209 J->param[JIT_P_tryside]) { 2407 J->param[JIT_P_tryside]) {
2210 rec_stop(J, LJ_TRLINK_INTERP, 0); 2408 lj_record_stop(J, LJ_TRLINK_INTERP, 0);
2211 } 2409 }
2212 } else { /* Root trace. */ 2410 } else { /* Root trace. */
2213 J->cur.root = 0; 2411 J->cur.root = 0;
@@ -2219,9 +2417,15 @@ void lj_record_setup(jit_State *J)
2219 lj_snap_add(J); 2417 lj_snap_add(J);
2220 if (bc_op(J->cur.startins) == BC_FORL) 2418 if (bc_op(J->cur.startins) == BC_FORL)
2221 rec_for_loop(J, J->pc-1, &J->scev, 1); 2419 rec_for_loop(J, J->pc-1, &J->scev, 1);
2420 else if (bc_op(J->cur.startins) == BC_ITERC)
2421 J->startpc = NULL;
2222 if (1 + J->pt->framesize >= LJ_MAX_JSLOTS) 2422 if (1 + J->pt->framesize >= LJ_MAX_JSLOTS)
2223 lj_trace_err(J, LJ_TRERR_STACKOV); 2423 lj_trace_err(J, LJ_TRERR_STACKOV);
2224 } 2424 }
2425#if LJ_HASPROFILE
2426 J->prev_pt = NULL;
2427 J->prev_line = -1;
2428#endif
2225#ifdef LUAJIT_ENABLE_CHECKHOOK 2429#ifdef LUAJIT_ENABLE_CHECKHOOK
2226 /* Regularly check for instruction/line hooks from compiled code and 2430 /* Regularly check for instruction/line hooks from compiled code and
2227 ** exit to the interpreter if the hooks are set. 2431 ** exit to the interpreter if the hooks are set.
diff --git a/src/lj_record.h b/src/lj_record.h
index c9f4882a..732adb47 100644
--- a/src/lj_record.h
+++ b/src/lj_record.h
@@ -28,6 +28,7 @@ typedef struct RecordIndex {
28 28
29LJ_FUNC int lj_record_objcmp(jit_State *J, TRef a, TRef b, 29LJ_FUNC int lj_record_objcmp(jit_State *J, TRef a, TRef b,
30 cTValue *av, cTValue *bv); 30 cTValue *av, cTValue *bv);
31LJ_FUNC void lj_record_stop(jit_State *J, TraceLink linktype, TraceNo lnk);
31LJ_FUNC TRef lj_record_constify(jit_State *J, cTValue *o); 32LJ_FUNC TRef lj_record_constify(jit_State *J, cTValue *o);
32 33
33LJ_FUNC void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs); 34LJ_FUNC void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs);
diff --git a/src/lj_snap.c b/src/lj_snap.c
index 5c870baf..d8e7987c 100644
--- a/src/lj_snap.c
+++ b/src/lj_snap.c
@@ -97,8 +97,10 @@ static BCReg snapshot_framelinks(jit_State *J, SnapEntry *map)
97{ 97{
98 cTValue *frame = J->L->base - 1; 98 cTValue *frame = J->L->base - 1;
99 cTValue *lim = J->L->base - J->baseslot; 99 cTValue *lim = J->L->base - J->baseslot;
100 cTValue *ftop = frame + funcproto(frame_func(frame))->framesize; 100 GCfunc *fn = frame_func(frame);
101 cTValue *ftop = isluafunc(fn) ? (frame+funcproto(fn)->framesize) : J->L->top;
101 MSize f = 0; 102 MSize f = 0;
103 lua_assert(!LJ_FR2); /* TODO_FR2: store 64 bit PCs. */
102 map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */ 104 map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */
103 while (frame > lim) { /* Backwards traversal of all frames above base. */ 105 while (frame > lim) { /* Backwards traversal of all frames above base. */
104 if (frame_islua(frame)) { 106 if (frame_islua(frame)) {
@@ -240,7 +242,8 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf,
240 case BCMbase: 242 case BCMbase:
241 if (op >= BC_CALLM && op <= BC_VARG) { 243 if (op >= BC_CALLM && op <= BC_VARG) {
242 BCReg top = (op == BC_CALLM || op == BC_CALLMT || bc_c(ins) == 0) ? 244 BCReg top = (op == BC_CALLM || op == BC_CALLMT || bc_c(ins) == 0) ?
243 maxslot : (bc_a(ins) + bc_c(ins)); 245 maxslot : (bc_a(ins) + bc_c(ins)+LJ_FR2);
246 if (LJ_FR2) DEF_SLOT(bc_a(ins)+1);
244 s = bc_a(ins) - ((op == BC_ITERC || op == BC_ITERN) ? 3 : 0); 247 s = bc_a(ins) - ((op == BC_ITERC || op == BC_ITERN) ? 3 : 0);
245 for (; s < top; s++) USE_SLOT(s); 248 for (; s < top; s++) USE_SLOT(s);
246 for (; s < maxslot; s++) DEF_SLOT(s); 249 for (; s < maxslot; s++) DEF_SLOT(s);
@@ -599,6 +602,7 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
599 } 602 }
600 if (LJ_UNLIKELY(bloomtest(rfilt, ref))) 603 if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
601 rs = snap_renameref(T, snapno, ref, rs); 604 rs = snap_renameref(T, snapno, ref, rs);
605 lua_assert(!LJ_GC64); /* TODO_GC64: handle 64 bit references. */
602 if (ra_hasspill(regsp_spill(rs))) { /* Restore from spill slot. */ 606 if (ra_hasspill(regsp_spill(rs))) { /* Restore from spill slot. */
603 int32_t *sps = &ex->spill[regsp_spill(rs)]; 607 int32_t *sps = &ex->spill[regsp_spill(rs)];
604 if (irt_isinteger(t)) { 608 if (irt_isinteger(t)) {
@@ -612,8 +616,7 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
612 o->u64 = *(uint64_t *)sps; 616 o->u64 = *(uint64_t *)sps;
613 } else { 617 } else {
614 lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */ 618 lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */
615 setgcrefi(o->gcr, *sps); 619 setgcV(J->L, o, (GCobj *)(uintptr_t)*(GCSize *)sps, irt_toitype(t));
616 setitype(o, irt_toitype(t));
617 } 620 }
618 } else { /* Restore from register. */ 621 } else { /* Restore from register. */
619 Reg r = regsp_reg(rs); 622 Reg r = regsp_reg(rs);
@@ -631,10 +634,10 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
631 } else if (LJ_64 && irt_islightud(t)) { 634 } else if (LJ_64 && irt_islightud(t)) {
632 /* 64 bit lightuserdata which may escape already has the tag bits. */ 635 /* 64 bit lightuserdata which may escape already has the tag bits. */
633 o->u64 = ex->gpr[r-RID_MIN_GPR]; 636 o->u64 = ex->gpr[r-RID_MIN_GPR];
637 } else if (irt_ispri(t)) {
638 setpriV(o, irt_toitype(t));
634 } else { 639 } else {
635 if (!irt_ispri(t)) 640 setgcV(J->L, o, (GCobj *)ex->gpr[r-RID_MIN_GPR], irt_toitype(t));
636 setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]);
637 setitype(o, irt_toitype(t));
638 } 641 }
639 } 642 }
640} 643}
@@ -795,7 +798,7 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
795 MSize n, nent = snap->nent; 798 MSize n, nent = snap->nent;
796 SnapEntry *map = &T->snapmap[snap->mapofs]; 799 SnapEntry *map = &T->snapmap[snap->mapofs];
797 SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1]; 800 SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1];
798 int32_t ftsz0; 801 ptrdiff_t ftsz0;
799 TValue *frame; 802 TValue *frame;
800 BloomFilter rfilt = snap_renamefilter(T, snapno); 803 BloomFilter rfilt = snap_renamefilter(T, snapno);
801 const BCIns *pc = snap_pc(map[nent]); 804 const BCIns *pc = snap_pc(map[nent]);
@@ -836,8 +839,9 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
836 snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp); 839 snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp);
837 o->u32.hi = tmp.u32.lo; 840 o->u32.hi = tmp.u32.lo;
838 } else if ((sn & (SNAP_CONT|SNAP_FRAME))) { 841 } else if ((sn & (SNAP_CONT|SNAP_FRAME))) {
842 lua_assert(!LJ_FR2); /* TODO_FR2: store 64 bit PCs. */
839 /* Overwrite tag with frame link. */ 843 /* Overwrite tag with frame link. */
840 o->fr.tp.ftsz = snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0; 844 setframe_ftsz(o, snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0);
841 L->base = o+1; 845 L->base = o+1;
842 } 846 }
843 } 847 }
diff --git a/src/lj_state.c b/src/lj_state.c
index e654afae..84b4d113 100644
--- a/src/lj_state.c
+++ b/src/lj_state.c
@@ -12,6 +12,7 @@
12#include "lj_obj.h" 12#include "lj_obj.h"
13#include "lj_gc.h" 13#include "lj_gc.h"
14#include "lj_err.h" 14#include "lj_err.h"
15#include "lj_buf.h"
15#include "lj_str.h" 16#include "lj_str.h"
16#include "lj_tab.h" 17#include "lj_tab.h"
17#include "lj_func.h" 18#include "lj_func.h"
@@ -26,6 +27,7 @@
26#include "lj_vm.h" 27#include "lj_vm.h"
27#include "lj_lex.h" 28#include "lj_lex.h"
28#include "lj_alloc.h" 29#include "lj_alloc.h"
30#include "luajit.h"
29 31
30/* -- Stack handling ------------------------------------------------------ */ 32/* -- Stack handling ------------------------------------------------------ */
31 33
@@ -47,6 +49,7 @@
47** one extra slot if mobj is not a function. Only lj_meta_tset needs 5 49** one extra slot if mobj is not a function. Only lj_meta_tset needs 5
48** slots above top, but then mobj is always a function. So we can get by 50** slots above top, but then mobj is always a function. So we can get by
49** with 5 extra slots. 51** with 5 extra slots.
52** LJ_FR2: We need 2 more slots for the frame PC and the continuation PC.
50*/ 53*/
51 54
52/* Resize stack slots and adjust pointers in state. */ 55/* Resize stack slots and adjust pointers in state. */
@@ -59,7 +62,7 @@ static void resizestack(lua_State *L, MSize n)
59 GCobj *up; 62 GCobj *up;
60 lua_assert((MSize)(tvref(L->maxstack)-oldst)==L->stacksize-LJ_STACK_EXTRA-1); 63 lua_assert((MSize)(tvref(L->maxstack)-oldst)==L->stacksize-LJ_STACK_EXTRA-1);
61 st = (TValue *)lj_mem_realloc(L, tvref(L->stack), 64 st = (TValue *)lj_mem_realloc(L, tvref(L->stack),
62 (MSize)(L->stacksize*sizeof(TValue)), 65 (MSize)(oldsize*sizeof(TValue)),
63 (MSize)(realsize*sizeof(TValue))); 66 (MSize)(realsize*sizeof(TValue)));
64 setmref(L->stack, st); 67 setmref(L->stack, st);
65 delta = (char *)st - (char *)oldst; 68 delta = (char *)st - (char *)oldst;
@@ -67,12 +70,12 @@ static void resizestack(lua_State *L, MSize n)
67 while (oldsize < realsize) /* Clear new slots. */ 70 while (oldsize < realsize) /* Clear new slots. */
68 setnilV(st + oldsize++); 71 setnilV(st + oldsize++);
69 L->stacksize = realsize; 72 L->stacksize = realsize;
73 if ((size_t)(mref(G(L)->jit_base, char) - (char *)oldst) < oldsize)
74 setmref(G(L)->jit_base, mref(G(L)->jit_base, char) + delta);
70 L->base = (TValue *)((char *)L->base + delta); 75 L->base = (TValue *)((char *)L->base + delta);
71 L->top = (TValue *)((char *)L->top + delta); 76 L->top = (TValue *)((char *)L->top + delta);
72 for (up = gcref(L->openupval); up != NULL; up = gcnext(up)) 77 for (up = gcref(L->openupval); up != NULL; up = gcnext(up))
73 setmref(gco2uv(up)->v, (TValue *)((char *)uvval(gco2uv(up)) + delta)); 78 setmref(gco2uv(up)->v, (TValue *)((char *)uvval(gco2uv(up)) + delta));
74 if (obj2gco(L) == gcref(G(L)->jit_L))
75 setmref(G(L)->jit_base, mref(G(L)->jit_base, char) + delta);
76} 79}
77 80
78/* Relimit stack after error, in case the limit was overdrawn. */ 81/* Relimit stack after error, in case the limit was overdrawn. */
@@ -89,7 +92,8 @@ void lj_state_shrinkstack(lua_State *L, MSize used)
89 return; /* Avoid stack shrinking while handling stack overflow. */ 92 return; /* Avoid stack shrinking while handling stack overflow. */
90 if (4*used < L->stacksize && 93 if (4*used < L->stacksize &&
91 2*(LJ_STACK_START+LJ_STACK_EXTRA) < L->stacksize && 94 2*(LJ_STACK_START+LJ_STACK_EXTRA) < L->stacksize &&
92 obj2gco(L) != gcref(G(L)->jit_L)) /* Don't shrink stack of live trace. */ 95 /* Don't shrink stack of live trace. */
96 (tvref(G(L)->jit_base) == NULL || obj2gco(L) != gcref(G(L)->cur_L)))
93 resizestack(L, L->stacksize >> 1); 97 resizestack(L, L->stacksize >> 1);
94} 98}
95 99
@@ -125,8 +129,9 @@ static void stack_init(lua_State *L1, lua_State *L)
125 L1->stacksize = LJ_STACK_START + LJ_STACK_EXTRA; 129 L1->stacksize = LJ_STACK_START + LJ_STACK_EXTRA;
126 stend = st + L1->stacksize; 130 stend = st + L1->stacksize;
127 setmref(L1->maxstack, stend - LJ_STACK_EXTRA - 1); 131 setmref(L1->maxstack, stend - LJ_STACK_EXTRA - 1);
128 L1->base = L1->top = st+1; 132 setthreadV(L1, st++, L1); /* Needed for curr_funcisL() on empty stack. */
129 setthreadV(L1, st, L1); /* Needed for curr_funcisL() on empty stack. */ 133 if (LJ_FR2) setnilV(st++);
134 L1->base = L1->top = st;
130 while (st < stend) /* Clear new slots. */ 135 while (st < stend) /* Clear new slots. */
131 setnilV(st++); 136 setnilV(st++);
132} 137}
@@ -164,7 +169,7 @@ static void close_state(lua_State *L)
164 lj_ctype_freestate(g); 169 lj_ctype_freestate(g);
165#endif 170#endif
166 lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef); 171 lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef);
167 lj_str_freebuf(g, &g->tmpbuf); 172 lj_buf_free(g, &g->tmpbuf);
168 lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue); 173 lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue);
169 lua_assert(g->gc.total == sizeof(GG_State)); 174 lua_assert(g->gc.total == sizeof(GG_State));
170#ifndef LUAJIT_USE_SYSMALLOC 175#ifndef LUAJIT_USE_SYSMALLOC
@@ -184,7 +189,7 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)
184 GG_State *GG = (GG_State *)f(ud, NULL, 0, sizeof(GG_State)); 189 GG_State *GG = (GG_State *)f(ud, NULL, 0, sizeof(GG_State));
185 lua_State *L = &GG->L; 190 lua_State *L = &GG->L;
186 global_State *g = &GG->g; 191 global_State *g = &GG->g;
187 if (GG == NULL || !checkptr32(GG)) return NULL; 192 if (GG == NULL || !checkptrGC(GG)) return NULL;
188 memset(GG, 0, sizeof(GG_State)); 193 memset(GG, 0, sizeof(GG_State));
189 L->gct = ~LJ_TTHREAD; 194 L->gct = ~LJ_TTHREAD;
190 L->marked = LJ_GC_WHITE0 | LJ_GC_FIXED | LJ_GC_SFIXED; /* Prevent free. */ 195 L->marked = LJ_GC_WHITE0 | LJ_GC_FIXED | LJ_GC_SFIXED; /* Prevent free. */
@@ -202,8 +207,10 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)
202 setnilV(registry(L)); 207 setnilV(registry(L));
203 setnilV(&g->nilnode.val); 208 setnilV(&g->nilnode.val);
204 setnilV(&g->nilnode.key); 209 setnilV(&g->nilnode.key);
210#if !LJ_GC64
205 setmref(g->nilnode.freetop, &g->nilnode); 211 setmref(g->nilnode.freetop, &g->nilnode);
206 lj_str_initbuf(&g->tmpbuf); 212#endif
213 lj_buf_init(NULL, &g->tmpbuf);
207 g->gc.state = GCSpause; 214 g->gc.state = GCSpause;
208 setgcref(g->gc.root, obj2gco(L)); 215 setgcref(g->gc.root, obj2gco(L));
209 setmref(g->gc.sweep, &g->gc.root); 216 setmref(g->gc.sweep, &g->gc.root);
@@ -236,6 +243,10 @@ LUA_API void lua_close(lua_State *L)
236 global_State *g = G(L); 243 global_State *g = G(L);
237 int i; 244 int i;
238 L = mainthread(g); /* Only the main thread can be closed. */ 245 L = mainthread(g); /* Only the main thread can be closed. */
246#if LJ_HASPROFILE
247 luaJIT_profile_stop(L);
248#endif
249 setgcrefnull(g->cur_L);
239 lj_func_closeuv(L, tvref(L->stack)); 250 lj_func_closeuv(L, tvref(L->stack));
240 lj_gc_separateudata(g, 1); /* Separate udata which have GC metamethods. */ 251 lj_gc_separateudata(g, 1); /* Separate udata which have GC metamethods. */
241#if LJ_HASJIT 252#if LJ_HASJIT
@@ -246,8 +257,8 @@ LUA_API void lua_close(lua_State *L)
246 for (i = 0;;) { 257 for (i = 0;;) {
247 hook_enter(g); 258 hook_enter(g);
248 L->status = 0; 259 L->status = 0;
260 L->base = L->top = tvref(L->stack) + 1 + LJ_FR2;
249 L->cframe = NULL; 261 L->cframe = NULL;
250 L->base = L->top = tvref(L->stack) + 1;
251 if (lj_vm_cpcall(L, NULL, NULL, cpfinalize) == 0) { 262 if (lj_vm_cpcall(L, NULL, NULL, cpfinalize) == 0) {
252 if (++i >= 10) break; 263 if (++i >= 10) break;
253 lj_gc_separateudata(g, 1); /* Separate udata again. */ 264 lj_gc_separateudata(g, 1); /* Separate udata again. */
@@ -279,6 +290,8 @@ lua_State *lj_state_new(lua_State *L)
279void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L) 290void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L)
280{ 291{
281 lua_assert(L != mainthread(g)); 292 lua_assert(L != mainthread(g));
293 if (obj2gco(L) == gcref(g->cur_L))
294 setgcrefnull(g->cur_L);
282 lj_func_closeuv(L, tvref(L->stack)); 295 lj_func_closeuv(L, tvref(L->stack));
283 lua_assert(gcref(L->openupval) == NULL); 296 lua_assert(gcref(L->openupval) == NULL);
284 lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue); 297 lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue);
diff --git a/src/lj_str.c b/src/lj_str.c
index ca60bccb..dd324500 100644
--- a/src/lj_str.c
+++ b/src/lj_str.c
@@ -1,13 +1,8 @@
1/* 1/*
2** String handling. 2** String handling.
3** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h 3** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
4**
5** Portions taken verbatim or adapted from the Lua interpreter.
6** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
7*/ 4*/
8 5
9#include <stdio.h>
10
11#define lj_str_c 6#define lj_str_c
12#define LUA_CORE 7#define LUA_CORE
13 8
@@ -15,10 +10,9 @@
15#include "lj_gc.h" 10#include "lj_gc.h"
16#include "lj_err.h" 11#include "lj_err.h"
17#include "lj_str.h" 12#include "lj_str.h"
18#include "lj_state.h"
19#include "lj_char.h" 13#include "lj_char.h"
20 14
21/* -- String interning ---------------------------------------------------- */ 15/* -- String helpers ------------------------------------------------------ */
22 16
23/* Ordered compare of strings. Assumes string data is 4-byte aligned. */ 17/* Ordered compare of strings. Assumes string data is 4-byte aligned. */
24int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b) 18int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b)
@@ -64,6 +58,40 @@ static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len)
64 return 0; 58 return 0;
65} 59}
66 60
61/* Find fixed string p inside string s. */
62const char *lj_str_find(const char *s, const char *p, MSize slen, MSize plen)
63{
64 if (plen <= slen) {
65 if (plen == 0) {
66 return s;
67 } else {
68 int c = *(const uint8_t *)p++;
69 plen--; slen -= plen;
70 while (slen) {
71 const char *q = (const char *)memchr(s, c, slen);
72 if (!q) break;
73 if (memcmp(q+1, p, plen) == 0) return q;
74 q++; slen -= (MSize)(q-s); s = q;
75 }
76 }
77 }
78 return NULL;
79}
80
81/* Check whether a string has a pattern matching character. */
82int lj_str_haspattern(GCstr *s)
83{
84 const char *p = strdata(s), *q = p + s->len;
85 while (p < q) {
86 int c = *(const uint8_t *)p++;
87 if (lj_char_ispunct(c) && strchr("^$*+?.([%-", c))
88 return 1; /* Found a pattern matching char. */
89 }
90 return 0; /* No pattern matching chars found. */
91}
92
93/* -- String interning ---------------------------------------------------- */
94
67/* Resize the string hash table (grow and shrink). */ 95/* Resize the string hash table (grow and shrink). */
68void lj_str_resize(lua_State *L, MSize newmask) 96void lj_str_resize(lua_State *L, MSize newmask)
69{ 97{
@@ -167,173 +195,3 @@ void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s)
167 lj_mem_free(g, s, sizestring(s)); 195 lj_mem_free(g, s, sizestring(s));
168} 196}
169 197
170/* -- Type conversions ---------------------------------------------------- */
171
172/* Print number to buffer. Canonicalizes non-finite values. */
173size_t LJ_FASTCALL lj_str_bufnum(char *s, cTValue *o)
174{
175 if (LJ_LIKELY((o->u32.hi << 1) < 0xffe00000)) { /* Finite? */
176 lua_Number n = o->n;
177#if __BIONIC__
178 if (tvismzero(o)) { s[0] = '-'; s[1] = '0'; return 2; }
179#endif
180 return (size_t)lua_number2str(s, n);
181 } else if (((o->u32.hi & 0x000fffff) | o->u32.lo) != 0) {
182 s[0] = 'n'; s[1] = 'a'; s[2] = 'n'; return 3;
183 } else if ((o->u32.hi & 0x80000000) == 0) {
184 s[0] = 'i'; s[1] = 'n'; s[2] = 'f'; return 3;
185 } else {
186 s[0] = '-'; s[1] = 'i'; s[2] = 'n'; s[3] = 'f'; return 4;
187 }
188}
189
190/* Print integer to buffer. Returns pointer to start. */
191char * LJ_FASTCALL lj_str_bufint(char *p, int32_t k)
192{
193 uint32_t u = (uint32_t)(k < 0 ? -k : k);
194 p += 1+10;
195 do { *--p = (char)('0' + u % 10); } while (u /= 10);
196 if (k < 0) *--p = '-';
197 return p;
198}
199
200/* Convert number to string. */
201GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np)
202{
203 char buf[LJ_STR_NUMBUF];
204 size_t len = lj_str_bufnum(buf, (TValue *)np);
205 return lj_str_new(L, buf, len);
206}
207
208/* Convert integer to string. */
209GCstr * LJ_FASTCALL lj_str_fromint(lua_State *L, int32_t k)
210{
211 char s[1+10];
212 char *p = lj_str_bufint(s, k);
213 return lj_str_new(L, p, (size_t)(s+sizeof(s)-p));
214}
215
216GCstr * LJ_FASTCALL lj_str_fromnumber(lua_State *L, cTValue *o)
217{
218 return tvisint(o) ? lj_str_fromint(L, intV(o)) : lj_str_fromnum(L, &o->n);
219}
220
221/* -- String formatting --------------------------------------------------- */
222
223static void addstr(lua_State *L, SBuf *sb, const char *str, MSize len)
224{
225 char *p;
226 MSize i;
227 if (sb->n + len > sb->sz) {
228 MSize sz = sb->sz * 2;
229 while (sb->n + len > sz) sz = sz * 2;
230 lj_str_resizebuf(L, sb, sz);
231 }
232 p = sb->buf + sb->n;
233 sb->n += len;
234 for (i = 0; i < len; i++) p[i] = str[i];
235}
236
237static void addchar(lua_State *L, SBuf *sb, int c)
238{
239 if (sb->n + 1 > sb->sz) {
240 MSize sz = sb->sz * 2;
241 lj_str_resizebuf(L, sb, sz);
242 }
243 sb->buf[sb->n++] = (char)c;
244}
245
246/* Push formatted message as a string object to Lua stack. va_list variant. */
247const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp)
248{
249 SBuf *sb = &G(L)->tmpbuf;
250 lj_str_needbuf(L, sb, (MSize)strlen(fmt));
251 lj_str_resetbuf(sb);
252 for (;;) {
253 const char *e = strchr(fmt, '%');
254 if (e == NULL) break;
255 addstr(L, sb, fmt, (MSize)(e-fmt));
256 /* This function only handles %s, %c, %d, %f and %p formats. */
257 switch (e[1]) {
258 case 's': {
259 const char *s = va_arg(argp, char *);
260 if (s == NULL) s = "(null)";
261 addstr(L, sb, s, (MSize)strlen(s));
262 break;
263 }
264 case 'c':
265 addchar(L, sb, va_arg(argp, int));
266 break;
267 case 'd': {
268 char buf[LJ_STR_INTBUF];
269 char *p = lj_str_bufint(buf, va_arg(argp, int32_t));
270 addstr(L, sb, p, (MSize)(buf+LJ_STR_INTBUF-p));
271 break;
272 }
273 case 'f': {
274 char buf[LJ_STR_NUMBUF];
275 TValue tv;
276 MSize len;
277 tv.n = (lua_Number)(va_arg(argp, LUAI_UACNUMBER));
278 len = (MSize)lj_str_bufnum(buf, &tv);
279 addstr(L, sb, buf, len);
280 break;
281 }
282 case 'p': {
283#define FMTP_CHARS (2*sizeof(ptrdiff_t))
284 char buf[2+FMTP_CHARS];
285 ptrdiff_t p = (ptrdiff_t)(va_arg(argp, void *));
286 ptrdiff_t i, lasti = 2+FMTP_CHARS;
287 if (p == 0) {
288 addstr(L, sb, "NULL", 4);
289 break;
290 }
291#if LJ_64
292 /* Shorten output for 64 bit pointers. */
293 lasti = 2+2*4+((p >> 32) ? 2+2*(lj_fls((uint32_t)(p >> 32))>>3) : 0);
294#endif
295 buf[0] = '0';
296 buf[1] = 'x';
297 for (i = lasti-1; i >= 2; i--, p >>= 4)
298 buf[i] = "0123456789abcdef"[(p & 15)];
299 addstr(L, sb, buf, (MSize)lasti);
300 break;
301 }
302 case '%':
303 addchar(L, sb, '%');
304 break;
305 default:
306 addchar(L, sb, '%');
307 addchar(L, sb, e[1]);
308 break;
309 }
310 fmt = e+2;
311 }
312 addstr(L, sb, fmt, (MSize)strlen(fmt));
313 setstrV(L, L->top, lj_str_new(L, sb->buf, sb->n));
314 incr_top(L);
315 return strVdata(L->top - 1);
316}
317
318/* Push formatted message as a string object to Lua stack. Vararg variant. */
319const char *lj_str_pushf(lua_State *L, const char *fmt, ...)
320{
321 const char *msg;
322 va_list argp;
323 va_start(argp, fmt);
324 msg = lj_str_pushvf(L, fmt, argp);
325 va_end(argp);
326 return msg;
327}
328
329/* -- Buffer handling ----------------------------------------------------- */
330
331char *lj_str_needbuf(lua_State *L, SBuf *sb, MSize sz)
332{
333 if (sz > sb->sz) {
334 if (sz < LJ_MIN_SBUF) sz = LJ_MIN_SBUF;
335 lj_str_resizebuf(L, sb, sz);
336 }
337 return sb->buf;
338}
339
diff --git a/src/lj_str.h b/src/lj_str.h
index 99697051..d8465de6 100644
--- a/src/lj_str.h
+++ b/src/lj_str.h
@@ -10,8 +10,13 @@
10 10
11#include "lj_obj.h" 11#include "lj_obj.h"
12 12
13/* String interning. */ 13/* String helpers. */
14LJ_FUNC int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b); 14LJ_FUNC int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b);
15LJ_FUNC const char *lj_str_find(const char *s, const char *f,
16 MSize slen, MSize flen);
17LJ_FUNC int lj_str_haspattern(GCstr *s);
18
19/* String interning. */
15LJ_FUNC void lj_str_resize(lua_State *L, MSize newmask); 20LJ_FUNC void lj_str_resize(lua_State *L, MSize newmask);
16LJ_FUNCA GCstr *lj_str_new(lua_State *L, const char *str, size_t len); 21LJ_FUNCA GCstr *lj_str_new(lua_State *L, const char *str, size_t len);
17LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s); 22LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s);
@@ -19,32 +24,4 @@ LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s);
19#define lj_str_newz(L, s) (lj_str_new(L, s, strlen(s))) 24#define lj_str_newz(L, s) (lj_str_new(L, s, strlen(s)))
20#define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1)) 25#define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1))
21 26
22/* Type conversions. */
23LJ_FUNC size_t LJ_FASTCALL lj_str_bufnum(char *s, cTValue *o);
24LJ_FUNC char * LJ_FASTCALL lj_str_bufint(char *p, int32_t k);
25LJ_FUNCA GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np);
26LJ_FUNC GCstr * LJ_FASTCALL lj_str_fromint(lua_State *L, int32_t k);
27LJ_FUNCA GCstr * LJ_FASTCALL lj_str_fromnumber(lua_State *L, cTValue *o);
28
29#define LJ_STR_INTBUF (1+10)
30#define LJ_STR_NUMBUF LUAI_MAXNUMBER2STR
31
32/* String formatting. */
33LJ_FUNC const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp);
34LJ_FUNC const char *lj_str_pushf(lua_State *L, const char *fmt, ...)
35#if defined(__GNUC__)
36 __attribute__ ((format (printf, 2, 3)))
37#endif
38 ;
39
40/* Resizable string buffers. Struct definition in lj_obj.h. */
41LJ_FUNC char *lj_str_needbuf(lua_State *L, SBuf *sb, MSize sz);
42
43#define lj_str_initbuf(sb) ((sb)->buf = NULL, (sb)->sz = 0)
44#define lj_str_resetbuf(sb) ((sb)->n = 0)
45#define lj_str_resizebuf(L, sb, size) \
46 ((sb)->buf = (char *)lj_mem_realloc(L, (sb)->buf, (sb)->sz, (size)), \
47 (sb)->sz = (size))
48#define lj_str_freebuf(g, sb) lj_mem_free(g, (void *)(sb)->buf, (sb)->sz)
49
50#endif 27#endif
diff --git a/src/lj_strfmt.c b/src/lj_strfmt.c
new file mode 100644
index 00000000..d54e796a
--- /dev/null
+++ b/src/lj_strfmt.c
@@ -0,0 +1,554 @@
1/*
2** String formatting.
3** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#include <stdio.h>
7
8#define lj_strfmt_c
9#define LUA_CORE
10
11#include "lj_obj.h"
12#include "lj_buf.h"
13#include "lj_str.h"
14#include "lj_state.h"
15#include "lj_char.h"
16#include "lj_strfmt.h"
17
18/* -- Format parser ------------------------------------------------------- */
19
20static const uint8_t strfmt_map[('x'-'A')+1] = {
21 STRFMT_A,0,0,0,STRFMT_E,0,STRFMT_G,0,0,0,0,0,0,
22 0,0,0,0,0,0,0,0,0,0,STRFMT_X,0,0,
23 0,0,0,0,0,0,
24 STRFMT_A,0,STRFMT_C,STRFMT_D,STRFMT_E,STRFMT_F,STRFMT_G,0,STRFMT_I,0,0,0,0,
25 0,STRFMT_O,STRFMT_P,STRFMT_Q,0,STRFMT_S,0,STRFMT_U,0,0,STRFMT_X
26};
27
28SFormat LJ_FASTCALL lj_strfmt_parse(FormatState *fs)
29{
30 const uint8_t *p = fs->p, *e = fs->e;
31 fs->str = (const char *)p;
32 for (; p < e; p++) {
33 if (*p == '%') { /* Escape char? */
34 if (p[1] == '%') { /* '%%'? */
35 fs->p = ++p+1;
36 goto retlit;
37 } else {
38 SFormat sf = 0;
39 uint32_t c;
40 if (p != (const uint8_t *)fs->str)
41 break;
42 for (p++; (uint32_t)*p - ' ' <= (uint32_t)('0' - ' '); p++) {
43 /* Parse flags. */
44 if (*p == '-') sf |= STRFMT_F_LEFT;
45 else if (*p == '+') sf |= STRFMT_F_PLUS;
46 else if (*p == '0') sf |= STRFMT_F_ZERO;
47 else if (*p == ' ') sf |= STRFMT_F_SPACE;
48 else if (*p == '#') sf |= STRFMT_F_ALT;
49 else break;
50 }
51 if ((uint32_t)*p - '0' < 10) { /* Parse width. */
52 uint32_t width = (uint32_t)*p++ - '0';
53 if ((uint32_t)*p - '0' < 10)
54 width = (uint32_t)*p++ - '0' + width*10;
55 sf |= (width << STRFMT_SH_WIDTH);
56 }
57 if (*p == '.') { /* Parse precision. */
58 uint32_t prec = 0;
59 p++;
60 if ((uint32_t)*p - '0' < 10) {
61 prec = (uint32_t)*p++ - '0';
62 if ((uint32_t)*p - '0' < 10)
63 prec = (uint32_t)*p++ - '0' + prec*10;
64 }
65 sf |= ((prec+1) << STRFMT_SH_PREC);
66 }
67 /* Parse conversion. */
68 c = (uint32_t)*p - 'A';
69 if (LJ_LIKELY(c <= (uint32_t)('x' - 'A'))) {
70 uint32_t sx = strfmt_map[c];
71 if (sx) {
72 fs->p = p+1;
73 return (sf | sx | ((c & 0x20) ? 0 : STRFMT_F_UPPER));
74 }
75 }
76 /* Return error location. */
77 if (*p >= 32) p++;
78 fs->len = (MSize)(p - (const uint8_t *)fs->str);
79 fs->p = fs->e;
80 return STRFMT_ERR;
81 }
82 }
83 }
84 fs->p = p;
85retlit:
86 fs->len = (MSize)(p - (const uint8_t *)fs->str);
87 return fs->len ? STRFMT_LIT : STRFMT_EOF;
88}
89
90/* -- Raw conversions ----------------------------------------------------- */
91
92/* Write number to bufer. */
93char * LJ_FASTCALL lj_strfmt_wnum(char *p, cTValue *o)
94{
95 if (LJ_LIKELY((o->u32.hi << 1) < 0xffe00000)) { /* Finite? */
96#if __BIONIC__
97 if (tvismzero(o)) { *p++ = '-'; *p++ = '0'; return p; }
98#endif
99 return p + lua_number2str(p, o->n);
100 } else if (((o->u32.hi & 0x000fffff) | o->u32.lo) != 0) {
101 *p++ = 'n'; *p++ = 'a'; *p++ = 'n';
102 } else if ((o->u32.hi & 0x80000000) == 0) {
103 *p++ = 'i'; *p++ = 'n'; *p++ = 'f';
104 } else {
105 *p++ = '-'; *p++ = 'i'; *p++ = 'n'; *p++ = 'f';
106 }
107 return p;
108}
109
110#define WINT_R(x, sh, sc) \
111 { uint32_t d = (x*(((1<<sh)+sc-1)/sc))>>sh; x -= d*sc; *p++ = (char)('0'+d); }
112
113/* Write integer to buffer. */
114char * LJ_FASTCALL lj_strfmt_wint(char *p, int32_t k)
115{
116 uint32_t u = (uint32_t)k;
117 if (k < 0) { u = (uint32_t)-k; *p++ = '-'; }
118 if (u < 10000) {
119 if (u < 10) goto dig1; if (u < 100) goto dig2; if (u < 1000) goto dig3;
120 } else {
121 uint32_t v = u / 10000; u -= v * 10000;
122 if (v < 10000) {
123 if (v < 10) goto dig5; if (v < 100) goto dig6; if (v < 1000) goto dig7;
124 } else {
125 uint32_t w = v / 10000; v -= w * 10000;
126 if (w >= 10) WINT_R(w, 10, 10)
127 *p++ = (char)('0'+w);
128 }
129 WINT_R(v, 23, 1000)
130 dig7: WINT_R(v, 12, 100)
131 dig6: WINT_R(v, 10, 10)
132 dig5: *p++ = (char)('0'+v);
133 }
134 WINT_R(u, 23, 1000)
135 dig3: WINT_R(u, 12, 100)
136 dig2: WINT_R(u, 10, 10)
137 dig1: *p++ = (char)('0'+u);
138 return p;
139}
140#undef WINT_R
141
142/* Write pointer to buffer. */
143char * LJ_FASTCALL lj_strfmt_wptr(char *p, const void *v)
144{
145 ptrdiff_t x = (ptrdiff_t)v;
146 MSize i, n = STRFMT_MAXBUF_PTR;
147 if (x == 0) {
148 *p++ = 'N'; *p++ = 'U'; *p++ = 'L'; *p++ = 'L';
149 return p;
150 }
151#if LJ_64
152 /* Shorten output for 64 bit pointers. */
153 n = 2+2*4+((x >> 32) ? 2+2*(lj_fls((uint32_t)(x >> 32))>>3) : 0);
154#endif
155 p[0] = '0';
156 p[1] = 'x';
157 for (i = n-1; i >= 2; i--, x >>= 4)
158 p[i] = "0123456789abcdef"[(x & 15)];
159 return p+n;
160}
161
162/* Write ULEB128 to buffer. */
163char * LJ_FASTCALL lj_strfmt_wuleb128(char *p, uint32_t v)
164{
165 for (; v >= 0x80; v >>= 7)
166 *p++ = (char)((v & 0x7f) | 0x80);
167 *p++ = (char)v;
168 return p;
169}
170
171/* Return string or write number to buffer and return pointer to start. */
172const char *lj_strfmt_wstrnum(char *buf, cTValue *o, MSize *lenp)
173{
174 if (tvisstr(o)) {
175 *lenp = strV(o)->len;
176 return strVdata(o);
177 } else if (tvisint(o)) {
178 *lenp = (MSize)(lj_strfmt_wint(buf, intV(o)) - buf);
179 return buf;
180 } else if (tvisnum(o)) {
181 *lenp = (MSize)(lj_strfmt_wnum(buf, o) - buf);
182 return buf;
183 } else {
184 return NULL;
185 }
186}
187
188/* -- Unformatted conversions to buffer ----------------------------------- */
189
190/* Add integer to buffer. */
191SBuf * LJ_FASTCALL lj_strfmt_putint(SBuf *sb, int32_t k)
192{
193 setsbufP(sb, lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT), k));
194 return sb;
195}
196
#if LJ_HASJIT
/* Add number to buffer. Returns sb. */
SBuf * LJ_FASTCALL lj_strfmt_putnum(SBuf *sb, cTValue *o)
{
  char *w = lj_buf_more(sb, STRFMT_MAXBUF_NUM);  /* Reserve worst case. */
  w = lj_strfmt_wnum(w, o);
  setsbufP(sb, w);
  return sb;
}
#endif
205
206SBuf * LJ_FASTCALL lj_strfmt_putptr(SBuf *sb, const void *v)
207{
208 setsbufP(sb, lj_strfmt_wptr(lj_buf_more(sb, STRFMT_MAXBUF_PTR), v));
209 return sb;
210}
211
212/* Add quoted string to buffer. */
213SBuf * LJ_FASTCALL lj_strfmt_putquoted(SBuf *sb, GCstr *str)
214{
215 const char *s = strdata(str);
216 MSize len = str->len;
217 lj_buf_putb(sb, '"');
218 while (len--) {
219 uint32_t c = (uint32_t)(uint8_t)*s++;
220 char *p = lj_buf_more(sb, 4);
221 if (c == '"' || c == '\\' || c == '\n') {
222 *p++ = '\\';
223 } else if (lj_char_iscntrl(c)) { /* This can only be 0-31 or 127. */
224 uint32_t d;
225 *p++ = '\\';
226 if (c >= 100 || lj_char_isdigit((uint8_t)*s)) {
227 *p++ = (char)('0'+(c >= 100)); if (c >= 100) c -= 100;
228 goto tens;
229 } else if (c >= 10) {
230 tens:
231 d = (c * 205) >> 11; c -= d * 10; *p++ = (char)('0'+d);
232 }
233 c += '0';
234 }
235 *p++ = (char)c;
236 setsbufP(sb, p);
237 }
238 lj_buf_putb(sb, '"');
239 return sb;
240}
241
242/* -- Formatted conversions to buffer ------------------------------------- */
243
244/* Add formatted char to buffer. */
245SBuf *lj_strfmt_putfchar(SBuf *sb, SFormat sf, int32_t c)
246{
247 MSize width = STRFMT_WIDTH(sf);
248 char *p = lj_buf_more(sb, width > 1 ? width : 1);
249 if ((sf & STRFMT_F_LEFT)) *p++ = (char)c;
250 while (width-- > 1) *p++ = ' ';
251 if (!(sf & STRFMT_F_LEFT)) *p++ = (char)c;
252 setsbufP(sb, p);
253 return sb;
254}
255
256/* Add formatted string to buffer. */
257SBuf *lj_strfmt_putfstr(SBuf *sb, SFormat sf, GCstr *str)
258{
259 MSize len = str->len <= STRFMT_PREC(sf) ? str->len : STRFMT_PREC(sf);
260 MSize width = STRFMT_WIDTH(sf);
261 char *p = lj_buf_more(sb, width > len ? width : len);
262 if ((sf & STRFMT_F_LEFT)) p = lj_buf_wmem(p, strdata(str), len);
263 while (width-- > len) *p++ = ' ';
264 if (!(sf & STRFMT_F_LEFT)) p = lj_buf_wmem(p, strdata(str), len);
265 setsbufP(sb, p);
266 return sb;
267}
268
269/* Add formatted signed/unsigned integer to buffer. */
270SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k)
271{
272 char buf[STRFMT_MAXBUF_XINT], *q = buf + sizeof(buf), *p;
273#ifdef LUA_USE_ASSERT
274 char *ps;
275#endif
276 MSize prefix = 0, len, prec, pprec, width, need;
277
278 /* Figure out signed prefixes. */
279 if (STRFMT_TYPE(sf) == STRFMT_INT) {
280 if ((int64_t)k < 0) {
281 k = (uint64_t)-(int64_t)k;
282 prefix = 256 + '-';
283 } else if ((sf & STRFMT_F_PLUS)) {
284 prefix = 256 + '+';
285 } else if ((sf & STRFMT_F_SPACE)) {
286 prefix = 256 + ' ';
287 }
288 }
289
290 /* Convert number and store to fixed-size buffer in reverse order. */
291 prec = STRFMT_PREC(sf);
292 if ((int32_t)prec >= 0) sf &= ~STRFMT_F_ZERO;
293 if (k == 0) { /* Special-case zero argument. */
294 if (prec != 0 ||
295 (sf & (STRFMT_T_OCT|STRFMT_F_ALT)) == (STRFMT_T_OCT|STRFMT_F_ALT))
296 *--q = '0';
297 } else if (!(sf & (STRFMT_T_HEX|STRFMT_T_OCT))) { /* Decimal. */
298 uint32_t k2;
299 while ((k >> 32)) { *--q = (char)('0' + k % 10); k /= 10; }
300 k2 = (uint32_t)k;
301 do { *--q = (char)('0' + k2 % 10); k2 /= 10; } while (k2);
302 } else if ((sf & STRFMT_T_HEX)) { /* Hex. */
303 const char *hexdig = (sf & STRFMT_F_UPPER) ? "0123456789ABCDEF" :
304 "0123456789abcdef";
305 do { *--q = hexdig[(k & 15)]; k >>= 4; } while (k);
306 if ((sf & STRFMT_F_ALT)) prefix = 512 + ((sf & STRFMT_F_UPPER) ? 'X' : 'x');
307 } else { /* Octal. */
308 do { *--q = (char)('0' + (uint32_t)(k & 7)); k >>= 3; } while (k);
309 if ((sf & STRFMT_F_ALT)) *--q = '0';
310 }
311
312 /* Calculate sizes. */
313 len = (MSize)(buf + sizeof(buf) - q);
314 if ((int32_t)len >= (int32_t)prec) prec = len;
315 width = STRFMT_WIDTH(sf);
316 pprec = prec + (prefix >> 8);
317 need = width > pprec ? width : pprec;
318 p = lj_buf_more(sb, need);
319#ifdef LUA_USE_ASSERT
320 ps = p;
321#endif
322
323 /* Format number with leading/trailing whitespace and zeros. */
324 if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == 0)
325 while (width-- > pprec) *p++ = ' ';
326 if (prefix) {
327 if ((char)prefix >= 'X') *p++ = '0';
328 *p++ = (char)prefix;
329 }
330 if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == STRFMT_F_ZERO)
331 while (width-- > pprec) *p++ = '0';
332 while (prec-- > len) *p++ = '0';
333 while (q < buf + sizeof(buf)) *p++ = *q++; /* Add number itself. */
334 if ((sf & STRFMT_F_LEFT))
335 while (width-- > pprec) *p++ = ' ';
336
337 lua_assert(need == (MSize)(p - ps));
338 setsbufP(sb, p);
339 return sb;
340}
341
342/* Add number formatted as signed integer to buffer. */
343SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n)
344{
345 int64_t k = (int64_t)n;
346 if (checki32(k) && sf == STRFMT_INT)
347 return lj_strfmt_putint(sb, (int32_t)k); /* Shortcut for plain %d. */
348 else
349 return lj_strfmt_putfxint(sb, sf, (uint64_t)k);
350}
351
352/* Add number formatted as unsigned integer to buffer. */
353SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n)
354{
355 int64_t k;
356 if (n >= 9223372036854775808.0)
357 k = (int64_t)(n - 18446744073709551616.0);
358 else
359 k = (int64_t)n;
360 return lj_strfmt_putfxint(sb, sf, (uint64_t)k);
361}
362
363/* Max. sprintf buffer size needed. At least #string.format("%.99f", -1e308). */
364#define STRFMT_FMTNUMBUF 512
365
366/* Add formatted floating-point number to buffer. */
367SBuf *lj_strfmt_putfnum(SBuf *sb, SFormat sf, lua_Number n)
368{
369 TValue tv;
370 tv.n = n;
371 if (LJ_UNLIKELY((tv.u32.hi << 1) >= 0xffe00000)) {
372 /* Canonicalize output of non-finite values. */
373 MSize width = STRFMT_WIDTH(sf), len = 3;
374 int prefix = 0, ch = (sf & STRFMT_F_UPPER) ? 0x202020 : 0;
375 char *p;
376 if (((tv.u32.hi & 0x000fffff) | tv.u32.lo) != 0) {
377 ch ^= ('n' << 16) | ('a' << 8) | 'n';
378 if ((sf & STRFMT_F_SPACE)) prefix = ' ';
379 } else {
380 ch ^= ('i' << 16) | ('n' << 8) | 'f';
381 if ((tv.u32.hi & 0x80000000)) prefix = '-';
382 else if ((sf & STRFMT_F_PLUS)) prefix = '+';
383 else if ((sf & STRFMT_F_SPACE)) prefix = ' ';
384 }
385 if (prefix) len = 4;
386 p = lj_buf_more(sb, width > len ? width : len);
387 if (!(sf & STRFMT_F_LEFT)) while (width-- > len) *p++ = ' ';
388 if (prefix) *p++ = prefix;
389 *p++ = (char)(ch >> 16); *p++ = (char)(ch >> 8); *p++ = (char)ch;
390 if ((sf & STRFMT_F_LEFT)) while (width-- > len) *p++ = ' ';
391 setsbufP(sb, p);
392 } else { /* Delegate to sprintf() for now. */
393 uint8_t width = (uint8_t)STRFMT_WIDTH(sf), prec = (uint8_t)STRFMT_PREC(sf);
394 char fmt[1+5+2+3+1+1], *p = fmt;
395 *p++ = '%';
396 if ((sf & STRFMT_F_LEFT)) *p++ = '-';
397 if ((sf & STRFMT_F_PLUS)) *p++ = '+';
398 if ((sf & STRFMT_F_ZERO)) *p++ = '0';
399 if ((sf & STRFMT_F_SPACE)) *p++ = ' ';
400 if ((sf & STRFMT_F_ALT)) *p++ = '#';
401 if (width) {
402 uint8_t x = width / 10, y = width % 10;
403 if (x) *p++ = '0' + x;
404 *p++ = '0' + y;
405 }
406 if (prec != 255) {
407 uint8_t x = prec / 10, y = prec % 10;
408 *p++ = '.';
409 if (x) *p++ = '0' + x;
410 *p++ = '0' + y;
411 }
412 *p++ = (0x67666561 >> (STRFMT_FP(sf)<<3)) ^ ((sf & STRFMT_F_UPPER)?0x20:0);
413 *p = '\0';
414 p = lj_buf_more(sb, STRFMT_FMTNUMBUF);
415 setsbufP(sb, p + sprintf(p, fmt, n));
416 }
417 return sb;
418}
419
420/* -- Conversions to strings ---------------------------------------------- */
421
422/* Convert integer to string. */
423GCstr * LJ_FASTCALL lj_strfmt_int(lua_State *L, int32_t k)
424{
425 char buf[STRFMT_MAXBUF_INT];
426 MSize len = (MSize)(lj_strfmt_wint(buf, k) - buf);
427 return lj_str_new(L, buf, len);
428}
429
430/* Convert number to string. */
431GCstr * LJ_FASTCALL lj_strfmt_num(lua_State *L, cTValue *o)
432{
433 char buf[STRFMT_MAXBUF_NUM];
434 MSize len = (MSize)(lj_strfmt_wnum(buf, o) - buf);
435 return lj_str_new(L, buf, len);
436}
437
438/* Convert integer or number to string. */
439GCstr * LJ_FASTCALL lj_strfmt_number(lua_State *L, cTValue *o)
440{
441 return tvisint(o) ? lj_strfmt_int(L, intV(o)) : lj_strfmt_num(L, o);
442}
443
#if LJ_HASJIT
/* Convert char value to a string of length one. */
GCstr * LJ_FASTCALL lj_strfmt_char(lua_State *L, int c)
{
  char ch = c;
  return lj_str_new(L, &ch, 1);
}
#endif
453
454/* Raw conversion of object to string. */
455GCstr * LJ_FASTCALL lj_strfmt_obj(lua_State *L, cTValue *o)
456{
457 if (tvisstr(o)) {
458 return strV(o);
459 } else if (tvisnumber(o)) {
460 return lj_strfmt_number(L, o);
461 } else if (tvisnil(o)) {
462 return lj_str_newlit(L, "nil");
463 } else if (tvisfalse(o)) {
464 return lj_str_newlit(L, "false");
465 } else if (tvistrue(o)) {
466 return lj_str_newlit(L, "true");
467 } else {
468 char buf[8+2+2+16], *p = buf;
469 p = lj_buf_wmem(p, lj_typename(o), (MSize)strlen(lj_typename(o)));
470 *p++ = ':'; *p++ = ' ';
471 if (tvisfunc(o) && isffunc(funcV(o))) {
472 p = lj_buf_wmem(p, "builtin#", 8);
473 p = lj_strfmt_wint(p, funcV(o)->c.ffid);
474 } else {
475 p = lj_strfmt_wptr(p, lj_obj_ptr(o));
476 }
477 return lj_str_new(L, buf, (size_t)(p - buf));
478 }
479}
480
481/* -- Internal string formatting ------------------------------------------ */
482
483/*
484** These functions are only used for lua_pushfstring(), lua_pushvfstring()
485** and for internal string formatting (e.g. error messages). Caveat: unlike
486** string.format(), only a limited subset of formats and flags are supported!
487**
488** LuaJIT has support for a couple more formats than Lua 5.1/5.2:
489** - %d %u %o %x with full formatting, 32 bit integers only.
490** - %f and other FP formats are really %.14g.
491** - %s %c %p without formatting.
492*/
493
494/* Push formatted message as a string object to Lua stack. va_list variant. */
495const char *lj_strfmt_pushvf(lua_State *L, const char *fmt, va_list argp)
496{
497 SBuf *sb = lj_buf_tmp_(L);
498 FormatState fs;
499 SFormat sf;
500 GCstr *str;
501 lj_strfmt_init(&fs, fmt, (MSize)strlen(fmt));
502 while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) {
503 switch (STRFMT_TYPE(sf)) {
504 case STRFMT_LIT:
505 lj_buf_putmem(sb, fs.str, fs.len);
506 break;
507 case STRFMT_INT:
508 lj_strfmt_putfxint(sb, sf, va_arg(argp, int32_t));
509 break;
510 case STRFMT_UINT:
511 lj_strfmt_putfxint(sb, sf, va_arg(argp, uint32_t));
512 break;
513 case STRFMT_NUM: {
514 TValue tv;
515 tv.n = va_arg(argp, lua_Number);
516 setsbufP(sb, lj_strfmt_wnum(lj_buf_more(sb, STRFMT_MAXBUF_NUM), &tv));
517 break;
518 }
519 case STRFMT_STR: {
520 const char *s = va_arg(argp, char *);
521 if (s == NULL) s = "(null)";
522 lj_buf_putmem(sb, s, (MSize)strlen(s));
523 break;
524 }
525 case STRFMT_CHAR:
526 lj_buf_putb(sb, va_arg(argp, int));
527 break;
528 case STRFMT_PTR:
529 lj_strfmt_putptr(sb, va_arg(argp, void *));
530 break;
531 case STRFMT_ERR:
532 default:
533 lj_buf_putb(sb, '?');
534 lua_assert(0);
535 break;
536 }
537 }
538 str = lj_buf_str(L, sb);
539 setstrV(L, L->top, str);
540 incr_top(L);
541 return strdata(str);
542}
543
544/* Push formatted message as a string object to Lua stack. Vararg variant. */
545const char *lj_strfmt_pushf(lua_State *L, const char *fmt, ...)
546{
547 const char *msg;
548 va_list argp;
549 va_start(argp, fmt);
550 msg = lj_strfmt_pushvf(L, fmt, argp);
551 va_end(argp);
552 return msg;
553}
554
diff --git a/src/lj_strfmt.h b/src/lj_strfmt.h
new file mode 100644
index 00000000..dcfaf2e3
--- /dev/null
+++ b/src/lj_strfmt.h
@@ -0,0 +1,125 @@
1/*
2** String formatting.
3** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_STRFMT_H
7#define _LJ_STRFMT_H
8
9#include "lj_obj.h"
10
11typedef uint32_t SFormat; /* Format indicator. */
12
13/* Format parser state. */
14typedef struct FormatState {
15 const uint8_t *p; /* Current format string pointer. */
16 const uint8_t *e; /* End of format string. */
17 const char *str; /* Returned literal string. */
18 MSize len; /* Size of literal string. */
19} FormatState;
20
/* Format types (max. 16). Stored in the low 4 bits of a format indicator. */
typedef enum FormatType {
  STRFMT_EOF,	/* End of format string. */
  STRFMT_ERR,	/* Invalid conversion. */
  STRFMT_LIT,	/* Literal text. */
  STRFMT_INT,	/* Signed integer. */
  STRFMT_UINT,	/* Unsigned integer (decimal, hex or octal). */
  STRFMT_NUM,	/* Floating-point number. */
  STRFMT_STR,	/* String (plain or quoted). */
  STRFMT_CHAR,	/* Single char. */
  STRFMT_PTR	/* Pointer. */
} FormatType;
26
/*
** Layout of a format indicator (SFormat):
**   bits  0-3   format type
**   bits  4-5   format subtype
**   bits  8-13  flags
**   bits 16-23  width
**   bits 24-31  precision+1 (0 = no precision given)
*/

/* Format subtypes (bits are reused). */
#define STRFMT_T_HEX		0x0010	/* STRFMT_UINT */
#define STRFMT_T_OCT		0x0020	/* STRFMT_UINT */
#define STRFMT_T_FP_A		0x0000	/* STRFMT_NUM */
#define STRFMT_T_FP_E		0x0010	/* STRFMT_NUM */
#define STRFMT_T_FP_F		0x0020	/* STRFMT_NUM */
#define STRFMT_T_FP_G		0x0030	/* STRFMT_NUM */
#define STRFMT_T_QUOTED		0x0010	/* STRFMT_STR */

/* Format flags. */
#define STRFMT_F_LEFT		0x0100	/* '-' flag. */
#define STRFMT_F_PLUS		0x0200	/* '+' flag. */
#define STRFMT_F_ZERO		0x0400	/* '0' flag. */
#define STRFMT_F_SPACE		0x0800	/* ' ' flag. */
#define STRFMT_F_ALT		0x1000	/* '#' flag. */
#define STRFMT_F_UPPER		0x2000	/* Upper-case conversion char. */

/* Format indicator fields. */
#define STRFMT_SH_WIDTH		16
#define STRFMT_SH_PREC		24

#define STRFMT_TYPE(sf)		((FormatType)((sf) & 15))
#define STRFMT_WIDTH(sf)	(((sf) >> STRFMT_SH_WIDTH) & 255u)
/* Yields the max. unsigned value if no precision was given. */
#define STRFMT_PREC(sf)		((((sf) >> STRFMT_SH_PREC) & 255u) - 1u)
#define STRFMT_FP(sf)		(((sf) >> 4) & 3)

/* Formats for conversion characters. */
#define STRFMT_A		(STRFMT_NUM|STRFMT_T_FP_A)
#define STRFMT_C		(STRFMT_CHAR)
#define STRFMT_D		(STRFMT_INT)
#define STRFMT_E		(STRFMT_NUM|STRFMT_T_FP_E)
#define STRFMT_F		(STRFMT_NUM|STRFMT_T_FP_F)
#define STRFMT_G		(STRFMT_NUM|STRFMT_T_FP_G)
#define STRFMT_I		STRFMT_D
#define STRFMT_O		(STRFMT_UINT|STRFMT_T_OCT)
#define STRFMT_P		(STRFMT_PTR)
#define STRFMT_Q		(STRFMT_STR|STRFMT_T_QUOTED)
#define STRFMT_S		(STRFMT_STR)
#define STRFMT_U		(STRFMT_UINT)
#define STRFMT_X		(STRFMT_UINT|STRFMT_T_HEX)

/* Maximum buffer sizes for conversions. */
#define STRFMT_MAXBUF_XINT	(1+22)  /* '0' prefix + uint64_t in octal. */
#define STRFMT_MAXBUF_INT	(1+10)  /* Sign + int32_t in decimal. */
#define STRFMT_MAXBUF_NUM	LUAI_MAXNUMBER2STR
#define STRFMT_MAXBUF_PTR	(2+2*sizeof(ptrdiff_t))  /* "0x" + hex ptr. */
73
74/* Format parser. */
75LJ_FUNC SFormat LJ_FASTCALL lj_strfmt_parse(FormatState *fs);
76
77static LJ_AINLINE void lj_strfmt_init(FormatState *fs, const char *p, MSize len)
78{
79 fs->p = (const uint8_t *)p;
80 fs->e = (const uint8_t *)p + len;
81 lua_assert(*fs->e == 0); /* Must be NUL-terminated (may have NULs inside). */
82}
83
84/* Raw conversions. */
85LJ_FUNC char * LJ_FASTCALL lj_strfmt_wint(char *p, int32_t k);
86LJ_FUNC char * LJ_FASTCALL lj_strfmt_wnum(char *p, cTValue *o);
87LJ_FUNC char * LJ_FASTCALL lj_strfmt_wptr(char *p, const void *v);
88LJ_FUNC char * LJ_FASTCALL lj_strfmt_wuleb128(char *p, uint32_t v);
89LJ_FUNC const char *lj_strfmt_wstrnum(char *buf, cTValue *o, MSize *lenp);
90
91/* Unformatted conversions to buffer. */
92LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putint(SBuf *sb, int32_t k);
93#if LJ_HASJIT
94LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putnum(SBuf *sb, cTValue *o);
95#endif
96LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putptr(SBuf *sb, const void *v);
97LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putquoted(SBuf *sb, GCstr *str);
98
99/* Formatted conversions to buffer. */
100LJ_FUNC SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k);
101LJ_FUNC SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n);
102LJ_FUNC SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n);
103LJ_FUNC SBuf *lj_strfmt_putfnum(SBuf *sb, SFormat, lua_Number n);
104LJ_FUNC SBuf *lj_strfmt_putfchar(SBuf *sb, SFormat, int32_t c);
105LJ_FUNC SBuf *lj_strfmt_putfstr(SBuf *sb, SFormat, GCstr *str);
106
107/* Conversions to strings. */
108LJ_FUNC GCstr * LJ_FASTCALL lj_strfmt_int(lua_State *L, int32_t k);
109LJ_FUNCA GCstr * LJ_FASTCALL lj_strfmt_num(lua_State *L, cTValue *o);
110LJ_FUNCA GCstr * LJ_FASTCALL lj_strfmt_number(lua_State *L, cTValue *o);
111#if LJ_HASJIT
112LJ_FUNC GCstr * LJ_FASTCALL lj_strfmt_char(lua_State *L, int c);
113#endif
114LJ_FUNC GCstr * LJ_FASTCALL lj_strfmt_obj(lua_State *L, cTValue *o);
115
116/* Internal string formatting. */
117LJ_FUNC const char *lj_strfmt_pushvf(lua_State *L, const char *fmt,
118 va_list argp);
119LJ_FUNC const char *lj_strfmt_pushf(lua_State *L, const char *fmt, ...)
120#ifdef __GNUC__
121 __attribute__ ((format (printf, 2, 3)))
122#endif
123 ;
124
125#endif
diff --git a/src/lj_tab.c b/src/lj_tab.c
index a8062db7..a9f43835 100644
--- a/src/lj_tab.c
+++ b/src/lj_tab.c
@@ -29,7 +29,12 @@ static LJ_AINLINE Node *hashmask(const GCtab *t, uint32_t hash)
29#define hashlohi(t, lo, hi) hashmask((t), hashrot((lo), (hi))) 29#define hashlohi(t, lo, hi) hashmask((t), hashrot((lo), (hi)))
30#define hashnum(t, o) hashlohi((t), (o)->u32.lo, ((o)->u32.hi << 1)) 30#define hashnum(t, o) hashlohi((t), (o)->u32.lo, ((o)->u32.hi << 1))
31#define hashptr(t, p) hashlohi((t), u32ptr(p), u32ptr(p) + HASH_BIAS) 31#define hashptr(t, p) hashlohi((t), u32ptr(p), u32ptr(p) + HASH_BIAS)
32#if LJ_GC64
33#define hashgcref(t, r) \
34 hashlohi((t), (uint32_t)gcrefu(r), (uint32_t)(gcrefu(r) >> 32))
35#else
32#define hashgcref(t, r) hashlohi((t), gcrefu(r), gcrefu(r) + HASH_BIAS) 36#define hashgcref(t, r) hashlohi((t), gcrefu(r), gcrefu(r) + HASH_BIAS)
37#endif
33 38
34/* Hash an arbitrary key and return its anchor position in the hash table. */ 39/* Hash an arbitrary key and return its anchor position in the hash table. */
35static Node *hashkey(const GCtab *t, cTValue *key) 40static Node *hashkey(const GCtab *t, cTValue *key)
@@ -58,8 +63,8 @@ static LJ_AINLINE void newhpart(lua_State *L, GCtab *t, uint32_t hbits)
58 lj_err_msg(L, LJ_ERR_TABOV); 63 lj_err_msg(L, LJ_ERR_TABOV);
59 hsize = 1u << hbits; 64 hsize = 1u << hbits;
60 node = lj_mem_newvec(L, hsize, Node); 65 node = lj_mem_newvec(L, hsize, Node);
61 setmref(node->freetop, &node[hsize]);
62 setmref(t->node, node); 66 setmref(t->node, node);
67 setfreetop(t, node, &node[hsize]);
63 t->hmask = hsize-1; 68 t->hmask = hsize-1;
64} 69}
65 70
@@ -98,6 +103,7 @@ static GCtab *newtab(lua_State *L, uint32_t asize, uint32_t hbits)
98 GCtab *t; 103 GCtab *t;
99 /* First try to colocate the array part. */ 104 /* First try to colocate the array part. */
100 if (LJ_MAX_COLOSIZE != 0 && asize > 0 && asize <= LJ_MAX_COLOSIZE) { 105 if (LJ_MAX_COLOSIZE != 0 && asize > 0 && asize <= LJ_MAX_COLOSIZE) {
106 Node *nilnode;
101 lua_assert((sizeof(GCtab) & 7) == 0); 107 lua_assert((sizeof(GCtab) & 7) == 0);
102 t = (GCtab *)lj_mem_newgco(L, sizetabcolo(asize)); 108 t = (GCtab *)lj_mem_newgco(L, sizetabcolo(asize));
103 t->gct = ~LJ_TTAB; 109 t->gct = ~LJ_TTAB;
@@ -107,8 +113,13 @@ static GCtab *newtab(lua_State *L, uint32_t asize, uint32_t hbits)
107 setgcrefnull(t->metatable); 113 setgcrefnull(t->metatable);
108 t->asize = asize; 114 t->asize = asize;
109 t->hmask = 0; 115 t->hmask = 0;
110 setmref(t->node, &G(L)->nilnode); 116 nilnode = &G(L)->nilnode;
117 setmref(t->node, nilnode);
118#if LJ_GC64
119 setmref(t->freetop, nilnode);
120#endif
111 } else { /* Otherwise separately allocate the array part. */ 121 } else { /* Otherwise separately allocate the array part. */
122 Node *nilnode;
112 t = lj_mem_newobj(L, GCtab); 123 t = lj_mem_newobj(L, GCtab);
113 t->gct = ~LJ_TTAB; 124 t->gct = ~LJ_TTAB;
114 t->nomm = (uint8_t)~0; 125 t->nomm = (uint8_t)~0;
@@ -117,7 +128,11 @@ static GCtab *newtab(lua_State *L, uint32_t asize, uint32_t hbits)
117 setgcrefnull(t->metatable); 128 setgcrefnull(t->metatable);
118 t->asize = 0; /* In case the array allocation fails. */ 129 t->asize = 0; /* In case the array allocation fails. */
119 t->hmask = 0; 130 t->hmask = 0;
120 setmref(t->node, &G(L)->nilnode); 131 nilnode = &G(L)->nilnode;
132 setmref(t->node, nilnode);
133#if LJ_GC64
134 setmref(t->freetop, nilnode);
135#endif
121 if (asize > 0) { 136 if (asize > 0) {
122 if (asize > LJ_MAX_ASIZE) 137 if (asize > LJ_MAX_ASIZE)
123 lj_err_msg(L, LJ_ERR_TABOV); 138 lj_err_msg(L, LJ_ERR_TABOV);
@@ -149,6 +164,12 @@ GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits)
149 return t; 164 return t;
150} 165}
151 166
167/* The API of this function conforms to lua_createtable(). */
168GCtab *lj_tab_new_ah(lua_State *L, int32_t a, int32_t h)
169{
170 return lj_tab_new(L, (uint32_t)(a > 0 ? a+1 : 0), hsize2hbits(h));
171}
172
152#if LJ_HASJIT 173#if LJ_HASJIT
153GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize) 174GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize)
154{ 175{
@@ -185,7 +206,7 @@ GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt)
185 Node *node = noderef(t->node); 206 Node *node = noderef(t->node);
186 Node *knode = noderef(kt->node); 207 Node *knode = noderef(kt->node);
187 ptrdiff_t d = (char *)node - (char *)knode; 208 ptrdiff_t d = (char *)node - (char *)knode;
188 setmref(node->freetop, (Node *)((char *)noderef(knode->freetop) + d)); 209 setfreetop(t, node, (Node *)((char *)getfreetop(kt, knode) + d));
189 for (i = 0; i <= hmask; i++) { 210 for (i = 0; i <= hmask; i++) {
190 Node *kn = &knode[i]; 211 Node *kn = &knode[i];
191 Node *n = &node[i]; 212 Node *n = &node[i];
@@ -198,6 +219,17 @@ GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt)
198 return t; 219 return t;
199} 220}
200 221
222/* Clear a table. */
223void LJ_FASTCALL lj_tab_clear(GCtab *t)
224{
225 clearapart(t);
226 if (t->hmask > 0) {
227 Node *node = noderef(t->node);
228 setfreetop(t, node, &node[t->hmask+1]);
229 clearhpart(t);
230 }
231}
232
201/* Free a table. */ 233/* Free a table. */
202void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t) 234void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t)
203{ 235{
@@ -247,6 +279,9 @@ static void resizetab(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits)
247 } else { 279 } else {
248 global_State *g = G(L); 280 global_State *g = G(L);
249 setmref(t->node, &g->nilnode); 281 setmref(t->node, &g->nilnode);
282#if LJ_GC64
283 setmref(t->freetop, &g->nilnode);
284#endif
250 t->hmask = 0; 285 t->hmask = 0;
251 } 286 }
252 if (asize < oldasize) { /* Array part shrinks? */ 287 if (asize < oldasize) { /* Array part shrinks? */
@@ -428,7 +463,7 @@ TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key)
428 Node *n = hashkey(t, key); 463 Node *n = hashkey(t, key);
429 if (!tvisnil(&n->val) || t->hmask == 0) { 464 if (!tvisnil(&n->val) || t->hmask == 0) {
430 Node *nodebase = noderef(t->node); 465 Node *nodebase = noderef(t->node);
431 Node *collide, *freenode = noderef(nodebase->freetop); 466 Node *collide, *freenode = getfreetop(t, nodebase);
432 lua_assert(freenode >= nodebase && freenode <= nodebase+t->hmask+1); 467 lua_assert(freenode >= nodebase && freenode <= nodebase+t->hmask+1);
433 do { 468 do {
434 if (freenode == nodebase) { /* No free node found? */ 469 if (freenode == nodebase) { /* No free node found? */
@@ -436,7 +471,7 @@ TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key)
436 return lj_tab_set(L, t, key); /* Retry key insertion. */ 471 return lj_tab_set(L, t, key); /* Retry key insertion. */
437 } 472 }
438 } while (!tvisnil(&(--freenode)->key)); 473 } while (!tvisnil(&(--freenode)->key));
439 setmref(nodebase->freetop, freenode); 474 setfreetop(t, nodebase, freenode);
440 lua_assert(freenode != &G(L)->nilnode); 475 lua_assert(freenode != &G(L)->nilnode);
441 collide = hashkey(t, &n->key); 476 collide = hashkey(t, &n->key);
442 if (collide != n) { /* Colliding node not the main node? */ 477 if (collide != n) { /* Colliding node not the main node? */
diff --git a/src/lj_tab.h b/src/lj_tab.h
index f0d228eb..1da28bd9 100644
--- a/src/lj_tab.h
+++ b/src/lj_tab.h
@@ -34,10 +34,12 @@ static LJ_AINLINE uint32_t hashrot(uint32_t lo, uint32_t hi)
34#define hsize2hbits(s) ((s) ? ((s)==1 ? 1 : 1+lj_fls((uint32_t)((s)-1))) : 0) 34#define hsize2hbits(s) ((s) ? ((s)==1 ? 1 : 1+lj_fls((uint32_t)((s)-1))) : 0)
35 35
36LJ_FUNCA GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits); 36LJ_FUNCA GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits);
37LJ_FUNC GCtab *lj_tab_new_ah(lua_State *L, int32_t a, int32_t h);
37#if LJ_HASJIT 38#if LJ_HASJIT
38LJ_FUNC GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize); 39LJ_FUNC GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize);
39#endif 40#endif
40LJ_FUNCA GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt); 41LJ_FUNCA GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt);
42LJ_FUNC void LJ_FASTCALL lj_tab_clear(GCtab *t);
41LJ_FUNC void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t); 43LJ_FUNC void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t);
42#if LJ_HASFFI 44#if LJ_HASFFI
43LJ_FUNC void lj_tab_rehash(lua_State *L, GCtab *t); 45LJ_FUNC void lj_tab_rehash(lua_State *L, GCtab *t);
@@ -53,7 +55,7 @@ LJ_FUNCA cTValue *lj_tab_get(lua_State *L, GCtab *t, cTValue *key);
53/* Caveat: all setters require a write barrier for the stored value. */ 55/* Caveat: all setters require a write barrier for the stored value. */
54 56
55LJ_FUNCA TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key); 57LJ_FUNCA TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key);
56LJ_FUNC TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key); 58LJ_FUNCA TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key);
57LJ_FUNC TValue *lj_tab_setstr(lua_State *L, GCtab *t, GCstr *key); 59LJ_FUNC TValue *lj_tab_setstr(lua_State *L, GCtab *t, GCstr *key);
58LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key); 60LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key);
59 61
diff --git a/src/lj_target.h b/src/lj_target.h
index 1a242325..0daecb11 100644
--- a/src/lj_target.h
+++ b/src/lj_target.h
@@ -138,6 +138,8 @@ typedef uint32_t RegCost;
138#include "lj_target_x86.h" 138#include "lj_target_x86.h"
139#elif LJ_TARGET_ARM 139#elif LJ_TARGET_ARM
140#include "lj_target_arm.h" 140#include "lj_target_arm.h"
141#elif LJ_TARGET_ARM64
142#include "lj_target_arm64.h"
141#elif LJ_TARGET_PPC 143#elif LJ_TARGET_PPC
142#include "lj_target_ppc.h" 144#include "lj_target_ppc.h"
143#elif LJ_TARGET_MIPS 145#elif LJ_TARGET_MIPS
diff --git a/src/lj_target_arm.h b/src/lj_target_arm.h
index 6d4d0aed..0a243b37 100644
--- a/src/lj_target_arm.h
+++ b/src/lj_target_arm.h
@@ -243,10 +243,6 @@ typedef enum ARMIns {
243 ARMI_VCVT_S32_F64 = 0xeebd0bc0, 243 ARMI_VCVT_S32_F64 = 0xeebd0bc0,
244 ARMI_VCVT_U32_F32 = 0xeebc0ac0, 244 ARMI_VCVT_U32_F32 = 0xeebc0ac0,
245 ARMI_VCVT_U32_F64 = 0xeebc0bc0, 245 ARMI_VCVT_U32_F64 = 0xeebc0bc0,
246 ARMI_VCVTR_S32_F32 = 0xeebd0a40,
247 ARMI_VCVTR_S32_F64 = 0xeebd0b40,
248 ARMI_VCVTR_U32_F32 = 0xeebc0a40,
249 ARMI_VCVTR_U32_F64 = 0xeebc0b40,
250 ARMI_VCVT_F32_S32 = 0xeeb80ac0, 246 ARMI_VCVT_F32_S32 = 0xeeb80ac0,
251 ARMI_VCVT_F64_S32 = 0xeeb80bc0, 247 ARMI_VCVT_F64_S32 = 0xeeb80bc0,
252 ARMI_VCVT_F32_U32 = 0xeeb80a40, 248 ARMI_VCVT_F32_U32 = 0xeeb80a40,
diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h
new file mode 100644
index 00000000..99e0adc9
--- /dev/null
+++ b/src/lj_target_arm64.h
@@ -0,0 +1,97 @@
1/*
2** Definitions for ARM64 CPUs.
3** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_TARGET_ARM64_H
7#define _LJ_TARGET_ARM64_H
8
9/* -- Register IDs -------------------------------------------------------- */
10
11#define GPRDEF(_) \
12 _(X0) _(X1) _(X2) _(X3) _(X4) _(X5) _(X6) _(X7) \
13 _(X8) _(X9) _(X10) _(X11) _(X12) _(X13) _(X14) _(X15) \
14 _(X16) _(X17) _(X18) _(X19) _(X20) _(X21) _(X22) _(X23) \
15 _(X24) _(X25) _(X26) _(X27) _(X28) _(FP) _(LR) _(SP)
16#define FPRDEF(_) \
17 _(D0) _(D1) _(D2) _(D3) _(D4) _(D5) _(D6) _(D7) \
18 _(D8) _(D9) _(D10) _(D11) _(D12) _(D13) _(D14) _(D15) \
19 _(D16) _(D17) _(D18) _(D19) _(D20) _(D21) _(D22) _(D23) \
20 _(D24) _(D25) _(D26) _(D27) _(D28) _(D29) _(D30) _(D31)
21#define VRIDDEF(_)
22
23#define RIDENUM(name) RID_##name,
24
/* Register IDs for ARM64.
** GPRDEF/FPRDEF are expanded with RIDENUM, so the RID_* names are numbered
** consecutively: first the 32 GPR names (X0..X28, FP, LR, SP), then the
** 32 FPR names (D0..D31). The remaining entries are aliases and ranges.
*/
25enum {
26 GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */
27 FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */
28 RID_MAX, /* One past the last FPR, i.e. the total number of register IDs. */
29 RID_TMP = RID_LR, /* The temporary register is an alias for LR. */
30 RID_ZERO = RID_SP, /* The zero register shares its ID with SP. */
31
32 /* Calling conventions. */
33 RID_RET = RID_X0,
34 RID_FPRET = RID_D0,
35
36 /* These definitions must match with the *.dasc file(s): */
37 RID_BASE = RID_X19, /* Interpreter BASE. */
38 RID_LPC = RID_X21, /* Interpreter PC. */
39 RID_GL = RID_X22, /* Interpreter GL. */
40 RID_LREG = RID_X23, /* Interpreter L. */
41
42 /* Register ranges [min, max) and number of registers. */
43 RID_MIN_GPR = RID_X0,
44 RID_MAX_GPR = RID_SP+1,
45 RID_MIN_FPR = RID_MAX_GPR, /* FPR IDs directly follow the GPR IDs. */
46 RID_MAX_FPR = RID_D31+1,
47 RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR,
48 RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR
49};
50
51#define RID_NUM_KREF RID_NUM_GPR
52#define RID_MIN_KREF RID_X0
53
54/* -- Register sets ------------------------------------------------------- */
55
56/* Make use of all registers, except for x18, fp, lr and sp. */
57#define RSET_FIXED \
58 (RID2RSET(RID_X18)|RID2RSET(RID_FP)|RID2RSET(RID_LR)|RID2RSET(RID_SP))
/* All four fixed registers lie inside [RID_MIN_GPR, RID_MAX_GPR), so the
** subtraction below acts as a set difference -- assuming RID2RSET and
** RSET_RANGE (defined elsewhere) build bit masks as their names suggest.
*/
59#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED)
60#define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)
61#define RSET_ALL (RSET_GPR|RSET_FPR)
62#define RSET_INIT RSET_ALL
63
64/* lr is an implicit scratch register. */
/* Scratch GPRs are x0..x17. Scratch FPRs are d0..d7 and d16..d31, i.e.
** d8..d15 are left out of the scratch set.
*/
65#define RSET_SCRATCH_GPR (RSET_RANGE(RID_X0, RID_X17+1))
66#define RSET_SCRATCH_FPR \
67 (RSET_RANGE(RID_D0, RID_D7+1)|RSET_RANGE(RID_D16, RID_D31+1))
68#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR)
/* Argument register ranges (inclusive first/last) and their sizes. */
69#define REGARG_FIRSTGPR RID_X0
70#define REGARG_LASTGPR RID_X7
71#define REGARG_NUMGPR 8 /* x0..x7 */
72#define REGARG_FIRSTFPR RID_D0
73#define REGARG_LASTFPR RID_D7
74#define REGARG_NUMFPR 8 /* d0..d7 */
75
76/* -- Instructions -------------------------------------------------------- */
77
78/* Instruction fields.
** Each macro shifts its operand to the bit offset of one field of the
** instruction word. The macros only shift; they do not mask the operand
** to the field width. Field widths in the notes below are taken from the
** macro names, not enforced by the code.
*/
79#define A64F_D(r) (r) /* Register field at bit 0. */
80#define A64F_N(r) ((r) << 5) /* Register field at bit 5. */
81#define A64F_A(r) ((r) << 10) /* Register field at bit 10. */
82#define A64F_M(r) ((r) << 16) /* Register field at bit 16. */
83#define A64F_U16(x) ((x) << 5) /* 16 bit unsigned immediate at bit 5. */
84#define A64F_S26(x) (x) /* 26 bit signed immediate at bit 0. */
85#define A64F_S19(x) ((x) << 5) /* 19 bit signed immediate at bit 5. */
86
/* ARM64 instruction encodings with all operand fields zero.
** Presumably the A64F_* field macros are OR'ed into these by the emitter;
** no user of this enum is visible here. A trailing w/x in a name selects
** the 32 bit/64 bit register form of the same instruction.
*/
87typedef enum A64Ins {
88 A64I_MOVZw = 0x52800000, /* movz Wd, #imm16 */
89 A64I_MOVZx = 0xd2800000, /* movz Xd, #imm16 */
90 A64I_LDRLw = 0x18000000, /* ldr Wt, <label> (PC-relative literal load) */
91 A64I_LDRLx = 0x58000000, /* ldr Xt, <label> (PC-relative literal load) */
92 A64I_NOP = 0xd503201f, /* nop */
93 A64I_B = 0x14000000, /* b <label> */
94 A64I_BR = 0xd61f0000, /* br Xn */
95} A64Ins;
96
97#endif
diff --git a/src/lj_target_mips.h b/src/lj_target_mips.h
index 0ab140bf..76645bca 100644
--- a/src/lj_target_mips.h
+++ b/src/lj_target_mips.h
@@ -169,6 +169,9 @@ typedef enum MIPSIns {
169 MIPSI_SLTU = 0x0000002b, 169 MIPSI_SLTU = 0x0000002b,
170 MIPSI_MOVZ = 0x0000000a, 170 MIPSI_MOVZ = 0x0000000a,
171 MIPSI_MOVN = 0x0000000b, 171 MIPSI_MOVN = 0x0000000b,
172 MIPSI_MFHI = 0x00000010,
173 MIPSI_MFLO = 0x00000012,
174 MIPSI_MULT = 0x00000018,
172 175
173 MIPSI_SLL = 0x00000000, 176 MIPSI_SLL = 0x00000000,
174 MIPSI_SRL = 0x00000002, 177 MIPSI_SRL = 0x00000002,
diff --git a/src/lj_target_ppc.h b/src/lj_target_ppc.h
index 2caeeb04..99867688 100644
--- a/src/lj_target_ppc.h
+++ b/src/lj_target_ppc.h
@@ -104,7 +104,7 @@ enum {
104/* This definition must match with the *.dasc file(s). */ 104/* This definition must match with the *.dasc file(s). */
105typedef struct { 105typedef struct {
106 lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ 106 lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */
107 int32_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ 107 intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */
108 int32_t spill[256]; /* Spill slots. */ 108 int32_t spill[256]; /* Spill slots. */
109} ExitState; 109} ExitState;
110 110
diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h
index d12a1b87..65e438fd 100644
--- a/src/lj_target_x86.h
+++ b/src/lj_target_x86.h
@@ -33,6 +33,7 @@ enum {
33 RID_MRM = RID_MAX, /* Pseudo-id for ModRM operand. */ 33 RID_MRM = RID_MAX, /* Pseudo-id for ModRM operand. */
34 34
35 /* Calling conventions. */ 35 /* Calling conventions. */
36 RID_SP = RID_ESP,
36 RID_RET = RID_EAX, 37 RID_RET = RID_EAX,
37#if LJ_64 38#if LJ_64
38 RID_FPRET = RID_XMM0, 39 RID_FPRET = RID_XMM0,
@@ -277,10 +278,8 @@ typedef enum {
277 XO_ROUNDSD = 0x0b3a0ffc, /* Really 66 0f 3a 0b. See asm_fpmath. */ 278 XO_ROUNDSD = 0x0b3a0ffc, /* Really 66 0f 3a 0b. See asm_fpmath. */
278 XO_UCOMISD = XO_660f(2e), 279 XO_UCOMISD = XO_660f(2e),
279 XO_CVTSI2SD = XO_f20f(2a), 280 XO_CVTSI2SD = XO_f20f(2a),
280 XO_CVTSD2SI = XO_f20f(2d),
281 XO_CVTTSD2SI= XO_f20f(2c), 281 XO_CVTTSD2SI= XO_f20f(2c),
282 XO_CVTSI2SS = XO_f30f(2a), 282 XO_CVTSI2SS = XO_f30f(2a),
283 XO_CVTSS2SI = XO_f30f(2d),
284 XO_CVTTSS2SI= XO_f30f(2c), 283 XO_CVTTSS2SI= XO_f30f(2c),
285 XO_CVTSS2SD = XO_f30f(5a), 284 XO_CVTSS2SD = XO_f30f(5a),
286 XO_CVTSD2SS = XO_f20f(5a), 285 XO_CVTSD2SS = XO_f20f(5a),
diff --git a/src/lj_trace.c b/src/lj_trace.c
index e51ec546..39ff0461 100644
--- a/src/lj_trace.c
+++ b/src/lj_trace.c
@@ -360,7 +360,7 @@ static void trace_start(jit_State *J)
360 TraceNo traceno; 360 TraceNo traceno;
361 361
362 if ((J->pt->flags & PROTO_NOJIT)) { /* JIT disabled for this proto? */ 362 if ((J->pt->flags & PROTO_NOJIT)) { /* JIT disabled for this proto? */
363 if (J->parent == 0) { 363 if (J->parent == 0 && J->exitno == 0) {
364 /* Lazy bytecode patching to disable hotcount events. */ 364 /* Lazy bytecode patching to disable hotcount events. */
365 lua_assert(bc_op(*J->pc) == BC_FORL || bc_op(*J->pc) == BC_ITERL || 365 lua_assert(bc_op(*J->pc) == BC_FORL || bc_op(*J->pc) == BC_ITERL ||
366 bc_op(*J->pc) == BC_LOOP || bc_op(*J->pc) == BC_FUNCF); 366 bc_op(*J->pc) == BC_LOOP || bc_op(*J->pc) == BC_FUNCF);
@@ -453,6 +453,12 @@ static void trace_stop(jit_State *J)
453 root->nextside = (TraceNo1)traceno; 453 root->nextside = (TraceNo1)traceno;
454 } 454 }
455 break; 455 break;
456 case BC_CALLM:
457 case BC_CALL:
458 case BC_ITERC:
459 /* Trace stitching: patch link of previous trace. */
460 traceref(J, J->exitno)->link = traceno;
461 break;
456 default: 462 default:
457 lua_assert(0); 463 lua_assert(0);
458 break; 464 break;
@@ -467,6 +473,7 @@ static void trace_stop(jit_State *J)
467 lj_vmevent_send(L, TRACE, 473 lj_vmevent_send(L, TRACE,
468 setstrV(L, L->top++, lj_str_newlit(L, "stop")); 474 setstrV(L, L->top++, lj_str_newlit(L, "stop"));
469 setintV(L->top++, traceno); 475 setintV(L->top++, traceno);
476 setfuncV(L, L->top++, J->fn);
470 ); 477 );
471} 478}
472 479
@@ -502,8 +509,12 @@ static int trace_abort(jit_State *J)
502 return 1; /* Retry ASM with new MCode area. */ 509 return 1; /* Retry ASM with new MCode area. */
503 } 510 }
504 /* Penalize or blacklist starting bytecode instruction. */ 511 /* Penalize or blacklist starting bytecode instruction. */
505 if (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))) 512 if (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))) {
506 penalty_pc(J, &gcref(J->cur.startpt)->pt, mref(J->cur.startpc, BCIns), e); 513 if (J->exitno == 0)
514 penalty_pc(J, &gcref(J->cur.startpt)->pt, mref(J->cur.startpc, BCIns), e);
515 else
516 traceref(J, J->exitno)->link = J->exitno; /* Self-link is blacklisted. */
517 }
507 518
508 /* Is there anything to abort? */ 519 /* Is there anything to abort? */
509 traceno = J->cur.traceno; 520 traceno = J->cur.traceno;
@@ -672,6 +683,7 @@ static void trace_hotside(jit_State *J, const BCIns *pc)
672{ 683{
673 SnapShot *snap = &traceref(J, J->parent)->snap[J->exitno]; 684 SnapShot *snap = &traceref(J, J->parent)->snap[J->exitno];
674 if (!(J2G(J)->hookmask & (HOOK_GC|HOOK_VMEVENT)) && 685 if (!(J2G(J)->hookmask & (HOOK_GC|HOOK_VMEVENT)) &&
686 isluafunc(curr_func(J->L)) &&
675 snap->count != SNAPCOUNT_DONE && 687 snap->count != SNAPCOUNT_DONE &&
676 ++snap->count >= J->param[JIT_P_hotexit]) { 688 ++snap->count >= J->param[JIT_P_hotexit]) {
677 lua_assert(J->state == LJ_TRACE_IDLE); 689 lua_assert(J->state == LJ_TRACE_IDLE);
@@ -681,6 +693,20 @@ static void trace_hotside(jit_State *J, const BCIns *pc)
681 } 693 }
682} 694}
683 695
696/* Stitch a new trace to the previous trace.
** J is the JIT state and pc is the bytecode position handed to
** lj_trace_ins(). Does nothing unless the trace state is LJ_TRACE_IDLE and
** neither HOOK_GC nor HOOK_VMEVENT is set in the hook mask.
*/
697void LJ_FASTCALL lj_trace_stitch(jit_State *J, const BCIns *pc)
698{
699 /* Only start a new trace if not recording or inside __gc call or vmevent. */
700 if (J->state == LJ_TRACE_IDLE &&
701 !(J2G(J)->hookmask & (HOOK_GC|HOOK_VMEVENT))) {
702 J->parent = 0; /* Have to treat it like a root trace. */
703 /* J->exitno is set to the invoking trace. */
 /* With parent == 0, a non-zero J->exitno is what trace_start() and
 ** trace_abort() check to tell a stitched trace from a regular root trace;
 ** trace_stop() uses it to patch the link of the invoking trace.
 */
704 J->state = LJ_TRACE_START;
705 lj_trace_ins(J, pc);
706 }
707}
708
709
684/* Tiny struct to pass data to protected call. */ 710/* Tiny struct to pass data to protected call. */
685typedef struct ExitDataCP { 711typedef struct ExitDataCP {
686 jit_State *J; 712 jit_State *J;
@@ -767,17 +793,20 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr)
767 if (errcode) 793 if (errcode)
768 return -errcode; /* Return negated error code. */ 794 return -errcode; /* Return negated error code. */
769 795
770 lj_vmevent_send(L, TEXIT, 796 if (!(LJ_HASPROFILE && (G(L)->hookmask & HOOK_PROFILE)))
771 lj_state_checkstack(L, 4+RID_NUM_GPR+RID_NUM_FPR+LUA_MINSTACK); 797 lj_vmevent_send(L, TEXIT,
772 setintV(L->top++, J->parent); 798 lj_state_checkstack(L, 4+RID_NUM_GPR+RID_NUM_FPR+LUA_MINSTACK);
773 setintV(L->top++, J->exitno); 799 setintV(L->top++, J->parent);
774 trace_exit_regs(L, ex); 800 setintV(L->top++, J->exitno);
775 ); 801 trace_exit_regs(L, ex);
802 );
776 803
777 pc = exd.pc; 804 pc = exd.pc;
778 cf = cframe_raw(L->cframe); 805 cf = cframe_raw(L->cframe);
779 setcframe_pc(cf, pc); 806 setcframe_pc(cf, pc);
780 if (G(L)->gc.state == GCSatomic || G(L)->gc.state == GCSfinalize) { 807 if (LJ_HASPROFILE && (G(L)->hookmask & HOOK_PROFILE)) {
808 /* Just exit to interpreter. */
809 } else if (G(L)->gc.state == GCSatomic || G(L)->gc.state == GCSfinalize) {
781 if (!(G(L)->hookmask & HOOK_GC)) 810 if (!(G(L)->hookmask & HOOK_GC))
782 lj_gc_step(L); /* Exited because of GC: drive GC forward. */ 811 lj_gc_step(L); /* Exited because of GC: drive GC forward. */
783 } else { 812 } else {
@@ -801,7 +830,7 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr)
801 ERRNO_RESTORE 830 ERRNO_RESTORE
802 switch (bc_op(*pc)) { 831 switch (bc_op(*pc)) {
803 case BC_CALLM: case BC_CALLMT: 832 case BC_CALLM: case BC_CALLMT:
804 return (int)((BCReg)(L->top - L->base) - bc_a(*pc) - bc_c(*pc)); 833 return (int)((BCReg)(L->top - L->base) - bc_a(*pc) - bc_c(*pc) + LJ_FR2);
805 case BC_RETM: 834 case BC_RETM:
806 return (int)((BCReg)(L->top - L->base) + 1 - bc_a(*pc) - bc_d(*pc)); 835 return (int)((BCReg)(L->top - L->base) + 1 - bc_a(*pc) - bc_d(*pc));
807 case BC_TSETM: 836 case BC_TSETM:
diff --git a/src/lj_trace.h b/src/lj_trace.h
index 4fbe5cf2..9eaf91b0 100644
--- a/src/lj_trace.h
+++ b/src/lj_trace.h
@@ -34,6 +34,7 @@ LJ_FUNC void lj_trace_freestate(global_State *g);
34/* Event handling. */ 34/* Event handling. */
35LJ_FUNC void lj_trace_ins(jit_State *J, const BCIns *pc); 35LJ_FUNC void lj_trace_ins(jit_State *J, const BCIns *pc);
36LJ_FUNCA void LJ_FASTCALL lj_trace_hot(jit_State *J, const BCIns *pc); 36LJ_FUNCA void LJ_FASTCALL lj_trace_hot(jit_State *J, const BCIns *pc);
37LJ_FUNCA void LJ_FASTCALL lj_trace_stitch(jit_State *J, const BCIns *pc);
37LJ_FUNCA int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr); 38LJ_FUNCA int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr);
38 39
39/* Signal asynchronous abort of trace or end of trace. */ 40/* Signal asynchronous abort of trace or end of trace. */
diff --git a/src/lj_traceerr.h b/src/lj_traceerr.h
index 9bef117a..6b377cb0 100644
--- a/src/lj_traceerr.h
+++ b/src/lj_traceerr.h
@@ -7,6 +7,7 @@
7 7
8/* Recording. */ 8/* Recording. */
9TREDEF(RECERR, "error thrown or hook called during recording") 9TREDEF(RECERR, "error thrown or hook called during recording")
10TREDEF(TRACEUV, "trace too short")
10TREDEF(TRACEOV, "trace too long") 11TREDEF(TRACEOV, "trace too long")
11TREDEF(STACKOV, "trace too deep") 12TREDEF(STACKOV, "trace too deep")
12TREDEF(SNAPOV, "too many snapshots") 13TREDEF(SNAPOV, "too many snapshots")
@@ -23,8 +24,6 @@ TREDEF(BADTYPE, "bad argument type")
23TREDEF(CJITOFF, "JIT compilation disabled for function") 24TREDEF(CJITOFF, "JIT compilation disabled for function")
24TREDEF(CUNROLL, "call unroll limit reached") 25TREDEF(CUNROLL, "call unroll limit reached")
25TREDEF(DOWNREC, "down-recursion, restarting") 26TREDEF(DOWNREC, "down-recursion, restarting")
26TREDEF(NYICF, "NYI: C function %p")
27TREDEF(NYIFF, "NYI: FastFunc %s")
28TREDEF(NYIFFU, "NYI: unsupported variant of FastFunc %s") 27TREDEF(NYIFFU, "NYI: unsupported variant of FastFunc %s")
29TREDEF(NYIRETL, "NYI: return to lower frame") 28TREDEF(NYIRETL, "NYI: return to lower frame")
30 29
diff --git a/src/lj_vm.h b/src/lj_vm.h
index 036cabc5..b31e22f7 100644
--- a/src/lj_vm.h
+++ b/src/lj_vm.h
@@ -43,13 +43,14 @@ LJ_ASMF void lj_vm_record(void);
43LJ_ASMF void lj_vm_inshook(void); 43LJ_ASMF void lj_vm_inshook(void);
44LJ_ASMF void lj_vm_rethook(void); 44LJ_ASMF void lj_vm_rethook(void);
45LJ_ASMF void lj_vm_callhook(void); 45LJ_ASMF void lj_vm_callhook(void);
46LJ_ASMF void lj_vm_profhook(void);
46 47
47/* Trace exit handling. */ 48/* Trace exit handling. */
48LJ_ASMF void lj_vm_exit_handler(void); 49LJ_ASMF void lj_vm_exit_handler(void);
49LJ_ASMF void lj_vm_exit_interp(void); 50LJ_ASMF void lj_vm_exit_interp(void);
50 51
51/* Internal math helper functions. */ 52/* Internal math helper functions. */
52#if LJ_TARGET_X86ORX64 || LJ_TARGET_PPC 53#if LJ_TARGET_PPC || LJ_TARGET_ARM64
53#define lj_vm_floor floor 54#define lj_vm_floor floor
54#define lj_vm_ceil ceil 55#define lj_vm_ceil ceil
55#else 56#else
@@ -60,7 +61,7 @@ LJ_ASMF double lj_vm_floor_sf(double);
60LJ_ASMF double lj_vm_ceil_sf(double); 61LJ_ASMF double lj_vm_ceil_sf(double);
61#endif 62#endif
62#endif 63#endif
63#if defined(LUAJIT_NO_LOG2) || LJ_TARGET_X86ORX64 64#ifdef LUAJIT_NO_LOG2
64LJ_ASMF double lj_vm_log2(double); 65LJ_ASMF double lj_vm_log2(double);
65#else 66#else
66#define lj_vm_log2 log2 67#define lj_vm_log2 log2
@@ -71,12 +72,12 @@ LJ_ASMF double lj_vm_log2(double);
71LJ_ASMF void lj_vm_floor_sse(void); 72LJ_ASMF void lj_vm_floor_sse(void);
72LJ_ASMF void lj_vm_ceil_sse(void); 73LJ_ASMF void lj_vm_ceil_sse(void);
73LJ_ASMF void lj_vm_trunc_sse(void); 74LJ_ASMF void lj_vm_trunc_sse(void);
74LJ_ASMF void lj_vm_exp_x87(void);
75LJ_ASMF void lj_vm_exp2_x87(void);
76LJ_ASMF void lj_vm_pow_sse(void);
77LJ_ASMF void lj_vm_powi_sse(void); 75LJ_ASMF void lj_vm_powi_sse(void);
76#define lj_vm_powi NULL
78#else 77#else
79#if LJ_TARGET_PPC 78LJ_ASMF double lj_vm_powi(double, int32_t);
79#endif
80#if LJ_TARGET_PPC || LJ_TARGET_ARM64
80#define lj_vm_trunc trunc 81#define lj_vm_trunc trunc
81#else 82#else
82LJ_ASMF double lj_vm_trunc(double); 83LJ_ASMF double lj_vm_trunc(double);
@@ -84,13 +85,11 @@ LJ_ASMF double lj_vm_trunc(double);
84LJ_ASMF double lj_vm_trunc_sf(double); 85LJ_ASMF double lj_vm_trunc_sf(double);
85#endif 86#endif
86#endif 87#endif
87LJ_ASMF double lj_vm_powi(double, int32_t);
88#ifdef LUAJIT_NO_EXP2 88#ifdef LUAJIT_NO_EXP2
89LJ_ASMF double lj_vm_exp2(double); 89LJ_ASMF double lj_vm_exp2(double);
90#else 90#else
91#define lj_vm_exp2 exp2 91#define lj_vm_exp2 exp2
92#endif 92#endif
93#endif
94LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t, int32_t); 93LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t, int32_t);
95#if LJ_HASFFI 94#if LJ_HASFFI
96LJ_ASMF int lj_vm_errno(void); 95LJ_ASMF int lj_vm_errno(void);
@@ -104,8 +103,7 @@ LJ_ASMF void lj_cont_nop(void); /* Do nothing, just continue execution. */
104LJ_ASMF void lj_cont_condt(void); /* Branch if result is true. */ 103LJ_ASMF void lj_cont_condt(void); /* Branch if result is true. */
105LJ_ASMF void lj_cont_condf(void); /* Branch if result is false. */ 104LJ_ASMF void lj_cont_condf(void); /* Branch if result is false. */
106LJ_ASMF void lj_cont_hook(void); /* Continue from hook yield. */ 105LJ_ASMF void lj_cont_hook(void); /* Continue from hook yield. */
107 106LJ_ASMF void lj_cont_stitch(void); /* Trace stitching. */
108enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
109 107
110/* Start of the ASM code. */ 108/* Start of the ASM code. */
111LJ_ASMF char lj_vm_asm_begin[]; 109LJ_ASMF char lj_vm_asm_begin[];
diff --git a/src/lj_vmevent.c b/src/lj_vmevent.c
index 81fe47d4..87ebcfbd 100644
--- a/src/lj_vmevent.c
+++ b/src/lj_vmevent.c
@@ -27,6 +27,7 @@ ptrdiff_t lj_vmevent_prepare(lua_State *L, VMEvent ev)
27 if (tv && tvisfunc(tv)) { 27 if (tv && tvisfunc(tv)) {
28 lj_state_checkstack(L, LUA_MINSTACK); 28 lj_state_checkstack(L, LUA_MINSTACK);
29 setfuncV(L, L->top++, funcV(tv)); 29 setfuncV(L, L->top++, funcV(tv));
30 if (LJ_FR2) setnilV(L->top++);
30 return savestack(L, L->top); 31 return savestack(L, L->top);
31 } 32 }
32 } 33 }
diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c
index 31c6029f..ecad2950 100644
--- a/src/lj_vmmath.c
+++ b/src/lj_vmmath.c
@@ -13,16 +13,29 @@
13#include "lj_ir.h" 13#include "lj_ir.h"
14#include "lj_vm.h" 14#include "lj_vm.h"
15 15
16/* -- Helper functions for generated machine code ------------------------- */ 16/* -- Wrapper functions --------------------------------------------------- */
17 17
18#if LJ_TARGET_X86ORX64 18#if LJ_TARGET_X86 && __ELF__ && __PIC__
19/* Wrapper functions to avoid linker issues on OSX. */ 19/* Wrapper functions to deal with the ELF/x86 PIC disaster. */
20LJ_FUNCA double lj_vm_sinh(double x) { return sinh(x); } 20LJ_FUNCA double lj_wrap_log(double x) { return log(x); }
21LJ_FUNCA double lj_vm_cosh(double x) { return cosh(x); } 21LJ_FUNCA double lj_wrap_log10(double x) { return log10(x); }
22LJ_FUNCA double lj_vm_tanh(double x) { return tanh(x); } 22LJ_FUNCA double lj_wrap_exp(double x) { return exp(x); }
23LJ_FUNCA double lj_wrap_sin(double x) { return sin(x); }
24LJ_FUNCA double lj_wrap_cos(double x) { return cos(x); }
25LJ_FUNCA double lj_wrap_tan(double x) { return tan(x); }
26LJ_FUNCA double lj_wrap_asin(double x) { return asin(x); }
27LJ_FUNCA double lj_wrap_acos(double x) { return acos(x); }
28LJ_FUNCA double lj_wrap_atan(double x) { return atan(x); }
29LJ_FUNCA double lj_wrap_sinh(double x) { return sinh(x); }
30LJ_FUNCA double lj_wrap_cosh(double x) { return cosh(x); }
31LJ_FUNCA double lj_wrap_tanh(double x) { return tanh(x); }
32LJ_FUNCA double lj_wrap_atan2(double x, double y) { return atan2(x, y); }
33LJ_FUNCA double lj_wrap_pow(double x, double y) { return pow(x, y); }
34LJ_FUNCA double lj_wrap_fmod(double x, double y) { return fmod(x, y); }
23#endif 35#endif
24 36
25#if !LJ_TARGET_X86ORX64 37/* -- Helper functions for generated machine code ------------------------- */
38
26double lj_vm_foldarith(double x, double y, int op) 39double lj_vm_foldarith(double x, double y, int op)
27{ 40{
28 switch (op) { 41 switch (op) {
@@ -43,7 +56,6 @@ double lj_vm_foldarith(double x, double y, int op)
43 default: return x; 56 default: return x;
44 } 57 }
45} 58}
46#endif
47 59
48#if LJ_HASJIT 60#if LJ_HASJIT
49 61
@@ -61,7 +73,7 @@ double lj_vm_exp2(double a)
61} 73}
62#endif 74#endif
63 75
64#if !(LJ_TARGET_ARM || LJ_TARGET_PPC) 76#if !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)
65int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b) 77int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b)
66{ 78{
67 uint32_t y, ua, ub; 79 uint32_t y, ua, ub;
@@ -107,6 +119,7 @@ double lj_vm_powi(double x, int32_t k)
107 else 119 else
108 return 1.0 / lj_vm_powui(x, (uint32_t)-k); 120 return 1.0 / lj_vm_powui(x, (uint32_t)-k);
109} 121}
122#endif
110 123
111/* Computes fpm(x) for extended math functions. */ 124/* Computes fpm(x) for extended math functions. */
112double lj_vm_foldfpm(double x, int fpm) 125double lj_vm_foldfpm(double x, int fpm)
@@ -128,7 +141,6 @@ double lj_vm_foldfpm(double x, int fpm)
128 } 141 }
129 return 0; 142 return 0;
130} 143}
131#endif
132 144
133#if LJ_HASFFI 145#if LJ_HASFFI
134int lj_vm_errno(void) 146int lj_vm_errno(void)
diff --git a/src/ljamalg.c b/src/ljamalg.c
index 9b237b7e..be0c52d7 100644
--- a/src/ljamalg.c
+++ b/src/ljamalg.c
@@ -33,6 +33,7 @@
33#include "lj_char.c" 33#include "lj_char.c"
34#include "lj_bc.c" 34#include "lj_bc.c"
35#include "lj_obj.c" 35#include "lj_obj.c"
36#include "lj_buf.c"
36#include "lj_str.c" 37#include "lj_str.c"
37#include "lj_tab.c" 38#include "lj_tab.c"
38#include "lj_func.c" 39#include "lj_func.c"
@@ -44,7 +45,9 @@
44#include "lj_vmevent.c" 45#include "lj_vmevent.c"
45#include "lj_vmmath.c" 46#include "lj_vmmath.c"
46#include "lj_strscan.c" 47#include "lj_strscan.c"
48#include "lj_strfmt.c"
47#include "lj_api.c" 49#include "lj_api.c"
50#include "lj_profile.c"
48#include "lj_lex.c" 51#include "lj_lex.c"
49#include "lj_parse.c" 52#include "lj_parse.c"
50#include "lj_bcread.c" 53#include "lj_bcread.c"
diff --git a/src/luaconf.h b/src/luaconf.h
index 7f57bae1..043590b2 100644
--- a/src/luaconf.h
+++ b/src/luaconf.h
@@ -37,7 +37,7 @@
37#endif 37#endif
38#define LUA_LROOT "/usr/local" 38#define LUA_LROOT "/usr/local"
39#define LUA_LUADIR "/lua/5.1/" 39#define LUA_LUADIR "/lua/5.1/"
40#define LUA_LJDIR "/luajit-2.0.3/" 40#define LUA_LJDIR "/luajit-2.1.0-alpha/"
41 41
42#ifdef LUA_ROOT 42#ifdef LUA_ROOT
43#define LUA_JROOT LUA_ROOT 43#define LUA_JROOT LUA_ROOT
diff --git a/src/luajit.c b/src/luajit.c
index 85d713fb..0ebc7300 100644
--- a/src/luajit.c
+++ b/src/luajit.c
@@ -61,8 +61,9 @@ static void laction(int i)
61 61
62static void print_usage(void) 62static void print_usage(void)
63{ 63{
64 fprintf(stderr, 64 fputs("usage: ", stderr);
65 "usage: %s [options]... [script [args]...].\n" 65 fputs(progname, stderr);
66 fputs(" [options]... [script [args]...].\n"
66 "Available options are:\n" 67 "Available options are:\n"
67 " -e chunk Execute string " LUA_QL("chunk") ".\n" 68 " -e chunk Execute string " LUA_QL("chunk") ".\n"
68 " -l name Require library " LUA_QL("name") ".\n" 69 " -l name Require library " LUA_QL("name") ".\n"
@@ -73,16 +74,14 @@ static void print_usage(void)
73 " -v Show version information.\n" 74 " -v Show version information.\n"
74 " -E Ignore environment variables.\n" 75 " -E Ignore environment variables.\n"
75 " -- Stop handling options.\n" 76 " -- Stop handling options.\n"
76 " - Execute stdin and stop handling options.\n" 77 " - Execute stdin and stop handling options.\n", stderr);
77 ,
78 progname);
79 fflush(stderr); 78 fflush(stderr);
80} 79}
81 80
82static void l_message(const char *pname, const char *msg) 81static void l_message(const char *pname, const char *msg)
83{ 82{
84 if (pname) fprintf(stderr, "%s: ", pname); 83 if (pname) { fputs(pname, stderr); fputc(':', stderr); fputc(' ', stderr); }
85 fprintf(stderr, "%s\n", msg); 84 fputs(msg, stderr); fputc('\n', stderr);
86 fflush(stderr); 85 fflush(stderr);
87} 86}
88 87
diff --git a/src/luajit.h b/src/luajit.h
index 901807ab..3db4bba7 100644
--- a/src/luajit.h
+++ b/src/luajit.h
@@ -30,9 +30,9 @@
30 30
31#include "lua.h" 31#include "lua.h"
32 32
33#define LUAJIT_VERSION "LuaJIT 2.0.3" 33#define LUAJIT_VERSION "LuaJIT 2.1.0-alpha"
34#define LUAJIT_VERSION_NUM 20003 /* Version 2.0.3 = 02.00.03. */ 34#define LUAJIT_VERSION_NUM 20100 /* Version 2.1.0 = 02.01.00. */
35#define LUAJIT_VERSION_SYM luaJIT_version_2_0_3 35#define LUAJIT_VERSION_SYM luaJIT_version_2_1_0_alpha
36#define LUAJIT_COPYRIGHT "Copyright (C) 2005-2015 Mike Pall" 36#define LUAJIT_COPYRIGHT "Copyright (C) 2005-2015 Mike Pall"
37#define LUAJIT_URL "http://luajit.org/" 37#define LUAJIT_URL "http://luajit.org/"
38 38
@@ -64,6 +64,15 @@ enum {
64/* Control the JIT engine. */ 64/* Control the JIT engine. */
65LUA_API int luaJIT_setmode(lua_State *L, int idx, int mode); 65LUA_API int luaJIT_setmode(lua_State *L, int idx, int mode);
66 66
67/* Low-overhead profiling API. */
68typedef void (*luaJIT_profile_callback)(void *data, lua_State *L,
69 int samples, int vmstate);
70LUA_API void luaJIT_profile_start(lua_State *L, const char *mode,
71 luaJIT_profile_callback cb, void *data);
72LUA_API void luaJIT_profile_stop(lua_State *L);
73LUA_API const char *luaJIT_profile_dumpstack(lua_State *L, const char *fmt,
74 int depth, size_t *len);
75
67/* Enforce (dynamic) linker error for version mismatches. Call from main. */ 76/* Enforce (dynamic) linker error for version mismatches. Call from main. */
68LUA_API void LUAJIT_VERSION_SYM(void); 77LUA_API void LUAJIT_VERSION_SYM(void);
69 78
diff --git a/src/msvcbuild.bat b/src/msvcbuild.bat
index 4b501855..0360d7e3 100644
--- a/src/msvcbuild.bat
+++ b/src/msvcbuild.bat
@@ -37,6 +37,7 @@ if exist minilua.exe.manifest^
37@if errorlevel 8 goto :X64 37@if errorlevel 8 goto :X64
38@set DASMFLAGS=-D WIN -D JIT -D FFI 38@set DASMFLAGS=-D WIN -D JIT -D FFI
39@set LJARCH=x86 39@set LJARCH=x86
40@set LJCOMPILE=%LJCOMPILE% /arch:SSE2
40:X64 41:X64
41minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_x86.dasc 42minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_x86.dasc
42@if errorlevel 1 goto :BAD 43@if errorlevel 1 goto :BAD
diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc
index 457efa63..0bd9b147 100644
--- a/src/vm_arm.dasc
+++ b/src/vm_arm.dasc
@@ -99,6 +99,7 @@
99|.type NODE, Node 99|.type NODE, Node
100|.type NARGS8, int 100|.type NARGS8, int
101|.type TRACE, GCtrace 101|.type TRACE, GCtrace
102|.type SBUF, SBuf
102| 103|
103|//----------------------------------------------------------------------- 104|//-----------------------------------------------------------------------
104| 105|
@@ -418,13 +419,14 @@ static void build_subroutines(BuildCtx *ctx)
418 | add CARG2, sp, #CFRAME_RESUME 419 | add CARG2, sp, #CFRAME_RESUME
419 | ldrb CARG1, L->status 420 | ldrb CARG1, L->status
420 | str CARG3, SAVE_ERRF 421 | str CARG3, SAVE_ERRF
421 | str CARG2, L->cframe 422 | str L, SAVE_PC // Any value outside of bytecode is ok.
422 | str CARG3, SAVE_CFRAME 423 | str CARG3, SAVE_CFRAME
423 | cmp CARG1, #0 424 | cmp CARG1, #0
424 | str L, SAVE_PC // Any value outside of bytecode is ok. 425 | str CARG2, L->cframe
425 | beq >3 426 | beq >3
426 | 427 |
427 | // Resume after yield (like a return). 428 | // Resume after yield (like a return).
429 | str L, [DISPATCH, #DISPATCH_GL(cur_L)]
428 | mov RA, BASE 430 | mov RA, BASE
429 | ldr BASE, L->base 431 | ldr BASE, L->base
430 | ldr CARG1, L->top 432 | ldr CARG1, L->top
@@ -458,14 +460,15 @@ static void build_subroutines(BuildCtx *ctx)
458 | str CARG3, SAVE_NRES 460 | str CARG3, SAVE_NRES
459 | mov L, CARG1 461 | mov L, CARG1
460 | str CARG1, SAVE_L 462 | str CARG1, SAVE_L
461 | mov BASE, CARG2
462 | str sp, L->cframe // Add our C frame to cframe chain.
463 | ldr DISPATCH, L->glref // Setup pointer to dispatch table. 463 | ldr DISPATCH, L->glref // Setup pointer to dispatch table.
464 | mov BASE, CARG2
464 | str CARG1, SAVE_PC // Any value outside of bytecode is ok. 465 | str CARG1, SAVE_PC // Any value outside of bytecode is ok.
465 | str RC, SAVE_CFRAME 466 | str RC, SAVE_CFRAME
466 | add DISPATCH, DISPATCH, #GG_G2DISP 467 | add DISPATCH, DISPATCH, #GG_G2DISP
468 | str sp, L->cframe // Add our C frame to cframe chain.
467 | 469 |
468 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). 470 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
471 | str L, [DISPATCH, #DISPATCH_GL(cur_L)]
469 | ldr RB, L->base // RB = old base (for vmeta_call). 472 | ldr RB, L->base // RB = old base (for vmeta_call).
470 | ldr CARG1, L->top 473 | ldr CARG1, L->top
471 | mov MASKR8, #255 474 | mov MASKR8, #255
@@ -491,20 +494,21 @@ static void build_subroutines(BuildCtx *ctx)
491 | mov L, CARG1 494 | mov L, CARG1
492 | ldr RA, L:CARG1->stack 495 | ldr RA, L:CARG1->stack
493 | str CARG1, SAVE_L 496 | str CARG1, SAVE_L
497 | ldr DISPATCH, L->glref // Setup pointer to dispatch table.
494 | ldr RB, L->top 498 | ldr RB, L->top
495 | str CARG1, SAVE_PC // Any value outside of bytecode is ok. 499 | str CARG1, SAVE_PC // Any value outside of bytecode is ok.
496 | ldr RC, L->cframe 500 | ldr RC, L->cframe
501 | add DISPATCH, DISPATCH, #GG_G2DISP
497 | sub RA, RA, RB // Compute -savestack(L, L->top). 502 | sub RA, RA, RB // Compute -savestack(L, L->top).
498 | str sp, L->cframe // Add our C frame to cframe chain.
499 | mov RB, #0 503 | mov RB, #0
500 | str RA, SAVE_NRES // Neg. delta means cframe w/o frame. 504 | str RA, SAVE_NRES // Neg. delta means cframe w/o frame.
501 | str RB, SAVE_ERRF // No error function. 505 | str RB, SAVE_ERRF // No error function.
502 | str RC, SAVE_CFRAME 506 | str RC, SAVE_CFRAME
507 | str sp, L->cframe // Add our C frame to cframe chain.
508 | str L, [DISPATCH, #DISPATCH_GL(cur_L)]
503 | blx CARG4 // (lua_State *L, lua_CFunction func, void *ud) 509 | blx CARG4 // (lua_State *L, lua_CFunction func, void *ud)
504 | ldr DISPATCH, L->glref // Setup pointer to dispatch table.
505 | movs BASE, CRET1 510 | movs BASE, CRET1
506 | mov PC, #FRAME_CP 511 | mov PC, #FRAME_CP
507 | add DISPATCH, DISPATCH, #GG_G2DISP
508 | bne <3 // Else continue with the call. 512 | bne <3 // Else continue with the call.
509 | b ->vm_leave_cp // No base? Just remove C frame. 513 | b ->vm_leave_cp // No base? Just remove C frame.
510 | 514 |
@@ -614,6 +618,16 @@ static void build_subroutines(BuildCtx *ctx)
614 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. 618 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here.
615 | b ->vm_call_dispatch_f 619 | b ->vm_call_dispatch_f
616 | 620 |
621 |->vmeta_tgetr:
622 | .IOS mov RC, BASE
623 | bl extern lj_tab_getinth // (GCtab *t, int32_t key)
624 | // Returns cTValue * or NULL.
625 | .IOS mov BASE, RC
626 | cmp CRET1, #0
627 | ldrdne CARG12, [CRET1]
628 | mvneq CARG2, #~LJ_TNIL
629 | b ->BC_TGETR_Z
630 |
617 |//----------------------------------------------------------------------- 631 |//-----------------------------------------------------------------------
618 | 632 |
619 |->vmeta_tsets1: 633 |->vmeta_tsets1:
@@ -671,6 +685,15 @@ static void build_subroutines(BuildCtx *ctx)
671 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. 685 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here.
672 | b ->vm_call_dispatch_f 686 | b ->vm_call_dispatch_f
673 | 687 |
688 |->vmeta_tsetr:
689 | str BASE, L->base
690 | .IOS mov RC, BASE
691 | str PC, SAVE_PC
692 | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
693 | // Returns TValue *.
694 | .IOS mov BASE, RC
695 | b ->BC_TSETR_Z
696 |
674 |//-- Comparison metamethods --------------------------------------------- 697 |//-- Comparison metamethods ---------------------------------------------
675 | 698 |
676 |->vmeta_comp: 699 |->vmeta_comp:
@@ -735,6 +758,17 @@ static void build_subroutines(BuildCtx *ctx)
735 | b <3 758 | b <3
736 |.endif 759 |.endif
737 | 760 |
761 |->vmeta_istype:
762 | sub PC, PC, #4
763 | str BASE, L->base
764 | mov CARG1, L
765 | lsr CARG2, RA, #3
766 | mov CARG3, RC
767 | str PC, SAVE_PC
768 | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
769 | .IOS ldr BASE, L->base
770 | b ->cont_nop
771 |
738 |//-- Arithmetic metamethods --------------------------------------------- 772 |//-- Arithmetic metamethods ---------------------------------------------
739 | 773 |
740 |->vmeta_arith_vn: 774 |->vmeta_arith_vn:
@@ -1052,7 +1086,7 @@ static void build_subroutines(BuildCtx *ctx)
1052 | ffgccheck 1086 | ffgccheck
1053 | mov CARG1, L 1087 | mov CARG1, L
1054 | mov CARG2, BASE 1088 | mov CARG2, BASE
1055 | bl extern lj_str_fromnumber // (lua_State *L, cTValue *o) 1089 | bl extern lj_strfmt_number // (lua_State *L, cTValue *o)
1056 | // Returns GCstr *. 1090 | // Returns GCstr *.
1057 | ldr BASE, L->base 1091 | ldr BASE, L->base
1058 | mvn CARG2, #~LJ_TSTR 1092 | mvn CARG2, #~LJ_TSTR
@@ -1230,9 +1264,10 @@ static void build_subroutines(BuildCtx *ctx)
1230 | ldr CARG3, L:RA->base 1264 | ldr CARG3, L:RA->base
1231 | mv_vmstate CARG2, INTERP 1265 | mv_vmstate CARG2, INTERP
1232 | ldr CARG4, L:RA->top 1266 | ldr CARG4, L:RA->top
1233 | st_vmstate CARG2
1234 | cmp CRET1, #LUA_YIELD 1267 | cmp CRET1, #LUA_YIELD
1235 | ldr BASE, L->base 1268 | ldr BASE, L->base
1269 | str L, [DISPATCH, #DISPATCH_GL(cur_L)]
1270 | st_vmstate CARG2
1236 | bhi >8 1271 | bhi >8
1237 | subs RC, CARG4, CARG3 1272 | subs RC, CARG4, CARG3
1238 | ldr CARG1, L->maxstack 1273 | ldr CARG1, L->maxstack
@@ -1500,19 +1535,6 @@ static void build_subroutines(BuildCtx *ctx)
1500 | math_extern2 atan2 1535 | math_extern2 atan2
1501 | math_extern2 fmod 1536 | math_extern2 fmod
1502 | 1537 |
1503 |->ff_math_deg:
1504 |.if FPU
1505 | .ffunc_d math_rad
1506 | vldr d1, CFUNC:CARG3->upvalue[0]
1507 | vmul.f64 d0, d0, d1
1508 | b ->fff_resd
1509 |.else
1510 | .ffunc_n math_rad
1511 | ldrd CARG34, CFUNC:CARG3->upvalue[0]
1512 | bl extern __aeabi_dmul
1513 | b ->fff_restv
1514 |.endif
1515 |
1516 |.if HFABI 1538 |.if HFABI
1517 | .ffunc math_ldexp 1539 | .ffunc math_ldexp
1518 | ldr CARG4, [BASE, #4] 1540 | ldr CARG4, [BASE, #4]
@@ -1687,12 +1709,6 @@ static void build_subroutines(BuildCtx *ctx)
1687 | 1709 |
1688 |//-- String library ----------------------------------------------------- 1710 |//-- String library -----------------------------------------------------
1689 | 1711 |
1690 |.ffunc_1 string_len
1691 | checkstr CARG2, ->fff_fallback
1692 | ldr CARG1, STR:CARG1->len
1693 | mvn CARG2, #~LJ_TISNUM
1694 | b ->fff_restv
1695 |
1696 |.ffunc string_byte // Only handle the 1-arg case here. 1712 |.ffunc string_byte // Only handle the 1-arg case here.
1697 | ldrd CARG12, [BASE] 1713 | ldrd CARG12, [BASE]
1698 | ldr PC, [BASE, FRAME_PC] 1714 | ldr PC, [BASE, FRAME_PC]
@@ -1725,6 +1741,7 @@ static void build_subroutines(BuildCtx *ctx)
1725 | mov CARG1, L 1741 | mov CARG1, L
1726 | str PC, SAVE_PC 1742 | str PC, SAVE_PC
1727 | bl extern lj_str_new // (lua_State *L, char *str, size_t l) 1743 | bl extern lj_str_new // (lua_State *L, char *str, size_t l)
1744 |->fff_resstr:
1728 | // Returns GCstr *. 1745 | // Returns GCstr *.
1729 | ldr BASE, L->base 1746 | ldr BASE, L->base
1730 | mvn CARG2, #~LJ_TSTR 1747 | mvn CARG2, #~LJ_TSTR
@@ -1768,91 +1785,28 @@ static void build_subroutines(BuildCtx *ctx)
1768 | mvn CARG2, #~LJ_TSTR 1785 | mvn CARG2, #~LJ_TSTR
1769 | b ->fff_restv 1786 | b ->fff_restv
1770 | 1787 |
1771 |.ffunc string_rep // Only handle the 1-char case inline. 1788 |.macro ffstring_op, name
1772 | ffgccheck 1789 | .ffunc string_ .. name
1773 | ldrd CARG12, [BASE]
1774 | ldrd CARG34, [BASE, #8]
1775 | cmp NARGS8:RC, #16
1776 | bne ->fff_fallback // Exactly 2 arguments
1777 | checktp CARG2, LJ_TSTR
1778 | checktpeq CARG4, LJ_TISNUM
1779 | bne ->fff_fallback
1780 | subs CARG4, CARG3, #1
1781 | ldr CARG2, STR:CARG1->len
1782 | blt ->fff_emptystr // Count <= 0?
1783 | cmp CARG2, #1
1784 | blo ->fff_emptystr // Zero-length string?
1785 | bne ->fff_fallback // Fallback for > 1-char strings.
1786 | ldr RB, [DISPATCH, #DISPATCH_GL(tmpbuf.sz)]
1787 | ldr CARG2, [DISPATCH, #DISPATCH_GL(tmpbuf.buf)]
1788 | ldr CARG1, STR:CARG1[1]
1789 | cmp RB, CARG3
1790 | blo ->fff_fallback
1791 |1: // Fill buffer with char.
1792 | strb CARG1, [CARG2, CARG4]
1793 | subs CARG4, CARG4, #1
1794 | bge <1
1795 | b ->fff_newstr
1796 |
1797 |.ffunc string_reverse
1798 | ffgccheck 1790 | ffgccheck
1799 | ldrd CARG12, [BASE] 1791 | ldr CARG3, [BASE, #4]
1800 | cmp NARGS8:RC, #8 1792 | cmp NARGS8:RC, #8
1793 | ldr STR:CARG2, [BASE]
1801 | blo ->fff_fallback 1794 | blo ->fff_fallback
1802 | checkstr CARG2, ->fff_fallback 1795 | sub SBUF:CARG1, DISPATCH, #-DISPATCH_GL(tmpbuf)
1803 | ldr CARG3, STR:CARG1->len 1796 | checkstr CARG3, ->fff_fallback
1804 | ldr RB, [DISPATCH, #DISPATCH_GL(tmpbuf.sz)] 1797 | ldr CARG4, SBUF:CARG1->b
1805 | ldr CARG2, [DISPATCH, #DISPATCH_GL(tmpbuf.buf)] 1798 | str BASE, L->base
1806 | mov CARG4, CARG3 1799 | str PC, SAVE_PC
1807 | add CARG1, STR:CARG1, #sizeof(GCstr) 1800 | str L, SBUF:CARG1->L
1808 | cmp RB, CARG3 1801 | str CARG4, SBUF:CARG1->p
1809 | blo ->fff_fallback 1802 | bl extern lj_buf_putstr_ .. name
1810 |1: // Reverse string copy. 1803 | bl extern lj_buf_tostr
1811 | ldrb RB, [CARG1], #1 1804 | b ->fff_resstr
1812 | subs CARG4, CARG4, #1
1813 | blt ->fff_newstr
1814 | strb RB, [CARG2, CARG4]
1815 | b <1
1816 |
1817 |.macro ffstring_case, name, lo
1818 | .ffunc name
1819 | ffgccheck
1820 | ldrd CARG12, [BASE]
1821 | cmp NARGS8:RC, #8
1822 | blo ->fff_fallback
1823 | checkstr CARG2, ->fff_fallback
1824 | ldr CARG3, STR:CARG1->len
1825 | ldr RB, [DISPATCH, #DISPATCH_GL(tmpbuf.sz)]
1826 | ldr CARG2, [DISPATCH, #DISPATCH_GL(tmpbuf.buf)]
1827 | mov CARG4, #0
1828 | add CARG1, STR:CARG1, #sizeof(GCstr)
1829 | cmp RB, CARG3
1830 | blo ->fff_fallback
1831 |1: // ASCII case conversion.
1832 | ldrb RB, [CARG1, CARG4]
1833 | cmp CARG4, CARG3
1834 | bhs ->fff_newstr
1835 | sub RC, RB, #lo
1836 | cmp RC, #26
1837 | eorlo RB, RB, #0x20
1838 | strb RB, [CARG2, CARG4]
1839 | add CARG4, CARG4, #1
1840 | b <1
1841 |.endmacro 1805 |.endmacro
1842 | 1806 |
1843 |ffstring_case string_lower, 65 1807 |ffstring_op reverse
1844 |ffstring_case string_upper, 97 1808 |ffstring_op lower
1845 | 1809 |ffstring_op upper
1846 |//-- Table library ------------------------------------------------------
1847 |
1848 |.ffunc_1 table_getn
1849 | checktab CARG2, ->fff_fallback
1850 | .IOS mov RA, BASE
1851 | bl extern lj_tab_len // (GCtab *t)
1852 | // Returns uint32_t (but less than 2^31).
1853 | .IOS mov BASE, RA
1854 | mvn CARG2, #~LJ_TISNUM
1855 | b ->fff_restv
1856 | 1810 |
1857 |//-- Bit library -------------------------------------------------------- 1811 |//-- Bit library --------------------------------------------------------
1858 | 1812 |
@@ -2127,6 +2081,69 @@ static void build_subroutines(BuildCtx *ctx)
2127 | ldr INS, [PC, #-4] 2081 | ldr INS, [PC, #-4]
2128 | bx CRET1 2082 | bx CRET1
2129 | 2083 |
2084 |->cont_stitch: // Trace stitching.
2085 |.if JIT
2086 | // RA = resultptr, CARG4 = meta base
2087 | ldr RB, SAVE_MULTRES
2088 | ldr INS, [PC, #-4] // Re-load the instruction word just before PC.
2089 | ldr CARG3, [CARG4, #-24] // Save previous trace number.
2090 | subs RB, RB, #8 // RB = MULTRES-8; zero means there are no results to move.
2091 | decode_RA8 RC, INS // Call base.
2092 | beq >2
2093 |1: // Move results down.
2094 | ldrd CARG12, [RA]
2095 | add RA, RA, #8
2096 | subs RB, RB, #8
2097 | strd CARG12, [BASE, RC]
2098 | add RC, RC, #8
2099 | bne <1
2100 |2:
2101 | decode_RA8 RA, INS
2102 | decode_RB8 RB, INS
2103 | add RA, RA, RB // RA = sum of the decoded A and B operands; RC (end of moved results) is compared against it below.
2104 | ldr CARG1, [DISPATCH, #DISPATCH_J(trace)] // Base of the table indexed by trace number below.
2105 |3:
2106 | cmp RA, RC
2107 | mvn CARG2, #~LJ_TNIL // Tag used by the nil fill at 9:.
2108 | bhi >9 // More results wanted?
2109 |
2110 | ldr TRACE:RA, [CARG1, CARG3, lsl #2] // Entry for the previous trace number (4 byte entries).
2111 | cmp TRACE:RA, #0
2112 | beq ->cont_nop // No trace object: continue in the interpreter.
2113 | ldrh RC, TRACE:RA->link
2114 | cmp RC, CARG3
2115 | beq ->cont_nop // Blacklisted.
2116 | cmp RC, #0
2117 | bne =>BC_JLOOP // Jump to stitched trace.
2118 |
2119 | // Stitch a new trace to the previous trace.
2120 | str CARG3, [DISPATCH, #DISPATCH_J(exitno)] // The previous trace number is handed over in J->exitno.
2121 | str L, [DISPATCH, #DISPATCH_J(L)]
2122 | str BASE, L->base
2123 | sub CARG1, DISPATCH, #-GG_DISP2J // CARG1 = DISPATCH + GG_DISP2J, passed as the jit_State pointer.
2124 | mov CARG2, PC
2125 | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
2126 | ldr BASE, L->base
2127 | b ->cont_nop
2128 |
2129 |9: // Fill up results with nil.
2130 | strd CARG12, [BASE, RC]
2131 | add RC, RC, #8
2132 | b <3
2133 |.endif
2134 |
2135 |->vm_profhook: // Dispatch target for profiler hook.
2136#if LJ_HASPROFILE /* The label above is emitted either way; only the body is conditional. */
2137 | mov CARG1, L
2138 | str BASE, L->base
2139 | mov CARG2, PC
2140 | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc)
2141 | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
2142 | ldr BASE, L->base
2143 | sub PC, PC, #4 // Step back one instruction word so that instruction is dispatched again.
2144 | b ->cont_nop
2145#endif
2146 |
2130 |//----------------------------------------------------------------------- 2147 |//-----------------------------------------------------------------------
2131 |//-- Trace exit handler ------------------------------------------------- 2148 |//-- Trace exit handler -------------------------------------------------
2132 |//----------------------------------------------------------------------- 2149 |//-----------------------------------------------------------------------
@@ -2151,14 +2168,14 @@ static void build_subroutines(BuildCtx *ctx)
2151 | add CARG1, CARG1, CARG2, asr #6 2168 | add CARG1, CARG1, CARG2, asr #6
2152 | ldr CARG2, [lr, #4] // Load exit stub group offset. 2169 | ldr CARG2, [lr, #4] // Load exit stub group offset.
2153 | sub CARG1, CARG1, lr 2170 | sub CARG1, CARG1, lr
2154 | ldr L, [DISPATCH, #DISPATCH_GL(jit_L)] 2171 | ldr L, [DISPATCH, #DISPATCH_GL(cur_L)]
2155 | add CARG1, CARG2, CARG1, lsr #2 // Compute exit number. 2172 | add CARG1, CARG2, CARG1, lsr #2 // Compute exit number.
2156 | ldr BASE, [DISPATCH, #DISPATCH_GL(jit_base)] 2173 | ldr BASE, [DISPATCH, #DISPATCH_GL(jit_base)]
2157 | str CARG1, [DISPATCH, #DISPATCH_J(exitno)] 2174 | str CARG1, [DISPATCH, #DISPATCH_J(exitno)]
2158 | mov CARG4, #0 2175 | mov CARG4, #0
2159 | str L, [DISPATCH, #DISPATCH_J(L)]
2160 | str BASE, L->base 2176 | str BASE, L->base
2161 | str CARG4, [DISPATCH, #DISPATCH_GL(jit_L)] 2177 | str L, [DISPATCH, #DISPATCH_J(L)]
2178 | str CARG4, [DISPATCH, #DISPATCH_GL(jit_base)]
2162 | sub CARG1, DISPATCH, #-GG_DISP2J 2179 | sub CARG1, DISPATCH, #-GG_DISP2J
2163 | mov CARG2, sp 2180 | mov CARG2, sp
2164 | bl extern lj_trace_exit // (jit_State *J, ExitState *ex) 2181 | bl extern lj_trace_exit // (jit_State *J, ExitState *ex)
@@ -2177,13 +2194,14 @@ static void build_subroutines(BuildCtx *ctx)
2177 | ldr L, SAVE_L 2194 | ldr L, SAVE_L
2178 |1: 2195 |1:
2179 | cmp CARG1, #0 2196 | cmp CARG1, #0
2180 | blt >3 // Check for error from exit. 2197 | blt >9 // Check for error from exit.
2181 | lsl RC, CARG1, #3 2198 | lsl RC, CARG1, #3
2182 | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] 2199 | ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
2183 | str RC, SAVE_MULTRES 2200 | str RC, SAVE_MULTRES
2184 | mov CARG3, #0 2201 | mov CARG3, #0
2202 | str BASE, L->base
2185 | ldr CARG2, LFUNC:CARG2->field_pc 2203 | ldr CARG2, LFUNC:CARG2->field_pc
2186 | str CARG3, [DISPATCH, #DISPATCH_GL(jit_L)] 2204 | str CARG3, [DISPATCH, #DISPATCH_GL(jit_base)]
2187 | mv_vmstate CARG4, INTERP 2205 | mv_vmstate CARG4, INTERP
2188 | ldr KBASE, [CARG2, #PC2PROTO(k)] 2206 | ldr KBASE, [CARG2, #PC2PROTO(k)]
2189 | // Modified copy of ins_next which handles function header dispatch, too. 2207 | // Modified copy of ins_next which handles function header dispatch, too.
@@ -2192,15 +2210,32 @@ static void build_subroutines(BuildCtx *ctx)
2192 | ldr INS, [PC], #4 2210 | ldr INS, [PC], #4
2193 | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8. 2211 | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8.
2194 | st_vmstate CARG4 2212 | st_vmstate CARG4
2213 | cmp OP, #BC_FUNCC+2 // Fast function?
2214 | bhs >4
2215 |2:
2195 | cmp OP, #BC_FUNCF // Function header? 2216 | cmp OP, #BC_FUNCF // Function header?
2196 | ldr OP, [DISPATCH, OP, lsl #2] 2217 | ldr OP, [DISPATCH, OP, lsl #2]
2197 | decode_RA8 RA, INS 2218 | decode_RA8 RA, INS
2198 | lsrlo RC, INS, #16 // No: Decode operands A*8 and D. 2219 | lsrlo RC, INS, #16 // No: Decode operands A*8 and D.
2199 | subhs RC, RC, #8 2220 | subhs RC, RC, #8
2200 | addhs RA, RA, BASE // Yes: RA = BASE+framesize*8, RC = nargs*8 2221 | addhs RA, RA, BASE // Yes: RA = BASE+framesize*8, RC = nargs*8
2222 | ldrhs CARG3, [BASE, FRAME_FUNC]
2201 | bx OP 2223 | bx OP
2202 | 2224 |
2203 |3: // Rethrow error from the right C frame. 2225 |4: // Check frame below fast function.
2226 | ldr CARG1, [BASE, FRAME_PC]
2227 | ands CARG2, CARG1, #FRAME_TYPE
2228 | bne <2 // Trace stitching continuation?
2229 | // Otherwise set KBASE for Lua function below fast function.
2230 | ldr CARG3, [CARG1, #-4]
2231 | decode_RA8 CARG1, CARG3
2232 | sub CARG2, BASE, CARG1
2233 | ldr LFUNC:CARG3, [CARG2, #-16]
2234 | ldr CARG3, LFUNC:CARG3->field_pc
2235 | ldr KBASE, [CARG3, #PC2PROTO(k)]
2236 | b <2
2237 |
2238 |9: // Rethrow error from the right C frame.
2204 | rsb CARG2, CARG1, #0 2239 | rsb CARG2, CARG1, #0
2205 | mov CARG1, L 2240 | mov CARG1, L
2206 | bl extern lj_err_throw // (lua_State *L, int errcode) 2241 | bl extern lj_err_throw // (lua_State *L, int errcode)
@@ -2833,6 +2868,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2833 | ins_next 2868 | ins_next
2834 break; 2869 break;
2835 2870
2871 case BC_ISTYPE: /* Type assertion on a slot; mismatch is handled by vmeta_istype. */
2872 | // RA = src*8, RC = -type
2873 | ldrd CARG12, [BASE, RA]
2874 | ins_next1
2875 | cmn CARG2, RC // Flags of tag + RC; zero exactly when tag == type, since RC = -type.
2876 | ins_next2
2877 | bne ->vmeta_istype
2878 | ins_next3
2879 break;
2880 case BC_ISNUM: /* Like BC_ISTYPE, but accepts any tag unsigned-below LJ_TISNUM. */
2881 | // RA = src*8, RC = -(TISNUM-1)
2882 | ldrd CARG12, [BASE, RA]
2883 | ins_next1
2884 | checktp CARG2, LJ_TISNUM
2885 | ins_next2
2886 | bhs ->vmeta_istype // Tag >= LJ_TISNUM (unsigned): leave the fast path.
2887 | ins_next3
2888 break;
2889
2836 /* -- Unary ops --------------------------------------------------------- */ 2890 /* -- Unary ops --------------------------------------------------------- */
2837 2891
2838 case BC_MOV: 2892 case BC_MOV:
@@ -3503,6 +3557,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3503 | bne <1 // 'no __index' flag set: done. 3557 | bne <1 // 'no __index' flag set: done.
3504 | b ->vmeta_tgetb 3558 | b ->vmeta_tgetb
3505 break; 3559 break;
3560 case BC_TGETR: /* Table get with no tag checks here: only the payload words of table and key are loaded. */
3561 | decode_RB8 RB, INS
3562 | decode_RC8 RC, INS
3563 | // RA = dst*8, RB = table*8, RC = key*8
3564 | ldr TAB:CARG1, [BASE, RB]
3565 | ldr CARG2, [BASE, RC]
3566 | ldr CARG4, TAB:CARG1->array
3567 | ldr CARG3, TAB:CARG1->asize
3568 | add CARG4, CARG4, CARG2, lsl #3 // CARG4 = &array[key] (8 byte slots), computed before the bounds check.
3569 | cmp CARG2, CARG3 // In array part?
3570 | bhs ->vmeta_tgetr // Unsigned compare, so a negative key also takes the fallback.
3571 | ldrd CARG12, [CARG4]
3572 |->BC_TGETR_Z: // vmeta_tgetr re-enters here with the result in CARG12.
3573 | ins_next1
3574 | ins_next2
3575 | strd CARG12, [BASE, RA]
3576 | ins_next3
3577 break;
3506 3578
3507 case BC_TSETV: 3579 case BC_TSETV:
3508 | decode_RB8 RB, INS 3580 | decode_RB8 RB, INS
@@ -3673,6 +3745,32 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3673 | barrierback TAB:CARG1, INS, CARG3 3745 | barrierback TAB:CARG1, INS, CARG3
3674 | b <2 3746 | b <2
3675 break; 3747 break;
3748 case BC_TSETR: /* Table set with no tag checks here: only the payload words of table and key are loaded. */
3749 | decode_RB8 RB, INS
3750 | decode_RC8 RC, INS
3751 | // RA = src*8, RB = table*8, RC = key*8
3752 | ldr TAB:CARG2, [BASE, RB]
3753 | ldr CARG3, [BASE, RC]
3754 | ldrb INS, TAB:CARG2->marked // INS is reused as a temporary for the GC mark byte.
3755 | ldr CARG1, TAB:CARG2->array
3756 | ldr CARG4, TAB:CARG2->asize
3757 | tst INS, #LJ_GC_BLACK // isblack(table)
3758 | add CARG1, CARG1, CARG3, lsl #3 // CARG1 = &array[key] (8 byte slots), computed before the bounds check.
3759 | bne >7
3760 |2:
3761 | cmp CARG3, CARG4 // In array part?
3762 | bhs ->vmeta_tsetr // Unsigned compare. Fallback is entered with CARG2 = table, CARG3 = key.
3763 |->BC_TSETR_Z: // vmeta_tsetr re-enters here with the destination slot in CARG1.
3764 | ldrd CARG34, [BASE, RA]
3765 | ins_next1
3766 | ins_next2
3767 | strd CARG34, [CARG1]
3768 | ins_next3
3769 |
3770 |7: // Possible table write barrier for the value. Skip valiswhite check.
3771 | barrierback TAB:CARG2, INS, RB // RB is free to clobber: the table pointer is already in CARG2.
3772 | b <2
3773 break;
3676 3774
3677 case BC_TSETM: 3775 case BC_TSETM:
3678 | // RA = base*8 (table at base-1), RC = num_const (start index) 3776 | // RA = base*8 (table at base-1), RC = num_const (start index)
@@ -4270,7 +4368,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4270 | st_vmstate CARG2 4368 | st_vmstate CARG2
4271 | ldr RA, TRACE:RC->mcode 4369 | ldr RA, TRACE:RC->mcode
4272 | str BASE, [DISPATCH, #DISPATCH_GL(jit_base)] 4370 | str BASE, [DISPATCH, #DISPATCH_GL(jit_base)]
4273 | str L, [DISPATCH, #DISPATCH_GL(jit_L)] 4371 | str L, [DISPATCH, #DISPATCH_GL(tmpbuf.L)]
4274 | bx RA 4372 | bx RA
4275 |.endif 4373 |.endif
4276 break; 4374 break;
@@ -4388,6 +4486,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4388 | ldr BASE, L->base 4486 | ldr BASE, L->base
4389 | mv_vmstate CARG3, INTERP 4487 | mv_vmstate CARG3, INTERP
4390 | ldr CRET2, L->top 4488 | ldr CRET2, L->top
4489 | str L, [DISPATCH, #DISPATCH_GL(cur_L)]
4391 | lsl RC, CRET1, #3 4490 | lsl RC, CRET1, #3
4392 | st_vmstate CARG3 4491 | st_vmstate CARG3
4393 | ldr PC, [BASE, FRAME_PC] 4492 | ldr PC, [BASE, FRAME_PC]
diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc
new file mode 100644
index 00000000..a31cbb3a
--- /dev/null
+++ b/src/vm_arm64.dasc
@@ -0,0 +1,3763 @@
1|// Low-level VM code for ARM64 CPUs.
2|// Bytecode interpreter, fast functions and helper functions.
3|// Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
4|
5|.arch arm64
6|.section code_op, code_sub
7|
8|.actionlist build_actionlist
9|.globals GLOB_
10|.globalnames globnames
11|.externnames extnames
12|
13|// Note: The ragged indentation of the instructions is intentional.
14|// The starting columns indicate data dependencies.
15|
16|//-----------------------------------------------------------------------
17|
18|// ARM64 registers and the AAPCS64 ABI 1.0 at a glance:
19|//
20|// x0-x17 temp, x19-x28 callee-saved, x29 fp, x30 lr
21|// x18 is reserved on most platforms. Don't use it, save it or restore it.
22|// x31 doesn't exist. Register number 31 either means xzr/wzr (zero) or sp,
23|// depending on the instruction.
24|// v0-v7 temp, v8-v15 callee-saved (only d8-d15 preserved), v16-v31 temp
25|//
26|// x0-x7/v0-v7 hold parameters and results.
27|
28|// Fixed register assignments for the interpreter.
29|
30|// The following must be C callee-save.
31|.define BASE, x19 // Base of current Lua stack frame.
32|.define KBASE, x20 // Constants of current Lua function.
33|.define PC, x21 // Next PC.
34|.define GLREG, x22 // Global state.
35|.define LREG, x23 // Register holding lua_State (also in SAVE_L).
36|.define TISNUM, x24 // Constant LJ_TISNUM << 47.
37|.define TISNUMhi, x25 // Constant LJ_TISNUM << 15.
38|.define TISNIL, x26 // Constant -1LL.
39|.define fp, x29 // Yes, we have to maintain a frame pointer.
40|
41|.define ST_INTERP, w26 // Constant -1. This is the 32 bit view of TISNIL (x26).
42|
43|// The following temporaries are not saved across C calls, except for RA/RC.
44|.define RA, x27 // x27 is in the callee-saved range x19-x28.
45|.define RC, x28 // x28 is in the callee-saved range x19-x28.
46|.define RB, x17
47|.define RAw, w27 // RAw/RCw/RBw: 32 bit views of RA/RC/RB.
48|.define RCw, w28
49|.define RBw, w17
50|.define INS, x16 // Instruction word; loaded via INSw in ins_NEXT/ins_callt.
51|.define INSw, w16
52|.define ITYPE, x15 // Scratch for the type tag; written by checktp.
53|.define TMP0, x8
54|.define TMP1, x9
55|.define TMP2, x10
56|.define TMP3, x11
57|.define TMP0w, w8 // TMP0w-TMP3w: 32 bit views of TMP0-TMP3.
58|.define TMP1w, w9
59|.define TMP2w, w10
60|.define TMP3w, w11
61|
62|// Calling conventions. Also used as temporaries.
63|.define CARG1, x0
64|.define CARG2, x1
65|.define CARG3, x2
66|.define CARG4, x3
67|.define CARG5, x4
68|.define CARG1w, w0 // CARG1w-CARG5w: 32 bit views of CARG1-CARG5.
69|.define CARG2w, w1
70|.define CARG3w, w2
71|.define CARG4w, w3
72|.define CARG5w, w4
73|
74|.define FARG1, d0
75|.define FARG2, d1
76|
77|.define CRET1, x0 // Same register as CARG1.
78|.define CRET1w, w0
80|// Stack layout while in interpreter. Must match with lj_frame.h.
81|
82|.define CFRAME_SPACE, 208 // Frame size in bytes; sp is moved by this amount in saveregs/restoreregs.
83|//----- 16 byte aligned, <-- sp entering interpreter
84|// Unused [sp, #204] // 32 bit values
85|.define SAVE_NRES, [sp, #200]
86|.define SAVE_ERRF, [sp, #196]
87|.define SAVE_MULTRES, [sp, #192]
88|.define TMPD, [sp, #184] // 64 bit values
89|.define SAVE_L, [sp, #176]
90|.define SAVE_PC, [sp, #168]
91|.define SAVE_CFRAME, [sp, #160]
92|.define SAVE_FPR_, 96 // 96+8*8: 64 bit FPR saves
93|.define SAVE_GPR_, 16 // 16+10*8: 64 bit GPR saves
94|.define SAVE_LR, [sp, #8] // Written together with SAVE_FP by the first stp in saveregs.
95|.define SAVE_FP, [sp]
96|//----- 16 byte aligned, <-- sp while in interpreter.
97|
98|.define TMPDofs, #184 // Same offset as TMPD, as a bare immediate.
99|
100|.macro save_, gpr1, gpr2, fpr1, fpr2 // Store the pairs d<fpr1>/d<fpr2> and x<gpr1>/x<gpr2> into the frame.
101| stp d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(fpr1-8)*8] // FPR slot index is fpr1-8, i.e. d8 lands at SAVE_FPR_.
102| stp x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(gpr1-19)*8] // GPR slot index is gpr1-19, i.e. x19 would land at SAVE_GPR_.
103|.endmacro
104|.macro rest_, gpr1, gpr2, fpr1, fpr2 // Inverse of save_: reload both pairs from the same offsets.
105| ldp d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(fpr1-8)*8]
106| ldp x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(gpr1-19)*8]
107|.endmacro
108|
109|.macro saveregs // Allocate the C frame and save fp, lr, x19-x28 and d8-d15.
110| stp fp, lr, [sp, #-CFRAME_SPACE]! // Pre-decrement sp by CFRAME_SPACE; fp/lr go to SAVE_FP/SAVE_LR.
111| add fp, sp, #0 // fp = new sp.
112| stp x19, x20, [sp, # SAVE_GPR_] // First GPR pair has no FPR partner, so it is stored directly.
113| save_ 21, 22, 8, 9
114| save_ 23, 24, 10, 11
115| save_ 25, 26, 12, 13
116| save_ 27, 28, 14, 15
117|.endmacro
118|.macro restoreregs // Reload everything stored by saveregs and release the C frame.
119| ldp x19, x20, [sp, # SAVE_GPR_]
120| rest_ 21, 22, 8, 9
121| rest_ 23, 24, 10, 11
122| rest_ 25, 26, 12, 13
123| rest_ 27, 28, 14, 15
124| ldp fp, lr, [sp], # CFRAME_SPACE // Post-increment: load fp/lr, then sp += CFRAME_SPACE.
125|.endmacro
126|
127|// Type definitions. Some of these are only used for documentation.
128|.type L, lua_State, LREG // L->field addresses a lua_State field through LREG.
129|.type GL, global_State, GLREG // GL->field addresses a global_State field through GLREG.
130|.type TVALUE, TValue // The types below name no default register; they are used with an explicit one, e.g. LFUNC:CARG3->pc.
131|.type GCOBJ, GCobj
132|.type STR, GCstr
133|.type TAB, GCtab
134|.type LFUNC, GCfuncL
135|.type CFUNC, GCfuncC
136|.type PROTO, GCproto
137|.type UPVAL, GCupval
138|.type NODE, Node
139|.type NARGS8, int // Used as a register annotation, e.g. NARGS8:RC.
140|.type TRACE, GCtrace
141|.type SBUF, SBuf
142|
143|//-----------------------------------------------------------------------
144|
145|// Trap for not-yet-implemented parts.
146|.macro NYI; brk; .endmacro // Expands to a single brk instruction; hotcheck currently consists of just this.
147|
148|//-----------------------------------------------------------------------
149|
150|// Access to frame relative to BASE.
151|.define FRAME_FUNC, #-16 // Used as [BASE, FRAME_FUNC].
152|.define FRAME_PC, #-8 // Used as [BASE, FRAME_PC].
153|
154|.macro decode_RA, dst, ins; ubfx dst, ins, #8, #8; .endmacro // dst = bits 8..15 of ins.
155|.macro decode_RB, dst, ins; ubfx dst, ins, #24, #8; .endmacro // dst = bits 24..31 of ins.
156|.macro decode_RC, dst, ins; ubfx dst, ins, #16, #8; .endmacro // dst = bits 16..23 of ins.
157|.macro decode_RD, dst, ins; ubfx dst, ins, #16, #16; .endmacro // dst = bits 16..31 of ins.
158|.macro decode_RC8RD, dst, src; ubfiz dst, src, #3, #8; .endmacro // dst = (src & 0xff) << 3.
159|
160|// Instruction decode+dispatch.
161|.macro ins_NEXT // Fetch the next instruction, decode A into RA and D into RC, jump to its handler.
162| ldr INSw, [PC], #4 // Load the 32 bit instruction word, then PC += 4.
163| add TMP1, GL, INS, uxtb #3 // TMP1 = GL + (low byte of INS)*8.
164| decode_RA RA, INS
165| ldr TMP0, [TMP1, #GG_G2DISP] // Handler address from the 8 byte entry at offset GG_G2DISP.
166| decode_RD RC, INS
167| br TMP0
168|.endmacro
169|
170|// Instruction footer.
171|.if 1 // Constant condition: the replicated variant below is the one assembled.
172| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
173| .define ins_next, ins_NEXT
174| .define ins_next_, ins_NEXT
175|.else // Alternative kept for reference; not assembled while the condition above is 1.
176| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
177| // Affects only certain kinds of benchmarks (and only with -j off).
178| .macro ins_next
179| b ->ins_next
180| .endmacro
181| .macro ins_next_
182| ->ins_next:
183| ins_NEXT
184| .endmacro
185|.endif
186|
187|// Call decode and dispatch.
188|.macro ins_callt
189| // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
190| ldr PC, LFUNC:CARG3->pc // PC = the function's pc field.
191| ldr INSw, [PC], #4 // Load the first instruction word, then PC += 4.
192| add TMP1, GL, INS, uxtb #3 // TMP1 = GL + (low byte of INS)*8.
193| decode_RA RA, INS
194| ldr TMP0, [TMP1, #GG_G2DISP]
195| add RA, BASE, RA, lsl #3 // RA = BASE + A*8. Unlike ins_NEXT, RC is left untouched (still nargs*8).
196| br TMP0
197|.endmacro
198|
199|.macro ins_call
200| // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, PC = caller PC
201| str PC, [BASE, FRAME_PC] // Record the caller PC in the new frame; this establishes the ins_callt precondition.
202| ins_callt
203|.endmacro
204|
205|//-----------------------------------------------------------------------
206|
207|// Macros to check the TValue type and extract the GCobj. Branch on failure.
208|.macro checktp, reg, tp, target // In-place form: reg is replaced by reg & LJ_GCVMASK. Clobbers ITYPE.
209| asr ITYPE, reg, #47 // ITYPE = reg >> 47 (arithmetic shift).
210| cmn ITYPE, #-tp // Flags of ITYPE + (-tp); equal exactly when ITYPE == tp.
211| and reg, reg, #LJ_GCVMASK // Executed on both outcomes; does not alter the flags.
212| bne target
213|.endmacro
214|.macro checktp, dst, reg, tp, target // Four-argument form of the same name: result goes to dst, reg is preserved.
215| asr ITYPE, reg, #47
216| cmn ITYPE, #-tp
217| and dst, reg, #LJ_GCVMASK
218| bne target
219|.endmacro
220|.macro checkstr, reg, target; checktp reg, LJ_TSTR, target; .endmacro
221|.macro checktab, reg, target; checktp reg, LJ_TTAB, target; .endmacro
222|.macro checkfunc, reg, target; checktp reg, LJ_TFUNC, target; .endmacro
223|.macro checkint, reg, target // Branch unless the upper 32 bits of reg equal TISNUMhi.
224| cmp TISNUMhi, reg, lsr #32
225| bne target
226|.endmacro
227|.macro checknum, reg, target // Branch if TISNUMhi <= upper 32 bits of reg (unsigned).
228| cmp TISNUMhi, reg, lsr #32
229| bls target
230|.endmacro
231|.macro checknumber, reg, target // Branch if TISNUMhi < upper 32 bits of reg (unsigned); equality passes.
232| cmp TISNUMhi, reg, lsr #32
233| blo target
234|.endmacro
235|
236|.macro mov_false, reg; movn reg, #0x8000, lsl #32; .endmacro // reg = ~(0x8000 << 32) = 0xffff7fffffffffff.
237|.macro mov_true, reg; movn reg, #0x0001, lsl #48; .endmacro // reg = ~(0x0001 << 48) = 0xfffeffffffffffff.
238|
239#define GL_J(field) (GG_OFS(J) + (int)offsetof(jit_State, field)) /* GG_OFS(J) plus the offset of field within jit_State. */
240|
241#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) /* Negative offset of a GCproto field, measured from the end of the GCproto struct. */
242|
243|.macro hotcheck, delta // Not implemented yet: expands to the NYI trap and ignores delta.
244| NYI
245|.endmacro
246|
247|.macro hotloop
248| hotcheck HOTCOUNT_LOOP
249| blo ->vm_hotloop // Expects hotcheck to leave flags behind; the current NYI placeholder does not.
250|.endmacro
251|
252|.macro hotcall
253| hotcheck HOTCOUNT_CALL
254| blo ->vm_hotcall // Expects hotcheck to leave flags behind; the current NYI placeholder does not.
255|.endmacro
256|
257|// Set current VM state.
258|.macro mv_vmstate, reg, st; movn reg, #LJ_VMST_..st; .endmacro // reg = ~LJ_VMST_<st>.
259|.macro st_vmstate, reg; str reg, GL->vmstate; .endmacro // Store reg into GL->vmstate.
260|
261|// Move table write barrier back. Overwrites mark and tmp.
262|.macro barrierback, tab, mark, tmp // mark must hold tab->marked on entry; it is modified and written back.
263| ldr tmp, GL->gc.grayagain // tmp = old head of the grayagain list.
264| and mark, mark, #~LJ_GC_BLACK // black2gray(tab)
265| str tab, GL->gc.grayagain // tab becomes the new list head.
266| strb mark, tab->marked
267| str tmp, tab->gclist // Old head is linked behind tab.
268|.endmacro
269|
270|//-----------------------------------------------------------------------
271
272#if !LJ_DUALNUM
273#error "Only dual-number mode supported for ARM64 target"
274#endif
275
276/* Generate subroutines used by opcodes and other parts of the VM. */
277/* The .code_sub section should be last to help static branch prediction. */
278static void build_subroutines(BuildCtx *ctx)
279{
280 |.code_sub
281 |
282 |//-----------------------------------------------------------------------
283 |//-- Return handling ----------------------------------------------------
284 |//-----------------------------------------------------------------------
285 |
286 |->vm_returnp:
287 | // See vm_return. Also: RB = previous base.
288 | tbz PC, #2, ->cont_dispatch // (PC & FRAME_P) == 0?
289 |
290 | // Return from pcall or xpcall fast func.
291 | ldr PC, [RB, FRAME_PC] // Fetch PC of previous frame.
292 | mov_true TMP0
293 | mov BASE, RB
294 | // Prepending may overwrite the pcall frame, so do it at the end.
295 | str TMP0, [RA, #-8]! // Prepend true to results.
296 |
297 |->vm_returnc:
298 | adds RC, RC, #8 // RC = (nresults+1)*8.
299 | mov CRET1, #LUA_YIELD
300 | beq ->vm_unwind_c_eh // Taken when the sum above is zero; CRET1 = LUA_YIELD is then the status.
301 | str RCw, SAVE_MULTRES
302 | ands CARG1, PC, #FRAME_TYPE
303 | beq ->BC_RET_Z // Handle regular return to Lua.
304 |
305 |->vm_return:
306 | // BASE = base, RA = resultptr, RC/MULTRES = (nresults+1)*8, PC = return
307 | // CARG1 = PC & FRAME_TYPE
308 | and RB, PC, #~FRAME_TYPEP // Frame delta: PC with the FRAME_TYPEP bits cleared.
309 | cmp CARG1, #FRAME_C
310 | sub RB, BASE, RB // RB = previous base.
311 | bne ->vm_returnp
312 |
313 | str RB, L->base
314 | ldrsw CARG2, SAVE_NRES // CARG2 = nresults+1.
315 | mv_vmstate TMP0w, C
316 | sub BASE, BASE, #16 // From here on BASE is the write pointer for results, starting at base-16.
317 | subs TMP2, RC, #8 // TMP2 = bytes to copy; zero skips the loop.
318 | st_vmstate TMP0w
319 | beq >2
320 |1: // Copy results down, one 8 byte slot per iteration.
321 | subs TMP2, TMP2, #8
322 | ldr TMP0, [RA], #8
323 | str TMP0, [BASE], #8
324 | bne <1
325 |2:
326 | cmp RC, CARG2, lsl #3 // More/less results wanted?
327 | bne >6
328 |3:
329 | str BASE, L->top // Store new top.
330 |
331 |->vm_leave_cp:
332 | ldr RC, SAVE_CFRAME // Restore previous C frame.
333 | mov CRET1, #0 // Ok return status for vm_pcall.
334 | str RC, L->cframe
335 |
336 |->vm_leave_unw: // Entry used by vm_unwind_c_eh: skips the cframe restore and keeps CRET1.
337 | restoreregs
338 | ret
339 |
340 |6:
341 | bgt >7 // Less results wanted?
342 | // More results wanted. Check stack size and fill up results with nil.
343 | ldr CARG3, L->maxstack
344 | cmp BASE, CARG3
345 | bhs >8
346 | str TISNIL, [BASE], #8 // Append one TISNIL slot and advance the write pointer.
347 | add RC, RC, #8
348 | b <2 // Re-check the count; loops until RC matches.
349 |
350 |7: // Less results wanted.
351 | cbz CARG2, <3 // LUA_MULTRET+1 case?
352 | sub CARG1, RC, CARG2, lsl #3 // CARG1 = surplus in bytes.
353 | sub BASE, BASE, CARG1 // Shrink top.
354 | b <3
355 |
356 |8: // Corner case: need to grow stack for filling up results.
357 | // This can happen if:
358 | // - A C function grows the stack (a lot).
359 | // - The GC shrinks the stack in between.
360 | // - A return back from a lua_call() with (high) nresults adjustment.
361 | str BASE, L->top // Save current top held in BASE (yes).
362 | mov CARG1, L // CARG2 still holds nresults+1 from above and is passed as n.
363 | bl extern lj_state_growstack // (lua_State *L, int n)
364 | ldr BASE, L->top // Need the (realloced) L->top in BASE.
365 | ldrsw CARG2, SAVE_NRES // Reload: CARG2 is a temp register and not preserved by the call.
366 | b <2
367 |
368 |->vm_unwind_c: // Unwind C stack, return from vm_pcall.
369 | // (void *cframe, int errcode)
370 | mov sp, CARG1 // sp = cframe.
371 | mov CRET1, CARG2 // errcode becomes the return value.
372 |->vm_unwind_c_eh: // Landing pad for external unwinder.
373 | ldr L, SAVE_L
374 | mv_vmstate TMP0w, C
375 | ldr GL, L->glref // GL must be valid before st_vmstate, which stores through it.
376 | st_vmstate TMP0w
377 | b ->vm_leave_unw // Restores registers and returns; L->cframe is not touched on this path.
378 |
379 |->vm_unwind_ff: // Unwind C stack, return from ff pcall.
380 | // (void *cframe)
381 | and sp, CARG1, #CFRAME_RAWMASK // sp = cframe & CFRAME_RAWMASK.
382 |->vm_unwind_ff_eh: // Landing pad for external unwinder.
383 | ldr L, SAVE_L
384 | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 // Re-create the constant registers TISNUM, TISNUMhi and TISNIL.
385 | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
386 | movn TISNIL, #0
387 | mov RC, #16 // 2 results: false + error message.
388 | ldr BASE, L->base
389 | ldr GL, L->glref // Setup pointer to global state.
390 | mov_false TMP0
391 | sub RA, BASE, #8 // Results start at BASE-8.
392 | ldr PC, [BASE, FRAME_PC] // Fetch PC of previous frame.
393 | str TMP0, [BASE, #-8] // Prepend false to error message.
394 | st_vmstate ST_INTERP
395 | b ->vm_returnc // vm_returnc adds 8 to RC and takes it from there.
396 |
397 |//-----------------------------------------------------------------------
398 |//-- Grow stack for calls -----------------------------------------------
399 |//-----------------------------------------------------------------------
400 |
401 |->vm_growstack_c: // Grow stack for C function.
402 | // CARG1 = L
403 | mov CARG2, #LUA_MINSTACK // n = LUA_MINSTACK for the shared call at 2:.
404 | b >2
405 |
406 |->vm_growstack_l: // Grow stack for Lua function.
407 | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC
408 | add RC, BASE, RC // RC = BASE + nargs*8, stored as top below.
409 | sub RA, RA, BASE // RA = framesize*8.
410 | mov CARG1, L
411 | stp BASE, RC, L->base // One paired store writes L->base and the 8 bytes following it (top, per the comment at 2:).
412 | add PC, PC, #4 // Must point after first instruction.
413 | lsr CARG2, RA, #3 // n = framesize in slots.
414 |2:
415 | // L->base = new base, L->top = top
416 | str PC, SAVE_PC
417 | bl extern lj_state_growstack // (lua_State *L, int n)
418 | ldp BASE, RC, L->base // Reload base and top after the call.
419 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC]
420 | sub NARGS8:RC, RC, BASE // Back to nargs*8.
421 | and LFUNC:CARG3, CARG3, #LJ_GCVMASK // Keep only the LJ_GCVMASK bits of the function slot.
422 | // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
423 | ins_callt // Just retry the call.
424 |
425 |//-----------------------------------------------------------------------
426 |//-- Entry points into the assembler VM ---------------------------------
427 |//-----------------------------------------------------------------------
428 |
429 |->vm_resume: // Setup C frame and resume thread.
430 | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
431 | saveregs
432 | mov L, CARG1
433 | ldr GL, L->glref // Setup pointer to global state.
434 | mov BASE, CARG2
435 | str L, SAVE_L
436 | mov PC, #FRAME_CP
437 | str wzr, SAVE_NRES // nres1 = 0 and ef = 0 are written here rather than taken from arguments.
438 | add TMP0, sp, #CFRAME_RESUME // TMP0 = sp + CFRAME_RESUME, installed as L->cframe below.
439 | ldrb TMP1w, L->status
440 | str wzr, SAVE_ERRF
441 | str L, SAVE_PC // Any value outside of bytecode is ok.
442 | str xzr, SAVE_CFRAME
443 | str TMP0, L->cframe
444 | cbz TMP1w, >3 // Status 0: enter through the shared call path at 3: (after vm_call).
445 |
446 | // Resume after yield (like a return).
447 | str L, GL->cur_L
448 | mov RA, BASE // RA = the base argument, used as resultptr by vm_return/BC_RET_Z.
449 | ldp BASE, CARG1, L->base // BASE = L->base; CARG1 = the 8 bytes following it (top, as in vm_growstack_l).
450 | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48
451 | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
452 | ldr PC, [BASE, FRAME_PC]
453 | strb wzr, L->status // Clear the status.
454 | movn TISNIL, #0
455 | sub RC, CARG1, BASE
456 | ands CARG1, PC, #FRAME_TYPE // CARG1 = PC & FRAME_TYPE, as vm_return expects.
457 | add RC, RC, #8 // RC = (CARG1 - BASE) + 8 from the two steps above.
458 | st_vmstate ST_INTERP
459 | str RCw, SAVE_MULTRES
460 | beq ->BC_RET_Z
461 | b ->vm_return
462 |
463 |->vm_pcall: // Setup protected C frame and enter VM.
464 | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
465 | saveregs
466 | mov PC, #FRAME_CP
467 | str CARG4w, SAVE_ERRF // Only the low 32 bits of ef are kept.
468 | b >1
469 |
470 |->vm_call: // Setup C frame and enter VM.
471 | // (lua_State *L, TValue *base, int nres1)
472 | saveregs
473 | mov PC, #FRAME_C
474 |
475 |1: // Entry point for vm_pcall above (PC = ftype).
476 | ldr RC, L:CARG1->cframe // RC = current cframe, saved in SAVE_CFRAME below.
477 | str CARG3w, SAVE_NRES
478 | mov L, CARG1
479 | str CARG1, SAVE_L
480 | ldr GL, L->glref // Setup pointer to global state.
481 | mov BASE, CARG2
482 | str CARG1, SAVE_PC // Any value outside of bytecode is ok.
483 | str RC, SAVE_CFRAME
484 | str fp, L->cframe // Add our C frame to cframe chain.
485 |
486 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
487 | str L, GL->cur_L
488 | ldp RB, CARG1, L->base // RB = old base (for vmeta_call).
489 | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 // Set up the constant registers TISNUM, TISNUMhi and TISNIL.
490 | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
491 | add PC, PC, BASE
492 | movn TISNIL, #0
493 | sub PC, PC, RB // PC = frame delta + frame type
494 | sub NARGS8:RC, CARG1, BASE // nargs*8 = second word loaded by the ldp above minus the new base.
495 | st_vmstate ST_INTERP
496 |
497 |->vm_call_dispatch:
498 | // RB = old base, BASE = new base, RC = nargs*8, PC = caller PC
499 | ldr CARG3, [BASE, FRAME_FUNC]
500 | checkfunc CARG3, ->vmeta_call // Tag is not LJ_TFUNC: go to vmeta_call. CARG3 is masked with LJ_GCVMASK either way.
501 |
502 |->vm_call_dispatch_f:
503 | ins_call
504 | // BASE = new base, CARG3 = func, RC = nargs*8, PC = caller PC
505 |
506 |->vm_cpcall: // Setup protected C frame, call C.
507 | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
508 | saveregs
509 | mov L, CARG1
510 | ldr RA, L:CARG1->stack
511 | str CARG1, SAVE_L
512 | ldr GL, L->glref // Setup pointer to global state.
513 | ldr RB, L->top
514 | str CARG1, SAVE_PC // Any value outside of bytecode is ok.
515 | ldr RC, L->cframe // RC = previous L->cframe.
516 | sub RA, RA, RB // Compute -savestack(L, L->top).
517 | str RAw, SAVE_NRES // Neg. delta means cframe w/o frame.
518 | str wzr, SAVE_ERRF // No error function.
519 | str RC, SAVE_CFRAME // Keep the previous cframe in this frame.
520 | str fp, L->cframe // Add our C frame to cframe chain.
521 | str L, GL->cur_L
522 | blr CARG4 // (lua_State *L, lua_CFunction func, void *ud)
523 | mov BASE, CRET1 // Return value of cp is used as the new base.
524 | mov PC, #FRAME_CP
525 | cbnz BASE, <3 // Else continue with the call.
526 | b ->vm_leave_cp // No base? Just remove C frame.
527 |
528 |//-----------------------------------------------------------------------
529 |//-- Metamethod handling ------------------------------------------------
530 |//-----------------------------------------------------------------------
531 |
532 |//-- Continuation dispatch ----------------------------------------------
533 |
534 |->cont_dispatch: // Jump to the continuation stored in a metamethod frame.
535 | // BASE = meta base, RA = resultptr, RC = (nresults+1)*8
536 | ldr LFUNC:CARG3, [RB, FRAME_FUNC]
537 | ldr CARG1, [BASE, #-32] // Get continuation.
538 | mov CARG4, BASE
539 | mov BASE, RB // Restore caller BASE.
540 | and LFUNC:CARG3, CARG3, #LJ_GCVMASK // Mask with LJ_GCVMASK to get the LFUNC pointer.
541 |.if FFI
542 | cmp CARG1, #1 // Continuation values 0 and 1 are special (see label 1).
543 |.endif
544 | ldr PC, [CARG4, #-24] // Restore PC from [cont|PC].
545 | ldr CARG3, LFUNC:CARG3->pc
546 | add TMP0, RA, RC
547 | str TISNIL, [TMP0, #-8] // Ensure one valid arg.
548 |.if FFI
549 | bls >1 // cont <= 1: flags from the cmp above.
550 |.endif
551 | ldr KBASE, [CARG3, #PC2PROTO(k)]
552 | // BASE = base, RA = resultptr, CARG4 = meta base
553 | br CARG1 // Jump to the continuation.
554 |
555 |.if FFI
556 |1:
557 | beq ->cont_ffi_callback // cont = 1: return from FFI callback.
558 | // cont = 0: tailcall from C function.
559 | sub CARG4, CARG4, #32
560 | sub RC, CARG4, BASE
561 | b ->vm_call_tail
562 |.endif
563 |
564 |->cont_cat: // RA = resultptr, CARG4 = meta base
565 | ldr INSw, [PC, #-4] // Reload the instruction preceding PC.
566 | sub CARG2, CARG4, #32
567 | ldr TMP0, [RA] // TMP0 = result value.
568 | str BASE, L->base
569 | decode_RB RB, INS
570 | decode_RA RA, INS
571 | add TMP1, BASE, RB, lsl #3 // TMP1 = address of slot RB.
572 | subs TMP1, CARG2, TMP1
573 | beq >1 // CARG2 already points at slot RB: store the final result.
574 | str TMP0, [CARG2] // Otherwise store the result at CARG2 ...
575 | lsr CARG3, TMP1, #3 // ... and pass the remaining slot count in CARG3.
576 | b ->BC_CAT_Z
577 |
578 |1:
579 | str TMP0, [BASE, RA, lsl #3] // Store the result in slot RA.
580 | b ->cont_nop
581 |
582 |//-- Table indexing metamethods -----------------------------------------
583 |
584 |->vmeta_tgets1: // Table operand in slot RB, string key in RC.
585 | movn CARG4, #~LJ_TSTR // CARG4 = LJ_TSTR.
586 | add CARG2, BASE, RB, lsl #3 // CARG2 = address of slot RB.
587 | add CARG4, STR:RC, CARG4, lsl #47 // CARG4 = string pointer with the LJ_TSTR tag.
588 | b >2
589 |
590 |->vmeta_tgets: // Untagged table pointer in CARG2, tagged string key in CARG4 (set by the caller).
591 | movk CARG2, #(LJ_TTAB>>1)&0xffff, lsl #48 // Put the LJ_TTAB tag into the top 16 bits.
592 | str CARG2, GL->tmptv // Store the tagged table in GL->tmptv ...
593 | add CARG2, GL, #offsetof(global_State, tmptv) // ... and pass its address.
594 |2:
595 | add CARG3, sp, TMPDofs // CARG3 = address of TMPD.
596 | str CARG4, TMPD // TMPD holds the tagged string key.
597 | b >1
598 |
599 |->vmeta_tgetb: // RB = table, RC = index
600 | add RC, RC, TISNUM // Tag the index as an integer.
601 | add CARG2, BASE, RB, lsl #3
602 | add CARG3, sp, TMPDofs // CARG3 = address of TMPD.
603 | str RC, TMPD
604 | b >1
605 |
606 |->vmeta_tgetv: // RB = table, RC = key
607 | add CARG2, BASE, RB, lsl #3
608 | add CARG3, BASE, RC, lsl #3
609 |1:
610 | str BASE, L->base
611 | mov CARG1, L
612 | str PC, SAVE_PC
613 | bl extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k)
614 | // Returns TValue * (finished) or NULL (metamethod).
615 | cbz CRET1, >3
616 | ldr TMP0, [CRET1]
617 | str TMP0, [BASE, RA, lsl #3] // Copy the result into slot RA.
618 | ins_next
619 |
620 |3: // Call __index metamethod.
621 | // BASE = base, L->top = new base, stack = cont/func/t/k
622 | sub TMP1, BASE, #FRAME_CONT
623 | ldr BASE, L->top
624 | mov NARGS8:RC, #16 // 2 args for func(t, k).
625 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here.
626 | str PC, [BASE, #-24] // [cont|PC]
627 | sub PC, BASE, TMP1 // PC = (new base - old base) + FRAME_CONT.
628 | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
629 | b ->vm_call_dispatch_f
630 |
631 |->vmeta_tgetr: // CARG1 is expected to hold the table already (set by the caller).
632 | sxtw CARG2, TMP1w // CARG2 = sign-extended 32 bit key.
633 | bl extern lj_tab_getinth // (GCtab *t, int32_t key)
634 | // Returns cTValue * or NULL.
635 | mov TMP0, TISNIL // Default result is nil.
636 | cbz CRET1, ->BC_TGETR_Z
637 | ldr TMP0, [CRET1]
638 | b ->BC_TGETR_Z
639 |
640 |//-----------------------------------------------------------------------
641 |
642 |->vmeta_tsets1: // Table operand in slot RB, string key in RC.
643 | movn CARG4, #~LJ_TSTR // CARG4 = LJ_TSTR.
644 | add CARG2, BASE, RB, lsl #3 // CARG2 = address of slot RB.
645 | add CARG4, STR:RC, CARG4, lsl #47 // CARG4 = string pointer with the LJ_TSTR tag.
646 | b >2
647 |
648 |->vmeta_tsets: // Untagged table pointer in CARG2, tagged string key in CARG4 (set by the caller).
649 | movk CARG2, #(LJ_TTAB>>1)&0xffff, lsl #48 // Put the LJ_TTAB tag into the top 16 bits.
650 | str CARG2, GL->tmptv // Store the tagged table in GL->tmptv ...
651 | add CARG2, GL, #offsetof(global_State, tmptv) // ... and pass its address.
652 |2:
653 | add CARG3, sp, TMPDofs // CARG3 = address of TMPD.
654 | str CARG4, TMPD // TMPD holds the tagged string key.
655 | b >1
656 |
657 |->vmeta_tsetb: // RB = table, RC = index
658 | add RC, RC, TISNUM // Tag the index as an integer.
659 | add CARG2, BASE, RB, lsl #3
660 | add CARG3, sp, TMPDofs // CARG3 = address of TMPD.
661 | str RC, TMPD
662 | b >1
663 |
664 |->vmeta_tsetv: // Table in slot RB, key in slot RC.
665 | add CARG2, BASE, RB, lsl #3
666 | add CARG3, BASE, RC, lsl #3
667 |1:
668 | str BASE, L->base
669 | mov CARG1, L
670 | str PC, SAVE_PC
671 | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
672 | // Returns TValue * (finished) or NULL (metamethod).
673 | ldr TMP0, [BASE, RA, lsl #3] // TMP0 = value to store (slot RA).
674 | cbz CRET1, >3
675 | // NOBARRIER: lj_meta_tset ensures the table is not black.
676 | str TMP0, [CRET1]
677 | ins_next
678 |
679 |3: // Call __newindex metamethod.
680 | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
681 | sub TMP1, BASE, #FRAME_CONT
682 | ldr BASE, L->top
683 | mov NARGS8:RC, #24 // 3 args for func(t, k, v).
684 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here.
685 | str TMP0, [BASE, #16] // Copy value to third argument.
686 | str PC, [BASE, #-24] // [cont|PC]
687 | sub PC, BASE, TMP1 // PC = (new base - old base) + FRAME_CONT.
688 | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
689 | b ->vm_call_dispatch_f
690 |
691 |->vmeta_tsetr: // CARG2 is expected to hold the table already (set by the caller).
692 | sxtw CARG3, TMP1w // CARG3 = sign-extended 32 bit key.
693 | str BASE, L->base
    | mov CARG1, L // First C argument is L, as in all other vmeta_* handlers.
694 | str PC, SAVE_PC
695 | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
696 | // Returns TValue *.
697 | b ->BC_TSETR_Z
698 |
699 |//-- Comparison metamethods ---------------------------------------------
700 |
701 |->vmeta_comp: // Operands in slots RA and RC.
702 | add CARG2, BASE, RA, lsl #3
703 | sub PC, PC, #4 // Step PC back by one instruction.
704 | add CARG3, BASE, RC, lsl #3
705 | str BASE, L->base
706 | mov CARG1, L
707 | str PC, SAVE_PC
708 | uxtb CARG4w, INSw // op = low byte of the instruction.
709 | bl extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
710 | // Returns 0/1 or TValue * (metamethod).
711 |3:
712 | cmp CRET1, #1
713 | bhi ->vmeta_binop // Result > 1: it is a TValue *, call the metamethod.
714 |4: // Conditional branch. Carry clear (lo) = do not take the branch.
715 | ldrh RBw, [PC, #2] // Upper 16 bits of the instruction at PC.
716 | add PC, PC, #4
717 | add RB, PC, RB, lsl #2
718 | sub RB, RB, #0x20000 // RB = PC + (RBw - 0x8000)*4.
719 | csel PC, PC, RB, lo // Keep PC if lo, else PC = branch target.
720 |->cont_nop:
721 | ins_next
722 |
723 |->cont_ra: // RA = resultptr
724 | ldr INSw, [PC, #-4] // Reload the instruction preceding PC.
725 | ldr TMP0, [RA]
726 | decode_RA TMP1, INS
727 | str TMP0, [BASE, TMP1, lsl #3] // Store the result in that instruction's slot RA.
728 | b ->cont_nop
729 |
730 |->cont_condt: // RA = resultptr
731 | ldr TMP0, [RA]
732 | mov_true TMP1
733 | cmp TMP1, TMP0 // Branch if result is true.
734 | b <4
735 |
736 |->cont_condf: // RA = resultptr
737 | ldr TMP0, [RA]
738 | mov_false TMP1
739 | cmp TMP0, TMP1 // Branch if result is false.
740 | b <4
741 |
742 |->vmeta_equal:
743 | // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV.
744 | and TAB:CARG3, CARG3, #LJ_GCVMASK // Mask o2 with LJ_GCVMASK.
745 | sub PC, PC, #4 // Step PC back by one instruction.
746 | str BASE, L->base
747 | mov CARG1, L
748 | str PC, SAVE_PC
749 | bl extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne)
750 | // Returns 0/1 or TValue * (metamethod).
751 | b <3 // Shared result handling in vmeta_comp.
752 |
753 |->vmeta_equal_cd: // Body only exists in FFI builds.
754 |.if FFI
755 | sub PC, PC, #4 // Step PC back by one instruction.
756 | str BASE, L->base
757 | mov CARG1, L
758 | mov CARG2, INS // Pass the whole instruction.
759 | str PC, SAVE_PC
760 | bl extern lj_meta_equal_cd // (lua_State *L, BCIns op)
761 | // Returns 0/1 or TValue * (metamethod).
762 | b <3 // Shared result handling in vmeta_comp.
763 |.endif
764 |
765 |->vmeta_istype:
766 | sub PC, PC, #4 // Step PC back by one instruction.
767 | str BASE, L->base
768 | mov CARG1, L
769 | mov CARG2, RA
770 | mov CARG3, RC
771 | str PC, SAVE_PC
772 | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
773 | b ->cont_nop
774 |
775 |//-- Arithmetic metamethods ---------------------------------------------
776 |
777 |->vmeta_arith_vn: // rb = slot RB, rc = constant RC.
778 | add CARG3, BASE, RB, lsl #3
779 | add CARG4, KBASE, RC, lsl #3
780 | b >1
781 |
782 |->vmeta_arith_nv: // rb = constant RC, rc = slot RB.
783 | add CARG4, BASE, RB, lsl #3
784 | add CARG3, KBASE, RC, lsl #3
785 | b >1
786 |
787 |->vmeta_unm: // Unary: slot RC is passed as both operands.
788 | add CARG3, BASE, RC, lsl #3
789 | mov CARG4, CARG3
790 | b >1
791 |
792 |->vmeta_arith_vv: // rb = slot RB, rc = slot RC.
793 | add CARG3, BASE, RB, lsl #3
794 | add CARG4, BASE, RC, lsl #3
795 |1:
796 | uxtb CARG5w, INSw // op = low byte of the instruction.
797 | add CARG2, BASE, RA, lsl #3 // ra = destination slot RA.
798 | str BASE, L->base
799 | mov CARG1, L
800 | str PC, SAVE_PC
801 | bl extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
802 | // Returns NULL (finished) or TValue * (metamethod).
803 | cbz CRET1, ->cont_nop
804 |
805 | // Call metamethod for binary op.
806 |->vmeta_binop:
807 | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2
808 | sub TMP1, CRET1, BASE
809 | str PC, [CRET1, #-24] // [cont|PC]
810 | add PC, TMP1, #FRAME_CONT // PC = frame delta + FRAME_CONT.
811 | mov BASE, CRET1
812 | mov NARGS8:RC, #16 // 2 args for func(o1, o2).
813 | b ->vm_call_dispatch
814 |
815 |->vmeta_len: // Operand in slot RC.
816 | add CARG2, BASE, RC, lsl #3
817#if LJ_52
818 | mov TAB:RC, TAB:CARG1 // Save table (ignored for other types).
819#endif
820 | str BASE, L->base
821 | mov CARG1, L
822 | str PC, SAVE_PC
823 | bl extern lj_meta_len // (lua_State *L, TValue *o)
824 | // Returns NULL (retry) or TValue * (metamethod base).
825#if LJ_52
826 | cbnz CRET1, ->vmeta_binop // Binop call for compatibility.
827 | mov TAB:CARG1, TAB:RC // Restore the table saved above and retry.
828 | b ->BC_LEN_Z
829#else
830 | b ->vmeta_binop // Binop call for compatibility.
831#endif
832 |
833 |//-- Call metamethod ----------------------------------------------------
834 |
835 |->vmeta_call: // Resolve and call __call metamethod.
836 | // RB = old base, BASE = new base, RC = nargs*8
837 | mov CARG1, L
838 | str RB, L->base // This is the callers base!
839 | sub CARG2, BASE, #16 // func = BASE - 16.
840 | str PC, SAVE_PC
841 | add CARG3, BASE, NARGS8:RC // top = BASE + nargs*8.
842 | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
843 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here.
844 | add NARGS8:RC, NARGS8:RC, #8 // Got one more argument now.
845 | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
846 | ins_call
847 |
848 |->vmeta_callt: // Resolve __call for BC_CALLT.
849 | // BASE = old base, RA = new base, RC = nargs*8
850 | mov CARG1, L
851 | str BASE, L->base
852 | sub CARG2, RA, #16 // func = RA - 16.
853 | str PC, SAVE_PC
854 | add CARG3, RA, NARGS8:RC // top = RA + nargs*8.
855 | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
856 | ldr LFUNC:CARG3, [RA, FRAME_FUNC] // Guaranteed to be a function here.
857 | ldr PC, [BASE, FRAME_PC]
858 | add NARGS8:RC, NARGS8:RC, #8 // Got one more argument now.
859 | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
860 | b ->BC_CALLT2_Z
861 |
862 |//-- Argument coercion for 'for' statement ------------------------------
863 |
864 |->vmeta_for: // RA = pointer passed as base to lj_meta_for.
865 | mov CARG1, L
866 | str BASE, L->base
867 | mov CARG2, RA
868 | str PC, SAVE_PC
869 | bl extern lj_meta_for // (lua_State *L, TValue *base)
870 | ldr INSw, [PC, #-4] // Reload the instruction preceding PC.
871 |.if JIT
872 | uxtb TMP0, INS // TMP0 = opcode (low byte).
873 |.endif
874 | decode_RA RA, INS
875 | decode_RD RC, INS
876 |.if JIT
877 | cmp TMP0, #BC_JFORI
878 | beq =>BC_JFORI // Re-enter the JIT variant if that was the opcode.
879 |.endif
880 | b =>BC_FORI // Otherwise retry as BC_FORI.
881 |
882 |//-----------------------------------------------------------------------
883 |//-- Fast functions -----------------------------------------------------
884 |//-----------------------------------------------------------------------
885 |
886 |.macro .ffunc, name // Only emits the ff_<name> label.
887 |->ff_ .. name:
888 |.endmacro
889 |
890 |.macro .ffunc_1, name // Label + load 1st arg into CARG1; fallback if nargs < 1.
891 |->ff_ .. name:
892 | ldr CARG1, [BASE]
893 | cmp NARGS8:RC, #8
894 | blo ->fff_fallback
895 |.endmacro
896 |
897 |.macro .ffunc_2, name // Label + load 2 args into CARG1/CARG2; fallback if nargs < 2.
898 |->ff_ .. name:
899 | ldp CARG1, CARG2, [BASE]
900 | cmp NARGS8:RC, #16
901 | blo ->fff_fallback
902 |.endmacro
903 |
904 |.macro .ffunc_n, name // One number arg: bits in CARG1, value in FARG1.
905 | .ffunc name
906 | ldr CARG1, [BASE]
907 | cmp NARGS8:RC, #8
908 | ldr FARG1, [BASE]
909 | blo ->fff_fallback
910 | checknum CARG1, ->fff_fallback
911 |.endmacro
912 |
913 |.macro .ffunc_nn, name // Two number args: bits in CARG1/CARG2, values in FARG1/FARG2.
914 | .ffunc name
915 | ldp CARG1, CARG2, [BASE]
916 | cmp NARGS8:RC, #16
917 | ldp FARG1, FARG2, [BASE]
918 | blo ->fff_fallback
919 | checknum CARG1, ->fff_fallback
920 | checknum CARG2, ->fff_fallback
921 |.endmacro
922 |
923 |// Inlined GC threshold check. Caveat: uses CARG1 and CARG2.
924 |.macro ffgccheck
925 | ldp CARG1, CARG2, GL->gc.total // Assumes threshold follows total.
926 | cmp CARG1, CARG2
927 | blt >1 // total < threshold: skip the GC step.
928 | bl ->fff_gcstep
929 |1:
930 |.endmacro
931 |
932 |//-- Base library: checks -----------------------------------------------
933 |
934 |.ffunc_1 assert
935 | ldr PC, [BASE, FRAME_PC]
936 | mov_false TMP1
937 | cmp CARG1, TMP1
938 | bhs ->fff_fallback // 1st arg >= false (unsigned compare): let the fallback handle it.
939 | str CARG1, [BASE, #-16] // First result = first argument.
940 | sub RB, BASE, #8
941 | subs RA, NARGS8:RC, #8 // RA = bytes of remaining arguments.
942 | add RC, NARGS8:RC, #8 // Compute (nresults+1)*8.
943 | cbz RA, ->fff_res // Done if exactly 1 argument.
944 |1: // Copy the remaining arguments down to the result slots.
945 | ldr CARG1, [RB, #16]
946 | sub RA, RA, #8
947 | str CARG1, [RB], #8 // Post-increment RB by one slot.
948 | cbnz RA, <1
949 | b ->fff_res
950 |
951 |.ffunc_1 type
952 | mov TMP0, #~LJ_TISNUM
953 | asr ITYPE, CARG1, #47 // ITYPE = type tag of the argument.
954 | cmn ITYPE, #~LJ_TISNUM
955 | csinv TMP1, TMP0, ITYPE, lo // TMP1 = ~LJ_TISNUM if lo, else ~ITYPE.
956 | add TMP1, TMP1, #offsetof(GCfuncC, upvalue)/8
957 | ldr CARG1, [CFUNC:CARG3, TMP1, lsl #3] // Result = upvalue[TMP1] of this C function.
958 | b ->fff_restv
959 |
960 |//-- Base library: getters and setters ---------------------------------
961 |
962 |.ffunc_1 getmetatable // Returns mt.__metatable if that field is non-nil, else mt (or nil without mt).
963 | asr ITYPE, CARG1, #47 // ITYPE = type tag of the argument.
964 | cmn ITYPE, #-LJ_TTAB
965 | ccmn ITYPE, #-LJ_TUDATA, #4, ne // Z set if the argument is a table or userdata.
966 | and TAB:CARG1, CARG1, #LJ_GCVMASK
967 | bne >6 // Other types use the base metatables.
968 |1: // Field metatable must be at same offset for GCtab and GCudata!
969 | ldr TAB:RB, TAB:CARG1->metatable
970 |2:
971 | mov CARG1, TISNIL // Result is nil if there is no metatable.
972 | ldr STR:RC, GL->gcroot[GCROOT_MMNAME+MM_metatable]
973 | cbz TAB:RB, ->fff_restv
974 | ldr TMP1w, TAB:RB->hmask
975 | ldr TMP2w, STR:RC->hash
976 | ldr NODE:CARG3, TAB:RB->node
977 | and TMP1w, TMP1w, TMP2w // idx = str->hash & tab->hmask
978 | add TMP1, TMP1, TMP1, lsl #1
979 | movn CARG4, #~LJ_TSTR
980 | add NODE:CARG3, NODE:CARG3, TMP1, lsl #3 // node = tab->node + idx*3*8
981 | add CARG4, STR:RC, CARG4, lsl #47 // Tagged key to look for.
982 |3: // Rearranged logic, because we expect _not_ to find the key.
983 | ldp CARG1, TMP0, NODE:CARG3->val // CARG1 = node value, TMP0 = node key.
984 | ldr NODE:CARG3, NODE:CARG3->next
985 | cmp TMP0, CARG4
986 | beq >5 // Key found.
987 | cbnz NODE:CARG3, <3 // Follow the collision chain.
988 |4:
989 | mov CARG1, RB // Use metatable as default result.
990 | movk CARG1, #(LJ_TTAB>>1)&0xffff, lsl #48
991 | b ->fff_restv
992 |5: // Key found: check the value. TMP0 is the (never nil) key, the value is in CARG1.
993 | cmp CARG1, TISNIL
994 | bne ->fff_restv // Non-nil __metatable field is the result.
995 | b <4 // Nil value: behave as if the key was not found.
996 |
997 |6: // Not a table/userdata: look up the base metatable for the type.
998 | movn TMP0, #~LJ_TISNUM // TMP0 = LJ_TISNUM.
999 | cmp ITYPE, TMP0
1000 | csel ITYPE, ITYPE, TMP0, hs // Clamp all number tags to LJ_TISNUM.
1001 | sub TMP1, GL, ITYPE, lsl #3
1002 | ldr TAB:RB, [TMP1, #offsetof(global_State, gcroot[GCROOT_BASEMT])-8] // gcroot[GCROOT_BASEMT + ~itype].
1003 | b <2
1004 |
1005 |.ffunc_2 setmetatable
1006 | // Fast path: no mt for table yet and not clearing the mt.
1007 | checktp TMP1, CARG1, LJ_TTAB, ->fff_fallback
1008 | ldr TAB:TMP0, TAB:TMP1->metatable // TMP0 = current metatable.
1009 | asr ITYPE, CARG2, #47 // ITYPE = type tag of the 2nd argument.
1010 | ldrb TMP2w, TAB:TMP1->marked
1011 | cmn ITYPE, #-LJ_TTAB
1012 | and TAB:CARG2, CARG2, #LJ_GCVMASK
1013 | ccmp TAB:TMP0, #0, #0, eq // eq only if 2nd arg is a table and the current mt is NULL.
1014 | bne ->fff_fallback
1015 | str TAB:CARG2, TAB:TMP1->metatable
1016 | tbz TMP2w, #2, ->fff_restv // isblack(table)
1017 | barrierback TAB:TMP1, TMP2w, TMP0
1018 | b ->fff_restv // Result is CARG1 (the 1st argument as loaded by .ffunc_2).
1019 |
1020 |.ffunc rawget
1021 | ldr CARG2, [BASE]
1022 | cmp NARGS8:RC, #16
1023 | blo ->fff_fallback // Need at least 2 arguments.
1024 | checktab CARG2, ->fff_fallback
1025 | mov CARG1, L
1026 | add CARG3, BASE, #8 // key = address of the 2nd argument.
1027 | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
1028 | // Returns cTValue *.
1029 | ldr CARG1, [CRET1]
1030 | b ->fff_restv
1031 |
1032 |//-- Base library: conversions ------------------------------------------
1033 |
1034 |.ffunc tonumber
1035 | // Only handles the number case inline (without a base argument).
1036 | ldr CARG1, [BASE]
1037 | cmp NARGS8:RC, #8
1038 | bne ->fff_fallback // Need exactly 1 argument.
1039 | checknumber CARG1, ->fff_fallback
1040 | b ->fff_restv // Return the argument unchanged.
1041 |
1042 |.ffunc_1 tostring
1043 | // Only handles the string or number case inline.
1044 | asr ITYPE, CARG1, #47 // ITYPE = type tag of the argument.
1045 | cmn ITYPE, #-LJ_TSTR
1046 | // A __tostring method in the string base metatable is ignored.
1047 | beq ->fff_restv
1048 | // Handle numbers inline, unless a number base metatable is present.
1049 | ldr TMP1, GL->gcroot[GCROOT_BASEMT_NUM]
1050 | str BASE, L->base
1051 | cmn ITYPE, #-LJ_TISNUM
1052 | ccmp TMP1, #0, #0, ls // eq only if ls holds and the base metatable is NULL.
1053 | str PC, SAVE_PC // Redundant (but a defined value).
1054 | bne ->fff_fallback
1055 | ffgccheck
1056 | mov CARG1, L
1057 | mov CARG2, BASE // o = address of the 1st argument.
1058 | bl extern lj_strfmt_number // (lua_State *L, cTValue *o)
1059 | // Returns GCstr *.
1060 | movn TMP1, #~LJ_TSTR // TMP1 = LJ_TSTR.
1061 | ldr BASE, L->base
1062 | add CARG1, CARG1, TMP1, lsl #47 // Tag the result as a string.
1063 | b ->fff_restv
1064 |
1065 |//-- Base library: iterators -------------------------------------------
1066 |
1067 |.ffunc_1 next
1068 | checktp CARG2, CARG1, LJ_TTAB, ->fff_fallback
1069 | str TISNIL, [BASE, NARGS8:RC] // Set missing 2nd arg to nil.
1070 | ldr PC, [BASE, FRAME_PC]
1071 | stp BASE, BASE, L->base // Add frame since C call can throw.
1072 | mov CARG1, L
1073 | add CARG3, BASE, #8 // key = address of the 2nd argument.
1074 | str PC, SAVE_PC
1075 | bl extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
1076 | // Returns 0 at end of traversal.
1077 | str TISNIL, [BASE, #-16] // Preset the first result to nil.
1078 | cbz CRET1, ->fff_res1 // End of traversal: return nil.
1079 | ldp CARG1, CARG2, [BASE, #8] // Copy key and value to results.
1080 | mov RC, #(2+1)*8
1081 | stp CARG1, CARG2, [BASE, #-16]
1082 | b ->fff_res
1083 |
1084 |.ffunc_1 pairs
1085 | checktp TMP1, CARG1, LJ_TTAB, ->fff_fallback
1086#if LJ_52
1087 | ldr TAB:CARG2, TAB:TMP1->metatable
1088#endif
1089 | ldr CFUNC:CARG4, CFUNC:CARG3->upvalue[0] // 1st result: upvalue[0] of this C function.
1090 | ldr PC, [BASE, FRAME_PC]
1091#if LJ_52
1092 | cbnz TAB:CARG2, ->fff_fallback // Table has a metatable: use the fallback.
1093#endif
1094 | mov RC, #(3+1)*8
1095 | stp CARG1, TISNIL, [BASE, #-8] // Results 2 and 3: the table and nil.
1096 | str CFUNC:CARG4, [BASE, #-16]
1097 | b ->fff_res
1098 |
1099 |.ffunc_2 ipairs_aux
1100 | checktab CARG1, ->fff_fallback
1101 | checkint CARG2, ->fff_fallback
1102 | ldr TMP1w, TAB:CARG1->asize
1103 | ldr CARG3, TAB:CARG1->array
1104 | ldr TMP0w, TAB:CARG1->hmask
1105 | add CARG2w, CARG2w, #1 // Next index.
1106 | cmp CARG2w, TMP1w
1107 | ldr PC, [BASE, FRAME_PC]
1108 | add TMP2, CARG2, TISNUM // Tag the new index as an integer.
1109 | mov RC, #(0+1)*8 // Default: no results.
1110 | str TMP2, [BASE, #-16] // First result slot = new index.
1111 | bhs >2 // Not in array part?
1112 | ldr TMP0, [CARG3, CARG2, lsl #3]
1113 |1:
1114 | mov TMP1, #(2+1)*8
1115 | cmp TMP0, TISNIL
1116 | str TMP0, [BASE, #-8] // Second result slot = value.
1117 | csel RC, RC, TMP1, eq // Nil value: keep 0 results, else return 2.
1118 | b ->fff_res
1119 |2: // Check for empty hash part first. Otherwise call C function.
1120 | cbz TMP0w, ->fff_res
1121 | bl extern lj_tab_getinth // (GCtab *t, int32_t key)
1122 | // Returns cTValue * or NULL.
1123 | cbz CRET1, ->fff_res
1124 | ldr TMP0, [CRET1]
1125 | b <1
1126 |
1127 |.ffunc_1 ipairs
1128 | checktp TMP1, CARG1, LJ_TTAB, ->fff_fallback
1129#if LJ_52
1130 | ldr TAB:CARG2, TAB:TMP1->metatable
1131#endif
1132 | ldr CFUNC:CARG4, CFUNC:CARG3->upvalue[0] // 1st result: upvalue[0] of this C function.
1133 | ldr PC, [BASE, FRAME_PC]
1134#if LJ_52
1135 | cbnz TAB:CARG2, ->fff_fallback // Table has a metatable: use the fallback.
1136#endif
1137 | mov RC, #(3+1)*8
1138 | stp CARG1, TISNUM, [BASE, #-8] // Results 2 and 3: the table and TISNUM (integer tag with payload 0).
1139 | str CFUNC:CARG4, [BASE, #-16]
1140 | b ->fff_res
1141 |
1142 |//-- Base library: catch errors ----------------------------------------
1143 |
1144 |.ffunc pcall
1145 | ldrb TMP0w, GL->hookmask
1146 | cmp NARGS8:RC, #8 // Need at least 1 argument (the function).
1147 | blo ->fff_fallback // RC must still be unmodified for the fallback.
     | sub NARGS8:RC, NARGS8:RC, #8 // Remaining arguments. Keeps the flags of the cmp.
1148 | mov RB, BASE
1149 | add BASE, BASE, #16
1150 | ubfx TMP0w, TMP0w, #HOOK_ACTIVE_SHIFT, #1 // TMP0 = hook-active bit of hookmask.
1151 | add PC, TMP0, #16+FRAME_PCALL // PC = frame delta + frame type (+ hook-active bit).
1152 | beq ->vm_call_dispatch // Flags from the cmp: no further arguments to move.
1153 |1: // Also entered from xpcall.
1154 | add TMP2, BASE, NARGS8:RC
1155 |2: // Move the arguments up by one slot, starting at the top.
1156 | ldr TMP0, [TMP2, #-16]
1157 | str TMP0, [TMP2, #-8]!
1158 | cmp TMP2, BASE
1159 | bne <2
1160 | b ->vm_call_dispatch
1161 |
1162 |.ffunc xpcall
1163 | ldp CARG1, CARG2, [BASE] // CARG1 = function, CARG2 = traceback.
1164 | ldrb TMP0w, GL->hookmask
1165 | subs NARGS8:TMP1, NARGS8:RC, #16 // Need at least 2 arguments. RC stays intact.
1166 | blo ->fff_fallback
1167 | asr ITYPE, CARG2, #47
1168 | cmn ITYPE, #-LJ_TFUNC
1169 | bne ->fff_fallback // Traceback must be a function.
     | // BASE, RC and PC are only modified after the last fallback exit.
1170 | mov NARGS8:RC, NARGS8:TMP1 // Remaining arguments.
1171 | mov RB, BASE
1172 | add BASE, BASE, #24
1173 | ubfx TMP0w, TMP0w, #HOOK_ACTIVE_SHIFT, #1 // TMP0 = hook-active bit of hookmask.
1174 | add PC, TMP0, #24+FRAME_PCALL // PC = frame delta + frame type (+ hook-active bit).
1175 | stp CARG2, CARG1, [RB] // Swap function and traceback.
1176 | cbz NARGS8:RC, ->vm_call_dispatch
     | b <1 // Move the remaining arguments with the loop in pcall.
1177 |
1178 |//-- Coroutine library --------------------------------------------------
1179 |
1180 |.macro coroutine_resume_wrap, resume // resume = 1: coroutine.resume, 0: wrap helper.
1181 |.if resume
1182 |.ffunc_1 coroutine_resume
1183 | checktp CARG1, LJ_TTHREAD, ->fff_fallback
1184 |.else
1185 |.ffunc coroutine_wrap_aux
1186 | ldr L:CARG1, CFUNC:CARG3->upvalue[0].gcr // Coroutine comes from upvalue[0].
1187 | and L:CARG1, CARG1, #LJ_GCVMASK
1188 |.endif
1189 | ldr PC, [BASE, FRAME_PC]
1190 | str BASE, L->base
1191 | ldp RB, CARG2, L:CARG1->base // RB = co->base, CARG2 = field after it (presumably co->top).
1192 | ldrb TMP1w, L:CARG1->status
1193 | add TMP0, CARG2, TMP1
1194 | str PC, SAVE_PC
1195 | cmp TMP0, RB
1196 | beq ->fff_fallback // CARG2 + status == co->base: use the fallback.
1197 | cmp TMP1, #LUA_YIELD
1198 | add TMP0, CARG2, #8
1199 | csel CARG2, CARG2, TMP0, hs // status < LUA_YIELD: skip one slot.
1200 | ldr CARG4, L:CARG1->maxstack
1201 | add CARG3, CARG2, NARGS8:RC // CARG3 = end of the arguments on the coroutine stack.
1202 | ldr RB, L:CARG1->cframe
1203 | ccmp CARG3, CARG4, #2, ls // Chained check: status <= LUA_YIELD, CARG3 <= maxstack ...
1204 | ccmp RB, #0, #2, ls // ... and co->cframe == 0.
1205 | bhi ->fff_fallback
1206 |.if resume
1207 | sub CARG3, CARG3, #8 // Keep resumed thread in stack for GC.
1208 | add BASE, BASE, #8
1209 | sub NARGS8:RC, NARGS8:RC, #8
1210 |.endif
1211 | str CARG3, L:CARG1->top
1212 | str BASE, L->top
1213 | cbz NARGS8:RC, >3
1214 |2: // Move args to coroutine.
1215 | ldr TMP0, [BASE, RB] // RB (co->cframe) is zero here and serves as the byte offset.
1216 | cmp RB, NARGS8:RC
1217 | str TMP0, [CARG2, RB]
1218 | add RB, RB, #8
1219 | bne <2
1220 |3:
1221 | mov CARG3, #0
1222 | mov L:RA, L:CARG1 // Keep the coroutine in RA across the call.
1223 | mov CARG4, #0
1224 | bl ->vm_resume // (lua_State *L, TValue *base, 0, 0)
1225 | // Returns thread status.
1226 |4:
1227 | ldp CARG3, CARG4, L:RA->base // CARG3 = co->base, CARG4 = field after it (presumably co->top).
1228 | cmp CRET1, #LUA_YIELD
1229 | ldr BASE, L->base
1230 | str L, GL->cur_L
1231 | st_vmstate ST_INTERP
1232 | bhi >8 // Status > LUA_YIELD (assumes st_vmstate keeps the flags).
1233 | sub RC, CARG4, CARG3 // RC = size of the results in bytes.
1234 | ldr CARG1, L->maxstack
1235 | add CARG2, BASE, RC
1236 | cbz RC, >6 // No results?
1237 | cmp CARG2, CARG1
1238 | mov RB, #0
1239 | bhi >9 // Need to grow stack?
1240 |
1241 | sub CARG4, RC, #8 // Offset of the last result.
1242 | str CARG3, L:RA->top // Clear coroutine stack.
1243 |5: // Move results from coroutine.
1244 | ldr TMP0, [CARG3, RB]
1245 | cmp RB, CARG4
1246 | str TMP0, [BASE, RB]
1247 | add RB, RB, #8
1248 | bne <5
1249 |6:
1250 |.if resume
1251 | mov_true TMP1
1252 | add RC, RC, #16 // One extra result for the prepended status.
1253 |7:
1254 | str TMP1, [BASE, #-8] // Prepend true/false to results.
1255 | sub RA, BASE, #8
1256 |.else
1257 | mov RA, BASE
1258 | add RC, RC, #8
1259 |.endif
1260 | ands CARG1, PC, #FRAME_TYPE // Z flag = frame type bits of PC are all zero.
1261 | str PC, SAVE_PC
1262 | str RCw, SAVE_MULTRES
1263 | beq ->BC_RET_Z
1264 | b ->vm_return
1265 |
1266 |8: // Coroutine returned with error (at co->top-1).
1267 |.if resume
1268 | ldr TMP0, [CARG4, #-8]! // Load the error and step CARG4 back by one slot.
1269 | mov_false TMP1
1270 | mov RC, #(2+1)*8
1271 | str CARG4, L:RA->top // Remove error from coroutine stack.
1272 | str TMP0, [BASE] // Copy error message.
1273 | b <7
1274 |.else
1275 | mov CARG1, L
1276 | mov CARG2, L:RA
1277 | bl extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co)
1278 | // Never returns.
1279 |.endif
1280 |
1281 |9: // Handle stack expansion on return from yield.
1282 | mov CARG1, L
1283 | lsr CARG2, RC, #3 // n = number of result slots.
1284 | bl extern lj_state_growstack // (lua_State *L, int n)
1285 | mov CRET1, #0 // Status 0, so the retry at 4 does not take the error path.
1286 | b <4
1287 |.endmacro
1288 |
1289 | coroutine_resume_wrap 1 // coroutine.resume
1290 | coroutine_resume_wrap 0 // coroutine.wrap
1291 |
1292 |.ffunc coroutine_yield
1293 | ldr TMP0, L->cframe
1294 | add TMP1, BASE, NARGS8:RC // TMP1 = BASE + nargs*8.
1295 | mov CRET1, #LUA_YIELD
1296 | stp BASE, TMP1, L->base // Store BASE and TMP1 into L->base and the field after it.
1297 | tbz TMP0, #0, ->fff_fallback // Bit 0 of L->cframe clear: use the fallback.
1298 | str xzr, L->cframe
1299 | strb CRET1w, L->status // L->status = LUA_YIELD.
1300 | b ->vm_leave_unw
1301 |
1302 |//-- Math library -------------------------------------------------------
1303 |
1304 |.macro math_round, func, round // Defines ff_math_<func> using rounding instruction <round>.
1305 | .ffunc math_ .. func
1306 | ldr CARG1, [BASE]
1307 | cmp NARGS8:RC, #8
1308 | ldr d0, [BASE]
1309 | blo ->fff_fallback // Need at least 1 argument.
1310 | cmp TISNUMhi, CARG1, lsr #32 // Compare the high word against the integer tag.
1311 | beq ->fff_restv // Integers are returned unchanged.
     | blo ->fff_fallback // High word above the integer tag: not a number.
1312 | round d0, d0
1313 | b ->fff_resn
1314 |.endmacro
1315 |
1316 | math_round floor, frintm
1317 | math_round ceil, frintp
1318 |
1319 |.ffunc_1 math_abs
1320 | checknumber CARG1, ->fff_fallback
1321 | and CARG1, CARG1, #U64x(7fffffff,ffffffff) // Clear bit 63.
1322 | bne ->fff_restv // Uses the flags left by checknumber.
1323 | eor CARG2w, CARG1w, CARG1w, asr #31
1324 | movz CARG3, #0x41e0, lsl #48 // 2^31.
1325 | subs CARG1w, CARG2w, CARG1w, asr #31 // 32 bit absolute value; sets the flags.
1326 | add CARG1, CARG1, TISNUM // Tag the result as an integer.
1327 | csel CARG1, CARG1, CARG3, pl // Negative 32 bit result: return 2^31 as a double.
1328 | // Fallthrough.
1329 |
1330 |->fff_restv:
1331 | // CARG1 = TValue result.
1332 | ldr PC, [BASE, FRAME_PC]
1333 | str CARG1, [BASE, #-16] // Result goes to BASE-16.
1334 |->fff_res1:
1335 | // PC = return.
1336 | mov RC, #(1+1)*8
1337 |->fff_res:
1338 | // RC = (nresults+1)*8, PC = return.
1339 | ands CARG1, PC, #FRAME_TYPE // Z flag = frame type bits of PC are all zero.
1340 | str RCw, SAVE_MULTRES
1341 | sub RA, BASE, #16 // RA = address of the first result.
1342 | bne ->vm_return
1343 | ldr INSw, [PC, #-4] // Reload the instruction preceding PC.
1344 | decode_RB RB, INS
1345 |5:
1346 | cmp RC, RB, lsl #3 // More results expected?
1347 | blo >6
1348 | decode_RA TMP1, INS
1349 | // Adjust BASE. KBASE is assumed to be set for the calling frame.
1350 | sub BASE, RA, TMP1, lsl #3
1351 | ins_next
1352 |
1353 |6: // Fill up results with nil.
1354 | add TMP1, RA, RC
1355 | add RC, RC, #8
1356 | str TISNIL, [TMP1, #-8]
1357 | b <5
1358 |
1359 |.macro math_extern, func // ff_math_<func>: call external func with one number.
1360 | .ffunc_n math_ .. func
1361 | bl extern func
1362 | b ->fff_resn
1363 |.endmacro
1364 |
1365 |.macro math_extern2, func // ff_math_<func>: call external func with two numbers.
1366 | .ffunc_nn math_ .. func
1367 | bl extern func
1368 | b ->fff_resn
1369 |.endmacro
1370 |
1371 |.ffunc_n math_sqrt
1372 | fsqrt d0, d0
1373 |->fff_resn: // Return the number in d0 as the single result.
1374 | ldr PC, [BASE, FRAME_PC]
1375 | str d0, [BASE, #-16]
1376 | b ->fff_res1
1377 |
1378 |.ffunc math_log
1379 | ldr CARG1, [BASE]
1380 | cmp NARGS8:RC, #8
1381 | ldr FARG1, [BASE]
1382 | bne ->fff_fallback // Need exactly 1 argument.
1383 | checknum CARG1, ->fff_fallback
1384 | bl extern log
1385 | b ->fff_resn
1386 |
1387 | math_extern log10
1388 | math_extern exp
1389 | math_extern sin
1390 | math_extern cos
1391 | math_extern tan
1392 | math_extern asin
1393 | math_extern acos
1394 | math_extern atan
1395 | math_extern sinh
1396 | math_extern cosh
1397 | math_extern tanh
1398 | math_extern2 pow
1399 | math_extern2 atan2
1400 | math_extern2 fmod
1401 |
1402 |.ffunc_2 math_ldexp
1403 | ldr FARG1, [BASE]
1404 | checknum CARG1, ->fff_fallback
1405 | checkint CARG2, ->fff_fallback
1406 | sxtw CARG1, CARG2w // exp = sign-extended 2nd argument.
1407 | bl extern ldexp // (double x, int exp)
1408 | b ->fff_resn
1409 |
1410 |.ffunc_n math_frexp
1411 | add CARG1, sp, TMPDofs // Pass the address of TMPD as the 2nd C argument.
1412 | bl extern frexp
1413 | ldr CARG2w, TMPD // Load the 32 bit value stored in TMPD.
1414 | ldr PC, [BASE, FRAME_PC]
1415 | str d0, [BASE, #-16] // First result: value returned in d0.
1416 | mov RC, #(2+1)*8
1417 | add CARG2, CARG2, TISNUM // Tag it as an integer.
1418 | str CARG2, [BASE, #-8] // Second result.
1419 | b ->fff_res
1420 |
1421 |.ffunc_n math_modf
1422 | sub CARG1, BASE, #16 // Pointer argument = first result slot.
1423 | ldr PC, [BASE, FRAME_PC]
1424 | bl extern modf
1425 | mov RC, #(2+1)*8
1426 | str d0, [BASE, #-8] // Second result: value returned in d0.
1427 | b ->fff_res
1428 |
1429 |.macro math_minmax, name, cond, fcond // cond/fcond: integer/FP condition to take the new value.
1430 | .ffunc_1 name
1431 | add RB, BASE, RC // RB = end of the arguments.
1432 | add RA, BASE, #8 // RA = address of the 2nd argument.
1433 | checkint CARG1, >4
1434 |1: // Handle integers.
1435 | ldr CARG2, [RA]
1436 | cmp RA, RB
1437 | bhs ->fff_restv // All arguments processed.
1438 | checkint CARG2, >3
1439 | cmp CARG1w, CARG2w
1440 | add RA, RA, #8
1441 | csel CARG1, CARG2, CARG1, cond
1442 | b <1
1443 |3: // Convert intermediate result to number and continue below.
1444 | scvtf d0, CARG1w
1445 | blo ->fff_fallback // Flags from checkint: not a number at all.
1446 | ldr d1, [RA]
1447 | b >6
1448 |
1449 |4:
1450 | ldr d0, [BASE]
1451 | blo ->fff_fallback // Flags from checkint: not a number at all.
1452 |5: // Handle numbers.
1453 | ldr CARG2, [RA]
1454 | ldr d1, [RA]
1455 | cmp RA, RB
1456 | bhs ->fff_resn // All arguments processed.
1457 | checknum CARG2, >7
1458 |6:
1459 | fcmp d0, d1
1460 | add RA, RA, #8
1461 | fcsel d0, d1, d0, fcond
1462 | b <5
1463 |7: // Convert integer to number and continue above.
1464 | scvtf d1, CARG2w
1465 | blo ->fff_fallback // Flags from checknum: not a number at all.
1466 | b <6
1467 |.endmacro
1468 |
1469 | math_minmax math_min, gt, hi
1470 | math_minmax math_max, lt, lo
1471 |
1472 |//-- String library -----------------------------------------------------
1473 |
1474 |.ffunc string_byte // Only handle the 1-arg case here.
1475 | ldp PC, CARG1, [BASE, FRAME_PC] // PC = frame PC, CARG1 = the following 8 bytes (used as 1st arg).
1476 | cmp NARGS8:RC, #8
1477 | asr ITYPE, CARG1, #47
1478 | ccmn ITYPE, #-LJ_TSTR, #0, eq // eq only with exactly 1 arg that is a string.
1479 | and STR:CARG1, CARG1, #LJ_GCVMASK
1480 | bne ->fff_fallback
1481 | ldrb TMP0w, STR:CARG1[1] // Access is always ok (NUL at end).
1482 | ldr CARG3w, STR:CARG1->len
1483 | add TMP0, TMP0, TISNUM // Tag the byte as an integer.
1484 | str TMP0, [BASE, #-16]
1485 | mov RC, #(0+1)*8
1486 | cbz CARG3, ->fff_res // Empty string: no results.
1487 | b ->fff_res1
1488 |
1489 |.ffunc string_char // Only handle the 1-arg case here.
1490 | ffgccheck
1491 | ldp PC, CARG1, [BASE, FRAME_PC] // PC = frame PC, CARG1 = the following 8 bytes (used as 1st arg).
1492 | cmp CARG1w, #255
1493 | ccmp NARGS8:RC, #8, #0, ls // Need exactly 1 argument.
1494 | bne ->fff_fallback
1495 | checkint CARG1, ->fff_fallback
1496 | mov CARG3, #1
1497 | mov CARG2, BASE // Points to stack. Little-endian.
1498 |->fff_newstr:
1499 | // CARG2 = str, CARG3 = len.
1500 | str BASE, L->base
1501 | mov CARG1, L
1502 | str PC, SAVE_PC
1503 | bl extern lj_str_new // (lua_State *L, char *str, size_t l)
1504 |->fff_resstr:
1505 | // Returns GCstr *.
1506 | ldr BASE, L->base
1507 | movn TMP1, #~LJ_TSTR // TMP1 = LJ_TSTR.
1508 | add CARG1, CARG1, TMP1, lsl #47 // Tag the result as a string.
1509 | b ->fff_restv
1510 |
1511 |.ffunc string_sub
1512 | ffgccheck
1513 | ldr CARG1, [BASE]
1514 | ldr CARG3, [BASE, #16]
1515 | cmp NARGS8:RC, #16
1516 | movn RB, #0 // Default end = -1.
1517 | beq >1 // Exactly 2 arguments: keep the default end.
1518 | blo ->fff_fallback
1519 | checkint CARG3, ->fff_fallback
1520 | sxtw RB, CARG3w
1521 |1:
1522 | ldr CARG2, [BASE, #8]
1523 | checkstr CARG1, ->fff_fallback
1524 | ldr TMP1w, STR:CARG1->len
1525 | checkint CARG2, ->fff_fallback
1526 | sxtw CARG2, CARG2w
1527 | // CARG1 = str, TMP1 = str->len, CARG2 = start, RB = end
1528 | add TMP2, RB, TMP1
1529 | cmp RB, #0
1530 | add TMP0, CARG2, TMP1
1531 | csinc RB, RB, TMP2, ge // if (end < 0) end += len+1
1532 | cmp CARG2, #0
1533 | csinc CARG2, CARG2, TMP0, ge // if (start < 0) start += len+1
1534 | cmp RB, #0
1535 | csel RB, RB, xzr, ge // if (end < 0) end = 0
1536 | cmp CARG2, #1
1537 | csinc CARG2, CARG2, xzr, ge // if (start < 1) start = 1
1538 | cmp RB, TMP1
1539 | csel RB, RB, TMP1, le // if (end > len) end = len
1540 | add CARG1, STR:CARG1, #sizeof(GCstr)-1 // Bias by -1 for the 1-based start.
1541 | subs CARG3, RB, CARG2 // len = end - start
1542 | add CARG2, CARG1, CARG2 // CARG2 = address of the first character.
1543 | add CARG3, CARG3, #1 // len += 1
1544 | bge ->fff_newstr // Flags from the subs: end >= start.
1545 | add STR:CARG1, GL, #offsetof(global_State, strempty) // Otherwise return GL->strempty.
1546 | movn TMP1, #~LJ_TSTR
1547 | add CARG1, CARG1, TMP1, lsl #47
1548 | b ->fff_restv
1549 |
1550 |.macro ffstring_op, name // ff_string_<name>: implemented via lj_buf_putstr_<name>.
1551 | .ffunc string_ .. name
1552 | ffgccheck
1553 | ldr CARG2, [BASE]
1554 | cmp NARGS8:RC, #8
1555 | asr ITYPE, CARG2, #47
1556 | ccmn ITYPE, #-LJ_TSTR, #0, hs // eq only with >= 1 arg and a string 1st arg.
1557 | and STR:CARG2, CARG2, #LJ_GCVMASK
1558 | bne ->fff_fallback
1559 | ldr TMP0, GL->tmpbuf.b
1560 | add SBUF:CARG1, GL, #offsetof(global_State, tmpbuf) // CARG1 = &GL->tmpbuf.
1561 | str BASE, L->base
1562 | str PC, SAVE_PC
1563 | str L, GL->tmpbuf.L
1564 | str TMP0, GL->tmpbuf.p // tmpbuf.p = tmpbuf.b.
1565 | bl extern lj_buf_putstr_ .. name
1566 | bl extern lj_buf_tostr // Called with the return value of the previous call in CARG1.
1567 | b ->fff_resstr
1568 |.endmacro
1569 |
1570 |ffstring_op reverse
1571 |ffstring_op lower
1572 |ffstring_op upper
1573 |
1574 |//-- Bit library --------------------------------------------------------
1575 |
1576 |// FP number to bit conversion using integer instructions only. Clobbers CARG1-CARG3
1577 |->vm_tobit_fb:
1578 | bls ->fff_fallback // Tests the flags left by the caller; presumably "not a number" from checkint -- confirm.
1579 | add CARG2, CARG1, CARG1 // Shift out the sign bit.
1580 | mov CARG3, #1076
1581 | sub CARG3, CARG3, CARG2, lsr #53 // Right-shift count = 1076 - biased exponent.
1582 | cmp CARG3, #53
1583 | bhi >1 // Shift count out of range (unsigned compare): result is 0.
1584 | and CARG2, CARG2, #U64x(001fffff,ffffffff) // Keep the mantissa bits.
1585 | orr CARG2, CARG2, #U64x(00200000,00000000) // Add the implicit leading one.
1586 | cmp CARG1, #0 // mi: input is negative.
1587 | lsr CARG2, CARG2, CARG3
1588 | cneg CARG1w, CARG2w, mi // Negate the result for negative inputs.
1589 | br lr // Return to the address the caller put in lr.
1590 |1:
1591 | mov CARG1w, #0
1592 | br lr
1593 |
1594 |.macro .ffunc_bit, name
1595 | .ffunc_1 bit_..name
1596 | adr lr, >1
1597 | checkint CARG1, ->vm_tobit_fb
1598 |1:
1599 |.endmacro
1600 |
1601 |.macro .ffunc_bit_op, name, ins
1602 | .ffunc_bit name
1603 | mov RA, #8
1604 | mov TMP0w, CARG1w
1605 | adr lr, >2
1606 |1:
1607 | ldr CARG1, [BASE, RA]
1608 | cmp RA, NARGS8:RC
1609 | add RA, RA, #8
1610 | bge >9
1611 | checkint CARG1, ->vm_tobit_fb
1612 |2:
1613 | ins TMP0w, TMP0w, CARG1w
1614 | b <1
1615 |.endmacro
1616 |
1617 |.ffunc_bit_op band, and
1618 |.ffunc_bit_op bor, orr
1619 |.ffunc_bit_op bxor, eor
1620 |
1621 |.ffunc_bit tobit
1622 | mov TMP0w, CARG1w
1623 |9: // Label reused by .ffunc_bit_op users.
1624 | add CARG1, TMP0, TISNUM
1625 | b ->fff_restv
1626 |
1627 |.ffunc_bit bswap
1628 | rev TMP0w, CARG1w
1629 | add CARG1, TMP0, TISNUM
1630 | b ->fff_restv
1631 |
1632 |.ffunc_bit bnot
1633 | mvn TMP0w, CARG1w
1634 | add CARG1, TMP0, TISNUM
1635 | b ->fff_restv
1636 |
1637 |.macro .ffunc_bit_sh, name, ins, shmod
1638 | .ffunc bit_..name
1639 | ldp TMP0, CARG1, [BASE]
1640 | cmp NARGS8:RC, #16
1641 | blo ->fff_fallback
1642 | adr lr, >1
1643 | checkint CARG1, ->vm_tobit_fb
1644 |1:
1645 |.if shmod == 0
1646 | mov TMP1, CARG1
1647 |.else
1648 | neg TMP1, CARG1
1649 |.endif
1650 | mov CARG1, TMP0
1651 | adr lr, >2
1652 | checkint CARG1, ->vm_tobit_fb
1653 |2:
1654 | ins TMP0w, CARG1w, TMP1w
1655 | add CARG1, TMP0, TISNUM
1656 | b ->fff_restv
1657 |.endmacro
1658 |
1659 |.ffunc_bit_sh lshift, lsl, 0
1660 |.ffunc_bit_sh rshift, lsr, 0
1661 |.ffunc_bit_sh arshift, asr, 0
1662 |.ffunc_bit_sh rol, ror, 1
1663 |.ffunc_bit_sh ror, ror, 0
1664 |
1665 |//-----------------------------------------------------------------------
1666 |
1667 |->fff_fallback: // Call fast function fallback handler.
1668 | // BASE = new base, RC = nargs*8
1669 | ldp CFUNC:CARG3, PC, [BASE, FRAME_FUNC] // Fallback may overwrite PC.
1670 | ldr TMP2, L->maxstack
1671 | add TMP1, BASE, NARGS8:RC
1672 | stp BASE, TMP1, L->base
1673 | and CFUNC:CARG3, CARG3, #LJ_GCVMASK
1674 | add TMP1, TMP1, #8*LUA_MINSTACK
1675 | ldr CARG3, CFUNC:CARG3->f
1676 | str PC, SAVE_PC // Redundant (but a defined value).
1677 | cmp TMP1, TMP2
1678 | mov CARG1, L
1679 | bhi >5 // Need to grow stack.
1680 | blr CARG3 // (lua_State *L)
1681 | // Either throws an error, or recovers and returns -1, 0 or nresults+1.
1682 | ldr BASE, L->base
1683 | cmp CRET1w, #0
1684 | lsl RC, CRET1, #3
1685 | sub RA, BASE, #16
1686 | bgt ->fff_res // Returned nresults+1?
1687 |1: // Returned 0 or -1: retry fast path.
1688 | ldr CARG1, L->top
1689 | ldr CFUNC:CARG3, [BASE, FRAME_FUNC]
1690 | sub NARGS8:RC, CARG1, BASE
1691 | bne ->vm_call_tail // Returned -1?
1692 | and CFUNC:CARG3, CARG3, #LJ_GCVMASK
1693 | ins_callt // Returned 0: retry fast path.
1694 |
1695 |// Reconstruct previous base for vmeta_call during tailcall.
1696 |->vm_call_tail:
1697 | ands TMP0, PC, #FRAME_TYPE
1698 | and TMP1, PC, #~FRAME_TYPEP
1699 | bne >3
1700 | ldrb RAw, [PC, #-3]
1701 | lsl RA, RA, #3
1702 | add TMP1, RA, #16
1703 |3:
1704 | sub RB, BASE, TMP1
1705 | b ->vm_call_dispatch // Resolve again for tailcall.
1706 |
1707 |5: // Grow stack for fallback handler.
1708 | mov CARG2, #LUA_MINSTACK
1709 | bl extern lj_state_growstack // (lua_State *L, int n)
1710 | ldr BASE, L->base
1711 | cmp CARG1, CARG1 // Set zero-flag to force retry.
1712 | b <1
1713 |
1714 |->fff_gcstep: // Call GC step function.
1715 | // BASE = new base, RC = nargs*8
1716 | add CARG2, BASE, NARGS8:RC // Calculate L->top.
1717 | mov RA, lr
1718 | stp BASE, CARG2, L->base
1719 | str PC, SAVE_PC // Redundant (but a defined value).
1720 | mov CARG1, L
1721 | bl extern lj_gc_step // (lua_State *L)
1722 | ldp BASE, CARG2, L->base
1723 | ldr CFUNC:CARG3, [BASE, FRAME_FUNC]
1724 | mov lr, RA // Help return address predictor.
1725 | sub NARGS8:RC, CARG2, BASE // Calculate nargs*8.
1726 | and CFUNC:CARG3, CARG3, #LJ_GCVMASK
1727 | ret
1728 |
1729 |//-----------------------------------------------------------------------
1730 |//-- Special dispatch targets -------------------------------------------
1731 |//-----------------------------------------------------------------------
1732 |
1733 |->vm_record: // Dispatch target for recording phase.
1734 | NYI
1735 |
1736 |->vm_rethook: // Dispatch target for return hooks.
1737 | ldrb TMP2w, GL->hookmask
1738 | tbz TMP2w, #HOOK_ACTIVE_SHIFT, >1 // Hook already active?
1739 |5: // Re-dispatch to static ins.
1740 | ldr TMP0, [TMP1, #GG_G2DISP+GG_DISP2STATIC]
1741 | br TMP0
1742 |
1743 |->vm_inshook: // Dispatch target for instr/line hooks.
1744 | ldrb TMP2w, GL->hookmask
1745 | ldr TMP3w, GL->hookcount
1746 | tbnz TMP2w, #HOOK_ACTIVE_SHIFT, <5 // Hook already active?
1747 | tst TMP2w, #LUA_MASKLINE|LUA_MASKCOUNT
1748 | beq <5
1749 | sub TMP3w, TMP3w, #1
1750 | str TMP3w, GL->hookcount
1751 | cbz TMP3w, >1
1752 | tbz TMP2w, #LUA_HOOKLINE, <5
1753 |1:
1754 | mov CARG1, L
1755 | str BASE, L->base
1756 | mov CARG2, PC
1757 | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
1758 | bl extern lj_dispatch_ins // (lua_State *L, const BCIns *pc)
1759 |3:
1760 | ldr BASE, L->base
1761 |4: // Re-dispatch to static ins.
1762 | ldr INSw, [PC, #-4]
1763 | add TMP1, GL, INS, uxtb #3
1764 | decode_RA RA, INS
1765 | ldr TMP0, [TMP1, #GG_G2DISP+GG_DISP2STATIC]
1766 | decode_RD RC, INS
1767 | br TMP0
1768 |
1769 |->cont_hook: // Continue from hook yield.
1770 | ldr CARG1, [CARG4, #-40]
1771 | add PC, PC, #4
1772 | str CARG1w, SAVE_MULTRES // Restore MULTRES for *M ins.
1773 | b <4
1774 |
1775 |->vm_hotloop: // Hot loop counter underflow.
1776 | NYI
1777 |
1778 |->vm_callhook: // Dispatch target for call hooks.
1779 | mov CARG2, PC
1780 |.if JIT
1781 | b >1
1782 |.endif
1783 |
1784 |->vm_hotcall: // Hot call counter underflow.
1785 |.if JIT
1786 | orr CARG2, PC, #1
1787 |1:
1788 |.endif
1789 | add TMP1, BASE, NARGS8:RC
1790 | str PC, SAVE_PC
1791 | mov CARG1, L
1792 | sub RA, RA, BASE
1793 | stp BASE, TMP1, L->base
1794 | bl extern lj_dispatch_call // (lua_State *L, const BCIns *pc)
1795 | // Returns ASMFunction.
1796 | ldp BASE, TMP1, L->base
1797 | str xzr, SAVE_PC // Invalidate for subsequent line hook.
1798 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC]
1799 | add RA, BASE, RA
1800 | sub NARGS8:RC, TMP1, BASE
1801 | ldr INSw, [PC, #-4]
1802 | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
1803 | br CRET1
1804 |
1805 |->cont_stitch: // Trace stitching.
1806 | NYI
1807 |
1808 |->vm_profhook: // Dispatch target for profiler hook.
1809#if LJ_HASPROFILE
1810 | mov CARG1, L
1811 | str BASE, L->base
1812 | mov CARG2, PC
1813 | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc)
1814 | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
1815 | ldr BASE, L->base
1816 | sub PC, PC, #4
1817 | b ->cont_nop
1818#endif
1819 |
1820 |//-----------------------------------------------------------------------
1821 |//-- Trace exit handler -------------------------------------------------
1822 |//-----------------------------------------------------------------------
1823 |
1824 |->vm_exit_handler:
1825 | NYI
1826 |->vm_exit_interp:
1827 | NYI
1828 |
1829 |//-----------------------------------------------------------------------
1830 |//-- Math helper functions ----------------------------------------------
1831 |//-----------------------------------------------------------------------
1832 |
1833 | // int lj_vm_modi(int dividend, int divisor);
1834 |->vm_modi:
1835 | eor CARG4w, CARG1w, CARG2w
1836 | cmp CARG4w, #0 // mi: operands differ in sign. Flags are kept until the ccmp below.
1837 | eor CARG3w, CARG1w, CARG1w, asr #31
1838 | eor CARG4w, CARG2w, CARG2w, asr #31
1839 | sub CARG3w, CARG3w, CARG1w, asr #31 // CARG3 = abs(dividend)
1840 | sub CARG4w, CARG4w, CARG2w, asr #31 // CARG4 = abs(divisor)
1841 | udiv CARG1w, CARG3w, CARG4w
1842 | msub CARG1w, CARG1w, CARG4w, CARG3w // rem = abs(dividend) - quot * abs(divisor)
1843 | ccmp CARG1w, #0, #4, mi // Signs differ: test rem == 0. Otherwise force eq.
1844 | sub CARG3w, CARG1w, CARG4w
1845 | csel CARG1w, CARG1w, CARG3w, eq // if (ne) rem -= abs(divisor)
1846 | eor CARG3w, CARG1w, CARG2w
1847 | cmp CARG3w, #0
1848 | cneg CARG1w, CARG1w, mi // Negate if rem and divisor differ in sign.
1849 | ret
1850 |
1851 |//-----------------------------------------------------------------------
1852 |//-- Miscellaneous functions --------------------------------------------
1853 |//-----------------------------------------------------------------------
1854 |
1855 |//-----------------------------------------------------------------------
1856 |//-- FFI helper functions -----------------------------------------------
1857 |//-----------------------------------------------------------------------
1858 |
1859 |// Handler for callback functions.
1860 |// Callback slot number in w9, g in x10. Registers are saved by saveregs below.
1861 |->vm_ffi_callback:
1862 |.if FFI
1863 |.type CTSTATE, CTState, PC
1864 | saveregs
1865 | ldr CTSTATE, GL:x10->ctype_state
1866 | mov GL, x10
1867 | add x10, sp, # CFRAME_SPACE
1868 | str w9, CTSTATE->cb.slot
1869 | stp x0, x1, CTSTATE->cb.gpr[0]
1870 | stp d0, d1, CTSTATE->cb.fpr[0]
1871 | stp x2, x3, CTSTATE->cb.gpr[2]
1872 | stp d2, d3, CTSTATE->cb.fpr[2]
1873 | stp x4, x5, CTSTATE->cb.gpr[4]
1874 | stp d4, d5, CTSTATE->cb.fpr[4]
1875 | stp x6, x7, CTSTATE->cb.gpr[6]
1876 | stp d6, d7, CTSTATE->cb.fpr[6]
1877 | str x10, CTSTATE->cb.stack
1878 | mov CARG1, CTSTATE
1879 | str CTSTATE, SAVE_PC // Any value outside of bytecode is ok.
1880 | mov CARG2, sp
1881 | bl extern lj_ccallback_enter // (CTState *cts, void *cf)
1882 | // Returns lua_State *.
1883 | ldp BASE, RC, L:CRET1->base
1884 | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48
1885 | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
1886 | movn TISNIL, #0
1887 | mov L, CRET1
1888 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC]
1889 | sub RC, RC, BASE
1890 | st_vmstate ST_INTERP
1891 | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
1892 | ins_callt
1893 |.endif
1894 |
1895 |->cont_ffi_callback: // Return from FFI callback.
1896 |.if FFI
1897 | ldr CTSTATE, GL->ctype_state
1898 | stp BASE, CARG4, L->base
1899 | str L, CTSTATE->L
1900 | mov CARG1, CTSTATE
1901 | mov CARG2, RA
1902 | bl extern lj_ccallback_leave // (CTState *cts, TValue *o)
1903 | ldp x0, x1, CTSTATE->cb.gpr[0]
1904 | ldp d0, d1, CTSTATE->cb.fpr[0]
1905 | b ->vm_leave_unw
1906 |.endif
1907 |
1908 |->vm_ffi_call: // Call C function via FFI.
1909 | // Caveat: needs special frame unwinding, see below.
1910 |.if FFI
1911 | .type CCSTATE, CCallState, x19
1912 | stp fp, lr, [sp, #-32]!
1913 | add fp, sp, #0
1914 | str CCSTATE, [sp, #16]
1915 | mov CCSTATE, x0
1916 | ldr TMP0w, CCSTATE:x0->spadj
1917 | ldrb TMP1w, CCSTATE->nsp
1918 | add TMP2, CCSTATE, #offsetof(CCallState, stack)
1919 | subs TMP1, TMP1, #1
1920 | ldr TMP3, CCSTATE->func
1921 | sub sp, fp, TMP0
1922 | bmi >2
1923 |1: // Copy stack slots
1924 | ldr TMP0, [TMP2, TMP1, lsl #3]
1925 | str TMP0, [sp, TMP1, lsl #3]
1926 | subs TMP1, TMP1, #1
1927 | bpl <1
1928 |2:
1929 | ldp x0, x1, CCSTATE->gpr[0]
1930 | ldp d0, d1, CCSTATE->fpr[0]
1931 | ldp x2, x3, CCSTATE->gpr[2]
1932 | ldp d2, d3, CCSTATE->fpr[2]
1933 | ldp x4, x5, CCSTATE->gpr[4]
1934 | ldp d4, d5, CCSTATE->fpr[4]
1935 | ldp x6, x7, CCSTATE->gpr[6]
1936 | ldp d6, d7, CCSTATE->fpr[6]
1937 | ldr x8, CCSTATE->retp
1938 | blr TMP3
1939 | mov sp, fp
1940 | stp x0, x1, CCSTATE->gpr[0]
1941 | stp d0, d1, CCSTATE->fpr[0]
1942 | stp d2, d3, CCSTATE->fpr[2]
1943 | ldr CCSTATE, [sp, #16]
1944 | ldp fp, lr, [sp], #32
1945 | ret
1946 |.endif
1947 |// Note: vm_ffi_call must be the last function in this object file!
1948 |
1949 |//-----------------------------------------------------------------------
1950}
1951
1952/* Generate the code for a single instruction. */
1953static void build_ins(BuildCtx *ctx, BCOp op, int defop)
1954{
1955 int vk = 0;
1956 |=>defop:
1957
1958 switch (op) {
1959
1960 /* -- Comparison ops ---------------------------------------------------- */
1961
1962 /* Remember: all ops branch for a true comparison, fall through otherwise. */
1963
1964 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
1965 | // RA = src1, RC = src2, JMP with RC = target
1966 | ldr CARG1, [BASE, RA, lsl #3]
1967 | ldrh RBw, [PC, #2]
1968 | ldr CARG2, [BASE, RC, lsl #3]
1969 | add PC, PC, #4
1970 | add RB, PC, RB, lsl #2
1971 | sub RB, RB, #0x20000
1972 | checkint CARG1, >3
1973 | checkint CARG2, >4
1974 | cmp CARG1w, CARG2w
1975 if (op == BC_ISLT) {
1976 | csel PC, RB, PC, lt
1977 } else if (op == BC_ISGE) {
1978 | csel PC, RB, PC, ge
1979 } else if (op == BC_ISLE) {
1980 | csel PC, RB, PC, le
1981 } else {
1982 | csel PC, RB, PC, gt
1983 }
1984 |1:
1985 | ins_next
1986 |
1987 |3: // RA not int.
1988 | ldr FARG1, [BASE, RA, lsl #3]
1989 | blo ->vmeta_comp
1990 | ldr FARG2, [BASE, RC, lsl #3]
1991 | cmp TISNUMhi, CARG2, lsr #32
1992 | bhi >5
1993 | bne ->vmeta_comp
1994 | // RA number, RC int.
1995 | scvtf FARG2, CARG2w
1996 | b >5
1997 |
1998 |4: // RA int, RC not int
1999 | ldr FARG2, [BASE, RC, lsl #3]
2000 | blo ->vmeta_comp
2001 | // RA int, RC number.
2002 | scvtf FARG1, CARG1w
2003 |
2004 |5: // RA number, RC number
2005 | fcmp FARG1, FARG2
2006 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
2007 if (op == BC_ISLT) {
2008 | csel PC, RB, PC, lo
2009 } else if (op == BC_ISGE) {
2010 | csel PC, RB, PC, hs
2011 } else if (op == BC_ISLE) {
2012 | csel PC, RB, PC, ls
2013 } else {
2014 | csel PC, RB, PC, hi
2015 }
2016 | b <1
2017 break;
2018
2019 case BC_ISEQV: case BC_ISNEV:
2020 vk = op == BC_ISEQV;
2021 | // RA = src1, RC = src2, JMP with RC = target
2022 | ldr CARG1, [BASE, RA, lsl #3]
2023 | add RC, BASE, RC, lsl #3
2024 | ldrh RBw, [PC, #2]
2025 | ldr CARG3, [RC]
2026 | add PC, PC, #4
2027 | add RB, PC, RB, lsl #2
2028 | sub RB, RB, #0x20000
2029 | asr ITYPE, CARG3, #47
2030 | cmn ITYPE, #-LJ_TISNUM
2031 if (vk) {
2032 | bls ->BC_ISEQN_Z
2033 } else {
2034 | bls ->BC_ISNEN_Z
2035 }
2036 | // RC is not a number.
2037 | asr TMP0, CARG1, #47
2038 |.if FFI
2039 | // Check if RC or RA is a cdata.
2040 | cmn ITYPE, #-LJ_TCDATA
2041 | ccmn TMP0, #-LJ_TCDATA, #4, ne
2042 | beq ->vmeta_equal_cd
2043 |.endif
2044 | cmp CARG1, CARG3
2045 | bne >2
2046 | // Tag and value are equal.
2047 if (vk) {
2048 |->BC_ISEQV_Z:
2049 | mov PC, RB // Perform branch.
2050 }
2051 |1:
2052 | ins_next
2053 |
2054 |2: // Check if the tags are the same and it's a table or userdata.
2055 | cmp ITYPE, TMP0
2056 | ccmn ITYPE, #-LJ_TISTABUD, #2, eq
2057 if (vk) {
2058 | bhi <1
2059 } else {
2060 | bhi ->BC_ISEQV_Z // Reuse code from opposite instruction.
2061 }
2062 | // Different tables or userdatas. Need to check __eq metamethod.
2063 | // Field metatable must be at same offset for GCtab and GCudata!
2064 | and TAB:CARG2, CARG1, #LJ_GCVMASK
2065 | ldr TAB:TMP2, TAB:CARG2->metatable
2066 if (vk) {
2067 | cbz TAB:TMP2, <1 // No metatable?
2068 | ldrb TMP1w, TAB:TMP2->nomm
2069 | mov CARG4, #0 // ne = 0
2070 | tbnz TMP1w, #MM_eq, <1 // 'no __eq' flag set: done.
2071 } else {
2072 | cbz TAB:TMP2, ->BC_ISEQV_Z // No metatable?
2073 | ldrb TMP1w, TAB:TMP2->nomm
2074 | mov CARG4, #1 // ne = 1.
2075 | tbnz TMP1w, #MM_eq, ->BC_ISEQV_Z // 'no __eq' flag set: done.
2076 }
2077 | b ->vmeta_equal
2078 break;
2079
2080 case BC_ISEQS: case BC_ISNES:
2081 vk = op == BC_ISEQS;
2082 | // RA = src, RC = str_const (~), JMP with RC = target
2083 | ldr CARG1, [BASE, RA, lsl #3]
2084 | mvn RC, RC
2085 | ldrh RBw, [PC, #2]
2086 | ldr CARG2, [KBASE, RC, lsl #3]
2087 | add PC, PC, #4
2088 | movn TMP0, #~LJ_TSTR
2089 |.if FFI
2090 | asr ITYPE, CARG1, #47
2091 |.endif
2092 | add RB, PC, RB, lsl #2
2093 | add CARG2, CARG2, TMP0, lsl #47
2094 | sub RB, RB, #0x20000
2095 |.if FFI
2096 | cmn ITYPE, #-LJ_TCDATA
2097 | beq ->vmeta_equal_cd
2098 |.endif
2099 | cmp CARG1, CARG2
2100 if (vk) {
2101 | csel PC, RB, PC, eq
2102 } else {
2103 | csel PC, RB, PC, ne
2104 }
2105 | ins_next
2106 break;
2107
2108 case BC_ISEQN: case BC_ISNEN:
2109 vk = op == BC_ISEQN;
2110 | // RA = src, RC = num_const (~), JMP with RC = target
2111 | ldr CARG1, [BASE, RA, lsl #3]
2112 | add RC, KBASE, RC, lsl #3
2113 | ldrh RBw, [PC, #2]
2114 | ldr CARG3, [RC]
2115 | add PC, PC, #4
2116 | add RB, PC, RB, lsl #2
2117 | sub RB, RB, #0x20000
2118 if (vk) {
2119 |->BC_ISEQN_Z:
2120 } else {
2121 |->BC_ISNEN_Z:
2122 }
2123 | checkint CARG1, >4
2124 | checkint CARG3, >6
2125 | cmp CARG1w, CARG3w
2126 |1:
2127 if (vk) {
2128 | csel PC, RB, PC, eq
2129 |2:
2130 } else {
2131 |2:
2132 | csel PC, RB, PC, ne
2133 }
2134 |3:
2135 | ins_next
2136 |
2137 |4: // RA not int.
2138 |.if FFI
2139 | blo >7
2140 |.else
2141 | blo <2
2142 |.endif
2143 | ldr FARG1, [BASE, RA, lsl #3]
2144 | ldr FARG2, [RC]
2145 | cmp TISNUMhi, CARG3, lsr #32
2146 | bne >5
2147 | // RA number, RC int.
2148 | scvtf FARG2, CARG3w
2149 |5:
2150 | // RA number, RC number.
2151 | fcmp FARG1, FARG2
2152 | b <1
2153 |
2154 |6: // RA int, RC number
2155 | ldr FARG2, [RC]
2156 | scvtf FARG1, CARG1w
2157 | fcmp FARG1, FARG2
2158 | b <1
2159 |
2160 |.if FFI
2161 |7:
2162 | asr ITYPE, CARG1, #47
2163 | cmn ITYPE, #-LJ_TCDATA
2164 | bne <2
2165 | b ->vmeta_equal_cd
2166 |.endif
2167 break;
2168
2169 case BC_ISEQP: case BC_ISNEP:
2170 vk = op == BC_ISEQP;
2171 | // RA = src, RC = primitive_type (~), JMP with RC = target
2172 | ldr TMP0, [BASE, RA, lsl #3]
2173 | ldrh RBw, [PC, #2]
2174 | add PC, PC, #4
2175 | add RC, RC, #1
2176 | add RB, PC, RB, lsl #2
2177 |.if FFI
2178 | asr ITYPE, TMP0, #47
2179 | cmn ITYPE, #-LJ_TCDATA
2180 | beq ->vmeta_equal_cd
2181 | cmn RC, ITYPE
2182 |.else
2183 | cmn RC, TMP0, asr #47
2184 |.endif
2185 | sub RB, RB, #0x20000
2186 if (vk) {
2187 | csel PC, RB, PC, eq
2188 } else {
2189 | csel PC, RB, PC, ne
2190 }
2191 | ins_next
2192 break;
2193
2194 /* -- Unary test and copy ops ------------------------------------------- */
2195
2196 case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
2197 | // RA = dst or unused, RC = src, JMP with RC = target
2198 | ldrh RBw, [PC, #2]
2199 | ldr TMP0, [BASE, RC, lsl #3]
2200 | add PC, PC, #4
2201 | mov_false TMP1
2202 | add RB, PC, RB, lsl #2
2203 | cmp TMP0, TMP1
2204 | sub RB, RB, #0x20000
2205 if (op == BC_ISTC || op == BC_IST) {
2206 if (op == BC_ISTC) {
2207 | csel RA, RA, RC, lo
2208 }
2209 | csel PC, RB, PC, lo
2210 } else {
2211 if (op == BC_ISFC) {
2212 | csel RA, RA, RC, hs
2213 }
2214 | csel PC, RB, PC, hs
2215 }
2216 if (op == BC_ISTC || op == BC_ISFC) {
2217 | str TMP0, [BASE, RA, lsl #3]
2218 }
2219 | ins_next
2220 break;
2221
2222 case BC_ISTYPE:
2223 | // RA = src, RC = -type
2224 | ldr TMP0, [BASE, RA, lsl #3]
2225 | cmn RC, TMP0, asr #47
2226 | bne ->vmeta_istype
2227 | ins_next
2228 break;
2229 case BC_ISNUM:
2230 | // RA = src, RC = -(TISNUM-1)
2231 | ldr TMP0, [BASE, RA]
2232 | checknum TMP0, ->vmeta_istype
2233 | ins_next
2234 break;
2235
2236 /* -- Unary ops --------------------------------------------------------- */
2237
2238 case BC_MOV:
2239 | // RA = dst, RC = src
2240 | ldr TMP0, [BASE, RC, lsl #3]
2241 | str TMP0, [BASE, RA, lsl #3]
2242 | ins_next
2243 break;
2244 case BC_NOT:
2245 | // RA = dst, RC = src
2246 | ldr TMP0, [BASE, RC, lsl #3]
2247 | mov_false TMP1
2248 | mov_true TMP2
2249 | cmp TMP0, TMP1
2250 | csel TMP0, TMP1, TMP2, lo
2251 | str TMP0, [BASE, RA, lsl #3]
2252 | ins_next
2253 break;
2254 case BC_UNM:
2255 | // RA = dst, RC = src
2256 | ldr TMP0, [BASE, RC, lsl #3]
2257 | asr ITYPE, TMP0, #47
2258 | cmn ITYPE, #-LJ_TISNUM
2259 | bhi ->vmeta_unm
2260 | eor TMP0, TMP0, #U64x(80000000,00000000)
2261 | bne >5
2262 | negs TMP0w, TMP0w
2263 | movz CARG3, #0x41e0, lsl #48 // 2^31.
2264 | add TMP0, TMP0, TISNUM
2265 | csel TMP0, TMP0, CARG3, vc
2266 |5:
2267 | str TMP0, [BASE, RA, lsl #3]
2268 | ins_next
2269 break;
2270 case BC_LEN:
2271 | // RA = dst, RC = src
2272 | ldr CARG1, [BASE, RC, lsl #3]
2273 | asr ITYPE, CARG1, #47
2274 | cmn ITYPE, #-LJ_TSTR
2275 | and CARG1, CARG1, #LJ_GCVMASK
2276 | bne >2
2277 | ldr CARG1w, STR:CARG1->len
2278 |1:
2279 | add CARG1, CARG1, TISNUM
2280 | str CARG1, [BASE, RA, lsl #3]
2281 | ins_next
2282 |
2283 |2:
2284 | cmn ITYPE, #-LJ_TTAB
2285 | bne ->vmeta_len
2286#if LJ_52
2287 | ldr TAB:CARG2, TAB:CARG1->metatable
2288 | cbnz TAB:CARG2, >9
2289 |3:
2290#endif
2291 |->BC_LEN_Z:
2292 | bl extern lj_tab_len // (GCtab *t)
2293 | // Returns uint32_t (but less than 2^31).
2294 | b <1
2295 |
2296#if LJ_52
2297 |9:
2298 | ldrb TMP1w, TAB:CARG2->nomm
2299 | tbnz TMP1w, #MM_len, <3 // 'no __len' flag set: done.
2300 | b ->vmeta_len
2301#endif
2302 break;
2303
2304 /* -- Binary ops -------------------------------------------------------- */
2305
2306 |.macro ins_arithcheck_int, target
2307 | checkint CARG1, target
2308 | checkint CARG2, target
2309 |.endmacro
2310 |
2311 |.macro ins_arithcheck_num, target
2312 | checknum CARG1, target
2313 | checknum CARG2, target
2314 |.endmacro
2315 |
2316 |.macro ins_arithcheck_nzdiv, target
2317 | cbz CARG2w, target
2318 |.endmacro
2319 |
2320 |.macro ins_arithhead
2321 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
2322 ||if (vk == 1) {
2323 | and RC, RC, #255
2324 | decode_RB RB, INS
2325 ||} else {
2326 | decode_RB RB, INS
2327 | and RC, RC, #255
2328 ||}
2329 |.endmacro
2330 |
2331 |.macro ins_arithload, reg1, reg2
2332 | // RA = dst, RB = src1, RC = src2 | num_const
2333 ||switch (vk) {
2334 ||case 0:
2335 | ldr reg1, [BASE, RB, lsl #3]
2336 | ldr reg2, [KBASE, RC, lsl #3]
2337 || break;
2338 ||case 1:
2339 | ldr reg1, [KBASE, RC, lsl #3]
2340 | ldr reg2, [BASE, RB, lsl #3]
2341 || break;
2342 ||default:
2343 | ldr reg1, [BASE, RB, lsl #3]
2344 | ldr reg2, [BASE, RC, lsl #3]
2345 || break;
2346 ||}
2347 |.endmacro
2348 |
2349 |.macro ins_arithfallback, ins
2350 ||switch (vk) {
2351 ||case 0:
2352 | ins ->vmeta_arith_vn
2353 || break;
2354 ||case 1:
2355 | ins ->vmeta_arith_nv
2356 || break;
2357 ||default:
2358 | ins ->vmeta_arith_vv
2359 || break;
2360 ||}
2361 |.endmacro
2362 |
2363 |.macro ins_arithmod, res, reg1, reg2
2364 | fdiv d2, reg1, reg2
2365 | frintm d2, d2
2366 | fmsub res, d2, reg2, reg1
2367 |.endmacro
2368 |
2369 |.macro ins_arithdn, intins, fpins
2370 | ins_arithhead
2371 | ins_arithload CARG1, CARG2
2372 | ins_arithcheck_int >5
2373 |.if "intins" == "smull"
2374 | smull CARG1, CARG1w, CARG2w
2375 | cmp CARG1, CARG1, sxtw
2376 | mov CARG1w, CARG1w
2377 | ins_arithfallback bne
2378 |.elif "intins" == "ins_arithmodi"
2379 | ins_arithfallback ins_arithcheck_nzdiv
2380 | bl ->vm_modi
2381 |.else
2382 | intins CARG1w, CARG1w, CARG2w
2383 | ins_arithfallback bvs
2384 |.endif
2385 | add CARG1, CARG1, TISNUM
2386 | str CARG1, [BASE, RA, lsl #3]
2387 |4:
2388 | ins_next
2389 |
2390 |5: // FP variant.
2391 | ins_arithload FARG1, FARG2
2392 | ins_arithfallback ins_arithcheck_num
2393 | fpins FARG1, FARG1, FARG2
2394 | str FARG1, [BASE, RA, lsl #3]
2395 | b <4
2396 |.endmacro
2397 |
2398 |.macro ins_arithfp, fpins
2399 | ins_arithhead
2400 | ins_arithload CARG1, CARG2
2401 | ins_arithload FARG1, FARG2
2402 | ins_arithfallback ins_arithcheck_num
2403 |.if "fpins" == "fpow"
2404 | bl extern pow
2405 |.else
2406 | fpins FARG1, FARG1, FARG2
2407 |.endif
2408 | str FARG1, [BASE, RA, lsl #3]
2409 | ins_next
2410 |.endmacro
2411
2412 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
2413 | ins_arithdn adds, fadd
2414 break;
2415 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
2416 | ins_arithdn subs, fsub
2417 break;
2418 case BC_MULVN: case BC_MULNV: case BC_MULVV:
2419 | ins_arithdn smull, fmul
2420 break;
2421 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
2422 | ins_arithfp fdiv
2423 break;
2424 case BC_MODVN: case BC_MODNV: case BC_MODVV:
2425 | ins_arithdn ins_arithmodi, ins_arithmod
2426 break;
2427 case BC_POW:
2428 | // NYI: (partial) integer arithmetic.
2429 | ins_arithfp fpow
2430 break;
2431
2432 case BC_CAT:
2433 | decode_RB RB, INS
2434 | and RC, RC, #255
2435 | // RA = dst, RB = src_start, RC = src_end
2436 | str BASE, L->base
2437 | sub CARG3, RC, RB
2438 | add CARG2, BASE, RC, lsl #3
2439 |->BC_CAT_Z:
2440 | // RA = dst, CARG2 = top-1, CARG3 = left
2441 | mov CARG1, L
2442 | str PC, SAVE_PC
2443 | bl extern lj_meta_cat // (lua_State *L, TValue *top, int left)
2444 | // Returns NULL (finished) or TValue * (metamethod).
2445 | ldrb RBw, [PC, #-1]
2446 | ldr BASE, L->base
2447 | cbnz CRET1, ->vmeta_binop
2448 | ldr TMP0, [BASE, RB, lsl #3]
2449 | str TMP0, [BASE, RA, lsl #3] // Copy result to RA.
2450 | ins_next
2451 break;
2452
2453 /* -- Constant ops ------------------------------------------------------ */
2454
2455 case BC_KSTR:
2456 | // RA = dst, RC = str_const (~)
2457 | mvn RC, RC
2458 | ldr TMP0, [KBASE, RC, lsl #3]
2459 | movn TMP1, #~LJ_TSTR
2460 | add TMP0, TMP0, TMP1, lsl #47
2461 | str TMP0, [BASE, RA, lsl #3]
2462 | ins_next
2463 break;
2464 case BC_KCDATA:
2465 |.if FFI
2466 | // RA = dst, RC = cdata_const (~)
2467 | mvn RC, RC
2468 | ldr TMP0, [KBASE, RC, lsl #3]
2469 | movn TMP1, #~LJ_TCDATA
2470 | add TMP0, TMP0, TMP1, lsl #47
2471 | str TMP0, [BASE, RA, lsl #3]
2472 | ins_next
2473 |.endif
2474 break;
2475 case BC_KSHORT:
2476 | // RA = dst, RC = int16_literal
2477 | sxth RCw, RCw
2478 | add TMP0, RC, TISNUM
2479 | str TMP0, [BASE, RA, lsl #3]
2480 | ins_next
2481 break;
2482 case BC_KNUM:
2483 | // RA = dst, RC = num_const
2484 | ldr TMP0, [KBASE, RC, lsl #3]
2485 | str TMP0, [BASE, RA, lsl #3]
2486 | ins_next
2487 break;
2488 case BC_KPRI:
2489 | // RA = dst, RC = primitive_type (~)
2490 | mvn TMP0, RC, lsl #47
2491 | str TMP0, [BASE, RA, lsl #3]
2492 | ins_next
2493 break;
2494 case BC_KNIL:
2495 | // RA = base, RC = end
2496 | add RA, BASE, RA, lsl #3
2497 | add RC, BASE, RC, lsl #3
2498 | str TISNIL, [RA], #8
2499 |1:
2500 | cmp RA, RC
2501 | str TISNIL, [RA], #8
2502 | blt <1
2503 | ins_next_
2504 break;
2505
2506 /* -- Upvalue and function ops ------------------------------------------ */
2507
2508 case BC_UGET:
2509 | // RA = dst, RC = uvnum
2510 | ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
2511 | add RC, RC, #offsetof(GCfuncL, uvptr)/8
2512 | and LFUNC:CARG2, CARG2, #LJ_GCVMASK
2513 | ldr UPVAL:CARG2, [LFUNC:CARG2, RC, lsl #3]
2514 | ldr CARG2, UPVAL:CARG2->v
2515 | ldr TMP0, [CARG2]
2516 | str TMP0, [BASE, RA, lsl #3]
2517 | ins_next
2518 break;
2519 case BC_USETV:
2520 | // RA = uvnum, RC = src
2521 | ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
2522 | add RA, RA, #offsetof(GCfuncL, uvptr)/8
2523 | and LFUNC:CARG2, CARG2, #LJ_GCVMASK
2524 | ldr UPVAL:CARG1, [LFUNC:CARG2, RA, lsl #3]
2525 | ldr CARG3, [BASE, RC, lsl #3]
2526 | ldr CARG2, UPVAL:CARG1->v
2527 | ldrb TMP2w, UPVAL:CARG1->marked
2528 | ldrb TMP0w, UPVAL:CARG1->closed
2529 | asr ITYPE, CARG3, #47
2530 | str CARG3, [CARG2]
2531 | add ITYPE, ITYPE, #-LJ_TISGCV
2532 | tst TMP2w, #LJ_GC_BLACK // isblack(uv)
2533 | ccmp TMP0w, #0, #4, ne // && uv->closed
2534 | ccmn ITYPE, #-(LJ_TNUMX - LJ_TISGCV), #0, ne // && tvisgcv(v)
2535 | bhi >2
2536 |1:
2537 | ins_next
2538 |
2539 |2: // Check if new value is white.
2540 | and GCOBJ:CARG3, CARG3, #LJ_GCVMASK
2541 | ldrb TMP1w, GCOBJ:CARG3->gch.marked
2542 | tst TMP1w, #LJ_GC_WHITES // iswhite(str)
2543 | beq <1
2544 | // Crossed a write barrier. Move the barrier forward.
2545 | mov CARG1, GL
2546 | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv)
2547 | b <1
2548 break;
2549 case BC_USETS:
2550 | // RA = uvnum, RC = str_const (~)
2551 | ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
2552 | add RA, RA, #offsetof(GCfuncL, uvptr)/8
2553 | mvn RC, RC
2554 | and LFUNC:CARG2, CARG2, #LJ_GCVMASK
2555 | ldr UPVAL:CARG1, [LFUNC:CARG2, RA, lsl #3]
2556 | ldr STR:CARG3, [KBASE, RC, lsl #3]
2557 | movn TMP0, #~LJ_TSTR
2558 | ldr CARG2, UPVAL:CARG1->v
2559 | ldrb TMP2w, UPVAL:CARG1->marked
2560 | add TMP0, STR:CARG3, TMP0, lsl #47 // Tag the string pointer.
2561 | ldrb TMP1w, STR:CARG3->marked
2562 | str TMP0, [CARG2] // Store the tagged string through uv->v.
2563 | tbnz TMP2w, #2, >2 // isblack(uv)
2564 |1:
2565 | ins_next
2566 |
2567 |2: // Check if string is white and ensure upvalue is closed.
2568 | ldrb TMP0w, UPVAL:CARG1->closed
2569 | tst TMP1w, #LJ_GC_WHITES // iswhite(str)
2570 | ccmp TMP0w, #0, #4, ne // && uv->closed. Not white: force Z=1 to skip the barrier.
2571 | beq <1
2572 | // Crossed a write barrier. Move the barrier forward.
2573 | mov CARG1, GL
2574 | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv)
2575 | b <1
2576 break;
2577 case BC_USETN:
2578 | // RA = uvnum, RC = num_const
2579 | ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
2580 | add RA, RA, #offsetof(GCfuncL, uvptr)/8
2581 | and LFUNC:CARG2, CARG2, #LJ_GCVMASK
2582 | ldr UPVAL:CARG2, [LFUNC:CARG2, RA, lsl #3]
2583 | ldr TMP0, [KBASE, RC, lsl #3]
2584 | ldr CARG2, UPVAL:CARG2->v
2585 | str TMP0, [CARG2]
2586 | ins_next
2587 break;
2588 case BC_USETP:
2589 | // RA = uvnum, RC = primitive_type (~)
2590 | ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
2591 | add RA, RA, #offsetof(GCfuncL, uvptr)/8
2592 | and LFUNC:CARG2, CARG2, #LJ_GCVMASK
2593 | ldr UPVAL:CARG2, [LFUNC:CARG2, RA, lsl #3]
2594 | mvn TMP0, RC, lsl #47
2595 | ldr CARG2, UPVAL:CARG2->v
2596 | str TMP0, [CARG2]
2597 | ins_next
2598 break;
2599
2600 case BC_UCLO:
2601 | // RA = level, RC = target
2602 | ldr CARG3, L->openupval
2603 | add RC, PC, RC, lsl #2
2604 | str BASE, L->base
2605 | sub PC, RC, #0x20000
2606 | cbz CARG3, >1
2607 | mov CARG1, L
2608 | add CARG2, BASE, RA, lsl #3
2609 | bl extern lj_func_closeuv // (lua_State *L, TValue *level)
2610 | ldr BASE, L->base
2611 |1:
2612 | ins_next
2613 break;
2614
2615 case BC_FNEW:
2616 | // RA = dst, RC = proto_const (~) (holding function prototype)
2617 | mvn RC, RC
2618 | str BASE, L->base
2619 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC]
2620 | str PC, SAVE_PC
2621 | ldr CARG2, [KBASE, RC, lsl #3]
2622 | mov CARG1, L
2623 | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
2624 | // (lua_State *L, GCproto *pt, GCfuncL *parent)
2625 | bl extern lj_func_newL_gc
2626 | // Returns GCfuncL *.
2627 | ldr BASE, L->base
2628 | movn TMP0, #~LJ_TFUNC
2629 | add CRET1, CRET1, TMP0, lsl #47
2630 | str CRET1, [BASE, RA, lsl #3]
2631 | ins_next
2632 break;
2633
2634 /* -- Table ops --------------------------------------------------------- */
2635
2636 case BC_TNEW:
2637 case BC_TDUP:
2638 | // RA = dst, RC = (hbits|asize) | tab_const (~)
2639 | ldp CARG3, CARG4, GL->gc.total // Assumes threshold follows total.
2640 | str BASE, L->base
2641 | str PC, SAVE_PC
2642 | mov CARG1, L
2643 | cmp CARG3, CARG4
2644 | bhs >5
2645 |1:
2646 if (op == BC_TNEW) {
2647 | and CARG2, RC, #0x7ff
2648 | lsr CARG3, RC, #11
2649 | cmp CARG2, #0x7ff
2650 | mov TMP0, #0x801
2651 | csel CARG2, CARG2, TMP0, ne
2652 | bl extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits)
2653 | // Returns GCtab *.
2654 } else {
2655 | mvn RC, RC
2656 | ldr CARG2, [KBASE, RC, lsl #3]
2657 | bl extern lj_tab_dup // (lua_State *L, Table *kt)
2658 | // Returns GCtab *.
2659 }
2660 | ldr BASE, L->base
2661 | movk CRET1, #(LJ_TTAB>>1)&0xffff, lsl #48
2662 | str CRET1, [BASE, RA, lsl #3]
2663 | ins_next
2664 |
2665 |5:
2666 | bl extern lj_gc_step_fixtop // (lua_State *L)
2667 | mov CARG1, L
2668 | b <1
2669 break;
2670
2671 case BC_GGET:
2672 | // RA = dst, RC = str_const (~)
2673 case BC_GSET:
2674 | // RA = src, RC = str_const (~)
2675 | ldr LFUNC:CARG1, [BASE, FRAME_FUNC]
2676 | mvn RC, RC
2677 | and LFUNC:CARG1, CARG1, #LJ_GCVMASK
2678 | ldr TAB:CARG2, LFUNC:CARG1->env
2679 | ldr STR:RC, [KBASE, RC, lsl #3]
2680 if (op == BC_GGET) {
2681 | b ->BC_TGETS_Z
2682 } else {
2683 | b ->BC_TSETS_Z
2684 }
2685 break;
2686
2687 case BC_TGETV:
2688 | decode_RB RB, INS
2689 | and RC, RC, #255
2690 | // RA = dst, RB = table, RC = key
2691 | ldr CARG2, [BASE, RB, lsl #3]
2692 | ldr TMP1, [BASE, RC, lsl #3]
2693 | checktab CARG2, ->vmeta_tgetv
2694 | checkint TMP1, >9 // Integer key?
2695 | ldr CARG3, TAB:CARG2->array
2696 | ldr CARG1w, TAB:CARG2->asize
2697 | add CARG3, CARG3, TMP1, uxtw #3
2698 | cmp TMP1w, CARG1w // In array part?
2699 | bhs ->vmeta_tgetv
2700 | ldr TMP0, [CARG3]
2701 | cmp TMP0, TISNIL
2702 | beq >5
2703 |1:
2704 | str TMP0, [BASE, RA, lsl #3]
2705 | ins_next
2706 |
2707 |5: // Check for __index if table value is nil.
2708 | ldr TAB:CARG1, TAB:CARG2->metatable
2709 | cbz TAB:CARG1, <1 // No metatable: done.
2710 | ldrb TMP1w, TAB:CARG1->nomm
2711 | tbnz TMP1w, #MM_index, <1 // 'no __index' flag set: done.
2712 | b ->vmeta_tgetv
2713 |
2714 |9:
2715 | asr ITYPE, TMP1, #47
2716 | cmn ITYPE, #-LJ_TSTR // String key?
2717 | bne ->vmeta_tgetv
2718 | and STR:RC, TMP1, #LJ_GCVMASK
2719 | b ->BC_TGETS_Z
2720 break;
2721 case BC_TGETS:
2722 | decode_RB RB, INS
2723 | and RC, RC, #255
2724 | // RA = dst, RB = table, RC = str_const (~)
2725 | ldr CARG2, [BASE, RB, lsl #3]
2726 | mvn RC, RC
2727 | ldr STR:RC, [KBASE, RC, lsl #3]
2728 | checktab CARG2, ->vmeta_tgets1
2729 |->BC_TGETS_Z:
2730 | // TAB:CARG2 = GCtab *, STR:RC = GCstr *, RA = dst
2731 | ldr TMP1w, TAB:CARG2->hmask
2732 | ldr TMP2w, STR:RC->hash
2733 | ldr NODE:CARG3, TAB:CARG2->node
2734 | and TMP1w, TMP1w, TMP2w // idx = str->hash & tab->hmask
2735 | add TMP1, TMP1, TMP1, lsl #1
2736 | movn CARG4, #~LJ_TSTR
2737 | add NODE:CARG3, NODE:CARG3, TMP1, lsl #3 // node = tab->node + idx*3*8
2738 | add CARG4, STR:RC, CARG4, lsl #47 // Tagged key to look for.
2739 |1:
2740 | ldp TMP0, CARG1, NODE:CARG3->val
2741 | ldr NODE:CARG3, NODE:CARG3->next
2742 | cmp CARG1, CARG4
2743 | bne >4
2744 | cmp TMP0, TISNIL
2745 | beq >5
2746 |3:
2747 | str TMP0, [BASE, RA, lsl #3]
2748 | ins_next
2749 |
2750 |4: // Follow hash chain.
2751 | cbnz NODE:CARG3, <1
2752 | // End of hash chain: key not found, nil result.
2753 | mov TMP0, TISNIL
2754 |
2755 |5: // Check for __index if table value is nil.
2756 | ldr TAB:CARG1, TAB:CARG2->metatable
2757 | cbz TAB:CARG1, <3 // No metatable: done.
2758 | ldrb TMP1w, TAB:CARG1->nomm
2759 | tbnz TMP1w, #MM_index, <3 // 'no __index' flag set: done.
2760 | b ->vmeta_tgets
2761 break;
2762 case BC_TGETB:
2763 | decode_RB RB, INS
2764 | and RC, RC, #255
2765 | // RA = dst, RB = table, RC = index
2766 | ldr CARG2, [BASE, RB, lsl #3]
2767 | checktab CARG2, ->vmeta_tgetb
2768 | ldr CARG3, TAB:CARG2->array
2769 | ldr CARG1w, TAB:CARG2->asize
2770 | add CARG3, CARG3, RC, lsl #3
2771 | cmp RCw, CARG1w // In array part?
2772 | bhs ->vmeta_tgetb
2773 | ldr TMP0, [CARG3]
2774 | cmp TMP0, TISNIL
2775 | beq >5
2776 |1:
2777 | str TMP0, [BASE, RA, lsl #3]
2778 | ins_next
2779 |
2780 |5: // Check for __index if table value is nil.
2781 | ldr TAB:CARG1, TAB:CARG2->metatable
2782 | cbz TAB:CARG1, <1 // No metatable: done.
2783 | ldrb TMP1w, TAB:CARG1->nomm
2784 | tbnz TMP1w, #MM_index, <1 // 'no __index' flag set: done.
2785 | b ->vmeta_tgetb
2786 break;
2787 case BC_TGETR:
2788 | decode_RB RB, INS
2789 | and RC, RC, #255
2790 | // RA = dst, RB = table, RC = key
2791 | ldr CARG1, [BASE, RB, lsl #3]
2792 | ldr TMP1, [BASE, RC, lsl #3]
2793 | and TAB:CARG1, CARG1, #LJ_GCVMASK
2794 | ldr CARG3, TAB:CARG1->array
2795 | ldr TMP2w, TAB:CARG1->asize
2796 | add CARG3, CARG3, TMP1w, uxtw #3
2797 | cmp TMP1w, TMP2w // In array part?
2798 | bhs ->vmeta_tgetr
2799 | ldr TMP0, [CARG3]
2800 |->BC_TGETR_Z:
2801 | str TMP0, [BASE, RA, lsl #3]
2802 | ins_next
2803 break;
2804
2805 case BC_TSETV:
2806 | decode_RB RB, INS
2807 | and RC, RC, #255
2808 | // RA = src, RB = table, RC = key
2809 | ldr CARG2, [BASE, RB, lsl #3]
2810 | ldr TMP1, [BASE, RC, lsl #3]
2811 | checktab CARG2, ->vmeta_tsetv
2812 | checkint TMP1, >9 // Integer key?
2813 | ldr CARG3, TAB:CARG2->array
2814 | ldr CARG1w, TAB:CARG2->asize
2815 | add CARG3, CARG3, TMP1, uxtw #3
2816 | cmp TMP1w, CARG1w // In array part?
2817 | bhs ->vmeta_tsetv
2818 | ldr TMP1, [CARG3]
2819 | ldr TMP0, [BASE, RA, lsl #3]
2820 | ldrb TMP2w, TAB:CARG2->marked
2821 | cmp TMP1, TISNIL // Previous value is nil?
2822 | beq >5
2823 |1:
2824 | str TMP0, [CARG3]
2825 | tbnz TMP2w, #2, >7 // isblack(table)
2826 |2:
2827 | ins_next
2828 |
2829 |5: // Check for __newindex if previous value is nil.
2830 | ldr TAB:CARG1, TAB:CARG2->metatable
2831 | cbz TAB:CARG1, <1 // No metatable: done.
2832 | ldrb TMP1w, TAB:CARG1->nomm
2833 | tbnz TMP1w, #MM_newindex, <1 // 'no __newindex' flag set: done.
2834 | b ->vmeta_tsetv
2835 |
2836 |7: // Possible table write barrier for the value. Skip valiswhite check.
2837 | barrierback TAB:CARG2, TMP2w, TMP1
2838 | b <2
2839 |
2840 |9:
2841 | asr ITYPE, TMP1, #47
2842 | cmn ITYPE, #-LJ_TSTR // String key?
2843 | bne ->vmeta_tsetv
2844 | and STR:RC, TMP1, #LJ_GCVMASK
2845 | b ->BC_TSETS_Z
2846 break;
2847 case BC_TSETS:
2848 | decode_RB RB, INS
2849 | and RC, RC, #255
2850 | // RA = src, RB = table, RC = str_const (~)
2851 | ldr CARG2, [BASE, RB, lsl #3]
2852 | mvn RC, RC
2853 | ldr STR:RC, [KBASE, RC, lsl #3]
2854 | checktab CARG2, ->vmeta_tsets1
2855 |->BC_TSETS_Z:
2856 | // TAB:CARG2 = GCtab *, STR:RC = GCstr *, RA = src
2857 | ldr TMP1w, TAB:CARG2->hmask
2858 | ldr TMP2w, STR:RC->hash
2859 | ldr NODE:CARG3, TAB:CARG2->node
2860 | and TMP1w, TMP1w, TMP2w // idx = str->hash & tab->hmask
2861 | add TMP1, TMP1, TMP1, lsl #1
2862 | movn CARG4, #~LJ_TSTR
2863 | add NODE:CARG3, NODE:CARG3, TMP1, lsl #3 // node = tab->node + idx*3*8
2864 | add CARG4, STR:RC, CARG4, lsl #47 // Tagged key to look for.
2865 | strb wzr, TAB:CARG2->nomm // Clear metamethod cache.
2866 |1:
2867 | ldp TMP1, CARG1, NODE:CARG3->val
2868 | ldr NODE:TMP3, NODE:CARG3->next
2869 | ldrb TMP2w, TAB:CARG2->marked
2870 | cmp CARG1, CARG4
2871 | bne >5
2872 | ldr TMP0, [BASE, RA, lsl #3]
2873 | cmp TMP1, TISNIL // Previous value is nil?
2874 | beq >4
2875 |2:
2876 | str TMP0, NODE:CARG3->val
2877 | tbnz TMP2w, #2, >7 // isblack(table)
2878 |3:
2879 | ins_next
2880 |
2881 |4: // Check for __newindex if previous value is nil.
2882 | ldr TAB:CARG1, TAB:CARG2->metatable
2883 | cbz TAB:CARG1, <2 // No metatable: done.
2884 | ldrb TMP1w, TAB:CARG1->nomm
2885 | tbnz TMP1w, #MM_newindex, <2 // 'no __newindex' flag set: done.
2886 | b ->vmeta_tsets
2887 |
2888 |5: // Follow hash chain.
2889 | mov NODE:CARG3, NODE:TMP3
2890 | cbnz NODE:TMP3, <1
2891 | // End of hash chain: key not found, add a new one.
2892 |
2893 | // But check for __newindex first.
2894 | ldr TAB:CARG1, TAB:CARG2->metatable
2895 | cbz TAB:CARG1, >6 // No metatable: continue.
2896 | ldrb TMP1w, TAB:CARG1->nomm
2897 | // 'no __newindex' flag NOT set: check.
2898 | tbz TMP1w, #MM_newindex, ->vmeta_tsets
2899 |6:
2900 | movn TMP1, #~LJ_TSTR
2901 | str PC, SAVE_PC
2902 | add TMP0, STR:RC, TMP1, lsl #47
2903 | str BASE, L->base
2904 | mov CARG1, L
2905 | str TMP0, TMPD
2906 | add CARG3, sp, TMPDofs
2907 | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
2908 | // Returns TValue *.
2909 | ldr BASE, L->base
2910 | ldr TMP0, [BASE, RA, lsl #3]
2911 | str TMP0, [CRET1]
2912 | b <3 // No 2nd write barrier needed.
2913 |
2914 |7: // Possible table write barrier for the value. Skip valiswhite check.
2915 | barrierback TAB:CARG2, TMP2w, TMP1
2916 | b <3
2917 break;
2918 case BC_TSETB:
2919 | decode_RB RB, INS
2920 | and RC, RC, #255
2921 | // RA = src, RB = table, RC = index
2922 | ldr CARG2, [BASE, RB, lsl #3]
2923 | checktab CARG2, ->vmeta_tsetb
2924 | ldr CARG3, TAB:CARG2->array
2925 | ldr CARG1w, TAB:CARG2->asize
2926 | add CARG3, CARG3, RC, lsl #3
2927 | cmp RCw, CARG1w // In array part?
2928 | bhs ->vmeta_tsetb
2929 | ldr TMP1, [CARG3]
2930 | ldr TMP0, [BASE, RA, lsl #3]
2931 | ldrb TMP2w, TAB:CARG2->marked
2932 | cmp TMP1, TISNIL // Previous value is nil?
2933 | beq >5
2934 |1:
2935 | str TMP0, [CARG3]
2936 | tbnz TMP2w, #2, >7 // isblack(table)
2937 |2:
2938 | ins_next
2939 |
2940 |5: // Check for __newindex if previous value is nil.
2941 | ldr TAB:CARG1, TAB:CARG2->metatable
2942 | cbz TAB:CARG1, <1 // No metatable: done.
2943 | ldrb TMP1w, TAB:CARG1->nomm
2944 | tbnz TMP1w, #MM_newindex, <1 // 'no __newindex' flag set: done.
2945 | b ->vmeta_tsetb
2946 |
2947 |7: // Possible table write barrier for the value. Skip valiswhite check.
2948 | barrierback TAB:CARG2, TMP2w, TMP1
2949 | b <2
2950 break;
2951 case BC_TSETR:
2952 | decode_RB RB, INS
2953 | and RC, RC, #255
2954 | // RA = src, RB = table, RC = key
2955 | ldr CARG2, [BASE, RB, lsl #3]
2956 | ldr TMP1, [BASE, RC, lsl #3]
2957 | and TAB:CARG2, CARG2, #LJ_GCVMASK
2958 | ldr CARG1, TAB:CARG2->array
2959 | ldrb TMP2w, TAB:CARG2->marked
2960 | ldr CARG4w, TAB:CARG2->asize
2961 | add CARG1, CARG1, TMP1, uxtw #3
2962 | tbnz TMP2w, #2, >7 // isblack(table)
2963 |2:
2964 | cmp TMP1w, CARG4w // In array part?
2965 | bhs ->vmeta_tsetr
2966 |->BC_TSETR_Z:
2967 | ldr TMP0, [BASE, RA, lsl #3]
2968 | str TMP0, [CARG1]
2969 | ins_next
2970 |
2971 |7: // Possible table write barrier for the value. Skip valiswhite check.
2972 | barrierback TAB:CARG2, TMP2w, TMP0
2973 | b <2
2974 break;
2975
2976 case BC_TSETM:
2977 | // RA = base (table at base-1), RC = num_const (start index)
2978 | add RA, BASE, RA, lsl #3
2979 |1:
2980 | ldr RBw, SAVE_MULTRES
2981 | ldr TAB:CARG2, [RA, #-8] // Guaranteed to be a table.
2982 | ldr TMP1, [KBASE, RC, lsl #3] // Integer constant is in lo-word.
2983 | sub RB, RB, #8
2984 | cbz RB, >4 // Nothing to copy?
2985 | and TAB:CARG2, CARG2, #LJ_GCVMASK
2986 | ldr CARG1w, TAB:CARG2->asize
2987 | add CARG3w, TMP1w, RBw, lsr #3
2988 | ldr CARG4, TAB:CARG2->array
2989 | cmp CARG3, CARG1
2990 | add RB, RA, RB
2991 | bhi >5
2992 | add TMP1, CARG4, TMP1w, uxtw #3
2993 | ldrb TMP2w, TAB:CARG2->marked
2994 |3: // Copy result slots to table.
2995 | ldr TMP0, [RA], #8
2996 | str TMP0, [TMP1], #8
2997 | cmp RA, RB
2998 | blo <3
2999 | tbnz TMP2w, #2, >7 // isblack(table)
3000 |4:
3001 | ins_next
3002 |
3003 |5: // Need to resize array part.
3004 | str BASE, L->base
3005 | mov CARG1, L
3006 | str PC, SAVE_PC
3007 | bl extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize)
3008 | // Must not reallocate the stack.
3009 | b <1
3010 |
3011 |7: // Possible table write barrier for any value. Skip valiswhite check.
3012 | barrierback TAB:CARG2, TMP2w, TMP1
3013 | b <4
3014 break;
3015
3016 /* -- Calls and vararg handling ----------------------------------------- */
3017
3018 case BC_CALLM:
3019 | // RA = base, (RB = nresults+1,) RC = extra_nargs
3020 | ldr TMP0w, SAVE_MULTRES
3021 | decode_RC8RD NARGS8:RC, RC
3022 | add NARGS8:RC, NARGS8:RC, TMP0
3023 | b ->BC_CALL_Z
3024 break;
3025 case BC_CALL:
3026 | decode_RC8RD NARGS8:RC, RC
3027 | // RA = base, (RB = nresults+1,) RC = (nargs+1)*8
3028 |->BC_CALL_Z:
3029 | mov RB, BASE // Save old BASE for vmeta_call.
3030 | add BASE, BASE, RA, lsl #3
3031 | ldr CARG3, [BASE]
3032 | sub NARGS8:RC, NARGS8:RC, #8
3033 | add BASE, BASE, #16
3034 | checkfunc CARG3, ->vmeta_call
3035 | ins_call
3036 break;
3037
3038 case BC_CALLMT:
3039 | // RA = base, (RB = 0,) RC = extra_nargs
3040 | ldr TMP0w, SAVE_MULTRES
3041 | add NARGS8:RC, TMP0, RC, lsl #3
3042 | b ->BC_CALLT1_Z
3043 break;
3044 case BC_CALLT:
3045 | lsl NARGS8:RC, RC, #3
3046 | // RA = base, (RB = 0,) RC = (nargs+1)*8
3047 |->BC_CALLT1_Z:
3048 | add RA, BASE, RA, lsl #3
3049 | ldr TMP1, [RA]
3050 | sub NARGS8:RC, NARGS8:RC, #8
3051 | add RA, RA, #16
3052 | checktp CARG3, TMP1, LJ_TFUNC, ->vmeta_callt
3053 | ldr PC, [BASE, FRAME_PC]
3054 |->BC_CALLT2_Z:
3055 | mov RB, #0
3056 | ldrb TMP2w, LFUNC:CARG3->ffid
3057 | tst PC, #FRAME_TYPE
3058 | bne >7
3059 |1:
3060 | str TMP1, [BASE, FRAME_FUNC] // Copy function down, but keep PC.
3061 | cbz NARGS8:RC, >3
3062 |2:
3063 | ldr TMP0, [RA, RB]
3064 | add TMP1, RB, #8
3065 | cmp TMP1, NARGS8:RC
3066 | str TMP0, [BASE, RB]
3067 | mov RB, TMP1
3068 | bne <2
3069 |3:
3070 | cmp TMP2, #1 // (> FF_C) Calling a fast function?
3071 | bhi >5
3072 |4:
3073 | ins_callt
3074 |
3075 |5: // Tailcall to a fast function with a Lua frame below.
3076 | ldrb RAw, [PC, #-3]
3077 | sub CARG1, BASE, RA, lsl #3
3078 | ldr LFUNC:CARG1, [CARG1, #-32]
3079 | and LFUNC:CARG1, CARG1, #LJ_GCVMASK
3080 | ldr CARG1, LFUNC:CARG1->pc
3081 | ldr KBASE, [CARG1, #PC2PROTO(k)]
3082 | b <4
3083 |
3084 |7: // Tailcall from a vararg function.
3085 | eor PC, PC, #FRAME_VARG
3086 | tst PC, #FRAME_TYPEP // Vararg frame below?
3087 | csel TMP2, RB, TMP2, ne // Clear ffid if no Lua function below.
3088 | bne <1
3089 | sub BASE, BASE, PC
3090 | ldr PC, [BASE, FRAME_PC]
3091 | tst PC, #FRAME_TYPE
3092 | csel TMP2, RB, TMP2, ne // Clear ffid if no Lua function below.
3093 | b <1
3094 break;
3095
3096 case BC_ITERC:
3097 | // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
3098 | add RA, BASE, RA, lsl #3
3099 | ldr CARG3, [RA, #-24]
3100 | mov RB, BASE // Save old BASE for vmeta_call.
3101 | ldp CARG1, CARG2, [RA, #-16]
3102 | add BASE, RA, #16
3103 | mov NARGS8:RC, #16 // Iterators get 2 arguments.
3104 | str CARG3, [RA] // Copy callable.
3105 | stp CARG1, CARG2, [RA, #16] // Copy state and control var.
3106 | checkfunc CARG3, ->vmeta_call
3107 | ins_call
3108 break;
3109
3110 case BC_ITERN:
3111 | // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
3112 |.if JIT
3113 | // NYI: add hotloop, record BC_ITERN.
3114 |.endif
3115 | add RA, BASE, RA, lsl #3
3116 | ldr TAB:RB, [RA, #-16]
3117 | ldrh TMP3w, [PC, #2]
3118 | ldr CARG1w, [RA, #-8] // Get index from control var.
3119 | add PC, PC, #4
3120 | add TMP3, PC, TMP3, lsl #2
3121 | and TAB:RB, RB, #LJ_GCVMASK
3122 | sub TMP3, TMP3, #0x20000
3123 | ldr TMP1w, TAB:RB->asize
3124 | ldr CARG2, TAB:RB->array
3125 |1: // Traverse array part.
3126 | subs RC, CARG1, TMP1
3127 | add CARG3, CARG2, CARG1, lsl #3
3128 | bhs >5 // Index points after array part?
3129 | ldr TMP0, [CARG3]
3130 | cmp TMP0, TISNIL
3131 | cinc CARG1, CARG1, eq // Skip holes in array part.
3132 | beq <1
3133 | add CARG1, CARG1, TISNUM
3134 | stp CARG1, TMP0, [RA]
3135 | add CARG1, CARG1, #1
3136 |3:
3137 | str CARG1w, [RA, #-8] // Update control var.
3138 | mov PC, TMP3
3139 |4:
3140 | ins_next
3141 |
3142 |5: // Traverse hash part.
3143 | ldr TMP2w, TAB:RB->hmask
3144 | ldr NODE:RB, TAB:RB->node
3145 |6:
3146 | add CARG1, RC, RC, lsl #1
3147 | cmp RC, TMP2 // End of iteration? Branch to ITERN+1.
3148 | add NODE:CARG3, NODE:RB, CARG1, lsl #3 // node = tab->node + idx*3*8
3149 | bhi <4
3150 | ldp TMP0, CARG1, NODE:CARG3->val
3151 | cmp TMP0, TISNIL
3152 | add RC, RC, #1
3153 | beq <6 // Skip holes in hash part.
3154 | stp CARG1, TMP0, [RA]
3155 | add CARG1, RC, TMP1
3156 | b <3
3157 break;
3158
3159 case BC_ISNEXT:
3160 | // RA = base, RC = target (points to ITERN)
3161 | add RA, BASE, RA, lsl #3
3162 | ldr CFUNC:CARG1, [RA, #-24]
3163 | add RC, PC, RC, lsl #2
3164 | ldp TAB:CARG3, CARG4, [RA, #-16]
3165 | sub RC, RC, #0x20000
3166 | checkfunc CFUNC:CARG1, >5
3167 | asr TMP0, TAB:CARG3, #47
3168 | ldrb TMP1w, CFUNC:CARG1->ffid
3169 | cmn TMP0, #-LJ_TTAB
3170 | ccmp CARG4, TISNIL, #0, eq
3171 | ccmp TMP1w, #FF_next_N, #0, eq
3172 | bne >5
3173 | mov TMP0w, #0xfffe7fff
3174 | lsl TMP0, TMP0, #32
3175 | str TMP0, [RA, #-8] // Initialize control var.
3176 |1:
3177 | mov PC, RC
3178 | ins_next
3179 |
3180 |5: // Despecialize bytecode if any of the checks fail.
3181 | mov TMP0, #BC_JMP
3182 | mov TMP1, #BC_ITERC
3183 | strb TMP0w, [PC, #-4]
3184 | strb TMP1w, [RC]
3185 | b <1
3186 break;
3187
3188 case BC_VARG:
3189 | decode_RB RB, INS
3190 | and RC, RC, #255
3191 | // RA = base, RB = (nresults+1), RC = numparams
3192 | ldr TMP1, [BASE, FRAME_PC]
3193 | add RC, BASE, RC, lsl #3
3194 | add RA, BASE, RA, lsl #3
3195 | add RC, RC, #FRAME_VARG
3196 | add TMP2, RA, RB, lsl #3
3197 | sub RC, RC, TMP1 // RC = vbase
3198 | // Note: RC may now be even _above_ BASE if nargs was < numparams.
3199 | sub TMP3, BASE, #16 // TMP3 = vtop
3200 | cbz RB, >5
3201 | sub TMP2, TMP2, #16
3202 |1: // Copy vararg slots to destination slots.
3203 | cmp RC, TMP3
3204 | ldr TMP0, [RC], #8
3205 | csel TMP0, TMP0, TISNIL, lo
3206 | cmp RA, TMP2
3207 | str TMP0, [RA], #8
3208 | blo <1
3209 |2:
3210 | ins_next
3211 |
3212 |5: // Copy all varargs.
3213 | ldr TMP0, L->maxstack
3214 | subs TMP2, TMP3, RC
3215 | csel RB, xzr, TMP2, le // MULTRES = (max(vtop-vbase,0)+1)*8
3216 | add RB, RB, #8
3217 | add TMP1, RA, TMP2
3218 | str RBw, SAVE_MULTRES
3219 | ble <2 // Nothing to copy.
3220 | cmp TMP1, TMP0
3221 | bhi >7
3222 |6:
3223 | ldr TMP0, [RC], #8
3224 | str TMP0, [RA], #8
3225 | cmp RC, TMP3
3226 | blo <6
3227 | b <2
3228 |
3229 |7: // Grow stack for varargs.
3230 | lsr CARG2, TMP2, #3
3231 | stp BASE, RA, L->base
3232 | mov CARG1, L
3233 | sub RC, RC, BASE // Need delta, because BASE may change.
3234 | str PC, SAVE_PC
3235 | bl extern lj_state_growstack // (lua_State *L, int n)
3236 | ldp BASE, RA, L->base
3237 | add RC, BASE, RC
3238 | sub TMP3, BASE, #16
3239 | b <6
3240 break;
3241
3242 /* -- Returns ----------------------------------------------------------- */
3243
3244 case BC_RETM:
3245 | // RA = results, RC = extra results
3246 | ldr TMP0w, SAVE_MULTRES
3247 | ldr PC, [BASE, FRAME_PC]
3248 | add RA, BASE, RA, lsl #3
3249 | add RC, TMP0, RC, lsl #3
3250 | b ->BC_RETM_Z
3251 break;
3252
3253 case BC_RET:
3254 | // RA = results, RC = nresults+1
3255 | ldr PC, [BASE, FRAME_PC]
3256 | lsl RC, RC, #3
3257 | add RA, BASE, RA, lsl #3
3258 |->BC_RETM_Z:
3259 | str RCw, SAVE_MULTRES
3260 |1:
3261 | ands CARG1, PC, #FRAME_TYPE
3262 | eor CARG2, PC, #FRAME_VARG
3263 | bne ->BC_RETV2_Z
3264 |
3265 |->BC_RET_Z:
3266 | // BASE = base, RA = resultptr, RC = (nresults+1)*8, PC = return
3267 | ldr INSw, [PC, #-4]
3268 | subs TMP1, RC, #8
3269 | sub CARG3, BASE, #16
3270 | beq >3
3271 |2:
3272 | ldr TMP0, [RA], #8
3273 | add BASE, BASE, #8
3274 | sub TMP1, TMP1, #8
3275 | str TMP0, [BASE, #-24]
3276 | cbnz TMP1, <2
3277 |3:
3278 | decode_RA RA, INS
3279 | sub CARG4, CARG3, RA, lsl #3
3280 | decode_RB RB, INS
3281 | ldr LFUNC:CARG1, [CARG4, FRAME_FUNC]
3282 |5:
3283 | cmp RC, RB, lsl #3 // More results expected?
3284 | blo >6
3285 | and LFUNC:CARG1, CARG1, #LJ_GCVMASK
3286 | mov BASE, CARG4
3287 | ldr CARG2, LFUNC:CARG1->pc
3288 | ldr KBASE, [CARG2, #PC2PROTO(k)]
3289 | ins_next
3290 |
3291 |6: // Fill up results with nil.
3292 | add BASE, BASE, #8
3293 | add RC, RC, #8
3294 | str TISNIL, [BASE, #-24]
3295 | b <5
3296 |
3297 |->BC_RETV1_Z: // Non-standard return case.
3298 | add RA, BASE, RA, lsl #3
3299 |->BC_RETV2_Z:
3300 | tst CARG2, #FRAME_TYPEP
3301 | bne ->vm_return
3302 | // Return from vararg function: relocate BASE down.
3303 | sub BASE, BASE, CARG2
3304 | ldr PC, [BASE, FRAME_PC]
3305 | b <1
3306 break;
3307
3308 case BC_RET0: case BC_RET1:
3309 | // RA = results, RC = nresults+1
3310 | ldr PC, [BASE, FRAME_PC]
3311 | lsl RC, RC, #3
3312 | str RCw, SAVE_MULTRES
3313 | ands CARG1, PC, #FRAME_TYPE
3314 | eor CARG2, PC, #FRAME_VARG
3315 | bne ->BC_RETV1_Z
3316 | ldr INSw, [PC, #-4]
3317 if (op == BC_RET1) {
3318 | ldr TMP0, [BASE, RA, lsl #3]
3319 }
3320 | sub CARG4, BASE, #16
3321 | decode_RA RA, INS
3322 | sub BASE, CARG4, RA, lsl #3
3323 if (op == BC_RET1) {
3324 | str TMP0, [CARG4], #8
3325 }
3326 | decode_RB RB, INS
3327 | ldr LFUNC:CARG1, [BASE, FRAME_FUNC]
3328 |5:
3329 | cmp RC, RB, lsl #3
3330 | blo >6
3331 | and LFUNC:CARG1, CARG1, #LJ_GCVMASK
3332 | ldr CARG2, LFUNC:CARG1->pc
3333 | ldr KBASE, [CARG2, #PC2PROTO(k)]
3334 | ins_next
3335 |
3336 |6: // Fill up results with nil.
3337 | add RC, RC, #8
3338 | str TISNIL, [CARG4], #8
3339 | b <5
3340 break;
3341
3342 /* -- Loops and branches ------------------------------------------------ */
3343
3344 |.define FOR_IDX, [RA]; .define FOR_TIDX, [RA, #4]
3345 |.define FOR_STOP, [RA, #8]; .define FOR_TSTOP, [RA, #12]
3346 |.define FOR_STEP, [RA, #16]; .define FOR_TSTEP, [RA, #20]
3347 |.define FOR_EXT, [RA, #24]; .define FOR_TEXT, [RA, #28]
3348
3349 case BC_FORL:
3350 |.if JIT
3351 | hotloop
3352 |.endif
3353 | // Fall through. Assumes BC_IFORL follows.
3354 break;
3355
3356 case BC_JFORI:
3357 case BC_JFORL:
3358#if !LJ_HASJIT
3359 break;
3360#endif
3361 case BC_FORI:
3362 case BC_IFORL:
3363 | // RA = base, RC = target (after end of loop or start of loop)
3364 vk = (op == BC_IFORL || op == BC_JFORL);
3365 | add RA, BASE, RA, lsl #3
3366 | ldp CARG1, CARG2, FOR_IDX // CARG1 = IDX, CARG2 = STOP
3367 | ldr CARG3, FOR_STEP // CARG3 = STEP
3368 if (op != BC_JFORL) {
3369 | add RC, PC, RC, lsl #2
3370 | sub RC, RC, #0x20000
3371 }
3372 | checkint CARG1, >5
3373 if (!vk) {
3374 | checkint CARG2, ->vmeta_for
3375 | checkint CARG3, ->vmeta_for
3376 | tbnz CARG3w, #31, >4
3377 | cmp CARG1w, CARG2w
3378 } else {
3379 | adds CARG1w, CARG1w, CARG3w
3380 | bvs >2
3381 | add TMP0, CARG1, TISNUM
3382 | tbnz CARG3w, #31, >4
3383 | cmp CARG1w, CARG2w
3384 }
3385 |1:
3386 if (op == BC_FORI) {
3387 | csel PC, RC, PC, gt
3388 } else if (op == BC_JFORI) {
3389 | ldrh RCw, [RC, #-2]
3390 } else if (op == BC_IFORL) {
3391 | csel PC, RC, PC, le
3392 }
3393 if (vk) {
3394 | str TMP0, FOR_IDX
3395 | str TMP0, FOR_EXT
3396 } else {
3397 | str CARG1, FOR_EXT
3398 }
3399 if (op == BC_JFORI || op == BC_JFORL) {
3400 | ble =>BC_JLOOP
3401 }
3402 |2:
3403 | ins_next
3404 |
3405 |4: // Invert check for negative step.
3406 | cmp CARG2w, CARG1w
3407 | b <1
3408 |
3409 |5: // FP loop.
3410 | ldp d0, d1, FOR_IDX
3411 | blo ->vmeta_for
3412 if (!vk) {
3413 | checknum CARG2, ->vmeta_for
3414 | checknum CARG3, ->vmeta_for
3415 | str d0, FOR_EXT
3416 } else {
3417 | ldr d2, FOR_STEP
3418 | fadd d0, d0, d2
3419 }
3420 | tbnz CARG3, #63, >7
3421 | fcmp d0, d1
3422 |6:
3423 if (vk) {
3424 | str d0, FOR_IDX
3425 | str d0, FOR_EXT
3426 }
3427 if (op == BC_FORI) {
3428 | csel PC, RC, PC, hi
3429 } else if (op == BC_JFORI) {
3430 | ldrh RCw, [RC, #-2]
3431 | bls =>BC_JLOOP
3432 } else if (op == BC_IFORL) {
3433 | csel PC, RC, PC, ls
3434 } else {
3435 | bls =>BC_JLOOP
3436 }
3437 | b <2
3438 |
3439 |7: // Invert check for negative step.
3440 | fcmp d1, d0
3441 | b <6
3442 break;
3443
3444 case BC_ITERL:
3445 |.if JIT
3446 | hotloop
3447 |.endif
3448 | // Fall through. Assumes BC_IITERL follows.
3449 break;
3450
3451 case BC_JITERL:
3452#if !LJ_HASJIT
3453 break;
3454#endif
3455 case BC_IITERL:
3456 | // RA = base, RC = target
3457 | ldr CARG1, [BASE, RA, lsl #3]
3458 | add TMP1, BASE, RA, lsl #3
3459 | cmp CARG1, TISNIL
3460 | beq >1 // Stop if iterator returned nil.
3461 if (op == BC_JITERL) {
3462 | str CARG1, [TMP1, #-8]
3463 | b =>BC_JLOOP
3464 } else {
3465 | add TMP0, PC, RC, lsl #2 // Otherwise save control var + branch.
3466 | sub PC, TMP0, #0x20000
3467 | str CARG1, [TMP1, #-8]
3468 }
3469 |1:
3470 | ins_next
3471 break;
3472
3473 case BC_LOOP:
3474 | // RA = base, RC = target (loop extent)
3475 | // Note: RA/RC is only used by trace recorder to determine scope/extent
3476 | // This opcode does NOT jump, its only purpose is to detect a hot loop.
3477 |.if JIT
3478 | hotloop
3479 |.endif
3480 | // Fall through. Assumes BC_ILOOP follows.
3481 break;
3482
3483 case BC_ILOOP:
3484 | // RA = base, RC = target (loop extent)
3485 | ins_next
3486 break;
3487
3488 case BC_JLOOP:
3489 |.if JIT
3490 | NYI
3491 |.endif
3492 break;
3493
3494 case BC_JMP:
3495 | // RA = base (only used by trace recorder), RC = target
3496 | add RC, PC, RC, lsl #2
3497 | sub PC, RC, #0x20000
3498 | ins_next
3499 break;
3500
3501 /* -- Function headers -------------------------------------------------- */
3502
3503 case BC_FUNCF:
3504 |.if JIT
3505 | hotcall
3506 |.endif
3507 case BC_FUNCV: /* NYI: compiled vararg functions. */
3508 | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow.
3509 break;
3510
3511 case BC_JFUNCF:
3512#if !LJ_HASJIT
3513 break;
3514#endif
3515 case BC_IFUNCF:
3516 | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8
3517 | ldr CARG1, L->maxstack
3518 | ldrb TMP1w, [PC, #-4+PC2PROTO(numparams)]
3519 | ldr KBASE, [PC, #-4+PC2PROTO(k)]
3520 | cmp RA, CARG1
3521 | bhi ->vm_growstack_l
3522 |2:
3523 | cmp NARGS8:RC, TMP1, lsl #3 // Check for missing parameters.
3524 | blo >3
3525 if (op == BC_JFUNCF) {
3526 | decode_RD RC, INS
3527 | b =>BC_JLOOP
3528 } else {
3529 | ins_next
3530 }
3531 |
3532 |3: // Clear missing parameters.
3533 | str TISNIL, [BASE, NARGS8:RC]
3534 | add NARGS8:RC, NARGS8:RC, #8
3535 | b <2
3536 break;
3537
3538 case BC_JFUNCV:
3539#if !LJ_HASJIT
3540 break;
3541#endif
3542 | NYI // NYI: compiled vararg functions
3543 break; /* NYI: compiled vararg functions. */
3544
3545 case BC_IFUNCV:
3546 | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8
3547 | ldr CARG1, L->maxstack
3548 | add TMP2, BASE, RC
3549 | add RA, RA, RC
3550 | add TMP0, RC, #16+FRAME_VARG
3551 | str LFUNC:CARG3, [TMP2], #8 // Store (untagged) copy of LFUNC.
3552 | ldr KBASE, [PC, #-4+PC2PROTO(k)]
3553 | cmp RA, CARG1
3554 | str TMP0, [TMP2], #8 // Store delta + FRAME_VARG.
3555 | bhs ->vm_growstack_l
3556 | sub RC, TMP2, #16
3557 | ldrb TMP1w, [PC, #-4+PC2PROTO(numparams)]
3558 | mov RA, BASE
3559 | mov BASE, TMP2
3560 | cbz TMP1, >2
3561 |1:
3562 | cmp RA, RC // Less args than parameters?
3563 | bhs >3
3564 | ldr TMP0, [RA]
3565 | sub TMP1, TMP1, #1
3566 | str TISNIL, [RA], #8 // Clear old fixarg slot (help the GC).
3567 | str TMP0, [TMP2], #8
3568 | cbnz TMP1, <1
3569 |2:
3570 | ins_next
3571 |
3572 |3:
3573 | sub TMP1, TMP1, #1
3574 | str TISNIL, [TMP2], #8
3575 | cbz TMP1, <2
3576 | b <3
3577 break;
3578
3579 case BC_FUNCC:
3580 case BC_FUNCCW:
3581 | // BASE = new base, RA = BASE+framesize*8, CARG3 = CFUNC, RC = nargs*8
3582 if (op == BC_FUNCC) {
3583 | ldr CARG4, CFUNC:CARG3->f
3584 } else {
3585 | ldr CARG4, GL->wrapf
3586 }
3587 | add CARG2, RA, NARGS8:RC
3588 | ldr CARG1, L->maxstack
3589 | add RC, BASE, NARGS8:RC
3590 | cmp CARG2, CARG1
3591 | stp BASE, RC, L->base
3592 if (op == BC_FUNCCW) {
3593 | ldr CARG2, CFUNC:CARG3->f
3594 }
3595 | mv_vmstate TMP0w, C
3596 | mov CARG1, L
3597 | bhi ->vm_growstack_c // Need to grow stack.
3598 | st_vmstate TMP0w
3599 | blr CARG4 // (lua_State *L [, lua_CFunction f])
3600 | // Returns nresults.
3601 | ldp BASE, TMP1, L->base
3602 | str L, GL->cur_L
3603 | sbfiz RC, CRET1, #3, #32
3604 | st_vmstate ST_INTERP
3605 | ldr PC, [BASE, FRAME_PC]
3606 | sub RA, TMP1, RC // RA = L->top - nresults*8
3607 | b ->vm_returnc
3608 break;
3609
3610 /* ---------------------------------------------------------------------- */
3611
3612 default:
3613 fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
3614 exit(2);
3615 break;
3616 }
3617}
3618
/*
** Build the complete interpreter backend.
**
** Calls build_subroutines() once, then calls build_ins() for every opcode
** number from 0 up to (but not including) BC__MAX.
** Returns BC__MAX.
*/
3619static int build_backend(BuildCtx *ctx)
3620{
3621 int op;
3622
 /* NOTE(review): presumably reserves BC__MAX dynamic PC labels, i.e. the
 ** `=>BC_xxx` branch targets used by the opcode handlers -- confirm
 ** against the DynASM API.
 */
3623 dasm_growpc(Dst, BC__MAX);
3624
3625 build_subroutines(ctx);
3626
 /* Everything emitted from here on goes to the .code_op section. */
3627 |.code_op
 /* The opcode is passed twice: once typed as BCOp, once as plain int. */
3628 for (op = 0; op < BC__MAX; op++)
3629 build_ins(ctx, (BCOp)op, op);
3630
3631 return BC__MAX;
3632}
3633
3634/* Emit pseudo frame-info for all assembler functions.
**
** Only BUILD_elfasm mode produces output; every other mode is a no-op.
** The function writes assembler directives to ctx->fp that hand-encode
** DWARF call frame information in two sections:
**
**   .debug_frame: CIE .Lframe0, an FDE covering the fcofs bytes of code
**                 starting at .Lbegin and (with LJ_HASFFI) an FDE covering
**                 lj_vm_ffi_call, i.e. the remaining ctx->codesz - fcofs
**                 bytes.
**   .eh_frame:    the same FDE layout with pc-relative 4-byte addresses.
**                 CIE .Lframe1 ("zPR") additionally references
**                 lj_err_unwind_dwarf; the FFI part uses its own CIE
**                 .Lframe2 ("zR") without that reference.
**
** Register save offsets are emitted in units of the CIE data alignment
** factor (-8), which is why CFRAME_SIZE is scaled down by 8 below.
*/
3635static void emit_asm_debug(BuildCtx *ctx)
3636{
 /* Byte offset of lj_vm_ffi_call from the start of the generated code. */
3637 int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
 /* cf = C frame size in 8-byte slots. */
3638 int i, cf = CFRAME_SIZE >> 3;
3639 switch (ctx->mode) {
3640 case BUILD_elfasm:
 /* ---- .debug_frame: common CIE ---- */
3641 fprintf(ctx->fp, "\t.section .debug_frame,\"\",%%progbits\n");
3642 fprintf(ctx->fp,
3643 ".Lframe0:\n"
3644 "\t.long .LECIE0-.LSCIE0\n"
3645 ".LSCIE0:\n"
3646 "\t.long 0xffffffff\n"
3647 "\t.byte 0x1\n"
3648 "\t.string \"\"\n"
3649 "\t.uleb128 0x1\n"
3650 "\t.sleb128 -8\n"
3651 "\t.byte 30\n" /* Return address is in lr. */
3652 "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */
3653 "\t.align 3\n"
3654 ".LECIE0:\n\n");
 /* ---- .debug_frame: FDE for the code in [.Lbegin, .Lbegin+fcofs) ---- */
3655 fprintf(ctx->fp,
3656 ".LSFDE0:\n"
3657 "\t.long .LEFDE0-.LASFDE0\n"
3658 ".LASFDE0:\n"
3659 "\t.long .Lframe0\n"
3660 "\t.quad .Lbegin\n"
3661 "\t.quad %d\n"
3662 "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
3663 "\t.byte 0x9d\n\t.uleb128 %d\n" /* offset fp */
3664 "\t.byte 0x9e\n\t.uleb128 %d\n", /* offset lr */
3665 fcofs, CFRAME_SIZE, cf, cf-1);
 /* 0x80+i is the same "offset <reg>" opcode form as 0x9d/0x9e above.
 ** cf-i+17 yields slots cf-2 (x19) down to cf-11 (x28).
 */
3666 for (i = 19; i <= 28; i++) /* offset x19-x28 */
3667 fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, cf-i+17);
 /* Register numbers 64+i are emitted as a separate uleb128 after opcode
 ** byte 5 (NOTE(review): DW_CFA_offset_extended per DWARF -- confirm).
 ** cf-i-4 yields slots cf-12 (d8) down to cf-19 (d15).
 */
3668 for (i = 8; i <= 15; i++) /* offset d8-d15 */
3669 fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n",
3670 64+i, cf-i-4);
3671 fprintf(ctx->fp,
3672 "\t.align 3\n"
3673 ".LEFDE0:\n\n");
3674#if LJ_HASFFI
 /* ---- .debug_frame: FDE for lj_vm_ffi_call (rest of the code) ---- */
3675 fprintf(ctx->fp,
3676 ".LSFDE1:\n"
3677 "\t.long .LEFDE1-.LASFDE1\n"
3678 ".LASFDE1:\n"
3679 "\t.long .Lframe0\n"
3680 "\t.quad lj_vm_ffi_call\n"
3681 "\t.quad %d\n"
3682 "\t.byte 0xe\n\t.uleb128 32\n" /* def_cfa_offset */
3683 "\t.byte 0x9d\n\t.uleb128 4\n" /* offset fp */
3684 "\t.byte 0x9e\n\t.uleb128 3\n" /* offset lr */
3685 "\t.byte 0x93\n\t.uleb128 2\n" /* offset x19 */
3686 "\t.align 3\n"
3687 ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
3688#endif
 /* ---- .eh_frame: CIE referencing lj_err_unwind_dwarf ---- */
3689 fprintf(ctx->fp, "\t.section .eh_frame,\"a\",%%progbits\n");
3690 fprintf(ctx->fp,
3691 ".Lframe1:\n"
3692 "\t.long .LECIE1-.LSCIE1\n"
3693 ".LSCIE1:\n"
3694 "\t.long 0\n"
3695 "\t.byte 0x1\n"
3696 "\t.string \"zPR\"\n"
3697 "\t.uleb128 0x1\n"
3698 "\t.sleb128 -8\n"
3699 "\t.byte 30\n" /* Return address is in lr. */
3700 "\t.uleb128 6\n" /* augmentation length */
3701 "\t.byte 0x1b\n" /* pcrel|sdata4 */
3702 "\t.long lj_err_unwind_dwarf-.\n"
3703 "\t.byte 0x1b\n" /* pcrel|sdata4 */
3704 "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */
3705 "\t.align 3\n"
3706 ".LECIE1:\n\n");
 /* ---- .eh_frame: FDE for [.Lbegin, .Lbegin+fcofs); same register save
 ** layout as .LSFDE0, but with pc-relative 4-byte address fields.
 */
3707 fprintf(ctx->fp,
3708 ".LSFDE2:\n"
3709 "\t.long .LEFDE2-.LASFDE2\n"
3710 ".LASFDE2:\n"
3711 "\t.long .LASFDE2-.Lframe1\n"
3712 "\t.long .Lbegin-.\n"
3713 "\t.long %d\n"
3714 "\t.uleb128 0\n" /* augmentation length */
3715 "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
3716 "\t.byte 0x9d\n\t.uleb128 %d\n" /* offset fp */
3717 "\t.byte 0x9e\n\t.uleb128 %d\n", /* offset lr */
3718 fcofs, CFRAME_SIZE, cf, cf-1);
3719 for (i = 19; i <= 28; i++) /* offset x19-x28 */
3720 fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, cf-i+17);
3721 for (i = 8; i <= 15; i++) /* offset d8-d15 */
3722 fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n",
3723 64+i, cf-i-4);
3724 fprintf(ctx->fp,
3725 "\t.align 3\n"
3726 ".LEFDE2:\n\n");
3727#if LJ_HASFFI
 /* ---- .eh_frame: separate CIE for lj_vm_ffi_call; unlike .Lframe1 it
 ** carries no lj_err_unwind_dwarf reference.
 */
3728 fprintf(ctx->fp,
3729 ".Lframe2:\n"
3730 "\t.long .LECIE2-.LSCIE2\n"
3731 ".LSCIE2:\n"
3732 "\t.long 0\n"
3733 "\t.byte 0x1\n"
3734 "\t.string \"zR\"\n"
3735 "\t.uleb128 0x1\n"
3736 "\t.sleb128 -8\n"
3737 "\t.byte 30\n" /* Return address is in lr. */
3738 "\t.uleb128 1\n" /* augmentation length */
3739 "\t.byte 0x1b\n" /* pcrel|sdata4 */
3740 "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */
3741 "\t.align 3\n"
3742 ".LECIE2:\n\n");
 /* ---- .eh_frame: FDE for lj_vm_ffi_call (rest of the code) ---- */
3743 fprintf(ctx->fp,
3744 ".LSFDE3:\n"
3745 "\t.long .LEFDE3-.LASFDE3\n"
3746 ".LASFDE3:\n"
3747 "\t.long .LASFDE3-.Lframe2\n"
3748 "\t.long lj_vm_ffi_call-.\n"
3749 "\t.long %d\n"
3750 "\t.uleb128 0\n" /* augmentation length */
3751 "\t.byte 0xe\n\t.uleb128 32\n" /* def_cfa_offset */
3752 "\t.byte 0x9d\n\t.uleb128 4\n" /* offset fp */
3753 "\t.byte 0x9e\n\t.uleb128 3\n" /* offset lr */
3754 "\t.byte 0x93\n\t.uleb128 2\n" /* offset x19 */
3755 "\t.align 3\n"
3756 ".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
3757#endif
3758 break;
 /* All other build modes: no frame info is emitted. */
3759 default:
3760 break;
3761 }
3762}
3763
diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc
index ac8346bb..7cfdf4b1 100644
--- a/src/vm_mips.dasc
+++ b/src/vm_mips.dasc
@@ -138,6 +138,7 @@
138|.type NODE, Node 138|.type NODE, Node
139|.type NARGS8, int 139|.type NARGS8, int
140|.type TRACE, GCtrace 140|.type TRACE, GCtrace
141|.type SBUF, SBuf
141| 142|
142|//----------------------------------------------------------------------- 143|//-----------------------------------------------------------------------
143| 144|
@@ -486,12 +487,13 @@ static void build_subroutines(BuildCtx *ctx)
486 | addiu DISPATCH, DISPATCH, GG_G2DISP 487 | addiu DISPATCH, DISPATCH, GG_G2DISP
487 | sw r0, SAVE_NRES 488 | sw r0, SAVE_NRES
488 | sw r0, SAVE_ERRF 489 | sw r0, SAVE_ERRF
489 | sw TMP0, L->cframe 490 | sw CARG1, SAVE_PC // Any value outside of bytecode is ok.
490 | sw r0, SAVE_CFRAME 491 | sw r0, SAVE_CFRAME
491 | beqz TMP1, >3 492 | beqz TMP1, >3
492 |. sw CARG1, SAVE_PC // Any value outside of bytecode is ok. 493 |. sw TMP0, L->cframe
493 | 494 |
494 | // Resume after yield (like a return). 495 | // Resume after yield (like a return).
496 | sw L, DISPATCH_GL(cur_L)(DISPATCH)
495 | move RA, BASE 497 | move RA, BASE
496 | lw BASE, L->base 498 | lw BASE, L->base
497 | lw TMP1, L->top 499 | lw TMP1, L->top
@@ -525,17 +527,18 @@ static void build_subroutines(BuildCtx *ctx)
525 | 527 |
526 |1: // Entry point for vm_pcall above (PC = ftype). 528 |1: // Entry point for vm_pcall above (PC = ftype).
527 | lw TMP1, L:CARG1->cframe 529 | lw TMP1, L:CARG1->cframe
528 | sw CARG3, SAVE_NRES
529 | move L, CARG1 530 | move L, CARG1
530 | sw CARG1, SAVE_L 531 | sw CARG3, SAVE_NRES
531 | move BASE, CARG2
532 | sw sp, L->cframe // Add our C frame to cframe chain.
533 | lw DISPATCH, L->glref // Setup pointer to dispatch table. 532 | lw DISPATCH, L->glref // Setup pointer to dispatch table.
533 | sw CARG1, SAVE_L
534 | move BASE, CARG2
535 | addiu DISPATCH, DISPATCH, GG_G2DISP
534 | sw CARG1, SAVE_PC // Any value outside of bytecode is ok. 536 | sw CARG1, SAVE_PC // Any value outside of bytecode is ok.
535 | sw TMP1, SAVE_CFRAME 537 | sw TMP1, SAVE_CFRAME
536 | addiu DISPATCH, DISPATCH, GG_G2DISP 538 | sw sp, L->cframe // Add our C frame to cframe chain.
537 | 539 |
538 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). 540 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
541 | sw L, DISPATCH_GL(cur_L)(DISPATCH)
539 | lw TMP2, L->base // TMP2 = old base (used in vmeta_call). 542 | lw TMP2, L->base // TMP2 = old base (used in vmeta_call).
540 | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 543 | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
541 | lw TMP1, L->top 544 | lw TMP1, L->top
@@ -566,20 +569,21 @@ static void build_subroutines(BuildCtx *ctx)
566 | lw TMP0, L:CARG1->stack 569 | lw TMP0, L:CARG1->stack
567 | sw CARG1, SAVE_L 570 | sw CARG1, SAVE_L
568 | lw TMP1, L->top 571 | lw TMP1, L->top
572 | lw DISPATCH, L->glref // Setup pointer to dispatch table.
569 | sw CARG1, SAVE_PC // Any value outside of bytecode is ok. 573 | sw CARG1, SAVE_PC // Any value outside of bytecode is ok.
570 | subu TMP0, TMP0, TMP1 // Compute -savestack(L, L->top). 574 | subu TMP0, TMP0, TMP1 // Compute -savestack(L, L->top).
571 | lw TMP1, L->cframe 575 | lw TMP1, L->cframe
572 | sw sp, L->cframe // Add our C frame to cframe chain. 576 | addiu DISPATCH, DISPATCH, GG_G2DISP
573 | sw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame. 577 | sw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame.
574 | sw r0, SAVE_ERRF // No error function. 578 | sw r0, SAVE_ERRF // No error function.
575 | move CFUNCADDR, CARG4 579 | sw TMP1, SAVE_CFRAME
580 | sw sp, L->cframe // Add our C frame to cframe chain.
581 | sw L, DISPATCH_GL(cur_L)(DISPATCH)
576 | jalr CARG4 // (lua_State *L, lua_CFunction func, void *ud) 582 | jalr CARG4 // (lua_State *L, lua_CFunction func, void *ud)
577 |. sw TMP1, SAVE_CFRAME 583 |. move CFUNCADDR, CARG4
578 | move BASE, CRET1 584 | move BASE, CRET1
579 | lw DISPATCH, L->glref // Setup pointer to dispatch table.
580 | li PC, FRAME_CP
581 | bnez CRET1, <3 // Else continue with the call. 585 | bnez CRET1, <3 // Else continue with the call.
582 |. addiu DISPATCH, DISPATCH, GG_G2DISP 586 |. li PC, FRAME_CP
583 | b ->vm_leave_cp // No base? Just remove C frame. 587 | b ->vm_leave_cp // No base? Just remove C frame.
584 |. nop 588 |. nop
585 | 589 |
@@ -688,6 +692,16 @@ static void build_subroutines(BuildCtx *ctx)
688 | b ->vm_call_dispatch_f 692 | b ->vm_call_dispatch_f
689 |. li NARGS8:RC, 16 // 2 args for func(t, k). 693 |. li NARGS8:RC, 16 // 2 args for func(t, k).
690 | 694 |
695 |->vmeta_tgetr:
696 | load_got lj_tab_getinth
697 | call_intern lj_tab_getinth // (GCtab *t, int32_t key)
698 |. nop
699 | // Returns cTValue * or NULL.
700 | beqz CRET1, >1
701 |. nop
702 | b ->BC_TGETR_Z
703 |. ldc1 f0, 0(CRET1)
704 |
691 |//----------------------------------------------------------------------- 705 |//-----------------------------------------------------------------------
692 | 706 |
693 |->vmeta_tsets1: 707 |->vmeta_tsets1:
@@ -740,6 +754,16 @@ static void build_subroutines(BuildCtx *ctx)
740 | b ->vm_call_dispatch_f 754 | b ->vm_call_dispatch_f
741 |. li NARGS8:RC, 24 // 3 args for func(t, k, v) 755 |. li NARGS8:RC, 24 // 3 args for func(t, k, v)
742 | 756 |
757 |->vmeta_tsetr:
758 | load_got lj_tab_setinth
759 | sw BASE, L->base
760 | sw PC, SAVE_PC
761 | call_intern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
762 |. move CARG1, L
763 | // Returns TValue *.
764 | b ->BC_TSETR_Z
765 |. nop
766 |
743 |//-- Comparison metamethods --------------------------------------------- 767 |//-- Comparison metamethods ---------------------------------------------
744 | 768 |
745 |->vmeta_comp: 769 |->vmeta_comp:
@@ -813,6 +837,18 @@ static void build_subroutines(BuildCtx *ctx)
813 |. nop 837 |. nop
814 |.endif 838 |.endif
815 | 839 |
840 |->vmeta_istype:
841 | load_got lj_meta_istype
842 | addiu PC, PC, -4
843 | sw BASE, L->base
844 | srl CARG2, RA, 3
845 | srl CARG3, RD, 3
846 | sw PC, SAVE_PC
847 | call_intern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
848 |. move CARG1, L
849 | b ->cont_nop
850 |. nop
851 |
816 |//-- Arithmetic metamethods --------------------------------------------- 852 |//-- Arithmetic metamethods ---------------------------------------------
817 | 853 |
818 |->vmeta_unm: 854 |->vmeta_unm:
@@ -1119,9 +1155,9 @@ static void build_subroutines(BuildCtx *ctx)
1119 |. sw BASE, L->base // Add frame since C call can throw. 1155 |. sw BASE, L->base // Add frame since C call can throw.
1120 | ffgccheck 1156 | ffgccheck
1121 |. sw PC, SAVE_PC // Redundant (but a defined value). 1157 |. sw PC, SAVE_PC // Redundant (but a defined value).
1122 | load_got lj_str_fromnum 1158 | load_got lj_strfmt_num
1123 | move CARG1, L 1159 | move CARG1, L
1124 | call_intern lj_str_fromnum // (lua_State *L, lua_Number *np) 1160 | call_intern lj_strfmt_num // (lua_State *L, lua_Number *np)
1125 |. move CARG2, BASE 1161 |. move CARG2, BASE
1126 | // Returns GCstr *. 1162 | // Returns GCstr *.
1127 | li CARG3, LJ_TSTR 1163 | li CARG3, LJ_TSTR
@@ -1188,7 +1224,7 @@ static void build_subroutines(BuildCtx *ctx)
1188 | mtc1 TMP0, FARG1 1224 | mtc1 TMP0, FARG1
1189 | beqz AT, ->fff_fallback 1225 | beqz AT, ->fff_fallback
1190 |. lw PC, FRAME_PC(BASE) 1226 |. lw PC, FRAME_PC(BASE)
1191 | cvt.w.d FRET1, FARG2 1227 | trunc.w.d FRET1, FARG2
1192 | cvt.d.w FARG1, FARG1 1228 | cvt.d.w FARG1, FARG1
1193 | lw TMP0, TAB:CARG1->asize 1229 | lw TMP0, TAB:CARG1->asize
1194 | lw TMP1, TAB:CARG1->array 1230 | lw TMP1, TAB:CARG1->array
@@ -1331,6 +1367,7 @@ static void build_subroutines(BuildCtx *ctx)
1331 | lw TMP3, L:RA->top 1367 | lw TMP3, L:RA->top
1332 | li_vmstate INTERP 1368 | li_vmstate INTERP
1333 | lw BASE, L->base 1369 | lw BASE, L->base
1370 | sw L, DISPATCH_GL(cur_L)(DISPATCH)
1334 | st_vmstate 1371 | st_vmstate
1335 | beqz AT, >8 1372 | beqz AT, >8
1336 |. subu RD, TMP3, TMP2 1373 |. subu RD, TMP3, TMP2
@@ -1521,14 +1558,8 @@ static void build_subroutines(BuildCtx *ctx)
1521 | b ->fff_resn 1558 | b ->fff_resn
1522 |. nop 1559 |. nop
1523 | 1560 |
1524 |->ff_math_deg:
1525 |.ffunc_n math_rad
1526 |. ldc1 FARG2, CFUNC:RB->upvalue[0]
1527 | b ->fff_resn
1528 |. mul.d FRET1, FARG1, FARG2
1529 |
1530 |.ffunc_nn math_ldexp 1561 |.ffunc_nn math_ldexp
1531 | cvt.w.d FARG2, FARG2 1562 | trunc.w.d FARG2, FARG2
1532 | load_got ldexp 1563 | load_got ldexp
1533 | mfc1 CARG3, FARG2 1564 | mfc1 CARG3, FARG2
1534 | call_extern 1565 | call_extern
@@ -1592,13 +1623,6 @@ static void build_subroutines(BuildCtx *ctx)
1592 | 1623 |
1593 |//-- String library ----------------------------------------------------- 1624 |//-- String library -----------------------------------------------------
1594 | 1625 |
1595 |.ffunc_1 string_len
1596 | li AT, LJ_TSTR
1597 | bne CARG3, AT, ->fff_fallback
1598 |. nop
1599 | b ->fff_resi
1600 |. lw CRET1, STR:CARG1->len
1601 |
1602 |.ffunc string_byte // Only handle the 1-arg case here. 1626 |.ffunc string_byte // Only handle the 1-arg case here.
1603 | lw CARG3, HI(BASE) 1627 | lw CARG3, HI(BASE)
1604 | lw STR:CARG1, LO(BASE) 1628 | lw STR:CARG1, LO(BASE)
@@ -1628,7 +1652,7 @@ static void build_subroutines(BuildCtx *ctx)
1628 |. sltiu AT, CARG3, LJ_TISNUM 1652 |. sltiu AT, CARG3, LJ_TISNUM
1629 | beqz AT, ->fff_fallback 1653 | beqz AT, ->fff_fallback
1630 |. li CARG3, 1 1654 |. li CARG3, 1
1631 | cvt.w.d FARG1, FARG1 1655 | trunc.w.d FARG1, FARG1
1632 | addiu CARG2, sp, ARG5_OFS 1656 | addiu CARG2, sp, ARG5_OFS
1633 | sltiu AT, TMP0, 256 1657 | sltiu AT, TMP0, 256
1634 | mfc1 TMP0, FARG1 1658 | mfc1 TMP0, FARG1
@@ -1642,6 +1666,7 @@ static void build_subroutines(BuildCtx *ctx)
1642 |. move CARG1, L 1666 |. move CARG1, L
1643 | // Returns GCstr *. 1667 | // Returns GCstr *.
1644 | lw BASE, L->base 1668 | lw BASE, L->base
1669 |->fff_resstr:
1645 | move CARG1, CRET1 1670 | move CARG1, CRET1
1646 | b ->fff_restv 1671 | b ->fff_restv
1647 |. li CARG3, LJ_TSTR 1672 |. li CARG3, LJ_TSTR
@@ -1658,7 +1683,7 @@ static void build_subroutines(BuildCtx *ctx)
1658 | ldc1 f2, 8(BASE) 1683 | ldc1 f2, 8(BASE)
1659 | beqz AT, >1 1684 | beqz AT, >1
1660 |. li CARG4, -1 1685 |. li CARG4, -1
1661 | cvt.w.d f0, f0 1686 | trunc.w.d f0, f0
1662 | sltiu AT, CARG3, LJ_TISNUM 1687 | sltiu AT, CARG3, LJ_TISNUM
1663 | beqz AT, ->fff_fallback 1688 | beqz AT, ->fff_fallback
1664 |. mfc1 CARG4, f0 1689 |. mfc1 CARG4, f0
@@ -1666,7 +1691,7 @@ static void build_subroutines(BuildCtx *ctx)
1666 | sltiu AT, CARG2, LJ_TISNUM 1691 | sltiu AT, CARG2, LJ_TISNUM
1667 | beqz AT, ->fff_fallback 1692 | beqz AT, ->fff_fallback
1668 |. li AT, LJ_TSTR 1693 |. li AT, LJ_TSTR
1669 | cvt.w.d f2, f2 1694 | trunc.w.d f2, f2
1670 | bne TMP0, AT, ->fff_fallback 1695 | bne TMP0, AT, ->fff_fallback
1671 |. lw CARG2, STR:CARG1->len 1696 |. lw CARG2, STR:CARG1->len
1672 | mfc1 CARG3, f2 1697 | mfc1 CARG3, f2
@@ -1695,108 +1720,32 @@ static void build_subroutines(BuildCtx *ctx)
1695 | b ->fff_restv 1720 | b ->fff_restv
1696 |. li CARG3, LJ_TSTR 1721 |. li CARG3, LJ_TSTR
1697 | 1722 |
1698 |.ffunc string_rep // Only handle the 1-char case inline. 1723 |.macro ffstring_op, name
1699 | ffgccheck 1724 | .ffunc string_ .. name
1700 | lw TMP0, HI(BASE)
1701 | addiu AT, NARGS8:RC, -16 // Exactly 2 arguments.
1702 | lw CARG4, 8+HI(BASE)
1703 | lw STR:CARG1, LO(BASE)
1704 | addiu TMP0, TMP0, -LJ_TSTR
1705 | ldc1 f0, 8(BASE)
1706 | or AT, AT, TMP0
1707 | bnez AT, ->fff_fallback
1708 |. sltiu AT, CARG4, LJ_TISNUM
1709 | cvt.w.d f0, f0
1710 | beqz AT, ->fff_fallback
1711 |. lw TMP0, STR:CARG1->len
1712 | mfc1 CARG3, f0
1713 | lw TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
1714 | li AT, 1
1715 | blez CARG3, ->fff_emptystr // Count <= 0?
1716 |. sltu AT, AT, TMP0
1717 | beqz TMP0, ->fff_emptystr // Zero length string?
1718 |. sltu TMP0, TMP1, CARG3
1719 | or AT, AT, TMP0
1720 | lw CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
1721 | bnez AT, ->fff_fallback // Fallback for > 1-char strings.
1722 |. lbu TMP0, STR:CARG1[1]
1723 | addu TMP2, CARG2, CARG3
1724 |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?).
1725 | addiu TMP2, TMP2, -1
1726 | sltu AT, CARG2, TMP2
1727 | bnez AT, <1
1728 |. sb TMP0, 0(TMP2)
1729 | b ->fff_newstr
1730 |. nop
1731 |
1732 |.ffunc string_reverse
1733 | ffgccheck
1734 | lw CARG3, HI(BASE)
1735 | lw STR:CARG1, LO(BASE)
1736 | beqz NARGS8:RC, ->fff_fallback
1737 |. li AT, LJ_TSTR
1738 | bne CARG3, AT, ->fff_fallback
1739 |. lw TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
1740 | lw CARG3, STR:CARG1->len
1741 | addiu CARG1, STR:CARG1, #STR
1742 | lw CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
1743 | sltu AT, TMP1, CARG3
1744 | bnez AT, ->fff_fallback
1745 |. addu TMP3, CARG1, CARG3
1746 | addu CARG4, CARG2, CARG3
1747 |1: // Reverse string copy.
1748 | lbu TMP1, 0(CARG1)
1749 | sltu AT, CARG1, TMP3
1750 | beqz AT, ->fff_newstr
1751 |. addiu CARG1, CARG1, 1
1752 | addiu CARG4, CARG4, -1
1753 | b <1
1754 | sb TMP1, 0(CARG4)
1755 |
1756 |.macro ffstring_case, name, lo
1757 | .ffunc name
1758 | ffgccheck 1725 | ffgccheck
1759 | lw CARG3, HI(BASE) 1726 | lw CARG3, HI(BASE)
1760 | lw STR:CARG1, LO(BASE) 1727 | lw STR:CARG2, LO(BASE)
1761 | beqz NARGS8:RC, ->fff_fallback 1728 | beqz NARGS8:RC, ->fff_fallback
1762 |. li AT, LJ_TSTR 1729 |. li AT, LJ_TSTR
1763 | bne CARG3, AT, ->fff_fallback 1730 | bne CARG3, AT, ->fff_fallback
1764 |. lw TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) 1731 |. addiu SBUF:CARG1, DISPATCH, DISPATCH_GL(tmpbuf)
1765 | lw CARG3, STR:CARG1->len 1732 | load_got lj_buf_putstr_ .. name
1766 | addiu CARG1, STR:CARG1, #STR 1733 | lw TMP0, SBUF:CARG1->b
1767 | lw CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) 1734 | sw L, SBUF:CARG1->L
1768 | sltu AT, TMP1, CARG3 1735 | sw BASE, L->base
1769 | bnez AT, ->fff_fallback 1736 | sw TMP0, SBUF:CARG1->p
1770 |. addu TMP3, CARG1, CARG3 1737 | call_intern extern lj_buf_putstr_ .. name
1771 | move CARG4, CARG2 1738 |. sw PC, SAVE_PC
1772 |1: // ASCII case conversion. 1739 | load_got lj_buf_tostr
1773 | lbu TMP1, 0(CARG1) 1740 | call_intern lj_buf_tostr
1774 | sltu AT, CARG1, TMP3 1741 |. move SBUF:CARG1, SBUF:CRET1
1775 | beqz AT, ->fff_newstr 1742 | b ->fff_resstr
1776 |. addiu TMP0, TMP1, -lo 1743 |. lw BASE, L->base
1777 | xori TMP2, TMP1, 0x20
1778 | sltiu AT, TMP0, 26
1779 | movn TMP1, TMP2, AT
1780 | addiu CARG1, CARG1, 1
1781 | sb TMP1, 0(CARG4)
1782 | b <1
1783 |. addiu CARG4, CARG4, 1
1784 |.endmacro 1744 |.endmacro
1785 | 1745 |
1786 |ffstring_case string_lower, 65 1746 |ffstring_op reverse
1787 |ffstring_case string_upper, 97 1747 |ffstring_op lower
1788 | 1748 |ffstring_op upper
1789 |//-- Table library ------------------------------------------------------
1790 |
1791 |.ffunc_1 table_getn
1792 | li AT, LJ_TTAB
1793 | bne CARG3, AT, ->fff_fallback
1794 |. load_got lj_tab_len
1795 | call_intern lj_tab_len // (GCtab *t)
1796 |. nop
1797 | // Returns uint32_t (but less than 2^31).
1798 | b ->fff_resi
1799 |. nop
1800 | 1749 |
1801 |//-- Bit library -------------------------------------------------------- 1750 |//-- Bit library --------------------------------------------------------
1802 | 1751 |
@@ -2062,6 +2011,76 @@ static void build_subroutines(BuildCtx *ctx)
2062 | jr CRET1 2011 | jr CRET1
2063 |. lw INS, -4(PC) 2012 |. lw INS, -4(PC)
2064 | 2013 |
2014 |->cont_stitch: // Trace stitching.
2015 |.if JIT
2016 | // RA = resultptr, RB = meta base
2017 | lw INS, -4(PC)
2018 | lw TMP3, -24+LO(RB) // Save previous trace number.
2019 | decode_RA8a RC, INS
2020 | addiu AT, MULTRES, -8
2021 | decode_RA8b RC
2022 | beqz AT, >2
2023 |. addu RC, BASE, RC // Call base.
2024 |1: // Move results down.
2025 | ldc1 f0, 0(RA)
2026 | addiu AT, AT, -8
2027 | addiu RA, RA, 8
2028 | sdc1 f0, 0(RC)
2029 | bnez AT, <1
2030 |. addiu RC, RC, 8
2031 |2:
2032 | decode_RA8a RA, INS
2033 | decode_RB8a RB, INS
2034 | decode_RA8b RA
2035 | decode_RB8b RB
2036 | addu RA, RA, RB
2037 | lw TMP1, DISPATCH_J(trace)(DISPATCH)
2038 | addu RA, BASE, RA
2039 |3:
2040 | sltu AT, RC, RA
2041 | bnez AT, >9 // More results wanted?
2042 |. sll TMP2, TMP3, 2
2043 |
2044 | addu TMP2, TMP1, TMP2
2045 | lw TRACE:TMP2, 0(TMP2)
2046 | beqz TRACE:TMP2, ->cont_nop
2047 |. nop
2048 | lhu RD, TRACE:TMP2->link
2049 | beq RD, TMP3, ->cont_nop // Blacklisted.
2050 |. load_got lj_dispatch_stitch
2051 | bnez RD, =>BC_JLOOP // Jump to stitched trace.
2052 |. sll RD, RD, 3
2053 |
2054 | // Stitch a new trace to the previous trace.
2055 | sw TMP3, DISPATCH_J(exitno)(DISPATCH)
2056 | sw L, DISPATCH_J(L)(DISPATCH)
2057 | sw BASE, L->base
2058 | addiu CARG1, DISPATCH, GG_DISP2J
2059 | call_intern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
2060 |. move CARG2, PC
2061 | b ->cont_nop
2062 |. lw BASE, L->base
2063 |
2064 |9:
2065 | sw TISNIL, HI(RC)
2066 | b <3
2067 |. addiu RC, RC, 8
2068 |.endif
2069 |
2070 |->vm_profhook: // Dispatch target for profiler hook.
2071#if LJ_HASPROFILE
2072 | load_got lj_dispatch_profile
2073 | sw MULTRES, SAVE_MULTRES
2074 | move CARG2, PC
2075 | sw BASE, L->base
2076 | call_intern lj_dispatch_profile // (lua_State *L, const BCIns *pc)
2077 |. move CARG1, L
2078 | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
2079 | addiu PC, PC, -4
2080 | b ->cont_nop
2081 |. lw BASE, L->base
2082#endif
2083 |
2065 |//----------------------------------------------------------------------- 2084 |//-----------------------------------------------------------------------
2066 |//-- Trace exit handler ------------------------------------------------- 2085 |//-- Trace exit handler -------------------------------------------------
2067 |//----------------------------------------------------------------------- 2086 |//-----------------------------------------------------------------------
@@ -2100,14 +2119,15 @@ static void build_subroutines(BuildCtx *ctx)
2100 | lw TMP1, 0(TMP2) // Load exit number. 2119 | lw TMP1, 0(TMP2) // Load exit number.
2101 | st_vmstate 2120 | st_vmstate
2102 | sw TMP2, 16+32*8+29*4(sp) // Store sp in RID_SP. 2121 | sw TMP2, 16+32*8+29*4(sp) // Store sp in RID_SP.
2103 | lw L, DISPATCH_GL(jit_L)(DISPATCH) 2122 | lw L, DISPATCH_GL(cur_L)(DISPATCH)
2104 | lw BASE, DISPATCH_GL(jit_base)(DISPATCH) 2123 | lw BASE, DISPATCH_GL(jit_base)(DISPATCH)
2105 | load_got lj_trace_exit 2124 | load_got lj_trace_exit
2106 | sw L, DISPATCH_J(L)(DISPATCH) 2125 | sw L, DISPATCH_J(L)(DISPATCH)
2107 | sw ra, DISPATCH_J(parent)(DISPATCH) // Store trace number. 2126 | sw ra, DISPATCH_J(parent)(DISPATCH) // Store trace number.
2127 | sw BASE, L->base
2108 | sw TMP1, DISPATCH_J(exitno)(DISPATCH) // Store exit number. 2128 | sw TMP1, DISPATCH_J(exitno)(DISPATCH) // Store exit number.
2109 | addiu CARG1, DISPATCH, GG_DISP2J 2129 | addiu CARG1, DISPATCH, GG_DISP2J
2110 | sw BASE, L->base 2130 | sw r0, DISPATCH_GL(jit_base)(DISPATCH)
2111 | call_intern lj_trace_exit // (jit_State *J, ExitState *ex) 2131 | call_intern lj_trace_exit // (jit_State *J, ExitState *ex)
2112 |. addiu CARG2, sp, 16 2132 |. addiu CARG2, sp, 16
2113 | // Returns MULTRES (unscaled) or negated error code. 2133 | // Returns MULTRES (unscaled) or negated error code.
@@ -2123,17 +2143,18 @@ static void build_subroutines(BuildCtx *ctx)
2123 |.if JIT 2143 |.if JIT
2124 | // CRET1 = MULTRES or negated error code, BASE, PC and JGL set. 2144 | // CRET1 = MULTRES or negated error code, BASE, PC and JGL set.
2125 | lw L, SAVE_L 2145 | lw L, SAVE_L
2126 | addiu DISPATCH, JGL, -GG_DISP2G-32768 2146 | addiu DISPATCH, JGL, -GG_DISP2G-32768
2147 | sw BASE, L->base
2127 |1: 2148 |1:
2128 | bltz CRET1, >3 // Check for error from exit. 2149 | bltz CRET1, >9 // Check for error from exit.
2129 |. lw LFUNC:TMP1, FRAME_FUNC(BASE) 2150 |. lw LFUNC:RB, FRAME_FUNC(BASE)
2130 | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 2151 | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
2131 | sll MULTRES, CRET1, 3 2152 | sll MULTRES, CRET1, 3
2132 | li TISNIL, LJ_TNIL 2153 | li TISNIL, LJ_TNIL
2133 | sw MULTRES, SAVE_MULTRES 2154 | sw MULTRES, SAVE_MULTRES
2134 | mtc1 TMP3, TOBIT 2155 | mtc1 TMP3, TOBIT
2135 | lw TMP1, LFUNC:TMP1->pc 2156 | lw TMP1, LFUNC:RB->pc
2136 | sw r0, DISPATCH_GL(jit_L)(DISPATCH) 2157 | sw r0, DISPATCH_GL(jit_base)(DISPATCH)
2137 | lw KBASE, PC2PROTO(k)(TMP1) 2158 | lw KBASE, PC2PROTO(k)(TMP1)
2138 | cvt.d.s TOBIT, TOBIT 2159 | cvt.d.s TOBIT, TOBIT
2139 | // Modified copy of ins_next which handles function header dispatch, too. 2160 | // Modified copy of ins_next which handles function header dispatch, too.
@@ -2153,11 +2174,27 @@ static void build_subroutines(BuildCtx *ctx)
2153 | jr AT 2174 | jr AT
2154 |. decode_RD8b RD 2175 |. decode_RD8b RD
2155 |2: 2176 |2:
2177 | sltiu TMP2, TMP1, (BC_FUNCC+2)*4 // Fast function?
2178 | bnez TMP2, >3
2179 |. lw TMP1, FRAME_PC(BASE)
2180 | // Check frame below fast function.
2181 | andi TMP0, TMP1, FRAME_TYPE
2182 | bnez TMP0, >3 // Trace stitching continuation?
2183 |. nop
2184 | // Otherwise set KBASE for Lua function below fast function.
2185 | lw TMP2, -4(TMP1)
2186 | decode_RA8a TMP0, TMP2
2187 | decode_RA8b TMP0
2188 | subu TMP1, BASE, TMP0
2189 | lw LFUNC:TMP2, -8+FRAME_FUNC(TMP1)
2190 | lw TMP1, LFUNC:TMP2->pc
2191 | lw KBASE, PC2PROTO(k)(TMP1)
2192 |3:
2156 | addiu RC, MULTRES, -8 2193 | addiu RC, MULTRES, -8
2157 | jr AT 2194 | jr AT
2158 |. addu RA, RA, BASE 2195 |. addu RA, RA, BASE
2159 | 2196 |
2160 |3: // Rethrow error from the right C frame. 2197 |9: // Rethrow error from the right C frame.
2161 | load_got lj_err_throw 2198 | load_got lj_err_throw
2162 | negu CARG2, CRET1 2199 | negu CARG2, CRET1
2163 | call_intern lj_err_throw // (lua_State *L, int errcode) 2200 | call_intern lj_err_throw // (lua_State *L, int errcode)
@@ -2572,6 +2609,26 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2572 | ins_next 2609 | ins_next
2573 break; 2610 break;
2574 2611
2612 case BC_ISTYPE:
2613 | // RA = src*8, RD = -type*8
2614 | addu TMP2, BASE, RA
2615 | srl TMP1, RD, 3
2616 | lw TMP0, HI(TMP2)
2617 | ins_next1
2618 | addu AT, TMP0, TMP1
2619 | bnez AT, ->vmeta_istype
2620 |. ins_next2
2621 break;
2622 case BC_ISNUM:
2623 | // RA = src*8, RD = -(TISNUM-1)*8
2624 | addu TMP2, BASE, RA
2625 | lw TMP0, HI(TMP2)
2626 | ins_next1
2627 | sltiu AT, TMP0, LJ_TISNUM
2628 | beqz AT, ->vmeta_istype
2629 |. ins_next2
2630 break;
2631
2575 /* -- Unary ops --------------------------------------------------------- */ 2632 /* -- Unary ops --------------------------------------------------------- */
2576 2633
2577 case BC_MOV: 2634 case BC_MOV:
@@ -3210,6 +3267,30 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3210 | b ->vmeta_tgetb // Caveat: preserve TMP0! 3267 | b ->vmeta_tgetb // Caveat: preserve TMP0!
3211 |. nop 3268 |. nop
3212 break; 3269 break;
3270 case BC_TGETR:
3271 | // RA = dst*8, RB = table*8, RC = key*8
3272 | decode_RB8a RB, INS
3273 | decode_RB8b RB
3274 | decode_RDtoRC8 RC, RD
3275 | addu CARG2, BASE, RB
3276 | addu CARG3, BASE, RC
3277 | lw TAB:CARG1, LO(CARG2)
3278 | ldc1 f0, 0(CARG3)
3279 | trunc.w.d f2, f0
3280 | lw TMP0, TAB:CARG1->asize
3281 | mfc1 CARG2, f2
3282 | lw TMP1, TAB:CARG1->array
3283 | sltu AT, CARG2, TMP0
3284 | sll TMP2, CARG2, 3
3285 | beqz AT, ->vmeta_tgetr // In array part?
3286 |. addu TMP2, TMP1, TMP2
3287 | ldc1 f0, 0(TMP2)
3288 |->BC_TGETR_Z:
3289 | addu RA, BASE, RA
3290 | ins_next1
3291 | sdc1 f0, 0(RA)
3292 | ins_next2
3293 break;
3213 3294
3214 case BC_TSETV: 3295 case BC_TSETV:
3215 | // RA = src*8, RB = table*8, RC = key*8 3296 | // RA = src*8, RB = table*8, RC = key*8
@@ -3398,6 +3479,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3398 |7: // Possible table write barrier for the value. Skip valiswhite check. 3479 |7: // Possible table write barrier for the value. Skip valiswhite check.
3399 | barrierback TAB:RB, TMP3, TMP0, <2 3480 | barrierback TAB:RB, TMP3, TMP0, <2
3400 break; 3481 break;
3482 case BC_TSETR:
3483 | // RA = dst*8, RB = table*8, RC = key*8
3484 | decode_RB8a RB, INS
3485 | decode_RB8b RB
3486 | decode_RDtoRC8 RC, RD
3487 | addu CARG1, BASE, RB
3488 | addu CARG3, BASE, RC
3489 | lw TAB:CARG2, LO(CARG1)
3490 | ldc1 f0, 0(CARG3)
3491 | trunc.w.d f2, f0
3492 | lbu TMP3, TAB:CARG2->marked
3493 | lw TMP0, TAB:CARG2->asize
3494 | mfc1 CARG3, f2
3495 | lw TMP1, TAB:CARG2->array
3496 | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
3497 | bnez AT, >7
3498 |. addu RA, BASE, RA
3499 |2:
3500 | sltu AT, CARG3, TMP0
3501 | sll TMP2, CARG3, 3
3502 | beqz AT, ->vmeta_tsetr // In array part?
3503 |. ldc1 f20, 0(RA)
3504 | addu CRET1, TMP1, TMP2
3505 |->BC_TSETR_Z:
3506 | ins_next1
3507 | sdc1 f20, 0(CRET1)
3508 | ins_next2
3509 |
3510 |7: // Possible table write barrier for the value. Skip valiswhite check.
3511 | barrierback TAB:RB, TMP3, TMP0, <2
3512 break;
3513
3401 3514
3402 case BC_TSETM: 3515 case BC_TSETM:
3403 | // RA = base*8 (table at base-1), RD = num_const*8 (start index) 3516 | // RA = base*8 (table at base-1), RD = num_const*8 (start index)
@@ -3957,8 +4070,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3957 | sw AT, DISPATCH_GL(vmstate)(DISPATCH) 4070 | sw AT, DISPATCH_GL(vmstate)(DISPATCH)
3958 | lw TRACE:TMP2, 0(TMP1) 4071 | lw TRACE:TMP2, 0(TMP1)
3959 | sw BASE, DISPATCH_GL(jit_base)(DISPATCH) 4072 | sw BASE, DISPATCH_GL(jit_base)(DISPATCH)
3960 | sw L, DISPATCH_GL(jit_L)(DISPATCH)
3961 | lw TMP2, TRACE:TMP2->mcode 4073 | lw TMP2, TRACE:TMP2->mcode
4074 | sw L, DISPATCH_GL(tmpbuf.L)(DISPATCH)
3962 | jr TMP2 4075 | jr TMP2
3963 |. addiu JGL, DISPATCH, GG_DISP2G+32768 4076 |. addiu JGL, DISPATCH, GG_DISP2G+32768
3964 |.endif 4077 |.endif
@@ -4084,6 +4197,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4084 | li_vmstate INTERP 4197 | li_vmstate INTERP
4085 | lw PC, FRAME_PC(BASE) // Fetch PC of caller. 4198 | lw PC, FRAME_PC(BASE) // Fetch PC of caller.
4086 | subu RA, TMP1, RD // RA = L->top - nresults*8 4199 | subu RA, TMP1, RD // RA = L->top - nresults*8
4200 | sw L, DISPATCH_GL(cur_L)(DISPATCH)
4087 | b ->vm_returnc 4201 | b ->vm_returnc
4088 |. st_vmstate 4202 |. st_vmstate
4089 break; 4203 break;
diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc
index ad8a023e..2a7a7455 100644
--- a/src/vm_ppc.dasc
+++ b/src/vm_ppc.dasc
@@ -1,4 +1,4 @@
1|// Low-level VM code for PowerPC CPUs. 1|// Low-level VM code for PowerPC 32 bit or 32on64 bit mode.
2|// Bytecode interpreter, fast functions and helper functions. 2|// Bytecode interpreter, fast functions and helper functions.
3|// Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h 3|// Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
4| 4|
@@ -18,7 +18,7 @@
18|// DynASM defines used by the PPC port: 18|// DynASM defines used by the PPC port:
19|// 19|//
20|// P64 64 bit pointers (only for GPR64 testing). 20|// P64 64 bit pointers (only for GPR64 testing).
21|// Note: a full PPC64 _LP64 port is not planned. 21|// Note: see vm_ppc64.dasc for a full PPC64 _LP64 port.
22|// GPR64 64 bit registers (but possibly 32 bit pointers, e.g. PS3). 22|// GPR64 64 bit registers (but possibly 32 bit pointers, e.g. PS3).
23|// Affects reg saves, stack layout, carry/overflow/dot flags etc. 23|// Affects reg saves, stack layout, carry/overflow/dot flags etc.
24|// FRAME32 Use 32 bit frame layout, even with GPR64 (Xbox 360). 24|// FRAME32 Use 32 bit frame layout, even with GPR64 (Xbox 360).
@@ -316,6 +316,7 @@
316|.type NODE, Node 316|.type NODE, Node
317|.type NARGS8, int 317|.type NARGS8, int
318|.type TRACE, GCtrace 318|.type TRACE, GCtrace
319|.type SBUF, SBuf
319| 320|
320|//----------------------------------------------------------------------- 321|//-----------------------------------------------------------------------
321| 322|
@@ -684,12 +685,13 @@ static void build_subroutines(BuildCtx *ctx)
684 | stw CARG3, SAVE_NRES 685 | stw CARG3, SAVE_NRES
685 | cmplwi TMP1, 0 686 | cmplwi TMP1, 0
686 | stw CARG3, SAVE_ERRF 687 | stw CARG3, SAVE_ERRF
687 | stp TMP0, L->cframe
688 | stp CARG3, SAVE_CFRAME 688 | stp CARG3, SAVE_CFRAME
689 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. 689 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok.
690 | stp TMP0, L->cframe
690 | beq >3 691 | beq >3
691 | 692 |
692 | // Resume after yield (like a return). 693 | // Resume after yield (like a return).
694 | stw L, DISPATCH_GL(cur_L)(DISPATCH)
693 | mr RA, BASE 695 | mr RA, BASE
694 | lp BASE, L->base 696 | lp BASE, L->base
695 | li TISNUM, LJ_TISNUM // Setup type comparison constants. 697 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
@@ -729,17 +731,18 @@ static void build_subroutines(BuildCtx *ctx)
729 | 731 |
730 |1: // Entry point for vm_pcall above (PC = ftype). 732 |1: // Entry point for vm_pcall above (PC = ftype).
731 | lp TMP1, L:CARG1->cframe 733 | lp TMP1, L:CARG1->cframe
732 | stw CARG3, SAVE_NRES
733 | mr L, CARG1 734 | mr L, CARG1
734 | stw CARG1, SAVE_L 735 | stw CARG3, SAVE_NRES
735 | mr BASE, CARG2
736 | stp sp, L->cframe // Add our C frame to cframe chain.
737 | lwz DISPATCH, L->glref // Setup pointer to dispatch table. 736 | lwz DISPATCH, L->glref // Setup pointer to dispatch table.
737 | stw CARG1, SAVE_L
738 | mr BASE, CARG2
739 | addi DISPATCH, DISPATCH, GG_G2DISP
738 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. 740 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok.
739 | stp TMP1, SAVE_CFRAME 741 | stp TMP1, SAVE_CFRAME
740 | addi DISPATCH, DISPATCH, GG_G2DISP 742 | stp sp, L->cframe // Add our C frame to cframe chain.
741 | 743 |
742 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). 744 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
745 | stw L, DISPATCH_GL(cur_L)(DISPATCH)
743 | lp TMP2, L->base // TMP2 = old base (used in vmeta_call). 746 | lp TMP2, L->base // TMP2 = old base (used in vmeta_call).
744 | li TISNUM, LJ_TISNUM // Setup type comparison constants. 747 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
745 | lp TMP1, L->top 748 | lp TMP1, L->top
@@ -776,15 +779,18 @@ static void build_subroutines(BuildCtx *ctx)
776 | lwz TMP0, L:CARG1->stack 779 | lwz TMP0, L:CARG1->stack
777 | stw CARG1, SAVE_L 780 | stw CARG1, SAVE_L
778 | lp TMP1, L->top 781 | lp TMP1, L->top
782 | lwz DISPATCH, L->glref // Setup pointer to dispatch table.
779 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. 783 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok.
780 | sub TMP0, TMP0, TMP1 // Compute -savestack(L, L->top). 784 | sub TMP0, TMP0, TMP1 // Compute -savestack(L, L->top).
781 | lp TMP1, L->cframe 785 | lp TMP1, L->cframe
782 | stp sp, L->cframe // Add our C frame to cframe chain. 786 | addi DISPATCH, DISPATCH, GG_G2DISP
783 | .toc lp CARG4, 0(CARG4) 787 | .toc lp CARG4, 0(CARG4)
784 | li TMP2, 0 788 | li TMP2, 0
785 | stw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame. 789 | stw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame.
786 | stw TMP2, SAVE_ERRF // No error function. 790 | stw TMP2, SAVE_ERRF // No error function.
787 | stp TMP1, SAVE_CFRAME 791 | stp TMP1, SAVE_CFRAME
792 | stp sp, L->cframe // Add our C frame to cframe chain.
793 | stw L, DISPATCH_GL(cur_L)(DISPATCH)
788 | mtctr CARG4 794 | mtctr CARG4
789 | bctrl // (lua_State *L, lua_CFunction func, void *ud) 795 | bctrl // (lua_State *L, lua_CFunction func, void *ud)
790 |.if PPE 796 |.if PPE
@@ -793,9 +799,7 @@ static void build_subroutines(BuildCtx *ctx)
793 |.else 799 |.else
794 | mr. BASE, CRET1 800 | mr. BASE, CRET1
795 |.endif 801 |.endif
796 | lwz DISPATCH, L->glref // Setup pointer to dispatch table. 802 | li PC, FRAME_CP
797 | li PC, FRAME_CP
798 | addi DISPATCH, DISPATCH, GG_G2DISP
799 | bne <3 // Else continue with the call. 803 | bne <3 // Else continue with the call.
800 | b ->vm_leave_cp // No base? Just remove C frame. 804 | b ->vm_leave_cp // No base? Just remove C frame.
801 | 805 |
@@ -918,6 +922,17 @@ static void build_subroutines(BuildCtx *ctx)
918 | li NARGS8:RC, 16 // 2 args for func(t, k). 922 | li NARGS8:RC, 16 // 2 args for func(t, k).
919 | b ->vm_call_dispatch_f 923 | b ->vm_call_dispatch_f
920 | 924 |
925 |->vmeta_tgetr:
926 | bl extern lj_tab_getinth // (GCtab *t, int32_t key)
927 | // Returns cTValue * or NULL.
928 | cmplwi CRET1, 0
929 | beq >1
930 | lfd f14, 0(CRET1)
931 | b ->BC_TGETR_Z
932 |1:
933 | stwx TISNIL, BASE, RA
934 | b ->cont_nop
935 |
921 |//----------------------------------------------------------------------- 936 |//-----------------------------------------------------------------------
922 | 937 |
923 |->vmeta_tsets1: 938 |->vmeta_tsets1:
@@ -985,6 +1000,14 @@ static void build_subroutines(BuildCtx *ctx)
985 | stfd f0, 16(BASE) // Copy value to third argument. 1000 | stfd f0, 16(BASE) // Copy value to third argument.
986 | b ->vm_call_dispatch_f 1001 | b ->vm_call_dispatch_f
987 | 1002 |
1003 |->vmeta_tsetr:
1004 | stp BASE, L->base
1005 | stw PC, SAVE_PC
1006 | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
1007 | // Returns TValue *.
1008 | stfd f14, 0(CRET1)
1009 | b ->cont_nop
1010 |
988 |//-- Comparison metamethods --------------------------------------------- 1011 |//-- Comparison metamethods ---------------------------------------------
989 | 1012 |
990 |->vmeta_comp: 1013 |->vmeta_comp:
@@ -1063,6 +1086,16 @@ static void build_subroutines(BuildCtx *ctx)
1063 | b <3 1086 | b <3
1064 |.endif 1087 |.endif
1065 | 1088 |
1089 |->vmeta_istype: // Entered from BC_ISTYPE/BC_ISNUM when the type test fails.
1090 | subi PC, PC, 4 // Step PC back one instruction word; presumably undoes the ins_next1 fetch in the caller -- TODO confirm.
1091 | stp BASE, L->base // Sync BASE to L->base before calling into C.
1092 | srwi CARG2, RA, 3 // RA is src*8 (see BC_ISTYPE), so this yields the slot index.
1093 | mr CARG1, L
1094 | srwi CARG3, RD, 3 // Unscale operand D (RD is D*8).
1095 | stw PC, SAVE_PC
1096 | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
1097 | b ->cont_nop
1098 |
1066 |//-- Arithmetic metamethods --------------------------------------------- 1099 |//-- Arithmetic metamethods ---------------------------------------------
1067 | 1100 |
1068 |->vmeta_arith_nv: 1101 |->vmeta_arith_nv:
@@ -1387,9 +1420,9 @@ static void build_subroutines(BuildCtx *ctx)
1387 | mr CARG1, L 1420 | mr CARG1, L
1388 | mr CARG2, BASE 1421 | mr CARG2, BASE
1389 |.if DUALNUM 1422 |.if DUALNUM
1390 | bl extern lj_str_fromnumber // (lua_State *L, cTValue *o) 1423 | bl extern lj_strfmt_number // (lua_State *L, cTValue *o)
1391 |.else 1424 |.else
1392 | bl extern lj_str_fromnum // (lua_State *L, lua_Number *np) 1425 | bl extern lj_strfmt_num // (lua_State *L, lua_Number *np)
1393 |.endif 1426 |.endif
1394 | // Returns GCstr *. 1427 | // Returns GCstr *.
1395 | li CARG3, LJ_TSTR 1428 | li CARG3, LJ_TSTR
@@ -1622,6 +1655,7 @@ static void build_subroutines(BuildCtx *ctx)
1622 | lp TMP3, L:SAVE0->top 1655 | lp TMP3, L:SAVE0->top
1623 | li_vmstate INTERP 1656 | li_vmstate INTERP
1624 | lp BASE, L->base 1657 | lp BASE, L->base
1658 | stw L, DISPATCH_GL(cur_L)(DISPATCH)
1625 | st_vmstate 1659 | st_vmstate
1626 | bgt >8 1660 | bgt >8
1627 | sub RD, TMP3, TMP2 1661 | sub RD, TMP3, TMP2
@@ -1893,12 +1927,6 @@ static void build_subroutines(BuildCtx *ctx)
1893 | math_extern2 atan2 1927 | math_extern2 atan2
1894 | math_extern2 fmod 1928 | math_extern2 fmod
1895 | 1929 |
1896 |->ff_math_deg:
1897 |.ffunc_n math_rad
1898 | lfd FARG2, CFUNC:RB->upvalue[0]
1899 | fmul FARG1, FARG1, FARG2
1900 | b ->fff_resn
1901 |
1902 |.if DUALNUM 1930 |.if DUALNUM
1903 |.ffunc math_ldexp 1931 |.ffunc math_ldexp
1904 | cmplwi NARGS8:RC, 16 1932 | cmplwi NARGS8:RC, 16
@@ -2044,11 +2072,6 @@ static void build_subroutines(BuildCtx *ctx)
2044 | 2072 |
2045 |//-- String library ----------------------------------------------------- 2073 |//-- String library -----------------------------------------------------
2046 | 2074 |
2047 |.ffunc_1 string_len
2048 | checkstr CARG3; bne ->fff_fallback
2049 | lwz CRET1, STR:CARG1->len
2050 | b ->fff_resi
2051 |
2052 |.ffunc string_byte // Only handle the 1-arg case here. 2075 |.ffunc string_byte // Only handle the 1-arg case here.
2053 | cmplwi NARGS8:RC, 8 2076 | cmplwi NARGS8:RC, 8
2054 | lwz CARG3, 0(BASE) 2077 | lwz CARG3, 0(BASE)
@@ -2103,6 +2126,7 @@ static void build_subroutines(BuildCtx *ctx)
2103 | stp BASE, L->base 2126 | stp BASE, L->base
2104 | stw PC, SAVE_PC 2127 | stw PC, SAVE_PC
2105 | bl extern lj_str_new // (lua_State *L, char *str, size_t l) 2128 | bl extern lj_str_new // (lua_State *L, char *str, size_t l)
2129 |->fff_resstr:
2106 | // Returns GCstr *. 2130 | // Returns GCstr *.
2107 | lp BASE, L->base 2131 | lp BASE, L->base
2108 | li CARG3, LJ_TSTR 2132 | li CARG3, LJ_TSTR
@@ -2180,114 +2204,29 @@ static void build_subroutines(BuildCtx *ctx)
2180 | addi TMP1, TMP1, 1 // start = 1 + (start ? start+len : 0) 2204 | addi TMP1, TMP1, 1 // start = 1 + (start ? start+len : 0)
2181 | b <3 2205 | b <3
2182 | 2206 |
2183 |.ffunc string_rep // Only handle the 1-char case inline. 2207 |.macro ffstring_op, name
2184 | ffgccheck 2208 | .ffunc string_ .. name
2185 | cmplwi NARGS8:RC, 16
2186 | lwz TMP0, 0(BASE)
2187 | lwz STR:CARG1, 4(BASE)
2188 | lwz CARG4, 8(BASE)
2189 |.if DUALNUM
2190 | lwz CARG3, 12(BASE)
2191 |.else
2192 | lfd FARG2, 8(BASE)
2193 |.endif
2194 | bne ->fff_fallback // Exactly 2 arguments.
2195 | checkstr TMP0; bne ->fff_fallback
2196 |.if DUALNUM
2197 | checknum CARG4; bne ->fff_fallback
2198 |.else
2199 | checknum CARG4; bge ->fff_fallback
2200 | toint CARG3, FARG2
2201 |.endif
2202 | lwz TMP0, STR:CARG1->len
2203 | cmpwi CARG3, 0
2204 | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
2205 | ble >2 // Count <= 0? (or non-int)
2206 | cmplwi TMP0, 1
2207 | subi TMP2, CARG3, 1
2208 | blt >2 // Zero length string?
2209 | cmplw cr1, TMP1, CARG3
2210 | bne ->fff_fallback // Fallback for > 1-char strings.
2211 | lbz TMP0, STR:CARG1[1]
2212 | lp CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
2213 | blt cr1, ->fff_fallback
2214 |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?).
2215 | cmplwi TMP2, 0
2216 | stbx TMP0, CARG2, TMP2
2217 | subi TMP2, TMP2, 1
2218 | bne <1
2219 | b ->fff_newstr
2220 |2: // Return empty string.
2221 | la STR:CARG1, DISPATCH_GL(strempty)(DISPATCH)
2222 | li CARG3, LJ_TSTR
2223 | b ->fff_restv
2224 |
2225 |.ffunc string_reverse
2226 | ffgccheck 2209 | ffgccheck
2227 | cmplwi NARGS8:RC, 8 2210 | cmplwi NARGS8:RC, 8
2228 | lwz CARG3, 0(BASE) 2211 | lwz CARG3, 0(BASE)
2229 | lwz STR:CARG1, 4(BASE) 2212 | lwz STR:CARG2, 4(BASE)
2230 | blt ->fff_fallback 2213 | blt ->fff_fallback
2231 | checkstr CARG3 2214 | checkstr CARG3
2232 | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) 2215 | la SBUF:CARG1, DISPATCH_GL(tmpbuf)(DISPATCH)
2233 | bne ->fff_fallback 2216 | bne ->fff_fallback
2234 | lwz CARG3, STR:CARG1->len 2217 | lwz TMP0, SBUF:CARG1->b
2235 | la CARG1, #STR(STR:CARG1) 2218 | stw L, SBUF:CARG1->L
2236 | lp CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) 2219 | stp BASE, L->base
2237 | li TMP2, 0 2220 | stw PC, SAVE_PC
2238 | cmplw TMP1, CARG3 2221 | stw TMP0, SBUF:CARG1->p
2239 | subi TMP3, CARG3, 1 2222 | bl extern lj_buf_putstr_ .. name
2240 | blt ->fff_fallback 2223 | bl extern lj_buf_tostr
2241 |1: // Reverse string copy. 2224 | b ->fff_resstr
2242 | cmpwi TMP3, 0
2243 | lbzx TMP1, CARG1, TMP2
2244 | blty ->fff_newstr
2245 | stbx TMP1, CARG2, TMP3
2246 | subi TMP3, TMP3, 1
2247 | addi TMP2, TMP2, 1
2248 | b <1
2249 |
2250 |.macro ffstring_case, name, lo
2251 | .ffunc name
2252 | ffgccheck
2253 | cmplwi NARGS8:RC, 8
2254 | lwz CARG3, 0(BASE)
2255 | lwz STR:CARG1, 4(BASE)
2256 | blt ->fff_fallback
2257 | checkstr CARG3
2258 | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
2259 | bne ->fff_fallback
2260 | lwz CARG3, STR:CARG1->len
2261 | la CARG1, #STR(STR:CARG1)
2262 | lp CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
2263 | cmplw TMP1, CARG3
2264 | li TMP2, 0
2265 | blt ->fff_fallback
2266 |1: // ASCII case conversion.
2267 | cmplw TMP2, CARG3
2268 | lbzx TMP1, CARG1, TMP2
2269 | bgey ->fff_newstr
2270 | subi TMP0, TMP1, lo
2271 | xori TMP3, TMP1, 0x20
2272 | addic TMP0, TMP0, -26
2273 | subfe TMP3, TMP3, TMP3
2274 | rlwinm TMP3, TMP3, 0, 26, 26 // x &= 0x20.
2275 | xor TMP1, TMP1, TMP3
2276 | stbx TMP1, CARG2, TMP2
2277 | addi TMP2, TMP2, 1
2278 | b <1
2279 |.endmacro 2225 |.endmacro
2280 | 2226 |
2281 |ffstring_case string_lower, 65 2227 |ffstring_op reverse
2282 |ffstring_case string_upper, 97 2228 |ffstring_op lower
2283 | 2229 |ffstring_op upper
2284 |//-- Table library ------------------------------------------------------
2285 |
2286 |.ffunc_1 table_getn
2287 | checktab CARG3; bne ->fff_fallback
2288 | bl extern lj_tab_len // (GCtab *t)
2289 | // Returns uint32_t (but less than 2^31).
2290 | b ->fff_resi
2291 | 2230 |
2292 |//-- Bit library -------------------------------------------------------- 2231 |//-- Bit library --------------------------------------------------------
2293 | 2232 |
@@ -2589,6 +2528,70 @@ static void build_subroutines(BuildCtx *ctx)
2589 | mtctr CRET1 2528 | mtctr CRET1
2590 | bctr 2529 | bctr
2591 | 2530 |
2531 |->cont_stitch: // Trace stitching.
2532 |.if JIT
2533 | // RA = resultptr, RB = meta base
2534 | lwz INS, -4(PC) // Instruction word just before PC (presumably the call being continued -- TODO confirm).
2535 | lwz TMP3, -20(RB) // Save previous trace number.
2536 | addic. TMP1, MULTRES, -8 // TMP1 = MULTRES-8 = bytes of results to move; sets cr0.
2537 | decode_RA8 RC, INS // Call base.
2538 | beq >2 // Nothing to move.
2539 |1: // Move results down.
2540 | lfd f0, 0(RA)
2541 | addic. TMP1, TMP1, -8
2542 | addi RA, RA, 8
2543 | stfdx f0, BASE, RC // Destination is BASE+RC.
2544 | addi RC, RC, 8
2545 | bne <1
2546 |2:
2547 | decode_RA8 RA, INS
2548 | decode_RB8 RB, INS
2549 | add RA, RA, RB // RA = (A+B)*8: limit offset for the nil-fill loop at 3/9.
2550 | lwz TMP1, DISPATCH_J(trace)(DISPATCH) // TMP1 = J->trace (array indexed by trace number below).
2551 |3:
2552 | cmplw RA, RC
2553 | bgt >9 // More results wanted?
2554 |
2555 | slwi TMP2, TMP3, 2 // Scale trace number by 4 (word-sized entries).
2556 | lwzx TRACE:TMP2, TMP1, TMP2 // TRACE = J->trace[previous trace number].
2557 | cmpwi TRACE:TMP2, 0
2558 | beq ->cont_nop // No trace object: just continue.
2559 | lhz RD, TRACE:TMP2->link
2560 | cmpw RD, TMP3 // cr0: link == its own trace number?
2561 | cmpwi cr1, RD, 0 // cr1: link == 0?
2562 | beq ->cont_nop // Blacklisted.
2563 | slwi RD, RD, 3 // RD = link*8, the scaled operand form used by the bytecode handlers.
2564 | bne cr1, =>BC_JLOOP // Jump to stitched trace.
2565 |
2566 | // Stitch a new trace to the previous trace.
2567 | stw TMP3, DISPATCH_J(exitno)(DISPATCH) // J->exitno = previous trace number.
2568 | stp L, DISPATCH_J(L)(DISPATCH)
2569 | stp BASE, L->base
2570 | addi CARG1, DISPATCH, GG_DISP2J // CARG1 = jit_State pointer derived from DISPATCH.
2571 | mr CARG2, PC
2572 | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
2573 | lp BASE, L->base // Reload BASE after the C call.
2574 | b ->cont_nop
2575 |
2576 |9:
2577 | stwx TISNIL, BASE, RC // Fill one missing result slot with nil.
2578 | addi RC, RC, 8
2579 | b <3
2580 |.endif
2581 |
2582 |->vm_profhook: // Dispatch target for profiler hook.
2583#if LJ_HASPROFILE
2584 | mr CARG1, L
2585 | stw MULTRES, SAVE_MULTRES // Spill MULTRES to its stack slot before the C call.
2586 | mr CARG2, PC
2587 | stp BASE, L->base // Sync BASE to L->base before the C call.
2588 | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc)
2589 | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
2590 | lp BASE, L->base // Reload BASE after the C call.
2591 | subi PC, PC, 4 // Step PC back one instruction word so that instruction is dispatched again.
2592 | b ->cont_nop
2593#endif
2594 |
2592 |//----------------------------------------------------------------------- 2595 |//-----------------------------------------------------------------------
2593 |//-- Trace exit handler ------------------------------------------------- 2596 |//-- Trace exit handler -------------------------------------------------
2594 |//----------------------------------------------------------------------- 2597 |//-----------------------------------------------------------------------
@@ -2623,16 +2626,16 @@ static void build_subroutines(BuildCtx *ctx)
2623 | savex_ 20,21,22,23 2626 | savex_ 20,21,22,23
2624 | lhz CARG4, 2(CARG3) // Load trace number. 2627 | lhz CARG4, 2(CARG3) // Load trace number.
2625 | savex_ 24,25,26,27 2628 | savex_ 24,25,26,27
2626 | lwz L, DISPATCH_GL(jit_L)(DISPATCH) 2629 | lwz L, DISPATCH_GL(cur_L)(DISPATCH)
2627 | savex_ 28,29,30,31 2630 | savex_ 28,29,30,31
2628 | sub CARG3, TMP0, CARG3 // Compute exit number. 2631 | sub CARG3, TMP0, CARG3 // Compute exit number.
2629 | lp BASE, DISPATCH_GL(jit_base)(DISPATCH) 2632 | lp BASE, DISPATCH_GL(jit_base)(DISPATCH)
2630 | srwi CARG3, CARG3, 2 2633 | srwi CARG3, CARG3, 2
2631 | stw L, DISPATCH_J(L)(DISPATCH) 2634 | stp L, DISPATCH_J(L)(DISPATCH)
2632 | subi CARG3, CARG3, 2 2635 | subi CARG3, CARG3, 2
2633 | stw TMP1, DISPATCH_GL(jit_L)(DISPATCH)
2634 | stw CARG4, DISPATCH_J(parent)(DISPATCH)
2635 | stp BASE, L->base 2636 | stp BASE, L->base
2637 | stw CARG4, DISPATCH_J(parent)(DISPATCH)
2638 | stw TMP1, DISPATCH_GL(jit_base)(DISPATCH)
2636 | addi CARG1, DISPATCH, GG_DISP2J 2639 | addi CARG1, DISPATCH, GG_DISP2J
2637 | stw CARG3, DISPATCH_J(exitno)(DISPATCH) 2640 | stw CARG3, DISPATCH_J(exitno)(DISPATCH)
2638 | addi CARG2, sp, 16 2641 | addi CARG2, sp, 16
@@ -2656,15 +2659,16 @@ static void build_subroutines(BuildCtx *ctx)
2656 | // CARG1 = MULTRES or negated error code, BASE, PC and JGL set. 2659 | // CARG1 = MULTRES or negated error code, BASE, PC and JGL set.
2657 | lwz L, SAVE_L 2660 | lwz L, SAVE_L
2658 | addi DISPATCH, JGL, -GG_DISP2G-32768 2661 | addi DISPATCH, JGL, -GG_DISP2G-32768
2662 | stp BASE, L->base
2659 |1: 2663 |1:
2660 | cmpwi CARG1, 0 2664 | cmpwi CARG1, 0
2661 | blt >3 // Check for error from exit. 2665 | blt >9 // Check for error from exit.
2662 | lwz LFUNC:TMP1, FRAME_FUNC(BASE) 2666 | lwz LFUNC:RB, FRAME_FUNC(BASE)
2663 | slwi MULTRES, CARG1, 3 2667 | slwi MULTRES, CARG1, 3
2664 | li TMP2, 0 2668 | li TMP2, 0
2665 | stw MULTRES, SAVE_MULTRES 2669 | stw MULTRES, SAVE_MULTRES
2666 | lwz TMP1, LFUNC:TMP1->pc 2670 | lwz TMP1, LFUNC:RB->pc
2667 | stw TMP2, DISPATCH_GL(jit_L)(DISPATCH) 2671 | stw TMP2, DISPATCH_GL(jit_base)(DISPATCH)
2668 | lwz KBASE, PC2PROTO(k)(TMP1) 2672 | lwz KBASE, PC2PROTO(k)(TMP1)
2669 | // Setup type comparison constants. 2673 | // Setup type comparison constants.
2670 | li TISNUM, LJ_TISNUM 2674 | li TISNUM, LJ_TISNUM
@@ -2694,11 +2698,25 @@ static void build_subroutines(BuildCtx *ctx)
2694 | decode_RC8 RC, INS 2698 | decode_RC8 RC, INS
2695 | bctr 2699 | bctr
2696 |2: 2700 |2:
2701 | cmplwi TMP1, (BC_FUNCC+2)*4 // Fast function?
2702 | blt >3
2703 | // Check frame below fast function.
2704 | lwz TMP1, FRAME_PC(BASE)
2705 | andix. TMP0, TMP1, FRAME_TYPE
2706 | bney >3 // Trace stitching continuation?
2707 | // Otherwise set KBASE for Lua function below fast function.
2708 | lwz TMP2, -4(TMP1)
2709 | decode_RA8 TMP0, TMP2
2710 | sub TMP1, BASE, TMP0
2711 | lwz LFUNC:TMP2, -12(TMP1)
2712 | lwz TMP1, LFUNC:TMP2->pc
2713 | lwz KBASE, PC2PROTO(k)(TMP1)
2714 |3:
2697 | subi RC, MULTRES, 8 2715 | subi RC, MULTRES, 8
2698 | add RA, RA, BASE 2716 | add RA, RA, BASE
2699 | bctr 2717 | bctr
2700 | 2718 |
2701 |3: // Rethrow error from the right C frame. 2719 |9: // Rethrow error from the right C frame.
2702 | neg CARG2, CARG1 2720 | neg CARG2, CARG1
2703 | mr CARG1, L 2721 | mr CARG1, L
2704 | bl extern lj_err_throw // (lua_State *L, int errcode) 2722 | bl extern lj_err_throw // (lua_State *L, int errcode)
@@ -3288,6 +3306,29 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3288 | ins_next 3306 | ins_next
3289 break; 3307 break;
3290 3308
3309 case BC_ISTYPE:
3310 | // RA = src*8, RD = -type*8
3311 | lwzx TMP0, BASE, RA // First word of the slot at BASE+RA (the word the type constants are compared with).
3312 | srwi TMP1, RD, 3 // TMP1 = unscaled operand D, i.e. -type.
3313 | ins_next1
3314 |.if not PPE and not GPR64
3315 | add. TMP0, TMP0, TMP1 // tag + (-type): cr0.eq is set iff the tag equals type.
3316 |.else
3317 | neg TMP1, TMP1 // Recover type from -type and compare explicitly.
3318 | cmpw TMP0, TMP1
3319 |.endif
3320 | bne ->vmeta_istype // Mismatch: handled out of line.
3321 | ins_next2
3322 break;
3323 case BC_ISNUM:
3324 | // RA = src*8, RD = -(TISNUM-1)*8
3325 | lwzx TMP0, BASE, RA // First word of the slot at BASE+RA.
3326 | ins_next1
3327 | checknum TMP0
3328 | bge ->vmeta_istype // checknum failed (not a number): handled out of line.
3329 | ins_next2
3330 break;
3331
3291 /* -- Unary ops --------------------------------------------------------- */ 3332 /* -- Unary ops --------------------------------------------------------- */
3292 3333
3293 case BC_MOV: 3334 case BC_MOV:
@@ -4039,6 +4080,30 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4039 | bne <1 // 'no __index' flag set: done. 4080 | bne <1 // 'no __index' flag set: done.
4040 | b ->vmeta_tgetb // Caveat: preserve TMP0! 4081 | b ->vmeta_tgetb // Caveat: preserve TMP0!
4041 break; 4082 break;
4083 case BC_TGETR:
4084 | // RA = dst*8, RB = table*8, RC = key*8
4085 | add RB, BASE, RB // RB = address of the table slot.
4086 | lwz TAB:CARG1, 4(RB) // Table pointer is the word at offset 4; no type check is done here.
4087 |.if DUALNUM
4088 | add RC, BASE, RC
4089 | lwz TMP0, TAB:CARG1->asize
4090 | lwz CARG2, 4(RC) // Integer key is the word at offset 4 of the key slot.
4091 | lwz TMP1, TAB:CARG1->array
4092 |.else
4093 | lfdx f0, BASE, RC // Key is held as a double.
4094 | lwz TMP0, TAB:CARG1->asize
4095 | toint CARG2, f0 // Convert the key to an integer in CARG2.
4096 | lwz TMP1, TAB:CARG1->array
4097 |.endif
4098 | cmplw TMP0, CARG2 // Unsigned compare of asize against key.
4099 | slwi TMP2, CARG2, 3 // Byte offset of the key in the array (8 bytes per slot).
4100 | ble ->vmeta_tgetr // In array part?
4101 | lfdx f14, TMP1, TMP2 // f14 = array[key].
4102 |->BC_TGETR_Z: // Also the re-entry point from vmeta_tgetr, with the value in f14.
4103 | ins_next1
4104 | stfdx f14, BASE, RA // Store the result to the destination slot.
4105 | ins_next2
4106 break;
4042 4107
4043 case BC_TSETV: 4108 case BC_TSETV:
4044 | // RA = src*8, RB = table*8, RC = key*8 4109 | // RA = src*8, RB = table*8, RC = key*8
@@ -4218,6 +4283,39 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4218 | barrierback TAB:RB, TMP3, TMP0 4283 | barrierback TAB:RB, TMP3, TMP0
4219 | b <2 4284 | b <2
4220 break; 4285 break;
4286 case BC_TSETR:
4287 | // RA = src*8, RB = table*8, RC = key*8
4288 | add RB, BASE, RB // RB = address of the table slot (not the table itself).
4289 | lwz TAB:CARG2, 4(RB) // Table pointer is the word at offset 4; no type check is done here.
4290 |.if DUALNUM
4291 | add RC, BASE, RC
4292 | lbz TMP3, TAB:CARG2->marked // GC mark byte must come from the table object, not the stack slot.
4293 | lwz TMP0, TAB:CARG2->asize
4294 | lwz CARG3, 4(RC) // Integer key is the word at offset 4 of the key slot.
4295 | lwz TMP1, TAB:CARG2->array
4296 |.else
4297 | lfdx f0, BASE, RC // Key is held as a double.
4298 | lbz TMP3, TAB:CARG2->marked // GC mark byte must come from the table object, not the stack slot.
4299 | lwz TMP0, TAB:CARG2->asize
4300 | toint CARG3, f0 // Convert the key to an integer in CARG3.
4301 | lwz TMP1, TAB:CARG2->array
4302 |.endif
4303 | andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table)
4304 | bne >7
4305 |2:
4306 | cmplw TMP0, CARG3 // Unsigned compare of asize against key.
4307 | slwi TMP2, CARG3, 3 // Byte offset of the key in the array (8 bytes per slot).
4308 | lfdx f14, BASE, RA // f14 = value to store (also consumed by vmeta_tsetr).
4309 | ble ->vmeta_tsetr // In array part?
4310 | ins_next1
4311 | stfdx f14, TMP1, TMP2 // array[key] = value.
4312 | ins_next2
4313 |
4314 |7: // Possible table write barrier for the value. Skip valiswhite check.
4315 | barrierback TAB:CARG2, TMP3, TMP2
4316 | b <2
4317 break;
4318
4221 4319
4222 case BC_TSETM: 4320 case BC_TSETM:
4223 | // RA = base*8 (table at base-1), RD = num_const*8 (start index) 4321 | // RA = base*8 (table at base-1), RD = num_const*8 (start index)
@@ -4859,8 +4957,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4859 | lp TMP2, TRACE:TMP2->mcode 4957 | lp TMP2, TRACE:TMP2->mcode
4860 | stw BASE, DISPATCH_GL(jit_base)(DISPATCH) 4958 | stw BASE, DISPATCH_GL(jit_base)(DISPATCH)
4861 | mtctr TMP2 4959 | mtctr TMP2
4862 | stw L, DISPATCH_GL(jit_L)(DISPATCH)
4863 | addi JGL, DISPATCH, GG_DISP2G+32768 4960 | addi JGL, DISPATCH, GG_DISP2G+32768
4961 | stw L, DISPATCH_GL(tmpbuf.L)(DISPATCH)
4864 | bctr 4962 | bctr
4865 |.endif 4963 |.endif
4866 break; 4964 break;
@@ -4995,6 +5093,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4995 | lp TMP1, L->top 5093 | lp TMP1, L->top
4996 | li_vmstate INTERP 5094 | li_vmstate INTERP
4997 | lwz PC, FRAME_PC(BASE) // Fetch PC of caller. 5095 | lwz PC, FRAME_PC(BASE) // Fetch PC of caller.
5096 | stw L, DISPATCH_GL(cur_L)(DISPATCH)
4998 | sub RA, TMP1, RD // RA = L->top - nresults*8 5097 | sub RA, TMP1, RD // RA = L->top - nresults*8
4999 | st_vmstate 5098 | st_vmstate
5000 | b ->vm_returnc 5099 | b ->vm_returnc
diff --git a/src/vm_ppcspe.dasc b/src/vm_ppcspe.dasc
deleted file mode 100644
index 53ea2d96..00000000
--- a/src/vm_ppcspe.dasc
+++ /dev/null
@@ -1,3691 +0,0 @@
1|// Low-level VM code for PowerPC/e500 CPUs.
2|// Bytecode interpreter, fast functions and helper functions.
3|// Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
4|
5|.arch ppc
6|.section code_op, code_sub
7|
8|.actionlist build_actionlist
9|.globals GLOB_
10|.globalnames globnames
11|.externnames extnames
12|
13|// Note: The ragged indentation of the instructions is intentional.
14|// The starting columns indicate data dependencies.
15|
16|//-----------------------------------------------------------------------
17|
18|// Fixed register assignments for the interpreter.
19|// Don't use: r1 = sp, r2 and r13 = reserved and/or small data area ptr
20|
21|// The following must be C callee-save (but BASE is often refetched).
22|.define BASE, r14 // Base of current Lua stack frame.
23|.define KBASE, r15 // Constants of current Lua function.
24|.define PC, r16 // Next PC.
25|.define DISPATCH, r17 // Opcode dispatch table.
26|.define LREG, r18 // Register holding lua_State (also in SAVE_L).
27|.define MULTRES, r19 // Size of multi-result: (nresults+1)*8.
28|
29|// Constants for vectorized type-comparisons (hi+low GPR). C callee-save.
30|.define TISNUM, r22
31|.define TISSTR, r23
32|.define TISTAB, r24
33|.define TISFUNC, r25
34|.define TISNIL, r26
35|.define TOBIT, r27
36|.define ZERO, TOBIT // Zero in lo word.
37|
38|// The following temporaries are not saved across C calls, except for RA.
39|.define RA, r20 // Callee-save.
40|.define RB, r10
41|.define RC, r11
42|.define RD, r12
43|.define INS, r7 // Overlaps CARG5.
44|
45|.define TMP0, r0
46|.define TMP1, r8
47|.define TMP2, r9
48|.define TMP3, r6 // Overlaps CARG4.
49|
50|// Saved temporaries.
51|.define SAVE0, r21
52|
53|// Calling conventions.
54|.define CARG1, r3
55|.define CARG2, r4
56|.define CARG3, r5
57|.define CARG4, r6 // Overlaps TMP3.
58|.define CARG5, r7 // Overlaps INS.
59|
60|.define CRET1, r3
61|.define CRET2, r4
62|
63|// Stack layout while in interpreter. Must match with lj_frame.h.
64|.define SAVE_LR, 188(sp)
65|.define CFRAME_SPACE, 184 // Delta for sp.
66|// Back chain for sp: 184(sp) <-- sp entering interpreter
67|.define SAVE_r31, 176(sp) // 64 bit register saves.
68|.define SAVE_r30, 168(sp)
69|.define SAVE_r29, 160(sp)
70|.define SAVE_r28, 152(sp)
71|.define SAVE_r27, 144(sp)
72|.define SAVE_r26, 136(sp)
73|.define SAVE_r25, 128(sp)
74|.define SAVE_r24, 120(sp)
75|.define SAVE_r23, 112(sp)
76|.define SAVE_r22, 104(sp)
77|.define SAVE_r21, 96(sp)
78|.define SAVE_r20, 88(sp)
79|.define SAVE_r19, 80(sp)
80|.define SAVE_r18, 72(sp)
81|.define SAVE_r17, 64(sp)
82|.define SAVE_r16, 56(sp)
83|.define SAVE_r15, 48(sp)
84|.define SAVE_r14, 40(sp)
85|.define SAVE_CR, 36(sp)
86|.define UNUSED1, 32(sp)
87|.define SAVE_ERRF, 28(sp) // 32 bit C frame info.
88|.define SAVE_NRES, 24(sp)
89|.define SAVE_CFRAME, 20(sp)
90|.define SAVE_L, 16(sp)
91|.define SAVE_PC, 12(sp)
92|.define SAVE_MULTRES, 8(sp)
93|// Next frame lr: 4(sp)
94|// Back chain for sp: 0(sp) <-- sp while in interpreter
95|
96|.macro save_, reg; evstdd reg, SAVE_..reg; .endmacro // Store reg (64 bit, evstdd) to its SAVE_<reg> stack slot.
97|.macro rest_, reg; evldd reg, SAVE_..reg; .endmacro // Reload reg (64 bit, evldd) from its SAVE_<reg> stack slot.
98|
99|.macro saveregs // Prologue: allocate the C frame and save r14-r31, LR and CR.
100| stwu sp, -CFRAME_SPACE(sp) // Push the frame; the old sp is stored at the new 0(sp) as back chain.
101| save_ r14; save_ r15; save_ r16; save_ r17; save_ r18; save_ r19
102| mflr r0; mfcr r12
103| save_ r20; save_ r21; save_ r22; save_ r23; save_ r24; save_ r25
104| stw r0, SAVE_LR; stw r12, SAVE_CR // SAVE_LR is 188(sp), i.e. above CFRAME_SPACE (184) in the caller's frame.
105| save_ r26; save_ r27; save_ r28; save_ r29; save_ r30; save_ r31
106|.endmacro
107|
108|.macro restoreregs // Epilogue: restore what saveregs saved and pop the frame.
109| lwz r0, SAVE_LR; lwz r12, SAVE_CR
110| rest_ r14; rest_ r15; rest_ r16; rest_ r17; rest_ r18; rest_ r19
111| mtlr r0; mtcrf 0x38, r12 // Field mask 0x38 restores only CR fields cr2-cr4.
112| rest_ r20; rest_ r21; rest_ r22; rest_ r23; rest_ r24; rest_ r25
113| rest_ r26; rest_ r27; rest_ r28; rest_ r29; rest_ r30; rest_ r31
114| addi sp, sp, CFRAME_SPACE // Pop the frame.
115|.endmacro
116|
117|// Type definitions. Some of these are only used for documentation.
118|.type L, lua_State, LREG
119|.type GL, global_State
120|.type TVALUE, TValue
121|.type GCOBJ, GCobj
122|.type STR, GCstr
123|.type TAB, GCtab
124|.type LFUNC, GCfuncL
125|.type CFUNC, GCfuncC
126|.type PROTO, GCproto
127|.type UPVAL, GCupval
128|.type NODE, Node
129|.type NARGS8, int
130|.type TRACE, GCtrace
131|
132|//-----------------------------------------------------------------------
133|
134|// These basic macros should really be part of DynASM.
135|.macro srwi, rx, ry, n; rlwinm rx, ry, 32-n, n, 31; .endmacro // rx = ry >> n (logical).
136|.macro slwi, rx, ry, n; rlwinm rx, ry, n, 0, 31-n; .endmacro // rx = ry << n.
137|.macro rotlwi, rx, ry, n; rlwinm rx, ry, n, 0, 31; .endmacro // rx = ry rotated left by immediate n.
138|.macro rotlw, rx, ry, rn; rlwnm rx, ry, rn, 0, 31; .endmacro // rx = ry rotated left by register rn.
139|.macro subi, rx, ry, i; addi rx, ry, -i; .endmacro // rx = ry - i, via addi with the negated immediate.
140|
141|// Trap for not-yet-implemented parts.
142|.macro NYI; tw 4, sp, sp; .endmacro // TO=4 traps on equality; sp == sp, so this always traps.
143|
144|//-----------------------------------------------------------------------
145|
146|// Access to frame relative to BASE.
147|.define FRAME_PC, -8
148|.define FRAME_FUNC, -4
149|
150|// Instruction decode.
151|.macro decode_OP4, dst, ins; rlwinm dst, ins, 2, 22, 29; .endmacro // dst = (ins & 0xff) * 4.
152|.macro decode_RA8, dst, ins; rlwinm dst, ins, 27, 21, 28; .endmacro // dst = ((ins >> 8) & 0xff) * 8.
153|.macro decode_RB8, dst, ins; rlwinm dst, ins, 11, 21, 28; .endmacro // dst = ((ins >> 24) & 0xff) * 8.
154|.macro decode_RC8, dst, ins; rlwinm dst, ins, 19, 21, 28; .endmacro // dst = ((ins >> 16) & 0xff) * 8.
155|.macro decode_RD8, dst, ins; rlwinm dst, ins, 19, 13, 28; .endmacro // dst = ((ins >> 16) & 0xffff) * 8.
156|
157|.macro decode_OP1, dst, ins; rlwinm dst, ins, 0, 24, 31; .endmacro // dst = ins & 0xff.
158|.macro decode_RD4, dst, ins; rlwinm dst, ins, 18, 14, 29; .endmacro // dst = ((ins >> 16) & 0xffff) * 4.
159|
160|// Instruction fetch.
161|.macro ins_NEXT1
162| lwz INS, 0(PC) // Load the 32-bit instruction word at PC.
163| addi PC, PC, 4 // PC now points at the following instruction.
164|.endmacro
165|// Instruction decode+dispatch.
166|.macro ins_NEXT2
167| decode_OP4 TMP1, INS // Opcode*4 = byte offset into the dispatch table.
168| decode_RB8 RB, INS
169| decode_RD8 RD, INS
170| lwzx TMP0, DISPATCH, TMP1 // Handler address from DISPATCH[opcode].
171| decode_RA8 RA, INS
172| decode_RC8 RC, INS
173| mtctr TMP0
174| bctr // Jump to the handler with RA/RB/RC/RD predecoded (each scaled by 8).
175|.endmacro
176|.macro ins_NEXT
177| ins_NEXT1
178| ins_NEXT2
179|.endmacro
180|
181|// Instruction footer.
182|.if 1
183| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
184| .define ins_next, ins_NEXT
185| .define ins_next_, ins_NEXT
186| .define ins_next1, ins_NEXT1
187| .define ins_next2, ins_NEXT2
188|.else
189| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
190| // Affects only certain kinds of benchmarks (and only with -j off).
191| .macro ins_next
192| b ->ins_next
193| .endmacro
194| .macro ins_next1
195| .endmacro
196| .macro ins_next2
197| b ->ins_next
198| .endmacro
199| .macro ins_next_
200| ->ins_next:
201| ins_NEXT
202| .endmacro
203|.endif
204|
205|// Call decode and dispatch.
206|.macro ins_callt
207| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
208| lwz PC, LFUNC:RB->pc // PC = the callee's pc field (its first instruction).
209| lwz INS, 0(PC) // Fetch that first instruction ...
210| addi PC, PC, 4 // ... and advance past it.
211| decode_OP4 TMP1, INS
212| decode_RA8 RA, INS
213| lwzx TMP0, DISPATCH, TMP1 // Handler address from DISPATCH[opcode].
214| add RA, RA, BASE // RA = BASE + A*8 (an address here, not an offset).
215| mtctr TMP0
216| bctr
217|.endmacro
218|
219|.macro ins_call
220| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, PC = caller PC
221| stw PC, FRAME_PC(BASE) // Record the caller PC in the new frame, then dispatch as for ins_callt.
222| ins_callt
223|.endmacro
224|
225|//-----------------------------------------------------------------------
226|
227|// Macros to test operand types.
228|.macro checknum, reg; evcmpltu reg, TISNUM; .endmacro // Unsigned vector compare: reg < TISNUM.
229|.macro checkstr, reg; evcmpeq reg, TISSTR; .endmacro // Vector compare: reg == TISSTR.
230|.macro checktab, reg; evcmpeq reg, TISTAB; .endmacro // Vector compare: reg == TISTAB.
231|.macro checkfunc, reg; evcmpeq reg, TISFUNC; .endmacro // Vector compare: reg == TISFUNC.
232|.macro checknil, reg; evcmpeq reg, TISNIL; .endmacro // Vector compare: reg == TISNIL.
233|.macro checkok, label; blt label; .endmacro // Branch if cr0.lt is set. NOTE(review): per the SPE ISA this is the high-word result -- confirm.
234|.macro checkfail, label; bge label; .endmacro // Branch if cr0.lt is clear (inverse of checkok).
235|.macro checkanyfail, label; bns label; .endmacro // Branch if cr0.so is clear. NOTE(review): SO presumably = AND of both word results -- confirm.
236|.macro checkallok, label; bso label; .endmacro // Branch if cr0.so is set (inverse of checkanyfail).
237|
238|.macro branch_RD // Add the biased jump operand to PC. Expects RD = D*8.
239| srwi TMP0, RD, 1 // TMP0 = D*4 (byte offset for 4-byte instructions).
240| add PC, PC, TMP0
241| addis PC, PC, -(BCBIAS_J*4 >> 16) // Remove the bias; addis adds the immediate shifted left by 16.
242|.endmacro
243|
244|// Assumes DISPATCH is relative to GL.
245#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field))
246#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field))
247|
248#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
249|
250|.macro hotloop
251| NYI
252|.endmacro
253|
254|.macro hotcall
255| NYI
256|.endmacro
257|
258|// Set current VM state. Uses TMP0.
259|.macro li_vmstate, st; li TMP0, ~LJ_VMST_..st; .endmacro // TMP0 = ~LJ_VMST_<st>; pair with st_vmstate.
260|.macro st_vmstate; stw TMP0, DISPATCH_GL(vmstate)(DISPATCH); .endmacro // Store TMP0 into the global_State vmstate field.
261|
262|// Move table write barrier back. Overwrites mark and tmp.
263|.macro barrierback, tab, mark, tmp
264| lwz tmp, DISPATCH_GL(gc.grayagain)(DISPATCH) // tmp = current head of gc.grayagain.
265| // Assumes LJ_GC_BLACK is 0x04.
266| rlwinm mark, mark, 0, 30, 28 // black2gray(tab)
267| stw tab, DISPATCH_GL(gc.grayagain)(DISPATCH) // tab becomes the new head of gc.grayagain.
268| stb mark, tab->marked // Write back the mark byte with the 0x04 bit cleared.
269| stw tmp, tab->gclist // Link the old head behind tab.
270|.endmacro
271|
272|//-----------------------------------------------------------------------
273
274/* Generate subroutines used by opcodes and other parts of the VM. */
275/* The .code_sub section should be last to help static branch prediction. */
276static void build_subroutines(BuildCtx *ctx)
277{
278 |.code_sub
279 |
280 |//-----------------------------------------------------------------------
281 |//-- Return handling ----------------------------------------------------
282 |//-----------------------------------------------------------------------
283 |
284 |->vm_returnp:
285 | // See vm_return. Also: TMP2 = previous base.
286 | andi. TMP0, PC, FRAME_P // Test the FRAME_P bit(s) of the frame link in PC.
287 | evsplati TMP1, LJ_TTRUE // TMP1 = true constant, used for the prepend below.
288 | beq ->cont_dispatch // FRAME_P clear: not a pcall/xpcall frame.
289 |
290 | // Return from pcall or xpcall fast func.
291 | lwz PC, FRAME_PC(TMP2) // Fetch PC of previous frame.
292 | mr BASE, TMP2 // Restore caller base.
293 | // Prepending may overwrite the pcall frame, so do it at the end.
294 | stwu TMP1, FRAME_PC(RA) // Prepend true to results.
295 |
296 |->vm_returnc:
297 | addi RD, RD, 8 // RD = (nresults+1)*8.
298 | andi. TMP0, PC, FRAME_TYPE // cr0/TMP0 = frame type bits of PC (TMP0 is consumed by vm_return).
299 | cmpwi cr1, RD, 0 // cr1: did RD become 0 after the increment?
300 | li CRET1, LUA_YIELD // Status returned if the branch below is taken.
301 | beq cr1, ->vm_unwind_c_eh // RD == 0: leave through the C unwind path with LUA_YIELD.
302 | mr MULTRES, RD
303 | beq ->BC_RET_Z // Handle regular return to Lua.
304 |
305 |->vm_return:
306 | // BASE = base, RA = resultptr, RD/MULTRES = (nresults+1)*8, PC = return
307 | // TMP0 = PC & FRAME_TYPE
308 | cmpwi TMP0, FRAME_C
309 | rlwinm TMP2, PC, 0, 0, 28 // TMP2 = PC with the low 3 bits cleared (frame delta).
310 | li_vmstate C
311 | sub TMP2, BASE, TMP2 // TMP2 = previous base.
312 | bne ->vm_returnp // Not a C frame: handled by vm_returnp.
313 |
314 | addic. TMP1, RD, -8 // TMP1 = nresults*8; cr0.eq if there are no results.
315 | stw TMP2, L->base // L->base = previous base.
316 | lwz TMP2, SAVE_NRES
317 | subi BASE, BASE, 8 // Results are copied down starting at BASE-8.
318 | st_vmstate
319 | slwi TMP2, TMP2, 3 // Scale the saved nres value by 8 so it compares against RD.
320 | beq >2 // No results to copy.
321 |1:
322 | addic. TMP1, TMP1, -8
323 | evldd TMP0, 0(RA) // Copy one 8-byte result from RA ...
324 | addi RA, RA, 8
325 | evstdd TMP0, 0(BASE) // ... to BASE.
326 | addi BASE, BASE, 8
327 | bne <1
328 |
329 |2:
330 | cmpw TMP2, RD // More/less results wanted?
331 | bne >6
332 |3:
333 | stw BASE, L->top // Store new top.
334 |
335 |->vm_leave_cp:
336 | lwz TMP0, SAVE_CFRAME // Restore previous C frame.
337 | li CRET1, 0 // Ok return status for vm_pcall.
338 | stw TMP0, L->cframe
339 |
340 |->vm_leave_unw:
341 | restoreregs
342 | blr
343 |
344 |6:
345 | ble >7 // Less results wanted?
346 | // More results wanted. Check stack size and fill up results with nil.
347 | lwz TMP1, L->maxstack
348 | cmplw BASE, TMP1
349 | bge >8 // BASE reached L->maxstack: grow the stack first.
350 | evstdd TISNIL, 0(BASE) // Append one nil.
351 | addi RD, RD, 8
352 | addi BASE, BASE, 8
353 | b <2
354 |
355 |7: // Less results wanted.
356 | sub TMP0, RD, TMP2
357 | cmpwi TMP2, 0 // LUA_MULTRET+1 case?
358 | sub TMP0, BASE, TMP0 // Subtract the difference.
359 | iseleq BASE, BASE, TMP0 // Either keep top or shrink it.
360 | b <3
361 |
362 |8: // Corner case: need to grow stack for filling up results.
363 | // This can happen if:
364 | // - A C function grows the stack (a lot).
365 | // - The GC shrinks the stack in between.
366 | // - A return back from a lua_call() with (high) nresults adjustment.
367 | stw BASE, L->top // Save current top held in BASE (yes).
368 | mr SAVE0, RD // Keep RD in SAVE0 across the C call.
369 | mr CARG2, TMP2
370 | mr CARG1, L
371 | bl extern lj_state_growstack // (lua_State *L, int n)
372 | lwz TMP2, SAVE_NRES // Recompute TMP2 as at function entry; it was not preserved across the call.
373 | mr RD, SAVE0
374 | slwi TMP2, TMP2, 3
375 | lwz BASE, L->top // Need the (realloced) L->top in BASE.
376 | b <2
377 |
378 |->vm_unwind_c: // Unwind C stack, return from vm_pcall.
379 | // (void *cframe, int errcode)
380 | mr sp, CARG1 // sp = cframe.
381 | mr CRET1, CARG2 // Return value = errcode (CRET1 and CARG1 are both r3, hence set after sp).
382 |->vm_unwind_c_eh: // Landing pad for external unwinder.
383 | lwz L, SAVE_L
384 | li TMP0, ~LJ_VMST_C
385 | lwz GL:TMP1, L->glref // DISPATCH is not relied upon here; go through L->glref instead.
386 | stw TMP0, GL:TMP1->vmstate
387 | b ->vm_leave_unw
388 |
389 |->vm_unwind_ff: // Unwind C stack, return from ff pcall.
390 | // (void *cframe)
391 | rlwinm sp, CARG1, 0, 0, 29 // sp = cframe with the low 2 bits cleared.
392 |->vm_unwind_ff_eh: // Landing pad for external unwinder.
393 | lwz L, SAVE_L
394 | evsplati TISNUM, LJ_TISNUM+1 // Setup type comparison constants.
395 | evsplati TISFUNC, LJ_TFUNC
396 | lus TOBIT, 0x4338 // NOTE(review): presumably loads 0x43380000 (immediate shifted left 16) -- confirm 'lus' semantics.
397 | evsplati TISTAB, LJ_TTAB
398 | li TMP0, 0
399 | lwz BASE, L->base
400 | evmergelo TOBIT, TOBIT, TMP0 // hi word = old low word of TOBIT, lo word = 0 (ZERO aliases TOBIT).
401 | lwz DISPATCH, L->glref // Setup pointer to dispatch table.
402 | evsplati TISSTR, LJ_TSTR
403 | li TMP1, LJ_TFALSE
404 | evsplati TISNIL, LJ_TNIL
405 | li_vmstate INTERP
406 | lwz PC, FRAME_PC(BASE) // Fetch PC of previous frame.
407 | la RA, -8(BASE) // Results start at BASE-8.
408 | addi DISPATCH, DISPATCH, GG_G2DISP
409 | stw TMP1, 0(RA) // Prepend false to error message.
410 | li RD, 16 // 2 results: false + error message.
411 | st_vmstate
412 | b ->vm_returnc // vm_returnc adds 8, giving RD = (nresults+1)*8.
413 |
414 |//-----------------------------------------------------------------------
415 |//-- Grow stack for calls -----------------------------------------------
416 |//-----------------------------------------------------------------------
417 |
418 |->vm_growstack_c: // Grow stack for C function.
419 | li CARG2, LUA_MINSTACK // Request LUA_MINSTACK slots.
420 | b >2
421 |
422 |->vm_growstack_l: // Grow stack for Lua function.
423 | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC
424 | add RC, BASE, RC // RC = BASE + nargs*8 = top.
425 | sub RA, RA, BASE // RA = framesize*8.
426 | stw BASE, L->base
427 | addi PC, PC, 4 // Must point after first instruction.
428 | stw RC, L->top
429 | srwi CARG2, RA, 3 // CARG2 = framesize in slots.
430 |2:
431 | // L->base = new base, L->top = top
432 | stw PC, SAVE_PC
433 | mr CARG1, L
434 | bl extern lj_state_growstack // (lua_State *L, int n)
435 | lwz BASE, L->base // Reload BASE and top from L after the call.
436 | lwz RC, L->top
437 | lwz LFUNC:RB, FRAME_FUNC(BASE) // Refetch the function from the frame.
438 | sub RC, RC, BASE // RC = nargs*8 again.
439 | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
440 | ins_callt // Just retry the call.
441 |
442 |//-----------------------------------------------------------------------
443 |//-- Entry points into the assembler VM ---------------------------------
444 |//-----------------------------------------------------------------------
445 |
446 |->vm_resume: // Setup C frame and resume thread.
447 | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
448 | saveregs
449 | mr L, CARG1
450 | lwz DISPATCH, L->glref // Setup pointer to dispatch table.
451 | mr BASE, CARG2
452 | lbz TMP1, L->status // TMP1 = thread status, tested below.
453 | stw L, SAVE_L
454 | li PC, FRAME_CP
455 | addi TMP0, sp, CFRAME_RESUME // TMP0 = sp + CFRAME_RESUME, stored as L->cframe.
456 | addi DISPATCH, DISPATCH, GG_G2DISP
457 | stw CARG3, SAVE_NRES // CARG3 is the nres1 argument (0 per the signature).
458 | cmplwi TMP1, 0 // status == 0?
459 | stw CARG3, SAVE_ERRF
460 | stw TMP0, L->cframe
461 | stw CARG3, SAVE_CFRAME
462 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok.
463 | beq >3 // Status is 0: continue at the shared entry point 3.
464 |
465 | // Resume after yield (like a return).
466 | mr RA, BASE // RA = base argument.
467 | lwz BASE, L->base
468 | evsplati TISNUM, LJ_TISNUM+1 // Setup type comparison constants.
469 | lwz TMP1, L->top
470 | evsplati TISFUNC, LJ_TFUNC
471 | lus TOBIT, 0x4338
472 | evsplati TISTAB, LJ_TTAB
473 | lwz PC, FRAME_PC(BASE)
474 | li TMP2, 0
475 | evsplati TISSTR, LJ_TSTR
476 | sub RD, TMP1, BASE // RD = L->top - L->base.
477 | evmergelo TOBIT, TOBIT, TMP2
478 | stb CARG3, L->status // Store CARG3 (0) as the new thread status.
479 | andi. TMP0, PC, FRAME_TYPE // Sets cr0 for the beq below.
480 | li_vmstate INTERP
481 | addi RD, RD, 8
482 | evsplati TISNIL, LJ_TNIL
483 | mr MULTRES, RD
484 | st_vmstate
485 | beq ->BC_RET_Z // Frame type bits are zero.
486 | b ->vm_return
487 |
488 |->vm_pcall: // Setup protected C frame and enter VM.
489 | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
490 | saveregs
491 | li PC, FRAME_CP
492 | stw CARG4, SAVE_ERRF // Save the ef argument.
493 | b >1
494 |
495 |->vm_call: // Setup C frame and enter VM.
496 | // (lua_State *L, TValue *base, int nres1)
497 | saveregs
498 | li PC, FRAME_C
499 |
500 |1: // Entry point for vm_pcall above (PC = ftype).
501 | lwz TMP1, L:CARG1->cframe // TMP1 = previous L->cframe.
502 | stw CARG3, SAVE_NRES // Save the nres1 argument.
503 | mr L, CARG1
504 | stw CARG1, SAVE_L
505 | mr BASE, CARG2
506 | stw sp, L->cframe // Add our C frame to cframe chain.
507 | lwz DISPATCH, L->glref // Setup pointer to dispatch table.
508 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok.
509 | stw TMP1, SAVE_CFRAME // Remember the previous cframe.
510 | addi DISPATCH, DISPATCH, GG_G2DISP
511 |
512 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
513 | lwz TMP2, L->base // TMP2 = old base (used in vmeta_call).
514 | evsplati TISNUM, LJ_TISNUM+1 // Setup type comparison constants.
515 | lwz TMP1, L->top
516 | evsplati TISFUNC, LJ_TFUNC
517 | add PC, PC, BASE
518 | evsplati TISTAB, LJ_TTAB
519 | lus TOBIT, 0x4338
520 | li TMP0, 0
521 | sub PC, PC, TMP2 // PC = frame delta + frame type
522 | evsplati TISSTR, LJ_TSTR
523 | sub NARGS8:RC, TMP1, BASE // RC = L->top - BASE.
524 | evmergelo TOBIT, TOBIT, TMP0
525 | li_vmstate INTERP
526 | evsplati TISNIL, LJ_TNIL
527 | st_vmstate
528 |
529 |->vm_call_dispatch:
530 | // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC
531 | li TMP0, -8
532 | evlddx LFUNC:RB, BASE, TMP0 // Load the 8-byte slot at BASE-8 into RB.
533 | checkfunc LFUNC:RB
534 | checkfail ->vmeta_call // Not a function: go to vmeta_call.
535 |
536 |->vm_call_dispatch_f:
537 | ins_call
538 | // BASE = new base, RB = func, RC = nargs*8, PC = caller PC
539 |
540 |->vm_cpcall: // Setup protected C frame, call C.
541 | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
542 | saveregs
543 | mr L, CARG1
544 | lwz TMP0, L:CARG1->stack
545 | stw CARG1, SAVE_L
546 | lwz TMP1, L->top
547 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok.
548 | sub TMP0, TMP0, TMP1 // Compute -savestack(L, L->top).
549 | lwz TMP1, L->cframe // TMP1 = previous L->cframe.
550 | stw sp, L->cframe // Add our C frame to cframe chain.
551 | li TMP2, 0
552 | stw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame.
553 | stw TMP2, SAVE_ERRF // No error function.
554 | stw TMP1, SAVE_CFRAME // Remember the previous cframe.
555 | mtctr CARG4 // Call target = cp (fourth argument).
556 | bctrl // (lua_State *L, lua_CFunction func, void *ud)
557 | mr. BASE, CRET1 // BASE = return value; sets cr0 for the bne below.
558 | lwz DISPATCH, L->glref // Setup pointer to dispatch table.
559 | li PC, FRAME_CP
560 | addi DISPATCH, DISPATCH, GG_G2DISP
561 | bne <3 // Else continue with the call.
562 | b ->vm_leave_cp // No base? Just remove C frame.
563 |
564 |//-----------------------------------------------------------------------
565 |//-- Metamethod handling ------------------------------------------------
566 |//-----------------------------------------------------------------------
567 |
568 |// The lj_meta_* functions (except for lj_meta_cat) don't reallocate the
569 |// stack, so BASE doesn't need to be reloaded across these calls.
570 |
571 |//-- Continuation dispatch ----------------------------------------------
572 |
573 |->cont_dispatch:
574 | // BASE = meta base, RA = resultptr, RD = (nresults+1)*8
575 | lwz TMP0, -12(BASE) // Continuation.
576 | mr RB, BASE // RB = meta base.
577 | mr BASE, TMP2 // Restore caller BASE.
578 | lwz LFUNC:TMP1, FRAME_FUNC(TMP2)
579 | cmplwi TMP0, 0 // Continuation == 0?
580 | lwz PC, -16(RB) // Restore PC from [cont|PC].
581 | beq >1 // Zero continuation: handle as a tail call at label 1.
582 | subi TMP2, RD, 8 // TMP2 = nresults*8, offset of the slot after the results.
583 | lwz TMP1, LFUNC:TMP1->pc
584 | evstddx TISNIL, RA, TMP2 // Ensure one valid arg.
585 | lwz KBASE, PC2PROTO(k)(TMP1)
586 | // BASE = base, RA = resultptr, RB = meta base
587 | mtctr TMP0
588 | bctr // Jump to continuation.
589 |
590 |1: // Tail call from C function.
591 | subi TMP1, RB, 16
592 | sub RC, TMP1, BASE // RC = (meta base - 16) - BASE.
593 | b ->vm_call_tail
594 |
595 |->cont_cat: // RA = resultptr, RB = meta base
596 | lwz INS, -4(PC) // Reload the instruction word preceding PC.
597 | subi CARG2, RB, 16 // CARG2 = meta base - 16.
598 | decode_RB8 SAVE0, INS
599 | evldd TMP0, 0(RA) // TMP0 = result value.
600 | add TMP1, BASE, SAVE0 // TMP1 = BASE + RB operand*8.
601 | stw BASE, L->base
602 | cmplw TMP1, CARG2 // Sets cr0 for the bne below.
603 | sub CARG3, CARG2, TMP1
604 | decode_RA8 RA, INS
605 | evstdd TMP0, 0(CARG2) // Store the result at meta base - 16.
606 | bne ->BC_CAT_Z // Slots differ: continue in BC_CAT_Z.
607 | evstddx TMP0, BASE, RA // Slots equal: store the result into BASE[RA].
608 | b ->cont_nop
609 |
610 |//-- Table indexing metamethods -----------------------------------------
611 |
612 |->vmeta_tgets1:
613 | evmergelo STR:RC, TISSTR, STR:RC // Combine TISSTR and the string pointer into one 8-byte key.
614 | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) // Arg 3: key lives in g->tmptv.
615 | decode_RB8 RB, INS
616 | evstdd STR:RC, 0(CARG3)
617 | add CARG2, BASE, RB // Arg 2: object slot BASE+RB.
618 | b >1
619 |
620 |->vmeta_tgets:
621 | evmergelo TAB:RB, TISTAB, TAB:RB // Combine TISTAB and the table pointer.
622 | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) // Arg 2: object lives in g->tmptv.
623 | evmergelo STR:RC, TISSTR, STR:RC
624 | evstdd TAB:RB, 0(CARG2)
625 | la CARG3, DISPATCH_GL(tmptv2)(DISPATCH) // Arg 3: key lives in g->tmptv2.
626 | evstdd STR:RC, 0(CARG3)
627 | b >1
628 |
629 |->vmeta_tgetb: // TMP0 = index
630 | efdcfsi TMP0, TMP0 // Convert the integer index to a double.
631 | decode_RB8 RB, INS
632 | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) // Arg 3: key lives in g->tmptv.
633 | add CARG2, BASE, RB
634 | evstdd TMP0, 0(CARG3)
635 | b >1
636 |
637 |->vmeta_tgetv:
638 | decode_RB8 RB, INS
639 | decode_RC8 RC, INS
640 | add CARG2, BASE, RB // Arg 2: object slot.
641 | add CARG3, BASE, RC // Arg 3: key slot.
642 |1:
643 | stw BASE, L->base
644 | mr CARG1, L
645 | stw PC, SAVE_PC
646 | bl extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k)
647 | // Returns TValue * (finished) or NULL (metamethod).
648 | cmplwi CRET1, 0
649 | beq >3 // NULL: call the metamethod.
650 | evldd TMP0, 0(CRET1) // Copy the returned value into BASE[RA].
651 | evstddx TMP0, BASE, RA
652 | ins_next
653 |
654 |3: // Call __index metamethod.
655 | // BASE = base, L->top = new base, stack = cont/func/t/k
656 | subfic TMP1, BASE, FRAME_CONT // TMP1 = FRAME_CONT - old base.
657 | lwz BASE, L->top // BASE = new base.
658 | stw PC, -16(BASE) // [cont|PC]
659 | add PC, TMP1, BASE // PC = (new base - old base) + FRAME_CONT.
660 | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
661 | li NARGS8:RC, 16 // 2 args for func(t, k).
662 | b ->vm_call_dispatch_f
663 |
664 |//-----------------------------------------------------------------------
665 |
666 |->vmeta_tsets1:
667 | evmergelo STR:RC, TISSTR, STR:RC // Combine TISSTR and the string pointer into one 8-byte key.
668 | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) // Arg 3: key lives in g->tmptv.
669 | decode_RB8 RB, INS
670 | evstdd STR:RC, 0(CARG3)
671 | add CARG2, BASE, RB // Arg 2: object slot BASE+RB.
672 | b >1
673 |
674 |->vmeta_tsets:
675 | evmergelo TAB:RB, TISTAB, TAB:RB // Combine TISTAB and the table pointer.
676 | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) // Arg 2: object lives in g->tmptv.
677 | evmergelo STR:RC, TISSTR, STR:RC
678 | evstdd TAB:RB, 0(CARG2)
679 | la CARG3, DISPATCH_GL(tmptv2)(DISPATCH) // Arg 3: key lives in g->tmptv2.
680 | evstdd STR:RC, 0(CARG3)
681 | b >1
682 |
683 |->vmeta_tsetb: // TMP0 = index
684 | efdcfsi TMP0, TMP0 // Convert the integer index to a double.
685 | decode_RB8 RB, INS
686 | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) // Arg 3: key lives in g->tmptv.
687 | add CARG2, BASE, RB
688 | evstdd TMP0, 0(CARG3)
689 | b >1
690 |
691 |->vmeta_tsetv:
692 | decode_RB8 RB, INS
693 | decode_RC8 RC, INS
694 | add CARG2, BASE, RB // Arg 2: object slot.
695 | add CARG3, BASE, RC // Arg 3: key slot.
696 |1:
697 | stw BASE, L->base
698 | mr CARG1, L
699 | stw PC, SAVE_PC
700 | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
701 | // Returns TValue * (finished) or NULL (metamethod).
702 | cmplwi CRET1, 0
703 | evlddx TMP0, BASE, RA // TMP0 = value to store, from BASE[RA].
704 | beq >3 // NULL: call the metamethod.
705 | // NOBARRIER: lj_meta_tset ensures the table is not black.
706 | evstdd TMP0, 0(CRET1) // Store the value into the returned slot.
707 | ins_next
708 |
709 |3: // Call __newindex metamethod.
710 | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
711 | subfic TMP1, BASE, FRAME_CONT // TMP1 = FRAME_CONT - old base.
712 | lwz BASE, L->top // BASE = new base.
713 | stw PC, -16(BASE) // [cont|PC]
714 | add PC, TMP1, BASE // PC = (new base - old base) + FRAME_CONT.
715 | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
716 | li NARGS8:RC, 24 // 3 args for func(t, k, v)
717 | evstdd TMP0, 16(BASE) // Copy value to third argument.
718 | b ->vm_call_dispatch_f
719 |
720 |//-- Comparison metamethods ---------------------------------------------
721 |
722 |->vmeta_comp:
723 | mr CARG1, L
724 | subi PC, PC, 4 // Step PC back by one instruction word.
725 | add CARG2, BASE, RA // Arg 2: o1 = BASE+RA.
726 | stw PC, SAVE_PC
727 | add CARG3, BASE, RD // Arg 3: o2 = BASE+RD.
728 | stw BASE, L->base
729 | decode_OP1 CARG4, INS // Arg 4: op.
730 | bl extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
731 | // Returns 0/1 or TValue * (metamethod).
732 |3:
733 | cmplwi CRET1, 1
734 | bgt ->vmeta_binop // Result > 1: it is a TValue *, call the metamethod.
735 |4:
736 | lwz INS, 0(PC) // Load the instruction at PC and step past it.
737 | addi PC, PC, 4
738 | decode_RD4 TMP2, INS
739 | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
740 | add TMP2, TMP2, TMP3 // TMP2 = jump target.
741 | isellt PC, PC, TMP2 // Keep PC if cr0.lt is set, else take the jump target.
742 |->cont_nop:
743 | ins_next
744 |
745 |->cont_ra: // RA = resultptr
746 | lwz INS, -4(PC)
747 | evldd TMP0, 0(RA)
748 | decode_RA8 TMP1, INS
749 | evstddx TMP0, BASE, TMP1 // Store the result into BASE[RA operand].
750 | b ->cont_nop
751 |
752 |->cont_condt: // RA = resultptr
753 | lwz TMP0, 0(RA) // TMP0 = first word of the result.
754 | li TMP1, LJ_TTRUE
755 | cmplw TMP1, TMP0 // Branch if result is true.
756 | b <4
757 |
758 |->cont_condf: // RA = resultptr
759 | lwz TMP0, 0(RA) // TMP0 = first word of the result.
760 | li TMP1, LJ_TFALSE
761 | cmplw TMP0, TMP1 // Branch if result is false.
762 | b <4
763 |
764 |->vmeta_equal:
765 | // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV.
766 | subi PC, PC, 4 // Step PC back by one instruction word.
767 | stw BASE, L->base
768 | mr CARG1, L
769 | stw PC, SAVE_PC
770 | bl extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne)
771 | // Returns 0/1 or TValue * (metamethod).
772 | b <3 // Share the result handling at label 3.
773 |
774 |//-- Arithmetic metamethods ---------------------------------------------
775 |
776 |->vmeta_arith_vn:
777 | add CARG3, BASE, RB // Arg 3: rb = stack slot BASE+RB.
778 | add CARG4, KBASE, RC // Arg 4: rc = constant slot KBASE+RC.
779 | b >1
780 |
781 |->vmeta_arith_nv:
782 | add CARG3, KBASE, RC // Arg 3: rb = constant slot KBASE+RC.
783 | add CARG4, BASE, RB // Arg 4: rc = stack slot BASE+RB.
784 | b >1
785 |
786 |->vmeta_unm:
787 | add CARG3, BASE, RD // Arg 3: operand slot BASE+RD.
788 | mr CARG4, CARG3 // Arg 4: same operand again.
789 | b >1
790 |
791 |->vmeta_arith_vv:
792 | add CARG3, BASE, RB // Arg 3: rb = BASE+RB.
793 | add CARG4, BASE, RC // Arg 4: rc = BASE+RC.
794 |1:
795 | add CARG2, BASE, RA // Arg 2: ra = BASE+RA.
796 | stw BASE, L->base
797 | mr CARG1, L
798 | stw PC, SAVE_PC
799 | decode_OP1 CARG5, INS // Caveat: CARG5 overlaps INS.
800 | bl extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
801 | // Returns NULL (finished) or TValue * (metamethod).
802 | cmplwi CRET1, 0
803 | beq ->cont_nop // NULL: finished, dispatch the next instruction.
804 |
805 | // Call metamethod for binary op.
806 |->vmeta_binop:
807 | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2
808 | sub TMP1, CRET1, BASE // TMP1 = new base - old base.
809 | stw PC, -16(CRET1) // [cont|PC]
810 | mr TMP2, BASE // TMP2 = old base, as vm_call_dispatch expects.
811 | addi PC, TMP1, FRAME_CONT // PC = frame delta + FRAME_CONT.
812 | mr BASE, CRET1
813 | li NARGS8:RC, 16 // 2 args for func(o1, o2).
814 | b ->vm_call_dispatch
815 |
816 |->vmeta_len: // Calls lj_meta_len for the operand at BASE+RD.
817#if LJ_52
818 | mr SAVE0, CARG1 // Keep CARG1 across the call; restored before BC_LEN_Z.
819#endif
820 | add CARG2, BASE, RD // Arg 2: o = BASE+RD.
821 | stw BASE, L->base
822 | mr CARG1, L
823 | stw PC, SAVE_PC
824 | bl extern lj_meta_len // (lua_State *L, TValue *o)
825 | // Returns NULL (retry) or TValue * (metamethod base).
826#if LJ_52
827 | cmplwi CRET1, 0
828 | bne ->vmeta_binop // Binop call for compatibility.
829 | mr CARG1, SAVE0 // NULL: restore CARG1 and retry in BC_LEN_Z.
830 | b ->BC_LEN_Z
831#else
832 | b ->vmeta_binop // Binop call for compatibility.
833#endif
834 |
835 |//-- Call metamethod ----------------------------------------------------
836 |
837 |->vmeta_call: // Resolve and call __call metamethod.
838 | // TMP2 = old base, BASE = new base, RC = nargs*8
839 | mr CARG1, L
840 | stw TMP2, L->base // This is the caller's base!
841 | subi CARG2, BASE, 8 // Arg 2: func = BASE-8.
842 | stw PC, SAVE_PC
843 | add CARG3, BASE, RC // Arg 3: top = BASE + nargs*8.
844 | mr SAVE0, NARGS8:RC // Keep nargs*8 across the call.
845 | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
846 | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
847 | addi NARGS8:RC, SAVE0, 8 // Got one more argument now.
848 | ins_call
849 |
850 |->vmeta_callt: // Resolve __call for BC_CALLT.
851 | // BASE = old base, RA = new base, RC = nargs*8
852 | mr CARG1, L
853 | stw BASE, L->base
854 | subi CARG2, RA, 8 // Arg 2: func = RA-8.
855 | stw PC, SAVE_PC
856 | add CARG3, RA, RC // Arg 3: top = RA + nargs*8.
857 | mr SAVE0, NARGS8:RC // Keep nargs*8 across the call.
858 | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
859 | lwz TMP1, FRAME_PC(BASE)
860 | addi NARGS8:RC, SAVE0, 8 // Got one more argument now.
861 | lwz LFUNC:RB, FRAME_FUNC(RA) // Guaranteed to be a function here.
862 | b ->BC_CALLT_Z
863 |
864 |//-- Argument coercion for 'for' statement ------------------------------
865 |
866 |->vmeta_for: // Calls lj_meta_for on the slots at RA, then re-dispatches the loop instruction.
867 | mr CARG1, L
868 | stw BASE, L->base
869 | mr CARG2, RA // Arg 2: base = RA.
870 | stw PC, SAVE_PC
871 | mr SAVE0, INS // Keep the instruction word across the call.
872 | bl extern lj_meta_for // (lua_State *L, TValue *base)
873 |.if JIT
874 | decode_OP1 TMP0, SAVE0 // Opcode, compared against BC_JFORI below.
875 |.endif
876 | decode_RA8 RA, SAVE0 // Re-decode the operands from the saved instruction.
877 |.if JIT
878 | cmpwi TMP0, BC_JFORI
879 |.endif
880 | decode_RD8 RD, SAVE0
881 |.if JIT
882 | beq =>BC_JFORI
883 |.endif
884 | b =>BC_FORI
885 |
886 |//-----------------------------------------------------------------------
887 |//-- Fast functions -----------------------------------------------------
888 |//-----------------------------------------------------------------------
889 |
890 |.macro .ffunc, name
891 |->ff_ .. name: // Defines the global label ff_<name>; no argument checks.
892 |.endmacro
893 |
894 |.macro .ffunc_1, name
895 |->ff_ .. name:
896 | cmplwi NARGS8:RC, 8 // nargs*8 against 8, i.e. one argument.
897 | evldd CARG1, 0(BASE) // CARG1 = first argument.
898 | blt ->fff_fallback // Fewer than 1 argument.
899 |.endmacro
900 |
901 |.macro .ffunc_2, name
902 |->ff_ .. name:
903 | cmplwi NARGS8:RC, 16 // nargs*8 against 16, i.e. two arguments.
904 | evldd CARG1, 0(BASE) // CARG1 = first argument.
905 | evldd CARG2, 8(BASE) // CARG2 = second argument.
906 | blt ->fff_fallback // Fewer than 2 arguments.
907 |.endmacro
908 |
909 |.macro .ffunc_n, name
910 | .ffunc_1 name
911 | checknum CARG1
912 | checkfail ->fff_fallback // First argument fails the number check.
913 |.endmacro
914 |
915 |.macro .ffunc_nn, name
916 | .ffunc_2 name
917 | evmergehi TMP0, CARG1, CARG2 // TMP0 = [high word of CARG1 | high word of CARG2].
918 | checknum TMP0
919 | checkanyfail ->fff_fallback // Either argument fails the number check.
920 |.endmacro
921 |
922 |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1.
923 |.macro ffgccheck
924 | lwz TMP0, DISPATCH_GL(gc.total)(DISPATCH)
925 | lwz TMP1, DISPATCH_GL(gc.threshold)(DISPATCH)
926 | cmplw TMP0, TMP1
927 | bgel ->fff_gcstep // Call fff_gcstep if gc.total >= gc.threshold.
928 |.endmacro
929 |
930 |//-- Base library: checks -----------------------------------------------
931 |
932 |.ffunc assert
933 | cmplwi NARGS8:RC, 8 // cr0 is consumed by the blt and the later beq.
934 | evldd TMP0, 0(BASE) // TMP0 = first argument.
935 | blt ->fff_fallback // No arguments.
936 | evaddw TMP1, TISNIL, TISNIL // Synthesize LJ_TFALSE.
937 | la RA, -8(BASE) // Results start at BASE-8.
938 | evcmpltu cr1, TMP0, TMP1
939 | lwz PC, FRAME_PC(BASE)
940 | bge cr1, ->fff_fallback
941 | evstdd TMP0, 0(RA) // First result = first argument.
942 | addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8.
943 | beq ->fff_res // Done if exactly 1 argument.
944 | li TMP1, 8
945 | subi RC, RC, 8 // RC = offset of the last argument.
946 |1: // Copy the remaining arguments to the result slots.
947 | cmplw TMP1, RC
948 | evlddx TMP0, BASE, TMP1
949 | evstddx TMP0, RA, TMP1
950 | addi TMP1, TMP1, 8
951 | bne <1 // Loop until the offset just copied equals RC.
952 | b ->fff_res
953 |
954 |.ffunc type
955 | cmplwi NARGS8:RC, 8
956 | lwz CARG1, 0(BASE) // CARG1 = first word of the argument (compared with TISNUM below).
957 | blt ->fff_fallback // No arguments.
958 | li TMP2, ~LJ_TNUMX
959 | cmplw CARG1, TISNUM
960 | not TMP1, CARG1
961 | isellt TMP1, TMP2, TMP1 // Index = ~LJ_TNUMX if CARG1 < TISNUM, else ~CARG1.
962 | slwi TMP1, TMP1, 3 // Scale the index by 8.
963 | la TMP2, CFUNC:RB->upvalue
964 | evlddx STR:CRET1, TMP2, TMP1 // Result = 8-byte upvalue at that index.
965 | b ->fff_restv
966 |
967 |//-- Base library: getters and setters ---------------------------------
968 |
969 |.ffunc_1 getmetatable
970 | checktab CARG1
971 | evmergehi TMP1, CARG1, CARG1 // TMP1 = high word of the argument, used at label 6.
972 | checkfail >6 // Not a table: handle userdata and base metatables at 6.
973 |1: // Field metatable must be at same offset for GCtab and GCudata!
974 | lwz TAB:RB, TAB:CARG1->metatable
975 |2:
976 | evmr CRET1, TISNIL // Default result: nil.
977 | cmplwi TAB:RB, 0
978 | lwz STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH)
979 | beq ->fff_restv // No metatable: return nil.
980 | lwz TMP0, TAB:RB->hmask
981 | evmergelo CRET1, TISTAB, TAB:RB // Use metatable as default result.
982 | lwz TMP1, STR:RC->hash
983 | lwz NODE:TMP2, TAB:RB->node
984 | evmergelo STR:RC, TISSTR, STR:RC // Build the 8-byte string key for comparison.
985 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
986 | slwi TMP0, TMP1, 5
987 | slwi TMP1, TMP1, 3
988 | sub TMP1, TMP0, TMP1
989 | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
990 |3: // Rearranged logic, because we expect _not_ to find the key.
991 | evldd TMP0, NODE:TMP2->key
992 | evldd TMP1, NODE:TMP2->val
993 | evcmpeq TMP0, STR:RC
994 | lwz NODE:TMP2, NODE:TMP2->next // Advance to the next node in the chain.
995 | checkallok >5 // Key matches: check the value at 5.
996 | cmplwi NODE:TMP2, 0
997 | beq ->fff_restv // Not found, keep default result.
998 | b <3
999 |5:
1000 | checknil TMP1
1001 | checkok ->fff_restv // Ditto for nil value.
1002 | evmr CRET1, TMP1 // Return value of mt.__metatable.
1003 | b ->fff_restv
1004 |
1005 |6:
1006 | cmpwi TMP1, LJ_TUDATA
1007 | not TMP1, TMP1
1008 | beq <1 // Userdata: read the metatable field as for tables.
1009 | checknum CARG1
1010 | slwi TMP1, TMP1, 2 // Scale the inverted tag by 4.
1011 | li TMP2, 4*~LJ_TNUMX
1012 | isellt TMP1, TMP2, TMP1 // Use the LJ_TNUMX offset if cr0.lt is set by checknum.
1013 | la TMP2, DISPATCH_GL(gcroot[GCROOT_BASEMT])(DISPATCH)
1014 | lwzx TAB:RB, TMP2, TMP1 // RB = base metatable entry at that offset.
1015 | b <2
1016 |
1017 |.ffunc_2 setmetatable
1018 | // Fast path: no mt for table yet and not clearing the mt.
1019 | evmergehi TMP0, TAB:CARG1, TAB:CARG2 // TMP0 = high words of both arguments.
1020 | checktab TMP0
1021 | checkanyfail ->fff_fallback // Either argument is not a table.
1022 | lwz TAB:TMP1, TAB:CARG1->metatable
1023 | cmplwi TAB:TMP1, 0
1024 | lbz TMP3, TAB:CARG1->marked
1025 | bne ->fff_fallback // Table already has a metatable.
1026 | andi. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
1027 | stw TAB:CARG2, TAB:CARG1->metatable
1028 | beq ->fff_restv // Not black: done, return the table in CRET1.
1029 | barrierback TAB:CARG1, TMP3, TMP0
1030 | b ->fff_restv
1031 |
1032 |.ffunc rawget
1033 | cmplwi NARGS8:RC, 16
1034 | evldd CARG2, 0(BASE) // CARG2 = first argument (the table).
1035 | blt ->fff_fallback // Fewer than 2 arguments.
1036 | checktab CARG2
1037 | la CARG3, 8(BASE) // Arg 3: key = pointer to the second argument.
1038 | checkfail ->fff_fallback // First argument is not a table.
1039 | mr CARG1, L
1040 | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
1041 | // Returns cTValue *.
1042 | evldd CRET1, 0(CRET1) // Load the value from the returned slot.
1043 | b ->fff_restv
1044 |
1045 |//-- Base library: conversions ------------------------------------------
1046 |
1047 |.ffunc tonumber
1048 | // Only handles the number case inline (without a base argument).
1049 | cmplwi NARGS8:RC, 8
1050 | evldd CARG1, 0(BASE) // CARG1 = first argument.
1051 | bne ->fff_fallback // Exactly one argument.
1052 | checknum CARG1
1053 | checkok ->fff_restv // Already a number: return it unchanged.
1054 | b ->fff_fallback
1055 |
1056 |.ffunc_1 tostring
1057 | // Only handles the string or number case inline.
1058 | checkstr CARG1
1059 | // A __tostring method in the string base metatable is ignored.
1060 | checkok ->fff_restv // String key?
1061 | // Handle numbers inline, unless a number base metatable is present.
1062 | lwz TMP0, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH)
1063 | checknum CARG1
1064 | cmplwi cr1, TMP0, 0 // cr1.eq = no number base metatable.
1065 | stw BASE, L->base // Add frame since C call can throw.
1066 | crand 4*cr0+eq, 4*cr0+lt, 4*cr1+eq // cr0.eq = cr0.lt (from checknum) && cr1.eq.
1067 | stw PC, SAVE_PC // Redundant (but a defined value).
1068 | bne ->fff_fallback
1069 | ffgccheck
1070 | mr CARG1, L
1071 | mr CARG2, BASE // Arg 2: np = pointer to the first argument.
1072 | bl extern lj_str_fromnum // (lua_State *L, lua_Number *np)
1073 | // Returns GCstr *.
1074 | evmergelo STR:CRET1, TISSTR, STR:CRET1 // Combine TISSTR with the returned pointer.
1075 | b ->fff_restv
1076 |
1077 |//-- Base library: iterators -------------------------------------------
1078 |
1079 |.ffunc next
1080 | cmplwi NARGS8:RC, 8
1081 | evldd CARG2, 0(BASE) // CARG2 = first argument (the table).
1082 | blt ->fff_fallback // No arguments.
1083 | evstddx TISNIL, BASE, NARGS8:RC // Set missing 2nd arg to nil.
1084 | checktab TAB:CARG2
1085 | lwz PC, FRAME_PC(BASE)
1086 | checkfail ->fff_fallback // First argument is not a table.
1087 | stw BASE, L->base // Add frame since C call can throw.
1088 | mr CARG1, L
1089 | stw BASE, L->top // Dummy frame length is ok.
1090 | la CARG3, 8(BASE) // Arg 3: key = pointer to the second argument slot.
1091 | stw PC, SAVE_PC
1092 | bl extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
1093 | // Returns 0 at end of traversal.
1094 | cmplwi CRET1, 0
1095 | evmr CRET1, TISNIL
1096 | beq ->fff_restv // End of traversal: return nil.
1097 | evldd TMP0, 8(BASE) // Copy key and value to results.
1098 | la RA, -8(BASE)
1099 | evldd TMP1, 16(BASE)
1100 | evstdd TMP0, 0(RA)
1101 | li RD, (2+1)*8
1102 | evstdd TMP1, 8(RA)
1103 | b ->fff_res
1104 |
1105 |.ffunc_1 pairs
1106 | checktab TAB:CARG1
1107 | lwz PC, FRAME_PC(BASE)
1108 | checkfail ->fff_fallback // Argument is not a table.
1109#if LJ_52
1110 | lwz TAB:TMP2, TAB:CARG1->metatable
1111 | evldd CFUNC:TMP0, CFUNC:RB->upvalue[0]
1112 | cmplwi TAB:TMP2, 0
1113 | la RA, -8(BASE)
1114 | bne ->fff_fallback // Table has a metatable: use the fallback.
1115#else
1116 | evldd CFUNC:TMP0, CFUNC:RB->upvalue[0]
1117 | la RA, -8(BASE)
1118#endif
1119 | evstdd TISNIL, 8(BASE) // Third result: nil.
1120 | li RD, (3+1)*8 // 3 results.
1121 | evstdd CFUNC:TMP0, 0(RA) // First result: upvalue[0].
1122 | b ->fff_res
1123 |
1124 |.ffunc_2 ipairs_aux
1125 | checktab TAB:CARG1
1126 | lwz PC, FRAME_PC(BASE)
1127 | checkfail ->fff_fallback // First argument is not a table.
1128 | checknum CARG2
1129 | lus TMP3, 0x3ff0
1130 | checkfail ->fff_fallback // Second argument fails the number check.
1131 | efdctsi TMP2, CARG2 // TMP2 = index converted to a signed integer.
1132 | lwz TMP0, TAB:CARG1->asize
1133 | evmergelo TMP3, TMP3, ZERO
1134 | lwz TMP1, TAB:CARG1->array
1135 | efdadd CARG2, CARG2, TMP3 // Add the constant built in TMP3 to the index (double).
1136 | addi TMP2, TMP2, 1 // Integer index + 1.
1137 | la RA, -8(BASE)
1138 | cmplw TMP0, TMP2 // asize against the new index.
1139 | slwi TMP3, TMP2, 3 // Byte offset into the array part.
1140 | evstdd CARG2, 0(RA) // First result: the incremented index.
1141 | ble >2 // Not in array part?
1142 | evlddx TMP1, TMP1, TMP3 // TMP1 = array[index].
1143 |1:
1144 | checknil TMP1
1145 | li RD, (0+1)*8
1146 | checkok ->fff_res // End of iteration, return 0 results.
1147 | li RD, (2+1)*8
1148 | evstdd TMP1, 8(RA) // Second result: the value.
1149 | b ->fff_res
1150 |2: // Check for empty hash part first. Otherwise call C function.
1151 | lwz TMP0, TAB:CARG1->hmask
1152 | cmplwi TMP0, 0
1153 | li RD, (0+1)*8
1154 | beq ->fff_res // hmask is 0: return 0 results.
1155 | mr CARG2, TMP2 // Arg 2: key = integer index.
1156 | bl extern lj_tab_getinth // (GCtab *t, int32_t key)
1157 | // Returns cTValue * or NULL.
1158 | cmplwi CRET1, 0
1159 | li RD, (0+1)*8
1160 | beq ->fff_res // NULL: return 0 results.
1161 | evldd TMP1, 0(CRET1)
1162 | b <1
1163 |
1164 |.ffunc_1 ipairs
1165 | checktab TAB:CARG1
1166 | lwz PC, FRAME_PC(BASE)
1167 | checkfail ->fff_fallback // Argument is not a table.
1168#if LJ_52
1169 | lwz TAB:TMP2, TAB:CARG1->metatable
1170 | evldd CFUNC:TMP0, CFUNC:RB->upvalue[0]
1171 | cmplwi TAB:TMP2, 0
1172 | la RA, -8(BASE)
1173 | bne ->fff_fallback // Table has a metatable: use the fallback.
1174#else
1175 | evldd CFUNC:TMP0, CFUNC:RB->upvalue[0]
1176 | la RA, -8(BASE)
1177#endif
1178 | evsplati TMP1, 0 // TMP1 = all-zero 8-byte value.
1179 | li RD, (3+1)*8 // 3 results.
1180 | evstdd TMP1, 8(BASE) // Third result: the zero value.
1181 | evstdd CFUNC:TMP0, 0(RA) // First result: upvalue[0].
1182 | b ->fff_res
1183 |
1184 |//-- Base library: catch errors ----------------------------------------
1185 |
1186 |.ffunc pcall
1187 | cmplwi NARGS8:RC, 8
1188 | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH)
1189 | blt ->fff_fallback // No arguments.
1190 | mr TMP2, BASE // TMP2 = old base, as vm_call_dispatch expects.
1191 | la BASE, 8(BASE) // New base is one slot up.
1192 | // Remember active hook before pcall.
1193 | rlwinm TMP3, TMP3, 32-HOOK_ACTIVE_SHIFT, 31, 31
1194 | subi NARGS8:RC, NARGS8:RC, 8 // One argument fewer.
1195 | addi PC, TMP3, 8+FRAME_PCALL // PC = hook bit + frame delta 8 + FRAME_PCALL.
1196 | b ->vm_call_dispatch
1197 |
1198 |.ffunc_2 xpcall
1199 | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH)
1200 | mr TMP2, BASE // TMP2 = old base, as vm_call_dispatch expects.
1201 | checkfunc CARG2 // Traceback must be a function.
1202 | checkfail ->fff_fallback
1203 | la BASE, 16(BASE) // New base is two slots up.
1204 | // Remember active hook before pcall.
1205 | rlwinm TMP3, TMP3, 32-HOOK_ACTIVE_SHIFT, 31, 31
1206 | evstdd CARG2, 0(TMP2) // Swap function and traceback.
1207 | subi NARGS8:RC, NARGS8:RC, 16 // Two arguments fewer.
1208 | evstdd CARG1, 8(TMP2)
1209 | addi PC, TMP3, 16+FRAME_PCALL // PC = hook bit + frame delta 16 + FRAME_PCALL.
1210 | b ->vm_call_dispatch
1211 |
1212 |//-- Coroutine library --------------------------------------------------
1213 |
1214 |.macro coroutine_resume_wrap, resume
1215 |.if resume
1216 |.ffunc_1 coroutine_resume
1217 | evmergehi TMP0, L:CARG1, L:CARG1 // TMP0 = high word of the first argument.
1218 |.else
1219 |.ffunc coroutine_wrap_aux
1220 | lwz L:CARG1, CFUNC:RB->upvalue[0].gcr // Coroutine comes from upvalue[0].
1221 |.endif
1222 |.if resume
1223 | cmpwi TMP0, LJ_TTHREAD
1224 | bne ->fff_fallback // First argument is not a thread.
1225 |.endif
1226 | lbz TMP0, L:CARG1->status
1227 | lwz TMP1, L:CARG1->cframe
1228 | lwz CARG2, L:CARG1->top
1229 | cmplwi cr0, TMP0, LUA_YIELD
1230 | lwz TMP2, L:CARG1->base
1231 | cmplwi cr1, TMP1, 0
1232 | lwz TMP0, L:CARG1->maxstack
1233 | cmplw cr7, CARG2, TMP2
1234 | lwz PC, FRAME_PC(BASE)
1235 | crorc 4*cr6+lt, 4*cr0+gt, 4*cr1+eq // st>LUA_YIELD || cframe!=0
1236 | add TMP2, CARG2, NARGS8:RC // TMP2 = co->top + nargs*8.
1237 | crandc 4*cr6+gt, 4*cr7+eq, 4*cr0+eq // base==top && st!=LUA_YIELD
1238 | cmplw cr1, TMP2, TMP0 // New top against co->maxstack.
1239 | cror 4*cr6+lt, 4*cr6+lt, 4*cr6+gt
1240 | stw PC, SAVE_PC
1241 | cror 4*cr6+lt, 4*cr6+lt, 4*cr1+gt // cond1 || cond2 || stackov
1242 | stw BASE, L->base
1243 | blt cr6, ->fff_fallback // Any condition set: use the fallback.
1244 |1:
1245 |.if resume
1246 | addi BASE, BASE, 8 // Keep resumed thread in stack for GC.
1247 | subi NARGS8:RC, NARGS8:RC, 8
1248 | subi TMP2, TMP2, 8
1249 |.endif
1250 | stw TMP2, L:CARG1->top // Set the coroutine's new top.
1251 | li TMP1, 0
1252 | stw BASE, L->top
1253 |2: // Move args to coroutine.
1254 | cmpw TMP1, NARGS8:RC
1255 | evlddx TMP0, BASE, TMP1
1256 | beq >3 // All arguments copied.
1257 | evstddx TMP0, CARG2, TMP1
1258 | addi TMP1, TMP1, 8
1259 | b <2
1260 |3:
1261 | li CARG3, 0
1262 | mr L:SAVE0, L:CARG1 // Keep the coroutine pointer across the call.
1263 | li CARG4, 0
1264 | bl ->vm_resume // (lua_State *L, TValue *base, 0, 0)
1265 | // Returns thread status.
1266 |4:
1267 | lwz TMP2, L:SAVE0->base
1268 | cmplwi CRET1, LUA_YIELD
1269 | lwz TMP3, L:SAVE0->top
1270 | li_vmstate INTERP
1271 | lwz BASE, L->base
1272 | st_vmstate
1273 | bgt >8 // Status > LUA_YIELD: error return at label 8.
1274 | sub RD, TMP3, TMP2 // RD = co->top - co->base, the size of the results.
1275 | lwz TMP0, L->maxstack
1276 | cmplwi RD, 0
1277 | add TMP1, BASE, RD
1278 | beq >6 // No results?
1279 | cmplw TMP1, TMP0
1280 | li TMP1, 0
1281 | bgt >9 // Need to grow stack?
1282 |
1283 | subi TMP3, RD, 8 // TMP3 = offset of the last result.
1284 | stw TMP2, L:SAVE0->top // Clear coroutine stack.
1285 |5: // Move results from coroutine.
1286 | cmplw TMP1, TMP3
1287 | evlddx TMP0, TMP2, TMP1
1288 | evstddx TMP0, BASE, TMP1
1289 | addi TMP1, TMP1, 8
1290 | bne <5
1291 |6:
1292 | andi. TMP0, PC, FRAME_TYPE // Sets cr0 for the beq after label 7.
1293 |.if resume
1294 | li TMP1, LJ_TTRUE
1295 | la RA, -8(BASE)
1296 | stw TMP1, -8(BASE) // Prepend true to results.
1297 | addi RD, RD, 16
1298 |.else
1299 | mr RA, BASE
1300 | addi RD, RD, 8
1301 |.endif
1302 |7:
1303 | stw PC, SAVE_PC
1304 | mr MULTRES, RD
1305 | beq ->BC_RET_Z // Frame type bits are zero.
1306 | b ->vm_return
1307 |
1308 |8: // Coroutine returned with error (at co->top-1).
1309 |.if resume
1310 | andi. TMP0, PC, FRAME_TYPE // Sets cr0 for the beq after label 7.
1311 | la TMP3, -8(TMP3)
1312 | li TMP1, LJ_TFALSE
1313 | evldd TMP0, 0(TMP3) // TMP0 = error value at co->top-1.
1314 | stw TMP3, L:SAVE0->top // Remove error from coroutine stack.
1315 | li RD, (2+1)*8
1316 | stw TMP1, -8(BASE) // Prepend false to results.
1317 | la RA, -8(BASE)
1318 | evstdd TMP0, 0(BASE) // Copy error message.
1319 | b <7
1320 |.else
1321 | mr CARG1, L
1322 | mr CARG2, L:SAVE0
1323 | bl extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co)
1324 |.endif
1325 |
1326 |9: // Handle stack expansion on return from yield.
1327 | mr CARG1, L
1328 | srwi CARG2, RD, 3 // Arg 2: n = number of result slots.
1329 | bl extern lj_state_growstack // (lua_State *L, int n)
1330 | li CRET1, 0 // Status 0, then redo the result handling at label 4.
1331 | b <4
1332 |.endmacro
1333 |
1334 | coroutine_resume_wrap 1 // coroutine.resume
1335 | coroutine_resume_wrap 0 // coroutine.wrap
1336 |
1337 |.ffunc coroutine_yield
1338 | lwz TMP0, L->cframe
1339 | add TMP1, BASE, NARGS8:RC // TMP1 = BASE + nargs*8, stored as L->top.
1340 | stw BASE, L->base
1341 | andi. TMP0, TMP0, CFRAME_RESUME // Test the CFRAME_RESUME bit(s) of L->cframe.
1342 | stw TMP1, L->top
1343 | li CRET1, LUA_YIELD
1344 | beq ->fff_fallback // Bit(s) clear: use the fallback.
1345 | stw ZERO, L->cframe
1346 | stb CRET1, L->status // Thread status = LUA_YIELD.
1347 | b ->vm_leave_unw
1348 |
1349 |//-- Math library -------------------------------------------------------
1350 |
1351 |.ffunc_n math_abs
1352 | efdabs CRET1, CARG1 // CRET1 = absolute value of the argument.
1353 | // Fallthrough.
1354 |
1355 |->fff_restv:
1356 | // CRET1 = TValue result.
1357 | lwz PC, FRAME_PC(BASE)
1358 | la RA, -8(BASE) // Results start at BASE-8.
1359 | evstdd CRET1, 0(RA)
1360 |->fff_res1:
1361 | // RA = results, PC = return.
1362 | li RD, (1+1)*8
1363 |->fff_res:
1364 | // RA = results, RD = (nresults+1)*8, PC = return.
1365 | andi. TMP0, PC, FRAME_TYPE
1366 | mr MULTRES, RD
1367 | bne ->vm_return // Frame type bits set: return via vm_return.
1368 | lwz INS, -4(PC) // Load the instruction word preceding PC.
1369 | decode_RB8 RB, INS
1370 |5:
1371 | cmplw RB, RD // More results expected?
1372 | decode_RA8 TMP0, INS
1373 | bgt >6
1374 | ins_next1
1375 | // Adjust BASE. KBASE is assumed to be set for the calling frame.
1376 | sub BASE, RA, TMP0
1377 | ins_next2
1378 |
1379 |6: // Fill up results with nil.
1380 | subi TMP1, RD, 8 // Offset of the first missing result.
1381 | addi RD, RD, 8
1382 | evstddx TISNIL, RA, TMP1
1383 | b <5
1384 |
1385 |.macro math_extern, func
1386 | .ffunc math_ .. func
1387 | cmplwi NARGS8:RC, 8
1388 | evldd CARG2, 0(BASE) // CARG2 = first argument (8 bytes).
1389 | blt ->fff_fallback // No arguments.
1390 | checknum CARG2
1391 | evmergehi CARG1, CARG2, CARG2 // CARG1 = high word; CARG1/CARG2 now hold the hi/lo pair.
1392 | checkfail ->fff_fallback // Argument fails the number check.
1393 | bl extern func@plt
1394 | evmergelo CRET1, CRET1, CRET2 // Merge the low words of CRET1 and CRET2 into one 8-byte result.
1395 | b ->fff_restv
1396 |.endmacro
1397 |
1398 |.macro math_extern2, func
1399 | .ffunc math_ .. func
1400 | cmplwi NARGS8:RC, 16
1401 | evldd CARG2, 0(BASE) // CARG2 = first argument.
1402 | evldd CARG4, 8(BASE) // CARG4 = second argument.
1403 | blt ->fff_fallback // Fewer than 2 arguments.
1404 | evmergehi CARG1, CARG4, CARG2 // CARG1 = [high word of arg 2 | high word of arg 1].
1405 | checknum CARG1
1406 | evmergehi CARG3, CARG4, CARG4 // CARG3 = high word of arg 2.
1407 | checkanyfail ->fff_fallback // Either argument fails the number check.
1408 | bl extern func@plt
1409 | evmergelo CRET1, CRET1, CRET2 // Merge the low words of CRET1 and CRET2 into one 8-byte result.
1410 | b ->fff_restv
1411 |.endmacro
1412 |
1413 |.macro math_round, func
1414 | .ffunc math_ .. func
1415 | cmplwi NARGS8:RC, 8
1416 | evldd CARG2, 0(BASE) // CARG2 = first argument (8 bytes).
1417 | blt ->fff_fallback // No arguments.
1418 | checknum CARG2
1419 | evmergehi CARG1, CARG2, CARG2 // CARG1 = high word of the argument.
1420 | checkfail ->fff_fallback // Argument fails the number check.
1421 | lwz PC, FRAME_PC(BASE)
1422 | bl ->vm_..func.._hilo;
1423 | la RA, -8(BASE) // Results start at BASE-8.
1424 | evstdd CRET2, 0(RA) // Store the 8-byte result held in CRET2.
1425 | b ->fff_res1
1426 |.endmacro
1427 |
1428 | math_round floor
1429 | math_round ceil
1430 |
1431 | math_extern sqrt
1432 |
1433 |.ffunc math_log
1434 | cmplwi NARGS8:RC, 8
1435 | evldd CARG2, 0(BASE) // CARG2 = first argument (8 bytes).
1436 | bne ->fff_fallback // Need exactly 1 argument.
1437 | checknum CARG2
1438 | evmergehi CARG1, CARG2, CARG2 // CARG1 = high word of the argument.
1439 | checkfail ->fff_fallback // Argument fails the number check.
1440 | bl extern log@plt
1441 | evmergelo CRET1, CRET1, CRET2 // Merge the low words of CRET1 and CRET2 into one 8-byte result.
1442 | b ->fff_restv
1443 |
1444 | math_extern log10
1445 | math_extern exp
1446 | math_extern sin
1447 | math_extern cos
1448 | math_extern tan
1449 | math_extern asin
1450 | math_extern acos
1451 | math_extern atan
1452 | math_extern sinh
1453 | math_extern cosh
1454 | math_extern tanh
1455 | math_extern2 pow
1456 | math_extern2 atan2
1457 | math_extern2 fmod
1458 |
1459 |->ff_math_deg: // Shares the code of math_rad below.
1460 |.ffunc_n math_rad
1461 | evldd CARG2, CFUNC:RB->upvalue[0] // CARG2 = factor stored in upvalue[0].
1462 | efdmul CRET1, CARG1, CARG2 // Result = argument * upvalue[0].
1463 | b ->fff_restv
1464 |
1465 |.ffunc math_ldexp
1466 | cmplwi NARGS8:RC, 16
1467 | evldd CARG2, 0(BASE) // CARG2 = first argument.
1468 | evldd CARG4, 8(BASE) // CARG4 = second argument.
1469 | blt ->fff_fallback // Fewer than 2 arguments.
1470 | evmergehi CARG1, CARG4, CARG2 // CARG1 = [high word of arg 2 | high word of arg 1].
1471 | checknum CARG1
1472 | checkanyfail ->fff_fallback // Either argument fails the number check.
1473 | efdctsi CARG3, CARG4 // CARG3 = second argument converted to a signed integer.
1474 | bl extern ldexp@plt
1475 | evmergelo CRET1, CRET1, CRET2 // Merge the low words of CRET1 and CRET2 into one 8-byte result.
1476 | b ->fff_restv
1477 |
1478 |.ffunc math_frexp
1479 | cmplwi NARGS8:RC, 8
1480 | evldd CARG2, 0(BASE) // CARG2 = first argument (8 bytes).
1481 | blt ->fff_fallback // No arguments.
1482 | checknum CARG2
1483 | evmergehi CARG1, CARG2, CARG2 // CARG1 = high word of the argument.
1484 | checkfail ->fff_fallback // Argument fails the number check.
1485 | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) // Arg: pointer to g->tmptv, read back after the call.
1486 | lwz PC, FRAME_PC(BASE)
1487 | bl extern frexp@plt
1488 | lwz TMP1, DISPATCH_GL(tmptv)(DISPATCH) // TMP1 = integer written to g->tmptv.
1489 | evmergelo CRET1, CRET1, CRET2 // Merge the low words of CRET1 and CRET2 into one 8-byte result.
1490 | efdcfsi CRET2, TMP1 // Convert that integer to a double.
1491 | la RA, -8(BASE)
1492 | evstdd CRET1, 0(RA) // First result.
1493 | li RD, (2+1)*8
1494 | evstdd CRET2, 8(RA) // Second result.
1495 | b ->fff_res
1496 |
1497 |.ffunc math_modf
1498 | cmplwi NARGS8:RC, 8
1499 | evldd CARG2, 0(BASE) // CARG2 = first argument (8 bytes).
1500 | blt ->fff_fallback // No arguments.
1501 | checknum CARG2
1502 | evmergehi CARG1, CARG2, CARG2 // CARG1 = high word of the argument.
1503 | checkfail ->fff_fallback // Argument fails the number check.
1504 | la CARG3, -8(BASE) // Arg: pointer to the first result slot (BASE-8).
1505 | lwz PC, FRAME_PC(BASE)
1506 | bl extern modf@plt
1507 | evmergelo CRET1, CRET1, CRET2 // Merge the low words of CRET1 and CRET2 into one 8-byte result.
1508 | la RA, -8(BASE)
1509 | evstdd CRET1, 0(BASE) // Second result: the returned value.
1510 | li RD, (2+1)*8
1511 | b ->fff_res
1512 |
1513 |.macro math_minmax, name, cmpop
1514 | .ffunc_1 name
1515 | checknum CARG1
1516 | li TMP1, 8 // Offset of the second argument.
1517 | checkfail ->fff_fallback // First argument fails the number check.
1518 |1:
1519 | evlddx CARG2, BASE, TMP1 // CARG2 = next argument.
1520 | cmplw cr1, TMP1, NARGS8:RC
1521 | checknum CARG2
1522 | bge cr1, ->fff_restv // Ok, since CRET1 = CARG1.
1523 | checkfail ->fff_fallback // Argument fails the number check.
1524 | cmpop CARG2, CARG1
1525 | addi TMP1, TMP1, 8 // Advance to the next argument.
1526 | crmove 4*cr0+lt, 4*cr0+gt // Copy cr0.gt into cr0.lt.
1527 | evsel CARG1, CARG2, CARG1
1528 | b <1
1529 |.endmacro
1530 |
1531 | math_minmax math_min, efdtstlt
1532 | math_minmax math_max, efdtstgt
1533 |
1534 |//-- String library -----------------------------------------------------
1535 |
1536 |.ffunc_1 string_len
1537 | checkstr STR:CARG1
1538 | checkfail ->fff_fallback
1539 | lwz TMP0, STR:CARG1->len
1540 | efdcfsi CRET1, TMP0
1541 | b ->fff_restv
1542 |
1543 |.ffunc string_byte // Only handle the 1-arg case here.
1544 | cmplwi NARGS8:RC, 8
1545 | evldd STR:CARG1, 0(BASE)
1546 | bne ->fff_fallback // Need exactly 1 argument.
1547 | checkstr STR:CARG1
1548 | la RA, -8(BASE)
1549 | checkfail ->fff_fallback
1550 | lwz TMP0, STR:CARG1->len
1551 | li RD, (0+1)*8
1552 | lbz TMP1, STR:CARG1[1] // Access is always ok (NUL at end).
1553 | li TMP2, (1+1)*8
1554 | cmplwi TMP0, 0
1555 | lwz PC, FRAME_PC(BASE)
1556 | efdcfsi CRET1, TMP1
1557 | iseleq RD, RD, TMP2
1558 | evstdd CRET1, 0(RA)
1559 | b ->fff_res
1560 |
1561 |.ffunc string_char // Only handle the 1-arg case here.
1562 | ffgccheck
1563 | cmplwi NARGS8:RC, 8
1564 | evldd CARG1, 0(BASE)
1565 | bne ->fff_fallback // Exactly 1 argument.
1566 | checknum CARG1
1567 | la CARG2, DISPATCH_GL(tmptv)(DISPATCH)
1568 | checkfail ->fff_fallback
1569 | efdctsiz TMP0, CARG1
1570 | li CARG3, 1
1571 | cmplwi TMP0, 255
1572 | stb TMP0, 0(CARG2)
1573 | bgt ->fff_fallback
1574 |->fff_newstr:
1575 | mr CARG1, L
1576 | stw BASE, L->base
1577 | stw PC, SAVE_PC
1578 | bl extern lj_str_new // (lua_State *L, char *str, size_t l)
1579 | // Returns GCstr *.
1580 | lwz BASE, L->base
1581 | evmergelo STR:CRET1, TISSTR, STR:CRET1
1582 | b ->fff_restv
1583 |
1584 |.ffunc string_sub
1585 | ffgccheck
1586 | cmplwi NARGS8:RC, 16
1587 | evldd CARG3, 16(BASE)
1588 | evldd STR:CARG1, 0(BASE)
1589 | blt ->fff_fallback
1590 | evldd CARG2, 8(BASE)
1591 | li TMP2, -1
1592 | beq >1
1593 | checknum CARG3
1594 | checkfail ->fff_fallback
1595 | efdctsiz TMP2, CARG3
1596 |1:
1597 | checknum CARG2
1598 | checkfail ->fff_fallback
1599 | checkstr STR:CARG1
1600 | efdctsiz TMP1, CARG2
1601 | checkfail ->fff_fallback
1602 | lwz TMP0, STR:CARG1->len
1603 | cmplw TMP0, TMP2 // len < end? (unsigned compare)
1604 | add TMP3, TMP2, TMP0
1605 | blt >5
1606 |2:
1607 | cmpwi TMP1, 0 // start <= 0?
1608 | add TMP3, TMP1, TMP0
1609 | ble >7
1610 |3:
1611 | sub. CARG3, TMP2, TMP1
1612 | addi CARG2, STR:CARG1, #STR-1
1613 | addi CARG3, CARG3, 1
1614 | add CARG2, CARG2, TMP1
1615 | isellt CARG3, r0, CARG3
1616 | b ->fff_newstr
1617 |
1618 |5: // Negative end or overflow.
1619 | cmpw TMP0, TMP2
1620 | addi TMP3, TMP3, 1
1621 | iselgt TMP2, TMP3, TMP0 // end = end > len ? len : end+len+1
1622 | b <2
1623 |
1624 |7: // Negative start or underflow.
1625 | cmpwi cr1, TMP3, 0
1626 | iseleq TMP1, r0, TMP3
1627 | isel TMP1, r0, TMP1, 4*cr1+lt
1628 | addi TMP1, TMP1, 1 // start = 1 + (start ? start+len : 0)
1629 | b <3
1630 |
1631 |.ffunc string_rep // Only handle the 1-char case inline.
1632 | ffgccheck
1633 | cmplwi NARGS8:RC, 16
1634 | evldd CARG1, 0(BASE)
1635 | evldd CARG2, 8(BASE)
1636 | bne ->fff_fallback // Exactly 2 arguments.
1637 | checknum CARG2
1638 | checkfail ->fff_fallback
1639 | checkstr STR:CARG1
1640 | efdctsiz CARG3, CARG2 // CARG3 = repeat count as an integer; it is also the length argument when jumping to fff_newstr.
1641 | checkfail ->fff_fallback
1642 | lwz TMP0, STR:CARG1->len
1643 | cmpwi CARG3, 0
1644 | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
1645 | ble >2 // Count <= 0? (or non-int)
1646 | cmplwi TMP0, 1
1647 | subi TMP2, CARG3, 1 // TMP2 = count-1: index of the last byte to fill; the loop below counts down to 0.
1648 | blt >2 // Zero length string?
1649 | cmplw cr1, TMP1, CARG3 // Temp buffer size vs. count; result kept in cr1 and tested two branches further down.
1650 | bne ->fff_fallback // Fallback for > 1-char strings.
1651 | lbz TMP0, STR:CARG1[1] // TMP0 = the single character to replicate.
1652 | lwz CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) // CARG2 = temp buffer; doubles as the string argument for fff_newstr.
1653 | blt cr1, ->fff_fallback // Temp buffer smaller than count: fallback.
1654 |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?).
1655 | cmplwi TMP2, 0 // Compare before the decrement, so index 0 is still stored in the final iteration.
1656 | stbx TMP0, CARG2, TMP2
1657 | subi TMP2, TMP2, 1
1658 | bne <1
1659 | b ->fff_newstr
1660 |2: // Return empty string.
1661 | la STR:CRET1, DISPATCH_GL(strempty)(DISPATCH)
1662 | evmergelo CRET1, TISSTR, STR:CRET1
1663 | b ->fff_restv
1664 |
1665 |.ffunc string_reverse
1666 | ffgccheck
1667 | cmplwi NARGS8:RC, 8
1668 | evldd CARG1, 0(BASE)
1669 | blt ->fff_fallback
1670 | checkstr STR:CARG1
1671 | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
1672 | checkfail ->fff_fallback
1673 | lwz CARG3, STR:CARG1->len
1674 | la CARG1, #STR(STR:CARG1)
1675 | lwz CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
1676 | li TMP2, 0
1677 | cmplw TMP1, CARG3
1678 | subi TMP3, CARG3, 1
1679 | blt ->fff_fallback
1680 |1: // Reverse string copy.
1681 | cmpwi TMP3, 0
1682 | lbzx TMP1, CARG1, TMP2
1683 | blt ->fff_newstr
1684 | stbx TMP1, CARG2, TMP3
1685 | subi TMP3, TMP3, 1
1686 | addi TMP2, TMP2, 1
1687 | b <1
1688 |
1689 |.macro ffstring_case, name, lo // Template for string_lower (lo = 65) and string_upper (lo = 97): lo is the first character code of the 26-character range whose case gets flipped.
1690 | .ffunc name
1691 | ffgccheck
1692 | cmplwi NARGS8:RC, 8
1693 | evldd CARG1, 0(BASE)
1694 | blt ->fff_fallback // Need at least 1 argument.
1695 | checkstr STR:CARG1
1696 | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
1697 | checkfail ->fff_fallback
1698 | lwz CARG3, STR:CARG1->len // CARG3 = string length; also the length argument for fff_newstr.
1699 | la CARG1, #STR(STR:CARG1) // CARG1 = address right after the string object header; the loop reads the characters from here.
1700 | lwz CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) // CARG2 = temp buffer receiving the converted characters.
1701 | cmplw TMP1, CARG3 // Temp buffer size vs. string length.
1702 | li TMP2, 0 // TMP2 = current character index.
1703 | blt ->fff_fallback // Temp buffer too small: fallback.
1704 |1: // ASCII case conversion.
1705 | cmplw TMP2, CARG3
1706 | lbzx TMP1, CARG1, TMP2
1707 | bge ->fff_newstr // Index reached the length: create the result string from the buffer.
1708 | subi TMP0, TMP1, lo // TMP0 = c - lo.
1709 | xori TMP3, TMP1, 0x20 // TMP3 = c with bit 0x20 toggled (the case-flipped candidate).
1710 | cmplwi TMP0, 26 // Unsigned compare: true only for lo <= c < lo+26.
1711 | isellt TMP1, TMP3, TMP1 // Use the flipped character if in range, else keep c.
1712 | stbx TMP1, CARG2, TMP2
1713 | addi TMP2, TMP2, 1
1714 | b <1
1715 |.endmacro
1716 |
1717 |ffstring_case string_lower, 65
1718 |ffstring_case string_upper, 97
1719 |
1720 |//-- Table library ------------------------------------------------------
1721 |
1722 |.ffunc_1 table_getn
1723 | checktab CARG1
1724 | checkfail ->fff_fallback
1725 | bl extern lj_tab_len // (GCtab *t)
1726 | // Returns uint32_t (but less than 2^31).
1727 | efdcfsi CRET1, CRET1
1728 | b ->fff_restv
1729 |
1730 |//-- Bit library --------------------------------------------------------
1731 |
1732 |.macro .ffunc_bit, name
1733 | .ffunc_n bit_..name
1734 | efdadd CARG1, CARG1, TOBIT
1735 |.endmacro
1736 |
1737 |.ffunc_bit tobit
1738 |->fff_resbit:
1739 | efdcfsi CRET1, CARG1
1740 | b ->fff_restv
1741 |
1742 |.macro .ffunc_bit_op, name, ins
1743 | .ffunc_bit name
1744 | li TMP1, 8
1745 |1:
1746 | evlddx CARG2, BASE, TMP1
1747 | cmplw cr1, TMP1, NARGS8:RC
1748 | checknum CARG2
1749 | bge cr1, ->fff_resbit
1750 | checkfail ->fff_fallback
1751 | efdadd CARG2, CARG2, TOBIT
1752 | ins CARG1, CARG1, CARG2
1753 | addi TMP1, TMP1, 8
1754 | b <1
1755 |.endmacro
1756 |
1757 |.ffunc_bit_op band, and
1758 |.ffunc_bit_op bor, or
1759 |.ffunc_bit_op bxor, xor
1760 |
1761 |.ffunc_bit bswap
1762 | rotlwi TMP0, CARG1, 8
1763 | rlwimi TMP0, CARG1, 24, 0, 7
1764 | rlwimi TMP0, CARG1, 24, 16, 23
1765 | efdcfsi CRET1, TMP0
1766 | b ->fff_restv
1767 |
1768 |.ffunc_bit bnot
1769 | not TMP0, CARG1
1770 | efdcfsi CRET1, TMP0
1771 | b ->fff_restv
1772 |
1773 |.macro .ffunc_bit_sh, name, ins, shmod
1774 | .ffunc_nn bit_..name
1775 | efdadd CARG2, CARG2, TOBIT
1776 | efdadd CARG1, CARG1, TOBIT
1777 |.if shmod == 1
1778 | rlwinm CARG2, CARG2, 0, 27, 31
1779 |.elif shmod == 2
1780 | neg CARG2, CARG2
1781 |.endif
1782 | ins TMP0, CARG1, CARG2
1783 | efdcfsi CRET1, TMP0
1784 | b ->fff_restv
1785 |.endmacro
1786 |
1787 |.ffunc_bit_sh lshift, slw, 1
1788 |.ffunc_bit_sh rshift, srw, 1
1789 |.ffunc_bit_sh arshift, sraw, 1
1790 |.ffunc_bit_sh rol, rotlw, 0
1791 |.ffunc_bit_sh ror, rotlw, 2
1792 |
1793 |//-----------------------------------------------------------------------
1794 |
1795 |->fff_fallback: // Call fast function fallback handler.
1796 | // BASE = new base, RB = CFUNC, RC = nargs*8
1797 | lwz TMP3, CFUNC:RB->f
1798 | add TMP1, BASE, NARGS8:RC
1799 | lwz PC, FRAME_PC(BASE) // Fallback may overwrite PC.
1800 | addi TMP0, TMP1, 8*LUA_MINSTACK
1801 | lwz TMP2, L->maxstack
1802 | stw PC, SAVE_PC // Redundant (but a defined value).
1803 | cmplw TMP0, TMP2
1804 | stw BASE, L->base
1805 | stw TMP1, L->top
1806 | mr CARG1, L
1807 | bgt >5 // Need to grow stack.
1808 | mtctr TMP3
1809 | bctrl // (lua_State *L)
1810 | // Either throws an error, or recovers and returns -1, 0 or nresults+1.
1811 | lwz BASE, L->base
1812 | cmpwi CRET1, 0
1813 | slwi RD, CRET1, 3
1814 | la RA, -8(BASE)
1815 | bgt ->fff_res // Returned nresults+1?
1816 |1: // Returned 0 or -1: retry fast path.
1817 | lwz TMP0, L->top
1818 | lwz LFUNC:RB, FRAME_FUNC(BASE)
1819 | sub NARGS8:RC, TMP0, BASE
1820 | bne ->vm_call_tail // Returned -1?
1821 | ins_callt // Returned 0: retry fast path.
1822 |
1823 |// Reconstruct previous base for vmeta_call during tailcall.
1824 |->vm_call_tail:
1825 | andi. TMP0, PC, FRAME_TYPE
1826 | rlwinm TMP1, PC, 0, 0, 28
1827 | bne >3
1828 | lwz INS, -4(PC)
1829 | decode_RA8 TMP1, INS
1830 | addi TMP1, TMP1, 8
1831 |3:
1832 | sub TMP2, BASE, TMP1
1833 | b ->vm_call_dispatch // Resolve again for tailcall.
1834 |
1835 |5: // Grow stack for fallback handler.
1836 | li CARG2, LUA_MINSTACK
1837 | bl extern lj_state_growstack // (lua_State *L, int n)
1838 | lwz BASE, L->base
1839 | cmpw TMP0, TMP0 // Set 4*cr0+eq to force retry.
1840 | b <1
1841 |
1842 |->fff_gcstep: // Call GC step function.
1843 | // BASE = new base, RC = nargs*8
1844 | mflr SAVE0 // Keep the return address in SAVE0; the bl below overwrites LR.
1845 | stw BASE, L->base
1846 | add TMP0, BASE, NARGS8:RC // TMP0 = BASE + nargs*8, stored as L->top below.
1847 | stw PC, SAVE_PC // Redundant (but a defined value).
1848 | stw TMP0, L->top
1849 | mr CARG1, L
1850 | bl extern lj_gc_step // (lua_State *L)
1851 | lwz BASE, L->base // Reload BASE from L->base after the call.
1852 | mtlr SAVE0 // Restore the return address saved on entry.
1853 | lwz TMP0, L->top
1854 | sub NARGS8:RC, TMP0, BASE // Recompute nargs*8 from the reloaded top and base.
1855 | lwz CFUNC:RB, FRAME_FUNC(BASE) // Reload RB from the frame's function slot.
1856 | blr
1857 |
1858 |//-----------------------------------------------------------------------
1859 |//-- Special dispatch targets -------------------------------------------
1860 |//-----------------------------------------------------------------------
1861 |
1862 |->vm_record: // Dispatch target for recording phase.
1863 |.if JIT
1864 | NYI
1865 |.endif
1866 |
1867 |->vm_rethook: // Dispatch target for return hooks.
1868 | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH)
1869 | andi. TMP0, TMP3, HOOK_ACTIVE // Hook already active?
1870 | beq >1
1871 |5: // Re-dispatch to static ins.
1872 | addi TMP1, TMP1, GG_DISP2STATIC // Assumes decode_OP4 TMP1, INS.
1873 | lwzx TMP0, DISPATCH, TMP1
1874 | mtctr TMP0
1875 | bctr
1876 |
1877 |->vm_inshook: // Dispatch target for instr/line hooks.
1878 | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH)
1879 | lwz TMP2, DISPATCH_GL(hookcount)(DISPATCH)
1880 | andi. TMP0, TMP3, HOOK_ACTIVE // Hook already active?
1881 | rlwinm TMP0, TMP3, 31-LUA_HOOKLINE, 31, 0
1882 | bne <5
1883 |
1884 | cmpwi cr1, TMP0, 0
1885 | addic. TMP2, TMP2, -1
1886 | beq cr1, <5
1887 | stw TMP2, DISPATCH_GL(hookcount)(DISPATCH)
1888 | beq >1
1889 | bge cr1, <5
1890 |1:
1891 | mr CARG1, L
1892 | stw MULTRES, SAVE_MULTRES
1893 | mr CARG2, PC
1894 | stw BASE, L->base
1895 | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
1896 | bl extern lj_dispatch_ins // (lua_State *L, const BCIns *pc)
1897 |3:
1898 | lwz BASE, L->base
1899 |4: // Re-dispatch to static ins.
1900 | lwz INS, -4(PC)
1901 | decode_OP4 TMP1, INS
1902 | decode_RB8 RB, INS
1903 | addi TMP1, TMP1, GG_DISP2STATIC
1904 | decode_RD8 RD, INS
1905 | lwzx TMP0, DISPATCH, TMP1
1906 | decode_RA8 RA, INS
1907 | decode_RC8 RC, INS
1908 | mtctr TMP0
1909 | bctr
1910 |
1911 |->cont_hook: // Continue from hook yield.
1912 | addi PC, PC, 4
1913 | lwz MULTRES, -20(RB) // Restore MULTRES for *M ins.
1914 | b <4
1915 |
1916 |->vm_hotloop: // Hot loop counter underflow.
1917 |.if JIT
1918 | NYI
1919 |.endif
1920 |
1921 |->vm_callhook: // Dispatch target for call hooks.
1922 | mr CARG2, PC // Call hook entry: pass PC unmodified as the second argument.
1923 |.if JIT
1924 | b >1 // Skip the PC tagging done for the hot-call entry.
1925 |.endif
1926 |
1927 |->vm_hotcall: // Hot call counter underflow.
1928 |.if JIT
1929 | ori CARG2, PC, 1 // Hot-call entry: pass PC with bit 0 set (presumably how the callee tells the two entries apart -- confirm in lj_dispatch_call).
1930 |1:
1931 |.endif
1932 | add TMP0, BASE, RC // Shared tail. Without JIT both labels fall through to here with CARG2 = PC.
1933 | stw PC, SAVE_PC
1934 | mr CARG1, L
1935 | stw BASE, L->base
1936 | sub RA, RA, BASE // Keep RA as an offset from BASE across the call; BASE is reloaded from L->base afterwards.
1937 | stw TMP0, L->top
1938 | bl extern lj_dispatch_call // (lua_State *L, const BCIns *pc)
1939 | // Returns ASMFunction.
1940 | lwz BASE, L->base
1941 | lwz TMP0, L->top
1942 | stw ZERO, SAVE_PC // Invalidate for subsequent line hook.
1943 | sub NARGS8:RC, TMP0, BASE // Recompute nargs*8 from the reloaded top and base.
1944 | add RA, BASE, RA // Turn RA back into an address relative to the reloaded BASE.
1945 | lwz LFUNC:RB, FRAME_FUNC(BASE)
1946 | mtctr CRET1 // Continue at the function address returned by lj_dispatch_call.
1947 | bctr
1948 |
1949 |//-----------------------------------------------------------------------
1950 |//-- Trace exit handler -------------------------------------------------
1951 |//-----------------------------------------------------------------------
1952 |
1953 |->vm_exit_handler:
1954 |.if JIT
1955 | NYI
1956 |.endif
1957 |->vm_exit_interp:
1958 |.if JIT
1959 | NYI
1960 |.endif
1961 |
1962 |//-----------------------------------------------------------------------
1963 |//-- Math helper functions ----------------------------------------------
1964 |//-----------------------------------------------------------------------
1965 |
1966 |// FP value rounding. Called by math.floor/math.ceil fast functions
1967 |// and from JIT code.
1968 |//
1969 |// This can be inlined if the CPU has the frin/friz/frip/frim instructions.
1970 |// The alternative hard-float approaches have a deep dependency chain.
1971 |// The resulting latency is at least 3x-7x the double-precision FP latency
1972 |// (e500v2: 6cy, e600: 5cy, Cell: 10cy) or around 20-70 cycles.
1973 |//
1974 |// The soft-float approach is tedious, but much faster (e500v2: ~11cy/~6cy).
1975 |// However it relies on a fast way to transfer the FP value to GPRs
1976 |// (e500v2: 0cy for lo-word, 1cy for hi-word).
1977 |//
1978 |.macro vm_round, name, mode // Emits the name_efd and name_hilo entry points. mode: 0 = floor, 1 = ceil, 2 = trunc.
1979 | // Used temporaries: TMP0, TMP1, TMP2, TMP3.
1980 |->name.._efd: // Input: CARG2, output: CRET2
1981 | evmergehi CARG1, CARG2, CARG2 // Copy the hi word of CARG2 into CARG1, then fall through to the hilo entry.
1982 |->name.._hilo:
1983 | // Input: CARG1 (hi), CARG2 (hi, lo), output: CRET2
1984 | rlwinm TMP2, CARG1, 12, 21, 31 // TMP2 = biased 11-bit exponent field of the hi word.
1985 | addic. TMP2, TMP2, -1023 // exp = exponent(x) - 1023
1986 | li TMP1, -1
1987 | cmplwi cr1, TMP2, 51 // 0 <= exp <= 51?
1988 | subfic TMP0, TMP2, 52
1989 | bgt cr1, >1 // Unsigned compare: taken for exp < 0 as well as exp > 51.
1990 | lus TMP3, 0xfff0
1991 | slw TMP0, TMP1, TMP0 // lomask = -1 << (52-exp)
1992 | sraw TMP1, TMP3, TMP2 // himask = (int32_t)0xfff00000 >> exp
1993 |.if mode == 2 // trunc(x):
1994 | evmergelo TMP0, TMP1, TMP0
1995 | evand CRET2, CARG2, TMP0 // hi &= himask, lo &= lomask
1996 |.else
1997 | andc TMP2, CARG2, TMP0
1998 | andc TMP3, CARG1, TMP1
1999 | or TMP2, TMP2, TMP3 // ztest = (hi&~himask) | (lo&~lomask)
2000 | srawi TMP3, CARG1, 31 // signmask = (int32_t)hi >> 31
2001 |.if mode == 0 // floor(x):
2002 | and. TMP2, TMP2, TMP3 // iszero = ((ztest & signmask) == 0)
2003 |.else // ceil(x):
2004 | andc. TMP2, TMP2, TMP3 // iszero = ((ztest & ~signmask) == 0)
2005 |.endif
2006 | and CARG2, CARG2, TMP0 // lo &= lomask
2007 | and CARG1, CARG1, TMP1 // hi &= himask
2008 | subc TMP0, CARG2, TMP0
2009 | iseleq TMP0, CARG2, TMP0 // lo = iszero ? lo : lo-lomask
2010 | sube TMP1, CARG1, TMP1
2011 | iseleq TMP1, CARG1, TMP1 // hi = iszero ? hi : hi-himask+carry
2012 | evmergelo CRET2, TMP1, TMP0
2013 |.endif
2014 | blr
2015 |1: // exp is outside 0..51 here; cr0 still holds the sign of exp from the addic. above.
2016 | bgtlr // Already done if >=2^52, +-inf or nan. Otherwise exp < 0, i.e. the exponent is below the bias.
2017 |.if mode == 2 // trunc(x):
2018 | rlwinm TMP1, CARG1, 0, 0, 0 // hi = sign(x)
2019 | li TMP0, 0
2020 | evmergelo CRET2, TMP1, TMP0
2021 |.else
2022 | rlwinm TMP2, CARG1, 0, 1, 31
2023 | srawi TMP0, CARG1, 31 // signmask = (int32_t)hi >> 31
2024 | or TMP2, TMP2, CARG2 // ztest = abs(hi) | lo
2025 | lus TMP1, 0x3ff0
2026 |.if mode == 0 // floor(x):
2027 | and. TMP2, TMP2, TMP0 // iszero = ((ztest & signmask) == 0)
2028 |.else // ceil(x):
2029 | andc. TMP2, TMP2, TMP0 // iszero = ((ztest & ~signmask) == 0)
2030 |.endif
2031 | li TMP0, 0
2032 | iseleq TMP1, r0, TMP1
2033 | rlwimi CARG1, TMP1, 0, 1, 31 // hi = sign(x) | (iszero ? 0.0 : 1.0)
2034 | evmergelo CRET2, CARG1, TMP0
2035 |.endif
2036 | blr
2037 |.endmacro
2038 |
2039 |->vm_floor: // Wrapper around vm_floor_hilo: input hi word in CARG1 and lo word in CARG2, result hi word in CRET1 and (hi, lo) in CRET2.
2040 | mflr CARG3 // Save LR in CARG3; the rounding code only lists TMP0-TMP3 as its temporaries.
2041 | evmergelo CARG2, CARG1, CARG2 // CARG2 = (hi, lo), the combined form vm_floor_hilo expects next to CARG1 = hi.
2042 | bl ->vm_floor_hilo
2043 | mtlr CARG3
2044 | evmergehi CRET1, CRET2, CRET2 // Copy the hi word of the result into CRET1.
2045 | blr
2046 |
2047 | vm_round vm_floor, 0
2048 | vm_round vm_ceil, 1
2049 |.if JIT
2050 | vm_round vm_trunc, 2
2051 |.else
2052 |->vm_trunc_efd:
2053 |->vm_trunc_hilo:
2054 |.endif
2055 |
2056 |//-----------------------------------------------------------------------
2057 |//-- Miscellaneous functions --------------------------------------------
2058 |//-----------------------------------------------------------------------
2059 |
2060 |//-----------------------------------------------------------------------
2061 |//-- FFI helper functions -----------------------------------------------
2062 |//-----------------------------------------------------------------------
2063 |
2064 |->vm_ffi_call:
2065 |.if FFI
2066 | NYI
2067 |.endif
2068 |
2069 |//-----------------------------------------------------------------------
2070}
2071
2072/* Generate the code for a single instruction. */
2073static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2074{
2075 int vk = 0;
2076 |=>defop:
2077
2078 switch (op) {
2079
2080 /* -- Comparison ops ---------------------------------------------------- */
2081
2082 /* Remember: all ops branch for a true comparison, fall through otherwise. */
2083
2084 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
2085 | // RA = src1*8, RD = src2*8, JMP with RD = target
2086 | evlddx TMP0, BASE, RA
2087 | addi PC, PC, 4
2088 | evlddx TMP1, BASE, RD
2089 | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
2090 | lwz TMP2, -4(PC)
2091 | evmergehi RB, TMP0, TMP1
2092 | decode_RD4 TMP2, TMP2
2093 | checknum RB
2094 | add TMP2, TMP2, TMP3
2095 | checkanyfail ->vmeta_comp
2096 | efdcmplt TMP0, TMP1
2097 if (op == BC_ISLE || op == BC_ISGT) {
2098 | efdcmpeq cr1, TMP0, TMP1
2099 | cror 4*cr0+gt, 4*cr0+gt, 4*cr1+gt
2100 }
2101 if (op == BC_ISLT || op == BC_ISLE) {
2102 | iselgt PC, TMP2, PC
2103 } else {
2104 | iselgt PC, PC, TMP2
2105 }
2106 | ins_next
2107 break;
2108
2109 case BC_ISEQV: case BC_ISNEV:
2110 vk = op == BC_ISEQV;
2111 | // RA = src1*8, RD = src2*8, JMP with RD = target
2112 | evlddx CARG2, BASE, RA
2113 | addi PC, PC, 4
2114 | evlddx CARG3, BASE, RD
2115 | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
2116 | lwz TMP2, -4(PC)
2117 | evmergehi RB, CARG2, CARG3
2118 | decode_RD4 TMP2, TMP2
2119 | checknum RB
2120 | add TMP2, TMP2, TMP3
2121 | checkanyfail >5
2122 | efdcmpeq CARG2, CARG3
2123 if (vk) {
2124 | iselgt PC, TMP2, PC
2125 } else {
2126 | iselgt PC, PC, TMP2
2127 }
2128 |1:
2129 | ins_next
2130 |
2131 |5: // Either or both types are not numbers.
2132 | evcmpeq CARG2, CARG3
2133 | not TMP3, RB
2134 | cmplwi cr1, TMP3, ~LJ_TISPRI // Primitive?
2135 | crorc 4*cr7+lt, 4*cr0+so, 4*cr0+lt // 1: Same tv or different type.
2136 | cmplwi cr6, TMP3, ~LJ_TISTABUD // Table or userdata?
2137 | crandc 4*cr7+gt, 4*cr0+lt, 4*cr1+gt // 2: Same type and primitive.
2138 | mr SAVE0, PC
2139 if (vk) {
2140 | isel PC, TMP2, PC, 4*cr7+gt
2141 } else {
2142 | isel TMP2, PC, TMP2, 4*cr7+gt
2143 }
2144 | cror 4*cr7+lt, 4*cr7+lt, 4*cr7+gt // 1 or 2.
2145 if (vk) {
2146 | isel PC, TMP2, PC, 4*cr0+so
2147 } else {
2148 | isel PC, PC, TMP2, 4*cr0+so
2149 }
2150 | blt cr7, <1 // Done if 1 or 2.
2151 | blt cr6, <1 // Done if not tab/ud.
2152 |
2153 | // Different tables or userdatas. Need to check __eq metamethod.
2154 | // Field metatable must be at same offset for GCtab and GCudata!
2155 | lwz TAB:TMP2, TAB:CARG2->metatable
2156 | li CARG4, 1-vk // ne = 0 or 1.
2157 | cmplwi TAB:TMP2, 0
2158 | beq <1 // No metatable?
2159 | lbz TMP2, TAB:TMP2->nomm
2160 | andi. TMP2, TMP2, 1<<MM_eq
2161 | bne <1 // Or 'no __eq' flag set?
2162 | mr PC, SAVE0 // Restore old PC.
2163 | b ->vmeta_equal // Handle __eq metamethod.
2164 break;
2165
2166 case BC_ISEQS: case BC_ISNES:
2167 vk = op == BC_ISEQS;
2168 | // RA = src*8, RD = str_const*8 (~), JMP with RD = target
2169 | evlddx TMP0, BASE, RA
2170 | srwi RD, RD, 1
2171 | lwz INS, 0(PC)
2172 | subfic RD, RD, -4
2173 | addi PC, PC, 4
2174 | lwzx STR:TMP1, KBASE, RD // KBASE-4-str_const*4
2175 | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
2176 | decode_RD4 TMP2, INS
2177 | evmergelo STR:TMP1, TISSTR, STR:TMP1
2178 | add TMP2, TMP2, TMP3
2179 | evcmpeq TMP0, STR:TMP1
2180 if (vk) {
2181 | isel PC, TMP2, PC, 4*cr0+so
2182 } else {
2183 | isel PC, PC, TMP2, 4*cr0+so
2184 }
2185 | ins_next
2186 break;
2187
2188 case BC_ISEQN: case BC_ISNEN: // Compare a slot against a number constant; ISEQN branches on equal, ISNEN on not equal.
2189 vk = op == BC_ISEQN;
2190 | // RA = src*8, RD = num_const*8, JMP with RD = target
2191 | evlddx TMP0, BASE, RA
2192 | addi PC, PC, 4 // Step past the following JMP instruction word.
2193 | evlddx TMP1, KBASE, RD
2194 | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
2195 | lwz INS, -4(PC) // INS = that JMP word, whose RD field holds the branch target.
2196 | checknum TMP0
2197 | checkfail >5 // Source is not a number: no FP compare, outcome is decided at label 5.
2198 | efdcmpeq TMP0, TMP1
2199 |1:
2200 | decode_RD4 TMP2, INS
2201 | add TMP2, TMP2, TMP3 // TMP2 = branch target address.
2202 if (vk) {
2203 | iselgt PC, TMP2, PC // Equal: take the branch, else fall through.
2204 |5: // ISEQN with a non-number source lands after the select: PC unchanged, no branch.
2205 } else {
2206 | iselgt PC, PC, TMP2 // Equal: fall through, else take the branch.
2207 }
2208 |3:
2209 | ins_next
2210 if (!vk) {
2211 |5: // ISNEN with a non-number source: emitted out of line, always takes the branch.
2212 | decode_RD4 TMP2, INS
2213 | add PC, TMP2, TMP3
2214 | b <3
2215 }
2216 break;
2217
2218 case BC_ISEQP: case BC_ISNEP:
2219 vk = op == BC_ISEQP;
2220 | // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target
2221 | lwzx TMP0, BASE, RA
2222 | srwi TMP1, RD, 3
2223 | lwz INS, 0(PC)
2224 | addi PC, PC, 4
2225 | not TMP1, TMP1
2226 | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
2227 | cmplw TMP0, TMP1
2228 | decode_RD4 TMP2, INS
2229 | add TMP2, TMP2, TMP3
2230 if (vk) {
2231 | iseleq PC, TMP2, PC
2232 } else {
2233 | iseleq PC, PC, TMP2
2234 }
2235 | ins_next
2236 break;
2237
2238 /* -- Unary test and copy ops ------------------------------------------- */
2239
2240 case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
2241 | // RA = dst*8 or unused, RD = src*8, JMP with RD = target
2242 | evlddx TMP0, BASE, RD
2243 | evaddw TMP1, TISNIL, TISNIL // Synthesize LJ_TFALSE.
2244 | lwz INS, 0(PC)
2245 | evcmpltu TMP0, TMP1
2246 | addi PC, PC, 4
2247 if (op == BC_IST || op == BC_ISF) {
2248 | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
2249 | decode_RD4 TMP2, INS
2250 | add TMP2, TMP2, TMP3
2251 if (op == BC_IST) {
2252 | isellt PC, TMP2, PC
2253 } else {
2254 | isellt PC, PC, TMP2
2255 }
2256 } else {
2257 if (op == BC_ISTC) {
2258 | checkfail >1
2259 } else {
2260 | checkok >1
2261 }
2262 | addis PC, PC, -(BCBIAS_J*4 >> 16)
2263 | decode_RD4 TMP2, INS
2264 | evstddx TMP0, BASE, RA
2265 | add PC, PC, TMP2
2266 |1:
2267 }
2268 | ins_next
2269 break;
2270
2271 /* -- Unary ops --------------------------------------------------------- */
2272
2273 case BC_MOV:
2274 | // RA = dst*8, RD = src*8
2275 | ins_next1
2276 | evlddx TMP0, BASE, RD
2277 | evstddx TMP0, BASE, RA
2278 | ins_next2
2279 break;
2280 case BC_NOT: // Logical not, computed branch-free from the first word of the source slot.
2281 | // RA = dst*8, RD = src*8
2282 | ins_next1
2283 | lwzx TMP0, BASE, RD // Load only the first 32-bit word of the source slot.
2284 | subfic TMP1, TMP0, LJ_TTRUE // TMP1 = LJ_TTRUE - word; carry is set iff word <= LJ_TTRUE (unsigned).
2285 | adde TMP0, TMP0, TMP1 // TMP0 = word + (LJ_TTRUE - word) + carry = LJ_TTRUE + carry.
2286 | stwx TMP0, BASE, RA // NOTE(review): presumably LJ_TTRUE+1 is the false tag and only nil/false compare above LJ_TTRUE -- confirm against the tag definitions.
2287 | ins_next2
2288 break;
2289 case BC_UNM:
2290 | // RA = dst*8, RD = src*8
2291 | evlddx TMP0, BASE, RD
2292 | checknum TMP0
2293 | checkfail ->vmeta_unm
2294 | efdneg TMP0, TMP0
2295 | ins_next1
2296 | evstddx TMP0, BASE, RA
2297 | ins_next2
2298 break;
2299 case BC_LEN:
2300 | // RA = dst*8, RD = src*8
2301 | evlddx CARG1, BASE, RD
2302 | checkstr CARG1
2303 | checkfail >2
2304 | lwz CRET1, STR:CARG1->len
2305 |1:
2306 | ins_next1
2307 | efdcfsi TMP0, CRET1
2308 | evstddx TMP0, BASE, RA
2309 | ins_next2
2310 |2:
2311 | checktab CARG1
2312 | checkfail ->vmeta_len
2313#if LJ_52
2314 | lwz TAB:TMP2, TAB:CARG1->metatable
2315 | cmplwi TAB:TMP2, 0
2316 | bne >9
2317 |3:
2318#endif
2319 |->BC_LEN_Z:
2320 | bl extern lj_tab_len // (GCtab *t)
2321 | // Returns uint32_t (but less than 2^31).
2322 | b <1
2323#if LJ_52
2324 |9:
2325 | lbz TMP0, TAB:TMP2->nomm
2326 | andi. TMP0, TMP0, 1<<MM_len
2327 | bne <3 // 'no __len' flag set: done.
2328 | b ->vmeta_len
2329#endif
2330 break;
2331
2332 /* -- Binary ops -------------------------------------------------------- */
2333
2334 |.macro ins_arithpre, t0, t1
2335 | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
2336 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
2337 ||switch (vk) {
2338 ||case 0:
2339 | evlddx t0, BASE, RB
2340 | checknum t0
2341 | evlddx t1, KBASE, RC
2342 | checkfail ->vmeta_arith_vn
2343 || break;
2344 ||case 1:
2345 | evlddx t1, BASE, RB
2346 | checknum t1
2347 | evlddx t0, KBASE, RC
2348 | checkfail ->vmeta_arith_nv
2349 || break;
2350 ||default:
2351 | evlddx t0, BASE, RB
2352 | evlddx t1, BASE, RC
2353 | evmergehi TMP2, t0, t1
2354 | checknum TMP2
2355 | checkanyfail ->vmeta_arith_vv
2356 || break;
2357 ||}
2358 |.endmacro
2359 |
2360 |.macro ins_arith, ins
2361 | ins_arithpre TMP0, TMP1
2362 | ins_next1
2363 | ins TMP0, TMP0, TMP1
2364 | evstddx TMP0, BASE, RA
2365 | ins_next2
2366 |.endmacro
2367
2368 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
2369 | ins_arith efdadd
2370 break;
2371 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
2372 | ins_arith efdsub
2373 break;
2374 case BC_MULVN: case BC_MULNV: case BC_MULVV:
2375 | ins_arith efdmul
2376 break;
2377 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
2378 | ins_arith efddiv
2379 break;
2380 case BC_MODVN:
2381 | ins_arithpre RD, SAVE0
2382 |->BC_MODVN_Z:
2383 | efddiv CARG2, RD, SAVE0
2384 | bl ->vm_floor_efd // floor(b/c)
2385 | efdmul TMP0, CRET2, SAVE0
2386 | ins_next1
2387 | efdsub TMP0, RD, TMP0 // b - floor(b/c)*c
2388 | evstddx TMP0, BASE, RA
2389 | ins_next2
2390 break;
2391 case BC_MODNV: case BC_MODVV:
2392 | ins_arithpre RD, SAVE0
2393 | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
2394 break;
2395 case BC_POW:
2396 | evlddx CARG2, BASE, RB
2397 | evlddx CARG4, BASE, RC
2398 | evmergehi CARG1, CARG4, CARG2
2399 | checknum CARG1
2400 | evmergehi CARG3, CARG4, CARG4
2401 | checkanyfail ->vmeta_arith_vv
2402 | bl extern pow@plt
2403 | evmergelo CRET2, CRET1, CRET2
2404 | evstddx CRET2, BASE, RA
2405 | ins_next
2406 break;
2407
2408 case BC_CAT:
2409 | // RA = dst*8, RB = src_start*8, RC = src_end*8
2410 | sub CARG3, RC, RB
2411 | stw BASE, L->base
2412 | add CARG2, BASE, RC
2413 | mr SAVE0, RB
2414 |->BC_CAT_Z:
2415 | stw PC, SAVE_PC
2416 | mr CARG1, L
2417 | srwi CARG3, CARG3, 3
2418 | bl extern lj_meta_cat // (lua_State *L, TValue *top, int left)
2419 | // Returns NULL (finished) or TValue * (metamethod).
2420 | cmplwi CRET1, 0
2421 | lwz BASE, L->base
2422 | bne ->vmeta_binop
2423 | evlddx TMP0, BASE, SAVE0 // Copy result from RB to RA.
2424 | evstddx TMP0, BASE, RA
2425 | ins_next
2426 break;
2427
2428 /* -- Constant ops ------------------------------------------------------ */
2429
2430 case BC_KSTR:
2431 | // RA = dst*8, RD = str_const*8 (~)
2432 | ins_next1
2433 | srwi TMP1, RD, 1
2434 | subfic TMP1, TMP1, -4
2435 | lwzx TMP0, KBASE, TMP1 // KBASE-4-str_const*4
2436 | evmergelo TMP0, TISSTR, TMP0
2437 | evstddx TMP0, BASE, RA
2438 | ins_next2
2439 break;
2440 case BC_KCDATA:
2441 |.if FFI
2442 | // RA = dst*8, RD = cdata_const*8 (~)
2443 | ins_next1
2444 | srwi TMP1, RD, 1
2445 | subfic TMP1, TMP1, -4
2446 | lwzx TMP0, KBASE, TMP1 // KBASE-4-cdata_const*4
2447 | li TMP2, LJ_TCDATA
2448 | evmergelo TMP0, TMP2, TMP0
2449 | evstddx TMP0, BASE, RA
2450 | ins_next2
2451 |.endif
2452 break;
2453 case BC_KSHORT:
2454 | // RA = dst*8, RD = int16_literal*8
2455 | srwi TMP1, RD, 3
2456 | extsh TMP1, TMP1
2457 | ins_next1
2458 | efdcfsi TMP0, TMP1
2459 | evstddx TMP0, BASE, RA
2460 | ins_next2
2461 break;
2462 case BC_KNUM:
2463 | // RA = dst*8, RD = num_const*8
2464 | evlddx TMP0, KBASE, RD
2465 | ins_next1
2466 | evstddx TMP0, BASE, RA
2467 | ins_next2
2468 break;
2469 case BC_KPRI:
2470 | // RA = dst*8, RD = primitive_type*8 (~)
2471 | srwi TMP1, RD, 3
2472 | not TMP0, TMP1
2473 | ins_next1
2474 | stwx TMP0, BASE, RA
2475 | ins_next2
2476 break;
2477 case BC_KNIL: // Store TISNIL into the slot range base..end.
2478 | // RA = base*8, RD = end*8
2479 | evstddx TISNIL, BASE, RA // First slot is written unconditionally.
2480 | addi RA, RA, 8
2481 |1: // Store-then-test loop: the second slot is always written as well.
2482 | evstddx TISNIL, BASE, RA
2483 | cmpw RA, RD // Was the slot just written still below end?
2484 | addi RA, RA, 8
2485 | blt <1 // Yes: continue, so the slot at end is the last one written.
2486 | ins_next_
2487 break;
2488
2489 /* -- Upvalue and function ops ------------------------------------------ */
2490
2491 case BC_UGET:
2492 | // RA = dst*8, RD = uvnum*8
2493 | ins_next1
2494 | lwz LFUNC:RB, FRAME_FUNC(BASE)
2495 | srwi RD, RD, 1
2496 | addi RD, RD, offsetof(GCfuncL, uvptr)
2497 | lwzx UPVAL:RB, LFUNC:RB, RD
2498 | lwz TMP1, UPVAL:RB->v
2499 | evldd TMP0, 0(TMP1)
2500 | evstddx TMP0, BASE, RA
2501 | ins_next2
2502 break;
2503 case BC_USETV:
2504 | // RA = uvnum*8, RD = src*8
2505 | lwz LFUNC:RB, FRAME_FUNC(BASE)
2506 | srwi RA, RA, 1
2507 | addi RA, RA, offsetof(GCfuncL, uvptr)
2508 | evlddx TMP1, BASE, RD
2509 | lwzx UPVAL:RB, LFUNC:RB, RA
2510 | lbz TMP3, UPVAL:RB->marked
2511 | lwz CARG2, UPVAL:RB->v
2512 | andi. TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
2513 | lbz TMP0, UPVAL:RB->closed
2514 | evmergehi TMP2, TMP1, TMP1
2515 | evstdd TMP1, 0(CARG2)
2516 | cmplwi cr1, TMP0, 0
2517 | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
2518 | subi TMP2, TMP2, (LJ_TISNUM+1)
2519 | bne >2 // Upvalue is closed and black?
2520 |1:
2521 | ins_next
2522 |
2523 |2: // Check if new value is collectable.
2524 | cmplwi TMP2, LJ_TISGCV - (LJ_TISNUM+1)
2525 | bge <1 // tvisgcv(v)
2526 | lbz TMP3, GCOBJ:TMP1->gch.marked
2527 | andi. TMP3, TMP3, LJ_GC_WHITES // iswhite(v)
2528 | la CARG1, GG_DISP2G(DISPATCH)
2529 | // Crossed a write barrier. Move the barrier forward.
2530 | beq <1
2531 | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv)
2532 | b <1
2533 break;
2534 case BC_USETS:
2535 | // RA = uvnum*8, RD = str_const*8 (~)
2536 | lwz LFUNC:RB, FRAME_FUNC(BASE)
2537 | srwi TMP1, RD, 1
2538 | srwi RA, RA, 1
2539 | subfic TMP1, TMP1, -4
2540 | addi RA, RA, offsetof(GCfuncL, uvptr)
2541 | lwzx STR:TMP1, KBASE, TMP1 // KBASE-4-str_const*4
2542 | lwzx UPVAL:RB, LFUNC:RB, RA
2543 | evmergelo STR:TMP1, TISSTR, STR:TMP1
2544 | lbz TMP3, UPVAL:RB->marked
2545 | lwz CARG2, UPVAL:RB->v
2546 | andi. TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
2547 | lbz TMP3, STR:TMP1->marked
2548 | lbz TMP2, UPVAL:RB->closed
2549 | evstdd STR:TMP1, 0(CARG2)
2550 | bne >2
2551 |1:
2552 | ins_next
2553 |
2554 |2: // Check if string is white and ensure upvalue is closed.
2555 | andi. TMP3, TMP3, LJ_GC_WHITES // iswhite(str)
2556 | cmplwi cr1, TMP2, 0
2557 | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
2558 | la CARG1, GG_DISP2G(DISPATCH)
2559 | // Crossed a write barrier. Move the barrier forward.
2560 | beq <1
2561 | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv)
2562 | b <1
2563 break;
2564 case BC_USETN:
2565 | // RA = uvnum*8, RD = num_const*8
2566 | ins_next1
2567 | lwz LFUNC:RB, FRAME_FUNC(BASE)
2568 | srwi RA, RA, 1
2569 | addi RA, RA, offsetof(GCfuncL, uvptr)
2570 | evlddx TMP0, KBASE, RD
2571 | lwzx UPVAL:RB, LFUNC:RB, RA
2572 | lwz TMP1, UPVAL:RB->v
2573 | evstdd TMP0, 0(TMP1)
2574 | ins_next2
2575 break;
2576 case BC_USETP:
2577 | // RA = uvnum*8, RD = primitive_type*8 (~)
2578 | ins_next1
2579 | lwz LFUNC:RB, FRAME_FUNC(BASE)
2580 | srwi RA, RA, 1
2581 | addi RA, RA, offsetof(GCfuncL, uvptr)
2582 | srwi TMP0, RD, 3
2583 | lwzx UPVAL:RB, LFUNC:RB, RA
2584 | not TMP0, TMP0
2585 | lwz TMP1, UPVAL:RB->v
2586 | stw TMP0, 0(TMP1)
2587 | ins_next2
2588 break;
2589
2590 case BC_UCLO:
2591 | // RA = level*8, RD = target
2592 | lwz TMP1, L->openupval
2593 | branch_RD // Do this first since RD is not saved.
2594 | stw BASE, L->base
2595 | cmplwi TMP1, 0
2596 | mr CARG1, L
2597 | beq >1
2598 | add CARG2, BASE, RA
2599 | bl extern lj_func_closeuv // (lua_State *L, TValue *level)
2600 | lwz BASE, L->base
2601 |1:
2602 | ins_next
2603 break;
2604
2605 case BC_FNEW: // Create a new function object from a prototype constant and store it in the destination slot.
2606 | // RA = dst*8, RD = proto_const*8 (~) (holding function prototype)
2607 | srwi TMP1, RD, 1 // TMP1 = proto_const*4.
2608 | stw BASE, L->base
2609 | subfic TMP1, TMP1, -4 // TMP1 = -4 - proto_const*4.
2610 | stw PC, SAVE_PC
2611 | lwzx CARG2, KBASE, TMP1 // KBASE-4-proto_const*4
2612 | mr CARG1, L
2613 | lwz CARG3, FRAME_FUNC(BASE) // Third argument: the function of the current frame (the parent).
2614 | // (lua_State *L, GCproto *pt, GCfuncL *parent)
2615 | bl extern lj_func_newL_gc
2616 | // Returns GCfuncL *.
2617 | lwz BASE, L->base // Reload BASE from L->base after the call.
2618 | evmergelo LFUNC:CRET1, TISFUNC, LFUNC:CRET1 // Combine TISFUNC with the returned pointer into one 64-bit slot value.
2619 | evstddx LFUNC:CRET1, BASE, RA
2620 | ins_next
2621 break;
2622
2623 /* -- Table ops --------------------------------------------------------- */
2624
2625 case BC_TNEW: // Create a new table (TNEW) or duplicate a template table constant (TDUP) into the destination slot.
2626 case BC_TDUP:
2627 | // RA = dst*8, RD = (hbits|asize)*8 | tab_const*8 (~)
2628 | lwz TMP0, DISPATCH_GL(gc.total)(DISPATCH)
2629 | mr CARG1, L
2630 | lwz TMP1, DISPATCH_GL(gc.threshold)(DISPATCH)
2631 | stw BASE, L->base
2632 | cmplw TMP0, TMP1
2633 | stw PC, SAVE_PC
2634 | bge >5 // gc.total >= gc.threshold: run a GC step first.
2635 |1:
2636 if (op == BC_TNEW) {
2637 | rlwinm CARG2, RD, 29, 21, 31 // asize = (RD >> 3) & 0x7ff.
2638 | rlwinm CARG3, RD, 18, 27, 31 // hbits = (RD >> 14) & 0x1f.
2639 | cmpwi CARG2, 0x7ff
2640 | li TMP1, 0x801
2641 | iseleq CARG2, TMP1, CARG2 // An asize field of 0x7ff is replaced by 0x801.
2642 | bl extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits)
2643 | // Returns Table *.
2644 } else {
2645 | srwi TMP1, RD, 1
2646 | subfic TMP1, TMP1, -4
2647 | lwzx CARG2, KBASE, TMP1 // KBASE-4-tab_const*4
2648 | bl extern lj_tab_dup // (lua_State *L, Table *kt)
2649 | // Returns Table *.
2650 }
2651 | lwz BASE, L->base
2652 | evmergelo TAB:CRET1, TISTAB, TAB:CRET1 // Combine TISTAB with the returned pointer into one 64-bit slot value.
2653 | evstddx TAB:CRET1, BASE, RA
2654 | ins_next
2655 |5: // GC step path.
2656 | mr SAVE0, RD // RD is still needed at label 1, so it is parked in SAVE0 across the C call.
2657 | bl extern lj_gc_step_fixtop // (lua_State *L)
2658 | mr RD, SAVE0
2659 | mr CARG1, L // Set up the first argument again for the call made after label 1.
2660 | b <1
2661 break;
2662
2663 case BC_GGET:
2664 | // RA = dst*8, RD = str_const*8 (~)
2665 case BC_GSET:
2666 | // RA = src*8, RD = str_const*8 (~)
2667 | lwz LFUNC:TMP2, FRAME_FUNC(BASE)
2668 | srwi TMP1, RD, 1
2669 | lwz TAB:RB, LFUNC:TMP2->env
2670 | subfic TMP1, TMP1, -4
2671 | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4
2672 if (op == BC_GGET) {
2673 | b ->BC_TGETS_Z
2674 } else {
2675 | b ->BC_TSETS_Z
2676 }
2677 break;
2678
2679 case BC_TGETV:
2680 | // RA = dst*8, RB = table*8, RC = key*8
2681 | evlddx TAB:RB, BASE, RB
2682 | evlddx RC, BASE, RC
2683 | checktab TAB:RB
2684 | checkfail ->vmeta_tgetv
2685 | checknum RC
2686 | checkfail >5
2687 | // Convert number key to integer
2688 | efdctsi TMP2, RC
2689 | lwz TMP0, TAB:RB->asize
2690 | efdcfsi TMP1, TMP2
2691 | cmplw cr0, TMP0, TMP2
2692 | efdcmpeq cr1, RC, TMP1
2693 | lwz TMP1, TAB:RB->array
2694 | crand 4*cr0+gt, 4*cr0+gt, 4*cr1+gt
2695 | slwi TMP2, TMP2, 3
2696 | ble ->vmeta_tgetv // Integer key and in array part?
2697 | evlddx TMP1, TMP1, TMP2
2698 | checknil TMP1
2699 | checkok >2
2700 |1:
2701 | evstddx TMP1, BASE, RA
2702 | ins_next
2703 |
2704 |2: // Check for __index if table value is nil.
2705 | lwz TAB:TMP2, TAB:RB->metatable
2706 | cmplwi TAB:TMP2, 0
2707 | beq <1 // No metatable: done.
2708 | lbz TMP0, TAB:TMP2->nomm
2709 | andi. TMP0, TMP0, 1<<MM_index
2710 | bne <1 // 'no __index' flag set: done.
2711 | b ->vmeta_tgetv
2712 |
2713 |5:
2714 | checkstr STR:RC // String key?
2715 | checkok ->BC_TGETS_Z
2716 | b ->vmeta_tgetv
2717 break;
2718 case BC_TGETS:
2719 | // RA = dst*8, RB = table*8, RC = str_const*8 (~)
2720 | evlddx TAB:RB, BASE, RB
2721 | srwi TMP1, RC, 1
2722 | checktab TAB:RB
2723 | subfic TMP1, TMP1, -4
2724 | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4
2725 | checkfail ->vmeta_tgets1
2726 |->BC_TGETS_Z:
2727 | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8
2728 | lwz TMP0, TAB:RB->hmask
2729 | lwz TMP1, STR:RC->hash
2730 | lwz NODE:TMP2, TAB:RB->node
2731 | evmergelo STR:RC, TISSTR, STR:RC
2732 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
2733 | slwi TMP0, TMP1, 5
2734 | slwi TMP1, TMP1, 3
2735 | sub TMP1, TMP0, TMP1
2736 | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
2737 |1:
2738 | evldd TMP0, NODE:TMP2->key
2739 | evldd TMP1, NODE:TMP2->val
2740 | evcmpeq TMP0, STR:RC
2741 | checkanyfail >4
2742 | checknil TMP1
2743 | checkok >5 // Key found, but nil value?
2744 |3:
2745 | evstddx TMP1, BASE, RA
2746 | ins_next
2747 |
2748 |4: // Follow hash chain.
2749 | lwz NODE:TMP2, NODE:TMP2->next
2750 | cmplwi NODE:TMP2, 0
2751 | bne <1
2752 | // End of hash chain: key not found, nil result.
2753 | evmr TMP1, TISNIL
2754 |
2755 |5: // Check for __index if table value is nil.
2756 | lwz TAB:TMP2, TAB:RB->metatable
2757 | cmplwi TAB:TMP2, 0
2758 | beq <3 // No metatable: done.
2759 | lbz TMP0, TAB:TMP2->nomm
2760 | andi. TMP0, TMP0, 1<<MM_index
2761 | bne <3 // 'no __index' flag set: done.
2762 | b ->vmeta_tgets
2763 break;
2764 case BC_TGETB:
2765 | // RA = dst*8, RB = table*8, RC = index*8
2766 | evlddx TAB:RB, BASE, RB
2767 | srwi TMP0, RC, 3
2768 | checktab TAB:RB
2769 | checkfail ->vmeta_tgetb
2770 | lwz TMP1, TAB:RB->asize
2771 | lwz TMP2, TAB:RB->array
2772 | cmplw TMP0, TMP1
2773 | bge ->vmeta_tgetb
2774 | evlddx TMP1, TMP2, RC
2775 | checknil TMP1
2776 | checkok >5
2777 |1:
2778 | ins_next1
2779 | evstddx TMP1, BASE, RA
2780 | ins_next2
2781 |
2782 |5: // Check for __index if table value is nil.
2783 | lwz TAB:TMP2, TAB:RB->metatable
2784 | cmplwi TAB:TMP2, 0
2785 | beq <1 // No metatable: done.
2786 | lbz TMP2, TAB:TMP2->nomm
2787 | andi. TMP2, TMP2, 1<<MM_index
2788 | bne <1 // 'no __index' flag set: done.
2789 | b ->vmeta_tgetb // Caveat: preserve TMP0!
2790 break;
2791
2792 case BC_TSETV:
2793 | // RA = src*8, RB = table*8, RC = key*8
2794 | evlddx TAB:RB, BASE, RB
2795 | evlddx RC, BASE, RC
2796 | checktab TAB:RB
2797 | checkfail ->vmeta_tsetv
2798 | checknum RC
2799 | checkfail >5
2800 | // Convert number key to integer
2801 | efdctsi TMP2, RC
2802 | evlddx SAVE0, BASE, RA
2803 | lwz TMP0, TAB:RB->asize
2804 | efdcfsi TMP1, TMP2
2805 | cmplw cr0, TMP0, TMP2
2806 | efdcmpeq cr1, RC, TMP1
2807 | lwz TMP1, TAB:RB->array
2808 | crand 4*cr0+gt, 4*cr0+gt, 4*cr1+gt
2809 | slwi TMP0, TMP2, 3
2810 | ble ->vmeta_tsetv // Integer key and in array part?
2811 | lbz TMP3, TAB:RB->marked
2812 | evlddx TMP2, TMP1, TMP0
2813 | checknil TMP2
2814 | checkok >3
2815 |1:
2816 | andi. TMP2, TMP3, LJ_GC_BLACK // isblack(table)
2817 | evstddx SAVE0, TMP1, TMP0
2818 | bne >7
2819 |2:
2820 | ins_next
2821 |
2822 |3: // Check for __newindex if previous value is nil.
2823 | lwz TAB:TMP2, TAB:RB->metatable
2824 | cmplwi TAB:TMP2, 0
2825 | beq <1 // No metatable: done.
2826 | lbz TMP2, TAB:TMP2->nomm
2827 | andi. TMP2, TMP2, 1<<MM_newindex
2828 | bne <1 // 'no __newindex' flag set: done.
2829 | b ->vmeta_tsetv
2830 |
2831 |5:
2832 | checkstr STR:RC // String key?
2833 | checkok ->BC_TSETS_Z
2834 | b ->vmeta_tsetv
2835 |
2836 |7: // Possible table write barrier for the value. Skip valiswhite check.
2837 | barrierback TAB:RB, TMP3, TMP0
2838 | b <2
2839 break;
2840 case BC_TSETS:
2841 | // RA = src*8, RB = table*8, RC = str_const*8 (~)
2842 | evlddx TAB:RB, BASE, RB
2843 | srwi TMP1, RC, 1
2844 | checktab TAB:RB
2845 | subfic TMP1, TMP1, -4
2846 | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4
2847 | checkfail ->vmeta_tsets1
2848 |->BC_TSETS_Z:
2849 | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = src*8
2850 | lwz TMP0, TAB:RB->hmask
2851 | lwz TMP1, STR:RC->hash
2852 | lwz NODE:TMP2, TAB:RB->node
2853 | evmergelo STR:RC, TISSTR, STR:RC
2854 | stb ZERO, TAB:RB->nomm // Clear metamethod cache.
2855 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
2856 | evlddx SAVE0, BASE, RA
2857 | slwi TMP0, TMP1, 5
2858 | slwi TMP1, TMP1, 3
2859 | sub TMP1, TMP0, TMP1
2860 | lbz TMP3, TAB:RB->marked
2861 | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
2862 |1:
2863 | evldd TMP0, NODE:TMP2->key
2864 | evldd TMP1, NODE:TMP2->val
2865 | evcmpeq TMP0, STR:RC
2866 | checkanyfail >5
2867 | checknil TMP1
2868 | checkok >4 // Key found, but nil value?
2869 |2:
2870 | andi. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
2871 | evstdd SAVE0, NODE:TMP2->val
2872 | bne >7
2873 |3:
2874 | ins_next
2875 |
2876 |4: // Check for __newindex if previous value is nil.
2877 | lwz TAB:TMP1, TAB:RB->metatable
2878 | cmplwi TAB:TMP1, 0
2879 | beq <2 // No metatable: done.
2880 | lbz TMP0, TAB:TMP1->nomm
2881 | andi. TMP0, TMP0, 1<<MM_newindex
2882 | bne <2 // 'no __newindex' flag set: done.
2883 | b ->vmeta_tsets
2884 |
2885 |5: // Follow hash chain.
2886 | lwz NODE:TMP2, NODE:TMP2->next
2887 | cmplwi NODE:TMP2, 0
2888 | bne <1
2889 | // End of hash chain: key not found, add a new one.
2890 |
2891 | // But check for __newindex first.
2892 | lwz TAB:TMP1, TAB:RB->metatable
2893 | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
2894 | stw PC, SAVE_PC
2895 | mr CARG1, L
2896 | cmplwi TAB:TMP1, 0
2897 | stw BASE, L->base
2898 | beq >6 // No metatable: continue.
2899 | lbz TMP0, TAB:TMP1->nomm
2900 | andi. TMP0, TMP0, 1<<MM_newindex
2901 | beq ->vmeta_tsets // 'no __newindex' flag NOT set: check.
2902 |6:
2903 | mr CARG2, TAB:RB
2904 | evstdd STR:RC, 0(CARG3)
2905 | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
2906 | // Returns TValue *.
2907 | lwz BASE, L->base
2908 | evstdd SAVE0, 0(CRET1)
2909 | b <3 // No 2nd write barrier needed.
2910 |
2911 |7: // Possible table write barrier for the value. Skip valiswhite check.
2912 | barrierback TAB:RB, TMP3, TMP0
2913 | b <3
2914 break;
2915 case BC_TSETB:
2916 | // RA = src*8, RB = table*8, RC = index*8
2917 | evlddx TAB:RB, BASE, RB
2918 | srwi TMP0, RC, 3
2919 | checktab TAB:RB
2920 | checkfail ->vmeta_tsetb
2921 | lwz TMP1, TAB:RB->asize
2922 | lwz TMP2, TAB:RB->array
2923 | lbz TMP3, TAB:RB->marked
2924 | cmplw TMP0, TMP1
2925 | evlddx SAVE0, BASE, RA
2926 | bge ->vmeta_tsetb
2927 | evlddx TMP1, TMP2, RC
2928 | checknil TMP1
2929 | checkok >5
2930 |1:
2931 | andi. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
2932 | evstddx SAVE0, TMP2, RC
2933 | bne >7
2934 |2:
2935 | ins_next
2936 |
2937 |5: // Check for __newindex if previous value is nil.
2938 | lwz TAB:TMP1, TAB:RB->metatable
2939 | cmplwi TAB:TMP1, 0
2940 | beq <1 // No metatable: done.
2941 | lbz TMP1, TAB:TMP1->nomm
2942 | andi. TMP1, TMP1, 1<<MM_newindex
2943 | bne <1 // 'no __newindex' flag set: done.
2944 | b ->vmeta_tsetb // Caveat: preserve TMP0!
2945 |
2946 |7: // Possible table write barrier for the value. Skip valiswhite check.
2947 | barrierback TAB:RB, TMP3, TMP0
2948 | b <2
2949 break;
2950
2951 case BC_TSETM:
2952 | // RA = base*8 (table at base-1), RD = num_const*8 (start index)
2953 | add RA, BASE, RA
2954 |1:
2955 | add TMP3, KBASE, RD
2956 | lwz TAB:CARG2, -4(RA) // Guaranteed to be a table.
2957 | addic. TMP0, MULTRES, -8
2958 | lwz TMP3, 4(TMP3) // Integer constant is in lo-word.
2959 | srwi CARG3, TMP0, 3
2960 | beq >4 // Nothing to copy?
2961 | add CARG3, CARG3, TMP3
2962 | lwz TMP2, TAB:CARG2->asize
2963 | slwi TMP1, TMP3, 3
2964 | lbz TMP3, TAB:CARG2->marked
2965 | cmplw CARG3, TMP2
2966 | add TMP2, RA, TMP0
2967 | lwz TMP0, TAB:CARG2->array
2968 | bgt >5
2969 | add TMP1, TMP1, TMP0
2970 | andi. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
2971 |3: // Copy result slots to table.
2972 | evldd TMP0, 0(RA)
2973 | addi RA, RA, 8
2974 | cmpw cr1, RA, TMP2
2975 | evstdd TMP0, 0(TMP1)
2976 | addi TMP1, TMP1, 8
2977 | blt cr1, <3
2978 | bne >7
2979 |4:
2980 | ins_next
2981 |
2982 |5: // Need to resize array part.
2983 | stw BASE, L->base
2984 | mr CARG1, L
2985 | stw PC, SAVE_PC
2986 | mr SAVE0, RD
2987 | bl extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize)
2988 | // Must not reallocate the stack.
2989 | mr RD, SAVE0
2990 | b <1
2991 |
2992 |7: // Possible table write barrier for any value. Skip valiswhite check.
2993 | barrierback TAB:CARG2, TMP3, TMP0
2994 | b <4
2995 break;
2996
2997 /* -- Calls and vararg handling ----------------------------------------- */
2998
2999 case BC_CALLM:
3000 | // RA = base*8, (RB = (nresults+1)*8,) RC = extra_nargs*8
3001 | add NARGS8:RC, NARGS8:RC, MULTRES
3002 | // Fall through. Assumes BC_CALL follows.
3003 break;
3004 case BC_CALL:
3005 | // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8
3006 | evlddx LFUNC:RB, BASE, RA
3007 | mr TMP2, BASE
3008 | add BASE, BASE, RA
3009 | subi NARGS8:RC, NARGS8:RC, 8
3010 | checkfunc LFUNC:RB
3011 | addi BASE, BASE, 8
3012 | checkfail ->vmeta_call
3013 | ins_call
3014 break;
3015
3016 case BC_CALLMT:
3017 | // RA = base*8, (RB = 0,) RC = extra_nargs*8
3018 | add NARGS8:RC, NARGS8:RC, MULTRES
3019 | // Fall through. Assumes BC_CALLT follows.
3020 break;
3021 case BC_CALLT:
3022 | // RA = base*8, (RB = 0,) RC = (nargs+1)*8
3023 | evlddx LFUNC:RB, BASE, RA
3024 | add RA, BASE, RA
3025 | lwz TMP1, FRAME_PC(BASE)
3026 | subi NARGS8:RC, NARGS8:RC, 8
3027 | checkfunc LFUNC:RB
3028 | addi RA, RA, 8
3029 | checkfail ->vmeta_callt
3030 |->BC_CALLT_Z:
3031 | andi. TMP0, TMP1, FRAME_TYPE // Caveat: preserve cr0 until the crand.
3032 | lbz TMP3, LFUNC:RB->ffid
3033 | xori TMP2, TMP1, FRAME_VARG
3034 | cmplwi cr1, NARGS8:RC, 0
3035 | bne >7
3036 |1:
3037 | stw LFUNC:RB, FRAME_FUNC(BASE) // Copy function down, but keep PC.
3038 | li TMP2, 0
3039 | cmplwi cr7, TMP3, 1 // (> FF_C) Calling a fast function?
3040 | beq cr1, >3
3041 |2:
3042 | addi TMP3, TMP2, 8
3043 | evlddx TMP0, RA, TMP2
3044 | cmplw cr1, TMP3, NARGS8:RC
3045 | evstddx TMP0, BASE, TMP2
3046 | mr TMP2, TMP3
3047 | bne cr1, <2
3048 |3:
3049 | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+gt
3050 | beq >5
3051 |4:
3052 | ins_callt
3053 |
3054 |5: // Tailcall to a fast function with a Lua frame below.
3055 | lwz INS, -4(TMP1)
3056 | decode_RA8 RA, INS
3057 | sub TMP1, BASE, RA
3058 | lwz LFUNC:TMP1, FRAME_FUNC-8(TMP1)
3059 | lwz TMP1, LFUNC:TMP1->pc
3060 | lwz KBASE, PC2PROTO(k)(TMP1) // Need to prepare KBASE.
3061 | b <4
3062 |
3063 |7: // Tailcall from a vararg function.
3064 | andi. TMP0, TMP2, FRAME_TYPEP
3065 | bne <1 // Vararg frame below?
3066 | sub BASE, BASE, TMP2 // Relocate BASE down.
3067 | lwz TMP1, FRAME_PC(BASE)
3068 | andi. TMP0, TMP1, FRAME_TYPE
3069 | b <1
3070 break;
3071
3072 case BC_ITERC:
3073 | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8))
3074 | subi RA, RA, 24 // evldd doesn't support neg. offsets.
3075 | mr TMP2, BASE
3076 | evlddx LFUNC:RB, BASE, RA
3077 | add BASE, BASE, RA
3078 | evldd TMP0, 8(BASE)
3079 | evldd TMP1, 16(BASE)
3080 | evstdd LFUNC:RB, 24(BASE) // Copy callable.
3081 | checkfunc LFUNC:RB
3082 | evstdd TMP0, 32(BASE) // Copy state.
3083 | li NARGS8:RC, 16 // Iterators get 2 arguments.
3084 | evstdd TMP1, 40(BASE) // Copy control var.
3085 | addi BASE, BASE, 32
3086 | checkfail ->vmeta_call
3087 | ins_call
3088 break;
3089
3090 case BC_ITERN:
3091 | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8)
3092 |.if JIT
3093 | // NYI: add hotloop, record BC_ITERN.
3094 |.endif
3095 | add RA, BASE, RA
3096 | lwz TAB:RB, -12(RA)
3097 | lwz RC, -4(RA) // Get index from control var.
3098 | lwz TMP0, TAB:RB->asize
3099 | lwz TMP1, TAB:RB->array
3100 | addi PC, PC, 4
3101 |1: // Traverse array part.
3102 | cmplw RC, TMP0
3103 | slwi TMP3, RC, 3
3104 | bge >5 // Index points after array part?
3105 | evlddx TMP2, TMP1, TMP3
3106 | checknil TMP2
3107 | lwz INS, -4(PC)
3108 | checkok >4
3109 | efdcfsi TMP0, RC
3110 | addi RC, RC, 1
3111 | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
3112 | evstdd TMP2, 8(RA)
3113 | decode_RD4 TMP1, INS
3114 | stw RC, -4(RA) // Update control var.
3115 | add PC, TMP1, TMP3
3116 | evstdd TMP0, 0(RA)
3117 |3:
3118 | ins_next
3119 |
3120 |4: // Skip holes in array part.
3121 | addi RC, RC, 1
3122 | b <1
3123 |
3124 |5: // Traverse hash part.
3125 | lwz TMP1, TAB:RB->hmask
3126 | sub RC, RC, TMP0
3127 | lwz TMP2, TAB:RB->node
3128 |6:
3129 | cmplw RC, TMP1 // End of iteration? Branch to ITERL+1.
3130 | slwi TMP3, RC, 5
3131 | bgt <3
3132 | slwi RB, RC, 3
3133 | sub TMP3, TMP3, RB
3134 | evlddx RB, TMP2, TMP3
3135 | add NODE:TMP3, TMP2, TMP3
3136 | checknil RB
3137 | lwz INS, -4(PC)
3138 | checkok >7
3139 | evldd TMP3, NODE:TMP3->key
3140 | addis TMP2, PC, -(BCBIAS_J*4 >> 16)
3141 | evstdd RB, 8(RA)
3142 | add RC, RC, TMP0
3143 | decode_RD4 TMP1, INS
3144 | evstdd TMP3, 0(RA)
3145 | addi RC, RC, 1
3146 | add PC, TMP1, TMP2
3147 | stw RC, -4(RA) // Update control var.
3148 | b <3
3149 |
3150 |7: // Skip holes in hash part.
3151 | addi RC, RC, 1
3152 | b <6
3153 break;
3154
3155 case BC_ISNEXT:
3156 | // RA = base*8, RD = target (points to ITERN)
3157 | add RA, BASE, RA
3158 | li TMP2, -24
3159 | evlddx CFUNC:TMP1, RA, TMP2
3160 | lwz TMP2, -16(RA)
3161 | lwz TMP3, -8(RA)
3162 | evmergehi TMP0, CFUNC:TMP1, CFUNC:TMP1
3163 | cmpwi cr0, TMP2, LJ_TTAB
3164 | cmpwi cr1, TMP0, LJ_TFUNC
3165 | cmpwi cr6, TMP3, LJ_TNIL
3166 | bne cr1, >5
3167 | lbz TMP1, CFUNC:TMP1->ffid
3168 | crand 4*cr0+eq, 4*cr0+eq, 4*cr6+eq
3169 | cmpwi cr7, TMP1, FF_next_N
3170 | srwi TMP0, RD, 1
3171 | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq
3172 | add TMP3, PC, TMP0
3173 | bne cr0, >5
3174 | lus TMP1, 0xfffe
3175 | ori TMP1, TMP1, 0x7fff
3176 | stw ZERO, -4(RA) // Initialize control var.
3177 | stw TMP1, -8(RA)
3178 | addis PC, TMP3, -(BCBIAS_J*4 >> 16)
3179 |1:
3180 | ins_next
3181 |5: // Despecialize bytecode if any of the checks fail.
3182 | li TMP0, BC_JMP
3183 | li TMP1, BC_ITERC
3184 | stb TMP0, -1(PC)
3185 | addis PC, TMP3, -(BCBIAS_J*4 >> 16)
3186 | stb TMP1, 3(PC)
3187 | b <1
3188 break;
3189
3190 case BC_VARG:
3191 | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8
3192 | lwz TMP0, FRAME_PC(BASE)
3193 | add RC, BASE, RC
3194 | add RA, BASE, RA
3195 | addi RC, RC, FRAME_VARG
3196 | add TMP2, RA, RB
3197 | subi TMP3, BASE, 8 // TMP3 = vtop
3198 | sub RC, RC, TMP0 // RC = vbase
3199 | // Note: RC may now be even _above_ BASE if nargs was < numparams.
3200 | cmplwi cr1, RB, 0
3201 | sub. TMP1, TMP3, RC
3202 | beq cr1, >5 // Copy all varargs?
3203 | subi TMP2, TMP2, 16
3204 | ble >2 // No vararg slots?
3205 |1: // Copy vararg slots to destination slots.
3206 | evldd TMP0, 0(RC)
3207 | addi RC, RC, 8
3208 | evstdd TMP0, 0(RA)
3209 | cmplw RA, TMP2
3210 | cmplw cr1, RC, TMP3
3211 | bge >3 // All destination slots filled?
3212 | addi RA, RA, 8
3213 | blt cr1, <1 // More vararg slots?
3214 |2: // Fill up remainder with nil.
3215 | evstdd TISNIL, 0(RA)
3216 | cmplw RA, TMP2
3217 | addi RA, RA, 8
3218 | blt <2
3219 |3:
3220 | ins_next
3221 |
3222 |5: // Copy all varargs.
3223 | lwz TMP0, L->maxstack
3224 | li MULTRES, 8 // MULTRES = (0+1)*8
3225 | ble <3 // No vararg slots?
3226 | add TMP2, RA, TMP1
3227 | cmplw TMP2, TMP0
3228 | addi MULTRES, TMP1, 8
3229 | bgt >7
3230 |6:
3231 | evldd TMP0, 0(RC)
3232 | addi RC, RC, 8
3233 | evstdd TMP0, 0(RA)
3234 | cmplw RC, TMP3
3235 | addi RA, RA, 8
3236 | blt <6 // More vararg slots?
3237 | b <3
3238 |
3239 |7: // Grow stack for varargs.
3240 | mr CARG1, L
3241 | stw RA, L->top
3242 | sub SAVE0, RC, BASE // Need delta, because BASE may change.
3243 | stw BASE, L->base
3244 | sub RA, RA, BASE
3245 | stw PC, SAVE_PC
3246 | srwi CARG2, TMP1, 3
3247 | bl extern lj_state_growstack // (lua_State *L, int n)
3248 | lwz BASE, L->base
3249 | add RA, BASE, RA
3250 | add RC, BASE, SAVE0
3251 | subi TMP3, BASE, 8
3252 | b <6
3253 break;
3254
3255 /* -- Returns ----------------------------------------------------------- */
3256
3257 case BC_RETM:
3258 | // RA = results*8, RD = extra_nresults*8
3259 | add RD, RD, MULTRES // MULTRES >= 8, so RD >= 8.
3260 | // Fall through. Assumes BC_RET follows.
3261 break;
3262
3263 case BC_RET:
3264 | // RA = results*8, RD = (nresults+1)*8
3265 | lwz PC, FRAME_PC(BASE)
3266 | add RA, BASE, RA
3267 | mr MULTRES, RD
3268 |1:
3269 | andi. TMP0, PC, FRAME_TYPE
3270 | xori TMP1, PC, FRAME_VARG
3271 | bne ->BC_RETV_Z
3272 |
3273 |->BC_RET_Z:
3274 | // BASE = base, RA = resultptr, RD = (nresults+1)*8, PC = return
3275 | lwz INS, -4(PC)
3276 | cmpwi RD, 8
3277 | subi TMP2, BASE, 8
3278 | subi RC, RD, 8
3279 | decode_RB8 RB, INS
3280 | beq >3
3281 | li TMP1, 0
3282 |2:
3283 | addi TMP3, TMP1, 8
3284 | evlddx TMP0, RA, TMP1
3285 | cmpw TMP3, RC
3286 | evstddx TMP0, TMP2, TMP1
3287 | beq >3
3288 | addi TMP1, TMP3, 8
3289 | evlddx TMP0, RA, TMP3
3290 | cmpw TMP1, RC
3291 | evstddx TMP0, TMP2, TMP3
3292 | bne <2
3293 |3:
3294 |5:
3295 | cmplw RB, RD
3296 | decode_RA8 RA, INS
3297 | bgt >6
3298 | sub BASE, TMP2, RA
3299 | lwz LFUNC:TMP1, FRAME_FUNC(BASE)
3300 | ins_next1
3301 | lwz TMP1, LFUNC:TMP1->pc
3302 | lwz KBASE, PC2PROTO(k)(TMP1)
3303 | ins_next2
3304 |
3305 |6: // Fill up results with nil.
3306 | subi TMP1, RD, 8
3307 | addi RD, RD, 8
3308 | evstddx TISNIL, TMP2, TMP1
3309 | b <5
3310 |
3311 |->BC_RETV_Z: // Non-standard return case.
3312 | andi. TMP2, TMP1, FRAME_TYPEP
3313 | bne ->vm_return
3314 | // Return from vararg function: relocate BASE down.
3315 | sub BASE, BASE, TMP1
3316 | lwz PC, FRAME_PC(BASE)
3317 | b <1
3318 break;
3319
3320 case BC_RET0: case BC_RET1:
3321 | // RA = results*8, RD = (nresults+1)*8
3322 | lwz PC, FRAME_PC(BASE)
3323 | add RA, BASE, RA
3324 | mr MULTRES, RD
3325 | andi. TMP0, PC, FRAME_TYPE
3326 | xori TMP1, PC, FRAME_VARG
3327 | bne ->BC_RETV_Z
3328 |
3329 | lwz INS, -4(PC)
3330 | subi TMP2, BASE, 8
3331 | decode_RB8 RB, INS
3332 if (op == BC_RET1) {
3333 | evldd TMP0, 0(RA)
3334 | evstdd TMP0, 0(TMP2)
3335 }
3336 |5:
3337 | cmplw RB, RD
3338 | decode_RA8 RA, INS
3339 | bgt >6
3340 | sub BASE, TMP2, RA
3341 | lwz LFUNC:TMP1, FRAME_FUNC(BASE)
3342 | ins_next1
3343 | lwz TMP1, LFUNC:TMP1->pc
3344 | lwz KBASE, PC2PROTO(k)(TMP1)
3345 | ins_next2
3346 |
3347 |6: // Fill up results with nil.
3348 | subi TMP1, RD, 8
3349 | addi RD, RD, 8
3350 | evstddx TISNIL, TMP2, TMP1
3351 | b <5
3352 break;
3353
3354 /* -- Loops and branches ------------------------------------------------ */
3355
3356 case BC_FORL:
3357 |.if JIT
3358 | hotloop
3359 |.endif
3360 | // Fall through. Assumes BC_IFORL follows.
3361 break;
3362
3363 case BC_JFORI:
3364 case BC_JFORL:
3365#if !LJ_HASJIT
3366 break;
3367#endif
3368 case BC_FORI:
3369 case BC_IFORL:
3370 | // RA = base*8, RD = target (after end of loop or start of loop)
3371 vk = (op == BC_IFORL || op == BC_JFORL);
3372 | add RA, BASE, RA
3373 | evldd TMP1, FORL_IDX*8(RA)
3374 | evldd TMP3, FORL_STEP*8(RA)
3375 | evldd TMP2, FORL_STOP*8(RA)
3376 if (!vk) {
3377 | evcmpgtu cr0, TMP1, TISNUM
3378 | evcmpgtu cr7, TMP3, TISNUM
3379 | evcmpgtu cr1, TMP2, TISNUM
3380 | cror 4*cr0+lt, 4*cr0+lt, 4*cr7+lt
3381 | cror 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
3382 | blt ->vmeta_for
3383 }
3384 if (vk) {
3385 | efdadd TMP1, TMP1, TMP3
3386 | evstdd TMP1, FORL_IDX*8(RA)
3387 }
3388 | evcmpgts TMP3, TISNIL
3389 | evstdd TMP1, FORL_EXT*8(RA)
3390 | bge >2
3391 | efdcmpgt TMP1, TMP2
3392 |1:
3393 if (op != BC_JFORL) {
3394 | srwi RD, RD, 1
3395 | add RD, PC, RD
3396 if (op == BC_JFORI) {
3397 | addis PC, RD, -(BCBIAS_J*4 >> 16)
3398 } else {
3399 | addis RD, RD, -(BCBIAS_J*4 >> 16)
3400 }
3401 }
3402 if (op == BC_FORI) {
3403 | iselgt PC, RD, PC
3404 } else if (op == BC_IFORL) {
3405 | iselgt PC, PC, RD
3406 } else {
3407 | ble =>BC_JLOOP
3408 }
3409 | ins_next
3410 |2:
3411 | efdcmpgt TMP2, TMP1
3412 | b <1
3413 break;
3414
3415 case BC_ITERL:
3416 |.if JIT
3417 | hotloop
3418 |.endif
3419 | // Fall through. Assumes BC_IITERL follows.
3420 break;
3421
3422 case BC_JITERL:
3423#if !LJ_HASJIT
3424 break;
3425#endif
3426 case BC_IITERL:
3427 | // RA = base*8, RD = target
3428 | evlddx TMP1, BASE, RA
3429 | subi RA, RA, 8
3430 | checknil TMP1
3431 | checkok >1 // Stop if iterator returned nil.
3432 if (op == BC_JITERL) {
3433 | NYI
3434 } else {
3435 | branch_RD // Otherwise save control var + branch.
3436 | evstddx TMP1, BASE, RA
3437 }
3438 |1:
3439 | ins_next
3440 break;
3441
3442 case BC_LOOP:
3443 | // RA = base*8, RD = target (loop extent)
3444 | // Note: RA/RD is only used by trace recorder to determine scope/extent
3445 | // This opcode does NOT jump, its only purpose is to detect a hot loop.
3446 |.if JIT
3447 | hotloop
3448 |.endif
3449 | // Fall through. Assumes BC_ILOOP follows.
3450 break;
3451
3452 case BC_ILOOP:
3453 | // RA = base*8, RD = target (loop extent)
3454 | ins_next
3455 break;
3456
3457 case BC_JLOOP:
3458 |.if JIT
3459 | NYI
3460 |.endif
3461 break;
3462
3463 case BC_JMP:
3464 | // RA = base*8 (only used by trace recorder), RD = target
3465 | branch_RD
3466 | ins_next
3467 break;
3468
3469 /* -- Function headers -------------------------------------------------- */
3470
3471 case BC_FUNCF:
3472 |.if JIT
3473 | hotcall
3474 |.endif
3475 case BC_FUNCV: /* NYI: compiled vararg functions. */
3476 | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow.
3477 break;
3478
3479 case BC_JFUNCF:
3480#if !LJ_HASJIT
3481 break;
3482#endif
3483 case BC_IFUNCF:
3484 | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8
3485 | lwz TMP2, L->maxstack
3486 | lbz TMP1, -4+PC2PROTO(numparams)(PC)
3487 | lwz KBASE, -4+PC2PROTO(k)(PC)
3488 | cmplw RA, TMP2
3489 | slwi TMP1, TMP1, 3
3490 | bgt ->vm_growstack_l
3491 | ins_next1
3492 |2:
3493 | cmplw NARGS8:RC, TMP1 // Check for missing parameters.
3494 | ble >3
3495 if (op == BC_JFUNCF) {
3496 | NYI
3497 } else {
3498 | ins_next2
3499 }
3500 |
3501 |3: // Clear missing parameters.
3502 | evstddx TISNIL, BASE, NARGS8:RC
3503 | addi NARGS8:RC, NARGS8:RC, 8
3504 | b <2
3505 break;
3506
3507 case BC_JFUNCV:
3508#if !LJ_HASJIT
3509 break;
3510#endif
3511 | NYI // NYI: compiled vararg functions
3512 break; /* NYI: compiled vararg functions. */
3513
3514 case BC_IFUNCV:
3515 | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8
3516 | lwz TMP2, L->maxstack
3517 | add TMP1, BASE, RC
3518 | add TMP0, RA, RC
3519 | stw LFUNC:RB, 4(TMP1) // Store copy of LFUNC.
3520 | addi TMP3, RC, 8+FRAME_VARG
3521 | lwz KBASE, -4+PC2PROTO(k)(PC)
3522 | cmplw TMP0, TMP2
3523 | stw TMP3, 0(TMP1) // Store delta + FRAME_VARG.
3524 | bge ->vm_growstack_l
3525 | lbz TMP2, -4+PC2PROTO(numparams)(PC)
3526 | mr RA, BASE
3527 | mr RC, TMP1
3528 | ins_next1
3529 | cmpwi TMP2, 0
3530 | addi BASE, TMP1, 8
3531 | beq >3
3532 |1:
3533 | cmplw RA, RC // Less args than parameters?
3534 | evldd TMP0, 0(RA)
3535 | bge >4
3536 | evstdd TISNIL, 0(RA) // Clear old fixarg slot (help the GC).
3537 | addi RA, RA, 8
3538 |2:
3539 | addic. TMP2, TMP2, -1
3540 | evstdd TMP0, 8(TMP1)
3541 | addi TMP1, TMP1, 8
3542 | bne <1
3543 |3:
3544 | ins_next2
3545 |
3546 |4: // Clear missing parameters.
3547 | evmr TMP0, TISNIL
3548 | b <2
3549 break;
3550
3551 case BC_FUNCC:
3552 case BC_FUNCCW:
3553 | // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8
3554 if (op == BC_FUNCC) {
3555 | lwz TMP3, CFUNC:RB->f
3556 } else {
3557 | lwz TMP3, DISPATCH_GL(wrapf)(DISPATCH)
3558 }
3559 | add TMP1, RA, NARGS8:RC
3560 | lwz TMP2, L->maxstack
3561 | add RC, BASE, NARGS8:RC
3562 | stw BASE, L->base
3563 | cmplw TMP1, TMP2
3564 | stw RC, L->top
3565 | li_vmstate C
3566 | mtctr TMP3
3567 if (op == BC_FUNCCW) {
3568 | lwz CARG2, CFUNC:RB->f
3569 }
3570 | mr CARG1, L
3571 | bgt ->vm_growstack_c // Need to grow stack.
3572 | st_vmstate
3573 | bctrl // (lua_State *L [, lua_CFunction f])
3574 | // Returns nresults.
3575 | lwz TMP1, L->top
3576 | slwi RD, CRET1, 3
3577 | lwz BASE, L->base
3578 | li_vmstate INTERP
3579 | lwz PC, FRAME_PC(BASE) // Fetch PC of caller.
3580 | sub RA, TMP1, RD // RA = L->top - nresults*8
3581 | st_vmstate
3582 | b ->vm_returnc
3583 break;
3584
3585 /* ---------------------------------------------------------------------- */
3586
3587 default:
3588 fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
3589 exit(2);
3590 break;
3591 }
3592}
3593
/* Build the whole backend: emit the shared subroutines first, then call
** build_ins() once for every opcode number in [0, BC__MAX).
** Always returns BC__MAX.
*/
3594static int build_backend(BuildCtx *ctx)
3595{
3596 int op;
3597
3598 dasm_growpc(Dst, BC__MAX); /* NOTE(review): presumably reserves one DynASM pc label per opcode -- confirm. */
3599
3600 build_subroutines(ctx);
3601
3602 |.code_op
3603 for (op = 0; op < BC__MAX; op++) /* Opcodes are visited in ascending numeric order. */
3604 build_ins(ctx, (BCOp)op, op); /* op is passed twice: as the BCOp and as the third argument. */
3605
3606 return BC__MAX;
3607}
3608
3609/* Emit pseudo frame-info for all assembler functions. */
/* Only BUILD_elfasm mode produces output; every other mode emits nothing.
** Two sections are written as assembler text to ctx->fp: .debug_frame and
** .eh_frame. Each gets one CIE followed by one FDE that starts at .Lbegin
** and spans ctx->codesz bytes. The register save rules are identical in
** both FDEs.
*/
3610static void emit_asm_debug(BuildCtx *ctx)
3611{
3612 int i;
3613 switch (ctx->mode) {
3614 case BUILD_elfasm:
3615 fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
3616 fprintf(ctx->fp,
3617 ".Lframe0:\n"
3618 "\t.long .LECIE0-.LSCIE0\n" /* CIE length */
3619 ".LSCIE0:\n"
3620 "\t.long 0xffffffff\n" /* CIE id as used in .debug_frame */
3621 "\t.byte 0x1\n" /* CIE version */
3622 "\t.string \"\"\n" /* empty augmentation string */
3623 "\t.uleb128 0x1\n" /* code alignment factor */
3624 "\t.sleb128 -4\n" /* data alignment factor */
3625 "\t.byte 65\n" /* return address column; NOTE(review): presumably LR -- confirm */
3626 "\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n" /* DW_CFA_def_cfa: register 1, offset 0 */
3627 "\t.align 2\n"
3628 ".LECIE0:\n\n");
3629 fprintf(ctx->fp,
3630 ".LSFDE0:\n"
3631 "\t.long .LEFDE0-.LASFDE0\n" /* FDE length */
3632 ".LASFDE0:\n"
3633 "\t.long .Lframe0\n" /* reference to the CIE above */
3634 "\t.long .Lbegin\n" /* initial location */
3635 "\t.long %d\n" /* address range: ctx->codesz */
3636 "\t.byte 0xe\n\t.uleb128 %d\n" /* DW_CFA_def_cfa_offset: CFRAME_SIZE */
3637 "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n" /* DW_CFA_offset_extended_sf: column 65, factored offset -1 */
3638 "\t.byte 0x5\n\t.uleb128 70\n\t.sleb128 37\n", /* DW_CFA_offset_extended: column 70, factored offset 37 */
3639 (int)ctx->codesz, CFRAME_SIZE);
3640 for (i = 14; i <= 31; i++) /* NOTE(review): presumably the callee-saved GPRs r14-r31 -- confirm */
3641 fprintf(ctx->fp,
3642 "\t.byte %d\n\t.uleb128 %d\n" /* DW_CFA_offset (0x80|i): column i, factored offset 1+2*(31-i) */
3643 "\t.byte 5\n\t.uleb128 %d\n\t.uleb128 %d\n", /* DW_CFA_offset_extended: column 1200+i, factored offset 2+2*(31-i) */
3644 0x80+i, 1+2*(31-i), 1200+i, 2+2*(31-i));
3645 fprintf(ctx->fp,
3646 "\t.align 2\n"
3647 ".LEFDE0:\n\n");
3648 fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n");
3649 fprintf(ctx->fp,
3650 ".Lframe1:\n"
3651 "\t.long .LECIE1-.LSCIE1\n" /* CIE length */
3652 ".LSCIE1:\n"
3653 "\t.long 0\n" /* CIE id as used in .eh_frame */
3654 "\t.byte 0x1\n" /* CIE version */
3655 "\t.string \"zPR\"\n" /* augmentation string; its data follows the return address column */
3656 "\t.uleb128 0x1\n" /* code alignment factor */
3657 "\t.sleb128 -4\n" /* data alignment factor */
3658 "\t.byte 65\n" /* return address column, same as in the .debug_frame CIE */
3659 "\t.uleb128 6\n" /* augmentation length */
3660 "\t.byte 0x1b\n" /* pcrel|sdata4 */
3661 "\t.long lj_err_unwind_dwarf-.\n" /* pc-relative reference to lj_err_unwind_dwarf */
3662 "\t.byte 0x1b\n" /* pcrel|sdata4 */
3663 "\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n" /* DW_CFA_def_cfa: register 1, offset 0 */
3664 "\t.align 2\n"
3665 ".LECIE1:\n\n");
3666 fprintf(ctx->fp,
3667 ".LSFDE1:\n"
3668 "\t.long .LEFDE1-.LASFDE1\n" /* FDE length */
3669 ".LASFDE1:\n"
3670 "\t.long .LASFDE1-.Lframe1\n" /* CIE reference, written as a distance back to .Lframe1 */
3671 "\t.long .Lbegin-.\n" /* initial location, pc-relative */
3672 "\t.long %d\n" /* address range: ctx->codesz */
3673 "\t.uleb128 0\n" /* augmentation length */
3674 "\t.byte 0xe\n\t.uleb128 %d\n" /* DW_CFA_def_cfa_offset: CFRAME_SIZE */
3675 "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n" /* DW_CFA_offset_extended_sf: column 65, factored offset -1 */
3676 "\t.byte 0x5\n\t.uleb128 70\n\t.sleb128 37\n", /* DW_CFA_offset_extended: column 70, factored offset 37 */
3677 (int)ctx->codesz, CFRAME_SIZE);
3678 for (i = 14; i <= 31; i++) /* Same per-register save rules as in the .debug_frame FDE. */
3679 fprintf(ctx->fp,
3680 "\t.byte %d\n\t.uleb128 %d\n"
3681 "\t.byte 5\n\t.uleb128 %d\n\t.uleb128 %d\n",
3682 0x80+i, 1+2*(31-i), 1200+i, 2+2*(31-i));
3683 fprintf(ctx->fp,
3684 "\t.align 2\n"
3685 ".LEFDE1:\n\n");
3686 break;
3687 default: /* All other build modes: no frame info is emitted. */
3688 break;
3689 }
3690}
3691
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
index 4544a3be..ea0415ee 100644
--- a/src/vm_x86.dasc
+++ b/src/vm_x86.dasc
@@ -18,7 +18,6 @@
18| 18|
19|.if P64 19|.if P64
20|.define X64, 1 20|.define X64, 1
21|.define SSE, 1
22|.if WIN 21|.if WIN
23|.define X64WIN, 1 22|.define X64WIN, 1
24|.endif 23|.endif
@@ -116,6 +115,7 @@
116|.type NODE, Node 115|.type NODE, Node
117|.type NARGS, int 116|.type NARGS, int
118|.type TRACE, GCtrace 117|.type TRACE, GCtrace
118|.type SBUF, SBuf
119| 119|
120|// Stack layout while in interpreter. Must match with lj_frame.h. 120|// Stack layout while in interpreter. Must match with lj_frame.h.
121|//----------------------------------------------------------------------- 121|//-----------------------------------------------------------------------
@@ -373,7 +373,6 @@
373| fpop 373| fpop
374|.endmacro 374|.endmacro
375| 375|
376|.macro fdup; fld st0; .endmacro
377|.macro fpop1; fstp st1; .endmacro 376|.macro fpop1; fstp st1; .endmacro
378| 377|
379|// Synthesize SSE FP constants. 378|// Synthesize SSE FP constants.
@@ -630,17 +629,18 @@ static void build_subroutines(BuildCtx *ctx)
630 | lea KBASEa, [esp+CFRAME_RESUME] 629 | lea KBASEa, [esp+CFRAME_RESUME]
631 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. 630 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
632 | add DISPATCH, GG_G2DISP 631 | add DISPATCH, GG_G2DISP
633 | mov L:RB->cframe, KBASEa
634 | mov SAVE_PC, RD // Any value outside of bytecode is ok. 632 | mov SAVE_PC, RD // Any value outside of bytecode is ok.
635 | mov SAVE_CFRAME, RDa 633 | mov SAVE_CFRAME, RDa
636 |.if X64 634 |.if X64
637 | mov SAVE_NRES, RD 635 | mov SAVE_NRES, RD
638 | mov SAVE_ERRF, RD 636 | mov SAVE_ERRF, RD
639 |.endif 637 |.endif
638 | mov L:RB->cframe, KBASEa
640 | cmp byte L:RB->status, RDL 639 | cmp byte L:RB->status, RDL
641 | je >3 // Initial resume (like a call). 640 | je >2 // Initial resume (like a call).
642 | 641 |
643 | // Resume after yield (like a return). 642 | // Resume after yield (like a return).
643 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
644 | set_vmstate INTERP 644 | set_vmstate INTERP
645 | mov byte L:RB->status, RDL 645 | mov byte L:RB->status, RDL
646 | mov BASE, L:RB->base 646 | mov BASE, L:RB->base
@@ -680,20 +680,19 @@ static void build_subroutines(BuildCtx *ctx)
680 | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME! 680 | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME!
681 |.endif 681 |.endif
682 | 682 |
683 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
683 | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain. 684 | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain.
684 | mov SAVE_CFRAME, KBASEa 685 | mov SAVE_CFRAME, KBASEa
685 | mov SAVE_PC, L:RB // Any value outside of bytecode is ok. 686 | mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
687 | add DISPATCH, GG_G2DISP
686 |.if X64 688 |.if X64
687 | mov L:RB->cframe, rsp 689 | mov L:RB->cframe, rsp
688 |.else 690 |.else
689 | mov L:RB->cframe, esp 691 | mov L:RB->cframe, esp
690 |.endif 692 |.endif
691 | 693 |
692 |2: // Entry point for vm_cpcall below (RA = base, RB = L, PC = ftype). 694 |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
693 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. 695 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
694 | add DISPATCH, GG_G2DISP
695 |
696 |3: // Entry point for vm_resume above (RA = base, RB = L, PC = ftype).
697 | set_vmstate INTERP 696 | set_vmstate INTERP
698 | mov BASE, L:RB->base // BASE = old base (used in vmeta_call). 697 | mov BASE, L:RB->base // BASE = old base (used in vmeta_call).
699 | add PC, RA 698 | add PC, RA
@@ -731,14 +730,17 @@ static void build_subroutines(BuildCtx *ctx)
731 | 730 |
732 | mov KBASE, L:RB->stack // Compute -savestack(L, L->top). 731 | mov KBASE, L:RB->stack // Compute -savestack(L, L->top).
733 | sub KBASE, L:RB->top 732 | sub KBASE, L:RB->top
733 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
734 | mov SAVE_ERRF, 0 // No error function. 734 | mov SAVE_ERRF, 0 // No error function.
735 | mov SAVE_NRES, KBASE // Neg. delta means cframe w/o frame. 735 | mov SAVE_NRES, KBASE // Neg. delta means cframe w/o frame.
736 | add DISPATCH, GG_G2DISP
736 | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe). 737 | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
737 | 738 |
738 |.if X64 739 |.if X64
739 | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain. 740 | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain.
740 | mov SAVE_CFRAME, KBASEa 741 | mov SAVE_CFRAME, KBASEa
741 | mov L:RB->cframe, rsp 742 | mov L:RB->cframe, rsp
743 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
742 | 744 |
743 | call CARG4 // (lua_State *L, lua_CFunction func, void *ud) 745 | call CARG4 // (lua_State *L, lua_CFunction func, void *ud)
744 |.else 746 |.else
@@ -749,6 +751,7 @@ static void build_subroutines(BuildCtx *ctx)
749 | mov KBASE, L:RB->cframe // Add our C frame to cframe chain. 751 | mov KBASE, L:RB->cframe // Add our C frame to cframe chain.
750 | mov SAVE_CFRAME, KBASE 752 | mov SAVE_CFRAME, KBASE
751 | mov L:RB->cframe, esp 753 | mov L:RB->cframe, esp
754 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
752 | 755 |
753 | call BASE // (lua_State *L, lua_CFunction func, void *ud) 756 | call BASE // (lua_State *L, lua_CFunction func, void *ud)
754 |.endif 757 |.endif
@@ -856,13 +859,9 @@ static void build_subroutines(BuildCtx *ctx)
856 |.if DUALNUM 859 |.if DUALNUM
857 | mov TMP2, LJ_TISNUM 860 | mov TMP2, LJ_TISNUM
858 | mov TMP1, RC 861 | mov TMP1, RC
859 |.elif SSE 862 |.else
860 | cvtsi2sd xmm0, RC 863 | cvtsi2sd xmm0, RC
861 | movsd TMPQ, xmm0 864 | movsd TMPQ, xmm0
862 |.else
863 | mov ARG4, RC
864 | fild ARG4
865 | fstp TMPQ
866 |.endif 865 |.endif
867 | lea RCa, TMPQ // Store temp. TValue in TMPQ. 866 | lea RCa, TMPQ // Store temp. TValue in TMPQ.
868 | jmp >1 867 | jmp >1
@@ -916,6 +915,19 @@ static void build_subroutines(BuildCtx *ctx)
916 | mov NARGS:RD, 2+1 // 2 args for func(t, k). 915 | mov NARGS:RD, 2+1 // 2 args for func(t, k).
917 | jmp ->vm_call_dispatch_f 916 | jmp ->vm_call_dispatch_f
918 | 917 |
918 |->vmeta_tgetr:
919 | mov FCARG1, TAB:RB
920 | mov RB, BASE // Save BASE.
921 | mov FCARG2, RC // Caveat: FCARG2 == BASE
922 | call extern lj_tab_getinth@8 // (GCtab *t, int32_t key)
923 | // cTValue * or NULL returned in eax (RC).
924 | movzx RA, PC_RA
925 | mov BASE, RB // Restore BASE.
926 | test RC, RC
927 | jnz ->BC_TGETR_Z
928 | mov dword [BASE+RA*8+4], LJ_TNIL
929 | jmp ->BC_TGETR2_Z
930 |
919 |//----------------------------------------------------------------------- 931 |//-----------------------------------------------------------------------
920 | 932 |
921 |->vmeta_tsets: 933 |->vmeta_tsets:
@@ -935,13 +947,9 @@ static void build_subroutines(BuildCtx *ctx)
935 |.if DUALNUM 947 |.if DUALNUM
936 | mov TMP2, LJ_TISNUM 948 | mov TMP2, LJ_TISNUM
937 | mov TMP1, RC 949 | mov TMP1, RC
938 |.elif SSE 950 |.else
939 | cvtsi2sd xmm0, RC 951 | cvtsi2sd xmm0, RC
940 | movsd TMPQ, xmm0 952 | movsd TMPQ, xmm0
941 |.else
942 | mov ARG4, RC
943 | fild ARG4
944 | fstp TMPQ
945 |.endif 953 |.endif
946 | lea RCa, TMPQ // Store temp. TValue in TMPQ. 954 | lea RCa, TMPQ // Store temp. TValue in TMPQ.
947 | jmp >1 955 | jmp >1
@@ -1007,6 +1015,33 @@ static void build_subroutines(BuildCtx *ctx)
1007 | mov NARGS:RD, 3+1 // 3 args for func(t, k, v). 1015 | mov NARGS:RD, 3+1 // 3 args for func(t, k, v).
1008 | jmp ->vm_call_dispatch_f 1016 | jmp ->vm_call_dispatch_f
1009 | 1017 |
1018 |->vmeta_tsetr:
1019 |.if X64WIN
1020 | mov L:CARG1d, SAVE_L
1021 | mov CARG3d, RC
1022 | mov L:CARG1d->base, BASE
1023 | xchg CARG2d, TAB:RB // Caveat: CARG2d == BASE.
1024 |.elif X64
1025 | mov L:CARG1d, SAVE_L
1026 | mov CARG2d, TAB:RB
1027 | mov L:CARG1d->base, BASE
1028 | mov RB, BASE // Save BASE.
1029 | mov CARG3d, RC // Caveat: CARG3d == BASE.
1030 |.else
1031 | mov L:RA, SAVE_L
1032 | mov ARG2, TAB:RB
1033 | mov RB, BASE // Save BASE.
1034 | mov ARG3, RC
1035 | mov ARG1, L:RA
1036 | mov L:RA->base, BASE
1037 |.endif
1038 | mov SAVE_PC, PC
1039 | call extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
1040 | // TValue * returned in eax (RC).
1041 | movzx RA, PC_RA
1042 | mov BASE, RB // Restore BASE.
1043 | jmp ->BC_TSETR_Z
1044 |
1010 |//-- Comparison metamethods --------------------------------------------- 1045 |//-- Comparison metamethods ---------------------------------------------
1011 | 1046 |
1012 |->vmeta_comp: 1047 |->vmeta_comp:
@@ -1101,6 +1136,26 @@ static void build_subroutines(BuildCtx *ctx)
1101 | jmp <3 1136 | jmp <3
1102 |.endif 1137 |.endif
1103 | 1138 |
1139 |->vmeta_istype:
1140 |.if X64
1141 | mov L:RB, SAVE_L
1142 | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
1143 | mov CARG2d, RA
1144 | movzx CARG3d, PC_RD
1145 | mov L:CARG1d, L:RB
1146 |.else
1147 | movzx RD, PC_RD
1148 | mov ARG2, RA
1149 | mov L:RB, SAVE_L
1150 | mov ARG3, RD
1151 | mov ARG1, L:RB
1152 | mov L:RB->base, BASE
1153 |.endif
1154 | mov SAVE_PC, PC
1155 | call extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
1156 | mov BASE, L:RB->base
1157 | jmp <6
1158 |
1104 |//-- Arithmetic metamethods --------------------------------------------- 1159 |//-- Arithmetic metamethods ---------------------------------------------
1105 | 1160 |
1106 |->vmeta_arith_vno: 1161 |->vmeta_arith_vno:
@@ -1273,19 +1328,6 @@ static void build_subroutines(BuildCtx *ctx)
1273 | cmp NARGS:RD, 2+1; jb ->fff_fallback 1328 | cmp NARGS:RD, 2+1; jb ->fff_fallback
1274 |.endmacro 1329 |.endmacro
1275 | 1330 |
1276 |.macro .ffunc_n, name
1277 | .ffunc_1 name
1278 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1279 | fld qword [BASE]
1280 |.endmacro
1281 |
1282 |.macro .ffunc_n, name, op
1283 | .ffunc_1 name
1284 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1285 | op
1286 | fld qword [BASE]
1287 |.endmacro
1288 |
1289 |.macro .ffunc_nsse, name, op 1331 |.macro .ffunc_nsse, name, op
1290 | .ffunc_1 name 1332 | .ffunc_1 name
1291 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 1333 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
@@ -1296,14 +1338,6 @@ static void build_subroutines(BuildCtx *ctx)
1296 | .ffunc_nsse name, movsd 1338 | .ffunc_nsse name, movsd
1297 |.endmacro 1339 |.endmacro
1298 | 1340 |
1299 |.macro .ffunc_nn, name
1300 | .ffunc_2 name
1301 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1302 | cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback
1303 | fld qword [BASE]
1304 | fld qword [BASE+8]
1305 |.endmacro
1306 |
1307 |.macro .ffunc_nnsse, name 1341 |.macro .ffunc_nnsse, name
1308 | .ffunc_2 name 1342 | .ffunc_2 name
1309 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 1343 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
@@ -1509,11 +1543,7 @@ static void build_subroutines(BuildCtx *ctx)
1509 |.else 1543 |.else
1510 | jae ->fff_fallback 1544 | jae ->fff_fallback
1511 |.endif 1545 |.endif
1512 |.if SSE
1513 | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0 1546 | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0
1514 |.else
1515 | fld qword [BASE]; jmp ->fff_resn
1516 |.endif
1517 | 1547 |
1518 |.ffunc_1 tostring 1548 |.ffunc_1 tostring
1519 | // Only handles the string or number case inline. 1549 | // Only handles the string or number case inline.
@@ -1538,9 +1568,9 @@ static void build_subroutines(BuildCtx *ctx)
1538 |.endif 1568 |.endif
1539 | mov L:FCARG1, L:RB 1569 | mov L:FCARG1, L:RB
1540 |.if DUALNUM 1570 |.if DUALNUM
1541 | call extern lj_str_fromnumber@8 // (lua_State *L, cTValue *o) 1571 | call extern lj_strfmt_number@8 // (lua_State *L, cTValue *o)
1542 |.else 1572 |.else
1543 | call extern lj_str_fromnum@8 // (lua_State *L, lua_Number *np) 1573 | call extern lj_strfmt_num@8 // (lua_State *L, lua_Number *np)
1544 |.endif 1574 |.endif
1545 | // GCstr returned in eax (RD). 1575 | // GCstr returned in eax (RD).
1546 | mov BASE, L:RB->base 1576 | mov BASE, L:RB->base
@@ -1631,19 +1661,12 @@ static void build_subroutines(BuildCtx *ctx)
1631 | add RD, 1 1661 | add RD, 1
1632 | mov dword [BASE-4], LJ_TISNUM 1662 | mov dword [BASE-4], LJ_TISNUM
1633 | mov dword [BASE-8], RD 1663 | mov dword [BASE-8], RD
1634 |.elif SSE 1664 |.else
1635 | movsd xmm0, qword [BASE+8] 1665 | movsd xmm0, qword [BASE+8]
1636 | sseconst_1 xmm1, RBa 1666 | sseconst_1 xmm1, RBa
1637 | addsd xmm0, xmm1 1667 | addsd xmm0, xmm1
1638 | cvtsd2si RD, xmm0 1668 | cvttsd2si RD, xmm0
1639 | movsd qword [BASE-8], xmm0 1669 | movsd qword [BASE-8], xmm0
1640 |.else
1641 | fld qword [BASE+8]
1642 | fld1
1643 | faddp st1
1644 | fist ARG1
1645 | fstp qword [BASE-8]
1646 | mov RD, ARG1
1647 |.endif 1670 |.endif
1648 | mov TAB:RB, [BASE] 1671 | mov TAB:RB, [BASE]
1649 | cmp RD, TAB:RB->asize; jae >2 // Not in array part? 1672 | cmp RD, TAB:RB->asize; jae >2 // Not in array part?
@@ -1690,12 +1713,9 @@ static void build_subroutines(BuildCtx *ctx)
1690 |.if DUALNUM 1713 |.if DUALNUM
1691 | mov dword [BASE+12], LJ_TISNUM 1714 | mov dword [BASE+12], LJ_TISNUM
1692 | mov dword [BASE+8], 0 1715 | mov dword [BASE+8], 0
1693 |.elif SSE 1716 |.else
1694 | xorps xmm0, xmm0 1717 | xorps xmm0, xmm0
1695 | movsd qword [BASE+8], xmm0 1718 | movsd qword [BASE+8], xmm0
1696 |.else
1697 | fldz
1698 | fstp qword [BASE+8]
1699 |.endif 1719 |.endif
1700 | mov RD, 1+3 1720 | mov RD, 1+3
1701 | jmp ->fff_res 1721 | jmp ->fff_res
@@ -1802,7 +1822,6 @@ static void build_subroutines(BuildCtx *ctx)
1802 | mov ARG3, RA 1822 | mov ARG3, RA
1803 |.endif 1823 |.endif
1804 | call ->vm_resume // (lua_State *L, TValue *base, 0, 0) 1824 | call ->vm_resume // (lua_State *L, TValue *base, 0, 0)
1805 | set_vmstate INTERP
1806 | 1825 |
1807 | mov L:RB, SAVE_L 1826 | mov L:RB, SAVE_L
1808 |.if X64 1827 |.if X64
@@ -1811,6 +1830,9 @@ static void build_subroutines(BuildCtx *ctx)
1811 | mov L:PC, ARG1 // The callee doesn't modify SAVE_L. 1830 | mov L:PC, ARG1 // The callee doesn't modify SAVE_L.
1812 |.endif 1831 |.endif
1813 | mov BASE, L:RB->base 1832 | mov BASE, L:RB->base
1833 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
1834 | set_vmstate INTERP
1835 |
1814 | cmp eax, LUA_YIELD 1836 | cmp eax, LUA_YIELD
1815 | ja >8 1837 | ja >8
1816 |4: 1838 |4:
@@ -1925,12 +1947,10 @@ static void build_subroutines(BuildCtx *ctx)
1925 |->fff_resi: // Dummy. 1947 |->fff_resi: // Dummy.
1926 |.endif 1948 |.endif
1927 | 1949 |
1928 |.if SSE
1929 |->fff_resn: 1950 |->fff_resn:
1930 | mov PC, [BASE-4] 1951 | mov PC, [BASE-4]
1931 | fstp qword [BASE-8] 1952 | fstp qword [BASE-8]
1932 | jmp ->fff_res1 1953 | jmp ->fff_res1
1933 |.endif
1934 | 1954 |
1935 | .ffunc_1 math_abs 1955 | .ffunc_1 math_abs
1936 |.if DUALNUM 1956 |.if DUALNUM
@@ -1954,8 +1974,6 @@ static void build_subroutines(BuildCtx *ctx)
1954 |.else 1974 |.else
1955 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 1975 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1956 |.endif 1976 |.endif
1957 |
1958 |.if SSE
1959 | movsd xmm0, qword [BASE] 1977 | movsd xmm0, qword [BASE]
1960 | sseconst_abs xmm1, RDa 1978 | sseconst_abs xmm1, RDa
1961 | andps xmm0, xmm1 1979 | andps xmm0, xmm1
@@ -1963,15 +1981,6 @@ static void build_subroutines(BuildCtx *ctx)
1963 | mov PC, [BASE-4] 1981 | mov PC, [BASE-4]
1964 | movsd qword [BASE-8], xmm0 1982 | movsd qword [BASE-8], xmm0
1965 | // fallthrough 1983 | // fallthrough
1966 |.else
1967 | fld qword [BASE]
1968 | fabs
1969 | // fallthrough
1970 |->fff_resxmm0: // Dummy.
1971 |->fff_resn:
1972 | mov PC, [BASE-4]
1973 | fstp qword [BASE-8]
1974 |.endif
1975 | 1984 |
1976 |->fff_res1: 1985 |->fff_res1:
1977 | mov RD, 1+1 1986 | mov RD, 1+1
@@ -1998,6 +2007,12 @@ static void build_subroutines(BuildCtx *ctx)
1998 | mov RAa, -8 // Results start at BASE+RA = BASE-8. 2007 | mov RAa, -8 // Results start at BASE+RA = BASE-8.
1999 | jmp ->vm_return 2008 | jmp ->vm_return
2000 | 2009 |
2010 |.if X64
2011 |.define fff_resfp, fff_resxmm0
2012 |.else
2013 |.define fff_resfp, fff_resn
2014 |.endif
2015 |
2001 |.macro math_round, func 2016 |.macro math_round, func
2002 | .ffunc math_ .. func 2017 | .ffunc math_ .. func
2003 |.if DUALNUM 2018 |.if DUALNUM
@@ -2008,107 +2023,75 @@ static void build_subroutines(BuildCtx *ctx)
2008 |.else 2023 |.else
2009 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 2024 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
2010 |.endif 2025 |.endif
2011 |.if SSE
2012 | movsd xmm0, qword [BASE] 2026 | movsd xmm0, qword [BASE]
2013 | call ->vm_ .. func 2027 | call ->vm_ .. func .. _sse
2014 | .if DUALNUM 2028 |.if DUALNUM
2015 | cvtsd2si RB, xmm0 2029 | cvttsd2si RB, xmm0
2016 | cmp RB, 0x80000000 2030 | cmp RB, 0x80000000
2017 | jne ->fff_resi 2031 | jne ->fff_resi
2018 | cvtsi2sd xmm1, RB 2032 | cvtsi2sd xmm1, RB
2019 | ucomisd xmm0, xmm1 2033 | ucomisd xmm0, xmm1
2020 | jp ->fff_resxmm0 2034 | jp ->fff_resxmm0
2021 | je ->fff_resi 2035 | je ->fff_resi
2022 | .endif
2023 | jmp ->fff_resxmm0
2024 |.else
2025 | fld qword [BASE]
2026 | call ->vm_ .. func
2027 | .if DUALNUM
2028 | fist ARG1
2029 | mov RB, ARG1
2030 | cmp RB, 0x80000000; jne >2
2031 | fdup
2032 | fild ARG1
2033 | fcomparepp
2034 | jp ->fff_resn
2035 | jne ->fff_resn
2036 |2:
2037 | fpop
2038 | jmp ->fff_resi
2039 | .else
2040 | jmp ->fff_resn
2041 | .endif
2042 |.endif 2036 |.endif
2037 | jmp ->fff_resxmm0
2043 |.endmacro 2038 |.endmacro
2044 | 2039 |
2045 | math_round floor 2040 | math_round floor
2046 | math_round ceil 2041 | math_round ceil
2047 | 2042 |
2048 |.if SSE
2049 |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0 2043 |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0
2050 |.else
2051 |.ffunc_n math_sqrt; fsqrt; jmp ->fff_resn
2052 |.endif
2053 | 2044 |
2054 |.ffunc math_log 2045 |.ffunc math_log
2055 | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. 2046 | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument.
2056 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 2047 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
2057 | fldln2; fld qword [BASE]; fyl2x; jmp ->fff_resn 2048 | movsd xmm0, qword [BASE]
2058 | 2049 |.if not X64
2059 |.ffunc_n math_log10, fldlg2; fyl2x; jmp ->fff_resn 2050 | movsd FPARG1, xmm0
2060 |.ffunc_n math_exp; call ->vm_exp_x87; jmp ->fff_resn 2051 |.endif
2061 | 2052 | mov RB, BASE
2062 |.ffunc_n math_sin; fsin; jmp ->fff_resn 2053 | call extern log
2063 |.ffunc_n math_cos; fcos; jmp ->fff_resn 2054 | mov BASE, RB
2064 |.ffunc_n math_tan; fptan; fpop; jmp ->fff_resn 2055 | jmp ->fff_resfp
2065 |
2066 |.ffunc_n math_asin
2067 | fdup; fmul st0; fld1; fsubrp st1; fsqrt; fpatan
2068 | jmp ->fff_resn
2069 |.ffunc_n math_acos
2070 | fdup; fmul st0; fld1; fsubrp st1; fsqrt; fxch; fpatan
2071 | jmp ->fff_resn
2072 |.ffunc_n math_atan; fld1; fpatan; jmp ->fff_resn
2073 | 2056 |
2074 |.macro math_extern, func 2057 |.macro math_extern, func
2075 |.if SSE
2076 | .ffunc_nsse math_ .. func 2058 | .ffunc_nsse math_ .. func
2077 | .if not X64 2059 |.if not X64
2078 | movsd FPARG1, xmm0 2060 | movsd FPARG1, xmm0
2079 | .endif
2080 |.else
2081 | .ffunc_n math_ .. func
2082 | fstp FPARG1
2083 |.endif 2061 |.endif
2084 | mov RB, BASE 2062 | mov RB, BASE
2085 | call extern lj_vm_ .. func 2063 | call extern func
2086 | mov BASE, RB 2064 | mov BASE, RB
2087 | .if X64 2065 | jmp ->fff_resfp
2088 | jmp ->fff_resxmm0 2066 |.endmacro
2089 | .else 2067 |
2090 | jmp ->fff_resn 2068 |.macro math_extern2, func
2091 | .endif 2069 | .ffunc_nnsse math_ .. func
2070 |.if not X64
2071 | movsd FPARG1, xmm0
2072 | movsd FPARG3, xmm1
2073 |.endif
2074 | mov RB, BASE
2075 | call extern func
2076 | mov BASE, RB
2077 | jmp ->fff_resfp
2092 |.endmacro 2078 |.endmacro
2093 | 2079 |
2080 | math_extern log10
2081 | math_extern exp
2082 | math_extern sin
2083 | math_extern cos
2084 | math_extern tan
2085 | math_extern asin
2086 | math_extern acos
2087 | math_extern atan
2094 | math_extern sinh 2088 | math_extern sinh
2095 | math_extern cosh 2089 | math_extern cosh
2096 | math_extern tanh 2090 | math_extern tanh
2091 | math_extern2 pow
2092 | math_extern2 atan2
2093 | math_extern2 fmod
2097 | 2094 |
2098 |->ff_math_deg:
2099 |.if SSE
2100 |.ffunc_nsse math_rad
2101 | mov CFUNC:RB, [BASE-8]
2102 | mulsd xmm0, qword CFUNC:RB->upvalue[0]
2103 | jmp ->fff_resxmm0
2104 |.else
2105 |.ffunc_n math_rad
2106 | mov CFUNC:RB, [BASE-8]
2107 | fmul qword CFUNC:RB->upvalue[0]
2108 | jmp ->fff_resn
2109 |.endif
2110 |
2111 |.ffunc_nn math_atan2; fpatan; jmp ->fff_resn
2112 |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn 2095 |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn
2113 | 2096 |
2114 |.ffunc_1 math_frexp 2097 |.ffunc_1 math_frexp
@@ -2123,65 +2106,34 @@ static void build_subroutines(BuildCtx *ctx)
2123 | cmp RB, 0x00200000; jb >4 2106 | cmp RB, 0x00200000; jb >4
2124 |1: 2107 |1:
2125 | shr RB, 21; sub RB, RC // Extract and unbias exponent. 2108 | shr RB, 21; sub RB, RC // Extract and unbias exponent.
2126 |.if SSE
2127 | cvtsi2sd xmm0, RB 2109 | cvtsi2sd xmm0, RB
2128 |.else
2129 | mov TMP1, RB; fild TMP1
2130 |.endif
2131 | mov RB, [BASE-4] 2110 | mov RB, [BASE-4]
2132 | and RB, 0x800fffff // Mask off exponent. 2111 | and RB, 0x800fffff // Mask off exponent.
2133 | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0. 2112 | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0.
2134 | mov [BASE-4], RB 2113 | mov [BASE-4], RB
2135 |2: 2114 |2:
2136 |.if SSE
2137 | movsd qword [BASE], xmm0 2115 | movsd qword [BASE], xmm0
2138 |.else
2139 | fstp qword [BASE]
2140 |.endif
2141 | mov RD, 1+2 2116 | mov RD, 1+2
2142 | jmp ->fff_res 2117 | jmp ->fff_res
2143 |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0. 2118 |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0.
2144 |.if SSE
2145 | xorps xmm0, xmm0; jmp <2 2119 | xorps xmm0, xmm0; jmp <2
2146 |.else
2147 | fldz; jmp <2
2148 |.endif
2149 |4: // Handle denormals by multiplying with 2^54 and adjusting the bias. 2120 |4: // Handle denormals by multiplying with 2^54 and adjusting the bias.
2150 |.if SSE
2151 | movsd xmm0, qword [BASE] 2121 | movsd xmm0, qword [BASE]
2152 | sseconst_hi xmm1, RBa, 43500000 // 2^54. 2122 | sseconst_hi xmm1, RBa, 43500000 // 2^54.
2153 | mulsd xmm0, xmm1 2123 | mulsd xmm0, xmm1
2154 | movsd qword [BASE-8], xmm0 2124 | movsd qword [BASE-8], xmm0
2155 |.else
2156 | fld qword [BASE]
2157 | mov TMP1, 0x5a800000; fmul TMP1 // x = x*2^54
2158 | fstp qword [BASE-8]
2159 |.endif
2160 | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1 2125 | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1
2161 | 2126 |
2162 |.if SSE
2163 |.ffunc_nsse math_modf 2127 |.ffunc_nsse math_modf
2164 |.else
2165 |.ffunc_n math_modf
2166 |.endif
2167 | mov RB, [BASE+4] 2128 | mov RB, [BASE+4]
2168 | mov PC, [BASE-4] 2129 | mov PC, [BASE-4]
2169 | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf? 2130 | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf?
2170 |.if SSE
2171 | movaps xmm4, xmm0 2131 | movaps xmm4, xmm0
2172 | call ->vm_trunc 2132 | call ->vm_trunc_sse
2173 | subsd xmm4, xmm0 2133 | subsd xmm4, xmm0
2174 |1: 2134 |1:
2175 | movsd qword [BASE-8], xmm0 2135 | movsd qword [BASE-8], xmm0
2176 | movsd qword [BASE], xmm4 2136 | movsd qword [BASE], xmm4
2177 |.else
2178 | fdup
2179 | call ->vm_trunc
2180 | fsub st1, st0
2181 |1:
2182 | fstp qword [BASE-8]
2183 | fstp qword [BASE]
2184 |.endif
2185 | mov RC, [BASE-4]; mov RB, [BASE+4] 2137 | mov RC, [BASE-4]; mov RB, [BASE+4]
2186 | xor RC, RB; js >3 // Need to adjust sign? 2138 | xor RC, RB; js >3 // Need to adjust sign?
2187 |2: 2139 |2:
@@ -2191,24 +2143,9 @@ static void build_subroutines(BuildCtx *ctx)
2191 | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction. 2143 | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction.
2192 | jmp <2 2144 | jmp <2
2193 |4: 2145 |4:
2194 |.if SSE
2195 | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0. 2146 | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0.
2196 |.else
2197 | fldz; fxch; jmp <1 // Return +-Inf and +-0.
2198 |.endif
2199 |
2200 |.ffunc_nnr math_fmod
2201 |1: ; fprem; fnstsw ax; and ax, 0x400; jnz <1
2202 | fpop1
2203 | jmp ->fff_resn
2204 | 2147 |
2205 |.if SSE 2148 |.macro math_minmax, name, cmovop, sseop
2206 |.ffunc_nnsse math_pow; call ->vm_pow; jmp ->fff_resxmm0
2207 |.else
2208 |.ffunc_nn math_pow; call ->vm_pow; jmp ->fff_resn
2209 |.endif
2210 |
2211 |.macro math_minmax, name, cmovop, fcmovop, sseop
2212 | .ffunc name 2149 | .ffunc name
2213 | mov RA, 2 2150 | mov RA, 2
2214 | cmp dword [BASE+4], LJ_TISNUM 2151 | cmp dword [BASE+4], LJ_TISNUM
@@ -2225,12 +2162,7 @@ static void build_subroutines(BuildCtx *ctx)
2225 |3: 2162 |3:
2226 | ja ->fff_fallback 2163 | ja ->fff_fallback
2227 | // Convert intermediate result to number and continue below. 2164 | // Convert intermediate result to number and continue below.
2228 |.if SSE
2229 | cvtsi2sd xmm0, RB 2165 | cvtsi2sd xmm0, RB
2230 |.else
2231 | mov TMP1, RB
2232 | fild TMP1
2233 |.endif
2234 | jmp >6 2166 | jmp >6
2235 |4: 2167 |4:
2236 | ja ->fff_fallback 2168 | ja ->fff_fallback
@@ -2238,7 +2170,6 @@ static void build_subroutines(BuildCtx *ctx)
2238 | jae ->fff_fallback 2170 | jae ->fff_fallback
2239 |.endif 2171 |.endif
2240 | 2172 |
2241 |.if SSE
2242 | movsd xmm0, qword [BASE] 2173 | movsd xmm0, qword [BASE]
2243 |5: // Handle numbers or integers. 2174 |5: // Handle numbers or integers.
2244 | cmp RA, RD; jae ->fff_resxmm0 2175 | cmp RA, RD; jae ->fff_resxmm0
@@ -2257,48 +2188,13 @@ static void build_subroutines(BuildCtx *ctx)
2257 | sseop xmm0, xmm1 2188 | sseop xmm0, xmm1
2258 | add RA, 1 2189 | add RA, 1
2259 | jmp <5 2190 | jmp <5
2260 |.else
2261 | fld qword [BASE]
2262 |5: // Handle numbers or integers.
2263 | cmp RA, RD; jae ->fff_resn
2264 | cmp dword [BASE+RA*8-4], LJ_TISNUM
2265 |.if DUALNUM
2266 | jb >6
2267 | ja >9
2268 | fild dword [BASE+RA*8-8]
2269 | jmp >7
2270 |.else
2271 | jae >9
2272 |.endif
2273 |6:
2274 | fld qword [BASE+RA*8-8]
2275 |7:
2276 | fucomi st1; fcmovop st1; fpop1
2277 | add RA, 1
2278 | jmp <5
2279 |.endif
2280 |.endmacro 2191 |.endmacro
2281 | 2192 |
2282 | math_minmax math_min, cmovg, fcmovnbe, minsd 2193 | math_minmax math_min, cmovg, minsd
2283 | math_minmax math_max, cmovl, fcmovbe, maxsd 2194 | math_minmax math_max, cmovl, maxsd
2284 |.if not SSE
2285 |9:
2286 | fpop; jmp ->fff_fallback
2287 |.endif
2288 | 2195 |
2289 |//-- String library ----------------------------------------------------- 2196 |//-- String library -----------------------------------------------------
2290 | 2197 |
2291 |.ffunc_1 string_len
2292 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2293 | mov STR:RB, [BASE]
2294 |.if DUALNUM
2295 | mov RB, dword STR:RB->len; jmp ->fff_resi
2296 |.elif SSE
2297 | cvtsi2sd xmm0, dword STR:RB->len; jmp ->fff_resxmm0
2298 |.else
2299 | fild dword STR:RB->len; jmp ->fff_resn
2300 |.endif
2301 |
2302 |.ffunc string_byte // Only handle the 1-arg case here. 2198 |.ffunc string_byte // Only handle the 1-arg case here.
2303 | cmp NARGS:RD, 1+1; jne ->fff_fallback 2199 | cmp NARGS:RD, 1+1; jne ->fff_fallback
2304 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback 2200 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
@@ -2309,10 +2205,8 @@ static void build_subroutines(BuildCtx *ctx)
2309 | movzx RB, byte STR:RB[1] 2205 | movzx RB, byte STR:RB[1]
2310 |.if DUALNUM 2206 |.if DUALNUM
2311 | jmp ->fff_resi 2207 | jmp ->fff_resi
2312 |.elif SSE
2313 | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0
2314 |.else 2208 |.else
2315 | mov TMP1, RB; fild TMP1; jmp ->fff_resn 2209 | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0
2316 |.endif 2210 |.endif
2317 | 2211 |
2318 |.ffunc string_char // Only handle the 1-arg case here. 2212 |.ffunc string_char // Only handle the 1-arg case here.
@@ -2324,16 +2218,11 @@ static void build_subroutines(BuildCtx *ctx)
2324 | mov RB, dword [BASE] 2218 | mov RB, dword [BASE]
2325 | cmp RB, 255; ja ->fff_fallback 2219 | cmp RB, 255; ja ->fff_fallback
2326 | mov TMP2, RB 2220 | mov TMP2, RB
2327 |.elif SSE 2221 |.else
2328 | jae ->fff_fallback 2222 | jae ->fff_fallback
2329 | cvttsd2si RB, qword [BASE] 2223 | cvttsd2si RB, qword [BASE]
2330 | cmp RB, 255; ja ->fff_fallback 2224 | cmp RB, 255; ja ->fff_fallback
2331 | mov TMP2, RB 2225 | mov TMP2, RB
2332 |.else
2333 | jae ->fff_fallback
2334 | fld qword [BASE]
2335 | fistp TMP2
2336 | cmp TMP2, 255; ja ->fff_fallback
2337 |.endif 2226 |.endif
2338 |.if X64 2227 |.if X64
2339 | mov TMP3, 1 2228 | mov TMP3, 1
@@ -2354,6 +2243,7 @@ static void build_subroutines(BuildCtx *ctx)
2354 |.endif 2243 |.endif
2355 | mov SAVE_PC, PC 2244 | mov SAVE_PC, PC
2356 | call extern lj_str_new // (lua_State *L, char *str, size_t l) 2245 | call extern lj_str_new // (lua_State *L, char *str, size_t l)
2246 |->fff_resstr:
2357 | // GCstr * returned in eax (RD). 2247 | // GCstr * returned in eax (RD).
2358 | mov BASE, L:RB->base 2248 | mov BASE, L:RB->base
2359 | mov PC, [BASE-4] 2249 | mov PC, [BASE-4]
@@ -2371,14 +2261,10 @@ static void build_subroutines(BuildCtx *ctx)
2371 | jne ->fff_fallback 2261 | jne ->fff_fallback
2372 | mov RB, dword [BASE+16] 2262 | mov RB, dword [BASE+16]
2373 | mov TMP2, RB 2263 | mov TMP2, RB
2374 |.elif SSE 2264 |.else
2375 | jae ->fff_fallback 2265 | jae ->fff_fallback
2376 | cvttsd2si RB, qword [BASE+16] 2266 | cvttsd2si RB, qword [BASE+16]
2377 | mov TMP2, RB 2267 | mov TMP2, RB
2378 |.else
2379 | jae ->fff_fallback
2380 | fld qword [BASE+16]
2381 | fistp TMP2
2382 |.endif 2268 |.endif
2383 |1: 2269 |1:
2384 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback 2270 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
@@ -2393,12 +2279,8 @@ static void build_subroutines(BuildCtx *ctx)
2393 | mov RB, STR:RB->len 2279 | mov RB, STR:RB->len
2394 |.if DUALNUM 2280 |.if DUALNUM
2395 | mov RA, dword [BASE+8] 2281 | mov RA, dword [BASE+8]
2396 |.elif SSE
2397 | cvttsd2si RA, qword [BASE+8]
2398 |.else 2282 |.else
2399 | fld qword [BASE+8] 2283 | cvttsd2si RA, qword [BASE+8]
2400 | fistp ARG3
2401 | mov RA, ARG3
2402 |.endif 2284 |.endif
2403 | mov RC, TMP2 2285 | mov RC, TMP2
2404 | cmp RB, RC // len < end? (unsigned compare) 2286 | cmp RB, RC // len < end? (unsigned compare)
@@ -2442,123 +2324,27 @@ static void build_subroutines(BuildCtx *ctx)
2442 | xor RC, RC // Zero length. Any ptr in RB is ok. 2324 | xor RC, RC // Zero length. Any ptr in RB is ok.
2443 | jmp <4 2325 | jmp <4
2444 | 2326 |
2445 |.ffunc string_rep // Only handle the 1-char case inline. 2327 |.macro ffstring_op, name
2446 | ffgccheck 2328 | .ffunc_1 string_ .. name
2447 | cmp NARGS:RD, 2+1; jne ->fff_fallback // Exactly 2 arguments.
2448 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2449 | cmp dword [BASE+12], LJ_TISNUM
2450 | mov STR:RB, [BASE]
2451 |.if DUALNUM
2452 | jne ->fff_fallback
2453 | mov RC, dword [BASE+8]
2454 |.elif SSE
2455 | jae ->fff_fallback
2456 | cvttsd2si RC, qword [BASE+8]
2457 |.else
2458 | jae ->fff_fallback
2459 | fld qword [BASE+8]
2460 | fistp TMP2
2461 | mov RC, TMP2
2462 |.endif
2463 | test RC, RC
2464 | jle ->fff_emptystr // Count <= 0? (or non-int)
2465 | cmp dword STR:RB->len, 1
2466 | jb ->fff_emptystr // Zero length string?
2467 | jne ->fff_fallback_2 // Fallback for > 1-char strings.
2468 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_2
2469 | movzx RA, byte STR:RB[1]
2470 | mov RB, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2471 |.if X64
2472 | mov TMP3, RC
2473 |.else
2474 | mov ARG3, RC
2475 |.endif
2476 |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?).
2477 | mov [RB], RAL
2478 | add RB, 1
2479 | sub RC, 1
2480 | jnz <1
2481 | mov RD, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2482 | jmp ->fff_newstr
2483 |
2484 |.ffunc_1 string_reverse
2485 | ffgccheck 2329 | ffgccheck
2486 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback 2330 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2487 | mov STR:RB, [BASE] 2331 | mov L:RB, SAVE_L
2488 | mov RC, STR:RB->len 2332 | lea SBUF:FCARG1, [DISPATCH+DISPATCH_GL(tmpbuf)]
2489 | test RC, RC 2333 | mov L:RB->base, BASE
2490 | jz ->fff_emptystr // Zero length string? 2334 | mov STR:FCARG2, [BASE] // Caveat: FCARG2 == BASE
2491 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1 2335 | mov RC, SBUF:FCARG1->b
2492 | add RB, #STR 2336 | mov SBUF:FCARG1->L, L:RB
2493 | mov TMP2, PC // Need another temp register. 2337 | mov SBUF:FCARG1->p, RC
2494 |.if X64 2338 | mov SAVE_PC, PC
2495 | mov TMP3, RC 2339 | call extern lj_buf_putstr_ .. name .. @8
2496 |.else 2340 | mov FCARG1, eax
2497 | mov ARG3, RC 2341 | call extern lj_buf_tostr@4
2498 |.endif 2342 | jmp ->fff_resstr
2499 | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2500 |1:
2501 | movzx RA, byte [RB]
2502 | add RB, 1
2503 | sub RC, 1
2504 | mov [PC+RC], RAL
2505 | jnz <1
2506 | mov RD, PC
2507 | mov PC, TMP2
2508 | jmp ->fff_newstr
2509 |
2510 |.macro ffstring_case, name, lo, hi
2511 | .ffunc_1 name
2512 | ffgccheck
2513 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2514 | mov STR:RB, [BASE]
2515 | mov RC, STR:RB->len
2516 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1
2517 | add RB, #STR
2518 | mov TMP2, PC // Need another temp register.
2519 |.if X64
2520 | mov TMP3, RC
2521 |.else
2522 | mov ARG3, RC
2523 |.endif
2524 | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2525 | jmp >3
2526 |1: // ASCII case conversion. Yes, this is suboptimal code (do you care?).
2527 | movzx RA, byte [RB+RC]
2528 | cmp RA, lo
2529 | jb >2
2530 | cmp RA, hi
2531 | ja >2
2532 | xor RA, 0x20
2533 |2:
2534 | mov [PC+RC], RAL
2535 |3:
2536 | sub RC, 1
2537 | jns <1
2538 | mov RD, PC
2539 | mov PC, TMP2
2540 | jmp ->fff_newstr
2541 |.endmacro 2343 |.endmacro
2542 | 2344 |
2543 |ffstring_case string_lower, 0x41, 0x5a 2345 |ffstring_op reverse
2544 |ffstring_case string_upper, 0x61, 0x7a 2346 |ffstring_op lower
2545 | 2347 |ffstring_op upper
2546 |//-- Table library ------------------------------------------------------
2547 |
2548 |.ffunc_1 table_getn
2549 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
2550 | mov RB, BASE // Save BASE.
2551 | mov TAB:FCARG1, [BASE]
2552 | call extern lj_tab_len@4 // LJ_FASTCALL (GCtab *t)
2553 | // Length of table returned in eax (RD).
2554 | mov BASE, RB // Restore BASE.
2555 |.if DUALNUM
2556 | mov RB, RD; jmp ->fff_resi
2557 |.elif SSE
2558 | cvtsi2sd xmm0, RD; jmp ->fff_resxmm0
2559 |.else
2560 | mov ARG1, RD; fild ARG1; jmp ->fff_resn
2561 |.endif
2562 | 2348 |
2563 |//-- Bit library -------------------------------------------------------- 2349 |//-- Bit library --------------------------------------------------------
2564 | 2350 |
@@ -2567,11 +2353,7 @@ static void build_subroutines(BuildCtx *ctx)
2567 |.macro .ffunc_bit, name, kind 2353 |.macro .ffunc_bit, name, kind
2568 | .ffunc_1 name 2354 | .ffunc_1 name
2569 |.if kind == 2 2355 |.if kind == 2
2570 |.if SSE
2571 | sseconst_tobit xmm1, RBa 2356 | sseconst_tobit xmm1, RBa
2572 |.else
2573 | mov TMP1, TOBIT_BIAS
2574 |.endif
2575 |.endif 2357 |.endif
2576 | cmp dword [BASE+4], LJ_TISNUM 2358 | cmp dword [BASE+4], LJ_TISNUM
2577 |.if DUALNUM 2359 |.if DUALNUM
@@ -2587,37 +2369,17 @@ static void build_subroutines(BuildCtx *ctx)
2587 |.else 2369 |.else
2588 | jae ->fff_fallback 2370 | jae ->fff_fallback
2589 |.endif 2371 |.endif
2590 |.if SSE
2591 | movsd xmm0, qword [BASE] 2372 | movsd xmm0, qword [BASE]
2592 |.if kind < 2 2373 |.if kind < 2
2593 | sseconst_tobit xmm1, RBa 2374 | sseconst_tobit xmm1, RBa
2594 |.endif 2375 |.endif
2595 | addsd xmm0, xmm1 2376 | addsd xmm0, xmm1
2596 | movd RB, xmm0 2377 | movd RB, xmm0
2597 |.else
2598 | fld qword [BASE]
2599 |.if kind < 2
2600 | mov TMP1, TOBIT_BIAS
2601 |.endif
2602 | fadd TMP1
2603 | fstp FPARG1
2604 |.if kind > 0
2605 | mov RB, ARG1
2606 |.endif
2607 |.endif
2608 |2: 2378 |2:
2609 |.endmacro 2379 |.endmacro
2610 | 2380 |
2611 |.ffunc_bit bit_tobit, 0 2381 |.ffunc_bit bit_tobit, 0
2612 |.if DUALNUM or SSE
2613 |.if not SSE
2614 | mov RB, ARG1
2615 |.endif
2616 | jmp ->fff_resbit 2382 | jmp ->fff_resbit
2617 |.else
2618 | fild ARG1
2619 | jmp ->fff_resn
2620 |.endif
2621 | 2383 |
2622 |.macro .ffunc_bit_op, name, ins 2384 |.macro .ffunc_bit_op, name, ins
2623 | .ffunc_bit name, 2 2385 | .ffunc_bit name, 2
@@ -2637,17 +2399,10 @@ static void build_subroutines(BuildCtx *ctx)
2637 |.else 2399 |.else
2638 | jae ->fff_fallback_bit_op 2400 | jae ->fff_fallback_bit_op
2639 |.endif 2401 |.endif
2640 |.if SSE
2641 | movsd xmm0, qword [RD] 2402 | movsd xmm0, qword [RD]
2642 | addsd xmm0, xmm1 2403 | addsd xmm0, xmm1
2643 | movd RA, xmm0 2404 | movd RA, xmm0
2644 | ins RB, RA 2405 | ins RB, RA
2645 |.else
2646 | fld qword [RD]
2647 | fadd TMP1
2648 | fstp FPARG1
2649 | ins RB, ARG1
2650 |.endif
2651 | sub RD, 8 2406 | sub RD, 8
2652 | jmp <1 2407 | jmp <1
2653 |.endmacro 2408 |.endmacro
@@ -2664,15 +2419,10 @@ static void build_subroutines(BuildCtx *ctx)
2664 | not RB 2419 | not RB
2665 |.if DUALNUM 2420 |.if DUALNUM
2666 | jmp ->fff_resbit 2421 | jmp ->fff_resbit
2667 |.elif SSE 2422 |.else
2668 |->fff_resbit: 2423 |->fff_resbit:
2669 | cvtsi2sd xmm0, RB 2424 | cvtsi2sd xmm0, RB
2670 | jmp ->fff_resxmm0 2425 | jmp ->fff_resxmm0
2671 |.else
2672 |->fff_resbit:
2673 | mov ARG1, RB
2674 | fild ARG1
2675 | jmp ->fff_resn
2676 |.endif 2426 |.endif
2677 | 2427 |
2678 |->fff_fallback_bit_op: 2428 |->fff_fallback_bit_op:
@@ -2685,22 +2435,13 @@ static void build_subroutines(BuildCtx *ctx)
2685 | // Note: no inline conversion from number for 2nd argument! 2435 | // Note: no inline conversion from number for 2nd argument!
2686 | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback 2436 | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback
2687 | mov RA, dword [BASE+8] 2437 | mov RA, dword [BASE+8]
2688 |.elif SSE 2438 |.else
2689 | .ffunc_nnsse name 2439 | .ffunc_nnsse name
2690 | sseconst_tobit xmm2, RBa 2440 | sseconst_tobit xmm2, RBa
2691 | addsd xmm0, xmm2 2441 | addsd xmm0, xmm2
2692 | addsd xmm1, xmm2 2442 | addsd xmm1, xmm2
2693 | movd RB, xmm0 2443 | movd RB, xmm0
2694 | movd RA, xmm1 2444 | movd RA, xmm1
2695 |.else
2696 | .ffunc_nn name
2697 | mov TMP1, TOBIT_BIAS
2698 | fadd TMP1
2699 | fstp FPARG3
2700 | fadd TMP1
2701 | fstp FPARG1
2702 | mov RA, ARG3
2703 | mov RB, ARG1
2704 |.endif 2445 |.endif
2705 | ins RB, cl // Assumes RA is ecx. 2446 | ins RB, cl // Assumes RA is ecx.
2706 | jmp ->fff_resbit 2447 | jmp ->fff_resbit
@@ -2834,7 +2575,7 @@ static void build_subroutines(BuildCtx *ctx)
2834 | mov FCARG2, PC // Caveat: FCARG2 == BASE 2575 | mov FCARG2, PC // Caveat: FCARG2 == BASE
2835 | mov FCARG1, L:RB 2576 | mov FCARG1, L:RB
2836 | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. 2577 | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
2837 | call extern lj_dispatch_ins@8 // (lua_State *L, BCIns *pc) 2578 | call extern lj_dispatch_ins@8 // (lua_State *L, const BCIns *pc)
2838 |3: 2579 |3:
2839 | mov BASE, L:RB->base 2580 | mov BASE, L:RB->base
2840 |4: 2581 |4:
@@ -2905,6 +2646,82 @@ static void build_subroutines(BuildCtx *ctx)
2905 | add NARGS:RD, 1 2646 | add NARGS:RD, 1
2906 | jmp RBa 2647 | jmp RBa
2907 | 2648 |
2649 |->cont_stitch: // Trace stitching.
2650 |.if JIT
2651 | // BASE = base, RC = result, RB = mbase
2652 | mov RA, [RB-24] // Save previous trace number.
2653 | mov TMP1, RA
2654 | mov TMP3, DISPATCH // Need one more register.
2655 | mov DISPATCH, MULTRES
2656 | movzx RA, PC_RA
2657 | lea RA, [BASE+RA*8] // Call base.
2658 | sub DISPATCH, 1
2659 | jz >2
2660 |1: // Move results down.
2661 |.if X64
2662 | mov RBa, [RC]
2663 | mov [RA], RBa
2664 |.else
2665 | mov RB, [RC]
2666 | mov [RA], RB
2667 | mov RB, [RC+4]
2668 | mov [RA+4], RB
2669 |.endif
2670 | add RC, 8
2671 | add RA, 8
2672 | sub DISPATCH, 1
2673 | jnz <1
2674 |2:
2675 | movzx RC, PC_RA
2676 | movzx RB, PC_RB
2677 | add RC, RB
2678 | lea RC, [BASE+RC*8-8]
2679 |3:
2680 | cmp RC, RA
2681 | ja >9 // More results wanted?
2682 |
2683 | mov DISPATCH, TMP3
2684 | mov RB, TMP1 // Get previous trace number.
2685 | mov RA, [DISPATCH+DISPATCH_J(trace)]
2686 | mov TRACE:RD, [RA+RB*4]
2687 | test TRACE:RD, TRACE:RD
2688 | jz ->cont_nop
2689 | movzx RD, word TRACE:RD->link
2690 | cmp RD, RB
2691 | je ->cont_nop // Blacklisted.
2692 | test RD, RD
2693 | jne =>BC_JLOOP // Jump to stitched trace.
2694 |
2695 | // Stitch a new trace to the previous trace.
2696 | mov [DISPATCH+DISPATCH_J(exitno)], RB
2697 | mov L:RB, SAVE_L
2698 | mov L:RB->base, BASE
2699 | mov FCARG2, PC
2700 | lea FCARG1, [DISPATCH+GG_DISP2J]
2701 | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa
2702 | call extern lj_dispatch_stitch@8 // (jit_State *J, const BCIns *pc)
2703 | mov BASE, L:RB->base
2704 | jmp ->cont_nop
2705 |
2706 |9: // Fill up results with nil.
2707 | mov dword [RA+4], LJ_TNIL
2708 | add RA, 8
2709 | jmp <3
2710 |.endif
2711 |
2712 |->vm_profhook: // Dispatch target for profiler hook.
2713#if LJ_HASPROFILE
2714 | mov L:RB, SAVE_L
2715 | mov L:RB->base, BASE
2716 | mov FCARG2, PC // Caveat: FCARG2 == BASE
2717 | mov FCARG1, L:RB
2718 | call extern lj_dispatch_profile@8 // (lua_State *L, const BCIns *pc)
2719 | mov BASE, L:RB->base
2720 | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
2721 | sub PC, 4
2722 | jmp ->cont_nop
2723#endif
2724 |
2908 |//----------------------------------------------------------------------- 2725 |//-----------------------------------------------------------------------
2909 |//-- Trace exit handler ------------------------------------------------- 2726 |//-- Trace exit handler -------------------------------------------------
2910 |//----------------------------------------------------------------------- 2727 |//-----------------------------------------------------------------------
@@ -2957,10 +2774,9 @@ static void build_subroutines(BuildCtx *ctx)
2957 | movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0 2774 | movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0
2958 |.endif 2775 |.endif
2959 | // Caveat: RB is ebp. 2776 | // Caveat: RB is ebp.
2960 | mov L:RB, [DISPATCH+DISPATCH_GL(jit_L)] 2777 | mov L:RB, [DISPATCH+DISPATCH_GL(cur_L)]
2961 | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)] 2778 | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)]
2962 | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa 2779 | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa
2963 | mov dword [DISPATCH+DISPATCH_GL(jit_L)], 0
2964 | mov L:RB->base, BASE 2780 | mov L:RB->base, BASE
2965 |.if X64WIN 2781 |.if X64WIN
2966 | lea CARG2, [rsp+4*8] 2782 | lea CARG2, [rsp+4*8]
@@ -2970,6 +2786,7 @@ static void build_subroutines(BuildCtx *ctx)
2970 | lea FCARG2, [esp+16] 2786 | lea FCARG2, [esp+16]
2971 |.endif 2787 |.endif
2972 | lea FCARG1, [DISPATCH+GG_DISP2J] 2788 | lea FCARG1, [DISPATCH+GG_DISP2J]
2789 | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0
2973 | call extern lj_trace_exit@8 // (jit_State *J, ExitState *ex) 2790 | call extern lj_trace_exit@8 // (jit_State *J, ExitState *ex)
2974 | // MULTRES or negated error code returned in eax (RD). 2791 | // MULTRES or negated error code returned in eax (RD).
2975 | mov RAa, L:RB->cframe 2792 | mov RAa, L:RB->cframe
@@ -3016,12 +2833,14 @@ static void build_subroutines(BuildCtx *ctx)
3016 | mov r13, TMPa 2833 | mov r13, TMPa
3017 | mov r12, TMPQ 2834 | mov r12, TMPQ
3018 |.endif 2835 |.endif
3019 | test RD, RD; js >3 // Check for error from exit. 2836 | test RD, RD; js >9 // Check for error from exit.
2837 | mov L:RB, SAVE_L
3020 | mov MULTRES, RD 2838 | mov MULTRES, RD
3021 | mov LFUNC:KBASE, [BASE-8] 2839 | mov LFUNC:KBASE, [BASE-8]
3022 | mov KBASE, LFUNC:KBASE->pc 2840 | mov KBASE, LFUNC:KBASE->pc
3023 | mov KBASE, [KBASE+PC2PROTO(k)] 2841 | mov KBASE, [KBASE+PC2PROTO(k)]
3024 | mov dword [DISPATCH+DISPATCH_GL(jit_L)], 0 2842 | mov L:RB->base, BASE
2843 | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0
3025 | set_vmstate INTERP 2844 | set_vmstate INTERP
3026 | // Modified copy of ins_next which handles function header dispatch, too. 2845 | // Modified copy of ins_next which handles function header dispatch, too.
3027 | mov RC, [PC] 2846 | mov RC, [PC]
@@ -3030,16 +2849,31 @@ static void build_subroutines(BuildCtx *ctx)
3030 | add PC, 4 2849 | add PC, 4
3031 | shr RC, 16 2850 | shr RC, 16
3032 | cmp OP, BC_FUNCF // Function header? 2851 | cmp OP, BC_FUNCF // Function header?
3033 | jb >2 2852 | jb >3
3034 | mov RC, MULTRES // RC/RD holds nres+1. 2853 | cmp OP, BC_FUNCC+2 // Fast function?
2854 | jae >4
3035 |2: 2855 |2:
2856 | mov RC, MULTRES // RC/RD holds nres+1.
2857 |3:
3036 |.if X64 2858 |.if X64
3037 | jmp aword [DISPATCH+OP*8] 2859 | jmp aword [DISPATCH+OP*8]
3038 |.else 2860 |.else
3039 | jmp aword [DISPATCH+OP*4] 2861 | jmp aword [DISPATCH+OP*4]
3040 |.endif 2862 |.endif
3041 | 2863 |
3042 |3: // Rethrow error from the right C frame. 2864 |4: // Check frame below fast function.
2865 | mov RC, [BASE-4]
2866 | test RC, FRAME_TYPE
2867 | jnz <2 // Trace stitching continuation?
2868 | // Otherwise set KBASE for Lua function below fast function.
2869 | movzx RC, byte [RC-3]
2870 | not RCa
2871 | mov LFUNC:KBASE, [BASE+RC*8-8]
2872 | mov KBASE, LFUNC:KBASE->pc
2873 | mov KBASE, [KBASE+PC2PROTO(k)]
2874 | jmp <2
2875 |
2876 |9: // Rethrow error from the right C frame.
3043 | neg RD 2877 | neg RD
3044 | mov FCARG1, L:RB 2878 | mov FCARG1, L:RB
3045 | mov FCARG2, RD 2879 | mov FCARG2, RD
@@ -3051,27 +2885,18 @@ static void build_subroutines(BuildCtx *ctx)
3051 |//----------------------------------------------------------------------- 2885 |//-----------------------------------------------------------------------
3052 | 2886 |
3053 |// FP value rounding. Called by math.floor/math.ceil fast functions 2887 |// FP value rounding. Called by math.floor/math.ceil fast functions
3054 |// and from JIT code. 2888 |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
3055 | 2889 |.macro vm_round, name, mode, cond
3056 |// x87 variant: Arg/ret on x87 stack. No int/xmm registers modified. 2890 |->name:
3057 |.macro vm_round_x87, mode1, mode2 2891 |.if not X64 and cond
3058 | fnstcw word [esp+4] // Caveat: overwrites ARG1 and ARG2. 2892 | movsd xmm0, qword [esp+4]
3059 | mov [esp+8], eax 2893 | call ->name .. _sse
3060 | mov ax, mode1 2894 | movsd qword [esp+4], xmm0 // Overwrite callee-owned arg.
3061 | or ax, [esp+4] 2895 | fld qword [esp+4]
3062 |.if mode2 ~= 0xffff
3063 | and ax, mode2
3064 |.endif
3065 | mov [esp+6], ax
3066 | fldcw word [esp+6]
3067 | frndint
3068 | fldcw word [esp+4]
3069 | mov eax, [esp+8]
3070 | ret 2896 | ret
3071 |.endmacro 2897 |.endif
3072 | 2898 |
3073 |// SSE variant: arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified. 2899 |->name .. _sse:
3074 |.macro vm_round_sse, mode
3075 | sseconst_abs xmm2, RDa 2900 | sseconst_abs xmm2, RDa
3076 | sseconst_2p52 xmm3, RDa 2901 | sseconst_2p52 xmm3, RDa
3077 | movaps xmm1, xmm0 2902 | movaps xmm1, xmm0
@@ -3107,22 +2932,12 @@ static void build_subroutines(BuildCtx *ctx)
3107 | ret 2932 | ret
3108 |.endmacro 2933 |.endmacro
3109 | 2934 |
3110 |.macro vm_round, name, ssemode, mode1, mode2 2935 | vm_round vm_floor, 0, 1
3111 |->name: 2936 | vm_round vm_ceil, 1, JIT
3112 |.if not SSE 2937 | vm_round vm_trunc, 2, JIT
3113 | vm_round_x87 mode1, mode2
3114 |.endif
3115 |->name .. _sse:
3116 | vm_round_sse ssemode
3117 |.endmacro
3118 |
3119 | vm_round vm_floor, 0, 0x0400, 0xf7ff
3120 | vm_round vm_ceil, 1, 0x0800, 0xfbff
3121 | vm_round vm_trunc, 2, 0x0c00, 0xffff
3122 | 2938 |
3123 |// FP modulo x%y. Called by BC_MOD* and vm_arith. 2939 |// FP modulo x%y. Called by BC_MOD* and vm_arith.
3124 |->vm_mod: 2940 |->vm_mod:
3125 |.if SSE
3126 |// Args in xmm0/xmm1, return value in xmm0. 2941 |// Args in xmm0/xmm1, return value in xmm0.
3127 |// Caveat: xmm0-xmm5 and RC (eax) modified! 2942 |// Caveat: xmm0-xmm5 and RC (eax) modified!
3128 | movaps xmm5, xmm0 2943 | movaps xmm5, xmm0
@@ -3150,172 +2965,6 @@ static void build_subroutines(BuildCtx *ctx)
3150 | movaps xmm0, xmm5 2965 | movaps xmm0, xmm5
3151 | subsd xmm0, xmm1 2966 | subsd xmm0, xmm1
3152 | ret 2967 | ret
3153 |.else
3154 |// Args/ret on x87 stack (y on top). No xmm registers modified.
3155 |// Caveat: needs 3 slots on x87 stack! RC (eax) modified!
3156 | fld st1
3157 | fdiv st1
3158 | fnstcw word [esp+4]
3159 | mov ax, 0x0400
3160 | or ax, [esp+4]
3161 | and ax, 0xf7ff
3162 | mov [esp+6], ax
3163 | fldcw word [esp+6]
3164 | frndint
3165 | fldcw word [esp+4]
3166 | fmulp st1
3167 | fsubp st1
3168 | ret
3169 |.endif
3170 |
3171 |// FP log2(x). Called by math.log(x, base).
3172 |->vm_log2:
3173 |.if X64WIN
3174 | movsd qword [rsp+8], xmm0 // Use scratch area.
3175 | fld1
3176 | fld qword [rsp+8]
3177 | fyl2x
3178 | fstp qword [rsp+8]
3179 | movsd xmm0, qword [rsp+8]
3180 |.elif X64
3181 | movsd qword [rsp-8], xmm0 // Use red zone.
3182 | fld1
3183 | fld qword [rsp-8]
3184 | fyl2x
3185 | fstp qword [rsp-8]
3186 | movsd xmm0, qword [rsp-8]
3187 |.else
3188 | fld1
3189 | fld qword [esp+4]
3190 | fyl2x
3191 |.endif
3192 | ret
3193 |
3194 |// FP exponentiation e^x and 2^x. Called by math.exp fast function and
3195 |// from JIT code. Arg/ret on x87 stack. No int/xmm regs modified.
3196 |// Caveat: needs 3 slots on x87 stack!
3197 |->vm_exp_x87:
3198 | fldl2e; fmulp st1 // e^x ==> 2^(x*log2(e))
3199 |->vm_exp2_x87:
3200 | .if X64WIN
3201 | .define expscratch, dword [rsp+8] // Use scratch area.
3202 | .elif X64
3203 | .define expscratch, dword [rsp-8] // Use red zone.
3204 | .else
3205 | .define expscratch, dword [esp+4] // Needs 4 byte scratch area.
3206 | .endif
3207 | fst expscratch // Caveat: overwrites ARG1.
3208 | cmp expscratch, 0x7f800000; je >1 // Special case: e^+Inf = +Inf
3209 | cmp expscratch, 0xff800000; je >2 // Special case: e^-Inf = 0
3210 |->vm_exp2raw: // Entry point for vm_pow. Without +-Inf check.
3211 | fdup; frndint; fsub st1, st0; fxch // Split into frac/int part.
3212 | f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int
3213 |1:
3214 | ret
3215 |2:
3216 | fpop; fldz; ret
3217 |
3218 |// Generic power function x^y. Called by BC_POW, math.pow fast function,
3219 |// and vm_arith.
3220 |// Args/ret on x87 stack (y on top). RC (eax) modified.
3221 |// Caveat: needs 3 slots on x87 stack!
3222 |->vm_pow:
3223 |.if not SSE
3224 | fist dword [esp+4] // Store/reload int before comparison.
3225 | fild dword [esp+4] // Integral exponent used in vm_powi.
3226 | fucomip st1
3227 | jnz >8 // Branch for FP exponents.
3228 | jp >9 // Branch for NaN exponent.
3229 | fpop // Pop y and fallthrough to vm_powi.
3230 |
3231 |// FP/int power function x^i. Arg1/ret on x87 stack.
3232 |// Arg2 (int) on C stack. RC (eax) modified.
3233 |// Caveat: needs 2 slots on x87 stack!
3234 | mov eax, [esp+4]
3235 | cmp eax, 1; jle >6 // i<=1?
3236 | // Now 1 < (unsigned)i <= 0x80000000.
3237 |1: // Handle leading zeros.
3238 | test eax, 1; jnz >2
3239 | fmul st0
3240 | shr eax, 1
3241 | jmp <1
3242 |2:
3243 | shr eax, 1; jz >5
3244 | fdup
3245 |3: // Handle trailing bits.
3246 | fmul st0
3247 | shr eax, 1; jz >4
3248 | jnc <3
3249 | fmul st1, st0
3250 | jmp <3
3251 |4:
3252 | fmulp st1
3253 |5:
3254 | ret
3255 |6:
3256 | je <5 // x^1 ==> x
3257 | jb >7
3258 | fld1; fdivrp st1
3259 | neg eax
3260 | cmp eax, 1; je <5 // x^-1 ==> 1/x
3261 | jmp <1 // x^-i ==> (1/x)^i
3262 |7:
3263 | fpop; fld1 // x^0 ==> 1
3264 | ret
3265 |
3266 |8: // FP/FP power function x^y.
3267 | fst dword [esp+4]
3268 | fxch
3269 | fst dword [esp+8]
3270 | mov eax, [esp+4]; shl eax, 1
3271 | cmp eax, 0xff000000; je >2 // x^+-Inf?
3272 | mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y?
3273 | cmp eax, 0xff000000; je >4 // +-Inf^y?
3274 | fyl2x
3275 | jmp ->vm_exp2raw
3276 |
3277 |9: // Handle x^NaN.
3278 | fld1
3279 | fucomip st2
3280 | je >1 // 1^NaN ==> 1
3281 | fxch // x^NaN ==> NaN
3282 |1:
3283 | fpop
3284 | ret
3285 |
3286 |2: // Handle x^+-Inf.
3287 | fabs
3288 | fld1
3289 | fucomip st1
3290 | je >3 // +-1^+-Inf ==> 1
3291 | fpop; fabs; fldz; mov eax, 0; setc al
3292 | ror eax, 1; xor eax, [esp+4]; jns >3 // |x|<>1, x^+-Inf ==> +Inf/0
3293 | fxch
3294 |3:
3295 | fpop1; fabs
3296 | ret
3297 |
3298 |4: // Handle +-0^y or +-Inf^y.
3299 | cmp dword [esp+4], 0; jge <3 // y >= 0, x^y ==> |x|
3300 | fpop; fpop
3301 | test eax, eax; jz >5 // y < 0, +-0^y ==> +Inf
3302 | fldz // y < 0, +-Inf^y ==> 0
3303 | ret
3304 |5:
3305 | mov dword [esp+4], 0x7f800000 // Return +Inf.
3306 | fld dword [esp+4]
3307 | ret
3308 |.endif
3309 |
3310 |// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified.
3311 |// Needs 16 byte scratch area for x86. Also called from JIT code.
3312 |->vm_pow_sse:
3313 | cvtsd2si eax, xmm1
3314 | cvtsi2sd xmm2, eax
3315 | ucomisd xmm1, xmm2
3316 | jnz >8 // Branch for FP exponents.
3317 | jp >9 // Branch for NaN exponent.
3318 | // Fallthrough to vm_powi_sse.
3319 | 2968 |
3320 |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified. 2969 |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
3321 |->vm_powi_sse: 2970 |->vm_powi_sse:
@@ -3352,287 +3001,6 @@ static void build_subroutines(BuildCtx *ctx)
3352 | sseconst_1 xmm0, RDa 3001 | sseconst_1 xmm0, RDa
3353 | ret 3002 | ret
3354 | 3003 |
3355 |8: // FP/FP power function x^y.
3356 |.if X64
3357 | movd rax, xmm1; shl rax, 1
3358 | rol rax, 12; cmp rax, 0xffe; je >2 // x^+-Inf?
3359 | movd rax, xmm0; shl rax, 1; je >4 // +-0^y?
3360 | rol rax, 12; cmp rax, 0xffe; je >5 // +-Inf^y?
3361 | .if X64WIN
3362 | movsd qword [rsp+16], xmm1 // Use scratch area.
3363 | movsd qword [rsp+8], xmm0
3364 | fld qword [rsp+16]
3365 | fld qword [rsp+8]
3366 | .else
3367 | movsd qword [rsp-16], xmm1 // Use red zone.
3368 | movsd qword [rsp-8], xmm0
3369 | fld qword [rsp-16]
3370 | fld qword [rsp-8]
3371 | .endif
3372 |.else
3373 | movsd qword [esp+12], xmm1 // Needs 16 byte scratch area.
3374 | movsd qword [esp+4], xmm0
3375 | cmp dword [esp+12], 0; jne >1
3376 | mov eax, [esp+16]; shl eax, 1
3377 | cmp eax, 0xffe00000; je >2 // x^+-Inf?
3378 |1:
3379 | cmp dword [esp+4], 0; jne >1
3380 | mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y?
3381 | cmp eax, 0xffe00000; je >5 // +-Inf^y?
3382 |1:
3383 | fld qword [esp+12]
3384 | fld qword [esp+4]
3385 |.endif
3386 | fyl2x // y*log2(x)
3387 | fdup; frndint; fsub st1, st0; fxch // Split into frac/int part.
3388 | f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int
3389 |.if X64WIN
3390 | fstp qword [rsp+8] // Use scratch area.
3391 | movsd xmm0, qword [rsp+8]
3392 |.elif X64
3393 | fstp qword [rsp-8] // Use red zone.
3394 | movsd xmm0, qword [rsp-8]
3395 |.else
3396 | fstp qword [esp+4] // Needs 8 byte scratch area.
3397 | movsd xmm0, qword [esp+4]
3398 |.endif
3399 | ret
3400 |
3401 |9: // Handle x^NaN.
3402 | sseconst_1 xmm2, RDa
3403 | ucomisd xmm0, xmm2; je >1 // 1^NaN ==> 1
3404 | movaps xmm0, xmm1 // x^NaN ==> NaN
3405 |1:
3406 | ret
3407 |
3408 |2: // Handle x^+-Inf.
3409 | sseconst_abs xmm2, RDa
3410 | andpd xmm0, xmm2 // |x|
3411 | sseconst_1 xmm2, RDa
3412 | ucomisd xmm0, xmm2; je <1 // +-1^+-Inf ==> 1
3413 | movmskpd eax, xmm1
3414 | xorps xmm0, xmm0
3415 | mov ah, al; setc al; xor al, ah; jne <1 // |x|<>1, x^+-Inf ==> +Inf/0
3416 |3:
3417 | sseconst_hi xmm0, RDa, 7ff00000 // +Inf
3418 | ret
3419 |
3420 |4: // Handle +-0^y.
3421 | movmskpd eax, xmm1; test eax, eax; jnz <3 // y < 0, +-0^y ==> +Inf
3422 | xorps xmm0, xmm0 // y >= 0, +-0^y ==> 0
3423 | ret
3424 |
3425 |5: // Handle +-Inf^y.
3426 | movmskpd eax, xmm1; test eax, eax; jz <3 // y >= 0, +-Inf^y ==> +Inf
3427 | xorps xmm0, xmm0 // y < 0, +-Inf^y ==> 0
3428 | ret
3429 |
3430 |// Callable from C: double lj_vm_foldfpm(double x, int fpm)
3431 |// Computes fpm(x) for extended math functions. ORDER FPM.
3432 |->vm_foldfpm:
3433 |.if JIT
3434 |.if X64
3435 | .if X64WIN
3436 | .define fpmop, CARG2d
3437 | .else
3438 | .define fpmop, CARG1d
3439 | .endif
3440 | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil
3441 | cmp fpmop, 3; jb ->vm_trunc; ja >2
3442 | sqrtsd xmm0, xmm0; ret
3443 |2:
3444 | .if X64WIN
3445 | movsd qword [rsp+8], xmm0 // Use scratch area.
3446 | fld qword [rsp+8]
3447 | .else
3448 | movsd qword [rsp-8], xmm0 // Use red zone.
3449 | fld qword [rsp-8]
3450 | .endif
3451 | cmp fpmop, 5; ja >2
3452 | .if X64WIN; pop rax; .endif
3453 | je >1
3454 | call ->vm_exp_x87
3455 | .if X64WIN; push rax; .endif
3456 | jmp >7
3457 |1:
3458 | call ->vm_exp2_x87
3459 | .if X64WIN; push rax; .endif
3460 | jmp >7
3461 |2: ; cmp fpmop, 7; je >1; ja >2
3462 | fldln2; fxch; fyl2x; jmp >7
3463 |1: ; fld1; fxch; fyl2x; jmp >7
3464 |2: ; cmp fpmop, 9; je >1; ja >2
3465 | fldlg2; fxch; fyl2x; jmp >7
3466 |1: ; fsin; jmp >7
3467 |2: ; cmp fpmop, 11; je >1; ja >9
3468 | fcos; jmp >7
3469 |1: ; fptan; fpop
3470 |7:
3471 | .if X64WIN
3472 | fstp qword [rsp+8] // Use scratch area.
3473 | movsd xmm0, qword [rsp+8]
3474 | .else
3475 | fstp qword [rsp-8] // Use red zone.
3476 | movsd xmm0, qword [rsp-8]
3477 | .endif
3478 | ret
3479 |.else // x86 calling convention.
3480 | .define fpmop, eax
3481 |.if SSE
3482 | mov fpmop, [esp+12]
3483 | movsd xmm0, qword [esp+4]
3484 | cmp fpmop, 1; je >1; ja >2
3485 | call ->vm_floor; jmp >7
3486 |1: ; call ->vm_ceil; jmp >7
3487 |2: ; cmp fpmop, 3; je >1; ja >2
3488 | call ->vm_trunc; jmp >7
3489 |1:
3490 | sqrtsd xmm0, xmm0
3491 |7:
3492 | movsd qword [esp+4], xmm0 // Overwrite callee-owned args.
3493 | fld qword [esp+4]
3494 | ret
3495 |2: ; fld qword [esp+4]
3496 | cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87
3497 |2: ; cmp fpmop, 7; je >1; ja >2
3498 | fldln2; fxch; fyl2x; ret
3499 |1: ; fld1; fxch; fyl2x; ret
3500 |2: ; cmp fpmop, 9; je >1; ja >2
3501 | fldlg2; fxch; fyl2x; ret
3502 |1: ; fsin; ret
3503 |2: ; cmp fpmop, 11; je >1; ja >9
3504 | fcos; ret
3505 |1: ; fptan; fpop; ret
3506 |.else
3507 | mov fpmop, [esp+12]
3508 | fld qword [esp+4]
3509 | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil
3510 | cmp fpmop, 3; jb ->vm_trunc; ja >2
3511 | fsqrt; ret
3512 |2: ; cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87
3513 | cmp fpmop, 7; je >1; ja >2
3514 | fldln2; fxch; fyl2x; ret
3515 |1: ; fld1; fxch; fyl2x; ret
3516 |2: ; cmp fpmop, 9; je >1; ja >2
3517 | fldlg2; fxch; fyl2x; ret
3518 |1: ; fsin; ret
3519 |2: ; cmp fpmop, 11; je >1; ja >9
3520 | fcos; ret
3521 |1: ; fptan; fpop; ret
3522 |.endif
3523 |.endif
3524 |9: ; int3 // Bad fpm.
3525 |.endif
3526 |
3527 |// Callable from C: double lj_vm_foldarith(double x, double y, int op)
3528 |// Compute x op y for basic arithmetic operators (+ - * / % ^ and unary -)
3529 |// and basic math functions. ORDER ARITH
3530 |->vm_foldarith:
3531 |.if X64
3532 |
3533 | .if X64WIN
3534 | .define foldop, CARG3d
3535 | .else
3536 | .define foldop, CARG1d
3537 | .endif
3538 | cmp foldop, 1; je >1; ja >2
3539 | addsd xmm0, xmm1; ret
3540 |1: ; subsd xmm0, xmm1; ret
3541 |2: ; cmp foldop, 3; je >1; ja >2
3542 | mulsd xmm0, xmm1; ret
3543 |1: ; divsd xmm0, xmm1; ret
3544 |2: ; cmp foldop, 5; jb ->vm_mod; je ->vm_pow
3545 | cmp foldop, 7; je >1; ja >2
3546 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; ret
3547 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; ret
3548 |2: ; cmp foldop, 9; ja >2
3549 |.if X64WIN
3550 | movsd qword [rsp+8], xmm0 // Use scratch area.
3551 | movsd qword [rsp+16], xmm1
3552 | fld qword [rsp+8]
3553 | fld qword [rsp+16]
3554 |.else
3555 | movsd qword [rsp-8], xmm0 // Use red zone.
3556 | movsd qword [rsp-16], xmm1
3557 | fld qword [rsp-8]
3558 | fld qword [rsp-16]
3559 |.endif
3560 | je >1
3561 | fpatan
3562 |7:
3563 |.if X64WIN
3564 | fstp qword [rsp+8] // Use scratch area.
3565 | movsd xmm0, qword [rsp+8]
3566 |.else
3567 | fstp qword [rsp-8] // Use red zone.
3568 | movsd xmm0, qword [rsp-8]
3569 |.endif
3570 | ret
3571 |1: ; fxch; fscale; fpop1; jmp <7
3572 |2: ; cmp foldop, 11; je >1; ja >9
3573 | minsd xmm0, xmm1; ret
3574 |1: ; maxsd xmm0, xmm1; ret
3575 |9: ; int3 // Bad op.
3576 |
3577 |.elif SSE // x86 calling convention with SSE ops.
3578 |
3579 | .define foldop, eax
3580 | mov foldop, [esp+20]
3581 | movsd xmm0, qword [esp+4]
3582 | movsd xmm1, qword [esp+12]
3583 | cmp foldop, 1; je >1; ja >2
3584 | addsd xmm0, xmm1
3585 |7:
3586 | movsd qword [esp+4], xmm0 // Overwrite callee-owned args.
3587 | fld qword [esp+4]
3588 | ret
3589 |1: ; subsd xmm0, xmm1; jmp <7
3590 |2: ; cmp foldop, 3; je >1; ja >2
3591 | mulsd xmm0, xmm1; jmp <7
3592 |1: ; divsd xmm0, xmm1; jmp <7
3593 |2: ; cmp foldop, 5
3594 | je >1; ja >2
3595 | call ->vm_mod; jmp <7
3596 |1: ; pop edx; call ->vm_pow; push edx; jmp <7 // Writes to scratch area.
3597 |2: ; cmp foldop, 7; je >1; ja >2
3598 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; jmp <7
3599 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; jmp <7
3600 |2: ; cmp foldop, 9; ja >2
3601 | fld qword [esp+4] // Reload from stack
3602 | fld qword [esp+12]
3603 | je >1
3604 | fpatan; ret
3605 |1: ; fxch; fscale; fpop1; ret
3606 |2: ; cmp foldop, 11; je >1; ja >9
3607 | minsd xmm0, xmm1; jmp <7
3608 |1: ; maxsd xmm0, xmm1; jmp <7
3609 |9: ; int3 // Bad op.
3610 |
3611 |.else // x86 calling convention with x87 ops.
3612 |
3613 | mov eax, [esp+20]
3614 | fld qword [esp+4]
3615 | fld qword [esp+12]
3616 | cmp eax, 1; je >1; ja >2
3617 | faddp st1; ret
3618 |1: ; fsubp st1; ret
3619 |2: ; cmp eax, 3; je >1; ja >2
3620 | fmulp st1; ret
3621 |1: ; fdivp st1; ret
3622 |2: ; cmp eax, 5; jb ->vm_mod; je ->vm_pow
3623 | cmp eax, 7; je >1; ja >2
3624 | fpop; fchs; ret
3625 |1: ; fpop; fabs; ret
3626 |2: ; cmp eax, 9; je >1; ja >2
3627 | fpatan; ret
3628 |1: ; fxch; fscale; fpop1; ret
3629 |2: ; cmp eax, 11; je >1; ja >9
3630 | fucomi st1; fcmovnbe st1; fpop1; ret
3631 |1: ; fucomi st1; fcmovbe st1; fpop1; ret
3632 |9: ; int3 // Bad op.
3633 |
3634 |.endif
3635 |
3636 |//----------------------------------------------------------------------- 3004 |//-----------------------------------------------------------------------
3637 |//-- Miscellaneous functions -------------------------------------------- 3005 |//-- Miscellaneous functions --------------------------------------------
3638 |//----------------------------------------------------------------------- 3006 |//-----------------------------------------------------------------------
@@ -3943,19 +3311,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3943 | // RA is a number. 3311 | // RA is a number.
3944 | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp 3312 | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp
3945 | // RA is a number, RD is an integer. 3313 | // RA is a number, RD is an integer.
3946 |.if SSE
3947 | cvtsi2sd xmm0, dword [BASE+RD*8] 3314 | cvtsi2sd xmm0, dword [BASE+RD*8]
3948 | jmp >2 3315 | jmp >2
3949 |.else
3950 | fld qword [BASE+RA*8]
3951 | fild dword [BASE+RD*8]
3952 | jmp >3
3953 |.endif
3954 | 3316 |
3955 |8: // RA is an integer, RD is not an integer. 3317 |8: // RA is an integer, RD is not an integer.
3956 | ja ->vmeta_comp 3318 | ja ->vmeta_comp
3957 | // RA is an integer, RD is a number. 3319 | // RA is an integer, RD is a number.
3958 |.if SSE
3959 | cvtsi2sd xmm1, dword [BASE+RA*8] 3320 | cvtsi2sd xmm1, dword [BASE+RA*8]
3960 | movsd xmm0, qword [BASE+RD*8] 3321 | movsd xmm0, qword [BASE+RD*8]
3961 | add PC, 4 3322 | add PC, 4
@@ -3963,29 +3324,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3963 | jmp_comp jbe, ja, jb, jae, <9 3324 | jmp_comp jbe, ja, jb, jae, <9
3964 | jmp <6 3325 | jmp <6
3965 |.else 3326 |.else
3966 | fild dword [BASE+RA*8]
3967 | jmp >2
3968 |.endif
3969 |.else
3970 | checknum RA, ->vmeta_comp 3327 | checknum RA, ->vmeta_comp
3971 | checknum RD, ->vmeta_comp 3328 | checknum RD, ->vmeta_comp
3972 |.endif 3329 |.endif
3973 |.if SSE
3974 |1: 3330 |1:
3975 | movsd xmm0, qword [BASE+RD*8] 3331 | movsd xmm0, qword [BASE+RD*8]
3976 |2: 3332 |2:
3977 | add PC, 4 3333 | add PC, 4
3978 | ucomisd xmm0, qword [BASE+RA*8] 3334 | ucomisd xmm0, qword [BASE+RA*8]
3979 |3: 3335 |3:
3980 |.else
3981 |1:
3982 | fld qword [BASE+RA*8] // Reverse order, i.e like cmp D, A.
3983 |2:
3984 | fld qword [BASE+RD*8]
3985 |3:
3986 | add PC, 4
3987 | fcomparepp
3988 |.endif
3989 | // Unordered: all of ZF CF PF set, ordered: PF clear. 3336 | // Unordered: all of ZF CF PF set, ordered: PF clear.
3990 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. 3337 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
3991 |.if DUALNUM 3338 |.if DUALNUM
@@ -4025,43 +3372,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4025 | // RD is a number. 3372 | // RD is a number.
4026 | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5 3373 | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5
4027 | // RD is a number, RA is an integer. 3374 | // RD is a number, RA is an integer.
4028 |.if SSE
4029 | cvtsi2sd xmm0, dword [BASE+RA*8] 3375 | cvtsi2sd xmm0, dword [BASE+RA*8]
4030 |.else
4031 | fild dword [BASE+RA*8]
4032 |.endif
4033 | jmp >2 3376 | jmp >2
4034 | 3377 |
4035 |8: // RD is an integer, RA is not an integer. 3378 |8: // RD is an integer, RA is not an integer.
4036 | ja >5 3379 | ja >5
4037 | // RD is an integer, RA is a number. 3380 | // RD is an integer, RA is a number.
4038 |.if SSE
4039 | cvtsi2sd xmm0, dword [BASE+RD*8] 3381 | cvtsi2sd xmm0, dword [BASE+RD*8]
4040 | ucomisd xmm0, qword [BASE+RA*8] 3382 | ucomisd xmm0, qword [BASE+RA*8]
4041 |.else
4042 | fild dword [BASE+RD*8]
4043 | fld qword [BASE+RA*8]
4044 |.endif
4045 | jmp >4 3383 | jmp >4
4046 | 3384 |
4047 |.else 3385 |.else
4048 | cmp RB, LJ_TISNUM; jae >5 3386 | cmp RB, LJ_TISNUM; jae >5
4049 | checknum RA, >5 3387 | checknum RA, >5
4050 |.endif 3388 |.endif
4051 |.if SSE
4052 |1: 3389 |1:
4053 | movsd xmm0, qword [BASE+RA*8] 3390 | movsd xmm0, qword [BASE+RA*8]
4054 |2: 3391 |2:
4055 | ucomisd xmm0, qword [BASE+RD*8] 3392 | ucomisd xmm0, qword [BASE+RD*8]
4056 |4: 3393 |4:
4057 |.else
4058 |1:
4059 | fld qword [BASE+RA*8]
4060 |2:
4061 | fld qword [BASE+RD*8]
4062 |4:
4063 | fcomparepp
4064 |.endif
4065 iseqne_fp: 3394 iseqne_fp:
4066 if (vk) { 3395 if (vk) {
4067 | jp >2 // Unordered means not equal. 3396 | jp >2 // Unordered means not equal.
@@ -4184,39 +3513,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4184 | // RA is a number. 3513 | // RA is a number.
4185 | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1 3514 | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1
4186 | // RA is a number, RD is an integer. 3515 | // RA is a number, RD is an integer.
4187 |.if SSE
4188 | cvtsi2sd xmm0, dword [KBASE+RD*8] 3516 | cvtsi2sd xmm0, dword [KBASE+RD*8]
4189 |.else
4190 | fild dword [KBASE+RD*8]
4191 |.endif
4192 | jmp >2 3517 | jmp >2
4193 | 3518 |
4194 |8: // RA is an integer, RD is a number. 3519 |8: // RA is an integer, RD is a number.
4195 |.if SSE
4196 | cvtsi2sd xmm0, dword [BASE+RA*8] 3520 | cvtsi2sd xmm0, dword [BASE+RA*8]
4197 | ucomisd xmm0, qword [KBASE+RD*8] 3521 | ucomisd xmm0, qword [KBASE+RD*8]
4198 |.else
4199 | fild dword [BASE+RA*8]
4200 | fld qword [KBASE+RD*8]
4201 |.endif
4202 | jmp >4 3522 | jmp >4
4203 |.else 3523 |.else
4204 | cmp RB, LJ_TISNUM; jae >3 3524 | cmp RB, LJ_TISNUM; jae >3
4205 |.endif 3525 |.endif
4206 |.if SSE
4207 |1: 3526 |1:
4208 | movsd xmm0, qword [KBASE+RD*8] 3527 | movsd xmm0, qword [KBASE+RD*8]
4209 |2: 3528 |2:
4210 | ucomisd xmm0, qword [BASE+RA*8] 3529 | ucomisd xmm0, qword [BASE+RA*8]
4211 |4: 3530 |4:
4212 |.else
4213 |1:
4214 | fld qword [KBASE+RD*8]
4215 |2:
4216 | fld qword [BASE+RA*8]
4217 |4:
4218 | fcomparepp
4219 |.endif
4220 goto iseqne_fp; 3531 goto iseqne_fp;
4221 case BC_ISEQP: case BC_ISNEP: 3532 case BC_ISEQP: case BC_ISNEP:
4222 vk = op == BC_ISEQP; 3533 vk = op == BC_ISEQP;
@@ -4267,6 +3578,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4267 | ins_next 3578 | ins_next
4268 break; 3579 break;
4269 3580
3581 case BC_ISTYPE:
3582 | ins_AD // RA = src, RD = -type
3583 | add RD, [BASE+RA*8+4]
3584 | jne ->vmeta_istype
3585 | ins_next
3586 break;
3587 case BC_ISNUM:
3588 | ins_AD // RA = src, RD = -(TISNUM-1)
3589 | checknum RA, ->vmeta_istype
3590 | ins_next
3591 break;
3592
4270 /* -- Unary ops --------------------------------------------------------- */ 3593 /* -- Unary ops --------------------------------------------------------- */
4271 3594
4272 case BC_MOV: 3595 case BC_MOV:
@@ -4310,16 +3633,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4310 |.else 3633 |.else
4311 | checknum RD, ->vmeta_unm 3634 | checknum RD, ->vmeta_unm
4312 |.endif 3635 |.endif
4313 |.if SSE
4314 | movsd xmm0, qword [BASE+RD*8] 3636 | movsd xmm0, qword [BASE+RD*8]
4315 | sseconst_sign xmm1, RDa 3637 | sseconst_sign xmm1, RDa
4316 | xorps xmm0, xmm1 3638 | xorps xmm0, xmm1
4317 | movsd qword [BASE+RA*8], xmm0 3639 | movsd qword [BASE+RA*8], xmm0
4318 |.else
4319 | fld qword [BASE+RD*8]
4320 | fchs
4321 | fstp qword [BASE+RA*8]
4322 |.endif
4323 |.if DUALNUM 3640 |.if DUALNUM
4324 | jmp <9 3641 | jmp <9
4325 |.else 3642 |.else
@@ -4335,15 +3652,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4335 |1: 3652 |1:
4336 | mov dword [BASE+RA*8+4], LJ_TISNUM 3653 | mov dword [BASE+RA*8+4], LJ_TISNUM
4337 | mov dword [BASE+RA*8], RD 3654 | mov dword [BASE+RA*8], RD
4338 |.elif SSE 3655 |.else
4339 | xorps xmm0, xmm0 3656 | xorps xmm0, xmm0
4340 | cvtsi2sd xmm0, dword STR:RD->len 3657 | cvtsi2sd xmm0, dword STR:RD->len
4341 |1: 3658 |1:
4342 | movsd qword [BASE+RA*8], xmm0 3659 | movsd qword [BASE+RA*8], xmm0
4343 |.else
4344 | fild dword STR:RD->len
4345 |1:
4346 | fstp qword [BASE+RA*8]
4347 |.endif 3660 |.endif
4348 | ins_next 3661 | ins_next
4349 |2: 3662 |2:
@@ -4361,11 +3674,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4361 | // Length of table returned in eax (RD). 3674 | // Length of table returned in eax (RD).
4362 |.if DUALNUM 3675 |.if DUALNUM
4363 | // Nothing to do. 3676 | // Nothing to do.
4364 |.elif SSE
4365 | cvtsi2sd xmm0, RD
4366 |.else 3677 |.else
4367 | mov ARG1, RD 3678 | cvtsi2sd xmm0, RD
4368 | fild ARG1
4369 |.endif 3679 |.endif
4370 | mov BASE, RB // Restore BASE. 3680 | mov BASE, RB // Restore BASE.
4371 | movzx RA, PC_RA 3681 | movzx RA, PC_RA
@@ -4380,7 +3690,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4380 3690
4381 /* -- Binary ops -------------------------------------------------------- */ 3691 /* -- Binary ops -------------------------------------------------------- */
4382 3692
4383 |.macro ins_arithpre, x87ins, sseins, ssereg 3693 |.macro ins_arithpre, sseins, ssereg
4384 | ins_ABC 3694 | ins_ABC
4385 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); 3695 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
4386 ||switch (vk) { 3696 ||switch (vk) {
@@ -4389,37 +3699,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4389 | .if DUALNUM 3699 | .if DUALNUM
4390 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn 3700 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn
4391 | .endif 3701 | .endif
4392 | .if SSE 3702 | movsd xmm0, qword [BASE+RB*8]
4393 | movsd xmm0, qword [BASE+RB*8] 3703 | sseins ssereg, qword [KBASE+RC*8]
4394 | sseins ssereg, qword [KBASE+RC*8]
4395 | .else
4396 | fld qword [BASE+RB*8]
4397 | x87ins qword [KBASE+RC*8]
4398 | .endif
4399 || break; 3704 || break;
4400 ||case 1: 3705 ||case 1:
4401 | checknum RB, ->vmeta_arith_nv 3706 | checknum RB, ->vmeta_arith_nv
4402 | .if DUALNUM 3707 | .if DUALNUM
4403 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv 3708 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv
4404 | .endif 3709 | .endif
4405 | .if SSE 3710 | movsd xmm0, qword [KBASE+RC*8]
4406 | movsd xmm0, qword [KBASE+RC*8] 3711 | sseins ssereg, qword [BASE+RB*8]
4407 | sseins ssereg, qword [BASE+RB*8]
4408 | .else
4409 | fld qword [KBASE+RC*8]
4410 | x87ins qword [BASE+RB*8]
4411 | .endif
4412 || break; 3712 || break;
4413 ||default: 3713 ||default:
4414 | checknum RB, ->vmeta_arith_vv 3714 | checknum RB, ->vmeta_arith_vv
4415 | checknum RC, ->vmeta_arith_vv 3715 | checknum RC, ->vmeta_arith_vv
4416 | .if SSE 3716 | movsd xmm0, qword [BASE+RB*8]
4417 | movsd xmm0, qword [BASE+RB*8] 3717 | sseins ssereg, qword [BASE+RC*8]
4418 | sseins ssereg, qword [BASE+RC*8]
4419 | .else
4420 | fld qword [BASE+RB*8]
4421 | x87ins qword [BASE+RC*8]
4422 | .endif
4423 || break; 3718 || break;
4424 ||} 3719 ||}
4425 |.endmacro 3720 |.endmacro
@@ -4457,55 +3752,62 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4457 |.endmacro 3752 |.endmacro
4458 | 3753 |
4459 |.macro ins_arithpost 3754 |.macro ins_arithpost
4460 |.if SSE
4461 | movsd qword [BASE+RA*8], xmm0 3755 | movsd qword [BASE+RA*8], xmm0
4462 |.else
4463 | fstp qword [BASE+RA*8]
4464 |.endif
4465 |.endmacro 3756 |.endmacro
4466 | 3757 |
4467 |.macro ins_arith, x87ins, sseins 3758 |.macro ins_arith, sseins
4468 | ins_arithpre x87ins, sseins, xmm0 3759 | ins_arithpre sseins, xmm0
4469 | ins_arithpost 3760 | ins_arithpost
4470 | ins_next 3761 | ins_next
4471 |.endmacro 3762 |.endmacro
4472 | 3763 |
4473 |.macro ins_arith, intins, x87ins, sseins 3764 |.macro ins_arith, intins, sseins
4474 |.if DUALNUM 3765 |.if DUALNUM
4475 | ins_arithdn intins 3766 | ins_arithdn intins
4476 |.else 3767 |.else
4477 | ins_arith, x87ins, sseins 3768 | ins_arith, sseins
4478 |.endif 3769 |.endif
4479 |.endmacro 3770 |.endmacro
4480 3771
4481 | // RA = dst, RB = src1 or num const, RC = src2 or num const 3772 | // RA = dst, RB = src1 or num const, RC = src2 or num const
4482 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: 3773 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
4483 | ins_arith add, fadd, addsd 3774 | ins_arith add, addsd
4484 break; 3775 break;
4485 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: 3776 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
4486 | ins_arith sub, fsub, subsd 3777 | ins_arith sub, subsd
4487 break; 3778 break;
4488 case BC_MULVN: case BC_MULNV: case BC_MULVV: 3779 case BC_MULVN: case BC_MULNV: case BC_MULVV:
4489 | ins_arith imul, fmul, mulsd 3780 | ins_arith imul, mulsd
4490 break; 3781 break;
4491 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: 3782 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
4492 | ins_arith fdiv, divsd 3783 | ins_arith divsd
4493 break; 3784 break;
4494 case BC_MODVN: 3785 case BC_MODVN:
4495 | ins_arithpre fld, movsd, xmm1 3786 | ins_arithpre movsd, xmm1
4496 |->BC_MODVN_Z: 3787 |->BC_MODVN_Z:
4497 | call ->vm_mod 3788 | call ->vm_mod
4498 | ins_arithpost 3789 | ins_arithpost
4499 | ins_next 3790 | ins_next
4500 break; 3791 break;
4501 case BC_MODNV: case BC_MODVV: 3792 case BC_MODNV: case BC_MODVV:
4502 | ins_arithpre fld, movsd, xmm1 3793 | ins_arithpre movsd, xmm1
4503 | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. 3794 | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
4504 break; 3795 break;
4505 case BC_POW: 3796 case BC_POW:
4506 | ins_arithpre fld, movsd, xmm1 3797 | ins_arithpre movsd, xmm1
4507 | call ->vm_pow 3798 | mov RB, BASE
3799 |.if not X64
3800 | movsd FPARG1, xmm0
3801 | movsd FPARG3, xmm1
3802 |.endif
3803 | call extern pow
3804 | movzx RA, PC_RA
3805 | mov BASE, RB
3806 |.if X64
4508 | ins_arithpost 3807 | ins_arithpost
3808 |.else
3809 | fstp qword [BASE+RA*8]
3810 |.endif
4509 | ins_next 3811 | ins_next
4510 break; 3812 break;
4511 3813
@@ -4573,25 +3875,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4573 | movsx RD, RDW 3875 | movsx RD, RDW
4574 | mov dword [BASE+RA*8+4], LJ_TISNUM 3876 | mov dword [BASE+RA*8+4], LJ_TISNUM
4575 | mov dword [BASE+RA*8], RD 3877 | mov dword [BASE+RA*8], RD
4576 |.elif SSE 3878 |.else
4577 | movsx RD, RDW // Sign-extend literal. 3879 | movsx RD, RDW // Sign-extend literal.
4578 | cvtsi2sd xmm0, RD 3880 | cvtsi2sd xmm0, RD
4579 | movsd qword [BASE+RA*8], xmm0 3881 | movsd qword [BASE+RA*8], xmm0
4580 |.else
4581 | fild PC_RD // Refetch signed RD from instruction.
4582 | fstp qword [BASE+RA*8]
4583 |.endif 3882 |.endif
4584 | ins_next 3883 | ins_next
4585 break; 3884 break;
4586 case BC_KNUM: 3885 case BC_KNUM:
4587 | ins_AD // RA = dst, RD = num const 3886 | ins_AD // RA = dst, RD = num const
4588 |.if SSE
4589 | movsd xmm0, qword [KBASE+RD*8] 3887 | movsd xmm0, qword [KBASE+RD*8]
4590 | movsd qword [BASE+RA*8], xmm0 3888 | movsd qword [BASE+RA*8], xmm0
4591 |.else
4592 | fld qword [KBASE+RD*8]
4593 | fstp qword [BASE+RA*8]
4594 |.endif
4595 | ins_next 3889 | ins_next
4596 break; 3890 break;
4597 case BC_KPRI: 3891 case BC_KPRI:
@@ -4698,18 +3992,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4698 case BC_USETN: 3992 case BC_USETN:
4699 | ins_AD // RA = upvalue #, RD = num const 3993 | ins_AD // RA = upvalue #, RD = num const
4700 | mov LFUNC:RB, [BASE-8] 3994 | mov LFUNC:RB, [BASE-8]
4701 |.if SSE
4702 | movsd xmm0, qword [KBASE+RD*8] 3995 | movsd xmm0, qword [KBASE+RD*8]
4703 |.else
4704 | fld qword [KBASE+RD*8]
4705 |.endif
4706 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] 3996 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
4707 | mov RA, UPVAL:RB->v 3997 | mov RA, UPVAL:RB->v
4708 |.if SSE
4709 | movsd qword [RA], xmm0 3998 | movsd qword [RA], xmm0
4710 |.else
4711 | fstp qword [RA]
4712 |.endif
4713 | ins_next 3999 | ins_next
4714 break; 4000 break;
4715 case BC_USETP: 4001 case BC_USETP:
@@ -4863,18 +4149,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4863 |.else 4149 |.else
4864 | // Convert number to int and back and compare. 4150 | // Convert number to int and back and compare.
4865 | checknum RC, >5 4151 | checknum RC, >5
4866 |.if SSE
4867 | movsd xmm0, qword [BASE+RC*8] 4152 | movsd xmm0, qword [BASE+RC*8]
4868 | cvtsd2si RC, xmm0 4153 | cvttsd2si RC, xmm0
4869 | cvtsi2sd xmm1, RC 4154 | cvtsi2sd xmm1, RC
4870 | ucomisd xmm0, xmm1 4155 | ucomisd xmm0, xmm1
4871 |.else
4872 | fld qword [BASE+RC*8]
4873 | fist ARG1
4874 | fild ARG1
4875 | fcomparepp
4876 | mov RC, ARG1
4877 |.endif
4878 | jne ->vmeta_tgetv // Generic numeric key? Use fallback. 4156 | jne ->vmeta_tgetv // Generic numeric key? Use fallback.
4879 |.endif 4157 |.endif
4880 | cmp RC, TAB:RB->asize // Takes care of unordered, too. 4158 | cmp RC, TAB:RB->asize // Takes care of unordered, too.
@@ -4998,6 +4276,32 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4998 | mov dword [BASE+RA*8+4], LJ_TNIL 4276 | mov dword [BASE+RA*8+4], LJ_TNIL
4999 | jmp <1 4277 | jmp <1
5000 break; 4278 break;
4279 case BC_TGETR:
4280 | ins_ABC // RA = dst, RB = table, RC = key
4281 | mov TAB:RB, [BASE+RB*8]
4282 |.if DUALNUM
4283 | mov RC, dword [BASE+RC*8]
4284 |.else
4285 | cvttsd2si RC, qword [BASE+RC*8]
4286 |.endif
4287 | cmp RC, TAB:RB->asize
4288 | jae ->vmeta_tgetr // Not in array part? Use fallback.
4289 | shl RC, 3
4290 | add RC, TAB:RB->array
4291 | // Get array slot.
4292 |->BC_TGETR_Z:
4293 |.if X64
4294 | mov RBa, [RC]
4295 | mov [BASE+RA*8], RBa
4296 |.else
4297 | mov RB, [RC]
4298 | mov RC, [RC+4]
4299 | mov [BASE+RA*8], RB
4300 | mov [BASE+RA*8+4], RC
4301 |.endif
4302 |->BC_TGETR2_Z:
4303 | ins_next
4304 break;
5001 4305
5002 case BC_TSETV: 4306 case BC_TSETV:
5003 | ins_ABC // RA = src, RB = table, RC = key 4307 | ins_ABC // RA = src, RB = table, RC = key
@@ -5011,18 +4315,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5011 |.else 4315 |.else
5012 | // Convert number to int and back and compare. 4316 | // Convert number to int and back and compare.
5013 | checknum RC, >5 4317 | checknum RC, >5
5014 |.if SSE
5015 | movsd xmm0, qword [BASE+RC*8] 4318 | movsd xmm0, qword [BASE+RC*8]
5016 | cvtsd2si RC, xmm0 4319 | cvttsd2si RC, xmm0
5017 | cvtsi2sd xmm1, RC 4320 | cvtsi2sd xmm1, RC
5018 | ucomisd xmm0, xmm1 4321 | ucomisd xmm0, xmm1
5019 |.else
5020 | fld qword [BASE+RC*8]
5021 | fist ARG1
5022 | fild ARG1
5023 | fcomparepp
5024 | mov RC, ARG1
5025 |.endif
5026 | jne ->vmeta_tsetv // Generic numeric key? Use fallback. 4322 | jne ->vmeta_tsetv // Generic numeric key? Use fallback.
5027 |.endif 4323 |.endif
5028 | cmp RC, TAB:RB->asize // Takes care of unordered, too. 4324 | cmp RC, TAB:RB->asize // Takes care of unordered, too.
@@ -5192,6 +4488,39 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5192 | movzx RA, PC_RA // Restore RA. 4488 | movzx RA, PC_RA // Restore RA.
5193 | jmp <2 4489 | jmp <2
5194 break; 4490 break;
4491 case BC_TSETR:
4492 | ins_ABC // RA = src, RB = table, RC = key
4493 | mov TAB:RB, [BASE+RB*8]
4494 |.if DUALNUM
4495 | mov RC, dword [BASE+RC*8]
4496 |.else
4497 | cvttsd2si RC, qword [BASE+RC*8]
4498 |.endif
4499 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
4500 | jnz >7
4501 |2:
4502 | cmp RC, TAB:RB->asize
4503 | jae ->vmeta_tsetr
4504 | shl RC, 3
4505 | add RC, TAB:RB->array
4506 | // Set array slot.
4507 |->BC_TSETR_Z:
4508 |.if X64
4509 | mov RBa, [BASE+RA*8]
4510 | mov [RC], RBa
4511 |.else
4512 | mov RB, [BASE+RA*8+4]
4513 | mov RA, [BASE+RA*8]
4514 | mov [RC+4], RB
4515 | mov [RC], RA
4516 |.endif
4517 | ins_next
4518 |
4519 |7: // Possible table write barrier for the value. Skip valiswhite check.
4520 | barrierback TAB:RB, RA
4521 | movzx RA, PC_RA // Restore RA.
4522 | jmp <2
4523 break;
5195 4524
5196 case BC_TSETM: 4525 case BC_TSETM:
5197 | ins_AD // RA = base (table at base-1), RD = num const (start index) 4526 | ins_AD // RA = base (table at base-1), RD = num const (start index)
@@ -5385,10 +4714,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5385 |.if DUALNUM 4714 |.if DUALNUM
5386 | mov dword [BASE+RA*8+4], LJ_TISNUM 4715 | mov dword [BASE+RA*8+4], LJ_TISNUM
5387 | mov dword [BASE+RA*8], RC 4716 | mov dword [BASE+RA*8], RC
5388 |.elif SSE
5389 | cvtsi2sd xmm0, RC
5390 |.else 4717 |.else
5391 | fild dword [BASE+RA*8-8] 4718 | cvtsi2sd xmm0, RC
5392 |.endif 4719 |.endif
5393 | // Copy array slot to returned value. 4720 | // Copy array slot to returned value.
5394 |.if X64 4721 |.if X64
@@ -5404,10 +4731,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5404 | // Return array index as a numeric key. 4731 | // Return array index as a numeric key.
5405 |.if DUALNUM 4732 |.if DUALNUM
5406 | // See above. 4733 | // See above.
5407 |.elif SSE
5408 | movsd qword [BASE+RA*8], xmm0
5409 |.else 4734 |.else
5410 | fstp qword [BASE+RA*8] 4735 | movsd qword [BASE+RA*8], xmm0
5411 |.endif 4736 |.endif
5412 | mov [BASE+RA*8-8], RC // Update control var. 4737 | mov [BASE+RA*8-8], RC // Update control var.
5413 |2: 4738 |2:
@@ -5420,9 +4745,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5420 | 4745 |
5421 |4: // Skip holes in array part. 4746 |4: // Skip holes in array part.
5422 | add RC, 1 4747 | add RC, 1
5423 |.if not (DUALNUM or SSE)
5424 | mov [BASE+RA*8-8], RC
5425 |.endif
5426 | jmp <1 4748 | jmp <1
5427 | 4749 |
5428 |5: // Traverse hash part. 4750 |5: // Traverse hash part.
@@ -5756,7 +5078,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5756 if (!vk) { 5078 if (!vk) {
5757 | cmp RB, LJ_TISNUM; jae ->vmeta_for 5079 | cmp RB, LJ_TISNUM; jae ->vmeta_for
5758 } 5080 }
5759 |.if SSE
5760 | movsd xmm0, qword FOR_IDX 5081 | movsd xmm0, qword FOR_IDX
5761 | movsd xmm1, qword FOR_STOP 5082 | movsd xmm1, qword FOR_STOP
5762 if (vk) { 5083 if (vk) {
@@ -5769,22 +5090,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5769 | ucomisd xmm1, xmm0 5090 | ucomisd xmm1, xmm0
5770 |1: 5091 |1:
5771 | movsd qword FOR_EXT, xmm0 5092 | movsd qword FOR_EXT, xmm0
5772 |.else
5773 | fld qword FOR_STOP
5774 | fld qword FOR_IDX
5775 if (vk) {
5776 | fadd qword FOR_STEP // nidx = idx + step
5777 | fst qword FOR_IDX
5778 | fst qword FOR_EXT
5779 | test RB, RB; js >1
5780 } else {
5781 | fst qword FOR_EXT
5782 | jl >1
5783 }
5784 | fxch // Swap lim/(n)idx if step non-negative.
5785 |1:
5786 | fcomparepp
5787 |.endif
5788 if (op == BC_FORI) { 5093 if (op == BC_FORI) {
5789 |.if DUALNUM 5094 |.if DUALNUM
5790 | jnb <7 5095 | jnb <7
@@ -5812,11 +5117,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5812 |2: 5117 |2:
5813 | ins_next 5118 | ins_next
5814 |.endif 5119 |.endif
5815 |.if SSE 5120 |
5816 |3: // Invert comparison if step is negative. 5121 |3: // Invert comparison if step is negative.
5817 | ucomisd xmm0, xmm1 5122 | ucomisd xmm0, xmm1
5818 | jmp <1 5123 | jmp <1
5819 |.endif
5820 break; 5124 break;
5821 5125
5822 case BC_ITERL: 5126 case BC_ITERL:
@@ -5854,7 +5158,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5854 | ins_A // RA = base, RD = target (loop extent) 5158 | ins_A // RA = base, RD = target (loop extent)
5855 | // Note: RA/RD is only used by trace recorder to determine scope/extent 5159 | // Note: RA/RD is only used by trace recorder to determine scope/extent
5856 | // This opcode does NOT jump, it's only purpose is to detect a hot loop. 5160 | // This opcode does NOT jump, it's only purpose is to detect a hot loop.
5857 |.if JIT 5161 |.if JIT
5858 | hotloop RB 5162 | hotloop RB
5859 |.endif 5163 |.endif
5860 | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op. 5164 | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op.
@@ -5873,7 +5177,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5873 | mov RDa, TRACE:RD->mcode 5177 | mov RDa, TRACE:RD->mcode
5874 | mov L:RB, SAVE_L 5178 | mov L:RB, SAVE_L
5875 | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE 5179 | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE
5876 | mov [DISPATCH+DISPATCH_GL(jit_L)], L:RB 5180 | mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB
5877 | // Save additional callee-save registers only used in compiled code. 5181 | // Save additional callee-save registers only used in compiled code.
5878 |.if X64WIN 5182 |.if X64WIN
5879 | mov TMPQ, r12 5183 | mov TMPQ, r12
@@ -6040,9 +5344,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
6040 | // (lua_State *L, lua_CFunction f) 5344 | // (lua_State *L, lua_CFunction f)
6041 | call aword [DISPATCH+DISPATCH_GL(wrapf)] 5345 | call aword [DISPATCH+DISPATCH_GL(wrapf)]
6042 } 5346 }
6043 | set_vmstate INTERP
6044 | // nresults returned in eax (RD). 5347 | // nresults returned in eax (RD).
6045 | mov BASE, L:RB->base 5348 | mov BASE, L:RB->base
5349 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
5350 | set_vmstate INTERP
6046 | lea RA, [BASE+RD*8] 5351 | lea RA, [BASE+RD*8]
6047 | neg RA 5352 | neg RA
6048 | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8 5353 | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
@@ -6355,15 +5660,21 @@ static void emit_asm_debug(BuildCtx *ctx)
6355 "LEFDEY:\n\n", fcsize); 5660 "LEFDEY:\n\n", fcsize);
6356 } 5661 }
6357#endif 5662#endif
6358#if LJ_64 5663#if !LJ_64
6359 fprintf(ctx->fp, "\t.subsections_via_symbols\n");
6360#else
6361 fprintf(ctx->fp, 5664 fprintf(ctx->fp,
6362 "\t.non_lazy_symbol_pointer\n" 5665 "\t.non_lazy_symbol_pointer\n"
6363 "L_lj_err_unwind_dwarf$non_lazy_ptr:\n" 5666 "L_lj_err_unwind_dwarf$non_lazy_ptr:\n"
6364 ".indirect_symbol _lj_err_unwind_dwarf\n" 5667 ".indirect_symbol _lj_err_unwind_dwarf\n"
6365 ".long 0\n"); 5668 ".long 0\n\n");
5669 fprintf(ctx->fp, "\t.section __IMPORT,__jump_table,symbol_stubs,pure_instructions+self_modifying_code,5\n");
5670 {
5671 const char *const *xn;
5672 for (xn = ctx->extnames; *xn; xn++)
5673 if (strncmp(*xn, LABEL_PREFIX, sizeof(LABEL_PREFIX)-1))
5674 fprintf(ctx->fp, "L_%s$stub:\n\t.indirect_symbol _%s\n\t.ascii \"\\364\\364\\364\\364\\364\"\n", *xn, *xn);
5675 }
6366#endif 5676#endif
5677 fprintf(ctx->fp, ".subsections_via_symbols\n");
6367 } 5678 }
6368 break; 5679 break;
6369 default: /* Difficult for other modes. */ 5680 default: /* Difficult for other modes. */