aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/Makefile27
-rw-r--r--src/Makefile.dep200
-rw-r--r--src/host/buildvm.c15
-rw-r--r--src/host/buildvm_lib.c61
-rw-r--r--src/host/buildvm_libbc.h30
-rw-r--r--src/host/genlibbc.lua197
-rw-r--r--src/jit/bc.lua19
-rw-r--r--src/jit/bcsave.lua8
-rw-r--r--src/jit/dis_arm.lua18
-rw-r--r--src/jit/dis_mips.lua30
-rw-r--r--src/jit/dis_mipsel.lua15
-rw-r--r--src/jit/dis_ppc.lua18
-rw-r--r--src/jit/dis_x64.lua15
-rw-r--r--src/jit/dis_x86.lua38
-rw-r--r--src/jit/dump.lua29
-rw-r--r--src/jit/p.lua310
-rw-r--r--src/jit/v.lua15
-rw-r--r--src/jit/zone.lua45
-rw-r--r--src/lib_base.c52
-rw-r--r--src/lib_bit.c134
-rw-r--r--src/lib_ffi.c15
-rw-r--r--src/lib_io.c27
-rw-r--r--src/lib_jit.c150
-rw-r--r--src/lib_math.c9
-rw-r--r--src/lib_os.c37
-rw-r--r--src/lib_package.c4
-rw-r--r--src/lib_string.c444
-rw-r--r--src/lib_table.c167
-rw-r--r--src/lj_api.c29
-rw-r--r--src/lj_arch.h16
-rw-r--r--src/lj_asm.c446
-rw-r--r--src/lj_asm_arm.h440
-rw-r--r--src/lj_asm_mips.h362
-rw-r--r--src/lj_asm_ppc.h366
-rw-r--r--src/lj_asm_x86.h526
-rw-r--r--src/lj_bc.h4
-rw-r--r--src/lj_bcdump.h3
-rw-r--r--src/lj_bcread.c140
-rw-r--r--src/lj_bcwrite.c228
-rw-r--r--src/lj_buf.c234
-rw-r--r--src/lj_buf.h105
-rw-r--r--src/lj_carith.c76
-rw-r--r--src/lj_carith.h10
-rw-r--r--src/lj_ccall.c1
-rw-r--r--src/lj_ccallback.c4
-rw-r--r--src/lj_cdata.c22
-rw-r--r--src/lj_cdata.h5
-rw-r--r--src/lj_clib.c13
-rw-r--r--src/lj_cparse.c52
-rw-r--r--src/lj_crecord.c234
-rw-r--r--src/lj_crecord.h7
-rw-r--r--src/lj_ctype.c15
-rw-r--r--src/lj_debug.c170
-rw-r--r--src/lj_debug.h8
-rw-r--r--src/lj_dispatch.c79
-rw-r--r--src/lj_dispatch.h23
-rw-r--r--src/lj_emit_arm.h16
-rw-r--r--src/lj_emit_mips.h16
-rw-r--r--src/lj_emit_ppc.h16
-rw-r--r--src/lj_emit_x86.h24
-rw-r--r--src/lj_err.c35
-rw-r--r--src/lj_errmsg.h5
-rw-r--r--src/lj_ffrecord.c531
-rw-r--r--src/lj_gc.c25
-rw-r--r--src/lj_gdbjit.c14
-rw-r--r--src/lj_ir.c5
-rw-r--r--src/lj_ir.h23
-rw-r--r--src/lj_ircall.h138
-rw-r--r--src/lj_jit.h27
-rw-r--r--src/lj_lex.c342
-rw-r--r--src/lj_lex.h17
-rw-r--r--src/lj_lib.c75
-rw-r--r--src/lj_lib.h21
-rw-r--r--src/lj_load.c4
-rw-r--r--src/lj_meta.c73
-rw-r--r--src/lj_meta.h1
-rw-r--r--src/lj_obj.c17
-rw-r--r--src/lj_obj.h27
-rw-r--r--src/lj_opt_fold.c285
-rw-r--r--src/lj_opt_loop.c29
-rw-r--r--src/lj_opt_mem.c11
-rw-r--r--src/lj_opt_narrow.c3
-rw-r--r--src/lj_opt_split.c131
-rw-r--r--src/lj_parse.c166
-rw-r--r--src/lj_profile.c368
-rw-r--r--src/lj_profile.h21
-rw-r--r--src/lj_record.c243
-rw-r--r--src/lj_record.h1
-rw-r--r--src/lj_snap.c3
-rw-r--r--src/lj_state.c23
-rw-r--r--src/lj_str.c212
-rw-r--r--src/lj_str.h35
-rw-r--r--src/lj_strfmt.c554
-rw-r--r--src/lj_strfmt.h125
-rw-r--r--src/lj_tab.c17
-rw-r--r--src/lj_tab.h4
-rw-r--r--src/lj_target_arm.h4
-rw-r--r--src/lj_target_mips.h3
-rw-r--r--src/lj_target_x86.h3
-rw-r--r--src/lj_trace.c49
-rw-r--r--src/lj_trace.h1
-rw-r--r--src/lj_traceerr.h3
-rw-r--r--src/lj_vm.h6
-rw-r--r--src/ljamalg.c3
-rw-r--r--src/luaconf.h2
-rw-r--r--src/luajit.c13
-rw-r--r--src/luajit.h15
-rw-r--r--src/msvcbuild.bat1
-rw-r--r--src/vm_arm.dasc331
-rw-r--r--src/vm_mips.dasc390
-rw-r--r--src/vm_ppc.dasc365
-rw-r--r--src/vm_ppcspe.dasc6
-rw-r--r--src/vm_x86.dasc1102
113 files changed, 7332 insertions, 4395 deletions
diff --git a/src/Makefile b/src/Makefile
index 9551781a..722efc33 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -11,8 +11,8 @@
11############################################################################## 11##############################################################################
12 12
13MAJVER= 2 13MAJVER= 2
14MINVER= 0 14MINVER= 1
15RELVER= 3 15RELVER= 0
16ABIVER= 5.1 16ABIVER= 5.1
17NODOTABIVER= 51 17NODOTABIVER= 51
18 18
@@ -42,13 +42,10 @@ CCOPT= -O2 -fomit-frame-pointer
42# 42#
43# Target-specific compiler options: 43# Target-specific compiler options:
44# 44#
45# x86 only: it's recommended to compile at least for i686. Better yet,
46# compile for an architecture that has SSE2, too (-msse -msse2).
47#
48# x86/x64 only: For GCC 4.2 or higher and if you don't intend to distribute 45# x86/x64 only: For GCC 4.2 or higher and if you don't intend to distribute
49# the binaries to a different machine you could also use: -march=native 46# the binaries to a different machine you could also use: -march=native
50# 47#
51CCOPT_x86= -march=i686 48CCOPT_x86= -march=i686 -msse -msse2 -mfpmath=sse
52CCOPT_x64= 49CCOPT_x64=
53CCOPT_arm= 50CCOPT_arm=
54CCOPT_ppc= 51CCOPT_ppc=
@@ -240,6 +237,7 @@ ifneq (,$(findstring LJ_TARGET_PS3 1,$(TARGET_TESTARCH)))
240 TARGET_SYS= PS3 237 TARGET_SYS= PS3
241 TARGET_ARCH+= -D__CELLOS_LV2__ 238 TARGET_ARCH+= -D__CELLOS_LV2__
242 TARGET_XCFLAGS+= -DLUAJIT_USE_SYSMALLOC 239 TARGET_XCFLAGS+= -DLUAJIT_USE_SYSMALLOC
240 TARGET_XLIBS+= -lpthread
243endif 241endif
244ifneq (,$(findstring LJ_NO_UNWIND 1,$(TARGET_TESTARCH))) 242ifneq (,$(findstring LJ_NO_UNWIND 1,$(TARGET_TESTARCH)))
245 TARGET_ARCH+= -DLUAJIT_NO_UNWIND 243 TARGET_ARCH+= -DLUAJIT_NO_UNWIND
@@ -403,11 +401,6 @@ DASM_AFLAGS+= -D VER=$(subst LJ_ARCH_VERSION_,,$(filter LJ_ARCH_VERSION_%,$(subs
403ifeq (Windows,$(TARGET_SYS)) 401ifeq (Windows,$(TARGET_SYS))
404 DASM_AFLAGS+= -D WIN 402 DASM_AFLAGS+= -D WIN
405endif 403endif
406ifeq (x86,$(TARGET_LJARCH))
407 ifneq (,$(findstring __SSE2__ 1,$(TARGET_TESTARCH)))
408 DASM_AFLAGS+= -D SSE
409 endif
410else
411ifeq (x64,$(TARGET_LJARCH)) 404ifeq (x64,$(TARGET_LJARCH))
412 DASM_ARCH= x86 405 DASM_ARCH= x86
413else 406else
@@ -432,7 +425,6 @@ ifeq (ppc,$(TARGET_LJARCH))
432endif 425endif
433endif 426endif
434endif 427endif
435endif
436 428
437DASM_FLAGS= $(DASM_XFLAGS) $(DASM_AFLAGS) 429DASM_FLAGS= $(DASM_XFLAGS) $(DASM_AFLAGS)
438DASM_DASC= vm_$(DASM_ARCH).dasc 430DASM_DASC= vm_$(DASM_ARCH).dasc
@@ -454,10 +446,11 @@ LJLIB_O= lib_base.o lib_math.o lib_bit.o lib_string.o lib_table.o \
454 lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o 446 lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o
455LJLIB_C= $(LJLIB_O:.o=.c) 447LJLIB_C= $(LJLIB_O:.o=.c)
456 448
457LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o \ 449LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \
458 lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \ 450 lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \
459 lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_strscan.o \ 451 lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_strscan.o \
460 lj_api.o lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \ 452 lj_strfmt.o lj_api.o lj_profile.o \
453 lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \
461 lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \ 454 lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \
462 lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \ 455 lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \
463 lj_mcode.o lj_snap.o lj_record.o lj_crecord.o lj_ffrecord.o \ 456 lj_mcode.o lj_snap.o lj_record.o lj_crecord.o lj_ffrecord.o \
@@ -577,6 +570,10 @@ amalg:
577clean: 570clean:
578 $(HOST_RM) $(ALL_RM) 571 $(HOST_RM) $(ALL_RM)
579 572
573libbc:
574 ./$(LUAJIT_T) host/genlibbc.lua -o host/buildvm_libbc.h $(LJLIB_C)
575 $(MAKE) all
576
580depend: 577depend:
581 @for file in $(ALL_HDRGEN); do \ 578 @for file in $(ALL_HDRGEN); do \
582 test -f $$file || touch $$file; \ 579 test -f $$file || touch $$file; \
@@ -591,7 +588,7 @@ depend:
591 test -s $$file || $(HOST_RM) $$file; \ 588 test -s $$file || $(HOST_RM) $$file; \
592 done 589 done
593 590
594.PHONY: default all amalg clean depend 591.PHONY: default all amalg clean libbc depend
595 592
596############################################################################## 593##############################################################################
597# Rules for generated files. 594# Rules for generated files.
diff --git a/src/Makefile.dep b/src/Makefile.dep
index 8ca33151..9aefb236 100644
--- a/src/Makefile.dep
+++ b/src/Makefile.dep
@@ -5,43 +5,47 @@ lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
5 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h \ 5 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h \
6 lj_tab.h lj_meta.h lj_state.h lj_ctype.h lj_cconv.h lj_bc.h lj_ff.h \ 6 lj_tab.h lj_meta.h lj_state.h lj_ctype.h lj_cconv.h lj_bc.h lj_ff.h \
7 lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h lj_strscan.h \ 7 lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h lj_strscan.h \
8 lj_lib.h lj_libdef.h 8 lj_strfmt.h lj_lib.h lj_libdef.h
9lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ 9lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
10 lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_lib.h lj_libdef.h 10 lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_strscan.h \
11 lj_strfmt.h lj_ctype.h lj_cdata.h lj_cconv.h lj_carith.h lj_ff.h \
12 lj_ffdef.h lj_lib.h lj_libdef.h
11lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ 13lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
12 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_lib.h \ 14 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_lib.h \
13 lj_libdef.h 15 lj_libdef.h
14lib_ffi.o: lib_ffi.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ 16lib_ffi.o: lib_ffi.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
15 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h \ 17 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h \
16 lj_ctype.h lj_cparse.h lj_cdata.h lj_cconv.h lj_carith.h lj_ccall.h \ 18 lj_ctype.h lj_cparse.h lj_cdata.h lj_cconv.h lj_carith.h lj_ccall.h \
17 lj_ccallback.h lj_clib.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h 19 lj_ccallback.h lj_clib.h lj_strfmt.h lj_ff.h lj_ffdef.h lj_lib.h \
20 lj_libdef.h
18lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h 21lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h
19lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ 22lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
20 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_state.h lj_ff.h \ 23 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_state.h \
21 lj_ffdef.h lj_lib.h lj_libdef.h 24 lj_strfmt.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h
22lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h \ 25lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
23 lj_obj.h lj_def.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h \ 26 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h \
24 lj_bc.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_target.h \ 27 lj_state.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \
25 lj_target_*.h lj_dispatch.h lj_vm.h lj_vmevent.h lj_lib.h luajit.h \ 28 lj_target.h lj_target_*.h lj_trace.h lj_dispatch.h lj_traceerr.h \
26 lj_libdef.h 29 lj_vm.h lj_vmevent.h lj_lib.h luajit.h lj_libdef.h
27lib_math.o: lib_math.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ 30lib_math.o: lib_math.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
28 lj_def.h lj_arch.h lj_lib.h lj_vm.h lj_libdef.h 31 lj_def.h lj_arch.h lj_lib.h lj_vm.h lj_libdef.h
29lib_os.o: lib_os.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ 32lib_os.o: lib_os.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
30 lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lj_libdef.h 33 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_lib.h \
34 lj_libdef.h
31lib_package.o: lib_package.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ 35lib_package.o: lib_package.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
32 lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h 36 lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h
33lib_string.o: lib_string.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ 37lib_string.o: lib_string.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
34 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h \ 38 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \
35 lj_meta.h lj_state.h lj_ff.h lj_ffdef.h lj_bcdump.h lj_lex.h lj_char.h \ 39 lj_tab.h lj_meta.h lj_state.h lj_ff.h lj_ffdef.h lj_bcdump.h lj_lex.h \
36 lj_lib.h lj_libdef.h 40 lj_char.h lj_strfmt.h lj_lib.h lj_libdef.h
37lib_table.o: lib_table.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ 41lib_table.o: lib_table.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
38 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_lib.h \ 42 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \
39 lj_libdef.h 43 lj_tab.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h
40lj_alloc.o: lj_alloc.c lj_def.h lua.h luaconf.h lj_arch.h lj_alloc.h 44lj_alloc.o: lj_alloc.c lj_def.h lua.h luaconf.h lj_arch.h lj_alloc.h
41lj_api.o: lj_api.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 45lj_api.o: lj_api.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
42 lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h lj_udata.h \ 46 lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h lj_udata.h \
43 lj_meta.h lj_state.h lj_bc.h lj_frame.h lj_trace.h lj_jit.h lj_ir.h \ 47 lj_meta.h lj_state.h lj_bc.h lj_frame.h lj_trace.h lj_jit.h lj_ir.h \
44 lj_dispatch.h lj_traceerr.h lj_vm.h lj_strscan.h 48 lj_dispatch.h lj_traceerr.h lj_vm.h lj_strscan.h lj_strfmt.h
45lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 49lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
46 lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h \ 50 lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h \
47 lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h lj_traceerr.h \ 51 lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h lj_traceerr.h \
@@ -50,17 +54,20 @@ lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
50lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h \ 54lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h \
51 lj_bcdef.h 55 lj_bcdef.h
52lj_bcread.o: lj_bcread.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 56lj_bcread.o: lj_bcread.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
53 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_bc.h lj_ctype.h \ 57 lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_bc.h \
54 lj_cdata.h lualib.h lj_lex.h lj_bcdump.h lj_state.h 58 lj_ctype.h lj_cdata.h lualib.h lj_lex.h lj_bcdump.h lj_state.h \
59 lj_strfmt.h
55lj_bcwrite.o: lj_bcwrite.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 60lj_bcwrite.o: lj_bcwrite.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
56 lj_gc.h lj_str.h lj_bc.h lj_ctype.h lj_dispatch.h lj_jit.h lj_ir.h \ 61 lj_gc.h lj_buf.h lj_str.h lj_bc.h lj_ctype.h lj_dispatch.h lj_jit.h \
57 lj_bcdump.h lj_lex.h lj_err.h lj_errmsg.h lj_vm.h 62 lj_ir.h lj_strfmt.h lj_bcdump.h lj_lex.h lj_err.h lj_errmsg.h lj_vm.h
63lj_buf.o: lj_buf.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
64 lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_strfmt.h
58lj_carith.o: lj_carith.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 65lj_carith.o: lj_carith.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
59 lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_meta.h lj_ctype.h lj_cconv.h \ 66 lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_meta.h lj_ir.h lj_ctype.h \
60 lj_cdata.h lj_carith.h 67 lj_cconv.h lj_cdata.h lj_carith.h lj_strscan.h
61lj_ccall.o: lj_ccall.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 68lj_ccall.o: lj_ccall.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
62 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cconv.h \ 69 lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_cconv.h lj_cdata.h \
63 lj_cdata.h lj_ccall.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \ 70 lj_ccall.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \
64 lj_traceerr.h 71 lj_traceerr.h
65lj_ccallback.o: lj_ccallback.c lj_obj.h lua.h luaconf.h lj_def.h \ 72lj_ccallback.o: lj_ccallback.c lj_obj.h lua.h luaconf.h lj_def.h \
66 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_state.h lj_frame.h \ 73 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_state.h lj_frame.h \
@@ -71,107 +78,116 @@ lj_cconv.o: lj_cconv.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
71 lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_gc.h lj_cdata.h lj_cconv.h \ 78 lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_gc.h lj_cdata.h lj_cconv.h \
72 lj_ccallback.h 79 lj_ccallback.h
73lj_cdata.o: lj_cdata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 80lj_cdata.o: lj_cdata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
74 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cconv.h \ 81 lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_cconv.h lj_cdata.h
75 lj_cdata.h
76lj_char.o: lj_char.c lj_char.h lj_def.h lua.h luaconf.h 82lj_char.o: lj_char.c lj_char.h lj_def.h lua.h luaconf.h
77lj_clib.o: lj_clib.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 83lj_clib.o: lj_clib.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
78 lj_err.h lj_errmsg.h lj_tab.h lj_str.h lj_udata.h lj_ctype.h lj_cconv.h \ 84 lj_err.h lj_errmsg.h lj_tab.h lj_str.h lj_udata.h lj_ctype.h lj_cconv.h \
79 lj_cdata.h lj_clib.h 85 lj_cdata.h lj_clib.h lj_strfmt.h
80lj_cparse.o: lj_cparse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 86lj_cparse.o: lj_cparse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
81 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_ctype.h lj_cparse.h lj_frame.h \ 87 lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_ctype.h lj_cparse.h \
82 lj_bc.h lj_vm.h lj_char.h lj_strscan.h 88 lj_frame.h lj_bc.h lj_vm.h lj_char.h lj_strscan.h lj_strfmt.h
83lj_crecord.o: lj_crecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 89lj_crecord.o: lj_crecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
84 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h \ 90 lj_err.h lj_errmsg.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_gc.h \
85 lj_gc.h lj_cdata.h lj_cparse.h lj_cconv.h lj_clib.h lj_ccall.h lj_ff.h \ 91 lj_cdata.h lj_cparse.h lj_cconv.h lj_carith.h lj_clib.h lj_ccall.h \
86 lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ 92 lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
87 lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h \ 93 lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h \
88 lj_crecord.h 94 lj_crecord.h lj_strfmt.h
89lj_ctype.o: lj_ctype.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 95lj_ctype.o: lj_ctype.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
90 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_ccallback.h 96 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_strfmt.h lj_ctype.h \
97 lj_ccallback.h
91lj_debug.o: lj_debug.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 98lj_debug.o: lj_debug.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
92 lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_state.h lj_frame.h \ 99 lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_gc.h lj_str.h lj_tab.h \
93 lj_bc.h lj_jit.h lj_ir.h 100 lj_state.h lj_frame.h lj_bc.h lj_strfmt.h lj_jit.h lj_ir.h
94lj_dispatch.o: lj_dispatch.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 101lj_dispatch.o: lj_dispatch.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
95 lj_err.h lj_errmsg.h lj_func.h lj_str.h lj_tab.h lj_meta.h lj_debug.h \ 102 lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_func.h lj_tab.h \
96 lj_state.h lj_frame.h lj_bc.h lj_ff.h lj_ffdef.h lj_jit.h lj_ir.h \ 103 lj_meta.h lj_debug.h lj_state.h lj_frame.h lj_bc.h lj_ff.h lj_ffdef.h \
97 lj_ccallback.h lj_ctype.h lj_gc.h lj_trace.h lj_dispatch.h lj_traceerr.h \ 104 lj_strfmt.h lj_jit.h lj_ir.h lj_ccallback.h lj_ctype.h lj_trace.h \
98 lj_vm.h luajit.h 105 lj_dispatch.h lj_traceerr.h lj_profile.h lj_vm.h luajit.h
99lj_err.o: lj_err.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_err.h \ 106lj_err.o: lj_err.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_err.h \
100 lj_errmsg.h lj_debug.h lj_str.h lj_func.h lj_state.h lj_frame.h lj_bc.h \ 107 lj_errmsg.h lj_debug.h lj_str.h lj_func.h lj_state.h lj_frame.h lj_bc.h \
101 lj_ff.h lj_ffdef.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \ 108 lj_ff.h lj_ffdef.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \
102 lj_traceerr.h lj_vm.h 109 lj_traceerr.h lj_vm.h lj_strfmt.h
103lj_ffrecord.o: lj_ffrecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 110lj_ffrecord.o: lj_ffrecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
104 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ff.h \ 111 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ff.h \
105 lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ 112 lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
106 lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_crecord.h \ 113 lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_crecord.h \
107 lj_vm.h lj_strscan.h lj_recdef.h 114 lj_vm.h lj_strscan.h lj_strfmt.h lj_recdef.h
108lj_func.o: lj_func.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 115lj_func.o: lj_func.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
109 lj_func.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \ 116 lj_func.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \
110 lj_traceerr.h lj_vm.h 117 lj_traceerr.h lj_vm.h
111lj_gc.o: lj_gc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 118lj_gc.o: lj_gc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
112 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_udata.h lj_meta.h \ 119 lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_func.h lj_udata.h \
113 lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h lj_trace.h lj_jit.h \ 120 lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h lj_trace.h \
114 lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h 121 lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h
115lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 122lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
116 lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_frame.h lj_bc.h lj_jit.h \ 123 lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_frame.h lj_bc.h lj_buf.h \
117 lj_ir.h lj_dispatch.h 124 lj_str.h lj_strfmt.h lj_jit.h lj_ir.h lj_dispatch.h
118lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 125lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
119 lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ 126 lj_buf.h lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \
120 lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h lj_carith.h \ 127 lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h \
121 lj_vm.h lj_strscan.h lj_lib.h 128 lj_carith.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lib.h
122lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 129lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
123 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h lualib.h \ 130 lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h \
124 lj_state.h lj_lex.h lj_parse.h lj_char.h lj_strscan.h 131 lualib.h lj_state.h lj_lex.h lj_parse.h lj_char.h lj_strscan.h \
132 lj_strfmt.h
125lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \ 133lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \
126 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_bc.h \ 134 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_bc.h \
127 lj_dispatch.h lj_jit.h lj_ir.h lj_vm.h lj_strscan.h lj_lib.h 135 lj_dispatch.h lj_jit.h lj_ir.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lex.h \
136 lj_bcdump.h lj_lib.h
128lj_load.o: lj_load.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \ 137lj_load.o: lj_load.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \
129 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_func.h lj_frame.h \ 138 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_func.h \
130 lj_bc.h lj_vm.h lj_lex.h lj_bcdump.h lj_parse.h 139 lj_frame.h lj_bc.h lj_vm.h lj_lex.h lj_bcdump.h lj_parse.h
131lj_mcode.o: lj_mcode.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 140lj_mcode.o: lj_mcode.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
132 lj_gc.h lj_err.h lj_errmsg.h lj_jit.h lj_ir.h lj_mcode.h lj_trace.h \ 141 lj_gc.h lj_err.h lj_errmsg.h lj_jit.h lj_ir.h lj_mcode.h lj_trace.h \
133 lj_dispatch.h lj_bc.h lj_traceerr.h lj_vm.h 142 lj_dispatch.h lj_bc.h lj_traceerr.h lj_vm.h
134lj_meta.o: lj_meta.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 143lj_meta.o: lj_meta.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
135 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \ 144 lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_meta.h lj_frame.h \
136 lj_vm.h lj_strscan.h 145 lj_bc.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lib.h
137lj_obj.o: lj_obj.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h 146lj_obj.o: lj_obj.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h
138lj_opt_dce.o: lj_opt_dce.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 147lj_opt_dce.o: lj_opt_dce.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
139 lj_ir.h lj_jit.h lj_iropt.h 148 lj_ir.h lj_jit.h lj_iropt.h
140lj_opt_fold.o: lj_opt_fold.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 149lj_opt_fold.o: lj_opt_fold.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
141 lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \ 150 lj_buf.h lj_gc.h lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h \
142 lj_bc.h lj_traceerr.h lj_ctype.h lj_gc.h lj_carith.h lj_vm.h \ 151 lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h \
143 lj_strscan.h lj_folddef.h 152 lj_carith.h lj_vm.h lj_strscan.h lj_strfmt.h lj_folddef.h
144lj_opt_loop.o: lj_opt_loop.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 153lj_opt_loop.o: lj_opt_loop.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
145 lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \ 154 lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_ir.h lj_jit.h \
146 lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h lj_vm.h 155 lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h \
156 lj_vm.h
147lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 157lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
148 lj_tab.h lj_ir.h lj_jit.h lj_iropt.h 158 lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_ircall.h
149lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \ 159lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \
150 lj_arch.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \ 160 lj_arch.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \
151 lj_traceerr.h lj_vm.h lj_strscan.h 161 lj_traceerr.h lj_vm.h lj_strscan.h
152lj_opt_sink.o: lj_opt_sink.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 162lj_opt_sink.o: lj_opt_sink.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
153 lj_ir.h lj_jit.h lj_iropt.h lj_target.h lj_target_*.h 163 lj_ir.h lj_jit.h lj_iropt.h lj_target.h lj_target_*.h
154lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \ 164lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \
155 lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_ircall.h \ 165 lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_ir.h \
156 lj_iropt.h lj_vm.h 166 lj_jit.h lj_ircall.h lj_iropt.h lj_vm.h
157lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 167lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
158 lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h \ 168 lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_str.h lj_tab.h \
159 lj_state.h lj_bc.h lj_ctype.h lj_lex.h lj_parse.h lj_vm.h lj_vmevent.h 169 lj_func.h lj_state.h lj_bc.h lj_ctype.h lj_strfmt.h lj_lex.h lj_parse.h \
170 lj_vm.h lj_vmevent.h
171lj_profile.o: lj_profile.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
172 lj_buf.h lj_gc.h lj_str.h lj_frame.h lj_bc.h lj_debug.h lj_dispatch.h \
173 lj_jit.h lj_ir.h lj_trace.h lj_traceerr.h lj_profile.h luajit.h
160lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 174lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
161 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \ 175 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \
162 lj_ctype.h lj_gc.h lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h \ 176 lj_ctype.h lj_gc.h lj_ff.h lj_ffdef.h lj_debug.h lj_ir.h lj_jit.h \
163 lj_iropt.h lj_trace.h lj_dispatch.h lj_traceerr.h lj_record.h \ 177 lj_ircall.h lj_iropt.h lj_trace.h lj_dispatch.h lj_traceerr.h \
164 lj_ffrecord.h lj_snap.h lj_vm.h 178 lj_record.h lj_ffrecord.h lj_snap.h lj_vm.h
165lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 179lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
166 lj_tab.h lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h \ 180 lj_tab.h lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h \
167 lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h \ 181 lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h \
168 lj_target_*.h lj_ctype.h lj_cdata.h 182 lj_target_*.h lj_ctype.h lj_cdata.h
169lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 183lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
170 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_meta.h \ 184 lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_func.h \
171 lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_trace.h lj_jit.h lj_ir.h \ 185 lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_trace.h lj_jit.h \
172 lj_dispatch.h lj_traceerr.h lj_vm.h lj_lex.h lj_alloc.h 186 lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_lex.h lj_alloc.h luajit.h
173lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 187lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
174 lj_err.h lj_errmsg.h lj_str.h lj_state.h lj_char.h 188 lj_err.h lj_errmsg.h lj_str.h lj_char.h
189lj_strfmt.o: lj_strfmt.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
190 lj_buf.h lj_gc.h lj_str.h lj_state.h lj_char.h lj_strfmt.h
175lj_strscan.o: lj_strscan.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 191lj_strscan.o: lj_strscan.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
176 lj_char.h lj_strscan.h 192 lj_char.h lj_strscan.h
177lj_tab.o: lj_tab.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 193lj_tab.o: lj_tab.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
@@ -189,21 +205,22 @@ lj_vmevent.o: lj_vmevent.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
189lj_vmmath.o: lj_vmmath.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 205lj_vmmath.o: lj_vmmath.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
190 lj_ir.h lj_vm.h 206 lj_ir.h lj_vm.h
191ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \ 207ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \
192 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h \ 208 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h \
193 lj_udata.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h \ 209 lj_func.h lj_udata.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h \
194 lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_err.c \ 210 lj_cdata.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h \
195 lj_debug.h lj_ff.h lj_ffdef.h lj_char.c lj_char.h lj_bc.c lj_bcdef.h \ 211 lj_vm.h lj_err.c lj_debug.h lj_ff.h lj_ffdef.h lj_strfmt.h lj_char.c \
196 lj_obj.c lj_str.c lj_tab.c lj_func.c lj_udata.c lj_meta.c lj_strscan.h \ 212 lj_char.h lj_bc.c lj_bcdef.h lj_obj.c lj_buf.c lj_str.c lj_tab.c \
197 lj_debug.c lj_state.c lj_lex.h lj_alloc.h lj_dispatch.c lj_ccallback.h \ 213 lj_func.c lj_udata.c lj_meta.c lj_strscan.h lj_lib.h lj_debug.c \
198 luajit.h lj_vmevent.c lj_vmevent.h lj_vmmath.c lj_strscan.c lj_api.c \ 214 lj_state.c lj_lex.h lj_alloc.h luajit.h lj_dispatch.c lj_ccallback.h \
199 lj_lex.c lualib.h lj_parse.h lj_parse.c lj_bcread.c lj_bcdump.h \ 215 lj_profile.h lj_vmevent.c lj_vmevent.h lj_vmmath.c lj_strscan.c \
200 lj_bcwrite.c lj_load.c lj_ctype.c lj_cdata.c lj_cconv.h lj_cconv.c \ 216 lj_strfmt.c lj_api.c lj_profile.c lj_lex.c lualib.h lj_parse.h \
201 lj_ccall.c lj_ccall.h lj_ccallback.c lj_target.h lj_target_*.h \ 217 lj_parse.c lj_bcread.c lj_bcdump.h lj_bcwrite.c lj_load.c lj_ctype.c \
202 lj_mcode.h lj_carith.c lj_carith.h lj_clib.c lj_clib.h lj_cparse.c \ 218 lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h lj_ccallback.c \
203 lj_cparse.h lj_lib.c lj_lib.h lj_ir.c lj_ircall.h lj_iropt.h \ 219 lj_target.h lj_target_*.h lj_mcode.h lj_carith.c lj_carith.h lj_clib.c \
204 lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c lj_opt_dce.c \ 220 lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c lj_ircall.h \
205 lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c lj_mcode.c \ 221 lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c \
206 lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \ 222 lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c \
223 lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \
207 lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h \ 224 lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h \
208 lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c \ 225 lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c \
209 lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c \ 226 lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c \
@@ -220,7 +237,8 @@ host/buildvm_asm.o: host/buildvm_asm.c host/buildvm.h lj_def.h lua.h luaconf.h \
220host/buildvm_fold.o: host/buildvm_fold.c host/buildvm.h lj_def.h lua.h \ 237host/buildvm_fold.o: host/buildvm_fold.c host/buildvm.h lj_def.h lua.h \
221 luaconf.h lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_ir.h lj_obj.h 238 luaconf.h lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_ir.h lj_obj.h
222host/buildvm_lib.o: host/buildvm_lib.c host/buildvm.h lj_def.h lua.h luaconf.h \ 239host/buildvm_lib.o: host/buildvm_lib.c host/buildvm.h lj_def.h lua.h luaconf.h \
223 lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_lib.h lj_obj.h 240 lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_bc.h lj_lib.h lj_obj.h \
241 host/buildvm_libbc.h
224host/buildvm_peobj.o: host/buildvm_peobj.c host/buildvm.h lj_def.h lua.h \ 242host/buildvm_peobj.o: host/buildvm_peobj.c host/buildvm.h lj_def.h lua.h \
225 luaconf.h lj_arch.h lj_bc.h lj_def.h lj_arch.h 243 luaconf.h lj_arch.h lj_bc.h lj_def.h lj_arch.h
226host/minilua.o: host/minilua.c 244host/minilua.o: host/minilua.c
diff --git a/src/host/buildvm.c b/src/host/buildvm.c
index 7687ad7a..37b20ae2 100644
--- a/src/host/buildvm.c
+++ b/src/host/buildvm.c
@@ -320,20 +320,20 @@ static void emit_vmdef(BuildCtx *ctx)
320 char buf[80]; 320 char buf[80];
321 int i; 321 int i;
322 fprintf(ctx->fp, "-- This is a generated file. DO NOT EDIT!\n\n"); 322 fprintf(ctx->fp, "-- This is a generated file. DO NOT EDIT!\n\n");
323 fprintf(ctx->fp, "module(...)\n\n"); 323 fprintf(ctx->fp, "return {\n\n");
324 324
325 fprintf(ctx->fp, "bcnames = \""); 325 fprintf(ctx->fp, "bcnames = \"");
326 for (i = 0; bc_names[i]; i++) fprintf(ctx->fp, "%-6s", bc_names[i]); 326 for (i = 0; bc_names[i]; i++) fprintf(ctx->fp, "%-6s", bc_names[i]);
327 fprintf(ctx->fp, "\"\n\n"); 327 fprintf(ctx->fp, "\",\n\n");
328 328
329 fprintf(ctx->fp, "irnames = \""); 329 fprintf(ctx->fp, "irnames = \"");
330 for (i = 0; ir_names[i]; i++) fprintf(ctx->fp, "%-6s", ir_names[i]); 330 for (i = 0; ir_names[i]; i++) fprintf(ctx->fp, "%-6s", ir_names[i]);
331 fprintf(ctx->fp, "\"\n\n"); 331 fprintf(ctx->fp, "\",\n\n");
332 332
333 fprintf(ctx->fp, "irfpm = { [0]="); 333 fprintf(ctx->fp, "irfpm = { [0]=");
334 for (i = 0; irfpm_names[i]; i++) 334 for (i = 0; irfpm_names[i]; i++)
335 fprintf(ctx->fp, "\"%s\", ", lower(buf, irfpm_names[i])); 335 fprintf(ctx->fp, "\"%s\", ", lower(buf, irfpm_names[i]));
336 fprintf(ctx->fp, "}\n\n"); 336 fprintf(ctx->fp, "},\n\n");
337 337
338 fprintf(ctx->fp, "irfield = { [0]="); 338 fprintf(ctx->fp, "irfield = { [0]=");
339 for (i = 0; irfield_names[i]; i++) { 339 for (i = 0; irfield_names[i]; i++) {
@@ -343,17 +343,17 @@ static void emit_vmdef(BuildCtx *ctx)
343 if (p) *p = '.'; 343 if (p) *p = '.';
344 fprintf(ctx->fp, "\"%s\", ", buf); 344 fprintf(ctx->fp, "\"%s\", ", buf);
345 } 345 }
346 fprintf(ctx->fp, "}\n\n"); 346 fprintf(ctx->fp, "},\n\n");
347 347
348 fprintf(ctx->fp, "ircall = {\n[0]="); 348 fprintf(ctx->fp, "ircall = {\n[0]=");
349 for (i = 0; ircall_names[i]; i++) 349 for (i = 0; ircall_names[i]; i++)
350 fprintf(ctx->fp, "\"%s\",\n", ircall_names[i]); 350 fprintf(ctx->fp, "\"%s\",\n", ircall_names[i]);
351 fprintf(ctx->fp, "}\n\n"); 351 fprintf(ctx->fp, "},\n\n");
352 352
353 fprintf(ctx->fp, "traceerr = {\n[0]="); 353 fprintf(ctx->fp, "traceerr = {\n[0]=");
354 for (i = 0; trace_errors[i]; i++) 354 for (i = 0; trace_errors[i]; i++)
355 fprintf(ctx->fp, "\"%s\",\n", trace_errors[i]); 355 fprintf(ctx->fp, "\"%s\",\n", trace_errors[i]);
356 fprintf(ctx->fp, "}\n\n"); 356 fprintf(ctx->fp, "},\n\n");
357} 357}
358 358
359/* -- Argument parsing ---------------------------------------------------- */ 359/* -- Argument parsing ---------------------------------------------------- */
@@ -490,6 +490,7 @@ int main(int argc, char **argv)
490 case BUILD_vmdef: 490 case BUILD_vmdef:
491 emit_vmdef(ctx); 491 emit_vmdef(ctx);
492 emit_lib(ctx); 492 emit_lib(ctx);
493 fprintf(ctx->fp, "}\n\n");
493 break; 494 break;
494 case BUILD_ffdef: 495 case BUILD_ffdef:
495 case BUILD_libdef: 496 case BUILD_libdef:
diff --git a/src/host/buildvm_lib.c b/src/host/buildvm_lib.c
index eafdab98..db95977f 100644
--- a/src/host/buildvm_lib.c
+++ b/src/host/buildvm_lib.c
@@ -5,7 +5,9 @@
5 5
6#include "buildvm.h" 6#include "buildvm.h"
7#include "lj_obj.h" 7#include "lj_obj.h"
8#include "lj_bc.h"
8#include "lj_lib.h" 9#include "lj_lib.h"
10#include "buildvm_libbc.h"
9 11
10/* Context for library definitions. */ 12/* Context for library definitions. */
11static uint8_t obuf[8192]; 13static uint8_t obuf[8192];
@@ -151,6 +153,62 @@ static void libdef_func(BuildCtx *ctx, char *p, int arg)
151 regfunc = REGFUNC_OK; 153 regfunc = REGFUNC_OK;
152} 154}
153 155
/*
** Decode one ULEB128-encoded unsigned 32 bit value.
**
** p   Points at the first byte of the encoded value.
** vv  Receives the decoded value.
**
** Returns a pointer to the first byte after the encoded value.
** The input is not bounds-checked: decoding continues until a byte
** without the continuation bit (0x80) is seen.
*/
static uint8_t *libdef_uleb128(uint8_t *p, uint32_t *vv)
{
  uint32_t v = *p++;
  if (v >= 0x80) {
    int sh = 0; v &= 0x7f;
    /*
    ** Widen each 7 bit group to uint32_t before shifting. Shifting the
    ** promoted (signed) int by 28 can overflow, which is undefined.
    */
    do { v |= ((uint32_t)(*p & 0x7f) << (sh += 7)); } while (*p++ >= 0x80);
  }
  *vv = v;
  return p;
}
166
167static void libdef_fixupbc(uint8_t *p)
168{
169 uint32_t i, sizebc;
170 p += 4;
171 p = libdef_uleb128(p, &sizebc);
172 p = libdef_uleb128(p, &sizebc);
173 p = libdef_uleb128(p, &sizebc);
174 for (i = 0; i < sizebc; i++, p += 4) {
175 uint8_t op = p[libbc_endian ? 3 : 0];
176 uint8_t ra = p[libbc_endian ? 2 : 1];
177 uint8_t rc = p[libbc_endian ? 1 : 2];
178 uint8_t rb = p[libbc_endian ? 0 : 3];
179 if (!LJ_DUALNUM && op == BC_ISTYPE && rc == ~LJ_TNUMX+1) {
180 op = BC_ISNUM; rc++;
181 }
182 p[LJ_ENDIAN_SELECT(0, 3)] = op;
183 p[LJ_ENDIAN_SELECT(1, 2)] = ra;
184 p[LJ_ENDIAN_SELECT(2, 1)] = rc;
185 p[LJ_ENDIAN_SELECT(3, 0)] = rb;
186 }
187}
188
189static void libdef_lua(BuildCtx *ctx, char *p, int arg)
190{
191 UNUSED(arg);
192 if (ctx->mode == BUILD_libdef) {
193 int i;
194 for (i = 0; libbc_map[i].name != NULL; i++) {
195 if (!strcmp(libbc_map[i].name, p)) {
196 int ofs = libbc_map[i].ofs;
197 int len = libbc_map[i+1].ofs - ofs;
198 obuf[2]++; /* Bump hash table size. */
199 *optr++ = LIBINIT_LUA;
200 libdef_name(p, 0);
201 memcpy(optr, libbc_code + ofs, len);
202 libdef_fixupbc(optr);
203 optr += len;
204 return;
205 }
206 }
207 fprintf(stderr, "Error: missing libbc definition for %s\n", p);
208 exit(1);
209 }
210}
211
154static uint32_t find_rec(char *name) 212static uint32_t find_rec(char *name)
155{ 213{
156 char *p = (char *)obuf; 214 char *p = (char *)obuf;
@@ -277,6 +335,7 @@ static const LibDefHandler libdef_handlers[] = {
277 { "CF(", ")", libdef_func, LIBINIT_CF }, 335 { "CF(", ")", libdef_func, LIBINIT_CF },
278 { "ASM(", ")", libdef_func, LIBINIT_ASM }, 336 { "ASM(", ")", libdef_func, LIBINIT_ASM },
279 { "ASM_(", ")", libdef_func, LIBINIT_ASM_ }, 337 { "ASM_(", ")", libdef_func, LIBINIT_ASM_ },
338 { "LUA(", ")", libdef_lua, 0 },
280 { "REC(", ")", libdef_rec, 0 }, 339 { "REC(", ")", libdef_rec, 0 },
281 { "PUSH(", ")", libdef_push, 0 }, 340 { "PUSH(", ")", libdef_push, 0 },
282 { "SET(", ")", libdef_set, 0 }, 341 { "SET(", ")", libdef_set, 0 },
@@ -373,7 +432,7 @@ void emit_lib(BuildCtx *ctx)
373 "#ifndef FF_NUM_ASMFUNC\n#define FF_NUM_ASMFUNC %d\n#endif\n\n", 432 "#ifndef FF_NUM_ASMFUNC\n#define FF_NUM_ASMFUNC %d\n#endif\n\n",
374 ffasmfunc); 433 ffasmfunc);
375 } else if (ctx->mode == BUILD_vmdef) { 434 } else if (ctx->mode == BUILD_vmdef) {
376 fprintf(ctx->fp, "}\n\n"); 435 fprintf(ctx->fp, "},\n\n");
377 } else if (ctx->mode == BUILD_bcdef) { 436 } else if (ctx->mode == BUILD_bcdef) {
378 int i; 437 int i;
379 fprintf(ctx->fp, "\n};\n\n"); 438 fprintf(ctx->fp, "\n};\n\n");
diff --git a/src/host/buildvm_libbc.h b/src/host/buildvm_libbc.h
new file mode 100644
index 00000000..e96c8a53
--- /dev/null
+++ b/src/host/buildvm_libbc.h
@@ -0,0 +1,30 @@
1/* This is a generated file. DO NOT EDIT! */
2
3static const int libbc_endian = 0;
4
5static const uint8_t libbc_code[] = {
60,1,2,0,0,1,2,24,1,0,0,76,1,2,0,241,135,158,166,3,220,203,178,130,4,0,1,2,0,
70,1,2,24,1,0,0,76,1,2,0,243,244,148,165,20,198,190,199,252,3,0,1,2,0,0,0,3,
816,0,5,0,21,1,0,0,76,1,2,0,0,2,9,0,0,0,15,16,0,12,0,16,1,9,0,41,2,1,0,21,3,
90,0,41,4,1,0,77,2,8,128,18,6,1,0,18,7,5,0,59,8,5,0,66,6,3,2,10,6,0,0,88,7,1,
10128,76,6,2,0,79,2,248,127,75,0,1,0,0,2,10,0,0,0,16,16,0,12,0,16,1,9,0,43,2,
110,0,18,3,0,0,41,4,0,0,88,5,7,128,18,7,1,0,18,8,5,0,18,9,6,0,66,7,3,2,10,7,0,
120,88,8,1,128,76,7,2,0,70,5,3,3,82,5,247,127,75,0,1,0,0,1,2,0,0,0,3,16,0,12,
130,21,1,0,0,76,1,2,0,0,2,10,0,0,2,30,16,0,12,0,21,2,0,0,11,1,0,0,88,3,7,128,
148,2,0,0,88,3,23,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2,0,88,3,18,128,16,1,14,
150,41,3,1,0,3,3,1,0,88,3,14,128,3,1,2,0,88,3,12,128,59,3,1,0,22,4,1,1,18,5,2,
160,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,252,127,43,4,0,0,64,4,
172,0,76,3,2,0,75,0,1,0,0,2,0
18};
19
20static const struct { const char *name; int ofs; } libbc_map[] = {
21{"math_deg",0},
22{"math_rad",25},
23{"string_len",50},
24{"table_foreachi",69},
25{"table_foreach",136},
26{"table_getn",207},
27{"table_remove",226},
28{NULL,355}
29};
30
diff --git a/src/host/genlibbc.lua b/src/host/genlibbc.lua
new file mode 100644
index 00000000..f1e12429
--- /dev/null
+++ b/src/host/genlibbc.lua
@@ -0,0 +1,197 @@
1----------------------------------------------------------------------------
2-- Lua script to dump the bytecode of the library functions written in Lua.
3-- The resulting 'buildvm_libbc.h' is used for the build process of LuaJIT.
4----------------------------------------------------------------------------
5-- Copyright (C) 2005-2014 Mike Pall. All rights reserved.
6-- Released under the MIT license. See Copyright Notice in luajit.h
7----------------------------------------------------------------------------
8
9local ffi = require("ffi")
10local bit = require("bit")
11local vmdef = require("jit.vmdef")
12local bcnames = vmdef.bcnames
13
14local format = string.format
15
16local isbe = (string.byte(string.dump(function() end), 5) % 2 == 1)
17
-- Print the command line synopsis to stderr and exit with status 1.
-- The program name is taken from arg[0] when available.
local function usage(arg)
  local prog = arg and arg[0] or "genlibbc"
  io.stderr:write("Usage: ", prog, " [-o buildvm_libbc.h] lib_*.c\n")
  os.exit(1)
end
23
-- Parse the command line and return the output file name.
-- Without "-o" the output name is "-". With "-o FILE" the two option
-- words are removed from arg, so only the input file names remain.
-- Calls usage() when no arguments are given or "-o" lacks a file name.
local function parse_arg(arg)
  if not (arg and arg[1]) then
    usage(arg)
  end
  if arg[1] ~= "-o" then
    return "-"
  end
  local outfile = arg[2]
  if not outfile then usage(arg) end
  -- Drop "-o" and the file name.
  table.remove(arg, 1)
  table.remove(arg, 1)
  return outfile
end
37
-- Read all named files and return their contents concatenated in order.
-- Raises an error if a file cannot be opened.
local function read_files(names)
  local src = ""
  for _,path in ipairs(names) do
    local fp = assert(io.open(path))
    local text = fp:read("*a")
    src = src .. text
    fp:close()
  end
  return src
end
47
-- Turn the pseudo-Lua source of a library function into compilable Lua.
-- Each CHECK_<type>(var) becomes an assignment of a unique marker number
-- (counting up from -29999) to var; the marker is recorded in the fixup
-- table as { "CHECK", <type> }. Each PAIRS(var) becomes "nil, var, 0"
-- and sets fixup.PAIRS. Returns the code prefixed with "return " and
-- the fixup table.
local function transform_lua(code)
  local fixup = {}
  local marker = -30000

  local function on_check(tp, var)
    marker = marker + 1
    fixup[marker] = { "CHECK", tp }
    return string.format("%s=%d", var, marker)
  end

  local function on_pairs(var)
    fixup.PAIRS = true
    return string.format("nil, %s, 0", var)
  end

  code = string.gsub(code, "CHECK_(%w*)%((.-)%)", on_check)
  code = string.gsub(code, "PAIRS%((.-)%)", on_pairs)
  return "return "..code, fixup
end
62
-- Decode one ULEB128 value at pointer p.
-- Returns the pointer past the encoded value, followed by the value.
local function read_uleb128(p)
  local v = p[0]
  p = p + 1
  if v < 128 then
    return p, v
  end
  -- Multi-byte value: strip the continuation bit, then add 7 bit groups.
  v = v - 128
  local sh = 7
  while true do
    local b = p[0]
    p = p + 1
    v = v + bit.lshift(bit.band(b, 127), sh)
    sh = sh + 7
    if b < 128 then break end
  end
  return p, v
end
76
-- ORDER LJ_T
-- Type names accepted by CHECK_<type>() and the numbers stored as the
-- type operand of the patched type check instructions.
local name2itype = {
  str = 5, func = 9, tab = 12, int = 14, num = 15
}

-- Map bytecode names to opcode numbers. The names are taken from
-- fixed 6 character slots of bcnames, with the padding spaces removed.
local BC = {}
for i=0,#bcnames/6-1 do
  local name = string.gsub(string.sub(bcnames, i*6+1, i*6+6), " ", "")
  BC[name] = i
end

-- Byte offsets of the fields of a 4 byte instruction in a dump,
-- depending on the byte order of the running VM.
local xop, xra, xrc, xrb
if isbe then
  xop, xra, xrc, xrb = 3, 2, 1, 0
else
  xop, xra, xrc, xrb = 0, 1, 2, 3
end
88
-- Patch the bytecode of a dumped function according to the fixup table.
--   * KSHORT with a marker number recorded by a CHECK_<type>() fixup
--     becomes ISNUM (for "num") or ISTYPE with the matching type code.
--     Slots checked as "tab" are remembered as raw tables.
--   * TGETV/TSETV on a remembered raw table slot become TGETR/TSETR.
--   * ITERC becomes ITERN if the source used PAIRS().
-- Returns the patched data that follows the length prefix, as a string.
local function fixup_dump(dump, fixup)
  local buf = ffi.new("uint8_t[?]", #dump+1, dump)
  -- Skip the 5 leading bytes of the dump, then read the length prefix.
  local ins, len = read_uleb128(buf+5)
  local start = ins
  -- Skip 4 fixed bytes and two uleb128 fields. The third uleb128 field
  -- is the number of instructions.
  ins = read_uleb128(ins + 4)
  ins = read_uleb128(ins)
  local sizebc
  ins, sizebc = read_uleb128(ins)
  local rawtab = {}
  for _ = 1, sizebc do
    local op = ins[xop]
    if op == BC.KSHORT then
      -- Sign-extend the 16 bit operand to recover the marker number.
      local rd = bit.arshift(bit.lshift(ins[xrc] + 256*ins[xrb], 16), 16)
      local f = fixup[rd]
      if f then
        if f[1] ~= "CHECK" then
          error("unhandled fixup type: "..f[1])
        end
        local tp = f[2]
        if tp == "tab" then rawtab[ins[xra]] = true end
        ins[xop] = tp == "num" and BC.ISNUM or BC.ISTYPE
        ins[xrb] = 0
        ins[xrc] = name2itype[tp]
      end
    elseif op == BC.TGETV and rawtab[ins[xrb]] then
      ins[xop] = BC.TGETR
    elseif op == BC.TSETV and rawtab[ins[xrb]] then
      ins[xop] = BC.TSETR
    elseif op == BC.ITERC and fixup.PAIRS then
      ins[xop] = BC.ITERN
    end
    ins = ins + 4
  end
  return ffi.string(start, len)
end
134
-- Collect all LJLIB_LUA(name) definitions from the source text.
-- The Lua code is taken from the C comment that follows each marker,
-- transformed, compiled, dumped with debug info stripped and patched.
-- The result maps each name to its patched bytecode string; the array
-- part lists the names in order of appearance.
local function find_defs(src)
  local defs = {}
  local pattern = "LJLIB_LUA%(([^)]*)%)%s*/%*(.-)%*/"
  for name, code in string.gmatch(src, pattern) do
    local tcode, fixup = transform_lua(code)
    -- Compile with an empty environment table.
    local chunk = assert(load(tcode, "", nil, {}))
    local func = chunk()
    defs[name] = fixup_dump(string.dump(func, true), fixup)
    defs[#defs+1] = name
  end
  return defs
end
145end
146
-- Generate the text of the C header for the collected definitions.
-- It holds the byte order flag, the concatenated bytecode as a byte
-- array (with a trailing 0) and a name -> offset map that is terminated
-- by a NULL entry carrying the total length.
local function gen_header(defs)
  local out = {}
  local function emit(x) out[#out+1] = x end

  emit("/* This is a generated file. DO NOT EDIT! */\n\n")
  emit("static const int libbc_endian = ") emit(isbe and 1 or 0) emit(";\n\n")

  local code = ""
  for _,name in ipairs(defs) do
    code = code .. defs[name]
  end

  emit("static const uint8_t libbc_code[] = {\n")
  local col = 0
  for i=1,#code do
    local x = string.byte(code, i)
    emit(x); emit(",")
    -- Track the width of the line: digits of the byte plus the comma.
    local width
    if x < 10 then
      width = 2
    elseif x < 100 then
      width = 3
    else
      width = 4
    end
    col = col + width
    if col >= 75 then col = 0; emit("\n") end
  end
  emit("0\n};\n\n")

  emit("static const struct { const char *name; int ofs; } libbc_map[] = {\n")
  local ofs = 0
  for _,name in ipairs(defs) do
    emit('{"'); emit(name); emit('",'); emit(ofs) emit('},\n')
    ofs = ofs + #defs[name]
  end
  emit("{NULL,"); emit(ofs); emit("}\n};\n\n")
  return table.concat(out)
end
174
-- Write data to the named file, or to stdout if the name is "-".
-- An existing file with identical contents is left untouched.
-- Raises an error if opening, writing or closing fails.
local function write_file(name, data)
  if name == "-" then
    assert(io.write(data))
    assert(io.flush())
    return
  end
  local fp = io.open(name)
  if fp then
    local old = fp:read("*a")
    fp:close()
    if data == old then return end
  end
  fp = assert(io.open(name, "w"))
  assert(fp:write(data))
  assert(fp:close())
end
191
192local outfile = parse_arg(arg)
193local src = read_files(arg)
194local defs = find_defs(src)
195local hdr = gen_header(defs)
196write_file(outfile, hdr)
197
diff --git a/src/jit/bc.lua b/src/jit/bc.lua
index 9df19c7b..48e06d6f 100644
--- a/src/jit/bc.lua
+++ b/src/jit/bc.lua
@@ -41,7 +41,7 @@
41 41
42-- Cache some library functions and objects. 42-- Cache some library functions and objects.
43local jit = require("jit") 43local jit = require("jit")
44assert(jit.version_num == 20003, "LuaJIT core/library version mismatch") 44assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
45local jutil = require("jit.util") 45local jutil = require("jit.util")
46local vmdef = require("jit.vmdef") 46local vmdef = require("jit.vmdef")
47local bit = require("bit") 47local bit = require("bit")
@@ -179,13 +179,12 @@ local function bcliston(outfile)
179end 179end
180 180
181-- Public module functions. 181-- Public module functions.
182module(...) 182return {
183 183 line = bcline,
184line = bcline 184 dump = bcdump,
185dump = bcdump 185 targets = bctargets,
186targets = bctargets 186 on = bcliston,
187 187 off = bclistoff,
188on = bcliston 188 start = bcliston -- For -j command line option.
189off = bclistoff 189}
190start = bcliston -- For -j command line option.
191 190
diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua
index f55bda97..fa68bf56 100644
--- a/src/jit/bcsave.lua
+++ b/src/jit/bcsave.lua
@@ -11,7 +11,7 @@
11------------------------------------------------------------------------------ 11------------------------------------------------------------------------------
12 12
13local jit = require("jit") 13local jit = require("jit")
14assert(jit.version_num == 20003, "LuaJIT core/library version mismatch") 14assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
15local bit = require("bit") 15local bit = require("bit")
16 16
17-- Symbol name prefix for LuaJIT bytecode. 17-- Symbol name prefix for LuaJIT bytecode.
@@ -653,7 +653,7 @@ end
653------------------------------------------------------------------------------ 653------------------------------------------------------------------------------
654 654
655-- Public module functions. 655-- Public module functions.
656module(...) 656return {
657 657 start = docmd -- Process -b command line option.
658start = docmd -- Process -b command line option. 658}
659 659
diff --git a/src/jit/dis_arm.lua b/src/jit/dis_arm.lua
index b3d0ebc2..a1bff844 100644
--- a/src/jit/dis_arm.lua
+++ b/src/jit/dis_arm.lua
@@ -658,7 +658,7 @@ local function disass_block(ctx, ofs, len)
658end 658end
659 659
660-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). 660-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
661local function create_(code, addr, out) 661local function create(code, addr, out)
662 local ctx = {} 662 local ctx = {}
663 ctx.code = code 663 ctx.code = code
664 ctx.addr = addr or 0 664 ctx.addr = addr or 0
@@ -670,20 +670,20 @@ local function create_(code, addr, out)
670end 670end
671 671
672-- Simple API: disassemble code (a string) at address and output via out. 672-- Simple API: disassemble code (a string) at address and output via out.
673local function disass_(code, addr, out) 673local function disass(code, addr, out)
674 create_(code, addr, out):disass() 674 create(code, addr, out):disass()
675end 675end
676 676
677-- Return register name for RID. 677-- Return register name for RID.
678local function regname_(r) 678local function regname(r)
679 if r < 16 then return map_gpr[r] end 679 if r < 16 then return map_gpr[r] end
680 return "d"..(r-16) 680 return "d"..(r-16)
681end 681end
682 682
683-- Public module functions. 683-- Public module functions.
684module(...) 684return {
685 685 create = create,
686create = create_ 686 disass = disass,
687disass = disass_ 687 regname = regname
688regname = regname_ 688}
689 689
diff --git a/src/jit/dis_mips.lua b/src/jit/dis_mips.lua
index 47a7a83b..bdd70d77 100644
--- a/src/jit/dis_mips.lua
+++ b/src/jit/dis_mips.lua
@@ -384,7 +384,7 @@ local function disass_block(ctx, ofs, len)
384end 384end
385 385
386-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). 386-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
387local function create_(code, addr, out) 387local function create(code, addr, out)
388 local ctx = {} 388 local ctx = {}
389 ctx.code = code 389 ctx.code = code
390 ctx.addr = addr or 0 390 ctx.addr = addr or 0
@@ -396,33 +396,33 @@ local function create_(code, addr, out)
396 return ctx 396 return ctx
397end 397end
398 398
399local function create_el_(code, addr, out) 399local function create_el(code, addr, out)
400 local ctx = create_(code, addr, out) 400 local ctx = create(code, addr, out)
401 ctx.get = get_le 401 ctx.get = get_le
402 return ctx 402 return ctx
403end 403end
404 404
405-- Simple API: disassemble code (a string) at address and output via out. 405-- Simple API: disassemble code (a string) at address and output via out.
406local function disass_(code, addr, out) 406local function disass(code, addr, out)
407 create_(code, addr, out):disass() 407 create(code, addr, out):disass()
408end 408end
409 409
410local function disass_el_(code, addr, out) 410local function disass_el(code, addr, out)
411 create_el_(code, addr, out):disass() 411 create_el(code, addr, out):disass()
412end 412end
413 413
414-- Return register name for RID. 414-- Return register name for RID.
415local function regname_(r) 415local function regname(r)
416 if r < 32 then return map_gpr[r] end 416 if r < 32 then return map_gpr[r] end
417 return "f"..(r-32) 417 return "f"..(r-32)
418end 418end
419 419
420-- Public module functions. 420-- Public module functions.
421module(...) 421return {
422 422 create = create,
423create = create_ 423 create_el = create_el,
424create_el = create_el_ 424 disass = disass,
425disass = disass_ 425 disass_el = disass_el,
426disass_el = disass_el_ 426 regname = regname
427regname = regname_ 427}
428 428
diff --git a/src/jit/dis_mipsel.lua b/src/jit/dis_mipsel.lua
index d19de319..ee7dd962 100644
--- a/src/jit/dis_mipsel.lua
+++ b/src/jit/dis_mipsel.lua
@@ -8,13 +8,10 @@
8-- MIPS disassembler module. All the interesting stuff is there. 8-- MIPS disassembler module. All the interesting stuff is there.
9------------------------------------------------------------------------------ 9------------------------------------------------------------------------------
10 10
11local require = require 11local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips")
12 12return {
13module(...) 13 create = dis_mips.create_el,
14 14 disass = dis_mips.disass_el,
15local dis_mips = require(_PACKAGE.."dis_mips") 15 regname = dis_mips.regname
16 16}
17create = dis_mips.create_el
18disass = dis_mips.disass_el
19regname = dis_mips.regname
20 17
diff --git a/src/jit/dis_ppc.lua b/src/jit/dis_ppc.lua
index cfaf982f..178df1ce 100644
--- a/src/jit/dis_ppc.lua
+++ b/src/jit/dis_ppc.lua
@@ -560,7 +560,7 @@ local function disass_block(ctx, ofs, len)
560end 560end
561 561
562-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). 562-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
563local function create_(code, addr, out) 563local function create(code, addr, out)
564 local ctx = {} 564 local ctx = {}
565 ctx.code = code 565 ctx.code = code
566 ctx.addr = addr or 0 566 ctx.addr = addr or 0
@@ -572,20 +572,20 @@ local function create_(code, addr, out)
572end 572end
573 573
574-- Simple API: disassemble code (a string) at address and output via out. 574-- Simple API: disassemble code (a string) at address and output via out.
575local function disass_(code, addr, out) 575local function disass(code, addr, out)
576 create_(code, addr, out):disass() 576 create(code, addr, out):disass()
577end 577end
578 578
579-- Return register name for RID. 579-- Return register name for RID.
580local function regname_(r) 580local function regname(r)
581 if r < 32 then return map_gpr[r] end 581 if r < 32 then return map_gpr[r] end
582 return "f"..(r-32) 582 return "f"..(r-32)
583end 583end
584 584
585-- Public module functions. 585-- Public module functions.
586module(...) 586return {
587 587 create = create,
588create = create_ 588 disass = disass,
589disass = disass_ 589 regname = regname
590regname = regname_ 590}
591 591
diff --git a/src/jit/dis_x64.lua b/src/jit/dis_x64.lua
index 4f491695..a2de3242 100644
--- a/src/jit/dis_x64.lua
+++ b/src/jit/dis_x64.lua
@@ -8,13 +8,10 @@
8-- x86/x64 disassembler module. All the interesting stuff is there. 8-- x86/x64 disassembler module. All the interesting stuff is there.
9------------------------------------------------------------------------------ 9------------------------------------------------------------------------------
10 10
11local require = require 11local dis_x86 = require((string.match(..., ".*%.") or "").."dis_x86")
12 12return {
13module(...) 13 create = dis_x86.create64,
14 14 disass = dis_x86.disass64,
15local dis_x86 = require(_PACKAGE.."dis_x86") 15 regname = dis_x86.regname64
16 16}
17create = dis_x86.create64
18disass = dis_x86.disass64
19regname = dis_x86.regname64
20 17
diff --git a/src/jit/dis_x86.lua b/src/jit/dis_x86.lua
index 0b865ab6..bc22e423 100644
--- a/src/jit/dis_x86.lua
+++ b/src/jit/dis_x86.lua
@@ -28,6 +28,8 @@ local type = type
28local sub, byte, format = string.sub, string.byte, string.format 28local sub, byte, format = string.sub, string.byte, string.format
29local match, gmatch, gsub = string.match, string.gmatch, string.gsub 29local match, gmatch, gsub = string.match, string.gmatch, string.gsub
30local lower, rep = string.lower, string.rep 30local lower, rep = string.lower, string.rep
31local bit = require("bit")
32local tohex = bit.tohex
31 33
32-- Map for 1st opcode byte in 32 bit mode. Ugly? Well ... read on. 34-- Map for 1st opcode byte in 32 bit mode. Ugly? Well ... read on.
33local map_opc1_32 = { 35local map_opc1_32 = {
@@ -532,7 +534,7 @@ local function putpat(ctx, name, pat)
532 local lo = imm % 0x1000000 534 local lo = imm % 0x1000000
533 x = format("0x%02x%06x", (imm-lo) / 0x1000000, lo) 535 x = format("0x%02x%06x", (imm-lo) / 0x1000000, lo)
534 else 536 else
535 x = format("0x%08x", imm) 537 x = "0x"..tohex(imm)
536 end 538 end
537 elseif p == "R" then 539 elseif p == "R" then
538 local r = byte(code, pos-1, pos-1)%8 540 local r = byte(code, pos-1, pos-1)%8
@@ -782,7 +784,7 @@ local function disass_block(ctx, ofs, len)
782end 784end
783 785
784-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). 786-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
785local function create_(code, addr, out) 787local function create(code, addr, out)
786 local ctx = {} 788 local ctx = {}
787 ctx.code = code 789 ctx.code = code
788 ctx.addr = (addr or 0) - 1 790 ctx.addr = (addr or 0) - 1
@@ -796,8 +798,8 @@ local function create_(code, addr, out)
796 return ctx 798 return ctx
797end 799end
798 800
799local function create64_(code, addr, out) 801local function create64(code, addr, out)
800 local ctx = create_(code, addr, out) 802 local ctx = create(code, addr, out)
801 ctx.x64 = true 803 ctx.x64 = true
802 ctx.map1 = map_opc1_64 804 ctx.map1 = map_opc1_64
803 ctx.aregs = map_regs.Q 805 ctx.aregs = map_regs.Q
@@ -805,32 +807,32 @@ local function create64_(code, addr, out)
805end 807end
806 808
807-- Simple API: disassemble code (a string) at address and output via out. 809-- Simple API: disassemble code (a string) at address and output via out.
808local function disass_(code, addr, out) 810local function disass(code, addr, out)
809 create_(code, addr, out):disass() 811 create(code, addr, out):disass()
810end 812end
811 813
812local function disass64_(code, addr, out) 814local function disass64(code, addr, out)
813 create64_(code, addr, out):disass() 815 create64(code, addr, out):disass()
814end 816end
815 817
816-- Return register name for RID. 818-- Return register name for RID.
817local function regname_(r) 819local function regname(r)
818 if r < 8 then return map_regs.D[r+1] end 820 if r < 8 then return map_regs.D[r+1] end
819 return map_regs.X[r-7] 821 return map_regs.X[r-7]
820end 822end
821 823
822local function regname64_(r) 824local function regname64(r)
823 if r < 16 then return map_regs.Q[r+1] end 825 if r < 16 then return map_regs.Q[r+1] end
824 return map_regs.X[r-15] 826 return map_regs.X[r-15]
825end 827end
826 828
827-- Public module functions. 829-- Public module functions.
828module(...) 830return {
829 831 create = create,
830create = create_ 832 create64 = create64,
831create64 = create64_ 833 disass = disass,
832disass = disass_ 834 disass64 = disass64,
833disass64 = disass64_ 835 regname = regname,
834regname = regname_ 836 regname64 = regname64
835regname64 = regname64_ 837}
836 838
diff --git a/src/jit/dump.lua b/src/jit/dump.lua
index 556ce883..c9016ce5 100644
--- a/src/jit/dump.lua
+++ b/src/jit/dump.lua
@@ -55,7 +55,7 @@
55 55
56-- Cache some library functions and objects. 56-- Cache some library functions and objects.
57local jit = require("jit") 57local jit = require("jit")
58assert(jit.version_num == 20003, "LuaJIT core/library version mismatch") 58assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
59local jutil = require("jit.util") 59local jutil = require("jit.util")
60local vmdef = require("jit.vmdef") 60local vmdef = require("jit.vmdef")
61local funcinfo, funcbc = jutil.funcinfo, jutil.funcbc 61local funcinfo, funcbc = jutil.funcinfo, jutil.funcbc
@@ -63,7 +63,7 @@ local traceinfo, traceir, tracek = jutil.traceinfo, jutil.traceir, jutil.tracek
63local tracemc, tracesnap = jutil.tracemc, jutil.tracesnap 63local tracemc, tracesnap = jutil.tracemc, jutil.tracesnap
64local traceexitstub, ircalladdr = jutil.traceexitstub, jutil.ircalladdr 64local traceexitstub, ircalladdr = jutil.traceexitstub, jutil.ircalladdr
65local bit = require("bit") 65local bit = require("bit")
66local band, shl, shr = bit.band, bit.lshift, bit.rshift 66local band, shl, shr, tohex = bit.band, bit.lshift, bit.rshift, bit.tohex
67local sub, gsub, format = string.sub, string.gsub, string.format 67local sub, gsub, format = string.sub, string.gsub, string.format
68local byte, char, rep = string.byte, string.char, string.rep 68local byte, char, rep = string.byte, string.char, string.rep
69local type, tostring = type, tostring 69local type, tostring = type, tostring
@@ -91,6 +91,7 @@ local function fillsymtab_tr(tr, nexit)
91 end 91 end
92 for i=0,nexit-1 do 92 for i=0,nexit-1 do
93 local addr = traceexitstub(tr, i) 93 local addr = traceexitstub(tr, i)
94 if addr < 0 then addr = addr + 2^32 end
94 t[addr] = tostring(i) 95 t[addr] = tostring(i)
95 end 96 end
96 local addr = traceexitstub(tr, nexit) 97 local addr = traceexitstub(tr, nexit)
@@ -104,7 +105,10 @@ local function fillsymtab(tr, nexit)
104 local ircall = vmdef.ircall 105 local ircall = vmdef.ircall
105 for i=0,#ircall do 106 for i=0,#ircall do
106 local addr = ircalladdr(i) 107 local addr = ircalladdr(i)
107 if addr ~= 0 then t[addr] = ircall[i] end 108 if addr ~= 0 then
109 if addr < 0 then addr = addr + 2^32 end
110 t[addr] = ircall[i]
111 end
108 end 112 end
109 end 113 end
110 if nexitsym == 1000000 then -- Per-trace exit stubs. 114 if nexitsym == 1000000 then -- Per-trace exit stubs.
@@ -118,6 +122,7 @@ local function fillsymtab(tr, nexit)
118 nexit = 1000000 122 nexit = 1000000
119 break 123 break
120 end 124 end
125 if addr < 0 then addr = addr + 2^32 end
121 t[addr] = tostring(i) 126 t[addr] = tostring(i)
122 end 127 end
123 nexitsym = nexit 128 nexitsym = nexit
@@ -136,6 +141,7 @@ local function dump_mcode(tr)
136 local mcode, addr, loop = tracemc(tr) 141 local mcode, addr, loop = tracemc(tr)
137 if not mcode then return end 142 if not mcode then return end
138 if not disass then disass = require("jit.dis_"..jit.arch) end 143 if not disass then disass = require("jit.dis_"..jit.arch) end
144 if addr < 0 then addr = addr + 2^32 end
139 out:write("---- TRACE ", tr, " mcode ", #mcode, "\n") 145 out:write("---- TRACE ", tr, " mcode ", #mcode, "\n")
140 local ctx = disass.create(mcode, addr, dumpwrite) 146 local ctx = disass.create(mcode, addr, dumpwrite)
141 ctx.hexdump = 0 147 ctx.hexdump = 0
@@ -270,8 +276,7 @@ local litname = {
270 ["CONV "] = setmetatable({}, { __index = function(t, mode) 276 ["CONV "] = setmetatable({}, { __index = function(t, mode)
271 local s = irtype[band(mode, 31)] 277 local s = irtype[band(mode, 31)]
272 s = irtype[band(shr(mode, 5), 31)].."."..s 278 s = irtype[band(shr(mode, 5), 31)].."."..s
273 if band(mode, 0x400) ~= 0 then s = s.." trunc" 279 if band(mode, 0x800) ~= 0 then s = s.." sext" end
274 elseif band(mode, 0x800) ~= 0 then s = s.." sext" end
275 local c = shr(mode, 14) 280 local c = shr(mode, 14)
276 if c == 2 then s = s.." index" elseif c == 3 then s = s.." check" end 281 if c == 2 then s = s.." index" elseif c == 3 then s = s.." check" end
277 t[mode] = s 282 t[mode] = s
@@ -280,6 +285,8 @@ local litname = {
280 ["FLOAD "] = vmdef.irfield, 285 ["FLOAD "] = vmdef.irfield,
281 ["FREF "] = vmdef.irfield, 286 ["FREF "] = vmdef.irfield,
282 ["FPMATH"] = vmdef.irfpm, 287 ["FPMATH"] = vmdef.irfpm,
288 ["BUFHDR"] = { [0] = "RESET", "APPEND" },
289 ["TOSTR "] = { [0] = "INT", "NUM", "CHAR" },
283} 290}
284 291
285local function ctlsub(c) 292local function ctlsub(c)
@@ -607,7 +614,7 @@ local function dump_texit(tr, ex, ngpr, nfpr, ...)
607 end 614 end
608 else 615 else
609 for i=1,ngpr do 616 for i=1,ngpr do
610 out:write(format(" %08x", regs[i])) 617 out:write(" ", tohex(regs[i]))
611 if i % 8 == 0 then out:write("\n") end 618 if i % 8 == 0 then out:write("\n") end
612 end 619 end
613 end 620 end
@@ -691,9 +698,9 @@ local function dumpon(opt, outfile)
691end 698end
692 699
693-- Public module functions. 700-- Public module functions.
694module(...) 701return {
695 702 on = dumpon,
696on = dumpon 703 off = dumpoff,
697off = dumpoff 704 start = dumpon -- For -j command line option.
698start = dumpon -- For -j command line option. 705}
699 706
diff --git a/src/jit/p.lua b/src/jit/p.lua
new file mode 100644
index 00000000..cd6a0616
--- /dev/null
+++ b/src/jit/p.lua
@@ -0,0 +1,310 @@
1----------------------------------------------------------------------------
2-- LuaJIT profiler.
3--
4-- Copyright (C) 2005-2013 Mike Pall. All rights reserved.
5-- Released under the MIT license. See Copyright Notice in luajit.h
6----------------------------------------------------------------------------
7--
8-- This module is a simple command line interface to the built-in
9-- low-overhead profiler of LuaJIT.
10--
11-- The lower-level API of the profiler is accessible via the "jit.profile"
12-- module or the luaJIT_profile_* C API.
13--
14-- Example usage:
15--
16-- luajit -jp myapp.lua
17-- luajit -jp=s myapp.lua
18-- luajit -jp=-s myapp.lua
19-- luajit -jp=vl myapp.lua
20-- luajit -jp=G,profile.txt myapp.lua
21--
22-- The following dump features are available:
23--
24-- f Stack dump: function name, otherwise module:line. Default mode.
25-- F Stack dump: ditto, but always prepend module.
26-- l Stack dump: module:line.
27-- <number> Stack dump depth (callee < caller). Default: 1.
28-- -<number> Inverse stack dump depth (caller > callee).
29-- s Split stack dump after first stack level. Implies abs(depth) >= 2.
30-- p Show full path for module names.
31-- v Show VM states. Can be combined with stack dumps, e.g. vf or fv.
32-- z Show zones. Can be combined with stack dumps, e.g. zf or fz.
33-- r Show raw sample counts. Default: show percentages.
34-- a Annotate excerpts from source code files.
35-- A Annotate complete source code files.
36-- G Produce raw output suitable for graphical tools (e.g. flame graphs).
37-- m<number> Minimum sample percentage to be shown. Default: 3.
38-- i<number> Sampling interval in milliseconds. Default: 10.
39--
40----------------------------------------------------------------------------
41
42-- Cache some library functions and objects.
43local jit = require("jit")
44assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
45local profile = require("jit.profile")
46local vmdef = require("jit.vmdef")
47local math = math
48local pairs, ipairs, tonumber, floor = pairs, ipairs, tonumber, math.floor
49local sort, format = table.sort, string.format
50local stdout = io.stdout
51local zone -- Load jit.zone module on demand.
52
53-- Output file handle.
54local out
55
56------------------------------------------------------------------------------
57
58local prof_ud
59local prof_states, prof_split, prof_min, prof_raw, prof_fmt, prof_depth
60local prof_ann, prof_count1, prof_count2, prof_samples
61
-- Display names for the single-letter VM states handed to the profiler
-- callback. Letters without an entry are shown unchanged by the caller.
local map_vmmode = {
  ["N"] = "Compiled",
  ["I"] = "Interpreted",
  ["C"] = "C code",
  ["G"] = "Garbage Collector",
  ["J"] = "JIT Compiler",
}
69
-- Profiler callback.
-- Adds `samples` to the running total, derives up to two grouping keys for
-- this sample (VM state / zone and stack dump) and accumulates the sample
-- count under them in prof_count1 (first level) and prof_count2 (second
-- level, indexed by first-level key).
local function prof_cb(th, samples, vmmode)
  prof_samples = prof_samples + samples

  -- State key: VM state name for "v", current zone otherwise.
  local state
  if prof_states == "v" then
    state = map_vmmode[vmmode] or vmmode
  elseif prof_states then
    state = zone:get() or "(none)"
  end

  -- Stack key(s), only when a stack dump format was selected.
  local stack, stack2
  if prof_fmt then
    local dump = profile.dumpstack(th, prof_fmt, prof_depth)
    -- Replace "[builtin#N]" with the fast function name from vmdef.
    stack = (dump:gsub("%[builtin#(%d+)%]", function(id)
      return vmdef.ffnames[tonumber(id)]
    end))
    if prof_split == 2 then
      -- Split the dump at the first stack level separator.
      local head, tail = stack:match("(.-) [<>] (.*)")
      if tail then stack, stack2 = head, tail end
    elseif prof_split == 3 then
      stack2 = profile.dumpstack(th, "l", 1)
    end
  end

  -- Decide which key is the primary one and which is nested below it.
  local key, subkey
  if prof_split == 1 then
    if state then key, subkey = state, stack end
  elseif stack then
    key, subkey = stack, stack2 or state
  end
  if not key then return end

  -- Coalesce samples in one or two levels.
  prof_count1[key] = (prof_count1[key] or 0) + samples
  if subkey then
    local nested = prof_count2[key]
    if not nested then
      nested = {}
      prof_count2[key] = nested
    end
    nested[subkey] = (nested[subkey] or 0) + samples
  end
end
117
118------------------------------------------------------------------------------
119
-- Show top N list.
-- Writes the keys of `count1` in descending order of sample count, stopping
-- at the first entry whose rounded percentage of `samples` falls below
-- prof_min. When `count2` has a sub-table for a key, that sub-table is
-- printed recursively, relative to the key's own count.
local function prof_top(count1, count2, samples, indent)
  local keys = {}
  for key in pairs(count1) do
    keys[#keys+1] = key
  end
  sort(keys, function(a, b) return count1[a] > count1[b] end)

  for _, key in ipairs(keys) do
    local count = count1[key]
    local pct = floor(count*100/samples + 0.5)
    if pct < prof_min then break end

    local text
    if prof_raw == "r" then
      text = format("%s%5d %s\n", indent, count, key)  -- Raw sample counts.
    elseif prof_raw then
      text = format("%s %d\n", key, count)  -- Output for graphical tools.
    else
      text = format("%s%2d%% %s\n", indent, pct, key)  -- Percentages.
    end
    out:write(text)

    local nested = count2 and count2[key]
    if nested then
      local subindent
      if prof_split == 3 or prof_split == 1 then
        subindent = " -- "
      elseif prof_depth < 0 then
        subindent = " -> "
      else
        subindent = " <- "
      end
      prof_top(nested, nil, count, subindent)
    end
  end
end
149
-- Annotate a single source file.
--   file  Name of the file, as taken from the sample key.
--   fl    Table mapping line numbers to the value shown for that line.
--   ann   Number of context lines around annotated lines; 0 shows all lines.
--   fmtv  Format for a line with a value, fmtn for a line without one.
-- Entries that cannot be annotated are reported and skipped, so the caller
-- can carry on with the remaining files.
local function prof_annotate_file(file, fl, ann, fmtv, fmtn)
  local f0 = file:byte()
  if f0 == 40 or f0 == 91 then  -- Name starts with "(" or "[".
    out:write(format("\n====== %s ======\n[Cannot annotate non-file]\n", file))
    return
  end
  local fp, err = io.open(file)
  if not fp then
    out:write(format("====== ERROR: %s: %s\n", file, err))
    return
  end
  out:write(format("\n====== %s ======\n", file))
  -- `show` is either false or the last line number that must still be shown.
  local n, show = 1, false
  if ann ~= 0 then
    for i=1,ann do
      -- Must be a number: it is used in arithmetic (show+ann) below.
      if fl[i] then show = i; out:write("@@ 1 @@\n"); break end
    end
  end
  for line in fp:lines() do
    if line:byte() == 27 then  -- File starts with ESC.
      out:write("[Cannot annotate bytecode file]\n")
      break
    end
    local v = fl[n]
    if ann ~= 0 then
      local v2 = fl[n+ann]
      if show then
        if v2 then show = n+ann elseif v then show = n
        elseif show+ann < n then show = false end
      elseif v2 then
        show = n+ann
        out:write(format("@@ %d @@\n", n))
      end
    end
    if ann == 0 or show then
      out:write(v and format(fmtv, v, line) or format(fmtn, line))
    end
    n = n + 1
  end
  fp:close()
end

-- Annotate source code.
-- Groups the keys of `count1` ("file:line") by file, keeping only entries
-- whose rounded percentage of `samples` reaches prof_min, then prints every
-- file in sorted order with per-line percentages (or raw counts if prof_raw
-- is set).
local function prof_annotate(count1, samples)
  local files = {}  -- Array of file names plus map: file name -> line table.
  local ms = 0  -- Largest sample count seen, for sizing the raw column.
  for k, v in pairs(count1) do
    local pct = floor(v*100/samples + 0.5)
    ms = math.max(ms, v)
    if pct >= prof_min then
      local file, line = k:match("^(.*):(%d+)$")
      -- Keys without a ":line" suffix would otherwise index with a nil key.
      if not file then file = k; line = 0 end
      local fl = files[file]
      if not fl then fl = {}; files[file] = fl; files[#files+1] = file end
      line = tonumber(line)
      fl[line] = prof_raw and v or pct
    end
  end
  sort(files)
  -- The unannotated format is padded to the width of the annotated one.
  local fmtv, fmtn = " %3d%% | %s\n", "      | %s\n"
  if prof_raw then
    -- Count the digits of the largest value; ceil(log10(ms)) is one short
    -- for exact powers of ten.
    local n = math.max(5, #format("%d", ms))
    fmtv = "%"..n.."d | %s\n"
    fmtn = (" "):rep(n).." | %s\n"
  end
  for _, file in ipairs(files) do
    prof_annotate_file(file, files[file], prof_ann, fmtv, fmtn)
  end
end
220
221------------------------------------------------------------------------------
222
-- Finish profiling and dump result.
-- Does nothing unless a profiling run is active (prof_ud set). Stops the
-- profiler, writes the report (or a note if no samples were collected),
-- then resets the run state on every path and closes the output file if it
-- is not stdout.
local function prof_finish()
  if not prof_ud then return end
  profile.stop()
  local samples = prof_samples
  if samples == 0 then
    if prof_raw ~= true then out:write("[No samples collected]\n") end
  elseif prof_ann then
    prof_annotate(prof_count1, samples)
  else
    prof_top(prof_count1, prof_count2, samples, "")
  end
  -- Reset state, also for the no-samples case, so a repeated call is a no-op.
  prof_count1 = nil
  prof_count2 = nil
  prof_ud = nil
  -- Release a file opened by start(); stdout is left alone.
  if out ~= stdout then out:close() end
end
242
-- Start profiling.
-- Parses the option string `mode` (see the feature list in the file header),
-- sets up the module-level profiling state and starts the low-level
-- profiler with prof_cb as its callback.
local function prof_start(mode)
  -- Strip the numeric options first, in this order: i<n>, m<n>, [-]<n>.
  local interval = ""
  mode = mode:gsub("i%d*", function(opt)
    interval = opt
    return ""
  end)
  prof_min = 3
  mode = mode:gsub("m(%d+)", function(num)
    prof_min = tonumber(num)
    return ""
  end)
  prof_depth = 1
  mode = mode:gsub("%-?%d+", function(num)
    prof_depth = tonumber(num)
    return ""
  end)

  -- Every remaining character is a single-letter flag.
  local m = {}
  for ch in mode:gmatch(".") do m[ch] = ch end

  prof_states = m.z or m.v
  if prof_states == "z" then zone = require("jit.zone") end
  local scope = m.l or m.f or m.F or (prof_states and "" or "f")
  local flags = m.p or ""
  prof_raw = m.r

  -- Select how samples are split into two levels.
  if m.s then
    prof_split = 2
    if prof_depth == -1 or m["-"] then
      prof_depth = -2
    elseif prof_depth == 1 then
      prof_depth = 2
    end
  elseif mode:find("[fF].*l") then
    scope = "l"
    prof_split = 3
  elseif scope == "" or mode:find("[zv].*[lfF]") then
    prof_split = 1
  else
    prof_split = 0
  end

  -- Annotation context: 0 for "A", 3 for "a", nil when not annotating.
  if m.A then
    prof_ann = 0
  elseif m.a then
    prof_ann = 3
  else
    prof_ann = nil
  end

  -- Select the stack dump format.
  if prof_ann then
    scope = "l"
    prof_fmt = "pl"
    prof_split = 0
    prof_depth = 1
  elseif m.G and scope ~= "" then
    prof_fmt = flags..scope.."Z;"
    prof_depth = -100
    prof_raw = true
    prof_min = 0
  elseif scope == "" then
    prof_fmt = false
  else
    local sc = prof_split == 3 and m.f or m.F or scope
    local sep = prof_depth >= 0 and "Z < " or "Z > "
    prof_fmt = flags..sc..sep
  end

  prof_count1 = {}
  prof_count2 = {}
  prof_samples = 0
  profile.start(scope:lower()..interval, prof_cb)
  -- Proxy userdata whose __gc hook dumps the result when it is collected.
  prof_ud = newproxy(true)
  getmetatable(prof_ud).__gc = prof_finish
end
292
293------------------------------------------------------------------------------
294
-- Entry point: pick the output handle and start profiling.
-- `outfile` falls back to the LUAJIT_PROFILEFILE environment variable;
-- "-" or no file at all selects stdout. `mode` defaults to "f".
local function start(mode, outfile)
  outfile = outfile or os.getenv("LUAJIT_PROFILEFILE")
  if outfile and outfile ~= "-" then
    out = assert(io.open(outfile, "w"))
  else
    out = stdout
  end
  prof_start(mode or "f")
end
304
-- Public module functions.
local exports = {}
exports.start = start -- For -j command line option.
exports.stop = prof_finish
return exports
310
diff --git a/src/jit/v.lua b/src/jit/v.lua
index 197e67c6..50909357 100644
--- a/src/jit/v.lua
+++ b/src/jit/v.lua
@@ -59,7 +59,7 @@
59 59
60-- Cache some library functions and objects. 60-- Cache some library functions and objects.
61local jit = require("jit") 61local jit = require("jit")
62assert(jit.version_num == 20003, "LuaJIT core/library version mismatch") 62assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
63local jutil = require("jit.util") 63local jutil = require("jit.util")
64local vmdef = require("jit.vmdef") 64local vmdef = require("jit.vmdef")
65local funcinfo, traceinfo = jutil.funcinfo, jutil.traceinfo 65local funcinfo, traceinfo = jutil.funcinfo, jutil.traceinfo
@@ -116,6 +116,9 @@ local function dump_trace(what, tr, func, pc, otr, oex)
116 if ltype == "interpreter" then 116 if ltype == "interpreter" then
117 out:write(format("[TRACE %3s %s%s -- fallback to interpreter]\n", 117 out:write(format("[TRACE %3s %s%s -- fallback to interpreter]\n",
118 tr, startex, startloc)) 118 tr, startex, startloc))
119 elseif ltype == "stitch" then
120 out:write(format("[TRACE %3s %s%s %s %s]\n",
121 tr, startex, startloc, ltype, fmtfunc(func, pc)))
119 elseif link == tr or link == 0 then 122 elseif link == tr or link == 0 then
120 out:write(format("[TRACE %3s %s%s %s]\n", 123 out:write(format("[TRACE %3s %s%s %s]\n",
121 tr, startex, startloc, ltype)) 124 tr, startex, startloc, ltype))
@@ -159,9 +162,9 @@ local function dumpon(outfile)
159end 162end
160 163
161-- Public module functions. 164-- Public module functions.
162module(...) 165return {
163 166 on = dumpon,
164on = dumpon 167 off = dumpoff,
165off = dumpoff 168 start = dumpon -- For -j command line option.
166start = dumpon -- For -j command line option. 169}
167 170
diff --git a/src/jit/zone.lua b/src/jit/zone.lua
new file mode 100644
index 00000000..7201d014
--- /dev/null
+++ b/src/jit/zone.lua
@@ -0,0 +1,45 @@
1----------------------------------------------------------------------------
2-- LuaJIT profiler zones.
3--
4-- Copyright (C) 2005-2013 Mike Pall. All rights reserved.
5-- Released under the MIT license. See Copyright Notice in luajit.h
6----------------------------------------------------------------------------
7--
8-- This module implements a simple hierarchical zone model.
9--
10-- Example usage:
11--
12-- local zone = require("jit.zone")
13-- zone("AI")
14-- ...
15-- zone("A*")
16-- ...
17-- print(zone:get()) --> "A*"
18-- ...
19-- zone()
20-- ...
21-- print(zone:get()) --> "AI"
22-- ...
23-- zone()
24--
25----------------------------------------------------------------------------
26
local remove = table.remove

-- The module table doubles as the zone stack: zones are kept in its array
-- part, the methods in its hash part.
local zone = {}

-- Drop all zones from the stack.
function zone.flush(t)
  for i = #t, 1, -1 do
    t[i] = nil
  end
end

-- Return the innermost zone, or nil if the stack is empty.
function zone.get(t)
  return t[#t]
end

return setmetatable(zone, {
  -- zone(name) pushes a zone; zone() pops and returns the innermost zone
  -- and raises an error if the stack is empty.
  __call = function(t, name)
    if not name then
      return (assert(remove(t), "empty zone stack"))
    end
    t[#t+1] = name
  end
})
45
diff --git a/src/lib_base.c b/src/lib_base.c
index 30e88f19..713bdae5 100644
--- a/src/lib_base.c
+++ b/src/lib_base.c
@@ -32,6 +32,7 @@
32#include "lj_dispatch.h" 32#include "lj_dispatch.h"
33#include "lj_char.h" 33#include "lj_char.h"
34#include "lj_strscan.h" 34#include "lj_strscan.h"
35#include "lj_strfmt.h"
35#include "lj_lib.h" 36#include "lj_lib.h"
36 37
37/* -- Base library: checks ------------------------------------------------ */ 38/* -- Base library: checks ------------------------------------------------ */
@@ -100,7 +101,7 @@ static int ffh_pairs(lua_State *L, MMS mm)
100#endif 101#endif
101 102
102LJLIB_PUSH(lastcl) 103LJLIB_PUSH(lastcl)
103LJLIB_ASM(pairs) 104LJLIB_ASM(pairs) LJLIB_REC(xpairs 0)
104{ 105{
105 return ffh_pairs(L, MM_pairs); 106 return ffh_pairs(L, MM_pairs);
106} 107}
@@ -113,7 +114,7 @@ LJLIB_NOREGUV LJLIB_ASM(ipairs_aux) LJLIB_REC(.)
113} 114}
114 115
115LJLIB_PUSH(lastcl) 116LJLIB_PUSH(lastcl)
116LJLIB_ASM(ipairs) LJLIB_REC(.) 117LJLIB_ASM(ipairs) LJLIB_REC(xpairs 1)
117{ 118{
118 return ffh_pairs(L, MM_ipairs); 119 return ffh_pairs(L, MM_ipairs);
119} 120}
@@ -135,7 +136,7 @@ LJLIB_ASM(setmetatable) LJLIB_REC(.)
135 return FFH_RES(1); 136 return FFH_RES(1);
136} 137}
137 138
138LJLIB_CF(getfenv) 139LJLIB_CF(getfenv) LJLIB_REC(.)
139{ 140{
140 GCfunc *fn; 141 GCfunc *fn;
141 cTValue *o = L->base; 142 cTValue *o = L->base;
@@ -301,9 +302,6 @@ LJLIB_ASM(tonumber) LJLIB_REC(.)
301 return FFH_RES(1); 302 return FFH_RES(1);
302} 303}
303 304
304LJLIB_PUSH("nil")
305LJLIB_PUSH("false")
306LJLIB_PUSH("true")
307LJLIB_ASM(tostring) LJLIB_REC(.) 305LJLIB_ASM(tostring) LJLIB_REC(.)
308{ 306{
309 TValue *o = lj_lib_checkany(L, 1); 307 TValue *o = lj_lib_checkany(L, 1);
@@ -312,23 +310,10 @@ LJLIB_ASM(tostring) LJLIB_REC(.)
312 if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) { 310 if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
313 copyTV(L, L->base-1, mo); /* Replace callable. */ 311 copyTV(L, L->base-1, mo); /* Replace callable. */
314 return FFH_TAILCALL; 312 return FFH_TAILCALL;
315 } else {
316 GCstr *s;
317 if (tvisnumber(o)) {
318 s = lj_str_fromnumber(L, o);
319 } else if (tvispri(o)) {
320 s = strV(lj_lib_upvalue(L, -(int32_t)itype(o)));
321 } else {
322 if (tvisfunc(o) && isffunc(funcV(o)))
323 lua_pushfstring(L, "function: builtin#%d", funcV(o)->c.ffid);
324 else
325 lua_pushfstring(L, "%s: %p", lj_typename(o), lua_topointer(L, 1));
326 /* Note: lua_pushfstring calls the GC which may invalidate o. */
327 s = strV(L->top-1);
328 }
329 setstrV(L, L->base-1, s);
330 return FFH_RES(1);
331 } 313 }
314 lj_gc_check(L);
315 setstrV(L, L->base-1, lj_strfmt_obj(L, L->base));
316 return FFH_RES(1);
332} 317}
333 318
334/* -- Base library: throw and catch errors -------------------------------- */ 319/* -- Base library: throw and catch errors -------------------------------- */
@@ -506,21 +491,13 @@ LJLIB_CF(print)
506 } 491 }
507 shortcut = (tvisfunc(tv) && funcV(tv)->c.ffid == FF_tostring); 492 shortcut = (tvisfunc(tv) && funcV(tv)->c.ffid == FF_tostring);
508 for (i = 0; i < nargs; i++) { 493 for (i = 0; i < nargs; i++) {
494 cTValue *o = &L->base[i];
495 char buf[STRFMT_MAXBUF_NUM];
509 const char *str; 496 const char *str;
510 size_t size; 497 size_t size;
511 cTValue *o = &L->base[i]; 498 MSize len;
512 if (shortcut && tvisstr(o)) { 499 if (shortcut && (str = lj_strfmt_wstrnum(buf, o, &len)) != NULL) {
513 str = strVdata(o); 500 size = len;
514 size = strV(o)->len;
515 } else if (shortcut && tvisint(o)) {
516 char buf[LJ_STR_INTBUF];
517 char *p = lj_str_bufint(buf, intV(o));
518 size = (size_t)(buf+LJ_STR_INTBUF-p);
519 str = p;
520 } else if (shortcut && tvisnum(o)) {
521 char buf[LJ_STR_NUMBUF];
522 size = lj_str_bufnum(buf, o);
523 str = buf;
524 } else { 501 } else {
525 copyTV(L, L->top+1, o); 502 copyTV(L, L->top+1, o);
526 copyTV(L, L->top, L->top-1); 503 copyTV(L, L->top, L->top-1);
@@ -642,9 +619,10 @@ static void setpc_wrap_aux(lua_State *L, GCfunc *fn);
642 619
643LJLIB_CF(coroutine_wrap) 620LJLIB_CF(coroutine_wrap)
644{ 621{
622 GCfunc *fn;
645 lj_cf_coroutine_create(L); 623 lj_cf_coroutine_create(L);
646 lj_lib_pushcc(L, lj_ffh_coroutine_wrap_aux, FF_coroutine_wrap_aux, 1); 624 fn = lj_lib_pushcc(L, lj_ffh_coroutine_wrap_aux, FF_coroutine_wrap_aux, 1);
647 setpc_wrap_aux(L, funcV(L->top-1)); 625 setpc_wrap_aux(L, fn);
648 return 1; 626 return 1;
649} 627}
650 628
diff --git a/src/lib_bit.c b/src/lib_bit.c
index 739924b0..ffdc29c1 100644
--- a/src/lib_bit.c
+++ b/src/lib_bit.c
@@ -12,26 +12,99 @@
12 12
13#include "lj_obj.h" 13#include "lj_obj.h"
14#include "lj_err.h" 14#include "lj_err.h"
15#include "lj_str.h" 15#include "lj_buf.h"
16#include "lj_strscan.h"
17#include "lj_strfmt.h"
18#if LJ_HASFFI
19#include "lj_ctype.h"
20#include "lj_cdata.h"
21#include "lj_cconv.h"
22#include "lj_carith.h"
23#endif
24#include "lj_ff.h"
16#include "lj_lib.h" 25#include "lj_lib.h"
17 26
18/* ------------------------------------------------------------------------ */ 27/* ------------------------------------------------------------------------ */
19 28
20#define LJLIB_MODULE_bit 29#define LJLIB_MODULE_bit
21 30
22LJLIB_ASM(bit_tobit) LJLIB_REC(bit_unary IR_TOBIT) 31#if LJ_HASFFI
32static int bit_result64(lua_State *L, CTypeID id, uint64_t x)
23{ 33{
34 GCcdata *cd = lj_cdata_new_(L, id, 8);
35 *(uint64_t *)cdataptr(cd) = x;
36 setcdataV(L, L->base-1, cd);
37 return FFH_RES(1);
38}
39#else
40static int32_t bit_checkbit(lua_State *L, int narg)
41{
42 TValue *o = L->base + narg-1;
43 if (!(o < L->top && lj_strscan_numberobj(o)))
44 lj_err_argt(L, narg, LUA_TNUMBER);
45 if (LJ_LIKELY(tvisint(o))) {
46 return intV(o);
47 } else {
48 int32_t i = lj_num2bit(numV(o));
49 if (LJ_DUALNUM) setintV(o, i);
50 return i;
51 }
52}
53#endif
54
55LJLIB_ASM(bit_tobit) LJLIB_REC(bit_tobit)
56{
57#if LJ_HASFFI
58 CTypeID id = 0;
59 setintV(L->base-1, (int32_t)lj_carith_check64(L, 1, &id));
60 return FFH_RES(1);
61#else
62 lj_lib_checknumber(L, 1);
63 return FFH_RETRY;
64#endif
65}
66
67LJLIB_ASM(bit_bnot) LJLIB_REC(bit_unary IR_BNOT)
68{
69#if LJ_HASFFI
70 CTypeID id = 0;
71 uint64_t x = lj_carith_check64(L, 1, &id);
72 return id ? bit_result64(L, id, ~x) : FFH_RETRY;
73#else
24 lj_lib_checknumber(L, 1); 74 lj_lib_checknumber(L, 1);
25 return FFH_RETRY; 75 return FFH_RETRY;
76#endif
77}
78
79LJLIB_ASM(bit_bswap) LJLIB_REC(bit_unary IR_BSWAP)
80{
81#if LJ_HASFFI
82 CTypeID id = 0;
83 uint64_t x = lj_carith_check64(L, 1, &id);
84 return id ? bit_result64(L, id, lj_bswap64(x)) : FFH_RETRY;
85#else
86 lj_lib_checknumber(L, 1);
87 return FFH_RETRY;
88#endif
26} 89}
27LJLIB_ASM_(bit_bnot) LJLIB_REC(bit_unary IR_BNOT)
28LJLIB_ASM_(bit_bswap) LJLIB_REC(bit_unary IR_BSWAP)
29 90
30LJLIB_ASM(bit_lshift) LJLIB_REC(bit_shift IR_BSHL) 91LJLIB_ASM(bit_lshift) LJLIB_REC(bit_shift IR_BSHL)
31{ 92{
93#if LJ_HASFFI
94 CTypeID id = 0, id2 = 0;
95 uint64_t x = lj_carith_check64(L, 1, &id);
96 int32_t sh = (int32_t)lj_carith_check64(L, 2, &id2);
97 if (id) {
98 x = lj_carith_shift64(x, sh, curr_func(L)->c.ffid - (int)FF_bit_lshift);
99 return bit_result64(L, id, x);
100 }
101 if (id2) setintV(L->base+1, sh);
102 return FFH_RETRY;
103#else
32 lj_lib_checknumber(L, 1); 104 lj_lib_checknumber(L, 1);
33 lj_lib_checkbit(L, 2); 105 bit_checkbit(L, 2);
34 return FFH_RETRY; 106 return FFH_RETRY;
107#endif
35} 108}
36LJLIB_ASM_(bit_rshift) LJLIB_REC(bit_shift IR_BSHR) 109LJLIB_ASM_(bit_rshift) LJLIB_REC(bit_shift IR_BSHR)
37LJLIB_ASM_(bit_arshift) LJLIB_REC(bit_shift IR_BSAR) 110LJLIB_ASM_(bit_arshift) LJLIB_REC(bit_shift IR_BSAR)
@@ -40,25 +113,58 @@ LJLIB_ASM_(bit_ror) LJLIB_REC(bit_shift IR_BROR)
40 113
41LJLIB_ASM(bit_band) LJLIB_REC(bit_nary IR_BAND) 114LJLIB_ASM(bit_band) LJLIB_REC(bit_nary IR_BAND)
42{ 115{
116#if LJ_HASFFI
117 CTypeID id = 0;
118 TValue *o = L->base, *top = L->top;
119 int i = 0;
120 do { lj_carith_check64(L, ++i, &id); } while (++o < top);
121 if (id) {
122 CTState *cts = ctype_cts(L);
123 CType *ct = ctype_get(cts, id);
124 int op = curr_func(L)->c.ffid - (int)FF_bit_bor;
125 uint64_t x, y = op >= 0 ? 0 : ~(uint64_t)0;
126 o = L->base;
127 do {
128 lj_cconv_ct_tv(cts, ct, (uint8_t *)&x, o, 0);
129 if (op < 0) y &= x; else if (op == 0) y |= x; else y ^= x;
130 } while (++o < top);
131 return bit_result64(L, id, y);
132 }
133 return FFH_RETRY;
134#else
43 int i = 0; 135 int i = 0;
44 do { lj_lib_checknumber(L, ++i); } while (L->base+i < L->top); 136 do { lj_lib_checknumber(L, ++i); } while (L->base+i < L->top);
45 return FFH_RETRY; 137 return FFH_RETRY;
138#endif
46} 139}
47LJLIB_ASM_(bit_bor) LJLIB_REC(bit_nary IR_BOR) 140LJLIB_ASM_(bit_bor) LJLIB_REC(bit_nary IR_BOR)
48LJLIB_ASM_(bit_bxor) LJLIB_REC(bit_nary IR_BXOR) 141LJLIB_ASM_(bit_bxor) LJLIB_REC(bit_nary IR_BXOR)
49 142
50/* ------------------------------------------------------------------------ */ 143/* ------------------------------------------------------------------------ */
51 144
52LJLIB_CF(bit_tohex) 145LJLIB_CF(bit_tohex) LJLIB_REC(.)
53{ 146{
54 uint32_t b = (uint32_t)lj_lib_checkbit(L, 1); 147#if LJ_HASFFI
55 int32_t i, n = L->base+1 >= L->top ? 8 : lj_lib_checkbit(L, 2); 148 CTypeID id = 0, id2 = 0;
56 const char *hexdigits = "0123456789abcdef"; 149 uint64_t b = lj_carith_check64(L, 1, &id);
57 char buf[8]; 150 int32_t n = L->base+1>=L->top ? (id ? 16 : 8) :
58 if (n < 0) { n = -n; hexdigits = "0123456789ABCDEF"; } 151 (int32_t)lj_carith_check64(L, 2, &id2);
59 if (n > 8) n = 8; 152#else
60 for (i = n; --i >= 0; ) { buf[i] = hexdigits[b & 15]; b >>= 4; } 153 uint32_t b = (uint32_t)bit_checkbit(L, 1);
61 lua_pushlstring(L, buf, (size_t)n); 154 int32_t n = L->base+1>=L->top ? 8 : bit_checkbit(L, 2);
155#endif
156 SBuf *sb = lj_buf_tmp_(L);
157 SFormat sf = (STRFMT_UINT|STRFMT_T_HEX);
158 if (n < 0) { n = -n; sf |= STRFMT_F_UPPER; }
159 sf |= ((SFormat)((n+1)&255) << STRFMT_SH_PREC);
160#if LJ_HASFFI
161 if (n < 16) b &= ((uint64_t)1 << 4*n)-1;
162#else
163 if (n < 8) b &= (1u << 4*n)-1;
164#endif
165 sb = lj_strfmt_putfxint(sb, sf, b);
166 setstrV(L, L->top-1, lj_buf_str(L, sb));
167 lj_gc_check(L);
62 return 1; 168 return 1;
63} 169}
64 170
diff --git a/src/lib_ffi.c b/src/lib_ffi.c
index e789ad6c..2d942b23 100644
--- a/src/lib_ffi.c
+++ b/src/lib_ffi.c
@@ -29,6 +29,7 @@
29#include "lj_ccall.h" 29#include "lj_ccall.h"
30#include "lj_ccallback.h" 30#include "lj_ccallback.h"
31#include "lj_clib.h" 31#include "lj_clib.h"
32#include "lj_strfmt.h"
32#include "lj_ff.h" 33#include "lj_ff.h"
33#include "lj_lib.h" 34#include "lj_lib.h"
34 35
@@ -317,7 +318,7 @@ LJLIB_CF(ffi_meta___tostring)
317 } 318 }
318 } 319 }
319 } 320 }
320 lj_str_pushf(L, msg, strdata(lj_ctype_repr(L, id, NULL)), p); 321 lj_strfmt_pushf(L, msg, strdata(lj_ctype_repr(L, id, NULL)), p);
321checkgc: 322checkgc:
322 lj_gc_check(L); 323 lj_gc_check(L);
323 return 1; 324 return 1;
@@ -506,7 +507,7 @@ LJLIB_CF(ffi_new) LJLIB_REC(.)
506 if (!(info & CTF_VLA) && ctype_align(info) <= CT_MEMALIGN) 507 if (!(info & CTF_VLA) && ctype_align(info) <= CT_MEMALIGN)
507 cd = lj_cdata_new(cts, id, sz); 508 cd = lj_cdata_new(cts, id, sz);
508 else 509 else
509 cd = lj_cdata_newv(cts, id, sz, ctype_align(info)); 510 cd = lj_cdata_newv(L, id, sz, ctype_align(info));
510 setcdataV(L, o-1, cd); /* Anchor the uninitialized cdata. */ 511 setcdataV(L, o-1, cd); /* Anchor the uninitialized cdata. */
511 lj_cconv_ct_init(cts, ct, sz, cdataptr(cd), 512 lj_cconv_ct_init(cts, ct, sz, cdataptr(cd),
512 o, (MSize)(L->top - o)); /* Initialize cdata. */ 513 o, (MSize)(L->top - o)); /* Initialize cdata. */
@@ -767,19 +768,11 @@ LJLIB_CF(ffi_gc) LJLIB_REC(.)
767 GCcdata *cd = ffi_checkcdata(L, 1); 768 GCcdata *cd = ffi_checkcdata(L, 1);
768 TValue *fin = lj_lib_checkany(L, 2); 769 TValue *fin = lj_lib_checkany(L, 2);
769 CTState *cts = ctype_cts(L); 770 CTState *cts = ctype_cts(L);
770 GCtab *t = cts->finalizer;
771 CType *ct = ctype_raw(cts, cd->ctypeid); 771 CType *ct = ctype_raw(cts, cd->ctypeid);
772 if (!(ctype_isptr(ct->info) || ctype_isstruct(ct->info) || 772 if (!(ctype_isptr(ct->info) || ctype_isstruct(ct->info) ||
773 ctype_isrefarray(ct->info))) 773 ctype_isrefarray(ct->info)))
774 lj_err_arg(L, 1, LJ_ERR_FFI_INVTYPE); 774 lj_err_arg(L, 1, LJ_ERR_FFI_INVTYPE);
775 if (gcref(t->metatable)) { /* Update finalizer table, if still enabled. */ 775 lj_cdata_setfin(L, cd, gcval(fin), itype(fin));
776 copyTV(L, lj_tab_set(L, t, L->base), fin);
777 lj_gc_anybarriert(L, t);
778 if (!tvisnil(fin))
779 cd->marked |= LJ_GC_CDATA_FIN;
780 else
781 cd->marked &= ~LJ_GC_CDATA_FIN;
782 }
783 L->top = L->base+1; /* Pass through the cdata object. */ 776 L->top = L->base+1; /* Pass through the cdata object. */
784 return 1; 777 return 1;
785} 778}
diff --git a/src/lib_io.c b/src/lib_io.c
index 04f0f739..586709d6 100644
--- a/src/lib_io.c
+++ b/src/lib_io.c
@@ -19,8 +19,10 @@
19#include "lj_obj.h" 19#include "lj_obj.h"
20#include "lj_gc.h" 20#include "lj_gc.h"
21#include "lj_err.h" 21#include "lj_err.h"
22#include "lj_buf.h"
22#include "lj_str.h" 23#include "lj_str.h"
23#include "lj_state.h" 24#include "lj_state.h"
25#include "lj_strfmt.h"
24#include "lj_ff.h" 26#include "lj_ff.h"
25#include "lj_lib.h" 27#include "lj_lib.h"
26 28
@@ -84,7 +86,7 @@ static IOFileUD *io_file_open(lua_State *L, const char *mode)
84 IOFileUD *iof = io_file_new(L); 86 IOFileUD *iof = io_file_new(L);
85 iof->fp = fopen(fname, mode); 87 iof->fp = fopen(fname, mode);
86 if (iof->fp == NULL) 88 if (iof->fp == NULL)
87 luaL_argerror(L, 1, lj_str_pushf(L, "%s: %s", fname, strerror(errno))); 89 luaL_argerror(L, 1, lj_strfmt_pushf(L, "%s: %s", fname, strerror(errno)));
88 return iof; 90 return iof;
89} 91}
90 92
@@ -145,7 +147,7 @@ static int io_file_readline(lua_State *L, FILE *fp, MSize chop)
145 MSize m = LUAL_BUFFERSIZE, n = 0, ok = 0; 147 MSize m = LUAL_BUFFERSIZE, n = 0, ok = 0;
146 char *buf; 148 char *buf;
147 for (;;) { 149 for (;;) {
148 buf = lj_str_needbuf(L, &G(L)->tmpbuf, m); 150 buf = lj_buf_tmp(L, m);
149 if (fgets(buf+n, m-n, fp) == NULL) break; 151 if (fgets(buf+n, m-n, fp) == NULL) break;
150 n += (MSize)strlen(buf+n); 152 n += (MSize)strlen(buf+n);
151 ok |= n; 153 ok |= n;
@@ -161,7 +163,7 @@ static void io_file_readall(lua_State *L, FILE *fp)
161{ 163{
162 MSize m, n; 164 MSize m, n;
163 for (m = LUAL_BUFFERSIZE, n = 0; ; m += m) { 165 for (m = LUAL_BUFFERSIZE, n = 0; ; m += m) {
164 char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, m); 166 char *buf = lj_buf_tmp(L, m);
165 n += (MSize)fread(buf+n, 1, m-n, fp); 167 n += (MSize)fread(buf+n, 1, m-n, fp);
166 if (n != m) { 168 if (n != m) {
167 setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n)); 169 setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n));
@@ -174,7 +176,7 @@ static void io_file_readall(lua_State *L, FILE *fp)
174static int io_file_readlen(lua_State *L, FILE *fp, MSize m) 176static int io_file_readlen(lua_State *L, FILE *fp, MSize m)
175{ 177{
176 if (m) { 178 if (m) {
177 char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, m); 179 char *buf = lj_buf_tmp(L, m);
178 MSize n = (MSize)fread(buf, 1, m, fp); 180 MSize n = (MSize)fread(buf, 1, m, fp);
179 setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n)); 181 setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n));
180 lj_gc_check(L); 182 lj_gc_check(L);
@@ -230,19 +232,12 @@ static int io_file_write(lua_State *L, FILE *fp, int start)
230 cTValue *tv; 232 cTValue *tv;
231 int status = 1; 233 int status = 1;
232 for (tv = L->base+start; tv < L->top; tv++) { 234 for (tv = L->base+start; tv < L->top; tv++) {
233 if (tvisstr(tv)) { 235 char buf[STRFMT_MAXBUF_NUM];
234 MSize len = strV(tv)->len; 236 MSize len;
235 status = status && (fwrite(strVdata(tv), 1, len, fp) == len); 237 const char *p = lj_strfmt_wstrnum(buf, tv, &len);
236 } else if (tvisint(tv)) { 238 if (!p)
237 char buf[LJ_STR_INTBUF];
238 char *p = lj_str_bufint(buf, intV(tv));
239 size_t len = (size_t)(buf+LJ_STR_INTBUF-p);
240 status = status && (fwrite(p, 1, len, fp) == len);
241 } else if (tvisnum(tv)) {
242 status = status && (fprintf(fp, LUA_NUMBER_FMT, numV(tv)) > 0);
243 } else {
244 lj_err_argt(L, (int)(tv - L->base) + 1, LUA_TSTRING); 239 lj_err_argt(L, (int)(tv - L->base) + 1, LUA_TSTRING);
245 } 240 status = status && (fwrite(p, 1, len, fp) == len);
246 } 241 }
247 if (LJ_52 && status) { 242 if (LJ_52 && status) {
248 L->top = L->base+1; 243 L->top = L->base+1;
diff --git a/src/lib_jit.c b/src/lib_jit.c
index 0fadf8b9..21a72a94 100644
--- a/src/lib_jit.c
+++ b/src/lib_jit.c
@@ -10,13 +10,17 @@
10#include "lauxlib.h" 10#include "lauxlib.h"
11#include "lualib.h" 11#include "lualib.h"
12 12
13#include "lj_arch.h"
14#include "lj_obj.h" 13#include "lj_obj.h"
14#include "lj_gc.h"
15#include "lj_err.h" 15#include "lj_err.h"
16#include "lj_debug.h" 16#include "lj_debug.h"
17#include "lj_str.h" 17#include "lj_str.h"
18#include "lj_tab.h" 18#include "lj_tab.h"
19#include "lj_state.h"
19#include "lj_bc.h" 20#include "lj_bc.h"
21#if LJ_HASFFI
22#include "lj_ctype.h"
23#endif
20#if LJ_HASJIT 24#if LJ_HASJIT
21#include "lj_ir.h" 25#include "lj_ir.h"
22#include "lj_jit.h" 26#include "lj_jit.h"
@@ -24,6 +28,7 @@
24#include "lj_iropt.h" 28#include "lj_iropt.h"
25#include "lj_target.h" 29#include "lj_target.h"
26#endif 30#endif
31#include "lj_trace.h"
27#include "lj_dispatch.h" 32#include "lj_dispatch.h"
28#include "lj_vm.h" 33#include "lj_vm.h"
29#include "lj_vmevent.h" 34#include "lj_vmevent.h"
@@ -279,7 +284,7 @@ static GCtrace *jit_checktrace(lua_State *L)
279/* Names of link types. ORDER LJ_TRLINK */ 284/* Names of link types. ORDER LJ_TRLINK */
280static const char *const jit_trlinkname[] = { 285static const char *const jit_trlinkname[] = {
281 "none", "root", "loop", "tail-recursion", "up-recursion", "down-recursion", 286 "none", "root", "loop", "tail-recursion", "up-recursion", "down-recursion",
282 "interpreter", "return" 287 "interpreter", "return", "stitch"
283}; 288};
284 289
285/* local info = jit.util.traceinfo(tr) */ 290/* local info = jit.util.traceinfo(tr) */
@@ -332,6 +337,13 @@ LJLIB_CF(jit_util_tracek)
332 slot = ir->op2; 337 slot = ir->op2;
333 ir = &T->ir[ir->op1]; 338 ir = &T->ir[ir->op1];
334 } 339 }
340#if LJ_HASFFI
341 if (ir->o == IR_KINT64 && !ctype_ctsG(G(L))) {
342 ptrdiff_t oldtop = savestack(L, L->top);
343 luaopen_ffi(L); /* Load FFI library on-demand. */
344 L->top = restorestack(L, oldtop);
345 }
346#endif
335 lj_ir_kvalue(L, L->top-2, ir); 347 lj_ir_kvalue(L, L->top-2, ir);
336 setintV(L->top-1, (int32_t)irt_type(ir->t)); 348 setintV(L->top-1, (int32_t)irt_type(ir->t));
337 if (slot == -1) 349 if (slot == -1)
@@ -416,6 +428,12 @@ LJLIB_CF(jit_util_ircalladdr)
416 428
417#include "lj_libdef.h" 429#include "lj_libdef.h"
418 430
431static int luaopen_jit_util(lua_State *L)
432{
433 LJ_LIB_REG(L, NULL, jit_util);
434 return 1;
435}
436
419/* -- jit.opt module ------------------------------------------------------ */ 437/* -- jit.opt module ------------------------------------------------------ */
420 438
421#if LJ_HASJIT 439#if LJ_HASJIT
@@ -513,6 +531,104 @@ LJLIB_CF(jit_opt_start)
513 531
514#endif 532#endif
515 533
534/* -- jit.profile module -------------------------------------------------- */
535
536#if LJ_HASPROFILE
537
538#define LJLIB_MODULE_jit_profile
539
540/* Not loaded by default, use: local profile = require("jit.profile") */
541
542static const char KEY_PROFILE_THREAD = 't';
543static const char KEY_PROFILE_FUNC = 'f';
544
545static void jit_profile_callback(lua_State *L2, lua_State *L, int samples,
546 int vmstate)
547{
548 TValue key;
549 cTValue *tv;
550 setlightudV(&key, (void *)&KEY_PROFILE_FUNC);
551 tv = lj_tab_get(L, tabV(registry(L)), &key);
552 if (tvisfunc(tv)) {
553 char vmst = (char)vmstate;
554 int status;
555 setfuncV(L2, L2->top++, funcV(tv));
556 setthreadV(L2, L2->top++, L);
557 setintV(L2->top++, samples);
558 setstrV(L2, L2->top++, lj_str_new(L2, &vmst, 1));
559 status = lua_pcall(L2, 3, 0, 0); /* callback(thread, samples, vmstate) */
560 if (status) {
561 if (G(L2)->panic) G(L2)->panic(L2);
562 exit(EXIT_FAILURE);
563 }
564 lj_trace_abort(G(L2));
565 }
566}
567
568/* profile.start(mode, cb) */
569LJLIB_CF(jit_profile_start)
570{
571 GCtab *registry = tabV(registry(L));
572 GCstr *mode = lj_lib_optstr(L, 1);
573 GCfunc *func = lj_lib_checkfunc(L, 2);
574 lua_State *L2 = lua_newthread(L); /* Thread that runs profiler callback. */
575 TValue key;
576 /* Anchor thread and function in registry. */
577 setlightudV(&key, (void *)&KEY_PROFILE_THREAD);
578 setthreadV(L, lj_tab_set(L, registry, &key), L2);
579 setlightudV(&key, (void *)&KEY_PROFILE_FUNC);
580 setfuncV(L, lj_tab_set(L, registry, &key), func);
581 lj_gc_anybarriert(L, registry);
582 luaJIT_profile_start(L, mode ? strdata(mode) : "",
583 (luaJIT_profile_callback)jit_profile_callback, L2);
584 return 0;
585}
586
587/* profile.stop() */
588LJLIB_CF(jit_profile_stop)
589{
590 GCtab *registry;
591 TValue key;
592 luaJIT_profile_stop(L);
593 registry = tabV(registry(L));
594 setlightudV(&key, (void *)&KEY_PROFILE_THREAD);
595 setnilV(lj_tab_set(L, registry, &key));
596 setlightudV(&key, (void *)&KEY_PROFILE_FUNC);
597 setnilV(lj_tab_set(L, registry, &key));
598 lj_gc_anybarriert(L, registry);
599 return 0;
600}
601
602/* dump = profile.dumpstack([thread,] fmt, depth) */
603LJLIB_CF(jit_profile_dumpstack)
604{
605 lua_State *L2 = L;
606 int arg = 0;
607 size_t len;
608 int depth;
609 GCstr *fmt;
610 const char *p;
611 if (L->top > L->base && tvisthread(L->base)) {
612 L2 = threadV(L->base);
613 arg = 1;
614 }
615 fmt = lj_lib_checkstr(L, arg+1);
616 depth = lj_lib_checkint(L, arg+2);
617 p = luaJIT_profile_dumpstack(L2, strdata(fmt), depth, &len);
618 lua_pushlstring(L, p, len);
619 return 1;
620}
621
622#include "lj_libdef.h"
623
624static int luaopen_jit_profile(lua_State *L)
625{
626 LJ_LIB_REG(L, NULL, jit_profile);
627 return 1;
628}
629
630#endif
631
516/* -- JIT compiler initialization ----------------------------------------- */ 632/* -- JIT compiler initialization ----------------------------------------- */
517 633
518#if LJ_HASJIT 634#if LJ_HASJIT
@@ -538,23 +654,17 @@ static uint32_t jit_cpudetect(lua_State *L)
538 uint32_t features[4]; 654 uint32_t features[4];
539 if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) { 655 if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) {
540#if !LJ_HASJIT 656#if !LJ_HASJIT
541#define JIT_F_CMOV 1
542#define JIT_F_SSE2 2 657#define JIT_F_SSE2 2
543#endif 658#endif
544 flags |= ((features[3] >> 15)&1) * JIT_F_CMOV;
545 flags |= ((features[3] >> 26)&1) * JIT_F_SSE2; 659 flags |= ((features[3] >> 26)&1) * JIT_F_SSE2;
546#if LJ_HASJIT 660#if LJ_HASJIT
547 flags |= ((features[2] >> 0)&1) * JIT_F_SSE3; 661 flags |= ((features[2] >> 0)&1) * JIT_F_SSE3;
548 flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1; 662 flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1;
549 if (vendor[2] == 0x6c65746e) { /* Intel. */ 663 if (vendor[2] == 0x6c65746e) { /* Intel. */
550 if ((features[0] & 0x0ff00f00) == 0x00000f00) /* P4. */ 664 if ((features[0] & 0x0fff0ff0) == 0x000106c0) /* Atom. */
551 flags |= JIT_F_P4; /* Currently unused. */
552 else if ((features[0] & 0x0fff0ff0) == 0x000106c0) /* Atom. */
553 flags |= JIT_F_LEA_AGU; 665 flags |= JIT_F_LEA_AGU;
554 } else if (vendor[2] == 0x444d4163) { /* AMD. */ 666 } else if (vendor[2] == 0x444d4163) { /* AMD. */
555 uint32_t fam = (features[0] & 0x0ff00f00); 667 uint32_t fam = (features[0] & 0x0ff00f00);
556 if (fam == 0x00000f00) /* K8. */
557 flags |= JIT_F_SPLIT_XMM;
558 if (fam >= 0x00000f00) /* K8, K10. */ 668 if (fam >= 0x00000f00) /* K8, K10. */
559 flags |= JIT_F_PREFER_IMUL; 669 flags |= JIT_F_PREFER_IMUL;
560 } 670 }
@@ -562,14 +672,8 @@ static uint32_t jit_cpudetect(lua_State *L)
562 } 672 }
563 /* Check for required instruction set support on x86 (unnecessary on x64). */ 673 /* Check for required instruction set support on x86 (unnecessary on x64). */
564#if LJ_TARGET_X86 674#if LJ_TARGET_X86
565#if !defined(LUAJIT_CPU_NOCMOV)
566 if (!(flags & JIT_F_CMOV))
567 luaL_error(L, "CPU not supported");
568#endif
569#if defined(LUAJIT_CPU_SSE2)
570 if (!(flags & JIT_F_SSE2)) 675 if (!(flags & JIT_F_SSE2))
571 luaL_error(L, "CPU does not support SSE2 (recompile without -DLUAJIT_CPU_SSE2)"); 676 luaL_error(L, "CPU with SSE2 required");
572#endif
573#endif 677#endif
574#elif LJ_TARGET_ARM 678#elif LJ_TARGET_ARM
575#if LJ_HASJIT 679#if LJ_HASJIT
@@ -631,11 +735,7 @@ static void jit_init(lua_State *L)
631 uint32_t flags = jit_cpudetect(L); 735 uint32_t flags = jit_cpudetect(L);
632#if LJ_HASJIT 736#if LJ_HASJIT
633 jit_State *J = L2J(L); 737 jit_State *J = L2J(L);
634#if LJ_TARGET_X86 738 J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT;
635 /* Silently turn off the JIT compiler on CPUs without SSE2. */
636 if ((flags & JIT_F_SSE2))
637#endif
638 J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT;
639 memcpy(J->param, jit_param_default, sizeof(J->param)); 739 memcpy(J->param, jit_param_default, sizeof(J->param));
640 lj_dispatch_update(G(L)); 740 lj_dispatch_update(G(L));
641#else 741#else
@@ -645,19 +745,23 @@ static void jit_init(lua_State *L)
645 745
646LUALIB_API int luaopen_jit(lua_State *L) 746LUALIB_API int luaopen_jit(lua_State *L)
647{ 747{
748 jit_init(L);
648 lua_pushliteral(L, LJ_OS_NAME); 749 lua_pushliteral(L, LJ_OS_NAME);
649 lua_pushliteral(L, LJ_ARCH_NAME); 750 lua_pushliteral(L, LJ_ARCH_NAME);
650 lua_pushinteger(L, LUAJIT_VERSION_NUM); 751 lua_pushinteger(L, LUAJIT_VERSION_NUM);
651 lua_pushliteral(L, LUAJIT_VERSION); 752 lua_pushliteral(L, LUAJIT_VERSION);
652 LJ_LIB_REG(L, LUA_JITLIBNAME, jit); 753 LJ_LIB_REG(L, LUA_JITLIBNAME, jit);
754#if LJ_HASPROFILE
755 lj_lib_prereg(L, LUA_JITLIBNAME ".profile", luaopen_jit_profile,
756 tabref(L->env));
757#endif
653#ifndef LUAJIT_DISABLE_JITUTIL 758#ifndef LUAJIT_DISABLE_JITUTIL
654 LJ_LIB_REG(L, "jit.util", jit_util); 759 lj_lib_prereg(L, LUA_JITLIBNAME ".util", luaopen_jit_util, tabref(L->env));
655#endif 760#endif
656#if LJ_HASJIT 761#if LJ_HASJIT
657 LJ_LIB_REG(L, "jit.opt", jit_opt); 762 LJ_LIB_REG(L, "jit.opt", jit_opt);
658#endif 763#endif
659 L->top -= 2; 764 L->top -= 2;
660 jit_init(L);
661 return 1; 765 return 1;
662} 766}
663 767
diff --git a/src/lib_math.c b/src/lib_math.c
index 3aa5c6b0..87ec2883 100644
--- a/src/lib_math.c
+++ b/src/lib_math.c
@@ -47,12 +47,6 @@ LJLIB_ASM_(math_tanh) LJLIB_REC(math_htrig IRCALL_tanh)
47LJLIB_ASM_(math_frexp) 47LJLIB_ASM_(math_frexp)
48LJLIB_ASM_(math_modf) LJLIB_REC(.) 48LJLIB_ASM_(math_modf) LJLIB_REC(.)
49 49
50LJLIB_PUSH(57.29577951308232)
51LJLIB_ASM_(math_deg) LJLIB_REC(math_degrad)
52
53LJLIB_PUSH(0.017453292519943295)
54LJLIB_ASM_(math_rad) LJLIB_REC(math_degrad)
55
56LJLIB_ASM(math_log) LJLIB_REC(math_log) 50LJLIB_ASM(math_log) LJLIB_REC(math_log)
57{ 51{
58 double x = lj_lib_checknum(L, 1); 52 double x = lj_lib_checknum(L, 1);
@@ -69,6 +63,9 @@ LJLIB_ASM(math_log) LJLIB_REC(math_log)
69 return FFH_RETRY; 63 return FFH_RETRY;
70} 64}
71 65
66LJLIB_LUA(math_deg) /* function(x) return x * 57.29577951308232 end */
67LJLIB_LUA(math_rad) /* function(x) return x * 0.017453292519943295 end */
68
72LJLIB_ASM(math_atan2) LJLIB_REC(.) 69LJLIB_ASM(math_atan2) LJLIB_REC(.)
73{ 70{
74 lj_lib_checknum(L, 1); 71 lj_lib_checknum(L, 1);
diff --git a/src/lib_os.c b/src/lib_os.c
index f62e8c8b..76ffcaa8 100644
--- a/src/lib_os.c
+++ b/src/lib_os.c
@@ -18,7 +18,10 @@
18#include "lualib.h" 18#include "lualib.h"
19 19
20#include "lj_obj.h" 20#include "lj_obj.h"
21#include "lj_gc.h"
21#include "lj_err.h" 22#include "lj_err.h"
23#include "lj_buf.h"
24#include "lj_str.h"
22#include "lj_lib.h" 25#include "lj_lib.h"
23 26
24#if LJ_TARGET_POSIX 27#if LJ_TARGET_POSIX
@@ -185,7 +188,7 @@ LJLIB_CF(os_date)
185#endif 188#endif
186 } 189 }
187 if (stm == NULL) { /* Invalid date? */ 190 if (stm == NULL) { /* Invalid date? */
188 setnilV(L->top-1); 191 setnilV(L->top++);
189 } else if (strcmp(s, "*t") == 0) { 192 } else if (strcmp(s, "*t") == 0) {
190 lua_createtable(L, 0, 9); /* 9 = number of fields */ 193 lua_createtable(L, 0, 9); /* 9 = number of fields */
191 setfield(L, "sec", stm->tm_sec); 194 setfield(L, "sec", stm->tm_sec);
@@ -197,23 +200,25 @@ LJLIB_CF(os_date)
197 setfield(L, "wday", stm->tm_wday+1); 200 setfield(L, "wday", stm->tm_wday+1);
198 setfield(L, "yday", stm->tm_yday+1); 201 setfield(L, "yday", stm->tm_yday+1);
199 setboolfield(L, "isdst", stm->tm_isdst); 202 setboolfield(L, "isdst", stm->tm_isdst);
200 } else { 203 } else if (*s) {
201 char cc[3]; 204 SBuf *sb = &G(L)->tmpbuf;
202 luaL_Buffer b; 205 MSize sz = 0;
203 cc[0] = '%'; cc[2] = '\0'; 206 const char *q;
204 luaL_buffinit(L, &b); 207 for (q = s; *q; q++)
205 for (; *s; s++) { 208 sz += (*q == '%') ? 30 : 1; /* Overflow doesn't matter. */
206 if (*s != '%' || *(s + 1) == '\0') { /* No conversion specifier? */ 209 setsbufL(sb, L);
207 luaL_addchar(&b, *s); 210 for (;;) {
208 } else { 211 char *buf = lj_buf_need(sb, sz);
209 size_t reslen; 212 size_t len = strftime(buf, sbufsz(sb), s, stm);
210 char buff[200]; /* Should be big enough for any conversion result. */ 213 if (len) {
211 cc[1] = *(++s); 214 setstrV(L, L->top++, lj_str_new(L, buf, len));
212 reslen = strftime(buff, sizeof(buff), cc, stm); 215 lj_gc_check(L);
213 luaL_addlstring(&b, buff, reslen); 216 break;
214 } 217 }
218 sz += (sz|1);
215 } 219 }
216 luaL_pushresult(&b); 220 } else {
221 setstrV(L, L->top++, &G(L)->strempty);
217 } 222 }
218 return 1; 223 return 1;
219} 224}
diff --git a/src/lib_package.c b/src/lib_package.c
index e3410390..d3229110 100644
--- a/src/lib_package.c
+++ b/src/lib_package.c
@@ -226,7 +226,7 @@ static int ll_loadfunc(lua_State *L, const char *path, const char *name, int r)
226 const char *bcdata = ll_bcsym(*reg, mksymname(L, name, SYMPREFIX_BC)); 226 const char *bcdata = ll_bcsym(*reg, mksymname(L, name, SYMPREFIX_BC));
227 lua_pop(L, 1); 227 lua_pop(L, 1);
228 if (bcdata) { 228 if (bcdata) {
229 if (luaL_loadbuffer(L, bcdata, ~(size_t)0, name) != 0) 229 if (luaL_loadbuffer(L, bcdata, LJ_MAX_MEM, name) != 0)
230 return PACKAGE_ERR_LOAD; 230 return PACKAGE_ERR_LOAD;
231 return 0; 231 return 0;
232 } 232 }
@@ -383,7 +383,7 @@ static int lj_cf_package_loader_preload(lua_State *L)
383 if (lua_isnil(L, -1)) { /* Not found? */ 383 if (lua_isnil(L, -1)) { /* Not found? */
384 const char *bcname = mksymname(L, name, SYMPREFIX_BC); 384 const char *bcname = mksymname(L, name, SYMPREFIX_BC);
385 const char *bcdata = ll_bcsym(NULL, bcname); 385 const char *bcdata = ll_bcsym(NULL, bcname);
386 if (bcdata == NULL || luaL_loadbuffer(L, bcdata, ~(size_t)0, name) != 0) 386 if (bcdata == NULL || luaL_loadbuffer(L, bcdata, LJ_MAX_MEM, name) != 0)
387 lua_pushfstring(L, "\n\tno field package.preload['%s']", name); 387 lua_pushfstring(L, "\n\tno field package.preload['%s']", name);
388 } 388 }
389 return 1; 389 return 1;
diff --git a/src/lib_string.c b/src/lib_string.c
index 9ec6d0c3..6ca7a76b 100644
--- a/src/lib_string.c
+++ b/src/lib_string.c
@@ -6,8 +6,6 @@
6** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h 6** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
7*/ 7*/
8 8
9#include <stdio.h>
10
11#define lib_string_c 9#define lib_string_c
12#define LUA_LIB 10#define LUA_LIB
13 11
@@ -18,6 +16,7 @@
18#include "lj_obj.h" 16#include "lj_obj.h"
19#include "lj_gc.h" 17#include "lj_gc.h"
20#include "lj_err.h" 18#include "lj_err.h"
19#include "lj_buf.h"
21#include "lj_str.h" 20#include "lj_str.h"
22#include "lj_tab.h" 21#include "lj_tab.h"
23#include "lj_meta.h" 22#include "lj_meta.h"
@@ -25,17 +24,19 @@
25#include "lj_ff.h" 24#include "lj_ff.h"
26#include "lj_bcdump.h" 25#include "lj_bcdump.h"
27#include "lj_char.h" 26#include "lj_char.h"
27#include "lj_strfmt.h"
28#include "lj_lib.h" 28#include "lj_lib.h"
29 29
30/* ------------------------------------------------------------------------ */ 30/* ------------------------------------------------------------------------ */
31 31
32#define LJLIB_MODULE_string 32#define LJLIB_MODULE_string
33 33
34LJLIB_ASM(string_len) LJLIB_REC(.) 34LJLIB_LUA(string_len) /*
35{ 35 function(s)
36 lj_lib_checkstr(L, 1); 36 CHECK_str(s)
37 return FFH_RETRY; 37 return #s
38} 38 end
39*/
39 40
40LJLIB_ASM(string_byte) LJLIB_REC(string_range 0) 41LJLIB_ASM(string_byte) LJLIB_REC(string_range 0)
41{ 42{
@@ -61,10 +62,10 @@ LJLIB_ASM(string_byte) LJLIB_REC(string_range 0)
61 return FFH_RES(n); 62 return FFH_RES(n);
62} 63}
63 64
64LJLIB_ASM(string_char) 65LJLIB_ASM(string_char) LJLIB_REC(.)
65{ 66{
66 int i, nargs = (int)(L->top - L->base); 67 int i, nargs = (int)(L->top - L->base);
67 char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, (MSize)nargs); 68 char *buf = lj_buf_tmp(L, (MSize)nargs);
68 for (i = 1; i <= nargs; i++) { 69 for (i = 1; i <= nargs; i++) {
69 int32_t k = lj_lib_checkint(L, i); 70 int32_t k = lj_lib_checkint(L, i);
70 if (!checku8(k)) 71 if (!checku8(k))
@@ -83,68 +84,38 @@ LJLIB_ASM(string_sub) LJLIB_REC(string_range 1)
83 return FFH_RETRY; 84 return FFH_RETRY;
84} 85}
85 86
86LJLIB_ASM(string_rep) 87LJLIB_CF(string_rep) LJLIB_REC(.)
87{ 88{
88 GCstr *s = lj_lib_checkstr(L, 1); 89 GCstr *s = lj_lib_checkstr(L, 1);
89 int32_t k = lj_lib_checkint(L, 2); 90 int32_t rep = lj_lib_checkint(L, 2);
90 GCstr *sep = lj_lib_optstr(L, 3); 91 GCstr *sep = lj_lib_optstr(L, 3);
91 int32_t len = (int32_t)s->len; 92 SBuf *sb = lj_buf_tmp_(L);
92 global_State *g = G(L); 93 if (sep && rep > 1) {
93 int64_t tlen; 94 GCstr *s2 = lj_buf_cat2str(L, sep, s);
94 const char *src; 95 lj_buf_reset(sb);
95 char *buf; 96 lj_buf_putstr(sb, s);
96 if (k <= 0) { 97 s = s2;
97 empty: 98 rep--;
98 setstrV(L, L->base-1, &g->strempty);
99 return FFH_RES(1);
100 } 99 }
101 if (sep) { 100 sb = lj_buf_putstr_rep(sb, s, rep);
102 tlen = (int64_t)len + sep->len; 101 setstrV(L, L->top-1, lj_buf_str(L, sb));
103 if (tlen > LJ_MAX_STR) 102 lj_gc_check(L);
104 lj_err_caller(L, LJ_ERR_STROV); 103 return 1;
105 tlen *= k;
106 if (tlen > LJ_MAX_STR)
107 lj_err_caller(L, LJ_ERR_STROV);
108 } else {
109 tlen = (int64_t)k * len;
110 if (tlen > LJ_MAX_STR)
111 lj_err_caller(L, LJ_ERR_STROV);
112 }
113 if (tlen == 0) goto empty;
114 buf = lj_str_needbuf(L, &g->tmpbuf, (MSize)tlen);
115 src = strdata(s);
116 if (sep) {
117 tlen -= sep->len; /* Ignore trailing separator. */
118 if (k > 1) { /* Paste one string and one separator. */
119 int32_t i;
120 i = 0; while (i < len) *buf++ = src[i++];
121 src = strdata(sep); len = sep->len;
122 i = 0; while (i < len) *buf++ = src[i++];
123 src = g->tmpbuf.buf; len += s->len; k--; /* Now copy that k-1 times. */
124 }
125 }
126 do {
127 int32_t i = 0;
128 do { *buf++ = src[i++]; } while (i < len);
129 } while (--k > 0);
130 setstrV(L, L->base-1, lj_str_new(L, g->tmpbuf.buf, (size_t)tlen));
131 return FFH_RES(1);
132} 104}
133 105
134LJLIB_ASM(string_reverse) 106LJLIB_ASM(string_reverse) LJLIB_REC(string_op IRCALL_lj_buf_putstr_reverse)
135{ 107{
136 GCstr *s = lj_lib_checkstr(L, 1); 108 lj_lib_checkstr(L, 1);
137 lj_str_needbuf(L, &G(L)->tmpbuf, s->len);
138 return FFH_RETRY; 109 return FFH_RETRY;
139} 110}
140LJLIB_ASM_(string_lower) 111LJLIB_ASM_(string_lower) LJLIB_REC(string_op IRCALL_lj_buf_putstr_lower)
141LJLIB_ASM_(string_upper) 112LJLIB_ASM_(string_upper) LJLIB_REC(string_op IRCALL_lj_buf_putstr_upper)
142 113
143/* ------------------------------------------------------------------------ */ 114/* ------------------------------------------------------------------------ */
144 115
145static int writer_buf(lua_State *L, const void *p, size_t size, void *b) 116static int writer_buf(lua_State *L, const void *p, size_t size, void *sb)
146{ 117{
147 luaL_addlstring((luaL_Buffer *)b, (const char *)p, size); 118 lj_buf_putmem((SBuf *)sb, p, (MSize)size);
148 UNUSED(L); 119 UNUSED(L);
149 return 0; 120 return 0;
150} 121}
@@ -153,12 +124,12 @@ LJLIB_CF(string_dump)
153{ 124{
154 GCfunc *fn = lj_lib_checkfunc(L, 1); 125 GCfunc *fn = lj_lib_checkfunc(L, 1);
155 int strip = L->base+1 < L->top && tvistruecond(L->base+1); 126 int strip = L->base+1 < L->top && tvistruecond(L->base+1);
156 luaL_Buffer b; 127 SBuf *sb = lj_buf_tmp_(L); /* Assumes lj_bcwrite() doesn't use tmpbuf. */
157 L->top = L->base+1; 128 L->top = L->base+1;
158 luaL_buffinit(L, &b); 129 if (!isluafunc(fn) || lj_bcwrite(L, funcproto(fn), writer_buf, sb, strip))
159 if (!isluafunc(fn) || lj_bcwrite(L, funcproto(fn), writer_buf, &b, strip))
160 lj_err_caller(L, LJ_ERR_STRDUMP); 130 lj_err_caller(L, LJ_ERR_STRDUMP);
161 luaL_pushresult(&b); 131 setstrV(L, L->top-1, lj_buf_str(L, sb));
132 lj_gc_check(L);
162 return 1; 133 return 1;
163} 134}
164 135
@@ -183,7 +154,6 @@ typedef struct MatchState {
183} MatchState; 154} MatchState;
184 155
185#define L_ESC '%' 156#define L_ESC '%'
186#define SPECIALS "^$*+?.([%-"
187 157
188static int check_capture(MatchState *ms, int l) 158static int check_capture(MatchState *ms, int l)
189{ 159{
@@ -450,30 +420,6 @@ static const char *match(MatchState *ms, const char *s, const char *p)
450 return s; 420 return s;
451} 421}
452 422
453static const char *lmemfind(const char *s1, size_t l1,
454 const char *s2, size_t l2)
455{
456 if (l2 == 0) {
457 return s1; /* empty strings are everywhere */
458 } else if (l2 > l1) {
459 return NULL; /* avoids a negative `l1' */
460 } else {
461 const char *init; /* to search for a `*s2' inside `s1' */
462 l2--; /* 1st char will be checked by `memchr' */
463 l1 = l1-l2; /* `s2' cannot be found after that */
464 while (l1 > 0 && (init = (const char *)memchr(s1, *s2, l1)) != NULL) {
465 init++; /* 1st char is already checked */
466 if (memcmp(init, s2+1, l2) == 0) {
467 return init-1;
468 } else { /* correct `l1' and `s1' to try again */
469 l1 -= (size_t)(init-s1);
470 s1 = init;
471 }
472 }
473 return NULL; /* not found */
474 }
475}
476
477static void push_onecapture(MatchState *ms, int i, const char *s, const char *e) 423static void push_onecapture(MatchState *ms, int i, const char *s, const char *e)
478{ 424{
479 if (i >= ms->level) { 425 if (i >= ms->level) {
@@ -501,64 +447,60 @@ static int push_captures(MatchState *ms, const char *s, const char *e)
501 return nlevels; /* number of strings pushed */ 447 return nlevels; /* number of strings pushed */
502} 448}
503 449
504static ptrdiff_t posrelat(ptrdiff_t pos, size_t len)
505{
506 /* relative string position: negative means back from end */
507 if (pos < 0) pos += (ptrdiff_t)len + 1;
508 return (pos >= 0) ? pos : 0;
509}
510
511static int str_find_aux(lua_State *L, int find) 450static int str_find_aux(lua_State *L, int find)
512{ 451{
513 size_t l1, l2; 452 GCstr *s = lj_lib_checkstr(L, 1);
514 const char *s = luaL_checklstring(L, 1, &l1); 453 GCstr *p = lj_lib_checkstr(L, 2);
515 const char *p = luaL_checklstring(L, 2, &l2); 454 int32_t start = lj_lib_optint(L, 3, 1);
516 ptrdiff_t init = posrelat(luaL_optinteger(L, 3, 1), l1) - 1; 455 MSize st;
517 if (init < 0) { 456 if (start < 0) start += (int32_t)s->len; else start--;
518 init = 0; 457 if (start < 0) start = 0;
519 } else if ((size_t)(init) > l1) { 458 st = (MSize)start;
459 if (st > s->len) {
520#if LJ_52 460#if LJ_52
521 setnilV(L->top-1); 461 setnilV(L->top-1);
522 return 1; 462 return 1;
523#else 463#else
524 init = (ptrdiff_t)l1; 464 st = s->len;
525#endif 465#endif
526 } 466 }
527 if (find && (lua_toboolean(L, 4) || /* explicit request? */ 467 if (find && ((L->base+3 < L->top && tvistruecond(L->base+3)) ||
528 strpbrk(p, SPECIALS) == NULL)) { /* or no special characters? */ 468 !lj_str_haspattern(p))) { /* Search for fixed string. */
529 /* do a plain search */ 469 const char *q = lj_str_find(strdata(s)+st, strdata(p), s->len-st, p->len);
530 const char *s2 = lmemfind(s+init, l1-(size_t)init, p, l2); 470 if (q) {
531 if (s2) { 471 setintV(L->top-2, (int32_t)(q-strdata(s)) + 1);
532 lua_pushinteger(L, s2-s+1); 472 setintV(L->top-1, (int32_t)(q-strdata(s)) + (int32_t)p->len);
533 lua_pushinteger(L, s2-s+(ptrdiff_t)l2);
534 return 2; 473 return 2;
535 } 474 }
536 } else { 475 } else { /* Search for pattern. */
537 MatchState ms; 476 MatchState ms;
538 int anchor = (*p == '^') ? (p++, 1) : 0; 477 const char *pstr = strdata(p);
539 const char *s1=s+init; 478 const char *sstr = strdata(s) + st;
479 int anchor = 0;
480 if (*pstr == '^') { pstr++; anchor = 1; }
540 ms.L = L; 481 ms.L = L;
541 ms.src_init = s; 482 ms.src_init = strdata(s);
542 ms.src_end = s+l1; 483 ms.src_end = strdata(s) + s->len;
543 do { 484 do { /* Loop through string and try to match the pattern. */
544 const char *res; 485 const char *q;
545 ms.level = ms.depth = 0; 486 ms.level = ms.depth = 0;
546 if ((res=match(&ms, s1, p)) != NULL) { 487 q = match(&ms, sstr, pstr);
488 if (q) {
547 if (find) { 489 if (find) {
548 lua_pushinteger(L, s1-s+1); /* start */ 490 setintV(L->top++, (int32_t)(sstr-(strdata(s)-1)));
549 lua_pushinteger(L, res-s); /* end */ 491 setintV(L->top++, (int32_t)(q-strdata(s)));
550 return push_captures(&ms, NULL, 0) + 2; 492 return push_captures(&ms, NULL, NULL) + 2;
551 } else { 493 } else {
552 return push_captures(&ms, s1, res); 494 return push_captures(&ms, sstr, q);
553 } 495 }
554 } 496 }
555 } while (s1++ < ms.src_end && !anchor); 497 } while (sstr++ < ms.src_end && !anchor);
556 } 498 }
557 lua_pushnil(L); /* not found */ 499 setnilV(L->top-1); /* Not found. */
558 return 1; 500 return 1;
559} 501}
560 502
561LJLIB_CF(string_find) 503LJLIB_CF(string_find) LJLIB_REC(.)
562{ 504{
563 return str_find_aux(L, 1); 505 return str_find_aux(L, 1);
564} 506}
@@ -698,221 +640,91 @@ LJLIB_CF(string_gsub)
698 640
699/* ------------------------------------------------------------------------ */ 641/* ------------------------------------------------------------------------ */
700 642
701/* maximum size of each formatted item (> len(format('%99.99f', -1e308))) */ 643/* Emulate tostring() inline. */
702#define MAX_FMTITEM 512 644static GCstr *string_fmt_tostring(lua_State *L, int arg, int retry)
703/* valid flags in a format specification */
704#define FMT_FLAGS "-+ #0"
705/*
706** maximum size of each format specification (such as '%-099.99d')
707** (+10 accounts for %99.99x plus margin of error)
708*/
709#define MAX_FMTSPEC (sizeof(FMT_FLAGS) + sizeof(LUA_INTFRMLEN) + 10)
710
711static void addquoted(lua_State *L, luaL_Buffer *b, int arg)
712{
713 GCstr *str = lj_lib_checkstr(L, arg);
714 int32_t len = (int32_t)str->len;
715 const char *s = strdata(str);
716 luaL_addchar(b, '"');
717 while (len--) {
718 uint32_t c = uchar(*s);
719 if (c == '"' || c == '\\' || c == '\n') {
720 luaL_addchar(b, '\\');
721 } else if (lj_char_iscntrl(c)) { /* This can only be 0-31 or 127. */
722 uint32_t d;
723 luaL_addchar(b, '\\');
724 if (c >= 100 || lj_char_isdigit(uchar(s[1]))) {
725 luaL_addchar(b, '0'+(c >= 100)); if (c >= 100) c -= 100;
726 goto tens;
727 } else if (c >= 10) {
728 tens:
729 d = (c * 205) >> 11; c -= d * 10; luaL_addchar(b, '0'+d);
730 }
731 c += '0';
732 }
733 luaL_addchar(b, c);
734 s++;
735 }
736 luaL_addchar(b, '"');
737}
738
739static const char *scanformat(lua_State *L, const char *strfrmt, char *form)
740{
741 const char *p = strfrmt;
742 while (*p != '\0' && strchr(FMT_FLAGS, *p) != NULL) p++; /* skip flags */
743 if ((size_t)(p - strfrmt) >= sizeof(FMT_FLAGS))
744 lj_err_caller(L, LJ_ERR_STRFMTR);
745 if (lj_char_isdigit(uchar(*p))) p++; /* skip width */
746 if (lj_char_isdigit(uchar(*p))) p++; /* (2 digits at most) */
747 if (*p == '.') {
748 p++;
749 if (lj_char_isdigit(uchar(*p))) p++; /* skip precision */
750 if (lj_char_isdigit(uchar(*p))) p++; /* (2 digits at most) */
751 }
752 if (lj_char_isdigit(uchar(*p)))
753 lj_err_caller(L, LJ_ERR_STRFMTW);
754 *(form++) = '%';
755 strncpy(form, strfrmt, (size_t)(p - strfrmt + 1));
756 form += p - strfrmt + 1;
757 *form = '\0';
758 return p;
759}
760
761static void addintlen(char *form)
762{
763 size_t l = strlen(form);
764 char spec = form[l - 1];
765 strcpy(form + l - 1, LUA_INTFRMLEN);
766 form[l + sizeof(LUA_INTFRMLEN) - 2] = spec;
767 form[l + sizeof(LUA_INTFRMLEN) - 1] = '\0';
768}
769
770static unsigned LUA_INTFRM_T num2intfrm(lua_State *L, int arg)
771{
772 if (sizeof(LUA_INTFRM_T) == 4) {
773 return (LUA_INTFRM_T)lj_lib_checkbit(L, arg);
774 } else {
775 cTValue *o;
776 lj_lib_checknumber(L, arg);
777 o = L->base+arg-1;
778 if (tvisint(o))
779 return (LUA_INTFRM_T)intV(o);
780 else
781 return (LUA_INTFRM_T)numV(o);
782 }
783}
784
785static unsigned LUA_INTFRM_T num2uintfrm(lua_State *L, int arg)
786{
787 if (sizeof(LUA_INTFRM_T) == 4) {
788 return (unsigned LUA_INTFRM_T)lj_lib_checkbit(L, arg);
789 } else {
790 cTValue *o;
791 lj_lib_checknumber(L, arg);
792 o = L->base+arg-1;
793 if (tvisint(o))
794 return (unsigned LUA_INTFRM_T)intV(o);
795 else if ((int32_t)o->u32.hi < 0)
796 return (unsigned LUA_INTFRM_T)(LUA_INTFRM_T)numV(o);
797 else
798 return (unsigned LUA_INTFRM_T)numV(o);
799 }
800}
801
802static GCstr *meta_tostring(lua_State *L, int arg)
803{ 645{
804 TValue *o = L->base+arg-1; 646 TValue *o = L->base+arg-1;
805 cTValue *mo; 647 cTValue *mo;
806 lua_assert(o < L->top); /* Caller already checks for existence. */ 648 lua_assert(o < L->top); /* Caller already checks for existence. */
807 if (LJ_LIKELY(tvisstr(o))) 649 if (LJ_LIKELY(tvisstr(o)))
808 return strV(o); 650 return strV(o);
809 if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) { 651 if (retry != 2 && !tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
810 copyTV(L, L->top++, mo); 652 copyTV(L, L->top++, mo);
811 copyTV(L, L->top++, o); 653 copyTV(L, L->top++, o);
812 lua_call(L, 1, 1); 654 lua_call(L, 1, 1);
813 L->top--; 655 copyTV(L, L->base+arg-1, --L->top);
814 if (tvisstr(L->top)) 656 return NULL; /* Buffer may be overwritten, retry. */
815 return strV(L->top);
816 o = L->base+arg-1;
817 copyTV(L, o, L->top);
818 }
819 if (tvisnumber(o)) {
820 return lj_str_fromnumber(L, o);
821 } else if (tvisnil(o)) {
822 return lj_str_newlit(L, "nil");
823 } else if (tvisfalse(o)) {
824 return lj_str_newlit(L, "false");
825 } else if (tvistrue(o)) {
826 return lj_str_newlit(L, "true");
827 } else {
828 if (tvisfunc(o) && isffunc(funcV(o)))
829 lj_str_pushf(L, "function: builtin#%d", funcV(o)->c.ffid);
830 else
831 lj_str_pushf(L, "%s: %p", lj_typename(o), lua_topointer(L, arg));
832 L->top--;
833 return strV(L->top);
834 } 657 }
835} 658 return lj_strfmt_obj(L, o);
836 659}
837LJLIB_CF(string_format) 660
838{ 661LJLIB_CF(string_format) LJLIB_REC(.)
839 int arg = 1, top = (int)(L->top - L->base); 662{
840 GCstr *fmt = lj_lib_checkstr(L, arg); 663 int arg, top = (int)(L->top - L->base);
841 const char *strfrmt = strdata(fmt); 664 GCstr *fmt;
842 const char *strfrmt_end = strfrmt + fmt->len; 665 SBuf *sb;
843 luaL_Buffer b; 666 FormatState fs;
844 luaL_buffinit(L, &b); 667 SFormat sf;
845 while (strfrmt < strfrmt_end) { 668 int retry = 0;
846 if (*strfrmt != L_ESC) { 669again:
847 luaL_addchar(&b, *strfrmt++); 670 arg = 1;
848 } else if (*++strfrmt == L_ESC) { 671 sb = lj_buf_tmp_(L);
849 luaL_addchar(&b, *strfrmt++); /* %% */ 672 fmt = lj_lib_checkstr(L, arg);
850 } else { /* format item */ 673 lj_strfmt_init(&fs, strdata(fmt), fmt->len);
851 char form[MAX_FMTSPEC]; /* to store the format (`%...') */ 674 while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) {
852 char buff[MAX_FMTITEM]; /* to store the formatted item */ 675 if (sf == STRFMT_LIT) {
676 lj_buf_putmem(sb, fs.str, fs.len);
677 } else if (sf == STRFMT_ERR) {
678 lj_err_callerv(L, LJ_ERR_STRFMT, strdata(lj_str_new(L, fs.str, fs.len)));
679 } else {
853 if (++arg > top) 680 if (++arg > top)
854 luaL_argerror(L, arg, lj_obj_typename[0]); 681 luaL_argerror(L, arg, lj_obj_typename[0]);
855 strfrmt = scanformat(L, strfrmt, form); 682 switch (STRFMT_TYPE(sf)) {
856 switch (*strfrmt++) { 683 case STRFMT_INT:
857 case 'c': 684 if (tvisint(L->base+arg-1)) {
858 sprintf(buff, form, lj_lib_checkint(L, arg)); 685 int32_t k = intV(L->base+arg-1);
686 if (sf == STRFMT_INT)
687 lj_strfmt_putint(sb, k); /* Shortcut for plain %d. */
688 else
689 lj_strfmt_putfxint(sb, sf, k);
690 } else {
691 lj_strfmt_putfnum_int(sb, sf, lj_lib_checknum(L, arg));
692 }
859 break; 693 break;
860 case 'd': case 'i': 694 case STRFMT_UINT:
861 addintlen(form); 695 if (tvisint(L->base+arg-1))
862 sprintf(buff, form, num2intfrm(L, arg)); 696 lj_strfmt_putfxint(sb, sf, intV(L->base+arg-1));
697 else
698 lj_strfmt_putfnum_uint(sb, sf, lj_lib_checknum(L, arg));
863 break; 699 break;
864 case 'o': case 'u': case 'x': case 'X': 700 case STRFMT_NUM:
865 addintlen(form); 701 lj_strfmt_putfnum(sb, sf, lj_lib_checknum(L, arg));
866 sprintf(buff, form, num2uintfrm(L, arg));
867 break; 702 break;
868 case 'e': case 'E': case 'f': case 'g': case 'G': case 'a': case 'A': { 703 case STRFMT_STR: {
869 TValue tv; 704 GCstr *str = string_fmt_tostring(L, arg, retry);
870 tv.n = lj_lib_checknum(L, arg); 705 if (str == NULL)
871 if (LJ_UNLIKELY((tv.u32.hi << 1) >= 0xffe00000)) { 706 retry = 1;
872 /* Canonicalize output of non-finite values. */ 707 else if ((sf & STRFMT_T_QUOTED))
873 char *p, nbuf[LJ_STR_NUMBUF]; 708 lj_strfmt_putquoted(sb, str); /* No formatting. */
874 size_t len = lj_str_bufnum(nbuf, &tv); 709 else
875 if (strfrmt[-1] < 'a') { 710 lj_strfmt_putfstr(sb, sf, str);
876 nbuf[len-3] = nbuf[len-3] - 0x20;
877 nbuf[len-2] = nbuf[len-2] - 0x20;
878 nbuf[len-1] = nbuf[len-1] - 0x20;
879 }
880 nbuf[len] = '\0';
881 for (p = form; *p < 'A' && *p != '.'; p++) ;
882 *p++ = 's'; *p = '\0';
883 sprintf(buff, form, nbuf);
884 break;
885 }
886 sprintf(buff, form, (double)tv.n);
887 break; 711 break;
888 } 712 }
889 case 'q': 713 case STRFMT_CHAR:
890 addquoted(L, &b, arg); 714 lj_strfmt_putfchar(sb, sf, lj_lib_checkint(L, arg));
891 continue; 715 break;
892 case 'p': 716 case STRFMT_PTR: /* No formatting. */
893 lj_str_pushf(L, "%p", lua_topointer(L, arg)); 717 lj_strfmt_putptr(sb, lj_obj_ptr(L->base+arg-1));
894 luaL_addvalue(&b);
895 continue;
896 case 's': {
897 GCstr *str = meta_tostring(L, arg);
898 if (!strchr(form, '.') && str->len >= 100) {
899 /* no precision and string is too long to be formatted;
900 keep original string */
901 setstrV(L, L->top++, str);
902 luaL_addvalue(&b);
903 continue;
904 }
905 sprintf(buff, form, strdata(str));
906 break; 718 break;
907 }
908 default: 719 default:
909 lj_err_callerv(L, LJ_ERR_STRFMTO, *(strfrmt -1)); 720 lua_assert(0);
910 break; 721 break;
911 } 722 }
912 luaL_addlstring(&b, buff, strlen(buff));
913 } 723 }
914 } 724 }
915 luaL_pushresult(&b); 725 if (retry++ == 1) goto again;
726 setstrV(L, L->top-1, lj_buf_str(L, sb));
727 lj_gc_check(L);
916 return 1; 728 return 1;
917} 729}
918 730
diff --git a/src/lib_table.c b/src/lib_table.c
index e0e8302f..5f0c8bb4 100644
--- a/src/lib_table.c
+++ b/src/lib_table.c
@@ -16,57 +16,43 @@
16#include "lj_obj.h" 16#include "lj_obj.h"
17#include "lj_gc.h" 17#include "lj_gc.h"
18#include "lj_err.h" 18#include "lj_err.h"
19#include "lj_buf.h"
19#include "lj_tab.h" 20#include "lj_tab.h"
21#include "lj_ff.h"
20#include "lj_lib.h" 22#include "lj_lib.h"
21 23
22/* ------------------------------------------------------------------------ */ 24/* ------------------------------------------------------------------------ */
23 25
24#define LJLIB_MODULE_table 26#define LJLIB_MODULE_table
25 27
26LJLIB_CF(table_foreachi) 28LJLIB_LUA(table_foreachi) /*
27{ 29 function(t, f)
28 GCtab *t = lj_lib_checktab(L, 1); 30 CHECK_tab(t)
29 GCfunc *func = lj_lib_checkfunc(L, 2); 31 CHECK_func(f)
30 MSize i, n = lj_tab_len(t); 32 for i=1,#t do
31 for (i = 1; i <= n; i++) { 33 local r = f(i, t[i])
32 cTValue *val; 34 if r ~= nil then return r end
33 setfuncV(L, L->top, func); 35 end
34 setintV(L->top+1, i); 36 end
35 val = lj_tab_getint(t, (int32_t)i); 37*/
36 if (val) { copyTV(L, L->top+2, val); } else { setnilV(L->top+2); }
37 L->top += 3;
38 lua_call(L, 2, 1);
39 if (!tvisnil(L->top-1))
40 return 1;
41 L->top--;
42 }
43 return 0;
44}
45 38
46LJLIB_CF(table_foreach) 39LJLIB_LUA(table_foreach) /*
47{ 40 function(t, f)
48 GCtab *t = lj_lib_checktab(L, 1); 41 CHECK_tab(t)
49 GCfunc *func = lj_lib_checkfunc(L, 2); 42 CHECK_func(f)
50 L->top = L->base+3; 43 for k, v in PAIRS(t) do
51 setnilV(L->top-1); 44 local r = f(k, v)
52 while (lj_tab_next(L, t, L->top-1)) { 45 if r ~= nil then return r end
53 copyTV(L, L->top+2, L->top); 46 end
54 copyTV(L, L->top+1, L->top-1); 47 end
55 setfuncV(L, L->top, func); 48*/
56 L->top += 3;
57 lua_call(L, 2, 1);
58 if (!tvisnil(L->top-1))
59 return 1;
60 L->top--;
61 }
62 return 0;
63}
64 49
65LJLIB_ASM(table_getn) LJLIB_REC(.) 50LJLIB_LUA(table_getn) /*
66{ 51 function(t)
67 lj_lib_checktab(L, 1); 52 CHECK_tab(t)
68 return FFH_UNREACHABLE; 53 return #t
69} 54 end
55*/
70 56
71LJLIB_CF(table_maxn) 57LJLIB_CF(table_maxn)
72{ 58{
@@ -119,52 +105,47 @@ LJLIB_CF(table_insert) LJLIB_REC(.)
119 return 0; 105 return 0;
120} 106}
121 107
122LJLIB_CF(table_remove) LJLIB_REC(.) 108LJLIB_LUA(table_remove) /*
123{ 109 function(t, pos)
124 GCtab *t = lj_lib_checktab(L, 1); 110 CHECK_tab(t)
125 int32_t e = (int32_t)lj_tab_len(t); 111 local len = #t
126 int32_t pos = lj_lib_optint(L, 2, e); 112 if pos == nil then
127 if (!(1 <= pos && pos <= e)) /* Nothing to remove? */ 113 if len ~= 0 then
128 return 0; 114 local old = t[len]
129 lua_rawgeti(L, 1, pos); /* Get previous value. */ 115 t[len] = nil
130 /* NOBARRIER: This just moves existing elements around. */ 116 return old
131 for (; pos < e; pos++) { 117 end
132 cTValue *src = lj_tab_getint(t, pos+1); 118 else
133 TValue *dst = lj_tab_setint(L, t, pos); 119 CHECK_int(pos)
134 if (src) { 120 if pos >= 1 and pos <= len then
135 copyTV(L, dst, src); 121 local old = t[pos]
136 } else { 122 for i=pos+1,len do
137 setnilV(dst); 123 t[i-1] = t[i]
138 } 124 end
139 } 125 t[len] = nil
140 setnilV(lj_tab_setint(L, t, e)); /* Remove (last) value. */ 126 return old
141 return 1; /* Return previous value. */ 127 end
142} 128 end
129 end
130*/
143 131
144LJLIB_CF(table_concat) 132LJLIB_CF(table_concat) LJLIB_REC(.)
145{ 133{
146 luaL_Buffer b;
147 GCtab *t = lj_lib_checktab(L, 1); 134 GCtab *t = lj_lib_checktab(L, 1);
148 GCstr *sep = lj_lib_optstr(L, 2); 135 GCstr *sep = lj_lib_optstr(L, 2);
149 MSize seplen = sep ? sep->len : 0;
150 int32_t i = lj_lib_optint(L, 3, 1); 136 int32_t i = lj_lib_optint(L, 3, 1);
151 int32_t e = (L->base+3 < L->top && !tvisnil(L->base+3)) ? 137 int32_t e = (L->base+3 < L->top && !tvisnil(L->base+3)) ?
152 lj_lib_checkint(L, 4) : (int32_t)lj_tab_len(t); 138 lj_lib_checkint(L, 4) : (int32_t)lj_tab_len(t);
153 luaL_buffinit(L, &b); 139 SBuf *sb = lj_buf_tmp_(L);
154 if (i <= e) { 140 SBuf *sbx = lj_buf_puttab(sb, t, sep, i, e);
155 for (;;) { 141 if (LJ_UNLIKELY(!sbx)) { /* Error: bad element type. */
156 cTValue *o; 142 int32_t idx = (int32_t)(intptr_t)sbufP(sb);
157 lua_rawgeti(L, 1, i); 143 cTValue *o = lj_tab_getint(t, idx);
158 o = L->top-1; 144 lj_err_callerv(L, LJ_ERR_TABCAT,
159 if (!(tvisstr(o) || tvisnumber(o))) 145 lj_obj_itypename[o ? itypemap(o) : ~LJ_TNIL], idx);
160 lj_err_callerv(L, LJ_ERR_TABCAT, lj_typename(o), i);
161 luaL_addvalue(&b);
162 if (i++ == e) break;
163 if (seplen)
164 luaL_addlstring(&b, strdata(sep), seplen);
165 }
166 } 146 }
167 luaL_pushresult(&b); 147 setstrV(L, L->top-1, lj_buf_str(L, sbx));
148 lj_gc_check(L);
168 return 1; 149 return 1;
169} 150}
170 151
@@ -284,6 +265,30 @@ LJLIB_CF(table_pack)
284} 265}
285#endif 266#endif
286 267
268LJLIB_NOREG LJLIB_CF(table_new) LJLIB_REC(.)
269{
270 int32_t a = lj_lib_checkint(L, 1);
271 int32_t h = lj_lib_checkint(L, 2);
272 lua_createtable(L, a, h);
273 return 1;
274}
275
276LJLIB_NOREG LJLIB_CF(table_clear) LJLIB_REC(.)
277{
278 lj_tab_clear(lj_lib_checktab(L, 1));
279 return 0;
280}
281
282static int luaopen_table_new(lua_State *L)
283{
284 return lj_lib_postreg(L, lj_cf_table_new, FF_table_new, "new");
285}
286
287static int luaopen_table_clear(lua_State *L)
288{
289 return lj_lib_postreg(L, lj_cf_table_clear, FF_table_clear, "clear");
290}
291
287/* ------------------------------------------------------------------------ */ 292/* ------------------------------------------------------------------------ */
288 293
289#include "lj_libdef.h" 294#include "lj_libdef.h"
@@ -295,6 +300,8 @@ LUALIB_API int luaopen_table(lua_State *L)
295 lua_getglobal(L, "unpack"); 300 lua_getglobal(L, "unpack");
296 lua_setfield(L, -2, "unpack"); 301 lua_setfield(L, -2, "unpack");
297#endif 302#endif
303 lj_lib_prereg(L, LUA_TABLIBNAME ".new", luaopen_table_new, tabV(L->top-1));
304 lj_lib_prereg(L, LUA_TABLIBNAME ".clear", luaopen_table_clear, tabV(L->top-1));
298 return 1; 305 return 1;
299} 306}
300 307
diff --git a/src/lj_api.c b/src/lj_api.c
index bc5290b2..e27123bf 100644
--- a/src/lj_api.c
+++ b/src/lj_api.c
@@ -24,6 +24,7 @@
24#include "lj_trace.h" 24#include "lj_trace.h"
25#include "lj_vm.h" 25#include "lj_vm.h"
26#include "lj_strscan.h" 26#include "lj_strscan.h"
27#include "lj_strfmt.h"
27 28
28/* -- Common helper functions --------------------------------------------- */ 29/* -- Common helper functions --------------------------------------------- */
29 30
@@ -434,7 +435,7 @@ LUA_API const char *lua_tolstring(lua_State *L, int idx, size_t *len)
434 } else if (tvisnumber(o)) { 435 } else if (tvisnumber(o)) {
435 lj_gc_check(L); 436 lj_gc_check(L);
436 o = index2adr(L, idx); /* GC may move the stack. */ 437 o = index2adr(L, idx); /* GC may move the stack. */
437 s = lj_str_fromnumber(L, o); 438 s = lj_strfmt_number(L, o);
438 setstrV(L, o, s); 439 setstrV(L, o, s);
439 } else { 440 } else {
440 if (len != NULL) *len = 0; 441 if (len != NULL) *len = 0;
@@ -453,7 +454,7 @@ LUALIB_API const char *luaL_checklstring(lua_State *L, int idx, size_t *len)
453 } else if (tvisnumber(o)) { 454 } else if (tvisnumber(o)) {
454 lj_gc_check(L); 455 lj_gc_check(L);
455 o = index2adr(L, idx); /* GC may move the stack. */ 456 o = index2adr(L, idx); /* GC may move the stack. */
456 s = lj_str_fromnumber(L, o); 457 s = lj_strfmt_number(L, o);
457 setstrV(L, o, s); 458 setstrV(L, o, s);
458 } else { 459 } else {
459 lj_err_argt(L, idx, LUA_TSTRING); 460 lj_err_argt(L, idx, LUA_TSTRING);
@@ -475,7 +476,7 @@ LUALIB_API const char *luaL_optlstring(lua_State *L, int idx,
475 } else if (tvisnumber(o)) { 476 } else if (tvisnumber(o)) {
476 lj_gc_check(L); 477 lj_gc_check(L);
477 o = index2adr(L, idx); /* GC may move the stack. */ 478 o = index2adr(L, idx); /* GC may move the stack. */
478 s = lj_str_fromnumber(L, o); 479 s = lj_strfmt_number(L, o);
479 setstrV(L, o, s); 480 setstrV(L, o, s);
480 } else { 481 } else {
481 lj_err_argt(L, idx, LUA_TSTRING); 482 lj_err_argt(L, idx, LUA_TSTRING);
@@ -507,7 +508,7 @@ LUA_API size_t lua_objlen(lua_State *L, int idx)
507 } else if (tvisudata(o)) { 508 } else if (tvisudata(o)) {
508 return udataV(o)->len; 509 return udataV(o)->len;
509 } else if (tvisnumber(o)) { 510 } else if (tvisnumber(o)) {
510 GCstr *s = lj_str_fromnumber(L, o); 511 GCstr *s = lj_strfmt_number(L, o);
511 setstrV(L, o, s); 512 setstrV(L, o, s);
512 return s->len; 513 return s->len;
513 } else { 514 } else {
@@ -545,17 +546,7 @@ LUA_API lua_State *lua_tothread(lua_State *L, int idx)
545 546
546LUA_API const void *lua_topointer(lua_State *L, int idx) 547LUA_API const void *lua_topointer(lua_State *L, int idx)
547{ 548{
548 cTValue *o = index2adr(L, idx); 549 return lj_obj_ptr(index2adr(L, idx));
549 if (tvisudata(o))
550 return uddata(udataV(o));
551 else if (tvislightud(o))
552 return lightudV(o);
553 else if (tviscdata(o))
554 return cdataptr(cdataV(o));
555 else if (tvisgcv(o))
556 return gcV(o);
557 else
558 return NULL;
559} 550}
560 551
561/* -- Stack setters (object creation) ------------------------------------- */ 552/* -- Stack setters (object creation) ------------------------------------- */
@@ -606,7 +597,7 @@ LUA_API const char *lua_pushvfstring(lua_State *L, const char *fmt,
606 va_list argp) 597 va_list argp)
607{ 598{
608 lj_gc_check(L); 599 lj_gc_check(L);
609 return lj_str_pushvf(L, fmt, argp); 600 return lj_strfmt_pushvf(L, fmt, argp);
610} 601}
611 602
612LUA_API const char *lua_pushfstring(lua_State *L, const char *fmt, ...) 603LUA_API const char *lua_pushfstring(lua_State *L, const char *fmt, ...)
@@ -615,7 +606,7 @@ LUA_API const char *lua_pushfstring(lua_State *L, const char *fmt, ...)
615 va_list argp; 606 va_list argp;
616 lj_gc_check(L); 607 lj_gc_check(L);
617 va_start(argp, fmt); 608 va_start(argp, fmt);
618 ret = lj_str_pushvf(L, fmt, argp); 609 ret = lj_strfmt_pushvf(L, fmt, argp);
619 va_end(argp); 610 va_end(argp);
620 return ret; 611 return ret;
621} 612}
@@ -649,10 +640,8 @@ LUA_API void lua_pushlightuserdata(lua_State *L, void *p)
649 640
650LUA_API void lua_createtable(lua_State *L, int narray, int nrec) 641LUA_API void lua_createtable(lua_State *L, int narray, int nrec)
651{ 642{
652 GCtab *t;
653 lj_gc_check(L); 643 lj_gc_check(L);
654 t = lj_tab_new(L, (uint32_t)(narray > 0 ? narray+1 : 0), hsize2hbits(nrec)); 644 settabV(L, L->top, lj_tab_new_ah(L, narray, nrec));
655 settabV(L, L->top, t);
656 incr_top(L); 645 incr_top(L);
657} 646}
658 647
diff --git a/src/lj_arch.h b/src/lj_arch.h
index 0196eedc..acdfe18b 100644
--- a/src/lj_arch.h
+++ b/src/lj_arch.h
@@ -234,6 +234,7 @@
234 234
235#elif LUAJIT_TARGET == LUAJIT_ARCH_PPCSPE 235#elif LUAJIT_TARGET == LUAJIT_ARCH_PPCSPE
236 236
237#error "The PPC/e500 port is broken and will be abandoned with LuaJIT 2.1"
237#define LJ_ARCH_NAME "ppcspe" 238#define LJ_ARCH_NAME "ppcspe"
238#define LJ_ARCH_BITS 32 239#define LJ_ARCH_BITS 32
239#define LJ_ARCH_ENDIAN LUAJIT_BE 240#define LJ_ARCH_ENDIAN LUAJIT_BE
@@ -372,6 +373,21 @@
372#define LJ_HASFFI 1 373#define LJ_HASFFI 1
373#endif 374#endif
374 375
376#if defined(LUAJIT_DISABLE_PROFILE)
377#define LJ_HASPROFILE 0
378#elif LJ_TARGET_POSIX
379#define LJ_HASPROFILE 1
380#define LJ_PROFILE_SIGPROF 1
381#elif LJ_TARGET_PS3
382#define LJ_HASPROFILE 1
383#define LJ_PROFILE_PTHREAD 1
384#elif LJ_TARGET_WINDOWS || LJ_TARGET_XBOX360
385#define LJ_HASPROFILE 1
386#define LJ_PROFILE_WTHREAD 1
387#else
388#define LJ_HASPROFILE 0
389#endif
390
375#ifndef LJ_ARCH_HASFPU 391#ifndef LJ_ARCH_HASFPU
376#define LJ_ARCH_HASFPU 1 392#define LJ_ARCH_HASFPU 1
377#endif 393#endif
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 264649ae..329e5c95 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -179,6 +179,12 @@ IRFLDEF(FLOFS)
179#error "Missing instruction emitter for target CPU" 179#error "Missing instruction emitter for target CPU"
180#endif 180#endif
181 181
182/* Generic load/store of register from/to stack slot. */
183#define emit_spload(as, ir, r, ofs) \
184 emit_loadofs(as, ir, (r), RID_SP, (ofs))
185#define emit_spstore(as, ir, r, ofs) \
186 emit_storeofs(as, ir, (r), RID_SP, (ofs))
187
182/* -- Register allocator debugging ---------------------------------------- */ 188/* -- Register allocator debugging ---------------------------------------- */
183 189
184/* #define LUAJIT_DEBUG_RA */ 190/* #define LUAJIT_DEBUG_RA */
@@ -336,7 +342,7 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
336 emit_getgl(as, r, jit_base); 342 emit_getgl(as, r, jit_base);
337 } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) { 343 } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) {
338 lua_assert(irt_isnil(ir->t)); /* REF_NIL stores ASMREF_L register. */ 344 lua_assert(irt_isnil(ir->t)); /* REF_NIL stores ASMREF_L register. */
339 emit_getgl(as, r, jit_L); 345 emit_getgl(as, r, cur_L);
340#if LJ_64 346#if LJ_64
341 } else if (ir->o == IR_KINT64) { 347 } else if (ir->o == IR_KINT64) {
342 emit_loadu64(as, r, ir_kint64(ir)->u64); 348 emit_loadu64(as, r, ir_kint64(ir)->u64);
@@ -693,7 +699,7 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref)
693 emit_loadu64(as, dest, ir_kint64(ir)->u64); 699 emit_loadu64(as, dest, ir_kint64(ir)->u64);
694 return; 700 return;
695#endif 701#endif
696 } else { 702 } else if (ir->o != IR_KPRI) {
697 lua_assert(ir->o == IR_KINT || ir->o == IR_KGC || 703 lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
698 ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL); 704 ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL);
699 emit_loadi(as, dest, ir->i); 705 emit_loadi(as, dest, ir->i);
@@ -943,44 +949,6 @@ static void asm_snap_prep(ASMState *as)
943 949
944/* -- Miscellaneous helpers ----------------------------------------------- */ 950/* -- Miscellaneous helpers ----------------------------------------------- */
945 951
946/* Collect arguments from CALL* and CARG instructions. */
947static void asm_collectargs(ASMState *as, IRIns *ir,
948 const CCallInfo *ci, IRRef *args)
949{
950 uint32_t n = CCI_NARGS(ci);
951 lua_assert(n <= CCI_NARGS_MAX*2); /* Account for split args. */
952 if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
953 while (n-- > 1) {
954 ir = IR(ir->op1);
955 lua_assert(ir->o == IR_CARG);
956 args[n] = ir->op2 == REF_NIL ? 0 : ir->op2;
957 }
958 args[0] = ir->op1 == REF_NIL ? 0 : ir->op1;
959 lua_assert(IR(ir->op1)->o != IR_CARG);
960}
961
962/* Reconstruct CCallInfo flags for CALLX*. */
963static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
964{
965 uint32_t nargs = 0;
966 if (ir->op1 != REF_NIL) { /* Count number of arguments first. */
967 IRIns *ira = IR(ir->op1);
968 nargs++;
969 while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); }
970 }
971#if LJ_HASFFI
972 if (IR(ir->op2)->o == IR_CARG) { /* Copy calling convention info. */
973 CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i;
974 CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id);
975 nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0);
976#if LJ_TARGET_X86
977 nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT);
978#endif
979 }
980#endif
981 return (nargs | (ir->t.irt << CCI_OTSHIFT));
982}
983
984/* Calculate stack adjustment. */ 952/* Calculate stack adjustment. */
985static int32_t asm_stack_adjust(ASMState *as) 953static int32_t asm_stack_adjust(ASMState *as)
986{ 954{
@@ -1065,6 +1033,263 @@ static void asm_gcstep(ASMState *as, IRIns *ir)
1065 as->gcsteps = 0x80000000; /* Prevent implicit GC check further up. */ 1033 as->gcsteps = 0x80000000; /* Prevent implicit GC check further up. */
1066} 1034}
1067 1035
1036/* -- Buffer operations --------------------------------------------------- */
1037
1038static void asm_tvptr(ASMState *as, Reg dest, IRRef ref);
1039
1040static void asm_bufhdr(ASMState *as, IRIns *ir)
1041{
1042 Reg sb = ra_dest(as, ir, RSET_GPR);
1043 if ((ir->op2 & IRBUFHDR_APPEND)) {
1044 /* Rematerialize const buffer pointer instead of likely spill. */
1045 IRIns *irp = IR(ir->op1);
1046 if (!(ra_hasreg(irp->r) || irp == ir-1 ||
1047 (irp == ir-2 && !ra_used(ir-1)))) {
1048 while (!(irp->o == IR_BUFHDR && !(irp->op2 & IRBUFHDR_APPEND)))
1049 irp = IR(irp->op1);
1050 if (irref_isk(irp->op1)) {
1051 ra_weak(as, ra_allocref(as, ir->op1, RSET_GPR));
1052 ir = irp;
1053 }
1054 }
1055 } else {
1056 Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
1057 /* Passing ir isn't strictly correct, but it's an IRT_P32, too. */
1058 emit_storeofs(as, ir, tmp, sb, offsetof(SBuf, p));
1059 emit_loadofs(as, ir, tmp, sb, offsetof(SBuf, b));
1060 }
1061#if LJ_TARGET_X86ORX64
1062 ra_left(as, sb, ir->op1);
1063#else
1064 ra_leftov(as, sb, ir->op1);
1065#endif
1066}
1067
1068static void asm_bufput(ASMState *as, IRIns *ir)
1069{
1070 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_putstr];
1071 IRRef args[3];
1072 IRIns *irs;
1073 int kchar = -1;
1074 args[0] = ir->op1; /* SBuf * */
1075 args[1] = ir->op2; /* GCstr * */
1076 irs = IR(ir->op2);
1077 lua_assert(irt_isstr(irs->t));
1078 if (irs->o == IR_KGC) {
1079 GCstr *s = ir_kstr(irs);
1080 if (s->len == 1) { /* Optimize put of single-char string constant. */
1081 kchar = strdata(s)[0];
1082 args[1] = ASMREF_TMP1; /* int, truncated to char */
1083 ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
1084 }
1085 } else if (mayfuse(as, ir->op2) && ra_noreg(irs->r)) {
1086 if (irs->o == IR_TOSTR) { /* Fuse number to string conversions. */
1087 if (irs->op2 == IRTOSTR_NUM) {
1088 args[1] = ASMREF_TMP1; /* TValue * */
1089 ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putnum];
1090 } else {
1091 lua_assert(irt_isinteger(IR(irs->op1)->t));
1092 args[1] = irs->op1; /* int */
1093 if (irs->op2 == IRTOSTR_INT)
1094 ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putint];
1095 else
1096 ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
1097 }
1098 } else if (irs->o == IR_SNEW) { /* Fuse string allocation. */
1099 args[1] = irs->op1; /* const void * */
1100 args[2] = irs->op2; /* MSize */
1101 ci = &lj_ir_callinfo[IRCALL_lj_buf_putmem];
1102 }
1103 }
1104 asm_setupresult(as, ir, ci); /* SBuf * */
1105 asm_gencall(as, ci, args);
1106 if (args[1] == ASMREF_TMP1) {
1107 Reg tmp = ra_releasetmp(as, ASMREF_TMP1);
1108 if (kchar == -1)
1109 asm_tvptr(as, tmp, irs->op1);
1110 else
1111 ra_allockreg(as, kchar, tmp);
1112 }
1113}
1114
1115static void asm_bufstr(ASMState *as, IRIns *ir)
1116{
1117 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_tostr];
1118 IRRef args[1];
1119 args[0] = ir->op1; /* SBuf *sb */
1120 as->gcsteps++;
1121 asm_setupresult(as, ir, ci); /* GCstr * */
1122 asm_gencall(as, ci, args);
1123}
1124
1125/* -- Type conversions ---------------------------------------------------- */
1126
1127static void asm_tostr(ASMState *as, IRIns *ir)
1128{
1129 const CCallInfo *ci;
1130 IRRef args[2];
1131 args[0] = ASMREF_L;
1132 as->gcsteps++;
1133 if (ir->op2 == IRTOSTR_NUM) {
1134 args[1] = ASMREF_TMP1; /* cTValue * */
1135 ci = &lj_ir_callinfo[IRCALL_lj_strfmt_num];
1136 } else {
1137 args[1] = ir->op1; /* int32_t k */
1138 if (ir->op2 == IRTOSTR_INT)
1139 ci = &lj_ir_callinfo[IRCALL_lj_strfmt_int];
1140 else
1141 ci = &lj_ir_callinfo[IRCALL_lj_strfmt_char];
1142 }
1143 asm_setupresult(as, ir, ci); /* GCstr * */
1144 asm_gencall(as, ci, args);
1145 if (ir->op2 == IRTOSTR_NUM)
1146 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
1147}
1148
1149#if LJ_32 && LJ_HASFFI && !LJ_SOFTFP && !LJ_TARGET_X86
1150static void asm_conv64(ASMState *as, IRIns *ir)
1151{
1152 IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
1153 IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
1154 IRCallID id;
1155 IRRef args[2];
1156 lua_assert((ir-1)->o == IR_CONV && ir->o == IR_HIOP);
1157 args[LJ_BE] = (ir-1)->op1;
1158 args[LJ_LE] = ir->op1;
1159 if (st == IRT_NUM || st == IRT_FLOAT) {
1160 id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
1161 ir--;
1162 } else {
1163 id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
1164 }
1165 {
1166#if LJ_TARGET_ARM && !LJ_ABI_SOFTFP
1167 CCallInfo cim = lj_ir_callinfo[id], *ci = &cim;
1168 cim.flags |= CCI_VARARG; /* These calls don't use the hard-float ABI! */
1169#else
1170 const CCallInfo *ci = &lj_ir_callinfo[id];
1171#endif
1172 asm_setupresult(as, ir, ci);
1173 asm_gencall(as, ci, args);
1174 }
1175}
1176#endif
1177
1178/* -- Memory references --------------------------------------------------- */
1179
1180static void asm_newref(ASMState *as, IRIns *ir)
1181{
1182 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
1183 IRRef args[3];
1184 if (ir->r == RID_SINK)
1185 return;
1186 args[0] = ASMREF_L; /* lua_State *L */
1187 args[1] = ir->op1; /* GCtab *t */
1188 args[2] = ASMREF_TMP1; /* cTValue *key */
1189 asm_setupresult(as, ir, ci); /* TValue * */
1190 asm_gencall(as, ci, args);
1191 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
1192}
1193
1194static void asm_lref(ASMState *as, IRIns *ir)
1195{
1196 Reg r = ra_dest(as, ir, RSET_GPR);
1197#if LJ_TARGET_X86ORX64
1198 ra_left(as, r, ASMREF_L);
1199#else
1200 ra_leftov(as, r, ASMREF_L);
1201#endif
1202}
1203
1204/* -- Calls --------------------------------------------------------------- */
1205
1206/* Collect arguments from CALL* and CARG instructions. */
1207static void asm_collectargs(ASMState *as, IRIns *ir,
1208 const CCallInfo *ci, IRRef *args)
1209{
1210 uint32_t n = CCI_XNARGS(ci);
1211 lua_assert(n <= CCI_NARGS_MAX*2); /* Account for split args. */
1212 if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
1213 while (n-- > 1) {
1214 ir = IR(ir->op1);
1215 lua_assert(ir->o == IR_CARG);
1216 args[n] = ir->op2 == REF_NIL ? 0 : ir->op2;
1217 }
1218 args[0] = ir->op1 == REF_NIL ? 0 : ir->op1;
1219 lua_assert(IR(ir->op1)->o != IR_CARG);
1220}
1221
1222/* Reconstruct CCallInfo flags for CALLX*. */
1223static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
1224{
1225 uint32_t nargs = 0;
1226 if (ir->op1 != REF_NIL) { /* Count number of arguments first. */
1227 IRIns *ira = IR(ir->op1);
1228 nargs++;
1229 while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); }
1230 }
1231#if LJ_HASFFI
1232 if (IR(ir->op2)->o == IR_CARG) { /* Copy calling convention info. */
1233 CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i;
1234 CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id);
1235 nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0);
1236#if LJ_TARGET_X86
1237 nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT);
1238#endif
1239 }
1240#endif
1241 return (nargs | (ir->t.irt << CCI_OTSHIFT));
1242}
1243
1244static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
1245{
1246 const CCallInfo *ci = &lj_ir_callinfo[id];
1247 IRRef args[2];
1248 args[0] = ir->op1;
1249 args[1] = ir->op2;
1250 asm_setupresult(as, ir, ci);
1251 asm_gencall(as, ci, args);
1252}
1253
1254static void asm_call(ASMState *as, IRIns *ir)
1255{
1256 IRRef args[CCI_NARGS_MAX];
1257 const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
1258 asm_collectargs(as, ir, ci, args);
1259 asm_setupresult(as, ir, ci);
1260 asm_gencall(as, ci, args);
1261}
1262
1263#if !LJ_SOFTFP
1264static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref);
1265
1266#if !LJ_TARGET_X86ORX64
1267static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref)
1268{
1269 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
1270 IRRef args[2];
1271 args[0] = lref;
1272 args[1] = rref;
1273 asm_setupresult(as, ir, ci);
1274 asm_gencall(as, ci, args);
1275}
1276#endif
1277
1278static int asm_fpjoin_pow(ASMState *as, IRIns *ir)
1279{
1280 IRIns *irp = IR(ir->op1);
1281 if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
1282 IRIns *irpp = IR(irp->op1);
1283 if (irpp == ir-2 && irpp->o == IR_FPMATH &&
1284 irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
1285 asm_fppow(as, ir, irpp->op1, irp->op2);
1286 return 1;
1287 }
1288 }
1289 return 0;
1290}
1291#endif
1292
1068/* -- PHI and loop handling ----------------------------------------------- */ 1293/* -- PHI and loop handling ----------------------------------------------- */
1069 1294
1070/* Break a PHI cycle by renaming to a free register (evict if needed). */ 1295/* Break a PHI cycle by renaming to a free register (evict if needed). */
@@ -1338,6 +1563,129 @@ static void asm_loop(ASMState *as)
1338#error "Missing assembler for target CPU" 1563#error "Missing assembler for target CPU"
1339#endif 1564#endif
1340 1565
1566/* -- Instruction dispatch ------------------------------------------------ */
1567
1568/* Assemble a single instruction. */
1569static void asm_ir(ASMState *as, IRIns *ir)
1570{
1571 switch ((IROp)ir->o) {
1572 /* Miscellaneous ops. */
1573 case IR_LOOP: asm_loop(as); break;
1574 case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
1575 case IR_USE:
1576 ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
1577 case IR_PHI: asm_phi(as, ir); break;
1578 case IR_HIOP: asm_hiop(as, ir); break;
1579 case IR_GCSTEP: asm_gcstep(as, ir); break;
1580 case IR_PROF: asm_prof(as, ir); break;
1581
1582 /* Guarded assertions. */
1583 case IR_LT: case IR_GE: case IR_LE: case IR_GT:
1584 case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
1585 case IR_ABC:
1586 asm_comp(as, ir);
1587 break;
1588 case IR_EQ: case IR_NE:
1589 if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
1590 as->curins--;
1591 asm_href(as, ir-1, (IROp)ir->o);
1592 } else {
1593 asm_equal(as, ir);
1594 }
1595 break;
1596
1597 case IR_RETF: asm_retf(as, ir); break;
1598
1599 /* Bit ops. */
1600 case IR_BNOT: asm_bnot(as, ir); break;
1601 case IR_BSWAP: asm_bswap(as, ir); break;
1602 case IR_BAND: asm_band(as, ir); break;
1603 case IR_BOR: asm_bor(as, ir); break;
1604 case IR_BXOR: asm_bxor(as, ir); break;
1605 case IR_BSHL: asm_bshl(as, ir); break;
1606 case IR_BSHR: asm_bshr(as, ir); break;
1607 case IR_BSAR: asm_bsar(as, ir); break;
1608 case IR_BROL: asm_brol(as, ir); break;
1609 case IR_BROR: asm_bror(as, ir); break;
1610
1611 /* Arithmetic ops. */
1612 case IR_ADD: asm_add(as, ir); break;
1613 case IR_SUB: asm_sub(as, ir); break;
1614 case IR_MUL: asm_mul(as, ir); break;
1615 case IR_DIV: asm_div(as, ir); break;
1616 case IR_MOD: asm_mod(as, ir); break;
1617 case IR_POW: asm_pow(as, ir); break;
1618 case IR_NEG: asm_neg(as, ir); break;
1619 case IR_ABS: asm_abs(as, ir); break;
1620 case IR_ATAN2: asm_atan2(as, ir); break;
1621 case IR_LDEXP: asm_ldexp(as, ir); break;
1622 case IR_MIN: asm_min(as, ir); break;
1623 case IR_MAX: asm_max(as, ir); break;
1624 case IR_FPMATH: asm_fpmath(as, ir); break;
1625
1626 /* Overflow-checking arithmetic ops. */
1627 case IR_ADDOV: asm_addov(as, ir); break;
1628 case IR_SUBOV: asm_subov(as, ir); break;
1629 case IR_MULOV: asm_mulov(as, ir); break;
1630
1631 /* Memory references. */
1632 case IR_AREF: asm_aref(as, ir); break;
1633 case IR_HREF: asm_href(as, ir, 0); break;
1634 case IR_HREFK: asm_hrefk(as, ir); break;
1635 case IR_NEWREF: asm_newref(as, ir); break;
1636 case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
1637 case IR_FREF: asm_fref(as, ir); break;
1638 case IR_STRREF: asm_strref(as, ir); break;
1639 case IR_LREF: asm_lref(as, ir); break;
1640
1641 /* Loads and stores. */
1642 case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
1643 asm_ahuvload(as, ir);
1644 break;
1645 case IR_FLOAD: asm_fload(as, ir); break;
1646 case IR_XLOAD: asm_xload(as, ir); break;
1647 case IR_SLOAD: asm_sload(as, ir); break;
1648
1649 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
1650 case IR_FSTORE: asm_fstore(as, ir); break;
1651 case IR_XSTORE: asm_xstore(as, ir); break;
1652
1653 /* Allocations. */
1654 case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
1655 case IR_TNEW: asm_tnew(as, ir); break;
1656 case IR_TDUP: asm_tdup(as, ir); break;
1657 case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
1658
1659 /* Buffer operations. */
1660 case IR_BUFHDR: asm_bufhdr(as, ir); break;
1661 case IR_BUFPUT: asm_bufput(as, ir); break;
1662 case IR_BUFSTR: asm_bufstr(as, ir); break;
1663
1664 /* Write barriers. */
1665 case IR_TBAR: asm_tbar(as, ir); break;
1666 case IR_OBAR: asm_obar(as, ir); break;
1667
1668 /* Type conversions. */
1669 case IR_TOBIT: asm_tobit(as, ir); break;
1670 case IR_CONV: asm_conv(as, ir); break;
1671 case IR_TOSTR: asm_tostr(as, ir); break;
1672 case IR_STRTO: asm_strto(as, ir); break;
1673
1674 /* Calls. */
1675 case IR_CALLA:
1676 as->gcsteps++;
1677 /* fallthrough */
1678 case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
1679 case IR_CALLXS: asm_callx(as, ir); break;
1680 case IR_CARG: break;
1681
1682 default:
1683 setintV(&as->J->errinfo, ir->o);
1684 lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
1685 break;
1686 }
1687}
1688
1341/* -- Head of trace ------------------------------------------------------- */ 1689/* -- Head of trace ------------------------------------------------------- */
1342 1690
1343/* Head of a root trace. */ 1691/* Head of a root trace. */
@@ -1671,7 +2019,7 @@ static void asm_setup_regsp(ASMState *as)
1671 as->modset |= RSET_SCRATCH; 2019 as->modset |= RSET_SCRATCH;
1672 continue; 2020 continue;
1673 } 2021 }
1674 case IR_CALLN: case IR_CALLL: case IR_CALLS: { 2022 case IR_CALLN: case IR_CALLA: case IR_CALLL: case IR_CALLS: {
1675 const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; 2023 const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
1676 ir->prev = asm_setup_call_slots(as, ir, ci); 2024 ir->prev = asm_setup_call_slots(as, ir, ci);
1677 if (inloop) 2025 if (inloop)
@@ -1716,10 +2064,20 @@ static void asm_setup_regsp(ASMState *as)
1716 /* fallthrough */ 2064 /* fallthrough */
1717#endif 2065#endif
1718 /* C calls evict all scratch regs and return results in RID_RET. */ 2066 /* C calls evict all scratch regs and return results in RID_RET. */
1719 case IR_SNEW: case IR_XSNEW: case IR_NEWREF: 2067 case IR_SNEW: case IR_XSNEW: case IR_NEWREF: case IR_BUFPUT:
1720 if (REGARG_NUMGPR < 3 && as->evenspill < 3) 2068 if (REGARG_NUMGPR < 3 && as->evenspill < 3)
1721 as->evenspill = 3; /* lj_str_new and lj_tab_newkey need 3 args. */ 2069 as->evenspill = 3; /* lj_str_new and lj_tab_newkey need 3 args. */
1722 case IR_TNEW: case IR_TDUP: case IR_CNEW: case IR_CNEWI: case IR_TOSTR: 2070#if LJ_TARGET_X86 && LJ_HASFFI
2071 if (0) {
2072 case IR_CNEW:
2073 if (ir->op2 != REF_NIL && as->evenspill < 4)
2074 as->evenspill = 4; /* lj_cdata_newv needs 4 args. */
2075 }
2076#else
2077 case IR_CNEW:
2078#endif
2079 case IR_TNEW: case IR_TDUP: case IR_CNEWI: case IR_TOSTR:
2080 case IR_BUFSTR:
1723 ir->prev = REGSP_HINT(RID_RET); 2081 ir->prev = REGSP_HINT(RID_RET);
1724 if (inloop) 2082 if (inloop)
1725 as->modset = RSET_SCRATCH; 2083 as->modset = RSET_SCRATCH;
@@ -1755,7 +2113,7 @@ static void asm_setup_regsp(ASMState *as)
1755 break; 2113 break;
1756 case IR_FPMATH: 2114 case IR_FPMATH:
1757#if LJ_TARGET_X86ORX64 2115#if LJ_TARGET_X86ORX64
1758 if (ir->op2 == IRFPM_EXP2) { /* May be joined to lj_vm_pow_sse. */ 2116 if (ir->op2 == IRFPM_EXP2) { /* May be joined to pow. */
1759 ir->prev = REGSP_HINT(RID_XMM0); 2117 ir->prev = REGSP_HINT(RID_XMM0);
1760#if !LJ_64 2118#if !LJ_64
1761 if (as->evenspill < 4) /* Leave room for 16 byte scratch area. */ 2119 if (as->evenspill < 4) /* Leave room for 16 byte scratch area. */
diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h
index 72f205d9..8339367b 100644
--- a/src/lj_asm_arm.h
+++ b/src/lj_asm_arm.h
@@ -338,7 +338,7 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, ARMIns ai, ARMIns air)
338/* Generate a call to a C function. */ 338/* Generate a call to a C function. */
339static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) 339static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
340{ 340{
341 uint32_t n, nargs = CCI_NARGS(ci); 341 uint32_t n, nargs = CCI_XNARGS(ci);
342 int32_t ofs = 0; 342 int32_t ofs = 0;
343#if LJ_SOFTFP 343#if LJ_SOFTFP
344 Reg gpr = REGARG_FIRSTGPR; 344 Reg gpr = REGARG_FIRSTGPR;
@@ -453,15 +453,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
453 UNUSED(ci); 453 UNUSED(ci);
454} 454}
455 455
456static void asm_call(ASMState *as, IRIns *ir)
457{
458 IRRef args[CCI_NARGS_MAX];
459 const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
460 asm_collectargs(as, ir, ci, args);
461 asm_setupresult(as, ir, ci);
462 asm_gencall(as, ci, args);
463}
464
465static void asm_callx(ASMState *as, IRIns *ir) 456static void asm_callx(ASMState *as, IRIns *ir)
466{ 457{
467 IRRef args[CCI_NARGS_MAX*2]; 458 IRRef args[CCI_NARGS_MAX*2];
@@ -529,6 +520,8 @@ static void asm_tobit(ASMState *as, IRIns *ir)
529 emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15)); 520 emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15));
530 emit_dnm(as, ARMI_VADD_D, (tmp & 15), (left & 15), (right & 15)); 521 emit_dnm(as, ARMI_VADD_D, (tmp & 15), (left & 15), (right & 15));
531} 522}
523#else
524#define asm_tobit(as, ir) lua_assert(0)
532#endif 525#endif
533 526
534static void asm_conv(ASMState *as, IRIns *ir) 527static void asm_conv(ASMState *as, IRIns *ir)
@@ -601,31 +594,6 @@ static void asm_conv(ASMState *as, IRIns *ir)
601 } 594 }
602} 595}
603 596
604#if !LJ_SOFTFP && LJ_HASFFI
605static void asm_conv64(ASMState *as, IRIns *ir)
606{
607 IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
608 IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
609 IRCallID id;
610 CCallInfo ci;
611 IRRef args[2];
612 args[0] = (ir-1)->op1;
613 args[1] = ir->op1;
614 if (st == IRT_NUM || st == IRT_FLOAT) {
615 id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
616 ir--;
617 } else {
618 id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
619 }
620 ci = lj_ir_callinfo[id];
621#if !LJ_ABI_SOFTFP
622 ci.flags |= CCI_VARARG; /* These calls don't use the hard-float ABI! */
623#endif
624 asm_setupresult(as, ir, &ci);
625 asm_gencall(as, &ci, args);
626}
627#endif
628
629static void asm_strto(ASMState *as, IRIns *ir) 597static void asm_strto(ASMState *as, IRIns *ir)
630{ 598{
631 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; 599 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
@@ -689,6 +657,8 @@ static void asm_strto(ASMState *as, IRIns *ir)
689 emit_opk(as, ARMI_ADD, tmp, RID_SP, ofs, RSET_GPR); 657 emit_opk(as, ARMI_ADD, tmp, RID_SP, ofs, RSET_GPR);
690} 658}
691 659
660/* -- Memory references --------------------------------------------------- */
661
692/* Get pointer to TValue. */ 662/* Get pointer to TValue. */
693static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) 663static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
694{ 664{
@@ -714,7 +684,7 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
714 Reg src = ra_alloc1(as, ref, allow); 684 Reg src = ra_alloc1(as, ref, allow);
715 emit_lso(as, ARMI_STR, src, RID_SP, 0); 685 emit_lso(as, ARMI_STR, src, RID_SP, 0);
716 } 686 }
717 if ((ir+1)->o == IR_HIOP) 687 if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
718 type = ra_alloc1(as, ref+1, allow); 688 type = ra_alloc1(as, ref+1, allow);
719 else 689 else
720 type = ra_allock(as, irt_toitype(ir->t), allow); 690 type = ra_allock(as, irt_toitype(ir->t), allow);
@@ -722,27 +692,6 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
722 } 692 }
723} 693}
724 694
725static void asm_tostr(ASMState *as, IRIns *ir)
726{
727 IRRef args[2];
728 args[0] = ASMREF_L;
729 as->gcsteps++;
730 if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) {
731 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
732 args[1] = ASMREF_TMP1; /* const lua_Number * */
733 asm_setupresult(as, ir, ci); /* GCstr * */
734 asm_gencall(as, ci, args);
735 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
736 } else {
737 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
738 args[1] = ir->op1; /* int32_t k */
739 asm_setupresult(as, ir, ci); /* GCstr * */
740 asm_gencall(as, ci, args);
741 }
742}
743
744/* -- Memory references --------------------------------------------------- */
745
746static void asm_aref(ASMState *as, IRIns *ir) 695static void asm_aref(ASMState *as, IRIns *ir)
747{ 696{
748 Reg dest = ra_dest(as, ir, RSET_GPR); 697 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -960,20 +909,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
960 emit_opk(as, ARMI_ADD, dest, node, ofs, RSET_GPR); 909 emit_opk(as, ARMI_ADD, dest, node, ofs, RSET_GPR);
961} 910}
962 911
963static void asm_newref(ASMState *as, IRIns *ir)
964{
965 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
966 IRRef args[3];
967 if (ir->r == RID_SINK)
968 return;
969 args[0] = ASMREF_L; /* lua_State *L */
970 args[1] = ir->op1; /* GCtab *t */
971 args[2] = ASMREF_TMP1; /* cTValue *key */
972 asm_setupresult(as, ir, ci); /* TValue * */
973 asm_gencall(as, ci, args);
974 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
975}
976
977static void asm_uref(ASMState *as, IRIns *ir) 912static void asm_uref(ASMState *as, IRIns *ir)
978{ 913{
979 /* NYI: Check that UREFO is still open and not aliasing a slot. */ 914 /* NYI: Check that UREFO is still open and not aliasing a slot. */
@@ -1106,7 +1041,7 @@ static void asm_xload(ASMState *as, IRIns *ir)
1106 asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); 1041 asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
1107} 1042}
1108 1043
1109static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) 1044static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
1110{ 1045{
1111 if (ir->r != RID_SINK) { 1046 if (ir->r != RID_SINK) {
1112 Reg src = ra_alloc1(as, ir->op2, 1047 Reg src = ra_alloc1(as, ir->op2,
@@ -1116,6 +1051,8 @@ static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
1116 } 1051 }
1117} 1052}
1118 1053
1054#define asm_xstore(as, ir) asm_xstore_(as, ir, 0)
1055
1119static void asm_ahuvload(ASMState *as, IRIns *ir) 1056static void asm_ahuvload(ASMState *as, IRIns *ir)
1120{ 1057{
1121 int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP); 1058 int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);
@@ -1273,19 +1210,16 @@ dotypecheck:
1273static void asm_cnew(ASMState *as, IRIns *ir) 1210static void asm_cnew(ASMState *as, IRIns *ir)
1274{ 1211{
1275 CTState *cts = ctype_ctsG(J2G(as->J)); 1212 CTState *cts = ctype_ctsG(J2G(as->J));
1276 CTypeID ctypeid = (CTypeID)IR(ir->op1)->i; 1213 CTypeID id = (CTypeID)IR(ir->op1)->i;
1277 CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ? 1214 CTSize sz;
1278 lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i; 1215 CTInfo info = lj_ctype_info(cts, id, &sz);
1279 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; 1216 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
1280 IRRef args[2]; 1217 IRRef args[4];
1281 RegSet allow = (RSET_GPR & ~RSET_SCRATCH); 1218 RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
1282 RegSet drop = RSET_SCRATCH; 1219 RegSet drop = RSET_SCRATCH;
1283 lua_assert(sz != CTSIZE_INVALID); 1220 lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
1284 1221
1285 args[0] = ASMREF_L; /* lua_State *L */
1286 args[1] = ASMREF_TMP1; /* MSize size */
1287 as->gcsteps++; 1222 as->gcsteps++;
1288
1289 if (ra_hasreg(ir->r)) 1223 if (ra_hasreg(ir->r))
1290 rset_clear(drop, ir->r); /* Dest reg handled below. */ 1224 rset_clear(drop, ir->r); /* Dest reg handled below. */
1291 ra_evictset(as, drop); 1225 ra_evictset(as, drop);
@@ -1307,16 +1241,28 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1307 if (ofs == sizeof(GCcdata)) break; 1241 if (ofs == sizeof(GCcdata)) break;
1308 ofs -= 4; ir--; 1242 ofs -= 4; ir--;
1309 } 1243 }
1244 } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */
1245 ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
1246 args[0] = ASMREF_L; /* lua_State *L */
1247 args[1] = ir->op1; /* CTypeID id */
1248 args[2] = ir->op2; /* CTSize sz */
1249 args[3] = ASMREF_TMP1; /* CTSize align */
1250 asm_gencall(as, ci, args);
1251 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
1252 return;
1310 } 1253 }
1254
1311 /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ 1255 /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
1312 { 1256 {
1313 uint32_t k = emit_isk12(ARMI_MOV, ctypeid); 1257 uint32_t k = emit_isk12(ARMI_MOV, id);
1314 Reg r = k ? RID_R1 : ra_allock(as, ctypeid, allow); 1258 Reg r = k ? RID_R1 : ra_allock(as, id, allow);
1315 emit_lso(as, ARMI_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct)); 1259 emit_lso(as, ARMI_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct));
1316 emit_lsox(as, ARMI_STRH, r, RID_RET, offsetof(GCcdata, ctypeid)); 1260 emit_lsox(as, ARMI_STRH, r, RID_RET, offsetof(GCcdata, ctypeid));
1317 emit_d(as, ARMI_MOV|ARMI_K12|~LJ_TCDATA, RID_TMP); 1261 emit_d(as, ARMI_MOV|ARMI_K12|~LJ_TCDATA, RID_TMP);
1318 if (k) emit_d(as, ARMI_MOV^k, RID_R1); 1262 if (k) emit_d(as, ARMI_MOV^k, RID_R1);
1319 } 1263 }
1264 args[0] = ASMREF_L; /* lua_State *L */
1265 args[1] = ASMREF_TMP1; /* MSize size */
1320 asm_gencall(as, ci, args); 1266 asm_gencall(as, ci, args);
1321 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), 1267 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
1322 ra_releasetmp(as, ASMREF_TMP1)); 1268 ra_releasetmp(as, ASMREF_TMP1));
@@ -1393,24 +1339,41 @@ static void asm_fpunary(ASMState *as, IRIns *ir, ARMIns ai)
1393 emit_dm(as, ai, (dest & 15), (left & 15)); 1339 emit_dm(as, ai, (dest & 15), (left & 15));
1394} 1340}
1395 1341
1396static int asm_fpjoin_pow(ASMState *as, IRIns *ir) 1342static void asm_callround(ASMState *as, IRIns *ir, int id)
1397{ 1343{
1398 IRIns *irp = IR(ir->op1); 1344 /* The modified regs must match with the *.dasc implementation. */
1399 if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) { 1345 RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)|
1400 IRIns *irpp = IR(irp->op1); 1346 RID2RSET(RID_R3)|RID2RSET(RID_R12);
1401 if (irpp == ir-2 && irpp->o == IR_FPMATH && 1347 RegSet of;
1402 irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) { 1348 Reg dest, src;
1403 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow]; 1349 ra_evictset(as, drop);
1404 IRRef args[2]; 1350 dest = ra_dest(as, ir, RSET_FPR);
1405 args[0] = irpp->op1; 1351 emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15));
1406 args[1] = irp->op2; 1352 emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf :
1407 asm_setupresult(as, ir, ci); 1353 id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf :
1408 asm_gencall(as, ci, args); 1354 (void *)lj_vm_trunc_sf);
1409 return 1; 1355 /* Workaround to protect argument GPRs from being used for remat. */
1410 } 1356 of = as->freeset;
1411 } 1357 as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1);
1412 return 0; 1358 as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L);
1359 src = ra_alloc1(as, ir->op1, RSET_FPR); /* May alloc GPR to remat FPR. */
1360 as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1));
1361 emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15));
1362}
1363
1364static void asm_fpmath(ASMState *as, IRIns *ir)
1365{
1366 if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
1367 return;
1368 if (ir->op2 <= IRFPM_TRUNC)
1369 asm_callround(as, ir, ir->op2);
1370 else if (ir->op2 == IRFPM_SQRT)
1371 asm_fpunary(as, ir, ARMI_VSQRT_D);
1372 else
1373 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
1413} 1374}
1375#else
1376#define asm_fpmath(as, ir) lua_assert(0)
1414#endif 1377#endif
1415 1378
1416static int asm_swapops(ASMState *as, IRRef lref, IRRef rref) 1379static int asm_swapops(ASMState *as, IRRef lref, IRRef rref)
@@ -1460,32 +1423,6 @@ static void asm_intop_s(ASMState *as, IRIns *ir, ARMIns ai)
1460 asm_intop(as, ir, ai); 1423 asm_intop(as, ir, ai);
1461} 1424}
1462 1425
1463static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai)
1464{
1465 if (as->flagmcp == as->mcp) { /* Try to drop cmp r, #0. */
1466 uint32_t cc = (as->mcp[1] >> 28);
1467 as->flagmcp = NULL;
1468 if (cc <= CC_NE) {
1469 as->mcp++;
1470 ai |= ARMI_S;
1471 } else if (cc == CC_GE) {
1472 *++as->mcp ^= ((CC_GE^CC_PL) << 28);
1473 ai |= ARMI_S;
1474 } else if (cc == CC_LT) {
1475 *++as->mcp ^= ((CC_LT^CC_MI) << 28);
1476 ai |= ARMI_S;
1477 } /* else: other conds don't work with bit ops. */
1478 }
1479 if (ir->op2 == 0) {
1480 Reg dest = ra_dest(as, ir, RSET_GPR);
1481 uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
1482 emit_d(as, ai^m, dest);
1483 } else {
1484 /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */
1485 asm_intop(as, ir, ai);
1486 }
1487}
1488
1489static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai) 1426static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai)
1490{ 1427{
1491 Reg dest = ra_dest(as, ir, RSET_GPR); 1428 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1551,6 +1488,26 @@ static void asm_mul(ASMState *as, IRIns *ir)
1551 asm_intmul(as, ir); 1488 asm_intmul(as, ir);
1552} 1489}
1553 1490
1491#define asm_addov(as, ir) asm_add(as, ir)
1492#define asm_subov(as, ir) asm_sub(as, ir)
1493#define asm_mulov(as, ir) asm_mul(as, ir)
1494
1495#if LJ_SOFTFP
1496#define asm_div(as, ir) lua_assert(0)
1497#define asm_pow(as, ir) lua_assert(0)
1498#define asm_abs(as, ir) lua_assert(0)
1499#define asm_atan2(as, ir) lua_assert(0)
1500#define asm_ldexp(as, ir) lua_assert(0)
1501#else
1502#define asm_div(as, ir) asm_fparith(as, ir, ARMI_VDIV_D)
1503#define asm_pow(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi)
1504#define asm_abs(as, ir) asm_fpunary(as, ir, ARMI_VABS_D)
1505#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2)
1506#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp)
1507#endif
1508
1509#define asm_mod(as, ir) asm_callid(as, ir, IRCALL_lj_vm_modi)
1510
1554static void asm_neg(ASMState *as, IRIns *ir) 1511static void asm_neg(ASMState *as, IRIns *ir)
1555{ 1512{
1556#if !LJ_SOFTFP 1513#if !LJ_SOFTFP
@@ -1562,41 +1519,35 @@ static void asm_neg(ASMState *as, IRIns *ir)
1562 asm_intneg(as, ir, ARMI_RSB); 1519 asm_intneg(as, ir, ARMI_RSB);
1563} 1520}
1564 1521
1565static void asm_callid(ASMState *as, IRIns *ir, IRCallID id) 1522static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai)
1566{ 1523{
1567 const CCallInfo *ci = &lj_ir_callinfo[id]; 1524 if (as->flagmcp == as->mcp) { /* Try to drop cmp r, #0. */
1568 IRRef args[2]; 1525 uint32_t cc = (as->mcp[1] >> 28);
1569 args[0] = ir->op1; 1526 as->flagmcp = NULL;
1570 args[1] = ir->op2; 1527 if (cc <= CC_NE) {
1571 asm_setupresult(as, ir, ci); 1528 as->mcp++;
1572 asm_gencall(as, ci, args); 1529 ai |= ARMI_S;
1530 } else if (cc == CC_GE) {
1531 *++as->mcp ^= ((CC_GE^CC_PL) << 28);
1532 ai |= ARMI_S;
1533 } else if (cc == CC_LT) {
1534 *++as->mcp ^= ((CC_LT^CC_MI) << 28);
1535 ai |= ARMI_S;
1536 } /* else: other conds don't work with bit ops. */
1537 }
1538 if (ir->op2 == 0) {
1539 Reg dest = ra_dest(as, ir, RSET_GPR);
1540 uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
1541 emit_d(as, ai^m, dest);
1542 } else {
1543 /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */
1544 asm_intop(as, ir, ai);
1545 }
1573} 1546}
1574 1547
1575#if !LJ_SOFTFP 1548#define asm_bnot(as, ir) asm_bitop(as, ir, ARMI_MVN)
1576static void asm_callround(ASMState *as, IRIns *ir, int id)
1577{
1578 /* The modified regs must match with the *.dasc implementation. */
1579 RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)|
1580 RID2RSET(RID_R3)|RID2RSET(RID_R12);
1581 RegSet of;
1582 Reg dest, src;
1583 ra_evictset(as, drop);
1584 dest = ra_dest(as, ir, RSET_FPR);
1585 emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15));
1586 emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf :
1587 id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf :
1588 (void *)lj_vm_trunc_sf);
1589 /* Workaround to protect argument GPRs from being used for remat. */
1590 of = as->freeset;
1591 as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1);
1592 as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L);
1593 src = ra_alloc1(as, ir->op1, RSET_FPR); /* May alloc GPR to remat FPR. */
1594 as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1));
1595 emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15));
1596}
1597#endif
1598 1549
1599static void asm_bitswap(ASMState *as, IRIns *ir) 1550static void asm_bswap(ASMState *as, IRIns *ir)
1600{ 1551{
1601 Reg dest = ra_dest(as, ir, RSET_GPR); 1552 Reg dest = ra_dest(as, ir, RSET_GPR);
1602 Reg left = ra_alloc1(as, ir->op1, RSET_GPR); 1553 Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
@@ -1613,6 +1564,10 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
1613 } 1564 }
1614} 1565}
1615 1566
1567#define asm_band(as, ir) asm_bitop(as, ir, ARMI_AND)
1568#define asm_bor(as, ir) asm_bitop(as, ir, ARMI_ORR)
1569#define asm_bxor(as, ir) asm_bitop(as, ir, ARMI_EOR)
1570
1616static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh) 1571static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh)
1617{ 1572{
1618 if (irref_isk(ir->op2)) { /* Constant shifts. */ 1573 if (irref_isk(ir->op2)) { /* Constant shifts. */
@@ -1630,6 +1585,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh)
1630 } 1585 }
1631} 1586}
1632 1587
1588#define asm_bshl(as, ir) asm_bitshift(as, ir, ARMSH_LSL)
1589#define asm_bshr(as, ir) asm_bitshift(as, ir, ARMSH_LSR)
1590#define asm_bsar(as, ir) asm_bitshift(as, ir, ARMSH_ASR)
1591#define asm_bror(as, ir) asm_bitshift(as, ir, ARMSH_ROR)
1592#define asm_brol(as, ir) lua_assert(0)
1593
1633static void asm_intmin_max(ASMState *as, IRIns *ir, int cc) 1594static void asm_intmin_max(ASMState *as, IRIns *ir, int cc)
1634{ 1595{
1635 uint32_t kcmp = 0, kmov = 0; 1596 uint32_t kcmp = 0, kmov = 0;
@@ -1703,6 +1664,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int cc, int fcc)
1703 asm_intmin_max(as, ir, cc); 1664 asm_intmin_max(as, ir, cc);
1704} 1665}
1705 1666
1667#define asm_min(as, ir) asm_min_max(as, ir, CC_GT, CC_HI)
1668#define asm_max(as, ir) asm_min_max(as, ir, CC_LT, CC_LO)
1669
1706/* -- Comparisons --------------------------------------------------------- */ 1670/* -- Comparisons --------------------------------------------------------- */
1707 1671
1708/* Map of comparisons to flags. ORDER IR. */ 1672/* Map of comparisons to flags. ORDER IR. */
@@ -1818,6 +1782,18 @@ notst:
1818 as->flagmcp = as->mcp; /* Allow elimination of the compare. */ 1782 as->flagmcp = as->mcp; /* Allow elimination of the compare. */
1819} 1783}
1820 1784
1785static void asm_comp(ASMState *as, IRIns *ir)
1786{
1787#if !LJ_SOFTFP
1788 if (irt_isnum(ir->t))
1789 asm_fpcomp(as, ir);
1790 else
1791#endif
1792 asm_intcomp(as, ir);
1793}
1794
1795#define asm_equal(as, ir) asm_comp(as, ir)
1796
1821#if LJ_HASFFI 1797#if LJ_HASFFI
1822/* 64 bit integer comparisons. */ 1798/* 64 bit integer comparisons. */
1823static void asm_int64comp(ASMState *as, IRIns *ir) 1799static void asm_int64comp(ASMState *as, IRIns *ir)
@@ -1892,7 +1868,7 @@ static void asm_hiop(ASMState *as, IRIns *ir)
1892#endif 1868#endif
1893 } else if ((ir-1)->o == IR_XSTORE) { 1869 } else if ((ir-1)->o == IR_XSTORE) {
1894 if ((ir-1)->r != RID_SINK) 1870 if ((ir-1)->r != RID_SINK)
1895 asm_xstore(as, ir, 4); 1871 asm_xstore_(as, ir, 4);
1896 return; 1872 return;
1897 } 1873 }
1898 if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ 1874 if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
@@ -1940,6 +1916,16 @@ static void asm_hiop(ASMState *as, IRIns *ir)
1940#endif 1916#endif
1941} 1917}
1942 1918
1919/* -- Profiling ----------------------------------------------------------- */
1920
1921static void asm_prof(ASMState *as, IRIns *ir)
1922{
1923 UNUSED(ir);
1924 asm_guardcc(as, CC_NE);
1925 emit_n(as, ARMI_TST|ARMI_K12|HOOK_PROFILE, RID_TMP);
1926 emit_lsptr(as, ARMI_LDRB, RID_TMP, (void *)&J2G(as->J)->hookmask);
1927}
1928
1943/* -- Stack handling ------------------------------------------------------ */ 1929/* -- Stack handling ------------------------------------------------------ */
1944 1930
1945/* Check Lua stack size for overflow. Use exit handler as fallback. */ 1931/* Check Lua stack size for overflow. Use exit handler as fallback. */
@@ -1969,7 +1955,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
1969 emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, 1955 emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP,
1970 (int32_t)offsetof(lua_State, maxstack)); 1956 (int32_t)offsetof(lua_State, maxstack));
1971 if (irp) { /* Must not spill arbitrary registers in head of side trace. */ 1957 if (irp) { /* Must not spill arbitrary registers in head of side trace. */
1972 int32_t i = i32ptr(&J2G(as->J)->jit_L); 1958 int32_t i = i32ptr(&J2G(as->J)->cur_L);
1973 if (ra_hasspill(irp->s)) 1959 if (ra_hasspill(irp->s))
1974 emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s)); 1960 emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s));
1975 emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095)); 1961 emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095));
@@ -1977,7 +1963,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
1977 emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0); /* Save temp. register. */ 1963 emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0); /* Save temp. register. */
1978 emit_loadi(as, RID_TMP, (i & ~4095)); 1964 emit_loadi(as, RID_TMP, (i & ~4095));
1979 } else { 1965 } else {
1980 emit_getgl(as, RID_TMP, jit_L); 1966 emit_getgl(as, RID_TMP, cur_L);
1981 } 1967 }
1982} 1968}
1983 1969
@@ -2086,13 +2072,13 @@ static void asm_loop_fixup(ASMState *as)
2086 2072
2087/* -- Head of trace ------------------------------------------------------- */ 2073/* -- Head of trace ------------------------------------------------------- */
2088 2074
2089/* Reload L register from g->jit_L. */ 2075/* Reload L register from g->cur_L. */
2090static void asm_head_lreg(ASMState *as) 2076static void asm_head_lreg(ASMState *as)
2091{ 2077{
2092 IRIns *ir = IR(ASMREF_L); 2078 IRIns *ir = IR(ASMREF_L);
2093 if (ra_used(ir)) { 2079 if (ra_used(ir)) {
2094 Reg r = ra_dest(as, ir, RSET_GPR); 2080 Reg r = ra_dest(as, ir, RSET_GPR);
2095 emit_getgl(as, r, jit_L); 2081 emit_getgl(as, r, cur_L);
2096 ra_evictk(as); 2082 ra_evictk(as);
2097 } 2083 }
2098} 2084}
@@ -2163,143 +2149,13 @@ static void asm_tail_prep(ASMState *as)
2163 *p = 0; /* Prevent load/store merging. */ 2149 *p = 0; /* Prevent load/store merging. */
2164} 2150}
2165 2151
2166/* -- Instruction dispatch ------------------------------------------------ */
2167
2168/* Assemble a single instruction. */
2169static void asm_ir(ASMState *as, IRIns *ir)
2170{
2171 switch ((IROp)ir->o) {
2172 /* Miscellaneous ops. */
2173 case IR_LOOP: asm_loop(as); break;
2174 case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
2175 case IR_USE:
2176 ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
2177 case IR_PHI: asm_phi(as, ir); break;
2178 case IR_HIOP: asm_hiop(as, ir); break;
2179 case IR_GCSTEP: asm_gcstep(as, ir); break;
2180
2181 /* Guarded assertions. */
2182 case IR_EQ: case IR_NE:
2183 if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
2184 as->curins--;
2185 asm_href(as, ir-1, (IROp)ir->o);
2186 break;
2187 }
2188 /* fallthrough */
2189 case IR_LT: case IR_GE: case IR_LE: case IR_GT:
2190 case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
2191 case IR_ABC:
2192#if !LJ_SOFTFP
2193 if (irt_isnum(ir->t)) { asm_fpcomp(as, ir); break; }
2194#endif
2195 asm_intcomp(as, ir);
2196 break;
2197
2198 case IR_RETF: asm_retf(as, ir); break;
2199
2200 /* Bit ops. */
2201 case IR_BNOT: asm_bitop(as, ir, ARMI_MVN); break;
2202 case IR_BSWAP: asm_bitswap(as, ir); break;
2203
2204 case IR_BAND: asm_bitop(as, ir, ARMI_AND); break;
2205 case IR_BOR: asm_bitop(as, ir, ARMI_ORR); break;
2206 case IR_BXOR: asm_bitop(as, ir, ARMI_EOR); break;
2207
2208 case IR_BSHL: asm_bitshift(as, ir, ARMSH_LSL); break;
2209 case IR_BSHR: asm_bitshift(as, ir, ARMSH_LSR); break;
2210 case IR_BSAR: asm_bitshift(as, ir, ARMSH_ASR); break;
2211 case IR_BROR: asm_bitshift(as, ir, ARMSH_ROR); break;
2212 case IR_BROL: lua_assert(0); break;
2213
2214 /* Arithmetic ops. */
2215 case IR_ADD: case IR_ADDOV: asm_add(as, ir); break;
2216 case IR_SUB: case IR_SUBOV: asm_sub(as, ir); break;
2217 case IR_MUL: case IR_MULOV: asm_mul(as, ir); break;
2218 case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;
2219 case IR_NEG: asm_neg(as, ir); break;
2220
2221#if LJ_SOFTFP
2222 case IR_DIV: case IR_POW: case IR_ABS:
2223 case IR_ATAN2: case IR_LDEXP: case IR_FPMATH: case IR_TOBIT:
2224 lua_assert(0); /* Unused for LJ_SOFTFP. */
2225 break;
2226#else
2227 case IR_DIV: asm_fparith(as, ir, ARMI_VDIV_D); break;
2228 case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;
2229 case IR_ABS: asm_fpunary(as, ir, ARMI_VABS_D); break;
2230 case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;
2231 case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;
2232 case IR_FPMATH:
2233 if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
2234 break;
2235 if (ir->op2 <= IRFPM_TRUNC)
2236 asm_callround(as, ir, ir->op2);
2237 else if (ir->op2 == IRFPM_SQRT)
2238 asm_fpunary(as, ir, ARMI_VSQRT_D);
2239 else
2240 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
2241 break;
2242 case IR_TOBIT: asm_tobit(as, ir); break;
2243#endif
2244
2245 case IR_MIN: asm_min_max(as, ir, CC_GT, CC_HI); break;
2246 case IR_MAX: asm_min_max(as, ir, CC_LT, CC_LO); break;
2247
2248 /* Memory references. */
2249 case IR_AREF: asm_aref(as, ir); break;
2250 case IR_HREF: asm_href(as, ir, 0); break;
2251 case IR_HREFK: asm_hrefk(as, ir); break;
2252 case IR_NEWREF: asm_newref(as, ir); break;
2253 case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
2254 case IR_FREF: asm_fref(as, ir); break;
2255 case IR_STRREF: asm_strref(as, ir); break;
2256
2257 /* Loads and stores. */
2258 case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
2259 asm_ahuvload(as, ir);
2260 break;
2261 case IR_FLOAD: asm_fload(as, ir); break;
2262 case IR_XLOAD: asm_xload(as, ir); break;
2263 case IR_SLOAD: asm_sload(as, ir); break;
2264
2265 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
2266 case IR_FSTORE: asm_fstore(as, ir); break;
2267 case IR_XSTORE: asm_xstore(as, ir, 0); break;
2268
2269 /* Allocations. */
2270 case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
2271 case IR_TNEW: asm_tnew(as, ir); break;
2272 case IR_TDUP: asm_tdup(as, ir); break;
2273 case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
2274
2275 /* Write barriers. */
2276 case IR_TBAR: asm_tbar(as, ir); break;
2277 case IR_OBAR: asm_obar(as, ir); break;
2278
2279 /* Type conversions. */
2280 case IR_CONV: asm_conv(as, ir); break;
2281 case IR_TOSTR: asm_tostr(as, ir); break;
2282 case IR_STRTO: asm_strto(as, ir); break;
2283
2284 /* Calls. */
2285 case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
2286 case IR_CALLXS: asm_callx(as, ir); break;
2287 case IR_CARG: break;
2288
2289 default:
2290 setintV(&as->J->errinfo, ir->o);
2291 lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
2292 break;
2293 }
2294}
2295
2296/* -- Trace setup --------------------------------------------------------- */ 2152/* -- Trace setup --------------------------------------------------------- */
2297 2153
2298/* Ensure there are enough stack slots for call arguments. */ 2154/* Ensure there are enough stack slots for call arguments. */
2299static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) 2155static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
2300{ 2156{
2301 IRRef args[CCI_NARGS_MAX*2]; 2157 IRRef args[CCI_NARGS_MAX*2];
2302 uint32_t i, nargs = (int)CCI_NARGS(ci); 2158 uint32_t i, nargs = CCI_XNARGS(ci);
2303 int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR, fprodd = 0; 2159 int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR, fprodd = 0;
2304 asm_collectargs(as, ir, ci, args); 2160 asm_collectargs(as, ir, ci, args);
2305 for (i = 0; i < nargs; i++) { 2161 for (i = 0; i < nargs; i++) {
diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h
index e5c67b53..bc521596 100644
--- a/src/lj_asm_mips.h
+++ b/src/lj_asm_mips.h
@@ -226,7 +226,7 @@ static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref,
226/* Generate a call to a C function. */ 226/* Generate a call to a C function. */
227static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) 227static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
228{ 228{
229 uint32_t n, nargs = CCI_NARGS(ci); 229 uint32_t n, nargs = CCI_XNARGS(ci);
230 int32_t ofs = 16; 230 int32_t ofs = 16;
231 Reg gpr, fpr = REGARG_FIRSTFPR; 231 Reg gpr, fpr = REGARG_FIRSTFPR;
232 if ((void *)ci->func) 232 if ((void *)ci->func)
@@ -326,15 +326,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
326 } 326 }
327} 327}
328 328
329static void asm_call(ASMState *as, IRIns *ir)
330{
331 IRRef args[CCI_NARGS_MAX];
332 const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
333 asm_collectargs(as, ir, ci, args);
334 asm_setupresult(as, ir, ci);
335 asm_gencall(as, ci, args);
336}
337
338static void asm_callx(ASMState *as, IRIns *ir) 329static void asm_callx(ASMState *as, IRIns *ir)
339{ 330{
340 IRRef args[CCI_NARGS_MAX*2]; 331 IRRef args[CCI_NARGS_MAX*2];
@@ -362,16 +353,6 @@ static void asm_callx(ASMState *as, IRIns *ir)
362 asm_gencall(as, &ci, args); 353 asm_gencall(as, &ci, args);
363} 354}
364 355
365static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
366{
367 const CCallInfo *ci = &lj_ir_callinfo[id];
368 IRRef args[2];
369 args[0] = ir->op1;
370 args[1] = ir->op2;
371 asm_setupresult(as, ir, ci);
372 asm_gencall(as, ci, args);
373}
374
375static void asm_callround(ASMState *as, IRIns *ir, IRCallID id) 356static void asm_callround(ASMState *as, IRIns *ir, IRCallID id)
376{ 357{
377 /* The modified regs must match with the *.dasc implementation. */ 358 /* The modified regs must match with the *.dasc implementation. */
@@ -520,28 +501,6 @@ static void asm_conv(ASMState *as, IRIns *ir)
520 } 501 }
521} 502}
522 503
523#if LJ_HASFFI
524static void asm_conv64(ASMState *as, IRIns *ir)
525{
526 IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
527 IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
528 IRCallID id;
529 const CCallInfo *ci;
530 IRRef args[2];
531 args[LJ_BE?0:1] = ir->op1;
532 args[LJ_BE?1:0] = (ir-1)->op1;
533 if (st == IRT_NUM || st == IRT_FLOAT) {
534 id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
535 ir--;
536 } else {
537 id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
538 }
539 ci = &lj_ir_callinfo[id];
540 asm_setupresult(as, ir, ci);
541 asm_gencall(as, ci, args);
542}
543#endif
544
545static void asm_strto(ASMState *as, IRIns *ir) 504static void asm_strto(ASMState *as, IRIns *ir)
546{ 505{
547 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; 506 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
@@ -558,6 +517,8 @@ static void asm_strto(ASMState *as, IRIns *ir)
558 RID_SP, sps_scale(ir->s)); 517 RID_SP, sps_scale(ir->s));
559} 518}
560 519
520/* -- Memory references --------------------------------------------------- */
521
561/* Get pointer to TValue. */ 522/* Get pointer to TValue. */
562static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) 523static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
563{ 524{
@@ -581,27 +542,6 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
581 } 542 }
582} 543}
583 544
584static void asm_tostr(ASMState *as, IRIns *ir)
585{
586 IRRef args[2];
587 args[0] = ASMREF_L;
588 as->gcsteps++;
589 if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) {
590 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
591 args[1] = ASMREF_TMP1; /* const lua_Number * */
592 asm_setupresult(as, ir, ci); /* GCstr * */
593 asm_gencall(as, ci, args);
594 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
595 } else {
596 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
597 args[1] = ir->op1; /* int32_t k */
598 asm_setupresult(as, ir, ci); /* GCstr * */
599 asm_gencall(as, ci, args);
600 }
601}
602
603/* -- Memory references --------------------------------------------------- */
604
605static void asm_aref(ASMState *as, IRIns *ir) 545static void asm_aref(ASMState *as, IRIns *ir)
606{ 546{
607 Reg dest = ra_dest(as, ir, RSET_GPR); 547 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -631,7 +571,7 @@ static void asm_aref(ASMState *as, IRIns *ir)
631** } while ((n = nextnode(n))); 571** } while ((n = nextnode(n)));
632** return niltv(L); 572** return niltv(L);
633*/ 573*/
634static void asm_href(ASMState *as, IRIns *ir) 574static void asm_href(ASMState *as, IRIns *ir, IROp merge)
635{ 575{
636 RegSet allow = RSET_GPR; 576 RegSet allow = RSET_GPR;
637 int destused = ra_used(ir); 577 int destused = ra_used(ir);
@@ -657,37 +597,42 @@ static void asm_href(ASMState *as, IRIns *ir)
657 tmp2 = ra_scratch(as, allow); 597 tmp2 = ra_scratch(as, allow);
658 rset_clear(allow, tmp2); 598 rset_clear(allow, tmp2);
659 599
660 /* Key not found in chain: load niltv. */ 600 /* Key not found in chain: jump to exit (if merged) or load niltv. */
661 l_end = emit_label(as); 601 l_end = emit_label(as);
662 if (destused) 602 as->invmcp = NULL;
603 if (merge == IR_NE)
604 asm_guard(as, MIPSI_B, RID_ZERO, RID_ZERO);
605 else if (destused)
663 emit_loada(as, dest, niltvg(J2G(as->J))); 606 emit_loada(as, dest, niltvg(J2G(as->J)));
664 else
665 *--as->mcp = MIPSI_NOP;
666 /* Follow hash chain until the end. */ 607 /* Follow hash chain until the end. */
667 emit_move(as, dest, tmp1); 608 emit_move(as, dest, tmp2);
668 l_loop = --as->mcp; 609 l_loop = --as->mcp;
669 emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, next)); 610 emit_tsi(as, MIPSI_LW, tmp2, dest, (int32_t)offsetof(Node, next));
670 l_next = emit_label(as); 611 l_next = emit_label(as);
671 612
672 /* Type and value comparison. */ 613 /* Type and value comparison. */
614 if (merge == IR_EQ) { /* Must match asm_guard(). */
615 emit_ti(as, MIPSI_LI, RID_TMP, as->snapno);
616 l_end = asm_exitstub_addr(as);
617 }
673 if (irt_isnum(kt)) { 618 if (irt_isnum(kt)) {
674 emit_branch(as, MIPSI_BC1T, 0, 0, l_end); 619 emit_branch(as, MIPSI_BC1T, 0, 0, l_end);
675 emit_fgh(as, MIPSI_C_EQ_D, 0, tmpnum, key); 620 emit_fgh(as, MIPSI_C_EQ_D, 0, tmpnum, key);
676 emit_tg(as, MIPSI_MFC1, tmp1, key+1); 621 *--as->mcp = MIPSI_NOP; /* Avoid NaN comparison overhead. */
677 emit_branch(as, MIPSI_BEQ, tmp1, RID_ZERO, l_next); 622 emit_branch(as, MIPSI_BEQ, tmp2, RID_ZERO, l_next);
678 emit_tsi(as, MIPSI_SLTIU, tmp1, tmp1, (int32_t)LJ_TISNUM); 623 emit_tsi(as, MIPSI_SLTIU, tmp2, tmp2, (int32_t)LJ_TISNUM);
679 emit_hsi(as, MIPSI_LDC1, tmpnum, dest, (int32_t)offsetof(Node, key.n)); 624 emit_hsi(as, MIPSI_LDC1, tmpnum, dest, (int32_t)offsetof(Node, key.n));
680 } else { 625 } else {
681 if (irt_ispri(kt)) { 626 if (irt_ispri(kt)) {
682 emit_branch(as, MIPSI_BEQ, tmp1, type, l_end); 627 emit_branch(as, MIPSI_BEQ, tmp2, type, l_end);
683 } else { 628 } else {
684 emit_branch(as, MIPSI_BEQ, tmp2, key, l_end); 629 emit_branch(as, MIPSI_BEQ, tmp1, key, l_end);
685 emit_tsi(as, MIPSI_LW, tmp2, dest, (int32_t)offsetof(Node, key.gcr)); 630 emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, key.gcr));
686 emit_branch(as, MIPSI_BNE, tmp1, type, l_next); 631 emit_branch(as, MIPSI_BNE, tmp2, type, l_next);
687 } 632 }
688 } 633 }
689 emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, key.it)); 634 emit_tsi(as, MIPSI_LW, tmp2, dest, (int32_t)offsetof(Node, key.it));
690 *l_loop = MIPSI_BNE | MIPSF_S(tmp1) | ((as->mcp-l_loop-1) & 0xffffu); 635 *l_loop = MIPSI_BNE | MIPSF_S(tmp2) | ((as->mcp-l_loop-1) & 0xffffu);
691 636
692 /* Load main position relative to tab->node into dest. */ 637 /* Load main position relative to tab->node into dest. */
693 khash = irref_isk(refkey) ? ir_khash(irkey) : 1; 638 khash = irref_isk(refkey) ? ir_khash(irkey) : 1;
@@ -777,20 +722,6 @@ nolo:
777 emit_tsi(as, MIPSI_ADDU, dest, node, ra_allock(as, ofs, allow)); 722 emit_tsi(as, MIPSI_ADDU, dest, node, ra_allock(as, ofs, allow));
778} 723}
779 724
780static void asm_newref(ASMState *as, IRIns *ir)
781{
782 if (ir->r != RID_SINK) {
783 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
784 IRRef args[3];
785 args[0] = ASMREF_L; /* lua_State *L */
786 args[1] = ir->op1; /* GCtab *t */
787 args[2] = ASMREF_TMP1; /* cTValue *key */
788 asm_setupresult(as, ir, ci); /* TValue * */
789 asm_gencall(as, ci, args);
790 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
791 }
792}
793
794static void asm_uref(ASMState *as, IRIns *ir) 725static void asm_uref(ASMState *as, IRIns *ir)
795{ 726{
796 /* NYI: Check that UREFO is still open and not aliasing a slot. */ 727 /* NYI: Check that UREFO is still open and not aliasing a slot. */
@@ -919,7 +850,7 @@ static void asm_xload(ASMState *as, IRIns *ir)
919 asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); 850 asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
920} 851}
921 852
922static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) 853static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
923{ 854{
924 if (ir->r != RID_SINK) { 855 if (ir->r != RID_SINK) {
925 Reg src = ra_alloc1z(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); 856 Reg src = ra_alloc1z(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
@@ -928,6 +859,8 @@ static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
928 } 859 }
929} 860}
930 861
862#define asm_xstore(as, ir) asm_xstore_(as, ir, 0)
863
931static void asm_ahuvload(ASMState *as, IRIns *ir) 864static void asm_ahuvload(ASMState *as, IRIns *ir)
932{ 865{
933 IRType1 t = ir->t; 866 IRType1 t = ir->t;
@@ -1003,7 +936,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
1003 if (irt_isint(t)) { 936 if (irt_isint(t)) {
1004 Reg tmp = ra_scratch(as, RSET_FPR); 937 Reg tmp = ra_scratch(as, RSET_FPR);
1005 emit_tg(as, MIPSI_MFC1, dest, tmp); 938 emit_tg(as, MIPSI_MFC1, dest, tmp);
1006 emit_fg(as, MIPSI_CVT_W_D, tmp, tmp); 939 emit_fg(as, MIPSI_TRUNC_W_D, tmp, tmp);
1007 dest = tmp; 940 dest = tmp;
1008 t.irt = IRT_NUM; /* Check for original type. */ 941 t.irt = IRT_NUM; /* Check for original type. */
1009 } else { 942 } else {
@@ -1043,19 +976,15 @@ dotypecheck:
1043static void asm_cnew(ASMState *as, IRIns *ir) 976static void asm_cnew(ASMState *as, IRIns *ir)
1044{ 977{
1045 CTState *cts = ctype_ctsG(J2G(as->J)); 978 CTState *cts = ctype_ctsG(J2G(as->J));
1046 CTypeID ctypeid = (CTypeID)IR(ir->op1)->i; 979 CTypeID id = (CTypeID)IR(ir->op1)->i;
1047 CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ? 980 CTSize sz;
1048 lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i; 981 CTInfo info = lj_ctype_info(cts, id, &sz);
1049 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; 982 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
1050 IRRef args[2]; 983 IRRef args[4];
1051 RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
1052 RegSet drop = RSET_SCRATCH; 984 RegSet drop = RSET_SCRATCH;
1053 lua_assert(sz != CTSIZE_INVALID); 985 lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
1054 986
1055 args[0] = ASMREF_L; /* lua_State *L */
1056 args[1] = ASMREF_TMP1; /* MSize size */
1057 as->gcsteps++; 987 as->gcsteps++;
1058
1059 if (ra_hasreg(ir->r)) 988 if (ra_hasreg(ir->r))
1060 rset_clear(drop, ir->r); /* Dest reg handled below. */ 989 rset_clear(drop, ir->r); /* Dest reg handled below. */
1061 ra_evictset(as, drop); 990 ra_evictset(as, drop);
@@ -1064,6 +993,7 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1064 993
1065 /* Initialize immutable cdata object. */ 994 /* Initialize immutable cdata object. */
1066 if (ir->o == IR_CNEWI) { 995 if (ir->o == IR_CNEWI) {
996 RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
1067 int32_t ofs = sizeof(GCcdata); 997 int32_t ofs = sizeof(GCcdata);
1068 lua_assert(sz == 4 || sz == 8); 998 lua_assert(sz == 4 || sz == 8);
1069 if (sz == 8) { 999 if (sz == 8) {
@@ -1078,12 +1008,24 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1078 if (ofs == sizeof(GCcdata)) break; 1008 if (ofs == sizeof(GCcdata)) break;
1079 ofs -= 4; if (LJ_BE) ir++; else ir--; 1009 ofs -= 4; if (LJ_BE) ir++; else ir--;
1080 } 1010 }
1011 } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */
1012 ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
1013 args[0] = ASMREF_L; /* lua_State *L */
1014 args[1] = ir->op1; /* CTypeID id */
1015 args[2] = ir->op2; /* CTSize sz */
1016 args[3] = ASMREF_TMP1; /* CTSize align */
1017 asm_gencall(as, ci, args);
1018 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
1019 return;
1081 } 1020 }
1021
1082 /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ 1022 /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
1083 emit_tsi(as, MIPSI_SB, RID_RET+1, RID_RET, offsetof(GCcdata, gct)); 1023 emit_tsi(as, MIPSI_SB, RID_RET+1, RID_RET, offsetof(GCcdata, gct));
1084 emit_tsi(as, MIPSI_SH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid)); 1024 emit_tsi(as, MIPSI_SH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid));
1085 emit_ti(as, MIPSI_LI, RID_RET+1, ~LJ_TCDATA); 1025 emit_ti(as, MIPSI_LI, RID_RET+1, ~LJ_TCDATA);
1086 emit_ti(as, MIPSI_LI, RID_TMP, ctypeid); /* Lower 16 bit used. Sign-ext ok. */ 1026 emit_ti(as, MIPSI_LI, RID_TMP, id); /* Lower 16 bit used. Sign-ext ok. */
1027 args[0] = ASMREF_L; /* lua_State *L */
1028 args[1] = ASMREF_TMP1; /* MSize size */
1087 asm_gencall(as, ci, args); 1029 asm_gencall(as, ci, args);
1088 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), 1030 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
1089 ra_releasetmp(as, ASMREF_TMP1)); 1031 ra_releasetmp(as, ASMREF_TMP1));
@@ -1153,23 +1095,16 @@ static void asm_fpunary(ASMState *as, IRIns *ir, MIPSIns mi)
1153 emit_fg(as, mi, dest, left); 1095 emit_fg(as, mi, dest, left);
1154} 1096}
1155 1097
1156static int asm_fpjoin_pow(ASMState *as, IRIns *ir) 1098static void asm_fpmath(ASMState *as, IRIns *ir)
1157{ 1099{
1158 IRIns *irp = IR(ir->op1); 1100 if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
1159 if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) { 1101 return;
1160 IRIns *irpp = IR(irp->op1); 1102 if (ir->op2 <= IRFPM_TRUNC)
1161 if (irpp == ir-2 && irpp->o == IR_FPMATH && 1103 asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2);
1162 irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) { 1104 else if (ir->op2 == IRFPM_SQRT)
1163 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow]; 1105 asm_fpunary(as, ir, MIPSI_SQRT_D);
1164 IRRef args[2]; 1106 else
1165 args[0] = irpp->op1; 1107 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
1166 args[1] = irp->op2;
1167 asm_setupresult(as, ir, ci);
1168 asm_gencall(as, ci, args);
1169 return 1;
1170 }
1171 }
1172 return 0;
1173} 1108}
1174 1109
1175static void asm_add(ASMState *as, IRIns *ir) 1110static void asm_add(ASMState *as, IRIns *ir)
@@ -1215,6 +1150,10 @@ static void asm_mul(ASMState *as, IRIns *ir)
1215 } 1150 }
1216} 1151}
1217 1152
1153#define asm_div(as, ir) asm_fparith(as, ir, MIPSI_DIV_D)
1154#define asm_mod(as, ir) asm_callid(as, ir, IRCALL_lj_vm_modi)
1155#define asm_pow(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi)
1156
1218static void asm_neg(ASMState *as, IRIns *ir) 1157static void asm_neg(ASMState *as, IRIns *ir)
1219{ 1158{
1220 if (irt_isnum(ir->t)) { 1159 if (irt_isnum(ir->t)) {
@@ -1226,6 +1165,10 @@ static void asm_neg(ASMState *as, IRIns *ir)
1226 } 1165 }
1227} 1166}
1228 1167
1168#define asm_abs(as, ir) asm_fpunary(as, ir, MIPSI_ABS_D)
1169#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2)
1170#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp)
1171
1229static void asm_arithov(ASMState *as, IRIns *ir) 1172static void asm_arithov(ASMState *as, IRIns *ir)
1230{ 1173{
1231 Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR); 1174 Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR);
@@ -1259,13 +1202,21 @@ static void asm_arithov(ASMState *as, IRIns *ir)
1259 emit_move(as, RID_TMP, dest == left ? left : right); 1202 emit_move(as, RID_TMP, dest == left ? left : right);
1260} 1203}
1261 1204
1205#define asm_addov(as, ir) asm_arithov(as, ir)
1206#define asm_subov(as, ir) asm_arithov(as, ir)
1207
1262static void asm_mulov(ASMState *as, IRIns *ir) 1208static void asm_mulov(ASMState *as, IRIns *ir)
1263{ 1209{
1264#if LJ_DUALNUM 1210 Reg dest = ra_dest(as, ir, RSET_GPR);
1265#error "NYI: MULOV" 1211 Reg tmp, right, left = ra_alloc2(as, ir, RSET_GPR);
1266#else 1212 right = (left >> 8); left &= 255;
1267 UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused in single-number mode. */ 1213 tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR, left),
1268#endif 1214 right), dest));
1215 asm_guard(as, MIPSI_BNE, RID_TMP, tmp);
1216 emit_dta(as, MIPSI_SRA, RID_TMP, dest, 31);
1217 emit_dst(as, MIPSI_MFHI, tmp, 0, 0);
1218 emit_dst(as, MIPSI_MFLO, dest, 0, 0);
1219 emit_dst(as, MIPSI_MULT, 0, left, right);
1269} 1220}
1270 1221
1271#if LJ_HASFFI 1222#if LJ_HASFFI
@@ -1352,7 +1303,7 @@ static void asm_neg64(ASMState *as, IRIns *ir)
1352} 1303}
1353#endif 1304#endif
1354 1305
1355static void asm_bitnot(ASMState *as, IRIns *ir) 1306static void asm_bnot(ASMState *as, IRIns *ir)
1356{ 1307{
1357 Reg left, right, dest = ra_dest(as, ir, RSET_GPR); 1308 Reg left, right, dest = ra_dest(as, ir, RSET_GPR);
1358 IRIns *irl = IR(ir->op1); 1309 IRIns *irl = IR(ir->op1);
@@ -1366,7 +1317,7 @@ static void asm_bitnot(ASMState *as, IRIns *ir)
1366 emit_dst(as, MIPSI_NOR, dest, left, right); 1317 emit_dst(as, MIPSI_NOR, dest, left, right);
1367} 1318}
1368 1319
1369static void asm_bitswap(ASMState *as, IRIns *ir) 1320static void asm_bswap(ASMState *as, IRIns *ir)
1370{ 1321{
1371 Reg dest = ra_dest(as, ir, RSET_GPR); 1322 Reg dest = ra_dest(as, ir, RSET_GPR);
1372 Reg left = ra_alloc1(as, ir->op1, RSET_GPR); 1323 Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
@@ -1402,6 +1353,10 @@ static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
1402 emit_dst(as, mi, dest, left, right); 1353 emit_dst(as, mi, dest, left, right);
1403} 1354}
1404 1355
1356#define asm_band(as, ir) asm_bitop(as, ir, MIPSI_AND, MIPSI_ANDI)
1357#define asm_bor(as, ir) asm_bitop(as, ir, MIPSI_OR, MIPSI_ORI)
1358#define asm_bxor(as, ir) asm_bitop(as, ir, MIPSI_XOR, MIPSI_XORI)
1359
1405static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik) 1360static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
1406{ 1361{
1407 Reg dest = ra_dest(as, ir, RSET_GPR); 1362 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1415,7 +1370,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
1415 } 1370 }
1416} 1371}
1417 1372
1418static void asm_bitror(ASMState *as, IRIns *ir) 1373#define asm_bshl(as, ir) asm_bitshift(as, ir, MIPSI_SLLV, MIPSI_SLL)
1374#define asm_bshr(as, ir) asm_bitshift(as, ir, MIPSI_SRLV, MIPSI_SRL)
1375#define asm_bsar(as, ir) asm_bitshift(as, ir, MIPSI_SRAV, MIPSI_SRA)
1376#define asm_brol(as, ir) lua_assert(0)
1377
1378static void asm_bror(ASMState *as, IRIns *ir)
1419{ 1379{
1420 if ((as->flags & JIT_F_MIPS32R2)) { 1380 if ((as->flags & JIT_F_MIPS32R2)) {
1421 asm_bitshift(as, ir, MIPSI_ROTRV, MIPSI_ROTR); 1381 asm_bitshift(as, ir, MIPSI_ROTRV, MIPSI_ROTR);
@@ -1464,6 +1424,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
1464 } 1424 }
1465} 1425}
1466 1426
1427#define asm_min(as, ir) asm_min_max(as, ir, 0)
1428#define asm_max(as, ir) asm_min_max(as, ir, 1)
1429
1467/* -- Comparisons --------------------------------------------------------- */ 1430/* -- Comparisons --------------------------------------------------------- */
1468 1431
1469static void asm_comp(ASMState *as, IRIns *ir) 1432static void asm_comp(ASMState *as, IRIns *ir)
@@ -1501,7 +1464,7 @@ static void asm_comp(ASMState *as, IRIns *ir)
1501 } 1464 }
1502} 1465}
1503 1466
1504static void asm_compeq(ASMState *as, IRIns *ir) 1467static void asm_equal(ASMState *as, IRIns *ir)
1505{ 1468{
1506 Reg right, left = ra_alloc2(as, ir, irt_isnum(ir->t) ? RSET_FPR : RSET_GPR); 1469 Reg right, left = ra_alloc2(as, ir, irt_isnum(ir->t) ? RSET_FPR : RSET_GPR);
1507 right = (left >> 8); left &= 255; 1470 right = (left >> 8); left &= 255;
@@ -1575,8 +1538,8 @@ static void asm_hiop(ASMState *as, IRIns *ir)
1575 } else if ((ir-1)->o == IR_XSTORE) { 1538 } else if ((ir-1)->o == IR_XSTORE) {
1576 as->curins--; /* Handle both stores here. */ 1539 as->curins--; /* Handle both stores here. */
1577 if ((ir-1)->r != RID_SINK) { 1540 if ((ir-1)->r != RID_SINK) {
1578 asm_xstore(as, ir, LJ_LE ? 4 : 0); 1541 asm_xstore_(as, ir, LJ_LE ? 4 : 0);
1579 asm_xstore(as, ir-1, LJ_LE ? 0 : 4); 1542 asm_xstore_(as, ir-1, LJ_LE ? 0 : 4);
1580 } 1543 }
1581 return; 1544 return;
1582 } 1545 }
@@ -1600,6 +1563,17 @@ static void asm_hiop(ASMState *as, IRIns *ir)
1600#endif 1563#endif
1601} 1564}
1602 1565
1566/* -- Profiling ----------------------------------------------------------- */
1567
1568static void asm_prof(ASMState *as, IRIns *ir)
1569{
1570 UNUSED(ir);
1571 asm_guard(as, MIPSI_BNE, RID_TMP, RID_ZERO);
1572 emit_tsi(as, MIPSI_ANDI, RID_TMP, RID_TMP, HOOK_PROFILE);
1573 emit_lsglptr(as, MIPSI_LBU, RID_TMP,
1574 (int32_t)offsetof(global_State, hookmask));
1575}
1576
1603/* -- Stack handling ------------------------------------------------------ */ 1577/* -- Stack handling ------------------------------------------------------ */
1604 1578
1605/* Check Lua stack size for overflow. Use exit handler as fallback. */ 1579/* Check Lua stack size for overflow. Use exit handler as fallback. */
@@ -1624,7 +1598,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
1624 emit_tsi(as, MIPSI_LW, tmp, tmp, offsetof(lua_State, maxstack)); 1598 emit_tsi(as, MIPSI_LW, tmp, tmp, offsetof(lua_State, maxstack));
1625 if (pbase == RID_TMP) 1599 if (pbase == RID_TMP)
1626 emit_getgl(as, RID_TMP, jit_base); 1600 emit_getgl(as, RID_TMP, jit_base);
1627 emit_getgl(as, tmp, jit_L); 1601 emit_getgl(as, tmp, cur_L);
1628 if (allow == RSET_EMPTY) /* Spill temp. register. */ 1602 if (allow == RSET_EMPTY) /* Spill temp. register. */
1629 emit_tsi(as, MIPSI_SW, tmp, RID_SP, 0); 1603 emit_tsi(as, MIPSI_SW, tmp, RID_SP, 0);
1630} 1604}
@@ -1772,131 +1746,13 @@ static void asm_tail_prep(ASMState *as)
1772 as->invmcp = as->loopref ? as->mcp : NULL; 1746 as->invmcp = as->loopref ? as->mcp : NULL;
1773} 1747}
1774 1748
1775/* -- Instruction dispatch ------------------------------------------------ */
1776
1777/* Assemble a single instruction. */
1778static void asm_ir(ASMState *as, IRIns *ir)
1779{
1780 switch ((IROp)ir->o) {
1781 /* Miscellaneous ops. */
1782 case IR_LOOP: asm_loop(as); break;
1783 case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
1784 case IR_USE:
1785 ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
1786 case IR_PHI: asm_phi(as, ir); break;
1787 case IR_HIOP: asm_hiop(as, ir); break;
1788 case IR_GCSTEP: asm_gcstep(as, ir); break;
1789
1790 /* Guarded assertions. */
1791 case IR_EQ: case IR_NE: asm_compeq(as, ir); break;
1792 case IR_LT: case IR_GE: case IR_LE: case IR_GT:
1793 case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
1794 case IR_ABC:
1795 asm_comp(as, ir);
1796 break;
1797
1798 case IR_RETF: asm_retf(as, ir); break;
1799
1800 /* Bit ops. */
1801 case IR_BNOT: asm_bitnot(as, ir); break;
1802 case IR_BSWAP: asm_bitswap(as, ir); break;
1803
1804 case IR_BAND: asm_bitop(as, ir, MIPSI_AND, MIPSI_ANDI); break;
1805 case IR_BOR: asm_bitop(as, ir, MIPSI_OR, MIPSI_ORI); break;
1806 case IR_BXOR: asm_bitop(as, ir, MIPSI_XOR, MIPSI_XORI); break;
1807
1808 case IR_BSHL: asm_bitshift(as, ir, MIPSI_SLLV, MIPSI_SLL); break;
1809 case IR_BSHR: asm_bitshift(as, ir, MIPSI_SRLV, MIPSI_SRL); break;
1810 case IR_BSAR: asm_bitshift(as, ir, MIPSI_SRAV, MIPSI_SRA); break;
1811 case IR_BROL: lua_assert(0); break;
1812 case IR_BROR: asm_bitror(as, ir); break;
1813
1814 /* Arithmetic ops. */
1815 case IR_ADD: asm_add(as, ir); break;
1816 case IR_SUB: asm_sub(as, ir); break;
1817 case IR_MUL: asm_mul(as, ir); break;
1818 case IR_DIV: asm_fparith(as, ir, MIPSI_DIV_D); break;
1819 case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;
1820 case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;
1821 case IR_NEG: asm_neg(as, ir); break;
1822
1823 case IR_ABS: asm_fpunary(as, ir, MIPSI_ABS_D); break;
1824 case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;
1825 case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;
1826 case IR_MIN: asm_min_max(as, ir, 0); break;
1827 case IR_MAX: asm_min_max(as, ir, 1); break;
1828 case IR_FPMATH:
1829 if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
1830 break;
1831 if (ir->op2 <= IRFPM_TRUNC)
1832 asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2);
1833 else if (ir->op2 == IRFPM_SQRT)
1834 asm_fpunary(as, ir, MIPSI_SQRT_D);
1835 else
1836 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
1837 break;
1838
1839 /* Overflow-checking arithmetic ops. */
1840 case IR_ADDOV: asm_arithov(as, ir); break;
1841 case IR_SUBOV: asm_arithov(as, ir); break;
1842 case IR_MULOV: asm_mulov(as, ir); break;
1843
1844 /* Memory references. */
1845 case IR_AREF: asm_aref(as, ir); break;
1846 case IR_HREF: asm_href(as, ir); break;
1847 case IR_HREFK: asm_hrefk(as, ir); break;
1848 case IR_NEWREF: asm_newref(as, ir); break;
1849 case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
1850 case IR_FREF: asm_fref(as, ir); break;
1851 case IR_STRREF: asm_strref(as, ir); break;
1852
1853 /* Loads and stores. */
1854 case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
1855 asm_ahuvload(as, ir);
1856 break;
1857 case IR_FLOAD: asm_fload(as, ir); break;
1858 case IR_XLOAD: asm_xload(as, ir); break;
1859 case IR_SLOAD: asm_sload(as, ir); break;
1860
1861 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
1862 case IR_FSTORE: asm_fstore(as, ir); break;
1863 case IR_XSTORE: asm_xstore(as, ir, 0); break;
1864
1865 /* Allocations. */
1866 case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
1867 case IR_TNEW: asm_tnew(as, ir); break;
1868 case IR_TDUP: asm_tdup(as, ir); break;
1869 case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
1870
1871 /* Write barriers. */
1872 case IR_TBAR: asm_tbar(as, ir); break;
1873 case IR_OBAR: asm_obar(as, ir); break;
1874
1875 /* Type conversions. */
1876 case IR_CONV: asm_conv(as, ir); break;
1877 case IR_TOBIT: asm_tobit(as, ir); break;
1878 case IR_TOSTR: asm_tostr(as, ir); break;
1879 case IR_STRTO: asm_strto(as, ir); break;
1880
1881 /* Calls. */
1882 case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
1883 case IR_CALLXS: asm_callx(as, ir); break;
1884 case IR_CARG: break;
1885
1886 default:
1887 setintV(&as->J->errinfo, ir->o);
1888 lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
1889 break;
1890 }
1891}
1892
1893/* -- Trace setup --------------------------------------------------------- */ 1749/* -- Trace setup --------------------------------------------------------- */
1894 1750
1895/* Ensure there are enough stack slots for call arguments. */ 1751/* Ensure there are enough stack slots for call arguments. */
1896static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) 1752static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
1897{ 1753{
1898 IRRef args[CCI_NARGS_MAX*2]; 1754 IRRef args[CCI_NARGS_MAX*2];
1899 uint32_t i, nargs = (int)CCI_NARGS(ci); 1755 uint32_t i, nargs = CCI_XNARGS(ci);
1900 int nslots = 4, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; 1756 int nslots = 4, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
1901 asm_collectargs(as, ir, ci, args); 1757 asm_collectargs(as, ir, ci, args);
1902 for (i = 0; i < nargs; i++) { 1758 for (i = 0; i < nargs; i++) {
diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h
index e1a496a7..676bfcbf 100644
--- a/src/lj_asm_ppc.h
+++ b/src/lj_asm_ppc.h
@@ -249,7 +249,7 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir)
249/* Generate a call to a C function. */ 249/* Generate a call to a C function. */
250static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) 250static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
251{ 251{
252 uint32_t n, nargs = CCI_NARGS(ci); 252 uint32_t n, nargs = CCI_XNARGS(ci);
253 int32_t ofs = 8; 253 int32_t ofs = 8;
254 Reg gpr = REGARG_FIRSTGPR, fpr = REGARG_FIRSTFPR; 254 Reg gpr = REGARG_FIRSTGPR, fpr = REGARG_FIRSTFPR;
255 if ((void *)ci->func) 255 if ((void *)ci->func)
@@ -329,15 +329,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
329 } 329 }
330} 330}
331 331
332static void asm_call(ASMState *as, IRIns *ir)
333{
334 IRRef args[CCI_NARGS_MAX];
335 const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
336 asm_collectargs(as, ir, ci, args);
337 asm_setupresult(as, ir, ci);
338 asm_gencall(as, ci, args);
339}
340
341static void asm_callx(ASMState *as, IRIns *ir) 332static void asm_callx(ASMState *as, IRIns *ir)
342{ 333{
343 IRRef args[CCI_NARGS_MAX*2]; 334 IRRef args[CCI_NARGS_MAX*2];
@@ -361,16 +352,6 @@ static void asm_callx(ASMState *as, IRIns *ir)
361 asm_gencall(as, &ci, args); 352 asm_gencall(as, &ci, args);
362} 353}
363 354
364static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
365{
366 const CCallInfo *ci = &lj_ir_callinfo[id];
367 IRRef args[2];
368 args[0] = ir->op1;
369 args[1] = ir->op2;
370 asm_setupresult(as, ir, ci);
371 asm_gencall(as, ci, args);
372}
373
374/* -- Returns ------------------------------------------------------------- */ 355/* -- Returns ------------------------------------------------------------- */
375 356
376/* Return to lower frame. Guard that it goes to the right spot. */ 357/* Return to lower frame. Guard that it goes to the right spot. */
@@ -511,28 +492,6 @@ static void asm_conv(ASMState *as, IRIns *ir)
511 } 492 }
512} 493}
513 494
514#if LJ_HASFFI
515static void asm_conv64(ASMState *as, IRIns *ir)
516{
517 IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
518 IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
519 IRCallID id;
520 const CCallInfo *ci;
521 IRRef args[2];
522 args[0] = ir->op1;
523 args[1] = (ir-1)->op1;
524 if (st == IRT_NUM || st == IRT_FLOAT) {
525 id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
526 ir--;
527 } else {
528 id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
529 }
530 ci = &lj_ir_callinfo[id];
531 asm_setupresult(as, ir, ci);
532 asm_gencall(as, ci, args);
533}
534#endif
535
536static void asm_strto(ASMState *as, IRIns *ir) 495static void asm_strto(ASMState *as, IRIns *ir)
537{ 496{
538 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; 497 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
@@ -551,6 +510,8 @@ static void asm_strto(ASMState *as, IRIns *ir)
551 emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs); 510 emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs);
552} 511}
553 512
513/* -- Memory references --------------------------------------------------- */
514
554/* Get pointer to TValue. */ 515/* Get pointer to TValue. */
555static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) 516static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
556{ 517{
@@ -574,27 +535,6 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
574 } 535 }
575} 536}
576 537
577static void asm_tostr(ASMState *as, IRIns *ir)
578{
579 IRRef args[2];
580 args[0] = ASMREF_L;
581 as->gcsteps++;
582 if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) {
583 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
584 args[1] = ASMREF_TMP1; /* const lua_Number * */
585 asm_setupresult(as, ir, ci); /* GCstr * */
586 asm_gencall(as, ci, args);
587 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
588 } else {
589 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
590 args[1] = ir->op1; /* int32_t k */
591 asm_setupresult(as, ir, ci); /* GCstr * */
592 asm_gencall(as, ci, args);
593 }
594}
595
596/* -- Memory references --------------------------------------------------- */
597
598static void asm_aref(ASMState *as, IRIns *ir) 538static void asm_aref(ASMState *as, IRIns *ir)
599{ 539{
600 Reg dest = ra_dest(as, ir, RSET_GPR); 540 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -771,20 +711,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
771 } 711 }
772} 712}
773 713
774static void asm_newref(ASMState *as, IRIns *ir)
775{
776 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
777 IRRef args[3];
778 if (ir->r == RID_SINK)
779 return;
780 args[0] = ASMREF_L; /* lua_State *L */
781 args[1] = ir->op1; /* GCtab *t */
782 args[2] = ASMREF_TMP1; /* cTValue *key */
783 asm_setupresult(as, ir, ci); /* TValue * */
784 asm_gencall(as, ci, args);
785 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
786}
787
788static void asm_uref(ASMState *as, IRIns *ir) 714static void asm_uref(ASMState *as, IRIns *ir)
789{ 715{
790 /* NYI: Check that UREFO is still open and not aliasing a slot. */ 716 /* NYI: Check that UREFO is still open and not aliasing a slot. */
@@ -915,7 +841,7 @@ static void asm_xload(ASMState *as, IRIns *ir)
915 asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); 841 asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
916} 842}
917 843
918static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) 844static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
919{ 845{
920 IRIns *irb; 846 IRIns *irb;
921 if (ir->r == RID_SINK) 847 if (ir->r == RID_SINK)
@@ -932,6 +858,8 @@ static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
932 } 858 }
933} 859}
934 860
861#define asm_xstore(as, ir) asm_xstore_(as, ir, 0)
862
935static void asm_ahuvload(ASMState *as, IRIns *ir) 863static void asm_ahuvload(ASMState *as, IRIns *ir)
936{ 864{
937 IRType1 t = ir->t; 865 IRType1 t = ir->t;
@@ -1082,19 +1010,15 @@ dotypecheck:
1082static void asm_cnew(ASMState *as, IRIns *ir) 1010static void asm_cnew(ASMState *as, IRIns *ir)
1083{ 1011{
1084 CTState *cts = ctype_ctsG(J2G(as->J)); 1012 CTState *cts = ctype_ctsG(J2G(as->J));
1085 CTypeID ctypeid = (CTypeID)IR(ir->op1)->i; 1013 CTypeID id = (CTypeID)IR(ir->op1)->i;
1086 CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ? 1014 CTSize sz;
1087 lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i; 1015 CTInfo info = lj_ctype_info(cts, id, &sz);
1088 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; 1016 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
1089 IRRef args[2]; 1017 IRRef args[4];
1090 RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
1091 RegSet drop = RSET_SCRATCH; 1018 RegSet drop = RSET_SCRATCH;
1092 lua_assert(sz != CTSIZE_INVALID); 1019 lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
1093 1020
1094 args[0] = ASMREF_L; /* lua_State *L */
1095 args[1] = ASMREF_TMP1; /* MSize size */
1096 as->gcsteps++; 1021 as->gcsteps++;
1097
1098 if (ra_hasreg(ir->r)) 1022 if (ra_hasreg(ir->r))
1099 rset_clear(drop, ir->r); /* Dest reg handled below. */ 1023 rset_clear(drop, ir->r); /* Dest reg handled below. */
1100 ra_evictset(as, drop); 1024 ra_evictset(as, drop);
@@ -1103,6 +1027,7 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1103 1027
1104 /* Initialize immutable cdata object. */ 1028 /* Initialize immutable cdata object. */
1105 if (ir->o == IR_CNEWI) { 1029 if (ir->o == IR_CNEWI) {
1030 RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
1106 int32_t ofs = sizeof(GCcdata); 1031 int32_t ofs = sizeof(GCcdata);
1107 lua_assert(sz == 4 || sz == 8); 1032 lua_assert(sz == 4 || sz == 8);
1108 if (sz == 8) { 1033 if (sz == 8) {
@@ -1116,12 +1041,24 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1116 if (ofs == sizeof(GCcdata)) break; 1041 if (ofs == sizeof(GCcdata)) break;
1117 ofs -= 4; ir++; 1042 ofs -= 4; ir++;
1118 } 1043 }
1044 } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */
1045 ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
1046 args[0] = ASMREF_L; /* lua_State *L */
1047 args[1] = ir->op1; /* CTypeID id */
1048 args[2] = ir->op2; /* CTSize sz */
1049 args[3] = ASMREF_TMP1; /* CTSize align */
1050 asm_gencall(as, ci, args);
1051 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
1052 return;
1119 } 1053 }
1054
1120 /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ 1055 /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
1121 emit_tai(as, PPCI_STB, RID_RET+1, RID_RET, offsetof(GCcdata, gct)); 1056 emit_tai(as, PPCI_STB, RID_RET+1, RID_RET, offsetof(GCcdata, gct));
1122 emit_tai(as, PPCI_STH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid)); 1057 emit_tai(as, PPCI_STH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid));
1123 emit_ti(as, PPCI_LI, RID_RET+1, ~LJ_TCDATA); 1058 emit_ti(as, PPCI_LI, RID_RET+1, ~LJ_TCDATA);
1124 emit_ti(as, PPCI_LI, RID_TMP, ctypeid); /* Lower 16 bit used. Sign-ext ok. */ 1059 emit_ti(as, PPCI_LI, RID_TMP, id); /* Lower 16 bit used. Sign-ext ok. */
1060 args[0] = ASMREF_L; /* lua_State *L */
1061 args[1] = ASMREF_TMP1; /* MSize size */
1125 asm_gencall(as, ci, args); 1062 asm_gencall(as, ci, args);
1126 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), 1063 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
1127 ra_releasetmp(as, ASMREF_TMP1)); 1064 ra_releasetmp(as, ASMREF_TMP1));
@@ -1195,23 +1132,14 @@ static void asm_fpunary(ASMState *as, IRIns *ir, PPCIns pi)
1195 emit_fb(as, pi, dest, left); 1132 emit_fb(as, pi, dest, left);
1196} 1133}
1197 1134
1198static int asm_fpjoin_pow(ASMState *as, IRIns *ir) 1135static void asm_fpmath(ASMState *as, IRIns *ir)
1199{ 1136{
1200 IRIns *irp = IR(ir->op1); 1137 if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
1201 if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) { 1138 return;
1202 IRIns *irpp = IR(irp->op1); 1139 if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT))
1203 if (irpp == ir-2 && irpp->o == IR_FPMATH && 1140 asm_fpunary(as, ir, PPCI_FSQRT);
1204 irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) { 1141 else
1205 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow]; 1142 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
1206 IRRef args[2];
1207 args[0] = irpp->op1;
1208 args[1] = irp->op2;
1209 asm_setupresult(as, ir, ci);
1210 asm_gencall(as, ci, args);
1211 return 1;
1212 }
1213 }
1214 return 0;
1215} 1143}
1216 1144
1217static void asm_add(ASMState *as, IRIns *ir) 1145static void asm_add(ASMState *as, IRIns *ir)
@@ -1311,6 +1239,10 @@ static void asm_mul(ASMState *as, IRIns *ir)
1311 } 1239 }
1312} 1240}
1313 1241
1242#define asm_div(as, ir) asm_fparith(as, ir, PPCI_FDIV)
1243#define asm_mod(as, ir) asm_callid(as, ir, IRCALL_lj_vm_modi)
1244#define asm_pow(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi)
1245
1314static void asm_neg(ASMState *as, IRIns *ir) 1246static void asm_neg(ASMState *as, IRIns *ir)
1315{ 1247{
1316 if (irt_isnum(ir->t)) { 1248 if (irt_isnum(ir->t)) {
@@ -1329,6 +1261,10 @@ static void asm_neg(ASMState *as, IRIns *ir)
1329 } 1261 }
1330} 1262}
1331 1263
1264#define asm_abs(as, ir) asm_fpunary(as, ir, PPCI_FABS)
1265#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2)
1266#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp)
1267
1332static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi) 1268static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi)
1333{ 1269{
1334 Reg dest, left, right; 1270 Reg dest, left, right;
@@ -1344,6 +1280,10 @@ static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi)
1344 emit_tab(as, pi|PPCF_DOT, dest, left, right); 1280 emit_tab(as, pi|PPCF_DOT, dest, left, right);
1345} 1281}
1346 1282
1283#define asm_addov(as, ir) asm_arithov(as, ir, PPCI_ADDO)
1284#define asm_subov(as, ir) asm_arithov(as, ir, PPCI_SUBFO)
1285#define asm_mulov(as, ir) asm_arithov(as, ir, PPCI_MULLWO)
1286
1347#if LJ_HASFFI 1287#if LJ_HASFFI
1348static void asm_add64(ASMState *as, IRIns *ir) 1288static void asm_add64(ASMState *as, IRIns *ir)
1349{ 1289{
@@ -1423,7 +1363,7 @@ static void asm_neg64(ASMState *as, IRIns *ir)
1423} 1363}
1424#endif 1364#endif
1425 1365
1426static void asm_bitnot(ASMState *as, IRIns *ir) 1366static void asm_bnot(ASMState *as, IRIns *ir)
1427{ 1367{
1428 Reg dest, left, right; 1368 Reg dest, left, right;
1429 PPCIns pi = PPCI_NOR; 1369 PPCIns pi = PPCI_NOR;
@@ -1450,7 +1390,7 @@ nofuse:
1450 emit_asb(as, pi, dest, left, right); 1390 emit_asb(as, pi, dest, left, right);
1451} 1391}
1452 1392
1453static void asm_bitswap(ASMState *as, IRIns *ir) 1393static void asm_bswap(ASMState *as, IRIns *ir)
1454{ 1394{
1455 Reg dest = ra_dest(as, ir, RSET_GPR); 1395 Reg dest = ra_dest(as, ir, RSET_GPR);
1456 IRIns *irx; 1396 IRIns *irx;
@@ -1471,32 +1411,6 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
1471 } 1411 }
1472} 1412}
1473 1413
1474static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
1475{
1476 Reg dest = ra_dest(as, ir, RSET_GPR);
1477 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
1478 if (irref_isk(ir->op2)) {
1479 int32_t k = IR(ir->op2)->i;
1480 Reg tmp = left;
1481 if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) {
1482 if (!checku16(k)) {
1483 emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16));
1484 if ((k & 0xffff) == 0) return;
1485 }
1486 emit_asi(as, pik, dest, left, k);
1487 return;
1488 }
1489 }
1490 /* May fail due to spills/restores above, but simplifies the logic. */
1491 if (as->flagmcp == as->mcp) {
1492 as->flagmcp = NULL;
1493 as->mcp++;
1494 pi |= PPCF_DOT;
1495 }
1496 right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
1497 emit_asb(as, pi, dest, left, right);
1498}
1499
1500/* Fuse BAND with contiguous bitmask and a shift to rlwinm. */ 1414/* Fuse BAND with contiguous bitmask and a shift to rlwinm. */
1501static void asm_fuseandsh(ASMState *as, PPCIns pi, int32_t mask, IRRef ref) 1415static void asm_fuseandsh(ASMState *as, PPCIns pi, int32_t mask, IRRef ref)
1502{ 1416{
@@ -1527,7 +1441,7 @@ nofuse:
1527 *--as->mcp = pi | PPCF_T(left); 1441 *--as->mcp = pi | PPCF_T(left);
1528} 1442}
1529 1443
1530static void asm_bitand(ASMState *as, IRIns *ir) 1444static void asm_band(ASMState *as, IRIns *ir)
1531{ 1445{
1532 Reg dest, left, right; 1446 Reg dest, left, right;
1533 IRRef lref = ir->op1; 1447 IRRef lref = ir->op1;
@@ -1582,6 +1496,35 @@ static void asm_bitand(ASMState *as, IRIns *ir)
1582 emit_asb(as, PPCI_AND ^ dot, dest, left, right); 1496 emit_asb(as, PPCI_AND ^ dot, dest, left, right);
1583} 1497}
1584 1498
1499static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
1500{
1501 Reg dest = ra_dest(as, ir, RSET_GPR);
1502 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
1503 if (irref_isk(ir->op2)) {
1504 int32_t k = IR(ir->op2)->i;
1505 Reg tmp = left;
1506 if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) {
1507 if (!checku16(k)) {
1508 emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16));
1509 if ((k & 0xffff) == 0) return;
1510 }
1511 emit_asi(as, pik, dest, left, k);
1512 return;
1513 }
1514 }
1515 /* May fail due to spills/restores above, but simplifies the logic. */
1516 if (as->flagmcp == as->mcp) {
1517 as->flagmcp = NULL;
1518 as->mcp++;
1519 pi |= PPCF_DOT;
1520 }
1521 right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
1522 emit_asb(as, pi, dest, left, right);
1523}
1524
1525#define asm_bor(as, ir) asm_bitop(as, ir, PPCI_OR, PPCI_ORI)
1526#define asm_bxor(as, ir) asm_bitop(as, ir, PPCI_XOR, PPCI_XORI)
1527
1585static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) 1528static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
1586{ 1529{
1587 Reg dest, left; 1530 Reg dest, left;
@@ -1607,6 +1550,14 @@ static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
1607 } 1550 }
1608} 1551}
1609 1552
1553#define asm_bshl(as, ir) asm_bitshift(as, ir, PPCI_SLW, 0)
1554#define asm_bshr(as, ir) asm_bitshift(as, ir, PPCI_SRW, 1)
1555#define asm_bsar(as, ir) asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI)
1556#define asm_brol(as, ir) \
1557 asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31), \
1558 PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31))
1559#define asm_bror(as, ir) lua_assert(0)
1560
1610static void asm_min_max(ASMState *as, IRIns *ir, int ismax) 1561static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
1611{ 1562{
1612 if (irt_isnum(ir->t)) { 1563 if (irt_isnum(ir->t)) {
@@ -1637,6 +1588,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
1637 } 1588 }
1638} 1589}
1639 1590
1591#define asm_min(as, ir) asm_min_max(as, ir, 0)
1592#define asm_max(as, ir) asm_min_max(as, ir, 1)
1593
1640/* -- Comparisons --------------------------------------------------------- */ 1594/* -- Comparisons --------------------------------------------------------- */
1641 1595
1642#define CC_UNSIGNED 0x08 /* Unsigned integer comparison. */ 1596#define CC_UNSIGNED 0x08 /* Unsigned integer comparison. */
@@ -1713,6 +1667,8 @@ static void asm_comp(ASMState *as, IRIns *ir)
1713 } 1667 }
1714} 1668}
1715 1669
1670#define asm_equal(as, ir) asm_comp(as, ir)
1671
1716#if LJ_HASFFI 1672#if LJ_HASFFI
1717/* 64 bit integer comparisons. */ 1673/* 64 bit integer comparisons. */
1718static void asm_comp64(ASMState *as, IRIns *ir) 1674static void asm_comp64(ASMState *as, IRIns *ir)
@@ -1758,8 +1714,8 @@ static void asm_hiop(ASMState *as, IRIns *ir)
1758 } else if ((ir-1)->o == IR_XSTORE) { 1714 } else if ((ir-1)->o == IR_XSTORE) {
1759 as->curins--; /* Handle both stores here. */ 1715 as->curins--; /* Handle both stores here. */
1760 if ((ir-1)->r != RID_SINK) { 1716 if ((ir-1)->r != RID_SINK) {
1761 asm_xstore(as, ir, 0); 1717 asm_xstore_(as, ir, 0);
1762 asm_xstore(as, ir-1, 4); 1718 asm_xstore_(as, ir-1, 4);
1763 } 1719 }
1764 return; 1720 return;
1765 } 1721 }
@@ -1783,6 +1739,17 @@ static void asm_hiop(ASMState *as, IRIns *ir)
1783#endif 1739#endif
1784} 1740}
1785 1741
1742/* -- Profiling ----------------------------------------------------------- */
1743
1744static void asm_prof(ASMState *as, IRIns *ir)
1745{
1746 UNUSED(ir);
1747 asm_guardcc(as, CC_NE);
1748 emit_asi(as, PPCI_ANDIDOT, RID_TMP, RID_TMP, HOOK_PROFILE);
1749 emit_lsglptr(as, PPCI_LBZ, RID_TMP,
1750 (int32_t)offsetof(global_State, hookmask));
1751}
1752
1786/* -- Stack handling ------------------------------------------------------ */ 1753/* -- Stack handling ------------------------------------------------------ */
1787 1754
1788/* Check Lua stack size for overflow. Use exit handler as fallback. */ 1755/* Check Lua stack size for overflow. Use exit handler as fallback. */
@@ -1804,7 +1771,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
1804 emit_tai(as, PPCI_LWZ, tmp, tmp, offsetof(lua_State, maxstack)); 1771 emit_tai(as, PPCI_LWZ, tmp, tmp, offsetof(lua_State, maxstack));
1805 if (pbase == RID_TMP) 1772 if (pbase == RID_TMP)
1806 emit_getgl(as, RID_TMP, jit_base); 1773 emit_getgl(as, RID_TMP, jit_base);
1807 emit_getgl(as, tmp, jit_L); 1774 emit_getgl(as, tmp, cur_L);
1808 if (allow == RSET_EMPTY) /* Spill temp. register. */ 1775 if (allow == RSET_EMPTY) /* Spill temp. register. */
1809 emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPW); 1776 emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPW);
1810} 1777}
@@ -1965,136 +1932,13 @@ static void asm_tail_prep(ASMState *as)
1965 } 1932 }
1966} 1933}
1967 1934
1968/* -- Instruction dispatch ------------------------------------------------ */
1969
1970/* Assemble a single instruction. */
1971static void asm_ir(ASMState *as, IRIns *ir)
1972{
1973 switch ((IROp)ir->o) {
1974 /* Miscellaneous ops. */
1975 case IR_LOOP: asm_loop(as); break;
1976 case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
1977 case IR_USE:
1978 ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
1979 case IR_PHI: asm_phi(as, ir); break;
1980 case IR_HIOP: asm_hiop(as, ir); break;
1981 case IR_GCSTEP: asm_gcstep(as, ir); break;
1982
1983 /* Guarded assertions. */
1984 case IR_EQ: case IR_NE:
1985 if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
1986 as->curins--;
1987 asm_href(as, ir-1, (IROp)ir->o);
1988 break;
1989 }
1990 /* fallthrough */
1991 case IR_LT: case IR_GE: case IR_LE: case IR_GT:
1992 case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
1993 case IR_ABC:
1994 asm_comp(as, ir);
1995 break;
1996
1997 case IR_RETF: asm_retf(as, ir); break;
1998
1999 /* Bit ops. */
2000 case IR_BNOT: asm_bitnot(as, ir); break;
2001 case IR_BSWAP: asm_bitswap(as, ir); break;
2002
2003 case IR_BAND: asm_bitand(as, ir); break;
2004 case IR_BOR: asm_bitop(as, ir, PPCI_OR, PPCI_ORI); break;
2005 case IR_BXOR: asm_bitop(as, ir, PPCI_XOR, PPCI_XORI); break;
2006
2007 case IR_BSHL: asm_bitshift(as, ir, PPCI_SLW, 0); break;
2008 case IR_BSHR: asm_bitshift(as, ir, PPCI_SRW, 1); break;
2009 case IR_BSAR: asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI); break;
2010 case IR_BROL: asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31),
2011 PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31)); break;
2012 case IR_BROR: lua_assert(0); break;
2013
2014 /* Arithmetic ops. */
2015 case IR_ADD: asm_add(as, ir); break;
2016 case IR_SUB: asm_sub(as, ir); break;
2017 case IR_MUL: asm_mul(as, ir); break;
2018 case IR_DIV: asm_fparith(as, ir, PPCI_FDIV); break;
2019 case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;
2020 case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;
2021 case IR_NEG: asm_neg(as, ir); break;
2022
2023 case IR_ABS: asm_fpunary(as, ir, PPCI_FABS); break;
2024 case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;
2025 case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;
2026 case IR_MIN: asm_min_max(as, ir, 0); break;
2027 case IR_MAX: asm_min_max(as, ir, 1); break;
2028 case IR_FPMATH:
2029 if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
2030 break;
2031 if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT))
2032 asm_fpunary(as, ir, PPCI_FSQRT);
2033 else
2034 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
2035 break;
2036
2037 /* Overflow-checking arithmetic ops. */
2038 case IR_ADDOV: asm_arithov(as, ir, PPCI_ADDO); break;
2039 case IR_SUBOV: asm_arithov(as, ir, PPCI_SUBFO); break;
2040 case IR_MULOV: asm_arithov(as, ir, PPCI_MULLWO); break;
2041
2042 /* Memory references. */
2043 case IR_AREF: asm_aref(as, ir); break;
2044 case IR_HREF: asm_href(as, ir, 0); break;
2045 case IR_HREFK: asm_hrefk(as, ir); break;
2046 case IR_NEWREF: asm_newref(as, ir); break;
2047 case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
2048 case IR_FREF: asm_fref(as, ir); break;
2049 case IR_STRREF: asm_strref(as, ir); break;
2050
2051 /* Loads and stores. */
2052 case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
2053 asm_ahuvload(as, ir);
2054 break;
2055 case IR_FLOAD: asm_fload(as, ir); break;
2056 case IR_XLOAD: asm_xload(as, ir); break;
2057 case IR_SLOAD: asm_sload(as, ir); break;
2058
2059 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
2060 case IR_FSTORE: asm_fstore(as, ir); break;
2061 case IR_XSTORE: asm_xstore(as, ir, 0); break;
2062
2063 /* Allocations. */
2064 case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
2065 case IR_TNEW: asm_tnew(as, ir); break;
2066 case IR_TDUP: asm_tdup(as, ir); break;
2067 case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
2068
2069 /* Write barriers. */
2070 case IR_TBAR: asm_tbar(as, ir); break;
2071 case IR_OBAR: asm_obar(as, ir); break;
2072
2073 /* Type conversions. */
2074 case IR_CONV: asm_conv(as, ir); break;
2075 case IR_TOBIT: asm_tobit(as, ir); break;
2076 case IR_TOSTR: asm_tostr(as, ir); break;
2077 case IR_STRTO: asm_strto(as, ir); break;
2078
2079 /* Calls. */
2080 case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
2081 case IR_CALLXS: asm_callx(as, ir); break;
2082 case IR_CARG: break;
2083
2084 default:
2085 setintV(&as->J->errinfo, ir->o);
2086 lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
2087 break;
2088 }
2089}
2090
2091/* -- Trace setup --------------------------------------------------------- */ 1935/* -- Trace setup --------------------------------------------------------- */
2092 1936
2093/* Ensure there are enough stack slots for call arguments. */ 1937/* Ensure there are enough stack slots for call arguments. */
2094static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) 1938static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
2095{ 1939{
2096 IRRef args[CCI_NARGS_MAX*2]; 1940 IRRef args[CCI_NARGS_MAX*2];
2097 uint32_t i, nargs = (int)CCI_NARGS(ci); 1941 uint32_t i, nargs = CCI_XNARGS(ci);
2098 int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; 1942 int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
2099 asm_collectargs(as, ir, ci, args); 1943 asm_collectargs(as, ir, ci, args);
2100 for (i = 0; i < nargs; i++) 1944 for (i = 0; i < nargs; i++)
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
index fda911e5..3e87ba18 100644
--- a/src/lj_asm_x86.h
+++ b/src/lj_asm_x86.h
@@ -384,7 +384,7 @@ static Reg asm_fuseloadm(ASMState *as, IRRef ref, RegSet allow, int is64)
384/* Count the required number of stack slots for a call. */ 384/* Count the required number of stack slots for a call. */
385static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args) 385static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args)
386{ 386{
387 uint32_t i, nargs = CCI_NARGS(ci); 387 uint32_t i, nargs = CCI_XNARGS(ci);
388 int nslots = 0; 388 int nslots = 0;
389#if LJ_64 389#if LJ_64
390 if (LJ_ABI_WIN) { 390 if (LJ_ABI_WIN) {
@@ -417,7 +417,7 @@ static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args)
417/* Generate a call to a C function. */ 417/* Generate a call to a C function. */
418static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) 418static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
419{ 419{
420 uint32_t n, nargs = CCI_NARGS(ci); 420 uint32_t n, nargs = CCI_XNARGS(ci);
421 int32_t ofs = STACKARG_OFS; 421 int32_t ofs = STACKARG_OFS;
422#if LJ_64 422#if LJ_64
423 uint32_t gprs = REGARG_GPRS; 423 uint32_t gprs = REGARG_GPRS;
@@ -552,7 +552,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
552 if (ra_hasreg(dest)) { 552 if (ra_hasreg(dest)) {
553 ra_free(as, dest); 553 ra_free(as, dest);
554 ra_modified(as, dest); 554 ra_modified(as, dest);
555 emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, 555 emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS,
556 dest, RID_ESP, ofs); 556 dest, RID_ESP, ofs);
557 } 557 }
558 if ((ci->flags & CCI_CASTU64)) { 558 if ((ci->flags & CCI_CASTU64)) {
@@ -576,15 +576,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
576 } 576 }
577} 577}
578 578
579static void asm_call(ASMState *as, IRIns *ir)
580{
581 IRRef args[CCI_NARGS_MAX];
582 const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
583 asm_collectargs(as, ir, ci, args);
584 asm_setupresult(as, ir, ci);
585 asm_gencall(as, ci, args);
586}
587
588/* Return a constant function pointer or NULL for indirect calls. */ 579/* Return a constant function pointer or NULL for indirect calls. */
589static void *asm_callx_func(ASMState *as, IRIns *irf, IRRef func) 580static void *asm_callx_func(ASMState *as, IRIns *irf, IRRef func)
590{ 581{
@@ -664,8 +655,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
664 asm_guardcc(as, CC_NE); 655 asm_guardcc(as, CC_NE);
665 emit_rr(as, XO_UCOMISD, left, tmp); 656 emit_rr(as, XO_UCOMISD, left, tmp);
666 emit_rr(as, XO_CVTSI2SD, tmp, dest); 657 emit_rr(as, XO_CVTSI2SD, tmp, dest);
667 if (!(as->flags & JIT_F_SPLIT_XMM)) 658 emit_rr(as, XO_XORPS, tmp, tmp); /* Avoid partial register stall. */
668 emit_rr(as, XO_XORPS, tmp, tmp); /* Avoid partial register stall. */
669 emit_rr(as, XO_CVTTSD2SI, dest, left); 659 emit_rr(as, XO_CVTTSD2SI, dest, left);
670 /* Can't fuse since left is needed twice. */ 660 /* Can't fuse since left is needed twice. */
671} 661}
@@ -721,8 +711,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
721 emit_mrm(as, irt_isnum(ir->t) ? XO_CVTSI2SD : XO_CVTSI2SS, 711 emit_mrm(as, irt_isnum(ir->t) ? XO_CVTSI2SD : XO_CVTSI2SS,
722 dest|((LJ_64 && (st64 || st == IRT_U32)) ? REX_64 : 0), left); 712 dest|((LJ_64 && (st64 || st == IRT_U32)) ? REX_64 : 0), left);
723 } 713 }
724 if (!(as->flags & JIT_F_SPLIT_XMM)) 714 emit_rr(as, XO_XORPS, dest, dest); /* Avoid partial register stall. */
725 emit_rr(as, XO_XORPS, dest, dest); /* Avoid partial register stall. */
726 } else if (stfp) { /* FP to integer conversion. */ 715 } else if (stfp) { /* FP to integer conversion. */
727 if (irt_isguard(ir->t)) { 716 if (irt_isguard(ir->t)) {
728 /* Checked conversions are only supported from number to int. */ 717 /* Checked conversions are only supported from number to int. */
@@ -730,9 +719,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
730 asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); 719 asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
731 } else { 720 } else {
732 Reg dest = ra_dest(as, ir, RSET_GPR); 721 Reg dest = ra_dest(as, ir, RSET_GPR);
733 x86Op op = st == IRT_NUM ? 722 x86Op op = st == IRT_NUM ? XO_CVTTSD2SI : XO_CVTTSS2SI;
734 ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSD2SI : XO_CVTSD2SI) :
735 ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSS2SI : XO_CVTSS2SI);
736 if (LJ_64 ? irt_isu64(ir->t) : irt_isu32(ir->t)) { 723 if (LJ_64 ? irt_isu64(ir->t) : irt_isu32(ir->t)) {
737 /* LJ_64: For inputs >= 2^63 add -2^64, convert again. */ 724 /* LJ_64: For inputs >= 2^63 add -2^64, convert again. */
738 /* LJ_32: For inputs >= 2^31 add -2^31, convert again and add 2^31. */ 725 /* LJ_32: For inputs >= 2^31 add -2^31, convert again and add 2^31. */
@@ -826,8 +813,7 @@ static void asm_conv_fp_int64(ASMState *as, IRIns *ir)
826 if (ra_hasreg(dest)) { 813 if (ra_hasreg(dest)) {
827 ra_free(as, dest); 814 ra_free(as, dest);
828 ra_modified(as, dest); 815 ra_modified(as, dest);
829 emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, 816 emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, dest, RID_ESP, ofs);
830 dest, RID_ESP, ofs);
831 } 817 }
832 emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd, 818 emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd,
833 irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs); 819 irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs);
@@ -855,7 +841,6 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
855 Reg lo, hi; 841 Reg lo, hi;
856 lua_assert(st == IRT_NUM || st == IRT_FLOAT); 842 lua_assert(st == IRT_NUM || st == IRT_FLOAT);
857 lua_assert(dt == IRT_I64 || dt == IRT_U64); 843 lua_assert(dt == IRT_I64 || dt == IRT_U64);
858 lua_assert(((ir-1)->op2 & IRCONV_TRUNC));
859 hi = ra_dest(as, ir, RSET_GPR); 844 hi = ra_dest(as, ir, RSET_GPR);
860 lo = ra_dest(as, ir-1, rset_exclude(RSET_GPR, hi)); 845 lo = ra_dest(as, ir-1, rset_exclude(RSET_GPR, hi));
861 if (ra_used(ir-1)) emit_rmro(as, XO_MOV, lo, RID_ESP, 0); 846 if (ra_used(ir-1)) emit_rmro(as, XO_MOV, lo, RID_ESP, 0);
@@ -898,6 +883,14 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
898 st == IRT_NUM ? XOg_FLDq: XOg_FLDd, 883 st == IRT_NUM ? XOg_FLDq: XOg_FLDd,
899 asm_fuseload(as, ir->op1, RSET_EMPTY)); 884 asm_fuseload(as, ir->op1, RSET_EMPTY));
900} 885}
886
887static void asm_conv64(ASMState *as, IRIns *ir)
888{
889 if (irt_isfp(ir->t))
890 asm_conv_fp_int64(as, ir);
891 else
892 asm_conv_int64_fp(as, ir);
893}
901#endif 894#endif
902 895
903static void asm_strto(ASMState *as, IRIns *ir) 896static void asm_strto(ASMState *as, IRIns *ir)
@@ -919,29 +912,32 @@ static void asm_strto(ASMState *as, IRIns *ir)
919 RID_ESP, sps_scale(ir->s)); 912 RID_ESP, sps_scale(ir->s));
920} 913}
921 914
922static void asm_tostr(ASMState *as, IRIns *ir) 915/* -- Memory references --------------------------------------------------- */
916
917/* Get pointer to TValue. */
918static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
923{ 919{
924 IRIns *irl = IR(ir->op1); 920 IRIns *ir = IR(ref);
925 IRRef args[2]; 921 if (irt_isnum(ir->t)) {
926 args[0] = ASMREF_L; 922 /* For numbers use the constant itself or a spill slot as a TValue. */
927 as->gcsteps++; 923 if (irref_isk(ref))
928 if (irt_isnum(irl->t)) { 924 emit_loada(as, dest, ir_knum(ir));
929 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum]; 925 else
930 args[1] = ASMREF_TMP1; /* const lua_Number * */ 926 emit_rmro(as, XO_LEA, dest|REX_64, RID_ESP, ra_spill(as, ir));
931 asm_setupresult(as, ir, ci); /* GCstr * */
932 asm_gencall(as, ci, args);
933 emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1)|REX_64,
934 RID_ESP, ra_spill(as, irl));
935 } else { 927 } else {
936 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint]; 928 /* Otherwise use g->tmptv to hold the TValue. */
937 args[1] = ir->op1; /* int32_t k */ 929 if (!irref_isk(ref)) {
938 asm_setupresult(as, ir, ci); /* GCstr * */ 930 Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest));
939 asm_gencall(as, ci, args); 931 emit_movtomro(as, REX_64IR(ir, src), dest, 0);
932 } else if (!irt_ispri(ir->t)) {
933 emit_movmroi(as, dest, 0, ir->i);
934 }
935 if (!(LJ_64 && irt_islightud(ir->t)))
936 emit_movmroi(as, dest, 4, irt_toitype(ir->t));
937 emit_loada(as, dest, &J2G(as->J)->tmptv);
940 } 938 }
941} 939}
942 940
943/* -- Memory references --------------------------------------------------- */
944
945static void asm_aref(ASMState *as, IRIns *ir) 941static void asm_aref(ASMState *as, IRIns *ir)
946{ 942{
947 Reg dest = ra_dest(as, ir, RSET_GPR); 943 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -952,23 +948,6 @@ static void asm_aref(ASMState *as, IRIns *ir)
952 emit_rr(as, XO_MOV, dest, as->mrm.base); 948 emit_rr(as, XO_MOV, dest, as->mrm.base);
953} 949}
954 950
955/* Merge NE(HREF, niltv) check. */
956static MCode *merge_href_niltv(ASMState *as, IRIns *ir)
957{
958 /* Assumes nothing else generates NE of HREF. */
959 if ((ir[1].o == IR_NE || ir[1].o == IR_EQ) && ir[1].op1 == as->curins &&
960 ra_hasreg(ir->r)) {
961 MCode *p = as->mcp;
962 p += (LJ_64 && *p != XI_ARITHi) ? 7+6 : 6+6;
963 /* Ensure no loop branch inversion happened. */
964 if (p[-6] == 0x0f && p[-5] == XI_JCCn+(CC_NE^(ir[1].o & 1))) {
965 as->mcp = p; /* Kill cmp reg, imm32 + jz exit. */
966 return p + *(int32_t *)(p-4); /* Return exit address. */
967 }
968 }
969 return NULL;
970}
971
972/* Inlined hash lookup. Specialized for key type and for const keys. 951/* Inlined hash lookup. Specialized for key type and for const keys.
973** The equivalent C code is: 952** The equivalent C code is:
974** Node *n = hashkey(t, key); 953** Node *n = hashkey(t, key);
@@ -977,10 +956,10 @@ static MCode *merge_href_niltv(ASMState *as, IRIns *ir)
977** } while ((n = nextnode(n))); 956** } while ((n = nextnode(n)));
978** return niltv(L); 957** return niltv(L);
979*/ 958*/
980static void asm_href(ASMState *as, IRIns *ir) 959static void asm_href(ASMState *as, IRIns *ir, IROp merge)
981{ 960{
982 MCode *nilexit = merge_href_niltv(as, ir); /* Do this before any restores. */
983 RegSet allow = RSET_GPR; 961 RegSet allow = RSET_GPR;
962 int destused = ra_used(ir);
984 Reg dest = ra_dest(as, ir, allow); 963 Reg dest = ra_dest(as, ir, allow);
985 Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); 964 Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
986 Reg key = RID_NONE, tmp = RID_NONE; 965 Reg key = RID_NONE, tmp = RID_NONE;
@@ -997,14 +976,12 @@ static void asm_href(ASMState *as, IRIns *ir)
997 tmp = ra_scratch(as, rset_exclude(allow, key)); 976 tmp = ra_scratch(as, rset_exclude(allow, key));
998 } 977 }
999 978
1000 /* Key not found in chain: jump to exit (if merged with NE) or load niltv. */ 979 /* Key not found in chain: jump to exit (if merged) or load niltv. */
1001 l_end = emit_label(as); 980 l_end = emit_label(as);
1002 if (nilexit && ir[1].o == IR_NE) { 981 if (merge == IR_NE)
1003 emit_jcc(as, CC_E, nilexit); /* XI_JMP is not found by lj_asm_patchexit. */ 982 asm_guardcc(as, CC_E); /* XI_JMP is not found by lj_asm_patchexit. */
1004 nilexit = NULL; 983 else if (destused)
1005 } else {
1006 emit_loada(as, dest, niltvg(J2G(as->J))); 984 emit_loada(as, dest, niltvg(J2G(as->J)));
1007 }
1008 985
1009 /* Follow hash chain until the end. */ 986 /* Follow hash chain until the end. */
1010 l_loop = emit_sjcc_label(as, CC_NZ); 987 l_loop = emit_sjcc_label(as, CC_NZ);
@@ -1013,8 +990,8 @@ static void asm_href(ASMState *as, IRIns *ir)
1013 l_next = emit_label(as); 990 l_next = emit_label(as);
1014 991
1015 /* Type and value comparison. */ 992 /* Type and value comparison. */
1016 if (nilexit) 993 if (merge == IR_EQ)
1017 emit_jcc(as, CC_E, nilexit); 994 asm_guardcc(as, CC_E);
1018 else 995 else
1019 emit_sjcc(as, CC_E, l_end); 996 emit_sjcc(as, CC_E, l_end);
1020 if (irt_isnum(kt)) { 997 if (irt_isnum(kt)) {
@@ -1170,41 +1147,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
1170#endif 1147#endif
1171} 1148}
1172 1149
1173static void asm_newref(ASMState *as, IRIns *ir)
1174{
1175 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
1176 IRRef args[3];
1177 IRIns *irkey;
1178 Reg tmp;
1179 if (ir->r == RID_SINK)
1180 return;
1181 args[0] = ASMREF_L; /* lua_State *L */
1182 args[1] = ir->op1; /* GCtab *t */
1183 args[2] = ASMREF_TMP1; /* cTValue *key */
1184 asm_setupresult(as, ir, ci); /* TValue * */
1185 asm_gencall(as, ci, args);
1186 tmp = ra_releasetmp(as, ASMREF_TMP1);
1187 irkey = IR(ir->op2);
1188 if (irt_isnum(irkey->t)) {
1189 /* For numbers use the constant itself or a spill slot as a TValue. */
1190 if (irref_isk(ir->op2))
1191 emit_loada(as, tmp, ir_knum(irkey));
1192 else
1193 emit_rmro(as, XO_LEA, tmp|REX_64, RID_ESP, ra_spill(as, irkey));
1194 } else {
1195 /* Otherwise use g->tmptv to hold the TValue. */
1196 if (!irref_isk(ir->op2)) {
1197 Reg src = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, tmp));
1198 emit_movtomro(as, REX_64IR(irkey, src), tmp, 0);
1199 } else if (!irt_ispri(irkey->t)) {
1200 emit_movmroi(as, tmp, 0, irkey->i);
1201 }
1202 if (!(LJ_64 && irt_islightud(irkey->t)))
1203 emit_movmroi(as, tmp, 4, irt_toitype(irkey->t));
1204 emit_loada(as, tmp, &J2G(as->J)->tmptv);
1205 }
1206}
1207
1208static void asm_uref(ASMState *as, IRIns *ir) 1150static void asm_uref(ASMState *as, IRIns *ir)
1209{ 1151{
1210 /* NYI: Check that UREFO is still open and not aliasing a slot. */ 1152 /* NYI: Check that UREFO is still open and not aliasing a slot. */
@@ -1264,7 +1206,7 @@ static void asm_fxload(ASMState *as, IRIns *ir)
1264 case IRT_U8: xo = XO_MOVZXb; break; 1206 case IRT_U8: xo = XO_MOVZXb; break;
1265 case IRT_I16: xo = XO_MOVSXw; break; 1207 case IRT_I16: xo = XO_MOVSXw; break;
1266 case IRT_U16: xo = XO_MOVZXw; break; 1208 case IRT_U16: xo = XO_MOVZXw; break;
1267 case IRT_NUM: xo = XMM_MOVRM(as); break; 1209 case IRT_NUM: xo = XO_MOVSD; break;
1268 case IRT_FLOAT: xo = XO_MOVSS; break; 1210 case IRT_FLOAT: xo = XO_MOVSS; break;
1269 default: 1211 default:
1270 if (LJ_64 && irt_is64(ir->t)) 1212 if (LJ_64 && irt_is64(ir->t))
@@ -1277,6 +1219,9 @@ static void asm_fxload(ASMState *as, IRIns *ir)
1277 emit_mrm(as, xo, dest, RID_MRM); 1219 emit_mrm(as, xo, dest, RID_MRM);
1278} 1220}
1279 1221
1222#define asm_fload(as, ir) asm_fxload(as, ir)
1223#define asm_xload(as, ir) asm_fxload(as, ir)
1224
1280static void asm_fxstore(ASMState *as, IRIns *ir) 1225static void asm_fxstore(ASMState *as, IRIns *ir)
1281{ 1226{
1282 RegSet allow = RSET_GPR; 1227 RegSet allow = RSET_GPR;
@@ -1340,6 +1285,9 @@ static void asm_fxstore(ASMState *as, IRIns *ir)
1340 } 1285 }
1341} 1286}
1342 1287
1288#define asm_fstore(as, ir) asm_fxstore(as, ir)
1289#define asm_xstore(as, ir) asm_fxstore(as, ir)
1290
1343#if LJ_64 1291#if LJ_64
1344static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck) 1292static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck)
1345{ 1293{
@@ -1378,7 +1326,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
1378 RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR; 1326 RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
1379 Reg dest = ra_dest(as, ir, allow); 1327 Reg dest = ra_dest(as, ir, allow);
1380 asm_fuseahuref(as, ir->op1, RSET_GPR); 1328 asm_fuseahuref(as, ir->op1, RSET_GPR);
1381 emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XMM_MOVRM(as), dest, RID_MRM); 1329 emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XO_MOVSD, dest, RID_MRM);
1382 } else { 1330 } else {
1383 asm_fuseahuref(as, ir->op1, RSET_GPR); 1331 asm_fuseahuref(as, ir->op1, RSET_GPR);
1384 } 1332 }
@@ -1444,7 +1392,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
1444 Reg left = ra_scratch(as, RSET_FPR); 1392 Reg left = ra_scratch(as, RSET_FPR);
1445 asm_tointg(as, ir, left); /* Frees dest reg. Do this before base alloc. */ 1393 asm_tointg(as, ir, left); /* Frees dest reg. Do this before base alloc. */
1446 base = ra_alloc1(as, REF_BASE, RSET_GPR); 1394 base = ra_alloc1(as, REF_BASE, RSET_GPR);
1447 emit_rmro(as, XMM_MOVRM(as), left, base, ofs); 1395 emit_rmro(as, XO_MOVSD, left, base, ofs);
1448 t.irt = IRT_NUM; /* Continue with a regular number type check. */ 1396 t.irt = IRT_NUM; /* Continue with a regular number type check. */
1449#if LJ_64 1397#if LJ_64
1450 } else if (irt_islightud(t)) { 1398 } else if (irt_islightud(t)) {
@@ -1462,11 +1410,9 @@ static void asm_sload(ASMState *as, IRIns *ir)
1462 lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); 1410 lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
1463 if ((ir->op2 & IRSLOAD_CONVERT)) { 1411 if ((ir->op2 & IRSLOAD_CONVERT)) {
1464 t.irt = irt_isint(t) ? IRT_NUM : IRT_INT; /* Check for original type. */ 1412 t.irt = irt_isint(t) ? IRT_NUM : IRT_INT; /* Check for original type. */
1465 emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTSD2SI, dest, base, ofs); 1413 emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTTSD2SI, dest, base, ofs);
1466 } else if (irt_isnum(t)) {
1467 emit_rmro(as, XMM_MOVRM(as), dest, base, ofs);
1468 } else { 1414 } else {
1469 emit_rmro(as, XO_MOV, dest, base, ofs); 1415 emit_rmro(as, irt_isnum(t) ? XO_MOVSD : XO_MOV, dest, base, ofs);
1470 } 1416 }
1471 } else { 1417 } else {
1472 if (!(ir->op2 & IRSLOAD_TYPECHECK)) 1418 if (!(ir->op2 & IRSLOAD_TYPECHECK))
@@ -1493,15 +1439,13 @@ static void asm_sload(ASMState *as, IRIns *ir)
1493static void asm_cnew(ASMState *as, IRIns *ir) 1439static void asm_cnew(ASMState *as, IRIns *ir)
1494{ 1440{
1495 CTState *cts = ctype_ctsG(J2G(as->J)); 1441 CTState *cts = ctype_ctsG(J2G(as->J));
1496 CTypeID ctypeid = (CTypeID)IR(ir->op1)->i; 1442 CTypeID id = (CTypeID)IR(ir->op1)->i;
1497 CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ? 1443 CTSize sz;
1498 lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i; 1444 CTInfo info = lj_ctype_info(cts, id, &sz);
1499 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; 1445 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
1500 IRRef args[2]; 1446 IRRef args[4];
1501 lua_assert(sz != CTSIZE_INVALID); 1447 lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
1502 1448
1503 args[0] = ASMREF_L; /* lua_State *L */
1504 args[1] = ASMREF_TMP1; /* MSize size */
1505 as->gcsteps++; 1449 as->gcsteps++;
1506 asm_setupresult(as, ir, ci); /* GCcdata * */ 1450 asm_setupresult(as, ir, ci); /* GCcdata * */
1507 1451
@@ -1544,15 +1488,26 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1544 } while (1); 1488 } while (1);
1545#endif 1489#endif
1546 lua_assert(sz == 4 || sz == 8); 1490 lua_assert(sz == 4 || sz == 8);
1491 } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */
1492 ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
1493 args[0] = ASMREF_L; /* lua_State *L */
1494 args[1] = ir->op1; /* CTypeID id */
1495 args[2] = ir->op2; /* CTSize sz */
1496 args[3] = ASMREF_TMP1; /* CTSize align */
1497 asm_gencall(as, ci, args);
1498 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
1499 return;
1547 } 1500 }
1548 1501
1549 /* Combine initialization of marked, gct and ctypeid. */ 1502 /* Combine initialization of marked, gct and ctypeid. */
1550 emit_movtomro(as, RID_ECX, RID_RET, offsetof(GCcdata, marked)); 1503 emit_movtomro(as, RID_ECX, RID_RET, offsetof(GCcdata, marked));
1551 emit_gri(as, XG_ARITHi(XOg_OR), RID_ECX, 1504 emit_gri(as, XG_ARITHi(XOg_OR), RID_ECX,
1552 (int32_t)((~LJ_TCDATA<<8)+(ctypeid<<16))); 1505 (int32_t)((~LJ_TCDATA<<8)+(id<<16)));
1553 emit_gri(as, XG_ARITHi(XOg_AND), RID_ECX, LJ_GC_WHITES); 1506 emit_gri(as, XG_ARITHi(XOg_AND), RID_ECX, LJ_GC_WHITES);
1554 emit_opgl(as, XO_MOVZXb, RID_ECX, gc.currentwhite); 1507 emit_opgl(as, XO_MOVZXb, RID_ECX, gc.currentwhite);
1555 1508
1509 args[0] = ASMREF_L; /* lua_State *L */
1510 args[1] = ASMREF_TMP1; /* MSize size */
1556 asm_gencall(as, ci, args); 1511 asm_gencall(as, ci, args);
1557 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)(sz+sizeof(GCcdata))); 1512 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)(sz+sizeof(GCcdata)));
1558} 1513}
@@ -1630,31 +1585,21 @@ static void asm_x87load(ASMState *as, IRRef ref)
1630 } 1585 }
1631} 1586}
1632 1587
1633/* Try to rejoin pow from EXP2, MUL and LOG2 (if still unsplit). */ 1588static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref)
1634static int fpmjoin_pow(ASMState *as, IRIns *ir)
1635{ 1589{
1636 IRIns *irp = IR(ir->op1); 1590 /* The modified regs must match with the *.dasc implementation. */
1637 if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) { 1591 RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX);
1638 IRIns *irpp = IR(irp->op1); 1592 IRIns *irx;
1639 if (irpp == ir-2 && irpp->o == IR_FPMATH && 1593 if (ra_hasreg(ir->r))
1640 irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) { 1594 rset_clear(drop, ir->r); /* Dest reg handled below. */
1641 /* The modified regs must match with the *.dasc implementation. */ 1595 ra_evictset(as, drop);
1642 RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX); 1596 ra_destreg(as, ir, RID_XMM0);
1643 IRIns *irx; 1597 emit_call(as, lj_vm_pow_sse);
1644 if (ra_hasreg(ir->r)) 1598 irx = IR(lref);
1645 rset_clear(drop, ir->r); /* Dest reg handled below. */ 1599 if (ra_noreg(irx->r) && ra_gethint(irx->r) == RID_XMM1)
1646 ra_evictset(as, drop); 1600 irx->r = RID_INIT; /* Avoid allocating xmm1 for x. */
1647 ra_destreg(as, ir, RID_XMM0); 1601 ra_left(as, RID_XMM0, lref);
1648 emit_call(as, lj_vm_pow_sse); 1602 ra_left(as, RID_XMM1, rref);
1649 irx = IR(irpp->op1);
1650 if (ra_noreg(irx->r) && ra_gethint(irx->r) == RID_XMM1)
1651 irx->r = RID_INIT; /* Avoid allocating xmm1 for x. */
1652 ra_left(as, RID_XMM0, irpp->op1);
1653 ra_left(as, RID_XMM1, irp->op2);
1654 return 1;
1655 }
1656 }
1657 return 0;
1658} 1603}
1659 1604
1660static void asm_fpmath(ASMState *as, IRIns *ir) 1605static void asm_fpmath(ASMState *as, IRIns *ir)
@@ -1690,7 +1635,7 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
1690 fpm == IRFPM_CEIL ? lj_vm_ceil_sse : lj_vm_trunc_sse); 1635 fpm == IRFPM_CEIL ? lj_vm_ceil_sse : lj_vm_trunc_sse);
1691 ra_left(as, RID_XMM0, ir->op1); 1636 ra_left(as, RID_XMM0, ir->op1);
1692 } 1637 }
1693 } else if (fpm == IRFPM_EXP2 && fpmjoin_pow(as, ir)) { 1638 } else if (fpm == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) {
1694 /* Rejoined to pow(). */ 1639 /* Rejoined to pow(). */
1695 } else { /* Handle x87 ops. */ 1640 } else { /* Handle x87 ops. */
1696 int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */ 1641 int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */
@@ -1698,7 +1643,7 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
1698 if (ra_hasreg(dest)) { 1643 if (ra_hasreg(dest)) {
1699 ra_free(as, dest); 1644 ra_free(as, dest);
1700 ra_modified(as, dest); 1645 ra_modified(as, dest);
1701 emit_rmro(as, XMM_MOVRM(as), dest, RID_ESP, ofs); 1646 emit_rmro(as, XO_MOVSD, dest, RID_ESP, ofs);
1702 } 1647 }
1703 emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs); 1648 emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs);
1704 switch (fpm) { /* st0 = lj_vm_*(st0) */ 1649 switch (fpm) { /* st0 = lj_vm_*(st0) */
@@ -1737,6 +1682,9 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
1737 } 1682 }
1738} 1683}
1739 1684
1685#define asm_atan2(as, ir) asm_fpmath(as, ir)
1686#define asm_ldexp(as, ir) asm_fpmath(as, ir)
1687
1740static void asm_fppowi(ASMState *as, IRIns *ir) 1688static void asm_fppowi(ASMState *as, IRIns *ir)
1741{ 1689{
1742 /* The modified regs must match with the *.dasc implementation. */ 1690 /* The modified regs must match with the *.dasc implementation. */
@@ -1750,26 +1698,15 @@ static void asm_fppowi(ASMState *as, IRIns *ir)
1750 ra_left(as, RID_EAX, ir->op2); 1698 ra_left(as, RID_EAX, ir->op2);
1751} 1699}
1752 1700
1753#if LJ_64 && LJ_HASFFI 1701static void asm_pow(ASMState *as, IRIns *ir)
1754static void asm_arith64(ASMState *as, IRIns *ir, IRCallID id)
1755{ 1702{
1756 const CCallInfo *ci = &lj_ir_callinfo[id]; 1703#if LJ_64 && LJ_HASFFI
1757 IRRef args[2]; 1704 if (!irt_isnum(ir->t))
1758 args[0] = ir->op1; 1705 asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
1759 args[1] = ir->op2; 1706 IRCALL_lj_carith_powu64);
1760 asm_setupresult(as, ir, ci); 1707 else
1761 asm_gencall(as, ci, args);
1762}
1763#endif 1708#endif
1764 1709 asm_fppowi(as, ir);
1765static void asm_intmod(ASMState *as, IRIns *ir)
1766{
1767 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_vm_modi];
1768 IRRef args[2];
1769 args[0] = ir->op1;
1770 args[1] = ir->op2;
1771 asm_setupresult(as, ir, ci);
1772 asm_gencall(as, ci, args);
1773} 1710}
1774 1711
1775static int asm_swapops(ASMState *as, IRIns *ir) 1712static int asm_swapops(ASMState *as, IRIns *ir)
@@ -1948,6 +1885,44 @@ static void asm_add(ASMState *as, IRIns *ir)
1948 asm_intarith(as, ir, XOg_ADD); 1885 asm_intarith(as, ir, XOg_ADD);
1949} 1886}
1950 1887
1888static void asm_sub(ASMState *as, IRIns *ir)
1889{
1890 if (irt_isnum(ir->t))
1891 asm_fparith(as, ir, XO_SUBSD);
1892 else /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */
1893 asm_intarith(as, ir, XOg_SUB);
1894}
1895
1896static void asm_mul(ASMState *as, IRIns *ir)
1897{
1898 if (irt_isnum(ir->t))
1899 asm_fparith(as, ir, XO_MULSD);
1900 else
1901 asm_intarith(as, ir, XOg_X_IMUL);
1902}
1903
1904static void asm_div(ASMState *as, IRIns *ir)
1905{
1906#if LJ_64 && LJ_HASFFI
1907 if (!irt_isnum(ir->t))
1908 asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
1909 IRCALL_lj_carith_divu64);
1910 else
1911#endif
1912 asm_fparith(as, ir, XO_DIVSD);
1913}
1914
1915static void asm_mod(ASMState *as, IRIns *ir)
1916{
1917#if LJ_64 && LJ_HASFFI
1918 if (!irt_isint(ir->t))
1919 asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
1920 IRCALL_lj_carith_modu64);
1921 else
1922#endif
1923 asm_callid(as, ir, IRCALL_lj_vm_modi);
1924}
1925
1951static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg) 1926static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg)
1952{ 1927{
1953 Reg dest = ra_dest(as, ir, RSET_GPR); 1928 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1955,7 +1930,17 @@ static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg)
1955 ra_left(as, dest, ir->op1); 1930 ra_left(as, dest, ir->op1);
1956} 1931}
1957 1932
1958static void asm_min_max(ASMState *as, IRIns *ir, int cc) 1933static void asm_neg(ASMState *as, IRIns *ir)
1934{
1935 if (irt_isnum(ir->t))
1936 asm_fparith(as, ir, XO_XORPS);
1937 else
1938 asm_neg_not(as, ir, XOg_NEG);
1939}
1940
1941#define asm_abs(as, ir) asm_fparith(as, ir, XO_ANDPS)
1942
1943static void asm_intmin_max(ASMState *as, IRIns *ir, int cc)
1959{ 1944{
1960 Reg right, dest = ra_dest(as, ir, RSET_GPR); 1945 Reg right, dest = ra_dest(as, ir, RSET_GPR);
1961 IRRef lref = ir->op1, rref = ir->op2; 1946 IRRef lref = ir->op1, rref = ir->op2;
@@ -1966,7 +1951,30 @@ static void asm_min_max(ASMState *as, IRIns *ir, int cc)
1966 ra_left(as, dest, lref); 1951 ra_left(as, dest, lref);
1967} 1952}
1968 1953
1969static void asm_bitswap(ASMState *as, IRIns *ir) 1954static void asm_min(ASMState *as, IRIns *ir)
1955{
1956 if (irt_isnum(ir->t))
1957 asm_fparith(as, ir, XO_MINSD);
1958 else
1959 asm_intmin_max(as, ir, CC_G);
1960}
1961
1962static void asm_max(ASMState *as, IRIns *ir)
1963{
1964 if (irt_isnum(ir->t))
1965 asm_fparith(as, ir, XO_MAXSD);
1966 else
1967 asm_intmin_max(as, ir, CC_L);
1968}
1969
1970/* Note: don't use LEA for overflow-checking arithmetic! */
1971#define asm_addov(as, ir) asm_intarith(as, ir, XOg_ADD)
1972#define asm_subov(as, ir) asm_intarith(as, ir, XOg_SUB)
1973#define asm_mulov(as, ir) asm_intarith(as, ir, XOg_X_IMUL)
1974
1975#define asm_bnot(as, ir) asm_neg_not(as, ir, XOg_NOT)
1976
1977static void asm_bswap(ASMState *as, IRIns *ir)
1970{ 1978{
1971 Reg dest = ra_dest(as, ir, RSET_GPR); 1979 Reg dest = ra_dest(as, ir, RSET_GPR);
1972 as->mcp = emit_op(XO_BSWAP + ((dest&7) << 24), 1980 as->mcp = emit_op(XO_BSWAP + ((dest&7) << 24),
@@ -1974,6 +1982,10 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
1974 ra_left(as, dest, ir->op1); 1982 ra_left(as, dest, ir->op1);
1975} 1983}
1976 1984
1985#define asm_band(as, ir) asm_intarith(as, ir, XOg_AND)
1986#define asm_bor(as, ir) asm_intarith(as, ir, XOg_OR)
1987#define asm_bxor(as, ir) asm_intarith(as, ir, XOg_XOR)
1988
1977static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs) 1989static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
1978{ 1990{
1979 IRRef rref = ir->op2; 1991 IRRef rref = ir->op2;
@@ -2013,6 +2025,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
2013 */ 2025 */
2014} 2026}
2015 2027
2028#define asm_bshl(as, ir) asm_bitshift(as, ir, XOg_SHL)
2029#define asm_bshr(as, ir) asm_bitshift(as, ir, XOg_SHR)
2030#define asm_bsar(as, ir) asm_bitshift(as, ir, XOg_SAR)
2031#define asm_brol(as, ir) asm_bitshift(as, ir, XOg_ROL)
2032#define asm_bror(as, ir) asm_bitshift(as, ir, XOg_ROR)
2033
2016/* -- Comparisons --------------------------------------------------------- */ 2034/* -- Comparisons --------------------------------------------------------- */
2017 2035
2018/* Virtual flags for unordered FP comparisons. */ 2036/* Virtual flags for unordered FP comparisons. */
@@ -2039,8 +2057,9 @@ static const uint16_t asm_compmap[IR_ABC+1] = {
2039}; 2057};
2040 2058
2041/* FP and integer comparisons. */ 2059/* FP and integer comparisons. */
2042static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc) 2060static void asm_comp(ASMState *as, IRIns *ir)
2043{ 2061{
2062 uint32_t cc = asm_compmap[ir->o];
2044 if (irt_isnum(ir->t)) { 2063 if (irt_isnum(ir->t)) {
2045 IRRef lref = ir->op1; 2064 IRRef lref = ir->op1;
2046 IRRef rref = ir->op2; 2065 IRRef rref = ir->op2;
@@ -2195,6 +2214,8 @@ static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc)
2195 } 2214 }
2196} 2215}
2197 2216
2217#define asm_equal(as, ir) asm_comp(as, ir)
2218
2198#if LJ_32 && LJ_HASFFI 2219#if LJ_32 && LJ_HASFFI
2199/* 64 bit integer comparisons in 32 bit mode. */ 2220/* 64 bit integer comparisons in 32 bit mode. */
2200static void asm_comp_int64(ASMState *as, IRIns *ir) 2221static void asm_comp_int64(ASMState *as, IRIns *ir)
@@ -2277,13 +2298,9 @@ static void asm_hiop(ASMState *as, IRIns *ir)
2277 int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ 2298 int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */
2278 if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; 2299 if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
2279 if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ 2300 if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */
2280 if (usehi || uselo) {
2281 if (irt_isfp(ir->t))
2282 asm_conv_fp_int64(as, ir);
2283 else
2284 asm_conv_int64_fp(as, ir);
2285 }
2286 as->curins--; /* Always skip the CONV. */ 2301 as->curins--; /* Always skip the CONV. */
2302 if (usehi || uselo)
2303 asm_conv64(as, ir);
2287 return; 2304 return;
2288 } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */ 2305 } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */
2289 asm_comp_int64(as, ir); 2306 asm_comp_int64(as, ir);
@@ -2332,6 +2349,16 @@ static void asm_hiop(ASMState *as, IRIns *ir)
2332#endif 2349#endif
2333} 2350}
2334 2351
2352/* -- Profiling ----------------------------------------------------------- */
2353
2354static void asm_prof(ASMState *as, IRIns *ir)
2355{
2356 UNUSED(ir);
2357 asm_guardcc(as, CC_NE);
2358 emit_i8(as, HOOK_PROFILE);
2359 emit_rma(as, XO_GROUP3b, XOg_TEST, &J2G(as->J)->hookmask);
2360}
2361
2335/* -- Stack handling ------------------------------------------------------ */ 2362/* -- Stack handling ------------------------------------------------------ */
2336 2363
2337/* Check Lua stack size for overflow. Use exit handler as fallback. */ 2364/* Check Lua stack size for overflow. Use exit handler as fallback. */
@@ -2353,7 +2380,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
2353 emit_rmro(as, XO_ARITH(XOg_SUB), r, RID_NONE, 2380 emit_rmro(as, XO_ARITH(XOg_SUB), r, RID_NONE,
2354 ptr2addr(&J2G(as->J)->jit_base)); 2381 ptr2addr(&J2G(as->J)->jit_base));
2355 emit_rmro(as, XO_MOV, r, r, offsetof(lua_State, maxstack)); 2382 emit_rmro(as, XO_MOV, r, r, offsetof(lua_State, maxstack));
2356 emit_getgl(as, r, jit_L); 2383 emit_getgl(as, r, cur_L);
2357 if (allow == RSET_EMPTY) /* Spill temp. register. */ 2384 if (allow == RSET_EMPTY) /* Spill temp. register. */
2358 emit_rmro(as, XO_MOVto, r|REX_64, RID_ESP, 0); 2385 emit_rmro(as, XO_MOVto, r|REX_64, RID_ESP, 0);
2359} 2386}
@@ -2581,163 +2608,6 @@ static void asm_tail_prep(ASMState *as)
2581 } 2608 }
2582} 2609}
2583 2610
2584/* -- Instruction dispatch ------------------------------------------------ */
2585
2586/* Assemble a single instruction. */
2587static void asm_ir(ASMState *as, IRIns *ir)
2588{
2589 switch ((IROp)ir->o) {
2590 /* Miscellaneous ops. */
2591 case IR_LOOP: asm_loop(as); break;
2592 case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
2593 case IR_USE:
2594 ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
2595 case IR_PHI: asm_phi(as, ir); break;
2596 case IR_HIOP: asm_hiop(as, ir); break;
2597 case IR_GCSTEP: asm_gcstep(as, ir); break;
2598
2599 /* Guarded assertions. */
2600 case IR_LT: case IR_GE: case IR_LE: case IR_GT:
2601 case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
2602 case IR_EQ: case IR_NE: case IR_ABC:
2603 asm_comp(as, ir, asm_compmap[ir->o]);
2604 break;
2605
2606 case IR_RETF: asm_retf(as, ir); break;
2607
2608 /* Bit ops. */
2609 case IR_BNOT: asm_neg_not(as, ir, XOg_NOT); break;
2610 case IR_BSWAP: asm_bitswap(as, ir); break;
2611
2612 case IR_BAND: asm_intarith(as, ir, XOg_AND); break;
2613 case IR_BOR: asm_intarith(as, ir, XOg_OR); break;
2614 case IR_BXOR: asm_intarith(as, ir, XOg_XOR); break;
2615
2616 case IR_BSHL: asm_bitshift(as, ir, XOg_SHL); break;
2617 case IR_BSHR: asm_bitshift(as, ir, XOg_SHR); break;
2618 case IR_BSAR: asm_bitshift(as, ir, XOg_SAR); break;
2619 case IR_BROL: asm_bitshift(as, ir, XOg_ROL); break;
2620 case IR_BROR: asm_bitshift(as, ir, XOg_ROR); break;
2621
2622 /* Arithmetic ops. */
2623 case IR_ADD: asm_add(as, ir); break;
2624 case IR_SUB:
2625 if (irt_isnum(ir->t))
2626 asm_fparith(as, ir, XO_SUBSD);
2627 else /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */
2628 asm_intarith(as, ir, XOg_SUB);
2629 break;
2630 case IR_MUL:
2631 if (irt_isnum(ir->t))
2632 asm_fparith(as, ir, XO_MULSD);
2633 else
2634 asm_intarith(as, ir, XOg_X_IMUL);
2635 break;
2636 case IR_DIV:
2637#if LJ_64 && LJ_HASFFI
2638 if (!irt_isnum(ir->t))
2639 asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
2640 IRCALL_lj_carith_divu64);
2641 else
2642#endif
2643 asm_fparith(as, ir, XO_DIVSD);
2644 break;
2645 case IR_MOD:
2646#if LJ_64 && LJ_HASFFI
2647 if (!irt_isint(ir->t))
2648 asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
2649 IRCALL_lj_carith_modu64);
2650 else
2651#endif
2652 asm_intmod(as, ir);
2653 break;
2654
2655 case IR_NEG:
2656 if (irt_isnum(ir->t))
2657 asm_fparith(as, ir, XO_XORPS);
2658 else
2659 asm_neg_not(as, ir, XOg_NEG);
2660 break;
2661 case IR_ABS: asm_fparith(as, ir, XO_ANDPS); break;
2662
2663 case IR_MIN:
2664 if (irt_isnum(ir->t))
2665 asm_fparith(as, ir, XO_MINSD);
2666 else
2667 asm_min_max(as, ir, CC_G);
2668 break;
2669 case IR_MAX:
2670 if (irt_isnum(ir->t))
2671 asm_fparith(as, ir, XO_MAXSD);
2672 else
2673 asm_min_max(as, ir, CC_L);
2674 break;
2675
2676 case IR_FPMATH: case IR_ATAN2: case IR_LDEXP:
2677 asm_fpmath(as, ir);
2678 break;
2679 case IR_POW:
2680#if LJ_64 && LJ_HASFFI
2681 if (!irt_isnum(ir->t))
2682 asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
2683 IRCALL_lj_carith_powu64);
2684 else
2685#endif
2686 asm_fppowi(as, ir);
2687 break;
2688
2689 /* Overflow-checking arithmetic ops. Note: don't use LEA here! */
2690 case IR_ADDOV: asm_intarith(as, ir, XOg_ADD); break;
2691 case IR_SUBOV: asm_intarith(as, ir, XOg_SUB); break;
2692 case IR_MULOV: asm_intarith(as, ir, XOg_X_IMUL); break;
2693
2694 /* Memory references. */
2695 case IR_AREF: asm_aref(as, ir); break;
2696 case IR_HREF: asm_href(as, ir); break;
2697 case IR_HREFK: asm_hrefk(as, ir); break;
2698 case IR_NEWREF: asm_newref(as, ir); break;
2699 case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
2700 case IR_FREF: asm_fref(as, ir); break;
2701 case IR_STRREF: asm_strref(as, ir); break;
2702
2703 /* Loads and stores. */
2704 case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
2705 asm_ahuvload(as, ir);
2706 break;
2707 case IR_FLOAD: case IR_XLOAD: asm_fxload(as, ir); break;
2708 case IR_SLOAD: asm_sload(as, ir); break;
2709
2710 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
2711 case IR_FSTORE: case IR_XSTORE: asm_fxstore(as, ir); break;
2712
2713 /* Allocations. */
2714 case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
2715 case IR_TNEW: asm_tnew(as, ir); break;
2716 case IR_TDUP: asm_tdup(as, ir); break;
2717 case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
2718
2719 /* Write barriers. */
2720 case IR_TBAR: asm_tbar(as, ir); break;
2721 case IR_OBAR: asm_obar(as, ir); break;
2722
2723 /* Type conversions. */
2724 case IR_TOBIT: asm_tobit(as, ir); break;
2725 case IR_CONV: asm_conv(as, ir); break;
2726 case IR_TOSTR: asm_tostr(as, ir); break;
2727 case IR_STRTO: asm_strto(as, ir); break;
2728
2729 /* Calls. */
2730 case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
2731 case IR_CALLXS: asm_callx(as, ir); break;
2732 case IR_CARG: break;
2733
2734 default:
2735 setintV(&as->J->errinfo, ir->o);
2736 lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
2737 break;
2738 }
2739}
2740
2741/* -- Trace setup --------------------------------------------------------- */ 2611/* -- Trace setup --------------------------------------------------------- */
2742 2612
2743/* Ensure there are enough stack slots for call arguments. */ 2613/* Ensure there are enough stack slots for call arguments. */
diff --git a/src/lj_bc.h b/src/lj_bc.h
index 1a4ade31..826a304c 100644
--- a/src/lj_bc.h
+++ b/src/lj_bc.h
@@ -89,6 +89,8 @@
89 _(ISFC, dst, ___, var, ___) \ 89 _(ISFC, dst, ___, var, ___) \
90 _(IST, ___, ___, var, ___) \ 90 _(IST, ___, ___, var, ___) \
91 _(ISF, ___, ___, var, ___) \ 91 _(ISF, ___, ___, var, ___) \
92 _(ISTYPE, var, ___, lit, ___) \
93 _(ISNUM, var, ___, lit, ___) \
92 \ 94 \
93 /* Unary ops. */ \ 95 /* Unary ops. */ \
94 _(MOV, dst, ___, var, ___) \ 96 _(MOV, dst, ___, var, ___) \
@@ -143,10 +145,12 @@
143 _(TGETV, dst, var, var, index) \ 145 _(TGETV, dst, var, var, index) \
144 _(TGETS, dst, var, str, index) \ 146 _(TGETS, dst, var, str, index) \
145 _(TGETB, dst, var, lit, index) \ 147 _(TGETB, dst, var, lit, index) \
148 _(TGETR, dst, var, var, index) \
146 _(TSETV, var, var, var, newindex) \ 149 _(TSETV, var, var, var, newindex) \
147 _(TSETS, var, var, str, newindex) \ 150 _(TSETS, var, var, str, newindex) \
148 _(TSETB, var, var, lit, newindex) \ 151 _(TSETB, var, var, lit, newindex) \
149 _(TSETM, base, ___, num, newindex) \ 152 _(TSETM, base, ___, num, newindex) \
153 _(TSETR, var, var, var, newindex) \
150 \ 154 \
151 /* Calls and vararg handling. T = tail call. */ \ 155 /* Calls and vararg handling. T = tail call. */ \
152 _(CALLM, base, lit, lit, call) \ 156 _(CALLM, base, lit, lit, call) \
diff --git a/src/lj_bcdump.h b/src/lj_bcdump.h
index 77a789e9..940ed147 100644
--- a/src/lj_bcdump.h
+++ b/src/lj_bcdump.h
@@ -36,7 +36,7 @@
36/* If you perform *any* kind of private modifications to the bytecode itself 36/* If you perform *any* kind of private modifications to the bytecode itself
37** or to the dump format, you *must* set BCDUMP_VERSION to 0x80 or higher. 37** or to the dump format, you *must* set BCDUMP_VERSION to 0x80 or higher.
38*/ 38*/
39#define BCDUMP_VERSION 1 39#define BCDUMP_VERSION 2
40 40
41/* Compatibility flags. */ 41/* Compatibility flags. */
42#define BCDUMP_F_BE 0x01 42#define BCDUMP_F_BE 0x01
@@ -61,6 +61,7 @@ enum {
61 61
62LJ_FUNC int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, 62LJ_FUNC int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer,
63 void *data, int strip); 63 void *data, int strip);
64LJ_FUNC GCproto *lj_bcread_proto(LexState *ls);
64LJ_FUNC GCproto *lj_bcread(LexState *ls); 65LJ_FUNC GCproto *lj_bcread(LexState *ls);
65 66
66#endif 67#endif
diff --git a/src/lj_bcread.c b/src/lj_bcread.c
index c0baece4..2360bf40 100644
--- a/src/lj_bcread.c
+++ b/src/lj_bcread.c
@@ -9,6 +9,7 @@
9#include "lj_obj.h" 9#include "lj_obj.h"
10#include "lj_gc.h" 10#include "lj_gc.h"
11#include "lj_err.h" 11#include "lj_err.h"
12#include "lj_buf.h"
12#include "lj_str.h" 13#include "lj_str.h"
13#include "lj_tab.h" 14#include "lj_tab.h"
14#include "lj_bc.h" 15#include "lj_bc.h"
@@ -20,6 +21,7 @@
20#include "lj_lex.h" 21#include "lj_lex.h"
21#include "lj_bcdump.h" 22#include "lj_bcdump.h"
22#include "lj_state.h" 23#include "lj_state.h"
24#include "lj_strfmt.h"
23 25
24/* Reuse some lexer fields for our own purposes. */ 26/* Reuse some lexer fields for our own purposes. */
25#define bcread_flags(ls) ls->level 27#define bcread_flags(ls) ls->level
@@ -38,84 +40,73 @@ static LJ_NOINLINE void bcread_error(LexState *ls, ErrMsg em)
38 const char *name = ls->chunkarg; 40 const char *name = ls->chunkarg;
39 if (*name == BCDUMP_HEAD1) name = "(binary)"; 41 if (*name == BCDUMP_HEAD1) name = "(binary)";
40 else if (*name == '@' || *name == '=') name++; 42 else if (*name == '@' || *name == '=') name++;
41 lj_str_pushf(L, "%s: %s", name, err2msg(em)); 43 lj_strfmt_pushf(L, "%s: %s", name, err2msg(em));
42 lj_err_throw(L, LUA_ERRSYNTAX); 44 lj_err_throw(L, LUA_ERRSYNTAX);
43} 45}
44 46
45/* Resize input buffer. */ 47/* Refill buffer. */
46static void bcread_resize(LexState *ls, MSize len)
47{
48 if (ls->sb.sz < len) {
49 MSize sz = ls->sb.sz * 2;
50 while (len > sz) sz = sz * 2;
51 lj_str_resizebuf(ls->L, &ls->sb, sz);
52 /* Caveat: this may change ls->sb.buf which may affect ls->p. */
53 }
54}
55
56/* Refill buffer if needed. */
57static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need) 48static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need)
58{ 49{
59 lua_assert(len != 0); 50 lua_assert(len != 0);
60 if (len > LJ_MAX_MEM || ls->current < 0) 51 if (len > LJ_MAX_MEM || ls->c < 0)
61 bcread_error(ls, LJ_ERR_BCBAD); 52 bcread_error(ls, LJ_ERR_BCBAD);
62 do { 53 do {
63 const char *buf; 54 const char *buf;
64 size_t size; 55 size_t sz;
65 if (ls->n) { /* Copy remainder to buffer. */ 56 char *p = sbufB(&ls->sb);
66 if (ls->sb.n) { /* Move down in buffer. */ 57 MSize n = (MSize)(ls->pe - ls->p);
67 lua_assert(ls->p + ls->n == ls->sb.buf + ls->sb.n); 58 if (n) { /* Copy remainder to buffer. */
68 if (ls->n != ls->sb.n) 59 if (sbuflen(&ls->sb)) { /* Move down in buffer. */
69 memmove(ls->sb.buf, ls->p, ls->n); 60 lua_assert(ls->pe == sbufP(&ls->sb));
61 if (ls->p != p) memmove(p, ls->p, n);
70 } else { /* Copy from buffer provided by reader. */ 62 } else { /* Copy from buffer provided by reader. */
71 bcread_resize(ls, len); 63 p = lj_buf_need(&ls->sb, len);
72 memcpy(ls->sb.buf, ls->p, ls->n); 64 memcpy(p, ls->p, n);
73 } 65 }
74 ls->p = ls->sb.buf; 66 ls->p = p;
67 ls->pe = p + n;
75 } 68 }
76 ls->sb.n = ls->n; 69 setsbufP(&ls->sb, p + n);
77 buf = ls->rfunc(ls->L, ls->rdata, &size); /* Get more data from reader. */ 70 buf = ls->rfunc(ls->L, ls->rdata, &sz); /* Get more data from reader. */
78 if (buf == NULL || size == 0) { /* EOF? */ 71 if (buf == NULL || sz == 0) { /* EOF? */
79 if (need) bcread_error(ls, LJ_ERR_BCBAD); 72 if (need) bcread_error(ls, LJ_ERR_BCBAD);
80 ls->current = -1; /* Only bad if we get called again. */ 73 ls->c = -1; /* Only bad if we get called again. */
81 break; 74 break;
82 } 75 }
83 if (ls->sb.n) { /* Append to buffer. */ 76 if (n) { /* Append to buffer. */
84 MSize n = ls->sb.n + (MSize)size; 77 n += (MSize)sz;
85 bcread_resize(ls, n < len ? len : n); 78 p = lj_buf_need(&ls->sb, n < len ? len : n);
86 memcpy(ls->sb.buf + ls->sb.n, buf, size); 79 memcpy(sbufP(&ls->sb), buf, sz);
87 ls->n = ls->sb.n = n; 80 setsbufP(&ls->sb, p + n);
88 ls->p = ls->sb.buf; 81 ls->p = p;
82 ls->pe = p + n;
89 } else { /* Return buffer provided by reader. */ 83 } else { /* Return buffer provided by reader. */
90 ls->n = (MSize)size;
91 ls->p = buf; 84 ls->p = buf;
85 ls->pe = buf + sz;
92 } 86 }
93 } while (ls->n < len); 87 } while (ls->p + len > ls->pe);
94} 88}
95 89
96/* Need a certain number of bytes. */ 90/* Need a certain number of bytes. */
97static LJ_AINLINE void bcread_need(LexState *ls, MSize len) 91static LJ_AINLINE void bcread_need(LexState *ls, MSize len)
98{ 92{
99 if (LJ_UNLIKELY(ls->n < len)) 93 if (LJ_UNLIKELY(ls->p + len > ls->pe))
100 bcread_fill(ls, len, 1); 94 bcread_fill(ls, len, 1);
101} 95}
102 96
103/* Want to read up to a certain number of bytes, but may need less. */ 97/* Want to read up to a certain number of bytes, but may need less. */
104static LJ_AINLINE void bcread_want(LexState *ls, MSize len) 98static LJ_AINLINE void bcread_want(LexState *ls, MSize len)
105{ 99{
106 if (LJ_UNLIKELY(ls->n < len)) 100 if (LJ_UNLIKELY(ls->p + len > ls->pe))
107 bcread_fill(ls, len, 0); 101 bcread_fill(ls, len, 0);
108} 102}
109 103
110#define bcread_dec(ls) check_exp(ls->n > 0, ls->n--)
111#define bcread_consume(ls, len) check_exp(ls->n >= (len), ls->n -= (len))
112
113/* Return memory block from buffer. */ 104/* Return memory block from buffer. */
114static uint8_t *bcread_mem(LexState *ls, MSize len) 105static LJ_AINLINE uint8_t *bcread_mem(LexState *ls, MSize len)
115{ 106{
116 uint8_t *p = (uint8_t *)ls->p; 107 uint8_t *p = (uint8_t *)ls->p;
117 bcread_consume(ls, len); 108 ls->p += len;
118 ls->p = (char *)p + len; 109 lua_assert(ls->p <= ls->pe);
119 return p; 110 return p;
120} 111}
121 112
@@ -128,25 +119,15 @@ static void bcread_block(LexState *ls, void *q, MSize len)
128/* Read byte from buffer. */ 119/* Read byte from buffer. */
129static LJ_AINLINE uint32_t bcread_byte(LexState *ls) 120static LJ_AINLINE uint32_t bcread_byte(LexState *ls)
130{ 121{
131 bcread_dec(ls); 122 lua_assert(ls->p < ls->pe);
132 return (uint32_t)(uint8_t)*ls->p++; 123 return (uint32_t)(uint8_t)*ls->p++;
133} 124}
134 125
135/* Read ULEB128 value from buffer. */ 126/* Read ULEB128 value from buffer. */
136static uint32_t bcread_uleb128(LexState *ls) 127static LJ_AINLINE uint32_t bcread_uleb128(LexState *ls)
137{ 128{
138 const uint8_t *p = (const uint8_t *)ls->p; 129 uint32_t v = lj_buf_ruleb128(&ls->p);
139 uint32_t v = *p++; 130 lua_assert(ls->p <= ls->pe);
140 if (LJ_UNLIKELY(v >= 0x80)) {
141 int sh = 0;
142 v &= 0x7f;
143 do {
144 v |= ((*p & 0x7f) << (sh += 7));
145 bcread_dec(ls);
146 } while (*p++ >= 0x80);
147 }
148 bcread_dec(ls);
149 ls->p = (char *)p;
150 return v; 131 return v;
151} 132}
152 133
@@ -160,11 +141,10 @@ static uint32_t bcread_uleb128_33(LexState *ls)
160 v &= 0x3f; 141 v &= 0x3f;
161 do { 142 do {
162 v |= ((*p & 0x7f) << (sh += 7)); 143 v |= ((*p & 0x7f) << (sh += 7));
163 bcread_dec(ls);
164 } while (*p++ >= 0x80); 144 } while (*p++ >= 0x80);
165 } 145 }
166 bcread_dec(ls);
167 ls->p = (char *)p; 146 ls->p = (char *)p;
147 lua_assert(ls->p <= ls->pe);
168 return v; 148 return v;
169} 149}
170 150
@@ -326,25 +306,13 @@ static void bcread_uv(LexState *ls, GCproto *pt, MSize sizeuv)
326} 306}
327 307
328/* Read a prototype. */ 308/* Read a prototype. */
329static GCproto *bcread_proto(LexState *ls) 309GCproto *lj_bcread_proto(LexState *ls)
330{ 310{
331 GCproto *pt; 311 GCproto *pt;
332 MSize framesize, numparams, flags, sizeuv, sizekgc, sizekn, sizebc, sizept; 312 MSize framesize, numparams, flags, sizeuv, sizekgc, sizekn, sizebc, sizept;
333 MSize ofsk, ofsuv, ofsdbg; 313 MSize ofsk, ofsuv, ofsdbg;
334 MSize sizedbg = 0; 314 MSize sizedbg = 0;
335 BCLine firstline = 0, numline = 0; 315 BCLine firstline = 0, numline = 0;
336 MSize len, startn;
337
338 /* Read length. */
339 if (ls->n > 0 && ls->p[0] == 0) { /* Shortcut EOF. */
340 ls->n--; ls->p++;
341 return NULL;
342 }
343 bcread_want(ls, 5);
344 len = bcread_uleb128(ls);
345 if (!len) return NULL; /* EOF */
346 bcread_need(ls, len);
347 startn = ls->n;
348 316
349 /* Read prototype header. */ 317 /* Read prototype header. */
350 flags = bcread_byte(ls); 318 flags = bcread_byte(ls);
@@ -413,9 +381,6 @@ static GCproto *bcread_proto(LexState *ls)
413 setmref(pt->uvinfo, NULL); 381 setmref(pt->uvinfo, NULL);
414 setmref(pt->varinfo, NULL); 382 setmref(pt->varinfo, NULL);
415 } 383 }
416
417 if (len != startn - ls->n)
418 bcread_error(ls, LJ_ERR_BCBAD);
419 return pt; 384 return pt;
420} 385}
421 386
@@ -455,19 +420,34 @@ static int bcread_header(LexState *ls)
455GCproto *lj_bcread(LexState *ls) 420GCproto *lj_bcread(LexState *ls)
456{ 421{
457 lua_State *L = ls->L; 422 lua_State *L = ls->L;
458 lua_assert(ls->current == BCDUMP_HEAD1); 423 lua_assert(ls->c == BCDUMP_HEAD1);
459 bcread_savetop(L, ls, L->top); 424 bcread_savetop(L, ls, L->top);
460 lj_str_resetbuf(&ls->sb); 425 lj_buf_reset(&ls->sb);
461 /* Check for a valid bytecode dump header. */ 426 /* Check for a valid bytecode dump header. */
462 if (!bcread_header(ls)) 427 if (!bcread_header(ls))
463 bcread_error(ls, LJ_ERR_BCFMT); 428 bcread_error(ls, LJ_ERR_BCFMT);
464 for (;;) { /* Process all prototypes in the bytecode dump. */ 429 for (;;) { /* Process all prototypes in the bytecode dump. */
465 GCproto *pt = bcread_proto(ls); 430 GCproto *pt;
466 if (!pt) break; 431 MSize len;
432 const char *startp;
433 /* Read length. */
434 if (ls->p < ls->pe && ls->p[0] == 0) { /* Shortcut EOF. */
435 ls->p++;
436 break;
437 }
438 bcread_want(ls, 5);
439 len = bcread_uleb128(ls);
440 if (!len) break; /* EOF */
441 bcread_need(ls, len);
442 startp = ls->p;
443 pt = lj_bcread_proto(ls);
444 if (ls->p != startp + len)
445 bcread_error(ls, LJ_ERR_BCBAD);
467 setprotoV(L, L->top, pt); 446 setprotoV(L, L->top, pt);
468 incr_top(L); 447 incr_top(L);
469 } 448 }
470 if ((int32_t)ls->n > 0 || L->top-1 != bcread_oldtop(L, ls)) 449 if ((int32_t)(2*(uint32_t)(ls->pe - ls->p)) > 0 ||
450 L->top-1 != bcread_oldtop(L, ls))
471 bcread_error(ls, LJ_ERR_BCBAD); 451 bcread_error(ls, LJ_ERR_BCBAD);
472 /* Pop off last prototype. */ 452 /* Pop off last prototype. */
473 L->top--; 453 L->top--;
diff --git a/src/lj_bcwrite.c b/src/lj_bcwrite.c
index dcdaa10a..a70735ca 100644
--- a/src/lj_bcwrite.c
+++ b/src/lj_bcwrite.c
@@ -8,7 +8,7 @@
8 8
9#include "lj_obj.h" 9#include "lj_obj.h"
10#include "lj_gc.h" 10#include "lj_gc.h"
11#include "lj_str.h" 11#include "lj_buf.h"
12#include "lj_bc.h" 12#include "lj_bc.h"
13#if LJ_HASFFI 13#if LJ_HASFFI
14#include "lj_ctype.h" 14#include "lj_ctype.h"
@@ -17,13 +17,13 @@
17#include "lj_dispatch.h" 17#include "lj_dispatch.h"
18#include "lj_jit.h" 18#include "lj_jit.h"
19#endif 19#endif
20#include "lj_strfmt.h"
20#include "lj_bcdump.h" 21#include "lj_bcdump.h"
21#include "lj_vm.h" 22#include "lj_vm.h"
22 23
23/* Context for bytecode writer. */ 24/* Context for bytecode writer. */
24typedef struct BCWriteCtx { 25typedef struct BCWriteCtx {
25 SBuf sb; /* Output buffer. */ 26 SBuf sb; /* Output buffer. */
26 lua_State *L; /* Lua state. */
27 GCproto *pt; /* Root prototype. */ 27 GCproto *pt; /* Root prototype. */
28 lua_Writer wfunc; /* Writer callback. */ 28 lua_Writer wfunc; /* Writer callback. */
29 void *wdata; /* Writer callback data. */ 29 void *wdata; /* Writer callback data. */
@@ -31,85 +31,44 @@ typedef struct BCWriteCtx {
31 int status; /* Status from writer callback. */ 31 int status; /* Status from writer callback. */
32} BCWriteCtx; 32} BCWriteCtx;
33 33
34/* -- Output buffer handling ---------------------------------------------- */
35
36/* Resize buffer if needed. */
37static LJ_NOINLINE void bcwrite_resize(BCWriteCtx *ctx, MSize len)
38{
39 MSize sz = ctx->sb.sz * 2;
40 while (ctx->sb.n + len > sz) sz = sz * 2;
41 lj_str_resizebuf(ctx->L, &ctx->sb, sz);
42}
43
44/* Need a certain amount of buffer space. */
45static LJ_AINLINE void bcwrite_need(BCWriteCtx *ctx, MSize len)
46{
47 if (LJ_UNLIKELY(ctx->sb.n + len > ctx->sb.sz))
48 bcwrite_resize(ctx, len);
49}
50
51/* Add memory block to buffer. */
52static void bcwrite_block(BCWriteCtx *ctx, const void *p, MSize len)
53{
54 uint8_t *q = (uint8_t *)(ctx->sb.buf + ctx->sb.n);
55 MSize i;
56 ctx->sb.n += len;
57 for (i = 0; i < len; i++) q[i] = ((uint8_t *)p)[i];
58}
59
60/* Add byte to buffer. */
61static LJ_AINLINE void bcwrite_byte(BCWriteCtx *ctx, uint8_t b)
62{
63 ctx->sb.buf[ctx->sb.n++] = b;
64}
65
66/* Add ULEB128 value to buffer. */
67static void bcwrite_uleb128(BCWriteCtx *ctx, uint32_t v)
68{
69 MSize n = ctx->sb.n;
70 uint8_t *p = (uint8_t *)ctx->sb.buf;
71 for (; v >= 0x80; v >>= 7)
72 p[n++] = (uint8_t)((v & 0x7f) | 0x80);
73 p[n++] = (uint8_t)v;
74 ctx->sb.n = n;
75}
76
77/* -- Bytecode writer ----------------------------------------------------- */ 34/* -- Bytecode writer ----------------------------------------------------- */
78 35
79/* Write a single constant key/value of a template table. */ 36/* Write a single constant key/value of a template table. */
80static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow) 37static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow)
81{ 38{
82 bcwrite_need(ctx, 1+10); 39 char *p = lj_buf_more(&ctx->sb, 1+10);
83 if (tvisstr(o)) { 40 if (tvisstr(o)) {
84 const GCstr *str = strV(o); 41 const GCstr *str = strV(o);
85 MSize len = str->len; 42 MSize len = str->len;
86 bcwrite_need(ctx, 5+len); 43 p = lj_buf_more(&ctx->sb, 5+len);
87 bcwrite_uleb128(ctx, BCDUMP_KTAB_STR+len); 44 p = lj_strfmt_wuleb128(p, BCDUMP_KTAB_STR+len);
88 bcwrite_block(ctx, strdata(str), len); 45 p = lj_buf_wmem(p, strdata(str), len);
89 } else if (tvisint(o)) { 46 } else if (tvisint(o)) {
90 bcwrite_byte(ctx, BCDUMP_KTAB_INT); 47 *p++ = BCDUMP_KTAB_INT;
91 bcwrite_uleb128(ctx, intV(o)); 48 p = lj_strfmt_wuleb128(p, intV(o));
92 } else if (tvisnum(o)) { 49 } else if (tvisnum(o)) {
93 if (!LJ_DUALNUM && narrow) { /* Narrow number constants to integers. */ 50 if (!LJ_DUALNUM && narrow) { /* Narrow number constants to integers. */
94 lua_Number num = numV(o); 51 lua_Number num = numV(o);
95 int32_t k = lj_num2int(num); 52 int32_t k = lj_num2int(num);
96 if (num == (lua_Number)k) { /* -0 is never a constant. */ 53 if (num == (lua_Number)k) { /* -0 is never a constant. */
97 bcwrite_byte(ctx, BCDUMP_KTAB_INT); 54 *p++ = BCDUMP_KTAB_INT;
98 bcwrite_uleb128(ctx, k); 55 p = lj_strfmt_wuleb128(p, k);
56 setsbufP(&ctx->sb, p);
99 return; 57 return;
100 } 58 }
101 } 59 }
102 bcwrite_byte(ctx, BCDUMP_KTAB_NUM); 60 *p++ = BCDUMP_KTAB_NUM;
103 bcwrite_uleb128(ctx, o->u32.lo); 61 p = lj_strfmt_wuleb128(p, o->u32.lo);
104 bcwrite_uleb128(ctx, o->u32.hi); 62 p = lj_strfmt_wuleb128(p, o->u32.hi);
105 } else { 63 } else {
106 lua_assert(tvispri(o)); 64 lua_assert(tvispri(o));
107 bcwrite_byte(ctx, BCDUMP_KTAB_NIL+~itype(o)); 65 *p++ = BCDUMP_KTAB_NIL+~itype(o);
108 } 66 }
67 setsbufP(&ctx->sb, p);
109} 68}
110 69
111/* Write a template table. */ 70/* Write a template table. */
112static void bcwrite_ktab(BCWriteCtx *ctx, const GCtab *t) 71static void bcwrite_ktab(BCWriteCtx *ctx, char *p, const GCtab *t)
113{ 72{
114 MSize narray = 0, nhash = 0; 73 MSize narray = 0, nhash = 0;
115 if (t->asize > 0) { /* Determine max. length of array part. */ 74 if (t->asize > 0) { /* Determine max. length of array part. */
@@ -127,8 +86,9 @@ static void bcwrite_ktab(BCWriteCtx *ctx, const GCtab *t)
127 nhash += !tvisnil(&node[i].val); 86 nhash += !tvisnil(&node[i].val);
128 } 87 }
129 /* Write number of array slots and hash slots. */ 88 /* Write number of array slots and hash slots. */
130 bcwrite_uleb128(ctx, narray); 89 p = lj_strfmt_wuleb128(p, narray);
131 bcwrite_uleb128(ctx, nhash); 90 p = lj_strfmt_wuleb128(p, nhash);
91 setsbufP(&ctx->sb, p);
132 if (narray) { /* Write array entries (may contain nil). */ 92 if (narray) { /* Write array entries (may contain nil). */
133 MSize i; 93 MSize i;
134 TValue *o = tvref(t->array); 94 TValue *o = tvref(t->array);
@@ -155,6 +115,7 @@ static void bcwrite_kgc(BCWriteCtx *ctx, GCproto *pt)
155 for (i = 0; i < sizekgc; i++, kr++) { 115 for (i = 0; i < sizekgc; i++, kr++) {
156 GCobj *o = gcref(*kr); 116 GCobj *o = gcref(*kr);
157 MSize tp, need = 1; 117 MSize tp, need = 1;
118 char *p;
158 /* Determine constant type and needed size. */ 119 /* Determine constant type and needed size. */
159 if (o->gch.gct == ~LJ_TSTR) { 120 if (o->gch.gct == ~LJ_TSTR) {
160 tp = BCDUMP_KGC_STR + gco2str(o)->len; 121 tp = BCDUMP_KGC_STR + gco2str(o)->len;
@@ -181,24 +142,26 @@ static void bcwrite_kgc(BCWriteCtx *ctx, GCproto *pt)
181 need = 1+2*5; 142 need = 1+2*5;
182 } 143 }
183 /* Write constant type. */ 144 /* Write constant type. */
184 bcwrite_need(ctx, need); 145 p = lj_buf_more(&ctx->sb, need);
185 bcwrite_uleb128(ctx, tp); 146 p = lj_strfmt_wuleb128(p, tp);
186 /* Write constant data (if any). */ 147 /* Write constant data (if any). */
187 if (tp >= BCDUMP_KGC_STR) { 148 if (tp >= BCDUMP_KGC_STR) {
188 bcwrite_block(ctx, strdata(gco2str(o)), gco2str(o)->len); 149 p = lj_buf_wmem(p, strdata(gco2str(o)), gco2str(o)->len);
189 } else if (tp == BCDUMP_KGC_TAB) { 150 } else if (tp == BCDUMP_KGC_TAB) {
190 bcwrite_ktab(ctx, gco2tab(o)); 151 bcwrite_ktab(ctx, p, gco2tab(o));
152 continue;
191#if LJ_HASFFI 153#if LJ_HASFFI
192 } else if (tp != BCDUMP_KGC_CHILD) { 154 } else if (tp != BCDUMP_KGC_CHILD) {
193 cTValue *p = (TValue *)cdataptr(gco2cd(o)); 155 cTValue *q = (TValue *)cdataptr(gco2cd(o));
194 bcwrite_uleb128(ctx, p[0].u32.lo); 156 p = lj_strfmt_wuleb128(p, q[0].u32.lo);
195 bcwrite_uleb128(ctx, p[0].u32.hi); 157 p = lj_strfmt_wuleb128(p, q[0].u32.hi);
196 if (tp == BCDUMP_KGC_COMPLEX) { 158 if (tp == BCDUMP_KGC_COMPLEX) {
197 bcwrite_uleb128(ctx, p[1].u32.lo); 159 p = lj_strfmt_wuleb128(p, q[1].u32.lo);
198 bcwrite_uleb128(ctx, p[1].u32.hi); 160 p = lj_strfmt_wuleb128(p, q[1].u32.hi);
199 } 161 }
200#endif 162#endif
201 } 163 }
164 setsbufP(&ctx->sb, p);
202 } 165 }
203} 166}
204 167
@@ -207,7 +170,7 @@ static void bcwrite_knum(BCWriteCtx *ctx, GCproto *pt)
207{ 170{
208 MSize i, sizekn = pt->sizekn; 171 MSize i, sizekn = pt->sizekn;
209 cTValue *o = mref(pt->k, TValue); 172 cTValue *o = mref(pt->k, TValue);
210 bcwrite_need(ctx, 10*sizekn); 173 char *p = lj_buf_more(&ctx->sb, 10*sizekn);
211 for (i = 0; i < sizekn; i++, o++) { 174 for (i = 0; i < sizekn; i++, o++) {
212 int32_t k; 175 int32_t k;
213 if (tvisint(o)) { 176 if (tvisint(o)) {
@@ -220,58 +183,58 @@ static void bcwrite_knum(BCWriteCtx *ctx, GCproto *pt)
220 k = lj_num2int(num); 183 k = lj_num2int(num);
221 if (num == (lua_Number)k) { /* -0 is never a constant. */ 184 if (num == (lua_Number)k) { /* -0 is never a constant. */
222 save_int: 185 save_int:
223 bcwrite_uleb128(ctx, 2*(uint32_t)k | ((uint32_t)k & 0x80000000u)); 186 p = lj_strfmt_wuleb128(p, 2*(uint32_t)k | ((uint32_t)k&0x80000000u));
224 if (k < 0) { 187 if (k < 0)
225 char *p = &ctx->sb.buf[ctx->sb.n-1]; 188 p[-1] = (p[-1] & 7) | ((k>>27) & 0x18);
226 *p = (*p & 7) | ((k>>27) & 0x18);
227 }
228 continue; 189 continue;
229 } 190 }
230 } 191 }
231 bcwrite_uleb128(ctx, 1+(2*o->u32.lo | (o->u32.lo & 0x80000000u))); 192 p = lj_strfmt_wuleb128(p, 1+(2*o->u32.lo | (o->u32.lo & 0x80000000u)));
232 if (o->u32.lo >= 0x80000000u) { 193 if (o->u32.lo >= 0x80000000u)
233 char *p = &ctx->sb.buf[ctx->sb.n-1]; 194 p[-1] = (p[-1] & 7) | ((o->u32.lo>>27) & 0x18);
234 *p = (*p & 7) | ((o->u32.lo>>27) & 0x18); 195 p = lj_strfmt_wuleb128(p, o->u32.hi);
235 }
236 bcwrite_uleb128(ctx, o->u32.hi);
237 } 196 }
238 } 197 }
198 setsbufP(&ctx->sb, p);
239} 199}
240 200
241/* Write bytecode instructions. */ 201/* Write bytecode instructions. */
242static void bcwrite_bytecode(BCWriteCtx *ctx, GCproto *pt) 202static char *bcwrite_bytecode(BCWriteCtx *ctx, char *p, GCproto *pt)
243{ 203{
244 MSize nbc = pt->sizebc-1; /* Omit the [JI]FUNC* header. */ 204 MSize nbc = pt->sizebc-1; /* Omit the [JI]FUNC* header. */
245#if LJ_HASJIT 205#if LJ_HASJIT
246 uint8_t *p = (uint8_t *)&ctx->sb.buf[ctx->sb.n]; 206 uint8_t *q = (uint8_t *)p;
247#endif 207#endif
248 bcwrite_block(ctx, proto_bc(pt)+1, nbc*(MSize)sizeof(BCIns)); 208 p = lj_buf_wmem(p, proto_bc(pt)+1, nbc*(MSize)sizeof(BCIns));
209 UNUSED(ctx);
249#if LJ_HASJIT 210#if LJ_HASJIT
250 /* Unpatch modified bytecode containing ILOOP/JLOOP etc. */ 211 /* Unpatch modified bytecode containing ILOOP/JLOOP etc. */
251 if ((pt->flags & PROTO_ILOOP) || pt->trace) { 212 if ((pt->flags & PROTO_ILOOP) || pt->trace) {
252 jit_State *J = L2J(ctx->L); 213 jit_State *J = L2J(sbufL(&ctx->sb));
253 MSize i; 214 MSize i;
254 for (i = 0; i < nbc; i++, p += sizeof(BCIns)) { 215 for (i = 0; i < nbc; i++, q += sizeof(BCIns)) {
255 BCOp op = (BCOp)p[LJ_ENDIAN_SELECT(0, 3)]; 216 BCOp op = (BCOp)q[LJ_ENDIAN_SELECT(0, 3)];
256 if (op == BC_IFORL || op == BC_IITERL || op == BC_ILOOP || 217 if (op == BC_IFORL || op == BC_IITERL || op == BC_ILOOP ||
257 op == BC_JFORI) { 218 op == BC_JFORI) {
258 p[LJ_ENDIAN_SELECT(0, 3)] = (uint8_t)(op-BC_IFORL+BC_FORL); 219 q[LJ_ENDIAN_SELECT(0, 3)] = (uint8_t)(op-BC_IFORL+BC_FORL);
259 } else if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) { 220 } else if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) {
260 BCReg rd = p[LJ_ENDIAN_SELECT(2, 1)] + (p[LJ_ENDIAN_SELECT(3, 0)] << 8); 221 BCReg rd = q[LJ_ENDIAN_SELECT(2, 1)] + (q[LJ_ENDIAN_SELECT(3, 0)] << 8);
261 BCIns ins = traceref(J, rd)->startins; 222 BCIns ins = traceref(J, rd)->startins;
262 p[LJ_ENDIAN_SELECT(0, 3)] = (uint8_t)(op-BC_JFORL+BC_FORL); 223 q[LJ_ENDIAN_SELECT(0, 3)] = (uint8_t)(op-BC_JFORL+BC_FORL);
263 p[LJ_ENDIAN_SELECT(2, 1)] = bc_c(ins); 224 q[LJ_ENDIAN_SELECT(2, 1)] = bc_c(ins);
264 p[LJ_ENDIAN_SELECT(3, 0)] = bc_b(ins); 225 q[LJ_ENDIAN_SELECT(3, 0)] = bc_b(ins);
265 } 226 }
266 } 227 }
267 } 228 }
268#endif 229#endif
230 return p;
269} 231}
270 232
271/* Write prototype. */ 233/* Write prototype. */
272static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt) 234static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt)
273{ 235{
274 MSize sizedbg = 0; 236 MSize sizedbg = 0;
237 char *p;
275 238
276 /* Recursively write children of prototype. */ 239 /* Recursively write children of prototype. */
277 if ((pt->flags & PROTO_CHILD)) { 240 if ((pt->flags & PROTO_CHILD)) {
@@ -285,31 +248,32 @@ static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt)
285 } 248 }
286 249
287 /* Start writing the prototype info to a buffer. */ 250 /* Start writing the prototype info to a buffer. */
288 lj_str_resetbuf(&ctx->sb); 251 p = lj_buf_need(&ctx->sb,
289 ctx->sb.n = 5; /* Leave room for final size. */ 252 5+4+6*5+(pt->sizebc-1)*(MSize)sizeof(BCIns)+pt->sizeuv*2);
290 bcwrite_need(ctx, 4+6*5+(pt->sizebc-1)*(MSize)sizeof(BCIns)+pt->sizeuv*2); 253 p += 5; /* Leave room for final size. */
291 254
292 /* Write prototype header. */ 255 /* Write prototype header. */
293 bcwrite_byte(ctx, (pt->flags & (PROTO_CHILD|PROTO_VARARG|PROTO_FFI))); 256 *p++ = (pt->flags & (PROTO_CHILD|PROTO_VARARG|PROTO_FFI));
294 bcwrite_byte(ctx, pt->numparams); 257 *p++ = pt->numparams;
295 bcwrite_byte(ctx, pt->framesize); 258 *p++ = pt->framesize;
296 bcwrite_byte(ctx, pt->sizeuv); 259 *p++ = pt->sizeuv;
297 bcwrite_uleb128(ctx, pt->sizekgc); 260 p = lj_strfmt_wuleb128(p, pt->sizekgc);
298 bcwrite_uleb128(ctx, pt->sizekn); 261 p = lj_strfmt_wuleb128(p, pt->sizekn);
299 bcwrite_uleb128(ctx, pt->sizebc-1); 262 p = lj_strfmt_wuleb128(p, pt->sizebc-1);
300 if (!ctx->strip) { 263 if (!ctx->strip) {
301 if (proto_lineinfo(pt)) 264 if (proto_lineinfo(pt))
302 sizedbg = pt->sizept - (MSize)((char *)proto_lineinfo(pt) - (char *)pt); 265 sizedbg = pt->sizept - (MSize)((char *)proto_lineinfo(pt) - (char *)pt);
303 bcwrite_uleb128(ctx, sizedbg); 266 p = lj_strfmt_wuleb128(p, sizedbg);
304 if (sizedbg) { 267 if (sizedbg) {
305 bcwrite_uleb128(ctx, pt->firstline); 268 p = lj_strfmt_wuleb128(p, pt->firstline);
306 bcwrite_uleb128(ctx, pt->numline); 269 p = lj_strfmt_wuleb128(p, pt->numline);
307 } 270 }
308 } 271 }
309 272
310 /* Write bytecode instructions and upvalue refs. */ 273 /* Write bytecode instructions and upvalue refs. */
311 bcwrite_bytecode(ctx, pt); 274 p = bcwrite_bytecode(ctx, p, pt);
312 bcwrite_block(ctx, proto_uv(pt), pt->sizeuv*2); 275 p = lj_buf_wmem(p, proto_uv(pt), pt->sizeuv*2);
276 setsbufP(&ctx->sb, p);
313 277
314 /* Write constants. */ 278 /* Write constants. */
315 bcwrite_kgc(ctx, pt); 279 bcwrite_kgc(ctx, pt);
@@ -317,18 +281,19 @@ static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt)
317 281
318 /* Write debug info, if not stripped. */ 282 /* Write debug info, if not stripped. */
319 if (sizedbg) { 283 if (sizedbg) {
320 bcwrite_need(ctx, sizedbg); 284 p = lj_buf_more(&ctx->sb, sizedbg);
321 bcwrite_block(ctx, proto_lineinfo(pt), sizedbg); 285 p = lj_buf_wmem(p, proto_lineinfo(pt), sizedbg);
286 setsbufP(&ctx->sb, p);
322 } 287 }
323 288
324 /* Pass buffer to writer function. */ 289 /* Pass buffer to writer function. */
325 if (ctx->status == 0) { 290 if (ctx->status == 0) {
326 MSize n = ctx->sb.n - 5; 291 MSize n = sbuflen(&ctx->sb) - 5;
327 MSize nn = (lj_fls(n)+8)*9 >> 6; 292 MSize nn = (lj_fls(n)+8)*9 >> 6;
328 ctx->sb.n = 5 - nn; 293 char *q = sbufB(&ctx->sb) + (5 - nn);
329 bcwrite_uleb128(ctx, n); /* Fill in final size. */ 294 p = lj_strfmt_wuleb128(q, n); /* Fill in final size. */
330 lua_assert(ctx->sb.n == 5); 295 lua_assert(p == sbufB(&ctx->sb) + 5);
331 ctx->status = ctx->wfunc(ctx->L, ctx->sb.buf+5-nn, nn+n, ctx->wdata); 296 ctx->status = ctx->wfunc(sbufL(&ctx->sb), q, nn+n, ctx->wdata);
332 } 297 }
333} 298}
334 299
@@ -338,20 +303,20 @@ static void bcwrite_header(BCWriteCtx *ctx)
338 GCstr *chunkname = proto_chunkname(ctx->pt); 303 GCstr *chunkname = proto_chunkname(ctx->pt);
339 const char *name = strdata(chunkname); 304 const char *name = strdata(chunkname);
340 MSize len = chunkname->len; 305 MSize len = chunkname->len;
341 lj_str_resetbuf(&ctx->sb); 306 char *p = lj_buf_need(&ctx->sb, 5+5+len);
342 bcwrite_need(ctx, 5+5+len); 307 *p++ = BCDUMP_HEAD1;
343 bcwrite_byte(ctx, BCDUMP_HEAD1); 308 *p++ = BCDUMP_HEAD2;
344 bcwrite_byte(ctx, BCDUMP_HEAD2); 309 *p++ = BCDUMP_HEAD3;
345 bcwrite_byte(ctx, BCDUMP_HEAD3); 310 *p++ = BCDUMP_VERSION;
346 bcwrite_byte(ctx, BCDUMP_VERSION); 311 *p++ = (ctx->strip ? BCDUMP_F_STRIP : 0) +
347 bcwrite_byte(ctx, (ctx->strip ? BCDUMP_F_STRIP : 0) + 312 (LJ_BE ? BCDUMP_F_BE : 0) +
348 (LJ_BE ? BCDUMP_F_BE : 0) + 313 ((ctx->pt->flags & PROTO_FFI) ? BCDUMP_F_FFI : 0);
349 ((ctx->pt->flags & PROTO_FFI) ? BCDUMP_F_FFI : 0));
350 if (!ctx->strip) { 314 if (!ctx->strip) {
351 bcwrite_uleb128(ctx, len); 315 p = lj_strfmt_wuleb128(p, len);
352 bcwrite_block(ctx, name, len); 316 p = lj_buf_wmem(p, name, len);
353 } 317 }
354 ctx->status = ctx->wfunc(ctx->L, ctx->sb.buf, ctx->sb.n, ctx->wdata); 318 ctx->status = ctx->wfunc(sbufL(&ctx->sb), sbufB(&ctx->sb),
319 (MSize)(p - sbufB(&ctx->sb)), ctx->wdata);
355} 320}
356 321
357/* Write footer of bytecode dump. */ 322/* Write footer of bytecode dump. */
@@ -359,7 +324,7 @@ static void bcwrite_footer(BCWriteCtx *ctx)
359{ 324{
360 if (ctx->status == 0) { 325 if (ctx->status == 0) {
361 uint8_t zero = 0; 326 uint8_t zero = 0;
362 ctx->status = ctx->wfunc(ctx->L, &zero, 1, ctx->wdata); 327 ctx->status = ctx->wfunc(sbufL(&ctx->sb), &zero, 1, ctx->wdata);
363 } 328 }
364} 329}
365 330
@@ -367,8 +332,8 @@ static void bcwrite_footer(BCWriteCtx *ctx)
367static TValue *cpwriter(lua_State *L, lua_CFunction dummy, void *ud) 332static TValue *cpwriter(lua_State *L, lua_CFunction dummy, void *ud)
368{ 333{
369 BCWriteCtx *ctx = (BCWriteCtx *)ud; 334 BCWriteCtx *ctx = (BCWriteCtx *)ud;
370 UNUSED(dummy); 335 UNUSED(L); UNUSED(dummy);
371 lj_str_resizebuf(L, &ctx->sb, 1024); /* Avoids resize for most prototypes. */ 336 lj_buf_need(&ctx->sb, 1024); /* Avoids resize for most prototypes. */
372 bcwrite_header(ctx); 337 bcwrite_header(ctx);
373 bcwrite_proto(ctx, ctx->pt); 338 bcwrite_proto(ctx, ctx->pt);
374 bcwrite_footer(ctx); 339 bcwrite_footer(ctx);
@@ -381,16 +346,15 @@ int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, void *data,
381{ 346{
382 BCWriteCtx ctx; 347 BCWriteCtx ctx;
383 int status; 348 int status;
384 ctx.L = L;
385 ctx.pt = pt; 349 ctx.pt = pt;
386 ctx.wfunc = writer; 350 ctx.wfunc = writer;
387 ctx.wdata = data; 351 ctx.wdata = data;
388 ctx.strip = strip; 352 ctx.strip = strip;
389 ctx.status = 0; 353 ctx.status = 0;
390 lj_str_initbuf(&ctx.sb); 354 lj_buf_init(L, &ctx.sb);
391 status = lj_vm_cpcall(L, NULL, &ctx, cpwriter); 355 status = lj_vm_cpcall(L, NULL, &ctx, cpwriter);
392 if (status == 0) status = ctx.status; 356 if (status == 0) status = ctx.status;
393 lj_str_freebuf(G(ctx.L), &ctx.sb); 357 lj_buf_free(G(sbufL(&ctx.sb)), &ctx.sb);
394 return status; 358 return status;
395} 359}
396 360
diff --git a/src/lj_buf.c b/src/lj_buf.c
new file mode 100644
index 00000000..05ff1f51
--- /dev/null
+++ b/src/lj_buf.c
@@ -0,0 +1,234 @@
1/*
2** Buffer handling.
3** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#define lj_buf_c
7#define LUA_CORE
8
9#include "lj_obj.h"
10#include "lj_gc.h"
11#include "lj_err.h"
12#include "lj_buf.h"
13#include "lj_str.h"
14#include "lj_tab.h"
15#include "lj_strfmt.h"
16
17/* -- Buffer management --------------------------------------------------- */
18
19static void buf_grow(SBuf *sb, MSize sz)
20{
21 MSize osz = sbufsz(sb), len = sbuflen(sb), nsz = osz;
22 char *b;
23 if (nsz < LJ_MIN_SBUF) nsz = LJ_MIN_SBUF;
24 while (nsz < sz) nsz += nsz;
25 b = (char *)lj_mem_realloc(sbufL(sb), sbufB(sb), osz, nsz);
26 setmref(sb->b, b);
27 setmref(sb->p, b + len);
28 setmref(sb->e, b + nsz);
29}
30
31LJ_NOINLINE char *LJ_FASTCALL lj_buf_need2(SBuf *sb, MSize sz)
32{
33 lua_assert(sz > sbufsz(sb));
34 if (LJ_UNLIKELY(sz > LJ_MAX_MEM))
35 lj_err_mem(sbufL(sb));
36 buf_grow(sb, sz);
37 return sbufB(sb);
38}
39
40LJ_NOINLINE char *LJ_FASTCALL lj_buf_more2(SBuf *sb, MSize sz)
41{
42 MSize len = sbuflen(sb);
43 lua_assert(sz > sbufleft(sb));
44 if (LJ_UNLIKELY(sz > LJ_MAX_MEM || len + sz > LJ_MAX_MEM))
45 lj_err_mem(sbufL(sb));
46 buf_grow(sb, len + sz);
47 return sbufP(sb);
48}
49
50void LJ_FASTCALL lj_buf_shrink(lua_State *L, SBuf *sb)
51{
52 char *b = sbufB(sb);
53 MSize osz = (MSize)(sbufE(sb) - b);
54 if (osz > 2*LJ_MIN_SBUF) {
55 MSize n = (MSize)(sbufP(sb) - b);
56 b = lj_mem_realloc(L, b, osz, (osz >> 1));
57 setmref(sb->b, b);
58 setmref(sb->p, b + n);
59 setmref(sb->e, b + (osz >> 1));
60 }
61}
62
63char * LJ_FASTCALL lj_buf_tmp(lua_State *L, MSize sz)
64{
65 SBuf *sb = &G(L)->tmpbuf;
66 setsbufL(sb, L);
67 return lj_buf_need(sb, sz);
68}
69
70/* -- Low-level buffer put operations ------------------------------------- */
71
72SBuf *lj_buf_putmem(SBuf *sb, const void *q, MSize len)
73{
74 char *p = lj_buf_more(sb, len);
75 p = lj_buf_wmem(p, q, len);
76 setsbufP(sb, p);
77 return sb;
78}
79
80#if LJ_HASJIT
81SBuf * LJ_FASTCALL lj_buf_putchar(SBuf *sb, int c)
82{
83 char *p = lj_buf_more(sb, 1);
84 *p++ = (char)c;
85 setsbufP(sb, p);
86 return sb;
87}
88#endif
89
90SBuf * LJ_FASTCALL lj_buf_putstr(SBuf *sb, GCstr *s)
91{
92 MSize len = s->len;
93 char *p = lj_buf_more(sb, len);
94 p = lj_buf_wmem(p, strdata(s), len);
95 setsbufP(sb, p);
96 return sb;
97}
98
99/* -- High-level buffer put operations ------------------------------------ */
100
101SBuf * LJ_FASTCALL lj_buf_putstr_reverse(SBuf *sb, GCstr *s)
102{
103 MSize len = s->len;
104 char *p = lj_buf_more(sb, len), *e = p+len;
105 const char *q = strdata(s)+len-1;
106 while (p < e)
107 *p++ = *q--;
108 setsbufP(sb, p);
109 return sb;
110}
111
112SBuf * LJ_FASTCALL lj_buf_putstr_lower(SBuf *sb, GCstr *s)
113{
114 MSize len = s->len;
115 char *p = lj_buf_more(sb, len), *e = p+len;
116 const char *q = strdata(s);
117 for (; p < e; p++, q++) {
118 uint32_t c = *(unsigned char *)q;
119#if LJ_TARGET_PPC
120 *p = c + ((c >= 'A' && c <= 'Z') << 5);
121#else
122 if (c >= 'A' && c <= 'Z') c += 0x20;
123 *p = c;
124#endif
125 }
126 setsbufP(sb, p);
127 return sb;
128}
129
130SBuf * LJ_FASTCALL lj_buf_putstr_upper(SBuf *sb, GCstr *s)
131{
132 MSize len = s->len;
133 char *p = lj_buf_more(sb, len), *e = p+len;
134 const char *q = strdata(s);
135 for (; p < e; p++, q++) {
136 uint32_t c = *(unsigned char *)q;
137#if LJ_TARGET_PPC
138 *p = c - ((c >= 'a' && c <= 'z') << 5);
139#else
140 if (c >= 'a' && c <= 'z') c -= 0x20;
141 *p = c;
142#endif
143 }
144 setsbufP(sb, p);
145 return sb;
146}
147
148SBuf *lj_buf_putstr_rep(SBuf *sb, GCstr *s, int32_t rep)
149{
150 MSize len = s->len;
151 if (rep > 0 && len) {
152 uint64_t tlen = (uint64_t)rep * len;
153 char *p;
154 if (LJ_UNLIKELY(tlen > LJ_MAX_STR))
155 lj_err_mem(sbufL(sb));
156 p = lj_buf_more(sb, (MSize)tlen);
157 if (len == 1) { /* Optimize a common case. */
158 uint32_t c = strdata(s)[0];
159 do { *p++ = c; } while (--rep > 0);
160 } else {
161 const char *e = strdata(s) + len;
162 do {
163 const char *q = strdata(s);
164 do { *p++ = *q++; } while (q < e);
165 } while (--rep > 0);
166 }
167 setsbufP(sb, p);
168 }
169 return sb;
170}
171
172SBuf *lj_buf_puttab(SBuf *sb, GCtab *t, GCstr *sep, int32_t i, int32_t e)
173{
174 MSize seplen = sep ? sep->len : 0;
175 if (i <= e) {
176 for (;;) {
177 cTValue *o = lj_tab_getint(t, i);
178 char *p;
179 if (!o) {
180 badtype: /* Error: bad element type. */
181 setsbufP(sb, (intptr_t)i); /* Store failing index. */
182 return NULL;
183 } else if (tvisstr(o)) {
184 MSize len = strV(o)->len;
185 p = lj_buf_wmem(lj_buf_more(sb, len + seplen), strVdata(o), len);
186 } else if (tvisint(o)) {
187 p = lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT+seplen), intV(o));
188 } else if (tvisnum(o)) {
189 p = lj_strfmt_wnum(lj_buf_more(sb, STRFMT_MAXBUF_NUM+seplen), o);
190 } else {
191 goto badtype;
192 }
193 if (i++ == e) {
194 setsbufP(sb, p);
195 break;
196 }
197 if (seplen) p = lj_buf_wmem(p, strdata(sep), seplen);
198 setsbufP(sb, p);
199 }
200 }
201 return sb;
202}
203
204/* -- Miscellaneous buffer operations ------------------------------------- */
205
206GCstr * LJ_FASTCALL lj_buf_tostr(SBuf *sb)
207{
208 return lj_str_new(sbufL(sb), sbufB(sb), sbuflen(sb));
209}
210
211/* Concatenate two strings. */
212GCstr *lj_buf_cat2str(lua_State *L, GCstr *s1, GCstr *s2)
213{
214 MSize len1 = s1->len, len2 = s2->len;
215 char *buf = lj_buf_tmp(L, len1 + len2);
216 memcpy(buf, strdata(s1), len1);
217 memcpy(buf+len1, strdata(s2), len2);
218 return lj_str_new(L, buf, len1 + len2);
219}
220
221/* Read ULEB128 from buffer. */
222uint32_t LJ_FASTCALL lj_buf_ruleb128(const char **pp)
223{
224 const uint8_t *p = (const uint8_t *)*pp;
225 uint32_t v = *p++;
226 if (LJ_UNLIKELY(v >= 0x80)) {
227 int sh = 0;
228 v &= 0x7f;
229 do { v |= ((*p & 0x7f) << (sh += 7)); } while (*p++ >= 0x80);
230 }
231 *pp = (const char *)p;
232 return v;
233}
234
diff --git a/src/lj_buf.h b/src/lj_buf.h
new file mode 100644
index 00000000..66b285ad
--- /dev/null
+++ b/src/lj_buf.h
@@ -0,0 +1,105 @@
1/*
2** Buffer handling.
3** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_BUF_H
7#define _LJ_BUF_H
8
9#include "lj_obj.h"
10#include "lj_gc.h"
11#include "lj_str.h"
12
13/* Resizable string buffers. Struct definition in lj_obj.h. */
14#define sbufB(sb) (mref((sb)->b, char))
15#define sbufP(sb) (mref((sb)->p, char))
16#define sbufE(sb) (mref((sb)->e, char))
17#define sbufL(sb) (mref((sb)->L, lua_State))
18#define sbufsz(sb) ((MSize)(sbufE((sb)) - sbufB((sb))))
19#define sbuflen(sb) ((MSize)(sbufP((sb)) - sbufB((sb))))
20#define sbufleft(sb) ((MSize)(sbufE((sb)) - sbufP((sb))))
21#define setsbufP(sb, q) (setmref((sb)->p, (q)))
22#define setsbufL(sb, l) (setmref((sb)->L, (l)))
23
24/* Buffer management */
25LJ_FUNC char *LJ_FASTCALL lj_buf_need2(SBuf *sb, MSize sz);
26LJ_FUNC char *LJ_FASTCALL lj_buf_more2(SBuf *sb, MSize sz);
27LJ_FUNC void LJ_FASTCALL lj_buf_shrink(lua_State *L, SBuf *sb);
28LJ_FUNC char * LJ_FASTCALL lj_buf_tmp(lua_State *L, MSize sz);
29
30static LJ_AINLINE void lj_buf_init(lua_State *L, SBuf *sb)
31{
32 setsbufL(sb, L);
33 setmref(sb->p, NULL); setmref(sb->e, NULL); setmref(sb->b, NULL);
34}
35
36static LJ_AINLINE void lj_buf_reset(SBuf *sb)
37{
38 setmrefr(sb->p, sb->b);
39}
40
41static LJ_AINLINE SBuf *lj_buf_tmp_(lua_State *L)
42{
43 SBuf *sb = &G(L)->tmpbuf;
44 setsbufL(sb, L);
45 lj_buf_reset(sb);
46 return sb;
47}
48
49static LJ_AINLINE void lj_buf_free(global_State *g, SBuf *sb)
50{
51 lj_mem_free(g, sbufB(sb), sbufsz(sb));
52}
53
54static LJ_AINLINE char *lj_buf_need(SBuf *sb, MSize sz)
55{
56 if (LJ_UNLIKELY(sz > sbufsz(sb)))
57 return lj_buf_need2(sb, sz);
58 return sbufB(sb);
59}
60
61static LJ_AINLINE char *lj_buf_more(SBuf *sb, MSize sz)
62{
63 if (LJ_UNLIKELY(sz > sbufleft(sb)))
64 return lj_buf_more2(sb, sz);
65 return sbufP(sb);
66}
67
68/* Low-level buffer put operations */
69LJ_FUNC SBuf *lj_buf_putmem(SBuf *sb, const void *q, MSize len);
70#if LJ_HASJIT
71LJ_FUNC SBuf * LJ_FASTCALL lj_buf_putchar(SBuf *sb, int c);
72#endif
73LJ_FUNC SBuf * LJ_FASTCALL lj_buf_putstr(SBuf *sb, GCstr *s);
74
75static LJ_AINLINE char *lj_buf_wmem(char *p, const void *q, MSize len)
76{
77 return (char *)memcpy(p, q, len) + len;
78}
79
80static LJ_AINLINE void lj_buf_putb(SBuf *sb, int c)
81{
82 char *p = lj_buf_more(sb, 1);
83 *p++ = (char)c;
84 setsbufP(sb, p);
85}
86
87/* High-level buffer put operations */
88LJ_FUNCA SBuf * LJ_FASTCALL lj_buf_putstr_reverse(SBuf *sb, GCstr *s);
89LJ_FUNCA SBuf * LJ_FASTCALL lj_buf_putstr_lower(SBuf *sb, GCstr *s);
90LJ_FUNCA SBuf * LJ_FASTCALL lj_buf_putstr_upper(SBuf *sb, GCstr *s);
91LJ_FUNC SBuf *lj_buf_putstr_rep(SBuf *sb, GCstr *s, int32_t rep);
92LJ_FUNC SBuf *lj_buf_puttab(SBuf *sb, GCtab *t, GCstr *sep,
93 int32_t i, int32_t e);
94
95/* Miscellaneous buffer operations */
96LJ_FUNCA GCstr * LJ_FASTCALL lj_buf_tostr(SBuf *sb);
97LJ_FUNC GCstr *lj_buf_cat2str(lua_State *L, GCstr *s1, GCstr *s2);
98LJ_FUNC uint32_t LJ_FASTCALL lj_buf_ruleb128(const char **pp);
99
100static LJ_AINLINE GCstr *lj_buf_str(lua_State *L, SBuf *sb)
101{
102 return lj_str_new(L, sbufB(sb), sbuflen(sb));
103}
104
105#endif
diff --git a/src/lj_carith.c b/src/lj_carith.c
index 9f94091d..d8b5f91a 100644
--- a/src/lj_carith.c
+++ b/src/lj_carith.c
@@ -11,10 +11,12 @@
11#include "lj_err.h" 11#include "lj_err.h"
12#include "lj_tab.h" 12#include "lj_tab.h"
13#include "lj_meta.h" 13#include "lj_meta.h"
14#include "lj_ir.h"
14#include "lj_ctype.h" 15#include "lj_ctype.h"
15#include "lj_cconv.h" 16#include "lj_cconv.h"
16#include "lj_cdata.h" 17#include "lj_cdata.h"
17#include "lj_carith.h" 18#include "lj_carith.h"
19#include "lj_strscan.h"
18 20
19/* -- C data arithmetic --------------------------------------------------- */ 21/* -- C data arithmetic --------------------------------------------------- */
20 22
@@ -270,6 +272,80 @@ int lj_carith_op(lua_State *L, MMS mm)
270 return lj_carith_meta(L, cts, &ca, mm); 272 return lj_carith_meta(L, cts, &ca, mm);
271} 273}
272 274
275/* -- 64 bit bit operations helpers --------------------------------------- */
276
/* B64DEF(name) opens the definition of lj_carith_<name>(uint64_t x, int32_t sh).
** With LJ_64 the helper is a static, always-inlined function. Otherwise it is
** a non-static LJ_NOINLINE function.
*/
277#if LJ_64
278#define B64DEF(name) \
279 static LJ_AINLINE uint64_t lj_carith_##name(uint64_t x, int32_t sh)
280#else
281/* Not inlined on 32 bit archs, since some of these are quite lengthy. */
282#define B64DEF(name) \
283 uint64_t LJ_NOINLINE lj_carith_##name(uint64_t x, int32_t sh)
284#endif
285
/* Every helper masks the shift count with 63, so only the low 6 bits of sh
** are used. sar64 performs the right shift on the value cast to int64_t.
** rol64/ror64 delegate to lj_rol()/lj_ror().
*/
286B64DEF(shl64) { return x << (sh&63); }
287B64DEF(shr64) { return x >> (sh&63); }
288B64DEF(sar64) { return (uint64_t)((int64_t)x >> (sh&63)); }
289B64DEF(rol64) { return lj_rol(x, (sh&63)); }
290B64DEF(ror64) { return lj_ror(x, (sh&63)); }
291
292#undef B64DEF
293
/* Apply a 64 bit shift or rotate to x by sh and return the result.
** op selects the operation and is expressed relative to IR_BSHL, i.e. the
** caller passes (IR_Bxxx - IR_BSHL) for one of BSHL, BSHR, BSAR, BROL, BROR.
*/
294uint64_t lj_carith_shift64(uint64_t x, int32_t sh, int op)
295{
296 switch (op) {
297 case IR_BSHL-IR_BSHL: x = lj_carith_shl64(x, sh); break;
298 case IR_BSHR-IR_BSHL: x = lj_carith_shr64(x, sh); break;
299 case IR_BSAR-IR_BSHL: x = lj_carith_sar64(x, sh); break;
300 case IR_BROL-IR_BSHL: x = lj_carith_rol64(x, sh); break;
301 case IR_BROR-IR_BSHL: x = lj_carith_ror64(x, sh); break;
/* Any other op is a caller error; x is left unmodified on this path. */
302 default: lua_assert(0); break;
303 }
304 return x;
305}
306
307/* Equivalent to lj_lib_checkbit(), but handles cdata.
** Reads argument number narg (slot L->base + narg-1) and returns it as a
** 64 bit value. For cdata arguments, *id is set to CTID_UINT64 when the
** value is an unsigned 8 byte integer, or to CTID_INT64 when *id was still 0,
** and the cdata is converted to that type. For numbers and numeric strings
** *id is not touched and the 32 bit result is returned zero-extended.
** Missing or non-convertible arguments raise an argument type error.
*/
308uint64_t lj_carith_check64(lua_State *L, int narg, CTypeID *id)
309{
310 TValue *o = L->base + narg-1;
311 if (o >= L->top) {
/* Argument is missing. The err label is also the jump target for arguments
** that are neither number, cdata nor a numeric string.
** NOTE(review): lj_err_argt() presumably does not return -- confirm.
*/
312 err:
313 lj_err_argt(L, narg, LUA_TNUMBER);
314 } else if (LJ_LIKELY(tvisnumber(o))) {
315 /* Handled below. */
316 } else if (tviscdata(o)) {
317 CTState *cts = ctype_cts(L);
318 uint8_t *sp = (uint8_t *)cdataptr(cdataV(o));
319 CTypeID sid = cdataV(o)->ctypeid;
320 CType *s = ctype_get(cts, sid);
321 uint64_t x;
/* For a reference type, follow the stored pointer and switch to the
** referenced type id.
*/
322 if (ctype_isref(s->info)) {
323 sp = *(void **)sp;
324 sid = ctype_cid(s->info);
325 }
326 s = ctype_raw(cts, sid);
/* Enums are treated as their child type. */
327 if (ctype_isenum(s->info)) s = ctype_child(cts, s);
/* Match a CT_NUM type with CTF_UNSIGNED set, CTF_BOOL and CTF_FP clear and a
** size of 8 bytes.
*/
328 if ((s->info & (CTMASK_NUM|CTF_BOOL|CTF_FP|CTF_UNSIGNED)) ==
329 CTINFO(CT_NUM, CTF_UNSIGNED) && s->size == 8)
330 *id = CTID_UINT64; /* Use uint64_t, since it has the highest rank. */
331 else if (!*id)
332 *id = CTID_INT64; /* Use int64_t, unless already set. */
/* Convert the source value to the selected 64 bit type, writing into x. */
333 lj_cconv_ct_ct(cts, ctype_get(cts, *id), s,
334 (uint8_t *)&x, sp, CCF_ARG(narg));
335 return x;
/* Strings are accepted if lj_strscan_number() succeeds.
** NOTE(review): it presumably stores the parsed number into o -- confirm.
*/
336 } else if (!(tvisstr(o) && lj_strscan_number(strV(o), o))) {
337 goto err;
338 }
/* Number path: the result is a 32 bit value, zero-extended to 64 bits by
** the uint32_t cast.
*/
339 if (LJ_LIKELY(tvisint(o))) {
340 return (uint32_t)intV(o);
341 } else {
342 int32_t i = lj_num2bit(numV(o));
/* With LJ_DUALNUM the converted integer is written back to the slot. */
343 if (LJ_DUALNUM) setintV(o, i);
344 return (uint32_t)i;
345 }
346}
347
348
273/* -- 64 bit integer arithmetic helpers ----------------------------------- */ 349/* -- 64 bit integer arithmetic helpers ----------------------------------- */
274 350
275#if LJ_32 && LJ_HASJIT 351#if LJ_32 && LJ_HASJIT
diff --git a/src/lj_carith.h b/src/lj_carith.h
index 8b28556d..bea5ed2e 100644
--- a/src/lj_carith.h
+++ b/src/lj_carith.h
@@ -12,6 +12,16 @@
12 12
13LJ_FUNC int lj_carith_op(lua_State *L, MMS mm); 13LJ_FUNC int lj_carith_op(lua_State *L, MMS mm);
14 14
15#if LJ_32
16LJ_FUNC uint64_t lj_carith_shl64(uint64_t x, int32_t sh);
17LJ_FUNC uint64_t lj_carith_shr64(uint64_t x, int32_t sh);
18LJ_FUNC uint64_t lj_carith_sar64(uint64_t x, int32_t sh);
19LJ_FUNC uint64_t lj_carith_rol64(uint64_t x, int32_t sh);
20LJ_FUNC uint64_t lj_carith_ror64(uint64_t x, int32_t sh);
21#endif
22LJ_FUNC uint64_t lj_carith_shift64(uint64_t x, int32_t sh, int op);
23LJ_FUNC uint64_t lj_carith_check64(lua_State *L, int narg, CTypeID *id);
24
15#if LJ_32 && LJ_HASJIT 25#if LJ_32 && LJ_HASJIT
16LJ_FUNC int64_t lj_carith_mul64(int64_t x, int64_t k); 26LJ_FUNC int64_t lj_carith_mul64(int64_t x, int64_t k);
17#endif 27#endif
diff --git a/src/lj_ccall.c b/src/lj_ccall.c
index 70c9ba5e..1011a1ad 100644
--- a/src/lj_ccall.c
+++ b/src/lj_ccall.c
@@ -9,7 +9,6 @@
9 9
10#include "lj_gc.h" 10#include "lj_gc.h"
11#include "lj_err.h" 11#include "lj_err.h"
12#include "lj_str.h"
13#include "lj_tab.h" 12#include "lj_tab.h"
14#include "lj_ctype.h" 13#include "lj_ctype.h"
15#include "lj_cconv.h" 14#include "lj_cconv.h"
diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c
index d872d671..b234ec67 100644
--- a/src/lj_ccallback.c
+++ b/src/lj_ccallback.c
@@ -529,7 +529,7 @@ lua_State * LJ_FASTCALL lj_ccallback_enter(CTState *cts, void *cf)
529 lua_State *L = cts->L; 529 lua_State *L = cts->L;
530 global_State *g = cts->g; 530 global_State *g = cts->g;
531 lua_assert(L != NULL); 531 lua_assert(L != NULL);
532 if (gcref(g->jit_L)) { 532 if (tvref(g->jit_base)) {
533 setstrV(L, L->top++, lj_err_str(L, LJ_ERR_FFI_BADCBACK)); 533 setstrV(L, L->top++, lj_err_str(L, LJ_ERR_FFI_BADCBACK));
534 if (g->panic) g->panic(L); 534 if (g->panic) g->panic(L);
535 exit(EXIT_FAILURE); 535 exit(EXIT_FAILURE);
@@ -562,9 +562,9 @@ void LJ_FASTCALL lj_ccallback_leave(CTState *cts, TValue *o)
562 } 562 }
563 callback_conv_result(cts, L, o); 563 callback_conv_result(cts, L, o);
564 /* Finally drop C frame and continuation frame. */ 564 /* Finally drop C frame and continuation frame. */
565 L->cframe = cframe_prev(L->cframe);
566 L->top -= 2; 565 L->top -= 2;
567 L->base = obase; 566 L->base = obase;
567 L->cframe = cframe_prev(L->cframe);
568 cts->cb.slot = 0; /* Blacklist C function that called the callback. */ 568 cts->cb.slot = 0; /* Blacklist C function that called the callback. */
569} 569}
570 570
diff --git a/src/lj_cdata.c b/src/lj_cdata.c
index 590ddf10..04875fd6 100644
--- a/src/lj_cdata.c
+++ b/src/lj_cdata.c
@@ -9,7 +9,6 @@
9 9
10#include "lj_gc.h" 10#include "lj_gc.h"
11#include "lj_err.h" 11#include "lj_err.h"
12#include "lj_str.h"
13#include "lj_tab.h" 12#include "lj_tab.h"
14#include "lj_ctype.h" 13#include "lj_ctype.h"
15#include "lj_cconv.h" 14#include "lj_cconv.h"
@@ -27,12 +26,12 @@ GCcdata *lj_cdata_newref(CTState *cts, const void *p, CTypeID id)
27} 26}
28 27
29/* Allocate variable-sized or specially aligned C data object. */ 28/* Allocate variable-sized or specially aligned C data object. */
30GCcdata *lj_cdata_newv(CTState *cts, CTypeID id, CTSize sz, CTSize align) 29GCcdata *lj_cdata_newv(lua_State *L, CTypeID id, CTSize sz, CTSize align)
31{ 30{
32 global_State *g; 31 global_State *g;
33 MSize extra = sizeof(GCcdataVar) + sizeof(GCcdata) + 32 MSize extra = sizeof(GCcdataVar) + sizeof(GCcdata) +
34 (align > CT_MEMALIGN ? (1u<<align) - (1u<<CT_MEMALIGN) : 0); 33 (align > CT_MEMALIGN ? (1u<<align) - (1u<<CT_MEMALIGN) : 0);
35 char *p = lj_mem_newt(cts->L, extra + sz, char); 34 char *p = lj_mem_newt(L, extra + sz, char);
36 uintptr_t adata = (uintptr_t)p + sizeof(GCcdataVar) + sizeof(GCcdata); 35 uintptr_t adata = (uintptr_t)p + sizeof(GCcdataVar) + sizeof(GCcdata);
37 uintptr_t almask = (1u << align) - 1u; 36 uintptr_t almask = (1u << align) - 1u;
38 GCcdata *cd = (GCcdata *)(((adata + almask) & ~almask) - sizeof(GCcdata)); 37 GCcdata *cd = (GCcdata *)(((adata + almask) & ~almask) - sizeof(GCcdata));
@@ -40,7 +39,7 @@ GCcdata *lj_cdata_newv(CTState *cts, CTypeID id, CTSize sz, CTSize align)
40 cdatav(cd)->offset = (uint16_t)((char *)cd - p); 39 cdatav(cd)->offset = (uint16_t)((char *)cd - p);
41 cdatav(cd)->extra = extra; 40 cdatav(cd)->extra = extra;
42 cdatav(cd)->len = sz; 41 cdatav(cd)->len = sz;
43 g = cts->g; 42 g = G(L);
44 setgcrefr(cd->nextgc, g->gc.root); 43 setgcrefr(cd->nextgc, g->gc.root);
45 setgcref(g->gc.root, obj2gco(cd)); 44 setgcref(g->gc.root, obj2gco(cd));
46 newwhite(g, obj2gco(cd)); 45 newwhite(g, obj2gco(cd));
@@ -76,21 +75,20 @@ void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd)
76 } 75 }
77} 76}
78 77
79TValue * LJ_FASTCALL lj_cdata_setfin(lua_State *L, GCcdata *cd) 78void lj_cdata_setfin(lua_State *L, GCcdata *cd, GCobj *obj, uint32_t it)
80{ 79{
81 global_State *g = G(L); 80 GCtab *t = ctype_ctsG(G(L))->finalizer;
82 GCtab *t = ctype_ctsG(g)->finalizer;
83 if (gcref(t->metatable)) { 81 if (gcref(t->metatable)) {
84 /* Add cdata to finalizer table, if still enabled. */ 82 /* Add cdata to finalizer table, if still enabled. */
85 TValue *tv, tmp; 83 TValue *tv, tmp;
86 setcdataV(L, &tmp, cd); 84 setcdataV(L, &tmp, cd);
87 lj_gc_anybarriert(L, t); 85 lj_gc_anybarriert(L, t);
88 tv = lj_tab_set(L, t, &tmp); 86 tv = lj_tab_set(L, t, &tmp);
89 cd->marked |= LJ_GC_CDATA_FIN; 87 setgcV(L, tv, obj, it);
90 return tv; 88 if (!tvisnil(tv))
91 } else { 89 cd->marked |= LJ_GC_CDATA_FIN;
92 /* Otherwise return dummy TValue. */ 90 else
93 return &g->tmptv; 91 cd->marked &= ~LJ_GC_CDATA_FIN;
94 } 92 }
95} 93}
96 94
diff --git a/src/lj_cdata.h b/src/lj_cdata.h
index 6c8e7a1a..f24f3adc 100644
--- a/src/lj_cdata.h
+++ b/src/lj_cdata.h
@@ -58,11 +58,12 @@ static LJ_AINLINE GCcdata *lj_cdata_new_(lua_State *L, CTypeID id, CTSize sz)
58} 58}
59 59
60LJ_FUNC GCcdata *lj_cdata_newref(CTState *cts, const void *pp, CTypeID id); 60LJ_FUNC GCcdata *lj_cdata_newref(CTState *cts, const void *pp, CTypeID id);
61LJ_FUNC GCcdata *lj_cdata_newv(CTState *cts, CTypeID id, CTSize sz, 61LJ_FUNC GCcdata *lj_cdata_newv(lua_State *L, CTypeID id, CTSize sz,
62 CTSize align); 62 CTSize align);
63 63
64LJ_FUNC void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd); 64LJ_FUNC void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd);
65LJ_FUNCA TValue * LJ_FASTCALL lj_cdata_setfin(lua_State *L, GCcdata *cd); 65LJ_FUNC void lj_cdata_setfin(lua_State *L, GCcdata *cd, GCobj *obj,
66 uint32_t it);
66 67
67LJ_FUNC CType *lj_cdata_index(CTState *cts, GCcdata *cd, cTValue *key, 68LJ_FUNC CType *lj_cdata_index(CTState *cts, GCcdata *cd, cTValue *key,
68 uint8_t **pp, CTInfo *qual); 69 uint8_t **pp, CTInfo *qual);
diff --git a/src/lj_clib.c b/src/lj_clib.c
index ccb99e55..70e1c8f3 100644
--- a/src/lj_clib.c
+++ b/src/lj_clib.c
@@ -16,6 +16,7 @@
16#include "lj_cconv.h" 16#include "lj_cconv.h"
17#include "lj_cdata.h" 17#include "lj_cdata.h"
18#include "lj_clib.h" 18#include "lj_clib.h"
19#include "lj_strfmt.h"
19 20
20/* -- OS-specific functions ----------------------------------------------- */ 21/* -- OS-specific functions ----------------------------------------------- */
21 22
@@ -61,7 +62,7 @@ static const char *clib_extname(lua_State *L, const char *name)
61#endif 62#endif
62 ) { 63 ) {
63 if (!strchr(name, '.')) { 64 if (!strchr(name, '.')) {
64 name = lj_str_pushf(L, CLIB_SOEXT, name); 65 name = lj_strfmt_pushf(L, CLIB_SOEXT, name);
65 L->top--; 66 L->top--;
66#ifdef __CYGWIN__ 67#ifdef __CYGWIN__
67 } else { 68 } else {
@@ -70,7 +71,7 @@ static const char *clib_extname(lua_State *L, const char *name)
70 } 71 }
71 if (!(name[0] == CLIB_SOPREFIX[0] && name[1] == CLIB_SOPREFIX[1] && 72 if (!(name[0] == CLIB_SOPREFIX[0] && name[1] == CLIB_SOPREFIX[1] &&
72 name[2] == CLIB_SOPREFIX[2])) { 73 name[2] == CLIB_SOPREFIX[2])) {
73 name = lj_str_pushf(L, CLIB_SOPREFIX "%s", name); 74 name = lj_strfmt_pushf(L, CLIB_SOPREFIX "%s", name);
74 L->top--; 75 L->top--;
75 } 76 }
76 } 77 }
@@ -175,7 +176,7 @@ LJ_NORET LJ_NOINLINE static void clib_error(lua_State *L, const char *fmt,
175 if (!FormatMessageA(FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_FROM_SYSTEM, 176 if (!FormatMessageA(FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_FROM_SYSTEM,
176 NULL, err, 0, buf, sizeof(buf), NULL)) 177 NULL, err, 0, buf, sizeof(buf), NULL))
177 buf[0] = '\0'; 178 buf[0] = '\0';
178 lj_err_callermsg(L, lj_str_pushf(L, fmt, name, buf)); 179 lj_err_callermsg(L, lj_strfmt_pushf(L, fmt, name, buf));
179} 180}
180 181
181static int clib_needext(const char *s) 182static int clib_needext(const char *s)
@@ -190,7 +191,7 @@ static int clib_needext(const char *s)
190static const char *clib_extname(lua_State *L, const char *name) 191static const char *clib_extname(lua_State *L, const char *name)
191{ 192{
192 if (clib_needext(name)) { 193 if (clib_needext(name)) {
193 name = lj_str_pushf(L, "%s.dll", name); 194 name = lj_strfmt_pushf(L, "%s.dll", name);
194 L->top--; 195 L->top--;
195 } 196 }
196 return name; 197 return name;
@@ -263,7 +264,7 @@ static void *clib_getsym(CLibrary *cl, const char *name)
263LJ_NORET LJ_NOINLINE static void clib_error(lua_State *L, const char *fmt, 264LJ_NORET LJ_NOINLINE static void clib_error(lua_State *L, const char *fmt,
264 const char *name) 265 const char *name)
265{ 266{
266 lj_err_callermsg(L, lj_str_pushf(L, fmt, name, "no support for this OS")); 267 lj_err_callermsg(L, lj_strfmt_pushf(L, fmt, name, "no support for this OS"));
267} 268}
268 269
269static void *clib_loadlib(lua_State *L, const char *name, int global) 270static void *clib_loadlib(lua_State *L, const char *name, int global)
@@ -347,7 +348,7 @@ TValue *lj_clib_index(lua_State *L, CLibrary *cl, GCstr *name)
347 CTInfo cconv = ctype_cconv(ct->info); 348 CTInfo cconv = ctype_cconv(ct->info);
348 if (cconv == CTCC_FASTCALL || cconv == CTCC_STDCALL) { 349 if (cconv == CTCC_FASTCALL || cconv == CTCC_STDCALL) {
349 CTSize sz = clib_func_argsize(cts, ct); 350 CTSize sz = clib_func_argsize(cts, ct);
350 const char *symd = lj_str_pushf(L, 351 const char *symd = lj_strfmt_pushf(L,
351 cconv == CTCC_FASTCALL ? "@%s@%d" : "_%s@%d", 352 cconv == CTCC_FASTCALL ? "@%s@%d" : "_%s@%d",
352 sym, sz); 353 sym, sz);
353 L->top--; 354 L->top--;
diff --git a/src/lj_cparse.c b/src/lj_cparse.c
index 6ffe16a2..fd998adc 100644
--- a/src/lj_cparse.c
+++ b/src/lj_cparse.c
@@ -9,13 +9,14 @@
9 9
10#include "lj_gc.h" 10#include "lj_gc.h"
11#include "lj_err.h" 11#include "lj_err.h"
12#include "lj_str.h" 12#include "lj_buf.h"
13#include "lj_ctype.h" 13#include "lj_ctype.h"
14#include "lj_cparse.h" 14#include "lj_cparse.h"
15#include "lj_frame.h" 15#include "lj_frame.h"
16#include "lj_vm.h" 16#include "lj_vm.h"
17#include "lj_char.h" 17#include "lj_char.h"
18#include "lj_strscan.h" 18#include "lj_strscan.h"
19#include "lj_strfmt.h"
19 20
20/* 21/*
21** Important note: this is NOT a validating C parser! This is a minimal 22** Important note: this is NOT a validating C parser! This is a minimal
@@ -46,9 +47,9 @@ static const char *cp_tok2str(CPState *cp, CPToken tok)
46 if (tok > CTOK_OFS) 47 if (tok > CTOK_OFS)
47 return ctoknames[tok-CTOK_OFS-1]; 48 return ctoknames[tok-CTOK_OFS-1];
48 else if (!lj_char_iscntrl(tok)) 49 else if (!lj_char_iscntrl(tok))
49 return lj_str_pushf(cp->L, "%c", tok); 50 return lj_strfmt_pushf(cp->L, "%c", tok);
50 else 51 else
51 return lj_str_pushf(cp->L, "char(%d)", tok); 52 return lj_strfmt_pushf(cp->L, "char(%d)", tok);
52} 53}
53 54
54/* End-of-line? */ 55/* End-of-line? */
@@ -85,24 +86,10 @@ static LJ_NOINLINE CPChar cp_get_bs(CPState *cp)
85 return cp_get(cp); 86 return cp_get(cp);
86} 87}
87 88
88/* Grow save buffer. */
89static LJ_NOINLINE void cp_save_grow(CPState *cp, CPChar c)
90{
91 MSize newsize;
92 if (cp->sb.sz >= CPARSE_MAX_BUF/2)
93 cp_err(cp, LJ_ERR_XELEM);
94 newsize = cp->sb.sz * 2;
95 lj_str_resizebuf(cp->L, &cp->sb, newsize);
96 cp->sb.buf[cp->sb.n++] = (char)c;
97}
98
99/* Save character in buffer. */ 89/* Save character in buffer. */
100static LJ_AINLINE void cp_save(CPState *cp, CPChar c) 90static LJ_AINLINE void cp_save(CPState *cp, CPChar c)
101{ 91{
102 if (LJ_UNLIKELY(cp->sb.n + 1 > cp->sb.sz)) 92 lj_buf_putb(&cp->sb, c);
103 cp_save_grow(cp, c);
104 else
105 cp->sb.buf[cp->sb.n++] = (char)c;
106} 93}
107 94
108/* Skip line break. Handles "\n", "\r", "\r\n" or "\n\r". */ 95/* Skip line break. Handles "\n", "\r", "\r\n" or "\n\r". */
@@ -122,20 +109,20 @@ LJ_NORET static void cp_errmsg(CPState *cp, CPToken tok, ErrMsg em, ...)
122 tokstr = NULL; 109 tokstr = NULL;
123 } else if (tok == CTOK_IDENT || tok == CTOK_INTEGER || tok == CTOK_STRING || 110 } else if (tok == CTOK_IDENT || tok == CTOK_INTEGER || tok == CTOK_STRING ||
124 tok >= CTOK_FIRSTDECL) { 111 tok >= CTOK_FIRSTDECL) {
125 if (cp->sb.n == 0) cp_save(cp, '$'); 112 if (sbufP(&cp->sb) == sbufB(&cp->sb)) cp_save(cp, '$');
126 cp_save(cp, '\0'); 113 cp_save(cp, '\0');
127 tokstr = cp->sb.buf; 114 tokstr = sbufB(&cp->sb);
128 } else { 115 } else {
129 tokstr = cp_tok2str(cp, tok); 116 tokstr = cp_tok2str(cp, tok);
130 } 117 }
131 L = cp->L; 118 L = cp->L;
132 va_start(argp, em); 119 va_start(argp, em);
133 msg = lj_str_pushvf(L, err2msg(em), argp); 120 msg = lj_strfmt_pushvf(L, err2msg(em), argp);
134 va_end(argp); 121 va_end(argp);
135 if (tokstr) 122 if (tokstr)
136 msg = lj_str_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tokstr); 123 msg = lj_strfmt_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tokstr);
137 if (cp->linenumber > 1) 124 if (cp->linenumber > 1)
138 msg = lj_str_pushf(L, "%s at line %d", msg, cp->linenumber); 125 msg = lj_strfmt_pushf(L, "%s at line %d", msg, cp->linenumber);
139 lj_err_callermsg(L, msg); 126 lj_err_callermsg(L, msg);
140} 127}
141 128
@@ -164,7 +151,7 @@ static CPToken cp_number(CPState *cp)
164 TValue o; 151 TValue o;
165 do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp))); 152 do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp)));
166 cp_save(cp, '\0'); 153 cp_save(cp, '\0');
167 fmt = lj_strscan_scan((const uint8_t *)cp->sb.buf, &o, STRSCAN_OPT_C); 154 fmt = lj_strscan_scan((const uint8_t *)sbufB(&cp->sb), &o, STRSCAN_OPT_C);
168 if (fmt == STRSCAN_INT) cp->val.id = CTID_INT32; 155 if (fmt == STRSCAN_INT) cp->val.id = CTID_INT32;
169 else if (fmt == STRSCAN_U32) cp->val.id = CTID_UINT32; 156 else if (fmt == STRSCAN_U32) cp->val.id = CTID_UINT32;
170 else if (!(cp->mode & CPARSE_MODE_SKIP)) 157 else if (!(cp->mode & CPARSE_MODE_SKIP))
@@ -177,7 +164,7 @@ static CPToken cp_number(CPState *cp)
177static CPToken cp_ident(CPState *cp) 164static CPToken cp_ident(CPState *cp)
178{ 165{
179 do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp))); 166 do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp)));
180 cp->str = lj_str_new(cp->L, cp->sb.buf, cp->sb.n); 167 cp->str = lj_buf_str(cp->L, &cp->sb);
181 cp->val.id = lj_ctype_getname(cp->cts, &cp->ct, cp->str, cp->tmask); 168 cp->val.id = lj_ctype_getname(cp->cts, &cp->ct, cp->str, cp->tmask);
182 if (ctype_type(cp->ct->info) == CT_KW) 169 if (ctype_type(cp->ct->info) == CT_KW)
183 return ctype_cid(cp->ct->info); 170 return ctype_cid(cp->ct->info);
@@ -263,11 +250,11 @@ static CPToken cp_string(CPState *cp)
263 } 250 }
264 cp_get(cp); 251 cp_get(cp);
265 if (delim == '"') { 252 if (delim == '"') {
266 cp->str = lj_str_new(cp->L, cp->sb.buf, cp->sb.n); 253 cp->str = lj_buf_str(cp->L, &cp->sb);
267 return CTOK_STRING; 254 return CTOK_STRING;
268 } else { 255 } else {
269 if (cp->sb.n != 1) cp_err_token(cp, '\''); 256 if (sbuflen(&cp->sb) != 1) cp_err_token(cp, '\'');
270 cp->val.i32 = (int32_t)(char)cp->sb.buf[0]; 257 cp->val.i32 = (int32_t)(char)*sbufB(&cp->sb);
271 cp->val.id = CTID_INT32; 258 cp->val.id = CTID_INT32;
272 return CTOK_INTEGER; 259 return CTOK_INTEGER;
273 } 260 }
@@ -296,7 +283,7 @@ static void cp_comment_cpp(CPState *cp)
296/* Lexical scanner for C. Only a minimal subset is implemented. */ 283/* Lexical scanner for C. Only a minimal subset is implemented. */
297static CPToken cp_next_(CPState *cp) 284static CPToken cp_next_(CPState *cp)
298{ 285{
299 lj_str_resetbuf(&cp->sb); 286 lj_buf_reset(&cp->sb);
300 for (;;) { 287 for (;;) {
301 if (lj_char_isident(cp->c)) 288 if (lj_char_isident(cp->c))
302 return lj_char_isdigit(cp->c) ? cp_number(cp) : cp_ident(cp); 289 return lj_char_isdigit(cp->c) ? cp_number(cp) : cp_ident(cp);
@@ -380,8 +367,7 @@ static void cp_init(CPState *cp)
380 cp->depth = 0; 367 cp->depth = 0;
381 cp->curpack = 0; 368 cp->curpack = 0;
382 cp->packstack[0] = 255; 369 cp->packstack[0] = 255;
383 lj_str_initbuf(&cp->sb); 370 lj_buf_init(cp->L, &cp->sb);
384 lj_str_resizebuf(cp->L, &cp->sb, LJ_MIN_SBUF);
385 lua_assert(cp->p != NULL); 371 lua_assert(cp->p != NULL);
386 cp_get(cp); /* Read-ahead first char. */ 372 cp_get(cp); /* Read-ahead first char. */
387 cp->tok = 0; 373 cp->tok = 0;
@@ -393,7 +379,7 @@ static void cp_init(CPState *cp)
393static void cp_cleanup(CPState *cp) 379static void cp_cleanup(CPState *cp)
394{ 380{
395 global_State *g = G(cp->L); 381 global_State *g = G(cp->L);
396 lj_str_freebuf(g, &cp->sb); 382 lj_buf_free(g, &cp->sb);
397} 383}
398 384
399/* Check and consume optional token. */ 385/* Check and consume optional token. */
@@ -1012,7 +998,7 @@ static void cp_decl_asm(CPState *cp, CPDecl *decl)
1012 if (cp->tok == CTOK_STRING) { 998 if (cp->tok == CTOK_STRING) {
1013 GCstr *str = cp->str; 999 GCstr *str = cp->str;
1014 while (cp_next(cp) == CTOK_STRING) { 1000 while (cp_next(cp) == CTOK_STRING) {
1015 lj_str_pushf(cp->L, "%s%s", strdata(str), strdata(cp->str)); 1001 lj_strfmt_pushf(cp->L, "%s%s", strdata(str), strdata(cp->str));
1016 cp->L->top--; 1002 cp->L->top--;
1017 str = strV(cp->L->top); 1003 str = strV(cp->L->top);
1018 } 1004 }
diff --git a/src/lj_crecord.c b/src/lj_crecord.c
index da9013f0..a1e2c504 100644
--- a/src/lj_crecord.c
+++ b/src/lj_crecord.c
@@ -11,13 +11,13 @@
11#if LJ_HASJIT && LJ_HASFFI 11#if LJ_HASJIT && LJ_HASFFI
12 12
13#include "lj_err.h" 13#include "lj_err.h"
14#include "lj_str.h"
15#include "lj_tab.h" 14#include "lj_tab.h"
16#include "lj_frame.h" 15#include "lj_frame.h"
17#include "lj_ctype.h" 16#include "lj_ctype.h"
18#include "lj_cdata.h" 17#include "lj_cdata.h"
19#include "lj_cparse.h" 18#include "lj_cparse.h"
20#include "lj_cconv.h" 19#include "lj_cconv.h"
20#include "lj_carith.h"
21#include "lj_clib.h" 21#include "lj_clib.h"
22#include "lj_ccall.h" 22#include "lj_ccall.h"
23#include "lj_ff.h" 23#include "lj_ff.h"
@@ -31,6 +31,7 @@
31#include "lj_snap.h" 31#include "lj_snap.h"
32#include "lj_crecord.h" 32#include "lj_crecord.h"
33#include "lj_dispatch.h" 33#include "lj_dispatch.h"
34#include "lj_strfmt.h"
34 35
35/* Some local macros to save typing. Undef'd at the end. */ 36/* Some local macros to save typing. Undef'd at the end. */
36#define IR(ref) (&J->cur.ir[(ref)]) 37#define IR(ref) (&J->cur.ir[(ref)])
@@ -441,7 +442,7 @@ static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp,
441 /* fallthrough */ 442 /* fallthrough */
442 case CCX(I, F): 443 case CCX(I, F):
443 if (dt == IRT_CDATA || st == IRT_CDATA) goto err_nyi; 444 if (dt == IRT_CDATA || st == IRT_CDATA) goto err_nyi;
444 sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, IRCONV_TRUNC|IRCONV_ANY); 445 sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, IRCONV_ANY);
445 goto xstore; 446 goto xstore;
446 case CCX(I, P): 447 case CCX(I, P):
447 case CCX(I, A): 448 case CCX(I, A):
@@ -521,7 +522,7 @@ static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp,
521 if (st == IRT_CDATA) goto err_nyi; 522 if (st == IRT_CDATA) goto err_nyi;
522 /* The signed conversion is cheaper. x64 really has 47 bit pointers. */ 523 /* The signed conversion is cheaper. x64 really has 47 bit pointers. */
523 sp = emitconv(sp, (LJ_64 && dsize == 8) ? IRT_I64 : IRT_U32, 524 sp = emitconv(sp, (LJ_64 && dsize == 8) ? IRT_I64 : IRT_U32,
524 st, IRCONV_TRUNC|IRCONV_ANY); 525 st, IRCONV_ANY);
525 goto xstore; 526 goto xstore;
526 527
527 /* Destination is an array. */ 528 /* Destination is an array. */
@@ -640,12 +641,23 @@ static TRef crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, cTValue *sval)
640 sp = emitir(IRT(IR_ADD, IRT_PTR), sp, lj_ir_kintp(J, sizeof(GCstr))); 641 sp = emitir(IRT(IR_ADD, IRT_PTR), sp, lj_ir_kintp(J, sizeof(GCstr)));
641 sid = CTID_A_CCHAR; 642 sid = CTID_A_CCHAR;
642 } 643 }
643 } else { /* NYI: tref_istab(sp), tref_islightud(sp). */ 644 } else if (tref_islightud(sp)) {
645#if LJ_64
646 sp = emitir(IRT(IR_BAND, IRT_P64), sp,
647 lj_ir_kint64(J, U64x(00007fff,ffffffff)));
648#endif
649 } else { /* NYI: tref_istab(sp). */
644 IRType t; 650 IRType t;
645 sid = argv2cdata(J, sp, sval)->ctypeid; 651 sid = argv2cdata(J, sp, sval)->ctypeid;
646 s = ctype_raw(cts, sid); 652 s = ctype_raw(cts, sid);
647 svisnz = cdataptr(cdataV(sval)); 653 svisnz = cdataptr(cdataV(sval));
648 t = crec_ct2irt(cts, s); 654 if (ctype_isfunc(s->info)) {
655 sid = lj_ctype_intern(cts, CTINFO(CT_PTR, CTALIGN_PTR|sid), CTSIZE_PTR);
656 s = ctype_get(cts, sid);
657 t = IRT_PTR;
658 } else {
659 t = crec_ct2irt(cts, s);
660 }
649 if (ctype_isptr(s->info)) { 661 if (ctype_isptr(s->info)) {
650 sp = emitir(IRT(IR_FLOAD, t), sp, IRFL_CDATA_PTR); 662 sp = emitir(IRT(IR_FLOAD, t), sp, IRFL_CDATA_PTR);
651 if (ctype_isref(s->info)) { 663 if (ctype_isref(s->info)) {
@@ -863,21 +875,17 @@ again:
863} 875}
864 876
865/* Record setting a finalizer. */ 877/* Record setting a finalizer. */
866static void crec_finalizer(jit_State *J, TRef trcd, cTValue *fin) 878static void crec_finalizer(jit_State *J, TRef trcd, TRef trfin, cTValue *fin)
867{ 879{
868 TRef trlo = lj_ir_call(J, IRCALL_lj_cdata_setfin, trcd); 880 if (tvisgcv(fin)) {
869 TRef trhi = emitir(IRT(IR_ADD, IRT_P32), trlo, lj_ir_kint(J, 4)); 881 if (!trfin) trfin = lj_ir_kptr(J, gcval(fin));
870 if (LJ_BE) { TRef tmp = trlo; trlo = trhi; trhi = tmp; } 882 } else if (tvisnil(fin)) {
871 if (tvisfunc(fin)) { 883 trfin = lj_ir_kptr(J, NULL);
872 emitir(IRT(IR_XSTORE, IRT_P32), trlo, lj_ir_kfunc(J, funcV(fin)));
873 emitir(IRTI(IR_XSTORE), trhi, lj_ir_kint(J, LJ_TFUNC));
874 } else if (tviscdata(fin)) {
875 emitir(IRT(IR_XSTORE, IRT_P32), trlo,
876 lj_ir_kgc(J, obj2gco(cdataV(fin)), IRT_CDATA));
877 emitir(IRTI(IR_XSTORE), trhi, lj_ir_kint(J, LJ_TCDATA));
878 } else { 884 } else {
879 lj_trace_err(J, LJ_TRERR_BADTYPE); 885 lj_trace_err(J, LJ_TRERR_BADTYPE);
880 } 886 }
887 lj_ir_call(J, IRCALL_lj_cdata_setfin, trcd,
888 trfin, lj_ir_kint(J, (int32_t)itype(fin)));
881 J->needsnap = 1; 889 J->needsnap = 1;
882} 890}
883 891
@@ -888,10 +896,8 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
888 CTSize sz; 896 CTSize sz;
889 CTInfo info = lj_ctype_info(cts, id, &sz); 897 CTInfo info = lj_ctype_info(cts, id, &sz);
890 CType *d = ctype_raw(cts, id); 898 CType *d = ctype_raw(cts, id);
891 TRef trid; 899 TRef trcd, trid = lj_ir_kint(J, id);
892 if (!sz || sz > 128 || (info & CTF_VLA) || ctype_align(info) > CT_MEMALIGN) 900 cTValue *fin;
893 lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: large/special allocations. */
894 trid = lj_ir_kint(J, id);
895 /* Use special instruction to box pointer or 32/64 bit integer. */ 901 /* Use special instruction to box pointer or 32/64 bit integer. */
896 if (ctype_isptr(info) || (ctype_isinteger(info) && (sz == 4 || sz == 8))) { 902 if (ctype_isptr(info) || (ctype_isinteger(info) && (sz == 4 || sz == 8))) {
897 TRef sp = J->base[1] ? crec_ct_tv(J, d, 0, J->base[1], &rd->argv[1]) : 903 TRef sp = J->base[1] ? crec_ct_tv(J, d, 0, J->base[1], &rd->argv[1]) :
@@ -899,11 +905,36 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
899 sz == 4 ? lj_ir_kint(J, 0) : 905 sz == 4 ? lj_ir_kint(J, 0) :
900 (lj_needsplit(J), lj_ir_kint64(J, 0)); 906 (lj_needsplit(J), lj_ir_kint64(J, 0));
901 J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), trid, sp); 907 J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), trid, sp);
908 return;
902 } else { 909 } else {
903 TRef trcd = emitir(IRTG(IR_CNEW, IRT_CDATA), trid, TREF_NIL); 910 TRef trsz = TREF_NIL;
904 cTValue *fin; 911 if ((info & CTF_VLA)) { /* Calculate VLA/VLS size at runtime. */
905 J->base[0] = trcd; 912 CTSize sz0, sz1;
906 if (J->base[1] && !J->base[2] && 913 if (!J->base[1] || J->base[2])
914 lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: init VLA/VLS. */
915 trsz = crec_ct_tv(J, ctype_get(cts, CTID_INT32), 0,
916 J->base[1], &rd->argv[1]);
917 sz0 = lj_ctype_vlsize(cts, d, 0);
918 sz1 = lj_ctype_vlsize(cts, d, 1);
919 trsz = emitir(IRTGI(IR_MULOV), trsz, lj_ir_kint(J, (int32_t)(sz1-sz0)));
920 trsz = emitir(IRTGI(IR_ADDOV), trsz, lj_ir_kint(J, (int32_t)sz0));
921 J->base[1] = 0; /* Simplify logic below. */
922 } else if (ctype_align(info) > CT_MEMALIGN) {
923 trsz = lj_ir_kint(J, sz);
924 }
925 trcd = emitir(IRTG(IR_CNEW, IRT_CDATA), trid, trsz);
926 if (sz > 128 || (info & CTF_VLA)) {
927 TRef dp;
928 CTSize align;
929 special: /* Only handle bulk zero-fill for large/VLA/VLS types. */
930 if (J->base[1])
931 lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: init large/VLA/VLS types. */
932 dp = emitir(IRT(IR_ADD, IRT_PTR), trcd, lj_ir_kintp(J, sizeof(GCcdata)));
933 if (trsz == TREF_NIL) trsz = lj_ir_kint(J, sz);
934 align = ctype_align(info);
935 if (align < CT_MEMALIGN) align = CT_MEMALIGN;
936 crec_fill(J, dp, trsz, lj_ir_kint(J, 0), (1u << align));
937 } else if (J->base[1] && !J->base[2] &&
907 !lj_cconv_multi_init(cts, d, &rd->argv[1])) { 938 !lj_cconv_multi_init(cts, d, &rd->argv[1])) {
908 goto single_init; 939 goto single_init;
909 } else if (ctype_isarray(d->info)) { 940 } else if (ctype_isarray(d->info)) {
@@ -914,8 +945,9 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
914 TValue *sval = &tv; 945 TValue *sval = &tv;
915 MSize i; 946 MSize i;
916 tv.u64 = 0; 947 tv.u64 = 0;
917 if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info))) 948 if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info)) ||
918 lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: init array of aggregates. */ 949 esize * CREC_FILL_MAXUNROLL < sz)
950 goto special;
919 for (i = 1, ofs = 0; ofs < sz; ofs += esize) { 951 for (i = 1, ofs = 0; ofs < sz; ofs += esize) {
920 TRef dp = emitir(IRT(IR_ADD, IRT_PTR), trcd, 952 TRef dp = emitir(IRT(IR_ADD, IRT_PTR), trcd,
921 lj_ir_kintp(J, ofs + sizeof(GCcdata))); 953 lj_ir_kintp(J, ofs + sizeof(GCcdata)));
@@ -972,11 +1004,12 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
972 crec_ct_tv(J, d, dp, lj_ir_kint(J, 0), &tv); 1004 crec_ct_tv(J, d, dp, lj_ir_kint(J, 0), &tv);
973 } 1005 }
974 } 1006 }
975 /* Handle __gc metamethod. */
976 fin = lj_ctype_meta(cts, id, MM_gc);
977 if (fin)
978 crec_finalizer(J, trcd, fin);
979 } 1007 }
1008 J->base[0] = trcd;
1009 /* Handle __gc metamethod. */
1010 fin = lj_ctype_meta(cts, id, MM_gc);
1011 if (fin)
1012 crec_finalizer(J, trcd, 0, fin);
980} 1013}
981 1014
982/* Record argument conversions. */ 1015/* Record argument conversions. */
@@ -1229,7 +1262,7 @@ static TRef crec_arith_int64(jit_State *J, TRef *sp, CType **s, MMS mm)
1229 for (i = 0; i < 2; i++) { 1262 for (i = 0; i < 2; i++) {
1230 IRType st = tref_type(sp[i]); 1263 IRType st = tref_type(sp[i]);
1231 if (st == IRT_NUM || st == IRT_FLOAT) 1264 if (st == IRT_NUM || st == IRT_FLOAT)
1232 sp[i] = emitconv(sp[i], dt, st, IRCONV_TRUNC|IRCONV_ANY); 1265 sp[i] = emitconv(sp[i], dt, st, IRCONV_ANY);
1233 else if (!(st == IRT_I64 || st == IRT_U64)) 1266 else if (!(st == IRT_I64 || st == IRT_U64))
1234 sp[i] = emitconv(sp[i], dt, IRT_INT, 1267 sp[i] = emitconv(sp[i], dt, IRT_INT,
1235 (s[i]->info & CTF_UNSIGNED) ? 0 : IRCONV_SEXT); 1268 (s[i]->info & CTF_UNSIGNED) ? 0 : IRCONV_SEXT);
@@ -1297,15 +1330,14 @@ static TRef crec_arith_ptr(jit_State *J, TRef *sp, CType **s, MMS mm)
1297 CTypeID id; 1330 CTypeID id;
1298#if LJ_64 1331#if LJ_64
1299 if (t == IRT_NUM || t == IRT_FLOAT) 1332 if (t == IRT_NUM || t == IRT_FLOAT)
1300 tr = emitconv(tr, IRT_INTP, t, IRCONV_TRUNC|IRCONV_ANY); 1333 tr = emitconv(tr, IRT_INTP, t, IRCONV_ANY);
1301 else if (!(t == IRT_I64 || t == IRT_U64)) 1334 else if (!(t == IRT_I64 || t == IRT_U64))
1302 tr = emitconv(tr, IRT_INTP, IRT_INT, 1335 tr = emitconv(tr, IRT_INTP, IRT_INT,
1303 ((t - IRT_I8) & 1) ? 0 : IRCONV_SEXT); 1336 ((t - IRT_I8) & 1) ? 0 : IRCONV_SEXT);
1304#else 1337#else
1305 if (!tref_typerange(sp[1], IRT_I8, IRT_U32)) { 1338 if (!tref_typerange(sp[1], IRT_I8, IRT_U32)) {
1306 tr = emitconv(tr, IRT_INTP, t, 1339 tr = emitconv(tr, IRT_INTP, t,
1307 (t == IRT_NUM || t == IRT_FLOAT) ? 1340 (t == IRT_NUM || t == IRT_FLOAT) ? IRCONV_ANY : 0);
1308 IRCONV_TRUNC|IRCONV_ANY : 0);
1309 } 1341 }
1310#endif 1342#endif
1311 tr = emitir(IRT(IR_MUL, IRT_INTP), tr, lj_ir_kintp(J, sz)); 1343 tr = emitir(IRT(IR_MUL, IRT_INTP), tr, lj_ir_kintp(J, sz));
@@ -1624,7 +1656,139 @@ void LJ_FASTCALL recff_ffi_xof(jit_State *J, RecordFFData *rd)
1624void LJ_FASTCALL recff_ffi_gc(jit_State *J, RecordFFData *rd) 1656void LJ_FASTCALL recff_ffi_gc(jit_State *J, RecordFFData *rd)
1625{ 1657{
1626 argv2cdata(J, J->base[0], &rd->argv[0]); 1658 argv2cdata(J, J->base[0], &rd->argv[0]);
1627 crec_finalizer(J, J->base[0], &rd->argv[1]); 1659 if (!J->base[1])
1660 lj_trace_err(J, LJ_TRERR_BADTYPE);
1661 crec_finalizer(J, J->base[0], J->base[1], &rd->argv[1]);
1662}
1663
1664/* -- 64 bit bit.* library functions -------------------------------------- */
1665
1666/* Determine bit operation type from argument type. */
1667static CTypeID crec_bit64_type(CTState *cts, cTValue *tv)
1668{
1669 if (tviscdata(tv)) {
1670 CType *ct = lj_ctype_rawref(cts, cdataV(tv)->ctypeid);
1671 if (ctype_isenum(ct->info)) ct = ctype_child(cts, ct);
1672 if ((ct->info & (CTMASK_NUM|CTF_BOOL|CTF_FP|CTF_UNSIGNED)) ==
1673 CTINFO(CT_NUM, CTF_UNSIGNED) && ct->size == 8)
1674 return CTID_UINT64; /* Use uint64_t, since it has the highest rank. */
1675 return CTID_INT64; /* Otherwise use int64_t. */
1676 }
1677 return 0; /* Use regular 32 bit ops. */
1678}
1679
1680void LJ_FASTCALL recff_bit64_tobit(jit_State *J, RecordFFData *rd)
1681{
1682 CTState *cts = ctype_ctsG(J2G(J));
1683 TRef tr = crec_ct_tv(J, ctype_get(cts, CTID_INT64), 0,
1684 J->base[0], &rd->argv[0]);
1685 if (!tref_isinteger(tr))
1686 tr = emitconv(tr, IRT_INT, tref_type(tr), 0);
1687 J->base[0] = tr;
1688}
1689
1690int LJ_FASTCALL recff_bit64_unary(jit_State *J, RecordFFData *rd)
1691{
1692 CTState *cts = ctype_ctsG(J2G(J));
1693 CTypeID id = crec_bit64_type(cts, &rd->argv[0]);
1694 if (id) {
1695 TRef tr = crec_ct_tv(J, ctype_get(cts, id), 0, J->base[0], &rd->argv[0]);
1696 tr = emitir(IRT(rd->data, id-CTID_INT64+IRT_I64), tr, 0);
1697 J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr);
1698 return 1;
1699 }
1700 return 0;
1701}
1702
1703int LJ_FASTCALL recff_bit64_nary(jit_State *J, RecordFFData *rd)
1704{
1705 CTState *cts = ctype_ctsG(J2G(J));
1706 CTypeID id = 0;
1707 MSize i;
1708 for (i = 0; J->base[i] != 0; i++) {
1709 CTypeID aid = crec_bit64_type(cts, &rd->argv[i]);
1710 if (id < aid) id = aid; /* Determine highest type rank of all arguments. */
1711 }
1712 if (id) {
1713 CType *ct = ctype_get(cts, id);
1714 uint32_t ot = IRT(rd->data, id-CTID_INT64+IRT_I64);
1715 TRef tr = crec_ct_tv(J, ct, 0, J->base[0], &rd->argv[0]);
1716 for (i = 1; J->base[i] != 0; i++) {
1717 TRef tr2 = crec_ct_tv(J, ct, 0, J->base[i], &rd->argv[i]);
1718 tr = emitir(ot, tr, tr2);
1719 }
1720 J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr);
1721 return 1;
1722 }
1723 return 0;
1724}
1725
1726int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd)
1727{
1728 CTState *cts = ctype_ctsG(J2G(J));
1729 CTypeID id;
1730 TRef tsh = 0;
1731 if (J->base[0] && tref_iscdata(J->base[1])) {
1732 tsh = crec_ct_tv(J, ctype_get(cts, CTID_INT64), 0,
1733 J->base[1], &rd->argv[1]);
1734 if (!tref_isinteger(tsh))
1735 tsh = emitconv(tsh, IRT_INT, tref_type(tsh), 0);
1736 J->base[1] = tsh;
1737 }
1738 id = crec_bit64_type(cts, &rd->argv[0]);
1739 if (id) {
1740 TRef tr = crec_ct_tv(J, ctype_get(cts, id), 0, J->base[0], &rd->argv[0]);
1741 uint32_t op = rd->data;
1742 if (!tsh) tsh = lj_opt_narrow_tobit(J, J->base[1]);
1743 if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) &&
1744 !tref_isk(tsh))
1745 tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 63));
1746#ifdef LJ_TARGET_UNIFYROT
1747 if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) {
1748 op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR;
1749 tsh = emitir(IRTI(IR_NEG), tsh, tsh);
1750 }
1751#endif
1752 tr = emitir(IRT(op, id-CTID_INT64+IRT_I64), tr, tsh);
1753 J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr);
1754 return 1;
1755 }
1756 return 0;
1757}
1758
1759TRef recff_bit64_tohex(jit_State *J, RecordFFData *rd, TRef hdr)
1760{
1761 CTState *cts = ctype_ctsG(J2G(J));
1762 CTypeID id = crec_bit64_type(cts, &rd->argv[0]);
1763 TRef tr, trsf = J->base[1];
1764 SFormat sf = (STRFMT_UINT|STRFMT_T_HEX);
1765 int32_t n;
1766 if (trsf) {
1767 CTypeID id2 = 0;
1768 n = (int32_t)lj_carith_check64(J->L, 2, &id2);
1769 if (id2)
1770 trsf = crec_ct_tv(J, ctype_get(cts, CTID_INT32), 0, trsf, &rd->argv[1]);
1771 else
1772 trsf = lj_opt_narrow_tobit(J, trsf);
1773 emitir(IRTGI(IR_EQ), trsf, lj_ir_kint(J, n)); /* Specialize to n. */
1774 } else {
1775 n = id ? 16 : 8;
1776 }
1777 if (n < 0) { n = -n; sf |= STRFMT_F_UPPER; }
1778 sf |= ((SFormat)((n+1)&255) << STRFMT_SH_PREC);
1779 if (id) {
1780 tr = crec_ct_tv(J, ctype_get(cts, id), 0, J->base[0], &rd->argv[0]);
1781 if (n < 16)
1782 tr = emitir(IRT(IR_BAND, IRT_U64), tr,
1783 lj_ir_kint64(J, ((uint64_t)1 << 4*n)-1));
1784 } else {
1785 tr = lj_opt_narrow_tobit(J, J->base[0]);
1786 if (n < 8)
1787 tr = emitir(IRTI(IR_BAND), tr, lj_ir_kint(J, (int32_t)((1u << 4*n)-1)));
1788 tr = emitconv(tr, IRT_U64, IRT_INT, 0); /* No sign-extension. */
1789 lj_needsplit(J);
1790 }
1791 return lj_ir_call(J, IRCALL_lj_strfmt_putfxint, hdr, lj_ir_kint(J, sf), tr);
1628} 1792}
1629 1793
1630/* -- Miscellaneous library functions ------------------------------------- */ 1794/* -- Miscellaneous library functions ------------------------------------- */
diff --git a/src/lj_crecord.h b/src/lj_crecord.h
index fb77ca60..765b8109 100644
--- a/src/lj_crecord.h
+++ b/src/lj_crecord.h
@@ -25,6 +25,13 @@ LJ_FUNC void LJ_FASTCALL recff_ffi_istype(jit_State *J, RecordFFData *rd);
25LJ_FUNC void LJ_FASTCALL recff_ffi_abi(jit_State *J, RecordFFData *rd); 25LJ_FUNC void LJ_FASTCALL recff_ffi_abi(jit_State *J, RecordFFData *rd);
26LJ_FUNC void LJ_FASTCALL recff_ffi_xof(jit_State *J, RecordFFData *rd); 26LJ_FUNC void LJ_FASTCALL recff_ffi_xof(jit_State *J, RecordFFData *rd);
27LJ_FUNC void LJ_FASTCALL recff_ffi_gc(jit_State *J, RecordFFData *rd); 27LJ_FUNC void LJ_FASTCALL recff_ffi_gc(jit_State *J, RecordFFData *rd);
28
29LJ_FUNC void LJ_FASTCALL recff_bit64_tobit(jit_State *J, RecordFFData *rd);
30LJ_FUNC int LJ_FASTCALL recff_bit64_unary(jit_State *J, RecordFFData *rd);
31LJ_FUNC int LJ_FASTCALL recff_bit64_nary(jit_State *J, RecordFFData *rd);
32LJ_FUNC int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd);
33LJ_FUNC TRef recff_bit64_tohex(jit_State *J, RecordFFData *rd, TRef hdr);
34
28LJ_FUNC void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd); 35LJ_FUNC void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd);
29#endif 36#endif
30 37
diff --git a/src/lj_ctype.c b/src/lj_ctype.c
index e9fe0943..1814a465 100644
--- a/src/lj_ctype.c
+++ b/src/lj_ctype.c
@@ -11,6 +11,7 @@
11#include "lj_err.h" 11#include "lj_err.h"
12#include "lj_str.h" 12#include "lj_str.h"
13#include "lj_tab.h" 13#include "lj_tab.h"
14#include "lj_strfmt.h"
14#include "lj_ctype.h" 15#include "lj_ctype.h"
15#include "lj_ccallback.h" 16#include "lj_ccallback.h"
16 17
@@ -568,19 +569,19 @@ GCstr *lj_ctype_repr_int64(lua_State *L, uint64_t n, int isunsigned)
568/* Convert complex to string with 'i' or 'I' suffix. */ 569/* Convert complex to string with 'i' or 'I' suffix. */
569GCstr *lj_ctype_repr_complex(lua_State *L, void *sp, CTSize size) 570GCstr *lj_ctype_repr_complex(lua_State *L, void *sp, CTSize size)
570{ 571{
571 char buf[2*LJ_STR_NUMBUF+2+1]; 572 char buf[2*STRFMT_MAXBUF_NUM+2+1], *p = buf;
572 TValue re, im; 573 TValue re, im;
573 size_t len;
574 if (size == 2*sizeof(double)) { 574 if (size == 2*sizeof(double)) {
575 re.n = *(double *)sp; im.n = ((double *)sp)[1]; 575 re.n = *(double *)sp; im.n = ((double *)sp)[1];
576 } else { 576 } else {
577 re.n = (double)*(float *)sp; im.n = (double)((float *)sp)[1]; 577 re.n = (double)*(float *)sp; im.n = (double)((float *)sp)[1];
578 } 578 }
579 len = lj_str_bufnum(buf, &re); 579 p = lj_strfmt_wnum(p, &re);
580 if (!(im.u32.hi & 0x80000000u) || im.n != im.n) buf[len++] = '+'; 580 if (!(im.u32.hi & 0x80000000u) || im.n != im.n) *p++ = '+';
581 len += lj_str_bufnum(buf+len, &im); 581 p = lj_strfmt_wnum(p, &im);
582 buf[len] = buf[len-1] >= 'a' ? 'I' : 'i'; 582 *p = *(p-1) >= 'a' ? 'I' : 'i';
583 return lj_str_new(L, buf, len+1); 583 p++;
584 return lj_str_new(L, buf, p-buf);
584} 585}
585 586
586/* -- C type state -------------------------------------------------------- */ 587/* -- C type state -------------------------------------------------------- */
diff --git a/src/lj_debug.c b/src/lj_debug.c
index 4653a4ec..bb77ad77 100644
--- a/src/lj_debug.c
+++ b/src/lj_debug.c
@@ -9,11 +9,12 @@
9#include "lj_obj.h" 9#include "lj_obj.h"
10#include "lj_err.h" 10#include "lj_err.h"
11#include "lj_debug.h" 11#include "lj_debug.h"
12#include "lj_str.h" 12#include "lj_buf.h"
13#include "lj_tab.h" 13#include "lj_tab.h"
14#include "lj_state.h" 14#include "lj_state.h"
15#include "lj_frame.h" 15#include "lj_frame.h"
16#include "lj_bc.h" 16#include "lj_bc.h"
17#include "lj_strfmt.h"
17#if LJ_HASJIT 18#if LJ_HASJIT
18#include "lj_jit.h" 19#include "lj_jit.h"
19#endif 20#endif
@@ -27,7 +28,7 @@ cTValue *lj_debug_frame(lua_State *L, int level, int *size)
27 /* Traverse frames backwards. */ 28 /* Traverse frames backwards. */
28 for (nextframe = frame = L->base-1; frame > bot; ) { 29 for (nextframe = frame = L->base-1; frame > bot; ) {
29 if (frame_gc(frame) == obj2gco(L)) 30 if (frame_gc(frame) == obj2gco(L))
30 level++; /* Skip dummy frames. See lj_meta_call(). */ 31 level++; /* Skip dummy frames. See lj_err_optype_call(). */
31 if (level-- == 0) { 32 if (level-- == 0) {
32 *size = (int)(nextframe - frame); 33 *size = (int)(nextframe - frame);
33 return frame; /* Level found. */ 34 return frame; /* Level found. */
@@ -140,38 +141,25 @@ static BCLine debug_frameline(lua_State *L, GCfunc *fn, cTValue *nextframe)
140 141
141/* -- Variable names ------------------------------------------------------ */ 142/* -- Variable names ------------------------------------------------------ */
142 143
143/* Read ULEB128 value. */
144static uint32_t debug_read_uleb128(const uint8_t **pp)
145{
146 const uint8_t *p = *pp;
147 uint32_t v = *p++;
148 if (LJ_UNLIKELY(v >= 0x80)) {
149 int sh = 0;
150 v &= 0x7f;
151 do { v |= ((*p & 0x7f) << (sh += 7)); } while (*p++ >= 0x80);
152 }
153 *pp = p;
154 return v;
155}
156
157/* Get name of a local variable from slot number and PC. */ 144/* Get name of a local variable from slot number and PC. */
158static const char *debug_varname(const GCproto *pt, BCPos pc, BCReg slot) 145static const char *debug_varname(const GCproto *pt, BCPos pc, BCReg slot)
159{ 146{
160 const uint8_t *p = proto_varinfo(pt); 147 const char *p = (const char *)proto_varinfo(pt);
161 if (p) { 148 if (p) {
162 BCPos lastpc = 0; 149 BCPos lastpc = 0;
163 for (;;) { 150 for (;;) {
164 const char *name = (const char *)p; 151 const char *name = p;
165 uint32_t vn = *p++; 152 uint32_t vn = *(const uint8_t *)p;
166 BCPos startpc, endpc; 153 BCPos startpc, endpc;
167 if (vn < VARNAME__MAX) { 154 if (vn < VARNAME__MAX) {
168 if (vn == VARNAME_END) break; /* End of varinfo. */ 155 if (vn == VARNAME_END) break; /* End of varinfo. */
169 } else { 156 } else {
170 while (*p++) ; /* Skip over variable name string. */ 157 do { p++; } while (*(const uint8_t *)p); /* Skip over variable name. */
171 } 158 }
172 lastpc = startpc = lastpc + debug_read_uleb128(&p); 159 p++;
160 lastpc = startpc = lastpc + lj_buf_ruleb128(&p);
173 if (startpc > pc) break; 161 if (startpc > pc) break;
174 endpc = startpc + debug_read_uleb128(&p); 162 endpc = startpc + lj_buf_ruleb128(&p);
175 if (pc < endpc && slot-- == 0) { 163 if (pc < endpc && slot-- == 0) {
176 if (vn < VARNAME__MAX) { 164 if (vn < VARNAME__MAX) {
177#define VARNAMESTR(name, str) str "\0" 165#define VARNAMESTR(name, str) str "\0"
@@ -297,9 +285,9 @@ restart:
297} 285}
298 286
299/* Deduce function name from caller of a frame. */ 287/* Deduce function name from caller of a frame. */
300const char *lj_debug_funcname(lua_State *L, TValue *frame, const char **name) 288const char *lj_debug_funcname(lua_State *L, cTValue *frame, const char **name)
301{ 289{
302 TValue *pframe; 290 cTValue *pframe;
303 GCfunc *fn; 291 GCfunc *fn;
304 BCPos pc; 292 BCPos pc;
305 if (frame <= tvref(L->stack)) 293 if (frame <= tvref(L->stack))
@@ -328,7 +316,7 @@ const char *lj_debug_funcname(lua_State *L, TValue *frame, const char **name)
328/* -- Source code locations ----------------------------------------------- */ 316/* -- Source code locations ----------------------------------------------- */
329 317
330/* Generate shortened source name. */ 318/* Generate shortened source name. */
331void lj_debug_shortname(char *out, GCstr *str) 319void lj_debug_shortname(char *out, GCstr *str, BCLine line)
332{ 320{
333 const char *src = strdata(str); 321 const char *src = strdata(str);
334 if (*src == '=') { 322 if (*src == '=') {
@@ -342,11 +330,11 @@ void lj_debug_shortname(char *out, GCstr *str)
342 *out++ = '.'; *out++ = '.'; *out++ = '.'; 330 *out++ = '.'; *out++ = '.'; *out++ = '.';
343 } 331 }
344 strcpy(out, src); 332 strcpy(out, src);
345 } else { /* Output [string "string"]. */ 333 } else { /* Output [string "string"] or [builtin:name]. */
346 size_t len; /* Length, up to first control char. */ 334 size_t len; /* Length, up to first control char. */
347 for (len = 0; len < LUA_IDSIZE-12; len++) 335 for (len = 0; len < LUA_IDSIZE-12; len++)
348 if (((const unsigned char *)src)[len] < ' ') break; 336 if (((const unsigned char *)src)[len] < ' ') break;
349 strcpy(out, "[string \""); out += 9; 337 strcpy(out, line == ~(BCLine)0 ? "[builtin:" : "[string \""); out += 9;
350 if (src[len] != '\0') { /* Must truncate? */ 338 if (src[len] != '\0') { /* Must truncate? */
351 if (len > LUA_IDSIZE-15) len = LUA_IDSIZE-15; 339 if (len > LUA_IDSIZE-15) len = LUA_IDSIZE-15;
352 strncpy(out, src, len); out += len; 340 strncpy(out, src, len); out += len;
@@ -354,7 +342,7 @@ void lj_debug_shortname(char *out, GCstr *str)
354 } else { 342 } else {
355 strcpy(out, src); out += len; 343 strcpy(out, src); out += len;
356 } 344 }
357 strcpy(out, "\"]"); 345 strcpy(out, line == ~(BCLine)0 ? "]" : "\"]");
358 } 346 }
359} 347}
360 348
@@ -367,14 +355,15 @@ void lj_debug_addloc(lua_State *L, const char *msg,
367 if (isluafunc(fn)) { 355 if (isluafunc(fn)) {
368 BCLine line = debug_frameline(L, fn, nextframe); 356 BCLine line = debug_frameline(L, fn, nextframe);
369 if (line >= 0) { 357 if (line >= 0) {
358 GCproto *pt = funcproto(fn);
370 char buf[LUA_IDSIZE]; 359 char buf[LUA_IDSIZE];
371 lj_debug_shortname(buf, proto_chunkname(funcproto(fn))); 360 lj_debug_shortname(buf, proto_chunkname(pt), pt->firstline);
372 lj_str_pushf(L, "%s:%d: %s", buf, line, msg); 361 lj_strfmt_pushf(L, "%s:%d: %s", buf, line, msg);
373 return; 362 return;
374 } 363 }
375 } 364 }
376 } 365 }
377 lj_str_pushf(L, "%s", msg); 366 lj_strfmt_pushf(L, "%s", msg);
378} 367}
379 368
380/* Push location string for a bytecode position to Lua stack. */ 369/* Push location string for a bytecode position to Lua stack. */
@@ -384,20 +373,22 @@ void lj_debug_pushloc(lua_State *L, GCproto *pt, BCPos pc)
384 const char *s = strdata(name); 373 const char *s = strdata(name);
385 MSize i, len = name->len; 374 MSize i, len = name->len;
386 BCLine line = lj_debug_line(pt, pc); 375 BCLine line = lj_debug_line(pt, pc);
387 if (*s == '@') { 376 if (pt->firstline == ~(BCLine)0) {
377 lj_strfmt_pushf(L, "builtin:%s", s);
378 } else if (*s == '@') {
388 s++; len--; 379 s++; len--;
389 for (i = len; i > 0; i--) 380 for (i = len; i > 0; i--)
390 if (s[i] == '/' || s[i] == '\\') { 381 if (s[i] == '/' || s[i] == '\\') {
391 s += i+1; 382 s += i+1;
392 break; 383 break;
393 } 384 }
394 lj_str_pushf(L, "%s:%d", s, line); 385 lj_strfmt_pushf(L, "%s:%d", s, line);
395 } else if (len > 40) { 386 } else if (len > 40) {
396 lj_str_pushf(L, "%p:%d", pt, line); 387 lj_strfmt_pushf(L, "%p:%d", pt, line);
397 } else if (*s == '=') { 388 } else if (*s == '=') {
398 lj_str_pushf(L, "%s:%d", s+1, line); 389 lj_strfmt_pushf(L, "%s:%d", s+1, line);
399 } else { 390 } else {
400 lj_str_pushf(L, "\"%s\":%d", s, line); 391 lj_strfmt_pushf(L, "\"%s\":%d", s, line);
401 } 392 }
402} 393}
403 394
@@ -460,7 +451,7 @@ int lj_debug_getinfo(lua_State *L, const char *what, lj_Debug *ar, int ext)
460 BCLine firstline = pt->firstline; 451 BCLine firstline = pt->firstline;
461 GCstr *name = proto_chunkname(pt); 452 GCstr *name = proto_chunkname(pt);
462 ar->source = strdata(name); 453 ar->source = strdata(name);
463 lj_debug_shortname(ar->short_src, name); 454 lj_debug_shortname(ar->short_src, name, pt->firstline);
464 ar->linedefined = (int)firstline; 455 ar->linedefined = (int)firstline;
465 ar->lastlinedefined = (int)(firstline + pt->numline); 456 ar->lastlinedefined = (int)(firstline + pt->numline);
466 ar->what = firstline ? "Lua" : "main"; 457 ar->what = firstline ? "Lua" : "main";
@@ -550,6 +541,111 @@ LUA_API int lua_getstack(lua_State *L, int level, lua_Debug *ar)
550 } 541 }
551} 542}
552 543
544#if LJ_HASPROFILE
545/* Put the chunkname into a buffer. */
546static int debug_putchunkname(SBuf *sb, GCproto *pt, int pathstrip)
547{
548 GCstr *name = proto_chunkname(pt);
549 const char *p = strdata(name);
550 if (pt->firstline == ~(BCLine)0) {
551 lj_buf_putmem(sb, "[builtin:", 9);
552 lj_buf_putstr(sb, name);
553 lj_buf_putb(sb, ']');
554 return 0;
555 }
556 if (*p == '=' || *p == '@') {
557 MSize len = name->len-1;
558 p++;
559 if (pathstrip) {
560 int i;
561 for (i = len-1; i >= 0; i--)
562 if (p[i] == '/' || p[i] == '\\') {
563 len -= i+1;
564 p = p+i+1;
565 break;
566 }
567 }
568 lj_buf_putmem(sb, p, len);
569 } else {
570 lj_buf_putmem(sb, "[string]", 8);
571 }
572 return 1;
573}
574
575/* Put a compact stack dump into a buffer. */
576void lj_debug_dumpstack(lua_State *L, SBuf *sb, const char *fmt, int depth)
577{
578 int level = 0, dir = 1, pathstrip = 1;
579 MSize lastlen = 0;
580 if (depth < 0) { level = ~depth; depth = dir = -1; } /* Reverse frames. */
581 while (level != depth) { /* Loop through all frame. */
582 int size;
583 cTValue *frame = lj_debug_frame(L, level, &size);
584 if (frame) {
585 cTValue *nextframe = size ? frame+size : NULL;
586 GCfunc *fn = frame_func(frame);
587 const uint8_t *p = (const uint8_t *)fmt;
588 int c;
589 while ((c = *p++)) {
590 switch (c) {
591 case 'p': /* Preserve full path. */
592 pathstrip = 0;
593 break;
594 case 'F': case 'f': { /* Dump function name. */
595 const char *name;
596 const char *what = lj_debug_funcname(L, frame, &name);
597 if (what) {
598 if (c == 'F' && isluafunc(fn)) { /* Dump module:name for 'F'. */
599 GCproto *pt = funcproto(fn);
600 if (pt->firstline != ~(BCLine)0) { /* Not a bytecode builtin. */
601 debug_putchunkname(sb, pt, pathstrip);
602 lj_buf_putb(sb, ':');
603 }
604 }
605 lj_buf_putmem(sb, name, (MSize)strlen(name));
606 break;
607 } /* else: can't derive a name, dump module:line. */
608 }
609 /* fallthrough */
610 case 'l': /* Dump module:line. */
611 if (isluafunc(fn)) {
612 GCproto *pt = funcproto(fn);
613 if (debug_putchunkname(sb, pt, pathstrip)) {
614 /* Regular Lua function. */
615 BCLine line = c == 'l' ? debug_frameline(L, fn, nextframe) :
616 pt->firstline;
617 lj_buf_putb(sb, ':');
618 lj_strfmt_putint(sb, line >= 0 ? line : pt->firstline);
619 }
620 } else if (isffunc(fn)) { /* Dump numbered builtins. */
621 lj_buf_putmem(sb, "[builtin#", 9);
622 lj_strfmt_putint(sb, fn->c.ffid);
623 lj_buf_putb(sb, ']');
624 } else { /* Dump C function address. */
625 lj_buf_putb(sb, '@');
626 lj_strfmt_putptr(sb, fn->c.f);
627 }
628 break;
629 case 'Z': /* Zap trailing separator. */
630 lastlen = sbuflen(sb);
631 break;
632 default:
633 lj_buf_putb(sb, c);
634 break;
635 }
636 }
637 } else if (dir == 1) {
638 break;
639 } else {
640 level -= size; /* Reverse frame order: quickly skip missing level. */
641 }
642 level += dir;
643 }
644 if (lastlen)
645 setsbufP(sb, sbufB(sb) + lastlen); /* Zap trailing separator. */
646}
647#endif
648
553/* Number of frames for the leading and trailing part of a traceback. */ 649/* Number of frames for the leading and trailing part of a traceback. */
554#define TRACEBACK_LEVELS1 12 650#define TRACEBACK_LEVELS1 12
555#define TRACEBACK_LEVELS2 10 651#define TRACEBACK_LEVELS2 10
diff --git a/src/lj_debug.h b/src/lj_debug.h
index bec6b4f3..f6e52172 100644
--- a/src/lj_debug.h
+++ b/src/lj_debug.h
@@ -32,14 +32,18 @@ LJ_FUNC const char *lj_debug_uvname(GCproto *pt, uint32_t idx);
32LJ_FUNC const char *lj_debug_uvnamev(cTValue *o, uint32_t idx, TValue **tvp); 32LJ_FUNC const char *lj_debug_uvnamev(cTValue *o, uint32_t idx, TValue **tvp);
33LJ_FUNC const char *lj_debug_slotname(GCproto *pt, const BCIns *pc, 33LJ_FUNC const char *lj_debug_slotname(GCproto *pt, const BCIns *pc,
34 BCReg slot, const char **name); 34 BCReg slot, const char **name);
35LJ_FUNC const char *lj_debug_funcname(lua_State *L, TValue *frame, 35LJ_FUNC const char *lj_debug_funcname(lua_State *L, cTValue *frame,
36 const char **name); 36 const char **name);
37LJ_FUNC void lj_debug_shortname(char *out, GCstr *str); 37LJ_FUNC void lj_debug_shortname(char *out, GCstr *str, BCLine line);
38LJ_FUNC void lj_debug_addloc(lua_State *L, const char *msg, 38LJ_FUNC void lj_debug_addloc(lua_State *L, const char *msg,
39 cTValue *frame, cTValue *nextframe); 39 cTValue *frame, cTValue *nextframe);
40LJ_FUNC void lj_debug_pushloc(lua_State *L, GCproto *pt, BCPos pc); 40LJ_FUNC void lj_debug_pushloc(lua_State *L, GCproto *pt, BCPos pc);
41LJ_FUNC int lj_debug_getinfo(lua_State *L, const char *what, lj_Debug *ar, 41LJ_FUNC int lj_debug_getinfo(lua_State *L, const char *what, lj_Debug *ar,
42 int ext); 42 int ext);
43#if LJ_HASPROFILE
44LJ_FUNC void lj_debug_dumpstack(lua_State *L, SBuf *sb, const char *fmt,
45 int depth);
46#endif
43 47
44/* Fixed internal variable names. */ 48/* Fixed internal variable names. */
45#define VARNAMEDEF(_) \ 49#define VARNAMEDEF(_) \
diff --git a/src/lj_dispatch.c b/src/lj_dispatch.c
index 58d6c3e2..ea196ef2 100644
--- a/src/lj_dispatch.c
+++ b/src/lj_dispatch.c
@@ -8,6 +8,7 @@
8 8
9#include "lj_obj.h" 9#include "lj_obj.h"
10#include "lj_err.h" 10#include "lj_err.h"
11#include "lj_buf.h"
11#include "lj_func.h" 12#include "lj_func.h"
12#include "lj_str.h" 13#include "lj_str.h"
13#include "lj_tab.h" 14#include "lj_tab.h"
@@ -17,6 +18,7 @@
17#include "lj_frame.h" 18#include "lj_frame.h"
18#include "lj_bc.h" 19#include "lj_bc.h"
19#include "lj_ff.h" 20#include "lj_ff.h"
21#include "lj_strfmt.h"
20#if LJ_HASJIT 22#if LJ_HASJIT
21#include "lj_jit.h" 23#include "lj_jit.h"
22#endif 24#endif
@@ -25,6 +27,9 @@
25#endif 27#endif
26#include "lj_trace.h" 28#include "lj_trace.h"
27#include "lj_dispatch.h" 29#include "lj_dispatch.h"
30#if LJ_HASPROFILE
31#include "lj_profile.h"
32#endif
28#include "lj_vm.h" 33#include "lj_vm.h"
29#include "luajit.h" 34#include "luajit.h"
30 35
@@ -37,6 +42,12 @@ LJ_STATIC_ASSERT(GG_NUM_ASMFF == FF_NUM_ASMFUNC);
37#include <math.h> 42#include <math.h>
38LJ_FUNCA_NORET void LJ_FASTCALL lj_ffh_coroutine_wrap_err(lua_State *L, 43LJ_FUNCA_NORET void LJ_FASTCALL lj_ffh_coroutine_wrap_err(lua_State *L,
39 lua_State *co); 44 lua_State *co);
45#if !LJ_HASJIT
46#define lj_dispatch_stitch lj_dispatch_ins
47#endif
48#if !LJ_HASPROFILE
49#define lj_dispatch_profile lj_dispatch_ins
50#endif
40 51
41#define GOTFUNC(name) (ASMFunction)name, 52#define GOTFUNC(name) (ASMFunction)name,
42static const ASMFunction dispatch_got[] = { 53static const ASMFunction dispatch_got[] = {
@@ -82,11 +93,12 @@ void lj_dispatch_init_hotcount(global_State *g)
82#endif 93#endif
83 94
84/* Internal dispatch mode bits. */ 95/* Internal dispatch mode bits. */
85#define DISPMODE_JIT 0x01 /* JIT compiler on. */ 96#define DISPMODE_CALL 0x01 /* Override call dispatch. */
86#define DISPMODE_REC 0x02 /* Recording active. */ 97#define DISPMODE_RET 0x02 /* Override return dispatch. */
87#define DISPMODE_INS 0x04 /* Override instruction dispatch. */ 98#define DISPMODE_INS 0x04 /* Override instruction dispatch. */
88#define DISPMODE_CALL 0x08 /* Override call dispatch. */ 99#define DISPMODE_JIT 0x10 /* JIT compiler on. */
89#define DISPMODE_RET 0x10 /* Override return dispatch. */ 100#define DISPMODE_REC 0x20 /* Recording active. */
101#define DISPMODE_PROF 0x40 /* Profiling active. */
90 102
91/* Update dispatch table depending on various flags. */ 103/* Update dispatch table depending on various flags. */
92void lj_dispatch_update(global_State *g) 104void lj_dispatch_update(global_State *g)
@@ -98,6 +110,9 @@ void lj_dispatch_update(global_State *g)
98 mode |= G2J(g)->state != LJ_TRACE_IDLE ? 110 mode |= G2J(g)->state != LJ_TRACE_IDLE ?
99 (DISPMODE_REC|DISPMODE_INS|DISPMODE_CALL) : 0; 111 (DISPMODE_REC|DISPMODE_INS|DISPMODE_CALL) : 0;
100#endif 112#endif
113#if LJ_HASPROFILE
114 mode |= (g->hookmask & HOOK_PROFILE) ? (DISPMODE_PROF|DISPMODE_INS) : 0;
115#endif
101 mode |= (g->hookmask & (LUA_MASKLINE|LUA_MASKCOUNT)) ? DISPMODE_INS : 0; 116 mode |= (g->hookmask & (LUA_MASKLINE|LUA_MASKCOUNT)) ? DISPMODE_INS : 0;
102 mode |= (g->hookmask & LUA_MASKCALL) ? DISPMODE_CALL : 0; 117 mode |= (g->hookmask & LUA_MASKCALL) ? DISPMODE_CALL : 0;
103 mode |= (g->hookmask & LUA_MASKRET) ? DISPMODE_RET : 0; 118 mode |= (g->hookmask & LUA_MASKRET) ? DISPMODE_RET : 0;
@@ -126,9 +141,9 @@ void lj_dispatch_update(global_State *g)
126 disp[GG_LEN_DDISP+BC_LOOP] = f_loop; 141 disp[GG_LEN_DDISP+BC_LOOP] = f_loop;
127 142
128 /* Set dynamic instruction dispatch. */ 143 /* Set dynamic instruction dispatch. */
129 if ((oldmode ^ mode) & (DISPMODE_REC|DISPMODE_INS)) { 144 if ((oldmode ^ mode) & (DISPMODE_PROF|DISPMODE_REC|DISPMODE_INS)) {
130 /* Need to update the whole table. */ 145 /* Need to update the whole table. */
131 if (!(mode & (DISPMODE_REC|DISPMODE_INS))) { /* No ins dispatch? */ 146 if (!(mode & DISPMODE_INS)) { /* No ins dispatch? */
132 /* Copy static dispatch table to dynamic dispatch table. */ 147 /* Copy static dispatch table to dynamic dispatch table. */
133 memcpy(&disp[0], &disp[GG_LEN_DDISP], GG_LEN_SDISP*sizeof(ASMFunction)); 148 memcpy(&disp[0], &disp[GG_LEN_DDISP], GG_LEN_SDISP*sizeof(ASMFunction));
134 /* Overwrite with dynamic return dispatch. */ 149 /* Overwrite with dynamic return dispatch. */
@@ -140,12 +155,13 @@ void lj_dispatch_update(global_State *g)
140 } 155 }
141 } else { 156 } else {
142 /* The recording dispatch also checks for hooks. */ 157 /* The recording dispatch also checks for hooks. */
143 ASMFunction f = (mode & DISPMODE_REC) ? lj_vm_record : lj_vm_inshook; 158 ASMFunction f = (mode & DISPMODE_PROF) ? lj_vm_profhook :
159 (mode & DISPMODE_REC) ? lj_vm_record : lj_vm_inshook;
144 uint32_t i; 160 uint32_t i;
145 for (i = 0; i < GG_LEN_SDISP; i++) 161 for (i = 0; i < GG_LEN_SDISP; i++)
146 disp[i] = f; 162 disp[i] = f;
147 } 163 }
148 } else if (!(mode & (DISPMODE_REC|DISPMODE_INS))) { 164 } else if (!(mode & DISPMODE_INS)) {
149 /* Otherwise set dynamic counting ins. */ 165 /* Otherwise set dynamic counting ins. */
150 disp[BC_FORL] = f_forl; 166 disp[BC_FORL] = f_forl;
151 disp[BC_ITERL] = f_iterl; 167 disp[BC_ITERL] = f_iterl;
@@ -352,10 +368,19 @@ static void callhook(lua_State *L, int event, BCLine line)
352 /* Top frame, nextframe = NULL. */ 368 /* Top frame, nextframe = NULL. */
353 ar.i_ci = (int)((L->base-1) - tvref(L->stack)); 369 ar.i_ci = (int)((L->base-1) - tvref(L->stack));
354 lj_state_checkstack(L, 1+LUA_MINSTACK); 370 lj_state_checkstack(L, 1+LUA_MINSTACK);
371#if LJ_HASPROFILE && !LJ_PROFILE_SIGPROF
372 lj_profile_hook_enter(g);
373#else
355 hook_enter(g); 374 hook_enter(g);
375#endif
356 hookf(L, &ar); 376 hookf(L, &ar);
357 lua_assert(hook_active(g)); 377 lua_assert(hook_active(g));
378 setgcref(g->cur_L, obj2gco(L));
379#if LJ_HASPROFILE && !LJ_PROFILE_SIGPROF
380 lj_profile_hook_leave(g);
381#else
358 hook_leave(g); 382 hook_leave(g);
383#endif
359 } 384 }
360} 385}
361 386
@@ -492,3 +517,41 @@ out:
492 return makeasmfunc(lj_bc_ofs[op]); /* Return static dispatch target. */ 517 return makeasmfunc(lj_bc_ofs[op]); /* Return static dispatch target. */
493} 518}
494 519
520#if LJ_HASJIT
521/* Stitch a new trace. */
522void LJ_FASTCALL lj_dispatch_stitch(jit_State *J, const BCIns *pc)
523{
524 ERRNO_SAVE
525 lua_State *L = J->L;
526 void *cf = cframe_raw(L->cframe);
527 const BCIns *oldpc = cframe_pc(cf);
528 setcframe_pc(cf, pc);
529 /* Before dispatch, have to bias PC by 1. */
530 L->top = L->base + cur_topslot(curr_proto(L), pc+1, cframe_multres_n(cf));
531 lj_trace_stitch(J, pc-1); /* Point to the CALL instruction. */
532 setcframe_pc(cf, oldpc);
533 ERRNO_RESTORE
534}
535#endif
536
537#if LJ_HASPROFILE
538/* Profile dispatch. */
539void LJ_FASTCALL lj_dispatch_profile(lua_State *L, const BCIns *pc)
540{
541 ERRNO_SAVE
542 GCfunc *fn = curr_func(L);
543 GCproto *pt = funcproto(fn);
544 void *cf = cframe_raw(L->cframe);
545 const BCIns *oldpc = cframe_pc(cf);
546 global_State *g;
547 setcframe_pc(cf, pc);
548 L->top = L->base + cur_topslot(pt, pc, cframe_multres_n(cf));
549 lj_profile_interpreter(L);
550 setcframe_pc(cf, oldpc);
551 g = G(L);
552 setgcref(g->cur_L, obj2gco(L));
553 setvmstate(g, INTERP);
554 ERRNO_RESTORE
555}
556#endif
557
diff --git a/src/lj_dispatch.h b/src/lj_dispatch.h
index 5960e64a..c3f6d86b 100644
--- a/src/lj_dispatch.h
+++ b/src/lj_dispatch.h
@@ -29,15 +29,17 @@
29 _(floor) _(ceil) _(trunc) _(log) _(log10) _(exp) _(sin) _(cos) _(tan) \ 29 _(floor) _(ceil) _(trunc) _(log) _(log10) _(exp) _(sin) _(cos) _(tan) \
30 _(asin) _(acos) _(atan) _(sinh) _(cosh) _(tanh) _(frexp) _(modf) _(atan2) \ 30 _(asin) _(acos) _(atan) _(sinh) _(cosh) _(tanh) _(frexp) _(modf) _(atan2) \
31 _(pow) _(fmod) _(ldexp) \ 31 _(pow) _(fmod) _(ldexp) \
32 _(lj_dispatch_call) _(lj_dispatch_ins) _(lj_err_throw) \ 32 _(lj_dispatch_call) _(lj_dispatch_ins) _(lj_dispatch_stitch) \
33 _(lj_dispatch_profile) _(lj_err_throw) \
33 _(lj_ffh_coroutine_wrap_err) _(lj_func_closeuv) _(lj_func_newL_gc) \ 34 _(lj_ffh_coroutine_wrap_err) _(lj_func_closeuv) _(lj_func_newL_gc) \
34 _(lj_gc_barrieruv) _(lj_gc_step) _(lj_gc_step_fixtop) _(lj_meta_arith) \ 35 _(lj_gc_barrieruv) _(lj_gc_step) _(lj_gc_step_fixtop) _(lj_meta_arith) \
35 _(lj_meta_call) _(lj_meta_cat) _(lj_meta_comp) _(lj_meta_equal) \ 36 _(lj_meta_call) _(lj_meta_cat) _(lj_meta_comp) _(lj_meta_equal) \
36 _(lj_meta_for) _(lj_meta_len) _(lj_meta_tget) _(lj_meta_tset) \ 37 _(lj_meta_for) _(lj_meta_istype) _(lj_meta_len) _(lj_meta_tget) \
37 _(lj_state_growstack) _(lj_str_fromnum) _(lj_str_fromnumber) _(lj_str_new) \ 38 _(lj_meta_tset) _(lj_state_growstack) _(lj_strfmt_num) \
38 _(lj_tab_dup) _(lj_tab_get) _(lj_tab_getinth) _(lj_tab_len) _(lj_tab_new) \ 39 _(lj_str_new) _(lj_tab_dup) _(lj_tab_get) _(lj_tab_getinth) _(lj_tab_len) \
39 _(lj_tab_newkey) _(lj_tab_next) _(lj_tab_reasize) \ 40 _(lj_tab_new) _(lj_tab_newkey) _(lj_tab_next) _(lj_tab_reasize) \
40 JITGOTDEF(_) FFIGOTDEF(_) 41 _(lj_tab_setinth) _(lj_buf_putstr_reverse) _(lj_buf_putstr_lower) \
42 _(lj_buf_putstr_upper) _(lj_buf_tostr) JITGOTDEF(_) FFIGOTDEF(_)
41 43
42enum { 44enum {
43#define GOTENUM(name) LJ_GOT_##name, 45#define GOTENUM(name) LJ_GOT_##name,
@@ -60,7 +62,7 @@ typedef uint16_t HotCount;
60#define HOTCOUNT_CALL 1 62#define HOTCOUNT_CALL 1
61 63
62/* This solves a circular dependency problem -- bump as needed. Sigh. */ 64/* This solves a circular dependency problem -- bump as needed. Sigh. */
63#define GG_NUM_ASMFF 62 65#define GG_NUM_ASMFF 57
64 66
65#define GG_LEN_DDISP (BC__MAX + GG_NUM_ASMFF) 67#define GG_LEN_DDISP (BC__MAX + GG_NUM_ASMFF)
66#define GG_LEN_SDISP BC_FUNCF 68#define GG_LEN_SDISP BC_FUNCF
@@ -109,7 +111,12 @@ LJ_FUNC void lj_dispatch_update(global_State *g);
109/* Instruction dispatch callback for hooks or when recording. */ 111/* Instruction dispatch callback for hooks or when recording. */
110LJ_FUNCA void LJ_FASTCALL lj_dispatch_ins(lua_State *L, const BCIns *pc); 112LJ_FUNCA void LJ_FASTCALL lj_dispatch_ins(lua_State *L, const BCIns *pc);
111LJ_FUNCA ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns*pc); 113LJ_FUNCA ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns*pc);
112LJ_FUNCA void LJ_FASTCALL lj_dispatch_return(lua_State *L, const BCIns *pc); 114#if LJ_HASJIT
115LJ_FUNCA void LJ_FASTCALL lj_dispatch_stitch(jit_State *J, const BCIns *pc);
116#endif
117#if LJ_HASPROFILE
118LJ_FUNCA void LJ_FASTCALL lj_dispatch_profile(lua_State *L, const BCIns *pc);
119#endif
113 120
114#if LJ_HASFFI && !defined(_BUILDVM_H) 121#if LJ_HASFFI && !defined(_BUILDVM_H)
115/* Save/restore errno and GetLastError() around hooks, exits and recording. */ 122/* Save/restore errno and GetLastError() around hooks, exits and recording. */
diff --git a/src/lj_emit_arm.h b/src/lj_emit_arm.h
index 3bac3679..aa36e830 100644
--- a/src/lj_emit_arm.h
+++ b/src/lj_emit_arm.h
@@ -308,30 +308,30 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
308 emit_dm(as, ARMI_MOV, dst, src); 308 emit_dm(as, ARMI_MOV, dst, src);
309} 309}
310 310
311/* Generic load of register from stack slot. */ 311/* Generic load of register with base and (small) offset address. */
312static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs) 312static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
313{ 313{
314#if LJ_SOFTFP 314#if LJ_SOFTFP
315 lua_assert(!irt_isnum(ir->t)); UNUSED(ir); 315 lua_assert(!irt_isnum(ir->t)); UNUSED(ir);
316#else 316#else
317 if (r >= RID_MAX_GPR) 317 if (r >= RID_MAX_GPR)
318 emit_vlso(as, irt_isnum(ir->t) ? ARMI_VLDR_D : ARMI_VLDR_S, r, RID_SP, ofs); 318 emit_vlso(as, irt_isnum(ir->t) ? ARMI_VLDR_D : ARMI_VLDR_S, r, base, ofs);
319 else 319 else
320#endif 320#endif
321 emit_lso(as, ARMI_LDR, r, RID_SP, ofs); 321 emit_lso(as, ARMI_LDR, r, base, ofs);
322} 322}
323 323
324/* Generic store of register to stack slot. */ 324/* Generic store of register with base and (small) offset address. */
325static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs) 325static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
326{ 326{
327#if LJ_SOFTFP 327#if LJ_SOFTFP
328 lua_assert(!irt_isnum(ir->t)); UNUSED(ir); 328 lua_assert(!irt_isnum(ir->t)); UNUSED(ir);
329#else 329#else
330 if (r >= RID_MAX_GPR) 330 if (r >= RID_MAX_GPR)
331 emit_vlso(as, irt_isnum(ir->t) ? ARMI_VSTR_D : ARMI_VSTR_S, r, RID_SP, ofs); 331 emit_vlso(as, irt_isnum(ir->t) ? ARMI_VSTR_D : ARMI_VSTR_S, r, base, ofs);
332 else 332 else
333#endif 333#endif
334 emit_lso(as, ARMI_STR, r, RID_SP, ofs); 334 emit_lso(as, ARMI_STR, r, base, ofs);
335} 335}
336 336
337/* Emit an arithmetic/logic operation with a constant operand. */ 337/* Emit an arithmetic/logic operation with a constant operand. */
diff --git a/src/lj_emit_mips.h b/src/lj_emit_mips.h
index dc9197ad..fa39e910 100644
--- a/src/lj_emit_mips.h
+++ b/src/lj_emit_mips.h
@@ -178,24 +178,24 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
178 emit_fg(as, irt_isnum(ir->t) ? MIPSI_MOV_D : MIPSI_MOV_S, dst, src); 178 emit_fg(as, irt_isnum(ir->t) ? MIPSI_MOV_D : MIPSI_MOV_S, dst, src);
179} 179}
180 180
181/* Generic load of register from stack slot. */ 181/* Generic load of register with base and (small) offset address. */
182static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs) 182static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
183{ 183{
184 if (r < RID_MAX_GPR) 184 if (r < RID_MAX_GPR)
185 emit_tsi(as, MIPSI_LW, r, RID_SP, ofs); 185 emit_tsi(as, MIPSI_LW, r, base, ofs);
186 else 186 else
187 emit_tsi(as, irt_isnum(ir->t) ? MIPSI_LDC1 : MIPSI_LWC1, 187 emit_tsi(as, irt_isnum(ir->t) ? MIPSI_LDC1 : MIPSI_LWC1,
188 (r & 31), RID_SP, ofs); 188 (r & 31), base, ofs);
189} 189}
190 190
191/* Generic store of register to stack slot. */ 191/* Generic store of register with base and (small) offset address. */
192static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs) 192static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
193{ 193{
194 if (r < RID_MAX_GPR) 194 if (r < RID_MAX_GPR)
195 emit_tsi(as, MIPSI_SW, r, RID_SP, ofs); 195 emit_tsi(as, MIPSI_SW, r, base, ofs);
196 else 196 else
197 emit_tsi(as, irt_isnum(ir->t) ? MIPSI_SDC1 : MIPSI_SWC1, 197 emit_tsi(as, irt_isnum(ir->t) ? MIPSI_SDC1 : MIPSI_SWC1,
198 (r&31), RID_SP, ofs); 198 (r&31), base, ofs);
199} 199}
200 200
201/* Add offset to pointer. */ 201/* Add offset to pointer. */
diff --git a/src/lj_emit_ppc.h b/src/lj_emit_ppc.h
index 3a2ae389..1e5aa653 100644
--- a/src/lj_emit_ppc.h
+++ b/src/lj_emit_ppc.h
@@ -186,22 +186,22 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
186 emit_fb(as, PPCI_FMR, dst, src); 186 emit_fb(as, PPCI_FMR, dst, src);
187} 187}
188 188
189/* Generic load of register from stack slot. */ 189/* Generic load of register with base and (small) offset address. */
190static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs) 190static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
191{ 191{
192 if (r < RID_MAX_GPR) 192 if (r < RID_MAX_GPR)
193 emit_tai(as, PPCI_LWZ, r, RID_SP, ofs); 193 emit_tai(as, PPCI_LWZ, r, base, ofs);
194 else 194 else
195 emit_fai(as, irt_isnum(ir->t) ? PPCI_LFD : PPCI_LFS, r, RID_SP, ofs); 195 emit_fai(as, irt_isnum(ir->t) ? PPCI_LFD : PPCI_LFS, r, base, ofs);
196} 196}
197 197
198/* Generic store of register to stack slot. */ 198/* Generic store of register with base and (small) offset address. */
199static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs) 199static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
200{ 200{
201 if (r < RID_MAX_GPR) 201 if (r < RID_MAX_GPR)
202 emit_tai(as, PPCI_STW, r, RID_SP, ofs); 202 emit_tai(as, PPCI_STW, r, base, ofs);
203 else 203 else
204 emit_fai(as, irt_isnum(ir->t) ? PPCI_STFD : PPCI_STFS, r, RID_SP, ofs); 204 emit_fai(as, irt_isnum(ir->t) ? PPCI_STFD : PPCI_STFS, r, base, ofs);
205} 205}
206 206
207/* Emit a compare (for equality) with a constant operand. */ 207/* Emit a compare (for equality) with a constant operand. */
diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h
index 8cfb654f..0c7fa148 100644
--- a/src/lj_emit_x86.h
+++ b/src/lj_emit_x86.h
@@ -241,10 +241,6 @@ static void emit_gmrmi(ASMState *as, x86Group xg, Reg rb, int32_t i)
241 241
242/* -- Emit loads/stores --------------------------------------------------- */ 242/* -- Emit loads/stores --------------------------------------------------- */
243 243
244/* Instruction selection for XMM moves. */
245#define XMM_MOVRR(as) ((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVSD : XO_MOVAPS)
246#define XMM_MOVRM(as) ((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVLPD : XO_MOVSD)
247
248/* mov [base+ofs], i */ 244/* mov [base+ofs], i */
249static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i) 245static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i)
250{ 246{
@@ -314,7 +310,7 @@ static void emit_loadn(ASMState *as, Reg r, cTValue *tv)
314 if (tvispzero(tv)) /* Use xor only for +0. */ 310 if (tvispzero(tv)) /* Use xor only for +0. */
315 emit_rr(as, XO_XORPS, r, r); 311 emit_rr(as, XO_XORPS, r, r);
316 else 312 else
317 emit_rma(as, XMM_MOVRM(as), r, &tv->n); 313 emit_rma(as, XO_MOVSD, r, &tv->n);
318} 314}
319 315
320/* -- Emit control-flow instructions -------------------------------------- */ 316/* -- Emit control-flow instructions -------------------------------------- */
@@ -427,25 +423,25 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
427 if (dst < RID_MAX_GPR) 423 if (dst < RID_MAX_GPR)
428 emit_rr(as, XO_MOV, REX_64IR(ir, dst), src); 424 emit_rr(as, XO_MOV, REX_64IR(ir, dst), src);
429 else 425 else
430 emit_rr(as, XMM_MOVRR(as), dst, src); 426 emit_rr(as, XO_MOVAPS, dst, src);
431} 427}
432 428
433/* Generic load of register from stack slot. */ 429/* Generic load of register with base and (small) offset address. */
434static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs) 430static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
435{ 431{
436 if (r < RID_MAX_GPR) 432 if (r < RID_MAX_GPR)
437 emit_rmro(as, XO_MOV, REX_64IR(ir, r), RID_ESP, ofs); 433 emit_rmro(as, XO_MOV, REX_64IR(ir, r), base, ofs);
438 else 434 else
439 emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, r, RID_ESP, ofs); 435 emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, r, base, ofs);
440} 436}
441 437
442/* Generic store of register to stack slot. */ 438/* Generic store of register with base and (small) offset address. */
443static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs) 439static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
444{ 440{
445 if (r < RID_MAX_GPR) 441 if (r < RID_MAX_GPR)
446 emit_rmro(as, XO_MOVto, REX_64IR(ir, r), RID_ESP, ofs); 442 emit_rmro(as, XO_MOVto, REX_64IR(ir, r), base, ofs);
447 else 443 else
448 emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSDto : XO_MOVSSto, r, RID_ESP, ofs); 444 emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSDto : XO_MOVSSto, r, base, ofs);
449} 445}
450 446
451/* Add offset to pointer. */ 447/* Add offset to pointer. */
diff --git a/src/lj_err.c b/src/lj_err.c
index db182673..5aafbd7c 100644
--- a/src/lj_err.c
+++ b/src/lj_err.c
@@ -16,6 +16,7 @@
16#include "lj_ff.h" 16#include "lj_ff.h"
17#include "lj_trace.h" 17#include "lj_trace.h"
18#include "lj_vm.h" 18#include "lj_vm.h"
19#include "lj_strfmt.h"
19 20
20/* 21/*
21** LuaJIT can either use internal or external frame unwinding: 22** LuaJIT can either use internal or external frame unwinding:
@@ -98,8 +99,8 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
98 TValue *top = restorestack(L, -nres); 99 TValue *top = restorestack(L, -nres);
99 if (frame < top) { /* Frame reached? */ 100 if (frame < top) { /* Frame reached? */
100 if (errcode) { 101 if (errcode) {
101 L->cframe = cframe_prev(cf);
102 L->base = frame+1; 102 L->base = frame+1;
103 L->cframe = cframe_prev(cf);
103 unwindstack(L, top); 104 unwindstack(L, top);
104 } 105 }
105 return cf; 106 return cf;
@@ -118,8 +119,8 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
118#endif 119#endif
119#if LJ_UNWIND_EXT 120#if LJ_UNWIND_EXT
120 if (errcode) { 121 if (errcode) {
121 L->cframe = cframe_prev(cf);
122 L->base = frame_prevd(frame) + 1; 122 L->base = frame_prevd(frame) + 1;
123 L->cframe = cframe_prev(cf);
123 unwindstack(L, frame); 124 unwindstack(L, frame);
124 } else if (cf != stopcf) { 125 } else if (cf != stopcf) {
125 cf = cframe_prev(cf); 126 cf = cframe_prev(cf);
@@ -143,8 +144,8 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
143 return cf; 144 return cf;
144 } 145 }
145 if (errcode) { 146 if (errcode) {
146 L->cframe = cframe_prev(cf);
147 L->base = frame_prevd(frame) + 1; 147 L->base = frame_prevd(frame) + 1;
148 L->cframe = cframe_prev(cf);
148 unwindstack(L, frame); 149 unwindstack(L, frame);
149 } 150 }
150 return cf; 151 return cf;
@@ -165,8 +166,8 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
165 } 166 }
166 if (frame_typep(frame) == FRAME_PCALL) 167 if (frame_typep(frame) == FRAME_PCALL)
167 hook_leave(G(L)); 168 hook_leave(G(L));
168 L->cframe = cf;
169 L->base = frame_prevd(frame) + 1; 169 L->base = frame_prevd(frame) + 1;
170 L->cframe = cf;
170 unwindstack(L, L->base); 171 unwindstack(L, L->base);
171 } 172 }
172 return (void *)((intptr_t)cf | CFRAME_UNWIND_FF); 173 return (void *)((intptr_t)cf | CFRAME_UNWIND_FF);
@@ -174,8 +175,8 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
174 } 175 }
175 /* No C frame. */ 176 /* No C frame. */
176 if (errcode) { 177 if (errcode) {
177 L->cframe = NULL;
178 L->base = tvref(L->stack)+1; 178 L->base = tvref(L->stack)+1;
179 L->cframe = NULL;
179 unwindstack(L, L->base); 180 unwindstack(L, L->base);
180 if (G(L)->panic) 181 if (G(L)->panic)
181 G(L)->panic(L); 182 G(L)->panic(L);
@@ -452,7 +453,7 @@ LJ_NOINLINE void LJ_FASTCALL lj_err_throw(lua_State *L, int errcode)
452{ 453{
453 global_State *g = G(L); 454 global_State *g = G(L);
454 lj_trace_abort(g); 455 lj_trace_abort(g);
455 setgcrefnull(g->jit_L); 456 setmref(g->jit_base, NULL);
456 L->status = 0; 457 L->status = 0;
457#if LJ_UNWIND_EXT 458#if LJ_UNWIND_EXT
458 err_raise_ext(errcode); 459 err_raise_ext(errcode);
@@ -573,7 +574,7 @@ LJ_NORET LJ_NOINLINE static void err_msgv(lua_State *L, ErrMsg em, ...)
573 va_list argp; 574 va_list argp;
574 va_start(argp, em); 575 va_start(argp, em);
575 if (curr_funcisL(L)) L->top = curr_topL(L); 576 if (curr_funcisL(L)) L->top = curr_topL(L);
576 msg = lj_str_pushvf(L, err2msg(em), argp); 577 msg = lj_strfmt_pushvf(L, err2msg(em), argp);
577 va_end(argp); 578 va_end(argp);
578 lj_debug_addloc(L, msg, L->base-1, NULL); 579 lj_debug_addloc(L, msg, L->base-1, NULL);
579 lj_err_run(L); 580 lj_err_run(L);
@@ -591,11 +592,11 @@ LJ_NOINLINE void lj_err_lex(lua_State *L, GCstr *src, const char *tok,
591{ 592{
592 char buff[LUA_IDSIZE]; 593 char buff[LUA_IDSIZE];
593 const char *msg; 594 const char *msg;
594 lj_debug_shortname(buff, src); 595 lj_debug_shortname(buff, src, line);
595 msg = lj_str_pushvf(L, err2msg(em), argp); 596 msg = lj_strfmt_pushvf(L, err2msg(em), argp);
596 msg = lj_str_pushf(L, "%s:%d: %s", buff, line, msg); 597 msg = lj_strfmt_pushf(L, "%s:%d: %s", buff, line, msg);
597 if (tok) 598 if (tok)
598 lj_str_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tok); 599 lj_strfmt_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tok);
599 lj_err_throw(L, LUA_ERRSYNTAX); 600 lj_err_throw(L, LUA_ERRSYNTAX);
600} 601}
601 602
@@ -679,7 +680,7 @@ LJ_NOINLINE void lj_err_callerv(lua_State *L, ErrMsg em, ...)
679 const char *msg; 680 const char *msg;
680 va_list argp; 681 va_list argp;
681 va_start(argp, em); 682 va_start(argp, em);
682 msg = lj_str_pushvf(L, err2msg(em), argp); 683 msg = lj_strfmt_pushvf(L, err2msg(em), argp);
683 va_end(argp); 684 va_end(argp);
684 lj_err_callermsg(L, msg); 685 lj_err_callermsg(L, msg);
685} 686}
@@ -699,9 +700,9 @@ LJ_NORET LJ_NOINLINE static void err_argmsg(lua_State *L, int narg,
699 if (narg < 0 && narg > LUA_REGISTRYINDEX) 700 if (narg < 0 && narg > LUA_REGISTRYINDEX)
700 narg = (int)(L->top - L->base) + narg + 1; 701 narg = (int)(L->top - L->base) + narg + 1;
701 if (ftype && ftype[3] == 'h' && --narg == 0) /* Check for "method". */ 702 if (ftype && ftype[3] == 'h' && --narg == 0) /* Check for "method". */
702 msg = lj_str_pushf(L, err2msg(LJ_ERR_BADSELF), fname, msg); 703 msg = lj_strfmt_pushf(L, err2msg(LJ_ERR_BADSELF), fname, msg);
703 else 704 else
704 msg = lj_str_pushf(L, err2msg(LJ_ERR_BADARG), narg, fname, msg); 705 msg = lj_strfmt_pushf(L, err2msg(LJ_ERR_BADARG), narg, fname, msg);
705 lj_err_callermsg(L, msg); 706 lj_err_callermsg(L, msg);
706} 707}
707 708
@@ -711,7 +712,7 @@ LJ_NOINLINE void lj_err_argv(lua_State *L, int narg, ErrMsg em, ...)
711 const char *msg; 712 const char *msg;
712 va_list argp; 713 va_list argp;
713 va_start(argp, em); 714 va_start(argp, em);
714 msg = lj_str_pushvf(L, err2msg(em), argp); 715 msg = lj_strfmt_pushvf(L, err2msg(em), argp);
715 va_end(argp); 716 va_end(argp);
716 err_argmsg(L, narg, msg); 717 err_argmsg(L, narg, msg);
717} 718}
@@ -727,7 +728,7 @@ LJ_NOINLINE void lj_err_argtype(lua_State *L, int narg, const char *xname)
727{ 728{
728 TValue *o = narg < 0 ? L->top + narg : L->base + narg-1; 729 TValue *o = narg < 0 ? L->top + narg : L->base + narg-1;
729 const char *tname = o < L->top ? lj_typename(o) : lj_obj_typename[0]; 730 const char *tname = o < L->top ? lj_typename(o) : lj_obj_typename[0];
730 const char *msg = lj_str_pushf(L, err2msg(LJ_ERR_BADTYPE), xname, tname); 731 const char *msg = lj_strfmt_pushf(L, err2msg(LJ_ERR_BADTYPE), xname, tname);
731 err_argmsg(L, narg, msg); 732 err_argmsg(L, narg, msg);
732} 733}
733 734
@@ -777,7 +778,7 @@ LUALIB_API int luaL_error(lua_State *L, const char *fmt, ...)
777 const char *msg; 778 const char *msg;
778 va_list argp; 779 va_list argp;
779 va_start(argp, fmt); 780 va_start(argp, fmt);
780 msg = lj_str_pushvf(L, fmt, argp); 781 msg = lj_strfmt_pushvf(L, fmt, argp);
781 va_end(argp); 782 va_end(argp);
782 lj_err_callermsg(L, msg); 783 lj_err_callermsg(L, msg);
783 return 0; /* unreachable */ 784 return 0; /* unreachable */
diff --git a/src/lj_errmsg.h b/src/lj_errmsg.h
index 137831eb..ad9318c0 100644
--- a/src/lj_errmsg.h
+++ b/src/lj_errmsg.h
@@ -96,9 +96,7 @@ ERRDEF(STRPATX, "pattern too complex")
96ERRDEF(STRCAPI, "invalid capture index") 96ERRDEF(STRCAPI, "invalid capture index")
97ERRDEF(STRCAPN, "too many captures") 97ERRDEF(STRCAPN, "too many captures")
98ERRDEF(STRCAPU, "unfinished capture") 98ERRDEF(STRCAPU, "unfinished capture")
99ERRDEF(STRFMTO, "invalid option " LUA_QL("%%%c") " to " LUA_QL("format")) 99ERRDEF(STRFMT, "invalid option " LUA_QS " to " LUA_QL("format"))
100ERRDEF(STRFMTR, "invalid format (repeated flags)")
101ERRDEF(STRFMTW, "invalid format (width or precision too long)")
102ERRDEF(STRGSRV, "invalid replacement value (a %s)") 100ERRDEF(STRGSRV, "invalid replacement value (a %s)")
103ERRDEF(BADMODN, "name conflict for module " LUA_QS) 101ERRDEF(BADMODN, "name conflict for module " LUA_QS)
104#if LJ_HASJIT 102#if LJ_HASJIT
@@ -118,7 +116,6 @@ ERRDEF(JITOPT, "unknown or malformed optimization flag " LUA_QS)
118/* Lexer/parser errors. */ 116/* Lexer/parser errors. */
119ERRDEF(XMODE, "attempt to load chunk with wrong mode") 117ERRDEF(XMODE, "attempt to load chunk with wrong mode")
120ERRDEF(XNEAR, "%s near " LUA_QS) 118ERRDEF(XNEAR, "%s near " LUA_QS)
121ERRDEF(XELEM, "lexical element too long")
122ERRDEF(XLINES, "chunk has too many lines") 119ERRDEF(XLINES, "chunk has too many lines")
123ERRDEF(XLEVELS, "chunk has too many syntax levels") 120ERRDEF(XLEVELS, "chunk has too many syntax levels")
124ERRDEF(XNUMBER, "malformed number") 121ERRDEF(XNUMBER, "malformed number")
diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c
index 794bbd43..8470dd8c 100644
--- a/src/lj_ffrecord.c
+++ b/src/lj_ffrecord.c
@@ -27,6 +27,7 @@
27#include "lj_dispatch.h" 27#include "lj_dispatch.h"
28#include "lj_vm.h" 28#include "lj_vm.h"
29#include "lj_strscan.h" 29#include "lj_strscan.h"
30#include "lj_strfmt.h"
30 31
31/* Some local macros to save typing. Undef'd at the end. */ 32/* Some local macros to save typing. Undef'd at the end. */
32#define IR(ref) (&J->cur.ir[(ref)]) 33#define IR(ref) (&J->cur.ir[(ref)])
@@ -79,10 +80,7 @@ static GCstr *argv2str(jit_State *J, TValue *o)
79 GCstr *s; 80 GCstr *s;
80 if (!tvisnumber(o)) 81 if (!tvisnumber(o))
81 lj_trace_err(J, LJ_TRERR_BADTYPE); 82 lj_trace_err(J, LJ_TRERR_BADTYPE);
82 if (tvisint(o)) 83 s = lj_strfmt_number(J->L, o);
83 s = lj_str_fromint(J->L, intV(o));
84 else
85 s = lj_str_fromnum(J->L, &o->n);
86 setstrV(J->L, o, s); 84 setstrV(J->L, o, s);
87 return s; 85 return s;
88 } 86 }
@@ -98,27 +96,89 @@ static ptrdiff_t results_wanted(jit_State *J)
98 return -1; 96 return -1;
99} 97}
100 98
101/* Throw error for unsupported variant of fast function. */ 99/* Trace stitching: add continuation below frame to start a new trace. */
102LJ_NORET static void recff_nyiu(jit_State *J) 100static void recff_stitch(jit_State *J)
103{ 101{
104 setfuncV(J->L, &J->errinfo, J->fn); 102 ASMFunction cont = lj_cont_stitch;
105 lj_trace_err_info(J, LJ_TRERR_NYIFFU); 103 TraceNo traceno = J->cur.traceno;
104 lua_State *L = J->L;
105 TValue *base = L->base;
106 const BCIns *pc = frame_pc(base-1);
107 TValue *pframe = frame_prevl(base-1);
108 TRef trcont;
109
110 /* Move func + args up in Lua stack and insert continuation. */
111 memmove(&base[1], &base[-1], sizeof(TValue)*(J->maxslot+1));
112 setframe_ftsz(base+1, (int)((char *)(base+1) - (char *)pframe) + FRAME_CONT);
113 setcont(base, cont);
114 setframe_pc(base, pc);
115 if (LJ_DUALNUM) setintV(base-1, traceno); else base[-1].u64 = traceno;
116 L->base += 2;
117 L->top += 2;
118
119 /* Ditto for the IR. */
120 memmove(&J->base[1], &J->base[-1], sizeof(TRef)*(J->maxslot+1));
121#if LJ_64
122 trcont = lj_ir_kptr(J, (void *)((int64_t)cont-(int64_t)lj_vm_asm_begin));
123#else
124 trcont = lj_ir_kptr(J, (void *)cont);
125#endif
126 J->base[0] = trcont | TREF_CONT;
127 J->base[-1] = LJ_DUALNUM ? lj_ir_kint(J,traceno) : lj_ir_knum_u64(J,traceno);
128 J->base += 2;
129 J->baseslot += 2;
130 J->framedepth++;
131
132 lj_record_stop(J, LJ_TRLINK_STITCH, 0);
133
134 /* Undo Lua stack changes. */
135 memmove(&base[-1], &base[1], sizeof(TValue)*(J->maxslot+1));
136 setframe_pc(base-1, pc);
137 L->base -= 2;
138 L->top -= 2;
106} 139}
107 140
108/* Fallback handler for all fast functions that are not recorded (yet). */ 141/* Fallback handler for fast functions that are not recorded (yet). */
109static void LJ_FASTCALL recff_nyi(jit_State *J, RecordFFData *rd) 142static void LJ_FASTCALL recff_nyi(jit_State *J, RecordFFData *rd)
110{ 143{
111 setfuncV(J->L, &J->errinfo, J->fn); 144 if (J->cur.nins < (IRRef)J->param[JIT_P_minstitch] + REF_BASE) {
112 lj_trace_err_info(J, LJ_TRERR_NYIFF); 145 lj_trace_err_info(J, LJ_TRERR_TRACEUV);
113 UNUSED(rd); 146 } else {
147 /* Can only stitch from Lua call. */
148 if (J->framedepth && frame_islua(J->L->base-1)) {
149 BCOp op = bc_op(*frame_pc(J->L->base-1));
150 /* Stitched trace cannot start with *M op with variable # of args. */
151 if (!(op == BC_CALLM || op == BC_CALLMT ||
152 op == BC_RETM || op == BC_TSETM)) {
153 switch (J->fn->c.ffid) {
154 case FF_error:
155 case FF_debug_sethook:
156 case FF_jit_flush:
157 break; /* Don't stitch across special builtins. */
158 default:
159 recff_stitch(J); /* Use trace stitching. */
160 rd->nres = -1;
161 return;
162 }
163 }
164 }
165 /* Otherwise stop trace and return to interpreter. */
166 lj_record_stop(J, LJ_TRLINK_RETURN, 0);
167 rd->nres = -1;
168 }
114} 169}
115 170
116/* C functions can have arbitrary side-effects and are not recorded (yet). */ 171/* Fallback handler for unsupported variants of fast functions. */
117static void LJ_FASTCALL recff_c(jit_State *J, RecordFFData *rd) 172#define recff_nyiu recff_nyi
173
174/* Must stop the trace for classic C functions with arbitrary side-effects. */
175#define recff_c recff_nyi
176
177/* Emit BUFHDR for the global temporary buffer. */
178static TRef recff_bufhdr(jit_State *J)
118{ 179{
119 setfuncV(J->L, &J->errinfo, J->fn); 180 return emitir(IRT(IR_BUFHDR, IRT_P32),
120 lj_trace_err_info(J, LJ_TRERR_NYICF); 181 lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET);
121 UNUSED(rd);
122} 182}
123 183
124/* -- Base library fast functions ----------------------------------------- */ 184/* -- Base library fast functions ----------------------------------------- */
@@ -263,7 +323,8 @@ static void LJ_FASTCALL recff_select(jit_State *J, RecordFFData *rd)
263 J->base[i] = J->base[start+i]; 323 J->base[i] = J->base[start+i];
264 } /* else: Interpreter will throw. */ 324 } /* else: Interpreter will throw. */
265 } else { 325 } else {
266 recff_nyiu(J); 326 recff_nyiu(J, rd);
327 return;
267 } 328 }
268 } /* else: Interpreter will throw. */ 329 } /* else: Interpreter will throw. */
269} 330}
@@ -274,14 +335,18 @@ static void LJ_FASTCALL recff_tonumber(jit_State *J, RecordFFData *rd)
274 TRef base = J->base[1]; 335 TRef base = J->base[1];
275 if (tr && !tref_isnil(base)) { 336 if (tr && !tref_isnil(base)) {
276 base = lj_opt_narrow_toint(J, base); 337 base = lj_opt_narrow_toint(J, base);
277 if (!tref_isk(base) || IR(tref_ref(base))->i != 10) 338 if (!tref_isk(base) || IR(tref_ref(base))->i != 10) {
278 recff_nyiu(J); 339 recff_nyiu(J, rd);
340 return;
341 }
279 } 342 }
280 if (tref_isnumber_str(tr)) { 343 if (tref_isnumber_str(tr)) {
281 if (tref_isstr(tr)) { 344 if (tref_isstr(tr)) {
282 TValue tmp; 345 TValue tmp;
283 if (!lj_strscan_num(strV(&rd->argv[0]), &tmp)) 346 if (!lj_strscan_num(strV(&rd->argv[0]), &tmp)) {
284 recff_nyiu(J); /* Would need an inverted STRTO for this case. */ 347 recff_nyiu(J, rd); /* Would need an inverted STRTO for this case. */
348 return;
349 }
285 tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0); 350 tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0);
286 } 351 }
287#if LJ_HASFFI 352#if LJ_HASFFI
@@ -336,13 +401,15 @@ static void LJ_FASTCALL recff_tostring(jit_State *J, RecordFFData *rd)
336 if (tref_isstr(tr)) { 401 if (tref_isstr(tr)) {
337 /* Ignore __tostring in the string base metatable. */ 402 /* Ignore __tostring in the string base metatable. */
338 /* Pass on result in J->base[0]. */ 403 /* Pass on result in J->base[0]. */
339 } else if (!recff_metacall(J, rd, MM_tostring)) { 404 } else if (tr && !recff_metacall(J, rd, MM_tostring)) {
340 if (tref_isnumber(tr)) { 405 if (tref_isnumber(tr)) {
341 J->base[0] = emitir(IRT(IR_TOSTR, IRT_STR), tr, 0); 406 J->base[0] = emitir(IRT(IR_TOSTR, IRT_STR), tr,
407 tref_isnum(tr) ? IRTOSTR_NUM : IRTOSTR_INT);
342 } else if (tref_ispri(tr)) { 408 } else if (tref_ispri(tr)) {
343 J->base[0] = lj_ir_kstr(J, strV(&J->fn->c.upvalue[tref_type(tr)])); 409 J->base[0] = lj_ir_kstr(J, lj_strfmt_obj(J->L, &rd->argv[0]));
344 } else { 410 } else {
345 recff_nyiu(J); 411 recff_nyiu(J, rd);
412 return;
346 } 413 }
347 } 414 }
348} 415}
@@ -364,14 +431,14 @@ static void LJ_FASTCALL recff_ipairs_aux(jit_State *J, RecordFFData *rd)
364 } /* else: Interpreter will throw. */ 431 } /* else: Interpreter will throw. */
365} 432}
366 433
367static void LJ_FASTCALL recff_ipairs(jit_State *J, RecordFFData *rd) 434static void LJ_FASTCALL recff_xpairs(jit_State *J, RecordFFData *rd)
368{ 435{
369 if (!(LJ_52 && recff_metacall(J, rd, MM_ipairs))) { 436 if (!(LJ_52 && recff_metacall(J, rd, MM_ipairs))) {
370 TRef tab = J->base[0]; 437 TRef tab = J->base[0];
371 if (tref_istab(tab)) { 438 if (tref_istab(tab)) {
372 J->base[0] = lj_ir_kfunc(J, funcV(&J->fn->c.upvalue[0])); 439 J->base[0] = lj_ir_kfunc(J, funcV(&J->fn->c.upvalue[0]));
373 J->base[1] = tab; 440 J->base[1] = tab;
374 J->base[2] = lj_ir_kint(J, 0); 441 J->base[2] = rd->data ? lj_ir_kint(J, 0) : TREF_NIL;
375 rd->nres = 3; 442 rd->nres = 3;
376 } /* else: Interpreter will throw. */ 443 } /* else: Interpreter will throw. */
377 } 444 }
@@ -416,6 +483,18 @@ static void LJ_FASTCALL recff_xpcall(jit_State *J, RecordFFData *rd)
416 } /* else: Interpreter will throw. */ 483 } /* else: Interpreter will throw. */
417} 484}
418 485
486static void LJ_FASTCALL recff_getfenv(jit_State *J, RecordFFData *rd)
487{
488 TRef tr = J->base[0];
489 /* Only support getfenv(0) for now. */
490 if (tref_isint(tr) && tref_isk(tr) && IR(tref_ref(tr))->i == 0) {
491 TRef trl = emitir(IRT(IR_LREF, IRT_THREAD), 0, 0);
492 J->base[0] = emitir(IRT(IR_FLOAD, IRT_TAB), trl, IRFL_THREAD_ENV);
493 return;
494 }
495 recff_nyiu(J, rd);
496}
497
419/* -- Math library fast functions ----------------------------------------- */ 498/* -- Math library fast functions ----------------------------------------- */
420 499
421static void LJ_FASTCALL recff_math_abs(jit_State *J, RecordFFData *rd) 500static void LJ_FASTCALL recff_math_abs(jit_State *J, RecordFFData *rd)
@@ -528,14 +607,6 @@ static void LJ_FASTCALL recff_math_modf(jit_State *J, RecordFFData *rd)
528 rd->nres = 2; 607 rd->nres = 2;
529} 608}
530 609
531static void LJ_FASTCALL recff_math_degrad(jit_State *J, RecordFFData *rd)
532{
533 TRef tr = lj_ir_tonum(J, J->base[0]);
534 TRef trm = lj_ir_knum(J, numV(&J->fn->c.upvalue[0]));
535 J->base[0] = emitir(IRTN(IR_MUL), tr, trm);
536 UNUSED(rd);
537}
538
539static void LJ_FASTCALL recff_math_pow(jit_State *J, RecordFFData *rd) 610static void LJ_FASTCALL recff_math_pow(jit_State *J, RecordFFData *rd)
540{ 611{
541 TRef tr = lj_ir_tonum(J, J->base[0]); 612 TRef tr = lj_ir_tonum(J, J->base[0]);
@@ -592,48 +663,105 @@ static void LJ_FASTCALL recff_math_random(jit_State *J, RecordFFData *rd)
592 663
593/* -- Bit library fast functions ------------------------------------------ */ 664/* -- Bit library fast functions ------------------------------------------ */
594 665
595/* Record unary bit.tobit, bit.bnot, bit.bswap. */ 666/* Record bit.tobit. */
667static void LJ_FASTCALL recff_bit_tobit(jit_State *J, RecordFFData *rd)
668{
669 TRef tr = J->base[0];
670#if LJ_HASFFI
671 if (tref_iscdata(tr)) { recff_bit64_tobit(J, rd); return; }
672#endif
673 J->base[0] = lj_opt_narrow_tobit(J, tr);
674 UNUSED(rd);
675}
676
677/* Record unary bit.bnot, bit.bswap. */
596static void LJ_FASTCALL recff_bit_unary(jit_State *J, RecordFFData *rd) 678static void LJ_FASTCALL recff_bit_unary(jit_State *J, RecordFFData *rd)
597{ 679{
598 TRef tr = lj_opt_narrow_tobit(J, J->base[0]); 680#if LJ_HASFFI
599 J->base[0] = (rd->data == IR_TOBIT) ? tr : emitir(IRTI(rd->data), tr, 0); 681 if (recff_bit64_unary(J, rd))
682 return;
683#endif
684 J->base[0] = emitir(IRTI(rd->data), lj_opt_narrow_tobit(J, J->base[0]), 0);
600} 685}
601 686
602/* Record N-ary bit.band, bit.bor, bit.bxor. */ 687/* Record N-ary bit.band, bit.bor, bit.bxor. */
603static void LJ_FASTCALL recff_bit_nary(jit_State *J, RecordFFData *rd) 688static void LJ_FASTCALL recff_bit_nary(jit_State *J, RecordFFData *rd)
604{ 689{
605 TRef tr = lj_opt_narrow_tobit(J, J->base[0]); 690#if LJ_HASFFI
606 uint32_t op = rd->data; 691 if (recff_bit64_nary(J, rd))
607 BCReg i; 692 return;
608 for (i = 1; J->base[i] != 0; i++) 693#endif
609 tr = emitir(IRTI(op), tr, lj_opt_narrow_tobit(J, J->base[i])); 694 {
610 J->base[0] = tr; 695 TRef tr = lj_opt_narrow_tobit(J, J->base[0]);
696 uint32_t ot = IRTI(rd->data);
697 BCReg i;
698 for (i = 1; J->base[i] != 0; i++)
699 tr = emitir(ot, tr, lj_opt_narrow_tobit(J, J->base[i]));
700 J->base[0] = tr;
701 }
611} 702}
612 703
613/* Record bit shifts. */ 704/* Record bit shifts. */
614static void LJ_FASTCALL recff_bit_shift(jit_State *J, RecordFFData *rd) 705static void LJ_FASTCALL recff_bit_shift(jit_State *J, RecordFFData *rd)
615{ 706{
616 TRef tr = lj_opt_narrow_tobit(J, J->base[0]); 707#if LJ_HASFFI
617 TRef tsh = lj_opt_narrow_tobit(J, J->base[1]); 708 if (recff_bit64_shift(J, rd))
618 IROp op = (IROp)rd->data; 709 return;
619 if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) && 710#endif
620 !tref_isk(tsh)) 711 {
621 tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 31)); 712 TRef tr = lj_opt_narrow_tobit(J, J->base[0]);
713 TRef tsh = lj_opt_narrow_tobit(J, J->base[1]);
714 IROp op = (IROp)rd->data;
715 if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) &&
716 !tref_isk(tsh))
717 tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 31));
622#ifdef LJ_TARGET_UNIFYROT 718#ifdef LJ_TARGET_UNIFYROT
623 if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) { 719 if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) {
624 op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR; 720 op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR;
625 tsh = emitir(IRTI(IR_NEG), tsh, tsh); 721 tsh = emitir(IRTI(IR_NEG), tsh, tsh);
722 }
723#endif
724 J->base[0] = emitir(IRTI(op), tr, tsh);
626 } 725 }
726}
727
728static void LJ_FASTCALL recff_bit_tohex(jit_State *J, RecordFFData *rd)
729{
730#if LJ_HASFFI
731 TRef hdr = recff_bufhdr(J);
732 TRef tr = recff_bit64_tohex(J, rd, hdr);
733 J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
734#else
735 recff_nyiu(J, rd); /* Don't bother working around this NYI. */
627#endif 736#endif
628 J->base[0] = emitir(IRTI(op), tr, tsh);
629} 737}
630 738
631/* -- String library fast functions --------------------------------------- */ 739/* -- String library fast functions --------------------------------------- */
632 740
633static void LJ_FASTCALL recff_string_len(jit_State *J, RecordFFData *rd) 741/* Specialize to relative starting position for string. */
742static TRef recff_string_start(jit_State *J, GCstr *s, int32_t *st, TRef tr,
743 TRef trlen, TRef tr0)
634{ 744{
635 J->base[0] = emitir(IRTI(IR_FLOAD), lj_ir_tostr(J, J->base[0]), IRFL_STR_LEN); 745 int32_t start = *st;
636 UNUSED(rd); 746 if (start < 0) {
747 emitir(IRTGI(IR_LT), tr, tr0);
748 tr = emitir(IRTI(IR_ADD), trlen, tr);
749 start = start + (int32_t)s->len;
750 emitir(start < 0 ? IRTGI(IR_LT) : IRTGI(IR_GE), tr, tr0);
751 if (start < 0) {
752 tr = tr0;
753 start = 0;
754 }
755 } else if (start == 0) {
756 emitir(IRTGI(IR_EQ), tr, tr0);
757 tr = tr0;
758 } else {
759 tr = emitir(IRTI(IR_ADD), tr, lj_ir_kint(J, -1));
760 emitir(IRTGI(IR_GE), tr, tr0);
761 start--;
762 }
763 *st = start;
764 return tr;
637} 765}
638 766
639/* Handle string.byte (rd->data = 0) and string.sub (rd->data = 1). */ 767/* Handle string.byte (rd->data = 0) and string.sub (rd->data = 1). */
@@ -680,29 +808,11 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd)
680 } else if ((MSize)end <= str->len) { 808 } else if ((MSize)end <= str->len) {
681 emitir(IRTGI(IR_ULE), trend, trlen); 809 emitir(IRTGI(IR_ULE), trend, trlen);
682 } else { 810 } else {
683 emitir(IRTGI(IR_GT), trend, trlen); 811 emitir(IRTGI(IR_UGT), trend, trlen);
684 end = (int32_t)str->len; 812 end = (int32_t)str->len;
685 trend = trlen; 813 trend = trlen;
686 } 814 }
687 if (start < 0) { 815 trstart = recff_string_start(J, str, &start, trstart, trlen, tr0);
688 emitir(IRTGI(IR_LT), trstart, tr0);
689 trstart = emitir(IRTI(IR_ADD), trlen, trstart);
690 start = start+(int32_t)str->len;
691 emitir(start < 0 ? IRTGI(IR_LT) : IRTGI(IR_GE), trstart, tr0);
692 if (start < 0) {
693 trstart = tr0;
694 start = 0;
695 }
696 } else {
697 if (start == 0) {
698 emitir(IRTGI(IR_EQ), trstart, tr0);
699 trstart = tr0;
700 } else {
701 trstart = emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, -1));
702 emitir(IRTGI(IR_GE), trstart, tr0);
703 start--;
704 }
705 }
706 if (rd->data) { /* Return string.sub result. */ 816 if (rd->data) { /* Return string.sub result. */
707 if (end - start >= 0) { 817 if (end - start >= 0) {
708 /* Also handle empty range here, to avoid extra traces. */ 818 /* Also handle empty range here, to avoid extra traces. */
@@ -712,7 +822,7 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd)
712 J->base[0] = emitir(IRT(IR_SNEW, IRT_STR), trptr, trslen); 822 J->base[0] = emitir(IRT(IR_SNEW, IRT_STR), trptr, trslen);
713 } else { /* Range underflow: return empty string. */ 823 } else { /* Range underflow: return empty string. */
714 emitir(IRTGI(IR_LT), trend, trstart); 824 emitir(IRTGI(IR_LT), trend, trstart);
715 J->base[0] = lj_ir_kstr(J, lj_str_new(J->L, strdata(str), 0)); 825 J->base[0] = lj_ir_kstr(J, &J2G(J)->strempty);
716 } 826 }
717 } else { /* Return string.byte result(s). */ 827 } else { /* Return string.byte result(s). */
718 ptrdiff_t i, len = end - start; 828 ptrdiff_t i, len = end - start;
@@ -734,48 +844,200 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd)
734 } 844 }
735} 845}
736 846
737/* -- Table library fast functions ---------------------------------------- */ 847static void LJ_FASTCALL recff_string_char(jit_State *J, RecordFFData *rd)
738
739static void LJ_FASTCALL recff_table_getn(jit_State *J, RecordFFData *rd)
740{ 848{
741 if (tref_istab(J->base[0])) 849 TRef k255 = lj_ir_kint(J, 255);
742 J->base[0] = lj_ir_call(J, IRCALL_lj_tab_len, J->base[0]); 850 BCReg i;
743 /* else: Interpreter will throw. */ 851 for (i = 0; J->base[i] != 0; i++) { /* Convert char values to strings. */
852 TRef tr = lj_opt_narrow_toint(J, J->base[i]);
853 emitir(IRTGI(IR_ULE), tr, k255);
854 J->base[i] = emitir(IRT(IR_TOSTR, IRT_STR), tr, IRTOSTR_CHAR);
855 }
856 if (i > 1) { /* Concatenate the strings, if there's more than one. */
857 TRef hdr = recff_bufhdr(J), tr = hdr;
858 for (i = 0; J->base[i] != 0; i++)
859 tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, J->base[i]);
860 J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
861 }
744 UNUSED(rd); 862 UNUSED(rd);
745} 863}
746 864
747static void LJ_FASTCALL recff_table_remove(jit_State *J, RecordFFData *rd) 865static void LJ_FASTCALL recff_string_rep(jit_State *J, RecordFFData *rd)
748{ 866{
749 TRef tab = J->base[0]; 867 TRef str = lj_ir_tostr(J, J->base[0]);
750 rd->nres = 0; 868 TRef rep = lj_opt_narrow_toint(J, J->base[1]);
751 if (tref_istab(tab)) { 869 TRef hdr, tr, str2 = 0;
752 if (tref_isnil(J->base[1])) { /* Simple pop: t[#t] = nil */ 870 if (!tref_isnil(J->base[2])) {
753 TRef trlen = lj_ir_call(J, IRCALL_lj_tab_len, tab); 871 TRef sep = lj_ir_tostr(J, J->base[2]);
754 GCtab *t = tabV(&rd->argv[0]); 872 int32_t vrep = argv2int(J, &rd->argv[1]);
755 MSize len = lj_tab_len(t); 873 emitir(IRTGI(vrep > 1 ? IR_GT : IR_LE), rep, lj_ir_kint(J, 1));
756 emitir(IRTGI(len ? IR_NE : IR_EQ), trlen, lj_ir_kint(J, 0)); 874 if (vrep > 1) {
757 if (len) { 875 TRef hdr2 = recff_bufhdr(J);
758 RecordIndex ix; 876 TRef tr2 = emitir(IRT(IR_BUFPUT, IRT_P32), hdr2, sep);
759 ix.tab = tab; 877 tr2 = emitir(IRT(IR_BUFPUT, IRT_P32), tr2, str);
760 ix.key = trlen; 878 str2 = emitir(IRT(IR_BUFSTR, IRT_STR), tr2, hdr2);
761 settabV(J->L, &ix.tabv, t); 879 }
762 setintV(&ix.keyv, len); 880 }
763 ix.idxchain = 0; 881 tr = hdr = recff_bufhdr(J);
764 if (results_wanted(J) != 0) { /* Specialize load only if needed. */ 882 if (str2) {
765 ix.val = 0; 883 tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, str);
766 J->base[0] = lj_record_idx(J, &ix); /* Load previous value. */ 884 str = str2;
767 rd->nres = 1; 885 rep = emitir(IRTI(IR_ADD), rep, lj_ir_kint(J, -1));
768 /* Assumes ix.key/ix.tab is not modified for raw lj_record_idx(). */ 886 }
769 } 887 tr = lj_ir_call(J, IRCALL_lj_buf_putstr_rep, tr, str, rep);
770 ix.val = TREF_NIL; 888 J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
771 lj_record_idx(J, &ix); /* Remove value. */ 889}
890
891static void LJ_FASTCALL recff_string_op(jit_State *J, RecordFFData *rd)
892{
893 TRef str = lj_ir_tostr(J, J->base[0]);
894 TRef hdr = recff_bufhdr(J);
895 TRef tr = lj_ir_call(J, rd->data, hdr, str);
896 J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
897}
898
899static void LJ_FASTCALL recff_string_find(jit_State *J, RecordFFData *rd)
900{
901 TRef trstr = lj_ir_tostr(J, J->base[0]);
902 TRef trpat = lj_ir_tostr(J, J->base[1]);
903 TRef trlen = emitir(IRTI(IR_FLOAD), trstr, IRFL_STR_LEN);
904 TRef tr0 = lj_ir_kint(J, 0);
905 TRef trstart;
906 GCstr *str = argv2str(J, &rd->argv[0]);
907 GCstr *pat = argv2str(J, &rd->argv[1]);
908 int32_t start;
909 J->needsnap = 1;
910 if (tref_isnil(J->base[2])) {
911 trstart = lj_ir_kint(J, 1);
912 start = 1;
913 } else {
914 trstart = lj_opt_narrow_toint(J, J->base[2]);
915 start = argv2int(J, &rd->argv[2]);
916 }
917 trstart = recff_string_start(J, str, &start, trstart, trlen, tr0);
918 if ((MSize)start <= str->len) {
919 emitir(IRTGI(IR_ULE), trstart, trlen);
920 } else {
921 emitir(IRTGI(IR_UGT), trstart, trlen);
922#if LJ_52
923 J->base[0] = TREF_NIL;
924 return;
925#else
926 trstart = trlen;
927 start = str->len;
928#endif
929 }
930 /* Fixed arg or no pattern matching chars? (Specialized to pattern string.) */
931 if ((J->base[2] && tref_istruecond(J->base[3])) ||
932 (emitir(IRTG(IR_EQ, IRT_STR), trpat, lj_ir_kstr(J, pat)),
933 !lj_str_haspattern(pat))) { /* Search for fixed string. */
934 TRef trsptr = emitir(IRT(IR_STRREF, IRT_P32), trstr, trstart);
935 TRef trpptr = emitir(IRT(IR_STRREF, IRT_P32), trpat, tr0);
936 TRef trslen = emitir(IRTI(IR_SUB), trlen, trstart);
937 TRef trplen = emitir(IRTI(IR_FLOAD), trpat, IRFL_STR_LEN);
938 TRef tr = lj_ir_call(J, IRCALL_lj_str_find, trsptr, trpptr, trslen, trplen);
939 TRef trp0 = lj_ir_kkptr(J, NULL);
940 if (lj_str_find(strdata(str)+(MSize)start, strdata(pat),
941 str->len-(MSize)start, pat->len)) {
942 TRef pos;
943 emitir(IRTG(IR_NE, IRT_P32), tr, trp0);
944 pos = emitir(IRTI(IR_SUB), tr, emitir(IRT(IR_STRREF, IRT_P32), trstr, tr0));
945 J->base[0] = emitir(IRTI(IR_ADD), pos, lj_ir_kint(J, 1));
946 J->base[1] = emitir(IRTI(IR_ADD), pos, trplen);
947 rd->nres = 2;
948 } else {
949 emitir(IRTG(IR_EQ, IRT_P32), tr, trp0);
950 J->base[0] = TREF_NIL;
951 }
952 } else { /* Search for pattern. */
953 recff_nyiu(J, rd);
954 return;
955 }
956}
957
958static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd)
959{
960 TRef trfmt = lj_ir_tostr(J, J->base[0]);
961 GCstr *fmt = argv2str(J, &rd->argv[0]);
962 int arg = 1;
963 TRef hdr, tr;
964 FormatState fs;
965 SFormat sf;
966 /* Specialize to the format string. */
967 emitir(IRTG(IR_EQ, IRT_STR), trfmt, lj_ir_kstr(J, fmt));
968 tr = hdr = recff_bufhdr(J);
969 lj_strfmt_init(&fs, strdata(fmt), fmt->len);
970 while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) { /* Parse format. */
971 TRef tra = sf == STRFMT_LIT ? 0 : J->base[arg++];
972 TRef trsf = lj_ir_kint(J, (int32_t)sf);
973 IRCallID id;
974 switch (STRFMT_TYPE(sf)) {
975 case STRFMT_LIT:
976 tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr,
977 lj_ir_kstr(J, lj_str_new(J->L, fs.str, fs.len)));
978 break;
979 case STRFMT_INT:
980 id = IRCALL_lj_strfmt_putfnum_int;
981 handle_int:
982 if (!tref_isinteger(tra))
983 goto handle_num;
984 if (sf == STRFMT_INT) { /* Shortcut for plain %d. */
985 tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr,
986 emitir(IRT(IR_TOSTR, IRT_STR), tra, IRTOSTR_INT));
987 } else {
988#if LJ_HASFFI
989 tra = emitir(IRT(IR_CONV, IRT_U64), tra,
990 (IRT_INT|(IRT_U64<<5)|IRCONV_SEXT));
991 tr = lj_ir_call(J, IRCALL_lj_strfmt_putfxint, tr, trsf, tra);
992 lj_needsplit(J);
993#else
994 recff_nyiu(J, rd); /* Don't bother working around this NYI. */
995 return;
996#endif
772 } 997 }
773 } else { /* Complex case: remove in the middle. */ 998 break;
774 recff_nyiu(J); 999 case STRFMT_UINT:
1000 id = IRCALL_lj_strfmt_putfnum_uint;
1001 goto handle_int;
1002 case STRFMT_NUM:
1003 id = IRCALL_lj_strfmt_putfnum;
1004 handle_num:
1005 tra = lj_ir_tonum(J, tra);
1006 tr = lj_ir_call(J, id, tr, trsf, tra);
1007 if (LJ_SOFTFP) lj_needsplit(J);
1008 break;
1009 case STRFMT_STR:
1010 if (!tref_isstr(tra)) {
1011 recff_nyiu(J, rd); /* NYI: __tostring and non-string types for %s. */
1012 return;
1013 }
1014 if (sf == STRFMT_STR) /* Shortcut for plain %s. */
1015 tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, tra);
1016 else if ((sf & STRFMT_T_QUOTED))
1017 tr = lj_ir_call(J, IRCALL_lj_strfmt_putquoted, tr, tra);
1018 else
1019 tr = lj_ir_call(J, IRCALL_lj_strfmt_putfstr, tr, trsf, tra);
1020 break;
1021 case STRFMT_CHAR:
1022 tra = lj_opt_narrow_toint(J, tra);
1023 if (sf == STRFMT_CHAR) /* Shortcut for plain %c. */
1024 tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr,
1025 emitir(IRT(IR_TOSTR, IRT_STR), tra, IRTOSTR_CHAR));
1026 else
1027 tr = lj_ir_call(J, IRCALL_lj_strfmt_putfchar, tr, trsf, tra);
1028 break;
1029 case STRFMT_PTR: /* NYI */
1030 case STRFMT_ERR:
1031 default:
1032 recff_nyiu(J, rd);
1033 return;
775 } 1034 }
776 } /* else: Interpreter will throw. */ 1035 }
1036 J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
777} 1037}
778 1038
1039/* -- Table library fast functions ---------------------------------------- */
1040
779static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd) 1041static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd)
780{ 1042{
781 RecordIndex ix; 1043 RecordIndex ix;
@@ -792,11 +1054,49 @@ static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd)
792 ix.idxchain = 0; 1054 ix.idxchain = 0;
793 lj_record_idx(J, &ix); /* Set new value. */ 1055 lj_record_idx(J, &ix); /* Set new value. */
794 } else { /* Complex case: insert in the middle. */ 1056 } else { /* Complex case: insert in the middle. */
795 recff_nyiu(J); 1057 recff_nyiu(J, rd);
1058 return;
796 } 1059 }
797 } /* else: Interpreter will throw. */ 1060 } /* else: Interpreter will throw. */
798} 1061}
799 1062
1063static void LJ_FASTCALL recff_table_concat(jit_State *J, RecordFFData *rd)
1064{
1065 TRef tab = J->base[0];
1066 if (tref_istab(tab)) {
1067 TRef sep = !tref_isnil(J->base[1]) ?
1068 lj_ir_tostr(J, J->base[1]) : lj_ir_knull(J, IRT_STR);
1069 TRef tri = (J->base[1] && !tref_isnil(J->base[2])) ?
1070 lj_opt_narrow_toint(J, J->base[2]) : lj_ir_kint(J, 1);
1071 TRef tre = (J->base[1] && J->base[2] && !tref_isnil(J->base[3])) ?
1072 lj_opt_narrow_toint(J, J->base[3]) :
1073 lj_ir_call(J, IRCALL_lj_tab_len, tab);
1074 TRef hdr = recff_bufhdr(J);
1075 TRef tr = lj_ir_call(J, IRCALL_lj_buf_puttab, hdr, tab, sep, tri, tre);
1076 emitir(IRTG(IR_NE, IRT_PTR), tr, lj_ir_kptr(J, NULL));
1077 J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
1078 } /* else: Interpreter will throw. */
1079 UNUSED(rd);
1080}
1081
1082static void LJ_FASTCALL recff_table_new(jit_State *J, RecordFFData *rd)
1083{
1084 TRef tra = lj_opt_narrow_toint(J, J->base[0]);
1085 TRef trh = lj_opt_narrow_toint(J, J->base[1]);
1086 J->base[0] = lj_ir_call(J, IRCALL_lj_tab_new_ah, tra, trh);
1087 UNUSED(rd);
1088}
1089
1090static void LJ_FASTCALL recff_table_clear(jit_State *J, RecordFFData *rd)
1091{
1092 TRef tr = J->base[0];
1093 if (tref_istab(tr)) {
1094 rd->nres = 0;
1095 lj_ir_call(J, IRCALL_lj_tab_clear, tr);
1096 J->needsnap = 1;
1097 } /* else: Interpreter will throw. */
1098}
1099
800/* -- I/O library fast functions ------------------------------------------ */ 1100/* -- I/O library fast functions ------------------------------------------ */
801 1101
802/* Get FILE* for I/O function. Any I/O error aborts recording, so there's 1102/* Get FILE* for I/O function. Any I/O error aborts recording, so there's
@@ -832,7 +1132,10 @@ static void LJ_FASTCALL recff_io_write(jit_State *J, RecordFFData *rd)
832 TRef buf = emitir(IRT(IR_STRREF, IRT_P32), str, zero); 1132 TRef buf = emitir(IRT(IR_STRREF, IRT_P32), str, zero);
833 TRef len = emitir(IRTI(IR_FLOAD), str, IRFL_STR_LEN); 1133 TRef len = emitir(IRTI(IR_FLOAD), str, IRFL_STR_LEN);
834 if (tref_isk(len) && IR(tref_ref(len))->i == 1) { 1134 if (tref_isk(len) && IR(tref_ref(len))->i == 1) {
835 TRef tr = emitir(IRT(IR_XLOAD, IRT_U8), buf, IRXLOAD_READONLY); 1135 IRIns *irs = IR(tref_ref(str));
1136 TRef tr = (irs->o == IR_TOSTR && irs->op2 == IRTOSTR_CHAR) ?
1137 irs->op1 :
1138 emitir(IRT(IR_XLOAD, IRT_U8), buf, IRXLOAD_READONLY);
836 tr = lj_ir_call(J, IRCALL_fputc, tr, fp); 1139 tr = lj_ir_call(J, IRCALL_fputc, tr, fp);
837 if (results_wanted(J) != 0) /* Check result only if not ignored. */ 1140 if (results_wanted(J) != 0) /* Check result only if not ignored. */
838 emitir(IRTGI(IR_NE), tr, lj_ir_kint(J, -1)); 1141 emitir(IRTGI(IR_NE), tr, lj_ir_kint(J, -1));
diff --git a/src/lj_gc.c b/src/lj_gc.c
index c2bc397d..8fea9853 100644
--- a/src/lj_gc.c
+++ b/src/lj_gc.c
@@ -12,6 +12,7 @@
12#include "lj_obj.h" 12#include "lj_obj.h"
13#include "lj_gc.h" 13#include "lj_gc.h"
14#include "lj_err.h" 14#include "lj_err.h"
15#include "lj_buf.h"
15#include "lj_str.h" 16#include "lj_str.h"
16#include "lj_tab.h" 17#include "lj_tab.h"
17#include "lj_func.h" 18#include "lj_func.h"
@@ -348,15 +349,6 @@ static size_t gc_propagate_gray(global_State *g)
348 349
349/* -- Sweep phase --------------------------------------------------------- */ 350/* -- Sweep phase --------------------------------------------------------- */
350 351
351/* Try to shrink some common data structures. */
352static void gc_shrink(global_State *g, lua_State *L)
353{
354 if (g->strnum <= (g->strmask >> 2) && g->strmask > LJ_MIN_STRTAB*2-1)
355 lj_str_resize(L, g->strmask >> 1); /* Shrink string table. */
356 if (g->tmpbuf.sz > LJ_MIN_SBUF*2)
357 lj_str_resizebuf(L, &g->tmpbuf, g->tmpbuf.sz >> 1); /* Shrink temp buf. */
358}
359
360/* Type of GC free functions. */ 352/* Type of GC free functions. */
361typedef void (LJ_FASTCALL *GCFreeFunc)(global_State *g, GCobj *o); 353typedef void (LJ_FASTCALL *GCFreeFunc)(global_State *g, GCobj *o);
362 354
@@ -483,7 +475,7 @@ static void gc_finalize(lua_State *L)
483 global_State *g = G(L); 475 global_State *g = G(L);
484 GCobj *o = gcnext(gcref(g->gc.mmudata)); 476 GCobj *o = gcnext(gcref(g->gc.mmudata));
485 cTValue *mo; 477 cTValue *mo;
486 lua_assert(gcref(g->jit_L) == NULL); /* Must not be called on trace. */ 478 lua_assert(tvref(g->jit_base) == NULL); /* Must not be called on trace. */
487 /* Unchain from list of userdata to be finalized. */ 479 /* Unchain from list of userdata to be finalized. */
488 if (o == gcref(g->gc.mmudata)) 480 if (o == gcref(g->gc.mmudata))
489 setgcrefnull(g->gc.mmudata); 481 setgcrefnull(g->gc.mmudata);
@@ -592,6 +584,8 @@ static void atomic(global_State *g, lua_State *L)
592 /* All marking done, clear weak tables. */ 584 /* All marking done, clear weak tables. */
593 gc_clearweak(gcref(g->gc.weak)); 585 gc_clearweak(gcref(g->gc.weak));
594 586
587 lj_buf_shrink(L, &g->tmpbuf); /* Shrink temp buffer. */
588
595 /* Prepare for sweep phase. */ 589 /* Prepare for sweep phase. */
596 g->gc.currentwhite = (uint8_t)otherwhite(g); /* Flip current white. */ 590 g->gc.currentwhite = (uint8_t)otherwhite(g); /* Flip current white. */
597 g->strempty.marked = g->gc.currentwhite; 591 g->strempty.marked = g->gc.currentwhite;
@@ -613,7 +607,7 @@ static size_t gc_onestep(lua_State *L)
613 g->gc.state = GCSatomic; /* End of mark phase. */ 607 g->gc.state = GCSatomic; /* End of mark phase. */
614 return 0; 608 return 0;
615 case GCSatomic: 609 case GCSatomic:
616 if (gcref(g->jit_L)) /* Don't run atomic phase on trace. */ 610 if (tvref(g->jit_base)) /* Don't run atomic phase on trace. */
617 return LJ_MAX_MEM; 611 return LJ_MAX_MEM;
618 atomic(g, L); 612 atomic(g, L);
619 g->gc.state = GCSsweepstring; /* Start of sweep phase. */ 613 g->gc.state = GCSsweepstring; /* Start of sweep phase. */
@@ -632,7 +626,8 @@ static size_t gc_onestep(lua_State *L)
632 MSize old = g->gc.total; 626 MSize old = g->gc.total;
633 setmref(g->gc.sweep, gc_sweep(g, mref(g->gc.sweep, GCRef), GCSWEEPMAX)); 627 setmref(g->gc.sweep, gc_sweep(g, mref(g->gc.sweep, GCRef), GCSWEEPMAX));
634 if (gcref(*mref(g->gc.sweep, GCRef)) == NULL) { 628 if (gcref(*mref(g->gc.sweep, GCRef)) == NULL) {
635 gc_shrink(g, L); 629 if (g->strnum <= (g->strmask >> 2) && g->strmask > LJ_MIN_STRTAB*2-1)
630 lj_str_resize(L, g->strmask >> 1); /* Shrink string table. */
636 if (gcref(g->gc.mmudata)) { /* Need any finalizations? */ 631 if (gcref(g->gc.mmudata)) { /* Need any finalizations? */
637 g->gc.state = GCSfinalize; 632 g->gc.state = GCSfinalize;
638#if LJ_HASFFI 633#if LJ_HASFFI
@@ -649,7 +644,7 @@ static size_t gc_onestep(lua_State *L)
649 } 644 }
650 case GCSfinalize: 645 case GCSfinalize:
651 if (gcref(g->gc.mmudata) != NULL) { 646 if (gcref(g->gc.mmudata) != NULL) {
652 if (gcref(g->jit_L)) /* Don't call finalizers on trace. */ 647 if (tvref(g->jit_base)) /* Don't call finalizers on trace. */
653 return LJ_MAX_MEM; 648 return LJ_MAX_MEM;
654 gc_finalize(L); /* Finalize one userdata object. */ 649 gc_finalize(L); /* Finalize one userdata object. */
655 if (g->gc.estimate > GCFINALIZECOST) 650 if (g->gc.estimate > GCFINALIZECOST)
@@ -711,8 +706,8 @@ void LJ_FASTCALL lj_gc_step_fixtop(lua_State *L)
711/* Perform multiple GC steps. Called from JIT-compiled code. */ 706/* Perform multiple GC steps. Called from JIT-compiled code. */
712int LJ_FASTCALL lj_gc_step_jit(global_State *g, MSize steps) 707int LJ_FASTCALL lj_gc_step_jit(global_State *g, MSize steps)
713{ 708{
714 lua_State *L = gco2th(gcref(g->jit_L)); 709 lua_State *L = gco2th(gcref(g->cur_L));
715 L->base = mref(G(L)->jit_base, TValue); 710 L->base = tvref(G(L)->jit_base);
716 L->top = curr_topL(L); 711 L->top = curr_topL(L);
717 while (steps-- > 0 && lj_gc_step(L) == 0) 712 while (steps-- > 0 && lj_gc_step(L) == 0)
718 ; 713 ;
diff --git a/src/lj_gdbjit.c b/src/lj_gdbjit.c
index 5e7fca1c..1e1ac31f 100644
--- a/src/lj_gdbjit.c
+++ b/src/lj_gdbjit.c
@@ -14,6 +14,8 @@
14#include "lj_err.h" 14#include "lj_err.h"
15#include "lj_debug.h" 15#include "lj_debug.h"
16#include "lj_frame.h" 16#include "lj_frame.h"
17#include "lj_buf.h"
18#include "lj_strfmt.h"
17#include "lj_jit.h" 19#include "lj_jit.h"
18#include "lj_dispatch.h" 20#include "lj_dispatch.h"
19 21
@@ -426,16 +428,6 @@ static void gdbjit_catnum(GDBJITctx *ctx, uint32_t n)
426 *ctx->p++ = '0' + n; 428 *ctx->p++ = '0' + n;
427} 429}
428 430
429/* Add a ULEB128 value. */
430static void gdbjit_uleb128(GDBJITctx *ctx, uint32_t v)
431{
432 uint8_t *p = ctx->p;
433 for (; v >= 0x80; v >>= 7)
434 *p++ = (uint8_t)((v & 0x7f) | 0x80);
435 *p++ = (uint8_t)v;
436 ctx->p = p;
437}
438
439/* Add a SLEB128 value. */ 431/* Add a SLEB128 value. */
440static void gdbjit_sleb128(GDBJITctx *ctx, int32_t v) 432static void gdbjit_sleb128(GDBJITctx *ctx, int32_t v)
441{ 433{
@@ -452,7 +444,7 @@ static void gdbjit_sleb128(GDBJITctx *ctx, int32_t v)
452#define DU16(x) (*(uint16_t *)p = (x), p += 2) 444#define DU16(x) (*(uint16_t *)p = (x), p += 2)
453#define DU32(x) (*(uint32_t *)p = (x), p += 4) 445#define DU32(x) (*(uint32_t *)p = (x), p += 4)
454#define DADDR(x) (*(uintptr_t *)p = (x), p += sizeof(uintptr_t)) 446#define DADDR(x) (*(uintptr_t *)p = (x), p += sizeof(uintptr_t))
455#define DUV(x) (ctx->p = p, gdbjit_uleb128(ctx, (x)), p = ctx->p) 447#define DUV(x) (p = (uint8_t *)lj_strfmt_wuleb128((char *)p, (x)))
456#define DSV(x) (ctx->p = p, gdbjit_sleb128(ctx, (x)), p = ctx->p) 448#define DSV(x) (ctx->p = p, gdbjit_sleb128(ctx, (x)), p = ctx->p)
457#define DSTR(str) (ctx->p = p, gdbjit_strz(ctx, (str)), p = ctx->p) 449#define DSTR(str) (ctx->p = p, gdbjit_strz(ctx, (str)), p = ctx->p)
458#define DALIGNNOP(s) while ((uintptr_t)p & ((s)-1)) *p++ = DW_CFA_nop 450#define DALIGNNOP(s) while ((uintptr_t)p & ((s)-1)) *p++ = DW_CFA_nop
diff --git a/src/lj_ir.c b/src/lj_ir.c
index b2170a10..460cd307 100644
--- a/src/lj_ir.c
+++ b/src/lj_ir.c
@@ -15,6 +15,7 @@
15#if LJ_HASJIT 15#if LJ_HASJIT
16 16
17#include "lj_gc.h" 17#include "lj_gc.h"
18#include "lj_buf.h"
18#include "lj_str.h" 19#include "lj_str.h"
19#include "lj_tab.h" 20#include "lj_tab.h"
20#include "lj_ir.h" 21#include "lj_ir.h"
@@ -29,6 +30,7 @@
29#endif 30#endif
30#include "lj_vm.h" 31#include "lj_vm.h"
31#include "lj_strscan.h" 32#include "lj_strscan.h"
33#include "lj_strfmt.h"
32#include "lj_lib.h" 34#include "lj_lib.h"
33 35
34/* Some local macros to save typing. Undef'd at the end. */ 36/* Some local macros to save typing. Undef'd at the end. */
@@ -443,7 +445,8 @@ TRef LJ_FASTCALL lj_ir_tostr(jit_State *J, TRef tr)
443 if (!tref_isstr(tr)) { 445 if (!tref_isstr(tr)) {
444 if (!tref_isnumber(tr)) 446 if (!tref_isnumber(tr))
445 lj_trace_err(J, LJ_TRERR_BADTYPE); 447 lj_trace_err(J, LJ_TRERR_BADTYPE);
446 tr = emitir(IRT(IR_TOSTR, IRT_STR), tr, 0); 448 tr = emitir(IRT(IR_TOSTR, IRT_STR), tr,
449 tref_isnum(tr) ? IRTOSTR_NUM : IRTOSTR_INT);
447 } 450 }
448 return tr; 451 return tr;
449} 452}
diff --git a/src/lj_ir.h b/src/lj_ir.h
index 80763d88..14b86165 100644
--- a/src/lj_ir.h
+++ b/src/lj_ir.h
@@ -40,6 +40,7 @@
40 _(USE, S , ref, ___) \ 40 _(USE, S , ref, ___) \
41 _(PHI, S , ref, ref) \ 41 _(PHI, S , ref, ref) \
42 _(RENAME, S , ref, lit) \ 42 _(RENAME, S , ref, lit) \
43 _(PROF, S , ___, ___) \
43 \ 44 \
44 /* Constants. */ \ 45 /* Constants. */ \
45 _(KPRI, N , ___, ___) \ 46 _(KPRI, N , ___, ___) \
@@ -96,6 +97,7 @@
96 _(UREFC, LW, ref, lit) \ 97 _(UREFC, LW, ref, lit) \
97 _(FREF, R , ref, lit) \ 98 _(FREF, R , ref, lit) \
98 _(STRREF, N , ref, ref) \ 99 _(STRREF, N , ref, ref) \
100 _(LREF, L , ___, ___) \
99 \ 101 \
100 /* Loads and Stores. These must be in the same order. */ \ 102 /* Loads and Stores. These must be in the same order. */ \
101 _(ALOAD, L , ref, ___) \ 103 _(ALOAD, L , ref, ___) \
@@ -120,6 +122,11 @@
120 _(CNEW, AW, ref, ref) \ 122 _(CNEW, AW, ref, ref) \
121 _(CNEWI, NW, ref, ref) /* CSE is ok, not marked as A. */ \ 123 _(CNEWI, NW, ref, ref) /* CSE is ok, not marked as A. */ \
122 \ 124 \
125 /* Buffer operations. */ \
126 _(BUFHDR, L , ref, lit) \
127 _(BUFPUT, L , ref, ref) \
128 _(BUFSTR, A , ref, ref) \
129 \
123 /* Barriers. */ \ 130 /* Barriers. */ \
124 _(TBAR, S , ref, ___) \ 131 _(TBAR, S , ref, ___) \
125 _(OBAR, S , ref, ref) \ 132 _(OBAR, S , ref, ref) \
@@ -128,11 +135,12 @@
128 /* Type conversions. */ \ 135 /* Type conversions. */ \
129 _(CONV, NW, ref, lit) \ 136 _(CONV, NW, ref, lit) \
130 _(TOBIT, N , ref, ref) \ 137 _(TOBIT, N , ref, ref) \
131 _(TOSTR, N , ref, ___) \ 138 _(TOSTR, N , ref, lit) \
132 _(STRTO, N , ref, ___) \ 139 _(STRTO, N , ref, ___) \
133 \ 140 \
134 /* Calls. */ \ 141 /* Calls. */ \
135 _(CALLN, N , ref, lit) \ 142 _(CALLN, N , ref, lit) \
143 _(CALLA, A , ref, lit) \
136 _(CALLL, L , ref, lit) \ 144 _(CALLL, L , ref, lit) \
137 _(CALLS, S , ref, lit) \ 145 _(CALLS, S , ref, lit) \
138 _(CALLXS, S , ref, ref) \ 146 _(CALLXS, S , ref, ref) \
@@ -186,6 +194,8 @@ IRFPMDEF(FPMENUM)
186 _(STR_LEN, offsetof(GCstr, len)) \ 194 _(STR_LEN, offsetof(GCstr, len)) \
187 _(FUNC_ENV, offsetof(GCfunc, l.env)) \ 195 _(FUNC_ENV, offsetof(GCfunc, l.env)) \
188 _(FUNC_PC, offsetof(GCfunc, l.pc)) \ 196 _(FUNC_PC, offsetof(GCfunc, l.pc)) \
197 _(FUNC_FFID, offsetof(GCfunc, l.ffid)) \
198 _(THREAD_ENV, offsetof(lua_State, env)) \
189 _(TAB_META, offsetof(GCtab, metatable)) \ 199 _(TAB_META, offsetof(GCtab, metatable)) \
190 _(TAB_ARRAY, offsetof(GCtab, array)) \ 200 _(TAB_ARRAY, offsetof(GCtab, array)) \
191 _(TAB_NODE, offsetof(GCtab, node)) \ 201 _(TAB_NODE, offsetof(GCtab, node)) \
@@ -221,13 +231,16 @@ IRFLDEF(FLENUM)
221#define IRXLOAD_VOLATILE 2 /* Load from volatile data. */ 231#define IRXLOAD_VOLATILE 2 /* Load from volatile data. */
222#define IRXLOAD_UNALIGNED 4 /* Unaligned load. */ 232#define IRXLOAD_UNALIGNED 4 /* Unaligned load. */
223 233
234/* BUFHDR mode, stored in op2. */
235#define IRBUFHDR_RESET 0 /* Reset buffer. */
236#define IRBUFHDR_APPEND 1 /* Append to buffer. */
237
224/* CONV mode, stored in op2. */ 238/* CONV mode, stored in op2. */
225#define IRCONV_SRCMASK 0x001f /* Source IRType. */ 239#define IRCONV_SRCMASK 0x001f /* Source IRType. */
226#define IRCONV_DSTMASK 0x03e0 /* Dest. IRType (also in ir->t). */ 240#define IRCONV_DSTMASK 0x03e0 /* Dest. IRType (also in ir->t). */
227#define IRCONV_DSH 5 241#define IRCONV_DSH 5
228#define IRCONV_NUM_INT ((IRT_NUM<<IRCONV_DSH)|IRT_INT) 242#define IRCONV_NUM_INT ((IRT_NUM<<IRCONV_DSH)|IRT_INT)
229#define IRCONV_INT_NUM ((IRT_INT<<IRCONV_DSH)|IRT_NUM) 243#define IRCONV_INT_NUM ((IRT_INT<<IRCONV_DSH)|IRT_NUM)
230#define IRCONV_TRUNC 0x0400 /* Truncate number to integer. */
231#define IRCONV_SEXT 0x0800 /* Sign-extend integer to integer. */ 244#define IRCONV_SEXT 0x0800 /* Sign-extend integer to integer. */
232#define IRCONV_MODEMASK 0x0fff 245#define IRCONV_MODEMASK 0x0fff
233#define IRCONV_CONVMASK 0xf000 246#define IRCONV_CONVMASK 0xf000
@@ -238,6 +251,11 @@ IRFLDEF(FLENUM)
238#define IRCONV_INDEX (2<<IRCONV_CSH) /* Check + special backprop rules. */ 251#define IRCONV_INDEX (2<<IRCONV_CSH) /* Check + special backprop rules. */
239#define IRCONV_CHECK (3<<IRCONV_CSH) /* Number checked for integerness. */ 252#define IRCONV_CHECK (3<<IRCONV_CSH) /* Number checked for integerness. */
240 253
254/* TOSTR mode, stored in op2. */
255#define IRTOSTR_INT 0 /* Convert integer to string. */
256#define IRTOSTR_NUM 1 /* Convert number to string. */
257#define IRTOSTR_CHAR 2 /* Convert char value to string. */
258
241/* -- IR operands --------------------------------------------------------- */ 259/* -- IR operands --------------------------------------------------------- */
242 260
243/* IR operand mode (2 bit). */ 261/* IR operand mode (2 bit). */
@@ -464,6 +482,7 @@ typedef uint32_t TRef;
464#define tref_isnil(tr) (tref_istype((tr), IRT_NIL)) 482#define tref_isnil(tr) (tref_istype((tr), IRT_NIL))
465#define tref_isfalse(tr) (tref_istype((tr), IRT_FALSE)) 483#define tref_isfalse(tr) (tref_istype((tr), IRT_FALSE))
466#define tref_istrue(tr) (tref_istype((tr), IRT_TRUE)) 484#define tref_istrue(tr) (tref_istype((tr), IRT_TRUE))
485#define tref_islightud(tr) (tref_istype((tr), IRT_LIGHTUD))
467#define tref_isstr(tr) (tref_istype((tr), IRT_STR)) 486#define tref_isstr(tr) (tref_istype((tr), IRT_STR))
468#define tref_isfunc(tr) (tref_istype((tr), IRT_FUNC)) 487#define tref_isfunc(tr) (tref_istype((tr), IRT_FUNC))
469#define tref_iscdata(tr) (tref_istype((tr), IRT_CDATA)) 488#define tref_iscdata(tr) (tref_istype((tr), IRT_CDATA))
diff --git a/src/lj_ircall.h b/src/lj_ircall.h
index af30ba8f..27c7fbe5 100644
--- a/src/lj_ircall.h
+++ b/src/lj_ircall.h
@@ -16,7 +16,7 @@ typedef struct CCallInfo {
16 uint32_t flags; /* Number of arguments and flags. */ 16 uint32_t flags; /* Number of arguments and flags. */
17} CCallInfo; 17} CCallInfo;
18 18
19#define CCI_NARGS(ci) ((ci)->flags & 0xff) /* Extract # of args. */ 19#define CCI_NARGS(ci) ((ci)->flags & 0xff) /* # of args. */
20#define CCI_NARGS_MAX 32 /* Max. # of args. */ 20#define CCI_NARGS_MAX 32 /* Max. # of args. */
21 21
22#define CCI_OTSHIFT 16 22#define CCI_OTSHIFT 16
@@ -25,6 +25,7 @@ typedef struct CCallInfo {
25#define CCI_OP(ci) ((ci)->flags >> CCI_OPSHIFT) /* Get op. */ 25#define CCI_OP(ci) ((ci)->flags >> CCI_OPSHIFT) /* Get op. */
26 26
27#define CCI_CALL_N (IR_CALLN << CCI_OPSHIFT) 27#define CCI_CALL_N (IR_CALLN << CCI_OPSHIFT)
28#define CCI_CALL_A (IR_CALLA << CCI_OPSHIFT)
28#define CCI_CALL_L (IR_CALLL << CCI_OPSHIFT) 29#define CCI_CALL_L (IR_CALLL << CCI_OPSHIFT)
29#define CCI_CALL_S (IR_CALLS << CCI_OPSHIFT) 30#define CCI_CALL_S (IR_CALLS << CCI_OPSHIFT)
30#define CCI_CALL_FN (CCI_CALL_N|CCI_CC_FASTCALL) 31#define CCI_CALL_FN (CCI_CALL_N|CCI_CC_FASTCALL)
@@ -45,6 +46,17 @@ typedef struct CCallInfo {
45#define CCI_CC_FASTCALL 0x2000 /* Fastcall calling convention. */ 46#define CCI_CC_FASTCALL 0x2000 /* Fastcall calling convention. */
46#define CCI_CC_STDCALL 0x3000 /* Stdcall calling convention. */ 47#define CCI_CC_STDCALL 0x3000 /* Stdcall calling convention. */
47 48
49/* Extra args for SOFTFP, SPLIT 64 bit. */
50#define CCI_XARGS_SHIFT 14
51#define CCI_XARGS(ci) (((ci)->flags >> CCI_XARGS_SHIFT) & 3)
52#define CCI_XA (1u << CCI_XARGS_SHIFT)
53
54#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
55#define CCI_XNARGS(ci) (CCI_NARGS((ci)) + CCI_XARGS((ci)))
56#else
57#define CCI_XNARGS(ci) CCI_NARGS((ci))
58#endif
59
48/* Helpers for conditional function definitions. */ 60/* Helpers for conditional function definitions. */
49#define IRCALLCOND_ANY(x) x 61#define IRCALLCOND_ANY(x) x
50 62
@@ -87,26 +99,52 @@ typedef struct CCallInfo {
87#endif 99#endif
88 100
89#if LJ_SOFTFP 101#if LJ_SOFTFP
90#define ARG1_FP 2 /* Treat as 2 32 bit arguments. */ 102#define XA_FP CCI_XA
103#define XA2_FP (CCI_XA+CCI_XA)
91#else 104#else
92#define ARG1_FP 1 105#define XA_FP 0
106#define XA2_FP 0
93#endif 107#endif
94 108
95#if LJ_32 109#if LJ_32
96#define ARG2_64 4 /* Treat as 4 32 bit arguments. */ 110#define XA_64 CCI_XA
111#define XA2_64 (CCI_XA+CCI_XA)
97#else 112#else
98#define ARG2_64 2 113#define XA_64 0
114#define XA2_64 0
99#endif 115#endif
100 116
101/* Function definitions for CALL* instructions. */ 117/* Function definitions for CALL* instructions. */
102#define IRCALLDEF(_) \ 118#define IRCALLDEF(_) \
103 _(ANY, lj_str_cmp, 2, FN, INT, CCI_NOFPRCLOBBER) \ 119 _(ANY, lj_str_cmp, 2, FN, INT, CCI_NOFPRCLOBBER) \
120 _(ANY, lj_str_find, 4, N, P32, 0) \
104 _(ANY, lj_str_new, 3, S, STR, CCI_L) \ 121 _(ANY, lj_str_new, 3, S, STR, CCI_L) \
105 _(ANY, lj_strscan_num, 2, FN, INT, 0) \ 122 _(ANY, lj_strscan_num, 2, FN, INT, 0) \
106 _(ANY, lj_str_fromint, 2, FN, STR, CCI_L) \ 123 _(ANY, lj_strfmt_int, 2, FN, STR, CCI_L) \
107 _(ANY, lj_str_fromnum, 2, FN, STR, CCI_L) \ 124 _(ANY, lj_strfmt_num, 2, FN, STR, CCI_L) \
125 _(ANY, lj_strfmt_char, 2, FN, STR, CCI_L) \
126 _(ANY, lj_strfmt_putint, 2, FL, P32, 0) \
127 _(ANY, lj_strfmt_putnum, 2, FL, P32, 0) \
128 _(ANY, lj_strfmt_putquoted, 2, FL, P32, 0) \
129 _(ANY, lj_strfmt_putfxint, 3, L, P32, XA_64) \
130 _(ANY, lj_strfmt_putfnum_int, 3, L, P32, XA_FP) \
131 _(ANY, lj_strfmt_putfnum_uint, 3, L, P32, XA_FP) \
132 _(ANY, lj_strfmt_putfnum, 3, L, P32, XA_FP) \
133 _(ANY, lj_strfmt_putfstr, 3, L, P32, 0) \
134 _(ANY, lj_strfmt_putfchar, 3, L, P32, 0) \
135 _(ANY, lj_buf_putmem, 3, S, P32, 0) \
136 _(ANY, lj_buf_putstr, 2, FL, P32, 0) \
137 _(ANY, lj_buf_putchar, 2, FL, P32, 0) \
138 _(ANY, lj_buf_putstr_reverse, 2, FL, P32, 0) \
139 _(ANY, lj_buf_putstr_lower, 2, FL, P32, 0) \
140 _(ANY, lj_buf_putstr_upper, 2, FL, P32, 0) \
141 _(ANY, lj_buf_putstr_rep, 3, L, P32, 0) \
142 _(ANY, lj_buf_puttab, 5, L, P32, 0) \
143 _(ANY, lj_buf_tostr, 1, FL, STR, 0) \
144 _(ANY, lj_tab_new_ah, 3, A, TAB, CCI_L) \
108 _(ANY, lj_tab_new1, 2, FS, TAB, CCI_L) \ 145 _(ANY, lj_tab_new1, 2, FS, TAB, CCI_L) \
109 _(ANY, lj_tab_dup, 2, FS, TAB, CCI_L) \ 146 _(ANY, lj_tab_dup, 2, FS, TAB, CCI_L) \
147 _(ANY, lj_tab_clear, 1, FS, NIL, 0) \
110 _(ANY, lj_tab_newkey, 3, S, P32, CCI_L) \ 148 _(ANY, lj_tab_newkey, 3, S, P32, CCI_L) \
111 _(ANY, lj_tab_len, 1, FL, INT, 0) \ 149 _(ANY, lj_tab_len, 1, FL, INT, 0) \
112 _(ANY, lj_gc_step_jit, 2, FS, NIL, CCI_L) \ 150 _(ANY, lj_gc_step_jit, 2, FS, NIL, CCI_L) \
@@ -114,29 +152,29 @@ typedef struct CCallInfo {
114 _(ANY, lj_mem_newgco, 2, FS, P32, CCI_L) \ 152 _(ANY, lj_mem_newgco, 2, FS, P32, CCI_L) \
115 _(ANY, lj_math_random_step, 1, FS, NUM, CCI_CASTU64|CCI_NOFPRCLOBBER) \ 153 _(ANY, lj_math_random_step, 1, FS, NUM, CCI_CASTU64|CCI_NOFPRCLOBBER) \
116 _(ANY, lj_vm_modi, 2, FN, INT, 0) \ 154 _(ANY, lj_vm_modi, 2, FN, INT, 0) \
117 _(ANY, sinh, ARG1_FP, N, NUM, 0) \ 155 _(ANY, sinh, 1, N, NUM, XA_FP) \
118 _(ANY, cosh, ARG1_FP, N, NUM, 0) \ 156 _(ANY, cosh, 1, N, NUM, XA_FP) \
119 _(ANY, tanh, ARG1_FP, N, NUM, 0) \ 157 _(ANY, tanh, 1, N, NUM, XA_FP) \
120 _(ANY, fputc, 2, S, INT, 0) \ 158 _(ANY, fputc, 2, S, INT, 0) \
121 _(ANY, fwrite, 4, S, INT, 0) \ 159 _(ANY, fwrite, 4, S, INT, 0) \
122 _(ANY, fflush, 1, S, INT, 0) \ 160 _(ANY, fflush, 1, S, INT, 0) \
123 /* ORDER FPM */ \ 161 /* ORDER FPM */ \
124 _(FPMATH, lj_vm_floor, ARG1_FP, N, NUM, 0) \ 162 _(FPMATH, lj_vm_floor, 1, N, NUM, XA_FP) \
125 _(FPMATH, lj_vm_ceil, ARG1_FP, N, NUM, 0) \ 163 _(FPMATH, lj_vm_ceil, 1, N, NUM, XA_FP) \
126 _(FPMATH, lj_vm_trunc, ARG1_FP, N, NUM, 0) \ 164 _(FPMATH, lj_vm_trunc, 1, N, NUM, XA_FP) \
127 _(FPMATH, sqrt, ARG1_FP, N, NUM, 0) \ 165 _(FPMATH, sqrt, 1, N, NUM, XA_FP) \
128 _(FPMATH, exp, ARG1_FP, N, NUM, 0) \ 166 _(FPMATH, exp, 1, N, NUM, XA_FP) \
129 _(FPMATH, lj_vm_exp2, ARG1_FP, N, NUM, 0) \ 167 _(FPMATH, lj_vm_exp2, 1, N, NUM, XA_FP) \
130 _(FPMATH, log, ARG1_FP, N, NUM, 0) \ 168 _(FPMATH, log, 1, N, NUM, XA_FP) \
131 _(FPMATH, lj_vm_log2, ARG1_FP, N, NUM, 0) \ 169 _(FPMATH, lj_vm_log2, 1, N, NUM, XA_FP) \
132 _(FPMATH, log10, ARG1_FP, N, NUM, 0) \ 170 _(FPMATH, log10, 1, N, NUM, XA_FP) \
133 _(FPMATH, sin, ARG1_FP, N, NUM, 0) \ 171 _(FPMATH, sin, 1, N, NUM, XA_FP) \
134 _(FPMATH, cos, ARG1_FP, N, NUM, 0) \ 172 _(FPMATH, cos, 1, N, NUM, XA_FP) \
135 _(FPMATH, tan, ARG1_FP, N, NUM, 0) \ 173 _(FPMATH, tan, 1, N, NUM, XA_FP) \
136 _(FPMATH, lj_vm_powi, ARG1_FP+1, N, NUM, 0) \ 174 _(FPMATH, lj_vm_powi, 2, N, NUM, XA_FP) \
137 _(FPMATH, pow, ARG1_FP*2, N, NUM, 0) \ 175 _(FPMATH, pow, 2, N, NUM, XA2_FP) \
138 _(FPMATH, atan2, ARG1_FP*2, N, NUM, 0) \ 176 _(FPMATH, atan2, 2, N, NUM, XA2_FP) \
139 _(FPMATH, ldexp, ARG1_FP+1, N, NUM, 0) \ 177 _(FPMATH, ldexp, 2, N, NUM, XA_FP) \
140 _(SOFTFP, lj_vm_tobit, 2, N, INT, 0) \ 178 _(SOFTFP, lj_vm_tobit, 2, N, INT, 0) \
141 _(SOFTFP, softfp_add, 4, N, NUM, 0) \ 179 _(SOFTFP, softfp_add, 4, N, NUM, 0) \
142 _(SOFTFP, softfp_sub, 4, N, NUM, 0) \ 180 _(SOFTFP, softfp_sub, 4, N, NUM, 0) \
@@ -153,26 +191,32 @@ typedef struct CCallInfo {
153 _(SOFTFP_FFI, softfp_ui2f, 1, N, FLOAT, 0) \ 191 _(SOFTFP_FFI, softfp_ui2f, 1, N, FLOAT, 0) \
154 _(SOFTFP_FFI, softfp_f2i, 1, N, INT, 0) \ 192 _(SOFTFP_FFI, softfp_f2i, 1, N, INT, 0) \
155 _(SOFTFP_FFI, softfp_f2ui, 1, N, INT, 0) \ 193 _(SOFTFP_FFI, softfp_f2ui, 1, N, INT, 0) \
156 _(FP64_FFI, fp64_l2d, 2, N, NUM, 0) \ 194 _(FP64_FFI, fp64_l2d, 1, N, NUM, XA_64) \
157 _(FP64_FFI, fp64_ul2d, 2, N, NUM, 0) \ 195 _(FP64_FFI, fp64_ul2d, 1, N, NUM, XA_64) \
158 _(FP64_FFI, fp64_l2f, 2, N, FLOAT, 0) \ 196 _(FP64_FFI, fp64_l2f, 1, N, FLOAT, XA_64) \
159 _(FP64_FFI, fp64_ul2f, 2, N, FLOAT, 0) \ 197 _(FP64_FFI, fp64_ul2f, 1, N, FLOAT, XA_64) \
160 _(FP64_FFI, fp64_d2l, ARG1_FP, N, I64, 0) \ 198 _(FP64_FFI, fp64_d2l, 1, N, I64, XA_FP) \
161 _(FP64_FFI, fp64_d2ul, ARG1_FP, N, U64, 0) \ 199 _(FP64_FFI, fp64_d2ul, 1, N, U64, XA_FP) \
162 _(FP64_FFI, fp64_f2l, 1, N, I64, 0) \ 200 _(FP64_FFI, fp64_f2l, 1, N, I64, 0) \
163 _(FP64_FFI, fp64_f2ul, 1, N, U64, 0) \ 201 _(FP64_FFI, fp64_f2ul, 1, N, U64, 0) \
164 _(FFI, lj_carith_divi64, ARG2_64, N, I64, CCI_NOFPRCLOBBER) \ 202 _(FFI, lj_carith_divi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \
165 _(FFI, lj_carith_divu64, ARG2_64, N, U64, CCI_NOFPRCLOBBER) \ 203 _(FFI, lj_carith_divu64, 2, N, U64, XA2_64|CCI_NOFPRCLOBBER) \
166 _(FFI, lj_carith_modi64, ARG2_64, N, I64, CCI_NOFPRCLOBBER) \ 204 _(FFI, lj_carith_modi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \
167 _(FFI, lj_carith_modu64, ARG2_64, N, U64, CCI_NOFPRCLOBBER) \ 205 _(FFI, lj_carith_modu64, 2, N, U64, XA2_64|CCI_NOFPRCLOBBER) \
168 _(FFI, lj_carith_powi64, ARG2_64, N, I64, CCI_NOFPRCLOBBER) \ 206 _(FFI, lj_carith_powi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \
169 _(FFI, lj_carith_powu64, ARG2_64, N, U64, CCI_NOFPRCLOBBER) \ 207 _(FFI, lj_carith_powu64, 2, N, U64, XA2_64|CCI_NOFPRCLOBBER) \
170 _(FFI, lj_cdata_setfin, 2, FN, P32, CCI_L) \ 208 _(FFI, lj_cdata_newv, 4, S, CDATA, CCI_L) \
171 _(FFI, strlen, 1, L, INTP, 0) \ 209 _(FFI, lj_cdata_setfin, 4, S, NIL, CCI_L) \
172 _(FFI, memcpy, 3, S, PTR, 0) \ 210 _(FFI, strlen, 1, L, INTP, 0) \
173 _(FFI, memset, 3, S, PTR, 0) \ 211 _(FFI, memcpy, 3, S, PTR, 0) \
174 _(FFI, lj_vm_errno, 0, S, INT, CCI_NOFPRCLOBBER) \ 212 _(FFI, memset, 3, S, PTR, 0) \
175 _(FFI32, lj_carith_mul64, ARG2_64, N, I64, CCI_NOFPRCLOBBER) 213 _(FFI, lj_vm_errno, 0, S, INT, CCI_NOFPRCLOBBER) \
214 _(FFI32, lj_carith_mul64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \
215 _(FFI32, lj_carith_shl64, 2, N, U64, XA_64|CCI_NOFPRCLOBBER) \
216 _(FFI32, lj_carith_shr64, 2, N, U64, XA_64|CCI_NOFPRCLOBBER) \
217 _(FFI32, lj_carith_sar64, 2, N, U64, XA_64|CCI_NOFPRCLOBBER) \
218 _(FFI32, lj_carith_rol64, 2, N, U64, XA_64|CCI_NOFPRCLOBBER) \
219 _(FFI32, lj_carith_ror64, 2, N, U64, XA_64|CCI_NOFPRCLOBBER) \
176 \ 220 \
177 /* End of list. */ 221 /* End of list. */
178 222
diff --git a/src/lj_jit.h b/src/lj_jit.h
index ecd79de5..4246e9db 100644
--- a/src/lj_jit.h
+++ b/src/lj_jit.h
@@ -14,18 +14,15 @@
14 14
15/* CPU-specific JIT engine flags. */ 15/* CPU-specific JIT engine flags. */
16#if LJ_TARGET_X86ORX64 16#if LJ_TARGET_X86ORX64
17#define JIT_F_CMOV 0x00000010 17#define JIT_F_SSE2 0x00000010
18#define JIT_F_SSE2 0x00000020 18#define JIT_F_SSE3 0x00000020
19#define JIT_F_SSE3 0x00000040 19#define JIT_F_SSE4_1 0x00000040
20#define JIT_F_SSE4_1 0x00000080 20#define JIT_F_PREFER_IMUL 0x00000080
21#define JIT_F_P4 0x00000100 21#define JIT_F_LEA_AGU 0x00000100
22#define JIT_F_PREFER_IMUL 0x00000200
23#define JIT_F_SPLIT_XMM 0x00000400
24#define JIT_F_LEA_AGU 0x00000800
25 22
26/* Names for the CPU-specific flags. Must match the order above. */ 23/* Names for the CPU-specific flags. Must match the order above. */
27#define JIT_F_CPU_FIRST JIT_F_CMOV 24#define JIT_F_CPU_FIRST JIT_F_SSE2
28#define JIT_F_CPUSTRING "\4CMOV\4SSE2\4SSE3\6SSE4.1\2P4\3AMD\2K8\4ATOM" 25#define JIT_F_CPUSTRING "\4SSE2\4SSE3\6SSE4.1\3AMD\4ATOM"
29#elif LJ_TARGET_ARM 26#elif LJ_TARGET_ARM
30#define JIT_F_ARMV6_ 0x00000010 27#define JIT_F_ARMV6_ 0x00000010
31#define JIT_F_ARMV6T2_ 0x00000020 28#define JIT_F_ARMV6T2_ 0x00000020
@@ -100,6 +97,7 @@
100 _(\012, maxirconst, 500) /* Max. # of IR constants of a trace. */ \ 97 _(\012, maxirconst, 500) /* Max. # of IR constants of a trace. */ \
101 _(\007, maxside, 100) /* Max. # of side traces of a root trace. */ \ 98 _(\007, maxside, 100) /* Max. # of side traces of a root trace. */ \
102 _(\007, maxsnap, 500) /* Max. # of snapshots for a trace. */ \ 99 _(\007, maxsnap, 500) /* Max. # of snapshots for a trace. */ \
100 _(\011, minstitch, 0) /* Min. # of IR ins for a stitched trace. */ \
103 \ 101 \
104 _(\007, hotloop, 56) /* # of iter. to detect a hot loop/call. */ \ 102 _(\007, hotloop, 56) /* # of iter. to detect a hot loop/call. */ \
105 _(\007, hotexit, 10) /* # of taken exits to start a side trace. */ \ 103 _(\007, hotexit, 10) /* # of taken exits to start a side trace. */ \
@@ -205,7 +203,8 @@ typedef enum {
205 LJ_TRLINK_UPREC, /* Up-recursion. */ 203 LJ_TRLINK_UPREC, /* Up-recursion. */
206 LJ_TRLINK_DOWNREC, /* Down-recursion. */ 204 LJ_TRLINK_DOWNREC, /* Down-recursion. */
207 LJ_TRLINK_INTERP, /* Fallback to interpreter. */ 205 LJ_TRLINK_INTERP, /* Fallback to interpreter. */
208 LJ_TRLINK_RETURN /* Return to interpreter. */ 206 LJ_TRLINK_RETURN, /* Return to interpreter. */
207 LJ_TRLINK_STITCH /* Trace stitching. */
209} TraceLink; 208} TraceLink;
210 209
211/* Trace object. */ 210/* Trace object. */
@@ -400,6 +399,12 @@ typedef struct jit_State {
400 size_t szallmcarea; /* Total size of all allocated mcode areas. */ 399 size_t szallmcarea; /* Total size of all allocated mcode areas. */
401 400
402 TValue errinfo; /* Additional info element for trace errors. */ 401 TValue errinfo; /* Additional info element for trace errors. */
402
403#if LJ_HASPROFILE
404 GCproto *prev_pt; /* Previous prototype. */
405 BCLine prev_line; /* Previous line. */
406 int prof_mode; /* Profiling mode: 0, 'f', 'l'. */
407#endif
403} 408}
404#if LJ_TARGET_ARM 409#if LJ_TARGET_ARM
405LJ_ALIGN(16) /* For DISPATCH-relative addresses in assembler part. */ 410LJ_ALIGN(16) /* For DISPATCH-relative addresses in assembler part. */
diff --git a/src/lj_lex.c b/src/lj_lex.c
index 4fa39313..49e1e88e 100644
--- a/src/lj_lex.c
+++ b/src/lj_lex.c
@@ -12,6 +12,7 @@
12#include "lj_obj.h" 12#include "lj_obj.h"
13#include "lj_gc.h" 13#include "lj_gc.h"
14#include "lj_err.h" 14#include "lj_err.h"
15#include "lj_buf.h"
15#include "lj_str.h" 16#include "lj_str.h"
16#if LJ_HASFFI 17#if LJ_HASFFI
17#include "lj_tab.h" 18#include "lj_tab.h"
@@ -24,6 +25,7 @@
24#include "lj_parse.h" 25#include "lj_parse.h"
25#include "lj_char.h" 26#include "lj_char.h"
26#include "lj_strscan.h" 27#include "lj_strscan.h"
28#include "lj_strfmt.h"
27 29
28/* Lua lexer token names. */ 30/* Lua lexer token names. */
29static const char *const tokennames[] = { 31static const char *const tokennames[] = {
@@ -37,50 +39,48 @@ TKDEF(TKSTR1, TKSTR2)
37 39
38/* -- Buffer handling ----------------------------------------------------- */ 40/* -- Buffer handling ----------------------------------------------------- */
39 41
40#define char2int(c) ((int)(uint8_t)(c)) 42#define LEX_EOF (-1)
41#define next(ls) \ 43#define lex_iseol(ls) (ls->c == '\n' || ls->c == '\r')
42 (ls->current = (ls->n--) > 0 ? char2int(*ls->p++) : fillbuf(ls))
43#define save_and_next(ls) (save(ls, ls->current), next(ls))
44#define currIsNewline(ls) (ls->current == '\n' || ls->current == '\r')
45#define END_OF_STREAM (-1)
46 44
47static int fillbuf(LexState *ls) 45/* Get more input from reader. */
46static LJ_NOINLINE LexChar lex_more(LexState *ls)
48{ 47{
49 size_t sz; 48 size_t sz;
50 const char *buf = ls->rfunc(ls->L, ls->rdata, &sz); 49 const char *p = ls->rfunc(ls->L, ls->rdata, &sz);
51 if (buf == NULL || sz == 0) return END_OF_STREAM; 50 if (p == NULL || sz == 0) return LEX_EOF;
52 ls->n = (MSize)sz - 1; 51 ls->pe = p + sz;
53 ls->p = buf; 52 ls->p = p + 1;
54 return char2int(*(ls->p++)); 53 return (LexChar)(uint8_t)p[0];
55} 54}
56 55
57static LJ_NOINLINE void save_grow(LexState *ls, int c) 56/* Get next character. */
57static LJ_AINLINE LexChar lex_next(LexState *ls)
58{ 58{
59 MSize newsize; 59 return (ls->c = ls->p < ls->pe ? (LexChar)(uint8_t)*ls->p++ : lex_more(ls));
60 if (ls->sb.sz >= LJ_MAX_STR/2)
61 lj_lex_error(ls, 0, LJ_ERR_XELEM);
62 newsize = ls->sb.sz * 2;
63 lj_str_resizebuf(ls->L, &ls->sb, newsize);
64 ls->sb.buf[ls->sb.n++] = (char)c;
65} 60}
66 61
67static LJ_AINLINE void save(LexState *ls, int c) 62/* Save character. */
63static LJ_AINLINE void lex_save(LexState *ls, LexChar c)
68{ 64{
69 if (LJ_UNLIKELY(ls->sb.n + 1 > ls->sb.sz)) 65 lj_buf_putb(&ls->sb, c);
70 save_grow(ls, c); 66}
71 else 67
72 ls->sb.buf[ls->sb.n++] = (char)c; 68/* Save previous character and get next character. */
69static LJ_AINLINE LexChar lex_savenext(LexState *ls)
70{
71 lex_save(ls, ls->c);
72 return lex_next(ls);
73} 73}
74 74
75static void inclinenumber(LexState *ls) 75/* Skip line break. Handles "\n", "\r", "\r\n" or "\n\r". */
76static void lex_newline(LexState *ls)
76{ 77{
77 int old = ls->current; 78 LexChar old = ls->c;
78 lua_assert(currIsNewline(ls)); 79 lua_assert(lex_iseol(ls));
79 next(ls); /* skip `\n' or `\r' */ 80 lex_next(ls); /* Skip "\n" or "\r". */
80 if (currIsNewline(ls) && ls->current != old) 81 if (lex_iseol(ls) && ls->c != old) lex_next(ls); /* Skip "\n\r" or "\r\n". */
81 next(ls); /* skip `\n\r' or `\r\n' */
82 if (++ls->linenumber >= LJ_MAX_LINE) 82 if (++ls->linenumber >= LJ_MAX_LINE)
83 lj_lex_error(ls, ls->token, LJ_ERR_XLINES); 83 lj_lex_error(ls, ls->tok, LJ_ERR_XLINES);
84} 84}
85 85
86/* -- Scanner for terminals ----------------------------------------------- */ 86/* -- Scanner for terminals ----------------------------------------------- */
@@ -89,19 +89,17 @@ static void inclinenumber(LexState *ls)
89static void lex_number(LexState *ls, TValue *tv) 89static void lex_number(LexState *ls, TValue *tv)
90{ 90{
91 StrScanFmt fmt; 91 StrScanFmt fmt;
92 int c, xp = 'e'; 92 LexChar c, xp = 'e';
93 lua_assert(lj_char_isdigit(ls->current)); 93 lua_assert(lj_char_isdigit(ls->c));
94 if ((c = ls->current) == '0') { 94 if ((c = ls->c) == '0' && (lex_savenext(ls) | 0x20) == 'x')
95 save_and_next(ls); 95 xp = 'p';
96 if ((ls->current | 0x20) == 'x') xp = 'p'; 96 while (lj_char_isident(ls->c) || ls->c == '.' ||
97 } 97 ((ls->c == '-' || ls->c == '+') && (c | 0x20) == xp)) {
98 while (lj_char_isident(ls->current) || ls->current == '.' || 98 c = ls->c;
99 ((ls->current == '-' || ls->current == '+') && (c | 0x20) == xp)) { 99 lex_savenext(ls);
100 c = ls->current;
101 save_and_next(ls);
102 } 100 }
103 save(ls, '\0'); 101 lex_save(ls, '\0');
104 fmt = lj_strscan_scan((const uint8_t *)ls->sb.buf, tv, 102 fmt = lj_strscan_scan((const uint8_t *)sbufB(&ls->sb), tv,
105 (LJ_DUALNUM ? STRSCAN_OPT_TOINT : STRSCAN_OPT_TONUM) | 103 (LJ_DUALNUM ? STRSCAN_OPT_TOINT : STRSCAN_OPT_TONUM) |
106 (LJ_HASFFI ? (STRSCAN_OPT_LL|STRSCAN_OPT_IMAG) : 0)); 104 (LJ_HASFFI ? (STRSCAN_OPT_LL|STRSCAN_OPT_IMAG) : 0));
107 if (LJ_DUALNUM && fmt == STRSCAN_INT) { 105 if (LJ_DUALNUM && fmt == STRSCAN_INT) {
@@ -134,60 +132,60 @@ static void lex_number(LexState *ls, TValue *tv)
134 } 132 }
135} 133}
136 134
137static int skip_sep(LexState *ls) 135/* Skip equal signs for "[=...=[" and "]=...=]" and return their count. */
136static int lex_skipeq(LexState *ls)
138{ 137{
139 int count = 0; 138 int count = 0;
140 int s = ls->current; 139 LexChar s = ls->c;
141 lua_assert(s == '[' || s == ']'); 140 lua_assert(s == '[' || s == ']');
142 save_and_next(ls); 141 while (lex_savenext(ls) == '=')
143 while (ls->current == '=') {
144 save_and_next(ls);
145 count++; 142 count++;
146 } 143 return (ls->c == s) ? count : (-count) - 1;
147 return (ls->current == s) ? count : (-count) - 1;
148} 144}
149 145
150static void read_long_string(LexState *ls, TValue *tv, int sep) 146/* Parse a long string or long comment (tv set to NULL). */
147static void lex_longstring(LexState *ls, TValue *tv, int sep)
151{ 148{
152 save_and_next(ls); /* skip 2nd `[' */ 149 lex_savenext(ls); /* Skip second '['. */
153 if (currIsNewline(ls)) /* string starts with a newline? */ 150 if (lex_iseol(ls)) /* Skip initial newline. */
154 inclinenumber(ls); /* skip it */ 151 lex_newline(ls);
155 for (;;) { 152 for (;;) {
156 switch (ls->current) { 153 switch (ls->c) {
157 case END_OF_STREAM: 154 case LEX_EOF:
158 lj_lex_error(ls, TK_eof, tv ? LJ_ERR_XLSTR : LJ_ERR_XLCOM); 155 lj_lex_error(ls, TK_eof, tv ? LJ_ERR_XLSTR : LJ_ERR_XLCOM);
159 break; 156 break;
160 case ']': 157 case ']':
161 if (skip_sep(ls) == sep) { 158 if (lex_skipeq(ls) == sep) {
162 save_and_next(ls); /* skip 2nd `]' */ 159 lex_savenext(ls); /* Skip second ']'. */
163 goto endloop; 160 goto endloop;
164 } 161 }
165 break; 162 break;
166 case '\n': 163 case '\n':
167 case '\r': 164 case '\r':
168 save(ls, '\n'); 165 lex_save(ls, '\n');
169 inclinenumber(ls); 166 lex_newline(ls);
170 if (!tv) lj_str_resetbuf(&ls->sb); /* avoid wasting space */ 167 if (!tv) lj_buf_reset(&ls->sb); /* Don't waste space for comments. */
171 break; 168 break;
172 default: 169 default:
173 if (tv) save_and_next(ls); 170 lex_savenext(ls);
174 else next(ls);
175 break; 171 break;
176 } 172 }
177 } endloop: 173 } endloop:
178 if (tv) { 174 if (tv) {
179 GCstr *str = lj_parse_keepstr(ls, ls->sb.buf + (2 + (MSize)sep), 175 GCstr *str = lj_parse_keepstr(ls, sbufB(&ls->sb) + (2 + (MSize)sep),
180 ls->sb.n - 2*(2 + (MSize)sep)); 176 sbuflen(&ls->sb) - 2*(2 + (MSize)sep));
181 setstrV(ls->L, tv, str); 177 setstrV(ls->L, tv, str);
182 } 178 }
183} 179}
184 180
185static void read_string(LexState *ls, int delim, TValue *tv) 181/* Parse a string. */
182static void lex_string(LexState *ls, TValue *tv)
186{ 183{
187 save_and_next(ls); 184 LexChar delim = ls->c; /* Delimiter is '\'' or '"'. */
188 while (ls->current != delim) { 185 lex_savenext(ls);
189 switch (ls->current) { 186 while (ls->c != delim) {
190 case END_OF_STREAM: 187 switch (ls->c) {
188 case LEX_EOF:
191 lj_lex_error(ls, TK_eof, LJ_ERR_XSTR); 189 lj_lex_error(ls, TK_eof, LJ_ERR_XSTR);
192 continue; 190 continue;
193 case '\n': 191 case '\n':
@@ -195,7 +193,7 @@ static void read_string(LexState *ls, int delim, TValue *tv)
195 lj_lex_error(ls, TK_string, LJ_ERR_XSTR); 193 lj_lex_error(ls, TK_string, LJ_ERR_XSTR);
196 continue; 194 continue;
197 case '\\': { 195 case '\\': {
198 int c = next(ls); /* Skip the '\\'. */ 196 LexChar c = lex_next(ls); /* Skip the '\\'. */
199 switch (c) { 197 switch (c) {
200 case 'a': c = '\a'; break; 198 case 'a': c = '\a'; break;
201 case 'b': c = '\b'; break; 199 case 'b': c = '\b'; break;
@@ -205,111 +203,112 @@ static void read_string(LexState *ls, int delim, TValue *tv)
205 case 't': c = '\t'; break; 203 case 't': c = '\t'; break;
206 case 'v': c = '\v'; break; 204 case 'v': c = '\v'; break;
207 case 'x': /* Hexadecimal escape '\xXX'. */ 205 case 'x': /* Hexadecimal escape '\xXX'. */
208 c = (next(ls) & 15u) << 4; 206 c = (lex_next(ls) & 15u) << 4;
209 if (!lj_char_isdigit(ls->current)) { 207 if (!lj_char_isdigit(ls->c)) {
210 if (!lj_char_isxdigit(ls->current)) goto err_xesc; 208 if (!lj_char_isxdigit(ls->c)) goto err_xesc;
211 c += 9 << 4; 209 c += 9 << 4;
212 } 210 }
213 c += (next(ls) & 15u); 211 c += (lex_next(ls) & 15u);
214 if (!lj_char_isdigit(ls->current)) { 212 if (!lj_char_isdigit(ls->c)) {
215 if (!lj_char_isxdigit(ls->current)) goto err_xesc; 213 if (!lj_char_isxdigit(ls->c)) goto err_xesc;
216 c += 9; 214 c += 9;
217 } 215 }
218 break; 216 break;
219 case 'z': /* Skip whitespace. */ 217 case 'z': /* Skip whitespace. */
220 next(ls); 218 lex_next(ls);
221 while (lj_char_isspace(ls->current)) 219 while (lj_char_isspace(ls->c))
222 if (currIsNewline(ls)) inclinenumber(ls); else next(ls); 220 if (lex_iseol(ls)) lex_newline(ls); else lex_next(ls);
223 continue; 221 continue;
224 case '\n': case '\r': save(ls, '\n'); inclinenumber(ls); continue; 222 case '\n': case '\r': lex_save(ls, '\n'); lex_newline(ls); continue;
225 case '\\': case '\"': case '\'': break; 223 case '\\': case '\"': case '\'': break;
226 case END_OF_STREAM: continue; 224 case LEX_EOF: continue;
227 default: 225 default:
228 if (!lj_char_isdigit(c)) 226 if (!lj_char_isdigit(c))
229 goto err_xesc; 227 goto err_xesc;
230 c -= '0'; /* Decimal escape '\ddd'. */ 228 c -= '0'; /* Decimal escape '\ddd'. */
231 if (lj_char_isdigit(next(ls))) { 229 if (lj_char_isdigit(lex_next(ls))) {
232 c = c*10 + (ls->current - '0'); 230 c = c*10 + (ls->c - '0');
233 if (lj_char_isdigit(next(ls))) { 231 if (lj_char_isdigit(lex_next(ls))) {
234 c = c*10 + (ls->current - '0'); 232 c = c*10 + (ls->c - '0');
235 if (c > 255) { 233 if (c > 255) {
236 err_xesc: 234 err_xesc:
237 lj_lex_error(ls, TK_string, LJ_ERR_XESC); 235 lj_lex_error(ls, TK_string, LJ_ERR_XESC);
238 } 236 }
239 next(ls); 237 lex_next(ls);
240 } 238 }
241 } 239 }
242 save(ls, c); 240 lex_save(ls, c);
243 continue; 241 continue;
244 } 242 }
245 save(ls, c); 243 lex_save(ls, c);
246 next(ls); 244 lex_next(ls);
247 continue; 245 continue;
248 } 246 }
249 default: 247 default:
250 save_and_next(ls); 248 lex_savenext(ls);
251 break; 249 break;
252 } 250 }
253 } 251 }
254 save_and_next(ls); /* skip delimiter */ 252 lex_savenext(ls); /* Skip trailing delimiter. */
255 setstrV(ls->L, tv, lj_parse_keepstr(ls, ls->sb.buf + 1, ls->sb.n - 2)); 253 setstrV(ls->L, tv,
254 lj_parse_keepstr(ls, sbufB(&ls->sb)+1, sbuflen(&ls->sb)-2));
256} 255}
257 256
258/* -- Main lexical scanner ------------------------------------------------ */ 257/* -- Main lexical scanner ------------------------------------------------ */
259 258
260static int llex(LexState *ls, TValue *tv) 259/* Get next lexical token. */
260static LexToken lex_scan(LexState *ls, TValue *tv)
261{ 261{
262 lj_str_resetbuf(&ls->sb); 262 lj_buf_reset(&ls->sb);
263 for (;;) { 263 for (;;) {
264 if (lj_char_isident(ls->current)) { 264 if (lj_char_isident(ls->c)) {
265 GCstr *s; 265 GCstr *s;
266 if (lj_char_isdigit(ls->current)) { /* Numeric literal. */ 266 if (lj_char_isdigit(ls->c)) { /* Numeric literal. */
267 lex_number(ls, tv); 267 lex_number(ls, tv);
268 return TK_number; 268 return TK_number;
269 } 269 }
270 /* Identifier or reserved word. */ 270 /* Identifier or reserved word. */
271 do { 271 do {
272 save_and_next(ls); 272 lex_savenext(ls);
273 } while (lj_char_isident(ls->current)); 273 } while (lj_char_isident(ls->c));
274 s = lj_parse_keepstr(ls, ls->sb.buf, ls->sb.n); 274 s = lj_parse_keepstr(ls, sbufB(&ls->sb), sbuflen(&ls->sb));
275 setstrV(ls->L, tv, s); 275 setstrV(ls->L, tv, s);
276 if (s->reserved > 0) /* Reserved word? */ 276 if (s->reserved > 0) /* Reserved word? */
277 return TK_OFS + s->reserved; 277 return TK_OFS + s->reserved;
278 return TK_name; 278 return TK_name;
279 } 279 }
280 switch (ls->current) { 280 switch (ls->c) {
281 case '\n': 281 case '\n':
282 case '\r': 282 case '\r':
283 inclinenumber(ls); 283 lex_newline(ls);
284 continue; 284 continue;
285 case ' ': 285 case ' ':
286 case '\t': 286 case '\t':
287 case '\v': 287 case '\v':
288 case '\f': 288 case '\f':
289 next(ls); 289 lex_next(ls);
290 continue; 290 continue;
291 case '-': 291 case '-':
292 next(ls); 292 lex_next(ls);
293 if (ls->current != '-') return '-'; 293 if (ls->c != '-') return '-';
294 /* else is a comment */ 294 lex_next(ls);
295 next(ls); 295 if (ls->c == '[') { /* Long comment "--[=*[...]=*]". */
296 if (ls->current == '[') { 296 int sep = lex_skipeq(ls);
297 int sep = skip_sep(ls); 297 lj_buf_reset(&ls->sb); /* `lex_skipeq' may dirty the buffer */
298 lj_str_resetbuf(&ls->sb); /* `skip_sep' may dirty the buffer */
299 if (sep >= 0) { 298 if (sep >= 0) {
300 read_long_string(ls, NULL, sep); /* long comment */ 299 lex_longstring(ls, NULL, sep);
301 lj_str_resetbuf(&ls->sb); 300 lj_buf_reset(&ls->sb);
302 continue; 301 continue;
303 } 302 }
304 } 303 }
305 /* else short comment */ 304 /* Short comment "--.*\n". */
306 while (!currIsNewline(ls) && ls->current != END_OF_STREAM) 305 while (!lex_iseol(ls) && ls->c != LEX_EOF)
307 next(ls); 306 lex_next(ls);
308 continue; 307 continue;
309 case '[': { 308 case '[': {
310 int sep = skip_sep(ls); 309 int sep = lex_skipeq(ls);
311 if (sep >= 0) { 310 if (sep >= 0) {
312 read_long_string(ls, tv, sep); 311 lex_longstring(ls, tv, sep);
313 return TK_string; 312 return TK_string;
314 } else if (sep == -1) { 313 } else if (sep == -1) {
315 return '['; 314 return '[';
@@ -319,44 +318,43 @@ static int llex(LexState *ls, TValue *tv)
319 } 318 }
320 } 319 }
321 case '=': 320 case '=':
322 next(ls); 321 lex_next(ls);
323 if (ls->current != '=') return '='; else { next(ls); return TK_eq; } 322 if (ls->c != '=') return '='; else { lex_next(ls); return TK_eq; }
324 case '<': 323 case '<':
325 next(ls); 324 lex_next(ls);
326 if (ls->current != '=') return '<'; else { next(ls); return TK_le; } 325 if (ls->c != '=') return '<'; else { lex_next(ls); return TK_le; }
327 case '>': 326 case '>':
328 next(ls); 327 lex_next(ls);
329 if (ls->current != '=') return '>'; else { next(ls); return TK_ge; } 328 if (ls->c != '=') return '>'; else { lex_next(ls); return TK_ge; }
330 case '~': 329 case '~':
331 next(ls); 330 lex_next(ls);
332 if (ls->current != '=') return '~'; else { next(ls); return TK_ne; } 331 if (ls->c != '=') return '~'; else { lex_next(ls); return TK_ne; }
333 case ':': 332 case ':':
334 next(ls); 333 lex_next(ls);
335 if (ls->current != ':') return ':'; else { next(ls); return TK_label; } 334 if (ls->c != ':') return ':'; else { lex_next(ls); return TK_label; }
336 case '"': 335 case '"':
337 case '\'': 336 case '\'':
338 read_string(ls, ls->current, tv); 337 lex_string(ls, tv);
339 return TK_string; 338 return TK_string;
340 case '.': 339 case '.':
341 save_and_next(ls); 340 if (lex_savenext(ls) == '.') {
342 if (ls->current == '.') { 341 lex_next(ls);
343 next(ls); 342 if (ls->c == '.') {
344 if (ls->current == '.') { 343 lex_next(ls);
345 next(ls);
346 return TK_dots; /* ... */ 344 return TK_dots; /* ... */
347 } 345 }
348 return TK_concat; /* .. */ 346 return TK_concat; /* .. */
349 } else if (!lj_char_isdigit(ls->current)) { 347 } else if (!lj_char_isdigit(ls->c)) {
350 return '.'; 348 return '.';
351 } else { 349 } else {
352 lex_number(ls, tv); 350 lex_number(ls, tv);
353 return TK_number; 351 return TK_number;
354 } 352 }
355 case END_OF_STREAM: 353 case LEX_EOF:
356 return TK_eof; 354 return TK_eof;
357 default: { 355 default: {
358 int c = ls->current; 356 LexChar c = ls->c;
359 next(ls); 357 lex_next(ls);
360 return c; /* Single-char tokens (+ - / ...). */ 358 return c; /* Single-char tokens (+ - / ...). */
361 } 359 }
362 } 360 }
@@ -371,8 +369,7 @@ int lj_lex_setup(lua_State *L, LexState *ls)
371 int header = 0; 369 int header = 0;
372 ls->L = L; 370 ls->L = L;
373 ls->fs = NULL; 371 ls->fs = NULL;
374 ls->n = 0; 372 ls->pe = ls->p = NULL;
375 ls->p = NULL;
376 ls->vstack = NULL; 373 ls->vstack = NULL;
377 ls->sizevstack = 0; 374 ls->sizevstack = 0;
378 ls->vtop = 0; 375 ls->vtop = 0;
@@ -381,24 +378,22 @@ int lj_lex_setup(lua_State *L, LexState *ls)
381 ls->lookahead = TK_eof; /* No look-ahead token. */ 378 ls->lookahead = TK_eof; /* No look-ahead token. */
382 ls->linenumber = 1; 379 ls->linenumber = 1;
383 ls->lastline = 1; 380 ls->lastline = 1;
384 lj_str_resizebuf(ls->L, &ls->sb, LJ_MIN_SBUF); 381 lex_next(ls); /* Read-ahead first char. */
385 next(ls); /* Read-ahead first char. */ 382 if (ls->c == 0xef && ls->p + 2 <= ls->pe && (uint8_t)ls->p[0] == 0xbb &&
386 if (ls->current == 0xef && ls->n >= 2 && char2int(ls->p[0]) == 0xbb && 383 (uint8_t)ls->p[1] == 0xbf) { /* Skip UTF-8 BOM (if buffered). */
387 char2int(ls->p[1]) == 0xbf) { /* Skip UTF-8 BOM (if buffered). */
388 ls->n -= 2;
389 ls->p += 2; 384 ls->p += 2;
390 next(ls); 385 lex_next(ls);
391 header = 1; 386 header = 1;
392 } 387 }
393 if (ls->current == '#') { /* Skip POSIX #! header line. */ 388 if (ls->c == '#') { /* Skip POSIX #! header line. */
394 do { 389 do {
395 next(ls); 390 lex_next(ls);
396 if (ls->current == END_OF_STREAM) return 0; 391 if (ls->c == LEX_EOF) return 0;
397 } while (!currIsNewline(ls)); 392 } while (!lex_iseol(ls));
398 inclinenumber(ls); 393 lex_newline(ls);
399 header = 1; 394 header = 1;
400 } 395 }
401 if (ls->current == LUA_SIGNATURE[0]) { /* Bytecode dump. */ 396 if (ls->c == LUA_SIGNATURE[0]) { /* Bytecode dump. */
402 if (header) { 397 if (header) {
403 /* 398 /*
404 ** Loading bytecode with an extra header is disabled for security 399 ** Loading bytecode with an extra header is disabled for security
@@ -420,55 +415,60 @@ void lj_lex_cleanup(lua_State *L, LexState *ls)
420 global_State *g = G(L); 415 global_State *g = G(L);
421 lj_mem_freevec(g, ls->bcstack, ls->sizebcstack, BCInsLine); 416 lj_mem_freevec(g, ls->bcstack, ls->sizebcstack, BCInsLine);
422 lj_mem_freevec(g, ls->vstack, ls->sizevstack, VarInfo); 417 lj_mem_freevec(g, ls->vstack, ls->sizevstack, VarInfo);
423 lj_str_freebuf(g, &ls->sb); 418 lj_buf_free(g, &ls->sb);
424} 419}
425 420
421/* Return next lexical token. */
426void lj_lex_next(LexState *ls) 422void lj_lex_next(LexState *ls)
427{ 423{
428 ls->lastline = ls->linenumber; 424 ls->lastline = ls->linenumber;
429 if (LJ_LIKELY(ls->lookahead == TK_eof)) { /* No lookahead token? */ 425 if (LJ_LIKELY(ls->lookahead == TK_eof)) { /* No lookahead token? */
430 ls->token = llex(ls, &ls->tokenval); /* Get next token. */ 426 ls->tok = lex_scan(ls, &ls->tokval); /* Get next token. */
431 } else { /* Otherwise return lookahead token. */ 427 } else { /* Otherwise return lookahead token. */
432 ls->token = ls->lookahead; 428 ls->tok = ls->lookahead;
433 ls->lookahead = TK_eof; 429 ls->lookahead = TK_eof;
434 ls->tokenval = ls->lookaheadval; 430 ls->tokval = ls->lookaheadval;
435 } 431 }
436} 432}
437 433
434/* Look ahead for the next token. */
438LexToken lj_lex_lookahead(LexState *ls) 435LexToken lj_lex_lookahead(LexState *ls)
439{ 436{
440 lua_assert(ls->lookahead == TK_eof); 437 lua_assert(ls->lookahead == TK_eof);
441 ls->lookahead = llex(ls, &ls->lookaheadval); 438 ls->lookahead = lex_scan(ls, &ls->lookaheadval);
442 return ls->lookahead; 439 return ls->lookahead;
443} 440}
444 441
445const char *lj_lex_token2str(LexState *ls, LexToken token) 442/* Convert token to string. */
443const char *lj_lex_token2str(LexState *ls, LexToken tok)
446{ 444{
447 if (token > TK_OFS) 445 if (tok > TK_OFS)
448 return tokennames[token-TK_OFS-1]; 446 return tokennames[tok-TK_OFS-1];
449 else if (!lj_char_iscntrl(token)) 447 else if (!lj_char_iscntrl(tok))
450 return lj_str_pushf(ls->L, "%c", token); 448 return lj_strfmt_pushf(ls->L, "%c", tok);
451 else 449 else
452 return lj_str_pushf(ls->L, "char(%d)", token); 450 return lj_strfmt_pushf(ls->L, "char(%d)", tok);
453} 451}
454 452
455void lj_lex_error(LexState *ls, LexToken token, ErrMsg em, ...) 453/* Lexer error. */
454void lj_lex_error(LexState *ls, LexToken tok, ErrMsg em, ...)
456{ 455{
457 const char *tok; 456 const char *tokstr;
458 va_list argp; 457 va_list argp;
459 if (token == 0) { 458 if (tok == 0) {
460 tok = NULL; 459 tokstr = NULL;
461 } else if (token == TK_name || token == TK_string || token == TK_number) { 460 } else if (tok == TK_name || tok == TK_string || tok == TK_number) {
462 save(ls, '\0'); 461 lex_save(ls, '\0');
463 tok = ls->sb.buf; 462 tokstr = sbufB(&ls->sb);
464 } else { 463 } else {
465 tok = lj_lex_token2str(ls, token); 464 tokstr = lj_lex_token2str(ls, tok);
466 } 465 }
467 va_start(argp, em); 466 va_start(argp, em);
468 lj_err_lex(ls->L, ls->chunkname, tok, ls->linenumber, em, argp); 467 lj_err_lex(ls->L, ls->chunkname, tokstr, ls->linenumber, em, argp);
469 va_end(argp); 468 va_end(argp);
470} 469}
471 470
471/* Initialize strings for reserved words. */
472void lj_lex_init(lua_State *L) 472void lj_lex_init(lua_State *L)
473{ 473{
474 uint32_t i; 474 uint32_t i;
diff --git a/src/lj_lex.h b/src/lj_lex.h
index a7ff29a0..000af12c 100644
--- a/src/lj_lex.h
+++ b/src/lj_lex.h
@@ -30,7 +30,8 @@ TKDEF(TKENUM1, TKENUM2)
30 TK_RESERVED = TK_while - TK_OFS 30 TK_RESERVED = TK_while - TK_OFS
31}; 31};
32 32
33typedef int LexToken; 33typedef int LexChar; /* Lexical character. Unsigned ext. from char. */
34typedef int LexToken; /* Lexical token. */
34 35
35/* Combined bytecode ins/line. Only used during bytecode generation. */ 36/* Combined bytecode ins/line. Only used during bytecode generation. */
36typedef struct BCInsLine { 37typedef struct BCInsLine {
@@ -51,13 +52,13 @@ typedef struct VarInfo {
51typedef struct LexState { 52typedef struct LexState {
52 struct FuncState *fs; /* Current FuncState. Defined in lj_parse.c. */ 53 struct FuncState *fs; /* Current FuncState. Defined in lj_parse.c. */
53 struct lua_State *L; /* Lua state. */ 54 struct lua_State *L; /* Lua state. */
54 TValue tokenval; /* Current token value. */ 55 TValue tokval; /* Current token value. */
55 TValue lookaheadval; /* Lookahead token value. */ 56 TValue lookaheadval; /* Lookahead token value. */
56 int current; /* Current character (charint). */
57 LexToken token; /* Current token. */
58 LexToken lookahead; /* Lookahead token. */
59 MSize n; /* Bytes left in input buffer. */
60 const char *p; /* Current position in input buffer. */ 57 const char *p; /* Current position in input buffer. */
58 const char *pe; /* End of input buffer. */
59 LexChar c; /* Current character. */
60 LexToken tok; /* Current token. */
61 LexToken lookahead; /* Lookahead token. */
61 SBuf sb; /* String buffer for tokens. */ 62 SBuf sb; /* String buffer for tokens. */
62 lua_Reader rfunc; /* Reader callback. */ 63 lua_Reader rfunc; /* Reader callback. */
63 void *rdata; /* Reader callback data. */ 64 void *rdata; /* Reader callback data. */
@@ -78,8 +79,8 @@ LJ_FUNC int lj_lex_setup(lua_State *L, LexState *ls);
78LJ_FUNC void lj_lex_cleanup(lua_State *L, LexState *ls); 79LJ_FUNC void lj_lex_cleanup(lua_State *L, LexState *ls);
79LJ_FUNC void lj_lex_next(LexState *ls); 80LJ_FUNC void lj_lex_next(LexState *ls);
80LJ_FUNC LexToken lj_lex_lookahead(LexState *ls); 81LJ_FUNC LexToken lj_lex_lookahead(LexState *ls);
81LJ_FUNC const char *lj_lex_token2str(LexState *ls, LexToken token); 82LJ_FUNC const char *lj_lex_token2str(LexState *ls, LexToken tok);
82LJ_FUNC_NORET void lj_lex_error(LexState *ls, LexToken token, ErrMsg em, ...); 83LJ_FUNC_NORET void lj_lex_error(LexState *ls, LexToken tok, ErrMsg em, ...);
83LJ_FUNC void lj_lex_init(lua_State *L); 84LJ_FUNC void lj_lex_init(lua_State *L);
84 85
85#endif 86#endif
diff --git a/src/lj_lib.c b/src/lj_lib.c
index 76c8a064..9f84488a 100644
--- a/src/lj_lib.c
+++ b/src/lj_lib.c
@@ -18,6 +18,9 @@
18#include "lj_dispatch.h" 18#include "lj_dispatch.h"
19#include "lj_vm.h" 19#include "lj_vm.h"
20#include "lj_strscan.h" 20#include "lj_strscan.h"
21#include "lj_strfmt.h"
22#include "lj_lex.h"
23#include "lj_bcdump.h"
21#include "lj_lib.h" 24#include "lj_lib.h"
22 25
23/* -- Library initialization ---------------------------------------------- */ 26/* -- Library initialization ---------------------------------------------- */
@@ -43,6 +46,28 @@ static GCtab *lib_create_table(lua_State *L, const char *libname, int hsize)
43 return tabV(L->top-1); 46 return tabV(L->top-1);
44} 47}
45 48
49static const uint8_t *lib_read_lfunc(lua_State *L, const uint8_t *p, GCtab *tab)
50{
51 int len = *p++;
52 GCstr *name = lj_str_new(L, (const char *)p, len);
53 LexState ls;
54 GCproto *pt;
55 GCfunc *fn;
56 memset(&ls, 0, sizeof(ls));
57 ls.L = L;
58 ls.p = (const char *)(p+len);
59 ls.pe = (const char *)~(uintptr_t)0;
60 ls.c = -1;
61 ls.level = (BCDUMP_F_STRIP|(LJ_BE*BCDUMP_F_BE));
62 ls.chunkname = name;
63 pt = lj_bcread_proto(&ls);
64 pt->firstline = ~(BCLine)0;
65 fn = lj_func_newL_empty(L, pt, tabref(L->env));
66 /* NOBARRIER: See below for common barrier. */
67 setfuncV(L, lj_tab_setstr(L, tab, name), fn);
68 return (const uint8_t *)ls.p;
69}
70
46void lj_lib_register(lua_State *L, const char *libname, 71void lj_lib_register(lua_State *L, const char *libname,
47 const uint8_t *p, const lua_CFunction *cf) 72 const uint8_t *p, const lua_CFunction *cf)
48{ 73{
@@ -87,6 +112,9 @@ void lj_lib_register(lua_State *L, const char *libname,
87 ofn = fn; 112 ofn = fn;
88 } else { 113 } else {
89 switch (tag | len) { 114 switch (tag | len) {
115 case LIBINIT_LUA:
116 p = lib_read_lfunc(L, p, tab);
117 break;
90 case LIBINIT_SET: 118 case LIBINIT_SET:
91 L->top -= 2; 119 L->top -= 2;
92 if (tvisstr(L->top+1) && strV(L->top+1)->len == 0) 120 if (tvisstr(L->top+1) && strV(L->top+1)->len == 0)
@@ -120,6 +148,37 @@ void lj_lib_register(lua_State *L, const char *libname,
120 } 148 }
121} 149}
122 150
151/* Push internal function on the stack. */
152GCfunc *lj_lib_pushcc(lua_State *L, lua_CFunction f, int id, int n)
153{
154 GCfunc *fn;
155 lua_pushcclosure(L, f, n);
156 fn = funcV(L->top-1);
157 fn->c.ffid = (uint8_t)id;
158 setmref(fn->c.pc, &G(L)->bc_cfunc_int);
159 return fn;
160}
161
162void lj_lib_prereg(lua_State *L, const char *name, lua_CFunction f, GCtab *env)
163{
164 luaL_findtable(L, LUA_REGISTRYINDEX, "_PRELOAD", 4);
165 lua_pushcfunction(L, f);
166 /* NOBARRIER: The function is new (marked white). */
167 setgcref(funcV(L->top-1)->c.env, obj2gco(env));
168 lua_setfield(L, -2, name);
169 L->top--;
170}
171
172int lj_lib_postreg(lua_State *L, lua_CFunction cf, int id, const char *name)
173{
174 GCfunc *fn = lj_lib_pushcf(L, cf, id);
175 GCtab *t = tabref(curr_func(L)->c.env); /* Reference to parent table. */
176 setfuncV(L, lj_tab_setstr(L, t, lj_str_newz(L, name)), fn);
177 lj_gc_anybarriert(L, t);
178 setfuncV(L, L->top++, fn);
179 return 1;
180}
181
123/* -- Type checks --------------------------------------------------------- */ 182/* -- Type checks --------------------------------------------------------- */
124 183
125TValue *lj_lib_checkany(lua_State *L, int narg) 184TValue *lj_lib_checkany(lua_State *L, int narg)
@@ -137,7 +196,7 @@ GCstr *lj_lib_checkstr(lua_State *L, int narg)
137 if (LJ_LIKELY(tvisstr(o))) { 196 if (LJ_LIKELY(tvisstr(o))) {
138 return strV(o); 197 return strV(o);
139 } else if (tvisnumber(o)) { 198 } else if (tvisnumber(o)) {
140 GCstr *s = lj_str_fromnumber(L, o); 199 GCstr *s = lj_strfmt_number(L, o);
141 setstrV(L, o, s); 200 setstrV(L, o, s);
142 return s; 201 return s;
143 } 202 }
@@ -196,20 +255,6 @@ int32_t lj_lib_optint(lua_State *L, int narg, int32_t def)
196 return (o < L->top && !tvisnil(o)) ? lj_lib_checkint(L, narg) : def; 255 return (o < L->top && !tvisnil(o)) ? lj_lib_checkint(L, narg) : def;
197} 256}
198 257
199int32_t lj_lib_checkbit(lua_State *L, int narg)
200{
201 TValue *o = L->base + narg-1;
202 if (!(o < L->top && lj_strscan_numberobj(o)))
203 lj_err_argt(L, narg, LUA_TNUMBER);
204 if (LJ_LIKELY(tvisint(o))) {
205 return intV(o);
206 } else {
207 int32_t i = lj_num2bit(numV(o));
208 if (LJ_DUALNUM) setintV(o, i);
209 return i;
210 }
211}
212
213GCfunc *lj_lib_checkfunc(lua_State *L, int narg) 258GCfunc *lj_lib_checkfunc(lua_State *L, int narg)
214{ 259{
215 TValue *o = L->base + narg-1; 260 TValue *o = L->base + narg-1;
diff --git a/src/lj_lib.h b/src/lj_lib.h
index daacb921..5bfd8d7c 100644
--- a/src/lj_lib.h
+++ b/src/lj_lib.h
@@ -41,7 +41,6 @@ LJ_FUNC void lj_lib_checknumber(lua_State *L, int narg);
41LJ_FUNC lua_Number lj_lib_checknum(lua_State *L, int narg); 41LJ_FUNC lua_Number lj_lib_checknum(lua_State *L, int narg);
42LJ_FUNC int32_t lj_lib_checkint(lua_State *L, int narg); 42LJ_FUNC int32_t lj_lib_checkint(lua_State *L, int narg);
43LJ_FUNC int32_t lj_lib_optint(lua_State *L, int narg, int32_t def); 43LJ_FUNC int32_t lj_lib_optint(lua_State *L, int narg, int32_t def);
44LJ_FUNC int32_t lj_lib_checkbit(lua_State *L, int narg);
45LJ_FUNC GCfunc *lj_lib_checkfunc(lua_State *L, int narg); 44LJ_FUNC GCfunc *lj_lib_checkfunc(lua_State *L, int narg);
46LJ_FUNC GCtab *lj_lib_checktab(lua_State *L, int narg); 45LJ_FUNC GCtab *lj_lib_checktab(lua_State *L, int narg);
47LJ_FUNC GCtab *lj_lib_checktabornil(lua_State *L, int narg); 46LJ_FUNC GCtab *lj_lib_checktabornil(lua_State *L, int narg);
@@ -60,23 +59,14 @@ LJ_FUNC int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst);
60#define lj_lib_checkfpu(L) UNUSED(L) 59#define lj_lib_checkfpu(L) UNUSED(L)
61#endif 60#endif
62 61
63/* Push internal function on the stack. */ 62LJ_FUNC GCfunc *lj_lib_pushcc(lua_State *L, lua_CFunction f, int id, int n);
64static LJ_AINLINE void lj_lib_pushcc(lua_State *L, lua_CFunction f,
65 int id, int n)
66{
67 GCfunc *fn;
68 lua_pushcclosure(L, f, n);
69 fn = funcV(L->top-1);
70 fn->c.ffid = (uint8_t)id;
71 setmref(fn->c.pc, &G(L)->bc_cfunc_int);
72}
73
74#define lj_lib_pushcf(L, fn, id) (lj_lib_pushcc(L, (fn), (id), 0)) 63#define lj_lib_pushcf(L, fn, id) (lj_lib_pushcc(L, (fn), (id), 0))
75 64
76/* Library function declarations. Scanned by buildvm. */ 65/* Library function declarations. Scanned by buildvm. */
77#define LJLIB_CF(name) static int lj_cf_##name(lua_State *L) 66#define LJLIB_CF(name) static int lj_cf_##name(lua_State *L)
78#define LJLIB_ASM(name) static int lj_ffh_##name(lua_State *L) 67#define LJLIB_ASM(name) static int lj_ffh_##name(lua_State *L)
79#define LJLIB_ASM_(name) 68#define LJLIB_ASM_(name)
69#define LJLIB_LUA(name)
80#define LJLIB_SET(name) 70#define LJLIB_SET(name)
81#define LJLIB_PUSH(arg) 71#define LJLIB_PUSH(arg)
82#define LJLIB_REC(handler) 72#define LJLIB_REC(handler)
@@ -88,6 +78,10 @@ static LJ_AINLINE void lj_lib_pushcc(lua_State *L, lua_CFunction f,
88 78
89LJ_FUNC void lj_lib_register(lua_State *L, const char *libname, 79LJ_FUNC void lj_lib_register(lua_State *L, const char *libname,
90 const uint8_t *init, const lua_CFunction *cf); 80 const uint8_t *init, const lua_CFunction *cf);
81LJ_FUNC void lj_lib_prereg(lua_State *L, const char *name, lua_CFunction f,
82 GCtab *env);
83LJ_FUNC int lj_lib_postreg(lua_State *L, lua_CFunction cf, int id,
84 const char *name);
91 85
92/* Library init data tags. */ 86/* Library init data tags. */
93#define LIBINIT_LENMASK 0x3f 87#define LIBINIT_LENMASK 0x3f
@@ -96,7 +90,8 @@ LJ_FUNC void lj_lib_register(lua_State *L, const char *libname,
96#define LIBINIT_ASM 0x40 90#define LIBINIT_ASM 0x40
97#define LIBINIT_ASM_ 0x80 91#define LIBINIT_ASM_ 0x80
98#define LIBINIT_STRING 0xc0 92#define LIBINIT_STRING 0xc0
99#define LIBINIT_MAXSTR 0x39 93#define LIBINIT_MAXSTR 0x38
94#define LIBINIT_LUA 0xf9
100#define LIBINIT_SET 0xfa 95#define LIBINIT_SET 0xfa
101#define LIBINIT_NUMBER 0xfb 96#define LIBINIT_NUMBER 0xfb
102#define LIBINIT_COPY 0xfc 97#define LIBINIT_COPY 0xfc
diff --git a/src/lj_load.c b/src/lj_load.c
index edfdc903..160e82fa 100644
--- a/src/lj_load.c
+++ b/src/lj_load.c
@@ -15,7 +15,7 @@
15#include "lj_obj.h" 15#include "lj_obj.h"
16#include "lj_gc.h" 16#include "lj_gc.h"
17#include "lj_err.h" 17#include "lj_err.h"
18#include "lj_str.h" 18#include "lj_buf.h"
19#include "lj_func.h" 19#include "lj_func.h"
20#include "lj_frame.h" 20#include "lj_frame.h"
21#include "lj_vm.h" 21#include "lj_vm.h"
@@ -54,7 +54,7 @@ LUA_API int lua_loadx(lua_State *L, lua_Reader reader, void *data,
54 ls.rdata = data; 54 ls.rdata = data;
55 ls.chunkarg = chunkname ? chunkname : "?"; 55 ls.chunkarg = chunkname ? chunkname : "?";
56 ls.mode = mode; 56 ls.mode = mode;
57 lj_str_initbuf(&ls.sb); 57 lj_buf_init(L, &ls.sb);
58 status = lj_vm_cpcall(L, NULL, &ls, cpparser); 58 status = lj_vm_cpcall(L, NULL, &ls, cpparser);
59 lj_lex_cleanup(L, &ls); 59 lj_lex_cleanup(L, &ls);
60 lj_gc_check(L); 60 lj_gc_check(L);
diff --git a/src/lj_meta.c b/src/lj_meta.c
index 0a526671..dea456f2 100644
--- a/src/lj_meta.c
+++ b/src/lj_meta.c
@@ -12,6 +12,7 @@
12#include "lj_obj.h" 12#include "lj_obj.h"
13#include "lj_gc.h" 13#include "lj_gc.h"
14#include "lj_err.h" 14#include "lj_err.h"
15#include "lj_buf.h"
15#include "lj_str.h" 16#include "lj_str.h"
16#include "lj_tab.h" 17#include "lj_tab.h"
17#include "lj_meta.h" 18#include "lj_meta.h"
@@ -19,6 +20,8 @@
19#include "lj_bc.h" 20#include "lj_bc.h"
20#include "lj_vm.h" 21#include "lj_vm.h"
21#include "lj_strscan.h" 22#include "lj_strscan.h"
23#include "lj_strfmt.h"
24#include "lj_lib.h"
22 25
23/* -- Metamethod handling ------------------------------------------------- */ 26/* -- Metamethod handling ------------------------------------------------- */
24 27
@@ -225,27 +228,14 @@ TValue *lj_meta_arith(lua_State *L, TValue *ra, cTValue *rb, cTValue *rc,
225 } 228 }
226} 229}
227 230
228/* In-place coercion of a number to a string. */
229static LJ_AINLINE int tostring(lua_State *L, TValue *o)
230{
231 if (tvisstr(o)) {
232 return 1;
233 } else if (tvisnumber(o)) {
234 setstrV(L, o, lj_str_fromnumber(L, o));
235 return 1;
236 } else {
237 return 0;
238 }
239}
240
241/* Helper for CAT. Coercion, iterative concat, __concat metamethod. */ 231/* Helper for CAT. Coercion, iterative concat, __concat metamethod. */
242TValue *lj_meta_cat(lua_State *L, TValue *top, int left) 232TValue *lj_meta_cat(lua_State *L, TValue *top, int left)
243{ 233{
244 int fromc = 0; 234 int fromc = 0;
245 if (left < 0) { left = -left; fromc = 1; } 235 if (left < 0) { left = -left; fromc = 1; }
246 do { 236 do {
247 int n = 1; 237 if (!(tvisstr(top) || tvisnumber(top)) ||
248 if (!(tvisstr(top-1) || tvisnumber(top-1)) || !tostring(L, top)) { 238 !(tvisstr(top-1) || tvisnumber(top-1))) {
249 cTValue *mo = lj_meta_lookup(L, top-1, MM_concat); 239 cTValue *mo = lj_meta_lookup(L, top-1, MM_concat);
250 if (tvisnil(mo)) { 240 if (tvisnil(mo)) {
251 mo = lj_meta_lookup(L, top, MM_concat); 241 mo = lj_meta_lookup(L, top, MM_concat);
@@ -271,8 +261,6 @@ TValue *lj_meta_cat(lua_State *L, TValue *top, int left)
271 copyTV(L, top, mo); 261 copyTV(L, top, mo);
272 setcont(top-1, lj_cont_cat); 262 setcont(top-1, lj_cont_cat);
273 return top+1; /* Trigger metamethod call. */ 263 return top+1; /* Trigger metamethod call. */
274 } else if (strV(top)->len == 0) { /* Shortcut. */
275 (void)tostring(L, top-1);
276 } else { 264 } else {
277 /* Pick as many strings as possible from the top and concatenate them: 265 /* Pick as many strings as possible from the top and concatenate them:
278 ** 266 **
@@ -281,27 +269,28 @@ TValue *lj_meta_cat(lua_State *L, TValue *top, int left)
281 ** concat: [...][CAT stack ...] [result] 269 ** concat: [...][CAT stack ...] [result]
282 ** next step: [...][CAT stack ............] 270 ** next step: [...][CAT stack ............]
283 */ 271 */
284 MSize tlen = strV(top)->len; 272 TValue *e, *o = top;
285 char *buffer; 273 uint64_t tlen = tvisstr(o) ? strV(o)->len : STRFMT_MAXBUF_NUM;
286 int i; 274 char *p, *buf;
287 for (n = 1; n <= left && tostring(L, top-n); n++) { 275 do {
288 MSize len = strV(top-n)->len; 276 o--; tlen += tvisstr(o) ? strV(o)->len : STRFMT_MAXBUF_NUM;
289 if (len >= LJ_MAX_STR - tlen) 277 } while (--left > 0 && (tvisstr(o-1) || tvisnumber(o-1)));
290 lj_err_msg(L, LJ_ERR_STROV); 278 if (tlen >= LJ_MAX_STR) lj_err_msg(L, LJ_ERR_STROV);
291 tlen += len; 279 p = buf = lj_buf_tmp(L, (MSize)tlen);
292 } 280 for (e = top, top = o; o <= e; o++) {
293 buffer = lj_str_needbuf(L, &G(L)->tmpbuf, tlen); 281 if (tvisstr(o)) {
294 n--; 282 GCstr *s = strV(o);
295 tlen = 0; 283 MSize len = s->len;
296 for (i = n; i >= 0; i--) { 284 p = lj_buf_wmem(p, strdata(s), len);
297 MSize len = strV(top-i)->len; 285 } else if (tvisint(o)) {
298 memcpy(buffer + tlen, strVdata(top-i), len); 286 p = lj_strfmt_wint(p, intV(o));
299 tlen += len; 287 } else {
288 lua_assert(tvisnum(o));
289 p = lj_strfmt_wnum(p, o);
290 }
300 } 291 }
301 setstrV(L, top-n, lj_str_new(L, buffer, tlen)); 292 setstrV(L, top, lj_str_new(L, buf, (size_t)(p-buf)));
302 } 293 }
303 left -= n;
304 top -= n;
305 } while (left >= 1); 294 } while (left >= 1);
306 if (LJ_UNLIKELY(G(L)->gc.total >= G(L)->gc.threshold)) { 295 if (LJ_UNLIKELY(G(L)->gc.total >= G(L)->gc.threshold)) {
307 if (!fromc) L->top = curr_topL(L); 296 if (!fromc) L->top = curr_topL(L);
@@ -423,6 +412,18 @@ TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op)
423 } 412 }
424} 413}
425 414
415/* Helper for ISTYPE and ISNUM. Implicit coercion or error. */
416void lj_meta_istype(lua_State *L, BCReg ra, BCReg tp)
417{
418 L->top = curr_topL(L);
419 ra++; tp--;
420 lua_assert(LJ_DUALNUM || tp != ~LJ_TNUMX); /* ISTYPE -> ISNUM broken. */
421 if (LJ_DUALNUM && tp == ~LJ_TNUMX) lj_lib_checkint(L, ra);
422 else if (tp == ~LJ_TNUMX+1) lj_lib_checknum(L, ra);
423 else if (tp == ~LJ_TSTR) lj_lib_checkstr(L, ra);
424 else lj_err_argtype(L, ra, lj_obj_itypename[tp]);
425}
426
426/* Helper for calls. __call metamethod. */ 427/* Helper for calls. __call metamethod. */
427void lj_meta_call(lua_State *L, TValue *func, TValue *top) 428void lj_meta_call(lua_State *L, TValue *func, TValue *top)
428{ 429{
diff --git a/src/lj_meta.h b/src/lj_meta.h
index 8d7c2cd4..5068f7a4 100644
--- a/src/lj_meta.h
+++ b/src/lj_meta.h
@@ -31,6 +31,7 @@ LJ_FUNCA TValue * LJ_FASTCALL lj_meta_len(lua_State *L, cTValue *o);
31LJ_FUNCA TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne); 31LJ_FUNCA TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne);
32LJ_FUNCA TValue * LJ_FASTCALL lj_meta_equal_cd(lua_State *L, BCIns ins); 32LJ_FUNCA TValue * LJ_FASTCALL lj_meta_equal_cd(lua_State *L, BCIns ins);
33LJ_FUNCA TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op); 33LJ_FUNCA TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op);
34LJ_FUNCA void lj_meta_istype(lua_State *L, BCReg ra, BCReg tp);
34LJ_FUNCA void lj_meta_call(lua_State *L, TValue *func, TValue *top); 35LJ_FUNCA void lj_meta_call(lua_State *L, TValue *func, TValue *top);
35LJ_FUNCA void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *o); 36LJ_FUNCA void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *o);
36 37
diff --git a/src/lj_obj.c b/src/lj_obj.c
index c7f3bc12..e8067167 100644
--- a/src/lj_obj.c
+++ b/src/lj_obj.c
@@ -20,7 +20,7 @@ LJ_DATADEF const char *const lj_obj_itypename[] = { /* ORDER LJ_T */
20}; 20};
21 21
22/* Compare two objects without calling metamethods. */ 22/* Compare two objects without calling metamethods. */
23int lj_obj_equal(cTValue *o1, cTValue *o2) 23int LJ_FASTCALL lj_obj_equal(cTValue *o1, cTValue *o2)
24{ 24{
25 if (itype(o1) == itype(o2)) { 25 if (itype(o1) == itype(o2)) {
26 if (tvispri(o1)) 26 if (tvispri(o1))
@@ -33,3 +33,18 @@ int lj_obj_equal(cTValue *o1, cTValue *o2)
33 return numberVnum(o1) == numberVnum(o2); 33 return numberVnum(o1) == numberVnum(o2);
34} 34}
35 35
36/* Return pointer to object or its object data. */
37const void * LJ_FASTCALL lj_obj_ptr(cTValue *o)
38{
39 if (tvisudata(o))
40 return uddata(udataV(o));
41 else if (tvislightud(o))
42 return lightudV(o);
43 else if (LJ_HASFFI && tviscdata(o))
44 return cdataptr(cdataV(o));
45 else if (tvisgcv(o))
46 return gcV(o);
47 else
48 return NULL;
49}
50
diff --git a/src/lj_obj.h b/src/lj_obj.h
index cc999aa4..daa62e34 100644
--- a/src/lj_obj.h
+++ b/src/lj_obj.h
@@ -119,11 +119,12 @@ typedef int32_t BCLine; /* Bytecode line number. */
119/* Internal assembler functions. Never call these directly from C. */ 119/* Internal assembler functions. Never call these directly from C. */
120typedef void (*ASMFunction)(void); 120typedef void (*ASMFunction)(void);
121 121
122/* Resizable string buffer. Need this here, details in lj_str.h. */ 122/* Resizable string buffer. Need this here, details in lj_buf.h. */
123typedef struct SBuf { 123typedef struct SBuf {
124 char *buf; /* String buffer base. */ 124 MRef p; /* String buffer pointer. */
125 MSize n; /* String buffer length. */ 125 MRef e; /* String buffer end pointer. */
126 MSize sz; /* String buffer size. */ 126 MRef b; /* String buffer base. */
127 MRef L; /* lua_State, used for buffer resizing. */
127} SBuf; 128} SBuf;
128 129
129/* -- Tags and values ----------------------------------------------------- */ 130/* -- Tags and values ----------------------------------------------------- */
@@ -516,8 +517,8 @@ typedef struct global_State {
516 lua_Alloc allocf; /* Memory allocator. */ 517 lua_Alloc allocf; /* Memory allocator. */
517 void *allocd; /* Memory allocator data. */ 518 void *allocd; /* Memory allocator data. */
518 GCState gc; /* Garbage collector. */ 519 GCState gc; /* Garbage collector. */
519 SBuf tmpbuf; /* Temporary buffer for string concatenation. */ 520 volatile int32_t vmstate; /* VM state or current JIT code trace number. */
520 Node nilnode; /* Fallback 1-element hash part (nil key and value). */ 521 SBuf tmpbuf; /* Temporary string buffer. */
521 GCstr strempty; /* Empty string. */ 522 GCstr strempty; /* Empty string. */
522 uint8_t stremptyz; /* Zero terminator of empty string. */ 523 uint8_t stremptyz; /* Zero terminator of empty string. */
523 uint8_t hookmask; /* Hook mask. */ 524 uint8_t hookmask; /* Hook mask. */
@@ -526,17 +527,17 @@ typedef struct global_State {
526 GCRef mainthref; /* Link to main thread. */ 527 GCRef mainthref; /* Link to main thread. */
527 TValue registrytv; /* Anchor for registry. */ 528 TValue registrytv; /* Anchor for registry. */
528 TValue tmptv, tmptv2; /* Temporary TValues. */ 529 TValue tmptv, tmptv2; /* Temporary TValues. */
530 Node nilnode; /* Fallback 1-element hash part (nil key and value). */
529 GCupval uvhead; /* Head of double-linked list of all open upvalues. */ 531 GCupval uvhead; /* Head of double-linked list of all open upvalues. */
530 int32_t hookcount; /* Instruction hook countdown. */ 532 int32_t hookcount; /* Instruction hook countdown. */
531 int32_t hookcstart; /* Start count for instruction hook counter. */ 533 int32_t hookcstart; /* Start count for instruction hook counter. */
532 lua_Hook hookf; /* Hook function. */ 534 lua_Hook hookf; /* Hook function. */
533 lua_CFunction wrapf; /* Wrapper for C function calls. */ 535 lua_CFunction wrapf; /* Wrapper for C function calls. */
534 lua_CFunction panic; /* Called as a last resort for errors. */ 536 lua_CFunction panic; /* Called as a last resort for errors. */
535 volatile int32_t vmstate; /* VM state or current JIT code trace number. */
536 BCIns bc_cfunc_int; /* Bytecode for internal C function calls. */ 537 BCIns bc_cfunc_int; /* Bytecode for internal C function calls. */
537 BCIns bc_cfunc_ext; /* Bytecode for external C function calls. */ 538 BCIns bc_cfunc_ext; /* Bytecode for external C function calls. */
538 GCRef jit_L; /* Current JIT code lua_State or NULL. */ 539 GCRef cur_L; /* Currently executing lua_State. */
539 MRef jit_base; /* Current JIT code L->base. */ 540 MRef jit_base; /* Current JIT code L->base or NULL. */
540 MRef ctype_state; /* Pointer to C type state. */ 541 MRef ctype_state; /* Pointer to C type state. */
541 GCRef gcroot[GCROOT_MAX]; /* GC roots. */ 542 GCRef gcroot[GCROOT_MAX]; /* GC roots. */
542} global_State; 543} global_State;
@@ -553,6 +554,7 @@ typedef struct global_State {
553#define HOOK_ACTIVE_SHIFT 4 554#define HOOK_ACTIVE_SHIFT 4
554#define HOOK_VMEVENT 0x20 555#define HOOK_VMEVENT 0x20
555#define HOOK_GC 0x40 556#define HOOK_GC 0x40
557#define HOOK_PROFILE 0x80
556#define hook_active(g) ((g)->hookmask & HOOK_ACTIVE) 558#define hook_active(g) ((g)->hookmask & HOOK_ACTIVE)
557#define hook_enter(g) ((g)->hookmask |= HOOK_ACTIVE) 559#define hook_enter(g) ((g)->hookmask |= HOOK_ACTIVE)
558#define hook_entergc(g) ((g)->hookmask |= (HOOK_ACTIVE|HOOK_GC)) 560#define hook_entergc(g) ((g)->hookmask |= (HOOK_ACTIVE|HOOK_GC))
@@ -810,11 +812,7 @@ static LJ_AINLINE int32_t lj_num2bit(lua_Number n)
810#endif 812#endif
811} 813}
812 814
813#if LJ_TARGET_X86 && !defined(__SSE2__)
814#define lj_num2int(n) lj_num2bit((n))
815#else
816#define lj_num2int(n) ((int32_t)(n)) 815#define lj_num2int(n) ((int32_t)(n))
817#endif
818 816
819static LJ_AINLINE uint64_t lj_num2u64(lua_Number n) 817static LJ_AINLINE uint64_t lj_num2u64(lua_Number n)
820{ 818{
@@ -851,6 +849,7 @@ LJ_DATA const char *const lj_obj_itypename[~LJ_TNUMX+1];
851#define lj_typename(o) (lj_obj_itypename[itypemap(o)]) 849#define lj_typename(o) (lj_obj_itypename[itypemap(o)])
852 850
853/* Compare two objects without calling metamethods. */ 851/* Compare two objects without calling metamethods. */
854LJ_FUNC int lj_obj_equal(cTValue *o1, cTValue *o2); 852LJ_FUNC int LJ_FASTCALL lj_obj_equal(cTValue *o1, cTValue *o2);
853LJ_FUNC const void * LJ_FASTCALL lj_obj_ptr(cTValue *o);
855 854
856#endif 855#endif
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
index 8a5b41cc..7194e618 100644
--- a/src/lj_opt_fold.c
+++ b/src/lj_opt_fold.c
@@ -14,18 +14,21 @@
14 14
15#if LJ_HASJIT 15#if LJ_HASJIT
16 16
17#include "lj_buf.h"
17#include "lj_str.h" 18#include "lj_str.h"
18#include "lj_tab.h" 19#include "lj_tab.h"
19#include "lj_ir.h" 20#include "lj_ir.h"
20#include "lj_jit.h" 21#include "lj_jit.h"
22#include "lj_ircall.h"
21#include "lj_iropt.h" 23#include "lj_iropt.h"
22#include "lj_trace.h" 24#include "lj_trace.h"
23#if LJ_HASFFI 25#if LJ_HASFFI
24#include "lj_ctype.h" 26#include "lj_ctype.h"
25#endif
26#include "lj_carith.h" 27#include "lj_carith.h"
28#endif
27#include "lj_vm.h" 29#include "lj_vm.h"
28#include "lj_strscan.h" 30#include "lj_strscan.h"
31#include "lj_strfmt.h"
29 32
30/* Here's a short description how the FOLD engine processes instructions: 33/* Here's a short description how the FOLD engine processes instructions:
31** 34**
@@ -155,13 +158,14 @@ typedef IRRef (LJ_FASTCALL *FoldFunc)(jit_State *J);
155 158
156/* Barrier to prevent folding across a GC step. 159/* Barrier to prevent folding across a GC step.
157** GC steps can only happen at the head of a trace and at LOOP. 160** GC steps can only happen at the head of a trace and at LOOP.
158** And the GC is only driven forward if there is at least one allocation. 161** And the GC is only driven forward if there's at least one allocation.
159*/ 162*/
160#define gcstep_barrier(J, ref) \ 163#define gcstep_barrier(J, ref) \
161 ((ref) < J->chain[IR_LOOP] && \ 164 ((ref) < J->chain[IR_LOOP] && \
162 (J->chain[IR_SNEW] || J->chain[IR_XSNEW] || \ 165 (J->chain[IR_SNEW] || J->chain[IR_XSNEW] || \
163 J->chain[IR_TNEW] || J->chain[IR_TDUP] || \ 166 J->chain[IR_TNEW] || J->chain[IR_TDUP] || \
164 J->chain[IR_CNEW] || J->chain[IR_CNEWI] || J->chain[IR_TOSTR])) 167 J->chain[IR_CNEW] || J->chain[IR_CNEWI] || \
168 J->chain[IR_BUFSTR] || J->chain[IR_TOSTR] || J->chain[IR_CALLA]))
165 169
166/* -- Constant folding for FP numbers ------------------------------------- */ 170/* -- Constant folding for FP numbers ------------------------------------- */
167 171
@@ -336,11 +340,9 @@ LJFOLDF(kfold_intcomp0)
336static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op) 340static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op)
337{ 341{
338 switch (op) { 342 switch (op) {
339#if LJ_64 || LJ_HASFFI 343#if LJ_HASFFI
340 case IR_ADD: k1 += k2; break; 344 case IR_ADD: k1 += k2; break;
341 case IR_SUB: k1 -= k2; break; 345 case IR_SUB: k1 -= k2; break;
342#endif
343#if LJ_HASFFI
344 case IR_MUL: k1 *= k2; break; 346 case IR_MUL: k1 *= k2; break;
345 case IR_BAND: k1 &= k2; break; 347 case IR_BAND: k1 &= k2; break;
346 case IR_BOR: k1 |= k2; break; 348 case IR_BOR: k1 |= k2; break;
@@ -392,20 +394,10 @@ LJFOLD(BROL KINT64 KINT)
392LJFOLD(BROR KINT64 KINT) 394LJFOLD(BROR KINT64 KINT)
393LJFOLDF(kfold_int64shift) 395LJFOLDF(kfold_int64shift)
394{ 396{
395#if LJ_HASFFI || LJ_64 397#if LJ_HASFFI
396 uint64_t k = ir_k64(fleft)->u64; 398 uint64_t k = ir_k64(fleft)->u64;
397 int32_t sh = (fright->i & 63); 399 int32_t sh = (fright->i & 63);
398 switch ((IROp)fins->o) { 400 return INT64FOLD(lj_carith_shift64(k, sh, fins->o - IR_BSHL));
399 case IR_BSHL: k <<= sh; break;
400#if LJ_HASFFI
401 case IR_BSHR: k >>= sh; break;
402 case IR_BSAR: k = (uint64_t)((int64_t)k >> sh); break;
403 case IR_BROL: k = lj_rol(k, sh); break;
404 case IR_BROR: k = lj_ror(k, sh); break;
405#endif
406 default: lua_assert(0); break;
407 }
408 return INT64FOLD(k);
409#else 401#else
410 UNUSED(J); lua_assert(0); return FAILFOLD; 402 UNUSED(J); lua_assert(0); return FAILFOLD;
411#endif 403#endif
@@ -527,6 +519,179 @@ LJFOLDF(kfold_strcmp)
527 return NEXTFOLD; 519 return NEXTFOLD;
528} 520}
529 521
522/* -- Constant folding and forwarding for buffers ------------------------- */
523
524/*
525** Buffer ops perform stores, but their effect is limited to the buffer
526** itself. Also, buffer ops are chained: a use of an op implies a use of
527** all other ops up the chain. Conversely, if an op is unused, all ops
528** up the chain can go unsed. This largely eliminates the need to treat
529** them as stores.
530**
531** Alas, treating them as normal (IRM_N) ops doesn't work, because they
532** cannot be CSEd in isolation. CSE for IRM_N is implicitly done in LOOP
533** or if FOLD is disabled.
534**
535** The compromise is to declare them as loads, emit them like stores and
536** CSE whole chains manually when the BUFSTR is to be emitted. Any chain
537** fragments left over from CSE are eliminated by DCE.
538*/
539
540/* BUFHDR is emitted like a store, see below. */
541
542LJFOLD(BUFPUT BUFHDR BUFSTR)
543LJFOLDF(bufput_append)
544{
545 /* New buffer, no other buffer op inbetween and same buffer? */
546 if ((J->flags & JIT_F_OPT_FWD) &&
547 !(fleft->op2 & IRBUFHDR_APPEND) &&
548 fleft->prev == fright->op2 &&
549 fleft->op1 == IR(fright->op2)->op1) {
550 IRRef ref = fins->op1;
551 IR(ref)->op2 = (fleft->op2 | IRBUFHDR_APPEND); /* Modify BUFHDR. */
552 IR(ref)->op1 = fright->op1;
553 return ref;
554 }
555 return EMITFOLD; /* Always emit, CSE later. */
556}
557
558LJFOLD(BUFPUT any any)
559LJFOLDF(bufput_kgc)
560{
561 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fright->o == IR_KGC) {
562 GCstr *s2 = ir_kstr(fright);
563 if (s2->len == 0) { /* Empty string? */
564 return LEFTFOLD;
565 } else {
566 if (fleft->o == IR_BUFPUT && irref_isk(fleft->op2) &&
567 !irt_isphi(fleft->t)) { /* Join two constant string puts in a row. */
568 GCstr *s1 = ir_kstr(IR(fleft->op2));
569 IRRef kref = lj_ir_kstr(J, lj_buf_cat2str(J->L, s1, s2));
570 /* lj_ir_kstr() may realloc the IR and invalidates any IRIns *. */
571 IR(fins->op1)->op2 = kref; /* Modify previous BUFPUT. */
572 return fins->op1;
573 }
574 }
575 }
576 return EMITFOLD; /* Always emit, CSE later. */
577}
578
579LJFOLD(BUFSTR any any)
580LJFOLDF(bufstr_kfold_cse)
581{
582 lua_assert(fleft->o == IR_BUFHDR || fleft->o == IR_BUFPUT ||
583 fleft->o == IR_CALLL);
584 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
585 if (fleft->o == IR_BUFHDR) { /* No put operations? */
586 if (!(fleft->op2 & IRBUFHDR_APPEND)) /* Empty buffer? */
587 return lj_ir_kstr(J, &J2G(J)->strempty);
588 fins->op1 = fleft->prev; /* Relies on checks in bufput_append. */
589 return CSEFOLD;
590 } else if (fleft->o == IR_BUFPUT) {
591 IRIns *irb = IR(fleft->op1);
592 if (irb->o == IR_BUFHDR && !(irb->op2 & IRBUFHDR_APPEND))
593 return fleft->op2; /* Shortcut for a single put operation. */
594 }
595 }
596 /* Try to CSE the whole chain. */
597 if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
598 IRRef ref = J->chain[IR_BUFSTR];
599 while (ref) {
600 IRIns *irs = IR(ref), *ira = fleft, *irb = IR(irs->op1);
601 while (ira->o == irb->o && ira->op2 == irb->op2) {
602 lua_assert(ira->o == IR_BUFHDR || ira->o == IR_BUFPUT ||
603 ira->o == IR_CALLL || ira->o == IR_CARG);
604 if (ira->o == IR_BUFHDR && !(ira->op2 & IRBUFHDR_APPEND))
605 return ref; /* CSE succeeded. */
606 if (ira->o == IR_CALLL && ira->op2 == IRCALL_lj_buf_puttab)
607 break;
608 ira = IR(ira->op1);
609 irb = IR(irb->op1);
610 }
611 ref = irs->prev;
612 }
613 }
614 return EMITFOLD; /* No CSE possible. */
615}
616
617LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_reverse)
618LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_upper)
619LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_lower)
620LJFOLD(CALLL CARG IRCALL_lj_strfmt_putquoted)
621LJFOLDF(bufput_kfold_op)
622{
623 if (irref_isk(fleft->op2)) {
624 const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
625 SBuf *sb = lj_buf_tmp_(J->L);
626 sb = ((SBuf * (LJ_FASTCALL *)(SBuf *, GCstr *))ci->func)(sb,
627 ir_kstr(IR(fleft->op2)));
628 fins->o = IR_BUFPUT;
629 fins->op1 = fleft->op1;
630 fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
631 return RETRYFOLD;
632 }
633 return EMITFOLD; /* Always emit, CSE later. */
634}
635
636LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_rep)
637LJFOLDF(bufput_kfold_rep)
638{
639 if (irref_isk(fleft->op2)) {
640 IRIns *irc = IR(fleft->op1);
641 if (irref_isk(irc->op2)) {
642 SBuf *sb = lj_buf_tmp_(J->L);
643 sb = lj_buf_putstr_rep(sb, ir_kstr(IR(irc->op2)), IR(fleft->op2)->i);
644 fins->o = IR_BUFPUT;
645 fins->op1 = irc->op1;
646 fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
647 return RETRYFOLD;
648 }
649 }
650 return EMITFOLD; /* Always emit, CSE later. */
651}
652
653LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfxint)
654LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum_int)
655LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum_uint)
656LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum)
657LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfstr)
658LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfchar)
659LJFOLDF(bufput_kfold_fmt)
660{
661 IRIns *irc = IR(fleft->op1);
662 lua_assert(irref_isk(irc->op2)); /* SFormat must be const. */
663 if (irref_isk(fleft->op2)) {
664 SFormat sf = (SFormat)IR(irc->op2)->i;
665 IRIns *ira = IR(fleft->op2);
666 SBuf *sb = lj_buf_tmp_(J->L);
667 switch (fins->op2) {
668 case IRCALL_lj_strfmt_putfxint:
669 sb = lj_strfmt_putfxint(sb, sf, ir_k64(ira)->u64);
670 break;
671 case IRCALL_lj_strfmt_putfstr:
672 sb = lj_strfmt_putfstr(sb, sf, ir_kstr(ira));
673 break;
674 case IRCALL_lj_strfmt_putfchar:
675 sb = lj_strfmt_putfchar(sb, sf, ira->i);
676 break;
677 case IRCALL_lj_strfmt_putfnum_int:
678 case IRCALL_lj_strfmt_putfnum_uint:
679 case IRCALL_lj_strfmt_putfnum:
680 default: {
681 const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
682 sb = ((SBuf * (*)(SBuf *, SFormat, lua_Number))ci->func)(sb, sf,
683 ir_knum(ira)->n);
684 break;
685 }
686 }
687 fins->o = IR_BUFPUT;
688 fins->op1 = irc->op1;
689 fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
690 return RETRYFOLD;
691 }
692 return EMITFOLD; /* Always emit, CSE later. */
693}
694
530/* -- Constant folding of pointer arithmetic ------------------------------ */ 695/* -- Constant folding of pointer arithmetic ------------------------------ */
531 696
532LJFOLD(ADD KGC KINT) 697LJFOLD(ADD KGC KINT)
@@ -647,27 +812,22 @@ LJFOLD(CONV KNUM IRCONV_INT_NUM)
647LJFOLDF(kfold_conv_knum_int_num) 812LJFOLDF(kfold_conv_knum_int_num)
648{ 813{
649 lua_Number n = knumleft; 814 lua_Number n = knumleft;
650 if (!(fins->op2 & IRCONV_TRUNC)) { 815 int32_t k = lj_num2int(n);
651 int32_t k = lj_num2int(n); 816 if (irt_isguard(fins->t) && n != (lua_Number)k) {
652 if (irt_isguard(fins->t) && n != (lua_Number)k) { 817 /* We're about to create a guard which always fails, like CONV +1.5.
653 /* We're about to create a guard which always fails, like CONV +1.5. 818 ** Some pathological loops cause this during LICM, e.g.:
654 ** Some pathological loops cause this during LICM, e.g.: 819 ** local x,k,t = 0,1.5,{1,[1.5]=2}
655 ** local x,k,t = 0,1.5,{1,[1.5]=2} 820 ** for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end
656 ** for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end 821 ** assert(x == 300)
657 ** assert(x == 300) 822 */
658 */ 823 return FAILFOLD;
659 return FAILFOLD;
660 }
661 return INTFOLD(k);
662 } else {
663 return INTFOLD((int32_t)n);
664 } 824 }
825 return INTFOLD(k);
665} 826}
666 827
667LJFOLD(CONV KNUM IRCONV_U32_NUM) 828LJFOLD(CONV KNUM IRCONV_U32_NUM)
668LJFOLDF(kfold_conv_knum_u32_num) 829LJFOLDF(kfold_conv_knum_u32_num)
669{ 830{
670 lua_assert((fins->op2 & IRCONV_TRUNC));
671#ifdef _MSC_VER 831#ifdef _MSC_VER
672 { /* Workaround for MSVC bug. */ 832 { /* Workaround for MSVC bug. */
673 volatile uint32_t u = (uint32_t)knumleft; 833 volatile uint32_t u = (uint32_t)knumleft;
@@ -681,27 +841,27 @@ LJFOLDF(kfold_conv_knum_u32_num)
681LJFOLD(CONV KNUM IRCONV_I64_NUM) 841LJFOLD(CONV KNUM IRCONV_I64_NUM)
682LJFOLDF(kfold_conv_knum_i64_num) 842LJFOLDF(kfold_conv_knum_i64_num)
683{ 843{
684 lua_assert((fins->op2 & IRCONV_TRUNC));
685 return INT64FOLD((uint64_t)(int64_t)knumleft); 844 return INT64FOLD((uint64_t)(int64_t)knumleft);
686} 845}
687 846
688LJFOLD(CONV KNUM IRCONV_U64_NUM) 847LJFOLD(CONV KNUM IRCONV_U64_NUM)
689LJFOLDF(kfold_conv_knum_u64_num) 848LJFOLDF(kfold_conv_knum_u64_num)
690{ 849{
691 lua_assert((fins->op2 & IRCONV_TRUNC));
692 return INT64FOLD(lj_num2u64(knumleft)); 850 return INT64FOLD(lj_num2u64(knumleft));
693} 851}
694 852
695LJFOLD(TOSTR KNUM) 853LJFOLD(TOSTR KNUM any)
696LJFOLDF(kfold_tostr_knum) 854LJFOLDF(kfold_tostr_knum)
697{ 855{
698 return lj_ir_kstr(J, lj_str_fromnum(J->L, &knumleft)); 856 return lj_ir_kstr(J, lj_strfmt_num(J->L, ir_knum(fleft)));
699} 857}
700 858
701LJFOLD(TOSTR KINT) 859LJFOLD(TOSTR KINT any)
702LJFOLDF(kfold_tostr_kint) 860LJFOLDF(kfold_tostr_kint)
703{ 861{
704 return lj_ir_kstr(J, lj_str_fromint(J->L, fleft->i)); 862 return lj_ir_kstr(J, fins->op2 == IRTOSTR_INT ?
863 lj_strfmt_int(J->L, fleft->i) :
864 lj_strfmt_char(J->L, fleft->i));
705} 865}
706 866
707LJFOLD(STRTO KGC) 867LJFOLD(STRTO KGC)
@@ -1199,7 +1359,9 @@ static TRef simplify_intmul_k(jit_State *J, int32_t k)
1199 ** But this is mainly intended for simple address arithmetic. 1359 ** But this is mainly intended for simple address arithmetic.
1200 ** Also it's easier for the backend to optimize the original multiplies. 1360 ** Also it's easier for the backend to optimize the original multiplies.
1201 */ 1361 */
1202 if (k == 1) { /* i * 1 ==> i */ 1362 if (k == 0) { /* i * 0 ==> 0 */
1363 return RIGHTFOLD;
1364 } else if (k == 1) { /* i * 1 ==> i */
1203 return LEFTFOLD; 1365 return LEFTFOLD;
1204 } else if ((k & (k-1)) == 0) { /* i * 2^k ==> i << k */ 1366 } else if ((k & (k-1)) == 0) { /* i * 2^k ==> i << k */
1205 fins->o = IR_BSHL; 1367 fins->o = IR_BSHL;
@@ -1212,9 +1374,7 @@ static TRef simplify_intmul_k(jit_State *J, int32_t k)
1212LJFOLD(MUL any KINT) 1374LJFOLD(MUL any KINT)
1213LJFOLDF(simplify_intmul_k32) 1375LJFOLDF(simplify_intmul_k32)
1214{ 1376{
1215 if (fright->i == 0) /* i * 0 ==> 0 */ 1377 if (fright->i >= 0)
1216 return INTFOLD(0);
1217 else if (fright->i > 0)
1218 return simplify_intmul_k(J, fright->i); 1378 return simplify_intmul_k(J, fright->i);
1219 return NEXTFOLD; 1379 return NEXTFOLD;
1220} 1380}
@@ -1222,14 +1382,13 @@ LJFOLDF(simplify_intmul_k32)
1222LJFOLD(MUL any KINT64) 1382LJFOLD(MUL any KINT64)
1223LJFOLDF(simplify_intmul_k64) 1383LJFOLDF(simplify_intmul_k64)
1224{ 1384{
1225 if (ir_kint64(fright)->u64 == 0) /* i * 0 ==> 0 */ 1385#if LJ_HASFFI
1226 return INT64FOLD(0); 1386 if (ir_kint64(fright)->u64 < 0x80000000u)
1227#if LJ_64
1228 /* NYI: SPLIT for BSHL and 32 bit backend support. */
1229 else if (ir_kint64(fright)->u64 < 0x80000000u)
1230 return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64); 1387 return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64);
1231#endif
1232 return NEXTFOLD; 1388 return NEXTFOLD;
1389#else
1390 UNUSED(J); lua_assert(0); return FAILFOLD;
1391#endif
1233} 1392}
1234 1393
1235LJFOLD(MOD any KINT) 1394LJFOLD(MOD any KINT)
@@ -1529,7 +1688,7 @@ LJFOLD(BOR BOR KINT64)
1529LJFOLD(BXOR BXOR KINT64) 1688LJFOLD(BXOR BXOR KINT64)
1530LJFOLDF(reassoc_intarith_k64) 1689LJFOLDF(reassoc_intarith_k64)
1531{ 1690{
1532#if LJ_HASFFI || LJ_64 1691#if LJ_HASFFI
1533 IRIns *irk = IR(fleft->op2); 1692 IRIns *irk = IR(fleft->op2);
1534 if (irk->o == IR_KINT64) { 1693 if (irk->o == IR_KINT64) {
1535 uint64_t k = kfold_int64arith(ir_k64(irk)->u64, 1694 uint64_t k = kfold_int64arith(ir_k64(irk)->u64,
@@ -1946,6 +2105,7 @@ LJFOLDF(fwd_href_tdup)
1946** an aliased table, as it may invalidate all of the pointers and fields. 2105** an aliased table, as it may invalidate all of the pointers and fields.
1947** Only HREF needs the NEWREF check -- AREF and HREFK already depend on 2106** Only HREF needs the NEWREF check -- AREF and HREFK already depend on
1948** FLOADs. And NEWREF itself is treated like a store (see below). 2107** FLOADs. And NEWREF itself is treated like a store (see below).
2108** LREF is constant (per trace) since coroutine switches are not inlined.
1949*/ 2109*/
1950LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE) 2110LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE)
1951LJFOLDF(fload_tab_tnew_asize) 2111LJFOLDF(fload_tab_tnew_asize)
@@ -2009,6 +2169,14 @@ LJFOLDF(fload_str_len_snew)
2009 return NEXTFOLD; 2169 return NEXTFOLD;
2010} 2170}
2011 2171
2172LJFOLD(FLOAD TOSTR IRFL_STR_LEN)
2173LJFOLDF(fload_str_len_tostr)
2174{
2175 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fleft->op2 == IRTOSTR_CHAR)
2176 return INTFOLD(1);
2177 return NEXTFOLD;
2178}
2179
2012/* The C type ID of cdata objects is immutable. */ 2180/* The C type ID of cdata objects is immutable. */
2013LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID) 2181LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID)
2014LJFOLDF(fload_cdata_typeid_kgc) 2182LJFOLDF(fload_cdata_typeid_kgc)
@@ -2055,6 +2223,8 @@ LJFOLDF(fload_cdata_ptr_int64_cnew)
2055} 2223}
2056 2224
2057LJFOLD(FLOAD any IRFL_STR_LEN) 2225LJFOLD(FLOAD any IRFL_STR_LEN)
2226LJFOLD(FLOAD any IRFL_FUNC_ENV)
2227LJFOLD(FLOAD any IRFL_THREAD_ENV)
2058LJFOLD(FLOAD any IRFL_CDATA_CTYPEID) 2228LJFOLD(FLOAD any IRFL_CDATA_CTYPEID)
2059LJFOLD(FLOAD any IRFL_CDATA_PTR) 2229LJFOLD(FLOAD any IRFL_CDATA_PTR)
2060LJFOLD(FLOAD any IRFL_CDATA_INT) 2230LJFOLD(FLOAD any IRFL_CDATA_INT)
@@ -2120,6 +2290,17 @@ LJFOLDF(barrier_tnew_tdup)
2120 return DROPFOLD; 2290 return DROPFOLD;
2121} 2291}
2122 2292
2293/* -- Profiling ----------------------------------------------------------- */
2294
2295LJFOLD(PROF any any)
2296LJFOLDF(prof)
2297{
2298 IRRef ref = J->chain[IR_PROF];
2299 if (ref+1 == J->cur.nins) /* Drop neighbouring IR_PROF. */
2300 return ref;
2301 return EMITFOLD;
2302}
2303
2123/* -- Stores and allocations ---------------------------------------------- */ 2304/* -- Stores and allocations ---------------------------------------------- */
2124 2305
2125/* Stores and allocations cannot be folded or passed on to CSE in general. 2306/* Stores and allocations cannot be folded or passed on to CSE in general.
@@ -2142,8 +2323,9 @@ LJFOLD(XSTORE any any)
2142LJFOLDX(lj_opt_dse_xstore) 2323LJFOLDX(lj_opt_dse_xstore)
2143 2324
2144LJFOLD(NEWREF any any) /* Treated like a store. */ 2325LJFOLD(NEWREF any any) /* Treated like a store. */
2145LJFOLD(CALLS any any) 2326LJFOLD(CALLA any any)
2146LJFOLD(CALLL any any) /* Safeguard fallback. */ 2327LJFOLD(CALLL any any) /* Safeguard fallback. */
2328LJFOLD(CALLS any any)
2147LJFOLD(CALLXS any any) 2329LJFOLD(CALLXS any any)
2148LJFOLD(XBAR) 2330LJFOLD(XBAR)
2149LJFOLD(RETF any any) /* Modifies BASE. */ 2331LJFOLD(RETF any any) /* Modifies BASE. */
@@ -2151,6 +2333,7 @@ LJFOLD(TNEW any any)
2151LJFOLD(TDUP any) 2333LJFOLD(TDUP any)
2152LJFOLD(CNEW any any) 2334LJFOLD(CNEW any any)
2153LJFOLD(XSNEW any any) 2335LJFOLD(XSNEW any any)
2336LJFOLD(BUFHDR any any)
2154LJFOLDX(lj_ir_emit) 2337LJFOLDX(lj_ir_emit)
2155 2338
2156/* ------------------------------------------------------------------------ */ 2339/* ------------------------------------------------------------------------ */
diff --git a/src/lj_opt_loop.c b/src/lj_opt_loop.c
index fa001151..91f8067c 100644
--- a/src/lj_opt_loop.c
+++ b/src/lj_opt_loop.c
@@ -11,7 +11,7 @@
11#if LJ_HASJIT 11#if LJ_HASJIT
12 12
13#include "lj_err.h" 13#include "lj_err.h"
14#include "lj_str.h" 14#include "lj_buf.h"
15#include "lj_ir.h" 15#include "lj_ir.h"
16#include "lj_jit.h" 16#include "lj_jit.h"
17#include "lj_iropt.h" 17#include "lj_iropt.h"
@@ -254,9 +254,16 @@ static void loop_subst_snap(jit_State *J, SnapShot *osnap,
254 J->cur.nsnapmap = (uint16_t)(nmap - J->cur.snapmap); 254 J->cur.nsnapmap = (uint16_t)(nmap - J->cur.snapmap);
255} 255}
256 256
257typedef struct LoopState {
258 jit_State *J;
259 IRRef1 *subst;
260 MSize sizesubst;
261} LoopState;
262
257/* Unroll loop. */ 263/* Unroll loop. */
258static void loop_unroll(jit_State *J) 264static void loop_unroll(LoopState *lps)
259{ 265{
266 jit_State *J = lps->J;
260 IRRef1 phi[LJ_MAX_PHI]; 267 IRRef1 phi[LJ_MAX_PHI];
261 uint32_t nphi = 0; 268 uint32_t nphi = 0;
262 IRRef1 *subst; 269 IRRef1 *subst;
@@ -265,13 +272,13 @@ static void loop_unroll(jit_State *J)
265 SnapEntry *loopmap, *psentinel; 272 SnapEntry *loopmap, *psentinel;
266 IRRef ins, invar; 273 IRRef ins, invar;
267 274
268 /* Use temp buffer for substitution table. 275 /* Allocate substitution table.
269 ** Only non-constant refs in [REF_BIAS,invar) are valid indexes. 276 ** Only non-constant refs in [REF_BIAS,invar) are valid indexes.
270 ** Caveat: don't call into the VM or run the GC or the buffer may be gone.
271 */ 277 */
272 invar = J->cur.nins; 278 invar = J->cur.nins;
273 subst = (IRRef1 *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, 279 lps->sizesubst = invar - REF_BIAS;
274 (invar-REF_BIAS)*sizeof(IRRef1)) - REF_BIAS; 280 lps->subst = lj_mem_newvec(J->L, lps->sizesubst, IRRef1);
281 subst = lps->subst - REF_BIAS;
275 subst[REF_BASE] = REF_BASE; 282 subst[REF_BASE] = REF_BASE;
276 283
277 /* LOOP separates the pre-roll from the loop body. */ 284 /* LOOP separates the pre-roll from the loop body. */
@@ -396,7 +403,7 @@ static void loop_undo(jit_State *J, IRRef ins, SnapNo nsnap, MSize nsnapmap)
396static TValue *cploop_opt(lua_State *L, lua_CFunction dummy, void *ud) 403static TValue *cploop_opt(lua_State *L, lua_CFunction dummy, void *ud)
397{ 404{
398 UNUSED(L); UNUSED(dummy); 405 UNUSED(L); UNUSED(dummy);
399 loop_unroll((jit_State *)ud); 406 loop_unroll((LoopState *)ud);
400 return NULL; 407 return NULL;
401} 408}
402 409
@@ -406,7 +413,13 @@ int lj_opt_loop(jit_State *J)
406 IRRef nins = J->cur.nins; 413 IRRef nins = J->cur.nins;
407 SnapNo nsnap = J->cur.nsnap; 414 SnapNo nsnap = J->cur.nsnap;
408 MSize nsnapmap = J->cur.nsnapmap; 415 MSize nsnapmap = J->cur.nsnapmap;
409 int errcode = lj_vm_cpcall(J->L, NULL, J, cploop_opt); 416 LoopState lps;
417 int errcode;
418 lps.J = J;
419 lps.subst = NULL;
420 lps.sizesubst = 0;
421 errcode = lj_vm_cpcall(J->L, NULL, &lps, cploop_opt);
422 lj_mem_freevec(J2G(J), lps.subst, lps.sizesubst, IRRef1);
410 if (LJ_UNLIKELY(errcode)) { 423 if (LJ_UNLIKELY(errcode)) {
411 lua_State *L = J->L; 424 lua_State *L = J->L;
412 if (errcode == LUA_ERRRUN && tvisnumber(L->top-1)) { /* Trace error? */ 425 if (errcode == LUA_ERRRUN && tvisnumber(L->top-1)) { /* Trace error? */
diff --git a/src/lj_opt_mem.c b/src/lj_opt_mem.c
index c8cdc3e5..400fb48e 100644
--- a/src/lj_opt_mem.c
+++ b/src/lj_opt_mem.c
@@ -17,6 +17,7 @@
17#include "lj_ir.h" 17#include "lj_ir.h"
18#include "lj_jit.h" 18#include "lj_jit.h"
19#include "lj_iropt.h" 19#include "lj_iropt.h"
20#include "lj_ircall.h"
20 21
21/* Some local macros to save typing. Undef'd at the end. */ 22/* Some local macros to save typing. Undef'd at the end. */
22#define IR(ref) (&J->cur.ir[(ref)]) 23#define IR(ref) (&J->cur.ir[(ref)])
@@ -308,7 +309,7 @@ int LJ_FASTCALL lj_opt_fwd_href_nokey(jit_State *J)
308 return 1; /* No conflict. Can fold to niltv. */ 309 return 1; /* No conflict. Can fold to niltv. */
309} 310}
310 311
311/* Check whether there's no aliasing NEWREF for the left operand. */ 312/* Check whether there's no aliasing NEWREF/table.clear for the left operand. */
312int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim) 313int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim)
313{ 314{
314 IRRef ta = fins->op1; 315 IRRef ta = fins->op1;
@@ -319,6 +320,14 @@ int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim)
319 return 0; /* Conflict. */ 320 return 0; /* Conflict. */
320 ref = newref->prev; 321 ref = newref->prev;
321 } 322 }
323 ref = J->chain[IR_CALLS];
324 while (ref > lim) {
325 IRIns *calls = IR(ref);
326 if (calls->op2 == IRCALL_lj_tab_clear &&
327 (ta == calls->op1 || aa_table(J, ta, calls->op1) != ALIAS_NO))
328 return 0; /* Conflict. */
329 ref = calls->prev;
330 }
322 return 1; /* No conflict. Can safely FOLD/CSE. */ 331 return 1; /* No conflict. Can safely FOLD/CSE. */
323} 332}
324 333
diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c
index 2cecf65d..5effcdb4 100644
--- a/src/lj_opt_narrow.c
+++ b/src/lj_opt_narrow.c
@@ -496,8 +496,7 @@ TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef tr)
496{ 496{
497 lua_assert(tref_isnumber(tr)); 497 lua_assert(tref_isnumber(tr));
498 if (tref_isnum(tr)) 498 if (tref_isnum(tr))
499 return emitir(IRT(IR_CONV, IRT_INTP), tr, 499 return emitir(IRT(IR_CONV, IRT_INTP), tr, (IRT_INTP<<5)|IRT_NUM|IRCONV_ANY);
500 (IRT_INTP<<5)|IRT_NUM|IRCONV_TRUNC|IRCONV_ANY);
501 /* Undefined overflow semantics allow stripping of ADDOV, SUBOV and MULOV. */ 500 /* Undefined overflow semantics allow stripping of ADDOV, SUBOV and MULOV. */
502 return narrow_stripov(J, tr, IR_MULOV, 501 return narrow_stripov(J, tr, IR_MULOV,
503 LJ_64 ? ((IRT_INTP<<5)|IRT_INT|IRCONV_SEXT) : 502 LJ_64 ? ((IRT_INTP<<5)|IRT_INT|IRCONV_SEXT) :
diff --git a/src/lj_opt_split.c b/src/lj_opt_split.c
index 99d10584..05ea1cc0 100644
--- a/src/lj_opt_split.c
+++ b/src/lj_opt_split.c
@@ -11,7 +11,7 @@
11#if LJ_HASJIT && (LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) 11#if LJ_HASJIT && (LJ_SOFTFP || (LJ_32 && LJ_HASFFI))
12 12
13#include "lj_err.h" 13#include "lj_err.h"
14#include "lj_str.h" 14#include "lj_buf.h"
15#include "lj_ir.h" 15#include "lj_ir.h"
16#include "lj_jit.h" 16#include "lj_jit.h"
17#include "lj_ircall.h" 17#include "lj_ircall.h"
@@ -139,6 +139,7 @@ static IRRef split_call_l(jit_State *J, IRRef1 *hisubst, IRIns *oir,
139 ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id); 139 ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
140 return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); 140 return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
141} 141}
142#endif
142 143
143/* Emit a CALLN with one split 64 bit argument and a 32 bit argument. */ 144/* Emit a CALLN with one split 64 bit argument and a 32 bit argument. */
144static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir, 145static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir,
@@ -155,7 +156,6 @@ static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir,
155 ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id); 156 ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
156 return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); 157 return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
157} 158}
158#endif
159 159
160/* Emit a CALLN with two split 64 bit arguments. */ 160/* Emit a CALLN with two split 64 bit arguments. */
161static IRRef split_call_ll(jit_State *J, IRRef1 *hisubst, IRIns *oir, 161static IRRef split_call_ll(jit_State *J, IRRef1 *hisubst, IRIns *oir,
@@ -195,6 +195,118 @@ static IRRef split_ptr(jit_State *J, IRIns *oir, IRRef ref)
195 return split_emit(J, IRTI(IR_ADD), nref, lj_ir_kint(J, ofs)); 195 return split_emit(J, IRTI(IR_ADD), nref, lj_ir_kint(J, ofs));
196} 196}
197 197
198#if LJ_HASFFI
199static IRRef split_bitshift(jit_State *J, IRRef1 *hisubst,
200 IRIns *oir, IRIns *nir, IRIns *ir)
201{
202 IROp op = ir->o;
203 IRRef kref = nir->op2;
204 if (irref_isk(kref)) { /* Optimize constant shifts. */
205 int32_t k = (IR(kref)->i & 63);
206 IRRef lo = nir->op1, hi = hisubst[ir->op1];
207 if (op == IR_BROL || op == IR_BROR) {
208 if (op == IR_BROR) k = (-k & 63);
209 if (k >= 32) { IRRef t = lo; lo = hi; hi = t; k -= 32; }
210 if (k == 0) {
211 passthrough:
212 J->cur.nins--;
213 ir->prev = lo;
214 return hi;
215 } else {
216 TRef k1, k2;
217 IRRef t1, t2, t3, t4;
218 J->cur.nins--;
219 k1 = lj_ir_kint(J, k);
220 k2 = lj_ir_kint(J, (-k & 31));
221 t1 = split_emit(J, IRTI(IR_BSHL), lo, k1);
222 t2 = split_emit(J, IRTI(IR_BSHL), hi, k1);
223 t3 = split_emit(J, IRTI(IR_BSHR), lo, k2);
224 t4 = split_emit(J, IRTI(IR_BSHR), hi, k2);
225 ir->prev = split_emit(J, IRTI(IR_BOR), t1, t4);
226 return split_emit(J, IRTI(IR_BOR), t2, t3);
227 }
228 } else if (k == 0) {
229 goto passthrough;
230 } else if (k < 32) {
231 if (op == IR_BSHL) {
232 IRRef t1 = split_emit(J, IRTI(IR_BSHL), hi, kref);
233 IRRef t2 = split_emit(J, IRTI(IR_BSHR), lo, lj_ir_kint(J, (-k&31)));
234 return split_emit(J, IRTI(IR_BOR), t1, t2);
235 } else {
236 IRRef t1 = ir->prev, t2;
237 lua_assert(op == IR_BSHR || op == IR_BSAR);
238 nir->o = IR_BSHR;
239 t2 = split_emit(J, IRTI(IR_BSHL), hi, lj_ir_kint(J, (-k&31)));
240 ir->prev = split_emit(J, IRTI(IR_BOR), t1, t2);
241 return split_emit(J, IRTI(op), hi, kref);
242 }
243 } else {
244 if (op == IR_BSHL) {
245 if (k == 32)
246 J->cur.nins--;
247 else
248 lo = ir->prev;
249 ir->prev = lj_ir_kint(J, 0);
250 return lo;
251 } else {
252 lua_assert(op == IR_BSHR || op == IR_BSAR);
253 if (k == 32) {
254 J->cur.nins--;
255 ir->prev = hi;
256 } else {
257 nir->op1 = hi;
258 }
259 if (op == IR_BSHR)
260 return lj_ir_kint(J, 0);
261 else
262 return split_emit(J, IRTI(IR_BSAR), hi, lj_ir_kint(J, 31));
263 }
264 }
265 }
266 return split_call_li(J, hisubst, oir, ir,
267 op - IR_BSHL + IRCALL_lj_carith_shl64);
268}
269
270static IRRef split_bitop(jit_State *J, IRRef1 *hisubst,
271 IRIns *nir, IRIns *ir)
272{
273 IROp op = ir->o;
274 IRRef hi, kref = nir->op2;
275 if (irref_isk(kref)) { /* Optimize bit operations with lo constant. */
276 int32_t k = IR(kref)->i;
277 if (k == 0 || k == -1) {
278 if (op == IR_BAND) k = ~k;
279 if (k == 0) {
280 J->cur.nins--;
281 ir->prev = nir->op1;
282 } else if (op == IR_BXOR) {
283 nir->o = IR_BNOT;
284 nir->op2 = 0;
285 } else {
286 J->cur.nins--;
287 ir->prev = kref;
288 }
289 }
290 }
291 hi = hisubst[ir->op1];
292 kref = hisubst[ir->op2];
293 if (irref_isk(kref)) { /* Optimize bit operations with hi constant. */
294 int32_t k = IR(kref)->i;
295 if (k == 0 || k == -1) {
296 if (op == IR_BAND) k = ~k;
297 if (k == 0) {
298 return hi;
299 } else if (op == IR_BXOR) {
300 return split_emit(J, IRTI(IR_BNOT), hi, 0);
301 } else {
302 return kref;
303 }
304 }
305 }
306 return split_emit(J, IRTI(op), hi, kref);
307}
308#endif
309
198/* Substitute references of a snapshot. */ 310/* Substitute references of a snapshot. */
199static void split_subst_snap(jit_State *J, SnapShot *snap, IRIns *oir) 311static void split_subst_snap(jit_State *J, SnapShot *snap, IRIns *oir)
200{ 312{
@@ -214,7 +326,7 @@ static void split_ir(jit_State *J)
214 IRRef nins = J->cur.nins, nk = J->cur.nk; 326 IRRef nins = J->cur.nins, nk = J->cur.nk;
215 MSize irlen = nins - nk; 327 MSize irlen = nins - nk;
216 MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1)); 328 MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1));
217 IRIns *oir = (IRIns *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, need); 329 IRIns *oir = (IRIns *)lj_buf_tmp(J->L, need);
218 IRRef1 *hisubst; 330 IRRef1 *hisubst;
219 IRRef ref, snref; 331 IRRef ref, snref;
220 SnapShot *snap; 332 SnapShot *snap;
@@ -438,6 +550,19 @@ static void split_ir(jit_State *J)
438 irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : 550 irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
439 IRCALL_lj_carith_powu64); 551 IRCALL_lj_carith_powu64);
440 break; 552 break;
553 case IR_BNOT:
554 hi = split_emit(J, IRTI(IR_BNOT), hiref, 0);
555 break;
556 case IR_BSWAP:
557 ir->prev = split_emit(J, IRTI(IR_BSWAP), hiref, 0);
558 hi = nref;
559 break;
560 case IR_BAND: case IR_BOR: case IR_BXOR:
561 hi = split_bitop(J, hisubst, nir, ir);
562 break;
563 case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR:
564 hi = split_bitshift(J, hisubst, oir, nir, ir);
565 break;
441 case IR_FLOAD: 566 case IR_FLOAD:
442 lua_assert(ir->op2 == IRFL_CDATA_INT64); 567 lua_assert(ir->op2 == IRFL_CDATA_INT64);
443 hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4); 568 hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4);
diff --git a/src/lj_parse.c b/src/lj_parse.c
index e8aafba2..064b9b19 100644
--- a/src/lj_parse.c
+++ b/src/lj_parse.c
@@ -13,6 +13,7 @@
13#include "lj_gc.h" 13#include "lj_gc.h"
14#include "lj_err.h" 14#include "lj_err.h"
15#include "lj_debug.h" 15#include "lj_debug.h"
16#include "lj_buf.h"
16#include "lj_str.h" 17#include "lj_str.h"
17#include "lj_tab.h" 18#include "lj_tab.h"
18#include "lj_func.h" 19#include "lj_func.h"
@@ -21,6 +22,7 @@
21#if LJ_HASFFI 22#if LJ_HASFFI
22#include "lj_ctype.h" 23#include "lj_ctype.h"
23#endif 24#endif
25#include "lj_strfmt.h"
24#include "lj_lex.h" 26#include "lj_lex.h"
25#include "lj_parse.h" 27#include "lj_parse.h"
26#include "lj_vm.h" 28#include "lj_vm.h"
@@ -165,12 +167,12 @@ LJ_STATIC_ASSERT((int)BC_MODVV-(int)BC_ADDVV == (int)OPR_MOD-(int)OPR_ADD);
165 167
166LJ_NORET LJ_NOINLINE static void err_syntax(LexState *ls, ErrMsg em) 168LJ_NORET LJ_NOINLINE static void err_syntax(LexState *ls, ErrMsg em)
167{ 169{
168 lj_lex_error(ls, ls->token, em); 170 lj_lex_error(ls, ls->tok, em);
169} 171}
170 172
171LJ_NORET LJ_NOINLINE static void err_token(LexState *ls, LexToken token) 173LJ_NORET LJ_NOINLINE static void err_token(LexState *ls, LexToken tok)
172{ 174{
173 lj_lex_error(ls, ls->token, LJ_ERR_XTOKEN, lj_lex_token2str(ls, token)); 175 lj_lex_error(ls, ls->tok, LJ_ERR_XTOKEN, lj_lex_token2str(ls, tok));
174} 176}
175 177
176LJ_NORET static void err_limit(FuncState *fs, uint32_t limit, const char *what) 178LJ_NORET static void err_limit(FuncState *fs, uint32_t limit, const char *what)
@@ -981,7 +983,7 @@ static void bcemit_unop(FuncState *fs, BCOp op, ExpDesc *e)
981/* Check and consume optional token. */ 983/* Check and consume optional token. */
982static int lex_opt(LexState *ls, LexToken tok) 984static int lex_opt(LexState *ls, LexToken tok)
983{ 985{
984 if (ls->token == tok) { 986 if (ls->tok == tok) {
985 lj_lex_next(ls); 987 lj_lex_next(ls);
986 return 1; 988 return 1;
987 } 989 }
@@ -991,7 +993,7 @@ static int lex_opt(LexState *ls, LexToken tok)
991/* Check and consume token. */ 993/* Check and consume token. */
992static void lex_check(LexState *ls, LexToken tok) 994static void lex_check(LexState *ls, LexToken tok)
993{ 995{
994 if (ls->token != tok) 996 if (ls->tok != tok)
995 err_token(ls, tok); 997 err_token(ls, tok);
996 lj_lex_next(ls); 998 lj_lex_next(ls);
997} 999}
@@ -1005,7 +1007,7 @@ static void lex_match(LexState *ls, LexToken what, LexToken who, BCLine line)
1005 } else { 1007 } else {
1006 const char *swhat = lj_lex_token2str(ls, what); 1008 const char *swhat = lj_lex_token2str(ls, what);
1007 const char *swho = lj_lex_token2str(ls, who); 1009 const char *swho = lj_lex_token2str(ls, who);
1008 lj_lex_error(ls, ls->token, LJ_ERR_XMATCH, swhat, swho, line); 1010 lj_lex_error(ls, ls->tok, LJ_ERR_XMATCH, swhat, swho, line);
1009 } 1011 }
1010 } 1012 }
1011} 1013}
@@ -1014,9 +1016,9 @@ static void lex_match(LexState *ls, LexToken what, LexToken who, BCLine line)
1014static GCstr *lex_str(LexState *ls) 1016static GCstr *lex_str(LexState *ls)
1015{ 1017{
1016 GCstr *s; 1018 GCstr *s;
1017 if (ls->token != TK_name && (LJ_52 || ls->token != TK_goto)) 1019 if (ls->tok != TK_name && (LJ_52 || ls->tok != TK_goto))
1018 err_token(ls, TK_name); 1020 err_token(ls, TK_name);
1019 s = strV(&ls->tokenval); 1021 s = strV(&ls->tokval);
1020 lj_lex_next(ls); 1022 lj_lex_next(ls);
1021 return s; 1023 return s;
1022} 1024}
@@ -1429,78 +1431,46 @@ static void fs_fixup_line(FuncState *fs, GCproto *pt,
1429 } 1431 }
1430} 1432}
1431 1433
1432/* Resize buffer if needed. */
1433static LJ_NOINLINE void fs_buf_resize(LexState *ls, MSize len)
1434{
1435 MSize sz = ls->sb.sz * 2;
1436 while (ls->sb.n + len > sz) sz = sz * 2;
1437 lj_str_resizebuf(ls->L, &ls->sb, sz);
1438}
1439
1440static LJ_AINLINE void fs_buf_need(LexState *ls, MSize len)
1441{
1442 if (LJ_UNLIKELY(ls->sb.n + len > ls->sb.sz))
1443 fs_buf_resize(ls, len);
1444}
1445
1446/* Add string to buffer. */
1447static void fs_buf_str(LexState *ls, const char *str, MSize len)
1448{
1449 char *p = ls->sb.buf + ls->sb.n;
1450 MSize i;
1451 ls->sb.n += len;
1452 for (i = 0; i < len; i++) p[i] = str[i];
1453}
1454
1455/* Add ULEB128 value to buffer. */
1456static void fs_buf_uleb128(LexState *ls, uint32_t v)
1457{
1458 MSize n = ls->sb.n;
1459 uint8_t *p = (uint8_t *)ls->sb.buf;
1460 for (; v >= 0x80; v >>= 7)
1461 p[n++] = (uint8_t)((v & 0x7f) | 0x80);
1462 p[n++] = (uint8_t)v;
1463 ls->sb.n = n;
1464}
1465
1466/* Prepare variable info for prototype. */ 1434/* Prepare variable info for prototype. */
1467static size_t fs_prep_var(LexState *ls, FuncState *fs, size_t *ofsvar) 1435static size_t fs_prep_var(LexState *ls, FuncState *fs, size_t *ofsvar)
1468{ 1436{
1469 VarInfo *vs =ls->vstack, *ve; 1437 VarInfo *vs =ls->vstack, *ve;
1470 MSize i, n; 1438 MSize i, n;
1471 BCPos lastpc; 1439 BCPos lastpc;
1472 lj_str_resetbuf(&ls->sb); /* Copy to temp. string buffer. */ 1440 lj_buf_reset(&ls->sb); /* Copy to temp. string buffer. */
1473 /* Store upvalue names. */ 1441 /* Store upvalue names. */
1474 for (i = 0, n = fs->nuv; i < n; i++) { 1442 for (i = 0, n = fs->nuv; i < n; i++) {
1475 GCstr *s = strref(vs[fs->uvmap[i]].name); 1443 GCstr *s = strref(vs[fs->uvmap[i]].name);
1476 MSize len = s->len+1; 1444 MSize len = s->len+1;
1477 fs_buf_need(ls, len); 1445 char *p = lj_buf_more(&ls->sb, len);
1478 fs_buf_str(ls, strdata(s), len); 1446 p = lj_buf_wmem(p, strdata(s), len);
1447 setsbufP(&ls->sb, p);
1479 } 1448 }
1480 *ofsvar = ls->sb.n; 1449 *ofsvar = sbuflen(&ls->sb);
1481 lastpc = 0; 1450 lastpc = 0;
1482 /* Store local variable names and compressed ranges. */ 1451 /* Store local variable names and compressed ranges. */
1483 for (ve = vs + ls->vtop, vs += fs->vbase; vs < ve; vs++) { 1452 for (ve = vs + ls->vtop, vs += fs->vbase; vs < ve; vs++) {
1484 if (!gola_isgotolabel(vs)) { 1453 if (!gola_isgotolabel(vs)) {
1485 GCstr *s = strref(vs->name); 1454 GCstr *s = strref(vs->name);
1486 BCPos startpc; 1455 BCPos startpc;
1456 char *p;
1487 if ((uintptr_t)s < VARNAME__MAX) { 1457 if ((uintptr_t)s < VARNAME__MAX) {
1488 fs_buf_need(ls, 1 + 2*5); 1458 p = lj_buf_more(&ls->sb, 1 + 2*5);
1489 ls->sb.buf[ls->sb.n++] = (uint8_t)(uintptr_t)s; 1459 *p++ = (char)(uintptr_t)s;
1490 } else { 1460 } else {
1491 MSize len = s->len+1; 1461 MSize len = s->len+1;
1492 fs_buf_need(ls, len + 2*5); 1462 p = lj_buf_more(&ls->sb, len + 2*5);
1493 fs_buf_str(ls, strdata(s), len); 1463 p = lj_buf_wmem(p, strdata(s), len);
1494 } 1464 }
1495 startpc = vs->startpc; 1465 startpc = vs->startpc;
1496 fs_buf_uleb128(ls, startpc-lastpc); 1466 p = lj_strfmt_wuleb128(p, startpc-lastpc);
1497 fs_buf_uleb128(ls, vs->endpc-startpc); 1467 p = lj_strfmt_wuleb128(p, vs->endpc-startpc);
1468 setsbufP(&ls->sb, p);
1498 lastpc = startpc; 1469 lastpc = startpc;
1499 } 1470 }
1500 } 1471 }
1501 fs_buf_need(ls, 1); 1472 lj_buf_putb(&ls->sb, '\0'); /* Terminator for varinfo. */
1502 ls->sb.buf[ls->sb.n++] = '\0'; /* Terminator for varinfo. */ 1473 return sbuflen(&ls->sb);
1503 return ls->sb.n;
1504} 1474}
1505 1475
1506/* Fixup variable info for prototype. */ 1476/* Fixup variable info for prototype. */
@@ -1508,7 +1478,7 @@ static void fs_fixup_var(LexState *ls, GCproto *pt, uint8_t *p, size_t ofsvar)
1508{ 1478{
1509 setmref(pt->uvinfo, p); 1479 setmref(pt->uvinfo, p);
1510 setmref(pt->varinfo, (char *)p + ofsvar); 1480 setmref(pt->varinfo, (char *)p + ofsvar);
1511 memcpy(p, ls->sb.buf, ls->sb.n); /* Copy from temp. string buffer. */ 1481 memcpy(p, sbufB(&ls->sb), sbuflen(&ls->sb)); /* Copy from temp. buffer. */
1512} 1482}
1513#else 1483#else
1514 1484
@@ -1617,7 +1587,7 @@ static GCproto *fs_finish(LexState *ls, BCLine line)
1617 L->top--; /* Pop table of constants. */ 1587 L->top--; /* Pop table of constants. */
1618 ls->vtop = fs->vbase; /* Reset variable stack. */ 1588 ls->vtop = fs->vbase; /* Reset variable stack. */
1619 ls->fs = fs->prev; 1589 ls->fs = fs->prev;
1620 lua_assert(ls->fs != NULL || ls->token == TK_eof); 1590 lua_assert(ls->fs != NULL || ls->tok == TK_eof);
1621 return pt; 1591 return pt;
1622} 1592}
1623 1593
@@ -1739,15 +1709,15 @@ static void expr_table(LexState *ls, ExpDesc *e)
1739 bcreg_reserve(fs, 1); 1709 bcreg_reserve(fs, 1);
1740 freg++; 1710 freg++;
1741 lex_check(ls, '{'); 1711 lex_check(ls, '{');
1742 while (ls->token != '}') { 1712 while (ls->tok != '}') {
1743 ExpDesc key, val; 1713 ExpDesc key, val;
1744 vcall = 0; 1714 vcall = 0;
1745 if (ls->token == '[') { 1715 if (ls->tok == '[') {
1746 expr_bracket(ls, &key); /* Already calls expr_toval. */ 1716 expr_bracket(ls, &key); /* Already calls expr_toval. */
1747 if (!expr_isk(&key)) expr_index(fs, e, &key); 1717 if (!expr_isk(&key)) expr_index(fs, e, &key);
1748 if (expr_isnumk(&key) && expr_numiszero(&key)) needarr = 1; else nhash++; 1718 if (expr_isnumk(&key) && expr_numiszero(&key)) needarr = 1; else nhash++;
1749 lex_check(ls, '='); 1719 lex_check(ls, '=');
1750 } else if ((ls->token == TK_name || (!LJ_52 && ls->token == TK_goto)) && 1720 } else if ((ls->tok == TK_name || (!LJ_52 && ls->tok == TK_goto)) &&
1751 lj_lex_lookahead(ls) == '=') { 1721 lj_lex_lookahead(ls) == '=') {
1752 expr_str(ls, &key); 1722 expr_str(ls, &key);
1753 lex_check(ls, '='); 1723 lex_check(ls, '=');
@@ -1840,11 +1810,11 @@ static BCReg parse_params(LexState *ls, int needself)
1840 lex_check(ls, '('); 1810 lex_check(ls, '(');
1841 if (needself) 1811 if (needself)
1842 var_new_lit(ls, nparams++, "self"); 1812 var_new_lit(ls, nparams++, "self");
1843 if (ls->token != ')') { 1813 if (ls->tok != ')') {
1844 do { 1814 do {
1845 if (ls->token == TK_name || (!LJ_52 && ls->token == TK_goto)) { 1815 if (ls->tok == TK_name || (!LJ_52 && ls->tok == TK_goto)) {
1846 var_new(ls, nparams++, lex_str(ls)); 1816 var_new(ls, nparams++, lex_str(ls));
1847 } else if (ls->token == TK_dots) { 1817 } else if (ls->tok == TK_dots) {
1848 lj_lex_next(ls); 1818 lj_lex_next(ls);
1849 fs->flags |= PROTO_VARARG; 1819 fs->flags |= PROTO_VARARG;
1850 break; 1820 break;
@@ -1878,7 +1848,7 @@ static void parse_body(LexState *ls, ExpDesc *e, int needself, BCLine line)
1878 fs.bclim = pfs->bclim - pfs->pc; 1848 fs.bclim = pfs->bclim - pfs->pc;
1879 bcemit_AD(&fs, BC_FUNCF, 0, 0); /* Placeholder. */ 1849 bcemit_AD(&fs, BC_FUNCF, 0, 0); /* Placeholder. */
1880 parse_chunk(ls); 1850 parse_chunk(ls);
1881 if (ls->token != TK_end) lex_match(ls, TK_end, TK_function, line); 1851 if (ls->tok != TK_end) lex_match(ls, TK_end, TK_function, line);
1882 pt = fs_finish(ls, (ls->lastline = ls->linenumber)); 1852 pt = fs_finish(ls, (ls->lastline = ls->linenumber));
1883 pfs->bcbase = ls->bcstack + oldbase; /* May have been reallocated. */ 1853 pfs->bcbase = ls->bcstack + oldbase; /* May have been reallocated. */
1884 pfs->bclim = (BCPos)(ls->sizebcstack - oldbase); 1854 pfs->bclim = (BCPos)(ls->sizebcstack - oldbase);
@@ -1917,13 +1887,13 @@ static void parse_args(LexState *ls, ExpDesc *e)
1917 BCIns ins; 1887 BCIns ins;
1918 BCReg base; 1888 BCReg base;
1919 BCLine line = ls->linenumber; 1889 BCLine line = ls->linenumber;
1920 if (ls->token == '(') { 1890 if (ls->tok == '(') {
1921#if !LJ_52 1891#if !LJ_52
1922 if (line != ls->lastline) 1892 if (line != ls->lastline)
1923 err_syntax(ls, LJ_ERR_XAMBIG); 1893 err_syntax(ls, LJ_ERR_XAMBIG);
1924#endif 1894#endif
1925 lj_lex_next(ls); 1895 lj_lex_next(ls);
1926 if (ls->token == ')') { /* f(). */ 1896 if (ls->tok == ')') { /* f(). */
1927 args.k = VVOID; 1897 args.k = VVOID;
1928 } else { 1898 } else {
1929 expr_list(ls, &args); 1899 expr_list(ls, &args);
@@ -1931,11 +1901,11 @@ static void parse_args(LexState *ls, ExpDesc *e)
1931 setbc_b(bcptr(fs, &args), 0); /* Pass on multiple results. */ 1901 setbc_b(bcptr(fs, &args), 0); /* Pass on multiple results. */
1932 } 1902 }
1933 lex_match(ls, ')', '(', line); 1903 lex_match(ls, ')', '(', line);
1934 } else if (ls->token == '{') { 1904 } else if (ls->tok == '{') {
1935 expr_table(ls, &args); 1905 expr_table(ls, &args);
1936 } else if (ls->token == TK_string) { 1906 } else if (ls->tok == TK_string) {
1937 expr_init(&args, VKSTR, 0); 1907 expr_init(&args, VKSTR, 0);
1938 args.u.sval = strV(&ls->tokenval); 1908 args.u.sval = strV(&ls->tokval);
1939 lj_lex_next(ls); 1909 lj_lex_next(ls);
1940 } else { 1910 } else {
1941 err_syntax(ls, LJ_ERR_XFUNARG); 1911 err_syntax(ls, LJ_ERR_XFUNARG);
@@ -1961,32 +1931,32 @@ static void expr_primary(LexState *ls, ExpDesc *v)
1961{ 1931{
1962 FuncState *fs = ls->fs; 1932 FuncState *fs = ls->fs;
1963 /* Parse prefix expression. */ 1933 /* Parse prefix expression. */
1964 if (ls->token == '(') { 1934 if (ls->tok == '(') {
1965 BCLine line = ls->linenumber; 1935 BCLine line = ls->linenumber;
1966 lj_lex_next(ls); 1936 lj_lex_next(ls);
1967 expr(ls, v); 1937 expr(ls, v);
1968 lex_match(ls, ')', '(', line); 1938 lex_match(ls, ')', '(', line);
1969 expr_discharge(ls->fs, v); 1939 expr_discharge(ls->fs, v);
1970 } else if (ls->token == TK_name || (!LJ_52 && ls->token == TK_goto)) { 1940 } else if (ls->tok == TK_name || (!LJ_52 && ls->tok == TK_goto)) {
1971 var_lookup(ls, v); 1941 var_lookup(ls, v);
1972 } else { 1942 } else {
1973 err_syntax(ls, LJ_ERR_XSYMBOL); 1943 err_syntax(ls, LJ_ERR_XSYMBOL);
1974 } 1944 }
1975 for (;;) { /* Parse multiple expression suffixes. */ 1945 for (;;) { /* Parse multiple expression suffixes. */
1976 if (ls->token == '.') { 1946 if (ls->tok == '.') {
1977 expr_field(ls, v); 1947 expr_field(ls, v);
1978 } else if (ls->token == '[') { 1948 } else if (ls->tok == '[') {
1979 ExpDesc key; 1949 ExpDesc key;
1980 expr_toanyreg(fs, v); 1950 expr_toanyreg(fs, v);
1981 expr_bracket(ls, &key); 1951 expr_bracket(ls, &key);
1982 expr_index(fs, v, &key); 1952 expr_index(fs, v, &key);
1983 } else if (ls->token == ':') { 1953 } else if (ls->tok == ':') {
1984 ExpDesc key; 1954 ExpDesc key;
1985 lj_lex_next(ls); 1955 lj_lex_next(ls);
1986 expr_str(ls, &key); 1956 expr_str(ls, &key);
1987 bcemit_method(fs, v, &key); 1957 bcemit_method(fs, v, &key);
1988 parse_args(ls, v); 1958 parse_args(ls, v);
1989 } else if (ls->token == '(' || ls->token == TK_string || ls->token == '{') { 1959 } else if (ls->tok == '(' || ls->tok == TK_string || ls->tok == '{') {
1990 expr_tonextreg(fs, v); 1960 expr_tonextreg(fs, v);
1991 parse_args(ls, v); 1961 parse_args(ls, v);
1992 } else { 1962 } else {
@@ -1998,14 +1968,14 @@ static void expr_primary(LexState *ls, ExpDesc *v)
1998/* Parse simple expression. */ 1968/* Parse simple expression. */
1999static void expr_simple(LexState *ls, ExpDesc *v) 1969static void expr_simple(LexState *ls, ExpDesc *v)
2000{ 1970{
2001 switch (ls->token) { 1971 switch (ls->tok) {
2002 case TK_number: 1972 case TK_number:
2003 expr_init(v, (LJ_HASFFI && tviscdata(&ls->tokenval)) ? VKCDATA : VKNUM, 0); 1973 expr_init(v, (LJ_HASFFI && tviscdata(&ls->tokval)) ? VKCDATA : VKNUM, 0);
2004 copyTV(ls->L, &v->u.nval, &ls->tokenval); 1974 copyTV(ls->L, &v->u.nval, &ls->tokval);
2005 break; 1975 break;
2006 case TK_string: 1976 case TK_string:
2007 expr_init(v, VKSTR, 0); 1977 expr_init(v, VKSTR, 0);
2008 v->u.sval = strV(&ls->tokenval); 1978 v->u.sval = strV(&ls->tokval);
2009 break; 1979 break;
2010 case TK_nil: 1980 case TK_nil:
2011 expr_init(v, VKNIL, 0); 1981 expr_init(v, VKNIL, 0);
@@ -2093,11 +2063,11 @@ static BinOpr expr_binop(LexState *ls, ExpDesc *v, uint32_t limit);
2093static void expr_unop(LexState *ls, ExpDesc *v) 2063static void expr_unop(LexState *ls, ExpDesc *v)
2094{ 2064{
2095 BCOp op; 2065 BCOp op;
2096 if (ls->token == TK_not) { 2066 if (ls->tok == TK_not) {
2097 op = BC_NOT; 2067 op = BC_NOT;
2098 } else if (ls->token == '-') { 2068 } else if (ls->tok == '-') {
2099 op = BC_UNM; 2069 op = BC_UNM;
2100 } else if (ls->token == '#') { 2070 } else if (ls->tok == '#') {
2101 op = BC_LEN; 2071 op = BC_LEN;
2102 } else { 2072 } else {
2103 expr_simple(ls, v); 2073 expr_simple(ls, v);
@@ -2114,7 +2084,7 @@ static BinOpr expr_binop(LexState *ls, ExpDesc *v, uint32_t limit)
2114 BinOpr op; 2084 BinOpr op;
2115 synlevel_begin(ls); 2085 synlevel_begin(ls);
2116 expr_unop(ls, v); 2086 expr_unop(ls, v);
2117 op = token2binop(ls->token); 2087 op = token2binop(ls->tok);
2118 while (op != OPR_NOBINOPR && priority[op].left > limit) { 2088 while (op != OPR_NOBINOPR && priority[op].left > limit) {
2119 ExpDesc v2; 2089 ExpDesc v2;
2120 BinOpr nextop; 2090 BinOpr nextop;
@@ -2303,9 +2273,9 @@ static void parse_func(LexState *ls, BCLine line)
2303 lj_lex_next(ls); /* Skip 'function'. */ 2273 lj_lex_next(ls); /* Skip 'function'. */
2304 /* Parse function name. */ 2274 /* Parse function name. */
2305 var_lookup(ls, &v); 2275 var_lookup(ls, &v);
2306 while (ls->token == '.') /* Multiple dot-separated fields. */ 2276 while (ls->tok == '.') /* Multiple dot-separated fields. */
2307 expr_field(ls, &v); 2277 expr_field(ls, &v);
2308 if (ls->token == ':') { /* Optional colon to signify method call. */ 2278 if (ls->tok == ':') { /* Optional colon to signify method call. */
2309 needself = 1; 2279 needself = 1;
2310 expr_field(ls, &v); 2280 expr_field(ls, &v);
2311 } 2281 }
@@ -2318,9 +2288,9 @@ static void parse_func(LexState *ls, BCLine line)
2318/* -- Control transfer statements ----------------------------------------- */ 2288/* -- Control transfer statements ----------------------------------------- */
2319 2289
2320/* Check for end of block. */ 2290/* Check for end of block. */
2321static int endofblock(LexToken token) 2291static int parse_isend(LexToken tok)
2322{ 2292{
2323 switch (token) { 2293 switch (tok) {
2324 case TK_else: case TK_elseif: case TK_end: case TK_until: case TK_eof: 2294 case TK_else: case TK_elseif: case TK_end: case TK_until: case TK_eof:
2325 return 1; 2295 return 1;
2326 default: 2296 default:
@@ -2335,7 +2305,7 @@ static void parse_return(LexState *ls)
2335 FuncState *fs = ls->fs; 2305 FuncState *fs = ls->fs;
2336 lj_lex_next(ls); /* Skip 'return'. */ 2306 lj_lex_next(ls); /* Skip 'return'. */
2337 fs->flags |= PROTO_HAS_RETURN; 2307 fs->flags |= PROTO_HAS_RETURN;
2338 if (endofblock(ls->token) || ls->token == ';') { /* Bare return. */ 2308 if (parse_isend(ls->tok) || ls->tok == ';') { /* Bare return. */
2339 ins = BCINS_AD(BC_RET0, 0, 1); 2309 ins = BCINS_AD(BC_RET0, 0, 1);
2340 } else { /* Return with one or more values. */ 2310 } else { /* Return with one or more values. */
2341 ExpDesc e; /* Receives the _last_ expression in the list. */ 2311 ExpDesc e; /* Receives the _last_ expression in the list. */
@@ -2401,18 +2371,18 @@ static void parse_label(LexState *ls)
2401 lex_check(ls, TK_label); 2371 lex_check(ls, TK_label);
2402 /* Recursively parse trailing statements: labels and ';' (Lua 5.2 only). */ 2372 /* Recursively parse trailing statements: labels and ';' (Lua 5.2 only). */
2403 for (;;) { 2373 for (;;) {
2404 if (ls->token == TK_label) { 2374 if (ls->tok == TK_label) {
2405 synlevel_begin(ls); 2375 synlevel_begin(ls);
2406 parse_label(ls); 2376 parse_label(ls);
2407 synlevel_end(ls); 2377 synlevel_end(ls);
2408 } else if (LJ_52 && ls->token == ';') { 2378 } else if (LJ_52 && ls->tok == ';') {
2409 lj_lex_next(ls); 2379 lj_lex_next(ls);
2410 } else { 2380 } else {
2411 break; 2381 break;
2412 } 2382 }
2413 } 2383 }
2414 /* Trailing label is considered to be outside of scope. */ 2384 /* Trailing label is considered to be outside of scope. */
2415 if (endofblock(ls->token) && ls->token != TK_until) 2385 if (parse_isend(ls->tok) && ls->tok != TK_until)
2416 ls->vstack[idx].slot = fs->bl->nactvar; 2386 ls->vstack[idx].slot = fs->bl->nactvar;
2417 gola_resolve(ls, fs->bl, idx); 2387 gola_resolve(ls, fs->bl, idx);
2418} 2388}
@@ -2596,9 +2566,9 @@ static void parse_for(LexState *ls, BCLine line)
2596 fscope_begin(fs, &bl, FSCOPE_LOOP); 2566 fscope_begin(fs, &bl, FSCOPE_LOOP);
2597 lj_lex_next(ls); /* Skip 'for'. */ 2567 lj_lex_next(ls); /* Skip 'for'. */
2598 varname = lex_str(ls); /* Get first variable name. */ 2568 varname = lex_str(ls); /* Get first variable name. */
2599 if (ls->token == '=') 2569 if (ls->tok == '=')
2600 parse_for_num(ls, varname, line); 2570 parse_for_num(ls, varname, line);
2601 else if (ls->token == ',' || ls->token == TK_in) 2571 else if (ls->tok == ',' || ls->tok == TK_in)
2602 parse_for_iter(ls, varname); 2572 parse_for_iter(ls, varname);
2603 else 2573 else
2604 err_syntax(ls, LJ_ERR_XFOR); 2574 err_syntax(ls, LJ_ERR_XFOR);
@@ -2624,12 +2594,12 @@ static void parse_if(LexState *ls, BCLine line)
2624 BCPos flist; 2594 BCPos flist;
2625 BCPos escapelist = NO_JMP; 2595 BCPos escapelist = NO_JMP;
2626 flist = parse_then(ls); 2596 flist = parse_then(ls);
2627 while (ls->token == TK_elseif) { /* Parse multiple 'elseif' blocks. */ 2597 while (ls->tok == TK_elseif) { /* Parse multiple 'elseif' blocks. */
2628 jmp_append(fs, &escapelist, bcemit_jmp(fs)); 2598 jmp_append(fs, &escapelist, bcemit_jmp(fs));
2629 jmp_tohere(fs, flist); 2599 jmp_tohere(fs, flist);
2630 flist = parse_then(ls); 2600 flist = parse_then(ls);
2631 } 2601 }
2632 if (ls->token == TK_else) { /* Parse optional 'else' block. */ 2602 if (ls->tok == TK_else) { /* Parse optional 'else' block. */
2633 jmp_append(fs, &escapelist, bcemit_jmp(fs)); 2603 jmp_append(fs, &escapelist, bcemit_jmp(fs));
2634 jmp_tohere(fs, flist); 2604 jmp_tohere(fs, flist);
2635 lj_lex_next(ls); /* Skip 'else'. */ 2605 lj_lex_next(ls); /* Skip 'else'. */
@@ -2647,7 +2617,7 @@ static void parse_if(LexState *ls, BCLine line)
2647static int parse_stmt(LexState *ls) 2617static int parse_stmt(LexState *ls)
2648{ 2618{
2649 BCLine line = ls->linenumber; 2619 BCLine line = ls->linenumber;
2650 switch (ls->token) { 2620 switch (ls->tok) {
2651 case TK_if: 2621 case TK_if:
2652 parse_if(ls, line); 2622 parse_if(ls, line);
2653 break; 2623 break;
@@ -2705,7 +2675,7 @@ static void parse_chunk(LexState *ls)
2705{ 2675{
2706 int islast = 0; 2676 int islast = 0;
2707 synlevel_begin(ls); 2677 synlevel_begin(ls);
2708 while (!islast && !endofblock(ls->token)) { 2678 while (!islast && !parse_isend(ls->tok)) {
2709 islast = parse_stmt(ls); 2679 islast = parse_stmt(ls);
2710 lex_opt(ls, ';'); 2680 lex_opt(ls, ';');
2711 lua_assert(ls->fs->framesize >= ls->fs->freereg && 2681 lua_assert(ls->fs->framesize >= ls->fs->freereg &&
@@ -2740,7 +2710,7 @@ GCproto *lj_parse(LexState *ls)
2740 bcemit_AD(&fs, BC_FUNCV, 0, 0); /* Placeholder. */ 2710 bcemit_AD(&fs, BC_FUNCV, 0, 0); /* Placeholder. */
2741 lj_lex_next(ls); /* Read-ahead first token. */ 2711 lj_lex_next(ls); /* Read-ahead first token. */
2742 parse_chunk(ls); 2712 parse_chunk(ls);
2743 if (ls->token != TK_eof) 2713 if (ls->tok != TK_eof)
2744 err_token(ls, TK_eof); 2714 err_token(ls, TK_eof);
2745 pt = fs_finish(ls, ls->linenumber); 2715 pt = fs_finish(ls, ls->linenumber);
2746 L->top--; /* Drop chunkname. */ 2716 L->top--; /* Drop chunkname. */
diff --git a/src/lj_profile.c b/src/lj_profile.c
new file mode 100644
index 00000000..c11e3851
--- /dev/null
+++ b/src/lj_profile.c
@@ -0,0 +1,368 @@
1/*
2** Low-overhead profiling.
3** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#define lj_profile_c
7#define LUA_CORE
8
9#include "lj_obj.h"
10
11#if LJ_HASPROFILE
12
13#include "lj_buf.h"
14#include "lj_frame.h"
15#include "lj_debug.h"
16#include "lj_dispatch.h"
17#if LJ_HASJIT
18#include "lj_jit.h"
19#include "lj_trace.h"
20#endif
21#include "lj_profile.h"
22
23#include "luajit.h"
24
25#if LJ_PROFILE_SIGPROF
26
27#include <sys/time.h>
28#include <signal.h>
29#define profile_lock(ps) UNUSED(ps)
30#define profile_unlock(ps) UNUSED(ps)
31
32#elif LJ_PROFILE_PTHREAD
33
34#include <pthread.h>
35#include <time.h>
36#if LJ_TARGET_PS3
37#include <sys/timer.h>
38#endif
39#define profile_lock(ps) pthread_mutex_lock(&ps->lock)
40#define profile_unlock(ps) pthread_mutex_unlock(&ps->lock)
41
42#elif LJ_PROFILE_WTHREAD
43
44#define WIN32_LEAN_AND_MEAN
45#if LJ_TARGET_XBOX360
46#include <xtl.h>
47#include <xbox.h>
48#else
49#include <windows.h>
50#endif
51typedef unsigned int (WINAPI *WMM_TPFUNC)(unsigned int);
52#define profile_lock(ps) EnterCriticalSection(&ps->lock)
53#define profile_unlock(ps) LeaveCriticalSection(&ps->lock)
54
55#endif
56
57/* Profiler state. */
58typedef struct ProfileState {
59 global_State *g; /* VM state that started the profiler. */
60 luaJIT_profile_callback cb; /* Profiler callback. */
61 void *data; /* Profiler callback data. */
62 SBuf sb; /* String buffer for stack dumps. */
63 int interval; /* Sample interval in milliseconds. */
64 int samples; /* Number of samples for next callback. */
65 int vmstate; /* VM state when profile timer triggered. */
66#if LJ_PROFILE_SIGPROF
67 struct sigaction oldsa; /* Previous SIGPROF state. */
68#elif LJ_PROFILE_PTHREAD
69 pthread_mutex_t lock; /* g->hookmask update lock. */
70 pthread_t thread; /* Timer thread. */
71 int abort; /* Abort timer thread. */
72#elif LJ_PROFILE_WTHREAD
73#if LJ_TARGET_WINDOWS
74 HINSTANCE wmm; /* WinMM library handle. */
75 WMM_TPFUNC wmm_tbp; /* WinMM timeBeginPeriod function. */
76 WMM_TPFUNC wmm_tep; /* WinMM timeEndPeriod function. */
77#endif
78 CRITICAL_SECTION lock; /* g->hookmask update lock. */
79 HANDLE thread; /* Timer thread. */
80 int abort; /* Abort timer thread. */
81#endif
82} ProfileState;
83
84/* Sadly, we have to use a static profiler state.
85**
86** The SIGPROF variant needs a static pointer to the global state, anyway.
87** And it would be hard to extend for multiple threads. You can still use
88** multiple VMs in multiple threads, but only profile one at a time.
89*/
90static ProfileState profile_state;
91
92/* Default sample interval in milliseconds. */
93#define LJ_PROFILE_INTERVAL_DEFAULT 10
94
95/* -- Profiler/hook interaction ------------------------------------------- */
96
97#if !LJ_PROFILE_SIGPROF
98void LJ_FASTCALL lj_profile_hook_enter(global_State *g)
99{
100 ProfileState *ps = &profile_state;
101 if (ps->g) {
102 profile_lock(ps);
103 hook_enter(g);
104 profile_unlock(ps);
105 } else {
106 hook_enter(g);
107 }
108}
109
110void LJ_FASTCALL lj_profile_hook_leave(global_State *g)
111{
112 ProfileState *ps = &profile_state;
113 if (ps->g) {
114 profile_lock(ps);
115 hook_leave(g);
116 profile_unlock(ps);
117 } else {
118 hook_leave(g);
119 }
120}
121#endif
122
123/* -- Profile callbacks --------------------------------------------------- */
124
125/* Callback from profile hook (HOOK_PROFILE already cleared). */
126void LJ_FASTCALL lj_profile_interpreter(lua_State *L)
127{
128 ProfileState *ps = &profile_state;
129 global_State *g = G(L);
130 uint8_t mask;
131 profile_lock(ps);
132 mask = (g->hookmask & ~HOOK_PROFILE);
133 if (!(mask & HOOK_VMEVENT)) {
134 int samples = ps->samples;
135 ps->samples = 0;
136 g->hookmask = HOOK_VMEVENT;
137 lj_dispatch_update(g);
138 profile_unlock(ps);
139 ps->cb(ps->data, L, samples, ps->vmstate); /* Invoke user callback. */
140 profile_lock(ps);
141 mask |= (g->hookmask & HOOK_PROFILE);
142 }
143 g->hookmask = mask;
144 lj_dispatch_update(g);
145 profile_unlock(ps);
146}
147
148/* Trigger profile hook. Asynchronous call from OS-specific profile timer. */
149static void profile_trigger(ProfileState *ps)
150{
151 global_State *g = ps->g;
152 uint8_t mask;
153 profile_lock(ps);
154 ps->samples++; /* Always increment number of samples. */
155 mask = g->hookmask;
156 if (!(mask & (HOOK_PROFILE|HOOK_VMEVENT))) { /* Set profile hook. */
157 int st = g->vmstate;
158 ps->vmstate = st >= 0 ? 'N' :
159 st == ~LJ_VMST_INTERP ? 'I' :
160 st == ~LJ_VMST_C ? 'C' :
161 st == ~LJ_VMST_GC ? 'G' : 'J';
162 g->hookmask = (mask | HOOK_PROFILE);
163 lj_dispatch_update(g);
164 }
165 profile_unlock(ps);
166}
167
168/* -- OS-specific profile timer handling ---------------------------------- */
169
170#if LJ_PROFILE_SIGPROF
171
172/* SIGPROF handler. */
173static void profile_signal(int sig)
174{
175 UNUSED(sig);
176 profile_trigger(&profile_state);
177}
178
179/* Start profiling timer. */
180static void profile_timer_start(ProfileState *ps)
181{
182 int interval = ps->interval;
183 struct itimerval tm;
184 struct sigaction sa;
185 tm.it_value.tv_sec = tm.it_interval.tv_sec = interval / 1000;
186 tm.it_value.tv_usec = tm.it_interval.tv_usec = (interval % 1000) * 1000;
187 setitimer(ITIMER_PROF, &tm, NULL);
188 sa.sa_flags = SA_RESTART;
189 sa.sa_handler = profile_signal;
190 sigemptyset(&sa.sa_mask);
191 sigaction(SIGPROF, &sa, &ps->oldsa);
192}
193
194/* Stop profiling timer. */
195static void profile_timer_stop(ProfileState *ps)
196{
197 struct itimerval tm;
198 tm.it_value.tv_sec = tm.it_interval.tv_sec = 0;
199 tm.it_value.tv_usec = tm.it_interval.tv_usec = 0;
200 setitimer(ITIMER_PROF, &tm, NULL);
201 sigaction(SIGPROF, &ps->oldsa, NULL);
202}
203
204#elif LJ_PROFILE_PTHREAD
205
206/* POSIX timer thread. */
207static void *profile_thread(ProfileState *ps)
208{
209 int interval = ps->interval;
210#if !LJ_TARGET_PS3
211 struct timespec ts;
212 ts.tv_sec = interval / 1000;
213 ts.tv_nsec = (interval % 1000) * 1000000;
214#endif
215 while (1) {
216#if LJ_TARGET_PS3
217 sys_timer_usleep(interval * 1000);
218#else
219 nanosleep(&ts, NULL);
220#endif
221 if (ps->abort) break;
222 profile_trigger(ps);
223 }
224 return NULL;
225}
226
227/* Start profiling timer thread. */
228static void profile_timer_start(ProfileState *ps)
229{
230 pthread_mutex_init(&ps->lock, 0);
231 ps->abort = 0;
232 pthread_create(&ps->thread, NULL, (void *(*)(void *))profile_thread, ps);
233}
234
235/* Stop profiling timer thread. */
236static void profile_timer_stop(ProfileState *ps)
237{
238 ps->abort = 1;
239 pthread_join(ps->thread, NULL);
240 pthread_mutex_destroy(&ps->lock);
241}
242
243#elif LJ_PROFILE_WTHREAD
244
245/* Windows timer thread. */
246static DWORD WINAPI profile_thread(void *psx)
247{
248 ProfileState *ps = (ProfileState *)psx;
249 int interval = ps->interval;
250#if LJ_TARGET_WINDOWS
251 ps->wmm_tbp(interval);
252#endif
253 while (1) {
254 Sleep(interval);
255 if (ps->abort) break;
256 profile_trigger(ps);
257 }
258#if LJ_TARGET_WINDOWS
259 ps->wmm_tep(interval);
260#endif
261 return 0;
262}
263
264/* Start profiling timer thread. */
265static void profile_timer_start(ProfileState *ps)
266{
267#if LJ_TARGET_WINDOWS
268 if (!ps->wmm) { /* Load WinMM library on-demand. */
269 ps->wmm = LoadLibraryA("winmm.dll");
270 if (ps->wmm) {
271 ps->wmm_tbp = (WMM_TPFUNC)GetProcAddress(ps->wmm, "timeBeginPeriod");
272 ps->wmm_tep = (WMM_TPFUNC)GetProcAddress(ps->wmm, "timeEndPeriod");
273 if (!ps->wmm_tbp || !ps->wmm_tep) {
274 ps->wmm = NULL;
275 return;
276 }
277 }
278 }
279#endif
280 InitializeCriticalSection(&ps->lock);
281 ps->abort = 0;
282 ps->thread = CreateThread(NULL, 0, profile_thread, ps, 0, NULL);
283}
284
285/* Stop profiling timer thread. */
286static void profile_timer_stop(ProfileState *ps)
287{
288 ps->abort = 1;
289 WaitForSingleObject(ps->thread, INFINITE);
290 DeleteCriticalSection(&ps->lock);
291}
292
293#endif
294
295/* -- Public profiling API ------------------------------------------------ */
296
297/* Start profiling. */
298LUA_API void luaJIT_profile_start(lua_State *L, const char *mode,
299 luaJIT_profile_callback cb, void *data)
300{
301 ProfileState *ps = &profile_state;
302 int interval = LJ_PROFILE_INTERVAL_DEFAULT;
303 while (*mode) {
304 int m = *mode++;
305 switch (m) {
306 case 'i':
307 interval = 0;
308 while (*mode >= '0' && *mode <= '9')
309 interval = interval * 10 + (*mode++ - '0');
310 if (interval <= 0) interval = 1;
311 break;
312#if LJ_HASJIT
313 case 'l': case 'f':
314 L2J(L)->prof_mode = m;
315 lj_trace_flushall(L);
316 break;
317#endif
318 default: /* Ignore unknown mode chars. */
319 break;
320 }
321 }
322 if (ps->g) {
323 luaJIT_profile_stop(L);
324 if (ps->g) return; /* Profiler in use by another VM. */
325 }
326 ps->g = G(L);
327 ps->interval = interval;
328 ps->cb = cb;
329 ps->data = data;
330 ps->samples = 0;
331 lj_buf_init(L, &ps->sb);
332 profile_timer_start(ps);
333}
334
335/* Stop profiling. */
336LUA_API void luaJIT_profile_stop(lua_State *L)
337{
338 ProfileState *ps = &profile_state;
339 global_State *g = ps->g;
340 if (G(L) == g) { /* Only stop profiler if started by this VM. */
341 profile_timer_stop(ps);
342 g->hookmask &= ~HOOK_PROFILE;
343 lj_dispatch_update(g);
344#if LJ_HASJIT
345 G2J(g)->prof_mode = 0;
346 lj_trace_flushall(L);
347#endif
348 lj_buf_free(g, &ps->sb);
349 setmref(ps->sb.b, NULL);
350 setmref(ps->sb.e, NULL);
351 ps->g = NULL;
352 }
353}
354
355/* Return a compact stack dump. */
356LUA_API const char *luaJIT_profile_dumpstack(lua_State *L, const char *fmt,
357 int depth, size_t *len)
358{
359 ProfileState *ps = &profile_state;
360 SBuf *sb = &ps->sb;
361 setsbufL(sb, L);
362 lj_buf_reset(sb);
363 lj_debug_dumpstack(L, sb, fmt, depth);
364 *len = (size_t)sbuflen(sb);
365 return sbufB(sb);
366}
367
368#endif
diff --git a/src/lj_profile.h b/src/lj_profile.h
new file mode 100644
index 00000000..384d7052
--- /dev/null
+++ b/src/lj_profile.h
@@ -0,0 +1,21 @@
1/*
2** Low-overhead profiling.
3** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_PROFILE_H
7#define _LJ_PROFILE_H
8
9#include "lj_obj.h"
10
11#if LJ_HASPROFILE
12
13LJ_FUNC void LJ_FASTCALL lj_profile_interpreter(lua_State *L);
14#if !LJ_PROFILE_SIGPROF
15LJ_FUNC void LJ_FASTCALL lj_profile_hook_enter(global_State *g);
16LJ_FUNC void LJ_FASTCALL lj_profile_hook_leave(global_State *g);
17#endif
18
19#endif
20
21#endif
diff --git a/src/lj_record.c b/src/lj_record.c
index 95957d31..ca833df2 100644
--- a/src/lj_record.c
+++ b/src/lj_record.c
@@ -20,6 +20,9 @@
20#endif 20#endif
21#include "lj_bc.h" 21#include "lj_bc.h"
22#include "lj_ff.h" 22#include "lj_ff.h"
23#if LJ_HASPROFILE
24#include "lj_debug.h"
25#endif
23#include "lj_ir.h" 26#include "lj_ir.h"
24#include "lj_jit.h" 27#include "lj_jit.h"
25#include "lj_ircall.h" 28#include "lj_ircall.h"
@@ -230,7 +233,7 @@ static void canonicalize_slots(jit_State *J)
230} 233}
231 234
232/* Stop recording. */ 235/* Stop recording. */
233static void rec_stop(jit_State *J, TraceLink linktype, TraceNo lnk) 236void lj_record_stop(jit_State *J, TraceLink linktype, TraceNo lnk)
234{ 237{
235 lj_trace_end(J); 238 lj_trace_end(J);
236 J->cur.linktype = (uint8_t)linktype; 239 J->cur.linktype = (uint8_t)linktype;
@@ -499,8 +502,7 @@ static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl)
499static LoopEvent rec_iterl(jit_State *J, const BCIns iterins) 502static LoopEvent rec_iterl(jit_State *J, const BCIns iterins)
500{ 503{
501 BCReg ra = bc_a(iterins); 504 BCReg ra = bc_a(iterins);
502 lua_assert(J->base[ra] != 0); 505 if (!tref_isnil(getslot(J, ra))) { /* Looping back? */
503 if (!tref_isnil(J->base[ra])) { /* Looping back? */
504 J->base[ra-1] = J->base[ra]; /* Copy result of ITERC to control var. */ 506 J->base[ra-1] = J->base[ra]; /* Copy result of ITERC to control var. */
505 J->maxslot = ra-1+bc_b(J->pc[-1]); 507 J->maxslot = ra-1+bc_b(J->pc[-1]);
506 J->pc += bc_j(iterins)+1; 508 J->pc += bc_j(iterins)+1;
@@ -538,12 +540,12 @@ static int innerloopleft(jit_State *J, const BCIns *pc)
538/* Handle the case when an interpreted loop op is hit. */ 540/* Handle the case when an interpreted loop op is hit. */
539static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev) 541static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev)
540{ 542{
541 if (J->parent == 0) { 543 if (J->parent == 0 && J->exitno == 0) {
542 if (pc == J->startpc && J->framedepth + J->retdepth == 0) { 544 if (pc == J->startpc && J->framedepth + J->retdepth == 0) {
543 /* Same loop? */ 545 /* Same loop? */
544 if (ev == LOOPEV_LEAVE) /* Must loop back to form a root trace. */ 546 if (ev == LOOPEV_LEAVE) /* Must loop back to form a root trace. */
545 lj_trace_err(J, LJ_TRERR_LLEAVE); 547 lj_trace_err(J, LJ_TRERR_LLEAVE);
546 rec_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Looping root trace. */ 548 lj_record_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Looping trace. */
547 } else if (ev != LOOPEV_LEAVE) { /* Entering inner loop? */ 549 } else if (ev != LOOPEV_LEAVE) { /* Entering inner loop? */
548 /* It's usually better to abort here and wait until the inner loop 550 /* It's usually better to abort here and wait until the inner loop
549 ** is traced. But if the inner loop repeatedly didn't loop back, 551 ** is traced. But if the inner loop repeatedly didn't loop back,
@@ -568,18 +570,64 @@ static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev)
568/* Handle the case when an already compiled loop op is hit. */ 570/* Handle the case when an already compiled loop op is hit. */
569static void rec_loop_jit(jit_State *J, TraceNo lnk, LoopEvent ev) 571static void rec_loop_jit(jit_State *J, TraceNo lnk, LoopEvent ev)
570{ 572{
571 if (J->parent == 0) { /* Root trace hit an inner loop. */ 573 if (J->parent == 0 && J->exitno == 0) { /* Root trace hit an inner loop. */
572 /* Better let the inner loop spawn a side trace back here. */ 574 /* Better let the inner loop spawn a side trace back here. */
573 lj_trace_err(J, LJ_TRERR_LINNER); 575 lj_trace_err(J, LJ_TRERR_LINNER);
574 } else if (ev != LOOPEV_LEAVE) { /* Side trace enters a compiled loop. */ 576 } else if (ev != LOOPEV_LEAVE) { /* Side trace enters a compiled loop. */
575 J->instunroll = 0; /* Cannot continue across a compiled loop op. */ 577 J->instunroll = 0; /* Cannot continue across a compiled loop op. */
576 if (J->pc == J->startpc && J->framedepth + J->retdepth == 0) 578 if (J->pc == J->startpc && J->framedepth + J->retdepth == 0)
577 rec_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Form an extra loop. */ 579 lj_record_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Form extra loop. */
578 else 580 else
579 rec_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the loop. */ 581 lj_record_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the loop. */
580 } /* Side trace continues across a loop that's left or not entered. */ 582 } /* Side trace continues across a loop that's left or not entered. */
581} 583}
582 584
585/* -- Record profiler hook checks ----------------------------------------- */
586
587#if LJ_HASPROFILE
588
589/* Need to insert profiler hook check? */
590static int rec_profile_need(jit_State *J, GCproto *pt, const BCIns *pc)
591{
592 GCproto *ppt;
593 lua_assert(J->prof_mode == 'f' || J->prof_mode == 'l');
594 if (!pt)
595 return 0;
596 ppt = J->prev_pt;
597 J->prev_pt = pt;
598 if (pt != ppt && ppt) {
599 J->prev_line = -1;
600 return 1;
601 }
602 if (J->prof_mode == 'l') {
603 BCLine line = lj_debug_line(pt, proto_bcpos(pt, pc));
604 BCLine pline = J->prev_line;
605 J->prev_line = line;
606 if (pline != line)
607 return 1;
608 }
609 return 0;
610}
611
612static void rec_profile_ins(jit_State *J, const BCIns *pc)
613{
614 if (J->prof_mode && rec_profile_need(J, J->pt, pc)) {
615 emitir(IRTG(IR_PROF, IRT_NIL), 0, 0);
616 lj_snap_add(J);
617 }
618}
619
620static void rec_profile_ret(jit_State *J)
621{
622 if (J->prof_mode == 'f') {
623 emitir(IRTG(IR_PROF, IRT_NIL), 0, 0);
624 J->prev_pt = NULL;
625 lj_snap_add(J);
626 }
627}
628
629#endif
630
583/* -- Record calls and returns -------------------------------------------- */ 631/* -- Record calls and returns -------------------------------------------- */
584 632
585/* Specialize to the runtime value of the called function or its prototype. */ 633/* Specialize to the runtime value of the called function or its prototype. */
@@ -595,6 +643,21 @@ static TRef rec_call_specialize(jit_State *J, GCfunc *fn, TRef tr)
595 (void)lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); /* Prevent GC of proto. */ 643 (void)lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); /* Prevent GC of proto. */
596 return tr; 644 return tr;
597 } 645 }
646 } else {
647 /* Don't specialize to non-monomorphic builtins. */
648 switch (fn->c.ffid) {
649 case FF_coroutine_wrap_aux:
650 case FF_string_gmatch_aux:
651 /* NYI: io_file_iter doesn't have an ffid, yet. */
652 { /* Specialize to the ffid. */
653 TRef trid = emitir(IRT(IR_FLOAD, IRT_U8), tr, IRFL_FUNC_FFID);
654 emitir(IRTG(IR_EQ, IRT_INT), trid, lj_ir_kint(J, fn->c.ffid));
655 }
656 return tr;
657 default:
658 /* NYI: don't specialize to non-monomorphic C functions. */
659 break;
660 }
598 } 661 }
599 /* Otherwise specialize to the function (closure) value itself. */ 662 /* Otherwise specialize to the function (closure) value itself. */
600 kfunc = lj_ir_kfunc(J, fn); 663 kfunc = lj_ir_kfunc(J, fn);
@@ -678,6 +741,8 @@ static int check_downrec_unroll(jit_State *J, GCproto *pt)
678 return 0; 741 return 0;
679} 742}
680 743
744static TRef rec_cat(jit_State *J, BCReg baseslot, BCReg topslot);
745
681/* Record return. */ 746/* Record return. */
682void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) 747void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
683{ 748{
@@ -700,12 +765,13 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
700 /* Return to lower frame via interpreter for unhandled cases. */ 765 /* Return to lower frame via interpreter for unhandled cases. */
701 if (J->framedepth == 0 && J->pt && bc_isret(bc_op(*J->pc)) && 766 if (J->framedepth == 0 && J->pt && bc_isret(bc_op(*J->pc)) &&
702 (!frame_islua(frame) || 767 (!frame_islua(frame) ||
703 (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))))) { 768 (J->parent == 0 && J->exitno == 0 &&
769 !bc_isret(bc_op(J->cur.startins))))) {
704 /* NYI: specialize to frame type and return directly, not via RET*. */ 770 /* NYI: specialize to frame type and return directly, not via RET*. */
705 for (i = 0; i < (ptrdiff_t)rbase; i++) 771 for (i = 0; i < (ptrdiff_t)rbase; i++)
706 J->base[i] = 0; /* Purge dead slots. */ 772 J->base[i] = 0; /* Purge dead slots. */
707 J->maxslot = rbase + (BCReg)gotresults; 773 J->maxslot = rbase + (BCReg)gotresults;
708 rec_stop(J, LJ_TRLINK_RETURN, 0); /* Return to interpreter. */ 774 lj_record_stop(J, LJ_TRLINK_RETURN, 0); /* Return to interpreter. */
709 return; 775 return;
710 } 776 }
711 if (frame_isvarg(frame)) { 777 if (frame_isvarg(frame)) {
@@ -729,7 +795,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
729 if (check_downrec_unroll(J, pt)) { 795 if (check_downrec_unroll(J, pt)) {
730 J->maxslot = (BCReg)(rbase + gotresults); 796 J->maxslot = (BCReg)(rbase + gotresults);
731 lj_snap_purge(J); 797 lj_snap_purge(J);
732 rec_stop(J, LJ_TRLINK_DOWNREC, J->cur.traceno); /* Down-recursion. */ 798 lj_record_stop(J, LJ_TRLINK_DOWNREC, J->cur.traceno); /* Down-rec. */
733 return; 799 return;
734 } 800 }
735 lj_snap_add(J); 801 lj_snap_add(J);
@@ -742,7 +808,8 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
742 lua_assert(J->baseslot > cbase+1); 808 lua_assert(J->baseslot > cbase+1);
743 J->baseslot -= cbase+1; 809 J->baseslot -= cbase+1;
744 J->base -= cbase+1; 810 J->base -= cbase+1;
745 } else if (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))) { 811 } else if (J->parent == 0 && J->exitno == 0 &&
812 !bc_isret(bc_op(J->cur.startins))) {
746 /* Return to lower frame would leave the loop in a root trace. */ 813 /* Return to lower frame would leave the loop in a root trace. */
747 lj_trace_err(J, LJ_TRERR_LLEAVE); 814 lj_trace_err(J, LJ_TRERR_LLEAVE);
748 } else { /* Return to lower frame. Guard for the target we return to. */ 815 } else { /* Return to lower frame. Guard for the target we return to. */
@@ -772,7 +839,24 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
772 } else if (cont == lj_cont_nop) { 839 } else if (cont == lj_cont_nop) {
773 /* Nothing to do here. */ 840 /* Nothing to do here. */
774 } else if (cont == lj_cont_cat) { 841 } else if (cont == lj_cont_cat) {
775 lua_assert(0); 842 BCReg bslot = bc_b(*(frame_contpc(frame)-1));
843 TRef tr = gotresults ? J->base[cbase+rbase] : TREF_NIL;
844 if (bslot != cbase-2) { /* Concatenate the remainder. */
845 TValue *b = J->L->base, save; /* Simulate lower frame and result. */
846 J->base[cbase-2] = tr;
847 copyTV(J->L, &save, b-2);
848 if (gotresults) copyTV(J->L, b-2, b+rbase); else setnilV(b-2);
849 J->L->base = b - cbase;
850 tr = rec_cat(J, bslot, cbase-2);
851 b = J->L->base + cbase; /* Undo. */
852 J->L->base = b;
853 copyTV(J->L, b-2, &save);
854 }
855 if (tr) { /* Store final result. */
856 BCReg dst = bc_a(*(frame_contpc(frame)-1));
857 J->base[dst] = tr;
858 if (dst >= J->maxslot) J->maxslot = dst+1;
859 } /* Otherwise continue with another __concat call. */
776 } else { 860 } else {
777 /* Result type already specialized. */ 861 /* Result type already specialized. */
778 lua_assert(cont == lj_cont_condf || cont == lj_cont_condt); 862 lua_assert(cont == lj_cont_condf || cont == lj_cont_condt);
@@ -788,13 +872,11 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
788/* Prepare to record call to metamethod. */ 872/* Prepare to record call to metamethod. */
789static BCReg rec_mm_prep(jit_State *J, ASMFunction cont) 873static BCReg rec_mm_prep(jit_State *J, ASMFunction cont)
790{ 874{
791 BCReg s, top = curr_proto(J->L)->framesize; 875 BCReg s, top = cont == lj_cont_cat ? J->maxslot : curr_proto(J->L)->framesize;
792 TRef trcont;
793 setcont(&J->L->base[top], cont);
794#if LJ_64 876#if LJ_64
795 trcont = lj_ir_kptr(J, (void *)((int64_t)cont - (int64_t)lj_vm_asm_begin)); 877 TRef trcont = lj_ir_kptr(J, (void *)((int64_t)cont-(int64_t)lj_vm_asm_begin));
796#else 878#else
797 trcont = lj_ir_kptr(J, (void *)cont); 879 TRef trcont = lj_ir_kptr(J, (void *)cont);
798#endif 880#endif
799 J->base[top] = trcont | TREF_CONT; 881 J->base[top] = trcont | TREF_CONT;
800 J->framedepth++; 882 J->framedepth++;
@@ -875,7 +957,7 @@ nocheck:
875static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm) 957static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm)
876{ 958{
877 /* Set up metamethod call first to save ix->tab and ix->tabv. */ 959 /* Set up metamethod call first to save ix->tab and ix->tabv. */
878 BCReg func = rec_mm_prep(J, lj_cont_ra); 960 BCReg func = rec_mm_prep(J, mm == MM_concat ? lj_cont_cat : lj_cont_ra);
879 TRef *base = J->base + func; 961 TRef *base = J->base + func;
880 TValue *basev = J->L->base + func; 962 TValue *basev = J->L->base + func;
881 base[1] = ix->tab; base[2] = ix->key; 963 base[1] = ix->tab; base[2] = ix->key;
@@ -1288,6 +1370,22 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
1288 } 1370 }
1289} 1371}
1290 1372
1373static void rec_tsetm(jit_State *J, BCReg ra, BCReg rn, int32_t i)
1374{
1375 RecordIndex ix;
1376 cTValue *basev = J->L->base;
1377 copyTV(J->L, &ix.tabv, &basev[ra-1]);
1378 ix.tab = getslot(J, ra-1);
1379 ix.idxchain = 0;
1380 for (; ra < rn; i++, ra++) {
1381 setintV(&ix.keyv, i);
1382 ix.key = lj_ir_kint(J, i);
1383 copyTV(J->L, &ix.valv, &basev[ra]);
1384 ix.val = getslot(J, ra);
1385 lj_record_idx(J, &ix);
1386 }
1387}
1388
1291/* -- Upvalue access ------------------------------------------------------ */ 1389/* -- Upvalue access ------------------------------------------------------ */
1292 1390
1293/* Check whether upvalue is immutable and ok to constify. */ 1391/* Check whether upvalue is immutable and ok to constify. */
@@ -1399,9 +1497,9 @@ static void check_call_unroll(jit_State *J, TraceNo lnk)
1399 if (count + J->tailcalled > J->param[JIT_P_recunroll]) { 1497 if (count + J->tailcalled > J->param[JIT_P_recunroll]) {
1400 J->pc++; 1498 J->pc++;
1401 if (J->framedepth + J->retdepth == 0) 1499 if (J->framedepth + J->retdepth == 0)
1402 rec_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Tail-recursion. */ 1500 lj_record_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Tail-rec. */
1403 else 1501 else
1404 rec_stop(J, LJ_TRLINK_UPREC, J->cur.traceno); /* Up-recursion. */ 1502 lj_record_stop(J, LJ_TRLINK_UPREC, J->cur.traceno); /* Up-recursion. */
1405 } 1503 }
1406 } else { 1504 } else {
1407 if (count > J->param[JIT_P_callunroll]) { 1505 if (count > J->param[JIT_P_callunroll]) {
@@ -1475,9 +1573,9 @@ static void rec_func_jit(jit_State *J, TraceNo lnk)
1475 } 1573 }
1476 J->instunroll = 0; /* Cannot continue across a compiled function. */ 1574 J->instunroll = 0; /* Cannot continue across a compiled function. */
1477 if (J->pc == J->startpc && J->framedepth + J->retdepth == 0) 1575 if (J->pc == J->startpc && J->framedepth + J->retdepth == 0)
1478 rec_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Extra tail-recursion. */ 1576 lj_record_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Extra tail-rec. */
1479 else 1577 else
1480 rec_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the function. */ 1578 lj_record_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the function. */
1481} 1579}
1482 1580
1483/* -- Vararg handling ----------------------------------------------------- */ 1581/* -- Vararg handling ----------------------------------------------------- */
@@ -1601,6 +1699,54 @@ static TRef rec_tnew(jit_State *J, uint32_t ah)
1601 return emitir(IRTG(IR_TNEW, IRT_TAB), asize, hbits); 1699 return emitir(IRTG(IR_TNEW, IRT_TAB), asize, hbits);
1602} 1700}
1603 1701
1702/* -- Concatenation ------------------------------------------------------- */
1703
1704static TRef rec_cat(jit_State *J, BCReg baseslot, BCReg topslot)
1705{
1706 TRef *top = &J->base[topslot];
1707 TValue savetv[5];
1708 BCReg s;
1709 RecordIndex ix;
1710 lua_assert(baseslot < topslot);
1711 for (s = baseslot; s <= topslot; s++)
1712 (void)getslot(J, s); /* Ensure all arguments have a reference. */
1713 if (tref_isnumber_str(top[0]) && tref_isnumber_str(top[-1])) {
1714 TRef tr, hdr, *trp, *xbase, *base = &J->base[baseslot];
1715 /* First convert numbers to strings. */
1716 for (trp = top; trp >= base; trp--) {
1717 if (tref_isnumber(*trp))
1718 *trp = emitir(IRT(IR_TOSTR, IRT_STR), *trp,
1719 tref_isnum(*trp) ? IRTOSTR_NUM : IRTOSTR_INT);
1720 else if (!tref_isstr(*trp))
1721 break;
1722 }
1723 xbase = ++trp;
1724 tr = hdr = emitir(IRT(IR_BUFHDR, IRT_P32),
1725 lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET);
1726 do {
1727 tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, *trp++);
1728 } while (trp <= top);
1729 tr = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
1730 J->maxslot = (BCReg)(xbase - J->base);
1731 if (xbase == base) return tr; /* Return simple concatenation result. */
1732 /* Pass partial result. */
1733 topslot = J->maxslot--;
1734 *xbase = tr;
1735 top = xbase;
1736 setstrV(J->L, &ix.keyv, &J2G(J)->strempty); /* Simulate string result. */
1737 } else {
1738 J->maxslot = topslot-1;
1739 copyTV(J->L, &ix.keyv, &J->L->base[topslot]);
1740 }
1741 copyTV(J->L, &ix.tabv, &J->L->base[topslot-1]);
1742 ix.tab = top[-1];
1743 ix.key = top[0];
1744 memcpy(savetv, &J->L->base[topslot-1], sizeof(savetv)); /* Save slots. */
1745 rec_mm_arith(J, &ix, MM_concat); /* Call __concat metamethod. */
1746 memcpy(&J->L->base[topslot-1], savetv, sizeof(savetv)); /* Restore slots. */
1747 return 0; /* No result yet. */
1748}
1749
1604/* -- Record bytecode ops ------------------------------------------------- */ 1750/* -- Record bytecode ops ------------------------------------------------- */
1605 1751
1606/* Prepare for comparison. */ 1752/* Prepare for comparison. */
@@ -1707,6 +1853,10 @@ void lj_record_ins(jit_State *J)
1707 rec_check_ir(J); 1853 rec_check_ir(J);
1708#endif 1854#endif
1709 1855
1856#if LJ_HASPROFILE
1857 rec_profile_ins(J, pc);
1858#endif
1859
1710 /* Keep a copy of the runtime values of var/num/str operands. */ 1860 /* Keep a copy of the runtime values of var/num/str operands. */
1711#define rav (&ix.valv) 1861#define rav (&ix.valv)
1712#define rbv (&ix.tabv) 1862#define rbv (&ix.tabv)
@@ -1828,6 +1978,18 @@ void lj_record_ins(jit_State *J)
1828 J->maxslot = bc_a(pc[1]); /* Shrink used slots. */ 1978 J->maxslot = bc_a(pc[1]); /* Shrink used slots. */
1829 break; 1979 break;
1830 1980
1981 case BC_ISTYPE: case BC_ISNUM:
1982 /* These coercions need to correspond with lj_meta_istype(). */
1983 if (LJ_DUALNUM && rc == ~LJ_TNUMX+1)
1984 ra = lj_opt_narrow_toint(J, ra);
1985 else if (rc == ~LJ_TNUMX+2)
1986 ra = lj_ir_tonum(J, ra);
1987 else if (rc == ~LJ_TSTR+1)
1988 ra = lj_ir_tostr(J, ra);
1989 /* else: type specialization suffices. */
1990 J->base[bc_a(ins)] = ra;
1991 break;
1992
1831 /* -- Unary ops --------------------------------------------------------- */ 1993 /* -- Unary ops --------------------------------------------------------- */
1832 1994
1833 case BC_NOT: 1995 case BC_NOT:
@@ -1891,6 +2053,12 @@ void lj_record_ins(jit_State *J)
1891 rc = rec_mm_arith(J, &ix, MM_pow); 2053 rc = rec_mm_arith(J, &ix, MM_pow);
1892 break; 2054 break;
1893 2055
2056 /* -- Miscellaneous ops ------------------------------------------------- */
2057
2058 case BC_CAT:
2059 rc = rec_cat(J, rb, rc);
2060 break;
2061
1894 /* -- Constant and move ops --------------------------------------------- */ 2062 /* -- Constant and move ops --------------------------------------------- */
1895 2063
1896 case BC_MOV: 2064 case BC_MOV:
@@ -1939,6 +2107,14 @@ void lj_record_ins(jit_State *J)
1939 ix.idxchain = LJ_MAX_IDXCHAIN; 2107 ix.idxchain = LJ_MAX_IDXCHAIN;
1940 rc = lj_record_idx(J, &ix); 2108 rc = lj_record_idx(J, &ix);
1941 break; 2109 break;
2110 case BC_TGETR: case BC_TSETR:
2111 ix.idxchain = 0;
2112 rc = lj_record_idx(J, &ix);
2113 break;
2114
2115 case BC_TSETM:
2116 rec_tsetm(J, ra, (BCReg)(J->L->top - J->L->base), (int32_t)rcv->u32.lo);
2117 break;
1942 2118
1943 case BC_TNEW: 2119 case BC_TNEW:
1944 rc = rec_tnew(J, rc); 2120 rc = rec_tnew(J, rc);
@@ -1989,6 +2165,9 @@ void lj_record_ins(jit_State *J)
1989 rc = (BCReg)(J->L->top - J->L->base) - ra + 1; 2165 rc = (BCReg)(J->L->top - J->L->base) - ra + 1;
1990 /* fallthrough */ 2166 /* fallthrough */
1991 case BC_RET: case BC_RET0: case BC_RET1: 2167 case BC_RET: case BC_RET0: case BC_RET1:
2168#if LJ_HASPROFILE
2169 rec_profile_ret(J);
2170#endif
1992 lj_record_ret(J, ra, (ptrdiff_t)rc-1); 2171 lj_record_ret(J, ra, (ptrdiff_t)rc-1);
1993 break; 2172 break;
1994 2173
@@ -2001,7 +2180,7 @@ void lj_record_ins(jit_State *J)
2001 case BC_JFORI: 2180 case BC_JFORI:
2002 lua_assert(bc_op(pc[(ptrdiff_t)rc-BCBIAS_J]) == BC_JFORL); 2181 lua_assert(bc_op(pc[(ptrdiff_t)rc-BCBIAS_J]) == BC_JFORL);
2003 if (rec_for(J, pc, 0) != LOOPEV_LEAVE) /* Link to existing loop. */ 2182 if (rec_for(J, pc, 0) != LOOPEV_LEAVE) /* Link to existing loop. */
2004 rec_stop(J, LJ_TRLINK_ROOT, bc_d(pc[(ptrdiff_t)rc-BCBIAS_J])); 2183 lj_record_stop(J, LJ_TRLINK_ROOT, bc_d(pc[(ptrdiff_t)rc-BCBIAS_J]));
2005 /* Continue tracing if the loop is not entered. */ 2184 /* Continue tracing if the loop is not entered. */
2006 break; 2185 break;
2007 2186
@@ -2068,10 +2247,8 @@ void lj_record_ins(jit_State *J)
2068 /* fallthrough */ 2247 /* fallthrough */
2069 case BC_ITERN: 2248 case BC_ITERN:
2070 case BC_ISNEXT: 2249 case BC_ISNEXT:
2071 case BC_CAT:
2072 case BC_UCLO: 2250 case BC_UCLO:
2073 case BC_FNEW: 2251 case BC_FNEW:
2074 case BC_TSETM:
2075 setintV(&J->errinfo, (int32_t)op); 2252 setintV(&J->errinfo, (int32_t)op);
2076 lj_trace_err_info(J, LJ_TRERR_NYIBC); 2253 lj_trace_err_info(J, LJ_TRERR_NYIBC);
2077 break; 2254 break;
@@ -2137,6 +2314,12 @@ static const BCIns *rec_setup_root(jit_State *J)
2137 J->maxslot = J->pt->numparams; 2314 J->maxslot = J->pt->numparams;
2138 pc++; 2315 pc++;
2139 break; 2316 break;
2317 case BC_CALLM:
2318 case BC_CALL:
2319 case BC_ITERC:
2320 /* No bytecode range check for stitched traces. */
2321 pc++;
2322 break;
2140 default: 2323 default:
2141 lua_assert(0); 2324 lua_assert(0);
2142 break; 2325 break;
@@ -2205,7 +2388,7 @@ void lj_record_setup(jit_State *J)
2205 if (traceref(J, J->cur.root)->nchild >= J->param[JIT_P_maxside] || 2388 if (traceref(J, J->cur.root)->nchild >= J->param[JIT_P_maxside] ||
2206 T->snap[J->exitno].count >= J->param[JIT_P_hotexit] + 2389 T->snap[J->exitno].count >= J->param[JIT_P_hotexit] +
2207 J->param[JIT_P_tryside]) { 2390 J->param[JIT_P_tryside]) {
2208 rec_stop(J, LJ_TRLINK_INTERP, 0); 2391 lj_record_stop(J, LJ_TRLINK_INTERP, 0);
2209 } 2392 }
2210 } else { /* Root trace. */ 2393 } else { /* Root trace. */
2211 J->cur.root = 0; 2394 J->cur.root = 0;
@@ -2217,9 +2400,15 @@ void lj_record_setup(jit_State *J)
2217 lj_snap_add(J); 2400 lj_snap_add(J);
2218 if (bc_op(J->cur.startins) == BC_FORL) 2401 if (bc_op(J->cur.startins) == BC_FORL)
2219 rec_for_loop(J, J->pc-1, &J->scev, 1); 2402 rec_for_loop(J, J->pc-1, &J->scev, 1);
2403 else if (bc_op(J->cur.startins) == BC_ITERC)
2404 J->startpc = NULL;
2220 if (1 + J->pt->framesize >= LJ_MAX_JSLOTS) 2405 if (1 + J->pt->framesize >= LJ_MAX_JSLOTS)
2221 lj_trace_err(J, LJ_TRERR_STACKOV); 2406 lj_trace_err(J, LJ_TRERR_STACKOV);
2222 } 2407 }
2408#if LJ_HASPROFILE
2409 J->prev_pt = NULL;
2410 J->prev_line = -1;
2411#endif
2223#ifdef LUAJIT_ENABLE_CHECKHOOK 2412#ifdef LUAJIT_ENABLE_CHECKHOOK
2224 /* Regularly check for instruction/line hooks from compiled code and 2413 /* Regularly check for instruction/line hooks from compiled code and
2225 ** exit to the interpreter if the hooks are set. 2414 ** exit to the interpreter if the hooks are set.
diff --git a/src/lj_record.h b/src/lj_record.h
index 940e105f..7e38cccc 100644
--- a/src/lj_record.h
+++ b/src/lj_record.h
@@ -28,6 +28,7 @@ typedef struct RecordIndex {
28 28
29LJ_FUNC int lj_record_objcmp(jit_State *J, TRef a, TRef b, 29LJ_FUNC int lj_record_objcmp(jit_State *J, TRef a, TRef b,
30 cTValue *av, cTValue *bv); 30 cTValue *av, cTValue *bv);
31LJ_FUNC void lj_record_stop(jit_State *J, TraceLink linktype, TraceNo lnk);
31LJ_FUNC TRef lj_record_constify(jit_State *J, cTValue *o); 32LJ_FUNC TRef lj_record_constify(jit_State *J, cTValue *o);
32 33
33LJ_FUNC void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs); 34LJ_FUNC void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs);
diff --git a/src/lj_snap.c b/src/lj_snap.c
index 1c978c26..cc498c89 100644
--- a/src/lj_snap.c
+++ b/src/lj_snap.c
@@ -97,7 +97,8 @@ static BCReg snapshot_framelinks(jit_State *J, SnapEntry *map)
97{ 97{
98 cTValue *frame = J->L->base - 1; 98 cTValue *frame = J->L->base - 1;
99 cTValue *lim = J->L->base - J->baseslot; 99 cTValue *lim = J->L->base - J->baseslot;
100 cTValue *ftop = frame + funcproto(frame_func(frame))->framesize; 100 GCfunc *fn = frame_func(frame);
101 cTValue *ftop = isluafunc(fn) ? (frame+funcproto(fn)->framesize) : J->L->top;
101 MSize f = 0; 102 MSize f = 0;
102 map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */ 103 map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */
103 while (frame > lim) { /* Backwards traversal of all frames above base. */ 104 while (frame > lim) { /* Backwards traversal of all frames above base. */
diff --git a/src/lj_state.c b/src/lj_state.c
index f972fdce..73611ac8 100644
--- a/src/lj_state.c
+++ b/src/lj_state.c
@@ -12,6 +12,7 @@
12#include "lj_obj.h" 12#include "lj_obj.h"
13#include "lj_gc.h" 13#include "lj_gc.h"
14#include "lj_err.h" 14#include "lj_err.h"
15#include "lj_buf.h"
15#include "lj_str.h" 16#include "lj_str.h"
16#include "lj_tab.h" 17#include "lj_tab.h"
17#include "lj_func.h" 18#include "lj_func.h"
@@ -26,6 +27,7 @@
26#include "lj_vm.h" 27#include "lj_vm.h"
27#include "lj_lex.h" 28#include "lj_lex.h"
28#include "lj_alloc.h" 29#include "lj_alloc.h"
30#include "luajit.h"
29 31
30/* -- Stack handling ------------------------------------------------------ */ 32/* -- Stack handling ------------------------------------------------------ */
31 33
@@ -59,7 +61,7 @@ static void resizestack(lua_State *L, MSize n)
59 GCobj *up; 61 GCobj *up;
60 lua_assert((MSize)(tvref(L->maxstack)-oldst)==L->stacksize-LJ_STACK_EXTRA-1); 62 lua_assert((MSize)(tvref(L->maxstack)-oldst)==L->stacksize-LJ_STACK_EXTRA-1);
61 st = (TValue *)lj_mem_realloc(L, tvref(L->stack), 63 st = (TValue *)lj_mem_realloc(L, tvref(L->stack),
62 (MSize)(L->stacksize*sizeof(TValue)), 64 (MSize)(oldsize*sizeof(TValue)),
63 (MSize)(realsize*sizeof(TValue))); 65 (MSize)(realsize*sizeof(TValue)));
64 setmref(L->stack, st); 66 setmref(L->stack, st);
65 delta = (char *)st - (char *)oldst; 67 delta = (char *)st - (char *)oldst;
@@ -67,12 +69,12 @@ static void resizestack(lua_State *L, MSize n)
67 while (oldsize < realsize) /* Clear new slots. */ 69 while (oldsize < realsize) /* Clear new slots. */
68 setnilV(st + oldsize++); 70 setnilV(st + oldsize++);
69 L->stacksize = realsize; 71 L->stacksize = realsize;
72 if ((size_t)(mref(G(L)->jit_base, char) - (char *)oldst) < oldsize)
73 setmref(G(L)->jit_base, mref(G(L)->jit_base, char) + delta);
70 L->base = (TValue *)((char *)L->base + delta); 74 L->base = (TValue *)((char *)L->base + delta);
71 L->top = (TValue *)((char *)L->top + delta); 75 L->top = (TValue *)((char *)L->top + delta);
72 for (up = gcref(L->openupval); up != NULL; up = gcnext(up)) 76 for (up = gcref(L->openupval); up != NULL; up = gcnext(up))
73 setmref(gco2uv(up)->v, (TValue *)((char *)uvval(gco2uv(up)) + delta)); 77 setmref(gco2uv(up)->v, (TValue *)((char *)uvval(gco2uv(up)) + delta));
74 if (obj2gco(L) == gcref(G(L)->jit_L))
75 setmref(G(L)->jit_base, mref(G(L)->jit_base, char) + delta);
76} 78}
77 79
78/* Relimit stack after error, in case the limit was overdrawn. */ 80/* Relimit stack after error, in case the limit was overdrawn. */
@@ -89,7 +91,8 @@ void lj_state_shrinkstack(lua_State *L, MSize used)
89 return; /* Avoid stack shrinking while handling stack overflow. */ 91 return; /* Avoid stack shrinking while handling stack overflow. */
90 if (4*used < L->stacksize && 92 if (4*used < L->stacksize &&
91 2*(LJ_STACK_START+LJ_STACK_EXTRA) < L->stacksize && 93 2*(LJ_STACK_START+LJ_STACK_EXTRA) < L->stacksize &&
92 obj2gco(L) != gcref(G(L)->jit_L)) /* Don't shrink stack of live trace. */ 94 /* Don't shrink stack of live trace. */
95 (tvref(G(L)->jit_base) == NULL || obj2gco(L) != gcref(G(L)->cur_L)))
93 resizestack(L, L->stacksize >> 1); 96 resizestack(L, L->stacksize >> 1);
94} 97}
95 98
@@ -164,7 +167,7 @@ static void close_state(lua_State *L)
164 lj_ctype_freestate(g); 167 lj_ctype_freestate(g);
165#endif 168#endif
166 lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef); 169 lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef);
167 lj_str_freebuf(g, &g->tmpbuf); 170 lj_buf_free(g, &g->tmpbuf);
168 lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue); 171 lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue);
169 lua_assert(g->gc.total == sizeof(GG_State)); 172 lua_assert(g->gc.total == sizeof(GG_State));
170#ifndef LUAJIT_USE_SYSMALLOC 173#ifndef LUAJIT_USE_SYSMALLOC
@@ -203,7 +206,7 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)
203 setnilV(&g->nilnode.val); 206 setnilV(&g->nilnode.val);
204 setnilV(&g->nilnode.key); 207 setnilV(&g->nilnode.key);
205 setmref(g->nilnode.freetop, &g->nilnode); 208 setmref(g->nilnode.freetop, &g->nilnode);
206 lj_str_initbuf(&g->tmpbuf); 209 lj_buf_init(NULL, &g->tmpbuf);
207 g->gc.state = GCSpause; 210 g->gc.state = GCSpause;
208 setgcref(g->gc.root, obj2gco(L)); 211 setgcref(g->gc.root, obj2gco(L));
209 setmref(g->gc.sweep, &g->gc.root); 212 setmref(g->gc.sweep, &g->gc.root);
@@ -236,6 +239,10 @@ LUA_API void lua_close(lua_State *L)
236 global_State *g = G(L); 239 global_State *g = G(L);
237 int i; 240 int i;
238 L = mainthread(g); /* Only the main thread can be closed. */ 241 L = mainthread(g); /* Only the main thread can be closed. */
242#if LJ_HASPROFILE
243 luaJIT_profile_stop(L);
244#endif
245 setgcrefnull(g->cur_L);
239 lj_func_closeuv(L, tvref(L->stack)); 246 lj_func_closeuv(L, tvref(L->stack));
240 lj_gc_separateudata(g, 1); /* Separate udata which have GC metamethods. */ 247 lj_gc_separateudata(g, 1); /* Separate udata which have GC metamethods. */
241#if LJ_HASJIT 248#if LJ_HASJIT
@@ -246,8 +253,8 @@ LUA_API void lua_close(lua_State *L)
246 for (i = 0;;) { 253 for (i = 0;;) {
247 hook_enter(g); 254 hook_enter(g);
248 L->status = 0; 255 L->status = 0;
249 L->cframe = NULL;
250 L->base = L->top = tvref(L->stack) + 1; 256 L->base = L->top = tvref(L->stack) + 1;
257 L->cframe = NULL;
251 if (lj_vm_cpcall(L, NULL, NULL, cpfinalize) == 0) { 258 if (lj_vm_cpcall(L, NULL, NULL, cpfinalize) == 0) {
252 if (++i >= 10) break; 259 if (++i >= 10) break;
253 lj_gc_separateudata(g, 1); /* Separate udata again. */ 260 lj_gc_separateudata(g, 1); /* Separate udata again. */
@@ -279,6 +286,8 @@ lua_State *lj_state_new(lua_State *L)
279void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L) 286void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L)
280{ 287{
281 lua_assert(L != mainthread(g)); 288 lua_assert(L != mainthread(g));
289 if (obj2gco(L) == gcref(g->cur_L))
290 setgcrefnull(g->cur_L);
282 lj_func_closeuv(L, tvref(L->stack)); 291 lj_func_closeuv(L, tvref(L->stack));
283 lua_assert(gcref(L->openupval) == NULL); 292 lua_assert(gcref(L->openupval) == NULL);
284 lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue); 293 lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue);
diff --git a/src/lj_str.c b/src/lj_str.c
index aead8b53..46d546c6 100644
--- a/src/lj_str.c
+++ b/src/lj_str.c
@@ -1,13 +1,8 @@
1/* 1/*
2** String handling. 2** String handling.
3** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h 3** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
4**
5** Portions taken verbatim or adapted from the Lua interpreter.
6** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
7*/ 4*/
8 5
9#include <stdio.h>
10
11#define lj_str_c 6#define lj_str_c
12#define LUA_CORE 7#define LUA_CORE
13 8
@@ -15,10 +10,9 @@
15#include "lj_gc.h" 10#include "lj_gc.h"
16#include "lj_err.h" 11#include "lj_err.h"
17#include "lj_str.h" 12#include "lj_str.h"
18#include "lj_state.h"
19#include "lj_char.h" 13#include "lj_char.h"
20 14
21/* -- String interning ---------------------------------------------------- */ 15/* -- String helpers ------------------------------------------------------ */
22 16
23/* Ordered compare of strings. Assumes string data is 4-byte aligned. */ 17/* Ordered compare of strings. Assumes string data is 4-byte aligned. */
24int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b) 18int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b)
@@ -64,6 +58,40 @@ static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len)
64 return 0; 58 return 0;
65} 59}
66 60
61/* Find fixed string p inside string s. */
62const char *lj_str_find(const char *s, const char *p, MSize slen, MSize plen)
63{
64 if (plen <= slen) {
65 if (plen == 0) {
66 return s;
67 } else {
68 int c = *(const uint8_t *)p++;
69 plen--; slen -= plen;
70 while (slen) {
71 const char *q = (const char *)memchr(s, c, slen);
72 if (!q) break;
73 if (memcmp(q+1, p, plen) == 0) return q;
74 q++; slen -= (MSize)(q-s); s = q;
75 }
76 }
77 }
78 return NULL;
79}
80
81/* Check whether a string has a pattern matching character. */
82int lj_str_haspattern(GCstr *s)
83{
84 const char *p = strdata(s), *q = p + s->len;
85 while (p < q) {
86 int c = *(const uint8_t *)p++;
87 if (lj_char_ispunct(c) && strchr("^$*+?.([%-", c))
88 return 1; /* Found a pattern matching char. */
89 }
90 return 0; /* No pattern matching chars found. */
91}
92
93/* -- String interning ---------------------------------------------------- */
94
67/* Resize the string hash table (grow and shrink). */ 95/* Resize the string hash table (grow and shrink). */
68void lj_str_resize(lua_State *L, MSize newmask) 96void lj_str_resize(lua_State *L, MSize newmask)
69{ 97{
@@ -167,173 +195,3 @@ void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s)
167 lj_mem_free(g, s, sizestring(s)); 195 lj_mem_free(g, s, sizestring(s));
168} 196}
169 197
170/* -- Type conversions ---------------------------------------------------- */
171
172/* Print number to buffer. Canonicalizes non-finite values. */
173size_t LJ_FASTCALL lj_str_bufnum(char *s, cTValue *o)
174{
175 if (LJ_LIKELY((o->u32.hi << 1) < 0xffe00000)) { /* Finite? */
176 lua_Number n = o->n;
177#if __BIONIC__
178 if (tvismzero(o)) { s[0] = '-'; s[1] = '0'; return 2; }
179#endif
180 return (size_t)lua_number2str(s, n);
181 } else if (((o->u32.hi & 0x000fffff) | o->u32.lo) != 0) {
182 s[0] = 'n'; s[1] = 'a'; s[2] = 'n'; return 3;
183 } else if ((o->u32.hi & 0x80000000) == 0) {
184 s[0] = 'i'; s[1] = 'n'; s[2] = 'f'; return 3;
185 } else {
186 s[0] = '-'; s[1] = 'i'; s[2] = 'n'; s[3] = 'f'; return 4;
187 }
188}
189
190/* Print integer to buffer. Returns pointer to start. */
191char * LJ_FASTCALL lj_str_bufint(char *p, int32_t k)
192{
193 uint32_t u = (uint32_t)(k < 0 ? -k : k);
194 p += 1+10;
195 do { *--p = (char)('0' + u % 10); } while (u /= 10);
196 if (k < 0) *--p = '-';
197 return p;
198}
199
200/* Convert number to string. */
201GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np)
202{
203 char buf[LJ_STR_NUMBUF];
204 size_t len = lj_str_bufnum(buf, (TValue *)np);
205 return lj_str_new(L, buf, len);
206}
207
208/* Convert integer to string. */
209GCstr * LJ_FASTCALL lj_str_fromint(lua_State *L, int32_t k)
210{
211 char s[1+10];
212 char *p = lj_str_bufint(s, k);
213 return lj_str_new(L, p, (size_t)(s+sizeof(s)-p));
214}
215
216GCstr * LJ_FASTCALL lj_str_fromnumber(lua_State *L, cTValue *o)
217{
218 return tvisint(o) ? lj_str_fromint(L, intV(o)) : lj_str_fromnum(L, &o->n);
219}
220
221/* -- String formatting --------------------------------------------------- */
222
223static void addstr(lua_State *L, SBuf *sb, const char *str, MSize len)
224{
225 char *p;
226 MSize i;
227 if (sb->n + len > sb->sz) {
228 MSize sz = sb->sz * 2;
229 while (sb->n + len > sz) sz = sz * 2;
230 lj_str_resizebuf(L, sb, sz);
231 }
232 p = sb->buf + sb->n;
233 sb->n += len;
234 for (i = 0; i < len; i++) p[i] = str[i];
235}
236
237static void addchar(lua_State *L, SBuf *sb, int c)
238{
239 if (sb->n + 1 > sb->sz) {
240 MSize sz = sb->sz * 2;
241 lj_str_resizebuf(L, sb, sz);
242 }
243 sb->buf[sb->n++] = (char)c;
244}
245
246/* Push formatted message as a string object to Lua stack. va_list variant. */
247const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp)
248{
249 SBuf *sb = &G(L)->tmpbuf;
250 lj_str_needbuf(L, sb, (MSize)strlen(fmt));
251 lj_str_resetbuf(sb);
252 for (;;) {
253 const char *e = strchr(fmt, '%');
254 if (e == NULL) break;
255 addstr(L, sb, fmt, (MSize)(e-fmt));
256 /* This function only handles %s, %c, %d, %f and %p formats. */
257 switch (e[1]) {
258 case 's': {
259 const char *s = va_arg(argp, char *);
260 if (s == NULL) s = "(null)";
261 addstr(L, sb, s, (MSize)strlen(s));
262 break;
263 }
264 case 'c':
265 addchar(L, sb, va_arg(argp, int));
266 break;
267 case 'd': {
268 char buf[LJ_STR_INTBUF];
269 char *p = lj_str_bufint(buf, va_arg(argp, int32_t));
270 addstr(L, sb, p, (MSize)(buf+LJ_STR_INTBUF-p));
271 break;
272 }
273 case 'f': {
274 char buf[LJ_STR_NUMBUF];
275 TValue tv;
276 MSize len;
277 tv.n = (lua_Number)(va_arg(argp, LUAI_UACNUMBER));
278 len = (MSize)lj_str_bufnum(buf, &tv);
279 addstr(L, sb, buf, len);
280 break;
281 }
282 case 'p': {
283#define FMTP_CHARS (2*sizeof(ptrdiff_t))
284 char buf[2+FMTP_CHARS];
285 ptrdiff_t p = (ptrdiff_t)(va_arg(argp, void *));
286 ptrdiff_t i, lasti = 2+FMTP_CHARS;
287 if (p == 0) {
288 addstr(L, sb, "NULL", 4);
289 break;
290 }
291#if LJ_64
292 /* Shorten output for 64 bit pointers. */
293 lasti = 2+2*4+((p >> 32) ? 2+2*(lj_fls((uint32_t)(p >> 32))>>3) : 0);
294#endif
295 buf[0] = '0';
296 buf[1] = 'x';
297 for (i = lasti-1; i >= 2; i--, p >>= 4)
298 buf[i] = "0123456789abcdef"[(p & 15)];
299 addstr(L, sb, buf, (MSize)lasti);
300 break;
301 }
302 case '%':
303 addchar(L, sb, '%');
304 break;
305 default:
306 addchar(L, sb, '%');
307 addchar(L, sb, e[1]);
308 break;
309 }
310 fmt = e+2;
311 }
312 addstr(L, sb, fmt, (MSize)strlen(fmt));
313 setstrV(L, L->top, lj_str_new(L, sb->buf, sb->n));
314 incr_top(L);
315 return strVdata(L->top - 1);
316}
317
318/* Push formatted message as a string object to Lua stack. Vararg variant. */
319const char *lj_str_pushf(lua_State *L, const char *fmt, ...)
320{
321 const char *msg;
322 va_list argp;
323 va_start(argp, fmt);
324 msg = lj_str_pushvf(L, fmt, argp);
325 va_end(argp);
326 return msg;
327}
328
329/* -- Buffer handling ----------------------------------------------------- */
330
331char *lj_str_needbuf(lua_State *L, SBuf *sb, MSize sz)
332{
333 if (sz > sb->sz) {
334 if (sz < LJ_MIN_SBUF) sz = LJ_MIN_SBUF;
335 lj_str_resizebuf(L, sb, sz);
336 }
337 return sb->buf;
338}
339
diff --git a/src/lj_str.h b/src/lj_str.h
index 1602c980..cd1bc215 100644
--- a/src/lj_str.h
+++ b/src/lj_str.h
@@ -10,8 +10,13 @@
10 10
11#include "lj_obj.h" 11#include "lj_obj.h"
12 12
13/* String interning. */ 13/* String helpers. */
14LJ_FUNC int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b); 14LJ_FUNC int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b);
15LJ_FUNC const char *lj_str_find(const char *s, const char *f,
16 MSize slen, MSize flen);
17LJ_FUNC int lj_str_haspattern(GCstr *s);
18
19/* String interning. */
15LJ_FUNC void lj_str_resize(lua_State *L, MSize newmask); 20LJ_FUNC void lj_str_resize(lua_State *L, MSize newmask);
16LJ_FUNCA GCstr *lj_str_new(lua_State *L, const char *str, size_t len); 21LJ_FUNCA GCstr *lj_str_new(lua_State *L, const char *str, size_t len);
17LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s); 22LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s);
@@ -19,32 +24,4 @@ LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s);
19#define lj_str_newz(L, s) (lj_str_new(L, s, strlen(s))) 24#define lj_str_newz(L, s) (lj_str_new(L, s, strlen(s)))
20#define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1)) 25#define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1))
21 26
22/* Type conversions. */
23LJ_FUNC size_t LJ_FASTCALL lj_str_bufnum(char *s, cTValue *o);
24LJ_FUNC char * LJ_FASTCALL lj_str_bufint(char *p, int32_t k);
25LJ_FUNCA GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np);
26LJ_FUNC GCstr * LJ_FASTCALL lj_str_fromint(lua_State *L, int32_t k);
27LJ_FUNCA GCstr * LJ_FASTCALL lj_str_fromnumber(lua_State *L, cTValue *o);
28
29#define LJ_STR_INTBUF (1+10)
30#define LJ_STR_NUMBUF LUAI_MAXNUMBER2STR
31
32/* String formatting. */
33LJ_FUNC const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp);
34LJ_FUNC const char *lj_str_pushf(lua_State *L, const char *fmt, ...)
35#if defined(__GNUC__)
36 __attribute__ ((format (printf, 2, 3)))
37#endif
38 ;
39
40/* Resizable string buffers. Struct definition in lj_obj.h. */
41LJ_FUNC char *lj_str_needbuf(lua_State *L, SBuf *sb, MSize sz);
42
43#define lj_str_initbuf(sb) ((sb)->buf = NULL, (sb)->sz = 0)
44#define lj_str_resetbuf(sb) ((sb)->n = 0)
45#define lj_str_resizebuf(L, sb, size) \
46 ((sb)->buf = (char *)lj_mem_realloc(L, (sb)->buf, (sb)->sz, (size)), \
47 (sb)->sz = (size))
48#define lj_str_freebuf(g, sb) lj_mem_free(g, (void *)(sb)->buf, (sb)->sz)
49
50#endif 27#endif
diff --git a/src/lj_strfmt.c b/src/lj_strfmt.c
new file mode 100644
index 00000000..be89f7e7
--- /dev/null
+++ b/src/lj_strfmt.c
@@ -0,0 +1,554 @@
1/*
2** String formatting.
3** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#include <stdio.h>
7
8#define lj_strfmt_c
9#define LUA_CORE
10
11#include "lj_obj.h"
12#include "lj_buf.h"
13#include "lj_str.h"
14#include "lj_state.h"
15#include "lj_char.h"
16#include "lj_strfmt.h"
17
18/* -- Format parser ------------------------------------------------------- */
19
20static const uint8_t strfmt_map[('x'-'A')+1] = {
21 STRFMT_A,0,0,0,STRFMT_E,0,STRFMT_G,0,0,0,0,0,0,
22 0,0,0,0,0,0,0,0,0,0,STRFMT_X,0,0,
23 0,0,0,0,0,0,
24 STRFMT_A,0,STRFMT_C,STRFMT_D,STRFMT_E,STRFMT_F,STRFMT_G,0,STRFMT_I,0,0,0,0,
25 0,STRFMT_O,STRFMT_P,STRFMT_Q,0,STRFMT_S,0,STRFMT_U,0,0,STRFMT_X
26};
27
28SFormat LJ_FASTCALL lj_strfmt_parse(FormatState *fs)
29{
30 const uint8_t *p = fs->p, *e = fs->e;
31 fs->str = (const char *)p;
32 for (; p < e; p++) {
33 if (*p == '%') { /* Escape char? */
34 if (p[1] == '%') { /* '%%'? */
35 fs->p = ++p+1;
36 goto retlit;
37 } else {
38 SFormat sf = 0;
39 uint32_t c;
40 if (p != (const uint8_t *)fs->str)
41 break;
42 for (p++; (uint32_t)*p - ' ' <= (uint32_t)('0' - ' '); p++) {
43 /* Parse flags. */
44 if (*p == '-') sf |= STRFMT_F_LEFT;
45 else if (*p == '+') sf |= STRFMT_F_PLUS;
46 else if (*p == '0') sf |= STRFMT_F_ZERO;
47 else if (*p == ' ') sf |= STRFMT_F_SPACE;
48 else if (*p == '#') sf |= STRFMT_F_ALT;
49 else break;
50 }
51 if ((uint32_t)*p - '0' < 10) { /* Parse width. */
52 uint32_t width = (uint32_t)*p++ - '0';
53 if ((uint32_t)*p - '0' < 10)
54 width = (uint32_t)*p++ - '0' + width*10;
55 sf |= (width << STRFMT_SH_WIDTH);
56 }
57 if (*p == '.') { /* Parse precision. */
58 uint32_t prec = 0;
59 p++;
60 if ((uint32_t)*p - '0' < 10) {
61 prec = (uint32_t)*p++ - '0';
62 if ((uint32_t)*p - '0' < 10)
63 prec = (uint32_t)*p++ - '0' + prec*10;
64 }
65 sf |= ((prec+1) << STRFMT_SH_PREC);
66 }
67 /* Parse conversion. */
68 c = (uint32_t)*p - 'A';
69 if (LJ_LIKELY(c <= (uint32_t)('x' - 'A'))) {
70 uint32_t sx = strfmt_map[c];
71 if (sx) {
72 fs->p = p+1;
73 return (sf | sx | ((c & 0x20) ? 0 : STRFMT_F_UPPER));
74 }
75 }
76 /* Return error location. */
77 if (*p >= 32) p++;
78 fs->len = (MSize)(p - (const uint8_t *)fs->str);
79 fs->p = fs->e;
80 return STRFMT_ERR;
81 }
82 }
83 }
84 fs->p = p;
85retlit:
86 fs->len = (MSize)(p - (const uint8_t *)fs->str);
87 return fs->len ? STRFMT_LIT : STRFMT_EOF;
88}
89
90/* -- Raw conversions ----------------------------------------------------- */
91
92/* Write number to bufer. */
93char * LJ_FASTCALL lj_strfmt_wnum(char *p, cTValue *o)
94{
95 if (LJ_LIKELY((o->u32.hi << 1) < 0xffe00000)) { /* Finite? */
96#if __BIONIC__
97 if (tvismzero(o)) { *p++ = '-'; *p++ = '0'; return p; }
98#endif
99 return p + lua_number2str(p, o->n);
100 } else if (((o->u32.hi & 0x000fffff) | o->u32.lo) != 0) {
101 *p++ = 'n'; *p++ = 'a'; *p++ = 'n';
102 } else if ((o->u32.hi & 0x80000000) == 0) {
103 *p++ = 'i'; *p++ = 'n'; *p++ = 'f';
104 } else {
105 *p++ = '-'; *p++ = 'i'; *p++ = 'n'; *p++ = 'f';
106 }
107 return p;
108}
109
/*
** Emit the leading decimal digit of x (for scale sc) to *p and remove it
** from x. The division by sc is replaced by a multiplication with
** ceil(2^sh/sc) followed by a right shift by sh.
*/
#define WINT_R(x, sh, sc) \
  { uint32_t d = (x*(((1<<sh)+sc-1)/sc))>>sh; x -= d*sc; *p++ = (char)('0'+d); }

/*
** Write integer to buffer.
** Writes the decimal representation of k (with a leading '-' for negative
** values) and returns a pointer past the last char written. No terminator
** is written.
*/
char * LJ_FASTCALL lj_strfmt_wint(char *p, int32_t k)
{
  uint32_t u = (uint32_t)k;
  /* Negate in unsigned arithmetic: -k would overflow for INT32_MIN. */
  if (k < 0) { u = ~u+1u; *p++ = '-'; }
  if (u < 10000) {
    if (u < 10) goto dig1;
    if (u < 100) goto dig2;
    if (u < 1000) goto dig3;
  } else {
    /* Split into groups of four digits: u = low group, v = upper part. */
    uint32_t v = u / 10000; u -= v * 10000;
    if (v < 10000) {
      if (v < 10) goto dig5;
      if (v < 100) goto dig6;
      if (v < 1000) goto dig7;
    } else {
      uint32_t w = v / 10000; v -= w * 10000;
      if (w >= 10) WINT_R(w, 10, 10)
      *p++ = (char)('0'+w);
    }
    WINT_R(v, 23, 1000)
    dig7: WINT_R(v, 12, 100)
    dig6: WINT_R(v, 10, 10)
    dig5: *p++ = (char)('0'+v);
  }
  WINT_R(u, 23, 1000)
  dig3: WINT_R(u, 12, 100)
  dig2: WINT_R(u, 10, 10)
  dig1: *p++ = (char)('0'+u);
  return p;
}
#undef WINT_R
141
142/* Write pointer to buffer. */
143char * LJ_FASTCALL lj_strfmt_wptr(char *p, const void *v)
144{
145 ptrdiff_t x = (ptrdiff_t)v;
146 MSize i, n = STRFMT_MAXBUF_PTR;
147 if (x == 0) {
148 *p++ = 'N'; *p++ = 'U'; *p++ = 'L'; *p++ = 'L';
149 return p;
150 }
151#if LJ_64
152 /* Shorten output for 64 bit pointers. */
153 n = 2+2*4+((x >> 32) ? 2+2*(lj_fls((uint32_t)(x >> 32))>>3) : 0);
154#endif
155 p[0] = '0';
156 p[1] = 'x';
157 for (i = n-1; i >= 2; i--, x >>= 4)
158 p[i] = "0123456789abcdef"[(x & 15)];
159 return p+n;
160}
161
162/* Write ULEB128 to buffer. */
163char * LJ_FASTCALL lj_strfmt_wuleb128(char *p, uint32_t v)
164{
165 for (; v >= 0x80; v >>= 7)
166 *p++ = (char)((v & 0x7f) | 0x80);
167 *p++ = (char)v;
168 return p;
169}
170
171/* Return string or write number to buffer and return pointer to start. */
172const char *lj_strfmt_wstrnum(char *buf, cTValue *o, MSize *lenp)
173{
174 if (tvisstr(o)) {
175 *lenp = strV(o)->len;
176 return strVdata(o);
177 } else if (tvisint(o)) {
178 *lenp = (MSize)(lj_strfmt_wint(buf, intV(o)) - buf);
179 return buf;
180 } else if (tvisnum(o)) {
181 *lenp = (MSize)(lj_strfmt_wnum(buf, o) - buf);
182 return buf;
183 } else {
184 return NULL;
185 }
186}
187
188/* -- Unformatted conversions to buffer ----------------------------------- */
189
190/* Add integer to buffer. */
191SBuf * LJ_FASTCALL lj_strfmt_putint(SBuf *sb, int32_t k)
192{
193 setsbufP(sb, lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT), k));
194 return sb;
195}
196
#if LJ_HASJIT
/* Add number to buffer. Reserves STRFMT_MAXBUF_NUM bytes up front. */
SBuf * LJ_FASTCALL lj_strfmt_putnum(SBuf *sb, cTValue *o)
{
  char *w = lj_buf_more(sb, STRFMT_MAXBUF_NUM);
  w = lj_strfmt_wnum(w, o);
  setsbufP(sb, w);
  return sb;
}
#endif
205
206SBuf * LJ_FASTCALL lj_strfmt_putptr(SBuf *sb, const void *v)
207{
208 setsbufP(sb, lj_strfmt_wptr(lj_buf_more(sb, STRFMT_MAXBUF_PTR), v));
209 return sb;
210}
211
212/* Add quoted string to buffer. */
213SBuf * LJ_FASTCALL lj_strfmt_putquoted(SBuf *sb, GCstr *str)
214{
215 const char *s = strdata(str);
216 MSize len = str->len;
217 lj_buf_putb(sb, '"');
218 while (len--) {
219 uint32_t c = (uint32_t)(uint8_t)*s++;
220 char *p = lj_buf_more(sb, 4);
221 if (c == '"' || c == '\\' || c == '\n') {
222 *p++ = '\\';
223 } else if (lj_char_iscntrl(c)) { /* This can only be 0-31 or 127. */
224 uint32_t d;
225 *p++ = '\\';
226 if (c >= 100 || lj_char_isdigit((uint8_t)*s)) {
227 *p++ = (char)('0'+(c >= 100)); if (c >= 100) c -= 100;
228 goto tens;
229 } else if (c >= 10) {
230 tens:
231 d = (c * 205) >> 11; c -= d * 10; *p++ = (char)('0'+d);
232 }
233 c += '0';
234 }
235 *p++ = (char)c;
236 setsbufP(sb, p);
237 }
238 lj_buf_putb(sb, '"');
239 return sb;
240}
241
242/* -- Formatted conversions to buffer ------------------------------------- */
243
244/* Add formatted char to buffer. */
245SBuf *lj_strfmt_putfchar(SBuf *sb, SFormat sf, int32_t c)
246{
247 MSize width = STRFMT_WIDTH(sf);
248 char *p = lj_buf_more(sb, width > 1 ? width : 1);
249 if ((sf & STRFMT_F_LEFT)) *p++ = (char)c;
250 while (width-- > 1) *p++ = ' ';
251 if (!(sf & STRFMT_F_LEFT)) *p++ = (char)c;
252 setsbufP(sb, p);
253 return sb;
254}
255
256/* Add formatted string to buffer. */
257SBuf *lj_strfmt_putfstr(SBuf *sb, SFormat sf, GCstr *str)
258{
259 MSize len = str->len <= STRFMT_PREC(sf) ? str->len : STRFMT_PREC(sf);
260 MSize width = STRFMT_WIDTH(sf);
261 char *p = lj_buf_more(sb, width > len ? width : len);
262 if ((sf & STRFMT_F_LEFT)) p = lj_buf_wmem(p, strdata(str), len);
263 while (width-- > len) *p++ = ' ';
264 if (!(sf & STRFMT_F_LEFT)) p = lj_buf_wmem(p, strdata(str), len);
265 setsbufP(sb, p);
266 return sb;
267}
268
269/* Add formatted signed/unsigned integer to buffer. */
270SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k)
271{
272 char buf[STRFMT_MAXBUF_XINT], *q = buf + sizeof(buf), *p;
273#ifdef LUA_USE_ASSERT
274 char *ps;
275#endif
276 MSize prefix = 0, len, prec, pprec, width, need;
277
278 /* Figure out signed prefixes. */
279 if (STRFMT_TYPE(sf) == STRFMT_INT) {
280 if ((int64_t)k < 0) {
281 k = (uint64_t)-(int64_t)k;
282 prefix = 256 + '-';
283 } else if ((sf & STRFMT_F_PLUS)) {
284 prefix = 256 + '+';
285 } else if ((sf & STRFMT_F_SPACE)) {
286 prefix = 256 + ' ';
287 }
288 }
289
290 /* Convert number and store to fixed-size buffer in reverse order. */
291 prec = STRFMT_PREC(sf);
292 if ((int32_t)prec >= 0) sf &= ~STRFMT_F_ZERO;
293 if (k == 0) { /* Special-case zero argument. */
294 if (prec != 0 ||
295 (sf & (STRFMT_T_OCT|STRFMT_F_ALT)) == (STRFMT_T_OCT|STRFMT_F_ALT))
296 *--q = '0';
297 } else if (!(sf & (STRFMT_T_HEX|STRFMT_T_OCT))) { /* Decimal. */
298 uint32_t k2;
299 while ((k >> 32)) { *--q = (char)('0' + k % 10); k /= 10; }
300 k2 = (uint32_t)k;
301 do { *--q = (char)('0' + k2 % 10); k2 /= 10; } while (k2);
302 } else if ((sf & STRFMT_T_HEX)) { /* Hex. */
303 const char *hexdig = (sf & STRFMT_F_UPPER) ? "0123456789ABCDEF" :
304 "0123456789abcdef";
305 do { *--q = hexdig[(k & 15)]; k >>= 4; } while (k);
306 if ((sf & STRFMT_F_ALT)) prefix = 512 + ((sf & STRFMT_F_UPPER) ? 'X' : 'x');
307 } else { /* Octal. */
308 do { *--q = (char)('0' + (uint32_t)(k & 7)); k >>= 3; } while (k);
309 if ((sf & STRFMT_F_ALT)) *--q = '0';
310 }
311
312 /* Calculate sizes. */
313 len = (MSize)(buf + sizeof(buf) - q);
314 if ((int32_t)len >= (int32_t)prec) prec = len;
315 width = STRFMT_WIDTH(sf);
316 pprec = prec + (prefix >> 8);
317 need = width > pprec ? width : pprec;
318 p = lj_buf_more(sb, need);
319#ifdef LUA_USE_ASSERT
320 ps = p;
321#endif
322
323 /* Format number with leading/trailing whitespace and zeros. */
324 if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == 0)
325 while (width-- > pprec) *p++ = ' ';
326 if (prefix) {
327 if ((char)prefix >= 'X') *p++ = '0';
328 *p++ = (char)prefix;
329 }
330 if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == STRFMT_F_ZERO)
331 while (width-- > pprec) *p++ = '0';
332 while (prec-- > len) *p++ = '0';
333 while (q < buf + sizeof(buf)) *p++ = *q++; /* Add number itself. */
334 if ((sf & STRFMT_F_LEFT))
335 while (width-- > pprec) *p++ = ' ';
336
337 lua_assert(need == (MSize)(p - ps));
338 setsbufP(sb, p);
339 return sb;
340}
341
342/* Add number formatted as signed integer to buffer. */
343SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n)
344{
345 int64_t k = (int64_t)n;
346 if (checki32(k) && sf == STRFMT_INT)
347 return lj_strfmt_putint(sb, (int32_t)k); /* Shortcut for plain %d. */
348 else
349 return lj_strfmt_putfxint(sb, sf, (uint64_t)k);
350}
351
352/* Add number formatted as unsigned integer to buffer. */
353SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n)
354{
355 int64_t k;
356 if (n >= 9223372036854775808.0)
357 k = (int64_t)(n - 18446744073709551616.0);
358 else
359 k = (int64_t)n;
360 return lj_strfmt_putfxint(sb, sf, (uint64_t)k);
361}
362
363/* Max. sprintf buffer size needed. At least #string.format("%.99f", -1e308). */
364#define STRFMT_FMTNUMBUF 512
365
366/* Add formatted floating-point number to buffer. */
367SBuf *lj_strfmt_putfnum(SBuf *sb, SFormat sf, lua_Number n)
368{
369 TValue tv;
370 tv.n = n;
371 if (LJ_UNLIKELY((tv.u32.hi << 1) >= 0xffe00000)) {
372 /* Canonicalize output of non-finite values. */
373 MSize width = STRFMT_WIDTH(sf), len = 3;
374 int prefix = 0, ch = (sf & STRFMT_F_UPPER) ? 0x202020 : 0;
375 char *p;
376 if (((tv.u32.hi & 0x000fffff) | tv.u32.lo) != 0) {
377 ch ^= ('n' << 16) | ('a' << 8) | 'n';
378 if ((sf & STRFMT_F_SPACE)) prefix = ' ';
379 } else {
380 ch ^= ('i' << 16) | ('n' << 8) | 'f';
381 if ((tv.u32.hi & 0x80000000)) prefix = '-';
382 else if ((sf & STRFMT_F_PLUS)) prefix = '+';
383 else if ((sf & STRFMT_F_SPACE)) prefix = ' ';
384 }
385 if (prefix) len = 4;
386 p = lj_buf_more(sb, width > len ? width : len);
387 if (!(sf & STRFMT_F_LEFT)) while (width-- > len) *p++ = ' ';
388 if (prefix) *p++ = prefix;
389 *p++ = (char)(ch >> 16); *p++ = (char)(ch >> 8); *p++ = (char)ch;
390 if ((sf & STRFMT_F_LEFT)) while (width-- > len) *p++ = ' ';
391 setsbufP(sb, p);
392 } else { /* Delegate to sprintf() for now. */
393 uint8_t width = (uint8_t)STRFMT_WIDTH(sf), prec = (uint8_t)STRFMT_PREC(sf);
394 char fmt[1+5+2+3+1+1], *p = fmt;
395 *p++ = '%';
396 if ((sf & STRFMT_F_LEFT)) *p++ = '-';
397 if ((sf & STRFMT_F_PLUS)) *p++ = '+';
398 if ((sf & STRFMT_F_ZERO)) *p++ = '0';
399 if ((sf & STRFMT_F_SPACE)) *p++ = ' ';
400 if ((sf & STRFMT_F_ALT)) *p++ = '#';
401 if (width) {
402 uint8_t x = width / 10, y = width % 10;
403 if (x) *p++ = '0' + x;
404 *p++ = '0' + y;
405 }
406 if (prec != 255) {
407 uint8_t x = prec / 10, y = prec % 10;
408 *p++ = '.';
409 if (x) *p++ = '0' + x;
410 *p++ = '0' + y;
411 }
412 *p++ = (0x67666561 >> (STRFMT_FP(sf)<<3)) ^ ((sf & STRFMT_F_UPPER)?0x20:0);
413 *p = '\0';
414 p = lj_buf_more(sb, STRFMT_FMTNUMBUF);
415 setsbufP(sb, p + sprintf(p, fmt, n));
416 }
417 return sb;
418}
419
420/* -- Conversions to strings ---------------------------------------------- */
421
422/* Convert integer to string. */
423GCstr * LJ_FASTCALL lj_strfmt_int(lua_State *L, int32_t k)
424{
425 char buf[STRFMT_MAXBUF_INT];
426 MSize len = (MSize)(lj_strfmt_wint(buf, k) - buf);
427 return lj_str_new(L, buf, len);
428}
429
430/* Convert number to string. */
431GCstr * LJ_FASTCALL lj_strfmt_num(lua_State *L, cTValue *o)
432{
433 char buf[STRFMT_MAXBUF_NUM];
434 MSize len = (MSize)(lj_strfmt_wnum(buf, o) - buf);
435 return lj_str_new(L, buf, len);
436}
437
438/* Convert integer or number to string. */
439GCstr * LJ_FASTCALL lj_strfmt_number(lua_State *L, cTValue *o)
440{
441 return tvisint(o) ? lj_strfmt_int(L, intV(o)) : lj_strfmt_num(L, o);
442}
443
#if LJ_HASJIT
/* Convert char value to string. The result is a string of length one. */
GCstr * LJ_FASTCALL lj_strfmt_char(lua_State *L, int c)
{
  char ch = c;
  return lj_str_new(L, &ch, 1);
}
#endif
453
454/* Raw conversion of object to string. */
455GCstr * LJ_FASTCALL lj_strfmt_obj(lua_State *L, cTValue *o)
456{
457 if (tvisstr(o)) {
458 return strV(o);
459 } else if (tvisnumber(o)) {
460 return lj_strfmt_number(L, o);
461 } else if (tvisnil(o)) {
462 return lj_str_newlit(L, "nil");
463 } else if (tvisfalse(o)) {
464 return lj_str_newlit(L, "false");
465 } else if (tvistrue(o)) {
466 return lj_str_newlit(L, "true");
467 } else {
468 char buf[8+2+2+16], *p = buf;
469 p = lj_buf_wmem(p, lj_typename(o), (MSize)strlen(lj_typename(o)));
470 *p++ = ':'; *p++ = ' ';
471 if (tvisfunc(o) && isffunc(funcV(o))) {
472 p = lj_buf_wmem(p, "builtin#", 8);
473 p = lj_strfmt_wint(p, funcV(o)->c.ffid);
474 } else {
475 p = lj_strfmt_wptr(p, lj_obj_ptr(o));
476 }
477 return lj_str_new(L, buf, (size_t)(p - buf));
478 }
479}
480
481/* -- Internal string formatting ------------------------------------------ */
482
483/*
484** These functions are only used for lua_pushfstring(), lua_pushvfstring()
485** and for internal string formatting (e.g. error messages). Caveat: unlike
486** string.format(), only a limited subset of formats and flags are supported!
487**
488** LuaJIT has support for a couple more formats than Lua 5.1/5.2:
489** - %d %u %o %x with full formatting, 32 bit integers only.
490** - %f and other FP formats are really %.14g.
491** - %s %c %p without formatting.
492*/
493
494/* Push formatted message as a string object to Lua stack. va_list variant. */
495const char *lj_strfmt_pushvf(lua_State *L, const char *fmt, va_list argp)
496{
497 SBuf *sb = lj_buf_tmp_(L);
498 FormatState fs;
499 SFormat sf;
500 GCstr *str;
501 lj_strfmt_init(&fs, fmt, (MSize)strlen(fmt));
502 while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) {
503 switch (STRFMT_TYPE(sf)) {
504 case STRFMT_LIT:
505 lj_buf_putmem(sb, fs.str, fs.len);
506 break;
507 case STRFMT_INT:
508 lj_strfmt_putfxint(sb, sf, va_arg(argp, int32_t));
509 break;
510 case STRFMT_UINT:
511 lj_strfmt_putfxint(sb, sf, va_arg(argp, uint32_t));
512 break;
513 case STRFMT_NUM: {
514 TValue tv;
515 tv.n = va_arg(argp, lua_Number);
516 setsbufP(sb, lj_strfmt_wnum(lj_buf_more(sb, STRFMT_MAXBUF_NUM), &tv));
517 break;
518 }
519 case STRFMT_STR: {
520 const char *s = va_arg(argp, char *);
521 if (s == NULL) s = "(null)";
522 lj_buf_putmem(sb, s, (MSize)strlen(s));
523 break;
524 }
525 case STRFMT_CHAR:
526 lj_buf_putb(sb, va_arg(argp, int));
527 break;
528 case STRFMT_PTR:
529 lj_strfmt_putptr(sb, va_arg(argp, void *));
530 break;
531 case STRFMT_ERR:
532 default:
533 lj_buf_putb(sb, '?');
534 lua_assert(0);
535 break;
536 }
537 }
538 str = lj_buf_str(L, sb);
539 setstrV(L, L->top, str);
540 incr_top(L);
541 return strdata(str);
542}
543
544/* Push formatted message as a string object to Lua stack. Vararg variant. */
545const char *lj_strfmt_pushf(lua_State *L, const char *fmt, ...)
546{
547 const char *msg;
548 va_list argp;
549 va_start(argp, fmt);
550 msg = lj_strfmt_pushvf(L, fmt, argp);
551 va_end(argp);
552 return msg;
553}
554
diff --git a/src/lj_strfmt.h b/src/lj_strfmt.h
new file mode 100644
index 00000000..829bef2d
--- /dev/null
+++ b/src/lj_strfmt.h
@@ -0,0 +1,125 @@
1/*
2** String formatting.
3** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_STRFMT_H
7#define _LJ_STRFMT_H
8
9#include "lj_obj.h"
10
11typedef uint32_t SFormat; /* Format indicator. */
12
13/* Format parser state. */
14typedef struct FormatState {
15 const uint8_t *p; /* Current format string pointer. */
16 const uint8_t *e; /* End of format string. */
17 const char *str; /* Returned literal string. */
18 MSize len; /* Size of literal string. */
19} FormatState;
20
21/* Format types (max. 16). */
22typedef enum FormatType {
23 STRFMT_EOF, STRFMT_ERR, STRFMT_LIT,
24 STRFMT_INT, STRFMT_UINT, STRFMT_NUM, STRFMT_STR, STRFMT_CHAR, STRFMT_PTR
25} FormatType;
26
27/* Format subtypes (bits are reused). */
28#define STRFMT_T_HEX 0x0010 /* STRFMT_UINT */
29#define STRFMT_T_OCT 0x0020 /* STRFMT_UINT */
30#define STRFMT_T_FP_A 0x0000 /* STRFMT_NUM */
31#define STRFMT_T_FP_E 0x0010 /* STRFMT_NUM */
32#define STRFMT_T_FP_F 0x0020 /* STRFMT_NUM */
33#define STRFMT_T_FP_G 0x0030 /* STRFMT_NUM */
34#define STRFMT_T_QUOTED 0x0010 /* STRFMT_STR */
35
36/* Format flags. */
37#define STRFMT_F_LEFT 0x0100
38#define STRFMT_F_PLUS 0x0200
39#define STRFMT_F_ZERO 0x0400
40#define STRFMT_F_SPACE 0x0800
41#define STRFMT_F_ALT 0x1000
42#define STRFMT_F_UPPER 0x2000
43
44/* Format indicator fields. */
45#define STRFMT_SH_WIDTH 16
46#define STRFMT_SH_PREC 24
47
48#define STRFMT_TYPE(sf) ((FormatType)((sf) & 15))
49#define STRFMT_WIDTH(sf) (((sf) >> STRFMT_SH_WIDTH) & 255u)
50#define STRFMT_PREC(sf) ((((sf) >> STRFMT_SH_PREC) & 255u) - 1u)
51#define STRFMT_FP(sf) (((sf) >> 4) & 3)
52
53/* Formats for conversion characters. */
54#define STRFMT_A (STRFMT_NUM|STRFMT_T_FP_A)
55#define STRFMT_C (STRFMT_CHAR)
56#define STRFMT_D (STRFMT_INT)
57#define STRFMT_E (STRFMT_NUM|STRFMT_T_FP_E)
58#define STRFMT_F (STRFMT_NUM|STRFMT_T_FP_F)
59#define STRFMT_G (STRFMT_NUM|STRFMT_T_FP_G)
60#define STRFMT_I STRFMT_D
61#define STRFMT_O (STRFMT_UINT|STRFMT_T_OCT)
62#define STRFMT_P (STRFMT_PTR)
63#define STRFMT_Q (STRFMT_STR|STRFMT_T_QUOTED)
64#define STRFMT_S (STRFMT_STR)
65#define STRFMT_U (STRFMT_UINT)
66#define STRFMT_X (STRFMT_UINT|STRFMT_T_HEX)
67
68/* Maximum buffer sizes for conversions. */
69#define STRFMT_MAXBUF_XINT (1+22) /* '0' prefix + uint64_t in octal. */
70#define STRFMT_MAXBUF_INT (1+10) /* Sign + int32_t in decimal. */
71#define STRFMT_MAXBUF_NUM LUAI_MAXNUMBER2STR
72#define STRFMT_MAXBUF_PTR (2+2*sizeof(ptrdiff_t)) /* "0x" + hex ptr. */
73
74/* Format parser. */
75LJ_FUNC SFormat LJ_FASTCALL lj_strfmt_parse(FormatState *fs);
76
77static LJ_AINLINE void lj_strfmt_init(FormatState *fs, const char *p, MSize len)
78{
79 fs->p = (const uint8_t *)p;
80 fs->e = (const uint8_t *)p + len;
81 lua_assert(*fs->e == 0); /* Must be NUL-terminated (may have NULs inside). */
82}
83
84/* Raw conversions. */
85LJ_FUNC char * LJ_FASTCALL lj_strfmt_wint(char *p, int32_t k);
86LJ_FUNC char * LJ_FASTCALL lj_strfmt_wnum(char *p, cTValue *o);
87LJ_FUNC char * LJ_FASTCALL lj_strfmt_wptr(char *p, const void *v);
88LJ_FUNC char * LJ_FASTCALL lj_strfmt_wuleb128(char *p, uint32_t v);
89LJ_FUNC const char *lj_strfmt_wstrnum(char *buf, cTValue *o, MSize *lenp);
90
91/* Unformatted conversions to buffer. */
92LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putint(SBuf *sb, int32_t k);
93#if LJ_HASJIT
94LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putnum(SBuf *sb, cTValue *o);
95#endif
96LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putptr(SBuf *sb, const void *v);
97LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putquoted(SBuf *sb, GCstr *str);
98
99/* Formatted conversions to buffer. */
100LJ_FUNC SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k);
101LJ_FUNC SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n);
102LJ_FUNC SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n);
103LJ_FUNC SBuf *lj_strfmt_putfnum(SBuf *sb, SFormat, lua_Number n);
104LJ_FUNC SBuf *lj_strfmt_putfchar(SBuf *sb, SFormat, int32_t c);
105LJ_FUNC SBuf *lj_strfmt_putfstr(SBuf *sb, SFormat, GCstr *str);
106
107/* Conversions to strings. */
108LJ_FUNC GCstr * LJ_FASTCALL lj_strfmt_int(lua_State *L, int32_t k);
109LJ_FUNCA GCstr * LJ_FASTCALL lj_strfmt_num(lua_State *L, cTValue *o);
110LJ_FUNCA GCstr * LJ_FASTCALL lj_strfmt_number(lua_State *L, cTValue *o);
111#if LJ_HASJIT
112LJ_FUNC GCstr * LJ_FASTCALL lj_strfmt_char(lua_State *L, int c);
113#endif
114LJ_FUNC GCstr * LJ_FASTCALL lj_strfmt_obj(lua_State *L, cTValue *o);
115
116/* Internal string formatting. */
117LJ_FUNC const char *lj_strfmt_pushvf(lua_State *L, const char *fmt,
118 va_list argp);
119LJ_FUNC const char *lj_strfmt_pushf(lua_State *L, const char *fmt, ...)
120#ifdef __GNUC__
121 __attribute__ ((format (printf, 2, 3)))
122#endif
123 ;
124
125#endif
diff --git a/src/lj_tab.c b/src/lj_tab.c
index fc7d0f1c..ef19ba97 100644
--- a/src/lj_tab.c
+++ b/src/lj_tab.c
@@ -149,6 +149,12 @@ GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits)
149 return t; 149 return t;
150} 150}
151 151
152/* The API of this function conforms to lua_createtable(). */
153GCtab *lj_tab_new_ah(lua_State *L, int32_t a, int32_t h)
154{
155 return lj_tab_new(L, (uint32_t)(a > 0 ? a+1 : 0), hsize2hbits(h));
156}
157
152#if LJ_HASJIT 158#if LJ_HASJIT
153GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize) 159GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize)
154{ 160{
@@ -198,6 +204,17 @@ GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt)
198 return t; 204 return t;
199} 205}
200 206
207/* Clear a table. */
208void LJ_FASTCALL lj_tab_clear(GCtab *t)
209{
210 clearapart(t);
211 if (t->hmask > 0) {
212 Node *node = noderef(t->node);
213 setmref(node->freetop, &node[t->hmask+1]);
214 clearhpart(t);
215 }
216}
217
201/* Free a table. */ 218/* Free a table. */
202void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t) 219void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t)
203{ 220{
diff --git a/src/lj_tab.h b/src/lj_tab.h
index 44b1bbbd..fd7f760d 100644
--- a/src/lj_tab.h
+++ b/src/lj_tab.h
@@ -34,10 +34,12 @@ static LJ_AINLINE uint32_t hashrot(uint32_t lo, uint32_t hi)
34#define hsize2hbits(s) ((s) ? ((s)==1 ? 1 : 1+lj_fls((uint32_t)((s)-1))) : 0) 34#define hsize2hbits(s) ((s) ? ((s)==1 ? 1 : 1+lj_fls((uint32_t)((s)-1))) : 0)
35 35
36LJ_FUNCA GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits); 36LJ_FUNCA GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits);
37LJ_FUNC GCtab *lj_tab_new_ah(lua_State *L, int32_t a, int32_t h);
37#if LJ_HASJIT 38#if LJ_HASJIT
38LJ_FUNC GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize); 39LJ_FUNC GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize);
39#endif 40#endif
40LJ_FUNCA GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt); 41LJ_FUNCA GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt);
42LJ_FUNC void LJ_FASTCALL lj_tab_clear(GCtab *t);
41LJ_FUNC void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t); 43LJ_FUNC void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t);
42#if LJ_HASFFI 44#if LJ_HASFFI
43LJ_FUNC void lj_tab_rehash(lua_State *L, GCtab *t); 45LJ_FUNC void lj_tab_rehash(lua_State *L, GCtab *t);
@@ -53,7 +55,7 @@ LJ_FUNCA cTValue *lj_tab_get(lua_State *L, GCtab *t, cTValue *key);
53/* Caveat: all setters require a write barrier for the stored value. */ 55/* Caveat: all setters require a write barrier for the stored value. */
54 56
55LJ_FUNCA TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key); 57LJ_FUNCA TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key);
56LJ_FUNC TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key); 58LJ_FUNCA TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key);
57LJ_FUNC TValue *lj_tab_setstr(lua_State *L, GCtab *t, GCstr *key); 59LJ_FUNC TValue *lj_tab_setstr(lua_State *L, GCtab *t, GCstr *key);
58LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key); 60LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key);
59 61
diff --git a/src/lj_target_arm.h b/src/lj_target_arm.h
index 63a0c925..68d022b2 100644
--- a/src/lj_target_arm.h
+++ b/src/lj_target_arm.h
@@ -243,10 +243,6 @@ typedef enum ARMIns {
243 ARMI_VCVT_S32_F64 = 0xeebd0bc0, 243 ARMI_VCVT_S32_F64 = 0xeebd0bc0,
244 ARMI_VCVT_U32_F32 = 0xeebc0ac0, 244 ARMI_VCVT_U32_F32 = 0xeebc0ac0,
245 ARMI_VCVT_U32_F64 = 0xeebc0bc0, 245 ARMI_VCVT_U32_F64 = 0xeebc0bc0,
246 ARMI_VCVTR_S32_F32 = 0xeebd0a40,
247 ARMI_VCVTR_S32_F64 = 0xeebd0b40,
248 ARMI_VCVTR_U32_F32 = 0xeebc0a40,
249 ARMI_VCVTR_U32_F64 = 0xeebc0b40,
250 ARMI_VCVT_F32_S32 = 0xeeb80ac0, 246 ARMI_VCVT_F32_S32 = 0xeeb80ac0,
251 ARMI_VCVT_F64_S32 = 0xeeb80bc0, 247 ARMI_VCVT_F64_S32 = 0xeeb80bc0,
252 ARMI_VCVT_F32_U32 = 0xeeb80a40, 248 ARMI_VCVT_F32_U32 = 0xeeb80a40,
diff --git a/src/lj_target_mips.h b/src/lj_target_mips.h
index e0fc152d..099240e9 100644
--- a/src/lj_target_mips.h
+++ b/src/lj_target_mips.h
@@ -169,6 +169,9 @@ typedef enum MIPSIns {
169 MIPSI_SLTU = 0x0000002b, 169 MIPSI_SLTU = 0x0000002b,
170 MIPSI_MOVZ = 0x0000000a, 170 MIPSI_MOVZ = 0x0000000a,
171 MIPSI_MOVN = 0x0000000b, 171 MIPSI_MOVN = 0x0000000b,
172 MIPSI_MFHI = 0x00000010,
173 MIPSI_MFLO = 0x00000012,
174 MIPSI_MULT = 0x00000018,
172 175
173 MIPSI_SLL = 0x00000000, 176 MIPSI_SLL = 0x00000000,
174 MIPSI_SRL = 0x00000002, 177 MIPSI_SRL = 0x00000002,
diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h
index 41f401b0..171dae4c 100644
--- a/src/lj_target_x86.h
+++ b/src/lj_target_x86.h
@@ -33,6 +33,7 @@ enum {
33 RID_MRM = RID_MAX, /* Pseudo-id for ModRM operand. */ 33 RID_MRM = RID_MAX, /* Pseudo-id for ModRM operand. */
34 34
35 /* Calling conventions. */ 35 /* Calling conventions. */
36 RID_SP = RID_ESP,
36 RID_RET = RID_EAX, 37 RID_RET = RID_EAX,
37#if LJ_64 38#if LJ_64
38 RID_FPRET = RID_XMM0, 39 RID_FPRET = RID_XMM0,
@@ -277,10 +278,8 @@ typedef enum {
277 XO_ROUNDSD = 0x0b3a0ffc, /* Really 66 0f 3a 0b. See asm_fpmath. */ 278 XO_ROUNDSD = 0x0b3a0ffc, /* Really 66 0f 3a 0b. See asm_fpmath. */
278 XO_UCOMISD = XO_660f(2e), 279 XO_UCOMISD = XO_660f(2e),
279 XO_CVTSI2SD = XO_f20f(2a), 280 XO_CVTSI2SD = XO_f20f(2a),
280 XO_CVTSD2SI = XO_f20f(2d),
281 XO_CVTTSD2SI= XO_f20f(2c), 281 XO_CVTTSD2SI= XO_f20f(2c),
282 XO_CVTSI2SS = XO_f30f(2a), 282 XO_CVTSI2SS = XO_f30f(2a),
283 XO_CVTSS2SI = XO_f30f(2d),
284 XO_CVTTSS2SI= XO_f30f(2c), 283 XO_CVTTSS2SI= XO_f30f(2c),
285 XO_CVTSS2SD = XO_f30f(5a), 284 XO_CVTSS2SD = XO_f30f(5a),
286 XO_CVTSD2SS = XO_f20f(5a), 285 XO_CVTSD2SS = XO_f20f(5a),
diff --git a/src/lj_trace.c b/src/lj_trace.c
index 9e5e400f..7bb6c8ae 100644
--- a/src/lj_trace.c
+++ b/src/lj_trace.c
@@ -360,7 +360,7 @@ static void trace_start(jit_State *J)
360 TraceNo traceno; 360 TraceNo traceno;
361 361
362 if ((J->pt->flags & PROTO_NOJIT)) { /* JIT disabled for this proto? */ 362 if ((J->pt->flags & PROTO_NOJIT)) { /* JIT disabled for this proto? */
363 if (J->parent == 0) { 363 if (J->parent == 0 && J->exitno == 0) {
364 /* Lazy bytecode patching to disable hotcount events. */ 364 /* Lazy bytecode patching to disable hotcount events. */
365 lua_assert(bc_op(*J->pc) == BC_FORL || bc_op(*J->pc) == BC_ITERL || 365 lua_assert(bc_op(*J->pc) == BC_FORL || bc_op(*J->pc) == BC_ITERL ||
366 bc_op(*J->pc) == BC_LOOP || bc_op(*J->pc) == BC_FUNCF); 366 bc_op(*J->pc) == BC_LOOP || bc_op(*J->pc) == BC_FUNCF);
@@ -453,6 +453,12 @@ static void trace_stop(jit_State *J)
453 root->nextside = (TraceNo1)traceno; 453 root->nextside = (TraceNo1)traceno;
454 } 454 }
455 break; 455 break;
456 case BC_CALLM:
457 case BC_CALL:
458 case BC_ITERC:
459 /* Trace stitching: patch link of previous trace. */
460 traceref(J, J->exitno)->link = traceno;
461 break;
456 default: 462 default:
457 lua_assert(0); 463 lua_assert(0);
458 break; 464 break;
@@ -467,6 +473,7 @@ static void trace_stop(jit_State *J)
467 lj_vmevent_send(L, TRACE, 473 lj_vmevent_send(L, TRACE,
468 setstrV(L, L->top++, lj_str_newlit(L, "stop")); 474 setstrV(L, L->top++, lj_str_newlit(L, "stop"));
469 setintV(L->top++, traceno); 475 setintV(L->top++, traceno);
476 setfuncV(L, L->top++, J->fn);
470 ); 477 );
471} 478}
472 479
@@ -502,8 +509,12 @@ static int trace_abort(jit_State *J)
502 return 1; /* Retry ASM with new MCode area. */ 509 return 1; /* Retry ASM with new MCode area. */
503 } 510 }
504 /* Penalize or blacklist starting bytecode instruction. */ 511 /* Penalize or blacklist starting bytecode instruction. */
505 if (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))) 512 if (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))) {
506 penalty_pc(J, &gcref(J->cur.startpt)->pt, mref(J->cur.startpc, BCIns), e); 513 if (J->exitno == 0)
514 penalty_pc(J, &gcref(J->cur.startpt)->pt, mref(J->cur.startpc, BCIns), e);
515 else
516 traceref(J, J->exitno)->link = J->exitno; /* Self-link is blacklisted. */
517 }
507 518
508 /* Is there anything to abort? */ 519 /* Is there anything to abort? */
509 traceno = J->cur.traceno; 520 traceno = J->cur.traceno;
@@ -671,6 +682,7 @@ static void trace_hotside(jit_State *J, const BCIns *pc)
671{ 682{
672 SnapShot *snap = &traceref(J, J->parent)->snap[J->exitno]; 683 SnapShot *snap = &traceref(J, J->parent)->snap[J->exitno];
673 if (!(J2G(J)->hookmask & (HOOK_GC|HOOK_VMEVENT)) && 684 if (!(J2G(J)->hookmask & (HOOK_GC|HOOK_VMEVENT)) &&
685 isluafunc(curr_func(J->L)) &&
674 snap->count != SNAPCOUNT_DONE && 686 snap->count != SNAPCOUNT_DONE &&
675 ++snap->count >= J->param[JIT_P_hotexit]) { 687 ++snap->count >= J->param[JIT_P_hotexit]) {
676 lua_assert(J->state == LJ_TRACE_IDLE); 688 lua_assert(J->state == LJ_TRACE_IDLE);
@@ -680,6 +692,20 @@ static void trace_hotside(jit_State *J, const BCIns *pc)
680 } 692 }
681} 693}
682 694
695/* Stitch a new trace to the previous trace. */
696void LJ_FASTCALL lj_trace_stitch(jit_State *J, const BCIns *pc)
697{
698 /* Only start a new trace if not recording or inside __gc call or vmevent. */
699 if (J->state == LJ_TRACE_IDLE &&
700 !(J2G(J)->hookmask & (HOOK_GC|HOOK_VMEVENT))) {
701 J->parent = 0; /* Have to treat it like a root trace. */
702 /* J->exitno is set to the invoking trace. */
703 J->state = LJ_TRACE_START;
704 lj_trace_ins(J, pc);
705 }
706}
707
708
683/* Tiny struct to pass data to protected call. */ 709/* Tiny struct to pass data to protected call. */
684typedef struct ExitDataCP { 710typedef struct ExitDataCP {
685 jit_State *J; 711 jit_State *J;
@@ -766,17 +792,20 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr)
766 if (errcode) 792 if (errcode)
767 return -errcode; /* Return negated error code. */ 793 return -errcode; /* Return negated error code. */
768 794
769 lj_vmevent_send(L, TEXIT, 795 if (!(LJ_HASPROFILE && (G(L)->hookmask & HOOK_PROFILE)))
770 lj_state_checkstack(L, 4+RID_NUM_GPR+RID_NUM_FPR+LUA_MINSTACK); 796 lj_vmevent_send(L, TEXIT,
771 setintV(L->top++, J->parent); 797 lj_state_checkstack(L, 4+RID_NUM_GPR+RID_NUM_FPR+LUA_MINSTACK);
772 setintV(L->top++, J->exitno); 798 setintV(L->top++, J->parent);
773 trace_exit_regs(L, ex); 799 setintV(L->top++, J->exitno);
774 ); 800 trace_exit_regs(L, ex);
801 );
775 802
776 pc = exd.pc; 803 pc = exd.pc;
777 cf = cframe_raw(L->cframe); 804 cf = cframe_raw(L->cframe);
778 setcframe_pc(cf, pc); 805 setcframe_pc(cf, pc);
779 if (G(L)->gc.state == GCSatomic || G(L)->gc.state == GCSfinalize) { 806 if (LJ_HASPROFILE && (G(L)->hookmask & HOOK_PROFILE)) {
807 /* Just exit to interpreter. */
808 } else if (G(L)->gc.state == GCSatomic || G(L)->gc.state == GCSfinalize) {
780 if (!(G(L)->hookmask & HOOK_GC)) 809 if (!(G(L)->hookmask & HOOK_GC))
781 lj_gc_step(L); /* Exited because of GC: drive GC forward. */ 810 lj_gc_step(L); /* Exited because of GC: drive GC forward. */
782 } else { 811 } else {
diff --git a/src/lj_trace.h b/src/lj_trace.h
index f3109081..74c5431d 100644
--- a/src/lj_trace.h
+++ b/src/lj_trace.h
@@ -34,6 +34,7 @@ LJ_FUNC void lj_trace_freestate(global_State *g);
34/* Event handling. */ 34/* Event handling. */
35LJ_FUNC void lj_trace_ins(jit_State *J, const BCIns *pc); 35LJ_FUNC void lj_trace_ins(jit_State *J, const BCIns *pc);
36LJ_FUNCA void LJ_FASTCALL lj_trace_hot(jit_State *J, const BCIns *pc); 36LJ_FUNCA void LJ_FASTCALL lj_trace_hot(jit_State *J, const BCIns *pc);
37LJ_FUNCA void LJ_FASTCALL lj_trace_stitch(jit_State *J, const BCIns *pc);
37LJ_FUNCA int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr); 38LJ_FUNCA int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr);
38 39
39/* Signal asynchronous abort of trace or end of trace. */ 40/* Signal asynchronous abort of trace or end of trace. */
diff --git a/src/lj_traceerr.h b/src/lj_traceerr.h
index 521560a1..c58fbef3 100644
--- a/src/lj_traceerr.h
+++ b/src/lj_traceerr.h
@@ -7,6 +7,7 @@
7 7
8/* Recording. */ 8/* Recording. */
9TREDEF(RECERR, "error thrown or hook called during recording") 9TREDEF(RECERR, "error thrown or hook called during recording")
10TREDEF(TRACEUV, "trace too short")
10TREDEF(TRACEOV, "trace too long") 11TREDEF(TRACEOV, "trace too long")
11TREDEF(STACKOV, "trace too deep") 12TREDEF(STACKOV, "trace too deep")
12TREDEF(SNAPOV, "too many snapshots") 13TREDEF(SNAPOV, "too many snapshots")
@@ -23,8 +24,6 @@ TREDEF(BADTYPE, "bad argument type")
23TREDEF(CJITOFF, "JIT compilation disabled for function") 24TREDEF(CJITOFF, "JIT compilation disabled for function")
24TREDEF(CUNROLL, "call unroll limit reached") 25TREDEF(CUNROLL, "call unroll limit reached")
25TREDEF(DOWNREC, "down-recursion, restarting") 26TREDEF(DOWNREC, "down-recursion, restarting")
26TREDEF(NYICF, "NYI: C function %p")
27TREDEF(NYIFF, "NYI: FastFunc %s")
28TREDEF(NYIFFU, "NYI: unsupported variant of FastFunc %s") 27TREDEF(NYIFFU, "NYI: unsupported variant of FastFunc %s")
29TREDEF(NYIRETL, "NYI: return to lower frame") 28TREDEF(NYIRETL, "NYI: return to lower frame")
30 29
diff --git a/src/lj_vm.h b/src/lj_vm.h
index 4a8f03f0..83883e2c 100644
--- a/src/lj_vm.h
+++ b/src/lj_vm.h
@@ -43,18 +43,21 @@ LJ_ASMF void lj_vm_record(void);
43LJ_ASMF void lj_vm_inshook(void); 43LJ_ASMF void lj_vm_inshook(void);
44LJ_ASMF void lj_vm_rethook(void); 44LJ_ASMF void lj_vm_rethook(void);
45LJ_ASMF void lj_vm_callhook(void); 45LJ_ASMF void lj_vm_callhook(void);
46LJ_ASMF void lj_vm_profhook(void);
46 47
47/* Trace exit handling. */ 48/* Trace exit handling. */
48LJ_ASMF void lj_vm_exit_handler(void); 49LJ_ASMF void lj_vm_exit_handler(void);
49LJ_ASMF void lj_vm_exit_interp(void); 50LJ_ASMF void lj_vm_exit_interp(void);
50 51
51/* Internal math helper functions. */ 52/* Internal math helper functions. */
52#if LJ_TARGET_X86ORX64 || LJ_TARGET_PPC 53#if LJ_TARGET_PPC
53#define lj_vm_floor floor 54#define lj_vm_floor floor
54#define lj_vm_ceil ceil 55#define lj_vm_ceil ceil
55#else 56#else
56LJ_ASMF double lj_vm_floor(double); 57LJ_ASMF double lj_vm_floor(double);
58#if !LJ_TARGET_X86ORX64
57LJ_ASMF double lj_vm_ceil(double); 59LJ_ASMF double lj_vm_ceil(double);
60#endif
58#if LJ_TARGET_ARM 61#if LJ_TARGET_ARM
59LJ_ASMF double lj_vm_floor_sf(double); 62LJ_ASMF double lj_vm_floor_sf(double);
60LJ_ASMF double lj_vm_ceil_sf(double); 63LJ_ASMF double lj_vm_ceil_sf(double);
@@ -104,6 +107,7 @@ LJ_ASMF void lj_cont_nop(void); /* Do nothing, just continue execution. */
104LJ_ASMF void lj_cont_condt(void); /* Branch if result is true. */ 107LJ_ASMF void lj_cont_condt(void); /* Branch if result is true. */
105LJ_ASMF void lj_cont_condf(void); /* Branch if result is false. */ 108LJ_ASMF void lj_cont_condf(void); /* Branch if result is false. */
106LJ_ASMF void lj_cont_hook(void); /* Continue from hook yield. */ 109LJ_ASMF void lj_cont_hook(void); /* Continue from hook yield. */
110LJ_ASMF void lj_cont_stitch(void); /* Trace stitching. */
107 111
108enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */ 112enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
109 113
diff --git a/src/ljamalg.c b/src/ljamalg.c
index 52a86153..da08f7ba 100644
--- a/src/ljamalg.c
+++ b/src/ljamalg.c
@@ -33,6 +33,7 @@
33#include "lj_char.c" 33#include "lj_char.c"
34#include "lj_bc.c" 34#include "lj_bc.c"
35#include "lj_obj.c" 35#include "lj_obj.c"
36#include "lj_buf.c"
36#include "lj_str.c" 37#include "lj_str.c"
37#include "lj_tab.c" 38#include "lj_tab.c"
38#include "lj_func.c" 39#include "lj_func.c"
@@ -44,7 +45,9 @@
44#include "lj_vmevent.c" 45#include "lj_vmevent.c"
45#include "lj_vmmath.c" 46#include "lj_vmmath.c"
46#include "lj_strscan.c" 47#include "lj_strscan.c"
48#include "lj_strfmt.c"
47#include "lj_api.c" 49#include "lj_api.c"
50#include "lj_profile.c"
48#include "lj_lex.c" 51#include "lj_lex.c"
49#include "lj_parse.c" 52#include "lj_parse.c"
50#include "lj_bcread.c" 53#include "lj_bcread.c"
diff --git a/src/luaconf.h b/src/luaconf.h
index affb7da8..40fd68a6 100644
--- a/src/luaconf.h
+++ b/src/luaconf.h
@@ -37,7 +37,7 @@
37#endif 37#endif
38#define LUA_LROOT "/usr/local" 38#define LUA_LROOT "/usr/local"
39#define LUA_LUADIR "/lua/5.1/" 39#define LUA_LUADIR "/lua/5.1/"
40#define LUA_LJDIR "/luajit-2.0.3/" 40#define LUA_LJDIR "/luajit-2.1.0-alpha/"
41 41
42#ifdef LUA_ROOT 42#ifdef LUA_ROOT
43#define LUA_JROOT LUA_ROOT 43#define LUA_JROOT LUA_ROOT
diff --git a/src/luajit.c b/src/luajit.c
index e0343439..e292da80 100644
--- a/src/luajit.c
+++ b/src/luajit.c
@@ -61,8 +61,9 @@ static void laction(int i)
61 61
62static void print_usage(void) 62static void print_usage(void)
63{ 63{
64 fprintf(stderr, 64 fputs("usage: ", stderr);
65 "usage: %s [options]... [script [args]...].\n" 65 fputs(progname, stderr);
66 fputs(" [options]... [script [args]...].\n"
66 "Available options are:\n" 67 "Available options are:\n"
67 " -e chunk Execute string " LUA_QL("chunk") ".\n" 68 " -e chunk Execute string " LUA_QL("chunk") ".\n"
68 " -l name Require library " LUA_QL("name") ".\n" 69 " -l name Require library " LUA_QL("name") ".\n"
@@ -73,16 +74,14 @@ static void print_usage(void)
73 " -v Show version information.\n" 74 " -v Show version information.\n"
74 " -E Ignore environment variables.\n" 75 " -E Ignore environment variables.\n"
75 " -- Stop handling options.\n" 76 " -- Stop handling options.\n"
76 " - Execute stdin and stop handling options.\n" 77 " - Execute stdin and stop handling options.\n", stderr);
77 ,
78 progname);
79 fflush(stderr); 78 fflush(stderr);
80} 79}
81 80
82static void l_message(const char *pname, const char *msg) 81static void l_message(const char *pname, const char *msg)
83{ 82{
84 if (pname) fprintf(stderr, "%s: ", pname); 83 if (pname) { fputs(pname, stderr); fputc(':', stderr); fputc(' ', stderr); }
85 fprintf(stderr, "%s\n", msg); 84 fputs(msg, stderr); fputc('\n', stderr);
86 fflush(stderr); 85 fflush(stderr);
87} 86}
88 87
diff --git a/src/luajit.h b/src/luajit.h
index be721cf4..4e1da9a4 100644
--- a/src/luajit.h
+++ b/src/luajit.h
@@ -30,9 +30,9 @@
30 30
31#include "lua.h" 31#include "lua.h"
32 32
33#define LUAJIT_VERSION "LuaJIT 2.0.3" 33#define LUAJIT_VERSION "LuaJIT 2.1.0-alpha"
34#define LUAJIT_VERSION_NUM 20003 /* Version 2.0.3 = 02.00.03. */ 34#define LUAJIT_VERSION_NUM 20100 /* Version 2.1.0 = 02.01.00. */
35#define LUAJIT_VERSION_SYM luaJIT_version_2_0_3 35#define LUAJIT_VERSION_SYM luaJIT_version_2_1_0_alpha
36#define LUAJIT_COPYRIGHT "Copyright (C) 2005-2014 Mike Pall" 36#define LUAJIT_COPYRIGHT "Copyright (C) 2005-2014 Mike Pall"
37#define LUAJIT_URL "http://luajit.org/" 37#define LUAJIT_URL "http://luajit.org/"
38 38
@@ -64,6 +64,15 @@ enum {
64/* Control the JIT engine. */ 64/* Control the JIT engine. */
65LUA_API int luaJIT_setmode(lua_State *L, int idx, int mode); 65LUA_API int luaJIT_setmode(lua_State *L, int idx, int mode);
66 66
67/* Low-overhead profiling API. */
68typedef void (*luaJIT_profile_callback)(void *data, lua_State *L,
69 int samples, int vmstate);
70LUA_API void luaJIT_profile_start(lua_State *L, const char *mode,
71 luaJIT_profile_callback cb, void *data);
72LUA_API void luaJIT_profile_stop(lua_State *L);
73LUA_API const char *luaJIT_profile_dumpstack(lua_State *L, const char *fmt,
74 int depth, size_t *len);
75
67/* Enforce (dynamic) linker error for version mismatches. Call from main. */ 76/* Enforce (dynamic) linker error for version mismatches. Call from main. */
68LUA_API void LUAJIT_VERSION_SYM(void); 77LUA_API void LUAJIT_VERSION_SYM(void);
69 78
diff --git a/src/msvcbuild.bat b/src/msvcbuild.bat
index 9160e0f8..729b7049 100644
--- a/src/msvcbuild.bat
+++ b/src/msvcbuild.bat
@@ -37,6 +37,7 @@ if exist minilua.exe.manifest^
37@if errorlevel 8 goto :X64 37@if errorlevel 8 goto :X64
38@set DASMFLAGS=-D WIN -D JIT -D FFI 38@set DASMFLAGS=-D WIN -D JIT -D FFI
39@set LJARCH=x86 39@set LJARCH=x86
40@set LJCOMPILE=%LJCOMPILE% /arch:SSE2
40:X64 41:X64
41minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_x86.dasc 42minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_x86.dasc
42@if errorlevel 1 goto :BAD 43@if errorlevel 1 goto :BAD
diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc
index 82cba909..58efabce 100644
--- a/src/vm_arm.dasc
+++ b/src/vm_arm.dasc
@@ -99,6 +99,7 @@
99|.type NODE, Node 99|.type NODE, Node
100|.type NARGS8, int 100|.type NARGS8, int
101|.type TRACE, GCtrace 101|.type TRACE, GCtrace
102|.type SBUF, SBuf
102| 103|
103|//----------------------------------------------------------------------- 104|//-----------------------------------------------------------------------
104| 105|
@@ -418,13 +419,14 @@ static void build_subroutines(BuildCtx *ctx)
418 | add CARG2, sp, #CFRAME_RESUME 419 | add CARG2, sp, #CFRAME_RESUME
419 | ldrb CARG1, L->status 420 | ldrb CARG1, L->status
420 | str CARG3, SAVE_ERRF 421 | str CARG3, SAVE_ERRF
421 | str CARG2, L->cframe 422 | str L, SAVE_PC // Any value outside of bytecode is ok.
422 | str CARG3, SAVE_CFRAME 423 | str CARG3, SAVE_CFRAME
423 | cmp CARG1, #0 424 | cmp CARG1, #0
424 | str L, SAVE_PC // Any value outside of bytecode is ok. 425 | str CARG2, L->cframe
425 | beq >3 426 | beq >3
426 | 427 |
427 | // Resume after yield (like a return). 428 | // Resume after yield (like a return).
429 | str L, [DISPATCH, #DISPATCH_GL(cur_L)]
428 | mov RA, BASE 430 | mov RA, BASE
429 | ldr BASE, L->base 431 | ldr BASE, L->base
430 | ldr CARG1, L->top 432 | ldr CARG1, L->top
@@ -458,14 +460,15 @@ static void build_subroutines(BuildCtx *ctx)
458 | str CARG3, SAVE_NRES 460 | str CARG3, SAVE_NRES
459 | mov L, CARG1 461 | mov L, CARG1
460 | str CARG1, SAVE_L 462 | str CARG1, SAVE_L
461 | mov BASE, CARG2
462 | str sp, L->cframe // Add our C frame to cframe chain.
463 | ldr DISPATCH, L->glref // Setup pointer to dispatch table. 463 | ldr DISPATCH, L->glref // Setup pointer to dispatch table.
464 | mov BASE, CARG2
464 | str CARG1, SAVE_PC // Any value outside of bytecode is ok. 465 | str CARG1, SAVE_PC // Any value outside of bytecode is ok.
465 | str RC, SAVE_CFRAME 466 | str RC, SAVE_CFRAME
466 | add DISPATCH, DISPATCH, #GG_G2DISP 467 | add DISPATCH, DISPATCH, #GG_G2DISP
468 | str sp, L->cframe // Add our C frame to cframe chain.
467 | 469 |
468 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). 470 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
471 | str L, [DISPATCH, #DISPATCH_GL(cur_L)]
469 | ldr RB, L->base // RB = old base (for vmeta_call). 472 | ldr RB, L->base // RB = old base (for vmeta_call).
470 | ldr CARG1, L->top 473 | ldr CARG1, L->top
471 | mov MASKR8, #255 474 | mov MASKR8, #255
@@ -491,20 +494,21 @@ static void build_subroutines(BuildCtx *ctx)
491 | mov L, CARG1 494 | mov L, CARG1
492 | ldr RA, L:CARG1->stack 495 | ldr RA, L:CARG1->stack
493 | str CARG1, SAVE_L 496 | str CARG1, SAVE_L
497 | ldr DISPATCH, L->glref // Setup pointer to dispatch table.
494 | ldr RB, L->top 498 | ldr RB, L->top
495 | str CARG1, SAVE_PC // Any value outside of bytecode is ok. 499 | str CARG1, SAVE_PC // Any value outside of bytecode is ok.
496 | ldr RC, L->cframe 500 | ldr RC, L->cframe
501 | add DISPATCH, DISPATCH, #GG_G2DISP
497 | sub RA, RA, RB // Compute -savestack(L, L->top). 502 | sub RA, RA, RB // Compute -savestack(L, L->top).
498 | str sp, L->cframe // Add our C frame to cframe chain.
499 | mov RB, #0 503 | mov RB, #0
500 | str RA, SAVE_NRES // Neg. delta means cframe w/o frame. 504 | str RA, SAVE_NRES // Neg. delta means cframe w/o frame.
501 | str RB, SAVE_ERRF // No error function. 505 | str RB, SAVE_ERRF // No error function.
502 | str RC, SAVE_CFRAME 506 | str RC, SAVE_CFRAME
507 | str sp, L->cframe // Add our C frame to cframe chain.
508 | str L, [DISPATCH, #DISPATCH_GL(cur_L)]
503 | blx CARG4 // (lua_State *L, lua_CFunction func, void *ud) 509 | blx CARG4 // (lua_State *L, lua_CFunction func, void *ud)
504 | ldr DISPATCH, L->glref // Setup pointer to dispatch table.
505 | movs BASE, CRET1 510 | movs BASE, CRET1
506 | mov PC, #FRAME_CP 511 | mov PC, #FRAME_CP
507 | add DISPATCH, DISPATCH, #GG_G2DISP
508 | bne <3 // Else continue with the call. 512 | bne <3 // Else continue with the call.
509 | b ->vm_leave_cp // No base? Just remove C frame. 513 | b ->vm_leave_cp // No base? Just remove C frame.
510 | 514 |
@@ -615,6 +619,16 @@ static void build_subroutines(BuildCtx *ctx)
615 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. 619 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here.
616 | b ->vm_call_dispatch_f 620 | b ->vm_call_dispatch_f
617 | 621 |
622 |->vmeta_tgetr:
623 | .IOS mov RC, BASE
624 | bl extern lj_tab_getinth // (GCtab *t, int32_t key)
625 | // Returns cTValue * or NULL.
626 | .IOS mov BASE, RC
627 | cmp CRET1, #0
628 | ldrdne CARG12, [CRET1]
629 | mvneq CARG2, #~LJ_TNIL
630 | b ->BC_TGETR_Z
631 |
618 |//----------------------------------------------------------------------- 632 |//-----------------------------------------------------------------------
619 | 633 |
620 |->vmeta_tsets1: 634 |->vmeta_tsets1:
@@ -672,6 +686,15 @@ static void build_subroutines(BuildCtx *ctx)
672 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. 686 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here.
673 | b ->vm_call_dispatch_f 687 | b ->vm_call_dispatch_f
674 | 688 |
689 |->vmeta_tsetr:
690 | str BASE, L->base
691 | .IOS mov RC, BASE
692 | str PC, SAVE_PC
693 | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
694 | // Returns TValue *.
695 | .IOS mov BASE, RC
696 | b ->BC_TSETR_Z
697 |
675 |//-- Comparison metamethods --------------------------------------------- 698 |//-- Comparison metamethods ---------------------------------------------
676 | 699 |
677 |->vmeta_comp: 700 |->vmeta_comp:
@@ -736,6 +759,17 @@ static void build_subroutines(BuildCtx *ctx)
736 | b <3 759 | b <3
737 |.endif 760 |.endif
738 | 761 |
762 |->vmeta_istype:
763 | sub PC, PC, #4
764 | str BASE, L->base
765 | mov CARG1, L
766 | lsr CARG2, RA, #3
767 | mov CARG3, RC
768 | str PC, SAVE_PC
769 | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
770 | .IOS ldr BASE, L->base
771 | b ->cont_nop
772 |
739 |//-- Arithmetic metamethods --------------------------------------------- 773 |//-- Arithmetic metamethods ---------------------------------------------
740 | 774 |
741 |->vmeta_arith_vn: 775 |->vmeta_arith_vn:
@@ -1053,7 +1087,7 @@ static void build_subroutines(BuildCtx *ctx)
1053 | ffgccheck 1087 | ffgccheck
1054 | mov CARG1, L 1088 | mov CARG1, L
1055 | mov CARG2, BASE 1089 | mov CARG2, BASE
1056 | bl extern lj_str_fromnumber // (lua_State *L, cTValue *o) 1090 | bl extern lj_strfmt_number // (lua_State *L, cTValue *o)
1057 | // Returns GCstr *. 1091 | // Returns GCstr *.
1058 | ldr BASE, L->base 1092 | ldr BASE, L->base
1059 | mvn CARG2, #~LJ_TSTR 1093 | mvn CARG2, #~LJ_TSTR
@@ -1231,9 +1265,10 @@ static void build_subroutines(BuildCtx *ctx)
1231 | ldr CARG3, L:RA->base 1265 | ldr CARG3, L:RA->base
1232 | mv_vmstate CARG2, INTERP 1266 | mv_vmstate CARG2, INTERP
1233 | ldr CARG4, L:RA->top 1267 | ldr CARG4, L:RA->top
1234 | st_vmstate CARG2
1235 | cmp CRET1, #LUA_YIELD 1268 | cmp CRET1, #LUA_YIELD
1236 | ldr BASE, L->base 1269 | ldr BASE, L->base
1270 | str L, [DISPATCH, #DISPATCH_GL(cur_L)]
1271 | st_vmstate CARG2
1237 | bhi >8 1272 | bhi >8
1238 | subs RC, CARG4, CARG3 1273 | subs RC, CARG4, CARG3
1239 | ldr CARG1, L->maxstack 1274 | ldr CARG1, L->maxstack
@@ -1501,19 +1536,6 @@ static void build_subroutines(BuildCtx *ctx)
1501 | math_extern2 atan2 1536 | math_extern2 atan2
1502 | math_extern2 fmod 1537 | math_extern2 fmod
1503 | 1538 |
1504 |->ff_math_deg:
1505 |.if FPU
1506 | .ffunc_d math_rad
1507 | vldr d1, CFUNC:CARG3->upvalue[0]
1508 | vmul.f64 d0, d0, d1
1509 | b ->fff_resd
1510 |.else
1511 | .ffunc_n math_rad
1512 | ldrd CARG34, CFUNC:CARG3->upvalue[0]
1513 | bl extern __aeabi_dmul
1514 | b ->fff_restv
1515 |.endif
1516 |
1517 |.if HFABI 1539 |.if HFABI
1518 | .ffunc math_ldexp 1540 | .ffunc math_ldexp
1519 | ldr CARG4, [BASE, #4] 1541 | ldr CARG4, [BASE, #4]
@@ -1688,12 +1710,6 @@ static void build_subroutines(BuildCtx *ctx)
1688 | 1710 |
1689 |//-- String library ----------------------------------------------------- 1711 |//-- String library -----------------------------------------------------
1690 | 1712 |
1691 |.ffunc_1 string_len
1692 | checkstr CARG2, ->fff_fallback
1693 | ldr CARG1, STR:CARG1->len
1694 | mvn CARG2, #~LJ_TISNUM
1695 | b ->fff_restv
1696 |
1697 |.ffunc string_byte // Only handle the 1-arg case here. 1713 |.ffunc string_byte // Only handle the 1-arg case here.
1698 | ldrd CARG12, [BASE] 1714 | ldrd CARG12, [BASE]
1699 | ldr PC, [BASE, FRAME_PC] 1715 | ldr PC, [BASE, FRAME_PC]
@@ -1726,6 +1742,7 @@ static void build_subroutines(BuildCtx *ctx)
1726 | mov CARG1, L 1742 | mov CARG1, L
1727 | str PC, SAVE_PC 1743 | str PC, SAVE_PC
1728 | bl extern lj_str_new // (lua_State *L, char *str, size_t l) 1744 | bl extern lj_str_new // (lua_State *L, char *str, size_t l)
1745 |->fff_resstr:
1729 | // Returns GCstr *. 1746 | // Returns GCstr *.
1730 | ldr BASE, L->base 1747 | ldr BASE, L->base
1731 | mvn CARG2, #~LJ_TSTR 1748 | mvn CARG2, #~LJ_TSTR
@@ -1769,91 +1786,28 @@ static void build_subroutines(BuildCtx *ctx)
1769 | mvn CARG2, #~LJ_TSTR 1786 | mvn CARG2, #~LJ_TSTR
1770 | b ->fff_restv 1787 | b ->fff_restv
1771 | 1788 |
1772 |.ffunc string_rep // Only handle the 1-char case inline. 1789 |.macro ffstring_op, name
1773 | ffgccheck 1790 | .ffunc string_ .. name
1774 | ldrd CARG12, [BASE]
1775 | ldrd CARG34, [BASE, #8]
1776 | cmp NARGS8:RC, #16
1777 | bne ->fff_fallback // Exactly 2 arguments
1778 | checktp CARG2, LJ_TSTR
1779 | checktpeq CARG4, LJ_TISNUM
1780 | bne ->fff_fallback
1781 | subs CARG4, CARG3, #1
1782 | ldr CARG2, STR:CARG1->len
1783 | blt ->fff_emptystr // Count <= 0?
1784 | cmp CARG2, #1
1785 | blo ->fff_emptystr // Zero-length string?
1786 | bne ->fff_fallback // Fallback for > 1-char strings.
1787 | ldr RB, [DISPATCH, #DISPATCH_GL(tmpbuf.sz)]
1788 | ldr CARG2, [DISPATCH, #DISPATCH_GL(tmpbuf.buf)]
1789 | ldr CARG1, STR:CARG1[1]
1790 | cmp RB, CARG3
1791 | blo ->fff_fallback
1792 |1: // Fill buffer with char.
1793 | strb CARG1, [CARG2, CARG4]
1794 | subs CARG4, CARG4, #1
1795 | bge <1
1796 | b ->fff_newstr
1797 |
1798 |.ffunc string_reverse
1799 | ffgccheck
1800 | ldrd CARG12, [BASE]
1801 | cmp NARGS8:RC, #8
1802 | blo ->fff_fallback
1803 | checkstr CARG2, ->fff_fallback
1804 | ldr CARG3, STR:CARG1->len
1805 | ldr RB, [DISPATCH, #DISPATCH_GL(tmpbuf.sz)]
1806 | ldr CARG2, [DISPATCH, #DISPATCH_GL(tmpbuf.buf)]
1807 | mov CARG4, CARG3
1808 | add CARG1, STR:CARG1, #sizeof(GCstr)
1809 | cmp RB, CARG3
1810 | blo ->fff_fallback
1811 |1: // Reverse string copy.
1812 | ldrb RB, [CARG1], #1
1813 | subs CARG4, CARG4, #1
1814 | blt ->fff_newstr
1815 | strb RB, [CARG2, CARG4]
1816 | b <1
1817 |
1818 |.macro ffstring_case, name, lo
1819 | .ffunc name
1820 | ffgccheck 1791 | ffgccheck
1821 | ldrd CARG12, [BASE] 1792 | ldr CARG3, [BASE, #4]
1822 | cmp NARGS8:RC, #8 1793 | cmp NARGS8:RC, #8
1794 | ldr STR:CARG2, [BASE]
1823 | blo ->fff_fallback 1795 | blo ->fff_fallback
1824 | checkstr CARG2, ->fff_fallback 1796 | sub SBUF:CARG1, DISPATCH, #-DISPATCH_GL(tmpbuf)
1825 | ldr CARG3, STR:CARG1->len 1797 | checkstr CARG3, ->fff_fallback
1826 | ldr RB, [DISPATCH, #DISPATCH_GL(tmpbuf.sz)] 1798 | ldr CARG4, SBUF:CARG1->b
1827 | ldr CARG2, [DISPATCH, #DISPATCH_GL(tmpbuf.buf)] 1799 | str BASE, L->base
1828 | mov CARG4, #0 1800 | str PC, SAVE_PC
1829 | add CARG1, STR:CARG1, #sizeof(GCstr) 1801 | str L, SBUF:CARG1->L
1830 | cmp RB, CARG3 1802 | str CARG4, SBUF:CARG1->p
1831 | blo ->fff_fallback 1803 | bl extern lj_buf_putstr_ .. name
1832 |1: // ASCII case conversion. 1804 | bl extern lj_buf_tostr
1833 | ldrb RB, [CARG1, CARG4] 1805 | b ->fff_resstr
1834 | cmp CARG4, CARG3
1835 | bhs ->fff_newstr
1836 | sub RC, RB, #lo
1837 | cmp RC, #26
1838 | eorlo RB, RB, #0x20
1839 | strb RB, [CARG2, CARG4]
1840 | add CARG4, CARG4, #1
1841 | b <1
1842 |.endmacro 1806 |.endmacro
1843 | 1807 |
1844 |ffstring_case string_lower, 65 1808 |ffstring_op reverse
1845 |ffstring_case string_upper, 97 1809 |ffstring_op lower
1846 | 1810 |ffstring_op upper
1847 |//-- Table library ------------------------------------------------------
1848 |
1849 |.ffunc_1 table_getn
1850 | checktab CARG2, ->fff_fallback
1851 | .IOS mov RA, BASE
1852 | bl extern lj_tab_len // (GCtab *t)
1853 | // Returns uint32_t (but less than 2^31).
1854 | .IOS mov BASE, RA
1855 | mvn CARG2, #~LJ_TISNUM
1856 | b ->fff_restv
1857 | 1811 |
1858 |//-- Bit library -------------------------------------------------------- 1812 |//-- Bit library --------------------------------------------------------
1859 | 1813 |
@@ -2128,6 +2082,69 @@ static void build_subroutines(BuildCtx *ctx)
2128 | ldr INS, [PC, #-4] 2082 | ldr INS, [PC, #-4]
2129 | bx CRET1 2083 | bx CRET1
2130 | 2084 |
2085 |->cont_stitch: // Trace stitching.
2086 |.if JIT
2087 | // RA = resultptr, CARG4 = meta base
2088 | ldr RB, SAVE_MULTRES
2089 | ldr INS, [PC, #-4]
2090 | ldr CARG3, [CARG4, #-24] // Save previous trace number.
2091 | subs RB, RB, #8
2092 | decode_RA8 RC, INS // Call base.
2093 | beq >2
2094 |1: // Move results down.
2095 | ldrd CARG12, [RA]
2096 | add RA, RA, #8
2097 | subs RB, RB, #8
2098 | strd CARG12, [BASE, RC]
2099 | add RC, RC, #8
2100 | bne <1
2101 |2:
2102 | decode_RA8 RA, INS
2103 | decode_RB8 RB, INS
2104 | add RA, RA, RB
2105 | ldr CARG1, [DISPATCH, #DISPATCH_J(trace)]
2106 |3:
2107 | cmp RA, RC
2108 | mvn CARG2, #~LJ_TNIL
2109 | bhi >9 // More results wanted?
2110 |
2111 | ldr TRACE:RA, [CARG1, CARG3, lsl #2]
2112 | cmp TRACE:RA, #0
2113 | beq ->cont_nop
2114 | ldrh RC, TRACE:RA->link
2115 | cmp RC, CARG3
2116 | beq ->cont_nop // Blacklisted.
2117 | cmp RC, #0
2118 | bne =>BC_JLOOP // Jump to stitched trace.
2119 |
2120 | // Stitch a new trace to the previous trace.
2121 | str CARG3, [DISPATCH, #DISPATCH_J(exitno)]
2122 | str L, [DISPATCH, #DISPATCH_J(L)]
2123 | str BASE, L->base
2124 | sub CARG1, DISPATCH, #-GG_DISP2J
2125 | mov CARG2, PC
2126 | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
2127 | ldr BASE, L->base
2128 | b ->cont_nop
2129 |
2130 |9: // Fill up results with nil.
2131 | strd CARG12, [BASE, RC]
2132 | add RC, RC, #8
2133 | b <3
2134 |.endif
2135 |
2136 |->vm_profhook: // Dispatch target for profiler hook.
2137#if LJ_HASPROFILE
2138 | mov CARG1, L
2139 | str BASE, L->base
2140 | mov CARG2, PC
2141 | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc)
2142 | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
2143 | ldr BASE, L->base
2144 | sub PC, PC, #4
2145 | b ->cont_nop
2146#endif
2147 |
2131 |//----------------------------------------------------------------------- 2148 |//-----------------------------------------------------------------------
2132 |//-- Trace exit handler ------------------------------------------------- 2149 |//-- Trace exit handler -------------------------------------------------
2133 |//----------------------------------------------------------------------- 2150 |//-----------------------------------------------------------------------
@@ -2152,14 +2169,14 @@ static void build_subroutines(BuildCtx *ctx)
2152 | add CARG1, CARG1, CARG2, asr #6 2169 | add CARG1, CARG1, CARG2, asr #6
2153 | ldr CARG2, [lr, #4] // Load exit stub group offset. 2170 | ldr CARG2, [lr, #4] // Load exit stub group offset.
2154 | sub CARG1, CARG1, lr 2171 | sub CARG1, CARG1, lr
2155 | ldr L, [DISPATCH, #DISPATCH_GL(jit_L)] 2172 | ldr L, [DISPATCH, #DISPATCH_GL(cur_L)]
2156 | add CARG1, CARG2, CARG1, lsr #2 // Compute exit number. 2173 | add CARG1, CARG2, CARG1, lsr #2 // Compute exit number.
2157 | ldr BASE, [DISPATCH, #DISPATCH_GL(jit_base)] 2174 | ldr BASE, [DISPATCH, #DISPATCH_GL(jit_base)]
2158 | str CARG1, [DISPATCH, #DISPATCH_J(exitno)] 2175 | str CARG1, [DISPATCH, #DISPATCH_J(exitno)]
2159 | mov CARG4, #0 2176 | mov CARG4, #0
2160 | str L, [DISPATCH, #DISPATCH_J(L)]
2161 | str BASE, L->base 2177 | str BASE, L->base
2162 | str CARG4, [DISPATCH, #DISPATCH_GL(jit_L)] 2178 | str L, [DISPATCH, #DISPATCH_J(L)]
2179 | str CARG4, [DISPATCH, #DISPATCH_GL(jit_base)]
2163 | sub CARG1, DISPATCH, #-GG_DISP2J 2180 | sub CARG1, DISPATCH, #-GG_DISP2J
2164 | mov CARG2, sp 2181 | mov CARG2, sp
2165 | bl extern lj_trace_exit // (jit_State *J, ExitState *ex) 2182 | bl extern lj_trace_exit // (jit_State *J, ExitState *ex)
@@ -2178,13 +2195,14 @@ static void build_subroutines(BuildCtx *ctx)
2178 | ldr L, SAVE_L 2195 | ldr L, SAVE_L
2179 |1: 2196 |1:
2180 | cmp CARG1, #0 2197 | cmp CARG1, #0
2181 | blt >3 // Check for error from exit. 2198 | blt >9 // Check for error from exit.
2182 | lsl RC, CARG1, #3 2199 | lsl RC, CARG1, #3
2183 | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] 2200 | ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
2184 | str RC, SAVE_MULTRES 2201 | str RC, SAVE_MULTRES
2185 | mov CARG3, #0 2202 | mov CARG3, #0
2203 | str BASE, L->base
2186 | ldr CARG2, LFUNC:CARG2->field_pc 2204 | ldr CARG2, LFUNC:CARG2->field_pc
2187 | str CARG3, [DISPATCH, #DISPATCH_GL(jit_L)] 2205 | str CARG3, [DISPATCH, #DISPATCH_GL(jit_base)]
2188 | mv_vmstate CARG4, INTERP 2206 | mv_vmstate CARG4, INTERP
2189 | ldr KBASE, [CARG2, #PC2PROTO(k)] 2207 | ldr KBASE, [CARG2, #PC2PROTO(k)]
2190 | // Modified copy of ins_next which handles function header dispatch, too. 2208 | // Modified copy of ins_next which handles function header dispatch, too.
@@ -2193,15 +2211,32 @@ static void build_subroutines(BuildCtx *ctx)
2193 | ldr INS, [PC], #4 2211 | ldr INS, [PC], #4
2194 | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8. 2212 | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8.
2195 | st_vmstate CARG4 2213 | st_vmstate CARG4
2214 | cmp OP, #BC_FUNCC+2 // Fast function?
2215 | bhs >4
2216 |2:
2196 | cmp OP, #BC_FUNCF // Function header? 2217 | cmp OP, #BC_FUNCF // Function header?
2197 | ldr OP, [DISPATCH, OP, lsl #2] 2218 | ldr OP, [DISPATCH, OP, lsl #2]
2198 | decode_RA8 RA, INS 2219 | decode_RA8 RA, INS
2199 | lsrlo RC, INS, #16 // No: Decode operands A*8 and D. 2220 | lsrlo RC, INS, #16 // No: Decode operands A*8 and D.
2200 | subhs RC, RC, #8 2221 | subhs RC, RC, #8
2201 | addhs RA, RA, BASE // Yes: RA = BASE+framesize*8, RC = nargs*8 2222 | addhs RA, RA, BASE // Yes: RA = BASE+framesize*8, RC = nargs*8
2223 | ldrhs CARG3, [BASE, FRAME_FUNC]
2202 | bx OP 2224 | bx OP
2203 | 2225 |
2204 |3: // Rethrow error from the right C frame. 2226 |4: // Check frame below fast function.
2227 | ldr CARG1, [BASE, FRAME_PC]
2228 | ands CARG2, CARG1, #FRAME_TYPE
2229 | bne <2 // Trace stitching continuation?
2230 | // Otherwise set KBASE for Lua function below fast function.
2231 | ldr CARG3, [CARG1, #-4]
2232 | decode_RA8 CARG1, CARG3
2233 | sub CARG2, BASE, CARG1
2234 | ldr LFUNC:CARG3, [CARG2, #-16]
2235 | ldr CARG3, LFUNC:CARG3->field_pc
2236 | ldr KBASE, [CARG3, #PC2PROTO(k)]
2237 | b <2
2238 |
2239 |9: // Rethrow error from the right C frame.
2205 | rsb CARG2, CARG1, #0 2240 | rsb CARG2, CARG1, #0
2206 | mov CARG1, L 2241 | mov CARG1, L
2207 | bl extern lj_err_throw // (lua_State *L, int errcode) 2242 | bl extern lj_err_throw // (lua_State *L, int errcode)
@@ -2834,6 +2869,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2834 | ins_next 2869 | ins_next
2835 break; 2870 break;
2836 2871
2872 case BC_ISTYPE:
2873 | // RA = src*8, RC = -type
2874 | ldrd CARG12, [BASE, RA]
2875 | ins_next1
2876 | cmn CARG2, RC
2877 | ins_next2
2878 | bne ->vmeta_istype
2879 | ins_next3
2880 break;
2881 case BC_ISNUM:
2882 | // RA = src*8, RC = -(TISNUM-1)
2883 | ldrd CARG12, [BASE, RA]
2884 | ins_next1
2885 | checktp CARG2, LJ_TISNUM
2886 | ins_next2
2887 | bhs ->vmeta_istype
2888 | ins_next3
2889 break;
2890
2837 /* -- Unary ops --------------------------------------------------------- */ 2891 /* -- Unary ops --------------------------------------------------------- */
2838 2892
2839 case BC_MOV: 2893 case BC_MOV:
@@ -3504,6 +3558,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3504 | bne <1 // 'no __index' flag set: done. 3558 | bne <1 // 'no __index' flag set: done.
3505 | b ->vmeta_tgetb 3559 | b ->vmeta_tgetb
3506 break; 3560 break;
3561 case BC_TGETR:
3562 | decode_RB8 RB, INS
3563 | decode_RC8 RC, INS
3564 | // RA = dst*8, RB = table*8, RC = key*8
3565 | ldr TAB:CARG1, [BASE, RB]
3566 | ldr CARG2, [BASE, RC]
3567 | ldr CARG4, TAB:CARG1->array
3568 | ldr CARG3, TAB:CARG1->asize
3569 | add CARG4, CARG4, CARG2, lsl #3
3570 | cmp CARG2, CARG3 // In array part?
3571 | bhs ->vmeta_tgetr
3572 | ldrd CARG12, [CARG4]
3573 |->BC_TGETR_Z:
3574 | ins_next1
3575 | ins_next2
3576 | strd CARG12, [BASE, RA]
3577 | ins_next3
3578 break;
3507 3579
3508 case BC_TSETV: 3580 case BC_TSETV:
3509 | decode_RB8 RB, INS 3581 | decode_RB8 RB, INS
@@ -3674,6 +3746,32 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3674 | barrierback TAB:CARG1, INS, CARG3 3746 | barrierback TAB:CARG1, INS, CARG3
3675 | b <2 3747 | b <2
3676 break; 3748 break;
3749 case BC_TSETR:
3750 | decode_RB8 RB, INS
3751 | decode_RC8 RC, INS
3752 | // RA = dst*8, RB = table*8, RC = key*8
3753 | ldr TAB:CARG2, [BASE, RB]
3754 | ldr CARG3, [BASE, RC]
3755 | ldrb INS, TAB:CARG2->marked
3756 | ldr CARG1, TAB:CARG2->array
3757 | ldr CARG4, TAB:CARG2->asize
3758 | tst INS, #LJ_GC_BLACK // isblack(table)
3759 | add CARG1, CARG1, CARG3, lsl #3
3760 | bne >7
3761 |2:
3762 | cmp CARG3, CARG4 // In array part?
3763 | bhs ->vmeta_tsetr
3764 |->BC_TSETR_Z:
3765 | ldrd CARG34, [BASE, RA]
3766 | ins_next1
3767 | ins_next2
3768 | strd CARG34, [CARG1]
3769 | ins_next3
3770 |
3771 |7: // Possible table write barrier for the value. Skip valiswhite check.
3772 | barrierback TAB:CARG2, INS, RB
3773 | b <2
3774 break;
3677 3775
3678 case BC_TSETM: 3776 case BC_TSETM:
3679 | // RA = base*8 (table at base-1), RC = num_const (start index) 3777 | // RA = base*8 (table at base-1), RC = num_const (start index)
@@ -4271,7 +4369,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4271 | st_vmstate CARG2 4369 | st_vmstate CARG2
4272 | ldr RA, TRACE:RC->mcode 4370 | ldr RA, TRACE:RC->mcode
4273 | str BASE, [DISPATCH, #DISPATCH_GL(jit_base)] 4371 | str BASE, [DISPATCH, #DISPATCH_GL(jit_base)]
4274 | str L, [DISPATCH, #DISPATCH_GL(jit_L)] 4372 | str L, [DISPATCH, #DISPATCH_GL(tmpbuf.L)]
4275 | bx RA 4373 | bx RA
4276 |.endif 4374 |.endif
4277 break; 4375 break;
@@ -4389,6 +4487,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4389 | ldr BASE, L->base 4487 | ldr BASE, L->base
4390 | mv_vmstate CARG3, INTERP 4488 | mv_vmstate CARG3, INTERP
4391 | ldr CRET2, L->top 4489 | ldr CRET2, L->top
4490 | str L, [DISPATCH, #DISPATCH_GL(cur_L)]
4392 | lsl RC, CRET1, #3 4491 | lsl RC, CRET1, #3
4393 | st_vmstate CARG3 4492 | st_vmstate CARG3
4394 | ldr PC, [BASE, FRAME_PC] 4493 | ldr PC, [BASE, FRAME_PC]
diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc
index 0ec13e60..3bf5a993 100644
--- a/src/vm_mips.dasc
+++ b/src/vm_mips.dasc
@@ -138,6 +138,7 @@
138|.type NODE, Node 138|.type NODE, Node
139|.type NARGS8, int 139|.type NARGS8, int
140|.type TRACE, GCtrace 140|.type TRACE, GCtrace
141|.type SBUF, SBuf
141| 142|
142|//----------------------------------------------------------------------- 143|//-----------------------------------------------------------------------
143| 144|
@@ -486,12 +487,13 @@ static void build_subroutines(BuildCtx *ctx)
486 | addiu DISPATCH, DISPATCH, GG_G2DISP 487 | addiu DISPATCH, DISPATCH, GG_G2DISP
487 | sw r0, SAVE_NRES 488 | sw r0, SAVE_NRES
488 | sw r0, SAVE_ERRF 489 | sw r0, SAVE_ERRF
489 | sw TMP0, L->cframe 490 | sw CARG1, SAVE_PC // Any value outside of bytecode is ok.
490 | sw r0, SAVE_CFRAME 491 | sw r0, SAVE_CFRAME
491 | beqz TMP1, >3 492 | beqz TMP1, >3
492 |. sw CARG1, SAVE_PC // Any value outside of bytecode is ok. 493 |. sw TMP0, L->cframe
493 | 494 |
494 | // Resume after yield (like a return). 495 | // Resume after yield (like a return).
496 | sw L, DISPATCH_GL(cur_L)(DISPATCH)
495 | move RA, BASE 497 | move RA, BASE
496 | lw BASE, L->base 498 | lw BASE, L->base
497 | lw TMP1, L->top 499 | lw TMP1, L->top
@@ -525,17 +527,18 @@ static void build_subroutines(BuildCtx *ctx)
525 | 527 |
526 |1: // Entry point for vm_pcall above (PC = ftype). 528 |1: // Entry point for vm_pcall above (PC = ftype).
527 | lw TMP1, L:CARG1->cframe 529 | lw TMP1, L:CARG1->cframe
528 | sw CARG3, SAVE_NRES
529 | move L, CARG1 530 | move L, CARG1
530 | sw CARG1, SAVE_L 531 | sw CARG3, SAVE_NRES
531 | move BASE, CARG2
532 | sw sp, L->cframe // Add our C frame to cframe chain.
533 | lw DISPATCH, L->glref // Setup pointer to dispatch table. 532 | lw DISPATCH, L->glref // Setup pointer to dispatch table.
533 | sw CARG1, SAVE_L
534 | move BASE, CARG2
535 | addiu DISPATCH, DISPATCH, GG_G2DISP
534 | sw CARG1, SAVE_PC // Any value outside of bytecode is ok. 536 | sw CARG1, SAVE_PC // Any value outside of bytecode is ok.
535 | sw TMP1, SAVE_CFRAME 537 | sw TMP1, SAVE_CFRAME
536 | addiu DISPATCH, DISPATCH, GG_G2DISP 538 | sw sp, L->cframe // Add our C frame to cframe chain.
537 | 539 |
538 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). 540 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
541 | sw L, DISPATCH_GL(cur_L)(DISPATCH)
539 | lw TMP2, L->base // TMP2 = old base (used in vmeta_call). 542 | lw TMP2, L->base // TMP2 = old base (used in vmeta_call).
540 | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 543 | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
541 | lw TMP1, L->top 544 | lw TMP1, L->top
@@ -566,20 +569,21 @@ static void build_subroutines(BuildCtx *ctx)
566 | lw TMP0, L:CARG1->stack 569 | lw TMP0, L:CARG1->stack
567 | sw CARG1, SAVE_L 570 | sw CARG1, SAVE_L
568 | lw TMP1, L->top 571 | lw TMP1, L->top
572 | lw DISPATCH, L->glref // Setup pointer to dispatch table.
569 | sw CARG1, SAVE_PC // Any value outside of bytecode is ok. 573 | sw CARG1, SAVE_PC // Any value outside of bytecode is ok.
570 | subu TMP0, TMP0, TMP1 // Compute -savestack(L, L->top). 574 | subu TMP0, TMP0, TMP1 // Compute -savestack(L, L->top).
571 | lw TMP1, L->cframe 575 | lw TMP1, L->cframe
572 | sw sp, L->cframe // Add our C frame to cframe chain. 576 | addiu DISPATCH, DISPATCH, GG_G2DISP
573 | sw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame. 577 | sw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame.
574 | sw r0, SAVE_ERRF // No error function. 578 | sw r0, SAVE_ERRF // No error function.
575 | move CFUNCADDR, CARG4 579 | sw TMP1, SAVE_CFRAME
580 | sw sp, L->cframe // Add our C frame to cframe chain.
581 | sw L, DISPATCH_GL(cur_L)(DISPATCH)
576 | jalr CARG4 // (lua_State *L, lua_CFunction func, void *ud) 582 | jalr CARG4 // (lua_State *L, lua_CFunction func, void *ud)
577 |. sw TMP1, SAVE_CFRAME 583 |. move CFUNCADDR, CARG4
578 | move BASE, CRET1 584 | move BASE, CRET1
579 | lw DISPATCH, L->glref // Setup pointer to dispatch table.
580 | li PC, FRAME_CP
581 | bnez CRET1, <3 // Else continue with the call. 585 | bnez CRET1, <3 // Else continue with the call.
582 |. addiu DISPATCH, DISPATCH, GG_G2DISP 586 |. li PC, FRAME_CP
583 | b ->vm_leave_cp // No base? Just remove C frame. 587 | b ->vm_leave_cp // No base? Just remove C frame.
584 |. nop 588 |. nop
585 | 589 |
@@ -688,6 +692,16 @@ static void build_subroutines(BuildCtx *ctx)
688 | b ->vm_call_dispatch_f 692 | b ->vm_call_dispatch_f
689 |. li NARGS8:RC, 16 // 2 args for func(t, k). 693 |. li NARGS8:RC, 16 // 2 args for func(t, k).
690 | 694 |
695 |->vmeta_tgetr:
696 | load_got lj_tab_getinth
697 | call_intern lj_tab_getinth // (GCtab *t, int32_t key)
698 |. nop
699 | // Returns cTValue * or NULL.
700 | beqz CRET1, >1
701 |. nop
702 | b ->BC_TGETR_Z
703 |. ldc1 f0, 0(CRET1)
704 |
691 |//----------------------------------------------------------------------- 705 |//-----------------------------------------------------------------------
692 | 706 |
693 |->vmeta_tsets1: 707 |->vmeta_tsets1:
@@ -740,6 +754,16 @@ static void build_subroutines(BuildCtx *ctx)
740 | b ->vm_call_dispatch_f 754 | b ->vm_call_dispatch_f
741 |. li NARGS8:RC, 24 // 3 args for func(t, k, v) 755 |. li NARGS8:RC, 24 // 3 args for func(t, k, v)
742 | 756 |
757 |->vmeta_tsetr:
758 | load_got lj_tab_setinth
759 | sw BASE, L->base
760 | sw PC, SAVE_PC
761 | call_intern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
762 |. move CARG1, L
763 | // Returns TValue *.
764 | b ->BC_TSETR_Z
765 |. nop
766 |
743 |//-- Comparison metamethods --------------------------------------------- 767 |//-- Comparison metamethods ---------------------------------------------
744 | 768 |
745 |->vmeta_comp: 769 |->vmeta_comp:
@@ -813,6 +837,18 @@ static void build_subroutines(BuildCtx *ctx)
813 |. nop 837 |. nop
814 |.endif 838 |.endif
815 | 839 |
840 |->vmeta_istype:
841 | load_got lj_meta_istype
842 | addiu PC, PC, -4
843 | sw BASE, L->base
844 | srl CARG2, RA, 3
845 | srl CARG3, RD, 3
846 | sw PC, SAVE_PC
847 | call_intern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
848 |. move CARG1, L
849 | b ->cont_nop
850 |. nop
851 |
816 |//-- Arithmetic metamethods --------------------------------------------- 852 |//-- Arithmetic metamethods ---------------------------------------------
817 | 853 |
818 |->vmeta_unm: 854 |->vmeta_unm:
@@ -1119,9 +1155,9 @@ static void build_subroutines(BuildCtx *ctx)
1119 |. sw BASE, L->base // Add frame since C call can throw. 1155 |. sw BASE, L->base // Add frame since C call can throw.
1120 | ffgccheck 1156 | ffgccheck
1121 |. sw PC, SAVE_PC // Redundant (but a defined value). 1157 |. sw PC, SAVE_PC // Redundant (but a defined value).
1122 | load_got lj_str_fromnum 1158 | load_got lj_strfmt_num
1123 | move CARG1, L 1159 | move CARG1, L
1124 | call_intern lj_str_fromnum // (lua_State *L, lua_Number *np) 1160 | call_intern lj_strfmt_num // (lua_State *L, lua_Number *np)
1125 |. move CARG2, BASE 1161 |. move CARG2, BASE
1126 | // Returns GCstr *. 1162 | // Returns GCstr *.
1127 | li CARG3, LJ_TSTR 1163 | li CARG3, LJ_TSTR
@@ -1188,7 +1224,7 @@ static void build_subroutines(BuildCtx *ctx)
1188 | mtc1 TMP0, FARG1 1224 | mtc1 TMP0, FARG1
1189 | beqz AT, ->fff_fallback 1225 | beqz AT, ->fff_fallback
1190 |. lw PC, FRAME_PC(BASE) 1226 |. lw PC, FRAME_PC(BASE)
1191 | cvt.w.d FRET1, FARG2 1227 | trunc.w.d FRET1, FARG2
1192 | cvt.d.w FARG1, FARG1 1228 | cvt.d.w FARG1, FARG1
1193 | lw TMP0, TAB:CARG1->asize 1229 | lw TMP0, TAB:CARG1->asize
1194 | lw TMP1, TAB:CARG1->array 1230 | lw TMP1, TAB:CARG1->array
@@ -1331,6 +1367,7 @@ static void build_subroutines(BuildCtx *ctx)
1331 | lw TMP3, L:RA->top 1367 | lw TMP3, L:RA->top
1332 | li_vmstate INTERP 1368 | li_vmstate INTERP
1333 | lw BASE, L->base 1369 | lw BASE, L->base
1370 | sw L, DISPATCH_GL(cur_L)(DISPATCH)
1334 | st_vmstate 1371 | st_vmstate
1335 | beqz AT, >8 1372 | beqz AT, >8
1336 |. subu RD, TMP3, TMP2 1373 |. subu RD, TMP3, TMP2
@@ -1521,14 +1558,8 @@ static void build_subroutines(BuildCtx *ctx)
1521 | b ->fff_resn 1558 | b ->fff_resn
1522 |. nop 1559 |. nop
1523 | 1560 |
1524 |->ff_math_deg:
1525 |.ffunc_n math_rad
1526 |. ldc1 FARG2, CFUNC:RB->upvalue[0]
1527 | b ->fff_resn
1528 |. mul.d FRET1, FARG1, FARG2
1529 |
1530 |.ffunc_nn math_ldexp 1561 |.ffunc_nn math_ldexp
1531 | cvt.w.d FARG2, FARG2 1562 | trunc.w.d FARG2, FARG2
1532 | load_got ldexp 1563 | load_got ldexp
1533 | mfc1 CARG3, FARG2 1564 | mfc1 CARG3, FARG2
1534 | call_extern 1565 | call_extern
@@ -1592,13 +1623,6 @@ static void build_subroutines(BuildCtx *ctx)
1592 | 1623 |
1593 |//-- String library ----------------------------------------------------- 1624 |//-- String library -----------------------------------------------------
1594 | 1625 |
1595 |.ffunc_1 string_len
1596 | li AT, LJ_TSTR
1597 | bne CARG3, AT, ->fff_fallback
1598 |. nop
1599 | b ->fff_resi
1600 |. lw CRET1, STR:CARG1->len
1601 |
1602 |.ffunc string_byte // Only handle the 1-arg case here. 1626 |.ffunc string_byte // Only handle the 1-arg case here.
1603 | lw CARG3, HI(BASE) 1627 | lw CARG3, HI(BASE)
1604 | lw STR:CARG1, LO(BASE) 1628 | lw STR:CARG1, LO(BASE)
@@ -1628,7 +1652,7 @@ static void build_subroutines(BuildCtx *ctx)
1628 |. sltiu AT, CARG3, LJ_TISNUM 1652 |. sltiu AT, CARG3, LJ_TISNUM
1629 | beqz AT, ->fff_fallback 1653 | beqz AT, ->fff_fallback
1630 |. li CARG3, 1 1654 |. li CARG3, 1
1631 | cvt.w.d FARG1, FARG1 1655 | trunc.w.d FARG1, FARG1
1632 | addiu CARG2, sp, ARG5_OFS 1656 | addiu CARG2, sp, ARG5_OFS
1633 | sltiu AT, TMP0, 256 1657 | sltiu AT, TMP0, 256
1634 | mfc1 TMP0, FARG1 1658 | mfc1 TMP0, FARG1
@@ -1642,6 +1666,7 @@ static void build_subroutines(BuildCtx *ctx)
1642 |. move CARG1, L 1666 |. move CARG1, L
1643 | // Returns GCstr *. 1667 | // Returns GCstr *.
1644 | lw BASE, L->base 1668 | lw BASE, L->base
1669 |->fff_resstr:
1645 | move CARG1, CRET1 1670 | move CARG1, CRET1
1646 | b ->fff_restv 1671 | b ->fff_restv
1647 |. li CARG3, LJ_TSTR 1672 |. li CARG3, LJ_TSTR
@@ -1658,7 +1683,7 @@ static void build_subroutines(BuildCtx *ctx)
1658 | ldc1 f2, 8(BASE) 1683 | ldc1 f2, 8(BASE)
1659 | beqz AT, >1 1684 | beqz AT, >1
1660 |. li CARG4, -1 1685 |. li CARG4, -1
1661 | cvt.w.d f0, f0 1686 | trunc.w.d f0, f0
1662 | sltiu AT, CARG3, LJ_TISNUM 1687 | sltiu AT, CARG3, LJ_TISNUM
1663 | beqz AT, ->fff_fallback 1688 | beqz AT, ->fff_fallback
1664 |. mfc1 CARG4, f0 1689 |. mfc1 CARG4, f0
@@ -1666,7 +1691,7 @@ static void build_subroutines(BuildCtx *ctx)
1666 | sltiu AT, CARG2, LJ_TISNUM 1691 | sltiu AT, CARG2, LJ_TISNUM
1667 | beqz AT, ->fff_fallback 1692 | beqz AT, ->fff_fallback
1668 |. li AT, LJ_TSTR 1693 |. li AT, LJ_TSTR
1669 | cvt.w.d f2, f2 1694 | trunc.w.d f2, f2
1670 | bne TMP0, AT, ->fff_fallback 1695 | bne TMP0, AT, ->fff_fallback
1671 |. lw CARG2, STR:CARG1->len 1696 |. lw CARG2, STR:CARG1->len
1672 | mfc1 CARG3, f2 1697 | mfc1 CARG3, f2
@@ -1695,108 +1720,32 @@ static void build_subroutines(BuildCtx *ctx)
1695 | b ->fff_restv 1720 | b ->fff_restv
1696 |. li CARG3, LJ_TSTR 1721 |. li CARG3, LJ_TSTR
1697 | 1722 |
1698 |.ffunc string_rep // Only handle the 1-char case inline. 1723 |.macro ffstring_op, name
1699 | ffgccheck 1724 | .ffunc string_ .. name
1700 | lw TMP0, HI(BASE)
1701 | addiu AT, NARGS8:RC, -16 // Exactly 2 arguments.
1702 | lw CARG4, 8+HI(BASE)
1703 | lw STR:CARG1, LO(BASE)
1704 | addiu TMP0, TMP0, -LJ_TSTR
1705 | ldc1 f0, 8(BASE)
1706 | or AT, AT, TMP0
1707 | bnez AT, ->fff_fallback
1708 |. sltiu AT, CARG4, LJ_TISNUM
1709 | cvt.w.d f0, f0
1710 | beqz AT, ->fff_fallback
1711 |. lw TMP0, STR:CARG1->len
1712 | mfc1 CARG3, f0
1713 | lw TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
1714 | li AT, 1
1715 | blez CARG3, ->fff_emptystr // Count <= 0?
1716 |. sltu AT, AT, TMP0
1717 | beqz TMP0, ->fff_emptystr // Zero length string?
1718 |. sltu TMP0, TMP1, CARG3
1719 | or AT, AT, TMP0
1720 | lw CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
1721 | bnez AT, ->fff_fallback // Fallback for > 1-char strings.
1722 |. lbu TMP0, STR:CARG1[1]
1723 | addu TMP2, CARG2, CARG3
1724 |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?).
1725 | addiu TMP2, TMP2, -1
1726 | sltu AT, CARG2, TMP2
1727 | bnez AT, <1
1728 |. sb TMP0, 0(TMP2)
1729 | b ->fff_newstr
1730 |. nop
1731 |
1732 |.ffunc string_reverse
1733 | ffgccheck
1734 | lw CARG3, HI(BASE)
1735 | lw STR:CARG1, LO(BASE)
1736 | beqz NARGS8:RC, ->fff_fallback
1737 |. li AT, LJ_TSTR
1738 | bne CARG3, AT, ->fff_fallback
1739 |. lw TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
1740 | lw CARG3, STR:CARG1->len
1741 | addiu CARG1, STR:CARG1, #STR
1742 | lw CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
1743 | sltu AT, TMP1, CARG3
1744 | bnez AT, ->fff_fallback
1745 |. addu TMP3, CARG1, CARG3
1746 | addu CARG4, CARG2, CARG3
1747 |1: // Reverse string copy.
1748 | lbu TMP1, 0(CARG1)
1749 | sltu AT, CARG1, TMP3
1750 | beqz AT, ->fff_newstr
1751 |. addiu CARG1, CARG1, 1
1752 | addiu CARG4, CARG4, -1
1753 | b <1
1754 | sb TMP1, 0(CARG4)
1755 |
1756 |.macro ffstring_case, name, lo
1757 | .ffunc name
1758 | ffgccheck 1725 | ffgccheck
1759 | lw CARG3, HI(BASE) 1726 | lw CARG3, HI(BASE)
1760 | lw STR:CARG1, LO(BASE) 1727 | lw STR:CARG2, LO(BASE)
1761 | beqz NARGS8:RC, ->fff_fallback 1728 | beqz NARGS8:RC, ->fff_fallback
1762 |. li AT, LJ_TSTR 1729 |. li AT, LJ_TSTR
1763 | bne CARG3, AT, ->fff_fallback 1730 | bne CARG3, AT, ->fff_fallback
1764 |. lw TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) 1731 |. addiu SBUF:CARG1, DISPATCH, DISPATCH_GL(tmpbuf)
1765 | lw CARG3, STR:CARG1->len 1732 | load_got lj_buf_putstr_ .. name
1766 | addiu CARG1, STR:CARG1, #STR 1733 | lw TMP0, SBUF:CARG1->b
1767 | lw CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) 1734 | sw L, SBUF:CARG1->L
1768 | sltu AT, TMP1, CARG3 1735 | sw BASE, L->base
1769 | bnez AT, ->fff_fallback 1736 | sw TMP0, SBUF:CARG1->p
1770 |. addu TMP3, CARG1, CARG3 1737 | call_intern extern lj_buf_putstr_ .. name
1771 | move CARG4, CARG2 1738 |. sw PC, SAVE_PC
1772 |1: // ASCII case conversion. 1739 | load_got lj_buf_tostr
1773 | lbu TMP1, 0(CARG1) 1740 | call_intern lj_buf_tostr
1774 | sltu AT, CARG1, TMP3 1741 |. move SBUF:CARG1, SBUF:CRET1
1775 | beqz AT, ->fff_newstr 1742 | b ->fff_resstr
1776 |. addiu TMP0, TMP1, -lo 1743 |. lw BASE, L->base
1777 | xori TMP2, TMP1, 0x20
1778 | sltiu AT, TMP0, 26
1779 | movn TMP1, TMP2, AT
1780 | addiu CARG1, CARG1, 1
1781 | sb TMP1, 0(CARG4)
1782 | b <1
1783 |. addiu CARG4, CARG4, 1
1784 |.endmacro 1744 |.endmacro
1785 | 1745 |
1786 |ffstring_case string_lower, 65 1746 |ffstring_op reverse
1787 |ffstring_case string_upper, 97 1747 |ffstring_op lower
1788 | 1748 |ffstring_op upper
1789 |//-- Table library ------------------------------------------------------
1790 |
1791 |.ffunc_1 table_getn
1792 | li AT, LJ_TTAB
1793 | bne CARG3, AT, ->fff_fallback
1794 |. load_got lj_tab_len
1795 | call_intern lj_tab_len // (GCtab *t)
1796 |. nop
1797 | // Returns uint32_t (but less than 2^31).
1798 | b ->fff_resi
1799 |. nop
1800 | 1749 |
1801 |//-- Bit library -------------------------------------------------------- 1750 |//-- Bit library --------------------------------------------------------
1802 | 1751 |
@@ -2062,6 +2011,76 @@ static void build_subroutines(BuildCtx *ctx)
2062 | jr CRET1 2011 | jr CRET1
2063 |. lw INS, -4(PC) 2012 |. lw INS, -4(PC)
2064 | 2013 |
2014 |->cont_stitch: // Trace stitching.
2015 |.if JIT
2016 | // RA = resultptr, RB = meta base
2017 | lw INS, -4(PC)
2018 | lw TMP3, -24+LO(RB) // Save previous trace number.
2019 | decode_RA8a RC, INS
2020 | addiu AT, MULTRES, -8
2021 | decode_RA8b RC
2022 | beqz AT, >2
2023 |. addu RC, BASE, RC // Call base.
2024 |1: // Move results down.
2025 | ldc1 f0, 0(RA)
2026 | addiu AT, AT, -8
2027 | addiu RA, RA, 8
2028 | sdc1 f0, 0(RC)
2029 | bnez AT, <1
2030 |. addiu RC, RC, 8
2031 |2:
2032 | decode_RA8a RA, INS
2033 | decode_RB8a RB, INS
2034 | decode_RA8b RA
2035 | decode_RB8b RB
2036 | addu RA, RA, RB
2037 | lw TMP1, DISPATCH_J(trace)(DISPATCH)
2038 | addu RA, BASE, RA
2039 |3:
2040 | sltu AT, RC, RA
2041 | bnez AT, >9 // More results wanted?
2042 |. sll TMP2, TMP3, 2
2043 |
2044 | addu TMP2, TMP1, TMP2
2045 | lw TRACE:TMP2, 0(TMP2)
2046 | beqz TRACE:TMP2, ->cont_nop
2047 |. nop
2048 | lhu RD, TRACE:TMP2->link
2049 | beq RD, TMP3, ->cont_nop // Blacklisted.
2050 |. load_got lj_dispatch_stitch
2051 | bnez RD, =>BC_JLOOP // Jump to stitched trace.
2052 |. sll RD, RD, 3
2053 |
2054 | // Stitch a new trace to the previous trace.
2055 | sw TMP3, DISPATCH_J(exitno)(DISPATCH)
2056 | sw L, DISPATCH_J(L)(DISPATCH)
2057 | sw BASE, L->base
2058 | addiu CARG1, DISPATCH, GG_DISP2J
2059 | call_intern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
2060 |. move CARG2, PC
2061 | b ->cont_nop
2062 |. lw BASE, L->base
2063 |
2064 |9:
2065 | sw TISNIL, HI(RC)
2066 | b <3
2067 |. addiu RC, RC, 8
2068 |.endif
2069 |
2070 |->vm_profhook: // Dispatch target for profiler hook.
2071#if LJ_HASPROFILE
2072 | load_got lj_dispatch_profile
2073 | sw MULTRES, SAVE_MULTRES
2074 | move CARG2, PC
2075 | sw BASE, L->base
2076 | call_intern lj_dispatch_profile // (lua_State *L, const BCIns *pc)
2077 |. move CARG1, L
2078 | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
2079 | addiu PC, PC, -4
2080 | b ->cont_nop
2081 |. lw BASE, L->base
2082#endif
2083 |
2065 |//----------------------------------------------------------------------- 2084 |//-----------------------------------------------------------------------
2066 |//-- Trace exit handler ------------------------------------------------- 2085 |//-- Trace exit handler -------------------------------------------------
2067 |//----------------------------------------------------------------------- 2086 |//-----------------------------------------------------------------------
@@ -2100,14 +2119,15 @@ static void build_subroutines(BuildCtx *ctx)
2100 | lw TMP1, 0(TMP2) // Load exit number. 2119 | lw TMP1, 0(TMP2) // Load exit number.
2101 | st_vmstate 2120 | st_vmstate
2102 | sw TMP2, 16+32*8+29*4(sp) // Store sp in RID_SP. 2121 | sw TMP2, 16+32*8+29*4(sp) // Store sp in RID_SP.
2103 | lw L, DISPATCH_GL(jit_L)(DISPATCH) 2122 | lw L, DISPATCH_GL(cur_L)(DISPATCH)
2104 | lw BASE, DISPATCH_GL(jit_base)(DISPATCH) 2123 | lw BASE, DISPATCH_GL(jit_base)(DISPATCH)
2105 | load_got lj_trace_exit 2124 | load_got lj_trace_exit
2106 | sw L, DISPATCH_J(L)(DISPATCH) 2125 | sw L, DISPATCH_J(L)(DISPATCH)
2107 | sw ra, DISPATCH_J(parent)(DISPATCH) // Store trace number. 2126 | sw ra, DISPATCH_J(parent)(DISPATCH) // Store trace number.
2127 | sw BASE, L->base
2108 | sw TMP1, DISPATCH_J(exitno)(DISPATCH) // Store exit number. 2128 | sw TMP1, DISPATCH_J(exitno)(DISPATCH) // Store exit number.
2109 | addiu CARG1, DISPATCH, GG_DISP2J 2129 | addiu CARG1, DISPATCH, GG_DISP2J
2110 | sw BASE, L->base 2130 | sw r0, DISPATCH_GL(jit_base)(DISPATCH)
2111 | call_intern lj_trace_exit // (jit_State *J, ExitState *ex) 2131 | call_intern lj_trace_exit // (jit_State *J, ExitState *ex)
2112 |. addiu CARG2, sp, 16 2132 |. addiu CARG2, sp, 16
2113 | // Returns MULTRES (unscaled) or negated error code. 2133 | // Returns MULTRES (unscaled) or negated error code.
@@ -2123,17 +2143,18 @@ static void build_subroutines(BuildCtx *ctx)
2123 |.if JIT 2143 |.if JIT
2124 | // CRET1 = MULTRES or negated error code, BASE, PC and JGL set. 2144 | // CRET1 = MULTRES or negated error code, BASE, PC and JGL set.
2125 | lw L, SAVE_L 2145 | lw L, SAVE_L
2126 | addiu DISPATCH, JGL, -GG_DISP2G-32768 2146 | addiu DISPATCH, JGL, -GG_DISP2G-32768
2147 | sw BASE, L->base
2127 |1: 2148 |1:
2128 | bltz CRET1, >3 // Check for error from exit. 2149 | bltz CRET1, >9 // Check for error from exit.
2129 |. lw LFUNC:TMP1, FRAME_FUNC(BASE) 2150 |. lw LFUNC:RB, FRAME_FUNC(BASE)
2130 | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 2151 | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
2131 | sll MULTRES, CRET1, 3 2152 | sll MULTRES, CRET1, 3
2132 | li TISNIL, LJ_TNIL 2153 | li TISNIL, LJ_TNIL
2133 | sw MULTRES, SAVE_MULTRES 2154 | sw MULTRES, SAVE_MULTRES
2134 | mtc1 TMP3, TOBIT 2155 | mtc1 TMP3, TOBIT
2135 | lw TMP1, LFUNC:TMP1->pc 2156 | lw TMP1, LFUNC:RB->pc
2136 | sw r0, DISPATCH_GL(jit_L)(DISPATCH) 2157 | sw r0, DISPATCH_GL(jit_base)(DISPATCH)
2137 | lw KBASE, PC2PROTO(k)(TMP1) 2158 | lw KBASE, PC2PROTO(k)(TMP1)
2138 | cvt.d.s TOBIT, TOBIT 2159 | cvt.d.s TOBIT, TOBIT
2139 | // Modified copy of ins_next which handles function header dispatch, too. 2160 | // Modified copy of ins_next which handles function header dispatch, too.
@@ -2153,11 +2174,27 @@ static void build_subroutines(BuildCtx *ctx)
2153 | jr AT 2174 | jr AT
2154 |. decode_RD8b RD 2175 |. decode_RD8b RD
2155 |2: 2176 |2:
2177 | sltiu TMP2, TMP1, (BC_FUNCC+2)*4 // Fast function?
2178 | bnez TMP2, >3
2179 |. lw TMP1, FRAME_PC(BASE)
2180 | // Check frame below fast function.
2181 | andi TMP0, TMP1, FRAME_TYPE
2182 | bnez TMP0, >3 // Trace stitching continuation?
2183 |. nop
2184 | // Otherwise set KBASE for Lua function below fast function.
2185 | lw TMP2, -4(TMP1)
2186 | decode_RA8a TMP0, TMP2
2187 | decode_RA8b TMP0
2188 | subu TMP1, BASE, TMP0
2189 | lw LFUNC:TMP2, -8+FRAME_FUNC(TMP1)
2190 | lw TMP1, LFUNC:TMP2->pc
2191 | lw KBASE, PC2PROTO(k)(TMP1)
2192 |3:
2156 | addiu RC, MULTRES, -8 2193 | addiu RC, MULTRES, -8
2157 | jr AT 2194 | jr AT
2158 |. addu RA, RA, BASE 2195 |. addu RA, RA, BASE
2159 | 2196 |
2160 |3: // Rethrow error from the right C frame. 2197 |9: // Rethrow error from the right C frame.
2161 | load_got lj_err_throw 2198 | load_got lj_err_throw
2162 | negu CARG2, CRET1 2199 | negu CARG2, CRET1
2163 | call_intern lj_err_throw // (lua_State *L, int errcode) 2200 | call_intern lj_err_throw // (lua_State *L, int errcode)
@@ -2572,6 +2609,26 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2572 | ins_next 2609 | ins_next
2573 break; 2610 break;
2574 2611
2612 case BC_ISTYPE:
2613 | // RA = src*8, RD = -type*8
2614 | addu TMP2, BASE, RA
2615 | srl TMP1, RD, 3
2616 | lw TMP0, HI(TMP2)
2617 | ins_next1
2618 | addu AT, TMP0, TMP1
2619 | bnez AT, ->vmeta_istype
2620 |. ins_next2
2621 break;
2622 case BC_ISNUM:
2623 | // RA = src*8, RD = -(TISNUM-1)*8
2624 | addu TMP2, BASE, RA
2625 | lw TMP0, HI(TMP2)
2626 | ins_next1
2627 | sltiu AT, TMP0, LJ_TISNUM
2628 | beqz AT, ->vmeta_istype
2629 |. ins_next2
2630 break;
2631
2575 /* -- Unary ops --------------------------------------------------------- */ 2632 /* -- Unary ops --------------------------------------------------------- */
2576 2633
2577 case BC_MOV: 2634 case BC_MOV:
@@ -3210,6 +3267,30 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3210 | b ->vmeta_tgetb // Caveat: preserve TMP0! 3267 | b ->vmeta_tgetb // Caveat: preserve TMP0!
3211 |. nop 3268 |. nop
3212 break; 3269 break;
3270 case BC_TGETR:
3271 | // RA = dst*8, RB = table*8, RC = key*8
3272 | decode_RB8a RB, INS
3273 | decode_RB8b RB
3274 | decode_RDtoRC8 RC, RD
3275 | addu CARG2, BASE, RB
3276 | addu CARG3, BASE, RC
3277 | lw TAB:CARG1, LO(CARG2)
3278 | ldc1 f0, 0(CARG3)
3279 | trunc.w.d f2, f0
3280 | lw TMP0, TAB:CARG1->asize
3281 | mfc1 CARG2, f2
3282 | lw TMP1, TAB:CARG1->array
3283 | sltu AT, CARG2, TMP0
3284 | sll TMP2, CARG2, 3
3285 | beqz AT, ->vmeta_tgetr // In array part?
3286 |. addu TMP2, TMP1, TMP2
3287 | ldc1 f0, 0(TMP2)
3288 |->BC_TGETR_Z:
3289 | addu RA, BASE, RA
3290 | ins_next1
3291 | sdc1 f0, 0(RA)
3292 | ins_next2
3293 break;
3213 3294
3214 case BC_TSETV: 3295 case BC_TSETV:
3215 | // RA = src*8, RB = table*8, RC = key*8 3296 | // RA = src*8, RB = table*8, RC = key*8
@@ -3398,6 +3479,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3398 |7: // Possible table write barrier for the value. Skip valiswhite check. 3479 |7: // Possible table write barrier for the value. Skip valiswhite check.
3399 | barrierback TAB:RB, TMP3, TMP0, <2 3480 | barrierback TAB:RB, TMP3, TMP0, <2
3400 break; 3481 break;
3482 case BC_TSETR:
3483 | // RA = dst*8, RB = table*8, RC = key*8
3484 | decode_RB8a RB, INS
3485 | decode_RB8b RB
3486 | decode_RDtoRC8 RC, RD
3487 | addu CARG1, BASE, RB
3488 | addu CARG3, BASE, RC
3489 | lw TAB:CARG2, LO(CARG1)
3490 | ldc1 f0, 0(CARG3)
3491 | trunc.w.d f2, f0
3492 | lbu TMP3, TAB:CARG2->marked
3493 | lw TMP0, TAB:CARG2->asize
3494 | mfc1 CARG3, f2
3495 | lw TMP1, TAB:CARG2->array
3496 | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
3497 | bnez AT, >7
3498 |. addu RA, BASE, RA
3499 |2:
3500 | sltu AT, CARG3, TMP0
3501 | sll TMP2, CARG3, 3
3502 | beqz AT, ->vmeta_tsetr // In array part?
3503 |. ldc1 f20, 0(RA)
3504 | addu CRET1, TMP1, TMP2
3505 |->BC_TSETR_Z:
3506 | ins_next1
3507 | sdc1 f20, 0(CRET1)
3508 | ins_next2
3509 |
3510 |7: // Possible table write barrier for the value. Skip valiswhite check.
3511 | barrierback TAB:RB, TMP3, TMP0, <2
3512 break;
3513
3401 3514
3402 case BC_TSETM: 3515 case BC_TSETM:
3403 | // RA = base*8 (table at base-1), RD = num_const*8 (start index) 3516 | // RA = base*8 (table at base-1), RD = num_const*8 (start index)
@@ -3957,8 +4070,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3957 | sw AT, DISPATCH_GL(vmstate)(DISPATCH) 4070 | sw AT, DISPATCH_GL(vmstate)(DISPATCH)
3958 | lw TRACE:TMP2, 0(TMP1) 4071 | lw TRACE:TMP2, 0(TMP1)
3959 | sw BASE, DISPATCH_GL(jit_base)(DISPATCH) 4072 | sw BASE, DISPATCH_GL(jit_base)(DISPATCH)
3960 | sw L, DISPATCH_GL(jit_L)(DISPATCH)
3961 | lw TMP2, TRACE:TMP2->mcode 4073 | lw TMP2, TRACE:TMP2->mcode
4074 | sw L, DISPATCH_GL(tmpbuf.L)(DISPATCH)
3962 | jr TMP2 4075 | jr TMP2
3963 |. addiu JGL, DISPATCH, GG_DISP2G+32768 4076 |. addiu JGL, DISPATCH, GG_DISP2G+32768
3964 |.endif 4077 |.endif
@@ -4084,6 +4197,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4084 | li_vmstate INTERP 4197 | li_vmstate INTERP
4085 | lw PC, FRAME_PC(BASE) // Fetch PC of caller. 4198 | lw PC, FRAME_PC(BASE) // Fetch PC of caller.
4086 | subu RA, TMP1, RD // RA = L->top - nresults*8 4199 | subu RA, TMP1, RD // RA = L->top - nresults*8
4200 | sw L, DISPATCH_GL(cur_L)(DISPATCH)
4087 | b ->vm_returnc 4201 | b ->vm_returnc
4088 |. st_vmstate 4202 |. st_vmstate
4089 break; 4203 break;
diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc
index da75f556..4eef1eab 100644
--- a/src/vm_ppc.dasc
+++ b/src/vm_ppc.dasc
@@ -293,6 +293,7 @@
293|.type NODE, Node 293|.type NODE, Node
294|.type NARGS8, int 294|.type NARGS8, int
295|.type TRACE, GCtrace 295|.type TRACE, GCtrace
296|.type SBUF, SBuf
296| 297|
297|//----------------------------------------------------------------------- 298|//-----------------------------------------------------------------------
298| 299|
@@ -661,12 +662,13 @@ static void build_subroutines(BuildCtx *ctx)
661 | stw CARG3, SAVE_NRES 662 | stw CARG3, SAVE_NRES
662 | cmplwi TMP1, 0 663 | cmplwi TMP1, 0
663 | stw CARG3, SAVE_ERRF 664 | stw CARG3, SAVE_ERRF
664 | stp TMP0, L->cframe
665 | stp CARG3, SAVE_CFRAME 665 | stp CARG3, SAVE_CFRAME
666 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. 666 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok.
667 | stp TMP0, L->cframe
667 | beq >3 668 | beq >3
668 | 669 |
669 | // Resume after yield (like a return). 670 | // Resume after yield (like a return).
671 | stw L, DISPATCH_GL(cur_L)(DISPATCH)
670 | mr RA, BASE 672 | mr RA, BASE
671 | lp BASE, L->base 673 | lp BASE, L->base
672 | li TISNUM, LJ_TISNUM // Setup type comparison constants. 674 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
@@ -706,17 +708,18 @@ static void build_subroutines(BuildCtx *ctx)
706 | 708 |
707 |1: // Entry point for vm_pcall above (PC = ftype). 709 |1: // Entry point for vm_pcall above (PC = ftype).
708 | lp TMP1, L:CARG1->cframe 710 | lp TMP1, L:CARG1->cframe
709 | stw CARG3, SAVE_NRES
710 | mr L, CARG1 711 | mr L, CARG1
711 | stw CARG1, SAVE_L 712 | stw CARG3, SAVE_NRES
712 | mr BASE, CARG2
713 | stp sp, L->cframe // Add our C frame to cframe chain.
714 | lwz DISPATCH, L->glref // Setup pointer to dispatch table. 713 | lwz DISPATCH, L->glref // Setup pointer to dispatch table.
714 | stw CARG1, SAVE_L
715 | mr BASE, CARG2
716 | addi DISPATCH, DISPATCH, GG_G2DISP
715 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. 717 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok.
716 | stp TMP1, SAVE_CFRAME 718 | stp TMP1, SAVE_CFRAME
717 | addi DISPATCH, DISPATCH, GG_G2DISP 719 | stp sp, L->cframe // Add our C frame to cframe chain.
718 | 720 |
719 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). 721 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
722 | stw L, DISPATCH_GL(cur_L)(DISPATCH)
720 | lp TMP2, L->base // TMP2 = old base (used in vmeta_call). 723 | lp TMP2, L->base // TMP2 = old base (used in vmeta_call).
721 | li TISNUM, LJ_TISNUM // Setup type comparison constants. 724 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
722 | lp TMP1, L->top 725 | lp TMP1, L->top
@@ -753,15 +756,18 @@ static void build_subroutines(BuildCtx *ctx)
753 | lwz TMP0, L:CARG1->stack 756 | lwz TMP0, L:CARG1->stack
754 | stw CARG1, SAVE_L 757 | stw CARG1, SAVE_L
755 | lp TMP1, L->top 758 | lp TMP1, L->top
759 | lwz DISPATCH, L->glref // Setup pointer to dispatch table.
756 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. 760 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok.
757 | sub TMP0, TMP0, TMP1 // Compute -savestack(L, L->top). 761 | sub TMP0, TMP0, TMP1 // Compute -savestack(L, L->top).
758 | lp TMP1, L->cframe 762 | lp TMP1, L->cframe
759 | stp sp, L->cframe // Add our C frame to cframe chain. 763 | addi DISPATCH, DISPATCH, GG_G2DISP
760 | .toc lp CARG4, 0(CARG4) 764 | .toc lp CARG4, 0(CARG4)
761 | li TMP2, 0 765 | li TMP2, 0
762 | stw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame. 766 | stw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame.
763 | stw TMP2, SAVE_ERRF // No error function. 767 | stw TMP2, SAVE_ERRF // No error function.
764 | stp TMP1, SAVE_CFRAME 768 | stp TMP1, SAVE_CFRAME
769 | stp sp, L->cframe // Add our C frame to cframe chain.
770 | stw L, DISPATCH_GL(cur_L)(DISPATCH)
765 | mtctr CARG4 771 | mtctr CARG4
766 | bctrl // (lua_State *L, lua_CFunction func, void *ud) 772 | bctrl // (lua_State *L, lua_CFunction func, void *ud)
767 |.if PPE 773 |.if PPE
@@ -770,9 +776,7 @@ static void build_subroutines(BuildCtx *ctx)
770 |.else 776 |.else
771 | mr. BASE, CRET1 777 | mr. BASE, CRET1
772 |.endif 778 |.endif
773 | lwz DISPATCH, L->glref // Setup pointer to dispatch table. 779 | li PC, FRAME_CP
774 | li PC, FRAME_CP
775 | addi DISPATCH, DISPATCH, GG_G2DISP
776 | bne <3 // Else continue with the call. 780 | bne <3 // Else continue with the call.
777 | b ->vm_leave_cp // No base? Just remove C frame. 781 | b ->vm_leave_cp // No base? Just remove C frame.
778 | 782 |
@@ -895,6 +899,17 @@ static void build_subroutines(BuildCtx *ctx)
895 | li NARGS8:RC, 16 // 2 args for func(t, k). 899 | li NARGS8:RC, 16 // 2 args for func(t, k).
896 | b ->vm_call_dispatch_f 900 | b ->vm_call_dispatch_f
897 | 901 |
902 |->vmeta_tgetr:
903 | bl extern lj_tab_getinth // (GCtab *t, int32_t key)
904 | // Returns cTValue * or NULL.
905 | cmplwi CRET1, 0
906 | beq >1
907 | lfd f14, 0(CRET1)
908 | b ->BC_TGETR_Z
909 |1:
910 | stwx TISNIL, BASE, RA
911 | b ->cont_nop
912 |
898 |//----------------------------------------------------------------------- 913 |//-----------------------------------------------------------------------
899 | 914 |
900 |->vmeta_tsets1: 915 |->vmeta_tsets1:
@@ -962,6 +977,14 @@ static void build_subroutines(BuildCtx *ctx)
962 | stfd f0, 16(BASE) // Copy value to third argument. 977 | stfd f0, 16(BASE) // Copy value to third argument.
963 | b ->vm_call_dispatch_f 978 | b ->vm_call_dispatch_f
964 | 979 |
980 |->vmeta_tsetr:
981 | stp BASE, L->base
982 | stw PC, SAVE_PC
983 | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
984 | // Returns TValue *.
985 | stfd f14, 0(CRET1)
986 | b ->cont_nop
987 |
965 |//-- Comparison metamethods --------------------------------------------- 988 |//-- Comparison metamethods ---------------------------------------------
966 | 989 |
967 |->vmeta_comp: 990 |->vmeta_comp:
@@ -1040,6 +1063,16 @@ static void build_subroutines(BuildCtx *ctx)
1040 | b <3 1063 | b <3
1041 |.endif 1064 |.endif
1042 | 1065 |
1066 |->vmeta_istype:
1067 | subi PC, PC, 4
1068 | stp BASE, L->base
1069 | srwi CARG2, RA, 3
1070 | mr CARG1, L
1071 | srwi CARG3, RD, 3
1072 | stw PC, SAVE_PC
1073 | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
1074 | b ->cont_nop
1075 |
1043 |//-- Arithmetic metamethods --------------------------------------------- 1076 |//-- Arithmetic metamethods ---------------------------------------------
1044 | 1077 |
1045 |->vmeta_arith_nv: 1078 |->vmeta_arith_nv:
@@ -1364,9 +1397,9 @@ static void build_subroutines(BuildCtx *ctx)
1364 | mr CARG1, L 1397 | mr CARG1, L
1365 | mr CARG2, BASE 1398 | mr CARG2, BASE
1366 |.if DUALNUM 1399 |.if DUALNUM
1367 | bl extern lj_str_fromnumber // (lua_State *L, cTValue *o) 1400 | bl extern lj_strfmt_number // (lua_State *L, cTValue *o)
1368 |.else 1401 |.else
1369 | bl extern lj_str_fromnum // (lua_State *L, lua_Number *np) 1402 | bl extern lj_strfmt_num // (lua_State *L, lua_Number *np)
1370 |.endif 1403 |.endif
1371 | // Returns GCstr *. 1404 | // Returns GCstr *.
1372 | li CARG3, LJ_TSTR 1405 | li CARG3, LJ_TSTR
@@ -1599,6 +1632,7 @@ static void build_subroutines(BuildCtx *ctx)
1599 | lp TMP3, L:SAVE0->top 1632 | lp TMP3, L:SAVE0->top
1600 | li_vmstate INTERP 1633 | li_vmstate INTERP
1601 | lp BASE, L->base 1634 | lp BASE, L->base
1635 | stw L, DISPATCH_GL(cur_L)(DISPATCH)
1602 | st_vmstate 1636 | st_vmstate
1603 | bgt >8 1637 | bgt >8
1604 | sub RD, TMP3, TMP2 1638 | sub RD, TMP3, TMP2
@@ -1870,12 +1904,6 @@ static void build_subroutines(BuildCtx *ctx)
1870 | math_extern2 atan2 1904 | math_extern2 atan2
1871 | math_extern2 fmod 1905 | math_extern2 fmod
1872 | 1906 |
1873 |->ff_math_deg:
1874 |.ffunc_n math_rad
1875 | lfd FARG2, CFUNC:RB->upvalue[0]
1876 | fmul FARG1, FARG1, FARG2
1877 | b ->fff_resn
1878 |
1879 |.if DUALNUM 1907 |.if DUALNUM
1880 |.ffunc math_ldexp 1908 |.ffunc math_ldexp
1881 | cmplwi NARGS8:RC, 16 1909 | cmplwi NARGS8:RC, 16
@@ -2021,11 +2049,6 @@ static void build_subroutines(BuildCtx *ctx)
2021 | 2049 |
2022 |//-- String library ----------------------------------------------------- 2050 |//-- String library -----------------------------------------------------
2023 | 2051 |
2024 |.ffunc_1 string_len
2025 | checkstr CARG3; bne ->fff_fallback
2026 | lwz CRET1, STR:CARG1->len
2027 | b ->fff_resi
2028 |
2029 |.ffunc string_byte // Only handle the 1-arg case here. 2052 |.ffunc string_byte // Only handle the 1-arg case here.
2030 | cmplwi NARGS8:RC, 8 2053 | cmplwi NARGS8:RC, 8
2031 | lwz CARG3, 0(BASE) 2054 | lwz CARG3, 0(BASE)
@@ -2080,6 +2103,7 @@ static void build_subroutines(BuildCtx *ctx)
2080 | stp BASE, L->base 2103 | stp BASE, L->base
2081 | stw PC, SAVE_PC 2104 | stw PC, SAVE_PC
2082 | bl extern lj_str_new // (lua_State *L, char *str, size_t l) 2105 | bl extern lj_str_new // (lua_State *L, char *str, size_t l)
2106 |->fff_resstr:
2083 | // Returns GCstr *. 2107 | // Returns GCstr *.
2084 | lp BASE, L->base 2108 | lp BASE, L->base
2085 | li CARG3, LJ_TSTR 2109 | li CARG3, LJ_TSTR
@@ -2157,114 +2181,29 @@ static void build_subroutines(BuildCtx *ctx)
2157 | addi TMP1, TMP1, 1 // start = 1 + (start ? start+len : 0) 2181 | addi TMP1, TMP1, 1 // start = 1 + (start ? start+len : 0)
2158 | b <3 2182 | b <3
2159 | 2183 |
2160 |.ffunc string_rep // Only handle the 1-char case inline. 2184 |.macro ffstring_op, name
2161 | ffgccheck 2185 | .ffunc string_ .. name
2162 | cmplwi NARGS8:RC, 16
2163 | lwz TMP0, 0(BASE)
2164 | lwz STR:CARG1, 4(BASE)
2165 | lwz CARG4, 8(BASE)
2166 |.if DUALNUM
2167 | lwz CARG3, 12(BASE)
2168 |.else
2169 | lfd FARG2, 8(BASE)
2170 |.endif
2171 | bne ->fff_fallback // Exactly 2 arguments.
2172 | checkstr TMP0; bne ->fff_fallback
2173 |.if DUALNUM
2174 | checknum CARG4; bne ->fff_fallback
2175 |.else
2176 | checknum CARG4; bge ->fff_fallback
2177 | toint CARG3, FARG2
2178 |.endif
2179 | lwz TMP0, STR:CARG1->len
2180 | cmpwi CARG3, 0
2181 | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
2182 | ble >2 // Count <= 0? (or non-int)
2183 | cmplwi TMP0, 1
2184 | subi TMP2, CARG3, 1
2185 | blt >2 // Zero length string?
2186 | cmplw cr1, TMP1, CARG3
2187 | bne ->fff_fallback // Fallback for > 1-char strings.
2188 | lbz TMP0, STR:CARG1[1]
2189 | lp CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
2190 | blt cr1, ->fff_fallback
2191 |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?).
2192 | cmplwi TMP2, 0
2193 | stbx TMP0, CARG2, TMP2
2194 | subi TMP2, TMP2, 1
2195 | bne <1
2196 | b ->fff_newstr
2197 |2: // Return empty string.
2198 | la STR:CARG1, DISPATCH_GL(strempty)(DISPATCH)
2199 | li CARG3, LJ_TSTR
2200 | b ->fff_restv
2201 |
2202 |.ffunc string_reverse
2203 | ffgccheck 2186 | ffgccheck
2204 | cmplwi NARGS8:RC, 8 2187 | cmplwi NARGS8:RC, 8
2205 | lwz CARG3, 0(BASE) 2188 | lwz CARG3, 0(BASE)
2206 | lwz STR:CARG1, 4(BASE) 2189 | lwz STR:CARG2, 4(BASE)
2207 | blt ->fff_fallback 2190 | blt ->fff_fallback
2208 | checkstr CARG3 2191 | checkstr CARG3
2209 | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) 2192 | la SBUF:CARG1, DISPATCH_GL(tmpbuf)(DISPATCH)
2210 | bne ->fff_fallback 2193 | bne ->fff_fallback
2211 | lwz CARG3, STR:CARG1->len 2194 | lwz TMP0, SBUF:CARG1->b
2212 | la CARG1, #STR(STR:CARG1) 2195 | stw L, SBUF:CARG1->L
2213 | lp CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) 2196 | stp BASE, L->base
2214 | li TMP2, 0 2197 | stw PC, SAVE_PC
2215 | cmplw TMP1, CARG3 2198 | stw TMP0, SBUF:CARG1->p
2216 | subi TMP3, CARG3, 1 2199 | bl extern lj_buf_putstr_ .. name
2217 | blt ->fff_fallback 2200 | bl extern lj_buf_tostr
2218 |1: // Reverse string copy. 2201 | b ->fff_resstr
2219 | cmpwi TMP3, 0
2220 | lbzx TMP1, CARG1, TMP2
2221 | blty ->fff_newstr
2222 | stbx TMP1, CARG2, TMP3
2223 | subi TMP3, TMP3, 1
2224 | addi TMP2, TMP2, 1
2225 | b <1
2226 |
2227 |.macro ffstring_case, name, lo
2228 | .ffunc name
2229 | ffgccheck
2230 | cmplwi NARGS8:RC, 8
2231 | lwz CARG3, 0(BASE)
2232 | lwz STR:CARG1, 4(BASE)
2233 | blt ->fff_fallback
2234 | checkstr CARG3
2235 | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
2236 | bne ->fff_fallback
2237 | lwz CARG3, STR:CARG1->len
2238 | la CARG1, #STR(STR:CARG1)
2239 | lp CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
2240 | cmplw TMP1, CARG3
2241 | li TMP2, 0
2242 | blt ->fff_fallback
2243 |1: // ASCII case conversion.
2244 | cmplw TMP2, CARG3
2245 | lbzx TMP1, CARG1, TMP2
2246 | bgey ->fff_newstr
2247 | subi TMP0, TMP1, lo
2248 | xori TMP3, TMP1, 0x20
2249 | addic TMP0, TMP0, -26
2250 | subfe TMP3, TMP3, TMP3
2251 | rlwinm TMP3, TMP3, 0, 26, 26 // x &= 0x20.
2252 | xor TMP1, TMP1, TMP3
2253 | stbx TMP1, CARG2, TMP2
2254 | addi TMP2, TMP2, 1
2255 | b <1
2256 |.endmacro 2202 |.endmacro
2257 | 2203 |
2258 |ffstring_case string_lower, 65 2204 |ffstring_op reverse
2259 |ffstring_case string_upper, 97 2205 |ffstring_op lower
2260 | 2206 |ffstring_op upper
2261 |//-- Table library ------------------------------------------------------
2262 |
2263 |.ffunc_1 table_getn
2264 | checktab CARG3; bne ->fff_fallback
2265 | bl extern lj_tab_len // (GCtab *t)
2266 | // Returns uint32_t (but less than 2^31).
2267 | b ->fff_resi
2268 | 2207 |
2269 |//-- Bit library -------------------------------------------------------- 2208 |//-- Bit library --------------------------------------------------------
2270 | 2209 |
@@ -2566,6 +2505,70 @@ static void build_subroutines(BuildCtx *ctx)
2566 | mtctr CRET1 2505 | mtctr CRET1
2567 | bctr 2506 | bctr
2568 | 2507 |
2508 |->cont_stitch: // Trace stitching.
2509 |.if JIT
2510 | // RA = resultptr, RB = meta base
2511 | lwz INS, -4(PC)
2512 | lwz TMP3, -20(RB) // Save previous trace number.
2513 | addic. TMP1, MULTRES, -8
2514 | decode_RA8 RC, INS // Call base.
2515 | beq >2
2516 |1: // Move results down.
2517 | lfd f0, 0(RA)
2518 | addic. TMP1, TMP1, -8
2519 | addi RA, RA, 8
2520 | stfdx f0, BASE, RC
2521 | addi RC, RC, 8
2522 | bne <1
2523 |2:
2524 | decode_RA8 RA, INS
2525 | decode_RB8 RB, INS
2526 | add RA, RA, RB
2527 | lwz TMP1, DISPATCH_J(trace)(DISPATCH)
2528 |3:
2529 | cmplw RA, RC
2530 | bgt >9 // More results wanted?
2531 |
2532 | slwi TMP2, TMP3, 2
2533 | lwzx TRACE:TMP2, TMP1, TMP2
2534 | cmpwi TRACE:TMP2, 0
2535 | beq ->cont_nop
2536 | lhz RD, TRACE:TMP2->link
2537 | cmpw RD, TMP3
2538 | cmpwi cr1, RD, 0
2539 | beq ->cont_nop // Blacklisted.
2540 | slwi RD, RD, 3
2541 | bne cr1, =>BC_JLOOP // Jump to stitched trace.
2542 |
2543 | // Stitch a new trace to the previous trace.
2544 | stw TMP3, DISPATCH_J(exitno)(DISPATCH)
2545 | stp L, DISPATCH_J(L)(DISPATCH)
2546 | stp BASE, L->base
2547 | addi CARG1, DISPATCH, GG_DISP2J
2548 | mr CARG2, PC
2549 | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
2550 | lp BASE, L->base
2551 | b ->cont_nop
2552 |
2553 |9:
2554 | stwx TISNIL, BASE, RC
2555 | addi RC, RC, 8
2556 | b <3
2557 |.endif
2558 |
2559 |->vm_profhook: // Dispatch target for profiler hook.
2560#if LJ_HASPROFILE
2561 | mr CARG1, L
2562 | stw MULTRES, SAVE_MULTRES
2563 | mr CARG2, PC
2564 | stp BASE, L->base
2565 | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc)
2566 | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
2567 | lp BASE, L->base
2568 | subi PC, PC, 4
2569 | b ->cont_nop
2570#endif
2571 |
2569 |//----------------------------------------------------------------------- 2572 |//-----------------------------------------------------------------------
2570 |//-- Trace exit handler ------------------------------------------------- 2573 |//-- Trace exit handler -------------------------------------------------
2571 |//----------------------------------------------------------------------- 2574 |//-----------------------------------------------------------------------
@@ -2600,16 +2603,16 @@ static void build_subroutines(BuildCtx *ctx)
2600 | savex_ 20,21,22,23 2603 | savex_ 20,21,22,23
2601 | lhz CARG4, 2(CARG3) // Load trace number. 2604 | lhz CARG4, 2(CARG3) // Load trace number.
2602 | savex_ 24,25,26,27 2605 | savex_ 24,25,26,27
2603 | lwz L, DISPATCH_GL(jit_L)(DISPATCH) 2606 | lwz L, DISPATCH_GL(cur_L)(DISPATCH)
2604 | savex_ 28,29,30,31 2607 | savex_ 28,29,30,31
2605 | sub CARG3, TMP0, CARG3 // Compute exit number. 2608 | sub CARG3, TMP0, CARG3 // Compute exit number.
2606 | lp BASE, DISPATCH_GL(jit_base)(DISPATCH) 2609 | lp BASE, DISPATCH_GL(jit_base)(DISPATCH)
2607 | srwi CARG3, CARG3, 2 2610 | srwi CARG3, CARG3, 2
2608 | stw L, DISPATCH_J(L)(DISPATCH) 2611 | stp L, DISPATCH_J(L)(DISPATCH)
2609 | subi CARG3, CARG3, 2 2612 | subi CARG3, CARG3, 2
2610 | stw TMP1, DISPATCH_GL(jit_L)(DISPATCH)
2611 | stw CARG4, DISPATCH_J(parent)(DISPATCH)
2612 | stp BASE, L->base 2613 | stp BASE, L->base
2614 | stw CARG4, DISPATCH_J(parent)(DISPATCH)
2615 | stw TMP1, DISPATCH_GL(jit_base)(DISPATCH)
2613 | addi CARG1, DISPATCH, GG_DISP2J 2616 | addi CARG1, DISPATCH, GG_DISP2J
2614 | stw CARG3, DISPATCH_J(exitno)(DISPATCH) 2617 | stw CARG3, DISPATCH_J(exitno)(DISPATCH)
2615 | addi CARG2, sp, 16 2618 | addi CARG2, sp, 16
@@ -2633,15 +2636,16 @@ static void build_subroutines(BuildCtx *ctx)
2633 | // CARG1 = MULTRES or negated error code, BASE, PC and JGL set. 2636 | // CARG1 = MULTRES or negated error code, BASE, PC and JGL set.
2634 | lwz L, SAVE_L 2637 | lwz L, SAVE_L
2635 | addi DISPATCH, JGL, -GG_DISP2G-32768 2638 | addi DISPATCH, JGL, -GG_DISP2G-32768
2639 | stp BASE, L->base
2636 |1: 2640 |1:
2637 | cmpwi CARG1, 0 2641 | cmpwi CARG1, 0
2638 | blt >3 // Check for error from exit. 2642 | blt >9 // Check for error from exit.
2639 | lwz LFUNC:TMP1, FRAME_FUNC(BASE) 2643 | lwz LFUNC:RB, FRAME_FUNC(BASE)
2640 | slwi MULTRES, CARG1, 3 2644 | slwi MULTRES, CARG1, 3
2641 | li TMP2, 0 2645 | li TMP2, 0
2642 | stw MULTRES, SAVE_MULTRES 2646 | stw MULTRES, SAVE_MULTRES
2643 | lwz TMP1, LFUNC:TMP1->pc 2647 | lwz TMP1, LFUNC:RB->pc
2644 | stw TMP2, DISPATCH_GL(jit_L)(DISPATCH) 2648 | stw TMP2, DISPATCH_GL(jit_base)(DISPATCH)
2645 | lwz KBASE, PC2PROTO(k)(TMP1) 2649 | lwz KBASE, PC2PROTO(k)(TMP1)
2646 | // Setup type comparison constants. 2650 | // Setup type comparison constants.
2647 | li TISNUM, LJ_TISNUM 2651 | li TISNUM, LJ_TISNUM
@@ -2671,11 +2675,25 @@ static void build_subroutines(BuildCtx *ctx)
2671 | decode_RC8 RC, INS 2675 | decode_RC8 RC, INS
2672 | bctr 2676 | bctr
2673 |2: 2677 |2:
2678 | cmplwi TMP1, (BC_FUNCC+2)*4 // Fast function?
2679 | blt >3
2680 | // Check frame below fast function.
2681 | lwz TMP1, FRAME_PC(BASE)
2682 | andix. TMP0, TMP1, FRAME_TYPE
2683 | bney >3 // Trace stitching continuation?
2684 | // Otherwise set KBASE for Lua function below fast function.
2685 | lwz TMP2, -4(TMP1)
2686 | decode_RA8 TMP0, TMP2
2687 | sub TMP1, BASE, TMP0
2688 | lwz LFUNC:TMP2, -12(TMP1)
2689 | lwz TMP1, LFUNC:TMP2->pc
2690 | lwz KBASE, PC2PROTO(k)(TMP1)
2691 |3:
2674 | subi RC, MULTRES, 8 2692 | subi RC, MULTRES, 8
2675 | add RA, RA, BASE 2693 | add RA, RA, BASE
2676 | bctr 2694 | bctr
2677 | 2695 |
2678 |3: // Rethrow error from the right C frame. 2696 |9: // Rethrow error from the right C frame.
2679 | neg CARG2, CARG1 2697 | neg CARG2, CARG1
2680 | mr CARG1, L 2698 | mr CARG1, L
2681 | bl extern lj_err_throw // (lua_State *L, int errcode) 2699 | bl extern lj_err_throw // (lua_State *L, int errcode)
@@ -3265,6 +3283,29 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3265 | ins_next 3283 | ins_next
3266 break; 3284 break;
3267 3285
3286 case BC_ISTYPE:
3287 | // RA = src*8, RD = -type*8
3288 | lwzx TMP0, BASE, RA
3289 | srwi TMP1, RD, 3
3290 | ins_next1
3291 |.if not PPE and not GPR64
3292 | add. TMP0, TMP0, TMP1
3293 |.else
3294 | neg TMP1, TMP1
3295 | cmpw TMP0, TMP1
3296 |.endif
3297 | bne ->vmeta_istype
3298 | ins_next2
3299 break;
3300 case BC_ISNUM:
3301 | // RA = src*8, RD = -(TISNUM-1)*8
3302 | lwzx TMP0, BASE, RA
3303 | ins_next1
3304 | checknum TMP0
3305 | bge ->vmeta_istype
3306 | ins_next2
3307 break;
3308
3268 /* -- Unary ops --------------------------------------------------------- */ 3309 /* -- Unary ops --------------------------------------------------------- */
3269 3310
3270 case BC_MOV: 3311 case BC_MOV:
@@ -4016,6 +4057,30 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4016 | bne <1 // 'no __index' flag set: done. 4057 | bne <1 // 'no __index' flag set: done.
4017 | b ->vmeta_tgetb // Caveat: preserve TMP0! 4058 | b ->vmeta_tgetb // Caveat: preserve TMP0!
4018 break; 4059 break;
4060 case BC_TGETR:
4061 | // RA = dst*8, RB = table*8, RC = key*8
4062 | add RB, BASE, RB
4063 | lwz TAB:CARG1, 4(RB)
4064 |.if DUALNUM
4065 | add RC, BASE, RC
4066 | lwz TMP0, TAB:CARG1->asize
4067 | lwz CARG2, 4(RC)
4068 | lwz TMP1, TAB:CARG1->array
4069 |.else
4070 | lfdx f0, BASE, RC
4071 | lwz TMP0, TAB:CARG1->asize
4072 | toint CARG2, f0
4073 | lwz TMP1, TAB:CARG1->array
4074 |.endif
4075 | cmplw TMP0, CARG2
4076 | slwi TMP2, CARG2, 3
4077 | ble ->vmeta_tgetr // In array part?
4078 | lfdx f14, TMP1, TMP2
4079 |->BC_TGETR_Z:
4080 | ins_next1
4081 | stfdx f14, BASE, RA
4082 | ins_next2
4083 break;
4019 4084
4020 case BC_TSETV: 4085 case BC_TSETV:
4021 | // RA = src*8, RB = table*8, RC = key*8 4086 | // RA = src*8, RB = table*8, RC = key*8
@@ -4195,6 +4260,39 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4195 | barrierback TAB:RB, TMP3, TMP0 4260 | barrierback TAB:RB, TMP3, TMP0
4196 | b <2 4261 | b <2
4197 break; 4262 break;
4263 case BC_TSETR:
4264 | // RA = dst*8, RB = table*8, RC = key*8
4265 | add RB, BASE, RB
4266 | lwz TAB:CARG2, 4(RB)
4267 |.if DUALNUM
4268 | add RC, BASE, RC
4269 | lbz TMP3, TAB:RB->marked
4270 | lwz TMP0, TAB:CARG2->asize
4271 | lwz CARG3, 4(RC)
4272 | lwz TMP1, TAB:CARG2->array
4273 |.else
4274 | lfdx f0, BASE, RC
4275 | lbz TMP3, TAB:RB->marked
4276 | lwz TMP0, TAB:CARG2->asize
4277 | toint CARG3, f0
4278 | lwz TMP1, TAB:CARG2->array
4279 |.endif
4280 | andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table)
4281 | bne >7
4282 |2:
4283 | cmplw TMP0, CARG3
4284 | slwi TMP2, CARG3, 3
4285 | lfdx f14, BASE, RA
4286 | ble ->vmeta_tsetr // In array part?
4287 | ins_next1
4288 | stfdx f14, TMP1, TMP2
4289 | ins_next2
4290 |
4291 |7: // Possible table write barrier for the value. Skip valiswhite check.
4292 | barrierback TAB:CARG2, TMP3, TMP2
4293 | b <2
4294 break;
4295
4198 4296
4199 case BC_TSETM: 4297 case BC_TSETM:
4200 | // RA = base*8 (table at base-1), RD = num_const*8 (start index) 4298 | // RA = base*8 (table at base-1), RD = num_const*8 (start index)
@@ -4836,8 +4934,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4836 | lp TMP2, TRACE:TMP2->mcode 4934 | lp TMP2, TRACE:TMP2->mcode
4837 | stw BASE, DISPATCH_GL(jit_base)(DISPATCH) 4935 | stw BASE, DISPATCH_GL(jit_base)(DISPATCH)
4838 | mtctr TMP2 4936 | mtctr TMP2
4839 | stw L, DISPATCH_GL(jit_L)(DISPATCH)
4840 | addi JGL, DISPATCH, GG_DISP2G+32768 4937 | addi JGL, DISPATCH, GG_DISP2G+32768
4938 | stw L, DISPATCH_GL(tmpbuf.L)(DISPATCH)
4841 | bctr 4939 | bctr
4842 |.endif 4940 |.endif
4843 break; 4941 break;
@@ -4972,6 +5070,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4972 | lp TMP1, L->top 5070 | lp TMP1, L->top
4973 | li_vmstate INTERP 5071 | li_vmstate INTERP
4974 | lwz PC, FRAME_PC(BASE) // Fetch PC of caller. 5072 | lwz PC, FRAME_PC(BASE) // Fetch PC of caller.
5073 | stw L, DISPATCH_GL(cur_L)(DISPATCH)
4975 | sub RA, TMP1, RD // RA = L->top - nresults*8 5074 | sub RA, TMP1, RD // RA = L->top - nresults*8
4976 | st_vmstate 5075 | st_vmstate
4977 | b ->vm_returnc 5076 | b ->vm_returnc
diff --git a/src/vm_ppcspe.dasc b/src/vm_ppcspe.dasc
index adcec244..ee394031 100644
--- a/src/vm_ppcspe.dasc
+++ b/src/vm_ppcspe.dasc
@@ -1456,12 +1456,6 @@ static void build_subroutines(BuildCtx *ctx)
1456 | math_extern2 atan2 1456 | math_extern2 atan2
1457 | math_extern2 fmod 1457 | math_extern2 fmod
1458 | 1458 |
1459 |->ff_math_deg:
1460 |.ffunc_n math_rad
1461 | evldd CARG2, CFUNC:RB->upvalue[0]
1462 | efdmul CRET1, CARG1, CARG2
1463 | b ->fff_restv
1464 |
1465 |.ffunc math_ldexp 1459 |.ffunc math_ldexp
1466 | cmplwi NARGS8:RC, 16 1460 | cmplwi NARGS8:RC, 16
1467 | evldd CARG2, 0(BASE) 1461 | evldd CARG2, 0(BASE)
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
index bd7056f8..a0c7cc60 100644
--- a/src/vm_x86.dasc
+++ b/src/vm_x86.dasc
@@ -18,7 +18,6 @@
18| 18|
19|.if P64 19|.if P64
20|.define X64, 1 20|.define X64, 1
21|.define SSE, 1
22|.if WIN 21|.if WIN
23|.define X64WIN, 1 22|.define X64WIN, 1
24|.endif 23|.endif
@@ -116,6 +115,7 @@
116|.type NODE, Node 115|.type NODE, Node
117|.type NARGS, int 116|.type NARGS, int
118|.type TRACE, GCtrace 117|.type TRACE, GCtrace
118|.type SBUF, SBuf
119| 119|
120|// Stack layout while in interpreter. Must match with lj_frame.h. 120|// Stack layout while in interpreter. Must match with lj_frame.h.
121|//----------------------------------------------------------------------- 121|//-----------------------------------------------------------------------
@@ -630,17 +630,18 @@ static void build_subroutines(BuildCtx *ctx)
630 | lea KBASEa, [esp+CFRAME_RESUME] 630 | lea KBASEa, [esp+CFRAME_RESUME]
631 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. 631 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
632 | add DISPATCH, GG_G2DISP 632 | add DISPATCH, GG_G2DISP
633 | mov L:RB->cframe, KBASEa
634 | mov SAVE_PC, RD // Any value outside of bytecode is ok. 633 | mov SAVE_PC, RD // Any value outside of bytecode is ok.
635 | mov SAVE_CFRAME, RDa 634 | mov SAVE_CFRAME, RDa
636 |.if X64 635 |.if X64
637 | mov SAVE_NRES, RD 636 | mov SAVE_NRES, RD
638 | mov SAVE_ERRF, RD 637 | mov SAVE_ERRF, RD
639 |.endif 638 |.endif
639 | mov L:RB->cframe, KBASEa
640 | cmp byte L:RB->status, RDL 640 | cmp byte L:RB->status, RDL
641 | je >3 // Initial resume (like a call). 641 | je >2 // Initial resume (like a call).
642 | 642 |
643 | // Resume after yield (like a return). 643 | // Resume after yield (like a return).
644 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
644 | set_vmstate INTERP 645 | set_vmstate INTERP
645 | mov byte L:RB->status, RDL 646 | mov byte L:RB->status, RDL
646 | mov BASE, L:RB->base 647 | mov BASE, L:RB->base
@@ -680,20 +681,19 @@ static void build_subroutines(BuildCtx *ctx)
680 | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME! 681 | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME!
681 |.endif 682 |.endif
682 | 683 |
684 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
683 | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain. 685 | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain.
684 | mov SAVE_CFRAME, KBASEa 686 | mov SAVE_CFRAME, KBASEa
685 | mov SAVE_PC, L:RB // Any value outside of bytecode is ok. 687 | mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
688 | add DISPATCH, GG_G2DISP
686 |.if X64 689 |.if X64
687 | mov L:RB->cframe, rsp 690 | mov L:RB->cframe, rsp
688 |.else 691 |.else
689 | mov L:RB->cframe, esp 692 | mov L:RB->cframe, esp
690 |.endif 693 |.endif
691 | 694 |
692 |2: // Entry point for vm_cpcall below (RA = base, RB = L, PC = ftype). 695 |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
693 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. 696 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
694 | add DISPATCH, GG_G2DISP
695 |
696 |3: // Entry point for vm_resume above (RA = base, RB = L, PC = ftype).
697 | set_vmstate INTERP 697 | set_vmstate INTERP
698 | mov BASE, L:RB->base // BASE = old base (used in vmeta_call). 698 | mov BASE, L:RB->base // BASE = old base (used in vmeta_call).
699 | add PC, RA 699 | add PC, RA
@@ -731,14 +731,17 @@ static void build_subroutines(BuildCtx *ctx)
731 | 731 |
732 | mov KBASE, L:RB->stack // Compute -savestack(L, L->top). 732 | mov KBASE, L:RB->stack // Compute -savestack(L, L->top).
733 | sub KBASE, L:RB->top 733 | sub KBASE, L:RB->top
734 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
734 | mov SAVE_ERRF, 0 // No error function. 735 | mov SAVE_ERRF, 0 // No error function.
735 | mov SAVE_NRES, KBASE // Neg. delta means cframe w/o frame. 736 | mov SAVE_NRES, KBASE // Neg. delta means cframe w/o frame.
737 | add DISPATCH, GG_G2DISP
736 | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe). 738 | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
737 | 739 |
738 |.if X64 740 |.if X64
739 | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain. 741 | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain.
740 | mov SAVE_CFRAME, KBASEa 742 | mov SAVE_CFRAME, KBASEa
741 | mov L:RB->cframe, rsp 743 | mov L:RB->cframe, rsp
744 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
742 | 745 |
743 | call CARG4 // (lua_State *L, lua_CFunction func, void *ud) 746 | call CARG4 // (lua_State *L, lua_CFunction func, void *ud)
744 |.else 747 |.else
@@ -749,6 +752,7 @@ static void build_subroutines(BuildCtx *ctx)
749 | mov KBASE, L:RB->cframe // Add our C frame to cframe chain. 752 | mov KBASE, L:RB->cframe // Add our C frame to cframe chain.
750 | mov SAVE_CFRAME, KBASE 753 | mov SAVE_CFRAME, KBASE
751 | mov L:RB->cframe, esp 754 | mov L:RB->cframe, esp
755 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
752 | 756 |
753 | call BASE // (lua_State *L, lua_CFunction func, void *ud) 757 | call BASE // (lua_State *L, lua_CFunction func, void *ud)
754 |.endif 758 |.endif
@@ -856,13 +860,9 @@ static void build_subroutines(BuildCtx *ctx)
856 |.if DUALNUM 860 |.if DUALNUM
857 | mov TMP2, LJ_TISNUM 861 | mov TMP2, LJ_TISNUM
858 | mov TMP1, RC 862 | mov TMP1, RC
859 |.elif SSE 863 |.else
860 | cvtsi2sd xmm0, RC 864 | cvtsi2sd xmm0, RC
861 | movsd TMPQ, xmm0 865 | movsd TMPQ, xmm0
862 |.else
863 | mov ARG4, RC
864 | fild ARG4
865 | fstp TMPQ
866 |.endif 866 |.endif
867 | lea RCa, TMPQ // Store temp. TValue in TMPQ. 867 | lea RCa, TMPQ // Store temp. TValue in TMPQ.
868 | jmp >1 868 | jmp >1
@@ -916,6 +916,19 @@ static void build_subroutines(BuildCtx *ctx)
916 | mov NARGS:RD, 2+1 // 2 args for func(t, k). 916 | mov NARGS:RD, 2+1 // 2 args for func(t, k).
917 | jmp ->vm_call_dispatch_f 917 | jmp ->vm_call_dispatch_f
918 | 918 |
919 |->vmeta_tgetr:
920 | mov FCARG1, TAB:RB
921 | mov RB, BASE // Save BASE.
922 | mov FCARG2, RC // Caveat: FCARG2 == BASE
923 | call extern lj_tab_getinth@8 // (GCtab *t, int32_t key)
924 | // cTValue * or NULL returned in eax (RC).
925 | movzx RA, PC_RA
926 | mov BASE, RB // Restore BASE.
927 | test RC, RC
928 | jnz ->BC_TGETR_Z
929 | mov dword [BASE+RA*8+4], LJ_TNIL
930 | jmp ->BC_TGETR2_Z
931 |
919 |//----------------------------------------------------------------------- 932 |//-----------------------------------------------------------------------
920 | 933 |
921 |->vmeta_tsets: 934 |->vmeta_tsets:
@@ -935,13 +948,9 @@ static void build_subroutines(BuildCtx *ctx)
935 |.if DUALNUM 948 |.if DUALNUM
936 | mov TMP2, LJ_TISNUM 949 | mov TMP2, LJ_TISNUM
937 | mov TMP1, RC 950 | mov TMP1, RC
938 |.elif SSE 951 |.else
939 | cvtsi2sd xmm0, RC 952 | cvtsi2sd xmm0, RC
940 | movsd TMPQ, xmm0 953 | movsd TMPQ, xmm0
941 |.else
942 | mov ARG4, RC
943 | fild ARG4
944 | fstp TMPQ
945 |.endif 954 |.endif
946 | lea RCa, TMPQ // Store temp. TValue in TMPQ. 955 | lea RCa, TMPQ // Store temp. TValue in TMPQ.
947 | jmp >1 956 | jmp >1
@@ -1007,6 +1016,33 @@ static void build_subroutines(BuildCtx *ctx)
1007 | mov NARGS:RD, 3+1 // 3 args for func(t, k, v). 1016 | mov NARGS:RD, 3+1 // 3 args for func(t, k, v).
1008 | jmp ->vm_call_dispatch_f 1017 | jmp ->vm_call_dispatch_f
1009 | 1018 |
1019 |->vmeta_tsetr:
1020 |.if X64WIN
1021 | mov L:CARG1d, SAVE_L
1022 | mov CARG3d, RC
1023 | mov L:CARG1d->base, BASE
1024 | xchg CARG2d, TAB:RB // Caveat: CARG2d == BASE.
1025 |.elif X64
1026 | mov L:CARG1d, SAVE_L
1027 | mov CARG2d, TAB:RB
1028 | mov L:CARG1d->base, BASE
1029 | mov RB, BASE // Save BASE.
1030 | mov CARG3d, RC // Caveat: CARG3d == BASE.
1031 |.else
1032 | mov L:RA, SAVE_L
1033 | mov ARG2, TAB:RB
1034 | mov RB, BASE // Save BASE.
1035 | mov ARG3, RC
1036 | mov ARG1, L:RA
1037 | mov L:RA->base, BASE
1038 |.endif
1039 | mov SAVE_PC, PC
1040 | call extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
1041 | // TValue * returned in eax (RC).
1042 | movzx RA, PC_RA
1043 | mov BASE, RB // Restore BASE.
1044 | jmp ->BC_TSETR_Z
1045 |
1010 |//-- Comparison metamethods --------------------------------------------- 1046 |//-- Comparison metamethods ---------------------------------------------
1011 | 1047 |
1012 |->vmeta_comp: 1048 |->vmeta_comp:
@@ -1101,6 +1137,26 @@ static void build_subroutines(BuildCtx *ctx)
1101 | jmp <3 1137 | jmp <3
1102 |.endif 1138 |.endif
1103 | 1139 |
1140 |->vmeta_istype:
1141 |.if X64
1142 | mov L:RB, SAVE_L
1143 | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
1144 | mov CARG2d, RA
1145 | movzx CARG3d, PC_RD
1146 | mov L:CARG1d, L:RB
1147 |.else
1148 | movzx RD, PC_RD
1149 | mov ARG2, RA
1150 | mov L:RB, SAVE_L
1151 | mov ARG3, RD
1152 | mov ARG1, L:RB
1153 | mov L:RB->base, BASE
1154 |.endif
1155 | mov SAVE_PC, PC
1156 | call extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
1157 | mov BASE, L:RB->base
1158 | jmp <6
1159 |
1104 |//-- Arithmetic metamethods --------------------------------------------- 1160 |//-- Arithmetic metamethods ---------------------------------------------
1105 | 1161 |
1106 |->vmeta_arith_vno: 1162 |->vmeta_arith_vno:
@@ -1509,11 +1565,7 @@ static void build_subroutines(BuildCtx *ctx)
1509 |.else 1565 |.else
1510 | jae ->fff_fallback 1566 | jae ->fff_fallback
1511 |.endif 1567 |.endif
1512 |.if SSE
1513 | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0 1568 | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0
1514 |.else
1515 | fld qword [BASE]; jmp ->fff_resn
1516 |.endif
1517 | 1569 |
1518 |.ffunc_1 tostring 1570 |.ffunc_1 tostring
1519 | // Only handles the string or number case inline. 1571 | // Only handles the string or number case inline.
@@ -1538,9 +1590,9 @@ static void build_subroutines(BuildCtx *ctx)
1538 |.endif 1590 |.endif
1539 | mov L:FCARG1, L:RB 1591 | mov L:FCARG1, L:RB
1540 |.if DUALNUM 1592 |.if DUALNUM
1541 | call extern lj_str_fromnumber@8 // (lua_State *L, cTValue *o) 1593 | call extern lj_strfmt_number@8 // (lua_State *L, cTValue *o)
1542 |.else 1594 |.else
1543 | call extern lj_str_fromnum@8 // (lua_State *L, lua_Number *np) 1595 | call extern lj_strfmt_num@8 // (lua_State *L, lua_Number *np)
1544 |.endif 1596 |.endif
1545 | // GCstr returned in eax (RD). 1597 | // GCstr returned in eax (RD).
1546 | mov BASE, L:RB->base 1598 | mov BASE, L:RB->base
@@ -1631,19 +1683,12 @@ static void build_subroutines(BuildCtx *ctx)
1631 | add RD, 1 1683 | add RD, 1
1632 | mov dword [BASE-4], LJ_TISNUM 1684 | mov dword [BASE-4], LJ_TISNUM
1633 | mov dword [BASE-8], RD 1685 | mov dword [BASE-8], RD
1634 |.elif SSE 1686 |.else
1635 | movsd xmm0, qword [BASE+8] 1687 | movsd xmm0, qword [BASE+8]
1636 | sseconst_1 xmm1, RBa 1688 | sseconst_1 xmm1, RBa
1637 | addsd xmm0, xmm1 1689 | addsd xmm0, xmm1
1638 | cvtsd2si RD, xmm0 1690 | cvttsd2si RD, xmm0
1639 | movsd qword [BASE-8], xmm0 1691 | movsd qword [BASE-8], xmm0
1640 |.else
1641 | fld qword [BASE+8]
1642 | fld1
1643 | faddp st1
1644 | fist ARG1
1645 | fstp qword [BASE-8]
1646 | mov RD, ARG1
1647 |.endif 1692 |.endif
1648 | mov TAB:RB, [BASE] 1693 | mov TAB:RB, [BASE]
1649 | cmp RD, TAB:RB->asize; jae >2 // Not in array part? 1694 | cmp RD, TAB:RB->asize; jae >2 // Not in array part?
@@ -1690,12 +1735,9 @@ static void build_subroutines(BuildCtx *ctx)
1690 |.if DUALNUM 1735 |.if DUALNUM
1691 | mov dword [BASE+12], LJ_TISNUM 1736 | mov dword [BASE+12], LJ_TISNUM
1692 | mov dword [BASE+8], 0 1737 | mov dword [BASE+8], 0
1693 |.elif SSE 1738 |.else
1694 | xorps xmm0, xmm0 1739 | xorps xmm0, xmm0
1695 | movsd qword [BASE+8], xmm0 1740 | movsd qword [BASE+8], xmm0
1696 |.else
1697 | fldz
1698 | fstp qword [BASE+8]
1699 |.endif 1741 |.endif
1700 | mov RD, 1+3 1742 | mov RD, 1+3
1701 | jmp ->fff_res 1743 | jmp ->fff_res
@@ -1802,7 +1844,6 @@ static void build_subroutines(BuildCtx *ctx)
1802 | mov ARG3, RA 1844 | mov ARG3, RA
1803 |.endif 1845 |.endif
1804 | call ->vm_resume // (lua_State *L, TValue *base, 0, 0) 1846 | call ->vm_resume // (lua_State *L, TValue *base, 0, 0)
1805 | set_vmstate INTERP
1806 | 1847 |
1807 | mov L:RB, SAVE_L 1848 | mov L:RB, SAVE_L
1808 |.if X64 1849 |.if X64
@@ -1811,6 +1852,9 @@ static void build_subroutines(BuildCtx *ctx)
1811 | mov L:PC, ARG1 // The callee doesn't modify SAVE_L. 1852 | mov L:PC, ARG1 // The callee doesn't modify SAVE_L.
1812 |.endif 1853 |.endif
1813 | mov BASE, L:RB->base 1854 | mov BASE, L:RB->base
1855 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
1856 | set_vmstate INTERP
1857 |
1814 | cmp eax, LUA_YIELD 1858 | cmp eax, LUA_YIELD
1815 | ja >8 1859 | ja >8
1816 |4: 1860 |4:
@@ -1925,12 +1969,10 @@ static void build_subroutines(BuildCtx *ctx)
1925 |->fff_resi: // Dummy. 1969 |->fff_resi: // Dummy.
1926 |.endif 1970 |.endif
1927 | 1971 |
1928 |.if SSE
1929 |->fff_resn: 1972 |->fff_resn:
1930 | mov PC, [BASE-4] 1973 | mov PC, [BASE-4]
1931 | fstp qword [BASE-8] 1974 | fstp qword [BASE-8]
1932 | jmp ->fff_res1 1975 | jmp ->fff_res1
1933 |.endif
1934 | 1976 |
1935 | .ffunc_1 math_abs 1977 | .ffunc_1 math_abs
1936 |.if DUALNUM 1978 |.if DUALNUM
@@ -1954,8 +1996,6 @@ static void build_subroutines(BuildCtx *ctx)
1954 |.else 1996 |.else
1955 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 1997 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1956 |.endif 1998 |.endif
1957 |
1958 |.if SSE
1959 | movsd xmm0, qword [BASE] 1999 | movsd xmm0, qword [BASE]
1960 | sseconst_abs xmm1, RDa 2000 | sseconst_abs xmm1, RDa
1961 | andps xmm0, xmm1 2001 | andps xmm0, xmm1
@@ -1963,15 +2003,6 @@ static void build_subroutines(BuildCtx *ctx)
1963 | mov PC, [BASE-4] 2003 | mov PC, [BASE-4]
1964 | movsd qword [BASE-8], xmm0 2004 | movsd qword [BASE-8], xmm0
1965 | // fallthrough 2005 | // fallthrough
1966 |.else
1967 | fld qword [BASE]
1968 | fabs
1969 | // fallthrough
1970 |->fff_resxmm0: // Dummy.
1971 |->fff_resn:
1972 | mov PC, [BASE-4]
1973 | fstp qword [BASE-8]
1974 |.endif
1975 | 2006 |
1976 |->fff_res1: 2007 |->fff_res1:
1977 | mov RD, 1+1 2008 | mov RD, 1+1
@@ -2008,48 +2039,24 @@ static void build_subroutines(BuildCtx *ctx)
2008 |.else 2039 |.else
2009 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 2040 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
2010 |.endif 2041 |.endif
2011 |.if SSE
2012 | movsd xmm0, qword [BASE] 2042 | movsd xmm0, qword [BASE]
2013 | call ->vm_ .. func 2043 | call ->vm_ .. func .. _sse
2014 | .if DUALNUM 2044 |.if DUALNUM
2015 | cvtsd2si RB, xmm0 2045 | cvttsd2si RB, xmm0
2016 | cmp RB, 0x80000000 2046 | cmp RB, 0x80000000
2017 | jne ->fff_resi 2047 | jne ->fff_resi
2018 | cvtsi2sd xmm1, RB 2048 | cvtsi2sd xmm1, RB
2019 | ucomisd xmm0, xmm1 2049 | ucomisd xmm0, xmm1
2020 | jp ->fff_resxmm0 2050 | jp ->fff_resxmm0
2021 | je ->fff_resi 2051 | je ->fff_resi
2022 | .endif
2023 | jmp ->fff_resxmm0
2024 |.else
2025 | fld qword [BASE]
2026 | call ->vm_ .. func
2027 | .if DUALNUM
2028 | fist ARG1
2029 | mov RB, ARG1
2030 | cmp RB, 0x80000000; jne >2
2031 | fdup
2032 | fild ARG1
2033 | fcomparepp
2034 | jp ->fff_resn
2035 | jne ->fff_resn
2036 |2:
2037 | fpop
2038 | jmp ->fff_resi
2039 | .else
2040 | jmp ->fff_resn
2041 | .endif
2042 |.endif 2052 |.endif
2053 | jmp ->fff_resxmm0
2043 |.endmacro 2054 |.endmacro
2044 | 2055 |
2045 | math_round floor 2056 | math_round floor
2046 | math_round ceil 2057 | math_round ceil
2047 | 2058 |
2048 |.if SSE
2049 |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0 2059 |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0
2050 |.else
2051 |.ffunc_n math_sqrt; fsqrt; jmp ->fff_resn
2052 |.endif
2053 | 2060 |
2054 |.ffunc math_log 2061 |.ffunc math_log
2055 | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. 2062 | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument.
@@ -2072,42 +2079,24 @@ static void build_subroutines(BuildCtx *ctx)
2072 |.ffunc_n math_atan; fld1; fpatan; jmp ->fff_resn 2079 |.ffunc_n math_atan; fld1; fpatan; jmp ->fff_resn
2073 | 2080 |
2074 |.macro math_extern, func 2081 |.macro math_extern, func
2075 |.if SSE
2076 | .ffunc_nsse math_ .. func 2082 | .ffunc_nsse math_ .. func
2077 | .if not X64 2083 |.if not X64
2078 | movsd FPARG1, xmm0 2084 | movsd FPARG1, xmm0
2079 | .endif
2080 |.else
2081 | .ffunc_n math_ .. func
2082 | fstp FPARG1
2083 |.endif 2085 |.endif
2084 | mov RB, BASE 2086 | mov RB, BASE
2085 | call extern lj_vm_ .. func 2087 | call extern lj_vm_ .. func
2086 | mov BASE, RB 2088 | mov BASE, RB
2087 | .if X64 2089 |.if X64
2088 | jmp ->fff_resxmm0 2090 | jmp ->fff_resxmm0
2089 | .else 2091 |.else
2090 | jmp ->fff_resn 2092 | jmp ->fff_resn
2091 | .endif 2093 |.endif
2092 |.endmacro 2094 |.endmacro
2093 | 2095 |
2094 | math_extern sinh 2096 | math_extern sinh
2095 | math_extern cosh 2097 | math_extern cosh
2096 | math_extern tanh 2098 | math_extern tanh
2097 | 2099 |
2098 |->ff_math_deg:
2099 |.if SSE
2100 |.ffunc_nsse math_rad
2101 | mov CFUNC:RB, [BASE-8]
2102 | mulsd xmm0, qword CFUNC:RB->upvalue[0]
2103 | jmp ->fff_resxmm0
2104 |.else
2105 |.ffunc_n math_rad
2106 | mov CFUNC:RB, [BASE-8]
2107 | fmul qword CFUNC:RB->upvalue[0]
2108 | jmp ->fff_resn
2109 |.endif
2110 |
2111 |.ffunc_nn math_atan2; fpatan; jmp ->fff_resn 2100 |.ffunc_nn math_atan2; fpatan; jmp ->fff_resn
2112 |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn 2101 |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn
2113 | 2102 |
@@ -2123,65 +2112,34 @@ static void build_subroutines(BuildCtx *ctx)
2123 | cmp RB, 0x00200000; jb >4 2112 | cmp RB, 0x00200000; jb >4
2124 |1: 2113 |1:
2125 | shr RB, 21; sub RB, RC // Extract and unbias exponent. 2114 | shr RB, 21; sub RB, RC // Extract and unbias exponent.
2126 |.if SSE
2127 | cvtsi2sd xmm0, RB 2115 | cvtsi2sd xmm0, RB
2128 |.else
2129 | mov TMP1, RB; fild TMP1
2130 |.endif
2131 | mov RB, [BASE-4] 2116 | mov RB, [BASE-4]
2132 | and RB, 0x800fffff // Mask off exponent. 2117 | and RB, 0x800fffff // Mask off exponent.
2133 | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0. 2118 | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0.
2134 | mov [BASE-4], RB 2119 | mov [BASE-4], RB
2135 |2: 2120 |2:
2136 |.if SSE
2137 | movsd qword [BASE], xmm0 2121 | movsd qword [BASE], xmm0
2138 |.else
2139 | fstp qword [BASE]
2140 |.endif
2141 | mov RD, 1+2 2122 | mov RD, 1+2
2142 | jmp ->fff_res 2123 | jmp ->fff_res
2143 |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0. 2124 |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0.
2144 |.if SSE
2145 | xorps xmm0, xmm0; jmp <2 2125 | xorps xmm0, xmm0; jmp <2
2146 |.else
2147 | fldz; jmp <2
2148 |.endif
2149 |4: // Handle denormals by multiplying with 2^54 and adjusting the bias. 2126 |4: // Handle denormals by multiplying with 2^54 and adjusting the bias.
2150 |.if SSE
2151 | movsd xmm0, qword [BASE] 2127 | movsd xmm0, qword [BASE]
2152 | sseconst_hi xmm1, RBa, 43500000 // 2^54. 2128 | sseconst_hi xmm1, RBa, 43500000 // 2^54.
2153 | mulsd xmm0, xmm1 2129 | mulsd xmm0, xmm1
2154 | movsd qword [BASE-8], xmm0 2130 | movsd qword [BASE-8], xmm0
2155 |.else
2156 | fld qword [BASE]
2157 | mov TMP1, 0x5a800000; fmul TMP1 // x = x*2^54
2158 | fstp qword [BASE-8]
2159 |.endif
2160 | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1 2131 | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1
2161 | 2132 |
2162 |.if SSE
2163 |.ffunc_nsse math_modf 2133 |.ffunc_nsse math_modf
2164 |.else
2165 |.ffunc_n math_modf
2166 |.endif
2167 | mov RB, [BASE+4] 2134 | mov RB, [BASE+4]
2168 | mov PC, [BASE-4] 2135 | mov PC, [BASE-4]
2169 | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf? 2136 | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf?
2170 |.if SSE
2171 | movaps xmm4, xmm0 2137 | movaps xmm4, xmm0
2172 | call ->vm_trunc 2138 | call ->vm_trunc_sse
2173 | subsd xmm4, xmm0 2139 | subsd xmm4, xmm0
2174 |1: 2140 |1:
2175 | movsd qword [BASE-8], xmm0 2141 | movsd qword [BASE-8], xmm0
2176 | movsd qword [BASE], xmm4 2142 | movsd qword [BASE], xmm4
2177 |.else
2178 | fdup
2179 | call ->vm_trunc
2180 | fsub st1, st0
2181 |1:
2182 | fstp qword [BASE-8]
2183 | fstp qword [BASE]
2184 |.endif
2185 | mov RC, [BASE-4]; mov RB, [BASE+4] 2143 | mov RC, [BASE-4]; mov RB, [BASE+4]
2186 | xor RC, RB; js >3 // Need to adjust sign? 2144 | xor RC, RB; js >3 // Need to adjust sign?
2187 |2: 2145 |2:
@@ -2191,24 +2149,16 @@ static void build_subroutines(BuildCtx *ctx)
2191 | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction. 2149 | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction.
2192 | jmp <2 2150 | jmp <2
2193 |4: 2151 |4:
2194 |.if SSE
2195 | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0. 2152 | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0.
2196 |.else
2197 | fldz; fxch; jmp <1 // Return +-Inf and +-0.
2198 |.endif
2199 | 2153 |
2200 |.ffunc_nnr math_fmod 2154 |.ffunc_nnr math_fmod
2201 |1: ; fprem; fnstsw ax; and ax, 0x400; jnz <1 2155 |1: ; fprem; fnstsw ax; and ax, 0x400; jnz <1
2202 | fpop1 2156 | fpop1
2203 | jmp ->fff_resn 2157 | jmp ->fff_resn
2204 | 2158 |
2205 |.if SSE 2159 |.ffunc_nnsse math_pow; call ->vm_pow_sse; jmp ->fff_resxmm0
2206 |.ffunc_nnsse math_pow; call ->vm_pow; jmp ->fff_resxmm0
2207 |.else
2208 |.ffunc_nn math_pow; call ->vm_pow; jmp ->fff_resn
2209 |.endif
2210 | 2160 |
2211 |.macro math_minmax, name, cmovop, fcmovop, sseop 2161 |.macro math_minmax, name, cmovop, sseop
2212 | .ffunc name 2162 | .ffunc name
2213 | mov RA, 2 2163 | mov RA, 2
2214 | cmp dword [BASE+4], LJ_TISNUM 2164 | cmp dword [BASE+4], LJ_TISNUM
@@ -2225,12 +2175,7 @@ static void build_subroutines(BuildCtx *ctx)
2225 |3: 2175 |3:
2226 | ja ->fff_fallback 2176 | ja ->fff_fallback
2227 | // Convert intermediate result to number and continue below. 2177 | // Convert intermediate result to number and continue below.
2228 |.if SSE
2229 | cvtsi2sd xmm0, RB 2178 | cvtsi2sd xmm0, RB
2230 |.else
2231 | mov TMP1, RB
2232 | fild TMP1
2233 |.endif
2234 | jmp >6 2179 | jmp >6
2235 |4: 2180 |4:
2236 | ja ->fff_fallback 2181 | ja ->fff_fallback
@@ -2238,7 +2183,6 @@ static void build_subroutines(BuildCtx *ctx)
2238 | jae ->fff_fallback 2183 | jae ->fff_fallback
2239 |.endif 2184 |.endif
2240 | 2185 |
2241 |.if SSE
2242 | movsd xmm0, qword [BASE] 2186 | movsd xmm0, qword [BASE]
2243 |5: // Handle numbers or integers. 2187 |5: // Handle numbers or integers.
2244 | cmp RA, RD; jae ->fff_resxmm0 2188 | cmp RA, RD; jae ->fff_resxmm0
@@ -2257,48 +2201,13 @@ static void build_subroutines(BuildCtx *ctx)
2257 | sseop xmm0, xmm1 2201 | sseop xmm0, xmm1
2258 | add RA, 1 2202 | add RA, 1
2259 | jmp <5 2203 | jmp <5
2260 |.else
2261 | fld qword [BASE]
2262 |5: // Handle numbers or integers.
2263 | cmp RA, RD; jae ->fff_resn
2264 | cmp dword [BASE+RA*8-4], LJ_TISNUM
2265 |.if DUALNUM
2266 | jb >6
2267 | ja >9
2268 | fild dword [BASE+RA*8-8]
2269 | jmp >7
2270 |.else
2271 | jae >9
2272 |.endif
2273 |6:
2274 | fld qword [BASE+RA*8-8]
2275 |7:
2276 | fucomi st1; fcmovop st1; fpop1
2277 | add RA, 1
2278 | jmp <5
2279 |.endif
2280 |.endmacro 2204 |.endmacro
2281 | 2205 |
2282 | math_minmax math_min, cmovg, fcmovnbe, minsd 2206 | math_minmax math_min, cmovg, minsd
2283 | math_minmax math_max, cmovl, fcmovbe, maxsd 2207 | math_minmax math_max, cmovl, maxsd
2284 |.if not SSE
2285 |9:
2286 | fpop; jmp ->fff_fallback
2287 |.endif
2288 | 2208 |
2289 |//-- String library ----------------------------------------------------- 2209 |//-- String library -----------------------------------------------------
2290 | 2210 |
2291 |.ffunc_1 string_len
2292 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2293 | mov STR:RB, [BASE]
2294 |.if DUALNUM
2295 | mov RB, dword STR:RB->len; jmp ->fff_resi
2296 |.elif SSE
2297 | cvtsi2sd xmm0, dword STR:RB->len; jmp ->fff_resxmm0
2298 |.else
2299 | fild dword STR:RB->len; jmp ->fff_resn
2300 |.endif
2301 |
2302 |.ffunc string_byte // Only handle the 1-arg case here. 2211 |.ffunc string_byte // Only handle the 1-arg case here.
2303 | cmp NARGS:RD, 1+1; jne ->fff_fallback 2212 | cmp NARGS:RD, 1+1; jne ->fff_fallback
2304 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback 2213 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
@@ -2309,10 +2218,8 @@ static void build_subroutines(BuildCtx *ctx)
2309 | movzx RB, byte STR:RB[1] 2218 | movzx RB, byte STR:RB[1]
2310 |.if DUALNUM 2219 |.if DUALNUM
2311 | jmp ->fff_resi 2220 | jmp ->fff_resi
2312 |.elif SSE
2313 | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0
2314 |.else 2221 |.else
2315 | mov TMP1, RB; fild TMP1; jmp ->fff_resn 2222 | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0
2316 |.endif 2223 |.endif
2317 | 2224 |
2318 |.ffunc string_char // Only handle the 1-arg case here. 2225 |.ffunc string_char // Only handle the 1-arg case here.
@@ -2324,16 +2231,11 @@ static void build_subroutines(BuildCtx *ctx)
2324 | mov RB, dword [BASE] 2231 | mov RB, dword [BASE]
2325 | cmp RB, 255; ja ->fff_fallback 2232 | cmp RB, 255; ja ->fff_fallback
2326 | mov TMP2, RB 2233 | mov TMP2, RB
2327 |.elif SSE 2234 |.else
2328 | jae ->fff_fallback 2235 | jae ->fff_fallback
2329 | cvttsd2si RB, qword [BASE] 2236 | cvttsd2si RB, qword [BASE]
2330 | cmp RB, 255; ja ->fff_fallback 2237 | cmp RB, 255; ja ->fff_fallback
2331 | mov TMP2, RB 2238 | mov TMP2, RB
2332 |.else
2333 | jae ->fff_fallback
2334 | fld qword [BASE]
2335 | fistp TMP2
2336 | cmp TMP2, 255; ja ->fff_fallback
2337 |.endif 2239 |.endif
2338 |.if X64 2240 |.if X64
2339 | mov TMP3, 1 2241 | mov TMP3, 1
@@ -2354,6 +2256,7 @@ static void build_subroutines(BuildCtx *ctx)
2354 |.endif 2256 |.endif
2355 | mov SAVE_PC, PC 2257 | mov SAVE_PC, PC
2356 | call extern lj_str_new // (lua_State *L, char *str, size_t l) 2258 | call extern lj_str_new // (lua_State *L, char *str, size_t l)
2259 |->fff_resstr:
2357 | // GCstr * returned in eax (RD). 2260 | // GCstr * returned in eax (RD).
2358 | mov BASE, L:RB->base 2261 | mov BASE, L:RB->base
2359 | mov PC, [BASE-4] 2262 | mov PC, [BASE-4]
@@ -2371,14 +2274,10 @@ static void build_subroutines(BuildCtx *ctx)
2371 | jne ->fff_fallback 2274 | jne ->fff_fallback
2372 | mov RB, dword [BASE+16] 2275 | mov RB, dword [BASE+16]
2373 | mov TMP2, RB 2276 | mov TMP2, RB
2374 |.elif SSE 2277 |.else
2375 | jae ->fff_fallback 2278 | jae ->fff_fallback
2376 | cvttsd2si RB, qword [BASE+16] 2279 | cvttsd2si RB, qword [BASE+16]
2377 | mov TMP2, RB 2280 | mov TMP2, RB
2378 |.else
2379 | jae ->fff_fallback
2380 | fld qword [BASE+16]
2381 | fistp TMP2
2382 |.endif 2281 |.endif
2383 |1: 2282 |1:
2384 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback 2283 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
@@ -2393,12 +2292,8 @@ static void build_subroutines(BuildCtx *ctx)
2393 | mov RB, STR:RB->len 2292 | mov RB, STR:RB->len
2394 |.if DUALNUM 2293 |.if DUALNUM
2395 | mov RA, dword [BASE+8] 2294 | mov RA, dword [BASE+8]
2396 |.elif SSE
2397 | cvttsd2si RA, qword [BASE+8]
2398 |.else 2295 |.else
2399 | fld qword [BASE+8] 2296 | cvttsd2si RA, qword [BASE+8]
2400 | fistp ARG3
2401 | mov RA, ARG3
2402 |.endif 2297 |.endif
2403 | mov RC, TMP2 2298 | mov RC, TMP2
2404 | cmp RB, RC // len < end? (unsigned compare) 2299 | cmp RB, RC // len < end? (unsigned compare)
@@ -2442,123 +2337,27 @@ static void build_subroutines(BuildCtx *ctx)
2442 | xor RC, RC // Zero length. Any ptr in RB is ok. 2337 | xor RC, RC // Zero length. Any ptr in RB is ok.
2443 | jmp <4 2338 | jmp <4
2444 | 2339 |
2445 |.ffunc string_rep // Only handle the 1-char case inline. 2340 |.macro ffstring_op, name
2446 | ffgccheck 2341 | .ffunc_1 string_ .. name
2447 | cmp NARGS:RD, 2+1; jne ->fff_fallback // Exactly 2 arguments.
2448 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2449 | cmp dword [BASE+12], LJ_TISNUM
2450 | mov STR:RB, [BASE]
2451 |.if DUALNUM
2452 | jne ->fff_fallback
2453 | mov RC, dword [BASE+8]
2454 |.elif SSE
2455 | jae ->fff_fallback
2456 | cvttsd2si RC, qword [BASE+8]
2457 |.else
2458 | jae ->fff_fallback
2459 | fld qword [BASE+8]
2460 | fistp TMP2
2461 | mov RC, TMP2
2462 |.endif
2463 | test RC, RC
2464 | jle ->fff_emptystr // Count <= 0? (or non-int)
2465 | cmp dword STR:RB->len, 1
2466 | jb ->fff_emptystr // Zero length string?
2467 | jne ->fff_fallback_2 // Fallback for > 1-char strings.
2468 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_2
2469 | movzx RA, byte STR:RB[1]
2470 | mov RB, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2471 |.if X64
2472 | mov TMP3, RC
2473 |.else
2474 | mov ARG3, RC
2475 |.endif
2476 |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?).
2477 | mov [RB], RAL
2478 | add RB, 1
2479 | sub RC, 1
2480 | jnz <1
2481 | mov RD, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2482 | jmp ->fff_newstr
2483 |
2484 |.ffunc_1 string_reverse
2485 | ffgccheck
2486 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2487 | mov STR:RB, [BASE]
2488 | mov RC, STR:RB->len
2489 | test RC, RC
2490 | jz ->fff_emptystr // Zero length string?
2491 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1
2492 | add RB, #STR
2493 | mov TMP2, PC // Need another temp register.
2494 |.if X64
2495 | mov TMP3, RC
2496 |.else
2497 | mov ARG3, RC
2498 |.endif
2499 | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2500 |1:
2501 | movzx RA, byte [RB]
2502 | add RB, 1
2503 | sub RC, 1
2504 | mov [PC+RC], RAL
2505 | jnz <1
2506 | mov RD, PC
2507 | mov PC, TMP2
2508 | jmp ->fff_newstr
2509 |
2510 |.macro ffstring_case, name, lo, hi
2511 | .ffunc_1 name
2512 | ffgccheck 2342 | ffgccheck
2513 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback 2343 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2514 | mov STR:RB, [BASE] 2344 | mov L:RB, SAVE_L
2515 | mov RC, STR:RB->len 2345 | lea SBUF:FCARG1, [DISPATCH+DISPATCH_GL(tmpbuf)]
2516 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1 2346 | mov L:RB->base, BASE
2517 | add RB, #STR 2347 | mov STR:FCARG2, [BASE] // Caveat: FCARG2 == BASE
2518 | mov TMP2, PC // Need another temp register. 2348 | mov RC, SBUF:FCARG1->b
2519 |.if X64 2349 | mov SBUF:FCARG1->L, L:RB
2520 | mov TMP3, RC 2350 | mov SBUF:FCARG1->p, RC
2521 |.else 2351 | mov SAVE_PC, PC
2522 | mov ARG3, RC 2352 | call extern lj_buf_putstr_ .. name .. @8
2523 |.endif 2353 | mov FCARG1, eax
2524 | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)] 2354 | call extern lj_buf_tostr@4
2525 | jmp >3 2355 | jmp ->fff_resstr
2526 |1: // ASCII case conversion. Yes, this is suboptimal code (do you care?).
2527 | movzx RA, byte [RB+RC]
2528 | cmp RA, lo
2529 | jb >2
2530 | cmp RA, hi
2531 | ja >2
2532 | xor RA, 0x20
2533 |2:
2534 | mov [PC+RC], RAL
2535 |3:
2536 | sub RC, 1
2537 | jns <1
2538 | mov RD, PC
2539 | mov PC, TMP2
2540 | jmp ->fff_newstr
2541 |.endmacro 2356 |.endmacro
2542 | 2357 |
2543 |ffstring_case string_lower, 0x41, 0x5a 2358 |ffstring_op reverse
2544 |ffstring_case string_upper, 0x61, 0x7a 2359 |ffstring_op lower
2545 | 2360 |ffstring_op upper
2546 |//-- Table library ------------------------------------------------------
2547 |
2548 |.ffunc_1 table_getn
2549 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
2550 | mov RB, BASE // Save BASE.
2551 | mov TAB:FCARG1, [BASE]
2552 | call extern lj_tab_len@4 // LJ_FASTCALL (GCtab *t)
2553 | // Length of table returned in eax (RD).
2554 | mov BASE, RB // Restore BASE.
2555 |.if DUALNUM
2556 | mov RB, RD; jmp ->fff_resi
2557 |.elif SSE
2558 | cvtsi2sd xmm0, RD; jmp ->fff_resxmm0
2559 |.else
2560 | mov ARG1, RD; fild ARG1; jmp ->fff_resn
2561 |.endif
2562 | 2361 |
2563 |//-- Bit library -------------------------------------------------------- 2362 |//-- Bit library --------------------------------------------------------
2564 | 2363 |
@@ -2567,11 +2366,7 @@ static void build_subroutines(BuildCtx *ctx)
2567 |.macro .ffunc_bit, name, kind 2366 |.macro .ffunc_bit, name, kind
2568 | .ffunc_1 name 2367 | .ffunc_1 name
2569 |.if kind == 2 2368 |.if kind == 2
2570 |.if SSE
2571 | sseconst_tobit xmm1, RBa 2369 | sseconst_tobit xmm1, RBa
2572 |.else
2573 | mov TMP1, TOBIT_BIAS
2574 |.endif
2575 |.endif 2370 |.endif
2576 | cmp dword [BASE+4], LJ_TISNUM 2371 | cmp dword [BASE+4], LJ_TISNUM
2577 |.if DUALNUM 2372 |.if DUALNUM
@@ -2587,37 +2382,17 @@ static void build_subroutines(BuildCtx *ctx)
2587 |.else 2382 |.else
2588 | jae ->fff_fallback 2383 | jae ->fff_fallback
2589 |.endif 2384 |.endif
2590 |.if SSE
2591 | movsd xmm0, qword [BASE] 2385 | movsd xmm0, qword [BASE]
2592 |.if kind < 2 2386 |.if kind < 2
2593 | sseconst_tobit xmm1, RBa 2387 | sseconst_tobit xmm1, RBa
2594 |.endif 2388 |.endif
2595 | addsd xmm0, xmm1 2389 | addsd xmm0, xmm1
2596 | movd RB, xmm0 2390 | movd RB, xmm0
2597 |.else
2598 | fld qword [BASE]
2599 |.if kind < 2
2600 | mov TMP1, TOBIT_BIAS
2601 |.endif
2602 | fadd TMP1
2603 | fstp FPARG1
2604 |.if kind > 0
2605 | mov RB, ARG1
2606 |.endif
2607 |.endif
2608 |2: 2391 |2:
2609 |.endmacro 2392 |.endmacro
2610 | 2393 |
2611 |.ffunc_bit bit_tobit, 0 2394 |.ffunc_bit bit_tobit, 0
2612 |.if DUALNUM or SSE
2613 |.if not SSE
2614 | mov RB, ARG1
2615 |.endif
2616 | jmp ->fff_resbit 2395 | jmp ->fff_resbit
2617 |.else
2618 | fild ARG1
2619 | jmp ->fff_resn
2620 |.endif
2621 | 2396 |
2622 |.macro .ffunc_bit_op, name, ins 2397 |.macro .ffunc_bit_op, name, ins
2623 | .ffunc_bit name, 2 2398 | .ffunc_bit name, 2
@@ -2637,17 +2412,10 @@ static void build_subroutines(BuildCtx *ctx)
2637 |.else 2412 |.else
2638 | jae ->fff_fallback_bit_op 2413 | jae ->fff_fallback_bit_op
2639 |.endif 2414 |.endif
2640 |.if SSE
2641 | movsd xmm0, qword [RD] 2415 | movsd xmm0, qword [RD]
2642 | addsd xmm0, xmm1 2416 | addsd xmm0, xmm1
2643 | movd RA, xmm0 2417 | movd RA, xmm0
2644 | ins RB, RA 2418 | ins RB, RA
2645 |.else
2646 | fld qword [RD]
2647 | fadd TMP1
2648 | fstp FPARG1
2649 | ins RB, ARG1
2650 |.endif
2651 | sub RD, 8 2419 | sub RD, 8
2652 | jmp <1 2420 | jmp <1
2653 |.endmacro 2421 |.endmacro
@@ -2664,15 +2432,10 @@ static void build_subroutines(BuildCtx *ctx)
2664 | not RB 2432 | not RB
2665 |.if DUALNUM 2433 |.if DUALNUM
2666 | jmp ->fff_resbit 2434 | jmp ->fff_resbit
2667 |.elif SSE 2435 |.else
2668 |->fff_resbit: 2436 |->fff_resbit:
2669 | cvtsi2sd xmm0, RB 2437 | cvtsi2sd xmm0, RB
2670 | jmp ->fff_resxmm0 2438 | jmp ->fff_resxmm0
2671 |.else
2672 |->fff_resbit:
2673 | mov ARG1, RB
2674 | fild ARG1
2675 | jmp ->fff_resn
2676 |.endif 2439 |.endif
2677 | 2440 |
2678 |->fff_fallback_bit_op: 2441 |->fff_fallback_bit_op:
@@ -2685,22 +2448,13 @@ static void build_subroutines(BuildCtx *ctx)
2685 | // Note: no inline conversion from number for 2nd argument! 2448 | // Note: no inline conversion from number for 2nd argument!
2686 | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback 2449 | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback
2687 | mov RA, dword [BASE+8] 2450 | mov RA, dword [BASE+8]
2688 |.elif SSE 2451 |.else
2689 | .ffunc_nnsse name 2452 | .ffunc_nnsse name
2690 | sseconst_tobit xmm2, RBa 2453 | sseconst_tobit xmm2, RBa
2691 | addsd xmm0, xmm2 2454 | addsd xmm0, xmm2
2692 | addsd xmm1, xmm2 2455 | addsd xmm1, xmm2
2693 | movd RB, xmm0 2456 | movd RB, xmm0
2694 | movd RA, xmm1 2457 | movd RA, xmm1
2695 |.else
2696 | .ffunc_nn name
2697 | mov TMP1, TOBIT_BIAS
2698 | fadd TMP1
2699 | fstp FPARG3
2700 | fadd TMP1
2701 | fstp FPARG1
2702 | mov RA, ARG3
2703 | mov RB, ARG1
2704 |.endif 2458 |.endif
2705 | ins RB, cl // Assumes RA is ecx. 2459 | ins RB, cl // Assumes RA is ecx.
2706 | jmp ->fff_resbit 2460 | jmp ->fff_resbit
@@ -2834,7 +2588,7 @@ static void build_subroutines(BuildCtx *ctx)
2834 | mov FCARG2, PC // Caveat: FCARG2 == BASE 2588 | mov FCARG2, PC // Caveat: FCARG2 == BASE
2835 | mov FCARG1, L:RB 2589 | mov FCARG1, L:RB
2836 | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. 2590 | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
2837 | call extern lj_dispatch_ins@8 // (lua_State *L, BCIns *pc) 2591 | call extern lj_dispatch_ins@8 // (lua_State *L, const BCIns *pc)
2838 |3: 2592 |3:
2839 | mov BASE, L:RB->base 2593 | mov BASE, L:RB->base
2840 |4: 2594 |4:
@@ -2905,6 +2659,82 @@ static void build_subroutines(BuildCtx *ctx)
2905 | add NARGS:RD, 1 2659 | add NARGS:RD, 1
2906 | jmp RBa 2660 | jmp RBa
2907 | 2661 |
2662 |->cont_stitch: // Trace stitching.
2663 |.if JIT
2664 | // BASE = base, RC = result, RB = mbase
2665 | mov RA, [RB-24] // Save previous trace number.
2666 | mov TMP1, RA
2667 | mov TMP3, DISPATCH // Need one more register.
2668 | mov DISPATCH, MULTRES
2669 | movzx RA, PC_RA
2670 | lea RA, [BASE+RA*8] // Call base.
2671 | sub DISPATCH, 1
2672 | jz >2
2673 |1: // Move results down.
2674 |.if X64
2675 | mov RBa, [RC]
2676 | mov [RA], RBa
2677 |.else
2678 | mov RB, [RC]
2679 | mov [RA], RB
2680 | mov RB, [RC+4]
2681 | mov [RA+4], RB
2682 |.endif
2683 | add RC, 8
2684 | add RA, 8
2685 | sub DISPATCH, 1
2686 | jnz <1
2687 |2:
2688 | movzx RC, PC_RA
2689 | movzx RB, PC_RB
2690 | add RC, RB
2691 | lea RC, [BASE+RC*8-8]
2692 |3:
2693 | cmp RC, RA
2694 | ja >9 // More results wanted?
2695 |
2696 | mov DISPATCH, TMP3
2697 | mov RB, TMP1 // Get previous trace number.
2698 | mov RA, [DISPATCH+DISPATCH_J(trace)]
2699 | mov TRACE:RD, [RA+RB*4]
2700 | test TRACE:RD, TRACE:RD
2701 | jz ->cont_nop
2702 | movzx RD, word TRACE:RD->link
2703 | cmp RD, RB
2704 | je ->cont_nop // Blacklisted.
2705 | test RD, RD
2706 | jne =>BC_JLOOP // Jump to stitched trace.
2707 |
2708 | // Stitch a new trace to the previous trace.
2709 | mov [DISPATCH+DISPATCH_J(exitno)], RB
2710 | mov L:RB, SAVE_L
2711 | mov L:RB->base, BASE
2712 | mov FCARG2, PC
2713 | lea FCARG1, [DISPATCH+GG_DISP2J]
2714 | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa
2715 | call extern lj_dispatch_stitch@8 // (jit_State *J, const BCIns *pc)
2716 | mov BASE, L:RB->base
2717 | jmp ->cont_nop
2718 |
2719 |9: // Fill up results with nil.
2720 | mov dword [RA+4], LJ_TNIL
2721 | add RA, 8
2722 | jmp <3
2723 |.endif
2724 |
2725 |->vm_profhook: // Dispatch target for profiler hook.
2726#if LJ_HASPROFILE
2727 | mov L:RB, SAVE_L
2728 | mov L:RB->base, BASE
2729 | mov FCARG2, PC // Caveat: FCARG2 == BASE
2730 | mov FCARG1, L:RB
2731 | call extern lj_dispatch_profile@8 // (lua_State *L, const BCIns *pc)
2732 | mov BASE, L:RB->base
2733 | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
2734 | sub PC, 4
2735 | jmp ->cont_nop
2736#endif
2737 |
2908 |//----------------------------------------------------------------------- 2738 |//-----------------------------------------------------------------------
2909 |//-- Trace exit handler ------------------------------------------------- 2739 |//-- Trace exit handler -------------------------------------------------
2910 |//----------------------------------------------------------------------- 2740 |//-----------------------------------------------------------------------
@@ -2957,10 +2787,9 @@ static void build_subroutines(BuildCtx *ctx)
2957 | movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0 2787 | movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0
2958 |.endif 2788 |.endif
2959 | // Caveat: RB is ebp. 2789 | // Caveat: RB is ebp.
2960 | mov L:RB, [DISPATCH+DISPATCH_GL(jit_L)] 2790 | mov L:RB, [DISPATCH+DISPATCH_GL(cur_L)]
2961 | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)] 2791 | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)]
2962 | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa 2792 | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa
2963 | mov dword [DISPATCH+DISPATCH_GL(jit_L)], 0
2964 | mov L:RB->base, BASE 2793 | mov L:RB->base, BASE
2965 |.if X64WIN 2794 |.if X64WIN
2966 | lea CARG2, [rsp+4*8] 2795 | lea CARG2, [rsp+4*8]
@@ -2970,6 +2799,7 @@ static void build_subroutines(BuildCtx *ctx)
2970 | lea FCARG2, [esp+16] 2799 | lea FCARG2, [esp+16]
2971 |.endif 2800 |.endif
2972 | lea FCARG1, [DISPATCH+GG_DISP2J] 2801 | lea FCARG1, [DISPATCH+GG_DISP2J]
2802 | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0
2973 | call extern lj_trace_exit@8 // (jit_State *J, ExitState *ex) 2803 | call extern lj_trace_exit@8 // (jit_State *J, ExitState *ex)
2974 | // MULTRES or negated error code returned in eax (RD). 2804 | // MULTRES or negated error code returned in eax (RD).
2975 | mov RAa, L:RB->cframe 2805 | mov RAa, L:RB->cframe
@@ -3016,12 +2846,14 @@ static void build_subroutines(BuildCtx *ctx)
3016 | mov r13, TMPa 2846 | mov r13, TMPa
3017 | mov r12, TMPQ 2847 | mov r12, TMPQ
3018 |.endif 2848 |.endif
3019 | test RD, RD; js >3 // Check for error from exit. 2849 | test RD, RD; js >9 // Check for error from exit.
2850 | mov L:RB, SAVE_L
3020 | mov MULTRES, RD 2851 | mov MULTRES, RD
3021 | mov LFUNC:KBASE, [BASE-8] 2852 | mov LFUNC:KBASE, [BASE-8]
3022 | mov KBASE, LFUNC:KBASE->pc 2853 | mov KBASE, LFUNC:KBASE->pc
3023 | mov KBASE, [KBASE+PC2PROTO(k)] 2854 | mov KBASE, [KBASE+PC2PROTO(k)]
3024 | mov dword [DISPATCH+DISPATCH_GL(jit_L)], 0 2855 | mov L:RB->base, BASE
2856 | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0
3025 | set_vmstate INTERP 2857 | set_vmstate INTERP
3026 | // Modified copy of ins_next which handles function header dispatch, too. 2858 | // Modified copy of ins_next which handles function header dispatch, too.
3027 | mov RC, [PC] 2859 | mov RC, [PC]
@@ -3030,16 +2862,31 @@ static void build_subroutines(BuildCtx *ctx)
3030 | add PC, 4 2862 | add PC, 4
3031 | shr RC, 16 2863 | shr RC, 16
3032 | cmp OP, BC_FUNCF // Function header? 2864 | cmp OP, BC_FUNCF // Function header?
3033 | jb >2 2865 | jb >3
3034 | mov RC, MULTRES // RC/RD holds nres+1. 2866 | cmp OP, BC_FUNCC+2 // Fast function?
2867 | jae >4
3035 |2: 2868 |2:
2869 | mov RC, MULTRES // RC/RD holds nres+1.
2870 |3:
3036 |.if X64 2871 |.if X64
3037 | jmp aword [DISPATCH+OP*8] 2872 | jmp aword [DISPATCH+OP*8]
3038 |.else 2873 |.else
3039 | jmp aword [DISPATCH+OP*4] 2874 | jmp aword [DISPATCH+OP*4]
3040 |.endif 2875 |.endif
3041 | 2876 |
3042 |3: // Rethrow error from the right C frame. 2877 |4: // Check frame below fast function.
2878 | mov RC, [BASE-4]
2879 | test RC, FRAME_TYPE
2880 | jnz <2 // Trace stitching continuation?
2881 | // Otherwise set KBASE for Lua function below fast function.
2882 | movzx RC, byte [RC-3]
2883 | not RCa
2884 | mov LFUNC:KBASE, [BASE+RC*8-8]
2885 | mov KBASE, LFUNC:KBASE->pc
2886 | mov KBASE, [KBASE+PC2PROTO(k)]
2887 | jmp <2
2888 |
2889 |9: // Rethrow error from the right C frame.
3043 | neg RD 2890 | neg RD
3044 | mov FCARG1, L:RB 2891 | mov FCARG1, L:RB
3045 | mov FCARG2, RD 2892 | mov FCARG2, RD
@@ -3051,27 +2898,9 @@ static void build_subroutines(BuildCtx *ctx)
3051 |//----------------------------------------------------------------------- 2898 |//-----------------------------------------------------------------------
3052 | 2899 |
3053 |// FP value rounding. Called by math.floor/math.ceil fast functions 2900 |// FP value rounding. Called by math.floor/math.ceil fast functions
3054 |// and from JIT code. 2901 |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
3055 | 2902 |.macro vm_round, name, mode
3056 |// x87 variant: Arg/ret on x87 stack. No int/xmm registers modified. 2903 |->name .. _sse:
3057 |.macro vm_round_x87, mode1, mode2
3058 | fnstcw word [esp+4] // Caveat: overwrites ARG1 and ARG2.
3059 | mov [esp+8], eax
3060 | mov ax, mode1
3061 | or ax, [esp+4]
3062 |.if mode2 ~= 0xffff
3063 | and ax, mode2
3064 |.endif
3065 | mov [esp+6], ax
3066 | fldcw word [esp+6]
3067 | frndint
3068 | fldcw word [esp+4]
3069 | mov eax, [esp+8]
3070 | ret
3071 |.endmacro
3072 |
3073 |// SSE variant: arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
3074 |.macro vm_round_sse, mode
3075 | sseconst_abs xmm2, RDa 2904 | sseconst_abs xmm2, RDa
3076 | sseconst_2p52 xmm3, RDa 2905 | sseconst_2p52 xmm3, RDa
3077 | movaps xmm1, xmm0 2906 | movaps xmm1, xmm0
@@ -3107,22 +2936,21 @@ static void build_subroutines(BuildCtx *ctx)
3107 | ret 2936 | ret
3108 |.endmacro 2937 |.endmacro
3109 | 2938 |
3110 |.macro vm_round, name, ssemode, mode1, mode2 2939 |->vm_floor:
3111 |->name: 2940 |.if not X64
3112 |.if not SSE 2941 | movsd xmm0, qword [esp+4]
3113 | vm_round_x87 mode1, mode2 2942 | call ->vm_floor_sse
2943 | movsd qword [esp+4], xmm0 // Overwrite callee-owned arg.
2944 | fld qword [esp+4]
2945 | ret
3114 |.endif 2946 |.endif
3115 |->name .. _sse:
3116 | vm_round_sse ssemode
3117 |.endmacro
3118 | 2947 |
3119 | vm_round vm_floor, 0, 0x0400, 0xf7ff 2948 | vm_round vm_floor, 0
3120 | vm_round vm_ceil, 1, 0x0800, 0xfbff 2949 | vm_round vm_ceil, 1
3121 | vm_round vm_trunc, 2, 0x0c00, 0xffff 2950 | vm_round vm_trunc, 2
3122 | 2951 |
3123 |// FP modulo x%y. Called by BC_MOD* and vm_arith. 2952 |// FP modulo x%y. Called by BC_MOD* and vm_arith.
3124 |->vm_mod: 2953 |->vm_mod:
3125 |.if SSE
3126 |// Args in xmm0/xmm1, return value in xmm0. 2954 |// Args in xmm0/xmm1, return value in xmm0.
3127 |// Caveat: xmm0-xmm5 and RC (eax) modified! 2955 |// Caveat: xmm0-xmm5 and RC (eax) modified!
3128 | movaps xmm5, xmm0 2956 | movaps xmm5, xmm0
@@ -3150,23 +2978,6 @@ static void build_subroutines(BuildCtx *ctx)
3150 | movaps xmm0, xmm5 2978 | movaps xmm0, xmm5
3151 | subsd xmm0, xmm1 2979 | subsd xmm0, xmm1
3152 | ret 2980 | ret
3153 |.else
3154 |// Args/ret on x87 stack (y on top). No xmm registers modified.
3155 |// Caveat: needs 3 slots on x87 stack! RC (eax) modified!
3156 | fld st1
3157 | fdiv st1
3158 | fnstcw word [esp+4]
3159 | mov ax, 0x0400
3160 | or ax, [esp+4]
3161 | and ax, 0xf7ff
3162 | mov [esp+6], ax
3163 | fldcw word [esp+6]
3164 | frndint
3165 | fldcw word [esp+4]
3166 | fmulp st1
3167 | fsubp st1
3168 | ret
3169 |.endif
3170 | 2981 |
3171 |// FP log2(x). Called by math.log(x, base). 2982 |// FP log2(x). Called by math.log(x, base).
3172 |->vm_log2: 2983 |->vm_log2:
@@ -3217,105 +3028,15 @@ static void build_subroutines(BuildCtx *ctx)
3217 | 3028 |
3218 |// Generic power function x^y. Called by BC_POW, math.pow fast function, 3029 |// Generic power function x^y. Called by BC_POW, math.pow fast function,
3219 |// and vm_arith. 3030 |// and vm_arith.
3220 |// Args/ret on x87 stack (y on top). RC (eax) modified.
3221 |// Caveat: needs 3 slots on x87 stack!
3222 |->vm_pow:
3223 |.if not SSE
3224 | fist dword [esp+4] // Store/reload int before comparison.
3225 | fild dword [esp+4] // Integral exponent used in vm_powi.
3226 | fucomip st1
3227 | jnz >8 // Branch for FP exponents.
3228 | jp >9 // Branch for NaN exponent.
3229 | fpop // Pop y and fallthrough to vm_powi.
3230 |
3231 |// FP/int power function x^i. Arg1/ret on x87 stack.
3232 |// Arg2 (int) on C stack. RC (eax) modified.
3233 |// Caveat: needs 2 slots on x87 stack!
3234 | mov eax, [esp+4]
3235 | cmp eax, 1; jle >6 // i<=1?
3236 | // Now 1 < (unsigned)i <= 0x80000000.
3237 |1: // Handle leading zeros.
3238 | test eax, 1; jnz >2
3239 | fmul st0
3240 | shr eax, 1
3241 | jmp <1
3242 |2:
3243 | shr eax, 1; jz >5
3244 | fdup
3245 |3: // Handle trailing bits.
3246 | fmul st0
3247 | shr eax, 1; jz >4
3248 | jnc <3
3249 | fmul st1, st0
3250 | jmp <3
3251 |4:
3252 | fmulp st1
3253 |5:
3254 | ret
3255 |6:
3256 | je <5 // x^1 ==> x
3257 | jb >7
3258 | fld1; fdivrp st1
3259 | neg eax
3260 | cmp eax, 1; je <5 // x^-1 ==> 1/x
3261 | jmp <1 // x^-i ==> (1/x)^i
3262 |7:
3263 | fpop; fld1 // x^0 ==> 1
3264 | ret
3265 |
3266 |8: // FP/FP power function x^y.
3267 | fst dword [esp+4]
3268 | fxch
3269 | fst dword [esp+8]
3270 | mov eax, [esp+4]; shl eax, 1
3271 | cmp eax, 0xff000000; je >2 // x^+-Inf?
3272 | mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y?
3273 | cmp eax, 0xff000000; je >4 // +-Inf^y?
3274 | fyl2x
3275 | jmp ->vm_exp2raw
3276 |
3277 |9: // Handle x^NaN.
3278 | fld1
3279 | fucomip st2
3280 | je >1 // 1^NaN ==> 1
3281 | fxch // x^NaN ==> NaN
3282 |1:
3283 | fpop
3284 | ret
3285 |
3286 |2: // Handle x^+-Inf.
3287 | fabs
3288 | fld1
3289 | fucomip st1
3290 | je >3 // +-1^+-Inf ==> 1
3291 | fpop; fabs; fldz; mov eax, 0; setc al
3292 | ror eax, 1; xor eax, [esp+4]; jns >3 // |x|<>1, x^+-Inf ==> +Inf/0
3293 | fxch
3294 |3:
3295 | fpop1; fabs
3296 | ret
3297 |
3298 |4: // Handle +-0^y or +-Inf^y.
3299 | cmp dword [esp+4], 0; jge <3 // y >= 0, x^y ==> |x|
3300 | fpop; fpop
3301 | test eax, eax; jz >5 // y < 0, +-0^y ==> +Inf
3302 | fldz // y < 0, +-Inf^y ==> 0
3303 | ret
3304 |5:
3305 | mov dword [esp+4], 0x7f800000 // Return +Inf.
3306 | fld dword [esp+4]
3307 | ret
3308 |.endif
3309 |
3310 |// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified. 3031 |// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified.
3311 |// Needs 16 byte scratch area for x86. Also called from JIT code. 3032 |// Needs 16 byte scratch area for x86. Also called from JIT code.
3312 |->vm_pow_sse: 3033 |->vm_pow_sse:
3313 | cvtsd2si eax, xmm1 3034 | cvttsd2si eax, xmm1
3314 | cvtsi2sd xmm2, eax 3035 | cvtsi2sd xmm2, eax
3315 | ucomisd xmm1, xmm2 3036 | ucomisd xmm1, xmm2
3316 | jnz >8 // Branch for FP exponents. 3037 | jnz >8 // Branch for FP exponents.
3317 | jp >9 // Branch for NaN exponent. 3038 | jp >9 // Branch for NaN exponent.
3318 | // Fallthrough to vm_powi_sse. 3039 | // Fallthrough.
3319 | 3040 |
3320 |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified. 3041 |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
3321 |->vm_powi_sse: 3042 |->vm_powi_sse:
@@ -3437,8 +3158,8 @@ static void build_subroutines(BuildCtx *ctx)
3437 | .else 3158 | .else
3438 | .define fpmop, CARG1d 3159 | .define fpmop, CARG1d
3439 | .endif 3160 | .endif
3440 | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil 3161 | cmp fpmop, 1; jb ->vm_floor_sse; je ->vm_ceil_sse
3441 | cmp fpmop, 3; jb ->vm_trunc; ja >2 3162 | cmp fpmop, 3; jb ->vm_trunc_sse; ja >2
3442 | sqrtsd xmm0, xmm0; ret 3163 | sqrtsd xmm0, xmm0; ret
3443 |2: 3164 |2:
3444 | .if X64WIN 3165 | .if X64WIN
@@ -3478,14 +3199,13 @@ static void build_subroutines(BuildCtx *ctx)
3478 | ret 3199 | ret
3479 |.else // x86 calling convention. 3200 |.else // x86 calling convention.
3480 | .define fpmop, eax 3201 | .define fpmop, eax
3481 |.if SSE
3482 | mov fpmop, [esp+12] 3202 | mov fpmop, [esp+12]
3483 | movsd xmm0, qword [esp+4] 3203 | movsd xmm0, qword [esp+4]
3484 | cmp fpmop, 1; je >1; ja >2 3204 | cmp fpmop, 1; je >1; ja >2
3485 | call ->vm_floor; jmp >7 3205 | call ->vm_floor_sse; jmp >7
3486 |1: ; call ->vm_ceil; jmp >7 3206 |1: ; call ->vm_ceil_sse; jmp >7
3487 |2: ; cmp fpmop, 3; je >1; ja >2 3207 |2: ; cmp fpmop, 3; je >1; ja >2
3488 | call ->vm_trunc; jmp >7 3208 | call ->vm_trunc_sse; jmp >7
3489 |1: 3209 |1:
3490 | sqrtsd xmm0, xmm0 3210 | sqrtsd xmm0, xmm0
3491 |7: 3211 |7:
@@ -3503,23 +3223,6 @@ static void build_subroutines(BuildCtx *ctx)
3503 |2: ; cmp fpmop, 11; je >1; ja >9 3223 |2: ; cmp fpmop, 11; je >1; ja >9
3504 | fcos; ret 3224 | fcos; ret
3505 |1: ; fptan; fpop; ret 3225 |1: ; fptan; fpop; ret
3506 |.else
3507 | mov fpmop, [esp+12]
3508 | fld qword [esp+4]
3509 | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil
3510 | cmp fpmop, 3; jb ->vm_trunc; ja >2
3511 | fsqrt; ret
3512 |2: ; cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87
3513 | cmp fpmop, 7; je >1; ja >2
3514 | fldln2; fxch; fyl2x; ret
3515 |1: ; fld1; fxch; fyl2x; ret
3516 |2: ; cmp fpmop, 9; je >1; ja >2
3517 | fldlg2; fxch; fyl2x; ret
3518 |1: ; fsin; ret
3519 |2: ; cmp fpmop, 11; je >1; ja >9
3520 | fcos; ret
3521 |1: ; fptan; fpop; ret
3522 |.endif
3523 |.endif 3226 |.endif
3524 |9: ; int3 // Bad fpm. 3227 |9: ; int3 // Bad fpm.
3525 |.endif 3228 |.endif
@@ -3541,7 +3244,7 @@ static void build_subroutines(BuildCtx *ctx)
3541 |2: ; cmp foldop, 3; je >1; ja >2 3244 |2: ; cmp foldop, 3; je >1; ja >2
3542 | mulsd xmm0, xmm1; ret 3245 | mulsd xmm0, xmm1; ret
3543 |1: ; divsd xmm0, xmm1; ret 3246 |1: ; divsd xmm0, xmm1; ret
3544 |2: ; cmp foldop, 5; jb ->vm_mod; je ->vm_pow 3247 |2: ; cmp foldop, 5; jb ->vm_mod; je ->vm_pow_sse
3545 | cmp foldop, 7; je >1; ja >2 3248 | cmp foldop, 7; je >1; ja >2
3546 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; ret 3249 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; ret
3547 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; ret 3250 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; ret
@@ -3574,7 +3277,7 @@ static void build_subroutines(BuildCtx *ctx)
3574 |1: ; maxsd xmm0, xmm1; ret 3277 |1: ; maxsd xmm0, xmm1; ret
3575 |9: ; int3 // Bad op. 3278 |9: ; int3 // Bad op.
3576 | 3279 |
3577 |.elif SSE // x86 calling convention with SSE ops. 3280 |.else // x86 calling convention.
3578 | 3281 |
3579 | .define foldop, eax 3282 | .define foldop, eax
3580 | mov foldop, [esp+20] 3283 | mov foldop, [esp+20]
@@ -3593,7 +3296,7 @@ static void build_subroutines(BuildCtx *ctx)
3593 |2: ; cmp foldop, 5 3296 |2: ; cmp foldop, 5
3594 | je >1; ja >2 3297 | je >1; ja >2
3595 | call ->vm_mod; jmp <7 3298 | call ->vm_mod; jmp <7
3596 |1: ; pop edx; call ->vm_pow; push edx; jmp <7 // Writes to scratch area. 3299 |1: ; pop edx; call ->vm_pow_sse; push edx; jmp <7 // Writes to scratch area.
3597 |2: ; cmp foldop, 7; je >1; ja >2 3300 |2: ; cmp foldop, 7; je >1; ja >2
3598 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; jmp <7 3301 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; jmp <7
3599 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; jmp <7 3302 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; jmp <7
@@ -3608,29 +3311,6 @@ static void build_subroutines(BuildCtx *ctx)
3608 |1: ; maxsd xmm0, xmm1; jmp <7 3311 |1: ; maxsd xmm0, xmm1; jmp <7
3609 |9: ; int3 // Bad op. 3312 |9: ; int3 // Bad op.
3610 | 3313 |
3611 |.else // x86 calling convention with x87 ops.
3612 |
3613 | mov eax, [esp+20]
3614 | fld qword [esp+4]
3615 | fld qword [esp+12]
3616 | cmp eax, 1; je >1; ja >2
3617 | faddp st1; ret
3618 |1: ; fsubp st1; ret
3619 |2: ; cmp eax, 3; je >1; ja >2
3620 | fmulp st1; ret
3621 |1: ; fdivp st1; ret
3622 |2: ; cmp eax, 5; jb ->vm_mod; je ->vm_pow
3623 | cmp eax, 7; je >1; ja >2
3624 | fpop; fchs; ret
3625 |1: ; fpop; fabs; ret
3626 |2: ; cmp eax, 9; je >1; ja >2
3627 | fpatan; ret
3628 |1: ; fxch; fscale; fpop1; ret
3629 |2: ; cmp eax, 11; je >1; ja >9
3630 | fucomi st1; fcmovnbe st1; fpop1; ret
3631 |1: ; fucomi st1; fcmovbe st1; fpop1; ret
3632 |9: ; int3 // Bad op.
3633 |
3634 |.endif 3314 |.endif
3635 | 3315 |
3636 |//----------------------------------------------------------------------- 3316 |//-----------------------------------------------------------------------
@@ -3943,19 +3623,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3943 | // RA is a number. 3623 | // RA is a number.
3944 | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp 3624 | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp
3945 | // RA is a number, RD is an integer. 3625 | // RA is a number, RD is an integer.
3946 |.if SSE
3947 | cvtsi2sd xmm0, dword [BASE+RD*8] 3626 | cvtsi2sd xmm0, dword [BASE+RD*8]
3948 | jmp >2 3627 | jmp >2
3949 |.else
3950 | fld qword [BASE+RA*8]
3951 | fild dword [BASE+RD*8]
3952 | jmp >3
3953 |.endif
3954 | 3628 |
3955 |8: // RA is an integer, RD is not an integer. 3629 |8: // RA is an integer, RD is not an integer.
3956 | ja ->vmeta_comp 3630 | ja ->vmeta_comp
3957 | // RA is an integer, RD is a number. 3631 | // RA is an integer, RD is a number.
3958 |.if SSE
3959 | cvtsi2sd xmm1, dword [BASE+RA*8] 3632 | cvtsi2sd xmm1, dword [BASE+RA*8]
3960 | movsd xmm0, qword [BASE+RD*8] 3633 | movsd xmm0, qword [BASE+RD*8]
3961 | add PC, 4 3634 | add PC, 4
@@ -3963,29 +3636,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3963 | jmp_comp jbe, ja, jb, jae, <9 3636 | jmp_comp jbe, ja, jb, jae, <9
3964 | jmp <6 3637 | jmp <6
3965 |.else 3638 |.else
3966 | fild dword [BASE+RA*8]
3967 | jmp >2
3968 |.endif
3969 |.else
3970 | checknum RA, ->vmeta_comp 3639 | checknum RA, ->vmeta_comp
3971 | checknum RD, ->vmeta_comp 3640 | checknum RD, ->vmeta_comp
3972 |.endif 3641 |.endif
3973 |.if SSE
3974 |1: 3642 |1:
3975 | movsd xmm0, qword [BASE+RD*8] 3643 | movsd xmm0, qword [BASE+RD*8]
3976 |2: 3644 |2:
3977 | add PC, 4 3645 | add PC, 4
3978 | ucomisd xmm0, qword [BASE+RA*8] 3646 | ucomisd xmm0, qword [BASE+RA*8]
3979 |3: 3647 |3:
3980 |.else
3981 |1:
3982 | fld qword [BASE+RA*8] // Reverse order, i.e like cmp D, A.
3983 |2:
3984 | fld qword [BASE+RD*8]
3985 |3:
3986 | add PC, 4
3987 | fcomparepp
3988 |.endif
3989 | // Unordered: all of ZF CF PF set, ordered: PF clear. 3648 | // Unordered: all of ZF CF PF set, ordered: PF clear.
3990 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. 3649 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
3991 |.if DUALNUM 3650 |.if DUALNUM
@@ -4025,43 +3684,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4025 | // RD is a number. 3684 | // RD is a number.
4026 | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5 3685 | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5
4027 | // RD is a number, RA is an integer. 3686 | // RD is a number, RA is an integer.
4028 |.if SSE
4029 | cvtsi2sd xmm0, dword [BASE+RA*8] 3687 | cvtsi2sd xmm0, dword [BASE+RA*8]
4030 |.else
4031 | fild dword [BASE+RA*8]
4032 |.endif
4033 | jmp >2 3688 | jmp >2
4034 | 3689 |
4035 |8: // RD is an integer, RA is not an integer. 3690 |8: // RD is an integer, RA is not an integer.
4036 | ja >5 3691 | ja >5
4037 | // RD is an integer, RA is a number. 3692 | // RD is an integer, RA is a number.
4038 |.if SSE
4039 | cvtsi2sd xmm0, dword [BASE+RD*8] 3693 | cvtsi2sd xmm0, dword [BASE+RD*8]
4040 | ucomisd xmm0, qword [BASE+RA*8] 3694 | ucomisd xmm0, qword [BASE+RA*8]
4041 |.else
4042 | fild dword [BASE+RD*8]
4043 | fld qword [BASE+RA*8]
4044 |.endif
4045 | jmp >4 3695 | jmp >4
4046 | 3696 |
4047 |.else 3697 |.else
4048 | cmp RB, LJ_TISNUM; jae >5 3698 | cmp RB, LJ_TISNUM; jae >5
4049 | checknum RA, >5 3699 | checknum RA, >5
4050 |.endif 3700 |.endif
4051 |.if SSE
4052 |1: 3701 |1:
4053 | movsd xmm0, qword [BASE+RA*8] 3702 | movsd xmm0, qword [BASE+RA*8]
4054 |2: 3703 |2:
4055 | ucomisd xmm0, qword [BASE+RD*8] 3704 | ucomisd xmm0, qword [BASE+RD*8]
4056 |4: 3705 |4:
4057 |.else
4058 |1:
4059 | fld qword [BASE+RA*8]
4060 |2:
4061 | fld qword [BASE+RD*8]
4062 |4:
4063 | fcomparepp
4064 |.endif
4065 iseqne_fp: 3706 iseqne_fp:
4066 if (vk) { 3707 if (vk) {
4067 | jp >2 // Unordered means not equal. 3708 | jp >2 // Unordered means not equal.
@@ -4184,39 +3825,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4184 | // RA is a number. 3825 | // RA is a number.
4185 | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1 3826 | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1
4186 | // RA is a number, RD is an integer. 3827 | // RA is a number, RD is an integer.
4187 |.if SSE
4188 | cvtsi2sd xmm0, dword [KBASE+RD*8] 3828 | cvtsi2sd xmm0, dword [KBASE+RD*8]
4189 |.else
4190 | fild dword [KBASE+RD*8]
4191 |.endif
4192 | jmp >2 3829 | jmp >2
4193 | 3830 |
4194 |8: // RA is an integer, RD is a number. 3831 |8: // RA is an integer, RD is a number.
4195 |.if SSE
4196 | cvtsi2sd xmm0, dword [BASE+RA*8] 3832 | cvtsi2sd xmm0, dword [BASE+RA*8]
4197 | ucomisd xmm0, qword [KBASE+RD*8] 3833 | ucomisd xmm0, qword [KBASE+RD*8]
4198 |.else
4199 | fild dword [BASE+RA*8]
4200 | fld qword [KBASE+RD*8]
4201 |.endif
4202 | jmp >4 3834 | jmp >4
4203 |.else 3835 |.else
4204 | cmp RB, LJ_TISNUM; jae >3 3836 | cmp RB, LJ_TISNUM; jae >3
4205 |.endif 3837 |.endif
4206 |.if SSE
4207 |1: 3838 |1:
4208 | movsd xmm0, qword [KBASE+RD*8] 3839 | movsd xmm0, qword [KBASE+RD*8]
4209 |2: 3840 |2:
4210 | ucomisd xmm0, qword [BASE+RA*8] 3841 | ucomisd xmm0, qword [BASE+RA*8]
4211 |4: 3842 |4:
4212 |.else
4213 |1:
4214 | fld qword [KBASE+RD*8]
4215 |2:
4216 | fld qword [BASE+RA*8]
4217 |4:
4218 | fcomparepp
4219 |.endif
4220 goto iseqne_fp; 3843 goto iseqne_fp;
4221 case BC_ISEQP: case BC_ISNEP: 3844 case BC_ISEQP: case BC_ISNEP:
4222 vk = op == BC_ISEQP; 3845 vk = op == BC_ISEQP;
@@ -4267,6 +3890,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4267 | ins_next 3890 | ins_next
4268 break; 3891 break;
4269 3892
3893 case BC_ISTYPE:
3894 | ins_AD // RA = src, RD = -type
3895 | add RD, [BASE+RA*8+4]
3896 | jne ->vmeta_istype
3897 | ins_next
3898 break;
3899 case BC_ISNUM:
3900 | ins_AD // RA = src, RD = -(TISNUM-1)
3901 | checknum RA, ->vmeta_istype
3902 | ins_next
3903 break;
3904
4270 /* -- Unary ops --------------------------------------------------------- */ 3905 /* -- Unary ops --------------------------------------------------------- */
4271 3906
4272 case BC_MOV: 3907 case BC_MOV:
@@ -4310,16 +3945,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4310 |.else 3945 |.else
4311 | checknum RD, ->vmeta_unm 3946 | checknum RD, ->vmeta_unm
4312 |.endif 3947 |.endif
4313 |.if SSE
4314 | movsd xmm0, qword [BASE+RD*8] 3948 | movsd xmm0, qword [BASE+RD*8]
4315 | sseconst_sign xmm1, RDa 3949 | sseconst_sign xmm1, RDa
4316 | xorps xmm0, xmm1 3950 | xorps xmm0, xmm1
4317 | movsd qword [BASE+RA*8], xmm0 3951 | movsd qword [BASE+RA*8], xmm0
4318 |.else
4319 | fld qword [BASE+RD*8]
4320 | fchs
4321 | fstp qword [BASE+RA*8]
4322 |.endif
4323 |.if DUALNUM 3952 |.if DUALNUM
4324 | jmp <9 3953 | jmp <9
4325 |.else 3954 |.else
@@ -4335,15 +3964,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4335 |1: 3964 |1:
4336 | mov dword [BASE+RA*8+4], LJ_TISNUM 3965 | mov dword [BASE+RA*8+4], LJ_TISNUM
4337 | mov dword [BASE+RA*8], RD 3966 | mov dword [BASE+RA*8], RD
4338 |.elif SSE 3967 |.else
4339 | xorps xmm0, xmm0 3968 | xorps xmm0, xmm0
4340 | cvtsi2sd xmm0, dword STR:RD->len 3969 | cvtsi2sd xmm0, dword STR:RD->len
4341 |1: 3970 |1:
4342 | movsd qword [BASE+RA*8], xmm0 3971 | movsd qword [BASE+RA*8], xmm0
4343 |.else
4344 | fild dword STR:RD->len
4345 |1:
4346 | fstp qword [BASE+RA*8]
4347 |.endif 3972 |.endif
4348 | ins_next 3973 | ins_next
4349 |2: 3974 |2:
@@ -4361,11 +3986,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4361 | // Length of table returned in eax (RD). 3986 | // Length of table returned in eax (RD).
4362 |.if DUALNUM 3987 |.if DUALNUM
4363 | // Nothing to do. 3988 | // Nothing to do.
4364 |.elif SSE
4365 | cvtsi2sd xmm0, RD
4366 |.else 3989 |.else
4367 | mov ARG1, RD 3990 | cvtsi2sd xmm0, RD
4368 | fild ARG1
4369 |.endif 3991 |.endif
4370 | mov BASE, RB // Restore BASE. 3992 | mov BASE, RB // Restore BASE.
4371 | movzx RA, PC_RA 3993 | movzx RA, PC_RA
@@ -4380,7 +4002,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4380 4002
4381 /* -- Binary ops -------------------------------------------------------- */ 4003 /* -- Binary ops -------------------------------------------------------- */
4382 4004
4383 |.macro ins_arithpre, x87ins, sseins, ssereg 4005 |.macro ins_arithpre, sseins, ssereg
4384 | ins_ABC 4006 | ins_ABC
4385 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); 4007 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
4386 ||switch (vk) { 4008 ||switch (vk) {
@@ -4389,37 +4011,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4389 | .if DUALNUM 4011 | .if DUALNUM
4390 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn 4012 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn
4391 | .endif 4013 | .endif
4392 | .if SSE 4014 | movsd xmm0, qword [BASE+RB*8]
4393 | movsd xmm0, qword [BASE+RB*8] 4015 | sseins ssereg, qword [KBASE+RC*8]
4394 | sseins ssereg, qword [KBASE+RC*8]
4395 | .else
4396 | fld qword [BASE+RB*8]
4397 | x87ins qword [KBASE+RC*8]
4398 | .endif
4399 || break; 4016 || break;
4400 ||case 1: 4017 ||case 1:
4401 | checknum RB, ->vmeta_arith_nv 4018 | checknum RB, ->vmeta_arith_nv
4402 | .if DUALNUM 4019 | .if DUALNUM
4403 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv 4020 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv
4404 | .endif 4021 | .endif
4405 | .if SSE 4022 | movsd xmm0, qword [KBASE+RC*8]
4406 | movsd xmm0, qword [KBASE+RC*8] 4023 | sseins ssereg, qword [BASE+RB*8]
4407 | sseins ssereg, qword [BASE+RB*8]
4408 | .else
4409 | fld qword [KBASE+RC*8]
4410 | x87ins qword [BASE+RB*8]
4411 | .endif
4412 || break; 4024 || break;
4413 ||default: 4025 ||default:
4414 | checknum RB, ->vmeta_arith_vv 4026 | checknum RB, ->vmeta_arith_vv
4415 | checknum RC, ->vmeta_arith_vv 4027 | checknum RC, ->vmeta_arith_vv
4416 | .if SSE 4028 | movsd xmm0, qword [BASE+RB*8]
4417 | movsd xmm0, qword [BASE+RB*8] 4029 | sseins ssereg, qword [BASE+RC*8]
4418 | sseins ssereg, qword [BASE+RC*8]
4419 | .else
4420 | fld qword [BASE+RB*8]
4421 | x87ins qword [BASE+RC*8]
4422 | .endif
4423 || break; 4030 || break;
4424 ||} 4031 ||}
4425 |.endmacro 4032 |.endmacro
@@ -4457,54 +4064,50 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4457 |.endmacro 4064 |.endmacro
4458 | 4065 |
4459 |.macro ins_arithpost 4066 |.macro ins_arithpost
4460 |.if SSE
4461 | movsd qword [BASE+RA*8], xmm0 4067 | movsd qword [BASE+RA*8], xmm0
4462 |.else
4463 | fstp qword [BASE+RA*8]
4464 |.endif
4465 |.endmacro 4068 |.endmacro
4466 | 4069 |
4467 |.macro ins_arith, x87ins, sseins 4070 |.macro ins_arith, sseins
4468 | ins_arithpre x87ins, sseins, xmm0 4071 | ins_arithpre sseins, xmm0
4469 | ins_arithpost 4072 | ins_arithpost
4470 | ins_next 4073 | ins_next
4471 |.endmacro 4074 |.endmacro
4472 | 4075 |
4473 |.macro ins_arith, intins, x87ins, sseins 4076 |.macro ins_arith, intins, sseins
4474 |.if DUALNUM 4077 |.if DUALNUM
4475 | ins_arithdn intins 4078 | ins_arithdn intins
4476 |.else 4079 |.else
4477 | ins_arith, x87ins, sseins 4080 | ins_arith, sseins
4478 |.endif 4081 |.endif
4479 |.endmacro 4082 |.endmacro
4480 4083
4481 | // RA = dst, RB = src1 or num const, RC = src2 or num const 4084 | // RA = dst, RB = src1 or num const, RC = src2 or num const
4482 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: 4085 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
4483 | ins_arith add, fadd, addsd 4086 | ins_arith add, addsd
4484 break; 4087 break;
4485 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: 4088 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
4486 | ins_arith sub, fsub, subsd 4089 | ins_arith sub, subsd
4487 break; 4090 break;
4488 case BC_MULVN: case BC_MULNV: case BC_MULVV: 4091 case BC_MULVN: case BC_MULNV: case BC_MULVV:
4489 | ins_arith imul, fmul, mulsd 4092 | ins_arith imul, mulsd
4490 break; 4093 break;
4491 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: 4094 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
4492 | ins_arith fdiv, divsd 4095 | ins_arith divsd
4493 break; 4096 break;
4494 case BC_MODVN: 4097 case BC_MODVN:
4495 | ins_arithpre fld, movsd, xmm1 4098 | ins_arithpre movsd, xmm1
4496 |->BC_MODVN_Z: 4099 |->BC_MODVN_Z:
4497 | call ->vm_mod 4100 | call ->vm_mod
4498 | ins_arithpost 4101 | ins_arithpost
4499 | ins_next 4102 | ins_next
4500 break; 4103 break;
4501 case BC_MODNV: case BC_MODVV: 4104 case BC_MODNV: case BC_MODVV:
4502 | ins_arithpre fld, movsd, xmm1 4105 | ins_arithpre movsd, xmm1
4503 | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. 4106 | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
4504 break; 4107 break;
4505 case BC_POW: 4108 case BC_POW:
4506 | ins_arithpre fld, movsd, xmm1 4109 | ins_arithpre movsd, xmm1
4507 | call ->vm_pow 4110 | call ->vm_pow_sse
4508 | ins_arithpost 4111 | ins_arithpost
4509 | ins_next 4112 | ins_next
4510 break; 4113 break;
@@ -4573,25 +4176,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4573 | movsx RD, RDW 4176 | movsx RD, RDW
4574 | mov dword [BASE+RA*8+4], LJ_TISNUM 4177 | mov dword [BASE+RA*8+4], LJ_TISNUM
4575 | mov dword [BASE+RA*8], RD 4178 | mov dword [BASE+RA*8], RD
4576 |.elif SSE 4179 |.else
4577 | movsx RD, RDW // Sign-extend literal. 4180 | movsx RD, RDW // Sign-extend literal.
4578 | cvtsi2sd xmm0, RD 4181 | cvtsi2sd xmm0, RD
4579 | movsd qword [BASE+RA*8], xmm0 4182 | movsd qword [BASE+RA*8], xmm0
4580 |.else
4581 | fild PC_RD // Refetch signed RD from instruction.
4582 | fstp qword [BASE+RA*8]
4583 |.endif 4183 |.endif
4584 | ins_next 4184 | ins_next
4585 break; 4185 break;
4586 case BC_KNUM: 4186 case BC_KNUM:
4587 | ins_AD // RA = dst, RD = num const 4187 | ins_AD // RA = dst, RD = num const
4588 |.if SSE
4589 | movsd xmm0, qword [KBASE+RD*8] 4188 | movsd xmm0, qword [KBASE+RD*8]
4590 | movsd qword [BASE+RA*8], xmm0 4189 | movsd qword [BASE+RA*8], xmm0
4591 |.else
4592 | fld qword [KBASE+RD*8]
4593 | fstp qword [BASE+RA*8]
4594 |.endif
4595 | ins_next 4190 | ins_next
4596 break; 4191 break;
4597 case BC_KPRI: 4192 case BC_KPRI:
@@ -4698,18 +4293,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4698 case BC_USETN: 4293 case BC_USETN:
4699 | ins_AD // RA = upvalue #, RD = num const 4294 | ins_AD // RA = upvalue #, RD = num const
4700 | mov LFUNC:RB, [BASE-8] 4295 | mov LFUNC:RB, [BASE-8]
4701 |.if SSE
4702 | movsd xmm0, qword [KBASE+RD*8] 4296 | movsd xmm0, qword [KBASE+RD*8]
4703 |.else
4704 | fld qword [KBASE+RD*8]
4705 |.endif
4706 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] 4297 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
4707 | mov RA, UPVAL:RB->v 4298 | mov RA, UPVAL:RB->v
4708 |.if SSE
4709 | movsd qword [RA], xmm0 4299 | movsd qword [RA], xmm0
4710 |.else
4711 | fstp qword [RA]
4712 |.endif
4713 | ins_next 4300 | ins_next
4714 break; 4301 break;
4715 case BC_USETP: 4302 case BC_USETP:
@@ -4863,18 +4450,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4863 |.else 4450 |.else
4864 | // Convert number to int and back and compare. 4451 | // Convert number to int and back and compare.
4865 | checknum RC, >5 4452 | checknum RC, >5
4866 |.if SSE
4867 | movsd xmm0, qword [BASE+RC*8] 4453 | movsd xmm0, qword [BASE+RC*8]
4868 | cvtsd2si RC, xmm0 4454 | cvttsd2si RC, xmm0
4869 | cvtsi2sd xmm1, RC 4455 | cvtsi2sd xmm1, RC
4870 | ucomisd xmm0, xmm1 4456 | ucomisd xmm0, xmm1
4871 |.else
4872 | fld qword [BASE+RC*8]
4873 | fist ARG1
4874 | fild ARG1
4875 | fcomparepp
4876 | mov RC, ARG1
4877 |.endif
4878 | jne ->vmeta_tgetv // Generic numeric key? Use fallback. 4457 | jne ->vmeta_tgetv // Generic numeric key? Use fallback.
4879 |.endif 4458 |.endif
4880 | cmp RC, TAB:RB->asize // Takes care of unordered, too. 4459 | cmp RC, TAB:RB->asize // Takes care of unordered, too.
@@ -4998,6 +4577,32 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4998 | mov dword [BASE+RA*8+4], LJ_TNIL 4577 | mov dword [BASE+RA*8+4], LJ_TNIL
4999 | jmp <1 4578 | jmp <1
5000 break; 4579 break;
4580 case BC_TGETR:
4581 | ins_ABC // RA = dst, RB = table, RC = key
4582 | mov TAB:RB, [BASE+RB*8]
4583 |.if DUALNUM
4584 | mov RC, dword [BASE+RC*8]
4585 |.else
4586 | cvttsd2si RC, qword [BASE+RC*8]
4587 |.endif
4588 | cmp RC, TAB:RB->asize
4589 | jae ->vmeta_tgetr // Not in array part? Use fallback.
4590 | shl RC, 3
4591 | add RC, TAB:RB->array
4592 | // Get array slot.
4593 |->BC_TGETR_Z:
4594 |.if X64
4595 | mov RBa, [RC]
4596 | mov [BASE+RA*8], RBa
4597 |.else
4598 | mov RB, [RC]
4599 | mov RC, [RC+4]
4600 | mov [BASE+RA*8], RB
4601 | mov [BASE+RA*8+4], RC
4602 |.endif
4603 |->BC_TGETR2_Z:
4604 | ins_next
4605 break;
5001 4606
5002 case BC_TSETV: 4607 case BC_TSETV:
5003 | ins_ABC // RA = src, RB = table, RC = key 4608 | ins_ABC // RA = src, RB = table, RC = key
@@ -5011,18 +4616,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5011 |.else 4616 |.else
5012 | // Convert number to int and back and compare. 4617 | // Convert number to int and back and compare.
5013 | checknum RC, >5 4618 | checknum RC, >5
5014 |.if SSE
5015 | movsd xmm0, qword [BASE+RC*8] 4619 | movsd xmm0, qword [BASE+RC*8]
5016 | cvtsd2si RC, xmm0 4620 | cvttsd2si RC, xmm0
5017 | cvtsi2sd xmm1, RC 4621 | cvtsi2sd xmm1, RC
5018 | ucomisd xmm0, xmm1 4622 | ucomisd xmm0, xmm1
5019 |.else
5020 | fld qword [BASE+RC*8]
5021 | fist ARG1
5022 | fild ARG1
5023 | fcomparepp
5024 | mov RC, ARG1
5025 |.endif
5026 | jne ->vmeta_tsetv // Generic numeric key? Use fallback. 4623 | jne ->vmeta_tsetv // Generic numeric key? Use fallback.
5027 |.endif 4624 |.endif
5028 | cmp RC, TAB:RB->asize // Takes care of unordered, too. 4625 | cmp RC, TAB:RB->asize // Takes care of unordered, too.
@@ -5192,6 +4789,39 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5192 | movzx RA, PC_RA // Restore RA. 4789 | movzx RA, PC_RA // Restore RA.
5193 | jmp <2 4790 | jmp <2
5194 break; 4791 break;
4792 case BC_TSETR:
4793 | ins_ABC // RA = src, RB = table, RC = key
4794 | mov TAB:RB, [BASE+RB*8]
4795 |.if DUALNUM
4796 | mov RC, dword [BASE+RC*8]
4797 |.else
4798 | cvttsd2si RC, qword [BASE+RC*8]
4799 |.endif
4800 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
4801 | jnz >7
4802 |2:
4803 | cmp RC, TAB:RB->asize
4804 | jae ->vmeta_tsetr
4805 | shl RC, 3
4806 | add RC, TAB:RB->array
4807 | // Set array slot.
4808 |->BC_TSETR_Z:
4809 |.if X64
4810 | mov RBa, [BASE+RA*8]
4811 | mov [RC], RBa
4812 |.else
4813 | mov RB, [BASE+RA*8+4]
4814 | mov RA, [BASE+RA*8]
4815 | mov [RC+4], RB
4816 | mov [RC], RA
4817 |.endif
4818 | ins_next
4819 |
4820 |7: // Possible table write barrier for the value. Skip valiswhite check.
4821 | barrierback TAB:RB, RA
4822 | movzx RA, PC_RA // Restore RA.
4823 | jmp <2
4824 break;
5195 4825
5196 case BC_TSETM: 4826 case BC_TSETM:
5197 | ins_AD // RA = base (table at base-1), RD = num const (start index) 4827 | ins_AD // RA = base (table at base-1), RD = num const (start index)
@@ -5386,10 +5016,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5386 |.if DUALNUM 5016 |.if DUALNUM
5387 | mov dword [BASE+RA*8+4], LJ_TISNUM 5017 | mov dword [BASE+RA*8+4], LJ_TISNUM
5388 | mov dword [BASE+RA*8], RC 5018 | mov dword [BASE+RA*8], RC
5389 |.elif SSE
5390 | cvtsi2sd xmm0, RC
5391 |.else 5019 |.else
5392 | fild dword [BASE+RA*8-8] 5020 | cvtsi2sd xmm0, RC
5393 |.endif 5021 |.endif
5394 | // Copy array slot to returned value. 5022 | // Copy array slot to returned value.
5395 |.if X64 5023 |.if X64
@@ -5405,10 +5033,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5405 | // Return array index as a numeric key. 5033 | // Return array index as a numeric key.
5406 |.if DUALNUM 5034 |.if DUALNUM
5407 | // See above. 5035 | // See above.
5408 |.elif SSE
5409 | movsd qword [BASE+RA*8], xmm0
5410 |.else 5036 |.else
5411 | fstp qword [BASE+RA*8] 5037 | movsd qword [BASE+RA*8], xmm0
5412 |.endif 5038 |.endif
5413 | mov [BASE+RA*8-8], RC // Update control var. 5039 | mov [BASE+RA*8-8], RC // Update control var.
5414 |2: 5040 |2:
@@ -5421,9 +5047,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5421 | 5047 |
5422 |4: // Skip holes in array part. 5048 |4: // Skip holes in array part.
5423 | add RC, 1 5049 | add RC, 1
5424 |.if not (DUALNUM or SSE)
5425 | mov [BASE+RA*8-8], RC
5426 |.endif
5427 | jmp <1 5050 | jmp <1
5428 | 5051 |
5429 |5: // Traverse hash part. 5052 |5: // Traverse hash part.
@@ -5757,7 +5380,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5757 if (!vk) { 5380 if (!vk) {
5758 | cmp RB, LJ_TISNUM; jae ->vmeta_for 5381 | cmp RB, LJ_TISNUM; jae ->vmeta_for
5759 } 5382 }
5760 |.if SSE
5761 | movsd xmm0, qword FOR_IDX 5383 | movsd xmm0, qword FOR_IDX
5762 | movsd xmm1, qword FOR_STOP 5384 | movsd xmm1, qword FOR_STOP
5763 if (vk) { 5385 if (vk) {
@@ -5770,22 +5392,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5770 | ucomisd xmm1, xmm0 5392 | ucomisd xmm1, xmm0
5771 |1: 5393 |1:
5772 | movsd qword FOR_EXT, xmm0 5394 | movsd qword FOR_EXT, xmm0
5773 |.else
5774 | fld qword FOR_STOP
5775 | fld qword FOR_IDX
5776 if (vk) {
5777 | fadd qword FOR_STEP // nidx = idx + step
5778 | fst qword FOR_IDX
5779 | fst qword FOR_EXT
5780 | test RB, RB; js >1
5781 } else {
5782 | fst qword FOR_EXT
5783 | jl >1
5784 }
5785 | fxch // Swap lim/(n)idx if step non-negative.
5786 |1:
5787 | fcomparepp
5788 |.endif
5789 if (op == BC_FORI) { 5395 if (op == BC_FORI) {
5790 |.if DUALNUM 5396 |.if DUALNUM
5791 | jnb <7 5397 | jnb <7
@@ -5813,11 +5419,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5813 |2: 5419 |2:
5814 | ins_next 5420 | ins_next
5815 |.endif 5421 |.endif
5816 |.if SSE 5422 |
5817 |3: // Invert comparison if step is negative. 5423 |3: // Invert comparison if step is negative.
5818 | ucomisd xmm0, xmm1 5424 | ucomisd xmm0, xmm1
5819 | jmp <1 5425 | jmp <1
5820 |.endif
5821 break; 5426 break;
5822 5427
5823 case BC_ITERL: 5428 case BC_ITERL:
@@ -5855,7 +5460,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5855 | ins_A // RA = base, RD = target (loop extent) 5460 | ins_A // RA = base, RD = target (loop extent)
5856 | // Note: RA/RD is only used by trace recorder to determine scope/extent 5461 | // Note: RA/RD is only used by trace recorder to determine scope/extent
5857 | // This opcode does NOT jump, it's only purpose is to detect a hot loop. 5462 | // This opcode does NOT jump, it's only purpose is to detect a hot loop.
5858 |.if JIT 5463 |.if JIT
5859 | hotloop RB 5464 | hotloop RB
5860 |.endif 5465 |.endif
5861 | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op. 5466 | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op.
@@ -5874,7 +5479,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5874 | mov RDa, TRACE:RD->mcode 5479 | mov RDa, TRACE:RD->mcode
5875 | mov L:RB, SAVE_L 5480 | mov L:RB, SAVE_L
5876 | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE 5481 | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE
5877 | mov [DISPATCH+DISPATCH_GL(jit_L)], L:RB 5482 | mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB
5878 | // Save additional callee-save registers only used in compiled code. 5483 | // Save additional callee-save registers only used in compiled code.
5879 |.if X64WIN 5484 |.if X64WIN
5880 | mov TMPQ, r12 5485 | mov TMPQ, r12
@@ -6041,9 +5646,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
6041 | // (lua_State *L, lua_CFunction f) 5646 | // (lua_State *L, lua_CFunction f)
6042 | call aword [DISPATCH+DISPATCH_GL(wrapf)] 5647 | call aword [DISPATCH+DISPATCH_GL(wrapf)]
6043 } 5648 }
6044 | set_vmstate INTERP
6045 | // nresults returned in eax (RD). 5649 | // nresults returned in eax (RD).
6046 | mov BASE, L:RB->base 5650 | mov BASE, L:RB->base
5651 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
5652 | set_vmstate INTERP
6047 | lea RA, [BASE+RD*8] 5653 | lea RA, [BASE+RD*8]
6048 | neg RA 5654 | neg RA
6049 | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8 5655 | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8