aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/Makefile22
-rw-r--r--src/Makefile.dep186
-rw-r--r--src/host/buildvm.c15
-rw-r--r--src/host/buildvm_lib.c61
-rw-r--r--src/host/buildvm_libbc.h30
-rw-r--r--src/host/genlibbc.lua197
-rw-r--r--src/jit/bc.lua19
-rw-r--r--src/jit/bcsave.lua8
-rw-r--r--src/jit/dis_arm.lua18
-rw-r--r--src/jit/dis_mips.lua30
-rw-r--r--src/jit/dis_mipsel.lua15
-rw-r--r--src/jit/dis_ppc.lua18
-rw-r--r--src/jit/dis_x64.lua15
-rw-r--r--src/jit/dis_x86.lua38
-rw-r--r--src/jit/dump.lua22
-rw-r--r--src/jit/v.lua12
-rw-r--r--src/lib_base.c41
-rw-r--r--src/lib_bit.c134
-rw-r--r--src/lib_ffi.c15
-rw-r--r--src/lib_io.c27
-rw-r--r--src/lib_jit.c35
-rw-r--r--src/lib_math.c7
-rw-r--r--src/lib_os.c37
-rw-r--r--src/lib_string.c445
-rw-r--r--src/lib_table.c140
-rw-r--r--src/lj_api.c25
-rw-r--r--src/lj_arch.h1
-rw-r--r--src/lj_asm.c425
-rw-r--r--src/lj_asm_arm.h422
-rw-r--r--src/lj_asm_mips.h349
-rw-r--r--src/lj_asm_ppc.h353
-rw-r--r--src/lj_asm_x86.h514
-rw-r--r--src/lj_bc.h4
-rw-r--r--src/lj_bcdump.h3
-rw-r--r--src/lj_bcread.c139
-rw-r--r--src/lj_bcwrite.c228
-rw-r--r--src/lj_buf.c222
-rw-r--r--src/lj_buf.h105
-rw-r--r--src/lj_carith.c76
-rw-r--r--src/lj_carith.h10
-rw-r--r--src/lj_ccall.c1
-rw-r--r--src/lj_cdata.c22
-rw-r--r--src/lj_cdata.h5
-rw-r--r--src/lj_clib.c13
-rw-r--r--src/lj_cparse.c52
-rw-r--r--src/lj_crecord.c219
-rw-r--r--src/lj_crecord.h7
-rw-r--r--src/lj_ctype.c15
-rw-r--r--src/lj_debug.c50
-rw-r--r--src/lj_debug.h2
-rw-r--r--src/lj_dispatch.c2
-rw-r--r--src/lj_dispatch.h13
-rw-r--r--src/lj_emit_arm.h16
-rw-r--r--src/lj_emit_mips.h16
-rw-r--r--src/lj_emit_ppc.h16
-rw-r--r--src/lj_emit_x86.h24
-rw-r--r--src/lj_err.c23
-rw-r--r--src/lj_errmsg.h5
-rw-r--r--src/lj_ffrecord.c389
-rw-r--r--src/lj_gc.c15
-rw-r--r--src/lj_gdbjit.c14
-rw-r--r--src/lj_ir.c5
-rw-r--r--src/lj_ir.h17
-rw-r--r--src/lj_ircall.h135
-rw-r--r--src/lj_jit.h17
-rw-r--r--src/lj_lex.c342
-rw-r--r--src/lj_lex.h17
-rw-r--r--src/lj_lib.c44
-rw-r--r--src/lj_lib.h5
-rw-r--r--src/lj_load.c4
-rw-r--r--src/lj_meta.c73
-rw-r--r--src/lj_meta.h1
-rw-r--r--src/lj_obj.c17
-rw-r--r--src/lj_obj.h22
-rw-r--r--src/lj_opt_fold.c268
-rw-r--r--src/lj_opt_loop.c5
-rw-r--r--src/lj_opt_narrow.c3
-rw-r--r--src/lj_opt_split.c131
-rw-r--r--src/lj_parse.c166
-rw-r--r--src/lj_record.c102
-rw-r--r--src/lj_state.c5
-rw-r--r--src/lj_str.c210
-rw-r--r--src/lj_str.h35
-rw-r--r--src/lj_strfmt.c549
-rw-r--r--src/lj_strfmt.h124
-rw-r--r--src/lj_tab.h2
-rw-r--r--src/lj_target_arm.h4
-rw-r--r--src/lj_target_mips.h3
-rw-r--r--src/lj_target_x86.h3
-rw-r--r--src/lj_vm.h4
-rw-r--r--src/ljamalg.c2
-rw-r--r--src/luaconf.h4
-rw-r--r--src/luajit.c13
-rw-r--r--src/luajit.h6
-rw-r--r--src/msvcbuild.bat1
-rw-r--r--src/vm_arm.dasc214
-rw-r--r--src/vm_mips.dasc252
-rw-r--r--src/vm_ppc.dasc242
-rw-r--r--src/vm_ppcspe.dasc6
-rw-r--r--src/vm_x86.dasc965
100 files changed, 5230 insertions, 4170 deletions
diff --git a/src/Makefile b/src/Makefile
index 84b3355d..fa5aed1c 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -42,13 +42,10 @@ CCOPT= -O2 -fomit-frame-pointer
42# 42#
43# Target-specific compiler options: 43# Target-specific compiler options:
44# 44#
45# x86 only: it's recommended to compile at least for i686. Better yet,
46# compile for an architecture that has SSE2, too (-msse -msse2).
47#
48# x86/x64 only: For GCC 4.2 or higher and if you don't intend to distribute 45# x86/x64 only: For GCC 4.2 or higher and if you don't intend to distribute
49# the binaries to a different machine you could also use: -march=native 46# the binaries to a different machine you could also use: -march=native
50# 47#
51CCOPT_x86= -march=i686 48CCOPT_x86= -march=i686 -msse -msse2 -mfpmath=sse
52CCOPT_x64= 49CCOPT_x64=
53CCOPT_arm= 50CCOPT_arm=
54CCOPT_ppc= 51CCOPT_ppc=
@@ -394,11 +391,6 @@ DASM_AFLAGS+= -D VER=$(subst LJ_ARCH_VERSION_,,$(filter LJ_ARCH_VERSION_%,$(subs
394ifeq (Windows,$(TARGET_SYS)) 391ifeq (Windows,$(TARGET_SYS))
395 DASM_AFLAGS+= -D WIN 392 DASM_AFLAGS+= -D WIN
396endif 393endif
397ifeq (x86,$(TARGET_LJARCH))
398 ifneq (,$(findstring __SSE2__ 1,$(TARGET_TESTARCH)))
399 DASM_AFLAGS+= -D SSE
400 endif
401else
402ifeq (x64,$(TARGET_LJARCH)) 394ifeq (x64,$(TARGET_LJARCH))
403 DASM_ARCH= x86 395 DASM_ARCH= x86
404else 396else
@@ -423,7 +415,6 @@ ifeq (ppc,$(TARGET_LJARCH))
423endif 415endif
424endif 416endif
425endif 417endif
426endif
427 418
428DASM_FLAGS= $(DASM_XFLAGS) $(DASM_AFLAGS) 419DASM_FLAGS= $(DASM_XFLAGS) $(DASM_AFLAGS)
429DASM_DASC= vm_$(DASM_ARCH).dasc 420DASM_DASC= vm_$(DASM_ARCH).dasc
@@ -445,10 +436,11 @@ LJLIB_O= lib_base.o lib_math.o lib_bit.o lib_string.o lib_table.o \
445 lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o 436 lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o
446LJLIB_C= $(LJLIB_O:.o=.c) 437LJLIB_C= $(LJLIB_O:.o=.c)
447 438
448LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o \ 439LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \
449 lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \ 440 lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \
450 lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_strscan.o \ 441 lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_strscan.o \
451 lj_api.o lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \ 442 lj_strfmt.o lj_api.o \
443 lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \
452 lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \ 444 lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \
453 lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \ 445 lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \
454 lj_mcode.o lj_snap.o lj_record.o lj_crecord.o lj_ffrecord.o \ 446 lj_mcode.o lj_snap.o lj_record.o lj_crecord.o lj_ffrecord.o \
@@ -568,6 +560,10 @@ amalg:
568clean: 560clean:
569 $(HOST_RM) $(ALL_RM) 561 $(HOST_RM) $(ALL_RM)
570 562
563libbc:
564 ./$(LUAJIT_T) host/genlibbc.lua -o host/buildvm_libbc.h $(LJLIB_C)
565 $(MAKE) all
566
571depend: 567depend:
572 @for file in $(ALL_HDRGEN); do \ 568 @for file in $(ALL_HDRGEN); do \
573 test -f $$file || touch $$file; \ 569 test -f $$file || touch $$file; \
@@ -582,7 +578,7 @@ depend:
582 test -s $$file || $(HOST_RM) $$file; \ 578 test -s $$file || $(HOST_RM) $$file; \
583 done 579 done
584 580
585.PHONY: default all amalg clean depend 581.PHONY: default all amalg clean libbc depend
586 582
587############################################################################## 583##############################################################################
588# Rules for generated files. 584# Rules for generated files.
diff --git a/src/Makefile.dep b/src/Makefile.dep
index 5d91723a..0ea0d98e 100644
--- a/src/Makefile.dep
+++ b/src/Makefile.dep
@@ -5,43 +5,47 @@ lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
5 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h \ 5 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h \
6 lj_tab.h lj_meta.h lj_state.h lj_ctype.h lj_cconv.h lj_bc.h lj_ff.h \ 6 lj_tab.h lj_meta.h lj_state.h lj_ctype.h lj_cconv.h lj_bc.h lj_ff.h \
7 lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h lj_strscan.h \ 7 lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h lj_strscan.h \
8 lj_lib.h lj_libdef.h 8 lj_strfmt.h lj_lib.h lj_libdef.h
9lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ 9lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
10 lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_lib.h lj_libdef.h 10 lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_strscan.h \
11 lj_strfmt.h lj_ctype.h lj_cdata.h lj_cconv.h lj_carith.h lj_ff.h \
12 lj_ffdef.h lj_lib.h lj_libdef.h
11lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ 13lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
12 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_lib.h \ 14 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_lib.h \
13 lj_libdef.h 15 lj_libdef.h
14lib_ffi.o: lib_ffi.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ 16lib_ffi.o: lib_ffi.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
15 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h \ 17 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h \
16 lj_ctype.h lj_cparse.h lj_cdata.h lj_cconv.h lj_carith.h lj_ccall.h \ 18 lj_ctype.h lj_cparse.h lj_cdata.h lj_cconv.h lj_carith.h lj_ccall.h \
17 lj_ccallback.h lj_clib.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h 19 lj_ccallback.h lj_clib.h lj_strfmt.h lj_ff.h lj_ffdef.h lj_lib.h \
20 lj_libdef.h
18lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h 21lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h
19lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ 22lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
20 lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_state.h lj_ff.h lj_ffdef.h \ 23 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_state.h \
21 lj_lib.h lj_libdef.h 24 lj_strfmt.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h
22lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h \ 25lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h \
23 lj_obj.h lj_def.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h \ 26 lj_obj.h lj_def.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h \
24 lj_bc.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_target.h \ 27 lj_state.h lj_bc.h lj_ctype.h lj_gc.h lj_ir.h lj_jit.h lj_ircall.h \
25 lj_target_*.h lj_dispatch.h lj_vm.h lj_vmevent.h lj_lib.h luajit.h \ 28 lj_iropt.h lj_target.h lj_target_*.h lj_dispatch.h lj_vm.h \
26 lj_libdef.h 29 lj_vmevent.h lj_lib.h luajit.h lj_libdef.h
27lib_math.o: lib_math.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ 30lib_math.o: lib_math.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
28 lj_def.h lj_arch.h lj_lib.h lj_vm.h lj_libdef.h 31 lj_def.h lj_arch.h lj_lib.h lj_vm.h lj_libdef.h
29lib_os.o: lib_os.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ 32lib_os.o: lib_os.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
30 lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lj_libdef.h 33 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_lib.h \
34 lj_libdef.h
31lib_package.o: lib_package.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ 35lib_package.o: lib_package.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
32 lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h 36 lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h
33lib_string.o: lib_string.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ 37lib_string.o: lib_string.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
34 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h \ 38 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \
35 lj_meta.h lj_state.h lj_ff.h lj_ffdef.h lj_bcdump.h lj_lex.h lj_char.h \ 39 lj_tab.h lj_meta.h lj_state.h lj_ff.h lj_ffdef.h lj_bcdump.h lj_lex.h \
36 lj_lib.h lj_libdef.h 40 lj_char.h lj_strfmt.h lj_lib.h lj_libdef.h
37lib_table.o: lib_table.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ 41lib_table.o: lib_table.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
38 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_lib.h \ 42 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \
39 lj_libdef.h 43 lj_tab.h lj_lib.h lj_libdef.h
40lj_alloc.o: lj_alloc.c lj_def.h lua.h luaconf.h lj_arch.h lj_alloc.h 44lj_alloc.o: lj_alloc.c lj_def.h lua.h luaconf.h lj_arch.h lj_alloc.h
41lj_api.o: lj_api.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 45lj_api.o: lj_api.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
42 lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h lj_udata.h \ 46 lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h lj_udata.h \
43 lj_meta.h lj_state.h lj_bc.h lj_frame.h lj_trace.h lj_jit.h lj_ir.h \ 47 lj_meta.h lj_state.h lj_bc.h lj_frame.h lj_trace.h lj_jit.h lj_ir.h \
44 lj_dispatch.h lj_traceerr.h lj_vm.h lj_strscan.h 48 lj_dispatch.h lj_traceerr.h lj_vm.h lj_strscan.h lj_strfmt.h
45lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 49lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
46 lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h \ 50 lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h \
47 lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h lj_traceerr.h \ 51 lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h lj_traceerr.h \
@@ -50,17 +54,20 @@ lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
50lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h \ 54lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h \
51 lj_bcdef.h 55 lj_bcdef.h
52lj_bcread.o: lj_bcread.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 56lj_bcread.o: lj_bcread.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
53 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_bc.h lj_ctype.h \ 57 lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_bc.h \
54 lj_cdata.h lualib.h lj_lex.h lj_bcdump.h lj_state.h 58 lj_ctype.h lj_cdata.h lualib.h lj_lex.h lj_bcdump.h lj_state.h \
59 lj_strfmt.h
55lj_bcwrite.o: lj_bcwrite.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 60lj_bcwrite.o: lj_bcwrite.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
56 lj_gc.h lj_str.h lj_bc.h lj_ctype.h lj_dispatch.h lj_jit.h lj_ir.h \ 61 lj_gc.h lj_buf.h lj_str.h lj_bc.h lj_ctype.h lj_dispatch.h lj_jit.h \
57 lj_bcdump.h lj_lex.h lj_err.h lj_errmsg.h lj_vm.h 62 lj_ir.h lj_strfmt.h lj_bcdump.h lj_lex.h lj_err.h lj_errmsg.h lj_vm.h
63lj_buf.o: lj_buf.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
64 lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_strfmt.h
58lj_carith.o: lj_carith.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 65lj_carith.o: lj_carith.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
59 lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_meta.h lj_ctype.h lj_cconv.h \ 66 lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_meta.h lj_ir.h lj_ctype.h \
60 lj_cdata.h lj_carith.h 67 lj_cconv.h lj_cdata.h lj_carith.h lj_strscan.h
61lj_ccall.o: lj_ccall.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 68lj_ccall.o: lj_ccall.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
62 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cconv.h \ 69 lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_cconv.h lj_cdata.h \
63 lj_cdata.h lj_ccall.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \ 70 lj_ccall.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \
64 lj_traceerr.h 71 lj_traceerr.h
65lj_ccallback.o: lj_ccallback.c lj_obj.h lua.h luaconf.h lj_def.h \ 72lj_ccallback.o: lj_ccallback.c lj_obj.h lua.h luaconf.h lj_def.h \
66 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_state.h lj_frame.h \ 73 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_state.h lj_frame.h \
@@ -71,79 +78,82 @@ lj_cconv.o: lj_cconv.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
71 lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_gc.h lj_cdata.h lj_cconv.h \ 78 lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_gc.h lj_cdata.h lj_cconv.h \
72 lj_ccallback.h 79 lj_ccallback.h
73lj_cdata.o: lj_cdata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 80lj_cdata.o: lj_cdata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
74 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cconv.h \ 81 lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_cconv.h lj_cdata.h
75 lj_cdata.h
76lj_char.o: lj_char.c lj_char.h lj_def.h lua.h luaconf.h 82lj_char.o: lj_char.c lj_char.h lj_def.h lua.h luaconf.h
77lj_clib.o: lj_clib.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 83lj_clib.o: lj_clib.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
78 lj_err.h lj_errmsg.h lj_tab.h lj_str.h lj_udata.h lj_ctype.h lj_cconv.h \ 84 lj_err.h lj_errmsg.h lj_tab.h lj_str.h lj_udata.h lj_ctype.h lj_cconv.h \
79 lj_cdata.h lj_clib.h 85 lj_cdata.h lj_clib.h lj_strfmt.h
80lj_cparse.o: lj_cparse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 86lj_cparse.o: lj_cparse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
81 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_ctype.h lj_cparse.h lj_frame.h \ 87 lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_ctype.h lj_cparse.h \
82 lj_bc.h lj_vm.h lj_char.h lj_strscan.h 88 lj_frame.h lj_bc.h lj_vm.h lj_char.h lj_strscan.h lj_strfmt.h
83lj_crecord.o: lj_crecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 89lj_crecord.o: lj_crecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
84 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h \ 90 lj_err.h lj_errmsg.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_gc.h \
85 lj_gc.h lj_cdata.h lj_cparse.h lj_cconv.h lj_clib.h lj_ccall.h lj_ff.h \ 91 lj_cdata.h lj_cparse.h lj_cconv.h lj_carith.h lj_clib.h lj_ccall.h \
86 lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ 92 lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
87 lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h \ 93 lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h \
88 lj_crecord.h 94 lj_crecord.h lj_strfmt.h
89lj_ctype.o: lj_ctype.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 95lj_ctype.o: lj_ctype.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
90 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_ccallback.h 96 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_strfmt.h lj_ctype.h \
97 lj_ccallback.h
91lj_debug.o: lj_debug.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 98lj_debug.o: lj_debug.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
92 lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_state.h lj_frame.h \ 99 lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_gc.h lj_str.h lj_tab.h \
93 lj_bc.h lj_jit.h lj_ir.h 100 lj_state.h lj_frame.h lj_bc.h lj_strfmt.h lj_jit.h lj_ir.h
94lj_dispatch.o: lj_dispatch.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 101lj_dispatch.o: lj_dispatch.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
95 lj_err.h lj_errmsg.h lj_func.h lj_str.h lj_tab.h lj_meta.h lj_debug.h \ 102 lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_func.h lj_tab.h \
96 lj_state.h lj_frame.h lj_bc.h lj_ff.h lj_ffdef.h lj_jit.h lj_ir.h \ 103 lj_meta.h lj_debug.h lj_state.h lj_frame.h lj_bc.h lj_ff.h lj_ffdef.h \
97 lj_ccallback.h lj_ctype.h lj_gc.h lj_trace.h lj_dispatch.h lj_traceerr.h \ 104 lj_strfmt.h lj_jit.h lj_ir.h lj_ccallback.h lj_ctype.h lj_trace.h \
98 lj_vm.h luajit.h 105 lj_dispatch.h lj_traceerr.h lj_vm.h luajit.h
99lj_err.o: lj_err.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_err.h \ 106lj_err.o: lj_err.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_err.h \
100 lj_errmsg.h lj_debug.h lj_str.h lj_func.h lj_state.h lj_frame.h lj_bc.h \ 107 lj_errmsg.h lj_debug.h lj_str.h lj_func.h lj_state.h lj_frame.h lj_bc.h \
101 lj_ff.h lj_ffdef.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \ 108 lj_ff.h lj_ffdef.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \
102 lj_traceerr.h lj_vm.h 109 lj_traceerr.h lj_vm.h lj_strfmt.h
103lj_ffrecord.o: lj_ffrecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 110lj_ffrecord.o: lj_ffrecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
104 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ff.h \ 111 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ff.h \
105 lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ 112 lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
106 lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_crecord.h \ 113 lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_crecord.h \
107 lj_vm.h lj_strscan.h lj_recdef.h 114 lj_vm.h lj_strscan.h lj_strfmt.h lj_recdef.h
108lj_func.o: lj_func.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 115lj_func.o: lj_func.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
109 lj_func.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \ 116 lj_func.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \
110 lj_traceerr.h lj_vm.h 117 lj_traceerr.h lj_vm.h
111lj_gc.o: lj_gc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 118lj_gc.o: lj_gc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
112 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_udata.h lj_meta.h \ 119 lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_func.h lj_udata.h \
113 lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h lj_trace.h lj_jit.h \ 120 lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h lj_trace.h \
114 lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h 121 lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h
115lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 122lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
116 lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_frame.h lj_bc.h lj_jit.h \ 123 lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_frame.h lj_bc.h lj_buf.h \
117 lj_ir.h lj_dispatch.h 124 lj_str.h lj_strfmt.h lj_jit.h lj_ir.h lj_dispatch.h
118lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 125lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
119 lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ 126 lj_buf.h lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \
120 lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h lj_carith.h \ 127 lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h \
121 lj_vm.h lj_strscan.h lj_lib.h 128 lj_carith.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lib.h
122lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 129lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
123 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h lualib.h \ 130 lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h \
124 lj_state.h lj_lex.h lj_parse.h lj_char.h lj_strscan.h 131 lualib.h lj_state.h lj_lex.h lj_parse.h lj_char.h lj_strscan.h \
132 lj_strfmt.h
125lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \ 133lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \
126 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_bc.h \ 134 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_bc.h \
127 lj_dispatch.h lj_jit.h lj_ir.h lj_vm.h lj_strscan.h lj_lib.h 135 lj_dispatch.h lj_jit.h lj_ir.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lex.h \
136 lj_bcdump.h lj_lib.h
128lj_load.o: lj_load.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \ 137lj_load.o: lj_load.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \
129 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_func.h lj_frame.h \ 138 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_func.h \
130 lj_bc.h lj_vm.h lj_lex.h lj_bcdump.h lj_parse.h 139 lj_frame.h lj_bc.h lj_vm.h lj_lex.h lj_bcdump.h lj_parse.h
131lj_mcode.o: lj_mcode.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 140lj_mcode.o: lj_mcode.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
132 lj_gc.h lj_jit.h lj_ir.h lj_mcode.h lj_trace.h lj_dispatch.h lj_bc.h \ 141 lj_gc.h lj_jit.h lj_ir.h lj_mcode.h lj_trace.h lj_dispatch.h lj_bc.h \
133 lj_traceerr.h lj_vm.h 142 lj_traceerr.h lj_vm.h
134lj_meta.o: lj_meta.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 143lj_meta.o: lj_meta.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
135 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \ 144 lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_meta.h lj_frame.h \
136 lj_vm.h lj_strscan.h 145 lj_bc.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lib.h
137lj_obj.o: lj_obj.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h 146lj_obj.o: lj_obj.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h
138lj_opt_dce.o: lj_opt_dce.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 147lj_opt_dce.o: lj_opt_dce.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
139 lj_ir.h lj_jit.h lj_iropt.h 148 lj_ir.h lj_jit.h lj_iropt.h
140lj_opt_fold.o: lj_opt_fold.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 149lj_opt_fold.o: lj_opt_fold.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
141 lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \ 150 lj_buf.h lj_gc.h lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h \
142 lj_bc.h lj_traceerr.h lj_ctype.h lj_gc.h lj_carith.h lj_vm.h \ 151 lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h \
143 lj_strscan.h lj_folddef.h 152 lj_carith.h lj_vm.h lj_strscan.h lj_strfmt.h lj_folddef.h
144lj_opt_loop.o: lj_opt_loop.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 153lj_opt_loop.o: lj_opt_loop.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
145 lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \ 154 lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_ir.h lj_jit.h \
146 lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h lj_vm.h 155 lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h \
156 lj_vm.h
147lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 157lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
148 lj_tab.h lj_ir.h lj_jit.h lj_iropt.h 158 lj_tab.h lj_ir.h lj_jit.h lj_iropt.h
149lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \ 159lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \
@@ -152,11 +162,12 @@ lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \
152lj_opt_sink.o: lj_opt_sink.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 162lj_opt_sink.o: lj_opt_sink.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
153 lj_ir.h lj_jit.h lj_iropt.h lj_target.h lj_target_*.h 163 lj_ir.h lj_jit.h lj_iropt.h lj_target.h lj_target_*.h
154lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \ 164lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \
155 lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_ircall.h \ 165 lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_ir.h \
156 lj_iropt.h lj_vm.h 166 lj_jit.h lj_ircall.h lj_iropt.h lj_vm.h
157lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 167lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
158 lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h \ 168 lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_str.h lj_tab.h \
159 lj_state.h lj_bc.h lj_ctype.h lj_lex.h lj_parse.h lj_vm.h lj_vmevent.h 169 lj_func.h lj_state.h lj_bc.h lj_ctype.h lj_strfmt.h lj_lex.h lj_parse.h \
170 lj_vm.h lj_vmevent.h
160lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 171lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
161 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \ 172 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \
162 lj_ctype.h lj_gc.h lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h \ 173 lj_ctype.h lj_gc.h lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h \
@@ -167,11 +178,13 @@ lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
167 lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h \ 178 lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h \
168 lj_target_*.h lj_ctype.h lj_cdata.h 179 lj_target_*.h lj_ctype.h lj_cdata.h
169lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 180lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
170 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_meta.h \ 181 lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_func.h \
171 lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_trace.h lj_jit.h lj_ir.h \ 182 lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_trace.h lj_jit.h \
172 lj_dispatch.h lj_traceerr.h lj_vm.h lj_lex.h lj_alloc.h 183 lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_lex.h lj_alloc.h
173lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 184lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
174 lj_err.h lj_errmsg.h lj_str.h lj_state.h lj_char.h 185 lj_err.h lj_errmsg.h lj_str.h lj_char.h
186lj_strfmt.o: lj_strfmt.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
187 lj_buf.h lj_gc.h lj_str.h lj_state.h lj_char.h lj_strfmt.h
175lj_strscan.o: lj_strscan.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 188lj_strscan.o: lj_strscan.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
176 lj_char.h lj_strscan.h 189 lj_char.h lj_strscan.h
177lj_tab.o: lj_tab.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 190lj_tab.o: lj_tab.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
@@ -189,26 +202,26 @@ lj_vmevent.o: lj_vmevent.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
189lj_vmmath.o: lj_vmmath.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 202lj_vmmath.o: lj_vmmath.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
190 lj_ir.h lj_vm.h 203 lj_ir.h lj_vm.h
191ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \ 204ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \
192 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h \ 205 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h \
193 lj_udata.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h \ 206 lj_func.h lj_udata.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h \
194 lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_err.c \ 207 lj_cdata.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h \
195 lj_debug.h lj_ff.h lj_ffdef.h lj_char.c lj_char.h lj_bc.c lj_bcdef.h \ 208 lj_vm.h lj_err.c lj_debug.h lj_ff.h lj_ffdef.h lj_strfmt.h lj_char.c \
196 lj_obj.c lj_str.c lj_tab.c lj_func.c lj_udata.c lj_meta.c lj_strscan.h \ 209 lj_char.h lj_bc.c lj_bcdef.h lj_obj.c lj_buf.c lj_str.c lj_tab.c \
197 lj_debug.c lj_state.c lj_lex.h lj_alloc.h lj_dispatch.c lj_ccallback.h \ 210 lj_func.c lj_udata.c lj_meta.c lj_strscan.h lj_lib.h lj_debug.c \
198 luajit.h lj_vmevent.c lj_vmevent.h lj_vmmath.c lj_strscan.c lj_api.c \ 211 lj_state.c lj_lex.h lj_alloc.h lj_dispatch.c lj_ccallback.h luajit.h \
212 lj_vmevent.c lj_vmevent.h lj_vmmath.c lj_strscan.c lj_strfmt.c lj_api.c \
199 lj_lex.c lualib.h lj_parse.h lj_parse.c lj_bcread.c lj_bcdump.h \ 213 lj_lex.c lualib.h lj_parse.h lj_parse.c lj_bcread.c lj_bcdump.h \
200 lj_bcwrite.c lj_load.c lj_ctype.c lj_cdata.c lj_cconv.h lj_cconv.c \ 214 lj_bcwrite.c lj_load.c lj_ctype.c lj_cdata.c lj_cconv.h lj_cconv.c \
201 lj_ccall.c lj_ccall.h lj_ccallback.c lj_target.h lj_target_*.h \ 215 lj_ccall.c lj_ccall.h lj_ccallback.c lj_target.h lj_target_*.h \
202 lj_mcode.h lj_carith.c lj_carith.h lj_clib.c lj_clib.h lj_cparse.c \ 216 lj_mcode.h lj_carith.c lj_carith.h lj_clib.c lj_clib.h lj_cparse.c \
203 lj_cparse.h lj_lib.c lj_lib.h lj_ir.c lj_ircall.h lj_iropt.h \ 217 lj_cparse.h lj_lib.c lj_ir.c lj_ircall.h lj_iropt.h lj_opt_mem.c \
204 lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c lj_opt_dce.c \ 218 lj_opt_fold.c lj_folddef.h lj_opt_narrow.c lj_opt_dce.c lj_opt_loop.c \
205 lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c lj_mcode.c \ 219 lj_snap.h lj_opt_split.c lj_opt_sink.c lj_mcode.c lj_snap.c lj_record.c \
206 lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \ 220 lj_record.h lj_ffrecord.h lj_crecord.c lj_crecord.h lj_ffrecord.c \
207 lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h \ 221 lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h lj_asm_*.h lj_trace.c \
208 lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c \ 222 lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c lib_base.c lj_libdef.h \
209 lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c \ 223 lib_math.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c \
210 lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c lib_ffi.c \ 224 lib_debug.c lib_bit.c lib_jit.c lib_ffi.c lib_init.c
211 lib_init.c
212luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h 225luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h
213host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \ 226host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \
214 lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \ 227 lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \
@@ -220,7 +233,8 @@ host/buildvm_asm.o: host/buildvm_asm.c host/buildvm.h lj_def.h lua.h luaconf.h \
220host/buildvm_fold.o: host/buildvm_fold.c host/buildvm.h lj_def.h lua.h \ 233host/buildvm_fold.o: host/buildvm_fold.c host/buildvm.h lj_def.h lua.h \
221 luaconf.h lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_ir.h lj_obj.h 234 luaconf.h lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_ir.h lj_obj.h
222host/buildvm_lib.o: host/buildvm_lib.c host/buildvm.h lj_def.h lua.h luaconf.h \ 235host/buildvm_lib.o: host/buildvm_lib.c host/buildvm.h lj_def.h lua.h luaconf.h \
223 lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_lib.h lj_obj.h 236 lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_bc.h lj_lib.h lj_obj.h \
237 host/buildvm_libbc.h
224host/buildvm_peobj.o: host/buildvm_peobj.c host/buildvm.h lj_def.h lua.h \ 238host/buildvm_peobj.o: host/buildvm_peobj.c host/buildvm.h lj_def.h lua.h \
225 luaconf.h lj_arch.h lj_bc.h lj_def.h lj_arch.h 239 luaconf.h lj_arch.h lj_bc.h lj_def.h lj_arch.h
226host/minilua.o: host/minilua.c 240host/minilua.o: host/minilua.c
diff --git a/src/host/buildvm.c b/src/host/buildvm.c
index ba8fbcba..ef5f7fb9 100644
--- a/src/host/buildvm.c
+++ b/src/host/buildvm.c
@@ -314,20 +314,20 @@ static void emit_vmdef(BuildCtx *ctx)
314 char buf[80]; 314 char buf[80];
315 int i; 315 int i;
316 fprintf(ctx->fp, "-- This is a generated file. DO NOT EDIT!\n\n"); 316 fprintf(ctx->fp, "-- This is a generated file. DO NOT EDIT!\n\n");
317 fprintf(ctx->fp, "module(...)\n\n"); 317 fprintf(ctx->fp, "return {\n\n");
318 318
319 fprintf(ctx->fp, "bcnames = \""); 319 fprintf(ctx->fp, "bcnames = \"");
320 for (i = 0; bc_names[i]; i++) fprintf(ctx->fp, "%-6s", bc_names[i]); 320 for (i = 0; bc_names[i]; i++) fprintf(ctx->fp, "%-6s", bc_names[i]);
321 fprintf(ctx->fp, "\"\n\n"); 321 fprintf(ctx->fp, "\",\n\n");
322 322
323 fprintf(ctx->fp, "irnames = \""); 323 fprintf(ctx->fp, "irnames = \"");
324 for (i = 0; ir_names[i]; i++) fprintf(ctx->fp, "%-6s", ir_names[i]); 324 for (i = 0; ir_names[i]; i++) fprintf(ctx->fp, "%-6s", ir_names[i]);
325 fprintf(ctx->fp, "\"\n\n"); 325 fprintf(ctx->fp, "\",\n\n");
326 326
327 fprintf(ctx->fp, "irfpm = { [0]="); 327 fprintf(ctx->fp, "irfpm = { [0]=");
328 for (i = 0; irfpm_names[i]; i++) 328 for (i = 0; irfpm_names[i]; i++)
329 fprintf(ctx->fp, "\"%s\", ", lower(buf, irfpm_names[i])); 329 fprintf(ctx->fp, "\"%s\", ", lower(buf, irfpm_names[i]));
330 fprintf(ctx->fp, "}\n\n"); 330 fprintf(ctx->fp, "},\n\n");
331 331
332 fprintf(ctx->fp, "irfield = { [0]="); 332 fprintf(ctx->fp, "irfield = { [0]=");
333 for (i = 0; irfield_names[i]; i++) { 333 for (i = 0; irfield_names[i]; i++) {
@@ -337,17 +337,17 @@ static void emit_vmdef(BuildCtx *ctx)
337 if (p) *p = '.'; 337 if (p) *p = '.';
338 fprintf(ctx->fp, "\"%s\", ", buf); 338 fprintf(ctx->fp, "\"%s\", ", buf);
339 } 339 }
340 fprintf(ctx->fp, "}\n\n"); 340 fprintf(ctx->fp, "},\n\n");
341 341
342 fprintf(ctx->fp, "ircall = {\n[0]="); 342 fprintf(ctx->fp, "ircall = {\n[0]=");
343 for (i = 0; ircall_names[i]; i++) 343 for (i = 0; ircall_names[i]; i++)
344 fprintf(ctx->fp, "\"%s\",\n", ircall_names[i]); 344 fprintf(ctx->fp, "\"%s\",\n", ircall_names[i]);
345 fprintf(ctx->fp, "}\n\n"); 345 fprintf(ctx->fp, "},\n\n");
346 346
347 fprintf(ctx->fp, "traceerr = {\n[0]="); 347 fprintf(ctx->fp, "traceerr = {\n[0]=");
348 for (i = 0; trace_errors[i]; i++) 348 for (i = 0; trace_errors[i]; i++)
349 fprintf(ctx->fp, "\"%s\",\n", trace_errors[i]); 349 fprintf(ctx->fp, "\"%s\",\n", trace_errors[i]);
350 fprintf(ctx->fp, "}\n\n"); 350 fprintf(ctx->fp, "},\n\n");
351} 351}
352 352
353/* -- Argument parsing ---------------------------------------------------- */ 353/* -- Argument parsing ---------------------------------------------------- */
@@ -484,6 +484,7 @@ int main(int argc, char **argv)
484 case BUILD_vmdef: 484 case BUILD_vmdef:
485 emit_vmdef(ctx); 485 emit_vmdef(ctx);
486 emit_lib(ctx); 486 emit_lib(ctx);
487 fprintf(ctx->fp, "}\n\n");
487 break; 488 break;
488 case BUILD_ffdef: 489 case BUILD_ffdef:
489 case BUILD_libdef: 490 case BUILD_libdef:
diff --git a/src/host/buildvm_lib.c b/src/host/buildvm_lib.c
index 40141dfb..f578bbc9 100644
--- a/src/host/buildvm_lib.c
+++ b/src/host/buildvm_lib.c
@@ -5,7 +5,9 @@
5 5
6#include "buildvm.h" 6#include "buildvm.h"
7#include "lj_obj.h" 7#include "lj_obj.h"
8#include "lj_bc.h"
8#include "lj_lib.h" 9#include "lj_lib.h"
10#include "buildvm_libbc.h"
9 11
10/* Context for library definitions. */ 12/* Context for library definitions. */
11static uint8_t obuf[8192]; 13static uint8_t obuf[8192];
@@ -151,6 +153,62 @@ static void libdef_func(BuildCtx *ctx, char *p, int arg)
151 regfunc = REGFUNC_OK; 153 regfunc = REGFUNC_OK;
152} 154}
153 155
156static uint8_t *libdef_uleb128(uint8_t *p, uint32_t *vv)
157{
158 uint32_t v = *p++;
159 if (v >= 0x80) {
160 int sh = 0; v &= 0x7f;
161 do { v |= ((*p & 0x7f) << (sh += 7)); } while (*p++ >= 0x80);
162 }
163 *vv = v;
164 return p;
165}
166
167static void libdef_fixupbc(uint8_t *p)
168{
169 uint32_t i, sizebc;
170 p += 4;
171 p = libdef_uleb128(p, &sizebc);
172 p = libdef_uleb128(p, &sizebc);
173 p = libdef_uleb128(p, &sizebc);
174 for (i = 0; i < sizebc; i++, p += 4) {
175 uint8_t op = p[libbc_endian ? 3 : 0];
176 uint8_t ra = p[libbc_endian ? 2 : 1];
177 uint8_t rc = p[libbc_endian ? 1 : 2];
178 uint8_t rb = p[libbc_endian ? 0 : 3];
179 if (!LJ_DUALNUM && op == BC_ISTYPE && rc == ~LJ_TNUMX+1) {
180 op = BC_ISNUM; rc++;
181 }
182 p[LJ_ENDIAN_SELECT(0, 3)] = op;
183 p[LJ_ENDIAN_SELECT(1, 2)] = ra;
184 p[LJ_ENDIAN_SELECT(2, 1)] = rc;
185 p[LJ_ENDIAN_SELECT(3, 0)] = rb;
186 }
187}
188
189static void libdef_lua(BuildCtx *ctx, char *p, int arg)
190{
191 UNUSED(arg);
192 if (ctx->mode == BUILD_libdef) {
193 int i;
194 for (i = 0; libbc_map[i].name != NULL; i++) {
195 if (!strcmp(libbc_map[i].name, p)) {
196 int ofs = libbc_map[i].ofs;
197 int len = libbc_map[i+1].ofs - ofs;
198 obuf[2]++; /* Bump hash table size. */
199 *optr++ = LIBINIT_LUA;
200 libdef_name(p, 0);
201 memcpy(optr, libbc_code + ofs, len);
202 libdef_fixupbc(optr);
203 optr += len;
204 return;
205 }
206 }
207 fprintf(stderr, "Error: missing libbc definition for %s\n", p);
208 exit(1);
209 }
210}
211
154static uint32_t find_rec(char *name) 212static uint32_t find_rec(char *name)
155{ 213{
156 char *p = (char *)obuf; 214 char *p = (char *)obuf;
@@ -277,6 +335,7 @@ static const LibDefHandler libdef_handlers[] = {
277 { "CF(", ")", libdef_func, LIBINIT_CF }, 335 { "CF(", ")", libdef_func, LIBINIT_CF },
278 { "ASM(", ")", libdef_func, LIBINIT_ASM }, 336 { "ASM(", ")", libdef_func, LIBINIT_ASM },
279 { "ASM_(", ")", libdef_func, LIBINIT_ASM_ }, 337 { "ASM_(", ")", libdef_func, LIBINIT_ASM_ },
338 { "LUA(", ")", libdef_lua, 0 },
280 { "REC(", ")", libdef_rec, 0 }, 339 { "REC(", ")", libdef_rec, 0 },
281 { "PUSH(", ")", libdef_push, 0 }, 340 { "PUSH(", ")", libdef_push, 0 },
282 { "SET(", ")", libdef_set, 0 }, 341 { "SET(", ")", libdef_set, 0 },
@@ -373,7 +432,7 @@ void emit_lib(BuildCtx *ctx)
373 "#ifndef FF_NUM_ASMFUNC\n#define FF_NUM_ASMFUNC %d\n#endif\n\n", 432 "#ifndef FF_NUM_ASMFUNC\n#define FF_NUM_ASMFUNC %d\n#endif\n\n",
374 ffasmfunc); 433 ffasmfunc);
375 } else if (ctx->mode == BUILD_vmdef) { 434 } else if (ctx->mode == BUILD_vmdef) {
376 fprintf(ctx->fp, "}\n\n"); 435 fprintf(ctx->fp, "},\n\n");
377 } else if (ctx->mode == BUILD_bcdef) { 436 } else if (ctx->mode == BUILD_bcdef) {
378 int i; 437 int i;
379 fprintf(ctx->fp, "\n};\n\n"); 438 fprintf(ctx->fp, "\n};\n\n");
diff --git a/src/host/buildvm_libbc.h b/src/host/buildvm_libbc.h
new file mode 100644
index 00000000..e96c8a53
--- /dev/null
+++ b/src/host/buildvm_libbc.h
@@ -0,0 +1,30 @@
1/* This is a generated file. DO NOT EDIT! */
2
3static const int libbc_endian = 0;
4
5static const uint8_t libbc_code[] = {
60,1,2,0,0,1,2,24,1,0,0,76,1,2,0,241,135,158,166,3,220,203,178,130,4,0,1,2,0,
70,1,2,24,1,0,0,76,1,2,0,243,244,148,165,20,198,190,199,252,3,0,1,2,0,0,0,3,
816,0,5,0,21,1,0,0,76,1,2,0,0,2,9,0,0,0,15,16,0,12,0,16,1,9,0,41,2,1,0,21,3,
90,0,41,4,1,0,77,2,8,128,18,6,1,0,18,7,5,0,59,8,5,0,66,6,3,2,10,6,0,0,88,7,1,
10128,76,6,2,0,79,2,248,127,75,0,1,0,0,2,10,0,0,0,16,16,0,12,0,16,1,9,0,43,2,
110,0,18,3,0,0,41,4,0,0,88,5,7,128,18,7,1,0,18,8,5,0,18,9,6,0,66,7,3,2,10,7,0,
120,88,8,1,128,76,7,2,0,70,5,3,3,82,5,247,127,75,0,1,0,0,1,2,0,0,0,3,16,0,12,
130,21,1,0,0,76,1,2,0,0,2,10,0,0,2,30,16,0,12,0,21,2,0,0,11,1,0,0,88,3,7,128,
148,2,0,0,88,3,23,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2,0,88,3,18,128,16,1,14,
150,41,3,1,0,3,3,1,0,88,3,14,128,3,1,2,0,88,3,12,128,59,3,1,0,22,4,1,1,18,5,2,
160,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,252,127,43,4,0,0,64,4,
172,0,76,3,2,0,75,0,1,0,0,2,0
18};
19
20static const struct { const char *name; int ofs; } libbc_map[] = {
21{"math_deg",0},
22{"math_rad",25},
23{"string_len",50},
24{"table_foreachi",69},
25{"table_foreach",136},
26{"table_getn",207},
27{"table_remove",226},
28{NULL,355}
29};
30
diff --git a/src/host/genlibbc.lua b/src/host/genlibbc.lua
new file mode 100644
index 00000000..16f0a0b6
--- /dev/null
+++ b/src/host/genlibbc.lua
@@ -0,0 +1,197 @@
1----------------------------------------------------------------------------
2-- Lua script to dump the bytecode of the library functions written in Lua.
3-- The resulting 'buildvm_libbc.h' is used for the build process of LuaJIT.
4----------------------------------------------------------------------------
5-- Copyright (C) 2005-2013 Mike Pall. All rights reserved.
6-- Released under the MIT license. See Copyright Notice in luajit.h
7----------------------------------------------------------------------------
8
9local ffi = require("ffi")
10local bit = require("bit")
11local vmdef = require("jit.vmdef")
12local bcnames = vmdef.bcnames
13
14local format = string.format
15
16local isbe = (string.byte(string.dump(function() end), 5) % 2 == 1)
17
18local function usage(arg)
19 io.stderr:write("Usage: ", arg and arg[0] or "genlibbc",
20 " [-o buildvm_libbc.h] lib_*.c\n")
21 os.exit(1)
22end
23
24local function parse_arg(arg)
25 local outfile = "-"
26 if not (arg and arg[1]) then
27 usage(arg)
28 end
29 if arg[1] == "-o" then
30 outfile = arg[2]
31 if not outfile then usage(arg) end
32 table.remove(arg, 1)
33 table.remove(arg, 1)
34 end
35 return outfile
36end
37
38local function read_files(names)
39 local src = ""
40 for _,name in ipairs(names) do
41 local fp = assert(io.open(name))
42 src = src .. fp:read("*a")
43 fp:close()
44 end
45 return src
46end
47
48local function transform_lua(code)
49 local fixup = {}
50 local n = -30000
51 code = string.gsub(code, "CHECK_(%w*)%((.-)%)", function(tp, var)
52 n = n + 1
53 fixup[n] = { "CHECK", tp }
54 return format("%s=%d", var, n)
55 end)
56 code = string.gsub(code, "PAIRS%((.-)%)", function(var)
57 fixup.PAIRS = true
58 return format("nil, %s, 0", var)
59 end)
60 return "return "..code, fixup
61end
62
63local function read_uleb128(p)
64 local v = p[0]; p = p + 1
65 if v >= 128 then
66 local sh = 7; v = v - 128
67 repeat
68 local r = p[0]
69 v = v + bit.lshift(bit.band(r, 127), sh)
70 sh = sh + 7
71 p = p + 1
72 until r < 128
73 end
74 return p, v
75end
76
77-- ORDER LJ_T
78local name2itype = {
79 str = 5, func = 9, tab = 12, int = 14, num = 15
80}
81
82local BC = {}
83for i=0,#bcnames/6-1 do
84 BC[string.gsub(string.sub(bcnames, i*6+1, i*6+6), " ", "")] = i
85end
86local xop, xra = isbe and 3 or 0, isbe and 2 or 1
87local xrc, xrb = isbe and 1 or 2, isbe and 0 or 3
88
89local function fixup_dump(dump, fixup)
90 local buf = ffi.new("uint8_t[?]", #dump+1, dump)
91 local p = buf+5
92 local n, sizebc
93 p, n = read_uleb128(p)
94 local start = p
95 p = p + 4
96 p = read_uleb128(p)
97 p = read_uleb128(p)
98 p, sizebc = read_uleb128(p)
99 local rawtab = {}
100 for i=0,sizebc-1 do
101 local op = p[xop]
102 if op == BC.KSHORT then
103 local rd = p[xrc] + 256*p[xrb]
104 rd = bit.arshift(bit.lshift(rd, 16), 16)
105 local f = fixup[rd]
106 if f then
107 if f[1] == "CHECK" then
108 local tp = f[2]
109 if tp == "tab" then rawtab[p[xra]] = true end
110 p[xop] = tp == "num" and BC.ISNUM or BC.ISTYPE
111 p[xrb] = 0
112 p[xrc] = name2itype[tp]
113 else
114 error("unhandled fixup type: "..f[1])
115 end
116 end
117 elseif op == BC.TGETV then
118 if rawtab[p[xrb]] then
119 p[xop] = BC.TGETR
120 end
121 elseif op == BC.TSETV then
122 if rawtab[p[xrb]] then
123 p[xop] = BC.TSETR
124 end
125 elseif op == BC.ITERC then
126 if fixup.PAIRS then
127 p[xop] = BC.ITERN
128 end
129 end
130 p = p + 4
131 end
132 return ffi.string(start, n)
133end
134
135local function find_defs(src)
136 local defs = {}
137 for name, code in string.gmatch(src, "LJLIB_LUA%(([^)]*)%)%s*/%*(.-)%*/") do
138 local env = {}
139 local tcode, fixup = transform_lua(code)
140 local func = assert(load(tcode, "", nil, env))()
141 defs[name] = fixup_dump(string.dump(func, true), fixup)
142 defs[#defs+1] = name
143 end
144 return defs
145end
146
147local function gen_header(defs)
148 local t = {}
149 local function w(x) t[#t+1] = x end
150 w("/* This is a generated file. DO NOT EDIT! */\n\n")
151 w("static const int libbc_endian = ") w(isbe and 1 or 0) w(";\n\n")
152 local s = ""
153 for _,name in ipairs(defs) do
154 s = s .. defs[name]
155 end
156 w("static const uint8_t libbc_code[] = {\n")
157 local n = 0
158 for i=1,#s do
159 local x = string.byte(s, i)
160 w(x); w(",")
161 n = n + (x < 10 and 2 or (x < 100 and 3 or 4))
162 if n >= 75 then n = 0; w("\n") end
163 end
164 w("0\n};\n\n")
165 w("static const struct { const char *name; int ofs; } libbc_map[] = {\n")
166 local m = 0
167 for _,name in ipairs(defs) do
168 w('{"'); w(name); w('",'); w(m) w('},\n')
169 m = m + #defs[name]
170 end
171 w("{NULL,"); w(m); w("}\n};\n\n")
172 return table.concat(t)
173end
174
175local function write_file(name, data)
176 if name == "-" then
177 assert(io.write(data))
178 assert(io.flush())
179 else
180 local fp = io.open(name)
181 if fp then
182 local old = fp:read("*a")
183 fp:close()
184 if data == old then return end
185 end
186 fp = assert(io.open(name, "w"))
187 assert(fp:write(data))
188 assert(fp:close())
189 end
190end
191
192local outfile = parse_arg(arg)
193local src = read_files(arg)
194local defs = find_defs(src)
195local hdr = gen_header(defs)
196write_file(outfile, hdr)
197
diff --git a/src/jit/bc.lua b/src/jit/bc.lua
index 5c00ebe3..566b09c3 100644
--- a/src/jit/bc.lua
+++ b/src/jit/bc.lua
@@ -41,7 +41,7 @@
41 41
42-- Cache some library functions and objects. 42-- Cache some library functions and objects.
43local jit = require("jit") 43local jit = require("jit")
44assert(jit.version_num == 20001, "LuaJIT core/library version mismatch") 44assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
45local jutil = require("jit.util") 45local jutil = require("jit.util")
46local vmdef = require("jit.vmdef") 46local vmdef = require("jit.vmdef")
47local bit = require("bit") 47local bit = require("bit")
@@ -179,13 +179,12 @@ local function bcliston(outfile)
179end 179end
180 180
181-- Public module functions. 181-- Public module functions.
182module(...) 182return {
183 183 line = bcline,
184line = bcline 184 dump = bcdump,
185dump = bcdump 185 targets = bctargets,
186targets = bctargets 186 on = bcliston,
187 187 off = bclistoff,
188on = bcliston 188 start = bcliston -- For -j command line option.
189off = bclistoff 189}
190start = bcliston -- For -j command line option.
191 190
diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua
index 25bd6042..8aad7596 100644
--- a/src/jit/bcsave.lua
+++ b/src/jit/bcsave.lua
@@ -11,7 +11,7 @@
11------------------------------------------------------------------------------ 11------------------------------------------------------------------------------
12 12
13local jit = require("jit") 13local jit = require("jit")
14assert(jit.version_num == 20001, "LuaJIT core/library version mismatch") 14assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
15local bit = require("bit") 15local bit = require("bit")
16 16
17-- Symbol name prefix for LuaJIT bytecode. 17-- Symbol name prefix for LuaJIT bytecode.
@@ -653,7 +653,7 @@ end
653------------------------------------------------------------------------------ 653------------------------------------------------------------------------------
654 654
655-- Public module functions. 655-- Public module functions.
656module(...) 656return {
657 657 start = docmd -- Process -b command line option.
658start = docmd -- Process -b command line option. 658}
659 659
diff --git a/src/jit/dis_arm.lua b/src/jit/dis_arm.lua
index dc7ca71f..87a84e93 100644
--- a/src/jit/dis_arm.lua
+++ b/src/jit/dis_arm.lua
@@ -658,7 +658,7 @@ local function disass_block(ctx, ofs, len)
658end 658end
659 659
660-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). 660-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
661local function create_(code, addr, out) 661local function create(code, addr, out)
662 local ctx = {} 662 local ctx = {}
663 ctx.code = code 663 ctx.code = code
664 ctx.addr = addr or 0 664 ctx.addr = addr or 0
@@ -670,20 +670,20 @@ local function create_(code, addr, out)
670end 670end
671 671
672-- Simple API: disassemble code (a string) at address and output via out. 672-- Simple API: disassemble code (a string) at address and output via out.
673local function disass_(code, addr, out) 673local function disass(code, addr, out)
674 create_(code, addr, out):disass() 674 create(code, addr, out):disass()
675end 675end
676 676
677-- Return register name for RID. 677-- Return register name for RID.
678local function regname_(r) 678local function regname(r)
679 if r < 16 then return map_gpr[r] end 679 if r < 16 then return map_gpr[r] end
680 return "d"..(r-16) 680 return "d"..(r-16)
681end 681end
682 682
683-- Public module functions. 683-- Public module functions.
684module(...) 684return {
685 685 create = create,
686create = create_ 686 disass = disass,
687disass = disass_ 687 regname = regname
688regname = regname_ 688}
689 689
diff --git a/src/jit/dis_mips.lua b/src/jit/dis_mips.lua
index 830db409..9ba0e019 100644
--- a/src/jit/dis_mips.lua
+++ b/src/jit/dis_mips.lua
@@ -384,7 +384,7 @@ local function disass_block(ctx, ofs, len)
384end 384end
385 385
386-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). 386-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
387local function create_(code, addr, out) 387local function create(code, addr, out)
388 local ctx = {} 388 local ctx = {}
389 ctx.code = code 389 ctx.code = code
390 ctx.addr = addr or 0 390 ctx.addr = addr or 0
@@ -396,33 +396,33 @@ local function create_(code, addr, out)
396 return ctx 396 return ctx
397end 397end
398 398
399local function create_el_(code, addr, out) 399local function create_el(code, addr, out)
400 local ctx = create_(code, addr, out) 400 local ctx = create(code, addr, out)
401 ctx.get = get_le 401 ctx.get = get_le
402 return ctx 402 return ctx
403end 403end
404 404
405-- Simple API: disassemble code (a string) at address and output via out. 405-- Simple API: disassemble code (a string) at address and output via out.
406local function disass_(code, addr, out) 406local function disass(code, addr, out)
407 create_(code, addr, out):disass() 407 create(code, addr, out):disass()
408end 408end
409 409
410local function disass_el_(code, addr, out) 410local function disass_el(code, addr, out)
411 create_el_(code, addr, out):disass() 411 create_el(code, addr, out):disass()
412end 412end
413 413
414-- Return register name for RID. 414-- Return register name for RID.
415local function regname_(r) 415local function regname(r)
416 if r < 32 then return map_gpr[r] end 416 if r < 32 then return map_gpr[r] end
417 return "f"..(r-32) 417 return "f"..(r-32)
418end 418end
419 419
420-- Public module functions. 420-- Public module functions.
421module(...) 421return {
422 422 create = create,
423create = create_ 423 create_el = create_el,
424create_el = create_el_ 424 disass = disass,
425disass = disass_ 425 disass_el = disass_el,
426disass_el = disass_el_ 426 regname = regname
427regname = regname_ 427}
428 428
diff --git a/src/jit/dis_mipsel.lua b/src/jit/dis_mipsel.lua
index 8a10c462..60e0233b 100644
--- a/src/jit/dis_mipsel.lua
+++ b/src/jit/dis_mipsel.lua
@@ -8,13 +8,10 @@
8-- MIPS disassembler module. All the interesting stuff is there. 8-- MIPS disassembler module. All the interesting stuff is there.
9------------------------------------------------------------------------------ 9------------------------------------------------------------------------------
10 10
11local require = require 11local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips")
12 12return {
13module(...) 13 create = dis_mips.create_el,
14 14 disass = dis_mips.disass_el,
15local dis_mips = require(_PACKAGE.."dis_mips") 15 regname = dis_mips.regname
16 16}
17create = dis_mips.create_el
18disass = dis_mips.disass_el
19regname = dis_mips.regname
20 17
diff --git a/src/jit/dis_ppc.lua b/src/jit/dis_ppc.lua
index 169a534c..5143d47a 100644
--- a/src/jit/dis_ppc.lua
+++ b/src/jit/dis_ppc.lua
@@ -560,7 +560,7 @@ local function disass_block(ctx, ofs, len)
560end 560end
561 561
562-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). 562-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
563local function create_(code, addr, out) 563local function create(code, addr, out)
564 local ctx = {} 564 local ctx = {}
565 ctx.code = code 565 ctx.code = code
566 ctx.addr = addr or 0 566 ctx.addr = addr or 0
@@ -572,20 +572,20 @@ local function create_(code, addr, out)
572end 572end
573 573
574-- Simple API: disassemble code (a string) at address and output via out. 574-- Simple API: disassemble code (a string) at address and output via out.
575local function disass_(code, addr, out) 575local function disass(code, addr, out)
576 create_(code, addr, out):disass() 576 create(code, addr, out):disass()
577end 577end
578 578
579-- Return register name for RID. 579-- Return register name for RID.
580local function regname_(r) 580local function regname(r)
581 if r < 32 then return map_gpr[r] end 581 if r < 32 then return map_gpr[r] end
582 return "f"..(r-32) 582 return "f"..(r-32)
583end 583end
584 584
585-- Public module functions. 585-- Public module functions.
586module(...) 586return {
587 587 create = create,
588create = create_ 588 disass = disass,
589disass = disass_ 589 regname = regname
590regname = regname_ 590}
591 591
diff --git a/src/jit/dis_x64.lua b/src/jit/dis_x64.lua
index 4a1894ac..2f4f6e7c 100644
--- a/src/jit/dis_x64.lua
+++ b/src/jit/dis_x64.lua
@@ -8,13 +8,10 @@
8-- x86/x64 disassembler module. All the interesting stuff is there. 8-- x86/x64 disassembler module. All the interesting stuff is there.
9------------------------------------------------------------------------------ 9------------------------------------------------------------------------------
10 10
11local require = require 11local dis_x86 = require((string.match(..., ".*%.") or "").."dis_x86")
12 12return {
13module(...) 13 create = dis_x86.create64,
14 14 disass = dis_x86.disass64,
15local dis_x86 = require(_PACKAGE.."dis_x86") 15 regname = dis_x86.regname64
16 16}
17create = dis_x86.create64
18disass = dis_x86.disass64
19regname = dis_x86.regname64
20 17
diff --git a/src/jit/dis_x86.lua b/src/jit/dis_x86.lua
index c442a176..14b0fd61 100644
--- a/src/jit/dis_x86.lua
+++ b/src/jit/dis_x86.lua
@@ -28,6 +28,8 @@ local type = type
28local sub, byte, format = string.sub, string.byte, string.format 28local sub, byte, format = string.sub, string.byte, string.format
29local match, gmatch, gsub = string.match, string.gmatch, string.gsub 29local match, gmatch, gsub = string.match, string.gmatch, string.gsub
30local lower, rep = string.lower, string.rep 30local lower, rep = string.lower, string.rep
31local bit = require("bit")
32local tohex = bit.tohex
31 33
32-- Map for 1st opcode byte in 32 bit mode. Ugly? Well ... read on. 34-- Map for 1st opcode byte in 32 bit mode. Ugly? Well ... read on.
33local map_opc1_32 = { 35local map_opc1_32 = {
@@ -532,7 +534,7 @@ local function putpat(ctx, name, pat)
532 local lo = imm % 0x1000000 534 local lo = imm % 0x1000000
533 x = format("0x%02x%06x", (imm-lo) / 0x1000000, lo) 535 x = format("0x%02x%06x", (imm-lo) / 0x1000000, lo)
534 else 536 else
535 x = format("0x%08x", imm) 537 x = "0x"..tohex(imm)
536 end 538 end
537 elseif p == "R" then 539 elseif p == "R" then
538 local r = byte(code, pos-1, pos-1)%8 540 local r = byte(code, pos-1, pos-1)%8
@@ -782,7 +784,7 @@ local function disass_block(ctx, ofs, len)
782end 784end
783 785
784-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). 786-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
785local function create_(code, addr, out) 787local function create(code, addr, out)
786 local ctx = {} 788 local ctx = {}
787 ctx.code = code 789 ctx.code = code
788 ctx.addr = (addr or 0) - 1 790 ctx.addr = (addr or 0) - 1
@@ -796,8 +798,8 @@ local function create_(code, addr, out)
796 return ctx 798 return ctx
797end 799end
798 800
799local function create64_(code, addr, out) 801local function create64(code, addr, out)
800 local ctx = create_(code, addr, out) 802 local ctx = create(code, addr, out)
801 ctx.x64 = true 803 ctx.x64 = true
802 ctx.map1 = map_opc1_64 804 ctx.map1 = map_opc1_64
803 ctx.aregs = map_regs.Q 805 ctx.aregs = map_regs.Q
@@ -805,32 +807,32 @@ local function create64_(code, addr, out)
805end 807end
806 808
807-- Simple API: disassemble code (a string) at address and output via out. 809-- Simple API: disassemble code (a string) at address and output via out.
808local function disass_(code, addr, out) 810local function disass(code, addr, out)
809 create_(code, addr, out):disass() 811 create(code, addr, out):disass()
810end 812end
811 813
812local function disass64_(code, addr, out) 814local function disass64(code, addr, out)
813 create64_(code, addr, out):disass() 815 create64(code, addr, out):disass()
814end 816end
815 817
816-- Return register name for RID. 818-- Return register name for RID.
817local function regname_(r) 819local function regname(r)
818 if r < 8 then return map_regs.D[r+1] end 820 if r < 8 then return map_regs.D[r+1] end
819 return map_regs.X[r-7] 821 return map_regs.X[r-7]
820end 822end
821 823
822local function regname64_(r) 824local function regname64(r)
823 if r < 16 then return map_regs.Q[r+1] end 825 if r < 16 then return map_regs.Q[r+1] end
824 return map_regs.X[r-15] 826 return map_regs.X[r-15]
825end 827end
826 828
827-- Public module functions. 829-- Public module functions.
828module(...) 830return {
829 831 create = create,
830create = create_ 832 create64 = create64,
831create64 = create64_ 833 disass = disass,
832disass = disass_ 834 disass64 = disass64,
833disass64 = disass64_ 835 regname = regname,
834regname = regname_ 836 regname64 = regname64
835regname64 = regname64_ 837}
836 838
diff --git a/src/jit/dump.lua b/src/jit/dump.lua
index 70a59280..18a4d260 100644
--- a/src/jit/dump.lua
+++ b/src/jit/dump.lua
@@ -54,7 +54,7 @@
54 54
55-- Cache some library functions and objects. 55-- Cache some library functions and objects.
56local jit = require("jit") 56local jit = require("jit")
57assert(jit.version_num == 20001, "LuaJIT core/library version mismatch") 57assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
58local jutil = require("jit.util") 58local jutil = require("jit.util")
59local vmdef = require("jit.vmdef") 59local vmdef = require("jit.vmdef")
60local funcinfo, funcbc = jutil.funcinfo, jutil.funcbc 60local funcinfo, funcbc = jutil.funcinfo, jutil.funcbc
@@ -62,7 +62,7 @@ local traceinfo, traceir, tracek = jutil.traceinfo, jutil.traceir, jutil.tracek
62local tracemc, tracesnap = jutil.tracemc, jutil.tracesnap 62local tracemc, tracesnap = jutil.tracemc, jutil.tracesnap
63local traceexitstub, ircalladdr = jutil.traceexitstub, jutil.ircalladdr 63local traceexitstub, ircalladdr = jutil.traceexitstub, jutil.ircalladdr
64local bit = require("bit") 64local bit = require("bit")
65local band, shl, shr = bit.band, bit.lshift, bit.rshift 65local band, shl, shr, tohex = bit.band, bit.lshift, bit.rshift, bit.tohex
66local sub, gsub, format = string.sub, string.gsub, string.format 66local sub, gsub, format = string.sub, string.gsub, string.format
67local byte, char, rep = string.byte, string.char, string.rep 67local byte, char, rep = string.byte, string.char, string.rep
68local type, tostring = type, tostring 68local type, tostring = type, tostring
@@ -135,6 +135,7 @@ local function dump_mcode(tr)
135 local mcode, addr, loop = tracemc(tr) 135 local mcode, addr, loop = tracemc(tr)
136 if not mcode then return end 136 if not mcode then return end
137 if not disass then disass = require("jit.dis_"..jit.arch) end 137 if not disass then disass = require("jit.dis_"..jit.arch) end
138 if addr < 0 then addr = addr + 2^32 end
138 out:write("---- TRACE ", tr, " mcode ", #mcode, "\n") 139 out:write("---- TRACE ", tr, " mcode ", #mcode, "\n")
139 local ctx = disass.create(mcode, addr, dumpwrite) 140 local ctx = disass.create(mcode, addr, dumpwrite)
140 ctx.hexdump = 0 141 ctx.hexdump = 0
@@ -269,8 +270,7 @@ local litname = {
269 ["CONV "] = setmetatable({}, { __index = function(t, mode) 270 ["CONV "] = setmetatable({}, { __index = function(t, mode)
270 local s = irtype[band(mode, 31)] 271 local s = irtype[band(mode, 31)]
271 s = irtype[band(shr(mode, 5), 31)].."."..s 272 s = irtype[band(shr(mode, 5), 31)].."."..s
272 if band(mode, 0x400) ~= 0 then s = s.." trunc" 273 if band(mode, 0x800) ~= 0 then s = s.." sext" end
273 elseif band(mode, 0x800) ~= 0 then s = s.." sext" end
274 local c = shr(mode, 14) 274 local c = shr(mode, 14)
275 if c == 2 then s = s.." index" elseif c == 3 then s = s.." check" end 275 if c == 2 then s = s.." index" elseif c == 3 then s = s.." check" end
276 t[mode] = s 276 t[mode] = s
@@ -279,6 +279,8 @@ local litname = {
279 ["FLOAD "] = vmdef.irfield, 279 ["FLOAD "] = vmdef.irfield,
280 ["FREF "] = vmdef.irfield, 280 ["FREF "] = vmdef.irfield,
281 ["FPMATH"] = vmdef.irfpm, 281 ["FPMATH"] = vmdef.irfpm,
282 ["BUFHDR"] = { [0] = "RESET", "APPEND" },
283 ["TOSTR "] = { [0] = "INT", "NUM", "CHAR" },
282} 284}
283 285
284local function ctlsub(c) 286local function ctlsub(c)
@@ -608,7 +610,7 @@ local function dump_texit(tr, ex, ngpr, nfpr, ...)
608 end 610 end
609 else 611 else
610 for i=1,ngpr do 612 for i=1,ngpr do
611 out:write(format(" %08x", regs[i])) 613 out:write(" ", tohex(regs[i]))
612 if i % 8 == 0 then out:write("\n") end 614 if i % 8 == 0 then out:write("\n") end
613 end 615 end
614 end 616 end
@@ -692,9 +694,9 @@ local function dumpon(opt, outfile)
692end 694end
693 695
694-- Public module functions. 696-- Public module functions.
695module(...) 697return {
696 698 on = dumpon,
697on = dumpon 699 off = dumpoff,
698off = dumpoff 700 start = dumpon -- For -j command line option.
699start = dumpon -- For -j command line option. 701}
700 702
diff --git a/src/jit/v.lua b/src/jit/v.lua
index f4a9b054..22bee3ff 100644
--- a/src/jit/v.lua
+++ b/src/jit/v.lua
@@ -59,7 +59,7 @@
59 59
60-- Cache some library functions and objects. 60-- Cache some library functions and objects.
61local jit = require("jit") 61local jit = require("jit")
62assert(jit.version_num == 20001, "LuaJIT core/library version mismatch") 62assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
63local jutil = require("jit.util") 63local jutil = require("jit.util")
64local vmdef = require("jit.vmdef") 64local vmdef = require("jit.vmdef")
65local funcinfo, traceinfo = jutil.funcinfo, jutil.traceinfo 65local funcinfo, traceinfo = jutil.funcinfo, jutil.traceinfo
@@ -159,9 +159,9 @@ local function dumpon(outfile)
159end 159end
160 160
161-- Public module functions. 161-- Public module functions.
162module(...) 162return {
163 163 on = dumpon,
164on = dumpon 164 off = dumpoff,
165off = dumpoff 165 start = dumpon -- For -j command line option.
166start = dumpon -- For -j command line option. 166}
167 167
diff --git a/src/lib_base.c b/src/lib_base.c
index 070970ed..44817187 100644
--- a/src/lib_base.c
+++ b/src/lib_base.c
@@ -32,6 +32,7 @@
32#include "lj_dispatch.h" 32#include "lj_dispatch.h"
33#include "lj_char.h" 33#include "lj_char.h"
34#include "lj_strscan.h" 34#include "lj_strscan.h"
35#include "lj_strfmt.h"
35#include "lj_lib.h" 36#include "lj_lib.h"
36 37
37/* -- Base library: checks ------------------------------------------------ */ 38/* -- Base library: checks ------------------------------------------------ */
@@ -301,9 +302,6 @@ LJLIB_ASM(tonumber) LJLIB_REC(.)
301 return FFH_RES(1); 302 return FFH_RES(1);
302} 303}
303 304
304LJLIB_PUSH("nil")
305LJLIB_PUSH("false")
306LJLIB_PUSH("true")
307LJLIB_ASM(tostring) LJLIB_REC(.) 305LJLIB_ASM(tostring) LJLIB_REC(.)
308{ 306{
309 TValue *o = lj_lib_checkany(L, 1); 307 TValue *o = lj_lib_checkany(L, 1);
@@ -312,23 +310,10 @@ LJLIB_ASM(tostring) LJLIB_REC(.)
312 if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) { 310 if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
313 copyTV(L, L->base-1, mo); /* Replace callable. */ 311 copyTV(L, L->base-1, mo); /* Replace callable. */
314 return FFH_TAILCALL; 312 return FFH_TAILCALL;
315 } else {
316 GCstr *s;
317 if (tvisnumber(o)) {
318 s = lj_str_fromnumber(L, o);
319 } else if (tvispri(o)) {
320 s = strV(lj_lib_upvalue(L, -(int32_t)itype(o)));
321 } else {
322 if (tvisfunc(o) && isffunc(funcV(o)))
323 lua_pushfstring(L, "function: builtin#%d", funcV(o)->c.ffid);
324 else
325 lua_pushfstring(L, "%s: %p", lj_typename(o), lua_topointer(L, 1));
326 /* Note: lua_pushfstring calls the GC which may invalidate o. */
327 s = strV(L->top-1);
328 }
329 setstrV(L, L->base-1, s);
330 return FFH_RES(1);
331 } 313 }
314 lj_gc_check(L);
315 setstrV(L, L->base-1, lj_strfmt_obj(L, L->base));
316 return FFH_RES(1);
332} 317}
333 318
334/* -- Base library: throw and catch errors -------------------------------- */ 319/* -- Base library: throw and catch errors -------------------------------- */
@@ -506,21 +491,13 @@ LJLIB_CF(print)
506 } 491 }
507 shortcut = (tvisfunc(tv) && funcV(tv)->c.ffid == FF_tostring); 492 shortcut = (tvisfunc(tv) && funcV(tv)->c.ffid == FF_tostring);
508 for (i = 0; i < nargs; i++) { 493 for (i = 0; i < nargs; i++) {
494 cTValue *o = &L->base[i];
495 char buf[STRFMT_MAXBUF_NUM];
509 const char *str; 496 const char *str;
510 size_t size; 497 size_t size;
511 cTValue *o = &L->base[i]; 498 MSize len;
512 if (shortcut && tvisstr(o)) { 499 if (shortcut && (str = lj_strfmt_wstrnum(buf, o, &len)) != NULL) {
513 str = strVdata(o); 500 size = len;
514 size = strV(o)->len;
515 } else if (shortcut && tvisint(o)) {
516 char buf[LJ_STR_INTBUF];
517 char *p = lj_str_bufint(buf, intV(o));
518 size = (size_t)(buf+LJ_STR_INTBUF-p);
519 str = p;
520 } else if (shortcut && tvisnum(o)) {
521 char buf[LJ_STR_NUMBUF];
522 size = lj_str_bufnum(buf, o);
523 str = buf;
524 } else { 501 } else {
525 copyTV(L, L->top+1, o); 502 copyTV(L, L->top+1, o);
526 copyTV(L, L->top, L->top-1); 503 copyTV(L, L->top, L->top-1);
diff --git a/src/lib_bit.c b/src/lib_bit.c
index 93fead92..a3f7c1ac 100644
--- a/src/lib_bit.c
+++ b/src/lib_bit.c
@@ -12,26 +12,99 @@
12 12
13#include "lj_obj.h" 13#include "lj_obj.h"
14#include "lj_err.h" 14#include "lj_err.h"
15#include "lj_str.h" 15#include "lj_buf.h"
16#include "lj_strscan.h"
17#include "lj_strfmt.h"
18#if LJ_HASFFI
19#include "lj_ctype.h"
20#include "lj_cdata.h"
21#include "lj_cconv.h"
22#include "lj_carith.h"
23#endif
24#include "lj_ff.h"
16#include "lj_lib.h" 25#include "lj_lib.h"
17 26
18/* ------------------------------------------------------------------------ */ 27/* ------------------------------------------------------------------------ */
19 28
20#define LJLIB_MODULE_bit 29#define LJLIB_MODULE_bit
21 30
22LJLIB_ASM(bit_tobit) LJLIB_REC(bit_unary IR_TOBIT) 31#if LJ_HASFFI
32static int bit_result64(lua_State *L, CTypeID id, uint64_t x)
23{ 33{
34 GCcdata *cd = lj_cdata_new_(L, id, 8);
35 *(uint64_t *)cdataptr(cd) = x;
36 setcdataV(L, L->base-1, cd);
37 return FFH_RES(1);
38}
39#else
40static int32_t bit_checkbit(lua_State *L, int narg)
41{
42 TValue *o = L->base + narg-1;
43 if (!(o < L->top && lj_strscan_numberobj(o)))
44 lj_err_argt(L, narg, LUA_TNUMBER);
45 if (LJ_LIKELY(tvisint(o))) {
46 return intV(o);
47 } else {
48 int32_t i = lj_num2bit(numV(o));
49 if (LJ_DUALNUM) setintV(o, i);
50 return i;
51 }
52}
53#endif
54
55LJLIB_ASM(bit_tobit) LJLIB_REC(bit_tobit)
56{
57#if LJ_HASFFI
58 CTypeID id = 0;
59 setintV(L->base-1, (int32_t)lj_carith_check64(L, 1, &id));
60 return FFH_RES(1);
61#else
62 lj_lib_checknumber(L, 1);
63 return FFH_RETRY;
64#endif
65}
66
67LJLIB_ASM(bit_bnot) LJLIB_REC(bit_unary IR_BNOT)
68{
69#if LJ_HASFFI
70 CTypeID id = 0;
71 uint64_t x = lj_carith_check64(L, 1, &id);
72 return id ? bit_result64(L, id, ~x) : FFH_RETRY;
73#else
24 lj_lib_checknumber(L, 1); 74 lj_lib_checknumber(L, 1);
25 return FFH_RETRY; 75 return FFH_RETRY;
76#endif
77}
78
79LJLIB_ASM(bit_bswap) LJLIB_REC(bit_unary IR_BSWAP)
80{
81#if LJ_HASFFI
82 CTypeID id = 0;
83 uint64_t x = lj_carith_check64(L, 1, &id);
84 return id ? bit_result64(L, id, lj_bswap64(x)) : FFH_RETRY;
85#else
86 lj_lib_checknumber(L, 1);
87 return FFH_RETRY;
88#endif
26} 89}
27LJLIB_ASM_(bit_bnot) LJLIB_REC(bit_unary IR_BNOT)
28LJLIB_ASM_(bit_bswap) LJLIB_REC(bit_unary IR_BSWAP)
29 90
30LJLIB_ASM(bit_lshift) LJLIB_REC(bit_shift IR_BSHL) 91LJLIB_ASM(bit_lshift) LJLIB_REC(bit_shift IR_BSHL)
31{ 92{
93#if LJ_HASFFI
94 CTypeID id = 0, id2 = 0;
95 uint64_t x = lj_carith_check64(L, 1, &id);
96 int32_t sh = (int32_t)lj_carith_check64(L, 2, &id2);
97 if (id) {
98 x = lj_carith_shift64(x, sh, curr_func(L)->c.ffid - (int)FF_bit_lshift);
99 return bit_result64(L, id, x);
100 }
101 if (id2) setintV(L->base+1, sh);
102 return FFH_RETRY;
103#else
32 lj_lib_checknumber(L, 1); 104 lj_lib_checknumber(L, 1);
33 lj_lib_checkbit(L, 2); 105 bit_checkbit(L, 2);
34 return FFH_RETRY; 106 return FFH_RETRY;
107#endif
35} 108}
36LJLIB_ASM_(bit_rshift) LJLIB_REC(bit_shift IR_BSHR) 109LJLIB_ASM_(bit_rshift) LJLIB_REC(bit_shift IR_BSHR)
37LJLIB_ASM_(bit_arshift) LJLIB_REC(bit_shift IR_BSAR) 110LJLIB_ASM_(bit_arshift) LJLIB_REC(bit_shift IR_BSAR)
@@ -40,25 +113,58 @@ LJLIB_ASM_(bit_ror) LJLIB_REC(bit_shift IR_BROR)
40 113
41LJLIB_ASM(bit_band) LJLIB_REC(bit_nary IR_BAND) 114LJLIB_ASM(bit_band) LJLIB_REC(bit_nary IR_BAND)
42{ 115{
116#if LJ_HASFFI
117 CTypeID id = 0;
118 TValue *o = L->base, *top = L->top;
119 int i = 0;
120 do { lj_carith_check64(L, ++i, &id); } while (++o < top);
121 if (id) {
122 CTState *cts = ctype_cts(L);
123 CType *ct = ctype_get(cts, id);
124 int op = curr_func(L)->c.ffid - (int)FF_bit_bor;
125 uint64_t x, y = op >= 0 ? 0 : ~(uint64_t)0;
126 o = L->base;
127 do {
128 lj_cconv_ct_tv(cts, ct, (uint8_t *)&x, o, 0);
129 if (op < 0) y &= x; else if (op == 0) y |= x; else y ^= x;
130 } while (++o < top);
131 return bit_result64(L, id, y);
132 }
133 return FFH_RETRY;
134#else
43 int i = 0; 135 int i = 0;
44 do { lj_lib_checknumber(L, ++i); } while (L->base+i < L->top); 136 do { lj_lib_checknumber(L, ++i); } while (L->base+i < L->top);
45 return FFH_RETRY; 137 return FFH_RETRY;
138#endif
46} 139}
47LJLIB_ASM_(bit_bor) LJLIB_REC(bit_nary IR_BOR) 140LJLIB_ASM_(bit_bor) LJLIB_REC(bit_nary IR_BOR)
48LJLIB_ASM_(bit_bxor) LJLIB_REC(bit_nary IR_BXOR) 141LJLIB_ASM_(bit_bxor) LJLIB_REC(bit_nary IR_BXOR)
49 142
50/* ------------------------------------------------------------------------ */ 143/* ------------------------------------------------------------------------ */
51 144
52LJLIB_CF(bit_tohex) 145LJLIB_CF(bit_tohex) LJLIB_REC(.)
53{ 146{
54 uint32_t b = (uint32_t)lj_lib_checkbit(L, 1); 147#if LJ_HASFFI
55 int32_t i, n = L->base+1 >= L->top ? 8 : lj_lib_checkbit(L, 2); 148 CTypeID id = 0, id2 = 0;
56 const char *hexdigits = "0123456789abcdef"; 149 uint64_t b = lj_carith_check64(L, 1, &id);
57 char buf[8]; 150 int32_t n = L->base+1>=L->top ? (id ? 16 : 8) :
58 if (n < 0) { n = -n; hexdigits = "0123456789ABCDEF"; } 151 (int32_t)lj_carith_check64(L, 2, &id2);
59 if (n > 8) n = 8; 152#else
60 for (i = n; --i >= 0; ) { buf[i] = hexdigits[b & 15]; b >>= 4; } 153 uint32_t b = (uint32_t)bit_checkbit(L, 1);
61 lua_pushlstring(L, buf, (size_t)n); 154 int32_t n = L->base+1>=L->top ? 8 : bit_checkbit(L, 2);
155#endif
156 SBuf *sb = lj_buf_tmp_(L);
157 SFormat sf = (STRFMT_UINT|STRFMT_T_HEX);
158 if (n < 0) { n = -n; sf |= STRFMT_F_UPPER; }
159 sf |= ((SFormat)((n+1)&255) << STRFMT_SH_PREC);
160#if LJ_HASFFI
161 if (n < 16) b &= ((uint64_t)1 << 4*n)-1;
162#else
163 if (n < 8) b &= (1u << 4*n)-1;
164#endif
165 sb = lj_strfmt_putfxint(sb, sf, b);
166 setstrV(L, L->top-1, lj_buf_str(L, sb));
167 lj_gc_check(L);
62 return 1; 168 return 1;
63} 169}
64 170
diff --git a/src/lib_ffi.c b/src/lib_ffi.c
index f61fabc0..3310b205 100644
--- a/src/lib_ffi.c
+++ b/src/lib_ffi.c
@@ -29,6 +29,7 @@
29#include "lj_ccall.h" 29#include "lj_ccall.h"
30#include "lj_ccallback.h" 30#include "lj_ccallback.h"
31#include "lj_clib.h" 31#include "lj_clib.h"
32#include "lj_strfmt.h"
32#include "lj_ff.h" 33#include "lj_ff.h"
33#include "lj_lib.h" 34#include "lj_lib.h"
34 35
@@ -317,7 +318,7 @@ LJLIB_CF(ffi_meta___tostring)
317 } 318 }
318 } 319 }
319 } 320 }
320 lj_str_pushf(L, msg, strdata(lj_ctype_repr(L, id, NULL)), p); 321 lj_strfmt_pushf(L, msg, strdata(lj_ctype_repr(L, id, NULL)), p);
321checkgc: 322checkgc:
322 lj_gc_check(L); 323 lj_gc_check(L);
323 return 1; 324 return 1;
@@ -506,7 +507,7 @@ LJLIB_CF(ffi_new) LJLIB_REC(.)
506 if (!(info & CTF_VLA) && ctype_align(info) <= CT_MEMALIGN) 507 if (!(info & CTF_VLA) && ctype_align(info) <= CT_MEMALIGN)
507 cd = lj_cdata_new(cts, id, sz); 508 cd = lj_cdata_new(cts, id, sz);
508 else 509 else
509 cd = lj_cdata_newv(cts, id, sz, ctype_align(info)); 510 cd = lj_cdata_newv(L, id, sz, ctype_align(info));
510 setcdataV(L, o-1, cd); /* Anchor the uninitialized cdata. */ 511 setcdataV(L, o-1, cd); /* Anchor the uninitialized cdata. */
511 lj_cconv_ct_init(cts, ct, sz, cdataptr(cd), 512 lj_cconv_ct_init(cts, ct, sz, cdataptr(cd),
512 o, (MSize)(L->top - o)); /* Initialize cdata. */ 513 o, (MSize)(L->top - o)); /* Initialize cdata. */
@@ -767,19 +768,11 @@ LJLIB_CF(ffi_gc) LJLIB_REC(.)
767 GCcdata *cd = ffi_checkcdata(L, 1); 768 GCcdata *cd = ffi_checkcdata(L, 1);
768 TValue *fin = lj_lib_checkany(L, 2); 769 TValue *fin = lj_lib_checkany(L, 2);
769 CTState *cts = ctype_cts(L); 770 CTState *cts = ctype_cts(L);
770 GCtab *t = cts->finalizer;
771 CType *ct = ctype_raw(cts, cd->ctypeid); 771 CType *ct = ctype_raw(cts, cd->ctypeid);
772 if (!(ctype_isptr(ct->info) || ctype_isstruct(ct->info) || 772 if (!(ctype_isptr(ct->info) || ctype_isstruct(ct->info) ||
773 ctype_isrefarray(ct->info))) 773 ctype_isrefarray(ct->info)))
774 lj_err_arg(L, 1, LJ_ERR_FFI_INVTYPE); 774 lj_err_arg(L, 1, LJ_ERR_FFI_INVTYPE);
775 if (gcref(t->metatable)) { /* Update finalizer table, if still enabled. */ 775 lj_cdata_setfin(L, cd, gcval(fin), itype(fin));
776 copyTV(L, lj_tab_set(L, t, L->base), fin);
777 lj_gc_anybarriert(L, t);
778 if (!tvisnil(fin))
779 cd->marked |= LJ_GC_CDATA_FIN;
780 else
781 cd->marked &= ~LJ_GC_CDATA_FIN;
782 }
783 L->top = L->base+1; /* Pass through the cdata object. */ 776 L->top = L->base+1; /* Pass through the cdata object. */
784 return 1; 777 return 1;
785} 778}
diff --git a/src/lib_io.c b/src/lib_io.c
index e0c6908f..ca87ec4d 100644
--- a/src/lib_io.c
+++ b/src/lib_io.c
@@ -19,8 +19,10 @@
19#include "lj_obj.h" 19#include "lj_obj.h"
20#include "lj_gc.h" 20#include "lj_gc.h"
21#include "lj_err.h" 21#include "lj_err.h"
22#include "lj_buf.h"
22#include "lj_str.h" 23#include "lj_str.h"
23#include "lj_state.h" 24#include "lj_state.h"
25#include "lj_strfmt.h"
24#include "lj_ff.h" 26#include "lj_ff.h"
25#include "lj_lib.h" 27#include "lj_lib.h"
26 28
@@ -84,7 +86,7 @@ static IOFileUD *io_file_open(lua_State *L, const char *mode)
84 IOFileUD *iof = io_file_new(L); 86 IOFileUD *iof = io_file_new(L);
85 iof->fp = fopen(fname, mode); 87 iof->fp = fopen(fname, mode);
86 if (iof->fp == NULL) 88 if (iof->fp == NULL)
87 luaL_argerror(L, 1, lj_str_pushf(L, "%s: %s", fname, strerror(errno))); 89 luaL_argerror(L, 1, lj_strfmt_pushf(L, "%s: %s", fname, strerror(errno)));
88 return iof; 90 return iof;
89} 91}
90 92
@@ -145,7 +147,7 @@ static int io_file_readline(lua_State *L, FILE *fp, MSize chop)
145 MSize m = LUAL_BUFFERSIZE, n = 0, ok = 0; 147 MSize m = LUAL_BUFFERSIZE, n = 0, ok = 0;
146 char *buf; 148 char *buf;
147 for (;;) { 149 for (;;) {
148 buf = lj_str_needbuf(L, &G(L)->tmpbuf, m); 150 buf = lj_buf_tmp(L, m);
149 if (fgets(buf+n, m-n, fp) == NULL) break; 151 if (fgets(buf+n, m-n, fp) == NULL) break;
150 n += (MSize)strlen(buf+n); 152 n += (MSize)strlen(buf+n);
151 ok |= n; 153 ok |= n;
@@ -161,7 +163,7 @@ static void io_file_readall(lua_State *L, FILE *fp)
161{ 163{
162 MSize m, n; 164 MSize m, n;
163 for (m = LUAL_BUFFERSIZE, n = 0; ; m += m) { 165 for (m = LUAL_BUFFERSIZE, n = 0; ; m += m) {
164 char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, m); 166 char *buf = lj_buf_tmp(L, m);
165 n += (MSize)fread(buf+n, 1, m-n, fp); 167 n += (MSize)fread(buf+n, 1, m-n, fp);
166 if (n != m) { 168 if (n != m) {
167 setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n)); 169 setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n));
@@ -174,7 +176,7 @@ static void io_file_readall(lua_State *L, FILE *fp)
174static int io_file_readlen(lua_State *L, FILE *fp, MSize m) 176static int io_file_readlen(lua_State *L, FILE *fp, MSize m)
175{ 177{
176 if (m) { 178 if (m) {
177 char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, m); 179 char *buf = lj_buf_tmp(L, m);
178 MSize n = (MSize)fread(buf, 1, m, fp); 180 MSize n = (MSize)fread(buf, 1, m, fp);
179 setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n)); 181 setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n));
180 lj_gc_check(L); 182 lj_gc_check(L);
@@ -230,19 +232,12 @@ static int io_file_write(lua_State *L, FILE *fp, int start)
230 cTValue *tv; 232 cTValue *tv;
231 int status = 1; 233 int status = 1;
232 for (tv = L->base+start; tv < L->top; tv++) { 234 for (tv = L->base+start; tv < L->top; tv++) {
233 if (tvisstr(tv)) { 235 char buf[STRFMT_MAXBUF_NUM];
234 MSize len = strV(tv)->len; 236 MSize len;
235 status = status && (fwrite(strVdata(tv), 1, len, fp) == len); 237 const char *p = lj_strfmt_wstrnum(buf, tv, &len);
236 } else if (tvisint(tv)) { 238 if (!p)
237 char buf[LJ_STR_INTBUF];
238 char *p = lj_str_bufint(buf, intV(tv));
239 size_t len = (size_t)(buf+LJ_STR_INTBUF-p);
240 status = status && (fwrite(p, 1, len, fp) == len);
241 } else if (tvisnum(tv)) {
242 status = status && (fprintf(fp, LUA_NUMBER_FMT, numV(tv)) > 0);
243 } else {
244 lj_err_argt(L, (int)(tv - L->base) + 1, LUA_TSTRING); 239 lj_err_argt(L, (int)(tv - L->base) + 1, LUA_TSTRING);
245 } 240 status = status && (fwrite(p, 1, len, fp) == len);
246 } 241 }
247 if (LJ_52 && status) { 242 if (LJ_52 && status) {
248 L->top = L->base+1; 243 L->top = L->base+1;
diff --git a/src/lib_jit.c b/src/lib_jit.c
index 82e68258..555e581c 100644
--- a/src/lib_jit.c
+++ b/src/lib_jit.c
@@ -16,7 +16,11 @@
16#include "lj_debug.h" 16#include "lj_debug.h"
17#include "lj_str.h" 17#include "lj_str.h"
18#include "lj_tab.h" 18#include "lj_tab.h"
19#include "lj_state.h"
19#include "lj_bc.h" 20#include "lj_bc.h"
21#if LJ_HASFFI
22#include "lj_ctype.h"
23#endif
20#if LJ_HASJIT 24#if LJ_HASJIT
21#include "lj_ir.h" 25#include "lj_ir.h"
22#include "lj_jit.h" 26#include "lj_jit.h"
@@ -332,6 +336,13 @@ LJLIB_CF(jit_util_tracek)
332 slot = ir->op2; 336 slot = ir->op2;
333 ir = &T->ir[ir->op1]; 337 ir = &T->ir[ir->op1];
334 } 338 }
339#if LJ_HASFFI
340 if (ir->o == IR_KINT64 && !ctype_ctsG(G(L))) {
341 ptrdiff_t oldtop = savestack(L, L->top);
342 luaopen_ffi(L); /* Load FFI library on-demand. */
343 L->top = restorestack(L, oldtop);
344 }
345#endif
335 lj_ir_kvalue(L, L->top-2, ir); 346 lj_ir_kvalue(L, L->top-2, ir);
336 setintV(L->top-1, (int32_t)irt_type(ir->t)); 347 setintV(L->top-1, (int32_t)irt_type(ir->t));
337 if (slot == -1) 348 if (slot == -1)
@@ -538,23 +549,17 @@ static uint32_t jit_cpudetect(lua_State *L)
538 uint32_t features[4]; 549 uint32_t features[4];
539 if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) { 550 if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) {
540#if !LJ_HASJIT 551#if !LJ_HASJIT
541#define JIT_F_CMOV 1
542#define JIT_F_SSE2 2 552#define JIT_F_SSE2 2
543#endif 553#endif
544 flags |= ((features[3] >> 15)&1) * JIT_F_CMOV;
545 flags |= ((features[3] >> 26)&1) * JIT_F_SSE2; 554 flags |= ((features[3] >> 26)&1) * JIT_F_SSE2;
546#if LJ_HASJIT 555#if LJ_HASJIT
547 flags |= ((features[2] >> 0)&1) * JIT_F_SSE3; 556 flags |= ((features[2] >> 0)&1) * JIT_F_SSE3;
548 flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1; 557 flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1;
549 if (vendor[2] == 0x6c65746e) { /* Intel. */ 558 if (vendor[2] == 0x6c65746e) { /* Intel. */
550 if ((features[0] & 0x0ff00f00) == 0x00000f00) /* P4. */ 559 if ((features[0] & 0x0fff0ff0) == 0x000106c0) /* Atom. */
551 flags |= JIT_F_P4; /* Currently unused. */
552 else if ((features[0] & 0x0fff0ff0) == 0x000106c0) /* Atom. */
553 flags |= JIT_F_LEA_AGU; 560 flags |= JIT_F_LEA_AGU;
554 } else if (vendor[2] == 0x444d4163) { /* AMD. */ 561 } else if (vendor[2] == 0x444d4163) { /* AMD. */
555 uint32_t fam = (features[0] & 0x0ff00f00); 562 uint32_t fam = (features[0] & 0x0ff00f00);
556 if (fam == 0x00000f00) /* K8. */
557 flags |= JIT_F_SPLIT_XMM;
558 if (fam >= 0x00000f00) /* K8, K10. */ 563 if (fam >= 0x00000f00) /* K8, K10. */
559 flags |= JIT_F_PREFER_IMUL; 564 flags |= JIT_F_PREFER_IMUL;
560 } 565 }
@@ -562,14 +567,8 @@ static uint32_t jit_cpudetect(lua_State *L)
562 } 567 }
563 /* Check for required instruction set support on x86 (unnecessary on x64). */ 568 /* Check for required instruction set support on x86 (unnecessary on x64). */
564#if LJ_TARGET_X86 569#if LJ_TARGET_X86
565#if !defined(LUAJIT_CPU_NOCMOV)
566 if (!(flags & JIT_F_CMOV))
567 luaL_error(L, "CPU not supported");
568#endif
569#if defined(LUAJIT_CPU_SSE2)
570 if (!(flags & JIT_F_SSE2)) 570 if (!(flags & JIT_F_SSE2))
571 luaL_error(L, "CPU does not support SSE2 (recompile without -DLUAJIT_CPU_SSE2)"); 571 luaL_error(L, "CPU with SSE2 required");
572#endif
573#endif 572#endif
574#elif LJ_TARGET_ARM 573#elif LJ_TARGET_ARM
575#if LJ_HASJIT 574#if LJ_HASJIT
@@ -631,11 +630,7 @@ static void jit_init(lua_State *L)
631 uint32_t flags = jit_cpudetect(L); 630 uint32_t flags = jit_cpudetect(L);
632#if LJ_HASJIT 631#if LJ_HASJIT
633 jit_State *J = L2J(L); 632 jit_State *J = L2J(L);
634#if LJ_TARGET_X86 633 J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT;
635 /* Silently turn off the JIT compiler on CPUs without SSE2. */
636 if ((flags & JIT_F_SSE2))
637#endif
638 J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT;
639 memcpy(J->param, jit_param_default, sizeof(J->param)); 634 memcpy(J->param, jit_param_default, sizeof(J->param));
640 lj_dispatch_update(G(L)); 635 lj_dispatch_update(G(L));
641#else 636#else
@@ -645,6 +640,7 @@ static void jit_init(lua_State *L)
645 640
646LUALIB_API int luaopen_jit(lua_State *L) 641LUALIB_API int luaopen_jit(lua_State *L)
647{ 642{
643 jit_init(L);
648 lua_pushliteral(L, LJ_OS_NAME); 644 lua_pushliteral(L, LJ_OS_NAME);
649 lua_pushliteral(L, LJ_ARCH_NAME); 645 lua_pushliteral(L, LJ_ARCH_NAME);
650 lua_pushinteger(L, LUAJIT_VERSION_NUM); 646 lua_pushinteger(L, LUAJIT_VERSION_NUM);
@@ -657,7 +653,6 @@ LUALIB_API int luaopen_jit(lua_State *L)
657 LJ_LIB_REG(L, "jit.opt", jit_opt); 653 LJ_LIB_REG(L, "jit.opt", jit_opt);
658#endif 654#endif
659 L->top -= 2; 655 L->top -= 2;
660 jit_init(L);
661 return 1; 656 return 1;
662} 657}
663 658
diff --git a/src/lib_math.c b/src/lib_math.c
index b23d9a2d..e474f980 100644
--- a/src/lib_math.c
+++ b/src/lib_math.c
@@ -63,11 +63,8 @@ LJLIB_ASM(math_log) LJLIB_REC(math_log)
63 return FFH_RETRY; 63 return FFH_RETRY;
64} 64}
65 65
66LJLIB_PUSH(57.29577951308232) 66LJLIB_LUA(math_deg) /* function(x) return x * 57.29577951308232 end */
67LJLIB_ASM_(math_deg) LJLIB_REC(math_degrad) 67LJLIB_LUA(math_rad) /* function(x) return x * 0.017453292519943295 end */
68
69LJLIB_PUSH(0.017453292519943295)
70LJLIB_ASM_(math_rad) LJLIB_REC(math_degrad)
71 68
72LJLIB_ASM(math_atan2) LJLIB_REC(.) 69LJLIB_ASM(math_atan2) LJLIB_REC(.)
73{ 70{
diff --git a/src/lib_os.c b/src/lib_os.c
index 0a784129..de2bc623 100644
--- a/src/lib_os.c
+++ b/src/lib_os.c
@@ -18,7 +18,10 @@
18#include "lualib.h" 18#include "lualib.h"
19 19
20#include "lj_obj.h" 20#include "lj_obj.h"
21#include "lj_gc.h"
21#include "lj_err.h" 22#include "lj_err.h"
23#include "lj_buf.h"
24#include "lj_str.h"
22#include "lj_lib.h" 25#include "lj_lib.h"
23 26
24#if LJ_TARGET_POSIX 27#if LJ_TARGET_POSIX
@@ -185,7 +188,7 @@ LJLIB_CF(os_date)
185#endif 188#endif
186 } 189 }
187 if (stm == NULL) { /* Invalid date? */ 190 if (stm == NULL) { /* Invalid date? */
188 setnilV(L->top-1); 191 setnilV(L->top++);
189 } else if (strcmp(s, "*t") == 0) { 192 } else if (strcmp(s, "*t") == 0) {
190 lua_createtable(L, 0, 9); /* 9 = number of fields */ 193 lua_createtable(L, 0, 9); /* 9 = number of fields */
191 setfield(L, "sec", stm->tm_sec); 194 setfield(L, "sec", stm->tm_sec);
@@ -197,23 +200,25 @@ LJLIB_CF(os_date)
197 setfield(L, "wday", stm->tm_wday+1); 200 setfield(L, "wday", stm->tm_wday+1);
198 setfield(L, "yday", stm->tm_yday+1); 201 setfield(L, "yday", stm->tm_yday+1);
199 setboolfield(L, "isdst", stm->tm_isdst); 202 setboolfield(L, "isdst", stm->tm_isdst);
200 } else { 203 } else if (*s) {
201 char cc[3]; 204 SBuf *sb = &G(L)->tmpbuf;
202 luaL_Buffer b; 205 MSize sz = 0;
203 cc[0] = '%'; cc[2] = '\0'; 206 const char *q;
204 luaL_buffinit(L, &b); 207 for (q = s; *q; q++)
205 for (; *s; s++) { 208 sz += (*q == '%') ? 30 : 1; /* Overflow doesn't matter. */
206 if (*s != '%' || *(s + 1) == '\0') { /* No conversion specifier? */ 209 setsbufL(sb, L);
207 luaL_addchar(&b, *s); 210 for (;;) {
208 } else { 211 char *buf = lj_buf_need(sb, sz);
209 size_t reslen; 212 size_t len = strftime(buf, sbufsz(sb), s, stm);
210 char buff[200]; /* Should be big enough for any conversion result. */ 213 if (len) {
211 cc[1] = *(++s); 214 setstrV(L, L->top++, lj_str_new(L, buf, len));
212 reslen = strftime(buff, sizeof(buff), cc, stm); 215 lj_gc_check(L);
213 luaL_addlstring(&b, buff, reslen); 216 break;
214 } 217 }
218 sz += (sz|1);
215 } 219 }
216 luaL_pushresult(&b); 220 } else {
221 setstrV(L, L->top++, &G(L)->strempty);
217 } 222 }
218 return 1; 223 return 1;
219} 224}
diff --git a/src/lib_string.c b/src/lib_string.c
index 9e8ab900..204f6975 100644
--- a/src/lib_string.c
+++ b/src/lib_string.c
@@ -6,8 +6,6 @@
6** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h 6** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
7*/ 7*/
8 8
9#include <stdio.h>
10
11#define lib_string_c 9#define lib_string_c
12#define LUA_LIB 10#define LUA_LIB
13 11
@@ -18,6 +16,7 @@
18#include "lj_obj.h" 16#include "lj_obj.h"
19#include "lj_gc.h" 17#include "lj_gc.h"
20#include "lj_err.h" 18#include "lj_err.h"
19#include "lj_buf.h"
21#include "lj_str.h" 20#include "lj_str.h"
22#include "lj_tab.h" 21#include "lj_tab.h"
23#include "lj_meta.h" 22#include "lj_meta.h"
@@ -25,17 +24,19 @@
25#include "lj_ff.h" 24#include "lj_ff.h"
26#include "lj_bcdump.h" 25#include "lj_bcdump.h"
27#include "lj_char.h" 26#include "lj_char.h"
27#include "lj_strfmt.h"
28#include "lj_lib.h" 28#include "lj_lib.h"
29 29
30/* ------------------------------------------------------------------------ */ 30/* ------------------------------------------------------------------------ */
31 31
32#define LJLIB_MODULE_string 32#define LJLIB_MODULE_string
33 33
34LJLIB_ASM(string_len) LJLIB_REC(.) 34LJLIB_LUA(string_len) /*
35{ 35 function(s)
36 lj_lib_checkstr(L, 1); 36 CHECK_str(s)
37 return FFH_RETRY; 37 return #s
38} 38 end
39*/
39 40
40LJLIB_ASM(string_byte) LJLIB_REC(string_range 0) 41LJLIB_ASM(string_byte) LJLIB_REC(string_range 0)
41{ 42{
@@ -61,10 +62,10 @@ LJLIB_ASM(string_byte) LJLIB_REC(string_range 0)
61 return FFH_RES(n); 62 return FFH_RES(n);
62} 63}
63 64
64LJLIB_ASM(string_char) 65LJLIB_ASM(string_char) LJLIB_REC(.)
65{ 66{
66 int i, nargs = (int)(L->top - L->base); 67 int i, nargs = (int)(L->top - L->base);
67 char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, (size_t)nargs); 68 char *buf = lj_buf_tmp(L, (size_t)nargs);
68 for (i = 1; i <= nargs; i++) { 69 for (i = 1; i <= nargs; i++) {
69 int32_t k = lj_lib_checkint(L, i); 70 int32_t k = lj_lib_checkint(L, i);
70 if (!checku8(k)) 71 if (!checku8(k))
@@ -83,68 +84,38 @@ LJLIB_ASM(string_sub) LJLIB_REC(string_range 1)
83 return FFH_RETRY; 84 return FFH_RETRY;
84} 85}
85 86
86LJLIB_ASM(string_rep) 87LJLIB_CF(string_rep) LJLIB_REC(.)
87{ 88{
88 GCstr *s = lj_lib_checkstr(L, 1); 89 GCstr *s = lj_lib_checkstr(L, 1);
89 int32_t k = lj_lib_checkint(L, 2); 90 int32_t rep = lj_lib_checkint(L, 2);
90 GCstr *sep = lj_lib_optstr(L, 3); 91 GCstr *sep = lj_lib_optstr(L, 3);
91 int32_t len = (int32_t)s->len; 92 SBuf *sb = lj_buf_tmp_(L);
92 global_State *g = G(L); 93 if (sep && rep > 1) {
93 int64_t tlen; 94 GCstr *s2 = lj_buf_cat2str(L, sep, s);
94 const char *src; 95 lj_buf_reset(sb);
95 char *buf; 96 lj_buf_putstr(sb, s);
96 if (k <= 0) { 97 s = s2;
97 empty: 98 rep--;
98 setstrV(L, L->base-1, &g->strempty);
99 return FFH_RES(1);
100 } 99 }
101 if (sep) { 100 sb = lj_buf_putstr_rep(sb, s, rep);
102 tlen = (int64_t)len + sep->len; 101 setstrV(L, L->top-1, lj_buf_str(L, sb));
103 if (tlen > LJ_MAX_STR) 102 lj_gc_check(L);
104 lj_err_caller(L, LJ_ERR_STROV); 103 return 1;
105 tlen *= k;
106 if (tlen > LJ_MAX_STR)
107 lj_err_caller(L, LJ_ERR_STROV);
108 } else {
109 tlen = (int64_t)k * len;
110 if (tlen > LJ_MAX_STR)
111 lj_err_caller(L, LJ_ERR_STROV);
112 }
113 if (tlen == 0) goto empty;
114 buf = lj_str_needbuf(L, &g->tmpbuf, (MSize)tlen);
115 src = strdata(s);
116 if (sep) {
117 tlen -= sep->len; /* Ignore trailing separator. */
118 if (k > 1) { /* Paste one string and one separator. */
119 int32_t i;
120 i = 0; while (i < len) *buf++ = src[i++];
121 src = strdata(sep); len = sep->len;
122 i = 0; while (i < len) *buf++ = src[i++];
123 src = g->tmpbuf.buf; len += s->len; k--; /* Now copy that k-1 times. */
124 }
125 }
126 do {
127 int32_t i = 0;
128 do { *buf++ = src[i++]; } while (i < len);
129 } while (--k > 0);
130 setstrV(L, L->base-1, lj_str_new(L, g->tmpbuf.buf, (size_t)tlen));
131 return FFH_RES(1);
132} 104}
133 105
134LJLIB_ASM(string_reverse) 106LJLIB_ASM(string_reverse) LJLIB_REC(string_op IRCALL_lj_buf_putstr_reverse)
135{ 107{
136 GCstr *s = lj_lib_checkstr(L, 1); 108 lj_lib_checkstr(L, 1);
137 lj_str_needbuf(L, &G(L)->tmpbuf, s->len);
138 return FFH_RETRY; 109 return FFH_RETRY;
139} 110}
140LJLIB_ASM_(string_lower) 111LJLIB_ASM_(string_lower) LJLIB_REC(string_op IRCALL_lj_buf_putstr_lower)
141LJLIB_ASM_(string_upper) 112LJLIB_ASM_(string_upper) LJLIB_REC(string_op IRCALL_lj_buf_putstr_upper)
142 113
143/* ------------------------------------------------------------------------ */ 114/* ------------------------------------------------------------------------ */
144 115
145static int writer_buf(lua_State *L, const void *p, size_t size, void *b) 116static int writer_buf(lua_State *L, const void *p, size_t size, void *sb)
146{ 117{
147 luaL_addlstring((luaL_Buffer *)b, (const char *)p, size); 118 lj_buf_putmem((SBuf *)sb, p, (MSize)size);
148 UNUSED(L); 119 UNUSED(L);
149 return 0; 120 return 0;
150} 121}
@@ -153,12 +124,12 @@ LJLIB_CF(string_dump)
153{ 124{
154 GCfunc *fn = lj_lib_checkfunc(L, 1); 125 GCfunc *fn = lj_lib_checkfunc(L, 1);
155 int strip = L->base+1 < L->top && tvistruecond(L->base+1); 126 int strip = L->base+1 < L->top && tvistruecond(L->base+1);
156 luaL_Buffer b; 127 SBuf *sb = lj_buf_tmp_(L); /* Assumes lj_bcwrite() doesn't use tmpbuf. */
157 L->top = L->base+1; 128 L->top = L->base+1;
158 luaL_buffinit(L, &b); 129 if (!isluafunc(fn) || lj_bcwrite(L, funcproto(fn), writer_buf, sb, strip))
159 if (!isluafunc(fn) || lj_bcwrite(L, funcproto(fn), writer_buf, &b, strip))
160 lj_err_caller(L, LJ_ERR_STRDUMP); 130 lj_err_caller(L, LJ_ERR_STRDUMP);
161 luaL_pushresult(&b); 131 setstrV(L, L->top-1, lj_buf_str(L, sb));
132 lj_gc_check(L);
162 return 1; 133 return 1;
163} 134}
164 135
@@ -183,7 +154,6 @@ typedef struct MatchState {
183} MatchState; 154} MatchState;
184 155
185#define L_ESC '%' 156#define L_ESC '%'
186#define SPECIALS "^$*+?.([%-"
187 157
188static int check_capture(MatchState *ms, int l) 158static int check_capture(MatchState *ms, int l)
189{ 159{
@@ -450,30 +420,6 @@ static const char *match(MatchState *ms, const char *s, const char *p)
450 return s; 420 return s;
451} 421}
452 422
453static const char *lmemfind(const char *s1, size_t l1,
454 const char *s2, size_t l2)
455{
456 if (l2 == 0) {
457 return s1; /* empty strings are everywhere */
458 } else if (l2 > l1) {
459 return NULL; /* avoids a negative `l1' */
460 } else {
461 const char *init; /* to search for a `*s2' inside `s1' */
462 l2--; /* 1st char will be checked by `memchr' */
463 l1 = l1-l2; /* `s2' cannot be found after that */
464 while (l1 > 0 && (init = (const char *)memchr(s1, *s2, l1)) != NULL) {
465 init++; /* 1st char is already checked */
466 if (memcmp(init, s2+1, l2) == 0) {
467 return init-1;
468 } else { /* correct `l1' and `s1' to try again */
469 l1 -= (size_t)(init-s1);
470 s1 = init;
471 }
472 }
473 return NULL; /* not found */
474 }
475}
476
477static void push_onecapture(MatchState *ms, int i, const char *s, const char *e) 423static void push_onecapture(MatchState *ms, int i, const char *s, const char *e)
478{ 424{
479 if (i >= ms->level) { 425 if (i >= ms->level) {
@@ -501,64 +447,60 @@ static int push_captures(MatchState *ms, const char *s, const char *e)
501 return nlevels; /* number of strings pushed */ 447 return nlevels; /* number of strings pushed */
502} 448}
503 449
504static ptrdiff_t posrelat(ptrdiff_t pos, size_t len)
505{
506 /* relative string position: negative means back from end */
507 if (pos < 0) pos += (ptrdiff_t)len + 1;
508 return (pos >= 0) ? pos : 0;
509}
510
511static int str_find_aux(lua_State *L, int find) 450static int str_find_aux(lua_State *L, int find)
512{ 451{
513 size_t l1, l2; 452 GCstr *s = lj_lib_checkstr(L, 1);
514 const char *s = luaL_checklstring(L, 1, &l1); 453 GCstr *p = lj_lib_checkstr(L, 2);
515 const char *p = luaL_checklstring(L, 2, &l2); 454 int32_t start = lj_lib_optint(L, 3, 1);
516 ptrdiff_t init = posrelat(luaL_optinteger(L, 3, 1), l1) - 1; 455 MSize st;
517 if (init < 0) { 456 if (start < 0) start += (int32_t)s->len; else start--;
518 init = 0; 457 if (start < 0) start = 0;
519 } else if ((size_t)(init) > l1) { 458 st = (MSize)start;
459 if (st > s->len) {
520#if LJ_52 460#if LJ_52
521 setnilV(L->top-1); 461 setnilV(L->top-1);
522 return 1; 462 return 1;
523#else 463#else
524 init = (ptrdiff_t)l1; 464 st = s->len;
525#endif 465#endif
526 } 466 }
527 if (find && (lua_toboolean(L, 4) || /* explicit request? */ 467 if (find && ((L->base+3 < L->top && tvistruecond(L->base+3)) ||
528 strpbrk(p, SPECIALS) == NULL)) { /* or no special characters? */ 468 !lj_str_haspattern(p))) { /* Search for fixed string. */
529 /* do a plain search */ 469 const char *q = lj_str_find(strdata(s)+st, strdata(p), s->len-st, p->len);
530 const char *s2 = lmemfind(s+init, l1-(size_t)init, p, l2); 470 if (q) {
531 if (s2) { 471 setintV(L->top-2, (int32_t)(q-strdata(s)) + 1);
532 lua_pushinteger(L, s2-s+1); 472 setintV(L->top-1, (int32_t)(q-strdata(s)) + (int32_t)p->len);
533 lua_pushinteger(L, s2-s+(ptrdiff_t)l2);
534 return 2; 473 return 2;
535 } 474 }
536 } else { 475 } else { /* Search for pattern. */
537 MatchState ms; 476 MatchState ms;
538 int anchor = (*p == '^') ? (p++, 1) : 0; 477 const char *pstr = strdata(p);
539 const char *s1=s+init; 478 const char *sstr = strdata(s) + st;
479 int anchor = 0;
480 if (*pstr == '^') { pstr++; anchor = 1; }
540 ms.L = L; 481 ms.L = L;
541 ms.src_init = s; 482 ms.src_init = strdata(s);
542 ms.src_end = s+l1; 483 ms.src_end = strdata(s) + s->len;
543 do { 484 do { /* Loop through string and try to match the pattern. */
544 const char *res; 485 const char *q;
545 ms.level = ms.depth = 0; 486 ms.level = ms.depth = 0;
546 if ((res=match(&ms, s1, p)) != NULL) { 487 q = match(&ms, sstr, pstr);
488 if (q) {
547 if (find) { 489 if (find) {
548 lua_pushinteger(L, s1-s+1); /* start */ 490 setintV(L->top++, (int32_t)(sstr-(strdata(s)-1)));
549 lua_pushinteger(L, res-s); /* end */ 491 setintV(L->top++, (int32_t)(q-strdata(s)));
550 return push_captures(&ms, NULL, 0) + 2; 492 return push_captures(&ms, NULL, NULL) + 2;
551 } else { 493 } else {
552 return push_captures(&ms, s1, res); 494 return push_captures(&ms, sstr, q);
553 } 495 }
554 } 496 }
555 } while (s1++ < ms.src_end && !anchor); 497 } while (sstr++ < ms.src_end && !anchor);
556 } 498 }
557 lua_pushnil(L); /* not found */ 499 setnilV(L->top-1); /* Not found. */
558 return 1; 500 return 1;
559} 501}
560 502
561LJLIB_CF(string_find) 503LJLIB_CF(string_find) LJLIB_REC(.)
562{ 504{
563 return str_find_aux(L, 1); 505 return str_find_aux(L, 1);
564} 506}
@@ -698,221 +640,92 @@ LJLIB_CF(string_gsub)
698 640
699/* ------------------------------------------------------------------------ */ 641/* ------------------------------------------------------------------------ */
700 642
701/* maximum size of each formatted item (> len(format('%99.99f', -1e308))) */ 643/* Emulate tostring() inline. */
702#define MAX_FMTITEM 512 644static GCstr *string_fmt_tostring(lua_State *L, int arg, int retry)
703/* valid flags in a format specification */
704#define FMT_FLAGS "-+ #0"
705/*
706** maximum size of each format specification (such as '%-099.99d')
707** (+10 accounts for %99.99x plus margin of error)
708*/
709#define MAX_FMTSPEC (sizeof(FMT_FLAGS) + sizeof(LUA_INTFRMLEN) + 10)
710
711static void addquoted(lua_State *L, luaL_Buffer *b, int arg)
712{
713 GCstr *str = lj_lib_checkstr(L, arg);
714 int32_t len = (int32_t)str->len;
715 const char *s = strdata(str);
716 luaL_addchar(b, '"');
717 while (len--) {
718 uint32_t c = uchar(*s);
719 if (c == '"' || c == '\\' || c == '\n') {
720 luaL_addchar(b, '\\');
721 } else if (lj_char_iscntrl(c)) { /* This can only be 0-31 or 127. */
722 uint32_t d;
723 luaL_addchar(b, '\\');
724 if (c >= 100 || lj_char_isdigit(uchar(s[1]))) {
725 luaL_addchar(b, '0'+(c >= 100)); if (c >= 100) c -= 100;
726 goto tens;
727 } else if (c >= 10) {
728 tens:
729 d = (c * 205) >> 11; c -= d * 10; luaL_addchar(b, '0'+d);
730 }
731 c += '0';
732 }
733 luaL_addchar(b, c);
734 s++;
735 }
736 luaL_addchar(b, '"');
737}
738
739static const char *scanformat(lua_State *L, const char *strfrmt, char *form)
740{
741 const char *p = strfrmt;
742 while (*p != '\0' && strchr(FMT_FLAGS, *p) != NULL) p++; /* skip flags */
743 if ((size_t)(p - strfrmt) >= sizeof(FMT_FLAGS))
744 lj_err_caller(L, LJ_ERR_STRFMTR);
745 if (lj_char_isdigit(uchar(*p))) p++; /* skip width */
746 if (lj_char_isdigit(uchar(*p))) p++; /* (2 digits at most) */
747 if (*p == '.') {
748 p++;
749 if (lj_char_isdigit(uchar(*p))) p++; /* skip precision */
750 if (lj_char_isdigit(uchar(*p))) p++; /* (2 digits at most) */
751 }
752 if (lj_char_isdigit(uchar(*p)))
753 lj_err_caller(L, LJ_ERR_STRFMTW);
754 *(form++) = '%';
755 strncpy(form, strfrmt, (size_t)(p - strfrmt + 1));
756 form += p - strfrmt + 1;
757 *form = '\0';
758 return p;
759}
760
761static void addintlen(char *form)
762{
763 size_t l = strlen(form);
764 char spec = form[l - 1];
765 strcpy(form + l - 1, LUA_INTFRMLEN);
766 form[l + sizeof(LUA_INTFRMLEN) - 2] = spec;
767 form[l + sizeof(LUA_INTFRMLEN) - 1] = '\0';
768}
769
770static unsigned LUA_INTFRM_T num2intfrm(lua_State *L, int arg)
771{
772 if (sizeof(LUA_INTFRM_T) == 4) {
773 return (LUA_INTFRM_T)lj_lib_checkbit(L, arg);
774 } else {
775 cTValue *o;
776 lj_lib_checknumber(L, arg);
777 o = L->base+arg-1;
778 if (tvisint(o))
779 return (LUA_INTFRM_T)intV(o);
780 else
781 return (LUA_INTFRM_T)numV(o);
782 }
783}
784
785static unsigned LUA_INTFRM_T num2uintfrm(lua_State *L, int arg)
786{
787 if (sizeof(LUA_INTFRM_T) == 4) {
788 return (unsigned LUA_INTFRM_T)lj_lib_checkbit(L, arg);
789 } else {
790 cTValue *o;
791 lj_lib_checknumber(L, arg);
792 o = L->base+arg-1;
793 if (tvisint(o))
794 return (unsigned LUA_INTFRM_T)intV(o);
795 else if ((int32_t)o->u32.hi < 0)
796 return (unsigned LUA_INTFRM_T)(LUA_INTFRM_T)numV(o);
797 else
798 return (unsigned LUA_INTFRM_T)numV(o);
799 }
800}
801
802static GCstr *meta_tostring(lua_State *L, int arg)
803{ 645{
804 TValue *o = L->base+arg-1; 646 TValue *o = L->base+arg-1;
805 cTValue *mo; 647 cTValue *mo;
806 lua_assert(o < L->top); /* Caller already checks for existence. */ 648 lua_assert(o < L->top); /* Caller already checks for existence. */
807 if (LJ_LIKELY(tvisstr(o))) 649 if (LJ_LIKELY(tvisstr(o)))
808 return strV(o); 650 return strV(o);
809 if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) { 651 if (retry != 2 && !tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
810 copyTV(L, L->top++, mo); 652 copyTV(L, L->top++, mo);
811 copyTV(L, L->top++, o); 653 copyTV(L, L->top++, o);
812 lua_call(L, 1, 1); 654 lua_call(L, 1, 1);
813 L->top--; 655 copyTV(L, L->base+arg-1, --L->top);
814 if (tvisstr(L->top)) 656 return NULL; /* Buffer may be overwritten, retry. */
815 return strV(L->top);
816 o = L->base+arg-1;
817 copyTV(L, o, L->top);
818 }
819 if (tvisnumber(o)) {
820 return lj_str_fromnumber(L, o);
821 } else if (tvisnil(o)) {
822 return lj_str_newlit(L, "nil");
823 } else if (tvisfalse(o)) {
824 return lj_str_newlit(L, "false");
825 } else if (tvistrue(o)) {
826 return lj_str_newlit(L, "true");
827 } else {
828 if (tvisfunc(o) && isffunc(funcV(o)))
829 lj_str_pushf(L, "function: builtin#%d", funcV(o)->c.ffid);
830 else
831 lj_str_pushf(L, "%s: %p", lj_typename(o), lua_topointer(L, arg));
832 L->top--;
833 return strV(L->top);
834 } 657 }
835} 658 return lj_strfmt_obj(L, o);
836 659}
837LJLIB_CF(string_format) 660
838{ 661LJLIB_CF(string_format) LJLIB_REC(.)
839 int arg = 1, top = (int)(L->top - L->base); 662{
840 GCstr *fmt = lj_lib_checkstr(L, arg); 663 int arg, top = (int)(L->top - L->base);
841 const char *strfrmt = strdata(fmt); 664 GCstr *fmt;
842 const char *strfrmt_end = strfrmt + fmt->len; 665 SBuf *sb;
843 luaL_Buffer b; 666 FormatState fs;
844 luaL_buffinit(L, &b); 667 SFormat sf;
845 while (strfrmt < strfrmt_end) { 668 int retry = 0;
846 if (*strfrmt != L_ESC) { 669again:
847 luaL_addchar(&b, *strfrmt++); 670 arg = 1;
848 } else if (*++strfrmt == L_ESC) { 671 sb = lj_buf_tmp_(L);
849 luaL_addchar(&b, *strfrmt++); /* %% */ 672 fmt = lj_lib_checkstr(L, arg);
850 } else { /* format item */ 673 lj_strfmt_init(&fs, strdata(fmt), fmt->len);
851 char form[MAX_FMTSPEC]; /* to store the format (`%...') */ 674 while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) {
852 char buff[MAX_FMTITEM]; /* to store the formatted item */ 675 if (sf == STRFMT_LIT) {
676 lj_buf_putmem(sb, fs.str, fs.len);
677 } else if (sf == STRFMT_ERR) {
678 lj_err_callerv(L, LJ_ERR_STRFMT, strdata(lj_str_new(L, fs.str, fs.len)));
679 } else {
853 if (++arg > top) 680 if (++arg > top)
854 luaL_argerror(L, arg, lj_obj_typename[0]); 681 luaL_argerror(L, arg, lj_obj_typename[0]);
855 strfrmt = scanformat(L, strfrmt, form); 682 switch (STRFMT_TYPE(sf)) {
856 switch (*strfrmt++) { 683 case STRFMT_INT:
857 case 'c': 684 if (tvisint(L->base+arg-1)) {
858 sprintf(buff, form, lj_lib_checkint(L, arg)); 685 int32_t k = intV(L->base+arg-1);
686 if (sf == STRFMT_INT)
687 lj_strfmt_putint(sb, k); /* Shortcut for plain %d. */
688 else
689 lj_strfmt_putfxint(sb, sf, k);
690 } else {
691 lj_strfmt_putfnum_int(sb, sf, lj_lib_checknum(L, arg));
692 }
859 break; 693 break;
860 case 'd': case 'i': 694 case STRFMT_UINT:
861 addintlen(form); 695 if (tvisint(L->base+arg-1))
862 sprintf(buff, form, num2intfrm(L, arg)); 696 lj_strfmt_putfxint(sb, sf, intV(L->base+arg-1));
697 else
698 lj_strfmt_putfnum_uint(sb, sf, lj_lib_checknum(L, arg));
863 break; 699 break;
864 case 'o': case 'u': case 'x': case 'X': 700 case STRFMT_NUM:
865 addintlen(form); 701 lj_strfmt_putfnum(sb, sf, lj_lib_checknum(L, arg));
866 sprintf(buff, form, num2uintfrm(L, arg));
867 break; 702 break;
868 case 'e': case 'E': case 'f': case 'g': case 'G': case 'a': case 'A': { 703 case STRFMT_STR: {
869 TValue tv; 704 GCstr *str = string_fmt_tostring(L, arg, retry);
870 tv.n = lj_lib_checknum(L, arg); 705 if (str == NULL)
871 if (LJ_UNLIKELY((tv.u32.hi << 1) >= 0xffe00000)) { 706 retry = 1;
872 /* Canonicalize output of non-finite values. */ 707 else if ((sf & STRFMT_T_QUOTED))
873 char *p, nbuf[LJ_STR_NUMBUF]; 708 lj_strfmt_putquoted(sb, str); /* No formatting. */
874 size_t len = lj_str_bufnum(nbuf, &tv); 709 else
875 if (strfrmt[-1] < 'a') { 710 lj_strfmt_putfstr(sb, sf, str);
876 nbuf[len-3] = nbuf[len-3] - 0x20;
877 nbuf[len-2] = nbuf[len-2] - 0x20;
878 nbuf[len-1] = nbuf[len-1] - 0x20;
879 }
880 nbuf[len] = '\0';
881 for (p = form; *p < 'A' && *p != '.'; p++) ;
882 *p++ = 's'; *p = '\0';
883 sprintf(buff, form, nbuf);
884 break;
885 }
886 sprintf(buff, form, (double)tv.n);
887 break; 711 break;
888 } 712 }
889 case 'q': 713 case STRFMT_CHAR:
890 addquoted(L, &b, arg); 714 lj_strfmt_putfchar(sb, sf, lj_lib_checkint(L, arg));
891 continue; 715 break;
892 case 'p': 716 case STRFMT_PTR: /* No formatting. */
893 lj_str_pushf(L, "%p", lua_topointer(L, arg)); 717 setsbufP(sb, lj_strfmt_wptr(lj_buf_more(sb, STRFMT_MAXBUF_PTR),
894 luaL_addvalue(&b); 718 lj_obj_ptr(L->base+arg-1)));
895 continue;
896 case 's': {
897 GCstr *str = meta_tostring(L, arg);
898 if (!strchr(form, '.') && str->len >= 100) {
899 /* no precision and string is too long to be formatted;
900 keep original string */
901 setstrV(L, L->top++, str);
902 luaL_addvalue(&b);
903 continue;
904 }
905 sprintf(buff, form, strdata(str));
906 break; 719 break;
907 }
908 default: 720 default:
909 lj_err_callerv(L, LJ_ERR_STRFMTO, *(strfrmt -1)); 721 lua_assert(0);
910 break; 722 break;
911 } 723 }
912 luaL_addlstring(&b, buff, strlen(buff));
913 } 724 }
914 } 725 }
915 luaL_pushresult(&b); 726 if (retry++ == 1) goto again;
727 setstrV(L, L->top-1, lj_buf_str(L, sb));
728 lj_gc_check(L);
916 return 1; 729 return 1;
917} 730}
918 731
diff --git a/src/lib_table.c b/src/lib_table.c
index 542ed1f8..d7df8399 100644
--- a/src/lib_table.c
+++ b/src/lib_table.c
@@ -16,6 +16,7 @@
16#include "lj_obj.h" 16#include "lj_obj.h"
17#include "lj_gc.h" 17#include "lj_gc.h"
18#include "lj_err.h" 18#include "lj_err.h"
19#include "lj_buf.h"
19#include "lj_tab.h" 20#include "lj_tab.h"
20#include "lj_lib.h" 21#include "lj_lib.h"
21 22
@@ -23,50 +24,34 @@
23 24
24#define LJLIB_MODULE_table 25#define LJLIB_MODULE_table
25 26
26LJLIB_CF(table_foreachi) 27LJLIB_LUA(table_foreachi) /*
27{ 28 function(t, f)
28 GCtab *t = lj_lib_checktab(L, 1); 29 CHECK_tab(t)
29 GCfunc *func = lj_lib_checkfunc(L, 2); 30 CHECK_func(f)
30 MSize i, n = lj_tab_len(t); 31 for i=1,#t do
31 for (i = 1; i <= n; i++) { 32 local r = f(i, t[i])
32 cTValue *val; 33 if r ~= nil then return r end
33 setfuncV(L, L->top, func); 34 end
34 setintV(L->top+1, i); 35 end
35 val = lj_tab_getint(t, (int32_t)i); 36*/
36 if (val) { copyTV(L, L->top+2, val); } else { setnilV(L->top+2); }
37 L->top += 3;
38 lua_call(L, 2, 1);
39 if (!tvisnil(L->top-1))
40 return 1;
41 L->top--;
42 }
43 return 0;
44}
45 37
46LJLIB_CF(table_foreach) 38LJLIB_LUA(table_foreach) /*
47{ 39 function(t, f)
48 GCtab *t = lj_lib_checktab(L, 1); 40 CHECK_tab(t)
49 GCfunc *func = lj_lib_checkfunc(L, 2); 41 CHECK_func(f)
50 L->top = L->base+3; 42 for k, v in PAIRS(t) do
51 setnilV(L->top-1); 43 local r = f(k, v)
52 while (lj_tab_next(L, t, L->top-1)) { 44 if r ~= nil then return r end
53 copyTV(L, L->top+2, L->top); 45 end
54 copyTV(L, L->top+1, L->top-1); 46 end
55 setfuncV(L, L->top, func); 47*/
56 L->top += 3;
57 lua_call(L, 2, 1);
58 if (!tvisnil(L->top-1))
59 return 1;
60 L->top--;
61 }
62 return 0;
63}
64 48
65LJLIB_ASM(table_getn) LJLIB_REC(.) 49LJLIB_LUA(table_getn) /*
66{ 50 function(t)
67 lj_lib_checktab(L, 1); 51 CHECK_tab(t)
68 return FFH_UNREACHABLE; 52 return #t
69} 53 end
54*/
70 55
71LJLIB_CF(table_maxn) 56LJLIB_CF(table_maxn)
72{ 57{
@@ -119,52 +104,47 @@ LJLIB_CF(table_insert) LJLIB_REC(.)
119 return 0; 104 return 0;
120} 105}
121 106
122LJLIB_CF(table_remove) LJLIB_REC(.) 107LJLIB_LUA(table_remove) /*
123{ 108 function(t, pos)
124 GCtab *t = lj_lib_checktab(L, 1); 109 CHECK_tab(t)
125 int32_t e = (int32_t)lj_tab_len(t); 110 local len = #t
126 int32_t pos = lj_lib_optint(L, 2, e); 111 if pos == nil then
127 if (!(1 <= pos && pos <= e)) /* Nothing to remove? */ 112 if len ~= 0 then
128 return 0; 113 local old = t[len]
129 lua_rawgeti(L, 1, pos); /* Get previous value. */ 114 t[len] = nil
130 /* NOBARRIER: This just moves existing elements around. */ 115 return old
131 for (; pos < e; pos++) { 116 end
132 cTValue *src = lj_tab_getint(t, pos+1); 117 else
133 TValue *dst = lj_tab_setint(L, t, pos); 118 CHECK_int(pos)
134 if (src) { 119 if pos >= 1 and pos <= len then
135 copyTV(L, dst, src); 120 local old = t[pos]
136 } else { 121 for i=pos+1,len do
137 setnilV(dst); 122 t[i-1] = t[i]
138 } 123 end
139 } 124 t[len] = nil
140 setnilV(lj_tab_setint(L, t, e)); /* Remove (last) value. */ 125 return old
141 return 1; /* Return previous value. */ 126 end
142} 127 end
128 end
129*/
143 130
144LJLIB_CF(table_concat) 131LJLIB_CF(table_concat) LJLIB_REC(.)
145{ 132{
146 luaL_Buffer b;
147 GCtab *t = lj_lib_checktab(L, 1); 133 GCtab *t = lj_lib_checktab(L, 1);
148 GCstr *sep = lj_lib_optstr(L, 2); 134 GCstr *sep = lj_lib_optstr(L, 2);
149 MSize seplen = sep ? sep->len : 0;
150 int32_t i = lj_lib_optint(L, 3, 1); 135 int32_t i = lj_lib_optint(L, 3, 1);
151 int32_t e = (L->base+3 < L->top && !tvisnil(L->base+3)) ? 136 int32_t e = (L->base+3 < L->top && !tvisnil(L->base+3)) ?
152 lj_lib_checkint(L, 4) : (int32_t)lj_tab_len(t); 137 lj_lib_checkint(L, 4) : (int32_t)lj_tab_len(t);
153 luaL_buffinit(L, &b); 138 SBuf *sb = lj_buf_tmp_(L);
154 if (i <= e) { 139 SBuf *sbx = lj_buf_puttab(sb, t, sep, i, e);
155 for (;;) { 140 if (LJ_UNLIKELY(!sbx)) { /* Error: bad element type. */
156 cTValue *o; 141 int32_t idx = (int32_t)(intptr_t)sbufP(sb);
157 lua_rawgeti(L, 1, i); 142 cTValue *o = lj_tab_getint(t, idx);
158 o = L->top-1; 143 lj_err_callerv(L, LJ_ERR_TABCAT,
159 if (!(tvisstr(o) || tvisnumber(o))) 144 lj_obj_itypename[o ? itypemap(o) : ~LJ_TNIL], idx);
160 lj_err_callerv(L, LJ_ERR_TABCAT, lj_typename(o), i);
161 luaL_addvalue(&b);
162 if (i++ == e) break;
163 if (seplen)
164 luaL_addlstring(&b, strdata(sep), seplen);
165 }
166 } 145 }
167 luaL_pushresult(&b); 146 setstrV(L, L->top-1, lj_buf_str(L, sbx));
147 lj_gc_check(L);
168 return 1; 148 return 1;
169} 149}
170 150
diff --git a/src/lj_api.c b/src/lj_api.c
index edb2d620..8aaafdc5 100644
--- a/src/lj_api.c
+++ b/src/lj_api.c
@@ -24,6 +24,7 @@
24#include "lj_trace.h" 24#include "lj_trace.h"
25#include "lj_vm.h" 25#include "lj_vm.h"
26#include "lj_strscan.h" 26#include "lj_strscan.h"
27#include "lj_strfmt.h"
27 28
28/* -- Common helper functions --------------------------------------------- */ 29/* -- Common helper functions --------------------------------------------- */
29 30
@@ -434,7 +435,7 @@ LUA_API const char *lua_tolstring(lua_State *L, int idx, size_t *len)
434 } else if (tvisnumber(o)) { 435 } else if (tvisnumber(o)) {
435 lj_gc_check(L); 436 lj_gc_check(L);
436 o = index2adr(L, idx); /* GC may move the stack. */ 437 o = index2adr(L, idx); /* GC may move the stack. */
437 s = lj_str_fromnumber(L, o); 438 s = lj_strfmt_number(L, o);
438 setstrV(L, o, s); 439 setstrV(L, o, s);
439 } else { 440 } else {
440 if (len != NULL) *len = 0; 441 if (len != NULL) *len = 0;
@@ -453,7 +454,7 @@ LUALIB_API const char *luaL_checklstring(lua_State *L, int idx, size_t *len)
453 } else if (tvisnumber(o)) { 454 } else if (tvisnumber(o)) {
454 lj_gc_check(L); 455 lj_gc_check(L);
455 o = index2adr(L, idx); /* GC may move the stack. */ 456 o = index2adr(L, idx); /* GC may move the stack. */
456 s = lj_str_fromnumber(L, o); 457 s = lj_strfmt_number(L, o);
457 setstrV(L, o, s); 458 setstrV(L, o, s);
458 } else { 459 } else {
459 lj_err_argt(L, idx, LUA_TSTRING); 460 lj_err_argt(L, idx, LUA_TSTRING);
@@ -475,7 +476,7 @@ LUALIB_API const char *luaL_optlstring(lua_State *L, int idx,
475 } else if (tvisnumber(o)) { 476 } else if (tvisnumber(o)) {
476 lj_gc_check(L); 477 lj_gc_check(L);
477 o = index2adr(L, idx); /* GC may move the stack. */ 478 o = index2adr(L, idx); /* GC may move the stack. */
478 s = lj_str_fromnumber(L, o); 479 s = lj_strfmt_number(L, o);
479 setstrV(L, o, s); 480 setstrV(L, o, s);
480 } else { 481 } else {
481 lj_err_argt(L, idx, LUA_TSTRING); 482 lj_err_argt(L, idx, LUA_TSTRING);
@@ -507,7 +508,7 @@ LUA_API size_t lua_objlen(lua_State *L, int idx)
507 } else if (tvisudata(o)) { 508 } else if (tvisudata(o)) {
508 return udataV(o)->len; 509 return udataV(o)->len;
509 } else if (tvisnumber(o)) { 510 } else if (tvisnumber(o)) {
510 GCstr *s = lj_str_fromnumber(L, o); 511 GCstr *s = lj_strfmt_number(L, o);
511 setstrV(L, o, s); 512 setstrV(L, o, s);
512 return s->len; 513 return s->len;
513 } else { 514 } else {
@@ -545,17 +546,7 @@ LUA_API lua_State *lua_tothread(lua_State *L, int idx)
545 546
546LUA_API const void *lua_topointer(lua_State *L, int idx) 547LUA_API const void *lua_topointer(lua_State *L, int idx)
547{ 548{
548 cTValue *o = index2adr(L, idx); 549 return lj_obj_ptr(index2adr(L, idx));
549 if (tvisudata(o))
550 return uddata(udataV(o));
551 else if (tvislightud(o))
552 return lightudV(o);
553 else if (tviscdata(o))
554 return cdataptr(cdataV(o));
555 else if (tvisgcv(o))
556 return gcV(o);
557 else
558 return NULL;
559} 550}
560 551
561/* -- Stack setters (object creation) ------------------------------------- */ 552/* -- Stack setters (object creation) ------------------------------------- */
@@ -606,7 +597,7 @@ LUA_API const char *lua_pushvfstring(lua_State *L, const char *fmt,
606 va_list argp) 597 va_list argp)
607{ 598{
608 lj_gc_check(L); 599 lj_gc_check(L);
609 return lj_str_pushvf(L, fmt, argp); 600 return lj_strfmt_pushvf(L, fmt, argp);
610} 601}
611 602
612LUA_API const char *lua_pushfstring(lua_State *L, const char *fmt, ...) 603LUA_API const char *lua_pushfstring(lua_State *L, const char *fmt, ...)
@@ -615,7 +606,7 @@ LUA_API const char *lua_pushfstring(lua_State *L, const char *fmt, ...)
615 va_list argp; 606 va_list argp;
616 lj_gc_check(L); 607 lj_gc_check(L);
617 va_start(argp, fmt); 608 va_start(argp, fmt);
618 ret = lj_str_pushvf(L, fmt, argp); 609 ret = lj_strfmt_pushvf(L, fmt, argp);
619 va_end(argp); 610 va_end(argp);
620 return ret; 611 return ret;
621} 612}
diff --git a/src/lj_arch.h b/src/lj_arch.h
index 9ea10d0f..c5f2fb3d 100644
--- a/src/lj_arch.h
+++ b/src/lj_arch.h
@@ -227,6 +227,7 @@
227 227
228#elif LUAJIT_TARGET == LUAJIT_ARCH_PPCSPE 228#elif LUAJIT_TARGET == LUAJIT_ARCH_PPCSPE
229 229
230#error "The PPC/e500 port is broken and will be abandoned with LuaJIT 2.1"
230#define LJ_ARCH_NAME "ppcspe" 231#define LJ_ARCH_NAME "ppcspe"
231#define LJ_ARCH_BITS 32 232#define LJ_ARCH_BITS 32
232#define LJ_ARCH_ENDIAN LUAJIT_BE 233#define LJ_ARCH_ENDIAN LUAJIT_BE
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 316e81d6..a80d6adf 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -179,6 +179,12 @@ IRFLDEF(FLOFS)
179#error "Missing instruction emitter for target CPU" 179#error "Missing instruction emitter for target CPU"
180#endif 180#endif
181 181
182/* Generic load/store of register from/to stack slot. */
183#define emit_spload(as, ir, r, ofs) \
184 emit_loadofs(as, ir, (r), RID_SP, (ofs))
185#define emit_spstore(as, ir, r, ofs) \
186 emit_storeofs(as, ir, (r), RID_SP, (ofs))
187
182/* -- Register allocator debugging ---------------------------------------- */ 188/* -- Register allocator debugging ---------------------------------------- */
183 189
184/* #define LUAJIT_DEBUG_RA */ 190/* #define LUAJIT_DEBUG_RA */
@@ -943,44 +949,6 @@ static void asm_snap_prep(ASMState *as)
943 949
944/* -- Miscellaneous helpers ----------------------------------------------- */ 950/* -- Miscellaneous helpers ----------------------------------------------- */
945 951
946/* Collect arguments from CALL* and CARG instructions. */
947static void asm_collectargs(ASMState *as, IRIns *ir,
948 const CCallInfo *ci, IRRef *args)
949{
950 uint32_t n = CCI_NARGS(ci);
951 lua_assert(n <= CCI_NARGS_MAX*2); /* Account for split args. */
952 if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
953 while (n-- > 1) {
954 ir = IR(ir->op1);
955 lua_assert(ir->o == IR_CARG);
956 args[n] = ir->op2 == REF_NIL ? 0 : ir->op2;
957 }
958 args[0] = ir->op1 == REF_NIL ? 0 : ir->op1;
959 lua_assert(IR(ir->op1)->o != IR_CARG);
960}
961
962/* Reconstruct CCallInfo flags for CALLX*. */
963static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
964{
965 uint32_t nargs = 0;
966 if (ir->op1 != REF_NIL) { /* Count number of arguments first. */
967 IRIns *ira = IR(ir->op1);
968 nargs++;
969 while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); }
970 }
971#if LJ_HASFFI
972 if (IR(ir->op2)->o == IR_CARG) { /* Copy calling convention info. */
973 CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i;
974 CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id);
975 nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0);
976#if LJ_TARGET_X86
977 nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT);
978#endif
979 }
980#endif
981 return (nargs | (ir->t.irt << CCI_OTSHIFT));
982}
983
984/* Calculate stack adjustment. */ 952/* Calculate stack adjustment. */
985static int32_t asm_stack_adjust(ASMState *as) 953static int32_t asm_stack_adjust(ASMState *as)
986{ 954{
@@ -1065,6 +1033,253 @@ static void asm_gcstep(ASMState *as, IRIns *ir)
1065 as->gcsteps = 0x80000000; /* Prevent implicit GC check further up. */ 1033 as->gcsteps = 0x80000000; /* Prevent implicit GC check further up. */
1066} 1034}
1067 1035
1036/* -- Buffer operations --------------------------------------------------- */
1037
1038static void asm_tvptr(ASMState *as, Reg dest, IRRef ref);
1039
1040static void asm_bufhdr(ASMState *as, IRIns *ir)
1041{
1042 Reg sb = ra_dest(as, ir, RSET_GPR);
1043 if ((ir->op2 & IRBUFHDR_APPEND)) {
1044 /* Rematerialize const buffer pointer instead of likely spill. */
1045 IRIns *irp = IR(ir->op1);
1046 if (!(ra_hasreg(irp->r) || irp == ir-1 ||
1047 (irp == ir-2 && !ra_used(ir-1)))) {
1048 while (!(irp->o == IR_BUFHDR && !(irp->op2 & IRBUFHDR_APPEND)))
1049 irp = IR(irp->op1);
1050 if (irref_isk(irp->op1)) {
1051 ra_weak(as, ra_allocref(as, ir->op1, RSET_GPR));
1052 ir = irp;
1053 }
1054 }
1055 } else {
1056 Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
1057 /* Passing ir isn't strictly correct, but it's an IRT_P32, too. */
1058 emit_storeofs(as, ir, tmp, sb, offsetof(SBuf, p));
1059 emit_loadofs(as, ir, tmp, sb, offsetof(SBuf, b));
1060 }
1061#if LJ_TARGET_X86ORX64
1062 ra_left(as, sb, ir->op1);
1063#else
1064 ra_leftov(as, sb, ir->op1);
1065#endif
1066}
1067
1068static void asm_bufput(ASMState *as, IRIns *ir)
1069{
1070 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_putstr];
1071 IRRef args[3];
1072 IRIns *irs;
1073 int kchar = -1;
1074 args[0] = ir->op1; /* SBuf * */
1075 args[1] = ir->op2; /* GCstr * */
1076 irs = IR(ir->op2);
1077 lua_assert(irt_isstr(irs->t));
1078 if (irs->o == IR_KGC) {
1079 GCstr *s = ir_kstr(irs);
1080 if (s->len == 1) { /* Optimize put of single-char string constant. */
1081 kchar = strdata(s)[0];
1082 args[1] = ASMREF_TMP1; /* int, truncated to char */
1083 ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
1084 }
1085 } else if (mayfuse(as, ir->op2) && ra_noreg(irs->r)) {
1086 if (irs->o == IR_TOSTR) { /* Fuse number to string conversions. */
1087 if (irs->op2 == IRTOSTR_NUM) {
1088 args[1] = ASMREF_TMP1; /* TValue * */
1089 ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putnum];
1090 } else {
1091 lua_assert(irt_isinteger(IR(irs->op1)->t));
1092 args[1] = irs->op1; /* int */
1093 if (irs->op2 == IRTOSTR_INT)
1094 ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putint];
1095 else
1096 ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
1097 }
1098 } else if (irs->o == IR_SNEW) { /* Fuse string allocation. */
1099 args[1] = irs->op1; /* const void * */
1100 args[2] = irs->op2; /* MSize */
1101 ci = &lj_ir_callinfo[IRCALL_lj_buf_putmem];
1102 }
1103 }
1104 asm_setupresult(as, ir, ci); /* SBuf * */
1105 asm_gencall(as, ci, args);
1106 if (args[1] == ASMREF_TMP1) {
1107 Reg tmp = ra_releasetmp(as, ASMREF_TMP1);
1108 if (kchar == -1)
1109 asm_tvptr(as, tmp, irs->op1);
1110 else
1111 ra_allockreg(as, kchar, tmp);
1112 }
1113}
1114
1115static void asm_bufstr(ASMState *as, IRIns *ir)
1116{
1117 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_tostr];
1118 IRRef args[1];
1119 args[0] = ir->op1; /* SBuf *sb */
1120 as->gcsteps++;
1121 asm_setupresult(as, ir, ci); /* GCstr * */
1122 asm_gencall(as, ci, args);
1123}
1124
1125/* -- Type conversions ---------------------------------------------------- */
1126
1127static void asm_tostr(ASMState *as, IRIns *ir)
1128{
1129 const CCallInfo *ci;
1130 IRRef args[2];
1131 args[0] = ASMREF_L;
1132 as->gcsteps++;
1133 if (ir->op2 == IRTOSTR_NUM) {
1134 args[1] = ASMREF_TMP1; /* cTValue * */
1135 ci = &lj_ir_callinfo[IRCALL_lj_strfmt_num];
1136 } else {
1137 args[1] = ir->op1; /* int32_t k */
1138 if (ir->op2 == IRTOSTR_INT)
1139 ci = &lj_ir_callinfo[IRCALL_lj_strfmt_int];
1140 else
1141 ci = &lj_ir_callinfo[IRCALL_lj_strfmt_char];
1142 }
1143 asm_setupresult(as, ir, ci); /* GCstr * */
1144 asm_gencall(as, ci, args);
1145 if (ir->op2 == IRTOSTR_NUM)
1146 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
1147}
1148
1149#if LJ_32 && LJ_HASFFI && !LJ_SOFTFP && !LJ_TARGET_X86
1150static void asm_conv64(ASMState *as, IRIns *ir)
1151{
1152 IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
1153 IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
1154 IRCallID id;
1155 IRRef args[2];
1156 lua_assert((ir-1)->o == IR_CONV && ir->o == IR_HIOP);
1157 args[LJ_BE] = (ir-1)->op1;
1158 args[LJ_LE] = ir->op1;
1159 if (st == IRT_NUM || st == IRT_FLOAT) {
1160 id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
1161 ir--;
1162 } else {
1163 id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
1164 }
1165 {
1166#if LJ_TARGET_ARM && !LJ_ABI_SOFTFP
1167 CCallInfo cim = lj_ir_callinfo[id], *ci = &cim;
1168 cim.flags |= CCI_VARARG; /* These calls don't use the hard-float ABI! */
1169#else
1170 const CCallInfo *ci = &lj_ir_callinfo[id];
1171#endif
1172 asm_setupresult(as, ir, ci);
1173 asm_gencall(as, ci, args);
1174 }
1175}
1176#endif
1177
1178/* -- Memory references --------------------------------------------------- */
1179
1180static void asm_newref(ASMState *as, IRIns *ir)
1181{
1182 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
1183 IRRef args[3];
1184 if (ir->r == RID_SINK)
1185 return;
1186 args[0] = ASMREF_L; /* lua_State *L */
1187 args[1] = ir->op1; /* GCtab *t */
1188 args[2] = ASMREF_TMP1; /* cTValue *key */
1189 asm_setupresult(as, ir, ci); /* TValue * */
1190 asm_gencall(as, ci, args);
1191 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
1192}
1193
1194/* -- Calls --------------------------------------------------------------- */
1195
1196/* Collect arguments from CALL* and CARG instructions. */
1197static void asm_collectargs(ASMState *as, IRIns *ir,
1198 const CCallInfo *ci, IRRef *args)
1199{
1200 uint32_t n = CCI_XNARGS(ci);
1201 lua_assert(n <= CCI_NARGS_MAX*2); /* Account for split args. */
1202 if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
1203 while (n-- > 1) {
1204 ir = IR(ir->op1);
1205 lua_assert(ir->o == IR_CARG);
1206 args[n] = ir->op2 == REF_NIL ? 0 : ir->op2;
1207 }
1208 args[0] = ir->op1 == REF_NIL ? 0 : ir->op1;
1209 lua_assert(IR(ir->op1)->o != IR_CARG);
1210}
1211
1212/* Reconstruct CCallInfo flags for CALLX*. */
1213static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
1214{
1215 uint32_t nargs = 0;
1216 if (ir->op1 != REF_NIL) { /* Count number of arguments first. */
1217 IRIns *ira = IR(ir->op1);
1218 nargs++;
1219 while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); }
1220 }
1221#if LJ_HASFFI
1222 if (IR(ir->op2)->o == IR_CARG) { /* Copy calling convention info. */
1223 CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i;
1224 CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id);
1225 nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0);
1226#if LJ_TARGET_X86
1227 nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT);
1228#endif
1229 }
1230#endif
1231 return (nargs | (ir->t.irt << CCI_OTSHIFT));
1232}
1233
1234static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
1235{
1236 const CCallInfo *ci = &lj_ir_callinfo[id];
1237 IRRef args[2];
1238 args[0] = ir->op1;
1239 args[1] = ir->op2;
1240 asm_setupresult(as, ir, ci);
1241 asm_gencall(as, ci, args);
1242}
1243
1244static void asm_call(ASMState *as, IRIns *ir)
1245{
1246 IRRef args[CCI_NARGS_MAX];
1247 const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
1248 asm_collectargs(as, ir, ci, args);
1249 asm_setupresult(as, ir, ci);
1250 asm_gencall(as, ci, args);
1251}
1252
1253#if !LJ_SOFTFP
1254static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref);
1255
1256#if !LJ_TARGET_X86ORX64
1257static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref)
1258{
1259 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
1260 IRRef args[2];
1261 args[0] = lref;
1262 args[1] = rref;
1263 asm_setupresult(as, ir, ci);
1264 asm_gencall(as, ci, args);
1265}
1266#endif
1267
1268static int asm_fpjoin_pow(ASMState *as, IRIns *ir)
1269{
1270 IRIns *irp = IR(ir->op1);
1271 if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
1272 IRIns *irpp = IR(irp->op1);
1273 if (irpp == ir-2 && irpp->o == IR_FPMATH &&
1274 irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
1275 asm_fppow(as, ir, irpp->op1, irp->op2);
1276 return 1;
1277 }
1278 }
1279 return 0;
1280}
1281#endif
1282
1068/* -- PHI and loop handling ----------------------------------------------- */ 1283/* -- PHI and loop handling ----------------------------------------------- */
1069 1284
1070/* Break a PHI cycle by renaming to a free register (evict if needed). */ 1285/* Break a PHI cycle by renaming to a free register (evict if needed). */
@@ -1336,6 +1551,124 @@ static void asm_loop(ASMState *as)
1336#error "Missing assembler for target CPU" 1551#error "Missing assembler for target CPU"
1337#endif 1552#endif
1338 1553
1554/* -- Instruction dispatch ------------------------------------------------ */
1555
1556/* Assemble a single instruction. */
1557static void asm_ir(ASMState *as, IRIns *ir)
1558{
1559 switch ((IROp)ir->o) {
1560 /* Miscellaneous ops. */
1561 case IR_LOOP: asm_loop(as); break;
1562 case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
1563 case IR_USE:
1564 ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
1565 case IR_PHI: asm_phi(as, ir); break;
1566 case IR_HIOP: asm_hiop(as, ir); break;
1567 case IR_GCSTEP: asm_gcstep(as, ir); break;
1568
1569 /* Guarded assertions. */
1570 case IR_LT: case IR_GE: case IR_LE: case IR_GT:
1571 case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
1572 case IR_ABC:
1573 asm_comp(as, ir);
1574 break;
1575 case IR_EQ: case IR_NE:
1576 if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
1577 as->curins--;
1578 asm_href(as, ir-1, (IROp)ir->o);
1579 } else {
1580 asm_equal(as, ir);
1581 }
1582 break;
1583
1584 case IR_RETF: asm_retf(as, ir); break;
1585
1586 /* Bit ops. */
1587 case IR_BNOT: asm_bnot(as, ir); break;
1588 case IR_BSWAP: asm_bswap(as, ir); break;
1589 case IR_BAND: asm_band(as, ir); break;
1590 case IR_BOR: asm_bor(as, ir); break;
1591 case IR_BXOR: asm_bxor(as, ir); break;
1592 case IR_BSHL: asm_bshl(as, ir); break;
1593 case IR_BSHR: asm_bshr(as, ir); break;
1594 case IR_BSAR: asm_bsar(as, ir); break;
1595 case IR_BROL: asm_brol(as, ir); break;
1596 case IR_BROR: asm_bror(as, ir); break;
1597
1598 /* Arithmetic ops. */
1599 case IR_ADD: asm_add(as, ir); break;
1600 case IR_SUB: asm_sub(as, ir); break;
1601 case IR_MUL: asm_mul(as, ir); break;
1602 case IR_DIV: asm_div(as, ir); break;
1603 case IR_MOD: asm_mod(as, ir); break;
1604 case IR_POW: asm_pow(as, ir); break;
1605 case IR_NEG: asm_neg(as, ir); break;
1606 case IR_ABS: asm_abs(as, ir); break;
1607 case IR_ATAN2: asm_atan2(as, ir); break;
1608 case IR_LDEXP: asm_ldexp(as, ir); break;
1609 case IR_MIN: asm_min(as, ir); break;
1610 case IR_MAX: asm_max(as, ir); break;
1611 case IR_FPMATH: asm_fpmath(as, ir); break;
1612
1613 /* Overflow-checking arithmetic ops. */
1614 case IR_ADDOV: asm_addov(as, ir); break;
1615 case IR_SUBOV: asm_subov(as, ir); break;
1616 case IR_MULOV: asm_mulov(as, ir); break;
1617
1618 /* Memory references. */
1619 case IR_AREF: asm_aref(as, ir); break;
1620 case IR_HREF: asm_href(as, ir, 0); break;
1621 case IR_HREFK: asm_hrefk(as, ir); break;
1622 case IR_NEWREF: asm_newref(as, ir); break;
1623 case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
1624 case IR_FREF: asm_fref(as, ir); break;
1625 case IR_STRREF: asm_strref(as, ir); break;
1626
1627 /* Loads and stores. */
1628 case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
1629 asm_ahuvload(as, ir);
1630 break;
1631 case IR_FLOAD: asm_fload(as, ir); break;
1632 case IR_XLOAD: asm_xload(as, ir); break;
1633 case IR_SLOAD: asm_sload(as, ir); break;
1634
1635 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
1636 case IR_FSTORE: asm_fstore(as, ir); break;
1637 case IR_XSTORE: asm_xstore(as, ir); break;
1638
1639 /* Allocations. */
1640 case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
1641 case IR_TNEW: asm_tnew(as, ir); break;
1642 case IR_TDUP: asm_tdup(as, ir); break;
1643 case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
1644
1645 /* Buffer operations. */
1646 case IR_BUFHDR: asm_bufhdr(as, ir); break;
1647 case IR_BUFPUT: asm_bufput(as, ir); break;
1648 case IR_BUFSTR: asm_bufstr(as, ir); break;
1649
1650 /* Write barriers. */
1651 case IR_TBAR: asm_tbar(as, ir); break;
1652 case IR_OBAR: asm_obar(as, ir); break;
1653
1654 /* Type conversions. */
1655 case IR_TOBIT: asm_tobit(as, ir); break;
1656 case IR_CONV: asm_conv(as, ir); break;
1657 case IR_TOSTR: asm_tostr(as, ir); break;
1658 case IR_STRTO: asm_strto(as, ir); break;
1659
1660 /* Calls. */
1661 case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
1662 case IR_CALLXS: asm_callx(as, ir); break;
1663 case IR_CARG: break;
1664
1665 default:
1666 setintV(&as->J->errinfo, ir->o);
1667 lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
1668 break;
1669 }
1670}
1671
1339/* -- Head of trace ------------------------------------------------------- */ 1672/* -- Head of trace ------------------------------------------------------- */
1340 1673
1341/* Head of a root trace. */ 1674/* Head of a root trace. */
@@ -1714,10 +2047,20 @@ static void asm_setup_regsp(ASMState *as)
1714 /* fallthrough */ 2047 /* fallthrough */
1715#endif 2048#endif
1716 /* C calls evict all scratch regs and return results in RID_RET. */ 2049 /* C calls evict all scratch regs and return results in RID_RET. */
1717 case IR_SNEW: case IR_XSNEW: case IR_NEWREF: 2050 case IR_SNEW: case IR_XSNEW: case IR_NEWREF: case IR_BUFPUT:
1718 if (REGARG_NUMGPR < 3 && as->evenspill < 3) 2051 if (REGARG_NUMGPR < 3 && as->evenspill < 3)
1719 as->evenspill = 3; /* lj_str_new and lj_tab_newkey need 3 args. */ 2052 as->evenspill = 3; /* lj_str_new and lj_tab_newkey need 3 args. */
1720 case IR_TNEW: case IR_TDUP: case IR_CNEW: case IR_CNEWI: case IR_TOSTR: 2053#if LJ_TARGET_X86 && LJ_HASFFI
2054 if (0) {
2055 case IR_CNEW:
2056 if (ir->op2 != REF_NIL && as->evenspill < 4)
2057 as->evenspill = 4; /* lj_cdata_newv needs 4 args. */
2058 }
2059#else
2060 case IR_CNEW:
2061#endif
2062 case IR_TNEW: case IR_TDUP: case IR_CNEWI: case IR_TOSTR:
2063 case IR_BUFSTR:
1721 ir->prev = REGSP_HINT(RID_RET); 2064 ir->prev = REGSP_HINT(RID_RET);
1722 if (inloop) 2065 if (inloop)
1723 as->modset = RSET_SCRATCH; 2066 as->modset = RSET_SCRATCH;
@@ -1753,7 +2096,7 @@ static void asm_setup_regsp(ASMState *as)
1753 break; 2096 break;
1754 case IR_FPMATH: 2097 case IR_FPMATH:
1755#if LJ_TARGET_X86ORX64 2098#if LJ_TARGET_X86ORX64
1756 if (ir->op2 == IRFPM_EXP2) { /* May be joined to lj_vm_pow_sse. */ 2099 if (ir->op2 == IRFPM_EXP2) { /* May be joined to pow. */
1757 ir->prev = REGSP_HINT(RID_XMM0); 2100 ir->prev = REGSP_HINT(RID_XMM0);
1758#if !LJ_64 2101#if !LJ_64
1759 if (as->evenspill < 4) /* Leave room for 16 byte scratch area. */ 2102 if (as->evenspill < 4) /* Leave room for 16 byte scratch area. */
diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h
index a66573c0..9b661eb7 100644
--- a/src/lj_asm_arm.h
+++ b/src/lj_asm_arm.h
@@ -338,7 +338,7 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, ARMIns ai, ARMIns air)
338/* Generate a call to a C function. */ 338/* Generate a call to a C function. */
339static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) 339static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
340{ 340{
341 uint32_t n, nargs = CCI_NARGS(ci); 341 uint32_t n, nargs = CCI_XNARGS(ci);
342 int32_t ofs = 0; 342 int32_t ofs = 0;
343#if LJ_SOFTFP 343#if LJ_SOFTFP
344 Reg gpr = REGARG_FIRSTGPR; 344 Reg gpr = REGARG_FIRSTGPR;
@@ -453,15 +453,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
453 UNUSED(ci); 453 UNUSED(ci);
454} 454}
455 455
456static void asm_call(ASMState *as, IRIns *ir)
457{
458 IRRef args[CCI_NARGS_MAX];
459 const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
460 asm_collectargs(as, ir, ci, args);
461 asm_setupresult(as, ir, ci);
462 asm_gencall(as, ci, args);
463}
464
465static void asm_callx(ASMState *as, IRIns *ir) 456static void asm_callx(ASMState *as, IRIns *ir)
466{ 457{
467 IRRef args[CCI_NARGS_MAX*2]; 458 IRRef args[CCI_NARGS_MAX*2];
@@ -528,6 +519,8 @@ static void asm_tobit(ASMState *as, IRIns *ir)
528 emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15)); 519 emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15));
529 emit_dnm(as, ARMI_VADD_D, (tmp & 15), (left & 15), (right & 15)); 520 emit_dnm(as, ARMI_VADD_D, (tmp & 15), (left & 15), (right & 15));
530} 521}
522#else
523#define asm_tobit(as, ir) lua_assert(0)
531#endif 524#endif
532 525
533static void asm_conv(ASMState *as, IRIns *ir) 526static void asm_conv(ASMState *as, IRIns *ir)
@@ -600,31 +593,6 @@ static void asm_conv(ASMState *as, IRIns *ir)
600 } 593 }
601} 594}
602 595
603#if !LJ_SOFTFP && LJ_HASFFI
604static void asm_conv64(ASMState *as, IRIns *ir)
605{
606 IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
607 IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
608 IRCallID id;
609 CCallInfo ci;
610 IRRef args[2];
611 args[0] = (ir-1)->op1;
612 args[1] = ir->op1;
613 if (st == IRT_NUM || st == IRT_FLOAT) {
614 id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
615 ir--;
616 } else {
617 id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
618 }
619 ci = lj_ir_callinfo[id];
620#if !LJ_ABI_SOFTFP
621 ci.flags |= CCI_VARARG; /* These calls don't use the hard-float ABI! */
622#endif
623 asm_setupresult(as, ir, &ci);
624 asm_gencall(as, &ci, args);
625}
626#endif
627
628static void asm_strto(ASMState *as, IRIns *ir) 596static void asm_strto(ASMState *as, IRIns *ir)
629{ 597{
630 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; 598 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
@@ -688,6 +656,8 @@ static void asm_strto(ASMState *as, IRIns *ir)
688 emit_opk(as, ARMI_ADD, tmp, RID_SP, ofs, RSET_GPR); 656 emit_opk(as, ARMI_ADD, tmp, RID_SP, ofs, RSET_GPR);
689} 657}
690 658
659/* -- Memory references --------------------------------------------------- */
660
691/* Get pointer to TValue. */ 661/* Get pointer to TValue. */
692static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) 662static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
693{ 663{
@@ -713,7 +683,7 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
713 Reg src = ra_alloc1(as, ref, allow); 683 Reg src = ra_alloc1(as, ref, allow);
714 emit_lso(as, ARMI_STR, src, RID_SP, 0); 684 emit_lso(as, ARMI_STR, src, RID_SP, 0);
715 } 685 }
716 if ((ir+1)->o == IR_HIOP) 686 if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
717 type = ra_alloc1(as, ref+1, allow); 687 type = ra_alloc1(as, ref+1, allow);
718 else 688 else
719 type = ra_allock(as, irt_toitype(ir->t), allow); 689 type = ra_allock(as, irt_toitype(ir->t), allow);
@@ -721,27 +691,6 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
721 } 691 }
722} 692}
723 693
724static void asm_tostr(ASMState *as, IRIns *ir)
725{
726 IRRef args[2];
727 args[0] = ASMREF_L;
728 as->gcsteps++;
729 if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) {
730 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
731 args[1] = ASMREF_TMP1; /* const lua_Number * */
732 asm_setupresult(as, ir, ci); /* GCstr * */
733 asm_gencall(as, ci, args);
734 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
735 } else {
736 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
737 args[1] = ir->op1; /* int32_t k */
738 asm_setupresult(as, ir, ci); /* GCstr * */
739 asm_gencall(as, ci, args);
740 }
741}
742
743/* -- Memory references --------------------------------------------------- */
744
745static void asm_aref(ASMState *as, IRIns *ir) 694static void asm_aref(ASMState *as, IRIns *ir)
746{ 695{
747 Reg dest = ra_dest(as, ir, RSET_GPR); 696 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -959,20 +908,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
959 emit_opk(as, ARMI_ADD, dest, node, ofs, RSET_GPR); 908 emit_opk(as, ARMI_ADD, dest, node, ofs, RSET_GPR);
960} 909}
961 910
962static void asm_newref(ASMState *as, IRIns *ir)
963{
964 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
965 IRRef args[3];
966 if (ir->r == RID_SINK)
967 return;
968 args[0] = ASMREF_L; /* lua_State *L */
969 args[1] = ir->op1; /* GCtab *t */
970 args[2] = ASMREF_TMP1; /* cTValue *key */
971 asm_setupresult(as, ir, ci); /* TValue * */
972 asm_gencall(as, ci, args);
973 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
974}
975
976static void asm_uref(ASMState *as, IRIns *ir) 911static void asm_uref(ASMState *as, IRIns *ir)
977{ 912{
978 /* NYI: Check that UREFO is still open and not aliasing a slot. */ 913 /* NYI: Check that UREFO is still open and not aliasing a slot. */
@@ -1105,7 +1040,7 @@ static void asm_xload(ASMState *as, IRIns *ir)
1105 asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); 1040 asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
1106} 1041}
1107 1042
1108static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) 1043static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
1109{ 1044{
1110 if (ir->r != RID_SINK) { 1045 if (ir->r != RID_SINK) {
1111 Reg src = ra_alloc1(as, ir->op2, 1046 Reg src = ra_alloc1(as, ir->op2,
@@ -1115,6 +1050,8 @@ static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
1115 } 1050 }
1116} 1051}
1117 1052
1053#define asm_xstore(as, ir) asm_xstore_(as, ir, 0)
1054
1118static void asm_ahuvload(ASMState *as, IRIns *ir) 1055static void asm_ahuvload(ASMState *as, IRIns *ir)
1119{ 1056{
1120 int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP); 1057 int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);
@@ -1272,19 +1209,16 @@ dotypecheck:
1272static void asm_cnew(ASMState *as, IRIns *ir) 1209static void asm_cnew(ASMState *as, IRIns *ir)
1273{ 1210{
1274 CTState *cts = ctype_ctsG(J2G(as->J)); 1211 CTState *cts = ctype_ctsG(J2G(as->J));
1275 CTypeID ctypeid = (CTypeID)IR(ir->op1)->i; 1212 CTypeID id = (CTypeID)IR(ir->op1)->i;
1276 CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ? 1213 CTSize sz;
1277 lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i; 1214 CTInfo info = lj_ctype_info(cts, id, &sz);
1278 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; 1215 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
1279 IRRef args[2]; 1216 IRRef args[4];
1280 RegSet allow = (RSET_GPR & ~RSET_SCRATCH); 1217 RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
1281 RegSet drop = RSET_SCRATCH; 1218 RegSet drop = RSET_SCRATCH;
1282 lua_assert(sz != CTSIZE_INVALID); 1219 lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
1283 1220
1284 args[0] = ASMREF_L; /* lua_State *L */
1285 args[1] = ASMREF_TMP1; /* MSize size */
1286 as->gcsteps++; 1221 as->gcsteps++;
1287
1288 if (ra_hasreg(ir->r)) 1222 if (ra_hasreg(ir->r))
1289 rset_clear(drop, ir->r); /* Dest reg handled below. */ 1223 rset_clear(drop, ir->r); /* Dest reg handled below. */
1290 ra_evictset(as, drop); 1224 ra_evictset(as, drop);
@@ -1306,16 +1240,28 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1306 if (ofs == sizeof(GCcdata)) break; 1240 if (ofs == sizeof(GCcdata)) break;
1307 ofs -= 4; ir--; 1241 ofs -= 4; ir--;
1308 } 1242 }
1243 } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */
1244 ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
1245 args[0] = ASMREF_L; /* lua_State *L */
1246 args[1] = ir->op1; /* CTypeID id */
1247 args[2] = ir->op2; /* CTSize sz */
1248 args[3] = ASMREF_TMP1; /* CTSize align */
1249 asm_gencall(as, ci, args);
1250 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
1251 return;
1309 } 1252 }
1253
1310 /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ 1254 /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
1311 { 1255 {
1312 uint32_t k = emit_isk12(ARMI_MOV, ctypeid); 1256 uint32_t k = emit_isk12(ARMI_MOV, id);
1313 Reg r = k ? RID_R1 : ra_allock(as, ctypeid, allow); 1257 Reg r = k ? RID_R1 : ra_allock(as, id, allow);
1314 emit_lso(as, ARMI_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct)); 1258 emit_lso(as, ARMI_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct));
1315 emit_lsox(as, ARMI_STRH, r, RID_RET, offsetof(GCcdata, ctypeid)); 1259 emit_lsox(as, ARMI_STRH, r, RID_RET, offsetof(GCcdata, ctypeid));
1316 emit_d(as, ARMI_MOV|ARMI_K12|~LJ_TCDATA, RID_TMP); 1260 emit_d(as, ARMI_MOV|ARMI_K12|~LJ_TCDATA, RID_TMP);
1317 if (k) emit_d(as, ARMI_MOV^k, RID_R1); 1261 if (k) emit_d(as, ARMI_MOV^k, RID_R1);
1318 } 1262 }
1263 args[0] = ASMREF_L; /* lua_State *L */
1264 args[1] = ASMREF_TMP1; /* MSize size */
1319 asm_gencall(as, ci, args); 1265 asm_gencall(as, ci, args);
1320 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), 1266 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
1321 ra_releasetmp(as, ASMREF_TMP1)); 1267 ra_releasetmp(as, ASMREF_TMP1));
@@ -1392,24 +1338,41 @@ static void asm_fpunary(ASMState *as, IRIns *ir, ARMIns ai)
1392 emit_dm(as, ai, (dest & 15), (left & 15)); 1338 emit_dm(as, ai, (dest & 15), (left & 15));
1393} 1339}
1394 1340
1395static int asm_fpjoin_pow(ASMState *as, IRIns *ir) 1341static void asm_callround(ASMState *as, IRIns *ir, int id)
1396{ 1342{
1397 IRIns *irp = IR(ir->op1); 1343 /* The modified regs must match with the *.dasc implementation. */
1398 if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) { 1344 RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)|
1399 IRIns *irpp = IR(irp->op1); 1345 RID2RSET(RID_R3)|RID2RSET(RID_R12);
1400 if (irpp == ir-2 && irpp->o == IR_FPMATH && 1346 RegSet of;
1401 irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) { 1347 Reg dest, src;
1402 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow]; 1348 ra_evictset(as, drop);
1403 IRRef args[2]; 1349 dest = ra_dest(as, ir, RSET_FPR);
1404 args[0] = irpp->op1; 1350 emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15));
1405 args[1] = irp->op2; 1351 emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf :
1406 asm_setupresult(as, ir, ci); 1352 id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf :
1407 asm_gencall(as, ci, args); 1353 (void *)lj_vm_trunc_sf);
1408 return 1; 1354 /* Workaround to protect argument GPRs from being used for remat. */
1409 } 1355 of = as->freeset;
1410 } 1356 as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1);
1411 return 0; 1357 as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L);
1358 src = ra_alloc1(as, ir->op1, RSET_FPR); /* May alloc GPR to remat FPR. */
1359 as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1));
1360 emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15));
1361}
1362
1363static void asm_fpmath(ASMState *as, IRIns *ir)
1364{
1365 if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
1366 return;
1367 if (ir->op2 <= IRFPM_TRUNC)
1368 asm_callround(as, ir, ir->op2);
1369 else if (ir->op2 == IRFPM_SQRT)
1370 asm_fpunary(as, ir, ARMI_VSQRT_D);
1371 else
1372 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
1412} 1373}
1374#else
1375#define asm_fpmath(as, ir) lua_assert(0)
1413#endif 1376#endif
1414 1377
1415static int asm_swapops(ASMState *as, IRRef lref, IRRef rref) 1378static int asm_swapops(ASMState *as, IRRef lref, IRRef rref)
@@ -1459,32 +1422,6 @@ static void asm_intop_s(ASMState *as, IRIns *ir, ARMIns ai)
1459 asm_intop(as, ir, ai); 1422 asm_intop(as, ir, ai);
1460} 1423}
1461 1424
1462static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai)
1463{
1464 if (as->flagmcp == as->mcp) { /* Try to drop cmp r, #0. */
1465 uint32_t cc = (as->mcp[1] >> 28);
1466 as->flagmcp = NULL;
1467 if (cc <= CC_NE) {
1468 as->mcp++;
1469 ai |= ARMI_S;
1470 } else if (cc == CC_GE) {
1471 *++as->mcp ^= ((CC_GE^CC_PL) << 28);
1472 ai |= ARMI_S;
1473 } else if (cc == CC_LT) {
1474 *++as->mcp ^= ((CC_LT^CC_MI) << 28);
1475 ai |= ARMI_S;
1476 } /* else: other conds don't work with bit ops. */
1477 }
1478 if (ir->op2 == 0) {
1479 Reg dest = ra_dest(as, ir, RSET_GPR);
1480 uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
1481 emit_d(as, ai^m, dest);
1482 } else {
1483 /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */
1484 asm_intop(as, ir, ai);
1485 }
1486}
1487
1488static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai) 1425static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai)
1489{ 1426{
1490 Reg dest = ra_dest(as, ir, RSET_GPR); 1427 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1550,6 +1487,26 @@ static void asm_mul(ASMState *as, IRIns *ir)
1550 asm_intmul(as, ir); 1487 asm_intmul(as, ir);
1551} 1488}
1552 1489
1490#define asm_addov(as, ir) asm_add(as, ir)
1491#define asm_subov(as, ir) asm_sub(as, ir)
1492#define asm_mulov(as, ir) asm_mul(as, ir)
1493
1494#if LJ_SOFTFP
1495#define asm_div(as, ir) lua_assert(0)
1496#define asm_pow(as, ir) lua_assert(0)
1497#define asm_abs(as, ir) lua_assert(0)
1498#define asm_atan2(as, ir) lua_assert(0)
1499#define asm_ldexp(as, ir) lua_assert(0)
1500#else
1501#define asm_div(as, ir) asm_fparith(as, ir, ARMI_VDIV_D)
1502#define asm_pow(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi)
1503#define asm_abs(as, ir) asm_fpunary(as, ir, ARMI_VABS_D)
1504#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2)
1505#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp)
1506#endif
1507
1508#define asm_mod(as, ir) asm_callid(as, ir, IRCALL_lj_vm_modi)
1509
1553static void asm_neg(ASMState *as, IRIns *ir) 1510static void asm_neg(ASMState *as, IRIns *ir)
1554{ 1511{
1555#if !LJ_SOFTFP 1512#if !LJ_SOFTFP
@@ -1561,41 +1518,35 @@ static void asm_neg(ASMState *as, IRIns *ir)
1561 asm_intneg(as, ir, ARMI_RSB); 1518 asm_intneg(as, ir, ARMI_RSB);
1562} 1519}
1563 1520
1564static void asm_callid(ASMState *as, IRIns *ir, IRCallID id) 1521static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai)
1565{ 1522{
1566 const CCallInfo *ci = &lj_ir_callinfo[id]; 1523 if (as->flagmcp == as->mcp) { /* Try to drop cmp r, #0. */
1567 IRRef args[2]; 1524 uint32_t cc = (as->mcp[1] >> 28);
1568 args[0] = ir->op1; 1525 as->flagmcp = NULL;
1569 args[1] = ir->op2; 1526 if (cc <= CC_NE) {
1570 asm_setupresult(as, ir, ci); 1527 as->mcp++;
1571 asm_gencall(as, ci, args); 1528 ai |= ARMI_S;
1529 } else if (cc == CC_GE) {
1530 *++as->mcp ^= ((CC_GE^CC_PL) << 28);
1531 ai |= ARMI_S;
1532 } else if (cc == CC_LT) {
1533 *++as->mcp ^= ((CC_LT^CC_MI) << 28);
1534 ai |= ARMI_S;
1535 } /* else: other conds don't work with bit ops. */
1536 }
1537 if (ir->op2 == 0) {
1538 Reg dest = ra_dest(as, ir, RSET_GPR);
1539 uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
1540 emit_d(as, ai^m, dest);
1541 } else {
1542 /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */
1543 asm_intop(as, ir, ai);
1544 }
1572} 1545}
1573 1546
1574#if !LJ_SOFTFP 1547#define asm_bnot(as, ir) asm_bitop(as, ir, ARMI_MVN)
1575static void asm_callround(ASMState *as, IRIns *ir, int id)
1576{
1577 /* The modified regs must match with the *.dasc implementation. */
1578 RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)|
1579 RID2RSET(RID_R3)|RID2RSET(RID_R12);
1580 RegSet of;
1581 Reg dest, src;
1582 ra_evictset(as, drop);
1583 dest = ra_dest(as, ir, RSET_FPR);
1584 emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15));
1585 emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf :
1586 id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf :
1587 (void *)lj_vm_trunc_sf);
1588 /* Workaround to protect argument GPRs from being used for remat. */
1589 of = as->freeset;
1590 as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1);
1591 as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L);
1592 src = ra_alloc1(as, ir->op1, RSET_FPR); /* May alloc GPR to remat FPR. */
1593 as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1));
1594 emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15));
1595}
1596#endif
1597 1548
1598static void asm_bitswap(ASMState *as, IRIns *ir) 1549static void asm_bswap(ASMState *as, IRIns *ir)
1599{ 1550{
1600 Reg dest = ra_dest(as, ir, RSET_GPR); 1551 Reg dest = ra_dest(as, ir, RSET_GPR);
1601 Reg left = ra_alloc1(as, ir->op1, RSET_GPR); 1552 Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
@@ -1612,6 +1563,10 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
1612 } 1563 }
1613} 1564}
1614 1565
1566#define asm_band(as, ir) asm_bitop(as, ir, ARMI_AND)
1567#define asm_bor(as, ir) asm_bitop(as, ir, ARMI_ORR)
1568#define asm_bxor(as, ir) asm_bitop(as, ir, ARMI_EOR)
1569
1615static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh) 1570static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh)
1616{ 1571{
1617 if (irref_isk(ir->op2)) { /* Constant shifts. */ 1572 if (irref_isk(ir->op2)) { /* Constant shifts. */
@@ -1629,6 +1584,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh)
1629 } 1584 }
1630} 1585}
1631 1586
1587#define asm_bshl(as, ir) asm_bitshift(as, ir, ARMSH_LSL)
1588#define asm_bshr(as, ir) asm_bitshift(as, ir, ARMSH_LSR)
1589#define asm_bsar(as, ir) asm_bitshift(as, ir, ARMSH_ASR)
1590#define asm_bror(as, ir) asm_bitshift(as, ir, ARMSH_ROR)
1591#define asm_brol(as, ir) lua_assert(0)
1592
1632static void asm_intmin_max(ASMState *as, IRIns *ir, int cc) 1593static void asm_intmin_max(ASMState *as, IRIns *ir, int cc)
1633{ 1594{
1634 uint32_t kcmp = 0, kmov = 0; 1595 uint32_t kcmp = 0, kmov = 0;
@@ -1702,6 +1663,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int cc, int fcc)
1702 asm_intmin_max(as, ir, cc); 1663 asm_intmin_max(as, ir, cc);
1703} 1664}
1704 1665
1666#define asm_min(as, ir) asm_min_max(as, ir, CC_GT, CC_HI)
1667#define asm_max(as, ir) asm_min_max(as, ir, CC_LT, CC_LO)
1668
1705/* -- Comparisons --------------------------------------------------------- */ 1669/* -- Comparisons --------------------------------------------------------- */
1706 1670
1707/* Map of comparisons to flags. ORDER IR. */ 1671/* Map of comparisons to flags. ORDER IR. */
@@ -1817,6 +1781,18 @@ notst:
1817 as->flagmcp = as->mcp; /* Allow elimination of the compare. */ 1781 as->flagmcp = as->mcp; /* Allow elimination of the compare. */
1818} 1782}
1819 1783
1784static void asm_comp(ASMState *as, IRIns *ir)
1785{
1786#if !LJ_SOFTFP
1787 if (irt_isnum(ir->t))
1788 asm_fpcomp(as, ir);
1789 else
1790#endif
1791 asm_intcomp(as, ir);
1792}
1793
1794#define asm_equal(as, ir) asm_comp(as, ir)
1795
1820#if LJ_HASFFI 1796#if LJ_HASFFI
1821/* 64 bit integer comparisons. */ 1797/* 64 bit integer comparisons. */
1822static void asm_int64comp(ASMState *as, IRIns *ir) 1798static void asm_int64comp(ASMState *as, IRIns *ir)
@@ -1891,7 +1867,7 @@ static void asm_hiop(ASMState *as, IRIns *ir)
1891#endif 1867#endif
1892 } else if ((ir-1)->o == IR_XSTORE) { 1868 } else if ((ir-1)->o == IR_XSTORE) {
1893 if ((ir-1)->r != RID_SINK) 1869 if ((ir-1)->r != RID_SINK)
1894 asm_xstore(as, ir, 4); 1870 asm_xstore_(as, ir, 4);
1895 return; 1871 return;
1896 } 1872 }
1897 if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ 1873 if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
@@ -2160,143 +2136,13 @@ static void asm_tail_prep(ASMState *as)
2160 *p = 0; /* Prevent load/store merging. */ 2136 *p = 0; /* Prevent load/store merging. */
2161} 2137}
2162 2138
2163/* -- Instruction dispatch ------------------------------------------------ */
2164
2165/* Assemble a single instruction. */
2166static void asm_ir(ASMState *as, IRIns *ir)
2167{
2168 switch ((IROp)ir->o) {
2169 /* Miscellaneous ops. */
2170 case IR_LOOP: asm_loop(as); break;
2171 case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
2172 case IR_USE:
2173 ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
2174 case IR_PHI: asm_phi(as, ir); break;
2175 case IR_HIOP: asm_hiop(as, ir); break;
2176 case IR_GCSTEP: asm_gcstep(as, ir); break;
2177
2178 /* Guarded assertions. */
2179 case IR_EQ: case IR_NE:
2180 if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
2181 as->curins--;
2182 asm_href(as, ir-1, (IROp)ir->o);
2183 break;
2184 }
2185 /* fallthrough */
2186 case IR_LT: case IR_GE: case IR_LE: case IR_GT:
2187 case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
2188 case IR_ABC:
2189#if !LJ_SOFTFP
2190 if (irt_isnum(ir->t)) { asm_fpcomp(as, ir); break; }
2191#endif
2192 asm_intcomp(as, ir);
2193 break;
2194
2195 case IR_RETF: asm_retf(as, ir); break;
2196
2197 /* Bit ops. */
2198 case IR_BNOT: asm_bitop(as, ir, ARMI_MVN); break;
2199 case IR_BSWAP: asm_bitswap(as, ir); break;
2200
2201 case IR_BAND: asm_bitop(as, ir, ARMI_AND); break;
2202 case IR_BOR: asm_bitop(as, ir, ARMI_ORR); break;
2203 case IR_BXOR: asm_bitop(as, ir, ARMI_EOR); break;
2204
2205 case IR_BSHL: asm_bitshift(as, ir, ARMSH_LSL); break;
2206 case IR_BSHR: asm_bitshift(as, ir, ARMSH_LSR); break;
2207 case IR_BSAR: asm_bitshift(as, ir, ARMSH_ASR); break;
2208 case IR_BROR: asm_bitshift(as, ir, ARMSH_ROR); break;
2209 case IR_BROL: lua_assert(0); break;
2210
2211 /* Arithmetic ops. */
2212 case IR_ADD: case IR_ADDOV: asm_add(as, ir); break;
2213 case IR_SUB: case IR_SUBOV: asm_sub(as, ir); break;
2214 case IR_MUL: case IR_MULOV: asm_mul(as, ir); break;
2215 case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;
2216 case IR_NEG: asm_neg(as, ir); break;
2217
2218#if LJ_SOFTFP
2219 case IR_DIV: case IR_POW: case IR_ABS:
2220 case IR_ATAN2: case IR_LDEXP: case IR_FPMATH: case IR_TOBIT:
2221 lua_assert(0); /* Unused for LJ_SOFTFP. */
2222 break;
2223#else
2224 case IR_DIV: asm_fparith(as, ir, ARMI_VDIV_D); break;
2225 case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;
2226 case IR_ABS: asm_fpunary(as, ir, ARMI_VABS_D); break;
2227 case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;
2228 case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;
2229 case IR_FPMATH:
2230 if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
2231 break;
2232 if (ir->op2 <= IRFPM_TRUNC)
2233 asm_callround(as, ir, ir->op2);
2234 else if (ir->op2 == IRFPM_SQRT)
2235 asm_fpunary(as, ir, ARMI_VSQRT_D);
2236 else
2237 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
2238 break;
2239 case IR_TOBIT: asm_tobit(as, ir); break;
2240#endif
2241
2242 case IR_MIN: asm_min_max(as, ir, CC_GT, CC_HI); break;
2243 case IR_MAX: asm_min_max(as, ir, CC_LT, CC_LO); break;
2244
2245 /* Memory references. */
2246 case IR_AREF: asm_aref(as, ir); break;
2247 case IR_HREF: asm_href(as, ir, 0); break;
2248 case IR_HREFK: asm_hrefk(as, ir); break;
2249 case IR_NEWREF: asm_newref(as, ir); break;
2250 case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
2251 case IR_FREF: asm_fref(as, ir); break;
2252 case IR_STRREF: asm_strref(as, ir); break;
2253
2254 /* Loads and stores. */
2255 case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
2256 asm_ahuvload(as, ir);
2257 break;
2258 case IR_FLOAD: asm_fload(as, ir); break;
2259 case IR_XLOAD: asm_xload(as, ir); break;
2260 case IR_SLOAD: asm_sload(as, ir); break;
2261
2262 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
2263 case IR_FSTORE: asm_fstore(as, ir); break;
2264 case IR_XSTORE: asm_xstore(as, ir, 0); break;
2265
2266 /* Allocations. */
2267 case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
2268 case IR_TNEW: asm_tnew(as, ir); break;
2269 case IR_TDUP: asm_tdup(as, ir); break;
2270 case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
2271
2272 /* Write barriers. */
2273 case IR_TBAR: asm_tbar(as, ir); break;
2274 case IR_OBAR: asm_obar(as, ir); break;
2275
2276 /* Type conversions. */
2277 case IR_CONV: asm_conv(as, ir); break;
2278 case IR_TOSTR: asm_tostr(as, ir); break;
2279 case IR_STRTO: asm_strto(as, ir); break;
2280
2281 /* Calls. */
2282 case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
2283 case IR_CALLXS: asm_callx(as, ir); break;
2284 case IR_CARG: break;
2285
2286 default:
2287 setintV(&as->J->errinfo, ir->o);
2288 lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
2289 break;
2290 }
2291}
2292
2293/* -- Trace setup --------------------------------------------------------- */ 2139/* -- Trace setup --------------------------------------------------------- */
2294 2140
2295/* Ensure there are enough stack slots for call arguments. */ 2141/* Ensure there are enough stack slots for call arguments. */
2296static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) 2142static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
2297{ 2143{
2298 IRRef args[CCI_NARGS_MAX*2]; 2144 IRRef args[CCI_NARGS_MAX*2];
2299 uint32_t i, nargs = (int)CCI_NARGS(ci); 2145 uint32_t i, nargs = CCI_XNARGS(ci);
2300 int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR, fprodd = 0; 2146 int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR, fprodd = 0;
2301 asm_collectargs(as, ir, ci, args); 2147 asm_collectargs(as, ir, ci, args);
2302 for (i = 0; i < nargs; i++) { 2148 for (i = 0; i < nargs; i++) {
diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h
index 9fe7c9c3..3d061eb4 100644
--- a/src/lj_asm_mips.h
+++ b/src/lj_asm_mips.h
@@ -226,7 +226,7 @@ static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref,
226/* Generate a call to a C function. */ 226/* Generate a call to a C function. */
227static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) 227static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
228{ 228{
229 uint32_t n, nargs = CCI_NARGS(ci); 229 uint32_t n, nargs = CCI_XNARGS(ci);
230 int32_t ofs = 16; 230 int32_t ofs = 16;
231 Reg gpr, fpr = REGARG_FIRSTFPR; 231 Reg gpr, fpr = REGARG_FIRSTFPR;
232 if ((void *)ci->func) 232 if ((void *)ci->func)
@@ -326,15 +326,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
326 } 326 }
327} 327}
328 328
329static void asm_call(ASMState *as, IRIns *ir)
330{
331 IRRef args[CCI_NARGS_MAX];
332 const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
333 asm_collectargs(as, ir, ci, args);
334 asm_setupresult(as, ir, ci);
335 asm_gencall(as, ci, args);
336}
337
338static void asm_callx(ASMState *as, IRIns *ir) 329static void asm_callx(ASMState *as, IRIns *ir)
339{ 330{
340 IRRef args[CCI_NARGS_MAX*2]; 331 IRRef args[CCI_NARGS_MAX*2];
@@ -362,16 +353,6 @@ static void asm_callx(ASMState *as, IRIns *ir)
362 asm_gencall(as, &ci, args); 353 asm_gencall(as, &ci, args);
363} 354}
364 355
365static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
366{
367 const CCallInfo *ci = &lj_ir_callinfo[id];
368 IRRef args[2];
369 args[0] = ir->op1;
370 args[1] = ir->op2;
371 asm_setupresult(as, ir, ci);
372 asm_gencall(as, ci, args);
373}
374
375static void asm_callround(ASMState *as, IRIns *ir, IRCallID id) 356static void asm_callround(ASMState *as, IRIns *ir, IRCallID id)
376{ 357{
377 /* The modified regs must match with the *.dasc implementation. */ 358 /* The modified regs must match with the *.dasc implementation. */
@@ -519,28 +500,6 @@ static void asm_conv(ASMState *as, IRIns *ir)
519 } 500 }
520} 501}
521 502
522#if LJ_HASFFI
523static void asm_conv64(ASMState *as, IRIns *ir)
524{
525 IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
526 IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
527 IRCallID id;
528 const CCallInfo *ci;
529 IRRef args[2];
530 args[LJ_BE?0:1] = ir->op1;
531 args[LJ_BE?1:0] = (ir-1)->op1;
532 if (st == IRT_NUM || st == IRT_FLOAT) {
533 id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
534 ir--;
535 } else {
536 id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
537 }
538 ci = &lj_ir_callinfo[id];
539 asm_setupresult(as, ir, ci);
540 asm_gencall(as, ci, args);
541}
542#endif
543
544static void asm_strto(ASMState *as, IRIns *ir) 503static void asm_strto(ASMState *as, IRIns *ir)
545{ 504{
546 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; 505 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
@@ -557,6 +516,8 @@ static void asm_strto(ASMState *as, IRIns *ir)
557 RID_SP, sps_scale(ir->s)); 516 RID_SP, sps_scale(ir->s));
558} 517}
559 518
519/* -- Memory references --------------------------------------------------- */
520
560/* Get pointer to TValue. */ 521/* Get pointer to TValue. */
561static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) 522static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
562{ 523{
@@ -580,27 +541,6 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
580 } 541 }
581} 542}
582 543
583static void asm_tostr(ASMState *as, IRIns *ir)
584{
585 IRRef args[2];
586 args[0] = ASMREF_L;
587 as->gcsteps++;
588 if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) {
589 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
590 args[1] = ASMREF_TMP1; /* const lua_Number * */
591 asm_setupresult(as, ir, ci); /* GCstr * */
592 asm_gencall(as, ci, args);
593 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
594 } else {
595 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
596 args[1] = ir->op1; /* int32_t k */
597 asm_setupresult(as, ir, ci); /* GCstr * */
598 asm_gencall(as, ci, args);
599 }
600}
601
602/* -- Memory references --------------------------------------------------- */
603
604static void asm_aref(ASMState *as, IRIns *ir) 544static void asm_aref(ASMState *as, IRIns *ir)
605{ 545{
606 Reg dest = ra_dest(as, ir, RSET_GPR); 546 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -630,7 +570,7 @@ static void asm_aref(ASMState *as, IRIns *ir)
630** } while ((n = nextnode(n))); 570** } while ((n = nextnode(n)));
631** return niltv(L); 571** return niltv(L);
632*/ 572*/
633static void asm_href(ASMState *as, IRIns *ir) 573static void asm_href(ASMState *as, IRIns *ir, IROp merge)
634{ 574{
635 RegSet allow = RSET_GPR; 575 RegSet allow = RSET_GPR;
636 int destused = ra_used(ir); 576 int destused = ra_used(ir);
@@ -656,37 +596,42 @@ static void asm_href(ASMState *as, IRIns *ir)
656 tmp2 = ra_scratch(as, allow); 596 tmp2 = ra_scratch(as, allow);
657 rset_clear(allow, tmp2); 597 rset_clear(allow, tmp2);
658 598
659 /* Key not found in chain: load niltv. */ 599 /* Key not found in chain: jump to exit (if merged) or load niltv. */
660 l_end = emit_label(as); 600 l_end = emit_label(as);
661 if (destused) 601 as->invmcp = NULL;
602 if (merge == IR_NE)
603 asm_guard(as, MIPSI_B, RID_ZERO, RID_ZERO);
604 else if (destused)
662 emit_loada(as, dest, niltvg(J2G(as->J))); 605 emit_loada(as, dest, niltvg(J2G(as->J)));
663 else
664 *--as->mcp = MIPSI_NOP;
665 /* Follow hash chain until the end. */ 606 /* Follow hash chain until the end. */
666 emit_move(as, dest, tmp1); 607 emit_move(as, dest, tmp2);
667 l_loop = --as->mcp; 608 l_loop = --as->mcp;
668 emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, next)); 609 emit_tsi(as, MIPSI_LW, tmp2, dest, (int32_t)offsetof(Node, next));
669 l_next = emit_label(as); 610 l_next = emit_label(as);
670 611
671 /* Type and value comparison. */ 612 /* Type and value comparison. */
613 if (merge == IR_EQ) { /* Must match asm_guard(). */
614 emit_ti(as, MIPSI_LI, RID_TMP, as->snapno);
615 l_end = asm_exitstub_addr(as);
616 }
672 if (irt_isnum(kt)) { 617 if (irt_isnum(kt)) {
673 emit_branch(as, MIPSI_BC1T, 0, 0, l_end); 618 emit_branch(as, MIPSI_BC1T, 0, 0, l_end);
674 emit_fgh(as, MIPSI_C_EQ_D, 0, tmpnum, key); 619 emit_fgh(as, MIPSI_C_EQ_D, 0, tmpnum, key);
675 emit_tg(as, MIPSI_MFC1, tmp1, key+1); 620 *--as->mcp = MIPSI_NOP; /* Avoid NaN comparison overhead. */
676 emit_branch(as, MIPSI_BEQ, tmp1, RID_ZERO, l_next); 621 emit_branch(as, MIPSI_BEQ, tmp2, RID_ZERO, l_next);
677 emit_tsi(as, MIPSI_SLTIU, tmp1, tmp1, (int32_t)LJ_TISNUM); 622 emit_tsi(as, MIPSI_SLTIU, tmp2, tmp2, (int32_t)LJ_TISNUM);
678 emit_hsi(as, MIPSI_LDC1, tmpnum, dest, (int32_t)offsetof(Node, key.n)); 623 emit_hsi(as, MIPSI_LDC1, tmpnum, dest, (int32_t)offsetof(Node, key.n));
679 } else { 624 } else {
680 if (irt_ispri(kt)) { 625 if (irt_ispri(kt)) {
681 emit_branch(as, MIPSI_BEQ, tmp1, type, l_end); 626 emit_branch(as, MIPSI_BEQ, tmp2, type, l_end);
682 } else { 627 } else {
683 emit_branch(as, MIPSI_BEQ, tmp2, key, l_end); 628 emit_branch(as, MIPSI_BEQ, tmp1, key, l_end);
684 emit_tsi(as, MIPSI_LW, tmp2, dest, (int32_t)offsetof(Node, key.gcr)); 629 emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, key.gcr));
685 emit_branch(as, MIPSI_BNE, tmp1, type, l_next); 630 emit_branch(as, MIPSI_BNE, tmp2, type, l_next);
686 } 631 }
687 } 632 }
688 emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, key.it)); 633 emit_tsi(as, MIPSI_LW, tmp2, dest, (int32_t)offsetof(Node, key.it));
689 *l_loop = MIPSI_BNE | MIPSF_S(tmp1) | ((as->mcp-l_loop-1) & 0xffffu); 634 *l_loop = MIPSI_BNE | MIPSF_S(tmp2) | ((as->mcp-l_loop-1) & 0xffffu);
690 635
691 /* Load main position relative to tab->node into dest. */ 636 /* Load main position relative to tab->node into dest. */
692 khash = irref_isk(refkey) ? ir_khash(irkey) : 1; 637 khash = irref_isk(refkey) ? ir_khash(irkey) : 1;
@@ -776,20 +721,6 @@ nolo:
776 emit_tsi(as, MIPSI_ADDU, dest, node, ra_allock(as, ofs, allow)); 721 emit_tsi(as, MIPSI_ADDU, dest, node, ra_allock(as, ofs, allow));
777} 722}
778 723
779static void asm_newref(ASMState *as, IRIns *ir)
780{
781 if (ir->r != RID_SINK) {
782 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
783 IRRef args[3];
784 args[0] = ASMREF_L; /* lua_State *L */
785 args[1] = ir->op1; /* GCtab *t */
786 args[2] = ASMREF_TMP1; /* cTValue *key */
787 asm_setupresult(as, ir, ci); /* TValue * */
788 asm_gencall(as, ci, args);
789 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
790 }
791}
792
793static void asm_uref(ASMState *as, IRIns *ir) 724static void asm_uref(ASMState *as, IRIns *ir)
794{ 725{
795 /* NYI: Check that UREFO is still open and not aliasing a slot. */ 726 /* NYI: Check that UREFO is still open and not aliasing a slot. */
@@ -918,7 +849,7 @@ static void asm_xload(ASMState *as, IRIns *ir)
918 asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); 849 asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
919} 850}
920 851
921static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) 852static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
922{ 853{
923 if (ir->r != RID_SINK) { 854 if (ir->r != RID_SINK) {
924 Reg src = ra_alloc1z(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); 855 Reg src = ra_alloc1z(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
@@ -927,6 +858,8 @@ static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
927 } 858 }
928} 859}
929 860
861#define asm_xstore(as, ir) asm_xstore_(as, ir, 0)
862
930static void asm_ahuvload(ASMState *as, IRIns *ir) 863static void asm_ahuvload(ASMState *as, IRIns *ir)
931{ 864{
932 IRType1 t = ir->t; 865 IRType1 t = ir->t;
@@ -1002,7 +935,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
1002 if (irt_isint(t)) { 935 if (irt_isint(t)) {
1003 Reg tmp = ra_scratch(as, RSET_FPR); 936 Reg tmp = ra_scratch(as, RSET_FPR);
1004 emit_tg(as, MIPSI_MFC1, dest, tmp); 937 emit_tg(as, MIPSI_MFC1, dest, tmp);
1005 emit_fg(as, MIPSI_CVT_W_D, tmp, tmp); 938 emit_fg(as, MIPSI_TRUNC_W_D, tmp, tmp);
1006 dest = tmp; 939 dest = tmp;
1007 t.irt = IRT_NUM; /* Check for original type. */ 940 t.irt = IRT_NUM; /* Check for original type. */
1008 } else { 941 } else {
@@ -1042,19 +975,15 @@ dotypecheck:
1042static void asm_cnew(ASMState *as, IRIns *ir) 975static void asm_cnew(ASMState *as, IRIns *ir)
1043{ 976{
1044 CTState *cts = ctype_ctsG(J2G(as->J)); 977 CTState *cts = ctype_ctsG(J2G(as->J));
1045 CTypeID ctypeid = (CTypeID)IR(ir->op1)->i; 978 CTypeID id = (CTypeID)IR(ir->op1)->i;
1046 CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ? 979 CTSize sz;
1047 lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i; 980 CTInfo info = lj_ctype_info(cts, id, &sz);
1048 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; 981 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
1049 IRRef args[2]; 982 IRRef args[4];
1050 RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
1051 RegSet drop = RSET_SCRATCH; 983 RegSet drop = RSET_SCRATCH;
1052 lua_assert(sz != CTSIZE_INVALID); 984 lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
1053 985
1054 args[0] = ASMREF_L; /* lua_State *L */
1055 args[1] = ASMREF_TMP1; /* MSize size */
1056 as->gcsteps++; 986 as->gcsteps++;
1057
1058 if (ra_hasreg(ir->r)) 987 if (ra_hasreg(ir->r))
1059 rset_clear(drop, ir->r); /* Dest reg handled below. */ 988 rset_clear(drop, ir->r); /* Dest reg handled below. */
1060 ra_evictset(as, drop); 989 ra_evictset(as, drop);
@@ -1063,6 +992,7 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1063 992
1064 /* Initialize immutable cdata object. */ 993 /* Initialize immutable cdata object. */
1065 if (ir->o == IR_CNEWI) { 994 if (ir->o == IR_CNEWI) {
995 RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
1066 int32_t ofs = sizeof(GCcdata); 996 int32_t ofs = sizeof(GCcdata);
1067 lua_assert(sz == 4 || sz == 8); 997 lua_assert(sz == 4 || sz == 8);
1068 if (sz == 8) { 998 if (sz == 8) {
@@ -1077,12 +1007,24 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1077 if (ofs == sizeof(GCcdata)) break; 1007 if (ofs == sizeof(GCcdata)) break;
1078 ofs -= 4; if (LJ_BE) ir++; else ir--; 1008 ofs -= 4; if (LJ_BE) ir++; else ir--;
1079 } 1009 }
1010 } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */
1011 ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
1012 args[0] = ASMREF_L; /* lua_State *L */
1013 args[1] = ir->op1; /* CTypeID id */
1014 args[2] = ir->op2; /* CTSize sz */
1015 args[3] = ASMREF_TMP1; /* CTSize align */
1016 asm_gencall(as, ci, args);
1017 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
1018 return;
1080 } 1019 }
1020
1081 /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ 1021 /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
1082 emit_tsi(as, MIPSI_SB, RID_RET+1, RID_RET, offsetof(GCcdata, gct)); 1022 emit_tsi(as, MIPSI_SB, RID_RET+1, RID_RET, offsetof(GCcdata, gct));
1083 emit_tsi(as, MIPSI_SH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid)); 1023 emit_tsi(as, MIPSI_SH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid));
1084 emit_ti(as, MIPSI_LI, RID_RET+1, ~LJ_TCDATA); 1024 emit_ti(as, MIPSI_LI, RID_RET+1, ~LJ_TCDATA);
1085 emit_ti(as, MIPSI_LI, RID_TMP, ctypeid); /* Lower 16 bit used. Sign-ext ok. */ 1025 emit_ti(as, MIPSI_LI, RID_TMP, id); /* Lower 16 bit used. Sign-ext ok. */
1026 args[0] = ASMREF_L; /* lua_State *L */
1027 args[1] = ASMREF_TMP1; /* MSize size */
1086 asm_gencall(as, ci, args); 1028 asm_gencall(as, ci, args);
1087 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), 1029 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
1088 ra_releasetmp(as, ASMREF_TMP1)); 1030 ra_releasetmp(as, ASMREF_TMP1));
@@ -1152,23 +1094,16 @@ static void asm_fpunary(ASMState *as, IRIns *ir, MIPSIns mi)
1152 emit_fg(as, mi, dest, left); 1094 emit_fg(as, mi, dest, left);
1153} 1095}
1154 1096
1155static int asm_fpjoin_pow(ASMState *as, IRIns *ir) 1097static void asm_fpmath(ASMState *as, IRIns *ir)
1156{ 1098{
1157 IRIns *irp = IR(ir->op1); 1099 if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
1158 if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) { 1100 return;
1159 IRIns *irpp = IR(irp->op1); 1101 if (ir->op2 <= IRFPM_TRUNC)
1160 if (irpp == ir-2 && irpp->o == IR_FPMATH && 1102 asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2);
1161 irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) { 1103 else if (ir->op2 == IRFPM_SQRT)
1162 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow]; 1104 asm_fpunary(as, ir, MIPSI_SQRT_D);
1163 IRRef args[2]; 1105 else
1164 args[0] = irpp->op1; 1106 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
1165 args[1] = irp->op2;
1166 asm_setupresult(as, ir, ci);
1167 asm_gencall(as, ci, args);
1168 return 1;
1169 }
1170 }
1171 return 0;
1172} 1107}
1173 1108
1174static void asm_add(ASMState *as, IRIns *ir) 1109static void asm_add(ASMState *as, IRIns *ir)
@@ -1214,6 +1149,10 @@ static void asm_mul(ASMState *as, IRIns *ir)
1214 } 1149 }
1215} 1150}
1216 1151
1152#define asm_div(as, ir) asm_fparith(as, ir, MIPSI_DIV_D)
1153#define asm_mod(as, ir) asm_callid(as, ir, IRCALL_lj_vm_modi)
1154#define asm_pow(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi)
1155
1217static void asm_neg(ASMState *as, IRIns *ir) 1156static void asm_neg(ASMState *as, IRIns *ir)
1218{ 1157{
1219 if (irt_isnum(ir->t)) { 1158 if (irt_isnum(ir->t)) {
@@ -1225,6 +1164,10 @@ static void asm_neg(ASMState *as, IRIns *ir)
1225 } 1164 }
1226} 1165}
1227 1166
1167#define asm_abs(as, ir) asm_fpunary(as, ir, MIPSI_ABS_D)
1168#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2)
1169#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp)
1170
1228static void asm_arithov(ASMState *as, IRIns *ir) 1171static void asm_arithov(ASMState *as, IRIns *ir)
1229{ 1172{
1230 Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR); 1173 Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR);
@@ -1258,13 +1201,21 @@ static void asm_arithov(ASMState *as, IRIns *ir)
1258 emit_move(as, RID_TMP, dest == left ? left : right); 1201 emit_move(as, RID_TMP, dest == left ? left : right);
1259} 1202}
1260 1203
1204#define asm_addov(as, ir) asm_arithov(as, ir)
1205#define asm_subov(as, ir) asm_arithov(as, ir)
1206
1261static void asm_mulov(ASMState *as, IRIns *ir) 1207static void asm_mulov(ASMState *as, IRIns *ir)
1262{ 1208{
1263#if LJ_DUALNUM 1209 Reg dest = ra_dest(as, ir, RSET_GPR);
1264#error "NYI: MULOV" 1210 Reg tmp, right, left = ra_alloc2(as, ir, RSET_GPR);
1265#else 1211 right = (left >> 8); left &= 255;
1266 UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused in single-number mode. */ 1212 tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR, left),
1267#endif 1213 right), dest));
1214 asm_guard(as, MIPSI_BNE, RID_TMP, tmp);
1215 emit_dta(as, MIPSI_SRA, RID_TMP, dest, 31);
1216 emit_dst(as, MIPSI_MFHI, tmp, 0, 0);
1217 emit_dst(as, MIPSI_MFLO, dest, 0, 0);
1218 emit_dst(as, MIPSI_MULT, 0, left, right);
1268} 1219}
1269 1220
1270#if LJ_HASFFI 1221#if LJ_HASFFI
@@ -1351,7 +1302,7 @@ static void asm_neg64(ASMState *as, IRIns *ir)
1351} 1302}
1352#endif 1303#endif
1353 1304
1354static void asm_bitnot(ASMState *as, IRIns *ir) 1305static void asm_bnot(ASMState *as, IRIns *ir)
1355{ 1306{
1356 Reg left, right, dest = ra_dest(as, ir, RSET_GPR); 1307 Reg left, right, dest = ra_dest(as, ir, RSET_GPR);
1357 IRIns *irl = IR(ir->op1); 1308 IRIns *irl = IR(ir->op1);
@@ -1365,7 +1316,7 @@ static void asm_bitnot(ASMState *as, IRIns *ir)
1365 emit_dst(as, MIPSI_NOR, dest, left, right); 1316 emit_dst(as, MIPSI_NOR, dest, left, right);
1366} 1317}
1367 1318
1368static void asm_bitswap(ASMState *as, IRIns *ir) 1319static void asm_bswap(ASMState *as, IRIns *ir)
1369{ 1320{
1370 Reg dest = ra_dest(as, ir, RSET_GPR); 1321 Reg dest = ra_dest(as, ir, RSET_GPR);
1371 Reg left = ra_alloc1(as, ir->op1, RSET_GPR); 1322 Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
@@ -1401,6 +1352,10 @@ static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
1401 emit_dst(as, mi, dest, left, right); 1352 emit_dst(as, mi, dest, left, right);
1402} 1353}
1403 1354
1355#define asm_band(as, ir) asm_bitop(as, ir, MIPSI_AND, MIPSI_ANDI)
1356#define asm_bor(as, ir) asm_bitop(as, ir, MIPSI_OR, MIPSI_ORI)
1357#define asm_bxor(as, ir) asm_bitop(as, ir, MIPSI_XOR, MIPSI_XORI)
1358
1404static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik) 1359static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
1405{ 1360{
1406 Reg dest = ra_dest(as, ir, RSET_GPR); 1361 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1414,7 +1369,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
1414 } 1369 }
1415} 1370}
1416 1371
1417static void asm_bitror(ASMState *as, IRIns *ir) 1372#define asm_bshl(as, ir) asm_bitshift(as, ir, MIPSI_SLLV, MIPSI_SLL)
1373#define asm_bshr(as, ir) asm_bitshift(as, ir, MIPSI_SRLV, MIPSI_SRL)
1374#define asm_bsar(as, ir) asm_bitshift(as, ir, MIPSI_SRAV, MIPSI_SRA)
1375#define asm_brol(as, ir) lua_assert(0)
1376
1377static void asm_bror(ASMState *as, IRIns *ir)
1418{ 1378{
1419 if ((as->flags & JIT_F_MIPS32R2)) { 1379 if ((as->flags & JIT_F_MIPS32R2)) {
1420 asm_bitshift(as, ir, MIPSI_ROTRV, MIPSI_ROTR); 1380 asm_bitshift(as, ir, MIPSI_ROTRV, MIPSI_ROTR);
@@ -1463,6 +1423,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
1463 } 1423 }
1464} 1424}
1465 1425
1426#define asm_min(as, ir) asm_min_max(as, ir, 0)
1427#define asm_max(as, ir) asm_min_max(as, ir, 1)
1428
1466/* -- Comparisons --------------------------------------------------------- */ 1429/* -- Comparisons --------------------------------------------------------- */
1467 1430
1468static void asm_comp(ASMState *as, IRIns *ir) 1431static void asm_comp(ASMState *as, IRIns *ir)
@@ -1500,7 +1463,7 @@ static void asm_comp(ASMState *as, IRIns *ir)
1500 } 1463 }
1501} 1464}
1502 1465
1503static void asm_compeq(ASMState *as, IRIns *ir) 1466static void asm_equal(ASMState *as, IRIns *ir)
1504{ 1467{
1505 Reg right, left = ra_alloc2(as, ir, irt_isnum(ir->t) ? RSET_FPR : RSET_GPR); 1468 Reg right, left = ra_alloc2(as, ir, irt_isnum(ir->t) ? RSET_FPR : RSET_GPR);
1506 right = (left >> 8); left &= 255; 1469 right = (left >> 8); left &= 255;
@@ -1574,8 +1537,8 @@ static void asm_hiop(ASMState *as, IRIns *ir)
1574 } else if ((ir-1)->o == IR_XSTORE) { 1537 } else if ((ir-1)->o == IR_XSTORE) {
1575 as->curins--; /* Handle both stores here. */ 1538 as->curins--; /* Handle both stores here. */
1576 if ((ir-1)->r != RID_SINK) { 1539 if ((ir-1)->r != RID_SINK) {
1577 asm_xstore(as, ir, LJ_LE ? 4 : 0); 1540 asm_xstore_(as, ir, LJ_LE ? 4 : 0);
1578 asm_xstore(as, ir-1, LJ_LE ? 0 : 4); 1541 asm_xstore_(as, ir-1, LJ_LE ? 0 : 4);
1579 } 1542 }
1580 return; 1543 return;
1581 } 1544 }
@@ -1771,131 +1734,13 @@ static void asm_tail_prep(ASMState *as)
1771 as->invmcp = as->loopref ? as->mcp : NULL; 1734 as->invmcp = as->loopref ? as->mcp : NULL;
1772} 1735}
1773 1736
1774/* -- Instruction dispatch ------------------------------------------------ */
1775
1776/* Assemble a single instruction. */
1777static void asm_ir(ASMState *as, IRIns *ir)
1778{
1779 switch ((IROp)ir->o) {
1780 /* Miscellaneous ops. */
1781 case IR_LOOP: asm_loop(as); break;
1782 case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
1783 case IR_USE:
1784 ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
1785 case IR_PHI: asm_phi(as, ir); break;
1786 case IR_HIOP: asm_hiop(as, ir); break;
1787 case IR_GCSTEP: asm_gcstep(as, ir); break;
1788
1789 /* Guarded assertions. */
1790 case IR_EQ: case IR_NE: asm_compeq(as, ir); break;
1791 case IR_LT: case IR_GE: case IR_LE: case IR_GT:
1792 case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
1793 case IR_ABC:
1794 asm_comp(as, ir);
1795 break;
1796
1797 case IR_RETF: asm_retf(as, ir); break;
1798
1799 /* Bit ops. */
1800 case IR_BNOT: asm_bitnot(as, ir); break;
1801 case IR_BSWAP: asm_bitswap(as, ir); break;
1802
1803 case IR_BAND: asm_bitop(as, ir, MIPSI_AND, MIPSI_ANDI); break;
1804 case IR_BOR: asm_bitop(as, ir, MIPSI_OR, MIPSI_ORI); break;
1805 case IR_BXOR: asm_bitop(as, ir, MIPSI_XOR, MIPSI_XORI); break;
1806
1807 case IR_BSHL: asm_bitshift(as, ir, MIPSI_SLLV, MIPSI_SLL); break;
1808 case IR_BSHR: asm_bitshift(as, ir, MIPSI_SRLV, MIPSI_SRL); break;
1809 case IR_BSAR: asm_bitshift(as, ir, MIPSI_SRAV, MIPSI_SRA); break;
1810 case IR_BROL: lua_assert(0); break;
1811 case IR_BROR: asm_bitror(as, ir); break;
1812
1813 /* Arithmetic ops. */
1814 case IR_ADD: asm_add(as, ir); break;
1815 case IR_SUB: asm_sub(as, ir); break;
1816 case IR_MUL: asm_mul(as, ir); break;
1817 case IR_DIV: asm_fparith(as, ir, MIPSI_DIV_D); break;
1818 case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;
1819 case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;
1820 case IR_NEG: asm_neg(as, ir); break;
1821
1822 case IR_ABS: asm_fpunary(as, ir, MIPSI_ABS_D); break;
1823 case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;
1824 case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;
1825 case IR_MIN: asm_min_max(as, ir, 0); break;
1826 case IR_MAX: asm_min_max(as, ir, 1); break;
1827 case IR_FPMATH:
1828 if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
1829 break;
1830 if (ir->op2 <= IRFPM_TRUNC)
1831 asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2);
1832 else if (ir->op2 == IRFPM_SQRT)
1833 asm_fpunary(as, ir, MIPSI_SQRT_D);
1834 else
1835 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
1836 break;
1837
1838 /* Overflow-checking arithmetic ops. */
1839 case IR_ADDOV: asm_arithov(as, ir); break;
1840 case IR_SUBOV: asm_arithov(as, ir); break;
1841 case IR_MULOV: asm_mulov(as, ir); break;
1842
1843 /* Memory references. */
1844 case IR_AREF: asm_aref(as, ir); break;
1845 case IR_HREF: asm_href(as, ir); break;
1846 case IR_HREFK: asm_hrefk(as, ir); break;
1847 case IR_NEWREF: asm_newref(as, ir); break;
1848 case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
1849 case IR_FREF: asm_fref(as, ir); break;
1850 case IR_STRREF: asm_strref(as, ir); break;
1851
1852 /* Loads and stores. */
1853 case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
1854 asm_ahuvload(as, ir);
1855 break;
1856 case IR_FLOAD: asm_fload(as, ir); break;
1857 case IR_XLOAD: asm_xload(as, ir); break;
1858 case IR_SLOAD: asm_sload(as, ir); break;
1859
1860 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
1861 case IR_FSTORE: asm_fstore(as, ir); break;
1862 case IR_XSTORE: asm_xstore(as, ir, 0); break;
1863
1864 /* Allocations. */
1865 case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
1866 case IR_TNEW: asm_tnew(as, ir); break;
1867 case IR_TDUP: asm_tdup(as, ir); break;
1868 case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
1869
1870 /* Write barriers. */
1871 case IR_TBAR: asm_tbar(as, ir); break;
1872 case IR_OBAR: asm_obar(as, ir); break;
1873
1874 /* Type conversions. */
1875 case IR_CONV: asm_conv(as, ir); break;
1876 case IR_TOBIT: asm_tobit(as, ir); break;
1877 case IR_TOSTR: asm_tostr(as, ir); break;
1878 case IR_STRTO: asm_strto(as, ir); break;
1879
1880 /* Calls. */
1881 case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
1882 case IR_CALLXS: asm_callx(as, ir); break;
1883 case IR_CARG: break;
1884
1885 default:
1886 setintV(&as->J->errinfo, ir->o);
1887 lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
1888 break;
1889 }
1890}
1891
1892/* -- Trace setup --------------------------------------------------------- */ 1737/* -- Trace setup --------------------------------------------------------- */
1893 1738
1894/* Ensure there are enough stack slots for call arguments. */ 1739/* Ensure there are enough stack slots for call arguments. */
1895static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) 1740static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
1896{ 1741{
1897 IRRef args[CCI_NARGS_MAX*2]; 1742 IRRef args[CCI_NARGS_MAX*2];
1898 uint32_t i, nargs = (int)CCI_NARGS(ci); 1743 uint32_t i, nargs = CCI_XNARGS(ci);
1899 int nslots = 4, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; 1744 int nslots = 4, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
1900 asm_collectargs(as, ir, ci, args); 1745 asm_collectargs(as, ir, ci, args);
1901 for (i = 0; i < nargs; i++) { 1746 for (i = 0; i < nargs; i++) {
diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h
index 651fa318..10cd79dd 100644
--- a/src/lj_asm_ppc.h
+++ b/src/lj_asm_ppc.h
@@ -249,7 +249,7 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir)
249/* Generate a call to a C function. */ 249/* Generate a call to a C function. */
250static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) 250static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
251{ 251{
252 uint32_t n, nargs = CCI_NARGS(ci); 252 uint32_t n, nargs = CCI_XNARGS(ci);
253 int32_t ofs = 8; 253 int32_t ofs = 8;
254 Reg gpr = REGARG_FIRSTGPR, fpr = REGARG_FIRSTFPR; 254 Reg gpr = REGARG_FIRSTGPR, fpr = REGARG_FIRSTFPR;
255 if ((void *)ci->func) 255 if ((void *)ci->func)
@@ -329,15 +329,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
329 } 329 }
330} 330}
331 331
332static void asm_call(ASMState *as, IRIns *ir)
333{
334 IRRef args[CCI_NARGS_MAX];
335 const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
336 asm_collectargs(as, ir, ci, args);
337 asm_setupresult(as, ir, ci);
338 asm_gencall(as, ci, args);
339}
340
341static void asm_callx(ASMState *as, IRIns *ir) 332static void asm_callx(ASMState *as, IRIns *ir)
342{ 333{
343 IRRef args[CCI_NARGS_MAX*2]; 334 IRRef args[CCI_NARGS_MAX*2];
@@ -361,16 +352,6 @@ static void asm_callx(ASMState *as, IRIns *ir)
361 asm_gencall(as, &ci, args); 352 asm_gencall(as, &ci, args);
362} 353}
363 354
364static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
365{
366 const CCallInfo *ci = &lj_ir_callinfo[id];
367 IRRef args[2];
368 args[0] = ir->op1;
369 args[1] = ir->op2;
370 asm_setupresult(as, ir, ci);
371 asm_gencall(as, ci, args);
372}
373
374/* -- Returns ------------------------------------------------------------- */ 355/* -- Returns ------------------------------------------------------------- */
375 356
376/* Return to lower frame. Guard that it goes to the right spot. */ 357/* Return to lower frame. Guard that it goes to the right spot. */
@@ -510,28 +491,6 @@ static void asm_conv(ASMState *as, IRIns *ir)
510 } 491 }
511} 492}
512 493
513#if LJ_HASFFI
514static void asm_conv64(ASMState *as, IRIns *ir)
515{
516 IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
517 IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
518 IRCallID id;
519 const CCallInfo *ci;
520 IRRef args[2];
521 args[0] = ir->op1;
522 args[1] = (ir-1)->op1;
523 if (st == IRT_NUM || st == IRT_FLOAT) {
524 id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
525 ir--;
526 } else {
527 id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
528 }
529 ci = &lj_ir_callinfo[id];
530 asm_setupresult(as, ir, ci);
531 asm_gencall(as, ci, args);
532}
533#endif
534
535static void asm_strto(ASMState *as, IRIns *ir) 494static void asm_strto(ASMState *as, IRIns *ir)
536{ 495{
537 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; 496 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
@@ -550,6 +509,8 @@ static void asm_strto(ASMState *as, IRIns *ir)
550 emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs); 509 emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs);
551} 510}
552 511
512/* -- Memory references --------------------------------------------------- */
513
553/* Get pointer to TValue. */ 514/* Get pointer to TValue. */
554static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) 515static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
555{ 516{
@@ -573,27 +534,6 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
573 } 534 }
574} 535}
575 536
576static void asm_tostr(ASMState *as, IRIns *ir)
577{
578 IRRef args[2];
579 args[0] = ASMREF_L;
580 as->gcsteps++;
581 if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) {
582 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
583 args[1] = ASMREF_TMP1; /* const lua_Number * */
584 asm_setupresult(as, ir, ci); /* GCstr * */
585 asm_gencall(as, ci, args);
586 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
587 } else {
588 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
589 args[1] = ir->op1; /* int32_t k */
590 asm_setupresult(as, ir, ci); /* GCstr * */
591 asm_gencall(as, ci, args);
592 }
593}
594
595/* -- Memory references --------------------------------------------------- */
596
597static void asm_aref(ASMState *as, IRIns *ir) 537static void asm_aref(ASMState *as, IRIns *ir)
598{ 538{
599 Reg dest = ra_dest(as, ir, RSET_GPR); 539 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -770,20 +710,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
770 } 710 }
771} 711}
772 712
773static void asm_newref(ASMState *as, IRIns *ir)
774{
775 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
776 IRRef args[3];
777 if (ir->r == RID_SINK)
778 return;
779 args[0] = ASMREF_L; /* lua_State *L */
780 args[1] = ir->op1; /* GCtab *t */
781 args[2] = ASMREF_TMP1; /* cTValue *key */
782 asm_setupresult(as, ir, ci); /* TValue * */
783 asm_gencall(as, ci, args);
784 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
785}
786
787static void asm_uref(ASMState *as, IRIns *ir) 713static void asm_uref(ASMState *as, IRIns *ir)
788{ 714{
789 /* NYI: Check that UREFO is still open and not aliasing a slot. */ 715 /* NYI: Check that UREFO is still open and not aliasing a slot. */
@@ -914,7 +840,7 @@ static void asm_xload(ASMState *as, IRIns *ir)
914 asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); 840 asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
915} 841}
916 842
917static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) 843static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
918{ 844{
919 IRIns *irb; 845 IRIns *irb;
920 if (ir->r == RID_SINK) 846 if (ir->r == RID_SINK)
@@ -931,6 +857,8 @@ static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
931 } 857 }
932} 858}
933 859
860#define asm_xstore(as, ir) asm_xstore_(as, ir, 0)
861
934static void asm_ahuvload(ASMState *as, IRIns *ir) 862static void asm_ahuvload(ASMState *as, IRIns *ir)
935{ 863{
936 IRType1 t = ir->t; 864 IRType1 t = ir->t;
@@ -1081,19 +1009,15 @@ dotypecheck:
1081static void asm_cnew(ASMState *as, IRIns *ir) 1009static void asm_cnew(ASMState *as, IRIns *ir)
1082{ 1010{
1083 CTState *cts = ctype_ctsG(J2G(as->J)); 1011 CTState *cts = ctype_ctsG(J2G(as->J));
1084 CTypeID ctypeid = (CTypeID)IR(ir->op1)->i; 1012 CTypeID id = (CTypeID)IR(ir->op1)->i;
1085 CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ? 1013 CTSize sz;
1086 lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i; 1014 CTInfo info = lj_ctype_info(cts, id, &sz);
1087 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; 1015 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
1088 IRRef args[2]; 1016 IRRef args[4];
1089 RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
1090 RegSet drop = RSET_SCRATCH; 1017 RegSet drop = RSET_SCRATCH;
1091 lua_assert(sz != CTSIZE_INVALID); 1018 lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
1092 1019
1093 args[0] = ASMREF_L; /* lua_State *L */
1094 args[1] = ASMREF_TMP1; /* MSize size */
1095 as->gcsteps++; 1020 as->gcsteps++;
1096
1097 if (ra_hasreg(ir->r)) 1021 if (ra_hasreg(ir->r))
1098 rset_clear(drop, ir->r); /* Dest reg handled below. */ 1022 rset_clear(drop, ir->r); /* Dest reg handled below. */
1099 ra_evictset(as, drop); 1023 ra_evictset(as, drop);
@@ -1102,6 +1026,7 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1102 1026
1103 /* Initialize immutable cdata object. */ 1027 /* Initialize immutable cdata object. */
1104 if (ir->o == IR_CNEWI) { 1028 if (ir->o == IR_CNEWI) {
1029 RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
1105 int32_t ofs = sizeof(GCcdata); 1030 int32_t ofs = sizeof(GCcdata);
1106 lua_assert(sz == 4 || sz == 8); 1031 lua_assert(sz == 4 || sz == 8);
1107 if (sz == 8) { 1032 if (sz == 8) {
@@ -1115,12 +1040,24 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1115 if (ofs == sizeof(GCcdata)) break; 1040 if (ofs == sizeof(GCcdata)) break;
1116 ofs -= 4; ir++; 1041 ofs -= 4; ir++;
1117 } 1042 }
1043 } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */
1044 ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
1045 args[0] = ASMREF_L; /* lua_State *L */
1046 args[1] = ir->op1; /* CTypeID id */
1047 args[2] = ir->op2; /* CTSize sz */
1048 args[3] = ASMREF_TMP1; /* CTSize align */
1049 asm_gencall(as, ci, args);
1050 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
1051 return;
1118 } 1052 }
1053
1119 /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ 1054 /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
1120 emit_tai(as, PPCI_STB, RID_RET+1, RID_RET, offsetof(GCcdata, gct)); 1055 emit_tai(as, PPCI_STB, RID_RET+1, RID_RET, offsetof(GCcdata, gct));
1121 emit_tai(as, PPCI_STH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid)); 1056 emit_tai(as, PPCI_STH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid));
1122 emit_ti(as, PPCI_LI, RID_RET+1, ~LJ_TCDATA); 1057 emit_ti(as, PPCI_LI, RID_RET+1, ~LJ_TCDATA);
1123 emit_ti(as, PPCI_LI, RID_TMP, ctypeid); /* Lower 16 bit used. Sign-ext ok. */ 1058 emit_ti(as, PPCI_LI, RID_TMP, id); /* Lower 16 bit used. Sign-ext ok. */
1059 args[0] = ASMREF_L; /* lua_State *L */
1060 args[1] = ASMREF_TMP1; /* MSize size */
1124 asm_gencall(as, ci, args); 1061 asm_gencall(as, ci, args);
1125 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), 1062 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
1126 ra_releasetmp(as, ASMREF_TMP1)); 1063 ra_releasetmp(as, ASMREF_TMP1));
@@ -1194,23 +1131,14 @@ static void asm_fpunary(ASMState *as, IRIns *ir, PPCIns pi)
1194 emit_fb(as, pi, dest, left); 1131 emit_fb(as, pi, dest, left);
1195} 1132}
1196 1133
1197static int asm_fpjoin_pow(ASMState *as, IRIns *ir) 1134static void asm_fpmath(ASMState *as, IRIns *ir)
1198{ 1135{
1199 IRIns *irp = IR(ir->op1); 1136 if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
1200 if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) { 1137 return;
1201 IRIns *irpp = IR(irp->op1); 1138 if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT))
1202 if (irpp == ir-2 && irpp->o == IR_FPMATH && 1139 asm_fpunary(as, ir, PPCI_FSQRT);
1203 irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) { 1140 else
1204 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow]; 1141 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
1205 IRRef args[2];
1206 args[0] = irpp->op1;
1207 args[1] = irp->op2;
1208 asm_setupresult(as, ir, ci);
1209 asm_gencall(as, ci, args);
1210 return 1;
1211 }
1212 }
1213 return 0;
1214} 1142}
1215 1143
1216static void asm_add(ASMState *as, IRIns *ir) 1144static void asm_add(ASMState *as, IRIns *ir)
@@ -1310,6 +1238,10 @@ static void asm_mul(ASMState *as, IRIns *ir)
1310 } 1238 }
1311} 1239}
1312 1240
1241#define asm_div(as, ir) asm_fparith(as, ir, PPCI_FDIV)
1242#define asm_mod(as, ir) asm_callid(as, ir, IRCALL_lj_vm_modi)
1243#define asm_pow(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi)
1244
1313static void asm_neg(ASMState *as, IRIns *ir) 1245static void asm_neg(ASMState *as, IRIns *ir)
1314{ 1246{
1315 if (irt_isnum(ir->t)) { 1247 if (irt_isnum(ir->t)) {
@@ -1328,6 +1260,10 @@ static void asm_neg(ASMState *as, IRIns *ir)
1328 } 1260 }
1329} 1261}
1330 1262
1263#define asm_abs(as, ir) asm_fpunary(as, ir, PPCI_FABS)
1264#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2)
1265#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp)
1266
1331static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi) 1267static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi)
1332{ 1268{
1333 Reg dest, left, right; 1269 Reg dest, left, right;
@@ -1343,6 +1279,10 @@ static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi)
1343 emit_tab(as, pi|PPCF_DOT, dest, left, right); 1279 emit_tab(as, pi|PPCF_DOT, dest, left, right);
1344} 1280}
1345 1281
1282#define asm_addov(as, ir) asm_arithov(as, ir, PPCI_ADDO)
1283#define asm_subov(as, ir) asm_arithov(as, ir, PPCI_SUBFO)
1284#define asm_mulov(as, ir) asm_arithov(as, ir, PPCI_MULLWO)
1285
1346#if LJ_HASFFI 1286#if LJ_HASFFI
1347static void asm_add64(ASMState *as, IRIns *ir) 1287static void asm_add64(ASMState *as, IRIns *ir)
1348{ 1288{
@@ -1422,7 +1362,7 @@ static void asm_neg64(ASMState *as, IRIns *ir)
1422} 1362}
1423#endif 1363#endif
1424 1364
1425static void asm_bitnot(ASMState *as, IRIns *ir) 1365static void asm_bnot(ASMState *as, IRIns *ir)
1426{ 1366{
1427 Reg dest, left, right; 1367 Reg dest, left, right;
1428 PPCIns pi = PPCI_NOR; 1368 PPCIns pi = PPCI_NOR;
@@ -1449,7 +1389,7 @@ nofuse:
1449 emit_asb(as, pi, dest, left, right); 1389 emit_asb(as, pi, dest, left, right);
1450} 1390}
1451 1391
1452static void asm_bitswap(ASMState *as, IRIns *ir) 1392static void asm_bswap(ASMState *as, IRIns *ir)
1453{ 1393{
1454 Reg dest = ra_dest(as, ir, RSET_GPR); 1394 Reg dest = ra_dest(as, ir, RSET_GPR);
1455 IRIns *irx; 1395 IRIns *irx;
@@ -1470,32 +1410,6 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
1470 } 1410 }
1471} 1411}
1472 1412
1473static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
1474{
1475 Reg dest = ra_dest(as, ir, RSET_GPR);
1476 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
1477 if (irref_isk(ir->op2)) {
1478 int32_t k = IR(ir->op2)->i;
1479 Reg tmp = left;
1480 if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) {
1481 if (!checku16(k)) {
1482 emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16));
1483 if ((k & 0xffff) == 0) return;
1484 }
1485 emit_asi(as, pik, dest, left, k);
1486 return;
1487 }
1488 }
1489 /* May fail due to spills/restores above, but simplifies the logic. */
1490 if (as->flagmcp == as->mcp) {
1491 as->flagmcp = NULL;
1492 as->mcp++;
1493 pi |= PPCF_DOT;
1494 }
1495 right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
1496 emit_asb(as, pi, dest, left, right);
1497}
1498
1499/* Fuse BAND with contiguous bitmask and a shift to rlwinm. */ 1413/* Fuse BAND with contiguous bitmask and a shift to rlwinm. */
1500static void asm_fuseandsh(ASMState *as, PPCIns pi, int32_t mask, IRRef ref) 1414static void asm_fuseandsh(ASMState *as, PPCIns pi, int32_t mask, IRRef ref)
1501{ 1415{
@@ -1526,7 +1440,7 @@ nofuse:
1526 *--as->mcp = pi | PPCF_T(left); 1440 *--as->mcp = pi | PPCF_T(left);
1527} 1441}
1528 1442
1529static void asm_bitand(ASMState *as, IRIns *ir) 1443static void asm_band(ASMState *as, IRIns *ir)
1530{ 1444{
1531 Reg dest, left, right; 1445 Reg dest, left, right;
1532 IRRef lref = ir->op1; 1446 IRRef lref = ir->op1;
@@ -1581,6 +1495,35 @@ static void asm_bitand(ASMState *as, IRIns *ir)
1581 emit_asb(as, PPCI_AND ^ dot, dest, left, right); 1495 emit_asb(as, PPCI_AND ^ dot, dest, left, right);
1582} 1496}
1583 1497
1498static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
1499{
1500 Reg dest = ra_dest(as, ir, RSET_GPR);
1501 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
1502 if (irref_isk(ir->op2)) {
1503 int32_t k = IR(ir->op2)->i;
1504 Reg tmp = left;
1505 if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) {
1506 if (!checku16(k)) {
1507 emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16));
1508 if ((k & 0xffff) == 0) return;
1509 }
1510 emit_asi(as, pik, dest, left, k);
1511 return;
1512 }
1513 }
1514 /* May fail due to spills/restores above, but simplifies the logic. */
1515 if (as->flagmcp == as->mcp) {
1516 as->flagmcp = NULL;
1517 as->mcp++;
1518 pi |= PPCF_DOT;
1519 }
1520 right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
1521 emit_asb(as, pi, dest, left, right);
1522}
1523
1524#define asm_bor(as, ir) asm_bitop(as, ir, PPCI_OR, PPCI_ORI)
1525#define asm_bxor(as, ir) asm_bitop(as, ir, PPCI_XOR, PPCI_XORI)
1526
1584static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) 1527static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
1585{ 1528{
1586 Reg dest, left; 1529 Reg dest, left;
@@ -1606,6 +1549,14 @@ static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
1606 } 1549 }
1607} 1550}
1608 1551
1552#define asm_bshl(as, ir) asm_bitshift(as, ir, PPCI_SLW, 0)
1553#define asm_bshr(as, ir) asm_bitshift(as, ir, PPCI_SRW, 1)
1554#define asm_bsar(as, ir) asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI)
1555#define asm_brol(as, ir) \
1556 asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31), \
1557 PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31))
1558#define asm_bror(as, ir) lua_assert(0)
1559
1609static void asm_min_max(ASMState *as, IRIns *ir, int ismax) 1560static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
1610{ 1561{
1611 if (irt_isnum(ir->t)) { 1562 if (irt_isnum(ir->t)) {
@@ -1636,6 +1587,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
1636 } 1587 }
1637} 1588}
1638 1589
1590#define asm_min(as, ir) asm_min_max(as, ir, 0)
1591#define asm_max(as, ir) asm_min_max(as, ir, 1)
1592
1639/* -- Comparisons --------------------------------------------------------- */ 1593/* -- Comparisons --------------------------------------------------------- */
1640 1594
1641#define CC_UNSIGNED 0x08 /* Unsigned integer comparison. */ 1595#define CC_UNSIGNED 0x08 /* Unsigned integer comparison. */
@@ -1712,6 +1666,8 @@ static void asm_comp(ASMState *as, IRIns *ir)
1712 } 1666 }
1713} 1667}
1714 1668
1669#define asm_equal(as, ir) asm_comp(as, ir)
1670
1715#if LJ_HASFFI 1671#if LJ_HASFFI
1716/* 64 bit integer comparisons. */ 1672/* 64 bit integer comparisons. */
1717static void asm_comp64(ASMState *as, IRIns *ir) 1673static void asm_comp64(ASMState *as, IRIns *ir)
@@ -1757,8 +1713,8 @@ static void asm_hiop(ASMState *as, IRIns *ir)
1757 } else if ((ir-1)->o == IR_XSTORE) { 1713 } else if ((ir-1)->o == IR_XSTORE) {
1758 as->curins--; /* Handle both stores here. */ 1714 as->curins--; /* Handle both stores here. */
1759 if ((ir-1)->r != RID_SINK) { 1715 if ((ir-1)->r != RID_SINK) {
1760 asm_xstore(as, ir, 0); 1716 asm_xstore_(as, ir, 0);
1761 asm_xstore(as, ir-1, 4); 1717 asm_xstore_(as, ir-1, 4);
1762 } 1718 }
1763 return; 1719 return;
1764 } 1720 }
@@ -1964,136 +1920,13 @@ static void asm_tail_prep(ASMState *as)
1964 } 1920 }
1965} 1921}
1966 1922
1967/* -- Instruction dispatch ------------------------------------------------ */
1968
1969/* Assemble a single instruction. */
1970static void asm_ir(ASMState *as, IRIns *ir)
1971{
1972 switch ((IROp)ir->o) {
1973 /* Miscellaneous ops. */
1974 case IR_LOOP: asm_loop(as); break;
1975 case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
1976 case IR_USE:
1977 ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
1978 case IR_PHI: asm_phi(as, ir); break;
1979 case IR_HIOP: asm_hiop(as, ir); break;
1980 case IR_GCSTEP: asm_gcstep(as, ir); break;
1981
1982 /* Guarded assertions. */
1983 case IR_EQ: case IR_NE:
1984 if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
1985 as->curins--;
1986 asm_href(as, ir-1, (IROp)ir->o);
1987 break;
1988 }
1989 /* fallthrough */
1990 case IR_LT: case IR_GE: case IR_LE: case IR_GT:
1991 case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
1992 case IR_ABC:
1993 asm_comp(as, ir);
1994 break;
1995
1996 case IR_RETF: asm_retf(as, ir); break;
1997
1998 /* Bit ops. */
1999 case IR_BNOT: asm_bitnot(as, ir); break;
2000 case IR_BSWAP: asm_bitswap(as, ir); break;
2001
2002 case IR_BAND: asm_bitand(as, ir); break;
2003 case IR_BOR: asm_bitop(as, ir, PPCI_OR, PPCI_ORI); break;
2004 case IR_BXOR: asm_bitop(as, ir, PPCI_XOR, PPCI_XORI); break;
2005
2006 case IR_BSHL: asm_bitshift(as, ir, PPCI_SLW, 0); break;
2007 case IR_BSHR: asm_bitshift(as, ir, PPCI_SRW, 1); break;
2008 case IR_BSAR: asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI); break;
2009 case IR_BROL: asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31),
2010 PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31)); break;
2011 case IR_BROR: lua_assert(0); break;
2012
2013 /* Arithmetic ops. */
2014 case IR_ADD: asm_add(as, ir); break;
2015 case IR_SUB: asm_sub(as, ir); break;
2016 case IR_MUL: asm_mul(as, ir); break;
2017 case IR_DIV: asm_fparith(as, ir, PPCI_FDIV); break;
2018 case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;
2019 case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;
2020 case IR_NEG: asm_neg(as, ir); break;
2021
2022 case IR_ABS: asm_fpunary(as, ir, PPCI_FABS); break;
2023 case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;
2024 case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;
2025 case IR_MIN: asm_min_max(as, ir, 0); break;
2026 case IR_MAX: asm_min_max(as, ir, 1); break;
2027 case IR_FPMATH:
2028 if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
2029 break;
2030 if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT))
2031 asm_fpunary(as, ir, PPCI_FSQRT);
2032 else
2033 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
2034 break;
2035
2036 /* Overflow-checking arithmetic ops. */
2037 case IR_ADDOV: asm_arithov(as, ir, PPCI_ADDO); break;
2038 case IR_SUBOV: asm_arithov(as, ir, PPCI_SUBFO); break;
2039 case IR_MULOV: asm_arithov(as, ir, PPCI_MULLWO); break;
2040
2041 /* Memory references. */
2042 case IR_AREF: asm_aref(as, ir); break;
2043 case IR_HREF: asm_href(as, ir, 0); break;
2044 case IR_HREFK: asm_hrefk(as, ir); break;
2045 case IR_NEWREF: asm_newref(as, ir); break;
2046 case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
2047 case IR_FREF: asm_fref(as, ir); break;
2048 case IR_STRREF: asm_strref(as, ir); break;
2049
2050 /* Loads and stores. */
2051 case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
2052 asm_ahuvload(as, ir);
2053 break;
2054 case IR_FLOAD: asm_fload(as, ir); break;
2055 case IR_XLOAD: asm_xload(as, ir); break;
2056 case IR_SLOAD: asm_sload(as, ir); break;
2057
2058 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
2059 case IR_FSTORE: asm_fstore(as, ir); break;
2060 case IR_XSTORE: asm_xstore(as, ir, 0); break;
2061
2062 /* Allocations. */
2063 case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
2064 case IR_TNEW: asm_tnew(as, ir); break;
2065 case IR_TDUP: asm_tdup(as, ir); break;
2066 case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
2067
2068 /* Write barriers. */
2069 case IR_TBAR: asm_tbar(as, ir); break;
2070 case IR_OBAR: asm_obar(as, ir); break;
2071
2072 /* Type conversions. */
2073 case IR_CONV: asm_conv(as, ir); break;
2074 case IR_TOBIT: asm_tobit(as, ir); break;
2075 case IR_TOSTR: asm_tostr(as, ir); break;
2076 case IR_STRTO: asm_strto(as, ir); break;
2077
2078 /* Calls. */
2079 case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
2080 case IR_CALLXS: asm_callx(as, ir); break;
2081 case IR_CARG: break;
2082
2083 default:
2084 setintV(&as->J->errinfo, ir->o);
2085 lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
2086 break;
2087 }
2088}
2089
2090/* -- Trace setup --------------------------------------------------------- */ 1923/* -- Trace setup --------------------------------------------------------- */
2091 1924
2092/* Ensure there are enough stack slots for call arguments. */ 1925/* Ensure there are enough stack slots for call arguments. */
2093static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) 1926static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
2094{ 1927{
2095 IRRef args[CCI_NARGS_MAX*2]; 1928 IRRef args[CCI_NARGS_MAX*2];
2096 uint32_t i, nargs = (int)CCI_NARGS(ci); 1929 uint32_t i, nargs = CCI_XNARGS(ci);
2097 int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; 1930 int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
2098 asm_collectargs(as, ir, ci, args); 1931 asm_collectargs(as, ir, ci, args);
2099 for (i = 0; i < nargs; i++) 1932 for (i = 0; i < nargs; i++)
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
index 64441ccb..f2f8157d 100644
--- a/src/lj_asm_x86.h
+++ b/src/lj_asm_x86.h
@@ -384,7 +384,7 @@ static Reg asm_fuseloadm(ASMState *as, IRRef ref, RegSet allow, int is64)
384/* Count the required number of stack slots for a call. */ 384/* Count the required number of stack slots for a call. */
385static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args) 385static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args)
386{ 386{
387 uint32_t i, nargs = CCI_NARGS(ci); 387 uint32_t i, nargs = CCI_XNARGS(ci);
388 int nslots = 0; 388 int nslots = 0;
389#if LJ_64 389#if LJ_64
390 if (LJ_ABI_WIN) { 390 if (LJ_ABI_WIN) {
@@ -417,7 +417,7 @@ static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args)
417/* Generate a call to a C function. */ 417/* Generate a call to a C function. */
418static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) 418static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
419{ 419{
420 uint32_t n, nargs = CCI_NARGS(ci); 420 uint32_t n, nargs = CCI_XNARGS(ci);
421 int32_t ofs = STACKARG_OFS; 421 int32_t ofs = STACKARG_OFS;
422#if LJ_64 422#if LJ_64
423 uint32_t gprs = REGARG_GPRS; 423 uint32_t gprs = REGARG_GPRS;
@@ -552,7 +552,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
552 if (ra_hasreg(dest)) { 552 if (ra_hasreg(dest)) {
553 ra_free(as, dest); 553 ra_free(as, dest);
554 ra_modified(as, dest); 554 ra_modified(as, dest);
555 emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, 555 emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS,
556 dest, RID_ESP, ofs); 556 dest, RID_ESP, ofs);
557 } 557 }
558 if ((ci->flags & CCI_CASTU64)) { 558 if ((ci->flags & CCI_CASTU64)) {
@@ -576,15 +576,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
576 } 576 }
577} 577}
578 578
579static void asm_call(ASMState *as, IRIns *ir)
580{
581 IRRef args[CCI_NARGS_MAX];
582 const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
583 asm_collectargs(as, ir, ci, args);
584 asm_setupresult(as, ir, ci);
585 asm_gencall(as, ci, args);
586}
587
588/* Return a constant function pointer or NULL for indirect calls. */ 579/* Return a constant function pointer or NULL for indirect calls. */
589static void *asm_callx_func(ASMState *as, IRIns *irf, IRRef func) 580static void *asm_callx_func(ASMState *as, IRIns *irf, IRRef func)
590{ 581{
@@ -663,8 +654,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
663 asm_guardcc(as, CC_NE); 654 asm_guardcc(as, CC_NE);
664 emit_rr(as, XO_UCOMISD, left, tmp); 655 emit_rr(as, XO_UCOMISD, left, tmp);
665 emit_rr(as, XO_CVTSI2SD, tmp, dest); 656 emit_rr(as, XO_CVTSI2SD, tmp, dest);
666 if (!(as->flags & JIT_F_SPLIT_XMM)) 657 emit_rr(as, XO_XORPS, tmp, tmp); /* Avoid partial register stall. */
667 emit_rr(as, XO_XORPS, tmp, tmp); /* Avoid partial register stall. */
668 emit_rr(as, XO_CVTTSD2SI, dest, left); 658 emit_rr(as, XO_CVTTSD2SI, dest, left);
669 /* Can't fuse since left is needed twice. */ 659 /* Can't fuse since left is needed twice. */
670} 660}
@@ -720,8 +710,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
720 emit_mrm(as, irt_isnum(ir->t) ? XO_CVTSI2SD : XO_CVTSI2SS, 710 emit_mrm(as, irt_isnum(ir->t) ? XO_CVTSI2SD : XO_CVTSI2SS,
721 dest|((LJ_64 && (st64 || st == IRT_U32)) ? REX_64 : 0), left); 711 dest|((LJ_64 && (st64 || st == IRT_U32)) ? REX_64 : 0), left);
722 } 712 }
723 if (!(as->flags & JIT_F_SPLIT_XMM)) 713 emit_rr(as, XO_XORPS, dest, dest); /* Avoid partial register stall. */
724 emit_rr(as, XO_XORPS, dest, dest); /* Avoid partial register stall. */
725 } else if (stfp) { /* FP to integer conversion. */ 714 } else if (stfp) { /* FP to integer conversion. */
726 if (irt_isguard(ir->t)) { 715 if (irt_isguard(ir->t)) {
727 /* Checked conversions are only supported from number to int. */ 716 /* Checked conversions are only supported from number to int. */
@@ -729,9 +718,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
729 asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); 718 asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
730 } else { 719 } else {
731 Reg dest = ra_dest(as, ir, RSET_GPR); 720 Reg dest = ra_dest(as, ir, RSET_GPR);
732 x86Op op = st == IRT_NUM ? 721 x86Op op = st == IRT_NUM ? XO_CVTTSD2SI : XO_CVTTSS2SI;
733 ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSD2SI : XO_CVTSD2SI) :
734 ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSS2SI : XO_CVTSS2SI);
735 if (LJ_64 ? irt_isu64(ir->t) : irt_isu32(ir->t)) { 722 if (LJ_64 ? irt_isu64(ir->t) : irt_isu32(ir->t)) {
736 /* LJ_64: For inputs >= 2^63 add -2^64, convert again. */ 723 /* LJ_64: For inputs >= 2^63 add -2^64, convert again. */
737 /* LJ_32: For inputs >= 2^31 add -2^31, convert again and add 2^31. */ 724 /* LJ_32: For inputs >= 2^31 add -2^31, convert again and add 2^31. */
@@ -825,8 +812,7 @@ static void asm_conv_fp_int64(ASMState *as, IRIns *ir)
825 if (ra_hasreg(dest)) { 812 if (ra_hasreg(dest)) {
826 ra_free(as, dest); 813 ra_free(as, dest);
827 ra_modified(as, dest); 814 ra_modified(as, dest);
828 emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, 815 emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, dest, RID_ESP, ofs);
829 dest, RID_ESP, ofs);
830 } 816 }
831 emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd, 817 emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd,
832 irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs); 818 irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs);
@@ -854,7 +840,6 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
854 Reg lo, hi; 840 Reg lo, hi;
855 lua_assert(st == IRT_NUM || st == IRT_FLOAT); 841 lua_assert(st == IRT_NUM || st == IRT_FLOAT);
856 lua_assert(dt == IRT_I64 || dt == IRT_U64); 842 lua_assert(dt == IRT_I64 || dt == IRT_U64);
857 lua_assert(((ir-1)->op2 & IRCONV_TRUNC));
858 hi = ra_dest(as, ir, RSET_GPR); 843 hi = ra_dest(as, ir, RSET_GPR);
859 lo = ra_dest(as, ir-1, rset_exclude(RSET_GPR, hi)); 844 lo = ra_dest(as, ir-1, rset_exclude(RSET_GPR, hi));
860 if (ra_used(ir-1)) emit_rmro(as, XO_MOV, lo, RID_ESP, 0); 845 if (ra_used(ir-1)) emit_rmro(as, XO_MOV, lo, RID_ESP, 0);
@@ -897,6 +882,14 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
897 st == IRT_NUM ? XOg_FLDq: XOg_FLDd, 882 st == IRT_NUM ? XOg_FLDq: XOg_FLDd,
898 asm_fuseload(as, ir->op1, RSET_EMPTY)); 883 asm_fuseload(as, ir->op1, RSET_EMPTY));
899} 884}
885
886static void asm_conv64(ASMState *as, IRIns *ir)
887{
888 if (irt_isfp(ir->t))
889 asm_conv_fp_int64(as, ir);
890 else
891 asm_conv_int64_fp(as, ir);
892}
900#endif 893#endif
901 894
902static void asm_strto(ASMState *as, IRIns *ir) 895static void asm_strto(ASMState *as, IRIns *ir)
@@ -918,29 +911,32 @@ static void asm_strto(ASMState *as, IRIns *ir)
918 RID_ESP, sps_scale(ir->s)); 911 RID_ESP, sps_scale(ir->s));
919} 912}
920 913
921static void asm_tostr(ASMState *as, IRIns *ir) 914/* -- Memory references --------------------------------------------------- */
915
916/* Get pointer to TValue. */
917static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
922{ 918{
923 IRIns *irl = IR(ir->op1); 919 IRIns *ir = IR(ref);
924 IRRef args[2]; 920 if (irt_isnum(ir->t)) {
925 args[0] = ASMREF_L; 921 /* For numbers use the constant itself or a spill slot as a TValue. */
926 as->gcsteps++; 922 if (irref_isk(ref))
927 if (irt_isnum(irl->t)) { 923 emit_loada(as, dest, ir_knum(ir));
928 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum]; 924 else
929 args[1] = ASMREF_TMP1; /* const lua_Number * */ 925 emit_rmro(as, XO_LEA, dest|REX_64, RID_ESP, ra_spill(as, ir));
930 asm_setupresult(as, ir, ci); /* GCstr * */
931 asm_gencall(as, ci, args);
932 emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1)|REX_64,
933 RID_ESP, ra_spill(as, irl));
934 } else { 926 } else {
935 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint]; 927 /* Otherwise use g->tmptv to hold the TValue. */
936 args[1] = ir->op1; /* int32_t k */ 928 if (!irref_isk(ref)) {
937 asm_setupresult(as, ir, ci); /* GCstr * */ 929 Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest));
938 asm_gencall(as, ci, args); 930 emit_movtomro(as, REX_64IR(ir, src), dest, 0);
931 } else if (!irt_ispri(ir->t)) {
932 emit_movmroi(as, dest, 0, ir->i);
933 }
934 if (!(LJ_64 && irt_islightud(ir->t)))
935 emit_movmroi(as, dest, 4, irt_toitype(ir->t));
936 emit_loada(as, dest, &J2G(as->J)->tmptv);
939 } 937 }
940} 938}
941 939
942/* -- Memory references --------------------------------------------------- */
943
944static void asm_aref(ASMState *as, IRIns *ir) 940static void asm_aref(ASMState *as, IRIns *ir)
945{ 941{
946 Reg dest = ra_dest(as, ir, RSET_GPR); 942 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -951,23 +947,6 @@ static void asm_aref(ASMState *as, IRIns *ir)
951 emit_rr(as, XO_MOV, dest, as->mrm.base); 947 emit_rr(as, XO_MOV, dest, as->mrm.base);
952} 948}
953 949
954/* Merge NE(HREF, niltv) check. */
955static MCode *merge_href_niltv(ASMState *as, IRIns *ir)
956{
957 /* Assumes nothing else generates NE of HREF. */
958 if ((ir[1].o == IR_NE || ir[1].o == IR_EQ) && ir[1].op1 == as->curins &&
959 ra_hasreg(ir->r)) {
960 MCode *p = as->mcp;
961 p += (LJ_64 && *p != XI_ARITHi) ? 7+6 : 6+6;
962 /* Ensure no loop branch inversion happened. */
963 if (p[-6] == 0x0f && p[-5] == XI_JCCn+(CC_NE^(ir[1].o & 1))) {
964 as->mcp = p; /* Kill cmp reg, imm32 + jz exit. */
965 return p + *(int32_t *)(p-4); /* Return exit address. */
966 }
967 }
968 return NULL;
969}
970
971/* Inlined hash lookup. Specialized for key type and for const keys. 950/* Inlined hash lookup. Specialized for key type and for const keys.
972** The equivalent C code is: 951** The equivalent C code is:
973** Node *n = hashkey(t, key); 952** Node *n = hashkey(t, key);
@@ -976,10 +955,10 @@ static MCode *merge_href_niltv(ASMState *as, IRIns *ir)
976** } while ((n = nextnode(n))); 955** } while ((n = nextnode(n)));
977** return niltv(L); 956** return niltv(L);
978*/ 957*/
979static void asm_href(ASMState *as, IRIns *ir) 958static void asm_href(ASMState *as, IRIns *ir, IROp merge)
980{ 959{
981 MCode *nilexit = merge_href_niltv(as, ir); /* Do this before any restores. */
982 RegSet allow = RSET_GPR; 960 RegSet allow = RSET_GPR;
961 int destused = ra_used(ir);
983 Reg dest = ra_dest(as, ir, allow); 962 Reg dest = ra_dest(as, ir, allow);
984 Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); 963 Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
985 Reg key = RID_NONE, tmp = RID_NONE; 964 Reg key = RID_NONE, tmp = RID_NONE;
@@ -996,14 +975,12 @@ static void asm_href(ASMState *as, IRIns *ir)
996 tmp = ra_scratch(as, rset_exclude(allow, key)); 975 tmp = ra_scratch(as, rset_exclude(allow, key));
997 } 976 }
998 977
999 /* Key not found in chain: jump to exit (if merged with NE) or load niltv. */ 978 /* Key not found in chain: jump to exit (if merged) or load niltv. */
1000 l_end = emit_label(as); 979 l_end = emit_label(as);
1001 if (nilexit && ir[1].o == IR_NE) { 980 if (merge == IR_NE)
1002 emit_jcc(as, CC_E, nilexit); /* XI_JMP is not found by lj_asm_patchexit. */ 981 asm_guardcc(as, CC_E); /* XI_JMP is not found by lj_asm_patchexit. */
1003 nilexit = NULL; 982 else if (destused)
1004 } else {
1005 emit_loada(as, dest, niltvg(J2G(as->J))); 983 emit_loada(as, dest, niltvg(J2G(as->J)));
1006 }
1007 984
1008 /* Follow hash chain until the end. */ 985 /* Follow hash chain until the end. */
1009 l_loop = emit_sjcc_label(as, CC_NZ); 986 l_loop = emit_sjcc_label(as, CC_NZ);
@@ -1012,8 +989,8 @@ static void asm_href(ASMState *as, IRIns *ir)
1012 l_next = emit_label(as); 989 l_next = emit_label(as);
1013 990
1014 /* Type and value comparison. */ 991 /* Type and value comparison. */
1015 if (nilexit) 992 if (merge == IR_EQ)
1016 emit_jcc(as, CC_E, nilexit); 993 asm_guardcc(as, CC_E);
1017 else 994 else
1018 emit_sjcc(as, CC_E, l_end); 995 emit_sjcc(as, CC_E, l_end);
1019 if (irt_isnum(kt)) { 996 if (irt_isnum(kt)) {
@@ -1169,41 +1146,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
1169#endif 1146#endif
1170} 1147}
1171 1148
1172static void asm_newref(ASMState *as, IRIns *ir)
1173{
1174 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
1175 IRRef args[3];
1176 IRIns *irkey;
1177 Reg tmp;
1178 if (ir->r == RID_SINK)
1179 return;
1180 args[0] = ASMREF_L; /* lua_State *L */
1181 args[1] = ir->op1; /* GCtab *t */
1182 args[2] = ASMREF_TMP1; /* cTValue *key */
1183 asm_setupresult(as, ir, ci); /* TValue * */
1184 asm_gencall(as, ci, args);
1185 tmp = ra_releasetmp(as, ASMREF_TMP1);
1186 irkey = IR(ir->op2);
1187 if (irt_isnum(irkey->t)) {
1188 /* For numbers use the constant itself or a spill slot as a TValue. */
1189 if (irref_isk(ir->op2))
1190 emit_loada(as, tmp, ir_knum(irkey));
1191 else
1192 emit_rmro(as, XO_LEA, tmp|REX_64, RID_ESP, ra_spill(as, irkey));
1193 } else {
1194 /* Otherwise use g->tmptv to hold the TValue. */
1195 if (!irref_isk(ir->op2)) {
1196 Reg src = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, tmp));
1197 emit_movtomro(as, REX_64IR(irkey, src), tmp, 0);
1198 } else if (!irt_ispri(irkey->t)) {
1199 emit_movmroi(as, tmp, 0, irkey->i);
1200 }
1201 if (!(LJ_64 && irt_islightud(irkey->t)))
1202 emit_movmroi(as, tmp, 4, irt_toitype(irkey->t));
1203 emit_loada(as, tmp, &J2G(as->J)->tmptv);
1204 }
1205}
1206
1207static void asm_uref(ASMState *as, IRIns *ir) 1149static void asm_uref(ASMState *as, IRIns *ir)
1208{ 1150{
1209 /* NYI: Check that UREFO is still open and not aliasing a slot. */ 1151 /* NYI: Check that UREFO is still open and not aliasing a slot. */
@@ -1263,7 +1205,7 @@ static void asm_fxload(ASMState *as, IRIns *ir)
1263 case IRT_U8: xo = XO_MOVZXb; break; 1205 case IRT_U8: xo = XO_MOVZXb; break;
1264 case IRT_I16: xo = XO_MOVSXw; break; 1206 case IRT_I16: xo = XO_MOVSXw; break;
1265 case IRT_U16: xo = XO_MOVZXw; break; 1207 case IRT_U16: xo = XO_MOVZXw; break;
1266 case IRT_NUM: xo = XMM_MOVRM(as); break; 1208 case IRT_NUM: xo = XO_MOVSD; break;
1267 case IRT_FLOAT: xo = XO_MOVSS; break; 1209 case IRT_FLOAT: xo = XO_MOVSS; break;
1268 default: 1210 default:
1269 if (LJ_64 && irt_is64(ir->t)) 1211 if (LJ_64 && irt_is64(ir->t))
@@ -1276,6 +1218,9 @@ static void asm_fxload(ASMState *as, IRIns *ir)
1276 emit_mrm(as, xo, dest, RID_MRM); 1218 emit_mrm(as, xo, dest, RID_MRM);
1277} 1219}
1278 1220
1221#define asm_fload(as, ir) asm_fxload(as, ir)
1222#define asm_xload(as, ir) asm_fxload(as, ir)
1223
1279static void asm_fxstore(ASMState *as, IRIns *ir) 1224static void asm_fxstore(ASMState *as, IRIns *ir)
1280{ 1225{
1281 RegSet allow = RSET_GPR; 1226 RegSet allow = RSET_GPR;
@@ -1339,6 +1284,9 @@ static void asm_fxstore(ASMState *as, IRIns *ir)
1339 } 1284 }
1340} 1285}
1341 1286
1287#define asm_fstore(as, ir) asm_fxstore(as, ir)
1288#define asm_xstore(as, ir) asm_fxstore(as, ir)
1289
1342#if LJ_64 1290#if LJ_64
1343static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck) 1291static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck)
1344{ 1292{
@@ -1377,7 +1325,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
1377 RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR; 1325 RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
1378 Reg dest = ra_dest(as, ir, allow); 1326 Reg dest = ra_dest(as, ir, allow);
1379 asm_fuseahuref(as, ir->op1, RSET_GPR); 1327 asm_fuseahuref(as, ir->op1, RSET_GPR);
1380 emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XMM_MOVRM(as), dest, RID_MRM); 1328 emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XO_MOVSD, dest, RID_MRM);
1381 } else { 1329 } else {
1382 asm_fuseahuref(as, ir->op1, RSET_GPR); 1330 asm_fuseahuref(as, ir->op1, RSET_GPR);
1383 } 1331 }
@@ -1443,7 +1391,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
1443 Reg left = ra_scratch(as, RSET_FPR); 1391 Reg left = ra_scratch(as, RSET_FPR);
1444 asm_tointg(as, ir, left); /* Frees dest reg. Do this before base alloc. */ 1392 asm_tointg(as, ir, left); /* Frees dest reg. Do this before base alloc. */
1445 base = ra_alloc1(as, REF_BASE, RSET_GPR); 1393 base = ra_alloc1(as, REF_BASE, RSET_GPR);
1446 emit_rmro(as, XMM_MOVRM(as), left, base, ofs); 1394 emit_rmro(as, XO_MOVSD, left, base, ofs);
1447 t.irt = IRT_NUM; /* Continue with a regular number type check. */ 1395 t.irt = IRT_NUM; /* Continue with a regular number type check. */
1448#if LJ_64 1396#if LJ_64
1449 } else if (irt_islightud(t)) { 1397 } else if (irt_islightud(t)) {
@@ -1461,11 +1409,9 @@ static void asm_sload(ASMState *as, IRIns *ir)
1461 lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); 1409 lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
1462 if ((ir->op2 & IRSLOAD_CONVERT)) { 1410 if ((ir->op2 & IRSLOAD_CONVERT)) {
1463 t.irt = irt_isint(t) ? IRT_NUM : IRT_INT; /* Check for original type. */ 1411 t.irt = irt_isint(t) ? IRT_NUM : IRT_INT; /* Check for original type. */
1464 emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTSD2SI, dest, base, ofs); 1412 emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTTSD2SI, dest, base, ofs);
1465 } else if (irt_isnum(t)) {
1466 emit_rmro(as, XMM_MOVRM(as), dest, base, ofs);
1467 } else { 1413 } else {
1468 emit_rmro(as, XO_MOV, dest, base, ofs); 1414 emit_rmro(as, irt_isnum(t) ? XO_MOVSD : XO_MOV, dest, base, ofs);
1469 } 1415 }
1470 } else { 1416 } else {
1471 if (!(ir->op2 & IRSLOAD_TYPECHECK)) 1417 if (!(ir->op2 & IRSLOAD_TYPECHECK))
@@ -1492,15 +1438,13 @@ static void asm_sload(ASMState *as, IRIns *ir)
1492static void asm_cnew(ASMState *as, IRIns *ir) 1438static void asm_cnew(ASMState *as, IRIns *ir)
1493{ 1439{
1494 CTState *cts = ctype_ctsG(J2G(as->J)); 1440 CTState *cts = ctype_ctsG(J2G(as->J));
1495 CTypeID ctypeid = (CTypeID)IR(ir->op1)->i; 1441 CTypeID id = (CTypeID)IR(ir->op1)->i;
1496 CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ? 1442 CTSize sz;
1497 lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i; 1443 CTInfo info = lj_ctype_info(cts, id, &sz);
1498 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; 1444 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
1499 IRRef args[2]; 1445 IRRef args[4];
1500 lua_assert(sz != CTSIZE_INVALID); 1446 lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
1501 1447
1502 args[0] = ASMREF_L; /* lua_State *L */
1503 args[1] = ASMREF_TMP1; /* MSize size */
1504 as->gcsteps++; 1448 as->gcsteps++;
1505 asm_setupresult(as, ir, ci); /* GCcdata * */ 1449 asm_setupresult(as, ir, ci); /* GCcdata * */
1506 1450
@@ -1543,15 +1487,26 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1543 } while (1); 1487 } while (1);
1544#endif 1488#endif
1545 lua_assert(sz == 4 || sz == 8); 1489 lua_assert(sz == 4 || sz == 8);
1490 } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */
1491 ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
1492 args[0] = ASMREF_L; /* lua_State *L */
1493 args[1] = ir->op1; /* CTypeID id */
1494 args[2] = ir->op2; /* CTSize sz */
1495 args[3] = ASMREF_TMP1; /* CTSize align */
1496 asm_gencall(as, ci, args);
1497 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
1498 return;
1546 } 1499 }
1547 1500
1548 /* Combine initialization of marked, gct and ctypeid. */ 1501 /* Combine initialization of marked, gct and ctypeid. */
1549 emit_movtomro(as, RID_ECX, RID_RET, offsetof(GCcdata, marked)); 1502 emit_movtomro(as, RID_ECX, RID_RET, offsetof(GCcdata, marked));
1550 emit_gri(as, XG_ARITHi(XOg_OR), RID_ECX, 1503 emit_gri(as, XG_ARITHi(XOg_OR), RID_ECX,
1551 (int32_t)((~LJ_TCDATA<<8)+(ctypeid<<16))); 1504 (int32_t)((~LJ_TCDATA<<8)+(id<<16)));
1552 emit_gri(as, XG_ARITHi(XOg_AND), RID_ECX, LJ_GC_WHITES); 1505 emit_gri(as, XG_ARITHi(XOg_AND), RID_ECX, LJ_GC_WHITES);
1553 emit_opgl(as, XO_MOVZXb, RID_ECX, gc.currentwhite); 1506 emit_opgl(as, XO_MOVZXb, RID_ECX, gc.currentwhite);
1554 1507
1508 args[0] = ASMREF_L; /* lua_State *L */
1509 args[1] = ASMREF_TMP1; /* MSize size */
1555 asm_gencall(as, ci, args); 1510 asm_gencall(as, ci, args);
1556 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)(sz+sizeof(GCcdata))); 1511 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)(sz+sizeof(GCcdata)));
1557} 1512}
@@ -1629,31 +1584,21 @@ static void asm_x87load(ASMState *as, IRRef ref)
1629 } 1584 }
1630} 1585}
1631 1586
1632/* Try to rejoin pow from EXP2, MUL and LOG2 (if still unsplit). */ 1587static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref)
1633static int fpmjoin_pow(ASMState *as, IRIns *ir)
1634{ 1588{
1635 IRIns *irp = IR(ir->op1); 1589 /* The modified regs must match with the *.dasc implementation. */
1636 if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) { 1590 RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX);
1637 IRIns *irpp = IR(irp->op1); 1591 IRIns *irx;
1638 if (irpp == ir-2 && irpp->o == IR_FPMATH && 1592 if (ra_hasreg(ir->r))
1639 irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) { 1593 rset_clear(drop, ir->r); /* Dest reg handled below. */
1640 /* The modified regs must match with the *.dasc implementation. */ 1594 ra_evictset(as, drop);
1641 RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX); 1595 ra_destreg(as, ir, RID_XMM0);
1642 IRIns *irx; 1596 emit_call(as, lj_vm_pow_sse);
1643 if (ra_hasreg(ir->r)) 1597 irx = IR(lref);
1644 rset_clear(drop, ir->r); /* Dest reg handled below. */ 1598 if (ra_noreg(irx->r) && ra_gethint(irx->r) == RID_XMM1)
1645 ra_evictset(as, drop); 1599 irx->r = RID_INIT; /* Avoid allocating xmm1 for x. */
1646 ra_destreg(as, ir, RID_XMM0); 1600 ra_left(as, RID_XMM0, lref);
1647 emit_call(as, lj_vm_pow_sse); 1601 ra_left(as, RID_XMM1, rref);
1648 irx = IR(irpp->op1);
1649 if (ra_noreg(irx->r) && ra_gethint(irx->r) == RID_XMM1)
1650 irx->r = RID_INIT; /* Avoid allocating xmm1 for x. */
1651 ra_left(as, RID_XMM0, irpp->op1);
1652 ra_left(as, RID_XMM1, irp->op2);
1653 return 1;
1654 }
1655 }
1656 return 0;
1657} 1602}
1658 1603
1659static void asm_fpmath(ASMState *as, IRIns *ir) 1604static void asm_fpmath(ASMState *as, IRIns *ir)
@@ -1689,7 +1634,7 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
1689 fpm == IRFPM_CEIL ? lj_vm_ceil_sse : lj_vm_trunc_sse); 1634 fpm == IRFPM_CEIL ? lj_vm_ceil_sse : lj_vm_trunc_sse);
1690 ra_left(as, RID_XMM0, ir->op1); 1635 ra_left(as, RID_XMM0, ir->op1);
1691 } 1636 }
1692 } else if (fpm == IRFPM_EXP2 && fpmjoin_pow(as, ir)) { 1637 } else if (fpm == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) {
1693 /* Rejoined to pow(). */ 1638 /* Rejoined to pow(). */
1694 } else { /* Handle x87 ops. */ 1639 } else { /* Handle x87 ops. */
1695 int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */ 1640 int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */
@@ -1697,7 +1642,7 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
1697 if (ra_hasreg(dest)) { 1642 if (ra_hasreg(dest)) {
1698 ra_free(as, dest); 1643 ra_free(as, dest);
1699 ra_modified(as, dest); 1644 ra_modified(as, dest);
1700 emit_rmro(as, XMM_MOVRM(as), dest, RID_ESP, ofs); 1645 emit_rmro(as, XO_MOVSD, dest, RID_ESP, ofs);
1701 } 1646 }
1702 emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs); 1647 emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs);
1703 switch (fpm) { /* st0 = lj_vm_*(st0) */ 1648 switch (fpm) { /* st0 = lj_vm_*(st0) */
@@ -1736,6 +1681,9 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
1736 } 1681 }
1737} 1682}
1738 1683
1684#define asm_atan2(as, ir) asm_fpmath(as, ir)
1685#define asm_ldexp(as, ir) asm_fpmath(as, ir)
1686
1739static void asm_fppowi(ASMState *as, IRIns *ir) 1687static void asm_fppowi(ASMState *as, IRIns *ir)
1740{ 1688{
1741 /* The modified regs must match with the *.dasc implementation. */ 1689 /* The modified regs must match with the *.dasc implementation. */
@@ -1749,26 +1697,15 @@ static void asm_fppowi(ASMState *as, IRIns *ir)
1749 ra_left(as, RID_EAX, ir->op2); 1697 ra_left(as, RID_EAX, ir->op2);
1750} 1698}
1751 1699
1752#if LJ_64 && LJ_HASFFI 1700static void asm_pow(ASMState *as, IRIns *ir)
1753static void asm_arith64(ASMState *as, IRIns *ir, IRCallID id)
1754{ 1701{
1755 const CCallInfo *ci = &lj_ir_callinfo[id]; 1702#if LJ_64 && LJ_HASFFI
1756 IRRef args[2]; 1703 if (!irt_isnum(ir->t))
1757 args[0] = ir->op1; 1704 asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
1758 args[1] = ir->op2; 1705 IRCALL_lj_carith_powu64);
1759 asm_setupresult(as, ir, ci); 1706 else
1760 asm_gencall(as, ci, args);
1761}
1762#endif 1707#endif
1763 1708 asm_fppowi(as, ir);
1764static void asm_intmod(ASMState *as, IRIns *ir)
1765{
1766 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_vm_modi];
1767 IRRef args[2];
1768 args[0] = ir->op1;
1769 args[1] = ir->op2;
1770 asm_setupresult(as, ir, ci);
1771 asm_gencall(as, ci, args);
1772} 1709}
1773 1710
1774static int asm_swapops(ASMState *as, IRIns *ir) 1711static int asm_swapops(ASMState *as, IRIns *ir)
@@ -1947,6 +1884,44 @@ static void asm_add(ASMState *as, IRIns *ir)
1947 asm_intarith(as, ir, XOg_ADD); 1884 asm_intarith(as, ir, XOg_ADD);
1948} 1885}
1949 1886
1887static void asm_sub(ASMState *as, IRIns *ir)
1888{
1889 if (irt_isnum(ir->t))
1890 asm_fparith(as, ir, XO_SUBSD);
1891 else /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */
1892 asm_intarith(as, ir, XOg_SUB);
1893}
1894
1895static void asm_mul(ASMState *as, IRIns *ir)
1896{
1897 if (irt_isnum(ir->t))
1898 asm_fparith(as, ir, XO_MULSD);
1899 else
1900 asm_intarith(as, ir, XOg_X_IMUL);
1901}
1902
1903static void asm_div(ASMState *as, IRIns *ir)
1904{
1905#if LJ_64 && LJ_HASFFI
1906 if (!irt_isnum(ir->t))
1907 asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
1908 IRCALL_lj_carith_divu64);
1909 else
1910#endif
1911 asm_fparith(as, ir, XO_DIVSD);
1912}
1913
1914static void asm_mod(ASMState *as, IRIns *ir)
1915{
1916#if LJ_64 && LJ_HASFFI
1917 if (!irt_isint(ir->t))
1918 asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
1919 IRCALL_lj_carith_modu64);
1920 else
1921#endif
1922 asm_callid(as, ir, IRCALL_lj_vm_modi);
1923}
1924
1950static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg) 1925static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg)
1951{ 1926{
1952 Reg dest = ra_dest(as, ir, RSET_GPR); 1927 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1954,7 +1929,17 @@ static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg)
1954 ra_left(as, dest, ir->op1); 1929 ra_left(as, dest, ir->op1);
1955} 1930}
1956 1931
1957static void asm_min_max(ASMState *as, IRIns *ir, int cc) 1932static void asm_neg(ASMState *as, IRIns *ir)
1933{
1934 if (irt_isnum(ir->t))
1935 asm_fparith(as, ir, XO_XORPS);
1936 else
1937 asm_neg_not(as, ir, XOg_NEG);
1938}
1939
1940#define asm_abs(as, ir) asm_fparith(as, ir, XO_ANDPS)
1941
1942static void asm_intmin_max(ASMState *as, IRIns *ir, int cc)
1958{ 1943{
1959 Reg right, dest = ra_dest(as, ir, RSET_GPR); 1944 Reg right, dest = ra_dest(as, ir, RSET_GPR);
1960 IRRef lref = ir->op1, rref = ir->op2; 1945 IRRef lref = ir->op1, rref = ir->op2;
@@ -1965,7 +1950,30 @@ static void asm_min_max(ASMState *as, IRIns *ir, int cc)
1965 ra_left(as, dest, lref); 1950 ra_left(as, dest, lref);
1966} 1951}
1967 1952
1968static void asm_bitswap(ASMState *as, IRIns *ir) 1953static void asm_min(ASMState *as, IRIns *ir)
1954{
1955 if (irt_isnum(ir->t))
1956 asm_fparith(as, ir, XO_MINSD);
1957 else
1958 asm_intmin_max(as, ir, CC_G);
1959}
1960
1961static void asm_max(ASMState *as, IRIns *ir)
1962{
1963 if (irt_isnum(ir->t))
1964 asm_fparith(as, ir, XO_MAXSD);
1965 else
1966 asm_intmin_max(as, ir, CC_L);
1967}
1968
1969/* Note: don't use LEA for overflow-checking arithmetic! */
1970#define asm_addov(as, ir) asm_intarith(as, ir, XOg_ADD)
1971#define asm_subov(as, ir) asm_intarith(as, ir, XOg_SUB)
1972#define asm_mulov(as, ir) asm_intarith(as, ir, XOg_X_IMUL)
1973
1974#define asm_bnot(as, ir) asm_neg_not(as, ir, XOg_NOT)
1975
1976static void asm_bswap(ASMState *as, IRIns *ir)
1969{ 1977{
1970 Reg dest = ra_dest(as, ir, RSET_GPR); 1978 Reg dest = ra_dest(as, ir, RSET_GPR);
1971 as->mcp = emit_op(XO_BSWAP + ((dest&7) << 24), 1979 as->mcp = emit_op(XO_BSWAP + ((dest&7) << 24),
@@ -1973,6 +1981,10 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
1973 ra_left(as, dest, ir->op1); 1981 ra_left(as, dest, ir->op1);
1974} 1982}
1975 1983
1984#define asm_band(as, ir) asm_intarith(as, ir, XOg_AND)
1985#define asm_bor(as, ir) asm_intarith(as, ir, XOg_OR)
1986#define asm_bxor(as, ir) asm_intarith(as, ir, XOg_XOR)
1987
1976static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs) 1988static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
1977{ 1989{
1978 IRRef rref = ir->op2; 1990 IRRef rref = ir->op2;
@@ -2012,6 +2024,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
2012 */ 2024 */
2013} 2025}
2014 2026
2027#define asm_bshl(as, ir) asm_bitshift(as, ir, XOg_SHL)
2028#define asm_bshr(as, ir) asm_bitshift(as, ir, XOg_SHR)
2029#define asm_bsar(as, ir) asm_bitshift(as, ir, XOg_SAR)
2030#define asm_brol(as, ir) asm_bitshift(as, ir, XOg_ROL)
2031#define asm_bror(as, ir) asm_bitshift(as, ir, XOg_ROR)
2032
2015/* -- Comparisons --------------------------------------------------------- */ 2033/* -- Comparisons --------------------------------------------------------- */
2016 2034
2017/* Virtual flags for unordered FP comparisons. */ 2035/* Virtual flags for unordered FP comparisons. */
@@ -2038,8 +2056,9 @@ static const uint16_t asm_compmap[IR_ABC+1] = {
2038}; 2056};
2039 2057
2040/* FP and integer comparisons. */ 2058/* FP and integer comparisons. */
2041static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc) 2059static void asm_comp(ASMState *as, IRIns *ir)
2042{ 2060{
2061 uint32_t cc = asm_compmap[ir->o];
2043 if (irt_isnum(ir->t)) { 2062 if (irt_isnum(ir->t)) {
2044 IRRef lref = ir->op1; 2063 IRRef lref = ir->op1;
2045 IRRef rref = ir->op2; 2064 IRRef rref = ir->op2;
@@ -2194,6 +2213,8 @@ static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc)
2194 } 2213 }
2195} 2214}
2196 2215
2216#define asm_equal(as, ir) asm_comp(as, ir)
2217
2197#if LJ_32 && LJ_HASFFI 2218#if LJ_32 && LJ_HASFFI
2198/* 64 bit integer comparisons in 32 bit mode. */ 2219/* 64 bit integer comparisons in 32 bit mode. */
2199static void asm_comp_int64(ASMState *as, IRIns *ir) 2220static void asm_comp_int64(ASMState *as, IRIns *ir)
@@ -2276,13 +2297,9 @@ static void asm_hiop(ASMState *as, IRIns *ir)
2276 int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ 2297 int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */
2277 if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; 2298 if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
2278 if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ 2299 if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */
2279 if (usehi || uselo) {
2280 if (irt_isfp(ir->t))
2281 asm_conv_fp_int64(as, ir);
2282 else
2283 asm_conv_int64_fp(as, ir);
2284 }
2285 as->curins--; /* Always skip the CONV. */ 2300 as->curins--; /* Always skip the CONV. */
2301 if (usehi || uselo)
2302 asm_conv64(as, ir);
2286 return; 2303 return;
2287 } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */ 2304 } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */
2288 asm_comp_int64(as, ir); 2305 asm_comp_int64(as, ir);
@@ -2580,163 +2597,6 @@ static void asm_tail_prep(ASMState *as)
2580 } 2597 }
2581} 2598}
2582 2599
2583/* -- Instruction dispatch ------------------------------------------------ */
2584
2585/* Assemble a single instruction. */
2586static void asm_ir(ASMState *as, IRIns *ir)
2587{
2588 switch ((IROp)ir->o) {
2589 /* Miscellaneous ops. */
2590 case IR_LOOP: asm_loop(as); break;
2591 case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
2592 case IR_USE:
2593 ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
2594 case IR_PHI: asm_phi(as, ir); break;
2595 case IR_HIOP: asm_hiop(as, ir); break;
2596 case IR_GCSTEP: asm_gcstep(as, ir); break;
2597
2598 /* Guarded assertions. */
2599 case IR_LT: case IR_GE: case IR_LE: case IR_GT:
2600 case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
2601 case IR_EQ: case IR_NE: case IR_ABC:
2602 asm_comp(as, ir, asm_compmap[ir->o]);
2603 break;
2604
2605 case IR_RETF: asm_retf(as, ir); break;
2606
2607 /* Bit ops. */
2608 case IR_BNOT: asm_neg_not(as, ir, XOg_NOT); break;
2609 case IR_BSWAP: asm_bitswap(as, ir); break;
2610
2611 case IR_BAND: asm_intarith(as, ir, XOg_AND); break;
2612 case IR_BOR: asm_intarith(as, ir, XOg_OR); break;
2613 case IR_BXOR: asm_intarith(as, ir, XOg_XOR); break;
2614
2615 case IR_BSHL: asm_bitshift(as, ir, XOg_SHL); break;
2616 case IR_BSHR: asm_bitshift(as, ir, XOg_SHR); break;
2617 case IR_BSAR: asm_bitshift(as, ir, XOg_SAR); break;
2618 case IR_BROL: asm_bitshift(as, ir, XOg_ROL); break;
2619 case IR_BROR: asm_bitshift(as, ir, XOg_ROR); break;
2620
2621 /* Arithmetic ops. */
2622 case IR_ADD: asm_add(as, ir); break;
2623 case IR_SUB:
2624 if (irt_isnum(ir->t))
2625 asm_fparith(as, ir, XO_SUBSD);
2626 else /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */
2627 asm_intarith(as, ir, XOg_SUB);
2628 break;
2629 case IR_MUL:
2630 if (irt_isnum(ir->t))
2631 asm_fparith(as, ir, XO_MULSD);
2632 else
2633 asm_intarith(as, ir, XOg_X_IMUL);
2634 break;
2635 case IR_DIV:
2636#if LJ_64 && LJ_HASFFI
2637 if (!irt_isnum(ir->t))
2638 asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
2639 IRCALL_lj_carith_divu64);
2640 else
2641#endif
2642 asm_fparith(as, ir, XO_DIVSD);
2643 break;
2644 case IR_MOD:
2645#if LJ_64 && LJ_HASFFI
2646 if (!irt_isint(ir->t))
2647 asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
2648 IRCALL_lj_carith_modu64);
2649 else
2650#endif
2651 asm_intmod(as, ir);
2652 break;
2653
2654 case IR_NEG:
2655 if (irt_isnum(ir->t))
2656 asm_fparith(as, ir, XO_XORPS);
2657 else
2658 asm_neg_not(as, ir, XOg_NEG);
2659 break;
2660 case IR_ABS: asm_fparith(as, ir, XO_ANDPS); break;
2661
2662 case IR_MIN:
2663 if (irt_isnum(ir->t))
2664 asm_fparith(as, ir, XO_MINSD);
2665 else
2666 asm_min_max(as, ir, CC_G);
2667 break;
2668 case IR_MAX:
2669 if (irt_isnum(ir->t))
2670 asm_fparith(as, ir, XO_MAXSD);
2671 else
2672 asm_min_max(as, ir, CC_L);
2673 break;
2674
2675 case IR_FPMATH: case IR_ATAN2: case IR_LDEXP:
2676 asm_fpmath(as, ir);
2677 break;
2678 case IR_POW:
2679#if LJ_64 && LJ_HASFFI
2680 if (!irt_isnum(ir->t))
2681 asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
2682 IRCALL_lj_carith_powu64);
2683 else
2684#endif
2685 asm_fppowi(as, ir);
2686 break;
2687
2688 /* Overflow-checking arithmetic ops. Note: don't use LEA here! */
2689 case IR_ADDOV: asm_intarith(as, ir, XOg_ADD); break;
2690 case IR_SUBOV: asm_intarith(as, ir, XOg_SUB); break;
2691 case IR_MULOV: asm_intarith(as, ir, XOg_X_IMUL); break;
2692
2693 /* Memory references. */
2694 case IR_AREF: asm_aref(as, ir); break;
2695 case IR_HREF: asm_href(as, ir); break;
2696 case IR_HREFK: asm_hrefk(as, ir); break;
2697 case IR_NEWREF: asm_newref(as, ir); break;
2698 case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
2699 case IR_FREF: asm_fref(as, ir); break;
2700 case IR_STRREF: asm_strref(as, ir); break;
2701
2702 /* Loads and stores. */
2703 case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
2704 asm_ahuvload(as, ir);
2705 break;
2706 case IR_FLOAD: case IR_XLOAD: asm_fxload(as, ir); break;
2707 case IR_SLOAD: asm_sload(as, ir); break;
2708
2709 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
2710 case IR_FSTORE: case IR_XSTORE: asm_fxstore(as, ir); break;
2711
2712 /* Allocations. */
2713 case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
2714 case IR_TNEW: asm_tnew(as, ir); break;
2715 case IR_TDUP: asm_tdup(as, ir); break;
2716 case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
2717
2718 /* Write barriers. */
2719 case IR_TBAR: asm_tbar(as, ir); break;
2720 case IR_OBAR: asm_obar(as, ir); break;
2721
2722 /* Type conversions. */
2723 case IR_TOBIT: asm_tobit(as, ir); break;
2724 case IR_CONV: asm_conv(as, ir); break;
2725 case IR_TOSTR: asm_tostr(as, ir); break;
2726 case IR_STRTO: asm_strto(as, ir); break;
2727
2728 /* Calls. */
2729 case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
2730 case IR_CALLXS: asm_callx(as, ir); break;
2731 case IR_CARG: break;
2732
2733 default:
2734 setintV(&as->J->errinfo, ir->o);
2735 lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
2736 break;
2737 }
2738}
2739
2740/* -- Trace setup --------------------------------------------------------- */ 2600/* -- Trace setup --------------------------------------------------------- */
2741 2601
2742/* Ensure there are enough stack slots for call arguments. */ 2602/* Ensure there are enough stack slots for call arguments. */
diff --git a/src/lj_bc.h b/src/lj_bc.h
index 56e71dd9..ac9cc5e1 100644
--- a/src/lj_bc.h
+++ b/src/lj_bc.h
@@ -89,6 +89,8 @@
89 _(ISFC, dst, ___, var, ___) \ 89 _(ISFC, dst, ___, var, ___) \
90 _(IST, ___, ___, var, ___) \ 90 _(IST, ___, ___, var, ___) \
91 _(ISF, ___, ___, var, ___) \ 91 _(ISF, ___, ___, var, ___) \
92 _(ISTYPE, var, ___, lit, ___) \
93 _(ISNUM, var, ___, lit, ___) \
92 \ 94 \
93 /* Unary ops. */ \ 95 /* Unary ops. */ \
94 _(MOV, dst, ___, var, ___) \ 96 _(MOV, dst, ___, var, ___) \
@@ -143,10 +145,12 @@
143 _(TGETV, dst, var, var, index) \ 145 _(TGETV, dst, var, var, index) \
144 _(TGETS, dst, var, str, index) \ 146 _(TGETS, dst, var, str, index) \
145 _(TGETB, dst, var, lit, index) \ 147 _(TGETB, dst, var, lit, index) \
148 _(TGETR, dst, var, var, index) \
146 _(TSETV, var, var, var, newindex) \ 149 _(TSETV, var, var, var, newindex) \
147 _(TSETS, var, var, str, newindex) \ 150 _(TSETS, var, var, str, newindex) \
148 _(TSETB, var, var, lit, newindex) \ 151 _(TSETB, var, var, lit, newindex) \
149 _(TSETM, base, ___, num, newindex) \ 152 _(TSETM, base, ___, num, newindex) \
153 _(TSETR, var, var, var, newindex) \
150 \ 154 \
151 /* Calls and vararg handling. T = tail call. */ \ 155 /* Calls and vararg handling. T = tail call. */ \
152 _(CALLM, base, lit, lit, call) \ 156 _(CALLM, base, lit, lit, call) \
diff --git a/src/lj_bcdump.h b/src/lj_bcdump.h
index e660156d..22a8b823 100644
--- a/src/lj_bcdump.h
+++ b/src/lj_bcdump.h
@@ -36,7 +36,7 @@
36/* If you perform *any* kind of private modifications to the bytecode itself 36/* If you perform *any* kind of private modifications to the bytecode itself
37** or to the dump format, you *must* set BCDUMP_VERSION to 0x80 or higher. 37** or to the dump format, you *must* set BCDUMP_VERSION to 0x80 or higher.
38*/ 38*/
39#define BCDUMP_VERSION 1 39#define BCDUMP_VERSION 2
40 40
41/* Compatibility flags. */ 41/* Compatibility flags. */
42#define BCDUMP_F_BE 0x01 42#define BCDUMP_F_BE 0x01
@@ -61,6 +61,7 @@ enum {
61 61
62LJ_FUNC int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, 62LJ_FUNC int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer,
63 void *data, int strip); 63 void *data, int strip);
64LJ_FUNC GCproto *lj_bcread_proto(LexState *ls);
64LJ_FUNC GCproto *lj_bcread(LexState *ls); 65LJ_FUNC GCproto *lj_bcread(LexState *ls);
65 66
66#endif 67#endif
diff --git a/src/lj_bcread.c b/src/lj_bcread.c
index 2b5ba855..9f025500 100644
--- a/src/lj_bcread.c
+++ b/src/lj_bcread.c
@@ -9,6 +9,7 @@
9#include "lj_obj.h" 9#include "lj_obj.h"
10#include "lj_gc.h" 10#include "lj_gc.h"
11#include "lj_err.h" 11#include "lj_err.h"
12#include "lj_buf.h"
12#include "lj_str.h" 13#include "lj_str.h"
13#include "lj_tab.h" 14#include "lj_tab.h"
14#include "lj_bc.h" 15#include "lj_bc.h"
@@ -20,6 +21,7 @@
20#include "lj_lex.h" 21#include "lj_lex.h"
21#include "lj_bcdump.h" 22#include "lj_bcdump.h"
22#include "lj_state.h" 23#include "lj_state.h"
24#include "lj_strfmt.h"
23 25
24/* Reuse some lexer fields for our own purposes. */ 26/* Reuse some lexer fields for our own purposes. */
25#define bcread_flags(ls) ls->level 27#define bcread_flags(ls) ls->level
@@ -38,84 +40,73 @@ static LJ_NOINLINE void bcread_error(LexState *ls, ErrMsg em)
38 const char *name = ls->chunkarg; 40 const char *name = ls->chunkarg;
39 if (*name == BCDUMP_HEAD1) name = "(binary)"; 41 if (*name == BCDUMP_HEAD1) name = "(binary)";
40 else if (*name == '@' || *name == '=') name++; 42 else if (*name == '@' || *name == '=') name++;
41 lj_str_pushf(L, "%s: %s", name, err2msg(em)); 43 lj_strfmt_pushf(L, "%s: %s", name, err2msg(em));
42 lj_err_throw(L, LUA_ERRSYNTAX); 44 lj_err_throw(L, LUA_ERRSYNTAX);
43} 45}
44 46
45/* Resize input buffer. */ 47/* Refill buffer. */
46static void bcread_resize(LexState *ls, MSize len)
47{
48 if (ls->sb.sz < len) {
49 MSize sz = ls->sb.sz * 2;
50 while (len > sz) sz = sz * 2;
51 lj_str_resizebuf(ls->L, &ls->sb, sz);
52 /* Caveat: this may change ls->sb.buf which may affect ls->p. */
53 }
54}
55
56/* Refill buffer if needed. */
57static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need) 48static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need)
58{ 49{
59 lua_assert(len != 0); 50 lua_assert(len != 0);
60 if (len > LJ_MAX_MEM || ls->current < 0) 51 if (len > LJ_MAX_MEM || ls->c < 0)
61 bcread_error(ls, LJ_ERR_BCBAD); 52 bcread_error(ls, LJ_ERR_BCBAD);
62 do { 53 do {
63 const char *buf; 54 const char *buf;
64 size_t size; 55 size_t sz;
65 if (ls->n) { /* Copy remainder to buffer. */ 56 char *p = sbufB(&ls->sb);
66 if (ls->sb.n) { /* Move down in buffer. */ 57 MSize n = (MSize)(ls->pe - ls->p);
67 lua_assert(ls->p + ls->n == ls->sb.buf + ls->sb.n); 58 if (n) { /* Copy remainder to buffer. */
68 if (ls->n != ls->sb.n) 59 if (sbuflen(&ls->sb)) { /* Move down in buffer. */
69 memmove(ls->sb.buf, ls->p, ls->n); 60 lua_assert(ls->pe == sbufP(&ls->sb));
61 if (ls->p != p) memmove(p, ls->p, n);
70 } else { /* Copy from buffer provided by reader. */ 62 } else { /* Copy from buffer provided by reader. */
71 bcread_resize(ls, len); 63 p = lj_buf_need(&ls->sb, len);
72 memcpy(ls->sb.buf, ls->p, ls->n); 64 memcpy(p, ls->p, n);
73 } 65 }
74 ls->p = ls->sb.buf; 66 ls->p = p;
67 ls->pe = p + n;
75 } 68 }
76 ls->sb.n = ls->n; 69 setsbufP(&ls->sb, p + n);
77 buf = ls->rfunc(ls->L, ls->rdata, &size); /* Get more data from reader. */ 70 buf = ls->rfunc(ls->L, ls->rdata, &sz); /* Get more data from reader. */
78 if (buf == NULL || size == 0) { /* EOF? */ 71 if (buf == NULL || sz == 0) { /* EOF? */
79 if (need) bcread_error(ls, LJ_ERR_BCBAD); 72 if (need) bcread_error(ls, LJ_ERR_BCBAD);
80 ls->current = -1; /* Only bad if we get called again. */ 73 ls->c = -1; /* Only bad if we get called again. */
81 break; 74 break;
82 } 75 }
83 if (ls->sb.n) { /* Append to buffer. */ 76 if (n) { /* Append to buffer. */
84 MSize n = ls->sb.n + (MSize)size; 77 n += (MSize)sz;
85 bcread_resize(ls, n < len ? len : n); 78 p = lj_buf_need(&ls->sb, n < len ? len : n);
86 memcpy(ls->sb.buf + ls->sb.n, buf, size); 79 memcpy(sbufP(&ls->sb), buf, sz);
87 ls->n = ls->sb.n = n; 80 setsbufP(&ls->sb, p + n);
88 ls->p = ls->sb.buf; 81 ls->p = p;
82 ls->pe = p + n;
89 } else { /* Return buffer provided by reader. */ 83 } else { /* Return buffer provided by reader. */
90 ls->n = (MSize)size;
91 ls->p = buf; 84 ls->p = buf;
85 ls->pe = buf + sz;
92 } 86 }
93 } while (ls->n < len); 87 } while (ls->p + len > ls->pe);
94} 88}
95 89
96/* Need a certain number of bytes. */ 90/* Need a certain number of bytes. */
97static LJ_AINLINE void bcread_need(LexState *ls, MSize len) 91static LJ_AINLINE void bcread_need(LexState *ls, MSize len)
98{ 92{
99 if (LJ_UNLIKELY(ls->n < len)) 93 if (LJ_UNLIKELY(ls->p + len > ls->pe))
100 bcread_fill(ls, len, 1); 94 bcread_fill(ls, len, 1);
101} 95}
102 96
103/* Want to read up to a certain number of bytes, but may need less. */ 97/* Want to read up to a certain number of bytes, but may need less. */
104static LJ_AINLINE void bcread_want(LexState *ls, MSize len) 98static LJ_AINLINE void bcread_want(LexState *ls, MSize len)
105{ 99{
106 if (LJ_UNLIKELY(ls->n < len)) 100 if (LJ_UNLIKELY(ls->p + len > ls->pe))
107 bcread_fill(ls, len, 0); 101 bcread_fill(ls, len, 0);
108} 102}
109 103
110#define bcread_dec(ls) check_exp(ls->n > 0, ls->n--)
111#define bcread_consume(ls, len) check_exp(ls->n >= (len), ls->n -= (len))
112
113/* Return memory block from buffer. */ 104/* Return memory block from buffer. */
114static uint8_t *bcread_mem(LexState *ls, MSize len) 105static LJ_AINLINE uint8_t *bcread_mem(LexState *ls, MSize len)
115{ 106{
116 uint8_t *p = (uint8_t *)ls->p; 107 uint8_t *p = (uint8_t *)ls->p;
117 bcread_consume(ls, len); 108 ls->p += len;
118 ls->p = (char *)p + len; 109 lua_assert(ls->p <= ls->pe);
119 return p; 110 return p;
120} 111}
121 112
@@ -128,25 +119,15 @@ static void bcread_block(LexState *ls, void *q, MSize len)
128/* Read byte from buffer. */ 119/* Read byte from buffer. */
129static LJ_AINLINE uint32_t bcread_byte(LexState *ls) 120static LJ_AINLINE uint32_t bcread_byte(LexState *ls)
130{ 121{
131 bcread_dec(ls); 122 lua_assert(ls->p < ls->pe);
132 return (uint32_t)(uint8_t)*ls->p++; 123 return (uint32_t)(uint8_t)*ls->p++;
133} 124}
134 125
135/* Read ULEB128 value from buffer. */ 126/* Read ULEB128 value from buffer. */
136static uint32_t bcread_uleb128(LexState *ls) 127static LJ_AINLINE uint32_t bcread_uleb128(LexState *ls)
137{ 128{
138 const uint8_t *p = (const uint8_t *)ls->p; 129 uint32_t v = lj_buf_ruleb128(&ls->p);
139 uint32_t v = *p++; 130 lua_assert(ls->p <= ls->pe);
140 if (LJ_UNLIKELY(v >= 0x80)) {
141 int sh = 0;
142 v &= 0x7f;
143 do {
144 v |= ((*p & 0x7f) << (sh += 7));
145 bcread_dec(ls);
146 } while (*p++ >= 0x80);
147 }
148 bcread_dec(ls);
149 ls->p = (char *)p;
150 return v; 131 return v;
151} 132}
152 133
@@ -160,11 +141,10 @@ static uint32_t bcread_uleb128_33(LexState *ls)
160 v &= 0x3f; 141 v &= 0x3f;
161 do { 142 do {
162 v |= ((*p & 0x7f) << (sh += 7)); 143 v |= ((*p & 0x7f) << (sh += 7));
163 bcread_dec(ls);
164 } while (*p++ >= 0x80); 144 } while (*p++ >= 0x80);
165 } 145 }
166 bcread_dec(ls);
167 ls->p = (char *)p; 146 ls->p = (char *)p;
147 lua_assert(ls->p <= ls->pe);
168 return v; 148 return v;
169} 149}
170 150
@@ -326,25 +306,13 @@ static void bcread_uv(LexState *ls, GCproto *pt, MSize sizeuv)
326} 306}
327 307
328/* Read a prototype. */ 308/* Read a prototype. */
329static GCproto *bcread_proto(LexState *ls) 309GCproto *lj_bcread_proto(LexState *ls)
330{ 310{
331 GCproto *pt; 311 GCproto *pt;
332 MSize framesize, numparams, flags, sizeuv, sizekgc, sizekn, sizebc, sizept; 312 MSize framesize, numparams, flags, sizeuv, sizekgc, sizekn, sizebc, sizept;
333 MSize ofsk, ofsuv, ofsdbg; 313 MSize ofsk, ofsuv, ofsdbg;
334 MSize sizedbg = 0; 314 MSize sizedbg = 0;
335 BCLine firstline = 0, numline = 0; 315 BCLine firstline = 0, numline = 0;
336 MSize len, startn;
337
338 /* Read length. */
339 if (ls->n > 0 && ls->p[0] == 0) { /* Shortcut EOF. */
340 ls->n--; ls->p++;
341 return NULL;
342 }
343 bcread_want(ls, 5);
344 len = bcread_uleb128(ls);
345 if (!len) return NULL; /* EOF */
346 bcread_need(ls, len);
347 startn = ls->n;
348 316
349 /* Read prototype header. */ 317 /* Read prototype header. */
350 flags = bcread_byte(ls); 318 flags = bcread_byte(ls);
@@ -413,9 +381,6 @@ static GCproto *bcread_proto(LexState *ls)
413 setmref(pt->uvinfo, NULL); 381 setmref(pt->uvinfo, NULL);
414 setmref(pt->varinfo, NULL); 382 setmref(pt->varinfo, NULL);
415 } 383 }
416
417 if (len != startn - ls->n)
418 bcread_error(ls, LJ_ERR_BCBAD);
419 return pt; 384 return pt;
420} 385}
421 386
@@ -455,19 +420,33 @@ static int bcread_header(LexState *ls)
455GCproto *lj_bcread(LexState *ls) 420GCproto *lj_bcread(LexState *ls)
456{ 421{
457 lua_State *L = ls->L; 422 lua_State *L = ls->L;
458 lua_assert(ls->current == BCDUMP_HEAD1); 423 lua_assert(ls->c == BCDUMP_HEAD1);
459 bcread_savetop(L, ls, L->top); 424 bcread_savetop(L, ls, L->top);
460 lj_str_resetbuf(&ls->sb); 425 lj_buf_reset(&ls->sb);
461 /* Check for a valid bytecode dump header. */ 426 /* Check for a valid bytecode dump header. */
462 if (!bcread_header(ls)) 427 if (!bcread_header(ls))
463 bcread_error(ls, LJ_ERR_BCFMT); 428 bcread_error(ls, LJ_ERR_BCFMT);
464 for (;;) { /* Process all prototypes in the bytecode dump. */ 429 for (;;) { /* Process all prototypes in the bytecode dump. */
465 GCproto *pt = bcread_proto(ls); 430 GCproto *pt;
466 if (!pt) break; 431 MSize len;
432 const char *startp;
433 /* Read length. */
434 if (ls->p < ls->pe && ls->p[0] == 0) { /* Shortcut EOF. */
435 ls->p++;
436 break;
437 }
438 bcread_want(ls, 5);
439 len = bcread_uleb128(ls);
440 if (!len) break; /* EOF */
441 bcread_need(ls, len);
442 startp = ls->p;
443 pt = lj_bcread_proto(ls);
444 if (ls->p != startp + len)
445 bcread_error(ls, LJ_ERR_BCBAD);
467 setprotoV(L, L->top, pt); 446 setprotoV(L, L->top, pt);
468 incr_top(L); 447 incr_top(L);
469 } 448 }
470 if ((int32_t)ls->n > 0 || L->top-1 != bcread_oldtop(L, ls)) 449 if (ls->p < ls->pe || L->top-1 != bcread_oldtop(L, ls))
471 bcread_error(ls, LJ_ERR_BCBAD); 450 bcread_error(ls, LJ_ERR_BCBAD);
472 /* Pop off last prototype. */ 451 /* Pop off last prototype. */
473 L->top--; 452 L->top--;
diff --git a/src/lj_bcwrite.c b/src/lj_bcwrite.c
index 4805d515..c95086c7 100644
--- a/src/lj_bcwrite.c
+++ b/src/lj_bcwrite.c
@@ -8,7 +8,7 @@
8 8
9#include "lj_obj.h" 9#include "lj_obj.h"
10#include "lj_gc.h" 10#include "lj_gc.h"
11#include "lj_str.h" 11#include "lj_buf.h"
12#include "lj_bc.h" 12#include "lj_bc.h"
13#if LJ_HASFFI 13#if LJ_HASFFI
14#include "lj_ctype.h" 14#include "lj_ctype.h"
@@ -17,13 +17,13 @@
17#include "lj_dispatch.h" 17#include "lj_dispatch.h"
18#include "lj_jit.h" 18#include "lj_jit.h"
19#endif 19#endif
20#include "lj_strfmt.h"
20#include "lj_bcdump.h" 21#include "lj_bcdump.h"
21#include "lj_vm.h" 22#include "lj_vm.h"
22 23
23/* Context for bytecode writer. */ 24/* Context for bytecode writer. */
24typedef struct BCWriteCtx { 25typedef struct BCWriteCtx {
25 SBuf sb; /* Output buffer. */ 26 SBuf sb; /* Output buffer. */
26 lua_State *L; /* Lua state. */
27 GCproto *pt; /* Root prototype. */ 27 GCproto *pt; /* Root prototype. */
28 lua_Writer wfunc; /* Writer callback. */ 28 lua_Writer wfunc; /* Writer callback. */
29 void *wdata; /* Writer callback data. */ 29 void *wdata; /* Writer callback data. */
@@ -31,85 +31,44 @@ typedef struct BCWriteCtx {
31 int status; /* Status from writer callback. */ 31 int status; /* Status from writer callback. */
32} BCWriteCtx; 32} BCWriteCtx;
33 33
34/* -- Output buffer handling ---------------------------------------------- */
35
36/* Resize buffer if needed. */
37static LJ_NOINLINE void bcwrite_resize(BCWriteCtx *ctx, MSize len)
38{
39 MSize sz = ctx->sb.sz * 2;
40 while (ctx->sb.n + len > sz) sz = sz * 2;
41 lj_str_resizebuf(ctx->L, &ctx->sb, sz);
42}
43
44/* Need a certain amount of buffer space. */
45static LJ_AINLINE void bcwrite_need(BCWriteCtx *ctx, MSize len)
46{
47 if (LJ_UNLIKELY(ctx->sb.n + len > ctx->sb.sz))
48 bcwrite_resize(ctx, len);
49}
50
51/* Add memory block to buffer. */
52static void bcwrite_block(BCWriteCtx *ctx, const void *p, MSize len)
53{
54 uint8_t *q = (uint8_t *)(ctx->sb.buf + ctx->sb.n);
55 MSize i;
56 ctx->sb.n += len;
57 for (i = 0; i < len; i++) q[i] = ((uint8_t *)p)[i];
58}
59
60/* Add byte to buffer. */
61static LJ_AINLINE void bcwrite_byte(BCWriteCtx *ctx, uint8_t b)
62{
63 ctx->sb.buf[ctx->sb.n++] = b;
64}
65
66/* Add ULEB128 value to buffer. */
67static void bcwrite_uleb128(BCWriteCtx *ctx, uint32_t v)
68{
69 MSize n = ctx->sb.n;
70 uint8_t *p = (uint8_t *)ctx->sb.buf;
71 for (; v >= 0x80; v >>= 7)
72 p[n++] = (uint8_t)((v & 0x7f) | 0x80);
73 p[n++] = (uint8_t)v;
74 ctx->sb.n = n;
75}
76
77/* -- Bytecode writer ----------------------------------------------------- */ 34/* -- Bytecode writer ----------------------------------------------------- */
78 35
79/* Write a single constant key/value of a template table. */ 36/* Write a single constant key/value of a template table. */
80static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow) 37static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow)
81{ 38{
82 bcwrite_need(ctx, 1+10); 39 char *p = lj_buf_more(&ctx->sb, 1+10);
83 if (tvisstr(o)) { 40 if (tvisstr(o)) {
84 const GCstr *str = strV(o); 41 const GCstr *str = strV(o);
85 MSize len = str->len; 42 MSize len = str->len;
86 bcwrite_need(ctx, 5+len); 43 p = lj_buf_more(&ctx->sb, 5+len);
87 bcwrite_uleb128(ctx, BCDUMP_KTAB_STR+len); 44 p = lj_strfmt_wuleb128(p, BCDUMP_KTAB_STR+len);
88 bcwrite_block(ctx, strdata(str), len); 45 p = lj_buf_wmem(p, strdata(str), len);
89 } else if (tvisint(o)) { 46 } else if (tvisint(o)) {
90 bcwrite_byte(ctx, BCDUMP_KTAB_INT); 47 *p++ = BCDUMP_KTAB_INT;
91 bcwrite_uleb128(ctx, intV(o)); 48 p = lj_strfmt_wuleb128(p, intV(o));
92 } else if (tvisnum(o)) { 49 } else if (tvisnum(o)) {
93 if (!LJ_DUALNUM && narrow) { /* Narrow number constants to integers. */ 50 if (!LJ_DUALNUM && narrow) { /* Narrow number constants to integers. */
94 lua_Number num = numV(o); 51 lua_Number num = numV(o);
95 int32_t k = lj_num2int(num); 52 int32_t k = lj_num2int(num);
96 if (num == (lua_Number)k) { /* -0 is never a constant. */ 53 if (num == (lua_Number)k) { /* -0 is never a constant. */
97 bcwrite_byte(ctx, BCDUMP_KTAB_INT); 54 *p++ = BCDUMP_KTAB_INT;
98 bcwrite_uleb128(ctx, k); 55 p = lj_strfmt_wuleb128(p, k);
56 setsbufP(&ctx->sb, p);
99 return; 57 return;
100 } 58 }
101 } 59 }
102 bcwrite_byte(ctx, BCDUMP_KTAB_NUM); 60 *p++ = BCDUMP_KTAB_NUM;
103 bcwrite_uleb128(ctx, o->u32.lo); 61 p = lj_strfmt_wuleb128(p, o->u32.lo);
104 bcwrite_uleb128(ctx, o->u32.hi); 62 p = lj_strfmt_wuleb128(p, o->u32.hi);
105 } else { 63 } else {
106 lua_assert(tvispri(o)); 64 lua_assert(tvispri(o));
107 bcwrite_byte(ctx, BCDUMP_KTAB_NIL+~itype(o)); 65 *p++ = BCDUMP_KTAB_NIL+~itype(o);
108 } 66 }
67 setsbufP(&ctx->sb, p);
109} 68}
110 69
111/* Write a template table. */ 70/* Write a template table. */
112static void bcwrite_ktab(BCWriteCtx *ctx, const GCtab *t) 71static void bcwrite_ktab(BCWriteCtx *ctx, char *p, const GCtab *t)
113{ 72{
114 MSize narray = 0, nhash = 0; 73 MSize narray = 0, nhash = 0;
115 if (t->asize > 0) { /* Determine max. length of array part. */ 74 if (t->asize > 0) { /* Determine max. length of array part. */
@@ -127,8 +86,9 @@ static void bcwrite_ktab(BCWriteCtx *ctx, const GCtab *t)
127 nhash += !tvisnil(&node[i].val); 86 nhash += !tvisnil(&node[i].val);
128 } 87 }
129 /* Write number of array slots and hash slots. */ 88 /* Write number of array slots and hash slots. */
130 bcwrite_uleb128(ctx, narray); 89 p = lj_strfmt_wuleb128(p, narray);
131 bcwrite_uleb128(ctx, nhash); 90 p = lj_strfmt_wuleb128(p, nhash);
91 setsbufP(&ctx->sb, p);
132 if (narray) { /* Write array entries (may contain nil). */ 92 if (narray) { /* Write array entries (may contain nil). */
133 MSize i; 93 MSize i;
134 TValue *o = tvref(t->array); 94 TValue *o = tvref(t->array);
@@ -155,6 +115,7 @@ static void bcwrite_kgc(BCWriteCtx *ctx, GCproto *pt)
155 for (i = 0; i < sizekgc; i++, kr++) { 115 for (i = 0; i < sizekgc; i++, kr++) {
156 GCobj *o = gcref(*kr); 116 GCobj *o = gcref(*kr);
157 MSize tp, need = 1; 117 MSize tp, need = 1;
118 char *p;
158 /* Determine constant type and needed size. */ 119 /* Determine constant type and needed size. */
159 if (o->gch.gct == ~LJ_TSTR) { 120 if (o->gch.gct == ~LJ_TSTR) {
160 tp = BCDUMP_KGC_STR + gco2str(o)->len; 121 tp = BCDUMP_KGC_STR + gco2str(o)->len;
@@ -181,24 +142,26 @@ static void bcwrite_kgc(BCWriteCtx *ctx, GCproto *pt)
181 need = 1+2*5; 142 need = 1+2*5;
182 } 143 }
183 /* Write constant type. */ 144 /* Write constant type. */
184 bcwrite_need(ctx, need); 145 p = lj_buf_more(&ctx->sb, need);
185 bcwrite_uleb128(ctx, tp); 146 p = lj_strfmt_wuleb128(p, tp);
186 /* Write constant data (if any). */ 147 /* Write constant data (if any). */
187 if (tp >= BCDUMP_KGC_STR) { 148 if (tp >= BCDUMP_KGC_STR) {
188 bcwrite_block(ctx, strdata(gco2str(o)), gco2str(o)->len); 149 p = lj_buf_wmem(p, strdata(gco2str(o)), gco2str(o)->len);
189 } else if (tp == BCDUMP_KGC_TAB) { 150 } else if (tp == BCDUMP_KGC_TAB) {
190 bcwrite_ktab(ctx, gco2tab(o)); 151 bcwrite_ktab(ctx, p, gco2tab(o));
152 continue;
191#if LJ_HASFFI 153#if LJ_HASFFI
192 } else if (tp != BCDUMP_KGC_CHILD) { 154 } else if (tp != BCDUMP_KGC_CHILD) {
193 cTValue *p = (TValue *)cdataptr(gco2cd(o)); 155 cTValue *q = (TValue *)cdataptr(gco2cd(o));
194 bcwrite_uleb128(ctx, p[0].u32.lo); 156 p = lj_strfmt_wuleb128(p, q[0].u32.lo);
195 bcwrite_uleb128(ctx, p[0].u32.hi); 157 p = lj_strfmt_wuleb128(p, q[0].u32.hi);
196 if (tp == BCDUMP_KGC_COMPLEX) { 158 if (tp == BCDUMP_KGC_COMPLEX) {
197 bcwrite_uleb128(ctx, p[1].u32.lo); 159 p = lj_strfmt_wuleb128(p, q[1].u32.lo);
198 bcwrite_uleb128(ctx, p[1].u32.hi); 160 p = lj_strfmt_wuleb128(p, q[1].u32.hi);
199 } 161 }
200#endif 162#endif
201 } 163 }
164 setsbufP(&ctx->sb, p);
202 } 165 }
203} 166}
204 167
@@ -207,7 +170,7 @@ static void bcwrite_knum(BCWriteCtx *ctx, GCproto *pt)
207{ 170{
208 MSize i, sizekn = pt->sizekn; 171 MSize i, sizekn = pt->sizekn;
209 cTValue *o = mref(pt->k, TValue); 172 cTValue *o = mref(pt->k, TValue);
210 bcwrite_need(ctx, 10*sizekn); 173 char *p = lj_buf_more(&ctx->sb, 10*sizekn);
211 for (i = 0; i < sizekn; i++, o++) { 174 for (i = 0; i < sizekn; i++, o++) {
212 int32_t k; 175 int32_t k;
213 if (tvisint(o)) { 176 if (tvisint(o)) {
@@ -220,58 +183,58 @@ static void bcwrite_knum(BCWriteCtx *ctx, GCproto *pt)
220 k = lj_num2int(num); 183 k = lj_num2int(num);
221 if (num == (lua_Number)k) { /* -0 is never a constant. */ 184 if (num == (lua_Number)k) { /* -0 is never a constant. */
222 save_int: 185 save_int:
223 bcwrite_uleb128(ctx, 2*(uint32_t)k | ((uint32_t)k & 0x80000000u)); 186 p = lj_strfmt_wuleb128(p, 2*(uint32_t)k | ((uint32_t)k&0x80000000u));
224 if (k < 0) { 187 if (k < 0)
225 char *p = &ctx->sb.buf[ctx->sb.n-1]; 188 p[-1] = (p[-1] & 7) | ((k>>27) & 0x18);
226 *p = (*p & 7) | ((k>>27) & 0x18);
227 }
228 continue; 189 continue;
229 } 190 }
230 } 191 }
231 bcwrite_uleb128(ctx, 1+(2*o->u32.lo | (o->u32.lo & 0x80000000u))); 192 p = lj_strfmt_wuleb128(p, 1+(2*o->u32.lo | (o->u32.lo & 0x80000000u)));
232 if (o->u32.lo >= 0x80000000u) { 193 if (o->u32.lo >= 0x80000000u)
233 char *p = &ctx->sb.buf[ctx->sb.n-1]; 194 p[-1] = (p[-1] & 7) | ((o->u32.lo>>27) & 0x18);
234 *p = (*p & 7) | ((o->u32.lo>>27) & 0x18); 195 p = lj_strfmt_wuleb128(p, o->u32.hi);
235 }
236 bcwrite_uleb128(ctx, o->u32.hi);
237 } 196 }
238 } 197 }
198 setsbufP(&ctx->sb, p);
239} 199}
240 200
241/* Write bytecode instructions. */ 201/* Write bytecode instructions. */
242static void bcwrite_bytecode(BCWriteCtx *ctx, GCproto *pt) 202static char *bcwrite_bytecode(BCWriteCtx *ctx, char *p, GCproto *pt)
243{ 203{
244 MSize nbc = pt->sizebc-1; /* Omit the [JI]FUNC* header. */ 204 MSize nbc = pt->sizebc-1; /* Omit the [JI]FUNC* header. */
245#if LJ_HASJIT 205#if LJ_HASJIT
246 uint8_t *p = (uint8_t *)&ctx->sb.buf[ctx->sb.n]; 206 uint8_t *q = (uint8_t *)p;
247#endif 207#endif
248 bcwrite_block(ctx, proto_bc(pt)+1, nbc*(MSize)sizeof(BCIns)); 208 p = lj_buf_wmem(p, proto_bc(pt)+1, nbc*(MSize)sizeof(BCIns));
209 UNUSED(ctx);
249#if LJ_HASJIT 210#if LJ_HASJIT
250 /* Unpatch modified bytecode containing ILOOP/JLOOP etc. */ 211 /* Unpatch modified bytecode containing ILOOP/JLOOP etc. */
251 if ((pt->flags & PROTO_ILOOP) || pt->trace) { 212 if ((pt->flags & PROTO_ILOOP) || pt->trace) {
252 jit_State *J = L2J(ctx->L); 213 jit_State *J = L2J(sbufL(&ctx->sb));
253 MSize i; 214 MSize i;
254 for (i = 0; i < nbc; i++, p += sizeof(BCIns)) { 215 for (i = 0; i < nbc; i++, q += sizeof(BCIns)) {
255 BCOp op = (BCOp)p[LJ_ENDIAN_SELECT(0, 3)]; 216 BCOp op = (BCOp)q[LJ_ENDIAN_SELECT(0, 3)];
256 if (op == BC_IFORL || op == BC_IITERL || op == BC_ILOOP || 217 if (op == BC_IFORL || op == BC_IITERL || op == BC_ILOOP ||
257 op == BC_JFORI) { 218 op == BC_JFORI) {
258 p[LJ_ENDIAN_SELECT(0, 3)] = (uint8_t)(op-BC_IFORL+BC_FORL); 219 q[LJ_ENDIAN_SELECT(0, 3)] = (uint8_t)(op-BC_IFORL+BC_FORL);
259 } else if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) { 220 } else if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) {
260 BCReg rd = p[LJ_ENDIAN_SELECT(2, 1)] + (p[LJ_ENDIAN_SELECT(3, 0)] << 8); 221 BCReg rd = q[LJ_ENDIAN_SELECT(2, 1)] + (q[LJ_ENDIAN_SELECT(3, 0)] << 8);
261 BCIns ins = traceref(J, rd)->startins; 222 BCIns ins = traceref(J, rd)->startins;
262 p[LJ_ENDIAN_SELECT(0, 3)] = (uint8_t)(op-BC_JFORL+BC_FORL); 223 q[LJ_ENDIAN_SELECT(0, 3)] = (uint8_t)(op-BC_JFORL+BC_FORL);
263 p[LJ_ENDIAN_SELECT(2, 1)] = bc_c(ins); 224 q[LJ_ENDIAN_SELECT(2, 1)] = bc_c(ins);
264 p[LJ_ENDIAN_SELECT(3, 0)] = bc_b(ins); 225 q[LJ_ENDIAN_SELECT(3, 0)] = bc_b(ins);
265 } 226 }
266 } 227 }
267 } 228 }
268#endif 229#endif
230 return p;
269} 231}
270 232
271/* Write prototype. */ 233/* Write prototype. */
272static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt) 234static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt)
273{ 235{
274 MSize sizedbg = 0; 236 MSize sizedbg = 0;
237 char *p;
275 238
276 /* Recursively write children of prototype. */ 239 /* Recursively write children of prototype. */
277 if ((pt->flags & PROTO_CHILD)) { 240 if ((pt->flags & PROTO_CHILD)) {
@@ -285,31 +248,32 @@ static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt)
285 } 248 }
286 249
287 /* Start writing the prototype info to a buffer. */ 250 /* Start writing the prototype info to a buffer. */
288 lj_str_resetbuf(&ctx->sb); 251 p = lj_buf_need(&ctx->sb,
289 ctx->sb.n = 5; /* Leave room for final size. */ 252 5+4+6*5+(pt->sizebc-1)*(MSize)sizeof(BCIns)+pt->sizeuv*2);
290 bcwrite_need(ctx, 4+6*5+(pt->sizebc-1)*(MSize)sizeof(BCIns)+pt->sizeuv*2); 253 p += 5; /* Leave room for final size. */
291 254
292 /* Write prototype header. */ 255 /* Write prototype header. */
293 bcwrite_byte(ctx, (pt->flags & (PROTO_CHILD|PROTO_VARARG|PROTO_FFI))); 256 *p++ = (pt->flags & (PROTO_CHILD|PROTO_VARARG|PROTO_FFI));
294 bcwrite_byte(ctx, pt->numparams); 257 *p++ = pt->numparams;
295 bcwrite_byte(ctx, pt->framesize); 258 *p++ = pt->framesize;
296 bcwrite_byte(ctx, pt->sizeuv); 259 *p++ = pt->sizeuv;
297 bcwrite_uleb128(ctx, pt->sizekgc); 260 p = lj_strfmt_wuleb128(p, pt->sizekgc);
298 bcwrite_uleb128(ctx, pt->sizekn); 261 p = lj_strfmt_wuleb128(p, pt->sizekn);
299 bcwrite_uleb128(ctx, pt->sizebc-1); 262 p = lj_strfmt_wuleb128(p, pt->sizebc-1);
300 if (!ctx->strip) { 263 if (!ctx->strip) {
301 if (proto_lineinfo(pt)) 264 if (proto_lineinfo(pt))
302 sizedbg = pt->sizept - (MSize)((char *)proto_lineinfo(pt) - (char *)pt); 265 sizedbg = pt->sizept - (MSize)((char *)proto_lineinfo(pt) - (char *)pt);
303 bcwrite_uleb128(ctx, sizedbg); 266 p = lj_strfmt_wuleb128(p, sizedbg);
304 if (sizedbg) { 267 if (sizedbg) {
305 bcwrite_uleb128(ctx, pt->firstline); 268 p = lj_strfmt_wuleb128(p, pt->firstline);
306 bcwrite_uleb128(ctx, pt->numline); 269 p = lj_strfmt_wuleb128(p, pt->numline);
307 } 270 }
308 } 271 }
309 272
310 /* Write bytecode instructions and upvalue refs. */ 273 /* Write bytecode instructions and upvalue refs. */
311 bcwrite_bytecode(ctx, pt); 274 p = bcwrite_bytecode(ctx, p, pt);
312 bcwrite_block(ctx, proto_uv(pt), pt->sizeuv*2); 275 p = lj_buf_wmem(p, proto_uv(pt), pt->sizeuv*2);
276 setsbufP(&ctx->sb, p);
313 277
314 /* Write constants. */ 278 /* Write constants. */
315 bcwrite_kgc(ctx, pt); 279 bcwrite_kgc(ctx, pt);
@@ -317,18 +281,19 @@ static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt)
317 281
318 /* Write debug info, if not stripped. */ 282 /* Write debug info, if not stripped. */
319 if (sizedbg) { 283 if (sizedbg) {
320 bcwrite_need(ctx, sizedbg); 284 p = lj_buf_more(&ctx->sb, sizedbg);
321 bcwrite_block(ctx, proto_lineinfo(pt), sizedbg); 285 p = lj_buf_wmem(p, proto_lineinfo(pt), sizedbg);
286 setsbufP(&ctx->sb, p);
322 } 287 }
323 288
324 /* Pass buffer to writer function. */ 289 /* Pass buffer to writer function. */
325 if (ctx->status == 0) { 290 if (ctx->status == 0) {
326 MSize n = ctx->sb.n - 5; 291 MSize n = sbuflen(&ctx->sb) - 5;
327 MSize nn = (lj_fls(n)+8)*9 >> 6; 292 MSize nn = (lj_fls(n)+8)*9 >> 6;
328 ctx->sb.n = 5 - nn; 293 char *q = sbufB(&ctx->sb) + (5 - nn);
329 bcwrite_uleb128(ctx, n); /* Fill in final size. */ 294 p = lj_strfmt_wuleb128(q, n); /* Fill in final size. */
330 lua_assert(ctx->sb.n == 5); 295 lua_assert(p == sbufB(&ctx->sb) + 5);
331 ctx->status = ctx->wfunc(ctx->L, ctx->sb.buf+5-nn, nn+n, ctx->wdata); 296 ctx->status = ctx->wfunc(sbufL(&ctx->sb), q, nn+n, ctx->wdata);
332 } 297 }
333} 298}
334 299
@@ -338,20 +303,20 @@ static void bcwrite_header(BCWriteCtx *ctx)
338 GCstr *chunkname = proto_chunkname(ctx->pt); 303 GCstr *chunkname = proto_chunkname(ctx->pt);
339 const char *name = strdata(chunkname); 304 const char *name = strdata(chunkname);
340 MSize len = chunkname->len; 305 MSize len = chunkname->len;
341 lj_str_resetbuf(&ctx->sb); 306 char *p = lj_buf_need(&ctx->sb, 5+5+len);
342 bcwrite_need(ctx, 5+5+len); 307 *p++ = BCDUMP_HEAD1;
343 bcwrite_byte(ctx, BCDUMP_HEAD1); 308 *p++ = BCDUMP_HEAD2;
344 bcwrite_byte(ctx, BCDUMP_HEAD2); 309 *p++ = BCDUMP_HEAD3;
345 bcwrite_byte(ctx, BCDUMP_HEAD3); 310 *p++ = BCDUMP_VERSION;
346 bcwrite_byte(ctx, BCDUMP_VERSION); 311 *p++ = (ctx->strip ? BCDUMP_F_STRIP : 0) +
347 bcwrite_byte(ctx, (ctx->strip ? BCDUMP_F_STRIP : 0) + 312 (LJ_BE ? BCDUMP_F_BE : 0) +
348 (LJ_BE ? BCDUMP_F_BE : 0) + 313 ((ctx->pt->flags & PROTO_FFI) ? BCDUMP_F_FFI : 0);
349 ((ctx->pt->flags & PROTO_FFI) ? BCDUMP_F_FFI : 0));
350 if (!ctx->strip) { 314 if (!ctx->strip) {
351 bcwrite_uleb128(ctx, len); 315 p = lj_strfmt_wuleb128(p, len);
352 bcwrite_block(ctx, name, len); 316 p = lj_buf_wmem(p, name, len);
353 } 317 }
354 ctx->status = ctx->wfunc(ctx->L, ctx->sb.buf, ctx->sb.n, ctx->wdata); 318 ctx->status = ctx->wfunc(sbufL(&ctx->sb), sbufB(&ctx->sb),
319 (MSize)(p - sbufB(&ctx->sb)), ctx->wdata);
355} 320}
356 321
357/* Write footer of bytecode dump. */ 322/* Write footer of bytecode dump. */
@@ -359,7 +324,7 @@ static void bcwrite_footer(BCWriteCtx *ctx)
359{ 324{
360 if (ctx->status == 0) { 325 if (ctx->status == 0) {
361 uint8_t zero = 0; 326 uint8_t zero = 0;
362 ctx->status = ctx->wfunc(ctx->L, &zero, 1, ctx->wdata); 327 ctx->status = ctx->wfunc(sbufL(&ctx->sb), &zero, 1, ctx->wdata);
363 } 328 }
364} 329}
365 330
@@ -367,8 +332,8 @@ static void bcwrite_footer(BCWriteCtx *ctx)
367static TValue *cpwriter(lua_State *L, lua_CFunction dummy, void *ud) 332static TValue *cpwriter(lua_State *L, lua_CFunction dummy, void *ud)
368{ 333{
369 BCWriteCtx *ctx = (BCWriteCtx *)ud; 334 BCWriteCtx *ctx = (BCWriteCtx *)ud;
370 UNUSED(dummy); 335 UNUSED(L); UNUSED(dummy);
371 lj_str_resizebuf(L, &ctx->sb, 1024); /* Avoids resize for most prototypes. */ 336 lj_buf_need(&ctx->sb, 1024); /* Avoids resize for most prototypes. */
372 bcwrite_header(ctx); 337 bcwrite_header(ctx);
373 bcwrite_proto(ctx, ctx->pt); 338 bcwrite_proto(ctx, ctx->pt);
374 bcwrite_footer(ctx); 339 bcwrite_footer(ctx);
@@ -381,16 +346,15 @@ int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, void *data,
381{ 346{
382 BCWriteCtx ctx; 347 BCWriteCtx ctx;
383 int status; 348 int status;
384 ctx.L = L;
385 ctx.pt = pt; 349 ctx.pt = pt;
386 ctx.wfunc = writer; 350 ctx.wfunc = writer;
387 ctx.wdata = data; 351 ctx.wdata = data;
388 ctx.strip = strip; 352 ctx.strip = strip;
389 ctx.status = 0; 353 ctx.status = 0;
390 lj_str_initbuf(&ctx.sb); 354 lj_buf_init(L, &ctx.sb);
391 status = lj_vm_cpcall(L, NULL, &ctx, cpwriter); 355 status = lj_vm_cpcall(L, NULL, &ctx, cpwriter);
392 if (status == 0) status = ctx.status; 356 if (status == 0) status = ctx.status;
393 lj_str_freebuf(G(ctx.L), &ctx.sb); 357 lj_buf_free(G(sbufL(&ctx.sb)), &ctx.sb);
394 return status; 358 return status;
395} 359}
396 360
diff --git a/src/lj_buf.c b/src/lj_buf.c
new file mode 100644
index 00000000..1786c10d
--- /dev/null
+++ b/src/lj_buf.c
@@ -0,0 +1,222 @@
1/*
2** Buffer handling.
3** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#include <stdio.h>
7
8#define lj_buf_c
9#define LUA_CORE
10
11#include "lj_obj.h"
12#include "lj_gc.h"
13#include "lj_err.h"
14#include "lj_buf.h"
15#include "lj_str.h"
16#include "lj_tab.h"
17#include "lj_strfmt.h"
18
19/* -- Buffer management --------------------------------------------------- */
20
21LJ_NOINLINE void LJ_FASTCALL lj_buf_grow(SBuf *sb, char *en)
22{
23 lua_State *L = sbufL(sb);
24 char *b = sbufB(sb);
25 MSize sz = (MSize)(en - b);
26 MSize osz = (MSize)(sbufE(sb) - b), nsz = osz;
27 MSize n = (MSize)(sbufP(sb) - b);
28 if (LJ_UNLIKELY(sz > LJ_MAX_MEM))
29 lj_err_mem(L);
30 if (nsz < LJ_MIN_SBUF) nsz = LJ_MIN_SBUF;
31 while (nsz < sz) nsz += nsz;
32 b = (char *)lj_mem_realloc(L, b, osz, nsz);
33 setmref(sb->b, b);
34 setmref(sb->p, b + n);
35 setmref(sb->e, b + nsz);
36}
37
38void LJ_FASTCALL lj_buf_shrink(lua_State *L, SBuf *sb)
39{
40 char *b = sbufB(sb);
41 MSize osz = (MSize)(sbufE(sb) - b);
42 if (osz > 2*LJ_MIN_SBUF) {
43 MSize n = (MSize)(sbufP(sb) - b);
44 b = lj_mem_realloc(L, b, osz, (osz >> 1));
45 setmref(sb->b, b);
46 setmref(sb->p, b + n);
47 setmref(sb->e, b + (osz >> 1));
48 }
49}
50
51char * LJ_FASTCALL lj_buf_tmp(lua_State *L, MSize sz)
52{
53 SBuf *sb = &G(L)->tmpbuf;
54 setsbufL(sb, L);
55 return lj_buf_need(sb, sz);
56}
57
58/* -- Low-level buffer put operations ------------------------------------- */
59
60SBuf *lj_buf_putmem(SBuf *sb, const void *q, MSize len)
61{
62 char *p = lj_buf_more(sb, len);
63 p = lj_buf_wmem(p, q, len);
64 setsbufP(sb, p);
65 return sb;
66}
67
#if LJ_HASJIT
/* Append a single character to the buffer. Returns the buffer. */
SBuf * LJ_FASTCALL lj_buf_putchar(SBuf *sb, int c)
{
  lj_buf_putb(sb, c);  /* Reserves one byte, stores it and advances. */
  return sb;
}
#endif
77
78SBuf * LJ_FASTCALL lj_buf_putstr(SBuf *sb, GCstr *s)
79{
80 MSize len = s->len;
81 char *p = lj_buf_more(sb, len);
82 p = lj_buf_wmem(p, strdata(s), len);
83 setsbufP(sb, p);
84 return sb;
85}
86
87/* -- High-level buffer put operations ------------------------------------ */
88
89SBuf * LJ_FASTCALL lj_buf_putstr_reverse(SBuf *sb, GCstr *s)
90{
91 MSize len = s->len;
92 char *p = lj_buf_more(sb, len), *e = p+len;
93 const char *q = strdata(s)+len-1;
94 while (p < e)
95 *p++ = *q--;
96 setsbufP(sb, p);
97 return sb;
98}
99
100SBuf * LJ_FASTCALL lj_buf_putstr_lower(SBuf *sb, GCstr *s)
101{
102 MSize len = s->len;
103 char *p = lj_buf_more(sb, len), *e = p+len;
104 const char *q = strdata(s);
105 for (; p < e; p++, q++) {
106 uint32_t c = *(unsigned char *)q;
107#if LJ_TARGET_PPC
108 *p = c + ((c >= 'A' && c <= 'Z') << 5);
109#else
110 if (c >= 'A' && c <= 'Z') c += 0x20;
111 *p = c;
112#endif
113 }
114 setsbufP(sb, p);
115 return sb;
116}
117
118SBuf * LJ_FASTCALL lj_buf_putstr_upper(SBuf *sb, GCstr *s)
119{
120 MSize len = s->len;
121 char *p = lj_buf_more(sb, len), *e = p+len;
122 const char *q = strdata(s);
123 for (; p < e; p++, q++) {
124 uint32_t c = *(unsigned char *)q;
125#if LJ_TARGET_PPC
126 *p = c - ((c >= 'a' && c <= 'z') << 5);
127#else
128 if (c >= 'a' && c <= 'z') c -= 0x20;
129 *p = c;
130#endif
131 }
132 setsbufP(sb, p);
133 return sb;
134}
135
136SBuf *lj_buf_putstr_rep(SBuf *sb, GCstr *s, int32_t rep)
137{
138 MSize len = s->len;
139 if (rep > 0 && len) {
140 uint64_t tlen = (uint64_t)rep * len;
141 char *p;
142 if (LJ_UNLIKELY(tlen > LJ_MAX_STR))
143 lj_err_mem(sbufL(sb));
144 p = lj_buf_more(sb, (MSize)tlen);
145 if (len == 1) { /* Optimize a common case. */
146 uint32_t c = strdata(s)[0];
147 do { *p++ = c; } while (--rep > 0);
148 } else {
149 const char *e = strdata(s) + len;
150 do {
151 const char *q = strdata(s);
152 do { *p++ = *q++; } while (q < e);
153 } while (--rep > 0);
154 }
155 setsbufP(sb, p);
156 }
157 return sb;
158}
159
160SBuf *lj_buf_puttab(SBuf *sb, GCtab *t, GCstr *sep, int32_t i, int32_t e)
161{
162 MSize seplen = sep ? sep->len : 0;
163 if (i <= e) {
164 for (;;) {
165 cTValue *o = lj_tab_getint(t, i);
166 char *p;
167 if (!o) {
168 badtype: /* Error: bad element type. */
169 setsbufP(sb, (intptr_t)i); /* Store failing index. */
170 return NULL;
171 } else if (tvisstr(o)) {
172 MSize len = strV(o)->len;
173 p = lj_buf_wmem(lj_buf_more(sb, len + seplen), strVdata(o), len);
174 } else if (tvisint(o)) {
175 p = lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT+seplen), intV(o));
176 } else if (tvisnum(o)) {
177 p = lj_strfmt_wnum(lj_buf_more(sb, STRFMT_MAXBUF_NUM+seplen), o);
178 } else {
179 goto badtype;
180 }
181 if (i++ == e) {
182 setsbufP(sb, p);
183 break;
184 }
185 if (seplen) p = lj_buf_wmem(p, strdata(sep), seplen);
186 setsbufP(sb, p);
187 }
188 }
189 return sb;
190}
191
192/* -- Miscellaneous buffer operations ------------------------------------- */
193
194GCstr * LJ_FASTCALL lj_buf_tostr(SBuf *sb)
195{
196 return lj_str_new(sbufL(sb), sbufB(sb), sbuflen(sb));
197}
198
199/* Concatenate two strings. */
200GCstr *lj_buf_cat2str(lua_State *L, GCstr *s1, GCstr *s2)
201{
202 MSize len1 = s1->len, len2 = s2->len;
203 char *buf = lj_buf_tmp(L, len1 + len2);
204 memcpy(buf, strdata(s1), len1);
205 memcpy(buf+len1, strdata(s2), len2);
206 return lj_str_new(L, buf, len1 + len2);
207}
208
209/* Read ULEB128 from buffer. */
210uint32_t LJ_FASTCALL lj_buf_ruleb128(const char **pp)
211{
212 const uint8_t *p = (const uint8_t *)*pp;
213 uint32_t v = *p++;
214 if (LJ_UNLIKELY(v >= 0x80)) {
215 int sh = 0;
216 v &= 0x7f;
217 do { v |= ((*p & 0x7f) << (sh += 7)); } while (*p++ >= 0x80);
218 }
219 *pp = (const char *)p;
220 return v;
221}
222
diff --git a/src/lj_buf.h b/src/lj_buf.h
new file mode 100644
index 00000000..5f78c4a9
--- /dev/null
+++ b/src/lj_buf.h
@@ -0,0 +1,105 @@
1/*
2** Buffer handling.
3** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_BUF_H
7#define _LJ_BUF_H
8
9#include "lj_obj.h"
10#include "lj_gc.h"
11#include "lj_str.h"
12
13/* Resizable string buffers. Struct definition in lj_obj.h. */
14#define sbufB(sb) (mref((sb)->b, char))
15#define sbufP(sb) (mref((sb)->p, char))
16#define sbufE(sb) (mref((sb)->e, char))
17#define sbufL(sb) (mref((sb)->L, lua_State))
18#define sbufsz(sb) ((MSize)(sbufE((sb)) - sbufB((sb))))
19#define sbuflen(sb) ((MSize)(sbufP((sb)) - sbufB((sb))))
20#define setsbufP(sb, q) (setmref((sb)->p, (q)))
21#define setsbufL(sb, l) (setmref((sb)->L, (l)))
22
23/* Buffer management */
24LJ_FUNC void LJ_FASTCALL lj_buf_grow(SBuf *sb, char *en);
25LJ_FUNC void LJ_FASTCALL lj_buf_shrink(lua_State *L, SBuf *sb);
26LJ_FUNC char * LJ_FASTCALL lj_buf_tmp(lua_State *L, MSize sz);
27
28static LJ_AINLINE void lj_buf_init(lua_State *L, SBuf *sb)
29{
30 setsbufL(sb, L);
31 setmref(sb->p, NULL); setmref(sb->e, NULL); setmref(sb->b, NULL);
32}
33
34static LJ_AINLINE void lj_buf_reset(SBuf *sb)
35{
36 setmrefr(sb->p, sb->b);
37}
38
39static LJ_AINLINE SBuf *lj_buf_tmp_(lua_State *L)
40{
41 SBuf *sb = &G(L)->tmpbuf;
42 setsbufL(sb, L);
43 lj_buf_reset(sb);
44 return sb;
45}
46
47static LJ_AINLINE void lj_buf_free(global_State *g, SBuf *sb)
48{
49 lj_mem_free(g, sbufB(sb), sbufsz(sb));
50}
51
52static LJ_AINLINE char *lj_buf_need(SBuf *sb, MSize sz)
53{
54 char *en = sbufB(sb) + sz;
55 if (LJ_UNLIKELY(en > sbufE(sb)))
56 lj_buf_grow(sb, en);
57 return sbufB(sb);
58}
59
60static LJ_AINLINE char *lj_buf_more(SBuf *sb, MSize sz)
61{
62 char *en = sbufP(sb) + sz;
63 if (LJ_UNLIKELY(en > sbufE(sb)))
64 lj_buf_grow(sb, en);
65 return sbufP(sb);
66}
67
68/* Low-level buffer put operations */
69LJ_FUNC SBuf *lj_buf_putmem(SBuf *sb, const void *q, MSize len);
70#if LJ_HASJIT
71LJ_FUNC SBuf * LJ_FASTCALL lj_buf_putchar(SBuf *sb, int c);
72#endif
73LJ_FUNC SBuf * LJ_FASTCALL lj_buf_putstr(SBuf *sb, GCstr *s);
74
75static LJ_AINLINE char *lj_buf_wmem(char *p, const void *q, MSize len)
76{
77 return (char *)memcpy(p, q, len) + len;
78}
79
80static LJ_AINLINE void lj_buf_putb(SBuf *sb, int c)
81{
82 char *p = lj_buf_more(sb, 1);
83 *p++ = (char)c;
84 setsbufP(sb, p);
85}
86
87/* High-level buffer put operations */
88LJ_FUNCA SBuf * LJ_FASTCALL lj_buf_putstr_reverse(SBuf *sb, GCstr *s);
89LJ_FUNCA SBuf * LJ_FASTCALL lj_buf_putstr_lower(SBuf *sb, GCstr *s);
90LJ_FUNCA SBuf * LJ_FASTCALL lj_buf_putstr_upper(SBuf *sb, GCstr *s);
91LJ_FUNC SBuf *lj_buf_putstr_rep(SBuf *sb, GCstr *s, int32_t rep);
92LJ_FUNC SBuf *lj_buf_puttab(SBuf *sb, GCtab *t, GCstr *sep,
93 int32_t i, int32_t e);
94
95/* Miscellaneous buffer operations */
96LJ_FUNCA GCstr * LJ_FASTCALL lj_buf_tostr(SBuf *sb);
97LJ_FUNC GCstr *lj_buf_cat2str(lua_State *L, GCstr *s1, GCstr *s2);
98LJ_FUNC uint32_t LJ_FASTCALL lj_buf_ruleb128(const char **pp);
99
100static LJ_AINLINE GCstr *lj_buf_str(lua_State *L, SBuf *sb)
101{
102 return lj_str_new(L, sbufB(sb), sbuflen(sb));
103}
104
105#endif
diff --git a/src/lj_carith.c b/src/lj_carith.c
index afe7e682..bb810af8 100644
--- a/src/lj_carith.c
+++ b/src/lj_carith.c
@@ -11,10 +11,12 @@
11#include "lj_err.h" 11#include "lj_err.h"
12#include "lj_tab.h" 12#include "lj_tab.h"
13#include "lj_meta.h" 13#include "lj_meta.h"
14#include "lj_ir.h"
14#include "lj_ctype.h" 15#include "lj_ctype.h"
15#include "lj_cconv.h" 16#include "lj_cconv.h"
16#include "lj_cdata.h" 17#include "lj_cdata.h"
17#include "lj_carith.h" 18#include "lj_carith.h"
19#include "lj_strscan.h"
18 20
19/* -- C data arithmetic --------------------------------------------------- */ 21/* -- C data arithmetic --------------------------------------------------- */
20 22
@@ -270,6 +272,80 @@ int lj_carith_op(lua_State *L, MMS mm)
270 return lj_carith_meta(L, cts, &ca, mm); 272 return lj_carith_meta(L, cts, &ca, mm);
271} 273}
272 274
275/* -- 64 bit bit operations helpers --------------------------------------- */
276
277#if LJ_64
278#define B64DEF(name) \
279 static LJ_AINLINE uint64_t lj_carith_##name(uint64_t x, int32_t sh)
280#else
281/* Not inlined on 32 bit archs, since some of these are quite lengthy. */
282#define B64DEF(name) \
283 uint64_t LJ_NOINLINE lj_carith_##name(uint64_t x, int32_t sh)
284#endif
285
286B64DEF(shl64) { return x << (sh&63); }
287B64DEF(shr64) { return x >> (sh&63); }
288B64DEF(sar64) { return (uint64_t)((int64_t)x >> (sh&63)); }
289B64DEF(rol64) { return lj_rol(x, (sh&63)); }
290B64DEF(ror64) { return lj_ror(x, (sh&63)); }
291
292#undef B64DEF
293
294uint64_t lj_carith_shift64(uint64_t x, int32_t sh, int op)
295{
296 switch (op) {
297 case IR_BSHL-IR_BSHL: x = lj_carith_shl64(x, sh); break;
298 case IR_BSHR-IR_BSHL: x = lj_carith_shr64(x, sh); break;
299 case IR_BSAR-IR_BSHL: x = lj_carith_sar64(x, sh); break;
300 case IR_BROL-IR_BSHL: x = lj_carith_rol64(x, sh); break;
301 case IR_BROR-IR_BSHL: x = lj_carith_ror64(x, sh); break;
302 default: lua_assert(0); break;
303 }
304 return x;
305}
306
307/* Equivalent to lj_lib_checkbit(), but handles cdata. */
308uint64_t lj_carith_check64(lua_State *L, int narg, CTypeID *id)
309{
310 TValue *o = L->base + narg-1;
311 if (o >= L->top) {
312 err:
313 lj_err_argt(L, narg, LUA_TNUMBER);
314 } else if (LJ_LIKELY(tvisnumber(o))) {
315 /* Handled below. */
316 } else if (tviscdata(o)) {
317 CTState *cts = ctype_cts(L);
318 uint8_t *sp = (uint8_t *)cdataptr(cdataV(o));
319 CTypeID sid = cdataV(o)->ctypeid;
320 CType *s = ctype_get(cts, sid);
321 uint64_t x;
322 if (ctype_isref(s->info)) {
323 sp = *(void **)sp;
324 sid = ctype_cid(s->info);
325 }
326 s = ctype_raw(cts, sid);
327 if (ctype_isenum(s->info)) s = ctype_child(cts, s);
328 if ((s->info & (CTMASK_NUM|CTF_BOOL|CTF_FP|CTF_UNSIGNED)) ==
329 CTINFO(CT_NUM, CTF_UNSIGNED) && s->size == 8)
330 *id = CTID_UINT64; /* Use uint64_t, since it has the highest rank. */
331 else if (!*id)
332 *id = CTID_INT64; /* Use int64_t, unless already set. */
333 lj_cconv_ct_ct(cts, ctype_get(cts, *id), s,
334 (uint8_t *)&x, sp, CCF_ARG(narg));
335 return x;
336 } else if (!(tvisstr(o) && lj_strscan_number(strV(o), o))) {
337 goto err;
338 }
339 if (LJ_LIKELY(tvisint(o))) {
340 return (uint32_t)intV(o);
341 } else {
342 int32_t i = lj_num2bit(numV(o));
343 if (LJ_DUALNUM) setintV(o, i);
344 return (uint32_t)i;
345 }
346}
347
348
273/* -- 64 bit integer arithmetic helpers ----------------------------------- */ 349/* -- 64 bit integer arithmetic helpers ----------------------------------- */
274 350
275#if LJ_32 && LJ_HASJIT 351#if LJ_32 && LJ_HASJIT
diff --git a/src/lj_carith.h b/src/lj_carith.h
index ae17df00..b1a65d35 100644
--- a/src/lj_carith.h
+++ b/src/lj_carith.h
@@ -12,6 +12,16 @@
12 12
13LJ_FUNC int lj_carith_op(lua_State *L, MMS mm); 13LJ_FUNC int lj_carith_op(lua_State *L, MMS mm);
14 14
15#if LJ_32
16LJ_FUNC uint64_t lj_carith_shl64(uint64_t x, int32_t sh);
17LJ_FUNC uint64_t lj_carith_shr64(uint64_t x, int32_t sh);
18LJ_FUNC uint64_t lj_carith_sar64(uint64_t x, int32_t sh);
19LJ_FUNC uint64_t lj_carith_rol64(uint64_t x, int32_t sh);
20LJ_FUNC uint64_t lj_carith_ror64(uint64_t x, int32_t sh);
21#endif
22LJ_FUNC uint64_t lj_carith_shift64(uint64_t x, int32_t sh, int op);
23LJ_FUNC uint64_t lj_carith_check64(lua_State *L, int narg, CTypeID *id);
24
15#if LJ_32 && LJ_HASJIT 25#if LJ_32 && LJ_HASJIT
16LJ_FUNC int64_t lj_carith_mul64(int64_t x, int64_t k); 26LJ_FUNC int64_t lj_carith_mul64(int64_t x, int64_t k);
17#endif 27#endif
diff --git a/src/lj_ccall.c b/src/lj_ccall.c
index eb73604f..995729b0 100644
--- a/src/lj_ccall.c
+++ b/src/lj_ccall.c
@@ -9,7 +9,6 @@
9 9
10#include "lj_gc.h" 10#include "lj_gc.h"
11#include "lj_err.h" 11#include "lj_err.h"
12#include "lj_str.h"
13#include "lj_tab.h" 12#include "lj_tab.h"
14#include "lj_ctype.h" 13#include "lj_ctype.h"
15#include "lj_cconv.h" 14#include "lj_cconv.h"
diff --git a/src/lj_cdata.c b/src/lj_cdata.c
index 10f4809c..aa3bd1a5 100644
--- a/src/lj_cdata.c
+++ b/src/lj_cdata.c
@@ -9,7 +9,6 @@
9 9
10#include "lj_gc.h" 10#include "lj_gc.h"
11#include "lj_err.h" 11#include "lj_err.h"
12#include "lj_str.h"
13#include "lj_tab.h" 12#include "lj_tab.h"
14#include "lj_ctype.h" 13#include "lj_ctype.h"
15#include "lj_cconv.h" 14#include "lj_cconv.h"
@@ -27,12 +26,12 @@ GCcdata *lj_cdata_newref(CTState *cts, const void *p, CTypeID id)
27} 26}
28 27
29/* Allocate variable-sized or specially aligned C data object. */ 28/* Allocate variable-sized or specially aligned C data object. */
30GCcdata *lj_cdata_newv(CTState *cts, CTypeID id, CTSize sz, CTSize align) 29GCcdata *lj_cdata_newv(lua_State *L, CTypeID id, CTSize sz, CTSize align)
31{ 30{
32 global_State *g; 31 global_State *g;
33 MSize extra = sizeof(GCcdataVar) + sizeof(GCcdata) + 32 MSize extra = sizeof(GCcdataVar) + sizeof(GCcdata) +
34 (align > CT_MEMALIGN ? (1u<<align) - (1u<<CT_MEMALIGN) : 0); 33 (align > CT_MEMALIGN ? (1u<<align) - (1u<<CT_MEMALIGN) : 0);
35 char *p = lj_mem_newt(cts->L, extra + sz, char); 34 char *p = lj_mem_newt(L, extra + sz, char);
36 uintptr_t adata = (uintptr_t)p + sizeof(GCcdataVar) + sizeof(GCcdata); 35 uintptr_t adata = (uintptr_t)p + sizeof(GCcdataVar) + sizeof(GCcdata);
37 uintptr_t almask = (1u << align) - 1u; 36 uintptr_t almask = (1u << align) - 1u;
38 GCcdata *cd = (GCcdata *)(((adata + almask) & ~almask) - sizeof(GCcdata)); 37 GCcdata *cd = (GCcdata *)(((adata + almask) & ~almask) - sizeof(GCcdata));
@@ -40,7 +39,7 @@ GCcdata *lj_cdata_newv(CTState *cts, CTypeID id, CTSize sz, CTSize align)
40 cdatav(cd)->offset = (uint16_t)((char *)cd - p); 39 cdatav(cd)->offset = (uint16_t)((char *)cd - p);
41 cdatav(cd)->extra = extra; 40 cdatav(cd)->extra = extra;
42 cdatav(cd)->len = sz; 41 cdatav(cd)->len = sz;
43 g = cts->g; 42 g = G(L);
44 setgcrefr(cd->nextgc, g->gc.root); 43 setgcrefr(cd->nextgc, g->gc.root);
45 setgcref(g->gc.root, obj2gco(cd)); 44 setgcref(g->gc.root, obj2gco(cd));
46 newwhite(g, obj2gco(cd)); 45 newwhite(g, obj2gco(cd));
@@ -76,21 +75,20 @@ void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd)
76 } 75 }
77} 76}
78 77
79TValue * LJ_FASTCALL lj_cdata_setfin(lua_State *L, GCcdata *cd) 78void lj_cdata_setfin(lua_State *L, GCcdata *cd, GCobj *obj, uint32_t it)
80{ 79{
81 global_State *g = G(L); 80 GCtab *t = ctype_ctsG(G(L))->finalizer;
82 GCtab *t = ctype_ctsG(g)->finalizer;
83 if (gcref(t->metatable)) { 81 if (gcref(t->metatable)) {
84 /* Add cdata to finalizer table, if still enabled. */ 82 /* Add cdata to finalizer table, if still enabled. */
85 TValue *tv, tmp; 83 TValue *tv, tmp;
86 setcdataV(L, &tmp, cd); 84 setcdataV(L, &tmp, cd);
87 lj_gc_anybarriert(L, t); 85 lj_gc_anybarriert(L, t);
88 tv = lj_tab_set(L, t, &tmp); 86 tv = lj_tab_set(L, t, &tmp);
89 cd->marked |= LJ_GC_CDATA_FIN; 87 setgcV(L, tv, obj, it);
90 return tv; 88 if (!tvisnil(tv))
91 } else { 89 cd->marked |= LJ_GC_CDATA_FIN;
92 /* Otherwise return dummy TValue. */ 90 else
93 return &g->tmptv; 91 cd->marked &= ~LJ_GC_CDATA_FIN;
94 } 92 }
95} 93}
96 94
diff --git a/src/lj_cdata.h b/src/lj_cdata.h
index 0c81b02b..7db1ca1e 100644
--- a/src/lj_cdata.h
+++ b/src/lj_cdata.h
@@ -58,11 +58,12 @@ static LJ_AINLINE GCcdata *lj_cdata_new_(lua_State *L, CTypeID id, CTSize sz)
58} 58}
59 59
60LJ_FUNC GCcdata *lj_cdata_newref(CTState *cts, const void *pp, CTypeID id); 60LJ_FUNC GCcdata *lj_cdata_newref(CTState *cts, const void *pp, CTypeID id);
61LJ_FUNC GCcdata *lj_cdata_newv(CTState *cts, CTypeID id, CTSize sz, 61LJ_FUNC GCcdata *lj_cdata_newv(lua_State *L, CTypeID id, CTSize sz,
62 CTSize align); 62 CTSize align);
63 63
64LJ_FUNC void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd); 64LJ_FUNC void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd);
65LJ_FUNCA TValue * LJ_FASTCALL lj_cdata_setfin(lua_State *L, GCcdata *cd); 65LJ_FUNC void lj_cdata_setfin(lua_State *L, GCcdata *cd, GCobj *obj,
66 uint32_t it);
66 67
67LJ_FUNC CType *lj_cdata_index(CTState *cts, GCcdata *cd, cTValue *key, 68LJ_FUNC CType *lj_cdata_index(CTState *cts, GCcdata *cd, cTValue *key,
68 uint8_t **pp, CTInfo *qual); 69 uint8_t **pp, CTInfo *qual);
diff --git a/src/lj_clib.c b/src/lj_clib.c
index 23d1f182..263028e4 100644
--- a/src/lj_clib.c
+++ b/src/lj_clib.c
@@ -16,6 +16,7 @@
16#include "lj_cconv.h" 16#include "lj_cconv.h"
17#include "lj_cdata.h" 17#include "lj_cdata.h"
18#include "lj_clib.h" 18#include "lj_clib.h"
19#include "lj_strfmt.h"
19 20
20/* -- OS-specific functions ----------------------------------------------- */ 21/* -- OS-specific functions ----------------------------------------------- */
21 22
@@ -61,7 +62,7 @@ static const char *clib_extname(lua_State *L, const char *name)
61#endif 62#endif
62 ) { 63 ) {
63 if (!strchr(name, '.')) { 64 if (!strchr(name, '.')) {
64 name = lj_str_pushf(L, CLIB_SOEXT, name); 65 name = lj_strfmt_pushf(L, CLIB_SOEXT, name);
65 L->top--; 66 L->top--;
66#ifdef __CYGWIN__ 67#ifdef __CYGWIN__
67 } else { 68 } else {
@@ -70,7 +71,7 @@ static const char *clib_extname(lua_State *L, const char *name)
70 } 71 }
71 if (!(name[0] == CLIB_SOPREFIX[0] && name[1] == CLIB_SOPREFIX[1] && 72 if (!(name[0] == CLIB_SOPREFIX[0] && name[1] == CLIB_SOPREFIX[1] &&
72 name[2] == CLIB_SOPREFIX[2])) { 73 name[2] == CLIB_SOPREFIX[2])) {
73 name = lj_str_pushf(L, CLIB_SOPREFIX "%s", name); 74 name = lj_strfmt_pushf(L, CLIB_SOPREFIX "%s", name);
74 L->top--; 75 L->top--;
75 } 76 }
76 } 77 }
@@ -178,7 +179,7 @@ LJ_NORET LJ_NOINLINE static void clib_error(lua_State *L, const char *fmt,
178 if (!FormatMessageA(FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_FROM_SYSTEM, 179 if (!FormatMessageA(FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_FROM_SYSTEM,
179 NULL, err, 0, buf, sizeof(buf), NULL)) 180 NULL, err, 0, buf, sizeof(buf), NULL))
180 buf[0] = '\0'; 181 buf[0] = '\0';
181 lj_err_callermsg(L, lj_str_pushf(L, fmt, name, buf)); 182 lj_err_callermsg(L, lj_strfmt_pushf(L, fmt, name, buf));
182} 183}
183 184
184static int clib_needext(const char *s) 185static int clib_needext(const char *s)
@@ -193,7 +194,7 @@ static int clib_needext(const char *s)
193static const char *clib_extname(lua_State *L, const char *name) 194static const char *clib_extname(lua_State *L, const char *name)
194{ 195{
195 if (clib_needext(name)) { 196 if (clib_needext(name)) {
196 name = lj_str_pushf(L, "%s.dll", name); 197 name = lj_strfmt_pushf(L, "%s.dll", name);
197 L->top--; 198 L->top--;
198 } 199 }
199 return name; 200 return name;
@@ -266,7 +267,7 @@ static void *clib_getsym(CLibrary *cl, const char *name)
266LJ_NORET LJ_NOINLINE static void clib_error(lua_State *L, const char *fmt, 267LJ_NORET LJ_NOINLINE static void clib_error(lua_State *L, const char *fmt,
267 const char *name) 268 const char *name)
268{ 269{
269 lj_err_callermsg(L, lj_str_pushf(L, fmt, name, "no support for this OS")); 270 lj_err_callermsg(L, lj_strfmt_pushf(L, fmt, name, "no support for this OS"));
270} 271}
271 272
272static void *clib_loadlib(lua_State *L, const char *name, int global) 273static void *clib_loadlib(lua_State *L, const char *name, int global)
@@ -350,7 +351,7 @@ TValue *lj_clib_index(lua_State *L, CLibrary *cl, GCstr *name)
350 CTInfo cconv = ctype_cconv(ct->info); 351 CTInfo cconv = ctype_cconv(ct->info);
351 if (cconv == CTCC_FASTCALL || cconv == CTCC_STDCALL) { 352 if (cconv == CTCC_FASTCALL || cconv == CTCC_STDCALL) {
352 CTSize sz = clib_func_argsize(cts, ct); 353 CTSize sz = clib_func_argsize(cts, ct);
353 const char *symd = lj_str_pushf(L, 354 const char *symd = lj_strfmt_pushf(L,
354 cconv == CTCC_FASTCALL ? "@%s@%d" : "_%s@%d", 355 cconv == CTCC_FASTCALL ? "@%s@%d" : "_%s@%d",
355 sym, sz); 356 sym, sz);
356 L->top--; 357 L->top--;
diff --git a/src/lj_cparse.c b/src/lj_cparse.c
index 107c0381..4392b7c4 100644
--- a/src/lj_cparse.c
+++ b/src/lj_cparse.c
@@ -9,13 +9,14 @@
9 9
10#include "lj_gc.h" 10#include "lj_gc.h"
11#include "lj_err.h" 11#include "lj_err.h"
12#include "lj_str.h" 12#include "lj_buf.h"
13#include "lj_ctype.h" 13#include "lj_ctype.h"
14#include "lj_cparse.h" 14#include "lj_cparse.h"
15#include "lj_frame.h" 15#include "lj_frame.h"
16#include "lj_vm.h" 16#include "lj_vm.h"
17#include "lj_char.h" 17#include "lj_char.h"
18#include "lj_strscan.h" 18#include "lj_strscan.h"
19#include "lj_strfmt.h"
19 20
20/* 21/*
21** Important note: this is NOT a validating C parser! This is a minimal 22** Important note: this is NOT a validating C parser! This is a minimal
@@ -46,9 +47,9 @@ static const char *cp_tok2str(CPState *cp, CPToken tok)
46 if (tok > CTOK_OFS) 47 if (tok > CTOK_OFS)
47 return ctoknames[tok-CTOK_OFS-1]; 48 return ctoknames[tok-CTOK_OFS-1];
48 else if (!lj_char_iscntrl(tok)) 49 else if (!lj_char_iscntrl(tok))
49 return lj_str_pushf(cp->L, "%c", tok); 50 return lj_strfmt_pushf(cp->L, "%c", tok);
50 else 51 else
51 return lj_str_pushf(cp->L, "char(%d)", tok); 52 return lj_strfmt_pushf(cp->L, "char(%d)", tok);
52} 53}
53 54
54/* End-of-line? */ 55/* End-of-line? */
@@ -85,24 +86,10 @@ static LJ_AINLINE CPChar cp_get(CPState *cp)
85 return cp_get_bs(cp); 86 return cp_get_bs(cp);
86} 87}
87 88
88/* Grow save buffer. */
89static LJ_NOINLINE void cp_save_grow(CPState *cp, CPChar c)
90{
91 MSize newsize;
92 if (cp->sb.sz >= CPARSE_MAX_BUF/2)
93 cp_err(cp, LJ_ERR_XELEM);
94 newsize = cp->sb.sz * 2;
95 lj_str_resizebuf(cp->L, &cp->sb, newsize);
96 cp->sb.buf[cp->sb.n++] = (char)c;
97}
98
99/* Save character in buffer. */ 89/* Save character in buffer. */
100static LJ_AINLINE void cp_save(CPState *cp, CPChar c) 90static LJ_AINLINE void cp_save(CPState *cp, CPChar c)
101{ 91{
102 if (LJ_UNLIKELY(cp->sb.n + 1 > cp->sb.sz)) 92 lj_buf_putb(&cp->sb, c);
103 cp_save_grow(cp, c);
104 else
105 cp->sb.buf[cp->sb.n++] = (char)c;
106} 93}
107 94
108/* Skip line break. Handles "\n", "\r", "\r\n" or "\n\r". */ 95/* Skip line break. Handles "\n", "\r", "\r\n" or "\n\r". */
@@ -122,20 +109,20 @@ LJ_NORET static void cp_errmsg(CPState *cp, CPToken tok, ErrMsg em, ...)
122 tokstr = NULL; 109 tokstr = NULL;
123 } else if (tok == CTOK_IDENT || tok == CTOK_INTEGER || tok == CTOK_STRING || 110 } else if (tok == CTOK_IDENT || tok == CTOK_INTEGER || tok == CTOK_STRING ||
124 tok >= CTOK_FIRSTDECL) { 111 tok >= CTOK_FIRSTDECL) {
125 if (cp->sb.n == 0) cp_save(cp, '$'); 112 if (sbufP(&cp->sb) == sbufB(&cp->sb)) cp_save(cp, '$');
126 cp_save(cp, '\0'); 113 cp_save(cp, '\0');
127 tokstr = cp->sb.buf; 114 tokstr = sbufB(&cp->sb);
128 } else { 115 } else {
129 tokstr = cp_tok2str(cp, tok); 116 tokstr = cp_tok2str(cp, tok);
130 } 117 }
131 L = cp->L; 118 L = cp->L;
132 va_start(argp, em); 119 va_start(argp, em);
133 msg = lj_str_pushvf(L, err2msg(em), argp); 120 msg = lj_strfmt_pushvf(L, err2msg(em), argp);
134 va_end(argp); 121 va_end(argp);
135 if (tokstr) 122 if (tokstr)
136 msg = lj_str_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tokstr); 123 msg = lj_strfmt_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tokstr);
137 if (cp->linenumber > 1) 124 if (cp->linenumber > 1)
138 msg = lj_str_pushf(L, "%s at line %d", msg, cp->linenumber); 125 msg = lj_strfmt_pushf(L, "%s at line %d", msg, cp->linenumber);
139 lj_err_callermsg(L, msg); 126 lj_err_callermsg(L, msg);
140} 127}
141 128
@@ -164,7 +151,7 @@ static CPToken cp_number(CPState *cp)
164 TValue o; 151 TValue o;
165 do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp))); 152 do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp)));
166 cp_save(cp, '\0'); 153 cp_save(cp, '\0');
167 fmt = lj_strscan_scan((const uint8_t *)cp->sb.buf, &o, STRSCAN_OPT_C); 154 fmt = lj_strscan_scan((const uint8_t *)sbufB(&cp->sb), &o, STRSCAN_OPT_C);
168 if (fmt == STRSCAN_INT) cp->val.id = CTID_INT32; 155 if (fmt == STRSCAN_INT) cp->val.id = CTID_INT32;
169 else if (fmt == STRSCAN_U32) cp->val.id = CTID_UINT32; 156 else if (fmt == STRSCAN_U32) cp->val.id = CTID_UINT32;
170 else if (!(cp->mode & CPARSE_MODE_SKIP)) 157 else if (!(cp->mode & CPARSE_MODE_SKIP))
@@ -177,7 +164,7 @@ static CPToken cp_number(CPState *cp)
177static CPToken cp_ident(CPState *cp) 164static CPToken cp_ident(CPState *cp)
178{ 165{
179 do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp))); 166 do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp)));
180 cp->str = lj_str_new(cp->L, cp->sb.buf, cp->sb.n); 167 cp->str = lj_buf_str(cp->L, &cp->sb);
181 cp->val.id = lj_ctype_getname(cp->cts, &cp->ct, cp->str, cp->tmask); 168 cp->val.id = lj_ctype_getname(cp->cts, &cp->ct, cp->str, cp->tmask);
182 if (ctype_type(cp->ct->info) == CT_KW) 169 if (ctype_type(cp->ct->info) == CT_KW)
183 return ctype_cid(cp->ct->info); 170 return ctype_cid(cp->ct->info);
@@ -263,11 +250,11 @@ static CPToken cp_string(CPState *cp)
263 } 250 }
264 cp_get(cp); 251 cp_get(cp);
265 if (delim == '"') { 252 if (delim == '"') {
266 cp->str = lj_str_new(cp->L, cp->sb.buf, cp->sb.n); 253 cp->str = lj_buf_str(cp->L, &cp->sb);
267 return CTOK_STRING; 254 return CTOK_STRING;
268 } else { 255 } else {
269 if (cp->sb.n != 1) cp_err_token(cp, '\''); 256 if (sbuflen(&cp->sb) != 1) cp_err_token(cp, '\'');
270 cp->val.i32 = (int32_t)(char)cp->sb.buf[0]; 257 cp->val.i32 = (int32_t)(char)*sbufB(&cp->sb);
271 cp->val.id = CTID_INT32; 258 cp->val.id = CTID_INT32;
272 return CTOK_INTEGER; 259 return CTOK_INTEGER;
273 } 260 }
@@ -296,7 +283,7 @@ static void cp_comment_cpp(CPState *cp)
296/* Lexical scanner for C. Only a minimal subset is implemented. */ 283/* Lexical scanner for C. Only a minimal subset is implemented. */
297static CPToken cp_next_(CPState *cp) 284static CPToken cp_next_(CPState *cp)
298{ 285{
299 lj_str_resetbuf(&cp->sb); 286 lj_buf_reset(&cp->sb);
300 for (;;) { 287 for (;;) {
301 if (lj_char_isident(cp->c)) 288 if (lj_char_isident(cp->c))
302 return lj_char_isdigit(cp->c) ? cp_number(cp) : cp_ident(cp); 289 return lj_char_isdigit(cp->c) ? cp_number(cp) : cp_ident(cp);
@@ -380,8 +367,7 @@ static void cp_init(CPState *cp)
380 cp->depth = 0; 367 cp->depth = 0;
381 cp->curpack = 0; 368 cp->curpack = 0;
382 cp->packstack[0] = 255; 369 cp->packstack[0] = 255;
383 lj_str_initbuf(&cp->sb); 370 lj_buf_init(cp->L, &cp->sb);
384 lj_str_resizebuf(cp->L, &cp->sb, LJ_MIN_SBUF);
385 lua_assert(cp->p != NULL); 371 lua_assert(cp->p != NULL);
386 cp_get(cp); /* Read-ahead first char. */ 372 cp_get(cp); /* Read-ahead first char. */
387 cp->tok = 0; 373 cp->tok = 0;
@@ -393,7 +379,7 @@ static void cp_init(CPState *cp)
393static void cp_cleanup(CPState *cp) 379static void cp_cleanup(CPState *cp)
394{ 380{
395 global_State *g = G(cp->L); 381 global_State *g = G(cp->L);
396 lj_str_freebuf(g, &cp->sb); 382 lj_buf_free(g, &cp->sb);
397} 383}
398 384
399/* Check and consume optional token. */ 385/* Check and consume optional token. */
@@ -1012,7 +998,7 @@ static void cp_decl_asm(CPState *cp, CPDecl *decl)
1012 if (cp->tok == CTOK_STRING) { 998 if (cp->tok == CTOK_STRING) {
1013 GCstr *str = cp->str; 999 GCstr *str = cp->str;
1014 while (cp_next(cp) == CTOK_STRING) { 1000 while (cp_next(cp) == CTOK_STRING) {
1015 lj_str_pushf(cp->L, "%s%s", strdata(str), strdata(cp->str)); 1001 lj_strfmt_pushf(cp->L, "%s%s", strdata(str), strdata(cp->str));
1016 cp->L->top--; 1002 cp->L->top--;
1017 str = strV(cp->L->top); 1003 str = strV(cp->L->top);
1018 } 1004 }
diff --git a/src/lj_crecord.c b/src/lj_crecord.c
index 8577de2c..c3b01f63 100644
--- a/src/lj_crecord.c
+++ b/src/lj_crecord.c
@@ -11,13 +11,13 @@
11#if LJ_HASJIT && LJ_HASFFI 11#if LJ_HASJIT && LJ_HASFFI
12 12
13#include "lj_err.h" 13#include "lj_err.h"
14#include "lj_str.h"
15#include "lj_tab.h" 14#include "lj_tab.h"
16#include "lj_frame.h" 15#include "lj_frame.h"
17#include "lj_ctype.h" 16#include "lj_ctype.h"
18#include "lj_cdata.h" 17#include "lj_cdata.h"
19#include "lj_cparse.h" 18#include "lj_cparse.h"
20#include "lj_cconv.h" 19#include "lj_cconv.h"
20#include "lj_carith.h"
21#include "lj_clib.h" 21#include "lj_clib.h"
22#include "lj_ccall.h" 22#include "lj_ccall.h"
23#include "lj_ff.h" 23#include "lj_ff.h"
@@ -31,6 +31,7 @@
31#include "lj_snap.h" 31#include "lj_snap.h"
32#include "lj_crecord.h" 32#include "lj_crecord.h"
33#include "lj_dispatch.h" 33#include "lj_dispatch.h"
34#include "lj_strfmt.h"
34 35
35/* Some local macros to save typing. Undef'd at the end. */ 36/* Some local macros to save typing. Undef'd at the end. */
36#define IR(ref) (&J->cur.ir[(ref)]) 37#define IR(ref) (&J->cur.ir[(ref)])
@@ -441,7 +442,7 @@ static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp,
441 /* fallthrough */ 442 /* fallthrough */
442 case CCX(I, F): 443 case CCX(I, F):
443 if (dt == IRT_CDATA || st == IRT_CDATA) goto err_nyi; 444 if (dt == IRT_CDATA || st == IRT_CDATA) goto err_nyi;
444 sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, IRCONV_TRUNC|IRCONV_ANY); 445 sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, IRCONV_ANY);
445 goto xstore; 446 goto xstore;
446 case CCX(I, P): 447 case CCX(I, P):
447 case CCX(I, A): 448 case CCX(I, A):
@@ -521,7 +522,7 @@ static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp,
521 if (st == IRT_CDATA) goto err_nyi; 522 if (st == IRT_CDATA) goto err_nyi;
522 /* The signed conversion is cheaper. x64 really has 47 bit pointers. */ 523 /* The signed conversion is cheaper. x64 really has 47 bit pointers. */
523 sp = emitconv(sp, (LJ_64 && dsize == 8) ? IRT_I64 : IRT_U32, 524 sp = emitconv(sp, (LJ_64 && dsize == 8) ? IRT_I64 : IRT_U32,
524 st, IRCONV_TRUNC|IRCONV_ANY); 525 st, IRCONV_ANY);
525 goto xstore; 526 goto xstore;
526 527
527 /* Destination is an array. */ 528 /* Destination is an array. */
@@ -862,21 +863,17 @@ again:
862} 863}
863 864
864/* Record setting a finalizer. */ 865/* Record setting a finalizer. */
865static void crec_finalizer(jit_State *J, TRef trcd, cTValue *fin) 866static void crec_finalizer(jit_State *J, TRef trcd, TRef trfin, cTValue *fin)
866{ 867{
867 TRef trlo = lj_ir_call(J, IRCALL_lj_cdata_setfin, trcd); 868 if (tvisgcv(fin)) {
868 TRef trhi = emitir(IRT(IR_ADD, IRT_P32), trlo, lj_ir_kint(J, 4)); 869 if (!trfin) trfin = lj_ir_kptr(J, gcval(fin));
869 if (LJ_BE) { TRef tmp = trlo; trlo = trhi; trhi = tmp; } 870 } else if (tvisnil(fin)) {
870 if (tvisfunc(fin)) { 871 trfin = lj_ir_kptr(J, NULL);
871 emitir(IRT(IR_XSTORE, IRT_P32), trlo, lj_ir_kfunc(J, funcV(fin)));
872 emitir(IRTI(IR_XSTORE), trhi, lj_ir_kint(J, LJ_TFUNC));
873 } else if (tviscdata(fin)) {
874 emitir(IRT(IR_XSTORE, IRT_P32), trlo,
875 lj_ir_kgc(J, obj2gco(cdataV(fin)), IRT_CDATA));
876 emitir(IRTI(IR_XSTORE), trhi, lj_ir_kint(J, LJ_TCDATA));
877 } else { 872 } else {
878 lj_trace_err(J, LJ_TRERR_BADTYPE); 873 lj_trace_err(J, LJ_TRERR_BADTYPE);
879 } 874 }
875 lj_ir_call(J, IRCALL_lj_cdata_setfin, trcd,
876 trfin, lj_ir_kint(J, (int32_t)itype(fin)));
880 J->needsnap = 1; 877 J->needsnap = 1;
881} 878}
882 879
@@ -887,10 +884,8 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
887 CTSize sz; 884 CTSize sz;
888 CTInfo info = lj_ctype_info(cts, id, &sz); 885 CTInfo info = lj_ctype_info(cts, id, &sz);
889 CType *d = ctype_raw(cts, id); 886 CType *d = ctype_raw(cts, id);
890 TRef trid; 887 TRef trcd, trid = lj_ir_kint(J, id);
891 if (!sz || sz > 128 || (info & CTF_VLA) || ctype_align(info) > CT_MEMALIGN) 888 cTValue *fin;
892 lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: large/special allocations. */
893 trid = lj_ir_kint(J, id);
894 /* Use special instruction to box pointer or 32/64 bit integer. */ 889 /* Use special instruction to box pointer or 32/64 bit integer. */
895 if (ctype_isptr(info) || (ctype_isinteger(info) && (sz == 4 || sz == 8))) { 890 if (ctype_isptr(info) || (ctype_isinteger(info) && (sz == 4 || sz == 8))) {
896 TRef sp = J->base[1] ? crec_ct_tv(J, d, 0, J->base[1], &rd->argv[1]) : 891 TRef sp = J->base[1] ? crec_ct_tv(J, d, 0, J->base[1], &rd->argv[1]) :
@@ -898,11 +893,36 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
898 sz == 4 ? lj_ir_kint(J, 0) : 893 sz == 4 ? lj_ir_kint(J, 0) :
899 (lj_needsplit(J), lj_ir_kint64(J, 0)); 894 (lj_needsplit(J), lj_ir_kint64(J, 0));
900 J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), trid, sp); 895 J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), trid, sp);
896 return;
901 } else { 897 } else {
902 TRef trcd = emitir(IRTG(IR_CNEW, IRT_CDATA), trid, TREF_NIL); 898 TRef trsz = TREF_NIL;
903 cTValue *fin; 899 if ((info & CTF_VLA)) { /* Calculate VLA/VLS size at runtime. */
904 J->base[0] = trcd; 900 CTSize sz0, sz1;
905 if (J->base[1] && !J->base[2] && 901 if (!J->base[1] || J->base[2])
902 lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: init VLA/VLS. */
903 trsz = crec_ct_tv(J, ctype_get(cts, CTID_INT32), 0,
904 J->base[1], &rd->argv[1]);
905 sz0 = lj_ctype_vlsize(cts, d, 0);
906 sz1 = lj_ctype_vlsize(cts, d, 1);
907 trsz = emitir(IRTGI(IR_MULOV), trsz, lj_ir_kint(J, (int32_t)(sz1-sz0)));
908 trsz = emitir(IRTGI(IR_ADDOV), trsz, lj_ir_kint(J, (int32_t)sz0));
909 J->base[1] = 0; /* Simplify logic below. */
910 } else if (ctype_align(info) > CT_MEMALIGN) {
911 trsz = lj_ir_kint(J, sz);
912 }
913 trcd = emitir(IRTG(IR_CNEW, IRT_CDATA), trid, trsz);
914 if (sz > 128 || (info & CTF_VLA)) {
915 TRef dp;
916 CTSize align;
917 special: /* Only handle bulk zero-fill for large/VLA/VLS types. */
918 if (J->base[1])
919 lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: init large/VLA/VLS types. */
920 dp = emitir(IRT(IR_ADD, IRT_PTR), trcd, lj_ir_kintp(J, sizeof(GCcdata)));
921 if (trsz == TREF_NIL) trsz = lj_ir_kint(J, sz);
922 align = ctype_align(info);
923 if (align < CT_MEMALIGN) align = CT_MEMALIGN;
924 crec_fill(J, dp, trsz, lj_ir_kint(J, 0), (1u << align));
925 } else if (J->base[1] && !J->base[2] &&
906 !lj_cconv_multi_init(cts, d, &rd->argv[1])) { 926 !lj_cconv_multi_init(cts, d, &rd->argv[1])) {
907 goto single_init; 927 goto single_init;
908 } else if (ctype_isarray(d->info)) { 928 } else if (ctype_isarray(d->info)) {
@@ -913,8 +933,9 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
913 TValue *sval = &tv; 933 TValue *sval = &tv;
914 MSize i; 934 MSize i;
915 tv.u64 = 0; 935 tv.u64 = 0;
916 if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info))) 936 if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info)) ||
917 lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: init array of aggregates. */ 937 esize * CREC_FILL_MAXUNROLL < sz)
938 goto special;
918 for (i = 1, ofs = 0; ofs < sz; ofs += esize) { 939 for (i = 1, ofs = 0; ofs < sz; ofs += esize) {
919 TRef dp = emitir(IRT(IR_ADD, IRT_PTR), trcd, 940 TRef dp = emitir(IRT(IR_ADD, IRT_PTR), trcd,
920 lj_ir_kintp(J, ofs + sizeof(GCcdata))); 941 lj_ir_kintp(J, ofs + sizeof(GCcdata)));
@@ -971,11 +992,12 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
971 crec_ct_tv(J, d, dp, lj_ir_kint(J, 0), &tv); 992 crec_ct_tv(J, d, dp, lj_ir_kint(J, 0), &tv);
972 } 993 }
973 } 994 }
974 /* Handle __gc metamethod. */
975 fin = lj_ctype_meta(cts, id, MM_gc);
976 if (fin)
977 crec_finalizer(J, trcd, fin);
978 } 995 }
996 J->base[0] = trcd;
997 /* Handle __gc metamethod. */
998 fin = lj_ctype_meta(cts, id, MM_gc);
999 if (fin)
1000 crec_finalizer(J, trcd, 0, fin);
979} 1001}
980 1002
981/* Record argument conversions. */ 1003/* Record argument conversions. */
@@ -1228,7 +1250,7 @@ static TRef crec_arith_int64(jit_State *J, TRef *sp, CType **s, MMS mm)
1228 for (i = 0; i < 2; i++) { 1250 for (i = 0; i < 2; i++) {
1229 IRType st = tref_type(sp[i]); 1251 IRType st = tref_type(sp[i]);
1230 if (st == IRT_NUM || st == IRT_FLOAT) 1252 if (st == IRT_NUM || st == IRT_FLOAT)
1231 sp[i] = emitconv(sp[i], dt, st, IRCONV_TRUNC|IRCONV_ANY); 1253 sp[i] = emitconv(sp[i], dt, st, IRCONV_ANY);
1232 else if (!(st == IRT_I64 || st == IRT_U64)) 1254 else if (!(st == IRT_I64 || st == IRT_U64))
1233 sp[i] = emitconv(sp[i], dt, IRT_INT, 1255 sp[i] = emitconv(sp[i], dt, IRT_INT,
1234 (s[i]->info & CTF_UNSIGNED) ? 0 : IRCONV_SEXT); 1256 (s[i]->info & CTF_UNSIGNED) ? 0 : IRCONV_SEXT);
@@ -1296,15 +1318,14 @@ static TRef crec_arith_ptr(jit_State *J, TRef *sp, CType **s, MMS mm)
1296 CTypeID id; 1318 CTypeID id;
1297#if LJ_64 1319#if LJ_64
1298 if (t == IRT_NUM || t == IRT_FLOAT) 1320 if (t == IRT_NUM || t == IRT_FLOAT)
1299 tr = emitconv(tr, IRT_INTP, t, IRCONV_TRUNC|IRCONV_ANY); 1321 tr = emitconv(tr, IRT_INTP, t, IRCONV_ANY);
1300 else if (!(t == IRT_I64 || t == IRT_U64)) 1322 else if (!(t == IRT_I64 || t == IRT_U64))
1301 tr = emitconv(tr, IRT_INTP, IRT_INT, 1323 tr = emitconv(tr, IRT_INTP, IRT_INT,
1302 ((t - IRT_I8) & 1) ? 0 : IRCONV_SEXT); 1324 ((t - IRT_I8) & 1) ? 0 : IRCONV_SEXT);
1303#else 1325#else
1304 if (!tref_typerange(sp[1], IRT_I8, IRT_U32)) { 1326 if (!tref_typerange(sp[1], IRT_I8, IRT_U32)) {
1305 tr = emitconv(tr, IRT_INTP, t, 1327 tr = emitconv(tr, IRT_INTP, t,
1306 (t == IRT_NUM || t == IRT_FLOAT) ? 1328 (t == IRT_NUM || t == IRT_FLOAT) ? IRCONV_ANY : 0);
1307 IRCONV_TRUNC|IRCONV_ANY : 0);
1308 } 1329 }
1309#endif 1330#endif
1310 tr = emitir(IRT(IR_MUL, IRT_INTP), tr, lj_ir_kintp(J, sz)); 1331 tr = emitir(IRT(IR_MUL, IRT_INTP), tr, lj_ir_kintp(J, sz));
@@ -1623,7 +1644,139 @@ void LJ_FASTCALL recff_ffi_xof(jit_State *J, RecordFFData *rd)
1623void LJ_FASTCALL recff_ffi_gc(jit_State *J, RecordFFData *rd) 1644void LJ_FASTCALL recff_ffi_gc(jit_State *J, RecordFFData *rd)
1624{ 1645{
1625 argv2cdata(J, J->base[0], &rd->argv[0]); 1646 argv2cdata(J, J->base[0], &rd->argv[0]);
1626 crec_finalizer(J, J->base[0], &rd->argv[1]); 1647 if (!J->base[1])
1648 lj_trace_err(J, LJ_TRERR_BADTYPE);
1649 crec_finalizer(J, J->base[0], J->base[1], &rd->argv[1]);
1650}
1651
1652/* -- 64 bit bit.* library functions -------------------------------------- */
1653
1654/* Determine bit operation type from argument type. */
1655static CTypeID crec_bit64_type(CTState *cts, cTValue *tv)
1656{
1657 if (tviscdata(tv)) {
1658 CType *ct = lj_ctype_rawref(cts, cdataV(tv)->ctypeid);
1659 if (ctype_isenum(ct->info)) ct = ctype_child(cts, ct);
1660 if ((ct->info & (CTMASK_NUM|CTF_BOOL|CTF_FP|CTF_UNSIGNED)) ==
1661 CTINFO(CT_NUM, CTF_UNSIGNED) && ct->size == 8)
1662 return CTID_UINT64; /* Use uint64_t, since it has the highest rank. */
1663 return CTID_INT64; /* Otherwise use int64_t. */
1664 }
1665 return 0; /* Use regular 32 bit ops. */
1666}
1667
1668void LJ_FASTCALL recff_bit64_tobit(jit_State *J, RecordFFData *rd)
1669{
1670 CTState *cts = ctype_ctsG(J2G(J));
1671 TRef tr = crec_ct_tv(J, ctype_get(cts, CTID_INT64), 0,
1672 J->base[0], &rd->argv[0]);
1673 if (!tref_isinteger(tr))
1674 tr = emitconv(tr, IRT_INT, tref_type(tr), 0);
1675 J->base[0] = tr;
1676}
1677
1678int LJ_FASTCALL recff_bit64_unary(jit_State *J, RecordFFData *rd)
1679{
1680 CTState *cts = ctype_ctsG(J2G(J));
1681 CTypeID id = crec_bit64_type(cts, &rd->argv[0]);
1682 if (id) {
1683 TRef tr = crec_ct_tv(J, ctype_get(cts, id), 0, J->base[0], &rd->argv[0]);
1684 tr = emitir(IRT(rd->data, id-CTID_INT64+IRT_I64), tr, 0);
1685 J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr);
1686 return 1;
1687 }
1688 return 0;
1689}
1690
1691int LJ_FASTCALL recff_bit64_nary(jit_State *J, RecordFFData *rd)
1692{
1693 CTState *cts = ctype_ctsG(J2G(J));
1694 CTypeID id = 0;
1695 MSize i;
1696 for (i = 0; J->base[i] != 0; i++) {
1697 CTypeID aid = crec_bit64_type(cts, &rd->argv[i]);
1698 if (id < aid) id = aid; /* Determine highest type rank of all arguments. */
1699 }
1700 if (id) {
1701 CType *ct = ctype_get(cts, id);
1702 uint32_t ot = IRT(rd->data, id-CTID_INT64+IRT_I64);
1703 TRef tr = crec_ct_tv(J, ct, 0, J->base[0], &rd->argv[0]);
1704 for (i = 1; J->base[i] != 0; i++) {
1705 TRef tr2 = crec_ct_tv(J, ct, 0, J->base[i], &rd->argv[i]);
1706 tr = emitir(ot, tr, tr2);
1707 }
1708 J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr);
1709 return 1;
1710 }
1711 return 0;
1712}
1713
1714int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd)
1715{
1716 CTState *cts = ctype_ctsG(J2G(J));
1717 CTypeID id;
1718 TRef tsh = 0;
1719 if (J->base[0] && tref_iscdata(J->base[1])) {
1720 tsh = crec_ct_tv(J, ctype_get(cts, CTID_INT64), 0,
1721 J->base[1], &rd->argv[1]);
1722 if (!tref_isinteger(tsh))
1723 tsh = emitconv(tsh, IRT_INT, tref_type(tsh), 0);
1724 J->base[1] = tsh;
1725 }
1726 id = crec_bit64_type(cts, &rd->argv[0]);
1727 if (id) {
1728 TRef tr = crec_ct_tv(J, ctype_get(cts, id), 0, J->base[0], &rd->argv[0]);
1729 uint32_t op = rd->data;
1730 if (!tsh) tsh = lj_opt_narrow_tobit(J, J->base[1]);
1731 if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) &&
1732 !tref_isk(tsh))
1733 tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 63));
1734#ifdef LJ_TARGET_UNIFYROT
1735 if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) {
1736 op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR;
1737 tsh = emitir(IRTI(IR_NEG), tsh, tsh);
1738 }
1739#endif
1740 tr = emitir(IRT(op, id-CTID_INT64+IRT_I64), tr, tsh);
1741 J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr);
1742 return 1;
1743 }
1744 return 0;
1745}
1746
1747TRef recff_bit64_tohex(jit_State *J, RecordFFData *rd, TRef hdr)
1748{
1749 CTState *cts = ctype_ctsG(J2G(J));
1750 CTypeID id = crec_bit64_type(cts, &rd->argv[0]);
1751 TRef tr, trsf = J->base[1];
1752 SFormat sf = (STRFMT_UINT|STRFMT_T_HEX);
1753 int32_t n;
1754 if (trsf) {
1755 CTypeID id2 = 0;
1756 n = (int32_t)lj_carith_check64(J->L, 2, &id2);
1757 if (id2)
1758 trsf = crec_ct_tv(J, ctype_get(cts, CTID_INT32), 0, trsf, &rd->argv[1]);
1759 else
1760 trsf = lj_opt_narrow_tobit(J, trsf);
1761 emitir(IRTGI(IR_EQ), trsf, lj_ir_kint(J, n)); /* Specialize to n. */
1762 } else {
1763 n = id ? 16 : 8;
1764 }
1765 if (n < 0) { n = -n; sf |= STRFMT_F_UPPER; }
1766 sf |= ((SFormat)((n+1)&255) << STRFMT_SH_PREC);
1767 if (id) {
1768 tr = crec_ct_tv(J, ctype_get(cts, id), 0, J->base[0], &rd->argv[0]);
1769 if (n < 16)
1770 tr = emitir(IRT(IR_BAND, IRT_U64), tr,
1771 lj_ir_kint64(J, ((uint64_t)1 << 4*n)-1));
1772 } else {
1773 tr = lj_opt_narrow_tobit(J, J->base[0]);
1774 if (n < 8)
1775 tr = emitir(IRTI(IR_BAND), tr, lj_ir_kint(J, (int32_t)((1u << 4*n)-1)));
1776 tr = emitconv(tr, IRT_U64, IRT_INT, 0); /* No sign-extension. */
1777 lj_needsplit(J);
1778 }
1779 return lj_ir_call(J, IRCALL_lj_strfmt_putfxint, hdr, lj_ir_kint(J, sf), tr);
1627} 1780}
1628 1781
1629/* -- Miscellaneous library functions ------------------------------------- */ 1782/* -- Miscellaneous library functions ------------------------------------- */
diff --git a/src/lj_crecord.h b/src/lj_crecord.h
index dea05f78..47c0a69d 100644
--- a/src/lj_crecord.h
+++ b/src/lj_crecord.h
@@ -25,6 +25,13 @@ LJ_FUNC void LJ_FASTCALL recff_ffi_istype(jit_State *J, RecordFFData *rd);
25LJ_FUNC void LJ_FASTCALL recff_ffi_abi(jit_State *J, RecordFFData *rd); 25LJ_FUNC void LJ_FASTCALL recff_ffi_abi(jit_State *J, RecordFFData *rd);
26LJ_FUNC void LJ_FASTCALL recff_ffi_xof(jit_State *J, RecordFFData *rd); 26LJ_FUNC void LJ_FASTCALL recff_ffi_xof(jit_State *J, RecordFFData *rd);
27LJ_FUNC void LJ_FASTCALL recff_ffi_gc(jit_State *J, RecordFFData *rd); 27LJ_FUNC void LJ_FASTCALL recff_ffi_gc(jit_State *J, RecordFFData *rd);
28
29LJ_FUNC void LJ_FASTCALL recff_bit64_tobit(jit_State *J, RecordFFData *rd);
30LJ_FUNC int LJ_FASTCALL recff_bit64_unary(jit_State *J, RecordFFData *rd);
31LJ_FUNC int LJ_FASTCALL recff_bit64_nary(jit_State *J, RecordFFData *rd);
32LJ_FUNC int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd);
33LJ_FUNC TRef recff_bit64_tohex(jit_State *J, RecordFFData *rd, TRef hdr);
34
28LJ_FUNC void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd); 35LJ_FUNC void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd);
29#endif 36#endif
30 37
diff --git a/src/lj_ctype.c b/src/lj_ctype.c
index 57a0d7cc..19a79c45 100644
--- a/src/lj_ctype.c
+++ b/src/lj_ctype.c
@@ -11,6 +11,7 @@
11#include "lj_err.h" 11#include "lj_err.h"
12#include "lj_str.h" 12#include "lj_str.h"
13#include "lj_tab.h" 13#include "lj_tab.h"
14#include "lj_strfmt.h"
14#include "lj_ctype.h" 15#include "lj_ctype.h"
15#include "lj_ccallback.h" 16#include "lj_ccallback.h"
16 17
@@ -568,19 +569,19 @@ GCstr *lj_ctype_repr_int64(lua_State *L, uint64_t n, int isunsigned)
568/* Convert complex to string with 'i' or 'I' suffix. */ 569/* Convert complex to string with 'i' or 'I' suffix. */
569GCstr *lj_ctype_repr_complex(lua_State *L, void *sp, CTSize size) 570GCstr *lj_ctype_repr_complex(lua_State *L, void *sp, CTSize size)
570{ 571{
571 char buf[2*LJ_STR_NUMBUF+2+1]; 572 char buf[2*STRFMT_MAXBUF_NUM+2+1], *p = buf;
572 TValue re, im; 573 TValue re, im;
573 size_t len;
574 if (size == 2*sizeof(double)) { 574 if (size == 2*sizeof(double)) {
575 re.n = *(double *)sp; im.n = ((double *)sp)[1]; 575 re.n = *(double *)sp; im.n = ((double *)sp)[1];
576 } else { 576 } else {
577 re.n = (double)*(float *)sp; im.n = (double)((float *)sp)[1]; 577 re.n = (double)*(float *)sp; im.n = (double)((float *)sp)[1];
578 } 578 }
579 len = lj_str_bufnum(buf, &re); 579 p = lj_strfmt_wnum(p, &re);
580 if (!(im.u32.hi & 0x80000000u) || im.n != im.n) buf[len++] = '+'; 580 if (!(im.u32.hi & 0x80000000u) || im.n != im.n) *p++ = '+';
581 len += lj_str_bufnum(buf+len, &im); 581 p = lj_strfmt_wnum(p, &im);
582 buf[len] = buf[len-1] >= 'a' ? 'I' : 'i'; 582 *p = *(p-1) >= 'a' ? 'I' : 'i';
583 return lj_str_new(L, buf, len+1); 583 p++;
584 return lj_str_new(L, buf, p-buf);
584} 585}
585 586
586/* -- C type state -------------------------------------------------------- */ 587/* -- C type state -------------------------------------------------------- */
diff --git a/src/lj_debug.c b/src/lj_debug.c
index be7fb2b1..54f7db74 100644
--- a/src/lj_debug.c
+++ b/src/lj_debug.c
@@ -9,11 +9,12 @@
9#include "lj_obj.h" 9#include "lj_obj.h"
10#include "lj_err.h" 10#include "lj_err.h"
11#include "lj_debug.h" 11#include "lj_debug.h"
12#include "lj_str.h" 12#include "lj_buf.h"
13#include "lj_tab.h" 13#include "lj_tab.h"
14#include "lj_state.h" 14#include "lj_state.h"
15#include "lj_frame.h" 15#include "lj_frame.h"
16#include "lj_bc.h" 16#include "lj_bc.h"
17#include "lj_strfmt.h"
17#if LJ_HASJIT 18#if LJ_HASJIT
18#include "lj_jit.h" 19#include "lj_jit.h"
19#endif 20#endif
@@ -133,20 +134,6 @@ static BCLine debug_frameline(lua_State *L, GCfunc *fn, cTValue *nextframe)
133 134
134/* -- Variable names ------------------------------------------------------ */ 135/* -- Variable names ------------------------------------------------------ */
135 136
136/* Read ULEB128 value. */
137static uint32_t debug_read_uleb128(const uint8_t **pp)
138{
139 const uint8_t *p = *pp;
140 uint32_t v = *p++;
141 if (LJ_UNLIKELY(v >= 0x80)) {
142 int sh = 0;
143 v &= 0x7f;
144 do { v |= ((*p & 0x7f) << (sh += 7)); } while (*p++ >= 0x80);
145 }
146 *pp = p;
147 return v;
148}
149
150/* Get name of a local variable from slot number and PC. */ 137/* Get name of a local variable from slot number and PC. */
151static const char *debug_varname(const GCproto *pt, BCPos pc, BCReg slot) 138static const char *debug_varname(const GCproto *pt, BCPos pc, BCReg slot)
152{ 139{
@@ -162,9 +149,9 @@ static const char *debug_varname(const GCproto *pt, BCPos pc, BCReg slot)
162 } else { 149 } else {
163 while (*p++) ; /* Skip over variable name string. */ 150 while (*p++) ; /* Skip over variable name string. */
164 } 151 }
165 lastpc = startpc = lastpc + debug_read_uleb128(&p); 152 lastpc = startpc = lastpc + lj_buf_ruleb128((const char **)&p);
166 if (startpc > pc) break; 153 if (startpc > pc) break;
167 endpc = startpc + debug_read_uleb128(&p); 154 endpc = startpc + lj_buf_ruleb128((const char **)&p);
168 if (pc < endpc && slot-- == 0) { 155 if (pc < endpc && slot-- == 0) {
169 if (vn < VARNAME__MAX) { 156 if (vn < VARNAME__MAX) {
170#define VARNAMESTR(name, str) str "\0" 157#define VARNAMESTR(name, str) str "\0"
@@ -321,7 +308,7 @@ const char *lj_debug_funcname(lua_State *L, TValue *frame, const char **name)
321/* -- Source code locations ----------------------------------------------- */ 308/* -- Source code locations ----------------------------------------------- */
322 309
323/* Generate shortened source name. */ 310/* Generate shortened source name. */
324void lj_debug_shortname(char *out, GCstr *str) 311void lj_debug_shortname(char *out, GCstr *str, BCLine line)
325{ 312{
326 const char *src = strdata(str); 313 const char *src = strdata(str);
327 if (*src == '=') { 314 if (*src == '=') {
@@ -335,11 +322,11 @@ void lj_debug_shortname(char *out, GCstr *str)
335 *out++ = '.'; *out++ = '.'; *out++ = '.'; 322 *out++ = '.'; *out++ = '.'; *out++ = '.';
336 } 323 }
337 strcpy(out, src); 324 strcpy(out, src);
338 } else { /* Output [string "string"]. */ 325 } else { /* Output [string "string"] or [builtin:name]. */
339 size_t len; /* Length, up to first control char. */ 326 size_t len; /* Length, up to first control char. */
340 for (len = 0; len < LUA_IDSIZE-12; len++) 327 for (len = 0; len < LUA_IDSIZE-12; len++)
341 if (((const unsigned char *)src)[len] < ' ') break; 328 if (((const unsigned char *)src)[len] < ' ') break;
342 strcpy(out, "[string \""); out += 9; 329 strcpy(out, line == ~(BCLine)0 ? "[builtin:" : "[string \""); out += 9;
343 if (src[len] != '\0') { /* Must truncate? */ 330 if (src[len] != '\0') { /* Must truncate? */
344 if (len > LUA_IDSIZE-15) len = LUA_IDSIZE-15; 331 if (len > LUA_IDSIZE-15) len = LUA_IDSIZE-15;
345 strncpy(out, src, len); out += len; 332 strncpy(out, src, len); out += len;
@@ -347,7 +334,7 @@ void lj_debug_shortname(char *out, GCstr *str)
347 } else { 334 } else {
348 strcpy(out, src); out += len; 335 strcpy(out, src); out += len;
349 } 336 }
350 strcpy(out, "\"]"); 337 strcpy(out, line == ~(BCLine)0 ? "]" : "\"]");
351 } 338 }
352} 339}
353 340
@@ -360,14 +347,15 @@ void lj_debug_addloc(lua_State *L, const char *msg,
360 if (isluafunc(fn)) { 347 if (isluafunc(fn)) {
361 BCLine line = debug_frameline(L, fn, nextframe); 348 BCLine line = debug_frameline(L, fn, nextframe);
362 if (line >= 0) { 349 if (line >= 0) {
350 GCproto *pt = funcproto(fn);
363 char buf[LUA_IDSIZE]; 351 char buf[LUA_IDSIZE];
364 lj_debug_shortname(buf, proto_chunkname(funcproto(fn))); 352 lj_debug_shortname(buf, proto_chunkname(pt), pt->firstline);
365 lj_str_pushf(L, "%s:%d: %s", buf, line, msg); 353 lj_strfmt_pushf(L, "%s:%d: %s", buf, line, msg);
366 return; 354 return;
367 } 355 }
368 } 356 }
369 } 357 }
370 lj_str_pushf(L, "%s", msg); 358 lj_strfmt_pushf(L, "%s", msg);
371} 359}
372 360
373/* Push location string for a bytecode position to Lua stack. */ 361/* Push location string for a bytecode position to Lua stack. */
@@ -377,20 +365,22 @@ void lj_debug_pushloc(lua_State *L, GCproto *pt, BCPos pc)
377 const char *s = strdata(name); 365 const char *s = strdata(name);
378 MSize i, len = name->len; 366 MSize i, len = name->len;
379 BCLine line = lj_debug_line(pt, pc); 367 BCLine line = lj_debug_line(pt, pc);
380 if (*s == '@') { 368 if (pt->firstline == ~(BCLine)0) {
369 lj_strfmt_pushf(L, "builtin:%s", s);
370 } else if (*s == '@') {
381 s++; len--; 371 s++; len--;
382 for (i = len; i > 0; i--) 372 for (i = len; i > 0; i--)
383 if (s[i] == '/' || s[i] == '\\') { 373 if (s[i] == '/' || s[i] == '\\') {
384 s += i+1; 374 s += i+1;
385 break; 375 break;
386 } 376 }
387 lj_str_pushf(L, "%s:%d", s, line); 377 lj_strfmt_pushf(L, "%s:%d", s, line);
388 } else if (len > 40) { 378 } else if (len > 40) {
389 lj_str_pushf(L, "%p:%d", pt, line); 379 lj_strfmt_pushf(L, "%p:%d", pt, line);
390 } else if (*s == '=') { 380 } else if (*s == '=') {
391 lj_str_pushf(L, "%s:%d", s+1, line); 381 lj_strfmt_pushf(L, "%s:%d", s+1, line);
392 } else { 382 } else {
393 lj_str_pushf(L, "\"%s\":%d", s, line); 383 lj_strfmt_pushf(L, "\"%s\":%d", s, line);
394 } 384 }
395} 385}
396 386
@@ -453,7 +443,7 @@ int lj_debug_getinfo(lua_State *L, const char *what, lj_Debug *ar, int ext)
453 BCLine firstline = pt->firstline; 443 BCLine firstline = pt->firstline;
454 GCstr *name = proto_chunkname(pt); 444 GCstr *name = proto_chunkname(pt);
455 ar->source = strdata(name); 445 ar->source = strdata(name);
456 lj_debug_shortname(ar->short_src, name); 446 lj_debug_shortname(ar->short_src, name, pt->firstline);
457 ar->linedefined = (int)firstline; 447 ar->linedefined = (int)firstline;
458 ar->lastlinedefined = (int)(firstline + pt->numline); 448 ar->lastlinedefined = (int)(firstline + pt->numline);
459 ar->what = firstline ? "Lua" : "main"; 449 ar->what = firstline ? "Lua" : "main";
diff --git a/src/lj_debug.h b/src/lj_debug.h
index 7cf57de7..4144b47e 100644
--- a/src/lj_debug.h
+++ b/src/lj_debug.h
@@ -34,7 +34,7 @@ LJ_FUNC const char *lj_debug_slotname(GCproto *pt, const BCIns *pc,
34 BCReg slot, const char **name); 34 BCReg slot, const char **name);
35LJ_FUNC const char *lj_debug_funcname(lua_State *L, TValue *frame, 35LJ_FUNC const char *lj_debug_funcname(lua_State *L, TValue *frame,
36 const char **name); 36 const char **name);
37LJ_FUNC void lj_debug_shortname(char *out, GCstr *str); 37LJ_FUNC void lj_debug_shortname(char *out, GCstr *str, BCLine line);
38LJ_FUNC void lj_debug_addloc(lua_State *L, const char *msg, 38LJ_FUNC void lj_debug_addloc(lua_State *L, const char *msg,
39 cTValue *frame, cTValue *nextframe); 39 cTValue *frame, cTValue *nextframe);
40LJ_FUNC void lj_debug_pushloc(lua_State *L, GCproto *pt, BCPos pc); 40LJ_FUNC void lj_debug_pushloc(lua_State *L, GCproto *pt, BCPos pc);
diff --git a/src/lj_dispatch.c b/src/lj_dispatch.c
index d57f1a6f..a78075a7 100644
--- a/src/lj_dispatch.c
+++ b/src/lj_dispatch.c
@@ -8,6 +8,7 @@
8 8
9#include "lj_obj.h" 9#include "lj_obj.h"
10#include "lj_err.h" 10#include "lj_err.h"
11#include "lj_buf.h"
11#include "lj_func.h" 12#include "lj_func.h"
12#include "lj_str.h" 13#include "lj_str.h"
13#include "lj_tab.h" 14#include "lj_tab.h"
@@ -17,6 +18,7 @@
17#include "lj_frame.h" 18#include "lj_frame.h"
18#include "lj_bc.h" 19#include "lj_bc.h"
19#include "lj_ff.h" 20#include "lj_ff.h"
21#include "lj_strfmt.h"
20#if LJ_HASJIT 22#if LJ_HASJIT
21#include "lj_jit.h" 23#include "lj_jit.h"
22#endif 24#endif
diff --git a/src/lj_dispatch.h b/src/lj_dispatch.h
index a56b6260..326297cd 100644
--- a/src/lj_dispatch.h
+++ b/src/lj_dispatch.h
@@ -33,11 +33,12 @@
33 _(lj_ffh_coroutine_wrap_err) _(lj_func_closeuv) _(lj_func_newL_gc) \ 33 _(lj_ffh_coroutine_wrap_err) _(lj_func_closeuv) _(lj_func_newL_gc) \
34 _(lj_gc_barrieruv) _(lj_gc_step) _(lj_gc_step_fixtop) _(lj_meta_arith) \ 34 _(lj_gc_barrieruv) _(lj_gc_step) _(lj_gc_step_fixtop) _(lj_meta_arith) \
35 _(lj_meta_call) _(lj_meta_cat) _(lj_meta_comp) _(lj_meta_equal) \ 35 _(lj_meta_call) _(lj_meta_cat) _(lj_meta_comp) _(lj_meta_equal) \
36 _(lj_meta_for) _(lj_meta_len) _(lj_meta_tget) _(lj_meta_tset) \ 36 _(lj_meta_for) _(lj_meta_istype) _(lj_meta_len) _(lj_meta_tget) \
37 _(lj_state_growstack) _(lj_str_fromnum) _(lj_str_fromnumber) _(lj_str_new) \ 37 _(lj_meta_tset) _(lj_state_growstack) _(lj_strfmt_num) \
38 _(lj_tab_dup) _(lj_tab_get) _(lj_tab_getinth) _(lj_tab_len) _(lj_tab_new) \ 38 _(lj_str_new) _(lj_tab_dup) _(lj_tab_get) _(lj_tab_getinth) _(lj_tab_len) \
39 _(lj_tab_newkey) _(lj_tab_next) _(lj_tab_reasize) \ 39 _(lj_tab_new) _(lj_tab_newkey) _(lj_tab_next) _(lj_tab_reasize) \
40 JITGOTDEF(_) FFIGOTDEF(_) 40 _(lj_tab_setinth) _(lj_buf_putstr_reverse) _(lj_buf_putstr_lower) \
41 _(lj_buf_putstr_upper) _(lj_buf_tostr) JITGOTDEF(_) FFIGOTDEF(_)
41 42
42enum { 43enum {
43#define GOTENUM(name) LJ_GOT_##name, 44#define GOTENUM(name) LJ_GOT_##name,
@@ -60,7 +61,7 @@ typedef uint16_t HotCount;
60#define HOTCOUNT_CALL 1 61#define HOTCOUNT_CALL 1
61 62
62/* This solves a circular dependency problem -- bump as needed. Sigh. */ 63/* This solves a circular dependency problem -- bump as needed. Sigh. */
63#define GG_NUM_ASMFF 62 64#define GG_NUM_ASMFF 57
64 65
65#define GG_LEN_DDISP (BC__MAX + GG_NUM_ASMFF) 66#define GG_LEN_DDISP (BC__MAX + GG_NUM_ASMFF)
66#define GG_LEN_SDISP BC_FUNCF 67#define GG_LEN_SDISP BC_FUNCF
diff --git a/src/lj_emit_arm.h b/src/lj_emit_arm.h
index b76a9a45..1a9a6852 100644
--- a/src/lj_emit_arm.h
+++ b/src/lj_emit_arm.h
@@ -308,30 +308,30 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
308 emit_dm(as, ARMI_MOV, dst, src); 308 emit_dm(as, ARMI_MOV, dst, src);
309} 309}
310 310
311/* Generic load of register from stack slot. */ 311/* Generic load of register with base and (small) offset address. */
312static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs) 312static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
313{ 313{
314#if LJ_SOFTFP 314#if LJ_SOFTFP
315 lua_assert(!irt_isnum(ir->t)); UNUSED(ir); 315 lua_assert(!irt_isnum(ir->t)); UNUSED(ir);
316#else 316#else
317 if (r >= RID_MAX_GPR) 317 if (r >= RID_MAX_GPR)
318 emit_vlso(as, irt_isnum(ir->t) ? ARMI_VLDR_D : ARMI_VLDR_S, r, RID_SP, ofs); 318 emit_vlso(as, irt_isnum(ir->t) ? ARMI_VLDR_D : ARMI_VLDR_S, r, base, ofs);
319 else 319 else
320#endif 320#endif
321 emit_lso(as, ARMI_LDR, r, RID_SP, ofs); 321 emit_lso(as, ARMI_LDR, r, base, ofs);
322} 322}
323 323
324/* Generic store of register to stack slot. */ 324/* Generic store of register with base and (small) offset address. */
325static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs) 325static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
326{ 326{
327#if LJ_SOFTFP 327#if LJ_SOFTFP
328 lua_assert(!irt_isnum(ir->t)); UNUSED(ir); 328 lua_assert(!irt_isnum(ir->t)); UNUSED(ir);
329#else 329#else
330 if (r >= RID_MAX_GPR) 330 if (r >= RID_MAX_GPR)
331 emit_vlso(as, irt_isnum(ir->t) ? ARMI_VSTR_D : ARMI_VSTR_S, r, RID_SP, ofs); 331 emit_vlso(as, irt_isnum(ir->t) ? ARMI_VSTR_D : ARMI_VSTR_S, r, base, ofs);
332 else 332 else
333#endif 333#endif
334 emit_lso(as, ARMI_STR, r, RID_SP, ofs); 334 emit_lso(as, ARMI_STR, r, base, ofs);
335} 335}
336 336
337/* Emit an arithmetic/logic operation with a constant operand. */ 337/* Emit an arithmetic/logic operation with a constant operand. */
diff --git a/src/lj_emit_mips.h b/src/lj_emit_mips.h
index 74821b8b..d6ea1d52 100644
--- a/src/lj_emit_mips.h
+++ b/src/lj_emit_mips.h
@@ -178,24 +178,24 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
178 emit_fg(as, irt_isnum(ir->t) ? MIPSI_MOV_D : MIPSI_MOV_S, dst, src); 178 emit_fg(as, irt_isnum(ir->t) ? MIPSI_MOV_D : MIPSI_MOV_S, dst, src);
179} 179}
180 180
181/* Generic load of register from stack slot. */ 181/* Generic load of register with base and (small) offset address. */
182static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs) 182static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
183{ 183{
184 if (r < RID_MAX_GPR) 184 if (r < RID_MAX_GPR)
185 emit_tsi(as, MIPSI_LW, r, RID_SP, ofs); 185 emit_tsi(as, MIPSI_LW, r, base, ofs);
186 else 186 else
187 emit_tsi(as, irt_isnum(ir->t) ? MIPSI_LDC1 : MIPSI_LWC1, 187 emit_tsi(as, irt_isnum(ir->t) ? MIPSI_LDC1 : MIPSI_LWC1,
188 (r & 31), RID_SP, ofs); 188 (r & 31), base, ofs);
189} 189}
190 190
191/* Generic store of register to stack slot. */ 191/* Generic store of register with base and (small) offset address. */
192static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs) 192static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
193{ 193{
194 if (r < RID_MAX_GPR) 194 if (r < RID_MAX_GPR)
195 emit_tsi(as, MIPSI_SW, r, RID_SP, ofs); 195 emit_tsi(as, MIPSI_SW, r, base, ofs);
196 else 196 else
197 emit_tsi(as, irt_isnum(ir->t) ? MIPSI_SDC1 : MIPSI_SWC1, 197 emit_tsi(as, irt_isnum(ir->t) ? MIPSI_SDC1 : MIPSI_SWC1,
198 (r&31), RID_SP, ofs); 198 (r&31), base, ofs);
199} 199}
200 200
201/* Add offset to pointer. */ 201/* Add offset to pointer. */
diff --git a/src/lj_emit_ppc.h b/src/lj_emit_ppc.h
index a589f3a6..6938c446 100644
--- a/src/lj_emit_ppc.h
+++ b/src/lj_emit_ppc.h
@@ -186,22 +186,22 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
186 emit_fb(as, PPCI_FMR, dst, src); 186 emit_fb(as, PPCI_FMR, dst, src);
187} 187}
188 188
189/* Generic load of register from stack slot. */ 189/* Generic load of register with base and (small) offset address. */
190static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs) 190static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
191{ 191{
192 if (r < RID_MAX_GPR) 192 if (r < RID_MAX_GPR)
193 emit_tai(as, PPCI_LWZ, r, RID_SP, ofs); 193 emit_tai(as, PPCI_LWZ, r, base, ofs);
194 else 194 else
195 emit_fai(as, irt_isnum(ir->t) ? PPCI_LFD : PPCI_LFS, r, RID_SP, ofs); 195 emit_fai(as, irt_isnum(ir->t) ? PPCI_LFD : PPCI_LFS, r, base, ofs);
196} 196}
197 197
198/* Generic store of register to stack slot. */ 198/* Generic store of register with base and (small) offset address. */
199static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs) 199static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
200{ 200{
201 if (r < RID_MAX_GPR) 201 if (r < RID_MAX_GPR)
202 emit_tai(as, PPCI_STW, r, RID_SP, ofs); 202 emit_tai(as, PPCI_STW, r, base, ofs);
203 else 203 else
204 emit_fai(as, irt_isnum(ir->t) ? PPCI_STFD : PPCI_STFS, r, RID_SP, ofs); 204 emit_fai(as, irt_isnum(ir->t) ? PPCI_STFD : PPCI_STFS, r, base, ofs);
205} 205}
206 206
207/* Emit a compare (for equality) with a constant operand. */ 207/* Emit a compare (for equality) with a constant operand. */
diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h
index bd184a30..ca63f59c 100644
--- a/src/lj_emit_x86.h
+++ b/src/lj_emit_x86.h
@@ -241,10 +241,6 @@ static void emit_gmrmi(ASMState *as, x86Group xg, Reg rb, int32_t i)
241 241
242/* -- Emit loads/stores --------------------------------------------------- */ 242/* -- Emit loads/stores --------------------------------------------------- */
243 243
244/* Instruction selection for XMM moves. */
245#define XMM_MOVRR(as) ((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVSD : XO_MOVAPS)
246#define XMM_MOVRM(as) ((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVLPD : XO_MOVSD)
247
248/* mov [base+ofs], i */ 244/* mov [base+ofs], i */
249static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i) 245static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i)
250{ 246{
@@ -314,7 +310,7 @@ static void emit_loadn(ASMState *as, Reg r, cTValue *tv)
314 if (tvispzero(tv)) /* Use xor only for +0. */ 310 if (tvispzero(tv)) /* Use xor only for +0. */
315 emit_rr(as, XO_XORPS, r, r); 311 emit_rr(as, XO_XORPS, r, r);
316 else 312 else
317 emit_rma(as, XMM_MOVRM(as), r, &tv->n); 313 emit_rma(as, XO_MOVSD, r, &tv->n);
318} 314}
319 315
320/* -- Emit control-flow instructions -------------------------------------- */ 316/* -- Emit control-flow instructions -------------------------------------- */
@@ -427,25 +423,25 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
427 if (dst < RID_MAX_GPR) 423 if (dst < RID_MAX_GPR)
428 emit_rr(as, XO_MOV, REX_64IR(ir, dst), src); 424 emit_rr(as, XO_MOV, REX_64IR(ir, dst), src);
429 else 425 else
430 emit_rr(as, XMM_MOVRR(as), dst, src); 426 emit_rr(as, XO_MOVAPS, dst, src);
431} 427}
432 428
433/* Generic load of register from stack slot. */ 429/* Generic load of register with base and (small) offset address. */
434static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs) 430static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
435{ 431{
436 if (r < RID_MAX_GPR) 432 if (r < RID_MAX_GPR)
437 emit_rmro(as, XO_MOV, REX_64IR(ir, r), RID_ESP, ofs); 433 emit_rmro(as, XO_MOV, REX_64IR(ir, r), base, ofs);
438 else 434 else
439 emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, r, RID_ESP, ofs); 435 emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, r, base, ofs);
440} 436}
441 437
442/* Generic store of register to stack slot. */ 438/* Generic store of register with base and (small) offset address. */
443static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs) 439static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
444{ 440{
445 if (r < RID_MAX_GPR) 441 if (r < RID_MAX_GPR)
446 emit_rmro(as, XO_MOVto, REX_64IR(ir, r), RID_ESP, ofs); 442 emit_rmro(as, XO_MOVto, REX_64IR(ir, r), base, ofs);
447 else 443 else
448 emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSDto : XO_MOVSSto, r, RID_ESP, ofs); 444 emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSDto : XO_MOVSSto, r, base, ofs);
449} 445}
450 446
451/* Add offset to pointer. */ 447/* Add offset to pointer. */
diff --git a/src/lj_err.c b/src/lj_err.c
index 42cd12b4..7ae87a82 100644
--- a/src/lj_err.c
+++ b/src/lj_err.c
@@ -16,6 +16,7 @@
16#include "lj_ff.h" 16#include "lj_ff.h"
17#include "lj_trace.h" 17#include "lj_trace.h"
18#include "lj_vm.h" 18#include "lj_vm.h"
19#include "lj_strfmt.h"
19 20
20/* 21/*
21** LuaJIT can either use internal or external frame unwinding: 22** LuaJIT can either use internal or external frame unwinding:
@@ -573,7 +574,7 @@ LJ_NORET LJ_NOINLINE static void err_msgv(lua_State *L, ErrMsg em, ...)
573 va_list argp; 574 va_list argp;
574 va_start(argp, em); 575 va_start(argp, em);
575 if (curr_funcisL(L)) L->top = curr_topL(L); 576 if (curr_funcisL(L)) L->top = curr_topL(L);
576 msg = lj_str_pushvf(L, err2msg(em), argp); 577 msg = lj_strfmt_pushvf(L, err2msg(em), argp);
577 va_end(argp); 578 va_end(argp);
578 lj_debug_addloc(L, msg, L->base-1, NULL); 579 lj_debug_addloc(L, msg, L->base-1, NULL);
579 lj_err_run(L); 580 lj_err_run(L);
@@ -591,11 +592,11 @@ LJ_NOINLINE void lj_err_lex(lua_State *L, GCstr *src, const char *tok,
591{ 592{
592 char buff[LUA_IDSIZE]; 593 char buff[LUA_IDSIZE];
593 const char *msg; 594 const char *msg;
594 lj_debug_shortname(buff, src); 595 lj_debug_shortname(buff, src, line);
595 msg = lj_str_pushvf(L, err2msg(em), argp); 596 msg = lj_strfmt_pushvf(L, err2msg(em), argp);
596 msg = lj_str_pushf(L, "%s:%d: %s", buff, line, msg); 597 msg = lj_strfmt_pushf(L, "%s:%d: %s", buff, line, msg);
597 if (tok) 598 if (tok)
598 lj_str_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tok); 599 lj_strfmt_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tok);
599 lj_err_throw(L, LUA_ERRSYNTAX); 600 lj_err_throw(L, LUA_ERRSYNTAX);
600} 601}
601 602
@@ -679,7 +680,7 @@ LJ_NOINLINE void lj_err_callerv(lua_State *L, ErrMsg em, ...)
679 const char *msg; 680 const char *msg;
680 va_list argp; 681 va_list argp;
681 va_start(argp, em); 682 va_start(argp, em);
682 msg = lj_str_pushvf(L, err2msg(em), argp); 683 msg = lj_strfmt_pushvf(L, err2msg(em), argp);
683 va_end(argp); 684 va_end(argp);
684 lj_err_callermsg(L, msg); 685 lj_err_callermsg(L, msg);
685} 686}
@@ -699,9 +700,9 @@ LJ_NORET LJ_NOINLINE static void err_argmsg(lua_State *L, int narg,
699 if (narg < 0 && narg > LUA_REGISTRYINDEX) 700 if (narg < 0 && narg > LUA_REGISTRYINDEX)
700 narg = (int)(L->top - L->base) + narg + 1; 701 narg = (int)(L->top - L->base) + narg + 1;
701 if (ftype && ftype[3] == 'h' && --narg == 0) /* Check for "method". */ 702 if (ftype && ftype[3] == 'h' && --narg == 0) /* Check for "method". */
702 msg = lj_str_pushf(L, err2msg(LJ_ERR_BADSELF), fname, msg); 703 msg = lj_strfmt_pushf(L, err2msg(LJ_ERR_BADSELF), fname, msg);
703 else 704 else
704 msg = lj_str_pushf(L, err2msg(LJ_ERR_BADARG), narg, fname, msg); 705 msg = lj_strfmt_pushf(L, err2msg(LJ_ERR_BADARG), narg, fname, msg);
705 lj_err_callermsg(L, msg); 706 lj_err_callermsg(L, msg);
706} 707}
707 708
@@ -711,7 +712,7 @@ LJ_NOINLINE void lj_err_argv(lua_State *L, int narg, ErrMsg em, ...)
711 const char *msg; 712 const char *msg;
712 va_list argp; 713 va_list argp;
713 va_start(argp, em); 714 va_start(argp, em);
714 msg = lj_str_pushvf(L, err2msg(em), argp); 715 msg = lj_strfmt_pushvf(L, err2msg(em), argp);
715 va_end(argp); 716 va_end(argp);
716 err_argmsg(L, narg, msg); 717 err_argmsg(L, narg, msg);
717} 718}
@@ -727,7 +728,7 @@ LJ_NOINLINE void lj_err_argtype(lua_State *L, int narg, const char *xname)
727{ 728{
728 TValue *o = narg < 0 ? L->top + narg : L->base + narg-1; 729 TValue *o = narg < 0 ? L->top + narg : L->base + narg-1;
729 const char *tname = o < L->top ? lj_typename(o) : lj_obj_typename[0]; 730 const char *tname = o < L->top ? lj_typename(o) : lj_obj_typename[0];
730 const char *msg = lj_str_pushf(L, err2msg(LJ_ERR_BADTYPE), xname, tname); 731 const char *msg = lj_strfmt_pushf(L, err2msg(LJ_ERR_BADTYPE), xname, tname);
731 err_argmsg(L, narg, msg); 732 err_argmsg(L, narg, msg);
732} 733}
733 734
@@ -777,7 +778,7 @@ LUALIB_API int luaL_error(lua_State *L, const char *fmt, ...)
777 const char *msg; 778 const char *msg;
778 va_list argp; 779 va_list argp;
779 va_start(argp, fmt); 780 va_start(argp, fmt);
780 msg = lj_str_pushvf(L, fmt, argp); 781 msg = lj_strfmt_pushvf(L, fmt, argp);
781 va_end(argp); 782 va_end(argp);
782 lj_err_callermsg(L, msg); 783 lj_err_callermsg(L, msg);
783 return 0; /* unreachable */ 784 return 0; /* unreachable */
diff --git a/src/lj_errmsg.h b/src/lj_errmsg.h
index fd46acd4..e62dc237 100644
--- a/src/lj_errmsg.h
+++ b/src/lj_errmsg.h
@@ -96,9 +96,7 @@ ERRDEF(STRPATX, "pattern too complex")
96ERRDEF(STRCAPI, "invalid capture index") 96ERRDEF(STRCAPI, "invalid capture index")
97ERRDEF(STRCAPN, "too many captures") 97ERRDEF(STRCAPN, "too many captures")
98ERRDEF(STRCAPU, "unfinished capture") 98ERRDEF(STRCAPU, "unfinished capture")
99ERRDEF(STRFMTO, "invalid option " LUA_QL("%%%c") " to " LUA_QL("format")) 99ERRDEF(STRFMT, "invalid option " LUA_QS " to " LUA_QL("format"))
100ERRDEF(STRFMTR, "invalid format (repeated flags)")
101ERRDEF(STRFMTW, "invalid format (width or precision too long)")
102ERRDEF(STRGSRV, "invalid replacement value (a %s)") 100ERRDEF(STRGSRV, "invalid replacement value (a %s)")
103ERRDEF(BADMODN, "name conflict for module " LUA_QS) 101ERRDEF(BADMODN, "name conflict for module " LUA_QS)
104#if LJ_HASJIT 102#if LJ_HASJIT
@@ -117,7 +115,6 @@ ERRDEF(JITOPT, "unknown or malformed optimization flag " LUA_QS)
117/* Lexer/parser errors. */ 115/* Lexer/parser errors. */
118ERRDEF(XMODE, "attempt to load chunk with wrong mode") 116ERRDEF(XMODE, "attempt to load chunk with wrong mode")
119ERRDEF(XNEAR, "%s near " LUA_QS) 117ERRDEF(XNEAR, "%s near " LUA_QS)
120ERRDEF(XELEM, "lexical element too long")
121ERRDEF(XLINES, "chunk has too many lines") 118ERRDEF(XLINES, "chunk has too many lines")
122ERRDEF(XLEVELS, "chunk has too many syntax levels") 119ERRDEF(XLEVELS, "chunk has too many syntax levels")
123ERRDEF(XNUMBER, "malformed number") 120ERRDEF(XNUMBER, "malformed number")
diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c
index 35e2e88e..fcc46319 100644
--- a/src/lj_ffrecord.c
+++ b/src/lj_ffrecord.c
@@ -27,6 +27,7 @@
27#include "lj_dispatch.h" 27#include "lj_dispatch.h"
28#include "lj_vm.h" 28#include "lj_vm.h"
29#include "lj_strscan.h" 29#include "lj_strscan.h"
30#include "lj_strfmt.h"
30 31
31/* Some local macros to save typing. Undef'd at the end. */ 32/* Some local macros to save typing. Undef'd at the end. */
32#define IR(ref) (&J->cur.ir[(ref)]) 33#define IR(ref) (&J->cur.ir[(ref)])
@@ -79,10 +80,7 @@ static GCstr *argv2str(jit_State *J, TValue *o)
79 GCstr *s; 80 GCstr *s;
80 if (!tvisnumber(o)) 81 if (!tvisnumber(o))
81 lj_trace_err(J, LJ_TRERR_BADTYPE); 82 lj_trace_err(J, LJ_TRERR_BADTYPE);
82 if (tvisint(o)) 83 s = lj_strfmt_number(J->L, o);
83 s = lj_str_fromint(J->L, intV(o));
84 else
85 s = lj_str_fromnum(J->L, &o->n);
86 setstrV(J->L, o, s); 84 setstrV(J->L, o, s);
87 return s; 85 return s;
88 } 86 }
@@ -121,6 +119,13 @@ static void LJ_FASTCALL recff_c(jit_State *J, RecordFFData *rd)
121 UNUSED(rd); 119 UNUSED(rd);
122} 120}
123 121
122/* Emit BUFHDR for the global temporary buffer. */
123static TRef recff_bufhdr(jit_State *J)
124{
125 return emitir(IRT(IR_BUFHDR, IRT_P32),
126 lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET);
127}
128
124/* -- Base library fast functions ----------------------------------------- */ 129/* -- Base library fast functions ----------------------------------------- */
125 130
126static void LJ_FASTCALL recff_assert(jit_State *J, RecordFFData *rd) 131static void LJ_FASTCALL recff_assert(jit_State *J, RecordFFData *rd)
@@ -336,11 +341,12 @@ static void LJ_FASTCALL recff_tostring(jit_State *J, RecordFFData *rd)
336 if (tref_isstr(tr)) { 341 if (tref_isstr(tr)) {
337 /* Ignore __tostring in the string base metatable. */ 342 /* Ignore __tostring in the string base metatable. */
338 /* Pass on result in J->base[0]. */ 343 /* Pass on result in J->base[0]. */
339 } else if (!recff_metacall(J, rd, MM_tostring)) { 344 } else if (tr && !recff_metacall(J, rd, MM_tostring)) {
340 if (tref_isnumber(tr)) { 345 if (tref_isnumber(tr)) {
341 J->base[0] = emitir(IRT(IR_TOSTR, IRT_STR), tr, 0); 346 J->base[0] = emitir(IRT(IR_TOSTR, IRT_STR), tr,
347 tref_isnum(tr) ? IRTOSTR_NUM : IRTOSTR_INT);
342 } else if (tref_ispri(tr)) { 348 } else if (tref_ispri(tr)) {
343 J->base[0] = lj_ir_kstr(J, strV(&J->fn->c.upvalue[tref_type(tr)])); 349 J->base[0] = lj_ir_kstr(J, lj_strfmt_obj(J->L, &rd->argv[0]));
344 } else { 350 } else {
345 recff_nyiu(J); 351 recff_nyiu(J);
346 } 352 }
@@ -528,14 +534,6 @@ static void LJ_FASTCALL recff_math_modf(jit_State *J, RecordFFData *rd)
528 rd->nres = 2; 534 rd->nres = 2;
529} 535}
530 536
531static void LJ_FASTCALL recff_math_degrad(jit_State *J, RecordFFData *rd)
532{
533 TRef tr = lj_ir_tonum(J, J->base[0]);
534 TRef trm = lj_ir_knum(J, numV(&J->fn->c.upvalue[0]));
535 J->base[0] = emitir(IRTN(IR_MUL), tr, trm);
536 UNUSED(rd);
537}
538
539static void LJ_FASTCALL recff_math_pow(jit_State *J, RecordFFData *rd) 537static void LJ_FASTCALL recff_math_pow(jit_State *J, RecordFFData *rd)
540{ 538{
541 TRef tr = lj_ir_tonum(J, J->base[0]); 539 TRef tr = lj_ir_tonum(J, J->base[0]);
@@ -592,48 +590,105 @@ static void LJ_FASTCALL recff_math_random(jit_State *J, RecordFFData *rd)
592 590
593/* -- Bit library fast functions ------------------------------------------ */ 591/* -- Bit library fast functions ------------------------------------------ */
594 592
595/* Record unary bit.tobit, bit.bnot, bit.bswap. */ 593/* Record bit.tobit. */
594static void LJ_FASTCALL recff_bit_tobit(jit_State *J, RecordFFData *rd)
595{
596 TRef tr = J->base[0];
597#if LJ_HASFFI
598 if (tref_iscdata(tr)) { recff_bit64_tobit(J, rd); return; }
599#endif
600 J->base[0] = lj_opt_narrow_tobit(J, tr);
601 UNUSED(rd);
602}
603
604/* Record unary bit.bnot, bit.bswap. */
596static void LJ_FASTCALL recff_bit_unary(jit_State *J, RecordFFData *rd) 605static void LJ_FASTCALL recff_bit_unary(jit_State *J, RecordFFData *rd)
597{ 606{
598 TRef tr = lj_opt_narrow_tobit(J, J->base[0]); 607#if LJ_HASFFI
599 J->base[0] = (rd->data == IR_TOBIT) ? tr : emitir(IRTI(rd->data), tr, 0); 608 if (recff_bit64_unary(J, rd))
609 return;
610#endif
611 J->base[0] = emitir(IRTI(rd->data), lj_opt_narrow_tobit(J, J->base[0]), 0);
600} 612}
601 613
602/* Record N-ary bit.band, bit.bor, bit.bxor. */ 614/* Record N-ary bit.band, bit.bor, bit.bxor. */
603static void LJ_FASTCALL recff_bit_nary(jit_State *J, RecordFFData *rd) 615static void LJ_FASTCALL recff_bit_nary(jit_State *J, RecordFFData *rd)
604{ 616{
605 TRef tr = lj_opt_narrow_tobit(J, J->base[0]); 617#if LJ_HASFFI
606 uint32_t op = rd->data; 618 if (recff_bit64_nary(J, rd))
607 BCReg i; 619 return;
608 for (i = 1; J->base[i] != 0; i++) 620#endif
609 tr = emitir(IRTI(op), tr, lj_opt_narrow_tobit(J, J->base[i])); 621 {
610 J->base[0] = tr; 622 TRef tr = lj_opt_narrow_tobit(J, J->base[0]);
623 uint32_t ot = IRTI(rd->data);
624 BCReg i;
625 for (i = 1; J->base[i] != 0; i++)
626 tr = emitir(ot, tr, lj_opt_narrow_tobit(J, J->base[i]));
627 J->base[0] = tr;
628 }
611} 629}
612 630
613/* Record bit shifts. */ 631/* Record bit shifts. */
614static void LJ_FASTCALL recff_bit_shift(jit_State *J, RecordFFData *rd) 632static void LJ_FASTCALL recff_bit_shift(jit_State *J, RecordFFData *rd)
615{ 633{
616 TRef tr = lj_opt_narrow_tobit(J, J->base[0]); 634#if LJ_HASFFI
617 TRef tsh = lj_opt_narrow_tobit(J, J->base[1]); 635 if (recff_bit64_shift(J, rd))
618 IROp op = (IROp)rd->data; 636 return;
619 if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) && 637#endif
620 !tref_isk(tsh)) 638 {
621 tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 31)); 639 TRef tr = lj_opt_narrow_tobit(J, J->base[0]);
640 TRef tsh = lj_opt_narrow_tobit(J, J->base[1]);
641 IROp op = (IROp)rd->data;
642 if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) &&
643 !tref_isk(tsh))
644 tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 31));
622#ifdef LJ_TARGET_UNIFYROT 645#ifdef LJ_TARGET_UNIFYROT
623 if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) { 646 if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) {
624 op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR; 647 op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR;
625 tsh = emitir(IRTI(IR_NEG), tsh, tsh); 648 tsh = emitir(IRTI(IR_NEG), tsh, tsh);
649 }
650#endif
651 J->base[0] = emitir(IRTI(op), tr, tsh);
626 } 652 }
653}
654
655static void LJ_FASTCALL recff_bit_tohex(jit_State *J, RecordFFData *rd)
656{
657#if LJ_HASFFI
658 TRef hdr = recff_bufhdr(J);
659 TRef tr = recff_bit64_tohex(J, rd, hdr);
660 J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
661#else
662 recff_nyiu(J); /* Don't bother working around this NYI. */
627#endif 663#endif
628 J->base[0] = emitir(IRTI(op), tr, tsh);
629} 664}
630 665
631/* -- String library fast functions --------------------------------------- */ 666/* -- String library fast functions --------------------------------------- */
632 667
633static void LJ_FASTCALL recff_string_len(jit_State *J, RecordFFData *rd) 668/* Specialize to relative starting position for string. */
669static TRef recff_string_start(jit_State *J, GCstr *s, int32_t *st, TRef tr,
670 TRef trlen, TRef tr0)
634{ 671{
635 J->base[0] = emitir(IRTI(IR_FLOAD), lj_ir_tostr(J, J->base[0]), IRFL_STR_LEN); 672 int32_t start = *st;
636 UNUSED(rd); 673 if (start < 0) {
674 emitir(IRTGI(IR_LT), tr, tr0);
675 tr = emitir(IRTI(IR_ADD), trlen, tr);
676 start = start + (int32_t)s->len;
677 emitir(start < 0 ? IRTGI(IR_LT) : IRTGI(IR_GE), tr, tr0);
678 if (start < 0) {
679 tr = tr0;
680 start = 0;
681 }
682 } else if (start == 0) {
683 emitir(IRTGI(IR_EQ), tr, tr0);
684 tr = tr0;
685 } else {
686 tr = emitir(IRTI(IR_ADD), tr, lj_ir_kint(J, -1));
687 emitir(IRTGI(IR_GE), tr, tr0);
688 start--;
689 }
690 *st = start;
691 return tr;
637} 692}
638 693
639/* Handle string.byte (rd->data = 0) and string.sub (rd->data = 1). */ 694/* Handle string.byte (rd->data = 0) and string.sub (rd->data = 1). */
@@ -681,29 +736,11 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd)
681 } else if ((MSize)end <= str->len) { 736 } else if ((MSize)end <= str->len) {
682 emitir(IRTGI(IR_ULE), trend, trlen); 737 emitir(IRTGI(IR_ULE), trend, trlen);
683 } else { 738 } else {
684 emitir(IRTGI(IR_GT), trend, trlen); 739 emitir(IRTGI(IR_UGT), trend, trlen);
685 end = (int32_t)str->len; 740 end = (int32_t)str->len;
686 trend = trlen; 741 trend = trlen;
687 } 742 }
688 if (start < 0) { 743 trstart = recff_string_start(J, str, &start, trstart, trlen, tr0);
689 emitir(IRTGI(IR_LT), trstart, tr0);
690 trstart = emitir(IRTI(IR_ADD), trlen, trstart);
691 start = start+(int32_t)str->len;
692 emitir(start < 0 ? IRTGI(IR_LT) : IRTGI(IR_GE), trstart, tr0);
693 if (start < 0) {
694 trstart = tr0;
695 start = 0;
696 }
697 } else {
698 if (start == 0) {
699 emitir(IRTGI(IR_EQ), trstart, tr0);
700 trstart = tr0;
701 } else {
702 trstart = emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, -1));
703 emitir(IRTGI(IR_GE), trstart, tr0);
704 start--;
705 }
706 }
707 if (rd->data) { /* Return string.sub result. */ 744 if (rd->data) { /* Return string.sub result. */
708 if (end - start >= 0) { 745 if (end - start >= 0) {
709 /* Also handle empty range here, to avoid extra traces. */ 746 /* Also handle empty range here, to avoid extra traces. */
@@ -713,7 +750,7 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd)
713 J->base[0] = emitir(IRT(IR_SNEW, IRT_STR), trptr, trslen); 750 J->base[0] = emitir(IRT(IR_SNEW, IRT_STR), trptr, trslen);
714 } else { /* Range underflow: return empty string. */ 751 } else { /* Range underflow: return empty string. */
715 emitir(IRTGI(IR_LT), trend, trstart); 752 emitir(IRTGI(IR_LT), trend, trstart);
716 J->base[0] = lj_ir_kstr(J, lj_str_new(J->L, strdata(str), 0)); 753 J->base[0] = lj_ir_kstr(J, &J2G(J)->strempty);
717 } 754 }
718 } else { /* Return string.byte result(s). */ 755 } else { /* Return string.byte result(s). */
719 ptrdiff_t i, len = end - start; 756 ptrdiff_t i, len = end - start;
@@ -735,48 +772,196 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd)
735 } 772 }
736} 773}
737 774
738/* -- Table library fast functions ---------------------------------------- */ 775static void LJ_FASTCALL recff_string_char(jit_State *J, RecordFFData *rd)
739
740static void LJ_FASTCALL recff_table_getn(jit_State *J, RecordFFData *rd)
741{ 776{
742 if (tref_istab(J->base[0])) 777 TRef k255 = lj_ir_kint(J, 255);
743 J->base[0] = lj_ir_call(J, IRCALL_lj_tab_len, J->base[0]); 778 BCReg i;
744 /* else: Interpreter will throw. */ 779 for (i = 0; J->base[i] != 0; i++) { /* Convert char values to strings. */
780 TRef tr = lj_opt_narrow_toint(J, J->base[i]);
781 emitir(IRTGI(IR_ULE), tr, k255);
782 J->base[i] = emitir(IRT(IR_TOSTR, IRT_STR), tr, IRTOSTR_CHAR);
783 }
784 if (i > 1) { /* Concatenate the strings, if there's more than one. */
785 TRef hdr = recff_bufhdr(J), tr = hdr;
786 for (i = 0; J->base[i] != 0; i++)
787 tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, J->base[i]);
788 J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
789 }
745 UNUSED(rd); 790 UNUSED(rd);
746} 791}
747 792
748static void LJ_FASTCALL recff_table_remove(jit_State *J, RecordFFData *rd) 793static void LJ_FASTCALL recff_string_rep(jit_State *J, RecordFFData *rd)
749{ 794{
750 TRef tab = J->base[0]; 795 TRef str = lj_ir_tostr(J, J->base[0]);
751 rd->nres = 0; 796 TRef rep = lj_opt_narrow_toint(J, J->base[1]);
752 if (tref_istab(tab)) { 797 TRef hdr, tr, str2 = 0;
753 if (tref_isnil(J->base[1])) { /* Simple pop: t[#t] = nil */ 798 if (!tref_isnil(J->base[2])) {
754 TRef trlen = lj_ir_call(J, IRCALL_lj_tab_len, tab); 799 TRef sep = lj_ir_tostr(J, J->base[2]);
755 GCtab *t = tabV(&rd->argv[0]); 800 int32_t vrep = argv2int(J, &rd->argv[1]);
756 MSize len = lj_tab_len(t); 801 emitir(IRTGI(vrep > 1 ? IR_GT : IR_LE), rep, lj_ir_kint(J, 1));
757 emitir(IRTGI(len ? IR_NE : IR_EQ), trlen, lj_ir_kint(J, 0)); 802 if (vrep > 1) {
758 if (len) { 803 TRef hdr2 = recff_bufhdr(J);
759 RecordIndex ix; 804 TRef tr2 = emitir(IRT(IR_BUFPUT, IRT_P32), hdr2, sep);
760 ix.tab = tab; 805 tr2 = emitir(IRT(IR_BUFPUT, IRT_P32), tr2, str);
761 ix.key = trlen; 806 str2 = emitir(IRT(IR_BUFSTR, IRT_STR), tr2, hdr2);
762 settabV(J->L, &ix.tabv, t); 807 }
763 setintV(&ix.keyv, len); 808 }
764 ix.idxchain = 0; 809 tr = hdr = recff_bufhdr(J);
765 if (results_wanted(J) != 0) { /* Specialize load only if needed. */ 810 if (str2) {
766 ix.val = 0; 811 tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, str);
767 J->base[0] = lj_record_idx(J, &ix); /* Load previous value. */ 812 str = str2;
768 rd->nres = 1; 813 rep = emitir(IRTI(IR_ADD), rep, lj_ir_kint(J, -1));
769 /* Assumes ix.key/ix.tab is not modified for raw lj_record_idx(). */ 814 }
770 } 815 tr = lj_ir_call(J, IRCALL_lj_buf_putstr_rep, tr, str, rep);
771 ix.val = TREF_NIL; 816 J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
772 lj_record_idx(J, &ix); /* Remove value. */ 817}
818
819static void LJ_FASTCALL recff_string_op(jit_State *J, RecordFFData *rd)
820{
821 TRef str = lj_ir_tostr(J, J->base[0]);
822 TRef hdr = recff_bufhdr(J);
823 TRef tr = lj_ir_call(J, rd->data, hdr, str);
824 J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
825}
826
827static void LJ_FASTCALL recff_string_find(jit_State *J, RecordFFData *rd)
828{
829 TRef trstr = lj_ir_tostr(J, J->base[0]);
830 TRef trpat = lj_ir_tostr(J, J->base[1]);
831 TRef trlen = emitir(IRTI(IR_FLOAD), trstr, IRFL_STR_LEN);
832 TRef tr0 = lj_ir_kint(J, 0);
833 TRef trstart;
834 GCstr *str = argv2str(J, &rd->argv[0]);
835 GCstr *pat = argv2str(J, &rd->argv[1]);
836 int32_t start;
837 J->needsnap = 1;
838 if (tref_isnil(J->base[2])) {
839 trstart = lj_ir_kint(J, 1);
840 start = 1;
841 } else {
842 trstart = lj_opt_narrow_toint(J, J->base[2]);
843 start = argv2int(J, &rd->argv[2]);
844 }
845 trstart = recff_string_start(J, str, &start, trstart, trlen, tr0);
846 if ((MSize)start <= str->len) {
847 emitir(IRTGI(IR_ULE), trstart, trlen);
848 } else {
849 emitir(IRTGI(IR_UGT), trstart, trlen);
850#if LJ_52
851 J->base[0] = TREF_NIL;
852 return;
853#else
854 trstart = trlen;
855 start = str->len;
856#endif
857 }
858 /* Fixed arg or no pattern matching chars? (Specialized to pattern string.) */
859 if ((J->base[2] && tref_istruecond(J->base[3])) ||
860 (emitir(IRTG(IR_EQ, IRT_STR), trpat, lj_ir_kstr(J, pat)),
861 !lj_str_haspattern(pat))) { /* Search for fixed string. */
862 TRef trsptr = emitir(IRT(IR_STRREF, IRT_P32), trstr, trstart);
863 TRef trpptr = emitir(IRT(IR_STRREF, IRT_P32), trpat, tr0);
864 TRef trslen = emitir(IRTI(IR_SUB), trlen, trstart);
865 TRef trplen = emitir(IRTI(IR_FLOAD), trpat, IRFL_STR_LEN);
866 TRef tr = lj_ir_call(J, IRCALL_lj_str_find, trsptr, trpptr, trslen, trplen);
867 TRef trp0 = lj_ir_kkptr(J, NULL);
868 if (lj_str_find(strdata(str)+(MSize)start, strdata(pat),
869 str->len-(MSize)start, pat->len)) {
870 TRef pos;
871 emitir(IRTG(IR_NE, IRT_P32), tr, trp0);
872 pos = emitir(IRTI(IR_SUB), tr, emitir(IRT(IR_STRREF, IRT_P32), trstr, tr0));
873 J->base[0] = emitir(IRTI(IR_ADD), pos, lj_ir_kint(J, 1));
874 J->base[1] = emitir(IRTI(IR_ADD), pos, trplen);
875 rd->nres = 2;
876 } else {
877 emitir(IRTG(IR_EQ, IRT_P32), tr, trp0);
878 J->base[0] = TREF_NIL;
879 }
880 } else { /* Search for pattern. */
881 recff_nyiu(J);
882 }
883}
884
885static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd)
886{
887 TRef trfmt = lj_ir_tostr(J, J->base[0]);
888 GCstr *fmt = argv2str(J, &rd->argv[0]);
889 int arg = 1;
890 TRef hdr, tr;
891 FormatState fs;
892 SFormat sf;
893 /* Specialize to the format string. */
894 emitir(IRTG(IR_EQ, IRT_STR), trfmt, lj_ir_kstr(J, fmt));
895 tr = hdr = recff_bufhdr(J);
896 lj_strfmt_init(&fs, strdata(fmt), fmt->len);
897 while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) { /* Parse format. */
898 TRef tra = sf == STRFMT_LIT ? 0 : J->base[arg++];
899 TRef trsf = lj_ir_kint(J, (int32_t)sf);
900 IRCallID id;
901 switch (STRFMT_TYPE(sf)) {
902 case STRFMT_LIT:
903 tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr,
904 lj_ir_kstr(J, lj_str_new(J->L, fs.str, fs.len)));
905 break;
906 case STRFMT_INT:
907 id = IRCALL_lj_strfmt_putfnum_int;
908 handle_int:
909 if (!tref_isinteger(tra))
910 goto handle_num;
911 if (sf == STRFMT_INT) { /* Shortcut for plain %d. */
912 tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr,
913 emitir(IRT(IR_TOSTR, IRT_STR), tra, IRTOSTR_INT));
914 } else {
915#if LJ_HASFFI
916 tra = emitir(IRT(IR_CONV, IRT_U64), tra,
917 (IRT_INT|(IRT_U64<<5)|IRCONV_SEXT));
918 tr = lj_ir_call(J, IRCALL_lj_strfmt_putfxint, tr, trsf, tra);
919 lj_needsplit(J);
920#else
921 recff_nyiu(J); /* Don't bother working around this NYI. */
922#endif
773 } 923 }
774 } else { /* Complex case: remove in the middle. */ 924 break;
925 case STRFMT_UINT:
926 id = IRCALL_lj_strfmt_putfnum_uint;
927 goto handle_int;
928 case STRFMT_NUM:
929 id = IRCALL_lj_strfmt_putfnum;
930 handle_num:
931 tra = lj_ir_tonum(J, tra);
932 tr = lj_ir_call(J, id, tr, trsf, tra);
933 if (LJ_SOFTFP) lj_needsplit(J);
934 break;
935 case STRFMT_STR:
936 if (!tref_isstr(tra))
937 recff_nyiu(J); /* NYI: __tostring and non-string types for %s. */
938 if (sf == STRFMT_STR) /* Shortcut for plain %s. */
939 tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, tra);
940 else if ((sf & STRFMT_T_QUOTED))
941 tr = lj_ir_call(J, IRCALL_lj_strfmt_putquoted, tr, tra);
942 else
943 tr = lj_ir_call(J, IRCALL_lj_strfmt_putfstr, tr, trsf, tra);
944 break;
945 case STRFMT_CHAR:
946 tra = lj_opt_narrow_toint(J, tra);
947 if (sf == STRFMT_CHAR) /* Shortcut for plain %c. */
948 tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr,
949 emitir(IRT(IR_TOSTR, IRT_STR), tra, IRTOSTR_CHAR));
950 else
951 tr = lj_ir_call(J, IRCALL_lj_strfmt_putfchar, tr, trsf, tra);
952 break;
953 case STRFMT_PTR: /* NYI */
954 case STRFMT_ERR:
955 default:
775 recff_nyiu(J); 956 recff_nyiu(J);
957 break;
776 } 958 }
777 } /* else: Interpreter will throw. */ 959 }
960 J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
778} 961}
779 962
963/* -- Table library fast functions ---------------------------------------- */
964
780static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd) 965static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd)
781{ 966{
782 RecordIndex ix; 967 RecordIndex ix;
@@ -798,6 +983,25 @@ static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd)
798 } /* else: Interpreter will throw. */ 983 } /* else: Interpreter will throw. */
799} 984}
800 985
986static void LJ_FASTCALL recff_table_concat(jit_State *J, RecordFFData *rd)
987{
988 TRef tab = J->base[0];
989 if (tref_istab(tab)) {
990 TRef sep = !tref_isnil(J->base[1]) ?
991 lj_ir_tostr(J, J->base[1]) : lj_ir_knull(J, IRT_STR);
992 TRef tri = (J->base[1] && !tref_isnil(J->base[2])) ?
993 lj_opt_narrow_toint(J, J->base[2]) : lj_ir_kint(J, 1);
994 TRef tre = (J->base[1] && J->base[2] && !tref_isnil(J->base[3])) ?
995 lj_opt_narrow_toint(J, J->base[3]) :
996 lj_ir_call(J, IRCALL_lj_tab_len, tab);
997 TRef hdr = recff_bufhdr(J);
998 TRef tr = lj_ir_call(J, IRCALL_lj_buf_puttab, hdr, tab, sep, tri, tre);
999 emitir(IRTG(IR_NE, IRT_PTR), tr, lj_ir_kptr(J, NULL));
1000 J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
1001 } /* else: Interpreter will throw. */
1002 UNUSED(rd);
1003}
1004
801/* -- I/O library fast functions ------------------------------------------ */ 1005/* -- I/O library fast functions ------------------------------------------ */
802 1006
803/* Get FILE* for I/O function. Any I/O error aborts recording, so there's 1007/* Get FILE* for I/O function. Any I/O error aborts recording, so there's
@@ -833,7 +1037,10 @@ static void LJ_FASTCALL recff_io_write(jit_State *J, RecordFFData *rd)
833 TRef buf = emitir(IRT(IR_STRREF, IRT_P32), str, zero); 1037 TRef buf = emitir(IRT(IR_STRREF, IRT_P32), str, zero);
834 TRef len = emitir(IRTI(IR_FLOAD), str, IRFL_STR_LEN); 1038 TRef len = emitir(IRTI(IR_FLOAD), str, IRFL_STR_LEN);
835 if (tref_isk(len) && IR(tref_ref(len))->i == 1) { 1039 if (tref_isk(len) && IR(tref_ref(len))->i == 1) {
836 TRef tr = emitir(IRT(IR_XLOAD, IRT_U8), buf, IRXLOAD_READONLY); 1040 IRIns *irs = IR(tref_ref(str));
1041 TRef tr = (irs->o == IR_TOSTR && irs->op2 == IRTOSTR_CHAR) ?
1042 irs->op1 :
1043 emitir(IRT(IR_XLOAD, IRT_U8), buf, IRXLOAD_READONLY);
837 tr = lj_ir_call(J, IRCALL_fputc, tr, fp); 1044 tr = lj_ir_call(J, IRCALL_fputc, tr, fp);
838 if (results_wanted(J) != 0) /* Check result only if not ignored. */ 1045 if (results_wanted(J) != 0) /* Check result only if not ignored. */
839 emitir(IRTGI(IR_NE), tr, lj_ir_kint(J, -1)); 1046 emitir(IRTGI(IR_NE), tr, lj_ir_kint(J, -1));
diff --git a/src/lj_gc.c b/src/lj_gc.c
index 79f8b720..c9eaf21a 100644
--- a/src/lj_gc.c
+++ b/src/lj_gc.c
@@ -12,6 +12,7 @@
12#include "lj_obj.h" 12#include "lj_obj.h"
13#include "lj_gc.h" 13#include "lj_gc.h"
14#include "lj_err.h" 14#include "lj_err.h"
15#include "lj_buf.h"
15#include "lj_str.h" 16#include "lj_str.h"
16#include "lj_tab.h" 17#include "lj_tab.h"
17#include "lj_func.h" 18#include "lj_func.h"
@@ -348,15 +349,6 @@ static size_t gc_propagate_gray(global_State *g)
348 349
349/* -- Sweep phase --------------------------------------------------------- */ 350/* -- Sweep phase --------------------------------------------------------- */
350 351
351/* Try to shrink some common data structures. */
352static void gc_shrink(global_State *g, lua_State *L)
353{
354 if (g->strnum <= (g->strmask >> 2) && g->strmask > LJ_MIN_STRTAB*2-1)
355 lj_str_resize(L, g->strmask >> 1); /* Shrink string table. */
356 if (g->tmpbuf.sz > LJ_MIN_SBUF*2)
357 lj_str_resizebuf(L, &g->tmpbuf, g->tmpbuf.sz >> 1); /* Shrink temp buf. */
358}
359
360/* Type of GC free functions. */ 352/* Type of GC free functions. */
361typedef void (LJ_FASTCALL *GCFreeFunc)(global_State *g, GCobj *o); 353typedef void (LJ_FASTCALL *GCFreeFunc)(global_State *g, GCobj *o);
362 354
@@ -591,6 +583,8 @@ static void atomic(global_State *g, lua_State *L)
591 /* All marking done, clear weak tables. */ 583 /* All marking done, clear weak tables. */
592 gc_clearweak(gcref(g->gc.weak)); 584 gc_clearweak(gcref(g->gc.weak));
593 585
586 lj_buf_shrink(L, &g->tmpbuf); /* Shrink temp buffer. */
587
594 /* Prepare for sweep phase. */ 588 /* Prepare for sweep phase. */
595 g->gc.currentwhite = (uint8_t)otherwhite(g); /* Flip current white. */ 589 g->gc.currentwhite = (uint8_t)otherwhite(g); /* Flip current white. */
596 g->strempty.marked = g->gc.currentwhite; 590 g->strempty.marked = g->gc.currentwhite;
@@ -631,7 +625,8 @@ static size_t gc_onestep(lua_State *L)
631 MSize old = g->gc.total; 625 MSize old = g->gc.total;
632 setmref(g->gc.sweep, gc_sweep(g, mref(g->gc.sweep, GCRef), GCSWEEPMAX)); 626 setmref(g->gc.sweep, gc_sweep(g, mref(g->gc.sweep, GCRef), GCSWEEPMAX));
633 if (gcref(*mref(g->gc.sweep, GCRef)) == NULL) { 627 if (gcref(*mref(g->gc.sweep, GCRef)) == NULL) {
634 gc_shrink(g, L); 628 if (g->strnum <= (g->strmask >> 2) && g->strmask > LJ_MIN_STRTAB*2-1)
629 lj_str_resize(L, g->strmask >> 1); /* Shrink string table. */
635 if (gcref(g->gc.mmudata)) { /* Need any finalizations? */ 630 if (gcref(g->gc.mmudata)) { /* Need any finalizations? */
636 g->gc.state = GCSfinalize; 631 g->gc.state = GCSfinalize;
637 } else { /* Otherwise skip this phase to help the JIT. */ 632 } else { /* Otherwise skip this phase to help the JIT. */
diff --git a/src/lj_gdbjit.c b/src/lj_gdbjit.c
index 284195a1..4dbc9593 100644
--- a/src/lj_gdbjit.c
+++ b/src/lj_gdbjit.c
@@ -14,6 +14,8 @@
14#include "lj_err.h" 14#include "lj_err.h"
15#include "lj_debug.h" 15#include "lj_debug.h"
16#include "lj_frame.h" 16#include "lj_frame.h"
17#include "lj_buf.h"
18#include "lj_strfmt.h"
17#include "lj_jit.h" 19#include "lj_jit.h"
18#include "lj_dispatch.h" 20#include "lj_dispatch.h"
19 21
@@ -426,16 +428,6 @@ static void gdbjit_catnum(GDBJITctx *ctx, uint32_t n)
426 *ctx->p++ = '0' + n; 428 *ctx->p++ = '0' + n;
427} 429}
428 430
429/* Add a ULEB128 value. */
430static void gdbjit_uleb128(GDBJITctx *ctx, uint32_t v)
431{
432 uint8_t *p = ctx->p;
433 for (; v >= 0x80; v >>= 7)
434 *p++ = (uint8_t)((v & 0x7f) | 0x80);
435 *p++ = (uint8_t)v;
436 ctx->p = p;
437}
438
439/* Add a SLEB128 value. */ 431/* Add a SLEB128 value. */
440static void gdbjit_sleb128(GDBJITctx *ctx, int32_t v) 432static void gdbjit_sleb128(GDBJITctx *ctx, int32_t v)
441{ 433{
@@ -452,7 +444,7 @@ static void gdbjit_sleb128(GDBJITctx *ctx, int32_t v)
452#define DU16(x) (*(uint16_t *)p = (x), p += 2) 444#define DU16(x) (*(uint16_t *)p = (x), p += 2)
453#define DU32(x) (*(uint32_t *)p = (x), p += 4) 445#define DU32(x) (*(uint32_t *)p = (x), p += 4)
454#define DADDR(x) (*(uintptr_t *)p = (x), p += sizeof(uintptr_t)) 446#define DADDR(x) (*(uintptr_t *)p = (x), p += sizeof(uintptr_t))
455#define DUV(x) (ctx->p = p, gdbjit_uleb128(ctx, (x)), p = ctx->p) 447#define DUV(x) (p = (uint8_t *)lj_strfmt_wuleb128((char *)p, (x)))
456#define DSV(x) (ctx->p = p, gdbjit_sleb128(ctx, (x)), p = ctx->p) 448#define DSV(x) (ctx->p = p, gdbjit_sleb128(ctx, (x)), p = ctx->p)
457#define DSTR(str) (ctx->p = p, gdbjit_strz(ctx, (str)), p = ctx->p) 449#define DSTR(str) (ctx->p = p, gdbjit_strz(ctx, (str)), p = ctx->p)
458#define DALIGNNOP(s) while ((uintptr_t)p & ((s)-1)) *p++ = DW_CFA_nop 450#define DALIGNNOP(s) while ((uintptr_t)p & ((s)-1)) *p++ = DW_CFA_nop
diff --git a/src/lj_ir.c b/src/lj_ir.c
index e1a59105..fc2fc2ce 100644
--- a/src/lj_ir.c
+++ b/src/lj_ir.c
@@ -15,6 +15,7 @@
15#if LJ_HASJIT 15#if LJ_HASJIT
16 16
17#include "lj_gc.h" 17#include "lj_gc.h"
18#include "lj_buf.h"
18#include "lj_str.h" 19#include "lj_str.h"
19#include "lj_tab.h" 20#include "lj_tab.h"
20#include "lj_ir.h" 21#include "lj_ir.h"
@@ -29,6 +30,7 @@
29#endif 30#endif
30#include "lj_vm.h" 31#include "lj_vm.h"
31#include "lj_strscan.h" 32#include "lj_strscan.h"
33#include "lj_strfmt.h"
32#include "lj_lib.h" 34#include "lj_lib.h"
33 35
34/* Some local macros to save typing. Undef'd at the end. */ 36/* Some local macros to save typing. Undef'd at the end. */
@@ -443,7 +445,8 @@ TRef LJ_FASTCALL lj_ir_tostr(jit_State *J, TRef tr)
443 if (!tref_isstr(tr)) { 445 if (!tref_isstr(tr)) {
444 if (!tref_isnumber(tr)) 446 if (!tref_isnumber(tr))
445 lj_trace_err(J, LJ_TRERR_BADTYPE); 447 lj_trace_err(J, LJ_TRERR_BADTYPE);
446 tr = emitir(IRT(IR_TOSTR, IRT_STR), tr, 0); 448 tr = emitir(IRT(IR_TOSTR, IRT_STR), tr,
449 tref_isnum(tr) ? IRTOSTR_NUM : IRTOSTR_INT);
447 } 450 }
448 return tr; 451 return tr;
449} 452}
diff --git a/src/lj_ir.h b/src/lj_ir.h
index a9824325..841153d8 100644
--- a/src/lj_ir.h
+++ b/src/lj_ir.h
@@ -120,6 +120,11 @@
120 _(CNEW, AW, ref, ref) \ 120 _(CNEW, AW, ref, ref) \
121 _(CNEWI, NW, ref, ref) /* CSE is ok, not marked as A. */ \ 121 _(CNEWI, NW, ref, ref) /* CSE is ok, not marked as A. */ \
122 \ 122 \
123 /* Buffer operations. */ \
124 _(BUFHDR, L , ref, lit) \
125 _(BUFPUT, L , ref, ref) \
126 _(BUFSTR, A , ref, ref) \
127 \
123 /* Barriers. */ \ 128 /* Barriers. */ \
124 _(TBAR, S , ref, ___) \ 129 _(TBAR, S , ref, ___) \
125 _(OBAR, S , ref, ref) \ 130 _(OBAR, S , ref, ref) \
@@ -128,7 +133,7 @@
128 /* Type conversions. */ \ 133 /* Type conversions. */ \
129 _(CONV, NW, ref, lit) \ 134 _(CONV, NW, ref, lit) \
130 _(TOBIT, N , ref, ref) \ 135 _(TOBIT, N , ref, ref) \
131 _(TOSTR, N , ref, ___) \ 136 _(TOSTR, N , ref, lit) \
132 _(STRTO, N , ref, ___) \ 137 _(STRTO, N , ref, ___) \
133 \ 138 \
134 /* Calls. */ \ 139 /* Calls. */ \
@@ -221,13 +226,16 @@ IRFLDEF(FLENUM)
221#define IRXLOAD_VOLATILE 2 /* Load from volatile data. */ 226#define IRXLOAD_VOLATILE 2 /* Load from volatile data. */
222#define IRXLOAD_UNALIGNED 4 /* Unaligned load. */ 227#define IRXLOAD_UNALIGNED 4 /* Unaligned load. */
223 228
229/* BUFHDR mode, stored in op2. */
230#define IRBUFHDR_RESET 0 /* Reset buffer. */
231#define IRBUFHDR_APPEND 1 /* Append to buffer. */
232
224/* CONV mode, stored in op2. */ 233/* CONV mode, stored in op2. */
225#define IRCONV_SRCMASK 0x001f /* Source IRType. */ 234#define IRCONV_SRCMASK 0x001f /* Source IRType. */
226#define IRCONV_DSTMASK 0x03e0 /* Dest. IRType (also in ir->t). */ 235#define IRCONV_DSTMASK 0x03e0 /* Dest. IRType (also in ir->t). */
227#define IRCONV_DSH 5 236#define IRCONV_DSH 5
228#define IRCONV_NUM_INT ((IRT_NUM<<IRCONV_DSH)|IRT_INT) 237#define IRCONV_NUM_INT ((IRT_NUM<<IRCONV_DSH)|IRT_INT)
229#define IRCONV_INT_NUM ((IRT_INT<<IRCONV_DSH)|IRT_NUM) 238#define IRCONV_INT_NUM ((IRT_INT<<IRCONV_DSH)|IRT_NUM)
230#define IRCONV_TRUNC 0x0400 /* Truncate number to integer. */
231#define IRCONV_SEXT 0x0800 /* Sign-extend integer to integer. */ 239#define IRCONV_SEXT 0x0800 /* Sign-extend integer to integer. */
232#define IRCONV_MODEMASK 0x0fff 240#define IRCONV_MODEMASK 0x0fff
233#define IRCONV_CONVMASK 0xf000 241#define IRCONV_CONVMASK 0xf000
@@ -238,6 +246,11 @@ IRFLDEF(FLENUM)
238#define IRCONV_INDEX (2<<IRCONV_CSH) /* Check + special backprop rules. */ 246#define IRCONV_INDEX (2<<IRCONV_CSH) /* Check + special backprop rules. */
239#define IRCONV_CHECK (3<<IRCONV_CSH) /* Number checked for integerness. */ 247#define IRCONV_CHECK (3<<IRCONV_CSH) /* Number checked for integerness. */
240 248
249/* TOSTR mode, stored in op2. */
250#define IRTOSTR_INT 0 /* Convert integer to string. */
251#define IRTOSTR_NUM 1 /* Convert number to string. */
252#define IRTOSTR_CHAR 2 /* Convert char value to string. */
253
241/* -- IR operands --------------------------------------------------------- */ 254/* -- IR operands --------------------------------------------------------- */
242 255
243/* IR operand mode (2 bit). */ 256/* IR operand mode (2 bit). */
diff --git a/src/lj_ircall.h b/src/lj_ircall.h
index 7fcc532e..9e1fb367 100644
--- a/src/lj_ircall.h
+++ b/src/lj_ircall.h
@@ -16,7 +16,7 @@ typedef struct CCallInfo {
16 uint32_t flags; /* Number of arguments and flags. */ 16 uint32_t flags; /* Number of arguments and flags. */
17} CCallInfo; 17} CCallInfo;
18 18
19#define CCI_NARGS(ci) ((ci)->flags & 0xff) /* Extract # of args. */ 19#define CCI_NARGS(ci) ((ci)->flags & 0xff) /* # of args. */
20#define CCI_NARGS_MAX 32 /* Max. # of args. */ 20#define CCI_NARGS_MAX 32 /* Max. # of args. */
21 21
22#define CCI_OTSHIFT 16 22#define CCI_OTSHIFT 16
@@ -45,6 +45,17 @@ typedef struct CCallInfo {
45#define CCI_CC_FASTCALL 0x2000 /* Fastcall calling convention. */ 45#define CCI_CC_FASTCALL 0x2000 /* Fastcall calling convention. */
46#define CCI_CC_STDCALL 0x3000 /* Stdcall calling convention. */ 46#define CCI_CC_STDCALL 0x3000 /* Stdcall calling convention. */
47 47
48/* Extra args for SOFTFP, SPLIT 64 bit. */
49#define CCI_XARGS_SHIFT 14
50#define CCI_XARGS(ci) (((ci)->flags >> CCI_XARGS_SHIFT) & 3)
51#define CCI_XA (1u << CCI_XARGS_SHIFT)
52
53#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
54#define CCI_XNARGS(ci) (CCI_NARGS((ci)) + CCI_XARGS((ci)))
55#else
56#define CCI_XNARGS(ci) CCI_NARGS((ci))
57#endif
58
48/* Helpers for conditional function definitions. */ 59/* Helpers for conditional function definitions. */
49#define IRCALLCOND_ANY(x) x 60#define IRCALLCOND_ANY(x) x
50 61
@@ -87,24 +98,48 @@ typedef struct CCallInfo {
87#endif 98#endif
88 99
89#if LJ_SOFTFP 100#if LJ_SOFTFP
90#define ARG1_FP 2 /* Treat as 2 32 bit arguments. */ 101#define XA_FP CCI_XA
102#define XA2_FP (CCI_XA+CCI_XA)
91#else 103#else
92#define ARG1_FP 1 104#define XA_FP 0
105#define XA2_FP 0
93#endif 106#endif
94 107
95#if LJ_32 108#if LJ_32
96#define ARG2_64 4 /* Treat as 4 32 bit arguments. */ 109#define XA_64 CCI_XA
110#define XA2_64 (CCI_XA+CCI_XA)
97#else 111#else
98#define ARG2_64 2 112#define XA_64 0
113#define XA2_64 0
99#endif 114#endif
100 115
101/* Function definitions for CALL* instructions. */ 116/* Function definitions for CALL* instructions. */
102#define IRCALLDEF(_) \ 117#define IRCALLDEF(_) \
103 _(ANY, lj_str_cmp, 2, FN, INT, CCI_NOFPRCLOBBER) \ 118 _(ANY, lj_str_cmp, 2, FN, INT, CCI_NOFPRCLOBBER) \
119 _(ANY, lj_str_find, 4, N, P32, 0) \
104 _(ANY, lj_str_new, 3, S, STR, CCI_L) \ 120 _(ANY, lj_str_new, 3, S, STR, CCI_L) \
105 _(ANY, lj_strscan_num, 2, FN, INT, 0) \ 121 _(ANY, lj_strscan_num, 2, FN, INT, 0) \
106 _(ANY, lj_str_fromint, 2, FN, STR, CCI_L) \ 122 _(ANY, lj_strfmt_int, 2, FN, STR, CCI_L) \
107 _(ANY, lj_str_fromnum, 2, FN, STR, CCI_L) \ 123 _(ANY, lj_strfmt_num, 2, FN, STR, CCI_L) \
124 _(ANY, lj_strfmt_char, 2, FN, STR, CCI_L) \
125 _(ANY, lj_strfmt_putint, 2, FL, P32, 0) \
126 _(ANY, lj_strfmt_putnum, 2, FL, P32, 0) \
127 _(ANY, lj_strfmt_putquoted, 2, FL, P32, 0) \
128 _(ANY, lj_strfmt_putfxint, 3, L, P32, XA_64) \
129 _(ANY, lj_strfmt_putfnum_int, 3, L, P32, XA_FP) \
130 _(ANY, lj_strfmt_putfnum_uint, 3, L, P32, XA_FP) \
131 _(ANY, lj_strfmt_putfnum, 3, L, P32, XA_FP) \
132 _(ANY, lj_strfmt_putfstr, 3, L, P32, 0) \
133 _(ANY, lj_strfmt_putfchar, 3, L, P32, 0) \
134 _(ANY, lj_buf_putmem, 3, S, P32, 0) \
135 _(ANY, lj_buf_putstr, 2, FL, P32, 0) \
136 _(ANY, lj_buf_putchar, 2, FL, P32, 0) \
137 _(ANY, lj_buf_putstr_reverse, 2, FL, P32, 0) \
138 _(ANY, lj_buf_putstr_lower, 2, FL, P32, 0) \
139 _(ANY, lj_buf_putstr_upper, 2, FL, P32, 0) \
140 _(ANY, lj_buf_putstr_rep, 3, L, P32, 0) \
141 _(ANY, lj_buf_puttab, 5, L, P32, 0) \
142 _(ANY, lj_buf_tostr, 1, FL, STR, 0) \
108 _(ANY, lj_tab_new1, 2, FS, TAB, CCI_L) \ 143 _(ANY, lj_tab_new1, 2, FS, TAB, CCI_L) \
109 _(ANY, lj_tab_dup, 2, FS, TAB, CCI_L) \ 144 _(ANY, lj_tab_dup, 2, FS, TAB, CCI_L) \
110 _(ANY, lj_tab_newkey, 3, S, P32, CCI_L) \ 145 _(ANY, lj_tab_newkey, 3, S, P32, CCI_L) \
@@ -114,29 +149,29 @@ typedef struct CCallInfo {
114 _(ANY, lj_mem_newgco, 2, FS, P32, CCI_L) \ 149 _(ANY, lj_mem_newgco, 2, FS, P32, CCI_L) \
115 _(ANY, lj_math_random_step, 1, FS, NUM, CCI_CASTU64|CCI_NOFPRCLOBBER) \ 150 _(ANY, lj_math_random_step, 1, FS, NUM, CCI_CASTU64|CCI_NOFPRCLOBBER) \
116 _(ANY, lj_vm_modi, 2, FN, INT, 0) \ 151 _(ANY, lj_vm_modi, 2, FN, INT, 0) \
117 _(ANY, sinh, ARG1_FP, N, NUM, 0) \ 152 _(ANY, sinh, 1, N, NUM, XA_FP) \
118 _(ANY, cosh, ARG1_FP, N, NUM, 0) \ 153 _(ANY, cosh, 1, N, NUM, XA_FP) \
119 _(ANY, tanh, ARG1_FP, N, NUM, 0) \ 154 _(ANY, tanh, 1, N, NUM, XA_FP) \
120 _(ANY, fputc, 2, S, INT, 0) \ 155 _(ANY, fputc, 2, S, INT, 0) \
121 _(ANY, fwrite, 4, S, INT, 0) \ 156 _(ANY, fwrite, 4, S, INT, 0) \
122 _(ANY, fflush, 1, S, INT, 0) \ 157 _(ANY, fflush, 1, S, INT, 0) \
123 /* ORDER FPM */ \ 158 /* ORDER FPM */ \
124 _(FPMATH, lj_vm_floor, ARG1_FP, N, NUM, 0) \ 159 _(FPMATH, lj_vm_floor, 1, N, NUM, XA_FP) \
125 _(FPMATH, lj_vm_ceil, ARG1_FP, N, NUM, 0) \ 160 _(FPMATH, lj_vm_ceil, 1, N, NUM, XA_FP) \
126 _(FPMATH, lj_vm_trunc, ARG1_FP, N, NUM, 0) \ 161 _(FPMATH, lj_vm_trunc, 1, N, NUM, XA_FP) \
127 _(FPMATH, sqrt, ARG1_FP, N, NUM, 0) \ 162 _(FPMATH, sqrt, 1, N, NUM, XA_FP) \
128 _(FPMATH, exp, ARG1_FP, N, NUM, 0) \ 163 _(FPMATH, exp, 1, N, NUM, XA_FP) \
129 _(FPMATH, lj_vm_exp2, ARG1_FP, N, NUM, 0) \ 164 _(FPMATH, lj_vm_exp2, 1, N, NUM, XA_FP) \
130 _(FPMATH, log, ARG1_FP, N, NUM, 0) \ 165 _(FPMATH, log, 1, N, NUM, XA_FP) \
131 _(FPMATH, lj_vm_log2, ARG1_FP, N, NUM, 0) \ 166 _(FPMATH, lj_vm_log2, 1, N, NUM, XA_FP) \
132 _(FPMATH, log10, ARG1_FP, N, NUM, 0) \ 167 _(FPMATH, log10, 1, N, NUM, XA_FP) \
133 _(FPMATH, sin, ARG1_FP, N, NUM, 0) \ 168 _(FPMATH, sin, 1, N, NUM, XA_FP) \
134 _(FPMATH, cos, ARG1_FP, N, NUM, 0) \ 169 _(FPMATH, cos, 1, N, NUM, XA_FP) \
135 _(FPMATH, tan, ARG1_FP, N, NUM, 0) \ 170 _(FPMATH, tan, 1, N, NUM, XA_FP) \
136 _(FPMATH, lj_vm_powi, ARG1_FP+1, N, NUM, 0) \ 171 _(FPMATH, lj_vm_powi, 2, N, NUM, XA_FP) \
137 _(FPMATH, pow, ARG1_FP*2, N, NUM, 0) \ 172 _(FPMATH, pow, 2, N, NUM, XA2_FP) \
138 _(FPMATH, atan2, ARG1_FP*2, N, NUM, 0) \ 173 _(FPMATH, atan2, 2, N, NUM, XA2_FP) \
139 _(FPMATH, ldexp, ARG1_FP+1, N, NUM, 0) \ 174 _(FPMATH, ldexp, 2, N, NUM, XA_FP) \
140 _(SOFTFP, lj_vm_tobit, 2, N, INT, 0) \ 175 _(SOFTFP, lj_vm_tobit, 2, N, INT, 0) \
141 _(SOFTFP, softfp_add, 4, N, NUM, 0) \ 176 _(SOFTFP, softfp_add, 4, N, NUM, 0) \
142 _(SOFTFP, softfp_sub, 4, N, NUM, 0) \ 177 _(SOFTFP, softfp_sub, 4, N, NUM, 0) \
@@ -153,26 +188,32 @@ typedef struct CCallInfo {
153 _(SOFTFP_FFI, softfp_ui2f, 1, N, FLOAT, 0) \ 188 _(SOFTFP_FFI, softfp_ui2f, 1, N, FLOAT, 0) \
154 _(SOFTFP_FFI, softfp_f2i, 1, N, INT, 0) \ 189 _(SOFTFP_FFI, softfp_f2i, 1, N, INT, 0) \
155 _(SOFTFP_FFI, softfp_f2ui, 1, N, INT, 0) \ 190 _(SOFTFP_FFI, softfp_f2ui, 1, N, INT, 0) \
156 _(FP64_FFI, fp64_l2d, 2, N, NUM, 0) \ 191 _(FP64_FFI, fp64_l2d, 1, N, NUM, XA_64) \
157 _(FP64_FFI, fp64_ul2d, 2, N, NUM, 0) \ 192 _(FP64_FFI, fp64_ul2d, 1, N, NUM, XA_64) \
158 _(FP64_FFI, fp64_l2f, 2, N, FLOAT, 0) \ 193 _(FP64_FFI, fp64_l2f, 1, N, FLOAT, XA_64) \
159 _(FP64_FFI, fp64_ul2f, 2, N, FLOAT, 0) \ 194 _(FP64_FFI, fp64_ul2f, 1, N, FLOAT, XA_64) \
160 _(FP64_FFI, fp64_d2l, ARG1_FP, N, I64, 0) \ 195 _(FP64_FFI, fp64_d2l, 1, N, I64, XA_FP) \
161 _(FP64_FFI, fp64_d2ul, ARG1_FP, N, U64, 0) \ 196 _(FP64_FFI, fp64_d2ul, 1, N, U64, XA_FP) \
162 _(FP64_FFI, fp64_f2l, 1, N, I64, 0) \ 197 _(FP64_FFI, fp64_f2l, 1, N, I64, 0) \
163 _(FP64_FFI, fp64_f2ul, 1, N, U64, 0) \ 198 _(FP64_FFI, fp64_f2ul, 1, N, U64, 0) \
164 _(FFI, lj_carith_divi64, ARG2_64, N, I64, CCI_NOFPRCLOBBER) \ 199 _(FFI, lj_carith_divi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \
165 _(FFI, lj_carith_divu64, ARG2_64, N, U64, CCI_NOFPRCLOBBER) \ 200 _(FFI, lj_carith_divu64, 2, N, U64, XA2_64|CCI_NOFPRCLOBBER) \
166 _(FFI, lj_carith_modi64, ARG2_64, N, I64, CCI_NOFPRCLOBBER) \ 201 _(FFI, lj_carith_modi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \
167 _(FFI, lj_carith_modu64, ARG2_64, N, U64, CCI_NOFPRCLOBBER) \ 202 _(FFI, lj_carith_modu64, 2, N, U64, XA2_64|CCI_NOFPRCLOBBER) \
168 _(FFI, lj_carith_powi64, ARG2_64, N, I64, CCI_NOFPRCLOBBER) \ 203 _(FFI, lj_carith_powi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \
169 _(FFI, lj_carith_powu64, ARG2_64, N, U64, CCI_NOFPRCLOBBER) \ 204 _(FFI, lj_carith_powu64, 2, N, U64, XA2_64|CCI_NOFPRCLOBBER) \
170 _(FFI, lj_cdata_setfin, 2, FN, P32, CCI_L) \ 205 _(FFI, lj_cdata_newv, 4, S, CDATA, CCI_L) \
171 _(FFI, strlen, 1, L, INTP, 0) \ 206 _(FFI, lj_cdata_setfin, 4, S, NIL, CCI_L) \
172 _(FFI, memcpy, 3, S, PTR, 0) \ 207 _(FFI, strlen, 1, L, INTP, 0) \
173 _(FFI, memset, 3, S, PTR, 0) \ 208 _(FFI, memcpy, 3, S, PTR, 0) \
174 _(FFI, lj_vm_errno, 0, S, INT, CCI_NOFPRCLOBBER) \ 209 _(FFI, memset, 3, S, PTR, 0) \
175 _(FFI32, lj_carith_mul64, ARG2_64, N, I64, CCI_NOFPRCLOBBER) 210 _(FFI, lj_vm_errno, 0, S, INT, CCI_NOFPRCLOBBER) \
211 _(FFI32, lj_carith_mul64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \
212 _(FFI32, lj_carith_shl64, 2, N, U64, XA_64|CCI_NOFPRCLOBBER) \
213 _(FFI32, lj_carith_shr64, 2, N, U64, XA_64|CCI_NOFPRCLOBBER) \
214 _(FFI32, lj_carith_sar64, 2, N, U64, XA_64|CCI_NOFPRCLOBBER) \
215 _(FFI32, lj_carith_rol64, 2, N, U64, XA_64|CCI_NOFPRCLOBBER) \
216 _(FFI32, lj_carith_ror64, 2, N, U64, XA_64|CCI_NOFPRCLOBBER) \
176 \ 217 \
177 /* End of list. */ 218 /* End of list. */
178 219
diff --git a/src/lj_jit.h b/src/lj_jit.h
index c0b1c41e..2683b462 100644
--- a/src/lj_jit.h
+++ b/src/lj_jit.h
@@ -14,18 +14,15 @@
14 14
15/* CPU-specific JIT engine flags. */ 15/* CPU-specific JIT engine flags. */
16#if LJ_TARGET_X86ORX64 16#if LJ_TARGET_X86ORX64
17#define JIT_F_CMOV 0x00000010 17#define JIT_F_SSE2 0x00000010
18#define JIT_F_SSE2 0x00000020 18#define JIT_F_SSE3 0x00000020
19#define JIT_F_SSE3 0x00000040 19#define JIT_F_SSE4_1 0x00000040
20#define JIT_F_SSE4_1 0x00000080 20#define JIT_F_PREFER_IMUL 0x00000080
21#define JIT_F_P4 0x00000100 21#define JIT_F_LEA_AGU 0x00000100
22#define JIT_F_PREFER_IMUL 0x00000200
23#define JIT_F_SPLIT_XMM 0x00000400
24#define JIT_F_LEA_AGU 0x00000800
25 22
26/* Names for the CPU-specific flags. Must match the order above. */ 23/* Names for the CPU-specific flags. Must match the order above. */
27#define JIT_F_CPU_FIRST JIT_F_CMOV 24#define JIT_F_CPU_FIRST JIT_F_SSE2
28#define JIT_F_CPUSTRING "\4CMOV\4SSE2\4SSE3\6SSE4.1\2P4\3AMD\2K8\4ATOM" 25#define JIT_F_CPUSTRING "\4SSE2\4SSE3\6SSE4.1\3AMD\4ATOM"
29#elif LJ_TARGET_ARM 26#elif LJ_TARGET_ARM
30#define JIT_F_ARMV6_ 0x00000010 27#define JIT_F_ARMV6_ 0x00000010
31#define JIT_F_ARMV6T2_ 0x00000020 28#define JIT_F_ARMV6T2_ 0x00000020
diff --git a/src/lj_lex.c b/src/lj_lex.c
index 9f2b06f8..7c2c6677 100644
--- a/src/lj_lex.c
+++ b/src/lj_lex.c
@@ -12,6 +12,7 @@
12#include "lj_obj.h" 12#include "lj_obj.h"
13#include "lj_gc.h" 13#include "lj_gc.h"
14#include "lj_err.h" 14#include "lj_err.h"
15#include "lj_buf.h"
15#include "lj_str.h" 16#include "lj_str.h"
16#if LJ_HASFFI 17#if LJ_HASFFI
17#include "lj_tab.h" 18#include "lj_tab.h"
@@ -24,6 +25,7 @@
24#include "lj_parse.h" 25#include "lj_parse.h"
25#include "lj_char.h" 26#include "lj_char.h"
26#include "lj_strscan.h" 27#include "lj_strscan.h"
28#include "lj_strfmt.h"
27 29
28/* Lua lexer token names. */ 30/* Lua lexer token names. */
29static const char *const tokennames[] = { 31static const char *const tokennames[] = {
@@ -37,50 +39,48 @@ TKDEF(TKSTR1, TKSTR2)
37 39
38/* -- Buffer handling ----------------------------------------------------- */ 40/* -- Buffer handling ----------------------------------------------------- */
39 41
40#define char2int(c) ((int)(uint8_t)(c)) 42#define LEX_EOF (-1)
41#define next(ls) \ 43#define lex_iseol(ls) (ls->c == '\n' || ls->c == '\r')
42 (ls->current = (ls->n--) > 0 ? char2int(*ls->p++) : fillbuf(ls))
43#define save_and_next(ls) (save(ls, ls->current), next(ls))
44#define currIsNewline(ls) (ls->current == '\n' || ls->current == '\r')
45#define END_OF_STREAM (-1)
46 44
47static int fillbuf(LexState *ls) 45/* Get more input from reader. */
46static LJ_NOINLINE LexChar lex_more(LexState *ls)
48{ 47{
49 size_t sz; 48 size_t sz;
50 const char *buf = ls->rfunc(ls->L, ls->rdata, &sz); 49 const char *p = ls->rfunc(ls->L, ls->rdata, &sz);
51 if (buf == NULL || sz == 0) return END_OF_STREAM; 50 if (p == NULL || sz == 0) return LEX_EOF;
52 ls->n = (MSize)sz - 1; 51 ls->pe = p + sz;
53 ls->p = buf; 52 ls->p = p + 1;
54 return char2int(*(ls->p++)); 53 return (LexChar)(uint8_t)p[0];
55} 54}
56 55
57static LJ_NOINLINE void save_grow(LexState *ls, int c) 56/* Get next character. */
57static LJ_AINLINE LexChar lex_next(LexState *ls)
58{ 58{
59 MSize newsize; 59 return (ls->c = ls->p < ls->pe ? (LexChar)(uint8_t)*ls->p++ : lex_more(ls));
60 if (ls->sb.sz >= LJ_MAX_STR/2)
61 lj_lex_error(ls, 0, LJ_ERR_XELEM);
62 newsize = ls->sb.sz * 2;
63 lj_str_resizebuf(ls->L, &ls->sb, newsize);
64 ls->sb.buf[ls->sb.n++] = (char)c;
65} 60}
66 61
67static LJ_AINLINE void save(LexState *ls, int c) 62/* Save character. */
63static LJ_AINLINE void lex_save(LexState *ls, LexChar c)
68{ 64{
69 if (LJ_UNLIKELY(ls->sb.n + 1 > ls->sb.sz)) 65 lj_buf_putb(&ls->sb, c);
70 save_grow(ls, c); 66}
71 else 67
72 ls->sb.buf[ls->sb.n++] = (char)c; 68/* Save previous character and get next character. */
69static LJ_AINLINE LexChar lex_savenext(LexState *ls)
70{
71 lex_save(ls, ls->c);
72 return lex_next(ls);
73} 73}
74 74
75static void inclinenumber(LexState *ls) 75/* Skip line break. Handles "\n", "\r", "\r\n" or "\n\r". */
76static void lex_newline(LexState *ls)
76{ 77{
77 int old = ls->current; 78 LexChar old = ls->c;
78 lua_assert(currIsNewline(ls)); 79 lua_assert(lex_iseol(ls));
79 next(ls); /* skip `\n' or `\r' */ 80 lex_next(ls); /* Skip "\n" or "\r". */
80 if (currIsNewline(ls) && ls->current != old) 81 if (lex_iseol(ls) && ls->c != old) lex_next(ls); /* Skip "\n\r" or "\r\n". */
81 next(ls); /* skip `\n\r' or `\r\n' */
82 if (++ls->linenumber >= LJ_MAX_LINE) 82 if (++ls->linenumber >= LJ_MAX_LINE)
83 lj_lex_error(ls, ls->token, LJ_ERR_XLINES); 83 lj_lex_error(ls, ls->tok, LJ_ERR_XLINES);
84} 84}
85 85
86/* -- Scanner for terminals ----------------------------------------------- */ 86/* -- Scanner for terminals ----------------------------------------------- */
@@ -89,19 +89,17 @@ static void inclinenumber(LexState *ls)
89static void lex_number(LexState *ls, TValue *tv) 89static void lex_number(LexState *ls, TValue *tv)
90{ 90{
91 StrScanFmt fmt; 91 StrScanFmt fmt;
92 int c, xp = 'e'; 92 LexChar c, xp = 'e';
93 lua_assert(lj_char_isdigit(ls->current)); 93 lua_assert(lj_char_isdigit(ls->c));
94 if ((c = ls->current) == '0') { 94 if ((c = ls->c) == '0' && (lex_savenext(ls) | 0x20) == 'x')
95 save_and_next(ls); 95 xp = 'p';
96 if ((ls->current | 0x20) == 'x') xp = 'p'; 96 while (lj_char_isident(ls->c) || ls->c == '.' ||
97 } 97 ((ls->c == '-' || ls->c == '+') && (c | 0x20) == xp)) {
98 while (lj_char_isident(ls->current) || ls->current == '.' || 98 c = ls->c;
99 ((ls->current == '-' || ls->current == '+') && (c | 0x20) == xp)) { 99 lex_savenext(ls);
100 c = ls->current;
101 save_and_next(ls);
102 } 100 }
103 save(ls, '\0'); 101 lex_save(ls, '\0');
104 fmt = lj_strscan_scan((const uint8_t *)ls->sb.buf, tv, 102 fmt = lj_strscan_scan((const uint8_t *)sbufB(&ls->sb), tv,
105 (LJ_DUALNUM ? STRSCAN_OPT_TOINT : STRSCAN_OPT_TONUM) | 103 (LJ_DUALNUM ? STRSCAN_OPT_TOINT : STRSCAN_OPT_TONUM) |
106 (LJ_HASFFI ? (STRSCAN_OPT_LL|STRSCAN_OPT_IMAG) : 0)); 104 (LJ_HASFFI ? (STRSCAN_OPT_LL|STRSCAN_OPT_IMAG) : 0));
107 if (LJ_DUALNUM && fmt == STRSCAN_INT) { 105 if (LJ_DUALNUM && fmt == STRSCAN_INT) {
@@ -134,60 +132,60 @@ static void lex_number(LexState *ls, TValue *tv)
134 } 132 }
135} 133}
136 134
137static int skip_sep(LexState *ls) 135/* Skip equal signs for "[=...=[" and "]=...=]" and return their count. */
136static int lex_skipeq(LexState *ls)
138{ 137{
139 int count = 0; 138 int count = 0;
140 int s = ls->current; 139 LexChar s = ls->c;
141 lua_assert(s == '[' || s == ']'); 140 lua_assert(s == '[' || s == ']');
142 save_and_next(ls); 141 while (lex_savenext(ls) == '=')
143 while (ls->current == '=') {
144 save_and_next(ls);
145 count++; 142 count++;
146 } 143 return (ls->c == s) ? count : (-count) - 1;
147 return (ls->current == s) ? count : (-count) - 1;
148} 144}
149 145
150static void read_long_string(LexState *ls, TValue *tv, int sep) 146/* Parse a long string or long comment (tv set to NULL). */
147static void lex_longstring(LexState *ls, TValue *tv, int sep)
151{ 148{
152 save_and_next(ls); /* skip 2nd `[' */ 149 lex_savenext(ls); /* Skip second '['. */
153 if (currIsNewline(ls)) /* string starts with a newline? */ 150 if (lex_iseol(ls)) /* Skip initial newline. */
154 inclinenumber(ls); /* skip it */ 151 lex_newline(ls);
155 for (;;) { 152 for (;;) {
156 switch (ls->current) { 153 switch (ls->c) {
157 case END_OF_STREAM: 154 case LEX_EOF:
158 lj_lex_error(ls, TK_eof, tv ? LJ_ERR_XLSTR : LJ_ERR_XLCOM); 155 lj_lex_error(ls, TK_eof, tv ? LJ_ERR_XLSTR : LJ_ERR_XLCOM);
159 break; 156 break;
160 case ']': 157 case ']':
161 if (skip_sep(ls) == sep) { 158 if (lex_skipeq(ls) == sep) {
162 save_and_next(ls); /* skip 2nd `]' */ 159 lex_savenext(ls); /* Skip second ']'. */
163 goto endloop; 160 goto endloop;
164 } 161 }
165 break; 162 break;
166 case '\n': 163 case '\n':
167 case '\r': 164 case '\r':
168 save(ls, '\n'); 165 lex_save(ls, '\n');
169 inclinenumber(ls); 166 lex_newline(ls);
170 if (!tv) lj_str_resetbuf(&ls->sb); /* avoid wasting space */ 167 if (!tv) lj_buf_reset(&ls->sb); /* Don't waste space for comments. */
171 break; 168 break;
172 default: 169 default:
173 if (tv) save_and_next(ls); 170 lex_savenext(ls);
174 else next(ls);
175 break; 171 break;
176 } 172 }
177 } endloop: 173 } endloop:
178 if (tv) { 174 if (tv) {
179 GCstr *str = lj_parse_keepstr(ls, ls->sb.buf + (2 + (MSize)sep), 175 GCstr *str = lj_parse_keepstr(ls, sbufB(&ls->sb) + (2 + (MSize)sep),
180 ls->sb.n - 2*(2 + (MSize)sep)); 176 sbuflen(&ls->sb) - 2*(2 + (MSize)sep));
181 setstrV(ls->L, tv, str); 177 setstrV(ls->L, tv, str);
182 } 178 }
183} 179}
184 180
185static void read_string(LexState *ls, int delim, TValue *tv) 181/* Parse a string. */
182static void lex_string(LexState *ls, TValue *tv)
186{ 183{
187 save_and_next(ls); 184 LexChar delim = ls->c; /* Delimiter is '\'' or '"'. */
188 while (ls->current != delim) { 185 lex_savenext(ls);
189 switch (ls->current) { 186 while (ls->c != delim) {
190 case END_OF_STREAM: 187 switch (ls->c) {
188 case LEX_EOF:
191 lj_lex_error(ls, TK_eof, LJ_ERR_XSTR); 189 lj_lex_error(ls, TK_eof, LJ_ERR_XSTR);
192 continue; 190 continue;
193 case '\n': 191 case '\n':
@@ -195,7 +193,7 @@ static void read_string(LexState *ls, int delim, TValue *tv)
195 lj_lex_error(ls, TK_string, LJ_ERR_XSTR); 193 lj_lex_error(ls, TK_string, LJ_ERR_XSTR);
196 continue; 194 continue;
197 case '\\': { 195 case '\\': {
198 int c = next(ls); /* Skip the '\\'. */ 196 LexChar c = lex_next(ls); /* Skip the '\\'. */
199 switch (c) { 197 switch (c) {
200 case 'a': c = '\a'; break; 198 case 'a': c = '\a'; break;
201 case 'b': c = '\b'; break; 199 case 'b': c = '\b'; break;
@@ -205,111 +203,112 @@ static void read_string(LexState *ls, int delim, TValue *tv)
205 case 't': c = '\t'; break; 203 case 't': c = '\t'; break;
206 case 'v': c = '\v'; break; 204 case 'v': c = '\v'; break;
207 case 'x': /* Hexadecimal escape '\xXX'. */ 205 case 'x': /* Hexadecimal escape '\xXX'. */
208 c = (next(ls) & 15u) << 4; 206 c = (lex_next(ls) & 15u) << 4;
209 if (!lj_char_isdigit(ls->current)) { 207 if (!lj_char_isdigit(ls->c)) {
210 if (!lj_char_isxdigit(ls->current)) goto err_xesc; 208 if (!lj_char_isxdigit(ls->c)) goto err_xesc;
211 c += 9 << 4; 209 c += 9 << 4;
212 } 210 }
213 c += (next(ls) & 15u); 211 c += (lex_next(ls) & 15u);
214 if (!lj_char_isdigit(ls->current)) { 212 if (!lj_char_isdigit(ls->c)) {
215 if (!lj_char_isxdigit(ls->current)) goto err_xesc; 213 if (!lj_char_isxdigit(ls->c)) goto err_xesc;
216 c += 9; 214 c += 9;
217 } 215 }
218 break; 216 break;
219 case 'z': /* Skip whitespace. */ 217 case 'z': /* Skip whitespace. */
220 next(ls); 218 lex_next(ls);
221 while (lj_char_isspace(ls->current)) 219 while (lj_char_isspace(ls->c))
222 if (currIsNewline(ls)) inclinenumber(ls); else next(ls); 220 if (lex_iseol(ls)) lex_newline(ls); else lex_next(ls);
223 continue; 221 continue;
224 case '\n': case '\r': save(ls, '\n'); inclinenumber(ls); continue; 222 case '\n': case '\r': lex_save(ls, '\n'); lex_newline(ls); continue;
225 case '\\': case '\"': case '\'': break; 223 case '\\': case '\"': case '\'': break;
226 case END_OF_STREAM: continue; 224 case LEX_EOF: continue;
227 default: 225 default:
228 if (!lj_char_isdigit(c)) 226 if (!lj_char_isdigit(c))
229 goto err_xesc; 227 goto err_xesc;
230 c -= '0'; /* Decimal escape '\ddd'. */ 228 c -= '0'; /* Decimal escape '\ddd'. */
231 if (lj_char_isdigit(next(ls))) { 229 if (lj_char_isdigit(lex_next(ls))) {
232 c = c*10 + (ls->current - '0'); 230 c = c*10 + (ls->c - '0');
233 if (lj_char_isdigit(next(ls))) { 231 if (lj_char_isdigit(lex_next(ls))) {
234 c = c*10 + (ls->current - '0'); 232 c = c*10 + (ls->c - '0');
235 if (c > 255) { 233 if (c > 255) {
236 err_xesc: 234 err_xesc:
237 lj_lex_error(ls, TK_string, LJ_ERR_XESC); 235 lj_lex_error(ls, TK_string, LJ_ERR_XESC);
238 } 236 }
239 next(ls); 237 lex_next(ls);
240 } 238 }
241 } 239 }
242 save(ls, c); 240 lex_save(ls, c);
243 continue; 241 continue;
244 } 242 }
245 save(ls, c); 243 lex_save(ls, c);
246 next(ls); 244 lex_next(ls);
247 continue; 245 continue;
248 } 246 }
249 default: 247 default:
250 save_and_next(ls); 248 lex_savenext(ls);
251 break; 249 break;
252 } 250 }
253 } 251 }
254 save_and_next(ls); /* skip delimiter */ 252 lex_savenext(ls); /* Skip trailing delimiter. */
255 setstrV(ls->L, tv, lj_parse_keepstr(ls, ls->sb.buf + 1, ls->sb.n - 2)); 253 setstrV(ls->L, tv,
254 lj_parse_keepstr(ls, sbufB(&ls->sb)+1, sbuflen(&ls->sb)-2));
256} 255}
257 256
258/* -- Main lexical scanner ------------------------------------------------ */ 257/* -- Main lexical scanner ------------------------------------------------ */
259 258
260static int llex(LexState *ls, TValue *tv) 259/* Get next lexical token. */
260static LexToken lex_scan(LexState *ls, TValue *tv)
261{ 261{
262 lj_str_resetbuf(&ls->sb); 262 lj_buf_reset(&ls->sb);
263 for (;;) { 263 for (;;) {
264 if (lj_char_isident(ls->current)) { 264 if (lj_char_isident(ls->c)) {
265 GCstr *s; 265 GCstr *s;
266 if (lj_char_isdigit(ls->current)) { /* Numeric literal. */ 266 if (lj_char_isdigit(ls->c)) { /* Numeric literal. */
267 lex_number(ls, tv); 267 lex_number(ls, tv);
268 return TK_number; 268 return TK_number;
269 } 269 }
270 /* Identifier or reserved word. */ 270 /* Identifier or reserved word. */
271 do { 271 do {
272 save_and_next(ls); 272 lex_savenext(ls);
273 } while (lj_char_isident(ls->current)); 273 } while (lj_char_isident(ls->c));
274 s = lj_parse_keepstr(ls, ls->sb.buf, ls->sb.n); 274 s = lj_parse_keepstr(ls, sbufB(&ls->sb), sbuflen(&ls->sb));
275 setstrV(ls->L, tv, s); 275 setstrV(ls->L, tv, s);
276 if (s->reserved > 0) /* Reserved word? */ 276 if (s->reserved > 0) /* Reserved word? */
277 return TK_OFS + s->reserved; 277 return TK_OFS + s->reserved;
278 return TK_name; 278 return TK_name;
279 } 279 }
280 switch (ls->current) { 280 switch (ls->c) {
281 case '\n': 281 case '\n':
282 case '\r': 282 case '\r':
283 inclinenumber(ls); 283 lex_newline(ls);
284 continue; 284 continue;
285 case ' ': 285 case ' ':
286 case '\t': 286 case '\t':
287 case '\v': 287 case '\v':
288 case '\f': 288 case '\f':
289 next(ls); 289 lex_next(ls);
290 continue; 290 continue;
291 case '-': 291 case '-':
292 next(ls); 292 lex_next(ls);
293 if (ls->current != '-') return '-'; 293 if (ls->c != '-') return '-';
294 /* else is a comment */ 294 lex_next(ls);
295 next(ls); 295 if (ls->c == '[') { /* Long comment "--[=*[...]=*]". */
296 if (ls->current == '[') { 296 int sep = lex_skipeq(ls);
297 int sep = skip_sep(ls); 297 lj_buf_reset(&ls->sb); /* `lex_skipeq' may dirty the buffer */
298 lj_str_resetbuf(&ls->sb); /* `skip_sep' may dirty the buffer */
299 if (sep >= 0) { 298 if (sep >= 0) {
300 read_long_string(ls, NULL, sep); /* long comment */ 299 lex_longstring(ls, NULL, sep);
301 lj_str_resetbuf(&ls->sb); 300 lj_buf_reset(&ls->sb);
302 continue; 301 continue;
303 } 302 }
304 } 303 }
305 /* else short comment */ 304 /* Short comment "--.*\n". */
306 while (!currIsNewline(ls) && ls->current != END_OF_STREAM) 305 while (!lex_iseol(ls) && ls->c != LEX_EOF)
307 next(ls); 306 lex_next(ls);
308 continue; 307 continue;
309 case '[': { 308 case '[': {
310 int sep = skip_sep(ls); 309 int sep = lex_skipeq(ls);
311 if (sep >= 0) { 310 if (sep >= 0) {
312 read_long_string(ls, tv, sep); 311 lex_longstring(ls, tv, sep);
313 return TK_string; 312 return TK_string;
314 } else if (sep == -1) { 313 } else if (sep == -1) {
315 return '['; 314 return '[';
@@ -319,44 +318,43 @@ static int llex(LexState *ls, TValue *tv)
319 } 318 }
320 } 319 }
321 case '=': 320 case '=':
322 next(ls); 321 lex_next(ls);
323 if (ls->current != '=') return '='; else { next(ls); return TK_eq; } 322 if (ls->c != '=') return '='; else { lex_next(ls); return TK_eq; }
324 case '<': 323 case '<':
325 next(ls); 324 lex_next(ls);
326 if (ls->current != '=') return '<'; else { next(ls); return TK_le; } 325 if (ls->c != '=') return '<'; else { lex_next(ls); return TK_le; }
327 case '>': 326 case '>':
328 next(ls); 327 lex_next(ls);
329 if (ls->current != '=') return '>'; else { next(ls); return TK_ge; } 328 if (ls->c != '=') return '>'; else { lex_next(ls); return TK_ge; }
330 case '~': 329 case '~':
331 next(ls); 330 lex_next(ls);
332 if (ls->current != '=') return '~'; else { next(ls); return TK_ne; } 331 if (ls->c != '=') return '~'; else { lex_next(ls); return TK_ne; }
333 case ':': 332 case ':':
334 next(ls); 333 lex_next(ls);
335 if (ls->current != ':') return ':'; else { next(ls); return TK_label; } 334 if (ls->c != ':') return ':'; else { lex_next(ls); return TK_label; }
336 case '"': 335 case '"':
337 case '\'': 336 case '\'':
338 read_string(ls, ls->current, tv); 337 lex_string(ls, tv);
339 return TK_string; 338 return TK_string;
340 case '.': 339 case '.':
341 save_and_next(ls); 340 if (lex_savenext(ls) == '.') {
342 if (ls->current == '.') { 341 lex_next(ls);
343 next(ls); 342 if (ls->c == '.') {
344 if (ls->current == '.') { 343 lex_next(ls);
345 next(ls);
346 return TK_dots; /* ... */ 344 return TK_dots; /* ... */
347 } 345 }
348 return TK_concat; /* .. */ 346 return TK_concat; /* .. */
349 } else if (!lj_char_isdigit(ls->current)) { 347 } else if (!lj_char_isdigit(ls->c)) {
350 return '.'; 348 return '.';
351 } else { 349 } else {
352 lex_number(ls, tv); 350 lex_number(ls, tv);
353 return TK_number; 351 return TK_number;
354 } 352 }
355 case END_OF_STREAM: 353 case LEX_EOF:
356 return TK_eof; 354 return TK_eof;
357 default: { 355 default: {
358 int c = ls->current; 356 LexChar c = ls->c;
359 next(ls); 357 lex_next(ls);
360 return c; /* Single-char tokens (+ - / ...). */ 358 return c; /* Single-char tokens (+ - / ...). */
361 } 359 }
362 } 360 }
@@ -371,8 +369,7 @@ int lj_lex_setup(lua_State *L, LexState *ls)
371 int header = 0; 369 int header = 0;
372 ls->L = L; 370 ls->L = L;
373 ls->fs = NULL; 371 ls->fs = NULL;
374 ls->n = 0; 372 ls->pe = ls->p = NULL;
375 ls->p = NULL;
376 ls->vstack = NULL; 373 ls->vstack = NULL;
377 ls->sizevstack = 0; 374 ls->sizevstack = 0;
378 ls->vtop = 0; 375 ls->vtop = 0;
@@ -381,24 +378,22 @@ int lj_lex_setup(lua_State *L, LexState *ls)
381 ls->lookahead = TK_eof; /* No look-ahead token. */ 378 ls->lookahead = TK_eof; /* No look-ahead token. */
382 ls->linenumber = 1; 379 ls->linenumber = 1;
383 ls->lastline = 1; 380 ls->lastline = 1;
384 lj_str_resizebuf(ls->L, &ls->sb, LJ_MIN_SBUF); 381 lex_next(ls); /* Read-ahead first char. */
385 next(ls); /* Read-ahead first char. */ 382 if (ls->c == 0xef && ls->p + 2 <= ls->pe && (uint8_t)ls->p[0] == 0xbb &&
386 if (ls->current == 0xef && ls->n >= 2 && char2int(ls->p[0]) == 0xbb && 383 (uint8_t)ls->p[1] == 0xbf) { /* Skip UTF-8 BOM (if buffered). */
387 char2int(ls->p[1]) == 0xbf) { /* Skip UTF-8 BOM (if buffered). */
388 ls->n -= 2;
389 ls->p += 2; 384 ls->p += 2;
390 next(ls); 385 lex_next(ls);
391 header = 1; 386 header = 1;
392 } 387 }
393 if (ls->current == '#') { /* Skip POSIX #! header line. */ 388 if (ls->c == '#') { /* Skip POSIX #! header line. */
394 do { 389 do {
395 next(ls); 390 lex_next(ls);
396 if (ls->current == END_OF_STREAM) return 0; 391 if (ls->c == LEX_EOF) return 0;
397 } while (!currIsNewline(ls)); 392 } while (!lex_iseol(ls));
398 inclinenumber(ls); 393 lex_newline(ls);
399 header = 1; 394 header = 1;
400 } 395 }
401 if (ls->current == LUA_SIGNATURE[0]) { /* Bytecode dump. */ 396 if (ls->c == LUA_SIGNATURE[0]) { /* Bytecode dump. */
402 if (header) { 397 if (header) {
403 /* 398 /*
404 ** Loading bytecode with an extra header is disabled for security 399 ** Loading bytecode with an extra header is disabled for security
@@ -420,55 +415,60 @@ void lj_lex_cleanup(lua_State *L, LexState *ls)
420 global_State *g = G(L); 415 global_State *g = G(L);
421 lj_mem_freevec(g, ls->bcstack, ls->sizebcstack, BCInsLine); 416 lj_mem_freevec(g, ls->bcstack, ls->sizebcstack, BCInsLine);
422 lj_mem_freevec(g, ls->vstack, ls->sizevstack, VarInfo); 417 lj_mem_freevec(g, ls->vstack, ls->sizevstack, VarInfo);
423 lj_str_freebuf(g, &ls->sb); 418 lj_buf_free(g, &ls->sb);
424} 419}
425 420
421/* Return next lexical token. */
426void lj_lex_next(LexState *ls) 422void lj_lex_next(LexState *ls)
427{ 423{
428 ls->lastline = ls->linenumber; 424 ls->lastline = ls->linenumber;
429 if (LJ_LIKELY(ls->lookahead == TK_eof)) { /* No lookahead token? */ 425 if (LJ_LIKELY(ls->lookahead == TK_eof)) { /* No lookahead token? */
430 ls->token = llex(ls, &ls->tokenval); /* Get next token. */ 426 ls->tok = lex_scan(ls, &ls->tokval); /* Get next token. */
431 } else { /* Otherwise return lookahead token. */ 427 } else { /* Otherwise return lookahead token. */
432 ls->token = ls->lookahead; 428 ls->tok = ls->lookahead;
433 ls->lookahead = TK_eof; 429 ls->lookahead = TK_eof;
434 ls->tokenval = ls->lookaheadval; 430 ls->tokval = ls->lookaheadval;
435 } 431 }
436} 432}
437 433
434/* Look ahead for the next token. */
438LexToken lj_lex_lookahead(LexState *ls) 435LexToken lj_lex_lookahead(LexState *ls)
439{ 436{
440 lua_assert(ls->lookahead == TK_eof); 437 lua_assert(ls->lookahead == TK_eof);
441 ls->lookahead = llex(ls, &ls->lookaheadval); 438 ls->lookahead = lex_scan(ls, &ls->lookaheadval);
442 return ls->lookahead; 439 return ls->lookahead;
443} 440}
444 441
445const char *lj_lex_token2str(LexState *ls, LexToken token) 442/* Convert token to string. */
443const char *lj_lex_token2str(LexState *ls, LexToken tok)
446{ 444{
447 if (token > TK_OFS) 445 if (tok > TK_OFS)
448 return tokennames[token-TK_OFS-1]; 446 return tokennames[tok-TK_OFS-1];
449 else if (!lj_char_iscntrl(token)) 447 else if (!lj_char_iscntrl(tok))
450 return lj_str_pushf(ls->L, "%c", token); 448 return lj_strfmt_pushf(ls->L, "%c", tok);
451 else 449 else
452 return lj_str_pushf(ls->L, "char(%d)", token); 450 return lj_strfmt_pushf(ls->L, "char(%d)", tok);
453} 451}
454 452
455void lj_lex_error(LexState *ls, LexToken token, ErrMsg em, ...) 453/* Lexer error. */
454void lj_lex_error(LexState *ls, LexToken tok, ErrMsg em, ...)
456{ 455{
457 const char *tok; 456 const char *tokstr;
458 va_list argp; 457 va_list argp;
459 if (token == 0) { 458 if (tok == 0) {
460 tok = NULL; 459 tokstr = NULL;
461 } else if (token == TK_name || token == TK_string || token == TK_number) { 460 } else if (tok == TK_name || tok == TK_string || tok == TK_number) {
462 save(ls, '\0'); 461 lex_save(ls, '\0');
463 tok = ls->sb.buf; 462 tokstr = sbufB(&ls->sb);
464 } else { 463 } else {
465 tok = lj_lex_token2str(ls, token); 464 tokstr = lj_lex_token2str(ls, tok);
466 } 465 }
467 va_start(argp, em); 466 va_start(argp, em);
468 lj_err_lex(ls->L, ls->chunkname, tok, ls->linenumber, em, argp); 467 lj_err_lex(ls->L, ls->chunkname, tokstr, ls->linenumber, em, argp);
469 va_end(argp); 468 va_end(argp);
470} 469}
471 470
471/* Initialize strings for reserved words. */
472void lj_lex_init(lua_State *L) 472void lj_lex_init(lua_State *L)
473{ 473{
474 uint32_t i; 474 uint32_t i;
diff --git a/src/lj_lex.h b/src/lj_lex.h
index 6e18e4b0..3e76e72a 100644
--- a/src/lj_lex.h
+++ b/src/lj_lex.h
@@ -30,7 +30,8 @@ TKDEF(TKENUM1, TKENUM2)
30 TK_RESERVED = TK_while - TK_OFS 30 TK_RESERVED = TK_while - TK_OFS
31}; 31};
32 32
33typedef int LexToken; 33typedef int LexChar; /* Lexical character. Unsigned ext. from char. */
34typedef int LexToken; /* Lexical token. */
34 35
35/* Combined bytecode ins/line. Only used during bytecode generation. */ 36/* Combined bytecode ins/line. Only used during bytecode generation. */
36typedef struct BCInsLine { 37typedef struct BCInsLine {
@@ -51,13 +52,13 @@ typedef struct VarInfo {
51typedef struct LexState { 52typedef struct LexState {
52 struct FuncState *fs; /* Current FuncState. Defined in lj_parse.c. */ 53 struct FuncState *fs; /* Current FuncState. Defined in lj_parse.c. */
53 struct lua_State *L; /* Lua state. */ 54 struct lua_State *L; /* Lua state. */
54 TValue tokenval; /* Current token value. */ 55 TValue tokval; /* Current token value. */
55 TValue lookaheadval; /* Lookahead token value. */ 56 TValue lookaheadval; /* Lookahead token value. */
56 int current; /* Current character (charint). */
57 LexToken token; /* Current token. */
58 LexToken lookahead; /* Lookahead token. */
59 MSize n; /* Bytes left in input buffer. */
60 const char *p; /* Current position in input buffer. */ 57 const char *p; /* Current position in input buffer. */
58 const char *pe; /* End of input buffer. */
59 LexChar c; /* Current character. */
60 LexToken tok; /* Current token. */
61 LexToken lookahead; /* Lookahead token. */
61 SBuf sb; /* String buffer for tokens. */ 62 SBuf sb; /* String buffer for tokens. */
62 lua_Reader rfunc; /* Reader callback. */ 63 lua_Reader rfunc; /* Reader callback. */
63 void *rdata; /* Reader callback data. */ 64 void *rdata; /* Reader callback data. */
@@ -78,8 +79,8 @@ LJ_FUNC int lj_lex_setup(lua_State *L, LexState *ls);
78LJ_FUNC void lj_lex_cleanup(lua_State *L, LexState *ls); 79LJ_FUNC void lj_lex_cleanup(lua_State *L, LexState *ls);
79LJ_FUNC void lj_lex_next(LexState *ls); 80LJ_FUNC void lj_lex_next(LexState *ls);
80LJ_FUNC LexToken lj_lex_lookahead(LexState *ls); 81LJ_FUNC LexToken lj_lex_lookahead(LexState *ls);
81LJ_FUNC const char *lj_lex_token2str(LexState *ls, LexToken token); 82LJ_FUNC const char *lj_lex_token2str(LexState *ls, LexToken tok);
82LJ_FUNC_NORET void lj_lex_error(LexState *ls, LexToken token, ErrMsg em, ...); 83LJ_FUNC_NORET void lj_lex_error(LexState *ls, LexToken tok, ErrMsg em, ...);
83LJ_FUNC void lj_lex_init(lua_State *L); 84LJ_FUNC void lj_lex_init(lua_State *L);
84 85
85#endif 86#endif
diff --git a/src/lj_lib.c b/src/lj_lib.c
index 331eaa6a..bdbaba1d 100644
--- a/src/lj_lib.c
+++ b/src/lj_lib.c
@@ -18,6 +18,9 @@
18#include "lj_dispatch.h" 18#include "lj_dispatch.h"
19#include "lj_vm.h" 19#include "lj_vm.h"
20#include "lj_strscan.h" 20#include "lj_strscan.h"
21#include "lj_strfmt.h"
22#include "lj_lex.h"
23#include "lj_bcdump.h"
21#include "lj_lib.h" 24#include "lj_lib.h"
22 25
23/* -- Library initialization ---------------------------------------------- */ 26/* -- Library initialization ---------------------------------------------- */
@@ -43,6 +46,28 @@ static GCtab *lib_create_table(lua_State *L, const char *libname, int hsize)
43 return tabV(L->top-1); 46 return tabV(L->top-1);
44} 47}
45 48
49static const uint8_t *lib_read_lfunc(lua_State *L, const uint8_t *p, GCtab *tab)
50{
51 int len = *p++;
52 GCstr *name = lj_str_new(L, (const char *)p, len);
53 LexState ls;
54 GCproto *pt;
55 GCfunc *fn;
56 memset(&ls, 0, sizeof(ls));
57 ls.L = L;
58 ls.p = (const char *)(p+len);
59 ls.pe = (const char *)~(uintptr_t)0;
60 ls.c = -1;
61 ls.level = (BCDUMP_F_STRIP|(LJ_BE*BCDUMP_F_BE));
62 ls.chunkname = name;
63 pt = lj_bcread_proto(&ls);
64 pt->firstline = ~(BCLine)0;
65 fn = lj_func_newL_empty(L, pt, tabref(L->env));
66 /* NOBARRIER: See below for common barrier. */
67 setfuncV(L, lj_tab_setstr(L, tab, name), fn);
68 return (const uint8_t *)ls.p;
69}
70
46void lj_lib_register(lua_State *L, const char *libname, 71void lj_lib_register(lua_State *L, const char *libname,
47 const uint8_t *p, const lua_CFunction *cf) 72 const uint8_t *p, const lua_CFunction *cf)
48{ 73{
@@ -87,6 +112,9 @@ void lj_lib_register(lua_State *L, const char *libname,
87 ofn = fn; 112 ofn = fn;
88 } else { 113 } else {
89 switch (tag | len) { 114 switch (tag | len) {
115 case LIBINIT_LUA:
116 p = lib_read_lfunc(L, p, tab);
117 break;
90 case LIBINIT_SET: 118 case LIBINIT_SET:
91 L->top -= 2; 119 L->top -= 2;
92 if (tvisstr(L->top+1) && strV(L->top+1)->len == 0) 120 if (tvisstr(L->top+1) && strV(L->top+1)->len == 0)
@@ -137,7 +165,7 @@ GCstr *lj_lib_checkstr(lua_State *L, int narg)
137 if (LJ_LIKELY(tvisstr(o))) { 165 if (LJ_LIKELY(tvisstr(o))) {
138 return strV(o); 166 return strV(o);
139 } else if (tvisnumber(o)) { 167 } else if (tvisnumber(o)) {
140 GCstr *s = lj_str_fromnumber(L, o); 168 GCstr *s = lj_strfmt_number(L, o);
141 setstrV(L, o, s); 169 setstrV(L, o, s);
142 return s; 170 return s;
143 } 171 }
@@ -196,20 +224,6 @@ int32_t lj_lib_optint(lua_State *L, int narg, int32_t def)
196 return (o < L->top && !tvisnil(o)) ? lj_lib_checkint(L, narg) : def; 224 return (o < L->top && !tvisnil(o)) ? lj_lib_checkint(L, narg) : def;
197} 225}
198 226
199int32_t lj_lib_checkbit(lua_State *L, int narg)
200{
201 TValue *o = L->base + narg-1;
202 if (!(o < L->top && lj_strscan_numberobj(o)))
203 lj_err_argt(L, narg, LUA_TNUMBER);
204 if (LJ_LIKELY(tvisint(o))) {
205 return intV(o);
206 } else {
207 int32_t i = lj_num2bit(numV(o));
208 if (LJ_DUALNUM) setintV(o, i);
209 return i;
210 }
211}
212
213GCfunc *lj_lib_checkfunc(lua_State *L, int narg) 227GCfunc *lj_lib_checkfunc(lua_State *L, int narg)
214{ 228{
215 TValue *o = L->base + narg-1; 229 TValue *o = L->base + narg-1;
diff --git a/src/lj_lib.h b/src/lj_lib.h
index 2fe6d2a8..2dd45adb 100644
--- a/src/lj_lib.h
+++ b/src/lj_lib.h
@@ -41,7 +41,6 @@ LJ_FUNC void lj_lib_checknumber(lua_State *L, int narg);
41LJ_FUNC lua_Number lj_lib_checknum(lua_State *L, int narg); 41LJ_FUNC lua_Number lj_lib_checknum(lua_State *L, int narg);
42LJ_FUNC int32_t lj_lib_checkint(lua_State *L, int narg); 42LJ_FUNC int32_t lj_lib_checkint(lua_State *L, int narg);
43LJ_FUNC int32_t lj_lib_optint(lua_State *L, int narg, int32_t def); 43LJ_FUNC int32_t lj_lib_optint(lua_State *L, int narg, int32_t def);
44LJ_FUNC int32_t lj_lib_checkbit(lua_State *L, int narg);
45LJ_FUNC GCfunc *lj_lib_checkfunc(lua_State *L, int narg); 44LJ_FUNC GCfunc *lj_lib_checkfunc(lua_State *L, int narg);
46LJ_FUNC GCtab *lj_lib_checktab(lua_State *L, int narg); 45LJ_FUNC GCtab *lj_lib_checktab(lua_State *L, int narg);
47LJ_FUNC GCtab *lj_lib_checktabornil(lua_State *L, int narg); 46LJ_FUNC GCtab *lj_lib_checktabornil(lua_State *L, int narg);
@@ -77,6 +76,7 @@ static LJ_AINLINE void lj_lib_pushcc(lua_State *L, lua_CFunction f,
77#define LJLIB_CF(name) static int lj_cf_##name(lua_State *L) 76#define LJLIB_CF(name) static int lj_cf_##name(lua_State *L)
78#define LJLIB_ASM(name) static int lj_ffh_##name(lua_State *L) 77#define LJLIB_ASM(name) static int lj_ffh_##name(lua_State *L)
79#define LJLIB_ASM_(name) 78#define LJLIB_ASM_(name)
79#define LJLIB_LUA(name)
80#define LJLIB_SET(name) 80#define LJLIB_SET(name)
81#define LJLIB_PUSH(arg) 81#define LJLIB_PUSH(arg)
82#define LJLIB_REC(handler) 82#define LJLIB_REC(handler)
@@ -96,7 +96,8 @@ LJ_FUNC void lj_lib_register(lua_State *L, const char *libname,
96#define LIBINIT_ASM 0x40 96#define LIBINIT_ASM 0x40
97#define LIBINIT_ASM_ 0x80 97#define LIBINIT_ASM_ 0x80
98#define LIBINIT_STRING 0xc0 98#define LIBINIT_STRING 0xc0
99#define LIBINIT_MAXSTR 0x39 99#define LIBINIT_MAXSTR 0x38
100#define LIBINIT_LUA 0xf9
100#define LIBINIT_SET 0xfa 101#define LIBINIT_SET 0xfa
101#define LIBINIT_NUMBER 0xfb 102#define LIBINIT_NUMBER 0xfb
102#define LIBINIT_COPY 0xfc 103#define LIBINIT_COPY 0xfc
diff --git a/src/lj_load.c b/src/lj_load.c
index 9d892678..37dde79f 100644
--- a/src/lj_load.c
+++ b/src/lj_load.c
@@ -15,7 +15,7 @@
15#include "lj_obj.h" 15#include "lj_obj.h"
16#include "lj_gc.h" 16#include "lj_gc.h"
17#include "lj_err.h" 17#include "lj_err.h"
18#include "lj_str.h" 18#include "lj_buf.h"
19#include "lj_func.h" 19#include "lj_func.h"
20#include "lj_frame.h" 20#include "lj_frame.h"
21#include "lj_vm.h" 21#include "lj_vm.h"
@@ -54,7 +54,7 @@ LUA_API int lua_loadx(lua_State *L, lua_Reader reader, void *data,
54 ls.rdata = data; 54 ls.rdata = data;
55 ls.chunkarg = chunkname ? chunkname : "?"; 55 ls.chunkarg = chunkname ? chunkname : "?";
56 ls.mode = mode; 56 ls.mode = mode;
57 lj_str_initbuf(&ls.sb); 57 lj_buf_init(L, &ls.sb);
58 status = lj_vm_cpcall(L, NULL, &ls, cpparser); 58 status = lj_vm_cpcall(L, NULL, &ls, cpparser);
59 lj_lex_cleanup(L, &ls); 59 lj_lex_cleanup(L, &ls);
60 lj_gc_check(L); 60 lj_gc_check(L);
diff --git a/src/lj_meta.c b/src/lj_meta.c
index 441d571a..6cbfd927 100644
--- a/src/lj_meta.c
+++ b/src/lj_meta.c
@@ -12,6 +12,7 @@
12#include "lj_obj.h" 12#include "lj_obj.h"
13#include "lj_gc.h" 13#include "lj_gc.h"
14#include "lj_err.h" 14#include "lj_err.h"
15#include "lj_buf.h"
15#include "lj_str.h" 16#include "lj_str.h"
16#include "lj_tab.h" 17#include "lj_tab.h"
17#include "lj_meta.h" 18#include "lj_meta.h"
@@ -19,6 +20,8 @@
19#include "lj_bc.h" 20#include "lj_bc.h"
20#include "lj_vm.h" 21#include "lj_vm.h"
21#include "lj_strscan.h" 22#include "lj_strscan.h"
23#include "lj_strfmt.h"
24#include "lj_lib.h"
22 25
23/* -- Metamethod handling ------------------------------------------------- */ 26/* -- Metamethod handling ------------------------------------------------- */
24 27
@@ -225,27 +228,14 @@ TValue *lj_meta_arith(lua_State *L, TValue *ra, cTValue *rb, cTValue *rc,
225 } 228 }
226} 229}
227 230
228/* In-place coercion of a number to a string. */
229static LJ_AINLINE int tostring(lua_State *L, TValue *o)
230{
231 if (tvisstr(o)) {
232 return 1;
233 } else if (tvisnumber(o)) {
234 setstrV(L, o, lj_str_fromnumber(L, o));
235 return 1;
236 } else {
237 return 0;
238 }
239}
240
241/* Helper for CAT. Coercion, iterative concat, __concat metamethod. */ 231/* Helper for CAT. Coercion, iterative concat, __concat metamethod. */
242TValue *lj_meta_cat(lua_State *L, TValue *top, int left) 232TValue *lj_meta_cat(lua_State *L, TValue *top, int left)
243{ 233{
244 int fromc = 0; 234 int fromc = 0;
245 if (left < 0) { left = -left; fromc = 1; } 235 if (left < 0) { left = -left; fromc = 1; }
246 do { 236 do {
247 int n = 1; 237 if (!(tvisstr(top) || tvisnumber(top)) ||
248 if (!(tvisstr(top-1) || tvisnumber(top-1)) || !tostring(L, top)) { 238 !(tvisstr(top-1) || tvisnumber(top-1))) {
249 cTValue *mo = lj_meta_lookup(L, top-1, MM_concat); 239 cTValue *mo = lj_meta_lookup(L, top-1, MM_concat);
250 if (tvisnil(mo)) { 240 if (tvisnil(mo)) {
251 mo = lj_meta_lookup(L, top, MM_concat); 241 mo = lj_meta_lookup(L, top, MM_concat);
@@ -271,8 +261,6 @@ TValue *lj_meta_cat(lua_State *L, TValue *top, int left)
271 copyTV(L, top, mo); 261 copyTV(L, top, mo);
272 setcont(top-1, lj_cont_cat); 262 setcont(top-1, lj_cont_cat);
273 return top+1; /* Trigger metamethod call. */ 263 return top+1; /* Trigger metamethod call. */
274 } else if (strV(top)->len == 0) { /* Shortcut. */
275 (void)tostring(L, top-1);
276 } else { 264 } else {
277 /* Pick as many strings as possible from the top and concatenate them: 265 /* Pick as many strings as possible from the top and concatenate them:
278 ** 266 **
@@ -281,27 +269,28 @@ TValue *lj_meta_cat(lua_State *L, TValue *top, int left)
281 ** concat: [...][CAT stack ...] [result] 269 ** concat: [...][CAT stack ...] [result]
282 ** next step: [...][CAT stack ............] 270 ** next step: [...][CAT stack ............]
283 */ 271 */
284 MSize tlen = strV(top)->len; 272 TValue *e, *o = top;
285 char *buffer; 273 uint64_t tlen = tvisstr(o) ? strV(o)->len : STRFMT_MAXBUF_NUM;
286 int i; 274 char *p, *buf;
287 for (n = 1; n <= left && tostring(L, top-n); n++) { 275 do {
288 MSize len = strV(top-n)->len; 276 o--; tlen += tvisstr(o) ? strV(o)->len : STRFMT_MAXBUF_NUM;
289 if (len >= LJ_MAX_STR - tlen) 277 } while (--left > 0 && (tvisstr(o-1) || tvisnumber(o-1)));
290 lj_err_msg(L, LJ_ERR_STROV); 278 if (tlen >= LJ_MAX_STR) lj_err_msg(L, LJ_ERR_STROV);
291 tlen += len; 279 p = buf = lj_buf_tmp(L, (MSize)tlen);
292 } 280 for (e = top, top = o; o <= e; o++) {
293 buffer = lj_str_needbuf(L, &G(L)->tmpbuf, tlen); 281 if (tvisstr(o)) {
294 n--; 282 GCstr *s = strV(o);
295 tlen = 0; 283 MSize len = s->len;
296 for (i = n; i >= 0; i--) { 284 p = lj_buf_wmem(p, strdata(s), len);
297 MSize len = strV(top-i)->len; 285 } else if (tvisint(o)) {
298 memcpy(buffer + tlen, strVdata(top-i), len); 286 p = lj_strfmt_wint(p, intV(o));
299 tlen += len; 287 } else {
288 lua_assert(tvisnum(o));
289 p = lj_strfmt_wnum(p, o);
290 }
300 } 291 }
301 setstrV(L, top-n, lj_str_new(L, buffer, tlen)); 292 setstrV(L, top, lj_str_new(L, buf, (size_t)(p-buf)));
302 } 293 }
303 left -= n;
304 top -= n;
305 } while (left >= 1); 294 } while (left >= 1);
306 if (LJ_UNLIKELY(G(L)->gc.total >= G(L)->gc.threshold)) { 295 if (LJ_UNLIKELY(G(L)->gc.total >= G(L)->gc.threshold)) {
307 if (!fromc) L->top = curr_topL(L); 296 if (!fromc) L->top = curr_topL(L);
@@ -423,6 +412,18 @@ TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op)
423 } 412 }
424} 413}
425 414
415/* Helper for ISTYPE and ISNUM. Implicit coercion or error. */
416void lj_meta_istype(lua_State *L, BCReg ra, BCReg tp)
417{
418 L->top = curr_topL(L);
419 ra++; tp--;
420 lua_assert(LJ_DUALNUM || tp != ~LJ_TNUMX); /* ISTYPE -> ISNUM broken. */
421 if (LJ_DUALNUM && tp == ~LJ_TNUMX) lj_lib_checkint(L, ra);
422 else if (tp == ~LJ_TNUMX+1) lj_lib_checknum(L, ra);
423 else if (tp == ~LJ_TSTR) lj_lib_checkstr(L, ra);
424 else lj_err_argtype(L, ra, lj_obj_itypename[tp]);
425}
426
426/* Helper for calls. __call metamethod. */ 427/* Helper for calls. __call metamethod. */
427void lj_meta_call(lua_State *L, TValue *func, TValue *top) 428void lj_meta_call(lua_State *L, TValue *func, TValue *top)
428{ 429{
diff --git a/src/lj_meta.h b/src/lj_meta.h
index 6af5e514..970398ec 100644
--- a/src/lj_meta.h
+++ b/src/lj_meta.h
@@ -31,6 +31,7 @@ LJ_FUNCA TValue * LJ_FASTCALL lj_meta_len(lua_State *L, cTValue *o);
31LJ_FUNCA TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne); 31LJ_FUNCA TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne);
32LJ_FUNCA TValue * LJ_FASTCALL lj_meta_equal_cd(lua_State *L, BCIns ins); 32LJ_FUNCA TValue * LJ_FASTCALL lj_meta_equal_cd(lua_State *L, BCIns ins);
33LJ_FUNCA TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op); 33LJ_FUNCA TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op);
34LJ_FUNCA void lj_meta_istype(lua_State *L, BCReg ra, BCReg tp);
34LJ_FUNCA void lj_meta_call(lua_State *L, TValue *func, TValue *top); 35LJ_FUNCA void lj_meta_call(lua_State *L, TValue *func, TValue *top);
35LJ_FUNCA void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *o); 36LJ_FUNCA void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *o);
36 37
diff --git a/src/lj_obj.c b/src/lj_obj.c
index 322b7bec..208e4955 100644
--- a/src/lj_obj.c
+++ b/src/lj_obj.c
@@ -20,7 +20,7 @@ LJ_DATADEF const char *const lj_obj_itypename[] = { /* ORDER LJ_T */
20}; 20};
21 21
22/* Compare two objects without calling metamethods. */ 22/* Compare two objects without calling metamethods. */
23int lj_obj_equal(cTValue *o1, cTValue *o2) 23int LJ_FASTCALL lj_obj_equal(cTValue *o1, cTValue *o2)
24{ 24{
25 if (itype(o1) == itype(o2)) { 25 if (itype(o1) == itype(o2)) {
26 if (tvispri(o1)) 26 if (tvispri(o1))
@@ -33,3 +33,18 @@ int lj_obj_equal(cTValue *o1, cTValue *o2)
33 return numberVnum(o1) == numberVnum(o2); 33 return numberVnum(o1) == numberVnum(o2);
34} 34}
35 35
36/* Return pointer to object or its object data. */
37const void * LJ_FASTCALL lj_obj_ptr(cTValue *o)
38{
39 if (tvisudata(o))
40 return uddata(udataV(o));
41 else if (tvislightud(o))
42 return lightudV(o);
43 else if (LJ_HASFFI && tviscdata(o))
44 return cdataptr(cdataV(o));
45 else if (tvisgcv(o))
46 return gcV(o);
47 else
48 return NULL;
49}
50
diff --git a/src/lj_obj.h b/src/lj_obj.h
index b967819d..5a05f38d 100644
--- a/src/lj_obj.h
+++ b/src/lj_obj.h
@@ -119,11 +119,12 @@ typedef int32_t BCLine; /* Bytecode line number. */
119/* Internal assembler functions. Never call these directly from C. */ 119/* Internal assembler functions. Never call these directly from C. */
120typedef void (*ASMFunction)(void); 120typedef void (*ASMFunction)(void);
121 121
122/* Resizable string buffer. Need this here, details in lj_str.h. */ 122/* Resizable string buffer. Need this here, details in lj_buf.h. */
123typedef struct SBuf { 123typedef struct SBuf {
124 char *buf; /* String buffer base. */ 124 MRef p; /* String buffer pointer. */
125 MSize n; /* String buffer length. */ 125 MRef e; /* String buffer end pointer. */
126 MSize sz; /* String buffer size. */ 126 MRef b; /* String buffer base. */
127 MRef L; /* lua_State, used for buffer resizing. */
127} SBuf; 128} SBuf;
128 129
129/* -- Tags and values ----------------------------------------------------- */ 130/* -- Tags and values ----------------------------------------------------- */
@@ -516,8 +517,8 @@ typedef struct global_State {
516 lua_Alloc allocf; /* Memory allocator. */ 517 lua_Alloc allocf; /* Memory allocator. */
517 void *allocd; /* Memory allocator data. */ 518 void *allocd; /* Memory allocator data. */
518 GCState gc; /* Garbage collector. */ 519 GCState gc; /* Garbage collector. */
519 SBuf tmpbuf; /* Temporary buffer for string concatenation. */ 520 volatile int32_t vmstate; /* VM state or current JIT code trace number. */
520 Node nilnode; /* Fallback 1-element hash part (nil key and value). */ 521 SBuf tmpbuf; /* Temporary string buffer. */
521 GCstr strempty; /* Empty string. */ 522 GCstr strempty; /* Empty string. */
522 uint8_t stremptyz; /* Zero terminator of empty string. */ 523 uint8_t stremptyz; /* Zero terminator of empty string. */
523 uint8_t hookmask; /* Hook mask. */ 524 uint8_t hookmask; /* Hook mask. */
@@ -526,13 +527,13 @@ typedef struct global_State {
526 GCRef mainthref; /* Link to main thread. */ 527 GCRef mainthref; /* Link to main thread. */
527 TValue registrytv; /* Anchor for registry. */ 528 TValue registrytv; /* Anchor for registry. */
528 TValue tmptv, tmptv2; /* Temporary TValues. */ 529 TValue tmptv, tmptv2; /* Temporary TValues. */
530 Node nilnode; /* Fallback 1-element hash part (nil key and value). */
529 GCupval uvhead; /* Head of double-linked list of all open upvalues. */ 531 GCupval uvhead; /* Head of double-linked list of all open upvalues. */
530 int32_t hookcount; /* Instruction hook countdown. */ 532 int32_t hookcount; /* Instruction hook countdown. */
531 int32_t hookcstart; /* Start count for instruction hook counter. */ 533 int32_t hookcstart; /* Start count for instruction hook counter. */
532 lua_Hook hookf; /* Hook function. */ 534 lua_Hook hookf; /* Hook function. */
533 lua_CFunction wrapf; /* Wrapper for C function calls. */ 535 lua_CFunction wrapf; /* Wrapper for C function calls. */
534 lua_CFunction panic; /* Called as a last resort for errors. */ 536 lua_CFunction panic; /* Called as a last resort for errors. */
535 volatile int32_t vmstate; /* VM state or current JIT code trace number. */
536 BCIns bc_cfunc_int; /* Bytecode for internal C function calls. */ 537 BCIns bc_cfunc_int; /* Bytecode for internal C function calls. */
537 BCIns bc_cfunc_ext; /* Bytecode for external C function calls. */ 538 BCIns bc_cfunc_ext; /* Bytecode for external C function calls. */
538 GCRef jit_L; /* Current JIT code lua_State or NULL. */ 539 GCRef jit_L; /* Current JIT code lua_State or NULL. */
@@ -810,11 +811,7 @@ static LJ_AINLINE int32_t lj_num2bit(lua_Number n)
810#endif 811#endif
811} 812}
812 813
813#if LJ_TARGET_X86 && !defined(__SSE2__)
814#define lj_num2int(n) lj_num2bit((n))
815#else
816#define lj_num2int(n) ((int32_t)(n)) 814#define lj_num2int(n) ((int32_t)(n))
817#endif
818 815
819static LJ_AINLINE uint64_t lj_num2u64(lua_Number n) 816static LJ_AINLINE uint64_t lj_num2u64(lua_Number n)
820{ 817{
@@ -851,6 +848,7 @@ LJ_DATA const char *const lj_obj_itypename[~LJ_TNUMX+1];
851#define lj_typename(o) (lj_obj_itypename[itypemap(o)]) 848#define lj_typename(o) (lj_obj_itypename[itypemap(o)])
852 849
853/* Compare two objects without calling metamethods. */ 850/* Compare two objects without calling metamethods. */
854LJ_FUNC int lj_obj_equal(cTValue *o1, cTValue *o2); 851LJ_FUNC int LJ_FASTCALL lj_obj_equal(cTValue *o1, cTValue *o2);
852LJ_FUNC const void * LJ_FASTCALL lj_obj_ptr(cTValue *o);
855 853
856#endif 854#endif
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
index fe37b98a..1d37a7fd 100644
--- a/src/lj_opt_fold.c
+++ b/src/lj_opt_fold.c
@@ -14,18 +14,21 @@
14 14
15#if LJ_HASJIT 15#if LJ_HASJIT
16 16
17#include "lj_buf.h"
17#include "lj_str.h" 18#include "lj_str.h"
18#include "lj_tab.h" 19#include "lj_tab.h"
19#include "lj_ir.h" 20#include "lj_ir.h"
20#include "lj_jit.h" 21#include "lj_jit.h"
22#include "lj_ircall.h"
21#include "lj_iropt.h" 23#include "lj_iropt.h"
22#include "lj_trace.h" 24#include "lj_trace.h"
23#if LJ_HASFFI 25#if LJ_HASFFI
24#include "lj_ctype.h" 26#include "lj_ctype.h"
25#endif
26#include "lj_carith.h" 27#include "lj_carith.h"
28#endif
27#include "lj_vm.h" 29#include "lj_vm.h"
28#include "lj_strscan.h" 30#include "lj_strscan.h"
31#include "lj_strfmt.h"
29 32
30/* Here's a short description how the FOLD engine processes instructions: 33/* Here's a short description how the FOLD engine processes instructions:
31** 34**
@@ -155,13 +158,14 @@ typedef IRRef (LJ_FASTCALL *FoldFunc)(jit_State *J);
155 158
156/* Barrier to prevent folding across a GC step. 159/* Barrier to prevent folding across a GC step.
157** GC steps can only happen at the head of a trace and at LOOP. 160** GC steps can only happen at the head of a trace and at LOOP.
158** And the GC is only driven forward if there is at least one allocation. 161** And the GC is only driven forward if there's at least one allocation.
159*/ 162*/
160#define gcstep_barrier(J, ref) \ 163#define gcstep_barrier(J, ref) \
161 ((ref) < J->chain[IR_LOOP] && \ 164 ((ref) < J->chain[IR_LOOP] && \
162 (J->chain[IR_SNEW] || J->chain[IR_XSNEW] || \ 165 (J->chain[IR_SNEW] || J->chain[IR_XSNEW] || \
163 J->chain[IR_TNEW] || J->chain[IR_TDUP] || \ 166 J->chain[IR_TNEW] || J->chain[IR_TDUP] || \
164 J->chain[IR_CNEW] || J->chain[IR_CNEWI] || J->chain[IR_TOSTR])) 167 J->chain[IR_CNEW] || J->chain[IR_CNEWI] || \
168 J->chain[IR_BUFSTR] || J->chain[IR_TOSTR]))
165 169
166/* -- Constant folding for FP numbers ------------------------------------- */ 170/* -- Constant folding for FP numbers ------------------------------------- */
167 171
@@ -336,11 +340,9 @@ LJFOLDF(kfold_intcomp0)
336static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op) 340static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op)
337{ 341{
338 switch (op) { 342 switch (op) {
339#if LJ_64 || LJ_HASFFI 343#if LJ_HASFFI
340 case IR_ADD: k1 += k2; break; 344 case IR_ADD: k1 += k2; break;
341 case IR_SUB: k1 -= k2; break; 345 case IR_SUB: k1 -= k2; break;
342#endif
343#if LJ_HASFFI
344 case IR_MUL: k1 *= k2; break; 346 case IR_MUL: k1 *= k2; break;
345 case IR_BAND: k1 &= k2; break; 347 case IR_BAND: k1 &= k2; break;
346 case IR_BOR: k1 |= k2; break; 348 case IR_BOR: k1 |= k2; break;
@@ -392,20 +394,10 @@ LJFOLD(BROL KINT64 KINT)
392LJFOLD(BROR KINT64 KINT) 394LJFOLD(BROR KINT64 KINT)
393LJFOLDF(kfold_int64shift) 395LJFOLDF(kfold_int64shift)
394{ 396{
395#if LJ_HASFFI || LJ_64 397#if LJ_HASFFI
396 uint64_t k = ir_k64(fleft)->u64; 398 uint64_t k = ir_k64(fleft)->u64;
397 int32_t sh = (fright->i & 63); 399 int32_t sh = (fright->i & 63);
398 switch ((IROp)fins->o) { 400 return INT64FOLD(lj_carith_shift64(k, sh, fins->o - IR_BSHL));
399 case IR_BSHL: k <<= sh; break;
400#if LJ_HASFFI
401 case IR_BSHR: k >>= sh; break;
402 case IR_BSAR: k = (uint64_t)((int64_t)k >> sh); break;
403 case IR_BROL: k = lj_rol(k, sh); break;
404 case IR_BROR: k = lj_ror(k, sh); break;
405#endif
406 default: lua_assert(0); break;
407 }
408 return INT64FOLD(k);
409#else 401#else
410 UNUSED(J); lua_assert(0); return FAILFOLD; 402 UNUSED(J); lua_assert(0); return FAILFOLD;
411#endif 403#endif
@@ -527,6 +519,179 @@ LJFOLDF(kfold_strcmp)
527 return NEXTFOLD; 519 return NEXTFOLD;
528} 520}
529 521
522/* -- Constant folding and forwarding for buffers ------------------------- */
523
524/*
525** Buffer ops perform stores, but their effect is limited to the buffer
526** itself. Also, buffer ops are chained: a use of an op implies a use of
527** all other ops up the chain. Conversely, if an op is unused, all ops
528** up the chain can go unused. This largely eliminates the need to treat
529** them as stores.
530**
531** Alas, treating them as normal (IRM_N) ops doesn't work, because they
532** cannot be CSEd in isolation. CSE for IRM_N is implicitly done in LOOP
533** or if FOLD is disabled.
534**
535** The compromise is to declare them as loads, emit them like stores and
536** CSE whole chains manually when the BUFSTR is to be emitted. Any chain
537** fragments left over from CSE are eliminated by DCE.
538*/
539
540/* BUFHDR is emitted like a store, see below. */
541
542LJFOLD(BUFPUT BUFHDR BUFSTR)
543LJFOLDF(bufput_append)
544{
545 /* New buffer, no other buffer op in between and same buffer? */
546 if ((J->flags & JIT_F_OPT_FWD) &&
547 !(fleft->op2 & IRBUFHDR_APPEND) &&
548 fleft->prev == fright->op2 &&
549 fleft->op1 == IR(fright->op2)->op1) {
550 IRRef ref = fins->op1;
551 IR(ref)->op2 = (fleft->op2 | IRBUFHDR_APPEND); /* Modify BUFHDR. */
552 IR(ref)->op1 = fright->op1;
553 return ref;
554 }
555 return EMITFOLD; /* Always emit, CSE later. */
556}
557
558LJFOLD(BUFPUT any any)
559LJFOLDF(bufput_kgc)
560{
561 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fright->o == IR_KGC) {
562 GCstr *s2 = ir_kstr(fright);
563 if (s2->len == 0) { /* Empty string? */
564 return LEFTFOLD;
565 } else {
566 if (fleft->o == IR_BUFPUT && irref_isk(fleft->op2) &&
567 !irt_isphi(fleft->t)) { /* Join two constant string puts in a row. */
568 GCstr *s1 = ir_kstr(IR(fleft->op2));
569 IRRef kref = lj_ir_kstr(J, lj_buf_cat2str(J->L, s1, s2));
570 /* lj_ir_kstr() may realloc the IR and invalidate any IRIns *. */
571 IR(fins->op1)->op2 = kref; /* Modify previous BUFPUT. */
572 return fins->op1;
573 }
574 }
575 }
576 return EMITFOLD; /* Always emit, CSE later. */
577}
578
579LJFOLD(BUFSTR any any)
580LJFOLDF(bufstr_kfold_cse)
581{
582 lua_assert(fleft->o == IR_BUFHDR || fleft->o == IR_BUFPUT ||
583 fleft->o == IR_CALLL);
584 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
585 if (fleft->o == IR_BUFHDR) { /* No put operations? */
586 if (!(fleft->op2 & IRBUFHDR_APPEND)) /* Empty buffer? */
587 return lj_ir_kstr(J, &J2G(J)->strempty);
588 fins->op1 = fleft->prev; /* Relies on checks in bufput_append. */
589 return CSEFOLD;
590 } else if (fleft->o == IR_BUFPUT) {
591 IRIns *irb = IR(fleft->op1);
592 if (irb->o == IR_BUFHDR && !(irb->op2 & IRBUFHDR_APPEND))
593 return fleft->op2; /* Shortcut for a single put operation. */
594 }
595 }
596 /* Try to CSE the whole chain. */
597 if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
598 IRRef ref = J->chain[IR_BUFSTR];
599 while (ref) {
600 IRIns *irs = IR(ref), *ira = fleft, *irb = IR(irs->op1);
601 while (ira->o == irb->o && ira->op2 == irb->op2) {
602 lua_assert(ira->o == IR_BUFHDR || ira->o == IR_BUFPUT ||
603 ira->o == IR_CALLL || ira->o == IR_CARG);
604 if (ira->o == IR_BUFHDR && !(ira->op2 & IRBUFHDR_APPEND))
605 return ref; /* CSE succeeded. */
606 if (ira->o == IR_CALLL && ira->op2 == IRCALL_lj_buf_puttab)
607 break;
608 ira = IR(ira->op1);
609 irb = IR(irb->op1);
610 }
611 ref = irs->prev;
612 }
613 }
614 return EMITFOLD; /* No CSE possible. */
615}
616
617LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_reverse)
618LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_upper)
619LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_lower)
620LJFOLD(CALLL CARG IRCALL_lj_strfmt_putquoted)
621LJFOLDF(bufput_kfold_op)
622{
623 if (irref_isk(fleft->op2)) {
624 const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
625 SBuf *sb = lj_buf_tmp_(J->L);
626 sb = ((SBuf * (LJ_FASTCALL *)(SBuf *, GCstr *))ci->func)(sb,
627 ir_kstr(IR(fleft->op2)));
628 fins->o = IR_BUFPUT;
629 fins->op1 = fleft->op1;
630 fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
631 return RETRYFOLD;
632 }
633 return EMITFOLD; /* Always emit, CSE later. */
634}
635
636LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_rep)
637LJFOLDF(bufput_kfold_rep)
638{
639 if (irref_isk(fleft->op2)) {
640 IRIns *irc = IR(fleft->op1);
641 if (irref_isk(irc->op2)) {
642 SBuf *sb = lj_buf_tmp_(J->L);
643 sb = lj_buf_putstr_rep(sb, ir_kstr(IR(irc->op2)), IR(fleft->op2)->i);
644 fins->o = IR_BUFPUT;
645 fins->op1 = irc->op1;
646 fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
647 return RETRYFOLD;
648 }
649 }
650 return EMITFOLD; /* Always emit, CSE later. */
651}
652
653LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfxint)
654LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum_int)
655LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum_uint)
656LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum)
657LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfstr)
658LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfchar)
659LJFOLDF(bufput_kfold_fmt)
660{
661 IRIns *irc = IR(fleft->op1);
662 lua_assert(irref_isk(irc->op2)); /* SFormat must be const. */
663 if (irref_isk(fleft->op2)) {
664 SFormat sf = (SFormat)IR(irc->op2)->i;
665 IRIns *ira = IR(fleft->op2);
666 SBuf *sb = lj_buf_tmp_(J->L);
667 switch (fins->op2) {
668 case IRCALL_lj_strfmt_putfxint:
669 sb = lj_strfmt_putfxint(sb, sf, ir_k64(ira)->u64);
670 break;
671 case IRCALL_lj_strfmt_putfstr:
672 sb = lj_strfmt_putfstr(sb, sf, ir_kstr(ira));
673 break;
674 case IRCALL_lj_strfmt_putfchar:
675 sb = lj_strfmt_putfchar(sb, sf, ira->i);
676 break;
677 case IRCALL_lj_strfmt_putfnum_int:
678 case IRCALL_lj_strfmt_putfnum_uint:
679 case IRCALL_lj_strfmt_putfnum:
680 default: {
681 const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
682 sb = ((SBuf * (*)(SBuf *, SFormat, lua_Number))ci->func)(sb, sf,
683 ir_knum(ira)->n);
684 break;
685 }
686 }
687 fins->o = IR_BUFPUT;
688 fins->op1 = irc->op1;
689 fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
690 return RETRYFOLD;
691 }
692 return EMITFOLD; /* Always emit, CSE later. */
693}
694
530/* -- Constant folding of pointer arithmetic ------------------------------ */ 695/* -- Constant folding of pointer arithmetic ------------------------------ */
531 696
532LJFOLD(ADD KGC KINT) 697LJFOLD(ADD KGC KINT)
@@ -647,27 +812,22 @@ LJFOLD(CONV KNUM IRCONV_INT_NUM)
647LJFOLDF(kfold_conv_knum_int_num) 812LJFOLDF(kfold_conv_knum_int_num)
648{ 813{
649 lua_Number n = knumleft; 814 lua_Number n = knumleft;
650 if (!(fins->op2 & IRCONV_TRUNC)) { 815 int32_t k = lj_num2int(n);
651 int32_t k = lj_num2int(n); 816 if (irt_isguard(fins->t) && n != (lua_Number)k) {
652 if (irt_isguard(fins->t) && n != (lua_Number)k) { 817 /* We're about to create a guard which always fails, like CONV +1.5.
653 /* We're about to create a guard which always fails, like CONV +1.5. 818 ** Some pathological loops cause this during LICM, e.g.:
654 ** Some pathological loops cause this during LICM, e.g.: 819 ** local x,k,t = 0,1.5,{1,[1.5]=2}
655 ** local x,k,t = 0,1.5,{1,[1.5]=2} 820 ** for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end
656 ** for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end 821 ** assert(x == 300)
657 ** assert(x == 300) 822 */
658 */ 823 return FAILFOLD;
659 return FAILFOLD;
660 }
661 return INTFOLD(k);
662 } else {
663 return INTFOLD((int32_t)n);
664 } 824 }
825 return INTFOLD(k);
665} 826}
666 827
667LJFOLD(CONV KNUM IRCONV_U32_NUM) 828LJFOLD(CONV KNUM IRCONV_U32_NUM)
668LJFOLDF(kfold_conv_knum_u32_num) 829LJFOLDF(kfold_conv_knum_u32_num)
669{ 830{
670 lua_assert((fins->op2 & IRCONV_TRUNC));
671#ifdef _MSC_VER 831#ifdef _MSC_VER
672 { /* Workaround for MSVC bug. */ 832 { /* Workaround for MSVC bug. */
673 volatile uint32_t u = (uint32_t)knumleft; 833 volatile uint32_t u = (uint32_t)knumleft;
@@ -681,27 +841,27 @@ LJFOLDF(kfold_conv_knum_u32_num)
681LJFOLD(CONV KNUM IRCONV_I64_NUM) 841LJFOLD(CONV KNUM IRCONV_I64_NUM)
682LJFOLDF(kfold_conv_knum_i64_num) 842LJFOLDF(kfold_conv_knum_i64_num)
683{ 843{
684 lua_assert((fins->op2 & IRCONV_TRUNC));
685 return INT64FOLD((uint64_t)(int64_t)knumleft); 844 return INT64FOLD((uint64_t)(int64_t)knumleft);
686} 845}
687 846
688LJFOLD(CONV KNUM IRCONV_U64_NUM) 847LJFOLD(CONV KNUM IRCONV_U64_NUM)
689LJFOLDF(kfold_conv_knum_u64_num) 848LJFOLDF(kfold_conv_knum_u64_num)
690{ 849{
691 lua_assert((fins->op2 & IRCONV_TRUNC));
692 return INT64FOLD(lj_num2u64(knumleft)); 850 return INT64FOLD(lj_num2u64(knumleft));
693} 851}
694 852
695LJFOLD(TOSTR KNUM) 853LJFOLD(TOSTR KNUM any)
696LJFOLDF(kfold_tostr_knum) 854LJFOLDF(kfold_tostr_knum)
697{ 855{
698 return lj_ir_kstr(J, lj_str_fromnum(J->L, &knumleft)); 856 return lj_ir_kstr(J, lj_strfmt_num(J->L, ir_knum(fleft)));
699} 857}
700 858
701LJFOLD(TOSTR KINT) 859LJFOLD(TOSTR KINT any)
702LJFOLDF(kfold_tostr_kint) 860LJFOLDF(kfold_tostr_kint)
703{ 861{
704 return lj_ir_kstr(J, lj_str_fromint(J->L, fleft->i)); 862 return lj_ir_kstr(J, fins->op2 == IRTOSTR_INT ?
863 lj_strfmt_int(J->L, fleft->i) :
864 lj_strfmt_char(J->L, fleft->i));
705} 865}
706 866
707LJFOLD(STRTO KGC) 867LJFOLD(STRTO KGC)
@@ -1199,7 +1359,9 @@ static TRef simplify_intmul_k(jit_State *J, int32_t k)
1199 ** But this is mainly intended for simple address arithmetic. 1359 ** But this is mainly intended for simple address arithmetic.
1200 ** Also it's easier for the backend to optimize the original multiplies. 1360 ** Also it's easier for the backend to optimize the original multiplies.
1201 */ 1361 */
1202 if (k == 1) { /* i * 1 ==> i */ 1362 if (k == 0) { /* i * 0 ==> 0 */
1363 return RIGHTFOLD;
1364 } else if (k == 1) { /* i * 1 ==> i */
1203 return LEFTFOLD; 1365 return LEFTFOLD;
1204 } else if ((k & (k-1)) == 0) { /* i * 2^k ==> i << k */ 1366 } else if ((k & (k-1)) == 0) { /* i * 2^k ==> i << k */
1205 fins->o = IR_BSHL; 1367 fins->o = IR_BSHL;
@@ -1212,9 +1374,7 @@ static TRef simplify_intmul_k(jit_State *J, int32_t k)
1212LJFOLD(MUL any KINT) 1374LJFOLD(MUL any KINT)
1213LJFOLDF(simplify_intmul_k32) 1375LJFOLDF(simplify_intmul_k32)
1214{ 1376{
1215 if (fright->i == 0) /* i * 0 ==> 0 */ 1377 if (fright->i >= 0)
1216 return INTFOLD(0);
1217 else if (fright->i > 0)
1218 return simplify_intmul_k(J, fright->i); 1378 return simplify_intmul_k(J, fright->i);
1219 return NEXTFOLD; 1379 return NEXTFOLD;
1220} 1380}
@@ -1222,14 +1382,13 @@ LJFOLDF(simplify_intmul_k32)
1222LJFOLD(MUL any KINT64) 1382LJFOLD(MUL any KINT64)
1223LJFOLDF(simplify_intmul_k64) 1383LJFOLDF(simplify_intmul_k64)
1224{ 1384{
1225 if (ir_kint64(fright)->u64 == 0) /* i * 0 ==> 0 */ 1385#if LJ_HASFFI
1226 return INT64FOLD(0); 1386 if (ir_kint64(fright)->u64 < 0x80000000u)
1227#if LJ_64
1228 /* NYI: SPLIT for BSHL and 32 bit backend support. */
1229 else if (ir_kint64(fright)->u64 < 0x80000000u)
1230 return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64); 1387 return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64);
1231#endif
1232 return NEXTFOLD; 1388 return NEXTFOLD;
1389#else
1390 UNUSED(J); lua_assert(0); return FAILFOLD;
1391#endif
1233} 1392}
1234 1393
1235LJFOLD(MOD any KINT) 1394LJFOLD(MOD any KINT)
@@ -1529,7 +1688,7 @@ LJFOLD(BOR BOR KINT64)
1529LJFOLD(BXOR BXOR KINT64) 1688LJFOLD(BXOR BXOR KINT64)
1530LJFOLDF(reassoc_intarith_k64) 1689LJFOLDF(reassoc_intarith_k64)
1531{ 1690{
1532#if LJ_HASFFI || LJ_64 1691#if LJ_HASFFI
1533 IRIns *irk = IR(fleft->op2); 1692 IRIns *irk = IR(fleft->op2);
1534 if (irk->o == IR_KINT64) { 1693 if (irk->o == IR_KINT64) {
1535 uint64_t k = kfold_int64arith(ir_k64(irk)->u64, 1694 uint64_t k = kfold_int64arith(ir_k64(irk)->u64,
@@ -2007,6 +2166,14 @@ LJFOLDF(fload_str_len_snew)
2007 return NEXTFOLD; 2166 return NEXTFOLD;
2008} 2167}
2009 2168
2169LJFOLD(FLOAD TOSTR IRFL_STR_LEN)
2170LJFOLDF(fload_str_len_tostr)
2171{
2172 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fleft->op2 == IRTOSTR_CHAR)
2173 return INTFOLD(1);
2174 return NEXTFOLD;
2175}
2176
2010/* The C type ID of cdata objects is immutable. */ 2177/* The C type ID of cdata objects is immutable. */
2011LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID) 2178LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID)
2012LJFOLDF(fload_cdata_typeid_kgc) 2179LJFOLDF(fload_cdata_typeid_kgc)
@@ -2149,6 +2316,7 @@ LJFOLD(TNEW any any)
2149LJFOLD(TDUP any) 2316LJFOLD(TDUP any)
2150LJFOLD(CNEW any any) 2317LJFOLD(CNEW any any)
2151LJFOLD(XSNEW any any) 2318LJFOLD(XSNEW any any)
2319LJFOLD(BUFHDR any any)
2152LJFOLDX(lj_ir_emit) 2320LJFOLDX(lj_ir_emit)
2153 2321
2154/* ------------------------------------------------------------------------ */ 2322/* ------------------------------------------------------------------------ */
diff --git a/src/lj_opt_loop.c b/src/lj_opt_loop.c
index 3a119f47..9637263c 100644
--- a/src/lj_opt_loop.c
+++ b/src/lj_opt_loop.c
@@ -11,7 +11,7 @@
11#if LJ_HASJIT 11#if LJ_HASJIT
12 12
13#include "lj_err.h" 13#include "lj_err.h"
14#include "lj_str.h" 14#include "lj_buf.h"
15#include "lj_ir.h" 15#include "lj_ir.h"
16#include "lj_jit.h" 16#include "lj_jit.h"
17#include "lj_iropt.h" 17#include "lj_iropt.h"
@@ -271,8 +271,7 @@ static void loop_unroll(jit_State *J)
271 ** Caveat: don't call into the VM or run the GC or the buffer may be gone. 271 ** Caveat: don't call into the VM or run the GC or the buffer may be gone.
272 */ 272 */
273 invar = J->cur.nins; 273 invar = J->cur.nins;
274 subst = (IRRef1 *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, 274 subst = (IRRef1 *)lj_buf_tmp(J->L, (invar-REF_BIAS)*sizeof(IRRef1))-REF_BIAS;
275 (invar-REF_BIAS)*sizeof(IRRef1)) - REF_BIAS;
276 subst[REF_BASE] = REF_BASE; 275 subst[REF_BASE] = REF_BASE;
277 276
278 /* LOOP separates the pre-roll from the loop body. */ 277 /* LOOP separates the pre-roll from the loop body. */
diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c
index caf2a8df..5d0ea9cb 100644
--- a/src/lj_opt_narrow.c
+++ b/src/lj_opt_narrow.c
@@ -496,8 +496,7 @@ TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef tr)
496{ 496{
497 lua_assert(tref_isnumber(tr)); 497 lua_assert(tref_isnumber(tr));
498 if (tref_isnum(tr)) 498 if (tref_isnum(tr))
499 return emitir(IRT(IR_CONV, IRT_INTP), tr, 499 return emitir(IRT(IR_CONV, IRT_INTP), tr, (IRT_INTP<<5)|IRT_NUM|IRCONV_ANY);
500 (IRT_INTP<<5)|IRT_NUM|IRCONV_TRUNC|IRCONV_ANY);
501 /* Undefined overflow semantics allow stripping of ADDOV, SUBOV and MULOV. */ 500 /* Undefined overflow semantics allow stripping of ADDOV, SUBOV and MULOV. */
502 return narrow_stripov(J, tr, IR_MULOV, 501 return narrow_stripov(J, tr, IR_MULOV,
503 LJ_64 ? ((IRT_INTP<<5)|IRT_INT|IRCONV_SEXT) : 502 LJ_64 ? ((IRT_INTP<<5)|IRT_INT|IRCONV_SEXT) :
diff --git a/src/lj_opt_split.c b/src/lj_opt_split.c
index 5a8c33b9..89db0e92 100644
--- a/src/lj_opt_split.c
+++ b/src/lj_opt_split.c
@@ -11,7 +11,7 @@
11#if LJ_HASJIT && (LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) 11#if LJ_HASJIT && (LJ_SOFTFP || (LJ_32 && LJ_HASFFI))
12 12
13#include "lj_err.h" 13#include "lj_err.h"
14#include "lj_str.h" 14#include "lj_buf.h"
15#include "lj_ir.h" 15#include "lj_ir.h"
16#include "lj_jit.h" 16#include "lj_jit.h"
17#include "lj_ircall.h" 17#include "lj_ircall.h"
@@ -139,6 +139,7 @@ static IRRef split_call_l(jit_State *J, IRRef1 *hisubst, IRIns *oir,
139 ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id); 139 ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
140 return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); 140 return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
141} 141}
142#endif
142 143
143/* Emit a CALLN with one split 64 bit argument and a 32 bit argument. */ 144/* Emit a CALLN with one split 64 bit argument and a 32 bit argument. */
144static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir, 145static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir,
@@ -155,7 +156,6 @@ static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir,
155 ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id); 156 ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
156 return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); 157 return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
157} 158}
158#endif
159 159
160/* Emit a CALLN with two split 64 bit arguments. */ 160/* Emit a CALLN with two split 64 bit arguments. */
161static IRRef split_call_ll(jit_State *J, IRRef1 *hisubst, IRIns *oir, 161static IRRef split_call_ll(jit_State *J, IRRef1 *hisubst, IRIns *oir,
@@ -195,6 +195,118 @@ static IRRef split_ptr(jit_State *J, IRIns *oir, IRRef ref)
195 return split_emit(J, IRTI(IR_ADD), nref, lj_ir_kint(J, ofs)); 195 return split_emit(J, IRTI(IR_ADD), nref, lj_ir_kint(J, ofs));
196} 196}
197 197
198#if LJ_HASFFI
199static IRRef split_bitshift(jit_State *J, IRRef1 *hisubst,
200 IRIns *oir, IRIns *nir, IRIns *ir)
201{
202 IROp op = ir->o;
203 IRRef kref = nir->op2;
204 if (irref_isk(kref)) { /* Optimize constant shifts. */
205 int32_t k = (IR(kref)->i & 63);
206 IRRef lo = nir->op1, hi = hisubst[ir->op1];
207 if (op == IR_BROL || op == IR_BROR) {
208 if (op == IR_BROR) k = (-k & 63);
209 if (k >= 32) { IRRef t = lo; lo = hi; hi = t; k -= 32; }
210 if (k == 0) {
211 passthrough:
212 J->cur.nins--;
213 ir->prev = lo;
214 return hi;
215 } else {
216 TRef k1, k2;
217 IRRef t1, t2, t3, t4;
218 J->cur.nins--;
219 k1 = lj_ir_kint(J, k);
220 k2 = lj_ir_kint(J, (-k & 31));
221 t1 = split_emit(J, IRTI(IR_BSHL), lo, k1);
222 t2 = split_emit(J, IRTI(IR_BSHL), hi, k1);
223 t3 = split_emit(J, IRTI(IR_BSHR), lo, k2);
224 t4 = split_emit(J, IRTI(IR_BSHR), hi, k2);
225 ir->prev = split_emit(J, IRTI(IR_BOR), t1, t4);
226 return split_emit(J, IRTI(IR_BOR), t2, t3);
227 }
228 } else if (k == 0) {
229 goto passthrough;
230 } else if (k < 32) {
231 if (op == IR_BSHL) {
232 IRRef t1 = split_emit(J, IRTI(IR_BSHL), hi, kref);
233 IRRef t2 = split_emit(J, IRTI(IR_BSHR), lo, lj_ir_kint(J, (-k&31)));
234 return split_emit(J, IRTI(IR_BOR), t1, t2);
235 } else {
236 IRRef t1 = ir->prev, t2;
237 lua_assert(op == IR_BSHR || op == IR_BSAR);
238 nir->o = IR_BSHR;
239 t2 = split_emit(J, IRTI(IR_BSHL), hi, lj_ir_kint(J, (-k&31)));
240 ir->prev = split_emit(J, IRTI(IR_BOR), t1, t2);
241 return split_emit(J, IRTI(op), hi, kref);
242 }
243 } else {
244 if (op == IR_BSHL) {
245 if (k == 32)
246 J->cur.nins--;
247 else
248 lo = ir->prev;
249 ir->prev = lj_ir_kint(J, 0);
250 return lo;
251 } else {
252 lua_assert(op == IR_BSHR || op == IR_BSAR);
253 if (k == 32) {
254 J->cur.nins--;
255 ir->prev = hi;
256 } else {
257 nir->op1 = hi;
258 }
259 if (op == IR_BSHR)
260 return lj_ir_kint(J, 0);
261 else
262 return split_emit(J, IRTI(IR_BSAR), hi, lj_ir_kint(J, 31));
263 }
264 }
265 }
266 return split_call_li(J, hisubst, oir, ir,
267 op - IR_BSHL + IRCALL_lj_carith_shl64);
268}
269
270static IRRef split_bitop(jit_State *J, IRRef1 *hisubst,
271 IRIns *nir, IRIns *ir)
272{
273 IROp op = ir->o;
274 IRRef hi, kref = nir->op2;
275 if (irref_isk(kref)) { /* Optimize bit operations with lo constant. */
276 int32_t k = IR(kref)->i;
277 if (k == 0 || k == -1) {
278 if (op == IR_BAND) k = ~k;
279 if (k == 0) {
280 J->cur.nins--;
281 ir->prev = nir->op1;
282 } else if (op == IR_BXOR) {
283 nir->o = IR_BNOT;
284 nir->op2 = 0;
285 } else {
286 J->cur.nins--;
287 ir->prev = kref;
288 }
289 }
290 }
291 hi = hisubst[ir->op1];
292 kref = hisubst[ir->op2];
293 if (irref_isk(kref)) { /* Optimize bit operations with hi constant. */
294 int32_t k = IR(kref)->i;
295 if (k == 0 || k == -1) {
296 if (op == IR_BAND) k = ~k;
297 if (k == 0) {
298 return hi;
299 } else if (op == IR_BXOR) {
300 return split_emit(J, IRTI(IR_BNOT), hi, 0);
301 } else {
302 return kref;
303 }
304 }
305 }
306 return split_emit(J, IRTI(op), hi, kref);
307}
308#endif
309
198/* Substitute references of a snapshot. */ 310/* Substitute references of a snapshot. */
199static void split_subst_snap(jit_State *J, SnapShot *snap, IRIns *oir) 311static void split_subst_snap(jit_State *J, SnapShot *snap, IRIns *oir)
200{ 312{
@@ -214,7 +326,7 @@ static void split_ir(jit_State *J)
214 IRRef nins = J->cur.nins, nk = J->cur.nk; 326 IRRef nins = J->cur.nins, nk = J->cur.nk;
215 MSize irlen = nins - nk; 327 MSize irlen = nins - nk;
216 MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1)); 328 MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1));
217 IRIns *oir = (IRIns *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, need); 329 IRIns *oir = (IRIns *)lj_buf_tmp(J->L, need);
218 IRRef1 *hisubst; 330 IRRef1 *hisubst;
219 IRRef ref, snref; 331 IRRef ref, snref;
220 SnapShot *snap; 332 SnapShot *snap;
@@ -438,6 +550,19 @@ static void split_ir(jit_State *J)
438 irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : 550 irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
439 IRCALL_lj_carith_powu64); 551 IRCALL_lj_carith_powu64);
440 break; 552 break;
553 case IR_BNOT:
554 hi = split_emit(J, IRTI(IR_BNOT), hiref, 0);
555 break;
556 case IR_BSWAP:
557 ir->prev = split_emit(J, IRTI(IR_BSWAP), hiref, 0);
558 hi = nref;
559 break;
560 case IR_BAND: case IR_BOR: case IR_BXOR:
561 hi = split_bitop(J, hisubst, nir, ir);
562 break;
563 case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR:
564 hi = split_bitshift(J, hisubst, oir, nir, ir);
565 break;
441 case IR_FLOAD: 566 case IR_FLOAD:
442 lua_assert(ir->op2 == IRFL_CDATA_INT64); 567 lua_assert(ir->op2 == IRFL_CDATA_INT64);
443 hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4); 568 hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4);
diff --git a/src/lj_parse.c b/src/lj_parse.c
index 7ff7d728..cdb89baf 100644
--- a/src/lj_parse.c
+++ b/src/lj_parse.c
@@ -13,6 +13,7 @@
13#include "lj_gc.h" 13#include "lj_gc.h"
14#include "lj_err.h" 14#include "lj_err.h"
15#include "lj_debug.h" 15#include "lj_debug.h"
16#include "lj_buf.h"
16#include "lj_str.h" 17#include "lj_str.h"
17#include "lj_tab.h" 18#include "lj_tab.h"
18#include "lj_func.h" 19#include "lj_func.h"
@@ -21,6 +22,7 @@
21#if LJ_HASFFI 22#if LJ_HASFFI
22#include "lj_ctype.h" 23#include "lj_ctype.h"
23#endif 24#endif
25#include "lj_strfmt.h"
24#include "lj_lex.h" 26#include "lj_lex.h"
25#include "lj_parse.h" 27#include "lj_parse.h"
26#include "lj_vm.h" 28#include "lj_vm.h"
@@ -165,12 +167,12 @@ LJ_STATIC_ASSERT((int)BC_MODVV-(int)BC_ADDVV == (int)OPR_MOD-(int)OPR_ADD);
165 167
166LJ_NORET LJ_NOINLINE static void err_syntax(LexState *ls, ErrMsg em) 168LJ_NORET LJ_NOINLINE static void err_syntax(LexState *ls, ErrMsg em)
167{ 169{
168 lj_lex_error(ls, ls->token, em); 170 lj_lex_error(ls, ls->tok, em);
169} 171}
170 172
171LJ_NORET LJ_NOINLINE static void err_token(LexState *ls, LexToken token) 173LJ_NORET LJ_NOINLINE static void err_token(LexState *ls, LexToken tok)
172{ 174{
173 lj_lex_error(ls, ls->token, LJ_ERR_XTOKEN, lj_lex_token2str(ls, token)); 175 lj_lex_error(ls, ls->tok, LJ_ERR_XTOKEN, lj_lex_token2str(ls, tok));
174} 176}
175 177
176LJ_NORET static void err_limit(FuncState *fs, uint32_t limit, const char *what) 178LJ_NORET static void err_limit(FuncState *fs, uint32_t limit, const char *what)
@@ -981,7 +983,7 @@ static void bcemit_unop(FuncState *fs, BCOp op, ExpDesc *e)
981/* Check and consume optional token. */ 983/* Check and consume optional token. */
982static int lex_opt(LexState *ls, LexToken tok) 984static int lex_opt(LexState *ls, LexToken tok)
983{ 985{
984 if (ls->token == tok) { 986 if (ls->tok == tok) {
985 lj_lex_next(ls); 987 lj_lex_next(ls);
986 return 1; 988 return 1;
987 } 989 }
@@ -991,7 +993,7 @@ static int lex_opt(LexState *ls, LexToken tok)
991/* Check and consume token. */ 993/* Check and consume token. */
992static void lex_check(LexState *ls, LexToken tok) 994static void lex_check(LexState *ls, LexToken tok)
993{ 995{
994 if (ls->token != tok) 996 if (ls->tok != tok)
995 err_token(ls, tok); 997 err_token(ls, tok);
996 lj_lex_next(ls); 998 lj_lex_next(ls);
997} 999}
@@ -1005,7 +1007,7 @@ static void lex_match(LexState *ls, LexToken what, LexToken who, BCLine line)
1005 } else { 1007 } else {
1006 const char *swhat = lj_lex_token2str(ls, what); 1008 const char *swhat = lj_lex_token2str(ls, what);
1007 const char *swho = lj_lex_token2str(ls, who); 1009 const char *swho = lj_lex_token2str(ls, who);
1008 lj_lex_error(ls, ls->token, LJ_ERR_XMATCH, swhat, swho, line); 1010 lj_lex_error(ls, ls->tok, LJ_ERR_XMATCH, swhat, swho, line);
1009 } 1011 }
1010 } 1012 }
1011} 1013}
@@ -1014,9 +1016,9 @@ static void lex_match(LexState *ls, LexToken what, LexToken who, BCLine line)
1014static GCstr *lex_str(LexState *ls) 1016static GCstr *lex_str(LexState *ls)
1015{ 1017{
1016 GCstr *s; 1018 GCstr *s;
1017 if (ls->token != TK_name && (LJ_52 || ls->token != TK_goto)) 1019 if (ls->tok != TK_name && (LJ_52 || ls->tok != TK_goto))
1018 err_token(ls, TK_name); 1020 err_token(ls, TK_name);
1019 s = strV(&ls->tokenval); 1021 s = strV(&ls->tokval);
1020 lj_lex_next(ls); 1022 lj_lex_next(ls);
1021 return s; 1023 return s;
1022} 1024}
@@ -1429,78 +1431,46 @@ static void fs_fixup_line(FuncState *fs, GCproto *pt,
1429 } 1431 }
1430} 1432}
1431 1433
1432/* Resize buffer if needed. */
1433static LJ_NOINLINE void fs_buf_resize(LexState *ls, MSize len)
1434{
1435 MSize sz = ls->sb.sz * 2;
1436 while (ls->sb.n + len > sz) sz = sz * 2;
1437 lj_str_resizebuf(ls->L, &ls->sb, sz);
1438}
1439
1440static LJ_AINLINE void fs_buf_need(LexState *ls, MSize len)
1441{
1442 if (LJ_UNLIKELY(ls->sb.n + len > ls->sb.sz))
1443 fs_buf_resize(ls, len);
1444}
1445
1446/* Add string to buffer. */
1447static void fs_buf_str(LexState *ls, const char *str, MSize len)
1448{
1449 char *p = ls->sb.buf + ls->sb.n;
1450 MSize i;
1451 ls->sb.n += len;
1452 for (i = 0; i < len; i++) p[i] = str[i];
1453}
1454
1455/* Add ULEB128 value to buffer. */
1456static void fs_buf_uleb128(LexState *ls, uint32_t v)
1457{
1458 MSize n = ls->sb.n;
1459 uint8_t *p = (uint8_t *)ls->sb.buf;
1460 for (; v >= 0x80; v >>= 7)
1461 p[n++] = (uint8_t)((v & 0x7f) | 0x80);
1462 p[n++] = (uint8_t)v;
1463 ls->sb.n = n;
1464}
1465
1466/* Prepare variable info for prototype. */ 1434/* Prepare variable info for prototype. */
1467static size_t fs_prep_var(LexState *ls, FuncState *fs, size_t *ofsvar) 1435static size_t fs_prep_var(LexState *ls, FuncState *fs, size_t *ofsvar)
1468{ 1436{
1469 VarInfo *vs =ls->vstack, *ve; 1437 VarInfo *vs =ls->vstack, *ve;
1470 MSize i, n; 1438 MSize i, n;
1471 BCPos lastpc; 1439 BCPos lastpc;
1472 lj_str_resetbuf(&ls->sb); /* Copy to temp. string buffer. */ 1440 lj_buf_reset(&ls->sb); /* Copy to temp. string buffer. */
1473 /* Store upvalue names. */ 1441 /* Store upvalue names. */
1474 for (i = 0, n = fs->nuv; i < n; i++) { 1442 for (i = 0, n = fs->nuv; i < n; i++) {
1475 GCstr *s = strref(vs[fs->uvmap[i]].name); 1443 GCstr *s = strref(vs[fs->uvmap[i]].name);
1476 MSize len = s->len+1; 1444 MSize len = s->len+1;
1477 fs_buf_need(ls, len); 1445 char *p = lj_buf_more(&ls->sb, len);
1478 fs_buf_str(ls, strdata(s), len); 1446 p = lj_buf_wmem(p, strdata(s), len);
1447 setsbufP(&ls->sb, p);
1479 } 1448 }
1480 *ofsvar = ls->sb.n; 1449 *ofsvar = sbuflen(&ls->sb);
1481 lastpc = 0; 1450 lastpc = 0;
1482 /* Store local variable names and compressed ranges. */ 1451 /* Store local variable names and compressed ranges. */
1483 for (ve = vs + ls->vtop, vs += fs->vbase; vs < ve; vs++) { 1452 for (ve = vs + ls->vtop, vs += fs->vbase; vs < ve; vs++) {
1484 if (!gola_isgotolabel(vs)) { 1453 if (!gola_isgotolabel(vs)) {
1485 GCstr *s = strref(vs->name); 1454 GCstr *s = strref(vs->name);
1486 BCPos startpc; 1455 BCPos startpc;
1456 char *p;
1487 if ((uintptr_t)s < VARNAME__MAX) { 1457 if ((uintptr_t)s < VARNAME__MAX) {
1488 fs_buf_need(ls, 1 + 2*5); 1458 p = lj_buf_more(&ls->sb, 1 + 2*5);
1489 ls->sb.buf[ls->sb.n++] = (uint8_t)(uintptr_t)s; 1459 *p++ = (char)(uintptr_t)s;
1490 } else { 1460 } else {
1491 MSize len = s->len+1; 1461 MSize len = s->len+1;
1492 fs_buf_need(ls, len + 2*5); 1462 p = lj_buf_more(&ls->sb, len + 2*5);
1493 fs_buf_str(ls, strdata(s), len); 1463 p = lj_buf_wmem(p, strdata(s), len);
1494 } 1464 }
1495 startpc = vs->startpc; 1465 startpc = vs->startpc;
1496 fs_buf_uleb128(ls, startpc-lastpc); 1466 p = lj_strfmt_wuleb128(p, startpc-lastpc);
1497 fs_buf_uleb128(ls, vs->endpc-startpc); 1467 p = lj_strfmt_wuleb128(p, vs->endpc-startpc);
1468 setsbufP(&ls->sb, p);
1498 lastpc = startpc; 1469 lastpc = startpc;
1499 } 1470 }
1500 } 1471 }
1501 fs_buf_need(ls, 1); 1472 lj_buf_putb(&ls->sb, '\0'); /* Terminator for varinfo. */
1502 ls->sb.buf[ls->sb.n++] = '\0'; /* Terminator for varinfo. */ 1473 return sbuflen(&ls->sb);
1503 return ls->sb.n;
1504} 1474}
1505 1475
1506/* Fixup variable info for prototype. */ 1476/* Fixup variable info for prototype. */
@@ -1508,7 +1478,7 @@ static void fs_fixup_var(LexState *ls, GCproto *pt, uint8_t *p, size_t ofsvar)
1508{ 1478{
1509 setmref(pt->uvinfo, p); 1479 setmref(pt->uvinfo, p);
1510 setmref(pt->varinfo, (char *)p + ofsvar); 1480 setmref(pt->varinfo, (char *)p + ofsvar);
1511 memcpy(p, ls->sb.buf, ls->sb.n); /* Copy from temp. string buffer. */ 1481 memcpy(p, sbufB(&ls->sb), sbuflen(&ls->sb)); /* Copy from temp. buffer. */
1512} 1482}
1513#else 1483#else
1514 1484
@@ -1615,7 +1585,7 @@ static GCproto *fs_finish(LexState *ls, BCLine line)
1615 L->top--; /* Pop table of constants. */ 1585 L->top--; /* Pop table of constants. */
1616 ls->vtop = fs->vbase; /* Reset variable stack. */ 1586 ls->vtop = fs->vbase; /* Reset variable stack. */
1617 ls->fs = fs->prev; 1587 ls->fs = fs->prev;
1618 lua_assert(ls->fs != NULL || ls->token == TK_eof); 1588 lua_assert(ls->fs != NULL || ls->tok == TK_eof);
1619 return pt; 1589 return pt;
1620} 1590}
1621 1591
@@ -1737,15 +1707,15 @@ static void expr_table(LexState *ls, ExpDesc *e)
1737 bcreg_reserve(fs, 1); 1707 bcreg_reserve(fs, 1);
1738 freg++; 1708 freg++;
1739 lex_check(ls, '{'); 1709 lex_check(ls, '{');
1740 while (ls->token != '}') { 1710 while (ls->tok != '}') {
1741 ExpDesc key, val; 1711 ExpDesc key, val;
1742 vcall = 0; 1712 vcall = 0;
1743 if (ls->token == '[') { 1713 if (ls->tok == '[') {
1744 expr_bracket(ls, &key); /* Already calls expr_toval. */ 1714 expr_bracket(ls, &key); /* Already calls expr_toval. */
1745 if (!expr_isk(&key)) expr_index(fs, e, &key); 1715 if (!expr_isk(&key)) expr_index(fs, e, &key);
1746 if (expr_isnumk(&key) && expr_numiszero(&key)) needarr = 1; else nhash++; 1716 if (expr_isnumk(&key) && expr_numiszero(&key)) needarr = 1; else nhash++;
1747 lex_check(ls, '='); 1717 lex_check(ls, '=');
1748 } else if ((ls->token == TK_name || (!LJ_52 && ls->token == TK_goto)) && 1718 } else if ((ls->tok == TK_name || (!LJ_52 && ls->tok == TK_goto)) &&
1749 lj_lex_lookahead(ls) == '=') { 1719 lj_lex_lookahead(ls) == '=') {
1750 expr_str(ls, &key); 1720 expr_str(ls, &key);
1751 lex_check(ls, '='); 1721 lex_check(ls, '=');
@@ -1838,11 +1808,11 @@ static BCReg parse_params(LexState *ls, int needself)
1838 lex_check(ls, '('); 1808 lex_check(ls, '(');
1839 if (needself) 1809 if (needself)
1840 var_new_lit(ls, nparams++, "self"); 1810 var_new_lit(ls, nparams++, "self");
1841 if (ls->token != ')') { 1811 if (ls->tok != ')') {
1842 do { 1812 do {
1843 if (ls->token == TK_name || (!LJ_52 && ls->token == TK_goto)) { 1813 if (ls->tok == TK_name || (!LJ_52 && ls->tok == TK_goto)) {
1844 var_new(ls, nparams++, lex_str(ls)); 1814 var_new(ls, nparams++, lex_str(ls));
1845 } else if (ls->token == TK_dots) { 1815 } else if (ls->tok == TK_dots) {
1846 lj_lex_next(ls); 1816 lj_lex_next(ls);
1847 fs->flags |= PROTO_VARARG; 1817 fs->flags |= PROTO_VARARG;
1848 break; 1818 break;
@@ -1876,7 +1846,7 @@ static void parse_body(LexState *ls, ExpDesc *e, int needself, BCLine line)
1876 fs.bclim = pfs->bclim - pfs->pc; 1846 fs.bclim = pfs->bclim - pfs->pc;
1877 bcemit_AD(&fs, BC_FUNCF, 0, 0); /* Placeholder. */ 1847 bcemit_AD(&fs, BC_FUNCF, 0, 0); /* Placeholder. */
1878 parse_chunk(ls); 1848 parse_chunk(ls);
1879 if (ls->token != TK_end) lex_match(ls, TK_end, TK_function, line); 1849 if (ls->tok != TK_end) lex_match(ls, TK_end, TK_function, line);
1880 pt = fs_finish(ls, (ls->lastline = ls->linenumber)); 1850 pt = fs_finish(ls, (ls->lastline = ls->linenumber));
1881 pfs->bcbase = ls->bcstack + oldbase; /* May have been reallocated. */ 1851 pfs->bcbase = ls->bcstack + oldbase; /* May have been reallocated. */
1882 pfs->bclim = (BCPos)(ls->sizebcstack - oldbase); 1852 pfs->bclim = (BCPos)(ls->sizebcstack - oldbase);
@@ -1915,13 +1885,13 @@ static void parse_args(LexState *ls, ExpDesc *e)
1915 BCIns ins; 1885 BCIns ins;
1916 BCReg base; 1886 BCReg base;
1917 BCLine line = ls->linenumber; 1887 BCLine line = ls->linenumber;
1918 if (ls->token == '(') { 1888 if (ls->tok == '(') {
1919#if !LJ_52 1889#if !LJ_52
1920 if (line != ls->lastline) 1890 if (line != ls->lastline)
1921 err_syntax(ls, LJ_ERR_XAMBIG); 1891 err_syntax(ls, LJ_ERR_XAMBIG);
1922#endif 1892#endif
1923 lj_lex_next(ls); 1893 lj_lex_next(ls);
1924 if (ls->token == ')') { /* f(). */ 1894 if (ls->tok == ')') { /* f(). */
1925 args.k = VVOID; 1895 args.k = VVOID;
1926 } else { 1896 } else {
1927 expr_list(ls, &args); 1897 expr_list(ls, &args);
@@ -1929,11 +1899,11 @@ static void parse_args(LexState *ls, ExpDesc *e)
1929 setbc_b(bcptr(fs, &args), 0); /* Pass on multiple results. */ 1899 setbc_b(bcptr(fs, &args), 0); /* Pass on multiple results. */
1930 } 1900 }
1931 lex_match(ls, ')', '(', line); 1901 lex_match(ls, ')', '(', line);
1932 } else if (ls->token == '{') { 1902 } else if (ls->tok == '{') {
1933 expr_table(ls, &args); 1903 expr_table(ls, &args);
1934 } else if (ls->token == TK_string) { 1904 } else if (ls->tok == TK_string) {
1935 expr_init(&args, VKSTR, 0); 1905 expr_init(&args, VKSTR, 0);
1936 args.u.sval = strV(&ls->tokenval); 1906 args.u.sval = strV(&ls->tokval);
1937 lj_lex_next(ls); 1907 lj_lex_next(ls);
1938 } else { 1908 } else {
1939 err_syntax(ls, LJ_ERR_XFUNARG); 1909 err_syntax(ls, LJ_ERR_XFUNARG);
@@ -1959,32 +1929,32 @@ static void expr_primary(LexState *ls, ExpDesc *v)
1959{ 1929{
1960 FuncState *fs = ls->fs; 1930 FuncState *fs = ls->fs;
1961 /* Parse prefix expression. */ 1931 /* Parse prefix expression. */
1962 if (ls->token == '(') { 1932 if (ls->tok == '(') {
1963 BCLine line = ls->linenumber; 1933 BCLine line = ls->linenumber;
1964 lj_lex_next(ls); 1934 lj_lex_next(ls);
1965 expr(ls, v); 1935 expr(ls, v);
1966 lex_match(ls, ')', '(', line); 1936 lex_match(ls, ')', '(', line);
1967 expr_discharge(ls->fs, v); 1937 expr_discharge(ls->fs, v);
1968 } else if (ls->token == TK_name || (!LJ_52 && ls->token == TK_goto)) { 1938 } else if (ls->tok == TK_name || (!LJ_52 && ls->tok == TK_goto)) {
1969 var_lookup(ls, v); 1939 var_lookup(ls, v);
1970 } else { 1940 } else {
1971 err_syntax(ls, LJ_ERR_XSYMBOL); 1941 err_syntax(ls, LJ_ERR_XSYMBOL);
1972 } 1942 }
1973 for (;;) { /* Parse multiple expression suffixes. */ 1943 for (;;) { /* Parse multiple expression suffixes. */
1974 if (ls->token == '.') { 1944 if (ls->tok == '.') {
1975 expr_field(ls, v); 1945 expr_field(ls, v);
1976 } else if (ls->token == '[') { 1946 } else if (ls->tok == '[') {
1977 ExpDesc key; 1947 ExpDesc key;
1978 expr_toanyreg(fs, v); 1948 expr_toanyreg(fs, v);
1979 expr_bracket(ls, &key); 1949 expr_bracket(ls, &key);
1980 expr_index(fs, v, &key); 1950 expr_index(fs, v, &key);
1981 } else if (ls->token == ':') { 1951 } else if (ls->tok == ':') {
1982 ExpDesc key; 1952 ExpDesc key;
1983 lj_lex_next(ls); 1953 lj_lex_next(ls);
1984 expr_str(ls, &key); 1954 expr_str(ls, &key);
1985 bcemit_method(fs, v, &key); 1955 bcemit_method(fs, v, &key);
1986 parse_args(ls, v); 1956 parse_args(ls, v);
1987 } else if (ls->token == '(' || ls->token == TK_string || ls->token == '{') { 1957 } else if (ls->tok == '(' || ls->tok == TK_string || ls->tok == '{') {
1988 expr_tonextreg(fs, v); 1958 expr_tonextreg(fs, v);
1989 parse_args(ls, v); 1959 parse_args(ls, v);
1990 } else { 1960 } else {
@@ -1996,14 +1966,14 @@ static void expr_primary(LexState *ls, ExpDesc *v)
1996/* Parse simple expression. */ 1966/* Parse simple expression. */
1997static void expr_simple(LexState *ls, ExpDesc *v) 1967static void expr_simple(LexState *ls, ExpDesc *v)
1998{ 1968{
1999 switch (ls->token) { 1969 switch (ls->tok) {
2000 case TK_number: 1970 case TK_number:
2001 expr_init(v, (LJ_HASFFI && tviscdata(&ls->tokenval)) ? VKCDATA : VKNUM, 0); 1971 expr_init(v, (LJ_HASFFI && tviscdata(&ls->tokval)) ? VKCDATA : VKNUM, 0);
2002 copyTV(ls->L, &v->u.nval, &ls->tokenval); 1972 copyTV(ls->L, &v->u.nval, &ls->tokval);
2003 break; 1973 break;
2004 case TK_string: 1974 case TK_string:
2005 expr_init(v, VKSTR, 0); 1975 expr_init(v, VKSTR, 0);
2006 v->u.sval = strV(&ls->tokenval); 1976 v->u.sval = strV(&ls->tokval);
2007 break; 1977 break;
2008 case TK_nil: 1978 case TK_nil:
2009 expr_init(v, VKNIL, 0); 1979 expr_init(v, VKNIL, 0);
@@ -2091,11 +2061,11 @@ static BinOpr expr_binop(LexState *ls, ExpDesc *v, uint32_t limit);
2091static void expr_unop(LexState *ls, ExpDesc *v) 2061static void expr_unop(LexState *ls, ExpDesc *v)
2092{ 2062{
2093 BCOp op; 2063 BCOp op;
2094 if (ls->token == TK_not) { 2064 if (ls->tok == TK_not) {
2095 op = BC_NOT; 2065 op = BC_NOT;
2096 } else if (ls->token == '-') { 2066 } else if (ls->tok == '-') {
2097 op = BC_UNM; 2067 op = BC_UNM;
2098 } else if (ls->token == '#') { 2068 } else if (ls->tok == '#') {
2099 op = BC_LEN; 2069 op = BC_LEN;
2100 } else { 2070 } else {
2101 expr_simple(ls, v); 2071 expr_simple(ls, v);
@@ -2112,7 +2082,7 @@ static BinOpr expr_binop(LexState *ls, ExpDesc *v, uint32_t limit)
2112 BinOpr op; 2082 BinOpr op;
2113 synlevel_begin(ls); 2083 synlevel_begin(ls);
2114 expr_unop(ls, v); 2084 expr_unop(ls, v);
2115 op = token2binop(ls->token); 2085 op = token2binop(ls->tok);
2116 while (op != OPR_NOBINOPR && priority[op].left > limit) { 2086 while (op != OPR_NOBINOPR && priority[op].left > limit) {
2117 ExpDesc v2; 2087 ExpDesc v2;
2118 BinOpr nextop; 2088 BinOpr nextop;
@@ -2301,9 +2271,9 @@ static void parse_func(LexState *ls, BCLine line)
2301 lj_lex_next(ls); /* Skip 'function'. */ 2271 lj_lex_next(ls); /* Skip 'function'. */
2302 /* Parse function name. */ 2272 /* Parse function name. */
2303 var_lookup(ls, &v); 2273 var_lookup(ls, &v);
2304 while (ls->token == '.') /* Multiple dot-separated fields. */ 2274 while (ls->tok == '.') /* Multiple dot-separated fields. */
2305 expr_field(ls, &v); 2275 expr_field(ls, &v);
2306 if (ls->token == ':') { /* Optional colon to signify method call. */ 2276 if (ls->tok == ':') { /* Optional colon to signify method call. */
2307 needself = 1; 2277 needself = 1;
2308 expr_field(ls, &v); 2278 expr_field(ls, &v);
2309 } 2279 }
@@ -2316,9 +2286,9 @@ static void parse_func(LexState *ls, BCLine line)
2316/* -- Control transfer statements ----------------------------------------- */ 2286/* -- Control transfer statements ----------------------------------------- */
2317 2287
2318/* Check for end of block. */ 2288/* Check for end of block. */
2319static int endofblock(LexToken token) 2289static int parse_isend(LexToken tok)
2320{ 2290{
2321 switch (token) { 2291 switch (tok) {
2322 case TK_else: case TK_elseif: case TK_end: case TK_until: case TK_eof: 2292 case TK_else: case TK_elseif: case TK_end: case TK_until: case TK_eof:
2323 return 1; 2293 return 1;
2324 default: 2294 default:
@@ -2333,7 +2303,7 @@ static void parse_return(LexState *ls)
2333 FuncState *fs = ls->fs; 2303 FuncState *fs = ls->fs;
2334 lj_lex_next(ls); /* Skip 'return'. */ 2304 lj_lex_next(ls); /* Skip 'return'. */
2335 fs->flags |= PROTO_HAS_RETURN; 2305 fs->flags |= PROTO_HAS_RETURN;
2336 if (endofblock(ls->token) || ls->token == ';') { /* Bare return. */ 2306 if (parse_isend(ls->tok) || ls->tok == ';') { /* Bare return. */
2337 ins = BCINS_AD(BC_RET0, 0, 1); 2307 ins = BCINS_AD(BC_RET0, 0, 1);
2338 } else { /* Return with one or more values. */ 2308 } else { /* Return with one or more values. */
2339 ExpDesc e; /* Receives the _last_ expression in the list. */ 2309 ExpDesc e; /* Receives the _last_ expression in the list. */
@@ -2399,18 +2369,18 @@ static void parse_label(LexState *ls)
2399 lex_check(ls, TK_label); 2369 lex_check(ls, TK_label);
2400 /* Recursively parse trailing statements: labels and ';' (Lua 5.2 only). */ 2370 /* Recursively parse trailing statements: labels and ';' (Lua 5.2 only). */
2401 for (;;) { 2371 for (;;) {
2402 if (ls->token == TK_label) { 2372 if (ls->tok == TK_label) {
2403 synlevel_begin(ls); 2373 synlevel_begin(ls);
2404 parse_label(ls); 2374 parse_label(ls);
2405 synlevel_end(ls); 2375 synlevel_end(ls);
2406 } else if (LJ_52 && ls->token == ';') { 2376 } else if (LJ_52 && ls->tok == ';') {
2407 lj_lex_next(ls); 2377 lj_lex_next(ls);
2408 } else { 2378 } else {
2409 break; 2379 break;
2410 } 2380 }
2411 } 2381 }
2412 /* Trailing label is considered to be outside of scope. */ 2382 /* Trailing label is considered to be outside of scope. */
2413 if (endofblock(ls->token) && ls->token != TK_until) 2383 if (parse_isend(ls->tok) && ls->tok != TK_until)
2414 ls->vstack[idx].slot = fs->bl->nactvar; 2384 ls->vstack[idx].slot = fs->bl->nactvar;
2415 gola_resolve(ls, fs->bl, idx); 2385 gola_resolve(ls, fs->bl, idx);
2416} 2386}
@@ -2594,9 +2564,9 @@ static void parse_for(LexState *ls, BCLine line)
2594 fscope_begin(fs, &bl, FSCOPE_LOOP); 2564 fscope_begin(fs, &bl, FSCOPE_LOOP);
2595 lj_lex_next(ls); /* Skip 'for'. */ 2565 lj_lex_next(ls); /* Skip 'for'. */
2596 varname = lex_str(ls); /* Get first variable name. */ 2566 varname = lex_str(ls); /* Get first variable name. */
2597 if (ls->token == '=') 2567 if (ls->tok == '=')
2598 parse_for_num(ls, varname, line); 2568 parse_for_num(ls, varname, line);
2599 else if (ls->token == ',' || ls->token == TK_in) 2569 else if (ls->tok == ',' || ls->tok == TK_in)
2600 parse_for_iter(ls, varname); 2570 parse_for_iter(ls, varname);
2601 else 2571 else
2602 err_syntax(ls, LJ_ERR_XFOR); 2572 err_syntax(ls, LJ_ERR_XFOR);
@@ -2622,12 +2592,12 @@ static void parse_if(LexState *ls, BCLine line)
2622 BCPos flist; 2592 BCPos flist;
2623 BCPos escapelist = NO_JMP; 2593 BCPos escapelist = NO_JMP;
2624 flist = parse_then(ls); 2594 flist = parse_then(ls);
2625 while (ls->token == TK_elseif) { /* Parse multiple 'elseif' blocks. */ 2595 while (ls->tok == TK_elseif) { /* Parse multiple 'elseif' blocks. */
2626 jmp_append(fs, &escapelist, bcemit_jmp(fs)); 2596 jmp_append(fs, &escapelist, bcemit_jmp(fs));
2627 jmp_tohere(fs, flist); 2597 jmp_tohere(fs, flist);
2628 flist = parse_then(ls); 2598 flist = parse_then(ls);
2629 } 2599 }
2630 if (ls->token == TK_else) { /* Parse optional 'else' block. */ 2600 if (ls->tok == TK_else) { /* Parse optional 'else' block. */
2631 jmp_append(fs, &escapelist, bcemit_jmp(fs)); 2601 jmp_append(fs, &escapelist, bcemit_jmp(fs));
2632 jmp_tohere(fs, flist); 2602 jmp_tohere(fs, flist);
2633 lj_lex_next(ls); /* Skip 'else'. */ 2603 lj_lex_next(ls); /* Skip 'else'. */
@@ -2645,7 +2615,7 @@ static void parse_if(LexState *ls, BCLine line)
2645static int parse_stmt(LexState *ls) 2615static int parse_stmt(LexState *ls)
2646{ 2616{
2647 BCLine line = ls->linenumber; 2617 BCLine line = ls->linenumber;
2648 switch (ls->token) { 2618 switch (ls->tok) {
2649 case TK_if: 2619 case TK_if:
2650 parse_if(ls, line); 2620 parse_if(ls, line);
2651 break; 2621 break;
@@ -2703,7 +2673,7 @@ static void parse_chunk(LexState *ls)
2703{ 2673{
2704 int islast = 0; 2674 int islast = 0;
2705 synlevel_begin(ls); 2675 synlevel_begin(ls);
2706 while (!islast && !endofblock(ls->token)) { 2676 while (!islast && !parse_isend(ls->tok)) {
2707 islast = parse_stmt(ls); 2677 islast = parse_stmt(ls);
2708 lex_opt(ls, ';'); 2678 lex_opt(ls, ';');
2709 lua_assert(ls->fs->framesize >= ls->fs->freereg && 2679 lua_assert(ls->fs->framesize >= ls->fs->freereg &&
@@ -2738,7 +2708,7 @@ GCproto *lj_parse(LexState *ls)
2738 bcemit_AD(&fs, BC_FUNCV, 0, 0); /* Placeholder. */ 2708 bcemit_AD(&fs, BC_FUNCV, 0, 0); /* Placeholder. */
2739 lj_lex_next(ls); /* Read-ahead first token. */ 2709 lj_lex_next(ls); /* Read-ahead first token. */
2740 parse_chunk(ls); 2710 parse_chunk(ls);
2741 if (ls->token != TK_eof) 2711 if (ls->tok != TK_eof)
2742 err_token(ls, TK_eof); 2712 err_token(ls, TK_eof);
2743 pt = fs_finish(ls, ls->linenumber); 2713 pt = fs_finish(ls, ls->linenumber);
2744 L->top--; /* Drop chunkname. */ 2714 L->top--; /* Drop chunkname. */
diff --git a/src/lj_record.c b/src/lj_record.c
index 7336e0ac..bdf0212a 100644
--- a/src/lj_record.c
+++ b/src/lj_record.c
@@ -677,6 +677,8 @@ static int check_downrec_unroll(jit_State *J, GCproto *pt)
677 return 0; 677 return 0;
678} 678}
679 679
680static TRef rec_cat(jit_State *J, BCReg baseslot, BCReg topslot);
681
680/* Record return. */ 682/* Record return. */
681void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) 683void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
682{ 684{
@@ -769,7 +771,24 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
769 } else if (cont == lj_cont_nop) { 771 } else if (cont == lj_cont_nop) {
770 /* Nothing to do here. */ 772 /* Nothing to do here. */
771 } else if (cont == lj_cont_cat) { 773 } else if (cont == lj_cont_cat) {
772 lua_assert(0); 774 BCReg bslot = bc_b(*(frame_contpc(frame)-1));
775 TRef tr = gotresults ? J->base[cbase+rbase] : TREF_NIL;
776 if (bslot != cbase-2) { /* Concatenate the remainder. */
777 TValue *b = J->L->base, save; /* Simulate lower frame and result. */
778 J->base[cbase-2] = tr;
779 copyTV(J->L, &save, b-2);
780 if (gotresults) copyTV(J->L, b-2, b+rbase); else setnilV(b-2);
781 J->L->base = b - cbase;
782 tr = rec_cat(J, bslot, cbase-2);
783 b = J->L->base + cbase; /* Undo. */
784 J->L->base = b;
785 copyTV(J->L, b-2, &save);
786 }
787 if (tr) { /* Store final result. */
788 BCReg dst = bc_a(*(frame_contpc(frame)-1));
789 J->base[dst] = tr;
790 if (dst >= J->maxslot) J->maxslot = dst+1;
791 } /* Otherwise continue with another __concat call. */
773 } else { 792 } else {
774 /* Result type already specialized. */ 793 /* Result type already specialized. */
775 lua_assert(cont == lj_cont_condf || cont == lj_cont_condt); 794 lua_assert(cont == lj_cont_condf || cont == lj_cont_condt);
@@ -785,13 +804,11 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
785/* Prepare to record call to metamethod. */ 804/* Prepare to record call to metamethod. */
786static BCReg rec_mm_prep(jit_State *J, ASMFunction cont) 805static BCReg rec_mm_prep(jit_State *J, ASMFunction cont)
787{ 806{
788 BCReg s, top = curr_proto(J->L)->framesize; 807 BCReg s, top = cont == lj_cont_cat ? J->maxslot : curr_proto(J->L)->framesize;
789 TRef trcont;
790 setcont(&J->L->base[top], cont);
791#if LJ_64 808#if LJ_64
792 trcont = lj_ir_kptr(J, (void *)((int64_t)cont - (int64_t)lj_vm_asm_begin)); 809 TRef trcont = lj_ir_kptr(J, (void *)((int64_t)cont-(int64_t)lj_vm_asm_begin));
793#else 810#else
794 trcont = lj_ir_kptr(J, (void *)cont); 811 TRef trcont = lj_ir_kptr(J, (void *)cont);
795#endif 812#endif
796 J->base[top] = trcont | TREF_CONT; 813 J->base[top] = trcont | TREF_CONT;
797 J->framedepth++; 814 J->framedepth++;
@@ -872,7 +889,7 @@ nocheck:
872static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm) 889static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm)
873{ 890{
874 /* Set up metamethod call first to save ix->tab and ix->tabv. */ 891 /* Set up metamethod call first to save ix->tab and ix->tabv. */
875 BCReg func = rec_mm_prep(J, lj_cont_ra); 892 BCReg func = rec_mm_prep(J, mm == MM_concat ? lj_cont_cat : lj_cont_ra);
876 TRef *base = J->base + func; 893 TRef *base = J->base + func;
877 TValue *basev = J->L->base + func; 894 TValue *basev = J->L->base + func;
878 base[1] = ix->tab; base[2] = ix->key; 895 base[1] = ix->tab; base[2] = ix->key;
@@ -1599,6 +1616,54 @@ static TRef rec_tnew(jit_State *J, uint32_t ah)
1599 return emitir(IRTG(IR_TNEW, IRT_TAB), asize, hbits); 1616 return emitir(IRTG(IR_TNEW, IRT_TAB), asize, hbits);
1600} 1617}
1601 1618
1619/* -- Concatenation ------------------------------------------------------- */
1620
1621static TRef rec_cat(jit_State *J, BCReg baseslot, BCReg topslot)
1622{
1623 TRef *top = &J->base[topslot];
1624 TValue savetv[5];
1625 BCReg s;
1626 RecordIndex ix;
1627 lua_assert(baseslot < topslot);
1628 for (s = baseslot; s <= topslot; s++)
1629 (void)getslot(J, s); /* Ensure all arguments have a reference. */
1630 if (tref_isnumber_str(top[0]) && tref_isnumber_str(top[-1])) {
1631 TRef tr, hdr, *trp, *xbase, *base = &J->base[baseslot];
1632 /* First convert numbers to strings. */
1633 for (trp = top; trp >= base; trp--) {
1634 if (tref_isnumber(*trp))
1635 *trp = emitir(IRT(IR_TOSTR, IRT_STR), *trp,
1636 tref_isnum(*trp) ? IRTOSTR_NUM : IRTOSTR_INT);
1637 else if (!tref_isstr(*trp))
1638 break;
1639 }
1640 xbase = ++trp;
1641 tr = hdr = emitir(IRT(IR_BUFHDR, IRT_P32),
1642 lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET);
1643 do {
1644 tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, *trp++);
1645 } while (trp <= top);
1646 tr = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
1647 J->maxslot = (BCReg)(xbase - J->base);
1648 if (xbase == base) return tr; /* Return simple concatenation result. */
1649 /* Pass partial result. */
1650 topslot = J->maxslot--;
1651 *xbase = tr;
1652 top = xbase;
1653 setstrV(J->L, &ix.keyv, &J2G(J)->strempty); /* Simulate string result. */
1654 } else {
1655 J->maxslot = topslot-1;
1656 copyTV(J->L, &ix.keyv, &J->L->base[topslot]);
1657 }
1658 copyTV(J->L, &ix.tabv, &J->L->base[topslot-1]);
1659 ix.tab = top[-1];
1660 ix.key = top[0];
1661 memcpy(savetv, &J->L->base[topslot-1], sizeof(savetv)); /* Save slots. */
1662 rec_mm_arith(J, &ix, MM_concat); /* Call __concat metamethod. */
1663 memcpy(&J->L->base[topslot-1], savetv, sizeof(savetv)); /* Restore slots. */
1664 return 0; /* No result yet. */
1665}
1666
1602/* -- Record bytecode ops ------------------------------------------------- */ 1667/* -- Record bytecode ops ------------------------------------------------- */
1603 1668
1604/* Prepare for comparison. */ 1669/* Prepare for comparison. */
@@ -1826,6 +1891,18 @@ void lj_record_ins(jit_State *J)
1826 J->maxslot = bc_a(pc[1]); /* Shrink used slots. */ 1891 J->maxslot = bc_a(pc[1]); /* Shrink used slots. */
1827 break; 1892 break;
1828 1893
1894 case BC_ISTYPE: case BC_ISNUM:
1895 /* These coercions need to correspond with lj_meta_istype(). */
1896 if (LJ_DUALNUM && rc == ~LJ_TNUMX+1)
1897 ra = lj_opt_narrow_toint(J, ra);
1898 else if (rc == ~LJ_TNUMX+2)
1899 ra = lj_ir_tonum(J, ra);
1900 else if (rc == ~LJ_TSTR+1)
1901 ra = lj_ir_tostr(J, ra);
1902 /* else: type specialization suffices. */
1903 J->base[bc_a(ins)] = ra;
1904 break;
1905
1829 /* -- Unary ops --------------------------------------------------------- */ 1906 /* -- Unary ops --------------------------------------------------------- */
1830 1907
1831 case BC_NOT: 1908 case BC_NOT:
@@ -1889,6 +1966,12 @@ void lj_record_ins(jit_State *J)
1889 rc = rec_mm_arith(J, &ix, MM_pow); 1966 rc = rec_mm_arith(J, &ix, MM_pow);
1890 break; 1967 break;
1891 1968
1969 /* -- Miscellaneous ops ------------------------------------------------- */
1970
1971 case BC_CAT:
1972 rc = rec_cat(J, rb, rc);
1973 break;
1974
1892 /* -- Constant and move ops --------------------------------------------- */ 1975 /* -- Constant and move ops --------------------------------------------- */
1893 1976
1894 case BC_MOV: 1977 case BC_MOV:
@@ -1937,6 +2020,10 @@ void lj_record_ins(jit_State *J)
1937 ix.idxchain = LJ_MAX_IDXCHAIN; 2020 ix.idxchain = LJ_MAX_IDXCHAIN;
1938 rc = lj_record_idx(J, &ix); 2021 rc = lj_record_idx(J, &ix);
1939 break; 2022 break;
2023 case BC_TGETR: case BC_TSETR:
2024 ix.idxchain = 0;
2025 rc = lj_record_idx(J, &ix);
2026 break;
1940 2027
1941 case BC_TNEW: 2028 case BC_TNEW:
1942 rc = rec_tnew(J, rc); 2029 rc = rec_tnew(J, rc);
@@ -2066,7 +2153,6 @@ void lj_record_ins(jit_State *J)
2066 /* fallthrough */ 2153 /* fallthrough */
2067 case BC_ITERN: 2154 case BC_ITERN:
2068 case BC_ISNEXT: 2155 case BC_ISNEXT:
2069 case BC_CAT:
2070 case BC_UCLO: 2156 case BC_UCLO:
2071 case BC_FNEW: 2157 case BC_FNEW:
2072 case BC_TSETM: 2158 case BC_TSETM:
diff --git a/src/lj_state.c b/src/lj_state.c
index 8c53d37f..604ff886 100644
--- a/src/lj_state.c
+++ b/src/lj_state.c
@@ -12,6 +12,7 @@
12#include "lj_obj.h" 12#include "lj_obj.h"
13#include "lj_gc.h" 13#include "lj_gc.h"
14#include "lj_err.h" 14#include "lj_err.h"
15#include "lj_buf.h"
15#include "lj_str.h" 16#include "lj_str.h"
16#include "lj_tab.h" 17#include "lj_tab.h"
17#include "lj_func.h" 18#include "lj_func.h"
@@ -164,7 +165,7 @@ static void close_state(lua_State *L)
164 lj_ctype_freestate(g); 165 lj_ctype_freestate(g);
165#endif 166#endif
166 lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef); 167 lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef);
167 lj_str_freebuf(g, &g->tmpbuf); 168 lj_buf_free(g, &g->tmpbuf);
168 lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue); 169 lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue);
169 lua_assert(g->gc.total == sizeof(GG_State)); 170 lua_assert(g->gc.total == sizeof(GG_State));
170#ifndef LUAJIT_USE_SYSMALLOC 171#ifndef LUAJIT_USE_SYSMALLOC
@@ -203,7 +204,7 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)
203 setnilV(&g->nilnode.val); 204 setnilV(&g->nilnode.val);
204 setnilV(&g->nilnode.key); 205 setnilV(&g->nilnode.key);
205 setmref(g->nilnode.freetop, &g->nilnode); 206 setmref(g->nilnode.freetop, &g->nilnode);
206 lj_str_initbuf(&g->tmpbuf); 207 lj_buf_init(NULL, &g->tmpbuf);
207 g->gc.state = GCSpause; 208 g->gc.state = GCSpause;
208 setgcref(g->gc.root, obj2gco(L)); 209 setgcref(g->gc.root, obj2gco(L));
209 setmref(g->gc.sweep, &g->gc.root); 210 setmref(g->gc.sweep, &g->gc.root);
diff --git a/src/lj_str.c b/src/lj_str.c
index 6548ee4d..24d96067 100644
--- a/src/lj_str.c
+++ b/src/lj_str.c
@@ -1,9 +1,6 @@
1/* 1/*
2** String handling. 2** String handling.
3** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h 3** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
4**
5** Portions taken verbatim or adapted from the Lua interpreter.
6** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
7*/ 4*/
8 5
9#include <stdio.h> 6#include <stdio.h>
@@ -15,10 +12,9 @@
15#include "lj_gc.h" 12#include "lj_gc.h"
16#include "lj_err.h" 13#include "lj_err.h"
17#include "lj_str.h" 14#include "lj_str.h"
18#include "lj_state.h"
19#include "lj_char.h" 15#include "lj_char.h"
20 16
21/* -- String interning ---------------------------------------------------- */ 17/* -- String helpers ------------------------------------------------------ */
22 18
23/* Ordered compare of strings. Assumes string data is 4-byte aligned. */ 19/* Ordered compare of strings. Assumes string data is 4-byte aligned. */
24int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b) 20int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b)
@@ -64,6 +60,40 @@ static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len)
64 return 0; 60 return 0;
65} 61}
66 62
63/* Find fixed string p (plen bytes) inside string s (slen bytes). Returns a pointer to the first match (s itself if plen is 0) or NULL if there is none. */
64const char *lj_str_find(const char *s, const char *p, MSize slen, MSize plen)
65{
66 if (plen <= slen) {
67 if (plen == 0) {
68 return s;
69 } else {
70 int c = *(const uint8_t *)p++; /* First byte of p; p now points at the remainder. */
71 plen--; slen -= plen; /* slen now counts the positions where a match can still start. */
72 while (slen) {
73 const char *q = (const char *)memchr(s, c, slen);
74 if (!q) break;
75 if (memcmp(q+1, p, plen) == 0) return q;
76 q++; slen -= (MSize)(q-s); s = q; /* Resume the scan right after the failed candidate. */
77 }
78 }
79 }
80 return NULL;
81}
82
83/* Check whether a string has a pattern matching character, i.e. a byte that passes lj_char_ispunct() and is in the strchr() set below. Returns 1 if so, 0 otherwise. */
84int lj_str_haspattern(GCstr *s)
85{
86 const char *p = strdata(s), *q = p + s->len;
87 while (p < q) {
88 int c = *(const uint8_t *)p++;
89 if (lj_char_ispunct(c) && strchr("^$*+?.([%-", c))
90 return 1; /* Found a pattern matching char. */
91 }
92 return 0; /* No pattern matching chars found. */
93}
94
95/* -- String interning ---------------------------------------------------- */
96
67/* Resize the string hash table (grow and shrink). */ 97/* Resize the string hash table (grow and shrink). */
68void lj_str_resize(lua_State *L, MSize newmask) 98void lj_str_resize(lua_State *L, MSize newmask)
69{ 99{
@@ -167,173 +197,3 @@ void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s)
167 lj_mem_free(g, s, sizestring(s)); 197 lj_mem_free(g, s, sizestring(s));
168} 198}
169 199
170/* -- Type conversions ---------------------------------------------------- */
171
172/* Print number to buffer. Canonicalizes non-finite values. */
173size_t LJ_FASTCALL lj_str_bufnum(char *s, cTValue *o)
174{
175 if (LJ_LIKELY((o->u32.hi << 1) < 0xffe00000)) { /* Finite? */
176 lua_Number n = o->n;
177#if __BIONIC__
178 if (tvismzero(o)) { s[0] = '-'; s[1] = '0'; return 2; }
179#endif
180 return (size_t)lua_number2str(s, n);
181 } else if (((o->u32.hi & 0x000fffff) | o->u32.lo) != 0) {
182 s[0] = 'n'; s[1] = 'a'; s[2] = 'n'; return 3;
183 } else if ((o->u32.hi & 0x80000000) == 0) {
184 s[0] = 'i'; s[1] = 'n'; s[2] = 'f'; return 3;
185 } else {
186 s[0] = '-'; s[1] = 'i'; s[2] = 'n'; s[3] = 'f'; return 4;
187 }
188}
189
190/* Print integer to buffer. Returns pointer to start. */
191char * LJ_FASTCALL lj_str_bufint(char *p, int32_t k)
192{
193 uint32_t u = (uint32_t)(k < 0 ? -k : k);
194 p += 1+10;
195 do { *--p = (char)('0' + u % 10); } while (u /= 10);
196 if (k < 0) *--p = '-';
197 return p;
198}
199
200/* Convert number to string. */
201GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np)
202{
203 char buf[LJ_STR_NUMBUF];
204 size_t len = lj_str_bufnum(buf, (TValue *)np);
205 return lj_str_new(L, buf, len);
206}
207
208/* Convert integer to string. */
209GCstr * LJ_FASTCALL lj_str_fromint(lua_State *L, int32_t k)
210{
211 char s[1+10];
212 char *p = lj_str_bufint(s, k);
213 return lj_str_new(L, p, (size_t)(s+sizeof(s)-p));
214}
215
216GCstr * LJ_FASTCALL lj_str_fromnumber(lua_State *L, cTValue *o)
217{
218 return tvisint(o) ? lj_str_fromint(L, intV(o)) : lj_str_fromnum(L, &o->n);
219}
220
221/* -- String formatting --------------------------------------------------- */
222
223static void addstr(lua_State *L, SBuf *sb, const char *str, MSize len)
224{
225 char *p;
226 MSize i;
227 if (sb->n + len > sb->sz) {
228 MSize sz = sb->sz * 2;
229 while (sb->n + len > sz) sz = sz * 2;
230 lj_str_resizebuf(L, sb, sz);
231 }
232 p = sb->buf + sb->n;
233 sb->n += len;
234 for (i = 0; i < len; i++) p[i] = str[i];
235}
236
237static void addchar(lua_State *L, SBuf *sb, int c)
238{
239 if (sb->n + 1 > sb->sz) {
240 MSize sz = sb->sz * 2;
241 lj_str_resizebuf(L, sb, sz);
242 }
243 sb->buf[sb->n++] = (char)c;
244}
245
246/* Push formatted message as a string object to Lua stack. va_list variant. */
247const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp)
248{
249 SBuf *sb = &G(L)->tmpbuf;
250 lj_str_needbuf(L, sb, (MSize)strlen(fmt));
251 lj_str_resetbuf(sb);
252 for (;;) {
253 const char *e = strchr(fmt, '%');
254 if (e == NULL) break;
255 addstr(L, sb, fmt, (MSize)(e-fmt));
256 /* This function only handles %s, %c, %d, %f and %p formats. */
257 switch (e[1]) {
258 case 's': {
259 const char *s = va_arg(argp, char *);
260 if (s == NULL) s = "(null)";
261 addstr(L, sb, s, (MSize)strlen(s));
262 break;
263 }
264 case 'c':
265 addchar(L, sb, va_arg(argp, int));
266 break;
267 case 'd': {
268 char buf[LJ_STR_INTBUF];
269 char *p = lj_str_bufint(buf, va_arg(argp, int32_t));
270 addstr(L, sb, p, (MSize)(buf+LJ_STR_INTBUF-p));
271 break;
272 }
273 case 'f': {
274 char buf[LJ_STR_NUMBUF];
275 TValue tv;
276 MSize len;
277 tv.n = (lua_Number)(va_arg(argp, LUAI_UACNUMBER));
278 len = (MSize)lj_str_bufnum(buf, &tv);
279 addstr(L, sb, buf, len);
280 break;
281 }
282 case 'p': {
283#define FMTP_CHARS (2*sizeof(ptrdiff_t))
284 char buf[2+FMTP_CHARS];
285 ptrdiff_t p = (ptrdiff_t)(va_arg(argp, void *));
286 ptrdiff_t i, lasti = 2+FMTP_CHARS;
287 if (p == 0) {
288 addstr(L, sb, "NULL", 4);
289 break;
290 }
291#if LJ_64
292 /* Shorten output for 64 bit pointers. */
293 lasti = 2+2*4+((p >> 32) ? 2+2*(lj_fls((uint32_t)(p >> 32))>>3) : 0);
294#endif
295 buf[0] = '0';
296 buf[1] = 'x';
297 for (i = lasti-1; i >= 2; i--, p >>= 4)
298 buf[i] = "0123456789abcdef"[(p & 15)];
299 addstr(L, sb, buf, (MSize)lasti);
300 break;
301 }
302 case '%':
303 addchar(L, sb, '%');
304 break;
305 default:
306 addchar(L, sb, '%');
307 addchar(L, sb, e[1]);
308 break;
309 }
310 fmt = e+2;
311 }
312 addstr(L, sb, fmt, (MSize)strlen(fmt));
313 setstrV(L, L->top, lj_str_new(L, sb->buf, sb->n));
314 incr_top(L);
315 return strVdata(L->top - 1);
316}
317
318/* Push formatted message as a string object to Lua stack. Vararg variant. */
319const char *lj_str_pushf(lua_State *L, const char *fmt, ...)
320{
321 const char *msg;
322 va_list argp;
323 va_start(argp, fmt);
324 msg = lj_str_pushvf(L, fmt, argp);
325 va_end(argp);
326 return msg;
327}
328
329/* -- Buffer handling ----------------------------------------------------- */
330
331char *lj_str_needbuf(lua_State *L, SBuf *sb, MSize sz)
332{
333 if (sz > sb->sz) {
334 if (sz < LJ_MIN_SBUF) sz = LJ_MIN_SBUF;
335 lj_str_resizebuf(L, sb, sz);
336 }
337 return sb->buf;
338}
339
diff --git a/src/lj_str.h b/src/lj_str.h
index 3aa03662..f156ff6f 100644
--- a/src/lj_str.h
+++ b/src/lj_str.h
@@ -10,8 +10,13 @@
10 10
11#include "lj_obj.h" 11#include "lj_obj.h"
12 12
13/* String interning. */ 13/* String helpers. */
14LJ_FUNC int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b); 14LJ_FUNC int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b);
15LJ_FUNC const char *lj_str_find(const char *s, const char *f,
16 MSize slen, MSize flen);
17LJ_FUNC int lj_str_haspattern(GCstr *s);
18
19/* String interning. */
15LJ_FUNC void lj_str_resize(lua_State *L, MSize newmask); 20LJ_FUNC void lj_str_resize(lua_State *L, MSize newmask);
16LJ_FUNCA GCstr *lj_str_new(lua_State *L, const char *str, size_t len); 21LJ_FUNCA GCstr *lj_str_new(lua_State *L, const char *str, size_t len);
17LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s); 22LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s);
@@ -19,32 +24,4 @@ LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s);
19#define lj_str_newz(L, s) (lj_str_new(L, s, strlen(s))) 24#define lj_str_newz(L, s) (lj_str_new(L, s, strlen(s)))
20#define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1)) 25#define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1))
21 26
22/* Type conversions. */
23LJ_FUNC size_t LJ_FASTCALL lj_str_bufnum(char *s, cTValue *o);
24LJ_FUNC char * LJ_FASTCALL lj_str_bufint(char *p, int32_t k);
25LJ_FUNCA GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np);
26LJ_FUNC GCstr * LJ_FASTCALL lj_str_fromint(lua_State *L, int32_t k);
27LJ_FUNCA GCstr * LJ_FASTCALL lj_str_fromnumber(lua_State *L, cTValue *o);
28
29#define LJ_STR_INTBUF (1+10)
30#define LJ_STR_NUMBUF LUAI_MAXNUMBER2STR
31
32/* String formatting. */
33LJ_FUNC const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp);
34LJ_FUNC const char *lj_str_pushf(lua_State *L, const char *fmt, ...)
35#if defined(__GNUC__)
36 __attribute__ ((format (printf, 2, 3)))
37#endif
38 ;
39
40/* Resizable string buffers. Struct definition in lj_obj.h. */
41LJ_FUNC char *lj_str_needbuf(lua_State *L, SBuf *sb, MSize sz);
42
43#define lj_str_initbuf(sb) ((sb)->buf = NULL, (sb)->sz = 0)
44#define lj_str_resetbuf(sb) ((sb)->n = 0)
45#define lj_str_resizebuf(L, sb, size) \
46 ((sb)->buf = (char *)lj_mem_realloc(L, (sb)->buf, (sb)->sz, (size)), \
47 (sb)->sz = (size))
48#define lj_str_freebuf(g, sb) lj_mem_free(g, (void *)(sb)->buf, (sb)->sz)
49
50#endif 27#endif
diff --git a/src/lj_strfmt.c b/src/lj_strfmt.c
new file mode 100644
index 00000000..0003de70
--- /dev/null
+++ b/src/lj_strfmt.c
@@ -0,0 +1,549 @@
1/*
2** String formatting.
3** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#include <stdio.h>
7
8#define lj_strfmt_c
9#define LUA_CORE
10
11#include "lj_obj.h"
12#include "lj_buf.h"
13#include "lj_str.h"
14#include "lj_state.h"
15#include "lj_char.h"
16#include "lj_strfmt.h"
17
18/* -- Format parser ------------------------------------------------------- */
19
20static const uint8_t strfmt_map[('x'-'A')+1] = {
21 STRFMT_A,0,0,0,STRFMT_E,0,STRFMT_G,0,0,0,0,0,0,
22 0,0,0,0,0,0,0,0,0,0,STRFMT_X,0,0,
23 0,0,0,0,0,0,
24 STRFMT_A,0,STRFMT_C,STRFMT_D,STRFMT_E,STRFMT_F,STRFMT_G,0,STRFMT_I,0,0,0,0,
25 0,STRFMT_O,STRFMT_P,STRFMT_Q,0,STRFMT_S,0,STRFMT_U,0,0,STRFMT_X
26};
27
28SFormat LJ_FASTCALL lj_strfmt_parse(FormatState *fs)
29{
30 const uint8_t *p = fs->p, *e = fs->e;
31 fs->str = (const char *)p;
32 for (; p < e; p++) {
33 if (*p == '%') { /* Escape char? */
34 if (p[1] == '%') { /* '%%'? */
35 fs->p = ++p+1;
36 goto retlit;
37 } else {
38 SFormat sf = 0;
39 uint32_t c;
40 if (p != (const uint8_t *)fs->str)
41 break;
42 for (p++; (uint32_t)*p - ' ' <= (uint32_t)('0' - ' '); p++) {
43 /* Parse flags. */
44 if (*p == '-') sf |= STRFMT_F_LEFT;
45 else if (*p == '+') sf |= STRFMT_F_PLUS;
46 else if (*p == '0') sf |= STRFMT_F_ZERO;
47 else if (*p == ' ') sf |= STRFMT_F_SPACE;
48 else if (*p == '#') sf |= STRFMT_F_ALT;
49 else break;
50 }
51 if ((uint32_t)*p - '0' < 10) { /* Parse width. */
52 uint32_t width = (uint32_t)*p++ - '0';
53 if ((uint32_t)*p - '0' < 10)
54 width = (uint32_t)*p++ - '0' + width*10;
55 sf |= (width << STRFMT_SH_WIDTH);
56 }
57 if (*p == '.') { /* Parse precision. */
58 uint32_t prec = 0;
59 p++;
60 if ((uint32_t)*p - '0' < 10) {
61 prec = (uint32_t)*p++ - '0';
62 if ((uint32_t)*p - '0' < 10)
63 prec = (uint32_t)*p++ - '0' + prec*10;
64 }
65 sf |= ((prec+1) << STRFMT_SH_PREC);
66 }
67 /* Parse conversion. */
68 c = (uint32_t)*p - 'A';
69 if (LJ_LIKELY(c <= (uint32_t)('x' - 'A'))) {
70 uint32_t sx = strfmt_map[c];
71 if (sx) {
72 fs->p = p+1;
73 return (sf | sx | ((c & 0x20) ? 0 : STRFMT_F_UPPER));
74 }
75 }
76 /* Return error location. */
77 if (*p >= 32) p++;
78 fs->len = (MSize)(p - (const uint8_t *)fs->str);
79 fs->p = fs->e;
80 return STRFMT_ERR;
81 }
82 }
83 }
84 fs->p = p;
85retlit:
86 fs->len = (MSize)(p - (const uint8_t *)fs->str);
87 return fs->len ? STRFMT_LIT : STRFMT_EOF;
88}
89
90/* -- Raw conversions ----------------------------------------------------- */
91
92/* Write number to buffer. Returns the advanced buffer pointer. Non-finite values are written as nan, inf or -inf. */
93char * LJ_FASTCALL lj_strfmt_wnum(char *p, cTValue *o)
94{
95 if (LJ_LIKELY((o->u32.hi << 1) < 0xffe00000)) { /* Finite? */
96#if __BIONIC__
97 if (tvismzero(o)) { *p++ = '-'; *p++ = '0'; return p; }
98#endif
99 return p + lua_number2str(p, o->n);
100 } else if (((o->u32.hi & 0x000fffff) | o->u32.lo) != 0) {
101 *p++ = 'n'; *p++ = 'a'; *p++ = 'n';
102 } else if ((o->u32.hi & 0x80000000) == 0) {
103 *p++ = 'i'; *p++ = 'n'; *p++ = 'f';
104 } else {
105 *p++ = '-'; *p++ = 'i'; *p++ = 'n'; *p++ = 'f';
106 }
107 return p;
108}
109
110#define WINT_R(x, sh, sc) \
111 { uint32_t d = (x*(((1<<sh)+sc-1)/sc))>>sh; x -= d*sc; *p++ = (char)('0'+d); }
112
113/* Write integer to buffer. Returns pointer past the last char written (no terminator is stored). WINT_R emits the digit x/sc via multiply+shift and leaves the remainder in x. */
114char * LJ_FASTCALL lj_strfmt_wint(char *p, int32_t k)
115{
116 uint32_t u = (uint32_t)k;
117 if (k < 0) { u = ~u+1u; *p++ = '-'; } /* Negate as unsigned: -k is undefined for INT32_MIN. */
118 if (u < 10000) {
119 if (u < 10) goto dig1; if (u < 100) goto dig2; if (u < 1000) goto dig3;
120 } else {
121 uint32_t v = u / 10000; u -= v * 10000;
122 if (v < 10000) {
123 if (v < 10) goto dig5; if (v < 100) goto dig6; if (v < 1000) goto dig7;
124 } else {
125 uint32_t w = v / 10000; v -= w * 10000;
126 if (w >= 10) WINT_R(w, 10, 10)
127 *p++ = (char)('0'+w);
128 }
129 WINT_R(v, 23, 1000)
130 dig7: WINT_R(v, 12, 100)
131 dig6: WINT_R(v, 10, 10)
132 dig5: *p++ = (char)('0'+v);
133 }
134 WINT_R(u, 23, 1000)
135 dig3: WINT_R(u, 12, 100)
136 dig2: WINT_R(u, 10, 10)
137 dig1: *p++ = (char)('0'+u);
138 return p;
139}
140#undef WINT_R
141
142/* Write pointer to buffer, either as NULL or as 0x followed by lowercase hex digits. Returns pointer past the output. */
143char * LJ_FASTCALL lj_strfmt_wptr(char *p, const void *v)
144{
145 ptrdiff_t x = (ptrdiff_t)v;
146 MSize i, n = STRFMT_MAXBUF_PTR;
147 if (x == 0) {
148 *p++ = 'N'; *p++ = 'U'; *p++ = 'L'; *p++ = 'L';
149 return p;
150 }
151#if LJ_64
152 /* Shorten output for 64 bit pointers. */
153 n = 2+2*4+((x >> 32) ? 2+2*(lj_fls((uint32_t)(x >> 32))>>3) : 0);
154#endif
155 p[0] = '0';
156 p[1] = 'x';
157 for (i = n-1; i >= 2; i--, x >>= 4) /* Fill the hex digits from the last position backwards. */
158 p[i] = "0123456789abcdef"[(x & 15)];
159 return p+n;
160}
161
162/* Write v as ULEB128 to buffer: 7 bits per byte, lowest bits first, bit 7 set on every byte except the last. Returns pointer past the output. */
163char * LJ_FASTCALL lj_strfmt_wuleb128(char *p, uint32_t v)
164{
165 for (; v >= 0x80; v >>= 7)
166 *p++ = (char)((v & 0x7f) | 0x80);
167 *p++ = (char)v;
168 return p;
169}
170
171/* Return string or write number to buffer and return pointer to start. The length is stored in *lenp. Returns NULL (without setting *lenp) if o is neither a string nor a number. */
172const char *lj_strfmt_wstrnum(char *buf, cTValue *o, MSize *lenp)
173{
174 if (tvisstr(o)) {
175 *lenp = strV(o)->len;
176 return strVdata(o); /* String data is returned directly; buf is not used. */
177 } else if (tvisint(o)) {
178 *lenp = (MSize)(lj_strfmt_wint(buf, intV(o)) - buf);
179 return buf;
180 } else if (tvisnum(o)) {
181 *lenp = (MSize)(lj_strfmt_wnum(buf, o) - buf);
182 return buf;
183 } else {
184 return NULL;
185 }
186}
187
188/* -- Unformatted conversions to buffer ----------------------------------- */
189
190/* Add integer to buffer. */
191SBuf * LJ_FASTCALL lj_strfmt_putint(SBuf *sb, int32_t k)
192{
193 setsbufP(sb, lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT), k));
194 return sb;
195}
196
197#if LJ_HASJIT
198/* Add number to buffer. */
199SBuf * LJ_FASTCALL lj_strfmt_putnum(SBuf *sb, cTValue *o)
200{
201 setsbufP(sb, lj_strfmt_wnum(lj_buf_more(sb, STRFMT_MAXBUF_NUM), o));
202 return sb;
203}
204#endif
205
206/* Add quoted string to buffer. */
207SBuf * LJ_FASTCALL lj_strfmt_putquoted(SBuf *sb, GCstr *str)
208{
209 const char *s = strdata(str);
210 MSize len = str->len;
211 lj_buf_putb(sb, '"');
212 while (len--) {
213 uint32_t c = (uint32_t)(uint8_t)*s++;
214 char *p = lj_buf_more(sb, 4);
215 if (c == '"' || c == '\\' || c == '\n') {
216 *p++ = '\\';
217 } else if (lj_char_iscntrl(c)) { /* This can only be 0-31 or 127. */
218 uint32_t d;
219 *p++ = '\\';
220 if (c >= 100 || lj_char_isdigit((uint8_t)*s)) {
221 *p++ = (char)('0'+(c >= 100)); if (c >= 100) c -= 100;
222 goto tens;
223 } else if (c >= 10) {
224 tens:
225 d = (c * 205) >> 11; c -= d * 10; *p++ = (char)('0'+d);
226 }
227 c += '0';
228 }
229 *p++ = (char)c;
230 setsbufP(sb, p);
231 }
232 lj_buf_putb(sb, '"');
233 return sb;
234}
235
236/* -- Formatted conversions to buffer ------------------------------------- */
237
238/* Add formatted char to buffer: the char is space-padded to the format width, left-justified if STRFMT_F_LEFT is set. */
239SBuf *lj_strfmt_putfchar(SBuf *sb, SFormat sf, int32_t c)
240{
241 MSize width = STRFMT_WIDTH(sf);
242 char *p = lj_buf_more(sb, width > 1 ? width : 1);
243 if ((sf & STRFMT_F_LEFT)) *p++ = (char)c;
244 while (width-- > 1) *p++ = ' '; /* Emits width-1 padding spaces. */
245 if (!(sf & STRFMT_F_LEFT)) *p++ = (char)c;
246 setsbufP(sb, p);
247 return sb;
248}
249
250/* Add formatted string to buffer: the string is truncated to the format precision and space-padded to the format width, left-justified if STRFMT_F_LEFT is set. */
251SBuf *lj_strfmt_putfstr(SBuf *sb, SFormat sf, GCstr *str)
252{
253 MSize len = str->len <= STRFMT_PREC(sf) ? str->len : STRFMT_PREC(sf);
254 MSize width = STRFMT_WIDTH(sf);
255 char *p = lj_buf_more(sb, width > len ? width : len);
256 if ((sf & STRFMT_F_LEFT)) p = lj_buf_wmem(p, strdata(str), len);
257 while (width-- > len) *p++ = ' '; /* Emits width-len padding spaces, if any. */
258 if (!(sf & STRFMT_F_LEFT)) p = lj_buf_wmem(p, strdata(str), len);
259 setsbufP(sb, p);
260 return sb;
261}
262
263/* Add formatted signed/unsigned integer to buffer. k holds the raw 64 bit value and is treated as signed only for STRFMT_INT. Handles sign and 0x/0X prefixes, precision, width and the padding flags. */
264SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k)
265{
266 char buf[STRFMT_MAXBUF_XINT], *q = buf + sizeof(buf), *p;
267#ifdef LUA_USE_ASSERT
268 char *ps;
269#endif
270 MSize prefix = 0, len, prec, pprec, width, need;
271
272 /* Figure out signed prefixes. */
273 if (STRFMT_TYPE(sf) == STRFMT_INT) {
274 if ((int64_t)k < 0) {
275 k = ~k+1u; /* Negate as unsigned: -(int64_t)k is undefined for INT64_MIN. */
276 prefix = 256 + '-';
277 } else if ((sf & STRFMT_F_PLUS)) {
278 prefix = 256 + '+';
279 } else if ((sf & STRFMT_F_SPACE)) {
280 prefix = 256 + ' ';
281 }
282 }
283
284 /* Convert number and store to fixed-size buffer in reverse order. */
285 prec = STRFMT_PREC(sf);
286 if ((int32_t)prec >= 0) sf &= ~STRFMT_F_ZERO;
287 if (k == 0) { /* Special-case zero argument. */
288 if (prec != 0 ||
289 (sf & (STRFMT_T_OCT|STRFMT_F_ALT)) == (STRFMT_T_OCT|STRFMT_F_ALT))
290 *--q = '0';
291 } else if (!(sf & (STRFMT_T_HEX|STRFMT_T_OCT))) { /* Decimal. */
292 uint32_t k2;
293 while ((k >> 32)) { *--q = (char)('0' + k % 10); k /= 10; }
294 k2 = (uint32_t)k;
295 do { *--q = (char)('0' + k2 % 10); k2 /= 10; } while (k2);
296 } else if ((sf & STRFMT_T_HEX)) { /* Hex. */
297 const char *hexdig = (sf & STRFMT_F_UPPER) ? "0123456789ABCDEF" :
298 "0123456789abcdef";
299 do { *--q = hexdig[(k & 15)]; k >>= 4; } while (k);
300 if ((sf & STRFMT_F_ALT)) prefix = 512 + ((sf & STRFMT_F_UPPER) ? 'X' : 'x'); /* %#X gets a 0X prefix. */
301 } else { /* Octal. */
302 do { *--q = (char)('0' + (uint32_t)(k & 7)); k >>= 3; } while (k);
303 if ((sf & STRFMT_F_ALT)) *--q = '0';
304 }
305
306 /* Calculate sizes. */
307 len = (MSize)(buf + sizeof(buf) - q);
308 if ((int32_t)len >= (int32_t)prec) prec = len;
309 width = STRFMT_WIDTH(sf);
310 pprec = prec + (prefix >> 8); /* prefix >> 8 is the prefix length (0, 1 or 2). */
311 need = width > pprec ? width : pprec;
312 p = lj_buf_more(sb, need);
313#ifdef LUA_USE_ASSERT
314 ps = p;
315#endif
316
317 /* Format number with leading/trailing whitespace and zeros. */
318 if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == 0)
319 while (width-- > pprec) *p++ = ' ';
320 if (prefix) {
321 if ((prefix >> 8) == 2) *p++ = '0'; /* Two-char hex prefix: '0' plus 'x' or 'X'. */
322 *p++ = (char)prefix;
323 }
324 if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == STRFMT_F_ZERO)
325 while (width-- > pprec) *p++ = '0';
326 while (prec-- > len) *p++ = '0';
327 while (q < buf + sizeof(buf)) *p++ = *q++; /* Add number itself. */
328 if ((sf & STRFMT_F_LEFT))
329 while (width-- > pprec) *p++ = ' ';
330
331 lua_assert(need == (MSize)(p - ps));
332 setsbufP(sb, p);
333 return sb;
334}
335
336/* Add number formatted as signed integer to buffer. */
337SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n)
338{
339 int64_t k = (int64_t)n;
340 if (checki32(k) && sf == STRFMT_INT)
341 return lj_strfmt_putint(sb, (int32_t)k); /* Shortcut for plain %d. */
342 else
343 return lj_strfmt_putfxint(sb, sf, (uint64_t)k);
344}
345
346/* Add number formatted as unsigned integer to buffer. The number is converted via int64_t and its bit pattern is passed on as uint64_t. */
347SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n)
348{
349 int64_t k;
350 if (n >= 9223372036854775808.0) /* n >= 2^63 is outside the int64_t range. */
351 k = (int64_t)(n - 18446744073709551616.0); /* Subtract 2^64 before converting. */
352 else
353 k = (int64_t)n;
354 return lj_strfmt_putfxint(sb, sf, (uint64_t)k);
355}
356
357/* Max. sprintf buffer size needed. At least #string.format("%.99f", -1e308). */
358#define STRFMT_FMTNUMBUF 512
359
360/* Add formatted floating-point number to buffer. */
361SBuf *lj_strfmt_putfnum(SBuf *sb, SFormat sf, lua_Number n)
362{
363 TValue tv;
364 tv.n = n;
365 if (LJ_UNLIKELY((tv.u32.hi << 1) >= 0xffe00000)) {
366 /* Canonicalize output of non-finite values. */
367 MSize width = STRFMT_WIDTH(sf), len = 3;
368 int prefix = 0, ch = (sf & STRFMT_F_UPPER) ? 0x202020 : 0;
369 char *p;
370 if (((tv.u32.hi & 0x000fffff) | tv.u32.lo) != 0) {
371 ch ^= ('n' << 16) | ('a' << 8) | 'n';
372 if ((sf & STRFMT_F_SPACE)) prefix = ' ';
373 } else {
374 ch ^= ('i' << 16) | ('n' << 8) | 'f';
375 if ((tv.u32.hi & 0x80000000)) prefix = '-';
376 else if ((sf & STRFMT_F_PLUS)) prefix = '+';
377 else if ((sf & STRFMT_F_SPACE)) prefix = ' ';
378 }
379 if (prefix) len = 4;
380 p = lj_buf_more(sb, width > len ? width : len);
381 if (!(sf & STRFMT_F_LEFT)) while (width-- > len) *p++ = ' ';
382 if (prefix) *p++ = prefix;
383 *p++ = (char)(ch >> 16); *p++ = (char)(ch >> 8); *p++ = (char)ch;
384 if ((sf & STRFMT_F_LEFT)) while (width-- > len) *p++ = ' ';
385 setsbufP(sb, p);
386 } else { /* Delegate to sprintf() for now. */
387 uint8_t width = (uint8_t)STRFMT_WIDTH(sf), prec = (uint8_t)STRFMT_PREC(sf);
388 char fmt[1+5+2+3+1+1], *p = fmt;
389 *p++ = '%';
390 if ((sf & STRFMT_F_LEFT)) *p++ = '-';
391 if ((sf & STRFMT_F_PLUS)) *p++ = '+';
392 if ((sf & STRFMT_F_ZERO)) *p++ = '0';
393 if ((sf & STRFMT_F_SPACE)) *p++ = ' ';
394 if ((sf & STRFMT_F_ALT)) *p++ = '#';
395 if (width) {
396 uint8_t x = width / 10, y = width % 10;
397 if (x) *p++ = '0' + x;
398 *p++ = '0' + y;
399 }
400 if (prec != 255) {
401 uint8_t x = prec / 10, y = prec % 10;
402 *p++ = '.';
403 if (x) *p++ = '0' + x;
404 *p++ = '0' + y;
405 }
406 *p++ = (0x67666561 >> (STRFMT_FP(sf)<<3)) ^ ((sf & STRFMT_F_UPPER)?0x20:0);
407 *p = '\0';
408 p = lj_buf_more(sb, STRFMT_FMTNUMBUF);
409 setsbufP(sb, p + sprintf(p, fmt, n));
410 }
411 return sb;
412}
413
414/* -- Conversions to strings ---------------------------------------------- */
415
416/* Convert integer to string. */
417GCstr * LJ_FASTCALL lj_strfmt_int(lua_State *L, int32_t k)
418{
419 char buf[STRFMT_MAXBUF_INT];
420 MSize len = (MSize)(lj_strfmt_wint(buf, k) - buf);
421 return lj_str_new(L, buf, len);
422}
423
424/* Convert number to string. */
425GCstr * LJ_FASTCALL lj_strfmt_num(lua_State *L, cTValue *o)
426{
427 char buf[STRFMT_MAXBUF_NUM];
428 MSize len = (MSize)(lj_strfmt_wnum(buf, o) - buf);
429 return lj_str_new(L, buf, len);
430}
431
432/* Convert integer or number to string. */
433GCstr * LJ_FASTCALL lj_strfmt_number(lua_State *L, cTValue *o)
434{
435 return tvisint(o) ? lj_strfmt_int(L, intV(o)) : lj_strfmt_num(L, o);
436}
437
438#if LJ_HASJIT
439/* Convert char value to string. */
440GCstr * LJ_FASTCALL lj_strfmt_char(lua_State *L, int c)
441{
442 char buf[1];
443 buf[0] = c;
444 return lj_str_new(L, buf, 1);
445}
446#endif
447
448/* Raw conversion of object to string. */
449GCstr * LJ_FASTCALL lj_strfmt_obj(lua_State *L, cTValue *o)
450{
451 if (tvisstr(o)) {
452 return strV(o);
453 } else if (tvisnumber(o)) {
454 return lj_strfmt_number(L, o);
455 } else if (tvisnil(o)) {
456 return lj_str_newlit(L, "nil");
457 } else if (tvisfalse(o)) {
458 return lj_str_newlit(L, "false");
459 } else if (tvistrue(o)) {
460 return lj_str_newlit(L, "true");
461 } else {
462 char buf[8+2+2+16], *p = buf;
463 p = lj_buf_wmem(p, lj_typename(o), (MSize)strlen(lj_typename(o)));
464 *p++ = ':'; *p++ = ' ';
465 if (tvisfunc(o) && isffunc(funcV(o))) {
466 p = lj_buf_wmem(p, "builtin#", 8);
467 p = lj_strfmt_wint(p, funcV(o)->c.ffid);
468 } else {
469 p = lj_strfmt_wptr(p, lj_obj_ptr(o));
470 }
471 return lj_str_new(L, buf, (size_t)(p - buf));
472 }
473}
474
475/* -- Internal string formatting ------------------------------------------ */
476
477/*
478** These functions are only used for lua_pushfstring(), lua_pushvfstring()
479** and for internal string formatting (e.g. error messages). Caveat: unlike
480** string.format(), only a limited subset of formats and flags are supported!
481**
482** LuaJIT has support for a couple more formats than Lua 5.1/5.2:
483** - %d %u %o %x with full formatting, 32 bit integers only.
484** - %f and other FP formats are really %.14g.
485** - %s %c %p without formatting.
486*/
487
488/* Push formatted message as a string object to Lua stack. va_list variant. */
489const char *lj_strfmt_pushvf(lua_State *L, const char *fmt, va_list argp)
490{
491 SBuf *sb = lj_buf_tmp_(L);
492 FormatState fs;
493 SFormat sf;
494 GCstr *str;
495 lj_strfmt_init(&fs, fmt, (MSize)strlen(fmt));
496 while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) {
497 switch (STRFMT_TYPE(sf)) {
498 case STRFMT_LIT:
499 lj_buf_putmem(sb, fs.str, fs.len);
500 break;
501 case STRFMT_INT:
502 lj_strfmt_putfxint(sb, sf, va_arg(argp, int32_t));
503 break;
504 case STRFMT_UINT:
505 lj_strfmt_putfxint(sb, sf, va_arg(argp, uint32_t));
506 break;
507 case STRFMT_NUM: {
508 TValue tv;
509 tv.n = va_arg(argp, lua_Number);
510 setsbufP(sb, lj_strfmt_wnum(lj_buf_more(sb, STRFMT_MAXBUF_NUM), &tv));
511 break;
512 }
513 case STRFMT_STR: {
514 const char *s = va_arg(argp, char *);
515 if (s == NULL) s = "(null)";
516 lj_buf_putmem(sb, s, (MSize)strlen(s));
517 break;
518 }
519 case STRFMT_CHAR:
520 lj_buf_putb(sb, va_arg(argp, int));
521 break;
522 case STRFMT_PTR:
523 setsbufP(sb, lj_strfmt_wptr(lj_buf_more(sb, STRFMT_MAXBUF_PTR),
524 va_arg(argp, void *)));
525 break;
526 case STRFMT_ERR:
527 default:
528 lj_buf_putb(sb, '?');
529 lua_assert(0);
530 break;
531 }
532 }
533 str = lj_buf_str(L, sb);
534 setstrV(L, L->top, str);
535 incr_top(L);
536 return strdata(str);
537}
538
539/* Push formatted message as a string object to Lua stack. Vararg variant. */
540const char *lj_strfmt_pushf(lua_State *L, const char *fmt, ...)
541{
542 const char *msg;
543 va_list argp;
544 va_start(argp, fmt);
545 msg = lj_strfmt_pushvf(L, fmt, argp);
546 va_end(argp);
547 return msg;
548}
549
diff --git a/src/lj_strfmt.h b/src/lj_strfmt.h
new file mode 100644
index 00000000..5454336f
--- /dev/null
+++ b/src/lj_strfmt.h
@@ -0,0 +1,124 @@
1/*
2** String formatting.
3** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_STRFMT_H
7#define _LJ_STRFMT_H
8
9#include "lj_obj.h"
10
11typedef uint32_t SFormat; /* Format indicator. */
12
13/* Format parser state. p and e are set by lj_strfmt_init(); str and len are not touched by it. */
14typedef struct FormatState {
15 const uint8_t *p; /* Current format string pointer. */
16 const uint8_t *e; /* End of format string (start pointer + length). */
17 const char *str; /* Returned literal string. Read by callers for STRFMT_LIT results. */
18 MSize len; /* Size of literal string. Read together with str. */
19} FormatState;
20
21/* Format types (max. 16, because STRFMT_TYPE() keeps only the low 4 bits of an SFormat). */
22typedef enum FormatType {
23 STRFMT_EOF, STRFMT_ERR, STRFMT_LIT, /* Results that are not conversions. */
24 STRFMT_INT, STRFMT_UINT, STRFMT_NUM, STRFMT_STR, STRFMT_CHAR, STRFMT_PTR /* Conversion types, combined with subtype bits in the per-letter STRFMT_* defines. */
25} FormatType;
26
27/* Format subtypes (bits are reused). */
28#define STRFMT_T_HEX 0x0010 /* STRFMT_UINT */
29#define STRFMT_T_OCT 0x0020 /* STRFMT_UINT */
30#define STRFMT_T_FP_A 0x0000 /* STRFMT_NUM */
31#define STRFMT_T_FP_E 0x0010 /* STRFMT_NUM */
32#define STRFMT_T_FP_F 0x0020 /* STRFMT_NUM */
33#define STRFMT_T_FP_G 0x0030 /* STRFMT_NUM */
34#define STRFMT_T_QUOTED 0x0010 /* STRFMT_STR */
35
36/* Format flags. */
37#define STRFMT_F_LEFT 0x0100
38#define STRFMT_F_PLUS 0x0200
39#define STRFMT_F_ZERO 0x0400
40#define STRFMT_F_SPACE 0x0800
41#define STRFMT_F_ALT 0x1000
42#define STRFMT_F_UPPER 0x2000
43
44/* Format indicator fields. */
45#define STRFMT_SH_WIDTH 16
46#define STRFMT_SH_PREC 24
47
48#define STRFMT_TYPE(sf) ((FormatType)((sf) & 15))
49#define STRFMT_WIDTH(sf) (((sf) >> STRFMT_SH_WIDTH) & 255u)
50#define STRFMT_PREC(sf) ((((sf) >> STRFMT_SH_PREC) & 255u) - 1u)
51#define STRFMT_FP(sf) (((sf) >> 4) & 3)
52
53/* Formats for conversion characters. */
54#define STRFMT_A (STRFMT_NUM|STRFMT_T_FP_A)
55#define STRFMT_C (STRFMT_CHAR)
56#define STRFMT_D (STRFMT_INT)
57#define STRFMT_E (STRFMT_NUM|STRFMT_T_FP_E)
58#define STRFMT_F (STRFMT_NUM|STRFMT_T_FP_F)
59#define STRFMT_G (STRFMT_NUM|STRFMT_T_FP_G)
60#define STRFMT_I STRFMT_D
61#define STRFMT_O (STRFMT_UINT|STRFMT_T_OCT)
62#define STRFMT_P (STRFMT_PTR)
63#define STRFMT_Q (STRFMT_STR|STRFMT_T_QUOTED)
64#define STRFMT_S (STRFMT_STR)
65#define STRFMT_U (STRFMT_UINT)
66#define STRFMT_X (STRFMT_UINT|STRFMT_T_HEX)
67
68/* Maximum buffer sizes for conversions. */
69#define STRFMT_MAXBUF_XINT (1+22) /* '0' prefix + uint64_t in octal. */
70#define STRFMT_MAXBUF_INT (1+10) /* Sign + int32_t in decimal. */
71#define STRFMT_MAXBUF_NUM LUAI_MAXNUMBER2STR
72#define STRFMT_MAXBUF_PTR (2+2*sizeof(ptrdiff_t)) /* "0x" + hex ptr. */
73
74/* Format parser. */
75LJ_FUNC SFormat LJ_FASTCALL lj_strfmt_parse(FormatState *fs);
76
77static LJ_AINLINE void lj_strfmt_init(FormatState *fs, const char *p, MSize len) /* Point the parser state fs at the format string p of length len. */
78{
79 fs->p = (const uint8_t *)p; /* Parsing starts at the first byte. */
80 fs->e = (const uint8_t *)p + len; /* End pointer is start + len. */
81 lua_assert(*fs->e == 0); /* Must be NUL-terminated (may have NULs inside). */
82}
83
84/* Raw conversions. */
85LJ_FUNC char * LJ_FASTCALL lj_strfmt_wint(char *p, int32_t k);
86LJ_FUNC char * LJ_FASTCALL lj_strfmt_wnum(char *p, cTValue *o);
87LJ_FUNC char * LJ_FASTCALL lj_strfmt_wptr(char *p, const void *v);
88LJ_FUNC char * LJ_FASTCALL lj_strfmt_wuleb128(char *p, uint32_t v);
89LJ_FUNC const char *lj_strfmt_wstrnum(char *buf, cTValue *o, MSize *lenp);
90
91/* Unformatted conversions to buffer. */
92LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putint(SBuf *sb, int32_t k);
93#if LJ_HASJIT
94LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putnum(SBuf *sb, cTValue *o);
95#endif
96LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putquoted(SBuf *sb, GCstr *str);
97
98/* Formatted conversions to buffer. */
99LJ_FUNC SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k);
100LJ_FUNC SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n);
101LJ_FUNC SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n);
102LJ_FUNC SBuf *lj_strfmt_putfnum(SBuf *sb, SFormat, lua_Number n);
103LJ_FUNC SBuf *lj_strfmt_putfchar(SBuf *sb, SFormat, int32_t c);
104LJ_FUNC SBuf *lj_strfmt_putfstr(SBuf *sb, SFormat, GCstr *str);
105
106/* Conversions to strings. */
107LJ_FUNC GCstr * LJ_FASTCALL lj_strfmt_int(lua_State *L, int32_t k);
108LJ_FUNCA GCstr * LJ_FASTCALL lj_strfmt_num(lua_State *L, cTValue *o);
109LJ_FUNCA GCstr * LJ_FASTCALL lj_strfmt_number(lua_State *L, cTValue *o);
110#if LJ_HASJIT
111LJ_FUNC GCstr * LJ_FASTCALL lj_strfmt_char(lua_State *L, int c);
112#endif
113LJ_FUNC GCstr * LJ_FASTCALL lj_strfmt_obj(lua_State *L, cTValue *o);
114
115/* Internal string formatting. */
116LJ_FUNC const char *lj_strfmt_pushvf(lua_State *L, const char *fmt,
117 va_list argp);
118LJ_FUNC const char *lj_strfmt_pushf(lua_State *L, const char *fmt, ...)
119#ifdef __GNUC__
120 __attribute__ ((format (printf, 2, 3)))
121#endif
122 ;
123
124#endif
diff --git a/src/lj_tab.h b/src/lj_tab.h
index 2787caa0..d361137c 100644
--- a/src/lj_tab.h
+++ b/src/lj_tab.h
@@ -50,7 +50,7 @@ LJ_FUNCA cTValue *lj_tab_get(lua_State *L, GCtab *t, cTValue *key);
50/* Caveat: all setters require a write barrier for the stored value. */ 50/* Caveat: all setters require a write barrier for the stored value. */
51 51
52LJ_FUNCA TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key); 52LJ_FUNCA TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key);
53LJ_FUNC TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key); 53LJ_FUNCA TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key);
54LJ_FUNC TValue *lj_tab_setstr(lua_State *L, GCtab *t, GCstr *key); 54LJ_FUNC TValue *lj_tab_setstr(lua_State *L, GCtab *t, GCstr *key);
55LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key); 55LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key);
56 56
diff --git a/src/lj_target_arm.h b/src/lj_target_arm.h
index bec55772..f1aedff0 100644
--- a/src/lj_target_arm.h
+++ b/src/lj_target_arm.h
@@ -243,10 +243,6 @@ typedef enum ARMIns {
243 ARMI_VCVT_S32_F64 = 0xeebd0bc0, 243 ARMI_VCVT_S32_F64 = 0xeebd0bc0,
244 ARMI_VCVT_U32_F32 = 0xeebc0ac0, 244 ARMI_VCVT_U32_F32 = 0xeebc0ac0,
245 ARMI_VCVT_U32_F64 = 0xeebc0bc0, 245 ARMI_VCVT_U32_F64 = 0xeebc0bc0,
246 ARMI_VCVTR_S32_F32 = 0xeebd0a40,
247 ARMI_VCVTR_S32_F64 = 0xeebd0b40,
248 ARMI_VCVTR_U32_F32 = 0xeebc0a40,
249 ARMI_VCVTR_U32_F64 = 0xeebc0b40,
250 ARMI_VCVT_F32_S32 = 0xeeb80ac0, 246 ARMI_VCVT_F32_S32 = 0xeeb80ac0,
251 ARMI_VCVT_F64_S32 = 0xeeb80bc0, 247 ARMI_VCVT_F64_S32 = 0xeeb80bc0,
252 ARMI_VCVT_F32_U32 = 0xeeb80a40, 248 ARMI_VCVT_F32_U32 = 0xeeb80a40,
diff --git a/src/lj_target_mips.h b/src/lj_target_mips.h
index 36f46c83..9bf1d2cb 100644
--- a/src/lj_target_mips.h
+++ b/src/lj_target_mips.h
@@ -169,6 +169,9 @@ typedef enum MIPSIns {
169 MIPSI_SLTU = 0x0000002b, 169 MIPSI_SLTU = 0x0000002b,
170 MIPSI_MOVZ = 0x0000000a, 170 MIPSI_MOVZ = 0x0000000a,
171 MIPSI_MOVN = 0x0000000b, 171 MIPSI_MOVN = 0x0000000b,
172 MIPSI_MFHI = 0x00000010,
173 MIPSI_MFLO = 0x00000012,
174 MIPSI_MULT = 0x00000018,
172 175
173 MIPSI_SLL = 0x00000000, 176 MIPSI_SLL = 0x00000000,
174 MIPSI_SRL = 0x00000002, 177 MIPSI_SRL = 0x00000002,
diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h
index 84b0871d..38f464fc 100644
--- a/src/lj_target_x86.h
+++ b/src/lj_target_x86.h
@@ -33,6 +33,7 @@ enum {
33 RID_MRM = RID_MAX, /* Pseudo-id for ModRM operand. */ 33 RID_MRM = RID_MAX, /* Pseudo-id for ModRM operand. */
34 34
35 /* Calling conventions. */ 35 /* Calling conventions. */
36 RID_SP = RID_ESP,
36 RID_RET = RID_EAX, 37 RID_RET = RID_EAX,
37#if LJ_64 38#if LJ_64
38 RID_FPRET = RID_XMM0, 39 RID_FPRET = RID_XMM0,
@@ -277,10 +278,8 @@ typedef enum {
277 XO_ROUNDSD = 0x0b3a0ffc, /* Really 66 0f 3a 0b. See asm_fpmath. */ 278 XO_ROUNDSD = 0x0b3a0ffc, /* Really 66 0f 3a 0b. See asm_fpmath. */
278 XO_UCOMISD = XO_660f(2e), 279 XO_UCOMISD = XO_660f(2e),
279 XO_CVTSI2SD = XO_f20f(2a), 280 XO_CVTSI2SD = XO_f20f(2a),
280 XO_CVTSD2SI = XO_f20f(2d),
281 XO_CVTTSD2SI= XO_f20f(2c), 281 XO_CVTTSD2SI= XO_f20f(2c),
282 XO_CVTSI2SS = XO_f30f(2a), 282 XO_CVTSI2SS = XO_f30f(2a),
283 XO_CVTSS2SI = XO_f30f(2d),
284 XO_CVTTSS2SI= XO_f30f(2c), 283 XO_CVTTSS2SI= XO_f30f(2c),
285 XO_CVTSS2SD = XO_f30f(5a), 284 XO_CVTSS2SD = XO_f30f(5a),
286 XO_CVTSD2SS = XO_f20f(5a), 285 XO_CVTSD2SS = XO_f20f(5a),
diff --git a/src/lj_vm.h b/src/lj_vm.h
index c5d05de4..948d63c2 100644
--- a/src/lj_vm.h
+++ b/src/lj_vm.h
@@ -49,12 +49,14 @@ LJ_ASMF void lj_vm_exit_handler(void);
49LJ_ASMF void lj_vm_exit_interp(void); 49LJ_ASMF void lj_vm_exit_interp(void);
50 50
51/* Internal math helper functions. */ 51/* Internal math helper functions. */
52#if LJ_TARGET_X86ORX64 || LJ_TARGET_PPC 52#if LJ_TARGET_PPC
53#define lj_vm_floor floor 53#define lj_vm_floor floor
54#define lj_vm_ceil ceil 54#define lj_vm_ceil ceil
55#else 55#else
56LJ_ASMF double lj_vm_floor(double); 56LJ_ASMF double lj_vm_floor(double);
57#if !LJ_TARGET_X86ORX64
57LJ_ASMF double lj_vm_ceil(double); 58LJ_ASMF double lj_vm_ceil(double);
59#endif
58#if LJ_TARGET_ARM 60#if LJ_TARGET_ARM
59LJ_ASMF double lj_vm_floor_sf(double); 61LJ_ASMF double lj_vm_floor_sf(double);
60LJ_ASMF double lj_vm_ceil_sf(double); 62LJ_ASMF double lj_vm_ceil_sf(double);
diff --git a/src/ljamalg.c b/src/ljamalg.c
index 962b3134..7198a09f 100644
--- a/src/ljamalg.c
+++ b/src/ljamalg.c
@@ -33,6 +33,7 @@
33#include "lj_char.c" 33#include "lj_char.c"
34#include "lj_bc.c" 34#include "lj_bc.c"
35#include "lj_obj.c" 35#include "lj_obj.c"
36#include "lj_buf.c"
36#include "lj_str.c" 37#include "lj_str.c"
37#include "lj_tab.c" 38#include "lj_tab.c"
38#include "lj_func.c" 39#include "lj_func.c"
@@ -44,6 +45,7 @@
44#include "lj_vmevent.c" 45#include "lj_vmevent.c"
45#include "lj_vmmath.c" 46#include "lj_vmmath.c"
46#include "lj_strscan.c" 47#include "lj_strscan.c"
48#include "lj_strfmt.c"
47#include "lj_api.c" 49#include "lj_api.c"
48#include "lj_lex.c" 50#include "lj_lex.c"
49#include "lj_parse.c" 51#include "lj_parse.c"
diff --git a/src/luaconf.h b/src/luaconf.h
index 8e3a7aaa..d283233d 100644
--- a/src/luaconf.h
+++ b/src/luaconf.h
@@ -30,12 +30,12 @@
30#define LUA_LDIR LUA_ROOT "share/lua/5.1/" 30#define LUA_LDIR LUA_ROOT "share/lua/5.1/"
31#define LUA_CDIR LUA_ROOT "lib/lua/5.1/" 31#define LUA_CDIR LUA_ROOT "lib/lua/5.1/"
32#ifdef LUA_XROOT 32#ifdef LUA_XROOT
33#define LUA_JDIR LUA_XROOT "share/luajit-2.0.1/" 33#define LUA_JDIR LUA_XROOT "share/luajit-2.1.0-alpha/"
34#define LUA_XPATH \ 34#define LUA_XPATH \
35 ";" LUA_XROOT "share/lua/5.1/?.lua;" LUA_XROOT "share/lua/5.1/?/init.lua" 35 ";" LUA_XROOT "share/lua/5.1/?.lua;" LUA_XROOT "share/lua/5.1/?/init.lua"
36#define LUA_XCPATH LUA_XROOT "lib/lua/5.1/?.so;" 36#define LUA_XCPATH LUA_XROOT "lib/lua/5.1/?.so;"
37#else 37#else
38#define LUA_JDIR LUA_ROOT "share/luajit-2.0.1/" 38#define LUA_JDIR LUA_ROOT "share/luajit-2.1.0-alpha/"
39#define LUA_XPATH 39#define LUA_XPATH
40#define LUA_XCPATH 40#define LUA_XCPATH
41#endif 41#endif
diff --git a/src/luajit.c b/src/luajit.c
index e0eacc42..680984da 100644
--- a/src/luajit.c
+++ b/src/luajit.c
@@ -61,8 +61,9 @@ static void laction(int i)
61 61
62static void print_usage(void) 62static void print_usage(void)
63{ 63{
64 fprintf(stderr, 64 fputs("usage: ", stderr);
65 "usage: %s [options]... [script [args]...].\n" 65 fputs(progname, stderr);
66 fputs(" [options]... [script [args]...].\n"
66 "Available options are:\n" 67 "Available options are:\n"
67 " -e chunk Execute string " LUA_QL("chunk") ".\n" 68 " -e chunk Execute string " LUA_QL("chunk") ".\n"
68 " -l name Require library " LUA_QL("name") ".\n" 69 " -l name Require library " LUA_QL("name") ".\n"
@@ -73,16 +74,14 @@ static void print_usage(void)
73 " -v Show version information.\n" 74 " -v Show version information.\n"
74 " -E Ignore environment variables.\n" 75 " -E Ignore environment variables.\n"
75 " -- Stop handling options.\n" 76 " -- Stop handling options.\n"
76 " - Execute stdin and stop handling options.\n" 77 " - Execute stdin and stop handling options.\n", stderr);
77 ,
78 progname);
79 fflush(stderr); 78 fflush(stderr);
80} 79}
81 80
82static void l_message(const char *pname, const char *msg) 81static void l_message(const char *pname, const char *msg)
83{ 82{
84 if (pname) fprintf(stderr, "%s: ", pname); 83 if (pname) { fputs(pname, stderr); fputc(':', stderr); fputc(' ', stderr); }
85 fprintf(stderr, "%s\n", msg); 84 fputs(msg, stderr); fputc('\n', stderr);
86 fflush(stderr); 85 fflush(stderr);
87} 86}
88 87
diff --git a/src/luajit.h b/src/luajit.h
index ed39d014..a4c939bf 100644
--- a/src/luajit.h
+++ b/src/luajit.h
@@ -30,9 +30,9 @@
30 30
31#include "lua.h" 31#include "lua.h"
32 32
33#define LUAJIT_VERSION "LuaJIT 2.0.1" 33#define LUAJIT_VERSION "LuaJIT 2.1.0-alpha"
34#define LUAJIT_VERSION_NUM 20001 /* Version 2.0.1 = 02.00.01. */ 34#define LUAJIT_VERSION_NUM 20100 /* Version 2.1.0 = 02.01.00. */
35#define LUAJIT_VERSION_SYM luaJIT_version_2_0_1 35#define LUAJIT_VERSION_SYM luaJIT_version_2_1_0_alpha
36#define LUAJIT_COPYRIGHT "Copyright (C) 2005-2013 Mike Pall" 36#define LUAJIT_COPYRIGHT "Copyright (C) 2005-2013 Mike Pall"
37#define LUAJIT_URL "http://luajit.org/" 37#define LUAJIT_URL "http://luajit.org/"
38 38
diff --git a/src/msvcbuild.bat b/src/msvcbuild.bat
index cdb42a8d..4a3c8e84 100644
--- a/src/msvcbuild.bat
+++ b/src/msvcbuild.bat
@@ -37,6 +37,7 @@ if exist minilua.exe.manifest^
37@if errorlevel 8 goto :X64 37@if errorlevel 8 goto :X64
38@set DASMFLAGS=-D WIN -D JIT -D FFI 38@set DASMFLAGS=-D WIN -D JIT -D FFI
39@set LJARCH=x86 39@set LJARCH=x86
40@set LJCOMPILE=%LJCOMPILE% /arch:SSE2
40:X64 41:X64
41minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_x86.dasc 42minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_x86.dasc
42@if errorlevel 1 goto :BAD 43@if errorlevel 1 goto :BAD
diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc
index 114416a4..83341e41 100644
--- a/src/vm_arm.dasc
+++ b/src/vm_arm.dasc
@@ -99,6 +99,7 @@
99|.type NODE, Node 99|.type NODE, Node
100|.type NARGS8, int 100|.type NARGS8, int
101|.type TRACE, GCtrace 101|.type TRACE, GCtrace
102|.type SBUF, SBuf
102| 103|
103|//----------------------------------------------------------------------- 104|//-----------------------------------------------------------------------
104| 105|
@@ -615,6 +616,16 @@ static void build_subroutines(BuildCtx *ctx)
615 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. 616 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here.
616 | b ->vm_call_dispatch_f 617 | b ->vm_call_dispatch_f
617 | 618 |
619 |->vmeta_tgetr:
620 | .IOS mov RC, BASE
621 | bl extern lj_tab_getinth // (GCtab *t, int32_t key)
622 | // Returns cTValue * or NULL.
623 | .IOS mov BASE, RC
624 | cmp CRET1, #0
625 | ldrdne CARG12, [CRET1]
626 | mvneq CARG2, #~LJ_TNIL
627 | b ->BC_TGETR_Z
628 |
618 |//----------------------------------------------------------------------- 629 |//-----------------------------------------------------------------------
619 | 630 |
620 |->vmeta_tsets1: 631 |->vmeta_tsets1:
@@ -672,6 +683,15 @@ static void build_subroutines(BuildCtx *ctx)
672 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. 683 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here.
673 | b ->vm_call_dispatch_f 684 | b ->vm_call_dispatch_f
674 | 685 |
686 |->vmeta_tsetr:
687 | str BASE, L->base
688 | .IOS mov RC, BASE
689 | str PC, SAVE_PC
690 | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
691 | // Returns TValue *.
692 | .IOS mov BASE, RC
693 | b ->BC_TSETR_Z
694 |
675 |//-- Comparison metamethods --------------------------------------------- 695 |//-- Comparison metamethods ---------------------------------------------
676 | 696 |
677 |->vmeta_comp: 697 |->vmeta_comp:
@@ -736,6 +756,17 @@ static void build_subroutines(BuildCtx *ctx)
736 | b <3 756 | b <3
737 |.endif 757 |.endif
738 | 758 |
759 |->vmeta_istype:
760 | sub PC, PC, #4
761 | str BASE, L->base
762 | mov CARG1, L
763 | lsr CARG2, RA, #3
764 | mov CARG3, RC
765 | str PC, SAVE_PC
766 | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
767 | .IOS ldr BASE, L->base
768 | b ->cont_nop
769 |
739 |//-- Arithmetic metamethods --------------------------------------------- 770 |//-- Arithmetic metamethods ---------------------------------------------
740 | 771 |
741 |->vmeta_arith_vn: 772 |->vmeta_arith_vn:
@@ -1053,7 +1084,7 @@ static void build_subroutines(BuildCtx *ctx)
1053 | ffgccheck 1084 | ffgccheck
1054 | mov CARG1, L 1085 | mov CARG1, L
1055 | mov CARG2, BASE 1086 | mov CARG2, BASE
1056 | bl extern lj_str_fromnumber // (lua_State *L, cTValue *o) 1087 | bl extern lj_strfmt_number // (lua_State *L, cTValue *o)
1057 | // Returns GCstr *. 1088 | // Returns GCstr *.
1058 | ldr BASE, L->base 1089 | ldr BASE, L->base
1059 | mvn CARG2, #~LJ_TSTR 1090 | mvn CARG2, #~LJ_TSTR
@@ -1501,19 +1532,6 @@ static void build_subroutines(BuildCtx *ctx)
1501 | math_extern2 atan2 1532 | math_extern2 atan2
1502 | math_extern2 fmod 1533 | math_extern2 fmod
1503 | 1534 |
1504 |->ff_math_deg:
1505 |.if FPU
1506 | .ffunc_d math_rad
1507 | vldr d1, CFUNC:CARG3->upvalue[0]
1508 | vmul.f64 d0, d0, d1
1509 | b ->fff_resd
1510 |.else
1511 | .ffunc_n math_rad
1512 | ldrd CARG34, CFUNC:CARG3->upvalue[0]
1513 | bl extern __aeabi_dmul
1514 | b ->fff_restv
1515 |.endif
1516 |
1517 |.if HFABI 1535 |.if HFABI
1518 | .ffunc math_ldexp 1536 | .ffunc math_ldexp
1519 | ldr CARG4, [BASE, #4] 1537 | ldr CARG4, [BASE, #4]
@@ -1688,12 +1706,6 @@ static void build_subroutines(BuildCtx *ctx)
1688 | 1706 |
1689 |//-- String library ----------------------------------------------------- 1707 |//-- String library -----------------------------------------------------
1690 | 1708 |
1691 |.ffunc_1 string_len
1692 | checkstr CARG2, ->fff_fallback
1693 | ldr CARG1, STR:CARG1->len
1694 | mvn CARG2, #~LJ_TISNUM
1695 | b ->fff_restv
1696 |
1697 |.ffunc string_byte // Only handle the 1-arg case here. 1709 |.ffunc string_byte // Only handle the 1-arg case here.
1698 | ldrd CARG12, [BASE] 1710 | ldrd CARG12, [BASE]
1699 | ldr PC, [BASE, FRAME_PC] 1711 | ldr PC, [BASE, FRAME_PC]
@@ -1726,6 +1738,7 @@ static void build_subroutines(BuildCtx *ctx)
1726 | mov CARG1, L 1738 | mov CARG1, L
1727 | str PC, SAVE_PC 1739 | str PC, SAVE_PC
1728 | bl extern lj_str_new // (lua_State *L, char *str, size_t l) 1740 | bl extern lj_str_new // (lua_State *L, char *str, size_t l)
1741 |->fff_resstr:
1729 | // Returns GCstr *. 1742 | // Returns GCstr *.
1730 | ldr BASE, L->base 1743 | ldr BASE, L->base
1731 | mvn CARG2, #~LJ_TSTR 1744 | mvn CARG2, #~LJ_TSTR
@@ -1769,91 +1782,28 @@ static void build_subroutines(BuildCtx *ctx)
1769 | mvn CARG2, #~LJ_TSTR 1782 | mvn CARG2, #~LJ_TSTR
1770 | b ->fff_restv 1783 | b ->fff_restv
1771 | 1784 |
1772 |.ffunc string_rep // Only handle the 1-char case inline. 1785 |.macro ffstring_op, name
1773 | ffgccheck 1786 | .ffunc string_ .. name
1774 | ldrd CARG12, [BASE]
1775 | ldrd CARG34, [BASE, #8]
1776 | cmp NARGS8:RC, #16
1777 | bne ->fff_fallback // Exactly 2 arguments
1778 | checktp CARG2, LJ_TSTR
1779 | checktpeq CARG4, LJ_TISNUM
1780 | bne ->fff_fallback
1781 | subs CARG4, CARG3, #1
1782 | ldr CARG2, STR:CARG1->len
1783 | blt ->fff_emptystr // Count <= 0?
1784 | cmp CARG2, #1
1785 | blo ->fff_emptystr // Zero-length string?
1786 | bne ->fff_fallback // Fallback for > 1-char strings.
1787 | ldr RB, [DISPATCH, #DISPATCH_GL(tmpbuf.sz)]
1788 | ldr CARG2, [DISPATCH, #DISPATCH_GL(tmpbuf.buf)]
1789 | ldr CARG1, STR:CARG1[1]
1790 | cmp RB, CARG3
1791 | blo ->fff_fallback
1792 |1: // Fill buffer with char.
1793 | strb CARG1, [CARG2, CARG4]
1794 | subs CARG4, CARG4, #1
1795 | bge <1
1796 | b ->fff_newstr
1797 |
1798 |.ffunc string_reverse
1799 | ffgccheck
1800 | ldrd CARG12, [BASE]
1801 | cmp NARGS8:RC, #8
1802 | blo ->fff_fallback
1803 | checkstr CARG2, ->fff_fallback
1804 | ldr CARG3, STR:CARG1->len
1805 | ldr RB, [DISPATCH, #DISPATCH_GL(tmpbuf.sz)]
1806 | ldr CARG2, [DISPATCH, #DISPATCH_GL(tmpbuf.buf)]
1807 | mov CARG4, CARG3
1808 | add CARG1, STR:CARG1, #sizeof(GCstr)
1809 | cmp RB, CARG3
1810 | blo ->fff_fallback
1811 |1: // Reverse string copy.
1812 | ldrb RB, [CARG1], #1
1813 | subs CARG4, CARG4, #1
1814 | blt ->fff_newstr
1815 | strb RB, [CARG2, CARG4]
1816 | b <1
1817 |
1818 |.macro ffstring_case, name, lo
1819 | .ffunc name
1820 | ffgccheck 1787 | ffgccheck
1821 | ldrd CARG12, [BASE] 1788 | ldr CARG3, [BASE, #4]
1822 | cmp NARGS8:RC, #8 1789 | cmp NARGS8:RC, #8
1790 | ldr STR:CARG2, [BASE]
1823 | blo ->fff_fallback 1791 | blo ->fff_fallback
1824 | checkstr CARG2, ->fff_fallback 1792 | sub SBUF:CARG1, DISPATCH, #-DISPATCH_GL(tmpbuf)
1825 | ldr CARG3, STR:CARG1->len 1793 | checkstr CARG3, ->fff_fallback
1826 | ldr RB, [DISPATCH, #DISPATCH_GL(tmpbuf.sz)] 1794 | ldr CARG4, SBUF:CARG1->b
1827 | ldr CARG2, [DISPATCH, #DISPATCH_GL(tmpbuf.buf)] 1795 | str BASE, L->base
1828 | mov CARG4, #0 1796 | str PC, SAVE_PC
1829 | add CARG1, STR:CARG1, #sizeof(GCstr) 1797 | str L, SBUF:CARG1->L
1830 | cmp RB, CARG3 1798 | str CARG4, SBUF:CARG1->p
1831 | blo ->fff_fallback 1799 | bl extern lj_buf_putstr_ .. name
1832 |1: // ASCII case conversion. 1800 | bl extern lj_buf_tostr
1833 | ldrb RB, [CARG1, CARG4] 1801 | b ->fff_resstr
1834 | cmp CARG4, CARG3
1835 | bhs ->fff_newstr
1836 | sub RC, RB, #lo
1837 | cmp RC, #26
1838 | eorlo RB, RB, #0x20
1839 | strb RB, [CARG2, CARG4]
1840 | add CARG4, CARG4, #1
1841 | b <1
1842 |.endmacro 1802 |.endmacro
1843 | 1803 |
1844 |ffstring_case string_lower, 65 1804 |ffstring_op reverse
1845 |ffstring_case string_upper, 97 1805 |ffstring_op lower
1846 | 1806 |ffstring_op upper
1847 |//-- Table library ------------------------------------------------------
1848 |
1849 |.ffunc_1 table_getn
1850 | checktab CARG2, ->fff_fallback
1851 | .IOS mov RA, BASE
1852 | bl extern lj_tab_len // (GCtab *t)
1853 | // Returns uint32_t (but less than 2^31).
1854 | .IOS mov BASE, RA
1855 | mvn CARG2, #~LJ_TISNUM
1856 | b ->fff_restv
1857 | 1807 |
1858 |//-- Bit library -------------------------------------------------------- 1808 |//-- Bit library --------------------------------------------------------
1859 | 1809 |
@@ -2834,6 +2784,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2834 | ins_next 2784 | ins_next
2835 break; 2785 break;
2836 2786
2787 case BC_ISTYPE:
2788 | // RA = src*8, RC = -type
2789 | ldrd CARG12, [BASE, RA]
2790 | ins_next1
2791 | cmn CARG2, RC
2792 | ins_next2
2793 | bne ->vmeta_istype
2794 | ins_next3
2795 break;
2796 case BC_ISNUM:
2797 | // RA = src*8, RC = -(TISNUM-1)
2798 | ldrd CARG12, [BASE, RA]
2799 | ins_next1
2800 | checktp CARG2, LJ_TISNUM
2801 | ins_next2
2802 | bhs ->vmeta_istype
2803 | ins_next3
2804 break;
2805
2837 /* -- Unary ops --------------------------------------------------------- */ 2806 /* -- Unary ops --------------------------------------------------------- */
2838 2807
2839 case BC_MOV: 2808 case BC_MOV:
@@ -3504,6 +3473,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3504 | bne <1 // 'no __index' flag set: done. 3473 | bne <1 // 'no __index' flag set: done.
3505 | b ->vmeta_tgetb 3474 | b ->vmeta_tgetb
3506 break; 3475 break;
3476 case BC_TGETR:
3477 | decode_RB8 RB, INS
3478 | decode_RC8 RC, INS
3479 | // RA = dst*8, RB = table*8, RC = key*8
3480 | ldr TAB:CARG1, [BASE, RB]
3481 | ldr CARG2, [BASE, RC]
3482 | ldr CARG4, TAB:CARG1->array
3483 | ldr CARG3, TAB:CARG1->asize
3484 | add CARG4, CARG4, CARG2, lsl #3
3485 | cmp CARG2, CARG3 // In array part?
3486 | bhs ->vmeta_tgetr
3487 | ldrd CARG12, [CARG4]
3488 |->BC_TGETR_Z:
3489 | ins_next1
3490 | ins_next2
3491 | strd CARG12, [BASE, RA]
3492 | ins_next3
3493 break;
3507 3494
3508 case BC_TSETV: 3495 case BC_TSETV:
3509 | decode_RB8 RB, INS 3496 | decode_RB8 RB, INS
@@ -3674,6 +3661,32 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3674 | barrierback TAB:CARG1, INS, CARG3 3661 | barrierback TAB:CARG1, INS, CARG3
3675 | b <2 3662 | b <2
3676 break; 3663 break;
3664 case BC_TSETR:
3665 | decode_RB8 RB, INS
3666 | decode_RC8 RC, INS
3667 | // RA = dst*8, RB = table*8, RC = key*8
3668 | ldr TAB:CARG2, [BASE, RB]
3669 | ldr CARG3, [BASE, RC]
3670 | ldrb INS, TAB:CARG2->marked
3671 | ldr CARG1, TAB:CARG2->array
3672 | ldr CARG4, TAB:CARG2->asize
3673 | tst INS, #LJ_GC_BLACK // isblack(table)
3674 | add CARG1, CARG1, CARG3, lsl #3
3675 | bne >7
3676 |2:
3677 | cmp CARG3, CARG4 // In array part?
3678 | bhs ->vmeta_tsetr
3679 |->BC_TSETR_Z:
3680 | ldrd CARG34, [BASE, RA]
3681 | ins_next1
3682 | ins_next2
3683 | strd CARG34, [CARG1]
3684 | ins_next3
3685 |
3686 |7: // Possible table write barrier for the value. Skip valiswhite check.
3687 | barrierback TAB:CARG2, INS, RB
3688 | b <2
3689 break;
3677 3690
3678 case BC_TSETM: 3691 case BC_TSETM:
3679 | // RA = base*8 (table at base-1), RC = num_const (start index) 3692 | // RA = base*8 (table at base-1), RC = num_const (start index)
@@ -4272,6 +4285,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4272 | ldr RA, TRACE:RC->mcode 4285 | ldr RA, TRACE:RC->mcode
4273 | str BASE, [DISPATCH, #DISPATCH_GL(jit_base)] 4286 | str BASE, [DISPATCH, #DISPATCH_GL(jit_base)]
4274 | str L, [DISPATCH, #DISPATCH_GL(jit_L)] 4287 | str L, [DISPATCH, #DISPATCH_GL(jit_L)]
4288 | str L, [DISPATCH, #DISPATCH_GL(tmpbuf.L)]
4275 | bx RA 4289 | bx RA
4276 |.endif 4290 |.endif
4277 break; 4291 break;
diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc
index a81dbeeb..41ed303d 100644
--- a/src/vm_mips.dasc
+++ b/src/vm_mips.dasc
@@ -138,6 +138,7 @@
138|.type NODE, Node 138|.type NODE, Node
139|.type NARGS8, int 139|.type NARGS8, int
140|.type TRACE, GCtrace 140|.type TRACE, GCtrace
141|.type SBUF, SBuf
141| 142|
142|//----------------------------------------------------------------------- 143|//-----------------------------------------------------------------------
143| 144|
@@ -688,6 +689,16 @@ static void build_subroutines(BuildCtx *ctx)
688 | b ->vm_call_dispatch_f 689 | b ->vm_call_dispatch_f
689 |. li NARGS8:RC, 16 // 2 args for func(t, k). 690 |. li NARGS8:RC, 16 // 2 args for func(t, k).
690 | 691 |
692 |->vmeta_tgetr:
693 | load_got lj_tab_getinth
694 | call_intern lj_tab_getinth // (GCtab *t, int32_t key)
695 |. nop
696 | // Returns cTValue * or NULL.
697 | beqz CRET1, >1
698 |. nop
699 | b ->BC_TGETR_Z
700 |. ldc1 f0, 0(CRET1)
701 |
691 |//----------------------------------------------------------------------- 702 |//-----------------------------------------------------------------------
692 | 703 |
693 |->vmeta_tsets1: 704 |->vmeta_tsets1:
@@ -740,6 +751,16 @@ static void build_subroutines(BuildCtx *ctx)
740 | b ->vm_call_dispatch_f 751 | b ->vm_call_dispatch_f
741 |. li NARGS8:RC, 24 // 3 args for func(t, k, v) 752 |. li NARGS8:RC, 24 // 3 args for func(t, k, v)
742 | 753 |
754 |->vmeta_tsetr:
755 | load_got lj_tab_setinth
756 | sw BASE, L->base
757 | sw PC, SAVE_PC
758 | call_intern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
759 |. move CARG1, L
760 | // Returns TValue *.
761 | b ->BC_TSETR_Z
762 |. nop
763 |
743 |//-- Comparison metamethods --------------------------------------------- 764 |//-- Comparison metamethods ---------------------------------------------
744 | 765 |
745 |->vmeta_comp: 766 |->vmeta_comp:
@@ -813,6 +834,18 @@ static void build_subroutines(BuildCtx *ctx)
813 |. nop 834 |. nop
814 |.endif 835 |.endif
815 | 836 |
837 |->vmeta_istype:
838 | load_got lj_meta_istype
839 | addiu PC, PC, -4
840 | sw BASE, L->base
841 | srl CARG2, RA, 3
842 | srl CARG3, RD, 3
843 | sw PC, SAVE_PC
844 | call_intern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
845 |. move CARG1, L
846 | b ->cont_nop
847 |. nop
848 |
816 |//-- Arithmetic metamethods --------------------------------------------- 849 |//-- Arithmetic metamethods ---------------------------------------------
817 | 850 |
818 |->vmeta_unm: 851 |->vmeta_unm:
@@ -1119,9 +1152,9 @@ static void build_subroutines(BuildCtx *ctx)
1119 |. sw BASE, L->base // Add frame since C call can throw. 1152 |. sw BASE, L->base // Add frame since C call can throw.
1120 | ffgccheck 1153 | ffgccheck
1121 |. sw PC, SAVE_PC // Redundant (but a defined value). 1154 |. sw PC, SAVE_PC // Redundant (but a defined value).
1122 | load_got lj_str_fromnum 1155 | load_got lj_strfmt_num
1123 | move CARG1, L 1156 | move CARG1, L
1124 | call_intern lj_str_fromnum // (lua_State *L, lua_Number *np) 1157 | call_intern lj_strfmt_num // (lua_State *L, lua_Number *np)
1125 |. move CARG2, BASE 1158 |. move CARG2, BASE
1126 | // Returns GCstr *. 1159 | // Returns GCstr *.
1127 | li CARG3, LJ_TSTR 1160 | li CARG3, LJ_TSTR
@@ -1188,7 +1221,7 @@ static void build_subroutines(BuildCtx *ctx)
1188 | mtc1 TMP0, FARG1 1221 | mtc1 TMP0, FARG1
1189 | beqz AT, ->fff_fallback 1222 | beqz AT, ->fff_fallback
1190 |. lw PC, FRAME_PC(BASE) 1223 |. lw PC, FRAME_PC(BASE)
1191 | cvt.w.d FRET1, FARG2 1224 | trunc.w.d FRET1, FARG2
1192 | cvt.d.w FARG1, FARG1 1225 | cvt.d.w FARG1, FARG1
1193 | lw TMP0, TAB:CARG1->asize 1226 | lw TMP0, TAB:CARG1->asize
1194 | lw TMP1, TAB:CARG1->array 1227 | lw TMP1, TAB:CARG1->array
@@ -1521,14 +1554,8 @@ static void build_subroutines(BuildCtx *ctx)
1521 | b ->fff_resn 1554 | b ->fff_resn
1522 |. nop 1555 |. nop
1523 | 1556 |
1524 |->ff_math_deg:
1525 |.ffunc_n math_rad
1526 |. ldc1 FARG2, CFUNC:RB->upvalue[0]
1527 | b ->fff_resn
1528 |. mul.d FRET1, FARG1, FARG2
1529 |
1530 |.ffunc_nn math_ldexp 1557 |.ffunc_nn math_ldexp
1531 | cvt.w.d FARG2, FARG2 1558 | trunc.w.d FARG2, FARG2
1532 | load_got ldexp 1559 | load_got ldexp
1533 | mfc1 CARG3, FARG2 1560 | mfc1 CARG3, FARG2
1534 | call_extern 1561 | call_extern
@@ -1592,13 +1619,6 @@ static void build_subroutines(BuildCtx *ctx)
1592 | 1619 |
1593 |//-- String library ----------------------------------------------------- 1620 |//-- String library -----------------------------------------------------
1594 | 1621 |
1595 |.ffunc_1 string_len
1596 | li AT, LJ_TSTR
1597 | bne CARG3, AT, ->fff_fallback
1598 |. nop
1599 | b ->fff_resi
1600 |. lw CRET1, STR:CARG1->len
1601 |
1602 |.ffunc string_byte // Only handle the 1-arg case here. 1622 |.ffunc string_byte // Only handle the 1-arg case here.
1603 | lw CARG3, HI(BASE) 1623 | lw CARG3, HI(BASE)
1604 | lw STR:CARG1, LO(BASE) 1624 | lw STR:CARG1, LO(BASE)
@@ -1628,7 +1648,7 @@ static void build_subroutines(BuildCtx *ctx)
1628 |. sltiu AT, CARG3, LJ_TISNUM 1648 |. sltiu AT, CARG3, LJ_TISNUM
1629 | beqz AT, ->fff_fallback 1649 | beqz AT, ->fff_fallback
1630 |. li CARG3, 1 1650 |. li CARG3, 1
1631 | cvt.w.d FARG1, FARG1 1651 | trunc.w.d FARG1, FARG1
1632 | addiu CARG2, sp, ARG5_OFS 1652 | addiu CARG2, sp, ARG5_OFS
1633 | sltiu AT, TMP0, 256 1653 | sltiu AT, TMP0, 256
1634 | mfc1 TMP0, FARG1 1654 | mfc1 TMP0, FARG1
@@ -1642,6 +1662,7 @@ static void build_subroutines(BuildCtx *ctx)
1642 |. move CARG1, L 1662 |. move CARG1, L
1643 | // Returns GCstr *. 1663 | // Returns GCstr *.
1644 | lw BASE, L->base 1664 | lw BASE, L->base
1665 |->fff_resstr:
1645 | move CARG1, CRET1 1666 | move CARG1, CRET1
1646 | b ->fff_restv 1667 | b ->fff_restv
1647 |. li CARG3, LJ_TSTR 1668 |. li CARG3, LJ_TSTR
@@ -1658,7 +1679,7 @@ static void build_subroutines(BuildCtx *ctx)
1658 | ldc1 f2, 8(BASE) 1679 | ldc1 f2, 8(BASE)
1659 | beqz AT, >1 1680 | beqz AT, >1
1660 |. li CARG4, -1 1681 |. li CARG4, -1
1661 | cvt.w.d f0, f0 1682 | trunc.w.d f0, f0
1662 | sltiu AT, CARG3, LJ_TISNUM 1683 | sltiu AT, CARG3, LJ_TISNUM
1663 | beqz AT, ->fff_fallback 1684 | beqz AT, ->fff_fallback
1664 |. mfc1 CARG4, f0 1685 |. mfc1 CARG4, f0
@@ -1666,7 +1687,7 @@ static void build_subroutines(BuildCtx *ctx)
1666 | sltiu AT, CARG2, LJ_TISNUM 1687 | sltiu AT, CARG2, LJ_TISNUM
1667 | beqz AT, ->fff_fallback 1688 | beqz AT, ->fff_fallback
1668 |. li AT, LJ_TSTR 1689 |. li AT, LJ_TSTR
1669 | cvt.w.d f2, f2 1690 | trunc.w.d f2, f2
1670 | bne TMP0, AT, ->fff_fallback 1691 | bne TMP0, AT, ->fff_fallback
1671 |. lw CARG2, STR:CARG1->len 1692 |. lw CARG2, STR:CARG1->len
1672 | mfc1 CARG3, f2 1693 | mfc1 CARG3, f2
@@ -1695,108 +1716,32 @@ static void build_subroutines(BuildCtx *ctx)
1695 | b ->fff_restv 1716 | b ->fff_restv
1696 |. li CARG3, LJ_TSTR 1717 |. li CARG3, LJ_TSTR
1697 | 1718 |
1698 |.ffunc string_rep // Only handle the 1-char case inline. 1719 |.macro ffstring_op, name
1699 | ffgccheck 1720 | .ffunc string_ .. name
1700 | lw TMP0, HI(BASE)
1701 | addiu AT, NARGS8:RC, -16 // Exactly 2 arguments.
1702 | lw CARG4, 8+HI(BASE)
1703 | lw STR:CARG1, LO(BASE)
1704 | addiu TMP0, TMP0, -LJ_TSTR
1705 | ldc1 f0, 8(BASE)
1706 | or AT, AT, TMP0
1707 | bnez AT, ->fff_fallback
1708 |. sltiu AT, CARG4, LJ_TISNUM
1709 | cvt.w.d f0, f0
1710 | beqz AT, ->fff_fallback
1711 |. lw TMP0, STR:CARG1->len
1712 | mfc1 CARG3, f0
1713 | lw TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
1714 | li AT, 1
1715 | blez CARG3, ->fff_emptystr // Count <= 0?
1716 |. sltu AT, AT, TMP0
1717 | beqz TMP0, ->fff_emptystr // Zero length string?
1718 |. sltu TMP0, TMP1, CARG3
1719 | or AT, AT, TMP0
1720 | lw CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
1721 | bnez AT, ->fff_fallback // Fallback for > 1-char strings.
1722 |. lbu TMP0, STR:CARG1[1]
1723 | addu TMP2, CARG2, CARG3
1724 |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?).
1725 | addiu TMP2, TMP2, -1
1726 | sltu AT, CARG2, TMP2
1727 | bnez AT, <1
1728 |. sb TMP0, 0(TMP2)
1729 | b ->fff_newstr
1730 |. nop
1731 |
1732 |.ffunc string_reverse
1733 | ffgccheck
1734 | lw CARG3, HI(BASE)
1735 | lw STR:CARG1, LO(BASE)
1736 | beqz NARGS8:RC, ->fff_fallback
1737 |. li AT, LJ_TSTR
1738 | bne CARG3, AT, ->fff_fallback
1739 |. lw TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
1740 | lw CARG3, STR:CARG1->len
1741 | addiu CARG1, STR:CARG1, #STR
1742 | lw CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
1743 | sltu AT, TMP1, CARG3
1744 | bnez AT, ->fff_fallback
1745 |. addu TMP3, CARG1, CARG3
1746 | addu CARG4, CARG2, CARG3
1747 |1: // Reverse string copy.
1748 | lbu TMP1, 0(CARG1)
1749 | sltu AT, CARG1, TMP3
1750 | beqz AT, ->fff_newstr
1751 |. addiu CARG1, CARG1, 1
1752 | addiu CARG4, CARG4, -1
1753 | b <1
1754 | sb TMP1, 0(CARG4)
1755 |
1756 |.macro ffstring_case, name, lo
1757 | .ffunc name
1758 | ffgccheck 1721 | ffgccheck
1759 | lw CARG3, HI(BASE) 1722 | lw CARG3, HI(BASE)
1760 | lw STR:CARG1, LO(BASE) 1723 | lw STR:CARG2, LO(BASE)
1761 | beqz NARGS8:RC, ->fff_fallback 1724 | beqz NARGS8:RC, ->fff_fallback
1762 |. li AT, LJ_TSTR 1725 |. li AT, LJ_TSTR
1763 | bne CARG3, AT, ->fff_fallback 1726 | bne CARG3, AT, ->fff_fallback
1764 |. lw TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) 1727 |. addiu SBUF:CARG1, DISPATCH, DISPATCH_GL(tmpbuf)
1765 | lw CARG3, STR:CARG1->len 1728 | load_got lj_buf_putstr_ .. name
1766 | addiu CARG1, STR:CARG1, #STR 1729 | lw TMP0, SBUF:CARG1->b
1767 | lw CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) 1730 | sw L, SBUF:CARG1->L
1768 | sltu AT, TMP1, CARG3 1731 | sw BASE, L->base
1769 | bnez AT, ->fff_fallback 1732 | sw TMP0, SBUF:CARG1->p
1770 |. addu TMP3, CARG1, CARG3 1733 | call_intern extern lj_buf_putstr_ .. name
1771 | move CARG4, CARG2 1734 |. sw PC, SAVE_PC
1772 |1: // ASCII case conversion. 1735 | load_got lj_buf_tostr
1773 | lbu TMP1, 0(CARG1) 1736 | call_intern lj_buf_tostr
1774 | sltu AT, CARG1, TMP3 1737 |. move SBUF:CARG1, SBUF:CRET1
1775 | beqz AT, ->fff_newstr 1738 | b ->fff_resstr
1776 |. addiu TMP0, TMP1, -lo 1739 |. lw BASE, L->base
1777 | xori TMP2, TMP1, 0x20
1778 | sltiu AT, TMP0, 26
1779 | movn TMP1, TMP2, AT
1780 | addiu CARG1, CARG1, 1
1781 | sb TMP1, 0(CARG4)
1782 | b <1
1783 |. addiu CARG4, CARG4, 1
1784 |.endmacro 1740 |.endmacro
1785 | 1741 |
1786 |ffstring_case string_lower, 65 1742 |ffstring_op reverse
1787 |ffstring_case string_upper, 97 1743 |ffstring_op lower
1788 | 1744 |ffstring_op upper
1789 |//-- Table library ------------------------------------------------------
1790 |
1791 |.ffunc_1 table_getn
1792 | li AT, LJ_TTAB
1793 | bne CARG3, AT, ->fff_fallback
1794 |. load_got lj_tab_len
1795 | call_intern lj_tab_len // (GCtab *t)
1796 |. nop
1797 | // Returns uint32_t (but less than 2^31).
1798 | b ->fff_resi
1799 |. nop
1800 | 1745 |
1801 |//-- Bit library -------------------------------------------------------- 1746 |//-- Bit library --------------------------------------------------------
1802 | 1747 |
@@ -2572,6 +2517,26 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2572 | ins_next 2517 | ins_next
2573 break; 2518 break;
2574 2519
2520 case BC_ISTYPE:
2521 | // RA = src*8, RD = -type*8
2522 | addu TMP2, BASE, RA
2523 | srl TMP1, RD, 3
2524 | lw TMP0, HI(TMP2)
2525 | ins_next1
2526 | addu AT, TMP0, TMP1
2527 | bnez AT, ->vmeta_istype
2528 |. ins_next2
2529 break;
2530 case BC_ISNUM:
2531 | // RA = src*8, RD = -(TISNUM-1)*8
2532 | addu TMP2, BASE, RA
2533 | lw TMP0, HI(TMP2)
2534 | ins_next1
2535 | sltiu AT, TMP0, LJ_TISNUM
2536 | beqz AT, ->vmeta_istype
2537 |. ins_next2
2538 break;
2539
2575 /* -- Unary ops --------------------------------------------------------- */ 2540 /* -- Unary ops --------------------------------------------------------- */
2576 2541
2577 case BC_MOV: 2542 case BC_MOV:
@@ -3210,6 +3175,30 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3210 | b ->vmeta_tgetb // Caveat: preserve TMP0! 3175 | b ->vmeta_tgetb // Caveat: preserve TMP0!
3211 |. nop 3176 |. nop
3212 break; 3177 break;
3178 case BC_TGETR:
3179 | // RA = dst*8, RB = table*8, RC = key*8
3180 | decode_RB8a RB, INS
3181 | decode_RB8b RB
3182 | decode_RDtoRC8 RC, RD
3183 | addu CARG2, BASE, RB
3184 | addu CARG3, BASE, RC
3185 | lw TAB:CARG1, LO(CARG2)
3186 | ldc1 f0, 0(CARG3)
3187 | trunc.w.d f2, f0
3188 | lw TMP0, TAB:CARG1->asize
3189 | mfc1 CARG2, f2
3190 | lw TMP1, TAB:CARG1->array
3191 | sltu AT, CARG2, TMP0
3192 | sll TMP2, CARG2, 3
3193 | beqz AT, ->vmeta_tgetr // In array part?
3194 |. addu TMP2, TMP1, TMP2
3195 | ldc1 f0, 0(TMP2)
3196 |->BC_TGETR_Z:
3197 | addu RA, BASE, RA
3198 | ins_next1
3199 | sdc1 f0, 0(RA)
3200 | ins_next2
3201 break;
3213 3202
3214 case BC_TSETV: 3203 case BC_TSETV:
3215 | // RA = src*8, RB = table*8, RC = key*8 3204 | // RA = src*8, RB = table*8, RC = key*8
@@ -3398,6 +3387,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3398 |7: // Possible table write barrier for the value. Skip valiswhite check. 3387 |7: // Possible table write barrier for the value. Skip valiswhite check.
3399 | barrierback TAB:RB, TMP3, TMP0, <2 3388 | barrierback TAB:RB, TMP3, TMP0, <2
3400 break; 3389 break;
3390 case BC_TSETR:
3391 | // RA = dst*8, RB = table*8, RC = key*8
3392 | decode_RB8a RB, INS
3393 | decode_RB8b RB
3394 | decode_RDtoRC8 RC, RD
3395 | addu CARG1, BASE, RB
3396 | addu CARG3, BASE, RC
3397 | lw TAB:CARG2, LO(CARG1)
3398 | ldc1 f0, 0(CARG3)
3399 | trunc.w.d f2, f0
3400 | lbu TMP3, TAB:CARG2->marked
3401 | lw TMP0, TAB:CARG2->asize
3402 | mfc1 CARG3, f2
3403 | lw TMP1, TAB:CARG2->array
3404 | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
3405 | bnez AT, >7
3406 |. addu RA, BASE, RA
3407 |2:
3408 | sltu AT, CARG3, TMP0
3409 | sll TMP2, CARG3, 3
3410 | beqz AT, ->vmeta_tsetr // In array part?
3411 |. ldc1 f20, 0(RA)
3412 | addu CRET1, TMP1, TMP2
3413 |->BC_TSETR_Z:
3414 | ins_next1
3415 | sdc1 f20, 0(CRET1)
3416 | ins_next2
3417 |
3418 |7: // Possible table write barrier for the value. Skip valiswhite check.
3419 | barrierback TAB:RB, TMP3, TMP0, <2
3420 break;
3421
3401 3422
3402 case BC_TSETM: 3423 case BC_TSETM:
3403 | // RA = base*8 (table at base-1), RD = num_const*8 (start index) 3424 | // RA = base*8 (table at base-1), RD = num_const*8 (start index)
@@ -3959,6 +3980,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3959 | sw BASE, DISPATCH_GL(jit_base)(DISPATCH) 3980 | sw BASE, DISPATCH_GL(jit_base)(DISPATCH)
3960 | sw L, DISPATCH_GL(jit_L)(DISPATCH) 3981 | sw L, DISPATCH_GL(jit_L)(DISPATCH)
3961 | lw TMP2, TRACE:TMP2->mcode 3982 | lw TMP2, TRACE:TMP2->mcode
3983 | sw L, DISPATCH_GL(tmpbuf.L)(DISPATCH)
3962 | jr TMP2 3984 | jr TMP2
3963 |. addiu JGL, DISPATCH, GG_DISP2G+32768 3985 |. addiu JGL, DISPATCH, GG_DISP2G+32768
3964 |.endif 3986 |.endif
diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc
index 685ea518..6bceff9c 100644
--- a/src/vm_ppc.dasc
+++ b/src/vm_ppc.dasc
@@ -293,6 +293,7 @@
293|.type NODE, Node 293|.type NODE, Node
294|.type NARGS8, int 294|.type NARGS8, int
295|.type TRACE, GCtrace 295|.type TRACE, GCtrace
296|.type SBUF, SBuf
296| 297|
297|//----------------------------------------------------------------------- 298|//-----------------------------------------------------------------------
298| 299|
@@ -895,6 +896,17 @@ static void build_subroutines(BuildCtx *ctx)
895 | li NARGS8:RC, 16 // 2 args for func(t, k). 896 | li NARGS8:RC, 16 // 2 args for func(t, k).
896 | b ->vm_call_dispatch_f 897 | b ->vm_call_dispatch_f
897 | 898 |
899 |->vmeta_tgetr:
900 | bl extern lj_tab_getinth // (GCtab *t, int32_t key)
901 | // Returns cTValue * or NULL.
902 | cmplwi CRET1, 0
903 | beq >1
904 | lfd f14, 0(CRET1)
905 | b ->BC_TGETR_Z
906 |1:
907 | stwx TISNIL, BASE, RA
908 | b ->cont_nop
909 |
898 |//----------------------------------------------------------------------- 910 |//-----------------------------------------------------------------------
899 | 911 |
900 |->vmeta_tsets1: 912 |->vmeta_tsets1:
@@ -962,6 +974,14 @@ static void build_subroutines(BuildCtx *ctx)
962 | stfd f0, 16(BASE) // Copy value to third argument. 974 | stfd f0, 16(BASE) // Copy value to third argument.
963 | b ->vm_call_dispatch_f 975 | b ->vm_call_dispatch_f
964 | 976 |
977 |->vmeta_tsetr:
978 | stp BASE, L->base
979 | stw PC, SAVE_PC
980 | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
981 | // Returns TValue *.
982 | stfd f14, 0(CRET1)
983 | b ->cont_nop
984 |
965 |//-- Comparison metamethods --------------------------------------------- 985 |//-- Comparison metamethods ---------------------------------------------
966 | 986 |
967 |->vmeta_comp: 987 |->vmeta_comp:
@@ -1040,6 +1060,16 @@ static void build_subroutines(BuildCtx *ctx)
1040 | b <3 1060 | b <3
1041 |.endif 1061 |.endif
1042 | 1062 |
1063 |->vmeta_istype:
1064 | subi PC, PC, 4
1065 | stp BASE, L->base
1066 | srwi CARG2, RA, 3
1067 | mr CARG1, L
1068 | srwi CARG3, RD, 3
1069 | stw PC, SAVE_PC
1070 | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
1071 | b ->cont_nop
1072 |
1043 |//-- Arithmetic metamethods --------------------------------------------- 1073 |//-- Arithmetic metamethods ---------------------------------------------
1044 | 1074 |
1045 |->vmeta_arith_nv: 1075 |->vmeta_arith_nv:
@@ -1364,9 +1394,9 @@ static void build_subroutines(BuildCtx *ctx)
1364 | mr CARG1, L 1394 | mr CARG1, L
1365 | mr CARG2, BASE 1395 | mr CARG2, BASE
1366 |.if DUALNUM 1396 |.if DUALNUM
1367 | bl extern lj_str_fromnumber // (lua_State *L, cTValue *o) 1397 | bl extern lj_strfmt_number // (lua_State *L, cTValue *o)
1368 |.else 1398 |.else
1369 | bl extern lj_str_fromnum // (lua_State *L, lua_Number *np) 1399 | bl extern lj_strfmt_num // (lua_State *L, lua_Number *np)
1370 |.endif 1400 |.endif
1371 | // Returns GCstr *. 1401 | // Returns GCstr *.
1372 | li CARG3, LJ_TSTR 1402 | li CARG3, LJ_TSTR
@@ -1870,12 +1900,6 @@ static void build_subroutines(BuildCtx *ctx)
1870 | math_extern2 atan2 1900 | math_extern2 atan2
1871 | math_extern2 fmod 1901 | math_extern2 fmod
1872 | 1902 |
1873 |->ff_math_deg:
1874 |.ffunc_n math_rad
1875 | lfd FARG2, CFUNC:RB->upvalue[0]
1876 | fmul FARG1, FARG1, FARG2
1877 | b ->fff_resn
1878 |
1879 |.if DUALNUM 1903 |.if DUALNUM
1880 |.ffunc math_ldexp 1904 |.ffunc math_ldexp
1881 | cmplwi NARGS8:RC, 16 1905 | cmplwi NARGS8:RC, 16
@@ -2021,11 +2045,6 @@ static void build_subroutines(BuildCtx *ctx)
2021 | 2045 |
2022 |//-- String library ----------------------------------------------------- 2046 |//-- String library -----------------------------------------------------
2023 | 2047 |
2024 |.ffunc_1 string_len
2025 | checkstr CARG3; bne ->fff_fallback
2026 | lwz CRET1, STR:CARG1->len
2027 | b ->fff_resi
2028 |
2029 |.ffunc string_byte // Only handle the 1-arg case here. 2048 |.ffunc string_byte // Only handle the 1-arg case here.
2030 | cmplwi NARGS8:RC, 8 2049 | cmplwi NARGS8:RC, 8
2031 | lwz CARG3, 0(BASE) 2050 | lwz CARG3, 0(BASE)
@@ -2080,6 +2099,7 @@ static void build_subroutines(BuildCtx *ctx)
2080 | stp BASE, L->base 2099 | stp BASE, L->base
2081 | stw PC, SAVE_PC 2100 | stw PC, SAVE_PC
2082 | bl extern lj_str_new // (lua_State *L, char *str, size_t l) 2101 | bl extern lj_str_new // (lua_State *L, char *str, size_t l)
2102 |->fff_resstr:
2083 | // Returns GCstr *. 2103 | // Returns GCstr *.
2084 | lp BASE, L->base 2104 | lp BASE, L->base
2085 | li CARG3, LJ_TSTR 2105 | li CARG3, LJ_TSTR
@@ -2157,114 +2177,29 @@ static void build_subroutines(BuildCtx *ctx)
2157 | addi TMP1, TMP1, 1 // start = 1 + (start ? start+len : 0) 2177 | addi TMP1, TMP1, 1 // start = 1 + (start ? start+len : 0)
2158 | b <3 2178 | b <3
2159 | 2179 |
2160 |.ffunc string_rep // Only handle the 1-char case inline. 2180 |.macro ffstring_op, name
2161 | ffgccheck 2181 | .ffunc string_ .. name
2162 | cmplwi NARGS8:RC, 16
2163 | lwz TMP0, 0(BASE)
2164 | lwz STR:CARG1, 4(BASE)
2165 | lwz CARG4, 8(BASE)
2166 |.if DUALNUM
2167 | lwz CARG3, 12(BASE)
2168 |.else
2169 | lfd FARG2, 8(BASE)
2170 |.endif
2171 | bne ->fff_fallback // Exactly 2 arguments.
2172 | checkstr TMP0; bne ->fff_fallback
2173 |.if DUALNUM
2174 | checknum CARG4; bne ->fff_fallback
2175 |.else
2176 | checknum CARG4; bge ->fff_fallback
2177 | toint CARG3, FARG2
2178 |.endif
2179 | lwz TMP0, STR:CARG1->len
2180 | cmpwi CARG3, 0
2181 | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
2182 | ble >2 // Count <= 0? (or non-int)
2183 | cmplwi TMP0, 1
2184 | subi TMP2, CARG3, 1
2185 | blt >2 // Zero length string?
2186 | cmplw cr1, TMP1, CARG3
2187 | bne ->fff_fallback // Fallback for > 1-char strings.
2188 | lbz TMP0, STR:CARG1[1]
2189 | lp CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
2190 | blt cr1, ->fff_fallback
2191 |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?).
2192 | cmplwi TMP2, 0
2193 | stbx TMP0, CARG2, TMP2
2194 | subi TMP2, TMP2, 1
2195 | bne <1
2196 | b ->fff_newstr
2197 |2: // Return empty string.
2198 | la STR:CARG1, DISPATCH_GL(strempty)(DISPATCH)
2199 | li CARG3, LJ_TSTR
2200 | b ->fff_restv
2201 |
2202 |.ffunc string_reverse
2203 | ffgccheck
2204 | cmplwi NARGS8:RC, 8
2205 | lwz CARG3, 0(BASE)
2206 | lwz STR:CARG1, 4(BASE)
2207 | blt ->fff_fallback
2208 | checkstr CARG3
2209 | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
2210 | bne ->fff_fallback
2211 | lwz CARG3, STR:CARG1->len
2212 | la CARG1, #STR(STR:CARG1)
2213 | lp CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
2214 | li TMP2, 0
2215 | cmplw TMP1, CARG3
2216 | subi TMP3, CARG3, 1
2217 | blt ->fff_fallback
2218 |1: // Reverse string copy.
2219 | cmpwi TMP3, 0
2220 | lbzx TMP1, CARG1, TMP2
2221 | blty ->fff_newstr
2222 | stbx TMP1, CARG2, TMP3
2223 | subi TMP3, TMP3, 1
2224 | addi TMP2, TMP2, 1
2225 | b <1
2226 |
2227 |.macro ffstring_case, name, lo
2228 | .ffunc name
2229 | ffgccheck 2182 | ffgccheck
2230 | cmplwi NARGS8:RC, 8 2183 | cmplwi NARGS8:RC, 8
2231 | lwz CARG3, 0(BASE) 2184 | lwz CARG3, 0(BASE)
2232 | lwz STR:CARG1, 4(BASE) 2185 | lwz STR:CARG2, 4(BASE)
2233 | blt ->fff_fallback 2186 | blt ->fff_fallback
2234 | checkstr CARG3 2187 | checkstr CARG3
2235 | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) 2188 | la SBUF:CARG1, DISPATCH_GL(tmpbuf)(DISPATCH)
2236 | bne ->fff_fallback 2189 | bne ->fff_fallback
2237 | lwz CARG3, STR:CARG1->len 2190 | lwz TMP0, SBUF:CARG1->b
2238 | la CARG1, #STR(STR:CARG1) 2191 | stw L, SBUF:CARG1->L
2239 | lp CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) 2192 | stp BASE, L->base
2240 | cmplw TMP1, CARG3 2193 | stw PC, SAVE_PC
2241 | li TMP2, 0 2194 | stw TMP0, SBUF:CARG1->p
2242 | blt ->fff_fallback 2195 | bl extern lj_buf_putstr_ .. name
2243 |1: // ASCII case conversion. 2196 | bl extern lj_buf_tostr
2244 | cmplw TMP2, CARG3 2197 | b ->fff_resstr
2245 | lbzx TMP1, CARG1, TMP2
2246 | bgey ->fff_newstr
2247 | subi TMP0, TMP1, lo
2248 | xori TMP3, TMP1, 0x20
2249 | addic TMP0, TMP0, -26
2250 | subfe TMP3, TMP3, TMP3
2251 | rlwinm TMP3, TMP3, 0, 26, 26 // x &= 0x20.
2252 | xor TMP1, TMP1, TMP3
2253 | stbx TMP1, CARG2, TMP2
2254 | addi TMP2, TMP2, 1
2255 | b <1
2256 |.endmacro 2198 |.endmacro
2257 | 2199 |
2258 |ffstring_case string_lower, 65 2200 |ffstring_op reverse
2259 |ffstring_case string_upper, 97 2201 |ffstring_op lower
2260 | 2202 |ffstring_op upper
2261 |//-- Table library ------------------------------------------------------
2262 |
2263 |.ffunc_1 table_getn
2264 | checktab CARG3; bne ->fff_fallback
2265 | bl extern lj_tab_len // (GCtab *t)
2266 | // Returns uint32_t (but less than 2^31).
2267 | b ->fff_resi
2268 | 2203 |
2269 |//-- Bit library -------------------------------------------------------- 2204 |//-- Bit library --------------------------------------------------------
2270 | 2205 |
@@ -3265,6 +3200,29 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3265 | ins_next 3200 | ins_next
3266 break; 3201 break;
3267 3202
3203 case BC_ISTYPE:
3204 | // RA = src*8, RD = -type*8
3205 | lwzx TMP0, BASE, RA
3206 | srwi TMP1, RD, 3
3207 | ins_next1
3208 |.if not PPE and not GPR64
3209 | add. TMP0, TMP0, TMP1
3210 |.else
3211 | neg TMP1
3212 | cmpw TMP0, TMP1
3213 |.endif
3214 | bne ->vmeta_istype
3215 | ins_next2
3216 break;
3217 case BC_ISNUM:
3218 | // RA = src*8, RD = -(TISNUM-1)*8
3219 | lwzx TMP0, BASE, RA
3220 | ins_next1
3221 | checknum TMP0
3222 | bge ->vmeta_istype
3223 | ins_next2
3224 break;
3225
3268 /* -- Unary ops --------------------------------------------------------- */ 3226 /* -- Unary ops --------------------------------------------------------- */
3269 3227
3270 case BC_MOV: 3228 case BC_MOV:
@@ -4016,6 +3974,30 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4016 | bne <1 // 'no __index' flag set: done. 3974 | bne <1 // 'no __index' flag set: done.
4017 | b ->vmeta_tgetb // Caveat: preserve TMP0! 3975 | b ->vmeta_tgetb // Caveat: preserve TMP0!
4018 break; 3976 break;
3977 case BC_TGETR:
3978 | // RA = dst*8, RB = table*8, RC = key*8
3979 | add RB, BASE, RB
3980 | lwz TAB:CARG1, 4(RB)
3981 |.if DUALNUM
3982 | add RC, BASE, RC
3983 | lwz TMP0, TAB:CARG1->asize
3984 | lwz CARG2, 4(RC)
3985 | lwz TMP1, TAB:CARG1->array
3986 |.else
3987 | lfdx f0, BASE, RC
3988 | lwz TMP0, TAB:CARG1->asize
3989 | toint CARG2, f0
3990 | lwz TMP1, TAB:CARG1->array
3991 |.endif
3992 | cmplw TMP0, CARG2
3993 | slwi TMP2, CARG2, 3
3994 | ble ->vmeta_tgetr // In array part?
3995 | lfdx f14, TMP1, TMP2
3996 |->BC_TGETR_Z:
3997 | ins_next1
3998 | stfdx f14, BASE, RA
3999 | ins_next2
4000 break;
4019 4001
4020 case BC_TSETV: 4002 case BC_TSETV:
4021 | // RA = src*8, RB = table*8, RC = key*8 4003 | // RA = src*8, RB = table*8, RC = key*8
@@ -4195,6 +4177,39 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4195 | barrierback TAB:RB, TMP3, TMP0 4177 | barrierback TAB:RB, TMP3, TMP0
4196 | b <2 4178 | b <2
4197 break; 4179 break;
4180 case BC_TSETR:
4181 | // RA = dst*8, RB = table*8, RC = key*8
4182 | add RB, BASE, RB
4183 | lwz TAB:CARG2, 4(RB)
4184 |.if DUALNUM
4185 | add RC, BASE, RC
4186 | lbz TMP3, TAB:RB->marked
4187 | lwz TMP0, TAB:CARG2->asize
4188 | lwz CARG3, 4(RC)
4189 | lwz TMP1, TAB:CARG2->array
4190 |.else
4191 | lfdx f0, BASE, RC
4192 | lbz TMP3, TAB:RB->marked
4193 | lwz TMP0, TAB:CARG2->asize
4194 | toint CARG3, f0
4195 | lwz TMP1, TAB:CARG2->array
4196 |.endif
4197 | andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table)
4198 | bne >7
4199 |2:
4200 | cmplw TMP0, CARG3
4201 | slwi TMP2, CARG3, 3
4202 | lfdx f14, BASE, RA
4203 | ble ->vmeta_tsetr // In array part?
4204 | ins_next1
4205 | stfdx f14, TMP1, TMP2
4206 | ins_next2
4207 |
4208 |7: // Possible table write barrier for the value. Skip valiswhite check.
4209 | barrierback TAB:CARG2, TMP3, TMP2
4210 | b <2
4211 break;
4212
4198 4213
4199 case BC_TSETM: 4214 case BC_TSETM:
4200 | // RA = base*8 (table at base-1), RD = num_const*8 (start index) 4215 | // RA = base*8 (table at base-1), RD = num_const*8 (start index)
@@ -4838,6 +4853,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4838 | mtctr TMP2 4853 | mtctr TMP2
4839 | stw L, DISPATCH_GL(jit_L)(DISPATCH) 4854 | stw L, DISPATCH_GL(jit_L)(DISPATCH)
4840 | addi JGL, DISPATCH, GG_DISP2G+32768 4855 | addi JGL, DISPATCH, GG_DISP2G+32768
4856 | stw L, DISPATCH_GL(tmpbuf.L)(DISPATCH)
4841 | bctr 4857 | bctr
4842 |.endif 4858 |.endif
4843 break; 4859 break;
diff --git a/src/vm_ppcspe.dasc b/src/vm_ppcspe.dasc
index 4fabc02f..b443f1b3 100644
--- a/src/vm_ppcspe.dasc
+++ b/src/vm_ppcspe.dasc
@@ -1456,12 +1456,6 @@ static void build_subroutines(BuildCtx *ctx)
1456 | math_extern2 atan2 1456 | math_extern2 atan2
1457 | math_extern2 fmod 1457 | math_extern2 fmod
1458 | 1458 |
1459 |->ff_math_deg:
1460 |.ffunc_n math_rad
1461 | evldd CARG2, CFUNC:RB->upvalue[0]
1462 | efdmul CRET1, CARG1, CARG2
1463 | b ->fff_restv
1464 |
1465 |.ffunc math_ldexp 1459 |.ffunc math_ldexp
1466 | cmplwi NARGS8:RC, 16 1460 | cmplwi NARGS8:RC, 16
1467 | evldd CARG2, 0(BASE) 1461 | evldd CARG2, 0(BASE)
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
index f25dfd30..62a5e139 100644
--- a/src/vm_x86.dasc
+++ b/src/vm_x86.dasc
@@ -18,7 +18,6 @@
18| 18|
19|.if P64 19|.if P64
20|.define X64, 1 20|.define X64, 1
21|.define SSE, 1
22|.if WIN 21|.if WIN
23|.define X64WIN, 1 22|.define X64WIN, 1
24|.endif 23|.endif
@@ -116,6 +115,7 @@
116|.type NODE, Node 115|.type NODE, Node
117|.type NARGS, int 116|.type NARGS, int
118|.type TRACE, GCtrace 117|.type TRACE, GCtrace
118|.type SBUF, SBuf
119| 119|
120|// Stack layout while in interpreter. Must match with lj_frame.h. 120|// Stack layout while in interpreter. Must match with lj_frame.h.
121|//----------------------------------------------------------------------- 121|//-----------------------------------------------------------------------
@@ -856,13 +856,9 @@ static void build_subroutines(BuildCtx *ctx)
856 |.if DUALNUM 856 |.if DUALNUM
857 | mov TMP2, LJ_TISNUM 857 | mov TMP2, LJ_TISNUM
858 | mov TMP1, RC 858 | mov TMP1, RC
859 |.elif SSE 859 |.else
860 | cvtsi2sd xmm0, RC 860 | cvtsi2sd xmm0, RC
861 | movsd TMPQ, xmm0 861 | movsd TMPQ, xmm0
862 |.else
863 | mov ARG4, RC
864 | fild ARG4
865 | fstp TMPQ
866 |.endif 862 |.endif
867 | lea RCa, TMPQ // Store temp. TValue in TMPQ. 863 | lea RCa, TMPQ // Store temp. TValue in TMPQ.
868 | jmp >1 864 | jmp >1
@@ -916,6 +912,19 @@ static void build_subroutines(BuildCtx *ctx)
916 | mov NARGS:RD, 2+1 // 2 args for func(t, k). 912 | mov NARGS:RD, 2+1 // 2 args for func(t, k).
917 | jmp ->vm_call_dispatch_f 913 | jmp ->vm_call_dispatch_f
918 | 914 |
915 |->vmeta_tgetr:
916 | mov FCARG1, TAB:RB
917 | mov RB, BASE // Save BASE.
918 | mov FCARG2, RC // Caveat: FCARG2 == BASE
919 | call extern lj_tab_getinth@8 // (GCtab *t, int32_t key)
920 | // cTValue * or NULL returned in eax (RC).
921 | movzx RA, PC_RA
922 | mov BASE, RB // Restore BASE.
923 | test RC, RC
924 | jnz ->BC_TGETR_Z
925 | mov dword [BASE+RA*8+4], LJ_TNIL
926 | jmp ->BC_TGETR2_Z
927 |
919 |//----------------------------------------------------------------------- 928 |//-----------------------------------------------------------------------
920 | 929 |
921 |->vmeta_tsets: 930 |->vmeta_tsets:
@@ -935,13 +944,9 @@ static void build_subroutines(BuildCtx *ctx)
935 |.if DUALNUM 944 |.if DUALNUM
936 | mov TMP2, LJ_TISNUM 945 | mov TMP2, LJ_TISNUM
937 | mov TMP1, RC 946 | mov TMP1, RC
938 |.elif SSE 947 |.else
939 | cvtsi2sd xmm0, RC 948 | cvtsi2sd xmm0, RC
940 | movsd TMPQ, xmm0 949 | movsd TMPQ, xmm0
941 |.else
942 | mov ARG4, RC
943 | fild ARG4
944 | fstp TMPQ
945 |.endif 950 |.endif
946 | lea RCa, TMPQ // Store temp. TValue in TMPQ. 951 | lea RCa, TMPQ // Store temp. TValue in TMPQ.
947 | jmp >1 952 | jmp >1
@@ -1007,6 +1012,33 @@ static void build_subroutines(BuildCtx *ctx)
1007 | mov NARGS:RD, 3+1 // 3 args for func(t, k, v). 1012 | mov NARGS:RD, 3+1 // 3 args for func(t, k, v).
1008 | jmp ->vm_call_dispatch_f 1013 | jmp ->vm_call_dispatch_f
1009 | 1014 |
1015 |->vmeta_tsetr:
1016 |.if X64WIN
1017 | mov L:CARG1d, SAVE_L
1018 | mov CARG3d, RC
1019 | mov L:CARG1d->base, BASE
1020 | xchg CARG2d, TAB:RB // Caveat: CARG2d == BASE.
1021 |.elif X64
1022 | mov L:CARG1d, SAVE_L
1023 | mov CARG2d, TAB:RB
1024 | mov L:CARG1d->base, BASE
1025 | mov RB, BASE // Save BASE.
1026 | mov CARG3d, RC // Caveat: CARG3d == BASE.
1027 |.else
1028 | mov L:RA, SAVE_L
1029 | mov ARG2, TAB:RB
1030 | mov RB, BASE // Save BASE.
1031 | mov ARG3, RC
1032 | mov ARG1, L:RA
1033 | mov L:RA->base, BASE
1034 |.endif
1035 | mov SAVE_PC, PC
1036 | call extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
1037 | // TValue * returned in eax (RC).
1038 | movzx RA, PC_RA
1039 | mov BASE, RB // Restore BASE.
1040 | jmp ->BC_TSETR_Z
1041 |
1010 |//-- Comparison metamethods --------------------------------------------- 1042 |//-- Comparison metamethods ---------------------------------------------
1011 | 1043 |
1012 |->vmeta_comp: 1044 |->vmeta_comp:
@@ -1101,6 +1133,26 @@ static void build_subroutines(BuildCtx *ctx)
1101 | jmp <3 1133 | jmp <3
1102 |.endif 1134 |.endif
1103 | 1135 |
1136 |->vmeta_istype:
1137 |.if X64
1138 | mov L:RB, SAVE_L
1139 | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
1140 | mov CARG2d, RA
1141 | movzx CARG3d, PC_RD
1142 | mov L:CARG1d, L:RB
1143 |.else
1144 | movzx RD, PC_RD
1145 | mov ARG2, RA
1146 | mov L:RB, SAVE_L
1147 | mov ARG3, RD
1148 | mov ARG1, L:RB
1149 | mov L:RB->base, BASE
1150 |.endif
1151 | mov SAVE_PC, PC
1152 | call extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
1153 | mov BASE, L:RB->base
1154 | jmp <6
1155 |
1104 |//-- Arithmetic metamethods --------------------------------------------- 1156 |//-- Arithmetic metamethods ---------------------------------------------
1105 | 1157 |
1106 |->vmeta_arith_vno: 1158 |->vmeta_arith_vno:
@@ -1509,11 +1561,7 @@ static void build_subroutines(BuildCtx *ctx)
1509 |.else 1561 |.else
1510 | jae ->fff_fallback 1562 | jae ->fff_fallback
1511 |.endif 1563 |.endif
1512 |.if SSE
1513 | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0 1564 | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0
1514 |.else
1515 | fld qword [BASE]; jmp ->fff_resn
1516 |.endif
1517 | 1565 |
1518 |.ffunc_1 tostring 1566 |.ffunc_1 tostring
1519 | // Only handles the string or number case inline. 1567 | // Only handles the string or number case inline.
@@ -1538,9 +1586,9 @@ static void build_subroutines(BuildCtx *ctx)
1538 |.endif 1586 |.endif
1539 | mov L:FCARG1, L:RB 1587 | mov L:FCARG1, L:RB
1540 |.if DUALNUM 1588 |.if DUALNUM
1541 | call extern lj_str_fromnumber@8 // (lua_State *L, cTValue *o) 1589 | call extern lj_strfmt_number@8 // (lua_State *L, cTValue *o)
1542 |.else 1590 |.else
1543 | call extern lj_str_fromnum@8 // (lua_State *L, lua_Number *np) 1591 | call extern lj_strfmt_num@8 // (lua_State *L, lua_Number *np)
1544 |.endif 1592 |.endif
1545 | // GCstr returned in eax (RD). 1593 | // GCstr returned in eax (RD).
1546 | mov BASE, L:RB->base 1594 | mov BASE, L:RB->base
@@ -1631,19 +1679,12 @@ static void build_subroutines(BuildCtx *ctx)
1631 | add RD, 1 1679 | add RD, 1
1632 | mov dword [BASE-4], LJ_TISNUM 1680 | mov dword [BASE-4], LJ_TISNUM
1633 | mov dword [BASE-8], RD 1681 | mov dword [BASE-8], RD
1634 |.elif SSE 1682 |.else
1635 | movsd xmm0, qword [BASE+8] 1683 | movsd xmm0, qword [BASE+8]
1636 | sseconst_1 xmm1, RBa 1684 | sseconst_1 xmm1, RBa
1637 | addsd xmm0, xmm1 1685 | addsd xmm0, xmm1
1638 | cvtsd2si RD, xmm0 1686 | cvttsd2si RD, xmm0
1639 | movsd qword [BASE-8], xmm0 1687 | movsd qword [BASE-8], xmm0
1640 |.else
1641 | fld qword [BASE+8]
1642 | fld1
1643 | faddp st1
1644 | fist ARG1
1645 | fstp qword [BASE-8]
1646 | mov RD, ARG1
1647 |.endif 1688 |.endif
1648 | mov TAB:RB, [BASE] 1689 | mov TAB:RB, [BASE]
1649 | cmp RD, TAB:RB->asize; jae >2 // Not in array part? 1690 | cmp RD, TAB:RB->asize; jae >2 // Not in array part?
@@ -1690,12 +1731,9 @@ static void build_subroutines(BuildCtx *ctx)
1690 |.if DUALNUM 1731 |.if DUALNUM
1691 | mov dword [BASE+12], LJ_TISNUM 1732 | mov dword [BASE+12], LJ_TISNUM
1692 | mov dword [BASE+8], 0 1733 | mov dword [BASE+8], 0
1693 |.elif SSE 1734 |.else
1694 | xorps xmm0, xmm0 1735 | xorps xmm0, xmm0
1695 | movsd qword [BASE+8], xmm0 1736 | movsd qword [BASE+8], xmm0
1696 |.else
1697 | fldz
1698 | fstp qword [BASE+8]
1699 |.endif 1737 |.endif
1700 | mov RD, 1+3 1738 | mov RD, 1+3
1701 | jmp ->fff_res 1739 | jmp ->fff_res
@@ -1925,12 +1963,10 @@ static void build_subroutines(BuildCtx *ctx)
1925 |->fff_resi: // Dummy. 1963 |->fff_resi: // Dummy.
1926 |.endif 1964 |.endif
1927 | 1965 |
1928 |.if SSE
1929 |->fff_resn: 1966 |->fff_resn:
1930 | mov PC, [BASE-4] 1967 | mov PC, [BASE-4]
1931 | fstp qword [BASE-8] 1968 | fstp qword [BASE-8]
1932 | jmp ->fff_res1 1969 | jmp ->fff_res1
1933 |.endif
1934 | 1970 |
1935 | .ffunc_1 math_abs 1971 | .ffunc_1 math_abs
1936 |.if DUALNUM 1972 |.if DUALNUM
@@ -1954,8 +1990,6 @@ static void build_subroutines(BuildCtx *ctx)
1954 |.else 1990 |.else
1955 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 1991 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1956 |.endif 1992 |.endif
1957 |
1958 |.if SSE
1959 | movsd xmm0, qword [BASE] 1993 | movsd xmm0, qword [BASE]
1960 | sseconst_abs xmm1, RDa 1994 | sseconst_abs xmm1, RDa
1961 | andps xmm0, xmm1 1995 | andps xmm0, xmm1
@@ -1963,15 +1997,6 @@ static void build_subroutines(BuildCtx *ctx)
1963 | mov PC, [BASE-4] 1997 | mov PC, [BASE-4]
1964 | movsd qword [BASE-8], xmm0 1998 | movsd qword [BASE-8], xmm0
1965 | // fallthrough 1999 | // fallthrough
1966 |.else
1967 | fld qword [BASE]
1968 | fabs
1969 | // fallthrough
1970 |->fff_resxmm0: // Dummy.
1971 |->fff_resn:
1972 | mov PC, [BASE-4]
1973 | fstp qword [BASE-8]
1974 |.endif
1975 | 2000 |
1976 |->fff_res1: 2001 |->fff_res1:
1977 | mov RD, 1+1 2002 | mov RD, 1+1
@@ -2008,48 +2033,24 @@ static void build_subroutines(BuildCtx *ctx)
2008 |.else 2033 |.else
2009 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 2034 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
2010 |.endif 2035 |.endif
2011 |.if SSE
2012 | movsd xmm0, qword [BASE] 2036 | movsd xmm0, qword [BASE]
2013 | call ->vm_ .. func 2037 | call ->vm_ .. func .. _sse
2014 | .if DUALNUM 2038 |.if DUALNUM
2015 | cvtsd2si RB, xmm0 2039 | cvttsd2si RB, xmm0
2016 | cmp RB, 0x80000000 2040 | cmp RB, 0x80000000
2017 | jne ->fff_resi 2041 | jne ->fff_resi
2018 | cvtsi2sd xmm1, RB 2042 | cvtsi2sd xmm1, RB
2019 | ucomisd xmm0, xmm1 2043 | ucomisd xmm0, xmm1
2020 | jp ->fff_resxmm0 2044 | jp ->fff_resxmm0
2021 | je ->fff_resi 2045 | je ->fff_resi
2022 | .endif
2023 | jmp ->fff_resxmm0
2024 |.else
2025 | fld qword [BASE]
2026 | call ->vm_ .. func
2027 | .if DUALNUM
2028 | fist ARG1
2029 | mov RB, ARG1
2030 | cmp RB, 0x80000000; jne >2
2031 | fdup
2032 | fild ARG1
2033 | fcomparepp
2034 | jp ->fff_resn
2035 | jne ->fff_resn
2036 |2:
2037 | fpop
2038 | jmp ->fff_resi
2039 | .else
2040 | jmp ->fff_resn
2041 | .endif
2042 |.endif 2046 |.endif
2047 | jmp ->fff_resxmm0
2043 |.endmacro 2048 |.endmacro
2044 | 2049 |
2045 | math_round floor 2050 | math_round floor
2046 | math_round ceil 2051 | math_round ceil
2047 | 2052 |
2048 |.if SSE
2049 |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0 2053 |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0
2050 |.else
2051 |.ffunc_n math_sqrt; fsqrt; jmp ->fff_resn
2052 |.endif
2053 | 2054 |
2054 |.ffunc math_log 2055 |.ffunc math_log
2055 | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. 2056 | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument.
@@ -2072,42 +2073,24 @@ static void build_subroutines(BuildCtx *ctx)
2072 |.ffunc_n math_atan; fld1; fpatan; jmp ->fff_resn 2073 |.ffunc_n math_atan; fld1; fpatan; jmp ->fff_resn
2073 | 2074 |
2074 |.macro math_extern, func 2075 |.macro math_extern, func
2075 |.if SSE
2076 | .ffunc_nsse math_ .. func 2076 | .ffunc_nsse math_ .. func
2077 | .if not X64 2077 |.if not X64
2078 | movsd FPARG1, xmm0 2078 | movsd FPARG1, xmm0
2079 | .endif
2080 |.else
2081 | .ffunc_n math_ .. func
2082 | fstp FPARG1
2083 |.endif 2079 |.endif
2084 | mov RB, BASE 2080 | mov RB, BASE
2085 | call extern lj_vm_ .. func 2081 | call extern lj_vm_ .. func
2086 | mov BASE, RB 2082 | mov BASE, RB
2087 | .if X64 2083 |.if X64
2088 | jmp ->fff_resxmm0 2084 | jmp ->fff_resxmm0
2089 | .else 2085 |.else
2090 | jmp ->fff_resn 2086 | jmp ->fff_resn
2091 | .endif 2087 |.endif
2092 |.endmacro 2088 |.endmacro
2093 | 2089 |
2094 | math_extern sinh 2090 | math_extern sinh
2095 | math_extern cosh 2091 | math_extern cosh
2096 | math_extern tanh 2092 | math_extern tanh
2097 | 2093 |
2098 |->ff_math_deg:
2099 |.if SSE
2100 |.ffunc_nsse math_rad
2101 | mov CFUNC:RB, [BASE-8]
2102 | mulsd xmm0, qword CFUNC:RB->upvalue[0]
2103 | jmp ->fff_resxmm0
2104 |.else
2105 |.ffunc_n math_rad
2106 | mov CFUNC:RB, [BASE-8]
2107 | fmul qword CFUNC:RB->upvalue[0]
2108 | jmp ->fff_resn
2109 |.endif
2110 |
2111 |.ffunc_nn math_atan2; fpatan; jmp ->fff_resn 2094 |.ffunc_nn math_atan2; fpatan; jmp ->fff_resn
2112 |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn 2095 |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn
2113 | 2096 |
@@ -2123,65 +2106,34 @@ static void build_subroutines(BuildCtx *ctx)
2123 | cmp RB, 0x00200000; jb >4 2106 | cmp RB, 0x00200000; jb >4
2124 |1: 2107 |1:
2125 | shr RB, 21; sub RB, RC // Extract and unbias exponent. 2108 | shr RB, 21; sub RB, RC // Extract and unbias exponent.
2126 |.if SSE
2127 | cvtsi2sd xmm0, RB 2109 | cvtsi2sd xmm0, RB
2128 |.else
2129 | mov TMP1, RB; fild TMP1
2130 |.endif
2131 | mov RB, [BASE-4] 2110 | mov RB, [BASE-4]
2132 | and RB, 0x800fffff // Mask off exponent. 2111 | and RB, 0x800fffff // Mask off exponent.
2133 | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0. 2112 | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0.
2134 | mov [BASE-4], RB 2113 | mov [BASE-4], RB
2135 |2: 2114 |2:
2136 |.if SSE
2137 | movsd qword [BASE], xmm0 2115 | movsd qword [BASE], xmm0
2138 |.else
2139 | fstp qword [BASE]
2140 |.endif
2141 | mov RD, 1+2 2116 | mov RD, 1+2
2142 | jmp ->fff_res 2117 | jmp ->fff_res
2143 |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0. 2118 |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0.
2144 |.if SSE
2145 | xorps xmm0, xmm0; jmp <2 2119 | xorps xmm0, xmm0; jmp <2
2146 |.else
2147 | fldz; jmp <2
2148 |.endif
2149 |4: // Handle denormals by multiplying with 2^54 and adjusting the bias. 2120 |4: // Handle denormals by multiplying with 2^54 and adjusting the bias.
2150 |.if SSE
2151 | movsd xmm0, qword [BASE] 2121 | movsd xmm0, qword [BASE]
2152 | sseconst_hi xmm1, RBa, 43500000 // 2^54. 2122 | sseconst_hi xmm1, RBa, 43500000 // 2^54.
2153 | mulsd xmm0, xmm1 2123 | mulsd xmm0, xmm1
2154 | movsd qword [BASE-8], xmm0 2124 | movsd qword [BASE-8], xmm0
2155 |.else
2156 | fld qword [BASE]
2157 | mov TMP1, 0x5a800000; fmul TMP1 // x = x*2^54
2158 | fstp qword [BASE-8]
2159 |.endif
2160 | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1 2125 | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1
2161 | 2126 |
2162 |.if SSE
2163 |.ffunc_nsse math_modf 2127 |.ffunc_nsse math_modf
2164 |.else
2165 |.ffunc_n math_modf
2166 |.endif
2167 | mov RB, [BASE+4] 2128 | mov RB, [BASE+4]
2168 | mov PC, [BASE-4] 2129 | mov PC, [BASE-4]
2169 | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf? 2130 | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf?
2170 |.if SSE
2171 | movaps xmm4, xmm0 2131 | movaps xmm4, xmm0
2172 | call ->vm_trunc 2132 | call ->vm_trunc_sse
2173 | subsd xmm4, xmm0 2133 | subsd xmm4, xmm0
2174 |1: 2134 |1:
2175 | movsd qword [BASE-8], xmm0 2135 | movsd qword [BASE-8], xmm0
2176 | movsd qword [BASE], xmm4 2136 | movsd qword [BASE], xmm4
2177 |.else
2178 | fdup
2179 | call ->vm_trunc
2180 | fsub st1, st0
2181 |1:
2182 | fstp qword [BASE-8]
2183 | fstp qword [BASE]
2184 |.endif
2185 | mov RC, [BASE-4]; mov RB, [BASE+4] 2137 | mov RC, [BASE-4]; mov RB, [BASE+4]
2186 | xor RC, RB; js >3 // Need to adjust sign? 2138 | xor RC, RB; js >3 // Need to adjust sign?
2187 |2: 2139 |2:
@@ -2191,24 +2143,16 @@ static void build_subroutines(BuildCtx *ctx)
2191 | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction. 2143 | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction.
2192 | jmp <2 2144 | jmp <2
2193 |4: 2145 |4:
2194 |.if SSE
2195 | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0. 2146 | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0.
2196 |.else
2197 | fldz; fxch; jmp <1 // Return +-Inf and +-0.
2198 |.endif
2199 | 2147 |
2200 |.ffunc_nnr math_fmod 2148 |.ffunc_nnr math_fmod
2201 |1: ; fprem; fnstsw ax; sahf; jp <1 2149 |1: ; fprem; fnstsw ax; sahf; jp <1
2202 | fpop1 2150 | fpop1
2203 | jmp ->fff_resn 2151 | jmp ->fff_resn
2204 | 2152 |
2205 |.if SSE 2153 |.ffunc_nnsse math_pow; call ->vm_pow_sse; jmp ->fff_resxmm0
2206 |.ffunc_nnsse math_pow; call ->vm_pow; jmp ->fff_resxmm0
2207 |.else
2208 |.ffunc_nn math_pow; call ->vm_pow; jmp ->fff_resn
2209 |.endif
2210 | 2154 |
2211 |.macro math_minmax, name, cmovop, fcmovop, sseop 2155 |.macro math_minmax, name, cmovop, sseop
2212 | .ffunc name 2156 | .ffunc name
2213 | mov RA, 2 2157 | mov RA, 2
2214 | cmp dword [BASE+4], LJ_TISNUM 2158 | cmp dword [BASE+4], LJ_TISNUM
@@ -2225,12 +2169,7 @@ static void build_subroutines(BuildCtx *ctx)
2225 |3: 2169 |3:
2226 | ja ->fff_fallback 2170 | ja ->fff_fallback
2227 | // Convert intermediate result to number and continue below. 2171 | // Convert intermediate result to number and continue below.
2228 |.if SSE
2229 | cvtsi2sd xmm0, RB 2172 | cvtsi2sd xmm0, RB
2230 |.else
2231 | mov TMP1, RB
2232 | fild TMP1
2233 |.endif
2234 | jmp >6 2173 | jmp >6
2235 |4: 2174 |4:
2236 | ja ->fff_fallback 2175 | ja ->fff_fallback
@@ -2238,7 +2177,6 @@ static void build_subroutines(BuildCtx *ctx)
2238 | jae ->fff_fallback 2177 | jae ->fff_fallback
2239 |.endif 2178 |.endif
2240 | 2179 |
2241 |.if SSE
2242 | movsd xmm0, qword [BASE] 2180 | movsd xmm0, qword [BASE]
2243 |5: // Handle numbers or integers. 2181 |5: // Handle numbers or integers.
2244 | cmp RA, RD; jae ->fff_resxmm0 2182 | cmp RA, RD; jae ->fff_resxmm0
@@ -2257,48 +2195,13 @@ static void build_subroutines(BuildCtx *ctx)
2257 | sseop xmm0, xmm1 2195 | sseop xmm0, xmm1
2258 | add RA, 1 2196 | add RA, 1
2259 | jmp <5 2197 | jmp <5
2260 |.else
2261 | fld qword [BASE]
2262 |5: // Handle numbers or integers.
2263 | cmp RA, RD; jae ->fff_resn
2264 | cmp dword [BASE+RA*8-4], LJ_TISNUM
2265 |.if DUALNUM
2266 | jb >6
2267 | ja >9
2268 | fild dword [BASE+RA*8-8]
2269 | jmp >7
2270 |.else
2271 | jae >9
2272 |.endif
2273 |6:
2274 | fld qword [BASE+RA*8-8]
2275 |7:
2276 | fucomi st1; fcmovop st1; fpop1
2277 | add RA, 1
2278 | jmp <5
2279 |.endif
2280 |.endmacro 2198 |.endmacro
2281 | 2199 |
2282 | math_minmax math_min, cmovg, fcmovnbe, minsd 2200 | math_minmax math_min, cmovg, minsd
2283 | math_minmax math_max, cmovl, fcmovbe, maxsd 2201 | math_minmax math_max, cmovl, maxsd
2284 |.if not SSE
2285 |9:
2286 | fpop; jmp ->fff_fallback
2287 |.endif
2288 | 2202 |
2289 |//-- String library ----------------------------------------------------- 2203 |//-- String library -----------------------------------------------------
2290 | 2204 |
2291 |.ffunc_1 string_len
2292 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2293 | mov STR:RB, [BASE]
2294 |.if DUALNUM
2295 | mov RB, dword STR:RB->len; jmp ->fff_resi
2296 |.elif SSE
2297 | cvtsi2sd xmm0, dword STR:RB->len; jmp ->fff_resxmm0
2298 |.else
2299 | fild dword STR:RB->len; jmp ->fff_resn
2300 |.endif
2301 |
2302 |.ffunc string_byte // Only handle the 1-arg case here. 2205 |.ffunc string_byte // Only handle the 1-arg case here.
2303 | cmp NARGS:RD, 1+1; jne ->fff_fallback 2206 | cmp NARGS:RD, 1+1; jne ->fff_fallback
2304 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback 2207 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
@@ -2309,10 +2212,8 @@ static void build_subroutines(BuildCtx *ctx)
2309 | movzx RB, byte STR:RB[1] 2212 | movzx RB, byte STR:RB[1]
2310 |.if DUALNUM 2213 |.if DUALNUM
2311 | jmp ->fff_resi 2214 | jmp ->fff_resi
2312 |.elif SSE
2313 | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0
2314 |.else 2215 |.else
2315 | mov TMP1, RB; fild TMP1; jmp ->fff_resn 2216 | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0
2316 |.endif 2217 |.endif
2317 | 2218 |
2318 |.ffunc string_char // Only handle the 1-arg case here. 2219 |.ffunc string_char // Only handle the 1-arg case here.
@@ -2324,16 +2225,11 @@ static void build_subroutines(BuildCtx *ctx)
2324 | mov RB, dword [BASE] 2225 | mov RB, dword [BASE]
2325 | cmp RB, 255; ja ->fff_fallback 2226 | cmp RB, 255; ja ->fff_fallback
2326 | mov TMP2, RB 2227 | mov TMP2, RB
2327 |.elif SSE 2228 |.else
2328 | jae ->fff_fallback 2229 | jae ->fff_fallback
2329 | cvttsd2si RB, qword [BASE] 2230 | cvttsd2si RB, qword [BASE]
2330 | cmp RB, 255; ja ->fff_fallback 2231 | cmp RB, 255; ja ->fff_fallback
2331 | mov TMP2, RB 2232 | mov TMP2, RB
2332 |.else
2333 | jae ->fff_fallback
2334 | fld qword [BASE]
2335 | fistp TMP2
2336 | cmp TMP2, 255; ja ->fff_fallback
2337 |.endif 2233 |.endif
2338 |.if X64 2234 |.if X64
2339 | mov TMP3, 1 2235 | mov TMP3, 1
@@ -2354,6 +2250,7 @@ static void build_subroutines(BuildCtx *ctx)
2354 |.endif 2250 |.endif
2355 | mov SAVE_PC, PC 2251 | mov SAVE_PC, PC
2356 | call extern lj_str_new // (lua_State *L, char *str, size_t l) 2252 | call extern lj_str_new // (lua_State *L, char *str, size_t l)
2253 |->fff_resstr:
2357 | // GCstr * returned in eax (RD). 2254 | // GCstr * returned in eax (RD).
2358 | mov BASE, L:RB->base 2255 | mov BASE, L:RB->base
2359 | mov PC, [BASE-4] 2256 | mov PC, [BASE-4]
@@ -2371,14 +2268,10 @@ static void build_subroutines(BuildCtx *ctx)
2371 | jne ->fff_fallback 2268 | jne ->fff_fallback
2372 | mov RB, dword [BASE+16] 2269 | mov RB, dword [BASE+16]
2373 | mov TMP2, RB 2270 | mov TMP2, RB
2374 |.elif SSE 2271 |.else
2375 | jae ->fff_fallback 2272 | jae ->fff_fallback
2376 | cvttsd2si RB, qword [BASE+16] 2273 | cvttsd2si RB, qword [BASE+16]
2377 | mov TMP2, RB 2274 | mov TMP2, RB
2378 |.else
2379 | jae ->fff_fallback
2380 | fld qword [BASE+16]
2381 | fistp TMP2
2382 |.endif 2275 |.endif
2383 |1: 2276 |1:
2384 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback 2277 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
@@ -2393,12 +2286,8 @@ static void build_subroutines(BuildCtx *ctx)
2393 | mov RB, STR:RB->len 2286 | mov RB, STR:RB->len
2394 |.if DUALNUM 2287 |.if DUALNUM
2395 | mov RA, dword [BASE+8] 2288 | mov RA, dword [BASE+8]
2396 |.elif SSE
2397 | cvttsd2si RA, qword [BASE+8]
2398 |.else 2289 |.else
2399 | fld qword [BASE+8] 2290 | cvttsd2si RA, qword [BASE+8]
2400 | fistp ARG3
2401 | mov RA, ARG3
2402 |.endif 2291 |.endif
2403 | mov RC, TMP2 2292 | mov RC, TMP2
2404 | cmp RB, RC // len < end? (unsigned compare) 2293 | cmp RB, RC // len < end? (unsigned compare)
@@ -2442,123 +2331,27 @@ static void build_subroutines(BuildCtx *ctx)
2442 | xor RC, RC // Zero length. Any ptr in RB is ok. 2331 | xor RC, RC // Zero length. Any ptr in RB is ok.
2443 | jmp <4 2332 | jmp <4
2444 | 2333 |
2445 |.ffunc string_rep // Only handle the 1-char case inline. 2334 |.macro ffstring_op, name
2335 | .ffunc_1 string_ .. name
2446 | ffgccheck 2336 | ffgccheck
2447 | cmp NARGS:RD, 2+1; jne ->fff_fallback // Exactly 2 arguments.
2448 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback 2337 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2449 | cmp dword [BASE+12], LJ_TISNUM 2338 | mov L:RB, SAVE_L
2450 | mov STR:RB, [BASE] 2339 | lea SBUF:FCARG1, [DISPATCH+DISPATCH_GL(tmpbuf)]
2451 |.if DUALNUM 2340 | mov L:RB->base, BASE
2452 | jne ->fff_fallback 2341 | mov STR:FCARG2, [BASE] // Caveat: FCARG2 == BASE
2453 | mov RC, dword [BASE+8] 2342 | mov RC, SBUF:FCARG1->b
2454 |.elif SSE 2343 | mov SBUF:FCARG1->L, L:RB
2455 | jae ->fff_fallback 2344 | mov SBUF:FCARG1->p, RC
2456 | cvttsd2si RC, qword [BASE+8] 2345 | mov SAVE_PC, PC
2457 |.else 2346 | call extern lj_buf_putstr_ .. name .. @8
2458 | jae ->fff_fallback 2347 | mov FCARG1, eax
2459 | fld qword [BASE+8] 2348 | call extern lj_buf_tostr@4
2460 | fistp TMP2 2349 | jmp ->fff_resstr
2461 | mov RC, TMP2
2462 |.endif
2463 | test RC, RC
2464 | jle ->fff_emptystr // Count <= 0? (or non-int)
2465 | cmp dword STR:RB->len, 1
2466 | jb ->fff_emptystr // Zero length string?
2467 | jne ->fff_fallback_2 // Fallback for > 1-char strings.
2468 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_2
2469 | movzx RA, byte STR:RB[1]
2470 | mov RB, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2471 |.if X64
2472 | mov TMP3, RC
2473 |.else
2474 | mov ARG3, RC
2475 |.endif
2476 |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?).
2477 | mov [RB], RAL
2478 | add RB, 1
2479 | sub RC, 1
2480 | jnz <1
2481 | mov RD, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2482 | jmp ->fff_newstr
2483 |
2484 |.ffunc_1 string_reverse
2485 | ffgccheck
2486 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2487 | mov STR:RB, [BASE]
2488 | mov RC, STR:RB->len
2489 | test RC, RC
2490 | jz ->fff_emptystr // Zero length string?
2491 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1
2492 | add RB, #STR
2493 | mov TMP2, PC // Need another temp register.
2494 |.if X64
2495 | mov TMP3, RC
2496 |.else
2497 | mov ARG3, RC
2498 |.endif
2499 | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2500 |1:
2501 | movzx RA, byte [RB]
2502 | add RB, 1
2503 | sub RC, 1
2504 | mov [PC+RC], RAL
2505 | jnz <1
2506 | mov RD, PC
2507 | mov PC, TMP2
2508 | jmp ->fff_newstr
2509 |
2510 |.macro ffstring_case, name, lo, hi
2511 | .ffunc_1 name
2512 | ffgccheck
2513 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2514 | mov STR:RB, [BASE]
2515 | mov RC, STR:RB->len
2516 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1
2517 | add RB, #STR
2518 | mov TMP2, PC // Need another temp register.
2519 |.if X64
2520 | mov TMP3, RC
2521 |.else
2522 | mov ARG3, RC
2523 |.endif
2524 | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2525 | jmp >3
2526 |1: // ASCII case conversion. Yes, this is suboptimal code (do you care?).
2527 | movzx RA, byte [RB+RC]
2528 | cmp RA, lo
2529 | jb >2
2530 | cmp RA, hi
2531 | ja >2
2532 | xor RA, 0x20
2533 |2:
2534 | mov [PC+RC], RAL
2535 |3:
2536 | sub RC, 1
2537 | jns <1
2538 | mov RD, PC
2539 | mov PC, TMP2
2540 | jmp ->fff_newstr
2541 |.endmacro 2350 |.endmacro
2542 | 2351 |
2543 |ffstring_case string_lower, 0x41, 0x5a 2352 |ffstring_op reverse
2544 |ffstring_case string_upper, 0x61, 0x7a 2353 |ffstring_op lower
2545 | 2354 |ffstring_op upper
2546 |//-- Table library ------------------------------------------------------
2547 |
2548 |.ffunc_1 table_getn
2549 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
2550 | mov RB, BASE // Save BASE.
2551 | mov TAB:FCARG1, [BASE]
2552 | call extern lj_tab_len@4 // LJ_FASTCALL (GCtab *t)
2553 | // Length of table returned in eax (RD).
2554 | mov BASE, RB // Restore BASE.
2555 |.if DUALNUM
2556 | mov RB, RD; jmp ->fff_resi
2557 |.elif SSE
2558 | cvtsi2sd xmm0, RD; jmp ->fff_resxmm0
2559 |.else
2560 | mov ARG1, RD; fild ARG1; jmp ->fff_resn
2561 |.endif
2562 | 2355 |
2563 |//-- Bit library -------------------------------------------------------- 2356 |//-- Bit library --------------------------------------------------------
2564 | 2357 |
@@ -2567,11 +2360,7 @@ static void build_subroutines(BuildCtx *ctx)
2567 |.macro .ffunc_bit, name, kind 2360 |.macro .ffunc_bit, name, kind
2568 | .ffunc_1 name 2361 | .ffunc_1 name
2569 |.if kind == 2 2362 |.if kind == 2
2570 |.if SSE
2571 | sseconst_tobit xmm1, RBa 2363 | sseconst_tobit xmm1, RBa
2572 |.else
2573 | mov TMP1, TOBIT_BIAS
2574 |.endif
2575 |.endif 2364 |.endif
2576 | cmp dword [BASE+4], LJ_TISNUM 2365 | cmp dword [BASE+4], LJ_TISNUM
2577 |.if DUALNUM 2366 |.if DUALNUM
@@ -2587,37 +2376,17 @@ static void build_subroutines(BuildCtx *ctx)
2587 |.else 2376 |.else
2588 | jae ->fff_fallback 2377 | jae ->fff_fallback
2589 |.endif 2378 |.endif
2590 |.if SSE
2591 | movsd xmm0, qword [BASE] 2379 | movsd xmm0, qword [BASE]
2592 |.if kind < 2 2380 |.if kind < 2
2593 | sseconst_tobit xmm1, RBa 2381 | sseconst_tobit xmm1, RBa
2594 |.endif 2382 |.endif
2595 | addsd xmm0, xmm1 2383 | addsd xmm0, xmm1
2596 | movd RB, xmm0 2384 | movd RB, xmm0
2597 |.else
2598 | fld qword [BASE]
2599 |.if kind < 2
2600 | mov TMP1, TOBIT_BIAS
2601 |.endif
2602 | fadd TMP1
2603 | fstp FPARG1
2604 |.if kind > 0
2605 | mov RB, ARG1
2606 |.endif
2607 |.endif
2608 |2: 2385 |2:
2609 |.endmacro 2386 |.endmacro
2610 | 2387 |
2611 |.ffunc_bit bit_tobit, 0 2388 |.ffunc_bit bit_tobit, 0
2612 |.if DUALNUM or SSE
2613 |.if not SSE
2614 | mov RB, ARG1
2615 |.endif
2616 | jmp ->fff_resbit 2389 | jmp ->fff_resbit
2617 |.else
2618 | fild ARG1
2619 | jmp ->fff_resn
2620 |.endif
2621 | 2390 |
2622 |.macro .ffunc_bit_op, name, ins 2391 |.macro .ffunc_bit_op, name, ins
2623 | .ffunc_bit name, 2 2392 | .ffunc_bit name, 2
@@ -2637,17 +2406,10 @@ static void build_subroutines(BuildCtx *ctx)
2637 |.else 2406 |.else
2638 | jae ->fff_fallback_bit_op 2407 | jae ->fff_fallback_bit_op
2639 |.endif 2408 |.endif
2640 |.if SSE
2641 | movsd xmm0, qword [RD] 2409 | movsd xmm0, qword [RD]
2642 | addsd xmm0, xmm1 2410 | addsd xmm0, xmm1
2643 | movd RA, xmm0 2411 | movd RA, xmm0
2644 | ins RB, RA 2412 | ins RB, RA
2645 |.else
2646 | fld qword [RD]
2647 | fadd TMP1
2648 | fstp FPARG1
2649 | ins RB, ARG1
2650 |.endif
2651 | sub RD, 8 2413 | sub RD, 8
2652 | jmp <1 2414 | jmp <1
2653 |.endmacro 2415 |.endmacro
@@ -2664,15 +2426,10 @@ static void build_subroutines(BuildCtx *ctx)
2664 | not RB 2426 | not RB
2665 |.if DUALNUM 2427 |.if DUALNUM
2666 | jmp ->fff_resbit 2428 | jmp ->fff_resbit
2667 |.elif SSE 2429 |.else
2668 |->fff_resbit: 2430 |->fff_resbit:
2669 | cvtsi2sd xmm0, RB 2431 | cvtsi2sd xmm0, RB
2670 | jmp ->fff_resxmm0 2432 | jmp ->fff_resxmm0
2671 |.else
2672 |->fff_resbit:
2673 | mov ARG1, RB
2674 | fild ARG1
2675 | jmp ->fff_resn
2676 |.endif 2433 |.endif
2677 | 2434 |
2678 |->fff_fallback_bit_op: 2435 |->fff_fallback_bit_op:
@@ -2685,22 +2442,13 @@ static void build_subroutines(BuildCtx *ctx)
2685 | // Note: no inline conversion from number for 2nd argument! 2442 | // Note: no inline conversion from number for 2nd argument!
2686 | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback 2443 | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback
2687 | mov RA, dword [BASE+8] 2444 | mov RA, dword [BASE+8]
2688 |.elif SSE 2445 |.else
2689 | .ffunc_nnsse name 2446 | .ffunc_nnsse name
2690 | sseconst_tobit xmm2, RBa 2447 | sseconst_tobit xmm2, RBa
2691 | addsd xmm0, xmm2 2448 | addsd xmm0, xmm2
2692 | addsd xmm1, xmm2 2449 | addsd xmm1, xmm2
2693 | movd RB, xmm0 2450 | movd RB, xmm0
2694 | movd RA, xmm1 2451 | movd RA, xmm1
2695 |.else
2696 | .ffunc_nn name
2697 | mov TMP1, TOBIT_BIAS
2698 | fadd TMP1
2699 | fstp FPARG3
2700 | fadd TMP1
2701 | fstp FPARG1
2702 | mov RA, ARG3
2703 | mov RB, ARG1
2704 |.endif 2452 |.endif
2705 | ins RB, cl // Assumes RA is ecx. 2453 | ins RB, cl // Assumes RA is ecx.
2706 | jmp ->fff_resbit 2454 | jmp ->fff_resbit
@@ -3051,27 +2799,9 @@ static void build_subroutines(BuildCtx *ctx)
3051 |//----------------------------------------------------------------------- 2799 |//-----------------------------------------------------------------------
3052 | 2800 |
3053 |// FP value rounding. Called by math.floor/math.ceil fast functions 2801 |// FP value rounding. Called by math.floor/math.ceil fast functions
3054 |// and from JIT code. 2802 |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
3055 | 2803 |.macro vm_round, name, mode
3056 |// x87 variant: Arg/ret on x87 stack. No int/xmm registers modified. 2804 |->name .. _sse:
3057 |.macro vm_round_x87, mode1, mode2
3058 | fnstcw word [esp+4] // Caveat: overwrites ARG1 and ARG2.
3059 | mov [esp+8], eax
3060 | mov ax, mode1
3061 | or ax, [esp+4]
3062 |.if mode2 ~= 0xffff
3063 | and ax, mode2
3064 |.endif
3065 | mov [esp+6], ax
3066 | fldcw word [esp+6]
3067 | frndint
3068 | fldcw word [esp+4]
3069 | mov eax, [esp+8]
3070 | ret
3071 |.endmacro
3072 |
3073 |// SSE variant: arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
3074 |.macro vm_round_sse, mode
3075 | sseconst_abs xmm2, RDa 2805 | sseconst_abs xmm2, RDa
3076 | sseconst_2p52 xmm3, RDa 2806 | sseconst_2p52 xmm3, RDa
3077 | movaps xmm1, xmm0 2807 | movaps xmm1, xmm0
@@ -3107,22 +2837,21 @@ static void build_subroutines(BuildCtx *ctx)
3107 | ret 2837 | ret
3108 |.endmacro 2838 |.endmacro
3109 | 2839 |
3110 |.macro vm_round, name, ssemode, mode1, mode2 2840 |->vm_floor:
3111 |->name: 2841 |.if not X64
3112 |.if not SSE 2842 | movsd xmm0, qword [esp+4]
3113 | vm_round_x87 mode1, mode2 2843 | call ->vm_floor_sse
2844 | movsd qword [esp+4], xmm0 // Overwrite callee-owned arg.
2845 | fld qword [esp+4]
2846 | ret
3114 |.endif 2847 |.endif
3115 |->name .. _sse:
3116 | vm_round_sse ssemode
3117 |.endmacro
3118 | 2848 |
3119 | vm_round vm_floor, 0, 0x0400, 0xf7ff 2849 | vm_round vm_floor, 0
3120 | vm_round vm_ceil, 1, 0x0800, 0xfbff 2850 | vm_round vm_ceil, 1
3121 | vm_round vm_trunc, 2, 0x0c00, 0xffff 2851 | vm_round vm_trunc, 2
3122 | 2852 |
3123 |// FP modulo x%y. Called by BC_MOD* and vm_arith. 2853 |// FP modulo x%y. Called by BC_MOD* and vm_arith.
3124 |->vm_mod: 2854 |->vm_mod:
3125 |.if SSE
3126 |// Args in xmm0/xmm1, return value in xmm0. 2855 |// Args in xmm0/xmm1, return value in xmm0.
3127 |// Caveat: xmm0-xmm5 and RC (eax) modified! 2856 |// Caveat: xmm0-xmm5 and RC (eax) modified!
3128 | movaps xmm5, xmm0 2857 | movaps xmm5, xmm0
@@ -3150,23 +2879,6 @@ static void build_subroutines(BuildCtx *ctx)
3150 | movaps xmm0, xmm5 2879 | movaps xmm0, xmm5
3151 | subsd xmm0, xmm1 2880 | subsd xmm0, xmm1
3152 | ret 2881 | ret
3153 |.else
3154 |// Args/ret on x87 stack (y on top). No xmm registers modified.
3155 |// Caveat: needs 3 slots on x87 stack! RC (eax) modified!
3156 | fld st1
3157 | fdiv st1
3158 | fnstcw word [esp+4]
3159 | mov ax, 0x0400
3160 | or ax, [esp+4]
3161 | and ax, 0xf7ff
3162 | mov [esp+6], ax
3163 | fldcw word [esp+6]
3164 | frndint
3165 | fldcw word [esp+4]
3166 | fmulp st1
3167 | fsubp st1
3168 | ret
3169 |.endif
3170 | 2882 |
3171 |// FP log2(x). Called by math.log(x, base). 2883 |// FP log2(x). Called by math.log(x, base).
3172 |->vm_log2: 2884 |->vm_log2:
@@ -3217,105 +2929,15 @@ static void build_subroutines(BuildCtx *ctx)
3217 | 2929 |
3218 |// Generic power function x^y. Called by BC_POW, math.pow fast function, 2930 |// Generic power function x^y. Called by BC_POW, math.pow fast function,
3219 |// and vm_arith. 2931 |// and vm_arith.
3220 |// Args/ret on x87 stack (y on top). RC (eax) modified.
3221 |// Caveat: needs 3 slots on x87 stack!
3222 |->vm_pow:
3223 |.if not SSE
3224 | fist dword [esp+4] // Store/reload int before comparison.
3225 | fild dword [esp+4] // Integral exponent used in vm_powi.
3226 | fucomip st1
3227 | jnz >8 // Branch for FP exponents.
3228 | jp >9 // Branch for NaN exponent.
3229 | fpop // Pop y and fallthrough to vm_powi.
3230 |
3231 |// FP/int power function x^i. Arg1/ret on x87 stack.
3232 |// Arg2 (int) on C stack. RC (eax) modified.
3233 |// Caveat: needs 2 slots on x87 stack!
3234 | mov eax, [esp+4]
3235 | cmp eax, 1; jle >6 // i<=1?
3236 | // Now 1 < (unsigned)i <= 0x80000000.
3237 |1: // Handle leading zeros.
3238 | test eax, 1; jnz >2
3239 | fmul st0
3240 | shr eax, 1
3241 | jmp <1
3242 |2:
3243 | shr eax, 1; jz >5
3244 | fdup
3245 |3: // Handle trailing bits.
3246 | fmul st0
3247 | shr eax, 1; jz >4
3248 | jnc <3
3249 | fmul st1, st0
3250 | jmp <3
3251 |4:
3252 | fmulp st1
3253 |5:
3254 | ret
3255 |6:
3256 | je <5 // x^1 ==> x
3257 | jb >7
3258 | fld1; fdivrp st1
3259 | neg eax
3260 | cmp eax, 1; je <5 // x^-1 ==> 1/x
3261 | jmp <1 // x^-i ==> (1/x)^i
3262 |7:
3263 | fpop; fld1 // x^0 ==> 1
3264 | ret
3265 |
3266 |8: // FP/FP power function x^y.
3267 | fst dword [esp+4]
3268 | fxch
3269 | fst dword [esp+8]
3270 | mov eax, [esp+4]; shl eax, 1
3271 | cmp eax, 0xff000000; je >2 // x^+-Inf?
3272 | mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y?
3273 | cmp eax, 0xff000000; je >4 // +-Inf^y?
3274 | fyl2x
3275 | jmp ->vm_exp2raw
3276 |
3277 |9: // Handle x^NaN.
3278 | fld1
3279 | fucomip st2
3280 | je >1 // 1^NaN ==> 1
3281 | fxch // x^NaN ==> NaN
3282 |1:
3283 | fpop
3284 | ret
3285 |
3286 |2: // Handle x^+-Inf.
3287 | fabs
3288 | fld1
3289 | fucomip st1
3290 | je >3 // +-1^+-Inf ==> 1
3291 | fpop; fabs; fldz; mov eax, 0; setc al
3292 | ror eax, 1; xor eax, [esp+4]; jns >3 // |x|<>1, x^+-Inf ==> +Inf/0
3293 | fxch
3294 |3:
3295 | fpop1; fabs
3296 | ret
3297 |
3298 |4: // Handle +-0^y or +-Inf^y.
3299 | cmp dword [esp+4], 0; jge <3 // y >= 0, x^y ==> |x|
3300 | fpop; fpop
3301 | test eax, eax; jz >5 // y < 0, +-0^y ==> +Inf
3302 | fldz // y < 0, +-Inf^y ==> 0
3303 | ret
3304 |5:
3305 | mov dword [esp+4], 0x7f800000 // Return +Inf.
3306 | fld dword [esp+4]
3307 | ret
3308 |.endif
3309 |
3310 |// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified. 2932 |// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified.
3311 |// Needs 16 byte scratch area for x86. Also called from JIT code. 2933 |// Needs 16 byte scratch area for x86. Also called from JIT code.
3312 |->vm_pow_sse: 2934 |->vm_pow_sse:
3313 | cvtsd2si eax, xmm1 2935 | cvttsd2si eax, xmm1
3314 | cvtsi2sd xmm2, eax 2936 | cvtsi2sd xmm2, eax
3315 | ucomisd xmm1, xmm2 2937 | ucomisd xmm1, xmm2
3316 | jnz >8 // Branch for FP exponents. 2938 | jnz >8 // Branch for FP exponents.
3317 | jp >9 // Branch for NaN exponent. 2939 | jp >9 // Branch for NaN exponent.
3318 | // Fallthrough to vm_powi_sse. 2940 | // Fallthrough.
3319 | 2941 |
3320 |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified. 2942 |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
3321 |->vm_powi_sse: 2943 |->vm_powi_sse:
@@ -3437,8 +3059,8 @@ static void build_subroutines(BuildCtx *ctx)
3437 | .else 3059 | .else
3438 | .define fpmop, CARG1d 3060 | .define fpmop, CARG1d
3439 | .endif 3061 | .endif
3440 | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil 3062 | cmp fpmop, 1; jb ->vm_floor_sse; je ->vm_ceil_sse
3441 | cmp fpmop, 3; jb ->vm_trunc; ja >2 3063 | cmp fpmop, 3; jb ->vm_trunc_sse; ja >2
3442 | sqrtsd xmm0, xmm0; ret 3064 | sqrtsd xmm0, xmm0; ret
3443 |2: 3065 |2:
3444 | .if X64WIN 3066 | .if X64WIN
@@ -3478,14 +3100,13 @@ static void build_subroutines(BuildCtx *ctx)
3478 | ret 3100 | ret
3479 |.else // x86 calling convention. 3101 |.else // x86 calling convention.
3480 | .define fpmop, eax 3102 | .define fpmop, eax
3481 |.if SSE
3482 | mov fpmop, [esp+12] 3103 | mov fpmop, [esp+12]
3483 | movsd xmm0, qword [esp+4] 3104 | movsd xmm0, qword [esp+4]
3484 | cmp fpmop, 1; je >1; ja >2 3105 | cmp fpmop, 1; je >1; ja >2
3485 | call ->vm_floor; jmp >7 3106 | call ->vm_floor_sse; jmp >7
3486 |1: ; call ->vm_ceil; jmp >7 3107 |1: ; call ->vm_ceil_sse; jmp >7
3487 |2: ; cmp fpmop, 3; je >1; ja >2 3108 |2: ; cmp fpmop, 3; je >1; ja >2
3488 | call ->vm_trunc; jmp >7 3109 | call ->vm_trunc_sse; jmp >7
3489 |1: 3110 |1:
3490 | sqrtsd xmm0, xmm0 3111 | sqrtsd xmm0, xmm0
3491 |7: 3112 |7:
@@ -3503,23 +3124,6 @@ static void build_subroutines(BuildCtx *ctx)
3503 |2: ; cmp fpmop, 11; je >1; ja >9 3124 |2: ; cmp fpmop, 11; je >1; ja >9
3504 | fcos; ret 3125 | fcos; ret
3505 |1: ; fptan; fpop; ret 3126 |1: ; fptan; fpop; ret
3506 |.else
3507 | mov fpmop, [esp+12]
3508 | fld qword [esp+4]
3509 | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil
3510 | cmp fpmop, 3; jb ->vm_trunc; ja >2
3511 | fsqrt; ret
3512 |2: ; cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87
3513 | cmp fpmop, 7; je >1; ja >2
3514 | fldln2; fxch; fyl2x; ret
3515 |1: ; fld1; fxch; fyl2x; ret
3516 |2: ; cmp fpmop, 9; je >1; ja >2
3517 | fldlg2; fxch; fyl2x; ret
3518 |1: ; fsin; ret
3519 |2: ; cmp fpmop, 11; je >1; ja >9
3520 | fcos; ret
3521 |1: ; fptan; fpop; ret
3522 |.endif
3523 |.endif 3127 |.endif
3524 |9: ; int3 // Bad fpm. 3128 |9: ; int3 // Bad fpm.
3525 |.endif 3129 |.endif
@@ -3541,7 +3145,7 @@ static void build_subroutines(BuildCtx *ctx)
3541 |2: ; cmp foldop, 3; je >1; ja >2 3145 |2: ; cmp foldop, 3; je >1; ja >2
3542 | mulsd xmm0, xmm1; ret 3146 | mulsd xmm0, xmm1; ret
3543 |1: ; divsd xmm0, xmm1; ret 3147 |1: ; divsd xmm0, xmm1; ret
3544 |2: ; cmp foldop, 5; jb ->vm_mod; je ->vm_pow 3148 |2: ; cmp foldop, 5; jb ->vm_mod; je ->vm_pow_sse
3545 | cmp foldop, 7; je >1; ja >2 3149 | cmp foldop, 7; je >1; ja >2
3546 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; ret 3150 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; ret
3547 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; ret 3151 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; ret
@@ -3574,7 +3178,7 @@ static void build_subroutines(BuildCtx *ctx)
3574 |1: ; maxsd xmm0, xmm1; ret 3178 |1: ; maxsd xmm0, xmm1; ret
3575 |9: ; int3 // Bad op. 3179 |9: ; int3 // Bad op.
3576 | 3180 |
3577 |.elif SSE // x86 calling convention with SSE ops. 3181 |.else // x86 calling convention.
3578 | 3182 |
3579 | .define foldop, eax 3183 | .define foldop, eax
3580 | mov foldop, [esp+20] 3184 | mov foldop, [esp+20]
@@ -3593,7 +3197,7 @@ static void build_subroutines(BuildCtx *ctx)
3593 |2: ; cmp foldop, 5 3197 |2: ; cmp foldop, 5
3594 | je >1; ja >2 3198 | je >1; ja >2
3595 | call ->vm_mod; jmp <7 3199 | call ->vm_mod; jmp <7
3596 |1: ; pop edx; call ->vm_pow; push edx; jmp <7 // Writes to scratch area. 3200 |1: ; pop edx; call ->vm_pow_sse; push edx; jmp <7 // Writes to scratch area.
3597 |2: ; cmp foldop, 7; je >1; ja >2 3201 |2: ; cmp foldop, 7; je >1; ja >2
3598 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; jmp <7 3202 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; jmp <7
3599 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; jmp <7 3203 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; jmp <7
@@ -3608,29 +3212,6 @@ static void build_subroutines(BuildCtx *ctx)
3608 |1: ; maxsd xmm0, xmm1; jmp <7 3212 |1: ; maxsd xmm0, xmm1; jmp <7
3609 |9: ; int3 // Bad op. 3213 |9: ; int3 // Bad op.
3610 | 3214 |
3611 |.else // x86 calling convention with x87 ops.
3612 |
3613 | mov eax, [esp+20]
3614 | fld qword [esp+4]
3615 | fld qword [esp+12]
3616 | cmp eax, 1; je >1; ja >2
3617 | faddp st1; ret
3618 |1: ; fsubp st1; ret
3619 |2: ; cmp eax, 3; je >1; ja >2
3620 | fmulp st1; ret
3621 |1: ; fdivp st1; ret
3622 |2: ; cmp eax, 5; jb ->vm_mod; je ->vm_pow
3623 | cmp eax, 7; je >1; ja >2
3624 | fpop; fchs; ret
3625 |1: ; fpop; fabs; ret
3626 |2: ; cmp eax, 9; je >1; ja >2
3627 | fpatan; ret
3628 |1: ; fxch; fscale; fpop1; ret
3629 |2: ; cmp eax, 11; je >1; ja >9
3630 | fucomi st1; fcmovnbe st1; fpop1; ret
3631 |1: ; fucomi st1; fcmovbe st1; fpop1; ret
3632 |9: ; int3 // Bad op.
3633 |
3634 |.endif 3215 |.endif
3635 | 3216 |
3636 |//----------------------------------------------------------------------- 3217 |//-----------------------------------------------------------------------
@@ -3943,19 +3524,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3943 | // RA is a number. 3524 | // RA is a number.
3944 | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp 3525 | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp
3945 | // RA is a number, RD is an integer. 3526 | // RA is a number, RD is an integer.
3946 |.if SSE
3947 | cvtsi2sd xmm0, dword [BASE+RD*8] 3527 | cvtsi2sd xmm0, dword [BASE+RD*8]
3948 | jmp >2 3528 | jmp >2
3949 |.else
3950 | fld qword [BASE+RA*8]
3951 | fild dword [BASE+RD*8]
3952 | jmp >3
3953 |.endif
3954 | 3529 |
3955 |8: // RA is an integer, RD is not an integer. 3530 |8: // RA is an integer, RD is not an integer.
3956 | ja ->vmeta_comp 3531 | ja ->vmeta_comp
3957 | // RA is an integer, RD is a number. 3532 | // RA is an integer, RD is a number.
3958 |.if SSE
3959 | cvtsi2sd xmm1, dword [BASE+RA*8] 3533 | cvtsi2sd xmm1, dword [BASE+RA*8]
3960 | movsd xmm0, qword [BASE+RD*8] 3534 | movsd xmm0, qword [BASE+RD*8]
3961 | add PC, 4 3535 | add PC, 4
@@ -3963,29 +3537,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3963 | jmp_comp jbe, ja, jb, jae, <9 3537 | jmp_comp jbe, ja, jb, jae, <9
3964 | jmp <6 3538 | jmp <6
3965 |.else 3539 |.else
3966 | fild dword [BASE+RA*8]
3967 | jmp >2
3968 |.endif
3969 |.else
3970 | checknum RA, ->vmeta_comp 3540 | checknum RA, ->vmeta_comp
3971 | checknum RD, ->vmeta_comp 3541 | checknum RD, ->vmeta_comp
3972 |.endif 3542 |.endif
3973 |.if SSE
3974 |1: 3543 |1:
3975 | movsd xmm0, qword [BASE+RD*8] 3544 | movsd xmm0, qword [BASE+RD*8]
3976 |2: 3545 |2:
3977 | add PC, 4 3546 | add PC, 4
3978 | ucomisd xmm0, qword [BASE+RA*8] 3547 | ucomisd xmm0, qword [BASE+RA*8]
3979 |3: 3548 |3:
3980 |.else
3981 |1:
3982 | fld qword [BASE+RA*8] // Reverse order, i.e like cmp D, A.
3983 |2:
3984 | fld qword [BASE+RD*8]
3985 |3:
3986 | add PC, 4
3987 | fcomparepp
3988 |.endif
3989 | // Unordered: all of ZF CF PF set, ordered: PF clear. 3549 | // Unordered: all of ZF CF PF set, ordered: PF clear.
3990 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. 3550 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
3991 |.if DUALNUM 3551 |.if DUALNUM
@@ -4025,43 +3585,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4025 | // RD is a number. 3585 | // RD is a number.
4026 | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5 3586 | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5
4027 | // RD is a number, RA is an integer. 3587 | // RD is a number, RA is an integer.
4028 |.if SSE
4029 | cvtsi2sd xmm0, dword [BASE+RA*8] 3588 | cvtsi2sd xmm0, dword [BASE+RA*8]
4030 |.else
4031 | fild dword [BASE+RA*8]
4032 |.endif
4033 | jmp >2 3589 | jmp >2
4034 | 3590 |
4035 |8: // RD is an integer, RA is not an integer. 3591 |8: // RD is an integer, RA is not an integer.
4036 | ja >5 3592 | ja >5
4037 | // RD is an integer, RA is a number. 3593 | // RD is an integer, RA is a number.
4038 |.if SSE
4039 | cvtsi2sd xmm0, dword [BASE+RD*8] 3594 | cvtsi2sd xmm0, dword [BASE+RD*8]
4040 | ucomisd xmm0, qword [BASE+RA*8] 3595 | ucomisd xmm0, qword [BASE+RA*8]
4041 |.else
4042 | fild dword [BASE+RD*8]
4043 | fld qword [BASE+RA*8]
4044 |.endif
4045 | jmp >4 3596 | jmp >4
4046 | 3597 |
4047 |.else 3598 |.else
4048 | cmp RB, LJ_TISNUM; jae >5 3599 | cmp RB, LJ_TISNUM; jae >5
4049 | checknum RA, >5 3600 | checknum RA, >5
4050 |.endif 3601 |.endif
4051 |.if SSE
4052 |1: 3602 |1:
4053 | movsd xmm0, qword [BASE+RA*8] 3603 | movsd xmm0, qword [BASE+RA*8]
4054 |2: 3604 |2:
4055 | ucomisd xmm0, qword [BASE+RD*8] 3605 | ucomisd xmm0, qword [BASE+RD*8]
4056 |4: 3606 |4:
4057 |.else
4058 |1:
4059 | fld qword [BASE+RA*8]
4060 |2:
4061 | fld qword [BASE+RD*8]
4062 |4:
4063 | fcomparepp
4064 |.endif
4065 iseqne_fp: 3607 iseqne_fp:
4066 if (vk) { 3608 if (vk) {
4067 | jp >2 // Unordered means not equal. 3609 | jp >2 // Unordered means not equal.
@@ -4184,39 +3726,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4184 | // RA is a number. 3726 | // RA is a number.
4185 | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1 3727 | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1
4186 | // RA is a number, RD is an integer. 3728 | // RA is a number, RD is an integer.
4187 |.if SSE
4188 | cvtsi2sd xmm0, dword [KBASE+RD*8] 3729 | cvtsi2sd xmm0, dword [KBASE+RD*8]
4189 |.else
4190 | fild dword [KBASE+RD*8]
4191 |.endif
4192 | jmp >2 3730 | jmp >2
4193 | 3731 |
4194 |8: // RA is an integer, RD is a number. 3732 |8: // RA is an integer, RD is a number.
4195 |.if SSE
4196 | cvtsi2sd xmm0, dword [BASE+RA*8] 3733 | cvtsi2sd xmm0, dword [BASE+RA*8]
4197 | ucomisd xmm0, qword [KBASE+RD*8] 3734 | ucomisd xmm0, qword [KBASE+RD*8]
4198 |.else
4199 | fild dword [BASE+RA*8]
4200 | fld qword [KBASE+RD*8]
4201 |.endif
4202 | jmp >4 3735 | jmp >4
4203 |.else 3736 |.else
4204 | cmp RB, LJ_TISNUM; jae >3 3737 | cmp RB, LJ_TISNUM; jae >3
4205 |.endif 3738 |.endif
4206 |.if SSE
4207 |1: 3739 |1:
4208 | movsd xmm0, qword [KBASE+RD*8] 3740 | movsd xmm0, qword [KBASE+RD*8]
4209 |2: 3741 |2:
4210 | ucomisd xmm0, qword [BASE+RA*8] 3742 | ucomisd xmm0, qword [BASE+RA*8]
4211 |4: 3743 |4:
4212 |.else
4213 |1:
4214 | fld qword [KBASE+RD*8]
4215 |2:
4216 | fld qword [BASE+RA*8]
4217 |4:
4218 | fcomparepp
4219 |.endif
4220 goto iseqne_fp; 3744 goto iseqne_fp;
4221 case BC_ISEQP: case BC_ISNEP: 3745 case BC_ISEQP: case BC_ISNEP:
4222 vk = op == BC_ISEQP; 3746 vk = op == BC_ISEQP;
@@ -4267,6 +3791,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4267 | ins_next 3791 | ins_next
4268 break; 3792 break;
4269 3793
3794 case BC_ISTYPE:
3795 | ins_AD // RA = src, RD = -type
3796 | add RD, [BASE+RA*8+4]
3797 | jne ->vmeta_istype
3798 | ins_next
3799 break;
3800 case BC_ISNUM:
3801 | ins_AD // RA = src, RD = -(TISNUM-1)
3802 | checknum RA, ->vmeta_istype
3803 | ins_next
3804 break;
3805
4270 /* -- Unary ops --------------------------------------------------------- */ 3806 /* -- Unary ops --------------------------------------------------------- */
4271 3807
4272 case BC_MOV: 3808 case BC_MOV:
@@ -4310,16 +3846,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4310 |.else 3846 |.else
4311 | checknum RD, ->vmeta_unm 3847 | checknum RD, ->vmeta_unm
4312 |.endif 3848 |.endif
4313 |.if SSE
4314 | movsd xmm0, qword [BASE+RD*8] 3849 | movsd xmm0, qword [BASE+RD*8]
4315 | sseconst_sign xmm1, RDa 3850 | sseconst_sign xmm1, RDa
4316 | xorps xmm0, xmm1 3851 | xorps xmm0, xmm1
4317 | movsd qword [BASE+RA*8], xmm0 3852 | movsd qword [BASE+RA*8], xmm0
4318 |.else
4319 | fld qword [BASE+RD*8]
4320 | fchs
4321 | fstp qword [BASE+RA*8]
4322 |.endif
4323 |.if DUALNUM 3853 |.if DUALNUM
4324 | jmp <9 3854 | jmp <9
4325 |.else 3855 |.else
@@ -4335,15 +3865,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4335 |1: 3865 |1:
4336 | mov dword [BASE+RA*8+4], LJ_TISNUM 3866 | mov dword [BASE+RA*8+4], LJ_TISNUM
4337 | mov dword [BASE+RA*8], RD 3867 | mov dword [BASE+RA*8], RD
4338 |.elif SSE 3868 |.else
4339 | xorps xmm0, xmm0 3869 | xorps xmm0, xmm0
4340 | cvtsi2sd xmm0, dword STR:RD->len 3870 | cvtsi2sd xmm0, dword STR:RD->len
4341 |1: 3871 |1:
4342 | movsd qword [BASE+RA*8], xmm0 3872 | movsd qword [BASE+RA*8], xmm0
4343 |.else
4344 | fild dword STR:RD->len
4345 |1:
4346 | fstp qword [BASE+RA*8]
4347 |.endif 3873 |.endif
4348 | ins_next 3874 | ins_next
4349 |2: 3875 |2:
@@ -4361,11 +3887,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4361 | // Length of table returned in eax (RD). 3887 | // Length of table returned in eax (RD).
4362 |.if DUALNUM 3888 |.if DUALNUM
4363 | // Nothing to do. 3889 | // Nothing to do.
4364 |.elif SSE
4365 | cvtsi2sd xmm0, RD
4366 |.else 3890 |.else
4367 | mov ARG1, RD 3891 | cvtsi2sd xmm0, RD
4368 | fild ARG1
4369 |.endif 3892 |.endif
4370 | mov BASE, RB // Restore BASE. 3893 | mov BASE, RB // Restore BASE.
4371 | movzx RA, PC_RA 3894 | movzx RA, PC_RA
@@ -4380,7 +3903,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4380 3903
4381 /* -- Binary ops -------------------------------------------------------- */ 3904 /* -- Binary ops -------------------------------------------------------- */
4382 3905
4383 |.macro ins_arithpre, x87ins, sseins, ssereg 3906 |.macro ins_arithpre, sseins, ssereg
4384 | ins_ABC 3907 | ins_ABC
4385 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); 3908 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
4386 ||switch (vk) { 3909 ||switch (vk) {
@@ -4389,37 +3912,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4389 | .if DUALNUM 3912 | .if DUALNUM
4390 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn 3913 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn
4391 | .endif 3914 | .endif
4392 | .if SSE 3915 | movsd xmm0, qword [BASE+RB*8]
4393 | movsd xmm0, qword [BASE+RB*8] 3916 | sseins ssereg, qword [KBASE+RC*8]
4394 | sseins ssereg, qword [KBASE+RC*8]
4395 | .else
4396 | fld qword [BASE+RB*8]
4397 | x87ins qword [KBASE+RC*8]
4398 | .endif
4399 || break; 3917 || break;
4400 ||case 1: 3918 ||case 1:
4401 | checknum RB, ->vmeta_arith_nv 3919 | checknum RB, ->vmeta_arith_nv
4402 | .if DUALNUM 3920 | .if DUALNUM
4403 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv 3921 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv
4404 | .endif 3922 | .endif
4405 | .if SSE 3923 | movsd xmm0, qword [KBASE+RC*8]
4406 | movsd xmm0, qword [KBASE+RC*8] 3924 | sseins ssereg, qword [BASE+RB*8]
4407 | sseins ssereg, qword [BASE+RB*8]
4408 | .else
4409 | fld qword [KBASE+RC*8]
4410 | x87ins qword [BASE+RB*8]
4411 | .endif
4412 || break; 3925 || break;
4413 ||default: 3926 ||default:
4414 | checknum RB, ->vmeta_arith_vv 3927 | checknum RB, ->vmeta_arith_vv
4415 | checknum RC, ->vmeta_arith_vv 3928 | checknum RC, ->vmeta_arith_vv
4416 | .if SSE 3929 | movsd xmm0, qword [BASE+RB*8]
4417 | movsd xmm0, qword [BASE+RB*8] 3930 | sseins ssereg, qword [BASE+RC*8]
4418 | sseins ssereg, qword [BASE+RC*8]
4419 | .else
4420 | fld qword [BASE+RB*8]
4421 | x87ins qword [BASE+RC*8]
4422 | .endif
4423 || break; 3931 || break;
4424 ||} 3932 ||}
4425 |.endmacro 3933 |.endmacro
@@ -4457,54 +3965,50 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4457 |.endmacro 3965 |.endmacro
4458 | 3966 |
4459 |.macro ins_arithpost 3967 |.macro ins_arithpost
4460 |.if SSE
4461 | movsd qword [BASE+RA*8], xmm0 3968 | movsd qword [BASE+RA*8], xmm0
4462 |.else
4463 | fstp qword [BASE+RA*8]
4464 |.endif
4465 |.endmacro 3969 |.endmacro
4466 | 3970 |
4467 |.macro ins_arith, x87ins, sseins 3971 |.macro ins_arith, sseins
4468 | ins_arithpre x87ins, sseins, xmm0 3972 | ins_arithpre sseins, xmm0
4469 | ins_arithpost 3973 | ins_arithpost
4470 | ins_next 3974 | ins_next
4471 |.endmacro 3975 |.endmacro
4472 | 3976 |
4473 |.macro ins_arith, intins, x87ins, sseins 3977 |.macro ins_arith, intins, sseins
4474 |.if DUALNUM 3978 |.if DUALNUM
4475 | ins_arithdn intins 3979 | ins_arithdn intins
4476 |.else 3980 |.else
4477 | ins_arith, x87ins, sseins 3981 | ins_arith, sseins
4478 |.endif 3982 |.endif
4479 |.endmacro 3983 |.endmacro
4480 3984
4481 | // RA = dst, RB = src1 or num const, RC = src2 or num const 3985 | // RA = dst, RB = src1 or num const, RC = src2 or num const
4482 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: 3986 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
4483 | ins_arith add, fadd, addsd 3987 | ins_arith add, addsd
4484 break; 3988 break;
4485 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: 3989 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
4486 | ins_arith sub, fsub, subsd 3990 | ins_arith sub, subsd
4487 break; 3991 break;
4488 case BC_MULVN: case BC_MULNV: case BC_MULVV: 3992 case BC_MULVN: case BC_MULNV: case BC_MULVV:
4489 | ins_arith imul, fmul, mulsd 3993 | ins_arith imul, mulsd
4490 break; 3994 break;
4491 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: 3995 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
4492 | ins_arith fdiv, divsd 3996 | ins_arith divsd
4493 break; 3997 break;
4494 case BC_MODVN: 3998 case BC_MODVN:
4495 | ins_arithpre fld, movsd, xmm1 3999 | ins_arithpre movsd, xmm1
4496 |->BC_MODVN_Z: 4000 |->BC_MODVN_Z:
4497 | call ->vm_mod 4001 | call ->vm_mod
4498 | ins_arithpost 4002 | ins_arithpost
4499 | ins_next 4003 | ins_next
4500 break; 4004 break;
4501 case BC_MODNV: case BC_MODVV: 4005 case BC_MODNV: case BC_MODVV:
4502 | ins_arithpre fld, movsd, xmm1 4006 | ins_arithpre movsd, xmm1
4503 | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. 4007 | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
4504 break; 4008 break;
4505 case BC_POW: 4009 case BC_POW:
4506 | ins_arithpre fld, movsd, xmm1 4010 | ins_arithpre movsd, xmm1
4507 | call ->vm_pow 4011 | call ->vm_pow_sse
4508 | ins_arithpost 4012 | ins_arithpost
4509 | ins_next 4013 | ins_next
4510 break; 4014 break;
@@ -4573,25 +4077,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4573 | movsx RD, RDW 4077 | movsx RD, RDW
4574 | mov dword [BASE+RA*8+4], LJ_TISNUM 4078 | mov dword [BASE+RA*8+4], LJ_TISNUM
4575 | mov dword [BASE+RA*8], RD 4079 | mov dword [BASE+RA*8], RD
4576 |.elif SSE 4080 |.else
4577 | movsx RD, RDW // Sign-extend literal. 4081 | movsx RD, RDW // Sign-extend literal.
4578 | cvtsi2sd xmm0, RD 4082 | cvtsi2sd xmm0, RD
4579 | movsd qword [BASE+RA*8], xmm0 4083 | movsd qword [BASE+RA*8], xmm0
4580 |.else
4581 | fild PC_RD // Refetch signed RD from instruction.
4582 | fstp qword [BASE+RA*8]
4583 |.endif 4084 |.endif
4584 | ins_next 4085 | ins_next
4585 break; 4086 break;
4586 case BC_KNUM: 4087 case BC_KNUM:
4587 | ins_AD // RA = dst, RD = num const 4088 | ins_AD // RA = dst, RD = num const
4588 |.if SSE
4589 | movsd xmm0, qword [KBASE+RD*8] 4089 | movsd xmm0, qword [KBASE+RD*8]
4590 | movsd qword [BASE+RA*8], xmm0 4090 | movsd qword [BASE+RA*8], xmm0
4591 |.else
4592 | fld qword [KBASE+RD*8]
4593 | fstp qword [BASE+RA*8]
4594 |.endif
4595 | ins_next 4091 | ins_next
4596 break; 4092 break;
4597 case BC_KPRI: 4093 case BC_KPRI:
@@ -4698,18 +4194,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4698 case BC_USETN: 4194 case BC_USETN:
4699 | ins_AD // RA = upvalue #, RD = num const 4195 | ins_AD // RA = upvalue #, RD = num const
4700 | mov LFUNC:RB, [BASE-8] 4196 | mov LFUNC:RB, [BASE-8]
4701 |.if SSE
4702 | movsd xmm0, qword [KBASE+RD*8] 4197 | movsd xmm0, qword [KBASE+RD*8]
4703 |.else
4704 | fld qword [KBASE+RD*8]
4705 |.endif
4706 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] 4198 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
4707 | mov RA, UPVAL:RB->v 4199 | mov RA, UPVAL:RB->v
4708 |.if SSE
4709 | movsd qword [RA], xmm0 4200 | movsd qword [RA], xmm0
4710 |.else
4711 | fstp qword [RA]
4712 |.endif
4713 | ins_next 4201 | ins_next
4714 break; 4202 break;
4715 case BC_USETP: 4203 case BC_USETP:
@@ -4863,18 +4351,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4863 |.else 4351 |.else
4864 | // Convert number to int and back and compare. 4352 | // Convert number to int and back and compare.
4865 | checknum RC, >5 4353 | checknum RC, >5
4866 |.if SSE
4867 | movsd xmm0, qword [BASE+RC*8] 4354 | movsd xmm0, qword [BASE+RC*8]
4868 | cvtsd2si RC, xmm0 4355 | cvttsd2si RC, xmm0
4869 | cvtsi2sd xmm1, RC 4356 | cvtsi2sd xmm1, RC
4870 | ucomisd xmm0, xmm1 4357 | ucomisd xmm0, xmm1
4871 |.else
4872 | fld qword [BASE+RC*8]
4873 | fist ARG1
4874 | fild ARG1
4875 | fcomparepp
4876 | mov RC, ARG1
4877 |.endif
4878 | jne ->vmeta_tgetv // Generic numeric key? Use fallback. 4358 | jne ->vmeta_tgetv // Generic numeric key? Use fallback.
4879 |.endif 4359 |.endif
4880 | cmp RC, TAB:RB->asize // Takes care of unordered, too. 4360 | cmp RC, TAB:RB->asize // Takes care of unordered, too.
@@ -4998,6 +4478,32 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4998 | mov dword [BASE+RA*8+4], LJ_TNIL 4478 | mov dword [BASE+RA*8+4], LJ_TNIL
4999 | jmp <1 4479 | jmp <1
5000 break; 4480 break;
4481 case BC_TGETR:
4482 | ins_ABC // RA = dst, RB = table, RC = key
4483 | mov TAB:RB, [BASE+RB*8]
4484 |.if DUALNUM
4485 | mov RC, dword [BASE+RC*8]
4486 |.else
4487 | cvttsd2si RC, qword [BASE+RC*8]
4488 |.endif
4489 | cmp RC, TAB:RB->asize
4490 | jae ->vmeta_tgetr // Not in array part? Use fallback.
4491 | shl RC, 3
4492 | add RC, TAB:RB->array
4493 | // Get array slot.
4494 |->BC_TGETR_Z:
4495 |.if X64
4496 | mov RBa, [RC]
4497 | mov [BASE+RA*8], RBa
4498 |.else
4499 | mov RB, [RC]
4500 | mov RC, [RC+4]
4501 | mov [BASE+RA*8], RB
4502 | mov [BASE+RA*8+4], RC
4503 |.endif
4504 |->BC_TGETR2_Z:
4505 | ins_next
4506 break;
5001 4507
5002 case BC_TSETV: 4508 case BC_TSETV:
5003 | ins_ABC // RA = src, RB = table, RC = key 4509 | ins_ABC // RA = src, RB = table, RC = key
@@ -5011,18 +4517,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5011 |.else 4517 |.else
5012 | // Convert number to int and back and compare. 4518 | // Convert number to int and back and compare.
5013 | checknum RC, >5 4519 | checknum RC, >5
5014 |.if SSE
5015 | movsd xmm0, qword [BASE+RC*8] 4520 | movsd xmm0, qword [BASE+RC*8]
5016 | cvtsd2si RC, xmm0 4521 | cvttsd2si RC, xmm0
5017 | cvtsi2sd xmm1, RC 4522 | cvtsi2sd xmm1, RC
5018 | ucomisd xmm0, xmm1 4523 | ucomisd xmm0, xmm1
5019 |.else
5020 | fld qword [BASE+RC*8]
5021 | fist ARG1
5022 | fild ARG1
5023 | fcomparepp
5024 | mov RC, ARG1
5025 |.endif
5026 | jne ->vmeta_tsetv // Generic numeric key? Use fallback. 4524 | jne ->vmeta_tsetv // Generic numeric key? Use fallback.
5027 |.endif 4525 |.endif
5028 | cmp RC, TAB:RB->asize // Takes care of unordered, too. 4526 | cmp RC, TAB:RB->asize // Takes care of unordered, too.
@@ -5192,6 +4690,39 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5192 | movzx RA, PC_RA // Restore RA. 4690 | movzx RA, PC_RA // Restore RA.
5193 | jmp <2 4691 | jmp <2
5194 break; 4692 break;
4693 case BC_TSETR:
4694 | ins_ABC // RA = src, RB = table, RC = key
4695 | mov TAB:RB, [BASE+RB*8]
4696 |.if DUALNUM
4697 | mov RC, dword [BASE+RC*8]
4698 |.else
4699 | cvttsd2si RC, qword [BASE+RC*8]
4700 |.endif
4701 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
4702 | jnz >7
4703 |2:
4704 | cmp RC, TAB:RB->asize
4705 | jae ->vmeta_tsetr
4706 | shl RC, 3
4707 | add RC, TAB:RB->array
4708 | // Set array slot.
4709 |->BC_TSETR_Z:
4710 |.if X64
4711 | mov RBa, [BASE+RA*8]
4712 | mov [RC], RBa
4713 |.else
4714 | mov RB, [BASE+RA*8+4]
4715 | mov RA, [BASE+RA*8]
4716 | mov [RC+4], RB
4717 | mov [RC], RA
4718 |.endif
4719 | ins_next
4720 |
4721 |7: // Possible table write barrier for the value. Skip valiswhite check.
4722 | barrierback TAB:RB, RA
4723 | movzx RA, PC_RA // Restore RA.
4724 | jmp <2
4725 break;
5195 4726
5196 case BC_TSETM: 4727 case BC_TSETM:
5197 | ins_AD // RA = base (table at base-1), RD = num const (start index) 4728 | ins_AD // RA = base (table at base-1), RD = num const (start index)
@@ -5386,10 +4917,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5386 |.if DUALNUM 4917 |.if DUALNUM
5387 | mov dword [BASE+RA*8+4], LJ_TISNUM 4918 | mov dword [BASE+RA*8+4], LJ_TISNUM
5388 | mov dword [BASE+RA*8], RC 4919 | mov dword [BASE+RA*8], RC
5389 |.elif SSE
5390 | cvtsi2sd xmm0, RC
5391 |.else 4920 |.else
5392 | fild dword [BASE+RA*8-8] 4921 | cvtsi2sd xmm0, RC
5393 |.endif 4922 |.endif
5394 | // Copy array slot to returned value. 4923 | // Copy array slot to returned value.
5395 |.if X64 4924 |.if X64
@@ -5405,10 +4934,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5405 | // Return array index as a numeric key. 4934 | // Return array index as a numeric key.
5406 |.if DUALNUM 4935 |.if DUALNUM
5407 | // See above. 4936 | // See above.
5408 |.elif SSE
5409 | movsd qword [BASE+RA*8], xmm0
5410 |.else 4937 |.else
5411 | fstp qword [BASE+RA*8] 4938 | movsd qword [BASE+RA*8], xmm0
5412 |.endif 4939 |.endif
5413 | mov [BASE+RA*8-8], RC // Update control var. 4940 | mov [BASE+RA*8-8], RC // Update control var.
5414 |2: 4941 |2:
@@ -5421,9 +4948,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5421 | 4948 |
5422 |4: // Skip holes in array part. 4949 |4: // Skip holes in array part.
5423 | add RC, 1 4950 | add RC, 1
5424 |.if not (DUALNUM or SSE)
5425 | mov [BASE+RA*8-8], RC
5426 |.endif
5427 | jmp <1 4951 | jmp <1
5428 | 4952 |
5429 |5: // Traverse hash part. 4953 |5: // Traverse hash part.
@@ -5757,7 +5281,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5757 if (!vk) { 5281 if (!vk) {
5758 | cmp RB, LJ_TISNUM; jae ->vmeta_for 5282 | cmp RB, LJ_TISNUM; jae ->vmeta_for
5759 } 5283 }
5760 |.if SSE
5761 | movsd xmm0, qword FOR_IDX 5284 | movsd xmm0, qword FOR_IDX
5762 | movsd xmm1, qword FOR_STOP 5285 | movsd xmm1, qword FOR_STOP
5763 if (vk) { 5286 if (vk) {
@@ -5770,22 +5293,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5770 | ucomisd xmm1, xmm0 5293 | ucomisd xmm1, xmm0
5771 |1: 5294 |1:
5772 | movsd qword FOR_EXT, xmm0 5295 | movsd qword FOR_EXT, xmm0
5773 |.else
5774 | fld qword FOR_STOP
5775 | fld qword FOR_IDX
5776 if (vk) {
5777 | fadd qword FOR_STEP // nidx = idx + step
5778 | fst qword FOR_IDX
5779 | fst qword FOR_EXT
5780 | test RB, RB; js >1
5781 } else {
5782 | fst qword FOR_EXT
5783 | jl >1
5784 }
5785 | fxch // Swap lim/(n)idx if step non-negative.
5786 |1:
5787 | fcomparepp
5788 |.endif
5789 if (op == BC_FORI) { 5296 if (op == BC_FORI) {
5790 |.if DUALNUM 5297 |.if DUALNUM
5791 | jnb <7 5298 | jnb <7
@@ -5813,11 +5320,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5813 |2: 5320 |2:
5814 | ins_next 5321 | ins_next
5815 |.endif 5322 |.endif
5816 |.if SSE 5323 |
5817 |3: // Invert comparison if step is negative. 5324 |3: // Invert comparison if step is negative.
5818 | ucomisd xmm0, xmm1 5325 | ucomisd xmm0, xmm1
5819 | jmp <1 5326 | jmp <1
5820 |.endif
5821 break; 5327 break;
5822 5328
5823 case BC_ITERL: 5329 case BC_ITERL:
@@ -5875,6 +5381,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5875 | mov L:RB, SAVE_L 5381 | mov L:RB, SAVE_L
5876 | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE 5382 | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE
5877 | mov [DISPATCH+DISPATCH_GL(jit_L)], L:RB 5383 | mov [DISPATCH+DISPATCH_GL(jit_L)], L:RB
5384 | mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB
5878 | // Save additional callee-save registers only used in compiled code. 5385 | // Save additional callee-save registers only used in compiled code.
5879 |.if X64WIN 5386 |.if X64WIN
5880 | mov TMPQ, r12 5387 | mov TMPQ, r12