aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/Makefile19
-rw-r--r--src/Makefile.dep132
-rw-r--r--src/host/buildvm_lib.c59
-rw-r--r--src/host/buildvm_libbc.h28
-rw-r--r--src/host/genlibbc.lua197
-rw-r--r--src/jit/bc.lua2
-rw-r--r--src/jit/bcsave.lua2
-rw-r--r--src/jit/dump.lua5
-rw-r--r--src/jit/v.lua2
-rw-r--r--src/lib_base.c18
-rw-r--r--src/lib_bit.c98
-rw-r--r--src/lib_io.c24
-rw-r--r--src/lib_jit.c24
-rw-r--r--src/lib_math.c7
-rw-r--r--src/lib_os.c34
-rw-r--r--src/lib_string.c234
-rw-r--r--src/lib_table.c137
-rw-r--r--src/lj_arch.h1
-rw-r--r--src/lj_asm.c2
-rw-r--r--src/lj_asm_mips.h2
-rw-r--r--src/lj_asm_x86.h30
-rw-r--r--src/lj_bc.h4
-rw-r--r--src/lj_bcdump.h3
-rw-r--r--src/lj_bcread.c136
-rw-r--r--src/lj_bcwrite.c226
-rw-r--r--src/lj_buf.c87
-rw-r--r--src/lj_buf.h76
-rw-r--r--src/lj_carith.c76
-rw-r--r--src/lj_carith.h10
-rw-r--r--src/lj_cparse.c38
-rw-r--r--src/lj_crecord.c106
-rw-r--r--src/lj_crecord.h6
-rw-r--r--src/lj_ctype.c2
-rw-r--r--src/lj_debug.c36
-rw-r--r--src/lj_debug.h2
-rw-r--r--src/lj_dispatch.h12
-rw-r--r--src/lj_emit_x86.h10
-rw-r--r--src/lj_err.c2
-rw-r--r--src/lj_ffrecord.c114
-rw-r--r--src/lj_gc.c4
-rw-r--r--src/lj_gdbjit.c13
-rw-r--r--src/lj_ir.h1
-rw-r--r--src/lj_ircall.h7
-rw-r--r--src/lj_jit.h17
-rw-r--r--src/lj_lex.c341
-rw-r--r--src/lj_lex.h17
-rw-r--r--src/lj_lib.c27
-rw-r--r--src/lj_lib.h4
-rw-r--r--src/lj_load.c3
-rw-r--r--src/lj_meta.c22
-rw-r--r--src/lj_meta.h1
-rw-r--r--src/lj_obj.h19
-rw-r--r--src/lj_opt_fold.c68
-rw-r--r--src/lj_opt_loop.c4
-rw-r--r--src/lj_opt_narrow.c3
-rw-r--r--src/lj_opt_split.c130
-rw-r--r--src/lj_parse.c165
-rw-r--r--src/lj_record.c16
-rw-r--r--src/lj_state.c5
-rw-r--r--src/lj_str.c140
-rw-r--r--src/lj_str.h17
-rw-r--r--src/lj_tab.h2
-rw-r--r--src/lj_target_arm.h4
-rw-r--r--src/lj_target_x86.h2
-rw-r--r--src/lj_vm.h4
-rw-r--r--src/ljamalg.c1
-rw-r--r--src/luaconf.h4
-rw-r--r--src/luajit.h6
-rw-r--r--src/msvcbuild.bat1
-rw-r--r--src/vm_arm.dasc138
-rw-r--r--src/vm_mips.dasc164
-rw-r--r--src/vm_ppc.dasc144
-rw-r--r--src/vm_ppcspe.dasc6
-rw-r--r--src/vm_x86.dasc872
74 files changed, 2455 insertions, 1920 deletions
diff --git a/src/Makefile b/src/Makefile
index 278324a1..0065b8c2 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -42,13 +42,10 @@ CCOPT= -O2 -fomit-frame-pointer
42# 42#
43# Target-specific compiler options: 43# Target-specific compiler options:
44# 44#
45# x86 only: it's recommended to compile at least for i686. Better yet,
46# compile for an architecture that has SSE2, too (-msse -msse2).
47#
48# x86/x64 only: For GCC 4.2 or higher and if you don't intend to distribute 45# x86/x64 only: For GCC 4.2 or higher and if you don't intend to distribute
49# the binaries to a different machine you could also use: -march=native 46# the binaries to a different machine you could also use: -march=native
50# 47#
51CCOPT_x86= -march=i686 48CCOPT_x86= -march=i686 -msse -msse2 -mfpmath=sse
52CCOPT_x64= 49CCOPT_x64=
53CCOPT_arm= 50CCOPT_arm=
54CCOPT_ppc= 51CCOPT_ppc=
@@ -394,11 +391,6 @@ DASM_AFLAGS+= -D VER=$(subst LJ_ARCH_VERSION_,,$(filter LJ_ARCH_VERSION_%,$(subs
394ifeq (Windows,$(TARGET_SYS)) 391ifeq (Windows,$(TARGET_SYS))
395 DASM_AFLAGS+= -D WIN 392 DASM_AFLAGS+= -D WIN
396endif 393endif
397ifeq (x86,$(TARGET_LJARCH))
398 ifneq (,$(findstring __SSE2__ 1,$(TARGET_TESTARCH)))
399 DASM_AFLAGS+= -D SSE
400 endif
401else
402ifeq (x64,$(TARGET_LJARCH)) 394ifeq (x64,$(TARGET_LJARCH))
403 DASM_ARCH= x86 395 DASM_ARCH= x86
404else 396else
@@ -423,7 +415,6 @@ ifeq (ppc,$(TARGET_LJARCH))
423endif 415endif
424endif 416endif
425endif 417endif
426endif
427 418
428DASM_FLAGS= $(DASM_XFLAGS) $(DASM_AFLAGS) 419DASM_FLAGS= $(DASM_XFLAGS) $(DASM_AFLAGS)
429DASM_DASC= vm_$(DASM_ARCH).dasc 420DASM_DASC= vm_$(DASM_ARCH).dasc
@@ -445,7 +436,7 @@ LJLIB_O= lib_base.o lib_math.o lib_bit.o lib_string.o lib_table.o \
445 lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o 436 lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o
446LJLIB_C= $(LJLIB_O:.o=.c) 437LJLIB_C= $(LJLIB_O:.o=.c)
447 438
448LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o \ 439LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \
449 lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \ 440 lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \
450 lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_strscan.o \ 441 lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_strscan.o \
451 lj_api.o lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \ 442 lj_api.o lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \
@@ -567,6 +558,10 @@ amalg:
567clean: 558clean:
568 $(HOST_RM) $(ALL_RM) 559 $(HOST_RM) $(ALL_RM)
569 560
561libbc:
562 ./$(LUAJIT_T) host/genlibbc.lua -o host/buildvm_libbc.h $(LJLIB_C)
563 $(MAKE) all
564
570depend: 565depend:
571 @for file in $(ALL_HDRGEN); do \ 566 @for file in $(ALL_HDRGEN); do \
572 test -f $$file || touch $$file; \ 567 test -f $$file || touch $$file; \
@@ -581,7 +576,7 @@ depend:
581 test -s $$file || $(HOST_RM) $$file; \ 576 test -s $$file || $(HOST_RM) $$file; \
582 done 577 done
583 578
584.PHONY: default all amalg clean depend 579.PHONY: default all amalg clean libbc depend
585 580
586############################################################################## 581##############################################################################
587# Rules for generated files. 582# Rules for generated files.
diff --git a/src/Makefile.dep b/src/Makefile.dep
index 5d91723a..82834811 100644
--- a/src/Makefile.dep
+++ b/src/Makefile.dep
@@ -7,7 +7,8 @@ lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
7 lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h lj_strscan.h \ 7 lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h lj_strscan.h \
8 lj_lib.h lj_libdef.h 8 lj_lib.h lj_libdef.h
9lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ 9lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
10 lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_lib.h lj_libdef.h 10 lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_ctype.h lj_gc.h lj_cdata.h \
11 lj_cconv.h lj_carith.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h
11lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ 12lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
12 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_lib.h \ 13 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_lib.h \
13 lj_libdef.h 14 lj_libdef.h
@@ -17,8 +18,8 @@ lib_ffi.o: lib_ffi.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
17 lj_ccallback.h lj_clib.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h 18 lj_ccallback.h lj_clib.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h
18lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h 19lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h
19lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ 20lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
20 lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_state.h lj_ff.h lj_ffdef.h \ 21 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_state.h \
21 lj_lib.h lj_libdef.h 22 lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h
22lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h \ 23lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h \
23 lj_obj.h lj_def.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h \ 24 lj_obj.h lj_def.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h \
24 lj_bc.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_target.h \ 25 lj_bc.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_target.h \
@@ -27,16 +28,17 @@ lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h \
27lib_math.o: lib_math.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ 28lib_math.o: lib_math.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
28 lj_def.h lj_arch.h lj_lib.h lj_vm.h lj_libdef.h 29 lj_def.h lj_arch.h lj_lib.h lj_vm.h lj_libdef.h
29lib_os.o: lib_os.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ 30lib_os.o: lib_os.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
30 lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lj_libdef.h 31 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_lib.h \
32 lj_libdef.h
31lib_package.o: lib_package.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ 33lib_package.o: lib_package.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
32 lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h 34 lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h
33lib_string.o: lib_string.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ 35lib_string.o: lib_string.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
34 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h \ 36 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \
35 lj_meta.h lj_state.h lj_ff.h lj_ffdef.h lj_bcdump.h lj_lex.h lj_char.h \ 37 lj_tab.h lj_meta.h lj_state.h lj_ff.h lj_ffdef.h lj_bcdump.h lj_lex.h \
36 lj_lib.h lj_libdef.h 38 lj_char.h lj_lib.h lj_libdef.h
37lib_table.o: lib_table.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ 39lib_table.o: lib_table.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
38 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_lib.h \ 40 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \
39 lj_libdef.h 41 lj_tab.h lj_lib.h lj_libdef.h
40lj_alloc.o: lj_alloc.c lj_def.h lua.h luaconf.h lj_arch.h lj_alloc.h 42lj_alloc.o: lj_alloc.c lj_def.h lua.h luaconf.h lj_arch.h lj_alloc.h
41lj_api.o: lj_api.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 43lj_api.o: lj_api.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
42 lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h lj_udata.h \ 44 lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h lj_udata.h \
@@ -50,14 +52,16 @@ lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
50lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h \ 52lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h \
51 lj_bcdef.h 53 lj_bcdef.h
52lj_bcread.o: lj_bcread.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 54lj_bcread.o: lj_bcread.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
53 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_bc.h lj_ctype.h \ 55 lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_bc.h \
54 lj_cdata.h lualib.h lj_lex.h lj_bcdump.h lj_state.h 56 lj_ctype.h lj_cdata.h lualib.h lj_lex.h lj_bcdump.h lj_state.h
55lj_bcwrite.o: lj_bcwrite.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 57lj_bcwrite.o: lj_bcwrite.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
56 lj_gc.h lj_str.h lj_bc.h lj_ctype.h lj_dispatch.h lj_jit.h lj_ir.h \ 58 lj_gc.h lj_buf.h lj_str.h lj_bc.h lj_ctype.h lj_dispatch.h lj_jit.h \
57 lj_bcdump.h lj_lex.h lj_err.h lj_errmsg.h lj_vm.h 59 lj_ir.h lj_bcdump.h lj_lex.h lj_err.h lj_errmsg.h lj_vm.h
60lj_buf.o: lj_buf.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
61 lj_err.h lj_errmsg.h lj_buf.h lj_str.h
58lj_carith.o: lj_carith.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 62lj_carith.o: lj_carith.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
59 lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_meta.h lj_ctype.h lj_cconv.h \ 63 lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_meta.h lj_ir.h lj_ctype.h \
60 lj_cdata.h lj_carith.h 64 lj_cconv.h lj_cdata.h lj_carith.h lj_strscan.h
61lj_ccall.o: lj_ccall.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 65lj_ccall.o: lj_ccall.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
62 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cconv.h \ 66 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cconv.h \
63 lj_cdata.h lj_ccall.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \ 67 lj_cdata.h lj_ccall.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \
@@ -78,8 +82,8 @@ lj_clib.o: lj_clib.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
78 lj_err.h lj_errmsg.h lj_tab.h lj_str.h lj_udata.h lj_ctype.h lj_cconv.h \ 82 lj_err.h lj_errmsg.h lj_tab.h lj_str.h lj_udata.h lj_ctype.h lj_cconv.h \
79 lj_cdata.h lj_clib.h 83 lj_cdata.h lj_clib.h
80lj_cparse.o: lj_cparse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 84lj_cparse.o: lj_cparse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
81 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_ctype.h lj_cparse.h lj_frame.h \ 85 lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_ctype.h lj_cparse.h \
82 lj_bc.h lj_vm.h lj_char.h lj_strscan.h 86 lj_frame.h lj_bc.h lj_vm.h lj_char.h lj_strscan.h
83lj_crecord.o: lj_crecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 87lj_crecord.o: lj_crecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
84 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h \ 88 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h \
85 lj_gc.h lj_cdata.h lj_cparse.h lj_cconv.h lj_clib.h lj_ccall.h lj_ff.h \ 89 lj_gc.h lj_cdata.h lj_cparse.h lj_cconv.h lj_clib.h lj_ccall.h lj_ff.h \
@@ -89,8 +93,8 @@ lj_crecord.o: lj_crecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
89lj_ctype.o: lj_ctype.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 93lj_ctype.o: lj_ctype.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
90 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_ccallback.h 94 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_ccallback.h
91lj_debug.o: lj_debug.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 95lj_debug.o: lj_debug.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
92 lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_state.h lj_frame.h \ 96 lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_gc.h lj_str.h lj_tab.h \
93 lj_bc.h lj_jit.h lj_ir.h 97 lj_state.h lj_frame.h lj_bc.h lj_jit.h lj_ir.h
94lj_dispatch.o: lj_dispatch.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 98lj_dispatch.o: lj_dispatch.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
95 lj_err.h lj_errmsg.h lj_func.h lj_str.h lj_tab.h lj_meta.h lj_debug.h \ 99 lj_err.h lj_errmsg.h lj_func.h lj_str.h lj_tab.h lj_meta.h lj_debug.h \
96 lj_state.h lj_frame.h lj_bc.h lj_ff.h lj_ffdef.h lj_jit.h lj_ir.h \ 100 lj_state.h lj_frame.h lj_bc.h lj_ff.h lj_ffdef.h lj_jit.h lj_ir.h \
@@ -109,31 +113,32 @@ lj_func.o: lj_func.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
109 lj_func.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \ 113 lj_func.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \
110 lj_traceerr.h lj_vm.h 114 lj_traceerr.h lj_vm.h
111lj_gc.o: lj_gc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 115lj_gc.o: lj_gc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
112 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_udata.h lj_meta.h \ 116 lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_func.h lj_udata.h \
113 lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h lj_trace.h lj_jit.h \ 117 lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h lj_trace.h \
114 lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h 118 lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h
115lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 119lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
116 lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_frame.h lj_bc.h lj_jit.h \ 120 lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_frame.h lj_bc.h lj_buf.h \
117 lj_ir.h lj_dispatch.h 121 lj_str.h lj_jit.h lj_ir.h lj_dispatch.h
118lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 122lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
119 lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ 123 lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
120 lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h lj_carith.h \ 124 lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h lj_carith.h \
121 lj_vm.h lj_strscan.h lj_lib.h 125 lj_vm.h lj_strscan.h lj_lib.h
122lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 126lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
123 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h lualib.h \ 127 lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h \
124 lj_state.h lj_lex.h lj_parse.h lj_char.h lj_strscan.h 128 lualib.h lj_state.h lj_lex.h lj_parse.h lj_char.h lj_strscan.h
125lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \ 129lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \
126 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_bc.h \ 130 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_bc.h \
127 lj_dispatch.h lj_jit.h lj_ir.h lj_vm.h lj_strscan.h lj_lib.h 131 lj_dispatch.h lj_jit.h lj_ir.h lj_vm.h lj_strscan.h lj_lex.h lj_bcdump.h \
132 lj_lib.h
128lj_load.o: lj_load.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \ 133lj_load.o: lj_load.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \
129 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_func.h lj_frame.h \ 134 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_func.h \
130 lj_bc.h lj_vm.h lj_lex.h lj_bcdump.h lj_parse.h 135 lj_frame.h lj_bc.h lj_vm.h lj_lex.h lj_bcdump.h lj_parse.h
131lj_mcode.o: lj_mcode.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 136lj_mcode.o: lj_mcode.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
132 lj_gc.h lj_jit.h lj_ir.h lj_mcode.h lj_trace.h lj_dispatch.h lj_bc.h \ 137 lj_gc.h lj_jit.h lj_ir.h lj_mcode.h lj_trace.h lj_dispatch.h lj_bc.h \
133 lj_traceerr.h lj_vm.h 138 lj_traceerr.h lj_vm.h
134lj_meta.o: lj_meta.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 139lj_meta.o: lj_meta.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
135 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \ 140 lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_meta.h lj_frame.h \
136 lj_vm.h lj_strscan.h 141 lj_bc.h lj_vm.h lj_strscan.h lj_lib.h
137lj_obj.o: lj_obj.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h 142lj_obj.o: lj_obj.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h
138lj_opt_dce.o: lj_opt_dce.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 143lj_opt_dce.o: lj_opt_dce.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
139 lj_ir.h lj_jit.h lj_iropt.h 144 lj_ir.h lj_jit.h lj_iropt.h
@@ -142,8 +147,9 @@ lj_opt_fold.o: lj_opt_fold.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
142 lj_bc.h lj_traceerr.h lj_ctype.h lj_gc.h lj_carith.h lj_vm.h \ 147 lj_bc.h lj_traceerr.h lj_ctype.h lj_gc.h lj_carith.h lj_vm.h \
143 lj_strscan.h lj_folddef.h 148 lj_strscan.h lj_folddef.h
144lj_opt_loop.o: lj_opt_loop.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 149lj_opt_loop.o: lj_opt_loop.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
145 lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \ 150 lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_ir.h lj_jit.h \
146 lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h lj_vm.h 151 lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h \
152 lj_vm.h
147lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 153lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
148 lj_tab.h lj_ir.h lj_jit.h lj_iropt.h 154 lj_tab.h lj_ir.h lj_jit.h lj_iropt.h
149lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \ 155lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \
@@ -152,11 +158,12 @@ lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \
152lj_opt_sink.o: lj_opt_sink.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 158lj_opt_sink.o: lj_opt_sink.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
153 lj_ir.h lj_jit.h lj_iropt.h lj_target.h lj_target_*.h 159 lj_ir.h lj_jit.h lj_iropt.h lj_target.h lj_target_*.h
154lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \ 160lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \
155 lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_ircall.h \ 161 lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_ir.h \
156 lj_iropt.h lj_vm.h 162 lj_jit.h lj_ircall.h lj_iropt.h lj_vm.h
157lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 163lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
158 lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h \ 164 lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_str.h lj_tab.h \
159 lj_state.h lj_bc.h lj_ctype.h lj_lex.h lj_parse.h lj_vm.h lj_vmevent.h 165 lj_func.h lj_state.h lj_bc.h lj_ctype.h lj_lex.h lj_parse.h lj_vm.h \
166 lj_vmevent.h
160lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 167lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
161 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \ 168 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \
162 lj_ctype.h lj_gc.h lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h \ 169 lj_ctype.h lj_gc.h lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h \
@@ -167,11 +174,11 @@ lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
167 lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h \ 174 lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h \
168 lj_target_*.h lj_ctype.h lj_cdata.h 175 lj_target_*.h lj_ctype.h lj_cdata.h
169lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 176lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
170 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_meta.h \ 177 lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_func.h \
171 lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_trace.h lj_jit.h lj_ir.h \ 178 lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_trace.h lj_jit.h \
172 lj_dispatch.h lj_traceerr.h lj_vm.h lj_lex.h lj_alloc.h 179 lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_lex.h lj_alloc.h
173lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 180lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
174 lj_err.h lj_errmsg.h lj_str.h lj_state.h lj_char.h 181 lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_state.h lj_char.h
175lj_strscan.o: lj_strscan.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 182lj_strscan.o: lj_strscan.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
176 lj_char.h lj_strscan.h 183 lj_char.h lj_strscan.h
177lj_tab.o: lj_tab.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 184lj_tab.o: lj_tab.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
@@ -189,26 +196,26 @@ lj_vmevent.o: lj_vmevent.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
189lj_vmmath.o: lj_vmmath.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 196lj_vmmath.o: lj_vmmath.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
190 lj_ir.h lj_vm.h 197 lj_ir.h lj_vm.h
191ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \ 198ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \
192 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h \ 199 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h \
193 lj_udata.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h \ 200 lj_func.h lj_udata.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h \
194 lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_err.c \ 201 lj_cdata.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h \
195 lj_debug.h lj_ff.h lj_ffdef.h lj_char.c lj_char.h lj_bc.c lj_bcdef.h \ 202 lj_vm.h lj_err.c lj_debug.h lj_ff.h lj_ffdef.h lj_char.c lj_char.h \
196 lj_obj.c lj_str.c lj_tab.c lj_func.c lj_udata.c lj_meta.c lj_strscan.h \ 203 lj_bc.c lj_bcdef.h lj_obj.c lj_buf.c lj_str.c lj_tab.c lj_func.c \
197 lj_debug.c lj_state.c lj_lex.h lj_alloc.h lj_dispatch.c lj_ccallback.h \ 204 lj_udata.c lj_meta.c lj_strscan.h lj_lib.h lj_debug.c lj_state.c \
198 luajit.h lj_vmevent.c lj_vmevent.h lj_vmmath.c lj_strscan.c lj_api.c \ 205 lj_lex.h lj_alloc.h lj_dispatch.c lj_ccallback.h luajit.h lj_vmevent.c \
199 lj_lex.c lualib.h lj_parse.h lj_parse.c lj_bcread.c lj_bcdump.h \ 206 lj_vmevent.h lj_vmmath.c lj_strscan.c lj_api.c lj_lex.c lualib.h \
200 lj_bcwrite.c lj_load.c lj_ctype.c lj_cdata.c lj_cconv.h lj_cconv.c \ 207 lj_parse.h lj_parse.c lj_bcread.c lj_bcdump.h lj_bcwrite.c lj_load.c \
201 lj_ccall.c lj_ccall.h lj_ccallback.c lj_target.h lj_target_*.h \ 208 lj_ctype.c lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h \
202 lj_mcode.h lj_carith.c lj_carith.h lj_clib.c lj_clib.h lj_cparse.c \ 209 lj_ccallback.c lj_target.h lj_target_*.h lj_mcode.h lj_carith.c \
203 lj_cparse.h lj_lib.c lj_lib.h lj_ir.c lj_ircall.h lj_iropt.h \ 210 lj_carith.h lj_clib.c lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c \
204 lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c lj_opt_dce.c \ 211 lj_ircall.h lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h \
205 lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c lj_mcode.c \ 212 lj_opt_narrow.c lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c \
206 lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \ 213 lj_opt_sink.c lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h \
207 lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h \ 214 lj_crecord.c lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h \
208 lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c \ 215 lj_emit_*.h lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c \
209 lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c \ 216 lib_aux.c lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c \
210 lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c lib_ffi.c \ 217 lib_io.c lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c \
211 lib_init.c 218 lib_ffi.c lib_init.c
212luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h 219luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h
213host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \ 220host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \
214 lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \ 221 lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \
@@ -220,7 +227,8 @@ host/buildvm_asm.o: host/buildvm_asm.c host/buildvm.h lj_def.h lua.h luaconf.h \
220host/buildvm_fold.o: host/buildvm_fold.c host/buildvm.h lj_def.h lua.h \ 227host/buildvm_fold.o: host/buildvm_fold.c host/buildvm.h lj_def.h lua.h \
221 luaconf.h lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_ir.h lj_obj.h 228 luaconf.h lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_ir.h lj_obj.h
222host/buildvm_lib.o: host/buildvm_lib.c host/buildvm.h lj_def.h lua.h luaconf.h \ 229host/buildvm_lib.o: host/buildvm_lib.c host/buildvm.h lj_def.h lua.h luaconf.h \
223 lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_lib.h lj_obj.h 230 lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_bc.h lj_lib.h lj_obj.h \
231 host/buildvm_libbc.h
224host/buildvm_peobj.o: host/buildvm_peobj.c host/buildvm.h lj_def.h lua.h \ 232host/buildvm_peobj.o: host/buildvm_peobj.c host/buildvm.h lj_def.h lua.h \
225 luaconf.h lj_arch.h lj_bc.h lj_def.h lj_arch.h 233 luaconf.h lj_arch.h lj_bc.h lj_def.h lj_arch.h
226host/minilua.o: host/minilua.c 234host/minilua.o: host/minilua.c
diff --git a/src/host/buildvm_lib.c b/src/host/buildvm_lib.c
index 40141dfb..dcd3ca41 100644
--- a/src/host/buildvm_lib.c
+++ b/src/host/buildvm_lib.c
@@ -5,7 +5,9 @@
5 5
6#include "buildvm.h" 6#include "buildvm.h"
7#include "lj_obj.h" 7#include "lj_obj.h"
8#include "lj_bc.h"
8#include "lj_lib.h" 9#include "lj_lib.h"
10#include "buildvm_libbc.h"
9 11
10/* Context for library definitions. */ 12/* Context for library definitions. */
11static uint8_t obuf[8192]; 13static uint8_t obuf[8192];
@@ -151,6 +153,62 @@ static void libdef_func(BuildCtx *ctx, char *p, int arg)
151 regfunc = REGFUNC_OK; 153 regfunc = REGFUNC_OK;
152} 154}
153 155
156static uint8_t *libdef_uleb128(uint8_t *p, uint32_t *vv)
157{
158 uint32_t v = *p++;
159 if (v >= 0x80) {
160 int sh = 0; v &= 0x7f;
161 do { v |= ((*p & 0x7f) << (sh += 7)); } while (*p++ >= 0x80);
162 }
163 *vv = v;
164 return p;
165}
166
167static void libdef_fixupbc(uint8_t *p)
168{
169 uint32_t i, sizebc;
170 p += 4;
171 p = libdef_uleb128(p, &sizebc);
172 p = libdef_uleb128(p, &sizebc);
173 p = libdef_uleb128(p, &sizebc);
174 for (i = 0; i < sizebc; i++, p += 4) {
175 uint8_t op = p[libbc_endian ? 3 : 0];
176 uint8_t ra = p[libbc_endian ? 2 : 1];
177 uint8_t rc = p[libbc_endian ? 1 : 2];
178 uint8_t rb = p[libbc_endian ? 0 : 3];
179 if (!LJ_DUALNUM && op == BC_ISTYPE && rc == ~LJ_TNUMX+1) {
180 op = BC_ISNUM; rc++;
181 }
182 p[LJ_ENDIAN_SELECT(0, 3)] = op;
183 p[LJ_ENDIAN_SELECT(1, 2)] = ra;
184 p[LJ_ENDIAN_SELECT(2, 1)] = rc;
185 p[LJ_ENDIAN_SELECT(3, 0)] = rb;
186 }
187}
188
189static void libdef_lua(BuildCtx *ctx, char *p, int arg)
190{
191 UNUSED(arg);
192 if (ctx->mode == BUILD_libdef) {
193 int i;
194 for (i = 0; libbc_map[i].name != NULL; i++) {
195 if (!strcmp(libbc_map[i].name, p)) {
196 int ofs = libbc_map[i].ofs;
197 int len = libbc_map[i+1].ofs - ofs;
198 obuf[2]++; /* Bump hash table size. */
199 *optr++ = LIBINIT_LUA;
200 libdef_name(p, 0);
201 memcpy(optr, libbc_code + ofs, len);
202 libdef_fixupbc(optr);
203 optr += len;
204 return;
205 }
206 }
207 fprintf(stderr, "Error: missing libbc definition for %s\n", p);
208 exit(1);
209 }
210}
211
154static uint32_t find_rec(char *name) 212static uint32_t find_rec(char *name)
155{ 213{
156 char *p = (char *)obuf; 214 char *p = (char *)obuf;
@@ -277,6 +335,7 @@ static const LibDefHandler libdef_handlers[] = {
277 { "CF(", ")", libdef_func, LIBINIT_CF }, 335 { "CF(", ")", libdef_func, LIBINIT_CF },
278 { "ASM(", ")", libdef_func, LIBINIT_ASM }, 336 { "ASM(", ")", libdef_func, LIBINIT_ASM },
279 { "ASM_(", ")", libdef_func, LIBINIT_ASM_ }, 337 { "ASM_(", ")", libdef_func, LIBINIT_ASM_ },
338 { "LUA(", ")", libdef_lua, 0 },
280 { "REC(", ")", libdef_rec, 0 }, 339 { "REC(", ")", libdef_rec, 0 },
281 { "PUSH(", ")", libdef_push, 0 }, 340 { "PUSH(", ")", libdef_push, 0 },
282 { "SET(", ")", libdef_set, 0 }, 341 { "SET(", ")", libdef_set, 0 },
diff --git a/src/host/buildvm_libbc.h b/src/host/buildvm_libbc.h
new file mode 100644
index 00000000..ec2a55f8
--- /dev/null
+++ b/src/host/buildvm_libbc.h
@@ -0,0 +1,28 @@
1/* This is a generated file. DO NOT EDIT! */
2
3static const int libbc_endian = 0;
4
5static const uint8_t libbc_code[] = {
60,1,2,0,0,1,2,24,1,0,0,76,1,2,0,241,135,158,166,3,220,203,178,130,4,0,1,2,0,
70,1,2,24,1,0,0,76,1,2,0,243,244,148,165,20,198,190,199,252,3,0,2,9,0,0,0,15,
816,0,12,0,16,1,9,0,41,2,1,0,21,3,0,0,41,4,1,0,77,2,8,128,18,6,1,0,18,7,5,0,
959,8,5,0,66,6,3,2,10,6,0,0,88,7,1,128,76,6,2,0,79,2,248,127,75,0,1,0,0,2,10,
100,0,0,16,16,0,12,0,16,1,9,0,43,2,0,0,18,3,0,0,41,4,0,0,88,5,7,128,18,7,1,0,
1118,8,5,0,18,9,6,0,66,7,3,2,10,7,0,0,88,8,1,128,76,7,2,0,70,5,3,3,82,5,247,127,
1275,0,1,0,0,1,2,0,0,0,3,16,0,12,0,21,1,0,0,76,1,2,0,0,2,10,0,0,2,30,16,0,12,
130,21,2,0,0,11,1,0,0,88,3,7,128,8,2,0,0,88,3,23,128,59,3,2,0,43,4,0,0,64,4,2,
140,76,3,2,0,88,3,18,128,16,1,14,0,41,3,1,0,3,3,1,0,88,3,14,128,3,1,2,0,88,3,
1512,128,59,3,1,0,22,4,1,1,18,5,2,0,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,
169,8,0,79,4,252,127,43,4,0,0,64,4,2,0,76,3,2,0,75,0,1,0,0,2,0
17};
18
19static const struct { const char *name; int ofs; } libbc_map[] = {
20{"math_deg",0},
21{"math_rad",25},
22{"table_foreachi",50},
23{"table_foreach",117},
24{"table_getn",188},
25{"table_remove",207},
26{NULL,336}
27};
28
diff --git a/src/host/genlibbc.lua b/src/host/genlibbc.lua
new file mode 100644
index 00000000..16f0a0b6
--- /dev/null
+++ b/src/host/genlibbc.lua
@@ -0,0 +1,197 @@
1----------------------------------------------------------------------------
2-- Lua script to dump the bytecode of the library functions written in Lua.
3-- The resulting 'buildvm_libbc.h' is used for the build process of LuaJIT.
4----------------------------------------------------------------------------
5-- Copyright (C) 2005-2013 Mike Pall. All rights reserved.
6-- Released under the MIT license. See Copyright Notice in luajit.h
7----------------------------------------------------------------------------
8
9local ffi = require("ffi")
10local bit = require("bit")
11local vmdef = require("jit.vmdef")
12local bcnames = vmdef.bcnames
13
14local format = string.format
15
-- Host byte-order flag: true when the 5th byte of a bytecode dump is odd.
-- NOTE(review): presumably that byte carries the dump header flags with the
-- big-endian bit in bit 0 -- confirm against the bytecode dump format.
local isbe = (string.dump(function() end):byte(5) % 2 == 1)
17
-- Print the command line synopsis to stderr and exit with status 1.
-- The program name is taken from arg[0] when available.
local function usage(arg)
  local prog = arg and arg[0] or "genlibbc"
  io.stderr:write("Usage: ", prog, " [-o buildvm_libbc.h] lib_*.c\n")
  os.exit(1)
end
23
-- Parse the command line. Returns the output file name, "-" (stdout) unless
-- "-o name" is given. A leading "-o name" pair is removed from 'arg' in
-- place, so only the input file names remain afterwards.
local function parse_arg(arg)
  if not arg or not arg[1] then
    usage(arg)
  end
  if arg[1] ~= "-o" then
    return "-"
  end
  local target = arg[2]
  if not target then
    usage(arg)
  end
  -- Drop both the "-o" switch and its value.
  table.remove(arg, 1)
  table.remove(arg, 1)
  return target
end
37
-- Read all named files and return their contents concatenated in order.
-- Raises an error if a file cannot be opened.
local function read_files(names)
  local contents = ""
  for _, path in ipairs(names) do
    local fh = assert(io.open(path))
    local text = fh:read("*a")
    contents = contents .. text
    fh:close()
  end
  return contents
end
47
-- Rewrite the pseudo-macros in a Lua function definition into plain Lua.
--   CHECK_<type>(var)  becomes  var=<marker>, with a unique marker number
--                      counting up from -29999; the marker is recorded in
--                      the fixup table as { "CHECK", <type> }.
--   PAIRS(var)         becomes  nil, var, 0  and sets fixup.PAIRS.
-- Returns the code prefixed with "return " plus the fixup table.
local function transform_lua(code)
  local fixup = {}
  local marker = -30000

  local function on_check(tp, var)
    marker = marker + 1
    fixup[marker] = { "CHECK", tp }
    return format("%s=%d", var, marker)
  end

  local function on_pairs(var)
    fixup.PAIRS = true
    return "nil, " .. var .. ", 0"
  end

  local patched = code:gsub("CHECK_(%w*)%((.-)%)", on_check)
  patched = patched:gsub("PAIRS%((.-)%)", on_pairs)
  return "return " .. patched, fixup
end
62
-- Decode one ULEB128 number at byte pointer p.
-- Each byte contributes its low 7 bits, least significant group first;
-- a byte below 128 ends the number.
-- Returns the pointer past the number and the decoded value.
local function read_uleb128(p)
  local value = p[0]
  p = p + 1
  if value < 128 then
    return p, value
  end
  value = value - 128
  local shift = 7
  while true do
    local byte = p[0]
    p = p + 1
    value = value + bit.lshift(bit.band(byte, 127), shift)
    shift = shift + 7
    if byte < 128 then break end
  end
  return p, value
end
76
-- ORDER LJ_T
-- Type name -> numeric type code stored into the patched check instruction.
local name2itype = {
  str = 5,
  func = 9,
  tab = 12,
  int = 14,
  num = 15,
}

-- Bytecode name -> opcode number. 'bcnames' packs one name per 6 characters,
-- padded with spaces.
local BC = {}
for idx = 0, #bcnames/6 - 1 do
  local name = bcnames:sub(idx*6 + 1, idx*6 + 6):gsub(" ", "")
  BC[name] = idx
end

-- Byte offsets of the opcode and operand fields inside one 4-byte
-- instruction, depending on the host byte order.
local xop, xra, xrc, xrb
if isbe then
  xop, xra, xrc, xrb = 3, 2, 1, 0
else
  xop, xra, xrc, xrb = 0, 1, 2, 3
end
88
-- Patch the bytecode inside a stripped string.dump() result.
--   * A KSHORT whose 16 bit operand is a CHECK marker from transform_lua()
--     becomes ISNUM (type "num") or ISTYPE (any other type), with the type
--     code from name2itype as its operand. Slots checked as "tab" are
--     remembered.
--   * TGETV/TSETV on a remembered table slot become TGETR/TSETR.
--   * ITERC becomes ITERN if the source used PAIRS().
-- Returns the prototype data (without the dump header and length prefix).
-- NOTE(review): the skipped header/prototype fields are assumed to follow
-- the LuaJIT bytecode dump layout -- confirm against lj_bcdump.h.
local function fixup_dump(dump, fixup)
  local buf = ffi.new("uint8_t[?]", #dump+1, dump)
  -- Skip the 5 byte dump header, then read the prototype length.
  local p, protolen = read_uleb128(buf + 5)
  local start = p
  p = p + 4               -- Four fixed-size bytes.
  p = read_uleb128(p)     -- Two counts that are not needed here.
  p = read_uleb128(p)
  local sizebc
  p, sizebc = read_uleb128(p)
  local rawtab = {}       -- Slots known to hold a checked table.
  for _ = 1, sizebc do
    local op = p[xop]
    if op == BC.KSHORT then
      -- Sign-extend the 16 bit operand to recover a negative marker.
      local rd = bit.arshift(bit.lshift(p[xrc] + 256*p[xrb], 16), 16)
      local fix = fixup[rd]
      if fix then
        if fix[1] ~= "CHECK" then
          error("unhandled fixup type: "..fix[1])
        end
        local tp = fix[2]
        if tp == "tab" then rawtab[p[xra]] = true end
        p[xop] = tp == "num" and BC.ISNUM or BC.ISTYPE
        p[xrb] = 0
        p[xrc] = name2itype[tp]
      end
    elseif op == BC.TGETV and rawtab[p[xrb]] then
      p[xop] = BC.TGETR
    elseif op == BC.TSETV and rawtab[p[xrb]] then
      p[xop] = BC.TSETR
    elseif op == BC.ITERC and fixup.PAIRS then
      p[xop] = BC.ITERN
    end
    p = p + 4
  end
  return ffi.string(start, protolen)
end
134
-- Collect all "LJLIB_LUA(name) /* lua-function */" definitions from the
-- concatenated library sources and compile them.
-- The returned table maps each name to its patched bytecode and also lists
-- the names in order of appearance in its array part.
local function find_defs(src)
  local defs = {}
  for name, code in src:gmatch("LJLIB_LUA%(([^)]*)%)%s*/%*(.-)%*/") do
    local luasrc, fixup = transform_lua(code)
    -- Compile with an empty environment; the chunk returns the function.
    local chunk = assert(load(luasrc, "", nil, {}))
    local func = chunk()
    defs[name] = fixup_dump(string.dump(func, true), fixup)
    defs[#defs+1] = name
  end
  return defs
end
146
-- Render the text of the generated C header:
--   libbc_endian  byte order flag of the generating host,
--   libbc_code[]  bytecode of all functions back to back, 0-terminated,
--   libbc_map[]   name/offset pairs, closed by {NULL, total size}.
local function gen_header(defs)
  local out = {}
  local function emit(piece) out[#out+1] = piece end

  -- One pass over the names: gather the bytecode and the offset map rows.
  local code, rows, ofs = "", {}, 0
  for _, name in ipairs(defs) do
    local bc = defs[name]
    rows[#rows+1] = '{"' .. name .. '",' .. ofs .. '},\n'
    code = code .. bc
    ofs = ofs + #bc
  end

  emit("/* This is a generated file. DO NOT EDIT! */\n\n")
  emit("static const int libbc_endian = ")
  emit(isbe and 1 or 0)
  emit(";\n\n")

  emit("static const uint8_t libbc_code[] = {\n")
  local col = 0
  for pos = 1, #code do
    local byte = code:byte(pos)
    emit(byte)
    emit(",")
    -- Digits plus the comma; break the line once it reaches 75 columns.
    col = col + #tostring(byte) + 1
    if col >= 75 then
      col = 0
      emit("\n")
    end
  end
  emit("0\n};\n\n")

  emit("static const struct { const char *name; int ofs; } libbc_map[] = {\n")
  for _, row in ipairs(rows) do
    emit(row)
  end
  emit("{NULL,")
  emit(ofs)
  emit("}\n};\n\n")
  return table.concat(out)
end
174
-- Write 'data' to the file 'name', or to stdout if name is "-".
-- An existing file with identical contents is left untouched.
local function write_file(name, data)
  if name == "-" then
    assert(io.write(data))
    assert(io.flush())
    return
  end
  local existing = io.open(name)
  if existing then
    local previous = existing:read("*a")
    existing:close()
    if previous == data then return end
  end
  local out = assert(io.open(name, "w"))
  assert(out:write(data))
  assert(out:close())
end
191
-- Main: parse options, read the library sources, compile the embedded Lua
-- functions and write the generated header.
local outfile = parse_arg(arg)
local defs = find_defs(read_files(arg))
write_file(outfile, gen_header(defs))
197
diff --git a/src/jit/bc.lua b/src/jit/bc.lua
index 5c00ebe3..dd1c1f3b 100644
--- a/src/jit/bc.lua
+++ b/src/jit/bc.lua
@@ -41,7 +41,7 @@
41 41
42-- Cache some library functions and objects. 42-- Cache some library functions and objects.
43local jit = require("jit") 43local jit = require("jit")
44assert(jit.version_num == 20001, "LuaJIT core/library version mismatch") 44assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
45local jutil = require("jit.util") 45local jutil = require("jit.util")
46local vmdef = require("jit.vmdef") 46local vmdef = require("jit.vmdef")
47local bit = require("bit") 47local bit = require("bit")
diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua
index 25bd6042..a54094dd 100644
--- a/src/jit/bcsave.lua
+++ b/src/jit/bcsave.lua
@@ -11,7 +11,7 @@
11------------------------------------------------------------------------------ 11------------------------------------------------------------------------------
12 12
13local jit = require("jit") 13local jit = require("jit")
14assert(jit.version_num == 20001, "LuaJIT core/library version mismatch") 14assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
15local bit = require("bit") 15local bit = require("bit")
16 16
17-- Symbol name prefix for LuaJIT bytecode. 17-- Symbol name prefix for LuaJIT bytecode.
diff --git a/src/jit/dump.lua b/src/jit/dump.lua
index 70a59280..7f930f51 100644
--- a/src/jit/dump.lua
+++ b/src/jit/dump.lua
@@ -54,7 +54,7 @@
54 54
55-- Cache some library functions and objects. 55-- Cache some library functions and objects.
56local jit = require("jit") 56local jit = require("jit")
57assert(jit.version_num == 20001, "LuaJIT core/library version mismatch") 57assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
58local jutil = require("jit.util") 58local jutil = require("jit.util")
59local vmdef = require("jit.vmdef") 59local vmdef = require("jit.vmdef")
60local funcinfo, funcbc = jutil.funcinfo, jutil.funcbc 60local funcinfo, funcbc = jutil.funcinfo, jutil.funcbc
@@ -269,8 +269,7 @@ local litname = {
269 ["CONV "] = setmetatable({}, { __index = function(t, mode) 269 ["CONV "] = setmetatable({}, { __index = function(t, mode)
270 local s = irtype[band(mode, 31)] 270 local s = irtype[band(mode, 31)]
271 s = irtype[band(shr(mode, 5), 31)].."."..s 271 s = irtype[band(shr(mode, 5), 31)].."."..s
272 if band(mode, 0x400) ~= 0 then s = s.." trunc" 272 if band(mode, 0x800) ~= 0 then s = s.." sext" end
273 elseif band(mode, 0x800) ~= 0 then s = s.." sext" end
274 local c = shr(mode, 14) 273 local c = shr(mode, 14)
275 if c == 2 then s = s.." index" elseif c == 3 then s = s.." check" end 274 if c == 2 then s = s.." index" elseif c == 3 then s = s.." check" end
276 t[mode] = s 275 t[mode] = s
diff --git a/src/jit/v.lua b/src/jit/v.lua
index f4a9b054..88c358b5 100644
--- a/src/jit/v.lua
+++ b/src/jit/v.lua
@@ -59,7 +59,7 @@
59 59
60-- Cache some library functions and objects. 60-- Cache some library functions and objects.
61local jit = require("jit") 61local jit = require("jit")
62assert(jit.version_num == 20001, "LuaJIT core/library version mismatch") 62assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
63local jutil = require("jit.util") 63local jutil = require("jit.util")
64local vmdef = require("jit.vmdef") 64local vmdef = require("jit.vmdef")
65local funcinfo, traceinfo = jutil.funcinfo, jutil.traceinfo 65local funcinfo, traceinfo = jutil.funcinfo, jutil.traceinfo
diff --git a/src/lib_base.c b/src/lib_base.c
index 070970ed..8fecddea 100644
--- a/src/lib_base.c
+++ b/src/lib_base.c
@@ -506,21 +506,13 @@ LJLIB_CF(print)
506 } 506 }
507 shortcut = (tvisfunc(tv) && funcV(tv)->c.ffid == FF_tostring); 507 shortcut = (tvisfunc(tv) && funcV(tv)->c.ffid == FF_tostring);
508 for (i = 0; i < nargs; i++) { 508 for (i = 0; i < nargs; i++) {
509 cTValue *o = &L->base[i];
510 char buf[LJ_STR_NUMBERBUF];
509 const char *str; 511 const char *str;
510 size_t size; 512 size_t size;
511 cTValue *o = &L->base[i]; 513 MSize len;
512 if (shortcut && tvisstr(o)) { 514 if (shortcut && (str = lj_str_buftv(buf, o, &len)) != NULL) {
513 str = strVdata(o); 515 size = len;
514 size = strV(o)->len;
515 } else if (shortcut && tvisint(o)) {
516 char buf[LJ_STR_INTBUF];
517 char *p = lj_str_bufint(buf, intV(o));
518 size = (size_t)(buf+LJ_STR_INTBUF-p);
519 str = p;
520 } else if (shortcut && tvisnum(o)) {
521 char buf[LJ_STR_NUMBUF];
522 size = lj_str_bufnum(buf, o);
523 str = buf;
524 } else { 516 } else {
525 copyTV(L, L->top+1, o); 517 copyTV(L, L->top+1, o);
526 copyTV(L, L->top, L->top-1); 518 copyTV(L, L->top, L->top-1);
diff --git a/src/lib_bit.c b/src/lib_bit.c
index 93fead92..85821b81 100644
--- a/src/lib_bit.c
+++ b/src/lib_bit.c
@@ -13,25 +13,82 @@
13#include "lj_obj.h" 13#include "lj_obj.h"
14#include "lj_err.h" 14#include "lj_err.h"
15#include "lj_str.h" 15#include "lj_str.h"
16#if LJ_HASFFI
17#include "lj_ctype.h"
18#include "lj_cdata.h"
19#include "lj_cconv.h"
20#include "lj_carith.h"
21#endif
22#include "lj_ff.h"
16#include "lj_lib.h" 23#include "lj_lib.h"
17 24
18/* ------------------------------------------------------------------------ */ 25/* ------------------------------------------------------------------------ */
19 26
20#define LJLIB_MODULE_bit 27#define LJLIB_MODULE_bit
21 28
22LJLIB_ASM(bit_tobit) LJLIB_REC(bit_unary IR_TOBIT) 29#if LJ_HASFFI
30static int bit_result64(lua_State *L, CTypeID id, uint64_t x)
23{ 31{
32 GCcdata *cd = lj_cdata_new_(L, id, 8);
33 *(uint64_t *)cdataptr(cd) = x;
34 setcdataV(L, L->base-1, cd);
35 return FFH_RES(1);
36}
37#endif
38
39LJLIB_ASM(bit_tobit) LJLIB_REC(bit_tobit)
40{
41#if LJ_HASFFI
42 CTypeID id = 0;
43 setintV(L->base-1, (int32_t)lj_carith_check64(L, 1, &id));
44 return FFH_RES(1);
45#else
46 lj_lib_checknumber(L, 1);
47 return FFH_RETRY;
48#endif
49}
50
51LJLIB_ASM(bit_bnot) LJLIB_REC(bit_unary IR_BNOT)
52{
53#if LJ_HASFFI
54 CTypeID id = 0;
55 uint64_t x = lj_carith_check64(L, 1, &id);
56 return id ? bit_result64(L, id, ~x) : FFH_RETRY;
57#else
58 lj_lib_checknumber(L, 1);
59 return FFH_RETRY;
60#endif
61}
62
63LJLIB_ASM(bit_bswap) LJLIB_REC(bit_unary IR_BSWAP)
64{
65#if LJ_HASFFI
66 CTypeID id = 0;
67 uint64_t x = lj_carith_check64(L, 1, &id);
68 return id ? bit_result64(L, id, lj_bswap64(x)) : FFH_RETRY;
69#else
24 lj_lib_checknumber(L, 1); 70 lj_lib_checknumber(L, 1);
25 return FFH_RETRY; 71 return FFH_RETRY;
72#endif
26} 73}
27LJLIB_ASM_(bit_bnot) LJLIB_REC(bit_unary IR_BNOT)
28LJLIB_ASM_(bit_bswap) LJLIB_REC(bit_unary IR_BSWAP)
29 74
30LJLIB_ASM(bit_lshift) LJLIB_REC(bit_shift IR_BSHL) 75LJLIB_ASM(bit_lshift) LJLIB_REC(bit_shift IR_BSHL)
31{ 76{
77#if LJ_HASFFI
78 CTypeID id = 0, id2 = 0;
79 uint64_t x = lj_carith_check64(L, 1, &id);
80 int32_t sh = (int32_t)lj_carith_check64(L, 2, &id2);
81 if (id) {
82 x = lj_carith_shift64(x, sh, curr_func(L)->c.ffid - (int)FF_bit_lshift);
83 return bit_result64(L, id, x);
84 }
85 if (id2) setintV(L->base+1, sh);
86 return FFH_RETRY;
87#else
32 lj_lib_checknumber(L, 1); 88 lj_lib_checknumber(L, 1);
33 lj_lib_checkbit(L, 2); 89 lj_lib_checkbit(L, 2);
34 return FFH_RETRY; 90 return FFH_RETRY;
91#endif
35} 92}
36LJLIB_ASM_(bit_rshift) LJLIB_REC(bit_shift IR_BSHR) 93LJLIB_ASM_(bit_rshift) LJLIB_REC(bit_shift IR_BSHR)
37LJLIB_ASM_(bit_arshift) LJLIB_REC(bit_shift IR_BSAR) 94LJLIB_ASM_(bit_arshift) LJLIB_REC(bit_shift IR_BSAR)
@@ -40,9 +97,29 @@ LJLIB_ASM_(bit_ror) LJLIB_REC(bit_shift IR_BROR)
40 97
41LJLIB_ASM(bit_band) LJLIB_REC(bit_nary IR_BAND) 98LJLIB_ASM(bit_band) LJLIB_REC(bit_nary IR_BAND)
42{ 99{
100#if LJ_HASFFI
101 CTypeID id = 0;
102 TValue *o = L->base, *top = L->top;
103 int i = 0;
104 do { lj_carith_check64(L, ++i, &id); } while (++o < top);
105 if (id) {
106 CTState *cts = ctype_cts(L);
107 CType *ct = ctype_get(cts, id);
108 int op = curr_func(L)->c.ffid - (int)FF_bit_bor;
109 uint64_t x, y = op >= 0 ? 0 : ~(uint64_t)0;
110 o = L->base;
111 do {
112 lj_cconv_ct_tv(cts, ct, (uint8_t *)&x, o, 0);
113 if (op < 0) y &= x; else if (op == 0) y |= x; else y ^= x;
114 } while (++o < top);
115 return bit_result64(L, id, y);
116 }
117 return FFH_RETRY;
118#else
43 int i = 0; 119 int i = 0;
44 do { lj_lib_checknumber(L, ++i); } while (L->base+i < L->top); 120 do { lj_lib_checknumber(L, ++i); } while (L->base+i < L->top);
45 return FFH_RETRY; 121 return FFH_RETRY;
122#endif
46} 123}
47LJLIB_ASM_(bit_bor) LJLIB_REC(bit_nary IR_BOR) 124LJLIB_ASM_(bit_bor) LJLIB_REC(bit_nary IR_BOR)
48LJLIB_ASM_(bit_bxor) LJLIB_REC(bit_nary IR_BXOR) 125LJLIB_ASM_(bit_bxor) LJLIB_REC(bit_nary IR_BXOR)
@@ -51,12 +128,21 @@ LJLIB_ASM_(bit_bxor) LJLIB_REC(bit_nary IR_BXOR)
51 128
52LJLIB_CF(bit_tohex) 129LJLIB_CF(bit_tohex)
53{ 130{
131#if LJ_HASFFI
132 CTypeID id = 0, id2 = 0;
133 uint64_t b = lj_carith_check64(L, 1, &id);
134 int32_t i, dig = id ? 16 : 8;
135 int32_t n = L->base+1>=L->top ? dig : (int32_t)lj_carith_check64(L, 2, &id2);
136 char buf[16];
137#else
54 uint32_t b = (uint32_t)lj_lib_checkbit(L, 1); 138 uint32_t b = (uint32_t)lj_lib_checkbit(L, 1);
55 int32_t i, n = L->base+1 >= L->top ? 8 : lj_lib_checkbit(L, 2); 139 int32_t i, dig = 8;
56 const char *hexdigits = "0123456789abcdef"; 140 int32_t n = L->base+1>=L->top ? dig : lj_lib_checkbit(L, 2);
57 char buf[8]; 141 char buf[8];
142#endif
143 const char *hexdigits = "0123456789abcdef";
58 if (n < 0) { n = -n; hexdigits = "0123456789ABCDEF"; } 144 if (n < 0) { n = -n; hexdigits = "0123456789ABCDEF"; }
59 if (n > 8) n = 8; 145 if (n > dig) n = dig;
60 for (i = n; --i >= 0; ) { buf[i] = hexdigits[b & 15]; b >>= 4; } 146 for (i = n; --i >= 0; ) { buf[i] = hexdigits[b & 15]; b >>= 4; }
61 lua_pushlstring(L, buf, (size_t)n); 147 lua_pushlstring(L, buf, (size_t)n);
62 return 1; 148 return 1;
diff --git a/src/lib_io.c b/src/lib_io.c
index e0c6908f..18d87a89 100644
--- a/src/lib_io.c
+++ b/src/lib_io.c
@@ -19,6 +19,7 @@
19#include "lj_obj.h" 19#include "lj_obj.h"
20#include "lj_gc.h" 20#include "lj_gc.h"
21#include "lj_err.h" 21#include "lj_err.h"
22#include "lj_buf.h"
22#include "lj_str.h" 23#include "lj_str.h"
23#include "lj_state.h" 24#include "lj_state.h"
24#include "lj_ff.h" 25#include "lj_ff.h"
@@ -145,7 +146,7 @@ static int io_file_readline(lua_State *L, FILE *fp, MSize chop)
145 MSize m = LUAL_BUFFERSIZE, n = 0, ok = 0; 146 MSize m = LUAL_BUFFERSIZE, n = 0, ok = 0;
146 char *buf; 147 char *buf;
147 for (;;) { 148 for (;;) {
148 buf = lj_str_needbuf(L, &G(L)->tmpbuf, m); 149 buf = lj_buf_tmp(L, m);
149 if (fgets(buf+n, m-n, fp) == NULL) break; 150 if (fgets(buf+n, m-n, fp) == NULL) break;
150 n += (MSize)strlen(buf+n); 151 n += (MSize)strlen(buf+n);
151 ok |= n; 152 ok |= n;
@@ -161,7 +162,7 @@ static void io_file_readall(lua_State *L, FILE *fp)
161{ 162{
162 MSize m, n; 163 MSize m, n;
163 for (m = LUAL_BUFFERSIZE, n = 0; ; m += m) { 164 for (m = LUAL_BUFFERSIZE, n = 0; ; m += m) {
164 char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, m); 165 char *buf = lj_buf_tmp(L, m);
165 n += (MSize)fread(buf+n, 1, m-n, fp); 166 n += (MSize)fread(buf+n, 1, m-n, fp);
166 if (n != m) { 167 if (n != m) {
167 setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n)); 168 setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n));
@@ -174,7 +175,7 @@ static void io_file_readall(lua_State *L, FILE *fp)
174static int io_file_readlen(lua_State *L, FILE *fp, MSize m) 175static int io_file_readlen(lua_State *L, FILE *fp, MSize m)
175{ 176{
176 if (m) { 177 if (m) {
177 char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, m); 178 char *buf = lj_buf_tmp(L, m);
178 MSize n = (MSize)fread(buf, 1, m, fp); 179 MSize n = (MSize)fread(buf, 1, m, fp);
179 setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n)); 180 setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n));
180 lj_gc_check(L); 181 lj_gc_check(L);
@@ -230,19 +231,12 @@ static int io_file_write(lua_State *L, FILE *fp, int start)
230 cTValue *tv; 231 cTValue *tv;
231 int status = 1; 232 int status = 1;
232 for (tv = L->base+start; tv < L->top; tv++) { 233 for (tv = L->base+start; tv < L->top; tv++) {
233 if (tvisstr(tv)) { 234 char buf[LJ_STR_NUMBERBUF];
234 MSize len = strV(tv)->len; 235 MSize len;
235 status = status && (fwrite(strVdata(tv), 1, len, fp) == len); 236 const char *p = lj_str_buftv(buf, tv, &len);
236 } else if (tvisint(tv)) { 237 if (!p)
237 char buf[LJ_STR_INTBUF];
238 char *p = lj_str_bufint(buf, intV(tv));
239 size_t len = (size_t)(buf+LJ_STR_INTBUF-p);
240 status = status && (fwrite(p, 1, len, fp) == len);
241 } else if (tvisnum(tv)) {
242 status = status && (fprintf(fp, LUA_NUMBER_FMT, numV(tv)) > 0);
243 } else {
244 lj_err_argt(L, (int)(tv - L->base) + 1, LUA_TSTRING); 238 lj_err_argt(L, (int)(tv - L->base) + 1, LUA_TSTRING);
245 } 239 status = status && (fwrite(p, 1, len, fp) == len);
246 } 240 }
247 if (LJ_52 && status) { 241 if (LJ_52 && status) {
248 L->top = L->base+1; 242 L->top = L->base+1;
diff --git a/src/lib_jit.c b/src/lib_jit.c
index 82e68258..125b48ce 100644
--- a/src/lib_jit.c
+++ b/src/lib_jit.c
@@ -538,23 +538,17 @@ static uint32_t jit_cpudetect(lua_State *L)
538 uint32_t features[4]; 538 uint32_t features[4];
539 if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) { 539 if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) {
540#if !LJ_HASJIT 540#if !LJ_HASJIT
541#define JIT_F_CMOV 1
542#define JIT_F_SSE2 2 541#define JIT_F_SSE2 2
543#endif 542#endif
544 flags |= ((features[3] >> 15)&1) * JIT_F_CMOV;
545 flags |= ((features[3] >> 26)&1) * JIT_F_SSE2; 543 flags |= ((features[3] >> 26)&1) * JIT_F_SSE2;
546#if LJ_HASJIT 544#if LJ_HASJIT
547 flags |= ((features[2] >> 0)&1) * JIT_F_SSE3; 545 flags |= ((features[2] >> 0)&1) * JIT_F_SSE3;
548 flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1; 546 flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1;
549 if (vendor[2] == 0x6c65746e) { /* Intel. */ 547 if (vendor[2] == 0x6c65746e) { /* Intel. */
550 if ((features[0] & 0x0ff00f00) == 0x00000f00) /* P4. */ 548 if ((features[0] & 0x0fff0ff0) == 0x000106c0) /* Atom. */
551 flags |= JIT_F_P4; /* Currently unused. */
552 else if ((features[0] & 0x0fff0ff0) == 0x000106c0) /* Atom. */
553 flags |= JIT_F_LEA_AGU; 549 flags |= JIT_F_LEA_AGU;
554 } else if (vendor[2] == 0x444d4163) { /* AMD. */ 550 } else if (vendor[2] == 0x444d4163) { /* AMD. */
555 uint32_t fam = (features[0] & 0x0ff00f00); 551 uint32_t fam = (features[0] & 0x0ff00f00);
556 if (fam == 0x00000f00) /* K8. */
557 flags |= JIT_F_SPLIT_XMM;
558 if (fam >= 0x00000f00) /* K8, K10. */ 552 if (fam >= 0x00000f00) /* K8, K10. */
559 flags |= JIT_F_PREFER_IMUL; 553 flags |= JIT_F_PREFER_IMUL;
560 } 554 }
@@ -562,14 +556,8 @@ static uint32_t jit_cpudetect(lua_State *L)
562 } 556 }
563 /* Check for required instruction set support on x86 (unnecessary on x64). */ 557 /* Check for required instruction set support on x86 (unnecessary on x64). */
564#if LJ_TARGET_X86 558#if LJ_TARGET_X86
565#if !defined(LUAJIT_CPU_NOCMOV)
566 if (!(flags & JIT_F_CMOV))
567 luaL_error(L, "CPU not supported");
568#endif
569#if defined(LUAJIT_CPU_SSE2)
570 if (!(flags & JIT_F_SSE2)) 559 if (!(flags & JIT_F_SSE2))
571 luaL_error(L, "CPU does not support SSE2 (recompile without -DLUAJIT_CPU_SSE2)"); 560 luaL_error(L, "CPU with SSE2 required");
572#endif
573#endif 561#endif
574#elif LJ_TARGET_ARM 562#elif LJ_TARGET_ARM
575#if LJ_HASJIT 563#if LJ_HASJIT
@@ -631,11 +619,7 @@ static void jit_init(lua_State *L)
631 uint32_t flags = jit_cpudetect(L); 619 uint32_t flags = jit_cpudetect(L);
632#if LJ_HASJIT 620#if LJ_HASJIT
633 jit_State *J = L2J(L); 621 jit_State *J = L2J(L);
634#if LJ_TARGET_X86 622 J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT;
635 /* Silently turn off the JIT compiler on CPUs without SSE2. */
636 if ((flags & JIT_F_SSE2))
637#endif
638 J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT;
639 memcpy(J->param, jit_param_default, sizeof(J->param)); 623 memcpy(J->param, jit_param_default, sizeof(J->param));
640 lj_dispatch_update(G(L)); 624 lj_dispatch_update(G(L));
641#else 625#else
@@ -645,6 +629,7 @@ static void jit_init(lua_State *L)
645 629
646LUALIB_API int luaopen_jit(lua_State *L) 630LUALIB_API int luaopen_jit(lua_State *L)
647{ 631{
632 jit_init(L);
648 lua_pushliteral(L, LJ_OS_NAME); 633 lua_pushliteral(L, LJ_OS_NAME);
649 lua_pushliteral(L, LJ_ARCH_NAME); 634 lua_pushliteral(L, LJ_ARCH_NAME);
650 lua_pushinteger(L, LUAJIT_VERSION_NUM); 635 lua_pushinteger(L, LUAJIT_VERSION_NUM);
@@ -657,7 +642,6 @@ LUALIB_API int luaopen_jit(lua_State *L)
657 LJ_LIB_REG(L, "jit.opt", jit_opt); 642 LJ_LIB_REG(L, "jit.opt", jit_opt);
658#endif 643#endif
659 L->top -= 2; 644 L->top -= 2;
660 jit_init(L);
661 return 1; 645 return 1;
662} 646}
663 647
diff --git a/src/lib_math.c b/src/lib_math.c
index b23d9a2d..e474f980 100644
--- a/src/lib_math.c
+++ b/src/lib_math.c
@@ -63,11 +63,8 @@ LJLIB_ASM(math_log) LJLIB_REC(math_log)
63 return FFH_RETRY; 63 return FFH_RETRY;
64} 64}
65 65
66LJLIB_PUSH(57.29577951308232) 66LJLIB_LUA(math_deg) /* function(x) return x * 57.29577951308232 end */
67LJLIB_ASM_(math_deg) LJLIB_REC(math_degrad) 67LJLIB_LUA(math_rad) /* function(x) return x * 0.017453292519943295 end */
68
69LJLIB_PUSH(0.017453292519943295)
70LJLIB_ASM_(math_rad) LJLIB_REC(math_degrad)
71 68
72LJLIB_ASM(math_atan2) LJLIB_REC(.) 69LJLIB_ASM(math_atan2) LJLIB_REC(.)
73{ 70{
diff --git a/src/lib_os.c b/src/lib_os.c
index 0a784129..d0291f52 100644
--- a/src/lib_os.c
+++ b/src/lib_os.c
@@ -18,7 +18,9 @@
18#include "lualib.h" 18#include "lualib.h"
19 19
20#include "lj_obj.h" 20#include "lj_obj.h"
21#include "lj_gc.h"
21#include "lj_err.h" 22#include "lj_err.h"
23#include "lj_buf.h"
22#include "lj_lib.h" 24#include "lj_lib.h"
23 25
24#if LJ_TARGET_POSIX 26#if LJ_TARGET_POSIX
@@ -197,23 +199,25 @@ LJLIB_CF(os_date)
197 setfield(L, "wday", stm->tm_wday+1); 199 setfield(L, "wday", stm->tm_wday+1);
198 setfield(L, "yday", stm->tm_yday+1); 200 setfield(L, "yday", stm->tm_yday+1);
199 setboolfield(L, "isdst", stm->tm_isdst); 201 setboolfield(L, "isdst", stm->tm_isdst);
200 } else { 202 } else if (*s) {
201 char cc[3]; 203 SBuf *sb = &G(L)->tmpbuf;
202 luaL_Buffer b; 204 MSize sz = 0;
203 cc[0] = '%'; cc[2] = '\0'; 205 const char *q;
204 luaL_buffinit(L, &b); 206 for (q = s; *q; q++)
205 for (; *s; s++) { 207 sz += (*q == '%') ? 30 : 1; /* Overflow doesn't matter. */
206 if (*s != '%' || *(s + 1) == '\0') { /* No conversion specifier? */ 208 setmref(sb->L, L);
207 luaL_addchar(&b, *s); 209 for (;;) {
208 } else { 210 char *buf = lj_buf_need(sb, sz);
209 size_t reslen; 211 size_t len = strftime(buf, sbufsz(sb), s, stm);
210 char buff[200]; /* Should be big enough for any conversion result. */ 212 if (len) {
211 cc[1] = *(++s); 213 setstrV(L, L->top-1, lj_str_new(L, buf, len));
212 reslen = strftime(buff, sizeof(buff), cc, stm); 214 lj_gc_check(L);
213 luaL_addlstring(&b, buff, reslen); 215 break;
214 } 216 }
217 sz += (sz|1);
215 } 218 }
216 luaL_pushresult(&b); 219 } else {
220 setstrV(L, L->top-1, &G(L)->strempty);
217 } 221 }
218 return 1; 222 return 1;
219} 223}
diff --git a/src/lib_string.c b/src/lib_string.c
index 9e8ab900..09010b15 100644
--- a/src/lib_string.c
+++ b/src/lib_string.c
@@ -18,6 +18,7 @@
18#include "lj_obj.h" 18#include "lj_obj.h"
19#include "lj_gc.h" 19#include "lj_gc.h"
20#include "lj_err.h" 20#include "lj_err.h"
21#include "lj_buf.h"
21#include "lj_str.h" 22#include "lj_str.h"
22#include "lj_tab.h" 23#include "lj_tab.h"
23#include "lj_meta.h" 24#include "lj_meta.h"
@@ -64,7 +65,7 @@ LJLIB_ASM(string_byte) LJLIB_REC(string_range 0)
64LJLIB_ASM(string_char) 65LJLIB_ASM(string_char)
65{ 66{
66 int i, nargs = (int)(L->top - L->base); 67 int i, nargs = (int)(L->top - L->base);
67 char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, (size_t)nargs); 68 char *buf = lj_buf_tmp(L, (size_t)nargs);
68 for (i = 1; i <= nargs; i++) { 69 for (i = 1; i <= nargs; i++) {
69 int32_t k = lj_lib_checkint(L, i); 70 int32_t k = lj_lib_checkint(L, i);
70 if (!checku8(k)) 71 if (!checku8(k))
@@ -91,8 +92,6 @@ LJLIB_ASM(string_rep)
91 int32_t len = (int32_t)s->len; 92 int32_t len = (int32_t)s->len;
92 global_State *g = G(L); 93 global_State *g = G(L);
93 int64_t tlen; 94 int64_t tlen;
94 const char *src;
95 char *buf;
96 if (k <= 0) { 95 if (k <= 0) {
97 empty: 96 empty:
98 setstrV(L, L->base-1, &g->strempty); 97 setstrV(L, L->base-1, &g->strempty);
@@ -110,31 +109,34 @@ LJLIB_ASM(string_rep)
110 if (tlen > LJ_MAX_STR) 109 if (tlen > LJ_MAX_STR)
111 lj_err_caller(L, LJ_ERR_STROV); 110 lj_err_caller(L, LJ_ERR_STROV);
112 } 111 }
113 if (tlen == 0) goto empty; 112 if (tlen == 0) {
114 buf = lj_str_needbuf(L, &g->tmpbuf, (MSize)tlen); 113 goto empty;
115 src = strdata(s); 114 } else {
116 if (sep) { 115 char *buf = lj_buf_tmp(L, (MSize)tlen), *p = buf;
117 tlen -= sep->len; /* Ignore trailing separator. */ 116 const char *src = strdata(s);
118 if (k > 1) { /* Paste one string and one separator. */ 117 if (sep) {
119 int32_t i; 118 tlen -= sep->len; /* Ignore trailing separator. */
120 i = 0; while (i < len) *buf++ = src[i++]; 119 if (k > 1) { /* Paste one string and one separator. */
121 src = strdata(sep); len = sep->len; 120 int32_t i;
122 i = 0; while (i < len) *buf++ = src[i++]; 121 i = 0; while (i < len) *p++ = src[i++];
123 src = g->tmpbuf.buf; len += s->len; k--; /* Now copy that k-1 times. */ 122 src = strdata(sep); len = sep->len;
123 i = 0; while (i < len) *p++ = src[i++];
124 src = buf; len += s->len; k--; /* Now copy that k-1 times. */
125 }
124 } 126 }
127 do {
128 int32_t i = 0;
129 do { *p++ = src[i++]; } while (i < len);
130 } while (--k > 0);
131 setstrV(L, L->base-1, lj_str_new(L, buf, (size_t)tlen));
125 } 132 }
126 do {
127 int32_t i = 0;
128 do { *buf++ = src[i++]; } while (i < len);
129 } while (--k > 0);
130 setstrV(L, L->base-1, lj_str_new(L, g->tmpbuf.buf, (size_t)tlen));
131 return FFH_RES(1); 133 return FFH_RES(1);
132} 134}
133 135
134LJLIB_ASM(string_reverse) 136LJLIB_ASM(string_reverse)
135{ 137{
136 GCstr *s = lj_lib_checkstr(L, 1); 138 GCstr *s = lj_lib_checkstr(L, 1);
137 lj_str_needbuf(L, &G(L)->tmpbuf, s->len); 139 lj_buf_tmp(L, s->len);
138 return FFH_RETRY; 140 return FFH_RETRY;
139} 141}
140LJLIB_ASM_(string_lower) 142LJLIB_ASM_(string_lower)
@@ -142,9 +144,9 @@ LJLIB_ASM_(string_upper)
142 144
143/* ------------------------------------------------------------------------ */ 145/* ------------------------------------------------------------------------ */
144 146
145static int writer_buf(lua_State *L, const void *p, size_t size, void *b) 147static int writer_buf(lua_State *L, const void *p, size_t size, void *sb)
146{ 148{
147 luaL_addlstring((luaL_Buffer *)b, (const char *)p, size); 149 lj_buf_putmem((SBuf *)sb, p, (MSize)size);
148 UNUSED(L); 150 UNUSED(L);
149 return 0; 151 return 0;
150} 152}
@@ -153,12 +155,14 @@ LJLIB_CF(string_dump)
153{ 155{
154 GCfunc *fn = lj_lib_checkfunc(L, 1); 156 GCfunc *fn = lj_lib_checkfunc(L, 1);
155 int strip = L->base+1 < L->top && tvistruecond(L->base+1); 157 int strip = L->base+1 < L->top && tvistruecond(L->base+1);
156 luaL_Buffer b; 158 SBuf *sb = &G(L)->tmpbuf; /* Assumes lj_bcwrite() doesn't use tmpbuf. */
159 setmref(sb->L, L);
160 lj_buf_reset(sb);
157 L->top = L->base+1; 161 L->top = L->base+1;
158 luaL_buffinit(L, &b); 162 if (!isluafunc(fn) || lj_bcwrite(L, funcproto(fn), writer_buf, sb, strip))
159 if (!isluafunc(fn) || lj_bcwrite(L, funcproto(fn), writer_buf, &b, strip))
160 lj_err_caller(L, LJ_ERR_STRDUMP); 163 lj_err_caller(L, LJ_ERR_STRDUMP);
161 luaL_pushresult(&b); 164 setstrV(L, L->top-1, lj_buf_str(L, sb));
165 lj_gc_check(L);
162 return 1; 166 return 1;
163} 167}
164 168
@@ -698,76 +702,81 @@ LJLIB_CF(string_gsub)
698 702
699/* ------------------------------------------------------------------------ */ 703/* ------------------------------------------------------------------------ */
700 704
701/* maximum size of each formatted item (> len(format('%99.99f', -1e308))) */ 705/* Max. buffer size needed (at least #string.format("%99.99f", -1e308)). */
702#define MAX_FMTITEM 512 706#define STRING_FMT_MAXBUF 512
703/* valid flags in a format specification */ 707/* Valid format specifier flags. */
704#define FMT_FLAGS "-+ #0" 708#define STRING_FMT_FLAGS "-+ #0"
705/* 709/* Max. format specifier size. */
706** maximum size of each format specification (such as '%-099.99d') 710#define STRING_FMT_MAXSPEC \
707** (+10 accounts for %99.99x plus margin of error) 711 (sizeof(STRING_FMT_FLAGS) + sizeof(LUA_INTFRMLEN) + 10)
708*/
709#define MAX_FMTSPEC (sizeof(FMT_FLAGS) + sizeof(LUA_INTFRMLEN) + 10)
710 712
711static void addquoted(lua_State *L, luaL_Buffer *b, int arg) 713/* Add quoted string to buffer. */
714static void string_fmt_quoted(SBuf *sb, GCstr *str)
712{ 715{
713 GCstr *str = lj_lib_checkstr(L, arg);
714 int32_t len = (int32_t)str->len;
715 const char *s = strdata(str); 716 const char *s = strdata(str);
716 luaL_addchar(b, '"'); 717 MSize len = str->len;
718 lj_buf_putb(sb, '"');
717 while (len--) { 719 while (len--) {
718 uint32_t c = uchar(*s); 720 uint32_t c = (uint32_t)(uint8_t)*s++;
721 char *p = lj_buf_more(sb, 4);
719 if (c == '"' || c == '\\' || c == '\n') { 722 if (c == '"' || c == '\\' || c == '\n') {
720 luaL_addchar(b, '\\'); 723 *p++ = '\\';
721 } else if (lj_char_iscntrl(c)) { /* This can only be 0-31 or 127. */ 724 } else if (lj_char_iscntrl(c)) { /* This can only be 0-31 or 127. */
722 uint32_t d; 725 uint32_t d;
723 luaL_addchar(b, '\\'); 726 *p++ = '\\';
724 if (c >= 100 || lj_char_isdigit(uchar(s[1]))) { 727 if (c >= 100 || lj_char_isdigit((uint8_t)*s)) {
725 luaL_addchar(b, '0'+(c >= 100)); if (c >= 100) c -= 100; 728 *p++ = (char)('0'+(c >= 100)); if (c >= 100) c -= 100;
726 goto tens; 729 goto tens;
727 } else if (c >= 10) { 730 } else if (c >= 10) {
728 tens: 731 tens:
729 d = (c * 205) >> 11; c -= d * 10; luaL_addchar(b, '0'+d); 732 d = (c * 205) >> 11; c -= d * 10; *p++ = (char)('0'+d);
730 } 733 }
731 c += '0'; 734 c += '0';
732 } 735 }
733 luaL_addchar(b, c); 736 *p++ = (char)c;
734 s++; 737 setsbufP(sb, p);
735 } 738 }
736 luaL_addchar(b, '"'); 739 lj_buf_putb(sb, '"');
737} 740}
738 741
739static const char *scanformat(lua_State *L, const char *strfrmt, char *form) 742/* Scan format and generate format specifier. */
743static const char *string_fmt_scan(lua_State *L, char *spec, const char *fmt)
740{ 744{
741 const char *p = strfrmt; 745 const char *p = fmt;
742 while (*p != '\0' && strchr(FMT_FLAGS, *p) != NULL) p++; /* skip flags */ 746 while (*p && strchr(STRING_FMT_FLAGS, *p) != NULL) p++; /* Skip flags. */
743 if ((size_t)(p - strfrmt) >= sizeof(FMT_FLAGS)) 747 if ((size_t)(p - fmt) >= sizeof(STRING_FMT_FLAGS))
744 lj_err_caller(L, LJ_ERR_STRFMTR); 748 lj_err_caller(L, LJ_ERR_STRFMTR);
745 if (lj_char_isdigit(uchar(*p))) p++; /* skip width */ 749 if (lj_char_isdigit((uint8_t)*p)) p++; /* Skip max. 2 digits for width. */
746 if (lj_char_isdigit(uchar(*p))) p++; /* (2 digits at most) */ 750 if (lj_char_isdigit((uint8_t)*p)) p++;
747 if (*p == '.') { 751 if (*p == '.') {
748 p++; 752 p++;
749 if (lj_char_isdigit(uchar(*p))) p++; /* skip precision */ 753 if (lj_char_isdigit((uint8_t)*p)) p++; /* Skip max. 2 digits for prec. */
750 if (lj_char_isdigit(uchar(*p))) p++; /* (2 digits at most) */ 754 if (lj_char_isdigit((uint8_t)*p)) p++;
751 } 755 }
752 if (lj_char_isdigit(uchar(*p))) 756 if (lj_char_isdigit((uint8_t)*p))
753 lj_err_caller(L, LJ_ERR_STRFMTW); 757 lj_err_caller(L, LJ_ERR_STRFMTW);
754 *(form++) = '%'; 758 *spec++ = '%';
755 strncpy(form, strfrmt, (size_t)(p - strfrmt + 1)); 759 strncpy(spec, fmt, (size_t)(p - fmt + 1));
756 form += p - strfrmt + 1; 760 spec += p - fmt + 1;
757 *form = '\0'; 761 *spec = '\0';
758 return p; 762 return p;
759} 763}
760 764
761static void addintlen(char *form) 765/* Patch LUA_INTFRMLEN into integer format specifier. */
766static void string_fmt_intfmt(char *spec)
762{ 767{
763 size_t l = strlen(form); 768 char c;
764 char spec = form[l - 1]; 769 do {
765 strcpy(form + l - 1, LUA_INTFRMLEN); 770 c = *spec++;
766 form[l + sizeof(LUA_INTFRMLEN) - 2] = spec; 771 } while (*spec);
767 form[l + sizeof(LUA_INTFRMLEN) - 1] = '\0'; 772 *--spec = (LUA_INTFRMLEN)[0];
773 if ((LUA_INTFRMLEN)[1]) *++spec = (LUA_INTFRMLEN)[1];
774 *++spec = c;
775 *++spec = '\0';
768} 776}
769 777
770static unsigned LUA_INTFRM_T num2intfrm(lua_State *L, int arg) 778/* Derive sprintf argument for integer format. Ugly. */
779static LUA_INTFRM_T string_fmt_intarg(lua_State *L, int arg)
771{ 780{
772 if (sizeof(LUA_INTFRM_T) == 4) { 781 if (sizeof(LUA_INTFRM_T) == 4) {
773 return (LUA_INTFRM_T)lj_lib_checkbit(L, arg); 782 return (LUA_INTFRM_T)lj_lib_checkbit(L, arg);
@@ -782,7 +791,8 @@ static unsigned LUA_INTFRM_T num2intfrm(lua_State *L, int arg)
782 } 791 }
783} 792}
784 793
785static unsigned LUA_INTFRM_T num2uintfrm(lua_State *L, int arg) 794/* Derive sprintf argument for unsigned integer format. Ugly. */
795static unsigned LUA_INTFRM_T string_fmt_uintarg(lua_State *L, int arg)
786{ 796{
787 if (sizeof(LUA_INTFRM_T) == 4) { 797 if (sizeof(LUA_INTFRM_T) == 4) {
788 return (unsigned LUA_INTFRM_T)lj_lib_checkbit(L, arg); 798 return (unsigned LUA_INTFRM_T)lj_lib_checkbit(L, arg);
@@ -799,7 +809,8 @@ static unsigned LUA_INTFRM_T num2uintfrm(lua_State *L, int arg)
799 } 809 }
800} 810}
801 811
802static GCstr *meta_tostring(lua_State *L, int arg) 812/* Emulate tostring() inline. */
813static GCstr *string_fmt_tostring(lua_State *L, int arg)
803{ 814{
804 TValue *o = L->base+arg-1; 815 TValue *o = L->base+arg-1;
805 cTValue *mo; 816 cTValue *mo;
@@ -837,33 +848,33 @@ static GCstr *meta_tostring(lua_State *L, int arg)
837LJLIB_CF(string_format) 848LJLIB_CF(string_format)
838{ 849{
839 int arg = 1, top = (int)(L->top - L->base); 850 int arg = 1, top = (int)(L->top - L->base);
840 GCstr *fmt = lj_lib_checkstr(L, arg); 851 GCstr *sfmt = lj_lib_checkstr(L, arg);
841 const char *strfrmt = strdata(fmt); 852 const char *fmt = strdata(sfmt);
842 const char *strfrmt_end = strfrmt + fmt->len; 853 const char *efmt = fmt + sfmt->len;
843 luaL_Buffer b; 854 SBuf *sb = &G(L)->tmpbuf;
844 luaL_buffinit(L, &b); 855 setmref(sb->L, L);
845 while (strfrmt < strfrmt_end) { 856 lj_buf_reset(sb);
846 if (*strfrmt != L_ESC) { 857 while (fmt < efmt) {
847 luaL_addchar(&b, *strfrmt++); 858 if (*fmt != L_ESC || *++fmt == L_ESC) {
848 } else if (*++strfrmt == L_ESC) { 859 lj_buf_putb(sb, *fmt++);
849 luaL_addchar(&b, *strfrmt++); /* %% */ 860 } else {
850 } else { /* format item */ 861 char buf[STRING_FMT_MAXBUF];
851 char form[MAX_FMTSPEC]; /* to store the format (`%...') */ 862 char spec[STRING_FMT_MAXSPEC];
852 char buff[MAX_FMTITEM]; /* to store the formatted item */ 863 MSize len = 0;
853 if (++arg > top) 864 if (++arg > top)
854 luaL_argerror(L, arg, lj_obj_typename[0]); 865 luaL_argerror(L, arg, lj_obj_typename[0]);
855 strfrmt = scanformat(L, strfrmt, form); 866 fmt = string_fmt_scan(L, spec, fmt);
856 switch (*strfrmt++) { 867 switch (*fmt++) {
857 case 'c': 868 case 'c':
858 sprintf(buff, form, lj_lib_checkint(L, arg)); 869 len = (MSize)sprintf(buf, spec, lj_lib_checkint(L, arg));
859 break; 870 break;
860 case 'd': case 'i': 871 case 'd': case 'i':
861 addintlen(form); 872 string_fmt_intfmt(spec);
862 sprintf(buff, form, num2intfrm(L, arg)); 873 len = (MSize)sprintf(buf, spec, string_fmt_intarg(L, arg));
863 break; 874 break;
864 case 'o': case 'u': case 'x': case 'X': 875 case 'o': case 'u': case 'x': case 'X':
865 addintlen(form); 876 string_fmt_intfmt(spec);
866 sprintf(buff, form, num2uintfrm(L, arg)); 877 len = (MSize)sprintf(buf, spec, string_fmt_uintarg(L, arg));
867 break; 878 break;
868 case 'e': case 'E': case 'f': case 'g': case 'G': case 'a': case 'A': { 879 case 'e': case 'E': case 'f': case 'g': case 'G': case 'a': case 'A': {
869 TValue tv; 880 TValue tv;
@@ -871,48 +882,45 @@ LJLIB_CF(string_format)
871 if (LJ_UNLIKELY((tv.u32.hi << 1) >= 0xffe00000)) { 882 if (LJ_UNLIKELY((tv.u32.hi << 1) >= 0xffe00000)) {
872 /* Canonicalize output of non-finite values. */ 883 /* Canonicalize output of non-finite values. */
873 char *p, nbuf[LJ_STR_NUMBUF]; 884 char *p, nbuf[LJ_STR_NUMBUF];
874 size_t len = lj_str_bufnum(nbuf, &tv); 885 MSize n = lj_str_bufnum(nbuf, &tv);
875 if (strfrmt[-1] < 'a') { 886 if (fmt[-1] < 'a') {
876 nbuf[len-3] = nbuf[len-3] - 0x20; 887 nbuf[n-3] = nbuf[n-3] - 0x20;
877 nbuf[len-2] = nbuf[len-2] - 0x20; 888 nbuf[n-2] = nbuf[n-2] - 0x20;
878 nbuf[len-1] = nbuf[len-1] - 0x20; 889 nbuf[n-1] = nbuf[n-1] - 0x20;
879 } 890 }
880 nbuf[len] = '\0'; 891 nbuf[n] = '\0';
881 for (p = form; *p < 'A' && *p != '.'; p++) ; 892 for (p = spec; *p < 'A' && *p != '.'; p++) ;
882 *p++ = 's'; *p = '\0'; 893 *p++ = 's'; *p = '\0';
883 sprintf(buff, form, nbuf); 894 len = (MSize)sprintf(buf, spec, nbuf);
884 break; 895 break;
885 } 896 }
886 sprintf(buff, form, (double)tv.n); 897 len = (MSize)sprintf(buf, spec, (double)tv.n);
887 break; 898 break;
888 } 899 }
889 case 'q': 900 case 'q':
890 addquoted(L, &b, arg); 901 string_fmt_quoted(sb, lj_lib_checkstr(L, arg));
891 continue; 902 continue;
892 case 'p': 903 case 'p':
893 lj_str_pushf(L, "%p", lua_topointer(L, arg)); 904 len = lj_str_bufptr(buf, lua_topointer(L, arg));
894 luaL_addvalue(&b); 905 break;
895 continue;
896 case 's': { 906 case 's': {
897 GCstr *str = meta_tostring(L, arg); 907 GCstr *str = string_fmt_tostring(L, arg);
898 if (!strchr(form, '.') && str->len >= 100) { 908 if (!strchr(spec, '.') && str->len >= 100) { /* Format overflow? */
899 /* no precision and string is too long to be formatted; 909 lj_buf_putmem(sb, strdata(str), str->len); /* Use orig string. */
900 keep original string */
901 setstrV(L, L->top++, str);
902 luaL_addvalue(&b);
903 continue; 910 continue;
904 } 911 }
905 sprintf(buff, form, strdata(str)); 912 len = (MSize)sprintf(buf, spec, strdata(str));
906 break; 913 break;
907 } 914 }
908 default: 915 default:
909 lj_err_callerv(L, LJ_ERR_STRFMTO, *(strfrmt -1)); 916 lj_err_callerv(L, LJ_ERR_STRFMTO, fmt[-1] ? fmt[-1] : ' ');
910 break; 917 break;
911 } 918 }
912 luaL_addlstring(&b, buff, strlen(buff)); 919 lj_buf_putmem(sb, buf, len);
913 } 920 }
914 } 921 }
915 luaL_pushresult(&b); 922 setstrV(L, L->top-1, lj_buf_str(L, sb));
923 lj_gc_check(L);
916 return 1; 924 return 1;
917} 925}
918 926
diff --git a/src/lib_table.c b/src/lib_table.c
index 8d53a6cd..5619f9ad 100644
--- a/src/lib_table.c
+++ b/src/lib_table.c
@@ -16,6 +16,8 @@
16#include "lj_obj.h" 16#include "lj_obj.h"
17#include "lj_gc.h" 17#include "lj_gc.h"
18#include "lj_err.h" 18#include "lj_err.h"
19#include "lj_buf.h"
20#include "lj_str.h"
19#include "lj_tab.h" 21#include "lj_tab.h"
20#include "lj_lib.h" 22#include "lj_lib.h"
21 23
@@ -23,50 +25,34 @@
23 25
24#define LJLIB_MODULE_table 26#define LJLIB_MODULE_table
25 27
26LJLIB_CF(table_foreachi) 28LJLIB_LUA(table_foreachi) /*
27{ 29 function(t, f)
28 GCtab *t = lj_lib_checktab(L, 1); 30 CHECK_tab(t)
29 GCfunc *func = lj_lib_checkfunc(L, 2); 31 CHECK_func(f)
30 MSize i, n = lj_tab_len(t); 32 for i=1,#t do
31 for (i = 1; i <= n; i++) { 33 local r = f(i, t[i])
32 cTValue *val; 34 if r ~= nil then return r end
33 setfuncV(L, L->top, func); 35 end
34 setintV(L->top+1, i); 36 end
35 val = lj_tab_getint(t, (int32_t)i); 37*/
36 if (val) { copyTV(L, L->top+2, val); } else { setnilV(L->top+2); }
37 L->top += 3;
38 lua_call(L, 2, 1);
39 if (!tvisnil(L->top-1))
40 return 1;
41 L->top--;
42 }
43 return 0;
44}
45 38
46LJLIB_CF(table_foreach) 39LJLIB_LUA(table_foreach) /*
47{ 40 function(t, f)
48 GCtab *t = lj_lib_checktab(L, 1); 41 CHECK_tab(t)
49 GCfunc *func = lj_lib_checkfunc(L, 2); 42 CHECK_func(f)
50 L->top = L->base+3; 43 for k, v in PAIRS(t) do
51 setnilV(L->top-1); 44 local r = f(k, v)
52 while (lj_tab_next(L, t, L->top-1)) { 45 if r ~= nil then return r end
53 copyTV(L, L->top+2, L->top); 46 end
54 copyTV(L, L->top+1, L->top-1); 47 end
55 setfuncV(L, L->top, func); 48*/
56 L->top += 3;
57 lua_call(L, 2, 1);
58 if (!tvisnil(L->top-1))
59 return 1;
60 L->top--;
61 }
62 return 0;
63}
64 49
65LJLIB_ASM(table_getn) LJLIB_REC(.) 50LJLIB_LUA(table_getn) /*
66{ 51 function(t)
67 lj_lib_checktab(L, 1); 52 CHECK_tab(t)
68 return FFH_UNREACHABLE; 53 return #t
69} 54 end
55*/
70 56
71LJLIB_CF(table_maxn) 57LJLIB_CF(table_maxn)
72{ 58{
@@ -119,52 +105,59 @@ LJLIB_CF(table_insert) LJLIB_REC(.)
119 return 0; 105 return 0;
120} 106}
121 107
122LJLIB_CF(table_remove) LJLIB_REC(.) 108LJLIB_LUA(table_remove) /*
123{ 109 function(t, pos)
124 GCtab *t = lj_lib_checktab(L, 1); 110 CHECK_tab(t)
125 int32_t e = (int32_t)lj_tab_len(t); 111 local len = #t
126 int32_t pos = lj_lib_optint(L, 2, e); 112 if pos == nil then
127 if (!(1 <= pos && pos <= e)) /* Nothing to remove? */ 113 if len ~= 0 then
128 return 0; 114 local old = t[len]
129 lua_rawgeti(L, 1, pos); /* Get previous value. */ 115 t[len] = nil
130 /* NOBARRIER: This just moves existing elements around. */ 116 return old
131 for (; pos < e; pos++) { 117 end
132 cTValue *src = lj_tab_getint(t, pos+1); 118 else
133 TValue *dst = lj_tab_setint(L, t, pos); 119 CHECK_int(pos)
134 if (src) { 120 if pos >= 1 and pos <= len then
135 copyTV(L, dst, src); 121 local old = t[pos]
136 } else { 122 for i=pos+1,len do
137 setnilV(dst); 123 t[i-1] = t[i]
138 } 124 end
139 } 125 t[len] = nil
140 setnilV(lj_tab_setint(L, t, e)); /* Remove (last) value. */ 126 return old
141 return 1; /* Return previous value. */ 127 end
142} 128 end
129 end
130*/
143 131
144LJLIB_CF(table_concat) 132LJLIB_CF(table_concat)
145{ 133{
146 luaL_Buffer b;
147 GCtab *t = lj_lib_checktab(L, 1); 134 GCtab *t = lj_lib_checktab(L, 1);
148 GCstr *sep = lj_lib_optstr(L, 2); 135 GCstr *sep = lj_lib_optstr(L, 2);
149 MSize seplen = sep ? sep->len : 0; 136 MSize seplen = sep ? sep->len : 0;
150 int32_t i = lj_lib_optint(L, 3, 1); 137 int32_t i = lj_lib_optint(L, 3, 1);
151 int32_t e = L->base+3 < L->top ? lj_lib_checkint(L, 4) : 138 int32_t e = L->base+3 < L->top ? lj_lib_checkint(L, 4) :
152 (int32_t)lj_tab_len(t); 139 (int32_t)lj_tab_len(t);
153 luaL_buffinit(L, &b);
154 if (i <= e) { 140 if (i <= e) {
141 char buf[LJ_STR_NUMBERBUF];
142 SBuf *sb = &G(L)->tmpbuf;
143 setsbufL(sb, L);
144 lj_buf_reset(sb);
155 for (;;) { 145 for (;;) {
156 cTValue *o; 146 cTValue *o = lj_tab_getint(t, i);
157 lua_rawgeti(L, 1, i); 147 MSize len;
158 o = L->top-1; 148 const char *p = lj_str_buftv(buf, o, &len);
159 if (!(tvisstr(o) || tvisnumber(o))) 149 if (!p)
160 lj_err_callerv(L, LJ_ERR_TABCAT, lj_typename(o), i); 150 lj_err_callerv(L, LJ_ERR_TABCAT, lj_typename(o), i);
161 luaL_addvalue(&b); 151 lj_buf_putmem(sb, p, len);
162 if (i++ == e) break; 152 if (i++ == e) break;
163 if (seplen) 153 if (seplen)
164 luaL_addlstring(&b, strdata(sep), seplen); 154 lj_buf_putmem(sb, strdata(sep), seplen);
165 } 155 }
156 setstrV(L, L->top-1, lj_buf_str(L, sb));
157 lj_gc_check(L);
158 } else {
159 setstrV(L, L->top-1, &G(L)->strempty);
166 } 160 }
167 luaL_pushresult(&b);
168 return 1; 161 return 1;
169} 162}
170 163
diff --git a/src/lj_arch.h b/src/lj_arch.h
index 9ea10d0f..c5f2fb3d 100644
--- a/src/lj_arch.h
+++ b/src/lj_arch.h
@@ -227,6 +227,7 @@
227 227
228#elif LUAJIT_TARGET == LUAJIT_ARCH_PPCSPE 228#elif LUAJIT_TARGET == LUAJIT_ARCH_PPCSPE
229 229
230#error "The PPC/e500 port is broken and will be abandoned with LuaJIT 2.1"
230#define LJ_ARCH_NAME "ppcspe" 231#define LJ_ARCH_NAME "ppcspe"
231#define LJ_ARCH_BITS 32 232#define LJ_ARCH_BITS 32
232#define LJ_ARCH_ENDIAN LUAJIT_BE 233#define LJ_ARCH_ENDIAN LUAJIT_BE
diff --git a/src/lj_asm.c b/src/lj_asm.c
index c7365404..a01b4e52 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -1730,7 +1730,7 @@ static void asm_setup_regsp(ASMState *as)
1730 break; 1730 break;
1731 case IR_FPMATH: 1731 case IR_FPMATH:
1732#if LJ_TARGET_X86ORX64 1732#if LJ_TARGET_X86ORX64
1733 if (ir->op2 == IRFPM_EXP2) { /* May be joined to lj_vm_pow_sse. */ 1733 if (ir->op2 == IRFPM_EXP2) { /* May be joined to pow. */
1734 ir->prev = REGSP_HINT(RID_XMM0); 1734 ir->prev = REGSP_HINT(RID_XMM0);
1735#if !LJ_64 1735#if !LJ_64
1736 if (as->evenspill < 4) /* Leave room for 16 byte scratch area. */ 1736 if (as->evenspill < 4) /* Leave room for 16 byte scratch area. */
diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h
index e80f7582..b41e3dfe 100644
--- a/src/lj_asm_mips.h
+++ b/src/lj_asm_mips.h
@@ -1001,7 +1001,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
1001 if (irt_isint(t)) { 1001 if (irt_isint(t)) {
1002 Reg tmp = ra_scratch(as, RSET_FPR); 1002 Reg tmp = ra_scratch(as, RSET_FPR);
1003 emit_tg(as, MIPSI_MFC1, dest, tmp); 1003 emit_tg(as, MIPSI_MFC1, dest, tmp);
1004 emit_fg(as, MIPSI_CVT_W_D, tmp, tmp); 1004 emit_fg(as, MIPSI_TRUNC_W_D, tmp, tmp);
1005 dest = tmp; 1005 dest = tmp;
1006 t.irt = IRT_NUM; /* Check for original type. */ 1006 t.irt = IRT_NUM; /* Check for original type. */
1007 } else { 1007 } else {
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
index e9c53a09..9dba6b70 100644
--- a/src/lj_asm_x86.h
+++ b/src/lj_asm_x86.h
@@ -551,7 +551,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
551 if (ra_hasreg(dest)) { 551 if (ra_hasreg(dest)) {
552 ra_free(as, dest); 552 ra_free(as, dest);
553 ra_modified(as, dest); 553 ra_modified(as, dest);
554 emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, 554 emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS,
555 dest, RID_ESP, ofs); 555 dest, RID_ESP, ofs);
556 } 556 }
557 if ((ci->flags & CCI_CASTU64)) { 557 if ((ci->flags & CCI_CASTU64)) {
@@ -662,8 +662,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
662 asm_guardcc(as, CC_NE); 662 asm_guardcc(as, CC_NE);
663 emit_rr(as, XO_UCOMISD, left, tmp); 663 emit_rr(as, XO_UCOMISD, left, tmp);
664 emit_rr(as, XO_CVTSI2SD, tmp, dest); 664 emit_rr(as, XO_CVTSI2SD, tmp, dest);
665 if (!(as->flags & JIT_F_SPLIT_XMM)) 665 emit_rr(as, XO_XORPS, tmp, tmp); /* Avoid partial register stall. */
666 emit_rr(as, XO_XORPS, tmp, tmp); /* Avoid partial register stall. */
667 emit_rr(as, XO_CVTTSD2SI, dest, left); 666 emit_rr(as, XO_CVTTSD2SI, dest, left);
668 /* Can't fuse since left is needed twice. */ 667 /* Can't fuse since left is needed twice. */
669} 668}
@@ -719,8 +718,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
719 emit_mrm(as, irt_isnum(ir->t) ? XO_CVTSI2SD : XO_CVTSI2SS, 718 emit_mrm(as, irt_isnum(ir->t) ? XO_CVTSI2SD : XO_CVTSI2SS,
720 dest|((LJ_64 && (st64 || st == IRT_U32)) ? REX_64 : 0), left); 719 dest|((LJ_64 && (st64 || st == IRT_U32)) ? REX_64 : 0), left);
721 } 720 }
722 if (!(as->flags & JIT_F_SPLIT_XMM)) 721 emit_rr(as, XO_XORPS, dest, dest); /* Avoid partial register stall. */
723 emit_rr(as, XO_XORPS, dest, dest); /* Avoid partial register stall. */
724 } else if (stfp) { /* FP to integer conversion. */ 722 } else if (stfp) { /* FP to integer conversion. */
725 if (irt_isguard(ir->t)) { 723 if (irt_isguard(ir->t)) {
726 /* Checked conversions are only supported from number to int. */ 724 /* Checked conversions are only supported from number to int. */
@@ -728,9 +726,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
728 asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); 726 asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
729 } else { 727 } else {
730 Reg dest = ra_dest(as, ir, RSET_GPR); 728 Reg dest = ra_dest(as, ir, RSET_GPR);
731 x86Op op = st == IRT_NUM ? 729 x86Op op = st == IRT_NUM ? XO_CVTTSD2SI : XO_CVTTSS2SI;
732 ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSD2SI : XO_CVTSD2SI) :
733 ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSS2SI : XO_CVTSS2SI);
734 if (LJ_64 ? irt_isu64(ir->t) : irt_isu32(ir->t)) { 730 if (LJ_64 ? irt_isu64(ir->t) : irt_isu32(ir->t)) {
735 /* LJ_64: For inputs >= 2^63 add -2^64, convert again. */ 731 /* LJ_64: For inputs >= 2^63 add -2^64, convert again. */
736 /* LJ_32: For inputs >= 2^31 add -2^31, convert again and add 2^31. */ 732 /* LJ_32: For inputs >= 2^31 add -2^31, convert again and add 2^31. */
@@ -824,8 +820,7 @@ static void asm_conv_fp_int64(ASMState *as, IRIns *ir)
824 if (ra_hasreg(dest)) { 820 if (ra_hasreg(dest)) {
825 ra_free(as, dest); 821 ra_free(as, dest);
826 ra_modified(as, dest); 822 ra_modified(as, dest);
827 emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, 823 emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, dest, RID_ESP, ofs);
828 dest, RID_ESP, ofs);
829 } 824 }
830 emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd, 825 emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd,
831 irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs); 826 irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs);
@@ -853,7 +848,6 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
853 Reg lo, hi; 848 Reg lo, hi;
854 lua_assert(st == IRT_NUM || st == IRT_FLOAT); 849 lua_assert(st == IRT_NUM || st == IRT_FLOAT);
855 lua_assert(dt == IRT_I64 || dt == IRT_U64); 850 lua_assert(dt == IRT_I64 || dt == IRT_U64);
856 lua_assert(((ir-1)->op2 & IRCONV_TRUNC));
857 hi = ra_dest(as, ir, RSET_GPR); 851 hi = ra_dest(as, ir, RSET_GPR);
858 lo = ra_dest(as, ir-1, rset_exclude(RSET_GPR, hi)); 852 lo = ra_dest(as, ir-1, rset_exclude(RSET_GPR, hi));
859 if (ra_used(ir-1)) emit_rmro(as, XO_MOV, lo, RID_ESP, 0); 853 if (ra_used(ir-1)) emit_rmro(as, XO_MOV, lo, RID_ESP, 0);
@@ -1262,7 +1256,7 @@ static void asm_fxload(ASMState *as, IRIns *ir)
1262 case IRT_U8: xo = XO_MOVZXb; break; 1256 case IRT_U8: xo = XO_MOVZXb; break;
1263 case IRT_I16: xo = XO_MOVSXw; break; 1257 case IRT_I16: xo = XO_MOVSXw; break;
1264 case IRT_U16: xo = XO_MOVZXw; break; 1258 case IRT_U16: xo = XO_MOVZXw; break;
1265 case IRT_NUM: xo = XMM_MOVRM(as); break; 1259 case IRT_NUM: xo = XO_MOVSD; break;
1266 case IRT_FLOAT: xo = XO_MOVSS; break; 1260 case IRT_FLOAT: xo = XO_MOVSS; break;
1267 default: 1261 default:
1268 if (LJ_64 && irt_is64(ir->t)) 1262 if (LJ_64 && irt_is64(ir->t))
@@ -1376,7 +1370,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
1376 RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR; 1370 RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
1377 Reg dest = ra_dest(as, ir, allow); 1371 Reg dest = ra_dest(as, ir, allow);
1378 asm_fuseahuref(as, ir->op1, RSET_GPR); 1372 asm_fuseahuref(as, ir->op1, RSET_GPR);
1379 emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XMM_MOVRM(as), dest, RID_MRM); 1373 emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XO_MOVSD, dest, RID_MRM);
1380 } else { 1374 } else {
1381 asm_fuseahuref(as, ir->op1, RSET_GPR); 1375 asm_fuseahuref(as, ir->op1, RSET_GPR);
1382 } 1376 }
@@ -1442,7 +1436,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
1442 Reg left = ra_scratch(as, RSET_FPR); 1436 Reg left = ra_scratch(as, RSET_FPR);
1443 asm_tointg(as, ir, left); /* Frees dest reg. Do this before base alloc. */ 1437 asm_tointg(as, ir, left); /* Frees dest reg. Do this before base alloc. */
1444 base = ra_alloc1(as, REF_BASE, RSET_GPR); 1438 base = ra_alloc1(as, REF_BASE, RSET_GPR);
1445 emit_rmro(as, XMM_MOVRM(as), left, base, ofs); 1439 emit_rmro(as, XO_MOVSD, left, base, ofs);
1446 t.irt = IRT_NUM; /* Continue with a regular number type check. */ 1440 t.irt = IRT_NUM; /* Continue with a regular number type check. */
1447#if LJ_64 1441#if LJ_64
1448 } else if (irt_islightud(t)) { 1442 } else if (irt_islightud(t)) {
@@ -1460,11 +1454,9 @@ static void asm_sload(ASMState *as, IRIns *ir)
1460 lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); 1454 lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
1461 if ((ir->op2 & IRSLOAD_CONVERT)) { 1455 if ((ir->op2 & IRSLOAD_CONVERT)) {
1462 t.irt = irt_isint(t) ? IRT_NUM : IRT_INT; /* Check for original type. */ 1456 t.irt = irt_isint(t) ? IRT_NUM : IRT_INT; /* Check for original type. */
1463 emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTSD2SI, dest, base, ofs); 1457 emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTTSD2SI, dest, base, ofs);
1464 } else if (irt_isnum(t)) {
1465 emit_rmro(as, XMM_MOVRM(as), dest, base, ofs);
1466 } else { 1458 } else {
1467 emit_rmro(as, XO_MOV, dest, base, ofs); 1459 emit_rmro(as, irt_isnum(t) ? XO_MOVSD : XO_MOV, dest, base, ofs);
1468 } 1460 }
1469 } else { 1461 } else {
1470 if (!(ir->op2 & IRSLOAD_TYPECHECK)) 1462 if (!(ir->op2 & IRSLOAD_TYPECHECK))
@@ -1696,7 +1688,7 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
1696 if (ra_hasreg(dest)) { 1688 if (ra_hasreg(dest)) {
1697 ra_free(as, dest); 1689 ra_free(as, dest);
1698 ra_modified(as, dest); 1690 ra_modified(as, dest);
1699 emit_rmro(as, XMM_MOVRM(as), dest, RID_ESP, ofs); 1691 emit_rmro(as, XO_MOVSD, dest, RID_ESP, ofs);
1700 } 1692 }
1701 emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs); 1693 emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs);
1702 switch (fpm) { /* st0 = lj_vm_*(st0) */ 1694 switch (fpm) { /* st0 = lj_vm_*(st0) */
diff --git a/src/lj_bc.h b/src/lj_bc.h
index 56e71dd9..ac9cc5e1 100644
--- a/src/lj_bc.h
+++ b/src/lj_bc.h
@@ -89,6 +89,8 @@
89 _(ISFC, dst, ___, var, ___) \ 89 _(ISFC, dst, ___, var, ___) \
90 _(IST, ___, ___, var, ___) \ 90 _(IST, ___, ___, var, ___) \
91 _(ISF, ___, ___, var, ___) \ 91 _(ISF, ___, ___, var, ___) \
92 _(ISTYPE, var, ___, lit, ___) \
93 _(ISNUM, var, ___, lit, ___) \
92 \ 94 \
93 /* Unary ops. */ \ 95 /* Unary ops. */ \
94 _(MOV, dst, ___, var, ___) \ 96 _(MOV, dst, ___, var, ___) \
@@ -143,10 +145,12 @@
143 _(TGETV, dst, var, var, index) \ 145 _(TGETV, dst, var, var, index) \
144 _(TGETS, dst, var, str, index) \ 146 _(TGETS, dst, var, str, index) \
145 _(TGETB, dst, var, lit, index) \ 147 _(TGETB, dst, var, lit, index) \
148 _(TGETR, dst, var, var, index) \
146 _(TSETV, var, var, var, newindex) \ 149 _(TSETV, var, var, var, newindex) \
147 _(TSETS, var, var, str, newindex) \ 150 _(TSETS, var, var, str, newindex) \
148 _(TSETB, var, var, lit, newindex) \ 151 _(TSETB, var, var, lit, newindex) \
149 _(TSETM, base, ___, num, newindex) \ 152 _(TSETM, base, ___, num, newindex) \
153 _(TSETR, var, var, var, newindex) \
150 \ 154 \
151 /* Calls and vararg handling. T = tail call. */ \ 155 /* Calls and vararg handling. T = tail call. */ \
152 _(CALLM, base, lit, lit, call) \ 156 _(CALLM, base, lit, lit, call) \
diff --git a/src/lj_bcdump.h b/src/lj_bcdump.h
index e660156d..22a8b823 100644
--- a/src/lj_bcdump.h
+++ b/src/lj_bcdump.h
@@ -36,7 +36,7 @@
36/* If you perform *any* kind of private modifications to the bytecode itself 36/* If you perform *any* kind of private modifications to the bytecode itself
37** or to the dump format, you *must* set BCDUMP_VERSION to 0x80 or higher. 37** or to the dump format, you *must* set BCDUMP_VERSION to 0x80 or higher.
38*/ 38*/
39#define BCDUMP_VERSION 1 39#define BCDUMP_VERSION 2
40 40
41/* Compatibility flags. */ 41/* Compatibility flags. */
42#define BCDUMP_F_BE 0x01 42#define BCDUMP_F_BE 0x01
@@ -61,6 +61,7 @@ enum {
61 61
62LJ_FUNC int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, 62LJ_FUNC int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer,
63 void *data, int strip); 63 void *data, int strip);
64LJ_FUNC GCproto *lj_bcread_proto(LexState *ls);
64LJ_FUNC GCproto *lj_bcread(LexState *ls); 65LJ_FUNC GCproto *lj_bcread(LexState *ls);
65 66
66#endif 67#endif
diff --git a/src/lj_bcread.c b/src/lj_bcread.c
index 2b5ba855..7bb16a60 100644
--- a/src/lj_bcread.c
+++ b/src/lj_bcread.c
@@ -9,6 +9,7 @@
9#include "lj_obj.h" 9#include "lj_obj.h"
10#include "lj_gc.h" 10#include "lj_gc.h"
11#include "lj_err.h" 11#include "lj_err.h"
12#include "lj_buf.h"
12#include "lj_str.h" 13#include "lj_str.h"
13#include "lj_tab.h" 14#include "lj_tab.h"
14#include "lj_bc.h" 15#include "lj_bc.h"
@@ -42,80 +43,69 @@ static LJ_NOINLINE void bcread_error(LexState *ls, ErrMsg em)
42 lj_err_throw(L, LUA_ERRSYNTAX); 43 lj_err_throw(L, LUA_ERRSYNTAX);
43} 44}
44 45
45/* Resize input buffer. */ 46/* Refill buffer. */
46static void bcread_resize(LexState *ls, MSize len)
47{
48 if (ls->sb.sz < len) {
49 MSize sz = ls->sb.sz * 2;
50 while (len > sz) sz = sz * 2;
51 lj_str_resizebuf(ls->L, &ls->sb, sz);
52 /* Caveat: this may change ls->sb.buf which may affect ls->p. */
53 }
54}
55
56/* Refill buffer if needed. */
57static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need) 47static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need)
58{ 48{
59 lua_assert(len != 0); 49 lua_assert(len != 0);
60 if (len > LJ_MAX_MEM || ls->current < 0) 50 if (len > LJ_MAX_MEM || ls->c < 0)
61 bcread_error(ls, LJ_ERR_BCBAD); 51 bcread_error(ls, LJ_ERR_BCBAD);
62 do { 52 do {
63 const char *buf; 53 const char *buf;
64 size_t size; 54 size_t sz;
65 if (ls->n) { /* Copy remainder to buffer. */ 55 char *p = sbufB(&ls->sb);
66 if (ls->sb.n) { /* Move down in buffer. */ 56 MSize n = (MSize)(ls->pe - ls->p);
67 lua_assert(ls->p + ls->n == ls->sb.buf + ls->sb.n); 57 if (n) { /* Copy remainder to buffer. */
68 if (ls->n != ls->sb.n) 58 if (sbuflen(&ls->sb)) { /* Move down in buffer. */
69 memmove(ls->sb.buf, ls->p, ls->n); 59 lua_assert(ls->pe == sbufP(&ls->sb));
60 if (ls->p != p) memmove(p, ls->p, n);
70 } else { /* Copy from buffer provided by reader. */ 61 } else { /* Copy from buffer provided by reader. */
71 bcread_resize(ls, len); 62 p = lj_buf_need(&ls->sb, len);
72 memcpy(ls->sb.buf, ls->p, ls->n); 63 memcpy(p, ls->p, n);
73 } 64 }
74 ls->p = ls->sb.buf; 65 ls->p = p;
66 ls->pe = p + n;
75 } 67 }
76 ls->sb.n = ls->n; 68 setsbufP(&ls->sb, p + n);
77 buf = ls->rfunc(ls->L, ls->rdata, &size); /* Get more data from reader. */ 69 buf = ls->rfunc(ls->L, ls->rdata, &sz); /* Get more data from reader. */
78 if (buf == NULL || size == 0) { /* EOF? */ 70 if (buf == NULL || sz == 0) { /* EOF? */
79 if (need) bcread_error(ls, LJ_ERR_BCBAD); 71 if (need) bcread_error(ls, LJ_ERR_BCBAD);
80 ls->current = -1; /* Only bad if we get called again. */ 72 ls->c = -1; /* Only bad if we get called again. */
81 break; 73 break;
82 } 74 }
83 if (ls->sb.n) { /* Append to buffer. */ 75 if (n) { /* Append to buffer. */
84 MSize n = ls->sb.n + (MSize)size; 76 n += (MSize)sz;
85 bcread_resize(ls, n < len ? len : n); 77 p = lj_buf_need(&ls->sb, n < len ? len : n);
86 memcpy(ls->sb.buf + ls->sb.n, buf, size); 78 memcpy(sbufP(&ls->sb), buf, sz);
87 ls->n = ls->sb.n = n; 79 setsbufP(&ls->sb, p + n);
88 ls->p = ls->sb.buf; 80 ls->p = p;
81 ls->pe = p + n;
89 } else { /* Return buffer provided by reader. */ 82 } else { /* Return buffer provided by reader. */
90 ls->n = (MSize)size;
91 ls->p = buf; 83 ls->p = buf;
84 ls->pe = buf + sz;
92 } 85 }
93 } while (ls->n < len); 86 } while (ls->p + len > ls->pe);
94} 87}
95 88
96/* Need a certain number of bytes. */ 89/* Need a certain number of bytes. */
97static LJ_AINLINE void bcread_need(LexState *ls, MSize len) 90static LJ_AINLINE void bcread_need(LexState *ls, MSize len)
98{ 91{
99 if (LJ_UNLIKELY(ls->n < len)) 92 if (LJ_UNLIKELY(ls->p + len > ls->pe))
100 bcread_fill(ls, len, 1); 93 bcread_fill(ls, len, 1);
101} 94}
102 95
103/* Want to read up to a certain number of bytes, but may need less. */ 96/* Want to read up to a certain number of bytes, but may need less. */
104static LJ_AINLINE void bcread_want(LexState *ls, MSize len) 97static LJ_AINLINE void bcread_want(LexState *ls, MSize len)
105{ 98{
106 if (LJ_UNLIKELY(ls->n < len)) 99 if (LJ_UNLIKELY(ls->p + len > ls->pe))
107 bcread_fill(ls, len, 0); 100 bcread_fill(ls, len, 0);
108} 101}
109 102
110#define bcread_dec(ls) check_exp(ls->n > 0, ls->n--)
111#define bcread_consume(ls, len) check_exp(ls->n >= (len), ls->n -= (len))
112
113/* Return memory block from buffer. */ 103/* Return memory block from buffer. */
114static uint8_t *bcread_mem(LexState *ls, MSize len) 104static LJ_AINLINE uint8_t *bcread_mem(LexState *ls, MSize len)
115{ 105{
116 uint8_t *p = (uint8_t *)ls->p; 106 uint8_t *p = (uint8_t *)ls->p;
117 bcread_consume(ls, len); 107 ls->p += len;
118 ls->p = (char *)p + len; 108 lua_assert(ls->p <= ls->pe);
119 return p; 109 return p;
120} 110}
121 111
@@ -128,25 +118,15 @@ static void bcread_block(LexState *ls, void *q, MSize len)
128/* Read byte from buffer. */ 118/* Read byte from buffer. */
129static LJ_AINLINE uint32_t bcread_byte(LexState *ls) 119static LJ_AINLINE uint32_t bcread_byte(LexState *ls)
130{ 120{
131 bcread_dec(ls); 121 lua_assert(ls->p < ls->pe);
132 return (uint32_t)(uint8_t)*ls->p++; 122 return (uint32_t)(uint8_t)*ls->p++;
133} 123}
134 124
135/* Read ULEB128 value from buffer. */ 125/* Read ULEB128 value from buffer. */
136static uint32_t bcread_uleb128(LexState *ls) 126static LJ_AINLINE uint32_t bcread_uleb128(LexState *ls)
137{ 127{
138 const uint8_t *p = (const uint8_t *)ls->p; 128 uint32_t v = lj_buf_ruleb128(&ls->p);
139 uint32_t v = *p++; 129 lua_assert(ls->p <= ls->pe);
140 if (LJ_UNLIKELY(v >= 0x80)) {
141 int sh = 0;
142 v &= 0x7f;
143 do {
144 v |= ((*p & 0x7f) << (sh += 7));
145 bcread_dec(ls);
146 } while (*p++ >= 0x80);
147 }
148 bcread_dec(ls);
149 ls->p = (char *)p;
150 return v; 130 return v;
151} 131}
152 132
@@ -160,11 +140,10 @@ static uint32_t bcread_uleb128_33(LexState *ls)
160 v &= 0x3f; 140 v &= 0x3f;
161 do { 141 do {
162 v |= ((*p & 0x7f) << (sh += 7)); 142 v |= ((*p & 0x7f) << (sh += 7));
163 bcread_dec(ls);
164 } while (*p++ >= 0x80); 143 } while (*p++ >= 0x80);
165 } 144 }
166 bcread_dec(ls);
167 ls->p = (char *)p; 145 ls->p = (char *)p;
146 lua_assert(ls->p <= ls->pe);
168 return v; 147 return v;
169} 148}
170 149
@@ -326,25 +305,13 @@ static void bcread_uv(LexState *ls, GCproto *pt, MSize sizeuv)
326} 305}
327 306
328/* Read a prototype. */ 307/* Read a prototype. */
329static GCproto *bcread_proto(LexState *ls) 308GCproto *lj_bcread_proto(LexState *ls)
330{ 309{
331 GCproto *pt; 310 GCproto *pt;
332 MSize framesize, numparams, flags, sizeuv, sizekgc, sizekn, sizebc, sizept; 311 MSize framesize, numparams, flags, sizeuv, sizekgc, sizekn, sizebc, sizept;
333 MSize ofsk, ofsuv, ofsdbg; 312 MSize ofsk, ofsuv, ofsdbg;
334 MSize sizedbg = 0; 313 MSize sizedbg = 0;
335 BCLine firstline = 0, numline = 0; 314 BCLine firstline = 0, numline = 0;
336 MSize len, startn;
337
338 /* Read length. */
339 if (ls->n > 0 && ls->p[0] == 0) { /* Shortcut EOF. */
340 ls->n--; ls->p++;
341 return NULL;
342 }
343 bcread_want(ls, 5);
344 len = bcread_uleb128(ls);
345 if (!len) return NULL; /* EOF */
346 bcread_need(ls, len);
347 startn = ls->n;
348 315
349 /* Read prototype header. */ 316 /* Read prototype header. */
350 flags = bcread_byte(ls); 317 flags = bcread_byte(ls);
@@ -413,9 +380,6 @@ static GCproto *bcread_proto(LexState *ls)
413 setmref(pt->uvinfo, NULL); 380 setmref(pt->uvinfo, NULL);
414 setmref(pt->varinfo, NULL); 381 setmref(pt->varinfo, NULL);
415 } 382 }
416
417 if (len != startn - ls->n)
418 bcread_error(ls, LJ_ERR_BCBAD);
419 return pt; 383 return pt;
420} 384}
421 385
@@ -455,19 +419,33 @@ static int bcread_header(LexState *ls)
455GCproto *lj_bcread(LexState *ls) 419GCproto *lj_bcread(LexState *ls)
456{ 420{
457 lua_State *L = ls->L; 421 lua_State *L = ls->L;
458 lua_assert(ls->current == BCDUMP_HEAD1); 422 lua_assert(ls->c == BCDUMP_HEAD1);
459 bcread_savetop(L, ls, L->top); 423 bcread_savetop(L, ls, L->top);
460 lj_str_resetbuf(&ls->sb); 424 lj_buf_reset(&ls->sb);
461 /* Check for a valid bytecode dump header. */ 425 /* Check for a valid bytecode dump header. */
462 if (!bcread_header(ls)) 426 if (!bcread_header(ls))
463 bcread_error(ls, LJ_ERR_BCFMT); 427 bcread_error(ls, LJ_ERR_BCFMT);
464 for (;;) { /* Process all prototypes in the bytecode dump. */ 428 for (;;) { /* Process all prototypes in the bytecode dump. */
465 GCproto *pt = bcread_proto(ls); 429 GCproto *pt;
466 if (!pt) break; 430 MSize len;
431 const char *startp;
432 /* Read length. */
433 if (ls->p < ls->pe && ls->p[0] == 0) { /* Shortcut EOF. */
434 ls->p++;
435 break;
436 }
437 bcread_want(ls, 5);
438 len = bcread_uleb128(ls);
439 if (!len) break; /* EOF */
440 bcread_need(ls, len);
441 startp = ls->p;
442 pt = lj_bcread_proto(ls);
443 if (ls->p != startp + len)
444 bcread_error(ls, LJ_ERR_BCBAD);
467 setprotoV(L, L->top, pt); 445 setprotoV(L, L->top, pt);
468 incr_top(L); 446 incr_top(L);
469 } 447 }
470 if ((int32_t)ls->n > 0 || L->top-1 != bcread_oldtop(L, ls)) 448 if (ls->p < ls->pe || L->top-1 != bcread_oldtop(L, ls))
471 bcread_error(ls, LJ_ERR_BCBAD); 449 bcread_error(ls, LJ_ERR_BCBAD);
472 /* Pop off last prototype. */ 450 /* Pop off last prototype. */
473 L->top--; 451 L->top--;
diff --git a/src/lj_bcwrite.c b/src/lj_bcwrite.c
index 4805d515..b3289a13 100644
--- a/src/lj_bcwrite.c
+++ b/src/lj_bcwrite.c
@@ -8,6 +8,7 @@
8 8
9#include "lj_obj.h" 9#include "lj_obj.h"
10#include "lj_gc.h" 10#include "lj_gc.h"
11#include "lj_buf.h"
11#include "lj_str.h" 12#include "lj_str.h"
12#include "lj_bc.h" 13#include "lj_bc.h"
13#if LJ_HASFFI 14#if LJ_HASFFI
@@ -23,7 +24,6 @@
23/* Context for bytecode writer. */ 24/* Context for bytecode writer. */
24typedef struct BCWriteCtx { 25typedef struct BCWriteCtx {
25 SBuf sb; /* Output buffer. */ 26 SBuf sb; /* Output buffer. */
26 lua_State *L; /* Lua state. */
27 GCproto *pt; /* Root prototype. */ 27 GCproto *pt; /* Root prototype. */
28 lua_Writer wfunc; /* Writer callback. */ 28 lua_Writer wfunc; /* Writer callback. */
29 void *wdata; /* Writer callback data. */ 29 void *wdata; /* Writer callback data. */
@@ -31,85 +31,44 @@ typedef struct BCWriteCtx {
31 int status; /* Status from writer callback. */ 31 int status; /* Status from writer callback. */
32} BCWriteCtx; 32} BCWriteCtx;
33 33
34/* -- Output buffer handling ---------------------------------------------- */
35
36/* Resize buffer if needed. */
37static LJ_NOINLINE void bcwrite_resize(BCWriteCtx *ctx, MSize len)
38{
39 MSize sz = ctx->sb.sz * 2;
40 while (ctx->sb.n + len > sz) sz = sz * 2;
41 lj_str_resizebuf(ctx->L, &ctx->sb, sz);
42}
43
44/* Need a certain amount of buffer space. */
45static LJ_AINLINE void bcwrite_need(BCWriteCtx *ctx, MSize len)
46{
47 if (LJ_UNLIKELY(ctx->sb.n + len > ctx->sb.sz))
48 bcwrite_resize(ctx, len);
49}
50
51/* Add memory block to buffer. */
52static void bcwrite_block(BCWriteCtx *ctx, const void *p, MSize len)
53{
54 uint8_t *q = (uint8_t *)(ctx->sb.buf + ctx->sb.n);
55 MSize i;
56 ctx->sb.n += len;
57 for (i = 0; i < len; i++) q[i] = ((uint8_t *)p)[i];
58}
59
60/* Add byte to buffer. */
61static LJ_AINLINE void bcwrite_byte(BCWriteCtx *ctx, uint8_t b)
62{
63 ctx->sb.buf[ctx->sb.n++] = b;
64}
65
66/* Add ULEB128 value to buffer. */
67static void bcwrite_uleb128(BCWriteCtx *ctx, uint32_t v)
68{
69 MSize n = ctx->sb.n;
70 uint8_t *p = (uint8_t *)ctx->sb.buf;
71 for (; v >= 0x80; v >>= 7)
72 p[n++] = (uint8_t)((v & 0x7f) | 0x80);
73 p[n++] = (uint8_t)v;
74 ctx->sb.n = n;
75}
76
77/* -- Bytecode writer ----------------------------------------------------- */ 34/* -- Bytecode writer ----------------------------------------------------- */
78 35
79/* Write a single constant key/value of a template table. */ 36/* Write a single constant key/value of a template table. */
/*
** Serialize one constant key or value `o` of a template table into the
** output buffer of `ctx`. In this rendering each line pairs the pre-change
** text (left line number) with the post-change text (right line number).
**
** Post-change version: reserves 1+10 bytes up front (one type byte plus two
** ULEB128 values), re-reserves 5+len bytes for string constants, writes
** through the local cursor `p`, and commits the cursor with setsbufP() on
** both exit paths (the early return for narrowed integers and the common
** tail). `narrow` enables storing integral numbers as BCDUMP_KTAB_INT when
** LJ_DUALNUM is off.
*/
80static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow) 37static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow)
81{ 38{
82 bcwrite_need(ctx, 1+10); 39 char *p = lj_buf_more(&ctx->sb, 1+10);
83 if (tvisstr(o)) { 40 if (tvisstr(o)) {
84 const GCstr *str = strV(o); 41 const GCstr *str = strV(o);
85 MSize len = str->len; 42 MSize len = str->len;
86 bcwrite_need(ctx, 5+len); 43 p = lj_buf_more(&ctx->sb, 5+len);
87 bcwrite_uleb128(ctx, BCDUMP_KTAB_STR+len); 44 p = lj_buf_wuleb128(p, BCDUMP_KTAB_STR+len);
88 bcwrite_block(ctx, strdata(str), len); 45 p = lj_buf_wmem(p, strdata(str), len);
89 } else if (tvisint(o)) { 46 } else if (tvisint(o)) {
90 bcwrite_byte(ctx, BCDUMP_KTAB_INT); 47 *p++ = BCDUMP_KTAB_INT;
91 bcwrite_uleb128(ctx, intV(o)); 48 p = lj_buf_wuleb128(p, intV(o));
92 } else if (tvisnum(o)) { 49 } else if (tvisnum(o)) {
93 if (!LJ_DUALNUM && narrow) { /* Narrow number constants to integers. */ 50 if (!LJ_DUALNUM && narrow) { /* Narrow number constants to integers. */
94 lua_Number num = numV(o); 51 lua_Number num = numV(o);
95 int32_t k = lj_num2int(num); 52 int32_t k = lj_num2int(num);
96 if (num == (lua_Number)k) { /* -0 is never a constant. */ 53 if (num == (lua_Number)k) { /* -0 is never a constant. */
97 bcwrite_byte(ctx, BCDUMP_KTAB_INT); 54 *p++ = BCDUMP_KTAB_INT;
98 bcwrite_uleb128(ctx, k); 55 p = lj_buf_wuleb128(p, k);
56 setsbufP(&ctx->sb, p);
99 return; 57 return;
100 } 58 }
101 } 59 }
102 bcwrite_byte(ctx, BCDUMP_KTAB_NUM); 60 *p++ = BCDUMP_KTAB_NUM;
103 bcwrite_uleb128(ctx, o->u32.lo); 61 p = lj_buf_wuleb128(p, o->u32.lo);
104 bcwrite_uleb128(ctx, o->u32.hi); 62 p = lj_buf_wuleb128(p, o->u32.hi);
105 } else { 63 } else {
106 lua_assert(tvispri(o)); 64 lua_assert(tvispri(o));
107 bcwrite_byte(ctx, BCDUMP_KTAB_NIL+~itype(o)); 65 *p++ = BCDUMP_KTAB_NIL+~itype(o);
108 } 66 }
67 setsbufP(&ctx->sb, p);
109} 68}
110 69
111/* Write a template table. */ 70/* Write a template table. */
112static void bcwrite_ktab(BCWriteCtx *ctx, const GCtab *t) 71static void bcwrite_ktab(BCWriteCtx *ctx, char *p, const GCtab *t)
113{ 72{
114 MSize narray = 0, nhash = 0; 73 MSize narray = 0, nhash = 0;
115 if (t->asize > 0) { /* Determine max. length of array part. */ 74 if (t->asize > 0) { /* Determine max. length of array part. */
@@ -127,8 +86,9 @@ static void bcwrite_ktab(BCWriteCtx *ctx, const GCtab *t)
127 nhash += !tvisnil(&node[i].val); 86 nhash += !tvisnil(&node[i].val);
128 } 87 }
129 /* Write number of array slots and hash slots. */ 88 /* Write number of array slots and hash slots. */
130 bcwrite_uleb128(ctx, narray); 89 p = lj_buf_wuleb128(p, narray);
131 bcwrite_uleb128(ctx, nhash); 90 p = lj_buf_wuleb128(p, nhash);
91 setsbufP(&ctx->sb, p);
132 if (narray) { /* Write array entries (may contain nil). */ 92 if (narray) { /* Write array entries (may contain nil). */
133 MSize i; 93 MSize i;
134 TValue *o = tvref(t->array); 94 TValue *o = tvref(t->array);
@@ -155,6 +115,7 @@ static void bcwrite_kgc(BCWriteCtx *ctx, GCproto *pt)
155 for (i = 0; i < sizekgc; i++, kr++) { 115 for (i = 0; i < sizekgc; i++, kr++) {
156 GCobj *o = gcref(*kr); 116 GCobj *o = gcref(*kr);
157 MSize tp, need = 1; 117 MSize tp, need = 1;
118 char *p;
158 /* Determine constant type and needed size. */ 119 /* Determine constant type and needed size. */
159 if (o->gch.gct == ~LJ_TSTR) { 120 if (o->gch.gct == ~LJ_TSTR) {
160 tp = BCDUMP_KGC_STR + gco2str(o)->len; 121 tp = BCDUMP_KGC_STR + gco2str(o)->len;
@@ -181,24 +142,26 @@ static void bcwrite_kgc(BCWriteCtx *ctx, GCproto *pt)
181 need = 1+2*5; 142 need = 1+2*5;
182 } 143 }
183 /* Write constant type. */ 144 /* Write constant type. */
184 bcwrite_need(ctx, need); 145 p = lj_buf_more(&ctx->sb, need);
185 bcwrite_uleb128(ctx, tp); 146 p = lj_buf_wuleb128(p, tp);
186 /* Write constant data (if any). */ 147 /* Write constant data (if any). */
187 if (tp >= BCDUMP_KGC_STR) { 148 if (tp >= BCDUMP_KGC_STR) {
188 bcwrite_block(ctx, strdata(gco2str(o)), gco2str(o)->len); 149 p = lj_buf_wmem(p, strdata(gco2str(o)), gco2str(o)->len);
189 } else if (tp == BCDUMP_KGC_TAB) { 150 } else if (tp == BCDUMP_KGC_TAB) {
190 bcwrite_ktab(ctx, gco2tab(o)); 151 bcwrite_ktab(ctx, p, gco2tab(o));
152 continue;
191#if LJ_HASFFI 153#if LJ_HASFFI
192 } else if (tp != BCDUMP_KGC_CHILD) { 154 } else if (tp != BCDUMP_KGC_CHILD) {
193 cTValue *p = (TValue *)cdataptr(gco2cd(o)); 155 cTValue *q = (TValue *)cdataptr(gco2cd(o));
194 bcwrite_uleb128(ctx, p[0].u32.lo); 156 p = lj_buf_wuleb128(p, q[0].u32.lo);
195 bcwrite_uleb128(ctx, p[0].u32.hi); 157 p = lj_buf_wuleb128(p, q[0].u32.hi);
196 if (tp == BCDUMP_KGC_COMPLEX) { 158 if (tp == BCDUMP_KGC_COMPLEX) {
197 bcwrite_uleb128(ctx, p[1].u32.lo); 159 p = lj_buf_wuleb128(p, q[1].u32.lo);
198 bcwrite_uleb128(ctx, p[1].u32.hi); 160 p = lj_buf_wuleb128(p, q[1].u32.hi);
199 } 161 }
200#endif 162#endif
201 } 163 }
164 setsbufP(&ctx->sb, p);
202 } 165 }
203} 166}
204 167
@@ -207,7 +170,7 @@ static void bcwrite_knum(BCWriteCtx *ctx, GCproto *pt)
207{ 170{
208 MSize i, sizekn = pt->sizekn; 171 MSize i, sizekn = pt->sizekn;
209 cTValue *o = mref(pt->k, TValue); 172 cTValue *o = mref(pt->k, TValue);
210 bcwrite_need(ctx, 10*sizekn); 173 char *p = lj_buf_more(&ctx->sb, 10*sizekn);
211 for (i = 0; i < sizekn; i++, o++) { 174 for (i = 0; i < sizekn; i++, o++) {
212 int32_t k; 175 int32_t k;
213 if (tvisint(o)) { 176 if (tvisint(o)) {
@@ -220,58 +183,58 @@ static void bcwrite_knum(BCWriteCtx *ctx, GCproto *pt)
220 k = lj_num2int(num); 183 k = lj_num2int(num);
221 if (num == (lua_Number)k) { /* -0 is never a constant. */ 184 if (num == (lua_Number)k) { /* -0 is never a constant. */
222 save_int: 185 save_int:
223 bcwrite_uleb128(ctx, 2*(uint32_t)k | ((uint32_t)k & 0x80000000u)); 186 p = lj_buf_wuleb128(p, 2*(uint32_t)k | ((uint32_t)k & 0x80000000u));
224 if (k < 0) { 187 if (k < 0)
225 char *p = &ctx->sb.buf[ctx->sb.n-1]; 188 p[-1] = (p[-1] & 7) | ((k>>27) & 0x18);
226 *p = (*p & 7) | ((k>>27) & 0x18);
227 }
228 continue; 189 continue;
229 } 190 }
230 } 191 }
231 bcwrite_uleb128(ctx, 1+(2*o->u32.lo | (o->u32.lo & 0x80000000u))); 192 p = lj_buf_wuleb128(p, 1+(2*o->u32.lo | (o->u32.lo & 0x80000000u)));
232 if (o->u32.lo >= 0x80000000u) { 193 if (o->u32.lo >= 0x80000000u)
233 char *p = &ctx->sb.buf[ctx->sb.n-1]; 194 p[-1] = (p[-1] & 7) | ((o->u32.lo>>27) & 0x18);
234 *p = (*p & 7) | ((o->u32.lo>>27) & 0x18); 195 p = lj_buf_wuleb128(p, o->u32.hi);
235 }
236 bcwrite_uleb128(ctx, o->u32.hi);
237 } 196 }
238 } 197 }
198 setsbufP(&ctx->sb, p);
239} 199}
240 200
241/* Write bytecode instructions. */ 201/* Write bytecode instructions. */
/*
** Copy the bytecode instructions of `pt` (all but the first, header
** instruction) into the output buffer. In this rendering each line pairs the
** pre-change text (left line number) with the post-change text (right line
** number).
**
** Post-change version: takes the write cursor `p` and returns the cursor
** advanced past the copied instructions; the caller must already have
** reserved the space. With LJ_HASJIT, `q` remembers where the copy starts
** and, if the prototype has the PROTO_ILOOP flag or a trace, the opcode
** bytes of I*/J* loop instructions are rewritten in the copy (not in the
** prototype) back to their plain *FORL/ITERL/LOOP forms. For J* opcodes the
** operand bytes are restored from the start instruction of the referenced
** trace. The lua_State for L2J() is taken from the buffer via sbufL().
*/
242static void bcwrite_bytecode(BCWriteCtx *ctx, GCproto *pt) 202static char *bcwrite_bytecode(BCWriteCtx *ctx, char *p, GCproto *pt)
243{ 203{
244 MSize nbc = pt->sizebc-1; /* Omit the [JI]FUNC* header. */ 204 MSize nbc = pt->sizebc-1; /* Omit the [JI]FUNC* header. */
245#if LJ_HASJIT 205#if LJ_HASJIT
246 uint8_t *p = (uint8_t *)&ctx->sb.buf[ctx->sb.n]; 206 uint8_t *q = (uint8_t *)p;
247#endif 207#endif
248 bcwrite_block(ctx, proto_bc(pt)+1, nbc*(MSize)sizeof(BCIns)); 208 p = lj_buf_wmem(p, proto_bc(pt)+1, nbc*(MSize)sizeof(BCIns));
209 UNUSED(ctx);
249#if LJ_HASJIT 210#if LJ_HASJIT
250 /* Unpatch modified bytecode containing ILOOP/JLOOP etc. */ 211 /* Unpatch modified bytecode containing ILOOP/JLOOP etc. */
251 if ((pt->flags & PROTO_ILOOP) || pt->trace) { 212 if ((pt->flags & PROTO_ILOOP) || pt->trace) {
252 jit_State *J = L2J(ctx->L); 213 jit_State *J = L2J(sbufL(&ctx->sb));
253 MSize i; 214 MSize i;
254 for (i = 0; i < nbc; i++, p += sizeof(BCIns)) { 215 for (i = 0; i < nbc; i++, q += sizeof(BCIns)) {
255 BCOp op = (BCOp)p[LJ_ENDIAN_SELECT(0, 3)]; 216 BCOp op = (BCOp)q[LJ_ENDIAN_SELECT(0, 3)];
256 if (op == BC_IFORL || op == BC_IITERL || op == BC_ILOOP || 217 if (op == BC_IFORL || op == BC_IITERL || op == BC_ILOOP ||
257 op == BC_JFORI) { 218 op == BC_JFORI) {
258 p[LJ_ENDIAN_SELECT(0, 3)] = (uint8_t)(op-BC_IFORL+BC_FORL); 219 q[LJ_ENDIAN_SELECT(0, 3)] = (uint8_t)(op-BC_IFORL+BC_FORL);
259 } else if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) { 220 } else if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) {
260 BCReg rd = p[LJ_ENDIAN_SELECT(2, 1)] + (p[LJ_ENDIAN_SELECT(3, 0)] << 8); 221 BCReg rd = q[LJ_ENDIAN_SELECT(2, 1)] + (q[LJ_ENDIAN_SELECT(3, 0)] << 8);
261 BCIns ins = traceref(J, rd)->startins; 222 BCIns ins = traceref(J, rd)->startins;
262 p[LJ_ENDIAN_SELECT(0, 3)] = (uint8_t)(op-BC_JFORL+BC_FORL); 223 q[LJ_ENDIAN_SELECT(0, 3)] = (uint8_t)(op-BC_JFORL+BC_FORL);
263 p[LJ_ENDIAN_SELECT(2, 1)] = bc_c(ins); 224 q[LJ_ENDIAN_SELECT(2, 1)] = bc_c(ins);
264 p[LJ_ENDIAN_SELECT(3, 0)] = bc_b(ins); 225 q[LJ_ENDIAN_SELECT(3, 0)] = bc_b(ins);
265 } 226 }
266 } 227 }
267 } 228 }
268#endif 229#endif
230 return p;
269} 231}
270 232
271/* Write prototype. */ 233/* Write prototype. */
272static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt) 234static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt)
273{ 235{
274 MSize sizedbg = 0; 236 MSize sizedbg = 0;
237 char *p;
275 238
276 /* Recursively write children of prototype. */ 239 /* Recursively write children of prototype. */
277 if ((pt->flags & PROTO_CHILD)) { 240 if ((pt->flags & PROTO_CHILD)) {
@@ -285,31 +248,32 @@ static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt)
285 } 248 }
286 249
287 /* Start writing the prototype info to a buffer. */ 250 /* Start writing the prototype info to a buffer. */
288 lj_str_resetbuf(&ctx->sb); 251 p = lj_buf_need(&ctx->sb,
289 ctx->sb.n = 5; /* Leave room for final size. */ 252 5+4+6*5+(pt->sizebc-1)*(MSize)sizeof(BCIns)+pt->sizeuv*2);
290 bcwrite_need(ctx, 4+6*5+(pt->sizebc-1)*(MSize)sizeof(BCIns)+pt->sizeuv*2); 253 p += 5; /* Leave room for final size. */
291 254
292 /* Write prototype header. */ 255 /* Write prototype header. */
293 bcwrite_byte(ctx, (pt->flags & (PROTO_CHILD|PROTO_VARARG|PROTO_FFI))); 256 *p++ = (pt->flags & (PROTO_CHILD|PROTO_VARARG|PROTO_FFI));
294 bcwrite_byte(ctx, pt->numparams); 257 *p++ = pt->numparams;
295 bcwrite_byte(ctx, pt->framesize); 258 *p++ = pt->framesize;
296 bcwrite_byte(ctx, pt->sizeuv); 259 *p++ = pt->sizeuv;
297 bcwrite_uleb128(ctx, pt->sizekgc); 260 p = lj_buf_wuleb128(p, pt->sizekgc);
298 bcwrite_uleb128(ctx, pt->sizekn); 261 p = lj_buf_wuleb128(p, pt->sizekn);
299 bcwrite_uleb128(ctx, pt->sizebc-1); 262 p = lj_buf_wuleb128(p, pt->sizebc-1);
300 if (!ctx->strip) { 263 if (!ctx->strip) {
301 if (proto_lineinfo(pt)) 264 if (proto_lineinfo(pt))
302 sizedbg = pt->sizept - (MSize)((char *)proto_lineinfo(pt) - (char *)pt); 265 sizedbg = pt->sizept - (MSize)((char *)proto_lineinfo(pt) - (char *)pt);
303 bcwrite_uleb128(ctx, sizedbg); 266 p = lj_buf_wuleb128(p, sizedbg);
304 if (sizedbg) { 267 if (sizedbg) {
305 bcwrite_uleb128(ctx, pt->firstline); 268 p = lj_buf_wuleb128(p, pt->firstline);
306 bcwrite_uleb128(ctx, pt->numline); 269 p = lj_buf_wuleb128(p, pt->numline);
307 } 270 }
308 } 271 }
309 272
310 /* Write bytecode instructions and upvalue refs. */ 273 /* Write bytecode instructions and upvalue refs. */
311 bcwrite_bytecode(ctx, pt); 274 p = bcwrite_bytecode(ctx, p, pt);
312 bcwrite_block(ctx, proto_uv(pt), pt->sizeuv*2); 275 p = lj_buf_wmem(p, proto_uv(pt), pt->sizeuv*2);
276 setsbufP(&ctx->sb, p);
313 277
314 /* Write constants. */ 278 /* Write constants. */
315 bcwrite_kgc(ctx, pt); 279 bcwrite_kgc(ctx, pt);
@@ -317,18 +281,19 @@ static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt)
317 281
318 /* Write debug info, if not stripped. */ 282 /* Write debug info, if not stripped. */
319 if (sizedbg) { 283 if (sizedbg) {
320 bcwrite_need(ctx, sizedbg); 284 p = lj_buf_more(&ctx->sb, sizedbg);
321 bcwrite_block(ctx, proto_lineinfo(pt), sizedbg); 285 p = lj_buf_wmem(p, proto_lineinfo(pt), sizedbg);
286 setsbufP(&ctx->sb, p);
322 } 287 }
323 288
324 /* Pass buffer to writer function. */ 289 /* Pass buffer to writer function. */
325 if (ctx->status == 0) { 290 if (ctx->status == 0) {
326 MSize n = ctx->sb.n - 5; 291 MSize n = sbuflen(&ctx->sb) - 5;
327 MSize nn = (lj_fls(n)+8)*9 >> 6; 292 MSize nn = (lj_fls(n)+8)*9 >> 6;
328 ctx->sb.n = 5 - nn; 293 char *q = sbufB(&ctx->sb) + (5 - nn);
329 bcwrite_uleb128(ctx, n); /* Fill in final size. */ 294 p = lj_buf_wuleb128(q, n); /* Fill in final size. */
330 lua_assert(ctx->sb.n == 5); 295 lua_assert(p == sbufB(&ctx->sb) + 5);
331 ctx->status = ctx->wfunc(ctx->L, ctx->sb.buf+5-nn, nn+n, ctx->wdata); 296 ctx->status = ctx->wfunc(sbufL(&ctx->sb), q, nn+n, ctx->wdata);
332 } 297 }
333} 298}
334 299
@@ -338,20 +303,20 @@ static void bcwrite_header(BCWriteCtx *ctx)
338 GCstr *chunkname = proto_chunkname(ctx->pt); 303 GCstr *chunkname = proto_chunkname(ctx->pt);
339 const char *name = strdata(chunkname); 304 const char *name = strdata(chunkname);
340 MSize len = chunkname->len; 305 MSize len = chunkname->len;
341 lj_str_resetbuf(&ctx->sb); 306 char *p = lj_buf_need(&ctx->sb, 5+5+len);
342 bcwrite_need(ctx, 5+5+len); 307 *p++ = BCDUMP_HEAD1;
343 bcwrite_byte(ctx, BCDUMP_HEAD1); 308 *p++ = BCDUMP_HEAD2;
344 bcwrite_byte(ctx, BCDUMP_HEAD2); 309 *p++ = BCDUMP_HEAD3;
345 bcwrite_byte(ctx, BCDUMP_HEAD3); 310 *p++ = BCDUMP_VERSION;
346 bcwrite_byte(ctx, BCDUMP_VERSION); 311 *p++ = (ctx->strip ? BCDUMP_F_STRIP : 0) +
347 bcwrite_byte(ctx, (ctx->strip ? BCDUMP_F_STRIP : 0) + 312 (LJ_BE ? BCDUMP_F_BE : 0) +
348 (LJ_BE ? BCDUMP_F_BE : 0) + 313 ((ctx->pt->flags & PROTO_FFI) ? BCDUMP_F_FFI : 0);
349 ((ctx->pt->flags & PROTO_FFI) ? BCDUMP_F_FFI : 0));
350 if (!ctx->strip) { 314 if (!ctx->strip) {
351 bcwrite_uleb128(ctx, len); 315 p = lj_buf_wuleb128(p, len);
352 bcwrite_block(ctx, name, len); 316 p = lj_buf_wmem(p, name, len);
353 } 317 }
354 ctx->status = ctx->wfunc(ctx->L, ctx->sb.buf, ctx->sb.n, ctx->wdata); 318 ctx->status = ctx->wfunc(sbufL(&ctx->sb), sbufB(&ctx->sb),
319 (MSize)(p - sbufB(&ctx->sb)), ctx->wdata);
355} 320}
356 321
357/* Write footer of bytecode dump. */ 322/* Write footer of bytecode dump. */
@@ -359,7 +324,7 @@ static void bcwrite_footer(BCWriteCtx *ctx)
359{ 324{
360 if (ctx->status == 0) { 325 if (ctx->status == 0) {
361 uint8_t zero = 0; 326 uint8_t zero = 0;
362 ctx->status = ctx->wfunc(ctx->L, &zero, 1, ctx->wdata); 327 ctx->status = ctx->wfunc(sbufL(&ctx->sb), &zero, 1, ctx->wdata);
363 } 328 }
364} 329}
365 330
@@ -367,8 +332,8 @@ static void bcwrite_footer(BCWriteCtx *ctx)
367static TValue *cpwriter(lua_State *L, lua_CFunction dummy, void *ud) 332static TValue *cpwriter(lua_State *L, lua_CFunction dummy, void *ud)
368{ 333{
369 BCWriteCtx *ctx = (BCWriteCtx *)ud; 334 BCWriteCtx *ctx = (BCWriteCtx *)ud;
370 UNUSED(dummy); 335 UNUSED(L); UNUSED(dummy);
371 lj_str_resizebuf(L, &ctx->sb, 1024); /* Avoids resize for most prototypes. */ 336 lj_buf_need(&ctx->sb, 1024); /* Avoids resize for most prototypes. */
372 bcwrite_header(ctx); 337 bcwrite_header(ctx);
373 bcwrite_proto(ctx, ctx->pt); 338 bcwrite_proto(ctx, ctx->pt);
374 bcwrite_footer(ctx); 339 bcwrite_footer(ctx);
@@ -381,16 +346,15 @@ int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, void *data,
381{ 346{
382 BCWriteCtx ctx; 347 BCWriteCtx ctx;
383 int status; 348 int status;
384 ctx.L = L;
385 ctx.pt = pt; 349 ctx.pt = pt;
386 ctx.wfunc = writer; 350 ctx.wfunc = writer;
387 ctx.wdata = data; 351 ctx.wdata = data;
388 ctx.strip = strip; 352 ctx.strip = strip;
389 ctx.status = 0; 353 ctx.status = 0;
390 lj_str_initbuf(&ctx.sb); 354 lj_buf_init(L, &ctx.sb);
391 status = lj_vm_cpcall(L, NULL, &ctx, cpwriter); 355 status = lj_vm_cpcall(L, NULL, &ctx, cpwriter);
392 if (status == 0) status = ctx.status; 356 if (status == 0) status = ctx.status;
393 lj_str_freebuf(G(ctx.L), &ctx.sb); 357 lj_buf_free(G(sbufL(&ctx.sb)), &ctx.sb);
394 return status; 358 return status;
395} 359}
396 360
diff --git a/src/lj_buf.c b/src/lj_buf.c
new file mode 100644
index 00000000..c08d23c9
--- /dev/null
+++ b/src/lj_buf.c
@@ -0,0 +1,87 @@
1/*
2** Buffer handling.
3** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#include <stdio.h>
7
8#define lj_buf_c
9#define LUA_CORE
10
11#include "lj_obj.h"
12#include "lj_gc.h"
13#include "lj_err.h"
14#include "lj_buf.h"
15
/*
** Grow the buffer `sb` so that its capacity reaches at least up to `en`.
** `en` is the desired end pointer, expressed relative to the current base
** (callers pass base+size or write-pointer+size). The new capacity starts
** at max(old capacity, LJ_MIN_SBUF) and is doubled until it covers the
** request. The base, write pointer and end pointer are all rebased onto the
** reallocated block; the number of bytes already written is preserved.
*/
16LJ_NOINLINE void LJ_FASTCALL lj_buf_grow(SBuf *sb, char *en)
17{
18 lua_State *L = sbufL(sb);
19 char *b = sbufB(sb);
/* Requested total capacity, measured from the buffer base. */
20 MSize sz = (MSize)(en - b);
/* Current capacity; also the starting point for the new capacity. */
21 MSize osz = (MSize)(sbufE(sb) - b), nsz = osz;
/* Bytes already written; needed to restore the write pointer afterwards. */
22 MSize n = (MSize)(sbufP(sb) - b);
/* Reject oversized requests before doubling (presumably lj_err_mem does not return -- confirm). */
23 if (LJ_UNLIKELY(sz > LJ_MAX_MEM))
24 lj_err_mem(L);
25 if (nsz < LJ_MIN_SBUF) nsz = LJ_MIN_SBUF;
26 while (nsz < sz) nsz += nsz;
27 b = (char *)lj_mem_realloc(L, b, osz, nsz);
/* The block may have moved: rebase all three pointers. */
28 setmref(sb->b, b);
29 setmref(sb->p, b + n);
30 setmref(sb->e, b + nsz);
31}
32
/*
** Return the base of the global temporary buffer (G(L)->tmpbuf), grown to
** hold at least `sz` bytes. The buffer's owning state is set to `L` first,
** so that growth uses this state. The write pointer of the buffer is not
** modified here.
*/
33char * LJ_FASTCALL lj_buf_tmp(lua_State *L, MSize sz)
34{
35 SBuf *sb = &G(L)->tmpbuf;
36 setmref(sb->L, L);
37 return lj_buf_need(sb, sz);
38}
39
/*
** Halve the capacity of `sb` if it is larger than 2*LJ_MIN_SBUF; otherwise
** do nothing. The offset of the write pointer from the base is preserved
** across the reallocation.
** NOTE(review): the used length `n` is not compared against the halved
** capacity, so the write pointer would end up beyond the end pointer if more
** than half of the buffer is in use -- presumably callers only shrink
** (mostly) empty buffers; confirm.
*/
40void LJ_FASTCALL lj_buf_shrink(lua_State *L, SBuf *sb)
41{
42 char *b = sbufB(sb);
43 MSize osz = (MSize)(sbufE(sb) - b);
44 if (osz > 2*LJ_MIN_SBUF) {
45 MSize n = (MSize)(sbufP(sb) - b);
46 b = lj_mem_realloc(L, b, osz, (osz >> 1));
/* Rebase all pointers onto the (possibly moved) smaller block. */
47 setmref(sb->b, b);
48 setmref(sb->p, b + n);
49 setmref(sb->e, b + (osz >> 1));
50 }
51}
52
/*
** Copy `len` bytes from `q` to the raw write cursor `p`, one byte at a time,
** and return the cursor advanced past the copied bytes (p + len).
** No bounds check is performed: the caller must have reserved the space
** beforehand. With len == 0 nothing is written and `p` is returned as is.
*/
53char *lj_buf_wmem(char *p, const void *q, MSize len)
54{
55 const char *s = (const char *)q, *e = s + len;
56 while (s < e) *p++ = *s++;
57 return p;
58}
59
/*
** Append `len` bytes from `q` to the buffer `sb`: reserve room after the
** current write pointer (growing the buffer if needed), copy the bytes and
** commit the advanced write pointer.
*/
60void lj_buf_putmem(SBuf *sb, const void *q, MSize len)
61{
62 char *p = lj_buf_more(sb, len);
63 p = lj_buf_wmem(p, q, len);
64 setsbufP(sb, p);
65}
66
/*
** Decode one ULEB128-encoded 32 bit value starting at *pp and advance *pp
** past its last byte. Each byte contributes its low 7 bits, least
** significant group first; a set high bit (>= 0x80) means another byte
** follows. Single-byte values take the fast path without entering the loop.
** NOTE(review): there is no limit on the number of continuation bytes and
** no end-of-input check. For an encoding longer than 5 bytes the shift
** count `sh` reaches 35 or more, which is undefined for a 32 bit operand in
** C. Presumably callers guarantee well-formed, fully buffered input --
** confirm against the call sites.
*/
67uint32_t LJ_FASTCALL lj_buf_ruleb128(const char **pp)
68{
69 const uint8_t *p = (const uint8_t *)*pp;
70 uint32_t v = *p++;
71 if (LJ_UNLIKELY(v >= 0x80)) {
72 int sh = 0;
/* Drop the continuation flag of the first byte, then merge further 7 bit groups. */
73 v &= 0x7f;
74 do { v |= ((*p & 0x7f) << (sh += 7)); } while (*p++ >= 0x80);
75 }
76 *pp = (const char *)p;
77 return v;
78}
79
/*
** Encode `v` as ULEB128 at the raw write cursor `p` and return the cursor
** advanced past the encoding. Groups of 7 bits are emitted least significant
** first; every byte except the last has its high bit set. A 32 bit value
** takes between 1 and 5 bytes. No bounds check is performed: the caller must
** have reserved the space beforehand.
*/
80char * LJ_FASTCALL lj_buf_wuleb128(char *p, uint32_t v)
81{
82 for (; v >= 0x80; v >>= 7)
83 *p++ = (char)((v & 0x7f) | 0x80);
/* Final byte: remaining value is < 0x80, so the high bit is clear. */
84 *p++ = (char)v;
85 return p;
86}
87
diff --git a/src/lj_buf.h b/src/lj_buf.h
new file mode 100644
index 00000000..289eb01d
--- /dev/null
+++ b/src/lj_buf.h
@@ -0,0 +1,76 @@
1/*
2** Buffer handling.
3** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_BUF_H
7#define _LJ_BUF_H
8
9#include "lj_obj.h"
10#include "lj_gc.h"
11#include "lj_str.h"
12
13/* Resizable string buffers. Struct definition in lj_obj.h. */
/* Accessors for the buffer's base (B), write (P) and end (E) pointers. */
14#define sbufB(sb) (mref((sb)->b, char))
15#define sbufP(sb) (mref((sb)->p, char))
16#define sbufE(sb) (mref((sb)->e, char))
/* The lua_State stored in the buffer. */
17#define sbufL(sb) (mref((sb)->L, lua_State))
/* Capacity in bytes (E - B) and number of bytes written so far (P - B). */
18#define sbufsz(sb) ((MSize)(sbufE((sb)) - sbufB((sb))))
19#define sbuflen(sb) ((MSize)(sbufP((sb)) - sbufB((sb))))
/* Setters for the write pointer and the stored lua_State. */
20#define setsbufP(sb, q) (setmref((sb)->p, (q)))
21#define setsbufL(sb, l) (setmref((sb)->L, (l)))
22
23LJ_FUNC char * LJ_FASTCALL lj_buf_tmp(lua_State *L, MSize sz);
24LJ_FUNC void LJ_FASTCALL lj_buf_grow(SBuf *sb, char *en);
25LJ_FUNC void LJ_FASTCALL lj_buf_shrink(lua_State *L, SBuf *sb);
26
27LJ_FUNC char *lj_buf_wmem(char *p, const void *q, MSize len);
28LJ_FUNC void lj_buf_putmem(SBuf *sb, const void *q, MSize len);
29LJ_FUNC uint32_t LJ_FASTCALL lj_buf_ruleb128(const char **pp);
30LJ_FUNC char * LJ_FASTCALL lj_buf_wuleb128(char *p, uint32_t v);
31
/*
** Initialize `sb` as an empty buffer owned by state `L`: base, write and end
** pointers are all set to NULL. No memory is allocated here.
*/
32static LJ_AINLINE void lj_buf_init(lua_State *L, SBuf *sb)
33{
34 setsbufL(sb, L);
35 setmref(sb->p, NULL); setmref(sb->e, NULL); setmref(sb->b, NULL);
36}
37
/*
** Empty the buffer by moving the write pointer back to the base. The
** allocation and the end pointer are left untouched.
*/
38static LJ_AINLINE void lj_buf_reset(SBuf *sb)
39{
40 setmrefr(sb->p, sb->b);
41}
42
/*
** Release the buffer's memory (base pointer, full capacity) via
** lj_mem_free(). The pointers inside `sb` are not cleared here, so the
** buffer must not be used again without re-initializing it.
*/
43static LJ_AINLINE void lj_buf_free(global_State *g, SBuf *sb)
44{
45 lj_mem_free(g, sbufB(sb), sbufsz(sb));
46}
47
/*
** Create a string object via lj_str_new() from the bytes written so far
** (base pointer, sbuflen bytes). The buffer itself is not modified.
*/
48static LJ_AINLINE GCstr *lj_buf_str(lua_State *L, SBuf *sb)
49{
50 return lj_str_new(L, sbufB(sb), sbuflen(sb));
51}
52
/*
** Ensure the buffer has a total capacity of at least `sz` bytes, measured
** from the base, growing it if necessary. Returns the (possibly moved) base
** pointer -- not the write pointer -- and leaves the write offset unchanged.
** NOTE(review): `en` is formed as base + sz before the capacity check, so it
** may point past the current allocation (or be derived from the NULL base
** left by lj_buf_init). This relies on flat pointer arithmetic and
** comparison -- confirm this is acceptable on all targets.
*/
53static LJ_AINLINE char *lj_buf_need(SBuf *sb, MSize sz)
54{
55 char *en = sbufB(sb) + sz;
56 if (LJ_UNLIKELY(en > sbufE(sb)))
57 lj_buf_grow(sb, en);
/* Re-read the base: lj_buf_grow may have moved the block. */
58 return sbufB(sb);
59}
60
/*
** Ensure at least `sz` more bytes can be written after the current write
** pointer, growing the buffer if necessary. Returns the (possibly moved)
** write pointer. The write pointer is not advanced: callers write through
** the returned cursor and commit it with setsbufP().
** NOTE(review): `en` is formed as write-pointer + sz before the capacity
** check, so it may point past the current allocation; confirm this is
** acceptable on all targets.
*/
61static LJ_AINLINE char *lj_buf_more(SBuf *sb, MSize sz)
62{
63 char *en = sbufP(sb) + sz;
64 if (LJ_UNLIKELY(en > sbufE(sb)))
65 lj_buf_grow(sb, en);
/* Re-read the write pointer: lj_buf_grow may have moved the block. */
66 return sbufP(sb);
67}
68
/*
** Append a single byte to the buffer, growing it if necessary. `c` is
** converted to char before being stored.
*/
69static LJ_AINLINE void lj_buf_putb(SBuf *sb, int c)
70{
71 char *p = lj_buf_more(sb, 1);
72 *p++ = (char)c;
73 setsbufP(sb, p);
74}
75
76#endif
diff --git a/src/lj_carith.c b/src/lj_carith.c
index 18708d66..9f3208a8 100644
--- a/src/lj_carith.c
+++ b/src/lj_carith.c
@@ -11,10 +11,12 @@
11#include "lj_err.h" 11#include "lj_err.h"
12#include "lj_tab.h" 12#include "lj_tab.h"
13#include "lj_meta.h" 13#include "lj_meta.h"
14#include "lj_ir.h"
14#include "lj_ctype.h" 15#include "lj_ctype.h"
15#include "lj_cconv.h" 16#include "lj_cconv.h"
16#include "lj_cdata.h" 17#include "lj_cdata.h"
17#include "lj_carith.h" 18#include "lj_carith.h"
19#include "lj_strscan.h"
18 20
19/* -- C data arithmetic --------------------------------------------------- */ 21/* -- C data arithmetic --------------------------------------------------- */
20 22
@@ -270,6 +272,80 @@ int lj_carith_op(lua_State *L, MMS mm)
270 return lj_carith_meta(L, cts, &ca, mm); 272 return lj_carith_meta(L, cts, &ca, mm);
271} 273}
272 274
275/* -- 64 bit bit operations helpers --------------------------------------- */
276
/*
** B64DEF(name) expands to the signature of lj_carith_<name>(x, sh): a
** file-local always-inline function when LJ_64 is set, otherwise an
** externally visible, non-inlined function. The macro is only used for the
** five definitions below and is undefined afterwards.
*/
277#if LJ_64
278#define B64DEF(name) \
279 static LJ_AINLINE uint64_t lj_carith_##name(uint64_t x, int32_t sh)
280#else
281/* Not inlined on 32 bit archs, since some of these are quite lengthy. */
282#define B64DEF(name) \
283 uint64_t LJ_NOINLINE lj_carith_##name(uint64_t x, int32_t sh)
284#endif
285
/*
** 64 bit shifts and rotates. Only the low 6 bits of the shift count are
** used (sh&63), so the effective count is always in the range 0..63.
** sar64 shifts the value reinterpreted as int64_t; the result of
** right-shifting a negative signed value is implementation-defined in C.
*/
286B64DEF(shl64) { return x << (sh&63); }
287B64DEF(shr64) { return x >> (sh&63); }
288B64DEF(sar64) { return (uint64_t)((int64_t)x >> (sh&63)); }
289B64DEF(rol64) { return lj_rol(x, (sh&63)); }
290B64DEF(ror64) { return lj_ror(x, (sh&63)); }
291
292#undef B64DEF
293
/*
** Apply a 64 bit shift or rotate to `x` by `sh` and return the result.
** `op` selects the operation and is an IR opcode relative to IR_BSHL
** (i.e. IR_BSHL, IR_BSHR, IR_BSAR, IR_BROL or IR_BROR minus IR_BSHL).
** Any other value triggers lua_assert(0); `x` is then returned unchanged.
*/
294uint64_t lj_carith_shift64(uint64_t x, int32_t sh, int op)
295{
296 switch (op) {
297 case IR_BSHL-IR_BSHL: x = lj_carith_shl64(x, sh); break;
298 case IR_BSHR-IR_BSHL: x = lj_carith_shr64(x, sh); break;
299 case IR_BSAR-IR_BSHL: x = lj_carith_sar64(x, sh); break;
300 case IR_BROL-IR_BSHL: x = lj_carith_rol64(x, sh); break;
301 case IR_BROR-IR_BSHL: x = lj_carith_ror64(x, sh); break;
302 default: lua_assert(0); break;
303 }
304 return x;
305}
306
307/* Equivalent to lj_lib_checkbit(), but handles cdata. */
/*
** Fetch argument number `narg` (1-based, relative to L->base) as a 64 bit
** value for bit operations.
**
** - Missing argument, or a value that is neither a number, cdata nor a
**   string accepted by lj_strscan_number(): raises an argument type error
**   via lj_err_argt() (presumably does not return -- confirm).
** - cdata: the value is converted with lj_cconv_ct_ct() to the C type
**   identified by *id and returned. *id is forced to CTID_UINT64 when the
**   source is an unsigned 8 byte integer type, and set to CTID_INT64 when
**   it was still 0.
** - number (or convertible string): returned as a 32 bit integer, converted
**   to the uint64_t return type; *id is not touched on this path.
**
** `id` is an in/out parameter; callers presumably start with *id == 0 and
** pass the same variable for every argument -- confirm against call sites.
*/
308uint64_t lj_carith_check64(lua_State *L, int narg, CTypeID *id)
309{
310 TValue *o = L->base + narg-1;
311 if (o >= L->top) {
312 err:
313 lj_err_argt(L, narg, LUA_TNUMBER);
314 } else if (LJ_LIKELY(tvisnumber(o))) {
315 /* Handled below. */
316 } else if (tviscdata(o)) {
317 CTState *cts = ctype_cts(L);
318 uint8_t *sp = (uint8_t *)cdataptr(cdataV(o));
319 CTypeID sid = cdataV(o)->ctypeid;
320 CType *s = ctype_get(cts, sid);
321 uint64_t x;
/* For a reference type: follow the stored pointer and use the referenced type. */
322 if (ctype_isref(s->info)) {
323 sp = *(void **)sp;
324 sid = ctype_cid(s->info);
325 }
326 s = ctype_raw(cts, sid);
/* Enums are handled via their child type. */
327 if (ctype_isenum(s->info)) s = ctype_child(cts, s);
328 if ((s->info & (CTMASK_NUM|CTF_BOOL|CTF_FP|CTF_UNSIGNED)) ==
329 CTINFO(CT_NUM, CTF_UNSIGNED) && s->size == 8)
330 *id = CTID_UINT64; /* Use uint64_t, since it has the highest rank. */
331 else if (!*id)
332 *id = CTID_INT64; /* Use int64_t, unless already set. */
333 lj_cconv_ct_ct(cts, ctype_get(cts, *id), s,
334 (uint8_t *)&x, sp, CCF_ARG(narg));
335 return x;
/* Strings: lj_strscan_number() is given the slot itself as output, so on success `o` is handled as a number below. */
336 } else if (!(tvisstr(o) && lj_strscan_number(strV(o), o))) {
337 goto err;
338 }
339 if (LJ_LIKELY(tvisint(o))) {
340 return intV(o);
341 } else {
342 int32_t i = lj_num2bit(numV(o));
/* With LJ_DUALNUM the converted integer is stored back into the argument slot. */
343 if (LJ_DUALNUM) setintV(o, i);
344 return i;
345 }
346}
347
348
273/* -- 64 bit integer arithmetic helpers ----------------------------------- */ 349/* -- 64 bit integer arithmetic helpers ----------------------------------- */
274 350
275#if LJ_32 && LJ_HASJIT 351#if LJ_32 && LJ_HASJIT
diff --git a/src/lj_carith.h b/src/lj_carith.h
index ae17df00..b1a65d35 100644
--- a/src/lj_carith.h
+++ b/src/lj_carith.h
@@ -12,6 +12,16 @@
12 12
13LJ_FUNC int lj_carith_op(lua_State *L, MMS mm); 13LJ_FUNC int lj_carith_op(lua_State *L, MMS mm);
14 14
15#if LJ_32
16LJ_FUNC uint64_t lj_carith_shl64(uint64_t x, int32_t sh);
17LJ_FUNC uint64_t lj_carith_shr64(uint64_t x, int32_t sh);
18LJ_FUNC uint64_t lj_carith_sar64(uint64_t x, int32_t sh);
19LJ_FUNC uint64_t lj_carith_rol64(uint64_t x, int32_t sh);
20LJ_FUNC uint64_t lj_carith_ror64(uint64_t x, int32_t sh);
21#endif
22LJ_FUNC uint64_t lj_carith_shift64(uint64_t x, int32_t sh, int op);
23LJ_FUNC uint64_t lj_carith_check64(lua_State *L, int narg, CTypeID *id);
24
15#if LJ_32 && LJ_HASJIT 25#if LJ_32 && LJ_HASJIT
16LJ_FUNC int64_t lj_carith_mul64(int64_t x, int64_t k); 26LJ_FUNC int64_t lj_carith_mul64(int64_t x, int64_t k);
17#endif 27#endif
diff --git a/src/lj_cparse.c b/src/lj_cparse.c
index 107c0381..b8c95bd3 100644
--- a/src/lj_cparse.c
+++ b/src/lj_cparse.c
@@ -9,6 +9,7 @@
9 9
10#include "lj_gc.h" 10#include "lj_gc.h"
11#include "lj_err.h" 11#include "lj_err.h"
12#include "lj_buf.h"
12#include "lj_str.h" 13#include "lj_str.h"
13#include "lj_ctype.h" 14#include "lj_ctype.h"
14#include "lj_cparse.h" 15#include "lj_cparse.h"
@@ -85,24 +86,10 @@ static LJ_AINLINE CPChar cp_get(CPState *cp)
85 return cp_get_bs(cp); 86 return cp_get_bs(cp);
86} 87}
87 88
88/* Grow save buffer. */
89static LJ_NOINLINE void cp_save_grow(CPState *cp, CPChar c)
90{
91 MSize newsize;
92 if (cp->sb.sz >= CPARSE_MAX_BUF/2)
93 cp_err(cp, LJ_ERR_XELEM);
94 newsize = cp->sb.sz * 2;
95 lj_str_resizebuf(cp->L, &cp->sb, newsize);
96 cp->sb.buf[cp->sb.n++] = (char)c;
97}
98
99/* Save character in buffer. */ 89/* Save character in buffer. */
/*
** Append character `c` to the parser's save buffer (cp->sb). In this
** rendering the left-numbered text is the pre-change code (manual capacity
** check with a fallback to cp_save_grow()); the right-numbered text is the
** post-change code, which delegates to lj_buf_putb().
*/
100static LJ_AINLINE void cp_save(CPState *cp, CPChar c) 90static LJ_AINLINE void cp_save(CPState *cp, CPChar c)
101{ 91{
102 if (LJ_UNLIKELY(cp->sb.n + 1 > cp->sb.sz)) 92 lj_buf_putb(&cp->sb, c);
103 cp_save_grow(cp, c);
104 else
105 cp->sb.buf[cp->sb.n++] = (char)c;
106} 93}
107 94
108/* Skip line break. Handles "\n", "\r", "\r\n" or "\n\r". */ 95/* Skip line break. Handles "\n", "\r", "\r\n" or "\n\r". */
@@ -122,9 +109,9 @@ LJ_NORET static void cp_errmsg(CPState *cp, CPToken tok, ErrMsg em, ...)
122 tokstr = NULL; 109 tokstr = NULL;
123 } else if (tok == CTOK_IDENT || tok == CTOK_INTEGER || tok == CTOK_STRING || 110 } else if (tok == CTOK_IDENT || tok == CTOK_INTEGER || tok == CTOK_STRING ||
124 tok >= CTOK_FIRSTDECL) { 111 tok >= CTOK_FIRSTDECL) {
125 if (cp->sb.n == 0) cp_save(cp, '$'); 112 if (sbufP(&cp->sb) == sbufB(&cp->sb)) cp_save(cp, '$');
126 cp_save(cp, '\0'); 113 cp_save(cp, '\0');
127 tokstr = cp->sb.buf; 114 tokstr = sbufB(&cp->sb);
128 } else { 115 } else {
129 tokstr = cp_tok2str(cp, tok); 116 tokstr = cp_tok2str(cp, tok);
130 } 117 }
@@ -164,7 +151,7 @@ static CPToken cp_number(CPState *cp)
164 TValue o; 151 TValue o;
165 do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp))); 152 do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp)));
166 cp_save(cp, '\0'); 153 cp_save(cp, '\0');
167 fmt = lj_strscan_scan((const uint8_t *)cp->sb.buf, &o, STRSCAN_OPT_C); 154 fmt = lj_strscan_scan((const uint8_t *)sbufB(&cp->sb), &o, STRSCAN_OPT_C);
168 if (fmt == STRSCAN_INT) cp->val.id = CTID_INT32; 155 if (fmt == STRSCAN_INT) cp->val.id = CTID_INT32;
169 else if (fmt == STRSCAN_U32) cp->val.id = CTID_UINT32; 156 else if (fmt == STRSCAN_U32) cp->val.id = CTID_UINT32;
170 else if (!(cp->mode & CPARSE_MODE_SKIP)) 157 else if (!(cp->mode & CPARSE_MODE_SKIP))
@@ -177,7 +164,7 @@ static CPToken cp_number(CPState *cp)
177static CPToken cp_ident(CPState *cp) 164static CPToken cp_ident(CPState *cp)
178{ 165{
179 do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp))); 166 do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp)));
180 cp->str = lj_str_new(cp->L, cp->sb.buf, cp->sb.n); 167 cp->str = lj_buf_str(cp->L, &cp->sb);
181 cp->val.id = lj_ctype_getname(cp->cts, &cp->ct, cp->str, cp->tmask); 168 cp->val.id = lj_ctype_getname(cp->cts, &cp->ct, cp->str, cp->tmask);
182 if (ctype_type(cp->ct->info) == CT_KW) 169 if (ctype_type(cp->ct->info) == CT_KW)
183 return ctype_cid(cp->ct->info); 170 return ctype_cid(cp->ct->info);
@@ -263,11 +250,11 @@ static CPToken cp_string(CPState *cp)
263 } 250 }
264 cp_get(cp); 251 cp_get(cp);
265 if (delim == '"') { 252 if (delim == '"') {
266 cp->str = lj_str_new(cp->L, cp->sb.buf, cp->sb.n); 253 cp->str = lj_buf_str(cp->L, &cp->sb);
267 return CTOK_STRING; 254 return CTOK_STRING;
268 } else { 255 } else {
269 if (cp->sb.n != 1) cp_err_token(cp, '\''); 256 if (sbuflen(&cp->sb) != 1) cp_err_token(cp, '\'');
270 cp->val.i32 = (int32_t)(char)cp->sb.buf[0]; 257 cp->val.i32 = (int32_t)(char)*sbufB(&cp->sb);
271 cp->val.id = CTID_INT32; 258 cp->val.id = CTID_INT32;
272 return CTOK_INTEGER; 259 return CTOK_INTEGER;
273 } 260 }
@@ -296,7 +283,7 @@ static void cp_comment_cpp(CPState *cp)
296/* Lexical scanner for C. Only a minimal subset is implemented. */ 283/* Lexical scanner for C. Only a minimal subset is implemented. */
297static CPToken cp_next_(CPState *cp) 284static CPToken cp_next_(CPState *cp)
298{ 285{
299 lj_str_resetbuf(&cp->sb); 286 lj_buf_reset(&cp->sb);
300 for (;;) { 287 for (;;) {
301 if (lj_char_isident(cp->c)) 288 if (lj_char_isident(cp->c))
302 return lj_char_isdigit(cp->c) ? cp_number(cp) : cp_ident(cp); 289 return lj_char_isdigit(cp->c) ? cp_number(cp) : cp_ident(cp);
@@ -380,8 +367,7 @@ static void cp_init(CPState *cp)
380 cp->depth = 0; 367 cp->depth = 0;
381 cp->curpack = 0; 368 cp->curpack = 0;
382 cp->packstack[0] = 255; 369 cp->packstack[0] = 255;
383 lj_str_initbuf(&cp->sb); 370 lj_buf_init(cp->L, &cp->sb);
384 lj_str_resizebuf(cp->L, &cp->sb, LJ_MIN_SBUF);
385 lua_assert(cp->p != NULL); 371 lua_assert(cp->p != NULL);
386 cp_get(cp); /* Read-ahead first char. */ 372 cp_get(cp); /* Read-ahead first char. */
387 cp->tok = 0; 373 cp->tok = 0;
@@ -393,7 +379,7 @@ static void cp_init(CPState *cp)
393static void cp_cleanup(CPState *cp) 379static void cp_cleanup(CPState *cp)
394{ 380{
395 global_State *g = G(cp->L); 381 global_State *g = G(cp->L);
396 lj_str_freebuf(g, &cp->sb); 382 lj_buf_free(g, &cp->sb);
397} 383}
398 384
399/* Check and consume optional token. */ 385/* Check and consume optional token. */
diff --git a/src/lj_crecord.c b/src/lj_crecord.c
index b60eb7b3..2bf0bc1d 100644
--- a/src/lj_crecord.c
+++ b/src/lj_crecord.c
@@ -446,7 +446,7 @@ static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp,
446 /* fallthrough */ 446 /* fallthrough */
447 case CCX(I, F): 447 case CCX(I, F):
448 if (dt == IRT_CDATA || st == IRT_CDATA) goto err_nyi; 448 if (dt == IRT_CDATA || st == IRT_CDATA) goto err_nyi;
449 sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, IRCONV_TRUNC|IRCONV_ANY); 449 sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, IRCONV_ANY);
450 goto xstore; 450 goto xstore;
451 case CCX(I, P): 451 case CCX(I, P):
452 case CCX(I, A): 452 case CCX(I, A):
@@ -522,7 +522,7 @@ static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp,
522 if (st == IRT_CDATA) goto err_nyi; 522 if (st == IRT_CDATA) goto err_nyi;
523 /* The signed conversion is cheaper. x64 really has 47 bit pointers. */ 523 /* The signed conversion is cheaper. x64 really has 47 bit pointers. */
524 sp = emitconv(sp, (LJ_64 && dsize == 8) ? IRT_I64 : IRT_U32, 524 sp = emitconv(sp, (LJ_64 && dsize == 8) ? IRT_I64 : IRT_U32,
525 st, IRCONV_TRUNC|IRCONV_ANY); 525 st, IRCONV_ANY);
526 goto xstore; 526 goto xstore;
527 527
528 /* Destination is an array. */ 528 /* Destination is an array. */
@@ -1229,7 +1229,7 @@ static TRef crec_arith_int64(jit_State *J, TRef *sp, CType **s, MMS mm)
1229 for (i = 0; i < 2; i++) { 1229 for (i = 0; i < 2; i++) {
1230 IRType st = tref_type(sp[i]); 1230 IRType st = tref_type(sp[i]);
1231 if (st == IRT_NUM || st == IRT_FLOAT) 1231 if (st == IRT_NUM || st == IRT_FLOAT)
1232 sp[i] = emitconv(sp[i], dt, st, IRCONV_TRUNC|IRCONV_ANY); 1232 sp[i] = emitconv(sp[i], dt, st, IRCONV_ANY);
1233 else if (!(st == IRT_I64 || st == IRT_U64)) 1233 else if (!(st == IRT_I64 || st == IRT_U64))
1234 sp[i] = emitconv(sp[i], dt, IRT_INT, 1234 sp[i] = emitconv(sp[i], dt, IRT_INT,
1235 (s[i]->info & CTF_UNSIGNED) ? 0 : IRCONV_SEXT); 1235 (s[i]->info & CTF_UNSIGNED) ? 0 : IRCONV_SEXT);
@@ -1297,15 +1297,14 @@ static TRef crec_arith_ptr(jit_State *J, TRef *sp, CType **s, MMS mm)
1297 CTypeID id; 1297 CTypeID id;
1298#if LJ_64 1298#if LJ_64
1299 if (t == IRT_NUM || t == IRT_FLOAT) 1299 if (t == IRT_NUM || t == IRT_FLOAT)
1300 tr = emitconv(tr, IRT_INTP, t, IRCONV_TRUNC|IRCONV_ANY); 1300 tr = emitconv(tr, IRT_INTP, t, IRCONV_ANY);
1301 else if (!(t == IRT_I64 || t == IRT_U64)) 1301 else if (!(t == IRT_I64 || t == IRT_U64))
1302 tr = emitconv(tr, IRT_INTP, IRT_INT, 1302 tr = emitconv(tr, IRT_INTP, IRT_INT,
1303 ((t - IRT_I8) & 1) ? 0 : IRCONV_SEXT); 1303 ((t - IRT_I8) & 1) ? 0 : IRCONV_SEXT);
1304#else 1304#else
1305 if (!tref_typerange(sp[1], IRT_I8, IRT_U32)) { 1305 if (!tref_typerange(sp[1], IRT_I8, IRT_U32)) {
1306 tr = emitconv(tr, IRT_INTP, t, 1306 tr = emitconv(tr, IRT_INTP, t,
1307 (t == IRT_NUM || t == IRT_FLOAT) ? 1307 (t == IRT_NUM || t == IRT_FLOAT) ? IRCONV_ANY : 0);
1308 IRCONV_TRUNC|IRCONV_ANY : 0);
1309 } 1308 }
1310#endif 1309#endif
1311 tr = emitir(IRT(IR_MUL, IRT_INTP), tr, lj_ir_kintp(J, sz)); 1310 tr = emitir(IRT(IR_MUL, IRT_INTP), tr, lj_ir_kintp(J, sz));
@@ -1627,6 +1626,101 @@ void LJ_FASTCALL recff_ffi_gc(jit_State *J, RecordFFData *rd)
1627 crec_finalizer(J, J->base[0], &rd->argv[1]); 1626 crec_finalizer(J, J->base[0], &rd->argv[1]);
1628} 1627}
1629 1628
1629/* -- 64 bit bit.* library functions -------------------------------------- */
1630
1631/* Determine bit operation type from argument type. */
1632static CTypeID crec_bit64_type(CTState *cts, cTValue *tv)
1633{
1634 if (tviscdata(tv)) {
1635 CType *ct = lj_ctype_rawref(cts, cdataV(tv)->ctypeid);
1636 if (ctype_isenum(ct->info)) ct = ctype_child(cts, ct);
1637 if ((ct->info & (CTMASK_NUM|CTF_BOOL|CTF_FP|CTF_UNSIGNED)) ==
1638 CTINFO(CT_NUM, CTF_UNSIGNED) && ct->size == 8)
1639 return CTID_UINT64; /* Use uint64_t, since it has the highest rank. */
1640 return CTID_INT64; /* Otherwise use int64_t. */
1641 }
1642 return 0; /* Use regular 32 bit ops. */
1643}
1644
1645void LJ_FASTCALL recff_bit64_tobit(jit_State *J, RecordFFData *rd)
1646{
1647 CTState *cts = ctype_ctsG(J2G(J));
1648 TRef tr = crec_ct_tv(J, ctype_get(cts, CTID_INT64), 0,
1649 J->base[0], &rd->argv[0]);
1650 if (!tref_isinteger(tr))
1651 tr = emitconv(tr, IRT_INT, tref_type(tr), 0);
1652 J->base[0] = tr;
1653}
1654
1655int LJ_FASTCALL recff_bit64_unary(jit_State *J, RecordFFData *rd)
1656{
1657 CTState *cts = ctype_ctsG(J2G(J));
1658 CTypeID id = crec_bit64_type(cts, &rd->argv[0]);
1659 if (id) {
1660 TRef tr = crec_ct_tv(J, ctype_get(cts, id), 0, J->base[0], &rd->argv[0]);
1661 tr = emitir(IRT(rd->data, id-CTID_INT64+IRT_I64), tr, 0);
1662 J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr);
1663 return 1;
1664 }
1665 return 0;
1666}
1667
1668int LJ_FASTCALL recff_bit64_nary(jit_State *J, RecordFFData *rd)
1669{
1670 CTState *cts = ctype_ctsG(J2G(J));
1671 CTypeID id = 0;
1672 MSize i;
1673 for (i = 0; J->base[i] != 0; i++) {
1674 CTypeID aid = crec_bit64_type(cts, &rd->argv[i]);
1675 if (id < aid) id = aid; /* Determine highest type rank of all arguments. */
1676 }
1677 if (id) {
1678 CType *ct = ctype_get(cts, id);
1679 uint32_t ot = IRT(rd->data, id-CTID_INT64+IRT_I64);
1680 TRef tr = crec_ct_tv(J, ct, 0, J->base[0], &rd->argv[0]);
1681 for (i = 1; J->base[i] != 0; i++) {
1682 TRef tr2 = crec_ct_tv(J, ct, 0, J->base[i], &rd->argv[i]);
1683 tr = emitir(ot, tr, tr2);
1684 }
1685 J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr);
1686 return 1;
1687 }
1688 return 0;
1689}
1690
1691int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd)
1692{
1693 CTState *cts = ctype_ctsG(J2G(J));
1694 CTypeID id;
1695 TRef tsh = 0;
1696 if (J->base[0] && tref_iscdata(J->base[1])) {
1697 tsh = crec_ct_tv(J, ctype_get(cts, CTID_INT64), 0,
1698 J->base[1], &rd->argv[1]);
1699 if (!tref_isinteger(tsh))
1700 tsh = emitconv(tsh, IRT_INT, tref_type(tsh), 0);
1701 J->base[1] = tsh;
1702 }
1703 id = crec_bit64_type(cts, &rd->argv[0]);
1704 if (id) {
1705 TRef tr = crec_ct_tv(J, ctype_get(cts, id), 0, J->base[0], &rd->argv[0]);
1706 uint32_t op = rd->data;
1707 if (!tsh) tsh = lj_opt_narrow_tobit(J, J->base[1]);
1708 if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) &&
1709 !tref_isk(tsh))
1710 tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 63));
1711#ifdef LJ_TARGET_UNIFYROT
1712 if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) {
1713 op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR;
1714 tsh = emitir(IRTI(IR_NEG), tsh, tsh);
1715 }
1716#endif
1717 tr = emitir(IRT(op, id-CTID_INT64+IRT_I64), tr, tsh);
1718 J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr);
1719 return 1;
1720 }
1721 return 0;
1722}
1723
1630/* -- Miscellaneous library functions ------------------------------------- */ 1724/* -- Miscellaneous library functions ------------------------------------- */
1631 1725
1632void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd) 1726void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd)
diff --git a/src/lj_crecord.h b/src/lj_crecord.h
index dea05f78..92d777b8 100644
--- a/src/lj_crecord.h
+++ b/src/lj_crecord.h
@@ -25,6 +25,12 @@ LJ_FUNC void LJ_FASTCALL recff_ffi_istype(jit_State *J, RecordFFData *rd);
25LJ_FUNC void LJ_FASTCALL recff_ffi_abi(jit_State *J, RecordFFData *rd); 25LJ_FUNC void LJ_FASTCALL recff_ffi_abi(jit_State *J, RecordFFData *rd);
26LJ_FUNC void LJ_FASTCALL recff_ffi_xof(jit_State *J, RecordFFData *rd); 26LJ_FUNC void LJ_FASTCALL recff_ffi_xof(jit_State *J, RecordFFData *rd);
27LJ_FUNC void LJ_FASTCALL recff_ffi_gc(jit_State *J, RecordFFData *rd); 27LJ_FUNC void LJ_FASTCALL recff_ffi_gc(jit_State *J, RecordFFData *rd);
28
29LJ_FUNC void LJ_FASTCALL recff_bit64_tobit(jit_State *J, RecordFFData *rd);
30LJ_FUNC int LJ_FASTCALL recff_bit64_unary(jit_State *J, RecordFFData *rd);
31LJ_FUNC int LJ_FASTCALL recff_bit64_nary(jit_State *J, RecordFFData *rd);
32LJ_FUNC int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd);
33
28LJ_FUNC void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd); 34LJ_FUNC void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd);
29#endif 35#endif
30 36
diff --git a/src/lj_ctype.c b/src/lj_ctype.c
index 57a0d7cc..69ba76d1 100644
--- a/src/lj_ctype.c
+++ b/src/lj_ctype.c
@@ -570,7 +570,7 @@ GCstr *lj_ctype_repr_complex(lua_State *L, void *sp, CTSize size)
570{ 570{
571 char buf[2*LJ_STR_NUMBUF+2+1]; 571 char buf[2*LJ_STR_NUMBUF+2+1];
572 TValue re, im; 572 TValue re, im;
573 size_t len; 573 MSize len;
574 if (size == 2*sizeof(double)) { 574 if (size == 2*sizeof(double)) {
575 re.n = *(double *)sp; im.n = ((double *)sp)[1]; 575 re.n = *(double *)sp; im.n = ((double *)sp)[1];
576 } else { 576 } else {
diff --git a/src/lj_debug.c b/src/lj_debug.c
index be7fb2b1..3f502864 100644
--- a/src/lj_debug.c
+++ b/src/lj_debug.c
@@ -9,6 +9,7 @@
9#include "lj_obj.h" 9#include "lj_obj.h"
10#include "lj_err.h" 10#include "lj_err.h"
11#include "lj_debug.h" 11#include "lj_debug.h"
12#include "lj_buf.h"
12#include "lj_str.h" 13#include "lj_str.h"
13#include "lj_tab.h" 14#include "lj_tab.h"
14#include "lj_state.h" 15#include "lj_state.h"
@@ -133,20 +134,6 @@ static BCLine debug_frameline(lua_State *L, GCfunc *fn, cTValue *nextframe)
133 134
134/* -- Variable names ------------------------------------------------------ */ 135/* -- Variable names ------------------------------------------------------ */
135 136
136/* Read ULEB128 value. */
137static uint32_t debug_read_uleb128(const uint8_t **pp)
138{
139 const uint8_t *p = *pp;
140 uint32_t v = *p++;
141 if (LJ_UNLIKELY(v >= 0x80)) {
142 int sh = 0;
143 v &= 0x7f;
144 do { v |= ((*p & 0x7f) << (sh += 7)); } while (*p++ >= 0x80);
145 }
146 *pp = p;
147 return v;
148}
149
150/* Get name of a local variable from slot number and PC. */ 137/* Get name of a local variable from slot number and PC. */
151static const char *debug_varname(const GCproto *pt, BCPos pc, BCReg slot) 138static const char *debug_varname(const GCproto *pt, BCPos pc, BCReg slot)
152{ 139{
@@ -162,9 +149,9 @@ static const char *debug_varname(const GCproto *pt, BCPos pc, BCReg slot)
162 } else { 149 } else {
163 while (*p++) ; /* Skip over variable name string. */ 150 while (*p++) ; /* Skip over variable name string. */
164 } 151 }
165 lastpc = startpc = lastpc + debug_read_uleb128(&p); 152 lastpc = startpc = lastpc + lj_buf_ruleb128((const char **)&p);
166 if (startpc > pc) break; 153 if (startpc > pc) break;
167 endpc = startpc + debug_read_uleb128(&p); 154 endpc = startpc + lj_buf_ruleb128((const char **)&p);
168 if (pc < endpc && slot-- == 0) { 155 if (pc < endpc && slot-- == 0) {
169 if (vn < VARNAME__MAX) { 156 if (vn < VARNAME__MAX) {
170#define VARNAMESTR(name, str) str "\0" 157#define VARNAMESTR(name, str) str "\0"
@@ -321,7 +308,7 @@ const char *lj_debug_funcname(lua_State *L, TValue *frame, const char **name)
321/* -- Source code locations ----------------------------------------------- */ 308/* -- Source code locations ----------------------------------------------- */
322 309
323/* Generate shortened source name. */ 310/* Generate shortened source name. */
324void lj_debug_shortname(char *out, GCstr *str) 311void lj_debug_shortname(char *out, GCstr *str, BCLine line)
325{ 312{
326 const char *src = strdata(str); 313 const char *src = strdata(str);
327 if (*src == '=') { 314 if (*src == '=') {
@@ -335,11 +322,11 @@ void lj_debug_shortname(char *out, GCstr *str)
335 *out++ = '.'; *out++ = '.'; *out++ = '.'; 322 *out++ = '.'; *out++ = '.'; *out++ = '.';
336 } 323 }
337 strcpy(out, src); 324 strcpy(out, src);
338 } else { /* Output [string "string"]. */ 325 } else { /* Output [string "string"] or [builtin:name]. */
339 size_t len; /* Length, up to first control char. */ 326 size_t len; /* Length, up to first control char. */
340 for (len = 0; len < LUA_IDSIZE-12; len++) 327 for (len = 0; len < LUA_IDSIZE-12; len++)
341 if (((const unsigned char *)src)[len] < ' ') break; 328 if (((const unsigned char *)src)[len] < ' ') break;
342 strcpy(out, "[string \""); out += 9; 329 strcpy(out, line == ~(BCLine)0 ? "[builtin:" : "[string \""); out += 9;
343 if (src[len] != '\0') { /* Must truncate? */ 330 if (src[len] != '\0') { /* Must truncate? */
344 if (len > LUA_IDSIZE-15) len = LUA_IDSIZE-15; 331 if (len > LUA_IDSIZE-15) len = LUA_IDSIZE-15;
345 strncpy(out, src, len); out += len; 332 strncpy(out, src, len); out += len;
@@ -347,7 +334,7 @@ void lj_debug_shortname(char *out, GCstr *str)
347 } else { 334 } else {
348 strcpy(out, src); out += len; 335 strcpy(out, src); out += len;
349 } 336 }
350 strcpy(out, "\"]"); 337 strcpy(out, line == ~(BCLine)0 ? "]" : "\"]");
351 } 338 }
352} 339}
353 340
@@ -360,8 +347,9 @@ void lj_debug_addloc(lua_State *L, const char *msg,
360 if (isluafunc(fn)) { 347 if (isluafunc(fn)) {
361 BCLine line = debug_frameline(L, fn, nextframe); 348 BCLine line = debug_frameline(L, fn, nextframe);
362 if (line >= 0) { 349 if (line >= 0) {
350 GCproto *pt = funcproto(fn);
363 char buf[LUA_IDSIZE]; 351 char buf[LUA_IDSIZE];
364 lj_debug_shortname(buf, proto_chunkname(funcproto(fn))); 352 lj_debug_shortname(buf, proto_chunkname(pt), pt->firstline);
365 lj_str_pushf(L, "%s:%d: %s", buf, line, msg); 353 lj_str_pushf(L, "%s:%d: %s", buf, line, msg);
366 return; 354 return;
367 } 355 }
@@ -377,7 +365,9 @@ void lj_debug_pushloc(lua_State *L, GCproto *pt, BCPos pc)
377 const char *s = strdata(name); 365 const char *s = strdata(name);
378 MSize i, len = name->len; 366 MSize i, len = name->len;
379 BCLine line = lj_debug_line(pt, pc); 367 BCLine line = lj_debug_line(pt, pc);
380 if (*s == '@') { 368 if (pt->firstline == ~(BCLine)0) {
369 lj_str_pushf(L, "builtin:%s", s);
370 } else if (*s == '@') {
381 s++; len--; 371 s++; len--;
382 for (i = len; i > 0; i--) 372 for (i = len; i > 0; i--)
383 if (s[i] == '/' || s[i] == '\\') { 373 if (s[i] == '/' || s[i] == '\\') {
@@ -453,7 +443,7 @@ int lj_debug_getinfo(lua_State *L, const char *what, lj_Debug *ar, int ext)
453 BCLine firstline = pt->firstline; 443 BCLine firstline = pt->firstline;
454 GCstr *name = proto_chunkname(pt); 444 GCstr *name = proto_chunkname(pt);
455 ar->source = strdata(name); 445 ar->source = strdata(name);
456 lj_debug_shortname(ar->short_src, name); 446 lj_debug_shortname(ar->short_src, name, pt->firstline);
457 ar->linedefined = (int)firstline; 447 ar->linedefined = (int)firstline;
458 ar->lastlinedefined = (int)(firstline + pt->numline); 448 ar->lastlinedefined = (int)(firstline + pt->numline);
459 ar->what = firstline ? "Lua" : "main"; 449 ar->what = firstline ? "Lua" : "main";
diff --git a/src/lj_debug.h b/src/lj_debug.h
index 7cf57de7..4144b47e 100644
--- a/src/lj_debug.h
+++ b/src/lj_debug.h
@@ -34,7 +34,7 @@ LJ_FUNC const char *lj_debug_slotname(GCproto *pt, const BCIns *pc,
34 BCReg slot, const char **name); 34 BCReg slot, const char **name);
35LJ_FUNC const char *lj_debug_funcname(lua_State *L, TValue *frame, 35LJ_FUNC const char *lj_debug_funcname(lua_State *L, TValue *frame,
36 const char **name); 36 const char **name);
37LJ_FUNC void lj_debug_shortname(char *out, GCstr *str); 37LJ_FUNC void lj_debug_shortname(char *out, GCstr *str, BCLine line);
38LJ_FUNC void lj_debug_addloc(lua_State *L, const char *msg, 38LJ_FUNC void lj_debug_addloc(lua_State *L, const char *msg,
39 cTValue *frame, cTValue *nextframe); 39 cTValue *frame, cTValue *nextframe);
40LJ_FUNC void lj_debug_pushloc(lua_State *L, GCproto *pt, BCPos pc); 40LJ_FUNC void lj_debug_pushloc(lua_State *L, GCproto *pt, BCPos pc);
diff --git a/src/lj_dispatch.h b/src/lj_dispatch.h
index a56b6260..a03804af 100644
--- a/src/lj_dispatch.h
+++ b/src/lj_dispatch.h
@@ -33,11 +33,11 @@
33 _(lj_ffh_coroutine_wrap_err) _(lj_func_closeuv) _(lj_func_newL_gc) \ 33 _(lj_ffh_coroutine_wrap_err) _(lj_func_closeuv) _(lj_func_newL_gc) \
34 _(lj_gc_barrieruv) _(lj_gc_step) _(lj_gc_step_fixtop) _(lj_meta_arith) \ 34 _(lj_gc_barrieruv) _(lj_gc_step) _(lj_gc_step_fixtop) _(lj_meta_arith) \
35 _(lj_meta_call) _(lj_meta_cat) _(lj_meta_comp) _(lj_meta_equal) \ 35 _(lj_meta_call) _(lj_meta_cat) _(lj_meta_comp) _(lj_meta_equal) \
36 _(lj_meta_for) _(lj_meta_len) _(lj_meta_tget) _(lj_meta_tset) \ 36 _(lj_meta_for) _(lj_meta_istype) _(lj_meta_len) _(lj_meta_tget) \
37 _(lj_state_growstack) _(lj_str_fromnum) _(lj_str_fromnumber) _(lj_str_new) \ 37 _(lj_meta_tset) _(lj_state_growstack) _(lj_str_fromnum) _(lj_str_fromnumber) \
38 _(lj_tab_dup) _(lj_tab_get) _(lj_tab_getinth) _(lj_tab_len) _(lj_tab_new) \ 38 _(lj_str_new) _(lj_tab_dup) _(lj_tab_get) _(lj_tab_getinth) _(lj_tab_len) \
39 _(lj_tab_newkey) _(lj_tab_next) _(lj_tab_reasize) \ 39 _(lj_tab_new) _(lj_tab_newkey) _(lj_tab_next) _(lj_tab_reasize) \
40 JITGOTDEF(_) FFIGOTDEF(_) 40 _(lj_tab_setinth) JITGOTDEF(_) FFIGOTDEF(_)
41 41
42enum { 42enum {
43#define GOTENUM(name) LJ_GOT_##name, 43#define GOTENUM(name) LJ_GOT_##name,
@@ -60,7 +60,7 @@ typedef uint16_t HotCount;
60#define HOTCOUNT_CALL 1 60#define HOTCOUNT_CALL 1
61 61
62/* This solves a circular dependency problem -- bump as needed. Sigh. */ 62/* This solves a circular dependency problem -- bump as needed. Sigh. */
63#define GG_NUM_ASMFF 62 63#define GG_NUM_ASMFF 59
64 64
65#define GG_LEN_DDISP (BC__MAX + GG_NUM_ASMFF) 65#define GG_LEN_DDISP (BC__MAX + GG_NUM_ASMFF)
66#define GG_LEN_SDISP BC_FUNCF 66#define GG_LEN_SDISP BC_FUNCF
diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h
index bd184a30..2454c899 100644
--- a/src/lj_emit_x86.h
+++ b/src/lj_emit_x86.h
@@ -241,10 +241,6 @@ static void emit_gmrmi(ASMState *as, x86Group xg, Reg rb, int32_t i)
241 241
242/* -- Emit loads/stores --------------------------------------------------- */ 242/* -- Emit loads/stores --------------------------------------------------- */
243 243
244/* Instruction selection for XMM moves. */
245#define XMM_MOVRR(as) ((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVSD : XO_MOVAPS)
246#define XMM_MOVRM(as) ((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVLPD : XO_MOVSD)
247
248/* mov [base+ofs], i */ 244/* mov [base+ofs], i */
249static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i) 245static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i)
250{ 246{
@@ -314,7 +310,7 @@ static void emit_loadn(ASMState *as, Reg r, cTValue *tv)
314 if (tvispzero(tv)) /* Use xor only for +0. */ 310 if (tvispzero(tv)) /* Use xor only for +0. */
315 emit_rr(as, XO_XORPS, r, r); 311 emit_rr(as, XO_XORPS, r, r);
316 else 312 else
317 emit_rma(as, XMM_MOVRM(as), r, &tv->n); 313 emit_rma(as, XO_MOVSD, r, &tv->n);
318} 314}
319 315
320/* -- Emit control-flow instructions -------------------------------------- */ 316/* -- Emit control-flow instructions -------------------------------------- */
@@ -427,7 +423,7 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
427 if (dst < RID_MAX_GPR) 423 if (dst < RID_MAX_GPR)
428 emit_rr(as, XO_MOV, REX_64IR(ir, dst), src); 424 emit_rr(as, XO_MOV, REX_64IR(ir, dst), src);
429 else 425 else
430 emit_rr(as, XMM_MOVRR(as), dst, src); 426 emit_rr(as, XO_MOVAPS, dst, src);
431} 427}
432 428
433/* Generic load of register from stack slot. */ 429/* Generic load of register from stack slot. */
@@ -436,7 +432,7 @@ static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs)
436 if (r < RID_MAX_GPR) 432 if (r < RID_MAX_GPR)
437 emit_rmro(as, XO_MOV, REX_64IR(ir, r), RID_ESP, ofs); 433 emit_rmro(as, XO_MOV, REX_64IR(ir, r), RID_ESP, ofs);
438 else 434 else
439 emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, r, RID_ESP, ofs); 435 emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, r, RID_ESP, ofs);
440} 436}
441 437
442/* Generic store of register to stack slot. */ 438/* Generic store of register to stack slot. */
diff --git a/src/lj_err.c b/src/lj_err.c
index 4a33a233..e0fb7167 100644
--- a/src/lj_err.c
+++ b/src/lj_err.c
@@ -587,7 +587,7 @@ LJ_NOINLINE void lj_err_lex(lua_State *L, GCstr *src, const char *tok,
587{ 587{
588 char buff[LUA_IDSIZE]; 588 char buff[LUA_IDSIZE];
589 const char *msg; 589 const char *msg;
590 lj_debug_shortname(buff, src); 590 lj_debug_shortname(buff, src, line);
591 msg = lj_str_pushvf(L, err2msg(em), argp); 591 msg = lj_str_pushvf(L, err2msg(em), argp);
592 msg = lj_str_pushf(L, "%s:%d: %s", buff, line, msg); 592 msg = lj_str_pushf(L, "%s:%d: %s", buff, line, msg);
593 if (tok) 593 if (tok)
diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c
index 4aa4f064..4f6aeb37 100644
--- a/src/lj_ffrecord.c
+++ b/src/lj_ffrecord.c
@@ -528,14 +528,6 @@ static void LJ_FASTCALL recff_math_modf(jit_State *J, RecordFFData *rd)
528 rd->nres = 2; 528 rd->nres = 2;
529} 529}
530 530
531static void LJ_FASTCALL recff_math_degrad(jit_State *J, RecordFFData *rd)
532{
533 TRef tr = lj_ir_tonum(J, J->base[0]);
534 TRef trm = lj_ir_knum(J, numV(&J->fn->c.upvalue[0]));
535 J->base[0] = emitir(IRTN(IR_MUL), tr, trm);
536 UNUSED(rd);
537}
538
539static void LJ_FASTCALL recff_math_pow(jit_State *J, RecordFFData *rd) 531static void LJ_FASTCALL recff_math_pow(jit_State *J, RecordFFData *rd)
540{ 532{
541 TRef tr = lj_ir_tonum(J, J->base[0]); 533 TRef tr = lj_ir_tonum(J, J->base[0]);
@@ -592,40 +584,66 @@ static void LJ_FASTCALL recff_math_random(jit_State *J, RecordFFData *rd)
592 584
593/* -- Bit library fast functions ------------------------------------------ */ 585/* -- Bit library fast functions ------------------------------------------ */
594 586
595/* Record unary bit.tobit, bit.bnot, bit.bswap. */ 587/* Record bit.tobit. */
588static void LJ_FASTCALL recff_bit_tobit(jit_State *J, RecordFFData *rd)
589{
590 TRef tr = J->base[0];
591#if LJ_HASFFI
592 if (tref_iscdata(tr)) { recff_bit64_tobit(J, rd); return; }
593#endif
594 J->base[0] = lj_opt_narrow_tobit(J, tr);
595 UNUSED(rd);
596}
597
598/* Record unary bit.bnot, bit.bswap. */
596static void LJ_FASTCALL recff_bit_unary(jit_State *J, RecordFFData *rd) 599static void LJ_FASTCALL recff_bit_unary(jit_State *J, RecordFFData *rd)
597{ 600{
598 TRef tr = lj_opt_narrow_tobit(J, J->base[0]); 601#if LJ_HASFFI
599 J->base[0] = (rd->data == IR_TOBIT) ? tr : emitir(IRTI(rd->data), tr, 0); 602 if (recff_bit64_unary(J, rd))
603 return;
604#endif
605 J->base[0] = emitir(IRTI(rd->data), lj_opt_narrow_tobit(J, J->base[0]), 0);
600} 606}
601 607
602/* Record N-ary bit.band, bit.bor, bit.bxor. */ 608/* Record N-ary bit.band, bit.bor, bit.bxor. */
603static void LJ_FASTCALL recff_bit_nary(jit_State *J, RecordFFData *rd) 609static void LJ_FASTCALL recff_bit_nary(jit_State *J, RecordFFData *rd)
604{ 610{
605 TRef tr = lj_opt_narrow_tobit(J, J->base[0]); 611#if LJ_HASFFI
606 uint32_t op = rd->data; 612 if (recff_bit64_nary(J, rd))
607 BCReg i; 613 return;
608 for (i = 1; J->base[i] != 0; i++) 614#endif
609 tr = emitir(IRTI(op), tr, lj_opt_narrow_tobit(J, J->base[i])); 615 {
610 J->base[0] = tr; 616 TRef tr = lj_opt_narrow_tobit(J, J->base[0]);
617 uint32_t ot = IRTI(rd->data);
618 BCReg i;
619 for (i = 1; J->base[i] != 0; i++)
620 tr = emitir(ot, tr, lj_opt_narrow_tobit(J, J->base[i]));
621 J->base[0] = tr;
622 }
611} 623}
612 624
613/* Record bit shifts. */ 625/* Record bit shifts. */
614static void LJ_FASTCALL recff_bit_shift(jit_State *J, RecordFFData *rd) 626static void LJ_FASTCALL recff_bit_shift(jit_State *J, RecordFFData *rd)
615{ 627{
616 TRef tr = lj_opt_narrow_tobit(J, J->base[0]); 628#if LJ_HASFFI
617 TRef tsh = lj_opt_narrow_tobit(J, J->base[1]); 629 if (recff_bit64_shift(J, rd))
618 IROp op = (IROp)rd->data; 630 return;
619 if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) && 631#endif
620 !tref_isk(tsh)) 632 {
621 tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 31)); 633 TRef tr = lj_opt_narrow_tobit(J, J->base[0]);
634 TRef tsh = lj_opt_narrow_tobit(J, J->base[1]);
635 IROp op = (IROp)rd->data;
636 if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) &&
637 !tref_isk(tsh))
638 tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 31));
622#ifdef LJ_TARGET_UNIFYROT 639#ifdef LJ_TARGET_UNIFYROT
623 if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) { 640 if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) {
624 op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR; 641 op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR;
625 tsh = emitir(IRTI(IR_NEG), tsh, tsh); 642 tsh = emitir(IRTI(IR_NEG), tsh, tsh);
626 } 643 }
627#endif 644#endif
628 J->base[0] = emitir(IRTI(op), tr, tsh); 645 J->base[0] = emitir(IRTI(op), tr, tsh);
646 }
629} 647}
630 648
631/* -- String library fast functions --------------------------------------- */ 649/* -- String library fast functions --------------------------------------- */
@@ -737,46 +755,6 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd)
737 755
738/* -- Table library fast functions ---------------------------------------- */ 756/* -- Table library fast functions ---------------------------------------- */
739 757
740static void LJ_FASTCALL recff_table_getn(jit_State *J, RecordFFData *rd)
741{
742 if (tref_istab(J->base[0]))
743 J->base[0] = lj_ir_call(J, IRCALL_lj_tab_len, J->base[0]);
744 /* else: Interpreter will throw. */
745 UNUSED(rd);
746}
747
748static void LJ_FASTCALL recff_table_remove(jit_State *J, RecordFFData *rd)
749{
750 TRef tab = J->base[0];
751 rd->nres = 0;
752 if (tref_istab(tab)) {
753 if (!J->base[1] || tref_isnil(J->base[1])) { /* Simple pop: t[#t] = nil */
754 TRef trlen = lj_ir_call(J, IRCALL_lj_tab_len, tab);
755 GCtab *t = tabV(&rd->argv[0]);
756 MSize len = lj_tab_len(t);
757 emitir(IRTGI(len ? IR_NE : IR_EQ), trlen, lj_ir_kint(J, 0));
758 if (len) {
759 RecordIndex ix;
760 ix.tab = tab;
761 ix.key = trlen;
762 settabV(J->L, &ix.tabv, t);
763 setintV(&ix.keyv, len);
764 ix.idxchain = 0;
765 if (results_wanted(J) != 0) { /* Specialize load only if needed. */
766 ix.val = 0;
767 J->base[0] = lj_record_idx(J, &ix); /* Load previous value. */
768 rd->nres = 1;
769 /* Assumes ix.key/ix.tab is not modified for raw lj_record_idx(). */
770 }
771 ix.val = TREF_NIL;
772 lj_record_idx(J, &ix); /* Remove value. */
773 }
774 } else { /* Complex case: remove in the middle. */
775 recff_nyiu(J);
776 }
777 } /* else: Interpreter will throw. */
778}
779
780static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd) 758static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd)
781{ 759{
782 RecordIndex ix; 760 RecordIndex ix;
diff --git a/src/lj_gc.c b/src/lj_gc.c
index 79f8b720..4ce6eb19 100644
--- a/src/lj_gc.c
+++ b/src/lj_gc.c
@@ -12,6 +12,7 @@
12#include "lj_obj.h" 12#include "lj_obj.h"
13#include "lj_gc.h" 13#include "lj_gc.h"
14#include "lj_err.h" 14#include "lj_err.h"
15#include "lj_buf.h"
15#include "lj_str.h" 16#include "lj_str.h"
16#include "lj_tab.h" 17#include "lj_tab.h"
17#include "lj_func.h" 18#include "lj_func.h"
@@ -353,8 +354,7 @@ static void gc_shrink(global_State *g, lua_State *L)
353{ 354{
354 if (g->strnum <= (g->strmask >> 2) && g->strmask > LJ_MIN_STRTAB*2-1) 355 if (g->strnum <= (g->strmask >> 2) && g->strmask > LJ_MIN_STRTAB*2-1)
355 lj_str_resize(L, g->strmask >> 1); /* Shrink string table. */ 356 lj_str_resize(L, g->strmask >> 1); /* Shrink string table. */
356 if (g->tmpbuf.sz > LJ_MIN_SBUF*2) 357 lj_buf_shrink(L, &g->tmpbuf); /* Shrink temp buffer. */
357 lj_str_resizebuf(L, &g->tmpbuf, g->tmpbuf.sz >> 1); /* Shrink temp buf. */
358} 358}
359 359
360/* Type of GC free functions. */ 360/* Type of GC free functions. */
diff --git a/src/lj_gdbjit.c b/src/lj_gdbjit.c
index 284195a1..334a906e 100644
--- a/src/lj_gdbjit.c
+++ b/src/lj_gdbjit.c
@@ -14,6 +14,7 @@
14#include "lj_err.h" 14#include "lj_err.h"
15#include "lj_debug.h" 15#include "lj_debug.h"
16#include "lj_frame.h" 16#include "lj_frame.h"
17#include "lj_buf.h"
17#include "lj_jit.h" 18#include "lj_jit.h"
18#include "lj_dispatch.h" 19#include "lj_dispatch.h"
19 20
@@ -426,16 +427,6 @@ static void gdbjit_catnum(GDBJITctx *ctx, uint32_t n)
426 *ctx->p++ = '0' + n; 427 *ctx->p++ = '0' + n;
427} 428}
428 429
429/* Add a ULEB128 value. */
430static void gdbjit_uleb128(GDBJITctx *ctx, uint32_t v)
431{
432 uint8_t *p = ctx->p;
433 for (; v >= 0x80; v >>= 7)
434 *p++ = (uint8_t)((v & 0x7f) | 0x80);
435 *p++ = (uint8_t)v;
436 ctx->p = p;
437}
438
439/* Add a SLEB128 value. */ 430/* Add a SLEB128 value. */
440static void gdbjit_sleb128(GDBJITctx *ctx, int32_t v) 431static void gdbjit_sleb128(GDBJITctx *ctx, int32_t v)
441{ 432{
@@ -452,7 +443,7 @@ static void gdbjit_sleb128(GDBJITctx *ctx, int32_t v)
452#define DU16(x) (*(uint16_t *)p = (x), p += 2) 443#define DU16(x) (*(uint16_t *)p = (x), p += 2)
453#define DU32(x) (*(uint32_t *)p = (x), p += 4) 444#define DU32(x) (*(uint32_t *)p = (x), p += 4)
454#define DADDR(x) (*(uintptr_t *)p = (x), p += sizeof(uintptr_t)) 445#define DADDR(x) (*(uintptr_t *)p = (x), p += sizeof(uintptr_t))
455#define DUV(x) (ctx->p = p, gdbjit_uleb128(ctx, (x)), p = ctx->p) 446#define DUV(x) (p = (uint8_t *)lj_buf_wuleb128((char *)p, (x)))
456#define DSV(x) (ctx->p = p, gdbjit_sleb128(ctx, (x)), p = ctx->p) 447#define DSV(x) (ctx->p = p, gdbjit_sleb128(ctx, (x)), p = ctx->p)
457#define DSTR(str) (ctx->p = p, gdbjit_strz(ctx, (str)), p = ctx->p) 448#define DSTR(str) (ctx->p = p, gdbjit_strz(ctx, (str)), p = ctx->p)
458#define DALIGNNOP(s) while ((uintptr_t)p & ((s)-1)) *p++ = DW_CFA_nop 449#define DALIGNNOP(s) while ((uintptr_t)p & ((s)-1)) *p++ = DW_CFA_nop
diff --git a/src/lj_ir.h b/src/lj_ir.h
index a9824325..9d2521c9 100644
--- a/src/lj_ir.h
+++ b/src/lj_ir.h
@@ -227,7 +227,6 @@ IRFLDEF(FLENUM)
227#define IRCONV_DSH 5 227#define IRCONV_DSH 5
228#define IRCONV_NUM_INT ((IRT_NUM<<IRCONV_DSH)|IRT_INT) 228#define IRCONV_NUM_INT ((IRT_NUM<<IRCONV_DSH)|IRT_INT)
229#define IRCONV_INT_NUM ((IRT_INT<<IRCONV_DSH)|IRT_NUM) 229#define IRCONV_INT_NUM ((IRT_INT<<IRCONV_DSH)|IRT_NUM)
230#define IRCONV_TRUNC 0x0400 /* Truncate number to integer. */
231#define IRCONV_SEXT 0x0800 /* Sign-extend integer to integer. */ 230#define IRCONV_SEXT 0x0800 /* Sign-extend integer to integer. */
232#define IRCONV_MODEMASK 0x0fff 231#define IRCONV_MODEMASK 0x0fff
233#define IRCONV_CONVMASK 0xf000 232#define IRCONV_CONVMASK 0xf000
diff --git a/src/lj_ircall.h b/src/lj_ircall.h
index 7fcc532e..2c160bdf 100644
--- a/src/lj_ircall.h
+++ b/src/lj_ircall.h
@@ -172,7 +172,12 @@ typedef struct CCallInfo {
172 _(FFI, memcpy, 3, S, PTR, 0) \ 172 _(FFI, memcpy, 3, S, PTR, 0) \
173 _(FFI, memset, 3, S, PTR, 0) \ 173 _(FFI, memset, 3, S, PTR, 0) \
174 _(FFI, lj_vm_errno, 0, S, INT, CCI_NOFPRCLOBBER) \ 174 _(FFI, lj_vm_errno, 0, S, INT, CCI_NOFPRCLOBBER) \
175 _(FFI32, lj_carith_mul64, ARG2_64, N, I64, CCI_NOFPRCLOBBER) 175 _(FFI32, lj_carith_mul64, ARG2_64, N, I64, CCI_NOFPRCLOBBER) \
176 _(FFI32, lj_carith_shl64, 3, N, U64, CCI_NOFPRCLOBBER) \
177 _(FFI32, lj_carith_shr64, 3, N, U64, CCI_NOFPRCLOBBER) \
178 _(FFI32, lj_carith_sar64, 3, N, U64, CCI_NOFPRCLOBBER) \
179 _(FFI32, lj_carith_rol64, 3, N, U64, CCI_NOFPRCLOBBER) \
180 _(FFI32, lj_carith_ror64, 3, N, U64, CCI_NOFPRCLOBBER) \
176 \ 181 \
177 /* End of list. */ 182 /* End of list. */
178 183
diff --git a/src/lj_jit.h b/src/lj_jit.h
index c0b1c41e..2683b462 100644
--- a/src/lj_jit.h
+++ b/src/lj_jit.h
@@ -14,18 +14,15 @@
14 14
15/* CPU-specific JIT engine flags. */ 15/* CPU-specific JIT engine flags. */
16#if LJ_TARGET_X86ORX64 16#if LJ_TARGET_X86ORX64
17#define JIT_F_CMOV 0x00000010 17#define JIT_F_SSE2 0x00000010
18#define JIT_F_SSE2 0x00000020 18#define JIT_F_SSE3 0x00000020
19#define JIT_F_SSE3 0x00000040 19#define JIT_F_SSE4_1 0x00000040
20#define JIT_F_SSE4_1 0x00000080 20#define JIT_F_PREFER_IMUL 0x00000080
21#define JIT_F_P4 0x00000100 21#define JIT_F_LEA_AGU 0x00000100
22#define JIT_F_PREFER_IMUL 0x00000200
23#define JIT_F_SPLIT_XMM 0x00000400
24#define JIT_F_LEA_AGU 0x00000800
25 22
26/* Names for the CPU-specific flags. Must match the order above. */ 23/* Names for the CPU-specific flags. Must match the order above. */
27#define JIT_F_CPU_FIRST JIT_F_CMOV 24#define JIT_F_CPU_FIRST JIT_F_SSE2
28#define JIT_F_CPUSTRING "\4CMOV\4SSE2\4SSE3\6SSE4.1\2P4\3AMD\2K8\4ATOM" 25#define JIT_F_CPUSTRING "\4SSE2\4SSE3\6SSE4.1\3AMD\4ATOM"
29#elif LJ_TARGET_ARM 26#elif LJ_TARGET_ARM
30#define JIT_F_ARMV6_ 0x00000010 27#define JIT_F_ARMV6_ 0x00000010
31#define JIT_F_ARMV6T2_ 0x00000020 28#define JIT_F_ARMV6T2_ 0x00000020
diff --git a/src/lj_lex.c b/src/lj_lex.c
index 9f2b06f8..c988a6c1 100644
--- a/src/lj_lex.c
+++ b/src/lj_lex.c
@@ -12,6 +12,7 @@
12#include "lj_obj.h" 12#include "lj_obj.h"
13#include "lj_gc.h" 13#include "lj_gc.h"
14#include "lj_err.h" 14#include "lj_err.h"
15#include "lj_buf.h"
15#include "lj_str.h" 16#include "lj_str.h"
16#if LJ_HASFFI 17#if LJ_HASFFI
17#include "lj_tab.h" 18#include "lj_tab.h"
@@ -37,50 +38,48 @@ TKDEF(TKSTR1, TKSTR2)
37 38
38/* -- Buffer handling ----------------------------------------------------- */ 39/* -- Buffer handling ----------------------------------------------------- */
39 40
40#define char2int(c) ((int)(uint8_t)(c)) 41#define LEX_EOF (-1)
41#define next(ls) \ 42#define lex_iseol(ls) (ls->c == '\n' || ls->c == '\r')
42 (ls->current = (ls->n--) > 0 ? char2int(*ls->p++) : fillbuf(ls))
43#define save_and_next(ls) (save(ls, ls->current), next(ls))
44#define currIsNewline(ls) (ls->current == '\n' || ls->current == '\r')
45#define END_OF_STREAM (-1)
46 43
47static int fillbuf(LexState *ls) 44/* Get more input from reader. */
45static LJ_NOINLINE LexChar lex_more(LexState *ls)
48{ 46{
49 size_t sz; 47 size_t sz;
50 const char *buf = ls->rfunc(ls->L, ls->rdata, &sz); 48 const char *p = ls->rfunc(ls->L, ls->rdata, &sz);
51 if (buf == NULL || sz == 0) return END_OF_STREAM; 49 if (p == NULL || sz == 0) return LEX_EOF;
52 ls->n = (MSize)sz - 1; 50 ls->pe = p + sz;
53 ls->p = buf; 51 ls->p = p + 1;
54 return char2int(*(ls->p++)); 52 return (LexChar)(uint8_t)p[0];
55} 53}
56 54
57static LJ_NOINLINE void save_grow(LexState *ls, int c) 55/* Get next character. */
56static LJ_AINLINE LexChar lex_next(LexState *ls)
58{ 57{
59 MSize newsize; 58 return (ls->c = ls->p < ls->pe ? (LexChar)(uint8_t)*ls->p++ : lex_more(ls));
60 if (ls->sb.sz >= LJ_MAX_STR/2)
61 lj_lex_error(ls, 0, LJ_ERR_XELEM);
62 newsize = ls->sb.sz * 2;
63 lj_str_resizebuf(ls->L, &ls->sb, newsize);
64 ls->sb.buf[ls->sb.n++] = (char)c;
65} 59}
66 60
67static LJ_AINLINE void save(LexState *ls, int c) 61/* Save character. */
62static LJ_AINLINE void lex_save(LexState *ls, LexChar c)
68{ 63{
69 if (LJ_UNLIKELY(ls->sb.n + 1 > ls->sb.sz)) 64 lj_buf_putb(&ls->sb, c);
70 save_grow(ls, c); 65}
71 else 66
72 ls->sb.buf[ls->sb.n++] = (char)c; 67/* Save previous character and get next character. */
68static LJ_AINLINE LexChar lex_savenext(LexState *ls)
69{
70 lex_save(ls, ls->c);
71 return lex_next(ls);
73} 72}
74 73
75static void inclinenumber(LexState *ls) 74/* Skip line break. Handles "\n", "\r", "\r\n" or "\n\r". */
75static void lex_newline(LexState *ls)
76{ 76{
77 int old = ls->current; 77 LexChar old = ls->c;
78 lua_assert(currIsNewline(ls)); 78 lua_assert(lex_iseol(ls));
79 next(ls); /* skip `\n' or `\r' */ 79 lex_next(ls); /* Skip "\n" or "\r". */
80 if (currIsNewline(ls) && ls->current != old) 80 if (lex_iseol(ls) && ls->c != old) lex_next(ls); /* Skip "\n\r" or "\r\n". */
81 next(ls); /* skip `\n\r' or `\r\n' */
82 if (++ls->linenumber >= LJ_MAX_LINE) 81 if (++ls->linenumber >= LJ_MAX_LINE)
83 lj_lex_error(ls, ls->token, LJ_ERR_XLINES); 82 lj_lex_error(ls, ls->tok, LJ_ERR_XLINES);
84} 83}
85 84
86/* -- Scanner for terminals ----------------------------------------------- */ 85/* -- Scanner for terminals ----------------------------------------------- */
@@ -89,19 +88,17 @@ static void inclinenumber(LexState *ls)
89static void lex_number(LexState *ls, TValue *tv) 88static void lex_number(LexState *ls, TValue *tv)
90{ 89{
91 StrScanFmt fmt; 90 StrScanFmt fmt;
92 int c, xp = 'e'; 91 LexChar c, xp = 'e';
93 lua_assert(lj_char_isdigit(ls->current)); 92 lua_assert(lj_char_isdigit(ls->c));
94 if ((c = ls->current) == '0') { 93 if ((c = ls->c) == '0' && (lex_savenext(ls) | 0x20) == 'x')
95 save_and_next(ls); 94 xp = 'p';
96 if ((ls->current | 0x20) == 'x') xp = 'p'; 95 while (lj_char_isident(ls->c) || ls->c == '.' ||
97 } 96 ((ls->c == '-' || ls->c == '+') && (c | 0x20) == xp)) {
98 while (lj_char_isident(ls->current) || ls->current == '.' || 97 c = ls->c;
99 ((ls->current == '-' || ls->current == '+') && (c | 0x20) == xp)) { 98 lex_savenext(ls);
100 c = ls->current;
101 save_and_next(ls);
102 } 99 }
103 save(ls, '\0'); 100 lex_save(ls, '\0');
104 fmt = lj_strscan_scan((const uint8_t *)ls->sb.buf, tv, 101 fmt = lj_strscan_scan((const uint8_t *)sbufB(&ls->sb), tv,
105 (LJ_DUALNUM ? STRSCAN_OPT_TOINT : STRSCAN_OPT_TONUM) | 102 (LJ_DUALNUM ? STRSCAN_OPT_TOINT : STRSCAN_OPT_TONUM) |
106 (LJ_HASFFI ? (STRSCAN_OPT_LL|STRSCAN_OPT_IMAG) : 0)); 103 (LJ_HASFFI ? (STRSCAN_OPT_LL|STRSCAN_OPT_IMAG) : 0));
107 if (LJ_DUALNUM && fmt == STRSCAN_INT) { 104 if (LJ_DUALNUM && fmt == STRSCAN_INT) {
@@ -134,60 +131,60 @@ static void lex_number(LexState *ls, TValue *tv)
134 } 131 }
135} 132}
136 133
137static int skip_sep(LexState *ls) 134/* Skip equal signs for "[=...=[" and "]=...=]" and return their count. */
135static int lex_skipeq(LexState *ls)
138{ 136{
139 int count = 0; 137 int count = 0;
140 int s = ls->current; 138 LexChar s = ls->c;
141 lua_assert(s == '[' || s == ']'); 139 lua_assert(s == '[' || s == ']');
142 save_and_next(ls); 140 while (lex_savenext(ls) == '=')
143 while (ls->current == '=') {
144 save_and_next(ls);
145 count++; 141 count++;
146 } 142 return (ls->c == s) ? count : (-count) - 1;
147 return (ls->current == s) ? count : (-count) - 1;
148} 143}
149 144
150static void read_long_string(LexState *ls, TValue *tv, int sep) 145/* Parse a long string or long comment (tv set to NULL). */
146static void lex_longstring(LexState *ls, TValue *tv, int sep)
151{ 147{
152 save_and_next(ls); /* skip 2nd `[' */ 148 lex_savenext(ls); /* Skip second '['. */
153 if (currIsNewline(ls)) /* string starts with a newline? */ 149 if (lex_iseol(ls)) /* Skip initial newline. */
154 inclinenumber(ls); /* skip it */ 150 lex_newline(ls);
155 for (;;) { 151 for (;;) {
156 switch (ls->current) { 152 switch (ls->c) {
157 case END_OF_STREAM: 153 case LEX_EOF:
158 lj_lex_error(ls, TK_eof, tv ? LJ_ERR_XLSTR : LJ_ERR_XLCOM); 154 lj_lex_error(ls, TK_eof, tv ? LJ_ERR_XLSTR : LJ_ERR_XLCOM);
159 break; 155 break;
160 case ']': 156 case ']':
161 if (skip_sep(ls) == sep) { 157 if (lex_skipeq(ls) == sep) {
162 save_and_next(ls); /* skip 2nd `]' */ 158 lex_savenext(ls); /* Skip second ']'. */
163 goto endloop; 159 goto endloop;
164 } 160 }
165 break; 161 break;
166 case '\n': 162 case '\n':
167 case '\r': 163 case '\r':
168 save(ls, '\n'); 164 lex_save(ls, '\n');
169 inclinenumber(ls); 165 lex_newline(ls);
170 if (!tv) lj_str_resetbuf(&ls->sb); /* avoid wasting space */ 166 if (!tv) lj_buf_reset(&ls->sb); /* Don't waste space for comments. */
171 break; 167 break;
172 default: 168 default:
173 if (tv) save_and_next(ls); 169 lex_savenext(ls);
174 else next(ls);
175 break; 170 break;
176 } 171 }
177 } endloop: 172 } endloop:
178 if (tv) { 173 if (tv) {
179 GCstr *str = lj_parse_keepstr(ls, ls->sb.buf + (2 + (MSize)sep), 174 GCstr *str = lj_parse_keepstr(ls, sbufB(&ls->sb) + (2 + (MSize)sep),
180 ls->sb.n - 2*(2 + (MSize)sep)); 175 sbuflen(&ls->sb) - 2*(2 + (MSize)sep));
181 setstrV(ls->L, tv, str); 176 setstrV(ls->L, tv, str);
182 } 177 }
183} 178}
184 179
185static void read_string(LexState *ls, int delim, TValue *tv) 180/* Parse a string. */
181static void lex_string(LexState *ls, TValue *tv)
186{ 182{
187 save_and_next(ls); 183 LexChar delim = ls->c; /* Delimiter is '\'' or '"'. */
188 while (ls->current != delim) { 184 lex_savenext(ls);
189 switch (ls->current) { 185 while (ls->c != delim) {
190 case END_OF_STREAM: 186 switch (ls->c) {
187 case LEX_EOF:
191 lj_lex_error(ls, TK_eof, LJ_ERR_XSTR); 188 lj_lex_error(ls, TK_eof, LJ_ERR_XSTR);
192 continue; 189 continue;
193 case '\n': 190 case '\n':
@@ -195,7 +192,7 @@ static void read_string(LexState *ls, int delim, TValue *tv)
195 lj_lex_error(ls, TK_string, LJ_ERR_XSTR); 192 lj_lex_error(ls, TK_string, LJ_ERR_XSTR);
196 continue; 193 continue;
197 case '\\': { 194 case '\\': {
198 int c = next(ls); /* Skip the '\\'. */ 195 LexChar c = lex_next(ls); /* Skip the '\\'. */
199 switch (c) { 196 switch (c) {
200 case 'a': c = '\a'; break; 197 case 'a': c = '\a'; break;
201 case 'b': c = '\b'; break; 198 case 'b': c = '\b'; break;
@@ -205,111 +202,112 @@ static void read_string(LexState *ls, int delim, TValue *tv)
205 case 't': c = '\t'; break; 202 case 't': c = '\t'; break;
206 case 'v': c = '\v'; break; 203 case 'v': c = '\v'; break;
207 case 'x': /* Hexadecimal escape '\xXX'. */ 204 case 'x': /* Hexadecimal escape '\xXX'. */
208 c = (next(ls) & 15u) << 4; 205 c = (lex_next(ls) & 15u) << 4;
209 if (!lj_char_isdigit(ls->current)) { 206 if (!lj_char_isdigit(ls->c)) {
210 if (!lj_char_isxdigit(ls->current)) goto err_xesc; 207 if (!lj_char_isxdigit(ls->c)) goto err_xesc;
211 c += 9 << 4; 208 c += 9 << 4;
212 } 209 }
213 c += (next(ls) & 15u); 210 c += (lex_next(ls) & 15u);
214 if (!lj_char_isdigit(ls->current)) { 211 if (!lj_char_isdigit(ls->c)) {
215 if (!lj_char_isxdigit(ls->current)) goto err_xesc; 212 if (!lj_char_isxdigit(ls->c)) goto err_xesc;
216 c += 9; 213 c += 9;
217 } 214 }
218 break; 215 break;
219 case 'z': /* Skip whitespace. */ 216 case 'z': /* Skip whitespace. */
220 next(ls); 217 lex_next(ls);
221 while (lj_char_isspace(ls->current)) 218 while (lj_char_isspace(ls->c))
222 if (currIsNewline(ls)) inclinenumber(ls); else next(ls); 219 if (lex_iseol(ls)) lex_newline(ls); else lex_next(ls);
223 continue; 220 continue;
224 case '\n': case '\r': save(ls, '\n'); inclinenumber(ls); continue; 221 case '\n': case '\r': lex_save(ls, '\n'); lex_newline(ls); continue;
225 case '\\': case '\"': case '\'': break; 222 case '\\': case '\"': case '\'': break;
226 case END_OF_STREAM: continue; 223 case LEX_EOF: continue;
227 default: 224 default:
228 if (!lj_char_isdigit(c)) 225 if (!lj_char_isdigit(c))
229 goto err_xesc; 226 goto err_xesc;
230 c -= '0'; /* Decimal escape '\ddd'. */ 227 c -= '0'; /* Decimal escape '\ddd'. */
231 if (lj_char_isdigit(next(ls))) { 228 if (lj_char_isdigit(lex_next(ls))) {
232 c = c*10 + (ls->current - '0'); 229 c = c*10 + (ls->c - '0');
233 if (lj_char_isdigit(next(ls))) { 230 if (lj_char_isdigit(lex_next(ls))) {
234 c = c*10 + (ls->current - '0'); 231 c = c*10 + (ls->c - '0');
235 if (c > 255) { 232 if (c > 255) {
236 err_xesc: 233 err_xesc:
237 lj_lex_error(ls, TK_string, LJ_ERR_XESC); 234 lj_lex_error(ls, TK_string, LJ_ERR_XESC);
238 } 235 }
239 next(ls); 236 lex_next(ls);
240 } 237 }
241 } 238 }
242 save(ls, c); 239 lex_save(ls, c);
243 continue; 240 continue;
244 } 241 }
245 save(ls, c); 242 lex_save(ls, c);
246 next(ls); 243 lex_next(ls);
247 continue; 244 continue;
248 } 245 }
249 default: 246 default:
250 save_and_next(ls); 247 lex_savenext(ls);
251 break; 248 break;
252 } 249 }
253 } 250 }
254 save_and_next(ls); /* skip delimiter */ 251 lex_savenext(ls); /* Skip trailing delimiter. */
255 setstrV(ls->L, tv, lj_parse_keepstr(ls, ls->sb.buf + 1, ls->sb.n - 2)); 252 setstrV(ls->L, tv,
253 lj_parse_keepstr(ls, sbufB(&ls->sb)+1, sbuflen(&ls->sb)-2));
256} 254}
257 255
258/* -- Main lexical scanner ------------------------------------------------ */ 256/* -- Main lexical scanner ------------------------------------------------ */
259 257
260static int llex(LexState *ls, TValue *tv) 258/* Get next lexical token. */
259static LexToken lex_scan(LexState *ls, TValue *tv)
261{ 260{
262 lj_str_resetbuf(&ls->sb); 261 lj_buf_reset(&ls->sb);
263 for (;;) { 262 for (;;) {
264 if (lj_char_isident(ls->current)) { 263 if (lj_char_isident(ls->c)) {
265 GCstr *s; 264 GCstr *s;
266 if (lj_char_isdigit(ls->current)) { /* Numeric literal. */ 265 if (lj_char_isdigit(ls->c)) { /* Numeric literal. */
267 lex_number(ls, tv); 266 lex_number(ls, tv);
268 return TK_number; 267 return TK_number;
269 } 268 }
270 /* Identifier or reserved word. */ 269 /* Identifier or reserved word. */
271 do { 270 do {
272 save_and_next(ls); 271 lex_savenext(ls);
273 } while (lj_char_isident(ls->current)); 272 } while (lj_char_isident(ls->c));
274 s = lj_parse_keepstr(ls, ls->sb.buf, ls->sb.n); 273 s = lj_parse_keepstr(ls, sbufB(&ls->sb), sbuflen(&ls->sb));
275 setstrV(ls->L, tv, s); 274 setstrV(ls->L, tv, s);
276 if (s->reserved > 0) /* Reserved word? */ 275 if (s->reserved > 0) /* Reserved word? */
277 return TK_OFS + s->reserved; 276 return TK_OFS + s->reserved;
278 return TK_name; 277 return TK_name;
279 } 278 }
280 switch (ls->current) { 279 switch (ls->c) {
281 case '\n': 280 case '\n':
282 case '\r': 281 case '\r':
283 inclinenumber(ls); 282 lex_newline(ls);
284 continue; 283 continue;
285 case ' ': 284 case ' ':
286 case '\t': 285 case '\t':
287 case '\v': 286 case '\v':
288 case '\f': 287 case '\f':
289 next(ls); 288 lex_next(ls);
290 continue; 289 continue;
291 case '-': 290 case '-':
292 next(ls); 291 lex_next(ls);
293 if (ls->current != '-') return '-'; 292 if (ls->c != '-') return '-';
294 /* else is a comment */ 293 lex_next(ls);
295 next(ls); 294 if (ls->c == '[') { /* Long comment "--[=*[...]=*]". */
296 if (ls->current == '[') { 295 int sep = lex_skipeq(ls);
297 int sep = skip_sep(ls); 296 lj_buf_reset(&ls->sb); /* `lex_skipeq' may dirty the buffer */
298 lj_str_resetbuf(&ls->sb); /* `skip_sep' may dirty the buffer */
299 if (sep >= 0) { 297 if (sep >= 0) {
300 read_long_string(ls, NULL, sep); /* long comment */ 298 lex_longstring(ls, NULL, sep);
301 lj_str_resetbuf(&ls->sb); 299 lj_buf_reset(&ls->sb);
302 continue; 300 continue;
303 } 301 }
304 } 302 }
305 /* else short comment */ 303 /* Short comment "--.*\n". */
306 while (!currIsNewline(ls) && ls->current != END_OF_STREAM) 304 while (!lex_iseol(ls) && ls->c != LEX_EOF)
307 next(ls); 305 lex_next(ls);
308 continue; 306 continue;
309 case '[': { 307 case '[': {
310 int sep = skip_sep(ls); 308 int sep = lex_skipeq(ls);
311 if (sep >= 0) { 309 if (sep >= 0) {
312 read_long_string(ls, tv, sep); 310 lex_longstring(ls, tv, sep);
313 return TK_string; 311 return TK_string;
314 } else if (sep == -1) { 312 } else if (sep == -1) {
315 return '['; 313 return '[';
@@ -319,44 +317,43 @@ static int llex(LexState *ls, TValue *tv)
319 } 317 }
320 } 318 }
321 case '=': 319 case '=':
322 next(ls); 320 lex_next(ls);
323 if (ls->current != '=') return '='; else { next(ls); return TK_eq; } 321 if (ls->c != '=') return '='; else { lex_next(ls); return TK_eq; }
324 case '<': 322 case '<':
325 next(ls); 323 lex_next(ls);
326 if (ls->current != '=') return '<'; else { next(ls); return TK_le; } 324 if (ls->c != '=') return '<'; else { lex_next(ls); return TK_le; }
327 case '>': 325 case '>':
328 next(ls); 326 lex_next(ls);
329 if (ls->current != '=') return '>'; else { next(ls); return TK_ge; } 327 if (ls->c != '=') return '>'; else { lex_next(ls); return TK_ge; }
330 case '~': 328 case '~':
331 next(ls); 329 lex_next(ls);
332 if (ls->current != '=') return '~'; else { next(ls); return TK_ne; } 330 if (ls->c != '=') return '~'; else { lex_next(ls); return TK_ne; }
333 case ':': 331 case ':':
334 next(ls); 332 lex_next(ls);
335 if (ls->current != ':') return ':'; else { next(ls); return TK_label; } 333 if (ls->c != ':') return ':'; else { lex_next(ls); return TK_label; }
336 case '"': 334 case '"':
337 case '\'': 335 case '\'':
338 read_string(ls, ls->current, tv); 336 lex_string(ls, tv);
339 return TK_string; 337 return TK_string;
340 case '.': 338 case '.':
341 save_and_next(ls); 339 if (lex_savenext(ls) == '.') {
342 if (ls->current == '.') { 340 lex_next(ls);
343 next(ls); 341 if (ls->c == '.') {
344 if (ls->current == '.') { 342 lex_next(ls);
345 next(ls);
346 return TK_dots; /* ... */ 343 return TK_dots; /* ... */
347 } 344 }
348 return TK_concat; /* .. */ 345 return TK_concat; /* .. */
349 } else if (!lj_char_isdigit(ls->current)) { 346 } else if (!lj_char_isdigit(ls->c)) {
350 return '.'; 347 return '.';
351 } else { 348 } else {
352 lex_number(ls, tv); 349 lex_number(ls, tv);
353 return TK_number; 350 return TK_number;
354 } 351 }
355 case END_OF_STREAM: 352 case LEX_EOF:
356 return TK_eof; 353 return TK_eof;
357 default: { 354 default: {
358 int c = ls->current; 355 LexChar c = ls->c;
359 next(ls); 356 lex_next(ls);
360 return c; /* Single-char tokens (+ - / ...). */ 357 return c; /* Single-char tokens (+ - / ...). */
361 } 358 }
362 } 359 }
@@ -371,8 +368,7 @@ int lj_lex_setup(lua_State *L, LexState *ls)
371 int header = 0; 368 int header = 0;
372 ls->L = L; 369 ls->L = L;
373 ls->fs = NULL; 370 ls->fs = NULL;
374 ls->n = 0; 371 ls->pe = ls->p = NULL;
375 ls->p = NULL;
376 ls->vstack = NULL; 372 ls->vstack = NULL;
377 ls->sizevstack = 0; 373 ls->sizevstack = 0;
378 ls->vtop = 0; 374 ls->vtop = 0;
@@ -381,24 +377,22 @@ int lj_lex_setup(lua_State *L, LexState *ls)
381 ls->lookahead = TK_eof; /* No look-ahead token. */ 377 ls->lookahead = TK_eof; /* No look-ahead token. */
382 ls->linenumber = 1; 378 ls->linenumber = 1;
383 ls->lastline = 1; 379 ls->lastline = 1;
384 lj_str_resizebuf(ls->L, &ls->sb, LJ_MIN_SBUF); 380 lex_next(ls); /* Read-ahead first char. */
385 next(ls); /* Read-ahead first char. */ 381 if (ls->c == 0xef && ls->p + 2 <= ls->pe && (uint8_t)ls->p[0] == 0xbb &&
386 if (ls->current == 0xef && ls->n >= 2 && char2int(ls->p[0]) == 0xbb && 382 (uint8_t)ls->p[1] == 0xbf) { /* Skip UTF-8 BOM (if buffered). */
387 char2int(ls->p[1]) == 0xbf) { /* Skip UTF-8 BOM (if buffered). */
388 ls->n -= 2;
389 ls->p += 2; 383 ls->p += 2;
390 next(ls); 384 lex_next(ls);
391 header = 1; 385 header = 1;
392 } 386 }
393 if (ls->current == '#') { /* Skip POSIX #! header line. */ 387 if (ls->c == '#') { /* Skip POSIX #! header line. */
394 do { 388 do {
395 next(ls); 389 lex_next(ls);
396 if (ls->current == END_OF_STREAM) return 0; 390 if (ls->c == LEX_EOF) return 0;
397 } while (!currIsNewline(ls)); 391 } while (!lex_iseol(ls));
398 inclinenumber(ls); 392 lex_newline(ls);
399 header = 1; 393 header = 1;
400 } 394 }
401 if (ls->current == LUA_SIGNATURE[0]) { /* Bytecode dump. */ 395 if (ls->c == LUA_SIGNATURE[0]) { /* Bytecode dump. */
402 if (header) { 396 if (header) {
403 /* 397 /*
404 ** Loading bytecode with an extra header is disabled for security 398 ** Loading bytecode with an extra header is disabled for security
@@ -420,55 +414,60 @@ void lj_lex_cleanup(lua_State *L, LexState *ls)
420 global_State *g = G(L); 414 global_State *g = G(L);
421 lj_mem_freevec(g, ls->bcstack, ls->sizebcstack, BCInsLine); 415 lj_mem_freevec(g, ls->bcstack, ls->sizebcstack, BCInsLine);
422 lj_mem_freevec(g, ls->vstack, ls->sizevstack, VarInfo); 416 lj_mem_freevec(g, ls->vstack, ls->sizevstack, VarInfo);
423 lj_str_freebuf(g, &ls->sb); 417 lj_buf_free(g, &ls->sb);
424} 418}
425 419
420/* Return next lexical token. */
426void lj_lex_next(LexState *ls) 421void lj_lex_next(LexState *ls)
427{ 422{
428 ls->lastline = ls->linenumber; 423 ls->lastline = ls->linenumber;
429 if (LJ_LIKELY(ls->lookahead == TK_eof)) { /* No lookahead token? */ 424 if (LJ_LIKELY(ls->lookahead == TK_eof)) { /* No lookahead token? */
430 ls->token = llex(ls, &ls->tokenval); /* Get next token. */ 425 ls->tok = lex_scan(ls, &ls->tokval); /* Get next token. */
431 } else { /* Otherwise return lookahead token. */ 426 } else { /* Otherwise return lookahead token. */
432 ls->token = ls->lookahead; 427 ls->tok = ls->lookahead;
433 ls->lookahead = TK_eof; 428 ls->lookahead = TK_eof;
434 ls->tokenval = ls->lookaheadval; 429 ls->tokval = ls->lookaheadval;
435 } 430 }
436} 431}
437 432
433/* Look ahead for the next token. */
438LexToken lj_lex_lookahead(LexState *ls) 434LexToken lj_lex_lookahead(LexState *ls)
439{ 435{
440 lua_assert(ls->lookahead == TK_eof); 436 lua_assert(ls->lookahead == TK_eof);
441 ls->lookahead = llex(ls, &ls->lookaheadval); 437 ls->lookahead = lex_scan(ls, &ls->lookaheadval);
442 return ls->lookahead; 438 return ls->lookahead;
443} 439}
444 440
445const char *lj_lex_token2str(LexState *ls, LexToken token) 441/* Convert token to string. */
442const char *lj_lex_token2str(LexState *ls, LexToken tok)
446{ 443{
447 if (token > TK_OFS) 444 if (tok > TK_OFS)
448 return tokennames[token-TK_OFS-1]; 445 return tokennames[tok-TK_OFS-1];
449 else if (!lj_char_iscntrl(token)) 446 else if (!lj_char_iscntrl(tok))
450 return lj_str_pushf(ls->L, "%c", token); 447 return lj_str_pushf(ls->L, "%c", tok);
451 else 448 else
452 return lj_str_pushf(ls->L, "char(%d)", token); 449 return lj_str_pushf(ls->L, "char(%d)", tok);
453} 450}
454 451
455void lj_lex_error(LexState *ls, LexToken token, ErrMsg em, ...) 452/* Lexer error. */
453void lj_lex_error(LexState *ls, LexToken tok, ErrMsg em, ...)
456{ 454{
457 const char *tok; 455 const char *tokstr;
458 va_list argp; 456 va_list argp;
459 if (token == 0) { 457 if (tok == 0) {
460 tok = NULL; 458 tokstr = NULL;
461 } else if (token == TK_name || token == TK_string || token == TK_number) { 459 } else if (tok == TK_name || tok == TK_string || tok == TK_number) {
462 save(ls, '\0'); 460 lex_save(ls, '\0');
463 tok = ls->sb.buf; 461 tokstr = sbufB(&ls->sb);
464 } else { 462 } else {
465 tok = lj_lex_token2str(ls, token); 463 tokstr = lj_lex_token2str(ls, tok);
466 } 464 }
467 va_start(argp, em); 465 va_start(argp, em);
468 lj_err_lex(ls->L, ls->chunkname, tok, ls->linenumber, em, argp); 466 lj_err_lex(ls->L, ls->chunkname, tokstr, ls->linenumber, em, argp);
469 va_end(argp); 467 va_end(argp);
470} 468}
471 469
470/* Initialize strings for reserved words. */
472void lj_lex_init(lua_State *L) 471void lj_lex_init(lua_State *L)
473{ 472{
474 uint32_t i; 473 uint32_t i;
diff --git a/src/lj_lex.h b/src/lj_lex.h
index 6e18e4b0..3e76e72a 100644
--- a/src/lj_lex.h
+++ b/src/lj_lex.h
@@ -30,7 +30,8 @@ TKDEF(TKENUM1, TKENUM2)
30 TK_RESERVED = TK_while - TK_OFS 30 TK_RESERVED = TK_while - TK_OFS
31}; 31};
32 32
33typedef int LexToken; 33typedef int LexChar; /* Lexical character. Unsigned ext. from char. */
34typedef int LexToken; /* Lexical token. */
34 35
35/* Combined bytecode ins/line. Only used during bytecode generation. */ 36/* Combined bytecode ins/line. Only used during bytecode generation. */
36typedef struct BCInsLine { 37typedef struct BCInsLine {
@@ -51,13 +52,13 @@ typedef struct VarInfo {
51typedef struct LexState { 52typedef struct LexState {
52 struct FuncState *fs; /* Current FuncState. Defined in lj_parse.c. */ 53 struct FuncState *fs; /* Current FuncState. Defined in lj_parse.c. */
53 struct lua_State *L; /* Lua state. */ 54 struct lua_State *L; /* Lua state. */
54 TValue tokenval; /* Current token value. */ 55 TValue tokval; /* Current token value. */
55 TValue lookaheadval; /* Lookahead token value. */ 56 TValue lookaheadval; /* Lookahead token value. */
56 int current; /* Current character (charint). */
57 LexToken token; /* Current token. */
58 LexToken lookahead; /* Lookahead token. */
59 MSize n; /* Bytes left in input buffer. */
60 const char *p; /* Current position in input buffer. */ 57 const char *p; /* Current position in input buffer. */
58 const char *pe; /* End of input buffer. */
59 LexChar c; /* Current character. */
60 LexToken tok; /* Current token. */
61 LexToken lookahead; /* Lookahead token. */
61 SBuf sb; /* String buffer for tokens. */ 62 SBuf sb; /* String buffer for tokens. */
62 lua_Reader rfunc; /* Reader callback. */ 63 lua_Reader rfunc; /* Reader callback. */
63 void *rdata; /* Reader callback data. */ 64 void *rdata; /* Reader callback data. */
@@ -78,8 +79,8 @@ LJ_FUNC int lj_lex_setup(lua_State *L, LexState *ls);
78LJ_FUNC void lj_lex_cleanup(lua_State *L, LexState *ls); 79LJ_FUNC void lj_lex_cleanup(lua_State *L, LexState *ls);
79LJ_FUNC void lj_lex_next(LexState *ls); 80LJ_FUNC void lj_lex_next(LexState *ls);
80LJ_FUNC LexToken lj_lex_lookahead(LexState *ls); 81LJ_FUNC LexToken lj_lex_lookahead(LexState *ls);
81LJ_FUNC const char *lj_lex_token2str(LexState *ls, LexToken token); 82LJ_FUNC const char *lj_lex_token2str(LexState *ls, LexToken tok);
82LJ_FUNC_NORET void lj_lex_error(LexState *ls, LexToken token, ErrMsg em, ...); 83LJ_FUNC_NORET void lj_lex_error(LexState *ls, LexToken tok, ErrMsg em, ...);
83LJ_FUNC void lj_lex_init(lua_State *L); 84LJ_FUNC void lj_lex_init(lua_State *L);
84 85
85#endif 86#endif
diff --git a/src/lj_lib.c b/src/lj_lib.c
index 331eaa6a..b6aa97a0 100644
--- a/src/lj_lib.c
+++ b/src/lj_lib.c
@@ -18,6 +18,8 @@
18#include "lj_dispatch.h" 18#include "lj_dispatch.h"
19#include "lj_vm.h" 19#include "lj_vm.h"
20#include "lj_strscan.h" 20#include "lj_strscan.h"
21#include "lj_lex.h"
22#include "lj_bcdump.h"
21#include "lj_lib.h" 23#include "lj_lib.h"
22 24
23/* -- Library initialization ---------------------------------------------- */ 25/* -- Library initialization ---------------------------------------------- */
@@ -43,6 +45,28 @@ static GCtab *lib_create_table(lua_State *L, const char *libname, int hsize)
43 return tabV(L->top-1); 45 return tabV(L->top-1);
44} 46}
45 47
48static const uint8_t *lib_read_lfunc(lua_State *L, const uint8_t *p, GCtab *tab)
49{
50 int len = *p++;
51 GCstr *name = lj_str_new(L, (const char *)p, len);
52 LexState ls;
53 GCproto *pt;
54 GCfunc *fn;
55 memset(&ls, 0, sizeof(ls));
56 ls.L = L;
57 ls.p = (const char *)(p+len);
58 ls.pe = (const char *)~(uintptr_t)0;
59 ls.c = -1;
60 ls.level = (BCDUMP_F_STRIP|(LJ_BE*BCDUMP_F_BE));
61 ls.chunkname = name;
62 pt = lj_bcread_proto(&ls);
63 pt->firstline = ~(BCLine)0;
64 fn = lj_func_newL_empty(L, pt, tabref(L->env));
65 /* NOBARRIER: See below for common barrier. */
66 setfuncV(L, lj_tab_setstr(L, tab, name), fn);
67 return (const uint8_t *)ls.p;
68}
69
46void lj_lib_register(lua_State *L, const char *libname, 70void lj_lib_register(lua_State *L, const char *libname,
47 const uint8_t *p, const lua_CFunction *cf) 71 const uint8_t *p, const lua_CFunction *cf)
48{ 72{
@@ -87,6 +111,9 @@ void lj_lib_register(lua_State *L, const char *libname,
87 ofn = fn; 111 ofn = fn;
88 } else { 112 } else {
89 switch (tag | len) { 113 switch (tag | len) {
114 case LIBINIT_LUA:
115 p = lib_read_lfunc(L, p, tab);
116 break;
90 case LIBINIT_SET: 117 case LIBINIT_SET:
91 L->top -= 2; 118 L->top -= 2;
92 if (tvisstr(L->top+1) && strV(L->top+1)->len == 0) 119 if (tvisstr(L->top+1) && strV(L->top+1)->len == 0)
diff --git a/src/lj_lib.h b/src/lj_lib.h
index 2fe6d2a8..05f90de5 100644
--- a/src/lj_lib.h
+++ b/src/lj_lib.h
@@ -77,6 +77,7 @@ static LJ_AINLINE void lj_lib_pushcc(lua_State *L, lua_CFunction f,
77#define LJLIB_CF(name) static int lj_cf_##name(lua_State *L) 77#define LJLIB_CF(name) static int lj_cf_##name(lua_State *L)
78#define LJLIB_ASM(name) static int lj_ffh_##name(lua_State *L) 78#define LJLIB_ASM(name) static int lj_ffh_##name(lua_State *L)
79#define LJLIB_ASM_(name) 79#define LJLIB_ASM_(name)
80#define LJLIB_LUA(name)
80#define LJLIB_SET(name) 81#define LJLIB_SET(name)
81#define LJLIB_PUSH(arg) 82#define LJLIB_PUSH(arg)
82#define LJLIB_REC(handler) 83#define LJLIB_REC(handler)
@@ -96,7 +97,8 @@ LJ_FUNC void lj_lib_register(lua_State *L, const char *libname,
96#define LIBINIT_ASM 0x40 97#define LIBINIT_ASM 0x40
97#define LIBINIT_ASM_ 0x80 98#define LIBINIT_ASM_ 0x80
98#define LIBINIT_STRING 0xc0 99#define LIBINIT_STRING 0xc0
99#define LIBINIT_MAXSTR 0x39 100#define LIBINIT_MAXSTR 0x38
101#define LIBINIT_LUA 0xf9
100#define LIBINIT_SET 0xfa 102#define LIBINIT_SET 0xfa
101#define LIBINIT_NUMBER 0xfb 103#define LIBINIT_NUMBER 0xfb
102#define LIBINIT_COPY 0xfc 104#define LIBINIT_COPY 0xfc
diff --git a/src/lj_load.c b/src/lj_load.c
index 9d892678..b5cbb3ba 100644
--- a/src/lj_load.c
+++ b/src/lj_load.c
@@ -15,6 +15,7 @@
15#include "lj_obj.h" 15#include "lj_obj.h"
16#include "lj_gc.h" 16#include "lj_gc.h"
17#include "lj_err.h" 17#include "lj_err.h"
18#include "lj_buf.h"
18#include "lj_str.h" 19#include "lj_str.h"
19#include "lj_func.h" 20#include "lj_func.h"
20#include "lj_frame.h" 21#include "lj_frame.h"
@@ -54,7 +55,7 @@ LUA_API int lua_loadx(lua_State *L, lua_Reader reader, void *data,
54 ls.rdata = data; 55 ls.rdata = data;
55 ls.chunkarg = chunkname ? chunkname : "?"; 56 ls.chunkarg = chunkname ? chunkname : "?";
56 ls.mode = mode; 57 ls.mode = mode;
57 lj_str_initbuf(&ls.sb); 58 lj_buf_init(L, &ls.sb);
58 status = lj_vm_cpcall(L, NULL, &ls, cpparser); 59 status = lj_vm_cpcall(L, NULL, &ls, cpparser);
59 lj_lex_cleanup(L, &ls); 60 lj_lex_cleanup(L, &ls);
60 lj_gc_check(L); 61 lj_gc_check(L);
diff --git a/src/lj_meta.c b/src/lj_meta.c
index 441d571a..db1ce928 100644
--- a/src/lj_meta.c
+++ b/src/lj_meta.c
@@ -12,6 +12,7 @@
12#include "lj_obj.h" 12#include "lj_obj.h"
13#include "lj_gc.h" 13#include "lj_gc.h"
14#include "lj_err.h" 14#include "lj_err.h"
15#include "lj_buf.h"
15#include "lj_str.h" 16#include "lj_str.h"
16#include "lj_tab.h" 17#include "lj_tab.h"
17#include "lj_meta.h" 18#include "lj_meta.h"
@@ -19,6 +20,7 @@
19#include "lj_bc.h" 20#include "lj_bc.h"
20#include "lj_vm.h" 21#include "lj_vm.h"
21#include "lj_strscan.h" 22#include "lj_strscan.h"
23#include "lj_lib.h"
22 24
23/* -- Metamethod handling ------------------------------------------------- */ 25/* -- Metamethod handling ------------------------------------------------- */
24 26
@@ -282,7 +284,7 @@ TValue *lj_meta_cat(lua_State *L, TValue *top, int left)
282 ** next step: [...][CAT stack ............] 284 ** next step: [...][CAT stack ............]
283 */ 285 */
284 MSize tlen = strV(top)->len; 286 MSize tlen = strV(top)->len;
285 char *buffer; 287 char *buf;
286 int i; 288 int i;
287 for (n = 1; n <= left && tostring(L, top-n); n++) { 289 for (n = 1; n <= left && tostring(L, top-n); n++) {
288 MSize len = strV(top-n)->len; 290 MSize len = strV(top-n)->len;
@@ -290,15 +292,15 @@ TValue *lj_meta_cat(lua_State *L, TValue *top, int left)
290 lj_err_msg(L, LJ_ERR_STROV); 292 lj_err_msg(L, LJ_ERR_STROV);
291 tlen += len; 293 tlen += len;
292 } 294 }
293 buffer = lj_str_needbuf(L, &G(L)->tmpbuf, tlen); 295 buf = lj_buf_tmp(L, tlen);
294 n--; 296 n--;
295 tlen = 0; 297 tlen = 0;
296 for (i = n; i >= 0; i--) { 298 for (i = n; i >= 0; i--) {
297 MSize len = strV(top-i)->len; 299 MSize len = strV(top-i)->len;
298 memcpy(buffer + tlen, strVdata(top-i), len); 300 memcpy(buf + tlen, strVdata(top-i), len);
299 tlen += len; 301 tlen += len;
300 } 302 }
301 setstrV(L, top-n, lj_str_new(L, buffer, tlen)); 303 setstrV(L, top-n, lj_str_new(L, buf, tlen));
302 } 304 }
303 left -= n; 305 left -= n;
304 top -= n; 306 top -= n;
@@ -423,6 +425,18 @@ TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op)
423 } 425 }
424} 426}
425 427
428/* Helper for ISTYPE and ISNUM. Implicit coercion or error. */
429void lj_meta_istype(lua_State *L, BCReg ra, BCReg tp)
430{
431 L->top = curr_topL(L);
432 ra++; tp--;
433 lua_assert(LJ_DUALNUM || tp != ~LJ_TNUMX); /* ISTYPE -> ISNUM broken. */
434 if (LJ_DUALNUM && tp == ~LJ_TNUMX) lj_lib_checkint(L, ra);
435 else if (tp == ~LJ_TNUMX+1) lj_lib_checknum(L, ra);
436 else if (tp == ~LJ_TSTR) lj_lib_checkstr(L, ra);
437 else lj_err_argtype(L, ra, lj_obj_itypename[tp]);
438}
439
426/* Helper for calls. __call metamethod. */ 440/* Helper for calls. __call metamethod. */
427void lj_meta_call(lua_State *L, TValue *func, TValue *top) 441void lj_meta_call(lua_State *L, TValue *func, TValue *top)
428{ 442{
diff --git a/src/lj_meta.h b/src/lj_meta.h
index 6af5e514..970398ec 100644
--- a/src/lj_meta.h
+++ b/src/lj_meta.h
@@ -31,6 +31,7 @@ LJ_FUNCA TValue * LJ_FASTCALL lj_meta_len(lua_State *L, cTValue *o);
31LJ_FUNCA TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne); 31LJ_FUNCA TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne);
32LJ_FUNCA TValue * LJ_FASTCALL lj_meta_equal_cd(lua_State *L, BCIns ins); 32LJ_FUNCA TValue * LJ_FASTCALL lj_meta_equal_cd(lua_State *L, BCIns ins);
33LJ_FUNCA TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op); 33LJ_FUNCA TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op);
34LJ_FUNCA void lj_meta_istype(lua_State *L, BCReg ra, BCReg tp);
34LJ_FUNCA void lj_meta_call(lua_State *L, TValue *func, TValue *top); 35LJ_FUNCA void lj_meta_call(lua_State *L, TValue *func, TValue *top);
35LJ_FUNCA void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *o); 36LJ_FUNCA void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *o);
36 37
diff --git a/src/lj_obj.h b/src/lj_obj.h
index b967819d..31104429 100644
--- a/src/lj_obj.h
+++ b/src/lj_obj.h
@@ -119,11 +119,12 @@ typedef int32_t BCLine; /* Bytecode line number. */
119/* Internal assembler functions. Never call these directly from C. */ 119/* Internal assembler functions. Never call these directly from C. */
120typedef void (*ASMFunction)(void); 120typedef void (*ASMFunction)(void);
121 121
122/* Resizable string buffer. Need this here, details in lj_str.h. */ 122/* Resizable string buffer. Need this here, details in lj_buf.h. */
123typedef struct SBuf { 123typedef struct SBuf {
124 char *buf; /* String buffer base. */ 124 MRef p; /* String buffer pointer. */
125 MSize n; /* String buffer length. */ 125 MRef e; /* String buffer end pointer. */
126 MSize sz; /* String buffer size. */ 126 MRef b; /* String buffer base. */
127 MRef L; /* lua_State, used for buffer resizing. */
127} SBuf; 128} SBuf;
128 129
129/* -- Tags and values ----------------------------------------------------- */ 130/* -- Tags and values ----------------------------------------------------- */
@@ -516,8 +517,8 @@ typedef struct global_State {
516 lua_Alloc allocf; /* Memory allocator. */ 517 lua_Alloc allocf; /* Memory allocator. */
517 void *allocd; /* Memory allocator data. */ 518 void *allocd; /* Memory allocator data. */
518 GCState gc; /* Garbage collector. */ 519 GCState gc; /* Garbage collector. */
519 SBuf tmpbuf; /* Temporary buffer for string concatenation. */ 520 volatile int32_t vmstate; /* VM state or current JIT code trace number. */
520 Node nilnode; /* Fallback 1-element hash part (nil key and value). */ 521 SBuf tmpbuf; /* Temporary string buffer. */
521 GCstr strempty; /* Empty string. */ 522 GCstr strempty; /* Empty string. */
522 uint8_t stremptyz; /* Zero terminator of empty string. */ 523 uint8_t stremptyz; /* Zero terminator of empty string. */
523 uint8_t hookmask; /* Hook mask. */ 524 uint8_t hookmask; /* Hook mask. */
@@ -526,13 +527,13 @@ typedef struct global_State {
526 GCRef mainthref; /* Link to main thread. */ 527 GCRef mainthref; /* Link to main thread. */
527 TValue registrytv; /* Anchor for registry. */ 528 TValue registrytv; /* Anchor for registry. */
528 TValue tmptv, tmptv2; /* Temporary TValues. */ 529 TValue tmptv, tmptv2; /* Temporary TValues. */
530 Node nilnode; /* Fallback 1-element hash part (nil key and value). */
529 GCupval uvhead; /* Head of double-linked list of all open upvalues. */ 531 GCupval uvhead; /* Head of double-linked list of all open upvalues. */
530 int32_t hookcount; /* Instruction hook countdown. */ 532 int32_t hookcount; /* Instruction hook countdown. */
531 int32_t hookcstart; /* Start count for instruction hook counter. */ 533 int32_t hookcstart; /* Start count for instruction hook counter. */
532 lua_Hook hookf; /* Hook function. */ 534 lua_Hook hookf; /* Hook function. */
533 lua_CFunction wrapf; /* Wrapper for C function calls. */ 535 lua_CFunction wrapf; /* Wrapper for C function calls. */
534 lua_CFunction panic; /* Called as a last resort for errors. */ 536 lua_CFunction panic; /* Called as a last resort for errors. */
535 volatile int32_t vmstate; /* VM state or current JIT code trace number. */
536 BCIns bc_cfunc_int; /* Bytecode for internal C function calls. */ 537 BCIns bc_cfunc_int; /* Bytecode for internal C function calls. */
537 BCIns bc_cfunc_ext; /* Bytecode for external C function calls. */ 538 BCIns bc_cfunc_ext; /* Bytecode for external C function calls. */
538 GCRef jit_L; /* Current JIT code lua_State or NULL. */ 539 GCRef jit_L; /* Current JIT code lua_State or NULL. */
@@ -810,11 +811,7 @@ static LJ_AINLINE int32_t lj_num2bit(lua_Number n)
810#endif 811#endif
811} 812}
812 813
813#if LJ_TARGET_X86 && !defined(__SSE2__)
814#define lj_num2int(n) lj_num2bit((n))
815#else
816#define lj_num2int(n) ((int32_t)(n)) 814#define lj_num2int(n) ((int32_t)(n))
817#endif
818 815
819static LJ_AINLINE uint64_t lj_num2u64(lua_Number n) 816static LJ_AINLINE uint64_t lj_num2u64(lua_Number n)
820{ 817{
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
index be50bf97..75db47df 100644
--- a/src/lj_opt_fold.c
+++ b/src/lj_opt_fold.c
@@ -22,8 +22,8 @@
22#include "lj_trace.h" 22#include "lj_trace.h"
23#if LJ_HASFFI 23#if LJ_HASFFI
24#include "lj_ctype.h" 24#include "lj_ctype.h"
25#endif
26#include "lj_carith.h" 25#include "lj_carith.h"
26#endif
27#include "lj_vm.h" 27#include "lj_vm.h"
28#include "lj_strscan.h" 28#include "lj_strscan.h"
29 29
@@ -336,11 +336,9 @@ LJFOLDF(kfold_intcomp0)
336static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op) 336static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op)
337{ 337{
338 switch (op) { 338 switch (op) {
339#if LJ_64 || LJ_HASFFI 339#if LJ_HASFFI
340 case IR_ADD: k1 += k2; break; 340 case IR_ADD: k1 += k2; break;
341 case IR_SUB: k1 -= k2; break; 341 case IR_SUB: k1 -= k2; break;
342#endif
343#if LJ_HASFFI
344 case IR_MUL: k1 *= k2; break; 342 case IR_MUL: k1 *= k2; break;
345 case IR_BAND: k1 &= k2; break; 343 case IR_BAND: k1 &= k2; break;
346 case IR_BOR: k1 |= k2; break; 344 case IR_BOR: k1 |= k2; break;
@@ -392,20 +390,10 @@ LJFOLD(BROL KINT64 KINT)
392LJFOLD(BROR KINT64 KINT) 390LJFOLD(BROR KINT64 KINT)
393LJFOLDF(kfold_int64shift) 391LJFOLDF(kfold_int64shift)
394{ 392{
395#if LJ_HASFFI || LJ_64 393#if LJ_HASFFI
396 uint64_t k = ir_k64(fleft)->u64; 394 uint64_t k = ir_k64(fleft)->u64;
397 int32_t sh = (fright->i & 63); 395 int32_t sh = (fright->i & 63);
398 switch ((IROp)fins->o) { 396 return INT64FOLD(lj_carith_shift64(k, sh, fins->o - IR_BSHL));
399 case IR_BSHL: k <<= sh; break;
400#if LJ_HASFFI
401 case IR_BSHR: k >>= sh; break;
402 case IR_BSAR: k = (uint64_t)((int64_t)k >> sh); break;
403 case IR_BROL: k = lj_rol(k, sh); break;
404 case IR_BROR: k = lj_ror(k, sh); break;
405#endif
406 default: lua_assert(0); break;
407 }
408 return INT64FOLD(k);
409#else 397#else
410 UNUSED(J); lua_assert(0); return FAILFOLD; 398 UNUSED(J); lua_assert(0); return FAILFOLD;
411#endif 399#endif
@@ -647,27 +635,22 @@ LJFOLD(CONV KNUM IRCONV_INT_NUM)
647LJFOLDF(kfold_conv_knum_int_num) 635LJFOLDF(kfold_conv_knum_int_num)
648{ 636{
649 lua_Number n = knumleft; 637 lua_Number n = knumleft;
650 if (!(fins->op2 & IRCONV_TRUNC)) { 638 int32_t k = lj_num2int(n);
651 int32_t k = lj_num2int(n); 639 if (irt_isguard(fins->t) && n != (lua_Number)k) {
652 if (irt_isguard(fins->t) && n != (lua_Number)k) { 640 /* We're about to create a guard which always fails, like CONV +1.5.
653 /* We're about to create a guard which always fails, like CONV +1.5. 641 ** Some pathological loops cause this during LICM, e.g.:
654 ** Some pathological loops cause this during LICM, e.g.: 642 ** local x,k,t = 0,1.5,{1,[1.5]=2}
655 ** local x,k,t = 0,1.5,{1,[1.5]=2} 643 ** for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end
656 ** for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end 644 ** assert(x == 300)
657 ** assert(x == 300) 645 */
658 */ 646 return FAILFOLD;
659 return FAILFOLD;
660 }
661 return INTFOLD(k);
662 } else {
663 return INTFOLD((int32_t)n);
664 } 647 }
648 return INTFOLD(k);
665} 649}
666 650
667LJFOLD(CONV KNUM IRCONV_U32_NUM) 651LJFOLD(CONV KNUM IRCONV_U32_NUM)
668LJFOLDF(kfold_conv_knum_u32_num) 652LJFOLDF(kfold_conv_knum_u32_num)
669{ 653{
670 lua_assert((fins->op2 & IRCONV_TRUNC));
671#ifdef _MSC_VER 654#ifdef _MSC_VER
672 { /* Workaround for MSVC bug. */ 655 { /* Workaround for MSVC bug. */
673 volatile uint32_t u = (uint32_t)knumleft; 656 volatile uint32_t u = (uint32_t)knumleft;
@@ -681,14 +664,12 @@ LJFOLDF(kfold_conv_knum_u32_num)
681LJFOLD(CONV KNUM IRCONV_I64_NUM) 664LJFOLD(CONV KNUM IRCONV_I64_NUM)
682LJFOLDF(kfold_conv_knum_i64_num) 665LJFOLDF(kfold_conv_knum_i64_num)
683{ 666{
684 lua_assert((fins->op2 & IRCONV_TRUNC));
685 return INT64FOLD((uint64_t)(int64_t)knumleft); 667 return INT64FOLD((uint64_t)(int64_t)knumleft);
686} 668}
687 669
688LJFOLD(CONV KNUM IRCONV_U64_NUM) 670LJFOLD(CONV KNUM IRCONV_U64_NUM)
689LJFOLDF(kfold_conv_knum_u64_num) 671LJFOLDF(kfold_conv_knum_u64_num)
690{ 672{
691 lua_assert((fins->op2 & IRCONV_TRUNC));
692 return INT64FOLD(lj_num2u64(knumleft)); 673 return INT64FOLD(lj_num2u64(knumleft));
693} 674}
694 675
@@ -1199,7 +1180,9 @@ static TRef simplify_intmul_k(jit_State *J, int32_t k)
1199 ** But this is mainly intended for simple address arithmetic. 1180 ** But this is mainly intended for simple address arithmetic.
1200 ** Also it's easier for the backend to optimize the original multiplies. 1181 ** Also it's easier for the backend to optimize the original multiplies.
1201 */ 1182 */
1202 if (k == 1) { /* i * 1 ==> i */ 1183 if (k == 0) { /* i * 0 ==> 0 */
1184 return RIGHTFOLD;
1185 } else if (k == 1) { /* i * 1 ==> i */
1203 return LEFTFOLD; 1186 return LEFTFOLD;
1204 } else if ((k & (k-1)) == 0) { /* i * 2^k ==> i << k */ 1187 } else if ((k & (k-1)) == 0) { /* i * 2^k ==> i << k */
1205 fins->o = IR_BSHL; 1188 fins->o = IR_BSHL;
@@ -1212,9 +1195,7 @@ static TRef simplify_intmul_k(jit_State *J, int32_t k)
1212LJFOLD(MUL any KINT) 1195LJFOLD(MUL any KINT)
1213LJFOLDF(simplify_intmul_k32) 1196LJFOLDF(simplify_intmul_k32)
1214{ 1197{
1215 if (fright->i == 0) /* i * 0 ==> 0 */ 1198 if (fright->i >= 0)
1216 return INTFOLD(0);
1217 else if (fright->i > 0)
1218 return simplify_intmul_k(J, fright->i); 1199 return simplify_intmul_k(J, fright->i);
1219 return NEXTFOLD; 1200 return NEXTFOLD;
1220} 1201}
@@ -1222,14 +1203,13 @@ LJFOLDF(simplify_intmul_k32)
1222LJFOLD(MUL any KINT64) 1203LJFOLD(MUL any KINT64)
1223LJFOLDF(simplify_intmul_k64) 1204LJFOLDF(simplify_intmul_k64)
1224{ 1205{
1225 if (ir_kint64(fright)->u64 == 0) /* i * 0 ==> 0 */ 1206#if LJ_HASFFI
1226 return INT64FOLD(0); 1207 if (ir_kint64(fright)->u64 < 0x80000000u)
1227#if LJ_64
1228 /* NYI: SPLIT for BSHL and 32 bit backend support. */
1229 else if (ir_kint64(fright)->u64 < 0x80000000u)
1230 return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64); 1208 return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64);
1231#endif
1232 return NEXTFOLD; 1209 return NEXTFOLD;
1210#else
1211 UNUSED(J); lua_assert(0); return FAILFOLD;
1212#endif
1233} 1213}
1234 1214
1235LJFOLD(MOD any KINT) 1215LJFOLD(MOD any KINT)
@@ -1529,7 +1509,7 @@ LJFOLD(BOR BOR KINT64)
1529LJFOLD(BXOR BXOR KINT64) 1509LJFOLD(BXOR BXOR KINT64)
1530LJFOLDF(reassoc_intarith_k64) 1510LJFOLDF(reassoc_intarith_k64)
1531{ 1511{
1532#if LJ_HASFFI || LJ_64 1512#if LJ_HASFFI
1533 IRIns *irk = IR(fleft->op2); 1513 IRIns *irk = IR(fleft->op2);
1534 if (irk->o == IR_KINT64) { 1514 if (irk->o == IR_KINT64) {
1535 uint64_t k = kfold_int64arith(ir_k64(irk)->u64, 1515 uint64_t k = kfold_int64arith(ir_k64(irk)->u64,
diff --git a/src/lj_opt_loop.c b/src/lj_opt_loop.c
index 3a119f47..2d574089 100644
--- a/src/lj_opt_loop.c
+++ b/src/lj_opt_loop.c
@@ -11,6 +11,7 @@
11#if LJ_HASJIT 11#if LJ_HASJIT
12 12
13#include "lj_err.h" 13#include "lj_err.h"
14#include "lj_buf.h"
14#include "lj_str.h" 15#include "lj_str.h"
15#include "lj_ir.h" 16#include "lj_ir.h"
16#include "lj_jit.h" 17#include "lj_jit.h"
@@ -271,8 +272,7 @@ static void loop_unroll(jit_State *J)
271 ** Caveat: don't call into the VM or run the GC or the buffer may be gone. 272 ** Caveat: don't call into the VM or run the GC or the buffer may be gone.
272 */ 273 */
273 invar = J->cur.nins; 274 invar = J->cur.nins;
274 subst = (IRRef1 *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, 275 subst = (IRRef1 *)lj_buf_tmp(J->L, (invar-REF_BIAS)*sizeof(IRRef1))-REF_BIAS;
275 (invar-REF_BIAS)*sizeof(IRRef1)) - REF_BIAS;
276 subst[REF_BASE] = REF_BASE; 276 subst[REF_BASE] = REF_BASE;
277 277
278 /* LOOP separates the pre-roll from the loop body. */ 278 /* LOOP separates the pre-roll from the loop body. */
diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c
index caf2a8df..5d0ea9cb 100644
--- a/src/lj_opt_narrow.c
+++ b/src/lj_opt_narrow.c
@@ -496,8 +496,7 @@ TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef tr)
496{ 496{
497 lua_assert(tref_isnumber(tr)); 497 lua_assert(tref_isnumber(tr));
498 if (tref_isnum(tr)) 498 if (tref_isnum(tr))
499 return emitir(IRT(IR_CONV, IRT_INTP), tr, 499 return emitir(IRT(IR_CONV, IRT_INTP), tr, (IRT_INTP<<5)|IRT_NUM|IRCONV_ANY);
500 (IRT_INTP<<5)|IRT_NUM|IRCONV_TRUNC|IRCONV_ANY);
501 /* Undefined overflow semantics allow stripping of ADDOV, SUBOV and MULOV. */ 500 /* Undefined overflow semantics allow stripping of ADDOV, SUBOV and MULOV. */
502 return narrow_stripov(J, tr, IR_MULOV, 501 return narrow_stripov(J, tr, IR_MULOV,
503 LJ_64 ? ((IRT_INTP<<5)|IRT_INT|IRCONV_SEXT) : 502 LJ_64 ? ((IRT_INTP<<5)|IRT_INT|IRCONV_SEXT) :
diff --git a/src/lj_opt_split.c b/src/lj_opt_split.c
index 5a8c33b9..6ab509eb 100644
--- a/src/lj_opt_split.c
+++ b/src/lj_opt_split.c
@@ -11,6 +11,7 @@
11#if LJ_HASJIT && (LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) 11#if LJ_HASJIT && (LJ_SOFTFP || (LJ_32 && LJ_HASFFI))
12 12
13#include "lj_err.h" 13#include "lj_err.h"
14#include "lj_buf.h"
14#include "lj_str.h" 15#include "lj_str.h"
15#include "lj_ir.h" 16#include "lj_ir.h"
16#include "lj_jit.h" 17#include "lj_jit.h"
@@ -139,6 +140,7 @@ static IRRef split_call_l(jit_State *J, IRRef1 *hisubst, IRIns *oir,
139 ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id); 140 ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
140 return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); 141 return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
141} 142}
143#endif
142 144
143/* Emit a CALLN with one split 64 bit argument and a 32 bit argument. */ 145/* Emit a CALLN with one split 64 bit argument and a 32 bit argument. */
144static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir, 146static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir,
@@ -155,7 +157,6 @@ static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir,
155 ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id); 157 ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
156 return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); 158 return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
157} 159}
158#endif
159 160
160/* Emit a CALLN with two split 64 bit arguments. */ 161/* Emit a CALLN with two split 64 bit arguments. */
161static IRRef split_call_ll(jit_State *J, IRRef1 *hisubst, IRIns *oir, 162static IRRef split_call_ll(jit_State *J, IRRef1 *hisubst, IRIns *oir,
@@ -195,6 +196,118 @@ static IRRef split_ptr(jit_State *J, IRIns *oir, IRRef ref)
195 return split_emit(J, IRTI(IR_ADD), nref, lj_ir_kint(J, ofs)); 196 return split_emit(J, IRTI(IR_ADD), nref, lj_ir_kint(J, ofs));
196} 197}
197 198
199#if LJ_HASFFI
200static IRRef split_bitshift(jit_State *J, IRRef1 *hisubst,
201 IRIns *oir, IRIns *nir, IRIns *ir)
202{
203 IROp op = ir->o;
204 IRRef kref = nir->op2;
205 if (irref_isk(kref)) { /* Optimize constant shifts. */
206 int32_t k = (IR(kref)->i & 63);
207 IRRef lo = nir->op1, hi = hisubst[ir->op1];
208 if (op == IR_BROL || op == IR_BROR) {
209 if (op == IR_BROR) k = (-k & 63);
210 if (k >= 32) { IRRef t = lo; lo = hi; hi = t; k -= 32; }
211 if (k == 0) {
212 passthrough:
213 J->cur.nins--;
214 ir->prev = lo;
215 return hi;
216 } else {
217 TRef k1, k2;
218 IRRef t1, t2, t3, t4;
219 J->cur.nins--;
220 k1 = lj_ir_kint(J, k);
221 k2 = lj_ir_kint(J, (-k & 31));
222 t1 = split_emit(J, IRTI(IR_BSHL), lo, k1);
223 t2 = split_emit(J, IRTI(IR_BSHL), hi, k1);
224 t3 = split_emit(J, IRTI(IR_BSHR), lo, k2);
225 t4 = split_emit(J, IRTI(IR_BSHR), hi, k2);
226 ir->prev = split_emit(J, IRTI(IR_BOR), t1, t4);
227 return split_emit(J, IRTI(IR_BOR), t2, t3);
228 }
229 } else if (k == 0) {
230 goto passthrough;
231 } else if (k < 32) {
232 if (op == IR_BSHL) {
233 IRRef t1 = split_emit(J, IRTI(IR_BSHL), hi, kref);
234 IRRef t2 = split_emit(J, IRTI(IR_BSHR), lo, lj_ir_kint(J, (-k&31)));
235 return split_emit(J, IRTI(IR_BOR), t1, t2);
236 } else {
237 IRRef t1 = ir->prev, t2;
238 lua_assert(op == IR_BSHR || op == IR_BSAR);
239 nir->o = IR_BSHR;
240 t2 = split_emit(J, IRTI(IR_BSHL), hi, lj_ir_kint(J, (-k&31)));
241 ir->prev = split_emit(J, IRTI(IR_BOR), t1, t2);
242 return split_emit(J, IRTI(op), hi, kref);
243 }
244 } else {
245 if (op == IR_BSHL) {
246 if (k == 32)
247 J->cur.nins--;
248 else
249 lo = ir->prev;
250 ir->prev = lj_ir_kint(J, 0);
251 return lo;
252 } else {
253 lua_assert(op == IR_BSHR || op == IR_BSAR);
254 if (k == 32) {
255 J->cur.nins--;
256 ir->prev = hi;
257 } else {
258 nir->op1 = hi;
259 }
260 if (op == IR_BSHR)
261 return lj_ir_kint(J, 0);
262 else
263 return split_emit(J, IRTI(IR_BSAR), hi, lj_ir_kint(J, 31));
264 }
265 }
266 }
267 return split_call_li(J, hisubst, oir, ir,
268 op - IR_BSHL + IRCALL_lj_carith_shl64);
269}
270
271static IRRef split_bitop(jit_State *J, IRRef1 *hisubst,
272 IRIns *nir, IRIns *ir)
273{
274 IROp op = ir->o;
275 IRRef hi, kref = nir->op2;
276 if (irref_isk(kref)) { /* Optimize bit operations with lo constant. */
277 int32_t k = IR(kref)->i;
278 if (k == 0 || k == -1) {
279 if (op == IR_BAND) k = ~k;
280 if (k == 0) {
281 J->cur.nins--;
282 ir->prev = nir->op1;
283 } else if (op == IR_BXOR) {
284 nir->o = IR_BNOT;
285 nir->op2 = 0;
286 } else {
287 J->cur.nins--;
288 ir->prev = kref;
289 }
290 }
291 }
292 hi = hisubst[ir->op1];
293 kref = hisubst[ir->op2];
294 if (irref_isk(kref)) { /* Optimize bit operations with hi constant. */
295 int32_t k = IR(kref)->i;
296 if (k == 0 || k == -1) {
297 if (op == IR_BAND) k = ~k;
298 if (k == 0) {
299 return hi;
300 } else if (op == IR_BXOR) {
301 return split_emit(J, IRTI(IR_BNOT), hi, 0);
302 } else {
303 return kref;
304 }
305 }
306 }
307 return split_emit(J, IRTI(op), hi, kref);
308}
309#endif
310
198/* Substitute references of a snapshot. */ 311/* Substitute references of a snapshot. */
199static void split_subst_snap(jit_State *J, SnapShot *snap, IRIns *oir) 312static void split_subst_snap(jit_State *J, SnapShot *snap, IRIns *oir)
200{ 313{
@@ -214,7 +327,7 @@ static void split_ir(jit_State *J)
214 IRRef nins = J->cur.nins, nk = J->cur.nk; 327 IRRef nins = J->cur.nins, nk = J->cur.nk;
215 MSize irlen = nins - nk; 328 MSize irlen = nins - nk;
216 MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1)); 329 MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1));
217 IRIns *oir = (IRIns *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, need); 330 IRIns *oir = (IRIns *)lj_buf_tmp(J->L, need);
218 IRRef1 *hisubst; 331 IRRef1 *hisubst;
219 IRRef ref, snref; 332 IRRef ref, snref;
220 SnapShot *snap; 333 SnapShot *snap;
@@ -438,6 +551,19 @@ static void split_ir(jit_State *J)
438 irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : 551 irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
439 IRCALL_lj_carith_powu64); 552 IRCALL_lj_carith_powu64);
440 break; 553 break;
554 case IR_BNOT:
555 hi = split_emit(J, IRTI(IR_BNOT), hiref, 0);
556 break;
557 case IR_BSWAP:
558 ir->prev = split_emit(J, IRTI(IR_BSWAP), hiref, 0);
559 hi = nref;
560 break;
561 case IR_BAND: case IR_BOR: case IR_BXOR:
562 hi = split_bitop(J, hisubst, nir, ir);
563 break;
564 case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR:
565 hi = split_bitshift(J, hisubst, oir, nir, ir);
566 break;
441 case IR_FLOAD: 567 case IR_FLOAD:
442 lua_assert(ir->op2 == IRFL_CDATA_INT64); 568 lua_assert(ir->op2 == IRFL_CDATA_INT64);
443 hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4); 569 hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4);
diff --git a/src/lj_parse.c b/src/lj_parse.c
index 7ff7d728..c1ef2593 100644
--- a/src/lj_parse.c
+++ b/src/lj_parse.c
@@ -13,6 +13,7 @@
13#include "lj_gc.h" 13#include "lj_gc.h"
14#include "lj_err.h" 14#include "lj_err.h"
15#include "lj_debug.h" 15#include "lj_debug.h"
16#include "lj_buf.h"
16#include "lj_str.h" 17#include "lj_str.h"
17#include "lj_tab.h" 18#include "lj_tab.h"
18#include "lj_func.h" 19#include "lj_func.h"
@@ -165,12 +166,12 @@ LJ_STATIC_ASSERT((int)BC_MODVV-(int)BC_ADDVV == (int)OPR_MOD-(int)OPR_ADD);
165 166
166LJ_NORET LJ_NOINLINE static void err_syntax(LexState *ls, ErrMsg em) 167LJ_NORET LJ_NOINLINE static void err_syntax(LexState *ls, ErrMsg em)
167{ 168{
168 lj_lex_error(ls, ls->token, em); 169 lj_lex_error(ls, ls->tok, em);
169} 170}
170 171
171LJ_NORET LJ_NOINLINE static void err_token(LexState *ls, LexToken token) 172LJ_NORET LJ_NOINLINE static void err_token(LexState *ls, LexToken tok)
172{ 173{
173 lj_lex_error(ls, ls->token, LJ_ERR_XTOKEN, lj_lex_token2str(ls, token)); 174 lj_lex_error(ls, ls->tok, LJ_ERR_XTOKEN, lj_lex_token2str(ls, tok));
174} 175}
175 176
176LJ_NORET static void err_limit(FuncState *fs, uint32_t limit, const char *what) 177LJ_NORET static void err_limit(FuncState *fs, uint32_t limit, const char *what)
@@ -981,7 +982,7 @@ static void bcemit_unop(FuncState *fs, BCOp op, ExpDesc *e)
981/* Check and consume optional token. */ 982/* Check and consume optional token. */
982static int lex_opt(LexState *ls, LexToken tok) 983static int lex_opt(LexState *ls, LexToken tok)
983{ 984{
984 if (ls->token == tok) { 985 if (ls->tok == tok) {
985 lj_lex_next(ls); 986 lj_lex_next(ls);
986 return 1; 987 return 1;
987 } 988 }
@@ -991,7 +992,7 @@ static int lex_opt(LexState *ls, LexToken tok)
991/* Check and consume token. */ 992/* Check and consume token. */
992static void lex_check(LexState *ls, LexToken tok) 993static void lex_check(LexState *ls, LexToken tok)
993{ 994{
994 if (ls->token != tok) 995 if (ls->tok != tok)
995 err_token(ls, tok); 996 err_token(ls, tok);
996 lj_lex_next(ls); 997 lj_lex_next(ls);
997} 998}
@@ -1005,7 +1006,7 @@ static void lex_match(LexState *ls, LexToken what, LexToken who, BCLine line)
1005 } else { 1006 } else {
1006 const char *swhat = lj_lex_token2str(ls, what); 1007 const char *swhat = lj_lex_token2str(ls, what);
1007 const char *swho = lj_lex_token2str(ls, who); 1008 const char *swho = lj_lex_token2str(ls, who);
1008 lj_lex_error(ls, ls->token, LJ_ERR_XMATCH, swhat, swho, line); 1009 lj_lex_error(ls, ls->tok, LJ_ERR_XMATCH, swhat, swho, line);
1009 } 1010 }
1010 } 1011 }
1011} 1012}
@@ -1014,9 +1015,9 @@ static void lex_match(LexState *ls, LexToken what, LexToken who, BCLine line)
1014static GCstr *lex_str(LexState *ls) 1015static GCstr *lex_str(LexState *ls)
1015{ 1016{
1016 GCstr *s; 1017 GCstr *s;
1017 if (ls->token != TK_name && (LJ_52 || ls->token != TK_goto)) 1018 if (ls->tok != TK_name && (LJ_52 || ls->tok != TK_goto))
1018 err_token(ls, TK_name); 1019 err_token(ls, TK_name);
1019 s = strV(&ls->tokenval); 1020 s = strV(&ls->tokval);
1020 lj_lex_next(ls); 1021 lj_lex_next(ls);
1021 return s; 1022 return s;
1022} 1023}
@@ -1429,78 +1430,46 @@ static void fs_fixup_line(FuncState *fs, GCproto *pt,
1429 } 1430 }
1430} 1431}
1431 1432
1432/* Resize buffer if needed. */
1433static LJ_NOINLINE void fs_buf_resize(LexState *ls, MSize len)
1434{
1435 MSize sz = ls->sb.sz * 2;
1436 while (ls->sb.n + len > sz) sz = sz * 2;
1437 lj_str_resizebuf(ls->L, &ls->sb, sz);
1438}
1439
1440static LJ_AINLINE void fs_buf_need(LexState *ls, MSize len)
1441{
1442 if (LJ_UNLIKELY(ls->sb.n + len > ls->sb.sz))
1443 fs_buf_resize(ls, len);
1444}
1445
1446/* Add string to buffer. */
1447static void fs_buf_str(LexState *ls, const char *str, MSize len)
1448{
1449 char *p = ls->sb.buf + ls->sb.n;
1450 MSize i;
1451 ls->sb.n += len;
1452 for (i = 0; i < len; i++) p[i] = str[i];
1453}
1454
1455/* Add ULEB128 value to buffer. */
1456static void fs_buf_uleb128(LexState *ls, uint32_t v)
1457{
1458 MSize n = ls->sb.n;
1459 uint8_t *p = (uint8_t *)ls->sb.buf;
1460 for (; v >= 0x80; v >>= 7)
1461 p[n++] = (uint8_t)((v & 0x7f) | 0x80);
1462 p[n++] = (uint8_t)v;
1463 ls->sb.n = n;
1464}
1465
1466/* Prepare variable info for prototype. */ 1433/* Prepare variable info for prototype. */
1467static size_t fs_prep_var(LexState *ls, FuncState *fs, size_t *ofsvar) 1434static size_t fs_prep_var(LexState *ls, FuncState *fs, size_t *ofsvar)
1468{ 1435{
1469 VarInfo *vs =ls->vstack, *ve; 1436 VarInfo *vs =ls->vstack, *ve;
1470 MSize i, n; 1437 MSize i, n;
1471 BCPos lastpc; 1438 BCPos lastpc;
1472 lj_str_resetbuf(&ls->sb); /* Copy to temp. string buffer. */ 1439 lj_buf_reset(&ls->sb); /* Copy to temp. string buffer. */
1473 /* Store upvalue names. */ 1440 /* Store upvalue names. */
1474 for (i = 0, n = fs->nuv; i < n; i++) { 1441 for (i = 0, n = fs->nuv; i < n; i++) {
1475 GCstr *s = strref(vs[fs->uvmap[i]].name); 1442 GCstr *s = strref(vs[fs->uvmap[i]].name);
1476 MSize len = s->len+1; 1443 MSize len = s->len+1;
1477 fs_buf_need(ls, len); 1444 char *p = lj_buf_more(&ls->sb, len);
1478 fs_buf_str(ls, strdata(s), len); 1445 p = lj_buf_wmem(p, strdata(s), len);
1446 setsbufP(&ls->sb, p);
1479 } 1447 }
1480 *ofsvar = ls->sb.n; 1448 *ofsvar = sbuflen(&ls->sb);
1481 lastpc = 0; 1449 lastpc = 0;
1482 /* Store local variable names and compressed ranges. */ 1450 /* Store local variable names and compressed ranges. */
1483 for (ve = vs + ls->vtop, vs += fs->vbase; vs < ve; vs++) { 1451 for (ve = vs + ls->vtop, vs += fs->vbase; vs < ve; vs++) {
1484 if (!gola_isgotolabel(vs)) { 1452 if (!gola_isgotolabel(vs)) {
1485 GCstr *s = strref(vs->name); 1453 GCstr *s = strref(vs->name);
1486 BCPos startpc; 1454 BCPos startpc;
1455 char *p;
1487 if ((uintptr_t)s < VARNAME__MAX) { 1456 if ((uintptr_t)s < VARNAME__MAX) {
1488 fs_buf_need(ls, 1 + 2*5); 1457 p = lj_buf_more(&ls->sb, 1 + 2*5);
1489 ls->sb.buf[ls->sb.n++] = (uint8_t)(uintptr_t)s; 1458 *p++ = (char)(uintptr_t)s;
1490 } else { 1459 } else {
1491 MSize len = s->len+1; 1460 MSize len = s->len+1;
1492 fs_buf_need(ls, len + 2*5); 1461 p = lj_buf_more(&ls->sb, len + 2*5);
1493 fs_buf_str(ls, strdata(s), len); 1462 p = lj_buf_wmem(p, strdata(s), len);
1494 } 1463 }
1495 startpc = vs->startpc; 1464 startpc = vs->startpc;
1496 fs_buf_uleb128(ls, startpc-lastpc); 1465 p = lj_buf_wuleb128(p, startpc-lastpc);
1497 fs_buf_uleb128(ls, vs->endpc-startpc); 1466 p = lj_buf_wuleb128(p, vs->endpc-startpc);
1467 setsbufP(&ls->sb, p);
1498 lastpc = startpc; 1468 lastpc = startpc;
1499 } 1469 }
1500 } 1470 }
1501 fs_buf_need(ls, 1); 1471 lj_buf_putb(&ls->sb, '\0'); /* Terminator for varinfo. */
1502 ls->sb.buf[ls->sb.n++] = '\0'; /* Terminator for varinfo. */ 1472 return sbuflen(&ls->sb);
1503 return ls->sb.n;
1504} 1473}
1505 1474
1506/* Fixup variable info for prototype. */ 1475/* Fixup variable info for prototype. */
@@ -1508,7 +1477,7 @@ static void fs_fixup_var(LexState *ls, GCproto *pt, uint8_t *p, size_t ofsvar)
1508{ 1477{
1509 setmref(pt->uvinfo, p); 1478 setmref(pt->uvinfo, p);
1510 setmref(pt->varinfo, (char *)p + ofsvar); 1479 setmref(pt->varinfo, (char *)p + ofsvar);
1511 memcpy(p, ls->sb.buf, ls->sb.n); /* Copy from temp. string buffer. */ 1480 memcpy(p, sbufB(&ls->sb), sbuflen(&ls->sb)); /* Copy from temp. buffer. */
1512} 1481}
1513#else 1482#else
1514 1483
@@ -1615,7 +1584,7 @@ static GCproto *fs_finish(LexState *ls, BCLine line)
1615 L->top--; /* Pop table of constants. */ 1584 L->top--; /* Pop table of constants. */
1616 ls->vtop = fs->vbase; /* Reset variable stack. */ 1585 ls->vtop = fs->vbase; /* Reset variable stack. */
1617 ls->fs = fs->prev; 1586 ls->fs = fs->prev;
1618 lua_assert(ls->fs != NULL || ls->token == TK_eof); 1587 lua_assert(ls->fs != NULL || ls->tok == TK_eof);
1619 return pt; 1588 return pt;
1620} 1589}
1621 1590
@@ -1737,15 +1706,15 @@ static void expr_table(LexState *ls, ExpDesc *e)
1737 bcreg_reserve(fs, 1); 1706 bcreg_reserve(fs, 1);
1738 freg++; 1707 freg++;
1739 lex_check(ls, '{'); 1708 lex_check(ls, '{');
1740 while (ls->token != '}') { 1709 while (ls->tok != '}') {
1741 ExpDesc key, val; 1710 ExpDesc key, val;
1742 vcall = 0; 1711 vcall = 0;
1743 if (ls->token == '[') { 1712 if (ls->tok == '[') {
1744 expr_bracket(ls, &key); /* Already calls expr_toval. */ 1713 expr_bracket(ls, &key); /* Already calls expr_toval. */
1745 if (!expr_isk(&key)) expr_index(fs, e, &key); 1714 if (!expr_isk(&key)) expr_index(fs, e, &key);
1746 if (expr_isnumk(&key) && expr_numiszero(&key)) needarr = 1; else nhash++; 1715 if (expr_isnumk(&key) && expr_numiszero(&key)) needarr = 1; else nhash++;
1747 lex_check(ls, '='); 1716 lex_check(ls, '=');
1748 } else if ((ls->token == TK_name || (!LJ_52 && ls->token == TK_goto)) && 1717 } else if ((ls->tok == TK_name || (!LJ_52 && ls->tok == TK_goto)) &&
1749 lj_lex_lookahead(ls) == '=') { 1718 lj_lex_lookahead(ls) == '=') {
1750 expr_str(ls, &key); 1719 expr_str(ls, &key);
1751 lex_check(ls, '='); 1720 lex_check(ls, '=');
@@ -1838,11 +1807,11 @@ static BCReg parse_params(LexState *ls, int needself)
1838 lex_check(ls, '('); 1807 lex_check(ls, '(');
1839 if (needself) 1808 if (needself)
1840 var_new_lit(ls, nparams++, "self"); 1809 var_new_lit(ls, nparams++, "self");
1841 if (ls->token != ')') { 1810 if (ls->tok != ')') {
1842 do { 1811 do {
1843 if (ls->token == TK_name || (!LJ_52 && ls->token == TK_goto)) { 1812 if (ls->tok == TK_name || (!LJ_52 && ls->tok == TK_goto)) {
1844 var_new(ls, nparams++, lex_str(ls)); 1813 var_new(ls, nparams++, lex_str(ls));
1845 } else if (ls->token == TK_dots) { 1814 } else if (ls->tok == TK_dots) {
1846 lj_lex_next(ls); 1815 lj_lex_next(ls);
1847 fs->flags |= PROTO_VARARG; 1816 fs->flags |= PROTO_VARARG;
1848 break; 1817 break;
@@ -1876,7 +1845,7 @@ static void parse_body(LexState *ls, ExpDesc *e, int needself, BCLine line)
1876 fs.bclim = pfs->bclim - pfs->pc; 1845 fs.bclim = pfs->bclim - pfs->pc;
1877 bcemit_AD(&fs, BC_FUNCF, 0, 0); /* Placeholder. */ 1846 bcemit_AD(&fs, BC_FUNCF, 0, 0); /* Placeholder. */
1878 parse_chunk(ls); 1847 parse_chunk(ls);
1879 if (ls->token != TK_end) lex_match(ls, TK_end, TK_function, line); 1848 if (ls->tok != TK_end) lex_match(ls, TK_end, TK_function, line);
1880 pt = fs_finish(ls, (ls->lastline = ls->linenumber)); 1849 pt = fs_finish(ls, (ls->lastline = ls->linenumber));
1881 pfs->bcbase = ls->bcstack + oldbase; /* May have been reallocated. */ 1850 pfs->bcbase = ls->bcstack + oldbase; /* May have been reallocated. */
1882 pfs->bclim = (BCPos)(ls->sizebcstack - oldbase); 1851 pfs->bclim = (BCPos)(ls->sizebcstack - oldbase);
@@ -1915,13 +1884,13 @@ static void parse_args(LexState *ls, ExpDesc *e)
1915 BCIns ins; 1884 BCIns ins;
1916 BCReg base; 1885 BCReg base;
1917 BCLine line = ls->linenumber; 1886 BCLine line = ls->linenumber;
1918 if (ls->token == '(') { 1887 if (ls->tok == '(') {
1919#if !LJ_52 1888#if !LJ_52
1920 if (line != ls->lastline) 1889 if (line != ls->lastline)
1921 err_syntax(ls, LJ_ERR_XAMBIG); 1890 err_syntax(ls, LJ_ERR_XAMBIG);
1922#endif 1891#endif
1923 lj_lex_next(ls); 1892 lj_lex_next(ls);
1924 if (ls->token == ')') { /* f(). */ 1893 if (ls->tok == ')') { /* f(). */
1925 args.k = VVOID; 1894 args.k = VVOID;
1926 } else { 1895 } else {
1927 expr_list(ls, &args); 1896 expr_list(ls, &args);
@@ -1929,11 +1898,11 @@ static void parse_args(LexState *ls, ExpDesc *e)
1929 setbc_b(bcptr(fs, &args), 0); /* Pass on multiple results. */ 1898 setbc_b(bcptr(fs, &args), 0); /* Pass on multiple results. */
1930 } 1899 }
1931 lex_match(ls, ')', '(', line); 1900 lex_match(ls, ')', '(', line);
1932 } else if (ls->token == '{') { 1901 } else if (ls->tok == '{') {
1933 expr_table(ls, &args); 1902 expr_table(ls, &args);
1934 } else if (ls->token == TK_string) { 1903 } else if (ls->tok == TK_string) {
1935 expr_init(&args, VKSTR, 0); 1904 expr_init(&args, VKSTR, 0);
1936 args.u.sval = strV(&ls->tokenval); 1905 args.u.sval = strV(&ls->tokval);
1937 lj_lex_next(ls); 1906 lj_lex_next(ls);
1938 } else { 1907 } else {
1939 err_syntax(ls, LJ_ERR_XFUNARG); 1908 err_syntax(ls, LJ_ERR_XFUNARG);
@@ -1959,32 +1928,32 @@ static void expr_primary(LexState *ls, ExpDesc *v)
1959{ 1928{
1960 FuncState *fs = ls->fs; 1929 FuncState *fs = ls->fs;
1961 /* Parse prefix expression. */ 1930 /* Parse prefix expression. */
1962 if (ls->token == '(') { 1931 if (ls->tok == '(') {
1963 BCLine line = ls->linenumber; 1932 BCLine line = ls->linenumber;
1964 lj_lex_next(ls); 1933 lj_lex_next(ls);
1965 expr(ls, v); 1934 expr(ls, v);
1966 lex_match(ls, ')', '(', line); 1935 lex_match(ls, ')', '(', line);
1967 expr_discharge(ls->fs, v); 1936 expr_discharge(ls->fs, v);
1968 } else if (ls->token == TK_name || (!LJ_52 && ls->token == TK_goto)) { 1937 } else if (ls->tok == TK_name || (!LJ_52 && ls->tok == TK_goto)) {
1969 var_lookup(ls, v); 1938 var_lookup(ls, v);
1970 } else { 1939 } else {
1971 err_syntax(ls, LJ_ERR_XSYMBOL); 1940 err_syntax(ls, LJ_ERR_XSYMBOL);
1972 } 1941 }
1973 for (;;) { /* Parse multiple expression suffixes. */ 1942 for (;;) { /* Parse multiple expression suffixes. */
1974 if (ls->token == '.') { 1943 if (ls->tok == '.') {
1975 expr_field(ls, v); 1944 expr_field(ls, v);
1976 } else if (ls->token == '[') { 1945 } else if (ls->tok == '[') {
1977 ExpDesc key; 1946 ExpDesc key;
1978 expr_toanyreg(fs, v); 1947 expr_toanyreg(fs, v);
1979 expr_bracket(ls, &key); 1948 expr_bracket(ls, &key);
1980 expr_index(fs, v, &key); 1949 expr_index(fs, v, &key);
1981 } else if (ls->token == ':') { 1950 } else if (ls->tok == ':') {
1982 ExpDesc key; 1951 ExpDesc key;
1983 lj_lex_next(ls); 1952 lj_lex_next(ls);
1984 expr_str(ls, &key); 1953 expr_str(ls, &key);
1985 bcemit_method(fs, v, &key); 1954 bcemit_method(fs, v, &key);
1986 parse_args(ls, v); 1955 parse_args(ls, v);
1987 } else if (ls->token == '(' || ls->token == TK_string || ls->token == '{') { 1956 } else if (ls->tok == '(' || ls->tok == TK_string || ls->tok == '{') {
1988 expr_tonextreg(fs, v); 1957 expr_tonextreg(fs, v);
1989 parse_args(ls, v); 1958 parse_args(ls, v);
1990 } else { 1959 } else {
@@ -1996,14 +1965,14 @@ static void expr_primary(LexState *ls, ExpDesc *v)
1996/* Parse simple expression. */ 1965/* Parse simple expression. */
1997static void expr_simple(LexState *ls, ExpDesc *v) 1966static void expr_simple(LexState *ls, ExpDesc *v)
1998{ 1967{
1999 switch (ls->token) { 1968 switch (ls->tok) {
2000 case TK_number: 1969 case TK_number:
2001 expr_init(v, (LJ_HASFFI && tviscdata(&ls->tokenval)) ? VKCDATA : VKNUM, 0); 1970 expr_init(v, (LJ_HASFFI && tviscdata(&ls->tokval)) ? VKCDATA : VKNUM, 0);
2002 copyTV(ls->L, &v->u.nval, &ls->tokenval); 1971 copyTV(ls->L, &v->u.nval, &ls->tokval);
2003 break; 1972 break;
2004 case TK_string: 1973 case TK_string:
2005 expr_init(v, VKSTR, 0); 1974 expr_init(v, VKSTR, 0);
2006 v->u.sval = strV(&ls->tokenval); 1975 v->u.sval = strV(&ls->tokval);
2007 break; 1976 break;
2008 case TK_nil: 1977 case TK_nil:
2009 expr_init(v, VKNIL, 0); 1978 expr_init(v, VKNIL, 0);
@@ -2091,11 +2060,11 @@ static BinOpr expr_binop(LexState *ls, ExpDesc *v, uint32_t limit);
2091static void expr_unop(LexState *ls, ExpDesc *v) 2060static void expr_unop(LexState *ls, ExpDesc *v)
2092{ 2061{
2093 BCOp op; 2062 BCOp op;
2094 if (ls->token == TK_not) { 2063 if (ls->tok == TK_not) {
2095 op = BC_NOT; 2064 op = BC_NOT;
2096 } else if (ls->token == '-') { 2065 } else if (ls->tok == '-') {
2097 op = BC_UNM; 2066 op = BC_UNM;
2098 } else if (ls->token == '#') { 2067 } else if (ls->tok == '#') {
2099 op = BC_LEN; 2068 op = BC_LEN;
2100 } else { 2069 } else {
2101 expr_simple(ls, v); 2070 expr_simple(ls, v);
@@ -2112,7 +2081,7 @@ static BinOpr expr_binop(LexState *ls, ExpDesc *v, uint32_t limit)
2112 BinOpr op; 2081 BinOpr op;
2113 synlevel_begin(ls); 2082 synlevel_begin(ls);
2114 expr_unop(ls, v); 2083 expr_unop(ls, v);
2115 op = token2binop(ls->token); 2084 op = token2binop(ls->tok);
2116 while (op != OPR_NOBINOPR && priority[op].left > limit) { 2085 while (op != OPR_NOBINOPR && priority[op].left > limit) {
2117 ExpDesc v2; 2086 ExpDesc v2;
2118 BinOpr nextop; 2087 BinOpr nextop;
@@ -2301,9 +2270,9 @@ static void parse_func(LexState *ls, BCLine line)
2301 lj_lex_next(ls); /* Skip 'function'. */ 2270 lj_lex_next(ls); /* Skip 'function'. */
2302 /* Parse function name. */ 2271 /* Parse function name. */
2303 var_lookup(ls, &v); 2272 var_lookup(ls, &v);
2304 while (ls->token == '.') /* Multiple dot-separated fields. */ 2273 while (ls->tok == '.') /* Multiple dot-separated fields. */
2305 expr_field(ls, &v); 2274 expr_field(ls, &v);
2306 if (ls->token == ':') { /* Optional colon to signify method call. */ 2275 if (ls->tok == ':') { /* Optional colon to signify method call. */
2307 needself = 1; 2276 needself = 1;
2308 expr_field(ls, &v); 2277 expr_field(ls, &v);
2309 } 2278 }
@@ -2316,9 +2285,9 @@ static void parse_func(LexState *ls, BCLine line)
2316/* -- Control transfer statements ----------------------------------------- */ 2285/* -- Control transfer statements ----------------------------------------- */
2317 2286
2318/* Check for end of block. */ 2287/* Check for end of block. */
2319static int endofblock(LexToken token) 2288static int parse_isend(LexToken tok)
2320{ 2289{
2321 switch (token) { 2290 switch (tok) {
2322 case TK_else: case TK_elseif: case TK_end: case TK_until: case TK_eof: 2291 case TK_else: case TK_elseif: case TK_end: case TK_until: case TK_eof:
2323 return 1; 2292 return 1;
2324 default: 2293 default:
@@ -2333,7 +2302,7 @@ static void parse_return(LexState *ls)
2333 FuncState *fs = ls->fs; 2302 FuncState *fs = ls->fs;
2334 lj_lex_next(ls); /* Skip 'return'. */ 2303 lj_lex_next(ls); /* Skip 'return'. */
2335 fs->flags |= PROTO_HAS_RETURN; 2304 fs->flags |= PROTO_HAS_RETURN;
2336 if (endofblock(ls->token) || ls->token == ';') { /* Bare return. */ 2305 if (parse_isend(ls->tok) || ls->tok == ';') { /* Bare return. */
2337 ins = BCINS_AD(BC_RET0, 0, 1); 2306 ins = BCINS_AD(BC_RET0, 0, 1);
2338 } else { /* Return with one or more values. */ 2307 } else { /* Return with one or more values. */
2339 ExpDesc e; /* Receives the _last_ expression in the list. */ 2308 ExpDesc e; /* Receives the _last_ expression in the list. */
@@ -2399,18 +2368,18 @@ static void parse_label(LexState *ls)
2399 lex_check(ls, TK_label); 2368 lex_check(ls, TK_label);
2400 /* Recursively parse trailing statements: labels and ';' (Lua 5.2 only). */ 2369 /* Recursively parse trailing statements: labels and ';' (Lua 5.2 only). */
2401 for (;;) { 2370 for (;;) {
2402 if (ls->token == TK_label) { 2371 if (ls->tok == TK_label) {
2403 synlevel_begin(ls); 2372 synlevel_begin(ls);
2404 parse_label(ls); 2373 parse_label(ls);
2405 synlevel_end(ls); 2374 synlevel_end(ls);
2406 } else if (LJ_52 && ls->token == ';') { 2375 } else if (LJ_52 && ls->tok == ';') {
2407 lj_lex_next(ls); 2376 lj_lex_next(ls);
2408 } else { 2377 } else {
2409 break; 2378 break;
2410 } 2379 }
2411 } 2380 }
2412 /* Trailing label is considered to be outside of scope. */ 2381 /* Trailing label is considered to be outside of scope. */
2413 if (endofblock(ls->token) && ls->token != TK_until) 2382 if (parse_isend(ls->tok) && ls->tok != TK_until)
2414 ls->vstack[idx].slot = fs->bl->nactvar; 2383 ls->vstack[idx].slot = fs->bl->nactvar;
2415 gola_resolve(ls, fs->bl, idx); 2384 gola_resolve(ls, fs->bl, idx);
2416} 2385}
@@ -2594,9 +2563,9 @@ static void parse_for(LexState *ls, BCLine line)
2594 fscope_begin(fs, &bl, FSCOPE_LOOP); 2563 fscope_begin(fs, &bl, FSCOPE_LOOP);
2595 lj_lex_next(ls); /* Skip 'for'. */ 2564 lj_lex_next(ls); /* Skip 'for'. */
2596 varname = lex_str(ls); /* Get first variable name. */ 2565 varname = lex_str(ls); /* Get first variable name. */
2597 if (ls->token == '=') 2566 if (ls->tok == '=')
2598 parse_for_num(ls, varname, line); 2567 parse_for_num(ls, varname, line);
2599 else if (ls->token == ',' || ls->token == TK_in) 2568 else if (ls->tok == ',' || ls->tok == TK_in)
2600 parse_for_iter(ls, varname); 2569 parse_for_iter(ls, varname);
2601 else 2570 else
2602 err_syntax(ls, LJ_ERR_XFOR); 2571 err_syntax(ls, LJ_ERR_XFOR);
@@ -2622,12 +2591,12 @@ static void parse_if(LexState *ls, BCLine line)
2622 BCPos flist; 2591 BCPos flist;
2623 BCPos escapelist = NO_JMP; 2592 BCPos escapelist = NO_JMP;
2624 flist = parse_then(ls); 2593 flist = parse_then(ls);
2625 while (ls->token == TK_elseif) { /* Parse multiple 'elseif' blocks. */ 2594 while (ls->tok == TK_elseif) { /* Parse multiple 'elseif' blocks. */
2626 jmp_append(fs, &escapelist, bcemit_jmp(fs)); 2595 jmp_append(fs, &escapelist, bcemit_jmp(fs));
2627 jmp_tohere(fs, flist); 2596 jmp_tohere(fs, flist);
2628 flist = parse_then(ls); 2597 flist = parse_then(ls);
2629 } 2598 }
2630 if (ls->token == TK_else) { /* Parse optional 'else' block. */ 2599 if (ls->tok == TK_else) { /* Parse optional 'else' block. */
2631 jmp_append(fs, &escapelist, bcemit_jmp(fs)); 2600 jmp_append(fs, &escapelist, bcemit_jmp(fs));
2632 jmp_tohere(fs, flist); 2601 jmp_tohere(fs, flist);
2633 lj_lex_next(ls); /* Skip 'else'. */ 2602 lj_lex_next(ls); /* Skip 'else'. */
@@ -2645,7 +2614,7 @@ static void parse_if(LexState *ls, BCLine line)
2645static int parse_stmt(LexState *ls) 2614static int parse_stmt(LexState *ls)
2646{ 2615{
2647 BCLine line = ls->linenumber; 2616 BCLine line = ls->linenumber;
2648 switch (ls->token) { 2617 switch (ls->tok) {
2649 case TK_if: 2618 case TK_if:
2650 parse_if(ls, line); 2619 parse_if(ls, line);
2651 break; 2620 break;
@@ -2703,7 +2672,7 @@ static void parse_chunk(LexState *ls)
2703{ 2672{
2704 int islast = 0; 2673 int islast = 0;
2705 synlevel_begin(ls); 2674 synlevel_begin(ls);
2706 while (!islast && !endofblock(ls->token)) { 2675 while (!islast && !parse_isend(ls->tok)) {
2707 islast = parse_stmt(ls); 2676 islast = parse_stmt(ls);
2708 lex_opt(ls, ';'); 2677 lex_opt(ls, ';');
2709 lua_assert(ls->fs->framesize >= ls->fs->freereg && 2678 lua_assert(ls->fs->framesize >= ls->fs->freereg &&
@@ -2738,7 +2707,7 @@ GCproto *lj_parse(LexState *ls)
2738 bcemit_AD(&fs, BC_FUNCV, 0, 0); /* Placeholder. */ 2707 bcemit_AD(&fs, BC_FUNCV, 0, 0); /* Placeholder. */
2739 lj_lex_next(ls); /* Read-ahead first token. */ 2708 lj_lex_next(ls); /* Read-ahead first token. */
2740 parse_chunk(ls); 2709 parse_chunk(ls);
2741 if (ls->token != TK_eof) 2710 if (ls->tok != TK_eof)
2742 err_token(ls, TK_eof); 2711 err_token(ls, TK_eof);
2743 pt = fs_finish(ls, ls->linenumber); 2712 pt = fs_finish(ls, ls->linenumber);
2744 L->top--; /* Drop chunkname. */ 2713 L->top--; /* Drop chunkname. */
diff --git a/src/lj_record.c b/src/lj_record.c
index 7336e0ac..003910a9 100644
--- a/src/lj_record.c
+++ b/src/lj_record.c
@@ -1826,6 +1826,18 @@ void lj_record_ins(jit_State *J)
1826 J->maxslot = bc_a(pc[1]); /* Shrink used slots. */ 1826 J->maxslot = bc_a(pc[1]); /* Shrink used slots. */
1827 break; 1827 break;
1828 1828
1829 case BC_ISTYPE: case BC_ISNUM:
1830 /* These coercions need to correspond with lj_meta_istype(). */
1831 if (LJ_DUALNUM && rc == ~LJ_TNUMX+1)
1832 ra = lj_opt_narrow_toint(J, ra);
1833 else if (rc == ~LJ_TNUMX+2)
1834 ra = lj_ir_tonum(J, ra);
1835 else if (rc == ~LJ_TSTR+1)
1836 ra = lj_ir_tostr(J, ra);
1837 /* else: type specialization suffices. */
1838 J->base[bc_a(ins)] = ra;
1839 break;
1840
1829 /* -- Unary ops --------------------------------------------------------- */ 1841 /* -- Unary ops --------------------------------------------------------- */
1830 1842
1831 case BC_NOT: 1843 case BC_NOT:
@@ -1937,6 +1949,10 @@ void lj_record_ins(jit_State *J)
1937 ix.idxchain = LJ_MAX_IDXCHAIN; 1949 ix.idxchain = LJ_MAX_IDXCHAIN;
1938 rc = lj_record_idx(J, &ix); 1950 rc = lj_record_idx(J, &ix);
1939 break; 1951 break;
1952 case BC_TGETR: case BC_TSETR:
1953 ix.idxchain = 0;
1954 rc = lj_record_idx(J, &ix);
1955 break;
1940 1956
1941 case BC_TNEW: 1957 case BC_TNEW:
1942 rc = rec_tnew(J, rc); 1958 rc = rec_tnew(J, rc);
diff --git a/src/lj_state.c b/src/lj_state.c
index 8c53d37f..604ff886 100644
--- a/src/lj_state.c
+++ b/src/lj_state.c
@@ -12,6 +12,7 @@
12#include "lj_obj.h" 12#include "lj_obj.h"
13#include "lj_gc.h" 13#include "lj_gc.h"
14#include "lj_err.h" 14#include "lj_err.h"
15#include "lj_buf.h"
15#include "lj_str.h" 16#include "lj_str.h"
16#include "lj_tab.h" 17#include "lj_tab.h"
17#include "lj_func.h" 18#include "lj_func.h"
@@ -164,7 +165,7 @@ static void close_state(lua_State *L)
164 lj_ctype_freestate(g); 165 lj_ctype_freestate(g);
165#endif 166#endif
166 lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef); 167 lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef);
167 lj_str_freebuf(g, &g->tmpbuf); 168 lj_buf_free(g, &g->tmpbuf);
168 lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue); 169 lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue);
169 lua_assert(g->gc.total == sizeof(GG_State)); 170 lua_assert(g->gc.total == sizeof(GG_State));
170#ifndef LUAJIT_USE_SYSMALLOC 171#ifndef LUAJIT_USE_SYSMALLOC
@@ -203,7 +204,7 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)
203 setnilV(&g->nilnode.val); 204 setnilV(&g->nilnode.val);
204 setnilV(&g->nilnode.key); 205 setnilV(&g->nilnode.key);
205 setmref(g->nilnode.freetop, &g->nilnode); 206 setmref(g->nilnode.freetop, &g->nilnode);
206 lj_str_initbuf(&g->tmpbuf); 207 lj_buf_init(NULL, &g->tmpbuf);
207 g->gc.state = GCSpause; 208 g->gc.state = GCSpause;
208 setgcref(g->gc.root, obj2gco(L)); 209 setgcref(g->gc.root, obj2gco(L));
209 setmref(g->gc.sweep, &g->gc.root); 210 setmref(g->gc.sweep, &g->gc.root);
diff --git a/src/lj_str.c b/src/lj_str.c
index 6548ee4d..9eb04c57 100644
--- a/src/lj_str.c
+++ b/src/lj_str.c
@@ -1,9 +1,6 @@
1/* 1/*
2** String handling. 2** String handling.
3** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h 3** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
4**
5** Portions taken verbatim or adapted from the Lua interpreter.
6** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
7*/ 4*/
8 5
9#include <stdio.h> 6#include <stdio.h>
@@ -14,6 +11,7 @@
14#include "lj_obj.h" 11#include "lj_obj.h"
15#include "lj_gc.h" 12#include "lj_gc.h"
16#include "lj_err.h" 13#include "lj_err.h"
14#include "lj_buf.h"
17#include "lj_str.h" 15#include "lj_str.h"
18#include "lj_state.h" 16#include "lj_state.h"
19#include "lj_char.h" 17#include "lj_char.h"
@@ -170,14 +168,14 @@ void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s)
170/* -- Type conversions ---------------------------------------------------- */ 168/* -- Type conversions ---------------------------------------------------- */
171 169
172/* Print number to buffer. Canonicalizes non-finite values. */ 170/* Print number to buffer. Canonicalizes non-finite values. */
173size_t LJ_FASTCALL lj_str_bufnum(char *s, cTValue *o) 171MSize LJ_FASTCALL lj_str_bufnum(char *s, cTValue *o)
174{ 172{
175 if (LJ_LIKELY((o->u32.hi << 1) < 0xffe00000)) { /* Finite? */ 173 if (LJ_LIKELY((o->u32.hi << 1) < 0xffe00000)) { /* Finite? */
176 lua_Number n = o->n; 174 lua_Number n = o->n;
177#if __BIONIC__ 175#if __BIONIC__
178 if (tvismzero(o)) { s[0] = '-'; s[1] = '0'; return 2; } 176 if (tvismzero(o)) { s[0] = '-'; s[1] = '0'; return 2; }
179#endif 177#endif
180 return (size_t)lua_number2str(s, n); 178 return (MSize)lua_number2str(s, n);
181 } else if (((o->u32.hi & 0x000fffff) | o->u32.lo) != 0) { 179 } else if (((o->u32.hi & 0x000fffff) | o->u32.lo) != 0) {
182 s[0] = 'n'; s[1] = 'a'; s[2] = 'n'; return 3; 180 s[0] = 'n'; s[1] = 'a'; s[2] = 'n'; return 3;
183 } else if ((o->u32.hi & 0x80000000) == 0) { 181 } else if ((o->u32.hi & 0x80000000) == 0) {
@@ -187,30 +185,68 @@ size_t LJ_FASTCALL lj_str_bufnum(char *s, cTValue *o)
187 } 185 }
188} 186}
189 187
190/* Print integer to buffer. Returns pointer to start. */ 188/* Print integer to buffer. Returns pointer to start (!= buffer start). */
191char * LJ_FASTCALL lj_str_bufint(char *p, int32_t k) 189static char *str_bufint(char *p, int32_t k)
192{ 190{
193 uint32_t u = (uint32_t)(k < 0 ? -k : k); 191 uint32_t u = (uint32_t)(k < 0 ? -k : k);
194 p += 1+10; 192 p += LJ_STR_INTBUF;
195 do { *--p = (char)('0' + u % 10); } while (u /= 10); 193 do { *--p = (char)('0' + u % 10); } while (u /= 10);
196 if (k < 0) *--p = '-'; 194 if (k < 0) *--p = '-';
197 return p; 195 return p;
198} 196}
199 197
198/* Print pointer to buffer. */
199MSize LJ_FASTCALL lj_str_bufptr(char *p, const void *v)
200{
201 ptrdiff_t x = (ptrdiff_t)v;
202 MSize i, n = LJ_STR_PTRBUF;
203 if (x == 0) {
204 p[0] = 'N'; p[1] = 'U'; p[2] = 'L'; p[3] = 'L';
205 return 4;
206 }
207#if LJ_64
208 /* Shorten output for 64 bit pointers. */
209 n = 2+2*4+((x >> 32) ? 2+2*(lj_fls((uint32_t)(x >> 32))>>3) : 0);
210#endif
211 p[0] = '0';
212 p[1] = 'x';
213 for (i = n-1; i >= 2; i--, x >>= 4)
214 p[i] = "0123456789abcdef"[(x & 15)];
215 return n;
216}
217
218/* Print TValue to buffer (only for numbers) and return pointer to start. */
219const char *lj_str_buftv(char *buf, cTValue *o, MSize *lenp)
220{
221 if (tvisstr(o)) {
222 *lenp = strV(o)->len;
223 return strVdata(o);
224 } else if (tvisint(o)) {
225 char *p = str_bufint(buf, intV(o));
226 *lenp = (MSize)(buf+LJ_STR_INTBUF-p);
227 return p;
228 } else if (tvisnum(o)) {
229 *lenp = lj_str_bufnum(buf, o);
230 return buf;
231 } else {
232 return NULL;
233 }
234}
235
200/* Convert number to string. */ 236/* Convert number to string. */
201GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np) 237GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np)
202{ 238{
203 char buf[LJ_STR_NUMBUF]; 239 char buf[LJ_STR_NUMBUF];
204 size_t len = lj_str_bufnum(buf, (TValue *)np); 240 MSize len = lj_str_bufnum(buf, (TValue *)np);
205 return lj_str_new(L, buf, len); 241 return lj_str_new(L, buf, len);
206} 242}
207 243
208/* Convert integer to string. */ 244/* Convert integer to string. */
209GCstr * LJ_FASTCALL lj_str_fromint(lua_State *L, int32_t k) 245GCstr * LJ_FASTCALL lj_str_fromint(lua_State *L, int32_t k)
210{ 246{
211 char s[1+10]; 247 char buf[LJ_STR_INTBUF];
212 char *p = lj_str_bufint(s, k); 248 char *p = str_bufint(buf, k);
213 return lj_str_new(L, p, (size_t)(s+sizeof(s)-p)); 249 return lj_str_new(L, p, (size_t)(buf+sizeof(buf)-p));
214} 250}
215 251
216GCstr * LJ_FASTCALL lj_str_fromnumber(lua_State *L, cTValue *o) 252GCstr * LJ_FASTCALL lj_str_fromnumber(lua_State *L, cTValue *o)
@@ -220,54 +256,32 @@ GCstr * LJ_FASTCALL lj_str_fromnumber(lua_State *L, cTValue *o)
220 256
221/* -- String formatting --------------------------------------------------- */ 257/* -- String formatting --------------------------------------------------- */
222 258
223static void addstr(lua_State *L, SBuf *sb, const char *str, MSize len)
224{
225 char *p;
226 MSize i;
227 if (sb->n + len > sb->sz) {
228 MSize sz = sb->sz * 2;
229 while (sb->n + len > sz) sz = sz * 2;
230 lj_str_resizebuf(L, sb, sz);
231 }
232 p = sb->buf + sb->n;
233 sb->n += len;
234 for (i = 0; i < len; i++) p[i] = str[i];
235}
236
237static void addchar(lua_State *L, SBuf *sb, int c)
238{
239 if (sb->n + 1 > sb->sz) {
240 MSize sz = sb->sz * 2;
241 lj_str_resizebuf(L, sb, sz);
242 }
243 sb->buf[sb->n++] = (char)c;
244}
245
246/* Push formatted message as a string object to Lua stack. va_list variant. */ 259/* Push formatted message as a string object to Lua stack. va_list variant. */
247const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp) 260const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp)
248{ 261{
249 SBuf *sb = &G(L)->tmpbuf; 262 SBuf *sb = &G(L)->tmpbuf;
250 lj_str_needbuf(L, sb, (MSize)strlen(fmt)); 263 setsbufL(sb, L);
251 lj_str_resetbuf(sb); 264 lj_buf_need(sb, (MSize)strlen(fmt));
265 lj_buf_reset(sb);
252 for (;;) { 266 for (;;) {
253 const char *e = strchr(fmt, '%'); 267 const char *e = strchr(fmt, '%');
254 if (e == NULL) break; 268 if (e == NULL) break;
255 addstr(L, sb, fmt, (MSize)(e-fmt)); 269 lj_buf_putmem(sb, fmt, (MSize)(e-fmt));
256 /* This function only handles %s, %c, %d, %f and %p formats. */ 270 /* This function only handles %s, %c, %d, %f and %p formats. */
257 switch (e[1]) { 271 switch (e[1]) {
258 case 's': { 272 case 's': {
259 const char *s = va_arg(argp, char *); 273 const char *s = va_arg(argp, char *);
260 if (s == NULL) s = "(null)"; 274 if (s == NULL) s = "(null)";
261 addstr(L, sb, s, (MSize)strlen(s)); 275 lj_buf_putmem(sb, s, (MSize)strlen(s));
262 break; 276 break;
263 } 277 }
264 case 'c': 278 case 'c':
265 addchar(L, sb, va_arg(argp, int)); 279 lj_buf_putb(sb, va_arg(argp, int));
266 break; 280 break;
267 case 'd': { 281 case 'd': {
268 char buf[LJ_STR_INTBUF]; 282 char buf[LJ_STR_INTBUF];
269 char *p = lj_str_bufint(buf, va_arg(argp, int32_t)); 283 char *p = str_bufint(buf, va_arg(argp, int32_t));
270 addstr(L, sb, p, (MSize)(buf+LJ_STR_INTBUF-p)); 284 lj_buf_putmem(sb, p, (MSize)(buf+LJ_STR_INTBUF-p));
271 break; 285 break;
272 } 286 }
273 case 'f': { 287 case 'f': {
@@ -276,41 +290,28 @@ const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp)
276 MSize len; 290 MSize len;
277 tv.n = (lua_Number)(va_arg(argp, LUAI_UACNUMBER)); 291 tv.n = (lua_Number)(va_arg(argp, LUAI_UACNUMBER));
278 len = (MSize)lj_str_bufnum(buf, &tv); 292 len = (MSize)lj_str_bufnum(buf, &tv);
279 addstr(L, sb, buf, len); 293 lj_buf_putmem(sb, buf, len);
280 break; 294 break;
281 } 295 }
282 case 'p': { 296 case 'p': {
283#define FMTP_CHARS (2*sizeof(ptrdiff_t)) 297#define FMTP_CHARS (2*sizeof(ptrdiff_t))
284 char buf[2+FMTP_CHARS]; 298 char buf[LJ_STR_PTRBUF];
285 ptrdiff_t p = (ptrdiff_t)(va_arg(argp, void *)); 299 MSize len = lj_str_bufptr(buf, va_arg(argp, void *));
286 ptrdiff_t i, lasti = 2+FMTP_CHARS; 300 lj_buf_putmem(sb, buf, len);
287 if (p == 0) {
288 addstr(L, sb, "NULL", 4);
289 break;
290 }
291#if LJ_64
292 /* Shorten output for 64 bit pointers. */
293 lasti = 2+2*4+((p >> 32) ? 2+2*(lj_fls((uint32_t)(p >> 32))>>3) : 0);
294#endif
295 buf[0] = '0';
296 buf[1] = 'x';
297 for (i = lasti-1; i >= 2; i--, p >>= 4)
298 buf[i] = "0123456789abcdef"[(p & 15)];
299 addstr(L, sb, buf, (MSize)lasti);
300 break; 301 break;
301 } 302 }
302 case '%': 303 case '%':
303 addchar(L, sb, '%'); 304 lj_buf_putb(sb, '%');
304 break; 305 break;
305 default: 306 default:
306 addchar(L, sb, '%'); 307 lj_buf_putb(sb, '%');
307 addchar(L, sb, e[1]); 308 lj_buf_putb(sb, e[1]);
308 break; 309 break;
309 } 310 }
310 fmt = e+2; 311 fmt = e+2;
311 } 312 }
312 addstr(L, sb, fmt, (MSize)strlen(fmt)); 313 lj_buf_putmem(sb, fmt, (MSize)strlen(fmt));
313 setstrV(L, L->top, lj_str_new(L, sb->buf, sb->n)); 314 setstrV(L, L->top, lj_buf_str(L, sb));
314 incr_top(L); 315 incr_top(L);
315 return strVdata(L->top - 1); 316 return strVdata(L->top - 1);
316} 317}
@@ -326,14 +327,3 @@ const char *lj_str_pushf(lua_State *L, const char *fmt, ...)
326 return msg; 327 return msg;
327} 328}
328 329
329/* -- Buffer handling ----------------------------------------------------- */
330
331char *lj_str_needbuf(lua_State *L, SBuf *sb, MSize sz)
332{
333 if (sz > sb->sz) {
334 if (sz < LJ_MIN_SBUF) sz = LJ_MIN_SBUF;
335 lj_str_resizebuf(L, sb, sz);
336 }
337 return sb->buf;
338}
339
diff --git a/src/lj_str.h b/src/lj_str.h
index 3aa03662..6317e794 100644
--- a/src/lj_str.h
+++ b/src/lj_str.h
@@ -20,14 +20,17 @@ LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s);
20#define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1)) 20#define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1))
21 21
22/* Type conversions. */ 22/* Type conversions. */
23LJ_FUNC size_t LJ_FASTCALL lj_str_bufnum(char *s, cTValue *o); 23LJ_FUNC MSize LJ_FASTCALL lj_str_bufnum(char *s, cTValue *o);
24LJ_FUNC char * LJ_FASTCALL lj_str_bufint(char *p, int32_t k); 24LJ_FUNC MSize LJ_FASTCALL lj_str_bufptr(char *p, const void *v);
25LJ_FUNC const char *lj_str_buftv(char *buf, cTValue *o, MSize *lenp);
25LJ_FUNCA GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np); 26LJ_FUNCA GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np);
26LJ_FUNC GCstr * LJ_FASTCALL lj_str_fromint(lua_State *L, int32_t k); 27LJ_FUNC GCstr * LJ_FASTCALL lj_str_fromint(lua_State *L, int32_t k);
27LJ_FUNCA GCstr * LJ_FASTCALL lj_str_fromnumber(lua_State *L, cTValue *o); 28LJ_FUNCA GCstr * LJ_FASTCALL lj_str_fromnumber(lua_State *L, cTValue *o);
28 29
29#define LJ_STR_INTBUF (1+10) 30#define LJ_STR_INTBUF (1+10)
30#define LJ_STR_NUMBUF LUAI_MAXNUMBER2STR 31#define LJ_STR_NUMBUF LUAI_MAXNUMBER2STR
32#define LJ_STR_NUMBERBUF LUAI_MAXNUMBER2STR
33#define LJ_STR_PTRBUF (2*sizeof(ptrdiff_t)+2)
31 34
32/* String formatting. */ 35/* String formatting. */
33LJ_FUNC const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp); 36LJ_FUNC const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp);
@@ -37,14 +40,4 @@ LJ_FUNC const char *lj_str_pushf(lua_State *L, const char *fmt, ...)
37#endif 40#endif
38 ; 41 ;
39 42
40/* Resizable string buffers. Struct definition in lj_obj.h. */
41LJ_FUNC char *lj_str_needbuf(lua_State *L, SBuf *sb, MSize sz);
42
43#define lj_str_initbuf(sb) ((sb)->buf = NULL, (sb)->sz = 0)
44#define lj_str_resetbuf(sb) ((sb)->n = 0)
45#define lj_str_resizebuf(L, sb, size) \
46 ((sb)->buf = (char *)lj_mem_realloc(L, (sb)->buf, (sb)->sz, (size)), \
47 (sb)->sz = (size))
48#define lj_str_freebuf(g, sb) lj_mem_free(g, (void *)(sb)->buf, (sb)->sz)
49
50#endif 43#endif
diff --git a/src/lj_tab.h b/src/lj_tab.h
index 2787caa0..d361137c 100644
--- a/src/lj_tab.h
+++ b/src/lj_tab.h
@@ -50,7 +50,7 @@ LJ_FUNCA cTValue *lj_tab_get(lua_State *L, GCtab *t, cTValue *key);
50/* Caveat: all setters require a write barrier for the stored value. */ 50/* Caveat: all setters require a write barrier for the stored value. */
51 51
52LJ_FUNCA TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key); 52LJ_FUNCA TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key);
53LJ_FUNC TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key); 53LJ_FUNCA TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key);
54LJ_FUNC TValue *lj_tab_setstr(lua_State *L, GCtab *t, GCstr *key); 54LJ_FUNC TValue *lj_tab_setstr(lua_State *L, GCtab *t, GCstr *key);
55LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key); 55LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key);
56 56
diff --git a/src/lj_target_arm.h b/src/lj_target_arm.h
index bec55772..f1aedff0 100644
--- a/src/lj_target_arm.h
+++ b/src/lj_target_arm.h
@@ -243,10 +243,6 @@ typedef enum ARMIns {
243 ARMI_VCVT_S32_F64 = 0xeebd0bc0, 243 ARMI_VCVT_S32_F64 = 0xeebd0bc0,
244 ARMI_VCVT_U32_F32 = 0xeebc0ac0, 244 ARMI_VCVT_U32_F32 = 0xeebc0ac0,
245 ARMI_VCVT_U32_F64 = 0xeebc0bc0, 245 ARMI_VCVT_U32_F64 = 0xeebc0bc0,
246 ARMI_VCVTR_S32_F32 = 0xeebd0a40,
247 ARMI_VCVTR_S32_F64 = 0xeebd0b40,
248 ARMI_VCVTR_U32_F32 = 0xeebc0a40,
249 ARMI_VCVTR_U32_F64 = 0xeebc0b40,
250 ARMI_VCVT_F32_S32 = 0xeeb80ac0, 246 ARMI_VCVT_F32_S32 = 0xeeb80ac0,
251 ARMI_VCVT_F64_S32 = 0xeeb80bc0, 247 ARMI_VCVT_F64_S32 = 0xeeb80bc0,
252 ARMI_VCVT_F32_U32 = 0xeeb80a40, 248 ARMI_VCVT_F32_U32 = 0xeeb80a40,
diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h
index 84b0871d..450df77f 100644
--- a/src/lj_target_x86.h
+++ b/src/lj_target_x86.h
@@ -277,10 +277,8 @@ typedef enum {
277 XO_ROUNDSD = 0x0b3a0ffc, /* Really 66 0f 3a 0b. See asm_fpmath. */ 277 XO_ROUNDSD = 0x0b3a0ffc, /* Really 66 0f 3a 0b. See asm_fpmath. */
278 XO_UCOMISD = XO_660f(2e), 278 XO_UCOMISD = XO_660f(2e),
279 XO_CVTSI2SD = XO_f20f(2a), 279 XO_CVTSI2SD = XO_f20f(2a),
280 XO_CVTSD2SI = XO_f20f(2d),
281 XO_CVTTSD2SI= XO_f20f(2c), 280 XO_CVTTSD2SI= XO_f20f(2c),
282 XO_CVTSI2SS = XO_f30f(2a), 281 XO_CVTSI2SS = XO_f30f(2a),
283 XO_CVTSS2SI = XO_f30f(2d),
284 XO_CVTTSS2SI= XO_f30f(2c), 282 XO_CVTTSS2SI= XO_f30f(2c),
285 XO_CVTSS2SD = XO_f30f(5a), 283 XO_CVTSS2SD = XO_f30f(5a),
286 XO_CVTSD2SS = XO_f20f(5a), 284 XO_CVTSD2SS = XO_f20f(5a),
diff --git a/src/lj_vm.h b/src/lj_vm.h
index c5d05de4..948d63c2 100644
--- a/src/lj_vm.h
+++ b/src/lj_vm.h
@@ -49,12 +49,14 @@ LJ_ASMF void lj_vm_exit_handler(void);
49LJ_ASMF void lj_vm_exit_interp(void); 49LJ_ASMF void lj_vm_exit_interp(void);
50 50
51/* Internal math helper functions. */ 51/* Internal math helper functions. */
52#if LJ_TARGET_X86ORX64 || LJ_TARGET_PPC 52#if LJ_TARGET_PPC
53#define lj_vm_floor floor 53#define lj_vm_floor floor
54#define lj_vm_ceil ceil 54#define lj_vm_ceil ceil
55#else 55#else
56LJ_ASMF double lj_vm_floor(double); 56LJ_ASMF double lj_vm_floor(double);
57#if !LJ_TARGET_X86ORX64
57LJ_ASMF double lj_vm_ceil(double); 58LJ_ASMF double lj_vm_ceil(double);
59#endif
58#if LJ_TARGET_ARM 60#if LJ_TARGET_ARM
59LJ_ASMF double lj_vm_floor_sf(double); 61LJ_ASMF double lj_vm_floor_sf(double);
60LJ_ASMF double lj_vm_ceil_sf(double); 62LJ_ASMF double lj_vm_ceil_sf(double);
diff --git a/src/ljamalg.c b/src/ljamalg.c
index 962b3134..487609c4 100644
--- a/src/ljamalg.c
+++ b/src/ljamalg.c
@@ -33,6 +33,7 @@
33#include "lj_char.c" 33#include "lj_char.c"
34#include "lj_bc.c" 34#include "lj_bc.c"
35#include "lj_obj.c" 35#include "lj_obj.c"
36#include "lj_buf.c"
36#include "lj_str.c" 37#include "lj_str.c"
37#include "lj_tab.c" 38#include "lj_tab.c"
38#include "lj_func.c" 39#include "lj_func.c"
diff --git a/src/luaconf.h b/src/luaconf.h
index 8e3a7aaa..d283233d 100644
--- a/src/luaconf.h
+++ b/src/luaconf.h
@@ -30,12 +30,12 @@
30#define LUA_LDIR LUA_ROOT "share/lua/5.1/" 30#define LUA_LDIR LUA_ROOT "share/lua/5.1/"
31#define LUA_CDIR LUA_ROOT "lib/lua/5.1/" 31#define LUA_CDIR LUA_ROOT "lib/lua/5.1/"
32#ifdef LUA_XROOT 32#ifdef LUA_XROOT
33#define LUA_JDIR LUA_XROOT "share/luajit-2.0.1/" 33#define LUA_JDIR LUA_XROOT "share/luajit-2.1.0-alpha/"
34#define LUA_XPATH \ 34#define LUA_XPATH \
35 ";" LUA_XROOT "share/lua/5.1/?.lua;" LUA_XROOT "share/lua/5.1/?/init.lua" 35 ";" LUA_XROOT "share/lua/5.1/?.lua;" LUA_XROOT "share/lua/5.1/?/init.lua"
36#define LUA_XCPATH LUA_XROOT "lib/lua/5.1/?.so;" 36#define LUA_XCPATH LUA_XROOT "lib/lua/5.1/?.so;"
37#else 37#else
38#define LUA_JDIR LUA_ROOT "share/luajit-2.0.1/" 38#define LUA_JDIR LUA_ROOT "share/luajit-2.1.0-alpha/"
39#define LUA_XPATH 39#define LUA_XPATH
40#define LUA_XCPATH 40#define LUA_XCPATH
41#endif 41#endif
diff --git a/src/luajit.h b/src/luajit.h
index ed39d014..a4c939bf 100644
--- a/src/luajit.h
+++ b/src/luajit.h
@@ -30,9 +30,9 @@
30 30
31#include "lua.h" 31#include "lua.h"
32 32
33#define LUAJIT_VERSION "LuaJIT 2.0.1" 33#define LUAJIT_VERSION "LuaJIT 2.1.0-alpha"
34#define LUAJIT_VERSION_NUM 20001 /* Version 2.0.1 = 02.00.01. */ 34#define LUAJIT_VERSION_NUM 20100 /* Version 2.1.0 = 02.01.00. */
35#define LUAJIT_VERSION_SYM luaJIT_version_2_0_1 35#define LUAJIT_VERSION_SYM luaJIT_version_2_1_0_alpha
36#define LUAJIT_COPYRIGHT "Copyright (C) 2005-2013 Mike Pall" 36#define LUAJIT_COPYRIGHT "Copyright (C) 2005-2013 Mike Pall"
37#define LUAJIT_URL "http://luajit.org/" 37#define LUAJIT_URL "http://luajit.org/"
38 38
diff --git a/src/msvcbuild.bat b/src/msvcbuild.bat
index cdb42a8d..4a3c8e84 100644
--- a/src/msvcbuild.bat
+++ b/src/msvcbuild.bat
@@ -37,6 +37,7 @@ if exist minilua.exe.manifest^
37@if errorlevel 8 goto :X64 37@if errorlevel 8 goto :X64
38@set DASMFLAGS=-D WIN -D JIT -D FFI 38@set DASMFLAGS=-D WIN -D JIT -D FFI
39@set LJARCH=x86 39@set LJARCH=x86
40@set LJCOMPILE=%LJCOMPILE% /arch:SSE2
40:X64 41:X64
41minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_x86.dasc 42minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_x86.dasc
42@if errorlevel 1 goto :BAD 43@if errorlevel 1 goto :BAD
diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc
index 114416a4..1d4b60f4 100644
--- a/src/vm_arm.dasc
+++ b/src/vm_arm.dasc
@@ -615,6 +615,16 @@ static void build_subroutines(BuildCtx *ctx)
615 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. 615 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here.
616 | b ->vm_call_dispatch_f 616 | b ->vm_call_dispatch_f
617 | 617 |
618 |->vmeta_tgetr:
619 | .IOS mov RC, BASE
620 | bl extern lj_tab_getinth // (GCtab *t, int32_t key)
621 | // Returns cTValue * or NULL.
622 | .IOS mov BASE, RC
623 | cmp CRET1, #0
624 | ldrdne CARG12, [CRET1]
625 | mvneq CARG2, #~LJ_TNIL
626 | b ->BC_TGETR_Z
627 |
618 |//----------------------------------------------------------------------- 628 |//-----------------------------------------------------------------------
619 | 629 |
620 |->vmeta_tsets1: 630 |->vmeta_tsets1:
@@ -672,6 +682,15 @@ static void build_subroutines(BuildCtx *ctx)
672 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. 682 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here.
673 | b ->vm_call_dispatch_f 683 | b ->vm_call_dispatch_f
674 | 684 |
685 |->vmeta_tsetr:
686 | str BASE, L->base
687 | .IOS mov RC, BASE
688 | str PC, SAVE_PC
689 | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
690 | // Returns TValue *.
691 | .IOS mov BASE, RC
692 | b ->BC_TSETR_Z
693 |
675 |//-- Comparison metamethods --------------------------------------------- 694 |//-- Comparison metamethods ---------------------------------------------
676 | 695 |
677 |->vmeta_comp: 696 |->vmeta_comp:
@@ -736,6 +755,17 @@ static void build_subroutines(BuildCtx *ctx)
736 | b <3 755 | b <3
737 |.endif 756 |.endif
738 | 757 |
758 |->vmeta_istype:
759 | sub PC, PC, #4
760 | str BASE, L->base
761 | mov CARG1, L
762 | lsr CARG2, RA, #3
763 | mov CARG3, RC
764 | str PC, SAVE_PC
765 | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
766 | .IOS ldr BASE, L->base
767 | b ->cont_nop
768 |
739 |//-- Arithmetic metamethods --------------------------------------------- 769 |//-- Arithmetic metamethods ---------------------------------------------
740 | 770 |
741 |->vmeta_arith_vn: 771 |->vmeta_arith_vn:
@@ -1501,19 +1531,6 @@ static void build_subroutines(BuildCtx *ctx)
1501 | math_extern2 atan2 1531 | math_extern2 atan2
1502 | math_extern2 fmod 1532 | math_extern2 fmod
1503 | 1533 |
1504 |->ff_math_deg:
1505 |.if FPU
1506 | .ffunc_d math_rad
1507 | vldr d1, CFUNC:CARG3->upvalue[0]
1508 | vmul.f64 d0, d0, d1
1509 | b ->fff_resd
1510 |.else
1511 | .ffunc_n math_rad
1512 | ldrd CARG34, CFUNC:CARG3->upvalue[0]
1513 | bl extern __aeabi_dmul
1514 | b ->fff_restv
1515 |.endif
1516 |
1517 |.if HFABI 1534 |.if HFABI
1518 | .ffunc math_ldexp 1535 | .ffunc math_ldexp
1519 | ldr CARG4, [BASE, #4] 1536 | ldr CARG4, [BASE, #4]
@@ -1784,10 +1801,11 @@ static void build_subroutines(BuildCtx *ctx)
1784 | cmp CARG2, #1 1801 | cmp CARG2, #1
1785 | blo ->fff_emptystr // Zero-length string? 1802 | blo ->fff_emptystr // Zero-length string?
1786 | bne ->fff_fallback // Fallback for > 1-char strings. 1803 | bne ->fff_fallback // Fallback for > 1-char strings.
1787 | ldr RB, [DISPATCH, #DISPATCH_GL(tmpbuf.sz)] 1804 | ldr CARG2, [DISPATCH, #DISPATCH_GL(tmpbuf.b)]
1788 | ldr CARG2, [DISPATCH, #DISPATCH_GL(tmpbuf.buf)] 1805 | ldr RB, [DISPATCH, #DISPATCH_GL(tmpbuf.e)]
1789 | ldr CARG1, STR:CARG1[1] 1806 | ldr CARG1, STR:CARG1[1]
1790 | cmp RB, CARG3 1807 | add INS, CARG2, CARG3
1808 | cmp RB, INS
1791 | blo ->fff_fallback 1809 | blo ->fff_fallback
1792 |1: // Fill buffer with char. 1810 |1: // Fill buffer with char.
1793 | strb CARG1, [CARG2, CARG4] 1811 | strb CARG1, [CARG2, CARG4]
@@ -1802,11 +1820,12 @@ static void build_subroutines(BuildCtx *ctx)
1802 | blo ->fff_fallback 1820 | blo ->fff_fallback
1803 | checkstr CARG2, ->fff_fallback 1821 | checkstr CARG2, ->fff_fallback
1804 | ldr CARG3, STR:CARG1->len 1822 | ldr CARG3, STR:CARG1->len
1805 | ldr RB, [DISPATCH, #DISPATCH_GL(tmpbuf.sz)] 1823 | ldr CARG2, [DISPATCH, #DISPATCH_GL(tmpbuf.b)]
1806 | ldr CARG2, [DISPATCH, #DISPATCH_GL(tmpbuf.buf)] 1824 | ldr RB, [DISPATCH, #DISPATCH_GL(tmpbuf.e)]
1807 | mov CARG4, CARG3 1825 | mov CARG4, CARG3
1808 | add CARG1, STR:CARG1, #sizeof(GCstr) 1826 | add CARG1, STR:CARG1, #sizeof(GCstr)
1809 | cmp RB, CARG3 1827 | add INS, CARG2, CARG3
1828 | cmp RB, INS
1810 | blo ->fff_fallback 1829 | blo ->fff_fallback
1811 |1: // Reverse string copy. 1830 |1: // Reverse string copy.
1812 | ldrb RB, [CARG1], #1 1831 | ldrb RB, [CARG1], #1
@@ -1823,11 +1842,12 @@ static void build_subroutines(BuildCtx *ctx)
1823 | blo ->fff_fallback 1842 | blo ->fff_fallback
1824 | checkstr CARG2, ->fff_fallback 1843 | checkstr CARG2, ->fff_fallback
1825 | ldr CARG3, STR:CARG1->len 1844 | ldr CARG3, STR:CARG1->len
1826 | ldr RB, [DISPATCH, #DISPATCH_GL(tmpbuf.sz)] 1845 | ldr CARG2, [DISPATCH, #DISPATCH_GL(tmpbuf.b)]
1827 | ldr CARG2, [DISPATCH, #DISPATCH_GL(tmpbuf.buf)] 1846 | ldr RB, [DISPATCH, #DISPATCH_GL(tmpbuf.e)]
1828 | mov CARG4, #0 1847 | mov CARG4, #0
1829 | add CARG1, STR:CARG1, #sizeof(GCstr) 1848 | add CARG1, STR:CARG1, #sizeof(GCstr)
1830 | cmp RB, CARG3 1849 | add INS, CARG2, CARG3
1850 | cmp RB, INS
1831 | blo ->fff_fallback 1851 | blo ->fff_fallback
1832 |1: // ASCII case conversion. 1852 |1: // ASCII case conversion.
1833 | ldrb RB, [CARG1, CARG4] 1853 | ldrb RB, [CARG1, CARG4]
@@ -1844,17 +1864,6 @@ static void build_subroutines(BuildCtx *ctx)
1844 |ffstring_case string_lower, 65 1864 |ffstring_case string_lower, 65
1845 |ffstring_case string_upper, 97 1865 |ffstring_case string_upper, 97
1846 | 1866 |
1847 |//-- Table library ------------------------------------------------------
1848 |
1849 |.ffunc_1 table_getn
1850 | checktab CARG2, ->fff_fallback
1851 | .IOS mov RA, BASE
1852 | bl extern lj_tab_len // (GCtab *t)
1853 | // Returns uint32_t (but less than 2^31).
1854 | .IOS mov BASE, RA
1855 | mvn CARG2, #~LJ_TISNUM
1856 | b ->fff_restv
1857 |
1858 |//-- Bit library -------------------------------------------------------- 1867 |//-- Bit library --------------------------------------------------------
1859 | 1868 |
1860 |// FP number to bit conversion for soft-float. Clobbers r0-r3. 1869 |// FP number to bit conversion for soft-float. Clobbers r0-r3.
@@ -2834,6 +2843,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2834 | ins_next 2843 | ins_next
2835 break; 2844 break;
2836 2845
2846 case BC_ISTYPE:
2847 | // RA = src*8, RC = -type
2848 | ldrd CARG12, [BASE, RA]
2849 | ins_next1
2850 | cmn CARG2, RC
2851 | ins_next2
2852 | bne ->vmeta_istype
2853 | ins_next3
2854 break;
2855 case BC_ISNUM:
2856 | // RA = src*8, RC = -(TISNUM-1)
2857 | ldrd CARG12, [BASE, RA]
2858 | ins_next1
2859 | checktp CARG2, LJ_TISNUM
2860 | ins_next2
2861 | bhs ->vmeta_istype
2862 | ins_next3
2863 break;
2864
2837 /* -- Unary ops --------------------------------------------------------- */ 2865 /* -- Unary ops --------------------------------------------------------- */
2838 2866
2839 case BC_MOV: 2867 case BC_MOV:
@@ -3504,6 +3532,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3504 | bne <1 // 'no __index' flag set: done. 3532 | bne <1 // 'no __index' flag set: done.
3505 | b ->vmeta_tgetb 3533 | b ->vmeta_tgetb
3506 break; 3534 break;
3535 case BC_TGETR:
3536 | decode_RB8 RB, INS
3537 | decode_RC8 RC, INS
3538 | // RA = dst*8, RB = table*8, RC = key*8
3539 | ldr TAB:CARG1, [BASE, RB]
3540 | ldr CARG2, [BASE, RC]
3541 | ldr CARG4, TAB:CARG1->array
3542 | ldr CARG3, TAB:CARG1->asize
3543 | add CARG4, CARG4, CARG2, lsl #3
3544 | cmp CARG2, CARG3 // In array part?
3545 | bhs ->vmeta_tgetr
3546 | ldrd CARG12, [CARG4]
3547 |->BC_TGETR_Z:
3548 | ins_next1
3549 | ins_next2
3550 | strd CARG12, [BASE, RA]
3551 | ins_next3
3552 break;
3507 3553
3508 case BC_TSETV: 3554 case BC_TSETV:
3509 | decode_RB8 RB, INS 3555 | decode_RB8 RB, INS
@@ -3674,6 +3720,32 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3674 | barrierback TAB:CARG1, INS, CARG3 3720 | barrierback TAB:CARG1, INS, CARG3
3675 | b <2 3721 | b <2
3676 break; 3722 break;
3723 case BC_TSETR:
3724 | decode_RB8 RB, INS
3725 | decode_RC8 RC, INS
3726 | // RA = dst*8, RB = table*8, RC = key*8
3727 | ldr TAB:CARG2, [BASE, RB]
3728 | ldr CARG3, [BASE, RC]
3729 | ldrb INS, TAB:CARG2->marked
3730 | ldr CARG1, TAB:CARG2->array
3731 | ldr CARG4, TAB:CARG2->asize
3732 | tst INS, #LJ_GC_BLACK // isblack(table)
3733 | add CARG1, CARG1, CARG3, lsl #3
3734 | bne >7
3735 |2:
3736 | cmp CARG3, CARG4 // In array part?
3737 | bhs ->vmeta_tsetr
3738 |->BC_TSETR_Z:
3739 | ldrd CARG34, [BASE, RA]
3740 | ins_next1
3741 | ins_next2
3742 | strd CARG34, [CARG1]
3743 | ins_next3
3744 |
3745 |7: // Possible table write barrier for the value. Skip valiswhite check.
3746 | barrierback TAB:CARG2, INS, RB
3747 | b <2
3748 break;
3677 3749
3678 case BC_TSETM: 3750 case BC_TSETM:
3679 | // RA = base*8 (table at base-1), RC = num_const (start index) 3751 | // RA = base*8 (table at base-1), RC = num_const (start index)
diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc
index a81dbeeb..53000411 100644
--- a/src/vm_mips.dasc
+++ b/src/vm_mips.dasc
@@ -688,6 +688,16 @@ static void build_subroutines(BuildCtx *ctx)
688 | b ->vm_call_dispatch_f 688 | b ->vm_call_dispatch_f
689 |. li NARGS8:RC, 16 // 2 args for func(t, k). 689 |. li NARGS8:RC, 16 // 2 args for func(t, k).
690 | 690 |
691 |->vmeta_tgetr:
692 | load_got lj_tab_getinth
693 | call_intern lj_tab_getinth // (GCtab *t, int32_t key)
694 |. nop
695 | // Returns cTValue * or NULL.
696 | beqz CRET1, >1
697 |. nop
698 | b ->BC_TGETR_Z
699 |. ldc1 f0, 0(CRET1)
700 |
691 |//----------------------------------------------------------------------- 701 |//-----------------------------------------------------------------------
692 | 702 |
693 |->vmeta_tsets1: 703 |->vmeta_tsets1:
@@ -740,6 +750,16 @@ static void build_subroutines(BuildCtx *ctx)
740 | b ->vm_call_dispatch_f 750 | b ->vm_call_dispatch_f
741 |. li NARGS8:RC, 24 // 3 args for func(t, k, v) 751 |. li NARGS8:RC, 24 // 3 args for func(t, k, v)
742 | 752 |
753 |->vmeta_tsetr:
754 | load_got lj_tab_setinth
755 | sw BASE, L->base
756 | sw PC, SAVE_PC
757 | call_intern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
758 |. move CARG1, L
759 | // Returns TValue *.
760 | b ->BC_TSETR_Z
761 |. nop
762 |
743 |//-- Comparison metamethods --------------------------------------------- 763 |//-- Comparison metamethods ---------------------------------------------
744 | 764 |
745 |->vmeta_comp: 765 |->vmeta_comp:
@@ -813,6 +833,18 @@ static void build_subroutines(BuildCtx *ctx)
813 |. nop 833 |. nop
814 |.endif 834 |.endif
815 | 835 |
836 |->vmeta_istype:
837 | load_got lj_meta_istype
838 | addiu PC, PC, -4
839 | sw BASE, L->base
840 | srl CARG2, RA, 3
841 | srl CARG3, RD, 3
842 | sw PC, SAVE_PC
843 | call_intern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
844 |. move CARG1, L
845 | b ->cont_nop
846 |. nop
847 |
816 |//-- Arithmetic metamethods --------------------------------------------- 848 |//-- Arithmetic metamethods ---------------------------------------------
817 | 849 |
818 |->vmeta_unm: 850 |->vmeta_unm:
@@ -1188,7 +1220,7 @@ static void build_subroutines(BuildCtx *ctx)
1188 | mtc1 TMP0, FARG1 1220 | mtc1 TMP0, FARG1
1189 | beqz AT, ->fff_fallback 1221 | beqz AT, ->fff_fallback
1190 |. lw PC, FRAME_PC(BASE) 1222 |. lw PC, FRAME_PC(BASE)
1191 | cvt.w.d FRET1, FARG2 1223 | trunc.w.d FRET1, FARG2
1192 | cvt.d.w FARG1, FARG1 1224 | cvt.d.w FARG1, FARG1
1193 | lw TMP0, TAB:CARG1->asize 1225 | lw TMP0, TAB:CARG1->asize
1194 | lw TMP1, TAB:CARG1->array 1226 | lw TMP1, TAB:CARG1->array
@@ -1521,14 +1553,8 @@ static void build_subroutines(BuildCtx *ctx)
1521 | b ->fff_resn 1553 | b ->fff_resn
1522 |. nop 1554 |. nop
1523 | 1555 |
1524 |->ff_math_deg:
1525 |.ffunc_n math_rad
1526 |. ldc1 FARG2, CFUNC:RB->upvalue[0]
1527 | b ->fff_resn
1528 |. mul.d FRET1, FARG1, FARG2
1529 |
1530 |.ffunc_nn math_ldexp 1556 |.ffunc_nn math_ldexp
1531 | cvt.w.d FARG2, FARG2 1557 | trunc.w.d FARG2, FARG2
1532 | load_got ldexp 1558 | load_got ldexp
1533 | mfc1 CARG3, FARG2 1559 | mfc1 CARG3, FARG2
1534 | call_extern 1560 | call_extern
@@ -1628,7 +1654,7 @@ static void build_subroutines(BuildCtx *ctx)
1628 |. sltiu AT, CARG3, LJ_TISNUM 1654 |. sltiu AT, CARG3, LJ_TISNUM
1629 | beqz AT, ->fff_fallback 1655 | beqz AT, ->fff_fallback
1630 |. li CARG3, 1 1656 |. li CARG3, 1
1631 | cvt.w.d FARG1, FARG1 1657 | trunc.w.d FARG1, FARG1
1632 | addiu CARG2, sp, ARG5_OFS 1658 | addiu CARG2, sp, ARG5_OFS
1633 | sltiu AT, TMP0, 256 1659 | sltiu AT, TMP0, 256
1634 | mfc1 TMP0, FARG1 1660 | mfc1 TMP0, FARG1
@@ -1658,7 +1684,7 @@ static void build_subroutines(BuildCtx *ctx)
1658 | ldc1 f2, 8(BASE) 1684 | ldc1 f2, 8(BASE)
1659 | beqz AT, >1 1685 | beqz AT, >1
1660 |. li CARG4, -1 1686 |. li CARG4, -1
1661 | cvt.w.d f0, f0 1687 | trunc.w.d f0, f0
1662 | sltiu AT, CARG3, LJ_TISNUM 1688 | sltiu AT, CARG3, LJ_TISNUM
1663 | beqz AT, ->fff_fallback 1689 | beqz AT, ->fff_fallback
1664 |. mfc1 CARG4, f0 1690 |. mfc1 CARG4, f0
@@ -1666,7 +1692,7 @@ static void build_subroutines(BuildCtx *ctx)
1666 | sltiu AT, CARG2, LJ_TISNUM 1692 | sltiu AT, CARG2, LJ_TISNUM
1667 | beqz AT, ->fff_fallback 1693 | beqz AT, ->fff_fallback
1668 |. li AT, LJ_TSTR 1694 |. li AT, LJ_TSTR
1669 | cvt.w.d f2, f2 1695 | trunc.w.d f2, f2
1670 | bne TMP0, AT, ->fff_fallback 1696 | bne TMP0, AT, ->fff_fallback
1671 |. lw CARG2, STR:CARG1->len 1697 |. lw CARG2, STR:CARG1->len
1672 | mfc1 CARG3, f2 1698 | mfc1 CARG3, f2
@@ -1706,18 +1732,19 @@ static void build_subroutines(BuildCtx *ctx)
1706 | or AT, AT, TMP0 1732 | or AT, AT, TMP0
1707 | bnez AT, ->fff_fallback 1733 | bnez AT, ->fff_fallback
1708 |. sltiu AT, CARG4, LJ_TISNUM 1734 |. sltiu AT, CARG4, LJ_TISNUM
1709 | cvt.w.d f0, f0 1735 | trunc.w.d f0, f0
1710 | beqz AT, ->fff_fallback 1736 | beqz AT, ->fff_fallback
1711 |. lw TMP0, STR:CARG1->len 1737 |. lw TMP0, STR:CARG1->len
1712 | mfc1 CARG3, f0 1738 | mfc1 CARG3, f0
1713 | lw TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) 1739 | lw CARG2, DISPATCH_GL(tmpbuf.b)(DISPATCH)
1740 | lw TMP1, DISPATCH_GL(tmpbuf.e)(DISPATCH)
1714 | li AT, 1 1741 | li AT, 1
1715 | blez CARG3, ->fff_emptystr // Count <= 0? 1742 | blez CARG3, ->fff_emptystr // Count <= 0?
1716 |. sltu AT, AT, TMP0 1743 |. sltu AT, AT, TMP0
1717 | beqz TMP0, ->fff_emptystr // Zero length string? 1744 | beqz TMP0, ->fff_emptystr // Zero length string?
1718 |. sltu TMP0, TMP1, CARG3 1745 |. addu TMP3, CARG2, CARG3
1746 | sltu TMP0, TMP1, TMP3
1719 | or AT, AT, TMP0 1747 | or AT, AT, TMP0
1720 | lw CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
1721 | bnez AT, ->fff_fallback // Fallback for > 1-char strings. 1748 | bnez AT, ->fff_fallback // Fallback for > 1-char strings.
1722 |. lbu TMP0, STR:CARG1[1] 1749 |. lbu TMP0, STR:CARG1[1]
1723 | addu TMP2, CARG2, CARG3 1750 | addu TMP2, CARG2, CARG3
@@ -1736,14 +1763,14 @@ static void build_subroutines(BuildCtx *ctx)
1736 | beqz NARGS8:RC, ->fff_fallback 1763 | beqz NARGS8:RC, ->fff_fallback
1737 |. li AT, LJ_TSTR 1764 |. li AT, LJ_TSTR
1738 | bne CARG3, AT, ->fff_fallback 1765 | bne CARG3, AT, ->fff_fallback
1739 |. lw TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) 1766 |. lw CARG2, DISPATCH_GL(tmpbuf.b)(DISPATCH)
1740 | lw CARG3, STR:CARG1->len 1767 | lw CARG3, STR:CARG1->len
1768 | lw TMP1, DISPATCH_GL(tmpbuf.e)(DISPATCH)
1741 | addiu CARG1, STR:CARG1, #STR 1769 | addiu CARG1, STR:CARG1, #STR
1742 | lw CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) 1770 | addu CARG4, CARG2, CARG3
1743 | sltu AT, TMP1, CARG3 1771 | sltu AT, TMP1, CARG4
1744 | bnez AT, ->fff_fallback 1772 | bnez AT, ->fff_fallback
1745 |. addu TMP3, CARG1, CARG3 1773 |. addu TMP3, CARG1, CARG3
1746 | addu CARG4, CARG2, CARG3
1747 |1: // Reverse string copy. 1774 |1: // Reverse string copy.
1748 | lbu TMP1, 0(CARG1) 1775 | lbu TMP1, 0(CARG1)
1749 | sltu AT, CARG1, TMP3 1776 | sltu AT, CARG1, TMP3
@@ -1761,11 +1788,12 @@ static void build_subroutines(BuildCtx *ctx)
1761 | beqz NARGS8:RC, ->fff_fallback 1788 | beqz NARGS8:RC, ->fff_fallback
1762 |. li AT, LJ_TSTR 1789 |. li AT, LJ_TSTR
1763 | bne CARG3, AT, ->fff_fallback 1790 | bne CARG3, AT, ->fff_fallback
1764 |. lw TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) 1791 |. lw CARG2, DISPATCH_GL(tmpbuf.b)(DISPATCH)
1765 | lw CARG3, STR:CARG1->len 1792 | lw CARG3, STR:CARG1->len
1793 | lw TMP1, DISPATCH_GL(tmpbuf.e)(DISPATCH)
1766 | addiu CARG1, STR:CARG1, #STR 1794 | addiu CARG1, STR:CARG1, #STR
1767 | lw CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) 1795 | addu TMP3, CARG2, CARG3
1768 | sltu AT, TMP1, CARG3 1796 | sltu AT, TMP1, TMP3
1769 | bnez AT, ->fff_fallback 1797 | bnez AT, ->fff_fallback
1770 |. addu TMP3, CARG1, CARG3 1798 |. addu TMP3, CARG1, CARG3
1771 | move CARG4, CARG2 1799 | move CARG4, CARG2
@@ -1786,18 +1814,6 @@ static void build_subroutines(BuildCtx *ctx)
1786 |ffstring_case string_lower, 65 1814 |ffstring_case string_lower, 65
1787 |ffstring_case string_upper, 97 1815 |ffstring_case string_upper, 97
1788 | 1816 |
1789 |//-- Table library ------------------------------------------------------
1790 |
1791 |.ffunc_1 table_getn
1792 | li AT, LJ_TTAB
1793 | bne CARG3, AT, ->fff_fallback
1794 |. load_got lj_tab_len
1795 | call_intern lj_tab_len // (GCtab *t)
1796 |. nop
1797 | // Returns uint32_t (but less than 2^31).
1798 | b ->fff_resi
1799 |. nop
1800 |
1801 |//-- Bit library -------------------------------------------------------- 1817 |//-- Bit library --------------------------------------------------------
1802 | 1818 |
1803 |.macro .ffunc_bit, name 1819 |.macro .ffunc_bit, name
@@ -2572,6 +2588,26 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2572 | ins_next 2588 | ins_next
2573 break; 2589 break;
2574 2590
2591 case BC_ISTYPE:
2592 | // RA = src*8, RD = -type*8
2593 | addu TMP2, BASE, RA
2594 | srl TMP1, RD, 3
2595 | lw TMP0, HI(TMP2)
2596 | ins_next1
2597 | addu AT, TMP0, TMP1
2598 | bnez AT, ->vmeta_istype
2599 |. ins_next2
2600 break;
2601 case BC_ISNUM:
2602 | // RA = src*8, RD = -(TISNUM-1)*8
2603 | addu TMP2, BASE, RA
2604 | lw TMP0, HI(TMP2)
2605 | ins_next1
2606 | sltiu AT, TMP0, LJ_TISNUM
2607 | beqz AT, ->vmeta_istype
2608 |. ins_next2
2609 break;
2610
2575 /* -- Unary ops --------------------------------------------------------- */ 2611 /* -- Unary ops --------------------------------------------------------- */
2576 2612
2577 case BC_MOV: 2613 case BC_MOV:
@@ -3210,6 +3246,30 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3210 | b ->vmeta_tgetb // Caveat: preserve TMP0! 3246 | b ->vmeta_tgetb // Caveat: preserve TMP0!
3211 |. nop 3247 |. nop
3212 break; 3248 break;
3249 case BC_TGETR:
3250 | // RA = dst*8, RB = table*8, RC = key*8
3251 | decode_RB8a RB, INS
3252 | decode_RB8b RB
3253 | decode_RDtoRC8 RC, RD
3254 | addu CARG2, BASE, RB
3255 | addu CARG3, BASE, RC
3256 | lw TAB:CARG1, LO(CARG2)
3257 | ldc1 f0, 0(CARG3)
3258 | trunc.w.d f2, f0
3259 | lw TMP0, TAB:CARG1->asize
3260 | mfc1 CARG2, f2
3261 | lw TMP1, TAB:CARG1->array
3262 | sltu AT, CARG2, TMP0
3263 | sll TMP2, CARG2, 3
3264 | beqz AT, ->vmeta_tgetr // In array part?
3265 |. addu TMP2, TMP1, TMP2
3266 | ldc1 f0, 0(TMP2)
3267 |->BC_TGETR_Z:
3268 | addu RA, BASE, RA
3269 | ins_next1
3270 | sdc1 f0, 0(RA)
3271 | ins_next2
3272 break;
3213 3273
3214 case BC_TSETV: 3274 case BC_TSETV:
3215 | // RA = src*8, RB = table*8, RC = key*8 3275 | // RA = src*8, RB = table*8, RC = key*8
@@ -3398,6 +3458,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3398 |7: // Possible table write barrier for the value. Skip valiswhite check. 3458 |7: // Possible table write barrier for the value. Skip valiswhite check.
3399 | barrierback TAB:RB, TMP3, TMP0, <2 3459 | barrierback TAB:RB, TMP3, TMP0, <2
3400 break; 3460 break;
3461 case BC_TSETR:
3462 | // RA = dst*8, RB = table*8, RC = key*8
3463 | decode_RB8a RB, INS
3464 | decode_RB8b RB
3465 | decode_RDtoRC8 RC, RD
3466 | addu CARG1, BASE, RB
3467 | addu CARG3, BASE, RC
3468 | lw TAB:CARG2, LO(CARG1)
3469 | ldc1 f0, 0(CARG3)
3470 | trunc.w.d f2, f0
3471 | lbu TMP3, TAB:CARG2->marked
3472 | lw TMP0, TAB:CARG2->asize
3473 | mfc1 CARG3, f2
3474 | lw TMP1, TAB:CARG2->array
3475 | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
3476 | bnez AT, >7
3477 |. addu RA, BASE, RA
3478 |2:
3479 | sltu AT, CARG3, TMP0
3480 | sll TMP2, CARG3, 3
3481 | beqz AT, ->vmeta_tsetr // In array part?
3482 |. ldc1 f20, 0(RA)
3483 | addu CRET1, TMP1, TMP2
3484 |->BC_TSETR_Z:
3485 | ins_next1
3486 | sdc1 f20, 0(CRET1)
3487 | ins_next2
3488 |
3489 |7: // Possible table write barrier for the value. Skip valiswhite check.
3490 | barrierback TAB:RB, TMP3, TMP0, <2
3491 break;
3492
3401 3493
3402 case BC_TSETM: 3494 case BC_TSETM:
3403 | // RA = base*8 (table at base-1), RD = num_const*8 (start index) 3495 | // RA = base*8 (table at base-1), RD = num_const*8 (start index)
diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc
index 685ea518..514bd231 100644
--- a/src/vm_ppc.dasc
+++ b/src/vm_ppc.dasc
@@ -895,6 +895,17 @@ static void build_subroutines(BuildCtx *ctx)
895 | li NARGS8:RC, 16 // 2 args for func(t, k). 895 | li NARGS8:RC, 16 // 2 args for func(t, k).
896 | b ->vm_call_dispatch_f 896 | b ->vm_call_dispatch_f
897 | 897 |
898 |->vmeta_tgetr:
899 | bl extern lj_tab_getinth // (GCtab *t, int32_t key)
900 | // Returns cTValue * or NULL.
901 | cmplwi CRET1, 0
902 | beq >1
903 | lfd f14, 0(CRET1)
904 | b ->BC_TGETR_Z
905 |1:
906 | stwx TISNIL, BASE, RA
907 | b ->cont_nop
908 |
898 |//----------------------------------------------------------------------- 909 |//-----------------------------------------------------------------------
899 | 910 |
900 |->vmeta_tsets1: 911 |->vmeta_tsets1:
@@ -962,6 +973,14 @@ static void build_subroutines(BuildCtx *ctx)
962 | stfd f0, 16(BASE) // Copy value to third argument. 973 | stfd f0, 16(BASE) // Copy value to third argument.
963 | b ->vm_call_dispatch_f 974 | b ->vm_call_dispatch_f
964 | 975 |
976 |->vmeta_tsetr:
977 | stp BASE, L->base
978 | stw PC, SAVE_PC
979 | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
980 | // Returns TValue *.
981 | stfd f14, 0(CRET1)
982 | b ->cont_nop
983 |
965 |//-- Comparison metamethods --------------------------------------------- 984 |//-- Comparison metamethods ---------------------------------------------
966 | 985 |
967 |->vmeta_comp: 986 |->vmeta_comp:
@@ -1040,6 +1059,16 @@ static void build_subroutines(BuildCtx *ctx)
1040 | b <3 1059 | b <3
1041 |.endif 1060 |.endif
1042 | 1061 |
1062 |->vmeta_istype:
1063 | subi PC, PC, 4
1064 | stp BASE, L->base
1065 | srwi CARG2, RA, 3
1066 | mr CARG1, L
1067 | srwi CARG3, RD, 3
1068 | stw PC, SAVE_PC
1069 | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
1070 | b ->cont_nop
1071 |
1043 |//-- Arithmetic metamethods --------------------------------------------- 1072 |//-- Arithmetic metamethods ---------------------------------------------
1044 | 1073 |
1045 |->vmeta_arith_nv: 1074 |->vmeta_arith_nv:
@@ -1870,12 +1899,6 @@ static void build_subroutines(BuildCtx *ctx)
1870 | math_extern2 atan2 1899 | math_extern2 atan2
1871 | math_extern2 fmod 1900 | math_extern2 fmod
1872 | 1901 |
1873 |->ff_math_deg:
1874 |.ffunc_n math_rad
1875 | lfd FARG2, CFUNC:RB->upvalue[0]
1876 | fmul FARG1, FARG1, FARG2
1877 | b ->fff_resn
1878 |
1879 |.if DUALNUM 1902 |.if DUALNUM
1880 |.ffunc math_ldexp 1903 |.ffunc math_ldexp
1881 | cmplwi NARGS8:RC, 16 1904 | cmplwi NARGS8:RC, 16
@@ -2178,15 +2201,16 @@ static void build_subroutines(BuildCtx *ctx)
2178 |.endif 2201 |.endif
2179 | lwz TMP0, STR:CARG1->len 2202 | lwz TMP0, STR:CARG1->len
2180 | cmpwi CARG3, 0 2203 | cmpwi CARG3, 0
2181 | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) 2204 | lwz TMP1, DISPATCH_GL(tmpbuf.e)(DISPATCH)
2205 | lwz CARG2, DISPATCH_GL(tmpbuf.b)(DISPATCH)
2182 | ble >2 // Count <= 0? (or non-int) 2206 | ble >2 // Count <= 0? (or non-int)
2183 | cmplwi TMP0, 1 2207 | cmplwi TMP0, 1
2208 | add TMP3, CARG2, CARG3
2184 | subi TMP2, CARG3, 1 2209 | subi TMP2, CARG3, 1
2185 | blt >2 // Zero length string? 2210 | blt >2 // Zero length string?
2186 | cmplw cr1, TMP1, CARG3 2211 | cmplw cr1, TMP1, TMP3
2187 | bne ->fff_fallback // Fallback for > 1-char strings. 2212 | bne ->fff_fallback // Fallback for > 1-char strings.
2188 | lbz TMP0, STR:CARG1[1] 2213 | lbz TMP0, STR:CARG1[1]
2189 | lp CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
2190 | blt cr1, ->fff_fallback 2214 | blt cr1, ->fff_fallback
2191 |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?). 2215 |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?).
2192 | cmplwi TMP2, 0 2216 | cmplwi TMP2, 0
@@ -2206,13 +2230,14 @@ static void build_subroutines(BuildCtx *ctx)
2206 | lwz STR:CARG1, 4(BASE) 2230 | lwz STR:CARG1, 4(BASE)
2207 | blt ->fff_fallback 2231 | blt ->fff_fallback
2208 | checkstr CARG3 2232 | checkstr CARG3
2209 | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) 2233 | lwz CARG2, DISPATCH_GL(tmpbuf.b)(DISPATCH)
2234 | lwz TMP1, DISPATCH_GL(tmpbuf.e)(DISPATCH)
2210 | bne ->fff_fallback 2235 | bne ->fff_fallback
2211 | lwz CARG3, STR:CARG1->len 2236 | lwz CARG3, STR:CARG1->len
2212 | la CARG1, #STR(STR:CARG1) 2237 | la CARG1, #STR(STR:CARG1)
2213 | lp CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
2214 | li TMP2, 0 2238 | li TMP2, 0
2215 | cmplw TMP1, CARG3 2239 | add TMP3, CARG2, CARG3
2240 | cmplw TMP1, TMP3
2216 | subi TMP3, CARG3, 1 2241 | subi TMP3, CARG3, 1
2217 | blt ->fff_fallback 2242 | blt ->fff_fallback
2218 |1: // Reverse string copy. 2243 |1: // Reverse string copy.
@@ -2232,13 +2257,14 @@ static void build_subroutines(BuildCtx *ctx)
2232 | lwz STR:CARG1, 4(BASE) 2257 | lwz STR:CARG1, 4(BASE)
2233 | blt ->fff_fallback 2258 | blt ->fff_fallback
2234 | checkstr CARG3 2259 | checkstr CARG3
2235 | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) 2260 | lwz CARG2, DISPATCH_GL(tmpbuf.b)(DISPATCH)
2261 | lwz TMP1, DISPATCH_GL(tmpbuf.e)(DISPATCH)
2236 | bne ->fff_fallback 2262 | bne ->fff_fallback
2237 | lwz CARG3, STR:CARG1->len 2263 | lwz CARG3, STR:CARG1->len
2238 | la CARG1, #STR(STR:CARG1) 2264 | la CARG1, #STR(STR:CARG1)
2239 | lp CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
2240 | cmplw TMP1, CARG3
2241 | li TMP2, 0 2265 | li TMP2, 0
2266 | add TMP3, CARG2, CARG3
2267 | cmplw TMP1, TMP3
2242 | blt ->fff_fallback 2268 | blt ->fff_fallback
2243 |1: // ASCII case conversion. 2269 |1: // ASCII case conversion.
2244 | cmplw TMP2, CARG3 2270 | cmplw TMP2, CARG3
@@ -2258,14 +2284,6 @@ static void build_subroutines(BuildCtx *ctx)
2258 |ffstring_case string_lower, 65 2284 |ffstring_case string_lower, 65
2259 |ffstring_case string_upper, 97 2285 |ffstring_case string_upper, 97
2260 | 2286 |
2261 |//-- Table library ------------------------------------------------------
2262 |
2263 |.ffunc_1 table_getn
2264 | checktab CARG3; bne ->fff_fallback
2265 | bl extern lj_tab_len // (GCtab *t)
2266 | // Returns uint32_t (but less than 2^31).
2267 | b ->fff_resi
2268 |
2269 |//-- Bit library -------------------------------------------------------- 2287 |//-- Bit library --------------------------------------------------------
2270 | 2288 |
2271 |.macro .ffunc_bit, name 2289 |.macro .ffunc_bit, name
@@ -3265,6 +3283,29 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3265 | ins_next 3283 | ins_next
3266 break; 3284 break;
3267 3285
3286 case BC_ISTYPE:
3287 | // RA = src*8, RD = -type*8
3288 | lwzx TMP0, BASE, RA
3289 | srwi TMP1, RD, 3
3290 | ins_next1
3291 |.if not PPE and not GPR64
3292 | add. TMP0, TMP0, TMP1
3293 |.else
3294 | neg TMP1
3295 | cmpw TMP0, TMP1
3296 |.endif
3297 | bne ->vmeta_istype
3298 | ins_next2
3299 break;
3300 case BC_ISNUM:
3301 | // RA = src*8, RD = -(TISNUM-1)*8
3302 | lwzx TMP0, BASE, RA
3303 | ins_next1
3304 | checknum TMP0
3305 | bge ->vmeta_istype
3306 | ins_next2
3307 break;
3308
3268 /* -- Unary ops --------------------------------------------------------- */ 3309 /* -- Unary ops --------------------------------------------------------- */
3269 3310
3270 case BC_MOV: 3311 case BC_MOV:
@@ -4016,6 +4057,30 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4016 | bne <1 // 'no __index' flag set: done. 4057 | bne <1 // 'no __index' flag set: done.
4017 | b ->vmeta_tgetb // Caveat: preserve TMP0! 4058 | b ->vmeta_tgetb // Caveat: preserve TMP0!
4018 break; 4059 break;
4060 case BC_TGETR:
4061 | // RA = dst*8, RB = table*8, RC = key*8
4062 | add RB, BASE, RB
4063 | lwz TAB:CARG1, 4(RB)
4064 |.if DUALNUM
4065 | add RC, BASE, RC
4066 | lwz TMP0, TAB:CARG1->asize
4067 | lwz CARG2, 4(RC)
4068 | lwz TMP1, TAB:CARG1->array
4069 |.else
4070 | lfdx f0, BASE, RC
4071 | lwz TMP0, TAB:CARG1->asize
4072 | toint CARG2, f0
4073 | lwz TMP1, TAB:CARG1->array
4074 |.endif
4075 | cmplw TMP0, CARG2
4076 | slwi TMP2, CARG2, 3
4077 | ble ->vmeta_tgetr // In array part?
4078 | lfdx f14, TMP1, TMP2
4079 |->BC_TGETR_Z:
4080 | ins_next1
4081 | stfdx f14, BASE, RA
4082 | ins_next2
4083 break;
4019 4084
4020 case BC_TSETV: 4085 case BC_TSETV:
4021 | // RA = src*8, RB = table*8, RC = key*8 4086 | // RA = src*8, RB = table*8, RC = key*8
@@ -4195,6 +4260,39 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4195 | barrierback TAB:RB, TMP3, TMP0 4260 | barrierback TAB:RB, TMP3, TMP0
4196 | b <2 4261 | b <2
4197 break; 4262 break;
4263 case BC_TSETR:
4264 | // RA = dst*8, RB = table*8, RC = key*8
4265 | add RB, BASE, RB
4266 | lwz TAB:CARG2, 4(RB)
4267 |.if DUALNUM
4268 | add RC, BASE, RC
4269 | lbz TMP3, TAB:RB->marked
4270 | lwz TMP0, TAB:CARG2->asize
4271 | lwz CARG3, 4(RC)
4272 | lwz TMP1, TAB:CARG2->array
4273 |.else
4274 | lfdx f0, BASE, RC
4275 | lbz TMP3, TAB:RB->marked
4276 | lwz TMP0, TAB:CARG2->asize
4277 | toint CARG3, f0
4278 | lwz TMP1, TAB:CARG2->array
4279 |.endif
4280 | andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table)
4281 | bne >7
4282 |2:
4283 | cmplw TMP0, CARG3
4284 | slwi TMP2, CARG3, 3
4285 | lfdx f14, BASE, RA
4286 | ble ->vmeta_tsetr // In array part?
4287 | ins_next1
4288 | stfdx f14, TMP1, TMP2
4289 | ins_next2
4290 |
4291 |7: // Possible table write barrier for the value. Skip valiswhite check.
4292 | barrierback TAB:CARG2, TMP3, TMP2
4293 | b <2
4294 break;
4295
4198 4296
4199 case BC_TSETM: 4297 case BC_TSETM:
4200 | // RA = base*8 (table at base-1), RD = num_const*8 (start index) 4298 | // RA = base*8 (table at base-1), RD = num_const*8 (start index)
diff --git a/src/vm_ppcspe.dasc b/src/vm_ppcspe.dasc
index 4fabc02f..b443f1b3 100644
--- a/src/vm_ppcspe.dasc
+++ b/src/vm_ppcspe.dasc
@@ -1456,12 +1456,6 @@ static void build_subroutines(BuildCtx *ctx)
1456 | math_extern2 atan2 1456 | math_extern2 atan2
1457 | math_extern2 fmod 1457 | math_extern2 fmod
1458 | 1458 |
1459 |->ff_math_deg:
1460 |.ffunc_n math_rad
1461 | evldd CARG2, CFUNC:RB->upvalue[0]
1462 | efdmul CRET1, CARG1, CARG2
1463 | b ->fff_restv
1464 |
1465 |.ffunc math_ldexp 1459 |.ffunc math_ldexp
1466 | cmplwi NARGS8:RC, 16 1460 | cmplwi NARGS8:RC, 16
1467 | evldd CARG2, 0(BASE) 1461 | evldd CARG2, 0(BASE)
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
index b4674e2b..3fd897ec 100644
--- a/src/vm_x86.dasc
+++ b/src/vm_x86.dasc
@@ -18,7 +18,6 @@
18| 18|
19|.if P64 19|.if P64
20|.define X64, 1 20|.define X64, 1
21|.define SSE, 1
22|.if WIN 21|.if WIN
23|.define X64WIN, 1 22|.define X64WIN, 1
24|.endif 23|.endif
@@ -856,13 +855,9 @@ static void build_subroutines(BuildCtx *ctx)
856 |.if DUALNUM 855 |.if DUALNUM
857 | mov TMP2, LJ_TISNUM 856 | mov TMP2, LJ_TISNUM
858 | mov TMP1, RC 857 | mov TMP1, RC
859 |.elif SSE 858 |.else
860 | cvtsi2sd xmm0, RC 859 | cvtsi2sd xmm0, RC
861 | movsd TMPQ, xmm0 860 | movsd TMPQ, xmm0
862 |.else
863 | mov ARG4, RC
864 | fild ARG4
865 | fstp TMPQ
866 |.endif 861 |.endif
867 | lea RCa, TMPQ // Store temp. TValue in TMPQ. 862 | lea RCa, TMPQ // Store temp. TValue in TMPQ.
868 | jmp >1 863 | jmp >1
@@ -916,6 +911,19 @@ static void build_subroutines(BuildCtx *ctx)
916 | mov NARGS:RD, 2+1 // 2 args for func(t, k). 911 | mov NARGS:RD, 2+1 // 2 args for func(t, k).
917 | jmp ->vm_call_dispatch_f 912 | jmp ->vm_call_dispatch_f
918 | 913 |
914 |->vmeta_tgetr:
915 | mov FCARG1, TAB:RB
916 | mov RB, BASE // Save BASE.
917 | mov FCARG2, RC // Caveat: FCARG2 == BASE
918 | call extern lj_tab_getinth@8 // (GCtab *t, int32_t key)
919 | // cTValue * or NULL returned in eax (RC).
920 | movzx RA, PC_RA
921 | mov BASE, RB // Restore BASE.
922 | test RC, RC
923 | jnz ->BC_TGETR_Z
924 | mov dword [BASE+RA*8+4], LJ_TNIL
925 | jmp ->BC_TGETR2_Z
926 |
919 |//----------------------------------------------------------------------- 927 |//-----------------------------------------------------------------------
920 | 928 |
921 |->vmeta_tsets: 929 |->vmeta_tsets:
@@ -935,13 +943,9 @@ static void build_subroutines(BuildCtx *ctx)
935 |.if DUALNUM 943 |.if DUALNUM
936 | mov TMP2, LJ_TISNUM 944 | mov TMP2, LJ_TISNUM
937 | mov TMP1, RC 945 | mov TMP1, RC
938 |.elif SSE 946 |.else
939 | cvtsi2sd xmm0, RC 947 | cvtsi2sd xmm0, RC
940 | movsd TMPQ, xmm0 948 | movsd TMPQ, xmm0
941 |.else
942 | mov ARG4, RC
943 | fild ARG4
944 | fstp TMPQ
945 |.endif 949 |.endif
946 | lea RCa, TMPQ // Store temp. TValue in TMPQ. 950 | lea RCa, TMPQ // Store temp. TValue in TMPQ.
947 | jmp >1 951 | jmp >1
@@ -1007,6 +1011,33 @@ static void build_subroutines(BuildCtx *ctx)
1007 | mov NARGS:RD, 3+1 // 3 args for func(t, k, v). 1011 | mov NARGS:RD, 3+1 // 3 args for func(t, k, v).
1008 | jmp ->vm_call_dispatch_f 1012 | jmp ->vm_call_dispatch_f
1009 | 1013 |
1014 |->vmeta_tsetr:
1015 |.if X64WIN
1016 | mov L:CARG1d, SAVE_L
1017 | mov CARG3d, RC
1018 | mov L:CARG1d->base, BASE
1019 | xchg CARG2d, TAB:RB // Caveat: CARG2d == BASE.
1020 |.elif X64
1021 | mov L:CARG1d, SAVE_L
1022 | mov CARG2d, TAB:RB
1023 | mov L:CARG1d->base, BASE
1024 | mov RB, BASE // Save BASE.
1025 | mov CARG3d, RC // Caveat: CARG3d == BASE.
1026 |.else
1027 | mov L:RA, SAVE_L
1028 | mov ARG2, TAB:RB
1029 | mov RB, BASE // Save BASE.
1030 | mov ARG3, RC
1031 | mov ARG1, L:RA
1032 | mov L:RA->base, BASE
1033 |.endif
1034 | mov SAVE_PC, PC
1035 | call extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
1036 | // TValue * returned in eax (RC).
1037 | movzx RA, PC_RA
1038 | mov BASE, RB // Restore BASE.
1039 | jmp ->BC_TSETR_Z
1040 |
1010 |//-- Comparison metamethods --------------------------------------------- 1041 |//-- Comparison metamethods ---------------------------------------------
1011 | 1042 |
1012 |->vmeta_comp: 1043 |->vmeta_comp:
@@ -1101,6 +1132,26 @@ static void build_subroutines(BuildCtx *ctx)
1101 | jmp <3 1132 | jmp <3
1102 |.endif 1133 |.endif
1103 | 1134 |
1135 |->vmeta_istype:
1136 |.if X64
1137 | mov L:CARG1d, SAVE_L
1138 | mov L:CARG1d->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
1139 | mov CARG2d, RA
1140 | movzx CARG3d, PC_RD
1141 | mov L:RB, L:CARG1d
1142 |.else
1143 | movzx RD, PC_RD
1144 | mov ARG2, RA
1145 | mov L:RB, SAVE_L
1146 | mov ARG3, RD
1147 | mov ARG1, L:RB
1148 | mov L:RB->base, BASE
1149 |.endif
1150 | mov SAVE_PC, PC
1151 | call extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
1152 | mov BASE, L:RB->base
1153 | jmp <6
1154 |
1104 |//-- Arithmetic metamethods --------------------------------------------- 1155 |//-- Arithmetic metamethods ---------------------------------------------
1105 | 1156 |
1106 |->vmeta_arith_vno: 1157 |->vmeta_arith_vno:
@@ -1509,11 +1560,7 @@ static void build_subroutines(BuildCtx *ctx)
1509 |.else 1560 |.else
1510 | jae ->fff_fallback 1561 | jae ->fff_fallback
1511 |.endif 1562 |.endif
1512 |.if SSE
1513 | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0 1563 | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0
1514 |.else
1515 | fld qword [BASE]; jmp ->fff_resn
1516 |.endif
1517 | 1564 |
1518 |.ffunc_1 tostring 1565 |.ffunc_1 tostring
1519 | // Only handles the string or number case inline. 1566 | // Only handles the string or number case inline.
@@ -1631,19 +1678,12 @@ static void build_subroutines(BuildCtx *ctx)
1631 | add RD, 1 1678 | add RD, 1
1632 | mov dword [BASE-4], LJ_TISNUM 1679 | mov dword [BASE-4], LJ_TISNUM
1633 | mov dword [BASE-8], RD 1680 | mov dword [BASE-8], RD
1634 |.elif SSE 1681 |.else
1635 | movsd xmm0, qword [BASE+8] 1682 | movsd xmm0, qword [BASE+8]
1636 | sseconst_1 xmm1, RBa 1683 | sseconst_1 xmm1, RBa
1637 | addsd xmm0, xmm1 1684 | addsd xmm0, xmm1
1638 | cvtsd2si RD, xmm0 1685 | cvttsd2si RD, xmm0
1639 | movsd qword [BASE-8], xmm0 1686 | movsd qword [BASE-8], xmm0
1640 |.else
1641 | fld qword [BASE+8]
1642 | fld1
1643 | faddp st1
1644 | fist ARG1
1645 | fstp qword [BASE-8]
1646 | mov RD, ARG1
1647 |.endif 1687 |.endif
1648 | mov TAB:RB, [BASE] 1688 | mov TAB:RB, [BASE]
1649 | cmp RD, TAB:RB->asize; jae >2 // Not in array part? 1689 | cmp RD, TAB:RB->asize; jae >2 // Not in array part?
@@ -1690,12 +1730,9 @@ static void build_subroutines(BuildCtx *ctx)
1690 |.if DUALNUM 1730 |.if DUALNUM
1691 | mov dword [BASE+12], LJ_TISNUM 1731 | mov dword [BASE+12], LJ_TISNUM
1692 | mov dword [BASE+8], 0 1732 | mov dword [BASE+8], 0
1693 |.elif SSE 1733 |.else
1694 | xorps xmm0, xmm0 1734 | xorps xmm0, xmm0
1695 | movsd qword [BASE+8], xmm0 1735 | movsd qword [BASE+8], xmm0
1696 |.else
1697 | fldz
1698 | fstp qword [BASE+8]
1699 |.endif 1736 |.endif
1700 | mov RD, 1+3 1737 | mov RD, 1+3
1701 | jmp ->fff_res 1738 | jmp ->fff_res
@@ -1925,12 +1962,10 @@ static void build_subroutines(BuildCtx *ctx)
1925 |->fff_resi: // Dummy. 1962 |->fff_resi: // Dummy.
1926 |.endif 1963 |.endif
1927 | 1964 |
1928 |.if SSE
1929 |->fff_resn: 1965 |->fff_resn:
1930 | mov PC, [BASE-4] 1966 | mov PC, [BASE-4]
1931 | fstp qword [BASE-8] 1967 | fstp qword [BASE-8]
1932 | jmp ->fff_res1 1968 | jmp ->fff_res1
1933 |.endif
1934 | 1969 |
1935 | .ffunc_1 math_abs 1970 | .ffunc_1 math_abs
1936 |.if DUALNUM 1971 |.if DUALNUM
@@ -1954,8 +1989,6 @@ static void build_subroutines(BuildCtx *ctx)
1954 |.else 1989 |.else
1955 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 1990 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1956 |.endif 1991 |.endif
1957 |
1958 |.if SSE
1959 | movsd xmm0, qword [BASE] 1992 | movsd xmm0, qword [BASE]
1960 | sseconst_abs xmm1, RDa 1993 | sseconst_abs xmm1, RDa
1961 | andps xmm0, xmm1 1994 | andps xmm0, xmm1
@@ -1963,15 +1996,6 @@ static void build_subroutines(BuildCtx *ctx)
1963 | mov PC, [BASE-4] 1996 | mov PC, [BASE-4]
1964 | movsd qword [BASE-8], xmm0 1997 | movsd qword [BASE-8], xmm0
1965 | // fallthrough 1998 | // fallthrough
1966 |.else
1967 | fld qword [BASE]
1968 | fabs
1969 | // fallthrough
1970 |->fff_resxmm0: // Dummy.
1971 |->fff_resn:
1972 | mov PC, [BASE-4]
1973 | fstp qword [BASE-8]
1974 |.endif
1975 | 1999 |
1976 |->fff_res1: 2000 |->fff_res1:
1977 | mov RD, 1+1 2001 | mov RD, 1+1
@@ -2008,48 +2032,24 @@ static void build_subroutines(BuildCtx *ctx)
2008 |.else 2032 |.else
2009 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 2033 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
2010 |.endif 2034 |.endif
2011 |.if SSE
2012 | movsd xmm0, qword [BASE] 2035 | movsd xmm0, qword [BASE]
2013 | call ->vm_ .. func 2036 | call ->vm_ .. func .. _sse
2014 | .if DUALNUM 2037 |.if DUALNUM
2015 | cvtsd2si RB, xmm0 2038 | cvttsd2si RB, xmm0
2016 | cmp RB, 0x80000000 2039 | cmp RB, 0x80000000
2017 | jne ->fff_resi 2040 | jne ->fff_resi
2018 | cvtsi2sd xmm1, RB 2041 | cvtsi2sd xmm1, RB
2019 | ucomisd xmm0, xmm1 2042 | ucomisd xmm0, xmm1
2020 | jp ->fff_resxmm0 2043 | jp ->fff_resxmm0
2021 | je ->fff_resi 2044 | je ->fff_resi
2022 | .endif
2023 | jmp ->fff_resxmm0
2024 |.else
2025 | fld qword [BASE]
2026 | call ->vm_ .. func
2027 | .if DUALNUM
2028 | fist ARG1
2029 | mov RB, ARG1
2030 | cmp RB, 0x80000000; jne >2
2031 | fdup
2032 | fild ARG1
2033 | fcomparepp
2034 | jp ->fff_resn
2035 | jne ->fff_resn
2036 |2:
2037 | fpop
2038 | jmp ->fff_resi
2039 | .else
2040 | jmp ->fff_resn
2041 | .endif
2042 |.endif 2045 |.endif
2046 | jmp ->fff_resxmm0
2043 |.endmacro 2047 |.endmacro
2044 | 2048 |
2045 | math_round floor 2049 | math_round floor
2046 | math_round ceil 2050 | math_round ceil
2047 | 2051 |
2048 |.if SSE
2049 |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0 2052 |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0
2050 |.else
2051 |.ffunc_n math_sqrt; fsqrt; jmp ->fff_resn
2052 |.endif
2053 | 2053 |
2054 |.ffunc math_log 2054 |.ffunc math_log
2055 | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. 2055 | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument.
@@ -2072,42 +2072,24 @@ static void build_subroutines(BuildCtx *ctx)
2072 |.ffunc_n math_atan; fld1; fpatan; jmp ->fff_resn 2072 |.ffunc_n math_atan; fld1; fpatan; jmp ->fff_resn
2073 | 2073 |
2074 |.macro math_extern, func 2074 |.macro math_extern, func
2075 |.if SSE
2076 | .ffunc_nsse math_ .. func 2075 | .ffunc_nsse math_ .. func
2077 | .if not X64 2076 |.if not X64
2078 | movsd FPARG1, xmm0 2077 | movsd FPARG1, xmm0
2079 | .endif
2080 |.else
2081 | .ffunc_n math_ .. func
2082 | fstp FPARG1
2083 |.endif 2078 |.endif
2084 | mov RB, BASE 2079 | mov RB, BASE
2085 | call extern lj_vm_ .. func 2080 | call extern lj_vm_ .. func
2086 | mov BASE, RB 2081 | mov BASE, RB
2087 | .if X64 2082 |.if X64
2088 | jmp ->fff_resxmm0 2083 | jmp ->fff_resxmm0
2089 | .else 2084 |.else
2090 | jmp ->fff_resn 2085 | jmp ->fff_resn
2091 | .endif 2086 |.endif
2092 |.endmacro 2087 |.endmacro
2093 | 2088 |
2094 | math_extern sinh 2089 | math_extern sinh
2095 | math_extern cosh 2090 | math_extern cosh
2096 | math_extern tanh 2091 | math_extern tanh
2097 | 2092 |
2098 |->ff_math_deg:
2099 |.if SSE
2100 |.ffunc_nsse math_rad
2101 | mov CFUNC:RB, [BASE-8]
2102 | mulsd xmm0, qword CFUNC:RB->upvalue[0]
2103 | jmp ->fff_resxmm0
2104 |.else
2105 |.ffunc_n math_rad
2106 | mov CFUNC:RB, [BASE-8]
2107 | fmul qword CFUNC:RB->upvalue[0]
2108 | jmp ->fff_resn
2109 |.endif
2110 |
2111 |.ffunc_nn math_atan2; fpatan; jmp ->fff_resn 2093 |.ffunc_nn math_atan2; fpatan; jmp ->fff_resn
2112 |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn 2094 |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn
2113 | 2095 |
@@ -2123,65 +2105,34 @@ static void build_subroutines(BuildCtx *ctx)
2123 | cmp RB, 0x00200000; jb >4 2105 | cmp RB, 0x00200000; jb >4
2124 |1: 2106 |1:
2125 | shr RB, 21; sub RB, RC // Extract and unbias exponent. 2107 | shr RB, 21; sub RB, RC // Extract and unbias exponent.
2126 |.if SSE
2127 | cvtsi2sd xmm0, RB 2108 | cvtsi2sd xmm0, RB
2128 |.else
2129 | mov TMP1, RB; fild TMP1
2130 |.endif
2131 | mov RB, [BASE-4] 2109 | mov RB, [BASE-4]
2132 | and RB, 0x800fffff // Mask off exponent. 2110 | and RB, 0x800fffff // Mask off exponent.
2133 | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0. 2111 | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0.
2134 | mov [BASE-4], RB 2112 | mov [BASE-4], RB
2135 |2: 2113 |2:
2136 |.if SSE
2137 | movsd qword [BASE], xmm0 2114 | movsd qword [BASE], xmm0
2138 |.else
2139 | fstp qword [BASE]
2140 |.endif
2141 | mov RD, 1+2 2115 | mov RD, 1+2
2142 | jmp ->fff_res 2116 | jmp ->fff_res
2143 |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0. 2117 |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0.
2144 |.if SSE
2145 | xorps xmm0, xmm0; jmp <2 2118 | xorps xmm0, xmm0; jmp <2
2146 |.else
2147 | fldz; jmp <2
2148 |.endif
2149 |4: // Handle denormals by multiplying with 2^54 and adjusting the bias. 2119 |4: // Handle denormals by multiplying with 2^54 and adjusting the bias.
2150 |.if SSE
2151 | movsd xmm0, qword [BASE] 2120 | movsd xmm0, qword [BASE]
2152 | sseconst_hi xmm1, RBa, 43500000 // 2^54. 2121 | sseconst_hi xmm1, RBa, 43500000 // 2^54.
2153 | mulsd xmm0, xmm1 2122 | mulsd xmm0, xmm1
2154 | movsd qword [BASE-8], xmm0 2123 | movsd qword [BASE-8], xmm0
2155 |.else
2156 | fld qword [BASE]
2157 | mov TMP1, 0x5a800000; fmul TMP1 // x = x*2^54
2158 | fstp qword [BASE-8]
2159 |.endif
2160 | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1 2124 | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1
2161 | 2125 |
2162 |.if SSE
2163 |.ffunc_nsse math_modf 2126 |.ffunc_nsse math_modf
2164 |.else
2165 |.ffunc_n math_modf
2166 |.endif
2167 | mov RB, [BASE+4] 2127 | mov RB, [BASE+4]
2168 | mov PC, [BASE-4] 2128 | mov PC, [BASE-4]
2169 | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf? 2129 | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf?
2170 |.if SSE
2171 | movaps xmm4, xmm0 2130 | movaps xmm4, xmm0
2172 | call ->vm_trunc 2131 | call ->vm_trunc_sse
2173 | subsd xmm4, xmm0 2132 | subsd xmm4, xmm0
2174 |1: 2133 |1:
2175 | movsd qword [BASE-8], xmm0 2134 | movsd qword [BASE-8], xmm0
2176 | movsd qword [BASE], xmm4 2135 | movsd qword [BASE], xmm4
2177 |.else
2178 | fdup
2179 | call ->vm_trunc
2180 | fsub st1, st0
2181 |1:
2182 | fstp qword [BASE-8]
2183 | fstp qword [BASE]
2184 |.endif
2185 | mov RC, [BASE-4]; mov RB, [BASE+4] 2136 | mov RC, [BASE-4]; mov RB, [BASE+4]
2186 | xor RC, RB; js >3 // Need to adjust sign? 2137 | xor RC, RB; js >3 // Need to adjust sign?
2187 |2: 2138 |2:
@@ -2191,24 +2142,16 @@ static void build_subroutines(BuildCtx *ctx)
2191 | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction. 2142 | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction.
2192 | jmp <2 2143 | jmp <2
2193 |4: 2144 |4:
2194 |.if SSE
2195 | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0. 2145 | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0.
2196 |.else
2197 | fldz; fxch; jmp <1 // Return +-Inf and +-0.
2198 |.endif
2199 | 2146 |
2200 |.ffunc_nnr math_fmod 2147 |.ffunc_nnr math_fmod
2201 |1: ; fprem; fnstsw ax; sahf; jp <1 2148 |1: ; fprem; fnstsw ax; sahf; jp <1
2202 | fpop1 2149 | fpop1
2203 | jmp ->fff_resn 2150 | jmp ->fff_resn
2204 | 2151 |
2205 |.if SSE 2152 |.ffunc_nnsse math_pow; call ->vm_pow_sse; jmp ->fff_resxmm0
2206 |.ffunc_nnsse math_pow; call ->vm_pow; jmp ->fff_resxmm0
2207 |.else
2208 |.ffunc_nn math_pow; call ->vm_pow; jmp ->fff_resn
2209 |.endif
2210 | 2153 |
2211 |.macro math_minmax, name, cmovop, fcmovop, sseop 2154 |.macro math_minmax, name, cmovop, sseop
2212 | .ffunc name 2155 | .ffunc name
2213 | mov RA, 2 2156 | mov RA, 2
2214 | cmp dword [BASE+4], LJ_TISNUM 2157 | cmp dword [BASE+4], LJ_TISNUM
@@ -2225,12 +2168,7 @@ static void build_subroutines(BuildCtx *ctx)
2225 |3: 2168 |3:
2226 | ja ->fff_fallback 2169 | ja ->fff_fallback
2227 | // Convert intermediate result to number and continue below. 2170 | // Convert intermediate result to number and continue below.
2228 |.if SSE
2229 | cvtsi2sd xmm0, RB 2171 | cvtsi2sd xmm0, RB
2230 |.else
2231 | mov TMP1, RB
2232 | fild TMP1
2233 |.endif
2234 | jmp >6 2172 | jmp >6
2235 |4: 2173 |4:
2236 | ja ->fff_fallback 2174 | ja ->fff_fallback
@@ -2238,7 +2176,6 @@ static void build_subroutines(BuildCtx *ctx)
2238 | jae ->fff_fallback 2176 | jae ->fff_fallback
2239 |.endif 2177 |.endif
2240 | 2178 |
2241 |.if SSE
2242 | movsd xmm0, qword [BASE] 2179 | movsd xmm0, qword [BASE]
2243 |5: // Handle numbers or integers. 2180 |5: // Handle numbers or integers.
2244 | cmp RA, RD; jae ->fff_resxmm0 2181 | cmp RA, RD; jae ->fff_resxmm0
@@ -2257,34 +2194,10 @@ static void build_subroutines(BuildCtx *ctx)
2257 | sseop xmm0, xmm1 2194 | sseop xmm0, xmm1
2258 | add RA, 1 2195 | add RA, 1
2259 | jmp <5 2196 | jmp <5
2260 |.else
2261 | fld qword [BASE]
2262 |5: // Handle numbers or integers.
2263 | cmp RA, RD; jae ->fff_resn
2264 | cmp dword [BASE+RA*8-4], LJ_TISNUM
2265 |.if DUALNUM
2266 | jb >6
2267 | ja >9
2268 | fild dword [BASE+RA*8-8]
2269 | jmp >7
2270 |.else
2271 | jae >9
2272 |.endif
2273 |6:
2274 | fld qword [BASE+RA*8-8]
2275 |7:
2276 | fucomi st1; fcmovop st1; fpop1
2277 | add RA, 1
2278 | jmp <5
2279 |.endif
2280 |.endmacro 2197 |.endmacro
2281 | 2198 |
2282 | math_minmax math_min, cmovg, fcmovnbe, minsd 2199 | math_minmax math_min, cmovg, minsd
2283 | math_minmax math_max, cmovl, fcmovbe, maxsd 2200 | math_minmax math_max, cmovl, maxsd
2284 |.if not SSE
2285 |9:
2286 | fpop; jmp ->fff_fallback
2287 |.endif
2288 | 2201 |
2289 |//-- String library ----------------------------------------------------- 2202 |//-- String library -----------------------------------------------------
2290 | 2203 |
@@ -2293,10 +2206,8 @@ static void build_subroutines(BuildCtx *ctx)
2293 | mov STR:RB, [BASE] 2206 | mov STR:RB, [BASE]
2294 |.if DUALNUM 2207 |.if DUALNUM
2295 | mov RB, dword STR:RB->len; jmp ->fff_resi 2208 | mov RB, dword STR:RB->len; jmp ->fff_resi
2296 |.elif SSE
2297 | cvtsi2sd xmm0, dword STR:RB->len; jmp ->fff_resxmm0
2298 |.else 2209 |.else
2299 | fild dword STR:RB->len; jmp ->fff_resn 2210 | cvtsi2sd xmm0, dword STR:RB->len; jmp ->fff_resxmm0
2300 |.endif 2211 |.endif
2301 | 2212 |
2302 |.ffunc string_byte // Only handle the 1-arg case here. 2213 |.ffunc string_byte // Only handle the 1-arg case here.
@@ -2309,10 +2220,8 @@ static void build_subroutines(BuildCtx *ctx)
2309 | movzx RB, byte STR:RB[1] 2220 | movzx RB, byte STR:RB[1]
2310 |.if DUALNUM 2221 |.if DUALNUM
2311 | jmp ->fff_resi 2222 | jmp ->fff_resi
2312 |.elif SSE
2313 | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0
2314 |.else 2223 |.else
2315 | mov TMP1, RB; fild TMP1; jmp ->fff_resn 2224 | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0
2316 |.endif 2225 |.endif
2317 | 2226 |
2318 |.ffunc string_char // Only handle the 1-arg case here. 2227 |.ffunc string_char // Only handle the 1-arg case here.
@@ -2324,16 +2233,11 @@ static void build_subroutines(BuildCtx *ctx)
2324 | mov RB, dword [BASE] 2233 | mov RB, dword [BASE]
2325 | cmp RB, 255; ja ->fff_fallback 2234 | cmp RB, 255; ja ->fff_fallback
2326 | mov TMP2, RB 2235 | mov TMP2, RB
2327 |.elif SSE 2236 |.else
2328 | jae ->fff_fallback 2237 | jae ->fff_fallback
2329 | cvttsd2si RB, qword [BASE] 2238 | cvttsd2si RB, qword [BASE]
2330 | cmp RB, 255; ja ->fff_fallback 2239 | cmp RB, 255; ja ->fff_fallback
2331 | mov TMP2, RB 2240 | mov TMP2, RB
2332 |.else
2333 | jae ->fff_fallback
2334 | fld qword [BASE]
2335 | fistp TMP2
2336 | cmp TMP2, 255; ja ->fff_fallback
2337 |.endif 2241 |.endif
2338 |.if X64 2242 |.if X64
2339 | mov TMP3, 1 2243 | mov TMP3, 1
@@ -2371,14 +2275,10 @@ static void build_subroutines(BuildCtx *ctx)
2371 | jne ->fff_fallback 2275 | jne ->fff_fallback
2372 | mov RB, dword [BASE+16] 2276 | mov RB, dword [BASE+16]
2373 | mov TMP2, RB 2277 | mov TMP2, RB
2374 |.elif SSE 2278 |.else
2375 | jae ->fff_fallback 2279 | jae ->fff_fallback
2376 | cvttsd2si RB, qword [BASE+16] 2280 | cvttsd2si RB, qword [BASE+16]
2377 | mov TMP2, RB 2281 | mov TMP2, RB
2378 |.else
2379 | jae ->fff_fallback
2380 | fld qword [BASE+16]
2381 | fistp TMP2
2382 |.endif 2282 |.endif
2383 |1: 2283 |1:
2384 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback 2284 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
@@ -2393,12 +2293,8 @@ static void build_subroutines(BuildCtx *ctx)
2393 | mov RB, STR:RB->len 2293 | mov RB, STR:RB->len
2394 |.if DUALNUM 2294 |.if DUALNUM
2395 | mov RA, dword [BASE+8] 2295 | mov RA, dword [BASE+8]
2396 |.elif SSE
2397 | cvttsd2si RA, qword [BASE+8]
2398 |.else 2296 |.else
2399 | fld qword [BASE+8] 2297 | cvttsd2si RA, qword [BASE+8]
2400 | fistp ARG3
2401 | mov RA, ARG3
2402 |.endif 2298 |.endif
2403 | mov RC, TMP2 2299 | mov RC, TMP2
2404 | cmp RB, RC // len < end? (unsigned compare) 2300 | cmp RB, RC // len < end? (unsigned compare)
@@ -2451,34 +2347,30 @@ static void build_subroutines(BuildCtx *ctx)
2451 |.if DUALNUM 2347 |.if DUALNUM
2452 | jne ->fff_fallback 2348 | jne ->fff_fallback
2453 | mov RC, dword [BASE+8] 2349 | mov RC, dword [BASE+8]
2454 |.elif SSE
2455 | jae ->fff_fallback
2456 | cvttsd2si RC, qword [BASE+8]
2457 |.else 2350 |.else
2458 | jae ->fff_fallback 2351 | jae ->fff_fallback
2459 | fld qword [BASE+8] 2352 | cvttsd2si RC, qword [BASE+8]
2460 | fistp TMP2
2461 | mov RC, TMP2
2462 |.endif 2353 |.endif
2463 | test RC, RC 2354 | test RC, RC
2464 | jle ->fff_emptystr // Count <= 0? (or non-int) 2355 | jle ->fff_emptystr // Count <= 0? (or non-int)
2465 | cmp dword STR:RB->len, 1 2356 | cmp dword STR:RB->len, 1
2466 | jb ->fff_emptystr // Zero length string? 2357 | jb ->fff_emptystr // Zero length string?
2467 | jne ->fff_fallback_2 // Fallback for > 1-char strings. 2358 | jne ->fff_fallback_2 // Fallback for > 1-char strings.
2468 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_2
2469 | movzx RA, byte STR:RB[1] 2359 | movzx RA, byte STR:RB[1]
2470 | mov RB, [DISPATCH+DISPATCH_GL(tmpbuf.buf)] 2360 | mov RB, [DISPATCH+DISPATCH_GL(tmpbuf.b)]
2361 | add RB, RC
2362 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.e)], RB; jb ->fff_fallback_2
2471 |.if X64 2363 |.if X64
2472 | mov TMP3, RC 2364 | mov TMP3, RC
2473 |.else 2365 |.else
2474 | mov ARG3, RC 2366 | mov ARG3, RC
2475 |.endif 2367 |.endif
2476 |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?). 2368 |1: // Fill buffer with char.
2477 | mov [RB], RAL 2369 | sub RB, 1
2478 | add RB, 1
2479 | sub RC, 1 2370 | sub RC, 1
2371 | mov [RB], RAL
2480 | jnz <1 2372 | jnz <1
2481 | mov RD, [DISPATCH+DISPATCH_GL(tmpbuf.buf)] 2373 | mov RD, [DISPATCH+DISPATCH_GL(tmpbuf.b)]
2482 | jmp ->fff_newstr 2374 | jmp ->fff_newstr
2483 | 2375 |
2484 |.ffunc_1 string_reverse 2376 |.ffunc_1 string_reverse
@@ -2488,15 +2380,16 @@ static void build_subroutines(BuildCtx *ctx)
2488 | mov RC, STR:RB->len 2380 | mov RC, STR:RB->len
2489 | test RC, RC 2381 | test RC, RC
2490 | jz ->fff_emptystr // Zero length string? 2382 | jz ->fff_emptystr // Zero length string?
2491 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1
2492 | add RB, #STR
2493 | mov TMP2, PC // Need another temp register. 2383 | mov TMP2, PC // Need another temp register.
2384 | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.b)]
2385 | lea RA, [PC+RC]
2386 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.e)], RA; jb ->fff_fallback_1
2387 | add RB, #STR
2494 |.if X64 2388 |.if X64
2495 | mov TMP3, RC 2389 | mov TMP3, RC
2496 |.else 2390 |.else
2497 | mov ARG3, RC 2391 | mov ARG3, RC
2498 |.endif 2392 |.endif
2499 | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2500 |1: 2393 |1:
2501 | movzx RA, byte [RB] 2394 | movzx RA, byte [RB]
2502 | add RB, 1 2395 | add RB, 1
@@ -2511,17 +2404,18 @@ static void build_subroutines(BuildCtx *ctx)
2511 | .ffunc_1 name 2404 | .ffunc_1 name
2512 | ffgccheck 2405 | ffgccheck
2513 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback 2406 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2407 | mov TMP2, PC // Need another temp register.
2514 | mov STR:RB, [BASE] 2408 | mov STR:RB, [BASE]
2515 | mov RC, STR:RB->len 2409 | mov RC, STR:RB->len
2516 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1 2410 | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.b)]
2411 | lea RA, [PC+RC]
2412 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.e)], RA; jb ->fff_fallback_1
2517 | add RB, #STR 2413 | add RB, #STR
2518 | mov TMP2, PC // Need another temp register.
2519 |.if X64 2414 |.if X64
2520 | mov TMP3, RC 2415 | mov TMP3, RC
2521 |.else 2416 |.else
2522 | mov ARG3, RC 2417 | mov ARG3, RC
2523 |.endif 2418 |.endif
2524 | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2525 | jmp >3 2419 | jmp >3
2526 |1: // ASCII case conversion. Yes, this is suboptimal code (do you care?). 2420 |1: // ASCII case conversion. Yes, this is suboptimal code (do you care?).
2527 | movzx RA, byte [RB+RC] 2421 | movzx RA, byte [RB+RC]
@@ -2543,23 +2437,6 @@ static void build_subroutines(BuildCtx *ctx)
2543 |ffstring_case string_lower, 0x41, 0x5a 2437 |ffstring_case string_lower, 0x41, 0x5a
2544 |ffstring_case string_upper, 0x61, 0x7a 2438 |ffstring_case string_upper, 0x61, 0x7a
2545 | 2439 |
2546 |//-- Table library ------------------------------------------------------
2547 |
2548 |.ffunc_1 table_getn
2549 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
2550 | mov RB, BASE // Save BASE.
2551 | mov TAB:FCARG1, [BASE]
2552 | call extern lj_tab_len@4 // LJ_FASTCALL (GCtab *t)
2553 | // Length of table returned in eax (RD).
2554 | mov BASE, RB // Restore BASE.
2555 |.if DUALNUM
2556 | mov RB, RD; jmp ->fff_resi
2557 |.elif SSE
2558 | cvtsi2sd xmm0, RD; jmp ->fff_resxmm0
2559 |.else
2560 | mov ARG1, RD; fild ARG1; jmp ->fff_resn
2561 |.endif
2562 |
2563 |//-- Bit library -------------------------------------------------------- 2440 |//-- Bit library --------------------------------------------------------
2564 | 2441 |
2565 |.define TOBIT_BIAS, 0x59c00000 // 2^52 + 2^51 (float, not double!). 2442 |.define TOBIT_BIAS, 0x59c00000 // 2^52 + 2^51 (float, not double!).
@@ -2567,11 +2444,7 @@ static void build_subroutines(BuildCtx *ctx)
2567 |.macro .ffunc_bit, name, kind 2444 |.macro .ffunc_bit, name, kind
2568 | .ffunc_1 name 2445 | .ffunc_1 name
2569 |.if kind == 2 2446 |.if kind == 2
2570 |.if SSE
2571 | sseconst_tobit xmm1, RBa 2447 | sseconst_tobit xmm1, RBa
2572 |.else
2573 | mov TMP1, TOBIT_BIAS
2574 |.endif
2575 |.endif 2448 |.endif
2576 | cmp dword [BASE+4], LJ_TISNUM 2449 | cmp dword [BASE+4], LJ_TISNUM
2577 |.if DUALNUM 2450 |.if DUALNUM
@@ -2587,37 +2460,17 @@ static void build_subroutines(BuildCtx *ctx)
2587 |.else 2460 |.else
2588 | jae ->fff_fallback 2461 | jae ->fff_fallback
2589 |.endif 2462 |.endif
2590 |.if SSE
2591 | movsd xmm0, qword [BASE] 2463 | movsd xmm0, qword [BASE]
2592 |.if kind < 2 2464 |.if kind < 2
2593 | sseconst_tobit xmm1, RBa 2465 | sseconst_tobit xmm1, RBa
2594 |.endif 2466 |.endif
2595 | addsd xmm0, xmm1 2467 | addsd xmm0, xmm1
2596 | movd RB, xmm0 2468 | movd RB, xmm0
2597 |.else
2598 | fld qword [BASE]
2599 |.if kind < 2
2600 | mov TMP1, TOBIT_BIAS
2601 |.endif
2602 | fadd TMP1
2603 | fstp FPARG1
2604 |.if kind > 0
2605 | mov RB, ARG1
2606 |.endif
2607 |.endif
2608 |2: 2469 |2:
2609 |.endmacro 2470 |.endmacro
2610 | 2471 |
2611 |.ffunc_bit bit_tobit, 0 2472 |.ffunc_bit bit_tobit, 0
2612 |.if DUALNUM or SSE
2613 |.if not SSE
2614 | mov RB, ARG1
2615 |.endif
2616 | jmp ->fff_resbit 2473 | jmp ->fff_resbit
2617 |.else
2618 | fild ARG1
2619 | jmp ->fff_resn
2620 |.endif
2621 | 2474 |
2622 |.macro .ffunc_bit_op, name, ins 2475 |.macro .ffunc_bit_op, name, ins
2623 | .ffunc_bit name, 2 2476 | .ffunc_bit name, 2
@@ -2637,17 +2490,10 @@ static void build_subroutines(BuildCtx *ctx)
2637 |.else 2490 |.else
2638 | jae ->fff_fallback_bit_op 2491 | jae ->fff_fallback_bit_op
2639 |.endif 2492 |.endif
2640 |.if SSE
2641 | movsd xmm0, qword [RD] 2493 | movsd xmm0, qword [RD]
2642 | addsd xmm0, xmm1 2494 | addsd xmm0, xmm1
2643 | movd RA, xmm0 2495 | movd RA, xmm0
2644 | ins RB, RA 2496 | ins RB, RA
2645 |.else
2646 | fld qword [RD]
2647 | fadd TMP1
2648 | fstp FPARG1
2649 | ins RB, ARG1
2650 |.endif
2651 | sub RD, 8 2497 | sub RD, 8
2652 | jmp <1 2498 | jmp <1
2653 |.endmacro 2499 |.endmacro
@@ -2664,15 +2510,10 @@ static void build_subroutines(BuildCtx *ctx)
2664 | not RB 2510 | not RB
2665 |.if DUALNUM 2511 |.if DUALNUM
2666 | jmp ->fff_resbit 2512 | jmp ->fff_resbit
2667 |.elif SSE 2513 |.else
2668 |->fff_resbit: 2514 |->fff_resbit:
2669 | cvtsi2sd xmm0, RB 2515 | cvtsi2sd xmm0, RB
2670 | jmp ->fff_resxmm0 2516 | jmp ->fff_resxmm0
2671 |.else
2672 |->fff_resbit:
2673 | mov ARG1, RB
2674 | fild ARG1
2675 | jmp ->fff_resn
2676 |.endif 2517 |.endif
2677 | 2518 |
2678 |->fff_fallback_bit_op: 2519 |->fff_fallback_bit_op:
@@ -2685,22 +2526,13 @@ static void build_subroutines(BuildCtx *ctx)
2685 | // Note: no inline conversion from number for 2nd argument! 2526 | // Note: no inline conversion from number for 2nd argument!
2686 | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback 2527 | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback
2687 | mov RA, dword [BASE+8] 2528 | mov RA, dword [BASE+8]
2688 |.elif SSE 2529 |.else
2689 | .ffunc_nnsse name 2530 | .ffunc_nnsse name
2690 | sseconst_tobit xmm2, RBa 2531 | sseconst_tobit xmm2, RBa
2691 | addsd xmm0, xmm2 2532 | addsd xmm0, xmm2
2692 | addsd xmm1, xmm2 2533 | addsd xmm1, xmm2
2693 | movd RB, xmm0 2534 | movd RB, xmm0
2694 | movd RA, xmm1 2535 | movd RA, xmm1
2695 |.else
2696 | .ffunc_nn name
2697 | mov TMP1, TOBIT_BIAS
2698 | fadd TMP1
2699 | fstp FPARG3
2700 | fadd TMP1
2701 | fstp FPARG1
2702 | mov RA, ARG3
2703 | mov RB, ARG1
2704 |.endif 2536 |.endif
2705 | ins RB, cl // Assumes RA is ecx. 2537 | ins RB, cl // Assumes RA is ecx.
2706 | jmp ->fff_resbit 2538 | jmp ->fff_resbit
@@ -3051,27 +2883,9 @@ static void build_subroutines(BuildCtx *ctx)
3051 |//----------------------------------------------------------------------- 2883 |//-----------------------------------------------------------------------
3052 | 2884 |
3053 |// FP value rounding. Called by math.floor/math.ceil fast functions 2885 |// FP value rounding. Called by math.floor/math.ceil fast functions
3054 |// and from JIT code. 2886 |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
3055 | 2887 |.macro vm_round, name, mode
3056 |// x87 variant: Arg/ret on x87 stack. No int/xmm registers modified. 2888 |->name .. _sse:
3057 |.macro vm_round_x87, mode1, mode2
3058 | fnstcw word [esp+4] // Caveat: overwrites ARG1 and ARG2.
3059 | mov [esp+8], eax
3060 | mov ax, mode1
3061 | or ax, [esp+4]
3062 |.if mode2 ~= 0xffff
3063 | and ax, mode2
3064 |.endif
3065 | mov [esp+6], ax
3066 | fldcw word [esp+6]
3067 | frndint
3068 | fldcw word [esp+4]
3069 | mov eax, [esp+8]
3070 | ret
3071 |.endmacro
3072 |
3073 |// SSE variant: arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
3074 |.macro vm_round_sse, mode
3075 | sseconst_abs xmm2, RDa 2889 | sseconst_abs xmm2, RDa
3076 | sseconst_2p52 xmm3, RDa 2890 | sseconst_2p52 xmm3, RDa
3077 | movaps xmm1, xmm0 2891 | movaps xmm1, xmm0
@@ -3107,22 +2921,21 @@ static void build_subroutines(BuildCtx *ctx)
3107 | ret 2921 | ret
3108 |.endmacro 2922 |.endmacro
3109 | 2923 |
3110 |.macro vm_round, name, ssemode, mode1, mode2 2924 |->vm_floor:
3111 |->name: 2925 |.if not X64
3112 |.if not SSE 2926 | movsd xmm0, qword [esp+4]
3113 | vm_round_x87 mode1, mode2 2927 | call ->vm_floor_sse
2928 | movsd qword [esp+4], xmm0 // Overwrite callee-owned arg.
2929 | fld qword [esp+4]
2930 | ret
3114 |.endif 2931 |.endif
3115 |->name .. _sse:
3116 | vm_round_sse ssemode
3117 |.endmacro
3118 | 2932 |
3119 | vm_round vm_floor, 0, 0x0400, 0xf7ff 2933 | vm_round vm_floor, 0
3120 | vm_round vm_ceil, 1, 0x0800, 0xfbff 2934 | vm_round vm_ceil, 1
3121 | vm_round vm_trunc, 2, 0x0c00, 0xffff 2935 | vm_round vm_trunc, 2
3122 | 2936 |
3123 |// FP modulo x%y. Called by BC_MOD* and vm_arith. 2937 |// FP modulo x%y. Called by BC_MOD* and vm_arith.
3124 |->vm_mod: 2938 |->vm_mod:
3125 |.if SSE
3126 |// Args in xmm0/xmm1, return value in xmm0. 2939 |// Args in xmm0/xmm1, return value in xmm0.
3127 |// Caveat: xmm0-xmm5 and RC (eax) modified! 2940 |// Caveat: xmm0-xmm5 and RC (eax) modified!
3128 | movaps xmm5, xmm0 2941 | movaps xmm5, xmm0
@@ -3150,23 +2963,6 @@ static void build_subroutines(BuildCtx *ctx)
3150 | movaps xmm0, xmm5 2963 | movaps xmm0, xmm5
3151 | subsd xmm0, xmm1 2964 | subsd xmm0, xmm1
3152 | ret 2965 | ret
3153 |.else
3154 |// Args/ret on x87 stack (y on top). No xmm registers modified.
3155 |// Caveat: needs 3 slots on x87 stack! RC (eax) modified!
3156 | fld st1
3157 | fdiv st1
3158 | fnstcw word [esp+4]
3159 | mov ax, 0x0400
3160 | or ax, [esp+4]
3161 | and ax, 0xf7ff
3162 | mov [esp+6], ax
3163 | fldcw word [esp+6]
3164 | frndint
3165 | fldcw word [esp+4]
3166 | fmulp st1
3167 | fsubp st1
3168 | ret
3169 |.endif
3170 | 2966 |
3171 |// FP log2(x). Called by math.log(x, base). 2967 |// FP log2(x). Called by math.log(x, base).
3172 |->vm_log2: 2968 |->vm_log2:
@@ -3217,105 +3013,15 @@ static void build_subroutines(BuildCtx *ctx)
3217 | 3013 |
3218 |// Generic power function x^y. Called by BC_POW, math.pow fast function, 3014 |// Generic power function x^y. Called by BC_POW, math.pow fast function,
3219 |// and vm_arith. 3015 |// and vm_arith.
3220 |// Args/ret on x87 stack (y on top). RC (eax) modified.
3221 |// Caveat: needs 3 slots on x87 stack!
3222 |->vm_pow:
3223 |.if not SSE
3224 | fist dword [esp+4] // Store/reload int before comparison.
3225 | fild dword [esp+4] // Integral exponent used in vm_powi.
3226 | fucomip st1
3227 | jnz >8 // Branch for FP exponents.
3228 | jp >9 // Branch for NaN exponent.
3229 | fpop // Pop y and fallthrough to vm_powi.
3230 |
3231 |// FP/int power function x^i. Arg1/ret on x87 stack.
3232 |// Arg2 (int) on C stack. RC (eax) modified.
3233 |// Caveat: needs 2 slots on x87 stack!
3234 | mov eax, [esp+4]
3235 | cmp eax, 1; jle >6 // i<=1?
3236 | // Now 1 < (unsigned)i <= 0x80000000.
3237 |1: // Handle leading zeros.
3238 | test eax, 1; jnz >2
3239 | fmul st0
3240 | shr eax, 1
3241 | jmp <1
3242 |2:
3243 | shr eax, 1; jz >5
3244 | fdup
3245 |3: // Handle trailing bits.
3246 | fmul st0
3247 | shr eax, 1; jz >4
3248 | jnc <3
3249 | fmul st1, st0
3250 | jmp <3
3251 |4:
3252 | fmulp st1
3253 |5:
3254 | ret
3255 |6:
3256 | je <5 // x^1 ==> x
3257 | jb >7
3258 | fld1; fdivrp st1
3259 | neg eax
3260 | cmp eax, 1; je <5 // x^-1 ==> 1/x
3261 | jmp <1 // x^-i ==> (1/x)^i
3262 |7:
3263 | fpop; fld1 // x^0 ==> 1
3264 | ret
3265 |
3266 |8: // FP/FP power function x^y.
3267 | fst dword [esp+4]
3268 | fxch
3269 | fst dword [esp+8]
3270 | mov eax, [esp+4]; shl eax, 1
3271 | cmp eax, 0xff000000; je >2 // x^+-Inf?
3272 | mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y?
3273 | cmp eax, 0xff000000; je >4 // +-Inf^y?
3274 | fyl2x
3275 | jmp ->vm_exp2raw
3276 |
3277 |9: // Handle x^NaN.
3278 | fld1
3279 | fucomip st2
3280 | je >1 // 1^NaN ==> 1
3281 | fxch // x^NaN ==> NaN
3282 |1:
3283 | fpop
3284 | ret
3285 |
3286 |2: // Handle x^+-Inf.
3287 | fabs
3288 | fld1
3289 | fucomip st1
3290 | je >3 // +-1^+-Inf ==> 1
3291 | fpop; fabs; fldz; mov eax, 0; setc al
3292 | ror eax, 1; xor eax, [esp+4]; jns >3 // |x|<>1, x^+-Inf ==> +Inf/0
3293 | fxch
3294 |3:
3295 | fpop1; fabs
3296 | ret
3297 |
3298 |4: // Handle +-0^y or +-Inf^y.
3299 | cmp dword [esp+4], 0; jge <3 // y >= 0, x^y ==> |x|
3300 | fpop; fpop
3301 | test eax, eax; jz >5 // y < 0, +-0^y ==> +Inf
3302 | fldz // y < 0, +-Inf^y ==> 0
3303 | ret
3304 |5:
3305 | mov dword [esp+4], 0x7f800000 // Return +Inf.
3306 | fld dword [esp+4]
3307 | ret
3308 |.endif
3309 |
3310 |// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified. 3016 |// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified.
3311 |// Needs 16 byte scratch area for x86. Also called from JIT code. 3017 |// Needs 16 byte scratch area for x86. Also called from JIT code.
3312 |->vm_pow_sse: 3018 |->vm_pow_sse:
3313 | cvtsd2si eax, xmm1 3019 | cvttsd2si eax, xmm1
3314 | cvtsi2sd xmm2, eax 3020 | cvtsi2sd xmm2, eax
3315 | ucomisd xmm1, xmm2 3021 | ucomisd xmm1, xmm2
3316 | jnz >8 // Branch for FP exponents. 3022 | jnz >8 // Branch for FP exponents.
3317 | jp >9 // Branch for NaN exponent. 3023 | jp >9 // Branch for NaN exponent.
3318 | // Fallthrough to vm_powi_sse. 3024 | // Fallthrough.
3319 | 3025 |
3320 |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified. 3026 |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
3321 |->vm_powi_sse: 3027 |->vm_powi_sse:
@@ -3437,8 +3143,8 @@ static void build_subroutines(BuildCtx *ctx)
3437 | .else 3143 | .else
3438 | .define fpmop, CARG1d 3144 | .define fpmop, CARG1d
3439 | .endif 3145 | .endif
3440 | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil 3146 | cmp fpmop, 1; jb ->vm_floor_sse; je ->vm_ceil_sse
3441 | cmp fpmop, 3; jb ->vm_trunc; ja >2 3147 | cmp fpmop, 3; jb ->vm_trunc_sse; ja >2
3442 | sqrtsd xmm0, xmm0; ret 3148 | sqrtsd xmm0, xmm0; ret
3443 |2: 3149 |2:
3444 | .if X64WIN 3150 | .if X64WIN
@@ -3478,14 +3184,13 @@ static void build_subroutines(BuildCtx *ctx)
3478 | ret 3184 | ret
3479 |.else // x86 calling convention. 3185 |.else // x86 calling convention.
3480 | .define fpmop, eax 3186 | .define fpmop, eax
3481 |.if SSE
3482 | mov fpmop, [esp+12] 3187 | mov fpmop, [esp+12]
3483 | movsd xmm0, qword [esp+4] 3188 | movsd xmm0, qword [esp+4]
3484 | cmp fpmop, 1; je >1; ja >2 3189 | cmp fpmop, 1; je >1; ja >2
3485 | call ->vm_floor; jmp >7 3190 | call ->vm_floor_sse; jmp >7
3486 |1: ; call ->vm_ceil; jmp >7 3191 |1: ; call ->vm_ceil_sse; jmp >7
3487 |2: ; cmp fpmop, 3; je >1; ja >2 3192 |2: ; cmp fpmop, 3; je >1; ja >2
3488 | call ->vm_trunc; jmp >7 3193 | call ->vm_trunc_sse; jmp >7
3489 |1: 3194 |1:
3490 | sqrtsd xmm0, xmm0 3195 | sqrtsd xmm0, xmm0
3491 |7: 3196 |7:
@@ -3503,23 +3208,6 @@ static void build_subroutines(BuildCtx *ctx)
3503 |2: ; cmp fpmop, 11; je >1; ja >9 3208 |2: ; cmp fpmop, 11; je >1; ja >9
3504 | fcos; ret 3209 | fcos; ret
3505 |1: ; fptan; fpop; ret 3210 |1: ; fptan; fpop; ret
3506 |.else
3507 | mov fpmop, [esp+12]
3508 | fld qword [esp+4]
3509 | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil
3510 | cmp fpmop, 3; jb ->vm_trunc; ja >2
3511 | fsqrt; ret
3512 |2: ; cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87
3513 | cmp fpmop, 7; je >1; ja >2
3514 | fldln2; fxch; fyl2x; ret
3515 |1: ; fld1; fxch; fyl2x; ret
3516 |2: ; cmp fpmop, 9; je >1; ja >2
3517 | fldlg2; fxch; fyl2x; ret
3518 |1: ; fsin; ret
3519 |2: ; cmp fpmop, 11; je >1; ja >9
3520 | fcos; ret
3521 |1: ; fptan; fpop; ret
3522 |.endif
3523 |.endif 3211 |.endif
3524 |9: ; int3 // Bad fpm. 3212 |9: ; int3 // Bad fpm.
3525 |.endif 3213 |.endif
@@ -3541,7 +3229,7 @@ static void build_subroutines(BuildCtx *ctx)
3541 |2: ; cmp foldop, 3; je >1; ja >2 3229 |2: ; cmp foldop, 3; je >1; ja >2
3542 | mulsd xmm0, xmm1; ret 3230 | mulsd xmm0, xmm1; ret
3543 |1: ; divsd xmm0, xmm1; ret 3231 |1: ; divsd xmm0, xmm1; ret
3544 |2: ; cmp foldop, 5; jb ->vm_mod; je ->vm_pow 3232 |2: ; cmp foldop, 5; jb ->vm_mod; je ->vm_pow_sse
3545 | cmp foldop, 7; je >1; ja >2 3233 | cmp foldop, 7; je >1; ja >2
3546 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; ret 3234 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; ret
3547 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; ret 3235 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; ret
@@ -3574,7 +3262,7 @@ static void build_subroutines(BuildCtx *ctx)
3574 |1: ; maxsd xmm0, xmm1; ret 3262 |1: ; maxsd xmm0, xmm1; ret
3575 |9: ; int3 // Bad op. 3263 |9: ; int3 // Bad op.
3576 | 3264 |
3577 |.elif SSE // x86 calling convention with SSE ops. 3265 |.else // x86 calling convention.
3578 | 3266 |
3579 | .define foldop, eax 3267 | .define foldop, eax
3580 | mov foldop, [esp+20] 3268 | mov foldop, [esp+20]
@@ -3593,7 +3281,7 @@ static void build_subroutines(BuildCtx *ctx)
3593 |2: ; cmp foldop, 5 3281 |2: ; cmp foldop, 5
3594 | je >1; ja >2 3282 | je >1; ja >2
3595 | call ->vm_mod; jmp <7 3283 | call ->vm_mod; jmp <7
3596 |1: ; pop edx; call ->vm_pow; push edx; jmp <7 // Writes to scratch area. 3284 |1: ; pop edx; call ->vm_pow_sse; push edx; jmp <7 // Writes to scratch area.
3597 |2: ; cmp foldop, 7; je >1; ja >2 3285 |2: ; cmp foldop, 7; je >1; ja >2
3598 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; jmp <7 3286 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; jmp <7
3599 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; jmp <7 3287 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; jmp <7
@@ -3608,29 +3296,6 @@ static void build_subroutines(BuildCtx *ctx)
3608 |1: ; maxsd xmm0, xmm1; jmp <7 3296 |1: ; maxsd xmm0, xmm1; jmp <7
3609 |9: ; int3 // Bad op. 3297 |9: ; int3 // Bad op.
3610 | 3298 |
3611 |.else // x86 calling convention with x87 ops.
3612 |
3613 | mov eax, [esp+20]
3614 | fld qword [esp+4]
3615 | fld qword [esp+12]
3616 | cmp eax, 1; je >1; ja >2
3617 | faddp st1; ret
3618 |1: ; fsubp st1; ret
3619 |2: ; cmp eax, 3; je >1; ja >2
3620 | fmulp st1; ret
3621 |1: ; fdivp st1; ret
3622 |2: ; cmp eax, 5; jb ->vm_mod; je ->vm_pow
3623 | cmp eax, 7; je >1; ja >2
3624 | fpop; fchs; ret
3625 |1: ; fpop; fabs; ret
3626 |2: ; cmp eax, 9; je >1; ja >2
3627 | fpatan; ret
3628 |1: ; fxch; fscale; fpop1; ret
3629 |2: ; cmp eax, 11; je >1; ja >9
3630 | fucomi st1; fcmovnbe st1; fpop1; ret
3631 |1: ; fucomi st1; fcmovbe st1; fpop1; ret
3632 |9: ; int3 // Bad op.
3633 |
3634 |.endif 3299 |.endif
3635 | 3300 |
3636 |//----------------------------------------------------------------------- 3301 |//-----------------------------------------------------------------------
@@ -3943,19 +3608,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3943 | // RA is a number. 3608 | // RA is a number.
3944 | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp 3609 | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp
3945 | // RA is a number, RD is an integer. 3610 | // RA is a number, RD is an integer.
3946 |.if SSE
3947 | cvtsi2sd xmm0, dword [BASE+RD*8] 3611 | cvtsi2sd xmm0, dword [BASE+RD*8]
3948 | jmp >2 3612 | jmp >2
3949 |.else
3950 | fld qword [BASE+RA*8]
3951 | fild dword [BASE+RD*8]
3952 | jmp >3
3953 |.endif
3954 | 3613 |
3955 |8: // RA is an integer, RD is not an integer. 3614 |8: // RA is an integer, RD is not an integer.
3956 | ja ->vmeta_comp 3615 | ja ->vmeta_comp
3957 | // RA is an integer, RD is a number. 3616 | // RA is an integer, RD is a number.
3958 |.if SSE
3959 | cvtsi2sd xmm1, dword [BASE+RA*8] 3617 | cvtsi2sd xmm1, dword [BASE+RA*8]
3960 | movsd xmm0, qword [BASE+RD*8] 3618 | movsd xmm0, qword [BASE+RD*8]
3961 | add PC, 4 3619 | add PC, 4
@@ -3963,29 +3621,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3963 | jmp_comp jbe, ja, jb, jae, <9 3621 | jmp_comp jbe, ja, jb, jae, <9
3964 | jmp <6 3622 | jmp <6
3965 |.else 3623 |.else
3966 | fild dword [BASE+RA*8]
3967 | jmp >2
3968 |.endif
3969 |.else
3970 | checknum RA, ->vmeta_comp 3624 | checknum RA, ->vmeta_comp
3971 | checknum RD, ->vmeta_comp 3625 | checknum RD, ->vmeta_comp
3972 |.endif 3626 |.endif
3973 |.if SSE
3974 |1: 3627 |1:
3975 | movsd xmm0, qword [BASE+RD*8] 3628 | movsd xmm0, qword [BASE+RD*8]
3976 |2: 3629 |2:
3977 | add PC, 4 3630 | add PC, 4
3978 | ucomisd xmm0, qword [BASE+RA*8] 3631 | ucomisd xmm0, qword [BASE+RA*8]
3979 |3: 3632 |3:
3980 |.else
3981 |1:
3982 | fld qword [BASE+RA*8] // Reverse order, i.e like cmp D, A.
3983 |2:
3984 | fld qword [BASE+RD*8]
3985 |3:
3986 | add PC, 4
3987 | fcomparepp
3988 |.endif
3989 | // Unordered: all of ZF CF PF set, ordered: PF clear. 3633 | // Unordered: all of ZF CF PF set, ordered: PF clear.
3990 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. 3634 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
3991 |.if DUALNUM 3635 |.if DUALNUM
@@ -4025,43 +3669,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4025 | // RD is a number. 3669 | // RD is a number.
4026 | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5 3670 | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5
4027 | // RD is a number, RA is an integer. 3671 | // RD is a number, RA is an integer.
4028 |.if SSE
4029 | cvtsi2sd xmm0, dword [BASE+RA*8] 3672 | cvtsi2sd xmm0, dword [BASE+RA*8]
4030 |.else
4031 | fild dword [BASE+RA*8]
4032 |.endif
4033 | jmp >2 3673 | jmp >2
4034 | 3674 |
4035 |8: // RD is an integer, RA is not an integer. 3675 |8: // RD is an integer, RA is not an integer.
4036 | ja >5 3676 | ja >5
4037 | // RD is an integer, RA is a number. 3677 | // RD is an integer, RA is a number.
4038 |.if SSE
4039 | cvtsi2sd xmm0, dword [BASE+RD*8] 3678 | cvtsi2sd xmm0, dword [BASE+RD*8]
4040 | ucomisd xmm0, qword [BASE+RA*8] 3679 | ucomisd xmm0, qword [BASE+RA*8]
4041 |.else
4042 | fild dword [BASE+RD*8]
4043 | fld qword [BASE+RA*8]
4044 |.endif
4045 | jmp >4 3680 | jmp >4
4046 | 3681 |
4047 |.else 3682 |.else
4048 | cmp RB, LJ_TISNUM; jae >5 3683 | cmp RB, LJ_TISNUM; jae >5
4049 | checknum RA, >5 3684 | checknum RA, >5
4050 |.endif 3685 |.endif
4051 |.if SSE
4052 |1: 3686 |1:
4053 | movsd xmm0, qword [BASE+RA*8] 3687 | movsd xmm0, qword [BASE+RA*8]
4054 |2: 3688 |2:
4055 | ucomisd xmm0, qword [BASE+RD*8] 3689 | ucomisd xmm0, qword [BASE+RD*8]
4056 |4: 3690 |4:
4057 |.else
4058 |1:
4059 | fld qword [BASE+RA*8]
4060 |2:
4061 | fld qword [BASE+RD*8]
4062 |4:
4063 | fcomparepp
4064 |.endif
4065 iseqne_fp: 3691 iseqne_fp:
4066 if (vk) { 3692 if (vk) {
4067 | jp >2 // Unordered means not equal. 3693 | jp >2 // Unordered means not equal.
@@ -4184,39 +3810,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4184 | // RA is a number. 3810 | // RA is a number.
4185 | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1 3811 | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1
4186 | // RA is a number, RD is an integer. 3812 | // RA is a number, RD is an integer.
4187 |.if SSE
4188 | cvtsi2sd xmm0, dword [KBASE+RD*8] 3813 | cvtsi2sd xmm0, dword [KBASE+RD*8]
4189 |.else
4190 | fild dword [KBASE+RD*8]
4191 |.endif
4192 | jmp >2 3814 | jmp >2
4193 | 3815 |
4194 |8: // RA is an integer, RD is a number. 3816 |8: // RA is an integer, RD is a number.
4195 |.if SSE
4196 | cvtsi2sd xmm0, dword [BASE+RA*8] 3817 | cvtsi2sd xmm0, dword [BASE+RA*8]
4197 | ucomisd xmm0, qword [KBASE+RD*8] 3818 | ucomisd xmm0, qword [KBASE+RD*8]
4198 |.else
4199 | fild dword [BASE+RA*8]
4200 | fld qword [KBASE+RD*8]
4201 |.endif
4202 | jmp >4 3819 | jmp >4
4203 |.else 3820 |.else
4204 | cmp RB, LJ_TISNUM; jae >3 3821 | cmp RB, LJ_TISNUM; jae >3
4205 |.endif 3822 |.endif
4206 |.if SSE
4207 |1: 3823 |1:
4208 | movsd xmm0, qword [KBASE+RD*8] 3824 | movsd xmm0, qword [KBASE+RD*8]
4209 |2: 3825 |2:
4210 | ucomisd xmm0, qword [BASE+RA*8] 3826 | ucomisd xmm0, qword [BASE+RA*8]
4211 |4: 3827 |4:
4212 |.else
4213 |1:
4214 | fld qword [KBASE+RD*8]
4215 |2:
4216 | fld qword [BASE+RA*8]
4217 |4:
4218 | fcomparepp
4219 |.endif
4220 goto iseqne_fp; 3828 goto iseqne_fp;
4221 case BC_ISEQP: case BC_ISNEP: 3829 case BC_ISEQP: case BC_ISNEP:
4222 vk = op == BC_ISEQP; 3830 vk = op == BC_ISEQP;
@@ -4267,6 +3875,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4267 | ins_next 3875 | ins_next
4268 break; 3876 break;
4269 3877
3878 case BC_ISTYPE:
3879 | ins_AD // RA = src, RD = -type
3880 | add RD, [BASE+RA*8+4]
3881 | jne ->vmeta_istype
3882 | ins_next
3883 break;
3884 case BC_ISNUM:
3885 | ins_AD // RA = src, RD = -(TISNUM-1)
3886 | checknum RA, ->vmeta_istype
3887 | ins_next
3888 break;
3889
4270 /* -- Unary ops --------------------------------------------------------- */ 3890 /* -- Unary ops --------------------------------------------------------- */
4271 3891
4272 case BC_MOV: 3892 case BC_MOV:
@@ -4310,16 +3930,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4310 |.else 3930 |.else
4311 | checknum RD, ->vmeta_unm 3931 | checknum RD, ->vmeta_unm
4312 |.endif 3932 |.endif
4313 |.if SSE
4314 | movsd xmm0, qword [BASE+RD*8] 3933 | movsd xmm0, qword [BASE+RD*8]
4315 | sseconst_sign xmm1, RDa 3934 | sseconst_sign xmm1, RDa
4316 | xorps xmm0, xmm1 3935 | xorps xmm0, xmm1
4317 | movsd qword [BASE+RA*8], xmm0 3936 | movsd qword [BASE+RA*8], xmm0
4318 |.else
4319 | fld qword [BASE+RD*8]
4320 | fchs
4321 | fstp qword [BASE+RA*8]
4322 |.endif
4323 |.if DUALNUM 3937 |.if DUALNUM
4324 | jmp <9 3938 | jmp <9
4325 |.else 3939 |.else
@@ -4335,15 +3949,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4335 |1: 3949 |1:
4336 | mov dword [BASE+RA*8+4], LJ_TISNUM 3950 | mov dword [BASE+RA*8+4], LJ_TISNUM
4337 | mov dword [BASE+RA*8], RD 3951 | mov dword [BASE+RA*8], RD
4338 |.elif SSE 3952 |.else
4339 | xorps xmm0, xmm0 3953 | xorps xmm0, xmm0
4340 | cvtsi2sd xmm0, dword STR:RD->len 3954 | cvtsi2sd xmm0, dword STR:RD->len
4341 |1: 3955 |1:
4342 | movsd qword [BASE+RA*8], xmm0 3956 | movsd qword [BASE+RA*8], xmm0
4343 |.else
4344 | fild dword STR:RD->len
4345 |1:
4346 | fstp qword [BASE+RA*8]
4347 |.endif 3957 |.endif
4348 | ins_next 3958 | ins_next
4349 |2: 3959 |2:
@@ -4361,11 +3971,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4361 | // Length of table returned in eax (RD). 3971 | // Length of table returned in eax (RD).
4362 |.if DUALNUM 3972 |.if DUALNUM
4363 | // Nothing to do. 3973 | // Nothing to do.
4364 |.elif SSE
4365 | cvtsi2sd xmm0, RD
4366 |.else 3974 |.else
4367 | mov ARG1, RD 3975 | cvtsi2sd xmm0, RD
4368 | fild ARG1
4369 |.endif 3976 |.endif
4370 | mov BASE, RB // Restore BASE. 3977 | mov BASE, RB // Restore BASE.
4371 | movzx RA, PC_RA 3978 | movzx RA, PC_RA
@@ -4380,7 +3987,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4380 3987
4381 /* -- Binary ops -------------------------------------------------------- */ 3988 /* -- Binary ops -------------------------------------------------------- */
4382 3989
4383 |.macro ins_arithpre, x87ins, sseins, ssereg 3990 |.macro ins_arithpre, sseins, ssereg
4384 | ins_ABC 3991 | ins_ABC
4385 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); 3992 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
4386 ||switch (vk) { 3993 ||switch (vk) {
@@ -4389,37 +3996,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4389 | .if DUALNUM 3996 | .if DUALNUM
4390 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn 3997 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn
4391 | .endif 3998 | .endif
4392 | .if SSE 3999 | movsd xmm0, qword [BASE+RB*8]
4393 | movsd xmm0, qword [BASE+RB*8] 4000 | sseins ssereg, qword [KBASE+RC*8]
4394 | sseins ssereg, qword [KBASE+RC*8]
4395 | .else
4396 | fld qword [BASE+RB*8]
4397 | x87ins qword [KBASE+RC*8]
4398 | .endif
4399 || break; 4001 || break;
4400 ||case 1: 4002 ||case 1:
4401 | checknum RB, ->vmeta_arith_nv 4003 | checknum RB, ->vmeta_arith_nv
4402 | .if DUALNUM 4004 | .if DUALNUM
4403 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv 4005 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv
4404 | .endif 4006 | .endif
4405 | .if SSE 4007 | movsd xmm0, qword [KBASE+RC*8]
4406 | movsd xmm0, qword [KBASE+RC*8] 4008 | sseins ssereg, qword [BASE+RB*8]
4407 | sseins ssereg, qword [BASE+RB*8]
4408 | .else
4409 | fld qword [KBASE+RC*8]
4410 | x87ins qword [BASE+RB*8]
4411 | .endif
4412 || break; 4009 || break;
4413 ||default: 4010 ||default:
4414 | checknum RB, ->vmeta_arith_vv 4011 | checknum RB, ->vmeta_arith_vv
4415 | checknum RC, ->vmeta_arith_vv 4012 | checknum RC, ->vmeta_arith_vv
4416 | .if SSE 4013 | movsd xmm0, qword [BASE+RB*8]
4417 | movsd xmm0, qword [BASE+RB*8] 4014 | sseins ssereg, qword [BASE+RC*8]
4418 | sseins ssereg, qword [BASE+RC*8]
4419 | .else
4420 | fld qword [BASE+RB*8]
4421 | x87ins qword [BASE+RC*8]
4422 | .endif
4423 || break; 4015 || break;
4424 ||} 4016 ||}
4425 |.endmacro 4017 |.endmacro
@@ -4457,54 +4049,50 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4457 |.endmacro 4049 |.endmacro
4458 | 4050 |
4459 |.macro ins_arithpost 4051 |.macro ins_arithpost
4460 |.if SSE
4461 | movsd qword [BASE+RA*8], xmm0 4052 | movsd qword [BASE+RA*8], xmm0
4462 |.else
4463 | fstp qword [BASE+RA*8]
4464 |.endif
4465 |.endmacro 4053 |.endmacro
4466 | 4054 |
4467 |.macro ins_arith, x87ins, sseins 4055 |.macro ins_arith, sseins
4468 | ins_arithpre x87ins, sseins, xmm0 4056 | ins_arithpre sseins, xmm0
4469 | ins_arithpost 4057 | ins_arithpost
4470 | ins_next 4058 | ins_next
4471 |.endmacro 4059 |.endmacro
4472 | 4060 |
4473 |.macro ins_arith, intins, x87ins, sseins 4061 |.macro ins_arith, intins, sseins
4474 |.if DUALNUM 4062 |.if DUALNUM
4475 | ins_arithdn intins 4063 | ins_arithdn intins
4476 |.else 4064 |.else
4477 | ins_arith, x87ins, sseins 4065 | ins_arith, sseins
4478 |.endif 4066 |.endif
4479 |.endmacro 4067 |.endmacro
4480 4068
4481 | // RA = dst, RB = src1 or num const, RC = src2 or num const 4069 | // RA = dst, RB = src1 or num const, RC = src2 or num const
4482 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: 4070 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
4483 | ins_arith add, fadd, addsd 4071 | ins_arith add, addsd
4484 break; 4072 break;
4485 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: 4073 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
4486 | ins_arith sub, fsub, subsd 4074 | ins_arith sub, subsd
4487 break; 4075 break;
4488 case BC_MULVN: case BC_MULNV: case BC_MULVV: 4076 case BC_MULVN: case BC_MULNV: case BC_MULVV:
4489 | ins_arith imul, fmul, mulsd 4077 | ins_arith imul, mulsd
4490 break; 4078 break;
4491 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: 4079 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
4492 | ins_arith fdiv, divsd 4080 | ins_arith divsd
4493 break; 4081 break;
4494 case BC_MODVN: 4082 case BC_MODVN:
4495 | ins_arithpre fld, movsd, xmm1 4083 | ins_arithpre movsd, xmm1
4496 |->BC_MODVN_Z: 4084 |->BC_MODVN_Z:
4497 | call ->vm_mod 4085 | call ->vm_mod
4498 | ins_arithpost 4086 | ins_arithpost
4499 | ins_next 4087 | ins_next
4500 break; 4088 break;
4501 case BC_MODNV: case BC_MODVV: 4089 case BC_MODNV: case BC_MODVV:
4502 | ins_arithpre fld, movsd, xmm1 4090 | ins_arithpre movsd, xmm1
4503 | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. 4091 | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
4504 break; 4092 break;
4505 case BC_POW: 4093 case BC_POW:
4506 | ins_arithpre fld, movsd, xmm1 4094 | ins_arithpre movsd, xmm1
4507 | call ->vm_pow 4095 | call ->vm_pow_sse
4508 | ins_arithpost 4096 | ins_arithpost
4509 | ins_next 4097 | ins_next
4510 break; 4098 break;
@@ -4573,25 +4161,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4573 | movsx RD, RDW 4161 | movsx RD, RDW
4574 | mov dword [BASE+RA*8+4], LJ_TISNUM 4162 | mov dword [BASE+RA*8+4], LJ_TISNUM
4575 | mov dword [BASE+RA*8], RD 4163 | mov dword [BASE+RA*8], RD
4576 |.elif SSE 4164 |.else
4577 | movsx RD, RDW // Sign-extend literal. 4165 | movsx RD, RDW // Sign-extend literal.
4578 | cvtsi2sd xmm0, RD 4166 | cvtsi2sd xmm0, RD
4579 | movsd qword [BASE+RA*8], xmm0 4167 | movsd qword [BASE+RA*8], xmm0
4580 |.else
4581 | fild PC_RD // Refetch signed RD from instruction.
4582 | fstp qword [BASE+RA*8]
4583 |.endif 4168 |.endif
4584 | ins_next 4169 | ins_next
4585 break; 4170 break;
4586 case BC_KNUM: 4171 case BC_KNUM:
4587 | ins_AD // RA = dst, RD = num const 4172 | ins_AD // RA = dst, RD = num const
4588 |.if SSE
4589 | movsd xmm0, qword [KBASE+RD*8] 4173 | movsd xmm0, qword [KBASE+RD*8]
4590 | movsd qword [BASE+RA*8], xmm0 4174 | movsd qword [BASE+RA*8], xmm0
4591 |.else
4592 | fld qword [KBASE+RD*8]
4593 | fstp qword [BASE+RA*8]
4594 |.endif
4595 | ins_next 4175 | ins_next
4596 break; 4176 break;
4597 case BC_KPRI: 4177 case BC_KPRI:
@@ -4698,18 +4278,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4698 case BC_USETN: 4278 case BC_USETN:
4699 | ins_AD // RA = upvalue #, RD = num const 4279 | ins_AD // RA = upvalue #, RD = num const
4700 | mov LFUNC:RB, [BASE-8] 4280 | mov LFUNC:RB, [BASE-8]
4701 |.if SSE
4702 | movsd xmm0, qword [KBASE+RD*8] 4281 | movsd xmm0, qword [KBASE+RD*8]
4703 |.else
4704 | fld qword [KBASE+RD*8]
4705 |.endif
4706 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] 4282 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
4707 | mov RA, UPVAL:RB->v 4283 | mov RA, UPVAL:RB->v
4708 |.if SSE
4709 | movsd qword [RA], xmm0 4284 | movsd qword [RA], xmm0
4710 |.else
4711 | fstp qword [RA]
4712 |.endif
4713 | ins_next 4285 | ins_next
4714 break; 4286 break;
4715 case BC_USETP: 4287 case BC_USETP:
@@ -4863,18 +4435,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4863 |.else 4435 |.else
4864 | // Convert number to int and back and compare. 4436 | // Convert number to int and back and compare.
4865 | checknum RC, >5 4437 | checknum RC, >5
4866 |.if SSE
4867 | movsd xmm0, qword [BASE+RC*8] 4438 | movsd xmm0, qword [BASE+RC*8]
4868 | cvtsd2si RC, xmm0 4439 | cvttsd2si RC, xmm0
4869 | cvtsi2sd xmm1, RC 4440 | cvtsi2sd xmm1, RC
4870 | ucomisd xmm0, xmm1 4441 | ucomisd xmm0, xmm1
4871 |.else
4872 | fld qword [BASE+RC*8]
4873 | fist ARG1
4874 | fild ARG1
4875 | fcomparepp
4876 | mov RC, ARG1
4877 |.endif
4878 | jne ->vmeta_tgetv // Generic numeric key? Use fallback. 4442 | jne ->vmeta_tgetv // Generic numeric key? Use fallback.
4879 |.endif 4443 |.endif
4880 | cmp RC, TAB:RB->asize // Takes care of unordered, too. 4444 | cmp RC, TAB:RB->asize // Takes care of unordered, too.
@@ -4998,6 +4562,32 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4998 | mov dword [BASE+RA*8+4], LJ_TNIL 4562 | mov dword [BASE+RA*8+4], LJ_TNIL
4999 | jmp <1 4563 | jmp <1
5000 break; 4564 break;
4565 case BC_TGETR:
4566 | ins_ABC // RA = dst, RB = table, RC = key
4567 | mov TAB:RB, [BASE+RB*8]
4568 |.if DUALNUM
4569 | mov RC, dword [BASE+RC*8]
4570 |.else
4571 | cvttsd2si RC, qword [BASE+RC*8]
4572 |.endif
4573 | cmp RC, TAB:RB->asize
4574 | jae ->vmeta_tgetr // Not in array part? Use fallback.
4575 | shl RC, 3
4576 | add RC, TAB:RB->array
4577 | // Get array slot.
4578 |->BC_TGETR_Z:
4579 |.if X64
4580 | mov RBa, [RC]
4581 | mov [BASE+RA*8], RBa
4582 |.else
4583 | mov RB, [RC]
4584 | mov RC, [RC+4]
4585 | mov [BASE+RA*8], RB
4586 | mov [BASE+RA*8+4], RC
4587 |.endif
4588 |->BC_TGETR2_Z:
4589 | ins_next
4590 break;
5001 4591
5002 case BC_TSETV: 4592 case BC_TSETV:
5003 | ins_ABC // RA = src, RB = table, RC = key 4593 | ins_ABC // RA = src, RB = table, RC = key
@@ -5011,18 +4601,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5011 |.else 4601 |.else
5012 | // Convert number to int and back and compare. 4602 | // Convert number to int and back and compare.
5013 | checknum RC, >5 4603 | checknum RC, >5
5014 |.if SSE
5015 | movsd xmm0, qword [BASE+RC*8] 4604 | movsd xmm0, qword [BASE+RC*8]
5016 | cvtsd2si RC, xmm0 4605 | cvttsd2si RC, xmm0
5017 | cvtsi2sd xmm1, RC 4606 | cvtsi2sd xmm1, RC
5018 | ucomisd xmm0, xmm1 4607 | ucomisd xmm0, xmm1
5019 |.else
5020 | fld qword [BASE+RC*8]
5021 | fist ARG1
5022 | fild ARG1
5023 | fcomparepp
5024 | mov RC, ARG1
5025 |.endif
5026 | jne ->vmeta_tsetv // Generic numeric key? Use fallback. 4608 | jne ->vmeta_tsetv // Generic numeric key? Use fallback.
5027 |.endif 4609 |.endif
5028 | cmp RC, TAB:RB->asize // Takes care of unordered, too. 4610 | cmp RC, TAB:RB->asize // Takes care of unordered, too.
@@ -5192,6 +4774,39 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5192 | movzx RA, PC_RA // Restore RA. 4774 | movzx RA, PC_RA // Restore RA.
5193 | jmp <2 4775 | jmp <2
5194 break; 4776 break;
4777 case BC_TSETR:
4778 | ins_ABC // RA = src, RB = table, RC = key
4779 | mov TAB:RB, [BASE+RB*8]
4780 |.if DUALNUM
4781 | mov RC, dword [BASE+RC*8]
4782 |.else
4783 | cvttsd2si RC, qword [BASE+RC*8]
4784 |.endif
4785 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
4786 | jnz >7
4787 |2:
4788 | cmp RC, TAB:RB->asize
4789 | jae ->vmeta_tsetr
4790 | shl RC, 3
4791 | add RC, TAB:RB->array
4792 | // Set array slot.
4793 |->BC_TSETR_Z:
4794 |.if X64
4795 | mov RBa, [BASE+RA*8]
4796 | mov [RC], RBa
4797 |.else
4798 | mov RB, [BASE+RA*8+4]
4799 | mov RA, [BASE+RA*8]
4800 | mov [RC+4], RB
4801 | mov [RC], RA
4802 |.endif
4803 | ins_next
4804 |
4805 |7: // Possible table write barrier for the value. Skip valiswhite check.
4806 | barrierback TAB:RB, RA
4807 | movzx RA, PC_RA // Restore RA.
4808 | jmp <2
4809 break;
5195 4810
5196 case BC_TSETM: 4811 case BC_TSETM:
5197 | ins_AD // RA = base (table at base-1), RD = num const (start index) 4812 | ins_AD // RA = base (table at base-1), RD = num const (start index)
@@ -5386,10 +5001,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5386 |.if DUALNUM 5001 |.if DUALNUM
5387 | mov dword [BASE+RA*8+4], LJ_TISNUM 5002 | mov dword [BASE+RA*8+4], LJ_TISNUM
5388 | mov dword [BASE+RA*8], RC 5003 | mov dword [BASE+RA*8], RC
5389 |.elif SSE
5390 | cvtsi2sd xmm0, RC
5391 |.else 5004 |.else
5392 | fild dword [BASE+RA*8-8] 5005 | cvtsi2sd xmm0, RC
5393 |.endif 5006 |.endif
5394 | // Copy array slot to returned value. 5007 | // Copy array slot to returned value.
5395 |.if X64 5008 |.if X64
@@ -5405,10 +5018,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5405 | // Return array index as a numeric key. 5018 | // Return array index as a numeric key.
5406 |.if DUALNUM 5019 |.if DUALNUM
5407 | // See above. 5020 | // See above.
5408 |.elif SSE
5409 | movsd qword [BASE+RA*8], xmm0
5410 |.else 5021 |.else
5411 | fstp qword [BASE+RA*8] 5022 | movsd qword [BASE+RA*8], xmm0
5412 |.endif 5023 |.endif
5413 | mov [BASE+RA*8-8], RC // Update control var. 5024 | mov [BASE+RA*8-8], RC // Update control var.
5414 |2: 5025 |2:
@@ -5421,9 +5032,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5421 | 5032 |
5422 |4: // Skip holes in array part. 5033 |4: // Skip holes in array part.
5423 | add RC, 1 5034 | add RC, 1
5424 |.if not (DUALNUM or SSE)
5425 | mov [BASE+RA*8-8], RC
5426 |.endif
5427 | jmp <1 5035 | jmp <1
5428 | 5036 |
5429 |5: // Traverse hash part. 5037 |5: // Traverse hash part.
@@ -5757,7 +5365,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5757 if (!vk) { 5365 if (!vk) {
5758 | cmp RB, LJ_TISNUM; jae ->vmeta_for 5366 | cmp RB, LJ_TISNUM; jae ->vmeta_for
5759 } 5367 }
5760 |.if SSE
5761 | movsd xmm0, qword FOR_IDX 5368 | movsd xmm0, qword FOR_IDX
5762 | movsd xmm1, qword FOR_STOP 5369 | movsd xmm1, qword FOR_STOP
5763 if (vk) { 5370 if (vk) {
@@ -5770,22 +5377,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5770 | ucomisd xmm1, xmm0 5377 | ucomisd xmm1, xmm0
5771 |1: 5378 |1:
5772 | movsd qword FOR_EXT, xmm0 5379 | movsd qword FOR_EXT, xmm0
5773 |.else
5774 | fld qword FOR_STOP
5775 | fld qword FOR_IDX
5776 if (vk) {
5777 | fadd qword FOR_STEP // nidx = idx + step
5778 | fst qword FOR_IDX
5779 | fst qword FOR_EXT
5780 | test RB, RB; js >1
5781 } else {
5782 | fst qword FOR_EXT
5783 | jl >1
5784 }
5785 | fxch // Swap lim/(n)idx if step non-negative.
5786 |1:
5787 | fcomparepp
5788 |.endif
5789 if (op == BC_FORI) { 5380 if (op == BC_FORI) {
5790 |.if DUALNUM 5381 |.if DUALNUM
5791 | jnb <7 5382 | jnb <7
@@ -5813,11 +5404,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5813 |2: 5404 |2:
5814 | ins_next 5405 | ins_next
5815 |.endif 5406 |.endif
5816 |.if SSE 5407 |
5817 |3: // Invert comparison if step is negative. 5408 |3: // Invert comparison if step is negative.
5818 | ucomisd xmm0, xmm1 5409 | ucomisd xmm0, xmm1
5819 | jmp <1 5410 | jmp <1
5820 |.endif
5821 break; 5411 break;
5822 5412
5823 case BC_ITERL: 5413 case BC_ITERL: